# bikes_santander.py


"""
Experiments for London Cycling Dataset

Nodes	735
Temporal Edges	32,258
Time span	1 day
"""

import experiment
import os
import pandas as pd

# Read data: load the journey extract and turn one day of trips into a
# temporal edge list with the columns 'source', 'timestamp', 'target'.

filepath = os.path.join("Data", "9b-Journey-Data-Extract-06Sep15-19Sep15.csv")
# pick 9th of September-2015 (both bounds are inclusive)
start_date = "2015-9-9 0:00:00"
end_date = "2015-9-9 23:59:59"

# Station ids are read with the nullable 'Int64' dtype so that rows with a
# missing station can be loaded and then dropped explicitly below.
# NOTE(review): 'Start Date' is parsed with pandas' default rules
# (dayfirst=False) -- confirm this matches the date format used in the CSV.
df = pd.read_csv(
    filepath,
    dtype={'StartStation Id': 'Int64', 'EndStation Id': 'Int64'},
    usecols=lambda x: x in ['Start Date', 'StartStation Id', 'EndStation Id'],
    parse_dates=['Start Date'],
)

# Label the columns by name instead of by position: `usecols` keeps the
# column order of the CSV file, so positional labels would silently depend
# on how the file happens to be laid out.
df = df.rename(columns={
    'StartStation Id': 'source',
    'Start Date': 'timestamp',
    'EndStation Id': 'target',
})[['source', 'timestamp', 'target']]

# Remove null values: a trip needs both endpoints to form an edge.
df = df.dropna(subset=['source', 'target'])

# Sort chronologically. A stable sort keeps trips that share a timestamp in
# file order, so the row order (and any later truncation by row count) is
# reproducible.
df = df.sort_values('timestamp', kind='mergesort')

# Filter dates
if start_date and end_date:
    after_start_date = df["timestamp"] >= start_date
    before_end_date = df["timestamp"] <= end_date
    between_two_dates = after_start_date & before_end_date
    df = df.loc[between_two_dates]

# Remove self-loops
df = df[df['source'] != df['target']]

# Convert datetimes to epoch seconds. The unit is pinned to nanoseconds
# first so that dividing by 1e9 yields seconds whatever resolution pandas
# chose when parsing; `assign` avoids writing into a filtered view.
df = df.assign(
    timestamp=df['timestamp'].astype('datetime64[ns]').astype('int64') // 1e9
)

def _swap(row):
    """Order the endpoints of one edge so that 'source' <= 'target'.

    ``row`` is any mapping-like object with 'source' and 'target' entries.
    It is modified in place when the endpoints are out of order, and the
    same object is returned.
    """
    first, second = row['source'], row['target']
    if first > second:
        row['source'] = second
        row['target'] = first
    return row
    
# Undirected graph: rewrite every row so that 'source' <= 'target'.
df = df.apply(_swap, axis=1)

# Scale timestamps to a zeroth reference point: subtract the smallest
# timestamp from every row and keep that offset in refValue.
refValue = df['timestamp'].min()
df['timestamp'] = df['timestamp'] - refValue

# Keep only the first 1000 rows.
df = df.iloc[:1000]

# # Experiments

# Tuning parameters
theta = 1e-5
eta = 1
tuning_params = dict(theta=theta, eta=eta)

# User parameters
num_roles = 3
num_segments = 7
num_levels = 5
dest_folder = './Results/bikes/'

import time

# NOTE(review): `algo_ver` is passed to Experiment below but is not assigned
# anywhere in the visible script; unless it is defined elsewhere, this call
# raises NameError. Confirm the intended value and define it alongside the
# user parameters.
start_time = time.time()

exp_obj = experiment.Experiment(
    df,
    num_roles,
    num_segments,
    algo_ver,
    dest_folder,
    tuning_params,
    num_levels,
    refValue,
)
exp_obj.execute()

# Report the wall-clock time elapsed since start_time.
print("--- %s seconds ---" % (time.time() - start_time))


# Likelihood for a single group and a single segment (normalized likelihood)
# num_roles=1
# num_segments=1
# num_levels=1
# exp_obj = experiment.Experiment(df,num_roles,num_segments,algo_ver,dest_folder,tuning_params,num_levels,refValue)    
# exp_obj.execute()