# Skip to content
# Snippets Groups Projects
# case-study.py 5.54 KiB
# Newer Older



"""
Experiments for London Cycling Dataset
CASE STUDY-6

Nodes	735
Temporal Edges	32,258
Time span	1 day
"""

import experiment
import os
import pandas as pd

# Read data

filepath = os.path.join("Data", "9b-Journey-Data-Extract-06Sep15-19Sep15.csv")
# Analysis window: 9 September 2015, both bounds inclusive.
start_date = "2015-9-9 0:00:00"
end_date = "2015-9-9 23:59:59"

# Load only the three columns that describe a temporal edge. Station ids use
# the nullable 'Int64' dtype so that rows with a missing station can be read
# and then dropped explicitly below.
# NOTE(review): 'Start Date' is parsed with pandas' default day/month
# inference. If the extract stores dates as DD/MM/YYYY, confirm whether
# dayfirst=True is required before pointing the window at another day.
df = pd.read_csv(
    filepath,
    dtype={'StartStation Id': 'Int64', 'EndStation Id': 'Int64'},
    usecols=['Start Date', 'StartStation Id', 'EndStation Id'],
    parse_dates=['Start Date'],
)

# read_csv returns the selected columns in file order, not in the order they
# are listed, so relabel by name (not by position) and then fix the column
# order expected by the rest of the script.
df = df.rename(columns={
    'StartStation Id': 'source',
    'Start Date': 'timestamp',
    'EndStation Id': 'target',
})[['source', 'timestamp', 'target']]

# Drop trips with a missing endpoint on either side.
df = df.dropna(subset=['source', 'target'])

# Order trips chronologically.
df = df.sort_values('timestamp')

# Keep only trips that start inside the analysis window.
if start_date and end_date:
    df = df.loc[df['timestamp'].between(start_date, end_date)]

# Remove self-loops (trips that start and end at the same station). The copy
# makes the column assignment below operate on an independent frame.
df = df[df['source'] != df['target']].copy()

# Convert datetimes (nanoseconds since the epoch) to whole epoch seconds.
df['timestamp'] = df['timestamp'].astype('int64') // 1e9

def _swap(row):
    """Order the endpoints of one edge so that source <= target.

    The row is modified in place: when its 'source' value is greater than its
    'target' value the two are exchanged, otherwise it is left untouched.
    The same row object is returned.
    """
    src = row['source']
    dst = row['target']
    if src > dst:
        row['source'] = dst
        row['target'] = src
    return row
    
# Treat the graph as undirected: every row is passed through _swap, which
# exchanges the endpoints whenever source > target.
df = df.apply(_swap, axis=1)

# Shift timestamps so the earliest event sits at zero; refValue keeps the
# original offset of that earliest event.
refValue = df['timestamp'].min()
df['timestamp'] = df['timestamp'] - refValue

# # Experiments
# User-chosen settings handed to the experiment
num_roles = 5
num_segments = 5
num_levels = 2
algo_ver = 3
dest_folder = './Results/bikes/'

# Tuning parameters, bundled into one dict for the experiment
theta = 1e-5
eta = 1
tuning_params = dict(theta=theta, eta=eta)

import time
start_time = time.time()

# Build the experiment on the prepared edge list and run it.
exp_obj = experiment.Experiment(
    df,
    num_roles,
    num_segments,
    algo_ver,
    dest_folder,
    tuning_params,
    num_levels,
    refValue,
)
it, likelihood, group_dic, lambda_estimates, change_points_arr = exp_obj.execute()

# Report wall-clock time spent constructing and executing the experiment.
elapsed = time.time() - start_time
print("--- %s seconds ---" % elapsed)


# Likelihood for a single group and a single segment (normalized likelihood)
# num_roles=1
# num_segments=1
# num_levels=1
# exp_obj = experiment.Experiment(df,num_roles,num_segments,algo_ver,dest_folder,tuning_params,num_levels,refValue)    
# exp_obj.execute()


import plotly.express as px
import pandas as pd
import os
import plotly.io as pio
pio.renderers.default = 'svg'
# pio.renderers.default = 'browser'

# Station metadata (space-delimited file).
filepath = os.path.join("Data", "stations_df.csv")
# filepath = os.path.join("dt.csv")
df_stations = pd.read_csv(filepath, delimiter=' ')

# Keep only the columns needed for plotting.
df_stations = df_stations.filter(items=['id', 'lat', 'long', 'name'])

import numpy as np

# Every station id that appears as an endpoint of at least one edge.
nodes_arr = np.union1d(df['target'], df['source']).astype(int)
# list of nodes
nodes = nodes_arr.tolist()

# Keep only stations that take part in the graph. The 'id' column is looked
# up by name rather than by position, and membership is tested against a set
# instead of scanning a list once per station.
df_stations = df_stations[df_stations['id'].isin(set(nodes))].copy()

# Attach the group found by the experiment to each remaining station. A
# station id missing from group_dic raises KeyError.
df_stations['grp'] = [
    group_dic[station_id] for station_id in df_stations['id'].tolist()
]

import plotly.graph_objects as go
import plotly.io as pio

# From here on figures are rendered in the browser.
#pio.renderers.default = 'svg'
pio.renderers.default = 'browser'

# case-1: Scattergeo trace, one marker per station, with both colour and
# marker symbol taken from the integer group id.
df_stations["grp"] = df_stations["grp"].astype(int)

label_font = dict(
    color='black',
    size=18,
    family='Times New Roman',
)
marker_style = dict(size=10, symbol=df_stations["grp"])

station_trace = go.Scattergeo(
    lon=df_stations['long'],
    lat=df_stations['lat'],
    text=df_stations['name'],
    mode='markers',
    marker_color=df_stations['grp'],
    marker=marker_style,
    textfont=label_font,
)
fig = go.Figure(data=station_trace)

# Zoom the geo axes to the plotted stations.
fig.update_geos(fitbounds="locations")
fig.update_layout(mapbox_style="open-street-map")
# fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()


# case-2: stations on an OpenStreetMap tile map, coloured by the integer
# group id.
df_stations["grp"] = df_stations["grp"].astype(int)
fig = px.scatter_mapbox(
    df_stations,
    lat="lat",
    lon="long",
    color="grp",
    hover_name="name",
    hover_data=["name"],
    color_discrete_sequence=["fuchsia"],
    zoom=11,
    height=800,
)
fig.update_layout(mapbox_style="open-street-map").update_geos(fitbounds="locations")
# fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()


# case-3: stations on an OpenStreetMap tile map, with the group treated as a
# category (string labels) and coloured from the G10 qualitative palette.
df_stations["grp"] = df_stations["grp"].astype(str)

# category_orders must be a dict mapping a column name to the ordered list of
# its values. Order the group labels numerically so that e.g. "10" sorts after
# "9" rather than after "1".
group_order = sorted(df_stations["grp"].unique(), key=int)

fig = px.scatter_mapbox(
    df_stations,
    lat="lat",
    lon="long",
    color="grp",
    hover_name="name",
    hover_data=["name"],
    color_discrete_sequence=px.colors.qualitative.G10,
    zoom=11,
    height=800,
    category_orders={"grp": group_order},
)
fig.update_layout(mapbox_style="open-street-map")
fig.update_geos(fitbounds="locations")
# fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()


# case-4: same map as before with the group as a string category, using a
# hand-picked list of five colours.
df_stations["grp"] = df_stations["grp"].astype(str)
import plotly.express as px

group_palette = ["green", "yellow", "blue", "red", "magenta"]
fig = px.scatter_mapbox(
    df_stations,
    lat="lat",
    lon="long",
    color="grp",
    hover_name="name",
    hover_data=["name"],
    color_discrete_sequence=group_palette,
    zoom=11,
    height=800,
)
fig.update_layout(mapbox_style="open-street-map").update_geos(fitbounds="locations")
# fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()


##############################CORRECT#########################
# case-5: the variant marked as the one to keep. The integer group id drives
# both the marker colour (IceFire cyclical scale) and the marker size.
# NOTE(review): with size="grp", a group id of 0 presumably yields a
# zero-size marker -- confirm that group is still visible on the map.
df_stations["grp"] = df_stations["grp"].astype(int)
import plotly.express as px

fig = px.scatter_mapbox(
    df_stations,
    lat="lat",
    lon="long",
    color="grp",
    hover_name="name",
    hover_data=["name"],
    size="grp",
    color_continuous_scale=px.colors.cyclical.IceFire,
    size_max=13,
    zoom=11,
    height=800,
)
fig.update_layout(mapbox_style="open-street-map").update_geos(fitbounds="locations")
# fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()