Commit 22d0e78e authored by Chamalee Wickrama Arachch

experiments added: revision

parent c356edb5
.DS_Store 0 → 100644
File added
"""
Experiments for London Cycling Dataset
CASE STUDY-6
Nodes 735
Temporal Edges 32,258
Time span 1 day
"""
import experiment
import os
import pandas as pd
# read data
filepath = os.path.join("Data","9b-Journey-Data-Extract-06Sep15-19Sep15.csv")
# pick 9th of September-2015
start_date = "2015-9-9 0:00:00"
end_date = "2015-9-9 23:59:59"
# Read data
df = pd.read_csv(filepath, dtype={'StartStation Id': 'Int64', 'EndStation Id': 'Int64'}, usecols=lambda x: x in ['Start Date', 'StartStation Id', 'EndStation Id'], parse_dates=['Start Date'])
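# NOTE: the callable passed to usecols keeps only the three needed columns regardless of their
# position in the extract, and the nullable 'Int64' dtype tolerates missing station ids.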
df=df.set_axis(['source', 'timestamp', 'target'], axis=1)
# Remove rows whose target station is missing
df = df[df['target'].notnull()]
#sort
df=df.sort_values('timestamp')
# Filter dates
if start_date and end_date:
    after_start_date = df["timestamp"] >= start_date
    before_end_date = df["timestamp"] <= end_date
    between_two_dates = after_start_date & before_end_date
    df = df.loc[between_two_dates]
# Remove self-loops
df = df[((df['source'] ) != (df['target']))]
# convert datetime to epoch seconds (int64 nanoseconds floor-divided by 1e9)
df['timestamp'] = df['timestamp'].astype('int64')//1e9
def _swap(row):
    if row['source'] > row['target']:
        row['source'], row['target'] = row['target'], row['source']
    return row

# Undirected graph
df = df.apply(_swap, axis=1)
#scale timestamps for zeroth reference point
refValue = df['timestamp'].min()
df['timestamp'] -= refValue
# # Experiments
# User parameters
num_roles=5
num_segments=5
num_levels=2
algo_ver= 3
dest_folder='./Results/bikes/'
# tuning parameters
theta = 1e-5
eta = 1
tuning_params= {'theta':theta,'eta':eta}
import time
start_time = time.time()
exp_obj = experiment.Experiment(df,num_roles,num_segments,algo_ver,dest_folder,tuning_params,num_levels,refValue)
[it,likelihood,group_dic,lambda_estimates,change_points_arr] = exp_obj.execute()
print("--- %s seconds ---" % (time.time() - start_time))
# likelihood for single group and single segment # Normalized likelihood
# num_roles=1
# num_segments=1
# num_levels=1
# exp_obj = experiment.Experiment(df,num_roles,num_segments,algo_ver,dest_folder,tuning_params,num_levels,refValue)
# exp_obj.execute()
import plotly.express as px
import pandas as pd
import os
import plotly.io as pio
pio.renderers.default = 'svg'
# pio.renderers.default = 'browser'
filepath = os.path.join("Data","stations_df.csv")
# filepath = os.path.join("dt.csv")
df_stations = pd.read_csv(filepath, delimiter=' ')
df_stations = df_stations.filter(items=['id', 'lat','long','name'])
df_stations['grp'] = -1
import numpy as np
nodes_arr = np.union1d(df['target'],df['source']).astype(int)
# list of nodes
nodes = nodes_arr.tolist()
for i, row in df_stations.iterrows():
    _id = row['id']
    if _id in nodes:
        df_stations.at[i, 'grp'] = group_dic[_id]
df_stations = df_stations[df_stations['grp'] != -1]
import plotly.graph_objects as go
import plotly.io as pio
#pio.renderers.default = 'svg'
pio.renderers.default = 'browser'
# df_stations["grp"] = df_stations["grp"].astype(int)
#case-1
df_stations["grp"] = df_stations["grp"].astype(int)
fig = go.Figure(data=go.Scattergeo(
    lon=df_stations['long'],
    lat=df_stations['lat'],
    text=df_stations['name'],
    mode='markers',
    marker_color=df_stations['grp'],
    marker=dict(size=10, symbol=df_stations["grp"]),
    textfont=dict(
        color='black',
        size=18,
        family='Times New Roman'
    )
))
fig.update_geos(fitbounds="locations")
fig.update_layout(mapbox_style="open-street-map")
# fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()
#case-2
# import plotly.express as px
df_stations["grp"] = df_stations["grp"].astype(int)
fig = px.scatter_mapbox(df_stations, lat="lat", lon="long", color="grp", hover_name="name", hover_data=["name"],
color_discrete_sequence=["fuchsia"], zoom=11, height=800)
fig.update_layout(mapbox_style="open-street-map")
fig.update_geos(fitbounds="locations")
# fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()
#case-3
df_stations["grp"] = df_stations["grp"].astype(str)
# import plotly.express as px
fig = px.scatter_mapbox(df_stations, lat="lat", lon="long", color="grp", hover_name="name", hover_data=["name"],
color_discrete_sequence=px.colors.qualitative.G10, zoom=11, height=800,category_orders=df_stations['grp'])
fig.update_layout(mapbox_style="open-street-map")
fig.update_geos(fitbounds="locations")
# fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()
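# NOTE (API assumption): plotly's category_orders expects a dict keyed by column name, e.g.
# {"grp": sorted(df_stations["grp"].unique())}; passing the 'grp' Series directly, as above,
# is unlikely to order the legend as intended.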
#case-4
df_stations["grp"] = df_stations["grp"].astype(str)
import plotly.express as px
fig = px.scatter_mapbox(df_stations, lat="lat", lon="long", color="grp", hover_name="name", hover_data=["name"],
color_discrete_sequence=["green", "yellow", "blue", "red", "magenta"], zoom=11, height=800)
fig.update_layout(mapbox_style="open-street-map")
fig.update_geos(fitbounds="locations")
# fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()
##############################CORRECT#########################
#case-5
df_stations["grp"] = df_stations["grp"].astype(int)
import plotly.express as px
fig = px.scatter_mapbox(df_stations, lat="lat", lon="long", color="grp", hover_name="name", hover_data=["name"],
size="grp",color_continuous_scale=px.colors.cyclical.IceFire, size_max=13, zoom=11, height=800)
fig.update_layout(mapbox_style="open-street-map")
fig.update_geos(fitbounds="locations")
# fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()
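# Optionally persist the final map for sharing (a small sketch; assumes dest_folder already exists
# and the file name is illustrative):
# fig.write_html(os.path.join(dest_folder, "bike_station_roles.html"))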
@@ -50,6 +50,8 @@ class Experiment:
#Initialize lamda
lambda_estimates = np.zeros((num_roles, num_roles,num_segments) , dtype=float)
+global itr
### K-segmentation ###
if self.algo_ver == 1:
opt = optimize.Optimize( group_dic,lambda_estimates,change_points_arr,nodes,num_roles,num_segments,dic,None,self.tuning_params)
@@ -62,13 +64,16 @@ class Experiment:
### (K,H)-segmentation variant-2 ###
elif self.algo_ver == 3:
opt = optimize.Optimize( group_dic,lambda_estimates,change_points_arr,nodes,num_roles,num_segments,dic,self.num_levels,self.tuning_params)
-[group_dic,lambda_estimates,change_points_arr,likelihood] = opt.k_h_seg_var_2()
+[group_dic,lambda_estimates,change_points_arr,likelihood,itr] = opt.k_h_seg_var_2()
+print (itr)
### Level-dependent (K,H)-segmentation variant-2 ###
elif self.algo_ver == 4:
opt = optimize.Optimize( group_dic,lambda_estimates,change_points_arr,nodes,num_roles,num_segments,dic,self.num_levels,self.tuning_params)
-[group_dic,lambda_estimates,change_points_arr,likelihood,g_mapping] = opt.mm_k_h_seg_var_2()
+[group_dic,lambda_estimates,change_points_arr,likelihood,g_mapping,itr] = opt.mm_k_h_seg_var_2()
+print (itr)
# print('g_mapping_discoverd {}'.format(g_mapping))
# for e_h in range(0,self.num_levels):
# g_a = group_dic[e_h]
@@ -85,6 +90,7 @@ class Experiment:
# list_of_groups[val].append(idx)
# print('group assignments: {}'.format(list_of_groups))
# print('lambdas: {}'.format(lambda_estimates))
# return [likelihood,group_dic,lambda_estimates,change_points_arr]
# return [likelihood,group_dic]
-return likelihood
\ No newline at end of file
+return [itr,likelihood,group_dic,lambda_estimates,change_points_arr]
+# return [_itr,likelihood]
\ No newline at end of file
"""
Effect of Lambda
Dataset-1
"""
import numpy as np
import pandas as pd
import utils
import sbm_core
import math
from itertools import combinations
import itertools
from sklearn.metrics.cluster import adjusted_rand_score
# Initialize
np.random.seed(107)
num_roles=2
num_vertices=25
num_segments = 2
NO_SAMPLES= 95
group_assignment= np.random.randint(num_roles, size=(num_vertices))
nodes = np.arange(num_vertices)
list_of_groups= [[] for _ in range(num_roles)]
for idx, val in enumerate(group_assignment):
    list_of_groups[val].append(nodes[idx])
print(list_of_groups)
size_all_pairs = {}
for k in range(0, num_roles):
    for g in range(k, num_roles):
        U = list_of_groups[k]
        W = list_of_groups[g]
        if k == g:
            size_all_pairs[k, g] = math.comb(len(U), 2)
        if k != g:
            size_all_pairs[k, g] = len(U) * len(W)
lamda_arr = np.ones((num_roles, num_roles,num_segments) , dtype=float)
lamda_arr = 1e-1* np.random.randint(1,9, size=(num_roles, num_roles,num_segments))
# set value for each iteration (0 - 8); only the last assignment below takes effect
_itr = 8
_itr = 0
yu = (9-_itr)*.1
lamda_arr[0,0]=[yu, 0.1]
lamda_arr[0,1]= [0.1, yu]
lamda_arr[1,0]=lamda_arr[0,1]
lamda_arr[1,1]=[yu, yu]
lamda_arr_act = np.zeros((num_roles, num_roles,num_segments) , dtype=float)
change_points_arr = np.zeros((num_roles, num_roles, num_segments+1) , dtype=int)
df_all= None
points= list(range(0, (num_segments+1)*NO_SAMPLES, NO_SAMPLES))
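# Illustrative: with num_segments = 2 and NO_SAMPLES = 95 this yields points = [0, 95, 190],
# i.e. the integer time boundaries of the ground-truth segments.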
list1 = []
# Generate piecewise non-homogeneous poisson process
for k in range(0, num_roles):
    for g in range(k, num_roles):
        comb = []
        if k == g:
            comb = list(combinations(list_of_groups[k], 2))
            # print(type(comb))
        else:
            # comb = []
            key_data = [list_of_groups[k], list_of_groups[g]]
            comb = list(itertools.product(*key_data))
            # print(comb)
        if len(comb) != size_all_pairs[k, g]:
            print('not equal..')
        change_points_arr[k, g, :] = points
        lamda_arr[k, g, :] = lamda_arr[g, k, :]
        tot_count = np.zeros((num_segments), dtype=float)
        for pair in comb:
            for d in range(0, num_segments):
                s = np.random.poisson(lamda_arr[k, g, d], NO_SAMPLES)
                # print(np.count_nonzero(s))
                tot_count[d] += np.count_nonzero(s)
                list1 = [i for i, e in enumerate(s) if e != 0]
                if len(list1) == 0:
                    print('zero')
                list1 = [x + points[d] for x in list1]
                df = pd.DataFrame(data=list1)
                df.columns = ['timestamp']
                N = df.size
                list_start_stations = [pair[0]] * N
                list_end_stations = [pair[1]] * N
                df['source'] = list_start_stations
                df['target'] = list_end_stations
                df_all = pd.concat([df_all, df], ignore_index=True)
        for d in range(0, num_segments):
            lamda_arr_act[k, g, d] = tot_count[d] / (NO_SAMPLES * len(comb))
            # print(tot_count[d])
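# Note: tot_count[d] / (NO_SAMPLES * len(comb)) is the fraction of non-zero Poisson draws, so
# lamda_arr_act effectively estimates 1 - exp(-lambda) rather than lambda itself; the two nearly
# coincide for small rates but diverge for the larger values (up to 0.9) used here.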
## Other preparations
# Remove self loops
df_all = df_all[((df_all['source'] ) != (df_all['target']))]
#sort
df_all=df_all.sort_values('timestamp')
df_all = df_all[['target', 'timestamp','source']]
# Save as .csv file
# df_all.to_csv('./Data/synthetic_ground_truth_g1.csv')
df=df_all
dest_folder='./Results/synthetic/3'
t_df = df['timestamp']
nodes_arr = np.union1d(df['target'],df['source']).astype(int)
# list of nodes
nodes = nodes_arr.tolist()
num_vertices = len(nodes)
# node-group dictionary
group_dic = {}
keys = nodes
values = list(group_assignment)
group_dic = dict(zip(keys,values))
# create a new dictionary - key: node-pair , value: list of timestamps
dic=df.groupby(['source','target'])['timestamp'].apply(list).to_dict()
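# Illustrative shape of 'dic': {(source, target): [t1, t2, ...]}; e.g. dic[(3, 17)] (pair chosen
# arbitrarily) would hold every event timestamp observed between nodes 3 and 17.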
print('{} {} {} '.format(group_dic, lamda_arr_act,change_points_arr))
liklihood_sum = sbm_core.compute_cost(group_dic,lamda_arr_act,change_points_arr,num_roles,num_segments,dic)
print(' Initial Actual likelihood .......%f'%liklihood_sum)
def _swap(row):
    if row['source'] > row['target']:
        row['source'], row['target'] = row['target'], row['source']
    return row

# Undirected graph
df = df.apply(_swap, axis=1)
#scale timestamps for zeroth reference point
refValue = df['timestamp'].min()
df['timestamp'] -= refValue
# Experiment
import experiment
# User parameters
num_roles=2
num_segments=2
num_levels=2# Optional arg
algo_ver=3
dest_folder='./Results/synthetic/'
# tuning parameters
theta = 0
eta = 1
tuning_params= {'theta':theta,'eta':eta}
import time
start_time = time.time()
exp_obj = experiment.Experiment(df,num_roles,num_segments,algo_ver,dest_folder,tuning_params,num_levels,refValue)
[itr_d,likelihood_d,group_dic_d,lambda_estimates_d,change_points_arr_d] = exp_obj.execute()
print("--- %s seconds ---" % (time.time() - start_time))
t_df = sorted(t_df)
chg_points = change_points_arr_d[0,0,:]
ranges_arr = [ [chg_points[s]+1,chg_points[s+1]] for s in range(0,len(chg_points)-1)]
ranges_arr[0][0] = 0
list_time_stamps = list(t_df)
# iterate over timestamps list
dis_arr = list()
gt_arr = list()
for item in list_time_stamps:
    # find the segment to which the timestamp belongs
    # (depends on which groups the two nodes belong to)
    d = sbm_core._findSegment(ranges_arr, len(ranges_arr), int(item))
    dis_arr.append(d)
chg_points = change_points_arr[0,0,:]
ranges_arr = [ [chg_points[s]+1,chg_points[s+1]] for s in range(0,len(chg_points)-1)]
ranges_arr[0][0] = 0
list_time_stamps = list(t_df)
# iterate over timestamps list
for item in list_time_stamps:
    # find the segment to which the timestamp belongs
    # (depends on which groups the two nodes belong to)
    d = sbm_core._findSegment(ranges_arr, len(ranges_arr), int(item))
    gt_arr.append(d)
ind = adjusted_rand_score(gt_arr,dis_arr)
print('rand index: seg {} : {}'.format(_itr, ind))
# compare the discovered grouping with the ground-truth assignment, aligned by node id
# (assumes both dictionaries cover the same node set)
nodes_sorted = sorted(group_dic_d.keys())
ds = [group_dic_d[v] for v in nodes_sorted]
gt1 = [group_dic[v] for v in nodes_sorted]
ind_grp = adjusted_rand_score(gt1, ds)
print('rand index: group {} : {}'.format(_itr, ind_grp))
# likelihood for single group and single segment # Normalized likelihood
# num_roles=1
# num_segments=1
# num_levels=1
# exp_obj = experiment.Experiment(df,num_roles,num_segments,algo_ver,dest_folder,tuning_params,num_levels,refValue)
# exp_obj.execute()
# 0.9757628970136779
# 0.9766007299178403
# 0.976928378298833
# 0.9768191326709813
# 0.9786828903097778
# 0.9762078453564311
# 0.9731767168042805
# 0.9753286095154328
# 0.5773249110735138
# 3
# 3
# 2
# 3
# 3
# 3
# 3
# 4
# 4
"""
Effect of lambda: LD
Dataset-1
"""
import numpy as np
import pandas as pd
import utils
import sbm_core
import math
from itertools import combinations
import itertools
from sklearn.metrics.cluster import adjusted_rand_score
# Initialize
np.random.seed(155)
res = np.zeros((9,5) , dtype=float)
for _itr in range(8,9):
num_roles=2
num_vertices=20
num_segments = 4
num_levels = 2
NO_SAMPLES= 200
nodes = np.arange(num_vertices)
lamda_arr_act = np.zeros((num_roles, num_roles,num_levels) , dtype=float)
H =num_levels
print('k-h levels %d'%(num_levels))
# h-level lambda estimates
lambda_estimates_h = np.random.rand(num_roles, num_roles, H)
lambda_estimates_h = 1e-2*np.random.randint(11,99, size=(num_roles, num_roles, H))
# Make high variant lambdas
yu = (9-_itr)*.1
lambda_estimates_h[0,0,:] = [yu, 0.1]
lambda_estimates_h[0,1,:] = [0.1, yu]
lambda_estimates_h[1,0,:] = lambda_estimates_h[0,1,:]
lambda_estimates_h[1,1,:] = [yu, yu]
l1 =list(range(0, H))
l2 = []
if num_segments > num_levels:
l2 = [np.random.randint(0,H) for i in range(num_segments-H)]
# Mapping from segment to a level
g_mapping= np.array(l1 + l2)
print('g mapping {}'.format(g_mapping))
# initialize group assignment randomly
group_assignment_arr= np.random.randint(num_roles, size=(num_levels,num_vertices))
# node-group dictionary
group_dic = {}
for i in range(0,num_levels ):
level = i
group_dic_level = {}
keys = nodes
values = list(group_assignment_arr[level])
group_dic_level = dict(zip(keys,values))
group_dic[i] = group_dic_level
print('initial')
# print(group_dic)
for e_h in range(0,num_segments):
g_a = group_dic[g_mapping[e_h]]
list_of_groups= [[] for _ in range(num_roles)]
for idx, val in g_a.items():
list_of_groups[val].append(idx)
print('group assignments {}: {}'.format(e_h,list_of_groups))
# Plotting
#Initialize lamda
lamda_arr = np.zeros((num_roles, num_roles,num_segments) , dtype=float)
for d in range(0, num_segments):
for k in range(0, num_roles):
for g in range(k, num_roles):
lamda_arr[k,g, d]= lambda_estimates_h[k,g,g_mapping[d]]
lamda_arr[g,k, d]= lamda_arr[k,g, d]
change_points_arr = np.zeros((num_roles, num_roles, num_segments+1) , dtype=int)
df_all= None
points= list(range(0, (num_segments+1)*NO_SAMPLES, NO_SAMPLES))
list1 = []
level_seg_mapping = {}
for d in range(num_segments):
level = g_mapping[d]
if level in level_seg_mapping:
level_seg_mapping[level].append(d)
else:
level_seg_mapping[level] = []
level_seg_mapping[level].append(d)
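# Illustrative: if g_mapping were [0, 1, 1, 0], level_seg_mapping would be {0: [0, 3], 1: [1, 2]},
# i.e. the inverse of the segment-to-level map (actual values depend on the random g_mapping above).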
# %%
# Generate piecewise non-homogeneous poisson process
tot_count = np.zeros((num_levels) , dtype=float)
com_len = np.zeros((num_levels) , dtype=float)
# for pair in comb:
for i in range(0,num_levels):
# i = g_mapping[d]
group_assignment = group_assignment_arr[i]
print(group_assignment)
list_of_groups= [[] for _ in range(num_roles)]
for idx, val in enumerate(group_assignment):
list_of_groups[val].append(nodes[idx])
# print(list_of_groups)
size_all_pairs = {}
for kk in range(0, num_roles):
for gg in range(kk, num_roles):
U=list_of_groups[kk]
W=list_of_groups[gg]
if kk == gg:
size_all_pairs[kk,gg] = math.comb(len(U), 2)
if kk != gg:
size_all_pairs[kk,gg] = len(U)*len(W)
for k in range(0, num_roles):
for g in range(k, num_roles):
change_points_arr[k,g,:] = points
lamda_arr[k,g,:] = lamda_arr[g,k,:]
comb = []
if k == g:
comb = list(combinations(list_of_groups[k], 2))
# print(type(comb))
else:
# comb = []
key_data = [list_of_groups[k],list_of_groups[g],]
comb = list(itertools.product(*key_data))
# print(comb)
if len(comb) != size_all_pairs[k,g]:
print('not equal..')
print('d val {}'.format( d))
com_len[i] = len(comb)
# print('comb len {}'.format( com_len[d]))
tot_count[i] = 0
for pair in comb:
s = np.random.poisson(lamda_arr[k,g,d], NO_SAMPLES)
# print(np.count_nonzero(s))
tot_count[i] += np.count_nonzero(s)
list_org=[i for i, e in enumerate(s) if e != 0]
if len(list_org) == 0:
print('zero')
for d in level_seg_mapping[i]:
list1 = [x+points[d] for x in list_org]
df= None
df = pd.DataFrame(data=list1)
df.columns =['timestamp']
# print(list1)
# if max(list1) > 799:
# print('{} {}'.format(d, max(list1)))
N= df.size
# print(pair)
# print(pair[0])
list_start_stations =[pair[0]] * N
list_end_stations =[pair[1]] * N
df['source'] = list_start_stations
df['target'] = list_end_stations
df_all=pd.concat([df_all, df], ignore_index=True)
# for dd in level_seg_mapping:
# dd = d
lamda_arr_act[k,g,i] = round(((tot_count[i])/(NO_SAMPLES*com_len[i])),1)
lamda_arr_act[g,k,i] = lamda_arr_act[k,g,i]
# print('tot count')
# print(tot_count[dd])
# print(' {} {} {} {} : k g d :lamb'.format(k,g,d,lamda_arr_act[g,k,dd]))
print(' {} {} {} {} : k g d :lamb'.format(k,g,i,lamda_arr_act[g,k,i]))
# Remove self loops
df_all = df_all[((df_all['source'] ) != (df_all['target']))]
#sort
df_all=df_all.sort_values('timestamp')
df_all = df_all[['target', 'timestamp','source']]
# Save as .csv file
# df_all.to_csv('./Data/synthetic_ground_truth_g1.csv')
df= None
df=df_all
dest_folder='./Results/synthetic/3'
t_df = df['timestamp']
nodes_arr = np.union1d(df['target'],df['source']).astype(int)
# list of nodes
nodes = nodes_arr.tolist()
num_vertices = len(nodes)
# create a new dictionary - key: node-pair , value: list of timestamps
dic=df.groupby(['source','target'])['timestamp'].apply(list).to_dict()
print('{} {} {} '.format(group_dic, lamda_arr_act,change_points_arr))
# liklihood_sum = sbm_core.mm_compute_cost(group_dic,lamda_arr_act,change_points_arr,num_roles,num_segments,dic,g_mapping)
# print(' Initial Actual likelihood .......%f'%liklihood_sum)
def _swap (row):
if row['source'] > row['target']:
row['source'] , row['target'] =row['target'] , row['source']
return row
# Undirected graph
df=df.apply(lambda row: _swap(row), axis=1)
#scale timestamps for zeroth reference point
refValue = df['timestamp'].min()
df['timestamp'] -= refValue
chg_points = change_points_arr[0,0,:]
ranges_arr = [ [chg_points[s]+1,chg_points[s+1]] for s in range(0,len(chg_points)-1)]
ranges_arr[0][0] = 0
list_time_stamps = list(t_df)
# iterate over timestamps list
gt_arr = list()
for item in list_time_stamps:
# find the segment which the timestamp belongs
# (is dependent on which groups the two nodes belong)
d = sbm_core._findSegment(ranges_arr, len(ranges_arr) , int(item))
gt_arr.append(d)
# Experiment
import experiment
# User parameters
# num_roles=2
# num_segments=10
# num_levels=5# Optional arg
algo_ver=4
dest_folder='./Results/synthetic/'
# tuning parameters
theta = 1e-7
eta = 1
tuning_params= {'theta':theta,'eta':eta}
exp_obj = experiment.Experiment(df,num_roles,num_segments,algo_ver,dest_folder,tuning_params,num_levels,refValue)
# [likelihood_f,group_dic_f] = exp_obj.execute()
[it,ll1,group_dic_d,lambda_estimates,change_points_arr_d]= exp_obj.execute()
# SEGMENTATION ACCURACY
t_df = sorted(t_df)
chg_points = change_points_arr_d[0,0,:]
ranges_arr = [ [chg_points[s]+1,chg_points[s+1]] for s in range(0,len(chg_points)-1)]
ranges_arr[0][0] = 0
list_time_stamps = list(t_df)
# iterate over timestamps list
dis_arr = list()
for item in list_time_stamps:
# find the segment which the timestamp belongs
# (is dependent on which groups the two nodes belong)
d = sbm_core._findSegment(ranges_arr, len(ranges_arr) , int(item))
dis_arr.append(d)
gt_arr= np.array(gt_arr, dtype=np.float64)
dis_arr= np.array(dis_arr, dtype=np.float64)
ind_seg = adjusted_rand_score(gt_arr,dis_arr)
print('ind {} : {}'.format(_itr, ind_seg))
liklihood_sum = sbm_core.mm_compute_cost(group_dic,lamda_arr_act,change_points_arr,num_roles,num_segments,dic,g_mapping)
print(' Initial Actual likelihood .......%f'%liklihood_sum)
print('g mapping {}'.format(g_mapping))
for e_h in range(0,num_segments):
g_a = group_dic[g_mapping[e_h]]
list_of_groups= [[] for _ in range(num_roles)]
for idx, val in g_a.items():
list_of_groups[val].append(idx)
print('group assignments {}: {}'.format(e_h,list_of_groups))
#group ass, of level 1
list_of_groups_1= [[] for _ in range(num_roles)]
#group ass, of level 2
list_of_groups_2= [[] for _ in range(num_roles)]
g1= group_dic_d[0]
g2= group_dic_d[1]
found_cont = 0
for i_h in range(0,num_levels):
# i_h level
grp = group_dic_d[i_h]
list_of_groups_d= [[] for _ in range(num_roles)]
for idx, val in grp.items():
list_of_groups_d[val].append(idx)
ds= list(group_dic_d[i_h].values() )
gt1 = list(g1.values())
gt2 = list(g2.values())
ind1=adjusted_rand_score(ds,gt1)
ind2=adjusted_rand_score(ds,gt2)
d_in = max(ind1,ind2)
found_cont += d_in
ind = found_cont/2
res[_itr][1] = ind
res[_itr][4] = ind_seg
print('end')
# 0.989349
# 0.9899235585218414
# 0.9887209171780673
# 0.9900141929986654
# 0.9900915114849232
# 0.9895393785077311
# 0.9890441642420313
# 0.5056343918828786
# 0.489279
# 2
# 3
# 2
# 3
# 3
# 3
# 4
# 3
# 3
"""
Effect of Lambda
Dataset-2
"""
import numpy as np
import pandas as pd
import utils
import sbm_core
import math
from itertools import combinations
import itertools
from sklearn.metrics.cluster import adjusted_rand_score
# Initialize
np.random.seed(1137)
num_roles=2
num_vertices=25
num_segments = 2
NO_SAMPLES= 100
group_assignment= np.random.randint(num_roles, size=(num_vertices))
nodes = np.arange(num_vertices)
list_of_groups= [[] for _ in range(num_roles)]
for idx, val in enumerate(group_assignment):
    list_of_groups[val].append(nodes[idx])
print(list_of_groups)
size_all_pairs = {}
for k in range(0, num_roles):
    for g in range(k, num_roles):
        U = list_of_groups[k]
        W = list_of_groups[g]
        if k == g:
            size_all_pairs[k, g] = math.comb(len(U), 2)
        if k != g:
            size_all_pairs[k, g] = len(U) * len(W)
lamda_arr = np.ones((num_roles, num_roles,num_segments) , dtype=float)
lamda_arr = 1e-1* np.random.randint(1,9, size=(num_roles, num_roles,num_segments))
# set value for each iteration (0 - 8); only the last assignment below takes effect
_itr = 8
_itr = 0
yu = (9-_itr)*.1
lamda_arr[0,0]=[yu, 0.1]
lamda_arr[0,1]= [0.1, yu]
lamda_arr[1,0]=lamda_arr[0,1]
lamda_arr[1,1]=[yu, yu]
lamda_arr_act = np.zeros((num_roles, num_roles,num_segments) , dtype=float)
change_points_arr = np.zeros((num_roles, num_roles, num_segments+1) , dtype=int)
df_all= None
points= list(range(0, (num_segments+1)*NO_SAMPLES, NO_SAMPLES))
list1 = []
# Generate piecewise non-homogeneous poisson process
for k in range(0, num_roles):
    for g in range(k, num_roles):
        comb = []
        if k == g:
            comb = list(combinations(list_of_groups[k], 2))
            # print(type(comb))
        else:
            # comb = []
            key_data = [list_of_groups[k], list_of_groups[g]]
            comb = list(itertools.product(*key_data))
            # print(comb)
        if len(comb) != size_all_pairs[k, g]:
            print('not equal..')
        change_points_arr[k, g, :] = points
        lamda_arr[k, g, :] = lamda_arr[g, k, :]
        tot_count = np.zeros((num_segments), dtype=float)
        for pair in comb:
            for d in range(0, num_segments):
                s = np.random.poisson(lamda_arr[k, g, d], NO_SAMPLES)
                # print(np.count_nonzero(s))
                tot_count[d] += np.count_nonzero(s)
                list1 = [i for i, e in enumerate(s) if e != 0]
                if len(list1) == 0:
                    print('zero')
                list1 = [x + points[d] for x in list1]
                df = pd.DataFrame(data=list1)
                df.columns = ['timestamp']
                N = df.size
                list_start_stations = [pair[0]] * N
                list_end_stations = [pair[1]] * N
                df['source'] = list_start_stations
                df['target'] = list_end_stations
                df_all = pd.concat([df_all, df], ignore_index=True)
        for d in range(0, num_segments):
            lamda_arr_act[k, g, d] = tot_count[d] / (NO_SAMPLES * len(comb))
            # print(tot_count[d])
## Other preparations
# Remove self loops
df_all = df_all[((df_all['source'] ) != (df_all['target']))]
#sort
df_all=df_all.sort_values('timestamp')
df_all = df_all[['target', 'timestamp','source']]
# Save as .csv file
# df_all.to_csv('./Data/synthetic_ground_truth_g1.csv')
df=df_all
dest_folder='./Results/synthetic/3'
t_df = df['timestamp']
nodes_arr = np.union1d(df['target'],df['source']).astype(int)
# list of nodes
nodes = nodes_arr.tolist()
num_vertices = len(nodes)
# node-group dictionary
group_dic = {}
keys = nodes
values = list(group_assignment)
group_dic = dict(zip(keys,values))
# create a new dictionary - key: node-pair , value: list of timestamps
dic=df.groupby(['source','target'])['timestamp'].apply(list).to_dict()
print('{} {} {} '.format(group_dic, lamda_arr_act,change_points_arr))
liklihood_sum = sbm_core.compute_cost(group_dic,lamda_arr_act,change_points_arr,num_roles,num_segments,dic)
print(' Initial Actual likelihood .......%f'%liklihood_sum)
def _swap(row):
    if row['source'] > row['target']:
        row['source'], row['target'] = row['target'], row['source']
    return row

# Undirected graph
df = df.apply(_swap, axis=1)
#scale timestamps for zeroth reference point
refValue = df['timestamp'].min()
df['timestamp'] -= refValue
# Experiment
import experiment
# User parameters
num_roles=2
num_segments=2
num_levels=2# Optional arg
algo_ver=3
dest_folder='./Results/synthetic/'
# tuning parameters
theta = 0
eta = 1
tuning_params= {'theta':theta,'eta':eta}
import time
start_time = time.time()
exp_obj = experiment.Experiment(df,num_roles,num_segments,algo_ver,dest_folder,tuning_params,num_levels,refValue)
[itr_d,likelihood_d,group_dic_d,lambda_estimates_d,change_points_arr_d] = exp_obj.execute()
print("--- %s seconds ---" % (time.time() - start_time))
t_df = sorted(t_df)
chg_points = change_points_arr_d[0,0,:]
ranges_arr = [ [chg_points[s]+1,chg_points[s+1]] for s in range(0,len(chg_points)-1)]
ranges_arr[0][0] = 0
list_time_stamps = list(t_df)
# iterate over timestamps list
dis_arr = list()
gt_arr = list()
for item in list_time_stamps:
    # find the segment to which the timestamp belongs
    # (depends on which groups the two nodes belong to)
    d = sbm_core._findSegment(ranges_arr, len(ranges_arr), int(item))
    dis_arr.append(d)
chg_points = change_points_arr[0,0,:]
ranges_arr = [ [chg_points[s]+1,chg_points[s+1]] for s in range(0,len(chg_points)-1)]
ranges_arr[0][0] = 0
list_time_stamps = list(t_df)
# iterate over timestamps list
for item in list_time_stamps:
    # find the segment to which the timestamp belongs
    # (depends on which groups the two nodes belong to)
    d = sbm_core._findSegment(ranges_arr, len(ranges_arr), int(item))
    gt_arr.append(d)
ind = adjusted_rand_score(gt_arr,dis_arr)
print('rand index: seg {} : {}'.format(_itr, ind))
# compare the discovered grouping with the ground-truth assignment, aligned by node id
# (assumes both dictionaries cover the same node set)
nodes_sorted = sorted(group_dic_d.keys())
ds = [group_dic_d[v] for v in nodes_sorted]
gt1 = [group_dic[v] for v in nodes_sorted]
ind_grp = adjusted_rand_score(gt1, ds)
print('rand index: group {} : {}'.format(_itr, ind_grp))
# 0.9785444674036701
# 0.9791525131372905
# 0.981440657362889
# 0.9780947193990287
# 0.9785576050121263
# 0.9768656988977588
# 0.9794087578274921
# 0.9785467310928326
# 0.8326828222297133
# 3
# 3
# 3
# 3
# 3
# 3
# 5
# 5
# 5
"""
Effect of lambda: LD
Dataset-2
"""
import numpy as np
import pandas as pd
import utils
import sbm_core
import math
from itertools import combinations
import itertools
from sklearn.metrics.cluster import adjusted_rand_score
# Initialize
np.random.seed(3457325)
res = np.zeros((9,5) , dtype=float)
for _itr in range(0,1):
num_roles=2
num_vertices=20
num_segments = 4
num_levels = 2
NO_SAMPLES= 200
nodes = np.arange(num_vertices)
lamda_arr_act = np.zeros((num_roles, num_roles,num_levels) , dtype=float)
H =num_levels
print('k-h levels %d'%(num_levels))
# h-level lambda estimates
lambda_estimates_h = np.random.rand(num_roles, num_roles, H)
lambda_estimates_h = 1e-2*np.random.randint(11,99, size=(num_roles, num_roles, H))
# Make high variant lambdas
yu = (9-_itr)*.1
lambda_estimates_h[0,0,:] = [yu, 0.1]
lambda_estimates_h[0,1,:] = [0.1, yu]
lambda_estimates_h[1,0,:] = lambda_estimates_h[0,1,:]
lambda_estimates_h[1,1,:] = [yu, yu]
l1 =list(range(0, H))
l2 = []
if num_segments > num_levels:
l2 = [np.random.randint(0,H) for i in range(num_segments-H)]
# Mapping from segment to a level
g_mapping= np.array(l1 + l2)
print('g mapping {}'.format(g_mapping))
# initialize group assignment randomly
group_assignment_arr= np.random.randint(num_roles, size=(num_levels,num_vertices))
# node-group dictionary
group_dic = {}
for i in range(0,num_levels ):
level = i
group_dic_level = {}
keys = nodes
values = list(group_assignment_arr[level])
group_dic_level = dict(zip(keys,values))
group_dic[i] = group_dic_level
print('initial')
# print(group_dic)
for e_h in range(0,num_segments):
g_a = group_dic[g_mapping[e_h]]
list_of_groups= [[] for _ in range(num_roles)]
for idx, val in g_a.items():
list_of_groups[val].append(idx)
print('group assignments {}: {}'.format(e_h,list_of_groups))
# Plotting
#Initialize lamda
lamda_arr = np.zeros((num_roles, num_roles,num_segments) , dtype=float)
for d in range(0, num_segments):
for k in range(0, num_roles):
for g in range(k, num_roles):
lamda_arr[k,g, d]= lambda_estimates_h[k,g,g_mapping[d]]
lamda_arr[g,k, d]= lamda_arr[k,g, d]
change_points_arr = np.zeros((num_roles, num_roles, num_segments+1) , dtype=int)
df_all= None
points= list(range(0, (num_segments+1)*NO_SAMPLES, NO_SAMPLES))
list1 = []
level_seg_mapping = {}
for d in range(num_segments):
level = g_mapping[d]
if level in level_seg_mapping:
level_seg_mapping[level].append(d)
else:
level_seg_mapping[level] = []
level_seg_mapping[level].append(d)
# %%
# Generate piecewise non-homogeneous poisson process
tot_count = np.zeros((num_levels) , dtype=float)
com_len = np.zeros((num_levels) , dtype=float)
# for pair in comb:
for i in range(0,num_levels):
# i = g_mapping[d]
group_assignment = group_assignment_arr[i]
print(group_assignment)
list_of_groups= [[] for _ in range(num_roles)]
for idx, val in enumerate(group_assignment):
list_of_groups[val].append(nodes[idx])
# print(list_of_groups)
size_all_pairs = {}
for kk in range(0, num_roles):
for gg in range(kk, num_roles):
U=list_of_groups[kk]
W=list_of_groups[gg]
if kk == gg:
size_all_pairs[kk,gg] = math.comb(len(U), 2)
if kk != gg:
size_all_pairs[kk,gg] = len(U)*len(W)
for k in range(0, num_roles):
for g in range(k, num_roles):
change_points_arr[k,g,:] = points
lamda_arr[k,g,:] = lamda_arr[g,k,:]
comb = []
if k == g:
comb = list(combinations(list_of_groups[k], 2))
# print(type(comb))
else:
# comb = []
key_data = [list_of_groups[k],list_of_groups[g],]
comb = list(itertools.product(*key_data))
# print(comb)
if len(comb) != size_all_pairs[k,g]:
print('not equal..')
print('d val {}'.format( d))
com_len[i] = len(comb)
# print('comb len {}'.format( com_len[d]))
tot_count[i] = 0
for pair in comb:
s = np.random.poisson(lamda_arr[k,g,d], NO_SAMPLES)
# print(np.count_nonzero(s))
tot_count[i] += np.count_nonzero(s)
list_org=[i for i, e in enumerate(s) if e != 0]
if len(list_org) == 0:
print('zero')
for d in level_seg_mapping[i]:
list1 = [x+points[d] for x in list_org]
df= None
df = pd.DataFrame(data=list1)
df.columns =['timestamp']
N= df.size
list_start_stations =[pair[0]] * N
list_end_stations =[pair[1]] * N
df['source'] = list_start_stations
df['target'] = list_end_stations
df_all=pd.concat([df_all, df], ignore_index=True)
# for dd in level_seg_mapping:
# dd = d
lamda_arr_act[k,g,i] = round(((tot_count[i])/(NO_SAMPLES*com_len[i])),1)
lamda_arr_act[g,k,i] = lamda_arr_act[k,g,i]
# print('tot count')
# print(tot_count[dd])
# print(' {} {} {} {} : k g d :lamb'.format(k,g,d,lamda_arr_act[g,k,dd]))
print(' {} {} {} {} : k g d :lamb'.format(k,g,i,lamda_arr_act[g,k,i]))
# Remove self loops
df_all = df_all[((df_all['source'] ) != (df_all['target']))]
#sort
df_all=df_all.sort_values('timestamp')
df_all = df_all[['target', 'timestamp','source']]
# Save as .csv file
# df_all.to_csv('./Data/synthetic_ground_truth_g1.csv')
df= None
df=df_all
dest_folder='./Results/synthetic/3'
t_df = df['timestamp']
nodes_arr = np.union1d(df['target'],df['source']).astype(int)
# list of nodes
nodes = nodes_arr.tolist()
num_vertices = len(nodes)
# create a new dictionary - key: node-pair , value: list of timestamps
dic=df.groupby(['source','target'])['timestamp'].apply(list).to_dict()
print('{} {} {} '.format(group_dic, lamda_arr_act,change_points_arr))
def _swap (row):
if row['source'] > row['target']:
row['source'] , row['target'] =row['target'] , row['source']
return row
# Undirected graph
df=df.apply(lambda row: _swap(row), axis=1)
#scale timestamps for zeroth reference point
refValue = df['timestamp'].min()
df['timestamp'] -= refValue
chg_points = change_points_arr[0,0,:]
ranges_arr = [ [chg_points[s]+1,chg_points[s+1]] for s in range(0,len(chg_points)-1)]
ranges_arr[0][0] = 0
list_time_stamps = list(t_df)
# iterate over timestamps list
gt_arr = list()
for item in list_time_stamps:
# find the segment which the timestamp belongs
# (is dependent on which groups the two nodes belong)
d = sbm_core._findSegment(ranges_arr, len(ranges_arr) , int(item))
gt_arr.append(d)
# Experiment
import experiment
# User parameters
algo_ver=4
dest_folder='./Results/synthetic/'
# tuning parameters
theta = 1e-7
eta = 1
tuning_params= {'theta':theta,'eta':eta}
exp_obj = experiment.Experiment(df,num_roles,num_segments,algo_ver,dest_folder,tuning_params,num_levels,refValue)
# [likelihood_f,group_dic_f] = exp_obj.execute()
[it,ll1,group_dic_d,lambda_estimates,change_points_arr_d]= exp_obj.execute()
# SEGMENTATION ACCURACY
t_df = sorted(t_df)
chg_points = change_points_arr_d[0,0,:]
ranges_arr = [ [chg_points[s]+1,chg_points[s+1]] for s in range(0,len(chg_points)-1)]
ranges_arr[0][0] = 0
list_time_stamps = list(t_df)
# iterate over timestamps list
dis_arr = list()
for item in list_time_stamps:
# find the segment which the timestamp belongs
# (is dependent on which groups the two nodes belong)
d = sbm_core._findSegment(ranges_arr, len(ranges_arr) , int(item))
dis_arr.append(d)
gt_arr= np.array(gt_arr, dtype=np.float64)
dis_arr= np.array(dis_arr, dtype=np.float64)
ind_seg = adjusted_rand_score(gt_arr,dis_arr)
print('ind {} : {}'.format(_itr, ind_seg))
liklihood_sum = sbm_core.mm_compute_cost(group_dic,lamda_arr_act,change_points_arr,num_roles,num_segments,dic,g_mapping)
print(' Initial Actual likelihood .......%f'%liklihood_sum)
print('g mapping {}'.format(g_mapping))
for e_h in range(0,num_segments):
g_a = group_dic[g_mapping[e_h]]
list_of_groups= [[] for _ in range(num_roles)]
for idx, val in g_a.items():
list_of_groups[val].append(idx)
print('group assignments {}: {}'.format(e_h,list_of_groups))
#group ass, of level 1
list_of_groups_1= [[] for _ in range(num_roles)]
#group ass, of level 2
list_of_groups_2= [[] for _ in range(num_roles)]
g1= group_dic_d[0]
g2= group_dic_d[1]
found_cont = 0
for i_h in range(0,num_levels):
# i_h level
grp = group_dic_d[i_h]
list_of_groups_d= [[] for _ in range(num_roles)]
for idx, val in grp.items():
list_of_groups_d[val].append(idx)
ds= list(group_dic_d[i_h].values() )
gt1 = list(g1.values())
gt2 = list(g2.values())
ind1=adjusted_rand_score(ds,gt1)
ind2=adjusted_rand_score(ds,gt2)
d_in = max(ind1,ind2)
found_cont += d_in
ind = found_cont/2
res[_itr][1] = ind
res[_itr][4] = ind_seg
print('end')
# 0.9898951352373943
# 0.9904822820772498
# 0.9894069501702982
# 0.9892811884102554
# 0.9893223431465236
# 0.9886669698061425
# 0.4041524218474172
# 0.4968349779236352
# 0.49583738728791915
# 3
# 3
# 3
# 6
# 4
# 3
# 5
# 4
# 3
@@ -155,7 +155,7 @@ class Optimize:
_prev_val = _curr_val
_curr_val = liklihood_sum
_itr+=1
-return [self.group_assignment,self.lambda_estimates,self.change_points_arr,liklihood_sum]
+return [self.group_assignment,self.lambda_estimates,self.change_points_arr,liklihood_sum,_itr]
def com_h_lvl_lambda_group(self):
@@ -218,23 +218,23 @@ class Optimize:
print("iteration no........... %d " %(_itr+1))
self.group_assignment=sbm_core.mm_group_assignment_ver2_2(self.nodes,self.num_roles,self.num_segments,self.lambda_estimates,self.group_assignment,self.change_points_arr,self.df,self.g_mapping)
-print('after grouping')
-liklihood_sum = sbm_core.mm_compute_cost(self.group_assignment,self.lambda_estimates,self.change_points_arr,self.num_roles,self.num_segments,self.df,self.g_mapping)
+# print('after grouping')
+# liklihood_sum = sbm_core.mm_compute_cost(self.group_assignment,self.lambda_estimates,self.change_points_arr,self.num_roles,self.num_segments,self.df,self.g_mapping)
self.lambda_estimates=sbm_core.mm_estimate_lamda_kh(self.num_roles,self.num_segments,self.lambda_estimates,self.group_assignment,self.change_points_arr,self.df,self.g_mapping,self.tuning_params)
-print('after lambda estimate')
-liklihood_sum = sbm_core.mm_compute_cost(self.group_assignment,self.lambda_estimates,self.change_points_arr,self.num_roles,self.num_segments,self.df,self.g_mapping)
+# print('after lambda estimate')
+# liklihood_sum = sbm_core.mm_compute_cost(self.group_assignment,self.lambda_estimates,self.change_points_arr,self.num_roles,self.num_segments,self.df,self.g_mapping)
print('after seg')
# self.change_points_arr = sbm_core.dyn_prog_seg(self.group_assignment,self.lambda_estimates,self.change_points_arr,self.num_roles,self.num_segments,self.df)
[self.change_points_arr,self.g_mapping,self.group_assignment]=sbm_core.mm_linear_seg_ver_2(self.num_roles,self.num_segments,self.group_assignment,self.lambda_estimates,self.change_points_arr,self.df,self.g_mapping)
-liklihood_sum = sbm_core.mm_compute_cost(self.group_assignment,self.lambda_estimates,self.change_points_arr,self.num_roles,self.num_segments,self.df,self.g_mapping)
+# liklihood_sum = sbm_core.mm_compute_cost(self.group_assignment,self.lambda_estimates,self.change_points_arr,self.num_roles,self.num_segments,self.df,self.g_mapping)
self.lambda_estimates=sbm_core.mm_estimate_lamda_kh(self.num_roles,self.num_segments,self.lambda_estimates,self.group_assignment,self.change_points_arr,self.df,self.g_mapping,self.tuning_params)
-print('after lambda estimate')
+# print('after lambda estimate')
liklihood_sum = sbm_core.mm_compute_cost(self.group_assignment,self.lambda_estimates,self.change_points_arr,self.num_roles,self.num_segments,self.df,self.g_mapping)
print(' %d %f'%(_itr+1,liklihood_sum))
_prev_val = _curr_val
_curr_val = liklihood_sum
_itr+=1
-return [self.group_assignment,self.lambda_estimates,self.change_points_arr,liklihood_sum,self.g_mapping]
\ No newline at end of file
+return [self.group_assignment,self.lambda_estimates,self.change_points_arr,liklihood_sum,self.g_mapping,_itr]
\ No newline at end of file