"""
Effect of lambda: LD
Dataset-2
"""
import numpy as np
import pandas as pd
import utils
import sbm_core
import math
from itertools import combinations, product
from sklearn.metrics.cluster import adjusted_rand_score
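# Pipeline of this script:
#   1. choose ground-truth roles per level and level-wise Poisson rates (lambda),
#   2. map each of the num_segments time segments to one of the num_levels levels,
#   3. simulate piecewise-constant Poisson interactions for every node pair,
#   4. fit the model with experiment.Experiment,
#   5. score the recovered segmentation and groups with the adjusted Rand index.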
# Initialize
num_roles=2
num_vertices=20
num_segments = 4
num_levels = 2
NO_SAMPLES= 1850
nodes = np.arange(num_vertices)
lamda_arr_act = np.zeros((num_roles, num_roles,num_levels) , dtype=float)
H =num_levels
# h-level lambda estimates
lambda_estimates_h = np.random.rand(num_roles, num_roles, H)
# set value for each delta ( 0.01 - 1)
yu = 8*.1
lambda_estimates_h[0,0,:] = [yu, 0.01]
lambda_estimates_h[0,1,:] = [0.01, yu]
lambda_estimates_h[1,0,:] = lambda_estimates_h[0,1,:]
lambda_estimates_h[1,1,:] = [yu, yu]
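# With yu = 0.8 the two h-level rate matrices become
#   level 0: [[0.8 , 0.01], [0.01, 0.8 ]]
#   level 1: [[0.01, 0.8 ], [0.8 , 0.8 ]]
# so the block structure differs between the two levels.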
l1 =list(range(0, H))
l2 = []
if num_segments > num_levels:
    l2 = [np.random.randint(0,H) for i in range(num_segments-H)]
# Mapping from segment to a level
g_mapping= np.array(l1 + l2)
# print('g mapping {}'.format(g_mapping))
# initialize group assignment randomly
group_assignment_arr= np.random.randint(num_roles, size=(num_levels,num_vertices))
# node-group dictionary
group_dic = {}
for i in range(0,num_levels ):
    level = i
    group_dic_level = {}
    keys = nodes
    values = list(group_assignment_arr[level])
    group_dic_level = dict(zip(keys,values))
    group_dic[i] = group_dic_level
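# group_dic[level] maps node id -> role for that level,
# e.g. group_dic[0] = {0: 1, 1: 0, ..., 19: 1} (values are random per run).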
for e_h in range(0,num_segments):
    g_a = group_dic[g_mapping[e_h]]
    list_of_groups= [[] for _ in range(num_roles)]
    for idx, val in g_a.items():
        list_of_groups[val].append(idx)
    # print('group assignments {}: {}'.format(e_h,list_of_groups))
# Initialize lambda
lamda_arr = np.zeros((num_roles, num_roles,num_segments) , dtype=float)
for d in range(0, num_segments):
    for k in range(0, num_roles):
        for g in range(k, num_roles):
            lamda_arr[k,g, d]= lambda_estimates_h[k,g,g_mapping[d]]
            lamda_arr[g,k, d]= lamda_arr[k,g, d]
change_points_arr = np.zeros((num_roles, num_roles, num_segments+1) , dtype=int)
df_all= None
points= list(range(0, (num_segments+1)*NO_SAMPLES, NO_SAMPLES))
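# segment boundaries in sample units; with NO_SAMPLES = 1850 and num_segments = 4
# this gives points = [0, 1850, 3700, 5550, 7400]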
list1 = []
level_seg_mapping = {}
for d in range(num_segments):
    level = g_mapping[d]
    if level in level_seg_mapping:
        level_seg_mapping[level].append(d)
    else:
        level_seg_mapping[level] = []
        level_seg_mapping[level].append(d)
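# level_seg_mapping[level] lists the segments drawn with that level's rates;
# e.g. if g_mapping = [0, 1, 1, 0] then level_seg_mapping = {0: [0, 3], 1: [1, 2]}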
# %% 20 iterations
# per-run results: [segmentation ARI, iteration count, group ARI]
results = np.zeros((20, 3), dtype=float)

for itr_no in range(0,20):
    # Generate piecewise non-homogeneous poisson process
    df_all = None     # rebuild the event table from scratch in every run
    tot_count = np.zeros((num_levels) , dtype=float)
    com_len = np.zeros((num_levels) , dtype=float)
    size_all_pairs = np.zeros((num_roles, num_roles), dtype=float)
    # for pair in comb:
    for i in range(0,num_levels):
        # i = g_mapping[d]
        group_assignment = group_assignment_arr[i]
        list_of_groups= [[] for _ in range(num_roles)]
        for idx, val in enumerate(group_assignment):
            list_of_groups[val].append(nodes[idx])
        # expected number of node pairs in each (role, role) block
        for kk in range(0, num_roles):
            for gg in range(kk, num_roles):
                U=list_of_groups[kk]
                W=list_of_groups[gg]
                if kk == gg:
                    size_all_pairs[kk,gg] = math.comb(len(U), 2)
                if kk != gg:
                    size_all_pairs[kk,gg] = len(U)*len(W)
        for k in range(0, num_roles):
            for g in range(k, num_roles):
                change_points_arr[k,g,:] = points
                lamda_arr[k,g,:] = lamda_arr[g,k,:]
                tot_count[i] = 0
                # enumerate the node pairs of this (k, g) block
                if k == g:
                    comb = list(combinations(list_of_groups[k], 2))
                    # print(type(comb))
                else:
                    # comb = []
                    key_data = [list_of_groups[k],list_of_groups[g],]
                    comb = list(product(*key_data))
                    # print(comb)
                if len(comb) != size_all_pairs[k,g]:
                    print('not equal..')
                # pair-slots simulated for this block at level i
                com_len[i] = len(comb)*len(level_seg_mapping[i])
                # draw events for every segment that is mapped to level i
                for d in level_seg_mapping[i]:
                    for pair in comb:
                        s = np.random.poisson(lamda_arr[k,g,d], NO_SAMPLES)
                        # print(np.count_nonzero(s))
                        tot_count[i] += np.count_nonzero(s)
                        list_org=[t for t, e in enumerate(s) if e != 0]
                        if len(list_org) == 0:
                            print('zero')
                        # shift the event times into segment d
                        list1 = [x+points[d] for x in list_org]
                        df= None
                        df = pd.DataFrame(data=list1)
                        df.columns =['timestamp']
                        N= df.size
                        list_start_stations =[pair[0]] * N
                        list_end_stations =[pair[1]] * N
                        df['source'] = list_start_stations
                        df['target'] = list_end_stations
                        df_all=pd.concat([df_all, df], ignore_index=True)
                # empirical rate of the block, for checking against lamda_arr
                lamda_arr_act[k,g,i] = round(((tot_count[i])/(NO_SAMPLES*com_len[i])),3)
                lamda_arr_act[g,k,i] = lamda_arr_act[k,g,i]
                # print(' {} {} {} {} : k g d :lamb'.format(k,g,i,lamda_arr_act[g,k,i]))
# print(' {} {} {} {} : k g d :lamb'.format(k,g,i,lamda_arr_act[g,k,i]))
    # remove self-loops
    df_all = df_all[((df_all['source'] ) != (df_all['target']))]
    # sort by time
    df_all=df_all.sort_values('timestamp')
    df_all = df_all[['target', 'timestamp','source']]

    # Save as .csv file
    # df_all.to_csv('./Data/synthetic_ground_truth_g1.csv')

    df= None
    df=df_all
    dest_folder='./Results/synthetic/3'

    t_df = df['timestamp']
    nodes_arr = np.union1d(df['target'],df['source']).astype(int)
    # list of nodes
    nodes = nodes_arr.tolist()
    num_vertices = len(nodes)

    # create a new dictionary - key: node-pair , value: list of timestamps
    dic=df.groupby(['source','target'])['timestamp'].apply(list).to_dict()
    # print('{} {} {} '.format(group_dic, lamda_arr_act,change_points_arr))
    def _swap (row):
        if row['source'] > row['target']:
            row['source'] , row['target'] = row['target'] , row['source']
        return row

    # Undirected graph
    df=df.apply(lambda row: _swap(row), axis=1)
    # scale timestamps for zeroth reference point
    refValue = df['timestamp'].min()
    df['timestamp'] -= refValue

    # ground-truth segment boundaries and ranges
    chg_points = change_points_arr[0,0,:]
    ranges_arr = [ [chg_points[s]+1,chg_points[s+1]] for s in range(0,len(chg_points)-1)]
    ranges_arr[0][0] = 0
    list_time_stamps = list(t_df)

    # ground-truth segment label of every timestamp
    gt_arr = []
    for item in list_time_stamps:
        # find the segment which the timestamp belongs to
        # (is dependent on which groups the two nodes belong to)
        d = sbm_core._findSegment(ranges_arr, len(ranges_arr) , int(item))
        gt_arr.append(d)
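    # With the default parameters, ranges_arr is
    # [[0, 1850], [1851, 3700], [3701, 5550], [5551, 7400]],
    # so gt_arr holds the true segment index (0-3) of each event.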
    # Experiment
    import experiment

    # User parameters
    algo_ver=4
    dest_folder='./Results/synthetic/'

    # tuning parameters
    theta = 1e-7
    eta = 1
    tuning_params= {'theta':theta,'eta':eta}

    exp_obj = experiment.Experiment(df,num_roles,num_segments,algo_ver,dest_folder,tuning_params,num_levels,refValue)
    # [likelihood_f,group_dic_f] = exp_obj.execute()
    [it,ll1,group_dic_d,lambda_estimates,change_points_arr_d]= exp_obj.execute()
    # SEGMENTATION ACCURACY
    t_df = sorted(t_df)

    # discovered segment boundaries and ranges
    chg_points = change_points_arr_d[0,0,:]
    ranges_arr = [ [chg_points[s]+1,chg_points[s+1]] for s in range(0,len(chg_points)-1)]
    ranges_arr[0][0] = 0
    list_time_stamps = list(t_df)

    # discovered segment label of every timestamp
    dis_arr = []
    for item in list_time_stamps:
        # find the segment which the timestamp belongs to
        # (is dependent on which groups the two nodes belong to)
        d = sbm_core._findSegment(ranges_arr, len(ranges_arr) , int(item))
        dis_arr.append(d)

    gt_arr= np.array(gt_arr, dtype=np.float64)
    dis_arr= np.array(dis_arr, dtype=np.float64)
    ind_seg = adjusted_rand_score(gt_arr,dis_arr)
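    # ind_seg = 1 means the discovered change points induce exactly the
    # ground-truth segmentation of the event timestamps.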
    # print('ind {} : {}'.format(_itr, ind_seg))
    # print('g mapping {}'.format(g_mapping))
    for e_h in range(0,num_segments):
        g_a = group_dic[g_mapping[e_h]]
        list_of_groups= [[] for _ in range(num_roles)]
        for idx, val in g_a.items():
            list_of_groups[val].append(idx)
        # print('group assignments {}: {}'.format(e_h,list_of_groups))
    # ground-truth group assignments of the two levels
    g1= group_dic[0]
    g2= group_dic[1]
    # print('rand index: group {} : {}'.format(_itr, ind_grp))
    # GROUP ACCURACY: compare each discovered level against both ground-truth levels
    found_cont = 0
    for i_h in range(0,num_levels):
        # i_h level
        grp = group_dic_d[i_h]
        list_of_groups_d= [[] for _ in range(num_roles)]
        for idx, val in grp.items():
            list_of_groups_d[val].append(idx)

        ds= list(group_dic_d[i_h].values() )
        gt1 = list(g1.values())
        gt2 = list(g2.values())
        ind1=adjusted_rand_score(ds,gt1)
        ind2=adjusted_rand_score(ds,gt2)
        # match the level to whichever ground-truth level it agrees with best
        d_in = max(ind1,ind2)
        found_cont += d_in

    # average group ARI over the levels
    ind = found_cont/num_levels

    results[itr_no][0] = ind_seg
    results[itr_no][1] = it
    results[itr_no][2] = ind
    # print(ind_seg)
    # print(lamda_arr)
    # print(lamda_arr_act)

print('end')
import pickle
# pickle.dump(results, open('max-small-file-{}.pickle'.format(_itr), 'wb'))
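# column-wise summary over the 20 runs:
# column 0 = segmentation ARI, column 1 = iteration count, column 2 = group ARI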
arr = results
ll_avg_val = (sum(arr)/len(arr))
print(ll_avg_val)
print(max(arr[:,0]))
print(min(arr[:,0]))
print(max(arr[:,1]))
print(min(arr[:,1]))