Newer
Older
import numpy as np
import pandas as pd
import utils
import sbm_core
import math
from itertools import combinations
from sklearn.metrics.cluster import adjusted_rand_score
# Initialize the parameters of the synthetic network.
num_roles=2
num_vertices=20
num_segments = 2
# Sample size passed to np.random.poisson for each segment further below; also
# the spacing between consecutive entries of `points`.
NO_SAMPLES= 1850
# Draw a random group label in [0, num_roles) for each of the num_vertices vertices.
group_assignment= np.random.randint(num_roles, size=(num_vertices))
# One (initially empty) member list per group.
list_of_groups= [[] for _ in range(num_roles)]
# Append each node to the member list of the group it was assigned to.
# NOTE(review): `nodes` has no visible assignment before this point (the only
# visible one comes much later in the script) -- confirm where it is defined.
for idx, val in enumerate(group_assignment):
list_of_groups[val].append(nodes[idx])
# Number of candidate node pairs for every unordered pair of groups (k <= g):
# "n choose 2" inside a single group, |U| * |W| across two different groups.
size_all_pairs = {}
for k in range(num_roles):
    U = list_of_groups[k]
    for g in range(k, num_roles):
        W = list_of_groups[g]
        if k == g:
            size_all_pairs[k, g] = math.comb(len(U), 2)
        else:
            size_all_pairs[k, g] = len(U) * len(W)
# Rate array indexed as [group, group, segment]; starts at 1.0 everywhere and
# is then overwritten for each of the four group pairs.
# NOTE(review): `yu` is not defined in the visible source; presumably the
# "high" rate contrasted with the 0.01 "low" rate -- confirm.
lamda_arr = np.ones((num_roles, num_roles,num_segments) , dtype=float)
lamda_arr[0,0]=[yu, 0.01]
lamda_arr[0,1]= [0.01, yu]
# Mirror the (0, 1) rates into (1, 0).
lamda_arr[1,0]=lamda_arr[0,1]
lamda_arr[1,1]=[yu, yu]
# Filled in later from the observed counts (tot_count / (NO_SAMPLES * len(comb))).
lamda_arr_act = np.zeros((num_roles, num_roles,num_segments) , dtype=float)
# Integer array holding num_segments + 1 entries per group pair; filled from
# `points` in the generation loop.
change_points_arr = np.zeros((num_roles, num_roles, num_segments+1) , dtype=int)
df_all= None
# Segment boundaries: 0, NO_SAMPLES, 2 * NO_SAMPLES, ..., num_segments * NO_SAMPLES.
points= list(range(0, (num_segments+1)*NO_SAMPLES, NO_SAMPLES))
list1 = []
# NOTE(review): second, redundant reset of df_all (it is already None above).
df_all= None
# Generate a piecewise non-homogeneous Poisson process for every unordered
# pair of groups (k <= g).
# NOTE(review): indentation has been lost and several statements appear to be
# missing in this region (the construction of `comb`, the loops that bind
# `d` and `pair`, and the creation of `df` / `N`). The comments below describe
# only what the visible statements do.
for k in range(0, num_roles):
for g in range(k, num_roles):
comb = []
# NOTE(review): this branch has no visible statement, only a commented-out
# print; presumably `comb` was built here from the members of group k -- confirm.
if k == g:
# print(type(comb))
else:
# comb = []
# The member lists of the two groups; `key_data` is not used again in the
# visible code.
key_data = [list_of_groups[k],list_of_groups[g],]
# print(comb)
# Sanity check: the number of pairs in `comb` should equal the precomputed count.
if len(comb) != size_all_pairs[k,g]:
print('not equal..')
# Every group pair gets the same change points.
change_points_arr[k,g,:] = points
# Overwrite the (k, g) rates with the (g, k) rates.
lamda_arr[k,g,:] = lamda_arr[g,k,:]
# Per-segment count of non-zero draws for this group pair.
tot_count = np.zeros((num_segments) , dtype=float)
# NOTE(review): `d` is used here before the only visible `for d in ...` loop below.
# Draw NO_SAMPLES Poisson counts at the rate of segment d.
s = np.random.poisson(lamda_arr[k,g,d], NO_SAMPLES)
# print(np.count_nonzero(s))
tot_count[d] += np.count_nonzero(s)
# Indices of the draws that are non-zero.
list1=[i for i, e in enumerate(s) if e != 0]
if len(list1) == 0:
print('zero')
# Shift the indices by the start of segment d.
list1 = [x+points[d] for x in list1]
# NOTE(review): `pair`, `N` and `df` are not defined in the visible source.
# The second element of `pair` is repeated N times and stored as the 'target' column.
list_end_stations =[pair[1]] * N
df['target'] = list_end_stations
# Append this batch of events to the running table.
df_all=pd.concat([df_all, df], ignore_index=True)
# Observed rate per segment: non-zero draws divided by (samples * number of pairs).
for d in range(0,num_segments):
lamda_arr_act[k,g,d] = tot_count[d]/(NO_SAMPLES*len(comb))
# print(tot_count[d])
## Post-process the generated events and score the ground-truth model
# Drop self-loops, order the events chronologically and keep only the three
# columns used downstream (in target / timestamp / source order).
df_all = (
    df_all[df_all['source'] != df_all['target']]
    .sort_values('timestamp')[['target', 'timestamp', 'source']]
)
df = df_all
dest_folder = './Results/synthetic/3'
t_df = df['timestamp']

# Every node id that occurs as an endpoint (np.union1d returns them sorted
# and de-duplicated), cast to int and kept both as an array and as a list.
nodes_arr = np.union1d(df['target'], df['source']).astype(int)
nodes = nodes_arr.tolist()
num_vertices = len(nodes)

# Node -> group lookup, pairing the node list with group_assignment in order.
keys = nodes
values = list(group_assignment)
group_dic = dict(zip(keys, values))

# (source, target) -> list of that pair's timestamps.
dic = (
    df.groupby(['source', 'target'])['timestamp']
    .apply(list)
    .to_dict()
)

# Cost of the generating model under the ground-truth groups, observed rates
# and change points.
liklihood_sum = sbm_core.compute_cost(
    group_dic,
    lamda_arr_act,
    change_points_arr,
    num_roles,
    num_segments,
    dic,
)
def _swap (row):
if row['source'] > row['target']:
row['source'] , row['target'] =row['target'] , row['source']
return row
# Treat the graph as undirected: pass every row through _swap so that each
# edge is stored with source <= target.
df = df.apply(_swap, axis=1)
# Shift the timestamps so that the earliest event sits at time zero; the
# original minimum is kept in refValue.
refValue = df['timestamp'].min()
df['timestamp'] -= refValue
# User parameters for the fitting run.
num_roles=2
algo_ver=3
# NOTE(review): this replaces the './Results/synthetic/3' value assigned to
# dest_folder earlier in the script.
dest_folder='./Results/synthetic/'
eta = 1
# NOTE(review): `theta` is not defined in the visible source.
tuning_params= {'theta':theta,'eta':eta}
# NOTE(review): neither the `experiment` module nor `num_levels` is
# imported/defined in the visible source -- confirm both exist elsewhere.
exp_obj = experiment.Experiment(df,num_roles,num_segments,algo_ver,dest_folder,tuning_params,num_levels,refValue)
# execute() is unpacked into five values; the names suggest iteration count,
# likelihood, discovered groups, rate estimates and change points -- TODO confirm
# against experiment.Experiment.
[itr_d,likelihood_d,group_dic_d,lambda_estimates_d,change_points_arr_d] = exp_obj.execute()
# NOTE(review): indentation has been lost and statements appear to be missing
# in this region: the loops that bind `item`, and the definitions of `dis_arr`,
# `gt_arr`, `results`, `itr_no` and `g1`, are not visible.

# --- Segment label per timestamp under the discovered change points ---
# Only the change points of group pair (0, 0) are used.
chg_points = change_points_arr_d[0,0,:]
# Turn consecutive change points into [start + 1, end] ranges ...
ranges_arr = [ [chg_points[s]+1,chg_points[s+1]] for s in range(0,len(chg_points)-1)]
# ... and make the first range start at 0.
ranges_arr[0][0] = 0
list_time_stamps = list(t_df)
# Find the segment to which the timestamp belongs
# (this depends on which groups the two nodes belong to).
d = sbm_core._findSegment(ranges_arr, len(ranges_arr) , int(item))
dis_arr.append(d)

# --- Segment label per timestamp under the ground-truth change points ---
chg_points = change_points_arr[0,0,:]
ranges_arr = [ [chg_points[s]+1,chg_points[s+1]] for s in range(0,len(chg_points)-1)]
ranges_arr[0][0] = 0
list_time_stamps = list(t_df)
# Iterate over the list of timestamps:
# find the segment to which the timestamp belongs
# (this depends on which groups the two nodes belong to).
d = sbm_core._findSegment(ranges_arr, len(ranges_arr) , int(item))
gt_arr.append(d)

# Adjusted Rand index between ground-truth and discovered segment labels.
ind = adjusted_rand_score(gt_arr,dis_arr)
# print('rand index: seg {} : {}'.format(_itr, ind))
# NOTE(review): `g2` is not used again in the visible code.
g2= group_dic_d[1]
ds= list(group_dic_d.values() )
# NOTE(review): `g1` is not defined in the visible source; presumably the
# ground-truth node -> group dictionary -- confirm.
gt1 = list(g1.values())
# Adjusted Rand index between discovered and ground-truth group labels.
ind_grp=adjusted_rand_score(ds,gt1)
# print('rand index: group {} : {}'.format(_itr, ind_grp))
# Per-run record: [segment ARI, itr_d, group ARI].
results[itr_no][0] = ind
results[itr_no][1] = itr_d
results[itr_no][2] = ind_grp
arr = results
# Sum of the rows divided by the number of rows.
# assumes `results` is a 2-D NumPy array (it is indexed as arr[:, 0] below), which
# makes this a column-wise mean -- TODO confirm.
ll_avg_val = (sum(arr)/len(arr))
print(ll_avg_val)
# Smallest value in column 0 (segment ARI) and largest in column 1 (itr_d).
print(min(arr[:,0]))
print(max(arr[:,1]))