""" Effect of lambda: LD Dataset-1 """ import numpy as np import pandas as pd import utils import sbm_core import math from itertools import combinations import itertools from sklearn.metrics.cluster import adjusted_rand_score # Initilaize np.random.seed(155) res = np.zeros((9,5) , dtype=float) for _itr in range(8,9): num_roles=2 num_vertices=20 num_segments = 4 num_levels = 2 NO_SAMPLES= 200 nodes = np.arange(num_vertices) lamda_arr_act = np.zeros((num_roles, num_roles,num_levels) , dtype=float) H =num_levels print('k-h levels %d'%(num_levels)) # h-level lambda estimates lambda_estimates_h = np.random.rand(num_roles, num_roles, H) lambda_estimates_h = 1e-2*np.random.randint(11,99, size=(num_roles, num_roles, H)) # Make high variant lambdas yu = (9-_itr)*.1 lambda_estimates_h[0,0,:] = [yu, 0.1] lambda_estimates_h[0,1,:] = [0.1, yu] lambda_estimates_h[1,0,:] = lambda_estimates_h[0,1,:] lambda_estimates_h[1,1,:] = [yu, yu] l1 =list(range(0, H)) l2 = [] if num_segments > num_levels: l2 = [np.random.randint(0,H) for i in range(num_segments-H)] # Mapping from segment to a level g_mapping= np.array(l1 + l2) print('g mapping {}'.format(g_mapping)) # initilaize group assignment randomly group_assignment_arr= np.random.randint(num_roles, size=(num_levels,num_vertices)) # node-group dictionary group_dic = {} for i in range(0,num_levels ): level = i group_dic_level = {} keys = nodes values = list(group_assignment_arr[level]) group_dic_level = dict(zip(keys,values)) group_dic[i] = group_dic_level print('initial') # print(group_dic) for e_h in range(0,num_segments): g_a = group_dic[g_mapping[e_h]] list_of_groups= [[] for _ in range(num_roles)] for idx, val in g_a.items(): list_of_groups[val].append(idx) print('group assignments {}: {}'.format(e_h,list_of_groups)) # Plotting #Initialize lamda lamda_arr = np.zeros((num_roles, num_roles,num_segments) , dtype=float) for d in range(0, num_segments): for k in range(0, num_roles): for g in range(k, num_roles): lamda_arr[k,g, d]= lambda_estimates_h[k,g,g_mapping[d]] lamda_arr[g,k, d]= lamda_arr[k,g, d] change_points_arr = np.zeros((num_roles, num_roles, num_segments+1) , dtype=int) df_all= None points= list(range(0, (num_segments+1)*NO_SAMPLES, NO_SAMPLES)) list1 = [] level_seg_mapping = {} for d in range(num_segments): level = g_mapping[d] if level in level_seg_mapping: level_seg_mapping[level].append(d) else: level_seg_mapping[level] = [] level_seg_mapping[level].append(d) # %% # Generate piecewise non-homogeneous poisson process tot_count = np.zeros((num_levels) , dtype=float) com_len = np.zeros((num_levels) , dtype=float) # for pair in comb: for i in range(0,num_levels): # i = g_mapping[d] group_assignment = group_assignment_arr[i] print(group_assignment) list_of_groups= [[] for _ in range(num_roles)] for idx, val in enumerate(group_assignment): list_of_groups[val].append(nodes[idx]) # print(list_of_groups) size_all_pairs = {} for kk in range(0, num_roles): for gg in range(kk, num_roles): U=list_of_groups[kk] W=list_of_groups[gg] if kk == gg: size_all_pairs[kk,gg] = math.comb(len(U), 2) if kk != gg: size_all_pairs[kk,gg] = len(U)*len(W) for k in range(0, num_roles): for g in range(k, num_roles): change_points_arr[k,g,:] = points lamda_arr[k,g,:] = lamda_arr[g,k,:] comb = [] if k == g: comb = list(combinations(list_of_groups[k], 2)) # print(type(comb)) else: # comb = [] key_data = [list_of_groups[k],list_of_groups[g],] comb = list(itertools.product(*key_data)) # print(comb) if len(comb) != size_all_pairs[k,g]: print('not equal..') print('d val {}'.format( 
    # %%
    # Generate a piecewise non-homogeneous Poisson process
    tot_count = np.zeros((num_levels), dtype=float)
    com_len = np.zeros((num_levels), dtype=float)

    for i in range(0, num_levels):

        group_assignment = group_assignment_arr[i]
        print(group_assignment)

        list_of_groups = [[] for _ in range(num_roles)]
        for idx, val in enumerate(group_assignment):
            list_of_groups[val].append(nodes[idx])

        # number of node pairs for each role pair
        size_all_pairs = {}
        for kk in range(0, num_roles):
            for gg in range(kk, num_roles):
                U = list_of_groups[kk]
                W = list_of_groups[gg]
                if kk == gg:
                    size_all_pairs[kk, gg] = math.comb(len(U), 2)
                if kk != gg:
                    size_all_pairs[kk, gg] = len(U) * len(W)

        for k in range(0, num_roles):
            for g in range(k, num_roles):
                change_points_arr[k, g, :] = points
                lamda_arr[k, g, :] = lamda_arr[g, k, :]

                if k == g:
                    comb = list(combinations(list_of_groups[k], 2))
                else:
                    key_data = [list_of_groups[k], list_of_groups[g], ]
                    comb = list(itertools.product(*key_data))
                if len(comb) != size_all_pairs[k, g]:
                    print('not equal..')

                com_len[i] = len(comb)
                tot_count[i] = 0

                for pair in comb:
                    # draw per-timestamp counts at this level's rate for role pair (k, g)
                    s = np.random.poisson(lambda_estimates_h[k, g, i], NO_SAMPLES)
                    tot_count[i] += np.count_nonzero(s)

                    list_org = [t for t, e in enumerate(s) if e != 0]
                    if len(list_org) == 0:
                        print('zero')

                    # replicate the sampled timestamps into every segment of this level
                    for d in level_seg_mapping[i]:
                        list1 = [x + points[d] for x in list_org]

                        df = pd.DataFrame(data=list1)
                        df.columns = ['timestamp']

                        N = df.size
                        df['source'] = [pair[0]] * N
                        df['target'] = [pair[1]] * N

                        df_all = pd.concat([df_all, df], ignore_index=True)

                # empirical (actual) rate for this role pair at level i
                lamda_arr_act[k, g, i] = round(((tot_count[i]) / (NO_SAMPLES * com_len[i])), 1)
                lamda_arr_act[g, k, i] = lamda_arr_act[k, g, i]
                print(' {} {} {} {} : k g level : lambda'.format(k, g, i, lamda_arr_act[g, k, i]))

    # Remove self-loops
    df_all = df_all[((df_all['source']) != (df_all['target']))]

    # sort by timestamp
    df_all = df_all.sort_values('timestamp')
    df_all = df_all[['target', 'timestamp', 'source']]

    # Save as .csv file
    # df_all.to_csv('./Data/synthetic_ground_truth_g1.csv')

    df = df_all

    dest_folder = './Results/synthetic/3'
    t_df = df['timestamp']

    # list of nodes
    nodes_arr = np.union1d(df['target'], df['source']).astype(int)
    nodes = nodes_arr.tolist()
    num_vertices = len(nodes)

    # create a new dictionary - key: node pair, value: list of timestamps
    dic = df.groupby(['source', 'target'])['timestamp'].apply(list).to_dict()
    print('{} {} {} '.format(group_dic, lamda_arr_act, change_points_arr))
    # likelihood_sum = sbm_core.mm_compute_cost(group_dic,lamda_arr_act,change_points_arr,num_roles,num_segments,dic,g_mapping)
    # print(' Initial Actual likelihood .......%f'%likelihood_sum)

    def _swap(row):
        if row['source'] > row['target']:
            row['source'], row['target'] = row['target'], row['source']
        return row

    # Undirected graph: order each node pair as (smaller, larger)
    df = df.apply(_swap, axis=1)

    # scale timestamps to a zeroth reference point
    refValue = df['timestamp'].min()
    df['timestamp'] -= refValue

    # ground-truth segment label for every timestamp
    chg_points = change_points_arr[0, 0, :]
    ranges_arr = [[chg_points[s] + 1, chg_points[s + 1]] for s in range(0, len(chg_points) - 1)]
    ranges_arr[0][0] = 0
    list_time_stamps = list(t_df)

    # iterate over the timestamps list
    gt_arr = list()
    for item in list_time_stamps:
        # find the segment to which the timestamp belongs
        # (depends on which groups the two nodes belong to)
        d = sbm_core._findSegment(ranges_arr, len(ranges_arr), int(item))
        gt_arr.append(d)

    # Experiment
    import experiment

    # User parameters
    # num_roles=2
    # num_segments=10
    # num_levels=5  # Optional arg
    algo_ver = 4
    dest_folder = './Results/synthetic/'

    # tuning parameters
    theta = 1e-7
    eta = 1
    tuning_params = {'theta': theta, 'eta': eta}

    exp_obj = experiment.Experiment(df, num_roles, num_segments, algo_ver, dest_folder, tuning_params, num_levels, refValue)
    # [likelihood_f,group_dic_f] = exp_obj.execute()
    [it, ll1, group_dic_d, lambda_estimates, change_points_arr_d] = exp_obj.execute()
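    # Note on the segmentation score below: each timestamp is labelled with the index of
    # the range that contains it. With NO_SAMPLES = 200 and num_segments = 4, the
    # ground-truth ranges built above are [[0, 200], [201, 400], [401, 600], [601, 800]],
    # so, for example, a timestamp of 250 gets label 1. The discovered change points are
    # labelled the same way, and the two label sequences are compared with the adjusted
    # Rand index.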
    # SEGMENTATION ACCURACY
    t_df = sorted(t_df)
    chg_points = change_points_arr_d[0, 0, :]
    ranges_arr = [[chg_points[s] + 1, chg_points[s + 1]] for s in range(0, len(chg_points) - 1)]
    ranges_arr[0][0] = 0
    list_time_stamps = list(t_df)

    # iterate over the timestamps list
    dis_arr = list()
    for item in list_time_stamps:
        # find the segment to which the timestamp belongs
        # (depends on which groups the two nodes belong to)
        d = sbm_core._findSegment(ranges_arr, len(ranges_arr), int(item))
        dis_arr.append(d)

    gt_arr = np.array(gt_arr, dtype=np.float64)
    dis_arr = np.array(dis_arr, dtype=np.float64)
    ind_seg = adjusted_rand_score(gt_arr, dis_arr)
    print('ind {} : {}'.format(_itr, ind_seg))

    likelihood_sum = sbm_core.mm_compute_cost(group_dic, lamda_arr_act, change_points_arr, num_roles, num_segments, dic, g_mapping)
    print(' Initial Actual likelihood .......%f' % likelihood_sum)

    print('g mapping {}'.format(g_mapping))

    for e_h in range(0, num_segments):
        g_a = group_dic[g_mapping[e_h]]
        list_of_groups = [[] for _ in range(num_roles)]
        for idx, val in g_a.items():
            list_of_groups[val].append(idx)
        print('group assignments {}: {}'.format(e_h, list_of_groups))

    # GROUP ACCURACY: compare each discovered level against both ground-truth levels
    # and keep the best match, since level labels may be permuted.
    g1 = group_dic[0]
    g2 = group_dic[1]

    found_cont = 0
    for i_h in range(0, num_levels):
        # discovered assignment of level i_h
        ds = list(group_dic_d[i_h].values())
        # ground-truth assignments of the two levels
        gt1 = list(g1.values())
        gt2 = list(g2.values())

        ind1 = adjusted_rand_score(ds, gt1)
        ind2 = adjusted_rand_score(ds, gt2)
        d_in = max(ind1, ind2)
        found_cont += d_in

    ind = found_cont / 2
    res[_itr][1] = ind
    res[_itr][4] = ind_seg

print('end')

# 0.989349
# 0.9899235585218414
# 0.9887209171780673
# 0.9900141929986654
# 0.9900915114849232
# 0.9895393785077311
# 0.9890441642420313
# 0.5056343918828786
# 0.489279

# 2
# 3
# 2
# 3
# 3
# 3
# 4
# 3
# 3