""" Affect of Lambda Dataset-2 """ import numpy as np import pandas as pd import utils import sbm_core import math from itertools import combinations import itertools from sklearn.metrics.cluster import adjusted_rand_score # Initilaize np.random.seed(1137) num_roles=2 num_vertices=25 num_segments = 2 NO_SAMPLES= 100 group_assignment= np.random.randint(num_roles, size=(num_vertices)) nodes = np.arange(num_vertices) list_of_groups= [[] for _ in range(num_roles)] for idx, val in enumerate(group_assignment): list_of_groups[val].append(nodes[idx]) print(list_of_groups) size_all_pairs = {} for k in range(0, num_roles): for g in range(k, num_roles): U=list_of_groups[k] W=list_of_groups[g] if k == g: size_all_pairs[k,g] = math.comb(len(U), 2) if k != g: size_all_pairs[k,g] = len(U)*len(W) lamda_arr = np.ones((num_roles, num_roles,num_segments) , dtype=float) lamda_arr = 1e-1* np.random.randint(1,9, size=(num_roles, num_roles,num_segments)) #set value for each iteration ( 0 - 8 ) _itr = 8 _itr = 0 yu = (9-_itr)*.1 lamda_arr[0,0]=[yu, 0.1] lamda_arr[0,1]= [0.1, yu] lamda_arr[1,0]=lamda_arr[0,1] lamda_arr[1,1]=[yu, yu] lamda_arr_act = np.zeros((num_roles, num_roles,num_segments) , dtype=float) change_points_arr = np.zeros((num_roles, num_roles, num_segments+1) , dtype=int) df_all= None points= list(range(0, (num_segments+1)*NO_SAMPLES, NO_SAMPLES)) list1 = [] # Generate piecewise non-homogeneous poisson process for k in range(0, num_roles): for g in range(k, num_roles): comb = [] if k == g: comb = list(combinations(list_of_groups[k], 2)) # print(type(comb)) else: # comb = [] key_data = [list_of_groups[k],list_of_groups[g],] comb = list(itertools.product(*key_data)) # print(comb) if len(comb) != size_all_pairs[k,g]: print('not equal..') change_points_arr[k,g,:] = points lamda_arr[k,g,:] = lamda_arr[g,k,:] tot_count = np.zeros((num_segments) , dtype=float) for pair in comb: for d in range(0,num_segments): s = np.random.poisson(lamda_arr[k,g,d], NO_SAMPLES) # print(np.count_nonzero(s)) tot_count[d] += np.count_nonzero(s) list1=[i for i, e in enumerate(s) if e != 0] if len(list1) == 0: print('zero') list1 = [x+points[d] for x in list1] df = pd.DataFrame(data=list1) df.columns =['timestamp'] N= df.size list_start_stations =[pair[0]] * N list_end_stations =[pair[1]] * N df['source'] = list_start_stations df['target'] = list_end_stations df_all=pd.concat([df_all, df], ignore_index=True) for d in range(0,num_segments): lamda_arr_act[k,g,d] = tot_count[d]/(NO_SAMPLES*len(comb)) # print(tot_count[d]) ## Other preparations # Remove self loops df_all = df_all[((df_all['source'] ) != (df_all['target']))] #sort df_all=df_all.sort_values('timestamp') df_all = df_all[['target', 'timestamp','source']] # Save as .csv file # df_all.to_csv('./Data/synthetic_ground_truth_g1.csv') df=df_all dest_folder='./Results/synthetic/3' t_df = df['timestamp'] nodes_arr = np.union1d(df['target'],df['source']).astype(int) # list of nodes nodes = nodes_arr.tolist() num_vertices = len(nodes) # node-group dictionary group_dic = {} keys = nodes values = list(group_assignment) group_dic = dict(zip(keys,values)) # create a new dictionary - key: node-pair , value: list of timestamps dic=df.groupby(['source','target'])['timestamp'].apply(list).to_dict() print('{} {} {} '.format(group_dic, lamda_arr_act,change_points_arr)) liklihood_sum = sbm_core.compute_cost(group_dic,lamda_arr_act,change_points_arr,num_roles,num_segments,dic) print(' Initial Actual likelihood .......%f'%liklihood_sum) def _swap (row): if row['source'] > row['target']: 
def _swap(row):
    if row['source'] > row['target']:
        row['source'], row['target'] = row['target'], row['source']
    return row

# Undirected graph: order each pair so that source <= target
df = df.apply(_swap, axis=1)

# Scale timestamps so that the zeroth reference point is zero
refValue = df['timestamp'].min()
df['timestamp'] -= refValue

# Experiment
import experiment

# User parameters
num_roles = 2
num_segments = 2
num_levels = 2  # optional argument
algo_ver = 3
dest_folder = './Results/synthetic/'

# Tuning parameters
theta = 0
eta = 1
tuning_params = {'theta': theta, 'eta': eta}

import time
start_time = time.time()

exp_obj = experiment.Experiment(df, num_roles, num_segments, algo_ver, dest_folder, tuning_params, num_levels, refValue)
[itr_d, likelihood_d, group_dic_d, lambda_estimates_d, change_points_arr_d] = exp_obj.execute()

print("--- %s seconds ---" % (time.time() - start_time))

t_df = sorted(t_df)

# Segments discovered by the algorithm
chg_points = change_points_arr_d[0, 0, :]
ranges_arr = [[chg_points[s] + 1, chg_points[s + 1]] for s in range(0, len(chg_points) - 1)]
ranges_arr[0][0] = 0
list_time_stamps = list(t_df)

dis_arr = list()
gt_arr = list()

# Iterate over the timestamp list
for item in list_time_stamps:
    # Find the segment to which the timestamp belongs
    # (this depends on the groups of the two endpoint nodes)
    d = sbm_core._findSegment(ranges_arr, len(ranges_arr), int(item))
    dis_arr.append(d)

# Ground-truth segments
chg_points = change_points_arr[0, 0, :]
ranges_arr = [[chg_points[s] + 1, chg_points[s + 1]] for s in range(0, len(chg_points) - 1)]
ranges_arr[0][0] = 0
list_time_stamps = list(t_df)

# Iterate over the timestamp list
for item in list_time_stamps:
    # Find the segment to which the timestamp belongs
    # (this depends on the groups of the two endpoint nodes)
    d = sbm_core._findSegment(ranges_arr, len(ranges_arr), int(item))
    gt_arr.append(d)

ind = adjusted_rand_score(gt_arr, dis_arr)
print('rand index: seg {} : {}'.format(_itr, ind))

# Compare the discovered grouping against the ground-truth assignment
g1 = group_dic                               # ground-truth node-group dictionary
ds = list(group_dic_d.values())              # discovered group labels
gt1 = [g1[n] for n in group_dic_d.keys()]    # ground-truth labels in the same node order
ind_grp = adjusted_rand_score(ds, gt1)
print('rand index: group {} : {}'.format(_itr, ind_grp))

# 0.9785444674036701
# 0.9791525131372905
# 0.981440657362889
# 0.9780947193990287
# 0.9785576050121263
# 0.9768656988977588
# 0.9794087578274921
# 0.9785467310928326
# 0.8326828222297133

# 3
# 3
# 3
# 3
# 3
# 3
# 5
# 5
# 5
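# --- Illustrative sketch (an assumption, not sbm_core's actual code) ---
# sbm_core._findSegment(ranges_arr, k, value) is used above to map a timestamp to the
# index of the segment [start, end] that contains it. A minimal stand-in with the same
# calling convention might look like the hypothetical helper below; it is shown only to
# document the expected behaviour and is not used by the pipeline above.
def _find_segment_sketch(ranges, k, value):
    """Return the index i (0 <= i < k) with ranges[i][0] <= value <= ranges[i][1], else -1."""
    for i in range(k):
        start, end = ranges[i]
        if start <= value <= end:
            return i
    return -1

# Example: with two segments of 100 samples each, timestamp 150 falls into segment 1.
_demo_ranges = [[0, 100], [101, 200]]
print(_find_segment_sketch(_demo_ranges, len(_demo_ranges), 150))  # -> 1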