# synthetic_experiment_time_vs_edges.py


"""
SYNTHETIC :Generate piecewise non-homogeneous poisson point process (NHPPP)

To check the running time vs number of edges

To reproduce the results of the paper:
set NO_SAMPLES = 50 from list_samples =  [50,100,150,200] ; 
one at a time


"""
import numpy as np
import pandas as pd
import math
from itertools import combinations
import itertools 
import time
import experiment
from streamlit import caching

# Initialize the RNG so every run produces the same synthetic network.
np.random.seed(113)

# To reproduce the results of the paper: run once per value of
# NO_SAMPLES taken from list_samples = [50, 100, 150, 200].
NO_SAMPLES = 50

num_roles = 3
num_vertices = 20
num_segments = 3

# Clear any cached state left over from previous Streamlit runs.
caching.clear_cache()

# Assign every vertex a role uniformly at random.
group_assignment = np.random.randint(num_roles, size=num_vertices)

nodes = np.arange(num_vertices)

# Bucket the vertex ids by their assigned role.
list_of_groups = [[] for _ in range(num_roles)]
for node, role in zip(nodes, group_assignment):
    list_of_groups[role].append(node)

# print(list_of_groups)

# Number of distinct node pairs for every (unordered) pair of roles.
size_all_pairs = {}
for k in range(0, num_roles):
    for g in range(k, num_roles):
        U = list_of_groups[k]
        W = list_of_groups[g]

        if k == g:
            # Pairs within a single role: n choose 2.
            size_all_pairs[k, g] = math.comb(len(U), 2)
        else:
            # Pairs across two different roles: full bipartite product.
            size_all_pairs[k, g] = len(U) * len(W)

# Piecewise-constant Poisson rates, one per (role, role, segment) triple,
# drawn from {0.11, ..., 0.98}.  (A dead `np.ones` initializer that was
# immediately overwritten has been removed.)
lamda_arr = 1e-2 * np.random.randint(11, 99, size=(num_roles, num_roles, num_segments))

# Segment boundaries per role pair; filled in inside the generator loop.
change_points_arr = np.zeros((num_roles, num_roles, num_segments + 1), dtype=int)
df_all = None

# Common segment boundaries: 0, NO_SAMPLES, 2*NO_SAMPLES, ...
points = list(range(0, (num_segments + 1) * NO_SAMPLES, NO_SAMPLES))

# Generate the piecewise non-homogeneous Poisson process, one role pair
# at a time.  Per-pair frames are collected in a list and concatenated
# once at the end: the original concatenated inside the innermost loop,
# which is quadratic in the total number of events.
frames = []
for k in range(0, num_roles):
    for g in range(k, num_roles):
        # All node pairs for this role pair: combinations within one
        # role, the Cartesian product across two different roles.
        if k == g:
            comb = list(combinations(list_of_groups[k], 2))
        else:
            key_data = [list_of_groups[k], list_of_groups[g]]
            comb = list(itertools.product(*key_data))
        # Sanity check against the precomputed pair counts.
        if len(comb) != size_all_pairs[k, g]:
            print('not equal..')

        change_points_arr[k, g, :] = points
        # Mirror the rate tensor so it is symmetric in the role indices.
        lamda_arr[k, g, :] = lamda_arr[g, k, :]
        for pair in comb:

            for d in range(0, num_segments):

                # One Poisson draw per time step of this segment; keep
                # the indices of steps with at least one event.
                s = np.random.poisson(lamda_arr[k, g, d], NO_SAMPLES)
                list1 = [i for i, e in enumerate(s) if e != 0]

                if len(list1) == 0:
                    print('zero')
                # Shift event times to this segment's absolute offset.
                list1 = [x + points[d] for x in list1]

                df = pd.DataFrame(data=list1)
                df.columns = ['timestamp']

                N = df.size
                df['source'] = [pair[0]] * N
                df['target'] = [pair[1]] * N

                frames.append(df)

# Single concatenation; ignore_index matches the original incremental
# pd.concat(..., ignore_index=True) result.
df_all = pd.concat(frames, ignore_index=True)

## Other preparations

# Drop self loops, order events chronologically, rearrange the columns.
keep = df_all['source'] != df_all['target']
df_all = df_all[keep]
df_all = df_all.sort_values('timestamp')
df_all = df_all[['target', 'timestamp', 'source']]

# Save as .csv file
# df_all.to_csv('./Data/synthetic_ground_truth_g1.csv')


df = df_all
dest_folder = './Results/synthetic/3'  # NOTE(review): overwritten below before use — confirm
t_df = df['timestamp']  # NOTE(review): appears unused afterwards — confirm
# Shuffle the rows (randomises the order of equal-timestamp events),
# then restore chronological order.
df = df.sample(frac=1).reset_index(drop=True)
df = df.sort_values('timestamp')

# Sorted array of every node id that appears as an endpoint.
nodes_arr = np.union1d(df['target'], df['source']).astype(int)
nodes = nodes_arr.tolist()
num_vertices = len(nodes)
    
def _swap (row):
    if row['source'] > row['target']:
        row['source'] , row['target'] =row['target'] , row['source']
    return row
    
# Treat edges as undirected: put each row's endpoints in canonical order.
df = df.apply(_swap, axis=1)

# Re-base the timestamps so that the earliest event happens at time zero.
refValue = df['timestamp'].min()
df['timestamp'] = df['timestamp'] - refValue



# Experiments

# User parameters
num_roles = 3
num_segments = 5
num_levels = 3  # optional argument of Experiment
algo_ver = 2
dest_folder = './Results/synthetic/'

# Tuning parameters
theta = 1e-20
eta = 1
tuning_params = {'theta': theta, 'eta': eta}

# Run the experiment and report wall-clock time plus the edge count.
start_time = time.time()
exp_obj = experiment.Experiment(
    df, num_roles, num_segments, algo_ver, dest_folder,
    tuning_params, num_levels, refValue,
)
exp_obj.execute()

print("--- %s seconds ---" % (time.time() - start_time))
print("no of edges: %d" % df.shape[0])


# Running time depends on the machine, but it should have a linear
# trend w.r.t. edges with algo_ver=3 and a quadratic trend with
# algo_ver=2.