sbm_core.py



# ###################################################################################
# ### Utility Functions for Maximum Likelihood Estimation (MLE) and Segmentation  ###
# ###                  ( based on stochastic blockmodels )                        ###
# ###################################################################################


import math
import numpy as np
import utils
import networkx as nx
import copy
import itertools
import collections
import random

# returns the index of the segment  in which particular timestamp lies using binary search 
def  _findSegment(a, n, K): 
  
    start = 0
    end = n - 1
  
    # Binary search 
    while (end <= end): 
        
        if end >= start:
            # mid point 
            mid = (start + end) //2
            #  element  found 
            if (K >= a[mid][0] and K <= a[mid][1]):                 
                return mid 
      
            # first half 
            elif (K > a[mid][1]): 
                start = mid +  1
      
            # second half 
            elif (K < a[mid][0]): 
                end = mid  - 1
        else:
            # print('K : {} MID: {} START:{} END: {} ARRA : {}, n: {}'.format(K,mid, start,end, a,n))
            # Not found 
            print('Not Found')
            return -1
        
# split an empty group if it does exist
def _split_the_empty_group(num_roles,list_of_groups,group_assignment):
    
    _is_grp_emty = False
    _empty_grp_index = 0
    
    # check whether a group is empty
    for k in range(0, num_roles):
        
        if len( list_of_groups[k]) == 0:
            
            _is_grp_emty = True
            _empty_grp_index = k
            
            print('group is empty..')
           
    # if group is empty, split the largest group        
    if  _is_grp_emty:

        list_len = [len(i) for i in list_of_groups] 
        max_len_index =  np.argmax(list_len)

        two_split = np.array_split(list_of_groups[max_len_index], 2)
        list_of_groups[max_len_index] =  two_split[0].tolist()
        list_of_groups[_empty_grp_index] =  two_split[1].tolist()
        
        # update new group assignments        
        for node in list_of_groups[_empty_grp_index]:
            group_assignment[node] = _empty_grp_index
        
        print(group_assignment)
    return group_assignment 
        
#  Assign groups ( ver 1 )
def assign_groups(group_assignment,lambda_estimates,change_points_arr,nodes,num_roles,num_segments,dic):

    list_of_groups=  [[] for _ in range(num_roles)]
    for idx, val in group_assignment.items():
        list_of_groups[val].append(idx)     
    
    for v in nodes:       
        # remove v 
        for g in range(0, num_roles):        
            
            # Extract Group g
            _list_g = list_of_groups[g]
            
            if v in _list_g :
                _list_g.remove(v)
        
        # store current group assignment 
        max_i = group_assignment[v]
        
        # likelihoods when node v belongs to each group i
        likelihood_sum = np.zeros((num_roles) , dtype=float) 
        
        # if node v belongs to group i
        for i in range(0, num_roles): 

            likelihood_sum[i] = 0
            
            # interactions with other groups and own group
            for j in range(0, num_roles):
                
                W = list_of_groups[j]
                
                # compute edge counts; with  group  j, within each particular segment
                edge_counts = np.zeros(( num_segments) , dtype=int) 
                
                for w in W:
                    
                    v1 =  v
                    v2 =  w
                    
                    # undirected ; order of tuple
                    if v1 > v2:
                        v1, v2 = v2, v1
                    
                    # get the list of timestamps corresponding to (v1,v2) pair
                    if dic.get((v1,v2)):
                        # change-points of (i,j) group pair
                        chg_points = change_points_arr[i,j,:]
                        
                        # build ranges from change points
                        # ex: [0 20340 29460 47640 67800 82560] implies
                        # [[0, 20340], [20341, 29460], [29461, 47640], [47641, 67800], [67801, 82560]]
                        ranges_arr = [ [chg_points[s]+1,chg_points[s+1]] for s in range(0,len(chg_points)-1)]
                        ranges_arr[0][0] = 0 
                        
                        list_time_stamps = dic.get((v1,v2))
                        
                        # iterate over timestamps list
                        for item in list_time_stamps:
                            
                            # find the segment which the timestamp belongs
                            # (is dependent on which groups the two nodes belong)
                            d =  _findSegment(ranges_arr, len(ranges_arr) , int(item)) 
                            edge_counts[d] += 1
                    
                for d in range(0, num_segments):
                    delta_t =  change_points_arr[i,j,d+1] -  change_points_arr[i,j,d]
                    
                    # include the initial time-stamp
                    if d == 0:
                        delta_t += 1
                        
                    if lambda_estimates[i,j,d]!= 0:
                        likelihood_sum[i] += (edge_counts[d]*math.log(lambda_estimates[i,j,d]) - len(W)*lambda_estimates[i,j,d]*delta_t)
                    else:
                        print('lambda zero..')
        
        if  max(likelihood_sum) != 0:  
            max_i = np.argmax(likelihood_sum)
            group_assignment[v] = max_i
            
        list_of_groups[max_i].append(v)
    
    # To cater the emptiness of a group   
    group_assignment = _split_the_empty_group(num_roles,list_of_groups,group_assignment)    
    
    # print(list_of_groups)  
    return group_assignment 


#  Assign groups ( ver 2.2 )
def group_assignment_ver2_2(nodes,num_roles,num_segments,lambda_estimates,group_assignment,change_points_arr,dic):  
    
    list_of_groups=  [[] for _ in range(num_roles)]
    for idx, val in group_assignment.items():
        list_of_groups[val].append(idx)
    # print(list_of_groups)
    
    # no of pairs of groups or roles
    size_all_pairs = int(num_roles*(num_roles + 1)/2)  
    
    # convert tensor to 2-d array
    l_array = np.zeros((size_all_pairs,num_segments) , dtype=float )
   
    cnt = 0    
    for z1 in range(0, num_roles):
        for z2 in range(z1, num_roles):           
            for d in range(0, num_segments):
                l_array[cnt,d]= lambda_estimates[z1,z2,d]
            cnt += 1
    
    # find h-levels
    unique_cols = np.unique(l_array, axis=1)        
    h = len(unique_cols[0])
    # print('h levels %d'%h)
    
    # h-level lamda estimates    
    lambda_estimates_h = np.zeros((num_roles, num_roles, h) , dtype=float )
    
    cnt = 0    
    for z1 in range(0, num_roles):
        for z2 in range(z1, num_roles):
            for d in range(0, h):
                lambda_estimates_h[z1,z2,d]= unique_cols[cnt,d]
                lambda_estimates_h[z2,z1,d]= unique_cols[cnt,d]
            cnt += 1 
    
    # segment to level dictionary mapping
    seg_level_dic = {}
    lst = lambda_estimates_h[0,0,:]
       
    for d in range(num_segments):   
        _h  = [ind for ind, x in enumerate( lst) if x == lambda_estimates[0,0,d]]
        seg_level_dic[d] = _h[0]   
    # print('seg_level_dic : {}'.format(seg_level_dic))
    
    # Multidigraph object 
    G = nx.MultiDiGraph()
    G.add_nodes_from(nodes)
    G.add_edges_from(list(dic))
    G = G.to_undirected()  
    
    # iterate over all nodes
    for v in nodes:
        
        # remove v 
        for g in range(0, num_roles):        
            #Extract Group g
            _list_g = list_of_groups[g]
            
            if v in _list_g :
                _list_g.remove(v)
        
        # store current group of v
        max_i = group_assignment[v]
        
         # likelihoods when node v belongs to each group i
        likelihood_sum = np.zeros((num_roles) , dtype=float)
        
        # edge counts in particular group and level
        edge_counts = np.zeros((num_roles, h) , dtype=int) 
        
        # find neighbours of node v
        neighbour_nodes  = [n for n in G.neighbors(v)]
        # print('neignbours {}'.format(neighbour_nodes))
        
        # iterate over all neigbour nodes of v
        for neigh in neighbour_nodes:
            
            v1 = v
            v2 = neigh
            
            # order of tuple : undirected
            if v1 > v2:
                v1, v2 = v2, v1
            
            i=group_assignment.get(v1)
            j=group_assignment.get(v2)
            
            # change-points of (i,j) group pair; in this case, equally partitioned
            chg_points = change_points_arr[i,j,:]
        
            ranges_arr = [ [chg_points[s]+1,chg_points[s+1]] for s in range(0,len(chg_points)-1)]
            ranges_arr[0][0] = 0
            
            # list of timestamps corresponding to (v1,v2) pair
            list_time_stamps = dic.get((v1,v2))
            
            # iterate over timestamps list
            for item in list_time_stamps:
                # find the segment which the timestamp belongs
                # (is primarily dependent on which groups the two nodes belong)
                # (herein, equally segmented)
                d =  _findSegment(ranges_arr,  len(ranges_arr) , int(item))                 
                _h = seg_level_dic[d]
                                
                if j == group_assignment.get(v):                    
                    edge_counts[i,_h] += 1
                else:                       
                    edge_counts[j,_h] += 1
        
        #  merged segment(time duration) according to level
        t = np.zeros((h) , dtype=float) 
        
        for d in range (0,num_segments):
        
            _h= seg_level_dic[d]
            delta_t =  change_points_arr[0,0,d+1] -  change_points_arr[0,0,d]
            
            if d == 0:
              delta_t +=  1 
        
            t[_h] = t[_h] + delta_t 
 
        # if node a belongs to group a  
        for a in range(0, num_roles): 
            
            likelihood_sum[a] = 0
            
            for j in range(0, num_roles):                   
                W = list_of_groups[j]  
                
                # maximum possible number of  ways of interacting with node v
                factor = len(W)
                
                for k in range(0, h):
  
                    if lambda_estimates_h[a,j,k] != 0 :
                        likelihood_sum[a] += (edge_counts[j,k]*math.log(lambda_estimates_h[a,j,k])  - factor*lambda_estimates_h[a,j,k]*t[k])
                    else:
                        print('lambda zero')
                         
        if  max(likelihood_sum) != 0:  
            max_i = np.argmax(likelihood_sum)
            group_assignment[v] = max_i
              
        list_of_groups[max_i].append(v)
    
    # To cater the emptiness of a group 
    group_assignment = _split_the_empty_group(num_roles,list_of_groups,group_assignment)  
    
    # print(list_of_groups)     
    # print(group_assignment)
    return group_assignment

# Estimate_lamda
def estimate_lamda(group_assignment,lambda_estimates,change_points_arr,num_roles,num_segments,dic,tuning_params=None):
    lambda_estimates = np.zeros((num_roles, num_roles,num_segments) , dtype=float)
    list_of_groups=  [[] for _ in range(num_roles)]

    for idx, val in group_assignment.items():
        list_of_groups[val].append(idx)
   
    # create dictionary to store interaction counts
    # ( no of interactions between ith and jth group within dth segment)              
    key_data = [
        range(0,num_roles),
        range(0,num_roles),
        range(0,num_segments),
    ]
    keys = list(itertools.product(*key_data))  
    i_j_d = {key: 0 for key in keys}           
              
    for key, val in dic.items():
        
        i=group_assignment.get(key[0])
        j=group_assignment.get(key[1])

        # change-points of (i,j) group pair
        chg_points = change_points_arr[i,j,:]
        
        ranges_arr = [ [chg_points[s]+1,chg_points[s+1]] for s in range(0,len(chg_points)-1)]
        ranges_arr[0][0]=0
        
        n = len(ranges_arr) 
        
        for item in val:
            
            d =  _findSegment(ranges_arr, n, int(item)) 
            
            # undirected
            if i>j:
                i,j=j,i  
                
            i_j_d[(i,j,d)] += 1    
        
    for k in range(0, num_roles):
        for g in range(k,num_roles):
                U=list_of_groups[k]
                W=list_of_groups[g]  
                
                size_all_pairs = 0
                if k == g:
                    size_all_pairs = math.comb(len(U), 2)
                if k != g:
                    size_all_pairs = len(U)*len(W)
                
                for d in range(0, num_segments):                    
                    inter_count = i_j_d[(k,g,d)] 
                    delta_t = (change_points_arr[k,g,d+1] - change_points_arr[k,g,d])
                    if d == 0:
                        delta_t += 1 
                    lambda_estimates[k,g,d] = (inter_count+tuning_params['theta'])/((delta_t*size_all_pairs)+tuning_params['eta'])
                    lambda_estimates[g,k,d] = lambda_estimates[k,g,d]
                            
                # print('{} {} {}'.format(k,g,lambda_estimates[k,g,:]) )
    return lambda_estimates

# Estimate (k-h) lamda
def estimate_lamda_kh(num_roles,num_segments,lambda_estimates,group_assignment,change_points_arr,dic,tuning_params=None):
    list_of_groups=  [[] for _ in range(num_roles)]
       
    for idx, val in group_assignment.items():
        list_of_groups[val].append(idx)

    # create dictionary to store interaction counts
    # ( no of interactions between ith and jth group within dth segment)              
    key_data = [
        range(0,num_roles),
        range(0,num_roles),
        range(0,num_segments),
    ]
    keys = list(itertools.product(*key_data))  
    i_j_d = {key: 0 for key in keys}  
    
    for key, val in dic.items():
        
        i=group_assignment.get(key[0])
        j=group_assignment.get(key[1])

        # change-points of (i,j) group pair
        chg_points = change_points_arr[i,j,:]
        
        ranges_arr = [ [chg_points[s]+1,chg_points[s+1]] for s in range(0,len(chg_points)-1)]
        ranges_arr[0][0]=0
        
        n = len(ranges_arr) 
        
        for item in val:
            d =  _findSegment(ranges_arr, n, int(item)) 
            # undirected graph        
            if i>j:
                i,j=j,i   
                     
            i_j_d[(i,j,d)] += 1    
                  
    for k in range(0, num_roles):
        for g in range(k,num_roles):
  
            U=list_of_groups[k]
            W=list_of_groups[g]  
            
            size_all_pairs = 0
            if k == g:
                size_all_pairs = math.comb(len(U), 2)
            if k != g:
                size_all_pairs = len(U)*len(W)

            _current_lamda_val = lambda_estimates[k,g,:].tolist()                
            _unique_val = list(set(_current_lamda_val))
                        
            h = len(_unique_val)
            
            for i in range(0,h):
                
                grp =[]
                
                for  j in range(0, num_segments):
    
                    if math.isclose(_current_lamda_val[j], _unique_val[i]):
                        grp.append(j)
                        
                inter_count = 0
                delta_t = 0
                
                for d in grp:

                    delta_t += (change_points_arr[k,g,d+1] - change_points_arr[k,g,d])
                    
                    if d == 0:
                        delta_t += 1 
                    
                    inter_count += i_j_d[(k,g,d)]   
                                                       
                alpha = inter_count
                beeta = size_all_pairs*delta_t
                lamda = (alpha+ random.random()*tuning_params['theta'])/(beeta+tuning_params['eta'])
                
                for jj in grp:
                    lambda_estimates[k,g,jj] = lamda
                    lambda_estimates[g,k,jj] = lamda    
            # print('{} {} {}'.format(k,g,lambda_estimates[k,g,:]) )          
        
    return lambda_estimates

# Compute cost (edge by edge)
def com_cost(num_roles,num_segments,lamda_estimates,change_points_arr,group_assignment,dic):    

    
    list_of_groups=  [[] for _ in range(num_roles)]
       
    for idx, val in group_assignment.items():
        list_of_groups[val].append(idx)
    
    i_j_d = {}
    
    for i in range(0, num_roles):
        for j in range(0, num_roles):
           for d in range(0, num_segments): 
               i_j_d[(i,j,d)] = 0
       
    for key, val in dic.items():
        
        i=group_assignment.get(key[0])
        j=group_assignment.get(key[1])
        
        if i>j:
            i,j=j,i
            
        a = change_points_arr[i,j,:]
        
        ranges_arr = [ [a[s]+1,a[s+1]] for s in range(0,len(a)-1)]
        ranges_arr[0][0]=0
        
        n = len(ranges_arr) 
        
        for item in val:

            d =  _findSegment(ranges_arr, n, int(item))                         
            i_j_d[(i,j,d)] += 1

 
    liklihood_sum = 0

    for d in range(0, num_segments):
        for k in range(0, num_roles):
            for g in range(k, num_roles):
    
                U=list_of_groups[k]
                W=list_of_groups[g]
    
                size_all_pairs = 0
                if k == g:
                    size_all_pairs = math.comb(len(U), 2)
                if k != g:
                    size_all_pairs = len(U)*len(W)
        
                alpha = (size_all_pairs * lamda_estimates[k,g,d])
                
                delta= change_points_arr[k,g,d+1]-change_points_arr[k,g,d]
                
                if d == 0:
                    delta += 1 
                
                if lamda_estimates[k,g,d] != 0:
                    liklihood_sum += (i_j_d[(k,g,d)]* math.log(lamda_estimates[k,g,d])- (alpha*delta)) 
    print('Likelihood sum: %f'%(liklihood_sum))  
        
         
    return   liklihood_sum  


# Compute cost 
def compute_cost(group_assignment,lambda_estimates,change_points_arr,num_roles,num_segments,dic):
    # print('compute cost...')
    
    list_of_groups=  [[] for _ in range(num_roles)]
 
    for idx, val in group_assignment.items():
        list_of_groups[val].append(idx)

    # create dictionary to store interaction counts
    # ( no of interactions between ith and jth group within dth segment)              
    key_data = [
        range(0,num_roles),
        range(0,num_roles),
        range(0,num_segments),
    ]
    keys = list(itertools.product(*key_data))  
    i_j_d = {key: 0 for key in keys}  
               
    for key, val in dic.items():
        
        i=group_assignment.get(key[0])
        j=group_assignment.get(key[1])
        
        chg_points = change_points_arr[i,j,:]
        
        ranges_arr = [ [chg_points[s]+1,chg_points[s+1]] for s in range(0,len(chg_points)-1)]
        ranges_arr[0][0]=0
        
        n = len(ranges_arr) 
        # print('{} {}'.format(chg_points,ranges_arr))
        for item in val:
            d =  _findSegment(ranges_arr, n, int(item)) 
            # undirected
            if i>j:
                i,j=j,i  
                
            i_j_d[(i,j,d)] += 1 
            
    liklihood_sum = 0

    for k in range(0, num_roles):
        for g in range(k,num_roles):

            U=list_of_groups[k]
            W=list_of_groups[g]  
            
            size_all_pairs = 0
            if k == g:
                size_all_pairs = math.comb(len(U), 2)
            if k != g:
                size_all_pairs = len(U)*len(W)                    
            
            for d in range(0, num_segments):                    
                inter_count = i_j_d[(k,g,d)]
               
                delta_t = (change_points_arr[k,g,d+1] - change_points_arr[k,g,d])
                
                if d == 0:
                    delta_t += 1 
                
                if lambda_estimates[k,g,d] != 0:
                    liklihood_sum += (inter_count*math.log(lambda_estimates[k,g,d]) - size_all_pairs*lambda_estimates[k,g,d]*delta_t)
                # print('Likelihood sum: %d %d %f'%(k,g,liklihood_sum))  
            # print('Likelihood sum: %d %d %f'%(k,g,temp))
    # print('Likelihood sum: %f'%(liklihood_sum))     
    return liklihood_sum

#  Estimate change points ( Naive Dynamic programming)
def dyn_prog_seg(group_assignment,lambda_estimates,change_points_arr,num_roles,num_segments,dic):      
        
    #create a dictionary : key:timestamp, value:list of interaction-group pairs
    time_group_pairs = {}
    
    # store time stamps
    time_stamps=[]
    
    # 'dic': key: node-pair, value: list of time stamps
    for key, val in dic.items():
         
        i=group_assignment.get(key[0])
        j=group_assignment.get(key[1])
        
        # undirected
        if i > j:
            i,j = j,i
    
        for item in val:  
    
            if  item not in time_group_pairs:
                time_group_pairs[item] = []                      
            time_group_pairs[(item)] += {(i,j)}  
            
            if item not in time_stamps:
                time_stamps.append(item)                                 
    # print(time_group_pairs)
    # print(time_stamps)
      
    # prepare timestamp grid
    time_stamps  = sorted(time_stamps) 
    # print(time_stamps_unique)
    n =  len(time_stamps)    
    grid = time_stamps 
            
    #cumulative edge count  dictionary for all timestamps
    cum_cnt = {}
    #cumulative edge count for instant
    total_cum_cnt = np.zeros(( num_roles, num_roles) , dtype=int) 
        
    #cumulative edge count
    for  ind, tmsp in enumerate(time_stamps):
        lst = time_group_pairs.get(tmsp)
    
        for key in lst:
            total_cum_cnt[key[0],key[1]] += 1
        
        for k in range(0, num_roles):
            for g in range(k, num_roles):
                cum_cnt[ind,k,g] = total_cum_cnt[k,g]                       
    # print(total_cum_cnt) 

    # group assignment related params      
    list_of_groups=  [[] for _ in range(num_roles)]       
    for idx, val in group_assignment.items():
        list_of_groups[val].append(idx)
 
    size_all_pairs = {}
    for k in range(0, num_roles):
        for g in range(k, num_roles):
            U=list_of_groups[k]
            W=list_of_groups[g]

            if k == g:
                size_all_pairs[k,g] = math.comb(len(U), 2)
            if k != g:
                size_all_pairs[k,g] = len(U)*len(W)
       
    cost = {}
    startsfrom = np.zeros((n,num_segments) , dtype=int) 
    
    for ind,tmp in enumerate(grid):
        tot_likelihood = 0
        
        for k in range(0, num_roles):
            for g in range(k, num_roles):
                
                intercount = cum_cnt[ind,k,g]
                delta_t = tmp+1
                
                if tmp == 0:
                      delta_t = tmp
                list_likelihood = [-(intercount*math.log(lam) - (size_all_pairs[k,g] *lam*(delta_t)))  for lam in lambda_estimates[k,g,:]] 
                tot_likelihood += min(list_likelihood)
                
        cost[0,ind,0] = tot_likelihood 
               
        
    for h in range(1, num_segments):
        for i in range((h+1), n):
            cost[0,i,h] = math.inf
            startsfrom[i,h] = -1;
    
            for j in range(h,i):
                                            
                tot_likelihood = 0 
                
                for k in range(0, num_roles):
                    for g in range(k, num_roles):
                
                        inter_count =  cum_cnt[i,k,g] - cum_cnt[j,k,g]
        
                        # compute cost[j,i,0]
                        list_likelihood = [-(inter_count*math.log(lam) - (size_all_pairs[k,g] *lam*(grid[i]-grid[j])))  for lam in lambda_estimates[k,g,:]] 
                        tot_likelihood += min(list_likelihood)
                        
                cost[j,i,0]  =  tot_likelihood
                    
                if cost[0,j,h-1] + cost[j,i,0] <= cost[0,i,h]:
                    cost[0,i,h] = cost[0,j,h-1]+cost[j,i,0]                            
                    startsfrom[i,h] = j

    cost_tot = (cost[0,(n-1),(num_segments-1)])
    print(cost_tot)
        
    boundary_point_array=[grid[n-1]]    
    counter = num_segments-1
    boundary_point = n-1
    # b=[]    
                  
    while counter > -1:

        boundary_point = startsfrom[boundary_point, counter]
        boundary_point_array.append(grid[boundary_point])
        # b.append(boundary_point)
        counter -= 1
    # print(b)
    change_points_arr[:,:,:] = list(reversed(boundary_point_array))
    change_points_arr[:,:,:] = list(reversed(boundary_point_array))

    return change_points_arr

# Estimate change points : LINEAR algorithm ( Fast segmentation with SMAWK ) - Ver 2
def linear_seg_ver_2(num_roles,num_segments,group_assignment,lambda_estimates,change_points_arr,dic):
    # no of pairs of groups or roles
    size_all_pairs = int(num_roles*(num_roles + 1)/2)  
    # convert tensor to 2-d array
    l_array = np.zeros((size_all_pairs,num_segments) , dtype=float )
   
    cnt = 0    
    for z1 in range(0, num_roles):
        for z2 in range(z1, num_roles):           
            for d in range(0, num_segments):
                l_array[cnt,d]= lambda_estimates[z1,z2,d]
            # print(l_array[cnt,:])
            cnt += 1
    
    # find h-levels
    unique_cols = np.unique(l_array, axis=1) 
    # print(unique_cols)       
    h = len(unique_cols[0]) 
    # print(unique_cols[0])  
    # print('h levels initials inside seg: %d'%h)
    
    # h-level lamda estimates    
    lambda_estimates_h = np.zeros((num_roles, num_roles, h) , dtype=float )
    
    cnt = 0   
    
    for z1 in range(0, num_roles):
        for z2 in range(z1, num_roles):
            for d in range(0, h):
                lambda_estimates_h[z1,z2,d]= unique_cols[cnt,d]
                lambda_estimates_h[z2,z1,d]= unique_cols[cnt,d]
            cnt += 1

    time_edges = {}
    time_stamps=[]

    for key, val in dic.items():
    # print('{} {}'.format(key[0],key[1]))
        
        for item in val:  
            if  item not in time_edges:
                time_edges[item] = []                      
            time_edges[(item)] += {key}
            time_stamps.append(item)
 
    # find unique time-stamps
    # (there are cases where multiple edges occur at the same timestamp )
    time_stamps = sorted(time_stamps)
    time_stamps_unique =  sorted(list(set(time_stamps)))    

    n =  len(time_stamps_unique)
      
    startsfrom = np.zeros((n,num_segments) , dtype=int) 
    lamda_value = np.zeros((n,num_segments) , dtype=int )
    
    c = np.zeros((n, h) , dtype=float)
    o_ek = np.zeros((n, num_segments) , dtype=float)
    
    list_of_groups=  [[] for _ in range(num_roles)]       
    for idx, val in group_assignment.items():
        list_of_groups[val].append(idx)        
    
    for d in range(0, h):
  
        alpha = 0
        cnt = 0

        for k in range(0, num_roles):
            for g in range(k, num_roles):
    
                U=list_of_groups[k]
                W=list_of_groups[g]
    
                size_all_pairs = 0
                if k == g:
                    size_all_pairs = math.comb(len(U), 2)
                if k != g:
                    size_all_pairs = len(U)*len(W)
                
                if  lambda_estimates_h[k,g,d] != 0:
                    alpha +=   (size_all_pairs * lambda_estimates_h[k,g,d])   
                elif not U or not W:
                    print('list is empty...')  
                             
        for  ind, tmsp in enumerate(time_stamps_unique):
            lst = time_edges.get(tmsp)

            for key in lst:
                
                i=group_assignment.get(key[0])
                j=group_assignment.get(key[1])
   
                cnt += math.log(lambda_estimates_h[i,j,d])
                
            c[ind,d] = cnt - alpha*tmsp
                                  
    for e in range(0, n):
        # print('{} : {} {}'.format(c[e,:],max(c[e,:]),np.argmax(c[e,:])))
        o_ek[e,0] = max(c[e,:])
        lamda_value[e,0] = np.argmax(c[e,:])
    
    
    LARGE_VAL = pow(10, 10)
    
    for kk in range(1, num_segments):
        max_m = np.zeros((n, h) , dtype=float)
        indices = np.zeros((n, num_segments) , dtype=int)
        
        for a in range(0, h):                       
            def lookup(i,j):
                x = -LARGE_VAL
                
                if not ((j < kk) | (j >= i)):                
                    x = o_ek[j,kk-1] + c[i,a] - c[j,a]
                              
                return x

            rows = list(range(0, n))
            cols =  list(range(0, n))
            
            col_set = utils.smawk(rows,cols,lookup)
            # col_set = utils.Maxcompute(rows,cols,lookup)
                      
            for edg in range(0, n): 
                indices[edg,a] = col_set[edg]
                max_m[edg,a] = lookup(edg,indices[edg,a])
    
        for e in range(0, n):    
            o_ek[e,kk] = max(max_m[e,:]) 
            lamda_value[e,kk] = np.argmax(max_m[e,:])
            startsfrom[e,kk] = indices[e,lamda_value[e,kk]]
        
    boundary_point_array=[n-1]           
    counter = num_segments-1
    boundary_point = n-1
                                      
    new_lambda_estimates = np.zeros((num_roles, num_roles,num_segments) , dtype=float)  
    # print(o_ek[boundary_point,counter])   
               
    while counter > -1:       
        # print(o_ek[boundary_point,counter]) 

        d= int(lamda_value[boundary_point,counter])
        # update new lamdas
        for i1 in range(0, num_roles):
            for i2 in range(i1, num_roles): 
                new_lambda_estimates[i1,i2,counter] = lambda_estimates_h[i1,i2,d] 
                new_lambda_estimates[i2,i1,counter] = lambda_estimates_h[i1,i2,d] 
        
        boundary_point = startsfrom[boundary_point, counter]        
        boundary_point_array.append(boundary_point)    
        counter -= 1
    
    # print(list(reversed(boundary_point_array)))
    b_list = []
    for b in boundary_point_array:
        b_list.append(time_stamps_unique[b])
        
    
    change_points_arr_ = list(reversed(b_list))
    change_points_arr[:,:,:] = change_points_arr_
    
    print(change_points_arr_)    
    lambda_estimates = new_lambda_estimates
    
    # Make sure h-levels do exist
    
    # convert tensor to 2-d array
    size_all_pairs = int(num_roles*(num_roles + 1)/2) 
    l_array_new = np.zeros((size_all_pairs,num_segments) , dtype=float )
   
    cnt = 0    
    for z1 in range(0, num_roles):
        for z2 in range(z1, num_roles):           
            for d in range(0, num_segments):
                l_array_new[cnt,d]= lambda_estimates[z1,z2,d]
            # print(l_array_new[cnt,:])
            cnt += 1
    
    # find h-levels
    unique_cols_new = np.unique(l_array_new, axis=1)        
    h_current = len(unique_cols_new[0]) 
    # print('new h levels: %d'%h_current)
    
    if h > h_current:
        print('No of h-levels does not satisfied... (h:{} , h-new:{})'.format(h, h_current))
        # print('compute cost before modifying lambdas...')
        # compute_cost(group_assignment,lambda_estimates,change_points_arr,num_roles,num_segments,dic)
        
        h_levels = unique_cols[0]
        current_g_mapping = []
        
        lst = l_array_new[0,:]
                
        for i in lst:            
            for idx in range(0,h):
                
                if i == h_levels[idx]:
                    current_g_mapping.append(idx)
        # print([ele for ele in range(0,h)]   ) 
        # print(current_g_mapping)   
        
        list_all =  [ele for ele in range(0,h)] 

        a = set(current_g_mapping)
        b = set(list_all)
        
        diff = list(b - a)        
        n = h - len(a)
        missing_list = random.sample(diff, n)
        # print(missing_list)
       
        duplicate_entries = [item for item, count in collections.Counter(current_g_mapping).items() if count > 1]
        # print(duplicate_entries)
        
        new_g_mapping = current_g_mapping.copy()
        # we have duplicate and missing list
        
        changes= []

        for idx,m_e in  enumerate(missing_list):
            # find index of duplicate entries
            if  idx < len(duplicate_entries) :
                ind = current_g_mapping.index(duplicate_entries[idx])
            else:
                new_duplicate_entries = [item for item, count in collections.Counter(new_g_mapping).items() if count > 1]
                # print(len(new_duplicate_entries))
                duplicate_entries.append(new_duplicate_entries[0])
                ind = new_g_mapping.index(duplicate_entries[idx])
            # update duplicate by a missing entry
            new_g_mapping[ind] = m_e
            # add (index,val) pair
            changes.append([ind,m_e])                
            
        # print(new_g_mapping )        
        # print(changes)
        
        # update lamda esimates
        # we have current h-level indices (segment, index) and missing h-values to be updated

        m_lambda_estimates =  copy.copy(lambda_estimates)
        
        for ele in changes:
            d = ele[0]
            val = ele[1]
            # print('{} ,{}'.format(d,val))
            # print(m_lambda_estimates)
            # print(lambda_estimates_h)
            for i1 in range(0, num_roles):
                for i2 in range(i1, num_roles): 
                    m_lambda_estimates[i1,i2,d] = lambda_estimates_h[i1,i2,val] 
                    m_lambda_estimates[i2,i1,d] = lambda_estimates_h[i1,i2,val]
                        
        # print(m_lambda_estimates)
        # print('after changing lambdas...')