Skip to content
Snippets Groups Projects
Commit 0ba3d6ad authored by Chamalee Wickrama Arachch's avatar Chamalee Wickrama Arachch
Browse files

Update real-facebook.py

parent d6e5cb68
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Dec 21 14:35:25 2022
@author: chamwick
"""
Facebook dataset
"""
import networkx as nx
......@@ -17,8 +14,6 @@ import os
import copy
import numpy as np
import utils
#import charikar, greedy, dynprogr, plotting
import copy
import time
import os
......@@ -26,33 +21,6 @@ import pandas as pd
from dsalgo import *
from find_densest_distinct_sets import *
# read data
# filepath = os.path.join("Data","as.txt")
# header_list = ['source','target', 'timestamp']
# df = pd.read_table(filepath,sep='\t',names=header_list)
# # Remove self-loops
# df = df[((df['source'] ) != (df['target']))]
# keys = df.timestamp.unique()
# data_dic = {k: [] for k in keys}
# # data_dic = {k: [] for k in range(2006, 2016)}
# # data_dic = {k: [] for k in range(2006, 2010)}
# print(len(data_dic.keys()))
# for _row in df.values:
# # if _row[2] < 2016 and _row[2] > 2005:
# # if _row[2] < 2010 and _row[2] > 2005:
# data_dic[_row[2]].append((_row[0],_row[1]))
# snapshots = []
# sub = []
# deg = []
# nodes = set()
#filepath = os.path.join(".","..","DATA",sys.argv[1])
filepath = os.path.join("Data","facebook.txt")
edgesTS, nodes, edges = utils.readFile(filepath)
......@@ -60,7 +28,6 @@ df = pd.DataFrame(edgesTS)
df.columns = ['source','target','timestamp']
# header_list = ['source','target', 'timestamp']
# Remove rows that contain null values
df = df[df['target'].isnull() != True]
df = df[df['source'].isnull() != True]
......@@ -69,16 +36,13 @@ df = df[df['timestamp'].isnull() != True]
df=df.sort_values('timestamp')
def _swap (row):
if row['source'] > row['target']:
row['source'] , row['target'] =row['target'] , row['source']
return row
# Undirected graph: orient every edge so that source <= target
# df=df.apply(lambda row: _swap(row), axis=1)
df=df.apply(lambda row: _swap(row), axis=1)
# Shift timestamps so that the earliest timestamp becomes zero
refValue = df['timestamp'].min()
df['timestamp'] -= refValue
......@@ -87,18 +51,11 @@ df['timestamp'] -= refValue
df = df[((df['source'] ) != (df['target']))]
keys = df.timestamp.unique()
data_dic = {k: [] for k in keys}
# data_dic = {k: [] for k in range(2006, 2016)}
# data_dic = {k: [] for k in range(2006, 2010)}
print('TAU')
print(len(data_dic.keys()))
for _row in df.values:
# if _row[2] < 2016 and _row[2] > 2005:
# if _row[2] < 2010 and _row[2] > 2005:
# print(_row)
# print(_row[0])
# print(_row[1])
# print(_row[2])
data_dic[_row[2]].append((_row[0],_row[1]))
snapshots = []
......@@ -147,11 +104,28 @@ avg_edges /= numberOfGraphs
print('avg edges: %s'%avg_edges)
print('nodes: {}'.format(len(nodes)))
#%% algo
comb = snapshots[0]
for i in range(1, len(snapshots)):
sub = []
deg = []
H = snapshots[i]
comb = nx.compose(comb, H)
nodes = comb.nodes()
[obj1, subg1] =ip_based_dcs_sum(0, snapshots, comb)
densities = []
for i in range(len(snapshots)):
G = snapshots[i]
sub_g = G.subgraph(subg1)
den = sub_g.number_of_edges()/len(subg1)
densities.append(den)
# print(den, end=", ")
print('IP-based')
print(sum(densities))
dcs = set(subg1)
dcs_den = sum(densities)
#%%
class GraphObj:
......@@ -172,14 +146,11 @@ class GraphObj:
def get_wd (self):
return self.w_d
#%% create avg graph
g = GraphObj()
for x, y in edg_lst:
g.AddEdge(x,y,1)
w_d = g.get_wd()
G_avg=nx.Graph()
......@@ -187,7 +158,7 @@ G_avg=nx.Graph()
for idx, val in w_d.items():
G_avg.add_edge(idx[0],idx[1],weight=val)
#%%
#%% Other methods
print('dcs density LP')
d, induced , dcs1 = densest_subgraph_w(G_avg)
print(len(dcs1))
......@@ -206,41 +177,6 @@ for key in range(0,numberOfGraphs):
den+= (subgrpah_snap.number_of_edges()/ len(dcs2) )
print(den)
exact_R = exact_densest(G_avg)
# print('subgraph induced by', exact_R[0])
print('density =', exact_R[1])
# dcs3 = set(exact_R[0])
# print('densest common: unweighted')
# print(len(dcs3))
# den = 0
# for key in range(0,numberOfGraphs):
# subgrpah_snap = snapshots[key].subgraph(dcs3)
# den+= (subgrpah_snap.number_of_edges()/ len(dcs3) )
# print(den)
#%% algo
comb = snapshots[0]
for i in range(1, len(snapshots)):
H = snapshots[i]
comb = nx.compose(comb, H)
nodes = comb.nodes()
[obj1, subg1] =ip_based_dcs_sum(0, snapshots, comb)
densities = []
for i in range(len(snapshots)):
G = snapshots[i]
sub_g = G.subgraph(subg1)
den = sub_g.number_of_edges()/len(subg1)
densities.append(den)
# print(den, end=", ")
print('IP-based')
print(sum(densities))
dcs = set(subg1)
dcs_den = sum(densities)
#%%
ld = []
......@@ -250,18 +186,15 @@ for snap in snapshots:
local_sum_den += d
ld.append(set(sol))
print('local sum denisty')
print(local_sum_den )
print(local_sum_den )
#%%
arr = [0.3,0.5,0.7]
algo_ver = 0
# lam = item
algo_ver = 0
k = len(snapshots)
for item in arr:
print('----------------------')
......@@ -275,8 +208,7 @@ for item in arr:
print(item)
lam = item*dcs_den/k
# lam = 2*lam*den/(k*(k-1))
lam = 2
# lam = 2*lam*den/(k*(k-1))
print(lam)
[s1,set_dic1] = soft_1_1(snapshots, ld, lam)
......@@ -292,13 +224,14 @@ for item in arr:
# lam = 5
print(item)
lam = item*dcs_den/k
print(lam)
print('lam ::: ',lam)
set_dic = soft_2(snapshots, dcs, lam, nodes)
else:
print('no algo found')
# Results
# Results
print('sets density')
den= 0
......@@ -312,19 +245,17 @@ for item in arr:
# print(min(d_l))
print(den)
# den= 0
# print('dcs density')
# d_l = []
# for key in range(0,numberOfGraphs):
# den+=comDensity(snapshots[key],dcs)
# d_l.append(comDensity(snapshots[key],dcs))
# # print(d_l)
# # print(min(d_l))
# print(den)
den= 0
print('dcs density')
d_l = []
for key in range(0,numberOfGraphs):
den+=comDensity(snapshots[key],dcs)
d_l.append(comDensity(snapshots[key],dcs))
# print(d_l)
# print(min(d_l))
print(den)
# Jaccard values
print('discovered min jac')
s1 =set()
......@@ -343,36 +274,5 @@ for item in arr:
val = jaccard_similarity(s1,s2)
jac.append(val)
print(min(jac))
print(np.average(jac))
#%% avg jaccard
# dcs density
# 8.382352941176471
# iteration no........... 1
# current val: 2.3000171130315734 - prev: 0
# iteration no........... 2
# current val: 2.672977022977023 - prev: 2.3000171130315734
# iteration no........... 3
# current val: 3.0289173789173787 - prev: 2.672977022977023
# iteration no........... 4
# current val: 3.058017908017908 - prev: 3.0289173789173787
# iteration no........... 5
# current val: 3.058017908017908 - prev: 3.058017908017908
# --- 27.18460488319397 seconds ---
# sets density
# size 36
# size 30
# size 33
# size 33
# [2.5, 2.933333333333333, 2.696969696969697, 2.484848484848485]
# 2.484848484848485
# 10.615151515151515
# dcs density
# [1.7058823529411764, 2.7058823529411766, 2.0588235294117645, 1.911764705882353]
# 1.7058823529411764
# 8.382352941176471
# discovered min jac
# 0.5
# # 0.29118329466357307
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment