-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathT.py
267 lines (236 loc) · 9.16 KB
/
T.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
import sys
import scipy
from scipy import io
import community #module I built from networkX stuff.
from networkx import chordal_alg
import random
import networkx as nx
import numpy as np
import os
import pickle
import csv
import random
from time import time, sleep
from py2neo import neo4j, rel, node
import pandas as pd
import pickle
from oct2py import Oct2Py
from oct2py import octave
import grow
import matplotlib.pylab as plt
sparse = True
directed = True
randomgrowth=False
wholegrowth=False
growthfactor= 3000
num_measurements = 10
verbose = True
force_connected = True
plot = False
draw = True
drawgraph = 'moral'
plotx = 'nodegrowth'
ploty = 'maxclique'
ploty2 = 'modularity'
# initialize database
graph_db = neo4j.GraphDatabaseService()
#get the pickled dictionary of nodes and node names that is made while the csv file is loaded.
try:
nodes = open('nodes.p','r')
nodes = pickle.load(nodes)
except:
print 'Your graph is empty. Please loab a graph into the database.'
1/0
data = []
if graph_db.size < 2:
print 'Your graph is empty. Please load a graph into the database.'
1/0
#make a list of all the nodes in the database to randomly choose from, but only if force connected graph option is false.
if force_connected == False:
possiblenodes = nodes
print 'Graph will not be fully connected, use "force_connected = True" if you want it to be fully connected'
# here we figure out at what points to measure if the user wants a spare measumement or to measure every time a node is added. This speeds up big graph growths a lot!
if sparse == True:
sparsemeasurements = [6,7,8,9,growthfactor]
measurements = np.logspace(1, (int(len(str(growthfactor))))-1, num_measurements)
for x in measurements:
sparsemeasurements.append(int(x))
else:
sparsemeasurements = range(2,growthfactor)
# this will actually only work for directed graph because we are moralizing, but I want to leave the option for later. Perhaps I can just skip moralization for undirected graph.
if directed:
graph = nx.DiGraph()
else:
graph = nx.graph()
# this will grow the whole graph. Takes a while for big ones!
if wholegrowth == True:
growthfactor = len(nodes)
# pick an initial node to start from
initial_node = random.choice(nodes.keys()) #randomly get a node to add from dictionary
if verbose:
print 'Starting from ' + initial_node
used = 0
#measure the nodegrowth
nodegrowth = graph.size()
while nodegrowth < growthfactor: # make sure we aren't above how many nodes we want to measure in the graph
if force_connected == True:
if nodegrowth > 1:
possiblenodes = graph.nodes() # get all nodes in graph.
fromnode = random.choice(possiblenodes) #pick random node in graph to grow from.
if verbose:
print 'Using ' + str(fromnode) + ' to find new node'
else: #this is because you can't do random from one node at the start.
fromnode = initial_node
if verbose:
print 'using initial node'
used = used+1
if used > 5:
1/0
fromnode = nodes[fromnode] #get DB version of the node.
#get all relationpships in graph DB for that node
new_node_rels = list(graph_db.match(end_node = fromnode, bidirectional=True))
new_rel = random.choice(new_node_rels)
# is the new node a parentt or child of the node we are growing from?
if new_rel.end_node == fromnode:
new_node = new_rel.start_node
if new_rel.start_node == fromnode:
new_node = new_rel.end_node
assert new_node != fromnode
if force_connected == False:
new_node = random.choice(possiblenodes.values())
print 'adding' + str(new_node)
# go through the list of edges that have the new node as a part of it, and only add the edge if they are between the new node and a node in the graph already.
rels = list(graph_db.match(start_node=new_node)) #query graph for edges from that node
for edge in rels:
newnodename = edge.start_node.get_properties()
newnodename = newnodename.get('name')
newnodename = newnodename.encode()
endnodename = edge.end_node.get_properties()
endnodename = endnodename.get('name')
endnodename = endnodename.encode()
if newnodename not in graph: #check to see if new node is in graph
graph.add_node(newnodename) # add if not
if verbose:
print 'added ' + str(newnodename)
if endnodename in graph: #check to see if end node is in graph
graph.add_edge(newnodename, endnodename) #add it if it is
if verbose:
print 'connected ' + newnodename +' to '+ endnodename
rels = list(graph_db.match(end_node=new_node)) #query graph for edges to that node
for edge in rels:
newnodename = edge.end_node.get_properties()
newnodename = newnodename.get('name')
newnodename = newnodename.encode()
startnodename = edge.start_node.get_properties()
startnodename = startnodename.get('name')
startnodename = startnodename.encode()
if newnodename not in graph: #check to see if new node is in graph
graph.add_node(newnodename) # add if not
if verbose:
print 'added ' + str(newnodename)
if startnodename in graph: #check to see if end node is in graph
graph.add_edge(startnodename, newnodename) #add it if it is
if verbose:
print 'connected ' + startnodename +' to '+ newnodename
nodegrowth = len(graph.nodes())
if verbose:
print 'Graph has ' + str(nodegrowth) + ' nodes.'
edgegrowth = len(graph.edges())
if verbose:
print 'Graph has ' + str(edgegrowth) + ' edges.'
if nodegrowth in sparsemeasurements:
start_time = time()
try:
modgraph = nx.Graph(graph) #make it into a networkx graph
partition = community.best_partition(modgraph) #find the partition with maximal modularity
modularity = community.modularity(partition,modgraph) #calculate modularity with that partition
except:
modularity = 0.0
moral = nx.to_numpy_matrix(graph)
moralized, moral_edges = octave.moralize(moral)
ismoral = False
tries = 0
while ismoral == False and tries < 5: #sometimes oct2py takes too long to return I think
try:
moralized, moral_edges = octave.moralize(moral)
ismoral = True
except:
ismoral = False
print 'Octave Error, trying again!'
tries = tries + 1
order = range(len(moralized)+1)
order = order[1:]
triangulated, cliques, fill_ins = octave.triangulate(moralized,order)
istriangulated = False
tries = 0
while istriangulated == False and tries < 5: #sometimes oct2py takes too long to return I think
try:
triangulated, cliques, fill_ins = octave.triangulate(moralized,order)
istriangulated = True
tries = 5
except:
istriangulated = False
print 'Octave Error, trying again!'
tries = tries + 1
cliquesizes = []
for x in cliques:
size = len(x)
cliquesizes.append(size)
maxclique = max(cliquesizes)
avgclique = np.mean(cliquesizes)
end_time = time()
run_time = end_time - start_time
data.append([nodegrowth, len(graph.edges()), modularity, maxclique,avgclique,run_time])
sparsemeasurements.remove(nodegrowth) #so we don't calculate clique size more than once!
if verbose:
print 'took ' + str(run_time) + ' to run last computation'
print 'Growing graph with ' + str(nodegrowth) + ' nodes, ' + str(modularity) + ' modularity, max clique of ' + str(maxclique) + ', ' + str(len(graph.edges())) + ' edges.'
if plot == True:
df = pd.DataFrame(data, columns= ('nodegrowth','edgegrowth', 'modularity','maxclique','avgclique','run_time'))
plt.close()
fig = plt.figure(figsize=(24,16))
ax = fig.add_subplot(1,1,1)
ax2 = ax.twinx()
y1 = df[ploty]
y2 = df[ploty2]
x = df[plotx]
ax.set_xlabel('%s in Graph' %(plotx),fontsize=20)
line1, = ax.plot(x, y1, label = ploty)
line2, = ax2.plot(x, y2, label = ploty2, color = 'red')
ax.set_ylabel(ploty,fontsize=20)
ax2.set_ylabel(ploty2, fontsize=20)
plt.suptitle('Graph Growth', fontsize=30)
plt.legend((line1,line2),(ploty , ploty2), loc='upper center', frameon=False, fontsize=20)
plt.show()
if draw == True:
if drawgraph == 'triangulated':
G = nx.from_numpy_matrix(triangulated)
elif drawgraph == 'moral':
G = nx.from_numpy_matrix(moralized)
elif drawgraph == 'directed':
G = graph()
# position is stored as node attribute data for random_geometric_graph
plt.close()
pos = nx.random_layout(G)
# find node near center (0.5,0.5)
dmin=1
ncenter=0
for n in pos:
x,y=pos[n]
d=(x-0.5)**2+(y-0.5)**2
if d<dmin:
ncenter=n
dmin=d
# color by path length from node near center
p=nx.single_source_shortest_path_length(G,ncenter)
plt.figure(figsize=(15,15))
nx.draw_networkx_edges(G,pos,nodelist=[ncenter],alpha=0.2)
nx.draw_networkx_nodes(G,pos,nodelist=p.keys(),
node_size=20,
node_color=p.values(),
cmap=plt.cm.Reds_r)
plt.xlim(-0.05,1.05)
plt.ylim(-0.05,1.05)
plt.axis('off')
plt.show()
df = pd.DataFrame(data, columns= ('nodegrowth','edgegrowth', 'modularity','maxclique','avgclique','run_time'))