Skip to content

Commit acce13c

Browse files
siva1098 authored and cclauss committed
Add graphs/frequent_pattern_graph_miner.py (TheAlgorithms#1866)
* Add files via upload * Update graphs/frequent_pattern_graph_miner.py Co-Authored-By: Christian Clauss <[email protected]> * Update graphs/frequent_pattern_graph_miner.py Co-Authored-By: Christian Clauss <[email protected]> * Update frequent_pattern_graph_miner.py * Update frequent_pattern_graph_miner.py * Update frequent_pattern_graph_miner.py * Update frequent_pattern_graph_miner.py * Update graphs/frequent_pattern_graph_miner.py Co-Authored-By: Christian Clauss <[email protected]> * Update graphs/frequent_pattern_graph_miner.py Co-Authored-By: Christian Clauss <[email protected]> * Update graphs/frequent_pattern_graph_miner.py Co-Authored-By: Christian Clauss <[email protected]> * Update graphs/frequent_pattern_graph_miner.py Co-Authored-By: Christian Clauss <[email protected]> * Update graphs/frequent_pattern_graph_miner.py Co-Authored-By: Christian Clauss <[email protected]> * Update graphs/frequent_pattern_graph_miner.py Co-Authored-By: Christian Clauss <[email protected]> * Update graphs/frequent_pattern_graph_miner.py Co-Authored-By: Christian Clauss <[email protected]> * Update graphs/frequent_pattern_graph_miner.py Co-Authored-By: Christian Clauss <[email protected]> * Update frequent_pattern_graph_miner.py * Update frequent_pattern_graph_miner.py * Update frequent_pattern_graph_miner.py * Update frequent_pattern_graph_miner.py * Update frequent_pattern_graph_miner.py * Update graphs/frequent_pattern_graph_miner.py Co-Authored-By: Christian Clauss <[email protected]> * Update frequent_pattern_graph_miner.py * Update frequent_pattern_graph_miner.py * Update frequent_pattern_graph_miner.py * Update frequent_pattern_graph_miner.py * Update frequent_pattern_graph_miner.py * Update frequent_pattern_graph_miner.py * Update graphs/frequent_pattern_graph_miner.py Co-Authored-By: Christian Clauss <[email protected]> * Update frequent_pattern_graph_miner.py * Update frequent_pattern_graph_miner.py * Update frequent_pattern_graph_miner.py * Update 
frequent_pattern_graph_miner.py * Update frequent_pattern_graph_miner.py * Update frequent_pattern_graph_miner.py * Update frequent_pattern_graph_miner.py * Whitespace changes * Format with psf/black Co-authored-by: Christian Clauss <[email protected]>
1 parent 8dc00d3 commit acce13c

File tree

1 file changed

+232
-0
lines changed

1 file changed

+232
-0
lines changed

Diff for: graphs/frequent_pattern_graph_miner.py

+232
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,232 @@
1+
"""
2+
FP-GraphMiner - A Fast Frequent Pattern Mining Algorithm for Network Graphs
3+
4+
A novel Frequent Pattern Graph Mining algorithm, FP-GraphMiner, that compactly
5+
represents a set of network graphs as a Frequent Pattern Graph (or FP-Graph).
6+
This graph can be used to efficiently mine frequent subgraphs including maximal
7+
frequent subgraphs and maximum common subgraphs.
8+
9+
URL: https://www.researchgate.net/publication/235255851
10+
"""
11+
# fmt: off
# Sample input: one inner list per graph. Each entry is "<pair>-<label>";
# preprocess() splits it on "-", and freq_subgraphs_edge_list() later reads the
# two characters of <pair> as the edge's endpoints (e.g. "ab" -> ("a", "b")).
edge_array = [
    [
        "ab-e1", "ac-e3", "ad-e5", "bc-e4", "bd-e2", "be-e6",
        "bh-e12", "cd-e2", "ce-e4", "de-e1", "df-e8", "dg-e5",
        "dh-e10", "ef-e3", "eg-e2", "fg-e6", "gh-e6", "hi-e3",
    ],
    [
        "ab-e1", "ac-e3", "ad-e5", "bc-e4", "bd-e2", "be-e6",
        "cd-e2", "de-e1", "df-e8", "ef-e3", "eg-e2", "fg-e6",
    ],
    [
        "ab-e1", "ac-e3", "bc-e4", "bd-e2", "de-e1", "df-e8",
        "dg-e5", "ef-e3", "eg-e2", "eh-e12", "fg-e6", "fh-e10",
        "gh-e6",
    ],
    [
        "ab-e1", "ac-e3", "bc-e4", "bd-e2", "bh-e12", "cd-e2",
        "df-e8", "dh-e10",
    ],
    [
        "ab-e1", "ac-e3", "ad-e5", "bc-e4", "bd-e2", "cd-e2",
        "ce-e4", "de-e1", "df-e8", "dg-e5", "ef-e3", "eg-e2",
        "fg-e6",
    ],
]
# fmt: on
24+
25+
26+
def get_distinct_edge(edge_array):
    """
    Collect every distinct leading element found across all graphs.

    For each entry of each row, ``entry[0]`` is recorded once. On a
    preprocessed array (entries already split into ``[pair, label]``) that is
    the edge name. The values pass through a set, so the order of the returned
    list is unspecified.

    >>> sorted(get_distinct_edge([[['ab', 'e1'], ['ac', 'e3']],
    ...                           [['ab', 'e1'], ['bd', 'e2']]]))
    ['ab', 'ac', 'bd']
    """
    return list({entry[0] for row in edge_array for entry in row})
37+
38+
39+
def get_bitcode(edge_array, distinct_edge):
    """
    Return the bitcode of ``distinct_edge``: one character per graph.

    Position ``i`` of the result is "1" when graph ``i`` (row ``i`` of
    ``edge_array``) has an entry whose first element equals ``distinct_edge``,
    and "0" otherwise.

    The comparison is an exact match. A substring test would wrongly mark a
    graph holding edge ``'abc'`` as also containing edge ``'ab'``.

    >>> graphs = [[['ab', 'e1'], ['ac', 'e3']], [['ac', 'e3']], [['ab', 'e1']]]
    >>> get_bitcode(graphs, 'ab')
    '101'
    >>> get_bitcode([[['abc', 'e1']], [['ab', 'e2']]], 'ab')
    '01'
    """
    return "".join(
        "1" if any(item[0] == distinct_edge for item in row) else "0"
        for row in edge_array
    )
50+
51+
52+
def get_frequency_table(edge_array):
    """
    Build the frequency table of all distinct edges.

    Each row of the result is ``[edge, weight, bitcode]``, where ``bitcode``
    comes from ``get_bitcode`` and ``weight`` is the number of "1" characters
    in it. Rows are sorted by weight, highest first.
    """
    table = {}
    for edge in get_distinct_edge(edge_array):
        bitcode = get_bitcode(edge_array, edge)
        table[edge] = [bitcode.count("1"), bitcode]

    # sorted() is stable, so edges of equal weight keep their insertion order.
    ranked = sorted(table.items(), key=lambda entry: entry[1][0], reverse=True)
    return [[edge, stats[0], stats[1]] for edge, stats in ranked]
71+
72+
73+
def get_nodes(frequency_table):
    """
    Group edges by bitcode.

    ``frequency_table`` rows are read as ``[edge, weight, bitcode]``; the
    result maps each bitcode to the list of edges that share it, in table
    order.

    >>> get_nodes([['ab', 5, '11111'], ['ac', 5, '11111'], ['df', 5, '11111'],
    ...            ['bd', 5, '11111'], ['bc', 5, '11111']])
    {'11111': ['ab', 'ac', 'df', 'bd', 'bc']}
    """
    grouped = {}
    for row in frequency_table:
        bitcode, edge = row[2], row[0]
        if bitcode not in grouped:
            grouped[bitcode] = []
        grouped[bitcode].append(edge)
    return grouped
85+
86+
87+
def get_cluster(nodes):
    """
    Group nodes by the weight of their bitcode.

    The weight of a bitcode is its number of "1" characters. The result has
    the shape ``{weight: {bitcode: edges}}``.

    >>> get_cluster({'111': ['ab'], '110': ['cd'], '011': ['ef']})
    {3: {'111': ['ab']}, 2: {'110': ['cd'], '011': ['ef']}}
    """
    by_weight = {}
    for bitcode, edges in nodes.items():
        weight = bitcode.count("1")
        if weight not in by_weight:
            by_weight[weight] = {}
        by_weight[weight][bitcode] = edges
    return by_weight
96+
97+
98+
def get_support(cluster):
    """
    Return the support percentage of every weight level in ``cluster``.

    Support is ``weight * 100 / number_of_graphs``. The number of graphs is
    taken from the length of a bitcode, since each bitcode holds one bit per
    graph. It is not taken from the number of weight levels, which would give
    values above 100% whenever some weight has no nodes. If no bitcode is
    available, the number of weight levels is used as a fallback.

    >>> get_support({5: {'11111': ['ab', 'ac', 'df', 'bd', 'bc']},
    ...              4: {'11101': ['ef', 'eg', 'de', 'fg'], '11011': ['cd']},
    ...              3: {'11001': ['ad'], '10101': ['dg']},
    ...              2: {'10010': ['dh', 'bh'], '11000': ['be'], '10100': ['gh'],
    ...                  '10001': ['ce']},
    ...              1: {'00100': ['fh', 'eh'], '10000': ['hi']}})
    [100.0, 80.0, 60.0, 40.0, 20.0]
    >>> get_support({4: {'1111': ['ab']}, 1: {'1000': ['cd']}})
    [100.0, 25.0]
    """
    graph_count = next(
        (
            len(bitcode)
            for group in cluster.values()
            # Skip non-dict entries such as the "Header" marker that
            # construct_graph stores in the cluster.
            if isinstance(group, dict)
            for bitcode in group
            if isinstance(bitcode, str) and bitcode
        ),
        len(cluster),
    )
    return [weight * 100 / graph_count for weight in cluster]
110+
111+
112+
def print_all() -> None:
    """
    Print the computed results section by section.

    Reads the module-level names ``nodes``, ``support``, ``cluster``,
    ``graph`` and ``freq_subgraph_edge_list``, which the ``__main__`` block
    assigns before calling this function.
    """
    print("\nNodes\n")
    for bitcode, edges in nodes.items():
        print(bitcode, edges)

    print("\nSupport\n")
    print(support)

    print("\n Cluster \n")
    # Weights are unique keys, so ordering by key alone gives the same order
    # as ordering the (key, value) pairs.
    for weight in sorted(cluster, reverse=True):
        print(weight, cluster[weight])

    print("\n Graph\n")
    for source, targets in graph.items():
        print(source, targets)

    print("\n Edge List of Frequent subgraphs \n")
    for subgraph_edges in freq_subgraph_edge_list:
        print(subgraph_edges)
127+
128+
129+
def create_edge(nodes, graph, cluster, c1):
    """
    Link every node of weight ``c1`` to its closest superset nodes.

    For each bitcode at level ``c1``, the higher levels are scanned upwards,
    stopping below the top key of ``cluster``. At the first level holding at
    least one bitcode that contains all of the node's set bits, an edge is
    added to every such bitcode on that level and the scan stops for that node.

    ``graph`` is updated in place: keys are ``tuple(nodes[bitcode])`` and
    values are lists of ``nodes[other_bitcode]``.

    Weight levels absent from ``cluster`` are treated as empty instead of
    raising ``KeyError``.

    >>> nodes = {'100': ['ab'], '111': ['cd']}
    >>> cluster = {1: {'100': ['ab']}, 3: {'111': ['cd']}, 4: 'Header'}
    >>> graph = {}
    >>> create_edge(nodes, graph, cluster, 1)
    >>> graph
    {('ab',): [['cd']]}
    """
    if c1 not in cluster:
        return

    top_level = max(cluster)
    for code in cluster[c1]:
        mask = int(code, 2)
        source = tuple(nodes[code])
        for level in range(c1 + 1, top_level):
            # A candidate is a superset when it keeps every bit set in `code`.
            supersets = [
                other
                for other in cluster.get(level, {})
                if mask & int(other, 2) == mask
            ]
            if supersets:
                graph.setdefault(source, []).extend(nodes[other] for other in supersets)
                break
151+
152+
153+
def construct_graph(cluster, nodes):
    """
    Build the graph linking nodes to their supersets and to the Header node.

    Steps, in order:
    1. The nodes of the highest weight are linked both ways with a synthetic
       ``("Header",)`` node.
    2. ``cluster`` is mutated: a ``"Header"`` marker is stored one level above
       the highest weight.
    3. ``create_edge`` is called for levels 1 up to, but not including, the
       highest weight.

    Returns a dict mapping a tuple of edges to a list of neighbouring edge
    lists.
    """
    top_weight = max(cluster.keys())
    top_nodes = cluster[top_weight]
    cluster[top_weight + 1] = "Header"

    header = tuple(["Header"])
    graph = {}
    for bitcode in top_nodes:
        graph.setdefault(header, []).append(top_nodes[bitcode])
    for bitcode in top_nodes:
        graph[tuple(top_nodes[bitcode])] = [["Header"]]

    # max(cluster) now includes the Header level, hence the "- 1".
    for weight in range(1, max(cluster) - 1):
        create_edge(nodes, graph, cluster, weight)
    return graph
169+
170+
171+
def myDFS(graph, start, end, path=None):
    """
    Record every simple walk from ``start`` to ``end`` (the Header node).

    Each complete walk is appended, as a list of node tuples, to the
    module-level list ``paths``, which must exist before the first call.

    ``graph`` maps a node tuple to a list of neighbour edge lists. ``path`` is
    the walk so far and is never mutated. Nodes with no entry in ``graph`` are
    treated as dead ends.
    """
    path = ([] if path is None else path) + [start]
    if start == end:
        paths.append(path)
        # No longer walk can reach `end` again without revisiting it.
        return
    for node in graph.get(start, []):
        if tuple(node) not in path:
            myDFS(graph, tuple(node), end, path)
181+
182+
183+
def find_freq_subgraph_given_support(s, cluster, graph):
    """
    Run the DFS from every node whose weight matches support ``s``.

    ``s`` is a percentage. The weight level is ``int(s / 100 * (len(cluster) - 1))``.
    The ``- 1`` presumably discounts the extra "Header" level that
    ``construct_graph`` adds to ``cluster``. Each node of that level is handed
    to ``myDFS`` with the Header node as the target; nothing is returned.
    """
    level = int(s / 100 * (len(cluster) - 1))
    header = tuple(["Header"])
    for edges in cluster[level].values():
        myDFS(graph, tuple(edges), header)
190+
191+
192+
def freq_subgraphs_edge_list(paths):
    """
    Convert DFS walks into edge lists, one list per walk.

    The last node of every walk is dropped. Every edge name in the remaining
    nodes becomes a ``(first_char, second_char)`` tuple.

    >>> freq_subgraphs_edge_list([[('ab', 'cd'), ('ef',), ('Header',)]])
    [[('a', 'b'), ('c', 'd'), ('e', 'f')]]
    """
    return [
        [(name[0], name[1]) for node in walk[:-1] for name in node]
        for walk in paths
    ]
206+
207+
208+
def preprocess(edge_array):
    """
    Split every ``"<pair>-<label>"`` entry of ``edge_array`` in place.

    Each string entry is replaced by the list produced by splitting it on "-".
    The rows are mutated and nothing is returned.

    >>> graphs = [['ab-e1', 'ac-e3'], ['bd-e2']]
    >>> preprocess(graphs)
    >>> graphs
    [[['ab', 'e1'], ['ac', 'e3']], [['bd', 'e2']]]
    """
    for row in edge_array:
        for position, entry in enumerate(row):
            row[position] = entry.split("-")
221+
222+
if __name__ == "__main__":
    preprocess(edge_array)
    frequency_table = get_frequency_table(edge_array)
    nodes = get_nodes(frequency_table)
    cluster = get_cluster(nodes)
    support = get_support(cluster)
    graph = construct_graph(cluster, nodes)
    # myDFS appends each walk to the module-level `paths`, so the list must
    # exist before the search runs. Creating it afterwards raises NameError
    # during the search and would throw away the collected walks anyway.
    paths = []
    find_freq_subgraph_given_support(60, cluster, graph)
    freq_subgraph_edge_list = freq_subgraphs_edge_list(paths)
    print_all()

0 commit comments

Comments
 (0)