Skip to content

Add graphs/frequent_pattern_graph_miner.py #1866

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 37 commits into from
May 7, 2020
Merged
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
3422b71
Add files via upload
siva1098 Apr 15, 2020
c0d38d6
Update graphs/frequent_pattern_graph_miner.py
siva1098 Apr 15, 2020
d1ccba5
Update graphs/frequent_pattern_graph_miner.py
siva1098 Apr 15, 2020
aadb8d1
Update frequent_pattern_graph_miner.py
siva1098 Apr 15, 2020
87d405e
Update frequent_pattern_graph_miner.py
siva1098 Apr 15, 2020
6b2ebb0
Update frequent_pattern_graph_miner.py
siva1098 Apr 15, 2020
072f971
Update frequent_pattern_graph_miner.py
siva1098 Apr 18, 2020
116877a
Update graphs/frequent_pattern_graph_miner.py
siva1098 Apr 18, 2020
09f87e1
Update graphs/frequent_pattern_graph_miner.py
siva1098 Apr 18, 2020
46e04f8
Update graphs/frequent_pattern_graph_miner.py
siva1098 Apr 18, 2020
e4d0b79
Update graphs/frequent_pattern_graph_miner.py
siva1098 Apr 18, 2020
7456e09
Update graphs/frequent_pattern_graph_miner.py
siva1098 Apr 18, 2020
7321986
Update graphs/frequent_pattern_graph_miner.py
siva1098 Apr 18, 2020
9fd973d
Update graphs/frequent_pattern_graph_miner.py
siva1098 Apr 19, 2020
3fb1340
Update graphs/frequent_pattern_graph_miner.py
siva1098 Apr 19, 2020
0a1ea2f
Update frequent_pattern_graph_miner.py
siva1098 Apr 19, 2020
32adf11
Update frequent_pattern_graph_miner.py
siva1098 Apr 19, 2020
c519794
Update frequent_pattern_graph_miner.py
siva1098 Apr 19, 2020
e38e510
Update frequent_pattern_graph_miner.py
siva1098 Apr 19, 2020
e222c53
Update frequent_pattern_graph_miner.py
siva1098 Apr 19, 2020
45b2611
Update graphs/frequent_pattern_graph_miner.py
siva1098 Apr 19, 2020
937d288
Update frequent_pattern_graph_miner.py
siva1098 Apr 20, 2020
9631750
Update frequent_pattern_graph_miner.py
siva1098 Apr 20, 2020
36a0c9f
Update frequent_pattern_graph_miner.py
siva1098 Apr 20, 2020
67cc254
Update frequent_pattern_graph_miner.py
siva1098 Apr 20, 2020
0420044
Update frequent_pattern_graph_miner.py
siva1098 Apr 21, 2020
b7c1a93
Update frequent_pattern_graph_miner.py
siva1098 Apr 21, 2020
39dbbf2
Update graphs/frequent_pattern_graph_miner.py
siva1098 Apr 22, 2020
3558e91
Update frequent_pattern_graph_miner.py
siva1098 Apr 22, 2020
81f67ee
Update frequent_pattern_graph_miner.py
siva1098 Apr 24, 2020
39d2ddf
Update frequent_pattern_graph_miner.py
siva1098 Apr 25, 2020
fdb4c33
Update frequent_pattern_graph_miner.py
siva1098 Apr 25, 2020
b229de0
Update frequent_pattern_graph_miner.py
siva1098 Apr 25, 2020
860748d
Update frequent_pattern_graph_miner.py
siva1098 Apr 30, 2020
9a50cf6
Update frequent_pattern_graph_miner.py
siva1098 Apr 30, 2020
358fa99
Whitespace changes
cclauss May 4, 2020
a8ae4c8
Format with psf/black
cclauss May 7, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
232 changes: 232 additions & 0 deletions graphs/frequent_pattern_graph_miner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,232 @@
"""
FP-GraphMiner - A Fast Frequent Pattern Mining Algorithm for Network Graphs

FP-GraphMiner is a novel Frequent Pattern Graph Mining algorithm that compactly
represents a set of network graphs as a Frequent Pattern Graph (or FP-Graph).
This graph can be used to efficiently mine frequent subgraphs, including maximal
frequent subgraphs and maximum common subgraphs.

URL: https://www.researchgate.net/publication/235255851
"""
# fmt: off
# Sample input: one inner list per graph. Each entry is written "<edge>-<label>",
# e.g. 'ab-e1'; preprocess() later splits every entry on "-" in place, and the
# two characters of the edge part are read as its endpoints.
edge_array = [
['ab-e1', 'ac-e3', 'ad-e5', 'bc-e4', 'bd-e2', 'be-e6', 'bh-e12', 'cd-e2', 'ce-e4',
'de-e1', 'df-e8', 'dg-e5', 'dh-e10', 'ef-e3', 'eg-e2', 'fg-e6', 'gh-e6', 'hi-e3'],
['ab-e1', 'ac-e3', 'ad-e5', 'bc-e4', 'bd-e2', 'be-e6', 'cd-e2', 'de-e1', 'df-e8',
'ef-e3', 'eg-e2', 'fg-e6'],
['ab-e1', 'ac-e3', 'bc-e4', 'bd-e2', 'de-e1', 'df-e8', 'dg-e5', 'ef-e3', 'eg-e2',
'eh-e12', 'fg-e6', 'fh-e10', 'gh-e6'],
['ab-e1', 'ac-e3', 'bc-e4', 'bd-e2', 'bh-e12', 'cd-e2', 'df-e8', 'dh-e10'],
['ab-e1', 'ac-e3', 'ad-e5', 'bc-e4', 'bd-e2', 'cd-e2', 'ce-e4', 'de-e1', 'df-e8',
'dg-e5', 'ef-e3', 'eg-e2', 'fg-e6']
]
# fmt: on


def get_distinct_edge(edge_array):
    """
    Collect the distinct first components (``item[0]``) of every entry in every
    graph of ``edge_array`` and return them as a list in arbitrary order.

    For preprocessed entries such as ``['ab', 'e1']`` the first component is the
    edge name; for raw strings such as ``'ab-e1'`` it is the first character.

    >>> sorted(get_distinct_edge([[['ab', 'e1'], ['bc', 'e4']], [['ab', 'e1']]]))
    ['ab', 'bc']
    >>> sorted(get_distinct_edge([['ab-e1', 'hi-e3']]))
    ['a', 'h']
    """
    return list({entry[0] for graph_edges in edge_array for entry in graph_edges})


def get_bitcode(edge_array, distinct_edge):
    """
    Return the bitcode of ``distinct_edge``: a string with one character per
    graph in ``edge_array``, "1" if that graph contains the edge and "0" if not.

    An entry matches only when its first component *equals* ``distinct_edge``.
    (A substring test would wrongly report edge 'ab' as present in a graph that
    only contains an edge named 'abc'.)

    >>> get_bitcode([[['ab', 'e1']], [['cd', 'e2']], [['cd', 'e2'], ['ab', 'e1']]],
    ...             'ab')
    '101'
    >>> get_bitcode([[['abc', 'e1']], [['ab', 'e2']]], 'ab')
    '01'
    """
    return "".join(
        "1" if any(item[0] == distinct_edge for item in row) else "0"
        for row in edge_array
    )


def get_frequency_table(edge_array):
    """
    Build the frequency table of ``edge_array``.

    Returns a list of ``[distinct edge, weight, bitcode]`` rows, where the
    bitcode comes from ``get_bitcode`` and the weight is the number of "1"
    characters in it. Rows are ordered by descending weight; rows of equal
    weight keep the order in which ``get_distinct_edge`` produced the edges.
    """
    table = []
    for edge in get_distinct_edge(edge_array):
        bitcode = get_bitcode(edge_array, edge)
        table.append([edge, bitcode.count("1"), bitcode])
    # list.sort is stable, also with reverse=True, so ties keep their order.
    table.sort(key=lambda row: row[1], reverse=True)
    return table


def get_nodes(frequency_table):
    """
    Group the edges of a frequency table by their bitcode.

    Each row is read as ``[edge, weight, bitcode]``; the result maps every
    bitcode to the list of edges sharing it, in table order.

    >>> get_nodes([['ab', 5, '11111'], ['ac', 5, '11111'], ['df', 5, '11111'],
    ...            ['bd', 5, '11111'], ['bc', 5, '11111']])
    {'11111': ['ab', 'ac', 'df', 'bd', 'bc']}
    """
    nodes = {}
    for row in frequency_table:
        edge, bitcode = row[0], row[2]
        if bitcode in nodes:
            nodes[bitcode].append(edge)
        else:
            nodes[bitcode] = [edge]
    return nodes


def get_cluster(nodes):
    """
    Group nodes by the weight of their bitcode (its number of "1" characters).

    The result has the form ``{weight: {bitcode: edges, ...}, ...}``.

    >>> get_cluster({'11111': ['ab', 'ac'], '11011': ['cd']})
    {5: {'11111': ['ab', 'ac']}, 4: {'11011': ['cd']}}
    """
    cluster = {}
    for bitcode, edges in nodes.items():
        weight = bitcode.count("1")
        if weight not in cluster:
            cluster[weight] = {}
        cluster[weight][bitcode] = edges
    return cluster


def get_support(cluster):
    """
    Return the support, in percent, of every weight level in ``cluster``.

    The support of a level is ``weight * 100 / number of graphs``. The number of
    graphs is taken from the length of a bitcode (one character per graph), not
    from ``len(cluster)``: the two only agree when every weight from 1 up to the
    number of graphs is present, and otherwise supports above 100% result.
    ``len(cluster)`` is kept as a fallback when no bitcode is available.

    >>> get_support({5: {'11111': ['ab', 'ac', 'df', 'bd', 'bc']},
    ...              4: {'11101': ['ef', 'eg', 'de', 'fg'], '11011': ['cd']},
    ...              3: {'11001': ['ad'], '10101': ['dg']},
    ...              2: {'10010': ['dh', 'bh'], '11000': ['be'], '10100': ['gh'],
    ...                  '10001': ['ce']},
    ...              1: {'00100': ['fh', 'eh'], '10000': ['hi']}})
    [100.0, 80.0, 60.0, 40.0, 20.0]
    >>> get_support({5: {'11111': ['ab']}, 3: {'11001': ['ad']}})
    [100.0, 60.0]
    """
    n_graphs = next(
        (
            len(bitcode)
            for group in cluster.values()
            if isinstance(group, dict)
            for bitcode in group
            if bitcode
        ),
        len(cluster),
    )
    return [weight * 100 / n_graphs for weight in cluster]


def print_all() -> None:
    """
    Print the module-level results: ``nodes``, ``support``, ``cluster`` (highest
    weight first), ``graph`` and ``freq_subgraph_edge_list``.

    All five names are read as globals, so they must be defined before calling.
    """
    print("\nNodes\n")
    for entry in nodes.items():
        print(*entry)

    print("\nSupport\n")
    print(support)

    print("\n Cluster \n")
    for entry in sorted(cluster.items(), reverse=True):
        print(*entry)

    print("\n Graph\n")
    for entry in graph.items():
        print(*entry)

    print("\n Edge List of Frequent subgraphs \n")
    for subgraph_edges in freq_subgraph_edge_list:
        print(subgraph_edges)


def create_edge(nodes, graph, cluster, c1):
    """
    Link every node of weight ``c1`` to its covering nodes on the nearest
    higher weight level that has any.

    A node with bitcode ``j`` covers a node with bitcode ``i`` when every bit set
    in ``i`` is also set in ``j``. For each node of level ``c1`` the levels
    ``c1 + 1, c1 + 2, ...`` are searched in order; on the first level with at
    least one covering node, all covering nodes of that level are appended to
    ``graph[tuple(nodes[i])]`` and the search for that node stops.

    The highest key of ``cluster`` is never searched (construct_graph reserves
    it for the "Header" sentinel). Weight levels that are absent from
    ``cluster``, or that do not hold a dict of bitcodes, are skipped instead of
    raising ``KeyError``.

    ``graph`` is modified in place; nothing is returned.

    >>> g = {}
    >>> create_edge({'10': ['cd'], '11': ['ab']}, g,
    ...             {1: {'10': ['cd']}, 2: {'11': ['ab']}, 3: 'Header'}, 1)
    >>> g
    {('cd',): [['ab']]}
    """
    sources = cluster.get(c1)
    if not isinstance(sources, dict):
        return
    upper_bound = max(cluster, default=0)  # exclusive: the sentinel level
    for i in sources:
        source_bits = int(i, 2)
        for c2 in range(c1 + 1, upper_bound):
            candidates = cluster.get(c2)
            if not isinstance(candidates, dict):
                continue  # no nodes with this weight
            linked = False
            for j in candidates:
                # j covers i when i's set bits are a subset of j's
                if source_bits & int(j, 2) == source_bits:
                    graph.setdefault(tuple(nodes[i]), []).append(nodes[j])
                    linked = True
            if linked:
                break  # only the nearest level with a cover is linked


def construct_graph(cluster, nodes):
    """
    Build the graph (a dict mapping a tuple of edges to a list of neighbouring
    edge lists) from ``cluster`` and ``nodes``.

    The nodes on the highest weight level are linked to and from a sentinel
    ``("Header",)`` node; every lower level, starting at weight 1, is then
    linked upwards with ``create_edge``.

    Side effect: ``cluster`` gains an extra entry ``highest weight + 1`` whose
    value is the string "Header".

    >>> c = {1: {'1': ['ab', 'cd']}}
    >>> construct_graph(c, {'1': ['ab', 'cd']})
    {('Header',): [['ab', 'cd']], ('ab', 'cd'): [['Header']]}
    >>> c[2]
    'Header'
    """
    top_weight = max(cluster.keys())
    top_level = cluster[top_weight]
    cluster[top_weight + 1] = "Header"

    header = tuple(["Header"])
    graph = {}
    # Header -> every top-level node
    for bitcode in top_level:
        graph.setdefault(header, []).append(top_level[bitcode])
    # every top-level node -> Header
    for bitcode in top_level:
        graph[tuple(top_level[bitcode])] = [["Header"]]

    # max(cluster) is now the sentinel level, so this covers 1 .. top_weight - 1
    for weight in range(1, max(cluster) - 1):
        create_edge(nodes, graph, cluster, weight)
    return graph


def myDFS(graph, start, end, path=None):
    """
    Find every depth-first walk from ``start`` to ``end`` that does not revisit
    a node, and append each one to the module-level list ``paths``.

    ``graph`` maps a node (a tuple) to a list of neighbours (each converted with
    ``tuple`` before use). ``path`` is the walk taken so far; callers normally
    leave it out. A node without an entry in ``graph`` is treated as a dead end.

    Nothing is returned. The global ``paths`` must exist before the call.
    """
    # Build a new list per call so sibling branches never share state.
    path = ([] if path is None else path) + [start]
    if start == end:
        paths.append(path)
    for node in graph.get(start, ()):
        if tuple(node) not in path:
            myDFS(graph, tuple(node), end, path)


def find_freq_subgraph_given_support(s, cluster, graph):
    """
    Start a search towards the "Header" node from every node whose weight
    matches the support ``s`` (a percentage).

    The weight level is ``int(s * number_of_graphs / 100)``. The product is
    taken before the division to avoid float truncation (29 / 100 * 100
    truncates to 28). The number of graphs is read from the length of a bitcode
    in ``cluster``, falling back to ``len(cluster) - 1`` (every entry except the
    "Header" sentinel added by construct_graph) if no bitcode is found.

    Nothing is returned: ``myDFS`` records the walks in the module-level
    ``paths`` list. If no nodes exist at the computed level, nothing happens.
    """
    # Real levels are dicts keyed by bitcode; the sentinel entry is a string.
    levels = {
        weight: group for weight, group in cluster.items() if isinstance(group, dict)
    }
    n_graphs = next(
        (len(bitcode) for group in levels.values() for bitcode in group if bitcode),
        len(cluster) - 1,
    )
    k = int(s * n_graphs / 100)
    for edges in levels.get(k, {}).values():
        myDFS(graph, tuple(edges), tuple(["Header"]))


def freq_subgraphs_edge_list(paths):
    """
    Convert each path into the edge list of its frequent subgraph.

    Every node of a path except the last one contributes one ``(e[0], e[1])``
    pair per edge ``e`` it holds; the result is one such list per path.

    >>> freq_subgraphs_edge_list([[('ab', 'cd'), ('ef',), ('Header',)]])
    [[('a', 'b'), ('c', 'd'), ('e', 'f')]]
    """
    edge_lists = []
    for walk in paths:
        endpoints = []
        for node in walk[:-1]:  # the final node of the walk is left out
            endpoints.extend((e[0], e[1]) for e in node)
        edge_lists.append(endpoints)
    return edge_lists


def preprocess(edge_array):
    """
    Split every entry of every graph on "-" in place, turning e.g. 'ab-e1'
    into ['ab', 'e1']. The lists are modified directly; nothing is returned.

    >>> graphs = [['ab-e1', 'ac-e3'], ['bd-e2']]
    >>> preprocess(graphs)
    >>> graphs
    [[['ab', 'e1'], ['ac', 'e3']], [['bd', 'e2']]]
    """
    for graph_edges in edge_array:
        for position, entry in enumerate(graph_edges):
            graph_edges[position] = entry.split("-")


if __name__ == "__main__":
    # Demo run on the sample graphs in edge_array.
    preprocess(edge_array)
    frequency_table = get_frequency_table(edge_array)
    nodes = get_nodes(frequency_table)
    cluster = get_cluster(nodes)
    support = get_support(cluster)
    graph = construct_graph(cluster, nodes)
    # myDFS() appends its results to the module-level `paths`, so the list has
    # to exist before the search runs and must not be reset afterwards.
    paths = []
    find_freq_subgraph_given_support(60, cluster, graph)
    freq_subgraph_edge_list = freq_subgraphs_edge_list(paths)
    print_all()