From 3422b71a0fa19581fcd782dfec570597fbd32e44 Mon Sep 17 00:00:00 2001 From: siva1098 <32545976+siva1098@users.noreply.github.com> Date: Wed, 15 Apr 2020 12:25:37 +0530 Subject: [PATCH 01/37] Add files via upload --- graphs/frequent_pattern_graph_miner.py | 183 +++++++++++++++++++++++++ 1 file changed, 183 insertions(+) create mode 100644 graphs/frequent_pattern_graph_miner.py diff --git a/graphs/frequent_pattern_graph_miner.py b/graphs/frequent_pattern_graph_miner.py new file mode 100644 index 000000000000..6428c038ea38 --- /dev/null +++ b/graphs/frequent_pattern_graph_miner.py @@ -0,0 +1,183 @@ +''' +FP-GraphMiner - A Fast Frequent Pattern Mining Algorithm for Network Graphs + +A novel Frequent Pattern Graph Mining algorithm, +FP-GraphMiner, that compactly represents a set of network graphs as a Frequent Pattern Graph (or FP-Graph). +This graph can be used to efficiently mine frequent subgraphs including maximal frequent subgraphs and maximum common subgraphs. + +URL:https://www.researchgate.net/publication/235255851_FP-GraphMiner_-_A_Fast_Frequent_Pattern_Mining_Algorithm_for_Network_Graphs + +''' +def get_DE(EA): + ''' + Return Distinct edges from edge array of multiple graphs + ''' + DE=set() + for i in range(len(EA)): + for j in range(len(EA[i])): + DE.add(EA[i][j][0]) + de=list(DE) + return(de) + +def get_bitcode(EA,DE): + ''' + Return bitcode of DE + ''' + bitcode=['0' for i in range(len(EA))] + #bitcode="0" * len(EA) + for i in range(len(EA)): + for j in range(len(EA[i])): + if DE in EA[i][j][0]: + bitcode[i]='1' + break + return bitcode + +def get_FT(EA): + ''' + Returns FT,cluster,nodes,support + ''' + DE=get_DE(EA) + FT=dict() + for i in range(len(DE)): + bit=get_bitcode(EA,DE[i]) + bt=''.join(bit) + #print(bt) + s=bt.count('1') + FT[DE[i]]=[s,bt] + ''' + Store [Distinct edge, WT(Bitcode), Bitcode] in Descending order + ''' + Sorted_FT=[[k,v[0],v[1]] for k,v in sorted(FT.items(),key=lambda v:v[1][0],reverse=True)] + ''' + format cluster:{WT(bitcode):nodes with same WT} + ''' + cluster={} + ''' + format nodes={bitcode:edges that represent the bitcode} + ''' + nodes={} + support=[] + for i in range(len(Sorted_FT)): + nodes.setdefault(Sorted_FT[i][2],[]).append(Sorted_FT[i][0]) + for i in nodes.keys(): + cluster.setdefault(i.count('1'),{})[i]=nodes[i] + for i in cluster.keys(): + support.append(i*100/len(cluster.keys())) + + return Sorted_FT,cluster,nodes,support + +def print_all(): + print("\nNodes\n") + for i in nodes.keys(): + print(i,nodes[i]) + print("\nSupport\n") + print(support) + '''print("\n Edge List\n") + for i in EL: + print(i)''' + print("\n Cluster \n") + for i in sorted(cluster.keys(),reverse=True): + print(i,cluster[i]) + print("\n Graph\n") + for i in G.keys(): + print(i,G[i]) + print("\n Edge List of Frequent subgraphs \n") + for i in freq_sub_EL: + print(i) + +def create_edge(nodes,G,cluster,c1): + ''' + create edge between the nodes + ''' + for i in cluster[c1].keys(): + count=0 + c2=c1+1 + while c2 < max(cluster.keys()): + for j in cluster[c2].keys(): + ''' + creates edge only if the condition satisfies + ''' + if(int(i,2) & int(j,2) == int(i,2)): + if tuple(nodes[i]) in G: + G[tuple(nodes[i])].append(nodes[j]) + else: + G[tuple(nodes[i])]=[nodes[j]] + count+=1 + if(count==0): + c2=c2+1 + else: + break + +def construct_graph(cluster,nodes): + X=cluster[max(cluster.keys())] + cluster[max(cluster.keys())+1]='Header' + G={} + for i in X.keys(): + if tuple(['Header']) in G: + G[tuple(['Header'])].append(X[i]) + else: + G[tuple(['Header'])]=[X[i]] + for i in X.keys(): + G[tuple(X[i])]=[['Header']] + i=1 + while i < max(cluster.keys())-1: + create_edge(nodes,G,cluster,i) + i=i+1 + + return G + +def myDFS(graph,start,end,path=[]): + ''' + find different DFS walk from given node to Header node + ''' + path=path+[start] + if start==end or ''.join(list(start))== end: + paths.append(path) + for node in graph[start]: + if tuple(node) not in path: + myDFS(graph,tuple(node),end,path) + +def find_freq_subgraph_given_support(s,cluster,G): + ''' + find edges of multiple frequent subgraphs + ''' + k=int(s/100*(len(cluster)-1)) + freq_subgraphs=[] + for i in cluster[k].keys(): + myDFS(G,tuple(cluster[k][i]),tuple(['Header'])) + +def freq_subgraphs_EL(paths): + ''' + returns Edge list for frequent subgraphs + ''' + freq_sub_EL=[] + for edges in paths: + EL=[] + for j in range(len(edges)-1): + temp=list(edges[j]) + for e in temp: + edge=(e[0],e[1]) + EL.append(edge) + freq_sub_EL.append(EL) + + return freq_sub_EL + +if __name__ == "__main__": + EA=[ + ['ab-e1','ac-e3','ad-e5','bc-e4','bd-e2','be-e6','bh-e12','cd-e2','ce-e4','de-e1','df-e8','dg-e5','dh-e10','ef-e3','eg-e2','fg-e6','gh-e6','hi-e3'], + ['ab-e1','ac-e3','ad-e5','bc-e4','bd-e2','be-e6','cd-e2','de-e1','df-e8','ef-e3','eg-e2','fg-e6'], + ['ab-e1','ac-e3','bc-e4','bd-e2','de-e1','df-e8','dg-e5','ef-e3','eg-e2','eh-e12','fg-e6','fh-e10','gh-e6'], + ['ab-e1','ac-e3','bc-e4','bd-e2','bh-e12','cd-e2','df-e8','dh-e10'], + ['ab-e1','ac-e3','ad-e5','bc-e4','bd-e2','cd-e2','ce-e4','de-e1','df-e8','dg-e5','ef-e3','eg-e2','fg-e6'] + ] + for i in range(len(EA)): + for j in range(len(EA[i])): + t=EA[i][j].split('-') + EA[i][j]=t + + FT,cluster,nodes,support=get_FT(EA) + G=construct_graph(cluster,nodes) + paths = [] + find_freq_subgraph_given_support(60,cluster,G) + freq_sub_EL=freq_subgraphs_EL(paths) + print_all() From c0d38d6ec92f88218d479eca8e9f5bd0d94b6e9a Mon Sep 17 00:00:00 2001 From: siva1098 <32545976+siva1098@users.noreply.github.com> Date: Wed, 15 Apr 2020 16:24:24 +0530 Subject: [PATCH 02/37] Update graphs/frequent_pattern_graph_miner.py Co-Authored-By: Christian Clauss --- graphs/frequent_pattern_graph_miner.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/graphs/frequent_pattern_graph_miner.py b/graphs/frequent_pattern_graph_miner.py index 6428c038ea38..ee1a865b026d 100644 --- a/graphs/frequent_pattern_graph_miner.py +++ b/graphs/frequent_pattern_graph_miner.py @@ -16,8 +16,7 @@ def get_DE(EA): for i in range(len(EA)): for j in range(len(EA[i])): DE.add(EA[i][j][0]) - de=list(DE) - return(de) + return list(DE) # avoid unneeded parens and avoid creating variables that only last for one line. def get_bitcode(EA,DE): ''' From d1ccba58261f8bc3801968e308ea7d969f6861d2 Mon Sep 17 00:00:00 2001 From: siva1098 <32545976+siva1098@users.noreply.github.com> Date: Wed, 15 Apr 2020 17:29:38 +0530 Subject: [PATCH 03/37] Update graphs/frequent_pattern_graph_miner.py Co-Authored-By: Christian Clauss --- graphs/frequent_pattern_graph_miner.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/graphs/frequent_pattern_graph_miner.py b/graphs/frequent_pattern_graph_miner.py index ee1a865b026d..c952a0d1eb2d 100644 --- a/graphs/frequent_pattern_graph_miner.py +++ b/graphs/frequent_pattern_graph_miner.py @@ -58,8 +58,9 @@ def get_FT(EA): support=[] for i in range(len(Sorted_FT)): nodes.setdefault(Sorted_FT[i][2],[]).append(Sorted_FT[i][0]) - for i in nodes.keys(): - cluster.setdefault(i.count('1'),{})[i]=nodes[i] + for key, value in nodes.items(): + cluster.setdefault(key.count('1'), {})[key] = value + for i in cluster.keys(): support.append(i*100/len(cluster.keys())) From aadb8d16118edf612558250dfe1f9e7cd4f342c0 Mon Sep 17 00:00:00 2001 From: siva1098 <32545976+siva1098@users.noreply.github.com> Date: Wed, 15 Apr 2020 17:35:18 +0530 Subject: [PATCH 04/37] Update frequent_pattern_graph_miner.py --- graphs/frequent_pattern_graph_miner.py | 132 +++++++++++++------------ 1 file changed, 69 insertions(+), 63 deletions(-) diff --git a/graphs/frequent_pattern_graph_miner.py b/graphs/frequent_pattern_graph_miner.py index c952a0d1eb2d..ed506e751c7c 100644 --- a/graphs/frequent_pattern_graph_miner.py +++ b/graphs/frequent_pattern_graph_miner.py @@ -8,45 +8,48 @@ URL:https://www.researchgate.net/publication/235255851_FP-GraphMiner_-_A_Fast_Frequent_Pattern_Mining_Algorithm_for_Network_Graphs ''' -def get_DE(EA): +def get_distinct_edge(edge_array): ''' Return Distinct edges from edge array of multiple graphs ''' - DE=set() - for i in range(len(EA)): - for j in range(len(EA[i])): - DE.add(EA[i][j][0]) - return list(DE) # avoid unneeded parens and avoid creating variables that only last for one line. + distinct_edge=set() -def get_bitcode(EA,DE): + for i in enumerate(edge_array): + for j in enumerate(i[1]): + distinct_edge.add(j[1][0]) + + return list(distinct_edge) + +def get_bitcode(edge_array,distinct_edge): ''' - Return bitcode of DE + Return bitcode of distinct_edge ''' - bitcode=['0' for i in range(len(EA))] - #bitcode="0" * len(EA) - for i in range(len(EA)): - for j in range(len(EA[i])): - if DE in EA[i][j][0]: - bitcode[i]='1' + bitcode=['0' for i in enumerate(edge_array)] + + for i in enumerate(edge_array): + for j in enumerate(i[1]): + if distinct_edge in j[1][0]: + bitcode[i[0]]='1' break + return bitcode -def get_FT(EA): +def get_frequency_table(edge_array): ''' - Returns FT,cluster,nodes,support + Returns Frequency Table,cluster,nodes,support ''' - DE=get_DE(EA) - FT=dict() - for i in range(len(DE)): - bit=get_bitcode(EA,DE[i]) + distinct_edge=get_distinct_edge(edge_array) + frequency_table=dict() + + for i in enumerate(distinct_edge): + bit=get_bitcode(edge_array,i[1]) bt=''.join(bit) - #print(bt) s=bt.count('1') - FT[DE[i]]=[s,bt] + frequency_table[i[1]]=[s,bt] ''' Store [Distinct edge, WT(Bitcode), Bitcode] in Descending order ''' - Sorted_FT=[[k,v[0],v[1]] for k,v in sorted(FT.items(),key=lambda v:v[1][0],reverse=True)] + sorted_frequency_table=[[k,v[0],v[1]] for k,v in sorted(frequency_table.items(),key=lambda v:v[1][0],reverse=True)] ''' format cluster:{WT(bitcode):nodes with same WT} ''' @@ -56,36 +59,38 @@ def get_FT(EA): ''' nodes={} support=[] - for i in range(len(Sorted_FT)): - nodes.setdefault(Sorted_FT[i][2],[]).append(Sorted_FT[i][0]) - for key, value in nodes.items(): - cluster.setdefault(key.count('1'), {})[key] = value - for i in cluster.keys(): - support.append(i*100/len(cluster.keys())) + for i in enumerate(sorted_frequency_table): + nodes.setdefault(i[1][2],[]).append(i[1][0]) - return Sorted_FT,cluster,nodes,support + for key,value in nodes.items(): + cluster.setdefault(key.count('1'),{})[key]=value + + for i in cluster: + support.append(i*100/len(cluster)) + + return sorted_frequency_table,cluster,nodes,support def print_all(): print("\nNodes\n") - for i in nodes.keys(): - print(i,nodes[i]) + for key,value in nodes.items(): + print(key,value) print("\nSupport\n") print(support) '''print("\n Edge List\n") for i in EL: print(i)''' print("\n Cluster \n") - for i in sorted(cluster.keys(),reverse=True): - print(i,cluster[i]) + for key,value in sorted(cluster.items(),reverse=True): + print(key, value) print("\n Graph\n") - for i in G.keys(): - print(i,G[i]) + for key,value in graph.items(): + print(key, value) print("\n Edge List of Frequent subgraphs \n") - for i in freq_sub_EL: - print(i) - -def create_edge(nodes,G,cluster,c1): + for edge_list in freq_subgraph_edge_list: + print(edge_list) + +def create_edge(nodes,graph,cluster,c1): ''' create edge between the nodes ''' @@ -98,10 +103,10 @@ def create_edge(nodes,G,cluster,c1): creates edge only if the condition satisfies ''' if(int(i,2) & int(j,2) == int(i,2)): - if tuple(nodes[i]) in G: - G[tuple(nodes[i])].append(nodes[j]) + if tuple(nodes[i]) in graph: + graph[tuple(nodes[i])].append(nodes[j]) else: - G[tuple(nodes[i])]=[nodes[j]] + graph[tuple(nodes[i])]=[nodes[j]] count+=1 if(count==0): c2=c2+1 @@ -111,42 +116,42 @@ def create_edge(nodes,G,cluster,c1): def construct_graph(cluster,nodes): X=cluster[max(cluster.keys())] cluster[max(cluster.keys())+1]='Header' - G={} + graph={} for i in X.keys(): - if tuple(['Header']) in G: - G[tuple(['Header'])].append(X[i]) + if tuple(['Header']) in graph: + graph[tuple(['Header'])].append(X[i]) else: - G[tuple(['Header'])]=[X[i]] + graph[tuple(['Header'])]=[X[i]] for i in X.keys(): - G[tuple(X[i])]=[['Header']] + graph[tuple(X[i])]=[['Header']] i=1 while i < max(cluster.keys())-1: - create_edge(nodes,G,cluster,i) + create_edge(nodes,graph,cluster,i) i=i+1 - return G + return graph def myDFS(graph,start,end,path=[]): ''' find different DFS walk from given node to Header node ''' path=path+[start] - if start==end or ''.join(list(start))== end: + if start==end: paths.append(path) for node in graph[start]: if tuple(node) not in path: myDFS(graph,tuple(node),end,path) -def find_freq_subgraph_given_support(s,cluster,G): +def find_freq_subgraph_given_support(s,cluster,graph): ''' find edges of multiple frequent subgraphs ''' k=int(s/100*(len(cluster)-1)) freq_subgraphs=[] for i in cluster[k].keys(): - myDFS(G,tuple(cluster[k][i]),tuple(['Header'])) + myDFS(graph,tuple(cluster[k][i]),tuple(['Header'])) -def freq_subgraphs_EL(paths): +def freq_subgraphs_edge_list(paths): ''' returns Edge list for frequent subgraphs ''' @@ -163,21 +168,22 @@ def freq_subgraphs_EL(paths): return freq_sub_EL if __name__ == "__main__": - EA=[ + edge_array=[ ['ab-e1','ac-e3','ad-e5','bc-e4','bd-e2','be-e6','bh-e12','cd-e2','ce-e4','de-e1','df-e8','dg-e5','dh-e10','ef-e3','eg-e2','fg-e6','gh-e6','hi-e3'], ['ab-e1','ac-e3','ad-e5','bc-e4','bd-e2','be-e6','cd-e2','de-e1','df-e8','ef-e3','eg-e2','fg-e6'], ['ab-e1','ac-e3','bc-e4','bd-e2','de-e1','df-e8','dg-e5','ef-e3','eg-e2','eh-e12','fg-e6','fh-e10','gh-e6'], ['ab-e1','ac-e3','bc-e4','bd-e2','bh-e12','cd-e2','df-e8','dh-e10'], ['ab-e1','ac-e3','ad-e5','bc-e4','bd-e2','cd-e2','ce-e4','de-e1','df-e8','dg-e5','ef-e3','eg-e2','fg-e6'] ] - for i in range(len(EA)): - for j in range(len(EA[i])): - t=EA[i][j].split('-') - EA[i][j]=t + for i in range(len(edge_array)): + for j in range(len(edge_array[i])): + t=edge_array[i][j].split('-') + edge_array[i][j]=t - FT,cluster,nodes,support=get_FT(EA) - G=construct_graph(cluster,nodes) + frequency_table,cluster,nodes,support=get_frequency_table(edge_array) + graph=construct_graph(cluster,nodes) paths = [] - find_freq_subgraph_given_support(60,cluster,G) - freq_sub_EL=freq_subgraphs_EL(paths) + find_freq_subgraph_given_support(60,cluster,graph) + freq_subgraph_edge_list=freq_subgraphs_edge_list(paths) + print_all() From 87d405e950902191b74979352a11c7c3e3c92c1a Mon Sep 17 00:00:00 2001 From: siva1098 <32545976+siva1098@users.noreply.github.com> Date: Wed, 15 Apr 2020 18:49:53 +0530 Subject: [PATCH 05/37] Update frequent_pattern_graph_miner.py --- graphs/frequent_pattern_graph_miner.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/graphs/frequent_pattern_graph_miner.py b/graphs/frequent_pattern_graph_miner.py index ed506e751c7c..77b1c5b4745a 100644 --- a/graphs/frequent_pattern_graph_miner.py +++ b/graphs/frequent_pattern_graph_miner.py @@ -14,9 +14,9 @@ def get_distinct_edge(edge_array): ''' distinct_edge=set() - for i in enumerate(edge_array): - for j in enumerate(i[1]): - distinct_edge.add(j[1][0]) + for i,row in enumerate(edge_array): + for j,item in enumerate(row): + distinct_edge.add(item[0]) return list(distinct_edge) @@ -26,11 +26,12 @@ def get_bitcode(edge_array,distinct_edge): ''' bitcode=['0' for i in enumerate(edge_array)] - for i in enumerate(edge_array): - for j in enumerate(i[1]): - if distinct_edge in j[1][0]: - bitcode[i[0]]='1' + for i,row in enumerate(edge_array): + for j,item in enumerate(row): + if distinct_edge in item[0]: + bitcode[i]='1' break + return bitcode @@ -41,11 +42,11 @@ def get_frequency_table(edge_array): distinct_edge=get_distinct_edge(edge_array) frequency_table=dict() - for i in enumerate(distinct_edge): - bit=get_bitcode(edge_array,i[1]) + for i,item in enumerate(distinct_edge): + bit=get_bitcode(edge_array,item) bt=''.join(bit) s=bt.count('1') - frequency_table[i[1]]=[s,bt] + frequency_table[item]=[s,bt] ''' Store [Distinct edge, WT(Bitcode), Bitcode] in Descending order ''' @@ -60,8 +61,8 @@ def get_frequency_table(edge_array): nodes={} support=[] - for i in enumerate(sorted_frequency_table): - nodes.setdefault(i[1][2],[]).append(i[1][0]) + for i,item in enumerate(sorted_frequency_table): + nodes.setdefault(item[2],[]).append(item[0]) for key,value in nodes.items(): cluster.setdefault(key.count('1'),{})[key]=value From 6b2ebb07d5250beb4931561246da5e7a43e280b9 Mon Sep 17 00:00:00 2001 From: siva1098 <32545976+siva1098@users.noreply.github.com> Date: Wed, 15 Apr 2020 18:54:36 +0530 Subject: [PATCH 06/37] Update frequent_pattern_graph_miner.py --- graphs/frequent_pattern_graph_miner.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/graphs/frequent_pattern_graph_miner.py b/graphs/frequent_pattern_graph_miner.py index 77b1c5b4745a..b76e42c08654 100644 --- a/graphs/frequent_pattern_graph_miner.py +++ b/graphs/frequent_pattern_graph_miner.py @@ -8,7 +8,9 @@ URL:https://www.researchgate.net/publication/235255851_FP-GraphMiner_-_A_Fast_Frequent_Pattern_Mining_Algorithm_for_Network_Graphs ''' -def get_distinct_edge(edge_array): +from typing import List + +def get_distinct_edge(edge_array: List[List[str]]) -> List[str]: ''' Return Distinct edges from edge array of multiple graphs ''' From 072f971f0fed8d0805ed51813f6badf9b99b1a57 Mon Sep 17 00:00:00 2001 From: siva1098 <32545976+siva1098@users.noreply.github.com> Date: Sat, 18 Apr 2020 11:33:38 +0530 Subject: [PATCH 07/37] Update frequent_pattern_graph_miner.py --- graphs/frequent_pattern_graph_miner.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/graphs/frequent_pattern_graph_miner.py b/graphs/frequent_pattern_graph_miner.py index b76e42c08654..18e664d84484 100644 --- a/graphs/frequent_pattern_graph_miner.py +++ b/graphs/frequent_pattern_graph_miner.py @@ -10,6 +10,14 @@ ''' from typing import List +edge_array=[ + ['ab-e1','ac-e3','ad-e5','bc-e4','bd-e2','be-e6','bh-e12','cd-e2','ce-e4','de-e1','df-e8','dg-e5','dh-e10','ef-e3','eg-e2','fg-e6','gh-e6','hi-e3'], + ['ab-e1','ac-e3','ad-e5','bc-e4','bd-e2','be-e6','cd-e2','de-e1','df-e8','ef-e3','eg-e2','fg-e6'], + ['ab-e1','ac-e3','bc-e4','bd-e2','de-e1','df-e8','dg-e5','ef-e3','eg-e2','eh-e12','fg-e6','fh-e10','gh-e6'], + ['ab-e1','ac-e3','bc-e4','bd-e2','bh-e12','cd-e2','df-e8','dh-e10'], + ['ab-e1','ac-e3','ad-e5','bc-e4','bd-e2','cd-e2','ce-e4','de-e1','df-e8','dg-e5','ef-e3','eg-e2','fg-e6'] + ] + def get_distinct_edge(edge_array: List[List[str]]) -> List[str]: ''' Return Distinct edges from edge array of multiple graphs @@ -169,20 +177,15 @@ def freq_subgraphs_edge_list(paths): freq_sub_EL.append(EL) return freq_sub_EL - -if __name__ == "__main__": - edge_array=[ - ['ab-e1','ac-e3','ad-e5','bc-e4','bd-e2','be-e6','bh-e12','cd-e2','ce-e4','de-e1','df-e8','dg-e5','dh-e10','ef-e3','eg-e2','fg-e6','gh-e6','hi-e3'], - ['ab-e1','ac-e3','ad-e5','bc-e4','bd-e2','be-e6','cd-e2','de-e1','df-e8','ef-e3','eg-e2','fg-e6'], - ['ab-e1','ac-e3','bc-e4','bd-e2','de-e1','df-e8','dg-e5','ef-e3','eg-e2','eh-e12','fg-e6','fh-e10','gh-e6'], - ['ab-e1','ac-e3','bc-e4','bd-e2','bh-e12','cd-e2','df-e8','dh-e10'], - ['ab-e1','ac-e3','ad-e5','bc-e4','bd-e2','cd-e2','ce-e4','de-e1','df-e8','dg-e5','ef-e3','eg-e2','fg-e6'] - ] +def preprocess(edge_array: List[List[str]) -> List[List[List[str]]]: for i in range(len(edge_array)): for j in range(len(edge_array[i])): t=edge_array[i][j].split('-') edge_array[i][j]=t - + +if __name__ == "__main__": + + preprocess(edge_array) frequency_table,cluster,nodes,support=get_frequency_table(edge_array) graph=construct_graph(cluster,nodes) paths = [] From 116877aac3e33529b3107bac04e3f4946acd2f43 Mon Sep 17 00:00:00 2001 From: siva1098 <32545976+siva1098@users.noreply.github.com> Date: Sat, 18 Apr 2020 12:03:30 +0530 Subject: [PATCH 08/37] Update graphs/frequent_pattern_graph_miner.py Co-Authored-By: Christian Clauss --- graphs/frequent_pattern_graph_miner.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/graphs/frequent_pattern_graph_miner.py b/graphs/frequent_pattern_graph_miner.py index 18e664d84484..ef3cf59292f5 100644 --- a/graphs/frequent_pattern_graph_miner.py +++ b/graphs/frequent_pattern_graph_miner.py @@ -57,9 +57,7 @@ def get_frequency_table(edge_array): bt=''.join(bit) s=bt.count('1') frequency_table[item]=[s,bt] - ''' - Store [Distinct edge, WT(Bitcode), Bitcode] in Descending order - ''' + # Store [Distinct edge, WT(Bitcode), Bitcode] in Descending order sorted_frequency_table=[[k,v[0],v[1]] for k,v in sorted(frequency_table.items(),key=lambda v:v[1][0],reverse=True)] ''' format cluster:{WT(bitcode):nodes with same WT} From 09f87e139420b5eb774ae4cf7d1a02c7bde7e9e9 Mon Sep 17 00:00:00 2001 From: siva1098 <32545976+siva1098@users.noreply.github.com> Date: Sat, 18 Apr 2020 12:03:52 +0530 Subject: [PATCH 09/37] Update graphs/frequent_pattern_graph_miner.py Co-Authored-By: Christian Clauss --- graphs/frequent_pattern_graph_miner.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/graphs/frequent_pattern_graph_miner.py b/graphs/frequent_pattern_graph_miner.py index ef3cf59292f5..3b34a857d147 100644 --- a/graphs/frequent_pattern_graph_miner.py +++ b/graphs/frequent_pattern_graph_miner.py @@ -59,9 +59,7 @@ def get_frequency_table(edge_array): frequency_table[item]=[s,bt] # Store [Distinct edge, WT(Bitcode), Bitcode] in Descending order sorted_frequency_table=[[k,v[0],v[1]] for k,v in sorted(frequency_table.items(),key=lambda v:v[1][0],reverse=True)] - ''' - format cluster:{WT(bitcode):nodes with same WT} - ''' + # format cluster:{WT(bitcode):nodes with same WT} cluster={} ''' format nodes={bitcode:edges that represent the bitcode} From 46e04f881a2eda607bcc37be26b6f783ad7584ff Mon Sep 17 00:00:00 2001 From: siva1098 <32545976+siva1098@users.noreply.github.com> Date: Sat, 18 Apr 2020 12:04:04 +0530 Subject: [PATCH 10/37] Update graphs/frequent_pattern_graph_miner.py Co-Authored-By: Christian Clauss --- graphs/frequent_pattern_graph_miner.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/graphs/frequent_pattern_graph_miner.py b/graphs/frequent_pattern_graph_miner.py index 3b34a857d147..6162c6356adc 100644 --- a/graphs/frequent_pattern_graph_miner.py +++ b/graphs/frequent_pattern_graph_miner.py @@ -61,9 +61,7 @@ def get_frequency_table(edge_array): sorted_frequency_table=[[k,v[0],v[1]] for k,v in sorted(frequency_table.items(),key=lambda v:v[1][0],reverse=True)] # format cluster:{WT(bitcode):nodes with same WT} cluster={} - ''' - format nodes={bitcode:edges that represent the bitcode} - ''' + # format nodes={bitcode:edges that represent the bitcode} nodes={} support=[] From e4d0b7930c14b5af4eeecd7d8b252461c13b76cd Mon Sep 17 00:00:00 2001 From: siva1098 <32545976+siva1098@users.noreply.github.com> Date: Sat, 18 Apr 2020 12:04:44 +0530 Subject: [PATCH 11/37] Update graphs/frequent_pattern_graph_miner.py Co-Authored-By: Christian Clauss --- graphs/frequent_pattern_graph_miner.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/graphs/frequent_pattern_graph_miner.py b/graphs/frequent_pattern_graph_miner.py index 6162c6356adc..8e77f3cedc6f 100644 --- a/graphs/frequent_pattern_graph_miner.py +++ b/graphs/frequent_pattern_graph_miner.py @@ -82,9 +82,9 @@ def print_all(): print(key,value) print("\nSupport\n") print(support) - '''print("\n Edge List\n") - for i in EL: - print(i)''' + # print("\n Edge List\n") + # for i in EL: + # print(i) print("\n Cluster \n") for key,value in sorted(cluster.items(),reverse=True): print(key, value) From 7456e095f01a5e6c2f7a32b405eebf7e3e8ac5dc Mon Sep 17 00:00:00 2001 From: siva1098 <32545976+siva1098@users.noreply.github.com> Date: Sat, 18 Apr 2020 12:05:44 +0530 Subject: [PATCH 12/37] Update graphs/frequent_pattern_graph_miner.py Co-Authored-By: Christian Clauss --- graphs/frequent_pattern_graph_miner.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/graphs/frequent_pattern_graph_miner.py b/graphs/frequent_pattern_graph_miner.py index 8e77f3cedc6f..493e93ff04d9 100644 --- a/graphs/frequent_pattern_graph_miner.py +++ b/graphs/frequent_pattern_graph_miner.py @@ -171,7 +171,8 @@ def freq_subgraphs_edge_list(paths): freq_sub_EL.append(EL) return freq_sub_EL -def preprocess(edge_array: List[List[str]) -> List[List[List[str]]]: +def preprocess(edge_array: List[List[str]]) -> List[List[List[str]]]: + for i in range(len(edge_array)): for j in range(len(edge_array[i])): t=edge_array[i][j].split('-') From 73219865c843dc02a5236fa7aa8d35f44ec4da3e Mon Sep 17 00:00:00 2001 From: siva1098 <32545976+siva1098@users.noreply.github.com> Date: Sat, 18 Apr 2020 12:30:55 +0530 Subject: [PATCH 13/37] Update graphs/frequent_pattern_graph_miner.py Co-Authored-By: Christian Clauss --- graphs/frequent_pattern_graph_miner.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/graphs/frequent_pattern_graph_miner.py b/graphs/frequent_pattern_graph_miner.py index 493e93ff04d9..e5e107b961da 100644 --- a/graphs/frequent_pattern_graph_miner.py +++ b/graphs/frequent_pattern_graph_miner.py @@ -21,7 +21,12 @@ def get_distinct_edge(edge_array: List[List[str]]) -> List[str]: ''' Return Distinct edges from edge array of multiple graphs + >>> sorted(get_distinct_edge(edge_array)) + ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'] + >>> sorted(get_distinct_edge(edge_array)) + ['z', 'b', 'c', 'd', 'e', 'f', 'g', 'h'] ''' + distinct_edge=set() for i,row in enumerate(edge_array): From 9fd973d94203a2270aa70030d5d0bc34f29cc316 Mon Sep 17 00:00:00 2001 From: siva1098 <32545976+siva1098@users.noreply.github.com> Date: Sun, 19 Apr 2020 18:00:43 +0530 Subject: [PATCH 14/37] Update graphs/frequent_pattern_graph_miner.py Co-Authored-By: Christian Clauss --- graphs/frequent_pattern_graph_miner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/graphs/frequent_pattern_graph_miner.py b/graphs/frequent_pattern_graph_miner.py index e5e107b961da..f1f2297f2391 100644 --- a/graphs/frequent_pattern_graph_miner.py +++ b/graphs/frequent_pattern_graph_miner.py @@ -29,8 +29,8 @@ def get_distinct_edge(edge_array: List[List[str]]) -> List[str]: distinct_edge=set() - for i,row in enumerate(edge_array): - for j,item in enumerate(row): + for row in edge_array: + for item in row: distinct_edge.add(item[0]) return list(distinct_edge) From 3fb1340442843612e9c1cfba0a23f723ebe01198 Mon Sep 17 00:00:00 2001 From: siva1098 <32545976+siva1098@users.noreply.github.com> Date: Sun, 19 Apr 2020 18:01:01 +0530 Subject: [PATCH 15/37] Update graphs/frequent_pattern_graph_miner.py Co-Authored-By: Christian Clauss --- graphs/frequent_pattern_graph_miner.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/graphs/frequent_pattern_graph_miner.py b/graphs/frequent_pattern_graph_miner.py index f1f2297f2391..6bfd75905db9 100644 --- a/graphs/frequent_pattern_graph_miner.py +++ b/graphs/frequent_pattern_graph_miner.py @@ -39,7 +39,8 @@ def get_bitcode(edge_array,distinct_edge): ''' Return bitcode of distinct_edge ''' - bitcode=['0' for i in enumerate(edge_array)] + bitcode=['0' for i in range(edge_array)] + for i,row in enumerate(edge_array): for j,item in enumerate(row): From 0a1ea2f15b9a4e1d1a5ff84fe8a8adbd2b4e5f73 Mon Sep 17 00:00:00 2001 From: siva1098 <32545976+siva1098@users.noreply.github.com> Date: Sun, 19 Apr 2020 18:06:09 +0530 Subject: [PATCH 16/37] Update frequent_pattern_graph_miner.py --- graphs/frequent_pattern_graph_miner.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/graphs/frequent_pattern_graph_miner.py b/graphs/frequent_pattern_graph_miner.py index 6bfd75905db9..27ec903cc947 100644 --- a/graphs/frequent_pattern_graph_miner.py +++ b/graphs/frequent_pattern_graph_miner.py @@ -35,7 +35,7 @@ def get_distinct_edge(edge_array: List[List[str]]) -> List[str]: return list(distinct_edge) -def get_bitcode(edge_array,distinct_edge): +def get_bitcode(edge_array: List[List[str]],distinct_edge: List[str]) -> str: ''' Return bitcode of distinct_edge ''' @@ -51,7 +51,7 @@ def get_bitcode(edge_array,distinct_edge): return bitcode -def get_frequency_table(edge_array): +def get_frequency_table(edge_array:List[List[str]]) -> List[List[str]]: ''' Returns Frequency Table,cluster,nodes,support ''' @@ -82,7 +82,7 @@ def get_frequency_table(edge_array): return sorted_frequency_table,cluster,nodes,support -def print_all(): +def print_all() -> None: print("\nNodes\n") for key,value in nodes.items(): print(key,value) @@ -101,7 +101,7 @@ def print_all(): for edge_list in freq_subgraph_edge_list: print(edge_list) -def create_edge(nodes,graph,cluster,c1): +def create_edge(nodes: Dict[str,List[str]],graph: Dict[tuple,List[List[str]]],cluster: Dict[int,Dict[str,List[str]]],c1: int) -> None: ''' create edge between the nodes ''' @@ -124,7 +124,7 @@ def create_edge(nodes,graph,cluster,c1): else: break -def construct_graph(cluster,nodes): +def construct_graph(cluster: Dict[int,Dict[str,List[str]]],nodes: Dict[str,List[str]]) -> Dict[tuple,List[List[str]]]: X=cluster[max(cluster.keys())] cluster[max(cluster.keys())+1]='Header' graph={} @@ -142,7 +142,7 @@ def construct_graph(cluster,nodes): return graph -def myDFS(graph,start,end,path=[]): +def myDFS(graph: Dict[tuple,List[List[str]]],start: tuple,end: tuple,path=[]): ''' find different DFS walk from given node to Header node ''' @@ -153,7 +153,7 @@ def myDFS(graph,start,end,path=[]): if tuple(node) not in path: myDFS(graph,tuple(node),end,path) -def find_freq_subgraph_given_support(s,cluster,graph): +def find_freq_subgraph_given_support(s: int,cluster: Dict[int,Dict[str,List[str]]],graph: Dict[tuple,List[List[str]]]) -> None: ''' find edges of multiple frequent subgraphs ''' @@ -162,7 +162,7 @@ def find_freq_subgraph_given_support(s,cluster,graph): for i in cluster[k].keys(): myDFS(graph,tuple(cluster[k][i]),tuple(['Header'])) -def freq_subgraphs_edge_list(paths): +def freq_subgraphs_edge_list(paths: List[List[tuple]]) -> List[List[tuple]]: ''' returns Edge list for frequent subgraphs ''' From 32adf11b31e9478ac9b796d5645a85fa2da4ab84 Mon Sep 17 00:00:00 2001 From: siva1098 <32545976+siva1098@users.noreply.github.com> Date: Sun, 19 Apr 2020 18:06:47 +0530 Subject: [PATCH 17/37] Update frequent_pattern_graph_miner.py --- graphs/frequent_pattern_graph_miner.py | 1 + 1 file changed, 1 insertion(+) diff --git a/graphs/frequent_pattern_graph_miner.py b/graphs/frequent_pattern_graph_miner.py index 27ec903cc947..0d1c416ffc67 100644 --- a/graphs/frequent_pattern_graph_miner.py +++ b/graphs/frequent_pattern_graph_miner.py @@ -9,6 +9,7 @@ ''' from typing import List +from typing import Dict edge_array=[ ['ab-e1','ac-e3','ad-e5','bc-e4','bd-e2','be-e6','bh-e12','cd-e2','ce-e4','de-e1','df-e8','dg-e5','dh-e10','ef-e3','eg-e2','fg-e6','gh-e6','hi-e3'], From c519794a1d6e70c4fc160e02fdd18ed07d1975a8 Mon Sep 17 00:00:00 2001 From: siva1098 <32545976+siva1098@users.noreply.github.com> Date: Sun, 19 Apr 2020 18:17:12 +0530 Subject: [PATCH 18/37] Update frequent_pattern_graph_miner.py --- graphs/frequent_pattern_graph_miner.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/graphs/frequent_pattern_graph_miner.py b/graphs/frequent_pattern_graph_miner.py index 0d1c416ffc67..749769450b51 100644 --- a/graphs/frequent_pattern_graph_miner.py +++ b/graphs/frequent_pattern_graph_miner.py @@ -24,8 +24,6 @@ def get_distinct_edge(edge_array: List[List[str]]) -> List[str]: Return Distinct edges from edge array of multiple graphs >>> sorted(get_distinct_edge(edge_array)) ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'] - >>> sorted(get_distinct_edge(edge_array)) - ['z', 'b', 'c', 'd', 'e', 'f', 'g', 'h'] ''' distinct_edge=set() From e38e510f10d7bfb36b3f902c684c925edafeda3c Mon Sep 17 00:00:00 2001 From: siva1098 <32545976+siva1098@users.noreply.github.com> Date: Sun, 19 Apr 2020 20:53:05 +0530 Subject: [PATCH 19/37] Update frequent_pattern_graph_miner.py --- graphs/frequent_pattern_graph_miner.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/graphs/frequent_pattern_graph_miner.py b/graphs/frequent_pattern_graph_miner.py index 749769450b51..eb76122ae897 100644 --- a/graphs/frequent_pattern_graph_miner.py +++ b/graphs/frequent_pattern_graph_miner.py @@ -37,6 +37,10 @@ def get_distinct_edge(edge_array: List[List[str]]) -> List[str]: def get_bitcode(edge_array: List[List[str]],distinct_edge: List[str]) -> str: ''' Return bitcode of distinct_edge + >>> get_bitcode(edge_array, get_distinct_edge(edge_array)) + ['1', '1', '0', '1', '0'] + >>> get_bitcode(edge_array, get_distinct_edge(edge_array)) + ['a', '1', '0', '1', '0'] ''' bitcode=['0' for i in range(edge_array)] From e222c53e24b092416954b8726fd4cb1ca4d02d39 Mon Sep 17 00:00:00 2001 From: siva1098 <32545976+siva1098@users.noreply.github.com> Date: Sun, 19 Apr 2020 21:01:41 +0530 Subject: [PATCH 20/37] Update frequent_pattern_graph_miner.py --- graphs/frequent_pattern_graph_miner.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/graphs/frequent_pattern_graph_miner.py b/graphs/frequent_pattern_graph_miner.py index eb76122ae897..9220b1e44e24 100644 --- a/graphs/frequent_pattern_graph_miner.py +++ b/graphs/frequent_pattern_graph_miner.py @@ -39,8 +39,6 @@ def get_bitcode(edge_array: List[List[str]],distinct_edge: List[str]) -> str: Return bitcode of distinct_edge >>> get_bitcode(edge_array, get_distinct_edge(edge_array)) ['1', '1', '0', '1', '0'] - >>> get_bitcode(edge_array, get_distinct_edge(edge_array)) - ['a', '1', '0', '1', '0'] ''' bitcode=['0' for i in range(edge_array)] From 45b2611252f0ad1cbd8f87e7fbbd4213c10b1b7e Mon Sep 17 00:00:00 2001 From: siva1098 <32545976+siva1098@users.noreply.github.com> Date: Sun, 19 Apr 2020 21:45:44 +0530 Subject: [PATCH 21/37] Update graphs/frequent_pattern_graph_miner.py Co-Authored-By: Christian Clauss --- graphs/frequent_pattern_graph_miner.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/graphs/frequent_pattern_graph_miner.py b/graphs/frequent_pattern_graph_miner.py index 9220b1e44e24..57bb045addb5 100644 --- a/graphs/frequent_pattern_graph_miner.py +++ b/graphs/frequent_pattern_graph_miner.py @@ -40,7 +40,8 @@ def get_bitcode(edge_array: List[List[str]],distinct_edge: List[str]) -> str: >>> get_bitcode(edge_array, get_distinct_edge(edge_array)) ['1', '1', '0', '1', '0'] ''' - bitcode=['0' for i in range(edge_array)] + bitcode=['0'] * len(edge_array) + for i,row in enumerate(edge_array): From 937d288f21dcf48aa913cae114b2a501d6a827f8 Mon Sep 17 00:00:00 2001 From: siva1098 <32545976+siva1098@users.noreply.github.com> Date: Mon, 20 Apr 2020 09:50:43 +0530 Subject: [PATCH 22/37] Update frequent_pattern_graph_miner.py --- graphs/frequent_pattern_graph_miner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphs/frequent_pattern_graph_miner.py b/graphs/frequent_pattern_graph_miner.py index 57bb045addb5..70cfa25822e3 100644 --- a/graphs/frequent_pattern_graph_miner.py +++ b/graphs/frequent_pattern_graph_miner.py @@ -34,7 +34,7 @@ def get_distinct_edge(edge_array: List[List[str]]) -> List[str]: return list(distinct_edge) -def get_bitcode(edge_array: List[List[str]],distinct_edge: List[str]) -> str: +def get_bitcode(edge_array: List[List[str]],distinct_edge: List[str]) -> List[str]: ''' Return bitcode of distinct_edge >>> get_bitcode(edge_array, get_distinct_edge(edge_array)) From 96317507fbd819b0a7b4c34dc389b0fbf38aaab0 Mon Sep 17 00:00:00 2001 From: siva1098 <32545976+siva1098@users.noreply.github.com> Date: Mon, 20 Apr 2020 10:36:13 +0530 Subject: [PATCH 23/37] Update frequent_pattern_graph_miner.py --- graphs/frequent_pattern_graph_miner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphs/frequent_pattern_graph_miner.py b/graphs/frequent_pattern_graph_miner.py index 70cfa25822e3..70b6d2577c0c 100644 --- a/graphs/frequent_pattern_graph_miner.py +++ b/graphs/frequent_pattern_graph_miner.py @@ -37,7 +37,7 @@ def get_distinct_edge(edge_array: List[List[str]]) -> List[str]: def get_bitcode(edge_array: List[List[str]],distinct_edge: List[str]) -> List[str]: ''' Return bitcode of distinct_edge - >>> get_bitcode(edge_array, get_distinct_edge(edge_array)) + >>> get_bitcode(edge_array, get_distinct_edge(edge_array)[0]) ['1', '1', '0', '1', '0'] ''' bitcode=['0'] * len(edge_array) From 36a0c9f3222c537362c04db982b288d0cc8807b6 Mon Sep 17 00:00:00 2001 From: siva1098 <32545976+siva1098@users.noreply.github.com> Date: Mon, 20 Apr 2020 11:19:35 +0530 Subject: [PATCH 24/37] Update frequent_pattern_graph_miner.py --- graphs/frequent_pattern_graph_miner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphs/frequent_pattern_graph_miner.py b/graphs/frequent_pattern_graph_miner.py index 70b6d2577c0c..b62e725e2b25 100644 --- a/graphs/frequent_pattern_graph_miner.py +++ b/graphs/frequent_pattern_graph_miner.py @@ -38,7 +38,7 @@ def get_bitcode(edge_array: List[List[str]],distinct_edge: List[str]) -> List[st ''' Return bitcode of distinct_edge >>> get_bitcode(edge_array, get_distinct_edge(edge_array)[0]) - ['1', '1', '0', '1', '0'] + ['1', '1', '1', '1', '1'] ''' bitcode=['0'] * len(edge_array) From 67cc2541c67adcfbd017706ee1201217f91d687b Mon Sep 17 00:00:00 2001 From: siva1098 <32545976+siva1098@users.noreply.github.com> Date: Mon, 20 Apr 2020 11:36:14 +0530 Subject: [PATCH 25/37] Update frequent_pattern_graph_miner.py --- graphs/frequent_pattern_graph_miner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphs/frequent_pattern_graph_miner.py b/graphs/frequent_pattern_graph_miner.py index b62e725e2b25..6661cbaccc66 100644 --- a/graphs/frequent_pattern_graph_miner.py +++ b/graphs/frequent_pattern_graph_miner.py @@ -38,7 +38,7 @@ def get_bitcode(edge_array: List[List[str]],distinct_edge: List[str]) -> List[st ''' Return bitcode of distinct_edge >>> get_bitcode(edge_array, get_distinct_edge(edge_array)[0]) - ['1', '1', '1', '1', '1'] + ['1', '0', '1', '0', '0'] ''' bitcode=['0'] * len(edge_array) From 0420044892b185ec957bcc48ec511906ef15aa5f Mon Sep 17 00:00:00 2001 From: siva1098 <32545976+siva1098@users.noreply.github.com> Date: Tue, 21 Apr 2020 14:37:19 +0530 Subject: [PATCH 26/37] Update frequent_pattern_graph_miner.py --- graphs/frequent_pattern_graph_miner.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/graphs/frequent_pattern_graph_miner.py b/graphs/frequent_pattern_graph_miner.py index 6661cbaccc66..88ac899ab5ce 100644 --- a/graphs/frequent_pattern_graph_miner.py +++ b/graphs/frequent_pattern_graph_miner.py @@ -34,11 +34,11 @@ def get_distinct_edge(edge_array: List[List[str]]) -> List[str]: return list(distinct_edge) -def get_bitcode(edge_array: List[List[str]],distinct_edge: List[str]) -> List[str]: +def get_bitcode(edge_array: List[List[str]],distinct_edge: List[str]) -> str: ''' Return bitcode of distinct_edge >>> get_bitcode(edge_array, get_distinct_edge(edge_array)[0]) - ['1', '0', '1', '0', '0'] + '11111' ''' bitcode=['0'] * len(edge_array) @@ -51,7 +51,7 @@ def get_bitcode(edge_array: List[List[str]],distinct_edge: List[str]) -> List[st break - return bitcode + return ''.join(bitcode) def get_frequency_table(edge_array:List[List[str]]) -> List[List[str]]: ''' @@ -62,9 +62,9 @@ def get_frequency_table(edge_array:List[List[str]]) -> List[List[str]]: for i,item in enumerate(distinct_edge): bit=get_bitcode(edge_array,item) - bt=''.join(bit) - s=bt.count('1') - frequency_table[item]=[s,bt] + #bt=''.join(bit) + s=bit.count('1') + frequency_table[item]=[s, bit] # Store [Distinct edge, WT(Bitcode), Bitcode] in Descending order sorted_frequency_table=[[k,v[0],v[1]] for k,v in sorted(frequency_table.items(),key=lambda v:v[1][0],reverse=True)] # format cluster:{WT(bitcode):nodes with same WT} From b7c1a93a8ba2802b311796970d291454a5d21805 Mon Sep 17 00:00:00 2001 From: siva1098 <32545976+siva1098@users.noreply.github.com> Date: Tue, 21 Apr 2020 15:02:35 +0530 Subject: [PATCH 27/37] Update frequent_pattern_graph_miner.py --- graphs/frequent_pattern_graph_miner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphs/frequent_pattern_graph_miner.py b/graphs/frequent_pattern_graph_miner.py index 88ac899ab5ce..59ae1d96dcd3 100644 --- a/graphs/frequent_pattern_graph_miner.py +++ b/graphs/frequent_pattern_graph_miner.py @@ -38,7 +38,7 @@ def get_bitcode(edge_array: List[List[str]],distinct_edge: List[str]) -> str: ''' Return bitcode of distinct_edge >>> get_bitcode(edge_array, get_distinct_edge(edge_array)[0]) - '11111' + '11101' ''' bitcode=['0'] * len(edge_array) From 39dbbf2eb8d3eddd8d139f45c0ddd5a63e675178 Mon Sep 17 00:00:00 2001 From: siva1098 <32545976+siva1098@users.noreply.github.com> Date: Wed, 22 Apr 2020 16:49:38 +0530 Subject: [PATCH 28/37] Update graphs/frequent_pattern_graph_miner.py Co-Authored-By: Christian Clauss --- graphs/frequent_pattern_graph_miner.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/graphs/frequent_pattern_graph_miner.py b/graphs/frequent_pattern_graph_miner.py index 59ae1d96dcd3..afd28804b3a1 100644 --- a/graphs/frequent_pattern_graph_miner.py +++ b/graphs/frequent_pattern_graph_miner.py @@ -41,16 +41,11 @@ def get_bitcode(edge_array: List[List[str]],distinct_edge: List[str]) -> str: '11101' ''' bitcode=['0'] * len(edge_array) - - - - for i,row in enumerate(edge_array): - for j,item in enumerate(row): + for i, row in enumerate(edge_array): + for item in row: if distinct_edge in item[0]: bitcode[i]='1' break - - return ''.join(bitcode) def get_frequency_table(edge_array:List[List[str]]) -> List[List[str]]: From 3558e91074186c6a3854ecd674471124e13c3794 Mon Sep 17 00:00:00 2001 From: siva1098 <32545976+siva1098@users.noreply.github.com> Date: Wed, 22 Apr 2020 17:08:48 +0530 Subject: [PATCH 29/37] Update frequent_pattern_graph_miner.py --- graphs/frequent_pattern_graph_miner.py | 60 +++++++++++++++++++------- 1 file changed, 44 insertions(+), 16 deletions(-) diff --git a/graphs/frequent_pattern_graph_miner.py b/graphs/frequent_pattern_graph_miner.py index afd28804b3a1..3440b6075d45 100644 --- a/graphs/frequent_pattern_graph_miner.py +++ b/graphs/frequent_pattern_graph_miner.py @@ -50,44 +50,67 @@ def get_bitcode(edge_array: List[List[str]],distinct_edge: List[str]) -> str: def get_frequency_table(edge_array:List[List[str]]) -> List[List[str]]: ''' - Returns Frequency Table,cluster,nodes,support + Returns Frequency Table ''' distinct_edge=get_distinct_edge(edge_array) frequency_table=dict() - for i,item in enumerate(distinct_edge): + + for item in distinct_edge: bit=get_bitcode(edge_array,item) + #print('bit',bit) #bt=''.join(bit) s=bit.count('1') - frequency_table[item]=[s, bit] - # Store [Distinct edge, WT(Bitcode), Bitcode] in Descending order + frequency_table[item]=[s,bit] + ''' + Store [Distinct edge, WT(Bitcode), Bitcode] in Descending order + ''' sorted_frequency_table=[[k,v[0],v[1]] for k,v in sorted(frequency_table.items(),key=lambda v:v[1][0],reverse=True)] - # format cluster:{WT(bitcode):nodes with same WT} - cluster={} - # format nodes={bitcode:edges that represent the bitcode} + + return sorted_frequency_table + +def get_nodes(frequency_table: List[List[str]]) -> Dict[str,List[str]]: + ''' + Returns nodes + format nodes={bitcode:edges that represent the bitcode} + ''' + nodes={} - support=[] - for i,item in enumerate(sorted_frequency_table): + for i,item in enumerate(frequency_table): nodes.setdefault(item[2],[]).append(item[0]) - + + return nodes + +def get_cluster(nodes: Dict[str,List[str]]) -> Dict[int,Dict[str,List[str]]]: + ''' + Returns cluster + format cluster:{WT(bitcode):nodes with same WT} + ''' + + cluster={} for key,value in nodes.items(): cluster.setdefault(key.count('1'),{})[key]=value + return cluster + +def get_support(cluster :Dict[int,Dict[str,List[str]]])-> List[float]: + ''' + Returns support + format cluster:{WT(bitcode):nodes with same WT} + ''' + + support=[] for i in cluster: support.append(i*100/len(cluster)) - return sorted_frequency_table,cluster,nodes,support - + return support def print_all() -> None: print("\nNodes\n") for key,value in nodes.items(): print(key,value) print("\nSupport\n") print(support) - # print("\n Edge List\n") - # for i in EL: - # print(i) print("\n Cluster \n") for key,value in sorted(cluster.items(),reverse=True): print(key, value) @@ -184,8 +207,13 @@ def preprocess(edge_array: List[List[str]]) -> List[List[List[str]]]: if __name__ == "__main__": preprocess(edge_array) - frequency_table,cluster,nodes,support=get_frequency_table(edge_array) + + frequency_table=get_frequency_table(edge_array) + nodes=get_nodes(frequency_table) + cluster=get_cluster(nodes) + support=get_support(cluster) graph=construct_graph(cluster,nodes) + paths = [] find_freq_subgraph_given_support(60,cluster,graph) freq_subgraph_edge_list=freq_subgraphs_edge_list(paths) From 81f67ee1544f3439329899a22271b68c92c2dc0b Mon Sep 17 00:00:00 2001 From: siva1098 <32545976+siva1098@users.noreply.github.com> Date: Fri, 24 Apr 2020 18:36:30 +0530 Subject: [PATCH 30/37] Update frequent_pattern_graph_miner.py --- graphs/frequent_pattern_graph_miner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphs/frequent_pattern_graph_miner.py b/graphs/frequent_pattern_graph_miner.py index 3440b6075d45..38906d95e462 100644 --- a/graphs/frequent_pattern_graph_miner.py +++ b/graphs/frequent_pattern_graph_miner.py @@ -178,7 +178,7 @@ def find_freq_subgraph_given_support(s: int,cluster: Dict[int,Dict[str,List[str] find edges of multiple frequent subgraphs ''' k=int(s/100*(len(cluster)-1)) - freq_subgraphs=[] + for i in cluster[k].keys(): myDFS(graph,tuple(cluster[k][i]),tuple(['Header'])) From 39d2ddf5163caa67d79fb3641114467b35529f90 Mon Sep 17 00:00:00 2001 From: siva1098 <32545976+siva1098@users.noreply.github.com> Date: Sat, 25 Apr 2020 12:38:04 +0530 Subject: [PATCH 31/37] Update frequent_pattern_graph_miner.py --- graphs/frequent_pattern_graph_miner.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/graphs/frequent_pattern_graph_miner.py b/graphs/frequent_pattern_graph_miner.py index 38906d95e462..f692cd92ea27 100644 --- a/graphs/frequent_pattern_graph_miner.py +++ b/graphs/frequent_pattern_graph_miner.py @@ -73,6 +73,8 @@ def get_nodes(frequency_table: List[List[str]]) -> Dict[str,List[str]]: ''' Returns nodes format nodes={bitcode:edges that represent the bitcode} + >>> get_nodes([['ab', 5, '11111'], ['ac', 5, '11111'], ['df', 5, '11111'], ['bd', 5, '11111'], ['bc', 5, '11111']]) + {'11111': ['ab', 'ac', 'df', 'bd', 'bc']} ''' nodes={} @@ -97,7 +99,8 @@ def get_cluster(nodes: Dict[str,List[str]]) -> Dict[int,Dict[str,List[str]]]: def get_support(cluster :Dict[int,Dict[str,List[str]]])-> List[float]: ''' Returns support - format cluster:{WT(bitcode):nodes with same WT} + >>> get_support({5: {'11111': ['ab', 'ac', 'df', 'bd', 'bc']}, 4: {'11101': ['ef', 'eg', 'de', 'fg'], '11011': ['cd']}, 3: {'11001': ['ad'], '10101': ['dg']}, 2: {'10010': ['dh', 'bh'], '11000': ['be'], '10100': ['gh'], '10001': ['ce']}, 1: {'00100': ['fh', 'eh'], '10000': ['hi']}}) + [20.0, 40.0, 60.0, 80.0, 100.0] ''' support=[] @@ -198,7 +201,11 @@ def freq_subgraphs_edge_list(paths: List[List[tuple]]) -> List[List[tuple]]: return freq_sub_EL def preprocess(edge_array: List[List[str]]) -> List[List[List[str]]]: - + ''' + Preprocess the edge array + >>> preprocess([['ab-e1','ac-e3','ad-e5','bc-e4','bd-e2','be-e6','bh-e12','cd-e2','ce-e4','de-e1','df-e8','dg-e5','dh-e10','ef-e3','eg-e2','fg-e6','gh-e6','hi-e3']]) + + ''' for i in range(len(edge_array)): for j in range(len(edge_array[i])): t=edge_array[i][j].split('-') From fdb4c33166bbb2450de3d356a5c98682feb8e139 Mon Sep 17 00:00:00 2001 From: siva1098 <32545976+siva1098@users.noreply.github.com> Date: Sat, 25 Apr 2020 14:00:12 +0530 Subject: [PATCH 32/37] Update frequent_pattern_graph_miner.py --- graphs/frequent_pattern_graph_miner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphs/frequent_pattern_graph_miner.py b/graphs/frequent_pattern_graph_miner.py index f692cd92ea27..0d8d9a3898d4 100644 --- a/graphs/frequent_pattern_graph_miner.py +++ b/graphs/frequent_pattern_graph_miner.py @@ -100,7 +100,7 @@ def get_support(cluster :Dict[int,Dict[str,List[str]]])-> List[float]: ''' Returns support >>> get_support({5: {'11111': ['ab', 'ac', 'df', 'bd', 'bc']}, 4: {'11101': ['ef', 'eg', 'de', 'fg'], '11011': ['cd']}, 3: {'11001': ['ad'], '10101': ['dg']}, 2: {'10010': ['dh', 'bh'], '11000': ['be'], '10100': ['gh'], '10001': ['ce']}, 1: {'00100': ['fh', 'eh'], '10000': ['hi']}}) - [20.0, 40.0, 60.0, 80.0, 100.0] + [100.0, 80.0, 60.0, 40.0, 20.0] ''' support=[] From b229de02e1939bac5a012fffe9e575334fe3df63 Mon Sep 17 00:00:00 2001 From: siva1098 <32545976+siva1098@users.noreply.github.com> Date: Sat, 25 Apr 2020 14:19:35 +0530 Subject: [PATCH 33/37] Update frequent_pattern_graph_miner.py --- graphs/frequent_pattern_graph_miner.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/graphs/frequent_pattern_graph_miner.py b/graphs/frequent_pattern_graph_miner.py index 0d8d9a3898d4..2889526f48b5 100644 --- a/graphs/frequent_pattern_graph_miner.py +++ b/graphs/frequent_pattern_graph_miner.py @@ -99,7 +99,8 @@ def get_cluster(nodes: Dict[str,List[str]]) -> Dict[int,Dict[str,List[str]]]: def get_support(cluster :Dict[int,Dict[str,List[str]]])-> List[float]: ''' Returns support - >>> get_support({5: {'11111': ['ab', 'ac', 'df', 'bd', 'bc']}, 4: {'11101': ['ef', 'eg', 'de', 'fg'], '11011': ['cd']}, 3: {'11001': ['ad'], '10101': ['dg']}, 2: {'10010': ['dh', 'bh'], '11000': ['be'], '10100': ['gh'], '10001': ['ce']}, 1: {'00100': ['fh', 'eh'], '10000': ['hi']}}) + >>> get_support({5: {'11111': ['ab', 'ac', 'df', 'bd', 'bc']}, 4: {'11101': ['ef', 'eg', 'de', 'fg'], '11011': ['cd']}, 3: {'11001': ['ad'], '10101': ['dg']}, \ + 2: {'10010': ['dh', 'bh'], '11000': ['be'], '10100': ['gh'], '10001': ['ce']}, 1: {'00100': ['fh', 'eh'], '10000': ['hi']}}) [100.0, 80.0, 60.0, 40.0, 20.0] ''' From 860748d4c14acff657d08f1d64952f2bb9d8bb84 Mon Sep 17 00:00:00 2001 From: siva1098 <32545976+siva1098@users.noreply.github.com> Date: Thu, 30 Apr 2020 10:55:20 +0530 Subject: [PATCH 34/37] Update frequent_pattern_graph_miner.py --- graphs/frequent_pattern_graph_miner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphs/frequent_pattern_graph_miner.py b/graphs/frequent_pattern_graph_miner.py index 2889526f48b5..945c99409a66 100644 --- a/graphs/frequent_pattern_graph_miner.py +++ b/graphs/frequent_pattern_graph_miner.py @@ -38,7 +38,7 @@ def get_bitcode(edge_array: List[List[str]],distinct_edge: List[str]) -> str: ''' Return bitcode of distinct_edge >>> get_bitcode(edge_array, get_distinct_edge(edge_array)[0]) - '11101' + '11111' ''' bitcode=['0'] * len(edge_array) for i, row in enumerate(edge_array): From 9a50cf6fad40309305a912df3ae718f456161b24 Mon Sep 17 00:00:00 2001 From: siva1098 <32545976+siva1098@users.noreply.github.com> Date: Thu, 30 Apr 2020 11:19:39 +0530 Subject: [PATCH 35/37] Update frequent_pattern_graph_miner.py --- graphs/frequent_pattern_graph_miner.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/graphs/frequent_pattern_graph_miner.py b/graphs/frequent_pattern_graph_miner.py index 945c99409a66..30144a1439dc 100644 --- a/graphs/frequent_pattern_graph_miner.py +++ b/graphs/frequent_pattern_graph_miner.py @@ -37,8 +37,6 @@ def get_distinct_edge(edge_array: List[List[str]]) -> List[str]: def get_bitcode(edge_array: List[List[str]],distinct_edge: List[str]) -> str: ''' Return bitcode of distinct_edge - >>> get_bitcode(edge_array, get_distinct_edge(edge_array)[0]) - '11111' ''' bitcode=['0'] * len(edge_array) for i, row in enumerate(edge_array): From 358fa99d560a8497baff95a52e2be7b6109415e8 Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Mon, 4 May 2020 15:31:28 +0200 Subject: [PATCH 36/37] Whitespace changes --- graphs/frequent_pattern_graph_miner.py | 43 +++++++++++++------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/graphs/frequent_pattern_graph_miner.py b/graphs/frequent_pattern_graph_miner.py index 30144a1439dc..b61a792a948a 100644 --- a/graphs/frequent_pattern_graph_miner.py +++ b/graphs/frequent_pattern_graph_miner.py @@ -8,8 +8,7 @@ URL:https://www.researchgate.net/publication/235255851_FP-GraphMiner_-_A_Fast_Frequent_Pattern_Mining_Algorithm_for_Network_Graphs ''' -from typing import List -from typing import Dict +from typing import Dict, List edge_array=[ ['ab-e1','ac-e3','ad-e5','bc-e4','bd-e2','be-e6','bh-e12','cd-e2','ce-e4','de-e1','df-e8','dg-e5','dh-e10','ef-e3','eg-e2','fg-e6','gh-e6','hi-e3'], @@ -19,21 +18,20 @@ ['ab-e1','ac-e3','ad-e5','bc-e4','bd-e2','cd-e2','ce-e4','de-e1','df-e8','dg-e5','ef-e3','eg-e2','fg-e6'] ] + def get_distinct_edge(edge_array: List[List[str]]) -> List[str]: ''' Return Distinct edges from edge array of multiple graphs >>> sorted(get_distinct_edge(edge_array)) ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'] ''' - distinct_edge=set() - for row in edge_array: for item in row: distinct_edge.add(item[0]) - return list(distinct_edge) + def get_bitcode(edge_array: List[List[str]],distinct_edge: List[str]) -> str: ''' Return bitcode of distinct_edge @@ -46,13 +44,13 @@ def get_bitcode(edge_array: List[List[str]],distinct_edge: List[str]) -> str: break return ''.join(bitcode) + def get_frequency_table(edge_array:List[List[str]]) -> List[List[str]]: ''' Returns Frequency Table ''' distinct_edge=get_distinct_edge(edge_array) frequency_table=dict() - for item in distinct_edge: bit=get_bitcode(edge_array,item) @@ -63,10 +61,10 @@ def get_frequency_table(edge_array:List[List[str]]) -> List[List[str]]: ''' Store [Distinct edge, WT(Bitcode), Bitcode] in Descending order ''' - sorted_frequency_table=[[k,v[0],v[1]] for k,v in sorted(frequency_table.items(),key=lambda v:v[1][0],reverse=True)] - + sorted_frequency_table=[[k,v[0],v[1]] for k,v in sorted(frequency_table.items(),key=lambda v:v[1][0],reverse=True)] return sorted_frequency_table + def get_nodes(frequency_table: List[List[str]]) -> Dict[str,List[str]]: ''' Returns nodes @@ -74,14 +72,12 @@ def get_nodes(frequency_table: List[List[str]]) -> Dict[str,List[str]]: >>> get_nodes([['ab', 5, '11111'], ['ac', 5, '11111'], ['df', 5, '11111'], ['bd', 5, '11111'], ['bc', 5, '11111']]) {'11111': ['ab', 'ac', 'df', 'bd', 'bc']} ''' - nodes={} - for i,item in enumerate(frequency_table): nodes.setdefault(item[2],[]).append(item[0]) - return nodes + def get_cluster(nodes: Dict[str,List[str]]) -> Dict[int,Dict[str,List[str]]]: ''' Returns cluster @@ -122,7 +118,8 @@ def print_all() -> None: print("\n Edge List of Frequent subgraphs \n") for edge_list in freq_subgraph_edge_list: print(edge_list) - + + def create_edge(nodes: Dict[str,List[str]],graph: Dict[tuple,List[List[str]]],cluster: Dict[int,Dict[str,List[str]]],c1: int) -> None: ''' create edge between the nodes @@ -146,6 +143,7 @@ def create_edge(nodes: Dict[str,List[str]],graph: Dict[tuple,List[List[str]]],cl else: break + def construct_graph(cluster: Dict[int,Dict[str,List[str]]],nodes: Dict[str,List[str]]) -> Dict[tuple,List[List[str]]]: X=cluster[max(cluster.keys())] cluster[max(cluster.keys())+1]='Header' @@ -164,6 +162,7 @@ def construct_graph(cluster: Dict[int,Dict[str,List[str]]],nodes: Dict[str,List[ return graph + def myDFS(graph: Dict[tuple,List[List[str]]],start: tuple,end: tuple,path=[]): ''' find different DFS walk from given node to Header node @@ -174,7 +173,8 @@ def myDFS(graph: Dict[tuple,List[List[str]]],start: tuple,end: tuple,path=[]): for node in graph[start]: if tuple(node) not in path: myDFS(graph,tuple(node),end,path) - + + def find_freq_subgraph_given_support(s: int,cluster: Dict[int,Dict[str,List[str]]],graph: Dict[tuple,List[List[str]]]) -> None: ''' find edges of multiple frequent subgraphs @@ -184,6 +184,7 @@ def find_freq_subgraph_given_support(s: int,cluster: Dict[int,Dict[str,List[str] for i in cluster[k].keys(): myDFS(graph,tuple(cluster[k][i]),tuple(['Header'])) + def freq_subgraphs_edge_list(paths: List[List[tuple]]) -> List[List[tuple]]: ''' returns Edge list for frequent subgraphs @@ -196,9 +197,10 @@ def freq_subgraphs_edge_list(paths: List[List[tuple]]) -> List[List[tuple]]: for e in temp: edge=(e[0],e[1]) EL.append(edge) - freq_sub_EL.append(EL) - + freq_sub_EL.append(EL) return freq_sub_EL + + def preprocess(edge_array: List[List[str]]) -> List[List[List[str]]]: ''' Preprocess the edge array @@ -209,19 +211,16 @@ def preprocess(edge_array: List[List[str]]) -> List[List[List[str]]]: for j in range(len(edge_array[i])): t=edge_array[i][j].split('-') edge_array[i][j]=t - -if __name__ == "__main__": - + + +if __name__ == "__main__": preprocess(edge_array) - frequency_table=get_frequency_table(edge_array) nodes=get_nodes(frequency_table) cluster=get_cluster(nodes) support=get_support(cluster) graph=construct_graph(cluster,nodes) - - paths = [] find_freq_subgraph_given_support(60,cluster,graph) + paths = [] freq_subgraph_edge_list=freq_subgraphs_edge_list(paths) - print_all() From a8ae4c80ece3edc35a29ebf48c84711ee523b8fe Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Thu, 7 May 2020 23:20:22 +0200 Subject: [PATCH 37/37] Format with psf/black --- graphs/frequent_pattern_graph_miner.py | 286 +++++++++++++------------ 1 file changed, 146 insertions(+), 140 deletions(-) diff --git a/graphs/frequent_pattern_graph_miner.py b/graphs/frequent_pattern_graph_miner.py index b61a792a948a..aa14fbdd3a3c 100644 --- a/graphs/frequent_pattern_graph_miner.py +++ b/graphs/frequent_pattern_graph_miner.py @@ -1,226 +1,232 @@ -''' +""" FP-GraphMiner - A Fast Frequent Pattern Mining Algorithm for Network Graphs -A novel Frequent Pattern Graph Mining algorithm, -FP-GraphMiner, that compactly represents a set of network graphs as a Frequent Pattern Graph (or FP-Graph). -This graph can be used to efficiently mine frequent subgraphs including maximal frequent subgraphs and maximum common subgraphs. - -URL:https://www.researchgate.net/publication/235255851_FP-GraphMiner_-_A_Fast_Frequent_Pattern_Mining_Algorithm_for_Network_Graphs - -''' -from typing import Dict, List - -edge_array=[ - ['ab-e1','ac-e3','ad-e5','bc-e4','bd-e2','be-e6','bh-e12','cd-e2','ce-e4','de-e1','df-e8','dg-e5','dh-e10','ef-e3','eg-e2','fg-e6','gh-e6','hi-e3'], - ['ab-e1','ac-e3','ad-e5','bc-e4','bd-e2','be-e6','cd-e2','de-e1','df-e8','ef-e3','eg-e2','fg-e6'], - ['ab-e1','ac-e3','bc-e4','bd-e2','de-e1','df-e8','dg-e5','ef-e3','eg-e2','eh-e12','fg-e6','fh-e10','gh-e6'], - ['ab-e1','ac-e3','bc-e4','bd-e2','bh-e12','cd-e2','df-e8','dh-e10'], - ['ab-e1','ac-e3','ad-e5','bc-e4','bd-e2','cd-e2','ce-e4','de-e1','df-e8','dg-e5','ef-e3','eg-e2','fg-e6'] +A novel Frequent Pattern Graph Mining algorithm, FP-GraphMiner, that compactly +represents a set of network graphs as a Frequent Pattern Graph (or FP-Graph). +This graph can be used to efficiently mine frequent subgraphs including maximal +frequent subgraphs and maximum common subgraphs. + +URL: https://www.researchgate.net/publication/235255851 +""" +# fmt: off +edge_array = [ + ['ab-e1', 'ac-e3', 'ad-e5', 'bc-e4', 'bd-e2', 'be-e6', 'bh-e12', 'cd-e2', 'ce-e4', + 'de-e1', 'df-e8', 'dg-e5', 'dh-e10', 'ef-e3', 'eg-e2', 'fg-e6', 'gh-e6', 'hi-e3'], + ['ab-e1', 'ac-e3', 'ad-e5', 'bc-e4', 'bd-e2', 'be-e6', 'cd-e2', 'de-e1', 'df-e8', + 'ef-e3', 'eg-e2', 'fg-e6'], + ['ab-e1', 'ac-e3', 'bc-e4', 'bd-e2', 'de-e1', 'df-e8', 'dg-e5', 'ef-e3', 'eg-e2', + 'eh-e12', 'fg-e6', 'fh-e10', 'gh-e6'], + ['ab-e1', 'ac-e3', 'bc-e4', 'bd-e2', 'bh-e12', 'cd-e2', 'df-e8', 'dh-e10'], + ['ab-e1', 'ac-e3', 'ad-e5', 'bc-e4', 'bd-e2', 'cd-e2', 'ce-e4', 'de-e1', 'df-e8', + 'dg-e5', 'ef-e3', 'eg-e2', 'fg-e6'] ] +# fmt: on -def get_distinct_edge(edge_array: List[List[str]]) -> List[str]: - ''' +def get_distinct_edge(edge_array): + """ Return Distinct edges from edge array of multiple graphs >>> sorted(get_distinct_edge(edge_array)) ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'] - ''' - distinct_edge=set() - for row in edge_array: + """ + distinct_edge = set() + for row in edge_array: for item in row: distinct_edge.add(item[0]) return list(distinct_edge) -def get_bitcode(edge_array: List[List[str]],distinct_edge: List[str]) -> str: - ''' +def get_bitcode(edge_array, distinct_edge): + """ Return bitcode of distinct_edge - ''' - bitcode=['0'] * len(edge_array) + """ + bitcode = ["0"] * len(edge_array) for i, row in enumerate(edge_array): for item in row: if distinct_edge in item[0]: - bitcode[i]='1' + bitcode[i] = "1" break - return ''.join(bitcode) + return "".join(bitcode) -def get_frequency_table(edge_array:List[List[str]]) -> List[List[str]]: - ''' +def get_frequency_table(edge_array): + """ Returns Frequency Table - ''' - distinct_edge=get_distinct_edge(edge_array) - frequency_table=dict() + """ + distinct_edge = get_distinct_edge(edge_array) + frequency_table = dict() for item in distinct_edge: - bit=get_bitcode(edge_array,item) - #print('bit',bit) - #bt=''.join(bit) - s=bit.count('1') - frequency_table[item]=[s,bit] - ''' - Store [Distinct edge, WT(Bitcode), Bitcode] in Descending order - ''' - sorted_frequency_table=[[k,v[0],v[1]] for k,v in sorted(frequency_table.items(),key=lambda v:v[1][0],reverse=True)] + bit = get_bitcode(edge_array, item) + # print('bit',bit) + # bt=''.join(bit) + s = bit.count("1") + frequency_table[item] = [s, bit] + # Store [Distinct edge, WT(Bitcode), Bitcode] in descending order + sorted_frequency_table = [ + [k, v[0], v[1]] + for k, v in sorted(frequency_table.items(), key=lambda v: v[1][0], reverse=True) + ] return sorted_frequency_table -def get_nodes(frequency_table: List[List[str]]) -> Dict[str,List[str]]: - ''' +def get_nodes(frequency_table): + """ Returns nodes format nodes={bitcode:edges that represent the bitcode} - >>> get_nodes([['ab', 5, '11111'], ['ac', 5, '11111'], ['df', 5, '11111'], ['bd', 5, '11111'], ['bc', 5, '11111']]) + >>> get_nodes([['ab', 5, '11111'], ['ac', 5, '11111'], ['df', 5, '11111'], + ... ['bd', 5, '11111'], ['bc', 5, '11111']]) {'11111': ['ab', 'ac', 'df', 'bd', 'bc']} - ''' - nodes={} - for i,item in enumerate(frequency_table): - nodes.setdefault(item[2],[]).append(item[0]) + """ + nodes = {} + for i, item in enumerate(frequency_table): + nodes.setdefault(item[2], []).append(item[0]) return nodes -def get_cluster(nodes: Dict[str,List[str]]) -> Dict[int,Dict[str,List[str]]]: - ''' +def get_cluster(nodes): + """ Returns cluster format cluster:{WT(bitcode):nodes with same WT} - ''' - - cluster={} - for key,value in nodes.items(): - cluster.setdefault(key.count('1'),{})[key]=value - + """ + cluster = {} + for key, value in nodes.items(): + cluster.setdefault(key.count("1"), {})[key] = value return cluster -def get_support(cluster :Dict[int,Dict[str,List[str]]])-> List[float]: - ''' + +def get_support(cluster): + """ Returns support - >>> get_support({5: {'11111': ['ab', 'ac', 'df', 'bd', 'bc']}, 4: {'11101': ['ef', 'eg', 'de', 'fg'], '11011': ['cd']}, 3: {'11001': ['ad'], '10101': ['dg']}, \ - 2: {'10010': ['dh', 'bh'], '11000': ['be'], '10100': ['gh'], '10001': ['ce']}, 1: {'00100': ['fh', 'eh'], '10000': ['hi']}}) + >>> get_support({5: {'11111': ['ab', 'ac', 'df', 'bd', 'bc']}, + ... 4: {'11101': ['ef', 'eg', 'de', 'fg'], '11011': ['cd']}, + ... 3: {'11001': ['ad'], '10101': ['dg']}, + ... 2: {'10010': ['dh', 'bh'], '11000': ['be'], '10100': ['gh'], + ... '10001': ['ce']}, + ... 1: {'00100': ['fh', 'eh'], '10000': ['hi']}}) [100.0, 80.0, 60.0, 40.0, 20.0] - ''' - - support=[] - for i in cluster: - support.append(i*100/len(cluster)) - - return support + """ + return [i * 100 / len(cluster) for i in cluster] + + def print_all() -> None: print("\nNodes\n") - for key,value in nodes.items(): - print(key,value) + for key, value in nodes.items(): + print(key, value) print("\nSupport\n") print(support) print("\n Cluster \n") - for key,value in sorted(cluster.items(),reverse=True): + for key, value in sorted(cluster.items(), reverse=True): print(key, value) print("\n Graph\n") - for key,value in graph.items(): + for key, value in graph.items(): print(key, value) print("\n Edge List of Frequent subgraphs \n") for edge_list in freq_subgraph_edge_list: print(edge_list) -def create_edge(nodes: Dict[str,List[str]],graph: Dict[tuple,List[List[str]]],cluster: Dict[int,Dict[str,List[str]]],c1: int) -> None: - ''' - create edge between the nodes - ''' +def create_edge(nodes, graph, cluster, c1): + """ + create edge between the nodes + """ for i in cluster[c1].keys(): - count=0 - c2=c1+1 + count = 0 + c2 = c1 + 1 while c2 < max(cluster.keys()): for j in cluster[c2].keys(): - ''' + """ creates edge only if the condition satisfies - ''' - if(int(i,2) & int(j,2) == int(i,2)): - if tuple(nodes[i]) in graph: - graph[tuple(nodes[i])].append(nodes[j]) - else: - graph[tuple(nodes[i])]=[nodes[j]] - count+=1 - if(count==0): - c2=c2+1 + """ + if int(i, 2) & int(j, 2) == int(i, 2): + if tuple(nodes[i]) in graph: + graph[tuple(nodes[i])].append(nodes[j]) + else: + graph[tuple(nodes[i])] = [nodes[j]] + count += 1 + if count == 0: + c2 = c2 + 1 else: break -def construct_graph(cluster: Dict[int,Dict[str,List[str]]],nodes: Dict[str,List[str]]) -> Dict[tuple,List[List[str]]]: - X=cluster[max(cluster.keys())] - cluster[max(cluster.keys())+1]='Header' - graph={} - for i in X.keys(): - if tuple(['Header']) in graph: - graph[tuple(['Header'])].append(X[i]) +def construct_graph(cluster, nodes): + X = cluster[max(cluster.keys())] + cluster[max(cluster.keys()) + 1] = "Header" + graph = {} + for i in X: + if tuple(["Header"]) in graph: + graph[tuple(["Header"])].append(X[i]) else: - graph[tuple(['Header'])]=[X[i]] - for i in X.keys(): - graph[tuple(X[i])]=[['Header']] - i=1 - while i < max(cluster.keys())-1: - create_edge(nodes,graph,cluster,i) - i=i+1 - + graph[tuple(["Header"])] = [X[i]] + for i in X: + graph[tuple(X[i])] = [["Header"]] + i = 1 + while i < max(cluster) - 1: + create_edge(nodes, graph, cluster, i) + i = i + 1 return graph -def myDFS(graph: Dict[tuple,List[List[str]]],start: tuple,end: tuple,path=[]): - ''' +def myDFS(graph, start, end, path=[]): + """ find different DFS walk from given node to Header node - ''' - path=path+[start] - if start==end: - paths.append(path) + """ + path = path + [start] + if start == end: + paths.append(path) for node in graph[start]: if tuple(node) not in path: - myDFS(graph,tuple(node),end,path) + myDFS(graph, tuple(node), end, path) -def find_freq_subgraph_given_support(s: int,cluster: Dict[int,Dict[str,List[str]]],graph: Dict[tuple,List[List[str]]]) -> None: - ''' +def find_freq_subgraph_given_support(s, cluster, graph): + """ find edges of multiple frequent subgraphs - ''' - k=int(s/100*(len(cluster)-1)) - + """ + k = int(s / 100 * (len(cluster) - 1)) for i in cluster[k].keys(): - myDFS(graph,tuple(cluster[k][i]),tuple(['Header'])) + myDFS(graph, tuple(cluster[k][i]), tuple(["Header"])) -def freq_subgraphs_edge_list(paths: List[List[tuple]]) -> List[List[tuple]]: - ''' +def freq_subgraphs_edge_list(paths): + """ returns Edge list for frequent subgraphs - ''' - freq_sub_EL=[] + """ + freq_sub_EL = [] for edges in paths: - EL=[] - for j in range(len(edges)-1): - temp=list(edges[j]) + EL = [] + for j in range(len(edges) - 1): + temp = list(edges[j]) for e in temp: - edge=(e[0],e[1]) + edge = (e[0], e[1]) EL.append(edge) - freq_sub_EL.append(EL) - return freq_sub_EL + freq_sub_EL.append(EL) + return freq_sub_EL -def preprocess(edge_array: List[List[str]]) -> List[List[List[str]]]: - ''' +def preprocess(edge_array): + """ Preprocess the edge array - >>> preprocess([['ab-e1','ac-e3','ad-e5','bc-e4','bd-e2','be-e6','bh-e12','cd-e2','ce-e4','de-e1','df-e8','dg-e5','dh-e10','ef-e3','eg-e2','fg-e6','gh-e6','hi-e3']]) - - ''' + >>> preprocess([['ab-e1', 'ac-e3', 'ad-e5', 'bc-e4', 'bd-e2', 'be-e6', 'bh-e12', + ... 'cd-e2', 'ce-e4', 'de-e1', 'df-e8', 'dg-e5', 'dh-e10', 'ef-e3', + ... 'eg-e2', 'fg-e6', 'gh-e6', 'hi-e3']]) + + """ for i in range(len(edge_array)): for j in range(len(edge_array[i])): - t=edge_array[i][j].split('-') - edge_array[i][j]=t + t = edge_array[i][j].split("-") + edge_array[i][j] = t -if __name__ == "__main__": +if __name__ == "__main__": preprocess(edge_array) - frequency_table=get_frequency_table(edge_array) - nodes=get_nodes(frequency_table) - cluster=get_cluster(nodes) - support=get_support(cluster) - graph=construct_graph(cluster,nodes) - find_freq_subgraph_given_support(60,cluster,graph) + frequency_table = get_frequency_table(edge_array) + nodes = get_nodes(frequency_table) + cluster = get_cluster(nodes) + support = get_support(cluster) + graph = construct_graph(cluster, nodes) + find_freq_subgraph_given_support(60, cluster, graph) paths = [] - freq_subgraph_edge_list=freq_subgraphs_edge_list(paths) + freq_subgraph_edge_list = freq_subgraphs_edge_list(paths) print_all()