@@ -1081,7 +1081,7 @@ def _init_fuzzy_matcher_with_genome_infos(self):
1081
1081
if len (val ) > 0 :
1082
1082
values .append (val )
1083
1083
maps_to .append (row )
1084
- return FuzzyMatcher (values , maps_to )
1084
+ return FuzzyMatcher (values , maps_to , rel_sim_cutoff = 0.6 )
1085
1085
1086
1086
def genome_of_entry_nr (self , e_nr ):
1087
1087
"""returns the genome code belonging to a given entry_nr"""
@@ -1147,7 +1147,8 @@ def identify_genome(self, code):
1147
1147
return self .genome_from_SciName (code )
1148
1148
1149
1149
def approx_search_genomes(self, pattern):
    """Return the genomes that approximately match ``pattern``.

    The pattern is passed to ``self.approx_genome_matcher.search_approx``
    and every candidate it reports is turned into a ``Genome`` object.

    :param pattern: the query string to search for.
    :returns: a list of ``Genome`` objects, one per candidate; an empty
        list if the matcher reports no candidates.
    """
    # The matcher may hand back None instead of an empty list when nothing
    # is similar enough; treat both the same so callers always get a list.
    candidates = self.approx_genome_matcher.search_approx(pattern) or []
    # Each candidate is indexed at position 2 to obtain the key into
    # genome_table (presumably the row the matched name maps to -- confirm
    # against how the matcher is initialised).
    return [Genome(self._db, self.genome_table[cand[2]]) for cand in candidates]
1151
1152
1152
1153
def omaid_to_entry_nr (self , omaid ):
1153
1154
"""returns the internal numeric entrynr from a
@@ -1209,7 +1210,7 @@ def species_ordering(self, root=None):
1209
1210
1210
1211
1211
1212
class FuzzyMatcher (object ):
1212
- def __init__ (self , values , maps_to = None ):
1213
+ def __init__ (self , values , maps_to = None , rel_sim_cutoff = 0.8 ):
1213
1214
"""FuzzyMatcher allows to search for approximate matches of a list of values.
1214
1215
It is a thin wrapper to the :class:`fuzzyset.FuzzySet datastructure.
1215
1216
@@ -1222,23 +1223,24 @@ def __init__(self, values, maps_to=None):
1222
1223
:param values: an iterable/mapping
1223
1224
"""
1224
1225
if maps_to is not None :
1225
- self .fuzzySet = fuzzyset .FuzzySet (rel_sim_cutoff = 0.8 )
1226
+ self .fuzzySet = fuzzyset .FuzzySet (rel_sim_cutoff = rel_sim_cutoff )
1226
1227
self .mapping = collections .defaultdict (list )
1227
1228
for val , map_source in zip (values , maps_to ):
1228
1229
self .fuzzySet .add (val )
1229
1230
self .mapping [val ].append (map_source )
1230
1231
else :
1231
- self .fuzzySet = fuzzyset .FuzzySet (values , rel_sim_cutoff = 0.8 )
1232
+ self .fuzzySet = fuzzyset .FuzzySet (values , rel_sim_cutoff = rel_sim_cutoff )
1232
1233
self .mapping = None
1233
1234
1234
1235
def search_approx(self, key):
    """Return the approximate matches of ``key`` in the fuzzy set.

    Without a mapping, the result is the list of ``(score, value)``
    tuples reported by the underlying fuzzy set. With a mapping, every
    matched value is expanded to the sources registered for it and only
    the best scoring hit per source is kept, giving a list of
    ``(score, value, source)`` tuples.

    :param key: the query string to look up.
    :returns: a list of match tuples; an empty list if the fuzzy set
        reports no match.
    """
    # FuzzySet.get() falls back to its default (None) when nothing matches;
    # normalise that to an empty list so the result is always iterable.
    matches = self.fuzzySet.get(key) or []
    if self.mapping:
        bests = {}
        for score, val in matches:
            # Several sources can share the same value; report each source
            # once, with the highest score seen for it.
            for src in self.mapping[val]:
                if src not in bests or score > bests[src][0]:
                    bests[src] = (score, val, src)
        matches = list(bests.values())
    return matches
1244
1246
0 commit comments