Skip to content

Commit b233971

Browse files
committed
finish approx_search of genomes
1 parent 5ac6b37 commit b233971

File tree

1 file changed

+10
-8
lines changed

1 file changed

+10
-8
lines changed

pyoma/browser/db.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1081,7 +1081,7 @@ def _init_fuzzy_matcher_with_genome_infos(self):
10811081
if len(val) > 0:
10821082
values.append(val)
10831083
maps_to.append(row)
1084-
return FuzzyMatcher(values, maps_to)
1084+
return FuzzyMatcher(values, maps_to, rel_sim_cutoff=0.6)
10851085

10861086
def genome_of_entry_nr(self, e_nr):
10871087
"""returns the genome code belonging to a given entry_nr"""
@@ -1147,7 +1147,8 @@ def identify_genome(self, code):
11471147
return self.genome_from_SciName(code)
11481148

11491149
def approx_search_genomes(self, pattern):
1150-
pass
1150+
candidates = self.approx_genome_matcher.search_approx(pattern)
1151+
return [Genome(self._db, self.genome_table[z[2]]) for z in candidates]
11511152

11521153
def omaid_to_entry_nr(self, omaid):
11531154
"""returns the internal numeric entrynr from a
@@ -1209,7 +1210,7 @@ def species_ordering(self, root=None):
12091210

12101211

12111212
class FuzzyMatcher(object):
1212-
def __init__(self, values, maps_to=None):
1213+
def __init__(self, values, maps_to=None, rel_sim_cutoff=0.8):
12131214
"""FuzzyMatcher allows searching for approximate matches in a list of values.
12141215
It is a thin wrapper around the :class:`fuzzyset.FuzzySet` data structure.
12151216
@@ -1222,23 +1223,24 @@ def __init__(self, values, maps_to=None):
12221223
:param values: an iterable/mapping
12231224
"""
12241225
if maps_to is not None:
1225-
self.fuzzySet = fuzzyset.FuzzySet(rel_sim_cutoff=0.8)
1226+
self.fuzzySet = fuzzyset.FuzzySet(rel_sim_cutoff=rel_sim_cutoff)
12261227
self.mapping = collections.defaultdict(list)
12271228
for val, map_source in zip(values, maps_to):
12281229
self.fuzzySet.add(val)
12291230
self.mapping[val].append(map_source)
12301231
else:
1231-
self.fuzzySet = fuzzyset.FuzzySet(values, rel_sim_cutoff=0.8)
1232+
self.fuzzySet = fuzzyset.FuzzySet(values, rel_sim_cutoff=rel_sim_cutoff)
12321233
self.mapping = None
12331234

12341235
def search_approx(self, key):
12351236
matches = self.fuzzySet.get(key)
12361237
if self.mapping:
12371238
bests = {}
12381239
for score, val in matches:
1239-
src = self.mapping[val]
1240-
if src not in bests or score > bests[src][0]:
1241-
bests[src] = (score, val, src)
1240+
sources = self.mapping[val]
1241+
for src in sources:
1242+
if src not in bests or score > bests[src][0]:
1243+
bests[src] = (score, val, src)
12421244
matches = list(bests.values())
12431245
return matches
12441246

0 commit comments

Comments
 (0)