fix fuzzy map filtering

pull/1/head
qwewqa 4 years ago
parent 35a8f0bbb4
commit fde0421bfb
  1. 21
      miyu_bot/commands/common/fuzzy_matching.py

@ -9,7 +9,7 @@ import pykakasi
class FuzzyMap: class FuzzyMap:
def __init__(self, filter=lambda: True, matcher=None): def __init__(self, filter=None, matcher=None):
self.filter = filter or (lambda n: True) self.filter = filter or (lambda n: True)
self.matcher = matcher or FuzzyMatcher() self.matcher = matcher or FuzzyMatcher()
self._values = {} self._values = {}
@ -41,7 +41,8 @@ class FuzzyMap:
try: try:
matcher = self.matcher matcher = self.matcher
result = min((score, item) for score, item in result = min((score, item) for score, item in
((matcher.score(key, item[0]), item) for item in self._values.items()) if score <= 0)[1][1] ((matcher.score(key, item[0]), item) for item in self._values.items() if self.filter(item[1]))
if score <= 0)[1][1]
self.logger.info(f'Found key "{key}" in time {timeit.default_timer() - start_time}.') self.logger.info(f'Found key "{key}" in time {timeit.default_timer() - start_time}.')
return result return result
except ValueError: except ValueError:
@ -55,7 +56,9 @@ class FuzzyMap:
return [] return []
key = romanize(key) key = romanize(key)
values = [item[1] for score, item in values = [item[1] for score, item in
sorted((self.matcher.score(key, item[0]), item) for item in self._values.items()) if score <= 0] sorted(
(self.matcher.score(key, item[0]), item) for item in self._values.items() if self.filter(item[1]))
if score <= 0]
self.logger.info(f'Searched key "{key}" in time {timeit.default_timer() - start_time}.') self.logger.info(f'Searched key "{key}" in time {timeit.default_timer() - start_time}.')
return values return values
@ -129,14 +132,12 @@ class FuzzyMatcher:
for i in range(l_tgt + 1): for i in range(l_tgt + 1):
a[0][i] = i * insertion_weight a[0][i] = i * insertion_weight
def strip_vowels(s):
return re.sub('[aeoiu]', '', s)
words = target.split() words = target.split()
word_bonus = min(word_match_weight * max(sum(a == b for a, b in zip(source, w)) for w in words), word_bonus = min(word_match_weight * max(sum(a == b for a, b in zip(source, w)) for w in words),
word_match_weight * max(sum(a == b for a, b in word_match_weight * max(sum(a == b for a, b in
zip(source, w[0] + strip_vowels(w[1:]))) for w in zip(source, w[0] + strip_vowels(w[1:]))) for w in
words), words),
word_match_weight * sum(a == b for a, b in zip(strip_spaces(source), strip_spaces(target))),
acronym_match_weight * sum( acronym_match_weight * sum(
a == b for a, b in zip(source, ''.join(w[0] for w in words)))) a == b for a, b in zip(source, ''.join(w[0] for w in words))))
@ -162,6 +163,14 @@ class FuzzyMatcher:
return a[l_src][l_tgt] + word_bonus + base_score return a[l_src][l_tgt] + word_bonus + base_score
def strip_spaces(s: str) -> str:
    """Return *s* with every space character (U+0020) removed.

    Only the plain space is removed; tabs, newlines and other whitespace
    are left untouched.
    """
    # A fixed single-character removal does not need the regex engine.
    return s.replace(' ', '')
def strip_vowels(s: str) -> str:
    """Return *s* with the lowercase vowels ``a``, ``e``, ``i``, ``o``, ``u`` removed.

    Uppercase vowels and all other characters are kept as they are.
    """
    return re.sub(r'[aeoiu]', '', s)
def romanize(s: str) -> str: def romanize(s: str) -> str:
kks = pykakasi.kakasi() kks = pykakasi.kakasi()
s = re.sub('[\']', '', s) s = re.sub('[\']', '', s)

Loading…
Cancel
Save