fix fuzzy map filtering
This commit is contained in:
parent
35a8f0bbb4
commit
fde0421bfb
@ -9,7 +9,7 @@ import pykakasi
|
|||||||
|
|
||||||
|
|
||||||
class FuzzyMap:
|
class FuzzyMap:
|
||||||
def __init__(self, filter=lambda: True, matcher=None):
|
def __init__(self, filter=None, matcher=None):
|
||||||
self.filter = filter or (lambda n: True)
|
self.filter = filter or (lambda n: True)
|
||||||
self.matcher = matcher or FuzzyMatcher()
|
self.matcher = matcher or FuzzyMatcher()
|
||||||
self._values = {}
|
self._values = {}
|
||||||
@ -41,7 +41,8 @@ class FuzzyMap:
|
|||||||
try:
|
try:
|
||||||
matcher = self.matcher
|
matcher = self.matcher
|
||||||
result = min((score, item) for score, item in
|
result = min((score, item) for score, item in
|
||||||
((matcher.score(key, item[0]), item) for item in self._values.items()) if score <= 0)[1][1]
|
((matcher.score(key, item[0]), item) for item in self._values.items() if self.filter(item[1]))
|
||||||
|
if score <= 0)[1][1]
|
||||||
self.logger.info(f'Found key "{key}" in time {timeit.default_timer() - start_time}.')
|
self.logger.info(f'Found key "{key}" in time {timeit.default_timer() - start_time}.')
|
||||||
return result
|
return result
|
||||||
except ValueError:
|
except ValueError:
|
||||||
@ -55,7 +56,9 @@ class FuzzyMap:
|
|||||||
return []
|
return []
|
||||||
key = romanize(key)
|
key = romanize(key)
|
||||||
values = [item[1] for score, item in
|
values = [item[1] for score, item in
|
||||||
sorted((self.matcher.score(key, item[0]), item) for item in self._values.items()) if score <= 0]
|
sorted(
|
||||||
|
(self.matcher.score(key, item[0]), item) for item in self._values.items() if self.filter(item[1]))
|
||||||
|
if score <= 0]
|
||||||
self.logger.info(f'Searched key "{key}" in time {timeit.default_timer() - start_time}.')
|
self.logger.info(f'Searched key "{key}" in time {timeit.default_timer() - start_time}.')
|
||||||
return values
|
return values
|
||||||
|
|
||||||
@ -129,14 +132,12 @@ class FuzzyMatcher:
|
|||||||
for i in range(l_tgt + 1):
|
for i in range(l_tgt + 1):
|
||||||
a[0][i] = i * insertion_weight
|
a[0][i] = i * insertion_weight
|
||||||
|
|
||||||
def strip_vowels(s):
|
|
||||||
return re.sub('[aeoiu]', '', s)
|
|
||||||
|
|
||||||
words = target.split()
|
words = target.split()
|
||||||
word_bonus = min(word_match_weight * max(sum(a == b for a, b in zip(source, w)) for w in words),
|
word_bonus = min(word_match_weight * max(sum(a == b for a, b in zip(source, w)) for w in words),
|
||||||
word_match_weight * max(sum(a == b for a, b in
|
word_match_weight * max(sum(a == b for a, b in
|
||||||
zip(source, w[0] + strip_vowels(w[1:]))) for w in
|
zip(source, w[0] + strip_vowels(w[1:]))) for w in
|
||||||
words),
|
words),
|
||||||
|
word_match_weight * sum(a == b for a, b in zip(strip_spaces(source), strip_spaces(target))),
|
||||||
acronym_match_weight * sum(
|
acronym_match_weight * sum(
|
||||||
a == b for a, b in zip(source, ''.join(w[0] for w in words))))
|
a == b for a, b in zip(source, ''.join(w[0] for w in words))))
|
||||||
|
|
||||||
@ -162,6 +163,14 @@ class FuzzyMatcher:
|
|||||||
return a[l_src][l_tgt] + word_bonus + base_score
|
return a[l_src][l_tgt] + word_bonus + base_score
|
||||||
|
|
||||||
|
|
||||||
|
def strip_spaces(s):
|
||||||
|
return re.sub(' ', '', s)
|
||||||
|
|
||||||
|
|
||||||
|
def strip_vowels(s):
|
||||||
|
return re.sub('[aeoiu]', '', s)
|
||||||
|
|
||||||
|
|
||||||
def romanize(s: str) -> str:
|
def romanize(s: str) -> str:
|
||||||
kks = pykakasi.kakasi()
|
kks = pykakasi.kakasi()
|
||||||
s = re.sub('[\']', '', s)
|
s = re.sub('[\']', '', s)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user