From f0d8776bfa480f30ebde6a067dd253373553d343 Mon Sep 17 00:00:00 2001 From: i30817 Date: Sat, 4 Nov 2023 16:11:17 +0000 Subject: [PATCH] Update version. Replace WRatio by the much faster QRatio (#39) * Replace WRatio by the much faster QRatio --- libretrofuzz/__init__.py | 2 +- libretrofuzz/__main__.py | 12 ++++++------ pyproject.toml | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/libretrofuzz/__init__.py b/libretrofuzz/__init__.py index b202327..4f82193 100644 --- a/libretrofuzz/__init__.py +++ b/libretrofuzz/__init__.py @@ -1 +1 @@ -__version__ = "3.6.1" +__version__ = "3.6.2" diff --git a/libretrofuzz/__main__.py b/libretrofuzz/__main__.py index b5c8a59..6682bb1 100644 --- a/libretrofuzz/__main__.py +++ b/libretrofuzz/__main__.py @@ -334,7 +334,7 @@ def __call__(self, name, other, score_cutoff=None): return MAX_SCORE if not name_ns: return 0 - # score is based on WRatio (a comprehensive 0-100 weighted heuristic from rapidfuzz) + # score is based on QRatio (a 0-100 heuristic from rapidfuzz, WRatio is better but unacceptably slow) # summed to some custom heuristics # Up to DEF_SCORE WRatio is used with 100 being exactly DEF_SCORE, after the heuristics # if -min is used, above DEF_SCORE, the heuristics will need better fit to win @@ -354,25 +354,25 @@ def __call__(self, name, other, score_cutoff=None): # 97% of remaining score will be used for different heuristics heuristic = remaining * 0.97 # used denormalized in all returns, just with different percentages - wratio = fuzz.WRatio(name, other) * 0.01 + ratio = fuzz.QRatio(name, other) * 0.01 # find a exact full name non digit match # (lots of dump or disc numbers as subtitles in some dumps) # on either a subtitle or a sequence of subtitles from the start # note that this doesn't include subtitles in # 'name' matching a subtitle in 'other' and vice versa # because a subtitle match like this is a strong indicator of a match, - # give it the full default score slot, and wratio for the heuristic slot + # give it the full default score slot, and ratio for the heuristic slot if not name_ns.isdigit(): sum_ns = "" for sub_ns in other_ns_subs: if name_ns == sub_ns or name_ns == (sum_ns := sum_ns + sub_ns): - rest_of_score += heuristic * wratio + rest_of_score += heuristic * ratio return DEF_SCORE + rest_of_score if not other_ns.isdigit(): sum_ns = "" for sub_ns in name_ns_subs: if other_ns == sub_ns or other_ns == (sum_ns := sum_ns + sub_ns): - rest_of_score += heuristic * wratio + rest_of_score += heuristic * ratio return DEF_SCORE + rest_of_score # heuristic measures if the name is more completely at the start of other name common = len(os.path.commonprefix([name_ns, other_ns])) / len(name_ns) @@ -380,7 +380,7 @@ def __call__(self, name, other, score_cutoff=None): parity = min(len(name_ns),len(other_ns))/max(len(name_ns),len(other_ns)) rest_of_score += (heuristic * common * 0.80) + (heuristic * parity * 0.20) # remember that WRatio fills the DEF_SCORE slot - return rest_of_score + DEF_SCORE * wratio + return rest_of_score + DEF_SCORE * ratio # --------------------------------------------------------------- # Normalization functions, part of the functions that change both diff --git a/pyproject.toml b/pyproject.toml index 7e34014..689c40b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "libretrofuzz" -version = "3.6.1" +version = "3.6.2" description = "Fuzzy Retroarch thumbnail downloader" authors = ["i30817 "] license = "MIT"