also make the definite article removal case insensitive, and remove useless repeat of roman numeral case removal by just moving the lowercasing up
i30817 · Jun 3, 2023 · 4894ea4 · 4894ea4
1 parent 74fdd86
commit 4894ea4
Showing 1 changed file with 35 additions and 55 deletions.
diff --git a/libretrofuzz/__main__.py b/libretrofuzz/__main__.py
@@ -233,85 +233,65 @@ def normalizer(t, nometa, hack):
     #strips just because the user may have made a mistake naming the source
     #(or the replacement above introduce boundary spaces)
     t = t.strip()
+    #normalize case
+    t = t.lower()
     #beginning and end definite articles in several european languages (people move them)
-    #make sure we're only removing the capitalized start and end forms with spaces
-    t = removefirst(t, ', The')
-    t = removeprefix(t, 'The ')
-    t = removefirst(t, ', Los')
-    t = removeprefix(t, 'Los ')
-    t = removefirst(t, ', Las')
-    t = removeprefix(t, 'Las ')
-    t = removefirst(t, ', Les')
-    t = removeprefix(t, 'Les ')
-    t = removefirst(t, ', Le')
-    t = removeprefix(t, 'Le ')
-    t = removefirst(t, ', La')
-    t = removeprefix(t, 'La ')
-    t = removefirst(t, ', L\'')
+    #make sure we're only removing the start and end forms with spaces
+    t = removefirst(t, ', the')
+    t = removeprefix(t, 'the ')
+    t = removefirst(t, ', los')
+    t = removeprefix(t, 'los ')
+    t = removefirst(t, ', las')
+    t = removeprefix(t, 'las ')
+    t = removefirst(t, ', les')
+    t = removeprefix(t, 'les ')
+    t = removefirst(t, ', le')
+    t = removeprefix(t, 'le ')
+    t = removefirst(t, ', la')
+    t = removeprefix(t, 'la ')
+    t = removefirst(t, ', l\'')
     #L' sometimes omits the space so always remove L' at the start even without space
-    t = removeprefix(t, 'L\'')  #if there is a extra space the next join will remove it
-    t = removefirst(t, ', Der')
-    t = removeprefix(t, 'Der ')
-    t = removefirst(t, ', Die')
-    t = removeprefix(t, 'Die ')
-    t = removefirst(t, ', Das')
-    t = removeprefix(t, 'Das ')
-    t = removefirst(t, ', El')
-    t = removeprefix(t, 'El ')
-    t = removefirst(t, ', Os')
-    t = removeprefix(t, 'Os ')
-    t = removefirst(t, ', As')
-    t = removeprefix(t, 'As ')
-    t = removefirst(t, ', O')
-    t = removeprefix(t, 'O ')
-    t = removefirst(t, ', A')
-    t = removeprefix(t, 'A ')
+    t = removeprefix(t, 'l\'')  #if there is a extra space the next join will remove it
+    t = removefirst(t, ', der')
+    t = removeprefix(t, 'der ')
+    t = removefirst(t, ', die')
+    t = removeprefix(t, 'die ')
+    t = removefirst(t, ', das')
+    t = removeprefix(t, 'das ')
+    t = removefirst(t, ', el')
+    t = removeprefix(t, 'el ')
+    t = removefirst(t, ', os')
+    t = removeprefix(t, 'os ')
+    t = removefirst(t, ', as')
+    t = removeprefix(t, 'as ')
+    t = removefirst(t, ', o')
+    t = removeprefix(t, 'o ')
+    t = removefirst(t, ', a')
+    t = removeprefix(t, 'a ')
+    #remove the symbols used in the definite article normalization
+    t = replacemany(t, ',\'', '')
     #Tries to make roman numerals in the range 1-20 equivalent to normal numbers (to handle names that change it).
     #If both sides are roman numerals there is no harm done if XXIV gets turned into 204 in both sides.
-    t = t.replace('XVIII', '18')
     t = t.replace('xviii', '18')
-    t = t.replace('XVII',  '17')
     t = t.replace('xvii',  '17')
-    t = t.replace('XVI' ,  '16')
     t = t.replace('xvi' ,  '16')
-    t = t.replace('XIII',  '13')
     t = t.replace('xiii',  '13')
-    t = t.replace('XII' ,  '12')
     t = t.replace('xii' ,  '12')
-    t = t.replace('XIV' ,  '14')
     t = t.replace('xiv' ,  '14')
-    t = t.replace('XV'  ,  '15')
     t = t.replace('xv'  ,  '15')
-    t = t.replace('XIX',   '19')
     t = t.replace('xix',   '19')
-    t = t.replace('XX',   '20')
     t = t.replace('xx',   '20')
-    t = t.replace('XI',   '11')
     t = t.replace('xi',   '11')
-    t = t.replace('VIII', '8')
     t = t.replace('viii', '8')
-    t = t.replace('VII',  '7')
     t = t.replace('vii',  '7')
-    t = t.replace('VI' ,  '6')
     t = t.replace('vi' ,  '6')
-    t = t.replace('III',  '3')
     t = t.replace('iii',  '3')
-    t = t.replace('II' ,  '2')
     t = t.replace('ii' ,  '2')
-    t = t.replace('IV' ,  '4')
     t = t.replace('iv' ,  '4')
-    t = t.replace('V'  ,  '5')
     t = t.replace('v'  ,  '5')
-    t = t.replace('IX',   '9')
     t = t.replace('ix',   '9')
-    t = t.replace('X',   '10')
     t = t.replace('x',   '10')
-    t = t.replace('I',    '1')
     t = t.replace('i',    '1')
-    #remove the symbols used in the definite article normalization
-    t = replacemany(t, ',\'', '')
-    #normalize case
-    t = t.lower()
     #this makes sure that if a remote name has ' and ' instead of ' _ ' to replace ' & ' it works
     #': ' doesn't need this because ':' is a forbidden character and both '_' and '-' turn to ''
     t = t.replace(' and ',  '')