Skip to content

Commit

Permalink
Add more words to formatter and start of word list
Browse files Browse the repository at this point in the history
  • Loading branch information
ajayyy committed Dec 14, 2024
1 parent 6c5ce21 commit 626bf42
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 4 deletions.
19 changes: 16 additions & 3 deletions src/titles/titleFormatter.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { VideoID } from "../../maze-utils/src/video";
import Config, { TitleFormatting } from "../config/config";
import { getTitleFormatting, shouldCleanEmojis } from "../config/channelOverrides";
import { acronymBlocklist, allowlistedWords, fancyTextConversions, notStartOfSentence, titleCaseDetectionNotCapitalized, titleCaseNotCapitalized } from "./titleFormatterData";
import { acronymBlocklist, allowlistedStartOfWords, allowlistedWords, fancyTextConversions, notStartOfSentence, titleCaseDetectionNotCapitalized, titleCaseNotCapitalized } from "./titleFormatterData";
import { chromeP } from "../../maze-utils/src/browserApi";
import type { LanguageIdentifier } from "cld3-asm";

Expand Down Expand Up @@ -316,12 +316,14 @@ function isFirstLetterCapital(word: string): boolean {

function forceKeepFormatting(word: string, ignorePunctuation = true): boolean {
let result = !!word.match(/^>/)
|| listHasWord(allowlistedWords, word);
|| listHasWord(allowlistedWords, word)
|| listHasStartOfWord(allowlistedStartOfWords, word);

if (ignorePunctuation) {
const withoutPunctuation = word.replace(/[:?.!+\]]+$|^[[+:/]+/, "");
if (word !== withoutPunctuation) {
result ||= listHasWord(allowlistedWords, withoutPunctuation);
result ||= listHasWord(allowlistedWords, withoutPunctuation)
|| listHasStartOfWord(allowlistedStartOfWords, word);
}
}

Expand Down Expand Up @@ -582,6 +584,17 @@ function listHasWord(list: Set<string>, word: string): boolean {
return list.has(word.replace(/[[<({:)}\]]/g, ""))
}

/**
 * Checks whether a word begins with any entry of a prefix list.
 *
 * The characters [ < ( { : ) } ] are removed from the word before
 * comparing, so "[de_dust2]" is tested as "de_dust2".
 *
 * @param list - Prefixes to test against.
 * @param word - Word to check.
 * @returns true if the stripped word starts with at least one prefix.
 */
function listHasStartOfWord(list: Set<string>, word: string): boolean {
    const stripped = word.replace(/[[<({:)}\]]/g, "");

    return Array.from(list).some((prefix) => stripped.startsWith(prefix));
}

export async function localizeHtmlPageWithFormatting(): Promise<void> {
// Localize by replacing __MSG_***__ meta tags
Expand Down
31 changes: 30 additions & 1 deletion src/titles/titleFormatterData.ts
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,6 @@ export const allowlistedWords = new Set([
"GTA",
"bell hooks",
"TOOOL",
"PCECD",
"drawholic",
"TF2",
"L4D",
Expand Down Expand Up @@ -322,6 +321,36 @@ export const allowlistedWords = new Set([
"WWF",
"WWI",
"XML",
"LGBTQIA2S+",
"NVFBC",
"DALL-E",
"DALL·E",
"iilluminaughtii",
"TOML",
"DJ",
"SPTV",
"VST",
"EQ"
]);

/**
 * Prefixes for the "start of word" allowlist: the title formatter keeps the
 * original formatting of a word that begins with any of these entries.
 * NOTE(review): most entries look like game map-name prefixes — confirm
 * before adding unrelated ones.
 */
const startOfWordPrefixes: readonly string[] = [
    "osu!",
    "de_",
    "cs_",
    "ar_",
    "as_",
    "es_",
    "dz_",
    "fy_",
    "aim_",
    "zm_",
    "jail_",
    "bb_",
    "gg_",
    "awp_",
    "df_",
    "deathrun_",
];

// Can be switched to a trie structure if it grows
export const allowlistedStartOfWords = new Set(startOfWordPrefixes);

export const acronymBlocklist = new Set([
Expand Down

0 comments on commit 626bf42

Please sign in to comment.