From aac8e9c68659c0f70ab09dcf48fc047cd6f8353d Mon Sep 17 00:00:00 2001 From: LXNN Date: Fri, 13 May 2022 20:40:51 +0100 Subject: [PATCH 01/12] Initial commit on the snippet-search branch --- snippet_search/snippet_search.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 snippet_search/snippet_search.py diff --git a/snippet_search/snippet_search.py b/snippet_search/snippet_search.py new file mode 100644 index 0000000..8653420 --- /dev/null +++ b/snippet_search/snippet_search.py @@ -0,0 +1,26 @@ +from typing import Optional +from discord.ext import commands +from bot import ModmailBot +from core import checks +from core.models import PermissionLevel + + +class SnippetSearch(commands.Cog): + """A plugin that provides a command for searching snippets.""" + + def __init__(self, bot: ModmailBot): + self.bot = bot + + @checks.has_permissions(PermissionLevel.SUPPORTER) + @commands.command() + async def tag(self, ctx: commands.Context, *, query: Optional[str]) -> None: + """ + Search for a snippet. + """ + ... + + +def setup(bot: ModmailBot) -> None: + """Add the SnippetSearch cog to the bot.""" + bot.add_cog(SnippetSearch(bot)) + From 3fbfa76cbb50084b2ae64539780f65aa0e3079f6 Mon Sep 17 00:00:00 2001 From: LXNN Date: Fri, 13 May 2022 23:09:04 +0100 Subject: [PATCH 02/12] Make a start on snippet-search Details to be worked out in testing. Particularly the search method and output styling. 
Co-authored-by: Etzeitet <5340057+Etzeitet@users.noreply.github.com> --- snippet_search/snippet_search.py | 33 +++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/snippet_search/snippet_search.py b/snippet_search/snippet_search.py index 8653420..8d9b174 100644 --- a/snippet_search/snippet_search.py +++ b/snippet_search/snippet_search.py @@ -1,8 +1,11 @@ from typing import Optional +import discord from discord.ext import commands from bot import ModmailBot from core import checks from core.models import PermissionLevel +from core.paginator import EmbedPaginatorSession +from core.utils import truncate, escape_code_block class SnippetSearch(commands.Cog): @@ -13,14 +16,38 @@ def __init__(self, bot: ModmailBot): @checks.has_permissions(PermissionLevel.SUPPORTER) @commands.command() - async def tag(self, ctx: commands.Context, *, query: Optional[str]) -> None: + async def tag(self, ctx: commands.Context, *, query: Optional[str] = None) -> None: """ Search for a snippet. """ - ... + if not self.bot.snippets: + embed = discord.Embed( + color=self.bot.error_color, + description="You dont have any snippets at the moment.", + ) + embed.set_footer( + text=f'Check "{self.bot.prefix}help snippet add" to add a snippet.' 
+ ) + embed.set_author(name="Snippets", icon_url=ctx.guild.icon_url) + return await ctx.send(embed=embed) + + if query is None: + snippets = self.bot.snippets + else: + snippets = {k: v for k, v in self.bot.snippets if query.lower() in k} + + embeds = [] + + for name, val in snippets.items(): + description = f"{name}\n\n{truncate(escape_code_block(val), 2048 - 7)}" + embed = discord.Embed(color=self.bot.main_color, description=description) + embed.set_author(name="Snippets", icon_url=ctx.guild.icon_url) + embeds.append(embed) + + session = EmbedPaginatorSession(ctx, *embeds) + await session.run() def setup(bot: ModmailBot) -> None: """Add the SnippetSearch cog to the bot.""" bot.add_cog(SnippetSearch(bot)) - From fb2c12079962abda982f9ed9731d9525532d6ee9 Mon Sep 17 00:00:00 2001 From: LXNN Date: Sat, 14 May 2022 01:24:31 +0100 Subject: [PATCH 03/12] Fix command name --- snippet_search/snippet_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snippet_search/snippet_search.py b/snippet_search/snippet_search.py index 8d9b174..deec693 100644 --- a/snippet_search/snippet_search.py +++ b/snippet_search/snippet_search.py @@ -16,7 +16,7 @@ def __init__(self, bot: ModmailBot): @checks.has_permissions(PermissionLevel.SUPPORTER) @commands.command() - async def tag(self, ctx: commands.Context, *, query: Optional[str] = None) -> None: + async def snippetsearch(self, ctx: commands.Context, *, query: Optional[str] = None) -> None: """ Search for a snippet. 
""" From e11a08b18062d714dbb29ada714653126cd5fb6a Mon Sep 17 00:00:00 2001 From: LXNN Date: Sat, 14 May 2022 01:30:49 +0100 Subject: [PATCH 04/12] Fix dict comprehension --- snippet_search/snippet_search.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/snippet_search/snippet_search.py b/snippet_search/snippet_search.py index deec693..f9ec7e2 100644 --- a/snippet_search/snippet_search.py +++ b/snippet_search/snippet_search.py @@ -34,7 +34,11 @@ async def snippetsearch(self, ctx: commands.Context, *, query: Optional[str] = N if query is None: snippets = self.bot.snippets else: - snippets = {k: v for k, v in self.bot.snippets if query.lower() in k} + snippets = { + k: v + for k, v in self.bot.snippets.items() + if query.lower() in k + } embeds = [] From 65d0f5bbf020f8be19a90526d38def71fe117f5f Mon Sep 17 00:00:00 2001 From: LXNN Date: Sat, 14 May 2022 02:09:22 +0100 Subject: [PATCH 05/12] Mostly cosmetic changes --- snippet_search/snippet_search.py | 35 +++++++++++++++++--------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/snippet_search/snippet_search.py b/snippet_search/snippet_search.py index f9ec7e2..9c01a3a 100644 --- a/snippet_search/snippet_search.py +++ b/snippet_search/snippet_search.py @@ -15,22 +15,11 @@ def __init__(self, bot: ModmailBot): self.bot = bot @checks.has_permissions(PermissionLevel.SUPPORTER) - @commands.command() - async def snippetsearch(self, ctx: commands.Context, *, query: Optional[str] = None) -> None: + @commands.command(name="snippetsearch") + async def snippet_search(self, ctx: commands.Context, *, query: Optional[str] = None) -> None: """ Search for a snippet. """ - if not self.bot.snippets: - embed = discord.Embed( - color=self.bot.error_color, - description="You dont have any snippets at the moment.", - ) - embed.set_footer( - text=f'Check "{self.bot.prefix}help snippet add" to add a snippet.' 
- ) - embed.set_author(name="Snippets", icon_url=ctx.guild.icon_url) - return await ctx.send(embed=embed) - if query is None: snippets = self.bot.snippets else: @@ -40,12 +29,26 @@ async def snippetsearch(self, ctx: commands.Context, *, query: Optional[str] = N if query.lower() in k } + if not snippets: + embed = discord.Embed( + description="No snippets found.", + color=self.bot.error_color, + ) + await ctx.send(embed=embed) + return + embeds = [] for name, val in snippets.items(): - description = f"{name}\n\n{truncate(escape_code_block(val), 2048 - 7)}" - embed = discord.Embed(color=self.bot.main_color, description=description) - embed.set_author(name="Snippets", icon_url=ctx.guild.icon_url) + content = truncate(escape_code_block(val), 2048 - 7) + embed = ( + discord.Embed( + title=f'Snippets Found ({len(snippets)})', + color=self.bot.main_color, + ) + .add_field(name="Name", value=f"`{name}`") + .add_field(name="Raw Content", value=f"```\n{content}\n```") + ) embeds.append(embed) session = EmbedPaginatorSession(ctx, *embeds) From 9b2b19f4c44b4b6d992986068db2f4015b1a61fb Mon Sep 17 00:00:00 2001 From: LXNN Date: Sat, 14 May 2022 02:10:44 +0100 Subject: [PATCH 06/12] Cosmetic change; make embed fields not inline --- snippet_search/snippet_search.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/snippet_search/snippet_search.py b/snippet_search/snippet_search.py index 9c01a3a..09c69cb 100644 --- a/snippet_search/snippet_search.py +++ b/snippet_search/snippet_search.py @@ -46,8 +46,8 @@ async def snippet_search(self, ctx: commands.Context, *, query: Optional[str] = title=f'Snippets Found ({len(snippets)})', color=self.bot.main_color, ) - .add_field(name="Name", value=f"`{name}`") - .add_field(name="Raw Content", value=f"```\n{content}\n```") + .add_field(name="Name", value=f"`{name}`", inline=False) + .add_field(name="Raw Content", value=f"```\n{content}\n```", inline=False) ) embeds.append(embed) From 1f5f51dd8e09602206e0df29274b824211a0d4d1 
Mon Sep 17 00:00:00 2001 From: LXNN Date: Sat, 14 May 2022 16:03:23 +0100 Subject: [PATCH 07/12] Cosmetic changes Added a separate embed to summarise the snippet search results. --- snippet_search/snippet_search.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/snippet_search/snippet_search.py b/snippet_search/snippet_search.py index 09c69cb..8c766b3 100644 --- a/snippet_search/snippet_search.py +++ b/snippet_search/snippet_search.py @@ -24,27 +24,33 @@ async def snippet_search(self, ctx: commands.Context, *, query: Optional[str] = snippets = self.bot.snippets else: snippets = { - k: v - for k, v in self.bot.snippets.items() - if query.lower() in k + name: content + for name, content in self.bot.snippets.items() + if query.lower() in name } if not snippets: embed = discord.Embed( - description="No snippets found.", + description="No matching snippets found.", color=self.bot.error_color, ) await ctx.send(embed=embed) return + result_summary_embed = discord.Embed( + color=self.bot.main_color, + title=f"{len(snippets)} Matching Snippet{'s' if len(snippets) > 1 else ''}", + description=', '.join(f"`{name}`" for name in snippets), + ) + embeds = [] - for name, val in snippets.items(): + for i, (name, val) in enumerate(snippets.items(), start=1): content = truncate(escape_code_block(val), 2048 - 7) embed = ( discord.Embed( - title=f'Snippets Found ({len(snippets)})', color=self.bot.main_color, + title="Snippet {i}", ) .add_field(name="Name", value=f"`{name}`", inline=False) .add_field(name="Raw Content", value=f"```\n{content}\n```", inline=False) From e611e1f11ff7a243cda83d87747ffb4e6d23a843 Mon Sep 17 00:00:00 2001 From: LXNN Date: Sat, 14 May 2022 16:11:21 +0100 Subject: [PATCH 08/12] Bug fixes --- snippet_search/snippet_search.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/snippet_search/snippet_search.py b/snippet_search/snippet_search.py index 8c766b3..2c14d88 100644 --- 
a/snippet_search/snippet_search.py +++ b/snippet_search/snippet_search.py @@ -43,6 +43,8 @@ async def snippet_search(self, ctx: commands.Context, *, query: Optional[str] = description=', '.join(f"`{name}`" for name in snippets), ) + await ctx.send(embed=result_summary_embed) + embeds = [] for i, (name, val) in enumerate(snippets.items(), start=1): @@ -50,7 +52,7 @@ async def snippet_search(self, ctx: commands.Context, *, query: Optional[str] = embed = ( discord.Embed( color=self.bot.main_color, - title="Snippet {i}", + title=f"Snippet {i}", ) .add_field(name="Name", value=f"`{name}`", inline=False) .add_field(name="Raw Content", value=f"```\n{content}\n```", inline=False) From d398f804ecb9b9dcf9f30f2946688cd774b77a0d Mon Sep 17 00:00:00 2001 From: LXNN Date: Sat, 14 May 2022 18:20:47 +0100 Subject: [PATCH 09/12] Grouping of snippets, and improvements to search heuristic Snippet names which correspond to the same snippet content are now grouped together. Snippets are now scored as: percentage of query words in name + percentage of query words in content. 
--- snippet_search/snippet_search.py | 96 ++++++++++++++++++++++++++------ 1 file changed, 78 insertions(+), 18 deletions(-) diff --git a/snippet_search/snippet_search.py b/snippet_search/snippet_search.py index 2c14d88..1058c0b 100644 --- a/snippet_search/snippet_search.py +++ b/snippet_search/snippet_search.py @@ -6,6 +6,46 @@ from core.models import PermissionLevel from core.paginator import EmbedPaginatorSession from core.utils import truncate, escape_code_block +from collections import defaultdict, Counter +import re + + +WORD_PATTERN = re.compile(r"[a-zA-Z]+") +THRESHOLD = 1.0 + + +def score(query, name, content): + if query is None: + return THRESHOLD + return ( + (common_word_count(query, name) + common_word_count(query, content)) + / len(words(query)) + ) + + +def words(s): + return WORD_PATTERN.findall(s) + + +def common_word_count(s1, s2): + return sum( + ( + Counter(map(str.casefold, words(s1))) + & Counter(map(str.casefold, words(s2))) + ).values() + ) + + +def group_snippets_by_content(snippets: dict[str, str]) -> list[tuple[set[str], str]]: + names_by_content = defaultdict(set) + for name, content in snippets.items(): + names_by_content[content.strip()].add(name) + grouped_snippets = [] + for group in names_by_content.values(): + name, *_ = group + content = snippets[name] + grouped_snippets.append((group, content)) + return grouped_snippets class SnippetSearch(commands.Cog): @@ -16,20 +56,28 @@ def __init__(self, bot: ModmailBot): @checks.has_permissions(PermissionLevel.SUPPORTER) @commands.command(name="snippetsearch") - async def snippet_search(self, ctx: commands.Context, *, query: Optional[str] = None) -> None: + async def snippet_search( + self, ctx: commands.Context, *, query: Optional[str] = None + ) -> None: """ Search for a snippet. 
""" - if query is None: - snippets = self.bot.snippets - else: - snippets = { - name: content - for name, content in self.bot.snippets.items() - if query.lower() in name - } - - if not snippets: + grouped_snippets = group_snippets_by_content(self.bot.snippets) + + scored_groups = [] + for i, (names, content) in enumerate(grouped_snippets): + group_score = max(score(query, name, content) for name in names) + scored_groups.append((group_score, i, names, content)) + + scored_groups.sort(reverse=True) + + matching_snippet_groups = [ + (names, content) + for group_score, _, names, content in scored_groups + if group_score >= THRESHOLD + ] + + if not matching_snippet_groups: embed = discord.Embed( description="No matching snippets found.", color=self.bot.error_color, @@ -39,23 +87,35 @@ async def snippet_search(self, ctx: commands.Context, *, query: Optional[str] = result_summary_embed = discord.Embed( color=self.bot.main_color, - title=f"{len(snippets)} Matching Snippet{'s' if len(snippets) > 1 else ''}", - description=', '.join(f"`{name}`" for name in snippets), + title=f"Found {len(matching_snippet_groups)} Matching Snippet{'s' if len(matching_snippet_groups) > 1 else ''}:", + description=", ".join( + "/".join(f"`{name}`" for name in sorted(names)) + for names, content in matching_snippet_groups + ), ) await ctx.send(embed=result_summary_embed) embeds = [] - for i, (name, val) in enumerate(snippets.items(), start=1): - content = truncate(escape_code_block(val), 2048 - 7) + for names, content in matching_snippet_groups: + formatted_content = ( + f"```\n{truncate(escape_code_block(content), 2048 - 7)}\n```" + ) embed = ( discord.Embed( color=self.bot.main_color, - title=f"Snippet {i}", ) - .add_field(name="Name", value=f"`{name}`", inline=False) - .add_field(name="Raw Content", value=f"```\n{content}\n```", inline=False) + .add_field( + name=f"Name{'s' if len(names) > 1 else ''}", + value=",".join(f"`{name}`" for name in sorted(names)), + inline=False, + ) + .add_field( 
+ name="Raw Content", + value=formatted_content, + inline=False, + ) ) embeds.append(embed) From 29ebaba5bb432b5a35ef8b24f648d88459aad22a Mon Sep 17 00:00:00 2001 From: LXNN Date: Sat, 14 May 2022 18:36:04 +0100 Subject: [PATCH 10/12] Bug fix; embed field value length shortened --- snippet_search/snippet_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snippet_search/snippet_search.py b/snippet_search/snippet_search.py index 1058c0b..3bc945c 100644 --- a/snippet_search/snippet_search.py +++ b/snippet_search/snippet_search.py @@ -100,7 +100,7 @@ async def snippet_search( for names, content in matching_snippet_groups: formatted_content = ( - f"```\n{truncate(escape_code_block(content), 2048 - 7)}\n```" + f"```\n{truncate(escape_code_block(content), 1000)}\n```" ) embed = ( discord.Embed( From 53abbf90212f52fb3f89fbcaac26b9ca09999e24 Mon Sep 17 00:00:00 2001 From: LXNN Date: Sat, 14 May 2022 21:01:28 +0100 Subject: [PATCH 11/12] Fix linting issues. --- snippet_search/snippet_search.py | 43 ++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/snippet_search/snippet_search.py b/snippet_search/snippet_search.py index 3bc945c..bbe666f 100644 --- a/snippet_search/snippet_search.py +++ b/snippet_search/snippet_search.py @@ -1,20 +1,28 @@ +import re +from collections import Counter, defaultdict from typing import Optional + import discord from discord.ext import commands + from bot import ModmailBot from core import checks from core.models import PermissionLevel from core.paginator import EmbedPaginatorSession -from core.utils import truncate, escape_code_block -from collections import defaultdict, Counter -import re +from core.utils import escape_code_block, truncate WORD_PATTERN = re.compile(r"[a-zA-Z]+") THRESHOLD = 1.0 -def score(query, name, content): +def score(query: str | None, name: str, content: str) -> float: + """ + Return a numerical sorting score for a snippet based on a query. 
+ + More relevant snippets have higher scores. If the query is None, + return a score that always meets the search inclusion threshold. + """ if query is None: return THRESHOLD return ( @@ -23,11 +31,18 @@ def score(query, name, content): ) -def words(s): +def words(s: str) -> list[str]: + """ + Extract a list of 'words' from the given string. + + A 'word' is defined by the WORD_PATTERN regex. This is purely for + use by the scoring function so isn't perfect. + """ return WORD_PATTERN.findall(s) -def common_word_count(s1, s2): +def common_word_count(s1: str, s2: str) -> int: + """Return the number of words in common between the two strings.""" return sum( ( Counter(map(str.casefold, words(s1))) @@ -37,6 +52,14 @@ def common_word_count(s1, s2): def group_snippets_by_content(snippets: dict[str, str]) -> list[tuple[set[str], str]]: + """ + Take a dictionary of snippets (in the form {name: content}) and group together snippets with the same content. + + Snippet contents are stripped of leading and trailing whitespace + before comparison. + + The result is of the form [(set_of_snippet_names, content)]. + """ names_by_content = defaultdict(set) for name, content in snippets.items(): names_by_content[content.strip()].add(name) @@ -59,9 +82,7 @@ def __init__(self, bot: ModmailBot): async def snippet_search( self, ctx: commands.Context, *, query: Optional[str] = None ) -> None: - """ - Search for a snippet. 
- """ + """Search for a snippet.""" grouped_snippets = group_snippets_by_content(self.bot.snippets) scored_groups = [] @@ -85,9 +106,11 @@ async def snippet_search( await ctx.send(embed=embed) return + num_results = len(matching_snippet_groups) + result_summary_embed = discord.Embed( color=self.bot.main_color, - title=f"Found {len(matching_snippet_groups)} Matching Snippet{'s' if len(matching_snippet_groups) > 1 else ''}:", + title=f"Found {num_results} Matching Snippet{'s' if num_results > 1 else ''}:", description=", ".join( "/".join(f"`{name}`" for name in sorted(names)) for names, content in matching_snippet_groups From 831c1c773a3ebeaaa0ef152e26b6b7eab59810ec Mon Sep 17 00:00:00 2001 From: LXNN Date: Sun, 15 May 2022 00:56:00 +0100 Subject: [PATCH 12/12] Remove Python 3.10 typing syntax that snuck in --- snippet_search/snippet_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snippet_search/snippet_search.py b/snippet_search/snippet_search.py index bbe666f..59d4231 100644 --- a/snippet_search/snippet_search.py +++ b/snippet_search/snippet_search.py @@ -16,7 +16,7 @@ THRESHOLD = 1.0 -def score(query: str | None, name: str, content: str) -> float: +def score(query: Optional[str], name: str, content: str) -> float: """ Return a numerical sorting score for a snippet based on a query.