Skip to content

Commit

Permalink
Merge pull request #3246 from python-discord/revert-3241-pastebin-auto-upload
Browse files Browse the repository at this point in the history

Revert "Apply filtering to text attachments; offer to auto-upload text attachments to paste bin"
  • Loading branch information
swfarnsworth authored Jan 30, 2025
2 parents 1a8ee2c + 186d5e9 commit de61208
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 180 deletions.
39 changes: 29 additions & 10 deletions bot/exts/filtering/_filter_lists/extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,18 @@
if typing.TYPE_CHECKING:
from bot.exts.filtering.filtering import Filtering

# Paste service that the embed texts below point users to.
PASTE_URL = "https://paste.pythondiscord.com"

# Embed text used when one of the blocked attachments is a `.py` file.
PY_EMBED_DESCRIPTION = (
    "It looks like you tried to attach a Python file - "
    f"please use a code-pasting service such as {PASTE_URL}"
)

# Extensions that get the paste-bin wording below instead of the generic "disallowed" embed.
TXT_LIKE_FILES = {".txt", ".csv", ".json"}

# `{blocked_extension}` sits in the plain (non-f) literal on purpose: it must survive as a
# `str.format` placeholder, while `{PASTE_URL}` in the f-string part is interpolated right here.
TXT_EMBED_DESCRIPTION = (
    "You either uploaded a `{blocked_extension}` file or entered a message that was too long. "
    f"Please use our [paste bin]({PASTE_URL}) instead."
)

DISALLOWED_EMBED_DESCRIPTION = (
"It looks like you tried to attach file type(s) that we do not allow ({joined_blacklist}). "
"We currently allow the following file types: **{joined_whitelist}**.\n\n"
Expand Down Expand Up @@ -75,23 +87,30 @@ async def actions_for(
not_allowed = {ext: filename for ext, filename in all_ext if ext not in allowed_ext}

if ctx.event == Event.SNEKBOX:
not_allowed = dict(not_allowed.items())
not_allowed = {ext: filename for ext, filename in not_allowed.items() if ext not in TXT_LIKE_FILES}

if not not_allowed: # Yes, it's a double negative. Meaning all attachments are allowed :)
return None, [], {ListType.ALLOW: triggered}

# At this point, something is disallowed.
if ctx.event != Event.SNEKBOX: # Don't post the embed if it's a snekbox response.
meta_channel = bot.instance.get_channel(Channels.meta)
if not self._whitelisted_description:
self._whitelisted_description = ", ".join(
filter_.content for filter_ in self[ListType.ALLOW].filters.values()
if ".py" in not_allowed:
# Provide a pastebin link for .py files.
ctx.dm_embed = PY_EMBED_DESCRIPTION
elif txt_extensions := {ext for ext in TXT_LIKE_FILES if ext in not_allowed}:
# Work around Discord auto-conversion of messages longer than 2000 chars to .txt
ctx.dm_embed = TXT_EMBED_DESCRIPTION.format(blocked_extension=txt_extensions.pop())
else:
meta_channel = bot.instance.get_channel(Channels.meta)
if not self._whitelisted_description:
self._whitelisted_description = ", ".join(
filter_.content for filter_ in self[ListType.ALLOW].filters.values()
)
ctx.dm_embed = DISALLOWED_EMBED_DESCRIPTION.format(
joined_whitelist=self._whitelisted_description,
joined_blacklist=", ".join(not_allowed),
meta_channel_mention=meta_channel.mention,
)
ctx.dm_embed = DISALLOWED_EMBED_DESCRIPTION.format(
joined_whitelist=self._whitelisted_description,
joined_blacklist=", ".join(not_allowed),
meta_channel_mention=meta_channel.mention,
)

ctx.matches += not_allowed.values()
ctx.blocked_exts |= set(not_allowed)
Expand Down
5 changes: 4 additions & 1 deletion bot/exts/filtering/_filter_lists/filter_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,10 @@ def __hash__(self):
return hash(id(self))


class FilterList[T: Filter](dict[ListType, AtomicList], FieldRequiring):
T = typing.TypeVar("T", bound=Filter)


class FilterList(dict[ListType, AtomicList], typing.Generic[T], FieldRequiring):
"""Dispatches events to lists of _filters, and aggregates the responses into a single list of actions to take."""

# Each subclass must define a name matching the filter_list name we're expecting to receive from the database.
Expand Down
14 changes: 9 additions & 5 deletions bot/exts/filtering/_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,21 @@
from abc import abstractmethod
from copy import copy
from functools import reduce
from typing import Any, NamedTuple, Self
from typing import Any, NamedTuple, Self, TypeVar

from bot.exts.filtering._filter_context import FilterContext
from bot.exts.filtering._settings_types import settings_types
from bot.exts.filtering._settings_types.settings_entry import ActionEntry, SettingsEntry, ValidationEntry
from bot.exts.filtering._utils import FieldRequiring
from bot.log import get_logger

TSettings = TypeVar("TSettings", bound="Settings")

log = get_logger(__name__)

_already_warned = set[str]()
_already_warned: set[str] = set()

T = TypeVar("T", bound=SettingsEntry)


def create_settings(
Expand Down Expand Up @@ -51,7 +55,7 @@ def create_settings(
)


class Settings[T: SettingsEntry](FieldRequiring, dict[str, T]):
class Settings(FieldRequiring, dict[str, T]):
"""
A collection of settings.
Expand All @@ -65,7 +69,7 @@ class Settings[T: SettingsEntry](FieldRequiring, dict[str, T]):

entry_type: type[T]

_already_warned = set[str]()
_already_warned: set[str] = set()

@abstractmethod # ABCs have to have at least one abstract method to actually count as such.
def __init__(self, settings_data: dict, *, defaults: Settings | None = None, keep_empty: bool = False):
Expand Down Expand Up @@ -100,7 +104,7 @@ def overrides(self) -> dict[str, Any]:
"""Return a dictionary of overrides across all entries."""
return reduce(operator.or_, (entry.overrides for entry in self.values() if entry), {})

def copy(self: Self) -> Self:
def copy(self: TSettings) -> TSettings:
"""Create a shallow copy of the object."""
return copy(self)

Expand Down
19 changes: 1 addition & 18 deletions bot/exts/filtering/filtering.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,14 +66,6 @@
WEEKLY_REPORT_ISO_DAY = 3 # 1=Monday, 7=Sunday


async def _extract_text_file_content(att: discord.Attachment) -> str:
    """
    Extract up to the first 30 lines and first 2000 characters (whichever is shorter) of an attachment.

    The encoding is read from the `charset` parameter of the attachment's content type. UTF-8 is used
    when the content type is missing, carries no `charset` parameter, or names a codec Python does not know.
    Bytes that cannot be decoded are replaced instead of raising.

    Returns the attachment's filename followed by `": "` and the truncated text.
    """
    # `content_type` may be absent, and other parameters may follow the charset
    # (e.g. "text/plain; charset=utf-8; format=flowed"), so stop the capture at ";" as well as whitespace.
    charset_match = re.search(r"charset=([^\s;]+)", att.content_type or "")
    file_encoding = charset_match.group(1).strip("\"'") if charset_match else "utf-8"

    raw_content = await att.read()
    try:
        text = raw_content.decode(encoding=file_encoding, errors="replace")
    except LookupError:
        # The declared charset is not a codec known to Python; fall back instead of failing.
        text = raw_content.decode(encoding="utf-8", errors="replace")

    # Cap by line count first, then by characters, so the result never exceeds either limit.
    first_n_lines = "\n".join(text.splitlines()[:30])[:2_000]
    return f"{att.filename}: {first_n_lines}"


class Filtering(Cog):
"""Filtering and alerting for content posted on the server."""

Expand All @@ -88,7 +80,7 @@ class Filtering(Cog):
def __init__(self, bot: Bot):
self.bot = bot
self.filter_lists: dict[str, FilterList] = {}
self._subscriptions = defaultdict[Event, list[FilterList]](list)
self._subscriptions: defaultdict[Event, list[FilterList]] = defaultdict(list)
self.delete_scheduler = scheduling.Scheduler(self.__class__.__name__)
self.webhook: discord.Webhook | None = None

Expand Down Expand Up @@ -231,15 +223,6 @@ async def on_message(self, msg: Message) -> None:
self.message_cache.append(msg)

ctx = FilterContext.from_message(Event.MESSAGE, msg, None, self.message_cache)

text_contents = [
await _extract_text_file_content(a)
for a in msg.attachments if "charset" in a.content_type
]
if text_contents:
attachment_content = "\n\n".join(text_contents)
ctx = ctx.replace(content=f"{ctx.content}\n\n{attachment_content}")

result_actions, list_messages, triggers = await self._resolve_action(ctx)
self.message_cache.update(msg, metadata=triggers)
if result_actions:
Expand Down
144 changes: 0 additions & 144 deletions bot/exts/utils/attachment_pastebin_uploader.py

This file was deleted.

3 changes: 1 addition & 2 deletions bot/exts/utils/snekbox/_cog.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from bot.bot import Bot
from bot.constants import BaseURLs, Channels, Emojis, MODERATION_ROLES, Roles, URLs
from bot.decorators import redirect_output
from bot.exts.filtering._filter_lists.extension import TXT_LIKE_FILES
from bot.exts.help_channels._channel import is_help_forum_post
from bot.exts.utils.snekbox._eval import EvalJob, EvalResult
from bot.exts.utils.snekbox._io import FileAttachment
Expand All @@ -31,8 +32,6 @@
ANSI_REGEX = re.compile(r"\N{ESC}\[[0-9;:]*m")
ESCAPE_REGEX = re.compile("[`\u202E\u200B]{3,}")

TXT_LIKE_FILES = {".txt", ".csv", ".json", ".py"}

# The timeit command should only output the very last line, so all other output should be suppressed.
# This will be used as the setup code along with any setup code provided.
TIMEIT_SETUP_WRAPPER = """
Expand Down

0 comments on commit de61208

Please sign in to comment.