Teach the bot to speak

thomaxius-and-co · Feb 22, 2024 · c9ac17a · c9ac17a
1 parent 4a9b9d2
commit c9ac17a
Show file tree

Hide file tree

Showing 5 changed files with 124 additions and 20 deletions.
diff --git a/Pipfile b/Pipfile
@@ -24,3 +24,4 @@ requests = "==2.26.0"
 lxml = "==4.9.2"
 cfscrape = "==2.1.1"
 "discord.py" = {version = "==2.2.3", extras = ["voice"]}
+boto3 = "==1.34.39"
diff --git a/Pipfile.lock b/Pipfile.lock
diff --git a/infra/Application.ts b/infra/Application.ts
@@ -11,6 +11,7 @@ import {
   Vpc
 } from "aws-cdk-lib/aws-ec2";
 import {Credentials, DatabaseInstance, DatabaseInstanceEngine, PostgresEngineVersion, PerformanceInsightRetention} from "aws-cdk-lib/aws-rds";
+import {Effect, PolicyStatement} from "aws-cdk-lib/aws-iam";
 import {Repository} from "aws-cdk-lib/aws-ecr";
 import {Code, Function, Runtime} from "aws-cdk-lib/aws-lambda";
 import {
@@ -137,6 +138,12 @@ class Application extends Stack {
         "DATABASE_PORT": EcsSecret.fromSecretsManager(db.secret!, "port"),
       }
     })
+
+    taskDefinition.addToTaskRolePolicy(new PolicyStatement({
+      effect: Effect.ALLOW,
+      actions: ["polly:SynthesizeSpeech"],
+      resources: ["*"],
+    }))
     appSecrets.grantRead(taskDefinition.executionRole!)
     appSecrets.grantRead(taskDefinition.taskRole)
     db.secret!.grantRead(taskDefinition.executionRole!)

diff --git a/src/openai.py b/src/openai.py
@@ -185,6 +185,13 @@ async def create_image(prompt, user_id):
         "user": str(user_id),
     })
 
+async def prompt(prompt, user_id):
+    messages = [{ "role": "user", "content": prompt }]
+    match await get_response_for_messages(messages, user_id, allow_tool_calls=False):
+        case 200, response:
+            return response["choices"][0]["message"]["content"]
+        case _:
+            raise Exception("Failed to call OpenAI")
 
 async def get_response_for_messages(messages, user_id, *, allow_tool_calls=True):
     request = {

diff --git a/src/run_lemon_bot.py b/src/run_lemon_bot.py
@@ -9,6 +9,7 @@
 # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 # of the Software, and to permit persons to whom the Software is furnished to do so
 import base64
+import tempfile
 import time
 import asyncpg
 import os
@@ -657,7 +658,19 @@ async def wrong_channel_for_this_word(current_message_channel_id, database_chann
     return current_message_channel_id != database_channel_id
 
 
+async def cmd_aforismi(client, message, arg):
+    aforismi = await openai.prompt("""
+        Luo lyhyt vanhanaikainen päivän aforismi piristämään nuorison iltapäivää.
+        Selitä myös lyhyesti, miten aforismin voisi tulkita nykymaailmassa.
+    """, message.author.id)
+
+    await message.reply(aforismi)
+    if voice_state := message.author.voice:
+        if voice_channel := voice_state.channel:
+            await say_in_voice_channel(voice_channel, aforismi)
+
 commands = {
+    'aforismi': cmd_aforismi,
     'sql': cmd_sql,
     'roll': cmd_roll,
     '8ball': cmd_8ball,
@@ -741,7 +754,6 @@ async def upsert_users(users):
             """, user.get("id"), user.get("username"), json.dumps(user))
 
 
-
 # Dispatcher for messages from the users.
 @client.event
 @logger.with_request_id
@@ -863,24 +875,53 @@ def hours(n): return n * minutes(60)
         run_scheduled_task(kansallisgalleria.update_data, hours(24))
     run_scheduled_task(ence_matches.do_tasks, hours(2.5))
     run_scheduled_task(status.check_user_and_message_count, minutes(30))
-    asyncio.create_task(play_sound(client))
 
-async def play_sound(client):
-  try:
-    voice_channel_id = 855406379916853258
-    voice_channel = await client.fetch_channel(voice_channel_id)
+def run_in_asyncio_executor(func):
+    from functools import partial
+    async def wrapper(*args, **kwargs):
+        loop = asyncio.get_event_loop()
+        return await loop.run_in_executor(None, partial(func, *args, **kwargs))
+    return wrapper
+
+@run_in_asyncio_executor
+def text_to_speech(text, output_file):
+    log.info("Generating speech for text: %s", text)
+    from boto3 import Session
+    from contextlib import closing
+
+    session = Session()
+    polly = session.client('polly')
+    response = polly.synthesize_speech(
+        Text=text,
+        OutputFormat='mp3',
+        VoiceId='Suvi',
+        Engine='neural',
+        LanguageCode='fi-FI',
+    )
+    if "AudioStream" in response:
+        with closing(response["AudioStream"]) as stream:
+            output_file.write(stream.read())
+        output_file.seek(0)
+    else:
+        raise Exception("No AudioStream in response")
+
+async def say_in_voice_channel(voice_channel, text):
+    with tempfile.TemporaryDirectory() as d:
+        filepath = os.path.join(d, "output.mp3")
+        with open(filepath, "wb") as file:
+            await text_to_speech(text, file)
+        await play_file_on_channel(voice_channel, filepath)
+
+async def play_file_on_channel(voice_channel, filepath):
+    """Play the audio file at ``filepath`` on ``voice_channel``.
+
+    Connects to the channel, starts playback, polls every 0.5 seconds
+    until playback has stopped, and then disconnects.
+    """
+    # The source is created before connecting, so a failure here never
+    # leaves an open voice connection behind.
+    # NOTE(review): nothing below is wrapped in try/finally, so if play()
+    # raises or this coroutine is cancelled, disconnect() is not reached
+    # -- consider guarding it.
+    source = await discord.FFmpegOpusAudio.from_probe(filepath)
     voice_client = await voice_channel.connect()
-    source = await discord.FFmpegOpusAudio.from_probe('https://www.myinstants.com/media/sounds/roblox-death-sound_1.mp3')
+    # Passed to play() as ``after``; logs the error it is given, if any.
     def after(error):
-      if error is not None:
-        log.info(error)
+        if error is not None:
+            log.info(error)
     voice_client.play(source, after=after)
     while voice_client.is_playing():
-      log.info("still playing...")
-      await asyncio.sleep(1)
+        await asyncio.sleep(0.5)
     await voice_client.disconnect()
-  except Exception as e:
-    log.info(e)
 
 
 def run_scheduled_task(task_func, interval):