From 4b23c8a83f1e7433f283d18f4b4ebd137da75b7e Mon Sep 17 00:00:00 2001
From: AnamolZ
Date: Wed, 20 Nov 2024 02:47:50 +0545
Subject: [PATCH] Refactoring the entire project structure.

---
Review note: the file contents below were revised during review. The blob
ids on the "index" lines are those of the original commit and were not
recomputed.

 app/__init__.py                       |   0
 app/main.py                           |  24 ++++++
 app/routes/__init__.py                |   0
 app/routes/home_routes.py             |  14 ++++
 app/routes/process_routes.py          | 101 ++++++++++++++++++++++++++
 app/services/__init__.py              |   0
 app/services/file_service.py          |   5 ++
 app/services/qa_service.py            |  53 ++++++++++++++
 app/services/transcription_service.py |  35 +++++++++
 run.sh                                |   4 +
 utils/__init__.py                     |   0
 utils/downloader.py                   |  42 +++++++++++
 utils/file_utils.py                   |  21 ++++++
 13 files changed, 299 insertions(+)
 create mode 100644 app/__init__.py
 create mode 100644 app/main.py
 create mode 100644 app/routes/__init__.py
 create mode 100644 app/routes/home_routes.py
 create mode 100644 app/routes/process_routes.py
 create mode 100644 app/services/__init__.py
 create mode 100644 app/services/file_service.py
 create mode 100644 app/services/qa_service.py
 create mode 100644 app/services/transcription_service.py
 create mode 100644 run.sh
 create mode 100644 utils/__init__.py
 create mode 100644 utils/downloader.py
 create mode 100644 utils/file_utils.py

diff --git a/app/__init__.py b/app/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/main.py b/app/main.py
new file mode 100644
index 0000000..4102307
--- /dev/null
+++ b/app/main.py
@@ -0,0 +1,24 @@
+"""FastAPI application: mounts static assets and registers the routers."""
+
+from fastapi import FastAPI
+from fastapi.responses import JSONResponse
+from fastapi.staticfiles import StaticFiles
+from fastapi.templating import Jinja2Templates
+from app.routes.process_routes import router as process_router
+from app.routes.home_routes import router as home_router
+
+app = FastAPI()
+
+app.mount("/static", StaticFiles(directory="static"), name="static")
+templates = Jinja2Templates(directory="templates")
+
+app.include_router(process_router)
+app.include_router(home_router)
+
+# NOTE(review): home_router already registers GET "/" above, and routes are
+# matched in registration order, so this handler looks unreachable -- confirm
+# and either drop it or move it to a dedicated path.
+@app.get("/")
+async def root():
+    """Return a JSON welcome message."""
+    return {"message": "Welcome to the Court Case Scraper API!"}
diff --git a/app/routes/__init__.py b/app/routes/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/routes/home_routes.py b/app/routes/home_routes.py
new file mode 100644
index 0000000..46b24a0
--- /dev/null
+++ b/app/routes/home_routes.py
@@ -0,0 +1,14 @@
+"""Route serving the HTML landing page."""
+
+from fastapi import APIRouter
+from fastapi.responses import HTMLResponse
+from fastapi.templating import Jinja2Templates
+from fastapi import Request
+
+router = APIRouter()
+templates = Jinja2Templates(directory="templates")
+
+@router.get("/", response_class=HTMLResponse)
+async def read_root(request: Request):
+    """Render templates/index.html for the site root."""
+    return templates.TemplateResponse("index.html", {"request": request})
diff --git a/app/routes/process_routes.py b/app/routes/process_routes.py
new file mode 100644
index 0000000..8d6b4d0
--- /dev/null
+++ b/app/routes/process_routes.py
@@ -0,0 +1,101 @@
+"""Routes that summarise a YouTube video's audio or answer a question."""
+
+from functools import lru_cache
+import os
+
+from fastapi import APIRouter, HTTPException
+from fastapi.responses import JSONResponse
+from pydantic import BaseModel
+from sentence_transformers import SentenceTransformer
+import google.generativeai as genai
+
+from app.services.transcription_service import YouTubeTranscriber
+from app.services.qa_service import QABot
+from utils.file_utils import remove_file
+
+router = APIRouter()
+
+# Audio file the download step is expected to leave in the working directory.
+AUDIO_FILE = "video.m4a"
+
+
+class ProcessRequest(BaseModel):
+    """JSON body accepted by POST /process."""
+
+    # "transcribe" or "ask"; any other value is rejected with HTTP 400.
+    action: str
+    # Video URL for "transcribe", question text for "ask".
+    input: str
+
+
+api_key = os.getenv("ASSEMBLYAI_API_KEY")
+genaiApiKey = os.getenv("GENAI_API_KEY")
+modelName = "multi-qa-mpnet-base-dot-v1"
+
+
+class GenaiQA:
+    """Pairs a sentence-embedding model with a Gemini generative model."""
+
+    def __init__(self, modelName, genaiApiKey):
+        """Load the embedding model and configure the Gemini client."""
+        self.model = SentenceTransformer(modelName)
+        genai.configure(api_key=genaiApiKey)
+        self.genaiModel = genai.GenerativeModel(model_name="gemini-1.5-flash")
+
+    def getSummary(self, transcriptionText):
+        """Return a generated summary, or a fixed notice for empty input."""
+        if not transcriptionText:
+            return "No transcription text provided."
+        inputUser = (f"This document contains a transcription of the video's audio. Please just provide a professionally crafted summary based on the transcript paragraph. Transcription: {transcriptionText}")
+        response = self.genaiModel.generate_content(inputUser)
+        return response.text
+
+    def getAnswer(self, query, localData):
+        """Rank localData against query and ask Gemini to answer from it."""
+        qaBot = QABot(self.model)
+        answer = qaBot.answerQuery(query, localData)
+        inputUser = f"For this question, I'm seeking the perfect answer. Please provide the answer directly. {query}\n\n{answer}"
+        response = self.genaiModel.generate_content(inputUser)
+        return response.text
+
+
+@lru_cache(maxsize=1)
+def _get_genai_qa():
+    """Build GenaiQA on first use and reuse it, so models load only once."""
+    return GenaiQA(modelName, genaiApiKey)
+
+
+@router.post("/process")
+async def process_request(request: ProcessRequest):
+    """Summarise a video ("transcribe") or answer a question ("ask").
+
+    Raises:
+        HTTPException: 500 when no transcript text is produced, 400 when
+            the action is not recognised.
+    """
+    action = request.action
+    input_text = request.input
+
+    # Clear audio left behind by an earlier request.
+    remove_file(AUDIO_FILE)
+
+    if action == "transcribe":
+        yt_transcriber = YouTubeTranscriber(api_key, input_text)
+        yt_transcriber.download_video()
+        transcript_text = yt_transcriber.transcribe_video(AUDIO_FILE)
+        if not transcript_text:
+            raise HTTPException(status_code=500, detail="Transcription failed.")
+
+        summary_text = _get_genai_qa().getSummary(transcript_text)
+
+        return JSONResponse(content={"status": "success", "summary": summary_text})
+
+    elif action == "ask":
+        # NOTE(review): the question is also passed as the only passage to
+        # rank; presumably a stored transcript was intended -- confirm.
+        answer_text = _get_genai_qa().getAnswer(input_text, [input_text])
+
+        return JSONResponse(content={"status": "success", "answer": answer_text})
+
+    else:
+        raise HTTPException(status_code=400, detail="Invalid action.")
diff --git a/app/services/__init__.py b/app/services/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/services/file_service.py b/app/services/file_service.py
new file mode 100644
index 0000000..f63c877
--- /dev/null
+++ b/app/services/file_service.py
@@ -0,0 +1,5 @@
+"""File helpers re-exported from utils.file_utils (single implementation)."""
+
+from utils.file_utils import create_directory, file_exists, remove_file
+
+__all__ = ["remove_file", "create_directory", "file_exists"]
diff --git a/app/services/qa_service.py b/app/services/qa_service.py
new file mode 100644
index 0000000..37f0a99
--- /dev/null
+++ b/app/services/qa_service.py
@@ -0,0 +1,53 @@
+"""Embedding-based passage ranking."""
+
+from sentence_transformers import SentenceTransformer
+from numpy import dot
+from numpy.linalg import norm
+
+
+class QABot:
+    """Ranks passages against a query by cosine similarity of embeddings."""
+
+    def __init__(self, model):
+        """Store the embedding model used by fetchRelevantData."""
+        self.model = model
+
+    def generateAnswer(self, query, relevantData):
+        """Format (score, text) pairs as numbered passages.
+
+        Scores are printed relative to the highest score. Returns a fixed
+        notice when relevantData is empty. query is accepted but unused.
+        """
+        if not relevantData:
+            return "No relevant data found."
+        maxScore = max(score for score, _ in relevantData)
+        # A zero or negative maximum would divide by zero or flip every sign,
+        # so print the raw scores in that case.
+        scale = maxScore if maxScore > 0 else 1
+        return "\n\n".join(
+            f"**Passage {i + 1} (Score: {score / scale:.2f}):** {text}"
+            for i, (score, text) in enumerate(relevantData)
+        )
+
+    def answerQuery(self, query, localData):
+        """Rank localData against query and return the formatted passages."""
+        relevantData = self.fetchRelevantData(query, localData)
+        return self.generateAnswer(query, relevantData)
+
+    def fetchRelevantData(self, query, localData, topK=5):
+        """Return up to topK (score, text) pairs, most similar first."""
+        queryEmbedding = self.model.encode(query, convert_to_tensor=True).tolist()
+        scoresAndTexts = []
+        for text in localData:
+            textEmbedding = self.model.encode(text, convert_to_tensor=True).tolist()
+            score = self.computeSimilarity(queryEmbedding, textEmbedding)
+            scoresAndTexts.append((score, text))
+        scoresAndTexts.sort(key=lambda pair: pair[0], reverse=True)
+        return scoresAndTexts[:topK]
+
+    def computeSimilarity(self, queryEmbedding, textEmbedding):
+        """Return cosine similarity; 0.0 if either vector has zero norm."""
+        denominator = norm(queryEmbedding) * norm(textEmbedding)
+        if denominator == 0:
+            return 0.0
+        return dot(queryEmbedding, textEmbedding) / denominator
diff --git a/app/services/transcription_service.py b/app/services/transcription_service.py
new file mode 100644
index 0000000..1b735a1
--- /dev/null
+++ b/app/services/transcription_service.py
@@ -0,0 +1,35 @@
+"""Download a YouTube video's audio and transcribe it with AssemblyAI."""
+
+import os
+import assemblyai as aai
+from utils.downloader import YouTubeDownloader
+
+
+class YouTubeTranscriber:
+    """Couples a YouTubeDownloader with an AssemblyAI transcriber."""
+
+    def __init__(self, api_key, url):
+        """Set the AssemblyAI key (module-wide) and build the downloader."""
+        self.api_key = api_key
+        self.url = url
+        aai.settings.api_key = self.api_key
+        self.downloader = YouTubeDownloader(self.url)
+
+    def remove_existing_video(self, filename):
+        """Delete filename if it exists."""
+        self.downloader.remove_existing_video(filename)
+
+    def download_video(self):
+        """Download the audio; returns whatever the downloader returns."""
+        return self.downloader.download_video()
+
+    def transcribe_video(self, filename):
+        """Return the transcript text of filename, or "" if there is none."""
+        if not os.path.exists(filename):
+            print(f"File {filename} not found.")
+            return ""
+        transcriber = aai.Transcriber()
+        transcript = transcriber.transcribe(filename)
+        # text may be None (presumably when transcription failed -- confirm);
+        # normalise to "" so callers always get a string.
+        return transcript.text or ""
diff --git a/run.sh b/run.sh
new file mode 100644
index 0000000..2a0a276
--- /dev/null
+++ b/run.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+# Serve the FastAPI app on all interfaces, port 8000, with auto-reload.
+
+uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload
diff --git a/utils/__init__.py b/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/utils/downloader.py b/utils/downloader.py
new file mode 100644
index 0000000..0d4b7e0
--- /dev/null
+++ b/utils/downloader.py
@@ -0,0 +1,42 @@
+"""yt-dlp wrapper that downloads a video's audio track as video.<ext>."""
+
+import yt_dlp
+import os
+
+from utils.file_utils import remove_file
+
+
+class YouTubeDownloader:
+    """Downloads the audio of a single video URL into the working directory."""
+
+    def __init__(self, url):
+        """Remember url and build the yt-dlp options."""
+        self.url = url
+        self.ydl_opts = {
+            'outtmpl': 'video.%(ext)s',
+            # Callers look for video.m4a, so prefer an m4a stream and only
+            # fall back to the previous selection when none is offered.
+            'format': 'bestaudio[ext=m4a]/bestaudio/best',
+            'noplaylist': True,
+            'merge_output_format': None,
+        }
+
+    def remove_existing_video(self, filename):
+        """Delete filename if it exists."""
+        remove_file(filename)
+
+    def download_video(self):
+        """Download the audio, best-effort.
+
+        Errors are printed rather than raised. Returns True on success and
+        False on failure.
+        """
+        try:
+            self.remove_existing_video('video.m4a')
+            with yt_dlp.YoutubeDL(self.ydl_opts) as ydl:
+                ydl.download([self.url])
+            print("Download completed successfully!")
+            return True
+        except Exception as e:
+            print(f"An error occurred: {e}")
+            return False
diff --git a/utils/file_utils.py b/utils/file_utils.py
new file mode 100644
index 0000000..f63c877
--- /dev/null
+++ b/utils/file_utils.py
@@ -0,0 +1,21 @@
+"""Small filesystem helpers shared across the project."""
+
+import os
+
+
+def remove_file(filename):
+    """Delete filename; a file that is already gone is not an error."""
+    try:
+        os.remove(filename)
+    except FileNotFoundError:
+        pass
+
+
+def create_directory(directory):
+    """Create directory and any missing parents; no-op if it already exists."""
+    os.makedirs(directory, exist_ok=True)
+
+
+def file_exists(filename):
+    """Return True if filename exists."""
+    return os.path.exists(filename)