Skip to content

Commit

Permalink
Refactoring the entire project structure.
Browse files Browse the repository at this point in the history
  • Loading branch information
AnamolZ committed Nov 19, 2024
1 parent 2bd9ce8 commit 4b23c8a
Show file tree
Hide file tree
Showing 13 changed files with 206 additions and 0 deletions.
Empty file added app/__init__.py
Empty file.
18 changes: 18 additions & 0 deletions app/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from fastapi import FastAPI
from fastapi.responses import JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from app.routes.process_routes import router as process_router
from app.routes.home_routes import router as home_router

# Application object served by the ASGI server.
app = FastAPI()

# Serve files from the local "static" directory under the /static URL prefix.
app.mount("/static", StaticFiles(directory="static"), name="static")
# Template loader rooted at the local "templates" directory.
templates = Jinja2Templates(directory="templates")

# Attach the route groups defined in app.routes.
# NOTE(review): home_router appears to register GET "/" as well, and it is
# included before the "/" handler declared on `app` in this module, so that
# handler is probably shadowed - confirm which response "/" should serve.
app.include_router(process_router)
app.include_router(home_router)

@app.get("/")
async def root():
    """Return a static JSON welcome payload for GET "/"."""
    greeting = "Welcome to the Court Case Scraper API!"
    return {"message": greeting}
Empty file added app/routes/__init__.py
Empty file.
11 changes: 11 additions & 0 deletions app/routes/home_routes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from fastapi import APIRouter
from fastapi.responses import HTMLResponse
from fastapi.templating import Jinja2Templates
from fastapi import Request

# Router collecting the HTML page endpoints declared in this module.
router = APIRouter()
# Template loader rooted at the local "templates" directory.
templates = Jinja2Templates(directory="templates")

@router.get("/", response_class=HTMLResponse)
async def read_root(request: Request):
    """Render the index.html template for GET "/".

    The incoming request object is handed to the template context under
    the "request" key.
    """
    context = {"request": request}
    return templates.TemplateResponse("index.html", context)
67 changes: 67 additions & 0 deletions app/routes/process_routes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
from fastapi import APIRouter, HTTPException
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from app.services.transcription_service import YouTubeTranscriber
from app.services.qa_service import QABot
from utils.file_utils import remove_file
import os
import google.generativeai as genai

# Router collecting the processing endpoints declared in this module.
router = APIRouter()

class ProcessRequest(BaseModel):
    # JSON body accepted by the POST /process endpoint.
    # (Documented with comments rather than a docstring so the generated
    # schema description is left unchanged.)

    # Operation selector; the endpoint handles "transcribe" and "ask".
    action: str
    # Operation payload: passed to the transcriber as the video URL for
    # "transcribe", or used as the question text for "ask".
    input: str

# Credentials are read from the environment once, at import time; os.getenv
# returns None when a variable is not set.
api_key = os.getenv("ASSEMBLYAI_API_KEY")
genaiApiKey = os.getenv("GENAI_API_KEY")
# Name of the sentence-embedding model handed to GenaiQA.
modelName = "multi-qa-mpnet-base-dot-v1"

class GenaiQA:
    """Summarise transcripts and answer questions with a Gemini model.

    An embedding model is loaded for passage ranking (delegated to QABot)
    and a generative model is configured for producing the final text.
    """

    def __init__(self, modelName, genaiApiKey):
        """Load the embedding model and configure the generative client.

        Args:
            modelName: Name of the sentence-transformers model to load.
            genaiApiKey: API key passed to ``genai.configure``.
        """
        # Imported here because this module has no top-level import of
        # SentenceTransformer; without it the constructor raises NameError.
        from sentence_transformers import SentenceTransformer

        self.model = SentenceTransformer(modelName)
        genai.configure(api_key=genaiApiKey)
        self.genaiModel = genai.GenerativeModel(model_name="gemini-1.5-flash")

    def getSummary(self, transcriptionText):
        """Return a generated summary of ``transcriptionText``.

        A fixed notice string is returned, without calling the model, when
        the transcription is empty or None.
        """
        if not transcriptionText:
            return "No transcription text provided."
        inputUser = (f"This document contains a transcription of the video's audio. Please just provide a professionally crafted summary based on the transcript paragraph. Transcription: {transcriptionText}")
        response = self.genaiModel.generate_content(inputUser)
        return response.text

    def getAnswer(self, query, localData):
        """Answer ``query`` using the passages in ``localData`` as context.

        The passages are ranked against the query by QABot; the formatted
        ranking is appended to the prompt sent to the generative model.

        Args:
            query: The question text.
            localData: Iterable of candidate passage strings.

        Returns:
            The text of the generative model's response.
        """
        qaBot = QABot(self.model)
        answer = qaBot.answerQuery(query, localData)
        inputUser = f"For this question, I'm seeking the perfect answer. Please provide the answer directly. {query}\n\n{answer}"
        response = self.genaiModel.generate_content(inputUser)
        return response.text

@router.post("/process")
async def process_request(request: ProcessRequest):
    """Handle POST /process for the "transcribe" and "ask" actions.

    "transcribe" downloads the audio for the URL in ``request.input``,
    transcribes it and returns a generated summary. "ask" returns a
    generated answer for the question in ``request.input``.

    Raises:
        HTTPException: 400 for an unknown action; 500 when the
            transcription comes back empty.
    """
    # Always start from a clean slate: drop any audio left by a prior call.
    remove_file('video.m4a')

    requested_action = request.action
    payload = request.input

    if requested_action not in ("transcribe", "ask"):
        raise HTTPException(status_code=400, detail="Invalid action.")

    if requested_action == "ask":
        responder = GenaiQA(modelName, genaiApiKey)
        reply = responder.getAnswer(payload, [payload])
        return JSONResponse(content={"status": "success", "answer": reply})

    # Remaining case: "transcribe".
    transcriber = YouTubeTranscriber(api_key, payload)
    transcriber.download_video()
    transcript = transcriber.transcribe_video("video.m4a")
    if not transcript:
        raise HTTPException(status_code=500, detail="Transcription failed.")

    summariser = GenaiQA(modelName, genaiApiKey)
    summary = summariser.getSummary(transcript)
    return JSONResponse(content={"status": "success", "summary": summary})
Empty file added app/services/__init__.py
Empty file.
12 changes: 12 additions & 0 deletions app/services/file_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import os

def remove_file(filename):
    """Delete ``filename`` if it exists; do nothing when it is absent.

    The removal is attempted directly instead of checking for existence
    first, so a file that disappears between a check and the removal can
    no longer raise FileNotFoundError.

    Args:
        filename: Path of the file to delete.
    """
    try:
        os.remove(filename)
    except FileNotFoundError:
        # Already gone: that is the state the caller asked for.
        pass

def create_directory(directory):
    """Create ``directory`` (including missing parents) if it is absent.

    ``exist_ok=True`` makes the call safe when another process creates
    the directory concurrently. As before, nothing happens when the path
    already exists, even if it is not a directory.

    Args:
        directory: Path of the directory to create.
    """
    try:
        os.makedirs(directory, exist_ok=True)
    except FileExistsError:
        # The path exists but is not a directory; keep the historical
        # behaviour of silently leaving it alone.
        pass

def file_exists(filename):
    """Report whether ``filename`` refers to an existing path."""
    present = os.path.exists(filename)
    return present
33 changes: 33 additions & 0 deletions app/services/qa_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from sentence_transformers import SentenceTransformer
from numpy import dot
from numpy.linalg import norm

class QABot:
    """Rank text passages against a query by embedding cosine similarity."""

    def __init__(self, model):
        """Store the embedding model.

        Args:
            model: Object whose ``encode(text, convert_to_tensor=True)``
                result provides a ``tolist()`` method.
        """
        self.model = model

    def generateAnswer(self, query, relevantData):
        """Format ranked passages as a Markdown-style string.

        Scores are shown relative to the best score. When the best score is
        not positive the raw scores are shown instead, because dividing by
        zero would fail and dividing by a negative value would invert the
        ranking.

        Args:
            query: The question text (unused; kept for interface symmetry).
            relevantData: List of ``(score, text)`` pairs.

        Returns:
            The formatted passages, or a fixed notice when there are none.
        """
        if not relevantData:
            return "No relevant data found."
        scores = [score for score, _ in relevantData]
        maxScore = max(scores)
        if maxScore > 0:
            shownScores = [score / maxScore for score in scores]
        else:
            shownScores = scores
        return "\n\n".join(
            f"**Passage {i + 1} (Score: {shownScores[i]:.2f}):** {text}"
            for i, (_, text) in enumerate(relevantData)
        )

    def answerQuery(self, query, localData):
        """Rank ``localData`` against ``query`` and return the formatted result."""
        relevantData = self.fetchRelevantData(query, localData)
        return self.generateAnswer(query, relevantData)

    def fetchRelevantData(self, query, localData, topK=5):
        """Return the ``topK`` highest-scoring ``(score, text)`` pairs.

        Args:
            query: The question text.
            localData: Iterable of candidate passage strings.
            topK: Maximum number of pairs to return.

        Returns:
            Pairs sorted by descending similarity to the query.
        """
        queryEmbedding = self.model.encode(query, convert_to_tensor=True).tolist()
        scoresAndTexts = []
        for text in localData:
            textEmbedding = self.model.encode(text, convert_to_tensor=True).tolist()
            score = self.computeSimilarity(queryEmbedding, textEmbedding)
            scoresAndTexts.append((score, text))
        scoresAndTexts.sort(key=lambda pair: pair[0], reverse=True)
        return scoresAndTexts[:topK]

    def computeSimilarity(self, queryEmbedding, textEmbedding):
        """Return the cosine similarity of two embedding vectors.

        Returns 0.0 when either vector has zero length, where the cosine is
        undefined and the division would otherwise produce NaN.
        """
        denominator = norm(queryEmbedding) * norm(textEmbedding)
        if denominator == 0:
            return 0.0
        return dot(queryEmbedding, textEmbedding) / denominator
25 changes: 25 additions & 0 deletions app/services/transcription_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import os
import assemblyai as aai
from utils.downloader import YouTubeDownloader

class YouTubeTranscriber:
    """Download a video's audio and transcribe it with AssemblyAI."""

    def __init__(self, api_key, url):
        """Remember the credentials and URL and prepare a downloader.

        Note that the API key is also written to the module-wide
        ``aai.settings``.
        """
        self.api_key = api_key
        self.url = url
        aai.settings.api_key = api_key
        self.downloader = YouTubeDownloader(url)

    def remove_existing_video(self, filename):
        """Delete ``filename`` when it is present on disk."""
        if not os.path.exists(filename):
            return
        os.remove(filename)

    def download_video(self):
        """Delegate the download to the underlying downloader."""
        self.downloader.download_video()

    def transcribe_video(self, filename):
        """Return the transcript text for ``filename``.

        An empty string is returned (after printing a notice) when the
        file does not exist.
        """
        if os.path.exists(filename):
            result = aai.Transcriber().transcribe(filename)
            return result.text
        print(f"File {filename} not found.")
        return ""
3 changes: 3 additions & 0 deletions run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash

# Launch the `app` object from app/main.py with uvicorn, listening on all
# interfaces at port 8000. --reload restarts the server when code changes.
uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload
Empty file added utils/__init__.py
Empty file.
25 changes: 25 additions & 0 deletions utils/downloader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import yt_dlp
import os

class YouTubeDownloader:
    """Download the audio of a single video into ``video.<ext>`` via yt-dlp."""

    def __init__(self, url):
        """Store the URL and build the yt-dlp options.

        Args:
            url: Address of the video to download.
        """
        self.url = url
        self.ydl_opts = {
            'outtmpl': 'video.%(ext)s',
            # Prefer an m4a audio stream so the output is named video.m4a,
            # which is the file name this class cleans up before a download.
            # Plain 'bestaudio/best' frequently selects a stream with another
            # extension. The original selectors remain as fallbacks, so the
            # extension can still differ when no m4a stream is offered.
            'format': 'bestaudio[ext=m4a]/bestaudio/best',
            'noplaylist': True,
            'merge_output_format': None,
        }

    def remove_existing_video(self, filename):
        """Delete ``filename`` when it is present on disk."""
        if os.path.exists(filename):
            os.remove(filename)

    def download_video(self):
        """Download the audio for ``self.url``, replacing any old video.m4a.

        Failures are reported on stdout and not re-raised; callers detect a
        failed download by the output file being absent.
        """
        try:
            self.remove_existing_video('video.m4a')
            with yt_dlp.YoutubeDL(self.ydl_opts) as ydl:
                ydl.download([self.url])
            print("Download completed successfully!")
        except Exception as e:
            print(f"An error occurred: {e}")
12 changes: 12 additions & 0 deletions utils/file_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import os

def remove_file(filename):
    """Remove ``filename``, treating an already-missing file as success.

    Attempting the removal and catching FileNotFoundError avoids the race
    in which the file vanishes between an existence check and the delete.

    Args:
        filename: Path of the file to remove.
    """
    try:
        os.remove(filename)
    except FileNotFoundError:
        # Nothing to remove; the desired end state already holds.
        pass

def create_directory(directory):
    """Ensure ``directory`` exists, creating missing parents as needed.

    Uses ``exist_ok=True`` so a directory created concurrently by another
    process is not an error. A path that already exists as something other
    than a directory is left untouched, as it was previously.

    Args:
        directory: Path of the directory to create.
    """
    try:
        os.makedirs(directory, exist_ok=True)
    except FileExistsError:
        # Existing non-directory at this path: preserve the old no-op.
        pass

def file_exists(filename):
    """Return True when ``filename`` names an existing path, else False."""
    is_present = os.path.exists(filename)
    return is_present

0 comments on commit 4b23c8a

Please sign in to comment.