-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Refactoring the entire project structure.
- Loading branch information
Showing
13 changed files
with
206 additions
and
0 deletions.
There are no files selected for viewing
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
from fastapi import FastAPI | ||
from fastapi.responses import JSONResponse | ||
from fastapi.staticfiles import StaticFiles | ||
from fastapi.templating import Jinja2Templates | ||
from app.routes.process_routes import router as process_router | ||
from app.routes.home_routes import router as home_router | ||
|
||
# Application object that the mount, the routers and the route below attach to.
app = FastAPI()

# Template loader pointed at the "templates" directory.
templates = Jinja2Templates(directory="templates")

# Serve files from the "static" directory under the /static URL prefix.
app.mount(
    "/static",
    StaticFiles(directory="static"),
    name="static",
)

# Router registration order is kept: process routes first, then home routes.
app.include_router(process_router)
app.include_router(home_router)


@app.get("/")
async def root():
    """Return a JSON welcome payload for GET /."""
    # NOTE(review): home_router is included above and also declares GET "/",
    # so this handler is presumably shadowed by it -- confirm which one is
    # meant to serve the index.
    welcome = {"message": "Welcome to the Court Case Scraper API!"}
    return welcome
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
from fastapi import APIRouter | ||
from fastapi.responses import HTMLResponse | ||
from fastapi.templating import Jinja2Templates | ||
from fastapi import Request | ||
|
||
# Router carrying the HTML landing page; included by the application module.
router = APIRouter()

# Template loader pointed at the "templates" directory.
templates = Jinja2Templates(directory="templates")


@router.get("/", response_class=HTMLResponse)
async def read_root(request: Request):
    """Render ``index.html`` for GET /.

    The incoming request object is handed to the template under the
    ``"request"`` context key.
    """
    context = {"request": request}
    return templates.TemplateResponse("index.html", context)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
from fastapi import APIRouter, HTTPException | ||
from fastapi.responses import JSONResponse | ||
from pydantic import BaseModel | ||
from app.services.transcription_service import YouTubeTranscriber | ||
from app.services.qa_service import QABot | ||
from utils.file_utils import remove_file | ||
import os | ||
import google.generativeai as genai | ||
|
||
# Router that exposes the POST /process endpoint defined further down.
router = APIRouter()


# Request body for POST /process.
class ProcessRequest(BaseModel):
    # Operation selector; the handler recognises "transcribe" and "ask".
    action: str
    # Payload for the chosen action (passed to the transcriber or used as
    # the question, depending on ``action``).
    input: str


# Credentials are read from the environment once, at import time; either
# value is None when the corresponding variable is unset.
api_key = os.environ.get("ASSEMBLYAI_API_KEY")
genaiApiKey = os.environ.get("GENAI_API_KEY")

# Name of the sentence-embedding model handed to GenaiQA.
modelName = "multi-qa-mpnet-base-dot-v1"
|
||
class GenaiQA:
    """Pair a sentence-embedding model with a Gemini generative model.

    The embedding model is used (through ``QABot``) to rank passages, and
    the generative model turns a transcript or a ranked passage list into
    the final text returned to the caller.
    """

    def __init__(self, modelName, genaiApiKey):
        """Load the embedding model and configure the generative client.

        Args:
            modelName: Name passed to ``SentenceTransformer``.
            genaiApiKey: API key passed to ``genai.configure``.
        """
        # This module never imports SentenceTransformer at the top level, so
        # the name was undefined here and construction raised NameError.
        # Importing locally keeps the fix self-contained in this class.
        from sentence_transformers import SentenceTransformer

        self.model = SentenceTransformer(modelName)
        genai.configure(api_key=genaiApiKey)
        self.genaiModel = genai.GenerativeModel(model_name="gemini-1.5-flash")

    def getSummary(self, transcriptionText):
        """Ask the generative model for a summary of a transcript.

        Args:
            transcriptionText: Transcript text to summarise.

        Returns:
            The generated summary text, or a fixed notice when the
            transcript is empty.
        """
        if not transcriptionText:
            return "No transcription text provided."
        inputUser = (
            "This document contains a transcription of the video's audio. "
            "Please just provide a professionally crafted summary based on "
            "the transcript paragraph. "
            f"Transcription: {transcriptionText}"
        )
        response = self.genaiModel.generate_content(inputUser)
        return response.text

    def getAnswer(self, query, localData):
        """Answer ``query`` using passages ranked from ``localData``.

        Args:
            query: The user's question.
            localData: Iterable of candidate text passages to rank.

        Returns:
            The text generated by the model for the question plus the
            ranked passages.
        """
        qaBot = QABot(self.model)
        # The ranked passages are appended after the question so the
        # generative model can ground its answer in them.
        answer = qaBot.answerQuery(query, localData)
        inputUser = (
            "For this question, I'm seeking the perfect answer. "
            "Please provide the answer directly. "
            f"{query}\n\n{answer}"
        )
        response = self.genaiModel.generate_content(inputUser)
        return response.text
|
||
@router.post("/process")
async def process_request(request: ProcessRequest):
    """Handle POST /process for the "transcribe" and "ask" actions.

    Args:
        request: Parsed request body with ``action`` and ``input``.

    Returns:
        A JSON response with ``status`` plus either ``summary``
        (for "transcribe") or ``answer`` (for "ask").

    Raises:
        HTTPException: 500 when no transcript text is produced, 400 when
            ``action`` is not recognised.
    """
    audio_path = "video.m4a"
    action = request.action
    input_text = request.input

    # Drop any audio left over from an earlier request. Uses the shared
    # helper that this module already imports instead of duplicating it.
    remove_file(audio_path)

    if action == "transcribe":
        yt_transcriber = YouTubeTranscriber(api_key, input_text)
        try:
            yt_transcriber.download_video()
            transcript_text = yt_transcriber.transcribe_video(audio_path)
        finally:
            # The audio is only needed for transcription; remove it so it
            # does not linger on disk until the next request.
            remove_file(audio_path)

        if not transcript_text:
            raise HTTPException(status_code=500, detail="Transcription failed.")

        genaiQA = GenaiQA(modelName, genaiApiKey)
        summary_text = genaiQA.getSummary(transcript_text)
        return JSONResponse(content={"status": "success", "summary": summary_text})

    if action == "ask":
        genaiQA = GenaiQA(modelName, genaiApiKey)
        # NOTE(review): the question itself is the only passage supplied as
        # local data here; presumably a stored transcript was intended --
        # confirm against the intended design.
        answer_text = genaiQA.getAnswer(input_text, [input_text])
        return JSONResponse(content={"status": "success", "answer": answer_text})

    raise HTTPException(status_code=400, detail="Invalid action.")
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
import os | ||
|
||
def remove_file(filename):
    """Delete *filename*, doing nothing when it is already absent.

    Args:
        filename: Path of the file to delete.

    Raises:
        OSError: For failures other than the path not existing (for
            example, the path is a directory or permission is denied).
    """
    # Attempt the removal directly instead of checking for existence first:
    # the file may vanish between a check and the removal.
    try:
        os.remove(filename)
    except (FileNotFoundError, NotADirectoryError):
        pass
|
||
def create_directory(directory):
    """Create *directory* (and missing parents) unless the path exists.

    Args:
        directory: Path of the directory to create.
    """
    # Create first and tolerate "already exists" rather than checking
    # beforehand: another process may create the path between a check and
    # the creation. An existing path is left untouched, as before.
    try:
        os.makedirs(directory)
    except FileExistsError:
        pass
|
||
def file_exists(filename):
    """Report whether *filename* refers to an existing path."""
    found = os.path.exists(filename)
    return found
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
from sentence_transformers import SentenceTransformer | ||
from numpy import dot | ||
from numpy.linalg import norm | ||
|
||
class QABot:
    """Rank text passages against a query by cosine similarity."""

    def __init__(self, model):
        """Store the embedding model.

        Args:
            model: Object whose ``encode(text, convert_to_tensor=True)``
                result offers ``tolist()`` (presumably a
                SentenceTransformer -- confirm against the caller).
        """
        self.model = model

    def generateAnswer(self, query, relevantData):
        """Format scored passages as a Markdown-style listing.

        Args:
            query: The user's question (not used in the formatting).
            relevantData: Sequence of ``(score, text)`` pairs.

        Returns:
            One "Passage N" entry per pair, separated by blank lines, or a
            fixed notice when ``relevantData`` is empty.
        """
        if not relevantData:
            return "No relevant data found."
        scores = [score for score, _ in relevantData]
        maxScore = max(scores)
        # Scale relative to the best score only when that score is positive.
        # A zero maximum would divide by zero and a negative one would flip
        # the ordering, so in those cases the raw scores are shown.
        if maxScore > 0:
            shownScores = [score / maxScore for score in scores]
        else:
            shownScores = scores
        return "\n\n".join(
            f"**Passage {i + 1} (Score: {shownScores[i]:.2f}):** {text}"
            for i, (_, text) in enumerate(relevantData)
        )

    def answerQuery(self, query, localData):
        """Rank ``localData`` against ``query`` and format the top passages."""
        relevantData = self.fetchRelevantData(query, localData)
        return self.generateAnswer(query, relevantData)

    def fetchRelevantData(self, query, localData, topK=5):
        """Return the ``topK`` passages most similar to ``query``.

        Args:
            query: The user's question.
            localData: Iterable of candidate text passages.
            topK: Maximum number of passages to return.

        Returns:
            List of ``(score, text)`` pairs, highest score first.
        """
        # The query is embedded once and reused for every passage.
        queryEmbedding = self.model.encode(query, convert_to_tensor=True).tolist()
        scoresAndTexts = []
        for text in localData:
            textEmbedding = self.model.encode(text, convert_to_tensor=True).tolist()
            score = self.computeSimilarity(queryEmbedding, textEmbedding)
            scoresAndTexts.append((score, text))
        scoresAndTexts.sort(key=lambda pair: pair[0], reverse=True)
        return scoresAndTexts[:topK]

    def computeSimilarity(self, queryEmbedding, textEmbedding):
        """Return the cosine similarity of two embedding vectors.

        A zero-length vector has no direction, so the similarity is
        reported as 0.0 instead of dividing by zero.
        """
        denominator = norm(queryEmbedding) * norm(textEmbedding)
        if denominator == 0:
            return 0.0
        return dot(queryEmbedding, textEmbedding) / denominator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
import os | ||
import assemblyai as aai | ||
from utils.downloader import YouTubeDownloader | ||
|
||
class YouTubeTranscriber:
    """Download a video's audio and transcribe it with AssemblyAI."""

    def __init__(self, api_key, url):
        """Configure the AssemblyAI key and build the downloader.

        Args:
            api_key: AssemblyAI API key; written to the SDK's global settings.
            url: URL handed to ``YouTubeDownloader``.
        """
        self.api_key = api_key
        self.url = url
        aai.settings.api_key = self.api_key
        self.downloader = YouTubeDownloader(self.url)

    def remove_existing_video(self, filename):
        """Delete ``filename`` if it is present."""
        if os.path.exists(filename):
            os.remove(filename)

    def download_video(self):
        """Delegate the download to the configured downloader."""
        self.downloader.download_video()

    def transcribe_video(self, filename):
        """Transcribe the audio file at ``filename``.

        Args:
            filename: Path of the local audio file.

        Returns:
            The transcript text, or ``""`` when the file is missing, the
            transcription reports an error, or no text is produced.
        """
        if not os.path.exists(filename):
            print(f"File {filename} not found.")
            return ""
        transcriber = aai.Transcriber()
        transcript = transcriber.transcribe(filename)
        # A failed job comes back as a transcript in the error state rather
        # than as an exception; surface the reason instead of losing it.
        if transcript.status == aai.TranscriptStatus.error:
            print(f"Transcription failed: {transcript.error}")
            return ""
        # Always hand back a string so callers can rely on one return type.
        return transcript.text or ""
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
#!/bin/bash

# Start uvicorn serving the `app` object from the app.main module,
# with auto-reload switched on.
host="0.0.0.0"
port=8000

uvicorn app.main:app --host "$host" --port "$port" --reload
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
import yt_dlp | ||
import os | ||
|
||
class YouTubeDownloader:
    """Download the audio track of a single video with yt-dlp."""

    def __init__(self, url):
        """Remember the URL and prepare the yt-dlp options.

        Args:
            url: Address of the video to download.
        """
        self.url = url
        self.ydl_opts = {
            # Output is written as "video.<ext>" in the working directory.
            'outtmpl': 'video.%(ext)s',
            # Prefer an m4a audio stream: download_video() clears
            # 'video.m4a' and callers read that name, while a plain
            # "bestaudio" may pick a stream with another extension. The
            # previous selectors remain as fallbacks.
            'format': 'bestaudio[ext=m4a]/bestaudio/best',
            'noplaylist': True,
            'merge_output_format': None,
        }

    def remove_existing_video(self, filename):
        """Delete ``filename`` if it is present."""
        if os.path.exists(filename):
            os.remove(filename)

    def download_video(self):
        """Download the audio for ``self.url``.

        Failures are reported on stdout rather than raised, as before, but
        the outcome is now also returned so callers can react to it.

        Returns:
            True when the download finished without an exception,
            False otherwise.
        """
        try:
            self.remove_existing_video('video.m4a')
            with yt_dlp.YoutubeDL(self.ydl_opts) as ydl:
                ydl.download([self.url])
        except Exception as e:
            # Deliberately broad: this is a best-effort boundary and the
            # method has never propagated download errors.
            print(f"An error occurred: {e}")
            return False
        print("Download completed successfully!")
        return True
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
import os | ||
|
||
def remove_file(filename):
    """Delete *filename*, doing nothing when it is already absent.

    Args:
        filename: Path of the file to delete.

    Raises:
        OSError: For failures other than the path not existing (for
            example, the path is a directory or permission is denied).
    """
    # Attempt the removal directly instead of checking for existence first:
    # the file may vanish between a check and the removal.
    try:
        os.remove(filename)
    except (FileNotFoundError, NotADirectoryError):
        pass
|
||
def create_directory(directory):
    """Create *directory* (and missing parents) unless the path exists.

    Args:
        directory: Path of the directory to create.
    """
    # Create first and tolerate "already exists" rather than checking
    # beforehand: another process may create the path between a check and
    # the creation. An existing path is left untouched, as before.
    try:
        os.makedirs(directory)
    except FileExistsError:
        pass
|
||
def file_exists(filename):
    """Report whether *filename* refers to an existing path."""
    found = os.path.exists(filename)
    return found