
Commit

Refactor project structure, add Docker support, and improve environment variable management.
AnamolZ committed Nov 19, 2024
1 parent 4b23c8a commit ab5db33
Showing 22 changed files with 149 additions and 339 deletions.
16 changes: 14 additions & 2 deletions .gitignore
@@ -1,3 +1,15 @@
.env
# Python bytecode files
__pycache__/
*.pyc
*.pyo

# Virtual environment
venv/
__pycache__/
env/

# .env file (contains sensitive API keys)
.env

# Docker-related files
docker-compose.override.yml
*.log
16 changes: 8 additions & 8 deletions Dockerfile
@@ -7,16 +7,16 @@ WORKDIR /app
# Copy the current directory contents into the container at /app
COPY . /app

# Install any needed packages specified in requirements.txt
# Install dependencies from the requirements.txt
RUN pip install --no-cache-dir --upgrade pip \
&& pip install --no-cache-dir -r requirements.txt

# Make port 8000 available to the world outside this container
EXPOSE 8000
# Set environment variables from the .env file or the docker-compose.yml
ENV ASSEMBLYAI_API_KEY=${ASSEMBLYAI_API_KEY}
ENV GENAI_API_KEY=${GENAI_API_KEY}

# Define environment variable
ENV ASSEMBLYAI_API_KEY="ASSEMBLYAI_API_KEY"
ENV GENAI_API_KEY="GENAI_API_KEY"
# Expose port 8000
EXPOSE 8000

# Run app.py when the container launches
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
# Command to run the FastAPI app using Uvicorn
CMD ["uvicorn", "app.app:app", "--host", "0.0.0.0", "--port", "8000"]
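
Note on the ENV lines above: in a Dockerfile, ${ASSEMBLYAI_API_KEY} only resolves at build time if a matching ARG is declared, so in practice the keys arrive at runtime from docker-compose's environment: block or a mounted .env file. Because load_dotenv() does not override variables already set in the process environment, compose-supplied values take precedence over a baked-in .env. A minimal sketch of that precedence (the fail-fast guard is illustrative, not part of this commit):

import os
from dotenv import load_dotenv

# load_dotenv() keeps any value already present in the environment
# (e.g. injected by docker-compose) and only fills in missing keys from .env.
load_dotenv()  # override=False is the default

api_key = os.getenv("ASSEMBLYAI_API_KEY")
if not api_key:
    # Illustrative guard: fail fast rather than calling AssemblyAI with an empty key.
    raise RuntimeError("ASSEMBLYAI_API_KEY is not set")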
59 changes: 59 additions & 0 deletions app/app.py
@@ -0,0 +1,59 @@
import os
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import JSONResponse, HTMLResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from dotenv import load_dotenv
from pydantic import BaseModel
from app.services.youtube_service import YouTubeService
from app.services.genai_service import GenaiService

# Load environment variables
load_dotenv()

# FastAPI app initialization
app = FastAPI()

# Mount static files and templates
app.mount("/static", StaticFiles(directory="static"), name="static")
templates = Jinja2Templates(directory="app/templates")

# Load API keys from .env
api_key = os.getenv("ASSEMBLYAI_API_KEY")
genaiApiKey = os.getenv("GENAI_API_KEY")
modelName = "multi-qa-mpnet-base-dot-v1"

class ProcessRequest(BaseModel):
action: str
input: str

@app.post("/process")
async def process_request(request: ProcessRequest):
action = request.action
input_text = request.input

if action == "transcribe":
yt_service = YouTubeService(api_key, input_text)
yt_service.download_video()
transcript_text = yt_service.transcribe_video("video.m4a")
if not transcript_text:
raise HTTPException(status_code=500, detail="Transcription failed.")

genai_service = GenaiService(modelName, genaiApiKey)
summary_text = genai_service.get_summary(transcript_text)
return JSONResponse(content={"status": "success", "summary": summary_text})

elif action == "ask":
genai_service = GenaiService(modelName, genaiApiKey)
answer_text = genai_service.get_answer(input_text, [input_text])
return JSONResponse(content={"status": "success", "answer": answer_text})

else:
raise HTTPException(status_code=400, detail="Invalid action.")

@app.get("/", response_class=HTMLResponse)
async def read_root(request: Request):
return templates.TemplateResponse("index.html", {"request": request})

if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=8000)
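
For reference, a minimal client call against the new /process route could look like the sketch below (assumes the service is running locally on port 8000; the requests dependency and the example YouTube URL are illustrative, not part of this commit):

import requests

BASE_URL = "http://localhost:8000"  # assumed local deployment

# Download, transcribe, and summarize a video.
resp = requests.post(
    f"{BASE_URL}/process",
    json={"action": "transcribe", "input": "https://www.youtube.com/watch?v=example"},
)
print(resp.json())  # {"status": "success", "summary": "..."} on success

# Ask a follow-up question.
resp = requests.post(
    f"{BASE_URL}/process",
    json={"action": "ask", "input": "What is the video about?"},
)
print(resp.json())  # {"status": "success", "answer": "..."}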
18 changes: 0 additions & 18 deletions app/main.py

This file was deleted.

File renamed without changes.
11 changes: 0 additions & 11 deletions app/routes/home_routes.py

This file was deleted.

67 changes: 0 additions & 67 deletions app/routes/process_routes.py

This file was deleted.

12 changes: 0 additions & 12 deletions app/services/file_service.py

This file was deleted.

23 changes: 23 additions & 0 deletions app/services/genai_service.py
@@ -0,0 +1,23 @@
import google.generativeai as genai
from app.services.qa_service import QAService
from sentence_transformers import SentenceTransformer

class GenaiService:
def __init__(self, model_name, genai_api_key):
self.model = SentenceTransformer(model_name)
genai.configure(api_key=genai_api_key)
self.genai_model = genai.GenerativeModel(model_name="gemini-1.5-flash")

def get_summary(self, transcription_text):
if not transcription_text:
return "No transcription text provided."
input_user = (f"This document contains a transcription of the video's audio. Please provide a professionally crafted summary based on the transcript. Transcription: {transcription_text}")
response = self.genai_model.generate_content(input_user)
return response.text

def get_answer(self, query, local_data):
qa_service = QAService(self.model)
answer = qa_service.answer_query(query, local_data)
input_user = f"For this question, provide a direct and accurate answer. {query}\n\n{answer}"
response = self.genai_model.generate_content(input_user)
return response.text
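
A minimal usage sketch for GenaiService (assumes GENAI_API_KEY is set and the sentence-transformers model can be downloaded; the sample transcript is illustrative):

import os
from app.services.genai_service import GenaiService

# Illustrative transcript; in the app this text comes from YouTubeService.
transcript = "The speaker walks through containerizing a FastAPI app with Docker."

service = GenaiService("multi-qa-mpnet-base-dot-v1", os.getenv("GENAI_API_KEY"))
print(service.get_summary(transcript))  # Gemini-generated summary
print(service.get_answer("Which framework is discussed?", [transcript]))  # answer grounded in the transcript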
44 changes: 22 additions & 22 deletions app/services/qa_service.py
@@ -2,32 +2,32 @@
from numpy import dot
from numpy.linalg import norm

class QABot:
class QAService:
def __init__(self, model):
self.model = model

def generateAnswer(self, query, relevantData):
if not relevantData:
def generate_answer(self, query, relevant_data):
if not relevant_data:
return "No relevant data found."
scores = [score for score, _ in relevantData]
maxScore = max(scores) if scores else 1
normalizedScores = [score / maxScore for score in scores]
return "\n\n".join(f"**Passage {i + 1} (Score: {normalizedScores[i]:.2f}):** {text}"
for i, (_, text) in enumerate(relevantData))
scores = [score for score, _ in relevant_data]
max_score = max(scores) if scores else 1
normalized_scores = [score / max_score for score in scores]
return "\n\n".join(f"**Passage {i + 1} (Score: {normalized_scores[i]:.2f}):** {text}"
for i, (_, text) in enumerate(relevant_data))

def answerQuery(self, query, localData):
relevantData = self.fetchRelevantData(query, localData)
return self.generateAnswer(query, relevantData)
def answer_query(self, query, local_data):
relevant_data = self.fetch_relevant_data(query, local_data)
return self.generate_answer(query, relevant_data)

def fetchRelevantData(self, query, localData, topK=5):
queryEmbedding = self.model.encode(query, convert_to_tensor=True).tolist()
scoresAndTexts = []
for text in localData:
textEmbedding = self.model.encode(text, convert_to_tensor=True).tolist()
score = self.computeSimilarity(queryEmbedding, textEmbedding)
scoresAndTexts.append((score, text))
sortedScoresAndTexts = sorted(scoresAndTexts, key=lambda x: x[0], reverse=True)
return sortedScoresAndTexts[:topK]
def fetch_relevant_data(self, query, local_data, top_k=5):
query_embedding = self.model.encode(query, convert_to_tensor=True).tolist()
scores_and_texts = []
for text in local_data:
text_embedding = self.model.encode(text, convert_to_tensor=True).tolist()
score = self.compute_similarity(query_embedding, text_embedding)
scores_and_texts.append((score, text))
sorted_scores_and_texts = sorted(scores_and_texts, key=lambda x: x[0], reverse=True)
return sorted_scores_and_texts[:top_k]

def computeSimilarity(self, queryEmbedding, textEmbedding):
return dot(queryEmbedding, textEmbedding) / (norm(queryEmbedding) * norm(textEmbedding))
def compute_similarity(self, query_embedding, text_embedding):
return dot(query_embedding, text_embedding) / (norm(query_embedding) * norm(text_embedding))
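
QAService only needs a sentence-transformers model, so the retrieval step can be exercised without a Gemini key. A small sketch (the passages are illustrative):

from sentence_transformers import SentenceTransformer
from app.services.qa_service import QAService

model = SentenceTransformer("multi-qa-mpnet-base-dot-v1")  # same model the app configures
qa = QAService(model)

passages = [
    "Docker support was added via a Dockerfile and docker-compose.yml.",
    "API keys are read from environment variables.",
    "The project serves a FastAPI app with Uvicorn on port 8000.",
]

# Embeds the query and each passage, scores them by cosine similarity,
# and prints the top passages with normalized scores.
print(qa.answer_query("How is the app containerized?", passages))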
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
25 changes: 0 additions & 25 deletions app/services/transcription_service.py

This file was deleted.

16 changes: 8 additions & 8 deletions utils/downloader.py → app/services/youtube_service.py
@@ -1,15 +1,19 @@
import yt_dlp
import os
import yt_dlp
import assemblyai as aai
from rich import print

class YouTubeDownloader:
def __init__(self, url):
class YouTubeService:
def __init__(self, api_key, url):
self.api_key = api_key
self.url = url
self.ydl_opts = {
'outtmpl': 'video.%(ext)s',
'format': 'bestaudio/best',
'noplaylist': True,
'merge_output_format': None,
}
aai.settings.api_key = self.api_key

def remove_existing_video(self, filename):
if os.path.exists(filename):
@@ -18,8 +22,4 @@ def remove_existing_video(self, filename):
def download_video(self):
try:
self.remove_existing_video('video.m4a')
with yt_dlp.YoutubeDL(self.ydl_opts) as ydl:
ydl.download([self.url])
print("Download completed successfully!")
except Exception as e:
print(f"An error occurred: {e}")
with yt_dlp.YoutubeDL(self.ydl_opts) as ydl:
    ydl.download([self.url])
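
app/app.py drives this service by downloading the audio and then transcribing the resulting video.m4a. A usage sketch under that reading (the URL is illustrative, and transcribe_video is assumed from its call site in app/app.py rather than shown in this hunk):

import os
from app.services.youtube_service import YouTubeService

yt = YouTubeService(os.getenv("ASSEMBLYAI_API_KEY"), "https://www.youtube.com/watch?v=example")
yt.download_video()                      # fetches best audio as video.m4a via yt-dlp
text = yt.transcribe_video("video.m4a")  # AssemblyAI transcription, as called from app/app.py
print(text)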
16 changes: 15 additions & 1 deletion docker-compose.yml
@@ -9,4 +9,18 @@ services:
ASSEMBLYAI_API_KEY: ${ASSEMBLYAI_API_KEY}
GENAI_API_KEY: ${GENAI_API_KEY}
volumes:
- .:/app
- .:/app
depends_on:
- api

api:
image: python:3.12-slim
container_name: fastapi-container
environment:
ASSEMBLYAI_API_KEY: ${ASSEMBLYAI_API_KEY}
GENAI_API_KEY: ${GENAI_API_KEY}
command: uvicorn app.app:app --host 0.0.0.0 --port 8000
volumes:
- .:/app
ports:
- "8000:8000"