diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..ee073b1
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,40 @@
+name: FastAPI CI
+
+on: [push, pull_request]
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v4  # v3 runs on the deprecated Node 16 runtime
+
+    - name: Set up Python
+      uses: actions/setup-python@v5  # v4 runs on the deprecated Node 16 runtime
+      with:
+        python-version: '3.10'
+        cache: 'pip'
+
+    - name: Install dependencies
+      run: |
+        cd backend
+        pip install -r requirements.txt
+
+    - name: Start FastAPI app
+      run: |
+        cd backend
+        nohup fastapi dev > /dev/null 2>&1 &
+        echo $! > uvicorn.pid
+        sleep 10
+
+    - name: Check FastAPI /health endpoint
+      run: |
+        response=$(curl -s --retry 5 --retry-delay 2 --retry-connrefused http://127.0.0.1:8000/health)  # retry: startup may outlast the fixed sleep
+        echo "Response: $response"
+        if [ "$response" == '{"status":"Active"}' ]; then
+          echo "Health check passed"
+        else
+          echo "Health check failed"
+          exit 1
+        fi
diff --git a/backend/api/__init__.py b/backend/api/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/backend/api/health.py b/backend/api/health.py
new file mode 100644
index 0000000..6c7982a
--- /dev/null
+++ b/backend/api/health.py
@@ -0,0 +1,9 @@
+
+from fastapi import APIRouter
+
+health_router = APIRouter()
+
+
+@health_router.get("/health")
+async def check_health():
+    return {"status": "Active"}
diff --git a/backend/api/query.py b/backend/api/query.py
new file mode 100644
index 0000000..58e1136
--- /dev/null
+++ b/backend/api/query.py
@@ -0,0 +1,31 @@
+from fastapi import APIRouter
+from fastapi.responses import StreamingResponse
+
+from backend.rag_llms_langchain import chain
+from backend.embeddings.ingest import get_vectorstore
+
+import json
+import uuid
+
+
+query_router = APIRouter()
+
+
+@query_router.get("/query")
+async def query(query: str):
+    # if current_user.role < 5:
+    #     raise HTTPException(status_code=403, detail="Only admin users can delete other users")
+    store = get_vectorstore()
+    docs = store.invoke(query)
+
+    print(20*"*", "docs", 
20*"*", "\n", docs)
+
+    async def stream_generator():
+        # Stream the LangChain output; each JSON event ends with "\n" because the response is declared as NDJSON
+        print(20*'*', "\n", query)
+        async for text in chain.astream({"input": query, "context": docs}):
+            yield json.dumps({"event_id": str(uuid.uuid4()), "data": text}) + "\n"
+
+        # TODO here we have to add the metadata/source
+
+    return StreamingResponse(stream_generator(), media_type="application/x-ndjson")
\ No newline at end of file
diff --git a/backend/api/token.py b/backend/api/token.py
new file mode 100644
index 0000000..3b04164
--- /dev/null
+++ b/backend/api/token.py
@@ -0,0 +1,26 @@
+
+from fastapi import APIRouter, Depends, HTTPException
+from backend.pydantic_models import Token
+from backend.oauth import authenticate_user, create_access_token
+from backend.config import ACCESS_TOKEN_EXPIRE_MINUTES
+from fastapi.security import OAuth2PasswordRequestForm
+from datetime import timedelta
+
+
+token_router = APIRouter()
+
+
+@token_router.post("/token", response_model=Token)
+async def login_for_access_token(form_data: OAuth2PasswordRequestForm = Depends()):
+    user = authenticate_user(form_data.username, form_data.password)
+    if not user:
+        raise HTTPException(
+            status_code=401,
+            detail="Incorrect username or password",
+            headers={"WWW-Authenticate": "Bearer"},
+        )
+    access_token_expires = timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
+    access_token = create_access_token(
+        data={"sub": user.username}, expires_delta=access_token_expires
+    )
+    return {"access_token": access_token, "token_type": "bearer"}
\ No newline at end of file
diff --git a/backend/api/user.py b/backend/api/user.py
new file mode 100644
index 0000000..74d9e5a
--- /dev/null
+++ b/backend/api/user.py
@@ -0,0 +1,73 @@
+
+from fastapi import APIRouter, Depends, HTTPException
+from backend.sqlalchemy_models import User
+from backend.sessions import session
+from backend.oauth import encrypt_password, get_current_user
+
+
+user_router = APIRouter()
+
+
+
+@user_router.get("/users/me")
+async def 
read_users_me(current_user: User = Depends(get_current_user)):
+    return {"username": current_user.username, "role": current_user.role}
+
+
+@user_router.get("/users/")
+async def read_users(current_user: User = Depends(get_current_user)):
+    if current_user.role < 5:
+        raise HTTPException(
+            status_code=403, detail="Only admin users can view all users")
+    return [{"username": user.username, "role": user.role} for user in session.query(User).all()]
+
+
+@user_router.get("/users/{user_id}")
+async def read_user(user_id: int, current_user: User = Depends(get_current_user)):
+    if current_user.id != user_id and current_user.role < 5:
+        raise HTTPException(
+            status_code=403, detail="Only admin users can view other users")
+    user = session.query(User).filter(User.id == user_id).first()
+    if not user:
+        raise HTTPException(status_code=404, detail="User not found")
+    return {"username": user.username, "role": user.role}
+
+
+@user_router.post("/users/")
+async def create_user(username: str, password: str, role: int, current_user: User = Depends(get_current_user)):
+    if current_user.role < 5:
+        raise HTTPException(
+            status_code=403, detail="Only admin users can create new users")
+    user = User(username=username,
+                password_hash=encrypt_password(password), role=role)
+    session.add(user)
+    session.commit()
+    return {"username": user.username, "role": user.role}
+
+
+@user_router.put("/users/{user_id}")
+async def update_user(user_id: int, username: str, password: str, role: int, current_user: User = Depends(get_current_user)):
+    if current_user.id != user_id and current_user.role < 5:
+        raise HTTPException(
+            status_code=403, detail="Only admin users can update other users")
+    user = session.query(User).filter(User.id == user_id).first()
+    if not user:
+        raise HTTPException(status_code=404, detail="User not found")
+    user.username = username
+    user.password_hash = encrypt_password(password)  # the model only maps password_hash; store the hash as create_user does
+    user.role = role  # NOTE(review): a non-admin updating their own record can set any role here - confirm intended
+    session.commit()
+    return {"username": user.username, "role": user.role}
+
+
+@user_router.delete("/users/{user_id}")
+async def delete_user(user_id: int, current_user: User = Depends(get_current_user)):
+    if current_user.id != user_id and current_user.role < 5:
+        raise HTTPException(
+            status_code=403, detail="Only admin users can delete other users")
+    user = session.query(User).filter(User.id == user_id).first()
+    if not user:
+        raise HTTPException(status_code=404, detail="User not found")
+    session.delete(user)
+    session.commit()
+    return {"message": "User deleted"}
\ No newline at end of file
diff --git a/backend/config.py b/backend/config.py
new file mode 100644
index 0000000..41dca8d
--- /dev/null
+++ b/backend/config.py
@@ -0,0 +1,17 @@
+import os
+
+# JWT signing key. Read from the environment so a real secret never has to be
+# committed; the placeholder remains only as a local-development fallback.
+SECRET_KEY = os.environ.get("SECRET_KEY", "your_secret_key")
+ALGORITHM = "HS256"
+ACCESS_TOKEN_EXPIRE_MINUTES = 30
+
+# Origin allow-list (presumably for the CORS middleware - confirm in main.py).
+# NOTE(review): the original note said all origins are allowed for now and
+# that this must be changed; verify which setting the app actually uses.
+origins = [
+    "http://localhost",
+    "http://127.0.0.1:8000",
+]
+
+
diff --git a/backend/embeddings/__init__.py b/backend/embeddings/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/backend/ingest.py b/backend/embeddings/ingest.py
similarity index 100%
rename from backend/ingest.py
rename to backend/embeddings/ingest.py
diff --git a/backend/main.py b/backend/main.py
index 79b3003..971f3aa 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -1,49 +1,13 @@
-from fastapi import FastAPI, Depends, HTTPException
-from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
-from fastapi.responses import StreamingResponse
+from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
-from sqlalchemy import create_engine
-from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy import Column, Integer, String
-from sqlalchemy.orm import sessionmaker
-from pydantic import BaseModel
-from typing import Optional
-from datetime import datetime, timedelta
-import jwt
-from bcrypt import hashpw, gensalt, checkpw
-from .rag_llms_langchain import chain, 
langfuse_handler -from .ingest import get_vectorstore -import json -import uuid +from backend.api.health import health_router +from backend.api.user import user_router +from backend.api.token import token_router +from backend.api.query import query_router +from backend.utils import populate_admin_user -origins = [ - "http://localhost", - "http://127.0.0.1:8000", -] - -# SQLAlchemy setup - -engine = create_engine("sqlite:///users.db") -Base = declarative_base() - - -class User(Base): - __tablename__ = "users" - id = Column(Integer, primary_key=True) - username = Column(String, unique=True) - password_hash = Column(String) - # 1 = user, 4 = manager, 5 = admin, 6 = superadmin - role = Column(Integer, default=1) - - -Base.metadata.create_all(engine) - -Session = sessionmaker(bind=engine) -session = Session() - -# FastAPI setup app = FastAPI() app.add_middleware( @@ -54,192 +18,8 @@ class User(Base): allow_headers=["*"], ) -oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token") - -# Password encryption - - -def encrypt_password(password: str): - return hashpw(password.encode(), gensalt()) - - -def verify_password(plain_password: str, hashed_password: str): - return checkpw(plain_password.encode(), hashed_password) - -# User authentication - - -def get_user(username: str): - return session.query(User).filter(User.username == username).first() - - -def authenticate_user(username: str, password: str): - user = get_user(username) - if not user or not verify_password(password, user.password_hash): - return False - return user - - -# JWT secret key -SECRET_KEY = "your_secret_key" -ALGORITHM = "HS256" -ACCESS_TOKEN_EXPIRE_MINUTES = 30 - - -class Token(BaseModel): - access_token: str - token_type: str - - -class TokenData(BaseModel): - username: Optional[str] = None - - -def create_access_token(data: dict, expires_delta: Optional[timedelta] = None): - to_encode = data.copy() - if expires_delta: - expire = datetime.utcnow() + expires_delta - else: - expire = datetime.utcnow() + 
timedelta(minutes=15) - to_encode.update({"exp": expire}) - encoded_jwt = jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM) - return encoded_jwt - - -async def get_current_user(token: str = Depends(oauth2_scheme)): - credentials_exception = HTTPException( - status_code=401, - detail="Could not validate credentials", - headers={"WWW-Authenticate": "Bearer"}, - ) - try: - payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM]) - username: str = payload.get("sub") - if username is None: - raise credentials_exception - token_data = TokenData(username=username) - except jwt.ExpiredSignatureError: - raise HTTPException(status_code=401, detail="Token has expired") - except jwt.InvalidTokenError: - raise credentials_exception - user = get_user(username=token_data.username) - if user is None: - raise credentials_exception - return user - -# Populate admin user on first start - - -def populate_admin_user(): - admin_user = get_user("admin") - if not admin_user: - admin_user = User(username="admin", - password_hash=encrypt_password("admin"), role=6) - session.add(admin_user) - session.commit() - - +app.include_router(health_router) +app.include_router(token_router) +app.include_router(user_router) +app.include_router(query_router) populate_admin_user() - -# Routes - - -@app.post("/token", response_model=Token) -async def login_for_access_token(form_data: OAuth2PasswordRequestForm = Depends()): - user = authenticate_user(form_data.username, form_data.password) - if not user: - raise HTTPException( - status_code=401, - detail="Incorrect username or password", - headers={"WWW-Authenticate": "Bearer"}, - ) - access_token_expires = timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES) - access_token = create_access_token( - data={"sub": user.username}, expires_delta=access_token_expires - ) - return {"access_token": access_token, "token_type": "bearer"} - - -@app.get("/users/me") -async def read_users_me(current_user: User = Depends(get_current_user)): - return {"username": 
current_user.username, "role": current_user.role} - - -@app.get("/users/") -async def read_users(current_user: User = Depends(get_current_user)): - if current_user.role < 5: - raise HTTPException( - status_code=403, detail="Only admin users can view all users") - return [{"username": user.username, "role": user.role} for user in session.query(User).all()] - - -@app.get("/users/{user_id}") -async def read_user(user_id: int, current_user: User = Depends(get_current_user)): - if current_user.id != user_id and current_user.role < 5: - raise HTTPException( - status_code=403, detail="Only admin users can view other users") - user = session.query(User).filter(User.id == user_id).first() - if not user: - raise HTTPException(status_code=404, detail="User not found") - return {"username": user.username, "role": user.role} - - -@app.post("/users/") -async def create_user(username: str, password: str, role: int, current_user: User = Depends(get_current_user)): - if current_user.role < 5: - raise HTTPException( - status_code=403, detail="Only admin users can create new users") - user = User(username=username, - password_hash=encrypt_password(password), role=role) - session.add(user) - session.commit() - return {"username": user.username, "role": user.role} - - -@app.put("/users/{user_id}") -async def update_user(user_id: int, username: str, password: str, role: int, current_user: User = Depends(get_current_user)): - if current_user.id != user_id and current_user.role < 5: - raise HTTPException( - status_code=403, detail="Only admin users can update other users") - user = session.query(User).filter(User.id == user_id).first() - if not user: - raise HTTPException(status_code=404, detail="User not found") - user.username = username - user.password = password - user.role = role - session.commit() - return {"username": user.username, "role": user.role} - - -@app.delete("/users/{user_id}") -async def delete_user(user_id: int, current_user: User = Depends(get_current_user)): - if 
current_user.id != user_id and current_user.role < 5: - raise HTTPException( - status_code=403, detail="Only admin users can delete other users") - user = session.query(User).filter(User.id == user_id).first() - if not user: - raise HTTPException(status_code=404, detail="User not found") - session.delete(user) - session.commit() - return {"message": "User deleted"} - - -# Define a route for the app -@app.get("/query") -async def query(query: str): - # if current_user.role < 5: - # raise HTTPException(status_code=403, detail="Only admin users can delete other users") - store = get_vectorstore() - docs = store.invoke(query) - - print(20*"*", "docs", 20*"*", "\n", docs) - - async def stream_generator(): - # Use the LangChain model to generate text - print(20*'*', "\n", query) - async for text in chain.astream({"input": query, "context": docs}): - yield json.dumps({"event_id": str(uuid.uuid4()), "data": text}) - - # TODO here we have to add the metadata/source - - return StreamingResponse(stream_generator(), media_type="application/x-ndjson") diff --git a/backend/oauth.py b/backend/oauth.py new file mode 100644 index 0000000..68b8eca --- /dev/null +++ b/backend/oauth.py @@ -0,0 +1,63 @@ +import jwt + +from fastapi.security import OAuth2PasswordBearer +from fastapi import Depends, HTTPException + +from backend.config import SECRET_KEY, ALGORITHM +from backend.sqlalchemy_models import User +from backend.sessions import session +from backend.pydantic_models import TokenData + +from bcrypt import hashpw, gensalt, checkpw +from datetime import datetime, timedelta +from typing import Optional + +oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token") + +def encrypt_password(password: str): + return hashpw(password.encode(), gensalt()) + + +def verify_password(plain_password: str, hashed_password: str): + return checkpw(plain_password.encode(), hashed_password) + +def get_user(username: str): + return session.query(User).filter(User.username == username).first() + + +def 
authenticate_user(username: str, password: str): + user = get_user(username) + if not user or not verify_password(password, user.password_hash): + return False + return user + +async def get_current_user(token: str = Depends(oauth2_scheme)): + credentials_exception = HTTPException( + status_code=401, + detail="Could not validate credentials", + headers={"WWW-Authenticate": "Bearer"}, + ) + try: + payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM]) + username: str = payload.get("sub") + if username is None: + raise credentials_exception + token_data = TokenData(username=username) + except jwt.ExpiredSignatureError: + raise HTTPException(status_code=401, detail="Token has expired") + except jwt.InvalidTokenError: + raise credentials_exception + user = get_user(username=token_data.username) + if user is None: + raise credentials_exception + return user + +def create_access_token(data: dict, expires_delta: Optional[timedelta] = None): + to_encode = data.copy() + if expires_delta: + expire = datetime.utcnow() + expires_delta + else: + expire = datetime.utcnow() + timedelta(minutes=15) + to_encode.update({"exp": expire}) + encoded_jwt = jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM) + return encoded_jwt \ No newline at end of file diff --git a/backend/1_rag.py b/backend/old_files/1_rag.py similarity index 100% rename from backend/1_rag.py rename to backend/old_files/1_rag.py diff --git a/backend/old_files/__init__.py b/backend/old_files/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backend/gradio_demo.py b/backend/old_files/gradio_demo.py similarity index 88% rename from backend/gradio_demo.py rename to backend/old_files/gradio_demo.py index 51442d8..4362210 100644 --- a/backend/gradio_demo.py +++ b/backend/old_files/gradio_demo.py @@ -1,5 +1,5 @@ -from question_and_answer import resp as QA -from rag_docs import resp as RAG +from backend.old_files.question_and_answer import resp as QA +from backend.old_files.rag_docs import resp 
as RAG
 
 import gradio as gr
 
diff --git a/backend/question_and_answer.py b/backend/old_files/question_and_answer.py
similarity index 100%
rename from backend/question_and_answer.py
rename to backend/old_files/question_and_answer.py
diff --git a/backend/rag_docs.py b/backend/old_files/rag_docs.py
similarity index 100%
rename from backend/rag_docs.py
rename to backend/old_files/rag_docs.py
diff --git a/backend/pydantic_models.py b/backend/pydantic_models.py
new file mode 100644
index 0000000..428270a
--- /dev/null
+++ b/backend/pydantic_models.py
@@ -0,0 +1,10 @@
+from pydantic import BaseModel
+from typing import Optional
+
+class Token(BaseModel):
+    access_token: str
+    token_type: str
+
+
+class TokenData(BaseModel):
+    username: Optional[str] = None
\ No newline at end of file
diff --git a/backend/requirements.txt b/backend/requirements.txt
index 90c2fda..7cfa115 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -1,10 +1,131 @@
-langchain_core
-langchain_community
-langchain_ollama
-langchain_chroma
-langfuse
-pypdf
-fastapi
-sqlalchemy
-pyjwt
-bcrypt
\ No newline at end of file
+aiohappyeyeballs==2.3.4
+aiohttp==3.10.0
+aiosignal==1.3.1
+annotated-types==0.7.0
+anyio==4.4.0
+asgiref==3.8.1
+async-timeout==4.0.3
+attrs==24.1.0
+backoff==2.2.1
+bcrypt==4.2.0
+build==1.2.1
+cachetools==5.4.0
+certifi==2024.7.4
+cffi==1.16.0
+cfgv==3.4.0
+charset-normalizer==3.3.2
+chroma-hnswlib==0.7.6
+chromadb==0.5.5
+click==8.1.7
+colorama==0.4.6
+coloredlogs==15.0.1
+cryptography==43.0.0
+dataclasses-json==0.6.7
+Deprecated==1.2.14
+distlib==0.3.8
+dnspython==2.6.1
+email_validator==2.2.0
+exceptiongroup==1.2.2
+fastapi==0.112.0
+fastapi-cli==0.0.5
+filelock==3.15.4
+flatbuffers==24.3.25
+frozenlist==1.4.1
+fsspec==2024.6.1
+google-auth==2.32.0
+googleapis-common-protos==1.63.2
+greenlet==3.0.3
+grpcio==1.65.4
+h11==0.14.0
+httpcore==1.0.5
+httptools==0.6.1
+httpx==0.27.0
+huggingface-hub==0.24.5
+humanfriendly==10.0
+identify==2.6.0
+idna==3.7
+importlib_metadata==8.0.0
+importlib_resources==6.4.0
+Jinja2==3.1.4
+jsonpatch==1.33
+jsonpointer==3.0.0
+kubernetes==30.1.0
+langchain==0.2.12
+langchain-chroma==0.1.2
+langchain-community==0.2.11
+langchain-core==0.2.28
+langchain-ollama==0.1.1
+langchain-text-splitters==0.2.2
+langfuse==2.40.0
+langsmith==0.1.96
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+marshmallow==3.21.3
+mdurl==0.1.2
+mmh3==4.1.0
+monotonic==1.6
+mpmath==1.3.0
+multidict==6.0.5
+mypy-extensions==1.0.0
+nodeenv==1.9.1
+numpy==1.26.4
+oauthlib==3.2.2
+ollama==0.3.1
+onnxruntime==1.18.1
+opentelemetry-api==1.26.0
+opentelemetry-exporter-otlp-proto-common==1.26.0
+opentelemetry-exporter-otlp-proto-grpc==1.26.0
+opentelemetry-instrumentation==0.47b0
+opentelemetry-instrumentation-asgi==0.47b0
+opentelemetry-instrumentation-fastapi==0.47b0
+opentelemetry-proto==1.26.0
+opentelemetry-sdk==1.26.0
+opentelemetry-semantic-conventions==0.47b0
+opentelemetry-util-http==0.47b0
+orjson==3.10.6
+overrides==7.7.0
+packaging==23.2
+platformdirs==4.2.2
+posthog==3.5.0
+protobuf==4.25.4
+pyasn1==0.6.0
+pyasn1_modules==0.4.0
+pycparser==2.22
+pydantic==2.8.2
+pydantic_core==2.20.1
+Pygments==2.18.0
+PyJWT==2.9.0
+pypdf==4.3.1
+PyPika==0.48.9
+pyproject_hooks==1.1.0
+pyreadline3==3.4.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+python-multipart==0.0.9
+PyYAML==6.0.1
+requests==2.32.3
+requests-oauthlib==2.0.0
+rich==13.7.1
+rsa==4.9
+shellingham==1.5.4
+six==1.16.0
+sniffio==1.3.1
+SQLAlchemy==2.0.31
+starlette==0.37.2
+sympy==1.13.1
+tenacity==8.5.0
+tokenizers==0.19.1
+tomli==2.0.1
+tqdm==4.66.5
+typer==0.12.3
+typing-inspect==0.9.0
+typing_extensions==4.12.2
+urllib3==2.2.2
+uvicorn==0.30.5
+virtualenv==20.26.3
+watchfiles==0.22.0
+websocket-client==1.8.0
+websockets==12.0
+wrapt==1.16.0
+yarl==1.9.4
+zipp==3.19.2
diff --git a/backend/sessions.py b/backend/sessions.py
new file mode 100644
index 0000000..1a1eb73
--- /dev/null
+++ b/backend/sessions.py
@@ -0,0 +1,13 @@
+from sqlalchemy 
import create_engine
+
+from sqlalchemy.orm import sessionmaker
+
+from backend.sqlalchemy_models import Base
+
+
+engine = create_engine("sqlite:///users.db")
+
+Base.metadata.create_all(engine)
+
+session_maker = sessionmaker(bind=engine)
+session = session_maker()
\ No newline at end of file
diff --git a/backend/sqlalchemy_models.py b/backend/sqlalchemy_models.py
new file mode 100644
index 0000000..3524b65
--- /dev/null
+++ b/backend/sqlalchemy_models.py
@@ -0,0 +1,12 @@
+from sqlalchemy import Column, Integer, String
+from sqlalchemy.orm import declarative_base  # sqlalchemy.ext.declarative is the deprecated pre-2.0 location
+
+Base = declarative_base()
+
+class User(Base):
+    __tablename__ = "users"
+    id = Column(Integer, primary_key=True)
+    username = Column(String, unique=True)
+    password_hash = Column(String)
+    # 1 = user, 4 = manager, 5 = admin, 6 = superadmin
+    role = Column(Integer, default=1)
\ No newline at end of file
diff --git a/backend/utils.py b/backend/utils.py
new file mode 100644
index 0000000..cc77236
--- /dev/null
+++ b/backend/utils.py
@@ -0,0 +1,13 @@
+from backend.sqlalchemy_models import User
+from backend.sessions import session
+from backend.oauth import encrypt_password, get_user
+
+
+
+def populate_admin_user():
+    admin_user = get_user("admin")
+    if not admin_user:
+        admin_user = User(username="admin",
+                          password_hash=encrypt_password("admin"), role=6)  # NOTE(review): hard-coded default credentials - change after first start
+        session.add(admin_user)
+        session.commit()
\ No newline at end of file