diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..5b3d79a
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,34 @@
+# Include any files or directories that you don't want to be copied to your
+# container here (e.g., local build artifacts, temporary files, etc.).
+#
+# For more help, visit the .dockerignore file reference guide at
+# https://docs.docker.com/go/build-context-dockerignore/
+
+**/.DS_Store
+**/__pycache__
+**/.venv
+**/.classpath
+**/.dockerignore
+**/.env
+**/.git
+**/.gitignore
+**/.project
+**/.settings
+**/.toolstarget
+**/.vs
+**/.vscode
+**/*.*proj.user
+**/*.dbmdl
+**/*.jfm
+**/bin
+**/charts
+**/docker-compose*
+**/compose*
+**/Dockerfile*
+**/node_modules
+**/npm-debug.log
+**/obj
+**/secrets.dev.yaml
+**/values.dev.yaml
+LICENSE
+README.md
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
new file mode 100644
index 0000000..f7f0d0b
--- /dev/null
+++ b/.github/workflows/ci.yaml
@@ -0,0 +1,57 @@
+name: CI/CD Pipeline
+
+on:
+  push:
+    branches:
+      - main
+
+permissions:
+  contents: read
+  id-token: write
+
+jobs:
+  build-and-push-ecr:
+    name: Build and Push to ECR
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v2
+
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v1
+        with:
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ secrets.AWS_REGION }}
+
+      - name: Login to Amazon ECR
+        run: |
+          aws ecr get-login-password --region ${{ secrets.AWS_REGION }} | docker login --username AWS --password-stdin ${{ secrets.AWS_ECR_LOGIN_URI }}
+
+      - name: Build, tag, and push image to Amazon ECR
+        run: |
+          docker build -t ${{ secrets.AWS_ECR_LOGIN_URI }}/${{ secrets.AWS_ECR_REPOSITORY_NAME }}:latest .
+          docker push ${{ secrets.AWS_ECR_LOGIN_URI }}/${{ secrets.AWS_ECR_REPOSITORY_NAME }}:latest
+
+  deploy-to-ec2:
+    name: Deploy to EC2
+    needs: build-and-push-ecr
+    runs-on: [self-hosted, linux]
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v2
+
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v1
+        with:
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ secrets.AWS_REGION }}
+
+      - name: Deploy to EC2
+        run: |
+          # NOTE: the EC2 instance IP is hardcoded here; consider storing it in a secret
+          # This assumes the self-hosted runner has SSH access to the instance
+          ssh ec2-user@15.156.87.140 "docker pull ${{ secrets.AWS_ECR_LOGIN_URI }}/${{ secrets.AWS_ECR_REPOSITORY_NAME }}:latest && docker run -d -p 8501:8501 --restart=always ${{ secrets.AWS_ECR_LOGIN_URI }}/${{ secrets.AWS_ECR_REPOSITORY_NAME }}:latest"
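A quick way to confirm the `build-and-push-ecr` job actually published the image is to query ECR for the `latest` tag before the deploy job runs. The sketch below is not part of the diff: the repository name is a placeholder for whatever `AWS_ECR_REPOSITORY_NAME` holds, and it assumes the same AWS credentials the workflow uses are available locally.

```python
import boto3


def image_tag_exists(repository_name: str, tag: str = "latest") -> bool:
    """Return True if the given tag exists in the ECR repository."""
    ecr = boto3.client("ecr")
    try:
        # describe_images raises ImageNotFoundException when the tag is absent.
        ecr.describe_images(
            repositoryName=repository_name,
            imageIds=[{"imageTag": tag}],
        )
        return True
    except ecr.exceptions.ImageNotFoundException:
        return False


if __name__ == "__main__":
    # "houseprice-prediction" is a hypothetical name; substitute the real one.
    print(image_tag_exists("houseprice-prediction"))
```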
diff --git a/Capstone/requirements.txt b/Capstone/requirements.txt
index 07aa6cb..f714580 100644
--- a/Capstone/requirements.txt
+++ b/Capstone/requirements.txt
@@ -5,3 +5,7 @@ seaborn
 matplotlib
 dvclive
 mlops
+mlflow
+xgboost
+streamlit
+boto3
diff --git a/Capstone/streamlit-app/pages/1_Predictor.py b/Capstone/streamlit-app/pages/1_Predictor.py
index 505696d..b22abe7 100644
--- a/Capstone/streamlit-app/pages/1_Predictor.py
+++ b/Capstone/streamlit-app/pages/1_Predictor.py
@@ -2,19 +2,30 @@
 import pandas as pd
 import numpy as np
 import pickle
+import boto3
+import os
 
 
+def download_file_from_s3(bucket_name, s3_key, local_path):
+    if not os.path.exists(local_path):
+        s3 = boto3.client('s3')
+        s3.download_file(bucket_name, s3_key, local_path)
 
+model_path = '/app/models/pipeline.pkl'
+df_path = '/app/models/df.pkl'
 
 
-st.title("Page 1")
+download_file_from_s3('capstone-houseprice-prediction', 'models/pipeline.pkl', model_path)
+download_file_from_s3('capstone-houseprice-prediction', 'models/df.pkl', df_path)
+
+
+with open(model_path, 'rb') as file:
+    pipeline = pickle.load(file)
 
-with open("/Users/siddhant/housepriceproject/Capstone/df.pkl", 'rb') as file:
+with open(df_path, 'rb') as file:
     df = pickle.load(file)
-
-with open("/Users/siddhant/housepriceproject/Capstone/pipeline.pkl", 'rb') as file:
-    pipeline = pickle.load(file)
 
-st.dataframe(df)
+st.title("Page 1")
+
 
 st.header("Enter your input")
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..61d792b
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,36 @@
+ARG PYTHON_VERSION=3.10.9
+FROM python:${PYTHON_VERSION}-slim as base
+
+# Prevents Python from writing pyc files.
+ENV PYTHONDONTWRITEBYTECODE=1
+
+# Keeps Python from buffering stdout and stderr.
+ENV PYTHONUNBUFFERED=1
+
+# Create a directory for the model.
+RUN mkdir -p /app/models
+
+# Set the working directory in the container
+WORKDIR /app
+
+# Copy the Streamlit app, src directory, and requirements.txt into the container.
+COPY Capstone/streamlit-app/main_app.py ./
+COPY Capstone/src/ ./src/
+COPY Capstone/requirements.txt .
+
+# List files in /app to verify copying (optional)
+RUN ls -l
+
+# Install dependencies from requirements.txt
+RUN pip install -r requirements.txt
+
+# Expose the port that the application listens on.
+EXPOSE 8501
+
+# Run the Streamlit application.
+CMD ["streamlit", "run", "main_app.py"]
+
+
+
+
+
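Note that `pickle` was dropped from requirements.txt: it ships with Python's standard library and is not installable from PyPI, so listing it would make `pip install -r requirements.txt` fail during the Docker build.

The `download_file_from_s3` helper above skips the download when the file already exists, but a failed transfer can leave a partial file behind that would then be loaded as a corrupt pickle on the next run. A more defensive variant might look like the sketch below; the cleanup-and-reraise behaviour is an assumption, not something the app currently does.

```python
import os

import boto3
from botocore.exceptions import ClientError


def download_file_from_s3(bucket_name: str, s3_key: str, local_path: str) -> None:
    # Skip the download when a previous run already cached the file.
    if os.path.exists(local_path):
        return
    os.makedirs(os.path.dirname(local_path), exist_ok=True)
    s3 = boto3.client("s3")
    try:
        s3.download_file(bucket_name, s3_key, local_path)
    except ClientError:
        # Remove any partially written file so the next run retries cleanly.
        if os.path.exists(local_path):
            os.remove(local_path)
        raise


# Usage mirrors the page: fetch the fitted pipeline once per container.
download_file_from_s3("capstone-houseprice-prediction",
                      "models/pipeline.pkl",
                      "/app/models/pipeline.pkl")
```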
+CMD ["streamlit", "run", "main_app.py"] + + + + + diff --git a/README.Docker.md b/README.Docker.md new file mode 100644 index 0000000..2029604 --- /dev/null +++ b/README.Docker.md @@ -0,0 +1,22 @@ +### Building and running your application + +When you're ready, start your application by running: +`docker compose up --build`. + +Your application will be available at http://localhost:8501. + +### Deploying your application to the cloud + +First, build your image, e.g.: `docker build -t myapp .`. +If your cloud uses a different CPU architecture than your development +machine (e.g., you are on a Mac M1 and your cloud provider is amd64), +you'll want to build the image for that platform, e.g.: +`docker build --platform=linux/amd64 -t myapp .`. + +Then, push it to your registry, e.g. `docker push myregistry.com/myapp`. + +Consult Docker's [getting started](https://docs.docker.com/go/get-started-sharing/) +docs for more detail on building and pushing. + +### References +* [Docker's Python guide](https://docs.docker.com/language/python/) \ No newline at end of file diff --git a/compose.yaml b/compose.yaml new file mode 100644 index 0000000..89f8f5c --- /dev/null +++ b/compose.yaml @@ -0,0 +1,49 @@ +# Comments are provided throughout this file to help you get started. +# If you need more help, visit the Docker compose reference guide at +# https://docs.docker.com/go/compose-spec-reference/ + +# Here the instructions define your application as a service called "server". +# This service is built from the Dockerfile in the current directory. +# You can add other services your application may depend on here, such as a +# database or a cache. For examples, see the Awesome Compose repository: +# https://github.com/docker/awesome-compose +services: + server: + build: + context: . + ports: + - 8501:8501 + +# The commented out section below is an example of how to define a PostgreSQL +# database that your application can use. `depends_on` tells Docker Compose to +# start the database before your application. The `db-data` volume persists the +# database data between container restarts. The `db-password` secret is used +# to set the database password. You must create `db/password.txt` and add +# a password of your choosing to it before running `docker compose up`. +# depends_on: +# db: +# condition: service_healthy +# db: +# image: postgres +# restart: always +# user: postgres +# secrets: +# - db-password +# volumes: +# - db-data:/var/lib/postgresql/data +# environment: +# - POSTGRES_DB=example +# - POSTGRES_PASSWORD_FILE=/run/secrets/db-password +# expose: +# - 5432 +# healthcheck: +# test: [ "CMD", "pg_isready" ] +# interval: 10s +# timeout: 5s +# retries: 5 +# volumes: +# db-data: +# secrets: +# db-password: +# file: db/password.txt + diff --git a/dvc.yaml b/dvc.yaml index 5494521..8b5a707 100644 --- a/dvc.yaml +++ b/dvc.yaml @@ -61,11 +61,9 @@ stages: deps: - /Users/siddhant/housepriceproject/Capstone/pipeline_generated_data/missing_imputed.csv - /Users/siddhant/housepriceproject/Capstone/src/models/train_model.py - # No output specified as the model is saved in a fixed location push_model_to_s3: cmd: python /Users/siddhant/housepriceproject/Capstone/src/models/push_model.py deps: - /Users/siddhant/housepriceproject/Capstone/pipeline.pkl - /Users/siddhant/housepriceproject/Capstone/src/models/push_model.py - # Assuming no specific output for this stage