From e53b1271a98a37f71b70a2ca5c5350daea229eae Mon Sep 17 00:00:00 2001 From: Siddhant Date: Tue, 27 Feb 2024 09:38:04 -0600 Subject: [PATCH] Path changes --- Capstone/notebooks/baseline-model.ipynb | 55 ++-- Capstone/{datasets => src}/data_viz1.csv | 0 Capstone/streamlit-app/main_app.py | 341 ++++++-------------- Capstone/streamlit-app/pages/1_Predictor.py | 2 +- Capstone/streamlit-app/pages/2_Analytics.py | 2 +- 5 files changed, 134 insertions(+), 266 deletions(-) rename Capstone/{datasets => src}/data_viz1.csv (100%) diff --git a/Capstone/notebooks/baseline-model.ipynb b/Capstone/notebooks/baseline-model.ipynb index 53890cb..fa1ff3a 100644 --- a/Capstone/notebooks/baseline-model.ipynb +++ b/Capstone/notebooks/baseline-model.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -19,7 +19,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -28,7 +28,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -175,7 +175,7 @@ "4 Mid Floor " ] }, - "execution_count": 4, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -186,7 +186,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -195,7 +195,7 @@ "(3548, 13)" ] }, - "execution_count": 5, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -206,7 +206,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -242,7 +242,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -264,7 +264,7 @@ "dtype: int64" ] }, - "execution_count": 7, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -275,7 +275,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -286,7 +286,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -295,7 +295,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -308,7 +308,7 @@ "Name: count, dtype: int64" ] }, - "execution_count": 10, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -319,7 +319,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -331,7 +331,7 @@ " dtype='object')" ] }, - "execution_count": 11, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -342,7 +342,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -352,7 +352,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -493,7 +493,7 @@ "4 Mid Floor " ] }, - "execution_count": 13, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -516,7 +516,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -535,7 +535,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -544,7 +544,7 @@ "" ] }, - "execution_count": 15, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" }, @@ -567,7 +567,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -576,7 +576,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -588,7 +588,7 @@ " dtype='object')" ] }, - "execution_count": 17, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -599,7 +599,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -2068,11 +2068,12 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ - "file_path2 = '/Users/siddhant/housepriceproject/Capstone/df.pkl'\n", + "import pickle\n", + "file_path2 = '/Users/siddhant/housepriceproject/Capstone/df2.pkl'\n", "\n", "with open(file_path2, 'wb') as file:\n", " pickle.dump(X, file)" diff --git a/Capstone/datasets/data_viz1.csv b/Capstone/src/data_viz1.csv similarity index 100% rename from Capstone/datasets/data_viz1.csv rename to Capstone/src/data_viz1.csv diff --git a/Capstone/streamlit-app/main_app.py b/Capstone/streamlit-app/main_app.py index ba7a467..ac54d21 100644 --- a/Capstone/streamlit-app/main_app.py +++ b/Capstone/streamlit-app/main_app.py @@ -1,40 +1,116 @@ -import streamlit as st -import base64 -import boto3 -import sys -import pickle -sys.path.append("/Users/siddhant/housepriceproject") - -from botocore.exceptions import NoCredentialsError - -image_path = '/app/files/image.pkl' -s3_image = 'files/image.pkl' +# import streamlit as st +# import base64 +# import boto3 +# import sys +# import pickle +# sys.path.append("/Users/siddhant/housepriceproject") +# from botocore.exceptions import NoCredentialsError +# image_path = '/app/files/image.pkl' +# s3_image = 'files/image.pkl' + + + +# bucket_name = "capstone-houseprice-prediction" + +# def s3_download(s3_bucket, s3_file_path, local_file): +# s3 = boto3.client("s3") + +# s3.download_file(s3_bucket, s3_file_path, local_file) + + + +# s3_download(bucket_name, s3_image, image_path) + +# with open(image_path, 'rb') as file: +# image = pickle.load(file) +# # Set page configuration +# st.set_page_config(page_title="Welcome to the World of Homes", layout="wide") + +# # Function to get base64 of the image file +# def get_image_base64(image_path): +# with open(image_path, "rb") as img_file: +# return base64.b64encode(img_file.read()).decode() + +# # Custom CSS to inject into the Streamlit app +# # Custom CSS to inject into the Streamlit app +# def add_bg_from_local(image_file): +# bin_str = get_image_base64(image_file) +# page_bg_img = f''' +# +# ''' +# st.markdown(page_bg_img, unsafe_allow_html=True) + +# add_bg_from_local(image) # Adjust the path to the image + +# st.markdown(""" +#
+#

Welcome to the World of Homes

+#

Unlock the Door to Real Estate Mastery

+#

Welcome to our Real Estate Analysis Platform—where data meets strategy.

+#
    +#
  • Tailored Insights: Whether you're a first-time homebuyer or a seasoned investor, our platform offers tailored insights that cater to your needs. Explore interactive charts, maps, and analytics designed to empower your decision-making process.
  • +#
  • Comprehensive Coverage: From bustling city centers to serene suburbs, navigate through an extensive database of properties, market trends, and price comparisons at your fingertips.
  • +#
+#

Begin your journey with us today and see where data can take you. Your dream property awaits!

+#
+# """, unsafe_allow_html=True) -bucket_name = "capstone-houseprice-prediction" +import streamlit as st +import boto3 +from botocore.exceptions import NoCredentialsError +import pickle +import base64 +import os -def s3_download(s3_bucket, s3_file_path, local_file): - s3 = boto3.client("s3") +# Ensure your AWS credentials are set in your environment or in ~/.aws/credentials - s3.download_file(s3_bucket, s3_file_path, local_file) +local_image_path = '/app/files/image.pkl' # Path inside Docker +def s3_download(s3_bucket, s3_file_key, local_file_path): + try: + s3 = boto3.client("s3") + # Ensure directory exists + os.makedirs(os.path.dirname(local_file_path), exist_ok=True) + s3.download_file(s3_bucket, s3_file_key, local_file_path) + st.success("File downloaded successfully.") + except Exception as e: + st.error(f"Failed to download file: {e}") +def load_and_display_image(image_path): + with open(image_path, "rb") as file: + image_data = pickle.load(file) + st.image(image_data, caption="Loaded Image", use_column_width=True) -s3_download(bucket_name, s3_image, image_path) +# Example usage +s3_bucket_name = "capstone-houseprice-prediction" +s3_file_key = "files/image.pkl" +s3_download(s3_bucket_name, s3_file_key, local_image_path) -with open(image_path, 'rb') as file: - image = pickle.load(file) -# Set page configuration -st.set_page_config(page_title="Welcome to the World of Homes", layout="wide") +# Load and display the deserialized image +load_and_display_image(local_image_path) -# Function to get base64 of the image file def get_image_base64(image_path): with open(image_path, "rb") as img_file: return base64.b64encode(img_file.read()).decode() -# Custom CSS to inject into the Streamlit app -# Custom CSS to inject into the Streamlit app def add_bg_from_local(image_file): bin_str = get_image_base64(image_file) page_bg_img = f''' @@ -58,224 +134,15 @@ def add_bg_from_local(image_file): ''' st.markdown(page_bg_img, unsafe_allow_html=True) +add_bg_from_local(local_image_path) # Adjust the path to the image - -# Apply the background image from a local file -add_bg_from_local(image) # Adjust the path to the image - -# # Sidebar for navigation -# st.sidebar.title("Navigation") -# navigation_options = ["Home", "Price Analysis", "Sector Insights", "Property Finder"] -# selection = st.sidebar.selectbox("Go to", navigation_options) - -# Main body of the app with an overlay background for text +# Additional Streamlit app content +st.set_page_config(page_title="Welcome to the World of Homes", layout="wide") st.markdown(""" -
+

Welcome to the World of Homes

Unlock the Door to Real Estate Mastery

Welcome to our Real Estate Analysis Platform—where data meets strategy.

-
    -
  • Tailored Insights: Whether you're a first-time homebuyer or a seasoned investor, our platform offers tailored insights that cater to your needs. Explore interactive charts, maps, and analytics designed to empower your decision-making process.
  • -
  • Comprehensive Coverage: From bustling city centers to serene suburbs, navigate through an extensive database of properties, market trends, and price comparisons at your fingertips.
  • -
-

Begin your journey with us today and see where data can take you. Your dream property awaits!

+ ...
""", unsafe_allow_html=True) - -# Rest of your Streamlit app code - - - - - - - - - - - - -# import streamlit as st -# import pandas as pd -# import plotly.express as px -# import matplotlib.pyplot as plt -# from wordcloud import WordCloud -# import pickle -# import ast -# import numpy as np -# import boto3 -# import sys -# sys.path.append("/Users/siddhant/housepriceproject") -# from Capstone.logger import logging -# from botocore.exceptions import NoCredentialsError - - -# def run_page2(): -# st.title("Page 2") -# model_path1 = '/app/models/cosinse_sim.pkl' -# model_path2 = '/app/models/cosinse_sim2.pkl' -# model_path3 = '/app/models/cosinse_sim3.pkl' -# wordcloud = '/app/models/wordcloud_data.pkl' -# df_path = '/app/models/locationdf.pkl' - -# s3_path1 = 'models/cosinse_sim.pkl' -# s3_path2 = 'models/cosinse_sim2.pkl' -# s3_path3 = 'models/cosinse_sim3.pkl' -# s3_path_df_4 = 'files/locationdf.pkl' -# s3_path_wordcloud_5 = 'files/wordcloud_data.pkl' - - - -# bucket_name = "capstone-houseprice-prediction" - -# def s3_download(s3_bucket, s3_file_path, local_file): -# s3 = boto3.client("s3") -# try: -# logging.info(f"Downloading file {local_file} residing in S3 bucket {s3_bucket} at {s3_file_path}") -# s3.download_file(s3_bucket, s3_file_path, local_file) -# logging.info(f"Downloaded file {local_file} residing in S3 bucket {s3_bucket} at {s3_file_path}") -# except FileNotFoundError: -# logging.exception(f"The file {local_file} was not found.") -# except NoCredentialsError: -# logging.exception("Credentials not available.") -# except Exception as e: -# logging.exception(f"An error occurred while downloading {local_file}: {e}") - -# # Upload each file -# s3_download(bucket_name, s3_path1, model_path1) -# s3_download(bucket_name, s3_path2, model_path2) -# s3_download(bucket_name, s3_path3, model_path3) -# s3_download(bucket_name, s3_path_df_4, df_path) -# s3_download(bucket_name, s3_path_wordcloud_5, wordcloud) - - -# with open(model_path1, 'rb') as file: -# cosine_sim1 = pickle.load(file) - -# with open(model_path2, 'rb') as file: -# cosine_sim2 = pickle.load(file) - -# with open(model_path3, 'rb') as file: -# cosine_sim3 = pickle.load(file) - -# with open(df_path, 'rb') as file: -# locationdf = pickle.load(file) - -# with open(wordcloud, 'rb') as file: -# wordcloud_data = pickle.load(file) - - -# st.title("Page 2") - -# # STREAMLIT LAYOUT -# st.header("Sector wise price per sqft map") - -# # PREPARING DATA FOR GEOMAP -# new_df = pd.read_csv("/Users/siddhant/housepriceproject/Capstone/datasets/data_viz1.csv") -# groupdf = new_df.groupby("sector")[['price','price_per_sqft','built_up_area','latitude','longitude']].mean() - -# # GEOMAP -# fig = px.scatter_mapbox(groupdf, lat="latitude", lon="longitude", color="price_per_sqft", size='built_up_area', -# color_continuous_scale=px.colors.cyclical.IceFire, zoom=10, -# mapbox_style="open-street-map", width=1200, height=700, hover_name=groupdf.index) -# st.plotly_chart(fig, use_container_width=True) - - - - - -# # STREAMLIT LAYOUT -# st.header("Sector-specific Wordcloud") -# # FUNCTION TO PLOT SECTOR WISE WORDCLOUD -# def generate_wordcloud(sector): -# # Extracting features for the selected sector -# features = wordcloud_data[wordcloud_data['sector'] == sector]['features'] -# main = [] -# for feature_list in features.dropna(): -# main.extend(ast.literal_eval(feature_list)) -# text = ' '.join(main) - -# # Generate wordcloud -# wordcloud = WordCloud(width=800, height=800, -# background_color='white', -# stopwords=set(['s']), # Add any stopwords here -# min_font_size=10).generate(text) - -# # Display wordcloud using matplotlib -# plt.figure(figsize=(8, 8), facecolor=None) -# plt.imshow(wordcloud, interpolation='bilinear') -# plt.axis("off") -# plt.tight_layout(pad=0) -# st.pyplot() - -# # Dropdown for sector selection -# selected_sector = st.selectbox("Select a sector", wordcloud_data['sector'].unique()) -# # Display the wordcloud for the selected sector -# if selected_sector: -# generate_wordcloud(selected_sector) - -# st.set_option('deprecation.showPyplotGlobalUse', False) - - - -# # STREAMLIT LAYOUT -# st.header("Area vs Price") - -# property_type = st.selectbox("Select poperty type", ["flat", "house"]) - -# # PLOTTING SCATTER PLOT AREA VS PRICE -# if property_type == "flat": -# fig1 = px.scatter(new_df[new_df["property_type"] == "flat"], x="built_up_area", y="price", color="bedRoom") -# st.plotly_chart(fig1, use_container_width=True) -# else: -# fig1 = px.scatter(new_df[new_df["property_type"] == "house"], x="built_up_area", y="price", color="bedRoom") -# st.plotly_chart(fig1, use_container_width=True) - - - -# # STREAMLIT LAYOUT -# st.header("BHK Pie Chart") - -# sector_list = new_df["sector"].unique().tolist() -# sector_list.insert(0, "overall") - -# selected_sector = st.selectbox("Select sector", sector_list) -# # PLOTTING PIE CHART FOR BEDROOMS SECTORWISE -# if selected_sector == "overall": -# fig2 = px.pie(new_df, names="bedRoom") -# st.plotly_chart(fig2, use_container_width=True) -# else: -# fig2 = px.pie(new_df[new_df["sector"] == selected_sector], names="bedRoom") -# st.plotly_chart(fig2, use_container_width=True) - - - -# # STREAMLIT LAYOUT -# st.header("BHK price range") - -# allsectors = new_df["sector"].unique().tolist() -# allsectors.insert(0, "overall") - -# sector_selected = st.selectbox("Select required sector", allsectors) -# # BOXPLOT FOR BHK WIS PRICE -# if sector_selected == "overall": -# fig3 = px.box(new_df[new_df["bedRoom"] <= 4], x="bedRoom", y="price") -# st.plotly_chart(fig3, use_container_width=True) -# else: -# fig3 = px.box(new_df[(new_df["sector"] == sector_selected) & (new_df["bedRoom"] <= 4)], x="bedRoom", y="price") -# st.plotly_chart(fig3, use_container_width=True) - - - -# # STREAMLIT LAYOUT -# st.header("Distribution of flat prices vs house prices") - -# property_selected = st.selectbox("Select poperty", ["flat", "house"]) - -# # DISTRIBUTION PLOT -# if property_selected == "flat": -# fig4 = px.histogram(new_df[new_df["property_type"] == "flat"], x="price", color_discrete_sequence=['green']) -# st.plotly_chart(fig4, use_container_width=True) -# else: -# fig4 = px.histogram(new_df[new_df["property_type"] == "house"], x="price") -# st.plotly_chart(fig4, use_container_width=True) \ No newline at end of file diff --git a/Capstone/streamlit-app/pages/1_Predictor.py b/Capstone/streamlit-app/pages/1_Predictor.py index 8693bce..eb21553 100644 --- a/Capstone/streamlit-app/pages/1_Predictor.py +++ b/Capstone/streamlit-app/pages/1_Predictor.py @@ -28,7 +28,7 @@ def download_file_from_s3(bucket_name, s3_key, local_path): download_file_from_s3('capstone-houseprice-prediction', 'models/pipeline.pkl', model_path) -download_file_from_s3('capstone-houseprice-prediction', 'models/df.pkl', df_path) +download_file_from_s3('capstone-houseprice-prediction', 'models/df2.pkl', df_path) pipeline = None df = None diff --git a/Capstone/streamlit-app/pages/2_Analytics.py b/Capstone/streamlit-app/pages/2_Analytics.py index eba7170..4589092 100644 --- a/Capstone/streamlit-app/pages/2_Analytics.py +++ b/Capstone/streamlit-app/pages/2_Analytics.py @@ -64,7 +64,7 @@ def s3_download(s3_bucket, s3_file_path, local_file): st.header("Sector wise price per sqft map") # PREPARING DATA FOR GEOMAP -new_df = pd.read_csv("/Users/siddhant/housepriceproject/Capstone/datasets/data_viz1.csv") +new_df = pd.read_csv("/app/src/data_viz1.csv") groupdf = new_df.groupby("sector")[['price','price_per_sqft','built_up_area','latitude','longitude']].mean() # GEOMAP