-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnj_realtor_dashboard.py
484 lines (360 loc) · 17.1 KB
/
nj_realtor_dashboard.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
# -*- coding: utf-8 -*-
"""NJ_Realtor_Dashboard.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1MFPMiLBJxVKIgbhuTuqbGxhVfnwBMtqm
"""
import streamlit as st
import pydeck as pdk
import json
from pydeck.data_utils.viewport_helpers import compute_view
import pandas as pd
import matplotlib.pyplot as plt
from datetime import date
from datetime import datetime
from sqlalchemy import create_engine
import geopandas as gpd
import os
current_wd = os.getcwd()
st.set_page_config(layout='wide')
def create_db_connection():
def get_us_pw(website):
# Saves the current directory in a variable in order to switch back to it once the program ends
previous_wd = os.getcwd()
os.chdir('F:\\Jibreel Hameed\\Kryptonite')
db = pd.read_excel('get_us_pw.xlsx', index_col=0)
username = db.loc[website, 'Username']
pw = db.loc[website, 'Password']
base_url = db.loc[website, 'Base URL']
os.chdir(previous_wd)
return username, base_url, pw
*username, pw = get_us_pw('PostgreSQL')
return create_engine(f"postgresql+psycopg2://{username[0]}:{pw}@{username[1]}:5432/nj_realtor_data")
@st.cache_data
def load_main_tables(_db_engine):
@st.cache_data
def prepare_dataframes(re_data, _geo_data):
re_data['Municipality'] = re_data['Municipality'].str.upper()
re_data['County'] = re_data['County'].str.upper()
re_data = re_data.rename(columns={'Municipality': 'MUN', 'County': 'COUNTY'})
_geo_data['Temp'] = 'COUNTY'
_geo_data['COUNTY'] = _geo_data['COUNTY'].str.cat(_geo_data['Temp'], sep=' ')
_geo_data.drop(columns='Temp', inplace=True)
return re_data, _geo_data
previous_wd = os.getcwd()
os.chdir('F:\\Real Estate Investing\\JQH Holding Company LLC\\Python Coding for Real Estate')
# Locally loading geojson. May need to change later
df1 = pd.read_sql_table('nj_realtor_basic', _db_engine)
df2 = gpd.read_file('Municipal_Boundaries_of_NJ (2).geojson')
df1, df2 = prepare_dataframes(df1, df2)
df1['Date'] = pd.to_datetime(df1['Date'])
# Be sure to remove duplicates before
df1.sort_values(by='Date', ascending=True, inplace=True)
# df1.set_index('Date', inplace=True)
os.chdir(previous_wd)
return df1, df2
def num2month(month):
month_dict = {
1: 'January', 2: 'February',
3: 'March', 4: 'April',
5: 'May', 6: 'June',
7: 'July', 8: 'August',
9: 'September', 10: 'October',
11: 'November', 12: 'December'
}
if month.isdigit():
return month_dict[int(month)]
elif month.isalpha():
for value, name in month_dict.items():
if name == month:
return value
# Create Geopandas object with filtered data
def create_geopandas_obj(counties='All'):
year = st.session_state['Date']['Year']
month = st.session_state['Date']['Month']
df = st.session_state['NJ Realtor Data'].set_index('Date')
geo_df = st.session_state['GeoJSON']
if counties != 'All':
if type(st.session_state['Counties']) == list:
df = df[df['COUNTY'].isin(counties)].loc[f'{year}-{month}']
elif type(st.session_state['Counties']) == str:
df = df[df['COUNTY'] == counties].loc[f'{year}-{month}']
else:
df = df.loc[f'{year}-{month}']
# Make sure this merge type still shows the outline of the municiplaities/counties not chosen
return geo_df.merge(df, on=['MUN', 'COUNTY'])
# Create choropleth map using PyDeck
def create_PyDeck_map(counties='All'):
# Create Geopandas dataframe and convert to GeoJSON
geo_df = create_geopandas_obj(counties)
geo_df.columns = geo_df.columns.str.replace(' ', '_')
geo_json = json.loads(geo_df.to_json())
# Compute the initial view of the map
initial_view = pdk.ViewState(latitude=39.833851, longitude=-74.871826, zoom=7, max_zoom=16, pitch=45, bearing=0)
column = '_'.join(st.session_state['Current Column'].split(' '))
max_value = geo_df[column].max()
# Define the Layer generated on the map
layer1 = pdk.Layer(
'GeoJsonLayer',
data=geo_json,
opacity=0.5,
stroked=True,
filled=True,
extruded=True,
pickable=True,
auto_highlight=True,
get_elevation=f"properties.{column} / {max_value}",
elevation_scale=10000,
get_fill_color=f"[255, (properties.{column} / {max_value}) * 255, 100]"
)
layer2 = pdk.Layer(
'PolygonLayer',
data=[[-75.592927, 41.357565], [-73.890363, 38.855893]],
stroked=False,
# processes the data as a flat longitude-latitude pair
get_polygon="-",
get_fill_color=[0, 0, 0, 20],
)
map_ = pdk.Deck(
layers=[layer2, layer1],
initial_view_state=initial_view,
map_style='dark',
tooltip={
'text': '''
{properties.NAME}
Median Sales Price: {properties.Median_Sales_Prices}
New Listings: {properties.New_Listings}
Closed Sales: {properties.Closed_Sales}
Inventory of Homes for Sales: {properties.Inventory_of_Homes_for_Sales}
'''
}
)
return map_
# 3) Function for the creation of real estate metrics
def real_estate_metrics(counties='All'):
# I need to calculate the deltas for these metrics as well
metrics_dict = {
'Median Sales Prices': 0,
'Median Sales Prices (MoM%)': 0,
'Median Sales Prices (YoY%)': 0,
'New Listings': 0,
'New Listings (MoM%)': 0,
'New Listings (YoY%)': 0,
'Closed Sales': 0,
'Closed Sales (MoM%)': 0,
'Closed Sales (YoY%)': 0,
'Inventory of Homes for Sales': 0,
'Inventory of Homes for Sales (MoM%)': 0,
'Inventory of Homes for Sales (YoY%)': 0
}
year = st.session_state['Date']['Year']
month = st.session_state['Date']['Month']
df = st.session_state['NJ Realtor Data'].set_index('Date')
# The month and year filters may be in the form of strs so I may have to cast to int
df_current = df.loc[f'{year}-{month}']
if month == '1':
df_last_month = df.loc[f'{int(year) - 1}-12']
else:
df_last_month = df.loc[f'{year}-{int(month) - 1}']
df_last_year = df.loc[f'{int(year) - 1}-{month}']
if counties != 'All':
if type(st.session_state['Counties']) == list:
df_current = df_current[df_current['COUNTY'].isin(counties)]
df_last_month = df_last_month[df_last_month['COUNTY'].isin(counties)]
df_last_year = df_last_year[df_last_year['COUNTY'].isin(counties)]
elif type(st.session_state['Counties']) == str:
df_current = df_current[df_current['COUNTY'] == counties]
df_last_month = df_last_month[df_last_month['COUNTY'] == counties]
df_last_year = df_last_year[df_last_year['COUNTY'] == counties]
for key in ['Median Sales Prices', 'New Listings', 'Closed Sales', 'Inventory of Homes for Sales']:
if key == 'Median Sales Prices':
metrics_dict[key] = round(df_current[key].mean(), 2)
metrics_dict[f'{key} (MoM%)'] = round((df_current[key].mean() - df_last_month[key].mean()) / df_last_month[key].mean(), 2) * 100
metrics_dict[f'{key} (YoY%)'] = round((df_current[key].mean() - df_last_year[key].mean()) / df_last_year[key].mean(), 2) * 100
else:
metrics_dict[key] = df_current[key].sum()
metrics_dict[f'{key} (MoM%)'] = round((df_current[key].sum() - df_last_month[key].sum()) / df_last_month[key].sum(), 2) * 100
metrics_dict[f'{key} (YoY%)'] = round((df_current[key].sum() - df_last_year[key].sum()) / df_last_year[key].sum(), 2) * 100
return metrics_dict
# Callback function to update checkboxes
def status_callback():
column_names = ['Median Sales Prices', 'New Listings', 'Closed Sales', 'Inventory of Homes for Sales']
# Ensure only one checkbox is selected at a time
if st.session_state['Median Sales Prices']:
st.session_state['Current Column'] = 'Median Sales Prices'
elif st.session_state['New Listings']:
st.session_state['Current Column'] = 'New Listings'
elif st.session_state['Closed Sales']:
st.session_state['Current Column'] = 'Closed Sales'
elif st.session_state['Inventory of Homes for Sales']:
st.session_state['Current Column'] = 'Inventory of Homes for Sales'
for name in [i for i in column_names]:
if name == st.session_state['Current Column']:
st.session_state[name] = True
else:
st.session_state[name] = False
# Ensure the correct counties are displayed in the choropleth map and metrics
if st.session_state['Counties'] == 'All Counties':
counties = 'All'
elif st.session_state['Counties'] == 'North Jersey':
counties = ['Bergen County', 'Essex County', 'Hudson County', 'Morris County',
'Passaic County', 'Sussex County', 'Warren County']
elif st.session_state['Counties'] == 'Central Jersey':
counties = ['Hunterdon County', 'Union County', 'Mercer County',
'Middlesex County', 'Monmouth County', 'Somerset County',
'Ocean County']
elif st.session_state['Counties'] == 'South Jersey':
counties = ['Burlington County', 'Camden County', 'Cumberland County',
'Gloucester County', 'Salem County']
else:
counties = st.session_state['Counties']
# Update the displayed metrics
st.session_state['Metrics'] = real_estate_metrics(counties)
# Update the displayed choropleth map
st.session_state['Map'] = create_PyDeck_map(counties)
# Update the line graph data
st.session_state['Line Graph Data'] = create_line_graph(counties)
# 7) Create line chart for Inventory, Closed Sales, New Listings
def create_line_graph(counties='All'):
df = st.session_state['NJ Realtor Data']
if counties != 'All':
if type(st.session_state['Counties']) == list:
df = df[df['COUNTY'].isin(counties)]
elif type(st.session_state['Counties']) == str:
df = df[df['COUNTY'] == counties]
df = df[['Date', 'New Listings', 'Closed Sales', 'Inventory of Homes for Sales']]
df = df.sort_values(by='Date')
group = df.groupby('Date').sum()
group.reset_index(names='Date')
return group.reset_index(names='Date')
def create_bar_chart(counties='All'):
year = st.session_state['Date']['Year']
month = st.session_state['Date']['Month']
df = st.session_state['NJ Realtor Data'].set_index('Date')
df = df.loc[f'{year}-{month}']
if counties != 'All':
if type(st.session_state['Counties']) == list:
df = df[df['COUNTY'].isin(counties)]
elif type(st.session_state['Counties']) == str:
df = df[df['COUNTY'] == counties]
# df.reset_index(inplace=True)
df = df[['MUN', st.session_state['Current Column']]]
df = df.sort_values(by=st.session_state['Current Column'], ascending=False)[:25]
return df
def create_pie_chart(counties='All'):
fig, axs = plt.subplots()
year = st.session_state['Date']['Year']
month = st.session_state['Date']['Month']
df = st.session_state['NJ Realtor Data'].set_index('Date')
df = df.loc[f'{year}-{month}']
if counties != 'All':
if type(st.session_state['Counties']) == list:
df = df[df['COUNTY'].isin(counties)]
elif type(st.session_state['Counties']) == str:
df = df[df['COUNTY'] == counties]
group = df.groupby('COUNTY')[st.session_state['Current Column']].sum()
axs.pie(group.values, labels=group.index, autopct='%1.1f%%', textprops={'size': 'smaller'})
return fig
# When loading the database, be sure to change the date column to the index
# Use pd.to_datetime on the column as well
# Be sure to order the database by date
# '''
# ------------------------------------------------------------------------------------------
# Dashboard Initializations
# ------------------------------------------------------------------------------------------
# '''
# Load Dashboard dependencies
st.header("NJ Realtor Project Dashboard", divider='grey')
st.sidebar.markdown("NJ Realtor Project Dashboard")
if 'NJ Realtor Data' not in st.session_state:
engine = create_db_connection()
st.session_state['NJ Realtor Data'], st.session_state['GeoJSON'] = load_main_tables(engine)
parsed_date = str(st.session_state['NJ Realtor Data']['Date'].max()).split('-')[0:2]
county_groups = ['All Counties', 'North Jersey', 'Central Jersey', 'South Jersey']
nj_counties = st.session_state['NJ Realtor Data']['COUNTY'].unique()
county_groups.extend(sorted([i.title() for i in nj_counties]))
if 'Date' not in st.session_state:
st.session_state['Date'] = {'Year': parsed_date[0], 'Month': parsed_date[1]}
if 'Month' not in st.session_state:
st.session_state['Month'] = num2month(parsed_date[1])
if 'Current Column' not in st.session_state:
st.session_state['Current Column'] = 'Median Sales Prices'
# Initialize checkboxes in session state
if 'Median Sales Prices' not in st.session_state:
st.session_state['Median Sales Prices'] = True
st.session_state['New Listings'] = False
st.session_state['Closed Sales'] = False
st.session_state['Inventory of Homes for Sales'] = False
# Intialize the value of the county select dropdown menu
if 'Counties' not in st.session_state:
st.session_state['Counties'] = 'All Counties'
# Initialize the latest data for the current, MoM% and YoY% metrics
if 'Metrics' not in st.session_state:
st.session_state['Metrics'] = real_estate_metrics()
# Initialize the latest choropleth map for all counties
if 'Map' not in st.session_state:
st.session_state['Map'] = create_PyDeck_map()
# Initialize the line graph data
if 'Line Graph Data' not in st.session_state:
st.session_state['Line Graph Data'] = create_line_graph()
# Initialize the pie chart
if 'Pie Chart' not in st.session_state:
st.session_state['Pie Chart'] = create_pie_chart()
# '''
# ------------------------------------------------------------------------------------------
# Dashboard Layout
# ------------------------------------------------------------------------------------------
# '''
category_list = ['Median Sales Prices', 'New Listings', 'Closed Sales', 'Inventory of Homes for Sales']
# Create the title for the dashboard
if st.session_state['Counties'] == 'All Counties':
location = 'NJ Municipalities'
elif st.session_state['Counties'] == 'North Jersey':
location = 'North Jersey Municipalities'
elif st.session_state['Counties'] == 'Central Jersey':
location = 'Central Jersey Municipalities'
elif st.session_state['Counties'] == 'South Jersey':
location = 'South Jersey Municipalities'
else:
location = f'{st.session_state["Counties"]}'
# Create the columns for the data matrix
top_row = st.columns(4)
bottom_row = st.columns(4)
for col, category in zip(top_row, category_list):
if category == 'Median Sales Prices':
col.metric(label=f'{category}', value=f"${st.session_state['Metrics'][f'{category}']}",
delta=f"{st.session_state['Metrics'][f'{category} (MoM%)']}%")
else:
col.metric(label=f'{category}', value=st.session_state['Metrics'][f'{category}'],
delta=f"{st.session_state['Metrics'][f'{category} (MoM%)']}%")
for col, category in zip(bottom_row, category_list):
col.metric(label=f'{category} YoY%', value=st.session_state['Metrics'][f'{category} (YoY%)'])
with st.container():
st.write(f'Choropleth Map of {location}')
st.pydeck_chart(st.session_state.Map, use_container_width=True)
with st.container():
st.write(f'Time Series of Closed Sales, Inventory, and New Listings for {location}')
st.line_chart(st.session_state['Line Graph Data'], x='Date')
st.sidebar.selectbox('Select Counties', county_groups, index=0, key='Counties', on_change=status_callback)
# Create checkboxes and dropdown menu for map and data filters
for category in category_list:
st.sidebar.checkbox(f'{category}', key=f'{category}', on_change=status_callback)
# Create two containers on one page
left_col, right_col = st.columns(2)
# Place the choropleth map inside the left container
with left_col:
st.write(f"Top 25 Municipalities with Highest {st.session_state['Current Column']}")
st.bar_chart(create_bar_chart(), x='MUN', y=st.session_state['Current Column'], x_label='Municipalities')
with right_col:
with st.container():
st.write(f"Total {st.session_state['Current Column']} By County")
st.pyplot(st.session_state['Pie Chart'])
with st.expander(f"Overview of the markey for {st.session_state['Month']} {st.session_state['Date']['Year']}"):
st.write('''Write blurb here about the use and functionality of the dashboard.''')
with st.expander('About NJ Realtor Dashboard'):
st.write('''Write blurb here about the use and functionality of the dashboard.''')
# '''
# ------------------------------------------------------------------------------------------
# Dashboard Legalities
# ------------------------------------------------------------------------------------------
# '''