From 1049780b1f5f2694fcb9741659e2cac44b0a8541 Mon Sep 17 00:00:00 2001 From: MuslemRahimi Date: Thu, 20 Feb 2025 15:04:01 +0100 Subject: [PATCH] bugfixing --- app/cron_price_analysis.py | 29 +++++++++++++++++++++++------ app/ml_models/prophet_model.py | 18 ++++++++++++------ 2 files changed, 35 insertions(+), 12 deletions(-) diff --git a/app/cron_price_analysis.py b/app/cron_price_analysis.py index 1dfbbd8..0dc7927 100755 --- a/app/cron_price_analysis.py +++ b/app/cron_price_analysis.py @@ -8,6 +8,7 @@ from tqdm import tqdm import concurrent.futures import orjson +import os def convert_symbols(symbol_list): converted_symbols = [] @@ -50,10 +51,13 @@ async def download_data(ticker: str, start_date: str, end_date: str): # Apply filtering logic if enough data exists if len(df) > 252 * 2: # At least 2 years of history is necessary q_high = df["y"].quantile(0.99) - q_low = df["y"].quantile(0.01) + q_low = df["y"].quantile(0.1) df = df[(df["y"] > q_low) & (df["y"] < q_high)] - return df + # Calculate Simple Moving Average (SMA) + #df["y"] = df["y"].rolling(window=50).mean() # 50-day SMA + + return df.dropna() # Drop initial NaN values due to rolling window except Exception as e: print(f"Error processing {ticker}: {e}") return None @@ -62,10 +66,19 @@ async def process_symbol(ticker, start_date, end_date): try: df = await download_data(ticker, start_date, end_date) data = PricePredictor().run(df) - await save_json(ticker, data) + file_path = f"json/price-analysis/{ticker}.json" + + if data and data['lowPriceTarget'] > 0: + print(data) + await save_json(ticker, data) + else: + await asyncio.to_thread(os.remove, file_path) except Exception as e: - print(e) + try: + await asyncio.to_thread(os.remove, file_path) + except FileNotFoundError: + pass # The file might not exist, so we ignore the error async def run(): @@ -79,12 +92,16 @@ async def run(): total_symbols = stock_symbols print(f"Total tickers: {len(total_symbols)}") - start_date = datetime(2017, 1, 1).strftime("%Y-%m-%d") + start_date = datetime(2015, 1, 1).strftime("%Y-%m-%d") end_date = datetime.today().strftime("%Y-%m-%d") + df_sp500 = await download_data('SPY', start_date, end_date) + df_sp500 = df_sp500.rename(columns={"y": "sp500"}) + #print(df_sp500) + chunk_size = len(total_symbols) // 70 # Divide the list into N chunks chunks = [total_symbols[i:i + chunk_size] for i in range(0, len(total_symbols), chunk_size)] - #chunks = [['NVDA','GME','TSLA','AAPL']] + #chunks = [['GME']] for chunk in chunks: tasks = [] for ticker in tqdm(chunk): diff --git a/app/ml_models/prophet_model.py b/app/ml_models/prophet_model.py index 35a2b26..438ad37 100755 --- a/app/ml_models/prophet_model.py +++ b/app/ml_models/prophet_model.py @@ -55,18 +55,24 @@ def __init__(self, predict_ndays=365): interval_width=0.8, daily_seasonality=True, yearly_seasonality=True, + changepoint_prior_scale= 0.1, + seasonality_prior_scale=0.1, ) + #self.model.add_regressor('volume') + def run(self, df): + df = df.copy() + self.model.fit(df) future = self.model.make_future_dataframe(periods=self.predict_ndays) forecast = self.model.predict(future) # Apply rolling average to smooth the forecast intervals rolling_window = 200 - forecast['smoothed_upper'] = forecast['yhat_upper'].round(2)#.rolling(window=rolling_window, min_periods=1).mean().round(2) - forecast['smoothed_lower'] = forecast['yhat_lower'].round(2)#.rolling(window=rolling_window, min_periods=1).mean().round(2) - forecast['smoothed_mean'] = forecast['yhat'].round(2)#.rolling(window=rolling_window, min_periods=1).mean().round(2) + forecast['smoothed_upper'] = forecast['yhat_upper'].rolling(window=rolling_window, min_periods=1).mean().round(2) + forecast['smoothed_lower'] = forecast['yhat_lower'].rolling(window=rolling_window, min_periods=1).mean().round(2) + forecast['smoothed_mean'] = forecast['yhat'].rolling(window=rolling_window, min_periods=1).mean().round(2) # Actual and predicted values for evaluation (optional) actual_values = df['y'].values @@ -74,9 +80,9 @@ def run(self, df): # Extract forecast values for plotting or analysis (if needed) pred_date_list = forecast['ds'][-1200 - self.predict_ndays:].dt.strftime('%Y-%m-%d').tolist() - upper_list = forecast['smoothed_upper'][-1200 - self.predict_ndays:].tolist() - lower_list = forecast['smoothed_lower'][-1200 - self.predict_ndays:].tolist() - mean_list = forecast['smoothed_mean'][-1200 - self.predict_ndays:].tolist() + upper_list = forecast['smoothed_upper'][-1200 - self.predict_ndays:].tolist() + lower_list = forecast['smoothed_lower'][-1200 - self.predict_ndays:].tolist() + mean_list = forecast['smoothed_mean'][-1200 - self.predict_ndays:].tolist() historical_date_list = df['ds'][-1200:].dt.strftime('%Y-%m-%d').tolist() historical_price_list = df['y'][-1200:].round(2).tolist()