-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathData Anslysis
380 lines (315 loc) · 17.1 KB
/
Data Anslysis
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
19th April
19-04-2024,09:15:04,ABC,10804
Date,Time,Stock Name,Last Traded Quantity
22/04/24,9:15:07,ABC,56427
19th -> time (hh:mm:ss): the hour field always has exactly 2 digits, e.g. 09:15:04
22nd -> time (h:mm:ss): the hour field may have fewer than 2 digits, e.g. 9:15:07
date on 19th -> 19-04-2024, '-' as separator
date on 22nd -> 22/04/24, '/' as separator
year on 19th -> 19-04-2024, 4-digit YYYY format
year on 22nd -> 22/04/24, 2-digit yy format
"Code can be used in further "
"""
Developer Name : Kamal Kumar Chanchal
EmailId : kchanchal78@gmail.com
Linkedin : linkedin.com/in/kamalchanchal
"""
import logging
from collections import deque
from datetime import datetime, timedelta

import pandas as pd
class StockVolumeStrategy:
    """Find when rolling intraday volume overtakes the 30-day average daily volume.

    Inputs are three CSV files:

    * a daily file with ``Date`` (dd/mm/yy), ``Stock Name`` and ``Volume``;
    * an intraday file for 19 April 2024 with ``Date`` (dd-mm-YYYY), ``Time``,
      ``Stock Name`` and ``Last Traded Quantity``;
    * an intraday file for 22 April 2024 with the same columns but ``Date``
      written as dd/mm/yy.

    Attributes set by the instance:
        day_data: daily frame with ``Date`` parsed to datetime.
        intraday_data_19th / intraday_data_22nd: intraday frames; replaced by
            their 1-minute aggregates once ``run`` has been called.
        stock_30_day_avg_by_date: ``{trade date: {stock: average volume}}``.
        stock_30_day_avg: averages for the latest trade date (kept for
            backward compatibility with code that reads this attribute).
    """

    # Trade dates evaluated by the strategy, in the ISO form accepted by
    # calculate_crossover(); the order is chronological.
    TRADE_DATES = ('2024-04-19', '2024-04-22')
    # Number of daily bars averaged to obtain the volume threshold.
    AVERAGE_WINDOW_DAYS = 30
    # Width of the rolling intraday window whose volume is compared to it.
    ROLLING_WINDOW = timedelta(minutes=60)

    def __init__(self, day_data_file, intraday_data_files):
        """Load the daily file and the two intraday files.

        Args:
            day_data_file: path of the daily volume CSV.
            intraday_data_files: sequence whose first item is the 19 April
                intraday CSV and whose second item is the 22 April one.

        Raises:
            ValueError: fewer than two intraday paths were supplied.
            AttributeError, KeyError, Exception: re-raised by the loaders
                when a file cannot be read or parsed.
        """
        self.__logger_()
        if len(intraday_data_files) < 2:
            raise ValueError("intraday data path is missing")
        self.__load_daily_stocks_data(day_data_file)
        self.__load_intrady_stocks_data1(intraday_data_files[0])
        self.__load_intrady_stocks_data2(intraday_data_files[1])
        self.logger.info('Data files loaded successfully.')

    def _read_stock_csv(self, csv_file, label, date_format, parse_time):
        """Read one CSV and parse its ``Date`` (and optionally ``Time``) column.

        Args:
            csv_file: path of the CSV to read.
            label: human-readable name used in the log line.
            date_format: ``strptime`` format of the ``Date`` column.
            parse_time: when true, add a ``Timestamp`` column holding
                ``datetime.time`` values parsed from ``Time``.

        Returns:
            The loaded DataFrame.

        Raises:
            AttributeError, KeyError, Exception: same types as before, now
                chained to the underlying error so the cause stays visible.
        """
        try:
            self.logger.info(f"Loading {label}: {csv_file}")
            frame = pd.read_csv(csv_file)
            frame['Date'] = pd.to_datetime(frame['Date'], format=date_format)
            if parse_time:
                frame['Timestamp'] = pd.to_datetime(
                    frame['Time'], format='%H:%M:%S').dt.time
        except AttributeError as e:
            raise AttributeError(f"Not found:{e}") from e
        except KeyError as e:
            raise KeyError(f"Missing or wrong key :{e}") from e
        except Exception as e:
            raise Exception(f"error occured while loading data:{csv_file}") from e
        return frame

    def __load_daily_stocks_data(self, day_data_file):
        """Load the daily volume CSV into ``self.day_data``."""
        self.day_data = self._read_stock_csv(
            day_data_file, "daily stock data", '%d/%m/%y', parse_time=False)

    def __load_intrady_stocks_data1(self, day_data_file):
        """Load the 19 April intraday CSV (dates written dd-mm-YYYY)."""
        self.intraday_data_19th = self._read_stock_csv(
            day_data_file, "19 April intraday data", '%d-%m-%Y', parse_time=True)

    def __load_intrady_stocks_data2(self, day_data_file):
        """Load the 22 April intraday CSV (dates written dd/mm/yy)."""
        self.intraday_data_22nd = self._read_stock_csv(
            day_data_file, "22 April intraday data", '%d/%m/%y', parse_time=True)

    def __logger_(self):
        """Set up logger to display messages in the console."""
        self.logger = logging.getLogger('StockVolumeStrategy')
        # getLogger returns the same object for every instance, so attach the
        # console handler only once; otherwise each new instance would make
        # every message print one more time.
        if not self.logger.handlers:
            handler = logging.StreamHandler()
            handler.setFormatter(
                logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
            self.logger.addHandler(handler)
        self.logger.setLevel(logging.INFO)
        self.logger.info('Initializing StockVolumeStrategy...')

    def calculate_30_day_avg(self):
        """Calculate the 30-day average volume of each stock for each trade date.

        For every date in ``TRADE_DATES`` the average uses the last
        ``AVERAGE_WINDOW_DAYS`` daily rows dated strictly before that date, so
        the threshold for a session never contains that session's own
        volume.  Stocks with fewer rows than the window get no entry.

        Results are stored in ``self.stock_30_day_avg_by_date``; the averages
        for the latest trade date are also exposed as ``self.stock_30_day_avg``.
        """
        self.logger.info("Calculating 30-day average volume...")
        self.stock_30_day_avg_by_date = {day: {} for day in self.TRADE_DATES}
        for stock, stock_data in self.day_data.groupby('Stock Name', sort=False):
            # tail() must see the rows in chronological order.
            stock_data = stock_data.sort_values('Date', kind='stable')
            for trade_date in self.TRADE_DATES:
                history = stock_data[stock_data['Date'] < pd.Timestamp(trade_date)]
                if len(history) < self.AVERAGE_WINDOW_DAYS:
                    continue
                avg_volume = history['Volume'].tail(self.AVERAGE_WINDOW_DAYS).mean()
                self.stock_30_day_avg_by_date[trade_date][stock] = avg_volume
                self.logger.debug(
                    f"30-day average for stock {stock} before {trade_date}: {avg_volume}")
        self.stock_30_day_avg = dict(
            self.stock_30_day_avg_by_date[self.TRADE_DATES[-1]])

    def _averages_for(self, date):
        """Return the ``{stock: average}`` mapping that applies to ``date``.

        Falls back to ``self.stock_30_day_avg`` when no per-date mapping
        exists, e.g. when a caller assigned that attribute directly.
        """
        by_date = getattr(self, 'stock_30_day_avg_by_date', {})
        if date in by_date:
            return by_date[date]
        return self.stock_30_day_avg

    def calculate_crossover(self, date):
        """Calculate when the cumulative volume exceeds the 30-day average volume using a 60-minute rolling window.

        Args:
            date: trade date as ``'2024-04-19'`` or ``'2024-04-22'``.

        Returns:
            ``{stock: 'YYYY-mm-dd HH:MM:SS' or None}`` for every stock that
            traded on ``date`` and has a 30-day average; ``None`` means the
            rolling volume never reached the average.  Stocks without an
            average are omitted.  An unsupported ``date`` yields ``{}``.
        """
        self.logger.info(f"Calculating crossover for {date}...")
        crossover_timestamp = {}

        if date == '2024-04-19':
            intraday_data = self.intraday_data_19th
        elif date == '2024-04-22':
            intraday_data = self.intraday_data_22nd
        else:
            return crossover_timestamp

        averages = self._averages_for(date)
        start_time = datetime.strptime(f"{date} 09:15:00", '%Y-%m-%d %H:%M:%S')
        # Trades carry only a time of day; anchor them to the trade date so
        # they compare correctly against start_time and are reported with
        # the right calendar day.
        trade_day = start_time.date()

        # Take the stock list from the frame of the requested date, so
        # stocks that traded only on that day are not missed.
        for stock in intraday_data['Stock Name'].unique():
            if stock not in averages:
                self.logger.debug(f"No 30-day average for {stock}; skipped.")
                continue
            stock_30_day_avg = averages[stock]
            stock_data = intraday_data[intraday_data['Stock Name'] == stock]

            volume_window = deque()  # (timestamp, quantity), oldest first
            cumulative_volume = 0
            trades = zip(stock_data['Timestamp'], stock_data['Last Traded Quantity'])
            for trade_time, quantity in trades:
                timestamp = datetime.combine(trade_day, trade_time)
                if timestamp < start_time:
                    continue

                volume_window.append((timestamp, quantity))
                cumulative_volume += quantity

                # Drop trades that have fallen out of the rolling window.
                while volume_window and (
                        timestamp - volume_window[0][0]) > self.ROLLING_WINDOW:
                    _, oldest_qty = volume_window.popleft()
                    cumulative_volume -= oldest_qty

                if cumulative_volume >= stock_30_day_avg:
                    crossover_timestamp[stock] = timestamp.strftime('%Y-%m-%d %H:%M:%S')
                    self.logger.info(
                        f"Crossover for {stock} found at {crossover_timestamp[stock]}")
                    break
            else:
                # Loop ended without a break: the threshold was never reached.
                crossover_timestamp[stock] = None
                self.logger.info(f"No crossover found for {stock} on {date}.")

        return crossover_timestamp

    def aggregate_to_minute(self, intraday_data):
        """Aggregate second-data to 1-minute intervals with Stock Name.

        Args:
            intraday_data: frame with ``Date``, ``Time``, ``Stock Name`` and
                ``Last Traded Quantity`` columns.  It is not modified.

        Returns:
            A new frame with one row per stock and minute, holding
            ``Stock Name``, ``DateTime`` (start of the minute),
            ``Last Traded Quantity`` (sum over the minute) and ``Timestamp``
            (time-of-day part of ``DateTime``).
        """
        self.logger.info("Aggregate second-data to 1-minute intervals with Stock Name")
        date_time_text = (intraday_data['Date'].astype(str) + " "
                          + intraday_data['Time'].astype(str))
        # assign() returns a copy, leaving the caller's frame untouched.
        frame = intraday_data.assign(
            DateTime=pd.to_datetime(date_time_text, format='%Y-%m-%d %H:%M:%S'))
        # 'min' is the supported minute alias; 'T' is deprecated in pandas.
        resampled_data = frame.groupby('Stock Name').resample('min', on='DateTime').agg(
            {'Last Traded Quantity': 'sum'}).reset_index()
        # Derive the time of day from the resampled rows themselves.
        resampled_data['Timestamp'] = resampled_data['DateTime'].dt.time
        return resampled_data

    def run(self):
        """Run the entire strategy.

        Aggregates both intraday frames to 1-minute bars, computes the
        30-day averages, evaluates the crossover for both trade dates and
        prints the outcome.

        Returns:
            ``{trade date: crossover mapping}`` as produced by
            ``calculate_crossover``.
        """
        self.logger.info("Running the StockVolumeStrategy...")
        # Aggregated frames no longer have a 'Time' column; skipping them
        # lets run() be called more than once on the same instance.
        if 'Time' in self.intraday_data_19th.columns:
            self.intraday_data_19th = self.aggregate_to_minute(self.intraday_data_19th)
        if 'Time' in self.intraday_data_22nd.columns:
            self.intraday_data_22nd = self.aggregate_to_minute(self.intraday_data_22nd)

        # Step 1: Calculate 30-day average volumes
        self.calculate_30_day_avg()

        # Step 2: Run crossover calculations for both dates
        crossover_19th = self.calculate_crossover('2024-04-19')
        crossover_22nd = self.calculate_crossover('2024-04-22')

        # Print the results
        print(f"Timestamp crossover for 19th April 2024: {crossover_19th}")
        print(f"Timestamp crossover for 22nd April 2024: {crossover_22nd}")
        return {'2024-04-19': crossover_19th, '2024-04-22': crossover_22nd}