# train_hyperparameter_opt.py
# Forked from AI4Finance-Foundation/FinRL

import datetime
import itertools
import random
import sys

import numpy as np
import pandas as pd

import matplotlib
matplotlib.use("Agg")  # select the non-interactive backend before pyplot is imported
import matplotlib.pyplot as plt

import optuna
from IPython.core.display import clear_output
from sklearn import preprocessing
from torch import nn

from finrl.apps import config
from finrl.neo_finrl.preprocessor.yahoodownloader import YahooDownloader
from finrl.neo_finrl.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.neo_finrl.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.neo_finrl.env_stock_trading.env_stocktrading_cashpenalty import StockTradingEnvCashpenalty
from finrl.neo_finrl.env_stock_trading.env_stocktrading_stoploss import StockTradingEnvStopLoss
from finrl.drl_agents.stablebaselines3.models import DRLAgent
from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline, tear_plot
class LoggingCallback:
    def __init__(self, threshold, trial_number, patience):
        '''
        threshold: float  tolerance for improvement in the best objective value
        trial_number: int  minimum number of trials before early stopping is considered
        patience: int  number of plateaued trials tolerated before stopping
        '''
        self.threshold = threshold
        self.trial_number = trial_number
        self.patience = patience
        self.cb_list = []  # Trials list for which threshold is reached

    def __call__(self, study: optuna.study.Study, frozen_trial: optuna.trial.FrozenTrial):
        # Read the best value recorded on the previous callback invocation
        previous_best_value = study.user_attrs.get("previous_best_value", study.best_value)
        # Checking if the minimum number of trials has passed
        if frozen_trial.number > self.trial_number:
            # Checking if the previous and current objective values have the same sign
            if previous_best_value * study.best_value >= 0:
                # Checking for the threshold condition: the best value has plateaued
                if abs(previous_best_value - study.best_value) < self.threshold:
                    self.cb_list.append(frozen_trial.number)
                    # If the threshold is met for the patience amount of trials, stop
                    if len(self.cb_list) > self.patience:
                        print('The study stops now...')
                        print('With number', frozen_trial.number, 'and value', frozen_trial.value)
                        print('The previous and current best values are {} and {} respectively'
                              .format(previous_best_value, study.best_value))
                        study.stop()
        # Record the best value seen so far for the next invocation
        study.set_user_attr("previous_best_value", study.best_value)
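
# Hedged sketch: the early-stopping behaviour of LoggingCallback can be
# exercised on a cheap toy objective before launching the expensive run below
# (the quadratic objective and trial counts are illustrative only):
# toy_study = optuna.create_study(direction="maximize")
# toy_study.optimize(lambda t: -(t.suggest_float("x", -1.0, 1.0) ** 2),
#                    n_trials=40,
#                    callbacks=[LoggingCallback(threshold=1e-4, trial_number=5, patience=10)])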
# Calculate the mean daily return
def calculate_return(df):
    df['daily_return'] = df['account_value'].pct_change(1)
    return df['daily_return'].mean()

# Calculate the annualized Sharpe ratio
def calculate_sharpe(df):
    df['daily_return'] = df['account_value'].pct_change(1)
    # For env_stocktrading_cashpenalty use:
    # df['daily_return'] = df['total_assets'].pct_change(1)
    if df['daily_return'].std() != 0:
        sharpe = (252 ** 0.5) * df['daily_return'].mean() / df['daily_return'].std()
        return sharpe
    else:
        return 0

# Calculate the standard deviation of daily returns
def calculate_std(df):
    df['daily_return'] = df['account_value'].pct_change(1)
    # For env_stocktrading_cashpenalty use:
    # df['daily_return'] = df['total_assets'].pct_change(1)
    if df['daily_return'].std() != 0:
        return df['daily_return'].std()
    else:
        return 0
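
# Hedged sanity check for the metric helpers (illustrative values only, not
# part of the tuning run). A steadily growing account value should give a
# positive mean return and Sharpe ratio:
# _demo = pd.DataFrame({"account_value": [100.0, 101.0, 102.0, 101.5, 103.0]})
# print(calculate_return(_demo.copy()))  # mean daily return
# print(calculate_sharpe(_demo.copy()))  # annualized Sharpe ratio
# print(calculate_std(_demo.copy()))     # std of daily returns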
def sample_ppo_params(trial: optuna.Trial):
    """
    Sampler for the model hyperparameters.
    (The name is historical; this script currently tunes SAC, not PPO.)
    :param trial: Optuna trial that suggests the next candidate values
    :return: (tech_index, reward_scale, model_params)
    """
    batch_size = trial.suggest_categorical("batch_size", [32, 64, 128, 256])
    # n_steps = trial.suggest_categorical("n_steps", [32, 64, 128, 256])
    # gamma = trial.suggest_float("gamma", 0.85, 0.99)
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 0.0008)
    # ent_coef = trial.suggest_discrete_uniform("ent_coef", 0.01, 0.2, 0.01)
    # clip_range = trial.suggest_categorical("clip_range", [0.1, 0.2, 0.3, 0.4])
    # n_epochs = trial.suggest_int("n_epochs", 1, 10, 1)
    # gae_lambda = trial.suggest_categorical("gae_lambda", [0.8, 0.9, 0.92, 0.95, 0.98, 0.99, 1.0])
    # max_grad_norm = trial.suggest_categorical("max_grad_norm", [0.3, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2, 5])
    # vf_coef = trial.suggest_uniform("vf_coef", 0, 1)
    # net_arch = trial.suggest_categorical("net_arch", ["m", "h", "Rm", "Rh"])
    # net_arch = trial.suggest_categorical("net_arch", ["s", "m", "h"])
    # p_net = trial.suggest_categorical("p_net", [64, 128, 256, 512])
    # v_net = trial.suggest_categorical("v_net", [64, 128, 256, 512])
    # activation_fn = trial.suggest_categorical("activation_fn", ["tanh", "relu", "leaky_relu"])
    buffer_size = trial.suggest_int("buffer_size", 70000, 300000, step=10000)
    # suggest_int only accepts an integer step, so the reward weights are
    # sampled as floats on a 0.5 grid
    asset_re = trial.suggest_float("asset_re", 0.0, 5.0, step=0.5)
    return_re = trial.suggest_float("return_re", 0.0, 5.0, step=0.5)
    sharpe_re = trial.suggest_float("sharpe_re", 0.0, 5.0, step=0.5)
    reward_scale = np.array([asset_re, return_re, sharpe_re])
    tech_index = trial.suggest_categorical("tech_index", ["basic", "b_boll", "adv"])
    # if batch_size > n_steps * 8:
    #     batch_size = n_steps * 8
    # if lr_schedule == "linear":
    #     learning_rate = linear_schedule(learning_rate)
    # reward_scale = trial.suggest_discrete_uniform("reward_scale", 0.00, 5, 0.01)
    # Map the sampled label to a concrete list of technical indicators
    tech_index = {
        "basic": ["macd", "rsi_30", "cci_30", "dx_30"],
        "b_boll": ["macd", "rsi_30", "boll_ub", "boll_lb"],
        "adv": ["macd", "boll_ub", "boll_lb", "rsi_30", "cci_14", "cci_30", "dx_30", "close_30_sma", "close_60_sma"],
    }[tech_index]
    # Independent networks usually work best
    # when not working with images
    # net_arch = {
    #     "xs": [64, 64],
    #     "s": [128, 128],
    #     "m": [256, 256],
    #     "h": [512, 512],
    #     # "3s": [128, 128, 128],
    #     # "3m": [256, 256, 256],
    #     # "3h": [512, 512, 512],
    #     # "Rxs": [64, 32],
    #     "Rs": [128, 64],
    #     "Rm": [256, 128],
    #     "Rh": [512, 256],
    #     # "small": [dict(pi=[64, 64], vf=[64, 64])],
    #     # "medium": [dict(pi=[256, 256], vf=[256, 256])],
    #     # "s": [dict(pi=[128, 128], vf=[128, 128])],
    #     # "m": [dict(pi=[256, 256], vf=[128, 128])],
    #     # "h": [dict(pi=[512, 512], vf=[128, 128])],
    # }[net_arch]
    # net_arch = [dict(pi=[p_net, p_net], vf=[v_net, v_net])]
    # activation_fn = {"tanh": nn.Tanh, "relu": nn.ReLU, "leaky_relu": nn.LeakyReLU}[activation_fn]
    model_params = {
        "batch_size": batch_size,
        "buffer_size": buffer_size,
        "gamma": 0.9,
        "learning_rate": learning_rate,
        "ent_coef": "auto_0.2",
        # "ent_coef": ent_coef,
        # "n_steps": n_steps,
        # "clip_range": clip_range,
        # "n_epochs": n_epochs,
        # "gae_lambda": gae_lambda,
        # "max_grad_norm": max_grad_norm,
        # "vf_coef": vf_coef,
        # "sde_sample_freq": sde_sample_freq,
    }
    # policy_params = dict(net_arch=net_arch, activation_fn=activation_fn)
    # policy_params = dict(net_arch=net_arch)
    # policy_params = dict(net_arch=[512, 512], activation_fn=activation_fn)
    return tech_index, reward_scale, model_params
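
# Hedged sketch: the sampler can be smoke-tested without a study by feeding it
# an optuna.trial.FixedTrial with one concrete parameter set (the values are
# illustrative, not tuned):
# _fixed = optuna.trial.FixedTrial({
#     "batch_size": 128, "learning_rate": 3e-4, "buffer_size": 100000,
#     "asset_re": 1.0, "return_re": 1.0, "sharpe_re": 1.0, "tech_index": "basic",
# })
# print(sample_ppo_params(_fixed))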
# This is our objective for tuning
def objective(trial: optuna.Trial):
    # The trial suggests a set of hyperparameters from the specified ranges
    tech_index, reward_scale, model_params = sample_ppo_params(trial)
    print("==============Start Feature Engineering===========")
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=tech_index,
        use_turbulence=True,
        user_defined_feature=False,
    )
    # `df` is downloaded once at module level below
    processed = fe.preprocess_data(df)
    list_ticker = processed["tic"].unique().tolist()
    list_date = list(pd.date_range(processed['date'].min(), processed['date'].max()).astype(str))
    combination = list(itertools.product(list_date, list_ticker))
    processed_full = pd.DataFrame(combination, columns=["date", "tic"]).merge(processed, on=["date", "tic"], how="left")
    processed_full = processed_full[processed_full['date'].isin(processed['date'])]
    processed_full = processed_full.sort_values(['date', 'tic'])
    processed_full = processed_full.fillna(0)
    # Training & trading data split
    train = data_split(processed_full, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(processed_full, config.START_TRADE_DATE, config.END_DATE)
    # Calculate the state and action space sizes
    stock_dimension = len(train.tic.unique())
    state_space = (
        1
        + 2 * stock_dimension
        + len(tech_index) * stock_dimension
    )
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 20000,
        "buy_cost_pct": 0.01,
        "sell_cost_pct": 0.01,
        # "stoploss_penalty": 0.85,
        # "profit_loss_ratio": 1.2,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": tech_index,
        # "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": reward_scale,
        "print_verbosity": 20,
        # "shares_increment": 100,
        # "discrete_actions": True,
        # "patient": False,
        # "print_verbosity": 10000,
    }
    e_train_gym = StockTradingEnv(df=train, **env_kwargs, random_start=True)
    e_trade_gym = StockTradingEnv(df=trade, turbulence_threshold=None, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    # env_train, _ = e_train_gym.get_multiproc_env(n=6)
    print(type(env_train))
    agent = DRLAgent(env=env_train)
    model_sac = agent.get_model("sac", model_kwargs=model_params, verbose=0)
    # model_ppo = agent.get_model("ppo", model_kwargs=model_params, policy_kwargs=policy_params, verbose=100)
    # You can increase total_timesteps for a better comparison
    # trained_ppo = agent.train_model(model=model_ppo,
    #                                 tb_log_name='ppo',
    #                                 total_timesteps=20000,
    #                                 )
    trained_sac = agent.train_model(model=model_sac,
                                    tb_log_name='sac',
                                    total_timesteps=40000,
                                    )
    # clear_output(wait=True)
    # For the given hyperparameters, determine the account value in the trading period
    # df_account_value, df_actions = DRLAgent.DRL_prediction(model=trained_ppo,
    #                                                        environment=e_trade_gym)
    df_account_value, df_actions = DRLAgent.DRL_prediction(model=trained_sac,
                                                           environment=e_trade_gym)
    baseline_df = get_baseline(
        ticker='^DJI', start=config.START_TRADE_DATE, end=config.END_DATE
    )
    baseline_df = pd.merge(df_account_value['date'], baseline_df, how='left', on='date')
    baseline_df = baseline_df.ffill().bfill()
    baseline_returns = get_daily_return(baseline_df, value_col_name="close")
    perf_stats_all = backtest_stats(df_account_value, baseline_return=baseline_returns)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    # Optuna expects a plain float, so unwrap the one-element 'Alpha' row
    alpha = float(perf_stats_all.loc['Alpha'])
    return alpha
print("==============Start Fetching Data===========")
df = YahooDownloader(
start_date=config.START_DATE,
end_date=config.END_DATE,
ticker_list=config.DOW_30_TICKER,
).fetch_data()
# Create a study object with direction='maximize',
# since we want to maximize the objective (alpha over the trade period).
# The pruner stops unpromising trials early;
# without one, diverging models can raise errors during tuning.
# A multivariate sampler is also an option:
# sampler = optuna.samplers.TPESampler(multivariate=True)
sampler = optuna.samplers.RandomSampler()
# study = optuna.create_study(study_name="sac_study", direction='maximize',
#                             sampler=sampler, pruner=optuna.pruners.HyperbandPruner())
study = optuna.create_study(direction='maximize',
                            sampler=sampler, pruner=optuna.pruners.HyperbandPruner(),
                            storage="mysql+pymysql://optuna@localhost/optuna")
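# The MySQL storage above assumes a local server with an `optuna` database.
# A file-backed study is a drop-in alternative if no server is available
# (hedged sketch; the file name is arbitrary):
# study = optuna.create_study(direction='maximize',
#                             sampler=sampler, pruner=optuna.pruners.HyperbandPruner(),
#                             storage="sqlite:///optuna_sac.db")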
# study = optuna.multi_objective.create_study(study_name="ppo_study", directions=['maximize', 'maximize'],
#                                             sampler=sampler)
# study = optuna.create_study(directions=['maximize', 'maximize'])
logging_callback = LoggingCallback(threshold=1e-4, patience=30, trial_number=5)
# Increase n_trials to scan the search space more thoroughly
study.optimize(objective, n_trials=50, catch=(ValueError,), callbacks=[logging_callback])
# study.optimize(objective, n_trials=50)
print('Best params: ' + str(study.best_params))
fig = optuna.visualization.plot_contour(study)
# fig = optuna.visualization.plot_pareto_front(study)
fig.show()
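# plot_contour returns a Plotly figure, so fig.show() renders it in a browser.
# To save it to disk instead (hedged; Plotly's write_image needs the optional
# kaleido package):
# fig.write_image("optuna_contour.png")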