-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
darkerego
committed
Jul 22, 2020
1 parent
4f64634
commit 31a2074
Showing
13 changed files
with
1,950 additions
and
1 deletion.
There are no files selected for viewing
Submodule SyBrain
deleted from
a4b139
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# SyBrain | ||
Scalping Trading Unit and Machine Learning using BitMEX API. ML implementation has been inspired by this author : https://github.com/nicholastoddsmith/pythonml | ||
|
||
 | ||
|
||
------------------------------------ | ||
|
||
Configuration | ||
|
||
Just replace the key and secret fields with yours and adjust the number of contracts traded per position in sybrain.py | ||
|
||
------------------------------------ | ||
|
||
Execution | ||
|
||
python sybrain.py | ||
|
||
------------------------------------ | ||
|
||
Donations to allow further developments | ||
|
||
BTC: 3BMEXbS4Neu5KwsiATuZVowmwYD3UPMuxo |
Binary file not shown.
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# This file is used by pip to install required python packages | ||
# Usage: pip3 install -r requirements.txt | ||
|
||
numpy == 1.17.2 | ||
pandas == 0.25.1 | ||
matplotlib == 3.1.1 | ||
sklearn | ||
bravado == 10.4.1 | ||
websocket-client == 0.56.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,348 @@ | ||
#!/usr/bin/env python3 | ||
# -*- coding: utf-8 -*- | ||
""" | ||
Created on Wed Nov 2 07:36:10 2016 | ||
@author: Nicholas Smith | ||
""" | ||
# Used for numpy arrays | ||
import numpy as np | ||
# Used to read data from CSV file | ||
import pandas as pd | ||
# Used to convert date string to numerical value | ||
from datetime import datetime, timedelta | ||
# Used to plot data | ||
import matplotlib.pyplot as mpl | ||
# Used to scale data | ||
from sklearn.preprocessing import StandardScaler | ||
# Used to perform CV | ||
from sklearn.model_selection import ShuffleSplit | ||
from sklearn.metrics import make_scorer, r2_score | ||
from sklearn.model_selection import cross_val_score | ||
|
||
# Gives a list of timestamps from the start date to the end date | ||
# | ||
# startDate: The start date as a string xxxx-xx-xx | ||
# endDate: The end date as a string year-month-day | ||
# period: 'minute', 'daily', 'weekly', or 'monthly' | ||
# weekends: True if weekends should be included; false otherwise | ||
# return: A numpy array of timestamps | ||
def DateRange(startDate, endDate, period, weekends=True): | ||
# The start and end date | ||
sd = datetime.strptime(startDate, '%Y-%m-%d %H:%M:%S') | ||
ed = datetime.strptime(endDate, '%Y-%m-%d %H:%M:%S') | ||
# Invalid start and end dates | ||
if (sd > ed): | ||
raise ValueError("The start date cannot be later than the end date.") | ||
# One time period is a day | ||
if (period == 'minute'): | ||
prd = timedelta(minutes=1) | ||
if (period == 'daily'): | ||
prd = timedelta(1) | ||
# One prediction per week | ||
if (period == 'weekly'): | ||
prd = timedelta(7) | ||
# one prediction every 30 days ("month") | ||
if (period == 'monthly'): | ||
prd = timedelta(30) | ||
# The final list of timestamp data | ||
dates = [] | ||
cd = sd | ||
while (cd <= ed): | ||
# If weekdays are included or it's a weekday append the current ts | ||
if (weekends or (cd.date().weekday() != 5 and cd.date().weekday() != 6)): | ||
dates.append(cd.timestamp()) | ||
# Onto the next period | ||
cd = cd + prd | ||
# print(np.array(dates)) | ||
return np.array(dates) | ||
|
||
|
||
# Given a date, returns the previous day | ||
# | ||
# startDate: The start date as a datetime object | ||
# weekends: True if weekends should counted; false otherwise | ||
def DatePrevDay(startDate, weekends=True): | ||
# One day | ||
day = timedelta(minutes=1) | ||
cd = datetime.fromtimestamp(startDate) | ||
while (True): | ||
cd = cd - day | ||
if (weekends or (cd.date().weekday() != 5 and cd.date().weekday() != 6)): | ||
return cd.timestamp() | ||
# Should never happen | ||
return None | ||
|
||
|
||
# Load data from the CSV file. Note: Some systems are unable | ||
# to give timestamps for dates before 1970. This function may | ||
# fail on such systems. | ||
# | ||
# path: The path to the file | ||
# return: A data frame with the parsed timestamps | ||
def ParseData(path): | ||
# Read the csv file into a dataframe | ||
df = pd.read_csv(path) | ||
# Get the date strings from the date column | ||
dateStr = df['Date'].values | ||
D = np.zeros(dateStr.shape) | ||
# Convert all date strings to a numeric value | ||
for i, j in enumerate(dateStr): | ||
# Date strings are of the form year-month-day | ||
D[i] = datetime.strptime(j, '%Y-%m-%d %H:%M:%S%z').replace(tzinfo=None).timestamp() | ||
# Add the newly parsed column to the dataframe | ||
df['Timestamp'] = D | ||
# Remove any unused columns (axis = 1 specifies fields are columns) | ||
return df.drop('Date', axis=1) | ||
|
||
|
||
# Given dataframe from ParseData | ||
# plot it to the screen | ||
# | ||
# df: Dataframe returned from | ||
# p: The position of the predicted data points | ||
def PlotData(df, p=None): | ||
if (p is None): | ||
p = np.array([]) | ||
# Timestamp data | ||
ts = df.Timestamp.values | ||
# Number of x tick marks | ||
nTicks = 10 | ||
# Left most x value | ||
s = np.min(ts) | ||
# Right most x value | ||
e = np.max(ts) | ||
# Total range of x values | ||
r = e - s | ||
# Add some buffer on both sides | ||
s -= r / 5 | ||
e += r / 5 | ||
# These will be the tick locations on the x axis | ||
tickMarks = np.arange(s, e, (e - s) / nTicks) | ||
# Convert timestamps to strings | ||
strTs = [datetime.fromtimestamp(i).strftime('%Y-%m-%d %H:%M:%S') for i in tickMarks] | ||
mpl.figure() | ||
# Plots of the high and low values for the day | ||
mpl.plot(ts, df.High.values, color='#727272', linewidth=1.618, label='Actual') | ||
# Predicted data was also provided | ||
if (len(p) > 0): | ||
mpl.plot(ts[p], df.High.values[p], color='#7294AA', linewidth=1.618, label='Predicted') | ||
# Set the tick marks | ||
mpl.xticks(tickMarks, strTs, rotation='vertical') | ||
# Set y-axis label | ||
mpl.ylabel('Stock High Value (USD)') | ||
# Add the label in the upper left | ||
mpl.legend(loc='upper left') | ||
mpl.show() | ||
|
||
|
||
# A class that predicts stock prices based on historical stock data | ||
class StockPredictor: | ||
# The (scaled) data frame | ||
D = None | ||
# Unscaled timestamp data | ||
DTS = None | ||
# The data matrix | ||
A = None | ||
# Target value matrix | ||
y = None | ||
# Corresponding columns for target values | ||
targCols = None | ||
# Number of previous days of data to use | ||
npd = 1 | ||
# The regressor model | ||
R = None | ||
# Object to scale input data | ||
S = None | ||
|
||
# Constructor | ||
# nPrevDays: The number of past days to include | ||
# in a sample. | ||
# rmodel: The regressor model to use (sklearn) | ||
# nPastDays: The number of past days in each feature | ||
# scaler: The scaler object used to scale the data (sklearn) | ||
def __init__(self, rmodel, nPastDays=1, scaler=StandardScaler()): | ||
self.npd = nPastDays | ||
self.R = rmodel | ||
self.S = scaler | ||
|
||
# Extracts features from stock market data | ||
# | ||
# D: A dataframe from ParseData | ||
# ret: The data matrix of samples | ||
def _ExtractFeat(self, D): | ||
# One row per day of stock data | ||
m = D.shape[0] | ||
# Open, High, Low, and Close for past n days + timestamp and volume | ||
n = self._GetNumFeatures() | ||
B = np.zeros([m, n]) | ||
# Preserve order of spreadsheet | ||
for i in range(m - 1, -1, -1): | ||
self._GetSample(B[i], i, D) | ||
# Return the internal numpy array | ||
return B | ||
|
||
# Extracts the target values from stock market data | ||
# | ||
# D: A dataframe from ParseData | ||
# ret: The data matrix of targets and the | ||
|
||
def _ExtractTarg(self, D): | ||
# Timestamp column is not predicted | ||
tmp = D.drop('Timestamp', axis=1) | ||
# Return the internal numpy array | ||
return tmp.values, tmp.columns | ||
|
||
# Get the number of features in the data matrix | ||
# | ||
# n: The number of previous days to include | ||
# self.npd is used if n is None | ||
# ret: The number of features in the data matrix | ||
def _GetNumFeatures(self, n=None): | ||
if (n is None): | ||
n = self.npd | ||
return n * 7 + 1 | ||
|
||
# Get the sample for a specific row in the dataframe. | ||
# A sample consists of the current timestamp and the data from | ||
# the past n rows of the dataframe | ||
# | ||
# r: The array to fill with data | ||
# i: The index of the row for which to build a sample | ||
# df: The dataframe to use | ||
# return; r | ||
def _GetSample(self, r, i, df): | ||
# First value is the timestamp | ||
r[0] = df['Timestamp'].values[i] | ||
# The number of columns in df | ||
n = df.shape[1] | ||
# The last valid index | ||
lim = df.shape[0] | ||
# Each sample contains the past n days of stock data; for non-existing data | ||
# repeat last available sample | ||
# Format of row: | ||
# Timestamp Volume Open[i] High[i] ... Open[i-1] High[i-1]... etc | ||
for j in range(0, self.npd): | ||
# Subsequent rows contain older data in the spreadsheet | ||
ind = i + j + 1 | ||
# If there is no older data, duplicate the oldest available values | ||
if (ind >= lim): | ||
ind = lim - 1 | ||
# Add all columns from row[ind] | ||
for k, c in enumerate(df.columns): | ||
# + 1 is needed as timestamp is at index 0 | ||
r[k + 1 + n * j] = df[c].values[ind] | ||
return r | ||
|
||
# Attempts to learn the stock market data | ||
# given a dataframe taken from ParseData | ||
# | ||
# D: A dataframe from ParseData | ||
def Learn(self, D): | ||
# Keep track of the currently learned data | ||
self.D = D.copy() | ||
# Keep track of old timestamps for indexing | ||
self.DTS = np.copy(D.Timestamp.values) | ||
# Scale the data | ||
self.D[self.D.columns] = self.S.fit_transform(self.D) | ||
# Get features from the data frame | ||
self.A = self._ExtractFeat(self.D) | ||
# Get the target values and their corresponding column names | ||
self.y, self.targCols = self._ExtractTarg(self.D) | ||
# Create the regressor model and fit it | ||
self.R.fit(self.A, self.y) | ||
|
||
# Predicts values for each row of the dataframe. Can be used to | ||
# estimate performance of the model | ||
# | ||
# df: The dataframe for which to make prediction | ||
# return: A dataframe containing the predictions | ||
def PredictDF(self, df): | ||
# Make a local copy to prevent modifying df | ||
D = df.copy() | ||
# Scale the input data like the training data | ||
D[D.columns] = self.S.transform() | ||
# Get features | ||
A = self._ExtractFeat(D) | ||
# Construct a dataframe to contain the predictions | ||
# Column order was saved earlier | ||
P = pd.DataFrame(index=range(A.shape[0]), columns=self.targCols) | ||
# Perform prediction | ||
P[P.columns] = self.R.predict(A) | ||
# Add the timestamp (already scaled from above) | ||
P['Timestamp'] = D['Timestamp'].values | ||
# Scale the data back to original range | ||
P[P.columns] = self.S.inverse_transform(P) | ||
return P | ||
|
||
# Predict the stock price during a specified time | ||
# | ||
# startDate: The start date as a string in yyyy-mm-dd format | ||
# endDate: The end date as a string yyyy-mm-dd format | ||
# period: 'daily', 'weekly', or 'monthly' for the time period | ||
# between predictions | ||
# return: A dataframe containing the predictions or | ||
def PredictDate(self, startDate, endDate, period='minute'): | ||
# Create the range of timestamps and reverse them | ||
ts = DateRange(startDate, endDate, period)[::-1] | ||
m = ts.shape[0] | ||
# Prediction is based on data prior to start date | ||
# Get timestamp of previous day | ||
prevts = DatePrevDay(ts[-1]) | ||
# Test if there is enough data to continue | ||
try: | ||
ind = np.where(self.DTS == prevts)[0][0] | ||
except IndexError: | ||
return None | ||
# There is enough data to perform prediction; allocate new data frame | ||
P = pd.DataFrame(np.zeros([m, self.D.shape[1]]), index=range(m), columns=self.D.columns) | ||
# Add in the timestamp column so that it can be scaled properly | ||
P['Timestamp'] = ts | ||
# Scale the timestamp (other fields are 0) | ||
P[P.columns] = self.S.transform(P) | ||
# B is to be the data matrix of features | ||
B = np.zeros([1, self._GetNumFeatures()]) | ||
# Add extra last entries for past existing data | ||
for i in range(self.npd): | ||
# If the current index does not exist, repeat the last valid data | ||
curInd = ind + i | ||
if (curInd >= self.D.shape[0]): | ||
curInd = curInd - 1 | ||
# Copy over the past data (already scaled) | ||
P.loc[m + i] = self.D.loc[curInd] | ||
# Loop until end date is reached | ||
for i in range(m - 1, -1, -1): | ||
# Create one sample | ||
self._GetSample(B[0], i, P) | ||
# Predict the row of the dataframe and save it | ||
pred = self.R.predict(B).ravel() | ||
# Fill in the remaining fields into the respective columns | ||
for j, k in zip(self.targCols, pred): | ||
P.at[i, j] = k | ||
# Discard extra rows needed for prediction | ||
P = P[0:m] | ||
# Scale the dataframe back to the original range | ||
P[P.columns] = self.S.inverse_transform(P) | ||
# print(P) | ||
return P | ||
|
||
# Test the predictors performance and | ||
# displays results to the screen | ||
# | ||
# D: The dataframe for which to make prediction | ||
def TestPerformance(self, df=None): | ||
# If no dataframe is provided, use the currently learned one | ||
if (df is None): | ||
D = self.D | ||
else: | ||
D = self.S.transform(df.copy()) | ||
# Get features from the data frame | ||
A = self._ExtractFeat(D) | ||
# Get the target values and their corresponding column names | ||
y, _ = self._ExtractTarg(D) | ||
# Begin cross validation | ||
ss = ShuffleSplit(n_splits=1) | ||
for trn, tst in ss.split(A): | ||
s1 = cross_val_score(self.R, A, y, cv=3, scoring=make_scorer(r2_score)) | ||
s2 = cross_val_score(self.R, A[tst], y[tst], cv=3, scoring=make_scorer(r2_score)) | ||
s3 = cross_val_score(self.R, A[trn], y[trn], cv=3, scoring=make_scorer(r2_score)) | ||
print('C-V:\t' + str(s1) + '\nTst:\t' + str(s2) + '\nTrn:\t' + str(s3)) |
Oops, something went wrong.