Skip to content

Commit

Permalink
add SyBrain contents
Browse files Browse the repository at this point in the history
  • Loading branch information
darkerego committed Jul 22, 2020
1 parent 4f64634 commit 31a2074
Show file tree
Hide file tree
Showing 13 changed files with 1,950 additions and 1 deletion.
1 change: 0 additions & 1 deletion SyBrain
Submodule SyBrain deleted from a4b139
674 changes: 674 additions & 0 deletions SyBrain/LICENSE

Large diffs are not rendered by default.

22 changes: 22 additions & 0 deletions SyBrain/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# SyBrain
Scalping trading unit with machine learning, built on the BitMEX API. The ML implementation was inspired by the pythonml project: https://github.com/nicholastoddsmith/pythonml

![Alt text](illustration.png?raw=true "SyBrain Logic")

------------------------------------

Configuration

Just replace the key and secret fields with yours and adjust the number of contracts traded per position in sybrain.py

------------------------------------

Execution

python sybrain.py

------------------------------------

Donations to support further development

BTC: 3BMEXbS4Neu5KwsiATuZVowmwYD3UPMuxo
Binary file not shown.
Binary file added SyBrain/__pycache__/stocks.cpython-37.pyc
Binary file not shown.
Binary file added SyBrain/illustration.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
9 changes: 9 additions & 0 deletions SyBrain/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# This file is used by pip to install required python packages
# Usage: pip3 install -r requirements.txt

numpy == 1.17.2
pandas == 0.25.1
matplotlib == 3.1.1
sklearn
bravado == 10.4.1
websocket-client == 0.56.0
348 changes: 348 additions & 0 deletions SyBrain/stockpredictor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,348 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 2 07:36:10 2016
@author: Nicholas Smith
"""
# Used for numpy arrays
import numpy as np
# Used to read data from CSV file
import pandas as pd
# Used to convert date string to numerical value
from datetime import datetime, timedelta
# Used to plot data
import matplotlib.pyplot as mpl
# Used to scale data
from sklearn.preprocessing import StandardScaler
# Used to perform CV
from sklearn.model_selection import ShuffleSplit
from sklearn.metrics import make_scorer, r2_score
from sklearn.model_selection import cross_val_score

# Gives a list of timestamps from the start date to the end date (both inclusive)
#
# startDate: The start date as a string 'YYYY-MM-DD HH:MM:SS'
# endDate: The end date as a string 'YYYY-MM-DD HH:MM:SS'
# period: 'minute', 'daily', 'weekly', or 'monthly' (a "month" is 30 days)
# weekends: True if weekends should be included; false otherwise
# return: A numpy array of POSIX timestamps, oldest first
# raises: ValueError if a date string does not match the format, if the
#         start date is later than the end date, or if period is unknown
def DateRange(startDate, endDate, period, weekends=True):
    # Both dates are parsed as naive datetimes, so timestamp() below
    # interprets them in the local timezone
    fmt = '%Y-%m-%d %H:%M:%S'
    sd = datetime.strptime(startDate, fmt)
    ed = datetime.strptime(endDate, fmt)
    # Invalid start and end dates
    if sd > ed:
        raise ValueError("The start date cannot be later than the end date.")
    # Step between two consecutive timestamps for each supported period
    steps = {
        'minute': timedelta(minutes=1),
        'daily': timedelta(days=1),
        'weekly': timedelta(days=7),
        'monthly': timedelta(days=30),
    }
    prd = steps.get(period)
    # Fail with a clear message instead of hitting an unbound step later on
    if prd is None:
        raise ValueError(
            "Unknown period {!r}; expected one of 'minute', 'daily', "
            "'weekly', 'monthly'.".format(period))
    # The final list of timestamp data
    dates = []
    cd = sd
    while cd <= ed:
        # weekday() is 5 for Saturday and 6 for Sunday
        if weekends or cd.weekday() < 5:
            dates.append(cd.timestamp())
        # Onto the next period
        cd += prd
    return np.array(dates)


# Given a timestamp, returns the timestamp of the preceding period.
# Despite the name, the step is one minute, not one day.
#
# startDate: The reference moment as a POSIX timestamp
# weekends: True if weekend moments count as valid results; when false the
#           search keeps stepping back until it lands on a weekday
# return: The POSIX timestamp of the first accepted earlier moment
def DatePrevDay(startDate, weekends=True):
    step = timedelta(minutes=1)
    # Always move back at least one step from the reference moment
    candidate = datetime.fromtimestamp(startDate) - step
    # weekday() is 5 for Saturday and 6 for Sunday
    while not weekends and candidate.weekday() >= 5:
        candidate -= step
    return candidate.timestamp()


# Load data from the CSV file and turn its 'Date' column into a numeric
# 'Timestamp' column. Note: Some systems are unable to give timestamps
# for dates before 1970. This function may fail on such systems.
#
# path: The path to the file
# return: A data frame with the 'Date' column replaced by 'Timestamp'
def ParseData(path):
    frame = pd.read_csv(path)
    # Date strings look like 'year-month-day hour:minute:second+offset'.
    # The UTC offset is parsed and then discarded, so the wall-clock time
    # is converted to a timestamp as a naive (local) datetime.
    stamps = np.array(
        [
            datetime.strptime(text, '%Y-%m-%d %H:%M:%S%z').replace(tzinfo=None).timestamp()
            for text in frame['Date'].values
        ],
        dtype=float,
    )
    # Append the parsed column, then discard the textual one
    frame['Timestamp'] = stamps
    return frame.drop(columns='Date')


# Plots the 'High' column of a dataframe from ParseData against its
# 'Timestamp' column and shows the figure on screen
#
# df: Dataframe returned from ParseData
# p: Positions (indices into the rows) of predicted data points; these
#    are drawn as a second, differently coloured line. Nothing extra is
#    drawn when p is None or empty
def PlotData(df, p=None):
    predicted = np.array([]) if p is None else p
    # Timestamp data
    stamps = df.Timestamp.values
    # Number of x tick marks
    tickCount = 10
    # Left most and right most x values
    lo = np.min(stamps)
    hi = np.max(stamps)
    # Widen the axis by a fifth of the data range on both sides
    margin = (hi - lo) / 5
    lo = lo - margin
    hi = hi + margin
    # Evenly spaced tick locations on the x axis
    tickMarks = np.arange(lo, hi, (hi - lo) / tickCount)
    # Human readable label for every tick
    labels = [datetime.fromtimestamp(t).strftime('%Y-%m-%d %H:%M:%S') for t in tickMarks]
    mpl.figure()
    highs = df.High.values
    # The recorded high values
    mpl.plot(stamps, highs, color='#727272', linewidth=1.618, label='Actual')
    # Predicted data was also provided
    if len(predicted) > 0:
        mpl.plot(stamps[predicted], highs[predicted], color='#7294AA', linewidth=1.618, label='Predicted')
    # Set the tick marks
    mpl.xticks(tickMarks, labels, rotation='vertical')
    # Set y-axis label
    mpl.ylabel('Stock High Value (USD)')
    # Add the legend in the upper left
    mpl.legend(loc='upper left')
    mpl.show()


# A class that predicts stock prices based on historical stock data.
# It wraps a caller-supplied regressor and scaler (sklearn-style objects):
# Learn() fits both on a dataframe from ParseData, after which PredictDF,
# PredictDate and TestPerformance can be used.
class StockPredictor:
    # The (scaled) data frame
    D = None
    # Unscaled timestamp data
    DTS = None
    # The data matrix
    A = None
    # Target value matrix
    y = None
    # Corresponding columns for target values
    targCols = None
    # Number of previous rows of data to use
    npd = 1
    # The regressor model
    R = None
    # Object to scale input data
    S = None

    # Constructor
    #
    # rmodel: The regressor model to use (sklearn); needs fit and predict
    # nPastDays: The number of past rows included in each sample
    # scaler: The scaler object used to scale the data (sklearn); a new
    #         StandardScaler is created when None is given
    def __init__(self, rmodel, nPastDays=1, scaler=None):
        self.npd = nPastDays
        self.R = rmodel
        # Build the default per instance: a default created in the signature
        # is shared, so fitting one predictor would rescale all the others
        self.S = StandardScaler() if scaler is None else scaler

    # Runs one scaler method over a dataframe and wraps the resulting
    # array in a new dataframe with the same index and column labels
    #
    # fn: A bound scaler method (fit_transform, transform, inverse_transform)
    # df: The dataframe to pass to fn; it is not modified
    # ret: A new dataframe holding the values returned by fn
    def _Rescale(self, fn, df):
        return pd.DataFrame(fn(df), index=df.index, columns=df.columns)

    # Extracts features from stock market data
    #
    # D: A dataframe from ParseData
    # ret: The data matrix of samples, one row per row of D
    def _ExtractFeat(self, D):
        # One row per row of stock data
        m = D.shape[0]
        n = self._GetNumFeatures()
        B = np.zeros([m, n])
        # Preserve order of spreadsheet
        for i in range(m - 1, -1, -1):
            self._GetSample(B[i], i, D)
        # Return the internal numpy array
        return B

    # Extracts the target values from stock market data
    #
    # D: A dataframe from ParseData
    # ret: The matrix of target values and the matching column labels
    def _ExtractTarg(self, D):
        # Timestamp column is not predicted
        tmp = D.drop('Timestamp', axis=1)
        # Return the internal numpy array
        return tmp.values, tmp.columns

    # Get the number of features in the data matrix
    #
    # n: The number of previous rows to include
    #    self.npd is used if n is None
    # ret: The number of features in the data matrix
    def _GetNumFeatures(self, n=None):
        if n is None:
            n = self.npd
        # One leading timestamp plus 7 values per past row. _GetSample writes
        # df.shape[1] values per past row, so this assumes the dataframe has
        # exactly 7 columns (Timestamp included)
        return n * 7 + 1

    # Get the sample for a specific row in the dataframe.
    # A sample consists of the current timestamp and the data from
    # the following self.npd rows of the dataframe
    #
    # r: The array to fill with data
    # i: The index of the row for which to build a sample
    # df: The dataframe to use
    # return: r
    def _GetSample(self, r, i, df):
        # First value is the timestamp
        r[0] = df['Timestamp'].values[i]
        # The number of columns in df
        n = df.shape[1]
        # One past the last valid row index
        lim = df.shape[0]
        # Look every column up once instead of once per copied value
        cols = [df[c].values for c in df.columns]
        # Format of row:
        # Timestamp, all columns of row i+1, all columns of row i+2, ... etc
        for j in range(self.npd):
            # Subsequent rows contain older data in the spreadsheet; if there
            # is no older data, duplicate the oldest available values
            ind = min(i + j + 1, lim - 1)
            # + 1 is needed as timestamp is at index 0
            base = 1 + n * j
            for k, col in enumerate(cols):
                r[base + k] = col[ind]
        return r

    # Attempts to learn the stock market data
    # given a dataframe taken from ParseData
    #
    # D: A dataframe from ParseData; it is not modified
    def Learn(self, D):
        # Keep track of old timestamps for indexing
        self.DTS = np.copy(D.Timestamp.values)
        # Fit the scaler and keep the scaled copy of the learned data
        self.D = self._Rescale(self.S.fit_transform, D)
        # Get features from the data frame
        self.A = self._ExtractFeat(self.D)
        # Get the target values and their corresponding column names
        self.y, self.targCols = self._ExtractTarg(self.D)
        # Fit the regressor model
        self.R.fit(self.A, self.y)

    # Predicts values for each row of the dataframe. Can be used to
    # estimate performance of the model. Learn must have been called.
    #
    # df: The dataframe for which to make prediction; it is not modified
    # return: A dataframe containing the predictions in the original
    #         (unscaled) range, with the columns of the learned data
    def PredictDF(self, df):
        # Scale the input data like the training data
        D = self._Rescale(self.S.transform, df)
        # Get features
        A = self._ExtractFeat(D)
        # Perform prediction
        pred = np.asarray(self.R.predict(A))
        # A single-target model may hand back a flat vector
        if pred.ndim == 1:
            pred = pred.reshape(-1, 1)
        # Column order of the targets was saved by Learn
        P = pd.DataFrame(pred, columns=self.targCols)
        # Add the timestamp (already scaled from above)
        P['Timestamp'] = D['Timestamp'].values
        # The scaler works by column position, so restore the fitted order
        P = P[list(self.D.columns)]
        # Scale the data back to original range
        return self._Rescale(self.S.inverse_transform, P)

    # Predict the stock price during a specified time
    #
    # startDate: The start date as a string in the format DateRange accepts
    # endDate: The end date as a string in the format DateRange accepts
    # period: 'minute', 'daily', 'weekly', or 'monthly' for the time period
    #         between predictions
    # return: A dataframe containing the predictions (newest first), or None
    #         if the learned data has no row at the timestamp DatePrevDay
    #         returns for the start date
    def PredictDate(self, startDate, endDate, period='minute'):
        # Create the range of timestamps and reverse them
        ts = DateRange(startDate, endDate, period)[::-1]
        m = ts.shape[0]
        # Prediction is based on data prior to start date
        prevts = DatePrevDay(ts[-1])
        # Test if there is enough data to continue
        try:
            ind = np.where(self.DTS == prevts)[0][0]
        except IndexError:
            return None
        # There is enough data to perform prediction; allocate new data frame
        P = pd.DataFrame(np.zeros([m, self.D.shape[1]]), index=range(m), columns=self.D.columns)
        # Add in the timestamp column so that it can be scaled properly
        P['Timestamp'] = ts
        # Scale the timestamp (other fields are 0)
        P = self._Rescale(self.S.transform, P)
        # B is to be the data matrix of features
        B = np.zeros([1, self._GetNumFeatures()])
        # Add extra last entries for past existing data
        lastRow = self.D.shape[0] - 1
        for i in range(self.npd):
            # If the current index does not exist, repeat the last valid data
            curInd = min(ind + i, lastRow)
            # Copy over the past data (already scaled); ind is a position,
            # so the row is fetched by position as well
            P.loc[m + i] = self.D.iloc[curInd].values
        # Loop from the oldest requested timestamp until end date is reached
        for i in range(m - 1, -1, -1):
            # Create one sample
            self._GetSample(B[0], i, P)
            # Predict the row of the dataframe and save it
            pred = self.R.predict(B).ravel()
            # Fill in the remaining fields into the respective columns
            for j, k in zip(self.targCols, pred):
                P.at[i, j] = k
        # Discard extra rows needed for prediction
        P = P.iloc[:m]
        # Scale the dataframe back to the original range
        return self._Rescale(self.S.inverse_transform, P)

    # Test the predictors performance and
    # displays results to the screen
    #
    # df: The dataframe for which to make prediction; the currently
    #     learned data is used if df is None
    def TestPerformance(self, df=None):
        # If no dataframe is provided, use the currently learned one
        if df is None:
            D = self.D
        else:
            # Keep the column labels: feature extraction indexes by name
            D = self._Rescale(self.S.transform, df)
        # Get features from the data frame
        A = self._ExtractFeat(D)
        # Get the target values and their corresponding column names
        y, _ = self._ExtractTarg(D)
        # Begin cross validation: 3-fold R^2 scores on all data, on the
        # held-out part and on the training part of one random split
        ss = ShuffleSplit(n_splits=1)
        for trn, tst in ss.split(A):
            s1 = cross_val_score(self.R, A, y, cv=3, scoring=make_scorer(r2_score))
            s2 = cross_val_score(self.R, A[tst], y[tst], cv=3, scoring=make_scorer(r2_score))
            s3 = cross_val_score(self.R, A[trn], y[trn], cv=3, scoring=make_scorer(r2_score))
            print('C-V:\t' + str(s1) + '\nTst:\t' + str(s2) + '\nTrn:\t' + str(s3))
Loading

0 comments on commit 31a2074

Please sign in to comment.