diff --git a/Kernel.py b/Kernel.py index 2dd3b6c24..2ffa4d9f2 100644 --- a/Kernel.py +++ b/Kernel.py @@ -4,15 +4,22 @@ import datetime, os, queue, sys from message.Message import Message, MessageType -from util.util import print +from util.util import print, log_print class Kernel: - def __init__(self, kernel_name): + def __init__(self, kernel_name, random_state = None): # kernel_name is for human readers only. self.name = kernel_name - print ("Kernel initialized: {}".format(self.name)) + self.random_state = random_state + if not random_state: + raise ValueError("A valid, seeded np.random.RandomState object is required " + + "for the Kernel", self.name) + sys.exit() + + # A single message queue to keep everything organized by increasing + # delivery timestamp. self.messages = queue.PriorityQueue() # currentTime is None until after kernelStarting() event completes @@ -20,14 +27,26 @@ def __init__(self, kernel_name): self.currentTime = None # Timestamp at which the Kernel was created. Primarily used to - # create a unique log directory for this run. + # create a unique log directory for this run. Also used to + # print some elapsed time and messages per second statistics. self.kernelWallClockStart = pd.Timestamp('now') # TODO: This is financial, and so probably should not be here... self.meanResultByAgentType = {} self.agentCountByType = {} + # The Kernel maintains a summary log to which agents can write + # information that should be centralized for very fast access + # by separate statistical summary programs. Detailed event + # logging should go only to the agent's individual log. This + # is for things like "final position value" and such. + self.summaryLog = [] + + log_print ("Kernel initialized: {}", self.name) + + # This is called to actually start the simulation, once all agent + # configuration is done. 
def runner(self, agents = [], startTime = None, stopTime = None, num_simulations = 1, defaultComputationDelay = 1, defaultLatency = 1, agentLatency = None, latencyNoise = [ 1.0 ], @@ -36,11 +55,19 @@ def runner(self, agents = [], startTime = None, stopTime = None, # agents must be a list of agents for the simulation, # based on class agent.Agent self.agents = agents + + # The kernel start and stop time (first and last timestamp in + # the simulation, separate from anything like exchange open/close). self.startTime = startTime self.stopTime = stopTime + + # The global seed, NOT used for anything agent-related. self.seed = seed + + # The data oracle for this simulation, if needed. self.oracle = oracle + # If a log directory was not specified, use the initial wallclock. if log_dir: self.log_dir = log_dir else: @@ -77,6 +104,9 @@ def runner(self, agents = [], startTime = None, stopTime = None, # distribution with the peak at zero. By default there is no noise # (100% chance to add zero ns extra delay). Format is a list with # list index = ns extra delay, value = probability of this delay. + # TODO: This should probably become more sophisticated. If not + # continuous, then at least a dictionary with key=delay and + # value=probability in case we want a more sparse delay function. self.latencyNoise = latencyNoise # The kernel maintains an accumulating additional delay parameter @@ -87,11 +117,14 @@ def runner(self, agents = [], startTime = None, stopTime = None, # staggering of sent messages. self.currentAgentAdditionalDelay = 0 - print ("Kernel started: {}".format(self.name)) - print ("Simulation started!") + log_print ("Kernel started: {}", self.name) + log_print ("Simulation started!") + # Note that num_simulations has not yet been really used or tested + # for anything. Instead we have been running multiple simulations + # with coarse parallelization from a shell script. 
for sim in range(num_simulations): - print ("Starting sim {}".format(sim)) + log_print ("Starting sim {}", sim) # Event notification for kernel init (agents should not try to # communicate with other agents, as order is unknown). Agents @@ -100,7 +133,7 @@ def runner(self, agents = [], startTime = None, stopTime = None, # Kernel passes self-reference for agents to retain, so they can # communicate with the kernel in the future (as it does not have # an agentID). - print ("\n--- Agent.kernelInitializing() ---") + log_print ("\n--- Agent.kernelInitializing() ---") for agent in self.agents: agent.kernelInitializing(self) @@ -108,27 +141,37 @@ def runner(self, agents = [], startTime = None, stopTime = None, # communications or references to other agents, as all agents # are guaranteed to exist now). Agents should obtain references # to other agents they require for proper operation (exchanges, - # brokers, subscription services...). - print ("\n--- Agent.kernelStarting() ---") + # brokers, subscription services...). Note that we generally + # don't (and shouldn't) permit agents to get direct references + # to other agents (like the exchange) as they could then bypass + # the Kernel, and therefore simulation "physics" to send messages + # directly and instantly or to perform disallowed direct inspection + # of the other agent's state. Agents should instead obtain the + # agent ID of other agents, and communicate with them only via + # the Kernel. Direct references to utility objects that are not + # agents are acceptable (e.g. oracles). + log_print ("\n--- Agent.kernelStarting() ---") for agent in self.agents: agent.kernelStarting(self.startTime) # Set the kernel to its startTime. self.currentTime = self.startTime - print ("\n--- Kernel Clock started ---") - print ("Kernel.currentTime is now {}".format(self.currentTime)) + log_print ("\n--- Kernel Clock started ---") + log_print ("Kernel.currentTime is now {}", self.currentTime) # Start processing the Event Queue. 
- print ("\n--- Kernel Event Queue begins ---") - print ("Kernel will start processing messages. ", - "Queue length: {}".format(len(self.messages.queue))) + log_print ("\n--- Kernel Event Queue begins ---") + log_print ("Kernel will start processing messages. Queue length: {}", len(self.messages.queue)) # Track starting wall clock time and total message count for stats at the end. eventQueueWallClockStart = pd.Timestamp('now') ttl_messages = 0 - # Process messages. + # Process messages until there aren't any (at which point there never can + # be again, because agents only "wake" in response to messages), or until + # the kernel stop time is reached. while not self.messages.empty() and self.currentTime and (self.currentTime <= self.stopTime): + # Get the next message in timestamp order (delivery time) and extract it. self.currentTime, event = self.messages.get() msg_recipient, msg_type, msg = event @@ -138,9 +181,9 @@ def runner(self, agents = [], startTime = None, stopTime = None, self.fmtTime(self.currentTime), ttl_messages, pd.Timestamp('now') - eventQueueWallClockStart), override=True) - print ("\n--- Kernel Event Queue pop ---") - print ("Kernel handling {} message for agent {} at time {}".format( - msg_type, msg_recipient, self.fmtTime(self.currentTime))) + log_print ("\n--- Kernel Event Queue pop ---") + log_print ("Kernel handling {} message for agent {} at time {}", + msg_type, msg_recipient, self.fmtTime(self.currentTime)) ttl_messages += 1 @@ -159,8 +202,8 @@ def runner(self, agents = [], startTime = None, stopTime = None, # Push the wakeup call back into the PQ with a new time. 
self.messages.put((self.agentCurrentTimes[agent], (msg_recipient, msg_type, msg))) - print ("Agent in future: wakeup requeued for {}".format( - self.fmtTime(self.agentCurrentTimes[agent]))) + log_print ("Agent in future: wakeup requeued for {}", + self.fmtTime(self.agentCurrentTimes[agent])) continue # Set agent's current time to global current time for start @@ -174,8 +217,8 @@ def runner(self, agents = [], startTime = None, stopTime = None, self.agentCurrentTimes[agent] += pd.Timedelta(self.agentComputationDelays[agent] + self.currentAgentAdditionalDelay) - print ("After wakeup return, agent {} delayed from {} to {}".format( - agent, self.fmtTime(self.currentTime), self.fmtTime(self.agentCurrentTimes[agent]))) + log_print ("After wakeup return, agent {} delayed from {} to {}", + agent, self.fmtTime(self.currentTime), self.fmtTime(self.agentCurrentTimes[agent])) elif msg_type == MessageType.MESSAGE: @@ -188,9 +231,8 @@ def runner(self, agents = [], startTime = None, stopTime = None, # Push the message back into the PQ with a new time. 
self.messages.put((self.agentCurrentTimes[agent], (msg_recipient, msg_type, msg))) - print ("Agent in future: message requeued for {}".format( - self.fmtTime(self.agentCurrentTimes[agent]))) - #print ("TMP: delayed message was: {}".format(msg)) + log_print ("Agent in future: message requeued for {}", + self.fmtTime(self.agentCurrentTimes[agent])) continue # Set agent's current time to global current time for start @@ -204,8 +246,8 @@ def runner(self, agents = [], startTime = None, stopTime = None, self.agentCurrentTimes[agent] += pd.Timedelta(self.agentComputationDelays[agent] + self.currentAgentAdditionalDelay) - print ("After receiveMessage return, agent {} delayed from {} to {}".format( - agent, self.fmtTime(self.currentTime), self.fmtTime(self.agentCurrentTimes[agent]))) + log_print ("After receiveMessage return, agent {} delayed from {} to {}", + agent, self.fmtTime(self.currentTime), self.fmtTime(self.agentCurrentTimes[agent])) else: raise ValueError("Unknown message type found in queue", @@ -213,10 +255,10 @@ def runner(self, agents = [], startTime = None, stopTime = None, "messageType:", self.msg.type) if self.messages.empty(): - print ("\n--- Kernel Event Queue empty ---") + log_print ("\n--- Kernel Event Queue empty ---") if self.currentTime and (self.currentTime > self.stopTime): - print ("\n--- Kernel Stop Time surpassed ---") + log_print ("\n--- Kernel Stop Time surpassed ---") # Record wall clock stop time and elapsed time for stats at the end. eventQueueWallClockStop = pd.Timestamp('now') @@ -227,7 +269,7 @@ def runner(self, agents = [], startTime = None, stopTime = None, # other agents, as all agents are still guaranteed to exist). # Agents should not destroy resources they may need to respond # to final communications from other agents. 
- print ("\n--- Agent.kernelStopping() ---") + log_print ("\n--- Agent.kernelStopping() ---") for agent in agents: agent.kernelStopping() @@ -235,7 +277,7 @@ def runner(self, agents = [], startTime = None, stopTime = None, # attempt communication with other agents, as order of termination # is unknown). Agents should clean up all used resources as the # simulation program may not actually terminate if num_simulations > 1. - print ("\n--- Agent.kernelTerminating() ---") + log_print ("\n--- Agent.kernelTerminating() ---") for agent in agents: agent.kernelTerminating() @@ -243,8 +285,15 @@ def runner(self, agents = [], startTime = None, stopTime = None, eventQueueWallClockElapsed, ttl_messages, ttl_messages / (eventQueueWallClockElapsed / (np.timedelta64(1, 's')))), override=True) - print ("Ending sim {}".format(sim)) + log_print ("Ending sim {}", sim) + + # Agents will request the Kernel to serialize their agent logs, usually + # during kernelTerminating, but the Kernel must write out the summary + # log itself. + self.writeSummaryLog() + # This should perhaps be elsewhere, as it is explicitly financial, but it + # is convenient to have a quick summary of the results for now. print ("Mean ending value by agent type:", override=True) for a in self.meanResultByAgentType: value = self.meanResultByAgentType[a] @@ -261,8 +310,8 @@ def sendMessage(self, sender = None, recipient = None, msg = None, delay = 0): # and/or network latency. The message must derive from the message.Message class. # The optional delay parameter represents an agent's request for ADDITIONAL # delay (beyond the Kernel's mandatory computation + latency delays) to represent - # parallel pipeline processing delays (that do not make the agent "busy" and - # unable to respond to new messages). + # parallel pipeline processing delays (that should delay the transmission of messages + # but do not make the agent "busy" and unable to respond to new messages). 
if sender is None: raise ValueError("sendMessage() called without valid sender ID", @@ -297,14 +346,15 @@ def sendMessage(self, sender = None, recipient = None, msg = None, delay = 0): # Apply communication delay per the agentLatency matrix [sender][recipient]. latency = self.agentLatency[sender][recipient] - noise = np.random.choice(len(self.latencyNoise), 1, self.latencyNoise)[0] + noise = self.random_state.choice(len(self.latencyNoise), 1, self.latencyNoise)[0] deliverAt = sentTime + pd.Timedelta(latency + noise) + # Finally drop the message in the queue with priority == delivery time. self.messages.put((deliverAt, (recipient, MessageType.MESSAGE, msg))) - print ("Kernel applied latency {}, noise {}, accumulated delay {}, one-time delay {} on sendMessage from: {} to {}, scheduled for {}".format( - latency, noise, self.currentAgentAdditionalDelay, delay, self.agents[sender].name, self.agents[recipient].name, self.fmtTime(deliverAt))) - print ("Message queued: {}".format(msg)) + log_print ("Kernel applied latency {}, noise {}, accumulated delay {}, one-time delay {} on sendMessage from: {} to {}, scheduled for {}", + latency, noise, self.currentAgentAdditionalDelay, delay, self.agents[sender].name, self.agents[recipient].name, self.fmtTime(deliverAt)) + log_print ("Message queued: {}", msg) def setWakeup(self, sender = None, requestedTime = None): @@ -327,8 +377,8 @@ def setWakeup(self, sender = None, requestedTime = None): "currentTime:", self.currentTime, "requestedTime:", requestedTime) - print ("Kernel adding wakeup for agent {} at time {}".format( - sender, self.fmtTime(requestedTime))) + log_print ("Kernel adding wakeup for agent {} at time {}", + sender, self.fmtTime(requestedTime)) self.messages.put((requestedTime, (sender, MessageType.WAKEUP, None))) @@ -348,7 +398,7 @@ def setAgentComputeDelay(self, sender = None, requestedDelay = None): # sent by the agent during the current wake cycle (simulating the # messages popping out at the end of its "thinking" 
time). - # Note that we DO permit a computation delay of zero, but this should + # Also note that we DO permit a computation delay of zero, but this should # really only be used for special or massively parallel agents. # requestedDelay should be in whole nanoseconds. @@ -425,6 +475,26 @@ def writeLog (self, sender, dfLog, filename=None): dfLog.to_pickle(os.path.join(path, file), compression='bz2') + + def appendSummaryLog (self, sender, eventType, event): + # We don't even include a timestamp, because this log is for one-time-only + # summary reporting, like starting cash, or ending cash. + self.summaryLog.append({ 'AgentID' : sender, + 'AgentStrategy' : self.agents[sender].type, + 'EventType' : eventType, 'Event' : event }) + + + def writeSummaryLog (self): + path = os.path.join(".", "log", self.log_dir) + file = "summary_log.bz2" + + if not os.path.exists(path): + os.makedirs(path) + + dfLog = pd.DataFrame(self.summaryLog) + + dfLog.to_pickle(os.path.join(path, file), compression='bz2') + @staticmethod def fmtTime(simulationTime): diff --git a/agent/Agent.py b/agent/Agent.py index 53336ddce..cf55fa6c2 100644 --- a/agent/Agent.py +++ b/agent/Agent.py @@ -1,14 +1,27 @@ import pandas as pd from copy import deepcopy -from util.util import print +from util.util import print, log_print class Agent: - def __init__ (self, id, name): + def __init__ (self, id, name, type, random_state): + # ID must be a unique number (usually autoincremented). + # Name is for human consumption, should be unique (often type + number). + # Type is for machine aggregation of results, should be same for all + # agents following the same strategy (incl. parameter settings). + # Every agent is given a random state to use for any stochastic needs. + # This is an np.random.RandomState object, already seeded. 
self.id = id self.name = name + self.type = type + self.random_state = random_state + + if not random_state: + raise ValueError("A valid, seeded np.random.RandomState object is required " + + "for every agent.Agent", self.name) + sys.exit() # Kernel is supplied via kernelInitializing method of kernel lifecycle. self.kernel = None @@ -30,6 +43,7 @@ def __init__ (self, id, name): # It might, or might not, make sense to formalize these log Events # as a class, with enumerated EventTypes and so forth. self.log = [] + self.logEvent("AGENT_TYPE", type) ### Flow of required kernel listening methods: @@ -44,7 +58,7 @@ def kernelInitializing (self, kernel): self.kernel = kernel - print ("{} exists!".format(self.name)) + log_print ("{} exists!", self.name) def kernelStarting (self, startTime): @@ -56,8 +70,8 @@ def kernelStarting (self, startTime): # Base Agent schedules a wakeup call for the first available timestamp. # Subclass agents may override this behavior as needed. - print ("Agent {} ({}) requesting kernel wakeup at time {}".format( - self.id, self.name, self.kernel.fmtTime(startTime))) + log_print ("Agent {} ({}) requesting kernel wakeup at time {}", + self.id, self.name, self.kernel.fmtTime(startTime)) self.setWakeup(startTime) @@ -83,12 +97,19 @@ def kernelTerminating (self): ### Methods for internal use by agents (e.g. bookkeeping). - def logEvent (self, eventType, event = ''): + def logEvent (self, eventType, event = '', appendSummaryLog = False): # Adds an event to this agent's log. The deepcopy of the Event field, # often an object, ensures later state changes to the object will not # retroactively update the logged event. + + # We can make a single copy of the object (in case it is an arbitrary + # class instance) for both potential log targets, because we don't + # alter logs once recorded. 
+ e = deepcopy(event) self.log.append({ 'EventTime' : self.currentTime, 'EventType' : eventType, - 'Event' : deepcopy(event) }) + 'Event' : e }) + + if appendSummaryLog: self.kernel.appendSummaryLog(self.id, eventType, e) ### Methods required for communication from other agents. @@ -104,8 +125,8 @@ def receiveMessage (self, currentTime, msg): self.currentTime = currentTime - print ("At {}, agent {} ({}) received: {}".format( - self.kernel.fmtTime(currentTime), self.id, self.name, msg)) + log_print ("At {}, agent {} ({}) received: {}", + self.kernel.fmtTime(currentTime), self.id, self.name, msg) def wakeup (self, currentTime): @@ -115,8 +136,8 @@ def wakeup (self, currentTime): self.currentTime = currentTime - print ("At {}, agent {} ({}) received wakeup.".format( - self.kernel.fmtTime(currentTime), self.id, self.name)) + log_print ("At {}, agent {} ({}) received wakeup.", + self.kernel.fmtTime(currentTime), self.id, self.name) ### Methods used to request services from the Kernel. These should be used diff --git a/agent/BackgroundAgent.py b/agent/BackgroundAgent.py deleted file mode 100644 index dae102af2..000000000 --- a/agent/BackgroundAgent.py +++ /dev/null @@ -1,379 +0,0 @@ -from agent.TradingAgent import TradingAgent -from message.Message import Message -from util.util import print - -import numpy as np -import pandas as pd -import sys - -class BackgroundAgent(TradingAgent): - - def __init__(self, id, name, symbol, startingCash, sigma_n, arb_last_trade, freq, trade_vol, offset_unit): - # Base class init. - super().__init__(id, name, startingCash) - - self.sigma_n = sigma_n - - self.symbol = symbol - self.trading = False - - self.LOW_CUSHION = 0.0015 - self.HIGH_CUSHION = 0.0025 - - self.TRADE_THRESHOLD = 5 - #self.LIMIT_STD_CENTS = 0.001 - self.LIMIT_STD_CENTS = 0.01 - - # Used by this agent to control how long to safely wait - # for orders to have reached the exchange before proceeding (ns). 
- self.message_delay = 1000000000 - - # The agent begins in its "complete" state, not waiting for - # any special event or condition. - self.state = 'AWAITING_WAKEUP' - - # To provide some consistency, the agent maintains knowledge of its prior value belief. - self.value_belief = None - #self.learning_rate = 0.001 - #self.learning_rate = 0.50 - self.learning_rate = 1.0 - - # This (for now) constant controls whether the agent arbs to the last trade or to the - # bid-ask midpoint. - self.ARB_LAST_TRADE = arb_last_trade - - # This controls the wakeup frequency of the trader. - self.freq = freq - - # This should be the average trade volume of this trader. - self.trade_vol = trade_vol - - # The unit of measurement for the -100 to +100 offset of wakeup time. - self.offset_unit = offset_unit - - - def kernelStarting(self, startTime): - # self.kernel is set in Agent.kernelInitializing() - # self.exchangeID is set in TradingAgent.kernelStarting() - - super().kernelStarting(startTime) - - self.oracle = self.kernel.oracle - - - def wakeup (self, currentTime): - # Parent class handles discovery of exchange times and market_open wakeup call. - super().wakeup(currentTime) - - if not self.mkt_open or not self.mkt_close: - # TradingAgent handles discovery of exchange times. - return - else: - if not self.trading: - self.trading = True - - # Time to start trading! - print ("{} is ready to start trading now.".format(self.name)) - - - # Steady state wakeup behavior starts here. - - # First, see if we have received a MKT_CLOSED message for the day. If so, - # there's nothing to do except clean-up. In the future, we could also do - # some activity that is not order based (because the exchange and simulation - # will still be running, just not accepting orders) like final price quotes - # or final trade information. - if self.mkt_closed and (self.symbol in self.daily_close_price): - # Market is closed and we already got the daily close price. 
- return - - - # Next, schedule a wakeup for about five minutes, plus or minus ten seconds. - # We do this early in case some of our expected message responses don't arrive. - - offset = np.random.randint(-100,100) - self.setWakeup(currentTime + (pd.Timedelta(self.freq) + pd.Timedelta('{}{}'.format(offset, self.offset_unit)))) - - # If the market is closed and we haven't obtained the daily close price yet, - # do that before we cease activity for the day. Don't do any other behavior - # after market close. - if self.mkt_closed and (not self.symbol in self.daily_close_price): - self.getCurrentSpread(self.symbol) - self.state = 'AWAITING_SPREAD' - return - - - # The agent's behavior has changed to cancel orders, wait for confirmation, - # exit all positions, wait for confirmation, then enter new positions. - # It does yield (return) in between these, so it can react to events that - # occur in between. This adds a few messages, but greatly improves the logical - # flow of the simulation and solves several important problems. - - # On a true "wakeup", the agent is at one of its scheduled intervals. - # We should first check for open orders we would like to cancel. - # There should be no harm in issuing all the cancel orders simultaneously. - - if self.cancelOrders(): - self.state = 'AWAITING_CANCEL_CONFIRMATION' - return - - # If we needed to cancel orders, the logic below will not execute, because - # our ORDER_CANCELLED messages will come through receiveMessage(). If we - # did not need to, we may as well carry on to exiting our positions. - - if self.exitPositions(): - self.state = 'AWAITING_EXIT_CONFIRMATION' - return - - # The below logic is only reached if we neither needed to cancel orders - # nor exit positions, in which case we may as well find out the most recent - # trade prices and get ready to place new orders. 
- - self.getCurrentSpread(self.symbol) - self.state = 'AWAITING_SPREAD' - - - def receiveMessage (self, currentTime, msg): - # Parent class schedules market open wakeup call once market open/close times are known. - super().receiveMessage(currentTime, msg) - - # We have been awakened by something other than our scheduled wakeup. - # If our internal state indicates we were waiting for a particular event, - # check if we can transition to a new state. - - if self.state == 'AWAITING_CANCEL_CONFIRMATION': - # We were waiting for all open orders to be cancelled. See if that has happened. - if not self.orders: - # Ready to exit positions. - if self.exitPositions(): - self.state = 'AWAITING_EXIT_CONFIRMATION' - return - - # If we did not need to exit positions, go ahead and query the most recent trade. - self.getCurrentSpread(self.symbol) - self.state = 'AWAITING_SPREAD' - - elif self.state == 'AWAITING_EXIT_CONFIRMATION': - # We were waiting for all open positions to be exited. See if that has happened. - if not self.havePositions(): - # Query the most recent trade and prepare to proceed. - self.getCurrentSpread(self.symbol) - self.state = 'AWAITING_SPREAD' - - elif self.state == 'AWAITING_SPREAD': - # We were waiting to learn the most recent trade price of our symbol, so we would - # know what direction of order we'd like to place. - - # Right now we can't tell from internal state whether the last_trade has been - # updated recently, so we rely on actually seeing a last_trade response message. - if msg.body['msg'] == 'QUERY_SPREAD': - # This is what we were waiting for. - - # But if the market is now closed, don't advance to placing orders. - if self.mkt_closed: return - - # Now we can obtain a new price belief and place our next set of orders. - self.placeOrders() - self.state = 'AWAITING_WAKEUP' - - - # Internal state and logic specific to this Background (Oracle) Agent. - - # Cancel all open orders. - # Return value: did we issue any cancellation requests? 
- def cancelOrders (self): - if not self.orders: return False - - for id, order in self.orders.items(): - self.cancelOrder(order) - - return True - - - # Exit all open positions. - # Return value: did we issue any orders to exit positions? - def exitPositions (self): - if not self.havePositions(): return False - - for sym, qty in self.holdings.items(): - if sym == 'CASH': continue - - # Place an exit order for this position. Instead of (pseudo-) market - # orders, we now place a limit likely to immediately execute. - #last_trade = self.last_trade[self.symbol] - - #offset = int(round(np.random.uniform(low=self.LOW_CUSHION, high=self.HIGH_CUSHION) * last_trade)) - bid, bid_vol, ask, ask_vol = self.getKnownBidAsk(self.symbol) - - if not bid or not ask: - # No spread yet. Use last trade (or open price) for bid and ask. - arb_target = self.last_trade[self.symbol] - bid = arb_target - ask = arb_target - - # Don't force exit with market orders. Just sell to the bid or buy from the ask. - # This should keep the order book from collapsing. - if qty > 0: self.placeLimitOrder(sym, qty, False, bid) - elif qty < 0: self.placeLimitOrder(sym, -qty, True, ask) - else: del self.holdings[sym] - - return True - - - # Do we have non-CASH positions? - def havePositions (self): - return len(self.holdings) > 1 or \ - (len(self.holdings) == 1 and 'CASH' not in self.holdings) - - - # Request the last trade price for our symbol. - def getLastTrade (self): - super().getLastTrade(self.symbol) - - - # Obtain new beliefs and place new orders for position entry. - def placeOrders (self): - - # The background agents use the DataOracle to obtain noisy observations of the - # actual historical intraday price on a particular date. They use this to - # produce a realistic "background" market of agents who trade based on a belief - # that follows history (i.e. 
beliefs do not change based on other agent trading - # activity) but whose behavior does react to market conditions -- because they - # will try to arbitrage between their beliefs and the current market state. - - # Get current value belief for relevant stock (observation is noisy). Beliefs - # can change even when (unknown) real historical stock price has not changed. - # sigma_n is the variance of gaussian observation noise as a proportion of the - # current stock price. (e.g. if stock trades at 100, sigma_n=0.01 will - # select from a normal(mean=100,std=1) distribution. - value_observation = self.oracle.observePrice(self.symbol, self.currentTime, sigma_n=self.sigma_n) - - # TESTING: value_belief is only allowed to change at a certain rate from the prior - # belief, to keep some kind of consistency and make "beliefs" mean something. - - if self.value_belief: self.logEvent("OLD_BELIEF", self.value_belief) - self.logEvent("BELIEF_OBSERVATION", value_observation) - - # If there was a prior belief, update it. - if self.value_belief: - delta = value_observation - self.value_belief - print ("observation {}, old belief {}, delta {}".format(value_observation, self.value_belief, delta)) - self.value_belief = int(round(self.value_belief + (delta * self.learning_rate))) - else: - # Otherwise use the observation as the belief. - self.value_belief = value_observation - - print ("New belief {}".format(self.value_belief)) - self.logEvent("NEW_BELIEF", self.value_belief) - - if self.ARB_LAST_TRADE: - arb_target = self.last_trade[self.symbol] - else: - bid, bid_vol, ask, ask_vol = self.getKnownBidAsk(self.symbol) - - if bid and ask: - arb_target = int(round((bid + ask) / 2)) - else: - # No spread yet. Use last trade (or open price) for arb_target, bid, and ask. 
- arb_target = self.last_trade[self.symbol] - bid = arb_target - ask = arb_target - - print ("{} believes {} is worth {} at {}, arb target: {}.".format(self.name, self.symbol, - self.dollarize(self.value_belief), self.kernel.fmtTime(self.currentTime), - self.dollarize(arb_target))) - - # The agents now have their desired behavior. Instead of placing bracketing limit orders, they - # arbitrage between the last trade price and their value belief. This means one-sided orders. - # Note that value_belief, like all prices, is in integer CENTS. - # - # The agent places limit orders designed to immediately execute. They will pay slightly more (buy) - # than the last trade price, or accept slightly less than the last current trade price (sell). - # Offset must be adjusted to round cents. - offset = int(round(np.random.uniform(low=self.LOW_CUSHION, high=self.HIGH_CUSHION) * arb_target)) - #shares = np.random.randint(100,400) - if self.trade_vol < 200: - print ("ERROR: BackgroundAgents don't work right with less than 200 average trade volume (shares)", override=True) - sys.exit() - else: - shares = np.random.randint(200, int(round(self.trade_vol * 2)) - 200) - - # Pick an exit offset (to take profit) if the trade goes in the agent's favor by a percentage of the - # current price. - exit_offset = int(round(0.01 * arb_target)) - - # If the last traded price is too close to the value belief, don't trade. - if abs(arb_target - self.value_belief) < self.TRADE_THRESHOLD: - # No trade. - pass - elif self.value_belief > arb_target: - # The agent believes the price should be higher. Go long. - - # Place 1/2 the shares for immediate execution. This will be at least 100. - #mkt_shares = int(round(shares/2)) - mkt_shares = 100 - - # Use base limit. 
- #base_limit = self.value_belief - self.TRADE_THRESHOLD - #base_limit = int(round(((self.value_belief - arb_target) / 2) + arb_target)) - #self.placeLimitOrder(self.symbol, mkt_shares, True, ask) - base_limit = ask - self.placeLimitOrder(self.symbol, mkt_shares, True, base_limit) - - rem_shares = shares - mkt_shares - - while rem_shares > 0: - trade_shares = 100 if rem_shares >= 100 else rem_shares - rem_shares -= trade_shares - - # Each 100 share lot's limit price is drawn from a one-sided gaussian with the peak - # at the (possibly previous after our trade above) best ask. - #rand = np.random.normal(0, ask * self.LIMIT_STD_CENTS) - #TMP? Peak a little shy of the belief. - rand = np.random.normal(0, base_limit * self.LIMIT_STD_CENTS) - limit_price = int(round(base_limit - abs(rand))) - - self.placeLimitOrder(self.symbol, trade_shares, True, limit_price) - - # Also place a profit-taking exit, not designed to be immediately executed. - #self.placeLimitOrder(self.symbol, shares, False, arb_target + exit_offset) - - else: - # The agent believes the price should be lower. Go short. - - # Place 1/2 the shares for immediate execution. This will be at least 100. - #mkt_shares = int(round(shares/2)) - mkt_shares = 100 - - # Use base limit. - #base_limit = self.value_belief + self.TRADE_THRESHOLD - #base_limit = int(round(((arb_target - self.value_belief) / 2) + self.value_belief)) - #self.placeLimitOrder(self.symbol, mkt_shares, False, bid) - base_limit = bid - self.placeLimitOrder(self.symbol, mkt_shares, False, base_limit) - - rem_shares = shares - mkt_shares - - while rem_shares > 0: - trade_shares = 100 if rem_shares >= 100 else rem_shares - rem_shares -= trade_shares - - # Each 100 share lot's limit price is drawn from a one-sided gaussian with the peak - # at the (possibly previous after our trade above) best bid. - #rand = np.random.normal(0, bid * self.LIMIT_STD_CENTS) - #TMP? Peak a little shy of the belief. 
- rand = np.random.normal(0, base_limit * self.LIMIT_STD_CENTS) - limit_price = int(round(base_limit + abs(rand))) - - self.placeLimitOrder(self.symbol, trade_shares, False, limit_price) - - # Also place a profit-taking exit, not designed to be immediately executed. - #self.placeLimitOrder(self.symbol, shares, True, arb_target - exit_offset) - - - def getWakeFrequency (self): - return pd.Timedelta(np.random.randint(low = 0, high = pd.Timedelta(self.freq) / np.timedelta64(1, 'ns')), unit='ns') - - # Parent class defines: - #def placeLimitOrder (self, symbol, quantity, is_buy_order, limit_price): - diff --git a/agent/ExchangeAgent.py b/agent/ExchangeAgent.py index 8726f6e18..e51a12bf8 100644 --- a/agent/ExchangeAgent.py +++ b/agent/ExchangeAgent.py @@ -1,12 +1,17 @@ -# The ExchangeAgent expects an agent id, printable name, timestamp to open and close trading, -# and a list of equity symbols for which it should create order books. +# The ExchangeAgent expects a numeric agent id, printable name, agent type, timestamp to open and close trading, +# a list of equity symbols for which it should create order books, a frequency at which to archive snapshots +# of its order books, a pipeline delay (in ns) for order activity, the exchange computation delay (in ns), +# the levels of order stream history to maintain per symbol (maintains all orders that led to the last N trades), +# whether to log all order activity to the agent log, and a random state object (already seeded) to use +# for stochasticity. 
from agent.FinancialAgent import FinancialAgent from message.Message import Message from util.OrderBook import OrderBook -from util.util import print +from util.util import print, log_print import sys +import jsons as js import numpy as np import pandas as pd pd.set_option('display.max_rows', 500) @@ -15,10 +20,10 @@ class ExchangeAgent(FinancialAgent): - def __init__(self, id, name, mkt_open, mkt_close, symbols, book_freq='S', pipeline_delay = 40000, - computation_delay = 1, stream_history = 0): + def __init__(self, id, name, type, mkt_open, mkt_close, symbols, book_freq='S', pipeline_delay = 40000, + computation_delay = 1, stream_history = 0, log_orders = False, random_state = None): - super().__init__(id, name) + super().__init__(id, name, type, random_state) # Do not request repeated wakeup calls. self.reschedule = False @@ -27,14 +32,21 @@ def __init__(self, id, name, mkt_open, mkt_close, symbols, book_freq='S', pipeli self.mkt_open = mkt_open self.mkt_close = mkt_close - # Right now, only the exchange agent has a parallel processing pipeline delay. + # Right now, only the exchange agent has a parallel processing pipeline delay. This is an additional + # delay added only to order activity (placing orders, etc) and not simple inquiries (market operating + # hours, etc). self.pipeline_delay = pipeline_delay + + # Computation delay is applied on every wakeup call or message received. self.computation_delay = computation_delay # The exchange maintains an order stream of all orders leading to the last L trades # to support certain agents from the auction literature (GD, HBL, etc). self.stream_history = stream_history + # Log all order activity? + self.log_orders = log_orders + # Create an order book for each symbol. self.order_books = {} @@ -45,7 +57,7 @@ def __init__(self, id, name, mkt_open, mkt_close, symbols, book_freq='S', pipeli self.book_freq = book_freq - # The exchange agent overrides this to obtain a reference to a DataOracle. 
+ # The exchange agent overrides this to obtain a reference to an oracle. # This is needed to establish a "last trade price" at open (i.e. an opening # price) in case agents query last trade before any simulated trades are made. # This can probably go away once we code the opening cross auction. @@ -57,7 +69,7 @@ def kernelInitializing (self, kernel): # Obtain opening prices (in integer cents). These are not noisy right now. for symbol in self.order_books: self.order_books[symbol].last_trade = self.oracle.getDailyOpenPrice(symbol, self.mkt_open) - print ("Opening price for {} is {}".format(symbol, self.order_books[symbol].last_trade)) + log_print ("Opening price for {} is {}", symbol, self.order_books[symbol].last_trade) # The exchange agent overrides this to additionally log the full depth of its @@ -68,14 +80,15 @@ def kernelTerminating (self): # Skip order book dump if requested. if self.book_freq is None: return + # Iterate over the order books controlled by this exchange. for symbol in self.order_books: book = self.order_books[symbol] # Log full depth quotes (price, volume) from this order book at some pre-determined frequency. + # Here we are looking at the actual log for this order book (i.e. are there snapshots to export, + # independent of the requested frequency). if book.book_log: - ### THE FAST WAY - # This must already be sorted by time because it was a list of order book snapshots and time # only increases in our simulation. BUT it can have duplicates if multiple orders happen # in the same nanosecond. 
(This particularly happens if using nanoseconds as the discrete @@ -83,79 +96,42 @@ def kernelTerminating (self): dfLog = pd.DataFrame(book.book_log) dfLog.set_index('QuoteTime', inplace=True) - if True: - dfLog = dfLog[~dfLog.index.duplicated(keep='last')] - dfLog.sort_index(inplace=True) - dfLog = dfLog.resample(self.book_freq).ffill() - dfLog.sort_index(inplace=True) + # With multiple quotes in a nanosecond, use the last one, then resample to the requested freq. + dfLog = dfLog[~dfLog.index.duplicated(keep='last')] + dfLog.sort_index(inplace=True) + dfLog = dfLog.resample(self.book_freq).ffill() + dfLog.sort_index(inplace=True) - time_idx = pd.date_range(self.mkt_open, self.mkt_close, freq=self.book_freq, closed='right') - dfLog = dfLog.reindex(time_idx, method='ffill') - dfLog.sort_index(inplace=True) - - dfLog = dfLog.stack() - dfLog.sort_index(inplace=True) + # Create a fully populated index at the desired frequency from market open to close. + # Then project the logged data into this complete index. + time_idx = pd.date_range(self.mkt_open, self.mkt_close, freq=self.book_freq, closed='right') + dfLog = dfLog.reindex(time_idx, method='ffill') + dfLog.sort_index(inplace=True) + + dfLog = dfLog.stack() + dfLog.sort_index(inplace=True) - quotes = sorted(dfLog.index.get_level_values(1).unique()) - min_quote = quotes[0] - max_quote = quotes[-1] - quotes = range(min_quote, max_quote+1) + # Get the full range of quotes at the finest possible resolution. + quotes = sorted(dfLog.index.get_level_values(1).unique()) + min_quote = quotes[0] + max_quote = quotes[-1] + quotes = range(min_quote, max_quote+1) - filledIndex = pd.MultiIndex.from_product([time_idx, quotes], names=['time','quote']) - dfLog = dfLog.reindex(filledIndex) - dfLog.fillna(0, inplace=True) + # Restructure the log to have multi-level rows of all possible pairs of time and quote + # with volume as the only column. 
+ filledIndex = pd.MultiIndex.from_product([time_idx, quotes], names=['time','quote']) + dfLog = dfLog.reindex(filledIndex) + dfLog.fillna(0, inplace=True) - dfLog.rename('Volume') + dfLog.rename('Volume') - df = pd.DataFrame(index=dfLog.index) - df['Volume'] = dfLog - + df = pd.DataFrame(index=dfLog.index) + df['Volume'] = dfLog - ### THE SLOW WAY - if False: - - # Make a MultiIndex dataframe of (Seconds, QuotePrice) -> Volume, giving the quote prices and volumes - # at the end of each second the market was open. - seconds = pd.date_range(self.mkt_open, self.mkt_close, freq=self.book_freq, closed='right') - quotes = dfLog.columns - - df = pd.DataFrame(index=pd.MultiIndex.from_product([seconds, quotes], names=['time','quote'])) - df['Volume'] = 0 - - df.sort_index(inplace=True) - - logWriteStart = pd.Timestamp('now') - i = 0 - - for idx, row in df.iterrows(): - if i % 1000 == 0: - print ("Exchange writing order book log, interval {}, wallclock elapsed {}".format(idx[0], pd.Timestamp('now') - logWriteStart), override=True) - - best = dfLog.index.asof(idx[0]) - if pd.isnull(best): continue - df.loc[idx,'Volume'] = dfLog.loc[best,idx[1]] - - i += 1 - - print ("Exchange sorting order book index.", override=True) - df.sort_index(inplace=True) - - # Create a filled version of the index without gaps from min to max quote price. - min_quote = df.index.get_level_values(1)[0] - max_quote = df.index.get_level_values(1)[-1] - quotes = range(min_quote, max_quote+1) - - # Create the new index and move the data over. - print ("Exchange reindexing order book.", override=True) - filledIndex = pd.MultiIndex.from_product([seconds, quotes], names=['time','quote']) - df = df.reindex(filledIndex) - - # NaNs represent that there is NO volume at this quoted price at this time, so they should become zero. 
- df.fillna(0, inplace=True) - - print ("Exchange archiving order book.", override=True) + # Archive the order book snapshots directly to a file named with the symbol, rather than + # to the exchange agent log. self.writeLog(df, filename='orderbook_{}'.format(symbol)) print ("Order book archival complete.", override=True) @@ -164,16 +140,21 @@ def kernelTerminating (self): def receiveMessage (self, currentTime, msg): super().receiveMessage(currentTime, msg) - # Exchanges currently get a very fast (but not instant) computation delay of 1 ns for handling - # all order types. Note that computation delay MUST be updated before any calls to sendMessage. + # Unless the intent of an experiment is to examine computational issues within an Exchange, + # it will typically have either 1 ns delay (near instant but cannot process multiple orders + # in the same atomic time unit) or 0 ns delay (can process any number of orders, always in + # the atomic time unit in which they are received). This is separate from, and additional + # to, any parallel pipeline delay imposed for order book activity. + + # Note that computation delay MUST be updated before any calls to sendMessage. self.setComputationDelay(self.computation_delay) - # We're closed. + # Is the exchange closed? (This block only affects post-close, not pre-open.) if currentTime > self.mkt_close: # Most messages after close will receive a 'MKT_CLOSED' message in response. A few things # might still be processed, like requests for final trade prices or such. if msg.body['msg'] in ['LIMIT_ORDER', 'CANCEL_ORDER']: - print ("{} received {}: {}".format(self.name, msg.body['msg'], msg.body['order'])) + log_print ("{} received {}: {}", self.name, msg.body['msg'], msg.body['order']) self.sendMessage(msg.body['sender'], Message({ "msg": "MKT_CLOSED" })) # Don't do any further processing on these messages! 
@@ -183,21 +164,21 @@ def receiveMessage (self, currentTime, msg): # final trade of the day as their "daily close" price for a symbol. pass else: - print ("{} received {}, discarded: market is closed.".format(self.name, msg.body['msg'])) + log_print ("{} received {}, discarded: market is closed.", self.name, msg.body['msg']) self.sendMessage(msg.body['sender'], Message({ "msg": "MKT_CLOSED" })) # Don't do any further processing on these messages! return - # Log all received messages. + # Log order messages only if that option is configured. Log all other messages. if msg.body['msg'] in ['LIMIT_ORDER', 'CANCEL_ORDER']: - self.logEvent(msg.body['msg'], msg.body['order']) + if self.log_orders: self.logEvent(msg.body['msg'], js.dump(msg.body['order'])) else: self.logEvent(msg.body['msg'], msg.body['sender']) - # Handle message types understood by this exchange. + # Handle all message types understood by this exchange. if msg.body['msg'] == "WHEN_MKT_OPEN": - print ("{} received WHEN_MKT_OPEN request from agent {}".format(self.name, msg.body['sender'])) + log_print ("{} received WHEN_MKT_OPEN request from agent {}", self.name, msg.body['sender']) # The exchange is permitted to respond to requests for simple immutable data (like "what are your # hours?") instantly. This does NOT include anything that queries mutable data, like equity @@ -206,7 +187,7 @@ def receiveMessage (self, currentTime, msg): self.sendMessage(msg.body['sender'], Message({ "msg": "WHEN_MKT_OPEN", "data": self.mkt_open })) elif msg.body['msg'] == "WHEN_MKT_CLOSE": - print ("{} received WHEN_MKT_CLOSE request from agent {}".format(self.name, msg.body['sender'])) + log_print ("{} received WHEN_MKT_CLOSE request from agent {}", self.name, msg.body['sender']) # The exchange is permitted to respond to requests for simple immutable data (like "what are your # hours?") instantly. 
This does NOT include anything that queries mutable data, like equity @@ -217,31 +198,41 @@ def receiveMessage (self, currentTime, msg): elif msg.body['msg'] == "QUERY_LAST_TRADE": symbol = msg.body['symbol'] if symbol not in self.order_books: - print ("Last trade request discarded. Unknown symbol: {}".format(symbol)) + log_print ("Last trade request discarded. Unknown symbol: {}", symbol) else: - print ("{} received QUERY_LAST_TRADE ({}) request from agent {}".format(self.name, symbol, msg.body['sender'])) + log_print ("{} received QUERY_LAST_TRADE ({}) request from agent {}", self.name, symbol, msg.body['sender']) + # Return the single last executed trade price (currently not volume) for the requested symbol. + # This will return the average share price if multiple executions resulted from a single order. self.sendMessage(msg.body['sender'], Message({ "msg": "QUERY_LAST_TRADE", "symbol": symbol, "data": self.order_books[symbol].last_trade, "mkt_closed": True if currentTime > self.mkt_close else False })) elif msg.body['msg'] == "QUERY_SPREAD": symbol = msg.body['symbol'] depth = msg.body['depth'] if symbol not in self.order_books: - print ("Bid-ask spread request discarded. Unknown symbol: {}".format(symbol)) + log_print ("Bid-ask spread request discarded. Unknown symbol: {}", symbol) else: - print ("{} received QUERY_SPREAD ({}:{}) request from agent {}".format(self.name, symbol, depth, msg.body['sender'])) + log_print ("{} received QUERY_SPREAD ({}:{}) request from agent {}", self.name, symbol, depth, msg.body['sender']) + + # Return the requested depth on both sides of the order book for the requested symbol. + # Returns price levels and aggregated volume at each level (not individual orders). 
self.sendMessage(msg.body['sender'], Message({ "msg": "QUERY_SPREAD", "symbol": symbol, "depth": depth, "bids": self.order_books[symbol].getInsideBids(depth), "asks": self.order_books[symbol].getInsideAsks(depth), "data": self.order_books[symbol].last_trade, "mkt_closed": True if currentTime > self.mkt_close else False, - "book": self.order_books[symbol].prettyPrint(silent=True) })) + "book": '' })) + + # It is possible to also send the pretty-printed order book to the agent for logging, but forcing pretty-printing + # of a large order book is very slow, so we should only do it with good reason. We don't currently + # have a configurable option for it. + #"book": self.order_books[symbol].prettyPrint(silent=True) })) elif msg.body['msg'] == "QUERY_ORDER_STREAM": symbol = msg.body['symbol'] length = msg.body['length'] if symbol not in self.order_books: - print ("Order stream request discarded. Unknown symbol: {}".format(symbol)) + log_print ("Order stream request discarded. Unknown symbol: {}", symbol) else: - print ("{} received QUERY_ORDER_STREAM ({}:{}) request from agent {}".format(self.name, symbol, length, msg.body['sender'])) + log_print ("{} received QUERY_ORDER_STREAM ({}:{}) request from agent {}", self.name, symbol, length, msg.body['sender']) # We return indices [1:length] inclusive because the agent will want "orders leading up to the last # L trades", and the items under index 0 are more recent than the last trade. @@ -251,10 +242,11 @@ def receiveMessage (self, currentTime, msg): })) elif msg.body['msg'] == "LIMIT_ORDER": order = msg.body['order'] - print ("{} received LIMIT_ORDER: {}".format(self.name, order)) + log_print ("{} received LIMIT_ORDER: {}", self.name, order) if order.symbol not in self.order_books: - print ("Order discarded. Unknown symbol: {}".format(order.symbol)) + log_print ("Order discarded. Unknown symbol: {}", order.symbol) else: + # Hand the order to the order book for processing. 
self.order_books[order.symbol].handleLimitOrder(deepcopy(order)) elif msg.body['msg'] == "CANCEL_ORDER": # Note: this is somewhat open to abuse, as in theory agents could cancel other agents' orders. @@ -262,24 +254,31 @@ def receiveMessage (self, currentTime, msg): # then successfully cancel, but receive the cancel confirmation first. Things to think about # for later... order = msg.body['order'] - print ("{} received CANCEL_ORDER: {}".format(self.name, order)) + log_print ("{} received CANCEL_ORDER: {}", self.name, order) if order.symbol not in self.order_books: - print ("Cancellation request discarded. Unknown symbol: {}".format(order.symbol)) + log_print ("Cancellation request discarded. Unknown symbol: {}", order.symbol) else: + # Hand the order to the order book for processing. self.order_books[order.symbol].cancelOrder(deepcopy(order)) def sendMessage (self, recipientID, msg): # The ExchangeAgent automatically applies appropriate parallel processing pipeline delay - # to exactly those message types which require it. - # TODO: probably organize the order types into constant categories once there are more. + # to those message types which require it. + # TODO: probably organize the order types into categories once there are more, so we can + # take action by category (e.g. ORDER-related messages) instead of enumerating all message + # types to be affected. if msg.body['msg'] in ['ORDER_ACCEPTED', 'ORDER_CANCELLED', 'ORDER_EXECUTED']: + # Messages that require order book modification (not simple queries) incur the additional + # parallel processing delay as configured. super().sendMessage(recipientID, msg, delay = self.pipeline_delay) - self.logEvent(msg.body['msg'], msg.body['order']) + if self.log_orders: self.logEvent(msg.body['msg'], js.dump(msg.body['order'])) else: + # Other message types incur only the currently-configured computation delay for this agent. 
super().sendMessage(recipientID, msg) + # Simple accessor methods for the market open and close times. def getMarketOpen(self): return self.__mkt_open diff --git a/agent/FinancialAgent.py b/agent/FinancialAgent.py index 5faac1322..6f5c5e9e2 100644 --- a/agent/FinancialAgent.py +++ b/agent/FinancialAgent.py @@ -4,11 +4,15 @@ # The FinancialAgent class contains attributes and methods that should be available # to all agent types (traders, exchanges, etc) in a financial market simulation. +# To be honest, it mainly exists because the base Agent class should not have any +# finance-specific aspects and it doesn't make sense for ExchangeAgent to inherit +# from TradingAgent. Hopefully we'll find more common ground for traders and +# exchanges to make this more useful later on. class FinancialAgent(Agent): - def __init__(self, id, name): + def __init__(self, id, name, type, random_state): # Base class init. - super().__init__(id, name) + super().__init__(id, name, type, random_state) # Used by any subclass to dollarize an int-cents price for printing. 
def dollarize (self, cents): diff --git a/agent/HeuristicBeliefLearningAgent.py b/agent/HeuristicBeliefLearningAgent.py index 8b05b41ce..8e7199516 100644 --- a/agent/HeuristicBeliefLearningAgent.py +++ b/agent/HeuristicBeliefLearningAgent.py @@ -1,32 +1,34 @@ from agent.ZeroIntelligenceAgent import ZeroIntelligenceAgent from message.Message import Message -from util.util import print +from util.util import print, log_print from math import sqrt import numpy as np import pandas as pd import sys +np.set_printoptions(threshold=np.inf) + class HeuristicBeliefLearningAgent(ZeroIntelligenceAgent): - def __init__(self, id, name, symbol, startingCash=100000, sigma_n=1000, + def __init__(self, id, name, type, symbol='IBM', starting_cash=100000, sigma_n=1000, r_bar=100000, kappa=0.05, sigma_s=100000, q_max=10, sigma_pv=5000000, R_min = 0, R_max = 250, eta = 1.0, - lambda_a = 0.005, L = 8): + lambda_a = 0.005, L = 8, log_orders = False, random_state = None): # Base class init. - super().__init__(id, name, symbol, startingCash=startingCash, sigma_n=sigma_n, + super().__init__(id, name, type, symbol=symbol, starting_cash=starting_cash, sigma_n=sigma_n, r_bar=r_bar, kappa=kappa, sigma_s=sigma_s, q_max=q_max, sigma_pv=sigma_pv, R_min=R_min, R_max=R_max, eta=eta, - lambda_a = lambda_a) + lambda_a = lambda_a, log_orders = log_orders, random_state = random_state) # Store important parameters particular to the HBL agent. - self.L = L + self.L = L # length of order book history to use (number of transactions) def wakeup (self, currentTime): # Parent class handles discovery of exchange times and market_open wakeup call. - # Also handles SRG-type "background agent" needs that are not specific to HBL. + # Also handles ZI-style "background agent" needs that are not specific to HBL. 
super().wakeup(currentTime) # Only if the superclass leaves the state as ACTIVE should we proceed with our @@ -51,7 +53,7 @@ def placeOrder (self): if len(self.stream_history[self.symbol]) < self.L: # Not enough history for HBL. - print ("Insufficient history for HBL: length {}, L {}".format(len(self.stream_history[self.symbol]), self.L)) + log_print ("Insufficient history for HBL: length {}, L {}", len(self.stream_history[self.symbol]), self.L) super().placeOrder() return @@ -66,14 +68,22 @@ def placeOrder (self): # Walk through the visible order history and accumulate values needed for HBL's # estimation of successful transaction by limit price. - sa = {} - sb = {} - ua = {} - ub = {} - low_p = sys.maxsize high_p = 0 + # Find the lowest and highest observed prices in the order history. + for h in self.stream_history[self.symbol]: + for id, order in h.items(): + p = order['limit_price'] + if p < low_p: low_p = p + if p > high_p: high_p = p + + # Set up the ndarray we will use for our computation. + # idx 0-7 are sa, sb, ua, ub, num, denom, Pr, Es + nd = np.zeros((high_p-low_p+1,8)) + + + # Iterate through the history and compile our observations. for h in self.stream_history[self.symbol]: # h follows increasing "transactions into the past", with index zero being orders # after the most recent transaction. @@ -83,55 +93,62 @@ def placeOrder (self): if p > high_p: high_p = p # For now if there are any transactions, consider the order successful. For single - # unit orders as used in SRG configs, this is sufficient. For multi-unit orders, + # unit orders, this is sufficient. For multi-unit orders, # we may wish to switch to a proportion of shares executed. 
if order['is_buy_order']: - if order['transactions']: sb[p] = 1 if not p in sb else sb[p] + 1 - else: - ub[p] = 1 if not p in ub else ub[p] + 1 + if order['transactions']: nd[p-low_p,1] += 1 + else: nd[p-low_p,3] += 1 else: - if order['transactions']: sa[p] = 1 if not p in sa else sa[p] + 1 - else: - ua[p] = 1 if not p in ua else ua[p] + 1 - - - # For each limit price between the lowest and highest observed price in history, - # compute the estimated probability of a successful transaction. Remember the - # price that produces the greatest expected surplus. - best_p = None - best_Pr = None - best_Es = -sys.maxsize - - for p in range(low_p, high_p+1): - if buy: - o = sum( [sa[x] for x in sa if x <= p] + [ua[x] for x in ua if x <= p] ) - s = sum( [sb[x] for x in sb if x <= p] ) - u = sum( [ub[x] for x in ub if x >= p] ) - else: - o = sum( [sb[x] for x in sb if x >= p] + [ub[x] for x in ub if x >= p] ) - s = sum( [sa[x] for x in sa if x >= p] ) - u = sum( [ua[x] for x in ua if x <= p] ) - - #print ("p {}, o {}, s {}, u {}".format(p, o, s, u)) - - if o + s + u <= 0: Pr = 0 - else: Pr = (o + s) / (o + s + u) - - Es = Pr * (v - p) if buy else Pr * (p - v) - - if Es > best_Es: - best_Es = Es - best_Pr = Pr - best_p = p - - # best_p should now contain the limit price that produces maximum expected surplus best_Es + if order['transactions']: nd[p-low_p,0] += 1 + else: nd[p-low_p,2] += 1 + + # Compute the sums and cumulative sums required, from our observations, + # to drive the HBL's transaction probability estimates. 
+ if buy: + nd[:,[0,1,2]] = np.cumsum(nd[:,[0,1,2]], axis=0) + nd[::-1,3] = np.cumsum(nd[::-1,3], axis=0) + nd[:,4] = np.sum(nd[:,[0,1,2]], axis=1) + else: + nd[::-1,[0,1,3]] = np.cumsum(nd[::-1,[0,1,3]], axis=0) + nd[:,2] = np.cumsum(nd[:,2], axis=0) + nd[:,4] = np.sum(nd[:,[0,1,3]], axis=1) + + nd[:,5] = np.sum(nd[:,0:4], axis=1) + + # Okay to ignore divide by zero errors here because we expect that in + # some cases (0/0 can happen) and we immediately convert the resulting + # nan to zero, which is the right answer for us. + + # Compute probability estimates for successful transaction at all price levels. + with np.errstate(divide='ignore',invalid='ignore'): + nd[:,6] = np.nan_to_num(np.divide(nd[:,4], nd[:,5])) + + # Compute expected surplus for all price levels. + if buy: nd[:,7] = nd[:,6] * (v - np.arange(low_p,high_p+1)) + else: nd[:,7] = nd[:,6] * (np.arange(low_p,high_p+1) - v) + + # Extract the price and other data for the maximum expected surplus. + best_idx = np.argmax(nd[:,7]) + best_Es, best_Pr = nd[best_idx,[7,6]] + best_p = low_p + best_idx + + # If the best expected surplus is positive, go for it. if best_Es > 0: - print ("{} selects limit price {} with expected surplus {} (Pr = {:0.4f})".format(self.name, best_p, int(round(best_Es)), best_Pr)) - + log_print ("Numpy: {} selects limit price {} with expected surplus {} (Pr = {:0.4f})", self.name, best_p, int(round(best_Es)), best_Pr) + # Place the constructed order. - self.placeLimitOrder(self.symbol, 1, buy, best_p) + self.placeLimitOrder(self.symbol, 1, buy, int(round(best_p))) else: - print ("{} elects not to place an order (best expected surplus <= 0)".format(self.name)) + # Do nothing if best limit price has negative expected surplus with below code. + log_print ("Numpy: {} elects not to place an order (best expected surplus <= 0)", self.name) + + # OTHER OPTION 1: Allow negative expected surplus with below code. 
+ #log_print ("Numpy: {} placing undesirable order (best expected surplus <= 0)", self.name) + #self.placeLimitOrder(self.symbol, 1, buy, int(round(best_p))) + + # OTHER OPTION 2: Force fallback to ZI logic on negative surplus with below code (including return). + #log_print ("Numpy: no desirable order for {}, acting as ZI", self.name) + #super().placeOrder() diff --git a/agent/ImpactAgent.py b/agent/ImpactAgent.py new file mode 100644 index 000000000..2537361fe --- /dev/null +++ b/agent/ImpactAgent.py @@ -0,0 +1,163 @@ +from agent.TradingAgent import TradingAgent +from message.Message import Message +from util.util import print, log_print + +import numpy as np +import pandas as pd +import sys + +class ImpactAgent(TradingAgent): + + def __init__(self, id, name, type, symbol = None, starting_cash = None, greed = None, within = 0.01, + impact = True, impact_time = None, random_state = None): + # Base class init. + super().__init__(id, name, type, starting_cash = starting_cash, random_state = random_state) + + self.symbol = symbol # symbol to trade + self.trading = False # ready to trade + self.traded = False # has made its one trade + + # The amount of available "nearby" liquidity to consume when placing its order. + self.greed = greed # trade this proportion of liquidity + self.within = within # within this range of the inside price + + # When should we make the impact trade? + self.impact_time = impact_time + + # The agent begins in its "complete" state, not waiting for + # any special event or condition. + self.state = 'AWAITING_WAKEUP' + + # Controls whether the impact trade is actually placed. + self.impact = impact + + + def wakeup (self, currentTime): + # Parent class handles discovery of exchange times and market_open wakeup call. + super().wakeup(currentTime) + + if not self.mkt_open or not self.mkt_close: + # TradingAgent handles discovery of exchange times. + return + else: + if not self.trading: + self.trading = True + + # Time to start trading! 
+ print ("{} is ready to start trading now.".format(self.name)) + + + # Steady state wakeup behavior starts here. + + # First, see if we have received a MKT_CLOSED message for the day. If so, + # there's nothing to do except clean-up. + if self.mkt_closed and (self.symbol in self.daily_close_price): + # Market is closed and we already got the daily close price. + return + + + ### Impact agent operates at a specific time. + if currentTime < self.impact_time: + print ("Impact agent waiting for impact_time {}".format(self.impact_time), override=True) + self.setWakeup(self.impact_time) + return + + + ### The impact agent only trades once, but we will monitor prices for + ### the sake of performance. + self.setWakeup(currentTime + pd.Timedelta('30m')) + + + # If the market is closed and we haven't obtained the daily close price yet, + # do that before we cease activity for the day. Don't do any other behavior + # after market close. + # + # Also, if we already made our one trade, do nothing except monitor prices. + if self.traded or (self.mkt_closed and (not self.symbol in self.daily_close_price)): + self.getLastTrade() + self.state = 'AWAITING_LAST_TRADE' + return + + + # The impact agent will place one order based on the current spread. + self.getCurrentSpread() + self.state = 'AWAITING_SPREAD' + + + def receiveMessage (self, currentTime, msg): + # Parent class schedules market open wakeup call once market open/close times are known. + super().receiveMessage(currentTime, msg) + + # We have been awakened by something other than our scheduled wakeup. + # If our internal state indicates we were waiting for a particular event, + # check if we can transition to a new state. + + if self.state == 'AWAITING_SPREAD': + # We were waiting for current spread information to make our trade. + # If the message we just received is QUERY_SPREAD, that means we just got it. + if msg.body['msg'] == 'QUERY_SPREAD': + # Place our one trade. 
+ bid, bid_vol, ask, ask_vol = self.getKnownBidAsk(self.symbol) + bid_liq, ask_liq = self.getKnownLiquidity(self.symbol, within=self.within) + + # Buy order. + direction, shares, price = True, int(round(ask_liq * self.greed)), ask + + # Sell order. This should be a parameter, but isn't yet. + #direction, shares, price = False, int(round(bid_liq * self.greed)), bid + + # Compute the limit price we must offer to ensure our order executes immediately. + # This is essentially a workaround for the lack of true market orders in our + # current simulation. + price = self.computeRequiredPrice(direction, shares) + + # Actually place the order only if self.impact is true. + if self.impact: + print ("Impact agent firing: {} {} @ {}".format('BUY' if direction else 'SELL', shares, self.dollarize(price)), override=True) + self.placeLimitOrder (self.symbol, shares, direction, price) + else: + print ("Impact agent would fire: {} {} @ {} (but self.impact = False)".format('BUY' if direction else 'SELL', shares, self.dollarize(price)), override=True) + + self.traded = True + self.state = 'AWAITING_WAKEUP' + + + # Internal state and logic specific to this agent. + + def placeLimitOrder (self, symbol, quantity, is_buy_order, limit_price): + super().placeLimitOrder(symbol, quantity, is_buy_order, limit_price, ignore_risk = True) + + # Computes required limit price to immediately execute a trade for the specified quantity + # of shares. + def computeRequiredPrice (self, direction, shares): + book = self.known_asks[self.symbol] if direction else self.known_bids[self.symbol] + + # Start at the inside and add up the shares. + t = 0 + + for i in range(len(book)): + p,v = book[i] + t += v + + # If we have accumulated enough shares, return this price. + if t >= shares: return p + + # Not enough shares. Just return worst price (highest ask, lowest bid). + return book[-1][0] + + + # Request the last trade price for our symbol. 
+ def getLastTrade (self): + super().getLastTrade(self.symbol) + + + # Request the spread for our symbol. + def getCurrentSpread (self): + # Impact agent gets depth 10000 on each side (probably everything). + super().getCurrentSpread(self.symbol, 10000) + + + def getWakeFrequency (self): + return (pd.Timedelta('1ns')) + + diff --git a/agent/TradingAgent.py b/agent/TradingAgent.py index 3d20e3d6e..2052051d5 100644 --- a/agent/TradingAgent.py +++ b/agent/TradingAgent.py @@ -2,30 +2,38 @@ from agent.ExchangeAgent import ExchangeAgent from message.Message import Message from util.order.LimitOrder import LimitOrder -from util.util import print +from util.util import print, log_print from copy import deepcopy +import jsons as js import numpy as np import pandas as pd -import random import sys +# The TradingAgent class (via FinancialAgent, via Agent) is intended as the +# base class for all trading agents (i.e. not things like exchanges) in a +# market simulation. It handles a lot of messaging (inbound and outbound) +# and state maintenance automatically, so subclasses can focus just on +# implementing a strategy without too much bookkeeping. class TradingAgent(FinancialAgent): - def __init__(self, id, name, startingCash): + def __init__(self, id, name, type, random_state=None, starting_cash=100000, log_orders=False): # Base class init. - super().__init__(id, name) + super().__init__(id, name, type, random_state) # We don't yet know when the exchange opens or closes. self.mkt_open = None self.mkt_close = None - # Store startingCash in case we want to refer to it for performance stats. + # Log all order activity? + self.log_orders = log_orders + + # Store starting_cash in case we want to refer to it for performance stats. # It should NOT be modified. Use the 'CASH' key in self.holdings. # 'CASH' is always in cents! Note that agents are limited by their starting # cash, currently without leverage. 
Taking short positions is permitted, # but does NOT increase the amount of at-risk capital allowed. - self.startingCash = startingCash + self.starting_cash = starting_cash # TradingAgent has constants to support simulated market orders. self.MKT_BUY = sys.maxsize @@ -33,9 +41,9 @@ def __init__(self, id, name, startingCash): # The base TradingAgent will track its holdings and outstanding orders. # Holdings is a dictionary of symbol -> shares. CASH is a special symbol - # worth one dollar per share. Orders is a dictionary of active, open orders + # worth one cent per share. Orders is a dictionary of active, open orders # (not cancelled, not fully executed) keyed by order_id. - self.holdings = { 'CASH' : startingCash } + self.holdings = { 'CASH' : starting_cash } self.orders = {} # The base TradingAgent also tracks last known prices for every symbol @@ -45,11 +53,20 @@ def __init__(self, id, name, startingCash): # automatically generate such requests, though it has a helper function # that can be used to make it happen. self.last_trade = {} + + # When a last trade price comes in after market close, the trading agent + # automatically records it as the daily close price for a symbol. self.daily_close_price = {} + # The agent remembers the last known bids and asks (with variable depth, + # showing only aggregate volume at each price level) when it receives + # a response to QUERY_SPREAD. self.known_bids = {} self.known_asks = {} + # The agent remembers the order history communicated by the exchange + # when such is requested by an agent (for example, a heuristic belief + # learning agent). self.stream_history = {} # For special logging at the first moment the simulator kernel begins @@ -61,7 +78,12 @@ def __init__(self, id, name, startingCash): # as we know. self.mkt_closed = False - # TMP + # This is probably a transient feature, but for now we permit the exchange + # to return the entire order book sometimes, for development and debugging. 
+ # It is very expensive to pass this around, and breaks "simulation physics", + # but can really help understand why agents are making certain decisions. + # Subclasses should NOT rely on this feature as part of their strategy, + # as it will go away. self.book = '' @@ -69,13 +91,14 @@ def __init__(self, id, name, startingCash): def kernelStarting(self, startTime): # self.kernel is set in Agent.kernelInitializing() + self.logEvent('STARTING_CASH', self.starting_cash, True) # Find an exchange with which we can place orders. It is guaranteed # to exist by now (if there is one). self.exchangeID = self.kernel.findAgentByType(ExchangeAgent) - print ("Agent {} requested agent of type Agent.ExchangeAgent. Given Agent ID: {}".format( - self.id, self.exchangeID)) + log_print ("Agent {} requested agent of type Agent.ExchangeAgent. Given Agent ID: {}", + self.id, self.exchangeID) # Request a wake-up call as in the base Agent. super().kernelStarting(startTime) @@ -87,23 +110,25 @@ def kernelStopping (self): # Print end of day holdings. self.logEvent('FINAL_HOLDINGS', self.fmtHoldings(self.holdings)) + self.logEvent('FINAL_CASH_POSITION', self.holdings['CASH'], True) # Mark to market. - # We may want a separate mark to market function (to use anytime) eventually. cash = self.markToMarket(self.holdings) - self.logEvent('ENDING_CASH', cash) + self.logEvent('ENDING_CASH', cash, True) print ("Final holdings for {}: {}. Marked to market: {}".format(self.name, self.fmtHoldings(self.holdings), cash), override=True) - # TODO: Record final results for presentation/debugging. This is probably bad. - mytype = str(type(self)).split('.')[-1].split("'")[0] + # Record final results for presentation/debugging. This is an ugly way + # to do this, but it is useful for now. 
+ mytype = self.type + gain = cash - self.starting_cash if mytype in self.kernel.meanResultByAgentType: - self.kernel.meanResultByAgentType[mytype] += cash + self.kernel.meanResultByAgentType[mytype] += gain self.kernel.agentCountByType[mytype] += 1 else: - self.kernel.meanResultByAgentType[mytype] = cash + self.kernel.meanResultByAgentType[mytype] = gain self.kernel.agentCountByType[mytype] = 1 @@ -114,7 +139,6 @@ def wakeup (self, currentTime): if self.first_wake: # Log initial holdings. - #self.logEvent('HOLDINGS_UPDATED', self.fmtHoldings(self.holdings)) self.logEvent('HOLDINGS_UPDATED', self.holdings) self.first_wake = False @@ -123,7 +147,9 @@ def wakeup (self, currentTime): self.sendMessage(self.exchangeID, Message({ "msg" : "WHEN_MKT_OPEN", "sender": self.id })) self.sendMessage(self.exchangeID, Message({ "msg" : "WHEN_MKT_CLOSE", "sender": self.id })) - # New for MomentumAgent. + # For the sake of subclasses, TradingAgent now returns a boolean + # indicating whether the agent is "ready to trade" -- has it received + # the market open and closed times, and is the market not already closed. return (self.mkt_open and self.mkt_close) and not self.mkt_closed @@ -137,12 +163,12 @@ def receiveMessage (self, currentTime, msg): if msg.body['msg'] == "WHEN_MKT_OPEN": self.mkt_open = msg.body['data'] - print ("Recorded market open: {}".format(self.kernel.fmtTime(self.mkt_open))) + log_print ("Recorded market open: {}", self.kernel.fmtTime(self.mkt_open)) elif msg.body['msg'] == "WHEN_MKT_CLOSE": self.mkt_close = msg.body['data'] - print ("Recorded market close: {}".format(self.kernel.fmtTime(self.mkt_close))) + log_print ("Recorded market close: {}", self.kernel.fmtTime(self.mkt_close)) elif msg.body['msg'] == "ORDER_EXECUTED": # Call the orderExecuted method, which subclasses should extend. This parent @@ -179,12 +205,14 @@ def receiveMessage (self, currentTime, msg): elif msg.body['msg'] == 'QUERY_SPREAD': # Call the querySpread method, which subclasses may extend. 
+ # Also note if the market is closed. if msg.body['mkt_closed']: self.mkt_closed = True self.querySpread(msg.body['symbol'], msg.body['data'], msg.body['bids'], msg.body['asks'], msg.body['book']) elif msg.body['msg'] == 'QUERY_ORDER_STREAM': # Call the queryOrderStream method, which subclasses may extend. + # Also note if the market is closed. if msg.body['mkt_closed']: self.mkt_closed = True self.queryOrderStream(msg.body['symbol'], msg.body['orders']) @@ -224,8 +252,8 @@ def getOrderStream (self, symbol, length=1): # Used by any Trading Agent subclass to place a limit order. Parameters expect: - # string (valid symbol), int (positive share quantity), bool (True == BUY), float (x.xx price). - def placeLimitOrder (self, symbol, quantity, is_buy_order, limit_price): + # string (valid symbol), int (positive share quantity), bool (True == BUY), int (price in cents). + def placeLimitOrder (self, symbol, quantity, is_buy_order, limit_price, ignore_risk = False): order = LimitOrder(self.id, self.currentTime, symbol, quantity, is_buy_order, limit_price) if quantity > 0: @@ -242,18 +270,24 @@ def placeLimitOrder (self, symbol, quantity, is_buy_order, limit_price): new_at_risk = self.markToMarket(new_holdings) - new_holdings['CASH'] # If at_risk is lower, always allow. Otherwise, new_at_risk must be below starting cash. - if (new_at_risk > at_risk) and (new_at_risk > self.startingCash): - print ("TradingAgent ignored limit order due to at-risk constraints: {}\n{}".format(order, self.fmtHoldings(self.holdings))) - return - + if not ignore_risk: + if (new_at_risk > at_risk) and (new_at_risk > self.starting_cash): + log_print ("TradingAgent ignored limit order due to at-risk constraints: {}\n{}", order, self.fmtHoldings(self.holdings)) + return + + # Copy the intended order for logging, so any changes made to it elsewhere + # don't retroactively alter our "as placed" log of the order. 
Eventually + # it might be nice to make the whole history of the order into transaction + # objects inside the order (we're halfway there) so there CAN be just a single + # object per order, that never alters its original state, and eliminate all these copies. self.orders[order.order_id] = deepcopy(order) self.sendMessage(self.exchangeID, Message({ "msg" : "LIMIT_ORDER", "sender": self.id, "order" : order })) # Log this activity. - self.logEvent('ORDER_SUBMITTED', order) + if self.log_orders: self.logEvent('ORDER_SUBMITTED', js.dump(order)) else: - print ("TradingAgent ignored limit order of quantity zero: {}".format(order)) + log_print ("TradingAgent ignored limit order of quantity zero: {}", order) # Used by any Trading Agent subclass to cancel any order. The order must currently @@ -263,16 +297,16 @@ def cancelOrder (self, order): "order" : order })) # Log this activity. - self.logEvent('CANCEL_SUBMITTED', order) + if self.log_orders: self.logEvent('CANCEL_SUBMITTED', js.dump(order)) # Handles ORDER_EXECUTED messages from an exchange agent. Subclasses may wish to extend, # but should still call parent method for basic portfolio/returns tracking. def orderExecuted (self, order): - print ("Received notification of execution for: {}".format(order)) + log_print ("Received notification of execution for: {}", order) # Log this activity. - self.logEvent('ORDER_EXECUTED', order) + if self.log_orders: self.logEvent('ORDER_EXECUTED', js.dump(order)) # At the very least, we must update CASH and holdings at execution time. 
qty = order.quantity if order.is_buy_order else -1 * order.quantity @@ -299,21 +333,20 @@ def orderExecuted (self, order): else: o.quantity -= order.quantity else: - print ("Execution received for order not in orders list: {}".format(order)) + log_print ("Execution received for order not in orders list: {}", order) - print ("After execution, agent open orders: {}".format(self.orders)) + log_print ("After execution, agent open orders: {}", self.orders) # After execution, log holdings. - #self.logEvent('HOLDINGS_UPDATED', self.fmtHoldings(self.holdings)) self.logEvent('HOLDINGS_UPDATED', self.holdings) # Handles ORDER_ACCEPTED messages from an exchange agent. Subclasses may wish to extend. def orderAccepted (self, order): - print ("Received notification of acceptance for: {}".format(order)) + log_print ("Received notification of acceptance for: {}", order) # Log this activity. - self.logEvent('ORDER_ACCEPTED', order) + if self.log_orders: self.logEvent('ORDER_ACCEPTED', js.dump(order)) # We may later wish to add a status to the open orders so an agent can tell whether # a given order has been accepted or not (instead of needing to override this method). @@ -321,10 +354,10 @@ def orderAccepted (self, order): # Handles ORDER_CANCELLED messages from an exchange agent. Subclasses may wish to extend. def orderCancelled (self, order): - print ("Received notification of cancellation for: {}".format(order)) + log_print ("Received notification of cancellation for: {}", order) # Log this activity. - self.logEvent('ORDER_CANCELLED', order) + if self.log_orders: self.logEvent('ORDER_CANCELLED', js.dump(order)) # Remove the cancelled order from the open orders list. We may of course wish to have # additional logic here later, so agents can easily "look for" cancelled orders. 
Of @@ -332,12 +365,12 @@ def orderCancelled (self, order): if order.order_id in self.orders: del self.orders[order.order_id] else: - print ("Cancellation received for order not in orders list: {}".format(order)) + log_print ("Cancellation received for order not in orders list: {}", order) # Handles MKT_CLOSED messages from an exchange agent. Subclasses may wish to extend. def marketClosed (self): - print ("Received notification of market closure.") + log_print ("Received notification of market closure.") # Log this activity. self.logEvent('MKT_CLOSED') @@ -350,13 +383,13 @@ def marketClosed (self): def queryLastTrade (self, symbol, price): self.last_trade[symbol] = price - print ("Received last trade price of {} for {}.".format(self.last_trade[symbol], symbol)) + log_print ("Received last trade price of {} for {}.", self.last_trade[symbol], symbol) if self.mkt_closed: # Note this as the final price of the day. self.daily_close_price[symbol] = self.last_trade[symbol] - print ("Received daily close price of {} for {}.".format(self.last_trade[symbol], symbol)) + log_print ("Received daily close price of {} for {}.", self.last_trade[symbol], symbol) # Handles QUERY_SPREAD messages from an exchange agent. 
@@ -373,7 +406,7 @@ def querySpread (self, symbol, price, bids, asks, book): if asks: best_ask, best_ask_qty = (asks[0][0], asks[0][1]) else: best_ask, best_ask_qty = ('No asks', 0) - print ("Received spread of {} @ {} / {} @ {} for {}".format(best_bid_qty, best_bid, best_ask_qty, best_ask, symbol)) + log_print ("Received spread of {} @ {} / {} @ {} for {}", best_bid_qty, best_bid, best_ask_qty, best_ask, symbol) self.logEvent("BID_DEPTH", bids) self.logEvent("ASK_DEPTH", asks) @@ -418,9 +451,9 @@ def getKnownLiquidity (self, symbol, within=0.00): bid_liq = self.getBookLiquidity(self.known_bids[symbol], within) ask_liq = self.getBookLiquidity(self.known_asks[symbol], within) - print ("Bid/ask liq: {}, {}".format(bid_liq, ask_liq)) - print ("Known bids: {}".format(self.known_bids[self.symbol])) - print ("Known asks: {}".format(self.known_asks[self.symbol])) + log_print ("Bid/ask liq: {}, {}", bid_liq, ask_liq) + log_print ("Known bids: {}", self.known_bids[self.symbol]) + log_print ("Known asks: {}", self.known_asks[self.symbol]) return bid_liq, ask_liq @@ -434,7 +467,7 @@ def getBookLiquidity (self, book, within): # Is this price within "within" proportion of the best price? 
if abs(best - price) <= int(round(best * within)): - print ("Within {} of {}: {} with {} shares".format(within, best, price, shares)) + log_print ("Within {} of {}: {} with {} shares", within, best, price, shares) liq += shares return liq diff --git a/agent/ZeroIntelligenceAgent.py b/agent/ZeroIntelligenceAgent.py index efb2a4547..27c9d7e87 100644 --- a/agent/ZeroIntelligenceAgent.py +++ b/agent/ZeroIntelligenceAgent.py @@ -1,6 +1,6 @@ from agent.TradingAgent import TradingAgent from message.Message import Message -from util.util import print +from util.util import print, log_print from math import sqrt import numpy as np @@ -9,26 +9,26 @@ class ZeroIntelligenceAgent(TradingAgent): - def __init__(self, id, name, symbol, startingCash=100000, sigma_n=1000, + def __init__(self, id, name, type, symbol='IBM', starting_cash=100000, sigma_n=1000, r_bar=100000, kappa=0.05, sigma_s=100000, q_max=10, sigma_pv=5000000, R_min = 0, R_max = 250, eta = 1.0, - lambda_a = 0.005): + lambda_a = 0.005, log_orders = False, random_state = None): # Base class init. - super().__init__(id, name, startingCash) + super().__init__(id, name, type, starting_cash=starting_cash, log_orders=log_orders, random_state = random_state) # Store important parameters particular to the ZI agent. 
- self.symbol = symbol - self.sigma_n = sigma_n - self.r_bar = r_bar - self.kappa = kappa - self.sigma_s = sigma_s - self.q_max = q_max - self.sigma_pv = sigma_pv - self.R_min = R_min - self.R_max = R_max - self.eta = eta - self.lambda_a = lambda_a + self.symbol = symbol # symbol to trade + self.sigma_n = sigma_n # observation noise variance + self.r_bar = r_bar # true mean fundamental value + self.kappa = kappa # mean reversion parameter + self.sigma_s = sigma_s # shock variance + self.q_max = q_max # max unit holdings + self.sigma_pv = sigma_pv # private value variance + self.R_min = R_min # min requested surplus + self.R_max = R_max # max requested surplus + self.eta = eta # strategic threshold + self.lambda_a = lambda_a # mean arrival rate of ZI agents # The agent uses this to track whether it has begun its strategy or is still # handling pre-market tasks. @@ -38,7 +38,7 @@ def __init__(self, id, name, symbol, startingCash=100000, sigma_n=1000, # any special event or condition. self.state = 'AWAITING_WAKEUP' - # The agent maintains two priors: r_t and sigma_t. + # The agent maintains two priors: r_t and sigma_t (value and error estimates). self.r_t = r_bar self.sigma_t = 0 @@ -48,7 +48,7 @@ def __init__(self, id, name, symbol, startingCash=100000, sigma_n=1000, # The agent has a private value for each incremental unit. self.theta = [int(x) for x in sorted( - np.round(np.random.normal(loc=0, scale=sqrt(sigma_pv), size=(q_max*2))).tolist(), + np.round(self.random_state.normal(loc=0, scale=sqrt(sigma_pv), size=(q_max*2))).tolist(), reverse=True)] @@ -61,6 +61,37 @@ def kernelStarting(self, startTime): self.oracle = self.kernel.oracle + def kernelStopping (self): + # Always call parent method to be safe. + super().kernelStopping() + + # Print end of day valuation. + H = self.getHoldings(self.symbol) + + # May request real fundamental value from oracle as part of final cleanup/stats. 
+ rT = self.oracle.observePrice(self.symbol, self.currentTime, sigma_n=0, random_state = self.random_state) + + # Start with surplus as private valuation of shares held. + if H > 0: surplus = sum([ self.theta[x+self.q_max-1] for x in range(1,H+1) ]) + elif H < 0: surplus = -sum([ self.theta[x+self.q_max-1] for x in range(H+1,1) ]) + else: surplus = 0 + + log_print ("surplus init: {}", surplus) + + # Add final (real) fundamental value times shares held. + surplus += rT * H + + log_print ("surplus after holdings: {}", surplus) + + # Add ending cash value and subtract starting cash value. + surplus += self.holdings['CASH'] - self.starting_cash + + self.logEvent('FINAL_VALUATION', surplus, True) + + log_print ("{} final report. Holdings {}, end cash {}, start cash {}, final fundamental {}, preferences {}, surplus {}", + self.name, H, self.holdings['CASH'], self.starting_cash, rT, self.theta, surplus) + + def wakeup (self, currentTime): # Parent class handles discovery of exchange times and market_open wakeup call. super().wakeup(currentTime) @@ -75,7 +106,7 @@ def wakeup (self, currentTime): self.trading = True # Time to start trading! - print ("{} is ready to start trading now.".format(self.name)) + log_print ("{} is ready to start trading now.", self.name) # Steady state wakeup behavior starts here. @@ -95,7 +126,7 @@ def wakeup (self, currentTime): # each agent independently sampling its next arrival time from an exponential # distribution in alternate Beta formation with Beta = 1 / lambda, where lambda # is the mean arrival rate of the Poisson process. - delta_time = np.random.exponential(scale = 1.0 / self.lambda_a) + delta_time = self.random_state.exponential(scale = 1.0 / self.lambda_a) self.setWakeup(currentTime + pd.Timedelta('{}ns'.format(int(round(delta_time))))) @@ -119,10 +150,12 @@ def wakeup (self, currentTime): # but this will be as a natural consequence of its beliefs. 
- # In order to use the SRG "strategic threshold" parameter (eta), the ZI agent needs the current + # In order to use the "strategic threshold" parameter (eta), the ZI agent needs the current # spread (inside bid/ask quote). It would not otherwise need any trade/quote information. + # If the calling agent is a subclass, don't initiate the strategy section of wakeup(), as it # may want to do something different. + if type(self) == ZeroIntelligenceAgent: self.getCurrentSpread(self.symbol) self.state = 'AWAITING_SPREAD' @@ -131,15 +164,15 @@ def wakeup (self, currentTime): def updateEstimates (self): - # Called by an SRG-type background agent that wishes to obtain a new fundamental observation, + # Called by a background agent that wishes to obtain a new fundamental observation, # update its internal estimation parameters, and compute a new total valuation for the # action it is considering. # The agent obtains a new noisy observation of the current fundamental value # and uses this to update its internal estimates in a Bayesian manner. - obs_t = self.oracle.observePrice(self.symbol, self.currentTime, sigma_n = self.sigma_n) + obs_t = self.oracle.observePrice(self.symbol, self.currentTime, sigma_n = self.sigma_n, random_state = self.random_state) - print ("{} observed {} at {}".format(self.name, obs_t, self.currentTime)) + log_print ("{} observed {} at {}", self.name, obs_t, self.currentTime) # Flip a coin to decide if we will buy or sell a unit at this time. 
@@ -147,13 +180,13 @@ def updateEstimates (self): if q >= self.q_max: buy = False - print ("Long holdings limit: agent will SELL") + log_print ("Long holdings limit: agent will SELL") elif q <= -self.q_max: buy = True - print ("Short holdings limit: agent will BUY") + log_print ("Short holdings limit: agent will BUY") else: - buy = bool(np.random.randint(0,2)) - print ("Coin flip: agent will {}".format("BUY" if buy else "SELL")) + buy = bool(self.random_state.randint(0,2)) + log_print ("Coin flip: agent will {}", "BUY" if buy else "SELL") # Update internal estimates of the current fundamental value and our error of same. @@ -198,7 +231,7 @@ def updateEstimates (self): # time as the previous wake time. self.prev_wake_time = self.currentTime - print ("{} estimates r_T = {} as of {}".format(self.name, r_T, self.currentTime)) + log_print ("{} estimates r_T = {} as of {}", self.name, r_T, self.currentTime) # Determine the agent's total valuation. @@ -206,7 +239,7 @@ def updateEstimates (self): theta = self.theta[q+1 if buy else q] v = r_T + theta - print ("{} total unit valuation is {} (theta = {})".format(self.name, v, theta)) + log_print ("{} total unit valuation is {} (theta = {})", self.name, v, theta) # Return values needed to implement strategy and select limit price. @@ -222,7 +255,7 @@ def placeOrder (self): # Select a requested surplus for this trade. - R = np.random.randint(self.R_min, self.R_max+1) + R = self.random_state.randint(self.R_min, self.R_max+1) # Determine the limit price. 
@@ -235,17 +268,17 @@ def placeOrder (self): if buy and ask_vol > 0: R_ask = v - ask if R_ask >= (self.eta * R): - print ("{} desired R = {}, but took R = {} at ask = {} due to eta".format(self.name, R, R_ask, ask)) + log_print ("{} desired R = {}, but took R = {} at ask = {} due to eta", self.name, R, R_ask, ask) p = ask else: - print ("{} demands R = {}, limit price {}".format(self.name, R, p)) + log_print ("{} demands R = {}, limit price {}", self.name, R, p) elif (not buy) and bid_vol > 0: R_bid = bid - v if R_bid >= (self.eta * R): - print ("{} desired R = {}, but took R = {} at bid = {} due to eta".format(self.name, R, R_bid, bid)) + log_print ("{} desired R = {}, but took R = {} at bid = {} due to eta", self.name, R, R_bid, bid) p = bid else: - print ("{} demands R = {}, limit price {}".format(self.name, R, p)) + log_print ("{} demands R = {}, limit price {}", self.name, R, p) @@ -291,13 +324,6 @@ def cancelOrders (self): return True - - # Do we have non-CASH positions? - def havePositions (self): - return len(self.holdings) > 1 or \ - (len(self.holdings) == 1 and 'CASH' not in self.holdings) - - def getWakeFrequency (self): - return pd.Timedelta(np.random.randint(low = 0, high = 100), unit='ns') + return pd.Timedelta(self.random_state.randint(low = 0, high = 100), unit='ns') diff --git a/cli/book_plot.py b/cli/book_plot.py new file mode 100644 index 000000000..e1c0dd16a --- /dev/null +++ b/cli/book_plot.py @@ -0,0 +1,259 @@ +import copy +from mpl_toolkits.mplot3d import Axes3D +import matplotlib +matplotlib.use('TkAgg') +import matplotlib.pyplot as plt +import seaborn as sns +import numpy as np +import pandas as pd +import sys + +from matplotlib.colors import LogNorm + +from joblib import Memory + +# Auto-detect terminal width. +pd.options.display.width = None +pd.options.display.max_rows = 1000 +pd.options.display.max_colwidth = 200 + +# Initialize a persistent memcache. 
+mem = Memory(cachedir='./.cached_plot_book', verbose=0) +mem_hist = Memory(cachedir='./.cached_plot_book_historical', verbose=0) +mem_hist_plot = Memory(cachedir='./.cached_plot_book_historical_heatmap', verbose=0) + + +# Turn these into command line parameters. +SHOW_BEST = False +TIME_STEPS = False +PLOT_HISTORICAL = False + + +# Used to read and cache simulated quotes (best bid/ask). +# Doesn't actually pay attention to symbols yet. +#@mem.cache +def read_book_quotes (file): + print ("Simulated quotes were not cached. This will take a minute.") + df = pd.read_pickle(file, compression='bz2') + + if len(df) <= 0: + print ("There appear to be no simulated quotes.") + sys.exit() + + print ("Cached simulated quotes.") + return df + + +# Used to read historical national best bid/ask spread. +@mem_hist.cache +def read_historical_quotes (file, symbol): + print ("Historical quotes were not cached. This will take a minute.") + df = pd.read_pickle(file, compression='bz2') + + if len(df) <= 0: + print ("There appear to be no historical quotes.") + sys.exit() + + df = df.loc[symbol] + + return df + + +# Used to cache the transformed historical dataframe for a symbol. +@mem_hist_plot.cache +def prepare_histogram (df_hist): + print ("Historical dataframe transformation was not cached. 
This will take a minute.") + + min_quote = df_hist['BEST_BID'].min() + max_quote = df_hist['BEST_ASK'].max() + + quote_range = pd.Series(np.arange(min_quote, max_quote + 0.01, 0.01)).round(2).map(str) + quote_range = quote_range.str.pad(6, side='right', fillchar='0') + + df = pd.DataFrame(index=df_hist.index, columns=quote_range) + df[:] = 0 + + i = 0 + + for idx in df.index: + if i % 1000 == 0: print ("Caching {}".format(idx)) + + col = '{:0.2f}'.format(round(df_hist.loc[idx].BEST_BID, 2)) + val = -df_hist.loc[idx].BEST_BIDSIZ + df.loc[idx,col] = val + + col = '{:0.2f}'.format(round(df_hist.loc[idx].BEST_ASK, 2)) + val = df_hist.loc[idx].BEST_ASKSIZ + df.loc[idx,col] = val + + i += 1 + + return df + + +# Main program starts here. + +if len(sys.argv) < 2: + print ("Usage: python book_plot.py ") + sys.exit() + +book_file = sys.argv[1] + +print ("Visualizing order book from {}".format(book_file)) + +sns.set() + +df_book = read_book_quotes(book_file) +#df_hist = read_historical_quotes('./data/nbbo/nbbo_2018/nbbom_20180518.bgz', 'IBM') + +fig = plt.figure(figsize=(12,9)) + +# Use this to make all volume positive (ASK volume is negative in the dataframe). +#df_book.Volume = df_book.Volume.abs() + +# Use this to swap the sign of BID vs ASK volume (to better fit a colormap, perhaps). +#df_book.Volume = df_book.Volume * -1 + +# Use this to clip volume to an upper limit. +#df_book.Volume = df_book.Volume.clip(lower=-400,upper=400) + +# Use this to turn zero volume into np.nan (useful for some plot types). +#df_book.Volume[df_book.Volume == 0] = np.nan + +# This section colors the best bid, best ask, and bid/ask midpoint +# differently from the rest of the heatmap below, by substituting +# special values at those indices. It is important to do this while +# the price index is still numeric. We use a value outside the +# range of actual order book volumes (selected dynamically). 
+min_volume = df_book.Volume.min() +max_volume = df_book.Volume.max() + +best_bid_value = min_volume - 1000 +best_ask_value = min_volume - 1001 +midpoint_value = min_volume - 1002 + +# This converts the DateTimeIndex to integer nanoseconds since market open. We use +# these as our time steps for discrete time simulations (e.g. SRG config). +if TIME_STEPS: + df_book = df_book.unstack(1) + t = df_book.index.get_level_values(0) - df_book.index.get_level_values(0)[0] + df_book.index = (t / np.timedelta64(1, 'ns')).astype(np.int64) + df_book = df_book.stack() + + +# Use this to restrict plotting to a certain time of day. Depending on quote frequency, +# plotting could be very slow without this. +#df_book = df_book.unstack(1) +#df_book = df_book.between_time('11:50:00', '12:10:00') +#df_book = df_book.stack() + + + +if SHOW_BEST: + + df_book = df_book.unstack(1) + df_book.columns = df_book.columns.droplevel(0) + + # Now row (single) index is time. Column (single) index is quote price. + + # In temporary data frame, find best bid per (time) row. + # Copy bids only. + best_bid = df_book[df_book < 0].copy() + + # Replace every non-zero bid volume with the column header (quote price) instead. + for col in best_bid.columns: + c = best_bid[col] + c[c < 0] = col + + # Copy asks only. + best_ask = df_book[df_book > 0].copy() + + # Replace every non-zero ask volume with the column header (quote price) instead. + for col in best_ask.columns: + c = best_ask[col] + c[c > 0] = col + + # In a new column in each temporary data frame, compute the best bid or ask. + best_bid['best'] = best_bid.idxmax(axis=1) + best_ask['best'] = best_ask.idxmin(axis=1) + + # Iterate over the index (all three DF have the same index) and set the special + # best bid/ask value in the correct column(s) per row. Also compute and include + # the midpoint where possible. 
+ for idx in df_book.index: + bb = best_bid.loc[idx,'best'] + #if bb: df_book.loc[idx,bb] = best_bid_value + + ba = best_ask.loc[idx,'best'] + #if ba: df_book.loc[idx,ba] = best_ask_value + + if ba and bb: df_book.loc[idx,round((ba+bb)/2)] = midpoint_value + + + # Put the data frame indices back the way they were and ensure it is a DataFrame, + # not a Series. + df_book = df_book.stack() + df_book = pd.DataFrame(data=df_book) + df_book.columns = ['Volume'] + + +# Change the MultiIndex to time and dollars. +df_book['Time'] = df_book.index.get_level_values(0) +df_book['Price'] = df_book.index.get_level_values(1) + +# Use this to restrict plotting to a certain range of prices. +#df_book = df_book.loc[(df_book.Price > 98500) & (df_book.Price < 101500)] + +# Use this to pad price strings for appearance. +#df_book.Price = df_book.Price.map(str) +#df_book.Price = df_book.Price.str.pad(6, side='right', fillchar='0') + +df_book.set_index(['Time', 'Price'], inplace=True) + +# This section makes a 2-D histogram (time vs price, color == volume) +unstacked = df_book.unstack(1) +if not TIME_STEPS: unstacked.index = unstacked.index.time +unstacked.columns = unstacked.columns.droplevel(0) + +with sns.axes_style("white"): + ax = sns.heatmap(unstacked, cmap='seismic', mask=unstacked < min_volume, vmin=min_volume, cbar_kws={'label': 'Shares Available'}, center=0, antialiased = False) + +ax.set(xlabel='Quoted Price', ylabel='Quote Time') + +# Plot layers of best bid, best ask, and midpoint in special colors. 
+#best_bids = unstacked[unstacked == best_bid_value].copy().notnull() +midpoints = unstacked[unstacked == midpoint_value].copy().notnull() +#best_asks = unstacked[unstacked == best_ask_value].copy().notnull() + +if SHOW_BEST: + #sns.heatmap(best_bids, cmap=['xkcd:hot purple'], mask=~best_bids, cbar=False, ax=ax) + #sns.heatmap(midpoints, cmap=['xkcd:hot green'], mask=~midpoints, cbar=False, ax=ax) + sns.heatmap(midpoints, cmap=['black'], mask=~midpoints, cbar=False, ax=ax) + #sns.heatmap(best_asks, cmap=['xkcd:hot pink'], mask=~best_asks, cbar=False, ax=ax) + +plt.tight_layout() + +# This section plots the historical order book (no depth available). +if PLOT_HISTORICAL: + fig = plt.figure(figsize=(12,9)) + + df_hist = df_hist.between_time('9:30', '16:00') + #df_hist = df_hist.between_time('10:00', '10:05') + df_hist = df_hist.resample('1S').last().ffill() + + df = prepare_histogram(df_hist) + df.index = df.index.time + + # There's no order book depth anyway, so make all bids the same volume + # and all asks the same volume, so they're easy to see. + df[df > 0] = 1 + df[df < 0] = -1 + + ax = sns.heatmap(df, cmap=sns.color_palette("coolwarm", 7), cbar_kws={'label': 'Shares Available'}, center=0) + ax.set(xlabel='Quoted Price', ylabel='Quote Time') + + plt.tight_layout() + +# Show all the plots. +plt.show() + diff --git a/cli/dump.py b/cli/dump.py new file mode 100644 index 000000000..c3c9ae1f1 --- /dev/null +++ b/cli/dump.py @@ -0,0 +1,23 @@ +import pandas as pd +import sys + +# Auto-detect terminal width. 
pd.options.display.width = None
pd.options.display.max_rows = 500000
pd.options.display.max_colwidth = 200

# dump.py prints a bz2-compressed pickled DataFrame (an agent or exchange log),
# optionally restricted to rows whose EventType matches any of the given names.
if len(sys.argv) < 2:
    print ("Usage: python dump.py <DataFrame file> [List of Event Types]")
    # A usage error is a failure; report it to the calling shell.
    sys.exit(1)

file = sys.argv[1]

# NOTE(security): unpickling can execute arbitrary code.  Only dump files
# produced by a trusted simulation run.
df = pd.read_pickle(file, compression='bz2')

if len(sys.argv) > 2:
    # The event types are OR-ed together into a single regular expression.
    events = sys.argv[2:]
    event = "|".join(events)
    df = df[df['EventType'].str.contains(event)]

print(df)

# ---------------------------------------------------------------------------
# diff --git a/cli/event_midpoint.py b/cli/event_midpoint.py
# new file mode 100644
# index 000000000..8f36b99ee
# --- /dev/null
# +++ b/cli/event_midpoint.py
# @@ -0,0 +1,203 @@
# ---------------------------------------------------------------------------
import ast
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
import pandas as pd
import os
import re
import sys

from joblib import Memory

# Auto-detect terminal width.
pd.options.display.width = None
pd.options.display.max_rows = 1000
pd.options.display.max_colwidth = 200

# Initialize a persistent memcache.
# NOTE(review): newer joblib releases spell this keyword `location` -- confirm
# against the installed version before re-enabling the cache decorator below.
mem_sim = Memory(cachedir='./.cached_plot_sim', verbose=0)


# Linewidth for plots.
LW = 2

# Rolling window for smoothing.
#SIM_WINDOW = 250
SIM_WINDOW = 1


# Used to read and cache simulated quotes.
# Doesn't actually pay attention to symbols yet.
#@mem_sim.cache
def read_simulated_quotes (file, symbol):
    """Load best bid/ask quote events from a simulation log.

    file is a bz2-compressed pickled DataFrame with 'EventType' and 'Event'
    columns; symbol is currently ignored.  Each BEST_BID / BEST_ASK event is
    split on commas into three fields, of which the second is read as a
    '$'-prefixed price and the third as a volume.

    Returns a DataFrame (outer join of bids and asks on the index) with
    forward/back-filled BEST_BID, BEST_ASK, BEST_BID_VOL, BEST_ASK_VOL columns
    and their MIDPOINT.  Exits the program if either side has no quotes.
    """
    print ("Simulated quotes were not cached. This will take a minute.")
    df = pd.read_pickle(file, compression='bz2')
    df['Timestamp'] = df.index

    # Keep only the last bid and last ask event at each timestamp.
    df = df.drop_duplicates(subset=['Timestamp','EventType'], keep='last')

    del df['Timestamp']

    df_bid = df[df['EventType'] == 'BEST_BID'].copy()
    df_ask = df[df['EventType'] == 'BEST_ASK'].copy()

    # Test the filtered frames: a log full of other event types still has
    # no quotes, and a one-sided book cannot produce a midpoint.
    if df_bid.empty or df_ask.empty:
        print ("There appear to be no simulated quotes.")
        sys.exit(1)

    # Split each event string once and reuse the pieces for both columns.
    bid_fields = df_bid['Event'].str.split(',')
    ask_fields = df_ask['Event'].str.split(',')

    df_bid['BEST_BID'] = [b for s,b,bv in bid_fields]
    df_bid['BEST_BID_VOL'] = [bv for s,b,bv in bid_fields]
    df_ask['BEST_ASK'] = [a for s,a,av in ask_fields]
    df_ask['BEST_ASK_VOL'] = [av for s,a,av in ask_fields]

    # '$' is a regex metacharacter, so ask for a literal replacement.
    df_bid['BEST_BID'] = df_bid['BEST_BID'].str.replace('$','', regex=False).astype('float64')
    df_ask['BEST_ASK'] = df_ask['BEST_ASK'].str.replace('$','', regex=False).astype('float64')

    df_bid['BEST_BID_VOL'] = df_bid['BEST_BID_VOL'].astype('float64')
    df_ask['BEST_ASK_VOL'] = df_ask['BEST_ASK_VOL'].astype('float64')

    df = df_bid.join(df_ask, how='outer', lsuffix='.bid', rsuffix='.ask')
    df['BEST_BID'] = df['BEST_BID'].ffill().bfill()
    df['BEST_ASK'] = df['BEST_ASK'].ffill().bfill()
    df['BEST_BID_VOL'] = df['BEST_BID_VOL'].ffill().bfill()
    df['BEST_ASK_VOL'] = df['BEST_ASK_VOL'].ffill().bfill()

    df['MIDPOINT'] = (df['BEST_BID'] + df['BEST_ASK']) / 2.0

    return df



# Main program starts here.

if len(sys.argv) < 3:
    print ("Usage: python event_midpoint.py <Ticker symbol> <Simulator DataFrame file(s)>")
    sys.exit(1)

# TODO: only really works for one symbol right now.

symbol = sys.argv[1]
sim_files = sys.argv[2:]

fig,ax = plt.subplots(figsize=(12,9), nrows=1, ncols=1)



# Plot each impact simulation with the baseline subtracted (i.e. residual effect).
i = 1
legend = []
#legend = ['baseline']

# Events is now a dictionary of event lists (key == greed parameter).
events = {}

first_date = None
impact_time = 200

for sim_file in sim_files:

    # Skip baseline files.
    if 'baseline' in sim_file: continue

    # Group plots by greed parameter.  The parameter is three digits taken
    # from the path ("greedNNN_"); fall back to the generic group when the
    # directory mentions greed but the pattern is absent.
    g = 'greed'
    if 'greed' in os.path.dirname(sim_file):
        m = re.search(r"greed(\d\d\d)_", sim_file)
        if m is not None:
            g = m.group(1)

    # The baseline run lives in a sibling directory named "<dir>_baseline".
    baseline_file = os.path.join(os.path.dirname(sim_file) + '_baseline', os.path.basename(sim_file))
    print ("Visualizing simulation baseline from {}".format(baseline_file))

    df_baseline = read_simulated_quotes(baseline_file, symbol)

    # Read the event file.
    print ("Visualizing simulated {} from {}".format(symbol, sim_file))

    df_sim = read_simulated_quotes(sim_file, symbol)

    plt.rcParams.update({'font.size': 12})

    # Given nanosecond ("time step") data, we can just force everything to
    # fill out an integer index of nanoseconds.
    rng = pd.date_range(start=df_sim.index[0], end=df_sim.index[-1], freq='1N')

    df_baseline = df_baseline[~df_baseline.index.duplicated(keep='last')]
    df_baseline = df_baseline.reindex(rng,method='ffill')
    df_baseline = df_baseline.reset_index(drop=True)

    df_sim = df_sim[~df_sim.index.duplicated(keep='last')]
    df_sim = df_sim.reindex(rng,method='ffill')
    df_sim = df_sim.reset_index(drop=True)

    # Absolute price difference.
    #s = df_sim['MIDPOINT'] - df_baseline['MIDPOINT']

    # Relative price difference.
    s = (df_sim['MIDPOINT'] / df_baseline['MIDPOINT']) - 1.0

    s = s.rolling(window=SIM_WINDOW).mean()
    s.name = sim_file

    events.setdefault(g, []).append(s.copy())

    i += 1


# Now have a list of series (each an event) that are time-aligned.  BUT the data is
# still aperiodic, so they may not have the exact same index timestamps.

legend = []

for g in events:
    df = pd.DataFrame()
    legend.append("greed = " + str(g))

    for s in events[g]:
        print ("Joining {}".format(s.name))
        df = df.join(s, how='outer')

    df.dropna(how='all', inplace=True)
    df = df.ffill().bfill()

    # Smooth after combining means at each instant-of-trade.
    #df.mean(axis=1).rolling(window=250).mean().plot(grid=True, linewidth=LW, ax=ax)

    # No additional smoothing.
    m = df.mean(axis=1)
    s = df.std(axis=1)

    # Plot mean and std.
    m.plot(grid=True, linewidth=LW, ax=ax, fontsize=12)

    # Fill std region?
    #ax.fill_between(m.index, m-s, m+s, alpha=0.2)


# Do the rest a single time for the whole plot.

# If we need a vertical "time of event" line...
#ax.axvline(x=100, color='0.5', linestyle='--', linewidth=2)

# Absolute or relative time labels...
ax.set_xticklabels(['0','10000','20000','30000','40000','50000','60000','70000'])
#ax.set_xticklabels(['T-30', 'T-20', 'T-10', 'T', 'T+10', 'T+20', 'T+30'])

ax.legend(legend)

# Force y axis limits to make multiple plots line up exactly...
#ax.set_ylim(-0.0065,0.0010)

# If an in-figure super title is required...
#plt.suptitle('Impact Event Study: {}'.format(symbol))

ax.set_xlabel('Relative Time (ms)', fontsize=12)
ax.set_ylabel('Baseline-Relative Price', fontsize=12)

#plt.savefig('IABS_SELL_100_multi_size.png')

plt.show()

# ---------------------------------------------------------------------------
# diff --git a/cli/event_ticker.py b/cli/event_ticker.py
# new file mode 100644
# index 000000000..dd7a7c918
# --- /dev/null
# +++ b/cli/event_ticker.py
# @@ -0,0 +1,152 @@
# ---------------------------------------------------------------------------
import ast
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
import pandas as pd
import os
import re
import sys

from joblib import Memory

# Auto-detect terminal width.
pd.options.display.width = None
pd.options.display.max_rows = 1000
pd.options.display.max_colwidth = 200

# Initialize a persistent memcache.
mem_sim = Memory(cachedir='./.cached_plot_sim', verbose=0)


# Linewidth for plots.
LW = 2

# Rolling window for smoothing.
#SIM_WINDOW = 250
SIM_WINDOW = 1


# Used to read and cache simulated trades.
# Doesn't actually pay attention to symbols yet.
#@mem_sim.cache
def read_simulated_trades (file, symbol):
    """Load LAST_TRADE events from a simulation log.

    file is a bz2-compressed pickled DataFrame with 'EventType' and 'Event'
    columns; symbol is currently ignored.  Each event is split on a comma
    into a size followed by a '$'-prefixed price.

    Returns the LAST_TRADE rows with float PRICE and SIZE columns added.
    Exits the program if the log has no trades.
    """
    #print ("Simulated trades were not cached. This will take a minute.")
    df = pd.read_pickle(file, compression='bz2')

    # Copy the filtered rows so the new columns are added to an independent
    # frame rather than to a view of the full log.
    df = df[df['EventType'] == 'LAST_TRADE'].copy()

    if df.empty:
        print ("There appear to be no simulated trades.")
        sys.exit(1)

    fields = df['Event'].str.split(',')
    df['PRICE'] = [y for x,y in fields]
    df['SIZE'] = [x for x,y in fields]

    # '$' is a regex metacharacter, so ask for a literal replacement.
    df['PRICE'] = df['PRICE'].str.replace('$','', regex=False).astype('float64')
    df['SIZE'] = df['SIZE'].astype('float64')

    return df


# Main program starts here.

if len(sys.argv) < 3:
    print ("Usage: python event_ticker.py <Ticker symbol> <Simulator DataFrame file(s)>")
    sys.exit(1)

# TODO: only really works for one symbol right now.

symbol = sys.argv[1]
sim_files = sys.argv[2:]

fig,ax = plt.subplots(figsize=(12,9), nrows=1, ncols=1)



# Plot each impact simulation with the baseline subtracted (i.e. residual effect).
i = 1
legend = []
#legend = ['baseline']

events = []

first_date = None
impact_time = 200

for sim_file in sim_files:

    # Skip baseline files.
    if 'baseline' in sim_file: continue

    # The baseline run lives in a sibling directory named "<dir>_baseline".
    baseline_file = os.path.join(os.path.dirname(sim_file) + '_baseline', os.path.basename(sim_file))
    print ("Visualizing simulation baseline from {}".format(baseline_file))

    df_baseline = read_simulated_trades(baseline_file, symbol)

    # Read the event file.
    print ("Visualizing simulated {} from {}".format(symbol, sim_file))

    df_sim = read_simulated_trades(sim_file, symbol)

    plt.rcParams.update({'font.size': 12})

    # Given nanosecond ("time step") data, we can just force everything to
    # fill out an integer index of nanoseconds.
    rng = pd.date_range(start=df_sim.index[0], end=df_sim.index[-1], freq='1N')

    df_baseline = df_baseline[~df_baseline.index.duplicated(keep='last')]
    df_baseline = df_baseline.reindex(rng,method='ffill')
    df_baseline = df_baseline.reset_index(drop=True)

    df_sim = df_sim[~df_sim.index.duplicated(keep='last')]
    df_sim = df_sim.reindex(rng,method='ffill')
    df_sim = df_sim.reset_index(drop=True)

    s = df_sim['PRICE'] - df_baseline['PRICE']
    s = s.rolling(window=SIM_WINDOW).mean()

    s.name = sim_file
    events.append(s.copy())

    i += 1


# Now have a list of series (each an event) that are time-aligned.
df = pd.DataFrame()

for s in events:
    print ("Joining {}".format(s.name))
    df = df.join(s, how='outer')

df.dropna(how='all', inplace=True)
df = df.ffill().bfill()

# Smooth after combining means at each instant-of-trade.
#df.mean(axis=1).rolling(window=250).mean().plot(grid=True, linewidth=LW, ax=ax)

# No additional smoothing.
m = df.mean(axis=1)
s = df.std(axis=1)

# Plot mean and std.
m.plot(grid=True, linewidth=LW, ax=ax)

# Shade the stdev region?
ax.fill_between(m.index, m-s, m+s, alpha=0.2)

# Override prettier axis ticks...
#ax.set_xticklabels(['T-30', 'T-20', 'T-10', 'T', 'T+10', 'T+20', 'T+30'])

# Force y axis limits to match some other plot.
#ax.set_ylim(-0.1, 0.5)

# Set a super title if required.
plt.suptitle('Impact Event Study: {}'.format(symbol))

ax.set_xlabel('Relative Time')
ax.set_ylabel('Baseline-Relative Price')

#plt.savefig('background_{}.png'.format(b))

plt.show()


# ---------------------------------------------------------------------------
# diff --git a/cli/midpoint_plot.py b/cli/midpoint_plot.py
# new file mode 100644
# index 000000000..033521397
# --- /dev/null
# +++ b/cli/midpoint_plot.py
# @@ -0,0 +1,163 @@
# ---------------------------------------------------------------------------
import ast
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
import pandas as pd
import os
import sys

from joblib import Memory

# Auto-detect terminal width.
pd.options.display.width = None
pd.options.display.max_rows = 1000
pd.options.display.max_colwidth = 200

# Initialize a persistent memcache.
# NOTE(review): newer joblib releases spell this keyword `location` -- confirm
# against the installed version before re-enabling the cache decorator below.
mem_hist = Memory(cachedir='./.cached_plot_hist', verbose=0)
mem_sim = Memory(cachedir='./.cached_plot_sim', verbose=0)


# Plot the baseline run as well, and (if so) plot only the difference.
PRINT_BASELINE = True
PRINT_DELTA_ONLY = True

# Time-of-day window to plot.
BETWEEN_START = pd.to_datetime('09:30').time()
BETWEEN_END = pd.to_datetime('09:30:00.000001').time()

# Linewidth for plots.
LW = 2

# Used to read and cache simulated quotes.
# Doesn't actually pay attention to symbols yet.
#@mem_sim.cache
def read_simulated_quotes (file, symbol):
    """Load best bid/ask quote events from a simulation log.

    file is a bz2-compressed pickled DataFrame with 'EventType' and 'Event'
    columns; symbol is currently ignored.  Each BEST_BID / BEST_ASK event is
    split on commas into three fields, of which the second is read as a
    '$'-prefixed price and the third as a volume.

    Returns a DataFrame (outer join of bids and asks on the index) with
    forward/back-filled BEST_BID, BEST_ASK, BEST_BID_VOL, BEST_ASK_VOL columns
    and their MIDPOINT.  Exits the program if either side has no quotes.
    """
    print ("Simulated quotes were not cached. This will take a minute.")
    df = pd.read_pickle(file, compression='bz2')
    df['Timestamp'] = df.index

    # Keep only the last bid and last ask event at each timestamp.
    df = df.drop_duplicates(subset=['Timestamp','EventType'], keep='last')

    del df['Timestamp']

    df_bid = df[df['EventType'] == 'BEST_BID'].copy()
    df_ask = df[df['EventType'] == 'BEST_ASK'].copy()

    # Test the filtered frames: a log full of other event types still has
    # no quotes, and a one-sided book cannot produce a midpoint.
    if df_bid.empty or df_ask.empty:
        print ("There appear to be no simulated quotes.")
        sys.exit(1)

    # Split each event string once and reuse the pieces for both columns.
    bid_fields = df_bid['Event'].str.split(',')
    ask_fields = df_ask['Event'].str.split(',')

    df_bid['BEST_BID'] = [b for s,b,bv in bid_fields]
    df_bid['BEST_BID_VOL'] = [bv for s,b,bv in bid_fields]
    df_ask['BEST_ASK'] = [a for s,a,av in ask_fields]
    df_ask['BEST_ASK_VOL'] = [av for s,a,av in ask_fields]

    # '$' is a regex metacharacter, so ask for a literal replacement.
    df_bid['BEST_BID'] = df_bid['BEST_BID'].str.replace('$','', regex=False).astype('float64')
    df_ask['BEST_ASK'] = df_ask['BEST_ASK'].str.replace('$','', regex=False).astype('float64')

    df_bid['BEST_BID_VOL'] = df_bid['BEST_BID_VOL'].astype('float64')
    df_ask['BEST_ASK_VOL'] = df_ask['BEST_ASK_VOL'].astype('float64')

    df = df_bid.join(df_ask, how='outer', lsuffix='.bid', rsuffix='.ask')
    df['BEST_BID'] = df['BEST_BID'].ffill().bfill()
    df['BEST_ASK'] = df['BEST_ASK'].ffill().bfill()
    df['BEST_BID_VOL'] = df['BEST_BID_VOL'].ffill().bfill()
    df['BEST_ASK_VOL'] = df['BEST_ASK_VOL'].ffill().bfill()

    df['MIDPOINT'] = (df['BEST_BID'] + df['BEST_ASK']) / 2.0

    return df



# Main program starts here.

if len(sys.argv) < 3:
    print ("Usage: python midpoint_plot.py <Ticker symbol> <Simulator DataFrame file>")
    sys.exit(1)

# TODO: only really works for one symbol right now.

symbol = sys.argv[1]
sim_file = sys.argv[2]

print ("Visualizing simulated {} from {}".format(symbol, sim_file))
df_sim = read_simulated_quotes(sim_file, symbol)

if PRINT_BASELINE:
    # The baseline run lives in a sibling directory named "<dir>_baseline".
    baseline_file = os.path.join(os.path.dirname(sim_file) + '_baseline', os.path.basename(sim_file))
    print (baseline_file)
    df_baseline = read_simulated_quotes(baseline_file, symbol)

plt.rcParams.update({'font.size': 12})



# Use to restrict time to plot.
df_sim = df_sim.between_time(BETWEEN_START, BETWEEN_END)

if PRINT_BASELINE:
    df_baseline = df_baseline.between_time(BETWEEN_START, BETWEEN_END)

# Every branch below indexes df_sim.index[0]; fail with a readable message
# instead of an IndexError when the window holds no quotes.
if df_sim.empty:
    print ("There are no simulated quotes between {} and {}.".format(BETWEEN_START, BETWEEN_END))
    sys.exit(1)

fig,ax = plt.subplots(figsize=(12,9), nrows=1, ncols=1)
axes = [ax]

# For smoothing...
#hist_window = 100
#sim_window = 100

hist_window = 1
sim_window = 1

if PRINT_BASELINE:
    # For nanosecond experiments, turn it into int index.  Pandas gets weird if all
    # the times vary only by a few nanoseconds.
    rng = pd.date_range(start=df_sim.index[0], end=df_sim.index[-1], freq='1N')

    df_baseline = df_baseline[~df_baseline.index.duplicated(keep='last')]
    df_baseline = df_baseline.reindex(rng,method='ffill')
    df_baseline = df_baseline.reset_index(drop=True)

    df_sim = df_sim[~df_sim.index.duplicated(keep='last')]
    df_sim = df_sim.reindex(rng,method='ffill')
    df_sim = df_sim.reset_index(drop=True)

    if PRINT_DELTA_ONLY:
        # Print the difference as a single series.
        df_diff = df_sim['MIDPOINT'] - df_baseline['MIDPOINT']

        # Smoothing.
        df_diff = df_diff.rolling(window=10).mean()

        df_diff.plot(color='C0', grid=True, linewidth=LW, ax=axes[0])

        axes[0].legend(['Bid-ask Midpoint Delta'])
    else:
        # Print both separately.
        df_baseline['MIDPOINT'].plot(color='C0', grid=True, linewidth=LW, ax=axes[0])
        df_sim['MIDPOINT'].plot(color='C1', grid=True, linewidth=LW, alpha=0.9, ax=axes[0])

        axes[0].legend(['Baseline', 'With Impact'])

else:
    #df_sim['PRICE'] = df_sim['PRICE'].rolling(window=sim_window).mean()

    # For nanosecond experiments, turn it into int index.  Pandas gets weird if all
    # the times vary only by a few nanoseconds.
    rng = pd.date_range(start=df_sim.index[0], end=df_sim.index[-1], freq='1N')
    df_sim = df_sim[~df_sim.index.duplicated(keep='last')]
    df_sim = df_sim.reindex(rng,method='ffill')
    df_sim = df_sim.reset_index(drop=True)
    df_sim['MIDPOINT'].plot(color='C1', grid=True, linewidth=LW, alpha=0.9, ax=axes[0])
    axes[0].legend(['Simulated'])


plt.suptitle('Bid-Ask Midpoint: {}'.format(symbol))

axes[0].set_ylabel('Quote Price')
axes[0].set_xlabel('Quote Time')

#plt.savefig('background_{}.png'.format(b))

plt.show()

# ---------------------------------------------------------------------------
# diff --git a/cli/profile.py b/cli/profile.py
# new file mode 100644
# index 000000000..c13c1046c
# --- /dev/null
# +++ b/cli/profile.py
# @@ -0,0 +1,16 @@
# ---------------------------------------------------------------------------
import pstats
import sys

# profile.py prints the top 50 entries of the profile dump 'runstats.prof'
# (read from the current directory), sorted by the requested field.
if len(sys.argv) < 2:
    print ('Usage: python cli/profile.py <sort by field>')
    sys.exit(1)

field = sys.argv[1]

if field not in ['time', 'cumulative', 'tottime', 'cumtime', 'ncalls']:
    print ('Sort by field must be one of: time, cumulative, tottime, cumtime, ncalls.')
    sys.exit(1)

p = pstats.Stats('runstats.prof')
p.strip_dirs().sort_stats(field).print_stats(50)

# ---------------------------------------------------------------------------
# diff --git a/cli/quote_plot.py b/cli/quote_plot.py
# new file mode 100644
# index 000000000..20180a444
# --- /dev/null
# +++ b/cli/quote_plot.py
# @@ -0,0 +1,108 @@
# ---------------------------------------------------------------------------
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
import pandas as pd
import sys

from joblib import Memory

# Auto-detect terminal width.
pd.options.display.width = None
pd.options.display.max_rows = 1000
pd.options.display.max_colwidth = 200

# Initialize a persistent memcache.
# NOTE(review): newer joblib releases spell this keyword `location` -- confirm
# against the installed version before re-enabling the cache decorator below.
mem_sim = Memory(cachedir='./.cached_plot_sim', verbose=0)


# Used to read and cache simulated quotes (best bid/ask).
# Doesn't actually pay attention to symbols yet.
#@mem_sim.cache
def read_simulated_quotes (file, symbol):
    """Load best bid/ask quote events from a simulation log.

    file is a bz2-compressed pickled DataFrame with 'EventType' and 'Event'
    columns; symbol is currently ignored.  Each BEST_BID / BEST_ASK event is
    split on commas into three fields, of which the second is read as a
    '$'-prefixed price and the third as a volume.

    Returns a DataFrame (outer join of bids and asks on the index) with
    forward/back-filled BEST_BID, BEST_ASK, BEST_BID_VOL, BEST_ASK_VOL columns
    and their MIDPOINT.  Exits the program if either side has no quotes.
    """
    print ("Simulated quotes were not cached. This will take a minute.")
    df = pd.read_pickle(file, compression='bz2')
    df['Timestamp'] = df.index

    # Keep only the last bid and last ask event at each timestamp.
    df = df.drop_duplicates(subset=['Timestamp','EventType'], keep='last')

    del df['Timestamp']

    df_bid = df[df['EventType'] == 'BEST_BID'].copy()
    df_ask = df[df['EventType'] == 'BEST_ASK'].copy()

    # Test the filtered frames: a log full of other event types still has
    # no quotes, and a one-sided book cannot produce a midpoint.
    if df_bid.empty or df_ask.empty:
        print ("There appear to be no simulated quotes.")
        sys.exit(1)

    # Split each event string once and reuse the pieces for both columns.
    bid_fields = df_bid['Event'].str.split(',')
    ask_fields = df_ask['Event'].str.split(',')

    df_bid['BEST_BID'] = [b for s,b,bv in bid_fields]
    df_bid['BEST_BID_VOL'] = [bv for s,b,bv in bid_fields]
    df_ask['BEST_ASK'] = [a for s,a,av in ask_fields]
    df_ask['BEST_ASK_VOL'] = [av for s,a,av in ask_fields]

    # '$' is a regex metacharacter, so ask for a literal replacement.
    df_bid['BEST_BID'] = df_bid['BEST_BID'].str.replace('$','', regex=False).astype('float64')
    df_ask['BEST_ASK'] = df_ask['BEST_ASK'].str.replace('$','', regex=False).astype('float64')

    df_bid['BEST_BID_VOL'] = df_bid['BEST_BID_VOL'].astype('float64')
    df_ask['BEST_ASK_VOL'] = df_ask['BEST_ASK_VOL'].astype('float64')

    df = df_bid.join(df_ask, how='outer', lsuffix='.bid', rsuffix='.ask')
    df['BEST_BID'] = df['BEST_BID'].ffill().bfill()
    df['BEST_ASK'] = df['BEST_ASK'].ffill().bfill()
    df['BEST_BID_VOL'] = df['BEST_BID_VOL'].ffill().bfill()
    df['BEST_ASK_VOL'] = df['BEST_ASK_VOL'].ffill().bfill()

    df['MIDPOINT'] = (df['BEST_BID'] + df['BEST_ASK']) / 2.0

    return df


# Main program starts here.

# Both the symbol (argv[1]) and the simulation file (argv[2]) are read
# below, so three argv entries are required.
if len(sys.argv) < 3:
    print ("Usage: python quote_plot.py <Ticker symbol> <Simulator DataFrame file>")
    sys.exit(1)

# TODO: only really works for one symbol right now.

symbol = sys.argv[1]
sim_file = sys.argv[2]

print ("Visualizing {} from {}".format(symbol, sim_file))

plt.rcParams.update({'font.size': 12})

df_sim = read_simulated_quotes(sim_file, symbol)

# Top panel: best bid/ask prices.  Bottom panel: volume at the best bid/ask.
fig,axes = plt.subplots(figsize=(12,9), nrows=2, ncols=1)

# Crop figures to desired times and price scales.
#df_hist = df_hist.between_time('9:46', '13:30')
#df_sim = df_sim.between_time('10:00:00', '10:00:30')

# For nanosecond experiments, turn it into int index.  Pandas gets weird if all
# the times vary only by a few nanoseconds.
df_sim = df_sim.reset_index(drop=True)

df_sim['BEST_BID'].plot(color='C0', grid=True, linewidth=1, ax=axes[0])
df_sim['BEST_ASK'].plot(color='C1', grid=True, linewidth=1, ax=axes[0])
#df_sim['MIDPOINT'].plot(color='C2', grid=True, linewidth=1, ax=axes[0])

df_sim['BEST_BID_VOL'].plot(color='C3', linewidth=1, ax=axes[1])
df_sim['BEST_ASK_VOL'].plot(color='C4', linewidth=1, ax=axes[1])

# One label per plotted line.  Add 'Midpoint' back if the midpoint plot
# above is re-enabled.
axes[0].legend(['Best Bid', 'Best Ask'])
axes[1].legend(['Best Bid Vol', 'Best Ask Vol'])

plt.suptitle('Best Bid/Ask: {}'.format(symbol))

axes[0].set_ylabel('Quote Price')
axes[1].set_xlabel('Quote Time')
axes[1].set_ylabel('Quote Volume')

# Hide the price panel's x axis; only the volume panel carries the label.
axes[0].get_xaxis().set_visible(False)

#plt.savefig('background_{}.png'.format(b))

plt.show()

# ---------------------------------------------------------------------------
# diff --git a/cli/read_agent_logs.py b/cli/read_agent_logs.py
# new file mode 100644
# index 000000000..134729f9a
# --- /dev/null
# +++ b/cli/read_agent_logs.py
# @@ -0,0 +1,60 @@
# ---------------------------------------------------------------------------
import os
import pandas as pd
import sys

# Auto-detect terminal width.
pd.options.display.width = None
pd.options.display.max_rows = 500000
pd.options.display.max_colwidth = 200

if len(sys.argv) < 2:
    print ("Usage: python read_agent_logs.py <log directory> [more log directories]")
    sys.exit(1)


# read_agent_logs.py takes a log directory, reads all agent log files, and produces a summary of
# desired totals or statistics by strategy (type + parameter settings).


# If more than one directory is given, the program aggregates across all of them.

log_dirs = sys.argv[1:]
stats = []

dir_count = 0
file_count = 0

# The event filter is the same for every file, so build it once.
events = [ 'AGENT_TYPE', 'STARTING_CASH', 'ENDING_CASH', 'FINAL_CASH_POSITION', 'FINAL_VALUATION' ]
event = "|".join(events)


def _first_event (df, event_type):
    # First logged payload of the given event type.  Uses positional access:
    # the log is not indexed by integers, so `[0]` would be a label lookup.
    return df.loc[df['EventType'] == event_type, 'Event'].iloc[0]


for log_dir in log_dirs:
    if dir_count % 100 == 0: print ("Completed {} directories".format(dir_count))
    dir_count += 1
    for file in os.listdir(log_dir):
        # NOTE(security): unpickling can execute arbitrary code.  Only read
        # log directories produced by a trusted simulation run.
        df = pd.read_pickle(os.path.join(log_dir,file), compression='bz2')

        df = df[df['EventType'].str.contains(event)]

        at = _first_event(df, 'AGENT_TYPE')

        if 'Exchange' in at:
            # There may be different fields to look at later on.
            continue

        file_count += 1

        sc = _first_event(df, 'STARTING_CASH')
        ec = _first_event(df, 'ENDING_CASH')
        fcp = _first_event(df, 'FINAL_CASH_POSITION')
        fv = _first_event(df, 'FINAL_VALUATION')

        # Return is the change in cash; surplus is the change in cash position
        # plus the final valuation.
        ret = ec - sc
        surp = fcp - sc + fv

        stats.append({ 'AgentType' : at, 'Return' : ret, 'Surplus' : surp })


df_stats = pd.DataFrame(stats)

print (df_stats.groupby('AgentType').mean())

print ("\nRead {} files in {} log directories.".format(file_count, dir_count))

# ---------------------------------------------------------------------------
# diff --git a/cli/stats.py b/cli/stats.py
# new file mode 100644
# index 000000000..8df158194
# --- /dev/null
# +++ b/cli/stats.py
# @@ -0,0 +1,82 @@
# ---------------------------------------------------------------------------
import os
import pandas as pd
import sys

# Auto-detect terminal width.
pd.options.display.width = None
pd.options.display.max_rows = 500000
pd.options.display.max_colwidth = 200

if len(sys.argv) < 2:
    print ("Usage: python stats.py <log directory> [more log directories]")
    sys.exit(1)


# stats.py takes one or more log directories, reads the summary log files, and produces a summary of
# the agent surpluses and returns by strategy (type + parameter settings).


# If more than one directory is given, the program aggregates across all of them.

log_dirs = sys.argv[1:]
agents = {}
games = []
stats = []

dir_count = 0

# The event filter is the same for every file, so build it once.
events = [ 'STARTING_CASH', 'ENDING_CASH', 'FINAL_CASH_POSITION', 'FINAL_VALUATION' ]
event = "|".join(events)

for log_dir in log_dirs:
    if dir_count % 100 == 0: print ("Completed {} directories".format(dir_count))
    dir_count += 1
    for file in os.listdir(log_dir):
        if 'summary' not in file: continue

        # NOTE(security): unpickling can execute arbitrary code.  Only read
        # log directories produced by a trusted simulation run.
        df = pd.read_pickle(os.path.join(log_dir,file), compression='bz2')

        df = df[df['EventType'].str.contains(event)]

        # Each summary file is one game.  Start from an empty table so that
        # agents (and agent types) from a previous game never leak into this one.
        agents = {}

        for x in df.itertuples():
            agent_id = x.AgentID
            if agent_id not in agents:
                agents[agent_id] = { 'AGENT_TYPE' : x.AgentStrategy }
            agents[agent_id][x.EventType] = x.Event

        game_ret = 0
        game_surp = 0

        for agent in agents.values():
            at = agent['AGENT_TYPE']

            # Impact agents are excluded from the statistics.
            if 'Impact' in at: continue

            sc = agent['STARTING_CASH']
            ec = agent['ENDING_CASH']
            fcp = agent['FINAL_CASH_POSITION']
            fv = agent['FINAL_VALUATION']

            # Return is the change in cash; surplus is the change in cash
            # position plus the final valuation.
            ret = ec - sc
            surp = fcp - sc + fv

            game_ret += ret
            game_surp += surp

            stats.append({ 'AgentType' : at, 'Return' : ret, 'Surplus' : surp })

        games.append({ 'GameReturn' : game_ret, 'GameSurplus' : game_surp })


df_stats = pd.DataFrame(stats)
df_game = pd.DataFrame(games)

print ("Agent Mean")
print (df_stats.groupby('AgentType').mean())
print ("Agent Std")
print (df_stats.groupby('AgentType').std())
print ("Game Mean")
print (df_game.mean())
print ("Game Std")
print (df_game.std())

print ("\nRead summary files in {} log directories.".format(dir_count))

# ---------------------------------------------------------------------------
# diff --git (header continues with the next file's paths)
# (diff header, continued) a/cli/ticker_plot.py b/cli/ticker_plot.py
# new file mode 100644
# index 000000000..f44319b90
# --- /dev/null
# +++ b/cli/ticker_plot.py
# @@ -0,0 +1,214 @@
# ---------------------------------------------------------------------------
import ast
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
import pandas as pd
import os
import sys

from joblib import Memory

# Auto-detect terminal width.
pd.options.display.width = None
pd.options.display.max_rows = 1000
pd.options.display.max_colwidth = 200

# Initialize a persistent memcache.
# NOTE(review): newer joblib releases spell this keyword `location` -- confirm
# against the installed version before re-enabling the cache decorators below.
mem_hist = Memory(cachedir='./.cached_plot_hist', verbose=0)
mem_sim = Memory(cachedir='./.cached_plot_sim', verbose=0)


# What to plot alongside the simulated trades.
PRINT_HISTORICAL = False
PRINT_BASELINE = True
PRINT_VOLUME = False

# Time-of-day window to plot.
BETWEEN_START = pd.to_datetime('09:30').time()
BETWEEN_END = pd.to_datetime('09:30:00.000001').time()

# Linewidth for plots.
LW = 2

# Used to read and cache real historical trades.
#@mem_hist.cache
def read_historical_trades (file, symbol):
    """Load historical trades for one symbol, restricted to 9:30-16:00.

    file is a bz2-compressed pickled DataFrame from which the rows for
    symbol are selected with .loc.
    """
    print ("Historical trades were not cached. This will take a minute.")
    df = pd.read_pickle(file, compression='bz2')

    df = df.loc[symbol]
    df = df.between_time('9:30', '16:00')

    return df


# Used to read and cache simulated trades.
# Doesn't actually pay attention to symbols yet.
#@mem_sim.cache
def read_simulated_trades (file, symbol):
    """Load LAST_TRADE events from a simulation log.

    file is a bz2-compressed pickled DataFrame with 'EventType' and 'Event'
    columns; symbol is currently ignored.  Each event is split on a comma
    into a size followed by a '$'-prefixed price.

    Returns the LAST_TRADE rows with float PRICE and SIZE columns added.
    Exits the program if the log has no trades.
    """
    print ("Simulated trades were not cached. This will take a minute.")
    df = pd.read_pickle(file, compression='bz2')

    # Copy the filtered rows so the new columns are added to an independent
    # frame rather than to a view of the full log.
    df = df[df['EventType'] == 'LAST_TRADE'].copy()

    if df.empty:
        print ("There appear to be no simulated trades.")
        sys.exit(1)

    fields = df['Event'].str.split(',')
    df['PRICE'] = [y for x,y in fields]
    df['SIZE'] = [x for x,y in fields]

    # '$' is a regex metacharacter, so ask for a literal replacement.
    df['PRICE'] = df['PRICE'].str.replace('$','', regex=False).astype('float64')
    df['SIZE'] = df['SIZE'].astype('float64')

    return df


# Main program starts here.

if len(sys.argv) < 3:
    print ("Usage: python ticker_plot.py <Ticker symbol> <Simulator DataFrame file> [agent trade log]")
    sys.exit(1)

# TODO: only really works for one symbol right now.

symbol = sys.argv[1]
sim_file = sys.argv[2]

# Optional third argument: one agent's log, whose holdings changes are
# drawn on top of the price plot.
agent_log = None
if len(sys.argv) >= 4: agent_log = sys.argv[3]

print ("Visualizing simulated {} from {}".format(symbol, sim_file))

df_sim = read_simulated_trades(sim_file, symbol)

if PRINT_BASELINE:
    # The baseline run lives in a sibling directory named "<dir>_baseline".
    baseline_file = os.path.join(os.path.dirname(sim_file) + '_baseline', os.path.basename(sim_file))
    print (baseline_file)
    df_baseline = read_simulated_trades(baseline_file, symbol)

# Take the date from the first index and use that to pick the correct historical date for comparison.
if PRINT_HISTORICAL:
    hist_date = pd.to_datetime(df_sim.index[0])
    hist_year = hist_date.strftime('%Y')
    hist_date = hist_date.strftime('%Y%m%d')
    hist_file = "/nethome/cb107/emh/data/trades/trades_{}/ct{}_{}.bgz".format(hist_year, 'm' if int(hist_year) > 2014 else '', hist_date)

    print ("Visualizing historical {} from {}".format(symbol, hist_file))
    df_hist = read_historical_trades(hist_file, symbol)

plt.rcParams.update({'font.size': 12})



# Use to restrict time to plot.
df_sim = df_sim.between_time(BETWEEN_START, BETWEEN_END)
print ("Total simulated volume:", df_sim['SIZE'].sum())

if PRINT_BASELINE:
    df_baseline = df_baseline.between_time(BETWEEN_START, BETWEEN_END)
    print ("Total baseline volume:", df_baseline['SIZE'].sum())

# The plotting branches below index df_sim.index[0]; fail with a readable
# message instead of an IndexError when the window holds no trades.
if df_sim.empty:
    print ("There are no simulated trades between {} and {}.".format(BETWEEN_START, BETWEEN_END))
    sys.exit(1)

if PRINT_VOLUME:
    fig,axes = plt.subplots(figsize=(12,9), nrows=2, ncols=1)
else:
    fig,ax = plt.subplots(figsize=(12,9), nrows=1, ncols=1)
    axes = [ax]

# Crop figures to desired times and price scales.
#df_hist = df_hist.between_time('9:46', '13:30')

# For smoothing...
#hist_window = 100
#sim_window = 100

hist_window = 1
sim_window = 1

if PRINT_HISTORICAL:
    df_hist = df_hist.between_time(BETWEEN_START, BETWEEN_END)
    print ("Total historical volume:", df_hist['SIZE'].sum())

    df_hist['PRICE'] = df_hist['PRICE'].rolling(window=hist_window).mean()
    df_sim['PRICE'] = df_sim['PRICE'].rolling(window=sim_window).mean()

    df_hist['PRICE'].plot(color='C0', grid=True, linewidth=LW, ax=axes[0])
    df_sim['PRICE'].plot(color='C1', grid=True, linewidth=LW, alpha=0.9, ax=axes[0])
    axes[0].legend(['Historical', 'Simulated'])

    if PRINT_VOLUME:
        df_hist['SIZE'].plot(color='C0', linewidth=LW, ax=axes[1])
        df_sim['SIZE'].plot(color='C1', linewidth=LW, alpha=0.9, ax=axes[1])
        axes[1].legend(['Historical Vol', 'Simulated Vol'])
elif PRINT_BASELINE:
    # For nanosecond experiments, turn it into int index.  Pandas gets weird if all
    # the times vary only by a few nanoseconds.
    rng = pd.date_range(start=df_sim.index[0], end=df_sim.index[-1], freq='1N')

    df_baseline = df_baseline[~df_baseline.index.duplicated(keep='last')]
    df_baseline = df_baseline.reindex(rng,method='ffill')
    df_baseline = df_baseline.reset_index(drop=True)

    df_sim = df_sim[~df_sim.index.duplicated(keep='last')]
    df_sim = df_sim.reindex(rng,method='ffill')
    df_sim = df_sim.reset_index(drop=True)

    df_baseline['PRICE'].plot(color='C0', grid=True, linewidth=LW, ax=axes[0])
    df_sim['PRICE'].plot(color='C1', grid=True, linewidth=LW, alpha=0.9, ax=axes[0])

    axes[0].legend(['Baseline', 'With Impact'])

else:
    #df_sim['PRICE'] = df_sim['PRICE'].rolling(window=sim_window).mean()

    # For nanosecond experiments, turn it into int index.  Pandas gets weird if all
    # the times vary only by a few nanoseconds.
    rng = pd.date_range(start=df_sim.index[0], end=df_sim.index[-1], freq='1N')
    df_sim = df_sim[~df_sim.index.duplicated(keep='last')]
    df_sim = df_sim.reindex(rng,method='ffill')
    df_sim = df_sim.reset_index(drop=True)
    df_sim['PRICE'].plot(color='C1', grid=True, linewidth=LW, alpha=0.9, ax=axes[0])
    axes[0].legend(['Simulated'])

    if PRINT_VOLUME:
        df_sim['SIZE'].plot(color='C1', linewidth=LW, alpha=0.9, ax=axes[1])
        axes[1].legend(['Simulated Vol'])

# Superimpose a particular trading agent's trade decisions on top of the ticker
# plot to make it easy to visually see if it is making sensible choices.
if agent_log:
    # NOTE(security): unpickling can execute arbitrary code.  Only read
    # agent logs produced by a trusted simulation run.
    df_agent = pd.read_pickle(agent_log, compression='bz2')
    df_agent = df_agent.between_time(BETWEEN_START, BETWEEN_END)
    df_agent = df_agent[df_agent.EventType == 'HOLDINGS_UPDATED']

    for idx in df_agent.index:
        event = df_agent.loc[idx,'Event']

        # A symbol missing from the holdings is treated like zero shares.
        shares = event[symbol] if symbol in event else 0

        if shares > 0:
            print ("LONG at {}".format(idx))
            axes[0].axvline(x=idx, linewidth=LW, color='g')
        elif shares < 0:
            print ("SHORT at {}".format(idx))
            axes[0].axvline(x=idx, linewidth=LW, color='r')
        else:
            print ("EXIT at {}".format(idx))
            axes[0].axvline(x=idx, linewidth=LW, color='k')

plt.suptitle('Execution Price/Volume: {}'.format(symbol))

axes[0].set_ylabel('Executed Price')

if PRINT_VOLUME:
    axes[1].set_xlabel('Execution Time')
    axes[1].set_ylabel('Executed Volume')
    axes[0].get_xaxis().set_visible(False)
else:
    axes[0].set_xlabel('Execution Time')

#plt.savefig('background_{}.png'.format(b))

plt.show()

# ---------------------------------------------------------------------------
# diff --git a/config/impact.py b/config/impact.py
# new file mode 100644
# index 000000000..8e2258b18
# --- /dev/null
# +++ b/config/impact.py
# @@ -0,0 +1,301 @@
# ---------------------------------------------------------------------------
from Kernel import Kernel
from agent.ExchangeAgent import ExchangeAgent
from agent.HeuristicBeliefLearningAgent import HeuristicBeliefLearningAgent
from agent.ImpactAgent import ImpactAgent
from agent.ZeroIntelligenceAgent import ZeroIntelligenceAgent
from util.order import LimitOrder
from util.oracle.MeanRevertingOracle import MeanRevertingOracle
from util import util

import datetime as dt
import numpy as np
import pandas as pd
import sys


DATA_DIR = "~/data"


# Some config files require additional command line parameters to easily
# control agent or simulation hyperparameters during coarse parallelization.
import argparse

parser = argparse.ArgumentParser(description='Detailed options for impact config.')
parser.add_argument('-b', '--book_freq', default=None,
                    help='Frequency at which to archive order book for visualization')
parser.add_argument('-c', '--config', required=True,
                    help='Name of config file to execute')
parser.add_argument('-g', '--greed', type=float, default=0.25,
                    help='Impact agent greed')
# store_false: the impact trade fires unless -i/--impact is given.
parser.add_argument('-i', '--impact', action='store_false',
                    help='Do not actually fire an impact trade.')
parser.add_argument('-l', '--log_dir', default=None,
                    help='Log directory name (default: unix timestamp at program start)')
parser.add_argument('-n', '--obs_noise', type=float, default=1000000,
                    help='Observation noise variance for zero intelligence agents (sigma^2_n)')
parser.add_argument('-r', '--shock_variance', type=float, default=500000,
                    help='Shock variance for mean reversion process (sigma^2_s)')
parser.add_argument('-o', '--log_orders', action='store_true',
                    help='Log every order-related action by every agent.')
parser.add_argument('-s', '--seed', type=int, default=None,
                    help='numpy.random.seed() for simulation')
parser.add_argument('-v', '--verbose', action='store_true',
                    help='Maximum verbosity!')
parser.add_argument('--config_help', action='store_true',
                    help='Print argument options for this config file')

# Unknown arguments are tolerated and collected in remaining_args.
args, remaining_args = parser.parse_known_args()

if args.config_help:
    parser.print_help()
    sys.exit()

# Historical date to simulate.  Required even if not relevant.
historical_date = pd.to_datetime('2014-01-28')

# Requested log directory.
log_dir = args.log_dir

# Requested order book snapshot archive frequency.
book_freq = args.book_freq

# Observation noise variance for zero intelligence agents.
sigma_n = args.obs_noise

# Shock variance of mean reversion process.
sigma_s = args.shock_variance

# Impact agent greed.
greed = args.greed

# Should the impact agent actually trade?
impact = args.impact

# Random seed specification on the command line.  Default: None (by clock).
# If none, we select one via a specific random method and pass it to seed()
# so we can record it for future use.  (You cannot reasonably obtain the
# automatically generated seed when seed() is called without a parameter.)

# Note that this seed is used to (1) make any random decisions within this
# config file itself and (2) to generate random number seeds for the
# (separate) Random objects given to each agent.  This ensures that when
# the agent population is appended, prior agents will continue to behave
# in the same manner save for influences by the new agents.  (i.e. all prior
# agents still have their own separate PRNG sequence, and it is the same as
# before)

seed = args.seed
# Compare against None so that an explicit "--seed 0" is honored rather than
# silently replaced by a clock-derived seed.
if seed is None: seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2**32 - 1)
np.random.seed(seed)

# Config parameter that causes util.util.print to suppress most output.
# Also suppresses formatting of limit orders (which is time consuming).
util.silent_mode = not args.verbose
LimitOrder.silent_mode = not args.verbose

# Config parameter that causes every order-related action to be logged by
# every agent.  Activate only when really needed as there is a significant
# time penalty to all that object serialization!
log_orders = args.log_orders


print ("Silent mode: {}".format(util.silent_mode))
print ("Logging orders: {}".format(log_orders))
print ("Book freq: {}".format(book_freq))
print ("ZeroIntelligenceAgent noise: {:0.4f}".format(sigma_n))
print ("ImpactAgent greed: {:0.2f}".format(greed))
print ("ImpactAgent firing: {}".format(impact))
print ("Shock variance: {:0.4f}".format(sigma_s))
print ("Configuration seed: {}\n".format(seed))



# Since the simulator often pulls historical data, we use a real-world
# nanosecond timestamp (pandas.Timestamp) for our discrete time "steps",
# which are considered to be nanoseconds.  For other (or abstract) time
# units, one can either configure the Timestamp interval, or simply
# interpret the nanoseconds as something else.

# What is the earliest available time for an agent to act during the
# simulation?
midnight = historical_date
kernelStartTime = midnight

# When should the Kernel shut down?  (This should be after market close.)
# Here we go for 5 PM the same day.
kernelStopTime = midnight + pd.to_timedelta('17:00:00')

# This will configure the kernel with a default computation delay
# (time penalty) for each agent's wakeup and recvMsg.  An agent
# can change this at any time for itself.  (nanoseconds)
defaultComputationDelay = 0        # no delay for this config


# IMPORTANT NOTE CONCERNING AGENT IDS: the id passed to each agent must:
#    1. be unique
#    2. equal its index in the agents list
# This is to avoid having to call an extra getAgentListIndexByID()
# in the kernel every single time an agent must be referenced.


# This is a list of symbols the exchange should trade.  It can handle any number.
# It keeps a separate order book for each symbol.  The example data includes
# only IBM.  This config uses generated data, so the symbol doesn't really matter.

# If shock variance must differ for each traded symbol, it can be overridden here.
symbols = { 'IBM' : { 'r_bar' : 100000, 'kappa' : 0.05, 'sigma_s' : sigma_s } }



### Configure the Kernel.
# The order of the np.random draws in this file determines every derived
# seed, so statements that draw must stay in this sequence.
kernel = Kernel("Base Kernel", random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32)))



### Configure the agents.  When conducting "agent of change" experiments, the
### new agents should be added at the END only.
agent_count = 0
agents = []
agent_types = []


### Configure an exchange agent.

# Let's open the exchange at 9:30 AM.
mkt_open = midnight + pd.to_timedelta('09:30:00')

# And close it at 9:30:00.000001 (i.e. 1,000 nanoseconds or "time steps")
mkt_close = midnight + pd.to_timedelta('09:30:00.000001')


# Configure an appropriate oracle for all traded stocks.
# All agents requiring the same type of Oracle will use the same oracle instance.
oracle = MeanRevertingOracle(mkt_open, mkt_close, symbols)


# Create the exchange.
num_exchanges = 1
agents.extend([ ExchangeAgent(j, "Exchange Agent {}".format(j), "ExchangeAgent", mkt_open, mkt_close,
                              list(symbols), log_orders=log_orders, book_freq=book_freq,
                              pipeline_delay = 0, computation_delay = 0, stream_history = 10,
                              random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32)))
                for j in range(agent_count, agent_count + num_exchanges) ])
agent_types.extend(["ExchangeAgent" for j in range(num_exchanges)])
agent_count += num_exchanges



### Configure some zero intelligence agents.

# Cash in this simulator is always in CENTS.
starting_cash = 10000000

# Here are the zero intelligence agents.
symbol = 'IBM'
s = symbols[symbol]

# Tuples are: (# agents, R_min, R_max, eta, L).  L for HBL only.

# Some configs for ZI agents only (among seven parameter settings).
+ +# 4 agents +#zi = [ (1, 0, 250, 1), (1, 0, 500, 1), (1, 0, 1000, 0.8), (1, 0, 1000, 1), (0, 0, 2000, 0.8), (0, 250, 500, 0.8), (0, 250, 500, 1) ] +#hbl = [] + +# 28 agents +#zi = [ (4, 0, 250, 1), (4, 0, 500, 1), (4, 0, 1000, 0.8), (4, 0, 1000, 1), (4, 0, 2000, 0.8), (4, 250, 500, 0.8), (4, 250, 500, 1) ] +#hbl = [] + +# 65 agents +#zi = [ (10, 0, 250, 1), (10, 0, 500, 1), (9, 0, 1000, 0.8), (9, 0, 1000, 1), (9, 0, 2000, 0.8), (9, 250, 500, 0.8), (9, 250, 500, 1) ] +#hbl = [] + +# 100 agents +#zi = [ (15, 0, 250, 1), (15, 0, 500, 1), (14, 0, 1000, 0.8), (14, 0, 1000, 1), (14, 0, 2000, 0.8), (14, 250, 500, 0.8), (14, 250, 500, 1) ] +#hbl = [] + +# 1000 agents +#zi = [ (143, 0, 250, 1), (143, 0, 500, 1), (143, 0, 1000, 0.8), (143, 0, 1000, 1), (143, 0, 2000, 0.8), (143, 250, 500, 0.8), (142, 250, 500, 1) ] +#hbl = [] + +# 10000 agents +#zi = [ (1429, 0, 250, 1), (1429, 0, 500, 1), (1429, 0, 1000, 0.8), (1429, 0, 1000, 1), (1428, 0, 2000, 0.8), (1428, 250, 500, 0.8), (1428, 250, 500, 1) ] +#hbl = [] + + +# Some configs for HBL agents only (among four parameter settings). + +# 4 agents +#zi = [] +#hbl = [ (1, 250, 500, 1, 2), (1, 250, 500, 1, 3), (1, 250, 500, 1, 5), (1, 250, 500, 1, 8) ] + +# 28 agents +#zi = [] +#hbl = [ (7, 250, 500, 1, 2), (7, 250, 500, 1, 3), (7, 250, 500, 1, 5), (7, 250, 500, 1, 8) ] + +# 1000 agents +#zi = [] +#hbl = [ (250, 250, 500, 1, 2), (250, 250, 500, 1, 3), (250, 250, 500, 1, 5), (250, 250, 500, 1, 8) ] + + +# Some configs that mix both types of agents. 
+ +# 28 agents +#zi = [ (3, 0, 250, 1), (3, 0, 500, 1), (3, 0, 1000, 0.8), (3, 0, 1000, 1), (3, 0, 2000, 0.8), (3, 250, 500, 0.8), (2, 250, 500, 1) ] +#hbl = [ (2, 250, 500, 1, 2), (2, 250, 500, 1, 3), (2, 250, 500, 1, 5), (2, 250, 500, 1, 8) ] + +# 65 agents +#zi = [ (7, 0, 250, 1), (7, 0, 500, 1), (7, 0, 1000, 0.8), (7, 0, 1000, 1), (7, 0, 2000, 0.8), (7, 250, 500, 0.8), (7, 250, 500, 1) ] +#hbl = [ (4, 250, 500, 1, 2), (4, 250, 500, 1, 3), (4, 250, 500, 1, 5), (4, 250, 500, 1, 8) ] + +# 1000 agents +zi = [ (100, 0, 250, 1), (100, 0, 500, 1), (100, 0, 1000, 0.8), (100, 0, 1000, 1), (100, 0, 2000, 0.8), (100, 250, 500, 0.8), (100, 250, 500, 1) ] +hbl = [ (75, 250, 500, 1, 2), (75, 250, 500, 1, 3), (75, 250, 500, 1, 5), (75, 250, 500, 1, 8) ] + + + +# ZI strategy split. +for i,x in enumerate(zi): + strat_name = "Type {} [{} <= R <= {}, eta={}]".format(i+1, x[1], x[2], x[3]) + agents.extend([ ZeroIntelligenceAgent(j, "ZI Agent {} {}".format(j, strat_name), "ZeroIntelligenceAgent {}".format(strat_name), random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32)),log_orders=log_orders, symbol=symbol, starting_cash=starting_cash, sigma_n=sigma_n, r_bar=s['r_bar'], kappa=s['kappa'], sigma_s=s['sigma_s'], q_max=10, sigma_pv=5000000, R_min=x[1], R_max=x[2], eta=x[3], lambda_a=0.005) for j in range(agent_count,agent_count+x[0]) ]) + agent_types.extend([ "ZeroIntelligenceAgent {}".format(strat_name) for j in range(x[0]) ]) + agent_count += x[0] + +# HBL strategy split. 
+for i,x in enumerate(hbl): + strat_name = "Type {} [{} <= R <= {}, eta={}, L={}]".format(i+1, x[1], x[2], x[3], x[4]) + agents.extend([ HeuristicBeliefLearningAgent(j, "HBL Agent {} {}".format(j, strat_name), "HeuristicBeliefLearningAgent {}".format(strat_name), random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32)), log_orders=log_orders, symbol=symbol, starting_cash=starting_cash, sigma_n=sigma_n, r_bar=s['r_bar'], kappa=s['kappa'], sigma_s=s['sigma_s'], q_max=10, sigma_pv=5000000, R_min=x[1], R_max=x[2], eta=x[3], lambda_a=0.005, L=x[4]) for j in range(agent_count,agent_count+x[0]) ]) + agent_types.extend([ "HeuristicBeliefLearningAgent {}".format(strat_name) for j in range(x[0]) ]) + agent_count += x[0] + + + +# Impact agent. + +# 200 time steps in... +impact_time = midnight + pd.to_timedelta('09:30:00.0000002') + +i = agent_count +agents.append(ImpactAgent(i, "Impact Agent {}".format(i), "ImpactAgent", symbol = "IBM", starting_cash = starting_cash, greed = greed, impact = impact, impact_time = impact_time, random_state = np.random.RandomState(seed=np.random.randint(low=0,high=2**32)))) +agent_types.append("Impact Agent {}".format(i)) +agent_count += 1 + + +### Configure a simple message latency matrix for the agents. Each entry is the minimum +# nanosecond delay on communication [from][to] agent ID. + +# Square numpy array with dimensions equal to total agent count. In this config, +# there should not be any communication delay. +latency = np.zeros((len(agent_types),len(agent_types))) + +# Configure a simple latency noise model for the agents. +# Index is ns extra delay, value is probability of this delay being applied. +# In this config, there is no latency (noisy or otherwise). +noise = [ 1.0 ] + + + +# Start the kernel running. 
+kernel.runner(agents = agents, startTime = kernelStartTime, + stopTime = kernelStopTime, agentLatency = latency, + latencyNoise = noise, + defaultComputationDelay = defaultComputationDelay, + oracle = oracle, log_dir = log_dir) + diff --git a/config/momentum.py b/config/momentum.py deleted file mode 100644 index 4abbcb6cd..000000000 --- a/config/momentum.py +++ /dev/null @@ -1,204 +0,0 @@ -from Kernel import Kernel -from agent.ExchangeAgent import ExchangeAgent -from agent.BackgroundAgent import BackgroundAgent -from agent.MomentumAgent import MomentumAgent -from util.order import LimitOrder -from util.oracle.DataOracle import DataOracle -from util import util - -import datetime as dt -import numpy as np -import pandas as pd -import sys - - -# Some config files require additional command line parameters to easily -# control agent or simulation hyperparameters during coarse parallelization. -import argparse - -parser = argparse.ArgumentParser(description='Detailed options for momentum config.') -parser.add_argument('-b', '--bg_noise', type=float, default=0.01, - help='Observation noise std for background agents') -parser.add_argument('-c', '--config', required=True, - help='Name of config file to execute') -parser.add_argument('-d', '--date', default='2014-01-28', - help='Historical date to simulate') -parser.add_argument('-f', '--frequency', default='5m', - help='Base Timestamp frequency for BackgroundTrader actions') -parser.add_argument('-l', '--log_dir', default=None, - help='Log directory name (default: unix timestamp at program start)') -parser.add_argument('-o', '--offset_unit', default='s', - help='Wakeup offset (jitter) unit for BackgroundTrader actions') -parser.add_argument('-s', '--seed', type=int, default=None, - help='numpy.random.seed() for simulation') -parser.add_argument('-t', '--arb_last_trade', action='store_true', - help='Arbitrage last trade instead of spread midpoint') -parser.add_argument('-v', '--verbose', action='store_true', - 
help='Maximum verbosity!') -parser.add_argument('--config_help', action='store_true', - help='Print argument options for this config file') - -args, remaining_args = parser.parse_known_args() - -if args.config_help: - parser.print_help() - sys.exit() - -# Historical date to simulate. Log file directory if not default. -historical_date = pd.to_datetime(args.date) -log_dir = args.log_dir - -# Observation noise for background agents. BG agent wake frequency + noise. -noise_std = args.bg_noise -freq = args.frequency -offset_unit = args.offset_unit - -# Random seed specification on the command line. Default: None (by clock). -# If none, we select one via a specific random method and pass it to seed() -# so we can record it for future use. (You cannot reasonably obtain the -# automatically generated seed when seed() is called without a parameter.) -seed = args.seed -if not seed: seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2**32 - 1) -np.random.seed(seed) - -# Config parameter that causes util.util.print to suppress most output. -# Also suppresses formatting of limit orders (which is time consuming). -util.silent_mode = not args.verbose -LimitOrder.silent_mode = not args.verbose - -# Config parameter to arbitrage last trade vs spread (for BackgroundTrader). -arb_last_trade = args.arb_last_trade - - -print ("Silent mode: {}".format(util.silent_mode)) -print ("BackgroundAgent freq: {}".format(freq)) -print ("BackgroundAgent noise: {:0.4f}".format(noise_std)) -print ("BackgroundAgent arbs last trade: {}".format(arb_last_trade)) -print ("Configuration seed: {}\n".format(seed)) - - - -### Required parameters for all simulations. - -# Since the simulator often pulls historical data, we use a real-world -# nanosecond timestamp (pandas.Timestamp) for our discrete time "steps", -# which are considered to be nanoseconds. For other (or abstract) time -# units, one can either configure the Timestamp interval, or simply -# interpret the nanoseconds as something else. 
- -# What is the earliest available time for an agent to act during the -# simulation? - -# Timestamp will default to midnight, as desired. -midnight = historical_date -kernelStartTime = midnight - -# When should the Kernel shut down? (This should be after market close.) -# Here we go for 5 PM the same day. -kernelStopTime = midnight + pd.to_timedelta('17:00:00') - -# This will configure the kernel with a default computation delay -# (time penalty) for each agent's wakeup and recvMsg. An agent -# can change this at any time for itself. (nanoseconds) -defaultComputationDelay = 1000000 # one millisecond - - -# IMPORTANT NOTE CONCERNING AGENT IDS: the id passed to each agent must: -# 1. be unique -# 2. equal its index in the agents list -# This is to avoid having to call an extra getAgentListIndexByID() -# in the kernel every single time an agent must be referenced. - - -### Configure some background agents. -num_agents = 100 - -# Cash in this simulator is always in CENTS. -starting_cash = 1000000 * 100 # a million dollars - -# Set a mean order volume around which BG agents should select somewhat random sizes. -# Eventually we might like to pull in historical volume from the oracle. -bg_trade_vol = 300 - -# Here are those background agents. -agents = [ BackgroundAgent(i, "Background Agent {}".format(i), "IBM", starting_cash, noise_std, arb_last_trade, freq, bg_trade_vol, offset_unit) for i in range(0,num_agents) ] -agent_types = ["BackgroundAgent" for i in range(num_agents)] - - -### Configure some momentum agents. -num_momentum_agents = 10 -lookback = 5 -agents.extend([ MomentumAgent(i, "Momentum Agent {}".format(i), "IBM", starting_cash, lookback) - for i in range(num_agents, num_agents + num_momentum_agents) ]) -num_agents += num_momentum_agents -agent_types.extend(["MomentumAgent" for i in range(num_momentum_agents)]) - - -### Configure an exchange agent. - -# Let's open the exchange at 9:30 AM. 
-mkt_open = midnight + pd.to_timedelta('09:30:00') - -# And close it at 4:00 PM. -mkt_close = midnight + pd.to_timedelta('16:00:00') - -# This is a list of symbols the exchange should trade. It can handle any number. -# It keeps a separate order book for each symbol. The example data includes -# only IBM. -symbols = ['IBM'] - -num_exchanges = 1 -agents.extend([ ExchangeAgent(i, "Exchange Agent {}".format(i), mkt_open, mkt_close, symbols, book_freq='S') - for i in range(num_agents, num_agents + num_exchanges) ]) -agent_types.extend(["ExchangeAgent" for i in range(num_exchanges)]) - - - -### Configure a simple message latency matrix for the agents. Each entry is the minimum -# nanosecond delay on communication [from][to] agent ID. - -# Square numpy array with dimensions equal to total agent count. Background Agents, -# by far the largest population, are handled at init, drawn from a uniform distribution from: -# Times Square (3.9 miles from NYSE, approx. 21 microseconds at the speed of light) to: -# Pike Place Starbucks in Seattle, WA (2402 miles, approx. 13 ms at the speed of light). -# Other agents are set afterward (and the mirror half of the matrix is also). - -latency = np.random.uniform(low = 21000, high = 13000000, size=(len(agent_types),len(agent_types))) - -for i, t1 in zip(range(latency.shape[0]), agent_types): - for j, t2 in zip(range(latency.shape[1]), agent_types): - # Three cases for symmetric array. Set latency when j > i, copy it when i > j, same agent when i == j. - if j > i: - # A hypothetical order book exploiting agent might require a special very low latency to represent - # colocation with the exchange hardware. - if (t1 == "ExploitAgent" and t2 == "ExchangeAgent") or (t2 == "ExploitAgent" and t1 == "ExchangeAgent"): - # We don't have any exploiting agents in this configuration, so any arbitrary number is fine. - # Let's use about 1/3 usec or approx 100m of fiber-optic cable. 
- latency[i,j] = 333 - elif i > j: - # This "bottom" half of the matrix simply mirrors the top. - latency[i,j] = latency[j,i] - else: - # This is the same agent. How long does it take to reach localhost? In our data center, it actually - # takes about 20 microseconds. - latency[i,j] = 20000 - - -# Configure a simple latency noise model for the agents. -# Index is ns extra delay, value is probability of this delay being applied. -# We may later want to substitute some realistic noise model or sample from a geographic database. -noise = [ 0.4, 0.25, 0.15, 0.1, 0.05, 0.025, 0.025 ] - - -# Create the data oracle for this experiment. All agents will use the same one. -oracle = DataOracle(historical_date.date(), symbols) - - -# Start a basic kernel. -kernel = Kernel("Base Kernel") -kernel.runner(agents = agents, startTime = kernelStartTime, - stopTime = kernelStopTime, agentLatency = latency, - latencyNoise = noise, - defaultComputationDelay = defaultComputationDelay, - oracle = oracle, log_dir = log_dir) - diff --git a/config/srg.py b/config/srg.py deleted file mode 100644 index 6cedfb8e0..000000000 --- a/config/srg.py +++ /dev/null @@ -1,182 +0,0 @@ -from Kernel import Kernel -from agent.ExchangeAgent import ExchangeAgent -from agent.HeuristicBeliefLearningAgent import HeuristicBeliefLearningAgent -from agent.ZeroIntelligenceAgent import ZeroIntelligenceAgent -from util.order import LimitOrder -from util.oracle.MeanRevertingOracle import MeanRevertingOracle -from util import util - -import datetime as dt -import numpy as np -import pandas as pd -import sys - - -# Some config files require additional command line parameters to easily -# control agent or simulation hyperparameters during coarse parallelization. 
-import argparse - -parser = argparse.ArgumentParser(description='Detailed options for momentum config.') -parser.add_argument('-b', '--book_freq', default='10N', - help='Frequency at which to archive order book for visualization') -parser.add_argument('-c', '--config', required=True, - help='Name of config file to execute') -parser.add_argument('-l', '--log_dir', default=None, - help='Log directory name (default: unix timestamp at program start)') -parser.add_argument('-n', '--obs_noise', type=float, default=1000, - help='Observation noise variance for zero intelligence agents (sigma^2_n)') -parser.add_argument('-r', '--shock_variance', type=float, default=100000, - help='Shock variance for mean reversion process (sigma^2_s)') -parser.add_argument('-s', '--seed', type=int, default=None, - help='numpy.random.seed() for simulation') -parser.add_argument('-v', '--verbose', action='store_true', - help='Maximum verbosity!') -parser.add_argument('--config_help', action='store_true', - help='Print argument options for this config file') - -args, remaining_args = parser.parse_known_args() - -if args.config_help: - parser.print_help() - sys.exit() - -# Historical date to simulate. Log file directory if not default. -# Not relevant for SRG config, but one is required. -historical_date = pd.to_datetime('2014-01-28') -log_dir = args.log_dir -book_freq = args.book_freq if args.book_freq.lower() != 'none' else None - -# Observation noise variance for zero intelligence agents. -sigma_n = args.obs_noise - -# Shock variance of mean reversion process. -sigma_s = args.shock_variance - -# Random seed specification on the command line. Default: None (by clock). -# If none, we select one via a specific random method and pass it to seed() -# so we can record it for future use. (You cannot reasonably obtain the -# automatically generated seed when seed() is called without a parameter.) 
-seed = args.seed -if not seed: seed = int(pd.Timestamp.now().timestamp() * 1000000) % (2**32 - 1) -np.random.seed(seed) - -# Config parameter that causes util.util.print to suppress most output. -# Also suppresses formatting of limit orders (which is time consuming). -util.silent_mode = not args.verbose -LimitOrder.silent_mode = not args.verbose - - -print ("Silent mode: {}".format(util.silent_mode)) -print ("Book freq: {}".format(book_freq)) -print ("ZeroIntelligenceAgent noise: {:0.4f}".format(sigma_n)) -print ("Shock variance: {:0.4f}".format(sigma_s)) -print ("Configuration seed: {}\n".format(seed)) - - - -### Required parameters for all simulations. - -# Since the simulator often pulls historical data, we use a real-world -# nanosecond timestamp (pandas.Timestamp) for our discrete time "steps", -# which are considered to be nanoseconds. For other (or abstract) time -# units, one can either configure the Timestamp interval, or simply -# interpret the nanoseconds as something else. - -# What is the earliest available time for an agent to act during the -# simulation? - -# Timestamp will default to midnight, as desired. -midnight = historical_date -kernelStartTime = midnight - -# When should the Kernel shut down? (This should be after market close.) -# Here we go for 5 PM the same day. -kernelStopTime = midnight + pd.to_timedelta('17:00:00') - -# This will configure the kernel with a default computation delay -# (time penalty) for each agent's wakeup and recvMsg. An agent -# can change this at any time for itself. (nanoseconds) -defaultComputationDelay = 0 # no delay for SRG config - - -# IMPORTANT NOTE CONCERNING AGENT IDS: the id passed to each agent must: -# 1. be unique -# 2. equal its index in the agents list -# This is to avoid having to call an extra getAgentListIndexByID() -# in the kernel every single time an agent must be referenced. - - -# This is a list of symbols the exchange should trade. It can handle any number. 
-# It keeps a separate order book for each symbol. The example data includes -# only IBM. - -# If shock variance must differ for each traded symbol, it can be overridden here. -symbols = { 'IBM' : { 'r_bar' : 100000, 'kappa' : 0.05, 'sigma_s' : sigma_s } } - - -### Configure some zero intelligence agents. -num_agents = 100 - -# Cash in this simulator is always in CENTS. -starting_cash = 10000000 - -agent_count = 0 - -# Here are the zero intelligence agents. -symbol = 'IBM' -s = symbols[symbol] -agents = [ ZeroIntelligenceAgent(i, "ZI Agent {}".format(i), symbol, starting_cash, sigma_n=sigma_n, r_bar=s['r_bar'], kappa=s['kappa'], sigma_s=s['sigma_s'], q_max=10, sigma_pv=5000000, R_min=250, R_max=500, eta=0.8, lambda_a=0.005) for i in range(0,num_agents) ] -agent_types = ["ZeroIntelligenceAgent" for i in range(num_agents)] -agent_count = num_agents - - -# Here are the heuristic belief learning agents. -num_hbl_agents = 10 -agents.extend([ HeuristicBeliefLearningAgent(i, "HBL Agent {}".format(i), symbol, starting_cash, sigma_n=sigma_n, r_bar=s['r_bar'], kappa=s['kappa'], sigma_s=s['sigma_s'], q_max=10, sigma_pv=5000000, R_min=250, R_max=500, eta=0.8, lambda_a=0.005, L=8) for i in range(agent_count, agent_count + num_hbl_agents) ]) -agent_types.extend(["HeuristicBeliefLearningAgent" for i in range(num_hbl_agents)]) -agent_count += num_hbl_agents - - - -### Configure an exchange agent. - -# Let's open the exchange at 9:30 AM. -mkt_open = midnight + pd.to_timedelta('09:30:00') - -# And close it at 9:30:00.00001 (i.e. 
10,000 nanoseconds or "time steps") -mkt_close = midnight + pd.to_timedelta('09:30:00.00001') - - -num_exchanges = 1 -agents.extend([ ExchangeAgent(i, "Exchange Agent {}".format(i), mkt_open, mkt_close, [s for s in symbols], book_freq=book_freq, pipeline_delay = 0, computation_delay = 0, stream_history = 10) - for i in range(agent_count, agent_count + num_exchanges) ]) -agent_types.extend(["ExchangeAgent" for i in range(num_exchanges)]) -agent_count += num_exchanges - - - -### Configure a simple message latency matrix for the agents. Each entry is the minimum -# nanosecond delay on communication [from][to] agent ID. - -# Square numpy array with dimensions equal to total agent count. In the SRG config, -# there should not be any communication delay. -latency = np.zeros((len(agent_types),len(agent_types))) - -# Configure a simple latency noise model for the agents. -# Index is ns extra delay, value is probability of this delay being applied. -# In the SRG config, there is no latency (noisy or otherwise). -noise = [ 1.0 ] - - -# Create the data oracle for this experiment. All agents will use the same one. -oracle = MeanRevertingOracle(mkt_open, mkt_close, symbols) - - -# Start a basic kernel. -kernel = Kernel("Base Kernel") -kernel.runner(agents = agents, startTime = kernelStartTime, - stopTime = kernelStopTime, agentLatency = latency, - latencyNoise = noise, - defaultComputationDelay = defaultComputationDelay, - oracle = oracle, log_dir = log_dir) - diff --git a/message/Message.py b/message/Message.py index 5ef771bc3..b12870ace 100644 --- a/message/Message.py +++ b/message/Message.py @@ -11,6 +11,8 @@ def __lt__(self, other): class Message: + uniq = 0 + def __init__ (self, body = None): # The base Message class no longer holds envelope/header information, # however any desired information can be placed in the arbitrary @@ -19,20 +21,28 @@ def __init__ (self, body = None): # It is acceptable for WAKEUP type messages to have no body. 
self.body = body + # The autoincrementing variable here will ensure that, when Messages are + # due for delivery at the same time step, the Message that was created + # first is delivered first. (Which is not important, but Python 3 + # requires a fully resolved chain of priority in all cases, so we need + # something consistent.) We might want to generate these with stochasticity, + # but guarantee uniqueness somehow, to make delivery of orders at the same + # exact timestamp "random" instead of "arbitrary" (FIFO among tied times) + # as it currently is. + self.uniq = Message.uniq + Message.uniq += 1 + # The base Message class can no longer do any real error checking. # Subclasses are strongly encouraged to do so based on their body. def __lt__(self, other): # Required by Python3 for this object to be placed in a priority queue. + # If we ever decide to place something on the queue other than Messages, + # we will need to alter the below to not assume the other object is + # also a Message. - # TODO: might consider adding a random number to message objects - # at creation time, or storing creation time, to provide - # consistent sorting of messages without biasing delivery - # at the same timestamp based on arbitrary body comparisons. 
- - return ("{}".format(self.body) < - "{}".format(other.body)) + return (self.uniq < other.uniq) def __str__(self): diff --git a/requirements.txt b/requirements.txt index 0ef2e0982..bc15430b7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ cycler==0.10.0 joblib==0.13.2 -jsons==0.8.7 -kiwisolver==1.0.1 +jsons==0.8.8 +kiwisolver==1.1.0 matplotlib==3.0.3 numpy==1.16.3 pandas==0.24.2 diff --git a/scripts/book.sh b/scripts/book.sh new file mode 100644 index 000000000..2ab545d27 --- /dev/null +++ b/scripts/book.sh @@ -0,0 +1 @@ +python cli/book_plot.py "$(ls -at log/15*/order* | head -n 1)" diff --git a/scripts/capture_profile.sh b/scripts/capture_profile.sh new file mode 100644 index 000000000..672c363ff --- /dev/null +++ b/scripts/capture_profile.sh @@ -0,0 +1 @@ +time python -m cProfile -o runstats.prof abides.py -c impact -s100 > profiled_stdout diff --git a/scripts/dump.sh b/scripts/dump.sh new file mode 100644 index 000000000..67f4d7f48 --- /dev/null +++ b/scripts/dump.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +if [ $# -eq 0 ]; then + echo $0: usage: dump.sh agent_filename + exit 1 +fi + +if [ $# -eq 1 ]; then + file=$1 + python cli/dump.py "$(ls -at log/15*/${file}* | head -n 1)" +fi + +if [ $# -ge 2 ]; then + file=$1 + python cli/dump.py "$(ls -at log/15*/${file}* | head -n 1)" "${@:2}" +fi diff --git a/scripts/impact_baseline.sh b/scripts/impact_baseline.sh new file mode 100644 index 000000000..99b00d16c --- /dev/null +++ b/scripts/impact_baseline.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +if [ $# -eq 0 ]; then + echo $0: 'usage: impact_baseline.sh ' + exit 1 +fi + +if [ $# -ge 2 ]; then + echo $0: 'usage: impact_baseline.sh ' + exit 1 +fi + +if [ $# -eq 1 ]; then + count=$1 + dt=`date +%s` + for i in `seq 1 ${count}`; + do + echo "Launching simulation $i" + python -u abides.py -c impact -i -l impact_${i}_baseline -s ${i} > ./batch_output/impact_${i}_baseline & + sleep 0.5 + done +fi diff --git a/scripts/impact_study.sh b/scripts/impact_study.sh new file 
mode 100644 index 000000000..0d3eea8c0 --- /dev/null +++ b/scripts/impact_study.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +if [ $# -eq 0 ]; then + echo $0: 'usage: impact_study.sh ' + exit 1 +fi + +if [ $# -ge 2 ]; then + echo $0: 'usage: impact_study.sh ' + exit 1 +fi + +if [ $# -eq 1 ]; then + count=$1 + dt=`date +%s` + for i in `seq 1 ${count}`; + do + echo "Launching simulation $i" + python -u abides.py -c impact -l impact_${i} -s ${i} > ./batch_output/impact_${i} & + sleep 0.5 + done +fi diff --git a/scripts/stats.sh b/scripts/stats.sh new file mode 100644 index 000000000..e99627d4b --- /dev/null +++ b/scripts/stats.sh @@ -0,0 +1 @@ +python cli/stats.py "$(ls -atd log/15* | head -n 1)" diff --git a/scripts/timeit.sh b/scripts/timeit.sh new file mode 100644 index 000000000..2faa89ab3 --- /dev/null +++ b/scripts/timeit.sh @@ -0,0 +1 @@ +python -m timeit "__import__('os').system('python abides.py -c impact')" diff --git a/util/OrderBook.py b/util/OrderBook.py index ca5d8e0c4..f65813a8e 100644 --- a/util/OrderBook.py +++ b/util/OrderBook.py @@ -5,7 +5,7 @@ from message.Message import Message from util.order.LimitOrder import LimitOrder -from util.util import print +from util.util import print, log_print, be_silent from copy import deepcopy from agent.FinancialAgent import dollarize @@ -36,11 +36,11 @@ def handleLimitOrder (self, order): # order size "fit" or minimizing number of transactions. Sends one notification per # match. if order.symbol != self.symbol: - print ("{} order discarded. Does not match OrderBook symbol: {}".format(order.symbol, self.symbol)) + log_print ("{} order discarded. Does not match OrderBook symbol: {}", order.symbol, self.symbol) return if (order.quantity <= 0) or (int(order.quantity) != order.quantity): - print ("{} order discarded. Quantity ({}) must be a positive integer.".format(order.symbol, order.quantity)) + log_print ("{} order discarded. 
Quantity ({}) must be a positive integer.", order.symbol, order.quantity) return # Add the order under index 0 of history: orders since the most recent trade. @@ -48,6 +48,8 @@ def handleLimitOrder (self, order): 'quantity' : order.quantity, 'is_buy_order' : order.is_buy_order, 'limit_price' : order.limit_price, 'transactions' : [], 'cancellations' : [] } + + log_print ("Just added history {}", self.history[0]) matching = True @@ -66,9 +68,9 @@ def handleLimitOrder (self, order): order.quantity -= filled_order.quantity - print ("MATCHED: new order {} vs old order {}".format(filled_order, matched_order)) - print ("SENT: notifications of order execution to agents {} and {} for orders {} and {}".format( - filled_order.agent_id, matched_order.agent_id, filled_order.order_id, matched_order.order_id)) + log_print ("MATCHED: new order {} vs old order {}", filled_order, matched_order) + log_print ("SENT: notifications of order execution to agents {} and {} for orders {} and {}", + filled_order.agent_id, matched_order.agent_id, filled_order.order_id, matched_order.order_id) self.owner.sendMessage(order.agent_id, Message({ "msg": "ORDER_EXECUTED", "order": filled_order })) self.owner.sendMessage(matched_order.agent_id, Message({ "msg": "ORDER_EXECUTED", "order": matched_order })) @@ -83,9 +85,9 @@ def handleLimitOrder (self, order): # No matching order was found, so the new order enters the order book. Notify the agent. 
self.enterOrder(deepcopy(order)) - print ("ACCEPTED: new order {}".format(order)) - print ("SENT: notifications of order acceptance to agent {} for order {}".format( - order.agent_id, order.order_id)) + log_print ("ACCEPTED: new order {}", order) + log_print ("SENT: notifications of order acceptance to agent {} for order {}", + order.agent_id, order.order_id) self.owner.sendMessage(order.agent_id, Message({ "msg": "ORDER_ACCEPTED", "order": order })) @@ -108,30 +110,39 @@ def handleLimitOrder (self, order): trade_qty = 0 trade_price = 0 for q, p in executed: - print ("Executed: {} @ {}".format(q, p)) + log_print ("Executed: {} @ {}", q, p) trade_qty += q trade_price += (p*q) avg_price = int(round(trade_price / trade_qty)) - print ("Avg: {} @ ${:0.4f}".format(trade_qty, avg_price)) + log_print ("Avg: {} @ ${:0.4f}", trade_qty, avg_price) self.owner.logEvent('LAST_TRADE', "{},${:0.4f}".format(trade_qty, avg_price)) self.last_trade = avg_price - # Finally, log the full depth of the order book. - row = { 'QuoteTime' : self.owner.currentTime } - for quote in self.quotes_seen: - row[quote] = 0 - for quote, volume in self.getInsideBids(): - row[quote] = -volume - self.quotes_seen.add(quote) - for quote, volume in self.getInsideAsks(): - if quote in row: - if row[quote] != 0: - print ("WARNING: THIS IS A REAL PROBLEM: an order book contains bids and asks at the same quote price!", override=True) - row[quote] = volume - self.quotes_seen.add(quote) - self.book_log.append(row) + # Transaction occurred, so advance indices. + self.history.insert(0, {}) + + # Truncate history to required length. + self.history = self.history[:self.owner.stream_history+1] + + + # Finally, log the full depth of the order book, ONLY if we have been requested to store the order book + # for later visualization. (This is slow.) 
+ if self.owner.book_freq is not None: + row = { 'QuoteTime' : self.owner.currentTime } + for quote in self.quotes_seen: + row[quote] = 0 + for quote, volume in self.getInsideBids(): + row[quote] = -volume + self.quotes_seen.add(quote) + for quote, volume in self.getInsideAsks(): + if quote in row: + if row[quote] != 0: + print ("WARNING: THIS IS A REAL PROBLEM: an order book contains bids and asks at the same quote price!", override=True) + row[quote] = volume + self.quotes_seen.add(quote) + self.book_log.append(row) self.prettyPrint() @@ -162,60 +173,50 @@ def executeOrder (self, order): return None else: # There are orders on the right side, and the new order's price does fall - # somewhere within them. Find the best-price matching order. + # somewhere within them. We can/will only match against the oldest order + # among those with the best price. (i.e. best price, then FIFO) - # Current matching is best price then FIFO (at same price). - # Note that o is a LIST of all orders (oldest at index 0) at this same price. - for i, o in enumerate(book): - # The first time we find an order that can match, we take it. - if self.isMatch(order, o[0]): - # The matched order might be only partially filled. - # (i.e. new order is smaller) - if order.quantity >= o[0].quantity: - # Consumed entire matched order. - matched_order = book[i].pop(0) - - # If the matched price now has no orders, remove it completely. - if not book[i]: - del book[i] + # Note that book[i] is a LIST of all orders (oldest at index book[i][0]) at the same price. - else: - # Consumed only part of matched order. - matched_order = deepcopy(book[i][0]) - matched_order.quantity = order.quantity + # The matched order might be only partially filled. (i.e. new order is smaller) + if order.quantity >= book[0][0].quantity: + # Consumed entire matched order. + matched_order = book[0].pop(0) - book[i][0].quantity -= matched_order.quantity + # If the matched price now has no orders, remove it completely. 
+ if not book[0]: + del book[0] - # When two limit orders are matched, they execute at the price that - # was being "advertised" in the order book. - matched_order.fill_price = matched_order.limit_price - - # Record the transaction in the order history and push the indices - # out one, possibly truncating to the maximum history length. + else: + # Consumed only part of matched order. + matched_order = deepcopy(book[0][0]) + matched_order.quantity = order.quantity - # The incoming order is guaranteed to exist under index 0. - self.history[0][order.order_id]['transactions'].append( (self.owner.currentTime, order.quantity) ) + book[0][0].quantity -= matched_order.quantity - # The pre-existing order may or may not still be in the recent history. - for idx, orders in enumerate(self.history): - if matched_order.order_id not in orders: continue + # When two limit orders are matched, they execute at the price that + # was being "advertised" in the order book. + matched_order.fill_price = matched_order.limit_price - # Found the matched order in history. Update it with this transaction. - self.history[idx][matched_order.order_id]['transactions'].append( - (self.owner.currentTime, matched_order.quantity) ) + # Record the transaction in the order history and push the indices + # out one, possibly truncating to the maximum history length. - # Transaction occurred, so advance indices. - self.history.insert(0, {}) + # The incoming order is guaranteed to exist under index 0. + print ("HISTORY", self.history) + print ("MATCHED HISTORY 0", self.history[0]) + self.history[0][order.order_id]['transactions'].append( (self.owner.currentTime, order.quantity) ) - # Truncate history to required length. - self.history = self.history[:self.owner.stream_history+1] + # The pre-existing order may or may not still be in the recent history. + for idx, orders in enumerate(self.history): + if matched_order.order_id not in orders: continue + # Found the matched order in history. 
Update it with this transaction. + self.history[idx][matched_order.order_id]['transactions'].append( + (self.owner.currentTime, matched_order.quantity) ) - # Return (only the executed portion of) the matched order. - return matched_order + # Return (only the executed portion of) the matched order. + return matched_order - # No matching order found. - return None def isMatch (self, order, o): @@ -304,9 +305,9 @@ def cancelOrder (self, order): if not book[i]: del book[i] - print ("CANCELLED: order {}".format(order)) - print ("SENT: notifications of order cancellation to agent {} for order {}".format( - cancelled_order.agent_id, cancelled_order.order_id)) + log_print ("CANCELLED: order {}", order) + log_print ("SENT: notifications of order cancellation to agent {} for order {}", + cancelled_order.agent_id, cancelled_order.order_id) self.owner.sendMessage(order.agent_id, Message({ "msg": "ORDER_CANCELLED", "order": cancelled_order })) @@ -370,9 +371,13 @@ def prettyPrint (self, silent=False): # Start at the highest ask price and move down. Then switch to the highest bid price and move down. # Show the total volume at each price. If silent is True, return the accumulated string and print nothing. + # If the global silent flag is set, skip prettyPrinting entirely, as it takes a LOT of time. 
+ if be_silent(): return '' + book = "{} order book as of {}\n".format(self.symbol, self.owner.currentTime) book += "Last trades: simulated {:d}, historical {:d}\n".format(self.last_trade, - self.owner.oracle.observePrice(self.symbol, self.owner.currentTime, sigma_n = 0)) + self.owner.oracle.observePrice(self.symbol, self.owner.currentTime, sigma_n = 0, + random_state = self.owner.random_state)) book += "{:10s}{:10s}{:10s}\n".format('BID','PRICE','ASK') book += "{:10s}{:10s}{:10s}\n".format('---','-----','---') @@ -385,5 +390,5 @@ def prettyPrint (self, silent=False): if silent: return book - print (book) + log_print (book) diff --git a/util/oracle/DataOracle.py b/util/oracle/DataOracle.py index 51b54af33..810a43996 100644 --- a/util/oracle/DataOracle.py +++ b/util/oracle/DataOracle.py @@ -4,16 +4,16 @@ ### certain "background" agents to obtain noisy observations about the "real" ### price of a stock at a current time. It is intended to provide some realistic ### behavior and "price gravity" to the simulated market -- i.e. to make the -### market behave something like reality in the absence of whatever experiment -### we are running with more active agent types. +### market behave something like historical reality in the absence of whatever +### experiment we are running with more active agent types. import datetime as dt import numpy as np import pandas as pd -import os, random, sys +import os, sys from math import sqrt -from util.util import print +from util.util import print, log_print from joblib import Memory mem = Memory(cachedir='./cache', verbose=0) @@ -21,7 +21,7 @@ #@mem.cache def read_trades(trade_file, symbols): - print ("Data not cached. 
This will take a minute...") df = pd.read_pickle(trade_file, compression='bz2') @@ -39,7 +39,7 @@ def read_trades(trade_file, symbols): class DataOracle: - def __init__(self, historical_date, symbols): + def __init__(self, historical_date = None, symbols = None, data_dir = None): self.historical_date = historical_date self.symbols = symbols @@ -48,28 +48,22 @@ def __init__(self, historical_date, symbols): # Read historical trades here... h = historical_date pre = 'ct' if h.year < 2015 else 'ctm' - trade_file = os.path.join('data', 'trades', 'trades_{}'.format(h.year), + trade_file = os.path.join(data_dir, 'trades', 'trades_{}'.format(h.year), '{}_{}{:02d}{:02d}.bgz'.format(pre, h.year, h.month, h.day)) - bars_1m_file = os.path.join('data', '1m_ohlc', '1m_ohlc_{}'.format(h.year), + bars_1m_file = os.path.join(data_dir, '1m_ohlc', '1m_ohlc_{}'.format(h.year), '{}{:02d}{:02d}_ohlc_1m.bgz'.format(h.year, h.month, h.day)) - print ("DataOracle initializing trades from file {}".format(trade_file)) - print ("DataOracle initializing 1m bars from file {}".format(bars_1m_file)) + log_print ("DataOracle initializing trades from file {}", trade_file) + log_print ("DataOracle initializing 1m bars from file {}", bars_1m_file) then = dt.datetime.now() self.df_trades = read_trades(trade_file, symbols) - - # Use this for debugging the historical trade files. - #tmp = self.df_trades.loc['IBM'].between_time('9:30','16:00') - #print (tmp[tmp['EX'] == 'T']) - #sys.exit() - self.df_bars_1m = read_trades(bars_1m_file, symbols) now = dt.datetime.now() - print ("DataOracle initialized for {} with symbols {}".format(historical_date, symbols)) - print ("DataOracle initialization took {}".format(now - then)) + log_print ("DataOracle initialized for {} with symbols {}", historical_date, symbols) + log_print ("DataOracle initialization took {}", now - then) @@ -77,13 +71,14 @@ def __init__(self, historical_date, symbols): # files does propagate the earliest trade backwards, which helps. 
The exchange should # pass its opening time. def getDailyOpenPrice (self, symbol, mkt_open, cents=True): - # Remember this. It is useful. + # Remember market open time. self.mkt_open = mkt_open - print ("Oracle: client requested {} at market open: {}".format(symbol, mkt_open)) + log_print ("Oracle: client requested {} at market open: {}", symbol, mkt_open) + # Find the opening historical price in the 1m OHLC bars for this symbol. open = self.df_bars_1m.loc[(symbol,mkt_open.time()),'open'] - print ("Oracle: market open price was was {}".format(open)) + log_print ("Oracle: market open price was was {}", open) return int(round(open * 100)) if cents else open @@ -92,7 +87,7 @@ def getDailyOpenPrice (self, symbol, mkt_open, cents=True): # which must be of type pd.Timestamp. def getLatestTrade (self, symbol, currentTime): - print ("Oracle: client requested {} as of {}".format(symbol, currentTime)) + log_print ("Oracle: client requested {} as of {}", symbol, currentTime) # See when the last historical trade was, prior to simulated currentTime. dt_last_trade = self.df_trades.loc[symbol].index.asof(currentTime) @@ -109,7 +104,7 @@ def getLatestTrade (self, symbol, currentTime): price = self.getDailyOpenPrice(symbol, self.mkt_open, cents=False) time = self.mkt_open - print ("Oracle: latest historical trade was {} at {}".format(price, time)) + log_print ("Oracle: latest historical trade was {} at {}", price, time) return price @@ -119,16 +114,22 @@ def getLatestTrade (self, symbol, currentTime): # agents should use noisy=False. # # NOTE: sigma_n is the observation variance, NOT STANDARD DEVIATION. - def observePrice(self, symbol, currentTime, sigma_n = 0.0001): + # + # Each agent must pass its own np.random.RandomState object to the oracle. + # This helps to preserve the consistency of multiple simulations with experimental + # changes (if the oracle used a global Random object, simply adding one new agent + # would change everyone's "noise" on all subsequent observations). 
+ def observePrice(self, symbol, currentTime, sigma_n = 0.0001, random_state = None): last_trade_price = self.getLatestTrade(symbol, currentTime) - # Noisy belief is a normal distribution with stdev around 1% of the last trade price. + # Noisy belief is a normal distribution centered on the last trade price, with standard + # deviation last_trade_price * sqrt(sigma_n) (1% of the price at the default sigma_n). if sigma_n == 0: belief = float(last_trade_price) else: - belief = np.random.normal(loc=last_trade_price, scale=last_trade_price * sqrt(sigma_n)) + belief = random_state.normal(loc=last_trade_price, scale=last_trade_price * sqrt(sigma_n)) - print ("Oracle: giving client value observation {:0.2f}".format(belief)) + log_print ("Oracle: giving client value observation {:0.2f}", belief) # All simulator prices are specified in integer cents. return int(round(belief * 100)) diff --git a/util/oracle/MeanRevertingOracle.py b/util/oracle/MeanRevertingOracle.py index df51b8254..be2e42487 100644 --- a/util/oracle/MeanRevertingOracle.py +++ b/util/oracle/MeanRevertingOracle.py @@ -19,7 +19,7 @@ import os, random, sys from math import sqrt -from util.util import print +from util.util import print, log_print class MeanRevertingOracle: @@ -30,19 +30,21 @@ def __init__(self, mkt_open, mkt_close, symbols): self.mkt_open = mkt_open self.mkt_close = mkt_close self.symbols = symbols + + # The dictionary r holds the fundamental value series for each symbol. 
self.r = {} then = dt.datetime.now() for symbol in symbols: s = symbols[symbol] - print ("MeanRevertingOracle computing fundamental value series for {}".format(symbol)) + log_print ("MeanRevertingOracle computing fundamental value series for {}", symbol) self.r[symbol] = self.generate_fundamental_value_series(symbol=symbol, **s) now = dt.datetime.now() - print ("MeanRevertingOracle initialized for symbols {}".format(symbols)) - print ("MeanRevertingOracle initialization took {}".format(now - then)) + log_print ("MeanRevertingOracle initialized for symbols {}", symbols) + log_print ("MeanRevertingOracle initialization took {}", now - then) def generate_fundamental_value_series(self, symbol, r_bar, kappa, sigma_s): @@ -50,6 +52,11 @@ def generate_fundamental_value_series(self, symbol, r_bar, kappa, sigma_s): # mean fundamental value, kappa is the mean reversion coefficient, and sigma_s # is the shock variance. (Note: NOT STANDARD DEVIATION.) + # Because the oracle uses the global np.random PRNG to create the fundamental value + # series, it is important to create the oracle BEFORE the agents. In this way the + # addition of a new agent will not affect the sequence created. (Observations using + # the oracle will use an agent's PRNG and thus not cause a problem.) + # Turn variance into std. sigma_s = sqrt(sigma_s) @@ -84,10 +91,10 @@ def getDailyOpenPrice (self, symbol, mkt_open=None): if (mkt_open is not None) and (self.mkt_open is None): self.mkt_open = mkt_open - print ("Oracle: client requested {} at market open: {}".format(symbol, self.mkt_open)) + log_print ("Oracle: client requested {} at market open: {}", symbol, self.mkt_open) open = self.r[symbol].loc[self.mkt_open] - print ("Oracle: market open price was was {}".format(open)) + log_print ("Oracle: market open price was was {}", open) return open @@ -99,7 +106,12 @@ def getDailyOpenPrice (self, symbol, mkt_open=None): # Only the Exchange or other privileged agents should use noisy=False. 
# # sigma_n is experimental observation variance. NOTE: NOT STANDARD DEVIATION. - def observePrice(self, symbol, currentTime, sigma_n = 1000): + # + # Each agent must pass its RandomState object to observePrice. This ensures that + # each agent will receive the same answers across multiple same-seed simulations + # even if a new agent has been added to the experiment. + def observePrice(self, symbol, currentTime, sigma_n = 1000, random_state = None): + # If the request is made after market close, return the close price. if currentTime >= self.mkt_close: r_t = self.r[symbol].loc[self.mkt_close - pd.Timedelta('1ns')] else: @@ -109,10 +121,10 @@ def observePrice(self, symbol, currentTime, sigma_n = 1000): if sigma_n == 0: obs = r_t else: - obs = int(round(np.random.normal(loc=r_t, scale=sqrt(sigma_n)))) + obs = int(round(random_state.normal(loc=r_t, scale=sqrt(sigma_n)))) - print ("Oracle: current fundamental value is {} at {}".format(r_t, currentTime)) - print ("Oracle: giving client value observation {}".format(obs)) + log_print ("Oracle: current fundamental value is {} at {}", r_t, currentTime) + log_print ("Oracle: giving client value observation {}", obs) # Reminder: all simulator prices are specified in integer cents. return obs diff --git a/util/order/LimitOrder.py b/util/order/LimitOrder.py index bd4210eea..c34c248af 100644 --- a/util/order/LimitOrder.py +++ b/util/order/LimitOrder.py @@ -14,6 +14,9 @@ class LimitOrder (Order): def __init__ (self, agent_id, time_placed, symbol, quantity, is_buy_order, limit_price): super().__init__(agent_id, time_placed, symbol, quantity, is_buy_order) + + # The limit price is the minimum price the agent will accept (for a sell order) or + # the maximum price the agent will pay (for a buy order). 
self.limit_price = limit_price def __str__ (self): diff --git a/util/order/Order.py b/util/order/Order.py index 1a9a9a1dd..fddc39759 100644 --- a/util/order/Order.py +++ b/util/order/Order.py @@ -7,10 +7,19 @@ class Order: order_id = 0 def __init__(self, agent_id, time_placed, symbol, quantity, is_buy_order): + # Numeric agent id that placed the order. self.agent_id = agent_id + + # Time at which the order was created by the agent. self.time_placed = time_placed + + # Equity symbol for the order. self.symbol = symbol + + # Number of equity units affected by the order. self.quantity = quantity + + # Boolean: True indicates a buy order; False indicates a sell order. self.is_buy_order = is_buy_order # Assign and increment the next unique order_id (simulation-wide). diff --git a/util/util.py b/util/util.py index 61abd5870..f49049413 100644 --- a/util/util.py +++ b/util/util.py @@ -20,3 +20,16 @@ def print (*args, **kwargs): if (not silent_mode) or override: return __builtin__.print (*args, **kwargs) + +# This optional log_print function will call str.format(args) and print the +# result to stdout. It will return immediately when silent mode is active. +# Use it for all permanent logging print statements to allow fastest possible +# execution when verbose flag is not set. +def log_print (str, *args): + if not silent_mode: print (str.format(*args)) + + +# Accessor method for the global silent_mode variable. +def be_silent (): + return silent_mode +