-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMC_simulator.py
23 lines (20 loc) · 929 Bytes
/
MC_simulator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
from bandit import Bandit
from util import *
import numpy as np
def mc_simulate(n, bandit, label=''):
    """Average the outcome of ``n`` runs of ``bandit``.

    Every run calls ``bandit.reset()`` and then ``bandit.simulate()``, after
    which the bandit's ``totalReward``, ``totalRegret``, ``regrets`` and
    ``rewards`` attributes are folded into running averages.

    Args:
        n: Number of runs to average over. When ``n <= 0`` the loop body
            never executes and every statistic is returned as zero.
        bandit: Object providing ``time_horizon``, ``agent.name``,
            ``reset()``, ``simulate()``, ``totalReward``, ``totalRegret``,
            ``regrets`` and ``rewards``.
        label: Text appended (after a single space) to the agent name to
            form ``result['name']``.

    Returns:
        dict with the keys:
            ``'totalReward'`` / ``'totalRegret'``: values accumulated across
                runs through ``incrementalAvg``.
            ``'regrets'`` / ``'rewards'``: float NumPy arrays holding the
                element-wise mean over runs of ``bandit.regrets`` and
                ``bandit.rewards``.
            ``'name'``: ``bandit.agent.name + ' ' + label``.
    """
    # Allocate the per-step accumulators as float arrays up front so the
    # returned dtype is the same whether or not any run is executed.
    # NOTE(review): bandit.regrets / bandit.rewards are assumed to have
    # time_horizon entries; that is not checked here.
    result = {
        'totalReward': 0,
        'totalRegret': 0,
        'regrets': np.zeros(bandit.time_horizon),
        'rewards': np.zeros(bandit.time_horizon),
        'name': bandit.agent.name + ' ' + label,
    }

    for i in range(n):
        bandit.reset()
        bandit.simulate()

        runs_done = i + 1
        # incrementalAvg comes from `util`; presumably it returns the updated
        # running mean given (count, previous mean, new sample) -- confirm
        # against util.py.
        result['totalReward'] = incrementalAvg(
            runs_done, result['totalReward'], bandit.totalReward)
        result['totalRegret'] = incrementalAvg(
            runs_done, result['totalRegret'], bandit.totalRegret)

        # Running mean: re-weight the previous mean by the number of earlier
        # runs (i), add the new sample, divide by the new run count.
        for key, sample in (('regrets', bandit.regrets),
                            ('rewards', bandit.rewards)):
            result[key] = (result[key] * i + np.array(sample)) / runs_done

    return result