-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlab7_3.py
31 lines (24 loc) · 918 Bytes
/
lab7_3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import numpy as np
class NonstationaryBandit:
    """k-armed bandit whose mean rewards drift by a Gaussian random walk.

    Attributes:
        k: Number of arms.
        mean_rewards: Float array of length ``k`` holding the current mean
            reward of every arm.
        std_dev: Standard deviation of the per-step random-walk increment
            applied to each arm's mean.
    """

    def __init__(self, k=10, mean=0, std_dev=0.01):
        """Create a bandit with ``k`` arms that all start at ``mean``.

        Args:
            k: Number of arms.
            mean: Initial mean reward shared by all arms.
            std_dev: Standard deviation of the increment added to every
                arm's mean on each call to ``step``.
        """
        self.k = k
        # Force a float array: with an integer ``mean`` (the default is 0)
        # np.full would infer an integer dtype, and the in-place addition of
        # float noise in step() would then fail with a casting error.
        self.mean_rewards = np.full(k, mean, dtype=float)
        self.std_dev = std_dev

    # The initializer used to be spelled ``_init_`` (and therefore never ran
    # on construction); keep that name as an alias for any explicit callers.
    _init_ = __init__

    def step(self):
        """Advance the random walk: add N(0, std_dev) noise to every mean."""
        self.mean_rewards += np.random.normal(0, self.std_dev, self.k)

    def get_reward(self, action):
        """Sample a reward for ``action``.

        Args:
            action: Index into ``mean_rewards`` selecting the arm to pull.

        Returns:
            A draw from a normal distribution centred on the arm's current
            mean with standard deviation 1.
        """
        return np.random.normal(self.mean_rewards[action], 1)
def bandit_nonstat(action, n_steps=1000):
    """Return one reward for ``action`` from a freshly drifted bandit.

    Every call builds a new ``NonstationaryBandit`` with its default
    arguments, advances its random walk ``n_steps`` times, and then samples
    a single reward. No bandit state is kept between calls.

    Args:
        action: Index of the arm to pull.
        n_steps: Number of random-walk steps to simulate before sampling
            the reward. Defaults to 1000, the value previously hard-coded.

    Returns:
        The reward sampled from the bandit for ``action``.
    """
    bandit = NonstationaryBandit()
    # Let the arm means drift before the reward is drawn.
    for _ in range(n_steps):
        bandit.step()
    return bandit.get_reward(action)
# Demo: pull one uniformly chosen arm (index 0 through 9) and report the
# reward that bandit_nonstat returns for it.
action = np.random.randint(10)
reward = bandit_nonstat(action)
print("Action:", action, "Reward:", reward)