-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrewardfn.py
38 lines (29 loc) · 974 Bytes
/
rewardfn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
def hamming(state1, state2):
    """Count the positions at which two sequences differ.

    The inputs are walked in lockstep and compared element by element
    with ``!=``, so they may be strings or any other iterables (lists,
    tuples, ...) of mutually comparable items.

    Only the overlapping prefix is compared: when the inputs have
    different lengths, the surplus items of the longer one are ignored
    rather than counted as mismatches.

    Returns:
        The number of mismatching positions, as an ``int``.
    """
    # Plain ``!=`` instead of ``str.__ne__``: the unbound str method
    # rejects non-str left operands and yields ``NotImplemented`` for
    # non-str right operands, which makes the sum fail.
    return sum(1 for left, right in zip(state1, state2) if left != right)
def firstRewardFunction(action, observation):
    """Compute a reward from the waiting time and the chosen action.

    Args:
        action: Object with a ``count`` method (a string in practice);
            each ``"g"`` adds 0.1 to the reward and each ``"r"``
            subtracts 0.1.
        observation: Indexable whose element 0 is used as the waiting
            time.

    Returns:
        The base reward plus the action bonus. The base reward is:

        * ``+1``   when the waiting time is exactly 0,
        * ``-0.5`` when ``waiting_time / 10 < 0.2``,
        * ``-1``   when ``waiting_time / 10 > 0.5``,
        * ``0``    otherwise.

        ``0`` is returned if evaluating the reward raises any ordinary
        exception (for example a missing or malformed observation).
    """
    try:
        waiting_time = observation[0]
        if waiting_time == 0:
            base = 1
        elif waiting_time / 10 < 0.2:
            base = -0.5
        elif waiting_time / 10 > 0.5:
            base = -1
        else:
            # Mid-range waiting times are neither rewarded nor penalised.
            base = 0
        return base + 0.1 * action.count("g") - 0.1 * action.count("r")
    except Exception:
        # Best-effort fallback for bad inputs. ``Exception`` (not a bare
        # ``except``) so KeyboardInterrupt/SystemExit still propagate.
        return 0
def secondRewardFunction(action, observation, last_action):
    """Compute a reward from traffic flow, action churn and emergency stops.

    Args:
        action: The action just taken; compared with ``last_action``
            through ``hamming``.
        observation: Indexable whose elements 1, 2 and 4 are used as
            occupancy, number of halting cars and number of emergency
            stops respectively.
        last_action: The previous action, or ``None`` when there is none.

    Returns:
        ``occupancy / halting_cars - hamming(last_action, action)
        - emergency_stops``.

        ``0`` is returned when ``last_action`` is ``None``, and also when
        evaluating the reward raises any ordinary exception. That
        includes the division by zero that occurs when there are no
        halting cars.
    """
    # Without a previous action there is nothing to compare against, so
    # skip the observation work entirely.
    if last_action is None:
        return 0
    try:
        occupancy = observation[1]
        halting_cars = observation[2]
        emergency_stops = observation[4]
        traffic_flow = occupancy / halting_cars
        return traffic_flow - hamming(last_action, action) - emergency_stops
    except Exception:
        # Best-effort fallback for bad inputs. ``Exception`` (not a bare
        # ``except``) so KeyboardInterrupt/SystemExit still propagate.
        return 0