-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathonline_update.m
59 lines (53 loc) · 1.91 KB
/
online_update.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
function q = online_update( q, state, past_state, quality, action, reward, download_time, capacity, complexity, lambda, alpha )
% ONLINE_UPDATE performs the real Q-learning update for the visited state
% transition, plus parallel "fictitious" updates for every other
% (buffer, quality) state group sharing the same capacity/complexity.
%
% Inputs:
%   q             = the Q-value table at instant t
%   state         = the state s_t
%   past_state    = the state s_{t-1}
%   quality       = the quality for the chosen action (q_{t-1})
%   action        = the chosen action a_{t-1}
%   reward        = the reward r_{t-1}
%   download_time = the download time for segment L_{t-1}
%   capacity      = the channel capacity h_{t-1}
%   complexity    = the complexity D_{t-1}
%   lambda        = exponential discount factor
%   alpha         = learning rate
%
% Output:
%   q = the updated Q-value table for instant t+1

% representative (mean) values of buffer level and past quality for the
% 10 buffer levels and 10 quality levels that index the state groups
buffers = [2.5 3.5 4.5 5.5 7 9 11 13.5 16.5 19];
qual = [0.82 0.855 0.885 0.91 0.93 0.95 0.97 0.985 0.9925 0.9975];

% fixed part of the past state (capacity and complexity); state indices
% are encoded in groups of 50, so a remainder of 0 wraps around to 50
% NOTE(review): the analogous computation on `state` in the original was
% dead code (never read) and has been removed.
past_fixed = mod(past_state, 50);
if (past_fixed(1) == 0)
    past_fixed(1) = 50;
end
if (past_fixed(3) == 0 && past_fixed(4) > 0)
    past_fixed(3) = 50;
end

% real update for the actually visited transition, with the real reward
q = single_update(q, state, past_state, reward, action, lambda, alpha);

% fictitious updates in parallel: one per 50-state group
for i = 0 : 50 : size(q, 1) - 2
    % skip the group containing the real state (already updated above)
    if (i == state(1) - mod(state(1), 50))
        continue;
    end
    % hypothetical buffer and past quality for this group; each quality
    % level spans 500 state indices, each buffer level spans 50
    buffer = buffers(1 + floor(mod(i, 500) / 50));
    past_quality = qual(1 + floor(i / 500));
    % reward the agent would have received in the fictitious state; kept
    % in its own variable so the real `reward` argument is not clobbered
    [r_u, r_b, pen] = find_reward(quality, past_quality, buffer, download_time);
    hyp_reward = quality - r_u - r_b - pen;
    % fictitious next-state index (buffer drained by the download, +2 for
    % the newly downloaded segment) and fictitious past-state index
    hyp_state = find_state(capacity, 0, quality, complexity, max(buffer - download_time, 0) + 2, 1, 2);
    hyp_past = past_fixed + [i 0 i 0];
    q = single_update(q, hyp_state, hyp_past, hyp_reward, action, lambda, alpha);
end
end