Commit

GiBg1aN committed Jan 5, 2019
1 parent 7a6250e commit 60895fe
Showing 24 changed files with 715 additions and 75 deletions.
29 changes: 27 additions & 2 deletions Breakout/Breakout_DQN.py
@@ -14,16 +14,41 @@
from dqn_lib import DQNAgent


-# Original size: 210x160x3
 def pre_processing(observe):
-    grayscaled = rgb2gray(observe)  # 210x160
+    """
+    Frame grayscaling and subsampling.
+
+    Args:
+        observe: input frame
+    Returns:
+        processed_observe: output frame
+    """
+    grayscaled = rgb2gray(observe)  # From 210x160x3 to 210x160
     grayscaled = grayscaled[16:201, :]
     processed_observe = np.uint8(resize(grayscaled, (84, 84), mode='constant') * 255)
     return processed_observe


# 0: stay, 1: start, 2: right, 3: left
def experiment(n_episodes, max_action, default_policy=False, policy=None, render=False):
    """
    Run an RL experiment that can be either training or testing.

    Args:
        n_episodes: number of train/test episodes
        max_action: maximum number of steps per episode
        default_policy: boolean flag that switches between the training and testing phase
        policy: numpy tensor with a trained policy
        render: enable OpenAI environment graphical rendering
    Returns:
        Dictionary with:
            cumulative experiment outcomes
            list of steps per episode
            list of cumulative rewards
            trained policy
    """

    with tf.device('/gpu:0'):
        res = [0, 0]  # array of results accumulator: {[0]: Loss, [1]: Victory}
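As a quick check of the documented shapes, the sketch below runs a standalone copy of pre_processing on a randomly generated frame. It assumes rgb2gray and resize come from scikit-image (the file's import block sits above the visible part of this diff), and the fake frame stands in for a real 210x160x3 Atari observation.

import numpy as np
from skimage.color import rgb2gray      # assumed source of rgb2gray
from skimage.transform import resize    # assumed source of resize

def pre_processing(observe):
    # 210x160x3 RGB frame -> 84x84 grayscale uint8 frame
    grayscaled = rgb2gray(observe)       # float image in [0, 1], now 210x160
    grayscaled = grayscaled[16:201, :]   # crop the score area at the top
    return np.uint8(resize(grayscaled, (84, 84), mode='constant') * 255)

fake_frame = np.random.randint(0, 256, size=(210, 160, 3), dtype=np.uint8)
processed = pre_processing(fake_frame)
assert processed.shape == (84, 84) and processed.dtype == np.uint8
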
24 changes: 24 additions & 0 deletions CartPole/dqn_cart_pole.py
@@ -18,11 +18,34 @@
def accuracy(results):
    """
    Evaluate the accuracy of results, considering victories and defeats.

    Args:
        results: list of 2 elements holding the number of defeats ([0]) and victories ([1])
    Returns:
        the percentage of victories
    """
    return results[1] / (results[0] + results[1]) * 100


def experiment(n_episodes, default_policy=False, policy=None, render = False):
    """
    Run an RL experiment that can be either training or testing.

    Args:
        n_episodes: number of train/test episodes
        default_policy: boolean flag that switches between the training and testing phase
        policy: numpy tensor with a trained policy
        render: enable OpenAI environment graphical rendering
    Returns:
        Dictionary with:
            cumulative experiment outcomes
            list of steps per episode
            list of cumulative rewards
            trained policy
    """
    res = [0, 0]  # array of results accumulator: {[0]: Loss, [1]: Victory}
    scores = []   # Cumulative rewards
    steps = []    # Steps per episode
@@ -62,6 +85,7 @@ def experiment(n_episodes, default_policy=False, policy=None, render = False):
            x, x_dot, theta, theta_dot = new_state
            new_state = np.reshape(new_state, [1, 4])

            # Reward shaping
            r1 = (env.x_threshold - abs(x)) / env.x_threshold - 0.8
            r2 = (env.theta_threshold_radians - abs(theta)) / env.theta_threshold_radians - 0.5
            r3 = -abs(theta_dot)
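The three shaping terms above appear in this hunk without the line that combines them, which is below the fold. Assuming they are simply summed, a standalone version of the shaped CartPole reward would look like this (2.4 m and roughly 0.21 rad are the standard CartPole termination thresholds):

def shaped_reward(state, x_threshold=2.4, theta_threshold_radians=0.2095):
    # The plain sum of r1, r2 and r3 is an assumption; only the three terms are visible in the diff.
    x, x_dot, theta, theta_dot = state
    r1 = (x_threshold - abs(x)) / x_threshold - 0.8                              # keep the cart centred
    r2 = (theta_threshold_radians - abs(theta)) / theta_threshold_radians - 0.5  # keep the pole upright
    r3 = -abs(theta_dot)                                                         # damp the pole's angular velocity
    return r1 + r2 + r3

print(shaped_reward([0.0, 0.0, 0.0, 0.0]))  # 0.7 for a centred, upright, motionless pole
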
33 changes: 33 additions & 0 deletions Ensembling/ensembling_cart_pole.py
@@ -19,11 +19,27 @@
def accuracy(results):
    """
    Evaluate the accuracy of results, considering victories and defeats.

    Args:
        results: list of 2 elements holding the number of defeats ([0]) and victories ([1])
    Returns:
        the percentage of victories
    """
    return results[1] / (results[0] + results[1]) * 100


def evaluate(env, agentE):
    """
    Model validation for early stopping.

    Args:
        env: OpenAI environment object
        agentE: Ensembler object
    Returns:
        True if accuracy is 100%, False otherwise
    """
    eval_steps = []
    eval_scores = []
    eval_res = [0, 0]
@@ -67,6 +83,22 @@ def evaluate(env, agentE):


def experiment(n_episodes, default_policy=False, policy=None, render = False):
    """
    Run an RL experiment that can be either training or testing.

    Args:
        n_episodes: number of train/test episodes
        default_policy: boolean flag that switches between the training and testing phase
        policy: numpy tensor with a trained policy
        render: enable OpenAI environment graphical rendering
    Returns:
        Dictionary with:
            cumulative experiment outcomes
            list of steps per episode
            list of cumulative rewards
            trained policy
    """
    res = [0, 0]  # array of results accumulator: {[0]: Loss, [1]: Victory}
    scores = []   # Cumulative rewards
    steps = []    # Steps per episode
@@ -117,6 +149,7 @@ def experiment(n_episodes, default_policy=False, policy=None, render = False):
            x, x_dot, theta, theta_dot = new_state
            new_state = np.reshape(new_state, [1, 4])

            # Reward shaping
            r1 = (env.x_threshold - abs(x)) / env.x_threshold - 0.8
            r2 = (env.theta_threshold_radians - abs(theta)) / env.theta_threshold_radians - 0.5
            r3 = -abs(theta_dot)
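The accuracy helper and the res accumulator follow the same convention throughout this commit: index 0 counts defeats and index 1 counts victories. A minimal usage sketch with made-up episode outcomes:

def accuracy(results):
    # Percentage of victories over all recorded episodes.
    return results[1] / (results[0] + results[1]) * 100

res = [0, 0]  # {[0]: Loss, [1]: Victory}
for victory in [True, True, False, True]:  # hypothetical outcomes
    res[1 if victory else 0] += 1

print(accuracy(res))  # 75.0
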
24 changes: 24 additions & 0 deletions Ensembling/ensembling_mountaincar.py
@@ -19,11 +19,33 @@
def accuracy(results):
    """
    Evaluate the accuracy of results, considering victories and defeats.

    Args:
        results: list of 2 elements holding the number of defeats ([0]) and victories ([1])
    Returns:
        the percentage of victories
    """
    return results[1] / (results[0] + results[1]) * 100


def experiment(n_episodes, default_policy=False, policy=None, render=False):
    """
    Run an RL experiment that can be either training or testing.

    Args:
        n_episodes: number of train/test episodes
        default_policy: boolean flag that switches between the training and testing phase
        policy: numpy tensor with a trained policy
        render: enable OpenAI environment graphical rendering
    Returns:
        Dictionary with:
            cumulative experiment outcomes
            list of steps per episode
            list of cumulative rewards
            trained policy
    """
    res = [0, 0]  # array of results accumulator: {[0]: Loss, [1]: Victory}
    scores = []   # Cumulative rewards
    steps = []    # Steps per episode
@@ -74,6 +96,7 @@ def experiment(n_episodes, default_policy=False, policy=None, render=False):
            # r2 = reward + 0.2 * np.sin(3 * original_state[0])
            # r3 = reward + 0.7 * (original_state[1] * original_state[1])

            # Reward shaping
            r1 = reward + original_state[0]
            r2 = reward + np.sin(3 * original_state[0])
            r3 = reward + (original_state[1] * original_state[1])
@@ -111,6 +134,7 @@ def experiment(n_episodes, default_policy=False, policy=None, render=False):
            cumulative_reward += reward
            scores.append(cumulative_reward)
        else:
            # Model validation for early stopping
            evaluate = False
            eval_res = [0, 0]  # array of results accumulator: {[0]: Loss, [1]: Victory}
            eval_scores = []   # Cumulative rewards
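For MountainCar the three shaped signals above reward, respectively, the car's position, its height on the track (the track follows y = sin(3x)) and its squared velocity. The sketch below is self-contained; which ensemble member consumes which signal is not visible in this hunk.

import numpy as np

def shaped_rewards(reward, state):
    # state[0] is the car's position, state[1] its velocity (MountainCar observation).
    position, velocity = state
    r1 = reward + position                  # push towards the goal on the right
    r2 = reward + np.sin(3 * position)      # reward height on the track y = sin(3x)
    r3 = reward + velocity * velocity       # reward built-up momentum
    return r1, r2, r3

print(shaped_rewards(-1.0, (-0.5, 0.0)))  # roughly (-1.5, -2.0, -1.0) at the bottom of the valley
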
35 changes: 34 additions & 1 deletion Ensembling/ensembling_mountaincar_mixed.py
@@ -22,12 +22,28 @@
def accuracy(results):
    """
    Evaluate the accuracy of results, considering victories and defeats.

    Args:
        results: list of 2 elements holding the number of defeats ([0]) and victories ([1])
    Returns:
        the percentage of victories
    """
    return results[1] / (results[0] + results[1]) * 100


 def obs_to_state(env, obs, n_states):
-    """ Maps an observation to state """
+    """
+    Perform the discretisation of an observation.
+
+    Args:
+        env: OpenAI environment object
+        obs: current state observation
+        n_states: number of discrete bins
+    Returns:
+        discretised observation
+    """
     env_low = env.observation_space.low
     env_high = env.observation_space.high
     env_dx = (env_high - env_low) / n_states
@@ -37,6 +53,22 @@ def obs_to_state(env, obs, n_states):


def experiment(n_episodes, default_policy=False, policy=None, render=False):
    """
    Run an RL experiment that can be either training or testing.

    Args:
        n_episodes: number of train/test episodes
        default_policy: boolean flag that switches between the training and testing phase
        policy: numpy tensor with a trained policy
        render: enable OpenAI environment graphical rendering
    Returns:
        Dictionary with:
            cumulative experiment outcomes
            list of steps per episode
            list of cumulative rewards
            trained policy
    """
    res = [0, 0]  # array of results accumulator: {[0]: Loss, [1]: Victory}
    scores = []   # Cumulative rewards
    steps = []    # Steps per episode
@@ -82,6 +114,7 @@ def experiment(n_episodes, default_policy=False, policy=None, render=False):
            new_discretized_state = obs_to_state(env, new_state, n_states)
            original_state = new_state

            # Reward shaping
            # r1 = reward + 0.1 * original_state[0]
            # r2 = reward + 0.2 * np.sin(3 * original_state[0])
            # r3 = reward + 0.7 * (original_state[1] * original_state[1])
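The body of obs_to_state is collapsed beyond the env_dx line, so the completion below is a guess that follows the usual MountainCar discretisation pattern rather than the repository's exact code; n_states=40 in the usage note is likewise illustrative.

def obs_to_state(env, obs, n_states):
    # Discretise a continuous (position, velocity) observation into a pair of bin indices.
    env_low = env.observation_space.low
    env_high = env.observation_space.high
    env_dx = (env_high - env_low) / n_states     # bin width per dimension
    position_bin = int((obs[0] - env_low[0]) / env_dx[0])
    velocity_bin = int((obs[1] - env_low[1]) / env_dx[1])
    return position_bin, velocity_bin

# Usage sketch with the classic gym API:
#   env = gym.make('MountainCar-v0')
#   obs_to_state(env, env.reset(), n_states=40)
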
22 changes: 22 additions & 0 deletions FrozenLake/ql_4x4.py
@@ -9,11 +9,33 @@
def accuracy(results):
    """
    Evaluate the accuracy of results, considering victories and defeats.

    Args:
        results: list of 2 elements holding the number of defeats ([0]) and victories ([1])
    Returns:
        the percentage of victories
    """
    return results[1] / (results[0] + results[1]) * 100


def experiment(n_episodes, default_policy=False, policy=None, render=False):
    """
    Run an RL experiment that can be either training or testing.

    Args:
        n_episodes: number of train/test episodes
        default_policy: boolean flag that switches between the training and testing phase
        policy: numpy tensor with a trained policy
        render: enable OpenAI environment graphical rendering
    Returns:
        Dictionary with:
            cumulative experiment outcomes
            list of steps per episode
            list of cumulative rewards
            trained policy
    """
    res = [0, 0]  # array of results accumulator: {[0]: Loss, [1]: Victory}
    scores = []   # Cumulative rewards
    steps = []    # Steps per episode
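The training loop of ql_4x4.py is collapsed in this diff. As a rough orientation for where the docstring's "trained policy" tensor comes from, a generic tabular Q-learning loop over the old gym step API (the one in use in 2019) is sketched below; the hyperparameters are illustrative and the code is not taken from the repository.

import numpy as np

def q_learning(env, n_episodes, alpha=0.8, gamma=0.95, epsilon=0.1):
    # One Q-value per (state, action) pair of the discrete FrozenLake grid.
    Q = np.zeros((env.observation_space.n, env.action_space.n))
    for _ in range(n_episodes):
        state = env.reset()
        done = False
        while not done:
            # Epsilon-greedy action selection.
            if np.random.rand() < epsilon:
                action = env.action_space.sample()
            else:
                action = np.argmax(Q[state])
            new_state, reward, done, _ = env.step(action)
            # Standard one-step Q-learning update.
            Q[state, action] += alpha * (reward + gamma * np.max(Q[new_state]) - Q[state, action])
            state = new_state
    return Q
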
22 changes: 22 additions & 0 deletions FrozenLake/ql_4x4_deterministic.py
@@ -9,11 +9,33 @@
def accuracy(results):
    """
    Evaluate the accuracy of results, considering victories and defeats.

    Args:
        results: list of 2 elements holding the number of defeats ([0]) and victories ([1])
    Returns:
        the percentage of victories
    """
    return results[1] / (results[0] + results[1]) * 100


def experiment(n_episodes, default_policy=False, policy=None, render=False):
    """
    Run an RL experiment that can be either training or testing.

    Args:
        n_episodes: number of train/test episodes
        default_policy: boolean flag that switches between the training and testing phase
        policy: numpy tensor with a trained policy
        render: enable OpenAI environment graphical rendering
    Returns:
        Dictionary with:
            cumulative experiment outcomes
            list of steps per episode
            list of cumulative rewards
            trained policy
    """
    res = [0, 0]  # array of results accumulator: {[0]: Loss, [1]: Victory}
    scores = []   # Cumulative rewards
    steps = []    # Steps per episode
22 changes: 22 additions & 0 deletions FrozenLake/ql_8x8.py
@@ -9,11 +9,33 @@
def accuracy(results):
    """
    Evaluate the accuracy of results, considering victories and defeats.

    Args:
        results: list of 2 elements holding the number of defeats ([0]) and victories ([1])
    Returns:
        the percentage of victories
    """
    return results[1] / (results[0] + results[1]) * 100


def experiment(n_episodes, default_policy=False, policy=None, render=False):
    """
    Run an RL experiment that can be either training or testing.

    Args:
        n_episodes: number of train/test episodes
        default_policy: boolean flag that switches between the training and testing phase
        policy: numpy tensor with a trained policy
        render: enable OpenAI environment graphical rendering
    Returns:
        Dictionary with:
            cumulative experiment outcomes
            list of steps per episode
            list of cumulative rewards
            trained policy
    """
    res = [0, 0]  # array of results accumulator: {[0]: Loss, [1]: Victory}
    scores = []   # Cumulative rewards
    steps = []    # Steps per episode
22 changes: 22 additions & 0 deletions FrozenLake/ql_8x8_deterministic.py
@@ -9,11 +9,33 @@
def accuracy(results):
    """
    Evaluate the accuracy of results, considering victories and defeats.

    Args:
        results: list of 2 elements holding the number of defeats ([0]) and victories ([1])
    Returns:
        the percentage of victories
    """
    return results[1] / (results[0] + results[1]) * 100


def experiment(n_episodes, default_policy=False, policy=None, render=False):
    """
    Run an RL experiment that can be either training or testing.

    Args:
        n_episodes: number of train/test episodes
        default_policy: boolean flag that switches between the training and testing phase
        policy: numpy tensor with a trained policy
        render: enable OpenAI environment graphical rendering
    Returns:
        Dictionary with:
            cumulative experiment outcomes
            list of steps per episode
            list of cumulative rewards
            trained policy
    """
    res = [0, 0]  # array of results accumulator: {[0]: Loss, [1]: Victory}
    scores = []   # Cumulative rewards
    steps = []    # Steps per episode
(The remaining 15 changed files are not shown.)