diff --git a/TutorialBot/tutorial_bot_output.py b/TutorialBot/tutorial_bot_output.py index 3723829..95b1018 100644 --- a/TutorialBot/tutorial_bot_output.py +++ b/TutorialBot/tutorial_bot_output.py @@ -5,7 +5,7 @@ class TutorialBotOutput: # Constants distance_from_ball_to_go_fast = tf.constant(600.0) - distance_from_ball_to_boost = tf.constant(1500.0) # Minimum distance to ball for using boost + distance_from_ball_to_boost = tf.constant(2000.0) # Minimum distance to ball for using boost unreal_to_degrees = tf.constant( 1.0 / 65536.0 * 360.0) # The numbers used to convert unreal rotation units to degrees true = tf.constant(1.0) @@ -23,7 +23,7 @@ def distance(self, x1, y1, x2, y2): def aim(self, bot_position, bot_rotation, target_x, target_y, target_z, distance_to_ball, is_on_ground): full_turn_angle = 70.0 half_turn_angle = 30.0 - powerslide_angle_constant = 710.0 # The angle (from the front of the bot to the ball) to start to powerslide. + powerslide_angle_constant = 80.0 # The angle (from the front of the bot to the ball) to start to powerslide. angle_front_to_target = self.feature_creator.generate_angle_to_target(bot_position.X, bot_position.Y, bot_rotation, @@ -46,14 +46,15 @@ def aim(self, bot_position, bot_rotation, target_x, target_y, target_z, distance jump = tf.cast(should_jump, tf.float32) - powerslide_angle = full_turn_angle * tf.cast(tf.less(1000.0, distance_to_ball), tf.float32) - powerslide_angle = powerslide_angle_constant + powerslide_angle - - ps = tf.greater(tf.abs(angle_front_to_target), powerslide_angle) + ps = tf.logical_and(tf.greater_equal(tf.abs(angle_front_to_target), full_turn_angle), + tf.less_equal(distance_to_ball, 2000.0)) + # ps = tf.greater_equal(tf.abs(angle_front_to_target), full_turn_angle) power_slide = tf.cast(ps, tf.float32) + should_not_dodge = tf.cast(tf.greater_equal(distance_to_ball, 500), tf.float32) + # if jump is 1 then we should not execute a turn - safe_steer = steer * (1.0 - jump) + safe_steer = steer * (1.0 - jump * should_not_dodge) return (safe_steer, power_slide, jump) def get_output_vector(self, values): @@ -79,8 +80,8 @@ def get_output_vector(self, values): xy_distance = self.distance(bot_pos.X, bot_pos.Y, ball_pos.X, ball_pos.Y) # Boost when ball is far enough away - boost = tf.logical_and(tf.greater(xy_distance, self.distance_from_ball_to_boost), - tf.greater(car_boost, 34)) + boost = tf.logical_and(tf.greater_equal(xy_distance, self.distance_from_ball_to_boost / car_boost), + tf.greater_equal(car_boost, 10)) full_throttle = 0.5 * tf.cast(tf.greater(xy_distance, self.distance_from_ball_to_go_fast), tf.float32) throttle = full_throttle + tf.constant(0.5) diff --git a/bot_manager.py b/bot_manager.py index 842f7e4..e8ff523 100644 --- a/bot_manager.py +++ b/bot_manager.py @@ -132,6 +132,7 @@ def run(self): print('\n\n\n\n Match has ended so ending bot loop\n\n\n\n\n') break + controller_input = None # Run the Agent only if the gameInfo has updated. 
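# --- Editor's sketch (not part of the diff; the bot_manager.py hunk resumes below) ---
# The boost condition changed in tutorial_bot_output.py above is now effectively
#     boost = (xy_distance >= distance_from_ball_to_boost / car_boost) and (car_boost >= 10)
# i.e. the distance threshold shrinks as the boost tank fills.  A minimal plain-Python
# restatement of that heuristic; the helper name is hypothetical and chosen only for illustration:
def should_boost(xy_distance, car_boost, distance_from_ball_to_boost=2000.0, min_boost=10.0):
    """Mirrors the TensorFlow condition above using plain floats."""
    if car_boost < min_boost:
        return False
    # with 10 boost the ball must be >= 200 uu away; with 100 boost only >= 20 uu
    return xy_distance >= distance_from_ball_to_boost / car_boost

# e.g. should_boost(500.0, 10.0) -> True; should_boost(15.0, 100.0) -> False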
tick_game_time = game_tick_packet.gameInfo.TimeSeconds should_call_while_paused = datetime.now() - last_call_real_time >= MAX_AGENT_CALL_PERIOD diff --git a/conversions/input/input_formatter.py b/conversions/input/input_formatter.py index 0614c0f..0421151 100644 --- a/conversions/input/input_formatter.py +++ b/conversions/input/input_formatter.py @@ -118,11 +118,17 @@ def get_car_info(self, game_tick_packet, index): player_team = game_tick_packet.gamecars[index].Team player_boost = game_tick_packet.gamecars[index].Boost last_touched_ball = self.get_last_touched_ball(game_tick_packet.gamecars[index], game_tick_packet.gameball.LatestTouch) - car_array = [player_x, player_y, player_z, player_pitch, player_yaw, player_roll, - player_speed_x, player_speed_y, player_speed_z, player_angular_speed_x, - player_angular_speed_y, player_angular_speed_z, - player_on_ground, player_supersonic, player_demolished, player_jumped, - player_double_jumped, player_team, player_boost, last_touched_ball] + car_array = [player_x, player_y, player_z, + player_pitch, player_yaw, player_roll, + player_speed_x, player_speed_y, player_speed_z, + player_angular_speed_x, player_angular_speed_y, player_angular_speed_z, + player_on_ground, + player_supersonic, + player_demolished, + player_jumped, player_double_jumped, + player_team, + player_boost, + last_touched_ball] return car_array def get_last_touched_ball(self, car, latest_touch): @@ -190,6 +196,15 @@ def get_score_info(self, Score, diff_in_score): return [score, goals, own_goals, assists, saves, shots, demolitions, diff_in_score] + def format_array(self, array): + """ + Formats the array to properly fit the model + :param input_length: The batch size of the array + :param array: A numpy array that is being rescaled + :return: A new array that has been properly formatted + """ + return np.array(array, dtype=np.float32) + def flattenArrays(self, array_of_array): """ Takes an array of arrays and flattens it into a single array diff --git a/conversions/input/simple_input_formatter.py b/conversions/input/simple_input_formatter.py new file mode 100644 index 0000000..71fb1fb --- /dev/null +++ b/conversions/input/simple_input_formatter.py @@ -0,0 +1,42 @@ +import numpy as np + +from conversions.input.input_formatter import InputFormatter + + +class SimpleInputFormatter(InputFormatter): + + def create_input_array(self, game_tick_packet, passed_time=0.0): + # posx, posy, posz, rotx, roty, rotz, vx, vy, vz, angvx, angy, angvz, boost_amt, ballx, bally, ballz, ballvx, ballvy, ballvz + inputs = [game_tick_packet.gamecars[self.index].Location.X, + game_tick_packet.gamecars[self.index].Location.Y, + game_tick_packet.gamecars[self.index].Location.Z, + game_tick_packet.gamecars[self.index].Rotation.Pitch, + game_tick_packet.gamecars[self.index].Rotation.Yaw, + game_tick_packet.gamecars[self.index].Rotation.Roll, + game_tick_packet.gamecars[self.index].Velocity.X, + game_tick_packet.gamecars[self.index].Velocity.Y, + game_tick_packet.gamecars[self.index].Velocity.Z, + game_tick_packet.gamecars[self.index].AngularVelocity.X, + game_tick_packet.gamecars[self.index].AngularVelocity.Y, + game_tick_packet.gamecars[self.index].AngularVelocity.Z, + game_tick_packet.gamecars[self.index].Boost, + game_tick_packet.gameball.Location.X, + game_tick_packet.gameball.Location.Y, + game_tick_packet.gameball.Location.Z, + game_tick_packet.gameball.Velocity.X, + game_tick_packet.gameball.Velocity.Y, + game_tick_packet.gameball.Velocity.Z + ] + return inputs + + def get_state_dim(self): + return 19 + 
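# --- Editor's sketch (not part of the diff) ---
# One possible way to drive the new SimpleInputFormatter above.  The 19 values are the
# car's position (3), rotation (3), velocity (3) and angular velocity (3), its boost
# amount (1), then the ball's position (3) and velocity (3).  The surrounding names here
# are hypothetical; the (team, index) constructor arguments match how base_model.py
# constructs its InputFormatter later in this diff.
import numpy as np
from conversions.input.simple_input_formatter import SimpleInputFormatter

formatter = SimpleInputFormatter(0, 0)  # (team, index)

def state_from_packet(game_tick_packet):
    # one frame becomes a (1, 19) float32 row ready to feed into a model
    inputs = formatter.create_input_array(game_tick_packet)
    return np.array(inputs, dtype=np.float32).reshape(1, formatter.get_state_dim())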
+ def format_array(self, input_length, array): + """ + Formats the array to properly fit the model + :param input_length: The batch size of the array + :param array: A numpy array that is being rescaled + :return: A new array that has been properly formatted + """ + return array.reshape(input_length, self.get_state_dim()) diff --git a/conversions/output_formatter.py b/conversions/output_formatter.py index 20c11cd..48beea7 100644 --- a/conversions/output_formatter.py +++ b/conversions/output_formatter.py @@ -85,14 +85,14 @@ def get_car_info(array, index): car_info.Rotation = create_3D_rotation(array, index + 3) car_info.Velocity = create_3D_point(array, index + 6) car_info.AngularVelocity = create_3D_point(array, index + 9) - car_info.bOnGround = array[12] - car_info.bSuperSonic = array[13] - car_info.bDemolished = array[14] - car_info.bJumped = array[15] - car_info.bDoubleJumped = array[16] - car_info.Team = array[17] - car_info.Boost = array[18] - car_info.bLastTouchedBall = array[19] + car_info.bOnGround = array[index + 12] + car_info.bSuperSonic = array[index + 13] + car_info.bDemolished = array[index + 14] + car_info.bJumped = array[index + 15] + car_info.bDoubleJumped = array[index + 16] + car_info.Team = array[index + 17] + car_info.Boost = array[index + 18] + car_info.bLastTouchedBall = array[index + 19] return car_info diff --git a/modelHelpers/actions/action_factory.py b/modelHelpers/actions/action_factory.py index d55ea59..a25837c 100644 --- a/modelHelpers/actions/action_factory.py +++ b/modelHelpers/actions/action_factory.py @@ -30,6 +30,14 @@ [('jump', (0, 2, 1)), ('boost', (0, 2, 1)), ('handbrake', (0, 2, 1))], []] +regression_everything = [[('throttle', (-1, 1.5, .5), LOSS_SQUARE_MEAN), ('steer', (-1, 1.5, .5), LOSS_SQUARE_MEAN), + ('yaw', (-1, 1.5, .5), LOSS_SQUARE_MEAN), ('pitch', (-1, 1.5, .5), LOSS_SQUARE_MEAN), + ('roll', (-1, 1.5, .5), LOSS_SQUARE_MEAN), ('jump', (0, 2, 1), LOSS_SQUARE_MEAN), + ('boost', (0, 2, 1), LOSS_SQUARE_MEAN), + ('handbrake', (0, 2, 1), LOSS_SQUARE_MEAN)], + [], + []] + def get_handler(split_mode=True, control_scheme=default_scheme): """ Creates a handler based on the options given. diff --git a/modelHelpers/actions/action_handler.py b/modelHelpers/actions/action_handler.py index d43ff0c..5c49ae8 100644 --- a/modelHelpers/actions/action_handler.py +++ b/modelHelpers/actions/action_handler.py @@ -165,9 +165,9 @@ def get_random_action(self): pass def get_random_option(self): - return [random.randrange(self.get_logit_size())] + return [random.randrange(self.get_action_sizes())] - def run_func_on_split_tensors(self, input_tensors, split_func): + def run_func_on_split_tensors(self, input_tensors, split_func, return_as_list=False): """ Optionally splits the tensor and runs a function on the split tensor If the tensor should not be split it runs the function on the entire tensor :param input_tensors: needs to have shape of (?, num_actions) :param split_func: a function that is called with a tensor or array the same rank as input_tensor. It should return a tensor with the same rank as input_tensor - :return: a stacked tensor (see tf.stack) or the same tensor depending on if it is in split mode or not.
+ :param return_as_list If true then the result will be a list of tensors instead of a single stacked tensor + :return: a single tensor or a tensor wrapped in a list """ if not isinstance(input_tensors, collections.Sequence): input_tensors = [input_tensors] - return split_func(*input_tensors) + if return_as_list: + return [split_func(*input_tensors)] + return [split_func(*input_tensors)] + def optionally_split_numpy_arrays(self, numpy_array, split_func, is_already_split=False): """ @@ -256,3 +260,6 @@ def scale_layer(self, layer, index): def get_loss_type(self, index): return 'softmax' + + def is_classification(self, index): + return True diff --git a/modelHelpers/actions/dynamic_action_handler.py b/modelHelpers/actions/dynamic_action_handler.py index bc004bc..a201021 100644 --- a/modelHelpers/actions/dynamic_action_handler.py +++ b/modelHelpers/actions/dynamic_action_handler.py @@ -62,29 +62,7 @@ def create_range_action(self, item): action_data = np.arange(*item[1]) return action_data - def create_actions(self): - self.reset() - - for i, item in enumerate(self.control_names): - self.control_names_index_map[item] = i - - ranges = self.control_scheme[0] - combo_scheme = self.control_scheme[1] - copies = self.control_scheme[2] - - for item in ranges: - action = self.create_range_action(item) - self.action_sizes.append(len(action)) - self.action_name_index_map[item[0]] = len(self.action_list_names) - if len(item) > 2: - self.action_loss_type_map[len(self.action_list_names)] = item[2] - else: - self.action_loss_type_map[len(self.action_list_names)] = LOSS_SPARSE_CROSS - self.action_list_names.append(item[0]) - self.actions.append(action) - - self.ranged_actions = list(self.actions) - + def create_combo_actions(self, combo_scheme): for item in combo_scheme: action = self.create_range_action(item) self.combo_name_list.append(item[0]) @@ -101,6 +79,38 @@ def create_actions(self): self.action_list_names.append(COMBO) self.actions.append(self.button_combo) + def create_ranged_actions(self, ranges): + for item in ranges: + action = self.create_range_action(item) + self.action_sizes.append(len(action)) + self.action_name_index_map[item[0]] = len(self.action_list_names) + if len(item) > 2: + self.action_loss_type_map[len(self.action_list_names)] = item[2] + else: + self.action_loss_type_map[len(self.action_list_names)] = LOSS_SPARSE_CROSS + self.action_list_names.append(item[0]) + self.actions.append(action) + + self.ranged_actions = list(self.actions) + + def create_actions(self): + self.reset() + + for i, item in enumerate(self.control_names): + self.control_names_index_map[item] = i + + ranges = self.control_scheme[0] + combo_scheme = self.control_scheme[1] + copies = self.control_scheme[2] + + if len(ranges) > 0: + self.create_ranged_actions(ranges) + + if len(combo_scheme) > 0: + self.create_combo_actions(combo_scheme) + else: + self.action_name_index_map[COMBO] = -1 + for item in copies: self.action_name_index_map[item[0]] = self.action_name_index_map[item[1]] return self.actions @@ -110,7 +120,7 @@ def create_action_map(self): def create_controller_from_selection(self, action_selection): if len(action_selection) != len(self.actions): - raise Exception('Invalid action selection size') + raise Exception('Invalid action selection size' + str(len(action_selection)) + ':' + str(len(self.actions))) combo_index = self.action_name_index_map[COMBO] controller_output = [] @@ -171,6 +181,7 @@ def create_tensorflow_controller_from_selection(self, action_selection, batch_si output = 
tf.gather_nd(ranged_action, tf.stack([indexer, tf.cast(selection, tf.int32)], axis=1)) controller_output.append(output) else: + # selection = tf.Print(selection, [selection], control) controller_output.append(selection) # make sure everything is the same type @@ -259,8 +270,9 @@ def create_action_indexes_graph(self, real_action, batch_size=None): elif indexes[action_index] is None: indexes[action_index] = tf.squeeze(real_control, axis=1) - combo_action = self._create_combo_index_graph(combo_list, real_action) - indexes[self.action_name_index_map[COMBO]] = tf.squeeze(combo_action, axis=1) + if len(self.combo_list) > 0: + combo_action = self._create_combo_index_graph(combo_list, real_action) + indexes[self.action_name_index_map[COMBO]] = tf.squeeze(combo_action, axis=1) result = tf.stack(indexes, axis=1) return result @@ -283,7 +295,7 @@ def get_action_loss_from_logits(self, logits, labels, index): def get_last_layer_activation_function(self, func, index): if self.is_classification(index): return func - return None + return tf.nn.tanh def scale_layer(self, layer, index): """ diff --git a/modelHelpers/data_normalizer.py b/modelHelpers/data_normalizer.py index 14a5aa8..bb7844f 100644 --- a/modelHelpers/data_normalizer.py +++ b/modelHelpers/data_normalizer.py @@ -170,7 +170,9 @@ def apply_normalization(self, input_array): # error_prevention = tf.cast(tf.equal(diff, 0.0), tf.float32) # diff = diff + error_prevention - result = (input_array - min) / diff + + #result = (input_array - min) / diff + result = input_array / diff #result = tf.Print(result, [min], 'min', summarize=16) #result = tf.Print(result, [max], 'max', summarize=16) #result = tf.Print(result, [input_array[0]], 'inp', summarize=30) diff --git a/models/actor_critic/base_actor_critic.py b/models/actor_critic/base_actor_critic.py index e13e27e..168c4b8 100644 --- a/models/actor_critic/base_actor_critic.py +++ b/models/actor_critic/base_actor_critic.py @@ -1,10 +1,10 @@ -from conversions import output_formatter from models import base_reinforcement from models import base_model import numpy as np import tensorflow as tf import random import livedata.live_data_util as live_data_util +import collections class BaseActorCritic(base_reinforcement.BaseReinforcement): @@ -16,7 +16,6 @@ class BaseActorCritic(base_reinforcement.BaseReinforcement): forced_frame_action = 500 is_graphing = False keep_prob = 0.5 - reg_param = 0.001 first_layer_name = 'first_layer' hidden_layer_name = 'hidden_layer' @@ -26,23 +25,28 @@ class BaseActorCritic(base_reinforcement.BaseReinforcement): # tensorflow objects discounted_rewards = None estimated_values = None - iterator = None logprobs = None def __init__(self, session, - state_dim, num_actions, + input_formatter_info=[0, 0], player_index=-1, action_handler=None, is_training=False, optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.1), summary_writer=None, summary_every=100, - config_file=None, - discount_factor=0.99, # discount future rewards + config_file=None ): - super().__init__(session, state_dim, num_actions, player_index, action_handler, is_training, optimizer, - summary_writer, summary_every, config_file, discount_factor) + super().__init__(session, num_actions, + input_formatter_info=input_formatter_info, + player_index=player_index, + action_handler=action_handler, + is_training=is_training, + optimizer=optimizer, + summary_writer=summary_writer, + summary_every=summary_every, + config_file=config_file) if player_index >= 0: self.rotating_expected_reward_buffer = 
live_data_util.RotatingBuffer(player_index) @@ -52,7 +56,9 @@ def printParameters(self): print('network size', self.network_size) print('number of layers', self.num_layers) print('keep probability', self.keep_prob) - print('regulation parameter', self.reg_param) + + def get_activation(self): + return tf.nn.elu # tf.nn.relu6 def load_config_file(self): super().load_config_file() @@ -86,7 +92,7 @@ def smart_argmax(self, input_tensor, index): # input_tensor = tf.Print(input_tensor, [input_tensor], str(index)) return tf.squeeze(input_tensor, axis=1) argmax_index = tf.cast(tf.argmax(input_tensor, axis=1), tf.int32) - indexer = tf.range(0, self.mini_batch_size) + indexer = tf.range(0, self.batch_size) slicer_data = tf.stack([indexer, argmax_index], axis=1) sliced_tensor = tf.gather_nd(input_tensor, slicer_data) condition = tf.greater(sliced_tensor, self.action_threshold) @@ -143,16 +149,12 @@ def create_copy_training_model(self, model_input=None, taken_actions=None): converted_input = self.get_input(model_input) if taken_actions is None: - actions_input = self.taken_actions_placeholder + actions_input = self.get_labels_placeholder() else: actions_input = taken_actions - if self.batch_size > self.mini_batch_size: - ds = tf.data.Dataset.from_tensor_slices((converted_input, actions_input)).batch(self.mini_batch_size) - self.iterator = ds.make_initializable_iterator() - batched_input, batched_taken_actions = self.iterator.get_next() - else: - batched_input = converted_input - batched_taken_actions = actions_input + + batched_input, batched_taken_actions = self.create_batched_inputs([converted_input, actions_input]) + with tf.name_scope("training_network"): self.discounted_rewards = tf.constant(0.0) with tf.variable_scope("actor_network", reuse=True): @@ -163,10 +165,9 @@ def create_copy_training_model(self, model_input=None, taken_actions=None): taken_actions = self.parse_actions(batched_taken_actions) + self.log_output_data() + self.train_op = self.create_training_op(self.logprobs, taken_actions) - if model_input is None: - return self.input_placeholder, self.taken_actions_placeholder - return model_input, self.taken_actions_placeholder def create_reinforcement_training_model(self, model_input=None): converted_input = self.get_input(model_input) @@ -212,6 +213,7 @@ def sample_action(self, input_state): else: action_scores = self.sess.run([self.smart_max], {self.input_placeholder: input_state}) + # print(action_scores) action_scores = np.array(action_scores).flatten() return action_scores @@ -242,15 +244,16 @@ def actor_network(self, input_states, variable_list=None, last_layer_list=None, last_layer_list = [[] for _ in range(len(self.action_handler.get_action_sizes()))] # define policy neural network actor_prefix = 'actor' + activation = self.get_activation() # input_states = tf.Print(input_states, [input_states], summarize=self.network_size, message='') with tf.variable_scope(self.first_layer_name): - layer1, _ = self.create_layer(tf.nn.relu6, input_states, 1, self.state_feature_dim, self.network_size, actor_prefix, + layer1, _ = self.create_layer(activation, input_states, 1, self.state_feature_dim, self.network_size, actor_prefix, variable_list=variable_list, dropout=False) - layers_list.append(layer1) + layers_list.append([layer1]) # layer1 = tf.Print(layer1, [layer1], summarize=self.network_size, message='') - inner_layer, output_size = self.create_hidden_layers(tf.nn.relu6, layer1, self.network_size, actor_prefix, + inner_layer, output_size = self.create_hidden_layers(activation, layer1, 
self.network_size, actor_prefix, variable_list=variable_list, layers_list=layers_list) output_layer = self.create_last_layer(tf.nn.sigmoid, inner_layer, output_size, @@ -285,13 +288,12 @@ def get_model_name(self): def parse_actions(self, taken_actions): return taken_actions - def get_regularization_loss(self, variables, prefix=None): - normalized_variables = [tf.reduce_sum(tf.nn.l2_loss(x) * self.reg_param) - for x in variables] - - reg_loss = tf.reduce_sum(normalized_variables, name=(prefix + '_reg_loss')) - tf.summary.scalar(prefix + '_reg_loss', reg_loss) - return reg_loss + def log_output_data(self): + """Logs the output of the last layer of the model""" + with tf.name_scope('model_output'): + for i in range(self.action_handler.get_number_actions()): + variable_name = str(self.action_handler.action_list_names[i]) + tf.summary.histogram(variable_name + '_output', self.actor_last_row_layer[i]) def create_hidden_layers(self, activation_function, input_layer, network_size, network_prefix, variable_list=None, layers_list=[]): @@ -300,6 +302,7 @@ def create_hidden_layers(self, activation_function, input_layer, network_size, n for i in range(0, self.num_layers - 2): inner_layer, _ = self.create_layer(activation_function, inner_layer, i + 2, network_size, network_size, network_prefix, variable_list=variable_list) + layers_list.append(inner_layer) return inner_layer, network_size def create_last_layer(self, activation_function, inner_layer, network_size, num_actions, network_prefix, @@ -307,12 +310,15 @@ def create_last_layer(self, activation_function, inner_layer, network_size, num_ with tf.variable_scope(self.last_layer_name): last_layer_name = 'final' if not self.action_handler.is_split_mode(): - self.actor_last_row_layer, _ = self.create_layer(activation_function, inner_layer, last_layer_name, + self.actor_last_row_layer, _ = self.create_layer(activation_function, inner_layer[0], last_layer_name, network_size, num_actions, network_prefix, - variable_list=last_layer_list, dropout=False) + variable_list=last_layer_list[0], dropout=False) + return self.actor_last_row_layer self.actor_last_row_layer = [] + if not isinstance(inner_layer, collections.Sequence): + inner_layer = [inner_layer] * self.action_handler.get_number_actions() for i, item in enumerate(self.action_handler.get_action_sizes()): variable_name = str(self.action_handler.action_list_names[i]) with tf.variable_scope(variable_name): @@ -322,8 +328,7 @@ def create_last_layer(self, activation_function, inner_layer, network_size, num_ variable_list=last_layer_list[i], dropout=False)[0] scaled_layer = self.action_handler.scale_layer(layer, i) self.actor_last_row_layer.append(scaled_layer) - # tf.summary.histogram(variable_name + '_output', scaled_layer) - + layers_list.append(self.actor_last_row_layer) return tf.concat(self.actor_last_row_layer, 1) def create_savers(self): @@ -371,3 +376,28 @@ def _create_layer_saver(self, network_name, layer_name, extra_info=None, variabl tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope_name)) else: self.add_saver(saver_name, variable_list) + + def get_variables_activations(self): + unified_layers = np.array(self.all_but_last_actor_layer).reshape((-1, 2)) + split_layers = np.array(self.last_row_variables).reshape((-1, len(self.last_row_variables), 2)) + unified_layers = self.sess.run(unified_layers.tolist()) + split_layers = self.sess.run(split_layers.tolist()) + network_variables = [] + for element in unified_layers: + layer = element + ['relu'] + network_variables.append([layer]) + for 
i, layer in enumerate(split_layers): + split_layer = [] + for j, element in enumerate(layer): + if i == len(split_layers) - 1: + output_type = ['sigmoid' if self.action_handler.is_classification(j) else 'none'] + else: + output_type = ['relu'] + layer = element + output_type + split_layer.append(layer) + network_variables.append(split_layer) + return network_variables + + def get_activations(self, input_array=None): + layer_activations = self.sess.run(self.layers, feed_dict={self.get_input_placeholder(): input_array}) + return layer_activations diff --git a/models/actor_critic/policy_gradient.py b/models/actor_critic/policy_gradient.py index a30d859..3bed9a9 100644 --- a/models/actor_critic/policy_gradient.py +++ b/models/actor_critic/policy_gradient.py @@ -5,28 +5,34 @@ from models import base_model from models.actor_critic.base_actor_critic import BaseActorCritic from modelHelpers import tensorflow_reward_manager +from models.actor_critic.split_layers import SplitLayers -class PolicyGradient(BaseActorCritic): +class PolicyGradient(SplitLayers): max_gradient = 1.0 total_loss_divider = 1.0 def __init__(self, session, - state_dim, num_actions, + input_formatter_info=[0, 0], player_index=-1, action_handler=None, is_training=False, - optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.01), + optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.1), summary_writer=None, summary_every=100, - config_file=None, - discount_factor=0.99, # discount future rewards + config_file=None ): - self.reward_manager = tensorflow_reward_manager.TensorflowRewardManager(state_dim) - - super().__init__(session, state_dim, num_actions, player_index, action_handler, is_training, optimizer, - summary_writer, summary_every, config_file, discount_factor) + super().__init__(session, num_actions, + input_formatter_info=input_formatter_info, + player_index=player_index, + action_handler=action_handler, + is_training=is_training, + optimizer=optimizer, + summary_writer=summary_writer, + summary_every=summary_every, + config_file=config_file) + self.reward_manager = tensorflow_reward_manager.TensorflowRewardManager(self.state_dim) def printParameters(self): super().printParameters() @@ -47,12 +53,6 @@ def load_config_file(self): except: print('unable to load total_loss_divider') - def get_input(self, model_input=None): - if model_input is None: - return super().get_input(self.input) - else: - return super().get_input(model_input) - def create_training_op(self, logprobs, taken_actions): critic_gradients, critic_loss, critic_reg_loss = self.create_critic_gadients() actor_gradients, actor_loss, actor_reg_loss = self.create_actor_gradients(logprobs, taken_actions) @@ -91,7 +91,7 @@ def create_actor_gradients(self, logprobs, taken_actions): total_loss = total_loss / self.total_loss_divider - # total_loss += actor_reg_loss + total_loss += actor_reg_loss # total_loss = tf.Print(total_loss, [total_loss], 'total_loss') @@ -115,6 +115,8 @@ def create_split_actor_loss(self, index, logprobs, taken_actions, advantages, ac # calculates the entropy loss from getting the label wrong cross_entropy_loss, wrongness, reduced = self.calculate_loss_of_actor(logprobs, taken_actions, index) + if reduced: + cross_entropy_loss = tf.reduce_mean(cross_entropy_loss) if not reduced: if self.action_handler.is_classification(index): tf.summary.histogram('actor_wrongness', wrongness) @@ -133,7 +135,7 @@ def create_split_actor_loss(self, index, logprobs, taken_actions, advantages, ac actor_reg_loss = 
self.get_regularization_loss(actor_network_variables, prefix="actor") - actor_loss = actor_loss + actor_reg_loss * self.reg_param + actor_loss = actor_loss + actor_reg_loss # compute actor gradients actor_gradients = self.optimizer.compute_gradients(actor_loss, @@ -151,15 +153,11 @@ def create_split_actor_loss(self, index, logprobs, taken_actions, advantages, ac return [actor_gradients, actor_loss] def create_critic_gadients(self): - critic_reg_loss = tf.reduce_sum([tf.reduce_sum(tf.square(x)) for x in self.critic_network_variables], - name='critic_reg_loss') - - tf.summary.scalar("critic_reg_loss", critic_reg_loss) - + critic_reg_loss = self.get_regularization_loss(self.critic_network_variables, prefix='critic') # compute critic gradients mean_square_loss = tf.reduce_mean(tf.square(self.discounted_rewards - self.estimated_values), name='mean_square_loss') - critic_loss = mean_square_loss + self.reg_param * critic_reg_loss + critic_loss = mean_square_loss + critic_reg_loss tf.summary.scalar("critic_loss", critic_loss) critic_gradients = self.optimizer.compute_gradients(critic_loss, self.critic_network_variables) return (critic_gradients, critic_loss, critic_reg_loss) @@ -198,50 +196,6 @@ def create_reward(self): def discount_rewards(self, input_rewards, input): return self.reward_manager.create_reward_graph(input) - #def parse_actions(self, taken_actions): - # return tf.cast(self.action_handler.create_indexes_graph(taken_actions), tf.int32) - - def run_train_step(self, calculate_summaries, input_states, actions, rewards=None): - # perform one update of training - if self.batch_size > self.mini_batch_size: - self.sess.run([self.input, self.taken_actions, self.iterator.initializer], - feed_dict={self.input_placeholder: input_states, self.taken_actions_placeholder: actions}) - num_batches = math.ceil(float(self.batch_size) / float(self.mini_batch_size)) - # print('num batches', num_batches) - counter = 0 - while counter < num_batches: - try: - result, summary_str = self.sess.run([ - self.train_op, - self.summarize if calculate_summaries else self.no_op - ]) - # emit summaries - if calculate_summaries: - self.summary_writer.add_summary(summary_str, self.train_iteration) - self.train_iteration += 1 - counter += 1 - except tf.errors.OutOfRangeError: - #print("End of training dataset.") - break - print('batch amount:', counter) - else: - result, summary_str = self.sess.run([ - self.train_op, - self.summarize if calculate_summaries else self.no_op - ], - feed_dict={ - self.input_placeholder: input_states, - self.taken_actions_placeholder: actions - }) - # emit summaries - if calculate_summaries: - self.summary_writer.add_summary(summary_str, self.train_iteration, - ) - self.train_iteration += 1 - - return None, None - - def get_model_name(self): return 'a_c_policy_gradient' + ('_split' if self.action_handler.is_split_mode else '') + str(self.num_layers) + '-layers' diff --git a/models/actor_critic/split_layers.py b/models/actor_critic/split_layers.py new file mode 100644 index 0000000..3c782c1 --- /dev/null +++ b/models/actor_critic/split_layers.py @@ -0,0 +1,138 @@ +import tensorflow as tf +import numpy as np + +from models import base_model +from models.actor_critic.base_actor_critic import BaseActorCritic + + +class SplitLayers(BaseActorCritic): + num_split_layers = 7 + gated_layer_index = -1 + split_hidden_layer_variables = None + split_hidden_layer_name = "split_hidden_layer" + gated_layer_name = "gated_layer" + + def printParameters(self): + super().printParameters() + print('Split Layer 
Parameters:') + print('number of split layers:', self.num_split_layers) + print('gate layer (not used if < 0):', self.gated_layer_index) + + def load_config_file(self): + super().load_config_file() + try: + self.num_split_layers = self.config_file.getint(base_model.MODEL_CONFIGURATION_HEADER, + 'num_split_layers') + except: + print('unable to load num_split_layers') + try: + self.gated_layer_index = self.config_file.getint(base_model.MODEL_CONFIGURATION_HEADER, + 'gated_layer_index') + except: + print('unable to load gated_layer_index') + + def create_gated_layer(self, inner_layer, input_state, layer_number, network_size, network_prefix, + variable_list=None): + with tf.variable_scope(self.gated_layer_name): + weight_input = network_prefix + "Winput" + str(layer_number) + weight_network = network_prefix + "Wnetwork" + str(layer_number) + weight_decider = network_prefix + "Wdecider" + str(layer_number) + + cut_size = network_size // 2.0 + + w_input = tf.get_variable(weight_input, [network_size, cut_size], + initializer=tf.random_normal_initializer()) + w_network = tf.get_variable(weight_network, [network_size, cut_size], + initializer=tf.random_normal_initializer()) + w_decider = tf.get_variable(weight_decider, [network_size, cut_size], + initializer=tf.random_normal_initializer()) + + if variable_list is not None: + variable_list.append(w_network) + variable_list.append(w_decider) + + decider = tf.nn.sigmoid(tf.matmul(inner_layer, w_decider), name="decider" + str(layer_number)) + + left = tf.matmul(input_state, w_input) * decider + right = tf.matmul(inner_layer, w_network) * (tf.constant(1.0) - decider) + + return left + right, cut_size + + def create_hidden_layers(self, activation_function, input_layer, network_size, network_prefix, variable_list=None, + layers_list=[]): + inner_layer = input_layer + layer_size = self.network_size + max_layer = self.num_layers - 2 - self.num_split_layers + for i in range(0, max_layer): + if i == self.gated_layer_index: + inner_layer, layer_size = self.create_gated_layer(inner_layer, input_layer, i + 2, layer_size, + network_prefix, + variable_list=variable_list) + layers_list.append(inner_layer) + else: + with tf.variable_scope(self.hidden_layer_name): + inner_layer, layer_size = self.create_layer(tf.nn.relu6, inner_layer, i + 2, layer_size, + self.network_size, + network_prefix, variable_list=variable_list) + layers_list.append(inner_layer) + return inner_layer, layer_size + + def create_last_layer(self, activation_function, inner_layer, network_size, num_actions, network_prefix, + last_layer_list=None, layers_list=[]): + with tf.variable_scope(self.split_hidden_layer_name): + output_layers, layer_size = self.create_split_layers(tf.nn.relu6, inner_layer, network_size, + self.num_split_layers, + network_prefix, + variable_list=last_layer_list, layers_list=layers_list) + + return super().create_last_layer(activation_function, output_layers, layer_size, num_actions, network_prefix, + last_layer_list, layers_list=layers_list) + + def create_split_layers(self, activation_function, inner_layer, network_size, + num_split_layers, network_prefix, variable_list=None, layers_list=[]): + + cut_size = self.network_size // 3 + previous_layer = [] + last_sizes = [] + step_size = (network_size - cut_size) // num_split_layers + for i in reversed(np.arange(cut_size, network_size, step_size)): + layer_size = [] + for j in range(self.action_handler.get_number_actions()): + layer_size.append(i) + last_sizes.append(layer_size) + layer_size = [] + last_layer_size = 
last_sizes[len(last_sizes) - 1] + for j in range(self.action_handler.get_number_actions()): + previous_layer.append(inner_layer) + layer_size.append(network_size) + # needs to be one more longer then the number of layers + last_sizes.insert(0, layer_size) + for i in range(0, num_split_layers): + split_layers = [] + for j, item in enumerate(self.action_handler.get_action_sizes()): + name = str(i) + with tf.variable_scope(str(self.action_handler.action_list_names[j])): + inner_layer, last_layer_size = self.create_layer(activation_function, previous_layer[j], 'split' + name, + last_sizes[i][j], last_sizes[i + 1][j], network_prefix, + variable_list=variable_list[j]) + split_layers.append(inner_layer) + previous_layer = split_layers + layers_list.append(split_layers) + return layers_list[len(layers_list) - 1], last_layer_size + + def create_savers(self): + super().create_savers() + # self._create_layer_saver('actor_network', self.split_hidden_layer_name) + self._create_layer_saver('actor_network', self.gated_layer_name) + + def _create_last_row_saver(self, network_name): + super()._create_last_row_saver(network_name) + # create the hidden row savers + split_las_layer = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, + scope=network_name + '/' + self.split_hidden_layer_name + '.*') + reshaped_list = np.reshape(np.array(split_las_layer), [-1, self.action_handler.get_number_actions(), 2]) + for i in range(len(reshaped_list)): + for j in range(len(reshaped_list[i])): + self._create_layer_saver(network_name, self.split_hidden_layer_name + '_' + str(i), + extra_info=self.action_handler.action_list_names[j], + variable_list=reshaped_list[i][j].tolist()) diff --git a/models/actor_critic/tutorial_model.py b/models/actor_critic/tutorial_model.py index 54468de..e7197a5 100644 --- a/models/actor_critic/tutorial_model.py +++ b/models/actor_critic/tutorial_model.py @@ -5,42 +5,44 @@ class TutorialModel(PolicyGradient): - num_split_layers = 7 - gated_layer_index = -1 - split_hidden_layer_variables = None - split_hidden_layer_name = "split_hidden_layer" - gated_layer_name = "gated_layer" max_gradient = 10.0 total_loss_divider = 2.0 # hidden_layer_activation = tf.nn.relu6 # hidden_layer_activation = tf.tanh - def __init__(self, session, state_dim, num_actions, player_index=-1, action_handler=None, is_training=False, - optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.1), summary_writer=None, summary_every=100, - config_file=None, teacher=None): - super().__init__(session, state_dim, num_actions, player_index, action_handler, is_training, optimizer, - summary_writer, summary_every, config_file) + def __init__(self, session, + num_actions, + input_formatter_info=[0, 0], + player_index=-1, + action_handler=None, + is_training=False, + optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.1), + summary_writer=None, + summary_every=100, + config_file=None, + teacher=None + ): if teacher is not None: self.teacher = '_' + teacher + else: + self.teacher = '' + super().__init__(session, num_actions, + input_formatter_info=input_formatter_info, + player_index=player_index, + action_handler=action_handler, + is_training=is_training, + optimizer=optimizer, + summary_writer=summary_writer, + summary_every=summary_every, + config_file=config_file) def printParameters(self): super().printParameters() print('TutorialModel Parameters:') - print('number of split layers:', self.num_split_layers) - print('gate layer (not used if < 0):', self.gated_layer_index) + print('Teacher:', self.teacher) def 
load_config_file(self): super().load_config_file() - try: - self.num_split_layers = self.config_file.getint(base_model.MODEL_CONFIGURATION_HEADER, - 'num_split_layers') - except: - print('unable to load num_split_layers') - try: - self.gated_layer_index = self.config_file.getint(base_model.MODEL_CONFIGURATION_HEADER, - 'gated_layer_index') - except: - print('unable to load gated_layer_index') try: self.teacher = '_' + self.config_file.get(base_model.MODEL_CONFIGURATION_HEADER, @@ -91,8 +93,7 @@ def calculate_loss_of_actor(self, logprobs, taken_actions, index): if self.action_handler.is_classification(index): wrongness += tf.cast(tf.abs(tf.cast(argmax, tf.float32) - taken_actions), tf.float32) else: - # doing anything else is very very slow - wrongness += 0.0 + wrongness += tf.abs(taken_actions - tf.round(logprobs * 2.0) / 2.0) else: # use temporarily wrongness += tf.log(1.0 + tf.cast(tf.abs(tf.cast(argmax, tf.float32) - taken_actions), tf.float32)) @@ -104,116 +105,9 @@ def calculate_loss_of_actor(self, logprobs, taken_actions, index): return cross_entropy_loss, wrongness, False - def create_gated_layer(self, inner_layer, input_state, layer_number, network_size, network_prefix, variable_list=None, scope=None): - with tf.variable_scope(self.gated_layer_name): - weight_input = network_prefix + "Winput" + str(layer_number) - weight_network = network_prefix + "Wnetwork" + str(layer_number) - weight_decider = network_prefix + "Wdecider" + str(layer_number) - - cut_size = network_size // 2.0 - - w_input = tf.get_variable(weight_input, [network_size, cut_size], - initializer=tf.random_normal_initializer()) - w_network = tf.get_variable(weight_network, [network_size, cut_size], - initializer=tf.random_normal_initializer()) - w_decider = tf.get_variable(weight_decider, [network_size, cut_size], - initializer=tf.random_normal_initializer()) - - if variable_list is not None: - variable_list.append(w_network) - variable_list.append(w_decider) - - decider = tf.nn.sigmoid(tf.matmul(inner_layer, w_decider), name="decider" + str(layer_number)) - - left = tf.matmul(input_state, w_input) * decider - right = tf.matmul(inner_layer, w_network) * (tf.constant(1.0) - decider) - - return left + right, cut_size - - def create_hidden_layers(self, activation_function, input_layer, network_size, network_prefix, variable_list=None, - layers_list=[]): - inner_layer = input_layer - layer_size = self.network_size - max_layer = self.num_layers - 2 - self.num_split_layers - for i in range(0, max_layer): - if i == self.gated_layer_index: - inner_layer, layer_size = self.create_gated_layer(inner_layer, input_layer, i + 2, layer_size, - network_prefix, - variable_list=variable_list) - else: - with tf.variable_scope(self.hidden_layer_name): - inner_layer, layer_size = self.create_layer(tf.nn.relu6, inner_layer, i + 2, layer_size, - self.network_size, - network_prefix, variable_list=variable_list) - return inner_layer, layer_size - - def create_last_layer(self, activation_function, inner_layer, network_size, num_actions, network_prefix, - last_layer_list=None, layers_list=[]): - with tf.variable_scope(self.split_hidden_layer_name): - inner_layers, layer_size = self.create_split_layers(tf.nn.relu6, inner_layer, network_size, - self.num_split_layers, - network_prefix, - variable_list=last_layer_list) - - for layer in inner_layers: - layers_list.append(layer) - output_layers = inner_layers[len(inner_layers) - 1] - return super().create_last_layer(activation_function, output_layers, layer_size, num_actions, network_prefix, - 
last_layer_list, layers_list=layers_list) - - def create_split_layers(self, activation_function, inner_layer, network_size, - num_split_layers, network_prefix, variable_list=None): - - cut_size = self.network_size // 3 - total_layers = [] - previous_layer = [] - last_sizes = [] - step_size = (network_size - cut_size) // num_split_layers - for i in reversed(np.arange(cut_size, network_size, step_size)): - layer_size = [] - for j in range(self.action_handler.get_number_actions()): - layer_size.append(i) - last_sizes.append(layer_size) - layer_size = [] - last_layer_size = last_sizes[len(last_sizes) - 1] - for j in range(self.action_handler.get_number_actions()): - previous_layer.append(inner_layer) - layer_size.append(network_size) - # needs to be one more longer then the number of layers - last_sizes.insert(0, layer_size) - for i in range(0, num_split_layers): - split_layers = [] - for j, item in enumerate(self.action_handler.get_action_sizes()): - name = str(i) - with tf.variable_scope(str(self.action_handler.action_list_names[j])): - inner_layer, last_layer_size = self.create_layer(activation_function, previous_layer[j], 'split' + name, - last_sizes[i][j], last_sizes[i + 1][j], network_prefix, - variable_list=variable_list[j]) - split_layers.append(inner_layer) - previous_layer = split_layers - total_layers.append(split_layers) - return total_layers, last_layer_size - def get_model_name(self): return 'tutorial_bot' + ('_split' if self.action_handler.is_split_mode else '') + self.teacher - def create_savers(self): - super().create_savers() - # self._create_layer_saver('actor_network', self.split_hidden_layer_name) - self._create_layer_saver('actor_network', self.gated_layer_name) - - def _create_last_row_saver(self, network_name): - super()._create_last_row_saver(network_name) - # create the hidden row savers - split_las_layer = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, - scope=network_name + '/' + self.split_hidden_layer_name + '.*') - reshaped_list = np.reshape(np.array(split_las_layer), [-1, self.action_handler.get_number_actions(), 2]) - for i in range(len(reshaped_list)): - for j in range(len(reshaped_list[i])): - self._create_layer_saver(network_name, self.split_hidden_layer_name + '_' + str(i), - extra_info=self.action_handler.action_list_names[j], - variable_list=reshaped_list[i][j].tolist()) - def add_histograms(self, gradients): # summarize gradients for grad, var in gradients: diff --git a/models/base_model.py b/models/base_model.py index c20c601..5b25893 100644 --- a/models/base_model.py +++ b/models/base_model.py @@ -1,8 +1,9 @@ -import hashlib +import math import os import tensorflow as tf import numpy as np +from conversions.input.input_formatter import InputFormatter from modelHelpers import tensorflow_feature_creator from modelHelpers.data_normalizer import DataNormalizer @@ -20,23 +21,32 @@ class BaseModel: is_online_training = False no_op = tf.no_op() train_op = no_op - model = None logits = None is_normalizing = True normalizer = None feature_creator = None load_from_checkpoints = None QUICK_SAVE_KEY = 'quick_save' + network_size = 128 + controller_predictions = None + input_formatter = None + summarize = no_op + iterator = None + reg_param = 0.001 + should_regulate = None """" This is a base class for all models It has a couple helper methods but is mainly used to provide a standard interface for running and training a model """ - def __init__(self, session, state_dim, num_actions, player_index=-1, action_handler=None, is_training=False, + def __init__(self, 
session, num_actions, + input_formatter_info=[0, 0], + player_index=-1, action_handler=None, is_training=False, optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.1), summary_writer=None, summary_every=100, config_file=None): # tensorflow machinery + self.train_iteration = 0 self.optimizer = optimizer self.sess = session self.summary_writer = summary_writer @@ -52,10 +62,10 @@ def __init__(self, session, state_dim, num_actions, player_index=-1, action_hand # output space self.num_actions = num_actions - + self.add_input_formatter(input_formatter_info[0], input_formatter_info[1]) # input space - self.state_dim = state_dim - self.state_feature_dim = state_dim + self.state_dim = self.input_formatter.get_state_dim() + self.state_feature_dim = self.state_dim self.is_training = is_training @@ -67,15 +77,19 @@ def __init__(self, session, state_dim, num_actions, player_index=-1, action_hand self.stored_variables = self._create_variables() def printParameters(self): + """Visually displays all the model parameters""" print('model parameters:') print('batch size:', self.batch_size) print('mini batch size:', self.mini_batch_size) print('using features', (self.feature_creator is not None)) + print('regulation parameter', self.reg_param) + print('is regulating parameter', self.should_regulate) def _create_variables(self): + """Creates any variables needed by this model. + Variables keep their value across multiple runs""" with tf.name_scope("model_inputs"): self.input_placeholder = tf.placeholder(tf.float32, shape=(None, self.state_dim), name="state_input") - self.input = self.input_placeholder return {} def store_rollout(self, input_state, last_action, reward): @@ -99,6 +113,14 @@ def store_rollout_batch(self, input_states, last_actions): """ print(' i do nothing!') + def add_input_formatter(self, team, index): + """Creates and adds an input formatter""" + self.input_formatter = InputFormatter(team, index) + + def create_input_array(self, game_tick_packet, frame_time): + """Creates the input array from the game_tick_packet""" + return self.input_formatter.create_input_array(game_tick_packet, frame_time) + def sample_action(self, input_state): """ Runs the model to get a single action that can be returned. @@ -107,7 +129,7 @@ def sample_action(self, input_state): A sample action that can then be used to get controller output. """ #always return an integer - return 10 + return self.sess.run(self.controller_predictions, feed_dict={self.get_input_placeholder(): input_state}) def create_copy_training_model(self, model_input=None, taken_actions=None): """ @@ -128,6 +150,72 @@ def create_copy_training_model(self, model_input=None, taken_actions=None): labels = None return loss, input, labels + def create_batched_inputs(self, inputs): + """ + Takes in the inputs and creates a batch variation of them. + :param inputs: This is an array or tuple of inputs that will be converted to their batch form. + :return: The outputs converted to their batch form. + """ + outputs = tuple(inputs) + if self.batch_size > self.mini_batch_size: + ds = tf.data.Dataset.from_tensor_slices(tuple(inputs)).batch(self.mini_batch_size) + self.iterator = ds.make_initializable_iterator() + outputs = self.iterator.get_next() + return outputs + + def create_feed_dict(self, input_array, label_array): + return {self.get_input_placeholder(): input_array, self.get_labels_placeholder(): label_array} + + def run_train_step(self, should_calculate_summaries, feed_dict=None, epoch=-1): + """ + Runs a single train step of the model. 
+ If batching is enable this will internally handle batching as well + :param should_calculate_summaries: True if summaries/logs from this train step should be saved. False otherwise + :param feed_dict: The inputs we feed into the model. + :param epoch: What number iteration we should be on + :return: The epoch number of the internal model state + """ + + if epoch != -1: + self.train_iteration = epoch + + should_summarize = should_calculate_summaries and self.summarize is not None and self.summary_writer is not None + + # perform one update of training + if self.batch_size > self.mini_batch_size: + _, = self.sess.run([self.iterator.initializer], + feed_dict=feed_dict) + num_batches = math.ceil(float(self.batch_size) / float(self.mini_batch_size)) + # print('num batches', num_batches) + counter = 0 + while counter < num_batches: + try: + result, summary_str = self.sess.run([ + self.train_op, + self.summarize if should_summarize else self.no_op + ]) + # emit summaries + if should_summarize: + self.summary_writer.add_summary(summary_str, self.train_iteration) + self.train_iteration += 1 + counter += 1 + except tf.errors.OutOfRangeError: + break + except Exception as e: + print(e) + print('batch amount:', counter) + else: + result, summary_str = self.sess.run([ + self.train_op, + self.summarize if should_summarize else self.no_op + ], + feed_dict=feed_dict) + # emit summaries + if should_summarize: + self.summary_writer.add_summary(summary_str, self.train_iteration) + self.train_iteration += 1 + return self.train_iteration + def apply_feature_creation(self, feature_creator): self.state_feature_dim = self.state_dim + tensorflow_feature_creator.get_feature_dim() self.feature_creator = feature_creator @@ -136,6 +224,7 @@ def get_input(self, model_input=None): """ Gets the input for the model. Also applies normalization + And feature creation :param model_input: input to be used if another input is not None :return: """ @@ -171,32 +260,32 @@ def create_model(self, model_input=None): """ input = self.get_input(model_input) - self.model, self.logits = self._create_model(input) - return self.model, self.logits + self.controller_predictions = self._create_model(input) def _create_model(self, model_input): """ - Called to create the model, this is called in the constructor + Called to create the model, this is not called in the constructor. :param model_input: A placeholder for the input data into the model. 
:return: A tensorflow object representing the output of the model - This output should be able to be run and create an action - And a tensorflow object representing the logits of the model - This output should be able to be used in training + This output should be able to be run and create an action that is parsed by the action handler """ - return None, None + return None - def _set_variables(self): + def _initialize_variables(self): + """ + Initializes all variables attempts to run them with placeholders if those are required + """ try: init = tf.global_variables_initializer() - self.sess.run(init, feed_dict={self.input_placeholder: np.zeros((self.batch_size, self.state_dim))}) + self.sess.run(init) except Exception as e: print('failed to initialize') print(e) try: init = tf.global_variables_initializer() - self.sess.run(init) + self.sess.run(init, feed_dict={self.input_placeholder: np.zeros((self.batch_size, self.state_dim))}) except Exception as e2: print('failed to initialize again') print(e2) @@ -210,7 +299,7 @@ def initialize_model(self): This is used to initialize the model variables This will also try to load an existing model if it exists """ - self._set_variables() + self._initialize_variables() #file does not exist too lazy to add check if self.model_file is None: @@ -225,26 +314,17 @@ def initialize_model(self): file = os.path.abspath(model_file) self.load_model(os.path.dirname(file), os.path.basename(file)) except Exception as e: - self._set_variables() + self._initialize_variables() print("Unexpected error loading model:", e) print('unable to load model') else: - self._set_variables() print('unable to find model to load') - self._add_summary_writer() + if self.summary_writer is not None: + self.summary_writer.add_graph(self.sess.graph) + self.summarize = tf.summary.merge_all() self.is_initialized = True - def run_train_step(self, calculate_summaries, input_states, actions): - """ - Runs a single train step of the model - :param calculate_summaries: If the model should calculate summaries - :param input_states: A batch of input states which should equal batch size - :param actions: A batch of actions which should equal batch size - :return: - """ - pass - def get_model_name(self): """ :return: The name of the model used for saving the file @@ -271,9 +351,9 @@ def get_event_path(self, filename, is_replay=False): :return: The path of the file """ dir_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) - base_path = "/training/data/events/" + base_path = "/training/training_events/" if is_replay: - base_path = "/training/replay_events/" + base_path = "/training/in_game_events/" complete_path = dir_path + base_path + self.get_model_name() + "/" + filename modified_path = complete_path counter = 0 @@ -282,15 +362,18 @@ def get_event_path(self, filename, is_replay=False): modified_path = complete_path + str(counter) return modified_path - def _add_summary_writer(self): - if self.summary_writer is not None: - self.summarize = tf.summary.merge_all() - # graph was not available when journalist was created - self.summary_writer.add_graph(self.sess.graph) - else: - self.summarize = self.no_op + def add_summary_writer(self, event_name, is_replay=False): + """ + Called to add a way to summarize the model info. 
+ This could be called before the graph is finalized + :param event_name: The file name of the summary + :param is_replay: True if the events should be saved for replay analysis + :return: + """ + self.summary_writer = tf.summary.FileWriter(self.get_event_path(event_name, is_replay)) def load_config_file(self): + """Loads a config file. The config file is stored in self.config_file""" try: self.model_file = self.config_file.get(MODEL_CONFIGURATION_HEADER, 'model_directory') except Exception as e: @@ -317,8 +400,27 @@ def load_config_file(self): 'is_normalizing') except Exception as e: print('unable to load if it should be normalizing defaulting to true') + try: + self.should_regulate = self.config_file.getboolean(MODEL_CONFIGURATION_HEADER, + 'should_regulate') + except Exception as e: + self.should_regulate = True + print('unable to load if it should be regulating defaulting to true') + try: + self.reg_param = self.config_file.getfloat(MODEL_CONFIGURATION_HEADER, + 'regulate_param') + except Exception as e: + self.reg_param = 0.001 + print('unable to load if it should be regulating defaulting to true') def add_saver(self, name, variable_list): + """ + Adds a saver to the saver map. + All subclasses should still use severs_map even if they do not store a tensorflow saver + :param name: The key of the saver + :param variable_list: The list of variables to save + :return: None + """ if len(variable_list) == 0: print('no variables for saver ', name) return @@ -329,6 +431,9 @@ def add_saver(self, name, variable_list): raise e def create_savers(self): + """Called to create the savers for the model. Or any other way to store the model + This is called after the model has been created but before it has been initialized. + This should make calls to add_saver""" self.add_saver(self.QUICK_SAVE_KEY, tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)) def _create_model_directory(self, file_path): @@ -337,11 +442,23 @@ def _create_model_directory(self, file_path): os.makedirs(dirname) def _save_keyed_model(self, model_path, key, global_step): + """ + :param model_path: The directory for which the model should live + :param key: The key for the savers_map variables + :param global_step: Which number step in training it is + """ keyed_path = self._create_saved_model_path(os.path.dirname(model_path), os.path.basename(model_path), key) self._create_model_directory(keyed_path) self._save_model(self.sess, self.savers_map[key], keyed_path, global_step) def _save_model(self, session, saver, file_path, global_step): + """ + Saves the model with the specific path, saver, and tensorflow session. 
+ :param session: The tensorflow session + :param saver: The object that is actually saving the model + :param file_path: The place where the model is stored + :param global_step: The current step number in training + """ try: saver.save(session, file_path, global_step=global_step) except Exception as e: @@ -379,6 +496,12 @@ def load_model(self, model_path, file_name, quick_save=False): self._load_keyed_model(model_path, file_name, key) def _load_keyed_model(self, model_path, file_name, key): + """ + Loads a model based on a key and a model path + :param model_path: The base directory of where the model lives + :param file_name: The name of this specific model piece + :param key: The key used for the savers_map + """ try: self._load_model(self.sess, self.savers_map[key], self._create_saved_model_path(model_path, file_name, key)) except Exception as e: @@ -386,6 +509,13 @@ def _load_keyed_model(self, model_path, file_name, key): print(e) def _load_model(self, session, saver, path): + """ + Loads a model, but only if its directory exists + :param session: Tensorflow session + :param saver: The object that saves and loads the model + :param path: The path to the saved model + :return: + """ if os.path.exists(os.path.dirname(path)): checkpoint_path = path if self.load_from_checkpoints: @@ -395,6 +525,7 @@ def _load_model(self, session, saver, path): print('model for saver not found:', path) def create_model_hash(self): + """Creates the hash of the model, used by the server to keep track of which model is being used""" all_saved_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES) print(len(all_saved_variables)) for i in all_saved_variables: @@ -402,3 +533,50 @@ def create_model_hash(self): saved_variables = self.sess.run(all_saved_variables) saved_variables = np.array(saved_variables) return int(hex(hash(str(saved_variables.data))), 16) % 2 ** 64 + + def get_input_placeholder(self): + """Returns the placeholder for getting inputs""" + return self.input_placeholder + + def get_labels_placeholder(self): + """Returns the placeholder for getting what actions have been taken""" + return self.no_op + + def get_variables_activations(self): + """ + Returns the weights, biases and activation type for each layer + :return: Return using [layer1, layer2, etc.] layer: [weights, biases, activation] + weights: [neuron0, neuron1, neuron2, etc.] which each include (from prev. layer): [neuron0, neuron1, etc.] + biases: [neuron0, neuron1, etc.] Each holding the bias value. + ex. layer: [[[[1, 2, 3], [2, 5, 1], [2, 5, 1]], [1, 4, 2, 1, 4], 'relu'], next layer] + """ + r = list() + weights = list() + biases = list() + for i in range(7): + biases.append(np.random.randint(-10, 10)) + r.append([[], biases, 'relu']) + biases.clear() + for i in range(5): + temp = list() + for n in range(7): + temp.append(np.random.randint(-20, 20)) + weights.append(temp) + biases.append(np.random.rand()) + r.append([weights, biases, 'sigmoid']) + return r + + def get_activations(self, input_array=None): + return [[np.random.randint(0, 30) for i in range(7)], [np.random.rand() for i in range(5)]] + + def get_regularization_loss(self, variables, prefix=None): + """Gets the regularization loss from the variables.
Used if the weights are getting to big""" + normalized_variables = [tf.reduce_sum(tf.nn.l2_loss(x) * self.reg_param) + for x in variables] + + reg_loss = tf.reduce_sum(normalized_variables, name=(prefix + '_reg_loss')) + tf.summary.scalar(prefix + '_reg_loss', reg_loss) + if self.should_regulate: + return reg_loss * (self.reg_param * 10.0) + else: + return tf.constant(0.0) diff --git a/models/base_reinforcement.py b/models/base_reinforcement.py index 32eb909..04be381 100644 --- a/models/base_reinforcement.py +++ b/models/base_reinforcement.py @@ -13,8 +13,8 @@ class BaseReinforcement(base_model.BaseModel): taken_actions = None def __init__(self, session, - state_dim, num_actions, + input_formatter_info=[0, 0], player_index=-1, action_handler=None, is_training=False, @@ -27,8 +27,15 @@ def __init__(self, session, anneal_steps=1000, # N steps for annealing exploration discount_factor=0.99, # discount future rewards ): - super().__init__(session, state_dim, num_actions, player_index, action_handler, is_training, optimizer, - summary_writer, summary_every, config_file) + super().__init__(session, num_actions, + input_formatter_info=input_formatter_info, + player_index=player_index, + action_handler=action_handler, + is_training=is_training, + optimizer=optimizer, + summary_writer=summary_writer, + summary_every=summary_every, + config_file=config_file) # counters self.train_iteration = 0 @@ -52,21 +59,21 @@ def printParameters(self): print('Reinforcment Parameters:') print('discount factor', self.discount_factor) - def _set_variables(self): + def _initialize_variables(self): try: init = tf.global_variables_initializer() - if self.action_handler.is_split_mode(): - actions_null = np.zeros((self.batch_size, self.action_handler.get_number_actions())) - else: - actions_null = np.zeros((self.batch_size,)) - self.sess.run(init, feed_dict={self.input_placeholder: np.zeros((self.batch_size, self.state_dim)), - self.taken_actions_placeholder: actions_null}) + self.sess.run(init) except Exception as e: print('failed to initialize') print(e) try: init = tf.global_variables_initializer() - self.sess.run(init) + if self.action_handler.is_split_mode(): + actions_null = np.zeros((self.batch_size, self.action_handler.get_number_actions())) + else: + actions_null = np.zeros((self.batch_size,)) + self.sess.run(init, feed_dict={self.get_input_placeholder(): np.zeros((self.batch_size, self.state_dim)), + self.taken_actions_placeholder: actions_null}) except Exception as e2: print('failed to initialize again') print(e2) @@ -96,6 +103,9 @@ def _create_variables(self): self.input_rewards = self.create_reward() return {} + def get_labels_placeholder(self): + return self.taken_actions + def store_rollout(self, input_state, last_action, reward): if self.is_training: if self.action_buffer is None: @@ -143,14 +153,12 @@ def update_model(self): if len(self.state_buffer) == 0: return # whether to calculate summaries - calculate_summaries = (self.summarize is not None and self.summary_writer is not None and - self.train_iteration % self.summary_every == 0) # update policy network with the rollout in batches input_states = np.array(self.state_buffer) actions = np.array(self.action_buffer) rewards = None - self.run_train_step(calculate_summaries, input_states, actions, rewards) + self.run_train_step(True, feed_dict=self.create_feed_dict(input_states, actions)) self.anneal_exploration() self.train_iteration += 1 @@ -158,24 +166,6 @@ def update_model(self): # clean up self.clean_up() - def run_train_step(self, 
calculate_summaries, input_states, actions, rewards=None): - if rewards is None: - rewards = np.zeros([self.batch_size, 1]) - # perform one update of training - result, summary_str = self.sess.run([ - self.train_op, - self.summarize if calculate_summaries else self.no_op - ], feed_dict={ - self.input_placeholder: input_states, - self.taken_actions_placeholder: actions, - self.input_rewards: rewards - }) - - if self.summary_writer is not None: - self.summary_writer.add_summary(summary_str, self.train_iteration) - - return result, summary_str - def anneal_exploration(self, stategy='linear'): ratio = max((self.anneal_steps - self.train_iteration) / float(self.anneal_steps), 0) self.exploration = (self.init_exp - self.final_exp) * ratio + self.final_exp diff --git a/models/fake_model.py b/models/fake_model.py index 76734cb..cb18bb5 100644 --- a/models/fake_model.py +++ b/models/fake_model.py @@ -8,11 +8,20 @@ class FakeModel(BaseModel): teacher_package = None - def __init__(self, session, state_dim, num_actions, player_index=-1, action_handler=None, is_training=False, + def __init__(self, session, num_actions, + input_formatter_info=[0, 0], + player_index=-1, action_handler=None, is_training=False, optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.1), summary_writer=None, summary_every=100, config_file=None): - super().__init__(session, state_dim, num_actions, player_index, action_handler, is_training, optimizer, - summary_writer, summary_every, config_file) + super().__init__(session, num_actions, + input_formatter_info=input_formatter_info, + player_index=player_index, + action_handler=action_handler, + is_training=is_training, + optimizer=optimizer, + summary_writer=summary_writer, + summary_every=summary_every, + config_file=config_file) def get_class(self, class_package, class_name): class_package = importlib.import_module(class_package) diff --git a/models/keras/__init__.py b/models/keras/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/models/keras/base_keras_model.py b/models/keras/base_keras_model.py new file mode 100644 index 0000000..2f78da4 --- /dev/null +++ b/models/keras/base_keras_model.py @@ -0,0 +1,219 @@ +from conversions.input.simple_input_formatter import SimpleInputFormatter +from models.base_model import BaseModel, MODEL_CONFIGURATION_HEADER +from keras.models import Sequential, Model +from keras.layers import Input, Dense, Dropout, LeakyReLU, PReLU +from keras import backend as K +from keras import optimizers, regularizers +from keras.callbacks import EarlyStopping, Callback, TensorBoard +# from keras.utils import plot_model +import numpy as np +import tensorflow as tf + + +class BaseKerasModel(BaseModel): + + shared_hidden_layers = 0 + split_hidden_layers = 0 + model_activation = None + kernel_regularizer = None + loss_weights = None + loss = None + tensorboard = None + names = [] + + def __init__(self, session, + num_actions, + input_formatter_info=[0, 0], + player_index=-1, + action_handler=None, + is_training=False, + optimizer=None, + summary_writer=None, + summary_every=100, + config_file=None + ): + K.set_session(session) + super().__init__(session, num_actions, + input_formatter_info=input_formatter_info, + player_index=player_index, + action_handler=action_handler, + is_training=is_training, + optimizer=optimizer, + summary_writer=summary_writer, + summary_every=summary_every, + config_file=config_file) + + def printParameters(self): + super().printParameters() + + def _create_variables(self): + pass + + def 
add_input_formatter(self, team, index): + self.input_formatter = SimpleInputFormatter(team, index) + + def sample_action(self, input_state): + relative_positions = input_state[:, 13:16] - input_state[:, 0:3] + rotations = input_state[:, 3:6] + unrotated_positions = self.unrotate_positions(relative_positions, rotations) + + input_state = np.column_stack((input_state, unrotated_positions)) + outputs = self.model.predict(input_state) + outputs = np.array(outputs).flatten().tolist() + return outputs + + def unrotate_positions(self, relative_positions, rotations): + new_positions = relative_positions + + # YAW + yaws = rotations[:, 1] + yaws = -yaws / 32768. * np.pi + + new_positions[:, 0], new_positions[:, 1] = new_positions[:, 0] * np.cos(yaws) - new_positions[:, 1] * np.sin(yaws), new_positions[:, 0] * np.sin(yaws) + new_positions[:, 1] * np.cos(yaws) + + # PITCH + + pitchs = rotations[:, 0] + pitchs = pitchs / 32768. * np.pi + + new_positions[:, 2], new_positions[:, 0] = new_positions[:, 2] * np.cos(pitchs) - new_positions[:, 0] * np.sin(pitchs), new_positions[:, 2] * np.sin(pitchs) + new_positions[:, 0] * np.cos(pitchs) + + # ROLL + + rolls = rotations[:, 2] + rolls = rolls / 32768. * np.pi + + new_positions[:, 1], new_positions[:, 2] = new_positions[:, 1] * np.cos(rolls) - new_positions[:, 2] * np.sin(rolls), new_positions[:, 1] * np.sin(rolls) + new_positions[:, 2] * np.cos(rolls) + + return new_positions + + def create_copy_training_model(self, model_input=None, taken_actions=None): + loss_weights = {} + for i, control in enumerate(self.action_handler.control_names): + is_classification = self.action_handler.is_classification(i) + loss_weights['o_%s' % + control] = 0.01 if is_classification else 0.1 + + loss_weights['o_steer'] *= 20 + loss_weights['o_boost'] *= 10 + loss_weights['o_throttle'] *= 20 + loss_weights['o_jump'] *= 20 + self.loss_weights = loss_weights + # loss_weights['o_pitch'] *= 3 + # loss_weights['o_pitch'] *= 0.001 + # loss_weights['o_yaw'] *= 0.001 + # loss_weights['o_roll'] *= 0.001 + + def get_input(self, model_input=None): + if model_input is None: + return Input(shape=(self.state_dim + 3,)) + else: + return Input(shape=(self.state_dim + 3,), tensor=model_input) + + def _create_model(self, model_input): + """Generates the Keras model""" + + x = model_input + for hidden_layer_i in range(1, self.shared_hidden_layers + 1): + x = Dense(self.network_size, activation=self.model_activation, kernel_regularizer=self.kernel_regularizer, name='hidden_layer_%s' % + hidden_layer_i)(x) + x = Dropout(0.4)(x) + + shared_output = x + outputs = [] + + extra_hidden_layer_nodes = self.network_size / self.action_handler.get_number_actions() + loss = {} + action_sizes = self.action_handler.get_action_sizes() + for i, control in enumerate(self.action_handler.action_list_names): + output_size = action_sizes[i] + x = shared_output + for hidden_layer_i in range(1, self.split_hidden_layers + 1): + x = Dense(extra_hidden_layer_nodes, activation=self.model_activation, kernel_regularizer=self.kernel_regularizer, + name='hidden_layer_%s_%s' % (control, hidden_layer_i))(x) + x = Dropout(0.4)(x) + + if self.action_handler.is_classification(i): + activation = 'sigmoid' + loss_name = 'categorical_crossentropy' + else: + activation = 'tanh' + loss_name = 'mean_absolute_error' + _output = Dense(output_size, activation=activation, + name='o_%s' % control)(x) + if self.action_handler.is_classification(i): + _output = K.argmax(_output, axis=1) + outputs.append(_output) + loss['o_%s' % control] = loss_name 
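# --- Illustrative annotation (not part of the original patch) ---
# The loop above fills `loss` with one entry per output head, while `loss_weights`
# is built in create_copy_training_model. For a scheme where, say, 'steer' is treated
# as a regression control and 'jump' as a classification control, the result would be roughly:
#   loss         = {'o_steer': 'mean_absolute_error', 'o_jump': 'categorical_crossentropy', ...}
#   loss_weights = {'o_steer': 0.1 * 20, 'o_jump': 0.01 * 20, 'o_boost': 0.01 * 10, ...}
# The exact keys depend on action_handler.action_list_names / control_names and on which
# controls the chosen scheme treats as classification vs. regression.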
+ + self.loss = loss + + self.model = Model(inputs=model_input, outputs=outputs) + + return None + + def initialize_model(self): + self.model.compile(optimizer='adam', loss=self.loss, loss_weights=self.loss_weights) + super().initialize_model() + self.tensorboard.set_model(self.model) + + def _initialize_variables(self): + super()._initialize_variables() + + def run_train_step(self, should_calculate_summaries, feed_dict=None, epoch=-1): + model_input = None + model_label = None + if self.train_iteration is not -1: + self.train_iteration = epoch + if feed_dict is not None: + model_input = feed_dict[self.get_input_placeholder()] + model_label = feed_dict[self.get_labels_placeholder()] + logs = self.model.train_on_batch(model_input, model_label) + if should_calculate_summaries and self.tensorboard is not None: + self.tensorboard.on_epoch_end(self.train_iteration, logs) + self.train_iteration += 1 + + def create_batched_inputs(self, inputs): + return inputs + + def add_summary_writer(self, even_name): + log_dir = self.get_event_path(even_name) + self.tensorboard = TensorBoard( + write_graph=False, write_images=False, log_dir=log_dir, histogram_freq=10) + + def load_config_file(self): + super().load_config_file() + try: + self.model_file = self.config_file.get(MODEL_CONFIGURATION_HEADER, 'model_directory') + except Exception as e: + print('model directory is not in config', e) + + def add_saver(self, name, variable_list): + super().add_saver(name, variable_list) + + def create_savers(self): + self.add_saver(self.QUICK_SAVE_KEY, True) + self.add_saver('NotQuickSave', False) + + def _save_model(self, session, saver, file_path, global_step): + if saver: + file_name = file_path + str(global_step) + else: + file_name = file_path + self.model.save_weights(file_name) + self.model.save(file_name + '.h5') + + def _load_model(self, session, saver, path): + self.model.load_weights(path, by_name=True) + + def create_model_hash(self): + return super().create_model_hash() + + def get_model_name(self): + return 'keras' + + def get_input_placeholder(self): + return 'Input' + + def get_labels_placeholder(self): + return 'Output' diff --git a/saltie.py b/saltie.py index a84e123..f9f48b4 100644 --- a/saltie.py +++ b/saltie.py @@ -1,16 +1,12 @@ # Defined as a generic bot, can use multiple models -from conversions.input import input_formatter -from conversions.input.input_formatter import InputFormatter import importlib import inspect -from modelHelpers.actions import action_handler, action_factory, dynamic_action_handler +from modelHelpers.actions import action_factory from modelHelpers import reward_manager from modelHelpers.tensorflow_feature_creator import TensorflowFeatureCreator -from models.actor_critic import policy_gradient import livedata.live_data_util as live_data_util import numpy as np -import random import tensorflow as tf import time @@ -33,7 +29,6 @@ def __init__(self, name, team, index, config_file=None): self.config_file = config_file self.index = index self.load_config_file() - self.inp = InputFormatter(team, index) self.reward_manager = reward_manager.RewardManager() config = tf.ConfigProto( device_count={'GPU': 0} @@ -41,21 +36,18 @@ def __init__(self, name, team, index, config_file=None): self.sess = tf.Session(config=config) # self.sess = tf.Session() self.actions_handler = action_factory.get_handler(control_scheme=self.control_scheme) - self.state_dim = input_formatter.get_state_dim() self.num_actions = self.actions_handler.get_logit_size() print('num_actions', self.num_actions) self.model 
= self.get_model_class()(self.sess, - self.state_dim, self.num_actions, + input_formatter_info=[team, index], player_index=self.index, action_handler=self.actions_handler, config_file=config_file, is_training=False) - writer = self.model.summary_writer = tf.summary.FileWriter( - self.model.get_event_path('random_packet', is_replay=True)) + self.model.add_summary_writer('random_packet', is_replay=True) - self.model.summary_writer = writer self.model.batch_size = 1 self.model.mini_batch_size = 1 @@ -66,7 +58,7 @@ def __init__(self, name, team, index, config_file=None): self.model.apply_feature_creation(TensorflowFeatureCreator()) try: - self.model.create_model(self.model.input_placeholder) + self.model.create_model(self.model.get_input_placeholder()) except TypeError as e: raise Exception('failed to create model') from e @@ -138,8 +130,8 @@ def get_output_vector(self, game_tick_packet): if self.last_frame_time is not None: frame_time = time.time() - self.last_frame_time self.last_frame_time = time.time() - input_state = self.inp.create_input_array(game_tick_packet, frame_time) - if self.state_dim != len(input_state): + input_state = self.model.create_input_array(game_tick_packet, frame_time) + if self.model.state_dim != len(input_state): print('wrong input size', self.index, len(input_state)) return self.actions_handler.create_controller_from_selection( self.actions_handler.get_random_option()) # do not return anything @@ -164,6 +156,7 @@ def get_output_vector(self, game_tick_packet): action = self.actions_handler.get_random_option() self.previous_action = action controller_selection = self.actions_handler.create_controller_from_selection(action) + controller_selection = [max(-1, min(1, control)) for control in controller_selection] return controller_selection def create_model_hash(self): @@ -172,4 +165,3 @@ def create_model_hash(self): except Exception as e: print('creating hash exception', e) return 0 -0 diff --git a/saltie2.cfg b/saltie2.cfg index 38c5d6e..03aae05 100644 --- a/saltie2.cfg +++ b/saltie2.cfg @@ -23,10 +23,10 @@ goal_explosion_id = 1905 [Model Configuration] model_package = models.actor_critic.tutorial_model model_name = TutorialModel -teacher = tutorial_bot_output control_scheme = regression_controls #model_directory = training/data/tutorial_bot_split10-layers/2/trained_variables -num_layers = 5 +teacher = replay_files +num_layers = 4 num_split_layers = 2 num_width = 128 is_graphing = True diff --git a/saltiek.cfg b/saltiek.cfg new file mode 100644 index 0000000..f4b0fc8 --- /dev/null +++ b/saltiek.cfg @@ -0,0 +1,37 @@ +[Bot Location] +# Path to module from runner +# Only need this if RLBot controlled +agent_module = saltie + +[Participant Loadout] +# Name that will be displayed in game +name = Saltie10 +team_color_id = 27 +custom_color_id = 0 +car_id = 23 +decal_id = 1435 +wheels_id = 1728 +boost_id = 69 +antenna_id = 217 +hat_id = 580 +paint_finish_1_id = 1978 +paint_finish_2_id = 1978 +engine_audio_id = 0 +trails_id = 1997 +goal_explosion_id = 1905 + +[Model Configuration] +model_package = models.keras.base_keras_model +model_name = BaseKerasModel +teacher = tutorial_bot_output +control_scheme = regression_everything +#model_directory = training/data/tutorial_bot_split10-layers/2/trained_variables +num_layers = 5 +num_split_layers = 2 +num_width = 128 +is_graphing = True +is_evaluating = True +exploration_factor = 500 +should_graph = False +batch_size = 1 +mini_batch_size = 1 diff --git a/saved_tf_checkpoints/nnatba_split/checkpoint 
b/saved_tf_checkpoints/nnatba_split/checkpoint deleted file mode 100644 index 91b5650..0000000 --- a/saved_tf_checkpoints/nnatba_split/checkpoint +++ /dev/null @@ -1,2 +0,0 @@ -model_checkpoint_path: "D:\\WindowsDocuments\\GitHub\\Saltie\\training\\data\\nnatba\\trained_variables_drop.ckpt" -all_model_checkpoint_paths: "D:\\WindowsDocuments\\GitHub\\Saltie\\training\\data\\nnatba\\trained_variables_drop.ckpt" diff --git a/saved_tf_checkpoints/nnatba_split/trained_variables_drop.ckpt.data-00000-of-00001 b/saved_tf_checkpoints/nnatba_split/trained_variables_drop.ckpt.data-00000-of-00001 deleted file mode 100644 index ed22808..0000000 Binary files a/saved_tf_checkpoints/nnatba_split/trained_variables_drop.ckpt.data-00000-of-00001 and /dev/null differ diff --git a/saved_tf_checkpoints/nnatba_split/trained_variables_drop.ckpt.index b/saved_tf_checkpoints/nnatba_split/trained_variables_drop.ckpt.index deleted file mode 100644 index b7c387a..0000000 Binary files a/saved_tf_checkpoints/nnatba_split/trained_variables_drop.ckpt.index and /dev/null differ diff --git a/tests/actions_test.py b/tests/actions_test.py index 6e2f2d6..0c15057 100644 --- a/tests/actions_test.py +++ b/tests/actions_test.py @@ -1,4 +1,4 @@ -from modelHelpers.actions import action_handler, dynamic_action_handler, action_factory +from modelHelpers.actions import action_factory import tensorflow as tf import numpy as np @@ -95,7 +95,7 @@ def test2(): def test3(): handler = action_factory.get_handler(False) - dynamic_handler = action_factory.get_handler(True, dynamic_action_handler.super_split_scheme) + dynamic_handler = action_factory.get_handler(True, action_factory.regression_controls) session = tf.Session(config=tf.ConfigProto( device_count={'GPU': 0} @@ -109,20 +109,23 @@ def test3(): [-0.2, -0.3, 0.2, 0.3, 0.0, 1.0, 0.0, 0.0], [ 1.0, -0.3, 0.2, 0.3, 0.0, 0.0, 1.0, 0.0], [-1.0, -0.3, 0.2, 0.3, 0.0, 0.0, 0.0, 1.0], - [-0.25, -0.75, 0.25, 0.75, 0.0, 0.0, 0.0, 0.0]]) + [-0.25, -0.75, 0.25, 0.75, 0.0, 0.0, 0.0, 0.0], + [-0.25, -0.75, 0.25, 0.75, 0.0, 0.0, 1.0, 1.0], + [-0.25, -0.75, 0.25, 0.75, 0.0, 1.0, 0.0, 1.0], + [-0.25, -0.75, 0.25, 0.75, 0.0, 1.0, 1.0, 0.0]]) #t, y, p, r, real_action = tf.Variable(input, dtype=tf.float32) action_index = dynamic_handler.create_action_indexes_graph(real_action) - back_again = dynamic_handler.create_tensorflow_controller_from_selection(tf.transpose(action_index), batch_size=9) + back_again = dynamic_handler.create_tensorflow_controller_from_selection(tf.transpose(action_index), batch_size=len(input)) init = tf.global_variables_initializer() session.run(init) indexes, dynamic_results = session.run([action_index, back_again]) - for index in range(9): + for index in range(len(input)): row = input[index] print('blank row') # print('input row ', np.array(row, dtype=np.float32)) @@ -132,7 +135,7 @@ def test3(): print('and back again') print('correct answer', row) - print('numpy result', handler.create_controller_from_selection(action_index)) + print('numpy result', dynamic_handler.create_controller_from_selection(indexes[index])) # purposely using the working result print('dynamic result', dynamic_results[index]) diff --git a/trainer/base_classes/base_trainer.py b/trainer/base_classes/base_trainer.py index a1e7c14..b9b3cc5 100644 --- a/trainer/base_classes/base_trainer.py +++ b/trainer/base_classes/base_trainer.py @@ -65,8 +65,7 @@ def instantiate_model(self, model_class): def setup_model(self): self.model = self.instantiate_model(self.model_class) - self.model.summary_writer = 
tf.summary.FileWriter( - self.model.get_event_path(self.get_event_filename())) + self.model.add_summary_writer(self.get_event_filename()) def get_event_filename(self): return 'event' diff --git a/trainer/base_classes/default_model_trainer.py b/trainer/base_classes/default_model_trainer.py index 8b0b217..a90c928 100644 --- a/trainer/base_classes/default_model_trainer.py +++ b/trainer/base_classes/default_model_trainer.py @@ -23,7 +23,7 @@ def load_config(self): super().load_config() config = super().create_config() try: - self.max_files = config.getfloat(self.OPTIMIZER_CONFIG_HEADER, 'learning_rate') + self.learning_rate = config.getfloat(self.OPTIMIZER_CONFIG_HEADER, 'learning_rate') except Exception as e: self.learning_rate = 0.001 try: @@ -50,8 +50,7 @@ def setup_model(self): if self.should_apply_features: self.model.apply_feature_creation(self.feature_creator) - def instantiate_model(self, model_class): - return model_class(self.sess, get_state_dim(), + return model_class(self.sess, self.action_handler.get_logit_size(), action_handler=self.action_handler, is_training=True, optimizer=self.optimizer, config_file=self.create_config()) diff --git a/trainer/configs/copy_trainer.cfg b/trainer/configs/copy_trainer.cfg index 88e69d3..1de4140 100644 --- a/trainer/configs/copy_trainer.cfg +++ b/trainer/configs/copy_trainer.cfg @@ -4,6 +4,7 @@ download_files = False [Optimizer Config] should_apply_features = True +learning_rate = 0.0005 [Copy Configuration] should_shuffle = True @@ -13,10 +14,11 @@ control_scheme = regression_controls [Model Configuration] batch_size = 20000 -mini_batch_size = 500 -num_width = 256 +mini_batch_size = 5000 +num_width = 128 model_package = models.actor_critic.tutorial_model model_name = TutorialModel -num_layers = 5 +num_layers = 4 num_split_layers = 2 -keep_probability = 0.7 +keep_probability = 0.8 +should_regulate = False diff --git a/trainer/configs/keras_trainer.cfg b/trainer/configs/keras_trainer.cfg new file mode 100644 index 0000000..289c2e2 --- /dev/null +++ b/trainer/configs/keras_trainer.cfg @@ -0,0 +1,11 @@ +[Optimizer Config] +optimizer = adam + +[Model Configuration] +model_name = KerasModel +network_size = 128 +kernal_regularizer_l = 1 +kernal_regularizer = 1e-6 +shared_hidden_layers = 3 +split_hidden_layers = 2 +keep_probability = 0.6 diff --git a/trainer/configs/randomised_trainer.cfg b/trainer/configs/randomised_trainer.cfg index 6b97622..c9ad424 100644 --- a/trainer/configs/randomised_trainer.cfg +++ b/trainer/configs/randomised_trainer.cfg @@ -1,10 +1,12 @@ [Randomised Trainer Configuration] -total_batches = 10000 -save_step = 2000000 +total_batches = 4000 +save_step = 10 teacher_package = TutorialBot.tutorial_bot_output +#teacher_package = TutorialBot.atba2_demo_output [Optimizer Config] should_apply_features = True +learning_rate = 0.0005 [Misc Config] control_scheme = regression_controls @@ -17,6 +19,8 @@ mini_batch_size = 5000 num_width = 128 model_package = models.actor_critic.tutorial_model model_name = TutorialModel -num_layers = 5 +teacher = replay_files +num_layers = 4 num_split_layers = 2 -keep_probability = 0.6 +keep_probability = 0.8 +should_regulate = False diff --git a/trainer/configs/reward_trainer.cfg b/trainer/configs/reward_trainer.cfg index e27b8d8..f10a7b9 100644 --- a/trainer/configs/reward_trainer.cfg +++ b/trainer/configs/reward_trainer.cfg @@ -1,8 +1,19 @@ [Download Configuration] download_files = False +[Optimizer Config] +should_apply_features = False + +[Misc Config] +control_scheme = regression_controls + [Model 
Configuration] model_package = models.actor_critic.policy_gradient model_name = PolicyGradient -num_layers = 10 is_evaluating = True +batch_size = 20000 +mini_batch_size = 500 +num_width = 128 +num_layers = 5 +num_split_layers = 2 +keep_probability = 0.7 diff --git a/trainer/copy_trainer.py b/trainer/copy_trainer.py index 72bf13d..30b64a9 100644 --- a/trainer/copy_trainer.py +++ b/trainer/copy_trainer.py @@ -1,6 +1,5 @@ import numpy as np -from conversions.input.input_formatter import get_state_dim from trainer.base_classes.default_model_trainer import DefaultModelTrainer from trainer.base_classes.download_trainer import DownloadTrainer from trainer.utils import controller_statistics @@ -40,10 +39,11 @@ def get_event_filename(self): return 'copy_replays' def instantiate_model(self, model_class): - return model_class(self.sess, get_state_dim(), + return model_class(self.sess, self.action_handler.get_logit_size(), action_handler=self.action_handler, is_training=True, optimizer=self.optimizer, - config_file=self.create_config(), teacher='replay_files') + config_file=self.create_config(), + teacher='replay_files') def setup_model(self): super().setup_model() @@ -98,28 +98,25 @@ def batch_process(self): if len(self.input_batch) <= 1 or len(self.label_batch) <= 1: return - input_length = len(self.input_batch) - self.input_batch = np.array(self.input_batch) - self.input_batch = self.input_batch.reshape(input_length, get_state_dim()) + input_batch = np.array(self.input_batch) + input_batch = self.model.input_formatter.format_array(input_batch) - output = np.argwhere(np.isnan(self.input_batch)) + output = np.argwhere(np.isnan(input_batch)) if len(output) > 0: print('nan indexes', output) for index in output: - self.input_batch[index[0]][index[1]] = 0 + input_batch[index[0]][index[1]] = 0 - self.label_batch = np.array(self.label_batch) - self.label_batch = self.label_batch.reshape(input_length, self.action_length) + self.label_batch = np.array(self.label_batch, dtype=np.float32) - print(input_length) if self.should_shuffle: - self.input_batch, self.label_batch = self.unison_shuffled_copies(self.input_batch, self.label_batch) + input_batch, self.label_batch = self.unison_shuffled_copies(input_batch, self.label_batch) if self.eval_file: self.controller_stats.get_amounts(input_array=self.input_batch, bot_output=np.transpose(self.label_batch)) else: - self.model.run_train_step(True, self.input_batch, self.label_batch) - + feed_dict = self.model.create_feed_dict(input_batch, self.label_batch) + self.model.run_train_step(True, feed_dict=feed_dict) self.epoch += 1 diff --git a/trainer/random_packet_trainer.py b/trainer/random_packet_trainer.py index 6b1f4ff..8737333 100644 --- a/trainer/random_packet_trainer.py +++ b/trainer/random_packet_trainer.py @@ -20,6 +20,10 @@ class RandomPacketTrainer(DefaultModelTrainer): controller_stats = None start_time = None model_save_time = None + frame_per_file = 20000 + + def __init__(self): + super().__init__() def get_random_data(self, packet_generator, input_formatter): game_tick_packet = packet_generator.get_random_array() @@ -46,8 +50,8 @@ def setup_trainer(self): self.teacher = self.teacher_package.split('.')[-1] def instantiate_model(self, model_class): - return model_class(self.sess, self.input_formatter.get_state_dim(), - self.action_handler.get_logit_size(), action_handler=self.action_handler, is_training=True, + return model_class(self.sess, self.action_handler.get_logit_size(), + action_handler=self.action_handler, is_training=True, optimizer=self.optimizer, 
config_file=self.create_config(), teacher=self.teacher) @@ -85,18 +89,16 @@ def _run_trainer(self): model = self.model # Percentage to print statistics (and also save the model) - print_every_x_batches = (total_batches * batch_size) / save_step - print('Prints at this percentage:', 100.0 / print_every_x_batches) + save_step = (total_batches * batch_size) / save_step + print('training on the equivalent of', self.total_batches * self.batch_size / self.frame_per_file, 'games') + print('Prints at this percentage:', 100.0 / self.save_step) model_counter = 0 self.model_save_time = 0 # Running the model for i in tqdm(range(total_batches)): - result, summaries = sess.run([model.train_op, - model.summarize if model.summarize is not None else model.no_op]) + model.run_train_step(True, None, i) - if model.summary_writer is not None: - model.summary_writer.add_summary(summaries, i) if ((i + 1) * batch_size) % save_step == 0: print('\nStats at', (i + 1) * batch_size, 'frames (', i + 1, 'batches): ') self.controller_stats.get_amounts() @@ -109,6 +111,7 @@ def _run_trainer(self): model_counter += 1 def finish_trainer(self): + print('trained on the equivalent of', self.total_batches * self.batch_size / self.frame_per_file, 'games') start_saving = time.time() self.model.save_model() print('saved model in', time.time() - start_saving, 'seconds') diff --git a/trainer/utils/controller_statistics.py b/trainer/utils/controller_statistics.py index 85e537d..ee6fd2b 100644 --- a/trainer/utils/controller_statistics.py +++ b/trainer/utils/controller_statistics.py @@ -48,15 +48,15 @@ def get_amounts(self, input_array=None, bot_output=None): else: output = self.sess.run(self.controls, feed_dict={self.model_input: input_array}) - accuracy = np.sum(np.isclose(output, bot_output, 0.01), 1) / np.size(output[1]) + accuracy = np.sum(np.isclose(output, bot_output, 0.2), 1) / np.size(output[1]) self.accuracy_over_time.append(accuracy) self.bot_data_over_time.append((output, bot_output)) - analog_buckets = [-1.0001, -0.50001, -0.0001, 0.0001, 0.50001, 1.0001] + analog_buckets = [-1.0001, -0.50001, -0.1000, 0.1000, 0.50001, 1.0001] boolean_buckets = [-0.001, 0.50001, 1.0001] np.set_printoptions(formatter={'int': '{0:5}'.format}) names = ["Throttle", "Steer", "Pitch", "Yaw", "Roll", "Jump", "Boost", "Handbrake"] - print("Splitting up everything in ranges: [-1, -0.5>, [-0.5, -0>, [0], <0+, 0.5], <0.5, 1]") + print("Splitting up everything in ranges: [-1, -0.5>, [-0.5, -0.1>, [0], <0.1+, 0.5], <0.5, 1]") print("Real is model output, Expt is tutorialbot output and Acc. 
is accuracy") for i in range(8): print("From here the ranges are [0.0, 0.5>, [0.5, 1.0]") if i is 5 else None diff --git a/trainer/utils/visualise_net.py b/trainer/utils/visualise_net.py index 20bb4ae..d31d08b 100644 --- a/trainer/utils/visualise_net.py +++ b/trainer/utils/visualise_net.py @@ -1,18 +1,39 @@ +from tkinter.ttk import * from tkinter import * -import numpy as np import ast +from trainer.utils import random_packet_creator +from conversions.input import tensorflow_input_formatter +import tensorflow as tf +from models.actor_critic import tutorial_model +from modelHelpers.actions import action_factory +import threading +import numpy as np +import logging # Some values useful for editing how the net gets shown -x_spacing = 100 -y_spacing = 50 -circle_dia = 30 +default_x_spacing = 100 +default_y_spacing = 50 +split_spacing = 220 +default_circle_dia = 30 + +logging.basicConfig(level=logging.DEBUG, + format='[%(levelname)s] (%(threadName)-10s) %(message)s', + ) + +class AutoScrollbar(Scrollbar): + def set(self, lo, hi): + if float(lo) <= 0.0 and float(hi) >= 1.0: + self.grid_remove() + else: + self.grid() + Scrollbar.set(self, lo, hi) class Visualiser: gui = None # The window - relu = None # Whether activations are through relu - highrelu = 20 # The - bigweight = 30 + act_type = None # Array with activation type for each layer + big_relu = 20 # The + big_weight = 20 layer_activations = None # The values for the activations scale = 1.0 # The current scale of the canvas delta = 0.75 # The impact of scrolling @@ -21,6 +42,7 @@ class Visualiser: iFrame = None # The frame with the info cFrame = None # The frame with the canvas canvas = None # The canvas showing the net + canvas_0 = None # Position of xy rotate_canvas = False # Should the canvas be rotated info_text_neuron = None # The info about the last neuron hovered over @@ -29,28 +51,46 @@ class Visualiser: input_array = None # The StringVar storing the array used when hitting generate input_relu = None # The StringVar storing the array used for the relu adaption relu_number = None # The IntVar storing the spinbox value + split_box_selection = None + heaviest_weights = 0 # How many weights to print on the canvas - def __init__(self, inp=None): + def __init__(self, sess, m, inp=None): # Initialising the window self.gui = Tk() self.gui.geometry('600x600') self.gui.title("Net visualisation") # Initialising all variables - self.highrelu = 20 - self.relu = [True, True, True, True, False] # Is the layer using relu - self.bigweight = 30 - self.layer_activations = inp - # del inp (Is it necessary? 
Might kill the original array as well, creating problems over there) - self.rotate_canvas = False + self.big_relu = 20 + self.big_weight = 2 + + self.model = m + self.model_info = self.model.get_variables_activations() + self.n_layers = len(self.model_info) + + self.act_type = [[self.model_info[i][n][2] for n in range(len(self.model_info[i]))] for i in range(self.n_layers)] + self.randomiser = random_packet_creator.TensorflowPacketGenerator(1) + self.input_formatter = tensorflow_input_formatter.TensorflowInputFormatter(0, 0, 1, None) + first_input = self.model.sess.run(self.input_formatter.create_input_array(self.randomiser.get_random_array())) + self.layer_activations = inp if inp is not None else self.model.get_activations(first_input) + + self.heaviest_weights = 10 + self.last_layer = list() - self.scale = 1.0 - self.delta = 0.75 - self.biggestarraylen = 0 - for item in self.layer_activations: - if len(item) > self.biggestarraylen: - self.biggestarraylen = len(item) + for layer in range(len(self.layer_activations)): + for split in range(len(self.layer_activations[layer])): + new_array_size = len(self.get_activations(layer, split)) + if new_array_size > self.biggestarraylen: + self.biggestarraylen = new_array_size + + self.biggest_split = 0 + for item in self.model_info: + if len(item) > self.biggest_split: + self.biggest_split = len(item) + + self.current_split_layer = 0 + # Initialising the frames self.eFrame = Frame(self.gui) @@ -72,47 +112,42 @@ def edit_stuff(self): input_array_field = Entry(self.eFrame, textvariable=self.input_array) input_array_field.bind('', lambda event: self.change_input()) input_array_field.grid(row=0, column=0) - input_array_button = Button(self.eFrame, command=self.change_input, text="Generate") + input_array_button = Button(self.eFrame, command=self.change_input, text="Use data") input_array_button.grid(row=0, column=1) - self.input_relu = StringVar() - input_relu_field = Entry(self.eFrame, textvariable=self.input_relu) - input_relu_field.bind('', lambda event: self.change_relu()) - input_relu_field.grid(row=1, column=0) - input_relu_button = Button(self.eFrame, command=self.change_relu, text="Edit relu") - input_relu_button.grid(row=1, column=1) - self.relu_number = IntVar() self.relu_number.set(20) relu_spin_box = Spinbox(self.eFrame, from_=1, to=1000, width=5, textvariable=self.relu_number) relu_spin_box.bind('', lambda event: self.change_relu_factor()) - relu_spin_box.grid(row=2, column=0) - relu_button = Button(self.eFrame, command=self.change_relu_factor, text="Change high relu") - relu_button.grid(row=2, column=1) + relu_spin_box.grid(row=1, column=0) + relu_button = Button(self.eFrame, command=self.change_relu_factor, text="Change big relu") + relu_button.grid(row=1, column=1) rotate = Button(self.eFrame, command=self.rotate_and_refresh, text="Rotate") - rotate.grid(row=3, column=0) + rotate.grid(row=2, column=0) + + random = Button(self.eFrame, command=self.layer_activations_random, text="Random input") + random.grid(row=2, column=1) + + self.split_box_selection = IntVar() + split_selection = Spinbox(self.eFrame, from_=1, to=self.biggest_split, width=5, textvariable=self.split_box_selection) + split_selection.grid(row=3, column=0) + input_array_button = Button(self.eFrame, command=self.change_split_layer, text="Switch split") + input_array_button.grid(row=3, column=1) def info_stuff(self): self.info_text_neuron = StringVar() - self.info_text_neuron.set("Layer: ?\nNeuron: ?\nActivation type: ?\nActivation: ?") + self.info_text_neuron.set("Index: ?, 
?\nActivation type: ?\nActivation: ?") activation_label = Label(self.iFrame, textvariable=self.info_text_neuron, justify=LEFT) activation_label.grid(row=0, column=0, sticky='w') self.info_text_line = StringVar() - self.info_text_line.set("From:\nLayer: ?\nNeuron: ?\nTo:\nLayer: ?\nNeuron: ?") + self.info_text_line.set("?, ? -> ?, ?") activation_label = Label(self.iFrame, textvariable=self.info_text_line, justify=LEFT) activation_label.grid(row=1, column=0, sticky='w') def canvas_stuff(self): # Create canvas including the scrollbars - class AutoScrollbar(Scrollbar): - def set(self, lo, hi): - if float(lo) <= 0.0 and float(hi) >= 1.0: - self.grid_remove() - else: - self.grid() - Scrollbar.set(self, lo, hi) def wheel(event): scale = 1.0 @@ -144,110 +179,140 @@ def wheel(event): self.canvas.bind('', wheel) self.canvas.configure(scrollregion=self.canvas.bbox('all')) + self.canvas_0 = self.canvas.create_line(0, 0, 0, 0, tags='zero-zero', fill='white') + # Generate the canvas itself if self.layer_activations is not None: - for i in range(len(self.layer_activations)): - self.create_layer(i) + self.refresh_canvas() - def create_circle(self, x0, y0, activation, relu, layer, neuron): + def create_circle(self, x0, y0, activation, type, layer_index, split_index, neuron): if self.rotate_canvas: x0, y0 = y0, x0 - if relu: - activation = activation if activation <= self.highrelu else self.highrelu - rgb = int(-1 * (activation - self.highrelu) * 255 / self.highrelu) + if type == 'relu': + activation = activation if activation <= self.big_relu else self.big_relu + rgb = int(-1 * (activation - self.big_relu) * 255.0 / self.big_relu) else: activation = activation if activation <= 1 else 1 rgb = int(-1 * (activation - 1) * 255) hex_color = "#{:02x}{:02x}{:02x}".format(rgb, rgb, rgb) - tag = str(layer) + ";" + str(neuron) - self.canvas.create_oval(x0, y0, x0 + circle_dia, y0 + circle_dia, fill=hex_color, tags=tag) + tag = str(layer_index) + ";" + str(split_index) + ";" + str(neuron) + circle_dia = default_circle_dia * self.scale + self.canvas.create_oval(x0, y0, x0 + circle_dia, y0 + circle_dia, fill=hex_color, tags=(tag, 'neuron')) - def handler(event, la=layer, ne=neuron): - self.info_text_neuron.set("Layer: " + str(la) + "\nNeuron: " + str(ne) + "\nActivation type: " + ( - "Relu" if self.relu[layer] else "Sigmoid") + "\nActivation: " + str( - self.layer_activations[layer][neuron])) + def hover_handler(event, la=layer_index, sp=split_index, ne=neuron): + self.info_text_neuron.set("Index: " + str(la) + ", " + str(ne) + "\nActivation type: " + ( + "Relu" if self.act_type[layer_index][split_index] is 'relu' else "Sigmoid") + "\nActivation: " + + str(self.get_activations(la, sp)[ne])) - self.canvas.tag_bind(tag, "", handler) + def double_click_handler(event, la=layer_index, sp=split_index, ne=neuron): + self.show_neuron_info(la, sp, ne) - def create_line(self, x0, y0, x1, y1, layer0, neuron0, layer1, neuron1): + self.canvas.tag_bind(tag, "", hover_handler) + self.canvas.tag_bind(tag, "", double_click_handler) + + def create_line(self, x0, y0, x1, y1, previous_neuron, current_layer, current_neuron, split_index, weight): if self.rotate_canvas: x0, y0, x1, y1 = y0, x0, y1, x1 - half = .5 * circle_dia + half = .5 * default_circle_dia * self.scale - weight = self.obtain_weight() r, g, b = 0, 0, 0 + if abs(weight) <= .1: + return if weight >= 0: - weight = weight if weight <= self.bigweight else self.bigweight - r = int(-1 * (weight - self.bigweight) * 255 / self.bigweight) + weight = weight if weight <= 
self.big_weight else self.big_weight + r = int(weight * 255.0 / self.big_weight) else: - weight = weight if weight >= (-self.bigweight) else (-self.bigweight) - b = int((weight + self.bigweight) * 255 / self.bigweight) + weight = weight if weight >= (-self.big_weight) else (-self.big_weight) + b = int(-1 * weight * 255 / self.big_weight) + hex_color = "#{:02x}{:02x}{:02x}".format(r, g, b) - tag = str(layer0) + ";" + str(neuron0) + ";" + str(layer1) + ";" + str(neuron1) - self.canvas.create_line(x0 + half, y0 + half, x1 + half, y1 + half, fill=hex_color, tags=tag) + tag = str(current_layer - 1) + ";" + str(previous_neuron) + ";" + str(current_layer) + ";" + str(current_neuron) + self.canvas.create_line(x0 + half, y0 + half, x1 + half, y1 + half, fill=hex_color, tags=(tag, 'line')) - def handler(event, l0=layer0, n0=neuron0, l1=layer1, n1=neuron1): - self.info_text_line.set( - "From:\nLayer: " + str(l0) + "\nNeuron: " + str(n0) + "\nTo:\nLayer: " + str(l1) + "\nNeuron: " + str( - n1)) + def handler(event, l0=current_layer - 1, n0=previous_neuron, l1=current_layer, n1=current_neuron, w=weight): + self.info_text_line.set(str(l0) + ", " + str(n0) + " -> " + str(l1) + ", " + str(n1) + + "\nWeight: " + str(w)) self.canvas.tag_bind(tag, "", handler) self.canvas.tag_lower(tag) - - def obtain_weight(self): - return np.random.randint(-30, 30) - - def create_layer(self, layer): - activations = self.layer_activations[layer] - x = layer * x_spacing - y = (self.biggestarraylen - len(activations)) * y_spacing * .5 - this_layer = list() - neuron = 0 - for i in activations: - this_layer.append([x, y]) - if layer != 0: - nn = 0 - for n in self.last_layer: - self.create_line(n[0], n[1], x, y, layer - 1, nn, layer, neuron) - nn += 1 - self.create_circle(x, y, i, self.relu[layer], layer, neuron) + def create_layer(self, layer_index, circles=True, lines=True): + split_index = self.current_split_layer if self.current_split_layer < len( + self.layer_activations[layer_index]) else len(self.layer_activations[layer_index]) - 1 + activations = self.get_activations(layer_index, split_index) + x_spacing = default_x_spacing * self.scale + y_spacing = default_y_spacing * self.scale + zero_x, zero_y = self.canvas.coords(self.canvas_0)[:2] + x = layer_index * x_spacing + zero_x + y = (self.biggestarraylen - len(activations)) * y_spacing * .5 + zero_y + + last_layer_split = self.current_split_layer if self.current_split_layer < len(self.layer_activations[layer_index - 1]) else len(self.layer_activations[layer_index - 1]) - 1 + last_layer_size = len(self.layer_activations[layer_index - 1][last_layer_split][0]) + last_layer_y = (self.biggestarraylen - last_layer_size) * y_spacing * .5 + zero_y + last_layer_x = (layer_index - 1) * x_spacing + zero_x + + for neuron_index, activation in enumerate(activations): + if layer_index != 0 and lines: + pass + if self.heaviest_weights is 0: + for i in range(last_layer_size): + # weight = self.model_info[current_layer][split_index][0][current_neuron][previous_neuron] + weight = 1 + self.create_line(last_layer_x, last_layer_y + i * y_spacing, x, y, i, layer_index, neuron_index, split_index, weight) + else: + # weights = self.model_info[current_layer][split_index][0][current_neuron] + weights = np.random.rand(last_layer_size) * 2 - 1 + biggest_w_indexes = np.argpartition(np.abs(weights), -self.heaviest_weights)[-self.heaviest_weights:] + for i in biggest_w_indexes: + self.create_line(last_layer_x, last_layer_y + i * y_spacing, x, y, i, layer_index, neuron_index, split_index, weights[i]) + if 
circles: + self.create_circle(x, y, activation, self.act_type[layer_index][split_index], layer_index, split_index, neuron_index) y += y_spacing - neuron += 1 - self.last_layer = this_layer + def refresh_canvas(self): - self.canvas.scale('all', 0, 0, 1, 1) - self.scale = 1 - self.canvas.delete('all') - for i in range(len(self.layer_activations)): - self.create_layer(i) + self.canvas.delete('neuron', 'line') + for layer_index in range(len(self.layer_activations)): + self.create_layer(layer_index) # time resulted 150.62620782852173, with 10 weights: 1.052992820739746 + # t = threading.Thread(target=self.create_layer, args=(layer_index, split_layer)) + # t.start() # time resulted 155.981516122818, with 10 weights: 1.2656633853912354 + + def refresh_neurons(self): + self.canvas.delete('neuron') + for layer_index in range(len(self.layer_activations)): + self.create_layer(layer_index, True, False) + + def refresh_lines(self): + self.canvas.delete('line') + for layer_index in range(len(self.layer_activations)): + self.create_layer(layer_index, False, True) def rotate_and_refresh(self): self.rotate_canvas = not self.rotate_canvas self.refresh_canvas() def change_relu_factor(self): - self.highrelu = self.relu_number.get() - self.refresh_canvas() - - def change_relu(self): - if self.input_relu.get(): - try: - self.relu = ast.literal_eval(self.input_relu.get()) - self.refresh_canvas() - except Exception: - pass + self.big_relu = self.relu_number.get() + self.refresh_neurons() def change_input(self): if self.input_array.get(): try: self.layer_activations = ast.literal_eval(self.input_array.get()) - self.refresh_canvas() + self.refresh_neurons() except Exception: pass + def change_split_layer(self): + self.current_split_layer = self.split_box_selection.get() + self.refresh_canvas() + + def layer_activations_random(self): + random_array = self.model.sess.run(self.input_formatter.create_input_array(self.randomiser.get_random_array())) + self.layer_activations = self.model.get_activations(random_array) + self.refresh_neurons() + def config_options(self): # Make the canvas expandable self.gui.grid_rowconfigure(0, weight=1) @@ -257,3 +322,58 @@ def config_options(self): self.cFrame.grid_columnconfigure(0, weight=1) self.gui.grid_columnconfigure(0, minsize=100) + + def get_activations(self, layer, split): + split = split if split < len(self.layer_activations[layer]) else len(self.layer_activations[layer]) - 1 + return self.layer_activations[layer][split][0] + + def show_neuron_info(self, layer, split, neuron): + + def treeview_sort_column(tv, col, reverse): + l = [(tv.set(k, col), k) for k in tv.get_children('')] + # l.sort(reverse=reverse) + + def int_or_double(inp): + try: + return int(inp) + except ValueError: + try: + return float(inp) + except ValueError: + return inp + + l = sorted(l, reverse=reverse, key=lambda s: (int_or_double(s[0]), s[1])) + for index, (val, k) in enumerate(l): + tv.move(k, '', index) + + tv.heading(col, command=lambda: treeview_sort_column(tv, col, not reverse)) + + info_window = Toplevel() + info_window.title("Info for neuron " + str(neuron) + " in split " + str(split) + " of layer " + str(layer)) + columns = ('neuron', 'value') + vbar = AutoScrollbar(info_window, orient='vertical') + vbar.grid(row=0, column=1, sticky='ns') + table = Treeview(info_window, columns=columns, show='headings', yscrollcommand=vbar.set) + vbar.configure(command=table.yview) + + for i in range(len(self.layer_activations[layer][split][0])): + table.insert("", "end", values=(i, np.random.rand())) + 
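# NOTE (annotation, not part of the original patch): the rows above are filled with
# placeholder np.random.rand() values; the real incoming weights would presumably be
# read from self.model_info using the get_variables_activations() layout, mirroring
# the commented-out weight lookup in create_layer.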
table.grid(row=0, column=0, sticky='nsew') + table.heading("neuron", text="From neuron", command=lambda: treeview_sort_column(table, "neuron", False)) + table.heading("value", text="Weight", command=lambda: treeview_sort_column(table, "value", False)) + info_window.grid_rowconfigure(0, weight=1) + info_window.grid_columnconfigure(0, weight=1) + info_window.grab_set() + + +if __name__ == '__main__': + with tf.Session() as sess: + controls = action_factory.default_scheme + action_handler = action_factory.get_handler(control_scheme=controls) + action_handler.get_logit_size() + model = tutorial_model.TutorialModel(sess, action_handler.get_logit_size(), action_handler=action_handler) + model.batch_size = 1 + model.mini_batch_size = 1 + model.create_model() + model.initialize_model() + Visualiser(sess, model) diff --git a/tutor_bot.cfg b/tutor_bot.cfg index 1118150..136bf01 100644 --- a/tutor_bot.cfg +++ b/tutor_bot.cfg @@ -23,8 +23,8 @@ goal_explosion_id = 1905 [Model Configuration] model_package = models.fake_model model_name = FakeModel -control_scheme = regression_controls -#teacher_package = TutorialBot.tutorial_bot_output -teacher_package = TutorialBot.atba2_demo_output +control_scheme = regression_everything +teacher_package = TutorialBot.tutorial_bot_output +#teacher_package = TutorialBot.atba2_demo_output batch_size = 1 mini_batch_size = 1
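For reference, a minimal sketch of how the reworked model API in this patch is meant to be driven end to end: models are now constructed without an explicit state_dim, the input formatter comes from input_formatter_info, summaries are attached through add_summary_writer, and training goes through run_train_step with a feed_dict. This is only an illustration: create_feed_dict is assumed to build the placeholder-to-array mapping the way the trainers in this patch use it, and the event name and zero-filled batches are made up.

import numpy as np
import tensorflow as tf
from modelHelpers.actions import action_factory
from models.actor_critic import tutorial_model

with tf.Session() as sess:
    action_handler = action_factory.get_handler(control_scheme=action_factory.default_scheme)
    model = tutorial_model.TutorialModel(sess, action_handler.get_logit_size(),
                                         input_formatter_info=[0, 0],  # team, player index
                                         action_handler=action_handler, is_training=True)
    model.batch_size = 1
    model.mini_batch_size = 1
    model.create_model(model.get_input_placeholder())
    model.add_summary_writer('example_events')  # replaces the old manual tf.summary.FileWriter wiring
    model.initialize_model()

    # one dummy training step through the new feed_dict based entry point
    inputs = np.zeros((1, model.state_dim), dtype=np.float32)
    labels = np.zeros((1, action_handler.get_logit_size()), dtype=np.float32)
    model.run_train_step(True, feed_dict=model.create_feed_dict(inputs, labels))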