From a06d4529dad41a879300141cdc41663447b768d9 Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Tue, 3 Sep 2024 20:16:06 +0000 Subject: [PATCH 01/28] Combine two tf_agents policies with timestep spec given by combine_tfa_policies_lib.get_input_signature() and action spec given by combine_tfa_policies_lib.get_action_spec() The combiner policy uses a new timestep spec feature "model_selector" to select the requested policy at the current state. The feature is computed as a md5 hash from the respective policies names. --- compiler_opt/tools/combine_tfa_policies.py | 30 +++ .../tools/combine_tfa_policies_lib.py | 176 ++++++++++++++++++ .../tools/combine_tfa_policies_lib_test.py | 115 ++++++++++++ 3 files changed, 321 insertions(+) create mode 100755 compiler_opt/tools/combine_tfa_policies.py create mode 100644 compiler_opt/tools/combine_tfa_policies_lib.py create mode 100644 compiler_opt/tools/combine_tfa_policies_lib_test.py diff --git a/compiler_opt/tools/combine_tfa_policies.py b/compiler_opt/tools/combine_tfa_policies.py new file mode 100755 index 00000000..d3932a60 --- /dev/null +++ b/compiler_opt/tools/combine_tfa_policies.py @@ -0,0 +1,30 @@ +from absl import app + +import tensorflow as tf + +from compiler_opt.rl import policy_saver +from compiler_opt.tools import combine_tfa_policies_lib as cfa_lib + + +def main(_): + expected_signature = cfa_lib.get_input_signature() + action_spec = cfa_lib.get_action_spec() + policy1_name = input("First policy name: ") + policy1_path = input(policy1_name + " path: ") + policy2_name = input("Second policy name: ") + policy2_path = input(policy2_name + " path: ") + policy1 = tf.saved_model.load(policy1_path, tags=None, options=None) + policy2 = tf.saved_model.load(policy2_path, tags=None, options=None) + combined_policy = cfa_lib.CombinedTFPolicy( + tf_policies={policy1_name:policy1, policy2_name:policy2}, + time_step_spec=expected_signature, + action_spec=action_spec + ) + combined_policy_path = input("Save combined policy path: ") + policy_dict = {'combined_policy': combined_policy} + saver = policy_saver.PolicySaver(policy_dict=policy_dict) + saver.save(combined_policy_path) + +if __name__ == "__main__": + app.run(main) + diff --git a/compiler_opt/tools/combine_tfa_policies_lib.py b/compiler_opt/tools/combine_tfa_policies_lib.py new file mode 100644 index 00000000..db808112 --- /dev/null +++ b/compiler_opt/tools/combine_tfa_policies_lib.py @@ -0,0 +1,176 @@ +from typing import Dict, List, Optional, Tuple + +import gin +import tensorflow as tf +import hashlib + +import tf_agents +from tf_agents.trajectories import time_step +from tf_agents.typing import types +from tf_agents.trajectories import policy_step +import tensorflow_probability as tfp +from tf_agents.specs import tensor_spec + + +class CombinedTFPolicy(tf_agents.policies.TFPolicy): + + def __init__(self, *args, + tf_policies: Dict[str, tf_agents.policies.TFPolicy], + **kwargs): + super(CombinedTFPolicy, self).__init__(*args, **kwargs) + + self.tf_policies = [] + self.tf_policy_names = [] + for name, policy in tf_policies.items(): + self.tf_policies.append(policy) + self.tf_policy_names.append(name) + + self.expected_signature = self.time_step_spec + self.sorted_keys = sorted(self.expected_signature.observation.keys()) + + high_low_tensors = [] + for name in self.tf_policy_names: + m = hashlib.md5() + m.update(name.encode('utf-8')) + high_low_tensors.append(tf.stack([ + tf.constant(int.from_bytes(m.digest()[8:], 'little'), dtype=tf.uint64), + tf.constant(int.from_bytes(m.digest()[:8], 
'little'), dtype=tf.uint64) + ]) + ) + self.high_low_tensors = tf.stack(high_low_tensors) + + m = hashlib.md5() + m.update(self.tf_policy_names[0].encode('utf-8')) + self.high = int.from_bytes(m.digest()[8:], 'little') + self.low = int.from_bytes(m.digest()[:8], 'little') + self.high_low_tensor = tf.constant([self.high, self.low], dtype=tf.uint64) + + def _process_observation(self, observation): + for name in self.sorted_keys: + if name in ['model_selector']: + switch_tensor = observation.pop(name)[0] + high_low_tensor = switch_tensor + + tf.debugging.Assert( + tf.equal( + tf.reduce_any( + tf.reduce_all( + tf.equal(high_low_tensor, self.high_low_tensors), axis=1 + ) + ),True + ), + [high_low_tensor, self.high_low_tensors]) + return observation, switch_tensor + + def _create_distribution(self, inlining_prediction): + probs = [inlining_prediction, 1.0 - inlining_prediction] + logits = [[0.0, tf.math.log(probs[1]/(1.0 - probs[1]))]] + return tfp.distributions.Categorical(logits=logits) + + def _action(self, time_step: time_step.TimeStep, + policy_state: types.NestedTensorSpec, + seed: Optional[types.Seed] = None) -> policy_step.PolicyStep: + new_observation = time_step.observation + new_observation, switch_tensor = self._process_observation(new_observation) + updated_step = tf_agents.trajectories.TimeStep(step_type=time_step.step_type, + reward=time_step.reward, + discount=time_step.discount, + observation=new_observation) + def f0(): + return tf.cast( + self.tf_policies[0].action(updated_step).action[0], dtype=tf.int64) + def f1(): + return tf.cast( + self.tf_policies[1].action(updated_step).action[0], dtype=tf.int64) + action = tf.cond( + tf.math.reduce_all( + tf.equal(switch_tensor, self.high_low_tensor)), + f0, + f1 + ) + return tf_agents.trajectories.PolicyStep(action=action, state=policy_state) + + def _distribution( + self, time_step: time_step.TimeStep, + policy_state: types.NestedTensorSpec) -> policy_step.PolicyStep: + new_observation = time_step.observation + new_observation, switch_tensor = self._process_observation(new_observation) + updated_step = tf_agents.trajectories.TimeStep(step_type=time_step.step_type, + reward=time_step.reward, + discount=time_step.discount, + observation=new_observation) + def f0(): + return tf.cast( + self.tf_policies[0].distribution(updated_step).action.cdf(0)[0], + dtype=tf.float32) + def f1(): + return tf.cast( + self.tf_policies[1].distribution(updated_step).action.cdf(0)[0], + dtype=tf.float32) + distribution = tf.cond( + tf.math.reduce_all( + tf.equal(switch_tensor, self.high_low_tensor)), + f0, + f1 + ) + return tf_agents.trajectories.PolicyStep( + action=self._create_distribution(distribution), + state=policy_state) + + + +@gin.configurable() +def get_input_signature(): + """Returns the list of features for LLVM inlining to be used in combining models.""" + # int64 features + inputs = dict( + (key,tf.TensorSpec(dtype=tf.int64, shape=(), name=key)) + for key in [ + "caller_basic_block_count", + "caller_conditionally_executed_blocks", + "caller_users", + "callee_basic_block_count", + "callee_conditionally_executed_blocks", + "callee_users", + "nr_ctant_params", + "node_count", + "edge_count", + "callsite_height", + "cost_estimate", + "inlining_default", + "sroa_savings", + "sroa_losses", + "load_elimination", + "call_penalty", + "call_argument_setup", + "load_relative_intrinsic", + "lowered_call_arg_setup", + "indirect_call_penalty", + "jump_table_penalty", + "case_cluster_penalty", + "switch_penalty", + "unsimplified_common_instructions", + 
"num_loops", + "dead_blocks", + "simplified_instructions", + "constant_args", + "constant_offset_ptr_args", + "callsite_cost", + "cold_cc_penalty", + "last_call_to_static_bonus", + "is_multiple_blocks", + "nested_inlines", + "nested_inline_cost_estimate", + "threshold", + "is_callee_avail_external", + "is_caller_avail_external", + ] + ) + inputs.update({'model_selector': tf.TensorSpec(shape=(2,), dtype=tf.uint64, name='model_selector')}) + return time_step.time_step_spec(inputs) + +@gin.configurable() +def get_action_spec(): + return tensor_spec.BoundedTensorSpec( + dtype=tf.int64, shape=(), name='inlining_decision', minimum=0, maximum=1 + ) \ No newline at end of file diff --git a/compiler_opt/tools/combine_tfa_policies_lib_test.py b/compiler_opt/tools/combine_tfa_policies_lib_test.py new file mode 100644 index 00000000..0c5f71c6 --- /dev/null +++ b/compiler_opt/tools/combine_tfa_policies_lib_test.py @@ -0,0 +1,115 @@ +"""Tests for the feature_importance_utils.py module""" + +from absl.testing import absltest + +import tensorflow as tf +from compiler_opt.tools import combine_tfa_policies_lib +from tf_agents.trajectories import time_step +import tf_agents +from tf_agents.specs import tensor_spec +from tf_agents.trajectories import policy_step +import hashlib +import numpy as np + +class AddOnePolicy(tf_agents.policies.TFPolicy): + def __init__(self): + observation_spec = {'obs': tensor_spec.TensorSpec( + shape=(1,), dtype=tf.int64)} + time_step_spec = time_step.time_step_spec(observation_spec) + + action_spec = tensor_spec.TensorSpec( + shape=(1,), dtype=tf.int64) + + super(AddOnePolicy, self).__init__(time_step_spec=time_step_spec, + action_spec=action_spec) + def _distribution(self, time_step): + pass + + def _variables(self): + return () + + def _action(self, time_step, policy_state, seed): + observation = time_step.observation['obs'][0] + action = tf.reshape(observation + 1, (1,)) + return policy_step.PolicyStep(action, policy_state) + +class SubtractOnePolicy(tf_agents.policies.TFPolicy): + def __init__(self): + observation_spec = {'obs': tensor_spec.TensorSpec( + shape=(1,), dtype=tf.int64)} + time_step_spec = time_step.time_step_spec(observation_spec) + + action_spec = tensor_spec.TensorSpec( + shape=(1,), dtype=tf.int64) + + super(SubtractOnePolicy, self).__init__(time_step_spec=time_step_spec, + action_spec=action_spec) + def _distribution(self, time_step): + pass + + def _variables(self): + return () + + def _action(self, time_step, policy_state, seed): + observation = time_step.observation['obs'][0] + action = tf.reshape(observation - 1, (1,)) + return policy_step.PolicyStep(action, policy_state) + +observation_spec = time_step.time_step_spec( + {'obs':tf.TensorSpec(dtype=tf.int32, shape=(), name='obs'), + 'model_selector': tf.TensorSpec(shape=(2,), + dtype=tf.uint64, name='model_selector')} +) + +action_spec = tensor_spec.TensorSpec( + shape=(1,), dtype=tf.int64) + +class FeatureImportanceTest(absltest.TestCase): + + def test_select_add_policy(self): + policy1 = AddOnePolicy() + policy2 = SubtractOnePolicy() + combined_policy = combine_tfa_policies_lib.CombinedTFPolicy( + tf_policies={'add_one':policy1, 'subtract_one':policy2}, + time_step_spec=observation_spec, + action_spec=action_spec) + + m = hashlib.md5() + m.update('add_one'.encode('utf-8')) + high = int.from_bytes(m.digest()[8:], 'little') + low = int.from_bytes(m.digest()[:8], 'little') + model_selector = tf.constant([[high, low]], dtype=tf.uint64) + + state = tf_agents.trajectories.TimeStep( + 
discount=tf.constant(np.array([0.]), dtype=tf.float32), + observation={'obs':tf.constant(np.array([0]), dtype=tf.int64), + 'model_selector':model_selector}, + reward=tf.constant(np.array([0]), dtype=tf.float64), + step_type=tf.constant(np.array([0]), dtype=tf.int64) + ) + + self.assertEqual(combined_policy.action(state).action, tf.constant(1, dtype=tf.int64)) + + def test_select_subtract_policy(self): + policy1 = AddOnePolicy() + policy2 = SubtractOnePolicy() + combined_policy = combine_tfa_policies_lib.CombinedTFPolicy( + tf_policies={'add_one':policy1, 'subtract_one':policy2}, + time_step_spec=observation_spec, + action_spec=action_spec) + + m = hashlib.md5() + m.update('subtract_one'.encode('utf-8')) + high = int.from_bytes(m.digest()[8:], 'little') + low = int.from_bytes(m.digest()[:8], 'little') + model_selector = tf.constant([[high, low]], dtype=tf.uint64) + + state = tf_agents.trajectories.TimeStep( + discount=tf.constant(np.array([0.]), dtype=tf.float32), + observation={'obs':tf.constant(np.array([0]), dtype=tf.int64), + 'model_selector':model_selector}, + reward=tf.constant(np.array([0]), dtype=tf.float64), + step_type=tf.constant(np.array([0]), dtype=tf.int64) + ) + + self.assertEqual(combined_policy.action(state).action, tf.constant(-1, dtype=tf.int64)) \ No newline at end of file From 9bc8c0548f733b60536213f54f76fa497347d913 Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Fri, 6 Sep 2024 17:49:05 +0000 Subject: [PATCH 02/28] Added licence. --- compiler_opt/tools/combine_tfa_policies.py | 15 +++++++++++++++ compiler_opt/tools/combine_tfa_policies_lib.py | 15 +++++++++++++++ .../tools/combine_tfa_policies_lib_test.py | 14 ++++++++++++++ 3 files changed, 44 insertions(+) diff --git a/compiler_opt/tools/combine_tfa_policies.py b/compiler_opt/tools/combine_tfa_policies.py index d3932a60..3c0db9c1 100755 --- a/compiler_opt/tools/combine_tfa_policies.py +++ b/compiler_opt/tools/combine_tfa_policies.py @@ -1,3 +1,18 @@ +# coding=utf-8 +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Runs the policy combiner.""" from absl import app import tensorflow as tf diff --git a/compiler_opt/tools/combine_tfa_policies_lib.py b/compiler_opt/tools/combine_tfa_policies_lib.py index db808112..87faf829 100644 --- a/compiler_opt/tools/combine_tfa_policies_lib.py +++ b/compiler_opt/tools/combine_tfa_policies_lib.py @@ -1,3 +1,18 @@ +# coding=utf-8 +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Combines two tf-agent policies with the given state and action spec.""" from typing import Dict, List, Optional, Tuple import gin diff --git a/compiler_opt/tools/combine_tfa_policies_lib_test.py b/compiler_opt/tools/combine_tfa_policies_lib_test.py index 0c5f71c6..92ab24d7 100644 --- a/compiler_opt/tools/combine_tfa_policies_lib_test.py +++ b/compiler_opt/tools/combine_tfa_policies_lib_test.py @@ -1,3 +1,17 @@ +# coding=utf-8 +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Tests for the feature_importance_utils.py module""" from absl.testing import absltest From 86e4d12460fec210afb34340c9cb9d05f7f8652c Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Fri, 6 Sep 2024 17:53:32 +0000 Subject: [PATCH 03/28] yapf . -ir --- compiler_opt/tools/combine_tfa_policies.py | 14 +- .../tools/combine_tfa_policies_lib.py | 171 +++++++++--------- .../tools/combine_tfa_policies_lib_test.py | 102 ++++++----- 3 files changed, 154 insertions(+), 133 deletions(-) diff --git a/compiler_opt/tools/combine_tfa_policies.py b/compiler_opt/tools/combine_tfa_policies.py index 3c0db9c1..7309b60f 100755 --- a/compiler_opt/tools/combine_tfa_policies.py +++ b/compiler_opt/tools/combine_tfa_policies.py @@ -31,15 +31,17 @@ def main(_): policy1 = tf.saved_model.load(policy1_path, tags=None, options=None) policy2 = tf.saved_model.load(policy2_path, tags=None, options=None) combined_policy = cfa_lib.CombinedTFPolicy( - tf_policies={policy1_name:policy1, policy2_name:policy2}, - time_step_spec=expected_signature, - action_spec=action_spec - ) + tf_policies={ + policy1_name: policy1, + policy2_name: policy2 + }, + time_step_spec=expected_signature, + action_spec=action_spec) combined_policy_path = input("Save combined policy path: ") policy_dict = {'combined_policy': combined_policy} saver = policy_saver.PolicySaver(policy_dict=policy_dict) saver.save(combined_policy_path) -if __name__ == "__main__": - app.run(main) +if __name__ == "__main__": + app.run(main) diff --git a/compiler_opt/tools/combine_tfa_policies_lib.py b/compiler_opt/tools/combine_tfa_policies_lib.py index 87faf829..8aff3e3f 100644 --- a/compiler_opt/tools/combine_tfa_policies_lib.py +++ b/compiler_opt/tools/combine_tfa_policies_lib.py @@ -29,8 +29,7 @@ class CombinedTFPolicy(tf_agents.policies.TFPolicy): - def __init__(self, *args, - tf_policies: Dict[str, tf_agents.policies.TFPolicy], + def __init__(self, *args, tf_policies: Dict[str, tf_agents.policies.TFPolicy], **kwargs): super(CombinedTFPolicy, self).__init__(*args, **kwargs) @@ -47,11 +46,13 @@ def __init__(self, *args, for name in self.tf_policy_names: m = hashlib.md5() m.update(name.encode('utf-8')) - high_low_tensors.append(tf.stack([ - tf.constant(int.from_bytes(m.digest()[8:], 'little'), dtype=tf.uint64), - tf.constant(int.from_bytes(m.digest()[:8], 'little'), dtype=tf.uint64) - ]) - ) + high_low_tensors.append( + tf.stack([ + tf.constant( + int.from_bytes(m.digest()[8:], 'little'), dtype=tf.uint64), + tf.constant( + int.from_bytes(m.digest()[:8], 'little'), 
dtype=tf.uint64) + ])) self.high_low_tensors = tf.stack(high_low_tensors) m = hashlib.md5() @@ -65,44 +66,44 @@ def _process_observation(self, observation): if name in ['model_selector']: switch_tensor = observation.pop(name)[0] high_low_tensor = switch_tensor - + tf.debugging.Assert( tf.equal( tf.reduce_any( tf.reduce_all( - tf.equal(high_low_tensor, self.high_low_tensors), axis=1 - ) - ),True - ), - [high_low_tensor, self.high_low_tensors]) + tf.equal(high_low_tensor, self.high_low_tensors), + axis=1)), True), + [high_low_tensor, self.high_low_tensors]) return observation, switch_tensor def _create_distribution(self, inlining_prediction): probs = [inlining_prediction, 1.0 - inlining_prediction] - logits = [[0.0, tf.math.log(probs[1]/(1.0 - probs[1]))]] + logits = [[0.0, tf.math.log(probs[1] / (1.0 - probs[1]))]] return tfp.distributions.Categorical(logits=logits) - def _action(self, time_step: time_step.TimeStep, + def _action(self, + time_step: time_step.TimeStep, policy_state: types.NestedTensorSpec, seed: Optional[types.Seed] = None) -> policy_step.PolicyStep: new_observation = time_step.observation new_observation, switch_tensor = self._process_observation(new_observation) - updated_step = tf_agents.trajectories.TimeStep(step_type=time_step.step_type, - reward=time_step.reward, - discount=time_step.discount, - observation=new_observation) + updated_step = tf_agents.trajectories.TimeStep( + step_type=time_step.step_type, + reward=time_step.reward, + discount=time_step.discount, + observation=new_observation) + def f0(): return tf.cast( self.tf_policies[0].action(updated_step).action[0], dtype=tf.int64) + def f1(): return tf.cast( self.tf_policies[1].action(updated_step).action[0], dtype=tf.int64) + action = tf.cond( - tf.math.reduce_all( - tf.equal(switch_tensor, self.high_low_tensor)), - f0, - f1 - ) + tf.math.reduce_all(tf.equal(switch_tensor, self.high_low_tensor)), f0, + f1) return tf_agents.trajectories.PolicyStep(action=action, state=policy_state) def _distribution( @@ -110,82 +111,82 @@ def _distribution( policy_state: types.NestedTensorSpec) -> policy_step.PolicyStep: new_observation = time_step.observation new_observation, switch_tensor = self._process_observation(new_observation) - updated_step = tf_agents.trajectories.TimeStep(step_type=time_step.step_type, - reward=time_step.reward, - discount=time_step.discount, - observation=new_observation) + updated_step = tf_agents.trajectories.TimeStep( + step_type=time_step.step_type, + reward=time_step.reward, + discount=time_step.discount, + observation=new_observation) + def f0(): return tf.cast( self.tf_policies[0].distribution(updated_step).action.cdf(0)[0], dtype=tf.float32) + def f1(): return tf.cast( self.tf_policies[1].distribution(updated_step).action.cdf(0)[0], dtype=tf.float32) + distribution = tf.cond( - tf.math.reduce_all( - tf.equal(switch_tensor, self.high_low_tensor)), - f0, - f1 - ) + tf.math.reduce_all(tf.equal(switch_tensor, self.high_low_tensor)), f0, + f1) return tf_agents.trajectories.PolicyStep( - action=self._create_distribution(distribution), - state=policy_state) - + action=self._create_distribution(distribution), state=policy_state) @gin.configurable() def get_input_signature(): - """Returns the list of features for LLVM inlining to be used in combining models.""" - # int64 features - inputs = dict( - (key,tf.TensorSpec(dtype=tf.int64, shape=(), name=key)) - for key in [ - "caller_basic_block_count", - "caller_conditionally_executed_blocks", - "caller_users", - "callee_basic_block_count", - 
"callee_conditionally_executed_blocks", - "callee_users", - "nr_ctant_params", - "node_count", - "edge_count", - "callsite_height", - "cost_estimate", - "inlining_default", - "sroa_savings", - "sroa_losses", - "load_elimination", - "call_penalty", - "call_argument_setup", - "load_relative_intrinsic", - "lowered_call_arg_setup", - "indirect_call_penalty", - "jump_table_penalty", - "case_cluster_penalty", - "switch_penalty", - "unsimplified_common_instructions", - "num_loops", - "dead_blocks", - "simplified_instructions", - "constant_args", - "constant_offset_ptr_args", - "callsite_cost", - "cold_cc_penalty", - "last_call_to_static_bonus", - "is_multiple_blocks", - "nested_inlines", - "nested_inline_cost_estimate", - "threshold", - "is_callee_avail_external", - "is_caller_avail_external", - ] - ) - inputs.update({'model_selector': tf.TensorSpec(shape=(2,), dtype=tf.uint64, name='model_selector')}) - return time_step.time_step_spec(inputs) + """Returns the list of features for LLVM inlining to be used in combining models.""" + # int64 features + inputs = dict((key, tf.TensorSpec(dtype=tf.int64, shape=(), name=key)) + for key in [ + "caller_basic_block_count", + "caller_conditionally_executed_blocks", + "caller_users", + "callee_basic_block_count", + "callee_conditionally_executed_blocks", + "callee_users", + "nr_ctant_params", + "node_count", + "edge_count", + "callsite_height", + "cost_estimate", + "inlining_default", + "sroa_savings", + "sroa_losses", + "load_elimination", + "call_penalty", + "call_argument_setup", + "load_relative_intrinsic", + "lowered_call_arg_setup", + "indirect_call_penalty", + "jump_table_penalty", + "case_cluster_penalty", + "switch_penalty", + "unsimplified_common_instructions", + "num_loops", + "dead_blocks", + "simplified_instructions", + "constant_args", + "constant_offset_ptr_args", + "callsite_cost", + "cold_cc_penalty", + "last_call_to_static_bonus", + "is_multiple_blocks", + "nested_inlines", + "nested_inline_cost_estimate", + "threshold", + "is_callee_avail_external", + "is_caller_avail_external", + ]) + inputs.update({ + 'model_selector': + tf.TensorSpec(shape=(2,), dtype=tf.uint64, name='model_selector') + }) + return time_step.time_step_spec(inputs) + @gin.configurable() def get_action_spec(): return tensor_spec.BoundedTensorSpec( - dtype=tf.int64, shape=(), name='inlining_decision', minimum=0, maximum=1 - ) \ No newline at end of file + dtype=tf.int64, shape=(), name='inlining_decision', minimum=0, maximum=1) diff --git a/compiler_opt/tools/combine_tfa_policies_lib_test.py b/compiler_opt/tools/combine_tfa_policies_lib_test.py index 92ab24d7..c79b592c 100644 --- a/compiler_opt/tools/combine_tfa_policies_lib_test.py +++ b/compiler_opt/tools/combine_tfa_policies_lib_test.py @@ -25,17 +25,20 @@ import hashlib import numpy as np + class AddOnePolicy(tf_agents.policies.TFPolicy): + def __init__(self): - observation_spec = {'obs': tensor_spec.TensorSpec( - shape=(1,), dtype=tf.int64)} + observation_spec = { + 'obs': tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) + } time_step_spec = time_step.time_step_spec(observation_spec) - action_spec = tensor_spec.TensorSpec( - shape=(1,), dtype=tf.int64) + action_spec = tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) + + super(AddOnePolicy, self).__init__( + time_step_spec=time_step_spec, action_spec=action_spec) - super(AddOnePolicy, self).__init__(time_step_spec=time_step_spec, - action_spec=action_spec) def _distribution(self, time_step): pass @@ -47,17 +50,20 @@ def _action(self, time_step, policy_state, seed): 
action = tf.reshape(observation + 1, (1,)) return policy_step.PolicyStep(action, policy_state) + class SubtractOnePolicy(tf_agents.policies.TFPolicy): + def __init__(self): - observation_spec = {'obs': tensor_spec.TensorSpec( - shape=(1,), dtype=tf.int64)} + observation_spec = { + 'obs': tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) + } time_step_spec = time_step.time_step_spec(observation_spec) - action_spec = tensor_spec.TensorSpec( - shape=(1,), dtype=tf.int64) + action_spec = tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) + + super(SubtractOnePolicy, self).__init__( + time_step_spec=time_step_spec, action_spec=action_spec) - super(SubtractOnePolicy, self).__init__(time_step_spec=time_step_spec, - action_spec=action_spec) def _distribution(self, time_step): pass @@ -68,25 +74,30 @@ def _action(self, time_step, policy_state, seed): observation = time_step.observation['obs'][0] action = tf.reshape(observation - 1, (1,)) return policy_step.PolicyStep(action, policy_state) - -observation_spec = time_step.time_step_spec( - {'obs':tf.TensorSpec(dtype=tf.int32, shape=(), name='obs'), - 'model_selector': tf.TensorSpec(shape=(2,), - dtype=tf.uint64, name='model_selector')} -) -action_spec = tensor_spec.TensorSpec( - shape=(1,), dtype=tf.int64) + +observation_spec = time_step.time_step_spec({ + 'obs': + tf.TensorSpec(dtype=tf.int32, shape=(), name='obs'), + 'model_selector': + tf.TensorSpec(shape=(2,), dtype=tf.uint64, name='model_selector') +}) + +action_spec = tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) + class FeatureImportanceTest(absltest.TestCase): - + def test_select_add_policy(self): policy1 = AddOnePolicy() policy2 = SubtractOnePolicy() combined_policy = combine_tfa_policies_lib.CombinedTFPolicy( - tf_policies={'add_one':policy1, 'subtract_one':policy2}, - time_step_spec=observation_spec, - action_spec=action_spec) + tf_policies={ + 'add_one': policy1, + 'subtract_one': policy2 + }, + time_step_spec=observation_spec, + action_spec=action_spec) m = hashlib.md5() m.update('add_one'.encode('utf-8')) @@ -95,22 +106,27 @@ def test_select_add_policy(self): model_selector = tf.constant([[high, low]], dtype=tf.uint64) state = tf_agents.trajectories.TimeStep( - discount=tf.constant(np.array([0.]), dtype=tf.float32), - observation={'obs':tf.constant(np.array([0]), dtype=tf.int64), - 'model_selector':model_selector}, - reward=tf.constant(np.array([0]), dtype=tf.float64), - step_type=tf.constant(np.array([0]), dtype=tf.int64) - ) + discount=tf.constant(np.array([0.]), dtype=tf.float32), + observation={ + 'obs': tf.constant(np.array([0]), dtype=tf.int64), + 'model_selector': model_selector + }, + reward=tf.constant(np.array([0]), dtype=tf.float64), + step_type=tf.constant(np.array([0]), dtype=tf.int64)) - self.assertEqual(combined_policy.action(state).action, tf.constant(1, dtype=tf.int64)) + self.assertEqual( + combined_policy.action(state).action, tf.constant(1, dtype=tf.int64)) def test_select_subtract_policy(self): policy1 = AddOnePolicy() policy2 = SubtractOnePolicy() combined_policy = combine_tfa_policies_lib.CombinedTFPolicy( - tf_policies={'add_one':policy1, 'subtract_one':policy2}, - time_step_spec=observation_spec, - action_spec=action_spec) + tf_policies={ + 'add_one': policy1, + 'subtract_one': policy2 + }, + time_step_spec=observation_spec, + action_spec=action_spec) m = hashlib.md5() m.update('subtract_one'.encode('utf-8')) @@ -119,11 +135,13 @@ def test_select_subtract_policy(self): model_selector = tf.constant([[high, low]], dtype=tf.uint64) state = 
tf_agents.trajectories.TimeStep( - discount=tf.constant(np.array([0.]), dtype=tf.float32), - observation={'obs':tf.constant(np.array([0]), dtype=tf.int64), - 'model_selector':model_selector}, - reward=tf.constant(np.array([0]), dtype=tf.float64), - step_type=tf.constant(np.array([0]), dtype=tf.int64) - ) - - self.assertEqual(combined_policy.action(state).action, tf.constant(-1, dtype=tf.int64)) \ No newline at end of file + discount=tf.constant(np.array([0.]), dtype=tf.float32), + observation={ + 'obs': tf.constant(np.array([0]), dtype=tf.int64), + 'model_selector': model_selector + }, + reward=tf.constant(np.array([0]), dtype=tf.float64), + step_type=tf.constant(np.array([0]), dtype=tf.int64)) + + self.assertEqual( + combined_policy.action(state).action, tf.constant(-1, dtype=tf.int64)) From 27dee696f888a550267e5b839118a6f10c0380a0 Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Fri, 6 Sep 2024 17:53:32 +0000 Subject: [PATCH 04/28] yapf . -ir --- compiler_opt/tools/combine_tfa_policies.py | 14 +- .../tools/combine_tfa_policies_lib.py | 171 +++++++++--------- .../tools/combine_tfa_policies_lib_test.py | 102 ++++++----- 3 files changed, 154 insertions(+), 133 deletions(-) diff --git a/compiler_opt/tools/combine_tfa_policies.py b/compiler_opt/tools/combine_tfa_policies.py index 3c0db9c1..7309b60f 100755 --- a/compiler_opt/tools/combine_tfa_policies.py +++ b/compiler_opt/tools/combine_tfa_policies.py @@ -31,15 +31,17 @@ def main(_): policy1 = tf.saved_model.load(policy1_path, tags=None, options=None) policy2 = tf.saved_model.load(policy2_path, tags=None, options=None) combined_policy = cfa_lib.CombinedTFPolicy( - tf_policies={policy1_name:policy1, policy2_name:policy2}, - time_step_spec=expected_signature, - action_spec=action_spec - ) + tf_policies={ + policy1_name: policy1, + policy2_name: policy2 + }, + time_step_spec=expected_signature, + action_spec=action_spec) combined_policy_path = input("Save combined policy path: ") policy_dict = {'combined_policy': combined_policy} saver = policy_saver.PolicySaver(policy_dict=policy_dict) saver.save(combined_policy_path) -if __name__ == "__main__": - app.run(main) +if __name__ == "__main__": + app.run(main) diff --git a/compiler_opt/tools/combine_tfa_policies_lib.py b/compiler_opt/tools/combine_tfa_policies_lib.py index 87faf829..8aff3e3f 100644 --- a/compiler_opt/tools/combine_tfa_policies_lib.py +++ b/compiler_opt/tools/combine_tfa_policies_lib.py @@ -29,8 +29,7 @@ class CombinedTFPolicy(tf_agents.policies.TFPolicy): - def __init__(self, *args, - tf_policies: Dict[str, tf_agents.policies.TFPolicy], + def __init__(self, *args, tf_policies: Dict[str, tf_agents.policies.TFPolicy], **kwargs): super(CombinedTFPolicy, self).__init__(*args, **kwargs) @@ -47,11 +46,13 @@ def __init__(self, *args, for name in self.tf_policy_names: m = hashlib.md5() m.update(name.encode('utf-8')) - high_low_tensors.append(tf.stack([ - tf.constant(int.from_bytes(m.digest()[8:], 'little'), dtype=tf.uint64), - tf.constant(int.from_bytes(m.digest()[:8], 'little'), dtype=tf.uint64) - ]) - ) + high_low_tensors.append( + tf.stack([ + tf.constant( + int.from_bytes(m.digest()[8:], 'little'), dtype=tf.uint64), + tf.constant( + int.from_bytes(m.digest()[:8], 'little'), dtype=tf.uint64) + ])) self.high_low_tensors = tf.stack(high_low_tensors) m = hashlib.md5() @@ -65,44 +66,44 @@ def _process_observation(self, observation): if name in ['model_selector']: switch_tensor = observation.pop(name)[0] high_low_tensor = switch_tensor - + tf.debugging.Assert( tf.equal( tf.reduce_any( 
tf.reduce_all( - tf.equal(high_low_tensor, self.high_low_tensors), axis=1 - ) - ),True - ), - [high_low_tensor, self.high_low_tensors]) + tf.equal(high_low_tensor, self.high_low_tensors), + axis=1)), True), + [high_low_tensor, self.high_low_tensors]) return observation, switch_tensor def _create_distribution(self, inlining_prediction): probs = [inlining_prediction, 1.0 - inlining_prediction] - logits = [[0.0, tf.math.log(probs[1]/(1.0 - probs[1]))]] + logits = [[0.0, tf.math.log(probs[1] / (1.0 - probs[1]))]] return tfp.distributions.Categorical(logits=logits) - def _action(self, time_step: time_step.TimeStep, + def _action(self, + time_step: time_step.TimeStep, policy_state: types.NestedTensorSpec, seed: Optional[types.Seed] = None) -> policy_step.PolicyStep: new_observation = time_step.observation new_observation, switch_tensor = self._process_observation(new_observation) - updated_step = tf_agents.trajectories.TimeStep(step_type=time_step.step_type, - reward=time_step.reward, - discount=time_step.discount, - observation=new_observation) + updated_step = tf_agents.trajectories.TimeStep( + step_type=time_step.step_type, + reward=time_step.reward, + discount=time_step.discount, + observation=new_observation) + def f0(): return tf.cast( self.tf_policies[0].action(updated_step).action[0], dtype=tf.int64) + def f1(): return tf.cast( self.tf_policies[1].action(updated_step).action[0], dtype=tf.int64) + action = tf.cond( - tf.math.reduce_all( - tf.equal(switch_tensor, self.high_low_tensor)), - f0, - f1 - ) + tf.math.reduce_all(tf.equal(switch_tensor, self.high_low_tensor)), f0, + f1) return tf_agents.trajectories.PolicyStep(action=action, state=policy_state) def _distribution( @@ -110,82 +111,82 @@ def _distribution( policy_state: types.NestedTensorSpec) -> policy_step.PolicyStep: new_observation = time_step.observation new_observation, switch_tensor = self._process_observation(new_observation) - updated_step = tf_agents.trajectories.TimeStep(step_type=time_step.step_type, - reward=time_step.reward, - discount=time_step.discount, - observation=new_observation) + updated_step = tf_agents.trajectories.TimeStep( + step_type=time_step.step_type, + reward=time_step.reward, + discount=time_step.discount, + observation=new_observation) + def f0(): return tf.cast( self.tf_policies[0].distribution(updated_step).action.cdf(0)[0], dtype=tf.float32) + def f1(): return tf.cast( self.tf_policies[1].distribution(updated_step).action.cdf(0)[0], dtype=tf.float32) + distribution = tf.cond( - tf.math.reduce_all( - tf.equal(switch_tensor, self.high_low_tensor)), - f0, - f1 - ) + tf.math.reduce_all(tf.equal(switch_tensor, self.high_low_tensor)), f0, + f1) return tf_agents.trajectories.PolicyStep( - action=self._create_distribution(distribution), - state=policy_state) - + action=self._create_distribution(distribution), state=policy_state) @gin.configurable() def get_input_signature(): - """Returns the list of features for LLVM inlining to be used in combining models.""" - # int64 features - inputs = dict( - (key,tf.TensorSpec(dtype=tf.int64, shape=(), name=key)) - for key in [ - "caller_basic_block_count", - "caller_conditionally_executed_blocks", - "caller_users", - "callee_basic_block_count", - "callee_conditionally_executed_blocks", - "callee_users", - "nr_ctant_params", - "node_count", - "edge_count", - "callsite_height", - "cost_estimate", - "inlining_default", - "sroa_savings", - "sroa_losses", - "load_elimination", - "call_penalty", - "call_argument_setup", - "load_relative_intrinsic", - 
"lowered_call_arg_setup", - "indirect_call_penalty", - "jump_table_penalty", - "case_cluster_penalty", - "switch_penalty", - "unsimplified_common_instructions", - "num_loops", - "dead_blocks", - "simplified_instructions", - "constant_args", - "constant_offset_ptr_args", - "callsite_cost", - "cold_cc_penalty", - "last_call_to_static_bonus", - "is_multiple_blocks", - "nested_inlines", - "nested_inline_cost_estimate", - "threshold", - "is_callee_avail_external", - "is_caller_avail_external", - ] - ) - inputs.update({'model_selector': tf.TensorSpec(shape=(2,), dtype=tf.uint64, name='model_selector')}) - return time_step.time_step_spec(inputs) + """Returns the list of features for LLVM inlining to be used in combining models.""" + # int64 features + inputs = dict((key, tf.TensorSpec(dtype=tf.int64, shape=(), name=key)) + for key in [ + "caller_basic_block_count", + "caller_conditionally_executed_blocks", + "caller_users", + "callee_basic_block_count", + "callee_conditionally_executed_blocks", + "callee_users", + "nr_ctant_params", + "node_count", + "edge_count", + "callsite_height", + "cost_estimate", + "inlining_default", + "sroa_savings", + "sroa_losses", + "load_elimination", + "call_penalty", + "call_argument_setup", + "load_relative_intrinsic", + "lowered_call_arg_setup", + "indirect_call_penalty", + "jump_table_penalty", + "case_cluster_penalty", + "switch_penalty", + "unsimplified_common_instructions", + "num_loops", + "dead_blocks", + "simplified_instructions", + "constant_args", + "constant_offset_ptr_args", + "callsite_cost", + "cold_cc_penalty", + "last_call_to_static_bonus", + "is_multiple_blocks", + "nested_inlines", + "nested_inline_cost_estimate", + "threshold", + "is_callee_avail_external", + "is_caller_avail_external", + ]) + inputs.update({ + 'model_selector': + tf.TensorSpec(shape=(2,), dtype=tf.uint64, name='model_selector') + }) + return time_step.time_step_spec(inputs) + @gin.configurable() def get_action_spec(): return tensor_spec.BoundedTensorSpec( - dtype=tf.int64, shape=(), name='inlining_decision', minimum=0, maximum=1 - ) \ No newline at end of file + dtype=tf.int64, shape=(), name='inlining_decision', minimum=0, maximum=1) diff --git a/compiler_opt/tools/combine_tfa_policies_lib_test.py b/compiler_opt/tools/combine_tfa_policies_lib_test.py index 92ab24d7..c79b592c 100644 --- a/compiler_opt/tools/combine_tfa_policies_lib_test.py +++ b/compiler_opt/tools/combine_tfa_policies_lib_test.py @@ -25,17 +25,20 @@ import hashlib import numpy as np + class AddOnePolicy(tf_agents.policies.TFPolicy): + def __init__(self): - observation_spec = {'obs': tensor_spec.TensorSpec( - shape=(1,), dtype=tf.int64)} + observation_spec = { + 'obs': tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) + } time_step_spec = time_step.time_step_spec(observation_spec) - action_spec = tensor_spec.TensorSpec( - shape=(1,), dtype=tf.int64) + action_spec = tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) + + super(AddOnePolicy, self).__init__( + time_step_spec=time_step_spec, action_spec=action_spec) - super(AddOnePolicy, self).__init__(time_step_spec=time_step_spec, - action_spec=action_spec) def _distribution(self, time_step): pass @@ -47,17 +50,20 @@ def _action(self, time_step, policy_state, seed): action = tf.reshape(observation + 1, (1,)) return policy_step.PolicyStep(action, policy_state) + class SubtractOnePolicy(tf_agents.policies.TFPolicy): + def __init__(self): - observation_spec = {'obs': tensor_spec.TensorSpec( - shape=(1,), dtype=tf.int64)} + observation_spec = { + 'obs': 
tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) + } time_step_spec = time_step.time_step_spec(observation_spec) - action_spec = tensor_spec.TensorSpec( - shape=(1,), dtype=tf.int64) + action_spec = tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) + + super(SubtractOnePolicy, self).__init__( + time_step_spec=time_step_spec, action_spec=action_spec) - super(SubtractOnePolicy, self).__init__(time_step_spec=time_step_spec, - action_spec=action_spec) def _distribution(self, time_step): pass @@ -68,25 +74,30 @@ def _action(self, time_step, policy_state, seed): observation = time_step.observation['obs'][0] action = tf.reshape(observation - 1, (1,)) return policy_step.PolicyStep(action, policy_state) - -observation_spec = time_step.time_step_spec( - {'obs':tf.TensorSpec(dtype=tf.int32, shape=(), name='obs'), - 'model_selector': tf.TensorSpec(shape=(2,), - dtype=tf.uint64, name='model_selector')} -) -action_spec = tensor_spec.TensorSpec( - shape=(1,), dtype=tf.int64) + +observation_spec = time_step.time_step_spec({ + 'obs': + tf.TensorSpec(dtype=tf.int32, shape=(), name='obs'), + 'model_selector': + tf.TensorSpec(shape=(2,), dtype=tf.uint64, name='model_selector') +}) + +action_spec = tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) + class FeatureImportanceTest(absltest.TestCase): - + def test_select_add_policy(self): policy1 = AddOnePolicy() policy2 = SubtractOnePolicy() combined_policy = combine_tfa_policies_lib.CombinedTFPolicy( - tf_policies={'add_one':policy1, 'subtract_one':policy2}, - time_step_spec=observation_spec, - action_spec=action_spec) + tf_policies={ + 'add_one': policy1, + 'subtract_one': policy2 + }, + time_step_spec=observation_spec, + action_spec=action_spec) m = hashlib.md5() m.update('add_one'.encode('utf-8')) @@ -95,22 +106,27 @@ def test_select_add_policy(self): model_selector = tf.constant([[high, low]], dtype=tf.uint64) state = tf_agents.trajectories.TimeStep( - discount=tf.constant(np.array([0.]), dtype=tf.float32), - observation={'obs':tf.constant(np.array([0]), dtype=tf.int64), - 'model_selector':model_selector}, - reward=tf.constant(np.array([0]), dtype=tf.float64), - step_type=tf.constant(np.array([0]), dtype=tf.int64) - ) + discount=tf.constant(np.array([0.]), dtype=tf.float32), + observation={ + 'obs': tf.constant(np.array([0]), dtype=tf.int64), + 'model_selector': model_selector + }, + reward=tf.constant(np.array([0]), dtype=tf.float64), + step_type=tf.constant(np.array([0]), dtype=tf.int64)) - self.assertEqual(combined_policy.action(state).action, tf.constant(1, dtype=tf.int64)) + self.assertEqual( + combined_policy.action(state).action, tf.constant(1, dtype=tf.int64)) def test_select_subtract_policy(self): policy1 = AddOnePolicy() policy2 = SubtractOnePolicy() combined_policy = combine_tfa_policies_lib.CombinedTFPolicy( - tf_policies={'add_one':policy1, 'subtract_one':policy2}, - time_step_spec=observation_spec, - action_spec=action_spec) + tf_policies={ + 'add_one': policy1, + 'subtract_one': policy2 + }, + time_step_spec=observation_spec, + action_spec=action_spec) m = hashlib.md5() m.update('subtract_one'.encode('utf-8')) @@ -119,11 +135,13 @@ def test_select_subtract_policy(self): model_selector = tf.constant([[high, low]], dtype=tf.uint64) state = tf_agents.trajectories.TimeStep( - discount=tf.constant(np.array([0.]), dtype=tf.float32), - observation={'obs':tf.constant(np.array([0]), dtype=tf.int64), - 'model_selector':model_selector}, - reward=tf.constant(np.array([0]), dtype=tf.float64), - step_type=tf.constant(np.array([0]), dtype=tf.int64) - ) - 
- self.assertEqual(combined_policy.action(state).action, tf.constant(-1, dtype=tf.int64)) \ No newline at end of file + discount=tf.constant(np.array([0.]), dtype=tf.float32), + observation={ + 'obs': tf.constant(np.array([0]), dtype=tf.int64), + 'model_selector': model_selector + }, + reward=tf.constant(np.array([0]), dtype=tf.float64), + step_type=tf.constant(np.array([0]), dtype=tf.int64)) + + self.assertEqual( + combined_policy.action(state).action, tf.constant(-1, dtype=tf.int64)) From 47f5efc9a4e92c9663495863c66161b183b48330 Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Fri, 6 Sep 2024 18:51:42 +0000 Subject: [PATCH 05/28] Fixed pylint errors. --- compiler_opt/tools/combine_tfa_policies.py | 2 +- .../tools/combine_tfa_policies_lib.py | 39 ++++++++++--------- .../tools/combine_tfa_policies_lib_test.py | 28 ++++++------- 3 files changed, 36 insertions(+), 33 deletions(-) diff --git a/compiler_opt/tools/combine_tfa_policies.py b/compiler_opt/tools/combine_tfa_policies.py index 7309b60f..05a956ee 100755 --- a/compiler_opt/tools/combine_tfa_policies.py +++ b/compiler_opt/tools/combine_tfa_policies.py @@ -38,7 +38,7 @@ def main(_): time_step_spec=expected_signature, action_spec=action_spec) combined_policy_path = input("Save combined policy path: ") - policy_dict = {'combined_policy': combined_policy} + policy_dict = {"combined_policy": combined_policy} saver = policy_saver.PolicySaver(policy_dict=policy_dict) saver.save(combined_policy_path) diff --git a/compiler_opt/tools/combine_tfa_policies_lib.py b/compiler_opt/tools/combine_tfa_policies_lib.py index 8aff3e3f..33a38708 100644 --- a/compiler_opt/tools/combine_tfa_policies_lib.py +++ b/compiler_opt/tools/combine_tfa_policies_lib.py @@ -13,21 +13,22 @@ # See the License for the specific language governing permissions and # limitations under the License. 
"""Combines two tf-agent policies with the given state and action spec.""" -from typing import Dict, List, Optional, Tuple +from typing import Dict, Optional import gin import tensorflow as tf import hashlib import tf_agents -from tf_agents.trajectories import time_step +from tf_agents.trajectories import time_step as ts from tf_agents.typing import types from tf_agents.trajectories import policy_step -import tensorflow_probability as tfp from tf_agents.specs import tensor_spec +import tensorflow_probability as tfp class CombinedTFPolicy(tf_agents.policies.TFPolicy): + """Policy which combines two target policies.""" def __init__(self, *args, tf_policies: Dict[str, tf_agents.policies.TFPolicy], **kwargs): @@ -45,25 +46,25 @@ def __init__(self, *args, tf_policies: Dict[str, tf_agents.policies.TFPolicy], high_low_tensors = [] for name in self.tf_policy_names: m = hashlib.md5() - m.update(name.encode('utf-8')) + m.update(name.encode("utf-8")) high_low_tensors.append( tf.stack([ tf.constant( - int.from_bytes(m.digest()[8:], 'little'), dtype=tf.uint64), + int.from_bytes(m.digest()[8:], "little"), dtype=tf.uint64), tf.constant( - int.from_bytes(m.digest()[:8], 'little'), dtype=tf.uint64) + int.from_bytes(m.digest()[:8], "little"), dtype=tf.uint64) ])) self.high_low_tensors = tf.stack(high_low_tensors) m = hashlib.md5() - m.update(self.tf_policy_names[0].encode('utf-8')) - self.high = int.from_bytes(m.digest()[8:], 'little') - self.low = int.from_bytes(m.digest()[:8], 'little') + m.update(self.tf_policy_names[0].encode("utf-8")) + self.high = int.from_bytes(m.digest()[8:], "little") + self.low = int.from_bytes(m.digest()[:8], "little") self.high_low_tensor = tf.constant([self.high, self.low], dtype=tf.uint64) def _process_observation(self, observation): for name in self.sorted_keys: - if name in ['model_selector']: + if name in ["model_selector"]: switch_tensor = observation.pop(name)[0] high_low_tensor = switch_tensor @@ -82,12 +83,12 @@ def _create_distribution(self, inlining_prediction): return tfp.distributions.Categorical(logits=logits) def _action(self, - time_step: time_step.TimeStep, + time_step: ts.TimeStep, policy_state: types.NestedTensorSpec, seed: Optional[types.Seed] = None) -> policy_step.PolicyStep: new_observation = time_step.observation new_observation, switch_tensor = self._process_observation(new_observation) - updated_step = tf_agents.trajectories.TimeStep( + updated_step = ts.TimeStep( step_type=time_step.step_type, reward=time_step.reward, discount=time_step.discount, @@ -107,11 +108,11 @@ def f1(): return tf_agents.trajectories.PolicyStep(action=action, state=policy_state) def _distribution( - self, time_step: time_step.TimeStep, + self, time_step: ts.TimeStep, policy_state: types.NestedTensorSpec) -> policy_step.PolicyStep: new_observation = time_step.observation new_observation, switch_tensor = self._process_observation(new_observation) - updated_step = tf_agents.trajectories.TimeStep( + updated_step = ts.TimeStep( step_type=time_step.step_type, reward=time_step.reward, discount=time_step.discount, @@ -136,7 +137,7 @@ def f1(): @gin.configurable() def get_input_signature(): - """Returns the list of features for LLVM inlining to be used in combining models.""" + """Returns a list of inlining features to be used with the combined models.""" # int64 features inputs = dict((key, tf.TensorSpec(dtype=tf.int64, shape=(), name=key)) for key in [ @@ -180,13 +181,13 @@ def get_input_signature(): "is_caller_avail_external", ]) inputs.update({ - 'model_selector': - 
tf.TensorSpec(shape=(2,), dtype=tf.uint64, name='model_selector') + "model_selector": + tf.TensorSpec(shape=(2,), dtype=tf.uint64, name="model_selector") }) - return time_step.time_step_spec(inputs) + return ts.time_step_spec(inputs) @gin.configurable() def get_action_spec(): return tensor_spec.BoundedTensorSpec( - dtype=tf.int64, shape=(), name='inlining_decision', minimum=0, maximum=1) + dtype=tf.int64, shape=(), name="inlining_decision", minimum=0, maximum=1) diff --git a/compiler_opt/tools/combine_tfa_policies_lib_test.py b/compiler_opt/tools/combine_tfa_policies_lib_test.py index c79b592c..03b3ae96 100644 --- a/compiler_opt/tools/combine_tfa_policies_lib_test.py +++ b/compiler_opt/tools/combine_tfa_policies_lib_test.py @@ -12,13 +12,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Tests for the feature_importance_utils.py module""" +"""Tests for the combine_tfa_policies_lib.py module""" from absl.testing import absltest import tensorflow as tf from compiler_opt.tools import combine_tfa_policies_lib -from tf_agents.trajectories import time_step +from tf_agents.trajectories import time_step as ts import tf_agents from tf_agents.specs import tensor_spec from tf_agents.trajectories import policy_step @@ -27,19 +27,20 @@ class AddOnePolicy(tf_agents.policies.TFPolicy): + """Test policy which adds one to obs feature.""" def __init__(self): - observation_spec = { + obs_spec = { 'obs': tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) } - time_step_spec = time_step.time_step_spec(observation_spec) + time_step_spec = ts.time_step_spec(obs_spec) - action_spec = tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) + act_spec = tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) super(AddOnePolicy, self).__init__( - time_step_spec=time_step_spec, action_spec=action_spec) + time_step_spec=time_step_spec, action_spec=act_spec) - def _distribution(self, time_step): + def _distribution(self, t_step): pass def _variables(self): @@ -52,19 +53,20 @@ def _action(self, time_step, policy_state, seed): class SubtractOnePolicy(tf_agents.policies.TFPolicy): + """Test policy which subtracts one to obs feature.""" def __init__(self): - observation_spec = { + obs_spec = { 'obs': tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) } - time_step_spec = time_step.time_step_spec(observation_spec) + time_step_spec = ts.time_step_spec(obs_spec) - action_spec = tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) + act_spec = tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) super(SubtractOnePolicy, self).__init__( - time_step_spec=time_step_spec, action_spec=action_spec) + time_step_spec=time_step_spec, action_spec=act_spec) - def _distribution(self, time_step): + def _distribution(self, t_step): pass def _variables(self): @@ -76,7 +78,7 @@ def _action(self, time_step, policy_state, seed): return policy_step.PolicyStep(action, policy_state) -observation_spec = time_step.time_step_spec({ +observation_spec = ts.time_step_spec({ 'obs': tf.TensorSpec(dtype=tf.int32, shape=(), name='obs'), 'model_selector': From f5b6b6f4b0a7081290fc336f2f71267013ffe253 Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Fri, 6 Sep 2024 19:02:35 +0000 Subject: [PATCH 06/28] yapf . 
-ir --- compiler_opt/tools/combine_tfa_policies_lib_test.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/compiler_opt/tools/combine_tfa_policies_lib_test.py b/compiler_opt/tools/combine_tfa_policies_lib_test.py index e0f46f2e..89a74eef 100644 --- a/compiler_opt/tools/combine_tfa_policies_lib_test.py +++ b/compiler_opt/tools/combine_tfa_policies_lib_test.py @@ -30,9 +30,7 @@ class AddOnePolicy(tf_agents.policies.TFPolicy): """Test policy which adds one to obs feature.""" def __init__(self): - obs_spec = { - 'obs': tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) - } + obs_spec = {'obs': tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64)} time_step_spec = ts.time_step_spec(obs_spec) act_spec = tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) @@ -56,9 +54,7 @@ class SubtractOnePolicy(tf_agents.policies.TFPolicy): """Test policy which subtracts one to obs feature.""" def __init__(self): - obs_spec = { - 'obs': tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) - } + obs_spec = {'obs': tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64)} time_step_spec = ts.time_step_spec(obs_spec) act_spec = tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) From 35d9e8c5fa6206fbeb2f0c3f81c7db3e63263d6c Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Fri, 6 Sep 2024 19:05:51 +0000 Subject: [PATCH 07/28] Fixed super without arguments pylint error. --- compiler_opt/tools/combine_tfa_policies_lib.py | 2 +- compiler_opt/tools/combine_tfa_policies_lib_test.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/compiler_opt/tools/combine_tfa_policies_lib.py b/compiler_opt/tools/combine_tfa_policies_lib.py index 33a38708..ebb1b1f8 100644 --- a/compiler_opt/tools/combine_tfa_policies_lib.py +++ b/compiler_opt/tools/combine_tfa_policies_lib.py @@ -32,7 +32,7 @@ class CombinedTFPolicy(tf_agents.policies.TFPolicy): def __init__(self, *args, tf_policies: Dict[str, tf_agents.policies.TFPolicy], **kwargs): - super(CombinedTFPolicy, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) self.tf_policies = [] self.tf_policy_names = [] diff --git a/compiler_opt/tools/combine_tfa_policies_lib_test.py b/compiler_opt/tools/combine_tfa_policies_lib_test.py index 89a74eef..030d213f 100644 --- a/compiler_opt/tools/combine_tfa_policies_lib_test.py +++ b/compiler_opt/tools/combine_tfa_policies_lib_test.py @@ -35,7 +35,7 @@ def __init__(self): act_spec = tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) - super(AddOnePolicy, self).__init__( + super().__init__( time_step_spec=time_step_spec, action_spec=act_spec) def _distribution(self, time_step): @@ -59,7 +59,7 @@ def __init__(self): act_spec = tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) - super(SubtractOnePolicy, self).__init__( + super().__init__( time_step_spec=time_step_spec, action_spec=act_spec) def _distribution(self, time_step): From 5d6783d7659abec63d666d33eb75434746be5ac1 Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Fri, 6 Sep 2024 19:09:06 +0000 Subject: [PATCH 08/28] yapf . 
-ir --- compiler_opt/tools/combine_tfa_policies_lib_test.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/compiler_opt/tools/combine_tfa_policies_lib_test.py b/compiler_opt/tools/combine_tfa_policies_lib_test.py index 030d213f..9fb8bb4b 100644 --- a/compiler_opt/tools/combine_tfa_policies_lib_test.py +++ b/compiler_opt/tools/combine_tfa_policies_lib_test.py @@ -35,8 +35,7 @@ def __init__(self): act_spec = tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) - super().__init__( - time_step_spec=time_step_spec, action_spec=act_spec) + super().__init__(time_step_spec=time_step_spec, action_spec=act_spec) def _distribution(self, time_step): pass @@ -59,8 +58,7 @@ def __init__(self): act_spec = tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) - super().__init__( - time_step_spec=time_step_spec, action_spec=act_spec) + super().__init__(time_step_spec=time_step_spec, action_spec=act_spec) def _distribution(self, time_step): pass From 7997f143587eadb2200682e9bf66bd1462e85cbf Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Fri, 6 Sep 2024 19:58:38 +0000 Subject: [PATCH 09/28] Fixing pytype annotations. --- compiler_opt/tools/combine_tfa_policies_lib.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/compiler_opt/tools/combine_tfa_policies_lib.py b/compiler_opt/tools/combine_tfa_policies_lib.py index ebb1b1f8..439f1b9b 100644 --- a/compiler_opt/tools/combine_tfa_policies_lib.py +++ b/compiler_opt/tools/combine_tfa_policies_lib.py @@ -62,7 +62,7 @@ def __init__(self, *args, tf_policies: Dict[str, tf_agents.policies.TFPolicy], self.low = int.from_bytes(m.digest()[:8], "little") self.high_low_tensor = tf.constant([self.high, self.low], dtype=tf.uint64) - def _process_observation(self, observation): + def _process_observation(self, observation: types.NestedSpecTensorOrArray): for name in self.sorted_keys: if name in ["model_selector"]: switch_tensor = observation.pop(name)[0] @@ -105,7 +105,7 @@ def f1(): action = tf.cond( tf.math.reduce_all(tf.equal(switch_tensor, self.high_low_tensor)), f0, f1) - return tf_agents.trajectories.PolicyStep(action=action, state=policy_state) + return policy_step.PolicyStep(action=action, state=policy_state) def _distribution( self, time_step: ts.TimeStep, @@ -131,7 +131,7 @@ def f1(): distribution = tf.cond( tf.math.reduce_all(tf.equal(switch_tensor, self.high_low_tensor)), f0, f1) - return tf_agents.trajectories.PolicyStep( + return policy_step.PolicyStep( action=self._create_distribution(distribution), state=policy_state) From 59d36774660273416de75b03437a8c7f2a5ab477 Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Fri, 6 Sep 2024 22:31:55 +0000 Subject: [PATCH 10/28] Fixed pytype errors. Addressed comments. --- compiler_opt/tools/combine_tfa_policies.py | 53 +++++++++++--- .../tools/combine_tfa_policies_lib.py | 73 +++---------------- .../tools/combine_tfa_policies_lib_test.py | 36 +++++---- 3 files changed, 75 insertions(+), 87 deletions(-) diff --git a/compiler_opt/tools/combine_tfa_policies.py b/compiler_opt/tools/combine_tfa_policies.py index 05a956ee..fb1a8f25 100755 --- a/compiler_opt/tools/combine_tfa_policies.py +++ b/compiler_opt/tools/combine_tfa_policies.py @@ -14,20 +14,55 @@ # limitations under the License. 
"""Runs the policy combiner.""" from absl import app +from absl import flags + +import gin import tensorflow as tf from compiler_opt.rl import policy_saver +from compiler_opt.rl import registry from compiler_opt.tools import combine_tfa_policies_lib as cfa_lib +_COMBINE_POLICIES_NAMES = flags.DEFINE_multi_string( + 'policies_names', [], + 'List in order of policy names for combined policies.') +_COMBINE_POLICIES_PATHS = flags.DEFINE_multi_string( + 'policies_paths', [], + 'List in order of policy paths for combined policies.') +_COMBINED_POLICY_PATH = flags.DEFINE_string( + 'combined_policy_path', '', 'Path to save the combined policy.') +_GIN_FILES = flags.DEFINE_multi_string( + 'gin_files', [], 'List of paths to gin configuration files.') +_GIN_BINDINGS = flags.DEFINE_multi_string( + 'gin_bindings', [], + 'Gin bindings to override the values set in the config files.') + def main(_): - expected_signature = cfa_lib.get_input_signature() - action_spec = cfa_lib.get_action_spec() - policy1_name = input("First policy name: ") - policy1_path = input(policy1_name + " path: ") - policy2_name = input("Second policy name: ") - policy2_path = input(policy2_name + " path: ") + flags.mark_flag_as_required('policies_names') + flags.mark_flag_as_required('policies_paths') + flags.mark_flag_as_required('combined_policy_path') + assert len(_COMBINE_POLICIES_NAMES.value) == len( + _COMBINE_POLICIES_PATHS.value) + gin.add_config_file_search_path( + 'compiler_opt/rl/inlining/gin_configs/common.gin') + gin.parse_config_files_and_bindings( + _GIN_FILES.value, bindings=_GIN_BINDINGS.value, skip_unknown=False) + + problem_config = registry.get_configuration() + expected_signature, action_spec = problem_config.get_signature_spec() + expected_signature.observation.update({ + 'model_selector': + tf.TensorSpec(shape=(2,), dtype=tf.uint64, name='model_selector') + }) + assert len(_COMBINE_POLICIES_NAMES.value + ) == 2, 'Combiner supports only two policies.' + + policy1_name = _COMBINE_POLICIES_NAMES.value[0] + policy1_path = _COMBINE_POLICIES_PATHS.value[0] + policy2_name = _COMBINE_POLICIES_NAMES.value[1] + policy2_path = _COMBINE_POLICIES_PATHS.value[1] policy1 = tf.saved_model.load(policy1_path, tags=None, options=None) policy2 = tf.saved_model.load(policy2_path, tags=None, options=None) combined_policy = cfa_lib.CombinedTFPolicy( @@ -37,11 +72,11 @@ def main(_): }, time_step_spec=expected_signature, action_spec=action_spec) - combined_policy_path = input("Save combined policy path: ") - policy_dict = {"combined_policy": combined_policy} + combined_policy_path = _COMBINED_POLICY_PATH.value + policy_dict = {'combined_policy': combined_policy} saver = policy_saver.PolicySaver(policy_dict=policy_dict) saver.save(combined_policy_path) -if __name__ == "__main__": +if __name__ == '__main__': app.run(main) diff --git a/compiler_opt/tools/combine_tfa_policies_lib.py b/compiler_opt/tools/combine_tfa_policies_lib.py index 439f1b9b..a2303bf0 100644 --- a/compiler_opt/tools/combine_tfa_policies_lib.py +++ b/compiler_opt/tools/combine_tfa_policies_lib.py @@ -13,9 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
"""Combines two tf-agent policies with the given state and action spec.""" -from typing import Dict, Optional +from typing import Dict, Optional, Tuple -import gin import tensorflow as tf import hashlib @@ -23,7 +22,6 @@ from tf_agents.trajectories import time_step as ts from tf_agents.typing import types from tf_agents.trajectories import policy_step -from tf_agents.specs import tensor_spec import tensorflow_probability as tfp @@ -55,14 +53,18 @@ def __init__(self, *args, tf_policies: Dict[str, tf_agents.policies.TFPolicy], int.from_bytes(m.digest()[:8], "little"), dtype=tf.uint64) ])) self.high_low_tensors = tf.stack(high_low_tensors) - + # Related LLVM commit: https://github.com/llvm/llvm-project/pull/96276 m = hashlib.md5() m.update(self.tf_policy_names[0].encode("utf-8")) self.high = int.from_bytes(m.digest()[8:], "little") self.low = int.from_bytes(m.digest()[:8], "little") self.high_low_tensor = tf.constant([self.high, self.low], dtype=tf.uint64) - def _process_observation(self, observation: types.NestedSpecTensorOrArray): + def _process_observation( + self, observation: types.NestedSpecTensorOrArray + ) -> Tuple[types.NestedSpecTensorOrArray, types.TensorOrArray]: + assert "model_selector" in self.sorted_keys + high_low_tensor = self.high_low_tensor for name in self.sorted_keys: if name in ["model_selector"]: switch_tensor = observation.pop(name)[0] @@ -75,7 +77,8 @@ def _process_observation(self, observation: types.NestedSpecTensorOrArray): tf.equal(high_low_tensor, self.high_low_tensors), axis=1)), True), [high_low_tensor, self.high_low_tensors]) - return observation, switch_tensor + + return observation, high_low_tensor def _create_distribution(self, inlining_prediction): probs = [inlining_prediction, 1.0 - inlining_prediction] @@ -133,61 +136,3 @@ def f1(): f1) return policy_step.PolicyStep( action=self._create_distribution(distribution), state=policy_state) - - -@gin.configurable() -def get_input_signature(): - """Returns a list of inlining features to be used with the combined models.""" - # int64 features - inputs = dict((key, tf.TensorSpec(dtype=tf.int64, shape=(), name=key)) - for key in [ - "caller_basic_block_count", - "caller_conditionally_executed_blocks", - "caller_users", - "callee_basic_block_count", - "callee_conditionally_executed_blocks", - "callee_users", - "nr_ctant_params", - "node_count", - "edge_count", - "callsite_height", - "cost_estimate", - "inlining_default", - "sroa_savings", - "sroa_losses", - "load_elimination", - "call_penalty", - "call_argument_setup", - "load_relative_intrinsic", - "lowered_call_arg_setup", - "indirect_call_penalty", - "jump_table_penalty", - "case_cluster_penalty", - "switch_penalty", - "unsimplified_common_instructions", - "num_loops", - "dead_blocks", - "simplified_instructions", - "constant_args", - "constant_offset_ptr_args", - "callsite_cost", - "cold_cc_penalty", - "last_call_to_static_bonus", - "is_multiple_blocks", - "nested_inlines", - "nested_inline_cost_estimate", - "threshold", - "is_callee_avail_external", - "is_caller_avail_external", - ]) - inputs.update({ - "model_selector": - tf.TensorSpec(shape=(2,), dtype=tf.uint64, name="model_selector") - }) - return ts.time_step_spec(inputs) - - -@gin.configurable() -def get_action_spec(): - return tensor_spec.BoundedTensorSpec( - dtype=tf.int64, shape=(), name="inlining_decision", minimum=0, maximum=1) diff --git a/compiler_opt/tools/combine_tfa_policies_lib_test.py b/compiler_opt/tools/combine_tfa_policies_lib_test.py index 9fb8bb4b..2404bff1 100644 --- 
a/compiler_opt/tools/combine_tfa_policies_lib_test.py +++ b/compiler_opt/tools/combine_tfa_policies_lib_test.py @@ -22,12 +22,22 @@ import tf_agents from tf_agents.specs import tensor_spec from tf_agents.trajectories import policy_step +from tf_agents.typing import types import hashlib import numpy as np +def client_side_model_selector_calculation(policy_name: str) -> types.Tensor: + m = hashlib.md5() + m.update(policy_name.encode('utf-8')) + high = int.from_bytes(m.digest()[8:], 'little') + low = int.from_bytes(m.digest()[:8], 'little') + model_selector = tf.constant([[high, low]], dtype=tf.uint64) + return model_selector + + class AddOnePolicy(tf_agents.policies.TFPolicy): - """Test policy which adds one to obs feature.""" + """Test policy which increments the obs feature.""" def __init__(self): obs_spec = {'obs': tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64)} @@ -38,19 +48,22 @@ def __init__(self): super().__init__(time_step_spec=time_step_spec, action_spec=act_spec) def _distribution(self, time_step): + """Boilerplate function for TFPolicy.""" pass def _variables(self): + """Boilerplate function for TFPolicy.""" return () def _action(self, time_step, policy_state, seed): + """Boilerplate function for TFPolicy.""" observation = time_step.observation['obs'][0] action = tf.reshape(observation + 1, (1,)) return policy_step.PolicyStep(action, policy_state) class SubtractOnePolicy(tf_agents.policies.TFPolicy): - """Test policy which subtracts one to obs feature.""" + """Test policy which decrements the obs feature.""" def __init__(self): obs_spec = {'obs': tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64)} @@ -61,12 +74,15 @@ def __init__(self): super().__init__(time_step_spec=time_step_spec, action_spec=act_spec) def _distribution(self, time_step): + """Boilerplate function for TFPolicy.""" pass def _variables(self): + """Boilerplate function for TFPolicy.""" return () def _action(self, time_step, policy_state, seed): + """Boilerplate function for TFPolicy.""" observation = time_step.observation['obs'][0] action = tf.reshape(observation - 1, (1,)) return policy_step.PolicyStep(action, policy_state) @@ -95,13 +111,9 @@ def test_select_add_policy(self): time_step_spec=observation_spec, action_spec=action_spec) - m = hashlib.md5() - m.update('add_one'.encode('utf-8')) - high = int.from_bytes(m.digest()[8:], 'little') - low = int.from_bytes(m.digest()[:8], 'little') - model_selector = tf.constant([[high, low]], dtype=tf.uint64) + model_selector = client_side_model_selector_calculation('add_one') - state = tf_agents.trajectories.TimeStep( + state = ts.TimeStep( discount=tf.constant(np.array([0.]), dtype=tf.float32), observation={ 'obs': tf.constant(np.array([0]), dtype=tf.int64), @@ -124,13 +136,9 @@ def test_select_subtract_policy(self): time_step_spec=observation_spec, action_spec=action_spec) - m = hashlib.md5() - m.update('subtract_one'.encode('utf-8')) - high = int.from_bytes(m.digest()[8:], 'little') - low = int.from_bytes(m.digest()[:8], 'little') - model_selector = tf.constant([[high, low]], dtype=tf.uint64) + model_selector = client_side_model_selector_calculation('subtract_one') - state = tf_agents.trajectories.TimeStep( + state = ts.TimeStep( discount=tf.constant(np.array([0.]), dtype=tf.float32), observation={ 'obs': tf.constant(np.array([0]), dtype=tf.int64), From 6d8c0c77a2185dc3bbc41df2ee85c5494a3c35b7 Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Mon, 9 Sep 2024 13:46:27 +0000 Subject: [PATCH 11/28] Addressed comments. 
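
Reviewer note (illustration only): the model_selector feature the combined policy dispatches on is just the md5 digest of a policy's name split into two little-endian uint64 values, mirroring the client_side_model_selector_calculation test helper above. A minimal client-side sketch, with a placeholder policy name:

import hashlib

import tensorflow as tf


def model_selector_for(policy_name: str) -> tf.Tensor:
  # Toy sketch: encode a policy name as its [high, low] md5 pair.
  digest = hashlib.md5(policy_name.encode('utf-8')).digest()
  high = int.from_bytes(digest[8:], 'little')
  low = int.from_bytes(digest[:8], 'little')
  # Batched to shape (1, 2) so it can be fed alongside the other features
  # as the 'model_selector' observation.
  return tf.constant([[high, low]], dtype=tf.uint64)


# e.g. model_selector_for('policy_a') selects whichever policy was registered
# with the combiner under the (placeholder) name 'policy_a'.
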
--- compiler_opt/tools/combine_tfa_policies.py | 31 +++++++++++++------ .../tools/combine_tfa_policies_lib.py | 11 +++++++ .../tools/combine_tfa_policies_lib_test.py | 11 ++++--- 3 files changed, 39 insertions(+), 14 deletions(-) diff --git a/compiler_opt/tools/combine_tfa_policies.py b/compiler_opt/tools/combine_tfa_policies.py index fb1a8f25..0e758a8f 100755 --- a/compiler_opt/tools/combine_tfa_policies.py +++ b/compiler_opt/tools/combine_tfa_policies.py @@ -15,6 +15,9 @@ """Runs the policy combiner.""" from absl import app from absl import flags +from absl import logging + +import sys import gin @@ -25,11 +28,15 @@ from compiler_opt.tools import combine_tfa_policies_lib as cfa_lib _COMBINE_POLICIES_NAMES = flags.DEFINE_multi_string( - 'policies_names', [], - 'List in order of policy names for combined policies.') + 'policies_names', + [], + 'List in order of policy names for combined policies. Order must match that of policies_paths.' # pylint: disable=line-too-long +) _COMBINE_POLICIES_PATHS = flags.DEFINE_multi_string( - 'policies_paths', [], - 'List in order of policy paths for combined policies.') + 'policies_paths', + [], + 'List in order of policy paths for combined policies. Order must match that of policies_names.' # pylint: disable=line-too-long +) _COMBINED_POLICY_PATH = flags.DEFINE_string( 'combined_policy_path', '', 'Path to save the combined policy.') _GIN_FILES = flags.DEFINE_multi_string( @@ -43,8 +50,11 @@ def main(_): flags.mark_flag_as_required('policies_names') flags.mark_flag_as_required('policies_paths') flags.mark_flag_as_required('combined_policy_path') - assert len(_COMBINE_POLICIES_NAMES.value) == len( - _COMBINE_POLICIES_PATHS.value) + if len(_COMBINE_POLICIES_NAMES.value) != len(_COMBINE_POLICIES_PATHS.value): + logging.error( + 'Length of policies_names: %d must equal length of policies_paths: %d.', + len(_COMBINE_POLICIES_NAMES.value), len(_COMBINE_POLICIES_PATHS.value)) + sys.exit(1) gin.add_config_file_search_path( 'compiler_opt/rl/inlining/gin_configs/common.gin') gin.parse_config_files_and_bindings( @@ -56,9 +66,12 @@ def main(_): 'model_selector': tf.TensorSpec(shape=(2,), dtype=tf.uint64, name='model_selector') }) - assert len(_COMBINE_POLICIES_NAMES.value - ) == 2, 'Combiner supports only two policies.' - + # TODO(359): We only support combining two policies.Generalize this to handle + # multiple policies. + if len(_COMBINE_POLICIES_NAMES.value) != 2: + logging.error('Policy combiner only supports two policies, %d given.', + len(_COMBINE_POLICIES_NAMES.value)) + sys.exit(1) policy1_name = _COMBINE_POLICIES_NAMES.value[0] policy1_path = _COMBINE_POLICIES_PATHS.value[0] policy2_name = _COMBINE_POLICIES_NAMES.value[1] diff --git a/compiler_opt/tools/combine_tfa_policies_lib.py b/compiler_opt/tools/combine_tfa_policies_lib.py index a2303bf0..c8d09b6a 100644 --- a/compiler_opt/tools/combine_tfa_policies_lib.py +++ b/compiler_opt/tools/combine_tfa_policies_lib.py @@ -67,6 +67,7 @@ def _process_observation( high_low_tensor = self.high_low_tensor for name in self.sorted_keys: if name in ["model_selector"]: + # model_selector is a Tensor of shape (1,) which requires indexing [0] switch_tensor = observation.pop(name)[0] high_low_tensor = switch_tensor @@ -81,6 +82,12 @@ def _process_observation( return observation, high_low_tensor def _create_distribution(self, inlining_prediction): + """Ensures that even deterministic policies return a distribution. + + This will not change the behavior of the action function which is + what is used at inference time. 
The change for the distribution + function is so that we can always support sampling even for + deterministic policies.""" probs = [inlining_prediction, 1.0 - inlining_prediction] logits = [[0.0, tf.math.log(probs[1] / (1.0 - probs[1]))]] return tfp.distributions.Categorical(logits=logits) @@ -97,6 +104,8 @@ def _action(self, discount=time_step.discount, observation=new_observation) + # TODO(359): We only support combining two policies.Generalize this to + # handle multiple policies. def f0(): return tf.cast( self.tf_policies[0].action(updated_step).action[0], dtype=tf.int64) @@ -121,6 +130,8 @@ def _distribution( discount=time_step.discount, observation=new_observation) + # TODO(359): We only support combining two policies.Generalize this to + # handle multiple policies. def f0(): return tf.cast( self.tf_policies[0].distribution(updated_step).action.cdf(0)[0], diff --git a/compiler_opt/tools/combine_tfa_policies_lib_test.py b/compiler_opt/tools/combine_tfa_policies_lib_test.py index 2404bff1..7cd873c5 100644 --- a/compiler_opt/tools/combine_tfa_policies_lib_test.py +++ b/compiler_opt/tools/combine_tfa_policies_lib_test.py @@ -98,7 +98,8 @@ def _action(self, time_step, policy_state, seed): action_spec = tensor_spec.TensorSpec(shape=(1,), dtype=tf.int64) -class FeatureImportanceTest(absltest.TestCase): +class CombinedTFPolicyTest(absltest.TestCase): + """Test for CombinedTFPolicy.""" def test_select_add_policy(self): policy1 = AddOnePolicy() @@ -116,14 +117,14 @@ def test_select_add_policy(self): state = ts.TimeStep( discount=tf.constant(np.array([0.]), dtype=tf.float32), observation={ - 'obs': tf.constant(np.array([0]), dtype=tf.int64), + 'obs': tf.constant(np.array([42]), dtype=tf.int64), 'model_selector': model_selector }, reward=tf.constant(np.array([0]), dtype=tf.float64), step_type=tf.constant(np.array([0]), dtype=tf.int64)) self.assertEqual( - combined_policy.action(state).action, tf.constant(1, dtype=tf.int64)) + combined_policy.action(state).action, tf.constant(43, dtype=tf.int64)) def test_select_subtract_policy(self): policy1 = AddOnePolicy() @@ -141,11 +142,11 @@ def test_select_subtract_policy(self): state = ts.TimeStep( discount=tf.constant(np.array([0.]), dtype=tf.float32), observation={ - 'obs': tf.constant(np.array([0]), dtype=tf.int64), + 'obs': tf.constant(np.array([42]), dtype=tf.int64), 'model_selector': model_selector }, reward=tf.constant(np.array([0]), dtype=tf.float64), step_type=tf.constant(np.array([0]), dtype=tf.int64)) self.assertEqual( - combined_policy.action(state).action, tf.constant(-1, dtype=tf.int64)) + combined_policy.action(state).action, tf.constant(41, dtype=tf.int64)) From 3b0cefd0c245f1a4b6a890ce6b0e0bfefdc4b78d Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Mon, 9 Sep 2024 16:55:01 +0000 Subject: [PATCH 12/28] Resolved _distribution and common.gin comments. 
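
Note: with _distribution reduced to a placeholder, all policy selection now happens in _action, where the incoming model_selector is compared element-wise against the stored [high, low] constant and tf.cond picks the matching policy. A toy, self-contained illustration of that branch pattern (the hash values and actions below are made up):

import tensorflow as tf

stored = tf.constant([7, 11], dtype=tf.uint64)    # placeholder [high, low] pair
incoming = tf.constant([7, 11], dtype=tf.uint64)  # taken from the observation

action = tf.cond(
    tf.math.reduce_all(tf.equal(incoming, stored)),
    lambda: tf.constant(1, dtype=tf.int64),  # stand-in for the first policy's action
    lambda: tf.constant(0, dtype=tf.int64))  # stand-in for the second policy's action
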
--- compiler_opt/tools/combine_tfa_policies.py | 2 - .../tools/combine_tfa_policies_lib.py | 37 +------------------ 2 files changed, 2 insertions(+), 37 deletions(-) diff --git a/compiler_opt/tools/combine_tfa_policies.py b/compiler_opt/tools/combine_tfa_policies.py index 0e758a8f..0aad21b9 100755 --- a/compiler_opt/tools/combine_tfa_policies.py +++ b/compiler_opt/tools/combine_tfa_policies.py @@ -55,8 +55,6 @@ def main(_): 'Length of policies_names: %d must equal length of policies_paths: %d.', len(_COMBINE_POLICIES_NAMES.value), len(_COMBINE_POLICIES_PATHS.value)) sys.exit(1) - gin.add_config_file_search_path( - 'compiler_opt/rl/inlining/gin_configs/common.gin') gin.parse_config_files_and_bindings( _GIN_FILES.value, bindings=_GIN_BINDINGS.value, skip_unknown=False) diff --git a/compiler_opt/tools/combine_tfa_policies_lib.py b/compiler_opt/tools/combine_tfa_policies_lib.py index c8d09b6a..4a53ba19 100644 --- a/compiler_opt/tools/combine_tfa_policies_lib.py +++ b/compiler_opt/tools/combine_tfa_policies_lib.py @@ -81,17 +81,6 @@ def _process_observation( return observation, high_low_tensor - def _create_distribution(self, inlining_prediction): - """Ensures that even deterministic policies return a distribution. - - This will not change the behavior of the action function which is - what is used at inference time. The change for the distribution - function is so that we can always support sampling even for - deterministic policies.""" - probs = [inlining_prediction, 1.0 - inlining_prediction] - logits = [[0.0, tf.math.log(probs[1] / (1.0 - probs[1]))]] - return tfp.distributions.Categorical(logits=logits) - def _action(self, time_step: ts.TimeStep, policy_state: types.NestedTensorSpec, @@ -122,28 +111,6 @@ def f1(): def _distribution( self, time_step: ts.TimeStep, policy_state: types.NestedTensorSpec) -> policy_step.PolicyStep: - new_observation = time_step.observation - new_observation, switch_tensor = self._process_observation(new_observation) - updated_step = ts.TimeStep( - step_type=time_step.step_type, - reward=time_step.reward, - discount=time_step.discount, - observation=new_observation) - - # TODO(359): We only support combining two policies.Generalize this to - # handle multiple policies. - def f0(): - return tf.cast( - self.tf_policies[0].distribution(updated_step).action.cdf(0)[0], - dtype=tf.float32) - - def f1(): - return tf.cast( - self.tf_policies[1].distribution(updated_step).action.cdf(0)[0], - dtype=tf.float32) - - distribution = tf.cond( - tf.math.reduce_all(tf.equal(switch_tensor, self.high_low_tensor)), f0, - f1) + """Placeholder for distribution as every TFPolicy requires it.""" return policy_step.PolicyStep( - action=self._create_distribution(distribution), state=policy_state) + action=tfp.distributions.Deterministic(2.), state=policy_state) From 78460ce93ca26b495cc243949c3de1c980e346af Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Mon, 9 Sep 2024 18:58:17 +0000 Subject: [PATCH 13/28] Fixed Aiden's nits. 
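
Aside on the flag contract (sketch, not part of this patch): policies_names and policies_paths are DEFINE_multi_string flags, so each is passed once per policy and absl keeps the values in command-line order, which is how names and paths are paired up positionally. A minimal standalone example of that behaviour:

from absl import app
from absl import flags

_NAMES = flags.DEFINE_multi_string('names', [],
                                   'Repeatable flag; order is preserved.')


def main(_):
  # e.g. --names=policy_a --names=policy_b  ->  ['policy_a', 'policy_b']
  print(_NAMES.value)


if __name__ == '__main__':
  app.run(main)
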
--- compiler_opt/tools/combine_tfa_policies.py | 12 ++++-------- compiler_opt/tools/combine_tfa_policies_lib.py | 2 +- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/compiler_opt/tools/combine_tfa_policies.py b/compiler_opt/tools/combine_tfa_policies.py index 0aad21b9..c3146711 100755 --- a/compiler_opt/tools/combine_tfa_policies.py +++ b/compiler_opt/tools/combine_tfa_policies.py @@ -28,15 +28,11 @@ from compiler_opt.tools import combine_tfa_policies_lib as cfa_lib _COMBINE_POLICIES_NAMES = flags.DEFINE_multi_string( - 'policies_names', - [], - 'List in order of policy names for combined policies. Order must match that of policies_paths.' # pylint: disable=line-too-long -) + 'policies_names', [], 'List in order of policy names for combined policies.' + 'Order must match that of policies_paths.') _COMBINE_POLICIES_PATHS = flags.DEFINE_multi_string( - 'policies_paths', - [], - 'List in order of policy paths for combined policies. Order must match that of policies_names.' # pylint: disable=line-too-long -) + 'policies_paths', [], 'List in order of policy paths for combined policies.' + 'Order must match that of policies_names.') _COMBINED_POLICY_PATH = flags.DEFINE_string( 'combined_policy_path', '', 'Path to save the combined policy.') _GIN_FILES = flags.DEFINE_multi_string( diff --git a/compiler_opt/tools/combine_tfa_policies_lib.py b/compiler_opt/tools/combine_tfa_policies_lib.py index 4a53ba19..4d1de2b3 100644 --- a/compiler_opt/tools/combine_tfa_policies_lib.py +++ b/compiler_opt/tools/combine_tfa_policies_lib.py @@ -93,7 +93,7 @@ def _action(self, discount=time_step.discount, observation=new_observation) - # TODO(359): We only support combining two policies.Generalize this to + # TODO(359): We only support combining two policies. Generalize this to # handle multiple policies. def f0(): return tf.cast( From 6be71863921662e6bcc7486cc386487bb6906a68 Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Tue, 1 Oct 2024 20:35:53 +0000 Subject: [PATCH 14/28] Patch to env.py and compilation_runner.py which adds working_dir to TimeStep. The patch also gives the option to keep the temporary working_dir by setting keep_temps in compilation_runner.py to a directory where all temporary working_dirs will be saved. --- compiler_opt/rl/compilation_runner.py | 17 +++++++++++---- compiler_opt/rl/env.py | 17 ++++++++++++--- compiler_opt/rl/env_test.py | 30 +++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 7 deletions(-) diff --git a/compiler_opt/rl/compilation_runner.py b/compiler_opt/rl/compilation_runner.py index a3f5c0b4..50021929 100644 --- a/compiler_opt/rl/compilation_runner.py +++ b/compiler_opt/rl/compilation_runner.py @@ -80,6 +80,18 @@ def __exit__(self, exc, value, tb): pass +def get_directory_context(): + """Return a context which manages how the temperory directories are handled. + + When the flag keep_temps is specified temporary directories are stored in + keep_temps.""" + if _KEEP_TEMPS.value is not None: + tempdir_context = NonTemporaryDirectory(dir=_KEEP_TEMPS.value) + else: + tempdir_context = tempfile.TemporaryDirectory() + return tempdir_context + + def _overwrite_trajectory_reward(sequence_example: tf.train.SequenceExample, reward: float) -> tf.train.SequenceExample: """Overwrite the reward in the trace (sequence_example) with the given one. @@ -401,10 +413,7 @@ def collect_data(self, compilation_runner.ProcessKilledException is passed through. ValueError if example under default policy and ml policy does not match. 
""" - if _KEEP_TEMPS.present: - tempdir_context = NonTemporaryDirectory(dir=_KEEP_TEMPS.value) - else: - tempdir_context = tempfile.TemporaryDirectory() + tempdir_context = get_directory_context() with tempdir_context as tempdir: final_cmd_line = loaded_module_spec.build_command_line(tempdir) diff --git a/compiler_opt/rl/env.py b/compiler_opt/rl/env.py index 904fd388..de4fea0d 100644 --- a/compiler_opt/rl/env.py +++ b/compiler_opt/rl/env.py @@ -31,6 +31,7 @@ from compiler_opt.rl import corpus from compiler_opt.rl import log_reader +from compiler_opt.rl import compilation_runner class StepType(Enum): @@ -47,6 +48,7 @@ class TimeStep: score_default: Optional[dict[str, float]] context: Optional[str] module_name: str + working_dir: str obs_id: Optional[int] step_type: StepType @@ -115,10 +117,13 @@ class ClangProcess: """ def __init__(self, proc: subprocess.Popen, - get_scores_fn: Callable[[], dict[str, float]], module_name): + get_scores_fn: Callable[[], dict[str, float]], + module_name: str, + working_dir: str): self._proc = proc self._get_scores_fn = get_scores_fn self._module_name = module_name + self._working_dir = working_dir def get_scores(self, timeout: Optional[int] = None): self._proc.wait(timeout=timeout) @@ -133,10 +138,11 @@ def __init__( proc: subprocess.Popen, get_scores_fn: Callable[[], dict[str, float]], module_name: str, + working_dir: str, reader_pipe: io.BufferedReader, writer_pipe: io.BufferedWriter, ): - super().__init__(proc, get_scores_fn, module_name) + super().__init__(proc, get_scores_fn, module_name, working_dir) self._reader_pipe = reader_pipe self._writer_pipe = writer_pipe self._obs_gen = log_reader.read_log_from_file(self._reader_pipe) @@ -150,6 +156,7 @@ def __init__( score_default=None, context=None, module_name=module_name, + working_dir=working_dir, obs_id=None, step_type=StepType.LAST, ) @@ -180,6 +187,7 @@ def _get_step_type() -> StepType: score_default=None, context=obs.context, module_name=self._module_name, + working_dir=self._working_dir, obs_id=obs.observation_id, step_type=_get_step_type(), ) @@ -235,7 +243,8 @@ def clang_session( Yields: Either the constructed InteractiveClang or DefaultClang object. 
""" - with tempfile.TemporaryDirectory() as td: + tempdir_context = compilation_runner.get_directory_context() + with tempdir_context as td: task_working_dir = os.path.join(td, '__task_working_dir__') os.mkdir(task_working_dir) task = task_type() @@ -264,6 +273,7 @@ def _get_scores() -> dict[str, float]: proc, _get_scores, module.name, + task_working_dir, reader_pipe, writer_pipe, ) @@ -272,6 +282,7 @@ def _get_scores() -> dict[str, float]: proc, _get_scores, module.name, + task_working_dir, ) finally: diff --git a/compiler_opt/rl/env_test.py b/compiler_opt/rl/env_test.py index 87577b3e..4d690cb2 100644 --- a/compiler_opt/rl/env_test.py +++ b/compiler_opt/rl/env_test.py @@ -19,6 +19,9 @@ import ctypes from unittest import mock import subprocess +import os +import shutil +from absl.testing import flagsaver from typing import Dict, List, Optional @@ -161,6 +164,33 @@ def test_interactive_clang_session(self, mock_popen): self.assertEqual(obs.context, f'context_{idx}') mock_popen.assert_called_once() + @mock.patch('subprocess.Popen') + def test_interactive_clang_temp_dir(self, mock_popen): + mock_popen.side_effect = mock_interactive_clang + working_dir = None + + with env.clang_session( + _CLANG_PATH, _MOCK_MODULE, MockTask, interactive=True) as clang_session: + for _ in range(_NUM_STEPS): + obs = clang_session.get_observation() + working_dir = obs.working_dir + self.assertEqual(os.path.exists(working_dir), True) + self.assertEqual(os.path.exists(working_dir), False) + + with flagsaver.flagsaver( + (env.compilation_runner._KEEP_TEMPS, '/tmp')): # pylint: disable=protected-access + with env.clang_session( + _CLANG_PATH, _MOCK_MODULE, MockTask, + interactive=True) as clang_session: + for _ in range(_NUM_STEPS): + obs = clang_session.get_observation() + working_dir = obs.working_dir + self.assertEqual(os.path.exists(working_dir), True) + self.assertEqual(os.path.exists(working_dir), True) + temp_dir_name = str.split(working_dir, '/')[2] + temp_dir_name = os.path.join('/tmp', temp_dir_name) + shutil.rmtree(temp_dir_name) + class MLGOEnvironmentTest(tf.test.TestCase): From 6342ddaa9a44c4f3cb8411883b5d3a0d20591a55 Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Tue, 1 Oct 2024 21:55:25 +0000 Subject: [PATCH 15/28] Fixed comments. --- compiler_opt/rl/compilation_runner.py | 6 +++--- compiler_opt/rl/env.py | 6 ++---- compiler_opt/rl/env_test.py | 24 +++++++++++------------- 3 files changed, 16 insertions(+), 20 deletions(-) diff --git a/compiler_opt/rl/compilation_runner.py b/compiler_opt/rl/compilation_runner.py index 50021929..452bab55 100644 --- a/compiler_opt/rl/compilation_runner.py +++ b/compiler_opt/rl/compilation_runner.py @@ -80,9 +80,9 @@ def __exit__(self, exc, value, tb): pass -def get_directory_context(): +def get_workdir_context(): """Return a context which manages how the temperory directories are handled. - + When the flag keep_temps is specified temporary directories are stored in keep_temps.""" if _KEEP_TEMPS.value is not None: @@ -413,7 +413,7 @@ def collect_data(self, compilation_runner.ProcessKilledException is passed through. ValueError if example under default policy and ml policy does not match. 
""" - tempdir_context = get_directory_context() + tempdir_context = get_workdir_context() with tempdir_context as tempdir: final_cmd_line = loaded_module_spec.build_command_line(tempdir) diff --git a/compiler_opt/rl/env.py b/compiler_opt/rl/env.py index de4fea0d..0b40f1b9 100644 --- a/compiler_opt/rl/env.py +++ b/compiler_opt/rl/env.py @@ -24,7 +24,6 @@ import contextlib import io import os -import tempfile from typing import Callable, Generator, List, Optional, Tuple, Type import numpy as np @@ -117,8 +116,7 @@ class ClangProcess: """ def __init__(self, proc: subprocess.Popen, - get_scores_fn: Callable[[], dict[str, float]], - module_name: str, + get_scores_fn: Callable[[], dict[str, float]], module_name: str, working_dir: str): self._proc = proc self._get_scores_fn = get_scores_fn @@ -243,7 +241,7 @@ def clang_session( Yields: Either the constructed InteractiveClang or DefaultClang object. """ - tempdir_context = compilation_runner.get_directory_context() + tempdir_context = compilation_runner.get_workdir_context() with tempdir_context as td: task_working_dir = os.path.join(td, '__task_working_dir__') os.mkdir(task_working_dir) diff --git a/compiler_opt/rl/env_test.py b/compiler_opt/rl/env_test.py index 4d690cb2..79b36598 100644 --- a/compiler_opt/rl/env_test.py +++ b/compiler_opt/rl/env_test.py @@ -21,6 +21,7 @@ import subprocess import os import shutil +import tempfile from absl.testing import flagsaver from typing import Dict, List, Optional @@ -177,19 +178,16 @@ def test_interactive_clang_temp_dir(self, mock_popen): self.assertEqual(os.path.exists(working_dir), True) self.assertEqual(os.path.exists(working_dir), False) - with flagsaver.flagsaver( - (env.compilation_runner._KEEP_TEMPS, '/tmp')): # pylint: disable=protected-access - with env.clang_session( - _CLANG_PATH, _MOCK_MODULE, MockTask, - interactive=True) as clang_session: - for _ in range(_NUM_STEPS): - obs = clang_session.get_observation() - working_dir = obs.working_dir - self.assertEqual(os.path.exists(working_dir), True) - self.assertEqual(os.path.exists(working_dir), True) - temp_dir_name = str.split(working_dir, '/')[2] - temp_dir_name = os.path.join('/tmp', temp_dir_name) - shutil.rmtree(temp_dir_name) + with tempfile.TemporaryDirectory() as td: + with flagsaver.flagsaver((env.compilation_runner._KEEP_TEMPS, td)): # pylint: disable=protected-access + with env.clang_session( + _CLANG_PATH, _MOCK_MODULE, MockTask, + interactive=True) as clang_session: + for _ in range(_NUM_STEPS): + obs = clang_session.get_observation() + working_dir = obs.working_dir + self.assertEqual(os.path.exists(working_dir), True) + self.assertEqual(os.path.exists(working_dir), True) class MLGOEnvironmentTest(tf.test.TestCase): From 3082ae712cd47c97475b6433a57854d53da35a8f Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Tue, 1 Oct 2024 22:00:34 +0000 Subject: [PATCH 16/28] Fixed pylint. 
--- compiler_opt/rl/compilation_runner.py | 2 +- compiler_opt/rl/env_test.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/compiler_opt/rl/compilation_runner.py b/compiler_opt/rl/compilation_runner.py index 452bab55..e60f6112 100644 --- a/compiler_opt/rl/compilation_runner.py +++ b/compiler_opt/rl/compilation_runner.py @@ -88,7 +88,7 @@ def get_workdir_context(): if _KEEP_TEMPS.value is not None: tempdir_context = NonTemporaryDirectory(dir=_KEEP_TEMPS.value) else: - tempdir_context = tempfile.TemporaryDirectory() + tempdir_context = tempfile.TemporaryDirectory() # pylint: disable=consider-using-with return tempdir_context diff --git a/compiler_opt/rl/env_test.py b/compiler_opt/rl/env_test.py index 79b36598..f6d3c63b 100644 --- a/compiler_opt/rl/env_test.py +++ b/compiler_opt/rl/env_test.py @@ -20,7 +20,6 @@ from unittest import mock import subprocess import os -import shutil import tempfile from absl.testing import flagsaver From 2e262438bb2393660aa87d28c3c6a525904b248b Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Wed, 2 Oct 2024 00:09:25 +0000 Subject: [PATCH 17/28] Fixed a nit --- compiler_opt/rl/compilation_runner.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/compiler_opt/rl/compilation_runner.py b/compiler_opt/rl/compilation_runner.py index e60f6112..26231244 100644 --- a/compiler_opt/rl/compilation_runner.py +++ b/compiler_opt/rl/compilation_runner.py @@ -84,7 +84,8 @@ def get_workdir_context(): """Return a context which manages how the temperory directories are handled. When the flag keep_temps is specified temporary directories are stored in - keep_temps.""" + keep_temps. + """ if _KEEP_TEMPS.value is not None: tempdir_context = NonTemporaryDirectory(dir=_KEEP_TEMPS.value) else: From 56fa72a6e9e3b28fdc3f08a3dddf4efed3b315d5 Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Wed, 2 Oct 2024 15:23:51 +0000 Subject: [PATCH 18/28] Added interactive only mode for env.py which compiles only using iclang instead of both clang and iclang. --- compiler_opt/rl/env.py | 15 +++++++++++---- compiler_opt/rl/env_test.py | 26 ++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/compiler_opt/rl/env.py b/compiler_opt/rl/env.py index 0b40f1b9..a0f9e5ea 100644 --- a/compiler_opt/rl/env.py +++ b/compiler_opt/rl/env.py @@ -290,6 +290,7 @@ def _get_scores() -> dict[str, float]: def _get_clang_generator( clang_path: str, task_type: Type[MLGOTask], + interactive_only: bool = False, ) -> Generator[Optional[Tuple[ClangProcess, InteractiveClang]], Optional[corpus.LoadedModuleSpec], None]: """Returns a generator for creating InteractiveClang objects. @@ -299,6 +300,7 @@ def _get_clang_generator( Args: clang_path: Path to the clang binary to use within InteractiveClang. task_type: Type of the MLGO task to use. + interactive_only: If set to true only iclang is yielded Returns: The generator for InteractiveClang objects. 
@@ -311,9 +313,12 @@ def _get_clang_generator( module = yield with clang_session( clang_path, module, task_type, interactive=True) as iclang: - with clang_session( - clang_path, module, task_type, interactive=False) as clang: - yield iclang, clang + if interactive_only: + yield iclang, iclang + else: + with clang_session( + clang_path, module, task_type, interactive=False) as clang: + yield iclang, clang class MLGOEnvironmentBase: @@ -332,8 +337,10 @@ def __init__( task_type: Type[MLGOTask], obs_spec, action_spec, + interactive_only: bool = False, ): - self._clang_generator = _get_clang_generator(clang_path, task_type) + self._clang_generator = _get_clang_generator( + clang_path, task_type, interactive_only=interactive_only) self._obs_spec = obs_spec self._action_spec = action_spec diff --git a/compiler_opt/rl/env_test.py b/compiler_opt/rl/env_test.py index f6d3c63b..63f66fe6 100644 --- a/compiler_opt/rl/env_test.py +++ b/compiler_opt/rl/env_test.py @@ -215,6 +215,32 @@ def test_env(self, mock_popen): step = test_env.step(np.array([1], dtype=np.int64)) self.assertEqual(step.step_type, env.StepType.LAST) + @mock.patch('subprocess.Popen') + def test_env_interactive_only(self, mock_popen): + mock_popen.side_effect = mock_interactive_clang + + test_env = env.MLGOEnvironmentBase( + clang_path=_CLANG_PATH, + task_type=MockTask, + obs_spec={}, + action_spec={}, + interactive_only=True, + ) + + for env_itr in range(3): + del env_itr + step = test_env.reset(_MOCK_MODULE) + self.assertEqual(step.step_type, env.StepType.FIRST) + + for step_itr in range(_NUM_STEPS - 1): + del step_itr + step = test_env.step(np.array([1], dtype=np.int64)) + self.assertEqual(step.step_type, env.StepType.MID) + + step = test_env.step(np.array([1], dtype=np.int64)) + self.assertEqual(step.step_type, env.StepType.LAST) + self.assertEqual(step.reward, {'default': 0.}) + if __name__ == '__main__': tf.test.main() From 5568aafba8908a4b3f18c1897cfad57558756e45 Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Wed, 2 Oct 2024 19:09:00 +0000 Subject: [PATCH 19/28] Improved _get_clang_generator documentation in env.py. Changed the unit test for MLGOEnvironment to check if the clang sessions are equal and respect the interactive_only variable. --- compiler_opt/rl/env.py | 12 +++++++----- compiler_opt/rl/env_test.py | 3 ++- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/compiler_opt/rl/env.py b/compiler_opt/rl/env.py index a0f9e5ea..ea501f7c 100644 --- a/compiler_opt/rl/env.py +++ b/compiler_opt/rl/env.py @@ -293,17 +293,19 @@ def _get_clang_generator( interactive_only: bool = False, ) -> Generator[Optional[Tuple[ClangProcess, InteractiveClang]], Optional[corpus.LoadedModuleSpec], None]: - """Returns a generator for creating InteractiveClang objects. - - TODO: fix this docstring + """Returns a tuple of generators for creating InteractiveClang objects. Args: clang_path: Path to the clang binary to use within InteractiveClang. task_type: Type of the MLGO task to use. - interactive_only: If set to true only iclang is yielded + interactive_only: If set to true the returned tuple of generators is + iclang, iclang instead of iclang, clang Returns: - The generator for InteractiveClang objects. + A tuple of generators created with clang_session. First argument of + the tuple is always an interactive clang session. The second argumnet + is a default clang session if interactive_only is False and otherwise + the exact same interactive clang session object as the first argument. 
""" while True: # The following line should be type-hinted as follows: diff --git a/compiler_opt/rl/env_test.py b/compiler_opt/rl/env_test.py index 63f66fe6..67a951df 100644 --- a/compiler_opt/rl/env_test.py +++ b/compiler_opt/rl/env_test.py @@ -214,6 +214,7 @@ def test_env(self, mock_popen): step = test_env.step(np.array([1], dtype=np.int64)) self.assertEqual(step.step_type, env.StepType.LAST) + self.assertNotEqual(test_env._iclang, test_env._clang) # pylint: disable=protected-access @mock.patch('subprocess.Popen') def test_env_interactive_only(self, mock_popen): @@ -239,7 +240,7 @@ def test_env_interactive_only(self, mock_popen): step = test_env.step(np.array([1], dtype=np.int64)) self.assertEqual(step.step_type, env.StepType.LAST) - self.assertEqual(step.reward, {'default': 0.}) + self.assertEqual(test_env._iclang, test_env._clang) # pylint: disable=protected-access if __name__ == '__main__': From ac307fc20215257f3850766fd95297e0eeeda167 Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Wed, 2 Oct 2024 19:24:36 +0000 Subject: [PATCH 20/28] Address a nit. --- compiler_opt/rl/env.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/compiler_opt/rl/env.py b/compiler_opt/rl/env.py index ea501f7c..d3632b34 100644 --- a/compiler_opt/rl/env.py +++ b/compiler_opt/rl/env.py @@ -302,10 +302,11 @@ def _get_clang_generator( iclang, iclang instead of iclang, clang Returns: - A tuple of generators created with clang_session. First argument of - the tuple is always an interactive clang session. The second argumnet - is a default clang session if interactive_only is False and otherwise - the exact same interactive clang session object as the first argument. + A generator of tuples. Each element of the tuple is created with + clang_session. First argument of the tuple is always an interactive + clang session. The second argumnet is a default clang session if + interactive_only is False and otherwise the exact same interactive + clang session object as the first argument. """ while True: # The following line should be type-hinted as follows: From 07a77ceedc7e1f1c871f145ec7711130ee59aefb Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Wed, 2 Oct 2024 19:28:38 +0000 Subject: [PATCH 21/28] Fixed pylint. --- compiler_opt/rl/env.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/compiler_opt/rl/env.py b/compiler_opt/rl/env.py index d3632b34..6f9ee41a 100644 --- a/compiler_opt/rl/env.py +++ b/compiler_opt/rl/env.py @@ -302,9 +302,9 @@ def _get_clang_generator( iclang, iclang instead of iclang, clang Returns: - A generator of tuples. Each element of the tuple is created with - clang_session. First argument of the tuple is always an interactive - clang session. The second argumnet is a default clang session if + A generator of tuples. Each element of the tuple is created with + clang_session. First argument of the tuple is always an interactive + clang session. The second argumnet is a default clang session if interactive_only is False and otherwise the exact same interactive clang session object as the first argument. """ From 49af23a6f3027a1d94f5e06ae97f3cd0ff04a2ea Mon Sep 17 00:00:00 2001 From: "Teodor V. 
Marinov" Date: Thu, 3 Oct 2024 23:06:36 +0000 Subject: [PATCH 22/28] Class which defi --- compiler_opt/rl/generate_bc_trajectories.py | 93 +++++++++++ .../rl/generate_bc_trajectories_test.py | 155 ++++++++++++++++++ 2 files changed, 248 insertions(+) create mode 100644 compiler_opt/rl/generate_bc_trajectories.py create mode 100644 compiler_opt/rl/generate_bc_trajectories_test.py diff --git a/compiler_opt/rl/generate_bc_trajectories.py b/compiler_opt/rl/generate_bc_trajectories.py new file mode 100644 index 00000000..cf749000 --- /dev/null +++ b/compiler_opt/rl/generate_bc_trajectories.py @@ -0,0 +1,93 @@ +# coding=utf-8 +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module for running compilation and collect data for behavior cloning.""" + +from typing import Callable, Dict, List + +import numpy as np +import tensorflow as tf +import tf_agents +import tf_agents.policies +from tf_agents.trajectories import policy_step + + +class ExplorationWithPolicy: + """Policy which selects states for exploration. + + Exploration is fascilitated in the following way. First the policy plays + all actions from the replay_prefix. At the following state the policy computes + a gap which is difference between the most likely action and the second most + likely action according to the randomized exploration policy (distr). + If the current gap is smaller than previously maintained gap, the gap is + updated and the exploration state is set to the current state. + The trajectory is completed by following following the policy from the + constructor. + + Attributes: + replay_prefix: a replay buffer of actions + policy: policy to follow after exhausting the replay buffer + explore_policy: randomized policy which is used to compute the gap + curr_step: current step of the trajectory + explore_step: current candidate for exploration step + gap: current difference at explore step between probability of most likely + action according to explore_policy and second most likely action + explore_on_features: dict of feature names and functions which specify + when to explore on the respective feature + """ + + def __init__( + self, + replay_prefix: List[int], + policy: Callable[[tf_agents.trajectories.TimeStep], np.ndarray], + explore_policy: Callable[[tf_agents.trajectories.TimeStep], + policy_step.PolicyStep], + explore_on_features: Dict[str, Callable[[tf.Tensor], bool]] | None = None, + ): + self.replay_prefix = replay_prefix + self.policy = policy + self.explore_policy = explore_policy + self.curr_step = 0 + self.explore_step = 0 + self.gap = np.inf + self.explore_on_features = explore_on_features + self._stop_exploration = False + + def advice(self, state: tf_agents.trajectories.TimeStep) -> np.ndarray: + """Action function for the policy. + + Args: + state: current state in the trajectory + + Returns: + policy_deca: action to take at the current state. 
+ + """ + if self.curr_step < len(self.replay_prefix): + self.curr_step += 1 + return np.array(self.replay_prefix[self.curr_step - 1]) + policy_deca = self.policy(state) + distr = tf.nn.softmax(self.explore_policy(state).action.logits).numpy()[0] + if not self._stop_exploration and distr.shape[0] > 1 and self.gap > np.abs( + distr[0] - distr[1]): + self.gap = np.abs(distr[0] - distr[1]) + self.explore_step = self.curr_step + if not self._stop_exploration and self.explore_on_features is not None: + for feature_name, explore_on_feature in self.explore_on_features.items(): + if explore_on_feature(state.observation[feature_name]): + self.explore_step = self.curr_step + self._stop_exploration = True + break + self.curr_step += 1 + return policy_deca diff --git a/compiler_opt/rl/generate_bc_trajectories_test.py b/compiler_opt/rl/generate_bc_trajectories_test.py new file mode 100644 index 00000000..8f5ce3f8 --- /dev/null +++ b/compiler_opt/rl/generate_bc_trajectories_test.py @@ -0,0 +1,155 @@ +# coding=utf-8 +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for compiler_opt.rl.generate_bc_trajectories.""" + +from typing import List + +import numpy as np +import tensorflow as tf +import tensorflow_probability as tfp +import tf_agents +from tf_agents.trajectories import policy_step + +from compiler_opt.rl import generate_bc_trajectories + +_eps = 1e-5 + + +def _get_state_list() -> List[tf_agents.trajectories.TimeStep]: + + state_0 = tf_agents.trajectories.TimeStep( + discount=tf.constant(np.array([0.]), dtype=tf.float32), + observation={ + 'feature_1': tf.constant(np.array([0]), dtype=tf.int64), + 'feature_2': tf.constant(np.array([50]), dtype=tf.int64), + 'feature_3': tf.constant(np.array([0]), dtype=tf.int64), + }, + reward=tf.constant(np.array([0]), dtype=tf.float32), + step_type=tf.constant(np.array([0]), dtype=tf.int32)) + state_1 = tf_agents.trajectories.TimeStep( + discount=tf.constant(np.array([0.]), dtype=tf.float32), + observation={ + 'feature_1': tf.constant(np.array([1]), dtype=tf.int64), + 'feature_2': tf.constant(np.array([25]), dtype=tf.int64), + 'feature_3': tf.constant(np.array([0]), dtype=tf.int64), + }, + reward=tf.constant(np.array([0]), dtype=tf.float32), + step_type=tf.constant(np.array([0]), dtype=tf.int32)) + state_2 = tf_agents.trajectories.TimeStep( + discount=tf.constant(np.array([0.]), dtype=tf.float32), + observation={ + 'feature_1': tf.constant(np.array([0]), dtype=tf.int64), + 'feature_2': tf.constant(np.array([25]), dtype=tf.int64), + 'feature_3': tf.constant(np.array([1]), dtype=tf.int64), + }, + reward=tf.constant(np.array([0]), dtype=tf.float32), + step_type=tf.constant(np.array([0]), dtype=tf.int32)) + state_3 = tf_agents.trajectories.TimeStep( + discount=tf.constant(np.array([0.]), dtype=tf.float32), + observation={ + 'feature_1': tf.constant(np.array([0]), dtype=tf.int64), + 'feature_2': tf.constant(np.array([25]), dtype=tf.int64), + 'feature_3': tf.constant(np.array([0]), dtype=tf.int64), + }, + 
reward=tf.constant(np.array([0]), dtype=tf.float32), + step_type=tf.constant(np.array([0]), dtype=tf.int32)) + + return [state_0, state_1, state_2, state_3] + + +def _policy(state: tf_agents.trajectories.TimeStep) -> np.ndarray: + feature_sum = np.array([0]) + for feature in state.observation.values(): + feature_sum += feature.numpy() + return np.mod(feature_sum, 5) + + +def _explore_policy( + state: tf_agents.trajectories.TimeStep) -> policy_step.PolicyStep: + probs = [ + 0.5 * float(state.observation['feature_3'].numpy()), + 1 - 0.5 * float(state.observation['feature_3'].numpy()) + ] + logits = [[0.0, tf.math.log(probs[1] / (1.0 - probs[1] + _eps))]] + return policy_step.PolicyStep( + action=tfp.distributions.Categorical(logits=logits)) + + +class ExplorationWithPolicyTest(tf.test.TestCase): + + def test_explore_policy(self): + prob = 1. + state = _get_state_list()[3] + logits = [[0.0, tf.math.log(prob / (1.0 - prob + _eps))]] + action = tfp.distributions.Categorical(logits=logits) + self.assertAllClose(action.logits, _explore_policy(state).action.logits) + + def test_explore_with_gap(self): + explore_with_policy = generate_bc_trajectories.ExplorationWithPolicy( + replay_prefix=[np.array([1])], + policy=_policy, + explore_policy=_explore_policy, + ) + for state in _get_state_list(): + _ = explore_with_policy.advice(state)[0] + + self.assertAllClose(0, explore_with_policy.gap, atol=2 * _eps) + self.assertEqual(2, explore_with_policy.explore_step) + + explore_with_policy = generate_bc_trajectories.ExplorationWithPolicy( + replay_prefix=[np.array([1]), + np.array([1]), + np.array([1])], + policy=_policy, + explore_policy=_explore_policy, + ) + for state in _get_state_list(): + _ = explore_with_policy.advice(state)[0] + + self.assertAllClose(1, explore_with_policy.gap, atol=2 * _eps) + self.assertEqual(3, explore_with_policy.explore_step) + + def test_explore_with_feature(self): + + def explore_on_feature_1_val(feature_val): + return feature_val.numpy()[0] > 0 + + def explore_on_feature_2_val(feature_val): + return feature_val.numpy()[0] > 25 + + explore_on_features = { + 'feature_1': explore_on_feature_1_val, + 'feature_2': explore_on_feature_2_val + } + + explore_with_policy = generate_bc_trajectories.ExplorationWithPolicy( + replay_prefix=[], + policy=_policy, + explore_policy=_explore_policy, + explore_on_features=explore_on_features) + for state in _get_state_list(): + _ = explore_with_policy.advice(state)[0] + self.assertEqual(0, explore_with_policy.explore_step) + + explore_with_policy = generate_bc_trajectories.ExplorationWithPolicy( + replay_prefix=[np.array([1])], + policy=_policy, + explore_policy=_explore_policy, + explore_on_features=explore_on_features, + ) + + for state in _get_state_list(): + _ = explore_with_policy.advice(state)[0] + self.assertEqual(1, explore_with_policy.explore_step) From b7e6fb2587a7ebb48453f55735b35a910026c2ad Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Thu, 3 Oct 2024 23:21:38 +0000 Subject: [PATCH 23/28] Fix an Optional. --- compiler_opt/rl/generate_bc_trajectories.py | 4 ++-- compiler_opt/rl/generate_bc_trajectories_test.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/compiler_opt/rl/generate_bc_trajectories.py b/compiler_opt/rl/generate_bc_trajectories.py index cf749000..627d7340 100644 --- a/compiler_opt/rl/generate_bc_trajectories.py +++ b/compiler_opt/rl/generate_bc_trajectories.py @@ -14,7 +14,7 @@ # limitations under the License. 
"""Module for running compilation and collect data for behavior cloning.""" -from typing import Callable, Dict, List +from typing import Callable, Dict, List, Optional import numpy as np import tensorflow as tf @@ -53,7 +53,7 @@ def __init__( policy: Callable[[tf_agents.trajectories.TimeStep], np.ndarray], explore_policy: Callable[[tf_agents.trajectories.TimeStep], policy_step.PolicyStep], - explore_on_features: Dict[str, Callable[[tf.Tensor], bool]] | None = None, + explore_on_features: Optional[Dict[str, Callable[[tf.Tensor], bool]]] = None, ): self.replay_prefix = replay_prefix self.policy = policy diff --git a/compiler_opt/rl/generate_bc_trajectories_test.py b/compiler_opt/rl/generate_bc_trajectories_test.py index 8f5ce3f8..a14babee 100644 --- a/compiler_opt/rl/generate_bc_trajectories_test.py +++ b/compiler_opt/rl/generate_bc_trajectories_test.py @@ -24,6 +24,7 @@ from compiler_opt.rl import generate_bc_trajectories + _eps = 1e-5 From 958f3b6326c2fc5c359184a66cc844e76ca6ea7b Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Thu, 3 Oct 2024 23:31:20 +0000 Subject: [PATCH 24/28] yapf -ir . --- compiler_opt/rl/generate_bc_trajectories.py | 3 ++- compiler_opt/rl/generate_bc_trajectories_test.py | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler_opt/rl/generate_bc_trajectories.py b/compiler_opt/rl/generate_bc_trajectories.py index 627d7340..d9196de1 100644 --- a/compiler_opt/rl/generate_bc_trajectories.py +++ b/compiler_opt/rl/generate_bc_trajectories.py @@ -53,7 +53,8 @@ def __init__( policy: Callable[[tf_agents.trajectories.TimeStep], np.ndarray], explore_policy: Callable[[tf_agents.trajectories.TimeStep], policy_step.PolicyStep], - explore_on_features: Optional[Dict[str, Callable[[tf.Tensor], bool]]] = None, + explore_on_features: Optional[Dict[str, Callable[[tf.Tensor], + bool]]] = None, ): self.replay_prefix = replay_prefix self.policy = policy diff --git a/compiler_opt/rl/generate_bc_trajectories_test.py b/compiler_opt/rl/generate_bc_trajectories_test.py index a14babee..8f5ce3f8 100644 --- a/compiler_opt/rl/generate_bc_trajectories_test.py +++ b/compiler_opt/rl/generate_bc_trajectories_test.py @@ -24,7 +24,6 @@ from compiler_opt.rl import generate_bc_trajectories - _eps = 1e-5 From dad551cafee31dca08a1a8511bd19eb536a0c168 Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Thu, 3 Oct 2024 23:51:21 +0000 Subject: [PATCH 25/28] Trying to fix TimeStep problem. 
--- compiler_opt/rl/generate_bc_trajectories.py | 10 ++++------ .../rl/generate_bc_trajectories_test.py | 17 ++++++++--------- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/compiler_opt/rl/generate_bc_trajectories.py b/compiler_opt/rl/generate_bc_trajectories.py index d9196de1..c96f607e 100644 --- a/compiler_opt/rl/generate_bc_trajectories.py +++ b/compiler_opt/rl/generate_bc_trajectories.py @@ -18,9 +18,8 @@ import numpy as np import tensorflow as tf -import tf_agents -import tf_agents.policies from tf_agents.trajectories import policy_step +from tf_agents.trajectories import time_step class ExplorationWithPolicy: @@ -50,9 +49,8 @@ class ExplorationWithPolicy: def __init__( self, replay_prefix: List[int], - policy: Callable[[tf_agents.trajectories.TimeStep], np.ndarray], - explore_policy: Callable[[tf_agents.trajectories.TimeStep], - policy_step.PolicyStep], + policy: Callable[[time_step.TimeStep], np.ndarray], + explore_policy: Callable[[time_step.TimeStep], policy_step.PolicyStep], explore_on_features: Optional[Dict[str, Callable[[tf.Tensor], bool]]] = None, ): @@ -65,7 +63,7 @@ def __init__( self.explore_on_features = explore_on_features self._stop_exploration = False - def advice(self, state: tf_agents.trajectories.TimeStep) -> np.ndarray: + def advice(self, state: time_step.TimeStep) -> np.ndarray: """Action function for the policy. Args: diff --git a/compiler_opt/rl/generate_bc_trajectories_test.py b/compiler_opt/rl/generate_bc_trajectories_test.py index 8f5ce3f8..a6347e34 100644 --- a/compiler_opt/rl/generate_bc_trajectories_test.py +++ b/compiler_opt/rl/generate_bc_trajectories_test.py @@ -19,17 +19,17 @@ import numpy as np import tensorflow as tf import tensorflow_probability as tfp -import tf_agents from tf_agents.trajectories import policy_step +from tf_agents.trajectories import time_step from compiler_opt.rl import generate_bc_trajectories _eps = 1e-5 -def _get_state_list() -> List[tf_agents.trajectories.TimeStep]: +def _get_state_list() -> List[time_step.TimeStep]: - state_0 = tf_agents.trajectories.TimeStep( + state_0 = time_step.TimeStep( discount=tf.constant(np.array([0.]), dtype=tf.float32), observation={ 'feature_1': tf.constant(np.array([0]), dtype=tf.int64), @@ -38,7 +38,7 @@ def _get_state_list() -> List[tf_agents.trajectories.TimeStep]: }, reward=tf.constant(np.array([0]), dtype=tf.float32), step_type=tf.constant(np.array([0]), dtype=tf.int32)) - state_1 = tf_agents.trajectories.TimeStep( + state_1 = time_step.TimeStep( discount=tf.constant(np.array([0.]), dtype=tf.float32), observation={ 'feature_1': tf.constant(np.array([1]), dtype=tf.int64), @@ -47,7 +47,7 @@ def _get_state_list() -> List[tf_agents.trajectories.TimeStep]: }, reward=tf.constant(np.array([0]), dtype=tf.float32), step_type=tf.constant(np.array([0]), dtype=tf.int32)) - state_2 = tf_agents.trajectories.TimeStep( + state_2 = time_step.TimeStep( discount=tf.constant(np.array([0.]), dtype=tf.float32), observation={ 'feature_1': tf.constant(np.array([0]), dtype=tf.int64), @@ -56,7 +56,7 @@ def _get_state_list() -> List[tf_agents.trajectories.TimeStep]: }, reward=tf.constant(np.array([0]), dtype=tf.float32), step_type=tf.constant(np.array([0]), dtype=tf.int32)) - state_3 = tf_agents.trajectories.TimeStep( + state_3 = time_step.TimeStep( discount=tf.constant(np.array([0.]), dtype=tf.float32), observation={ 'feature_1': tf.constant(np.array([0]), dtype=tf.int64), @@ -69,15 +69,14 @@ def _get_state_list() -> List[tf_agents.trajectories.TimeStep]: return [state_0, state_1, state_2, state_3] 
-def _policy(state: tf_agents.trajectories.TimeStep) -> np.ndarray:
+def _policy(state: time_step.TimeStep) -> np.ndarray:
   feature_sum = np.array([0])
   for feature in state.observation.values():
     feature_sum += feature.numpy()
   return np.mod(feature_sum, 5)


-def _explore_policy(
-    state: tf_agents.trajectories.TimeStep) -> policy_step.PolicyStep:
+def _explore_policy(state: time_step.TimeStep) -> policy_step.PolicyStep:
   probs = [
       0.5 * float(state.observation['feature_3'].numpy()),
       1 - 0.5 * float(state.observation['feature_3'].numpy())

From 2504a95d8e7bcee8791210c6323ef928da9b931c Mon Sep 17 00:00:00 2001
From: "Teodor V. Marinov"
Date: Fri, 4 Oct 2024 00:03:09 +0000
Subject: [PATCH 26/28] Fix typecheck problem

---
 compiler_opt/rl/generate_bc_trajectories.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler_opt/rl/generate_bc_trajectories.py b/compiler_opt/rl/generate_bc_trajectories.py
index c96f607e..43985a34 100644
--- a/compiler_opt/rl/generate_bc_trajectories.py
+++ b/compiler_opt/rl/generate_bc_trajectories.py
@@ -48,7 +48,7 @@ class ExplorationWithPolicy:
   def __init__(
       self,
-      replay_prefix: List[int],
+      replay_prefix: List[np.ndarray],
       policy: Callable[[time_step.TimeStep], np.ndarray],
       explore_policy: Callable[[time_step.TimeStep], policy_step.PolicyStep],
       explore_on_features: Optional[Dict[str, Callable[[tf.Tensor],

From 9a19f99a85c45b0d2ede004333436e3d0b54918c Mon Sep 17 00:00:00 2001
From: "Teodor V. Marinov"
Date: Mon, 7 Oct 2024 23:50:06 +0000
Subject: [PATCH 27/28] Addressing mtrofin comments.

---
 compiler_opt/rl/generate_bc_trajectories.py | 23 ++++++++++++-------
 .../rl/generate_bc_trajectories_test.py | 8 +++----
 2 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/compiler_opt/rl/generate_bc_trajectories.py b/compiler_opt/rl/generate_bc_trajectories.py
index 43985a34..85905f3b 100644
--- a/compiler_opt/rl/generate_bc_trajectories.py
+++ b/compiler_opt/rl/generate_bc_trajectories.py
@@ -25,7 +25,7 @@ class ExplorationWithPolicy:
   """Policy which selects states for exploration.

-  Exploration is fascilitated in the following way. First the policy plays
+  Exploration is facilitated in the following way. First the policy plays
   all actions from the replay_prefix. At the following state the policy
   computes a gap which is difference between the most likely action and the
   second most likely action according to the randomized exploration policy (distr).
@@ -63,24 +63,31 @@ def __init__(
     self.explore_on_features = explore_on_features
     self._stop_exploration = False

-  def advice(self, state: time_step.TimeStep) -> np.ndarray:
+  def _compute_gap(self, distr: np.ndarray) -> np.float32:
+    if distr.shape[0] < 2:
+      return np.inf
+    sorted_distr = np.sort(distr)
+    return sorted_distr[-1] - sorted_distr[-2]
+
+  def get_advice(self, state: time_step.TimeStep) -> np.ndarray:
     """Action function for the policy.

     Args:
       state: current state in the trajectory

     Returns:
-      policy_deca: action to take at the current state.
+      policy_action: action to take at the current state.
""" if self.curr_step < len(self.replay_prefix): self.curr_step += 1 return np.array(self.replay_prefix[self.curr_step - 1]) - policy_deca = self.policy(state) + policy_action = self.policy(state) distr = tf.nn.softmax(self.explore_policy(state).action.logits).numpy()[0] - if not self._stop_exploration and distr.shape[0] > 1 and self.gap > np.abs( - distr[0] - distr[1]): - self.gap = np.abs(distr[0] - distr[1]) + curr_gap = self._compute_gap(distr) + if (not self._stop_exploration and distr.shape[0] > 1 and + self.gap > curr_gap): + self.gap = curr_gap self.explore_step = self.curr_step if not self._stop_exploration and self.explore_on_features is not None: for feature_name, explore_on_feature in self.explore_on_features.items(): @@ -89,4 +96,4 @@ def advice(self, state: time_step.TimeStep) -> np.ndarray: self._stop_exploration = True break self.curr_step += 1 - return policy_deca + return policy_action diff --git a/compiler_opt/rl/generate_bc_trajectories_test.py b/compiler_opt/rl/generate_bc_trajectories_test.py index a6347e34..a4ae9b6c 100644 --- a/compiler_opt/rl/generate_bc_trajectories_test.py +++ b/compiler_opt/rl/generate_bc_trajectories_test.py @@ -102,7 +102,7 @@ def test_explore_with_gap(self): explore_policy=_explore_policy, ) for state in _get_state_list(): - _ = explore_with_policy.advice(state)[0] + _ = explore_with_policy.get_advice(state)[0] self.assertAllClose(0, explore_with_policy.gap, atol=2 * _eps) self.assertEqual(2, explore_with_policy.explore_step) @@ -115,7 +115,7 @@ def test_explore_with_gap(self): explore_policy=_explore_policy, ) for state in _get_state_list(): - _ = explore_with_policy.advice(state)[0] + _ = explore_with_policy.get_advice(state)[0] self.assertAllClose(1, explore_with_policy.gap, atol=2 * _eps) self.assertEqual(3, explore_with_policy.explore_step) @@ -139,7 +139,7 @@ def explore_on_feature_2_val(feature_val): explore_policy=_explore_policy, explore_on_features=explore_on_features) for state in _get_state_list(): - _ = explore_with_policy.advice(state)[0] + _ = explore_with_policy.get_advice(state)[0] self.assertEqual(0, explore_with_policy.explore_step) explore_with_policy = generate_bc_trajectories.ExplorationWithPolicy( @@ -150,5 +150,5 @@ def explore_on_feature_2_val(feature_val): ) for state in _get_state_list(): - _ = explore_with_policy.advice(state)[0] + _ = explore_with_policy.get_advice(state)[0] self.assertEqual(1, explore_with_policy.explore_step) From 3fd273c8d42cb3dfbd36db1a0e23744dc54355c1 Mon Sep 17 00:00:00 2001 From: "Teodor V. Marinov" Date: Tue, 8 Oct 2024 18:03:31 +0000 Subject: [PATCH 28/28] Addressing mtrofin comments. --- compiler_opt/rl/generate_bc_trajectories.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/compiler_opt/rl/generate_bc_trajectories.py b/compiler_opt/rl/generate_bc_trajectories.py index 85905f3b..79d65f2d 100644 --- a/compiler_opt/rl/generate_bc_trajectories.py +++ b/compiler_opt/rl/generate_bc_trajectories.py @@ -83,8 +83,13 @@ def get_advice(self, state: time_step.TimeStep) -> np.ndarray: self.curr_step += 1 return np.array(self.replay_prefix[self.curr_step - 1]) policy_action = self.policy(state) + # explore_policy(state) should play at least one action per state and so + # self.explore_policy(state).action.logits should have at least one entry distr = tf.nn.softmax(self.explore_policy(state).action.logits).numpy()[0] curr_gap = self._compute_gap(distr) + # selecting explore_step is done based on smallest encountered gap in the + # play of self.policy. 
This logic can be changed to have different type + # of exploration. if (not self._stop_exploration and distr.shape[0] > 1 and self.gap > curr_gap): self.gap = curr_gap
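
For orientation, the exploration logic that patches 27 and 28 converge on can be
summarized by the minimal, self-contained sketch below. It is not part of the
patch series: the policies here are hypothetical stand-ins operating on plain
dicts rather than tf_agents TimeSteps, and the explore_on_features early-stop
path of ExplorationWithPolicy is omitted.

    import numpy as np


    def _compute_gap(distr: np.ndarray) -> float:
      # Difference between the two largest probabilities; infinite when there
      # is only one possible action.
      if distr.shape[0] < 2:
        return float(np.inf)
      sorted_distr = np.sort(distr)
      return float(sorted_distr[-1] - sorted_distr[-2])


    def pick_explore_step(states, policy, explore_policy, replay_prefix=()):
      """Replays replay_prefix, then tracks the step with the smallest gap."""
      actions = []
      smallest_gap = np.inf
      explore_step = 0
      for step, state in enumerate(states):
        if step < len(replay_prefix):
          actions.append(np.array(replay_prefix[step]))
          continue
        actions.append(policy(state))
        distr = explore_policy(state)  # probability vector over actions
        curr_gap = _compute_gap(distr)
        if curr_gap < smallest_gap:
          smallest_gap = curr_gap
          explore_step = step
      return actions, explore_step


    if __name__ == '__main__':
      states = [{'feature': 0.0}, {'feature': 1.0}, {'feature': 2.0}]
      policy = lambda s: np.array([int(s['feature']) % 2])
      explore_policy = lambda s: np.array(
          [0.5 + 0.1 * s['feature'], 0.5 - 0.1 * s['feature']])
      print(pick_explore_step(states, policy, explore_policy))

Keeping the step with the smallest encountered gap singles out the state where
the exploration policy is least decisive, which is the point the class records
as explore_step for later exploration.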