From f179ce1bc81ff1fb62fbffa1892bbea59d9c2726 Mon Sep 17 00:00:00 2001 From: tvmarino Date: Mon, 23 Dec 2024 07:19:21 -0500 Subject: [PATCH] [BC] ```keep_temps``` update (#411) * Rename ```base_path``` to ```persistent_objects_path``` and ```keep_temps``` to ```explicit_temps_dir```. Makes sure that if ```persistent_objects_path``` is set then ```explicit_temps_dir``` is also set or neither of them is set. Further, passes ```explicit_temps_dir``` to ```compilation_runner.get_workdir_context``` and ensures that the flag and argument are not set at the same time. * Addressing @mtrofin and @boomanaiden154 comments. * Set explicit_temps_dir to persistent_objects_path+'/temp_dirs' whenever explicit_temps_dir is not set and persistent_objects_path is set. --- compiler_opt/rl/compilation_runner.py | 25 +++++++--- compiler_opt/rl/env.py | 27 +++++++++-- compiler_opt/rl/env_test.py | 3 +- .../generate_bc_trajectories_lib.py | 46 ++++++++++++++----- .../gin_configs/imitation_learning.gin | 2 - 5 files changed, 77 insertions(+), 26 deletions(-) diff --git a/compiler_opt/rl/compilation_runner.py b/compiler_opt/rl/compilation_runner.py index 2381115a..67d26bd3 100644 --- a/compiler_opt/rl/compilation_runner.py +++ b/compiler_opt/rl/compilation_runner.py @@ -39,8 +39,8 @@ 'Max duration (in seconds) after which we cancel any compilation job.') _QUIET = flags.DEFINE_bool( 'quiet', True, 'Whether or not to compile quietly (hiding info logging)') -_KEEP_TEMPS = flags.DEFINE_string( - 'keep_temps', None, +_EXPLICIT_TEMPS_DIR = flags.DEFINE_string( + 'explicit_temps_dir', None, 'Put temporary files into given directory and keep them past exit.') @@ -80,14 +80,25 @@ def __exit__(self, exc, value, tb): pass -def get_workdir_context(): +def get_workdir_context(explicit_temps_dir: Optional[str] = None): """Return a context which manages how the temperory directories are handled. - When the flag keep_temps is specified temporary directories are stored in - keep_temps. 
+ When the flag explicit_temps_dir is specified temporary directories are + stored in explicit_temps_dir. + + Args: + explicit_temps_dir: Put temporary files into given directory and keep them + past exit when compiling """ - if _KEEP_TEMPS.value is not None: - tempdir_context = NonTemporaryDirectory(dir=_KEEP_TEMPS.value) + if explicit_temps_dir and _EXPLICIT_TEMPS_DIR.value: + raise ValueError('Only one of flag' + 'explicit_temps_dir={_EXPLICIT_TEMPS_DIR.value}' + 'and arg explicit_temps_dir={explicit_temps_dir}' + 'should be specified.') + if _EXPLICIT_TEMPS_DIR.value is not None: + tempdir_context = NonTemporaryDirectory(dir=_EXPLICIT_TEMPS_DIR.value) + elif explicit_temps_dir: + tempdir_context = NonTemporaryDirectory(dir=explicit_temps_dir) else: tempdir_context = tempfile.TemporaryDirectory() # pylint: disable=consider-using-with return tempdir_context diff --git a/compiler_opt/rl/env.py b/compiler_opt/rl/env.py index 6f9ee41a..383ed124 100644 --- a/compiler_opt/rl/env.py +++ b/compiler_opt/rl/env.py @@ -225,6 +225,7 @@ def clang_session( module: corpus.LoadedModuleSpec, task_type: Type[MLGOTask], *, + explicit_temps_dir: Optional[str] = None, interactive: bool, ): """Context manager for clang session. @@ -236,12 +237,15 @@ def clang_session( clang_path: The clang binary to use for the InteractiveClang session. module: The module to compile with clang. task_type: Type of the MLGOTask to use. + explicit_temps_dir: Put temporary files into given directory and keep them + past exit when compiling interactive: Whether to use an interactive or default clang instance Yields: Either the constructed InteractiveClang or DefaultClang object. 
""" - tempdir_context = compilation_runner.get_workdir_context() + tempdir_context = compilation_runner.get_workdir_context( + explicit_temps_dir=explicit_temps_dir) with tempdir_context as td: task_working_dir = os.path.join(td, '__task_working_dir__') os.mkdir(task_working_dir) @@ -290,6 +294,7 @@ def _get_scores() -> dict[str, float]: def _get_clang_generator( clang_path: str, task_type: Type[MLGOTask], + explicit_temps_dir: Optional[str] = None, interactive_only: bool = False, ) -> Generator[Optional[Tuple[ClangProcess, InteractiveClang]], Optional[corpus.LoadedModuleSpec], None]: @@ -298,6 +303,8 @@ def _get_clang_generator( Args: clang_path: Path to the clang binary to use within InteractiveClang. task_type: Type of the MLGO task to use. + explicit_temps_dir: Put temporary files into given directory and keep them + past exit when compiling interactive_only: If set to true the returned tuple of generators is iclang, iclang instead of iclang, clang @@ -315,12 +322,20 @@ def _get_clang_generator( # https://github.com/google/yapf/issues/1092 module = yield with clang_session( - clang_path, module, task_type, interactive=True) as iclang: + clang_path, + module, + task_type, + explicit_temps_dir=explicit_temps_dir, + interactive=True) as iclang: if interactive_only: yield iclang, iclang else: with clang_session( - clang_path, module, task_type, interactive=False) as clang: + clang_path, + module, + task_type, + explicit_temps_dir=explicit_temps_dir, + interactive=False) as clang: yield iclang, clang @@ -340,10 +355,14 @@ def __init__( task_type: Type[MLGOTask], obs_spec, action_spec, + explicit_temps_dir: Optional[str] = None, interactive_only: bool = False, ): self._clang_generator = _get_clang_generator( - clang_path, task_type, interactive_only=interactive_only) + clang_path, + task_type, + explicit_temps_dir=explicit_temps_dir, + interactive_only=interactive_only) self._obs_spec = obs_spec self._action_spec = action_spec diff --git 
a/compiler_opt/rl/env_test.py b/compiler_opt/rl/env_test.py index 67a951df..823734b9 100644 --- a/compiler_opt/rl/env_test.py +++ b/compiler_opt/rl/env_test.py @@ -178,7 +178,8 @@ def test_interactive_clang_temp_dir(self, mock_popen): self.assertEqual(os.path.exists(working_dir), False) with tempfile.TemporaryDirectory() as td: - with flagsaver.flagsaver((env.compilation_runner._KEEP_TEMPS, td)): # pylint: disable=protected-access + with flagsaver.flagsaver( + (env.compilation_runner._EXPLICIT_TEMPS_DIR, td)): # pylint: disable=protected-access with env.clang_session( _CLANG_PATH, _MOCK_MODULE, MockTask, interactive=True) as clang_session: diff --git a/compiler_opt/rl/imitation_learning/generate_bc_trajectories_lib.py b/compiler_opt/rl/imitation_learning/generate_bc_trajectories_lib.py index de67993a..4be59399 100644 --- a/compiler_opt/rl/imitation_learning/generate_bc_trajectories_lib.py +++ b/compiler_opt/rl/imitation_learning/generate_bc_trajectories_lib.py @@ -20,7 +20,7 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Generator, Union import json -# from absl import flags +from absl import flags from absl import logging import bisect import dataclasses @@ -46,6 +46,14 @@ from compiler_opt.distributed import buffered_scheduler from compiler_opt.distributed.local import local_worker_manager +_PERSISTENT_OBJECTS_PATH = flags.DEFINE_string( + 'persistent_objects_path', None, + ('If specified, the temp compiled binaries throughout' + 'the trajectory generation will be saved in persistent_objects_path' + 'for linking the final binary.')) + +FLAGS = flags.FLAGS + ProfilingDictValueType = Dict[str, Union[str, float, int]] @@ -318,6 +326,7 @@ def __init__( tensor_spec.BoundedTensorSpec, ]] = None, reward_key: str = '', + explicit_temps_dir: Optional[str] = None, **kwargs, ): self._loaded_module_spec = loaded_module_spec @@ -343,6 +352,7 @@ def __init__( task_type=mlgo_task_type, obs_spec=obs_spec, action_spec=action_spec, + 
explicit_temps_dir=explicit_temps_dir, interactive_only=True, ) if self._env.action_spec: @@ -603,8 +613,8 @@ def _process_obs(self, curr_obs, sequence_example): class ModuleWorkerResultProcessor: """Utility class to process ModuleExplorer results for ModuleWorker.""" - def __init__(self, base_path: Optional[str] = None): - self._base_path = base_path + def __init__(self, persistent_objects_path: Optional[str] = None): + self._persistent_objects_path = persistent_objects_path def _partition_for_loss(self, seq_example: tf.train.SequenceExample, partitions: List[float], label_name: str): @@ -654,12 +664,13 @@ def process_succeeded( logging.info('best policy idx: %s, best exploration idxs %s', best_policy_idx, best_exploration_idxs) - if self._base_path: + if self._persistent_objects_path: # as long as we have one process handles one module this can stay here temp_working_dir_idx = working_dir_list[best_policy_idx][1] temp_working_dir_list = working_dir_list[best_policy_idx][0] temp_working_dir = temp_working_dir_list[temp_working_dir_idx] - self._save_binary(self._base_path, spec_name, temp_working_dir) + self._save_binary(self._persistent_objects_path, spec_name, + temp_working_dir) self._partition_for_loss(seq_example, partitions, label_name) @@ -689,11 +700,12 @@ def _profiling_dict( } return per_module_dict - def _save_binary(self, base_path: str, save_path: str, binary_path: str): + def _save_binary(self, persistent_objects_path: str, save_path: str, + binary_path: str): path_head_tail = os.path.split(save_path) path_head = path_head_tail[0] path_tail = path_head_tail[1] - save_dir = os.path.join(base_path, path_head) + save_dir = os.path.join(persistent_objects_path, path_head) if not os.path.exists(save_dir): os.makedirs(save_dir, exist_ok=True) shutil.copy( @@ -725,7 +737,8 @@ class ModuleWorker(worker.Worker): explore_on_features: dict of feature names and functions which specify when to explore on the respective feature obs_action_specs: optional 
observation spec annotating TimeStep - base_path: root path to save best compiled binaries for linking + persistent_objects_path: root path to save best compiled binaries + for linking partitions: a tuple of limits defining the buckets, see partition_for_loss env_args: additional arguments to pass to the ModuleExplorer, used in creating the environment. This has to include the reward_key @@ -748,7 +761,7 @@ def __init__( time_step.TimeStep, tensor_spec.BoundedTensorSpec, ]] = None, - base_path: Optional[str] = None, + persistent_objects_path: Optional[str] = None, partitions: List[float] = [ 0., ], @@ -775,8 +788,8 @@ def __init__( [tf.Tensor], bool]]] = explore_on_features self._obs_action_specs: Optional[Tuple[ time_step.TimeStep, tensor_spec.BoundedTensorSpec]] = obs_action_specs - self._mw_utility = ModuleWorkerResultProcessor(base_path) - self._base_path = base_path + self._mw_utility = ModuleWorkerResultProcessor(persistent_objects_path) + self._persistent_objects_path = persistent_objects_path self._partitions = partitions self._envargs = envargs @@ -858,7 +871,7 @@ def select_best_exploration( try: shutil.rmtree(temp_dir_head) except FileNotFoundError as e: - if not self._base_path: + if not self._persistent_objects_path: continue else: raise FileNotFoundError( @@ -918,6 +931,13 @@ def gen_trajectories( worker_manager_class: A pool of workers hosted on the local machines, each in its own process. 
""" + explicit_temps_dir = FLAGS.explicit_temps_dir + persistent_objects_path = _PERSISTENT_OBJECTS_PATH.value + if not explicit_temps_dir and persistent_objects_path: + logging.warning('Setting explicit_temps_dir to persistent_objects_path=%s', + persistent_objects_path) + explicit_temps_dir = os.path.join(persistent_objects_path, 'temp_dirs') + cps = corpus.Corpus(data_path=data_path, delete_flags=delete_flags) logging.info('Done loading module specs from corpus.') @@ -944,6 +964,8 @@ def gen_trajectories( mlgo_task_type=mlgo_task_type, callable_policies=callable_policies, explore_on_features=explore_on_features, + persistent_objects_path=persistent_objects_path, + explicit_temps_dir=explicit_temps_dir, gin_config_str=gin.config_str(), ) as lwm: diff --git a/compiler_opt/rl/inlining/gin_configs/imitation_learning.gin b/compiler_opt/rl/inlining/gin_configs/imitation_learning.gin index 9b69ffdd..8dfdda30 100644 --- a/compiler_opt/rl/inlining/gin_configs/imitation_learning.gin +++ b/compiler_opt/rl/inlining/gin_configs/imitation_learning.gin @@ -10,13 +10,11 @@ generate_bc_trajectories_lib.ModuleWorker.mlgo_task_type=@env.InliningForSizeTas generate_bc_trajectories_lib.ModuleWorker.policy_paths=[''] generate_bc_trajectories_lib.ModuleWorker.exploration_policy_paths=[] generate_bc_trajectories_lib.ModuleWorker.explore_on_features=None -generate_bc_trajectories_lib.ModuleWorker.base_path='' generate_bc_trajectories_lib.ModuleWorker.partitions=[ 285.0, 376.0, 452.0, 512.0, 571.0, 627.5, 720.0, 809.5, 1304.0, 1832.0, 2467.0, 3344.0, 4545.0, 6459.0, 9845.0, 17953.0, 29430.5, 85533.5, 124361.0] generate_bc_trajectories_lib.ModuleWorker.reward_key='default' -# generate_bc_trajectories_lib.ModuleWorker.gin_config_str=None generate_bc_trajectories_lib.gen_trajectories.data_path='' generate_bc_trajectories_lib.gen_trajectories.delete_flags=('-split-dwarf-file', '-split-dwarf-output')