[BC] keep_temps update (google#411)
* Rename `base_path` to `persistent_objects_path` and `keep_temps` to `explicit_temps_dir`.
Makes sure that if `persistent_objects_path` is set then `explicit_temps_dir` is
also set, or that neither of them is set. Further, passes `explicit_temps_dir` to
`compilation_runner.get_workdir_context` and ensures that the flag
and the argument are not set at the same time.

* Address comments from @mtrofin and @boomanaiden154.

* Set `explicit_temps_dir` to `persistent_objects_path + '/temp_dirs'` whenever
`explicit_temps_dir` is not set and `persistent_objects_path` is set.
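
The defaulting rule in the last bullet is small enough to restate on its own; the sketch below mirrors it outside the codebase (the paths are illustrative assumptions, not values shipped with this change).

```python
import os

# Stand-ins for the two flags (values are assumptions, for illustration only).
persistent_objects_path = '/out/bc_run'  # --persistent_objects_path
explicit_temps_dir = None                # --explicit_temps_dir left unset

# When only persistent_objects_path is given, temporaries default to a
# 'temp_dirs' subdirectory underneath it.
if not explicit_temps_dir and persistent_objects_path:
  explicit_temps_dir = os.path.join(persistent_objects_path, 'temp_dirs')

assert explicit_temps_dir == '/out/bc_run/temp_dirs'
```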
tvmarino authored Dec 23, 2024
1 parent 7e94d9e commit f179ce1
Showing 5 changed files with 77 additions and 26 deletions.
25 changes: 18 additions & 7 deletions compiler_opt/rl/compilation_runner.py
@@ -39,8 +39,8 @@
'Max duration (in seconds) after which we cancel any compilation job.')
_QUIET = flags.DEFINE_bool(
'quiet', True, 'Whether or not to compile quietly (hiding info logging)')
_KEEP_TEMPS = flags.DEFINE_string(
'keep_temps', None,
_EXPLICIT_TEMPS_DIR = flags.DEFINE_string(
'explicit_temps_dir', None,
'Put temporary files into the given directory and keep them past exit.')


@@ -80,14 +80,25 @@ def __exit__(self, exc, value, tb):
pass


def get_workdir_context():
def get_workdir_context(explicit_temps_dir: Optional[str] = None):
"""Return a context which manages how the temperory directories are handled.
When the flag keep_temps is specified temporary directories are stored in
keep_temps.
When the flag explicit_temps_dir is specified, temporary directories are
stored in explicit_temps_dir.
Args:
explicit_temps_dir: Put temporary files into the given directory and keep them
past exit when compiling.
"""
if _KEEP_TEMPS.value is not None:
tempdir_context = NonTemporaryDirectory(dir=_KEEP_TEMPS.value)
if explicit_temps_dir and _EXPLICIT_TEMPS_DIR.value:
raise ValueError('Only one of the flag '
f'explicit_temps_dir={_EXPLICIT_TEMPS_DIR.value} '
f'and the argument explicit_temps_dir={explicit_temps_dir} '
'should be specified.')
if _EXPLICIT_TEMPS_DIR.value is not None:
tempdir_context = NonTemporaryDirectory(dir=_EXPLICIT_TEMPS_DIR.value)
elif explicit_temps_dir:
tempdir_context = NonTemporaryDirectory(dir=explicit_temps_dir)
else:
tempdir_context = tempfile.TemporaryDirectory() # pylint: disable=consider-using-with
return tempdir_context
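For orientation, a minimal usage sketch of the updated helper, assuming absl flags have already been parsed (e.g. inside an `app.run()` main) and that the explicit directory already exists; the path is an assumption.

```python
from compiler_opt.rl import compilation_runner

# Keep temporaries in an explicit, pre-existing directory; they survive exit.
with compilation_runner.get_workdir_context(
    explicit_temps_dir='/out/bc_run/temp_dirs') as workdir:
  print('compiling under', workdir)

# With no argument and --explicit_temps_dir unset, a plain
# tempfile.TemporaryDirectory is used and removed on exit.
with compilation_runner.get_workdir_context() as workdir:
  print('compiling under', workdir)
```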
27 changes: 23 additions & 4 deletions compiler_opt/rl/env.py
@@ -225,6 +225,7 @@ def clang_session(
module: corpus.LoadedModuleSpec,
task_type: Type[MLGOTask],
*,
explicit_temps_dir: Optional[str] = None,
interactive: bool,
):
"""Context manager for clang session.
@@ -236,12 +237,15 @@ def clang_session(
clang_path: The clang binary to use for the InteractiveClang session.
module: The module to compile with clang.
task_type: Type of the MLGOTask to use.
explicit_temps_dir: Put temporary files into the given directory and keep them
past exit when compiling.
interactive: Whether to use an interactive or default clang instance
Yields:
Either the constructed InteractiveClang or DefaultClang object.
"""
tempdir_context = compilation_runner.get_workdir_context()
tempdir_context = compilation_runner.get_workdir_context(
explicit_temps_dir=explicit_temps_dir)
with tempdir_context as td:
task_working_dir = os.path.join(td, '__task_working_dir__')
os.mkdir(task_working_dir)
@@ -290,6 +294,7 @@ def _get_scores() -> dict[str, float]:
def _get_clang_generator(
clang_path: str,
task_type: Type[MLGOTask],
explicit_temps_dir: Optional[str] = None,
interactive_only: bool = False,
) -> Generator[Optional[Tuple[ClangProcess, InteractiveClang]],
Optional[corpus.LoadedModuleSpec], None]:
@@ -298,6 +303,8 @@
Args:
clang_path: Path to the clang binary to use within InteractiveClang.
task_type: Type of the MLGO task to use.
explicit_temps_dir: Put temporary files into the given directory and keep them
past exit when compiling.
interactive_only: If set to true the returned tuple of generators is
iclang, iclang instead of iclang, clang
@@ -315,12 +322,20 @@
# https://github.com/google/yapf/issues/1092
module = yield
with clang_session(
clang_path, module, task_type, interactive=True) as iclang:
clang_path,
module,
task_type,
explicit_temps_dir=explicit_temps_dir,
interactive=True) as iclang:
if interactive_only:
yield iclang, iclang
else:
with clang_session(
clang_path, module, task_type, interactive=False) as clang:
clang_path,
module,
task_type,
explicit_temps_dir=explicit_temps_dir,
interactive=False) as clang:
yield iclang, clang


@@ -340,10 +355,14 @@ def __init__(
task_type: Type[MLGOTask],
obs_spec,
action_spec,
explicit_temps_dir: Optional[str] = None,
interactive_only: bool = False,
):
self._clang_generator = _get_clang_generator(
clang_path, task_type, interactive_only=interactive_only)
clang_path,
task_type,
explicit_temps_dir=explicit_temps_dir,
interactive_only=interactive_only)
self._obs_spec = obs_spec
self._action_spec = action_spec

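A sketch of how a caller might thread the new keyword argument into a clang session; the helper below is hypothetical and only mirrors the signature added in this diff.

```python
from typing import Type

from compiler_opt.rl import corpus, env

def compile_once(clang_path: str, module: corpus.LoadedModuleSpec,
                 task_type: Type[env.MLGOTask], temps_dir: str) -> None:
  """Hypothetical helper: one interactive session whose temporaries are kept."""
  with env.clang_session(
      clang_path,
      module,
      task_type,
      explicit_temps_dir=temps_dir,
      interactive=True) as iclang:
    # The session's working directory is created under temps_dir and is not
    # deleted on exit, unlike the default TemporaryDirectory behaviour.
    assert iclang is not None
```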
3 changes: 2 additions & 1 deletion compiler_opt/rl/env_test.py
@@ -178,7 +178,8 @@ def test_interactive_clang_temp_dir(self, mock_popen):
self.assertEqual(os.path.exists(working_dir), False)

with tempfile.TemporaryDirectory() as td:
with flagsaver.flagsaver((env.compilation_runner._KEEP_TEMPS, td)): # pylint: disable=protected-access
with flagsaver.flagsaver(
(env.compilation_runner._EXPLICIT_TEMPS_DIR, td)): # pylint: disable=protected-access
with env.clang_session(
_CLANG_PATH, _MOCK_MODULE, MockTask,
interactive=True) as clang_session:
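The same flagsaver pattern generalizes to other tests that want temporaries kept under a known directory; a sketch, assumed to run inside an absltest test case where flags are already parsed.

```python
import tempfile

from absl.testing import flagsaver

from compiler_opt.rl import compilation_runner

with tempfile.TemporaryDirectory() as td:
  # Temporarily point the module-level flag at td; the previous value is
  # restored when the flagsaver context exits.
  with flagsaver.flagsaver((compilation_runner._EXPLICIT_TEMPS_DIR, td)):  # pylint: disable=protected-access
    with compilation_runner.get_workdir_context() as workdir:
      assert workdir.startswith(td)  # created under td and kept past exit
```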
46 changes: 34 additions & 12 deletions compiler_opt/rl/imitation_learning/generate_bc_trajectories_lib.py
@@ -20,7 +20,7 @@
from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Generator, Union
import json

# from absl import flags
from absl import flags
from absl import logging
import bisect
import dataclasses
@@ -46,6 +46,14 @@
from compiler_opt.distributed import buffered_scheduler
from compiler_opt.distributed.local import local_worker_manager

_PERSISTENT_OBJECTS_PATH = flags.DEFINE_string(
'persistent_objects_path', None,
('If specified, the temp compiled binaries produced throughout '
'the trajectory generation will be saved in persistent_objects_path '
'for linking the final binary.'))

FLAGS = flags.FLAGS

ProfilingDictValueType = Dict[str, Union[str, float, int]]


@@ -318,6 +326,7 @@ def __init__(
tensor_spec.BoundedTensorSpec,
]] = None,
reward_key: str = '',
explicit_temps_dir: Optional[str] = None,
**kwargs,
):
self._loaded_module_spec = loaded_module_spec
@@ -343,6 +352,7 @@ def __init__(
task_type=mlgo_task_type,
obs_spec=obs_spec,
action_spec=action_spec,
explicit_temps_dir=explicit_temps_dir,
interactive_only=True,
)
if self._env.action_spec:
@@ -603,8 +613,8 @@ def _process_obs(self, curr_obs, sequence_example):
class ModuleWorkerResultProcessor:
"""Utility class to process ModuleExplorer results for ModuleWorker."""

def __init__(self, base_path: Optional[str] = None):
self._base_path = base_path
def __init__(self, persistent_objects_path: Optional[str] = None):
self._persistent_objects_path = persistent_objects_path

def _partition_for_loss(self, seq_example: tf.train.SequenceExample,
partitions: List[float], label_name: str):
@@ -654,12 +664,13 @@ def process_succeeded(
logging.info('best policy idx: %s, best exploration idxs %s',
best_policy_idx, best_exploration_idxs)

if self._base_path:
if self._persistent_objects_path:
# as long as we have one process handles one module this can stay here
temp_working_dir_idx = working_dir_list[best_policy_idx][1]
temp_working_dir_list = working_dir_list[best_policy_idx][0]
temp_working_dir = temp_working_dir_list[temp_working_dir_idx]
self._save_binary(self._base_path, spec_name, temp_working_dir)
self._save_binary(self._persistent_objects_path, spec_name,
temp_working_dir)

self._partition_for_loss(seq_example, partitions, label_name)

@@ -689,11 +700,12 @@ def _profiling_dict(
}
return per_module_dict

def _save_binary(self, base_path: str, save_path: str, binary_path: str):
def _save_binary(self, persistent_objects_path: str, save_path: str,
binary_path: str):
path_head_tail = os.path.split(save_path)
path_head = path_head_tail[0]
path_tail = path_head_tail[1]
save_dir = os.path.join(base_path, path_head)
save_dir = os.path.join(persistent_objects_path, path_head)
if not os.path.exists(save_dir):
os.makedirs(save_dir, exist_ok=True)
shutil.copy(
@@ -725,7 +737,8 @@ class ModuleWorker(worker.Worker):
explore_on_features: dict of feature names and functions which specify
when to explore on the respective feature
obs_action_specs: optional observation spec annotating TimeStep
base_path: root path to save best compiled binaries for linking
persistent_objects_path: root path to save best compiled binaries
for linking
partitions: a tuple of limits defining the buckets, see partition_for_loss
env_args: additional arguments to pass to the ModuleExplorer, used in
creating the environment. This has to include the reward_key
@@ -748,7 +761,7 @@ def __init__(
time_step.TimeStep,
tensor_spec.BoundedTensorSpec,
]] = None,
base_path: Optional[str] = None,
persistent_objects_path: Optional[str] = None,
partitions: List[float] = [
0.,
],
@@ -775,8 +788,8 @@ def __init__(
[tf.Tensor], bool]]] = explore_on_features
self._obs_action_specs: Optional[Tuple[
time_step.TimeStep, tensor_spec.BoundedTensorSpec]] = obs_action_specs
self._mw_utility = ModuleWorkerResultProcessor(base_path)
self._base_path = base_path
self._mw_utility = ModuleWorkerResultProcessor(persistent_objects_path)
self._persistent_objects_path = persistent_objects_path
self._partitions = partitions
self._envargs = envargs

@@ -858,7 +871,7 @@ def select_best_exploration(
try:
shutil.rmtree(temp_dir_head)
except FileNotFoundError as e:
if not self._base_path:
if not self._persistent_objects_path:
continue
else:
raise FileNotFoundError(
@@ -918,6 +931,13 @@ def gen_trajectories(
worker_manager_class: A pool of workers hosted on the local machines, each
in its own process.
"""
explicit_temps_dir = FLAGS.explicit_temps_dir
persistent_objects_path = _PERSISTENT_OBJECTS_PATH.value
if not explicit_temps_dir and persistent_objects_path:
explicit_temps_dir = os.path.join(persistent_objects_path, 'temp_dirs')
logging.warning('explicit_temps_dir not set; defaulting to %s',
explicit_temps_dir)

cps = corpus.Corpus(data_path=data_path, delete_flags=delete_flags)
logging.info('Done loading module specs from corpus.')

@@ -944,6 +964,8 @@
mlgo_task_type=mlgo_task_type,
callable_policies=callable_policies,
explore_on_features=explore_on_features,
persistent_objects_path=persistent_objects_path,
explicit_temps_dir=explicit_temps_dir,
gin_config_str=gin.config_str(),
) as lwm:

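For reference, a worked example of the path handling in `_save_binary` above: the module's corpus-relative name is recreated under `persistent_objects_path` before the best binary is copied there (the module name is an assumption).

```python
import os
import tempfile

with tempfile.TemporaryDirectory() as persistent_objects_path:
  spec_name = 'lib/Transforms/Utils/Local.cpp.o'  # assumed module spec name
  path_head, path_tail = os.path.split(spec_name)
  save_dir = os.path.join(persistent_objects_path, path_head)
  os.makedirs(save_dir, exist_ok=True)
  # The compiled module would be copied here as
  # <persistent_objects_path>/lib/Transforms/Utils/Local.cpp.o
  print(save_dir, path_tail)
```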
2 changes: 0 additions & 2 deletions compiler_opt/rl/inlining/gin_configs/imitation_learning.gin
@@ -10,13 +10,11 @@ generate_bc_trajectories_lib.ModuleWorker.mlgo_task_type=@env.InliningForSizeTas
generate_bc_trajectories_lib.ModuleWorker.policy_paths=['']
generate_bc_trajectories_lib.ModuleWorker.exploration_policy_paths=[]
generate_bc_trajectories_lib.ModuleWorker.explore_on_features=None
generate_bc_trajectories_lib.ModuleWorker.base_path=''
generate_bc_trajectories_lib.ModuleWorker.partitions=[
285.0, 376.0, 452.0, 512.0, 571.0, 627.5, 720.0, 809.5, 1304.0, 1832.0,
2467.0, 3344.0, 4545.0, 6459.0, 9845.0, 17953.0, 29430.5, 85533.5,
124361.0]
generate_bc_trajectories_lib.ModuleWorker.reward_key='default'
# generate_bc_trajectories_lib.ModuleWorker.gin_config_str=None

generate_bc_trajectories_lib.gen_trajectories.data_path=''
generate_bc_trajectories_lib.gen_trajectories.delete_flags=('-split-dwarf-file', '-split-dwarf-output')
