Skip to content

Commit

Permalink
Add regalloc_trace_worker
Browse files Browse the repository at this point in the history
This patch adds the worker for the new trace-based regalloc training
mode.

Reviewers: mtrofin

Reviewed By: mtrofin

Pull Request: #415
  • Loading branch information
boomanaiden154 authored Jan 13, 2025
1 parent 28a0353 commit 3ddfad9
Show file tree
Hide file tree
Showing 3 changed files with 344 additions and 0 deletions.
14 changes: 14 additions & 0 deletions compiler_opt/es/regalloc_trace/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# coding=utf-8
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
180 changes: 180 additions & 0 deletions compiler_opt/es/regalloc_trace/regalloc_trace_worker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
# coding=utf-8
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Worker for regalloc using trace-based cost modeling.
This worker is designed specifically for a trace-based cost modeling
methodology. It compiles an entire corpus in parallel with a thread pool, and
then passes all those modules to basic_block_trace_model along with traces and
other relevant data to produce an overall cost for the model being evaluated.
"""

from typing import Optional, Collection
import os
import pathlib
import subprocess
import json
import concurrent.futures
import tempfile

import gin

from compiler_opt.rl import corpus
from compiler_opt.distributed import worker
from compiler_opt.rl import policy_saver


@gin.configurable
class RegallocTraceWorker(worker.Worker):
  """A worker that produces rewards for a given regalloc policy.

  RegallocTraceWorker exposes a compile_corpus_and_evaluate function, which
  compiles a set of modules in parallel locally, evaluates them with
  basic_block_trace_model, and then returns the total cost of the evaluated
  segments.
  """

  def __init__(self, clang_path: str, basic_block_trace_model_path: str,
               thread_count: int, corpus_path: str):
    """Initializes the RegallocTraceWorker class.

    Args:
      clang_path: The path to the clang binary to use for compiling the corpus.
      basic_block_trace_model_path: The path to the basic_block_trace_model
        binary to use for trace-based modelling. basic_block_trace_model takes
        in a set of modules, a trace, and auxiliary information for
        interpreting the trace, simulates the trace against the code in the
        passed-in modules, returning estimated cycle counts.
      thread_count: The number of threads to use for concurrent compilation
        and modelling.
      corpus_path: The path to the corpus that modules will be compiled from.
    """
    self._clang_path = clang_path
    self._basic_block_trace_model_path = basic_block_trace_model_path
    self._thread_count = thread_count
    self._corpus_path = corpus_path

  def _compile_module(self, module_to_compile: corpus.ModuleSpec,
                      output_directory: str,
                      tflite_policy_path: Optional[str]) -> None:
    """Compiles a single module of the corpus with clang.

    Args:
      module_to_compile: The module to compile. Its command line entries are
        formatted with a replace context pointing at the module's files under
        the corpus path.
      output_directory: The directory the object file is written to, as
        <output_directory>/<module name>.bc.o.
      tflite_policy_path: The path passed to clang through -regalloc-model,
        or None to force the default regalloc advisor.

    Raises:
      subprocess.CalledProcessError: If clang exits with a non-zero status.
    """
    module_path_prefix = os.path.join(self._corpus_path,
                                      module_to_compile.name)
    context = corpus.Corpus.ReplaceContext(
        module_path_prefix + ".bc",
        # We add the additional ThinLTO index unconditionally as if we are not
        # using ThinLTO, we will just never end up replacing anything.
        module_path_prefix + ".thinlto.bc")

    command_vector = [self._clang_path]
    command_vector.extend(
        option.format(context=context)
        for option in module_to_compile.command_line)

    if tflite_policy_path is not None:
      command_vector.extend([
          "-mllvm", "-regalloc-enable-advisor=development", "-mllvm",
          f"-regalloc-model={tflite_policy_path}"
      ])
    else:
      # Force the default advisor if we aren't explicitly using a new policy
      # to prevent enabling the release advisor if it was specified in the
      # corpus.
      command_vector.extend(["-mllvm", "-regalloc-enable-advisor=default"])

    module_output_path = os.path.join(output_directory,
                                      module_to_compile.name + ".bc.o")
    # Make sure the parent directory of the object file exists in case the
    # module name contains directory components.
    pathlib.Path(os.path.dirname(module_output_path)).mkdir(
        parents=True, exist_ok=True)
    command_vector.extend(["-o", module_output_path])

    subprocess.run(
        command_vector,
        check=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)

  def _build_corpus(self, modules: Collection[corpus.ModuleSpec],
                    output_directory: str,
                    tflite_policy: Optional[policy_saver.Policy]) -> None:
    """Compiles all modules and writes a corpus description for them.

    Args:
      modules: The modules to compile.
      output_directory: The directory receiving the object files and the
        corpus_description.json file listing the compiled modules.
      tflite_policy: The policy to compile with. It is written to a temporary
        directory for the duration of the compilation. If None, the default
        regalloc advisor is used.

    Raises:
      Exception: Whatever the first failing compile job (in submission order)
        raised, e.g. subprocess.CalledProcessError.
    """
    with tempfile.TemporaryDirectory() as tflite_policy_dir:
      if tflite_policy is not None:
        tflite_policy.to_filesystem(tflite_policy_dir)
        tflite_policy_path = tflite_policy_dir
      else:
        tflite_policy_path = None

      with concurrent.futures.ThreadPoolExecutor(
          max_workers=self._thread_count) as thread_pool:
        compile_futures = [
            thread_pool.submit(self._compile_module, module, output_directory,
                               tflite_policy_path) for module in modules
        ]
        # Fail fast: once one compile job has raised, there is no point in
        # running the jobs that have not started yet, so cancel them rather
        # than letting the executor drain the whole queue on exit.
        _, pending_futures = concurrent.futures.wait(
            compile_futures, return_when=concurrent.futures.FIRST_EXCEPTION)
        for pending_future in pending_futures:
          pending_future.cancel()

      for future in compile_futures:
        # Futures are only ever cancelled after another job failed, so
        # skipping them here still surfaces that failure below.
        if not future.cancelled():
          future.result()

    # Write out a corpus description. basic_block_trace_model uses a corpus
    # description JSON to know which object files to load, so we need to emit
    # one before performing evaluation.
    corpus_description_path = os.path.join(output_directory,
                                           "corpus_description.json")
    corpus_description = {
        "modules": [module_spec.name for module_spec in modules]
    }

    with open(
        corpus_description_path, "w",
        encoding="utf-8") as corpus_description_file:
      json.dump(corpus_description, corpus_description_file)

  def _evaluate_corpus(self, module_directory: str, function_index_path: str,
                       bb_trace_path: str):
    """Runs basic_block_trace_model and collects the costs it prints.

    Args:
      module_directory: The directory containing corpus_description.json.
      function_index_path: Forwarded through --function_index_path.
      bb_trace_path: Forwarded through --bb_trace_path.

    Returns:
      A list with one float for every line of the tool's stdout that parses
      as a float. All other lines are ignored.

    Raises:
      subprocess.CalledProcessError: If the tool exits with a non-zero status.
      ValueError: If no line of the output could be parsed as a float.
    """
    corpus_description_path = os.path.join(module_directory,
                                           "corpus_description.json")

    command_vector = [
        self._basic_block_trace_model_path,
        f"--corpus_path={corpus_description_path}",
        f"--function_index_path={function_index_path}",
        f"--thread_count={self._thread_count}",
        f"--bb_trace_path={bb_trace_path}", "--model_type=mca"
    ]

    output = subprocess.run(
        command_vector,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        check=True)

    segment_costs = []
    for line in output.stdout.decode("utf-8").splitlines():
      try:
        segment_costs.append(float(line))
      except ValueError:
        # Not a cost line (e.g. blank or diagnostic output), skip it.
        continue

    if not segment_costs:
      raise ValueError("Did not find any valid segment costs.")

    return segment_costs

  def compile_corpus_and_evaluate(
      self, modules: Collection[corpus.ModuleSpec], function_index_path: str,
      bb_trace_path: str,
      tflite_policy: Optional[policy_saver.Policy]) -> float:
    """Compiles the given modules and returns their total modelled cost.

    Args:
      modules: The modules to compile and evaluate.
      function_index_path: The function index path handed to
        basic_block_trace_model.
      bb_trace_path: The basic block trace path handed to
        basic_block_trace_model.
      tflite_policy: The policy to compile the modules with, or None to use
        the default regalloc advisor.

    Returns:
      The sum of all segment costs reported by basic_block_trace_model.
    """
    # The object files are only needed for the evaluation, so everything is
    # built into a temporary directory that is removed afterwards.
    with tempfile.TemporaryDirectory() as compilation_dir:
      self._build_corpus(modules, compilation_dir, tflite_policy)

      segment_costs = self._evaluate_corpus(compilation_dir,
                                            function_index_path, bb_trace_path)
      return sum(segment_costs)
150 changes: 150 additions & 0 deletions compiler_opt/es/regalloc_trace/regalloc_trace_worker_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
# coding=utf-8
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Test for RegallocTraceWorker."""

from typing import List
import os
import json
import stat
import textwrap

from absl.testing import absltest

from compiler_opt.es.regalloc_trace import regalloc_trace_worker
from compiler_opt.rl import corpus
from compiler_opt.testing import model_test_utils
from compiler_opt.rl import policy_saver


def _setup_corpus(corpus_dir: str) -> List[corpus.ModuleSpec]:
  """Creates a two-module test corpus description inside corpus_dir.

  Args:
    corpus_dir: The directory that corpus_description.json is written to.

  Returns:
    The module specs (module_a and module_b) described by the written corpus
    description.
  """
  module_specs = [
      corpus.ModuleSpec(name, 1, (flag,), True)
      for name, flag in (("module_a", "-fmodule-a"), ("module_b",
                                                      "-fmodule-b"))
  ]

  module_paths = []
  for module_spec in module_specs:
    module_paths.append(os.path.join(corpus_dir, module_spec.name))

  description = {"has_thinlto": True, "modules": module_paths}
  description_path = os.path.join(corpus_dir, "corpus_description.json")
  with open(description_path, "w", encoding="utf-8") as description_file:
    json.dump(description, description_file)

  return module_specs


def _create_test_binary(binary_path: str, output_path: str):
  """Writes an executable fake binary that records how it was invoked.

  The generated bash script appends its command line arguments as a single
  line to output_path and then prints two lines containing "1" to stdout.

  Args:
    binary_path: The path the script is written to. The file is marked as
      executable for its owner.
    output_path: The path of the file the script appends its arguments to.
  """
  # Imported locally as this helper is the only user of shlex in this file.
  import shlex  # pylint: disable=import-outside-toplevel

  script_lines = [
      "#!/bin/bash",
      # Quote the path so that spaces or shell metacharacters in it do not
      # break the redirection.
      f'echo "$@" >> {shlex.quote(output_path)}',
      "echo 1",
      "echo 1",
  ]

  with open(binary_path, "w", encoding="utf-8") as binary_handle:
    binary_handle.write("\n".join(script_lines) + "\n")

  current_mode = os.stat(binary_path).st_mode
  os.chmod(binary_path, current_mode | stat.S_IEXEC)


class RegallocTraceWorkerTest(absltest.TestCase):
  """Tests RegallocTraceWorker against fake clang and model binaries."""

  def _create_fake_binary(self, name: str):
    """Creates a fake executable and the file recording its invocations.

    Args:
      name: The temp file name of the binary. The invocation log is created
        as <name>_invocations.

    Returns:
      A tuple of (binary temp file, invocations temp file).
    """
    binary = self.create_tempfile(name)
    invocations = self.create_tempfile(name + "_invocations")
    _create_test_binary(binary.full_path, invocations.full_path)
    return binary, invocations

  def _read_clang_command_lines(self, clang_invocations) -> List[str]:
    """Returns the two recorded clang command lines, one per module."""
    clang_command_lines = clang_invocations.read_text().splitlines()
    self.assertLen(clang_command_lines, 2)
    return clang_command_lines

  def test_build_corpus_and_evaluate(self):
    corpus_dir = self.create_tempdir("corpus")
    corpus_modules = _setup_corpus(corpus_dir.full_path)
    fake_clang_binary, fake_clang_invocations = self._create_fake_binary(
        "fake_clang")
    fake_bb_trace_model_binary, fake_bb_trace_model_invocations = (
        self._create_fake_binary("fake_basic_block_trace_model"))

    worker = regalloc_trace_worker.RegallocTraceWorker(
        fake_clang_binary.full_path, fake_bb_trace_model_binary.full_path, 1,
        corpus_dir.full_path)
    total_cost = worker.compile_corpus_and_evaluate(corpus_modules,
                                                    "function_index_path.pb",
                                                    "bb_trace_path.pb", None)
    # The fake model binary prints two segment costs of 1 each.
    self.assertEqual(total_cost, 2)

    # Check that we are compiling the modules with the appropriate flags and
    # the default regalloc advisor given we did not pass in any TFLite policy.
    clang_command_lines = self._read_clang_command_lines(
        fake_clang_invocations)
    self.assertIn("-fmodule-a", clang_command_lines[0])
    self.assertIn("-regalloc-enable-advisor=default", clang_command_lines[0])
    self.assertIn("-fmodule-b", clang_command_lines[1])
    self.assertIn("-regalloc-enable-advisor=default", clang_command_lines[1])

    # Check that we pass the expected flags to basic_block_trace_model.
    bb_trace_model_command_line = fake_bb_trace_model_invocations.read_text(
    ).split("\n")[0].split()
    self.assertLen(bb_trace_model_command_line, 5)
    self.assertIn("--corpus_path", bb_trace_model_command_line[0])
    self.assertEqual("--function_index_path=function_index_path.pb",
                     bb_trace_model_command_line[1])
    self.assertEqual("--thread_count=1", bb_trace_model_command_line[2])
    self.assertEqual("--bb_trace_path=bb_trace_path.pb",
                     bb_trace_model_command_line[3])
    self.assertEqual("--model_type=mca", bb_trace_model_command_line[4])

  def test_compile_corpus_and_evaluate_with_tflite(self):
    corpus_dir = self.create_tempdir("corpus")
    corpus_modules = _setup_corpus(corpus_dir.full_path)
    fake_clang_binary, fake_clang_invocations = self._create_fake_binary(
        "fake_clang")
    fake_bb_trace_model_binary, _ = self._create_fake_binary(
        "fake_basic_block_trace_model")

    # Generate a test model and convert it so that it can be loaded as a
    # serialized policy.
    saved_model_dir = self.create_tempdir("saved_model")
    tflite_dir = self.create_tempdir("converted_model")
    model_test_utils.gen_test_model(saved_model_dir.full_path)
    policy_saver.convert_mlgo_model(saved_model_dir.full_path,
                                    tflite_dir.full_path)
    serialized_policy = policy_saver.Policy.from_filesystem(
        tflite_dir.full_path)

    worker = regalloc_trace_worker.RegallocTraceWorker(
        fake_clang_binary.full_path, fake_bb_trace_model_binary.full_path, 1,
        corpus_dir.full_path)
    worker.compile_corpus_and_evaluate(corpus_modules, "function_index_path.pb",
                                       "bb_trace_path.pb", serialized_policy)

    # Assert that we pass the TFLite model to the clang invocations.
    clang_command_lines = self._read_clang_command_lines(
        fake_clang_invocations)
    for clang_command_line in clang_command_lines:
      self.assertIn("-regalloc-enable-advisor=development", clang_command_line)
      self.assertIn("-regalloc-model=", clang_command_line)

0 comments on commit 3ddfad9

Please sign in to comment.