diff --git a/utils/run_benchmarks.py b/utils/run_benchmarks.py index e8b9bc40..d5d15a74 100644 --- a/utils/run_benchmarks.py +++ b/utils/run_benchmarks.py @@ -160,6 +160,7 @@ def run_benchmark( ) -> tuple[str, float, str, float, int, int]: xl_folder = path.join(benchmarks_folder, "xlsx", benchmark["input_folder"]) dd_folder = path.join(benchmarks_folder, "dd", benchmark["dd_folder"]) + include_files = benchmark.get("dd_files", []) csv_folder = path.join(benchmarks_folder, "csv", benchmark["name"]) out_folder = path.join(benchmarks_folder, out_folder, benchmark["name"]) @@ -174,7 +175,9 @@ def run_benchmark( "xl2times/dd_to_csv.py", dd_folder, csv_folder, - ], + "--include_files", + ] + + include_files, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, @@ -191,7 +194,7 @@ def run_benchmark( else: # If debug option is set, run as a function call to allow stepping with a debugger. try: - main([dd_folder, csv_folder]) + main([dd_folder, csv_folder, "--include_files"] + include_files) except Exception: logger.exception(f"dd_to_csv failed on {benchmark['name']}") shutil.rmtree(csv_folder, ignore_errors=True) diff --git a/xl2times/dd_to_csv.py b/xl2times/dd_to_csv.py index 46f652b6..a517b212 100644 --- a/xl2times/dd_to_csv.py +++ b/xl2times/dd_to_csv.py @@ -173,9 +173,8 @@ def generate_headers_by_attr() -> dict[str, list[str]]: def convert_dd_to_tabular( - basedir: str, output_dir: str, headers_by_attr: dict[str, list[str]] + dd_files: list[Path], output_dir: str, headers_by_attr: dict[str, list[str]] ) -> None: - dd_files = [p for p in Path(basedir).rglob("*.dd")] all_sets = defaultdict(list) all_parameters = defaultdict(list) @@ -226,8 +225,18 @@ def main(arg_list: None | list[str] = None): args_parser.add_argument( "output_dir", type=str, help="Output directory to save the .csv files in." ) + args_parser.add_argument( + "--include_files", + type=str, + nargs="*", + help="List of .dd file stems to process.", + ) args = args_parser.parse_args(arg_list) - convert_dd_to_tabular(args.input_dir, args.output_dir, generate_headers_by_attr()) + dd_files = [p for p in Path(args.input_dir).rglob("*.dd")] + if args.include_files: + valid_stems = {stem.lower() for stem in args.include_files} + dd_files = [p for p in dd_files if p.stem.lower() in valid_stems] + convert_dd_to_tabular(dd_files, args.output_dir, generate_headers_by_attr()) if __name__ == "__main__":