|
13 | 13 | from scinum import Number
|
14 | 14 |
|
15 | 15 | import cmsdb.processes as cmsdb_procs
|
16 |
| -from columnflow.util import DotDict |
| 16 | +from columnflow.util import DotDict, dev_sandbox |
17 | 17 | from columnflow.tasks.external import GetDatasetLFNs
|
18 | 18 | from columnflow.config_util import get_root_processes_from_campaign
|
19 | 19 |
|
@@ -397,6 +397,7 @@ def configure_hbw_datasets(
|
397 | 397 | limit_dataset_files: int | None = None,
|
398 | 398 | add_dataset_extensions: bool = False,
|
399 | 399 | ):
|
| 400 | + enable_uhh_campaign_usage(config) |
400 | 401 | for dataset in config.datasets:
|
401 | 402 | if add_dataset_extensions:
|
402 | 403 | add_dataset_extension_to_nominal(dataset)
|
@@ -541,3 +542,45 @@ def get_dataset_lfns_2017(
|
541 | 542 | lfn_base.child(basename, type="f").path
|
542 | 543 | for basename in lfn_base.listdir(pattern="*.root")
|
543 | 544 | ]
|
| 545 | + |
| 546 | + |
def enable_uhh_campaign_usage(cfg: od.Config) -> None:
    """
    Registers custom lfn retrieval hooks on *cfg* when one of its campaigns is a custom uhh one.

    Nothing is changed unless at least one name in ``cfg.campaign.x("campaigns", [])`` contains
    "uhh". In that case, ``cfg.x.get_dataset_lfns``, ``cfg.x.get_dataset_lfns_sandbox`` and
    ``cfg.x.get_dataset_lfns_remote_fs`` are set.
    """
    def is_uhh_dataset(dataset_inst: od.Dataset) -> bool:
        # a dataset counts as uhh when its "campaign" aux entry contains "uhh"
        return "uhh" in dataset_inst.x("campaign", "")

    def get_dataset_lfns_uhh(
        dataset_inst: od.Dataset,
        shift_inst: od.Shift,
        dataset_key: str,
    ) -> list[str]:
        # lfn retrieval that looks up uhh datasets on the wlcg file system named after their campaign
        if is_uhh_dataset(dataset_inst):
            cpn_name = dataset_inst.x.campaign

            # split the dataset key into its parts and build the lfn base directory from them
            key_parts = dataset_key.split("/")[1:]
            dataset_id, full_campaign, tier = key_parts
            main_campaign, sub_campaign = full_campaign.split("-", 1)
            lfn_base = law.wlcg.WLCGDirectoryTarget(
                f"/store/{dataset_inst.data_source}/{main_campaign}/{dataset_id}/{tier}/{sub_campaign}/0",
                fs=f"wlcg_fs_{cpn_name}",
            )

            # loop through the root files in that directory and interpret their paths as lfns
            root_files = lfn_base.listdir(pattern="*.root")
            return [
                lfn_base.child(file_name, type="f").path
                for file_name in root_files
            ]

        # non-uhh datasets are handled by the default GetDatasetLFNs method
        return GetDatasetLFNs.get_dataset_lfns_dasgoclient(
            GetDatasetLFNs, dataset_inst=dataset_inst, shift_inst=shift_inst, dataset_key=dataset_key,
        )

    def get_remote_fs_uhh(dataset_inst: od.Dataset) -> list[str]:
        # only uhh datasets get custom remote file systems, all others get none
        if not is_uhh_dataset(dataset_inst):
            return []
        return [
            f"local_fs_{dataset_inst.x.campaign}",
            f"wlcg_fs_{dataset_inst.x.campaign}",
        ]

    # leave the config untouched when no uhh campaign is involved
    if not any("uhh" in cpn_name for cpn_name in cfg.campaign.x("campaigns", [])):
        return

    # the lfn retrieval function
    cfg.x.get_dataset_lfns = get_dataset_lfns_uhh

    # the sandbox to use for the lfn retrieval
    cfg.x.get_dataset_lfns_sandbox = dev_sandbox("bash::$CF_BASE/sandboxes/cf.sh")

    # the remote file systems to look at
    cfg.x.get_dataset_lfns_remote_fs = get_remote_fs_uhh
0 commit comments