Skip to content

Commit d2a3687

Browse files
committed
enable usage of uhh campaigns
1 parent 64c45b9 commit d2a3687

File tree

3 files changed

+81
-2
lines changed

3 files changed

+81
-2
lines changed

hbw/config/datasets.py

+44-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from scinum import Number
1414

1515
import cmsdb.processes as cmsdb_procs
16-
from columnflow.util import DotDict
16+
from columnflow.util import DotDict, dev_sandbox
1717
from columnflow.tasks.external import GetDatasetLFNs
1818
from columnflow.config_util import get_root_processes_from_campaign
1919

@@ -397,6 +397,7 @@ def configure_hbw_datasets(
397397
limit_dataset_files: int | None = None,
398398
add_dataset_extensions: bool = False,
399399
):
400+
enable_uhh_campaign_usage(config)
400401
for dataset in config.datasets:
401402
if add_dataset_extensions:
402403
add_dataset_extension_to_nominal(dataset)
@@ -541,3 +542,45 @@ def get_dataset_lfns_2017(
541542
lfn_base.child(basename, type="f").path
542543
for basename in lfn_base.listdir(pattern="*.root")
543544
]
545+
546+
547+
def enable_uhh_campaign_usage(cfg: od.Config) -> None:
    """
    Register custom LFN lookup hooks on *cfg* when it is built from custom "uhh" campaigns.

    The config is left untouched unless at least one name in the ``campaigns`` auxiliary
    entry of ``cfg.campaign`` contains the substring "uhh". Otherwise, the auxiliary entries
    ``get_dataset_lfns``, ``get_dataset_lfns_sandbox`` and ``get_dataset_lfns_remote_fs`` of
    *cfg* are set.
    """

    def get_dataset_lfns_uhh(
        dataset_inst: od.Dataset,
        shift_inst: od.Shift,
        dataset_key: str,
    ) -> list[str]:
        """
        Return the lfns of *dataset_inst* for *dataset_key*.

        Datasets whose ``campaign`` auxiliary entry does not contain "uhh" are delegated to
        ``GetDatasetLFNs.get_dataset_lfns_dasgoclient``. For all others, the ``*.root`` files
        in a directory derived from *dataset_key* are listed on the file system named
        ``wlcg_fs_<campaign>``.
        """
        campaign_name = dataset_inst.x("campaign", "")
        if "uhh" not in campaign_name:
            # not a custom uhh dataset, fall back to the default dasgoclient lookup
            return GetDatasetLFNs.get_dataset_lfns_dasgoclient(
                GetDatasetLFNs,
                dataset_inst=dataset_inst,
                shift_inst=shift_inst,
                dataset_key=dataset_key,
            )

        # the key must have the form "/<dataset_id>/<main_campaign>-<sub_campaign>/<tier>",
        # otherwise the unpacking below raises a ValueError
        key_parts = dataset_key.split("/")[1:]
        dataset_id, full_campaign, tier = key_parts
        main_campaign, sub_campaign = full_campaign.split("-", 1)

        # directory holding the files, relative to the base of the campaign-specific file system
        store_path = f"/store/{dataset_inst.data_source}/{main_campaign}/{dataset_id}/{tier}/{sub_campaign}/0"
        lfn_dir = law.wlcg.WLCGDirectoryTarget(store_path, fs=f"wlcg_fs_{campaign_name}")

        # loop through the root files and interpret their paths as lfns
        return [
            lfn_dir.child(file_name, type="f").path
            for file_name in lfn_dir.listdir(pattern="*.root")
        ]

    def get_remote_fs_uhh(dataset_inst):
        """
        Return the names of the campaign-specific file systems of *dataset_inst*, local one
        first, or an empty list when its ``campaign`` auxiliary entry does not contain "uhh".
        """
        if "uhh" not in dataset_inst.x("campaign", ""):
            return []
        campaign_name = dataset_inst.x.campaign
        return [
            f"local_fs_{campaign_name}",
            f"wlcg_fs_{campaign_name}",
        ]

    # nothing to do when no uhh campaign is involved
    uses_uhh_campaign = any("uhh" in name for name in cfg.campaign.x("campaigns", []))
    if not uses_uhh_campaign:
        return

    # lfn retrieval function
    cfg.x.get_dataset_lfns = get_dataset_lfns_uhh

    # sandbox in which the lfn retrieval is executed
    cfg.x.get_dataset_lfns_sandbox = dev_sandbox("bash::$CF_BASE/sandboxes/cf.sh")

    # remote file systems to look at, resolved per dataset
    cfg.x.get_dataset_lfns_remote_fs = get_remote_fs_uhh

hbw/tasks/campaigns.py

+2
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,8 @@ def get_custom_campaign(self):
137137
dataset_inst.x.campaign = campaign_inst.name
138138
hbw_campaign_inst.add_dataset(dataset_inst)
139139

140+
hbw_campaign_inst.x.campaigns = list(self.campaigns)
141+
140142
return hbw_campaign_inst
141143

142144
from hbw.util import timeit_multiple

law.cfg

+35-1
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ check_overlapping_inputs: None
7373
[outputs]
7474

7575
# list of all used file systems
76-
wlcg_file_systems: wlcg_fs, wlcg_fs_desy, wlcg_fs_cernbox, wlcg_fs_desy_store, wlcg_fs_infn_redirector, wlcg_fs_global_redirector
76+
wlcg_file_systems: wlcg_fs, wlcg_fs_desy, wlcg_fs_cernbox, wlcg_fs_desy_store, wlcg_fs_infn_redirector, wlcg_fs_global_redirector, wlcg_fs_run3_2022_preEE_nano_uhh_v12, wlcg_fs_run3_2022_postEE_nano_uhh_v12
7777

7878
# list of file systems used by columnflow.tasks.external.GetDatasetLFNs.iter_nano_files to
7979
# look for the correct fs per nano input file (in that order)
@@ -259,6 +259,40 @@ gsiftp_base: gsiftp://dcache-door-cms04.desy.de:2811/pnfs/desy.de/cms/tier2/stor
259259
base: &::gsiftp_base
260260

261261

262+
[wlcg_fs_run3_2022_preEE_nano_uhh_v12]
263+
264+
webdav_base: davs://dcache-cms-webdav-wan.desy.de:2880/pnfs/desy.de/cms/tier2/store/user/nprouvos/nanogen_store/MergeNano/config_22pre_v12/prod3
265+
gsiftp_base: gsiftp://dcache-door-cms04.desy.de:2811/pnfs/desy.de/cms/tier2/store/user/nprouvos/nanogen_store/MergeNano/config_22pre_v12/prod3
266+
xrootd_base: root://dcache-cms-xrootd.desy.de:1094/pnfs/desy.de/cms/tier2/store/user/nprouvos/nanogen_store/MergeNano/config_22pre_v12/prod3
267+
base: &::xrootd_base
268+
use_cache: $CF_WLCG_USE_CACHE
269+
cache_root: $CF_WLCG_CACHE_ROOT
270+
cache_cleanup: $CF_WLCG_CACHE_CLEANUP
271+
cache_max_size: 15GB
272+
cache_global_lock: True
273+
274+
[local_fs_run3_2022_preEE_nano_uhh_v12]
275+
276+
base: file:///pnfs/desy.de/cms/tier2/store/user/nprouvos/nanogen_store/MergeNano/config_22pre_v12/prod3
277+
278+
279+
[wlcg_fs_run3_2022_postEE_nano_uhh_v12]
280+
281+
webdav_base: davs://dcache-cms-webdav-wan.desy.de:2880/pnfs/desy.de/cms/tier2/store/user/aalvesan/nanogen_store/MergeNano/config_22post_v12/prod1
282+
gsiftp_base: gsiftp://dcache-door-cms04.desy.de:2811/pnfs/desy.de/cms/tier2/store/user/aalvesan/nanogen_store/MergeNano/config_22post_v12/prod1
283+
xrootd_base: root://dcache-cms-xrootd.desy.de:1094/pnfs/desy.de/cms/tier2/store/user/aalvesan/nanogen_store/MergeNano/config_22post_v12/prod1
284+
base: &::xrootd_base
285+
use_cache: $CF_WLCG_USE_CACHE
286+
cache_root: $CF_WLCG_CACHE_ROOT
287+
cache_cleanup: $CF_WLCG_CACHE_CLEANUP
288+
cache_max_size: 15GB
289+
cache_global_lock: True
290+
291+
[local_fs_run3_2022_postEE_nano_uhh_v12]
292+
293+
base: file:///pnfs/desy.de/cms/tier2/store/user/aalvesan/nanogen_store/MergeNano/config_22post_v12/prod1
294+
295+
262296

263297
[luigi_resources]
264298

0 commit comments

Comments
 (0)