Skip to content

Commit 0821387

Browse files
authored
Merge pull request #72 from uhh-cms/run3_config
Run3 updates
2 parents fbe366b + 0412083 commit 0821387

15 files changed

+272
-90
lines changed

hbw/calibration/default.py

+3-2
Original file line number | Diff line number | Diff line change
@@ -51,8 +51,9 @@ def base_init(self: Calibrator) -> None:
5151
if self.bjet_regression:
5252
self.calibrators.append(bjet_regression)
5353

54-
if self.dataset_inst.is_mc:
55-
# TODO: we might need to modify jer when using bjet calibration
54+
# run JER only on MC
55+
# and not for 2022 (TODO: update as soon as JER is done for Summer22)
56+
if self.dataset_inst.is_mc and not self.config_inst.campaign.x.year == 2022:
5657
self.calibrators.append(jer)
5758

5859
self.uses |= set(self.calibrators)

hbw/config/config_run2.py

+45-44
Original file line number | Diff line number | Diff line change
@@ -7,14 +7,14 @@
77
from __future__ import annotations
88

99
import os
10-
import re
1110

1211
import yaml
1312
from scinum import Number
1413
import law
1514
import order as od
1615

1716
from columnflow.util import DotDict
17+
from columnflow.config_util import add_shift_aliases
1818
from hbw.config.styling import stylize_processes
1919
from hbw.config.categories import add_categories_selection
2020
from hbw.config.variables import add_variables
@@ -139,14 +139,14 @@ def add_config(
139139
jerc_postfix = ""
140140
if year == 2016 and campaign.x.vfp == "post":
141141
jerc_postfix = "APV"
142-
elif year == 2022:
143-
jerc_postfix = f"{campaign.x.EE}EE"
142+
elif year == 2022 and campaign.x.EE == "post":
143+
jerc_postfix = "EE"
144144

145145
if cfg.x.run == 2:
146146
jerc_campaign = f"Summer19UL{year2}{jerc_postfix}"
147147
jet_type = "AK4PFchs"
148148
elif cfg.x.run == 3:
149-
jerc_campaign = f"Winter{year2}Run3"
149+
jerc_campaign = f"Summer{year2}{jerc_postfix}_22Sep2023"
150150
jet_type = "AK4PFPuppi"
151151

152152
cfg.x.jec = DotDict.wrap({
@@ -219,8 +219,8 @@ def add_config(
219219
# https://twiki.cern.ch/twiki/bin/view/CMS/JetResolution?rev=107
220220
# TODO: get jerc working for Run3
221221
cfg.x.jer = DotDict.wrap({
222-
"campaign": jerc_campaign if cfg.x.run == 2 else f"JR_{jerc_campaign}",
223-
"version": {2016: "JRV3", 2017: "JRV2", 2018: "JRV2", 2022: "V1"}[year],
222+
"campaign": jerc_campaign,
223+
"version": {2016: "JRV3", 2017: "JRV2", 2018: "JRV2", 2022: "V2"}[year],
224224
"jet_type": jet_type,
225225
})
226226

@@ -297,19 +297,6 @@ def add_config(
297297
# (used in the muon producer)
298298
cfg.x.muon_sf_names = ("NUM_TightRelIso_DEN_TightIDandIPCut", f"{cfg.x.cpn_tag}_UL")
299299

300-
# helper to add column aliases for both shifts of a source
301-
# TODO: switch to the columnflow function (but what happened to *selection_dependent*?)
302-
def add_shift_aliases(shift_source: str, aliases: dict[str], selection_dependent: bool):
303-
304-
for direction in ["up", "down"]:
305-
shift = cfg.get_shift(od.Shift.join_name(shift_source, direction))
306-
# format keys and values
307-
inject_shift = lambda s: re.sub(r"\{([^_])", r"{_\1", s).format(**shift.__dict__)
308-
_aliases = {inject_shift(key): inject_shift(value) for key, value in aliases.items()}
309-
alias_type = "column_aliases_selection_dependent" if selection_dependent else "column_aliases"
310-
# extend existing or register new column aliases
311-
shift.set_aux(alias_type, shift.get_aux(alias_type, {})).update(_aliases)
312-
313300
# register shifts
314301
# TODO: make shifts year-dependent
315302
cfg.add_shift(name="nominal", id=0)
@@ -320,27 +307,28 @@ def add_shift_aliases(shift_source: str, aliases: dict[str], selection_dependent
320307
cfg.add_shift(name="minbias_xs_up", id=7, type="shape")
321308
cfg.add_shift(name="minbias_xs_down", id=8, type="shape")
322309
add_shift_aliases(
310+
cfg,
323311
"minbias_xs",
324312
{
325313
"pu_weight": "pu_weight_{name}",
326314
"normalized_pu_weight": "normalized_pu_weight_{name}",
327315
},
328-
selection_dependent=False)
316+
)
329317
cfg.add_shift(name="top_pt_up", id=9, type="shape")
330318
cfg.add_shift(name="top_pt_down", id=10, type="shape")
331-
add_shift_aliases("top_pt", {"top_pt_weight": "top_pt_weight_{direction}"}, selection_dependent=False)
319+
add_shift_aliases(cfg, "top_pt", {"top_pt_weight": "top_pt_weight_{direction}"})
332320

333321
cfg.add_shift(name="e_sf_up", id=40, type="shape")
334322
cfg.add_shift(name="e_sf_down", id=41, type="shape")
335323
cfg.add_shift(name="e_trig_sf_up", id=42, type="shape")
336324
cfg.add_shift(name="e_trig_sf_down", id=43, type="shape")
337-
add_shift_aliases("e_sf", {"electron_weight": "electron_weight_{direction}"}, selection_dependent=False)
325+
add_shift_aliases(cfg, "e_sf", {"electron_weight": "electron_weight_{direction}"})
338326

339327
cfg.add_shift(name="mu_sf_up", id=50, type="shape")
340328
cfg.add_shift(name="mu_sf_down", id=51, type="shape")
341329
cfg.add_shift(name="mu_trig_sf_up", id=52, type="shape")
342330
cfg.add_shift(name="mu_trig_sf_down", id=53, type="shape")
343-
add_shift_aliases("mu_sf", {"muon_weight": "muon_weight_{direction}"}, selection_dependent=False)
331+
add_shift_aliases(cfg, "mu_sf", {"muon_weight": "muon_weight_{direction}"})
344332

345333
btag_uncs = [
346334
"hf", "lf", f"hfstats1_{year}", f"hfstats2_{year}",
@@ -350,12 +338,12 @@ def add_shift_aliases(shift_source: str, aliases: dict[str], selection_dependent
350338
cfg.add_shift(name=f"btag_{unc}_up", id=100 + 2 * i, type="shape")
351339
cfg.add_shift(name=f"btag_{unc}_down", id=101 + 2 * i, type="shape")
352340
add_shift_aliases(
341+
cfg,
353342
f"btag_{unc}",
354343
{
355344
"normalized_btag_weight": f"normalized_btag_weight_{unc}_" + "{direction}",
356345
"normalized_njet_btag_weight": f"normalized_njet_btag_weight_{unc}_" + "{direction}",
357346
},
358-
selection_dependent=False,
359347
)
360348

361349
cfg.add_shift(name="mur_up", id=201, type="shape")
@@ -368,28 +356,40 @@ def add_shift_aliases(shift_source: str, aliases: dict[str], selection_dependent
368356
cfg.add_shift(name="pdf_down", id=208, type="shape")
369357

370358
for unc in ["mur", "muf", "murf_envelope", "pdf"]:
371-
# add_shift_aliases(unc, {f"{unc}_weight": f"{unc}_weight_" + "{direction}"}, selection_dependent=False)
359+
# add_shift_aliases(cfg, unc, {f"{unc}_weight": f"{unc}_weight_" + "{direction}"})
372360
add_shift_aliases(
361+
cfg,
373362
unc,
374363
{f"normalized_{unc}_weight": f"normalized_{unc}_weight_" + "{direction}"},
375-
selection_dependent=False,
376364
)
377365

378366
with open(os.path.join(thisdir, "jec_sources.yaml"), "r") as f:
379367
all_jec_sources = yaml.load(f, yaml.Loader)["names"]
380368
for jec_source in cfg.x.jec["uncertainty_sources"]:
381369
idx = all_jec_sources.index(jec_source)
382-
cfg.add_shift(name=f"jec_{jec_source}_up", id=5000 + 2 * idx, type="shape")
383-
cfg.add_shift(name=f"jec_{jec_source}_down", id=5001 + 2 * idx, type="shape")
370+
cfg.add_shift(
371+
name=f"jec_{jec_source}_up",
372+
id=5000 + 2 * idx,
373+
type="shape",
374+
tags={"jec"},
375+
aux={"jec_source": jec_source},
376+
)
377+
cfg.add_shift(
378+
name=f"jec_{jec_source}_down",
379+
id=5001 + 2 * idx,
380+
type="shape",
381+
tags={"jec"},
382+
aux={"jec_source": jec_source},
383+
)
384384
add_shift_aliases(
385+
cfg,
385386
f"jec_{jec_source}",
386387
{"Jet.pt": "Jet.pt_{name}", "Jet.mass": "Jet.mass_{name}"},
387-
selection_dependent=True,
388388
)
389389

390-
cfg.add_shift(name="jer_up", id=6000, type="shape", tags={"selection_dependent"})
391-
cfg.add_shift(name="jer_down", id=6001, type="shape", tags={"selection_dependent"})
392-
add_shift_aliases("jer", {"Jet.pt": "Jet.pt_{name}", "Jet.mass": "Jet.mass_{name}"}, selection_dependent=True)
390+
cfg.add_shift(name="jer_up", id=6000, type="shape", tags={"jer"})
391+
cfg.add_shift(name="jer_down", id=6001, type="shape", tags={"jer"})
392+
add_shift_aliases(cfg, "jer", {"Jet.pt": "Jet.pt_{name}", "Jet.mass": "Jet.mass_{name}"})
393393

394394
def make_jme_filename(jme_aux, sample_type, name, era=None):
395395
"""
@@ -409,11 +409,13 @@ def make_jme_filename(jme_aux, sample_type, name, era=None):
409409
json_mirror = "/afs/cern.ch/user/m/mrieger/public/mirrors/jsonpog-integration-9ea86c4c"
410410
corr_tag = f"{cfg.x.cpn_tag}_UL"
411411
elif cfg.x.run == 3:
412-
# TODO: Update when possible
413-
json_mirror = "/afs/desy.de/user/p/paaschal/public/mirrors/jsonpog-integration"
414-
corr_tag = f"{year}_Prompt"
412+
json_mirror = "/afs/cern.ch/user/m/mfrahm/public/mirrors/jsonpog-integration-f35ab53e"
413+
corr_tag = f"{year}_Summer22{jerc_postfix}"
415414

416415
cfg.x.external_files = DotDict.wrap({
416+
# pileup weight corrections
417+
"pu_sf": (f"{json_mirror}/POG/LUM/{corr_tag}/puWeights.json.gz", "v1"),
418+
417419
# jet energy correction
418420
"jet_jerc": (f"{json_mirror}/POG/JME/{corr_tag}/jet_jerc.json.gz", "v1"),
419421

@@ -431,21 +433,20 @@ def make_jme_filename(jme_aux, sample_type, name, era=None):
431433
})
432434

433435
# temporary fix due to missing corrections in run 3
436+
# electron, muon and met still missing, btag, and pu are TODO
434437
if cfg.x.run == 3:
438+
cfg.add_tag("skip_pu_weights")
439+
435440
cfg.add_tag("skip_btag_weights")
441+
cfg.x.external_files.pop("btag_sf_corr")
442+
436443
cfg.add_tag("skip_electron_weights")
437-
cfg.add_tag("skip_muon_weights")
438-
cfg.add_tag("skip_pu_weights")
439444
cfg.x.external_files.pop("electron_sf")
445+
446+
cfg.add_tag("skip_muon_weights")
440447
cfg.x.external_files.pop("muon_sf")
441-
cfg.x.external_files.pop("btag_sf_corr")
442-
cfg.x.external_files.pop("met_phi_corr")
443448

444-
# NOTE: we should mirror and gzip them
445-
if campaign.x.EE == "pre":
446-
cfg.x.external_files["muon_sf"] = ("https://gitlab.cern.ch/cms-muonPOG/muonefficiencies/-/raw/master/Run3/2022/2022_Z/ScaleFactors_Muon_Z_ID_ISO_2022_schemaV2.json", "v1") # noqa
447-
elif campaign.x.EE == "post":
448-
cfg.x.external_files["muon_sf"] = ("https://gitlab.cern.ch/cms-muonPOG/muonefficiencies/-/raw/master/Run3/2022_EE/2022_Z/ScaleFactors_Muon_Z_ID_ISO_2022_EE_schemaV2.json", "v1") # noqa
449+
cfg.x.external_files.pop("met_phi_corr")
449450

450451
cfg.x.met_filters = {
451452
"Flag.goodVertices",

hbw/config/datasets.py

+28-6
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,9 @@
5252
"data_e_e",
5353
"data_e_f",
5454
],
55+
}
56+
57+
data_egamma = {
5558
"2022preEE": [
5659
"data_egamma_c",
5760
"data_egamma_d",
@@ -170,7 +173,7 @@
170173
"qcd_mu_pt1000_pythia",
171174
],
172175
"2022postEE": [
173-
"qcd_mu_pt15to20_pythia", # 1 file not possible to open
176+
# "qcd_mu_pt15to20_pythia", # empty after selection
174177
"qcd_mu_pt20to30_pythia",
175178
"qcd_mu_pt30to50_pythia",
176179
"qcd_mu_pt50to80_pythia",
@@ -179,12 +182,12 @@
179182
"qcd_mu_pt170to300_pythia",
180183
"qcd_mu_pt300to470_pythia",
181184
"qcd_mu_pt470to600_pythia",
182-
"qcd_mu_pt600to800_pythia", # 1 file not possible to open
185+
"qcd_mu_pt600to800_pythia",
183186
"qcd_mu_pt800to1000_pythia",
184187
"qcd_mu_pt1000_pythia",
185188
],
186189
"2022preEE": [
187-
"qcd_mu_pt15to20_pythia", # 1 file not possible to open
190+
"qcd_mu_pt15to20_pythia",
188191
"qcd_mu_pt20to30_pythia",
189192
"qcd_mu_pt30to50_pythia",
190193
"qcd_mu_pt50to80_pythia",
@@ -193,7 +196,7 @@
193196
"qcd_mu_pt170to300_pythia",
194197
"qcd_mu_pt300to470_pythia",
195198
"qcd_mu_pt470to600_pythia",
196-
"qcd_mu_pt600to800_pythia", # 1 file not possible to open
199+
"qcd_mu_pt600to800_pythia",
197200
"qcd_mu_pt800to1000_pythia",
198201
"qcd_mu_pt1000_pythia",
199202
],
@@ -211,7 +214,22 @@
211214
"qcd_em_pt300toInf_pythia",
212215
],
213216
"2022postEE": [
214-
# empty for now
217+
# "qcd_em_pt10to30_pythia", # missing process + probably empty anyways
218+
# "qcd_em_pt30to50_pythia", # empty after selection
219+
"qcd_em_pt50to80_pythia",
220+
"qcd_em_pt80to120_pythia",
221+
"qcd_em_pt120to170_pythia",
222+
"qcd_em_pt170to300_pythia",
223+
"qcd_em_pt300toInf_pythia",
224+
],
225+
"2022preEE": [
226+
# "qcd_em_pt10to30_pythia", # missing process + probably empty anyways
227+
"qcd_em_pt30to50_pythia",
228+
"qcd_em_pt50to80_pythia",
229+
"qcd_em_pt80to120_pythia",
230+
"qcd_em_pt120to170_pythia",
231+
"qcd_em_pt170to300_pythia",
232+
"qcd_em_pt300toInf_pythia",
215233
],
216234
}
217235

@@ -286,8 +304,11 @@
286304
"ggHH_kl_2p45_kt_1_dl_hbbhww_powheg",
287305
"ggHH_kl_5_kt_1_dl_hbbhww_powheg",
288306
],
307+
"2022preEE": [
308+
"ggHH_kl_1_kt_1_dl_hbbhww_powheg",
309+
],
289310
"2022postEE": [
290-
# empty for now
311+
"ggHH_kl_1_kt_1_dl_hbbhww_powheg",
291312
],
292313
}
293314

@@ -363,6 +384,7 @@ def get_dataset_names(cpn_tag: int | str, as_list: bool = False) -> DotDict[str:
363384
dataset_names = DotDict.wrap(
364385
data_mu=data_mu.get(cpn_tag, []),
365386
data_e=data_e.get(cpn_tag, []),
387+
data_egamma=data_egamma.get(cpn_tag, []),
366388
tt=tt.get(cpn_tag, []),
367389
st=st.get(cpn_tag, []),
368390
w_lnu=w_lnu.get(cpn_tag, []),

hbw/config/defaults_and_groups.py

+4-4
Original file line number | Diff line number | Diff line change
@@ -142,7 +142,7 @@ def set_config_defaults_and_groups(config_inst):
142142
"signal": ["ggHH_*", "qqHH_*"], "gghh": ["ggHH_*"], "qqhh": ["qqHH_*"],
143143
}
144144
config_inst.x.process_groups["dmuch"] = ["data_mu"] + config_inst.x.process_groups["much"]
145-
config_inst.x.process_groups["dech"] = ["data_e"] + config_inst.x.process_groups["ech"]
145+
config_inst.x.process_groups["dech"] = ["data_e", "data_egamma"] + config_inst.x.process_groups["ech"]
146146

147147
# dataset groups for conveniently looping over certain datasets
148148
# (used in wrapper_factory and during plotting)
@@ -167,8 +167,8 @@ def set_config_defaults_and_groups(config_inst):
167167
"sl_ech": ["1e", "1e__resolved", "1e__boosted"],
168168
"sl_much_resolved": ["1mu__resolved", "1mu__resolved__1b", "1mu__resolved__2b"],
169169
"sl_ech_resolved": ["1e__resolved", "1e__resolved__1b", "1e__resolved__2b"],
170-
"sl_much_boosted": ["1mu__boosted", "1mu_boosted__1b", "1mu_boosted__2b"],
171-
"sl_ech_boosted": ["1e__boosted", "1e__boosted__1b", "1e__boosted__2b"],
170+
"sl_much_boosted": ["1mu__boosted"],
171+
"sl_ech_boosted": ["1e__boosted"],
172172
"default": ["incl", "1e", "1mu"],
173173
"test": ["incl", "1e"],
174174
"dilep": ["incl", "2e", "2mu", "emu"],
@@ -240,7 +240,7 @@ def set_config_defaults_and_groups(config_inst):
240240
"unstack_signal": {proc.name: {"unstack": True} for proc in config_inst.processes if "HH" in proc.name},
241241
"scale_signal": {
242242
proc.name: {"unstack": True, "scale": 10000}
243-
for proc in config_inst.processes if "HH" in proc.name
243+
for proc, _, _ in config_inst.walk_processes() if proc.has_tag("is_signal")
244244
},
245245
"dilep": {
246246
"ggHH_kl_0_kt_1_dl_hbbhww": {"scale": 10000, "unstack": True},

hbw/config/processes.py

+9-1
Original file line number | Diff line number | Diff line change
@@ -78,6 +78,14 @@ def configure_hbw_processes(config: od.Config):
7878
id=31199,
7979
label="QCD Electron enriched",
8080
)
81+
elif qcd_em:
82+
qcd_ele = add_parent_process( # noqa
83+
config,
84+
[qcd_em],
85+
name="qcd_ele",
86+
id=31199,
87+
label="QCD Electron enriched",
88+
)
8189

8290
# custom v_lep process for ML Training, combining W+DY
8391
w_lnu = config.get_process("w_lnu")
@@ -121,7 +129,7 @@ def configure_hbw_processes(config: od.Config):
121129
sig.add_process(proc)
122130

123131
# add auxiliary information if process is signal
124-
for proc_inst in config.processes:
132+
for proc_inst, _, _ in config.walk_processes():
125133
is_signal = any([
126134
signal_tag in proc_inst.name
127135
for signal_tag in ("qqHH", "ggHH", "radion", "gravition")

hbw/config/variables.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -267,23 +267,23 @@ def add_variables(config: od.Config) -> None:
267267
deepjet_wps = config.x.btag_working_points.deepjet
268268
config.add_variable(
269269
name="n_deepjet_loose",
270-
expression=lambda events: ak.num(events.Jet.btagDeepFlavB > deepjet_wps.loose, axis=1),
270+
expression=lambda events: ak.sum(events.Jet.btagDeepFlavB > deepjet_wps.loose, axis=1),
271271
aux={"inputs": {"Jet.pt", "Jet.btagDeepFlavB"}},
272272
binning=(7, -0.5, 6.5),
273273
x_title="Number of deepjets (loose WP)",
274274
discrete_x=True,
275275
)
276276
config.add_variable(
277277
name="n_deepjet_medium",
278-
expression=lambda events: ak.num(events.Jet.btagDeepFlavB > deepjet_wps.medium, axis=1),
278+
expression=lambda events: ak.sum(events.Jet.btagDeepFlavB > deepjet_wps.medium, axis=1),
279279
aux={"inputs": {"Jet.pt", "Jet.btagDeepFlavB"}},
280280
binning=(7, -0.5, 6.5),
281281
x_title="Number of deepjets (medium WP)",
282282
discrete_x=True,
283283
)
284284
config.add_variable(
285285
name="n_deepjet_tight",
286-
expression=lambda events: ak.num(events.Jet.btagDeepFlavB > deepjet_wps.tight, axis=1),
286+
expression=lambda events: ak.sum(events.Jet.btagDeepFlavB > deepjet_wps.tight, axis=1),
287287
aux={"inputs": {"Jet.pt", "Jet.btagDeepFlavB"}},
288288
binning=(7, -0.5, 6.5),
289289
x_title="Number of deepjets (tight WP)",

0 commit comments

Comments
 (0)