Skip to content

Commit df0f707

Browse files
authored
Merge pull request #107 from uhh-cms/feature/calibrations_cleanup
Feature/calibrations cleanup
2 parents c88c9bc + 95c8198 commit df0f707

26 files changed

+1052
-188
lines changed

hbw/calibration/default.py

+138-52
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,21 @@
99
from columnflow.calibration import Calibrator, calibrator
1010
from columnflow.calibration.cms.met import met_phi
1111
from columnflow.calibration.cms.jets import jec, jer
12-
from columnflow.production.cms.seeds import deterministic_seeds
12+
from columnflow.calibration.cms.egamma import electrons
13+
from columnflow.production.cms.seeds import (
14+
deterministic_seeds, deterministic_electron_seeds, deterministic_event_seeds,
15+
)
16+
from columnflow.production.cms.supercluster_eta import electron_sceta
1317
from columnflow.util import maybe_import, try_float
1418
from columnflow.columnar_util import set_ak_column, EMPTY_FLOAT
1519

1620
from hbw.util import MET_COLUMN
1721

1822
from hbw.calibration.jet import bjet_regression
23+
# from hbw.calibration.jet import (
24+
# fatjet_jec_data, fatjet_jec_total, fatjet_jer,
25+
# bjet_regression,
26+
# )
1927

2028
ak = maybe_import("awkward")
2129
np = maybe_import("numpy")
@@ -24,17 +32,49 @@
2432
logger = law.logger.get_logger(__name__)
2533

2634

35+
# customized electron calibrator (also needs deterministic event seeds...)
36+
electrons.deterministic_seed_index = 0
37+
38+
39+
@calibrator(
    version=1,
    uses={electron_sceta, deterministic_event_seeds, deterministic_electron_seeds},
    produces={deterministic_event_seeds, deterministic_electron_seeds},
)
def ele(self: Calibrator, events: ak.Array, **kwargs) -> ak.Array:
    """
    Electron calibrator, combining scale and resolution.

    Runs the super cluster eta producer and the deterministic event and electron seed
    producers (in that order) before applying the calibrator class stored in
    ``self.electron_calib_cls``. Returns the events as returned by that calibrator.
    """
    # columns needed by the calibration: super cluster eta first, then the seeds
    prerequisites = (electron_sceta, deterministic_event_seeds, deterministic_electron_seeds)
    for dependency in prerequisites:
        events = self[dependency](events, **kwargs)

    # apply the electron calibration on the prepared events
    return self[self.electron_calib_cls](events, **kwargs)
57+
58+
59+
@ele.init
def ele_init(self: Calibrator) -> None:
    """
    Store the electron calibrator class on the instance and register it in both
    *uses* and *produces* of this calibrator.
    """
    calib_cls = electrons
    self.electron_calib_cls = calib_cls

    # the calibration class is consumed and produced by this calibrator
    self.uses |= {calib_cls}
    self.produces |= {calib_cls}
65+
66+
2767
@calibrator(
28-
# jec uncertainty_sources: set to None to use config default
29-
jec_sources=["Total"],
3068
version=1,
3169
# add dummy produces such that this calibrator will always be run when requested
3270
# (temporary workaround until init's are only run as often as necessary)
71+
# TODO: deterministic FatJet seeds
3372
produces={"FatJet.pt"},
3473
)
3574
def fatjet(self: Calibrator, events: ak.Array, **kwargs) -> ak.Array:
3675
"""
3776
FatJet calibrator, combining JEC and JER.
77+
Uses as JEC uncertainty source either only "Total" for MC or no uncertainty source for data.
3878
"""
3979
if self.task.local_shift != "nominal":
4080
raise Exception("FatJet Calibrator should not be run for shifts other than nominal")
@@ -53,42 +93,65 @@ def fatjet_init(self: Calibrator) -> None:
5393
# init only required for task itself
5494
return
5595

96+
# derive calibrators to add settings once
97+
flag = f"custom_fatjet_calibs_registered_{self.cls_name}"
98+
if not self.config_inst.x(flag, False):
99+
fatjet_jec_cls_dict = {
100+
"jet_name": "FatJet",
101+
"gen_jet_name": "GenJetAK8",
102+
# MET propagation is performed in AK4 jet calibrator; fatjet should never use any MET columns
103+
"propagate_met": False,
104+
"met_name": "DO_NOT_USE",
105+
"raw_met_name": "DO_NOT_USE",
106+
}
107+
fatjet_jer_cls_dict = fatjet_jec_cls_dict.copy()
108+
# NOTE: deterministic FatJet seeds are not yet possible to produce
109+
# fatjet_jer_cls_dict["deterministic_seed_index"] = 0
110+
111+
# fatjet_jec = jec.derive("fatjet_jec", cls_dict={
112+
# **fatjet_jec_cls_dict,
113+
# })
114+
self.config_inst.x.fatjet_jec_data_cls = jec.derive("fatjet_jec_data", cls_dict={
115+
**fatjet_jec_cls_dict,
116+
"data_only": True,
117+
"nominal_only": True,
118+
"uncertainty_sources": [],
119+
})
120+
self.config_inst.x.fatjet_jec_total_cls = jec.derive("fatjet_jec_total", cls_dict={
121+
**fatjet_jec_cls_dict,
122+
"mc_only": True,
123+
"nominal_only": True,
124+
"uncertainty_sources": ["Total"],
125+
})
126+
self.config_inst.x.fatjet_jer_cls = jer.derive("deterministic_fatjet_jer", cls_dict=fatjet_jer_cls_dict)
127+
128+
# change the flag
129+
self.config_inst.set_aux(flag, True)
130+
56131
if not getattr(self, "dataset_inst", None):
57132
return
58133

59-
# list of calibrators to apply (in that order)
60-
self.calibrators = []
61-
62-
fatjet_jec_cls_dict = {
63-
"jet_name": "FatJet",
64-
"gen_jet_name": "GenJetAK8",
65-
# MET propagation is performed in AK4 jet calibrator; fatjet should never use any MET columns
66-
"propagate_met": False,
67-
"met_name": "DO_NOT_USE",
68-
"raw_met_name": "DO_NOT_USE",
69-
}
70-
fatjet_jer_cls_dict = fatjet_jec_cls_dict.copy()
71-
# NOTE: deterministic FatJet seeds are not yet possible to produce
72-
# fatjet_jer_cls_dict["deterministic_seed_index"] = 0
73-
74-
uncertainty_sources = [] if self.dataset_inst.is_data else self.jec_sources
75-
jec_cls_name = f"fatjet_jec{'_nominal' if uncertainty_sources == [] else ''}"
76-
self.fatjet_jec_cls = jec.derive(jec_cls_name, cls_dict={
77-
**fatjet_jec_cls_dict,
78-
"uncertainty_sources": uncertainty_sources,
79-
})
80-
self.fatjet_jer_cls = jer.derive("deterministic_fatjet_jer", cls_dict=fatjet_jer_cls_dict)
134+
# choose the JEC and JER calibrators based on the dataset instance
135+
self.fatjet_jec_cls = (
136+
self.config_inst.x.fatjet_jec_total_cls if self.dataset_inst.is_mc
137+
else self.config_inst.x.fatjet_jec_data_cls
138+
)
139+
self.fatjet_jer_cls = self.config_inst.x.fatjet_jer_cls
81140

82141
self.uses |= {self.fatjet_jec_cls, self.fatjet_jer_cls}
83142
self.produces |= {self.fatjet_jec_cls, self.fatjet_jer_cls}
84143

85144

145+
fatjet_test = fatjet.derive("fatjet_test")
146+
147+
86148
@calibrator(
87149
uses={deterministic_seeds, MET_COLUMN("{pt,phi}")},
88150
produces={deterministic_seeds},
89151
# jec uncertainty_sources: set to None to use config default
90152
jec_sources=["Total"],
91153
bjet_regression=True,
154+
skip_jer=False,
92155
version=1,
93156
)
94157
def jet_base(self: Calibrator, events: ak.Array, **kwargs) -> ak.Array:
@@ -126,51 +189,74 @@ def jet_base_init(self: Calibrator) -> None:
126189
# init only required for task itself
127190
return
128191

192+
# derive calibrators to add settings once
193+
flag = f"custom_jet_calibs_registered_{self.cls_name}"
194+
if not self.config_inst.x(flag, False):
195+
met_name = self.config_inst.x.met_name
196+
raw_met_name = self.config_inst.x.raw_met_name
197+
198+
jec_cls_kwargs = {
199+
"nominal_only": True,
200+
"met_name": met_name,
201+
"raw_met_name": raw_met_name,
202+
}
203+
204+
# jec calibrators
205+
self.config_inst.x.calib_jec_full_cls = jec.derive("jec_full", cls_dict={
206+
**jec_cls_kwargs,
207+
"mc_only": True,
208+
"uncertainty_sources": self.jec_sources,
209+
})
210+
self.config_inst.x.calib_jec_data_cls = jec.derive("jec_data", cls_dict={
211+
**jec_cls_kwargs,
212+
"data_only": True,
213+
"uncertainty_sources": [],
214+
})
215+
# version of jer that uses the first random number from deterministic_seeds
216+
self.config_inst.x.calib_deterministic_jer_cls = jer.derive("deterministic_jer", cls_dict={
217+
"deterministic_seed_index": 0,
218+
"met_name": met_name,
219+
})
220+
# derive met_phi calibrator (currently only used in run 2)
221+
self.config_inst.x.calib_met_phi_cls = met_phi.derive("met_phi", cls_dict={
222+
"met_name": met_name,
223+
})
224+
# change the flag
225+
self.config_inst.set_aux(flag, True)
226+
129227
if not getattr(self, "dataset_inst", None):
130228
return
131229

132-
met_name = self.config_inst.x.met_name
133-
raw_met_name = self.config_inst.x.raw_met_name
134-
135230
# list of calibrators to apply (in that order)
136231
self.calibrators = []
137232

138-
uncertainty_sources = [] if self.dataset_inst.is_data else self.jec_sources
139-
jec_cls_name = f"ak4_jec{'_nominal' if uncertainty_sources == [] else ''}"
140-
141-
jec_cls = jec.derive(
142-
jec_cls_name,
143-
cls_dict={
144-
"uncertainty_sources": uncertainty_sources,
145-
"met_name": met_name,
146-
"raw_met_name": raw_met_name,
147-
},
233+
# JEC
234+
jec_cls = (
235+
self.config_inst.x.calib_jec_full_cls if self.dataset_inst.is_mc
236+
else self.config_inst.x.calib_jec_data_cls
148237
)
149238
self.calibrators.append(jec_cls)
150239

240+
# BJet regression
151241
if self.bjet_regression:
152242
self.calibrators.append(bjet_regression)
153243

154-
# run JER only on MC
155-
if self.dataset_inst.is_mc:
156-
# version of jer that uses the first random number from deterministic_seeds
157-
deterministic_jer_cls = jer.derive(
158-
"deterministic_jer",
159-
cls_dict={
160-
"deterministic_seed_index": 0,
161-
"met_name": met_name,
162-
},
163-
)
164-
self.calibrators.append(deterministic_jer_cls)
244+
# JER (only for MC)
245+
jer_cls = self.config_inst.x.calib_deterministic_jer_cls
246+
if self.dataset_inst.is_mc and not self.skip_jer:
247+
self.calibrators.append(jer_cls)
165248

249+
# MET phi (only in run 2)
166250
if self.config_inst.x.run == 2:
167-
# derive met_phi calibrator (currently only for run 2)
168-
met_phi_cls = met_phi.derive("met_phi", cls_dict={"met_name": met_name})
251+
met_phi_cls = self.config_inst.x.calib_met_phi_cls
169252
self.calibrators.append(met_phi_cls)
170253

171254
self.uses |= set(self.calibrators)
172255
self.produces |= set(self.calibrators)
173256

174257

175-
skip_jecunc = jet_base.derive("skip_jecunc", cls_dict=dict(bjet_regression=False))
258+
jec_only = jet_base.derive("jec_only", cls_dict=dict(bjet_regression=False, skip_jer=True))
259+
skip_jer = jet_base.derive("skip_jer", cls_dict=dict(bjet_regression=True, skip_jer=True))
260+
no_breg = jet_base.derive("no_breg", cls_dict=dict(bjet_regression=False))
176261
with_b_reg = jet_base.derive("with_b_reg", cls_dict=dict(bjet_regression=True))
262+
with_b_reg_test = jet_base.derive("with_b_reg_test", cls_dict=dict(bjet_regression=True))

hbw/calibration/jet.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010

1111
from columnflow.util import maybe_import, try_float
1212
from columnflow.columnar_util import set_ak_column
13-
from columnflow.calibration.cms.jets import jec
1413
from columnflow.calibration import calibrator, Calibrator
1514

1615

@@ -24,9 +23,9 @@
2423

2524
set_ak_column_f32 = functools.partial(set_ak_column, value_type=np.float32)
2625

27-
28-
# custom jec calibrator that only runs nominal correction
29-
jec_nominal = jec.derive("jec_nominal", cls_dict={"uncertainty_sources": ["Total"]})
26+
#
27+
# BJet calibrator
28+
#
3029

3130

3231
@calibrator(

hbw/categorization/categories.py

+28-4
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from columnflow.selection import SelectionResult
1414
from columnflow.columnar_util import has_ak_column, optional_column
1515

16-
from hbw.util import MET_COLUMN
16+
from hbw.util import MET_COLUMN, BTAG_COLUMN
1717

1818
np = maybe_import("numpy")
1919
ak = maybe_import("awkward")
@@ -125,6 +125,23 @@ def catid_lep(
125125
catid_emu = catid_lep.derive("catid_emu", cls_dict={"n_electron": 1, "n_muon": 1})
126126

127127

128+
@categorizer(
    uses={"{Muon,Electron}.pt"},
)
def catid_ge3lep(
    self: Categorizer, events: ak.Array, results: SelectionResult | None = None, **kwargs,
) -> tuple[ak.Array, ak.Array]:
    """
    Categorizer selecting events with at least three leptons (electrons plus muons).

    When *results* is given, only the electrons and muons selected through
    ``results.objects.Electron.Electron`` and ``results.objects.Muon.Muon`` are counted;
    otherwise all electrons and muons of the event are used. A lepton is counted when
    its ``pt`` is larger than zero. Returns the unchanged events and the category mask.
    """
    electron, muon = events.Electron, events.Muon
    if results:
        # restrict to the objects picked by the selection results
        electron = electron[results.objects.Electron.Electron]
        muon = muon[results.objects.Muon.Muon]

    n_electron = ak.sum(electron.pt > 0, axis=-1)
    n_muon = ak.sum(muon.pt > 0, axis=-1)

    mask = (n_electron + n_muon) >= 3
    return events, mask
143+
144+
128145
#
129146
# Categorizer for ABCD (either during cf.SelectEvents or cf.ProduceColumns)
130147
#
@@ -246,9 +263,6 @@ def catid_njet3(
246263
return events, mask
247264

248265

249-
from hbw.util import BTAG_COLUMN
250-
251-
252266
@categorizer(uses={BTAG_COLUMN("Jet")})
253267
def catid_1b(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]:
254268
btag_column = self.config_inst.x.btag_column
@@ -301,3 +315,13 @@ def dnn_mask(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, a
301315
outp_mask = outp_mask & mask
302316

303317
return events, outp_mask
318+
319+
320+
@categorizer(uses={"{Electron,Muon}.{pt,eta,phi,mass}", "mll"})
def mask_fn_highpt(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]:
    """
    Categorizer that selects events in the phase space that we understand.
    Needs to be used in combination with a Producer that defines the leptons.

    The mask requires pt > 70 for the first entry of ``events.Lepton``, pt > 50 for the
    second entry and ``events.mll`` > 20.
    """
    # NOTE(review): the "Lepton" column is read here but not declared in *uses*; presumably
    # the accompanying Producer provides it with at least two entries per event - confirm
    first_lepton = events.Lepton[:, 0]
    second_lepton = events.Lepton[:, 1]

    passes_first = first_lepton.pt > 70
    passes_second = second_lepton.pt > 50
    passes_mll = events.mll > 20

    mask = passes_first & passes_second & passes_mll
    return events, mask

hbw/config/categories.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
12
# coding: utf-8
23

34
"""
@@ -136,7 +137,7 @@ def add_mll_categories(config: od.Config) -> None:
136137
def add_lepton_categories(config: od.Config) -> None:
137138
config.x.lepton_channels = {
138139
"sl": ("1e", "1mu"),
139-
"dl": ("2e", "2mu", "emu"),
140+
"dl": ("2e", "2mu", "emu", "ge3lep"),
140141
}[config.x.lepton_tag]
141142

142143
cat_1e = config.add_category( # noqa: F841
@@ -174,6 +175,13 @@ def add_lepton_categories(config: od.Config) -> None:
174175
label="1 Electron 1 Muon",
175176
)
176177

178+
# category for events with at least three leptons; the variable is named after the
# category it holds (it was previously called cat_emu, clashing with the e-mu category)
cat_ge3lep = config.add_category(  # noqa: F841
    name="ge3lep",
    id=60,
    selection="catid_ge3lep",
    label=r"$N_{lep} \geq 3$",
)
184+
177185

178186
@call_once_on_config()
179187
def add_njet_categories(config: od.Config) -> None:

0 commit comments

Comments
 (0)