Skip to content

Commit b24a07e

Browse files
committed
first implementation of Aachen DL selection
1 parent 9818238 commit b24a07e

File tree

3 files changed

+620
-3
lines changed

3 files changed

+620
-3
lines changed

hbw/selection/common.py

+324-2
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111

1212
import law
1313

14-
from columnflow.util import maybe_import
15-
from columnflow.columnar_util import set_ak_column, EMPTY_FLOAT, get_ak_routes
14+
from columnflow.util import maybe_import, DotDict
15+
from columnflow.columnar_util import set_ak_column, EMPTY_FLOAT, get_ak_routes, optional_column as optional
1616
from columnflow.production.util import attach_coffea_behavior
1717

1818
from columnflow.selection import Selector, SelectionResult, selector
@@ -60,6 +60,328 @@ def check_columns(
6060
raise Exception("This Selector is only for checking nano content")
6161

6262

63+
@selector(
64+
uses=four_vec("Jet", {"jetId"}) | {optional("Jet.puId")},
65+
exposed=True,
66+
b_tagger="deepjet",
67+
btag_wp="medium",
68+
)
69+
def jet_selection(
70+
self: Selector,
71+
events: ak.Array,
72+
lepton_results: SelectionResult,
73+
stats: defaultdict,
74+
**kwargs,
75+
) -> Tuple[ak.Array, SelectionResult]:
76+
"""
77+
Central definition of Jets in HH(bbWW)
78+
"""
79+
80+
steps = DotDict()
81+
82+
# assign local index to all Jets
83+
events = set_ak_column(events, "local_index", ak.local_index(events.Jet))
84+
85+
# default jet definition
86+
jet_mask = (
87+
(events.Jet.pt >= 25) &
88+
(abs(events.Jet.eta) <= 2.4) &
89+
(events.Jet.jetId >= 2) & # 1: loose, 2: tight, 4: isolated, 6: tight+isolated
90+
ak.all(events.Jet.metric_table(lepton_results.x.lepton) > 0.4, axis=2)
91+
)
92+
93+
# apply loose Jet puId to jets with pt below 50 GeV (not in Run3 samples so skip this for now)
94+
if self.config_inst.x.run == 2:
95+
jet_pu_mask = (events.Jet.puId >= 4) | (events.Jet.pt > 50)
96+
jet_mask = jet_mask & jet_pu_mask
97+
98+
# get the jet indices for pt-sorting of jets
99+
jet_indices = masked_sorted_indices(jet_mask, events.Jet.pt)
100+
101+
# add jet steps
102+
events = set_ak_column(events, "cutflow.n_jet", ak.sum(jet_mask, axis=1))
103+
steps["nJet1"] = events.cutflow.n_jet >= 1
104+
steps["nJet2"] = events.cutflow.n_jet >= 2
105+
steps["nJet3"] = events.cutflow.n_jet >= 3
106+
steps["nJet4"] = events.cutflow.n_jet >= 4
107+
108+
# define btag mask
109+
wp_score = self.config_inst.x.btag_working_points[self.config_inst.x.b_tagger][self.config_inst.x.btag_wp]
110+
b_score = events.Jet[self.config_inst.x.btag_column]
111+
btag_mask = (jet_mask) & (b_score >= wp_score)
112+
113+
# add btag steps
114+
events = set_ak_column(events, "cutflow.n_btag", ak.sum(btag_mask, axis=1))
115+
steps["nBjet1"] = events.cutflow.n_btag >= 1
116+
steps["nBjet2"] = events.cutflow.n_btag >= 2
117+
118+
# define b-jets as the two b-score leading jets, b-score sorted
119+
bjet_indices = masked_sorted_indices(jet_mask, b_score)[:, :2]
120+
121+
# define lightjets as all non b-jets, pt-sorted
122+
b_idx = ak.fill_none(ak.pad_none(bjet_indices, 2), -1)
123+
lightjet_indices = jet_indices[(jet_indices != b_idx[:, 0]) & (jet_indices != b_idx[:, 1])]
124+
125+
# build and return selection results plus new columns
126+
return events, SelectionResult(
127+
steps=steps,
128+
objects={
129+
"Jet": {
130+
"Jet": jet_indices,
131+
"Bjet": bjet_indices,
132+
"Lightjet": lightjet_indices,
133+
},
134+
},
135+
aux={
136+
"jet_mask": jet_mask,
137+
"n_central_jets": ak.num(jet_indices),
138+
},
139+
)
140+
141+
142+
@jet_selection.init
143+
def jet_selection_init(self: Selector) -> None:
144+
# set the main b_tagger + working point as defined from the selector
145+
self.config_inst.x.b_tagger = self.b_tagger
146+
self.config_inst.x.btag_wp = self.btag_wp
147+
148+
self.btag_column = self.config_inst.x.btag_column = {
149+
"deepjet": "btagDeepFlavB",
150+
"particlenet": "btagPNetB",
151+
}.get(self.config_inst.x.b_tagger, self.config_inst.x.b_tagger)
152+
153+
self.uses.add(f"Jet.{self.config_inst.x.btag_column}")
154+
155+
# update selector steps labels
156+
self.config_inst.x.selector_step_labels = self.config_inst.x("selector_step_labels", {})
157+
self.config_inst.x.selector_step_labels.update({
158+
"nJet1": r"$N_{jets}^{AK4} \geq 1$",
159+
"nJet2": r"$N_{jets}^{AK4} \geq 2$",
160+
"nJet3": r"$N_{jets}^{AK4} \geq 3$",
161+
"nJet4": r"$N_{jets}^{AK4} \geq 4$",
162+
"nBjet1": r"$N_{jets}^{BTag} \geq 1$",
163+
"nBjet2": r"$N_{jets}^{BTag} \geq 2$",
164+
})
165+
166+
if self.config_inst.x("do_cutflow_features", False):
167+
# add cutflow features to *produces* only when requested
168+
self.produces.add("cutflow.n_jet")
169+
self.produces.add("cutflow.n_btag")
170+
171+
# create variable instances
172+
self.config_inst.add_variable(
173+
name="cf_n_jet",
174+
expression="cutflow.n_jet",
175+
binning=(7, -0.5, 6.5),
176+
x_title="Number of jets",
177+
discrete_x=True,
178+
)
179+
self.config_inst.add_variable(
180+
name="cf_n_btag",
181+
expression="cutflow.n_btag",
182+
binning=(7, -0.5, 6.5),
183+
x_title=f"Number of b-tagged jets ({self.b_tagger}, {self.btag_wp} WP)",
184+
discrete_x=True,
185+
)
186+
187+
188+
@selector(
189+
uses=(
190+
four_vec("Electron", {
191+
"dxy", "dz", "miniPFRelIso_all", "sip3d", "cutBased", "lostHits", # Electron Preselection
192+
"mvaIso_WP90", # used as replacement for "mvaNoIso_WPL" in Preselection
193+
"mvaTTH", "jetRelIso", # cone-pt
194+
"deltaEtaSC", "sieie", "hoe", "eInvMinusPInv", "convVeto", "jetIdx", # Fakeable Electron
195+
}) |
196+
four_vec("Muon", {
197+
"dxy", "dz", "miniPFRelIso_all", "sip3d", "looseId", # Muon Preselection
198+
"mediumId", "mvaTTH", "jetRelIso", # cone-pt
199+
"jetIdx", # Fakeable Muon
200+
}) |
201+
four_vec("Tau", {
202+
"dz", "idDeepTau2017v2p1VSe", "idDeepTau2017v2p1VSmu", "idDeepTau2017v2p1VSjet",
203+
}) | {
204+
jet_selection, # the jet_selection init needs to be run to set the correct b_tagger
205+
}
206+
),
207+
produces={"Electron.cone_pt", "Muon.cone_pt"},
208+
)
209+
def lepton_definition(
210+
self: Selector,
211+
events: ak.Array,
212+
stats: defaultdict,
213+
synchronization: bool = True,
214+
**kwargs,
215+
) -> Tuple[ak.Array, SelectionResult]:
216+
"""
217+
Central definition of Leptons in HH(bbWW)
218+
"""
219+
# initialize dicts for the selection steps
220+
steps = DotDict()
221+
222+
# reconstruct relevant variables
223+
events = set_ak_column(events, "Electron.cone_pt", ak.where(
224+
events.Electron.mvaTTH > 0.30,
225+
events.Electron.pt,
226+
0.9 * events.Electron.pt * (1.0 + events.Electron.jetRelIso),
227+
))
228+
events = set_ak_column(events, "Muon.cone_pt", ak.where(
229+
(events.Muon.mediumId & (events.Muon.mvaTTH > 0.50)),
230+
events.Muon.pt,
231+
0.9 * events.Muon.pt * (1.0 + events.Muon.jetRelIso),
232+
))
233+
234+
electron = events.Electron
235+
muon = events.Muon
236+
237+
# preselection masks
238+
e_mask_loose = (
239+
(electron.cone_pt >= 7) &
240+
(abs(electron.eta) <= 2.5) &
241+
(abs(electron.dxy) <= 0.05) &
242+
(abs(electron.dz) <= 0.1) &
243+
(electron.sip3d <= 8) &
244+
(electron.mvaIso_WP90) & # TODO: replace when possible
245+
# (electron.mvaNoIso_WPL) & # missing
246+
(electron.lostHits <= 1)
247+
)
248+
mu_mask_loose = (
249+
(muon.cone_pt >= 5) &
250+
(abs(muon.eta) <= 2.4) &
251+
(abs(muon.dxy) <= 0.05) &
252+
(abs(muon.dz) <= 0.1) &
253+
(muon.miniPFRelIso_all <= 0.4) &
254+
(muon.sip3d <= 8) &
255+
(muon.looseId)
256+
)
257+
258+
# lepton invariant mass cuts
259+
loose_leptons = ak.concatenate([
260+
events.Electron[e_mask_loose] * 1,
261+
events.Muon[mu_mask_loose] * 1,
262+
], axis=1)
263+
264+
lepton_pairs = ak.combinations(loose_leptons, 2)
265+
l1, l2 = ak.unzip(lepton_pairs)
266+
lepton_pairs["m_inv"] = (l1 + l2).mass
267+
268+
steps["ll_lowmass_veto"] = ~ak.any((lepton_pairs.m_inv < 12), axis=1)
269+
steps["ll_zmass_veto"] = ~ak.any((lepton_pairs.m_inv >= 81.2) & (lepton_pairs.m_inv <= 101.2), axis=1)
270+
271+
# get the correct btag WPs and column from the config (as setup by jet_selection)
272+
btag_wp_score = self.config_inst.x.btag_working_points[self.config_inst.x.b_tagger][self.config_inst.x.btag_wp]
273+
btag_tight_score = self.config_inst.x.btag_working_points[self.config_inst.x.b_tagger]["tight"]
274+
btag_column = self.config_inst.x.btag_column
275+
276+
# TODO: I am not sure if the lepton.matched_jet is working as intended
277+
# TODO: fakeable masks seem to be too tight
278+
279+
# fakeable masks
280+
e_mask_fakeable = (
281+
e_mask_loose &
282+
(
283+
(abs(electron.eta + electron.deltaEtaSC) > 1.479) & (electron.sieie <= 0.030) |
284+
(abs(electron.eta + electron.deltaEtaSC) <= 1.479) & (electron.sieie <= 0.011)
285+
) &
286+
(electron.hoe <= 0.10) &
287+
(electron.eInvMinusPInv >= -0.04) &
288+
(electron.convVeto) &
289+
(electron.lostHits == 0) &
290+
((electron.mvaTTH >= 0.30) | (electron.mvaIso_WP90)) &
291+
(ak.all(electron.matched_jet[btag_column] <= btag_wp_score, axis=1)) &
292+
((electron.mvaTTH >= 0.30) | (electron.jetRelIso < 0.70))
293+
)
294+
295+
mu_mask_fakeable = (
296+
mu_mask_loose &
297+
(muon.cone_pt >= 10) &
298+
(
299+
((muon.mvaTTH < 0.30) & (ak.all(muon.matched_jet[btag_column] <= btag_tight_score, axis=1))) |
300+
((muon.mvaTTH >= 0.30) & (ak.all(muon.matched_jet[btag_column] <= btag_wp_score, axis=1)))
301+
) &
302+
# missing: DeepJet of nearby jet
303+
((muon.mvaTTH >= 0.50) | (muon.jetRelIso < 0.80))
304+
)
305+
306+
# tight masks
307+
e_mask_tight = (
308+
e_mask_fakeable &
309+
(electron.mvaTTH >= 0.30)
310+
)
311+
mu_mask_tight = (
312+
mu_mask_fakeable &
313+
(muon.mvaTTH >= 0.50) &
314+
(muon.mediumId)
315+
)
316+
317+
# tau veto mask (only needed in SL?)
318+
tau_mask_veto = (
319+
(abs(events.Tau.eta) < 2.3) &
320+
# (abs(events.Tau.dz) < 0.2) &
321+
(events.Tau.pt > 20.0) &
322+
(events.Tau.idDeepTau2017v2p1VSe >= 4) & # 4: VLoose
323+
(events.Tau.idDeepTau2017v2p1VSmu >= 8) & # 8: Tight
324+
(events.Tau.idDeepTau2017v2p1VSjet >= 2) # 2: VVLoose
325+
)
326+
327+
# store number of Loose/Fakeable/Tight electrons/muons/taus as cutflow variables
328+
events = set_ak_column(events, "cutflow.n_loose_electron", ak.sum(e_mask_loose, axis=1))
329+
events = set_ak_column(events, "cutflow.n_loose_muon", ak.sum(mu_mask_loose, axis=1))
330+
events = set_ak_column(events, "cutflow.n_fakeable_electron", ak.sum(e_mask_fakeable, axis=1))
331+
events = set_ak_column(events, "cutflow.n_fakeable_muon", ak.sum(mu_mask_fakeable, axis=1))
332+
events = set_ak_column(events, "cutflow.n_tight_electron", ak.sum(e_mask_tight, axis=1))
333+
events = set_ak_column(events, "cutflow.n_tight_muon", ak.sum(mu_mask_tight, axis=1))
334+
events = set_ak_column(events, "cutflow.n_veto_tau", ak.sum(tau_mask_veto, axis=1))
335+
336+
# create the SelectionResult
337+
lepton_results = SelectionResult(
338+
steps=steps,
339+
objects={
340+
"Electron": {
341+
"LooseElectron": masked_sorted_indices(e_mask_loose, electron.pt),
342+
"FakeableElectron": masked_sorted_indices(e_mask_fakeable, electron.pt),
343+
"TightElectron": masked_sorted_indices(e_mask_tight, electron.pt),
344+
},
345+
"Muon": {
346+
"LooseMuon": masked_sorted_indices(mu_mask_loose, muon.pt),
347+
"FakeableMuon": masked_sorted_indices(mu_mask_fakeable, muon.pt),
348+
"TightMuon": masked_sorted_indices(mu_mask_tight, muon.pt),
349+
},
350+
"Tau": {"VetoTau": masked_sorted_indices(tau_mask_veto, events.Tau.pt)},
351+
},
352+
aux={
353+
"mu_mask_fakeable": mu_mask_fakeable,
354+
"e_mask_fakeable": e_mask_fakeable,
355+
"mu_mask_tight": mu_mask_tight,
356+
"e_mask_tight": e_mask_tight,
357+
},
358+
)
359+
360+
return events, lepton_results
361+
362+
363+
@lepton_definition.init
364+
def lepton_definition_init(self: Selector) -> None:
365+
# update selector steps labels
366+
self.config_inst.x.selector_step_labels = self.config_inst.x("selector_step_labels", {})
367+
self.config_inst.x.selector_step_labels.update({
368+
"ll_lowmass_veto": r"$m_{ll} > 12 GeV$",
369+
"ll_zmass_veto": r"$|m_{ll} - m_{Z}| > 10 GeV$",
370+
})
371+
372+
if self.config_inst.x("do_cutflow_features", False):
373+
# add cutflow features to *produces* only when requested
374+
self.produces.add("cutflow.n_loose_electron")
375+
self.produces.add("cutflow.n_loose_muon")
376+
self.produces.add("cutflow.n_fakeable_electron")
377+
self.produces.add("cutflow.n_fakeable_muon")
378+
self.produces.add("cutflow.n_tight_electron")
379+
self.produces.add("cutflow.n_tight_muon")
380+
self.produces.add("cutflow.n_veto_tau")
381+
382+
# TODO: add cutflow variables aswell
383+
384+
63385
@selector(
64386
uses=four_vec("Jet", {"btagDeepFlavB"}),
65387
exposed=False,

hbw/selection/cutflow_features.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,15 @@
2727
# skip the checking existence of used/produced columns for now because some columns are not there
2828
check_used_columns=False,
2929
check_produced_columns=False,
30-
3130
)
3231
def cutflow_features(self: Producer, events: ak.Array, results: SelectionResult, **kwargs) -> ak.Array:
32+
"""
33+
Producer to produce columns as part of SelectEvents. This mostly just applies object definitions
34+
to the nano and stores the resulting objects with the "cutflow." prefix.
35+
All columns of interested are then stored by adding them to the *produces*.
3336
37+
NOTE: probably needs to be updated since selection has been reworked.
38+
"""
3439
# apply event results to objects and define objects in a convenient way for reconstructing variables
3540
# but create temporary ak.Array to not override objects in events
3641
arr = self[prepare_objects](events, results)

0 commit comments

Comments
 (0)