Skip to content

Commit d482b9a

Browse files
authored
Merge pull request #66 from uhh-cms/neutrino_reconstruction
Neutrino reconstruction
2 parents 5c03497 + cfb29af commit d482b9a

File tree

4 files changed

+224
-4
lines changed

4 files changed

+224
-4
lines changed

hbw/config/ml_variables.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ def add_ml_variables(config: od.Config) -> None:
233233
name=f"mli_{obj}_{var}",
234234
expression=f"mli_{obj}_{var}",
235235
binning=default_var_binning[var],
236-
unit=default_var_unit.get(var, var),
236+
unit=default_var_unit.get(var, "1"),
237237
x_title="{obj} {var}".format(obj=obj, var=var),
238238
)
239239

@@ -243,7 +243,7 @@ def add_ml_variables(config: od.Config) -> None:
243243
name=f"mli_{obj}_{var}",
244244
expression=f"mli_{obj}_{var}",
245245
binning=default_var_binning[var],
246-
unit=default_var_unit.get(var, var),
246+
unit=default_var_unit.get(var, "1"),
247247
x_title="{obj} {var}".format(obj=obj, var=var),
248248
)
249249

@@ -253,6 +253,6 @@ def add_ml_variables(config: od.Config) -> None:
253253
name=f"mli_{obj}_{var}",
254254
expression=f"mli_{obj}_{var}",
255255
binning=default_var_binning[var],
256-
unit=default_var_unit.get(var, var),
256+
unit=default_var_unit.get(var, "1"),
257257
x_title="{obj} {var}".format(obj=obj, var=var),
258258
)

hbw/config/variables.py

+40
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
from columnflow.columnar_util import EMPTY_FLOAT
1515
from hbw.util import call_once_on_config
16+
from hbw.config.styling import default_var_binning, default_var_unit
1617

1718

1819
@call_once_on_config()
@@ -138,6 +139,45 @@ def add_feature_variables(config: od.Config) -> None:
138139
)
139140

140141

142+
@call_once_on_config()
143+
def add_neutrino_variables(config: od.Config) -> None:
144+
"""
145+
Adds variables to a *config* that are produced as part of the `neutrino_reconstruction` producer.
146+
"""
147+
148+
for obj in ["Neutrino", "Neutrino1", "Neutrino2"]:
149+
# pt and phi should be the same as MET, mass should always be 0
150+
for var in ["pt", "eta", "phi", "mass"]:
151+
config.add_variable(
152+
name=f"{obj}_{var}",
153+
expression=f"{obj}.{var}",
154+
binning=default_var_binning[var],
155+
unit=default_var_unit.get(var, "1"),
156+
x_title="{obj} {var}".format(obj=obj, var=var),
157+
)
158+
159+
160+
@call_once_on_config()
161+
def add_top_reco_variables(config: od.Config) -> None:
162+
"""
163+
Adds variables to a *config* that are produced as part of the `top_reconstruction` producer.
164+
"""
165+
# add neutrino variables aswell since the neutrino needs to be reconstructed anyway
166+
add_neutrino_variables(config)
167+
168+
# add reconstructed top variables
169+
for obj in ["tlep_hyp1", "tlep_hyp2"]:
170+
# pt and phi should be the same as MET, mass should always be 0
171+
for var in ["pt", "eta", "phi", "mass"]:
172+
config.add_variable(
173+
name=f"{obj}_{var}",
174+
expression=f"{obj}.{var}",
175+
binning=default_var_binning[var],
176+
unit=default_var_unit.get(var, "1"),
177+
x_title="{obj} {var}".format(obj=obj, var=var),
178+
)
179+
180+
141181
@call_once_on_config()
142182
def add_variables(config: od.Config) -> None:
143183
"""

hbw/production/neutrino.py

+180
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
# coding: utf-8
2+
"""
3+
Producers for Neutrino reconstruction.
4+
"""
5+
6+
import functools
7+
8+
import law
9+
10+
from columnflow.production import Producer, producer
11+
from columnflow.util import maybe_import
12+
from columnflow.columnar_util import set_ak_column, EMPTY_FLOAT
13+
14+
# from cmsdb.constants import m_w
15+
16+
from hbw.util import four_vec
17+
from hbw.production.prepare_objects import prepare_objects
18+
from hbw.config.variables import add_neutrino_variables, add_top_reco_variables
19+
20+
21+
np = maybe_import("numpy")
22+
ak = maybe_import("awkward")
23+
24+
logger = law.logger.get_logger(__name__)
25+
26+
27+
# helper
28+
set_ak_column_f32 = functools.partial(set_ak_column, value_type=np.float32)
29+
30+
31+
@producer(
32+
uses=prepare_objects,
33+
produces=four_vec(["Neutrino", "Neutrino1", "Neutrino2"]),
34+
)
35+
def neutrino_reconstruction(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
36+
"""
37+
Producer to reconstruct a neutrino orignating from a leptonically decaying W boson.
38+
Assumes that Neutrino pt can be reconstructed via MET and that the W boson has been
39+
produced on-shell.
40+
41+
TODO: reference
42+
"""
43+
# add behavior and define new collections (e.g. Lepton)
44+
events = self[prepare_objects](events, **kwargs)
45+
46+
# TODO: might be outdated, should be defined in cmsdb
47+
w_mass = 80.379
48+
49+
# get input variables (assuming that there is only one lepton)
50+
E_l = events.Lepton.E[:, 0]
51+
pt_l = events.Lepton.pt[:, 0]
52+
pz_l = events.Lepton.pz[:, 0]
53+
pt_nu = events.MET.pt
54+
55+
delta_phi = abs(events.Lepton[:, 0].delta_phi(events.MET))
56+
mu = w_mass**2 / 2 + pt_nu * pt_l * np.cos(delta_phi)
57+
58+
# Neutrino pz will be calculated as: pz_nu = A +- sqrt(B-C)
59+
A = mu * pz_l / pt_l**2
60+
B = mu**2 * pz_l**2 / pt_l**4
61+
C = (E_l**2 * pt_nu**2 - mu**2) / pt_l**2
62+
63+
pz_nu_1 = ak.where(
64+
B - C >= 0,
65+
# solution is real
66+
A + np.sqrt(B - C),
67+
# complex solution -> take only the real part
68+
A,
69+
)
70+
71+
pz_nu_2 = ak.where(
72+
B - C >= 0,
73+
# solution is real
74+
A - np.sqrt(B - C),
75+
# complex solution -> take only the real part
76+
A,
77+
)
78+
79+
pz_nu_solutions = [pz_nu_1, pz_nu_2]
80+
81+
for i, pz_nu in enumerate(pz_nu_solutions, start=1):
82+
# convert to float64 to prevent rounding errors
83+
pt_nu = ak.values_astype(pt_nu, np.float64)
84+
pz_nu = ak.values_astype(pz_nu, np.float64)
85+
86+
# calculate Neutrino eta to define the Neutrino 4-vector
87+
p_nu_1 = np.sqrt(pt_nu**2 + pz_nu**2)
88+
eta_nu_1 = np.log((p_nu_1 + pz_nu) / (p_nu_1 - pz_nu)) / 2
89+
# store Neutrino 4 vector components
90+
events[f"Neutrino{i}"] = events.MET
91+
events = set_ak_column_f32(events, f"Neutrino{i}.eta", eta_nu_1)
92+
93+
# sanity check: Neutrino pz should be the same as pz_nu within rounding errors
94+
sanity_check_1 = ak.sum(abs(events[f"Neutrino{i}"].pz - pz_nu) > abs(events[f"Neutrino{i}"].pz) / 100)
95+
if sanity_check_1:
96+
logger.warning(
97+
"Number of events with Neutrino.pz that differs from pz_nu by more than 1 percent: "
98+
f"{sanity_check_1} (solution {i})",
99+
)
100+
101+
# sanity check: reconstructing W mass should always (if B-C>0) give the input W mass (80.4 GeV)
102+
W_on_shell = events[f"Neutrino{i}"] + events.Lepton[:, 0]
103+
sanity_check_2 = ak.sum(abs(ak.where(B - C >= 0, W_on_shell.mass, w_mass) - w_mass) > 1)
104+
if sanity_check_2:
105+
logger.warning(
106+
"Number of events with W mass from reconstructed Neutrino (real solutions only) that "
107+
f"differs by more than 1 GeV from the input W mass: {sanity_check_2} (solution {i})",
108+
)
109+
110+
# sanity check: for complex solutions, only the real part is considered -> both solutions should be identical
111+
sanity_check_3 = ak.sum(ak.where(B - C <= 0, events.Neutrino1.eta - events.Neutrino2.eta, 0))
112+
if sanity_check_3:
113+
raise Exception(
114+
"When finding complex neutrino solutions, both reconstructed Neutrinos should be identical",
115+
)
116+
117+
# combine both Neutrino solutions by taking the solution with smaller absolute eta
118+
events = set_ak_column_f32(
119+
events, "Neutrino",
120+
ak.where(abs(events.Neutrino1.eta) > abs(events.Neutrino2.eta), events.Neutrino2, events.Neutrino1),
121+
)
122+
return events
123+
124+
125+
@neutrino_reconstruction.init
126+
def neutrino_reconstruction_init(self: Producer) -> None:
127+
# add variable instances to config
128+
add_neutrino_variables(self.config_inst)
129+
130+
131+
@producer(
132+
uses={neutrino_reconstruction, prepare_objects} | four_vec("Bjet"),
133+
produces={neutrino_reconstruction} | four_vec({"tlep_hyp1", "tlep_hyp2"}),
134+
)
135+
def top_reconstruction(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
136+
"""
137+
Producer to reconstruct ttbar top quark masses using the neutrino_reconstruction Producer
138+
"""
139+
# add behavior and define new collections (e.g. Lepton)
140+
events = self[prepare_objects](events, **kwargs)
141+
142+
# run the neutrino reconstruction
143+
events = self[neutrino_reconstruction](events, **kwargs)
144+
145+
# object padding (there are some boosted events that only contain one Jet)
146+
events = set_ak_column(events, "Bjet", ak.pad_none(events.Bjet, 2))
147+
148+
dr_b1_lep = events.Bjet[:, 0].delta_r(events.Lepton[:, 0])
149+
dr_b2_lep = events.Bjet[:, 1].delta_r(events.Lepton[:, 0])
150+
151+
blep = ak.where(dr_b1_lep < dr_b2_lep, events.Bjet[:, 0], events.Bjet[:, 1])
152+
bhad = ak.where(dr_b1_lep > dr_b2_lep, events.Bjet[:, 0], events.Bjet[:, 1])
153+
154+
tlep_hyp1 = blep + events.Lepton[:, 0] + events.Neutrino
155+
tlep_hyp2 = bhad + events.Lepton[:, 0] + events.Neutrino
156+
157+
# events = set_ak_column_f32(events, "tlep_hyp1", tlep_hyp1)
158+
# events = set_ak_column_f32(events, "tlep_hyp2", tlep_hyp2)
159+
160+
# tlep vectors store columns (x, y, z, t), so set all 4-vec components by hand
161+
for var in ("pt", "eta", "phi", "mass"):
162+
events = set_ak_column_f32(events, f"tlep_hyp1.{var}", getattr(tlep_hyp1, var))
163+
events = set_ak_column_f32(events, f"tlep_hyp2.{var}", getattr(tlep_hyp2, var))
164+
165+
# fill nan/none values of all produced columns
166+
for route in self.produced_columns:
167+
# replace nan, none, and inf values with EMPTY_FLOAT
168+
col = route.apply(events)
169+
col = ak.fill_none(ak.nan_to_none(route.apply(events)), EMPTY_FLOAT)
170+
col = ak.where(np.isinf(col), EMPTY_FLOAT, col)
171+
172+
events = set_ak_column(events, route.string_column, col)
173+
174+
return events
175+
176+
177+
@neutrino_reconstruction.init
178+
def top_reconstruction_init(self: Producer) -> None:
179+
# add variable instances to config
180+
add_top_reco_variables(self.config_inst)

law.cfg

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ default_analysis: hbw.analysis.hbw_merged.hbw_merged
2626
default_config: c17
2727
default_dataset: ggHH_kl_1_kt_1_sl_hbbhww_powheg
2828

29-
production_modules: columnflow.production.{categories,processes,pileup,normalization,seeds}, hbw.production.{weights,features,ml_inputs,categories,gen_hbw_decay}, hbw.ml.stats
29+
production_modules: columnflow.production.{categories,processes,pileup,normalization,seeds}, hbw.production.{weights,features,ml_inputs,categories,gen_hbw_decay,neutrino}, hbw.ml.stats
3030
calibration_modules: columnflow.calibration.jets, hbw.calibration.default
3131
selection_modules: hbw.selection.{common,sl,dl}
3232
categorization_modules: hbw.selection.categories

0 commit comments

Comments
 (0)