Skip to content

Commit 296cc98

Browse files
author
Lara Sophie Markus
committed
cleanup and fixes in postfit plotting
1 parent 13970d5 commit 296cc98

File tree

4 files changed

+85
-55
lines changed

4 files changed

+85
-55
lines changed

hbw/config/defaults_and_groups.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -151,8 +151,8 @@ def set_config_defaults_and_groups(config_inst):
151151
"h_ggf", "h_vbf", "zh", "wh", "zh_gg", "tth",
152152
],
153153
"test_postfit": [
154-
"hh_vbf_hbb_hww2l2nu_kv1_k2v1_kl1",
155-
"hh_ggf_hbb_hww2l2nu_kl1_kt1",
154+
# "hh_vbf_hbb_hww2l2nu",
155+
"hh_ggf_hbb_hww2l2nu",
156156
"st",
157157
"tt",
158158
"dy",
@@ -386,8 +386,8 @@ def set_config_defaults_and_groups(config_inst):
386386
for proc, _, _ in config_inst.walk_processes() if proc.has_tag("is_signal")
387387
},
388388
"dilep": {
389-
"hh_vbf_hbb_hww2l2nu_kv1_k2v1_kl1": {"scale": 90000, "unstack": True},
390-
"hh_ggf_hbb_hww2l2nu_kl1_kt1": {"scale": 10000, "unstack": True},
389+
"hh_vbf_hbb_hww2l2nu": {"scale": 90000, "unstack": True},
390+
"hh_ggf_hbb_hww2l2nu": {"scale": 10000, "unstack": True},
391391
},
392392
"dileptest": {
393393
"hh_ggf_hbb_hvv2l2nu_kl1_kt1": {"scale": 10000, "unstack": True},

hbw/config/styling.py

+2
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,9 @@ def stylize_processes(config: od.Config) -> None:
203203

204204
# unstack signal in plotting
205205
if "hh_" in proc.name.lower():
206+
proc.has_tag("is_signal")
206207
proc.unstack = True
208+
proc.scale = "stack"
207209

208210
# labels used for ML categories
209211
proc.x.ml_label = ml_labels.get(proc.name, proc.name)

hbw/tasks/inference.py

+10
Original file line numberDiff line numberDiff line change
@@ -508,6 +508,9 @@ def run(self):
508508
# combine category names with card fn to a single string
509509
datacards = ",".join([f"{cat_name}=$CARDS_PATH/{card_fn}" for cat_name, card_fn in zip(cat_names, card_fns)])
510510

511+
# # name of the output root file that contains the Pre+Postfit shapes
512+
# output_file = ""
513+
511514
base_cmd = f"export CARDS_PATH={cards_path}" + "\n"
512515

513516
print("\n\n")
@@ -541,4 +544,11 @@ def run(self):
541544
f"--order-by-impact"
542545
)
543546
print(cmd, "\n\n")
547+
548+
# running PreAndPostFitShapes for Pre+Postfit plots
549+
cmd = base_cmd + (
550+
f"law run PreAndPostFitShapes --version {identifier} --datacards {datacards} "
551+
# f"--output-name {output_file}"
552+
)
553+
print(cmd, "\n\n")
544554
output["FitDiagnostics"].dump(cmd, formatter="text")

hbw/tasks/postfit_plots.py

+69-51
Original file line numberDiff line numberDiff line change
@@ -34,38 +34,38 @@
3434
logger = law.logger.get_logger(__name__)
3535

3636

37-
def reverse_inf_proc(proc):
38-
"""
39-
Helper function that reverses the transformations done by inf_proc.
40-
"""
41-
if proc.startswith("ggHH_"):
42-
# Adjust pattern to split the last part into two groups
43-
pattern = r"ggHH_kl_([mp\d]+)_kt_([mp\d]+)_([a-zA-Z\d]{3})([a-zA-Z\d]+)"
44-
replacement = r"hh_ggf_\3_\4_kl\1_kt\2"
45-
return re.sub(pattern, replacement, proc)
46-
elif proc.startswith("qqHH_"):
47-
# Adjust pattern to split the last part into two groups
48-
pattern = r"qqHH_CV_([mp\d]+)_C2V_([mp\d]+)_kl_([mp\d]+)_([a-zA-Z\d]{3})([a-zA-Z\d]+)"
49-
replacement = r"hh_vbf_\4_\5_kv\1_k2v\2_kl\3"
50-
return re.sub(pattern, replacement, proc)
51-
elif proc == "qqH":
52-
pattern = r"qqH"
53-
replacement = r"h_vbf"
54-
return re.sub(pattern, replacement, proc)
55-
elif proc == "ggH":
56-
pattern = r"ggH"
57-
replacement = r"h_ggf"
58-
return re.sub(pattern, replacement, proc)
59-
elif proc == "ggZH":
60-
pattern = r"ggZH"
61-
replacement = r"zh_gg"
62-
return re.sub(pattern, replacement, proc)
63-
elif "H" in proc:
64-
proc = proc.lower()
65-
return proc
66-
else:
67-
# If the string doesn't match the patterns, return it unchanged
68-
return proc
37+
# def reverse_inf_proc(proc):
38+
# """
39+
# Helper function that reverses the transformations done by inf_proc.
40+
# """
41+
# if proc.startswith("ggHH_"):
42+
# # Adjust pattern to split the last part into two groups
43+
# pattern = r"ggHH_kl_([mp\d]+)_kt_([mp\d]+)_([a-zA-Z\d]{3})([a-zA-Z\d]+)"
44+
# replacement = r"hh_ggf_\3_\4_kl\1_kt\2"
45+
# return re.sub(pattern, replacement, proc)
46+
# elif proc.startswith("qqHH_"):
47+
# # Adjust pattern to split the last part into two groups
48+
# pattern = r"qqHH_CV_([mp\d]+)_C2V_([mp\d]+)_kl_([mp\d]+)_([a-zA-Z\d]{3})([a-zA-Z\d]+)"
49+
# replacement = r"hh_vbf_\4_\5_kv\1_k2v\2_kl\3"
50+
# return re.sub(pattern, replacement, proc)
51+
# elif proc == "qqH":
52+
# pattern = r"qqH"
53+
# replacement = r"h_vbf"
54+
# return re.sub(pattern, replacement, proc)
55+
# elif proc == "ggH":
56+
# pattern = r"ggH"
57+
# replacement = r"h_ggf"
58+
# return re.sub(pattern, replacement, proc)
59+
# elif proc == "ggZH":
60+
# pattern = r"ggZH"
61+
# replacement = r"zh_gg"
62+
# return re.sub(pattern, replacement, proc)
63+
# elif "H" in proc:
64+
# proc = proc.lower()
65+
# return proc
66+
# else:
67+
# # If the string doesn't match the patterns, return it unchanged
68+
# return proc
6969

7070

7171
def load_hists_uproot(fit_diagnostics_path, fit_type):
@@ -152,9 +152,8 @@ def get_hists_from_multidimfit(tfile):
152152
apply_process_settings,
153153
)
154154

155-
156155
def plot_postfit_shapes(
157-
hists: OrderedDict[od.Process, hist.Hist],
156+
h: OrderedDict, # [od.Process, hist.Hist],
158157
config_inst: od.Config,
159158
category_inst: od.Category,
160159
variable_insts: list[od.Variable],
@@ -168,7 +167,7 @@ def plot_postfit_shapes(
168167
**kwargs,
169168
) -> tuple(plt.Figure, tuple(plt.Axes)):
170169
variable_inst = law.util.make_tuple(variable_insts)[0]
171-
hists = apply_process_settings(hists, process_settings)
170+
hists = apply_process_settings(h.copy(), process_settings)
172171
plot_config = prepare_plot_config(
173172
hists,
174173
shape_norm=shape_norm,
@@ -232,11 +231,20 @@ class PlotPostfitShapes(
232231
description="Whether to do prefit or postfit plots; defaults to False",
233232
)
234233

234+
@property
235+
def fit_type(self) -> str:
236+
if self.prefit:
237+
fit_type = "prefit"
238+
else:
239+
fit_type = "postfit"
240+
self._fit_type = fit_type
241+
return self._fit_type
242+
235243
def requires(self):
236244
return {}
237245

238246
def output(self):
239-
return {"plots": self.target("plots", dir=True)}
247+
return {"plots": self.target(f"plots_{self.fit_type}", dir=True)}
240248

241249
@view_output_plots
242250
def run(self):
@@ -246,21 +254,18 @@ def run(self):
246254
)
247255

248256
outp = self.output()
249-
if self.prefit:
250-
fit_type = "prefit"
251-
else:
252-
fit_type = "postfit"
253257

254-
all_hists = load_hists_uproot(self.fit_diagnostics_file, fit_type)
258+
all_hists = load_hists_uproot(self.fit_diagnostics_file, self.fit_type)
255259
process_insts = list(map(self.config_inst.get_process, self.processes))
256260

257261
for channel, h_in in all_hists.items():
258262
has_category = self.inference_model_inst.has_category(channel)
259263
if not has_category:
260264
logger.warning(f"Category {channel} is not part of the inference model {self.inference_model}")
261265

262-
hists = defaultdict(OrderedDict)
266+
hist_map = defaultdict(list)
263267

268+
# First map process inst for plotting to processes of root shapes
264269
for proc_key in list(h_in.keys()):
265270
proc_inst = None
266271
# try getting the config process via InferenceModel
@@ -272,23 +277,32 @@ def run(self):
272277
# try getting proc inst directly via config
273278
proc_inst = self.config_inst.get_process(proc_key, default=None)
274279

275-
# replace string keys with process instances
280+
# replace string keys with process instances
281+
# map HHinference processes to plotting proc_inst
276282
if proc_inst:
277283
plot_proc = [
278284
proc for proc in process_insts if proc.has_process(proc_inst) or proc.name == proc_inst.name
279285
]
280286
if len(plot_proc) != 1:
281287
if len(plot_proc) > 1:
282-
raise Exception(f"{proc_key} was assigned to more then one porcess insts ({plot_proc}) ")
288+
logger.warning(f"{proc_key} was assigned to more then one process insts ({plot_proc}) ")
283289
else:
284290
logger.warning(f"{proc_key} in root file, but won't be plotted.")
285-
continue
291+
continue
286292

287-
if plot_proc[0] not in hists:
288-
hists[plot_proc[0]] = {}
289-
hists[plot_proc[0]] = h_in[proc_key]
293+
if plot_proc[0] not in hist_map:
294+
hist_map[plot_proc[0]] = [proc_key]
290295
else:
291-
hists[plot_proc[0]] = hists[plot_proc[0]] + h_in[proc_key]
296+
hist_map[plot_proc[0]].append(proc_key)
297+
298+
# Plot Pre/Postfit plot for each channel
299+
for channel, h_in in all_hists.items():
300+
hists = defaultdict(OrderedDict)
301+
for proc in hist_map:
302+
plot_proc = proc.copy()
303+
hists[plot_proc] = h_in[hist_map[proc][0]]
304+
for p in hist_map[proc][1:]:
305+
hists[plot_proc] += h_in[p]
292306

293307
if has_category:
294308
inference_category = self.inference_model_inst.get_category(channel)
@@ -299,14 +313,18 @@ def run(self):
299313
config_category = od.Category(channel, id=1)
300314
variable_inst = od.Variable("dummy")
301315

316+
# take copy of proc_inst so labeling, scaling etc. is not modified on the proc inst directly
317+
318+
# __import__("IPYthon").embed()
319+
h = hists.copy()
302320
# call the plot function
303321
fig, _ = self.call_plot_func(
304322
self.plot_function,
305-
hists=hists,
323+
h=h,
306324
config_inst=self.config_inst,
307325
category_inst=config_category,
308326
variable_insts=variable_inst,
309327
**self.get_plot_parameters(),
310328
)
311329

312-
outp["plots"].child(f"{channel}_{fit_type}.pdf", type="f").dump(fig, formatter="mpl")
330+
outp["plots"].child(f"{channel}_{self.fit_type}.pdf", type="f").dump(fig, formatter="mpl")

0 commit comments

Comments
 (0)