
Commit 13970d5: Modified PostfitPlot task

Parent: 490fcc1
11 files changed (+287, -148 lines)

hbw/config/defaults_and_groups.py (+13, -4)

```diff
@@ -150,6 +150,16 @@ def set_config_defaults_and_groups(config_inst):
             "vv",
             "h_ggf", "h_vbf", "zh", "wh", "zh_gg", "tth",
         ],
+        "test_postfit": [
+            "hh_vbf_hbb_hww2l2nu_kv1_k2v1_kl1",
+            "hh_ggf_hbb_hww2l2nu_kl1_kt1",
+            "st",
+            "tt",
+            "dy",
+            "w_lnu",
+            "vv",
+            "h",
+        ],
        "all": ["*"],
        "default": ["hh_ggf_hbb_hvv_kl1_kt1", "hh_vbf_hbb_hvv_kv1_k2v1_kl1", "tt", "dy", "st", "vv", "w_lnu", "h"],  # noqa: E501
        "sl": ["hh_ggf_hbb_hvv_kl1_kt1", "hh_vbf_hbb_hvv_kv1_k2v1_kl1", "tt", "qcd", "st", "dy", "vv", "w_lnu", "h"],  # noqa: E501
@@ -303,6 +313,7 @@ def set_config_defaults_and_groups(config_inst):
            "sr__2mu__1b__ml_hh_ggf_hbb_hvv2l2nu_kl1_kt1", "sr__2mu__2b__ml_hh_ggf_hbb_hvv2l2nu_kl1_kt1",
            "sr__2e__1b__ml_hh_ggf_hbb_hvv2l2nu_kl1_kt1", "sr__2e__2b__ml_hh_ggf_hbb_hvv2l2nu_kl1_kt1",
            "sr__emu__1b__ml_hh_ggf_hbb_hvv2l2nu_kl1_kt1", "sr__emu__2b__ml_hh_ggf_hbb_hvv2l2nu_kl1_kt1",
+            "sr__1b", "sr__2b",
        ),
        "vbfSR_dl": (
            "sr__1b__ml_hh_vbf_hbb_hvv2l2nu_kv1_k2v1_kl1", "sr__2b__ml_hh_vbf_hbb_hvv2l2nu_kv1_k2v1_kl1",
@@ -375,10 +386,8 @@ def set_config_defaults_and_groups(config_inst):
            for proc, _, _ in config_inst.walk_processes() if proc.has_tag("is_signal")
        },
        "dilep": {
-            "hh_ggf_hbb_hvv2l2nu_kl0_kt1": {"scale": 10000, "unstack": True},
-            "hh_ggf_hbb_hvv2l2nu_kl1_kt1": {"scale": 10000, "unstack": True},
-            "hh_ggf_hbb_hvv2l2nu_kl2p45_kt1": {"scale": 10000, "unstack": True},
-            "hh_ggf_hbb_hvv2l2nu_kl5_kt1": {"scale": 10000, "unstack": True},
+            "hh_vbf_hbb_hww2l2nu_kv1_k2v1_kl1": {"scale": 90000, "unstack": True},
+            "hh_ggf_hbb_hww2l2nu_kl1_kt1": {"scale": 10000, "unstack": True},
        },
        "dileptest": {
            "hh_ggf_hbb_hvv2l2nu_kl1_kt1": {"scale": 10000, "unstack": True},
```

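The new "test_postfit" entry adds a named process group that tasks can reference instead of listing processes one by one. As a rough illustration of how such a group lookup behaves (a minimal sketch, not the columnflow implementation; the storage location of the groups is assumed):

```python
# Hypothetical, simplified resolution of a named process group; in the real
# config these live on the config instance (e.g. as an auxiliary mapping).
process_groups = {
    "test_postfit": [
        "hh_vbf_hbb_hww2l2nu_kv1_k2v1_kl1",
        "hh_ggf_hbb_hww2l2nu_kl1_kt1",
        "st", "tt", "dy", "w_lnu", "vv", "h",
    ],
    "all": ["*"],
}

def resolve_processes(group_name: str) -> list[str]:
    # unknown names fall back to being treated as a single process name
    return process_groups.get(group_name, [group_name])

print(resolve_processes("test_postfit"))
```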
hbw/inference/base.py (+1, -1)

```diff
@@ -89,7 +89,7 @@ def config_variable(self: InferenceModel, config_cat_inst: od.Config):
         dnn_proc = dnn_cat.replace("ml_", "")
         return f"mlscore.{dnn_proc}"
     else:
-        return "mli_mbb"
+        return "mli_lep_pt"


 def customize_category(self: InferenceModel, cat_inst: DotDict, config_cat_inst: od.Config):
     """ Function to allow customizing the inference category """
```

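For context, the branch logic around this change picks the fitted variable per inference category: ML categories fit the DNN score of their target node, and everything else now falls back to the lepton pt instead of mbb. A sketch of that logic (the `if` condition is paraphrased, since the hunk only shows the branches):

```python
# Simplified sketch; the real method derives dnn_cat from the config category.
def config_variable_sketch(dnn_cat):
    if dnn_cat is not None:  # assumed condition, not shown in the hunk
        dnn_proc = dnn_cat.replace("ml_", "")
        return f"mlscore.{dnn_proc}"
    else:
        return "mli_lep_pt"
```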
hbw/inference/dl.py (+51, -3)

```diff
@@ -240,7 +240,7 @@
 })


-dl.derive("dl_ml_study_1", cls_dict={
+dl_ml_study_1 = dl.derive("dl_ml_study_1", cls_dict={
     "ml_model_name": "dl_22post_ml_study_1",
     "config_categories": [
         "sr__1b__ml_signal_ggf",
@@ -283,7 +283,11 @@
     "systematics": rate_systematics,
 })

-dl.derive("dl_ml_study_3", cls_dict={
+dl_ml_study_1.derive("dl_ml_study_1_handle", cls_dict={
+    "ml_model_name": "dl_22post_ml_study_1_handle",
+})
+
+dl_ml_study_3 = dl.derive("dl_ml_study_3", cls_dict={
     "ml_model_name": "dl_22_procs1_w0",
     "config_categories": [
         "sr__1b__ml_hh_ggf_hbb_hvv2l2nu_kl1_kt1",
@@ -325,7 +329,11 @@
     "systematics": rate_systematics,
 })

-dl.derive("dl_ml_study_2", cls_dict={
+dl_ml_study_3.derive("dl_ml_study_3_handle", cls_dict={
+    "ml_model_name": "dl_22_procs1_w0_handle",
+})
+
+dl_ml_study_2 = dl.derive("dl_ml_study_2", cls_dict={
     "ml_model_name": "dl_22post_ml_study_2",
     "config_categories": [
         "sr__1b__ml_signal_ggf2",
@@ -367,6 +375,14 @@
     "systematics": rate_systematics,
 })

+dl_ml_study_2.derive("dl_ml_study_2_handle", cls_dict={
+    "ml_model_name": "dl_22post_ml_study_2_handle",
+})
+
+dl_ml_study_2.derive("dl_ml_study_2_ignore", cls_dict={
+    "ml_model_name": "dl_22post_ml_study_2",
+})
+
 dl.derive("dl_hww_and_hzz", cls_dict={
     "processes": [
         "hh_ggf_hbb_hww_kl0_kt1",
@@ -531,3 +547,35 @@
     "systematics": rate_systematics},
 )
 dl.derive("dl_rates_only", cls_dict={"systematics": rate_systematics})
+
+dl.derive("dl_postfit_test", cls_dict={
+    "ml_model_name": None,
+    "config_categories": [
+        "sr__1b",
+        "sr__2b",
+    ],
+    "processes": [
+        # "hh_vbf_hbb_hww2l2nu_kvm0p012_k2v0p03_kl10p2",
+        "hh_vbf_hbb_hww2l2nu_kv1p74_k2v1p37_kl14p4",
+        "hh_vbf_hbb_hww2l2nu_kvm0p758_k2v1p44_klm19p3",
+        "hh_vbf_hbb_hww2l2nu_kvm0p012_k2v0p03_kl10p2",
+        "hh_vbf_hbb_hww2l2nu_kvm2p12_k2v3p87_klm5p96",
+        "hh_vbf_hbb_hww2l2nu_kv1_k2v1_kl1",
+        "hh_vbf_hbb_hww2l2nu_kv1_k2v0_kl1",
+        "hh_vbf_hbb_hww2l2nu_kvm0p962_k2v0p959_klm1p43",
+        "hh_vbf_hbb_hww2l2nu_kvm1p21_k2v1p94_klm0p94",
+        "hh_vbf_hbb_hww2l2nu_kvm1p6_k2v2p72_klm1p36",
+        "hh_vbf_hbb_hww2l2nu_kvm1p83_k2v3p57_klm3p39",
+        "hh_ggf_hbb_hww2l2nu_kl0_kt1",
+        "hh_ggf_hbb_hww2l2nu_kl1_kt1",
+        "hh_ggf_hbb_hww2l2nu_kl2p45_kt1",
+        "hh_ggf_hbb_hww2l2nu_kl5_kt1",
+        "st",
+        "tt",
+        "dy",
+        "w_lnu",
+        "vv",
+        "h_ggf", "h_vbf", "zh", "wh", "zh_gg", "tth",
+    ],
+    "systematics": rate_systematics,
+})
```

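The renames from bare `dl.derive(...)` to `name = dl.derive(...)` are what enable the new `_handle` and `_ignore` variants: `derive` returns the derived class, so keeping a reference allows deriving again from the derived model. A minimal sketch of that pattern (not the actual base-class implementation):

```python
# Hypothetical, simplified derive(): create a subclass with attribute overrides.
class Model:
    ml_model_name = None

    @classmethod
    def derive(cls, name: str, cls_dict: dict | None = None):
        return type(name, (cls,), dict(cls_dict or {}))

dl = Model
dl_ml_study_1 = dl.derive("dl_ml_study_1", cls_dict={"ml_model_name": "dl_22post_ml_study_1"})

# keeping the returned class allows chaining a second-level variant:
dl_ml_study_1_handle = dl_ml_study_1.derive(
    "dl_ml_study_1_handle",
    cls_dict={"ml_model_name": "dl_22post_ml_study_1_handle"},
)
print(dl_ml_study_1_handle.ml_model_name)  # dl_22post_ml_study_1_handle
```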
hbw/ml/data_loader.py (+40, -6)

```diff
@@ -141,9 +141,11 @@ def __init__(
         """
         self._ml_model_inst = ml_model_inst
         self._process = process
+        self._skip_mask = skip_mask

         proc_mask, _ = get_proc_mask(events, process, ml_model_inst.config_inst)
         self._stats = stats
+        # __import__("IPython").embed()
         # del_sub_proc_stats(process, sub_id)
         if not skip_mask:
             self._events = events[proc_mask]
@@ -177,6 +179,10 @@ def parameters(self):
         }
         return self._parameters

+    @property
+    def skip_mask(self):
+        return self._skip_mask
+
     @property
     def ml_model_inst(self):
         return self._ml_model_inst
@@ -255,6 +261,14 @@ def shuffle_indices(self) -> np.ndarray:
         self._shuffle_indices = np.random.permutation(self.n_events)
         return self._shuffle_indices

+    @property
+    def num_event_per_process(self) -> str:
+        if not self.skip_mask:
+            self._num_events_per_process = "num_events_pos_weights_per_process"
+        else:
+            self._num_events_per_process = "num_events_per_process"
+        return self._num_events_per_process
+
     def get_xsec_train_weights(self) -> np.ndarray:
         """
         Weighting such that each event has roughly the same weight,
@@ -267,10 +281,20 @@ def get_xsec_train_weights(self) -> np.ndarray:
             raise Exception("cannot determine train weights without stats")

         _, sub_id = get_proc_mask(self._events, self.process, self.ml_model_inst.config_inst)
-        sum_abs_weights = np.sum([self.stats[self.process]["sum_abs_weights_per_process"][str(id)] for id in sub_id])
-        num_events = np.sum([self.stats[self.process]["num_events_per_process"][str(id)] for id in sub_id])
+        sum_weights = np.sum([self.stats[self.process]["sum_pos_weights_per_process"][str(id)] for id in sub_id])
+        num_events = np.sum(
+            [self.stats[self.process][self.num_event_per_process][str(id)] for id in sub_id],
+        )
+        # if not self.skip_mask:
+        #     num_events = np.sum(
+        #         [self.stats[self.process]["num_events_pos_weights_per_process"][str(id)] for id in sub_id],
+        #     )
+        # else:
+        #     num_events = np.sum(
+        #         [self.stats[self.process]["num_events_per_process"][str(id)] for id in sub_id],
+        #     )

-        xsec_train_weights = self.weights / sum_abs_weights * num_events
+        xsec_train_weights = self.weights / sum_weights * num_events

         return xsec_train_weights

@@ -286,7 +310,15 @@ def get_equal_train_weights(self) -> np.ndarray:

         combined_proc_inst = self.ml_model_inst.config_inst.get_process(self.process)
         _, sub_id_proc = get_proc_mask(self._events, self.process, self.ml_model_inst.config_inst)
-        num_events = np.sum([self.stats[self.process]["num_events_per_process"][str(id)] for id in sub_id_proc])
+        num_events = np.sum(
+            [self.stats[self.process][self.num_event_per_process][str(id)] for id in sub_id_proc],
+        )
+        # if not self.skip_mask:
+        #     num_events = np.sum(
+        #         [self.stats[self.process]["num_events_pos_weights_per_process"][str(id)] for id in sub_id_proc],
+        #     )
+        # else:
+        #     num_events = np.sum([self.stats[self.process]["num_events_per_process"][str(id)] for id in sub_id_proc])
         targeted_sum_of_weights_per_process = (
             num_events / len(combined_proc_inst.x.ml_config.sub_processes)
         )
@@ -724,8 +756,10 @@ def prediction(self) -> np.ndarray:
             self._prediction = self.load_data("prediction")
         else:
             # calculate prediction if needed
-            if not hasattr(self._ml_model_inst, "trained_model"):
+            if not hasattr(self._ml_model_inst, "best_model"):
+            # if not hasattr(self._ml_model_inst, "trained_model"):
                 raise Exception("No trained model found in the MLModel instance. Cannot calculate prediction.")
-            self._prediction = predict_numpy_on_batch(self._ml_model_inst.trained_model, self.features)
+            # self._prediction = predict_numpy_on_batch(self._ml_model_inst.trained_model, self.features)
+            self._prediction = predict_numpy_on_batch(self._ml_model_inst.best_model, self.features)

         return self._prediction  # TODO ML best model
```

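Numerically, the updated xsec weighting rescales each process's event weights so that they sum to the number of kept events, with the denominator now taken from the sum of positive weights (and the event count chosen consistently via `num_event_per_process`). A small numpy illustration with made-up numbers:

```python
# Hedged sketch of xsec_train_weights = weights / sum_weights * num_events.
import numpy as np

weights = np.array([0.5, 1.5, 2.0])  # weights of the kept (positive-weight) events
sum_pos_weights = 4.0                # stats: "sum_pos_weights_per_process"
num_events = 3                       # stats: "num_events_pos_weights_per_process"

xsec_train_weights = weights / sum_pos_weights * num_events
# sums exactly to num_events when all kept weights are positive
print(xsec_train_weights, xsec_train_weights.sum())
```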
hbw/ml/derived/dl.py (+19, -2)

```diff
@@ -293,7 +293,7 @@ def setup(self):
                "hh_vbf_hbb_hvv2l2nu_kvm1p6_k2v2p72_klm1p36",
                "hh_vbf_hbb_hvv2l2nu_kvm1p83_k2v3p57_klm3p39",
            ],
-            "weighting": "xsec",
+            "weighting": "equal",
        },
    },
    "processes": [
@@ -345,6 +345,7 @@ def setup(self):

 dl_22post_ml_study_1 = dl_22post.derive("dl_22post_ml_study_1", cls_dict={
     "training_configs": lambda self, requested_configs: ["c22post"],
+    "negative_weights": "ignore",
     "combine_processes": {
         "signal_ggf": {
             # "name": "tt_and_st",
@@ -381,8 +382,13 @@
     ],
 })

+dl_22post_ml_study_1_handle = dl_22post_ml_study_1.derive("dl_22post_ml_study_1_handle", cls_dict={
+    "negative_weights": "handle",
+})
+
 dl_22post_ml_study_2 = dl_22post.derive("dl_22post_ml_study_2", cls_dict={
     "training_configs": lambda self, requested_configs: ["c22post"],
+    "negative_weights": "ignore",
     "combine_processes": {
         "signal_ggf2": {
             # "name": "tt_and_st",
@@ -407,7 +413,7 @@
                "hh_vbf_hbb_hvv2l2nu_kvm1p83_k2v3p57_klm3p39",

            ],
-            "weighting": "xsec",
+            "weighting": "equal",
        },
    },
    "processes": [
@@ -419,6 +425,11 @@
        "h",
    ],
 })
+
+dl_22post_ml_study_2_handle = dl_22post_ml_study_2.derive("dl_22post_ml_study_2_handle", cls_dict={
+    "negative_weights": "handle",
+})
+
 #
 # setups with different processes (0: baseline, 1: add SM vbf + single H, 2: add SL+all HH variations)
 # NOTE: we should decide which signal processes exactly to use:
@@ -435,6 +446,7 @@
 })
 dl_22_procs1_w0 = dl_22_procs1.derive("dl_22_procs1_w0", cls_dict={
     "training_configs": lambda self, requested_configs: ["c22post"],
+    "negative_weights": "ignore",
     "ml_process_weights": {
         "hh_ggf_hbb_hvv2l2nu_kl1_kt1": 1,
         "hh_vbf_hbb_hvv2l2nu_kv1_k2v1_kl1": 1,
@@ -444,6 +456,11 @@
         "h": 1,
     },
 })
+
+dl_22_procs1_w0_handle = dl_22_procs1_w0.derive("dl_22_procs1_w0_handle", cls_dict={
+    "negative_weights": "handle",
+})
+
 dl_22_procs1_w1 = dl_22_procs1.derive("dl_22_procs1_w1", cls_dict={
     "ml_process_weights": {
         "hh_ggf_hbb_hvv2l2nu_kl1_kt1": 1,
```

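The switch from `"weighting": "xsec"` to `"equal"` for the combined VBF signal changes how its sub-processes are balanced: per `get_equal_train_weights` in this same commit, each sub-process is targeted at an equal share of the combined event count, `num_events / n_sub_processes`. A hedged illustration (the final rescaling step is an assumed continuation of the shown hunk):

```python
# Sketch of "equal" weighting for a combined process; event weights and
# process names are made up for illustration.
import numpy as np

sub_process_weights = {
    "hh_vbf_kv1_k2v1_kl1": np.array([1.0, 1.0]),
    "hh_vbf_kv1_k2v0_kl1": np.array([4.0, 2.0]),
}
num_events = sum(len(w) for w in sub_process_weights.values())
target = num_events / len(sub_process_weights)  # equal share per sub-process

equal_train_weights = {
    name: w / w.sum() * target for name, w in sub_process_weights.items()
}
# every sub-process now contributes the same total weight to the training
print({name: round(w.sum(), 3) for name, w in equal_train_weights.items()})
```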
hbw/ml/mixins.py (+2, -2)

```diff
@@ -55,7 +55,7 @@ def prepare_ml_model(
         import tensorflow.keras as keras
         from keras.models import Sequential
         from keras.layers import Dense, BatchNormalization
-        # from hbw.ml.tf_util import cumulated_crossentropy, categorical_crossentropy
+        from hbw.ml.tf_util import cumulated_crossentropy  # , categorical_crossentropy
@@ -110,7 +110,7 @@ def prepare_ml_model(
         model.compile(
             # NOTE: we'd preferably use the Keras CCE, but it does not work when assigning one event
             # to multiple classes (target with multiple entries != 0)
-            loss="cumulated_crossentropy",
+            loss=cumulated_crossentropy,
             optimizer=optimizer,
             metrics=["categorical_accuracy"],
             weighted_metrics=["categorical_accuracy"],
```

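This fix matters because Keras resolves string losses only from its own registry; a custom loss must be passed as a callable (or explicitly registered). A minimal self-contained sketch of the failure mode and the fix, with a stand-in loss (not the actual `hbw.ml.tf_util.cumulated_crossentropy`):

```python
import tensorflow as tf


def cumulated_crossentropy_sketch(y_true, y_pred):
    # hypothetical stand-in: a cross-entropy that tolerates targets with
    # multiple non-zero entries (the reason the stock Keras CCE is avoided)
    eps = tf.keras.backend.epsilon()
    return -tf.reduce_sum(y_true * tf.math.log(y_pred + eps), axis=-1)


model = tf.keras.Sequential([
    tf.keras.layers.Dense(8, activation="relu", input_shape=(4,)),
    tf.keras.layers.Dense(3, activation="softmax"),
])

# loss="cumulated_crossentropy" would fail with an "unknown loss" error,
# because the string is not in the Keras registry; the callable works:
model.compile(loss=cumulated_crossentropy_sketch, optimizer="adam")
```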
hbw/ml/stats.py (+5, -4)

```diff
@@ -76,15 +76,16 @@ def ml_preparation(
     events = set_ak_column_f32(events, "event_weight", weight)
     stats["sum_weights"] += float(ak.sum(weight, axis=0))
     weight_map["sum_weights"] = weight
-    weight_map["sum_abs_weights"] = (weight, weight > 0)
-    weight_map["sum_pos_weights"] = np.abs(weight)
+    weight_map["sum_pos_weights"] = (weight, weight > 0)
+    weight_map["sum_abs_weights"] = np.abs(weight)
+    weight_map["num_events_pos_weights"] = weight > 0

     # normalization weight only
     norm_weight = events["stitched_normalization_weight"]
     stats["sum_norm_weights"] += float(ak.sum(norm_weight, axis=0))
     weight_map["sum_norm_weights"] = norm_weight
-    weight_map["sum_abs_norm_weights"] = (norm_weight, norm_weight > 0)
-    weight_map["sum_pos_norm_weights"] = np.abs(norm_weight)
+    weight_map["sum_pos_norm_weights"] = (norm_weight, norm_weight > 0)
+    weight_map["sum_abs_norm_weights"] = np.abs(norm_weight)

     group_map = {
         "process": {
```

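The swap fixes mislabeled entries: assuming the columnflow-style convention that a plain array in the weight map is summed as-is while a `(values, mask)` tuple sums only the masked entries, "sum_pos_weights" previously held the absolute-value sum and vice versa. A small numpy check of what each entry now computes:

```python
# Hedged sketch of the assumed (values, mask) bookkeeping convention.
import numpy as np

weight = np.array([0.8, -0.3, 1.2, -0.1])

sum_pos_weights = weight[weight > 0].sum()        # (weight, weight > 0) -> 2.0
sum_abs_weights = np.abs(weight).sum()            # np.abs(weight)       -> 2.4
num_events_pos_weights = int((weight > 0).sum())  # weight > 0           -> 2
```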
hbw/plotting/plot_fits.py (+2, -1)

```diff
@@ -52,7 +52,8 @@ def scalable_exponnorm(x, A, loc, scale, K=1):


 def plot_fit(
-    hists: OrderedDict[od.Process, hist.Hist],
+    hists: dict[str, OrderedDict[od.Process, hist.Hist]],
+    # hists: OrderedDict[od.Process, hist.Hist],
     config_inst: od.Config,
     category_inst: od.Category,
     variable_insts: list[od.Variable],
```

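The widened annotation suggests `plot_fit` now receives one OrderedDict of per-process histograms per fit stage rather than a single mapping. A sketch of the assumed input shape (the "prefit"/"postfit" keys are a guess based on the commit message; the real inner keys are `od.Process` instances, strings are used here as stand-ins):

```python
from collections import OrderedDict
import hist

h = hist.Hist.new.Reg(10, 0, 1, name="x").Weight()

# hypothetical structure matching dict[str, OrderedDict[od.Process, hist.Hist]]
hists = {
    "prefit": OrderedDict({"tt": h.copy(), "dy": h.copy()}),
    "postfit": OrderedDict({"tt": h.copy(), "dy": h.copy()}),
}
```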