Skip to content

Commit 305bf0f

Browse files
authored
Merge pull request #74 from uhh-cms/feature/rework_event_selection
Rework event selection
2 parents b79a95d + 84e2309 commit 305bf0f

38 files changed

+2946
-895
lines changed

hbw/analysis/create_analysis.py

+17-8
Original file line numberDiff line numberDiff line change
@@ -72,46 +72,55 @@ def create_hbw_analysis(
7272
campaign_run3_2022_postEE_nano_v12 = cmsdb.campaigns.run3_2022_postEE_nano_v12.campaign_run3_2022_postEE_nano_v12
7373
campaign_run3_2022_postEE_nano_v12.x.EE = "post"
7474

75-
# default configs
75+
# 2017
7676
c17 = add_config( # noqa
7777
analysis_inst,
7878
campaign_run2_2017_nano_v9.copy(),
7979
config_name="c17",
80-
config_id=17,
80+
config_id=1700,
81+
add_dataset_extensions=False,
8182
)
82-
# configs with limited number of files
8383
l17 = add_config( # noqa
8484
analysis_inst,
8585
campaign_run2_2017_nano_v9.copy(),
8686
config_name="l17",
87-
config_id=117,
87+
config_id=1701,
8888
limit_dataset_files=2,
89+
add_dataset_extensions=False,
8990
)
91+
92+
# 2022 preEE
9093
c22pre = add_config( # noqa
9194
analysis_inst,
9295
campaign_run3_2022_preEE_nano_v12.copy(),
9396
config_name="c22pre",
94-
config_id=2201,
97+
config_id=2200,
98+
add_dataset_extensions=False,
9599
)
96100
l22pre = add_config( # noqa
97101
analysis_inst,
98102
campaign_run3_2022_preEE_nano_v12.copy(),
99103
config_name="l22pre",
100-
config_id=12201,
104+
config_id=2201,
101105
limit_dataset_files=2,
106+
add_dataset_extensions=False,
102107
)
108+
109+
# 2022 postEE
103110
c22post = add_config( # noqa
104111
analysis_inst,
105112
campaign_run3_2022_postEE_nano_v12.copy(),
106113
config_name="c22post",
107-
config_id=2202,
114+
config_id=2210,
115+
add_dataset_extensions=False,
108116
)
109117
l22post = add_config( # noqa
110118
analysis_inst,
111119
campaign_run3_2022_postEE_nano_v12.copy(),
112120
config_name="l22post",
113-
config_id=12202,
121+
config_id=2211,
114122
limit_dataset_files=2,
123+
add_dataset_extensions=False,
115124
)
116125

117126
return analysis_inst

hbw/config/categories.py

+135-49
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323

2424
import law
2525

26+
from time import time
27+
2628
from columnflow.config_util import create_category_combinations
2729
from columnflow.ml import MLModel
2830
from hbw.util import call_once_on_config
@@ -72,62 +74,126 @@ def add_gen_categories(config: od.Config) -> None:
7274

7375

7476
@call_once_on_config()
75-
def add_categories_selection(config: od.Config) -> None:
76-
"""
77-
Adds categories to a *config*, that are typically produced in `SelectEvents`.
78-
"""
77+
def add_abcd_categories(config: od.Config) -> None:
78+
config.add_category(
79+
name="sr",
80+
id=1,
81+
selection="catid_sr",
82+
)
83+
config.add_category(
84+
name="fake",
85+
id=2,
86+
selection="catid_fake",
87+
)
88+
config.add_category(
89+
name="highmet",
90+
id=3,
91+
selection="catid_highmet",
92+
label=r"MET \geq 20",
93+
)
94+
config.add_category(
95+
name="lowmet",
96+
id=6,
97+
selection="catid_lowmet",
98+
label=r"MET < 20",
99+
)
79100

80-
# adds categories based on the existence of gen particles
81-
add_gen_categories(config)
82101

102+
@call_once_on_config()
103+
def add_lepton_categories(config: od.Config) -> None:
83104
config.x.lepton_channels = {
84105
"sl": ("1e", "1mu"),
85106
"dl": ("2e", "2mu", "emu"),
86107
}[config.x.lepton_tag]
87108

88109
config.add_category(
89110
name="incl",
90-
id=1,
111+
id=0,
91112
selection="catid_selection_incl",
92113
label="Inclusive",
93114
)
94115

95116
cat_1e = config.add_category( # noqa
96117
name="1e",
97-
id=1000,
118+
id=10,
98119
selection="catid_selection_1e",
99120
label="1 Electron",
100121
)
101122

102123
cat_1mu = config.add_category( # noqa
103124
name="1mu",
104-
id=2000,
125+
id=20,
105126
selection="catid_selection_1mu",
106127
label="1 Muon",
107128
)
108129
# dl categories
109130
cat_2e = config.add_category( # noqa
110131
name="2e",
111-
id=3000,
132+
id=30,
112133
selection="catid_selection_2e",
113134
label="2 Electron",
114135
)
115136

116137
cat_2mu = config.add_category( # noqa
117138
name="2mu",
118-
id=4000,
139+
id=40,
119140
selection="catid_selection_2mu",
120141
label="2 Muon",
121142
)
122143

123144
cat_emu = config.add_category( # noqa
124145
name="emu",
125-
id=5000,
146+
id=50,
126147
selection="catid_selection_emu",
127148
label="1 Electron 1 Muon",
128149
)
129150

130151

152+
@call_once_on_config()
153+
def add_jet_categories(config: od.Config) -> None:
154+
cat_resolved = config.add_category( # noqa
155+
name="resolved",
156+
id=100,
157+
selection="catid_resolved",
158+
label="resolved",
159+
)
160+
cat_boosted = config.add_category( # noqa
161+
name="boosted",
162+
id=200,
163+
selection="catid_boosted",
164+
label="boosted",
165+
)
166+
167+
cat_1b = config.add_category( # noqa
168+
name="1b",
169+
id=300,
170+
selection="catid_1b",
171+
label="1b",
172+
)
173+
cat_2b = config.add_category( # noqa
174+
name="2b",
175+
id=600,
176+
selection="catid_2b",
177+
label="2b",
178+
)
179+
180+
181+
@call_once_on_config()
182+
def add_categories_selection(config: od.Config) -> None:
183+
"""
184+
Adds categories to a *config*, that are typically produced in `SelectEvents`.
185+
"""
186+
187+
# adds categories based on the existence of gen particles
188+
add_gen_categories(config)
189+
190+
# adds categories for ABCD background estimation
191+
add_abcd_categories(config)
192+
193+
# adds categories based on number of leptons
194+
add_lepton_categories(config)
195+
196+
131197
def name_fn(root_cats):
132198
cat_name = "__".join(cat.name for cat in root_cats.values())
133199
return cat_name
@@ -149,6 +215,10 @@ def add_categories_production(config: od.Config) -> None:
149215
"""
150216
Adds categories to a *config*, that are typically produced in `ProduceColumns`.
151217
"""
218+
if config.has_tag("add_categories_ml_called"):
219+
logger.warning("We should not call *add_categories_production* when also building ML categories")
220+
# when ML categories already exist, don't do anything
221+
return
152222
#
153223
# switch existing categories to different production module
154224
#
@@ -168,81 +238,99 @@ def add_categories_production(config: od.Config) -> None:
168238
cat_emu = config.get_category("emu")
169239
cat_emu.selection = "catid_emu"
170240

171-
#
172-
# define additional 'main' categories
173-
#
174-
175-
cat_resolved = config.add_category(
176-
name="resolved",
177-
id=10,
178-
selection="catid_resolved",
179-
label="resolved",
180-
)
181-
cat_boosted = config.add_category(
182-
name="boosted",
183-
id=20,
184-
selection="catid_boosted",
185-
label="boosted",
186-
)
187-
188-
cat_1b = config.add_category(
189-
name="1b",
190-
id=100,
191-
selection="catid_1b",
192-
label="1b",
193-
)
194-
cat_2b = config.add_category(
195-
name="2b",
196-
id=200,
197-
selection="catid_2b",
198-
label="2b",
199-
)
241+
add_jet_categories(config)
200242

201243
#
202244
# define all combinations of categories
203245
#
204246

205247
category_blocks = OrderedDict({
248+
"lepid": [config.get_category("sr"), config.get_category("fake")],
249+
# "met": [config.get_category("highmet"), config.get_category("lowmet")],
206250
"lep": [config.get_category(lep_ch) for lep_ch in config.x.lepton_channels],
207-
"jet": [cat_resolved, cat_boosted],
208-
"b": [cat_1b, cat_2b],
251+
"jet": [config.get_category("resolved"), config.get_category("boosted")],
252+
"b": [config.get_category("1b"), config.get_category("2b")],
209253
})
210-
254+
t0 = time()
211255
n_cats = create_category_combinations(
212256
config,
213257
category_blocks,
214258
name_fn=name_fn,
215259
kwargs_fn=kwargs_fn,
216260
skip_existing=False, # there should be no existing sub-categories
217261
)
218-
logger.info(f"Number of produced category insts: {n_cats}")
262+
logger.info(f"Number of produced category insts: {n_cats} (took {(time() - t0):.3f}s)")
219263

220264

221265
@call_once_on_config()
222266
def add_categories_ml(config, ml_model_inst):
267+
if config.has_tag("add_categories_production_called"):
268+
raise Exception("We should not call *add_categories_production* when also building ML categories")
269+
#
270+
# prepare non-ml categories
271+
#
272+
273+
cat_1e = config.get_category("1e")
274+
cat_1e.selection = "catid_1e"
275+
276+
cat_1mu = config.get_category("1mu")
277+
cat_1mu.selection = "catid_1mu"
278+
279+
cat_2e = config.get_category("2e")
280+
cat_2e.selection = "catid_2e"
281+
282+
cat_2mu = config.get_category("2mu")
283+
cat_2mu.selection = "catid_2mu"
284+
285+
cat_emu = config.get_category("emu")
286+
cat_emu.selection = "catid_emu"
287+
288+
add_jet_categories(config)
289+
290+
#
291+
# add parent ml model categories
292+
#
293+
223294
# if not already done, get the ml_model instance
224295
if isinstance(ml_model_inst, str):
225296
ml_model_inst = MLModel.get_cls(ml_model_inst)(config)
226297

227298
# add ml categories directly to the config
299+
# NOTE: this is a bit dangerous, because our ID depends on the MLModel, but
300+
# we can reconfigure our MLModel after having created these categories
228301
ml_categories = []
229302
for i, proc in enumerate(ml_model_inst.processes):
230303
ml_categories.append(config.add_category(
231304
# NOTE: name and ID are unique as long as we don't use
232305
# multiple ml_models simultaneously
233306
name=f"ml_{proc}",
234-
id=(i + 1) * 10000,
307+
id=(i + 1) * 1000,
235308
selection=f"catid_ml_{proc}",
236309
label=f"ml_{proc}",
237310
))
238311

312+
#
313+
# create combination of categories
314+
#
315+
316+
# NOTE: building this many categories takes forever: has to be improved...
239317
category_blocks = OrderedDict({
318+
"lepid": [config.get_category("sr"), config.get_category("fake")],
319+
# "met": [config.get_category("highmet"), config.get_category("lowmet")],
240320
"lep": [config.get_category(lep_ch) for lep_ch in config.x.lepton_channels],
241321
"jet": [config.get_category("resolved"), config.get_category("boosted")],
242322
"b": [config.get_category("1b"), config.get_category("2b")],
243323
"dnn": ml_categories,
244324
})
245325

326+
# # NOTE: temporary solution: only build DNN leafs
327+
# combined_categories = [cat for cat in config.get_leaf_categories() if len(cat.parent_categories) != 0]
328+
# category_blocks = OrderedDict({
329+
# "leafs": combined_categories,
330+
# "dnn": ml_categories,
331+
# })
332+
333+
t0 = time()
246334
# create combination of categories
247335
n_cats = create_category_combinations(
248336
config,
@@ -251,6 +339,4 @@ def add_categories_ml(config, ml_model_inst):
251339
kwargs_fn=kwargs_fn,
252340
skip_existing=True,
253341
)
254-
logger.info(f"Number of produced ml category insts: {n_cats}")
255-
256-
# TODO unfinished
342+
logger.info(f"Number of produced ml category insts: {n_cats} (took {(time() - t0):.3f}s)")

0 commit comments

Comments
 (0)