23
23
24
24
import law
25
25
26
+ from time import time
27
+
26
28
from columnflow .config_util import create_category_combinations
27
29
from columnflow .ml import MLModel
28
30
from hbw .util import call_once_on_config
@@ -72,62 +74,126 @@ def add_gen_categories(config: od.Config) -> None:
72
74
73
75
74
76
@call_once_on_config()
def add_abcd_categories(config: od.Config) -> None:
    """
    Registers the four categories "sr", "fake", "highmet" and "lowmet" on *config*.

    "sr" and "fake" are added without a label, whereas "highmet" and "lowmet" carry
    a label stating their MET requirement.
    """
    # keyword arguments of the individual categories, in the order they are added
    abcd_specs = (
        {"name": "sr", "id": 1, "selection": "catid_sr"},
        {"name": "fake", "id": 2, "selection": "catid_fake"},
        {"name": "highmet", "id": 3, "selection": "catid_highmet", "label": r"MET \geq 20"},
        {"name": "lowmet", "id": 6, "selection": "catid_lowmet", "label": r"MET < 20"},
    )
    for spec in abcd_specs:
        config.add_category(**spec)
79
100
80
- # adds categories based on the existence of gen particles
81
- add_gen_categories (config )
82
101
102
@call_once_on_config()
def add_lepton_categories(config: od.Config) -> None:
    """
    Stores the lepton channels that belong to ``config.x.lepton_tag`` in
    ``config.x.lepton_channels`` and adds the inclusive category as well as one
    category per lepton channel to *config*.

    All five channel categories are added, independent of the lepton tag. A lepton
    tag other than "sl" or "dl" makes the channel lookup fail with a *KeyError*
    before any category is added.
    """
    channels_per_tag = {
        "sl": ("1e", "1mu"),
        "dl": ("2e", "2mu", "emu"),
    }
    config.x.lepton_channels = channels_per_tag[config.x.lepton_tag]

    # (name, id, selection, label) of each category, in the order they are added
    category_table = (
        ("incl", 0, "catid_selection_incl", "Inclusive"),
        ("1e", 10, "catid_selection_1e", "1 Electron"),
        ("1mu", 20, "catid_selection_1mu", "1 Muon"),
        # dl categories
        ("2e", 30, "catid_selection_2e", "2 Electron"),
        ("2mu", 40, "catid_selection_2mu", "2 Muon"),
        ("emu", 50, "catid_selection_emu", "1 Electron 1 Muon"),
    )
    for cat_name, cat_id, cat_selection, cat_label in category_table:
        config.add_category(
            name=cat_name,
            id=cat_id,
            selection=cat_selection,
            label=cat_label,
        )
129
150
130
151
152
@call_once_on_config()
def add_jet_categories(config: od.Config) -> None:
    """
    Adds the categories "resolved" and "boosted" followed by "1b" and "2b" to *config*.

    Every category uses its own name as label and "catid_<name>" as selection.
    """
    # (name, id) pairs in the order the categories are added
    jet_specs = (
        ("resolved", 100),
        ("boosted", 200),
        ("1b", 300),
        ("2b", 600),
    )
    for cat_name, cat_id in jet_specs:
        config.add_category(
            name=cat_name,
            id=cat_id,
            selection=f"catid_{cat_name}",
            label=cat_name,
        )
179
+
180
+
181
@call_once_on_config()
def add_categories_selection(config: od.Config) -> None:
    """
    Adds all categories to a *config* that are typically produced in `SelectEvents`.

    The individual groups of categories are added one after another by dedicated functions.
    """
    category_adders = (
        # categories based on the existence of gen particles
        add_gen_categories,
        # categories for ABCD background estimation
        add_abcd_categories,
        # categories based on number of leptons
        add_lepton_categories,
    )
    for add_fn in category_adders:
        add_fn(config)
195
+
196
+
131
197
def name_fn(root_cats):
    """
    Returns the names of all categories in *root_cats* joined by "__".

    *root_cats* is a mapping whose values provide a ``name`` attribute; the names are
    joined in the iteration order of the mapping.
    """
    names = [category.name for category in root_cats.values()]
    return "__".join(names)
@@ -149,6 +215,10 @@ def add_categories_production(config: od.Config) -> None:
149
215
"""
150
216
Adds categories to a *config*, that are typically produced in `ProduceColumns`.
151
217
"""
218
+ if config .has_tag ("add_categories_ml_called" ):
219
+ logger .warning ("We should not call *add_categories_production* when also building ML categories" )
220
+ # when ML categories already exist, don't do anything
221
+ return
152
222
#
153
223
# switch existing categories to different production module
154
224
#
@@ -168,81 +238,99 @@ def add_categories_production(config: od.Config) -> None:
168
238
cat_emu = config .get_category ("emu" )
169
239
cat_emu .selection = "catid_emu"
170
240
171
- #
172
- # define additional 'main' categories
173
- #
174
-
175
- cat_resolved = config .add_category (
176
- name = "resolved" ,
177
- id = 10 ,
178
- selection = "catid_resolved" ,
179
- label = "resolved" ,
180
- )
181
- cat_boosted = config .add_category (
182
- name = "boosted" ,
183
- id = 20 ,
184
- selection = "catid_boosted" ,
185
- label = "boosted" ,
186
- )
187
-
188
- cat_1b = config .add_category (
189
- name = "1b" ,
190
- id = 100 ,
191
- selection = "catid_1b" ,
192
- label = "1b" ,
193
- )
194
- cat_2b = config .add_category (
195
- name = "2b" ,
196
- id = 200 ,
197
- selection = "catid_2b" ,
198
- label = "2b" ,
199
- )
241
+ add_jet_categories (config )
200
242
201
243
#
202
244
# define all combinations of categories
203
245
#
204
246
205
247
category_blocks = OrderedDict ({
248
+ "lepid" : [config .get_category ("sr" ), config .get_category ("fake" )],
249
+ # "met": [config.get_category("highmet"), config.get_category("lowmet")],
206
250
"lep" : [config .get_category (lep_ch ) for lep_ch in config .x .lepton_channels ],
207
- "jet" : [cat_resolved , cat_boosted ],
208
- "b" : [cat_1b , cat_2b ],
251
+ "jet" : [config . get_category ( "resolved" ), config . get_category ( "boosted" ) ],
252
+ "b" : [config . get_category ( "1b" ), config . get_category ( "2b" ) ],
209
253
})
210
-
254
+ t0 = time ()
211
255
n_cats = create_category_combinations (
212
256
config ,
213
257
category_blocks ,
214
258
name_fn = name_fn ,
215
259
kwargs_fn = kwargs_fn ,
216
260
skip_existing = False , # there should be no existing sub-categories
217
261
)
218
- logger .info (f"Number of produced category insts: { n_cats } " )
262
+ logger .info (f"Number of produced category insts: { n_cats } (took { ( time () - t0 ):.3f } s) " )
219
263
220
264
221
265
@call_once_on_config ()
222
266
def add_categories_ml (config , ml_model_inst ):
267
+ if config .has_tag ("add_categories_production_called" ):
268
+ raise Exception ("We should not call *add_categories_production* when also building ML categories" )
269
+ #
270
+ # prepare non-ml categories
271
+ #
272
+
273
+ cat_1e = config .get_category ("1e" )
274
+ cat_1e .selection = "catid_1e"
275
+
276
+ cat_1mu = config .get_category ("1mu" )
277
+ cat_1mu .selection = "catid_1mu"
278
+
279
+ cat_2e = config .get_category ("2e" )
280
+ cat_2e .selection = "catid_2e"
281
+
282
+ cat_2mu = config .get_category ("2mu" )
283
+ cat_2mu .selection = "catid_2mu"
284
+
285
+ cat_emu = config .get_category ("emu" )
286
+ cat_emu .selection = "catid_emu"
287
+
288
+ add_jet_categories (config )
289
+
290
+ #
291
+ # add parent ml model categories
292
+ #
293
+
223
294
# if not already done, get the ml_model instance
224
295
if isinstance (ml_model_inst , str ):
225
296
ml_model_inst = MLModel .get_cls (ml_model_inst )(config )
226
297
227
298
# add ml categories directly to the config
299
+ # NOTE: this is a bit dangerous, because our ID depends on the MLModel, but
300
+ # we can reconfigure our MLModel after having created these categories
228
301
ml_categories = []
229
302
for i , proc in enumerate (ml_model_inst .processes ):
230
303
ml_categories .append (config .add_category (
231
304
# NOTE: name and ID is unique as long as we don't use
232
305
# multiple ml_models simultaneously
233
306
name = f"ml_{ proc } " ,
234
- id = (i + 1 ) * 10000 ,
307
+ id = (i + 1 ) * 1000 ,
235
308
selection = f"catid_ml_{ proc } " ,
236
309
label = f"ml_{ proc } " ,
237
310
))
238
311
312
+ #
313
+ # create combination of categories
314
+ #
315
+
316
+ # NOTE: building this many categories takes forever: has to be improved...
239
317
category_blocks = OrderedDict ({
318
+ "lepid" : [config .get_category ("sr" ), config .get_category ("fake" )],
319
+ # "met": [config.get_category("highmet"), config.get_category("lowmet")],
240
320
"lep" : [config .get_category (lep_ch ) for lep_ch in config .x .lepton_channels ],
241
321
"jet" : [config .get_category ("resolved" ), config .get_category ("boosted" )],
242
322
"b" : [config .get_category ("1b" ), config .get_category ("2b" )],
243
323
"dnn" : ml_categories ,
244
324
})
245
325
326
+ # # NOTE: temporary solution: only build DNN leafs
327
+ # combined_categories = [cat for cat in config.get_leaf_categories() if len(cat.parent_categories) != 0]
328
+ # category_blocks = OrderedDict({
329
+ # "leafs": combined_categories,
330
+ # "dnn": ml_categories,
331
+ # })
332
+
333
+ t0 = time ()
246
334
# create combination of categories
247
335
n_cats = create_category_combinations (
248
336
config ,
@@ -251,6 +339,4 @@ def add_categories_ml(config, ml_model_inst):
251
339
kwargs_fn = kwargs_fn ,
252
340
skip_existing = True ,
253
341
)
254
- logger .info (f"Number of produced ml category insts: { n_cats } " )
255
-
256
- # TODO unfinished
342
+ logger .info (f"Number of produced ml category insts: { n_cats } (took { (time () - t0 ):.3f} s)" )
0 commit comments