10
10
11
11
# import luigi
12
12
import law
13
+ import order as od
13
14
14
15
from columnflow .tasks .framework .base import Requirements , RESOLVE_DEFAULT
15
16
from columnflow .tasks .framework .parameters import SettingsParameter
24
25
array = maybe_import ("array" )
25
26
26
27
28
+ logger = law .logger .get_logger (__name__ )
29
+
30
+
27
31
def get_hist_name (cat_name : str , proc_name : str , syst_name : str | None = None ) -> str :
28
32
hist_name = f"{ cat_name } /{ proc_name } "
29
33
if syst_name :
@@ -72,7 +76,7 @@ def get_rebin_values(hist, N_bins_final: int = 10):
72
76
73
77
# determine events per bin the final histogram should have
74
78
events_per_bin = hist .Integral () / N_bins_final
75
- print (f"============ { round (events_per_bin , 3 )} events per bin" )
79
+ logger . info (f"============ { round (events_per_bin , 3 )} events per bin" )
76
80
77
81
# bookkeeping number of bins and number of events
78
82
bin_count = 1
@@ -88,17 +92,17 @@ def get_rebin_values(hist, N_bins_final: int = 10):
88
92
89
93
N_events += hist .GetBinContent (i )
90
94
if i % 100 == 0 :
91
- print (f"========== Bin { i } of { N_bins_input } , { N_events } events" )
95
+ logger . info (f"========== Bin { i } of { N_bins_input } , { N_events } events" )
92
96
if N_events >= events_per_bin * bin_count :
93
97
# when *N_events* surpasses threshold, append the corresponding bin edge and count
94
- print (f"++++++++++ Append bin edge { bin_count } of { N_bins_final } at edge { hist .GetBinLowEdge (i )} " )
98
+ logger . info (f"++++++++++ Append bin edge { bin_count } of { N_bins_final } at edge { hist .GetBinLowEdge (i )} " )
95
99
rebin_values .append (hist .GetBinLowEdge (i + 1 ))
96
100
bin_count += 1
97
101
98
102
# final bin is x_max
99
103
x_max = hist .GetBinLowEdge (N_bins_input + 1 )
100
104
rebin_values .append (x_max )
101
- print (f"final bin edges: { rebin_values } " )
105
+ logger . info (f"final bin edges: { rebin_values } " )
102
106
return rebin_values
103
107
104
108
@@ -127,28 +131,28 @@ def check_empty_bins(hist, fill_empty: float = 1e-5, required_entries: int = 3)
127
131
value = hist .GetBinContent (i )
128
132
error = hist .GetBinError (i )
129
133
if value <= 0 :
130
- print (f"==== Found empty or negative bin { i } , (value: { value } , error: { error } )" )
134
+ logger . info (f"==== Found empty or negative bin { i } , (value: { value } , error: { error } )" )
131
135
count += 1
132
136
if fill_empty >= 0 :
133
- print (f" Bin { i } value + error will be filled with { fill_empty } " )
137
+ logger . info (f" Bin { i } value + error will be filled with { fill_empty } " )
134
138
hist .SetBinContent (i , fill_empty )
135
139
hist .SetBinError (i , fill_empty )
136
140
137
141
if error > max_error (value ):
138
- print (
142
+ logger . warning (
139
143
f"==== Bin { i } has less than { required_entries } entries (value: { value } , error: { error } ); "
140
144
f"Rebinning procedure might have to be restarted with less bins than { hist .GetNbinsX ()} " ,
141
145
)
142
146
return count
143
147
144
148
145
149
def print_hist(hist, max_bins: int = 20):
    """
    Log bin number, lower edge and bin content for the leading bins of *hist*.

    Bin indices run from 0 up to and including ``hist.GetNbinsX() + 1``. Logging stops as soon as
    the bin index exceeds *max_bins*.
    """
    logger.info("Printing bin number, lower edge and bin content")

    # the upper bound is evaluated once, before the loop starts
    n_indices = hist.GetNbinsX() + 2
    for bin_index in range(n_indices):
        if bin_index > max_bins:
            # nothing follows the loop, so leaving it ends the function
            break

        low_edge = hist.GetBinLowEdge(bin_index)
        content = hist.GetBinContent(bin_index)
        logger.info(f"{bin_index} \t {low_edge} \t {content} ")
152
156
153
157
154
158
class ModifyDatacardsFlatRebin (
@@ -185,11 +189,17 @@ def resolve_param_values(cls, params):
185
189
params = super ().resolve_param_values (params )
186
190
187
191
if config_inst := params .get ("config_inst" ):
188
- def resolve_category_groups (param , group_str ):
192
+ def resolve_category_groups (param ):
193
+ outp_param = {}
189
194
for cat_name in list (param .keys ()):
190
- if resolved_cats := config_inst .x (group_str , {}).get (cat_name , None ):
195
+ resolved_cats = cls .find_config_objects (
196
+ (cat_name ,), config_inst , od .Category ,
197
+ object_groups = config_inst .x .category_groups , deep = True ,
198
+ )
199
+ if resolved_cats :
191
200
for resolved_cat in law .util .make_tuple (resolved_cats ):
192
- param [resolved_cat ] = param [cat_name ]
201
+ outp_param [resolved_cat ] = param [cat_name ]
202
+ return outp_param
193
203
194
204
# resolve default and groups for `bins_per_category`
195
205
params ["bins_per_category" ] = cls .resolve_config_default (
@@ -198,7 +208,7 @@ def resolve_category_groups(param, group_str):
198
208
container = config_inst ,
199
209
default_str = "default_bins_per_category" ,
200
210
)
201
- resolve_category_groups ( params ["bins_per_category" ], "inference_category_groups" )
211
+ params ["bins_per_category" ] = resolve_category_groups ( params [ "bins_per_category" ] )
202
212
203
213
# set `inference_category_rebin_processes` as parameter and resolve groups
204
214
params ["inference_category_rebin_processes" ] = cls .resolve_config_default (
@@ -207,25 +217,34 @@ def resolve_category_groups(param, group_str):
207
217
container = config_inst ,
208
218
default_str = "inference_category_rebin_processes" ,
209
219
)
210
- resolve_category_groups (params ["inference_category_rebin_processes" ], "inference_category_groups" )
211
-
220
+ params ["inference_category_rebin_processes" ] = resolve_category_groups (
221
+ params ["inference_category_rebin_processes" ],
222
+ )
212
223
return params
213
224
214
225
def get_n_bins(self, DEFAULT_N_BINS=8):
    """
    Method to get the requested number of bins for the config category of the current branch.

    The number is looked up in *bins_per_category* using ``self.branch_data.config_category``.
    When the lookup yields nothing (or a falsy value), a warning is logged and *DEFAULT_N_BINS*
    is used instead. The result is always cast to ``int``.
    """
    config_category = self.branch_data.config_category
    requested_bins = self.bins_per_category.get(config_category, None)

    # a usable entry exists: no warning needed
    if requested_bins:
        return int(requested_bins)

    logger.warning(f"No number of bins setup for category {config_category} ; will default to {DEFAULT_N_BINS} .")
    return int(DEFAULT_N_BINS)
218
233
219
234
def get_rebin_processes (self ):
220
235
"""
221
236
Method to resolve the requested processes on which to flatten the histograms of the current category.
222
237
Defaults to all processes of the current category.
223
238
"""
224
- cat_name = self .branch_data .name
239
+ config_category = self .branch_data .config_category
225
240
proc_names = [proc .name for proc in self .branch_data .processes ]
226
241
227
- rebin_process_condition = self .inference_category_rebin_processes .get (cat_name , None )
242
+ rebin_process_condition = self .inference_category_rebin_processes .get (config_category , None )
228
243
if not rebin_process_condition :
244
+ logger .warning (
245
+ f"No rebin condition found for category { config_category } ; rebinning will be flat "
246
+ f"on all processes { proc_names } " ,
247
+ )
229
248
return proc_names
230
249
231
250
# transform `rebin_process_condition` into Callable if required
@@ -237,7 +256,7 @@ def get_rebin_processes(self):
237
256
# check for each process if the *rebin_process_condition* is fulfilled
238
257
if not rebin_process_condition (proc_name ):
239
258
proc_names .remove (proc_name )
240
-
259
+ logger . info ( f"Category { config_category } will be rebinned flat in processes { proc_names } " )
241
260
return proc_names
242
261
243
262
def create_branch_map (self ):
@@ -286,7 +305,7 @@ def run(self):
286
305
outputs ["card" ].dump (datacard , formatter = "text" )
287
306
288
307
with uproot .open (inp_shapes .fn ) as file :
289
- print (f"File keys: { file .keys ()} " )
308
+ logger . info (f"File keys: { file .keys ()} " )
290
309
# determine which histograms are present
291
310
cat_names , proc_names , syst_names = get_cat_proc_syst_names (file )
292
311
@@ -319,7 +338,7 @@ def run(self):
319
338
for h in hists [1 :]:
320
339
hist += h
321
340
322
- print (f"Finding rebin values for category { cat_name } using processes { rebin_processes } " )
341
+ logger . info (f"Finding rebin values for category { cat_name } using processes { rebin_processes } " )
323
342
rebin_values = get_rebin_values (hist , self .get_n_bins ())
324
343
outputs ["edges" ].dump (rebin_values , formatter = "json" )
325
344
@@ -339,5 +358,5 @@ def run(self):
339
358
340
359
h_rebin = apply_binning (h , rebin_values )
341
360
problematic_bin_count = check_empty_bins (h_rebin ) # noqa
342
- print (f"Inserting histogram with name { key } " )
361
+ logger . info (f"Inserting histogram with name { key } " )
343
362
out_file [key ] = uproot .from_pyroot (h_rebin )
0 commit comments