@@ -141,9 +141,11 @@ def __init__(
141
141
"""
142
142
self ._ml_model_inst = ml_model_inst
143
143
self ._process = process
144
+ self ._skip_mask = skip_mask
144
145
145
146
proc_mask , _ = get_proc_mask (events , process , ml_model_inst .config_inst )
146
147
self ._stats = stats
148
+ # __import__("IPython").embed()
147
149
# del_sub_proc_stats(process, sub_id)
148
150
if not skip_mask :
149
151
self ._events = events [proc_mask ]
@@ -177,6 +179,10 @@ def parameters(self):
177
179
}
178
180
return self ._parameters
179
181
182
@property
def skip_mask(self):
    """Return the ``skip_mask`` flag that was passed to the constructor.

    The constructor stores the value unchanged in ``self._skip_mask``.
    When the flag is falsy, the constructor restricts the events to the
    ones selected by the process mask.
    """
    return self._skip_mask
185
+
180
186
@property
def ml_model_inst(self):
    """Return the ML model instance that was handed to the constructor.

    The object is stored as ``self._ml_model_inst`` and returned as is;
    no copy is made.
    """
    return self._ml_model_inst
@@ -255,6 +261,14 @@ def shuffle_indices(self) -> np.ndarray:
255
261
self ._shuffle_indices = np .random .permutation (self .n_events )
256
262
return self ._shuffle_indices
257
263
264
@property
def num_event_per_process(self) -> str:
    """Return the name of the stats entry that holds the event counts.

    Despite its name, this property yields a dictionary *key* (a string),
    not a number. Callers use it to index ``self.stats[self.process]``.

    Returns:
        ``"num_events_per_process"`` when ``self.skip_mask`` is truthy,
        otherwise ``"num_events_pos_weights_per_process"``.
    """
    stats_key = (
        "num_events_per_process"
        if self.skip_mask
        else "num_events_pos_weights_per_process"
    )
    # The key is also stored on the instance, exactly as before, in case
    # code outside this view reads ``_num_events_per_process`` directly.
    self._num_events_per_process = stats_key
    return stats_key
271
+
258
272
def get_xsec_train_weights (self ) -> np .ndarray :
259
273
"""
260
274
Weighting such that each event has roughly the same weight,
@@ -267,10 +281,20 @@ def get_xsec_train_weights(self) -> np.ndarray:
267
281
raise Exception ("cannot determine train weights without stats" )
268
282
269
283
_ , sub_id = get_proc_mask (self ._events , self .process , self .ml_model_inst .config_inst )
270
- sum_abs_weights = np .sum ([self .stats [self .process ]["sum_abs_weights_per_process" ][str (id )] for id in sub_id ])
271
- num_events = np .sum ([self .stats [self .process ]["num_events_per_process" ][str (id )] for id in sub_id ])
284
+ sum_weights = np .sum ([self .stats [self .process ]["sum_pos_weights_per_process" ][str (id )] for id in sub_id ])
285
+ num_events = np .sum (
286
+ [self .stats [self .process ][self .num_event_per_process ][str (id )] for id in sub_id ],
287
+ )
288
+ # if not self.skip_mask:
289
+ # num_events = np.sum(
290
+ # [self.stats[self.process]["num_events_pos_weights_per_process"][str(id)] for id in sub_id],
291
+ # )
292
+ # else:
293
+ # num_events = np.sum(
294
+ # [self.stats[self.process]["num_events_per_process"][str(id)] for id in sub_id],
295
+ # )
272
296
273
- xsec_train_weights = self .weights / sum_abs_weights * num_events
297
+ xsec_train_weights = self .weights / sum_weights * num_events
274
298
275
299
return xsec_train_weights
276
300
@@ -286,7 +310,15 @@ def get_equal_train_weights(self) -> np.ndarray:
286
310
287
311
combined_proc_inst = self .ml_model_inst .config_inst .get_process (self .process )
288
312
_ , sub_id_proc = get_proc_mask (self ._events , self .process , self .ml_model_inst .config_inst )
289
- num_events = np .sum ([self .stats [self .process ]["num_events_per_process" ][str (id )] for id in sub_id_proc ])
313
+ num_events = np .sum (
314
+ [self .stats [self .process ][self .num_event_per_process ][str (id )] for id in sub_id_proc ],
315
+ )
316
+ # if not self.skip_mask:
317
+ # num_events = np.sum(
318
+ # [self.stats[self.process]["num_events_pos_weights_per_process"][str(id)] for id in sub_id_proc],
319
+ # )
320
+ # else:
321
+ # num_events = np.sum([self.stats[self.process]["num_events_per_process"][str(id)] for id in sub_id_proc])
290
322
targeted_sum_of_weights_per_process = (
291
323
num_events / len (combined_proc_inst .x .ml_config .sub_processes )
292
324
)
@@ -724,8 +756,10 @@ def prediction(self) -> np.ndarray:
724
756
self ._prediction = self .load_data ("prediction" )
725
757
else :
726
758
# calculate prediction if needed
727
- if not hasattr (self ._ml_model_inst , "trained_model" ):
759
+ if not hasattr (self ._ml_model_inst , "best_model" ):
760
+ # if not hasattr(self._ml_model_inst, "trained_model"):
728
761
raise Exception ("No trained model found in the MLModel instance. Cannot calculate prediction." )
729
- self ._prediction = predict_numpy_on_batch (self ._ml_model_inst .trained_model , self .features )
762
+ # self._prediction = predict_numpy_on_batch(self._ml_model_inst.trained_model, self.features)
763
+ self ._prediction = predict_numpy_on_batch (self ._ml_model_inst .best_model , self .features )
730
764
731
765
return self ._prediction # TODO ML best model
0 commit comments