Skip to content

Commit 50c6ed2

Browse files
authored
Merge pull request #6 from jbsauvan/cleanup
Updates and cleanup for the egamma isolation
2 parents 4600200 + 8103451 commit 50c6ed2

19 files changed

+685
-159
lines changed

README.md

+6-10
Original file line numberDiff line numberDiff line change
@@ -56,24 +56,20 @@ This script runs all the steps needed for the egamma isolation:
5656
* Train multiple quantile regressions to derive isolation cuts for several efficiency working points. These cuts are derived as a function of `|ieta|` and `rho`. The trainings will be launched on batch.
5757
* Apply the `ntt` to `rho` mapping to the isolation regression and save results as 2D histograms (`|ieta|`, `ntt`) for all the working points.
5858
* Produce efficiencies of all the working points vs eta and pt of the offline electron, and npv, rho
59+
* Combine different working points according to a predefined efficiency shape depending on `ieta` and `et`
60+
61+
Features available but not used in this version:
5962
* Find the optimal inclusive working point, in terms of background rejection and signal efficiency
60-
* Find the optimal working point in bins of |ieta|
63+
* Find the optimal working point in bins of `|ieta|`
6164

6265
The optimization of the working points is done by looking at the efficiency gradient for signal and background. The optimal working point is chosen as the point where the background gradient becomes smaller or equal to the signal gradient. This means that cutting harder than this point will kill signal more (or equally) than it kills background.
6366

6467
```
6568
Usage: python egamma_isolation.py [options]
6669
6770
Options:
68-
-h, --help show this help message and exit
69-
--inputfile=INPUT_FILE Input file
70-
--tree=TREE_NAME Tree in the input file
71-
--outputdir=OUTPUT_DIR Output directory
72-
--name=NAME Name used for the results
73-
--test Flag to test regression on a test sample
74-
--inputs=INPUTS List of input variables of the form "var1,var2,..."
75-
--pileupref=PILEUP_REF Reference variable used for pile-up
76-
--target=TARGET Target variable
71+
-h, --help show this help message and exit
72+
--cfg=PARAMETER_FILE Python file containing the definition of parameters
7773
```
7874

7975

batch/python/batch_launcher.py

+12
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,18 @@ def job_version(directory):
1717
version_date = "v_"+str(version_max+1)+"_"+str(date.today())
1818
return version_date
1919

20+
def latest_version(directory):
21+
version_date = ''
22+
if os.path.isdir(directory):
23+
dirs= [f for f in os.listdir(directory) if os.path.isdir(os.path.join(directory,f)) and f[:2]=='v_']
24+
version_max = 0
25+
for d in dirs:
26+
version = int(d.split("_")[1])
27+
if version > version_max:
28+
version_max = version
29+
version_date = d
30+
return version_date
31+
2032
def wait_jobs(directory, wait=15):
2133
jobnames = [os.path.splitext(os.path.basename(f))[0] for f in glob.glob(directory+'/jobs/*.sub')]
2234
while True:
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
import numpy as np
2+
from identification_isolation.isolation_parameters import IsolationParameters
3+
from identification_isolation.egamma_isolation import double_slope_relaxation_vs_pt
4+
5+
from root_numpy import array2hist
6+
from rootpy.plotting import Hist
7+
8+
9+
parameters = IsolationParameters()
10+
## General
11+
parameters.name = 'isolation_egamma'
12+
parameters.version = 'automatic'
13+
parameters.signal_file = '/data_CMS/cms/sauvan/L1/2016/V3_new/IsolationValidation/2016C/ZElectron//v_5_2016-07-29/tagAndProbe_isolationValidation_2016C_ZElectron.root'
14+
parameters.signal_tree = 'ntTagAndProbe_IsolationValidation_Stage2_Rebuilt_tree'
15+
parameters.background_file = '/data_CMS/cms/sauvan/L1/2016/V3_inconsistent/IsolationNtuples/ZeroBias_2016C_1e34/v_3_2016-07-29/zeroBias_IsolationNtuple.root'
16+
parameters.background_tree = 'ntZeroBias_IsolationNtuple_tree'
17+
parameters.working_directory = '/home/llr/cms/sauvan/DATA/TMP/testbatch'
18+
## Variable names
19+
parameters.variables.ieta = 'abs(ieta)'
20+
parameters.variables.et = 'et_raw'
21+
parameters.variables.ntt = 'ntt'
22+
parameters.variables.rho = 'rho'
23+
parameters.variables.iso = 'iso'
24+
## Steps
25+
parameters.steps.train_workingpoints = True
26+
parameters.steps.fit_ntt_vs_rho = True
27+
parameters.steps.test_workingpoints = True
28+
parameters.steps.do_compression = True
29+
## eta-pt efficiency shape
30+
parameters.eta_pt_optimization.eta_optimization = 'none'
31+
efficiencies_low_array = np.array([0.80,0.80,0.80,0.80,0.80,0.75,0.80, 0.85])
32+
efficiencies_high_array = np.array([0.92,0.95,0.95,0.95,0.95,0.95,0.95, 0.95])
33+
eta_binning = [0.5, 3.5, 6.5, 9.5, 13.5, 18.5, 22.5, 25.5, 28.5]
34+
efficiencies_low = Hist(eta_binning)
35+
efficiencies_high = Hist(eta_binning)
36+
array2hist(efficiencies_low_array, efficiencies_low)
37+
array2hist(efficiencies_high_array, efficiencies_high)
38+
parameters.eta_pt_optimization.eta_pt_efficiency_shapes = \
39+
double_slope_relaxation_vs_pt(efficiencies_low,\
40+
efficiencies_high,\
41+
threshold_low=56.,\
42+
threshold_high=80.,\
43+
eff_min=0.5,\
44+
max_et=120.)
45+
## LUT Compression
46+
parameters.compression.eta = [0,5,6,9,10,12,13,14,17,18,19,20,23,24,25,26,32]
47+
parameters.compression.et = [0,18,20,22,28,32,37,42,52,63,73,81,87,91,111,151,256]
48+
parameters.compression.ntt = [0,6,11,16,21,26,31,36,41,46,51,56,61,66,71,76,81,86,91,96,101,106,111,116,121,126,131,136,141,146,151,156,256]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
import numpy as np
2+
from identification_isolation.isolation_parameters import IsolationParameters
3+
from identification_isolation.egamma_isolation import double_slope_relaxation_vs_pt
4+
5+
from root_numpy import array2hist
6+
from rootpy.plotting import Hist
7+
8+
9+
parameters = IsolationParameters()
10+
## General
11+
parameters.name = 'isolation_egamma'
12+
parameters.version = 'automatic'
13+
parameters.signal_file = '/data_CMS/cms/sauvan/L1/2016/V3_new_retunedSK/IsolationValidation/2016C/ZElectron//v_1_2016-08-05/tagAndProbe_isolationValidation_2016C_ZElectron.root'
14+
parameters.signal_tree = 'ntTagAndProbe_IsolationValidation_Stage2_Rebuilt_tree'
15+
parameters.background_file = '/data_CMS/cms/sauvan/L1/2016/V3_new_retunedSK/IsolationNtuples/ZeroBias_2016C_1e34/v_1_2016-08-06/zeroBias_IsolationNtuple.root'
16+
parameters.background_tree = 'ntZeroBias_IsolationNtuple_tree'
17+
parameters.working_directory = '/home/llr/cms/sauvan/DATA/TMP/egamma_isolation_retunedSK/'
18+
## Variable names
19+
parameters.variables.ieta = 'abs(ieta)'
20+
parameters.variables.et = 'et_raw'
21+
parameters.variables.ntt = 'ntt'
22+
parameters.variables.rho = 'rho'
23+
parameters.variables.iso = 'iso'
24+
## Steps
25+
parameters.steps.train_workingpoints = True
26+
parameters.steps.fit_ntt_vs_rho = True
27+
parameters.steps.test_workingpoints = True
28+
parameters.steps.do_compression = True
29+
## eta-pt efficiency shape
30+
parameters.eta_pt_optimization.eta_optimization = 'none'
31+
efficiencies_low_array = np.array([0.80,0.80,0.80,0.80,0.80,0.75,0.80, 0.85])
32+
efficiencies_high_array = np.array([0.92,0.95,0.95,0.95,0.95,0.95,0.95, 0.95])
33+
eta_binning = [0.5, 3.5, 6.5, 9.5, 13.5, 18.5, 22.5, 25.5, 28.5]
34+
efficiencies_low = Hist(eta_binning)
35+
efficiencies_high = Hist(eta_binning)
36+
array2hist(efficiencies_low_array, efficiencies_low)
37+
array2hist(efficiencies_high_array, efficiencies_high)
38+
parameters.eta_pt_optimization.eta_pt_efficiency_shapes = \
39+
double_slope_relaxation_vs_pt(efficiencies_low,\
40+
efficiencies_high,\
41+
threshold_low=56.,\
42+
threshold_high=80.,\
43+
eff_min=0.5,\
44+
max_et=120.)
45+
## LUT Compression
46+
parameters.compression.eta = [0,5,6,9,10,12,13,14,17,18,19,20,23,24,25,26,32]
47+
parameters.compression.et = [0,18,20,22,28,32,37,42,52,63,73,81,87,91,111,151,256]
48+
parameters.compression.ntt = [0,6,11,16,21,26,31,36,41,46,51,56,61,66,71,76,81,86,91,96,101,106,111,116,121,126,131,136,141,146,151,156,256]
+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__all__ = ['quantile_regression', 'correlations', 'egamma_isolation', 'efficiency']
1+
__all__ = ['quantile_regression', 'cut_functions', 'correlations', 'egamma_isolation', 'efficiency', 'rate']
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import numpy as np
2+
from utilities.numpy_utilities import find_closest
3+
from root_numpy import hist2array, evaluate, array2hist
4+
5+
6+
# Compound of multivariate regression and input mappings
7+
class RegressionWithInputMapping:
8+
def __init__(self, iso_regression, input_mappings, name='isolation'):
9+
self.name = name
10+
self.iso_regression = iso_regression
11+
# dictionary input index -> function to be applied on inputs
12+
self.input_mappings = input_mappings
13+
# Vectorize the functions such that they can take arrays as input
14+
for index, mapping in self.input_mappings.items():
15+
self.input_mappings[index] = np.vectorize(mapping)
16+
17+
def predict(self, values):
18+
#print 'In IsolationCuts.predict()'
19+
# Apply input mappings
20+
mapped_inputs = np.array(values, dtype=np.float64)
21+
for index,mapping in self.input_mappings.items():
22+
# Apply mapping on column 'index'
23+
mapped_inputs_i = mapping(mapped_inputs[:,[index]])
24+
# Replace column 'index' with mapped inputs
25+
mapped_inputs = np.delete(mapped_inputs, index, axis=1)
26+
mapped_inputs = np.insert(mapped_inputs, [index], mapped_inputs_i, axis=1)
27+
# Apply iso regression on mapped inputs
28+
output = self.iso_regression.predict(mapped_inputs)
29+
#print 'Out IsolationCuts.predict()'
30+
return output
31+
32+
33+
class CombinedWorkingPoints:
34+
# TODO: Improve performance
35+
def __init__(self, working_points, functions, efficiency_map):
36+
efficiency_array = hist2array(efficiency_map)
37+
working_points_indices = find_closest(working_points, efficiency_array)
38+
self.function_index_map = efficiency_map.empty_clone()
39+
array2hist(working_points_indices, self.function_index_map)
40+
self.indices = working_points_indices
41+
self.functions = functions
42+
self.dim = len(efficiency_array.shape)
43+
44+
def value(self, inputs, map_positions):
45+
# remove overflows (overwrite with a value just below the histogram boundary)
46+
upper_bounds = [self.function_index_map.bounds(axis)[1]-1e-3 for axis in range(len(self.function_index_map.axes))]
47+
map_positions_no_overflow = np.apply_along_axis(lambda x:np.minimum(x,upper_bounds), 1, map_positions)
48+
# evaluate of a 1D histograms take flatten array as input
49+
if self.dim==1: map_positions_no_overflow = map_positions_no_overflow.ravel()
50+
indices = evaluate(self.function_index_map, map_positions_no_overflow).astype(np.int32)
51+
# Compute isolation for all used working points
52+
outputs = []
53+
for i,function in enumerate(self.functions):
54+
if i in self.indices: outputs.append(function(inputs))
55+
else: outputs.append(np.array([]))
56+
#output = [self.functions[index]([input]) for index,input in zip(indices,inputs)]
57+
# Associate the correct working point for each entry
58+
output = np.zeros(len(indices))
59+
for i,index in enumerate(indices):
60+
output[i] = outputs[index][i]
61+
return output
62+
63+

identification_isolation/python/efficiency.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def efficiency_graph(pass_function, function_inputs, xs, bins=None, error=0.005)
3737
percentiles = [0.,100.]
3838
if k>0:
3939
nbins = (error*n)**2/k / (1-k/n)
40-
# Compute the bin boundaries with the same number of events in all bins
40+
# Compute the bin bounaries with the same number of events in all bins
4141
percentiles = np.arange(0., 100., 100./nbins)
4242
percentiles[-1] = 100.
4343
bins = np.unique(np.percentile(xs, percentiles))

0 commit comments

Comments
 (0)