LLRCMS
diff --git a/‎README.md
+6-10 b/‎README.md
+6-10
diff --git a/‎batch/python/batch_launcher.py
+12 b/‎batch/python/batch_launcher.py
+12
diff --git a/‎identification_isolation/config/isolation_egamma_JB.py
+48 b/‎identification_isolation/config/isolation_egamma_JB.py
+48
diff --git a/‎identification_isolation/config/isolation_egamma_retunedSK_JB.py
+48 b/‎identification_isolation/config/isolation_egamma_retunedSK_JB.py
+48
diff --git a/‎identification_isolation/python/__init__.py
+1-1 b/‎identification_isolation/python/__init__.py
+1-1
diff --git a/‎identification_isolation/python/cut_functions.py
+63 b/‎identification_isolation/python/cut_functions.py
+63
diff --git a/‎identification_isolation/python/efficiency.py
+1-1 b/‎identification_isolation/python/efficiency.py
+1-1
@@ -56,24 +56,20 @@ This script runs all the steps needed for the egamma isolation:
 * Train multiple quantile regressions to derive isolation cuts for several efficiency working points. These cuts are derived as a function of `|ieta|` and `rho`. The trainings will be launched on batch.
 * Apply the `ntt` to `rho` mapping to the isolation regression and save results as 2D histograms (`|ieta|`, `ntt`) for all the working points.   
 * Produce efficiencies of all the working points vs eta and pt of the offline electron, and npv, rho
+* Combine different working points according to a predefined efficiency shape depending on `ieta` and `et`
+
+Features available but not used in this version:
 * Find the optimal inclusive working point, in terms of background rejection and signal efficiency
-* Find the optimal working point in bins of |ieta|
+* Find the optimal working point in bins of `|ieta|`
 
 The optimization of the working points is done by looking at the efficiency gradient for signal and background. The optimal working point is chosen as the point where the background gradient becomes smaller or equal to the signal gradient. This means that cutting harder than this point will kill signal more (or equally) than it kills background.   
 
 ```
 Usage: python egamma_isolation.py [options]
 
 Options:
-  -h, --help              show this help message and exit
-  --inputfile=INPUT_FILE  Input file
-  --tree=TREE_NAME        Tree in the input file
-  --outputdir=OUTPUT_DIR  Output directory
-  --name=NAME             Name used for the results
-  --test                  Flag to test regression on a test sample
-  --inputs=INPUTS         List of input variables of the form "var1,var2,..."
-  --pileupref=PILEUP_REF  Reference variable used for pile-up
-  --target=TARGET         Target variable
+  -h, --help            show this help message and exit
+  --cfg=PARAMETER_FILE  Python file containing the definition of parameters
 ```
 
 
 
@@ -17,6 +17,18 @@ def job_version(directory):
         version_date = "v_"+str(version_max+1)+"_"+str(date.today())
     return version_date
 
+def latest_version(directory):
+    version_date = ''
+    if os.path.isdir(directory):
+        dirs= [f for f in os.listdir(directory) if os.path.isdir(os.path.join(directory,f)) and f[:2]=='v_']
+        version_max = 0
+        for d in dirs:
+            version = int(d.split("_")[1])
+            if version > version_max:
+                version_max = version
+                version_date = d
+    return version_date
+
 def wait_jobs(directory, wait=15):
     jobnames = [os.path.splitext(os.path.basename(f))[0] for f in glob.glob(directory+'/jobs/*.sub')]
     while True:
 
@@ -0,0 +1,48 @@
+import numpy as np
+from identification_isolation.isolation_parameters import IsolationParameters
+from identification_isolation.egamma_isolation import double_slope_relaxation_vs_pt
+
+from root_numpy import array2hist
+from rootpy.plotting import Hist
+
+
+parameters = IsolationParameters()
+## General
+parameters.name = 'isolation_egamma'
+parameters.version = 'automatic'
+parameters.signal_file = '/data_CMS/cms/sauvan/L1/2016/V3_new/IsolationValidation/2016C/ZElectron//v_5_2016-07-29/tagAndProbe_isolationValidation_2016C_ZElectron.root'
+parameters.signal_tree = 'ntTagAndProbe_IsolationValidation_Stage2_Rebuilt_tree'
+parameters.background_file = '/data_CMS/cms/sauvan/L1/2016/V3_inconsistent/IsolationNtuples/ZeroBias_2016C_1e34/v_3_2016-07-29/zeroBias_IsolationNtuple.root'
+parameters.background_tree = 'ntZeroBias_IsolationNtuple_tree'
+parameters.working_directory = '/home/llr/cms/sauvan/DATA/TMP/testbatch'
+## Variable names
+parameters.variables.ieta = 'abs(ieta)'
+parameters.variables.et = 'et_raw'
+parameters.variables.ntt = 'ntt'
+parameters.variables.rho = 'rho'
+parameters.variables.iso = 'iso'
+## Steps
+parameters.steps.train_workingpoints = True
+parameters.steps.fit_ntt_vs_rho = True
+parameters.steps.test_workingpoints = True
+parameters.steps.do_compression = True
+## eta-pt efficiency shape
+parameters.eta_pt_optimization.eta_optimization = 'none'
+efficiencies_low_array = np.array([0.80,0.80,0.80,0.80,0.80,0.75,0.80, 0.85])
+efficiencies_high_array = np.array([0.92,0.95,0.95,0.95,0.95,0.95,0.95, 0.95])
+eta_binning = [0.5, 3.5, 6.5, 9.5, 13.5, 18.5, 22.5, 25.5, 28.5]
+efficiencies_low = Hist(eta_binning)
+efficiencies_high = Hist(eta_binning)
+array2hist(efficiencies_low_array, efficiencies_low)
+array2hist(efficiencies_high_array, efficiencies_high)
+parameters.eta_pt_optimization.eta_pt_efficiency_shapes = \
+        double_slope_relaxation_vs_pt(efficiencies_low,\
+                                      efficiencies_high,\
+                                      threshold_low=56.,\
+                                      threshold_high=80.,\
+                                      eff_min=0.5,\
+                                      max_et=120.)
+## LUT Compression
+parameters.compression.eta = [0,5,6,9,10,12,13,14,17,18,19,20,23,24,25,26,32]
+parameters.compression.et = [0,18,20,22,28,32,37,42,52,63,73,81,87,91,111,151,256]
+parameters.compression.ntt = [0,6,11,16,21,26,31,36,41,46,51,56,61,66,71,76,81,86,91,96,101,106,111,116,121,126,131,136,141,146,151,156,256]
@@ -0,0 +1,48 @@
+import numpy as np
+from identification_isolation.isolation_parameters import IsolationParameters
+from identification_isolation.egamma_isolation import double_slope_relaxation_vs_pt
+
+from root_numpy import array2hist
+from rootpy.plotting import Hist
+
+
+parameters = IsolationParameters()
+## General
+parameters.name = 'isolation_egamma'
+parameters.version = 'automatic'
+parameters.signal_file = '/data_CMS/cms/sauvan/L1/2016/V3_new_retunedSK/IsolationValidation/2016C/ZElectron//v_1_2016-08-05/tagAndProbe_isolationValidation_2016C_ZElectron.root'
+parameters.signal_tree = 'ntTagAndProbe_IsolationValidation_Stage2_Rebuilt_tree'
+parameters.background_file = '/data_CMS/cms/sauvan/L1/2016/V3_new_retunedSK/IsolationNtuples/ZeroBias_2016C_1e34/v_1_2016-08-06/zeroBias_IsolationNtuple.root'
+parameters.background_tree = 'ntZeroBias_IsolationNtuple_tree'
+parameters.working_directory = '/home/llr/cms/sauvan/DATA/TMP/egamma_isolation_retunedSK/'
+## Variable names
+parameters.variables.ieta = 'abs(ieta)'
+parameters.variables.et = 'et_raw'
+parameters.variables.ntt = 'ntt'
+parameters.variables.rho = 'rho'
+parameters.variables.iso = 'iso'
+## Steps
+parameters.steps.train_workingpoints = True
+parameters.steps.fit_ntt_vs_rho = True
+parameters.steps.test_workingpoints = True
+parameters.steps.do_compression = True
+## eta-pt efficiency shape
+parameters.eta_pt_optimization.eta_optimization = 'none'
+efficiencies_low_array = np.array([0.80,0.80,0.80,0.80,0.80,0.75,0.80, 0.85])
+efficiencies_high_array = np.array([0.92,0.95,0.95,0.95,0.95,0.95,0.95, 0.95])
+eta_binning = [0.5, 3.5, 6.5, 9.5, 13.5, 18.5, 22.5, 25.5, 28.5]
+efficiencies_low = Hist(eta_binning)
+efficiencies_high = Hist(eta_binning)
+array2hist(efficiencies_low_array, efficiencies_low)
+array2hist(efficiencies_high_array, efficiencies_high)
+parameters.eta_pt_optimization.eta_pt_efficiency_shapes = \
+        double_slope_relaxation_vs_pt(efficiencies_low,\
+                                      efficiencies_high,\
+                                      threshold_low=56.,\
+                                      threshold_high=80.,\
+                                      eff_min=0.5,\
+                                      max_et=120.)
+## LUT Compression
+parameters.compression.eta = [0,5,6,9,10,12,13,14,17,18,19,20,23,24,25,26,32]
+parameters.compression.et = [0,18,20,22,28,32,37,42,52,63,73,81,87,91,111,151,256]
+parameters.compression.ntt = [0,6,11,16,21,26,31,36,41,46,51,56,61,66,71,76,81,86,91,96,101,106,111,116,121,126,131,136,141,146,151,156,256]
@@ -1 +1 @@
-__all__ = ['quantile_regression', 'correlations', 'egamma_isolation', 'efficiency']
+__all__ = ['quantile_regression', 'cut_functions', 'correlations', 'egamma_isolation', 'efficiency', 'rate']
@@ -0,0 +1,63 @@
+import numpy as np
+from utilities.numpy_utilities import find_closest
+from root_numpy import hist2array, evaluate, array2hist
+
+
+# Compound of multivariate regression and input mappings
+class RegressionWithInputMapping:
+    def __init__(self, iso_regression, input_mappings, name='isolation'):
+        self.name = name
+        self.iso_regression = iso_regression
+        # dictionary input index -> function to be applied on inputs
+        self.input_mappings = input_mappings
+        # Vectorize the functions such that they can take arrays as input
+        for index, mapping in self.input_mappings.items():
+            self.input_mappings[index] = np.vectorize(mapping)
+
+    def predict(self, values):
+        #print 'In IsolationCuts.predict()'
+        # Apply input mappings
+        mapped_inputs = np.array(values, dtype=np.float64)
+        for index,mapping in self.input_mappings.items():
+            # Apply mapping on column 'index'
+            mapped_inputs_i = mapping(mapped_inputs[:,[index]])
+            # Replace column 'index' with mapped inputs
+            mapped_inputs = np.delete(mapped_inputs, index, axis=1)
+            mapped_inputs = np.insert(mapped_inputs, [index], mapped_inputs_i, axis=1)
+        # Apply iso regression on mapped inputs
+        output = self.iso_regression.predict(mapped_inputs)
+        #print 'Out IsolationCuts.predict()'
+        return output
+
+
+class CombinedWorkingPoints:
+    # TODO: Improve performance
+    def __init__(self, working_points, functions, efficiency_map):
+        efficiency_array = hist2array(efficiency_map)
+        working_points_indices = find_closest(working_points, efficiency_array)
+        self.function_index_map = efficiency_map.empty_clone()
+        array2hist(working_points_indices, self.function_index_map)
+        self.indices = working_points_indices
+        self.functions = functions
+        self.dim = len(efficiency_array.shape)
+
+    def value(self, inputs, map_positions):
+        # remove overflows (overwrite with a value just below the histogram boundary)
+        upper_bounds = [self.function_index_map.bounds(axis)[1]-1e-3 for axis in range(len(self.function_index_map.axes))]
+        map_positions_no_overflow = np.apply_along_axis(lambda x:np.minimum(x,upper_bounds), 1, map_positions)
+        # evaluate() on a 1D histogram takes a flattened array as input
+        if self.dim==1: map_positions_no_overflow = map_positions_no_overflow.ravel()
+        indices = evaluate(self.function_index_map, map_positions_no_overflow).astype(np.int32)
+        # Compute isolation for all used working points
+        outputs = []
+        for i,function in enumerate(self.functions):
+            if i in self.indices: outputs.append(function(inputs))
+            else: outputs.append(np.array([]))
+        #output = [self.functions[index]([input]) for index,input in zip(indices,inputs)]
+        # Associate the correct working point for each entry
+        output = np.zeros(len(indices))
+        for i,index in enumerate(indices):
+            output[i] = outputs[index][i]
+        return output
+
+
@@ -37,7 +37,7 @@ def efficiency_graph(pass_function, function_inputs, xs, bins=None, error=0.005)
         percentiles = [0.,100.]
         if k>0: 
             nbins = (error*n)**2/k / (1-k/n)
-            # Compute the bin boundaries with the same number of events in all bins
+            # Compute the bin boundaries with the same number of events in all bins
             percentiles = np.arange(0., 100., 100./nbins)
             percentiles[-1] = 100.
         bins = np.unique(np.percentile(xs, percentiles))
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-__all__ = ['quantile_regression', 'correlations', 'egamma_isolation', 'efficiency']`
	`1`	`+__all__ = ['quantile_regression', 'cut_functions', 'correlations', 'egamma_isolation', 'efficiency', 'rate']`