From ec25b9db0f748e467e924983bc2d00992cec59c2 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Wed, 12 Feb 2025 10:29:00 +0100 Subject: [PATCH 01/11] treat 1-point datasets equally in sequential and parallel fits --- n3fit/src/n3fit/model_gen.py | 37 +++++++---- n3fit/src/n3fit/model_trainer.py | 18 +++-- validphys2/src/validphys/n3fit_data.py | 70 +++++++++++++++----- validphys2/src/validphys/n3fit_data_utils.py | 34 ++-------- 4 files changed, 91 insertions(+), 68 deletions(-) diff --git a/n3fit/src/n3fit/model_gen.py b/n3fit/src/n3fit/model_gen.py index 868409f489..e169561429 100644 --- a/n3fit/src/n3fit/model_gen.py +++ b/n3fit/src/n3fit/model_gen.py @@ -1,11 +1,11 @@ """ - Library of functions which generate the NN objects +Library of functions which generate the NN objects - Contains: - # observable_generator: - Generates the output layers as functions - # pdfNN_layer_generator: - Generates the PDF NN layer to be fitted +Contains: + # observable_generator: + Generates the output layers as functions + # pdfNN_layer_generator: + Generates the PDF NN layer to be fitted """ @@ -26,7 +26,7 @@ base_layer_selector, ) from n3fit.backends import operations as op -from n3fit.backends import regularizer_selector +from n3fit.backends import regularizer_selector as reg_sec from n3fit.layers import ( DIS, DY, @@ -128,6 +128,7 @@ def observable_generator( spec_dict, boundary_condition=None, mask_array=None, + validation_mask_array=None, training_data=None, validation_data=None, invcovmat_tr=None, @@ -170,6 +171,10 @@ def observable_generator( boundary_condition: dict dictionary containing the instance of the a PDF set to be used as a Boundary Condition. + mask_array: np.ndarray + training mask per replica + validation_mask_array: np.ndarray + validation mask per replica, when not given ¬mask_array will be used n_replicas: int number of replicas fitted simultaneously positivity_initial: float @@ -245,12 +250,18 @@ def observable_generator( model_inputs = np.concatenate(model_inputs).reshape(1, -1) # Make the mask layers... 
- if mask_array is not None: - tr_mask_layer = Mask(mask_array, name=f"trmask_{spec_name}") - vl_mask_layer = Mask(~mask_array, name=f"vlmask_{spec_name}") - else: + if mask_array is None: tr_mask_layer = None - vl_mask_layer = None + if validation_mask_array is None: + vl_mask_layer = None + else: + vl_mask_layer = Mask(validation_mask_array, name=f"vlmask_{spec_name}") + else: + tr_mask_layer = Mask(mask_array, name=f"trmask_{spec_name}") + if validation_mask_array is None: + vl_mask_layer = Mask(~mask_array, name=f"vlmask_{spec_name}") + else: + vl_mask_layer = Mask(validation_mask_array, name=f"vlmask_{spec_name}") # Make rotations of the final data (if any) if spec_dict.get("data_transformation") is not None: @@ -724,7 +735,7 @@ def generate_nn( """ nodes_list = list(nodes) # so we can modify it x_input = Input(shape=(None, nodes_in), batch_size=1, name="NN_input") - reg = regularizer_selector(regularizer, **regularizer_args) + reg = reg_sec(regularizer, **regularizer_args) if layer_type == "dense_per_flavour": # set the arguments that will define the layer diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py index d864d2c6e5..5cf5c54d0e 100644 --- a/n3fit/src/n3fit/model_trainer.py +++ b/n3fit/src/n3fit/model_trainer.py @@ -1,12 +1,12 @@ """ - The ModelTrainer class is the true driver around the n3fit code +The ModelTrainer class is the true driver around the n3fit code - This class is initialized with all information about the NN, inputs and outputs. - The construction of the NN and the fitting is performed at the same time when the - hyperparametrizable method of the function is called. +This class is initialized with all information about the NN, inputs and outputs. +The construction of the NN and the fitting is performed at the same time when the +hyperparametrizable method of the function is called. 
- This allows to use hyperscanning libraries, that need to change the parameters of the network - between iterations while at the same time keeping the amount of redundant calls to a minimum +This allows to use hyperscanning libraries, that need to change the parameters of the network +between iterations while at the same time keeping the amount of redundant calls to a minimum """ from collections import namedtuple @@ -528,9 +528,12 @@ def _generate_observables( self._reset_observables() log.info("Generating layers") - # We need to transpose Experimental data, stacking over replicas + # validphys has generated the self.exp_info information replica-by-replica + # Here we transpose all information for convenience so that the loop over observables + # and the vectorization over replicas is made explicit experiment_data = { "trmask": [], + "vlmask": [], "expdata": [], "expdata_vl": [], "invcovmat": [], @@ -562,6 +565,7 @@ def _generate_observables( exp_dict, self.boundary_condition, mask_array=experiment_data["trmask"][i], + validation_mask_array=experiment_data["vlmask"][i], training_data=experiment_data["expdata"][i], validation_data=experiment_data["expdata_vl"][i], invcovmat_tr=experiment_data["invcovmat"][i], diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py index 46d112e12c..3b174f6bf8 100644 --- a/validphys2/src/validphys/n3fit_data.py +++ b/validphys2/src/validphys/n3fit_data.py @@ -97,16 +97,8 @@ def tr_masks(data, replica_trvlseed, parallel_models=False, replica=1, replicas= # We do this so that a given dataset will always have the same number of points masked trmax = int(ndata * frac) if trmax == 0: - if parallel_models: - if replica == replicas[0]: - log.warning( - f'Single-datapoint dataset {dataset.name} encountered in parallel multi-replica fit: ' - 'all replicas will include it in their training data' - ) - trmax = 1 - else: - # If that number is 0, then get 1 point with probability frac - trmax = int(rng.random() < frac) + # If that number is 0, then get 1 point with probability frac + trmax = int(rng.random() < frac) mask = np.concatenate([np.ones(trmax, dtype=bool), np.zeros(ndata - trmax, dtype=bool)]) rng.shuffle(mask) trmask_partial.append(mask) @@ -181,13 +173,13 @@ def kfold_masks(kpartitions, data): @functools.lru_cache -def fittable_datasets_masked(data, tr_masks): +def fittable_datasets_masked(data): """Generate a list of :py:class:`validphys.n3fit_data_utils.FittableDataSet` from a group of dataset and the corresponding training/validation masks """ # This is separated from fitting_data_dict so that we can cache the result # when the trvlseed is the same for all replicas (great for parallel replicas) - return validphys_group_extractor(data.datasets, tr_masks.masks) + return validphys_group_extractor(data.datasets) def fitting_data_dict( @@ -259,9 +251,31 @@ def fitting_data_dict( dt_trans_tr = None dt_trans_vl = None + # In the fittable datasets the fktables masked for 1-point datasets will be set to 0 + # Here we want to have the data both in training and validation, + # but set to 0 the data, so that it doesn't affect the chi2 value. 
+ zero_tr = [] + zero_vl = [] + idx = 0 + for data_mask in tr_masks: + dlen = len(data_mask) + if dlen == 1: + if data_mask[0]: + zero_vl.append(idx) + else: + zero_tr.append(idx) + idx += dlen + tr_mask = np.concatenate(tr_masks) vl_mask = ~tr_mask + # Now set to true the masks + tr_mask[zero_tr] = True + vl_mask[zero_vl] = True + # And prepare the index to 0 the (inverse) covmat + data_zero_tr = np.cumsum(tr_mask)[zero_tr] - 1 + data_zero_vl = np.cumsum(vl_mask)[zero_vl] - 1 + if diagonal_basis: expdata = np.matmul(dt_trans, expdata) # make a 1d array of the diagonal @@ -274,18 +288,38 @@ def fitting_data_dict( # prepare a masking rotation dt_trans_tr = dt_trans[tr_mask] dt_trans_vl = dt_trans[vl_mask] + + # TODO: check the effect of this when diagonalization + invcovmat_tr[data_zero_tr] = 0.0 + invcovmat_vl[data_zero_vl] = 0.0 else: covmat_tr = covmat[tr_mask].T[tr_mask] - invcovmat_tr = np.linalg.inv(covmat_tr) - covmat_vl = covmat[vl_mask].T[vl_mask] + + # Remove possible correlations for 1-point datasets + # that should've been masked out + covmat_tr[data_zero_tr, :] = covmat_tr[:, data_zero_tr] = 0.0 + covmat_vl[data_zero_vl, :] = covmat_vl[:, data_zero_vl] = 0.0 + # Avoid infinities + covmat_tr[np.ix_(data_zero_tr, data_zero_tr)] = 1.0 + covmat_vl[np.ix_(data_zero_vl, data_zero_vl)] = 1.0 + + invcovmat_tr = np.linalg.inv(covmat_tr) invcovmat_vl = np.linalg.inv(covmat_vl) - ndata_tr = np.count_nonzero(tr_mask) - expdata_tr = expdata[tr_mask].reshape(1, ndata_tr) + # Set to 0 the points in the diagonal that were left as 1 + invcovmat_tr[np.ix_(data_zero_tr, data_zero_tr)] = 0.0 + invcovmat_vl[np.ix_(data_zero_vl, data_zero_vl)] = 0.0 + ndata_tr = np.count_nonzero(tr_mask) ndata_vl = np.count_nonzero(vl_mask) - expdata_vl = expdata[vl_mask].reshape(1, ndata_vl) + + # And subtract them for ndata + ndata_tr -= len(data_zero_tr) + ndata_vl -= len(data_zero_vl) + + expdata_tr = expdata[tr_mask].reshape(1, -1) + expdata_vl = expdata[vl_mask].reshape(1, -1) # Now save a dictionary of training/validation/experimental folds # for training and validation we need to apply the tr/vl masks @@ -539,7 +573,7 @@ def _fitting_lagrange_dict(lambdadataset): integrability = isinstance(lambdadataset, IntegrabilitySetSpec) mode = "integrability" if integrability else "positivity" log.info("Loading %s dataset %s", mode, lambdadataset) - positivity_datasets = validphys_group_extractor([lambdadataset], []) + positivity_datasets = validphys_group_extractor([lambdadataset]) ndata = positivity_datasets[0].ndata return { "datasets": positivity_datasets, diff --git a/validphys2/src/validphys/n3fit_data_utils.py b/validphys2/src/validphys/n3fit_data_utils.py index fe908d73cb..07ea71d84b 100644 --- a/validphys2/src/validphys/n3fit_data_utils.py +++ b/validphys2/src/validphys/n3fit_data_utils.py @@ -7,10 +7,8 @@ The ``validphys_group_extractor`` will loop over every dataset of a given group loading their fktables (and applying any necessary cuts). 
""" -import dataclasses -from itertools import zip_longest -import numpy as np +import dataclasses @dataclasses.dataclass @@ -38,16 +36,6 @@ class FittableDataSet: # Things that can have default values: operation: str = "NULL" - frac: float = 1.0 - training_mask: np.ndarray = None # boolean array - - def __post_init__(self): - self._tr_mask = None - self._vl_mask = None - if self.training_mask is not None: - data_idx = self.fktables_data[0].sigma.index.get_level_values(0).unique() - self._tr_mask = data_idx[self.training_mask].values - self._vl_mask = data_idx[~self.training_mask].values @property def ndata(self): @@ -63,20 +51,8 @@ def fktables(self): """Return the list of fktable tensors for the dataset""" return [fk.get_np_fktable() for fk in self.fktables_data] - def training_fktables(self): - """Return the fktable tensors for the trainig data""" - if self._tr_mask is not None: - return [fk.with_cuts(self._tr_mask).get_np_fktable() for fk in self.fktables_data] - return self.fktables() - - def validation_fktables(self): - """Return the fktable tensors for the validation data""" - if self._vl_mask is not None: - return [fk.with_cuts(self._vl_mask).get_np_fktable() for fk in self.fktables_data] - return self.fktables() - -def validphys_group_extractor(datasets, tr_masks): +def validphys_group_extractor(datasets): """ Receives a grouping spec from validphys (most likely an experiment) and loops over its content extracting and parsing all information required for the fit @@ -85,8 +61,6 @@ def validphys_group_extractor(datasets, tr_masks): ---------- datasets: list(:py:class:`validphys.core.DataSetSpec`) List of dataset specs in this group - tr_masks: list(np.array) - List of training masks to be set for each dataset Returns ------- @@ -94,9 +68,9 @@ def validphys_group_extractor(datasets, tr_masks): """ loaded_obs = [] # Use zip_longest since tr_mask can be (and it is fine) an empty list - for dspec, mask in zip_longest(datasets, tr_masks): + for dspec in datasets: # Load all fktables with the appropiate cuts fktables = [fk.load_with_cuts(dspec.cuts) for fk in dspec.fkspecs] # And now put them in a FittableDataSet object which - loaded_obs.append(FittableDataSet(dspec.name, fktables, dspec.op, dspec.frac, mask)) + loaded_obs.append(FittableDataSet(dspec.name, fktables, dspec.op)) return loaded_obs From 3b398fde13d4f081454c47b63acca2453252f73e Mon Sep 17 00:00:00 2001 From: juacrumar Date: Wed, 12 Feb 2025 14:44:41 +0100 Subject: [PATCH 02/11] ensure that the number of training/validation points is correct per replica fix --- n3fit/src/n3fit/model_trainer.py | 50 +++++++++++++---- n3fit/src/n3fit/stopping.py | 78 ++++++++++++-------------- validphys2/src/validphys/n3fit_data.py | 4 +- validphys2/src/validphys/pdfgrids.py | 5 +- 4 files changed, 79 insertions(+), 58 deletions(-) diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py index 5cf5c54d0e..3c94d47983 100644 --- a/n3fit/src/n3fit/model_trainer.py +++ b/n3fit/src/n3fit/model_trainer.py @@ -151,7 +151,6 @@ def __init__( self.exp_info = list(exp_info) self.pos_info = [] if pos_info is None else pos_info self.integ_info = [] if integ_info is None else integ_info - self.all_info = self.exp_info[0] + self.pos_info + self.integ_info self.boundary_condition = boundary_condition self.flavinfo = flavinfo self.fitbasis = fitbasis @@ -716,20 +715,47 @@ def _prepare_reporting(self, partition): to select the bits necessary for reporting the chi2. 
Receives the chi2 partition data to see whether any dataset is to be left out """ - reported_keys = ["name", "count_chi2", "positivity", "integrability", "ndata", "ndata_vl"] + reported_keys = ["name", "count_chi2", "positivity", "integrability"] reporting_list = [] - for exp_dict in self.all_info: + + # Most of the information is shared among replicas, only ndata/ndata_vl + # might change replica to replica and they need to be filled with care + for idx, exp_dict in enumerate(self.exp_info[0]): + # Fill in the keys that are equal across replicas + reporting_dict = {k: exp_dict.get(k) for k in reported_keys} + + # Now loop over replicas to fill in all data points as a list + list_ndata = [] + list_ndata_vl = [] + for replica in self.exp_info: + replica_exp_dict = replica[idx] + + ndata = replica_exp_dict.get("ndata") + ndata_vl = replica_exp_dict.get("ndata_vl") + + if partition: + # If we are in a k-fold partition, we need to remove the folded data + # from both the training and validation to avoid calculating the chi2 wrong + for dataset in replica_exp_dict["datasets"]: + if dataset in partition["datasets"]: + dataset_ndata = dataset["ndata"] + frac = dataset["frac"] + ndata -= int(dataset_ndata * frac) + ndata_vl -= int(dataset_ndata * (1 - frac)) + + list_ndata.append(ndata) + list_ndata_vl.append(ndata_vl) + + reporting_dict["ndata"] = list_ndata + reporting_dict["ndata_vl"] = list_ndata_vl + reporting_list.append(reporting_dict) + + for exp_dict in self.pos_info + self.integ_info: reporting_dict = {k: exp_dict.get(k) for k in reported_keys} - if partition: - # If we are in a partition we need to remove the number of datapoints - # in order to avoid calculating the chi2 wrong - for dataset in exp_dict["datasets"]: - if dataset in partition["datasets"]: - ndata = dataset["ndata"] - frac = dataset["frac"] - reporting_dict["ndata"] -= int(ndata * frac) - reporting_dict["ndata_vl"] = int(ndata * (1 - frac)) + reporting_dict["ndata"] = [exp_dict.get("ndata")] + reporting_dict["ndata_vl"] = [exp_dict.get("ndata_vl")] reporting_list.append(reporting_dict) + return reporting_list def _train_and_fit(self, training_model, stopping_object, epochs=100) -> bool: diff --git a/n3fit/src/n3fit/stopping.py b/n3fit/src/n3fit/stopping.py index 48194cdf1b..55efb9d78b 100644 --- a/n3fit/src/n3fit/stopping.py +++ b/n3fit/src/n3fit/stopping.py @@ -1,31 +1,31 @@ """ - Module containing the classes related to the stopping alogirthm - - In this module there are four Classes: - - - FitState: this class contains the information of the fit - for a given point in history - - FitHistory: this class contains the information necessary - in order to reset the state of the fit to the point - in which the history was saved. - i.e., a list of FitStates - - Stopping: this class monitors the chi2 of the validation - and training sets and decides when to stop - - Positivity: Decides whether a given point fullfills the positivity conditions - - Validation: Controls the NNPDF cross-validation algorithm - - Note: - There are situations in which the validation set is empty, in those cases - the training set is used as validation set. - This implies several changes in the behaviour of this class as the training chi2 will - now be monitored for stability. - In order to parse the set of loss functions coming from the backend::MetaModel, - the function `parse_losses` relies on the fact that they are all suffixed with `_loss` - the validation case, instead, is suffixed with `val_loss`. 
In the particular casse in
-    which both training and validation model correspond to the same backend::MetaModel only
-    the `_loss` suffix can be found. This is taken into account by the class `Stopping`
-    which will tell `Validation` that no validation set was found and that the training is to
-    be used instead.
+Module containing the classes related to the stopping algorithm
+
+In this module there are four Classes:
+
+- FitState: this class contains the information of the fit
+            for a given point in history
+- FitHistory: this class contains the information necessary
+              in order to reset the state of the fit to the point
+              in which the history was saved.
+              i.e., a list of FitStates
+- Stopping: this class monitors the chi2 of the validation
+            and training sets and decides when to stop
+- Positivity: Decides whether a given point fulfills the positivity conditions
+- Validation: Controls the NNPDF cross-validation algorithm
+
+Note:
+    There are situations in which the validation set is empty, in those cases
+the training set is used as validation set.
+This implies several changes in the behaviour of this class as the training chi2 will
+now be monitored for stability.
+    In order to parse the set of loss functions coming from the backend::MetaModel,
+the function `parse_losses` relies on the fact that they are all suffixed with `_loss`
+the validation case, instead, is suffixed with `val_loss`. In the particular case in
+which both training and validation model correspond to the same backend::MetaModel only
+the `_loss` suffix can be found. This is taken into account by the class `Stopping`
+which will tell `Validation` that no validation set was found and that the training is to
+be used instead.
 """
 
 import logging
@@ -47,16 +47,15 @@
 
 def parse_ndata(all_data):
     """
-    Parses the list of dictionaries received from ModelTrainer
-    into a dictionary containing only the name of the experiments
-    together with the number of points.
+    Parses the list of dictionaries received from ModelTrainer into dictionaries
+    containing only the name of the experiments and the number of points per replica
 
     Returns
     -------
        `tr_ndata`
-            dictionary of {'exp' : ndata}
+            dictionary of {'exp' : np.ndarray}
        `vl_ndata`
-            dictionary of {'exp' : ndata}
+            dictionary of {'exp' : np.ndarray}
        `pos_set`: list of the names of the positivity sets
 
     Note: if there is no validation (total number of val points == 0)
@@ -71,9 +70,9 @@ def parse_ndata(all_data):
             tr_ndata = dictionary["ndata"]
             vl_ndata = dictionary["ndata_vl"]
             if tr_ndata:
-                tr_ndata_dict[exp_name] = tr_ndata
+                tr_ndata_dict[exp_name] = np.array(tr_ndata)
             if vl_ndata:
-                vl_ndata_dict[exp_name] = vl_ndata
+                vl_ndata_dict[exp_name] = np.array(vl_ndata)
             if dictionary.get("positivity") and not dictionary.get("integrability"):
                 pos_set.append(exp_name)
     if not vl_ndata_dict:
@@ -111,24 +110,19 @@ def parse_losses(history_object, data, suffix="loss"):
     except AttributeError:
         # So it works whether we pass the out or the out.history
         hobj = history_object
 
-    # In the general case epochs = 1.
- # In case that we are doing more than 1 epoch, take the last result as it is the result - # the model is in at the moment - # This value is only used for printing output purposes so should not have any significance dict_chi2 = {} total_points = 0 - total_loss = 0 + total_loss = np.zeros_like(hobj["loss"]) for exp_name, npoints in data.items(): loss = np.array(hobj[exp_name + f"_{suffix}"]) - dict_chi2[exp_name] = loss / npoints + dict_chi2[exp_name] = loss / np.maximum(npoints, 1) total_points += npoints total_loss += loss # By taking the loss from the history object we would be saving the total loss # including positivity sets and (if added/enabled) regularizsers # instead we want to restrict ourselves to the loss coming from experiments - # total_loss = np.mean(hobj["loss"]) / total_points - total_loss /= total_points + total_loss /= np.maximum(total_points, 1) dict_chi2["total"] = total_loss return total_loss, dict_chi2 diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py index 3b174f6bf8..9ad0cf0165 100644 --- a/validphys2/src/validphys/n3fit_data.py +++ b/validphys2/src/validphys/n3fit_data.py @@ -301,8 +301,8 @@ def fitting_data_dict( covmat_tr[data_zero_tr, :] = covmat_tr[:, data_zero_tr] = 0.0 covmat_vl[data_zero_vl, :] = covmat_vl[:, data_zero_vl] = 0.0 # Avoid infinities - covmat_tr[np.ix_(data_zero_tr, data_zero_tr)] = 1.0 - covmat_vl[np.ix_(data_zero_vl, data_zero_vl)] = 1.0 + covmat_tr[data_zero_tr, data_zero_tr] = 1.0 + covmat_vl[data_zero_vl, data_zero_vl] = 1.0 invcovmat_tr = np.linalg.inv(covmat_tr) invcovmat_vl = np.linalg.inv(covmat_vl) diff --git a/validphys2/src/validphys/pdfgrids.py b/validphys2/src/validphys/pdfgrids.py index 09dab95d44..039de9cc9a 100644 --- a/validphys2/src/validphys/pdfgrids.py +++ b/validphys2/src/validphys/pdfgrids.py @@ -2,6 +2,7 @@ High level providers for PDF and luminosity grids, formatted in such a way to facilitate plotting and analysis. """ + from collections import namedtuple import dataclasses import logging @@ -240,7 +241,7 @@ def lumigrid2d( lpdf = pdf.load() nmembers = pdf.get_members() - weights = np.full(shape=(nmembers, nbins_m, nbins_y), fill_value=np.NaN) + weights = np.full(shape=(nmembers, nbins_m, nbins_y), fill_value=np.nan) for irep in range(nmembers): for im, mx in enumerate(mxs): @@ -321,7 +322,7 @@ def lumigrid1d( lpdf = pdf.load() nmembers = pdf.get_members() - weights = np.full(shape=(nmembers, nbins_m), fill_value=np.NaN) + weights = np.full(shape=(nmembers, nbins_m), fill_value=np.nan) for im, (mx, sqrt_tau) in enumerate(zip(mxs, sqrt_taus)): y_min = -np.log(1 / sqrt_tau) From 5911f561a21f35768cc95240d1f1fcead728d6ae Mon Sep 17 00:00:00 2001 From: achiefa Date: Wed, 31 Jul 2024 10:09:54 +0100 Subject: [PATCH 03/11] First attempt for saving pseudodata with multiple replicas --- n3fit/src/n3fit/scripts/n3fit_exec.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/n3fit/src/n3fit/scripts/n3fit_exec.py b/n3fit/src/n3fit/scripts/n3fit_exec.py index 73133ebf6f..2f600fed12 100755 --- a/n3fit/src/n3fit/scripts/n3fit_exec.py +++ b/n3fit/src/n3fit/scripts/n3fit_exec.py @@ -128,14 +128,17 @@ def from_yaml(cls, o, *args, **kwargs): if fps != True: raise TypeError(f"fitting::savepseudodata is neither True nor False ({fps})") if len(kwargs["environment"].replicas) != 1: - raise ConfigError( - "Cannot request that multiple replicas are fitted and that " - "pseudodata is saved. 
Either set `fitting::savepseudodata` " - "to `false` or fit replicas one at a time." - ) - # take same namespace configuration on the pseudodata_table action. - training_action = namespace + "training_pseudodata" - validation_action = namespace + "validation_pseudodata" + #raise ConfigError( + # "Cannot request that multiple replicas are fitted and that " + # "pseudodata is saved. Either set `fitting::savepseudodata` " + # "to `false` or fit replicas one at a time." + #) + training_action = collect(namespace + "training_pseudodata", ("replicas",)) + validation_action = collect(namespace + "validation_pseudodata", ("replicas",)) + else: + # take same namespace configuration on the pseudodata_table action. + training_action = namespace + "training_pseudodata" + validation_action = namespace + "validation_pseudodata" N3FIT_FIXED_CONFIG['actions_'].extend((training_action, validation_action)) From 0f45e12ec60e18771cae0f4aa19ba53b49c3ebe4 Mon Sep 17 00:00:00 2001 From: achiefa Date: Thu, 1 Aug 2024 09:43:12 +0100 Subject: [PATCH 04/11] Moving collect over replcias in n3fit_data --- n3fit/src/n3fit/scripts/n3fit_exec.py | 9 +++------ validphys2/src/validphys/n3fit_data.py | 3 ++- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/n3fit/src/n3fit/scripts/n3fit_exec.py b/n3fit/src/n3fit/scripts/n3fit_exec.py index 2f600fed12..767768a219 100755 --- a/n3fit/src/n3fit/scripts/n3fit_exec.py +++ b/n3fit/src/n3fit/scripts/n3fit_exec.py @@ -127,18 +127,15 @@ def from_yaml(cls, o, *args, **kwargs): if fps := file_content["fitting"].get("savepseudodata", True): if fps != True: raise TypeError(f"fitting::savepseudodata is neither True nor False ({fps})") - if len(kwargs["environment"].replicas) != 1: + #if len(kwargs["environment"].replicas) != 1: #raise ConfigError( # "Cannot request that multiple replicas are fitted and that " # "pseudodata is saved. Either set `fitting::savepseudodata` " # "to `false` or fit replicas one at a time." #) - training_action = collect(namespace + "training_pseudodata", ("replicas",)) - validation_action = collect(namespace + "validation_pseudodata", ("replicas",)) - else: # take same namespace configuration on the pseudodata_table action. 
- training_action = namespace + "training_pseudodata" - validation_action = namespace + "validation_pseudodata" + training_action = namespace + "training_pseudodata" + validation_action = namespace + "validation_pseudodata" N3FIT_FIXED_CONFIG['actions_'].extend((training_action, validation_action)) diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py index 9ad0cf0165..c807f60df4 100644 --- a/validphys2/src/validphys/n3fit_data.py +++ b/validphys2/src/validphys/n3fit_data.py @@ -374,7 +374,8 @@ def replica_nnseed_fitting_data_dict(replica, exps_fitting_data_dict, replica_nn """ return (replica, exps_fitting_data_dict, replica_nnseed) - +replicas_training_pseudodata = collect("training_pseudodata", ("replicas",)) +replicas_validationf_pseudodata = collect("validation_pseudodata", ("replicas",)) replicas_nnseed_fitting_data_dict = collect("replica_nnseed_fitting_data_dict", ("replicas",)) groups_replicas_indexed_make_replica = collect( "indexed_make_replica", ("replicas", "group_dataset_inputs_by_experiment") From 12e08bc95149337e87ac9c75ef8186342d5f6ebb Mon Sep 17 00:00:00 2001 From: achiefa Date: Thu, 1 Aug 2024 10:37:57 +0100 Subject: [PATCH 05/11] Correcting bug --- n3fit/src/n3fit/scripts/n3fit_exec.py | 4 ++-- validphys2/src/validphys/n3fit_data.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/n3fit/src/n3fit/scripts/n3fit_exec.py b/n3fit/src/n3fit/scripts/n3fit_exec.py index 767768a219..32a8420c48 100755 --- a/n3fit/src/n3fit/scripts/n3fit_exec.py +++ b/n3fit/src/n3fit/scripts/n3fit_exec.py @@ -134,8 +134,8 @@ def from_yaml(cls, o, *args, **kwargs): # "to `false` or fit replicas one at a time." #) # take same namespace configuration on the pseudodata_table action. - training_action = namespace + "training_pseudodata" - validation_action = namespace + "validation_pseudodata" + training_action = namespace + "replicas_training_pseudodata" + validation_action = namespace + "replicas_validation_pseudodata" N3FIT_FIXED_CONFIG['actions_'].extend((training_action, validation_action)) diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py index c807f60df4..daadc26e36 100644 --- a/validphys2/src/validphys/n3fit_data.py +++ b/validphys2/src/validphys/n3fit_data.py @@ -375,7 +375,7 @@ def replica_nnseed_fitting_data_dict(replica, exps_fitting_data_dict, replica_nn return (replica, exps_fitting_data_dict, replica_nnseed) replicas_training_pseudodata = collect("training_pseudodata", ("replicas",)) -replicas_validationf_pseudodata = collect("validation_pseudodata", ("replicas",)) +replicas_validation_pseudodata = collect("validation_pseudodata", ("replicas",)) replicas_nnseed_fitting_data_dict = collect("replica_nnseed_fitting_data_dict", ("replicas",)) groups_replicas_indexed_make_replica = collect( "indexed_make_replica", ("replicas", "group_dataset_inputs_by_experiment") From d38c030f15a37254db0853e50861315e70677728 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Wed, 19 Feb 2025 07:58:31 +0100 Subject: [PATCH 06/11] save pseudodata for each replica in the right folder ; add complicated reportengine wrapper ; remove complicated validphys functio --- n3fit/src/n3fit/scripts/n3fit_exec.py | 13 ++- n3fit/src/n3fit/stopping.py | 4 +- validphys2/src/validphys/n3fit_data.py | 111 +++++++++++++++++-------- 3 files changed, 84 insertions(+), 44 deletions(-) diff --git a/n3fit/src/n3fit/scripts/n3fit_exec.py b/n3fit/src/n3fit/scripts/n3fit_exec.py index 32a8420c48..2175460071 100755 --- 
a/n3fit/src/n3fit/scripts/n3fit_exec.py +++ b/n3fit/src/n3fit/scripts/n3fit_exec.py @@ -127,17 +127,14 @@ def from_yaml(cls, o, *args, **kwargs): if fps := file_content["fitting"].get("savepseudodata", True): if fps != True: raise TypeError(f"fitting::savepseudodata is neither True nor False ({fps})") - #if len(kwargs["environment"].replicas) != 1: - #raise ConfigError( - # "Cannot request that multiple replicas are fitted and that " - # "pseudodata is saved. Either set `fitting::savepseudodata` " - # "to `false` or fit replicas one at a time." - #) - # take same namespace configuration on the pseudodata_table action. + training_action = namespace + "replicas_training_pseudodata" validation_action = namespace + "replicas_validation_pseudodata" + all_data_action = namespace + "replicas_pseudodata" - N3FIT_FIXED_CONFIG['actions_'].extend((training_action, validation_action)) + N3FIT_FIXED_CONFIG['actions_'].extend( + (training_action, validation_action, all_data_action) + ) if thconfig := file_content.get('fiatlux'): N3FIT_FIXED_CONFIG['fiatlux'] = thconfig diff --git a/n3fit/src/n3fit/stopping.py b/n3fit/src/n3fit/stopping.py index 55efb9d78b..99be8f45e7 100644 --- a/n3fit/src/n3fit/stopping.py +++ b/n3fit/src/n3fit/stopping.py @@ -69,9 +69,9 @@ def parse_ndata(all_data): if dictionary.get("count_chi2"): tr_ndata = dictionary["ndata"] vl_ndata = dictionary["ndata_vl"] - if tr_ndata: + if sum(tr_ndata) != 0: tr_ndata_dict[exp_name] = np.array(tr_ndata) - if vl_ndata: + if sum(vl_ndata) != 0: vl_ndata_dict[exp_name] = np.array(vl_ndata) if dictionary.get("positivity") and not dictionary.get("integrability"): pos_set.append(exp_name) diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py index daadc26e36..29c79a1277 100644 --- a/validphys2/src/validphys/n3fit_data.py +++ b/validphys2/src/validphys/n3fit_data.py @@ -5,7 +5,8 @@ :py:func:`n3fit.performfit.performfit`. """ -from collections import defaultdict +from collections import abc, defaultdict +from copy import copy import functools import hashlib import logging @@ -13,7 +14,7 @@ import numpy as np import pandas as pd -from reportengine import collect +from reportengine import collect, namespaces from reportengine.table import table from validphys.core import IntegrabilitySetSpec, TupleComp from validphys.n3fit_data_utils import validphys_group_extractor @@ -21,6 +22,49 @@ log = logging.getLogger(__name__) +def _per_replica(f): + """Decorator to be used on top of reportengine's decorator. + It replaces the preparation step of the decorator with a custom function, + which modifies the output behaviour when there is a collection of replicas. + + If there is no ``replica_path`` in the environment or collection over replicas + this function does nothing. Otherwise, it removes the replica number from the + output file and directs the output to ``replica_`` instead. 
+ """ + original_prepare = f.prepare + + def prepare_replica_path(*, spec, namespace, environment, **kwargs): + if not hasattr(environment, "replica_path") or "replicas" not in namespace: + return original_prepare(spec=spec, namespace=namespace, environment=environment) + + if not isinstance(namespace["replicas"], abc.Collection): + return original_prepare(spec=spec, namespace=namespace, environment=environment) + + # Now loop over the function input to get the replica collection] + # which we will then remove + rnumber = None + new_nsspec = [] + for farg in spec.nsspec: + if isinstance(farg, abc.Collection) and farg[0] == "replicas": + rnumber = namespaces.value_from_spcec_ele(namespace, farg) + else: + new_nsspec.append(farg) + if rnumber is None: + raise ValueError("Wrong call to @_replica_table") + + replica_path = environment.replica_path / f"replica_{rnumber}" + + new_env = copy(environment) + new_env.table_folder = replica_path + new_spec = spec._replace(nsspec=tuple(new_nsspec)) + + return original_prepare(spec=new_spec, namespace=namespace, environment=new_env) + + f.prepare = prepare_replica_path + + return f + + def replica_trvlseed(replica, trvlseed, same_trvl_per_replica=False): """Generates the ``trvlseed`` for a ``replica``.""" # TODO: move to the new infrastructure @@ -74,7 +118,7 @@ def __iter__(self): yield from self.masks -def tr_masks(data, replica_trvlseed, parallel_models=False, replica=1, replicas=(1,)): +def tr_masks(data, replica_trvlseed): """Generate the boolean masks used to split data into training and validation points. Returns a list of 1-D boolean arrays, one for each dataset. Each array has length equal to N_data, the datapoints which @@ -374,50 +418,48 @@ def replica_nnseed_fitting_data_dict(replica, exps_fitting_data_dict, replica_nn """ return (replica, exps_fitting_data_dict, replica_nnseed) + replicas_training_pseudodata = collect("training_pseudodata", ("replicas",)) replicas_validation_pseudodata = collect("validation_pseudodata", ("replicas",)) +replicas_pseudodata = collect("pseudodata_table", ("replicas",)) replicas_nnseed_fitting_data_dict = collect("replica_nnseed_fitting_data_dict", ("replicas",)) groups_replicas_indexed_make_replica = collect( "indexed_make_replica", ("replicas", "group_dataset_inputs_by_experiment") ) +experiment_indexed_make_replica = collect( + "indexed_make_replica", ("group_dataset_inputs_by_experiment",) +) -@table -def pseudodata_table(groups_replicas_indexed_make_replica, replicas): - """Creates a pandas DataFrame containing the generated pseudodata. The - index is :py:func:`validphys.results.experiments_index` and the columns - are the replica numbers. +def replica_pseudodata(experiment_indexed_make_replica, replica): + """Creates a pandas DataFrame containing the generated pseudodata. + The index is :py:func:`validphys.results.experiments_index` and the columns + is the replica numbers. Notes ----- Whilst running ``n3fit``, this action will only be called if - `fitting::savepseudodata` is `true` (as per the default setting) and - replicas are fitted one at a time. The table can be found in the replica - folder i.e. /nnfit/replica_*/ + `fitting::savepseudodata` is `true` (as per the default setting) + The table can be found in the replica folder i.e. /nnfit/replica_*/ """ - # groups_replicas_indexed_make_replica is collected over both replicas and dataset_input groups, - # in that order. What this means is that groups_replicas_indexed_make_replica is a list of size - # number_of_replicas x number_of_data_groups. 
Where the ordering inside the list is as follows: - # [data1_rep1, data2_rep1, ..., datan_rep1, ..., data1_repn, data2_repn, ..., datan_repn]. - - # To correctly put this into a single dataframe, we first need to know the number of - # dataset_input groups there are for each replica - groups_per_replica = len(groups_replicas_indexed_make_replica) // len(replicas) - # then we make a list of pandas dataframes, each containing the pseudodata of all datasets - # generated for a single replica - df = [ - pd.concat(groups_replicas_indexed_make_replica[i : i + groups_per_replica]) - for i in range(0, len(groups_replicas_indexed_make_replica), groups_per_replica) - ] - # then we concatentate the pseudodata of all replicas into a single dataframe - df = pd.concat(df, axis=1) - # and finally we add as column titles the replica name - df.columns = [f"replica {rep}" for rep in replicas] + df = pd.concat(experiment_indexed_make_replica) + df.columns = [f"replica {replica}"] return df +@_per_replica +@table +def pseudodata_table(replica_pseudodata): + """Save the pseudodata for the given replica. + Deactivate by setting ``fitting::savepseudodata: False`` + from within the fit runcard. + """ + return replica_pseudodata + + +@_per_replica @table -def training_pseudodata(pseudodata_table, training_mask): +def training_pseudodata(replica_pseudodata, replica_training_mask): """Save the training data for the given replica. Deactivate by setting ``fitting::savepseudodata: False`` from within the fit runcard. @@ -426,20 +468,21 @@ def training_pseudodata(pseudodata_table, training_mask): -------- :py:func:`validphys.n3fit_data.validation_pseudodata` """ - return pseudodata_table.loc[training_mask.values] + return replica_pseudodata.loc[replica_training_mask.values] +@_per_replica @table -def validation_pseudodata(pseudodata_table, training_mask): +def validation_pseudodata(replica_pseudodata, replica_training_mask): """Save the training data for the given replica. Deactivate by setting ``fitting::savepseudodata: False`` from within the fit runcard. See Also -------- - :py:func:`validphys.n3fit_data.training_pseudodata` + :py:func:`validphys.n3fit_data.validation_pseudodata` """ - return pseudodata_table.loc[~training_mask.values] + return replica_pseudodata.loc[~replica_training_mask.values] exps_tr_masks = collect("tr_masks", ("group_dataset_inputs_by_experiment",)) From 9aab46501c73fe922cdc818773d2046a4897e1c6 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Wed, 19 Feb 2025 15:31:40 +0100 Subject: [PATCH 07/11] test that parallel and sequential runs produce exactly the same fits and pseudodata --- doc/sphinx/source/n3fit/runcard_detailed.rst | 10 +-- n3fit/src/n3fit/scripts/n3fit_exec.py | 9 ++ n3fit/src/n3fit/tests/test_fit.py | 88 ++++++++++++++++++-- 3 files changed, 93 insertions(+), 14 deletions(-) diff --git a/doc/sphinx/source/n3fit/runcard_detailed.rst b/doc/sphinx/source/n3fit/runcard_detailed.rst index 224261fa6d..82a26e68cb 100644 --- a/doc/sphinx/source/n3fit/runcard_detailed.rst +++ b/doc/sphinx/source/n3fit/runcard_detailed.rst @@ -318,10 +318,8 @@ flag in the runcard to ``true`` when running a range of replicas. Running in parallel can be quite hard on memory and it is only advantageous when fitting on a GPU, where one can find a speed up equal to the number of models run in parallel (each model being a different replica). 
-
-When running in parallel it might be advantageous (e.g., for debugging)
-to set the training validation split to be equal for all replicas,
-this can be done with the `same_trvl_per_replica: true` runcard flag.
+Running with parallel models produces exactly the same pseudodata as the sequential runs.
+Note that numerical differences might still be generated during the training.
 
 In other words, in order to run several replicas in parallel in a machine
 (be it a big CPU or, most likely, a GPU)
@@ -332,8 +330,8 @@ top-level option:
 
     parallel_models: true
 
-Note that currently, in order to run with parallel models, one has to set ``savepseudodata: false``
-in the ``fitting`` section of the runcard. Once this is done, the user can run ``n3fit`` with a
+
+Once this is done, the user can run ``n3fit`` with a
 replica range to be parallelized (in this case from replica 1 to replica 4).
 
 .. code-block:: bash
diff --git a/n3fit/src/n3fit/scripts/n3fit_exec.py b/n3fit/src/n3fit/scripts/n3fit_exec.py
index 2175460071..51990aa1ab 100755
--- a/n3fit/src/n3fit/scripts/n3fit_exec.py
+++ b/n3fit/src/n3fit/scripts/n3fit_exec.py
@@ -41,6 +41,11 @@
 
 TAB_FOLDER = "tables"
 
+# Suppress the arguments that we don't want the help of n3fit to show
+# note that these would still be parsed by vp/reportengine
+SUPPRESS = ["parallel", "no-parallel", "scheduler", "style", "format"]
+
+
 class N3FitError(Exception):
     """Exception raised when n3fit cannot succeed and knows why"""
 
@@ -242,6 +247,10 @@ def __init__(self):
     @property
     def argparser(self):
         parser = super().argparser
+
+        for argo in SUPPRESS:
+            parser.add_argument(f"--{argo}", help=argparse.SUPPRESS)
+
         parser.add_argument(
             "-o", "--output", help="Output folder and name of the fit", default=None
         )
diff --git a/n3fit/src/n3fit/tests/test_fit.py b/n3fit/src/n3fit/tests/test_fit.py
index 0610250843..b87ae88df2 100644
--- a/n3fit/src/n3fit/tests/test_fit.py
+++ b/n3fit/src/n3fit/tests/test_fit.py
@@ -1,14 +1,14 @@
 """
-    Regression tests for n3fit
+Regression tests for n3fit
 
-    This file will run a fit with a runcard which includes:
-    - A DIS dataset
-    - A Hadronic dataset
-    - Two positivity sets
-    And checks that the results have not changed from the previous iteration of the code
+This file will run a fit with a runcard which includes:
+    - A DIS dataset
+    - A Hadronic dataset
+    - Two positivity sets
+And checks that the results have not changed from the previous iteration of the code
 
-    If the results are known to need a change,
-    it is necessary to flag _something_ to regenerate regression
+If the results are known to need a change,
+it is necessary to flag _something_ to regenerate regression
 """
 
 import json
@@ -19,6 +19,7 @@
 
 import h5py
 from numpy.testing import assert_allclose, assert_equal
+import pandas as pd
 import pytest
 
 import n3fit
@@ -240,6 +241,77 @@ def test_multireplica_runs(tmp_path, runcard):
             compare_weights(option_1, option_2, file_1, file_2)
 
 
+@pytest.mark.linux
+def test_parallel_against_sequential(tmp_path, rep_from=6, rep_to=8):
+    """Checks that running in parallel and sequentially produces exactly the same results.
+
+    This test runs several fits:
+        1. A sequential fit of 3 replicas in a loop (6 to 8, i.e. rep_from to rep_to)
+        2. A parallel fit from replica 6 to 8
+
+    And checks:
+        1) The .csv generated by the fit:
+            a) The same pseudodata has been generated by ``make_replica``
+            b) Exactly the same cuts are being used in the parallel and sequential fits
+            c) And can be reproduced!
+        2) The .json file that contains the fit parameters and results,
+            which at one epoch should show no numerical differences between sequential and parallel fits
+    """
+    input_card = REGRESSION_FOLDER / QUICKNAME
+    card_parallel = tmp_path / "parallel.yml"
+    card_sequenti = tmp_path / "sequenti.yml"
+
+    n3fit_input = yaml_safe.load(input_card.with_suffix(".yml"))
+    n3fit_input["debug"] = False
+    n3fit_input.pop("load")
+
+    # Complicate slightly the choice of dataset so that different scenarios are tested
+    datasets = [
+        "HERA_CC_318GEV_EM-SIGMARED",
+        "HERA_CC_318GEV_EP-SIGMARED",
+        "ATLAS_Z0_7TEV_49FB_HIMASS",
+        "ATLAS_TTBAR_8TEV_TOT_X-SEC",
+        "CMS_SINGLETOP_13TEV_TCHANNEL-XSEC",
+    ]
+    dataset_inputs = [{"dataset": d, "frac": 0.6, "variant": "legacy"} for d in datasets]
+    n3fit_input["dataset_inputs"] = dataset_inputs
+    # Exit immediately
+    n3fit_input["parameters"]["epochs"] = 1
+    # Save pseudodata
+    n3fit_input["fitting"]["savepseudodata"] = True
+
+    n3fit_input["parallel_models"] = False
+    yaml_safe.dump(n3fit_input, card_sequenti)
+    n3fit_input["parallel_models"] = True
+    yaml_safe.dump(n3fit_input, card_parallel)
+
+    name_seq = card_sequenti.with_suffix("").name
+    name_par = card_parallel.with_suffix("").name
+
+    # Now run both
+    for r in range(rep_from, rep_to + 1):
+        sp.run(f"{EXE} {card_sequenti} {r}".split(), cwd=tmp_path, check=True)
+    sp.run(f"{EXE} {card_parallel} {rep_from} -r {rep_to}".split(), cwd=tmp_path, check=True)
+
+    # Loop over all pseudodata files for both fits and load them up
+    folder_seq = card_sequenti.with_suffix("") / "nnfit"
+    folder_par = card_parallel.with_suffix("") / "nnfit"
+
+    # Both should have exactly the same pseudodata in the same locations
+    for csvfile_seq in folder_seq.glob("*/*.csv"):
+        csvfile_par = folder_par / csvfile_seq.relative_to(folder_seq)
+
+        result_seq = pd.read_csv(csvfile_seq, sep="\t", index_col=[0, 1, 2], header=0)
+        result_par = pd.read_csv(csvfile_par, sep="\t", index_col=[0, 1, 2], header=0)
+        pd.testing.assert_frame_equal(result_seq, result_par)
+
+    # Check the rest of the fit, while numerical differences are expected between sequential
+    # and parallel runs, one single epoch should not be enough to generate them
+    for r in range(rep_from, rep_to + 1):
+        seq_json = folder_seq / f"replica_{r}" / f"{name_seq}.json"
+        check_fit_results(tmp_path, name_par, r, seq_json)
+
+
 def compare_weights(option_1, option_2, file_1, file_2):
     """Reads two weight files and checks that the weights are the same between the two"""
     for key in file_1.keys():

From 2b92347b1875934083f3dcf3afb99bcc27baf279 Mon Sep 17 00:00:00 2001
From: juacrumar
Date: Wed, 19 Feb 2025 17:47:18 +0100
Subject: [PATCH 08/11] apply review comments

---
 n3fit/src/n3fit/model_gen.py     | 20 +++++++++++---------
 n3fit/src/n3fit/model_trainer.py | 10 +++++-----
 pyproject.toml                   |  1 -
 3 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/n3fit/src/n3fit/model_gen.py b/n3fit/src/n3fit/model_gen.py
index e169561429..e239319990 100644
--- a/n3fit/src/n3fit/model_gen.py
+++ b/n3fit/src/n3fit/model_gen.py
@@ -26,7 +26,6 @@
     base_layer_selector,
 )
 from n3fit.backends import operations as op
-from n3fit.backends import regularizer_selector as reg_sec
 from n3fit.layers import (
     DIS,
     DY,
@@ -41,6 +41,8 @@
 from n3fit.msr import generate_msr_model_and_grid
 from validphys.photon.compute import Photon
 
+from n3fit.backends import regularizer_selector  # isort: skip isort and black don't agree
+
 
 @dataclass
 class ObservableWrapper:
@@ -127,7 +128,7 @@ def __call__(self, pdf_layer, mask=None):
 def
observable_generator( spec_dict, boundary_condition=None, - mask_array=None, + training_mask_array=None, validation_mask_array=None, training_data=None, validation_data=None, @@ -143,7 +144,6 @@ def observable_generator( the result of the observable for each contained dataset (n_points,). In summary the model has the following structure: - One experiment layer, made of any number of observable layers. Observable layers, corresponding to commondata datasets and made of any number of fktables (and an operation on them). @@ -171,10 +171,12 @@ def observable_generator( boundary_condition: dict dictionary containing the instance of the a PDF set to be used as a Boundary Condition. - mask_array: np.ndarray + training_mask_array: np.ndarray training mask per replica validation_mask_array: np.ndarray - validation mask per replica, when not given ¬mask_array will be used + validation mask per replica, when not given ~training_mask_array will be used + while in general the validation is a negation of the training, in special cases + such as 1-point datasets, these are accepted by both masks and then removed by the loss n_replicas: int number of replicas fitted simultaneously positivity_initial: float @@ -250,16 +252,16 @@ def observable_generator( model_inputs = np.concatenate(model_inputs).reshape(1, -1) # Make the mask layers... - if mask_array is None: + if training_mask_array is None: tr_mask_layer = None if validation_mask_array is None: vl_mask_layer = None else: vl_mask_layer = Mask(validation_mask_array, name=f"vlmask_{spec_name}") else: - tr_mask_layer = Mask(mask_array, name=f"trmask_{spec_name}") + tr_mask_layer = Mask(training_mask_array, name=f"trmask_{spec_name}") if validation_mask_array is None: - vl_mask_layer = Mask(~mask_array, name=f"vlmask_{spec_name}") + vl_mask_layer = Mask(~training_mask_array, name=f"vlmask_{spec_name}") else: vl_mask_layer = Mask(validation_mask_array, name=f"vlmask_{spec_name}") @@ -735,7 +737,7 @@ def generate_nn( """ nodes_list = list(nodes) # so we can modify it x_input = Input(shape=(None, nodes_in), batch_size=1, name="NN_input") - reg = reg_sec(regularizer, **regularizer_args) + reg = regularizer_selector(regularizer, **regularizer_args) if layer_type == "dense_per_flavour": # set the arguments that will define the layer diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py index 3c94d47983..b4e110ebf2 100644 --- a/n3fit/src/n3fit/model_trainer.py +++ b/n3fit/src/n3fit/model_trainer.py @@ -9,18 +9,18 @@ between iterations while at the same time keeping the amount of redundant calls to a minimum """ +import logging from collections import namedtuple from itertools import zip_longest -import logging import numpy as np +import n3fit.hyper_optimization.penalties +import n3fit.hyper_optimization.rewards from n3fit import model_gen from n3fit.backends import NN_LAYER_ALL_REPLICAS, Lambda, MetaModel, callbacks, clear_backend_state from n3fit.backends import operations as op from n3fit.hyper_optimization.hyper_scan import HYPEROPT_STATUSES -import n3fit.hyper_optimization.penalties -import n3fit.hyper_optimization.rewards from n3fit.hyper_optimization.rewards import HyperLoss from n3fit.scaler import generate_scaler from n3fit.stopping import Stopping @@ -563,7 +563,7 @@ def _generate_observables( exp_layer = model_gen.observable_generator( exp_dict, self.boundary_condition, - mask_array=experiment_data["trmask"][i], + training_mask_array=experiment_data["trmask"][i], validation_mask_array=experiment_data["vlmask"][i], 
training_data=experiment_data["expdata"][i], validation_data=experiment_data["expdata_vl"][i], @@ -599,7 +599,7 @@ def _generate_observables( pos_dict, self.boundary_condition, positivity_initial=pos_initial, - mask_array=replica_masks, + training_mask_array=replica_masks, training_data=training_data, validation_data=training_data, n_replicas=len(self.replicas), diff --git a/pyproject.toml b/pyproject.toml index 78d413a074..db1a74ce9a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -143,7 +143,6 @@ line_length = 100 profile = "black" # https://black.readthedocs.io/en/stable/guides/using_black_with_other_tools.html#custom-configuration skip_gitignore = true known_first_party = ["validphys", "eko", "n3fit", "nnpdf_data", "reportengine"] -force_sort_within_sections = true [tool.pytest.ini_options] addopts = "--disable-warnings" From 335b3bb7ca55682fa66fcf3809051dd7243844c6 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Thu, 20 Feb 2025 10:50:17 +0100 Subject: [PATCH 09/11] modify comments and reset isort option --- pyproject.toml | 1 + validphys2/src/validphys/n3fit_data.py | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index db1a74ce9a..78d413a074 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -143,6 +143,7 @@ line_length = 100 profile = "black" # https://black.readthedocs.io/en/stable/guides/using_black_with_other_tools.html#custom-configuration skip_gitignore = true known_first_party = ["validphys", "eko", "n3fit", "nnpdf_data", "reportengine"] +force_sort_within_sections = true [tool.pytest.ini_options] addopts = "--disable-warnings" diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py index 29c79a1277..4580205f43 100644 --- a/validphys2/src/validphys/n3fit_data.py +++ b/validphys2/src/validphys/n3fit_data.py @@ -23,7 +23,7 @@ def _per_replica(f): - """Decorator to be used on top of reportengine's decorator. + """Decorator to be used on top of reportengine's decorators. It replaces the preparation step of the decorator with a custom function, which modifies the output behaviour when there is a collection of replicas. 
@@ -40,8 +40,8 @@ def prepare_replica_path(*, spec, namespace, environment, **kwargs):
         if not isinstance(namespace["replicas"], abc.Collection):
             return original_prepare(spec=spec, namespace=namespace, environment=environment)
 
-        # Now loop over the function input to get the replica collection]
-        # which we will then remove
+        # Loop over the function input arguments to find the collection of replicas
+        # pass down all other arguments unchanged
         rnumber = None
         new_nsspec = []
         for farg in spec.nsspec:
@@ -50,7 +50,7 @@ def prepare_replica_path(*, spec, namespace, environment, **kwargs):
             else:
                 new_nsspec.append(farg)
         if rnumber is None:
-            raise ValueError("Wrong call to @_replica_table")
+            raise ValueError("Wrong call to @_replica_table, no replica number found.")
 
         replica_path = environment.replica_path / f"replica_{rnumber}"
 

From 9552d7326512c04c121b34f2d74f3921d95f8169 Mon Sep 17 00:00:00 2001
From: juacrumar
Date: Thu, 20 Feb 2025 12:59:00 +0100
Subject: [PATCH 10/11] update the docstring of ModelTrainer to take into
 account the multireplica case

---
 n3fit/src/n3fit/model_trainer.py       | 38 ++++++++++++++++++--------------
 validphys2/src/validphys/n3fit_data.py |  5 ++---
 2 files changed, 24 insertions(+), 19 deletions(-)

diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py
index b4e110ebf2..faefa47dc1 100644
--- a/n3fit/src/n3fit/model_trainer.py
+++ b/n3fit/src/n3fit/model_trainer.py
@@ -9,18 +9,18 @@
 between iterations while at the same time keeping the amount of redundant calls to a minimum
 """
 
+from collections import namedtuple
+from itertools import zip_longest
 import logging
-from collections import namedtuple
-from itertools import zip_longest
 
 import numpy as np
 
-import n3fit.hyper_optimization.penalties
-import n3fit.hyper_optimization.rewards
 from n3fit import model_gen
 from n3fit.backends import NN_LAYER_ALL_REPLICAS, Lambda, MetaModel, callbacks, clear_backend_state
 from n3fit.backends import operations as op
 from n3fit.hyper_optimization.hyper_scan import HYPEROPT_STATUSES
+import n3fit.hyper_optimization.penalties
+import n3fit.hyper_optimization.rewards
 from n3fit.hyper_optimization.rewards import HyperLoss
 from n3fit.scaler import generate_scaler
 from n3fit.stopping import Stopping
@@ -118,32 +118,38 @@
         ----------
         experiments_data: list
             list of `validphys.core.DataGroupSpec` containing experiments
-        exp_info: list
-            list of dictionaries containing experiments
+        exp_info: list(list(dict))
+            A list of dictionaries (one per experiment) for each replica.
+            This dictionary contains the experimental data and inverse covmats separated by
+            training/validation as well as possible data transformations (e.g.
diagonalization)
         pos_info: list
-            list of dictionaries containing positivity sets
+            list of dictionaries containing positivity sets, similar to ``exp_info``
+            but all replicas equal
         integ_info: list
-            list of dictionaries containing integrability sets
-        flavinfo: list
-            the object returned by fitting['basis']
+            list of dictionaries containing integrability sets, similar to ``pos_info``
+        flavinfo: list(dict)
+            the fitting::basis object from the runcard
         fitbasis: str
-            the name of the basis being fitted
+            the name of the basis in which the fit is being done, fitting::fitbasis
         nnseeds: list(int)
-            the seed used to initialise the NN for each model to be passed to model_gen
+            seed used to initialise the NN for each model to be passed to model_gen
+            generated by ``validphys.n3fit_data.replica_nnseed``, one per replica
         debug: bool
-            flag to activate some debug options
+            flag to activate debug options
         kfold_parameters: dict
            parameters defining the kfolding method
         max_cores: int
-            maximum number of cores the fitting can use to run
+            maximum number of cores available to the fitting routine
         model_file: str
-            whether to save the models
+            name of the (h5) file in which the final NN model will be saved in each replica
+            if not given, the model is not saved.
         sum_rules: str
-            whether sum rules should be enabled (All, MSR, VSR, False)
+            which sum rules should be enabled (All, MSR, VSR, False), defaults to ALL
         theoryid: validphys.core.TheoryIDSpec
-            object contining info for generating the photon
+            object containing the theoryid that should be used to generate the photon
         lux_params: dict
            dictionary containing the params needed from LuxQED
+            if not given, the photon is not generated
         replicas: list
            list with the replicas ids to be fitted
        """
diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py
index 4580205f43..d050f60cbd 100644
--- a/validphys2/src/validphys/n3fit_data.py
+++ b/validphys2/src/validphys/n3fit_data.py
@@ -340,11 +340,10 @@ def fitting_data_dict(
         covmat_tr = covmat[tr_mask].T[tr_mask]
         covmat_vl = covmat[vl_mask].T[vl_mask]
 
-        # Remove possible correlations for 1-point datasets
-        # that should've been masked out
+        # Remove possible correlations for 1-point datasets that should've been masked out
         covmat_tr[data_zero_tr, :] = covmat_tr[:, data_zero_tr] = 0.0
         covmat_vl[data_zero_vl, :] = covmat_vl[:, data_zero_vl] = 0.0
-        # Avoid infinities
+        # Set the diagonal to 1 to avoid infinities or inconsistencies when computing the inverse
         covmat_tr[data_zero_tr, data_zero_tr] = 1.0
         covmat_vl[data_zero_vl, data_zero_vl] = 1.0
 

From 90e516cdc819366f87bef1684aa53b99c092be74 Mon Sep 17 00:00:00 2001
From: "Juan M. Cruz-Martinez"
Date: Thu, 20 Feb 2025 16:24:07 +0100
Subject: [PATCH 11/11] change fitbot ref

---
 .github/workflows/fitbot.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/fitbot.yml b/.github/workflows/fitbot.yml
index e937b88bd5..53e646d0d8 100644
--- a/.github/workflows/fitbot.yml
+++ b/.github/workflows/fitbot.yml
@@ -10,7 +10,7 @@ on:
 
 env:
   N3FIT_MAXNREP: 20 # total number of replicas to fit
   POSTFIT_NREP: 16 # requested replicas for postfit
-  REFERENCE_SET: NNBOT-b1374cba6-2024-11-28 # reference set for exact results
+  REFERENCE_SET: NNBOT-4083fbacb-2025-02-20 # reference set for exact results
   STABLE_REFERENCE_SET: NNBOT-c0f99b7b3-2024-02-28 # reference set for last tag
   PYTHONHASHSEED: "0"