Run3 updates #72

Merged 17 commits on Feb 22, 2024
5 changes: 3 additions & 2 deletions hbw/calibration/default.py
@@ -51,8 +51,9 @@ def base_init(self: Calibrator) -> None:
if self.bjet_regression:
self.calibrators.append(bjet_regression)

if self.dataset_inst.is_mc:
# TODO: we might need to modify jer when using bjet calibration
# run JER only on MC
# and not for 2022 (TODO: update as soon as JER is done for Summer22)
if self.dataset_inst.is_mc and not self.config_inst.campaign.x.year == 2022:
self.calibrators.append(jer)

self.uses |= set(self.calibrators)
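Note on the change above: JER smearing is now gated on both the dataset type and the campaign year. A minimal sketch of the resulting logic, using only names from this diff (the surrounding Calibrator init is assumed context):

# apply JER smearing only on MC; skip 2022 until Summer22 JER payloads are available
if self.dataset_inst.is_mc and self.config_inst.campaign.x.year != 2022:
    self.calibrators.append(jer)
self.uses |= set(self.calibrators)

The "not ... == 2022" comparison in the diff is equivalent to the "!= 2022" form used in this sketch.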
89 changes: 45 additions & 44 deletions hbw/config/config_run2.py
@@ -7,14 +7,14 @@
from __future__ import annotations

import os
import re

import yaml
from scinum import Number
import law
import order as od

from columnflow.util import DotDict
from columnflow.config_util import add_shift_aliases
from hbw.config.styling import stylize_processes
from hbw.config.categories import add_categories_selection
from hbw.config.variables import add_variables
@@ -139,14 +139,14 @@ def add_config(
jerc_postfix = ""
if year == 2016 and campaign.x.vfp == "post":
jerc_postfix = "APV"
elif year == 2022:
jerc_postfix = f"{campaign.x.EE}EE"
elif year == 2022 and campaign.x.EE == "post":
jerc_postfix = "EE"

if cfg.x.run == 2:
jerc_campaign = f"Summer19UL{year2}{jerc_postfix}"
jet_type = "AK4PFchs"
elif cfg.x.run == 3:
jerc_campaign = f"Winter{year2}Run3"
jerc_campaign = f"Summer{year2}{jerc_postfix}_22Sep2023"
jet_type = "AK4PFPuppi"

cfg.x.jec = DotDict.wrap({
Expand Down Expand Up @@ -219,8 +219,8 @@ def add_config(
# https://twiki.cern.ch/twiki/bin/view/CMS/JetResolution?rev=107
# TODO: get jerc working for Run3
cfg.x.jer = DotDict.wrap({
"campaign": jerc_campaign if cfg.x.run == 2 else f"JR_{jerc_campaign}",
"version": {2016: "JRV3", 2017: "JRV2", 2018: "JRV2", 2022: "V1"}[year],
"campaign": jerc_campaign,
"version": {2016: "JRV3", 2017: "JRV2", 2018: "JRV2", 2022: "V2"}[year],
"jet_type": jet_type,
})

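For orientation, the tags above resolve as follows (a sketch assuming year2 is the two-digit year, as suggested by the format strings):

# run 2, 2017:         jerc_campaign = "Summer19UL17",          jet_type = "AK4PFchs"
# run 3, 2022 preEE:   jerc_campaign = "Summer22_22Sep2023",    jet_type = "AK4PFPuppi"
# run 3, 2022 postEE:  jerc_campaign = "Summer22EE_22Sep2023",  jet_type = "AK4PFPuppi"
# the JER block then reuses jerc_campaign directly, with version "V2" for 2022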
Expand Down Expand Up @@ -297,19 +297,6 @@ def add_config(
# (used in the muon producer)
cfg.x.muon_sf_names = ("NUM_TightRelIso_DEN_TightIDandIPCut", f"{cfg.x.cpn_tag}_UL")

# helper to add column aliases for both shifts of a source
# TODO: switch to the columnflow function (but what happened to *selection_dependent*?)
def add_shift_aliases(shift_source: str, aliases: dict[str], selection_dependent: bool):

for direction in ["up", "down"]:
shift = cfg.get_shift(od.Shift.join_name(shift_source, direction))
# format keys and values
inject_shift = lambda s: re.sub(r"\{([^_])", r"{_\1", s).format(**shift.__dict__)
_aliases = {inject_shift(key): inject_shift(value) for key, value in aliases.items()}
alias_type = "column_aliases_selection_dependent" if selection_dependent else "column_aliases"
# extend existing or register new column aliases
shift.set_aux(alias_type, shift.get_aux(alias_type, {})).update(_aliases)

# register shifts
# TODO: make shifts year-dependent
cfg.add_shift(name="nominal", id=0)
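The local add_shift_aliases helper above is removed in favour of columnflow.config_util.add_shift_aliases, which takes the config as its first argument and has no selection_dependent flag; all calls below follow this pattern. A minimal usage sketch (signature inferred from those calls):

from columnflow.config_util import add_shift_aliases

# registers {name}/{direction}-formatted column aliases on both the up and down shifts
add_shift_aliases(
    cfg,
    "minbias_xs",
    {"pu_weight": "pu_weight_{name}"},
)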
@@ -320,27 +307,28 @@ def add_shift_aliases(shift_source: str, aliases: dict[str], selection_dependent
cfg.add_shift(name="minbias_xs_up", id=7, type="shape")
cfg.add_shift(name="minbias_xs_down", id=8, type="shape")
add_shift_aliases(
cfg,
"minbias_xs",
{
"pu_weight": "pu_weight_{name}",
"normalized_pu_weight": "normalized_pu_weight_{name}",
},
selection_dependent=False)
)
cfg.add_shift(name="top_pt_up", id=9, type="shape")
cfg.add_shift(name="top_pt_down", id=10, type="shape")
add_shift_aliases("top_pt", {"top_pt_weight": "top_pt_weight_{direction}"}, selection_dependent=False)
add_shift_aliases(cfg, "top_pt", {"top_pt_weight": "top_pt_weight_{direction}"})

cfg.add_shift(name="e_sf_up", id=40, type="shape")
cfg.add_shift(name="e_sf_down", id=41, type="shape")
cfg.add_shift(name="e_trig_sf_up", id=42, type="shape")
cfg.add_shift(name="e_trig_sf_down", id=43, type="shape")
add_shift_aliases("e_sf", {"electron_weight": "electron_weight_{direction}"}, selection_dependent=False)
add_shift_aliases(cfg, "e_sf", {"electron_weight": "electron_weight_{direction}"})

cfg.add_shift(name="mu_sf_up", id=50, type="shape")
cfg.add_shift(name="mu_sf_down", id=51, type="shape")
cfg.add_shift(name="mu_trig_sf_up", id=52, type="shape")
cfg.add_shift(name="mu_trig_sf_down", id=53, type="shape")
add_shift_aliases("mu_sf", {"muon_weight": "muon_weight_{direction}"}, selection_dependent=False)
add_shift_aliases(cfg, "mu_sf", {"muon_weight": "muon_weight_{direction}"})

btag_uncs = [
"hf", "lf", f"hfstats1_{year}", f"hfstats2_{year}",
@@ -350,12 +338,12 @@ def add_shift_aliases(shift_source: str, aliases: dict[str], selection_dependent
cfg.add_shift(name=f"btag_{unc}_up", id=100 + 2 * i, type="shape")
cfg.add_shift(name=f"btag_{unc}_down", id=101 + 2 * i, type="shape")
add_shift_aliases(
cfg,
f"btag_{unc}",
{
"normalized_btag_weight": f"normalized_btag_weight_{unc}_" + "{direction}",
"normalized_njet_btag_weight": f"normalized_njet_btag_weight_{unc}_" + "{direction}",
},
selection_dependent=False,
)

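For reference, each b-tagging uncertainty source occupies a consecutive pair of shift ids starting at 100 (assuming the loop enumerates btag_uncs in the order listed above):

# i = 0 ("hf"): btag_hf_up -> 100, btag_hf_down -> 101
# i = 1 ("lf"): btag_lf_up -> 102, btag_lf_down -> 103
# ... and so on in steps of two for the remaining sources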
cfg.add_shift(name="mur_up", id=201, type="shape")
@@ -368,28 +356,40 @@ def add_shift_aliases(shift_source: str, aliases: dict[str], selection_dependent
cfg.add_shift(name="pdf_down", id=208, type="shape")

for unc in ["mur", "muf", "murf_envelope", "pdf"]:
# add_shift_aliases(unc, {f"{unc}_weight": f"{unc}_weight_" + "{direction}"}, selection_dependent=False)
# add_shift_aliases(cfg, unc, {f"{unc}_weight": f"{unc}_weight_" + "{direction}"})
add_shift_aliases(
cfg,
unc,
{f"normalized_{unc}_weight": f"normalized_{unc}_weight_" + "{direction}"},
selection_dependent=False,
)

with open(os.path.join(thisdir, "jec_sources.yaml"), "r") as f:
all_jec_sources = yaml.load(f, yaml.Loader)["names"]
for jec_source in cfg.x.jec["uncertainty_sources"]:
idx = all_jec_sources.index(jec_source)
cfg.add_shift(name=f"jec_{jec_source}_up", id=5000 + 2 * idx, type="shape")
cfg.add_shift(name=f"jec_{jec_source}_down", id=5001 + 2 * idx, type="shape")
cfg.add_shift(
name=f"jec_{jec_source}_up",
id=5000 + 2 * idx,
type="shape",
tags={"jec"},
aux={"jec_source": jec_source},
)
cfg.add_shift(
name=f"jec_{jec_source}_down",
id=5001 + 2 * idx,
type="shape",
tags={"jec"},
aux={"jec_source": jec_source},
)
add_shift_aliases(
cfg,
f"jec_{jec_source}",
{"Jet.pt": "Jet.pt_{name}", "Jet.mass": "Jet.mass_{name}"},
selection_dependent=True,
)

cfg.add_shift(name="jer_up", id=6000, type="shape", tags={"selection_dependent"})
cfg.add_shift(name="jer_down", id=6001, type="shape", tags={"selection_dependent"})
add_shift_aliases("jer", {"Jet.pt": "Jet.pt_{name}", "Jet.mass": "Jet.mass_{name}"}, selection_dependent=True)
cfg.add_shift(name="jer_up", id=6000, type="shape", tags={"jer"})
cfg.add_shift(name="jer_down", id=6001, type="shape", tags={"jer"})
add_shift_aliases(cfg, "jer", {"Jet.pt": "Jet.pt_{name}", "Jet.mass": "Jet.mass_{name}"})

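The JEC and JER shifts now carry explicit tags ("jec", "jer") plus the JEC source as aux data, replacing the previous "selection_dependent" tag. A sketch of how downstream code could select them (assumed usage; order shift objects expose has_tag and the x aux accessor):

jec_shifts = [shift for shift in cfg.shifts if shift.has_tag("jec")]
for shift in jec_shifts:
    source = shift.x.jec_source  # one of the names from jec_sources.yaml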
def make_jme_filename(jme_aux, sample_type, name, era=None):
"""
@@ -409,11 +409,13 @@ def make_jme_filename(jme_aux, sample_type, name, era=None):
json_mirror = "/afs/cern.ch/user/m/mrieger/public/mirrors/jsonpog-integration-9ea86c4c"
corr_tag = f"{cfg.x.cpn_tag}_UL"
elif cfg.x.run == 3:
# TODO: Update when possible
json_mirror = "/afs/desy.de/user/p/paaschal/public/mirrors/jsonpog-integration"
corr_tag = f"{year}_Prompt"
json_mirror = "/afs/cern.ch/user/m/mfrahm/public/mirrors/jsonpog-integration-f35ab53e"
corr_tag = f"{year}_Summer22{jerc_postfix}"

cfg.x.external_files = DotDict.wrap({
# pileup weight corrections
"pu_sf": (f"{json_mirror}/POG/LUM/{corr_tag}/puWeights.json.gz", "v1"),

# jet energy correction
"jet_jerc": (f"{json_mirror}/POG/JME/{corr_tag}/jet_jerc.json.gz", "v1"),

@@ -431,21 +433,20 @@ def make_jme_filename(jme_aux, sample_type, name, era=None):
})

# temporary fix due to missing corrections in run 3
# electron, muon and met corrections are still missing; btag and pu are TODO
if cfg.x.run == 3:
cfg.add_tag("skip_pu_weights")

cfg.add_tag("skip_btag_weights")
cfg.x.external_files.pop("btag_sf_corr")

cfg.add_tag("skip_electron_weights")
cfg.add_tag("skip_muon_weights")
cfg.add_tag("skip_pu_weights")
cfg.x.external_files.pop("electron_sf")

cfg.add_tag("skip_muon_weights")
cfg.x.external_files.pop("muon_sf")
cfg.x.external_files.pop("btag_sf_corr")
cfg.x.external_files.pop("met_phi_corr")

# NOTE: we should mirror and gzip them
if campaign.x.EE == "pre":
cfg.x.external_files["muon_sf"] = ("https://gitlab.cern.ch/cms-muonPOG/muonefficiencies/-/raw/master/Run3/2022/2022_Z/ScaleFactors_Muon_Z_ID_ISO_2022_schemaV2.json", "v1") # noqa
elif campaign.x.EE == "post":
cfg.x.external_files["muon_sf"] = ("https://gitlab.cern.ch/cms-muonPOG/muonefficiencies/-/raw/master/Run3/2022_EE/2022_Z/ScaleFactors_Muon_Z_ID_ISO_2022_EE_schemaV2.json", "v1") # noqa
cfg.x.external_files.pop("met_phi_corr")

cfg.x.met_filters = {
"Flag.goodVertices",
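The Run 3 external files now point to a jsonpog-integration mirror with Summer22 correction tags, with the still-missing payloads popped and skipped via config tags. As a rough sketch of how such a payload is opened (assuming correctionlib and a locally fetched file; it reads gzipped JSON directly):

import correctionlib

cset = correctionlib.CorrectionSet.from_file("jet_jerc.json.gz")
# correction keys combine campaign, version, level and jet type,
# e.g. entries containing "Summer22_22Sep2023" and "AK4PFPuppi" for 2022 preEE
print(list(cset))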
34 changes: 28 additions & 6 deletions hbw/config/datasets.py
@@ -52,6 +52,9 @@
"data_e_e",
"data_e_f",
],
}

data_egamma = {
"2022preEE": [
"data_egamma_c",
"data_egamma_d",
@@ -170,7 +173,7 @@
"qcd_mu_pt1000_pythia",
],
"2022postEE": [
"qcd_mu_pt15to20_pythia", # 1 file not possible to open
# "qcd_mu_pt15to20_pythia", # empty after selection
"qcd_mu_pt20to30_pythia",
"qcd_mu_pt30to50_pythia",
"qcd_mu_pt50to80_pythia",
@@ -179,12 +182,12 @@
"qcd_mu_pt170to300_pythia",
"qcd_mu_pt300to470_pythia",
"qcd_mu_pt470to600_pythia",
"qcd_mu_pt600to800_pythia", # 1 file not possible to open
"qcd_mu_pt600to800_pythia",
"qcd_mu_pt800to1000_pythia",
"qcd_mu_pt1000_pythia",
],
"2022preEE": [
"qcd_mu_pt15to20_pythia", # 1 file not possible to open
"qcd_mu_pt15to20_pythia",
"qcd_mu_pt20to30_pythia",
"qcd_mu_pt30to50_pythia",
"qcd_mu_pt50to80_pythia",
@@ -193,7 +196,7 @@
"qcd_mu_pt170to300_pythia",
"qcd_mu_pt300to470_pythia",
"qcd_mu_pt470to600_pythia",
"qcd_mu_pt600to800_pythia", # 1 file not possible to open
"qcd_mu_pt600to800_pythia",
"qcd_mu_pt800to1000_pythia",
"qcd_mu_pt1000_pythia",
],
@@ -211,7 +214,22 @@
"qcd_em_pt300toInf_pythia",
],
"2022postEE": [
# empty for now
# "qcd_em_pt10to30_pythia", # missing process + probably empty anyways
# "qcd_em_pt30to50_pythia", # empty after selection
"qcd_em_pt50to80_pythia",
"qcd_em_pt80to120_pythia",
"qcd_em_pt120to170_pythia",
"qcd_em_pt170to300_pythia",
"qcd_em_pt300toInf_pythia",
],
"2022preEE": [
# "qcd_em_pt10to30_pythia", # missing process + probably empty anyways
"qcd_em_pt30to50_pythia",
"qcd_em_pt50to80_pythia",
"qcd_em_pt80to120_pythia",
"qcd_em_pt120to170_pythia",
"qcd_em_pt170to300_pythia",
"qcd_em_pt300toInf_pythia",
],
}

@@ -286,8 +304,11 @@
"ggHH_kl_2p45_kt_1_dl_hbbhww_powheg",
"ggHH_kl_5_kt_1_dl_hbbhww_powheg",
],
"2022preEE": [
"ggHH_kl_1_kt_1_dl_hbbhww_powheg",
],
"2022postEE": [
# empty for now
"ggHH_kl_1_kt_1_dl_hbbhww_powheg",
],
}

@@ -363,6 +384,7 @@ def get_dataset_names(cpn_tag: int | str, as_list: bool = False) -> DotDict[str:
dataset_names = DotDict.wrap(
data_mu=data_mu.get(cpn_tag, []),
data_e=data_e.get(cpn_tag, []),
data_egamma=data_egamma.get(cpn_tag, []),
tt=tt.get(cpn_tag, []),
st=st.get(cpn_tag, []),
w_lnu=w_lnu.get(cpn_tag, []),
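With the new data_egamma group, dataset names for a campaign are retrieved as before; a short usage sketch based on the signature shown above (the as_list behaviour is assumed from its name):

names = get_dataset_names("2022preEE")
egamma_datasets = names.data_egamma  # e.g. ["data_egamma_c", "data_egamma_d", ...]
flat_names = get_dataset_names("2022preEE", as_list=True)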
8 changes: 4 additions & 4 deletions hbw/config/defaults_and_groups.py
@@ -142,7 +142,7 @@ def set_config_defaults_and_groups(config_inst):
"signal": ["ggHH_*", "qqHH_*"], "gghh": ["ggHH_*"], "qqhh": ["qqHH_*"],
}
config_inst.x.process_groups["dmuch"] = ["data_mu"] + config_inst.x.process_groups["much"]
config_inst.x.process_groups["dech"] = ["data_e"] + config_inst.x.process_groups["ech"]
config_inst.x.process_groups["dech"] = ["data_e", "data_egamma"] + config_inst.x.process_groups["ech"]

# dataset groups for conveniently looping over certain datasets
# (used in wrapper_factory and during plotting)
@@ -167,8 +167,8 @@ def set_config_defaults_and_groups(config_inst):
"sl_ech": ["1e", "1e__resolved", "1e__boosted"],
"sl_much_resolved": ["1mu__resolved", "1mu__resolved__1b", "1mu__resolved__2b"],
"sl_ech_resolved": ["1e__resolved", "1e__resolved__1b", "1e__resolved__2b"],
"sl_much_boosted": ["1mu__boosted", "1mu_boosted__1b", "1mu_boosted__2b"],
"sl_ech_boosted": ["1e__boosted", "1e__boosted__1b", "1e__boosted__2b"],
"sl_much_boosted": ["1mu__boosted"],
"sl_ech_boosted": ["1e__boosted"],
"default": ["incl", "1e", "1mu"],
"test": ["incl", "1e"],
"dilep": ["incl", "2e", "2mu", "emu"],
@@ -240,7 +240,7 @@ def set_config_defaults_and_groups(config_inst):
"unstack_signal": {proc.name: {"unstack": True} for proc in config_inst.processes if "HH" in proc.name},
"scale_signal": {
proc.name: {"unstack": True, "scale": 10000}
for proc in config_inst.processes if "HH" in proc.name
for proc, _, _ in config_inst.walk_processes() if proc.has_tag("is_signal")
},
"dilep": {
"ggHH_kl_0_kt_1_dl_hbbhww": {"scale": 10000, "unstack": True},
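config_inst.processes only holds the top-level processes attached to the config, whereas walk_processes descends into child processes, so tagged sub-processes are also picked up for the scale_signal styling. The new comprehension amounts to:

signal_style = {
    proc.name: {"unstack": True, "scale": 10000}
    for proc, _, _ in config_inst.walk_processes()  # 3-tuples; only the process instance is used
    if proc.has_tag("is_signal")
}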
10 changes: 9 additions & 1 deletion hbw/config/processes.py
@@ -78,6 +78,14 @@ def configure_hbw_processes(config: od.Config):
id=31199,
label="QCD Electron enriched",
)
elif qcd_em:
qcd_ele = add_parent_process( # noqa
config,
[qcd_em],
name="qcd_ele",
id=31199,
label="QCD Electron enriched",
)

# custom v_lep process for ML Training, combining W+DY
w_lnu = config.get_process("w_lnu")
@@ -121,7 +129,7 @@ def configure_hbw_processes(config: od.Config):
sig.add_process(proc)

# add auxiliary information if process is signal
for proc_inst in config.processes:
for proc_inst, _, _ in config.walk_processes():
is_signal = any([
signal_tag in proc_inst.name
for signal_tag in ("qqHH", "ggHH", "radion", "gravition")
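The signal information set here is what defaults_and_groups.py queries via has_tag("is_signal"). A sketch of the tagging step (assumed continuation of the loop above; the exact aux/tag assignment is not shown in this hunk):

for proc_inst, _, _ in config.walk_processes():
    if any(tag in proc_inst.name for tag in ("qqHH", "ggHH", "radion", "gravition")):
        proc_inst.add_tag("is_signal")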
6 changes: 3 additions & 3 deletions hbw/config/variables.py
@@ -267,23 +267,23 @@ def add_variables(config: od.Config) -> None:
deepjet_wps = config.x.btag_working_points.deepjet
config.add_variable(
name="n_deepjet_loose",
expression=lambda events: ak.num(events.Jet.btagDeepFlavB > deepjet_wps.loose, axis=1),
expression=lambda events: ak.sum(events.Jet.btagDeepFlavB > deepjet_wps.loose, axis=1),
aux={"inputs": {"Jet.pt", "Jet.btagDeepFlavB"}},
binning=(7, -0.5, 6.5),
x_title="Number of deepjets (loose WP)",
discrete_x=True,
)
config.add_variable(
name="n_deepjet_medium",
expression=lambda events: ak.num(events.Jet.btagDeepFlavB > deepjet_wps.medium, axis=1),
expression=lambda events: ak.sum(events.Jet.btagDeepFlavB > deepjet_wps.medium, axis=1),
aux={"inputs": {"Jet.pt", "Jet.btagDeepFlavB"}},
binning=(7, -0.5, 6.5),
x_title="Number of deepjets (medium WP)",
discrete_x=True,
)
config.add_variable(
name="n_deepjet_tight",
expression=lambda events: ak.num(events.Jet.btagDeepFlavB > deepjet_wps.tight, axis=1),
expression=lambda events: ak.sum(events.Jet.btagDeepFlavB > deepjet_wps.tight, axis=1),
aux={"inputs": {"Jet.pt", "Jet.btagDeepFlavB"}},
binning=(7, -0.5, 6.5),
x_title="Number of deepjets (tight WP)",
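The switch from ak.num to ak.sum is the actual fix here: ak.num returns the length of each jet list regardless of the boolean values, while summing the boolean mask counts the jets above the working point. A tiny standalone example:

import awkward as ak

scores = ak.Array([[0.1, 0.9, 0.6], [0.3]])
ak.num(scores > 0.5, axis=1).tolist()  # [3, 1] -> list lengths, not a count
ak.sum(scores > 0.5, axis=1).tolist()  # [2, 0] -> jets passing the working point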