# based on https://github.com/LLRCMS/KLUBAnalysis/blob/VBF_legacy/src/PuJetIdSF.cc
import os
from copy import deepcopy as copy
from PhysicsTools.NanoAODTools.postprocessing.framework.datamodel import Collection
from analysis_tools.utils import import_root, getContentHisto2D
from Base.Modules.baseModules import JetLepMetModule, DummyModule, JetLepMetSyst
ROOT = import_root()
class PUjetID_SFProducer(JetLepMetModule):
def __init__(self, year, *args, **kwargs):
super(PUjetID_SFProducer, self).__init__(*args, **kwargs)
self.year = year
path = os.path.expandvars("$CMSSW_BASE/src/Corrections/JME/python/pujetid_sf/")
self.f_eff = ROOT.TFile.Open("%s/h2_eff_mc_%s_L.root" % (path, self.year))
self.f_eff_sf = ROOT.TFile.Open("%s/h2_eff_sf_%s_L.root" % (path, self.year))
self.f_mistag = ROOT.TFile.Open("%s/h2_mistag_mc_%s_L.root" % (path, self.year))
self.f_mistag_sf = ROOT.TFile.Open("%s/h2_mistag_sf_%s_L.root" % (path, self.year))
self.f_sf_err = ROOT.TFile.Open("%s/scalefactorsPUID_81Xtraining.root" % path)
self.h_eff_ = copy(self.f_eff.Get("h2_eff_mc%s_L" % self.year))
self.h_eff_sf_ = copy(self.f_eff_sf.Get("h2_eff_sf%s_L" % self.year))
self.h_eff_sf_err_ = copy(self.f_sf_err.Get("h2_eff_sf%s_L_Systuncty" % self.year))
self.h_mistag_ = copy(self.f_mistag.Get("h2_mistag_mc%s_L" % self.year))
self.h_mistag_sf_ = copy(self.f_mistag_sf.Get("h2_mistag_sf%s_L" % self.year))
self.h_mistag_sf_err_ = copy(self.f_sf_err.Get("h2_mistag_sf%s_L_Systuncty" % self.year))
pass
def beginFile(self, inputFile, outputFile, inputTree, wrappedOutputTree):
self.out = wrappedOutputTree
self.out.branch('PUjetID_SF', 'F')
self.out.branch('PUjetID_SF_up', 'F')
self.out.branch('PUjetID_SF_down', 'F')
self.out.branch('PUjetID_SF_eff_up', 'F')
self.out.branch('PUjetID_SF_eff_down', 'F')
self.out.branch('PUjetID_SF_mistag_up', 'F')
self.out.branch('PUjetID_SF_mistag_down', 'F')
self.out.branch('PUjetID_SF_eff_eta_s2p5_up', 'F')
self.out.branch('PUjetID_SF_eff_eta_s2p5_down', 'F')
self.out.branch('PUjetID_SF_mistag_eta_s2p5_up', 'F')
self.out.branch('PUjetID_SF_mistag_eta_s2p5_down', 'F')
self.out.branch('PUjetID_SF_eff_eta_l2p5_up', 'F')
self.out.branch('PUjetID_SF_eff_eta_l2p5_down', 'F')
self.out.branch('PUjetID_SF_mistag_eta_l2p5_up', 'F')
self.out.branch('PUjetID_SF_mistag_eta_l2p5_down', 'F')
def endFile(self, inputFile, outputFile, inputTree, wrappedOutputTree):
pass
def get_eff_sf_and_error(self, isReal, pt, eta):
if pt < 20.:
pt = 20.
elif pt > 50.:
pt = 50.
if isReal:
return (getContentHisto2D(self.h_eff_, pt, eta),
getContentHisto2D(self.h_eff_sf_, pt, eta),
getContentHisto2D(self.h_eff_sf_err_, pt, eta))
else:
return (getContentHisto2D(self.h_mistag_, pt, eta),
getContentHisto2D(self.h_mistag_sf_, pt, eta),
getContentHisto2D(self.h_mistag_sf_err_, pt, eta))
def analyze(self, event):
"""process event, return True (go to next module) or False (fail, go to next event)"""
P_MC = 1.
P_DATA = 1.
P_DATA_up = 1.
P_DATA_down = 1.
P_DATA_effic_up = 1.
P_DATA_effic_down = 1.
P_DATA_mistag_up = 1.
P_DATA_mistag_down = 1.
P_DATA_effic_eta_s2p5_up = 1.
P_DATA_effic_eta_s2p5_down = 1.
P_DATA_effic_eta_l2p5_up = 1.
P_DATA_effic_eta_l2p5_down = 1.
P_DATA_mistag_eta_s2p5_up = 1.
P_DATA_mistag_eta_s2p5_down = 1.
P_DATA_mistag_eta_l2p5_up = 1.
P_DATA_mistag_eta_l2p5_down = 1.
muons = Collection(event, "Muon")
electrons = Collection(event, "Electron")
taus = Collection(event, "Tau")
jets = Collection(event, "Jet")
genjets = Collection(event, "GenJet")
dau1, dau2, dau1_tlv, dau2_tlv = self.get_daus(event, muons, electrons, taus)
for jet in jets:
if jet.jetId < 2:
continue
jet_tlv = ROOT.TLorentzVector()
jet_tlv.SetPtEtaPhiM(
eval("jet.pt%s" % self.jet_syst),
jet.eta,
jet.phi,
eval("jet.mass%s" % self.jet_syst)
)
if jet_tlv.Pt() < 20 or jet_tlv.Pt() > 50 or abs(jet_tlv.Eta()) > 4.7:
continue
if jet_tlv.DeltaR(dau1_tlv) < 0.5 or jet_tlv.DeltaR(dau2_tlv) < 0.5:
continue
# noisy jet removal for 2017
# https://twiki.cern.ch/twiki/bin/view/CMS/HiggsToTauTauWorkingLegacyRun2#Jets
if self.year == 2017 and abs(tlv_jet.Eta()) > 2.65 and abs(tlv_jet.Eta()) < 3.139:
continue
isRealJet = False
for genjet in genjets:
genjet_tlv = ROOT.TLorentzVector()
genjet_tlv.SetPtEtaPhiM(genjet.pt, genjet.eta, genjet.phi, genjet.mass)
if jet_tlv.DeltaR(genjet_tlv) < 0.4:
isRealJet = True
break
passPUjetIDLoose = jet.puId >= 4 or eval("jet.pt%s" % self.jet_syst) > 50
eff, sf, sf_err = self.get_eff_sf_and_error(isRealJet, jet_tlv.Pt(), jet_tlv.Eta())
sf_up = min([max([0, sf + sf_err]), 5])
sf_down = min([max([0, sf - sf_err]), 5])
if passPUjetIDLoose:
P_MC *= eff;
P_DATA *= sf*eff;
P_DATA_up *= sf_up*eff;
P_DATA_down *= sf_down*eff;
P_DATA_effic_up *= sf_up*eff;
P_DATA_effic_down *= sf_down*eff;
P_DATA_mistag_up *= sf*eff; # true jet --> use nominal SF for mistag
P_DATA_mistag_down *= sf*eff; # true jet --> use nominal SF for mistag
if abs(jet_tlv.Eta()) <= 2.5:
P_DATA_effic_eta_s2p5_up *= sf_up * eff
P_DATA_effic_eta_s2p5_down *= sf_down * eff
P_DATA_effic_eta_l2p5_up *= sf * eff
P_DATA_effic_eta_l2p5_down *= sf * eff
P_DATA_mistag_eta_s2p5_up *= sf * eff
P_DATA_mistag_eta_s2p5_down *= sf * eff
P_DATA_mistag_eta_l2p5_up *= sf * eff
P_DATA_mistag_eta_l2p5_down *= sf * eff
else:
P_DATA_effic_eta_s2p5_up *= sf * eff
P_DATA_effic_eta_s2p5_down *= sf * eff
P_DATA_effic_eta_l2p5_up *= sf_up * eff
P_DATA_effic_eta_l2p5_down *= sf_down * eff
P_DATA_mistag_eta_s2p5_up *= sf * eff
P_DATA_mistag_eta_s2p5_down *= sf * eff
P_DATA_mistag_eta_l2p5_up *= sf * eff
P_DATA_mistag_eta_l2p5_down *= sf * eff
else:
P_MC *= (1. - eff);
P_DATA *= (1. - sf * eff);
P_DATA_up *= (1. - sf_up * eff);
P_DATA_down *= (1. - sf_down * eff);
P_DATA_effic_up *= (1. - sf * eff); # fake jet --> use nominal SF for effic
P_DATA_effic_down *= (1. - sf * eff); # fake jet --> use nominal SF for effic
P_DATA_mistag_up *= (1. - sf_up * eff);
P_DATA_mistag_down *= (1. - sf_down * eff);
if abs(jet_tlv.Eta()) <= 2.5:
P_DATA_effic_eta_s2p5_up *= (1 - sf * eff)
P_DATA_effic_eta_s2p5_down *= (1 - sf * eff)
P_DATA_effic_eta_l2p5_up *= (1 - sf * eff)
P_DATA_effic_eta_l2p5_down *= (1 - sf * eff)
P_DATA_mistag_eta_s2p5_up *= (1 - sf_up * eff)
P_DATA_mistag_eta_s2p5_down *= (1 - sf_down * eff)
P_DATA_mistag_eta_l2p5_up *= (1 - sf * eff)
P_DATA_mistag_eta_l2p5_down *= (1 - sf * eff)
else:
P_DATA_effic_eta_s2p5_up *= (1 - sf * eff)
P_DATA_effic_eta_s2p5_down *= (1 - sf * eff)
P_DATA_effic_eta_l2p5_up *= (1 - sf * eff)
P_DATA_effic_eta_l2p5_down *= (1 - sf * eff)
P_DATA_mistag_eta_s2p5_up *= (1 - sf * eff)
P_DATA_mistag_eta_s2p5_down *= (1 - sf * eff)
P_DATA_mistag_eta_l2p5_up *= (1 - sf_up * eff)
P_DATA_mistag_eta_l2p5_down *= (1 - sf_down * eff)
self.out.fillBranch('PUjetID_SF', P_DATA / P_MC)
self.out.fillBranch('PUjetID_SF_up', P_DATA_up / P_MC)
self.out.fillBranch('PUjetID_SF_down', P_DATA_down / P_MC)
self.out.fillBranch('PUjetID_SF_eff_up', P_DATA_effic_up / P_MC)
self.out.fillBranch('PUjetID_SF_eff_down', P_DATA_effic_down / P_MC)
self.out.fillBranch('PUjetID_SF_mistag_up', P_DATA_mistag_up / P_MC)
self.out.fillBranch('PUjetID_SF_mistag_down', P_DATA_mistag_down / P_MC)
self.out.fillBranch('PUjetID_SF_eff_eta_s2p5_up', P_DATA_effic_eta_s2p5_up / P_MC)
self.out.fillBranch('PUjetID_SF_eff_eta_s2p5_down', P_DATA_effic_eta_s2p5_down / P_MC)
self.out.fillBranch('PUjetID_SF_mistag_eta_s2p5_up', P_DATA_effic_eta_l2p5_up / P_MC)
self.out.fillBranch('PUjetID_SF_mistag_eta_s2p5_down', P_DATA_effic_eta_l2p5_down / P_MC)
self.out.fillBranch('PUjetID_SF_eff_eta_l2p5_up', P_DATA_mistag_eta_s2p5_up / P_MC)
self.out.fillBranch('PUjetID_SF_eff_eta_l2p5_down', P_DATA_mistag_eta_s2p5_down / P_MC)
self.out.fillBranch('PUjetID_SF_mistag_eta_l2p5_up', P_DATA_mistag_eta_l2p5_up / P_MC)
self.out.fillBranch('PUjetID_SF_mistag_eta_l2p5_down', P_DATA_mistag_eta_l2p5_down / P_MC)
return True
def PUjetID_SF(**kwargs):
isMC = kwargs.pop("isMC")
year = kwargs.pop("year")
if not isMC:
return lambda: DummyModule(**kwargs)
return lambda: PUjetID_SFProducer(year, **kwargs)
class PUjetID_SFRDFProducer(JetLepMetSyst):
def __init__(self, year, *args, **kwargs):
super(PUjetID_SFRDFProducer, self).__init__(*args, **kwargs)
self.year = year
self.isMC = kwargs.pop("isMC")
self.isUL = kwargs.pop("isUL")
self.ispreVFP = kwargs.pop("ispreVFP", False)
self.lep_pt = kwargs.pop("lep_pt", "{}")
self.lep_eta = kwargs.pop("lep_eta", "{}")
self.lep_phi = kwargs.pop("lep_phi", "{}")
self.lep_mass = kwargs.pop("lep_mass", "{}")
if self.isMC:
base = "{}/{}/src/Corrections/JME".format(
os.getenv("CMT_CMSSW_BASE"), os.getenv("CMT_CMSSW_VERSION"))
if "/libCorrectionsJME.so" not in ROOT.gSystem.GetLibraries():
ROOT.gSystem.Load("libCorrectionsJME.so")
if not self.isUL:
ROOT.gROOT.ProcessLine(".L {}/interface/PUjetID_SFinterface.h".format(base))
folder_path = os.path.expandvars(
"$CMSSW_BASE/src/Corrections/JME/python/pujetid_sf/")
ROOT.gInterpreter.ProcessLine("""
auto PUjetID_SF = PUjetID_SFinterface(%s, "%s");
""" % (int(self.year), folder_path))
ROOT.gInterpreter.Declare("""
using Vfloat = const ROOT::RVec<float>&;
using VInt = const ROOT::RVec<int>&;
std::vector<double> get_pujetid_sf (
Vfloat Jet_pt, Vfloat Jet_eta, Vfloat Jet_phi, Vfloat Jet_mass,
VInt Jet_puId, Vfloat Jet_jetId,
Vfloat GenJet_pt, Vfloat GenJet_eta, Vfloat GenJet_phi, Vfloat GenJet_mass,
int pairType, int dau1_index, int dau2_index,
Vfloat muon_pt, Vfloat muon_eta, Vfloat muon_phi, Vfloat muon_mass,
Vfloat electron_pt, Vfloat electron_eta,
Vfloat electron_phi, Vfloat electron_mass,
Vfloat tau_pt, Vfloat tau_eta, Vfloat tau_phi, Vfloat tau_mass
)
{
float dau1_pt, dau1_eta, dau1_phi, dau1_mass;
float dau2_pt, dau2_eta, dau2_phi, dau2_mass;
if (pairType == 0) {
dau1_pt = muon_pt.at(dau1_index);
dau1_eta = muon_eta.at(dau1_index);
dau1_phi = muon_phi.at(dau1_index);
dau1_mass = muon_mass.at(dau1_index);
} else if (pairType == 1) {
dau1_pt = electron_pt.at(dau1_index);
dau1_eta = electron_eta.at(dau1_index);
dau1_phi = electron_phi.at(dau1_index);
dau1_mass = electron_mass.at(dau1_index);
} else if (pairType == 2) {
dau1_pt = tau_pt.at(dau1_index);
dau1_eta = tau_eta.at(dau1_index);
dau1_phi = tau_phi.at(dau1_index);
dau1_mass = tau_mass.at(dau1_index);
} else {
dau1_pt = -999.;
dau1_eta = -999.;
dau1_phi = -999.;
dau1_mass = -999.;
}
dau2_pt = tau_pt.at(dau2_index);
dau2_eta = tau_eta.at(dau2_index);
dau2_phi = tau_phi.at(dau2_index);
dau2_mass = tau_mass.at(dau2_index);
return PUjetID_SF.get_pu_weights(
Jet_pt, Jet_eta, Jet_phi, Jet_mass, Jet_jetId, Jet_puId,
GenJet_pt, GenJet_eta,GenJet_phi, GenJet_mass,
dau1_pt, dau1_eta, dau1_phi, dau1_mass,
dau2_pt, dau2_eta, dau2_phi, dau2_mass);
}
""")
else:
import correctionlib
correctionlib.register_pyroot_binding()
if not os.getenv("_PUjetID_SF"):
os.environ["_PUjetID_SF"] = "PUjetID_SF"
ROOT.gROOT.ProcessLine(".L {}/interface/PUjetID_SFULinterface.h".format(base))
json_path = ("/cvmfs/cms.cern.ch/rsync/cms-nanoAOD/jsonpog-integration/"
"POG/JME/{}/jmar.json.gz")
if self.year == 2018:
self.tag = "2018_UL"
filename = json_path.format(self.tag)
elif self.year == 2017:
self.tag = "2017_UL"
filename = json_path.format(self.tag)
elif self.year == 2016:
if self.ispreVFP:
self.tag = "2016preVFP_UL"
filename = json_path.format(self.tag)
else:
self.tag = "2016postVFP_UL"
filename = json_path.format(self.tag)
else:
raise ValueError("Incorrect year")
ROOT.gInterpreter.ProcessLine("""
auto _PUjetID_SF = PUjetID_SFULinterface(%s, "%s");
""" % (int(self.year), filename))
def run(self, df):
if not self.isMC:
return df, []
if not self.isUL:
branches = ['PUjetID_SF', 'PUjetID_SF_up', 'PUjetID_SF_down',
'PUjetID_SF_eff_up', 'PUjetID_SF_eff_down',
'PUjetID_SF_mistag_up', 'PUjetID_SF_mistag_down',
'PUjetID_SF_eff_eta_s2p5_up', 'PUjetID_SF_eff_eta_s2p5_down',
'PUjetID_SF_mistag_eta_s2p5_up', 'PUjetID_SF_mistag_eta_s2p5_down',
'PUjetID_SF_eff_eta_l2p5_up', 'PUjetID_SF_eff_eta_l2p5_down',
'PUjetID_SF_mistag_eta_l2p5_up', 'PUjetID_SF_mistag_eta_l2p5_down']
df = df.Define("pujetid_sf_results", "get_pujetid_sf("
"Jet_pt{3}, Jet_eta, Jet_phi, Jet_mass{3}, "
"Jet_puId, Jet_jetId, "
"GenJet_pt, GenJet_eta, GenJet_phi, GenJet_mass, "
"pairType, dau1_index, dau2_index, "
"Muon_pt{0}, Muon_eta, Muon_phi, Muon_mass{0}, "
"Electron_pt{1}, Electron_eta, Electron_phi, Electron_mass{1}, "
"Tau_pt{2}, Tau_eta, Tau_phi, Tau_mass{2}"
")".format(
self.muon_syst, self.electron_syst, self.tau_syst, self.jet_syst))
else:
branches = ['PUjetID_SF', 'PUjetID_SF_up', 'PUjetID_SF_down']
df = df.Define("pujetid_sf_results", "_PUjetID_SF.get_pu_weights("
"Jet_pt{0}, Jet_eta, Jet_phi, Jet_mass{0}, Jet_jetId, Jet_puId, "
"GenJet_pt, GenJet_eta, GenJet_phi, GenJet_mass, "
"{1}, {2}, {3}, {4}"
")".format(self.jet_syst, self.lep_pt, self.lep_eta, self.lep_phi, self.lep_mass))
for ib, branch in enumerate(branches):
df = df.Define(branch, "pujetid_sf_results[%s]" % ib)
return df, branches
[docs]def PUjetID_SFRDF(**kwargs):
"""
Module to compute PU Jet Id scale factors.
:param lep_pt: pt of the leptons to remove from the Jet collection. Can be included as a
straight vector from the RDataFrame or as vector made out of floats available in the
RDataFrame (e.g. "{lep1_pt, lep2_pt}"). Default: none.
:type lep_pt: str
:param lep_eta: eta of the leptons to remove from the Jet collection. Can be included as a
straight vector from the RDataFrame or as vector made out of floats available in the
RDataFrame (e.g. "{lep1_eta, lep2_eta}"). Default: none.
:type lep_eta: str
:param lep_phi: phi of the leptons to remove from the Jet collection. Can be included as a
straight vector from the RDataFrame or as vector made out of floats available in the
RDataFrame (e.g. "{lep1_phi, lep2_phi}"). Default: none.
:type lep_phi: str
:param lep_mass: mass of the leptons to remove from the Jet collection. Can be included as a
straight vector from the RDataFrame or as vector made out of floats available in the
RDataFrame (e.g. "{lep1_mass, lep2_mass}"). Default: none.
:type lep_mass: str
YAML sintaxis:
.. code-block:: yaml
codename:
name: PUjetID_SFRDF
path: Corrections.JME.PUjetID_SF
parameters:
year: self.config.year
isMC: self.dataset.process.isMC
isUL: self.dataset.has_tag('ul')
ispreVFP: self.config.get_aux("isPreVFP", False)
lep_pt: ...
lep_eta: ...
lep_phi: ...
lep_mass: ...
"""
year = kwargs.pop("year")
return lambda: PUjetID_SFRDFProducer(year, **kwargs)