# coding: utf-8
"""
Plotting tasks.
"""
__all__ = []
import os
from copy import deepcopy as copy
import json
import math
import itertools
import functools
import array
from collections import OrderedDict
import law
import luigi
import plotlib.root as r
from cmt.util import hist_to_array, hist_to_graph, get_graph_maximum, update_graph_values
from ctypes import c_double
from analysis_tools import ObjectCollection
from analysis_tools.utils import (
import_root, create_file_dir, join_root_selection, randomize
)
from plotting_tools.root import get_labels, Canvas, RatioCanvas
from cmt.base_tasks.base import (
DatasetTaskWithCategory, DatasetWrapperTask, HTCondorWorkflow, SGEWorkflow,
ConfigTaskWithCategory, RDFModuleTask, InputData
)
from cmt.base_tasks.preprocessing import (
Categorization, MergeCategorization, MergeCategorizationStats, EventCounterDAS
)
cmt_style = r.styles.copy("default", "cmt_style")
cmt_style.style.ErrorX = 0
cmt_style.x_axis.TitleOffset = 1.22
cmt_style.y_axis.TitleOffset = 1.48
cmt_style.legend.TextSize = 20
cmt_style.style.legend_dy = 0.035
cmt_style.style.legend_y2 = 0.93
EMPTY = -1.e5
directions = ["up", "down"]
ROOT = import_root()
[docs]class BasePlotTask(ConfigTaskWithCategory):
"""
Task that wraps parameters used in all plotting tasks. Can't be run.
:param feature_names: names of features to plot. Uses all features when empty.
:type feature_names: csv string
:param feature_tags: list of tags for filtering features selected via feature names.
:type feature_tags: csv string
:param skip_feature_names: names or name pattern of features to skip
:type skip_feature_names: csv string
:param skip_feature_tags: list of tags of features to skip
:type skip_feature_tags: csv string
:param apply_weights: whether to apply weights and scaling to all histograms.
:type apply_weights: bool
:param n_bins: Custom number of bins for plotting,
defaults to the value configured by the feature when empty.
:type n_bins: int
:param systematics: NOT YET IMPLEMENTED. List of custom systematics to be considered.
:type systematics: csv list
:param store_systematics: whether to store systematic templates inside the output root files.
:type store_systematics: bool
:param shape_region: shape region used for QCD computation.
:type shape_region: str from choice list
:param remove_horns: NOT YET IMPLEMENTED. Whether to remove the eta horns present in 2017
:type remove_horns: bool
:param optimization_method: Optimization method to be used. Only `bayesian_blocks` available.
:type optimization_method: str
"""
feature_names = law.CSVParameter(default=(), description="names of features to plot, uses all "
"features when empty, default: ()")
feature_tags = law.CSVParameter(default=(), description="list of tags for filtering features "
"selected via feature_names, default: ()")
skip_feature_names = law.CSVParameter(default=(), description="names or name pattern of "
"features to skip, default: ()")
skip_feature_tags = law.CSVParameter(default=("multiclass_dnn",), description="list of tags of "
"features to skip, default: (multiclass_dnn,)")
apply_weights = luigi.BoolParameter(default=True, description="whether to apply "
"mc weights to histograms, default: True")
n_bins = luigi.IntParameter(default=law.NO_INT, description="custom number of bins for "
"plotting, defaults to the value configured by the feature when empty, default: empty")
systematics = law.CSVParameter(default=(), description="list of systematics, default: empty")
store_systematics = luigi.BoolParameter(default=True, description="whether to store "
"systematic templates inside root files, default: True")
shape_region = luigi.ChoiceParameter(default="os_inviso", choices=("os_inviso", "ss_iso"),
significant=False, description="shape region default: os_inviso")
remove_horns = luigi.BoolParameter(default=False, description="whether to remove horns "
"from distributions, default: False")
tree_name = luigi.Parameter(default="Events", description="name of the tree inside "
"the root file, default: Events (nanoAOD)")
optimization_method = luigi.ChoiceParameter(default="", choices=("", "bayesian_blocks"),
significant=False, description="optimization method to be used, default: none")
def __init__(self, *args, **kwargs):
super(BasePlotTask, self).__init__(*args, **kwargs)
# select features
self.features = self.get_features()
def get_features(self):
features = []
for feature in self.config.features:
if self.feature_names and not law.util.multi_match(feature.name, self.feature_names):
continue
if self.feature_tags and not any([feature.has_tag(tag) for tag in self.feature_tags]):
continue
if self.skip_feature_names and \
law.util.multi_match(feature.name, self.skip_feature_names):
continue
if self.skip_feature_tags and feature.has_tag(self.skip_feature_tags):
continue
features.append(feature)
if len(features) == 0:
raise ValueError("No features were included. Did you spell them correctly?")
return features
def round(self, number):
if number - round(number) < 0.0001:
return number
if number > 10.:
return round(number)
elif number > 1.:
return round(number, 1)
i = 1
while True:
if number > (1 / (10 ** i)):
return round(number, i + 1)
i += 1
def get_binning(self, feature, ifeat=0):
if self.optimization_method:
y_axis_adendum = ""
binning = self.input()["bin_opt"].collection.targets[ifeat].load(formatter="json")
n_bins = len(binning) - 1
binning_args = n_bins, array.array("f", binning)
elif isinstance(feature.binning, tuple):
if self.n_bins == law.NO_INT:
nbins = feature.binning[0]
else:
nbins = self.n_bins
y_axis_adendum = (" / %s %s" % (
self.round((feature.binning[2] - feature.binning[1]) / nbins),
feature.get_aux("units")) if feature.get_aux("units") else "")
binning = (nbins, feature.binning[1], feature.binning[2])
binning_args = tuple(binning)
else:
y_axis_adendum = ""
if self.n_bins == law.NO_INT:
n_bins = len(feature.binning) - 1
binning_args = n_bins, array.array("f", feature.binning)
else:
n_bins = self.n_bins
binning_args = tuple(n_bins, feature.binning[0], feature.binning[-1])
return binning_args, y_axis_adendum
def get_feature_systematics(self, feature):
return feature.systematics
def get_systs(self, feature, isMC):
if not isMC:
return []
systs = self.get_feature_systematics(feature) \
+ self.config.get_weights_systematics(self.config.weights[self.category.name], isMC)
systs += self.config.get_systematics_from_expression(self.category.selection)
if self.region:
systs += self.config.get_systematics_from_expression(self.region.selection)
return self.get_unique_systs(systs)
def get_unique_systs(self, systs):
unique_systs = []
for syst in systs:
if syst not in unique_systs:
unique_systs.append(syst)
return unique_systs
def get_output_postfix(self):
postfix = ""
if self.region:
postfix += "__" + self.region.name
if not self.apply_weights:
postfix += "__noweights"
if self.optimization_method == "bayesian_blocks":
postfix += "__opt_bb"
if self.n_bins != law.NO_INT:
postfix += "__%s_bins" % self.n_bins
return postfix
[docs]class PrePlot(DatasetTaskWithCategory, BasePlotTask, law.LocalWorkflow, HTCondorWorkflow,
SGEWorkflow, RDFModuleTask):
"""
Performs the filling of histograms for all features considered. If systematics are considered,
it also produces the same histograms after applying those.
:param skip_processing: whether to skip the preprocessing and categorization steps.
:type skip_processing: bool
:param skip_merging: whether to skip the MergeCategorization task.
:type skip_merging: bool
:param preplot_modules_file: filename inside ``cmt/config/`` or ``../config/`` (w/o extension)
with the RDF modules to run.
:type preplot_modules_file: str
"""
skip_processing = luigi.BoolParameter(default=False, description="whether to skip"
" preprocessing and categorization steps, default: False")
skip_merging = luigi.BoolParameter(default=False, description="whether to skip"
" MergeCategorization task, default: False")
preplot_modules_file = luigi.Parameter(description="filename with modules to run RDataFrame",
default=law.NO_STR)
dataset_names = law.CSVParameter(description="dataset_names to use for optimization",
default=())
def __init__(self, *args, **kwargs):
super(PrePlot, self).__init__(*args, **kwargs)
self.custom_output_tag = "_%s" % self.region_name
self.threshold = self.dataset.get_aux("event_threshold", None)
self.merging_factor = self.dataset.get_aux("preprocess_merging_factor", None)
self.syst_list = self.get_syst_list()
[docs] def get_syst_list(self):
"""
Returns a list of systematic names that affect present or past selections, so dedicated
input ntuples are needed as requirements
"""
if self.skip_processing:
return []
syst_list = []
isMC = self.dataset.process.isMC
for feature in self.features:
systs = self.get_unique_systs(self.get_systs(feature, isMC))
for syst in systs:
if syst in syst_list:
continue
try:
systematic = self.config.systematics.get(syst)
if self.category.name in systematic.get_aux("affected_categories", []):
syst_list.append(syst)
except:
continue
other_systs = self.config.get_systematics_from_expression(self.category.selection)
if self.region:
other_systs += self.config.get_systematics_from_expression(self.region.selection)
for syst in other_systs:
if syst in syst_list:
continue
try:
systematic = self.config.systematics.get(syst)
if self.category.name in systematic.get_aux("affected_categories", []):
syst_list.append(syst)
except:
continue
return syst_list
[docs] def create_branch_map(self):
"""
:return: number of files after merging (usually 1) unless skip_processing == True
:rtype: int
"""
if self.skip_processing or self.skip_merging:
return len(self.dataset.get_files(
os.path.expandvars("$CMT_TMP_DIR/%s/" % self.config_name), add_prefix=False,
check_empty=True))
return self.n_files_after_merging
[docs] def workflow_requires(self):
"""
"""
if self.skip_merging:
reqs = {"data": {"central": Categorization.vreq(self, workflow="local")}}
if self.store_systematics:
for syst, d in itertools.product(self.syst_list, directions):
reqs["data"][f"{syst}_{d}"] = Categorization.vreq(self, workflow="local",
systematic=syst, systematic_direction=d)
elif self.skip_processing:
reqs= {"data": {"central": InputData.req(self)}}
else:
reqs = {"data": {"central": MergeCategorization.vreq(self, workflow="local")}}
if self.store_systematics:
for syst, d in itertools.product(self.syst_list, directions):
reqs["data"][f"{syst}_{d}"] = MergeCategorization.vreq(self, workflow="local",
systematic=syst, systematic_direction=d)
if self.optimization_method == "bayesian_blocks":
from cmt.base_tasks.optimization import BayesianBlocksOptimization
reqs["bin_opt"] = BayesianBlocksOptimization.vreq(self, plot_systematics=False,
_exclude=["branch", "branches", "custom_output_tag", "plot_systematics", "workflow"])
return reqs
[docs] def requires(self):
"""
Each branch requires one input file for the central value and two per systematic considered
"""
if self.skip_merging:
reqs = {"central": Categorization.vreq(self, workflow="local", branch=self.branch)}
if self.store_systematics:
for syst, d in itertools.product(self.syst_list, directions):
reqs[f"{syst}_{d}"] = Categorization.vreq(self, workflow="local",
systematic=syst, systematic_direction=d, branch=self.branch)
elif self.skip_processing:
reqs= {"central": InputData.req(self, file_index=self.branch)}
else:
reqs = {"central": MergeCategorization.vreq(self, workflow="local", branch=self.branch)}
if self.store_systematics:
for syst, d in itertools.product(self.syst_list, directions):
reqs[f"{syst}_{d}"] = MergeCategorization.vreq(self, workflow="local",
systematic=syst, systematic_direction=d, branch=self.branch)
if self.optimization_method:
from cmt.base_tasks.optimization import BayesianBlocksOptimization
reqs["bin_opt"] = BayesianBlocksOptimization.vreq(self, plot_systematics=False,
_exclude=["branch", "branches", "custom_output_tag", "plot_systematics", "workflow"])
return reqs
[docs] def output(self):
"""
:return: One file per input file with all histograms to be plotted for each feature
:rtype: `.root`
"""
return self.local_target("data{}_{}.root".format(
self.get_output_postfix(), self.branch))
[docs] def get_weight(self, category, syst_name, syst_direction, **kwargs):
"""
Obtains the product of all weights depending on the category/channel applied.
Returns "1" if it's a data sample or the apply_weights parameter is set to False.
:return: Product of all weights to be applied
:rtype: str
"""
if self.config.processes.get(self.dataset.process.name).isData or not self.apply_weights:
return "1"
else:
return self.config.get_weights_expression(
self.config.weights[category], syst_name, syst_direction)
return self.config.weights.default
def define_histograms(self, dfs, nentries):
histos = []
isMC = self.dataset.process.isMC
for ifeat, feature in enumerate(self.features):
binning_args, y_axis_adendum = self.get_binning(feature, ifeat)
x_title = (str(feature.get_aux("x_title"))
+ (" [%s]" % feature.get_aux("units") if feature.get_aux("units") else ""))
y_title = "Events" + y_axis_adendum
title = "; %s; %s" % (x_title, y_title)
systs_directions = [("central", "")]
if isMC and self.store_systematics:
systs = self.get_systs(feature, isMC)
systs_directions += list(itertools.product(systs, directions))
# loop over systematics and up/down variations
for syst_name, direction in systs_directions:
# Select the appropriate RDF (input file) depending on the syst and direction
key = "central"
if f"{syst_name}_{direction}" in dfs:
key = f"{syst_name}_{direction}"
df = dfs[key]
# apply selection if needed
if feature.selection:
feat_df = df.Define("feat_selection", self.config.get_object_expression(
feature.selection, isMC, syst_name, direction)).Filter("feat_selection")
else:
feat_df = df
# define tag just for the histogram's name
if syst_name != "central" and isMC:
tag = "_%s" % syst_name
if direction != "":
tag += "_%s" % direction
else:
tag = ""
feature_expression = self.config.get_object_expression(
feature, isMC, syst_name, direction)
feature_name = feature.name + tag
hist_base = ROOT.TH1D(feature_name, title, *binning_args)
if nentries[key] > 0:
hmodel = ROOT.RDF.TH1DModel(hist_base)
histos.append(
feat_df.Define(
"weight", "{}".format(self.get_weight(
self.category.name, syst_name, direction))
).Define(
"var", feature_expression).Histo1D(hmodel, "var", "weight")
)
else: # no entries available, append empty histogram
histos.append(hist_base)
return histos
[docs] @law.decorator.notify
@law.decorator.localize(input=False)
def run(self):
"""
Creates one RDataFrame per input file, runs the desired RDFModules
and produces a set of plots per each feature, one for the nominal value
and others (if available) for all systematics.
"""
# prepare inputs and outputs
if self.skip_processing:
inp = self.get_input()
else:
inp = self.input()
outp = self.output().path
ROOT.ROOT.EnableImplicitMT(self.request_cpus)
# create one RDF for the central value and each needed systematic
dfs = {}
nentries = {}
for elem in ["central"] + [f"{syst}_{d}"
for (syst, d) in itertools.product(self.syst_list, directions)]:
if self.skip_processing:
inp_to_consider = self.get_path(inp[elem])[0]
if not self.dataset.friend_datasets:
dfs[elem] = ROOT.RDataFrame(self.tree_name, self.get_path(inp[elem]))
# friend tree
else:
tchain = ROOT.TChain()
for f in self.get_path(inp[elem]):
tchain.Add("{}/{}".format(f, self.tree_name))
friend_tchain = ROOT.TChain()
for f in self.get_path(inp[elem], 1):
friend_tchain.Add("{}/{}".format(f, self.tree_name))
tchain.AddFriend(friend_tchain, "friend")
dfs[elem] = ROOT.RDataFrame(tchain)
elif self.skip_merging:
inp_to_consider = inp[elem].path
dfs[elem] = ROOT.RDataFrame(self.tree_name, inp_to_consider)
else:
inp_to_consider = inp[elem].targets[self.branch].path
dfs[elem] = ROOT.RDataFrame(self.tree_name, inp_to_consider)
empty_file = False
try:
tf = ROOT.TFile.Open(inp_to_consider)
tree = tf.Get(self.tree_name)
nentries[elem] = tree.GetEntries()
tf.Close()
except: # no tree inside the file
nentries[elem] = 0
empty_file = True
# applying modules according to the systematic considered
syst = ""
d = ""
if "_" in elem:
syst = elem.split("_")[0]
d = elem.split("_")[1]
if not empty_file:
modules = self.get_feature_modules(self.preplot_modules_file,
systematic=syst, systematic_direction=d)
if len(modules) > 0:
for module in modules:
dfs[elem], _ = module.run(dfs[elem])
selection = "1"
dataset_selection = self.config.get_object_expression(
self.dataset.get_aux("selection", "1"), self.dataset.process.isMC, syst, d)
if self.skip_processing:
selection = self.config.get_object_expression(
self.category, self.dataset.process.isMC, syst, d)
if dataset_selection and dataset_selection != "1":
if selection != "1":
selection = join_root_selection(dataset_selection, selection, op="and")
else:
selection = dataset_selection
if self.region_name != law.NO_STR:
region_selection = self.config.get_object_expression(
self.config.regions.get(self.region_name).selection,
self.dataset.process.isMC, syst, d)
if selection != "1":
selection = join_root_selection(region_selection, selection, op="and")
else:
selection = region_selection
if selection != "1":
dfs[elem] = dfs[elem].Define("selection", selection).Filter("selection")
histos = self.define_histograms(dfs, nentries)
out = ROOT.TFile.Open(create_file_dir(outp), "RECREATE")
for histo in histos:
histo = histo.Clone()
histo.Sumw2()
out.cd()
histo.Write()
out.Close()
[docs]class FeaturePlot(BasePlotTask, DatasetWrapperTask):
"""
Performs the actual histogram plotting: loads the histograms obtained in the PrePlot tasks,
rescales them if needed and plots and saves them.
Example command:
``law run FeaturePlot --version test --category-name etau --config-name ul_2018 \
--process-group-name etau --feature-names Htt_svfit_mass,lep1_pt,bjet1_pt,lep1_eta,bjet1_eta \
--workers 20 --PrePlot-workflow local --stack --hide-data False --do-qcd --region-name etau_os_iso\
--dataset-names tt_dl,tt_sl,dy_high,wjets,data_etau_a,data_etau_b,data_etau_c,data_etau_d \
--MergeCategorizationStats-version test_old``
:param stack: whether to show all backgrounds stacked (True) or normalized to 1 (False)
:type stack: bool
:param do_qcd: whether to estimate QCD using the ABCD method
:type do_qcd: bool
:param qcd_wp: working point to use for QCD estimation
:type qcd_wp: str from choice list
:param qcd_signal_region_wp: region to use as signal region for QCD estimation
:type qcd_signal_region_wp: str
:param shape_region: region to use as shape region for QCD estimation
:type shape_region: str from choice list
:param qcd_sym_shape: whether to symmetrise the shape coming from both possible shape regions
:type qcd_sym_shape: bool
:param qcd_category_name: category name used for the same sign regions in QCD estimation
:type qcd_category_name: str
:param hide_data: whether to show (False) or hide (True) the data histograms
:type hide_data: bool
:param normalize_signals: whether to normalize signals to the
total background yield (True) or not (False)
:type normalize_signals: bool
:param avoid_normalization: whether to avoid normalizing by cross section and initial
number of events
:type avoid_normalization: bool
:param blinded: whether to blind data in specified regions. The blinding ranges are specified
using the `blinded_range` parameter in the Feature definition. This parameter can include a
list ([initial, final]) or a list of lists ([[init_1, fin_1], [init_2, fin_2], ...])
:type blinded: bool
:param save_png: whether to save plots in png
:type save_png: bool
:param save_pdf: whether to save plots in pdf
:type save_pdf: bool
:param save_root: whether to write plots in a root file
:type save_root: bool
:param save_yields: whether to save histogram yields in a json file
:type save_yields: bool
:param process_group_name: name of the process grouping name
:type process_group_name: str
:param bins_in_x_axis: (NOT YET IMPLEMENTED) whether to plot histograms with the bin numbers
in the x axis instead of the actual values
:type bins_in_x_axis: bool
:param plot_systematics: (NOT YET FULLY IMPLEMENTED) whether to plot histograms
with their uncertainties
:type plot_systematics: bool
:param fixed_colors: whether to plot histograms with their defined colors (False) or fixed colors
(True) starting from ``ROOT`` color ``2``.
:type fixed_colors: bool
:param log_y: whether to set y axis to log scale
:type log_y: bool
:param include_fit: YAML file inside config folder (w/o extension) including input parameters
for the fit
:type include_fit: str
:param propagate_syst_qcd: whether to propagate systematics to qcd background
:type propagate_syst_qcd: bool
"""
stack = luigi.BoolParameter(default=False, description="when set, stack backgrounds, weight "
"them with dataset and category weights, and normalize afterwards, default: False")
do_qcd = luigi.BoolParameter(default=False, description="whether to compute the QCD shape, "
"default: False")
qcd_wp = luigi.ChoiceParameter(default=law.NO_STR,
choices=(law.NO_STR, "vvvl_vvl", "vvl_vl", "vl_l", "l_m"), significant=False,
description="working points to use for qcd computation, default: empty (vvvl - m)")
qcd_signal_region_wp = luigi.Parameter(default="os_iso", description="signal region wp, "
"default: os_iso")
shape_region = luigi.ChoiceParameter(default="os_inviso", choices=("os_inviso", "ss_iso"),
significant=True, description="shape region default: os_inviso")
qcd_sym_shape = luigi.BoolParameter(default=False, description="symmetrize QCD shape, "
"default: False")
qcd_category_name = luigi.Parameter(default="default", description="category use "
"for qcd regions ss_iso and ss_inviso, default=default (same as category)")
do_sideband = luigi.BoolParameter(default=False, description="whether to compute the background "
"shape from sideband region, default: False")
hide_data = luigi.BoolParameter(default=True, description="hide data events, default: True")
normalize_signals = luigi.BoolParameter(default=True, description="whether to normalize "
"signals to the total bkg yield, default: True")
avoid_normalization = luigi.BoolParameter(default=False, description="whether to avoid "
"normalizing by cross section and initial number of events, default: False")
blinded = luigi.BoolParameter(default=False, description="whether to blind bins above a "
"certain feature value, default: False")
save_png = luigi.BoolParameter(default=False, description="whether to save created histograms "
"in png, default: True")
save_pdf = luigi.BoolParameter(default=True, description="whether to save created histograms "
"in pdf, default: True")
save_root = luigi.BoolParameter(default=False, description="whether to save created histograms "
"in root files, default: False")
save_yields = luigi.BoolParameter(default=False, description="whether to save event yields per "
"process, default: False")
process_group_name = luigi.Parameter(default="default", description="the name of the process "
"grouping, only encoded into output paths when changed, default: default")
bins_in_x_axis = luigi.BoolParameter(default=False, description="whether to show in the x axis "
"bin numbers instead of the feature value, default: False")
plot_systematics = luigi.BoolParameter(default=True, description="whether plot systematics, "
"default: True")
fixed_colors = luigi.BoolParameter(default=False, description="whether to use fixed colors "
"for plotting, default: False")
log_y = luigi.BoolParameter(default=False, description="set logarithmic scale for Y axis, "
"default: False")
include_fit = luigi.Parameter(default="", description="fit to be included in the plots, "
"default: None")
propagate_syst_qcd = luigi.BoolParameter(default=False, description="whether to propagate systematics to qcd background, "
"default: False")
# # optimization parameters
# bin_opt_version = luigi.Parameter(default=law.NO_STR, description="version of the binning "
# "optimization task to use, not used when empty, default: empty")
# n_start_bins = luigi.IntParameter(default=10, description="number of bins to be used "
# "as initial value for scans, default: 10")
# threshold = luigi.FloatParameter(default=-1., description="threshold per bin, "
# "default: -1.")
# threshold_method = luigi.ChoiceParameter(default="yield",
# choices=("yield", "raw_events", "tt_dy", "tt_and_dy", "tt_dy_and_yield",
# "tt_and_dy_and_yield", "rel_error"),
# significant=False, description="threshold method to be used, default: yield")
# region_to_optimize = luigi.Parameter(default=law.NO_STR, description="region used for the "
# "optimization, default: same region as plot")
# Not implemented yet
bin_opt_version = None
# remove_horns = False
default_process_group_name = "default"
additional_scaling = {"dummy": 1} # Temporary fix, the DictParameter fails when empty
def __init__(self, *args, **kwargs):
super(FeaturePlot, self).__init__(*args, **kwargs)
# select processes and datasets
assert self.process_group_name in self.config.process_group_names
assert not (self.do_qcd and self.do_sideband)
self.processes_datasets = {}
self.datasets_to_run = []
def get_processes(dataset=None, process=None):
processes = ObjectCollection()
if dataset and not process:
process = self.config.processes.get(dataset.process.name)
processes.append(process)
if process.parent_process:
processes += get_processes(process=self.config.processes.get(
process.parent_process))
return processes
for dataset in self.datasets:
processes = get_processes(dataset=dataset)
filtered_processes = ObjectCollection()
for process in processes:
if process.name in self.config.process_group_names[self.process_group_name]:
filtered_processes.append(process)
if len(filtered_processes) > 1:
raise Exception("%s process group name includes not orthogonal processes %s"
% (self.process_group_name, ", ".join(filtered_processes.names)))
elif len(filtered_processes) == 1:
process = filtered_processes[0]
if process not in self.processes_datasets:
self.processes_datasets[process] = []
self.processes_datasets[process].append(dataset)
self.datasets_to_run.append(dataset)
if len(self.datasets_to_run) == 0:
raise ValueError("No datasets were selected. Are you sure you want to use"
" %s as process_group_name?" % self.process_group_name)
# get QCD regions when requested
self.qcd_regions = None
if self.do_qcd:
wp = self.qcd_wp if self.qcd_wp != law.NO_STR else ""
self.qcd_regions = self.config.get_qcd_regions(region=self.region, category=self.category,
wp=wp, shape_region=self.shape_region, signal_region_wp=self.qcd_signal_region_wp,
sym=self.qcd_sym_shape)
# complain when no data is present
if not any(dataset.process.isData for dataset in self.datasets):
raise Exception("no real dataset passed for QCD estimation")
self.sideband_regions = None
if self.do_sideband: # Several fixes may be needed later for this
assert self.region.name == "signal"
self.sideband_regions = {key: self.config.regions.get(key)
for key in ["signal", "background"]}
# obtain the list of systematics that apply to the normalization only if this is done
self.norm_syst_list = []
weights = self.config.weights.total_events_weights
for weight in weights:
try:
feature = self.config.features.get(weight)
for syst in feature.systematics:
if syst not in self.norm_syst_list:
self.norm_syst_list.append(syst)
except: # weight not defined as a feature -> no syst available
continue
[docs] def requires(self):
"""
All requirements needed:
* Histograms coming from the PrePlot task.
* Number of total events coming from the MergeCategorizationStats task \
(to normalize MC histograms).
* If estimating QCD, FeaturePlot for the three additional QCD regions needed.
"""
reqs = {}
reqs["data"] = OrderedDict(
((dataset.name, category.name), PrePlot.vreq(self,
dataset_name=dataset.name, category_name=self.get_data_category(category).name))
for dataset, category in itertools.product(
self.datasets_to_run, self.expand_category())
)
if not self.avoid_normalization:
reqs["stats"] = OrderedDict()
for dataset in self.datasets_to_run:
if dataset.process.isData:
continue
reqs["stats"][dataset.name] = {}
for elem in ["central"] + [f"{syst}_{d}"
for (syst, d) in itertools.product(self.norm_syst_list, directions)]:
syst = ""
d = ""
if "_" in elem:
syst = elem.split("_")[0]
d = elem.split("_")[1]
if dataset.get_aux("secondary_dataset", None):
reqs["stats"][dataset.name][elem] = MergeCategorizationStats.vreq(self,
dataset_name=dataset.get_aux("secondary_dataset"),
systematic=syst, systematic_direction=d)
else:
reqs["stats"][dataset.name][elem] = MergeCategorizationStats.vreq(self,
dataset_name=dataset.name, systematic=syst, systematic_direction=d)
if self.do_qcd:
reqs["qcd"] = {
key: self.req(self, region_name=region.name, blinded=False, hide_data=False,
do_qcd=False, stack=True, save_root=True, save_pdf=True, save_yields=False,
remove_horns=False,_exclude=["feature_tags", "shape_region",
"qcd_category_name", "qcd_sym_shape", "qcd_signal_region_wp"])
for key, region in self.qcd_regions.items()
}
if self.qcd_category_name != "default": # to be fixed
reqs["qcd"]["ss_iso"] = FeaturePlot.vreq(self,
region_name=self.qcd_regions.ss_iso.name, category_name=self.qcd_category_name,
blinded=False, hide_data=False, do_qcd=False, stack=True, save_root=True,
save_pdf=True, save_yields=False, feature_names=(self.qcd_feature,),
bin_opt_version=law.NO_STR, remove_horns=self.remove_horns, _exclude=["feature_tags",
"shape_region", "qcd_category_name", "qcd_sym_shape", "bin_opt_version",
"qcd_signal_region_wp"])
# reqs["qcd"]["os_inviso"] = FeaturePlot.vreq(self,
# region_name=self.qcd_regions.os_inviso.name, category_name=self.qcd_category_name,
# blinded=False, hide_data=False, do_qcd=False, stack=True, save_root=True,
# save_pdf=True, save_yields=True, feature_names=(self.qcd_feature,),
# remove_horns=self.remove_horns, _exclude=["feature_tags", "bin_opt_version"])
reqs["qcd"]["ss_inviso"] = FeaturePlot.vreq(self,
region_name=self.qcd_regions.ss_inviso.name,
category_name=self.qcd_category_name,
blinded=False, hide_data=False, do_qcd=False, stack=True, save_root=True,
save_pdf=True, save_yields=False, feature_names=(self.qcd_feature,),
bin_opt_version=law.NO_STR, remove_horns=self.remove_horns, _exclude=["feature_tags",
"shape_region", "qcd_category_name", "qcd_sym_shape", "bin_opt_version",
"qcd_signal_region_wp"])
if self.do_sideband:
reqs["sideband"] = {
key: self.req(self, region_name=region.name, blinded=False, hide_data=False,
do_sideband=False, stack=True, save_root=True, save_pdf=False, save_yields=False,
remove_horns=False,_exclude=["feature_tags", "shape_region",
"qcd_category_name", "qcd_sym_shape", "qcd_signal_region_wp"])
for key, region in self.sideband_regions.items()
}
if self.optimization_method:
from cmt.base_tasks.optimization import BayesianBlocksOptimization
reqs["bin_opt"] = BayesianBlocksOptimization.vreq(self, plot_systematics=False)
if self.include_fit:
import yaml
from cmt.utils.yaml_utils import ordered_load
with open(self.retrieve_file("config/{}.yaml".format(self.include_fit))) as f:
fit_params = ordered_load(f, yaml.SafeLoader)
from cmt.base_tasks.analysis import Fit
params = ", ".join([f"{param}='{value}'"
for param, value in fit_params.items() if param != "fit_parameters"])
if "fit_parameters" in fit_params:
params += ", fit_parameters={" + ", ".join([f"'{param}': '{value}'"
for param, value in fit_params["fit_parameters"].items()]) + "}"
reqs["fit"] = eval(f"Fit.vreq(self, {params}, _exclude=['include_fit'])")
return reqs
[docs] def get_output_postfix(self, key="pdf"):
"""
:return: string to be included in the output filenames
:rtype: str
"""
postfix = super(FeaturePlot, self).get_output_postfix()
if self.process_group_name != self.default_process_group_name:
postfix += "__pg_" + self.process_group_name
if self.do_qcd:
postfix += "__qcd"
if self.do_sideband:
postfix += "__sideband"
if self.hide_data:
postfix += "__nodata"
elif self.blinded and key not in ("root", "yields"):
postfix += "__blinded"
if self.stack and key not in ("root", "yields"):
postfix += "__stack"
if self.include_fit:
postfix += "__withfit"
if self.log_y and key not in ("root", "yields"):
postfix += "__log"
return postfix
[docs] def output(self):
"""
Output files to be filled: pdf, png, root or json
"""
# output definitions, i.e. key, file prefix, extension
output_data = []
if self.save_pdf:
output_data.append(("pdf", "", "pdf"))
if self.save_png:
output_data.append(("png", "", "png"))
if self.save_root:
output_data.append(("root", "root/", "root"))
if self.save_yields:
output_data.append(("yields", "yields/", "json"))
return {
key: law.SiblingFileCollection(OrderedDict(
(feature.name, self.local_target("{}{}{}.{}".format(
prefix, feature.name, self.get_output_postfix(key), ext)))
for feature in self.features
))
for key, prefix, ext in output_data
}
[docs] def complete(self):
"""
Task is completed when all output are present
"""
return ConfigTaskWithCategory.complete(self)
[docs] def setup_signal_hist(self, hist, color):
"""
Method to apply signal format to an histogram
"""
hist.hist_type = "signal"
hist.legend_style = "l"
hist.SetLineColor(color)
hist.SetLineWidth(2)
[docs] def setup_background_hist(self, hist, color):
"""
Method to apply background format to an histogram
"""
hist.hist_type = "background"
if self.stack:
hist.SetLineColor(ROOT.kBlack)
hist.SetLineWidth(1)
hist.SetFillColor(color)
hist.legend_style = "f"
else:
hist.SetLineColor(color)
hist.legend_style = "l"
hist.SetLineWidth(2)
[docs] def setup_data_hist(self, hist, color):
"""
Method to apply data format to an histogram
"""
hist.legend_style = "lp"
hist.hist_type = "data"
hist.SetMarkerStyle(20)
hist.SetMarkerColor(color)
hist.SetLineColor(color)
hist.SetBinErrorOption((ROOT.TH1.kPoisson if self.stack else ROOT.TH1.kNormal))
def get_norm_systematics(self):
return self.config.get_norm_systematics(self.processes_datasets, self.region)
[docs] def plot(self, feature, ifeat=0):
"""
Performs the actual plotting.
"""
# helper to extract the qcd shape in a region
def get_qcd(region, files, syst='', bin_limit=0.):
d_hist = files[region].Get("histograms/" + self.data_names[0])
if not d_hist:
raise Exception("data histogram '{}' not found for region '{}' in tfile {}".format(
self.data_names[0], region, files[region]))
b_hists = []
for b_name in self.background_names:
b_hist = files[region].Get("histograms/" + b_name + syst)
if not b_hist:
raise Exception("background histogram '{}' not found in region '{}'".format(
b_name, region))
b_hists.append(b_hist)
qcd_hist = d_hist.Clone(randomize("qcd_" + region + syst))
for hist in b_hists:
qcd_hist.Add(hist, -1.)
# removing negative bins
for ibin in range(1, qcd_hist.GetNbinsX() + 1):
if qcd_hist.GetBinContent(ibin) < bin_limit:
qcd_hist.SetBinContent(ibin, 1.e-6)
return qcd_hist
# helper to extract data and bkg histos in sideband and signal regions
def get_sideband(region, files, bin_limit=0.):
d_hist = files[region].Get("histograms/" + self.data_names[0]).Clone(
randomize("bkg_" + region))
if not d_hist:
raise Exception("data histogram '{}' not found for region '{}' in tfile {}".format(
self.data_names[0], region, files[region]))
b_hist = None
for b_name in self.background_names:
if not b_hist:
b_hist = files[region].Get("histograms/" + b_name).Clone(
randomize("bkg_" + region))
else:
b_hist.Add(files[region].Get("histograms/" + b_name))
return d_hist, b_hist
def get_integral_and_error(hist):
error = c_double(0.)
integral = hist.IntegralAndError(0, hist.GetNbinsX() + 1, error)
error = error.value
compatible = True if integral <= 0 else False
# compatible = True if integral - error <= 0 else False
return integral, error, compatible
def get_ratio(num, den):
if den == 0:
return 0
return num / den
# I think these are for the SIGNAL REGION ONLY
background_hists = self.histos["background"]
signal_hists = self.histos["signal"]
data_hists = self.histos["data"]
all_hists = self.histos["all"]
if self.plot_systematics:
bkg_histo_syst = self.histos["bkg_histo_syst"]
binning_args, y_axis_adendum = self.get_binning(feature, ifeat)
x_title = (str(feature.get_aux("x_title"))
+ (" [%s]" % feature.get_aux("units") if feature.get_aux("units") else ""))
y_title = ("Events" if self.stack else "Normalized Events") + y_axis_adendum
hist_title = "; %s; %s" % (x_title, y_title)
# qcd shape files
qcd_shape_files = None
if self.do_qcd:
qcd_shape_files = {}
for key, region in self.qcd_regions.items():
if self.qcd_category_name != "default":
my_feature = (feature.name if "shape" in key or key == "os_inviso"
else self.qcd_feature)
else:
my_feature = feature.name
qcd_shape_files[key] = ROOT.TFile.Open(self.input()["qcd"][key]["root"].targets[my_feature].path)
# do the qcd extrapolation
if "shape" in qcd_shape_files:
qcd_hist = get_qcd("shape", qcd_shape_files).Clone(randomize("qcd"))
n_qcd_hist, n_qcd_hist_error, n_qcd_hist_compatible = get_integral_and_error(qcd_hist)
if not n_qcd_hist_compatible:
qcd_hist.Scale(1. / n_qcd_hist)
else: #sym shape
qcd_hist = get_qcd("shape1", qcd_shape_files).Clone(randomize("qcd"))
qcd_hist2 = get_qcd("shape2", qcd_shape_files).Clone(randomize("qcd"))
n_qcd_hist1, n_qcd_hist1_error, n_qcd_hist1_compatible = get_integral_and_error(qcd_hist)
n_qcd_hist2, n_qcd_hist2_error, n_qcd_hist2_compatible = get_integral_and_error(qcd_hist2)
qcd_hist.Scale(1. / n_qcd_hist1)
qcd_hist2.Scale(1. / n_qcd_hist2)
qcd_hist.Add(qcd_hist2)
qcd_hist.Scale(0.5)
n_os_inviso, n_os_inviso_error, n_os_inviso_compatible = get_integral_and_error(
get_qcd("os_inviso", qcd_shape_files, bin_limit=-999)) # C
n_ss_iso, n_ss_iso_error, n_ss_iso_compatible = get_integral_and_error(
get_qcd("ss_iso", qcd_shape_files, bin_limit=-999)) # B
n_ss_inviso, n_ss_inviso_error, n_ss_inviso_compatible = get_integral_and_error(
get_qcd("ss_inviso", qcd_shape_files, bin_limit=-999)) # D
# if not n_ss_iso or not n_ss_inviso:
if n_os_inviso_compatible or n_ss_iso_compatible or n_ss_inviso_compatible:
print("****WARNING: QCD normalization failed (negative yield), Removing QCD!")
qcd_scaling = 0.
# qcd_inviso = 0.
# qcd_inviso_error = 0.
qcd_hist.Scale(qcd_scaling)
else:
# qcd_inviso = n_os_inviso / n_ss_inviso # C/D
# qcd_inviso_error = qcd_inviso * math.sqrt(
# (n_os_inviso_error / n_os_inviso) * (n_os_inviso_error / n_os_inviso)
# + (n_ss_inviso_error / n_ss_inviso) * (n_ss_inviso_error / n_ss_inviso)
# # + 4 * (n_ss_inviso_error / n_ss_inviso) * (n_ss_inviso_error / n_ss_inviso)
# )
qcd_scaling = n_os_inviso * n_ss_iso / n_ss_inviso # C*B/D
os_inviso_rel_error = n_os_inviso_error / n_os_inviso
ss_iso_rel_error = n_ss_iso_error / n_ss_iso
ss_inviso_rel_error = n_ss_inviso_error / n_ss_inviso
new_errors_sq = []
for ib in range(1, qcd_hist.GetNbinsX() + 1):
if qcd_hist.GetBinContent(ib) > 0:
bin_rel_error = qcd_hist.GetBinError(ib) / qcd_hist.GetBinContent(ib)
else:
bin_rel_error = 0
new_errors_sq.append(bin_rel_error * bin_rel_error
+ os_inviso_rel_error * os_inviso_rel_error
+ ss_iso_rel_error * ss_iso_rel_error
+ ss_inviso_rel_error * ss_inviso_rel_error)
qcd_hist.Scale(qcd_scaling)
# fix errors
for ib in range(1, qcd_hist.GetNbinsX() + 1):
qcd_hist.SetBinError(ib, qcd_hist.GetBinContent(ib)
* math.sqrt(new_errors_sq[ib - 1]))
# store and style
yield_error = c_double(0.)
qcd_hist.cmt_yield = qcd_hist.IntegralAndError(
0, qcd_hist.GetNbinsX() + 1, yield_error)
qcd_hist.cmt_yield_error = yield_error.value
qcd_hist.cmt_bin_yield = []
qcd_hist.cmt_bin_yield_error = []
for ibin in range(1, qcd_hist.GetNbinsX() + 1):
qcd_hist.cmt_bin_yield.append(qcd_hist.GetBinContent(ibin))
qcd_hist.cmt_bin_yield_error.append(qcd_hist.GetBinError(ibin))
qcd_hist.cmt_scale = 1.
qcd_hist.cmt_process_name = "qcd"
qcd_hist.process_label = "QCD"
qcd_hist.SetTitle("QCD")
qcd_c = tuple([255, 87, 215])
qcd_color = ROOT.TColor.GetColor(*qcd_c)
self.setup_background_hist(qcd_hist, qcd_color)
background_hists.append(qcd_hist)
all_hists.append(qcd_hist)
if self.propagate_syst_qcd:
print("****INFO: Propagating shape uncertainties to QCD")
for syst_dir in self.histos["shape"].keys():
syst_dir_name = "_{}".format(syst_dir)
# do the qcd extrapolation
if "shape" in qcd_shape_files:
qcd_hist = get_qcd("shape", qcd_shape_files, syst=syst_dir_name).Clone(randomize("qcd"))
n_qcd_hist, n_qcd_hist_error, n_qcd_hist_compatible = get_integral_and_error(qcd_hist)
if not n_qcd_hist_compatible:
qcd_hist.Scale(1. / n_qcd_hist)
else: #sym shape
qcd_hist = get_qcd("shape1", qcd_shape_files, syst=syst_dir_name).Clone(randomize("qcd"))
qcd_hist2 = get_qcd("shape2", qcd_shape_files, syst=syst_dir_name).Clone(randomize("qcd"))
n_qcd_hist1, n_qcd_hist1_error, n_qcd_hist1_compatible = get_integral_and_error(qcd_hist)
n_qcd_hist2, n_qcd_hist2_error, n_qcd_hist2_compatible = get_integral_and_error(qcd_hist2)
qcd_hist.Scale(1. / n_qcd_hist1)
qcd_hist2.Scale(1. / n_qcd_hist2)
qcd_hist.Add(qcd_hist2)
qcd_hist.Scale(0.5)
n_os_inviso, n_os_inviso_error, n_os_inviso_compatible = get_integral_and_error(
get_qcd("os_inviso", qcd_shape_files, syst=syst_dir_name, bin_limit=-999)) # C
n_ss_iso, n_ss_iso_error, n_ss_iso_compatible = get_integral_and_error(
get_qcd("ss_iso", qcd_shape_files, syst=syst_dir_name, bin_limit=-999)) # B
n_ss_inviso, n_ss_inviso_error, n_ss_inviso_compatible = get_integral_and_error(
get_qcd("ss_inviso", qcd_shape_files, syst=syst_dir_name, bin_limit=-999)) # D
# if not n_ss_iso or not n_ss_inviso:
if n_os_inviso_compatible or n_ss_iso_compatible or n_ss_inviso_compatible:
print("****WARNING: QCD normalization failed (negative yield), Removing QCD!")
qcd_scaling = 0.
qcd_hist.Scale(qcd_scaling)
else:
qcd_scaling = n_os_inviso * n_ss_iso / n_ss_inviso # C*B/D
os_inviso_rel_error = n_os_inviso_error / n_os_inviso
ss_iso_rel_error = n_ss_iso_error / n_ss_iso
ss_inviso_rel_error = n_ss_inviso_error / n_ss_inviso
new_errors_sq = []
for ib in range(1, qcd_hist.GetNbinsX() + 1):
if qcd_hist.GetBinContent(ib) > 0:
bin_rel_error = qcd_hist.GetBinError(ib) / qcd_hist.GetBinContent(ib)
else:
bin_rel_error = 0
new_errors_sq.append(bin_rel_error * bin_rel_error
+ os_inviso_rel_error * os_inviso_rel_error
+ ss_iso_rel_error * ss_iso_rel_error
+ ss_inviso_rel_error * ss_inviso_rel_error)
qcd_hist.Scale(qcd_scaling)
# fix errors
for ib in range(1, qcd_hist.GetNbinsX() + 1):
qcd_hist.SetBinError(ib, qcd_hist.GetBinContent(ib)
* math.sqrt(new_errors_sq[ib - 1]))
qcd_hist.cmt_process_name = "qcd"
self.histos["shape"][syst_dir].append(qcd_hist)
# sideband files
sideband_files = None
if self.do_sideband:
sideband_files = {}
for key, region in self.sideband_regions.items():
my_feature = feature.name
sideband_files[key] = ROOT.TFile.Open(
self.input()["sideband"][key]["root"].targets[my_feature].path)
# do the qcd extrapolation
data_hist, bkg_hist_background_region = get_sideband("background", sideband_files)
_, bkg_hist_signal_region = get_sideband("signal", sideband_files)
n_bkg_background, n_bkg_background_error, _ = get_integral_and_error(
bkg_hist_background_region)
n_bkg_signal, n_bkg_signal_error, _ = get_integral_and_error(
bkg_hist_signal_region)
bkg_hist = data_hist.Clone(randomize("bkg"))
n_bkg_background_rel_error = get_ratio(n_bkg_background_error, n_bkg_background)
n_bkg_signal_rel_error = get_ratio(n_bkg_signal_error, n_bkg_signal)
new_errors_sq = []
for ib in range(1, bkg_hist.GetNbinsX() + 1):
if bkg_hist.GetBinContent(ib) > 0:
bin_rel_error = bkg_hist.GetBinError(ib) / bkg_hist.GetBinContent(ib)
else:
bin_rel_error = 0
new_errors_sq.append(bin_rel_error * bin_rel_error
+ n_bkg_background_rel_error * n_bkg_background_rel_error
+ n_bkg_signal_rel_error * n_bkg_signal_rel_error)
bkg_hist.Scale(n_bkg_signal / n_bkg_background)
# fix errors
for ib in range(1, bkg_hist.GetNbinsX() + 1):
bkg_hist.SetBinError(ib, bkg_hist.GetBinContent(ib)
* math.sqrt(new_errors_sq[ib - 1]))
# store and style
yield_error = c_double(0.)
bkg_hist.cmt_yield = bkg_hist.IntegralAndError(
0, bkg_hist.GetNbinsX() + 1, yield_error)
bkg_hist.cmt_yield_error = yield_error.value
bkg_hist.cmt_bin_yield = []
bkg_hist.cmt_bin_yield_error = []
for ibin in range(1, bkg_hist.GetNbinsX() + 1):
bkg_hist.cmt_bin_yield.append(bkg_hist.GetBinContent(ibin))
bkg_hist.cmt_bin_yield_error.append(bkg_hist.GetBinError(ibin))
bkg_hist.cmt_scale = 1.
bkg_hist.cmt_process_name = "background"
bkg_hist.process_label = "Background"
bkg_hist.SetTitle("Background")
try:
bkg_color = ROOT.TColor.GetColor(*self.config.processes.get("background").color)
except: # background process not defined in config
bkg_color = ROOT.kRed
self.setup_background_hist(bkg_hist, bkg_color)
background_hists = [bkg_hist]
all_hists.append(bkg_hist)
if not self.hide_data:
all_hists += data_hists
bkg_histo = None
data_histo = None
if not self.stack:
for hist in all_hists:
scale = 1. / (hist.Integral() or 1.)
hist.Scale(scale)
hist.scale = scale
draw_hists = all_hists[::-1]
else:
background_stack = ROOT.THStack(randomize("stack"), "")
for hist in background_hists[::-1]:
# hist.SetFillColor(ROOT.kRed)
background_stack.Add(hist)
if not bkg_histo:
bkg_histo = hist.Clone()
else:
bkg_histo.Add(hist.Clone())
background_stack.hist_type = "background"
# Normalize signal histograms to background sum
if self.normalize_signals and bkg_histo:
for hist in signal_hists:
signal_yield = hist.cmt_yield
scale = (bkg_histo.Integral() / signal_yield if signal_yield != 0 else 1.)
hist.Scale(scale)
hist.cmt_scale = scale
draw_hists = [background_stack] + signal_hists[::-1]
if not self.hide_data:
# blinding
blinded_range = feature.get_aux("blinded_range", None)
if blinded_range and self.blinded:
for hist in data_hists:
for ib in range(1, hist.GetNbinsX() + 1):
blind = False
if isinstance(blinded_range[0], list):
for iblinded_range in blinded_range:
if (hist.GetBinCenter(ib) >= iblinded_range[0] and
hist.GetBinCenter(ib) <= iblinded_range[1]):
blind = True
break
else:
if (hist.GetBinCenter(ib) >= blinded_range[0] and
hist.GetBinCenter(ib) <= blinded_range[1]):
blind = True
if blind:
hist.SetBinContent(ib, -999)
hist.SetBinError(ib, 0)
draw_hists.extend(data_hists[::-1])
for hist in data_hists:
if not data_histo:
data_histo = hist.Clone()
else:
data_histo.Add(hist.Clone())
entries = [(hist, hist.process_label, hist.legend_style) for hist in all_hists]
n_entries = len(entries)
if n_entries <= 4:
n_cols = 1
elif n_entries <= 8:
n_cols = 2
else:
n_cols = 3
col_width = 0.125
n_rows = math.ceil(n_entries / float(n_cols))
row_width = 0.06
legend_x2 = 0.88
legend_x1 = legend_x2 - n_cols * col_width
legend_y2 = 0.88
legend_y1 = legend_y2 - n_rows * row_width
legend = ROOT.TLegend(legend_x1, legend_y1, legend_x2, legend_y2)
legend.SetBorderSize(0)
legend.SetNColumns(n_cols)
for entry in entries:
legend.AddEntry(*entry)
dummy_hist = ROOT.TH1F(randomize("dummy"), hist_title, *binning_args)
# Draw
if self.hide_data or len(data_hists) == 0 or len(background_hists) == 0 or not self.stack:
do_ratio = False
c = Canvas()
if self.log_y:
c.SetLogy()
label_scaling = 1
else:
do_ratio = True
c = RatioCanvas()
dummy_hist.GetXaxis().SetLabelOffset(100)
dummy_hist.GetXaxis().SetTitleOffset(100)
c.get_pad(1).cd()
if self.log_y:
c.get_pad(1).SetLogy()
label_scaling = 5./4.
# r.setup_hist(dummy_hist, pad=c.get_pad(1))
r.setup_hist(dummy_hist)
if do_ratio:
r.setup_y_axis(dummy_hist.GetYaxis(), pad=c.get_pad(1))
dummy_hist.GetYaxis().SetMaxDigits(4)
dummy_hist.GetYaxis().SetTitleOffset(1.22)
maximum = max([hist.GetMaximum() for hist in draw_hists])
if self.log_y:
dummy_hist.SetMaximum(100 * maximum)
dummy_hist.SetMinimum(0.0011)
else:
dummy_hist.SetMaximum(1.35 * maximum)
dummy_hist.SetMinimum(0.001)
inner_text=[self.category.label + " category"]
if self.region:
if isinstance(self.region.label, list):
inner_text += self.region.label
else:
inner_text.append(self.region.label)
if self.normalize_signals and self.stack and signal_hists and bkg_histo:
scale_text = []
for hist in signal_hists:
scale = hist.cmt_scale
if scale != 1.:
if scale < 100:
scale = "{:.1f}".format(scale)
elif scale < 10000:
scale = "{}".format(int(round(scale)))
else:
scale = "{:.2e}".format(scale).replace("+0", "").replace("+", "")
scale_text.append("{} x{}".format(hist.process_label, scale))
inner_text.append("#scale[0.75]{{{}}}".format(", ".join(scale_text)))
if maximum > 1e4 and not self.log_y:
upper_left_offset = 0.05
else:
upper_left_offset = 0.0
if not (self.hide_data or not len(data_hists) > 0) or self.stack:
upper_right="{}, {} TeV ({:.1f} ".format(
self.config.year,
self.config.ecm,
self.config.lumi_fb
) + "fb^{-1})"
else:
upper_right="{} Simulation ({} TeV)".format(
self.config.year,
self.config.ecm,
)
m = max([hist.GetMaximum() for hist in draw_hists]) if not self.log_y else None
draw_labels = get_labels(
upper_left_offset=upper_left_offset,
upper_right=upper_right,
scaling=label_scaling,
inner_text=inner_text,
max=m
)
dummy_hist.Draw()
# Draw fit if required
if self.include_fit:
with open(self.input()["fit"][feature.name]["json"].path) as f:
d = json.load(f)
d = d[""]
x_range = self.requires()["fit"].x_range
x = ROOT.RooRealVar("x", "x", float(x_range[0]), float(x_range[1]))
l = ROOT.RooArgList(x)
xframe = x.frame();
if self.requires()["fit"].method == "voigtian":
mean = ROOT.RooRealVar('mean', 'Mean of Voigtian', float(d["mean"]))
sigma = ROOT.RooRealVar('sigma', 'Sigma of Voigtian', float(d["sigma"]))
gamma = ROOT.RooRealVar('gamma', 'Gamma of Voigtian', float(d["gamma"]))
fit = ROOT.RooVoigtian("fit", "fit", x, mean, gamma, sigma)
elif self.requires()["fit"].method == "polynomial":
order = int(self.requires()["fit"].fit_parameters.get("order", 1))
params = [ROOT.RooRealVar(f'p{i}', f'p{i}', float(d[f'p{i}'])) for i in range(order)]
fit = ROOT.RooPolynomial("fit", "fit", x, ROOT.RooArgList(*params))
elif self.requires()["fit"].method == "exponential":
p = ROOT.RooRealVar("c", "c", float(d["c"]))
fit = ROOT.RooExponential("fit", "fit", x, p)
elif self.requires()["fit"].method == "powerlaw":
order = int(self.requires()["fit"].fit_parameters.get("order", 1))
params = [x]
for i in range(order):
params.append(ROOT.RooRealVar(f'a{i}', f'a{i}', d[f"a{i}"]))
params.append(ROOT.RooRealVar(f'b{i}', f'b{i}', d[f"b{i}"]))
fun = " + ".join([f"@{i + 1} * TMath::Power(@0, @{i + 2})"
for i in range(0, order, 2)])
fit = ROOT.RooGenericPdf("powerlaw", fun, ROOT.RooArgList(*params))
if self.stack:
process_name = self.requires()["fit"].process_name
for hist in all_hists:
if hist.cmt_process_name == process_name:
data = ROOT.RooDataHist("data_obs", "data_obs", l, hist)
data.plotOn(xframe,
ROOT.RooFit.MarkerColor(ROOT.TColor.GetColorTransparent(0, 1)),
ROOT.RooFit.LineColor(ROOT.TColor.GetColorTransparent(0, 1)))
fit.plotOn(xframe)
xframe.Draw("same");
for ih, hist in enumerate(draw_hists):
option = "HIST,SAME" if hist.hist_type != "data" else "PEZ,SAME"
hist.Draw(option)
for label in draw_labels:
label.Draw("same")
legend.Draw("same")
if not (self.hide_data or len(data_hists) == 0 or len(background_hists) == 0
or not self.stack):
dummy_ratio_hist = ROOT.TH1F(randomize("dummy"), hist_title, *binning_args)
r.setup_hist(dummy_ratio_hist, pad=c.get_pad(2),
props={"Minimum": 0.25, "Maximum": 1.75})
r.setup_y_axis(dummy_ratio_hist.GetYaxis(), pad=c.get_pad(2),
props={"Ndivisions": 3})
dummy_ratio_hist.GetYaxis().SetTitle("Data / MC")
dummy_ratio_hist.GetXaxis().SetTitleOffset(3)
dummy_ratio_hist.GetYaxis().SetTitleOffset(1.22)
data_graph = hist_to_graph(data_histo, remove_zeros=False, errors=True,
asymm=True, overflow=False, underflow=False,
attrs=["cmt_process_name", "cmt_hist_type", "cmt_legend_style"])
bkg_graph = hist_to_graph(bkg_histo, remove_zeros=False, errors=True,
asymm=True, overflow=False, underflow=False,
attrs=["cmt_process_name", "cmt_hist_type", "cmt_legend_style"])
ratio_graph = ROOT.TGraphAsymmErrors(binning_args[0])
mc_unc_graph = ROOT.TGraphErrors(binning_args[0])
r.setup_graph(ratio_graph, props={"MarkerStyle": 20, "MarkerSize": 0.5})
r.setup_graph(mc_unc_graph, props={"FillStyle": 3004, "LineColor": 0,
"MarkerColor": 0, "MarkerSize": 0., "FillColor": ROOT.kGray + 2})
if self.plot_systematics:
syst_graph = hist_to_graph(bkg_histo_syst, remove_zeros=False, errors=True,
asymm=True, overflow=False, underflow=False,
attrs=["cmt_process_name", "cmt_hist_type", "cmt_legend_style"])
syst_unc_graph = ROOT.TGraphErrors(binning_args[0])
r.setup_graph(syst_unc_graph, props={"FillStyle": 3005, "LineColor": 0,
"MarkerColor": 0, "MarkerSize": 0., "FillColor": ROOT.kRed + 2})
all_unc_graph = ROOT.TGraphErrors(binning_args[0])
r.setup_graph(all_unc_graph, props={"FillStyle": 3007, "LineColor": 0,
"MarkerColor": 0, "MarkerSize": 0., "FillColor": ROOT.kBlue + 2})
for i in range(binning_args[0]):
x, d, b = c_double(0.), c_double(0.), c_double(0.)
data_graph.GetPoint(i, x, d)
bkg_graph.GetPoint(i, x, b)
d = d.value
b = b.value
if d == EMPTY or b == 0:
ratio_graph.SetPoint(i, x, EMPTY)
else:
ratio_graph.SetPoint(i, x, d / b)
ratio_graph.SetPointEYhigh(i, data_graph.GetErrorYhigh(i) / b)
ratio_graph.SetPointEYlow(i, data_graph.GetErrorYlow(i) / b)
# set the mc uncertainty
if b == 0:
mc_unc_graph.SetPoint(i, x, EMPTY)
else:
mc_unc_graph.SetPoint(i, x, 1.)
mc_error = bkg_graph.GetErrorYhigh(i) / b
mc_unc_graph.SetPointError(i, dummy_ratio_hist.GetBinWidth(i + 1) / 2.,
mc_error)
if self.plot_systematics:
# syst only
syst_unc_graph.SetPoint(i, x, 1.)
syst_error = syst_graph.GetErrorYhigh(i) / b
syst_unc_graph.SetPointError(i, dummy_ratio_hist.GetBinWidth(i + 1) / 2.,
syst_error)
# syst + stat
all_unc_graph.SetPoint(i, x, 1.)
tot_unc = math.sqrt(mc_error ** 2 + syst_error ** 2)
all_unc_graph.SetPointError(i, dummy_ratio_hist.GetBinWidth(i + 1) / 2.,
tot_unc)
c.get_pad(2).cd()
dummy_ratio_hist.Draw()
mc_unc_graph.Draw("2,SAME")
if not self.hide_data:
ratio_graph.Draw("PEZ,SAME")
if self.plot_systematics:
# syst_unc_graph.Draw("2,SAME")
all_unc_graph.Draw("2,SAME")
lines = []
for y in [0.5, 1.0, 1.5]:
if isinstance(feature.binning, tuple):
l = ROOT.TLine(binning_args[1], y, binning_args[2], y)
else:
l = ROOT.TLine(binning_args[1][0], y, binning_args[1][-1], y)
r.setup_line(l, props={"NDC": False, "LineStyle": 3, "LineWidth": 1,
"LineColor": 1})
lines.append(l)
for line in lines:
line.Draw("same")
outputs = []
if self.save_png:
outputs.append(self.output()["png"].targets[feature.name].path)
if self.save_pdf:
outputs.append(self.output()["pdf"].targets[feature.name].path)
for output in outputs:
c.SaveAs(create_file_dir(output))
if self.save_root:
f = ROOT.TFile.Open(create_file_dir(
self.output()["root"].targets[feature.name].path), "RECREATE")
f.cd()
# c.Write("canvas") #FIXME
hist_dir = f.mkdir("histograms")
hist_dir.cd()
for hist in all_hists:
hist.Write(hist.cmt_process_name)
if bkg_histo:
bkg_histo.Write("background")
if self.store_systematics:
for syst_dir, shape_hists in self.histos["shape"].items():
for hist in shape_hists:
hist.Write("%s_%s" % (hist.cmt_process_name, syst_dir))
f.Close()
if self.save_yields:
yields = OrderedDict()
for hist in signal_hists + background_hists + data_hists:
yields[hist.cmt_process_name] = {
"Total yield": hist.cmt_yield,
"Total yield error": hist.cmt_yield_error,
"Entries": getattr(hist, "cmt_entries", hist.GetEntries()),
"Binned yield": hist.cmt_bin_yield,
"Binned yield error": hist.cmt_bin_yield_error,
}
if bkg_histo:
yields["background"] = {
"Total yield": sum([hist.cmt_yield for hist in background_hists]),
"Total yield error": math.sqrt(sum([(hist.cmt_yield_error)**2
for hist in background_hists])),
"Entries": getattr(bkg_histo, "cmt_entries", bkg_histo.GetEntries()),
"Binned yield": [sum([hist.cmt_bin_yield[i] for hist in background_hists])
for i in range(0, background_hists[0].GetNbinsX())],
"Binned yield error": [math.sqrt(sum([(hist.cmt_bin_yield_error[i])**2
for hist in background_hists]))
for i in range(0, background_hists[0].GetNbinsX())],
}
with open(create_file_dir(self.output()["yields"].targets[feature.name].path), "w") as f:
json.dump(yields, f, indent=4)
def get_nevents(self):
nevents = {}
for iproc, (process, datasets) in enumerate(self.processes_datasets.items()):
if not process.isData and not self.avoid_normalization:
for dataset in datasets:
nevents[dataset.name] = {}
for elem in ["central"] + [f"{syst}_{d}"
for (syst, d) in itertools.product(self.norm_syst_list, directions)]:
inp = self.input()["stats"][dataset.name][elem]
with open(inp.path) as f:
stats = json.load(f)
if self.apply_weights:
nevents[dataset.name][elem] = stats["nweightedevents"]
else:
nevents[dataset.name][elem] = stats["nevents"]
return nevents
def get_normalization_factor(self, dataset, elem):
return dataset.xs * self.config.lumi_pb / self.nevents[dataset.name][elem]
[docs] @law.decorator.notify
@law.decorator.safe_output
def run(self):
"""
Splits processes into data, signal and background. Creates histograms from each process
loading them from the input files. Scales the histograms and applies the correct format
to them.
"""
ROOT.gStyle.SetOptStat(0)
# create root tchains for inputs
inputs = self.input()
self.data_names = [p.name for p in self.processes_datasets.keys() if p.isData]
self.signal_names = [p.name for p in self.processes_datasets.keys() if p.isSignal]
self.background_names = [p.name for p in self.processes_datasets.keys()
if not p.isData and not p.isSignal]
if self.plot_systematics:
systematics = self.get_norm_systematics()
if self.fixed_colors:
colors = list(range(2, 2 + len(self.processes_datasets.keys())))
self.nevents = self.get_nevents()
for ifeat, feature in enumerate(self.features):
self.histos = {"background": [], "signal": [], "data": [], "all": []}
binning_args, y_axis_adendum = self.get_binning(feature, ifeat)
x_title = (str(feature.get_aux("x_title"))
+ (" [%s]" % feature.get_aux("units") if feature.get_aux("units") else ""))
y_title = ("Events" if self.stack else "Normalized Events") + y_axis_adendum
hist_title = "; %s; %s" % (x_title, y_title)
systs_directions = [("central", "")]
if self.plot_systematics:
self.histos["bkg_histo_syst"] = ROOT.TH1D(
randomize("syst"), hist_title, *binning_args)
if self.store_systematics:
self.histos["shape"] = {}
shape_systematics = self.get_systs(feature, True)
systs_directions += list(itertools.product(shape_systematics, directions))
for (syst, d) in systs_directions:
feature_name = feature.name if syst == "central" else "%s_%s_%s" % (
feature.name, syst, d)
if syst != "central":
self.histos["shape"]["%s_%s" % (syst, d)] = []
for iproc, (process, datasets) in enumerate(self.processes_datasets.items()):
if syst != "central" and process.isData:
continue
if self.do_sideband and not process.isData and not process.isSignal:
continue
process_histo = ROOT.TH1D(randomize(process.name), hist_title, *binning_args)
process_histo.process_label = str(process.label)
process_histo.cmt_process_name = process.name
process_histo.Sumw2()
for dataset in datasets:
dataset_histo = ROOT.TH1D(randomize("tmp"), hist_title, *binning_args)
dataset_histo.Sumw2()
for category in self.expand_category():
inp = inputs["data"][
(dataset.name, category.name)].collection.targets.values()
for elem in inp:
rootfile = ROOT.TFile.Open(elem.path)
histo = copy(rootfile.Get(feature_name))
rootfile.Close()
dataset_histo.Add(histo)
if not process.isData and not self.avoid_normalization:
elem = ("central" if syst == "central" or syst not in self.norm_syst_list
else f"{syst}_{d}")
if self.nevents[dataset.name][elem] != 0:
dataset_histo.Scale(self.get_normalization_factor(dataset, elem))
scaling = dataset.get_aux("scaling", None)
if scaling:
print(" ### Scaling {} histo by {} +- {}".format(
dataset.name, scaling[0], scaling[1]))
old_errors = [dataset_histo.GetBinError(ibin)\
/ dataset_histo.GetBinContent(ibin)
if dataset_histo.GetBinContent(ibin) != 0 else 0
for ibin in range(1, dataset_histo.GetNbinsX() + 1)]
new_errors = [
math.sqrt(elem ** 2 + (scaling[1] / scaling[0]) ** 2)
for elem in old_errors]
dataset_histo.Scale(scaling[0])
for ibin in range(1, dataset_histo.GetNbinsX() + 1):
dataset_histo.SetBinError(
ibin, dataset_histo.GetBinContent(ibin)
* new_errors[ibin - 1])
process_histo.Add(dataset_histo)
if self.plot_systematics and not process.isData and not process.isSignal \
and syst == "central":
dataset_histo_syst = dataset_histo.Clone()
for ibin in range(1, dataset_histo_syst.GetNbinsX() + 1):
# some datasets may not have any systematics
if dataset.name in systematics:
dataset_histo_syst.SetBinError(ibin,
float(dataset_histo.GetBinContent(ibin))\
* systematics[dataset.name]
)
self.histos["bkg_histo_syst"].Add(dataset_histo_syst)
if process.name in self.additional_scaling:
process_histo.Scale(self.additional_scaling[process.name])
yield_error = c_double(0.)
process_histo.cmt_yield = process_histo.IntegralAndError(0,
process_histo.GetNbinsX() + 1, yield_error)
process_histo.cmt_yield_error = yield_error.value
process_histo.cmt_bin_yield = []
process_histo.cmt_bin_yield_error = []
for ibin in range(1, process_histo.GetNbinsX() + 1):
process_histo.cmt_bin_yield.append(process_histo.GetBinContent(ibin))
process_histo.cmt_bin_yield_error.append(process_histo.GetBinError(ibin))
if syst == "central":
if self.fixed_colors:
color = colors[iproc]
elif type(process.color) == tuple:
color = ROOT.TColor.GetColor(*process.color)
else:
color = process.color
if process.isSignal:
self.setup_signal_hist(process_histo, color)
self.histos["signal"].append(process_histo)
elif process.isData:
self.setup_data_hist(process_histo, color)
self.histos["data"].append(process_histo)
else:
self.setup_background_hist(process_histo, color)
self.histos["background"].append(process_histo)
if not process.isData: #or not self.hide_data:
self.histos["all"].append(process_histo)
else:
self.histos["shape"]["%s_%s" % (syst, d)].append(process_histo)
self.plot(feature)
#####################################################################################################
#####################################################################################################
#####################################################################################################
class FeaturePlotSyst(FeaturePlot):
"""
Performs the histogram plotting with up and down variations due to systematics:
loads the histograms obtained in the FeaturePlot task, rescales them if needed
and plots and saves them.
Example command:
``law run FeaturePlotSyst --version test --category-name etau --config-name ul_2018 \
--process-group-name etau --feature-names lep1_pt,lep1_eta \
--workers 20 --PrePlot-workflow local --stack --hide-data False --do-qcd --region-name etau_os_iso\
--dataset-names tt_dl,tt_sl,dy_high,wjets,data_etau_a,data_etau_b,data_etau_c,data_etau_d \
--MergeCategorizationStats-version test_old``
"""
def requires(self):
"""
Needs as input the root file provided by the FeaturePlot task
"""
return FeaturePlot.vreq(self, save_root=True, stack=True)
def output(self):
"""
Output files to be filled: pdf or png
"""
output_data = []
if self.save_pdf:
output_data.append(("pdf", "", "pdf"))
if self.save_png:
output_data.append(("png", "", "png"))
process_names = [process.name for process in self.processes_datasets.keys()
if not process.isData]
if self.do_qcd:
process_names.append("qcd")
out = {
key: law.SiblingFileCollection(OrderedDict(
("%s_%s_%s" %(process_name, feature.name, syst),
self.local_target("{}{}_{}_{}{}.{}".format(
prefix, process_name, feature.name, syst, self.get_output_postfix(key), ext)))
for feature in self.features
for syst in self.get_unique_systs(self.get_systs(feature, True) \
+ self.config.get_weights_systematics(self.config.weights[self.category.name], True))
for process_name in process_names
))
for key, prefix, ext in output_data
}
return out
def setup_syst_hist(self, hist, dir):
if dir == "central":
hist.SetFillStyle(0)
hist.SetLineColor(1)
hist.legend_style = "l"
hist.SetLineWidth(2)
if dir == "up":
hist.SetFillStyle(0)
hist.SetLineColor(ROOT.kAzure)
hist.legend_style = "l"
hist.SetLineWidth(2)
# hist.SetLineStyle(4)
if dir == "down":
hist.SetFillStyle(0)
hist.SetLineColor(ROOT.kMagenta)
hist.legend_style = "l"
hist.SetLineWidth(2)
# hist.SetLineStyle(8)
def get_syst_label(self, shape_syst):
if self.config.systematics.get(shape_syst).get_aux("label"):
return self.config.systematics.get(shape_syst).get_aux("label")
else:
return shape_syst
def plot(self, feature):
for process, p_label in zip(self.process_names, self.process_labels):
# central histogram for each process
histo_syst_central = self.histos[process]["central"]
self.setup_syst_hist(histo_syst_central, "central")
for shape_syst in self.shape_syst_list:
# up and down variation histograms for each systematic
histo_syst_up = self.histos[process]["%s_up" %shape_syst]
histo_syst_down = self.histos[process]["%s_down" %shape_syst]
self.setup_syst_hist(histo_syst_up, "up")
self.setup_syst_hist(histo_syst_down, "down")
draw_hists = [histo_syst_central, histo_syst_up, histo_syst_down]
shape_syst_label = self.get_syst_label(shape_syst)
entries = [ (histo_syst_central, "Nominal", histo_syst_central.legend_style),
(histo_syst_up, shape_syst_label + " Up", histo_syst_up.legend_style),
(histo_syst_down, shape_syst_label + " Down", histo_syst_down.legend_style)]
n_entries = len(entries)
if n_entries <= 4:
n_cols = 1
elif n_entries <= 8:
n_cols = 2
else:
n_cols = 3
if len(shape_syst_label) < 5:
col_width = 0.20
elif len(shape_syst_label) < 12:
col_width = 0.30
else:
col_width = 0.40
n_rows = math.ceil(n_entries / float(n_cols))
row_width = 0.06
legend_x2 = 0.88
legend_x1 = legend_x2 - n_cols * col_width
legend_y2 = 0.88
legend_y1 = legend_y2 - n_rows * row_width
legend = ROOT.TLegend(legend_x1, legend_y1, legend_x2, legend_y2)
legend.SetBorderSize(0)
legend.SetNColumns(1)
for entry in entries:
legend.AddEntry(*entry)
binning_args, y_axis_adendum = self.get_binning(feature)
x_title = (str(feature.get_aux("x_title"))
+ (" [%s]" % feature.get_aux("units") if feature.get_aux("units") else ""))
y_title = ("Events" if self.stack else "Normalized Events") + y_axis_adendum
hist_title = "; %s; %s" % (x_title, y_title)
dummy_hist = ROOT.TH1F(randomize("dummy"), hist_title, *binning_args)
# Draw
c = RatioCanvas()
dummy_hist.GetXaxis().SetLabelOffset(100)
dummy_hist.GetXaxis().SetTitleOffset(100)
c.get_pad(1).cd()
if self.log_y:
c.get_pad(1).SetLogy()
label_scaling = 5./4.
r.setup_hist(dummy_hist)
r.setup_y_axis(dummy_hist.GetYaxis(), pad=c.get_pad(1))
dummy_hist.GetYaxis().SetMaxDigits(4)
dummy_hist.GetYaxis().SetTitleOffset(1.22)
maximum = max([hist.GetMaximum() for hist in draw_hists])
if self.log_y:
dummy_hist.SetMaximum(100 * maximum)
dummy_hist.SetMinimum(0.0011)
else:
dummy_hist.SetMaximum(1.25 * maximum)
dummy_hist.SetMinimum(0.001)
inner_text=[self.category.label + " category"]
if self.region:
if isinstance(self.region.label, list):
inner_text += self.region.label
else:
inner_text.append(self.region.label)
inner_text.append(p_label)
if maximum > 1e4 and not self.log_y:
upper_left_offset = 0.05
else:
upper_left_offset = 0.0
upper_right="{} Simulation ({} TeV)".format(
self.config.year,
self.config.ecm,
)
m = max([hist.GetMaximum() for hist in draw_hists]) if not self.log_y else None
draw_labels = get_labels(
upper_left_offset=upper_left_offset,
upper_right=upper_right,
scaling=label_scaling,
inner_text=inner_text,
max=m
)
dummy_hist.Draw()
histo_syst_central.Draw("HIST,SAME")
histo_syst_up.Draw("HIST,SAME")
histo_syst_down.Draw("HIST,SAME")
for label in draw_labels:
label.Draw("same")
legend.Draw("same")
dummy_ratio_hist = ROOT.TH1F(randomize("dummy"), hist_title, *binning_args)
r.setup_hist(dummy_ratio_hist, pad=c.get_pad(2),
props={"Minimum": 0.75, "Maximum": 1.25})
r.setup_y_axis(dummy_ratio_hist.GetYaxis(), pad=c.get_pad(2),
props={"Ndivisions": 5})
dummy_ratio_hist.GetYaxis().SetTitle("Ratio")
dummy_ratio_hist.GetXaxis().SetTitleOffset(3)
dummy_ratio_hist.GetYaxis().SetTitleOffset(1.22)
ratio_hist_up = ROOT.TH1F(randomize("ratio_hist_up"), hist_title, *binning_args)
ratio_hist_down = ROOT.TH1F(randomize("ratio_hist_down"), hist_title, *binning_args)
self.setup_syst_hist(ratio_hist_up, "up")
self.setup_syst_hist(ratio_hist_down, "down")
ratio_hist_up.SetLineStyle(0)
ratio_hist_down.SetLineStyle(0)
mc_unc_graph = ROOT.TGraphErrors(binning_args[0])
r.setup_graph(mc_unc_graph, props={"FillStyle": 3004, "LineColor": 0,
"MarkerColor": 0, "MarkerSize": 0., "FillColor": ROOT.kGray + 2})
for i in range(binning_args[0]+1):
x = histo_syst_central.GetBinCenter(i)
y = histo_syst_central.GetBinContent(i)
sigma_x = histo_syst_central.GetBinWidth(i)/2.
sigma_y = histo_syst_central.GetBinError(i)
if histo_syst_central.GetBinContent(i) != 0:
ratio_hist_up.SetBinContent(i, histo_syst_up.GetBinContent(i)/histo_syst_central.GetBinContent(i))
ratio_hist_down.SetBinContent(i, histo_syst_down.GetBinContent(i)/histo_syst_central.GetBinContent(i))
mc_unc_graph.SetPoint(i, x, 1.)
mc_unc_graph.SetPointError(i, sigma_x, sigma_y/y)
else:
ratio_hist_up.SetBinContent(i, EMPTY)
ratio_hist_down.SetBinContent(i, EMPTY)
mc_unc_graph.SetPoint(i, x, EMPTY)
c.get_pad(2).cd()
dummy_ratio_hist.Draw()
ratio_hist_up.Draw("SAME")
ratio_hist_down.Draw("SAME")
mc_unc_graph.Draw("2,SAME")
lines = []
for y in [0.5, 1.0, 1.5]:
if isinstance(feature.binning, tuple):
l = ROOT.TLine(binning_args[1], y, binning_args[2], y)
else:
l = ROOT.TLine(binning_args[1][0], y, binning_args[1][-1], y)
r.setup_line(l, props={"NDC": False, "LineStyle": 3, "LineWidth": 1,
"LineColor": 1})
lines.append(l)
for line in lines:
line.Draw("same")
outputs = []
if self.save_png:
outputs.append(self.output()["png"].targets["%s_%s_%s" %(process, feature.name, shape_syst)].path)
if self.save_pdf:
outputs.append(self.output()["pdf"].targets["%s_%s_%s" %(process, feature.name, shape_syst)].path)
for output in outputs:
c.SaveAs(create_file_dir(output))
def run(self):
"""
Splits the processes into data and non-data. Per feature, loads the input histograms,
creates the output plots with up and down variations.
"""
ROOT.gStyle.SetOptStat(0)
inputs = self.input()
self.process_names = [process.name for process in self.processes_datasets.keys() if not process.isData]
self.process_labels = [process.label for process in self.processes_datasets.keys() if not process.isData]
if self.do_qcd:
self.process_names.append("qcd")
self.process_labels.append("QCD")
for feature in self.features:
self.shape_syst_list = self.get_unique_systs(self.get_systs(feature, True) \
+ self.config.get_weights_systematics(self.config.weights[self.category.name], True))
tf = ROOT.TFile.Open(inputs["root"].targets[feature.name].path)
self.histos = {}
for process in self.process_names:
self.histos[process] = {}
self.histos[process]["central"] = copy(tf.Get("histograms/%s" % process))
for shape_syst in self.shape_syst_list:
self.histos[process]["%s_up" %shape_syst] = copy(tf.Get("histograms/%s_%s_up" % (process, shape_syst)))
self.histos[process]["%s_down" %shape_syst] = copy(tf.Get("histograms/%s_%s_down" % (process, shape_syst)))
tf.Close()
self.plot(feature)
#################################################################################################################################
#################################################################################################################################
#################################################################################################################################
class BasePlot2DTask(BasePlotTask):
def get_features(self):
features = []
for feature_pair_name in self.feature_names:
try:
feature_pair_names = feature_pair_name.split(":")
except:
print("%s cannot be considered for a 2D plot" % feature_pair_name)
continue
feature_pair = []
for feature_name in feature_pair_names:
for feature in self.config.features:
if feature.name == feature_name:
feature_pair.append(feature)
break
if len(feature_pair) == 2:
features.append(tuple(feature_pair))
if len(features) == 0:
raise ValueError("No features were included. Did you spell them correctly?")
return features
def get_binning(self, feature):
binning_args = []
for feature_elem in feature:
if isinstance(feature_elem.binning, tuple):
binning_args += feature_elem.binning
else:
binning_args += [len(feature_elem.binning) - 1, array.array("f", feature_elem.binning)]
return tuple(binning_args), ""
class PrePlot2D(PrePlot, BasePlot2DTask):
def get_syst_list(self):
if self.skip_processing:
return []
syst_list = []
isMC = self.dataset.process.isMC
for feature_pair in self.features:
for feature in feature_pair:
systs = self.get_unique_systs(self.get_systs(feature, isMC))
for syst in systs:
if syst in syst_list:
continue
try:
systematic = self.config.systematics.get(syst)
if self.category.name in systematic.get_aux("affected_categories", []):
syst_list.append(syst)
except:
continue
return syst_list
def define_histograms(self, dfs, nentries):
histos = []
isMC = self.dataset.process.isMC
for feature_pair in self.features:
binning_args, _ = self.get_binning(feature_pair)
x_title = (str(feature_pair[0].get_aux("x_title"))
+ (" [%s]" % feature_pair[0].get_aux("units")
if feature_pair[0].get_aux("units") else ""))
y_title = (str(feature_pair[1].get_aux("x_title"))
+ (" [%s]" % feature_pair[1].get_aux("units")
if feature_pair[1].get_aux("units") else ""))
title = "; %s; %s; Events" % (x_title, y_title)
systs = self.get_unique_systs(self.get_systs(feature_pair[0], isMC)
+ self.get_systs(feature_pair[1], isMC) \
+ self.config.get_weights_systematics(self.config.weights[self.category.name], isMC)
)
systs_directions = [("central", "")]
if isMC and self.store_systematics:
systs_directions += list(itertools.product(systs, directions))
# loop over systematics and up/down variations
for syst_name, direction in systs_directions:
# Select the appropriate RDF (input file) depending on the syst and direction
key = "central"
if f"{syst_name}_{direction}" in dfs:
key = f"{syst_name}_{direction}"
df = dfs[key]
# apply selection if needed
feat_df = df
for ifeat, feature in enumerate(feature_pair):
if feature.selection:
feat_df = feat_df.Define(
"feat%s_selection" % ifeat, self.config.get_object_expression(
feature.selection, isMC, syst_name, direction)).Filter(
"feat%s_selection" % ifeat)
# define tag just for histograms's name
if syst_name != "central" and isMC:
tag = "_%s" % syst_name
if direction != "":
tag += "_%s" % direction
else:
tag = ""
feature_expressions = [
self.config.get_object_expression(
feature_pair[0], isMC, syst_name, direction),
self.config.get_object_expression(
feature_pair[1], isMC, syst_name, direction),
]
feature_name = "_".join([feature.name for feature in feature_pair]) + tag
hist_base = ROOT.TH2D(feature_name, title, *binning_args)
if nentries[key] > 0:
hmodel = ROOT.RDF.TH2DModel(hist_base)
histos.append(
feat_df.Define(
"weight", "{}".format(self.get_weight(
self.category.name, syst_name, direction))
).Define("var1", feature_expressions[0]).Define("var2", feature_expressions[1]
).Histo2D(hmodel, "var1", "var2", "weight")
)
else: # no entries available, append empty histogram
histos.append(hist_base)
return histos
class FeaturePlot2D(FeaturePlot, BasePlot2DTask):
"""
Performs the actual histogram plotting: loads the histograms obtained in the PrePlot2D tasks,
rescales them if needed and plots and saves them.
Example command:
``law run FeaturePlot2D --version test --category-name etau --config-name ul_2018 \
--process-group-name etau --feature-names lep1_pt:lep1_eta,Hbb_mass:Htt_svfit_mass \
--workers 20 --PrePlot2D-workflow local --stack --hide-data False --do-qcd --region-name etau_os_iso\
--dataset-names tt_dl,tt_sl,dy_high,wjets,data_etau_a,data_etau_b,data_etau_c,data_etau_d \
--MergeCategorizationStats-version test_old``
:param log_z: whether to set y axis to log scale
:type log_z: bool
"""
log_z = luigi.BoolParameter(default=False, description="set logarithmic scale for Z axis, "
"default: False")
def requires(self):
"""
All requirements needed:
* Histograms coming from the PrePlot2D task.
* Number of total events coming from the MergeCategorizationStats task \
(to normalize MC histograms).
* If estimating QCD, FeaturePlot2D for the three additional QCD regions needed.
"""
reqs = {}
reqs["data"] = OrderedDict(
((dataset.name, category.name), PrePlot2D.vreq(self,
dataset_name=dataset.name, category_name=self.get_data_category(category).name))
for dataset, category in itertools.product(
self.datasets_to_run, self.expand_category())
)
if not self.avoid_normalization:
reqs["stats"] = OrderedDict()
for dataset in self.datasets_to_run:
if dataset.process.isData:
continue
reqs["stats"][dataset.name] = {}
for elem in ["central"] + [f"{syst}_{d}"
for (syst, d) in itertools.product(self.norm_syst_list, directions)]:
syst = ""
d = ""
if "_" in elem:
syst = elem.split("_")[0]
d = elem.split("_")[1]
if dataset.get_aux("secondary_dataset", None):
reqs["stats"][dataset.name][elem] = MergeCategorizationStats.vreq(self,
dataset_name=dataset.get_aux("secondary_dataset"),
systematic=syst, systematic_direction=d)
else:
reqs["stats"][dataset.name][elem] = MergeCategorizationStats.vreq(self,
dataset_name=dataset.name, systematic=syst, systematic_direction=d)
if self.do_qcd:
reqs["qcd"] = {
key: self.req(self, region_name=region.name, blinded=False, hide_data=False,
do_qcd=False, stack=True, save_root=True, save_pdf=True, save_yields=False,
remove_horns=False,_exclude=["feature_tags", "shape_region",
"qcd_category_name", "qcd_sym_shape", "qcd_signal_region_wp"])
for key, region in self.qcd_regions.items()
}
if self.qcd_category_name != "default":
reqs["qcd"]["ss_iso"] = FeaturePlot2D.vreq(self,
region_name=self.qcd_regions.ss_iso.name, category_name=self.qcd_category_name,
blinded=False, hide_data=False, do_qcd=False, stack=True, save_root=True,
save_pdf=True, save_yields=False, feature_names=(self.qcd_feature,),
bin_opt_version=law.NO_STR, remove_horns=self.remove_horns, _exclude=["feature_tags",
"shape_region", "qcd_category_name", "qcd_sym_shape", "bin_opt_version",
"qcd_signal_region_wp"])
# reqs["qcd"]["os_inviso"] = FeaturePlot2D.vreq(self,
# region_name=self.qcd_regions.os_inviso.name, category_name=self.qcd_category_name,
# blinded=False, hide_data=False, do_qcd=False, stack=True, save_root=True,
# save_pdf=True, save_yields=True, feature_names=(self.qcd_feature,),
# remove_horns=self.remove_horns, _exclude=["feature_tags", "bin_opt_version"])
reqs["qcd"]["ss_inviso"] = FeaturePlot2D.vreq(self,
region_name=self.qcd_regions.ss_inviso.name,
category_name=self.qcd_category_name,
blinded=False, hide_data=False, do_qcd=False, stack=True, save_root=True,
save_pdf=True, save_yields=False, feature_names=(self.qcd_feature,),
bin_opt_version=law.NO_STR, remove_horns=self.remove_horns, _exclude=["feature_tags",
"shape_region", "qcd_category_name", "qcd_sym_shape", "bin_opt_version",
"qcd_signal_region_wp"])
return reqs
def output(self):
"""
Output files to be filled: pdf, png, root or json
"""
def get_feature_name(feature_pair):
return "_".join([feature.name for feature in feature_pair])
# output definitions, i.e. key, file prefix, extension
output_data = []
if self.save_root:
output_data.append(("root", "root/", "root"))
if self.save_yields:
output_data.append(("yields", "yields/", "json"))
out = {
key: law.SiblingFileCollection(OrderedDict(
(get_feature_name(feature_pair), self.local_target("{}{}{}.{}".format(
prefix, get_feature_name(feature_pair),
self.get_output_postfix(key), ext)))
for feature_pair in self.features
))
for key, prefix, ext in output_data
}
output_data = []
if self.save_pdf:
output_data.append(("pdf", "", "pdf"))
if self.save_png:
output_data.append(("png", "", "png"))
out.update({
key: law.SiblingFileCollection(OrderedDict(
(get_feature_name(feature_pair), OrderedDict(
(process.name, self.local_target("{}{}{}_{}.{}".format(
prefix, get_feature_name(feature_pair),
self.get_output_postfix(key), process.name, ext)))
for process in self.processes_datasets
if not process.isData or not self.hide_data
))
for feature_pair in self.features
))
for key, prefix, ext in output_data
})
return out
def plot(self, feature):
"""
Performs the actual plotting.
"""
# helper to extract the qcd shape in a region
def get_qcd(region, files, bin_limit=0.):
d_hist = files[region].Get("histograms/" + self.data_names[0])
if not d_hist:
raise Exception("data histogram '{}' not found for region '{}' in tfile {}".format(
self.data_names[0], region, files[region]))
b_hists = []
for b_name in self.background_names:
b_hist = files[region].Get("histograms/" + b_name)
if not b_hist:
raise Exception("background histogram '{}' not found in region '{}'".format(
b_name, region))
b_hists.append(b_hist)
qcd_hist = d_hist.Clone(randomize("qcd_" + region))
for hist in b_hists:
qcd_hist.Add(hist, -1.)
# removing negative bins
for ibinx, ibiny in itertools.product(
range(1, qcd_hist.GetNbinsX() + 1),
range(1, qcd_hist.GetNbinsY() + 1)):
if qcd_hist.GetBinContent(ibinx, ibiny) < bin_limit:
qcd_hist.SetBinContent(ibinx, ibiny, 1.e-6)
return qcd_hist
def get_integral_and_error(hist):
error = c_double(0.)
integral = hist.IntegralAndError(
0, hist.GetNbinsX() + 1,
0, hist.GetNbinsY() + 1, error)
error = error.value
compatible = True if integral <= 0 else False
# compatible = True if integral - error <= 0 else False
return integral, error, compatible
background_hists = self.histos["background"]
signal_hists = self.histos["signal"]
data_hists = self.histos["data"]
all_hists = self.histos["all"]
if self.plot_systematics:
bkg_histo_syst = self.histos["bkg_histo_syst"]
binning_args, _ = self.get_binning(feature)
x_title = (str(feature[0].get_aux("x_title"))
+ (" [%s]" % feature[0].get_aux("units") if feature[0].get_aux("units") else ""))
y_title = (str(feature[1].get_aux("x_title"))
+ (" [%s]" % feature[1].get_aux("units") if feature[1].get_aux("units") else ""))
z_title = ("Events" if self.stack else "Normalized Events")
hist_title = "; %s; %s; %s" % (x_title, y_title, z_title)
feature_pair_name = "_".join([elem.name for elem in feature])
# qcd shape files
qcd_shape_files = None
if self.do_qcd:
qcd_shape_files = {}
for key, region in self.qcd_regions.items():
if self.qcd_category_name != "default":
my_feature = (feature_pair_name if "shape" in key or key == "os_inviso"
else self.qcd_feature)
else:
my_feature = feature_pair_name
qcd_shape_files[key] = ROOT.TFile.Open(self.input()["qcd"][key]["root"].targets[my_feature].path)
# do the qcd extrapolation
if "shape" in qcd_shape_files:
qcd_hist = get_qcd("shape", qcd_shape_files).Clone(randomize("qcd"))
qcd_hist.Scale(1. / qcd_hist.Integral())
else: #sym shape
qcd_hist = get_qcd("shape1", qcd_shape_files).Clone(randomize("qcd"))
qcd_hist2 = get_qcd("shape2", qcd_shape_files).Clone(randomize("qcd"))
qcd_hist.Scale(1. / qcd_hist.Integral())
qcd_hist2.Scale(1. / qcd_hist2.Integral())
qcd_hist.Add(qcd_hist2)
qcd_hist.Scale(0.5)
n_os_inviso, n_os_inviso_error, n_os_inviso_compatible = get_integral_and_error(
get_qcd("os_inviso", qcd_shape_files, -999))
n_ss_iso, n_ss_iso_error, n_ss_iso_compatible = get_integral_and_error(
get_qcd("ss_iso", qcd_shape_files, -999))
n_ss_inviso, n_ss_inviso_error, n_ss_inviso_compatible = get_integral_and_error(
get_qcd("ss_inviso", qcd_shape_files, -999))
# if not n_ss_iso or not n_ss_inviso:
if n_os_inviso_compatible or n_ss_iso_compatible or n_ss_inviso_compatible:
print("****WARNING: QCD normalization failed (negative yield), Removing QCD!")
qcd_scaling = 0.
qcd_inviso = 0.
qcd_inviso_error = 0.
qcd_hist.Scale(qcd_scaling)
else:
qcd_inviso = n_os_inviso / n_ss_inviso
qcd_inviso_error = qcd_inviso * math.sqrt(
(n_os_inviso_error / n_os_inviso) * (n_os_inviso_error / n_os_inviso)
+ (n_ss_inviso_error / n_ss_inviso) * (n_ss_inviso_error / n_ss_inviso)
)
qcd_scaling = n_os_inviso * n_ss_iso / n_ss_inviso
os_inviso_rel_error = n_os_inviso_error / n_os_inviso
ss_iso_rel_error = n_ss_iso_error / n_ss_iso
ss_inviso_rel_error = n_ss_inviso_error / n_ss_inviso
new_errors_sq = {}
for ibinx, ibiny in itertools.product(
range(1, qcd_hist.GetNbinsX() + 1),
range(1, qcd_hist.GetNbinsY() + 1)):
if qcd_hist.GetBinContent(ibinx, ibiny) > 0:
bin_rel_error = (qcd_hist.GetBinError(ibinx, ibiny)
/ qcd_hist.GetBinContent(ibinx, ibiny))
else:
bin_rel_error = 0
new_errors_sq[(ibinx, ibiny)] = (bin_rel_error * bin_rel_error
+ os_inviso_rel_error * os_inviso_rel_error
+ ss_iso_rel_error * ss_iso_rel_error
+ ss_inviso_rel_error * ss_inviso_rel_error)
qcd_hist.Scale(qcd_scaling)
# fix errors
for ibinx, ibiny in itertools.product(
range(1, qcd_hist.GetNbinsX() + 1),
range(1, qcd_hist.GetNbinsY() + 1)):
qcd_hist.SetBinError(ibinx, ibiny, qcd_hist.GetBinContent(ibinx, ibiny)
* math.sqrt(new_errors_sq[(ibinx, ibiny)]))
# store and style
yield_error = c_double(0.)
qcd_hist.cmt_yield = qcd_hist.IntegralAndError(
0, qcd_hist.GetNbinsX() + 1, 0, qcd_hist.GetNbinsY() + 1, yield_error)
qcd_hist.cmt_yield_error = yield_error.value
qcd_hist.cmt_bin_yield = []
qcd_hist.cmt_bin_yield_error = []
for ibiny in range(1, qcd_hist.GetNbinsY() + 1):
qcd_hist.cmt_bin_yield.append([
qcd_hist.GetBinContent(ibinx, ibiny)
for ibinx in range(1, qcd_hist.GetNbinsX() + 1)
])
qcd_hist.cmt_bin_yield_error.append([
qcd_hist.GetBinError(ibinx, ibiny)
for ibinx in range(1, qcd_hist.GetNbinsX() + 1)
])
qcd_hist.cmt_scale = 1.
qcd_hist.cmt_process_name = "qcd"
qcd_hist.process_label = "QCD"
qcd_hist.SetTitle("QCD")
background_hists.append(qcd_hist)
all_hists.append(qcd_hist)
if not self.hide_data:
all_hists += data_hists
bkg_histo = None
if not self.stack:
for hist in all_hists:
scale = 1. / (hist.Integral() or 1.)
hist.Scale(scale)
hist.scale = scale
draw_hists = all_hists
else:
draw_hists = all_hists
for hist in background_hists:
if not bkg_histo:
bkg_histo = hist.Clone()
else:
bkg_histo.Add(hist.Clone())
if not self.hide_data:
draw_hists.extend(data_hists)
dummy_hist = ROOT.TH2F(randomize("dummy"), hist_title, *binning_args)
r.setup_hist(dummy_hist)
dummy_hist.GetZaxis().SetMaxDigits(4)
dummy_hist.GetZaxis().SetTitleOffset(1.22) # FIXME
if self.log_z:
dummy_hist.SetMinimum(0.0011)
else:
dummy_hist.SetMinimum(0.001)
inner_text=[self.category.label + " category"]
if self.region:
if isinstance(self.region.label, list):
inner_text += self.region.label
else:
inner_text.append(self.region.label)
if not (self.hide_data or not len(data_hists) > 0) or self.stack:
upper_right="{}, {} TeV ({:.1f} ".format(
self.config.year,
self.config.ecm,
self.config.lumi_fb
) + "fb^{-1})"
else:
upper_right="{} Simulation ({} TeV)".format(
self.config.year,
self.config.ecm,
)
draw_labels = get_labels(
upper_right=upper_right,
inner_text=inner_text
)
for ih, hist in enumerate(draw_hists):
c = Canvas()
if self.log_z:
c.SetLogz()
dummy_hist.SetMaximum(hist.GetMaximum())
dummy_hist.Draw()
hist.Draw("COLZ,SAME")
for label in draw_labels:
label.Draw("same")
outputs = []
if self.save_png:
outputs.append(self.output()["png"].targets[feature_pair_name][hist.cmt_process_name].path)
if self.save_pdf:
outputs.append(self.output()["pdf"].targets[feature_pair_name][hist.cmt_process_name].path)
for output in outputs:
c.SaveAs(create_file_dir(output))
if self.save_root:
f = ROOT.TFile.Open(create_file_dir(
self.output()["root"].targets[feature_pair_name].path), "RECREATE")
f.cd()
# c.Write("canvas") #FIXME
hist_dir = f.mkdir("histograms")
hist_dir.cd()
for hist in all_hists:
hist.Write(hist.cmt_process_name)
if bkg_histo:
bkg_histo.Write("background")
if self.store_systematics:
for syst_dir, shape_hists in self.histos["shape"].items():
for hist in shape_hists:
hist.Write("%s_%s" % (hist.cmt_process_name, syst_dir))
f.Close()
if self.save_yields:
yields = OrderedDict()
for hist in signal_hists + background_hists + data_hists:
yields[hist.cmt_process_name] = {
"Total yield": hist.cmt_yield,
"Total yield error": hist.cmt_yield_error,
"Entries": getattr(hist, "cmt_entries", hist.GetEntries()),
"Binned yield": hist.cmt_bin_yield,
"Binned yield error": hist.cmt_bin_yield_error,
}
if bkg_histo:
yields["background"] = {
"Total yield": sum([hist.cmt_yield for hist in background_hists]),
"Total yield error": math.sqrt(sum([(hist.cmt_yield_error)**2 for hist in background_hists])),
"Entries": getattr(bkg_histo, "cmt_entries", bkg_histo.GetEntries()),
"Binned yield": [[
sum([hist.cmt_bin_yield[j][i] for hist in background_hists])
for i in range(0, background_hists[0].GetNbinsX())]
for j in range(0, background_hists[0].GetNbinsY())],
"Binned yield error": [[
math.sqrt(sum([(hist.cmt_bin_yield_error[j][i]) ** 2
for hist in background_hists]))
for i in range(0, background_hists[0].GetNbinsX())]
for j in range(0, background_hists[0].GetNbinsY())],
}
with open(create_file_dir(self.output()["yields"].targets[feature_pair_name].path), "w") as f:
json.dump(yields, f, indent=4)
@law.decorator.notify
@law.decorator.safe_output
def run(self):
"""
Splits processes into data, signal and background. Creates histograms from each process
loading them from the input files. Scales the histograms and applies the correct format
to them.
"""
ROOT.gStyle.SetOptStat(0)
# create root tchains for inputs
inputs = self.input()
self.data_names = [p.name for p in self.processes_datasets.keys() if p.isData]
self.signal_names = [p.name for p in self.processes_datasets.keys() if p.isSignal]
self.background_names = [p.name for p in self.processes_datasets.keys()
if not p.isData and not p.isSignal]
self.nevents = self.get_nevents()
for feature_pair in self.features:
self.histos = {"background": [], "signal": [], "data": [], "all": []}
binning_args, _ = self.get_binning(feature_pair)
x_title = (str(feature_pair[0].get_aux("x_title"))
+ (" [%s]" % feature_pair[0].get_aux("units")
if feature_pair[0].get_aux("units") else ""))
y_title = (str(feature_pair[1].get_aux("x_title"))
+ (" [%s]" % feature_pair[1].get_aux("units")
if feature_pair[1].get_aux("units") else ""))
z_title = ("Events" if self.stack else "Normalized Events")
hist_title = "; %s; %s: %s" % (x_title, y_title, z_title)
systs_directions = [("central", "")]
if self.plot_systematics:
self.histos["bkg_histo_syst"] = ROOT.TH2D(
randomize("syst"), hist_title, *binning_args)
if self.store_systematics:
self.histos["shape"] = {}
shape_systematics = self.get_unique_systs(
self.get_systs(feature_pair[0], True) \
+ self.get_systs(feature_pair[1], True) \
+ self.config.get_weights_systematics(self.config.weights[self.category.name],
True))
systs_directions += list(itertools.product(shape_systematics, directions))
feature_pair_name = "_".join([feature.name for feature in feature_pair])
for (syst, d) in systs_directions:
feature_name = feature_pair_name if syst == "central" else "%s_%s_%s" % (
feature_pair_name, syst, d)
if syst != "central":
self.histos["shape"]["%s_%s" % (syst, d)] = []
for iproc, (process, datasets) in enumerate(self.processes_datasets.items()):
if syst != "central" and process.isData:
continue
process_histo = ROOT.TH2D(randomize(process.name), hist_title, *binning_args)
process_histo.process_label = str(process.label)
process_histo.cmt_process_name = process.name
process_histo.Sumw2()
for dataset in datasets:
dataset_histo = ROOT.TH2D(randomize("tmp"), hist_title, *binning_args)
dataset_histo.Sumw2()
for category in self.expand_category():
inp = inputs["data"][
(dataset.name, category.name)].collection.targets.values()
for elem in inp:
rootfile = ROOT.TFile.Open(elem.path)
histo = copy(rootfile.Get(feature_name))
rootfile.Close()
dataset_histo.Add(histo)
if not process.isData and not self.avoid_normalization:
elem = ("central"
if syst == "central" or syst not in self.norm_syst_list
else f"{syst}_{d}")
if self.nevents[dataset.name][elem] != 0:
dataset_histo.Scale(
self.get_normalization_factor(dataset, elem))
scaling = dataset.get_aux("scaling", None)
if scaling:
print(" ### Scaling {} histo by {} +- {}".format(
dataset.name, scaling[0], scaling[1]))
old_errors = [[dataset_histo.GetBinError(ibinx, ibiny)\
/ dataset_histo.GetBinContent(ibinx, ibiny)
if dataset_histo.GetBinContent(ibinx, ibiny) != 0
else 0
for ibinx in range(1, dataset_histo.GetNbinsX() + 1)]
for ibiny in range(1, dataset_histo.GetNbinsY() + 1)
]
new_errors = [[
math.sqrt(elem ** 2 + (scaling[1] / scaling[0]) ** 2)
for elem in line]
for line in old_errors]
dataset_histo.Scale(scaling[0])
for ibinx, ibiny in itertools.product(
range(1, dataset_histo.GetNbinsX() + 1),
range(1, dataset_histo.GetNbinsY() + 1)):
dataset_histo.SetBinError(
ibinx, ibiny, dataset_histo.GetBinContent(ibinx, ibiny)
* new_errors[ibiny - 1][ibinx - 1])
process_histo.Add(dataset_histo)
if self.plot_systematics and not process.isData and not process.isSignal \
and syst == "central":
dataset_histo_syst = dataset_histo.Clone()
self.histos["bkg_histo_syst"].Add(dataset_histo_syst)
yield_error = c_double(0.)
process_histo.cmt_yield = process_histo.IntegralAndError(
0, process_histo.GetNbinsX() + 1,
0, process_histo.GetNbinsY() + 1, yield_error)
process_histo.cmt_yield_error = yield_error.value
process_histo.cmt_bin_yield = []
process_histo.cmt_bin_yield_error = []
for ibiny in range(1, process_histo.GetNbinsY() + 1):
process_histo.cmt_bin_yield.append(
[process_histo.GetBinContent(ibinx, ibiny)
for ibinx in range(1, process_histo.GetNbinsX() + 1)]
)
process_histo.cmt_bin_yield_error.append(
[process_histo.GetBinError(ibinx, ibiny)
for ibinx in range(1, process_histo.GetNbinsX() + 1)]
)
if syst == "central":
if process.isSignal:
self.histos["signal"].append(process_histo)
elif process.isData:
self.histos["data"].append(process_histo)
else:
self.histos["background"].append(process_histo)
if not process.isData: #or not self.hide_data:
self.histos["all"].append(process_histo)
else:
self.histos["shape"]["%s_%s" % (syst, d)].append(process_histo)
self.plot(feature_pair)