Source code for cmt.base_tasks.plotting

# coding: utf-8

"""
Plotting tasks.
"""

__all__ = []

import os
from copy import deepcopy as copy
import json
import math
import itertools
import functools
import array
from collections import OrderedDict

import law
import luigi
import plotlib.root as r
from cmt.util import hist_to_array, hist_to_graph, get_graph_maximum, update_graph_values

from ctypes import c_double


from analysis_tools import ObjectCollection
from analysis_tools.utils import (
    import_root, create_file_dir, join_root_selection, randomize
)
from plotting_tools.root import get_labels, Canvas, RatioCanvas
from cmt.base_tasks.base import ( 
    DatasetTaskWithCategory, DatasetWrapperTask, HTCondorWorkflow, SGEWorkflow,
    ConfigTaskWithCategory, RDFModuleTask, InputData
)

from cmt.base_tasks.preprocessing import (
    Categorization, MergeCategorization, MergeCategorizationStats, EventCounterDAS
)


cmt_style = r.styles.copy("default", "cmt_style")
cmt_style.style.ErrorX = 0
cmt_style.x_axis.TitleOffset = 1.22
cmt_style.y_axis.TitleOffset = 1.48
cmt_style.legend.TextSize = 20
cmt_style.style.legend_dy = 0.035
cmt_style.style.legend_y2 = 0.93

EMPTY = -1.e5

directions = ["up", "down"]

ROOT = import_root()

[docs]class BasePlotTask(ConfigTaskWithCategory): """ Task that wraps parameters used in all plotting tasks. Can't be run. :param feature_names: names of features to plot. Uses all features when empty. :type feature_names: csv string :param feature_tags: list of tags for filtering features selected via feature names. :type feature_tags: csv string :param skip_feature_names: names or name pattern of features to skip :type skip_feature_names: csv string :param skip_feature_tags: list of tags of features to skip :type skip_feature_tags: csv string :param apply_weights: whether to apply weights and scaling to all histograms. :type apply_weights: bool :param n_bins: Custom number of bins for plotting, defaults to the value configured by the feature when empty. :type n_bins: int :param systematics: NOT YET IMPLEMENTED. List of custom systematics to be considered. :type systematics: csv list :param store_systematics: whether to store systematic templates inside the output root files. :type store_systematics: bool :param shape_region: shape region used for QCD computation. :type shape_region: str from choice list :param remove_horns: NOT YET IMPLEMENTED. Whether to remove the eta horns present in 2017 :type remove_horns: bool :param optimization_method: Optimization method to be used. Only `bayesian_blocks` available. :type optimization_method: str """ feature_names = law.CSVParameter(default=(), description="names of features to plot, uses all " "features when empty, default: ()") feature_tags = law.CSVParameter(default=(), description="list of tags for filtering features " "selected via feature_names, default: ()") skip_feature_names = law.CSVParameter(default=(), description="names or name pattern of " "features to skip, default: ()") skip_feature_tags = law.CSVParameter(default=("multiclass_dnn",), description="list of tags of " "features to skip, default: (multiclass_dnn,)") apply_weights = luigi.BoolParameter(default=True, description="whether to apply " "mc weights to histograms, default: True") n_bins = luigi.IntParameter(default=law.NO_INT, description="custom number of bins for " "plotting, defaults to the value configured by the feature when empty, default: empty") systematics = law.CSVParameter(default=(), description="list of systematics, default: empty") store_systematics = luigi.BoolParameter(default=True, description="whether to store " "systematic templates inside root files, default: True") shape_region = luigi.ChoiceParameter(default="os_inviso", choices=("os_inviso", "ss_iso"), significant=False, description="shape region default: os_inviso") remove_horns = luigi.BoolParameter(default=False, description="whether to remove horns " "from distributions, default: False") tree_name = luigi.Parameter(default="Events", description="name of the tree inside " "the root file, default: Events (nanoAOD)") optimization_method = luigi.ChoiceParameter(default="", choices=("", "bayesian_blocks"), significant=False, description="optimization method to be used, default: none") def __init__(self, *args, **kwargs): super(BasePlotTask, self).__init__(*args, **kwargs) # select features self.features = self.get_features() def get_features(self): features = [] for feature in self.config.features: if self.feature_names and not law.util.multi_match(feature.name, self.feature_names): continue if self.feature_tags and not any([feature.has_tag(tag) for tag in self.feature_tags]): continue if self.skip_feature_names and \ law.util.multi_match(feature.name, self.skip_feature_names): continue if self.skip_feature_tags and feature.has_tag(self.skip_feature_tags): continue features.append(feature) if len(features) == 0: raise ValueError("No features were included. Did you spell them correctly?") return features def round(self, number): if number - round(number) < 0.0001: return number if number > 10.: return round(number) elif number > 1.: return round(number, 1) i = 1 while True: if number > (1 / (10 ** i)): return round(number, i + 1) i += 1 def get_binning(self, feature, ifeat=0): if self.optimization_method: y_axis_adendum = "" binning = self.input()["bin_opt"].collection.targets[ifeat].load(formatter="json") n_bins = len(binning) - 1 binning_args = n_bins, array.array("f", binning) elif isinstance(feature.binning, tuple): if self.n_bins == law.NO_INT: nbins = feature.binning[0] else: nbins = self.n_bins y_axis_adendum = (" / %s %s" % ( self.round((feature.binning[2] - feature.binning[1]) / nbins), feature.get_aux("units")) if feature.get_aux("units") else "") binning = (nbins, feature.binning[1], feature.binning[2]) binning_args = tuple(binning) else: y_axis_adendum = "" if self.n_bins == law.NO_INT: n_bins = len(feature.binning) - 1 binning_args = n_bins, array.array("f", feature.binning) else: n_bins = self.n_bins binning_args = tuple(n_bins, feature.binning[0], feature.binning[-1]) return binning_args, y_axis_adendum def get_feature_systematics(self, feature): return feature.systematics def get_systs(self, feature, isMC): if not isMC: return [] systs = self.get_feature_systematics(feature) \ + self.config.get_weights_systematics(self.config.weights[self.category.name], isMC) systs += self.config.get_systematics_from_expression(self.category.selection) if self.region: systs += self.config.get_systematics_from_expression(self.region.selection) return self.get_unique_systs(systs) def get_unique_systs(self, systs): unique_systs = [] for syst in systs: if syst not in unique_systs: unique_systs.append(syst) return unique_systs def get_output_postfix(self): postfix = "" if self.region: postfix += "__" + self.region.name if not self.apply_weights: postfix += "__noweights" if self.optimization_method == "bayesian_blocks": postfix += "__opt_bb" if self.n_bins != law.NO_INT: postfix += "__%s_bins" % self.n_bins return postfix
[docs]class PrePlot(DatasetTaskWithCategory, BasePlotTask, law.LocalWorkflow, HTCondorWorkflow, SGEWorkflow, RDFModuleTask): """ Performs the filling of histograms for all features considered. If systematics are considered, it also produces the same histograms after applying those. :param skip_processing: whether to skip the preprocessing and categorization steps. :type skip_processing: bool :param skip_merging: whether to skip the MergeCategorization task. :type skip_merging: bool :param preplot_modules_file: filename inside ``cmt/config/`` or ``../config/`` (w/o extension) with the RDF modules to run. :type preplot_modules_file: str """ skip_processing = luigi.BoolParameter(default=False, description="whether to skip" " preprocessing and categorization steps, default: False") skip_merging = luigi.BoolParameter(default=False, description="whether to skip" " MergeCategorization task, default: False") preplot_modules_file = luigi.Parameter(description="filename with modules to run RDataFrame", default=law.NO_STR) dataset_names = law.CSVParameter(description="dataset_names to use for optimization", default=()) def __init__(self, *args, **kwargs): super(PrePlot, self).__init__(*args, **kwargs) self.custom_output_tag = "_%s" % self.region_name self.threshold = self.dataset.get_aux("event_threshold", None) self.merging_factor = self.dataset.get_aux("preprocess_merging_factor", None) self.syst_list = self.get_syst_list()
[docs] def get_syst_list(self): """ Returns a list of systematic names that affect present or past selections, so dedicated input ntuples are needed as requirements """ if self.skip_processing: return [] syst_list = [] isMC = self.dataset.process.isMC for feature in self.features: systs = self.get_unique_systs(self.get_systs(feature, isMC)) for syst in systs: if syst in syst_list: continue try: systematic = self.config.systematics.get(syst) if self.category.name in systematic.get_aux("affected_categories", []): syst_list.append(syst) except: continue other_systs = self.config.get_systematics_from_expression(self.category.selection) if self.region: other_systs += self.config.get_systematics_from_expression(self.region.selection) for syst in other_systs: if syst in syst_list: continue try: systematic = self.config.systematics.get(syst) if self.category.name in systematic.get_aux("affected_categories", []): syst_list.append(syst) except: continue return syst_list
[docs] def create_branch_map(self): """ :return: number of files after merging (usually 1) unless skip_processing == True :rtype: int """ if self.skip_processing or self.skip_merging: return len(self.dataset.get_files( os.path.expandvars("$CMT_TMP_DIR/%s/" % self.config_name), add_prefix=False, check_empty=True)) return self.n_files_after_merging
[docs] def workflow_requires(self): """ """ if self.skip_merging: reqs = {"data": {"central": Categorization.vreq(self, workflow="local")}} if self.store_systematics: for syst, d in itertools.product(self.syst_list, directions): reqs["data"][f"{syst}_{d}"] = Categorization.vreq(self, workflow="local", systematic=syst, systematic_direction=d) elif self.skip_processing: reqs= {"data": {"central": InputData.req(self)}} else: reqs = {"data": {"central": MergeCategorization.vreq(self, workflow="local")}} if self.store_systematics: for syst, d in itertools.product(self.syst_list, directions): reqs["data"][f"{syst}_{d}"] = MergeCategorization.vreq(self, workflow="local", systematic=syst, systematic_direction=d) if self.optimization_method == "bayesian_blocks": from cmt.base_tasks.optimization import BayesianBlocksOptimization reqs["bin_opt"] = BayesianBlocksOptimization.vreq(self, plot_systematics=False, _exclude=["branch", "branches", "custom_output_tag", "plot_systematics", "workflow"]) return reqs
[docs] def requires(self): """ Each branch requires one input file for the central value and two per systematic considered """ if self.skip_merging: reqs = {"central": Categorization.vreq(self, workflow="local", branch=self.branch)} if self.store_systematics: for syst, d in itertools.product(self.syst_list, directions): reqs[f"{syst}_{d}"] = Categorization.vreq(self, workflow="local", systematic=syst, systematic_direction=d, branch=self.branch) elif self.skip_processing: reqs= {"central": InputData.req(self, file_index=self.branch)} else: reqs = {"central": MergeCategorization.vreq(self, workflow="local", branch=self.branch)} if self.store_systematics: for syst, d in itertools.product(self.syst_list, directions): reqs[f"{syst}_{d}"] = MergeCategorization.vreq(self, workflow="local", systematic=syst, systematic_direction=d, branch=self.branch) if self.optimization_method: from cmt.base_tasks.optimization import BayesianBlocksOptimization reqs["bin_opt"] = BayesianBlocksOptimization.vreq(self, plot_systematics=False, _exclude=["branch", "branches", "custom_output_tag", "plot_systematics", "workflow"]) return reqs
[docs] def output(self): """ :return: One file per input file with all histograms to be plotted for each feature :rtype: `.root` """ return self.local_target("data{}_{}.root".format( self.get_output_postfix(), self.branch))
[docs] def get_weight(self, category, syst_name, syst_direction, **kwargs): """ Obtains the product of all weights depending on the category/channel applied. Returns "1" if it's a data sample or the apply_weights parameter is set to False. :return: Product of all weights to be applied :rtype: str """ if self.config.processes.get(self.dataset.process.name).isData or not self.apply_weights: return "1" else: return self.config.get_weights_expression( self.config.weights[category], syst_name, syst_direction) return self.config.weights.default
def define_histograms(self, dfs, nentries): histos = [] isMC = self.dataset.process.isMC for ifeat, feature in enumerate(self.features): binning_args, y_axis_adendum = self.get_binning(feature, ifeat) x_title = (str(feature.get_aux("x_title")) + (" [%s]" % feature.get_aux("units") if feature.get_aux("units") else "")) y_title = "Events" + y_axis_adendum title = "; %s; %s" % (x_title, y_title) systs_directions = [("central", "")] if isMC and self.store_systematics: systs = self.get_systs(feature, isMC) systs_directions += list(itertools.product(systs, directions)) # loop over systematics and up/down variations for syst_name, direction in systs_directions: # Select the appropriate RDF (input file) depending on the syst and direction key = "central" if f"{syst_name}_{direction}" in dfs: key = f"{syst_name}_{direction}" df = dfs[key] # apply selection if needed if feature.selection: feat_df = df.Define("feat_selection", self.config.get_object_expression( feature.selection, isMC, syst_name, direction)).Filter("feat_selection") else: feat_df = df # define tag just for the histogram's name if syst_name != "central" and isMC: tag = "_%s" % syst_name if direction != "": tag += "_%s" % direction else: tag = "" feature_expression = self.config.get_object_expression( feature, isMC, syst_name, direction) feature_name = feature.name + tag hist_base = ROOT.TH1D(feature_name, title, *binning_args) if nentries[key] > 0: hmodel = ROOT.RDF.TH1DModel(hist_base) histos.append( feat_df.Define( "weight", "{}".format(self.get_weight( self.category.name, syst_name, direction)) ).Define( "var", feature_expression).Histo1D(hmodel, "var", "weight") ) else: # no entries available, append empty histogram histos.append(hist_base) return histos
[docs] @law.decorator.notify @law.decorator.localize(input=False) def run(self): """ Creates one RDataFrame per input file, runs the desired RDFModules and produces a set of plots per each feature, one for the nominal value and others (if available) for all systematics. """ # prepare inputs and outputs if self.skip_processing: inp = self.get_input() else: inp = self.input() outp = self.output().path ROOT.ROOT.EnableImplicitMT(self.request_cpus) # create one RDF for the central value and each needed systematic dfs = {} nentries = {} for elem in ["central"] + [f"{syst}_{d}" for (syst, d) in itertools.product(self.syst_list, directions)]: if self.skip_processing: inp_to_consider = self.get_path(inp[elem])[0] if not self.dataset.friend_datasets: dfs[elem] = ROOT.RDataFrame(self.tree_name, self.get_path(inp[elem])) # friend tree else: tchain = ROOT.TChain() for f in self.get_path(inp[elem]): tchain.Add("{}/{}".format(f, self.tree_name)) friend_tchain = ROOT.TChain() for f in self.get_path(inp[elem], 1): friend_tchain.Add("{}/{}".format(f, self.tree_name)) tchain.AddFriend(friend_tchain, "friend") dfs[elem] = ROOT.RDataFrame(tchain) elif self.skip_merging: inp_to_consider = inp[elem].path dfs[elem] = ROOT.RDataFrame(self.tree_name, inp_to_consider) else: inp_to_consider = inp[elem].targets[self.branch].path dfs[elem] = ROOT.RDataFrame(self.tree_name, inp_to_consider) empty_file = False try: tf = ROOT.TFile.Open(inp_to_consider) tree = tf.Get(self.tree_name) nentries[elem] = tree.GetEntries() tf.Close() except: # no tree inside the file nentries[elem] = 0 empty_file = True # applying modules according to the systematic considered syst = "" d = "" if "_" in elem: syst = elem.split("_")[0] d = elem.split("_")[1] if not empty_file: modules = self.get_feature_modules(self.preplot_modules_file, systematic=syst, systematic_direction=d) if len(modules) > 0: for module in modules: dfs[elem], _ = module.run(dfs[elem]) selection = "1" dataset_selection = self.config.get_object_expression( self.dataset.get_aux("selection", "1"), self.dataset.process.isMC, syst, d) if self.skip_processing: selection = self.config.get_object_expression( self.category, self.dataset.process.isMC, syst, d) if dataset_selection and dataset_selection != "1": if selection != "1": selection = join_root_selection(dataset_selection, selection, op="and") else: selection = dataset_selection if self.region_name != law.NO_STR: region_selection = self.config.get_object_expression( self.config.regions.get(self.region_name).selection, self.dataset.process.isMC, syst, d) if selection != "1": selection = join_root_selection(region_selection, selection, op="and") else: selection = region_selection if selection != "1": dfs[elem] = dfs[elem].Define("selection", selection).Filter("selection") histos = self.define_histograms(dfs, nentries) out = ROOT.TFile.Open(create_file_dir(outp), "RECREATE") for histo in histos: histo = histo.Clone() histo.Sumw2() out.cd() histo.Write() out.Close()
[docs]class FeaturePlot(BasePlotTask, DatasetWrapperTask): """ Performs the actual histogram plotting: loads the histograms obtained in the PrePlot tasks, rescales them if needed and plots and saves them. Example command: ``law run FeaturePlot --version test --category-name etau --config-name ul_2018 \ --process-group-name etau --feature-names Htt_svfit_mass,lep1_pt,bjet1_pt,lep1_eta,bjet1_eta \ --workers 20 --PrePlot-workflow local --stack --hide-data False --do-qcd --region-name etau_os_iso\ --dataset-names tt_dl,tt_sl,dy_high,wjets,data_etau_a,data_etau_b,data_etau_c,data_etau_d \ --MergeCategorizationStats-version test_old`` :param stack: whether to show all backgrounds stacked (True) or normalized to 1 (False) :type stack: bool :param do_qcd: whether to estimate QCD using the ABCD method :type do_qcd: bool :param qcd_wp: working point to use for QCD estimation :type qcd_wp: str from choice list :param qcd_signal_region_wp: region to use as signal region for QCD estimation :type qcd_signal_region_wp: str :param shape_region: region to use as shape region for QCD estimation :type shape_region: str from choice list :param qcd_sym_shape: whether to symmetrise the shape coming from both possible shape regions :type qcd_sym_shape: bool :param qcd_category_name: category name used for the same sign regions in QCD estimation :type qcd_category_name: str :param hide_data: whether to show (False) or hide (True) the data histograms :type hide_data: bool :param normalize_signals: whether to normalize signals to the total background yield (True) or not (False) :type normalize_signals: bool :param avoid_normalization: whether to avoid normalizing by cross section and initial number of events :type avoid_normalization: bool :param blinded: whether to blind data in specified regions. The blinding ranges are specified using the `blinded_range` parameter in the Feature definition. This parameter can include a list ([initial, final]) or a list of lists ([[init_1, fin_1], [init_2, fin_2], ...]) :type blinded: bool :param save_png: whether to save plots in png :type save_png: bool :param save_pdf: whether to save plots in pdf :type save_pdf: bool :param save_root: whether to write plots in a root file :type save_root: bool :param save_yields: whether to save histogram yields in a json file :type save_yields: bool :param process_group_name: name of the process grouping name :type process_group_name: str :param bins_in_x_axis: (NOT YET IMPLEMENTED) whether to plot histograms with the bin numbers in the x axis instead of the actual values :type bins_in_x_axis: bool :param plot_systematics: (NOT YET FULLY IMPLEMENTED) whether to plot histograms with their uncertainties :type plot_systematics: bool :param fixed_colors: whether to plot histograms with their defined colors (False) or fixed colors (True) starting from ``ROOT`` color ``2``. :type fixed_colors: bool :param log_y: whether to set y axis to log scale :type log_y: bool :param include_fit: YAML file inside config folder (w/o extension) including input parameters for the fit :type include_fit: str :param propagate_syst_qcd: whether to propagate systematics to qcd background :type propagate_syst_qcd: bool """ stack = luigi.BoolParameter(default=False, description="when set, stack backgrounds, weight " "them with dataset and category weights, and normalize afterwards, default: False") do_qcd = luigi.BoolParameter(default=False, description="whether to compute the QCD shape, " "default: False") qcd_wp = luigi.ChoiceParameter(default=law.NO_STR, choices=(law.NO_STR, "vvvl_vvl", "vvl_vl", "vl_l", "l_m"), significant=False, description="working points to use for qcd computation, default: empty (vvvl - m)") qcd_signal_region_wp = luigi.Parameter(default="os_iso", description="signal region wp, " "default: os_iso") shape_region = luigi.ChoiceParameter(default="os_inviso", choices=("os_inviso", "ss_iso"), significant=True, description="shape region default: os_inviso") qcd_sym_shape = luigi.BoolParameter(default=False, description="symmetrize QCD shape, " "default: False") qcd_category_name = luigi.Parameter(default="default", description="category use " "for qcd regions ss_iso and ss_inviso, default=default (same as category)") do_sideband = luigi.BoolParameter(default=False, description="whether to compute the background " "shape from sideband region, default: False") hide_data = luigi.BoolParameter(default=True, description="hide data events, default: True") normalize_signals = luigi.BoolParameter(default=True, description="whether to normalize " "signals to the total bkg yield, default: True") avoid_normalization = luigi.BoolParameter(default=False, description="whether to avoid " "normalizing by cross section and initial number of events, default: False") blinded = luigi.BoolParameter(default=False, description="whether to blind bins above a " "certain feature value, default: False") save_png = luigi.BoolParameter(default=False, description="whether to save created histograms " "in png, default: True") save_pdf = luigi.BoolParameter(default=True, description="whether to save created histograms " "in pdf, default: True") save_root = luigi.BoolParameter(default=False, description="whether to save created histograms " "in root files, default: False") save_yields = luigi.BoolParameter(default=False, description="whether to save event yields per " "process, default: False") process_group_name = luigi.Parameter(default="default", description="the name of the process " "grouping, only encoded into output paths when changed, default: default") bins_in_x_axis = luigi.BoolParameter(default=False, description="whether to show in the x axis " "bin numbers instead of the feature value, default: False") plot_systematics = luigi.BoolParameter(default=True, description="whether plot systematics, " "default: True") fixed_colors = luigi.BoolParameter(default=False, description="whether to use fixed colors " "for plotting, default: False") log_y = luigi.BoolParameter(default=False, description="set logarithmic scale for Y axis, " "default: False") include_fit = luigi.Parameter(default="", description="fit to be included in the plots, " "default: None") propagate_syst_qcd = luigi.BoolParameter(default=False, description="whether to propagate systematics to qcd background, " "default: False") # # optimization parameters # bin_opt_version = luigi.Parameter(default=law.NO_STR, description="version of the binning " # "optimization task to use, not used when empty, default: empty") # n_start_bins = luigi.IntParameter(default=10, description="number of bins to be used " # "as initial value for scans, default: 10") # threshold = luigi.FloatParameter(default=-1., description="threshold per bin, " # "default: -1.") # threshold_method = luigi.ChoiceParameter(default="yield", # choices=("yield", "raw_events", "tt_dy", "tt_and_dy", "tt_dy_and_yield", # "tt_and_dy_and_yield", "rel_error"), # significant=False, description="threshold method to be used, default: yield") # region_to_optimize = luigi.Parameter(default=law.NO_STR, description="region used for the " # "optimization, default: same region as plot") # Not implemented yet bin_opt_version = None # remove_horns = False default_process_group_name = "default" additional_scaling = {"dummy": 1} # Temporary fix, the DictParameter fails when empty def __init__(self, *args, **kwargs): super(FeaturePlot, self).__init__(*args, **kwargs) # select processes and datasets assert self.process_group_name in self.config.process_group_names assert not (self.do_qcd and self.do_sideband) self.processes_datasets = {} self.datasets_to_run = [] def get_processes(dataset=None, process=None): processes = ObjectCollection() if dataset and not process: process = self.config.processes.get(dataset.process.name) processes.append(process) if process.parent_process: processes += get_processes(process=self.config.processes.get( process.parent_process)) return processes for dataset in self.datasets: processes = get_processes(dataset=dataset) filtered_processes = ObjectCollection() for process in processes: if process.name in self.config.process_group_names[self.process_group_name]: filtered_processes.append(process) if len(filtered_processes) > 1: raise Exception("%s process group name includes not orthogonal processes %s" % (self.process_group_name, ", ".join(filtered_processes.names))) elif len(filtered_processes) == 1: process = filtered_processes[0] if process not in self.processes_datasets: self.processes_datasets[process] = [] self.processes_datasets[process].append(dataset) self.datasets_to_run.append(dataset) if len(self.datasets_to_run) == 0: raise ValueError("No datasets were selected. Are you sure you want to use" " %s as process_group_name?" % self.process_group_name) # get QCD regions when requested self.qcd_regions = None if self.do_qcd: wp = self.qcd_wp if self.qcd_wp != law.NO_STR else "" self.qcd_regions = self.config.get_qcd_regions(region=self.region, category=self.category, wp=wp, shape_region=self.shape_region, signal_region_wp=self.qcd_signal_region_wp, sym=self.qcd_sym_shape) # complain when no data is present if not any(dataset.process.isData for dataset in self.datasets): raise Exception("no real dataset passed for QCD estimation") self.sideband_regions = None if self.do_sideband: # Several fixes may be needed later for this assert self.region.name == "signal" self.sideband_regions = {key: self.config.regions.get(key) for key in ["signal", "background"]} # obtain the list of systematics that apply to the normalization only if this is done self.norm_syst_list = [] weights = self.config.weights.total_events_weights for weight in weights: try: feature = self.config.features.get(weight) for syst in feature.systematics: if syst not in self.norm_syst_list: self.norm_syst_list.append(syst) except: # weight not defined as a feature -> no syst available continue
[docs] def requires(self): """ All requirements needed: * Histograms coming from the PrePlot task. * Number of total events coming from the MergeCategorizationStats task \ (to normalize MC histograms). * If estimating QCD, FeaturePlot for the three additional QCD regions needed. """ reqs = {} reqs["data"] = OrderedDict( ((dataset.name, category.name), PrePlot.vreq(self, dataset_name=dataset.name, category_name=self.get_data_category(category).name)) for dataset, category in itertools.product( self.datasets_to_run, self.expand_category()) ) if not self.avoid_normalization: reqs["stats"] = OrderedDict() for dataset in self.datasets_to_run: if dataset.process.isData: continue reqs["stats"][dataset.name] = {} for elem in ["central"] + [f"{syst}_{d}" for (syst, d) in itertools.product(self.norm_syst_list, directions)]: syst = "" d = "" if "_" in elem: syst = elem.split("_")[0] d = elem.split("_")[1] if dataset.get_aux("secondary_dataset", None): reqs["stats"][dataset.name][elem] = MergeCategorizationStats.vreq(self, dataset_name=dataset.get_aux("secondary_dataset"), systematic=syst, systematic_direction=d) else: reqs["stats"][dataset.name][elem] = MergeCategorizationStats.vreq(self, dataset_name=dataset.name, systematic=syst, systematic_direction=d) if self.do_qcd: reqs["qcd"] = { key: self.req(self, region_name=region.name, blinded=False, hide_data=False, do_qcd=False, stack=True, save_root=True, save_pdf=True, save_yields=False, remove_horns=False,_exclude=["feature_tags", "shape_region", "qcd_category_name", "qcd_sym_shape", "qcd_signal_region_wp"]) for key, region in self.qcd_regions.items() } if self.qcd_category_name != "default": # to be fixed reqs["qcd"]["ss_iso"] = FeaturePlot.vreq(self, region_name=self.qcd_regions.ss_iso.name, category_name=self.qcd_category_name, blinded=False, hide_data=False, do_qcd=False, stack=True, save_root=True, save_pdf=True, save_yields=False, feature_names=(self.qcd_feature,), bin_opt_version=law.NO_STR, remove_horns=self.remove_horns, _exclude=["feature_tags", "shape_region", "qcd_category_name", "qcd_sym_shape", "bin_opt_version", "qcd_signal_region_wp"]) # reqs["qcd"]["os_inviso"] = FeaturePlot.vreq(self, # region_name=self.qcd_regions.os_inviso.name, category_name=self.qcd_category_name, # blinded=False, hide_data=False, do_qcd=False, stack=True, save_root=True, # save_pdf=True, save_yields=True, feature_names=(self.qcd_feature,), # remove_horns=self.remove_horns, _exclude=["feature_tags", "bin_opt_version"]) reqs["qcd"]["ss_inviso"] = FeaturePlot.vreq(self, region_name=self.qcd_regions.ss_inviso.name, category_name=self.qcd_category_name, blinded=False, hide_data=False, do_qcd=False, stack=True, save_root=True, save_pdf=True, save_yields=False, feature_names=(self.qcd_feature,), bin_opt_version=law.NO_STR, remove_horns=self.remove_horns, _exclude=["feature_tags", "shape_region", "qcd_category_name", "qcd_sym_shape", "bin_opt_version", "qcd_signal_region_wp"]) if self.do_sideband: reqs["sideband"] = { key: self.req(self, region_name=region.name, blinded=False, hide_data=False, do_sideband=False, stack=True, save_root=True, save_pdf=False, save_yields=False, remove_horns=False,_exclude=["feature_tags", "shape_region", "qcd_category_name", "qcd_sym_shape", "qcd_signal_region_wp"]) for key, region in self.sideband_regions.items() } if self.optimization_method: from cmt.base_tasks.optimization import BayesianBlocksOptimization reqs["bin_opt"] = BayesianBlocksOptimization.vreq(self, plot_systematics=False) if self.include_fit: import yaml from cmt.utils.yaml_utils import ordered_load with open(self.retrieve_file("config/{}.yaml".format(self.include_fit))) as f: fit_params = ordered_load(f, yaml.SafeLoader) from cmt.base_tasks.analysis import Fit params = ", ".join([f"{param}='{value}'" for param, value in fit_params.items() if param != "fit_parameters"]) if "fit_parameters" in fit_params: params += ", fit_parameters={" + ", ".join([f"'{param}': '{value}'" for param, value in fit_params["fit_parameters"].items()]) + "}" reqs["fit"] = eval(f"Fit.vreq(self, {params}, _exclude=['include_fit'])") return reqs
[docs] def get_output_postfix(self, key="pdf"): """ :return: string to be included in the output filenames :rtype: str """ postfix = super(FeaturePlot, self).get_output_postfix() if self.process_group_name != self.default_process_group_name: postfix += "__pg_" + self.process_group_name if self.do_qcd: postfix += "__qcd" if self.do_sideband: postfix += "__sideband" if self.hide_data: postfix += "__nodata" elif self.blinded and key not in ("root", "yields"): postfix += "__blinded" if self.stack and key not in ("root", "yields"): postfix += "__stack" if self.include_fit: postfix += "__withfit" if self.log_y and key not in ("root", "yields"): postfix += "__log" return postfix
[docs] def output(self): """ Output files to be filled: pdf, png, root or json """ # output definitions, i.e. key, file prefix, extension output_data = [] if self.save_pdf: output_data.append(("pdf", "", "pdf")) if self.save_png: output_data.append(("png", "", "png")) if self.save_root: output_data.append(("root", "root/", "root")) if self.save_yields: output_data.append(("yields", "yields/", "json")) return { key: law.SiblingFileCollection(OrderedDict( (feature.name, self.local_target("{}{}{}.{}".format( prefix, feature.name, self.get_output_postfix(key), ext))) for feature in self.features )) for key, prefix, ext in output_data }
[docs] def complete(self): """ Task is completed when all output are present """ return ConfigTaskWithCategory.complete(self)
[docs] def setup_signal_hist(self, hist, color): """ Method to apply signal format to an histogram """ hist.hist_type = "signal" hist.legend_style = "l" hist.SetLineColor(color) hist.SetLineWidth(2)
[docs] def setup_background_hist(self, hist, color): """ Method to apply background format to an histogram """ hist.hist_type = "background" if self.stack: hist.SetLineColor(ROOT.kBlack) hist.SetLineWidth(1) hist.SetFillColor(color) hist.legend_style = "f" else: hist.SetLineColor(color) hist.legend_style = "l" hist.SetLineWidth(2)
[docs] def setup_data_hist(self, hist, color): """ Method to apply data format to an histogram """ hist.legend_style = "lp" hist.hist_type = "data" hist.SetMarkerStyle(20) hist.SetMarkerColor(color) hist.SetLineColor(color) hist.SetBinErrorOption((ROOT.TH1.kPoisson if self.stack else ROOT.TH1.kNormal))
def get_norm_systematics(self): return self.config.get_norm_systematics(self.processes_datasets, self.region)
[docs] def plot(self, feature, ifeat=0): """ Performs the actual plotting. """ # helper to extract the qcd shape in a region def get_qcd(region, files, syst='', bin_limit=0.): d_hist = files[region].Get("histograms/" + self.data_names[0]) if not d_hist: raise Exception("data histogram '{}' not found for region '{}' in tfile {}".format( self.data_names[0], region, files[region])) b_hists = [] for b_name in self.background_names: b_hist = files[region].Get("histograms/" + b_name + syst) if not b_hist: raise Exception("background histogram '{}' not found in region '{}'".format( b_name, region)) b_hists.append(b_hist) qcd_hist = d_hist.Clone(randomize("qcd_" + region + syst)) for hist in b_hists: qcd_hist.Add(hist, -1.) # removing negative bins for ibin in range(1, qcd_hist.GetNbinsX() + 1): if qcd_hist.GetBinContent(ibin) < bin_limit: qcd_hist.SetBinContent(ibin, 1.e-6) return qcd_hist # helper to extract data and bkg histos in sideband and signal regions def get_sideband(region, files, bin_limit=0.): d_hist = files[region].Get("histograms/" + self.data_names[0]).Clone( randomize("bkg_" + region)) if not d_hist: raise Exception("data histogram '{}' not found for region '{}' in tfile {}".format( self.data_names[0], region, files[region])) b_hist = None for b_name in self.background_names: if not b_hist: b_hist = files[region].Get("histograms/" + b_name).Clone( randomize("bkg_" + region)) else: b_hist.Add(files[region].Get("histograms/" + b_name)) return d_hist, b_hist def get_integral_and_error(hist): error = c_double(0.) integral = hist.IntegralAndError(0, hist.GetNbinsX() + 1, error) error = error.value compatible = True if integral <= 0 else False # compatible = True if integral - error <= 0 else False return integral, error, compatible def get_ratio(num, den): if den == 0: return 0 return num / den # I think these are for the SIGNAL REGION ONLY background_hists = self.histos["background"] signal_hists = self.histos["signal"] data_hists = self.histos["data"] all_hists = self.histos["all"] if self.plot_systematics: bkg_histo_syst = self.histos["bkg_histo_syst"] binning_args, y_axis_adendum = self.get_binning(feature, ifeat) x_title = (str(feature.get_aux("x_title")) + (" [%s]" % feature.get_aux("units") if feature.get_aux("units") else "")) y_title = ("Events" if self.stack else "Normalized Events") + y_axis_adendum hist_title = "; %s; %s" % (x_title, y_title) # qcd shape files qcd_shape_files = None if self.do_qcd: qcd_shape_files = {} for key, region in self.qcd_regions.items(): if self.qcd_category_name != "default": my_feature = (feature.name if "shape" in key or key == "os_inviso" else self.qcd_feature) else: my_feature = feature.name qcd_shape_files[key] = ROOT.TFile.Open(self.input()["qcd"][key]["root"].targets[my_feature].path) # do the qcd extrapolation if "shape" in qcd_shape_files: qcd_hist = get_qcd("shape", qcd_shape_files).Clone(randomize("qcd")) n_qcd_hist, n_qcd_hist_error, n_qcd_hist_compatible = get_integral_and_error(qcd_hist) if not n_qcd_hist_compatible: qcd_hist.Scale(1. / n_qcd_hist) else: #sym shape qcd_hist = get_qcd("shape1", qcd_shape_files).Clone(randomize("qcd")) qcd_hist2 = get_qcd("shape2", qcd_shape_files).Clone(randomize("qcd")) n_qcd_hist1, n_qcd_hist1_error, n_qcd_hist1_compatible = get_integral_and_error(qcd_hist) n_qcd_hist2, n_qcd_hist2_error, n_qcd_hist2_compatible = get_integral_and_error(qcd_hist2) qcd_hist.Scale(1. / n_qcd_hist1) qcd_hist2.Scale(1. / n_qcd_hist2) qcd_hist.Add(qcd_hist2) qcd_hist.Scale(0.5) n_os_inviso, n_os_inviso_error, n_os_inviso_compatible = get_integral_and_error( get_qcd("os_inviso", qcd_shape_files, bin_limit=-999)) # C n_ss_iso, n_ss_iso_error, n_ss_iso_compatible = get_integral_and_error( get_qcd("ss_iso", qcd_shape_files, bin_limit=-999)) # B n_ss_inviso, n_ss_inviso_error, n_ss_inviso_compatible = get_integral_and_error( get_qcd("ss_inviso", qcd_shape_files, bin_limit=-999)) # D # if not n_ss_iso or not n_ss_inviso: if n_os_inviso_compatible or n_ss_iso_compatible or n_ss_inviso_compatible: print("****WARNING: QCD normalization failed (negative yield), Removing QCD!") qcd_scaling = 0. # qcd_inviso = 0. # qcd_inviso_error = 0. qcd_hist.Scale(qcd_scaling) else: # qcd_inviso = n_os_inviso / n_ss_inviso # C/D # qcd_inviso_error = qcd_inviso * math.sqrt( # (n_os_inviso_error / n_os_inviso) * (n_os_inviso_error / n_os_inviso) # + (n_ss_inviso_error / n_ss_inviso) * (n_ss_inviso_error / n_ss_inviso) # # + 4 * (n_ss_inviso_error / n_ss_inviso) * (n_ss_inviso_error / n_ss_inviso) # ) qcd_scaling = n_os_inviso * n_ss_iso / n_ss_inviso # C*B/D os_inviso_rel_error = n_os_inviso_error / n_os_inviso ss_iso_rel_error = n_ss_iso_error / n_ss_iso ss_inviso_rel_error = n_ss_inviso_error / n_ss_inviso new_errors_sq = [] for ib in range(1, qcd_hist.GetNbinsX() + 1): if qcd_hist.GetBinContent(ib) > 0: bin_rel_error = qcd_hist.GetBinError(ib) / qcd_hist.GetBinContent(ib) else: bin_rel_error = 0 new_errors_sq.append(bin_rel_error * bin_rel_error + os_inviso_rel_error * os_inviso_rel_error + ss_iso_rel_error * ss_iso_rel_error + ss_inviso_rel_error * ss_inviso_rel_error) qcd_hist.Scale(qcd_scaling) # fix errors for ib in range(1, qcd_hist.GetNbinsX() + 1): qcd_hist.SetBinError(ib, qcd_hist.GetBinContent(ib) * math.sqrt(new_errors_sq[ib - 1])) # store and style yield_error = c_double(0.) qcd_hist.cmt_yield = qcd_hist.IntegralAndError( 0, qcd_hist.GetNbinsX() + 1, yield_error) qcd_hist.cmt_yield_error = yield_error.value qcd_hist.cmt_bin_yield = [] qcd_hist.cmt_bin_yield_error = [] for ibin in range(1, qcd_hist.GetNbinsX() + 1): qcd_hist.cmt_bin_yield.append(qcd_hist.GetBinContent(ibin)) qcd_hist.cmt_bin_yield_error.append(qcd_hist.GetBinError(ibin)) qcd_hist.cmt_scale = 1. qcd_hist.cmt_process_name = "qcd" qcd_hist.process_label = "QCD" qcd_hist.SetTitle("QCD") qcd_c = tuple([255, 87, 215]) qcd_color = ROOT.TColor.GetColor(*qcd_c) self.setup_background_hist(qcd_hist, qcd_color) background_hists.append(qcd_hist) all_hists.append(qcd_hist) if self.propagate_syst_qcd: print("****INFO: Propagating shape uncertainties to QCD") for syst_dir in self.histos["shape"].keys(): syst_dir_name = "_{}".format(syst_dir) # do the qcd extrapolation if "shape" in qcd_shape_files: qcd_hist = get_qcd("shape", qcd_shape_files, syst=syst_dir_name).Clone(randomize("qcd")) n_qcd_hist, n_qcd_hist_error, n_qcd_hist_compatible = get_integral_and_error(qcd_hist) if not n_qcd_hist_compatible: qcd_hist.Scale(1. / n_qcd_hist) else: #sym shape qcd_hist = get_qcd("shape1", qcd_shape_files, syst=syst_dir_name).Clone(randomize("qcd")) qcd_hist2 = get_qcd("shape2", qcd_shape_files, syst=syst_dir_name).Clone(randomize("qcd")) n_qcd_hist1, n_qcd_hist1_error, n_qcd_hist1_compatible = get_integral_and_error(qcd_hist) n_qcd_hist2, n_qcd_hist2_error, n_qcd_hist2_compatible = get_integral_and_error(qcd_hist2) qcd_hist.Scale(1. / n_qcd_hist1) qcd_hist2.Scale(1. / n_qcd_hist2) qcd_hist.Add(qcd_hist2) qcd_hist.Scale(0.5) n_os_inviso, n_os_inviso_error, n_os_inviso_compatible = get_integral_and_error( get_qcd("os_inviso", qcd_shape_files, syst=syst_dir_name, bin_limit=-999)) # C n_ss_iso, n_ss_iso_error, n_ss_iso_compatible = get_integral_and_error( get_qcd("ss_iso", qcd_shape_files, syst=syst_dir_name, bin_limit=-999)) # B n_ss_inviso, n_ss_inviso_error, n_ss_inviso_compatible = get_integral_and_error( get_qcd("ss_inviso", qcd_shape_files, syst=syst_dir_name, bin_limit=-999)) # D # if not n_ss_iso or not n_ss_inviso: if n_os_inviso_compatible or n_ss_iso_compatible or n_ss_inviso_compatible: print("****WARNING: QCD normalization failed (negative yield), Removing QCD!") qcd_scaling = 0. qcd_hist.Scale(qcd_scaling) else: qcd_scaling = n_os_inviso * n_ss_iso / n_ss_inviso # C*B/D os_inviso_rel_error = n_os_inviso_error / n_os_inviso ss_iso_rel_error = n_ss_iso_error / n_ss_iso ss_inviso_rel_error = n_ss_inviso_error / n_ss_inviso new_errors_sq = [] for ib in range(1, qcd_hist.GetNbinsX() + 1): if qcd_hist.GetBinContent(ib) > 0: bin_rel_error = qcd_hist.GetBinError(ib) / qcd_hist.GetBinContent(ib) else: bin_rel_error = 0 new_errors_sq.append(bin_rel_error * bin_rel_error + os_inviso_rel_error * os_inviso_rel_error + ss_iso_rel_error * ss_iso_rel_error + ss_inviso_rel_error * ss_inviso_rel_error) qcd_hist.Scale(qcd_scaling) # fix errors for ib in range(1, qcd_hist.GetNbinsX() + 1): qcd_hist.SetBinError(ib, qcd_hist.GetBinContent(ib) * math.sqrt(new_errors_sq[ib - 1])) qcd_hist.cmt_process_name = "qcd" self.histos["shape"][syst_dir].append(qcd_hist) # sideband files sideband_files = None if self.do_sideband: sideband_files = {} for key, region in self.sideband_regions.items(): my_feature = feature.name sideband_files[key] = ROOT.TFile.Open( self.input()["sideband"][key]["root"].targets[my_feature].path) # do the qcd extrapolation data_hist, bkg_hist_background_region = get_sideband("background", sideband_files) _, bkg_hist_signal_region = get_sideband("signal", sideband_files) n_bkg_background, n_bkg_background_error, _ = get_integral_and_error( bkg_hist_background_region) n_bkg_signal, n_bkg_signal_error, _ = get_integral_and_error( bkg_hist_signal_region) bkg_hist = data_hist.Clone(randomize("bkg")) n_bkg_background_rel_error = get_ratio(n_bkg_background_error, n_bkg_background) n_bkg_signal_rel_error = get_ratio(n_bkg_signal_error, n_bkg_signal) new_errors_sq = [] for ib in range(1, bkg_hist.GetNbinsX() + 1): if bkg_hist.GetBinContent(ib) > 0: bin_rel_error = bkg_hist.GetBinError(ib) / bkg_hist.GetBinContent(ib) else: bin_rel_error = 0 new_errors_sq.append(bin_rel_error * bin_rel_error + n_bkg_background_rel_error * n_bkg_background_rel_error + n_bkg_signal_rel_error * n_bkg_signal_rel_error) bkg_hist.Scale(n_bkg_signal / n_bkg_background) # fix errors for ib in range(1, bkg_hist.GetNbinsX() + 1): bkg_hist.SetBinError(ib, bkg_hist.GetBinContent(ib) * math.sqrt(new_errors_sq[ib - 1])) # store and style yield_error = c_double(0.) bkg_hist.cmt_yield = bkg_hist.IntegralAndError( 0, bkg_hist.GetNbinsX() + 1, yield_error) bkg_hist.cmt_yield_error = yield_error.value bkg_hist.cmt_bin_yield = [] bkg_hist.cmt_bin_yield_error = [] for ibin in range(1, bkg_hist.GetNbinsX() + 1): bkg_hist.cmt_bin_yield.append(bkg_hist.GetBinContent(ibin)) bkg_hist.cmt_bin_yield_error.append(bkg_hist.GetBinError(ibin)) bkg_hist.cmt_scale = 1. bkg_hist.cmt_process_name = "background" bkg_hist.process_label = "Background" bkg_hist.SetTitle("Background") try: bkg_color = ROOT.TColor.GetColor(*self.config.processes.get("background").color) except: # background process not defined in config bkg_color = ROOT.kRed self.setup_background_hist(bkg_hist, bkg_color) background_hists = [bkg_hist] all_hists.append(bkg_hist) if not self.hide_data: all_hists += data_hists bkg_histo = None data_histo = None if not self.stack: for hist in all_hists: scale = 1. / (hist.Integral() or 1.) hist.Scale(scale) hist.scale = scale draw_hists = all_hists[::-1] else: background_stack = ROOT.THStack(randomize("stack"), "") for hist in background_hists[::-1]: # hist.SetFillColor(ROOT.kRed) background_stack.Add(hist) if not bkg_histo: bkg_histo = hist.Clone() else: bkg_histo.Add(hist.Clone()) background_stack.hist_type = "background" # Normalize signal histograms to background sum if self.normalize_signals and bkg_histo: for hist in signal_hists: signal_yield = hist.cmt_yield scale = (bkg_histo.Integral() / signal_yield if signal_yield != 0 else 1.) hist.Scale(scale) hist.cmt_scale = scale draw_hists = [background_stack] + signal_hists[::-1] if not self.hide_data: # blinding blinded_range = feature.get_aux("blinded_range", None) if blinded_range and self.blinded: for hist in data_hists: for ib in range(1, hist.GetNbinsX() + 1): blind = False if isinstance(blinded_range[0], list): for iblinded_range in blinded_range: if (hist.GetBinCenter(ib) >= iblinded_range[0] and hist.GetBinCenter(ib) <= iblinded_range[1]): blind = True break else: if (hist.GetBinCenter(ib) >= blinded_range[0] and hist.GetBinCenter(ib) <= blinded_range[1]): blind = True if blind: hist.SetBinContent(ib, -999) hist.SetBinError(ib, 0) draw_hists.extend(data_hists[::-1]) for hist in data_hists: if not data_histo: data_histo = hist.Clone() else: data_histo.Add(hist.Clone()) entries = [(hist, hist.process_label, hist.legend_style) for hist in all_hists] n_entries = len(entries) if n_entries <= 4: n_cols = 1 elif n_entries <= 8: n_cols = 2 else: n_cols = 3 col_width = 0.125 n_rows = math.ceil(n_entries / float(n_cols)) row_width = 0.06 legend_x2 = 0.88 legend_x1 = legend_x2 - n_cols * col_width legend_y2 = 0.88 legend_y1 = legend_y2 - n_rows * row_width legend = ROOT.TLegend(legend_x1, legend_y1, legend_x2, legend_y2) legend.SetBorderSize(0) legend.SetNColumns(n_cols) for entry in entries: legend.AddEntry(*entry) dummy_hist = ROOT.TH1F(randomize("dummy"), hist_title, *binning_args) # Draw if self.hide_data or len(data_hists) == 0 or len(background_hists) == 0 or not self.stack: do_ratio = False c = Canvas() if self.log_y: c.SetLogy() label_scaling = 1 else: do_ratio = True c = RatioCanvas() dummy_hist.GetXaxis().SetLabelOffset(100) dummy_hist.GetXaxis().SetTitleOffset(100) c.get_pad(1).cd() if self.log_y: c.get_pad(1).SetLogy() label_scaling = 5./4. # r.setup_hist(dummy_hist, pad=c.get_pad(1)) r.setup_hist(dummy_hist) if do_ratio: r.setup_y_axis(dummy_hist.GetYaxis(), pad=c.get_pad(1)) dummy_hist.GetYaxis().SetMaxDigits(4) dummy_hist.GetYaxis().SetTitleOffset(1.22) maximum = max([hist.GetMaximum() for hist in draw_hists]) if self.log_y: dummy_hist.SetMaximum(100 * maximum) dummy_hist.SetMinimum(0.0011) else: dummy_hist.SetMaximum(1.35 * maximum) dummy_hist.SetMinimum(0.001) inner_text=[self.category.label + " category"] if self.region: if isinstance(self.region.label, list): inner_text += self.region.label else: inner_text.append(self.region.label) if self.normalize_signals and self.stack and signal_hists and bkg_histo: scale_text = [] for hist in signal_hists: scale = hist.cmt_scale if scale != 1.: if scale < 100: scale = "{:.1f}".format(scale) elif scale < 10000: scale = "{}".format(int(round(scale))) else: scale = "{:.2e}".format(scale).replace("+0", "").replace("+", "") scale_text.append("{} x{}".format(hist.process_label, scale)) inner_text.append("#scale[0.75]{{{}}}".format(", ".join(scale_text))) if maximum > 1e4 and not self.log_y: upper_left_offset = 0.05 else: upper_left_offset = 0.0 if not (self.hide_data or not len(data_hists) > 0) or self.stack: upper_right="{}, {} TeV ({:.1f} ".format( self.config.year, self.config.ecm, self.config.lumi_fb ) + "fb^{-1})" else: upper_right="{} Simulation ({} TeV)".format( self.config.year, self.config.ecm, ) m = max([hist.GetMaximum() for hist in draw_hists]) if not self.log_y else None draw_labels = get_labels( upper_left_offset=upper_left_offset, upper_right=upper_right, scaling=label_scaling, inner_text=inner_text, max=m ) dummy_hist.Draw() # Draw fit if required if self.include_fit: with open(self.input()["fit"][feature.name]["json"].path) as f: d = json.load(f) d = d[""] x_range = self.requires()["fit"].x_range x = ROOT.RooRealVar("x", "x", float(x_range[0]), float(x_range[1])) l = ROOT.RooArgList(x) xframe = x.frame(); if self.requires()["fit"].method == "voigtian": mean = ROOT.RooRealVar('mean', 'Mean of Voigtian', float(d["mean"])) sigma = ROOT.RooRealVar('sigma', 'Sigma of Voigtian', float(d["sigma"])) gamma = ROOT.RooRealVar('gamma', 'Gamma of Voigtian', float(d["gamma"])) fit = ROOT.RooVoigtian("fit", "fit", x, mean, gamma, sigma) elif self.requires()["fit"].method == "polynomial": order = int(self.requires()["fit"].fit_parameters.get("order", 1)) params = [ROOT.RooRealVar(f'p{i}', f'p{i}', float(d[f'p{i}'])) for i in range(order)] fit = ROOT.RooPolynomial("fit", "fit", x, ROOT.RooArgList(*params)) elif self.requires()["fit"].method == "exponential": p = ROOT.RooRealVar("c", "c", float(d["c"])) fit = ROOT.RooExponential("fit", "fit", x, p) elif self.requires()["fit"].method == "powerlaw": order = int(self.requires()["fit"].fit_parameters.get("order", 1)) params = [x] for i in range(order): params.append(ROOT.RooRealVar(f'a{i}', f'a{i}', d[f"a{i}"])) params.append(ROOT.RooRealVar(f'b{i}', f'b{i}', d[f"b{i}"])) fun = " + ".join([f"@{i + 1} * TMath::Power(@0, @{i + 2})" for i in range(0, order, 2)]) fit = ROOT.RooGenericPdf("powerlaw", fun, ROOT.RooArgList(*params)) if self.stack: process_name = self.requires()["fit"].process_name for hist in all_hists: if hist.cmt_process_name == process_name: data = ROOT.RooDataHist("data_obs", "data_obs", l, hist) data.plotOn(xframe, ROOT.RooFit.MarkerColor(ROOT.TColor.GetColorTransparent(0, 1)), ROOT.RooFit.LineColor(ROOT.TColor.GetColorTransparent(0, 1))) fit.plotOn(xframe) xframe.Draw("same"); for ih, hist in enumerate(draw_hists): option = "HIST,SAME" if hist.hist_type != "data" else "PEZ,SAME" hist.Draw(option) for label in draw_labels: label.Draw("same") legend.Draw("same") if not (self.hide_data or len(data_hists) == 0 or len(background_hists) == 0 or not self.stack): dummy_ratio_hist = ROOT.TH1F(randomize("dummy"), hist_title, *binning_args) r.setup_hist(dummy_ratio_hist, pad=c.get_pad(2), props={"Minimum": 0.25, "Maximum": 1.75}) r.setup_y_axis(dummy_ratio_hist.GetYaxis(), pad=c.get_pad(2), props={"Ndivisions": 3}) dummy_ratio_hist.GetYaxis().SetTitle("Data / MC") dummy_ratio_hist.GetXaxis().SetTitleOffset(3) dummy_ratio_hist.GetYaxis().SetTitleOffset(1.22) data_graph = hist_to_graph(data_histo, remove_zeros=False, errors=True, asymm=True, overflow=False, underflow=False, attrs=["cmt_process_name", "cmt_hist_type", "cmt_legend_style"]) bkg_graph = hist_to_graph(bkg_histo, remove_zeros=False, errors=True, asymm=True, overflow=False, underflow=False, attrs=["cmt_process_name", "cmt_hist_type", "cmt_legend_style"]) ratio_graph = ROOT.TGraphAsymmErrors(binning_args[0]) mc_unc_graph = ROOT.TGraphErrors(binning_args[0]) r.setup_graph(ratio_graph, props={"MarkerStyle": 20, "MarkerSize": 0.5}) r.setup_graph(mc_unc_graph, props={"FillStyle": 3004, "LineColor": 0, "MarkerColor": 0, "MarkerSize": 0., "FillColor": ROOT.kGray + 2}) if self.plot_systematics: syst_graph = hist_to_graph(bkg_histo_syst, remove_zeros=False, errors=True, asymm=True, overflow=False, underflow=False, attrs=["cmt_process_name", "cmt_hist_type", "cmt_legend_style"]) syst_unc_graph = ROOT.TGraphErrors(binning_args[0]) r.setup_graph(syst_unc_graph, props={"FillStyle": 3005, "LineColor": 0, "MarkerColor": 0, "MarkerSize": 0., "FillColor": ROOT.kRed + 2}) all_unc_graph = ROOT.TGraphErrors(binning_args[0]) r.setup_graph(all_unc_graph, props={"FillStyle": 3007, "LineColor": 0, "MarkerColor": 0, "MarkerSize": 0., "FillColor": ROOT.kBlue + 2}) for i in range(binning_args[0]): x, d, b = c_double(0.), c_double(0.), c_double(0.) data_graph.GetPoint(i, x, d) bkg_graph.GetPoint(i, x, b) d = d.value b = b.value if d == EMPTY or b == 0: ratio_graph.SetPoint(i, x, EMPTY) else: ratio_graph.SetPoint(i, x, d / b) ratio_graph.SetPointEYhigh(i, data_graph.GetErrorYhigh(i) / b) ratio_graph.SetPointEYlow(i, data_graph.GetErrorYlow(i) / b) # set the mc uncertainty if b == 0: mc_unc_graph.SetPoint(i, x, EMPTY) else: mc_unc_graph.SetPoint(i, x, 1.) mc_error = bkg_graph.GetErrorYhigh(i) / b mc_unc_graph.SetPointError(i, dummy_ratio_hist.GetBinWidth(i + 1) / 2., mc_error) if self.plot_systematics: # syst only syst_unc_graph.SetPoint(i, x, 1.) syst_error = syst_graph.GetErrorYhigh(i) / b syst_unc_graph.SetPointError(i, dummy_ratio_hist.GetBinWidth(i + 1) / 2., syst_error) # syst + stat all_unc_graph.SetPoint(i, x, 1.) tot_unc = math.sqrt(mc_error ** 2 + syst_error ** 2) all_unc_graph.SetPointError(i, dummy_ratio_hist.GetBinWidth(i + 1) / 2., tot_unc) c.get_pad(2).cd() dummy_ratio_hist.Draw() mc_unc_graph.Draw("2,SAME") if not self.hide_data: ratio_graph.Draw("PEZ,SAME") if self.plot_systematics: # syst_unc_graph.Draw("2,SAME") all_unc_graph.Draw("2,SAME") lines = [] for y in [0.5, 1.0, 1.5]: if isinstance(feature.binning, tuple): l = ROOT.TLine(binning_args[1], y, binning_args[2], y) else: l = ROOT.TLine(binning_args[1][0], y, binning_args[1][-1], y) r.setup_line(l, props={"NDC": False, "LineStyle": 3, "LineWidth": 1, "LineColor": 1}) lines.append(l) for line in lines: line.Draw("same") outputs = [] if self.save_png: outputs.append(self.output()["png"].targets[feature.name].path) if self.save_pdf: outputs.append(self.output()["pdf"].targets[feature.name].path) for output in outputs: c.SaveAs(create_file_dir(output)) if self.save_root: f = ROOT.TFile.Open(create_file_dir( self.output()["root"].targets[feature.name].path), "RECREATE") f.cd() # c.Write("canvas") #FIXME hist_dir = f.mkdir("histograms") hist_dir.cd() for hist in all_hists: hist.Write(hist.cmt_process_name) if bkg_histo: bkg_histo.Write("background") if self.store_systematics: for syst_dir, shape_hists in self.histos["shape"].items(): for hist in shape_hists: hist.Write("%s_%s" % (hist.cmt_process_name, syst_dir)) f.Close() if self.save_yields: yields = OrderedDict() for hist in signal_hists + background_hists + data_hists: yields[hist.cmt_process_name] = { "Total yield": hist.cmt_yield, "Total yield error": hist.cmt_yield_error, "Entries": getattr(hist, "cmt_entries", hist.GetEntries()), "Binned yield": hist.cmt_bin_yield, "Binned yield error": hist.cmt_bin_yield_error, } if bkg_histo: yields["background"] = { "Total yield": sum([hist.cmt_yield for hist in background_hists]), "Total yield error": math.sqrt(sum([(hist.cmt_yield_error)**2 for hist in background_hists])), "Entries": getattr(bkg_histo, "cmt_entries", bkg_histo.GetEntries()), "Binned yield": [sum([hist.cmt_bin_yield[i] for hist in background_hists]) for i in range(0, background_hists[0].GetNbinsX())], "Binned yield error": [math.sqrt(sum([(hist.cmt_bin_yield_error[i])**2 for hist in background_hists])) for i in range(0, background_hists[0].GetNbinsX())], } with open(create_file_dir(self.output()["yields"].targets[feature.name].path), "w") as f: json.dump(yields, f, indent=4)
def get_nevents(self): nevents = {} for iproc, (process, datasets) in enumerate(self.processes_datasets.items()): if not process.isData and not self.avoid_normalization: for dataset in datasets: nevents[dataset.name] = {} for elem in ["central"] + [f"{syst}_{d}" for (syst, d) in itertools.product(self.norm_syst_list, directions)]: inp = self.input()["stats"][dataset.name][elem] with open(inp.path) as f: stats = json.load(f) if self.apply_weights: nevents[dataset.name][elem] = stats["nweightedevents"] else: nevents[dataset.name][elem] = stats["nevents"] return nevents def get_normalization_factor(self, dataset, elem): return dataset.xs * self.config.lumi_pb / self.nevents[dataset.name][elem]
[docs] @law.decorator.notify @law.decorator.safe_output def run(self): """ Splits processes into data, signal and background. Creates histograms from each process loading them from the input files. Scales the histograms and applies the correct format to them. """ ROOT.gStyle.SetOptStat(0) # create root tchains for inputs inputs = self.input() self.data_names = [p.name for p in self.processes_datasets.keys() if p.isData] self.signal_names = [p.name for p in self.processes_datasets.keys() if p.isSignal] self.background_names = [p.name for p in self.processes_datasets.keys() if not p.isData and not p.isSignal] if self.plot_systematics: systematics = self.get_norm_systematics() if self.fixed_colors: colors = list(range(2, 2 + len(self.processes_datasets.keys()))) self.nevents = self.get_nevents() for ifeat, feature in enumerate(self.features): self.histos = {"background": [], "signal": [], "data": [], "all": []} binning_args, y_axis_adendum = self.get_binning(feature, ifeat) x_title = (str(feature.get_aux("x_title")) + (" [%s]" % feature.get_aux("units") if feature.get_aux("units") else "")) y_title = ("Events" if self.stack else "Normalized Events") + y_axis_adendum hist_title = "; %s; %s" % (x_title, y_title) systs_directions = [("central", "")] if self.plot_systematics: self.histos["bkg_histo_syst"] = ROOT.TH1D( randomize("syst"), hist_title, *binning_args) if self.store_systematics: self.histos["shape"] = {} shape_systematics = self.get_systs(feature, True) systs_directions += list(itertools.product(shape_systematics, directions)) for (syst, d) in systs_directions: feature_name = feature.name if syst == "central" else "%s_%s_%s" % ( feature.name, syst, d) if syst != "central": self.histos["shape"]["%s_%s" % (syst, d)] = [] for iproc, (process, datasets) in enumerate(self.processes_datasets.items()): if syst != "central" and process.isData: continue if self.do_sideband and not process.isData and not process.isSignal: continue process_histo = ROOT.TH1D(randomize(process.name), hist_title, *binning_args) process_histo.process_label = str(process.label) process_histo.cmt_process_name = process.name process_histo.Sumw2() for dataset in datasets: dataset_histo = ROOT.TH1D(randomize("tmp"), hist_title, *binning_args) dataset_histo.Sumw2() for category in self.expand_category(): inp = inputs["data"][ (dataset.name, category.name)].collection.targets.values() for elem in inp: rootfile = ROOT.TFile.Open(elem.path) histo = copy(rootfile.Get(feature_name)) rootfile.Close() dataset_histo.Add(histo) if not process.isData and not self.avoid_normalization: elem = ("central" if syst == "central" or syst not in self.norm_syst_list else f"{syst}_{d}") if self.nevents[dataset.name][elem] != 0: dataset_histo.Scale(self.get_normalization_factor(dataset, elem)) scaling = dataset.get_aux("scaling", None) if scaling: print(" ### Scaling {} histo by {} +- {}".format( dataset.name, scaling[0], scaling[1])) old_errors = [dataset_histo.GetBinError(ibin)\ / dataset_histo.GetBinContent(ibin) if dataset_histo.GetBinContent(ibin) != 0 else 0 for ibin in range(1, dataset_histo.GetNbinsX() + 1)] new_errors = [ math.sqrt(elem ** 2 + (scaling[1] / scaling[0]) ** 2) for elem in old_errors] dataset_histo.Scale(scaling[0]) for ibin in range(1, dataset_histo.GetNbinsX() + 1): dataset_histo.SetBinError( ibin, dataset_histo.GetBinContent(ibin) * new_errors[ibin - 1]) process_histo.Add(dataset_histo) if self.plot_systematics and not process.isData and not process.isSignal \ and syst == "central": dataset_histo_syst = dataset_histo.Clone() for ibin in range(1, dataset_histo_syst.GetNbinsX() + 1): # some datasets may not have any systematics if dataset.name in systematics: dataset_histo_syst.SetBinError(ibin, float(dataset_histo.GetBinContent(ibin))\ * systematics[dataset.name] ) self.histos["bkg_histo_syst"].Add(dataset_histo_syst) if process.name in self.additional_scaling: process_histo.Scale(self.additional_scaling[process.name]) yield_error = c_double(0.) process_histo.cmt_yield = process_histo.IntegralAndError(0, process_histo.GetNbinsX() + 1, yield_error) process_histo.cmt_yield_error = yield_error.value process_histo.cmt_bin_yield = [] process_histo.cmt_bin_yield_error = [] for ibin in range(1, process_histo.GetNbinsX() + 1): process_histo.cmt_bin_yield.append(process_histo.GetBinContent(ibin)) process_histo.cmt_bin_yield_error.append(process_histo.GetBinError(ibin)) if syst == "central": if self.fixed_colors: color = colors[iproc] elif type(process.color) == tuple: color = ROOT.TColor.GetColor(*process.color) else: color = process.color if process.isSignal: self.setup_signal_hist(process_histo, color) self.histos["signal"].append(process_histo) elif process.isData: self.setup_data_hist(process_histo, color) self.histos["data"].append(process_histo) else: self.setup_background_hist(process_histo, color) self.histos["background"].append(process_histo) if not process.isData: #or not self.hide_data: self.histos["all"].append(process_histo) else: self.histos["shape"]["%s_%s" % (syst, d)].append(process_histo) self.plot(feature)
##################################################################################################### ##################################################################################################### ##################################################################################################### class FeaturePlotSyst(FeaturePlot): """ Performs the histogram plotting with up and down variations due to systematics: loads the histograms obtained in the FeaturePlot task, rescales them if needed and plots and saves them. Example command: ``law run FeaturePlotSyst --version test --category-name etau --config-name ul_2018 \ --process-group-name etau --feature-names lep1_pt,lep1_eta \ --workers 20 --PrePlot-workflow local --stack --hide-data False --do-qcd --region-name etau_os_iso\ --dataset-names tt_dl,tt_sl,dy_high,wjets,data_etau_a,data_etau_b,data_etau_c,data_etau_d \ --MergeCategorizationStats-version test_old`` """ def requires(self): """ Needs as input the root file provided by the FeaturePlot task """ return FeaturePlot.vreq(self, save_root=True, stack=True) def output(self): """ Output files to be filled: pdf or png """ output_data = [] if self.save_pdf: output_data.append(("pdf", "", "pdf")) if self.save_png: output_data.append(("png", "", "png")) process_names = [process.name for process in self.processes_datasets.keys() if not process.isData] if self.do_qcd: process_names.append("qcd") out = { key: law.SiblingFileCollection(OrderedDict( ("%s_%s_%s" %(process_name, feature.name, syst), self.local_target("{}{}_{}_{}{}.{}".format( prefix, process_name, feature.name, syst, self.get_output_postfix(key), ext))) for feature in self.features for syst in self.get_unique_systs(self.get_systs(feature, True) \ + self.config.get_weights_systematics(self.config.weights[self.category.name], True)) for process_name in process_names )) for key, prefix, ext in output_data } return out def setup_syst_hist(self, hist, dir): if dir == "central": hist.SetFillStyle(0) hist.SetLineColor(1) hist.legend_style = "l" hist.SetLineWidth(2) if dir == "up": hist.SetFillStyle(0) hist.SetLineColor(ROOT.kAzure) hist.legend_style = "l" hist.SetLineWidth(2) # hist.SetLineStyle(4) if dir == "down": hist.SetFillStyle(0) hist.SetLineColor(ROOT.kMagenta) hist.legend_style = "l" hist.SetLineWidth(2) # hist.SetLineStyle(8) def get_syst_label(self, shape_syst): if self.config.systematics.get(shape_syst).get_aux("label"): return self.config.systematics.get(shape_syst).get_aux("label") else: return shape_syst def plot(self, feature): for process, p_label in zip(self.process_names, self.process_labels): # central histogram for each process histo_syst_central = self.histos[process]["central"] self.setup_syst_hist(histo_syst_central, "central") for shape_syst in self.shape_syst_list: # up and down variation histograms for each systematic histo_syst_up = self.histos[process]["%s_up" %shape_syst] histo_syst_down = self.histos[process]["%s_down" %shape_syst] self.setup_syst_hist(histo_syst_up, "up") self.setup_syst_hist(histo_syst_down, "down") draw_hists = [histo_syst_central, histo_syst_up, histo_syst_down] shape_syst_label = self.get_syst_label(shape_syst) entries = [ (histo_syst_central, "Nominal", histo_syst_central.legend_style), (histo_syst_up, shape_syst_label + " Up", histo_syst_up.legend_style), (histo_syst_down, shape_syst_label + " Down", histo_syst_down.legend_style)] n_entries = len(entries) if n_entries <= 4: n_cols = 1 elif n_entries <= 8: n_cols = 2 else: n_cols = 3 if len(shape_syst_label) < 5: col_width = 0.20 elif len(shape_syst_label) < 12: col_width = 0.30 else: col_width = 0.40 n_rows = math.ceil(n_entries / float(n_cols)) row_width = 0.06 legend_x2 = 0.88 legend_x1 = legend_x2 - n_cols * col_width legend_y2 = 0.88 legend_y1 = legend_y2 - n_rows * row_width legend = ROOT.TLegend(legend_x1, legend_y1, legend_x2, legend_y2) legend.SetBorderSize(0) legend.SetNColumns(1) for entry in entries: legend.AddEntry(*entry) binning_args, y_axis_adendum = self.get_binning(feature) x_title = (str(feature.get_aux("x_title")) + (" [%s]" % feature.get_aux("units") if feature.get_aux("units") else "")) y_title = ("Events" if self.stack else "Normalized Events") + y_axis_adendum hist_title = "; %s; %s" % (x_title, y_title) dummy_hist = ROOT.TH1F(randomize("dummy"), hist_title, *binning_args) # Draw c = RatioCanvas() dummy_hist.GetXaxis().SetLabelOffset(100) dummy_hist.GetXaxis().SetTitleOffset(100) c.get_pad(1).cd() if self.log_y: c.get_pad(1).SetLogy() label_scaling = 5./4. r.setup_hist(dummy_hist) r.setup_y_axis(dummy_hist.GetYaxis(), pad=c.get_pad(1)) dummy_hist.GetYaxis().SetMaxDigits(4) dummy_hist.GetYaxis().SetTitleOffset(1.22) maximum = max([hist.GetMaximum() for hist in draw_hists]) if self.log_y: dummy_hist.SetMaximum(100 * maximum) dummy_hist.SetMinimum(0.0011) else: dummy_hist.SetMaximum(1.25 * maximum) dummy_hist.SetMinimum(0.001) inner_text=[self.category.label + " category"] if self.region: if isinstance(self.region.label, list): inner_text += self.region.label else: inner_text.append(self.region.label) inner_text.append(p_label) if maximum > 1e4 and not self.log_y: upper_left_offset = 0.05 else: upper_left_offset = 0.0 upper_right="{} Simulation ({} TeV)".format( self.config.year, self.config.ecm, ) m = max([hist.GetMaximum() for hist in draw_hists]) if not self.log_y else None draw_labels = get_labels( upper_left_offset=upper_left_offset, upper_right=upper_right, scaling=label_scaling, inner_text=inner_text, max=m ) dummy_hist.Draw() histo_syst_central.Draw("HIST,SAME") histo_syst_up.Draw("HIST,SAME") histo_syst_down.Draw("HIST,SAME") for label in draw_labels: label.Draw("same") legend.Draw("same") dummy_ratio_hist = ROOT.TH1F(randomize("dummy"), hist_title, *binning_args) r.setup_hist(dummy_ratio_hist, pad=c.get_pad(2), props={"Minimum": 0.75, "Maximum": 1.25}) r.setup_y_axis(dummy_ratio_hist.GetYaxis(), pad=c.get_pad(2), props={"Ndivisions": 5}) dummy_ratio_hist.GetYaxis().SetTitle("Ratio") dummy_ratio_hist.GetXaxis().SetTitleOffset(3) dummy_ratio_hist.GetYaxis().SetTitleOffset(1.22) ratio_hist_up = ROOT.TH1F(randomize("ratio_hist_up"), hist_title, *binning_args) ratio_hist_down = ROOT.TH1F(randomize("ratio_hist_down"), hist_title, *binning_args) self.setup_syst_hist(ratio_hist_up, "up") self.setup_syst_hist(ratio_hist_down, "down") ratio_hist_up.SetLineStyle(0) ratio_hist_down.SetLineStyle(0) mc_unc_graph = ROOT.TGraphErrors(binning_args[0]) r.setup_graph(mc_unc_graph, props={"FillStyle": 3004, "LineColor": 0, "MarkerColor": 0, "MarkerSize": 0., "FillColor": ROOT.kGray + 2}) for i in range(binning_args[0]+1): x = histo_syst_central.GetBinCenter(i) y = histo_syst_central.GetBinContent(i) sigma_x = histo_syst_central.GetBinWidth(i)/2. sigma_y = histo_syst_central.GetBinError(i) if histo_syst_central.GetBinContent(i) != 0: ratio_hist_up.SetBinContent(i, histo_syst_up.GetBinContent(i)/histo_syst_central.GetBinContent(i)) ratio_hist_down.SetBinContent(i, histo_syst_down.GetBinContent(i)/histo_syst_central.GetBinContent(i)) mc_unc_graph.SetPoint(i, x, 1.) mc_unc_graph.SetPointError(i, sigma_x, sigma_y/y) else: ratio_hist_up.SetBinContent(i, EMPTY) ratio_hist_down.SetBinContent(i, EMPTY) mc_unc_graph.SetPoint(i, x, EMPTY) c.get_pad(2).cd() dummy_ratio_hist.Draw() ratio_hist_up.Draw("SAME") ratio_hist_down.Draw("SAME") mc_unc_graph.Draw("2,SAME") lines = [] for y in [0.5, 1.0, 1.5]: if isinstance(feature.binning, tuple): l = ROOT.TLine(binning_args[1], y, binning_args[2], y) else: l = ROOT.TLine(binning_args[1][0], y, binning_args[1][-1], y) r.setup_line(l, props={"NDC": False, "LineStyle": 3, "LineWidth": 1, "LineColor": 1}) lines.append(l) for line in lines: line.Draw("same") outputs = [] if self.save_png: outputs.append(self.output()["png"].targets["%s_%s_%s" %(process, feature.name, shape_syst)].path) if self.save_pdf: outputs.append(self.output()["pdf"].targets["%s_%s_%s" %(process, feature.name, shape_syst)].path) for output in outputs: c.SaveAs(create_file_dir(output)) def run(self): """ Splits the processes into data and non-data. Per feature, loads the input histograms, creates the output plots with up and down variations. """ ROOT.gStyle.SetOptStat(0) inputs = self.input() self.process_names = [process.name for process in self.processes_datasets.keys() if not process.isData] self.process_labels = [process.label for process in self.processes_datasets.keys() if not process.isData] if self.do_qcd: self.process_names.append("qcd") self.process_labels.append("QCD") for feature in self.features: self.shape_syst_list = self.get_unique_systs(self.get_systs(feature, True) \ + self.config.get_weights_systematics(self.config.weights[self.category.name], True)) tf = ROOT.TFile.Open(inputs["root"].targets[feature.name].path) self.histos = {} for process in self.process_names: self.histos[process] = {} self.histos[process]["central"] = copy(tf.Get("histograms/%s" % process)) for shape_syst in self.shape_syst_list: self.histos[process]["%s_up" %shape_syst] = copy(tf.Get("histograms/%s_%s_up" % (process, shape_syst))) self.histos[process]["%s_down" %shape_syst] = copy(tf.Get("histograms/%s_%s_down" % (process, shape_syst))) tf.Close() self.plot(feature) ################################################################################################################################# ################################################################################################################################# ################################################################################################################################# class BasePlot2DTask(BasePlotTask): def get_features(self): features = [] for feature_pair_name in self.feature_names: try: feature_pair_names = feature_pair_name.split(":") except: print("%s cannot be considered for a 2D plot" % feature_pair_name) continue feature_pair = [] for feature_name in feature_pair_names: for feature in self.config.features: if feature.name == feature_name: feature_pair.append(feature) break if len(feature_pair) == 2: features.append(tuple(feature_pair)) if len(features) == 0: raise ValueError("No features were included. Did you spell them correctly?") return features def get_binning(self, feature): binning_args = [] for feature_elem in feature: if isinstance(feature_elem.binning, tuple): binning_args += feature_elem.binning else: binning_args += [len(feature_elem.binning) - 1, array.array("f", feature_elem.binning)] return tuple(binning_args), "" class PrePlot2D(PrePlot, BasePlot2DTask): def get_syst_list(self): if self.skip_processing: return [] syst_list = [] isMC = self.dataset.process.isMC for feature_pair in self.features: for feature in feature_pair: systs = self.get_unique_systs(self.get_systs(feature, isMC)) for syst in systs: if syst in syst_list: continue try: systematic = self.config.systematics.get(syst) if self.category.name in systematic.get_aux("affected_categories", []): syst_list.append(syst) except: continue return syst_list def define_histograms(self, dfs, nentries): histos = [] isMC = self.dataset.process.isMC for feature_pair in self.features: binning_args, _ = self.get_binning(feature_pair) x_title = (str(feature_pair[0].get_aux("x_title")) + (" [%s]" % feature_pair[0].get_aux("units") if feature_pair[0].get_aux("units") else "")) y_title = (str(feature_pair[1].get_aux("x_title")) + (" [%s]" % feature_pair[1].get_aux("units") if feature_pair[1].get_aux("units") else "")) title = "; %s; %s; Events" % (x_title, y_title) systs = self.get_unique_systs(self.get_systs(feature_pair[0], isMC) + self.get_systs(feature_pair[1], isMC) \ + self.config.get_weights_systematics(self.config.weights[self.category.name], isMC) ) systs_directions = [("central", "")] if isMC and self.store_systematics: systs_directions += list(itertools.product(systs, directions)) # loop over systematics and up/down variations for syst_name, direction in systs_directions: # Select the appropriate RDF (input file) depending on the syst and direction key = "central" if f"{syst_name}_{direction}" in dfs: key = f"{syst_name}_{direction}" df = dfs[key] # apply selection if needed feat_df = df for ifeat, feature in enumerate(feature_pair): if feature.selection: feat_df = feat_df.Define( "feat%s_selection" % ifeat, self.config.get_object_expression( feature.selection, isMC, syst_name, direction)).Filter( "feat%s_selection" % ifeat) # define tag just for histograms's name if syst_name != "central" and isMC: tag = "_%s" % syst_name if direction != "": tag += "_%s" % direction else: tag = "" feature_expressions = [ self.config.get_object_expression( feature_pair[0], isMC, syst_name, direction), self.config.get_object_expression( feature_pair[1], isMC, syst_name, direction), ] feature_name = "_".join([feature.name for feature in feature_pair]) + tag hist_base = ROOT.TH2D(feature_name, title, *binning_args) if nentries[key] > 0: hmodel = ROOT.RDF.TH2DModel(hist_base) histos.append( feat_df.Define( "weight", "{}".format(self.get_weight( self.category.name, syst_name, direction)) ).Define("var1", feature_expressions[0]).Define("var2", feature_expressions[1] ).Histo2D(hmodel, "var1", "var2", "weight") ) else: # no entries available, append empty histogram histos.append(hist_base) return histos class FeaturePlot2D(FeaturePlot, BasePlot2DTask): """ Performs the actual histogram plotting: loads the histograms obtained in the PrePlot2D tasks, rescales them if needed and plots and saves them. Example command: ``law run FeaturePlot2D --version test --category-name etau --config-name ul_2018 \ --process-group-name etau --feature-names lep1_pt:lep1_eta,Hbb_mass:Htt_svfit_mass \ --workers 20 --PrePlot2D-workflow local --stack --hide-data False --do-qcd --region-name etau_os_iso\ --dataset-names tt_dl,tt_sl,dy_high,wjets,data_etau_a,data_etau_b,data_etau_c,data_etau_d \ --MergeCategorizationStats-version test_old`` :param log_z: whether to set y axis to log scale :type log_z: bool """ log_z = luigi.BoolParameter(default=False, description="set logarithmic scale for Z axis, " "default: False") def requires(self): """ All requirements needed: * Histograms coming from the PrePlot2D task. * Number of total events coming from the MergeCategorizationStats task \ (to normalize MC histograms). * If estimating QCD, FeaturePlot2D for the three additional QCD regions needed. """ reqs = {} reqs["data"] = OrderedDict( ((dataset.name, category.name), PrePlot2D.vreq(self, dataset_name=dataset.name, category_name=self.get_data_category(category).name)) for dataset, category in itertools.product( self.datasets_to_run, self.expand_category()) ) if not self.avoid_normalization: reqs["stats"] = OrderedDict() for dataset in self.datasets_to_run: if dataset.process.isData: continue reqs["stats"][dataset.name] = {} for elem in ["central"] + [f"{syst}_{d}" for (syst, d) in itertools.product(self.norm_syst_list, directions)]: syst = "" d = "" if "_" in elem: syst = elem.split("_")[0] d = elem.split("_")[1] if dataset.get_aux("secondary_dataset", None): reqs["stats"][dataset.name][elem] = MergeCategorizationStats.vreq(self, dataset_name=dataset.get_aux("secondary_dataset"), systematic=syst, systematic_direction=d) else: reqs["stats"][dataset.name][elem] = MergeCategorizationStats.vreq(self, dataset_name=dataset.name, systematic=syst, systematic_direction=d) if self.do_qcd: reqs["qcd"] = { key: self.req(self, region_name=region.name, blinded=False, hide_data=False, do_qcd=False, stack=True, save_root=True, save_pdf=True, save_yields=False, remove_horns=False,_exclude=["feature_tags", "shape_region", "qcd_category_name", "qcd_sym_shape", "qcd_signal_region_wp"]) for key, region in self.qcd_regions.items() } if self.qcd_category_name != "default": reqs["qcd"]["ss_iso"] = FeaturePlot2D.vreq(self, region_name=self.qcd_regions.ss_iso.name, category_name=self.qcd_category_name, blinded=False, hide_data=False, do_qcd=False, stack=True, save_root=True, save_pdf=True, save_yields=False, feature_names=(self.qcd_feature,), bin_opt_version=law.NO_STR, remove_horns=self.remove_horns, _exclude=["feature_tags", "shape_region", "qcd_category_name", "qcd_sym_shape", "bin_opt_version", "qcd_signal_region_wp"]) # reqs["qcd"]["os_inviso"] = FeaturePlot2D.vreq(self, # region_name=self.qcd_regions.os_inviso.name, category_name=self.qcd_category_name, # blinded=False, hide_data=False, do_qcd=False, stack=True, save_root=True, # save_pdf=True, save_yields=True, feature_names=(self.qcd_feature,), # remove_horns=self.remove_horns, _exclude=["feature_tags", "bin_opt_version"]) reqs["qcd"]["ss_inviso"] = FeaturePlot2D.vreq(self, region_name=self.qcd_regions.ss_inviso.name, category_name=self.qcd_category_name, blinded=False, hide_data=False, do_qcd=False, stack=True, save_root=True, save_pdf=True, save_yields=False, feature_names=(self.qcd_feature,), bin_opt_version=law.NO_STR, remove_horns=self.remove_horns, _exclude=["feature_tags", "shape_region", "qcd_category_name", "qcd_sym_shape", "bin_opt_version", "qcd_signal_region_wp"]) return reqs def output(self): """ Output files to be filled: pdf, png, root or json """ def get_feature_name(feature_pair): return "_".join([feature.name for feature in feature_pair]) # output definitions, i.e. key, file prefix, extension output_data = [] if self.save_root: output_data.append(("root", "root/", "root")) if self.save_yields: output_data.append(("yields", "yields/", "json")) out = { key: law.SiblingFileCollection(OrderedDict( (get_feature_name(feature_pair), self.local_target("{}{}{}.{}".format( prefix, get_feature_name(feature_pair), self.get_output_postfix(key), ext))) for feature_pair in self.features )) for key, prefix, ext in output_data } output_data = [] if self.save_pdf: output_data.append(("pdf", "", "pdf")) if self.save_png: output_data.append(("png", "", "png")) out.update({ key: law.SiblingFileCollection(OrderedDict( (get_feature_name(feature_pair), OrderedDict( (process.name, self.local_target("{}{}{}_{}.{}".format( prefix, get_feature_name(feature_pair), self.get_output_postfix(key), process.name, ext))) for process in self.processes_datasets if not process.isData or not self.hide_data )) for feature_pair in self.features )) for key, prefix, ext in output_data }) return out def plot(self, feature): """ Performs the actual plotting. """ # helper to extract the qcd shape in a region def get_qcd(region, files, bin_limit=0.): d_hist = files[region].Get("histograms/" + self.data_names[0]) if not d_hist: raise Exception("data histogram '{}' not found for region '{}' in tfile {}".format( self.data_names[0], region, files[region])) b_hists = [] for b_name in self.background_names: b_hist = files[region].Get("histograms/" + b_name) if not b_hist: raise Exception("background histogram '{}' not found in region '{}'".format( b_name, region)) b_hists.append(b_hist) qcd_hist = d_hist.Clone(randomize("qcd_" + region)) for hist in b_hists: qcd_hist.Add(hist, -1.) # removing negative bins for ibinx, ibiny in itertools.product( range(1, qcd_hist.GetNbinsX() + 1), range(1, qcd_hist.GetNbinsY() + 1)): if qcd_hist.GetBinContent(ibinx, ibiny) < bin_limit: qcd_hist.SetBinContent(ibinx, ibiny, 1.e-6) return qcd_hist def get_integral_and_error(hist): error = c_double(0.) integral = hist.IntegralAndError( 0, hist.GetNbinsX() + 1, 0, hist.GetNbinsY() + 1, error) error = error.value compatible = True if integral <= 0 else False # compatible = True if integral - error <= 0 else False return integral, error, compatible background_hists = self.histos["background"] signal_hists = self.histos["signal"] data_hists = self.histos["data"] all_hists = self.histos["all"] if self.plot_systematics: bkg_histo_syst = self.histos["bkg_histo_syst"] binning_args, _ = self.get_binning(feature) x_title = (str(feature[0].get_aux("x_title")) + (" [%s]" % feature[0].get_aux("units") if feature[0].get_aux("units") else "")) y_title = (str(feature[1].get_aux("x_title")) + (" [%s]" % feature[1].get_aux("units") if feature[1].get_aux("units") else "")) z_title = ("Events" if self.stack else "Normalized Events") hist_title = "; %s; %s; %s" % (x_title, y_title, z_title) feature_pair_name = "_".join([elem.name for elem in feature]) # qcd shape files qcd_shape_files = None if self.do_qcd: qcd_shape_files = {} for key, region in self.qcd_regions.items(): if self.qcd_category_name != "default": my_feature = (feature_pair_name if "shape" in key or key == "os_inviso" else self.qcd_feature) else: my_feature = feature_pair_name qcd_shape_files[key] = ROOT.TFile.Open(self.input()["qcd"][key]["root"].targets[my_feature].path) # do the qcd extrapolation if "shape" in qcd_shape_files: qcd_hist = get_qcd("shape", qcd_shape_files).Clone(randomize("qcd")) qcd_hist.Scale(1. / qcd_hist.Integral()) else: #sym shape qcd_hist = get_qcd("shape1", qcd_shape_files).Clone(randomize("qcd")) qcd_hist2 = get_qcd("shape2", qcd_shape_files).Clone(randomize("qcd")) qcd_hist.Scale(1. / qcd_hist.Integral()) qcd_hist2.Scale(1. / qcd_hist2.Integral()) qcd_hist.Add(qcd_hist2) qcd_hist.Scale(0.5) n_os_inviso, n_os_inviso_error, n_os_inviso_compatible = get_integral_and_error( get_qcd("os_inviso", qcd_shape_files, -999)) n_ss_iso, n_ss_iso_error, n_ss_iso_compatible = get_integral_and_error( get_qcd("ss_iso", qcd_shape_files, -999)) n_ss_inviso, n_ss_inviso_error, n_ss_inviso_compatible = get_integral_and_error( get_qcd("ss_inviso", qcd_shape_files, -999)) # if not n_ss_iso or not n_ss_inviso: if n_os_inviso_compatible or n_ss_iso_compatible or n_ss_inviso_compatible: print("****WARNING: QCD normalization failed (negative yield), Removing QCD!") qcd_scaling = 0. qcd_inviso = 0. qcd_inviso_error = 0. qcd_hist.Scale(qcd_scaling) else: qcd_inviso = n_os_inviso / n_ss_inviso qcd_inviso_error = qcd_inviso * math.sqrt( (n_os_inviso_error / n_os_inviso) * (n_os_inviso_error / n_os_inviso) + (n_ss_inviso_error / n_ss_inviso) * (n_ss_inviso_error / n_ss_inviso) ) qcd_scaling = n_os_inviso * n_ss_iso / n_ss_inviso os_inviso_rel_error = n_os_inviso_error / n_os_inviso ss_iso_rel_error = n_ss_iso_error / n_ss_iso ss_inviso_rel_error = n_ss_inviso_error / n_ss_inviso new_errors_sq = {} for ibinx, ibiny in itertools.product( range(1, qcd_hist.GetNbinsX() + 1), range(1, qcd_hist.GetNbinsY() + 1)): if qcd_hist.GetBinContent(ibinx, ibiny) > 0: bin_rel_error = (qcd_hist.GetBinError(ibinx, ibiny) / qcd_hist.GetBinContent(ibinx, ibiny)) else: bin_rel_error = 0 new_errors_sq[(ibinx, ibiny)] = (bin_rel_error * bin_rel_error + os_inviso_rel_error * os_inviso_rel_error + ss_iso_rel_error * ss_iso_rel_error + ss_inviso_rel_error * ss_inviso_rel_error) qcd_hist.Scale(qcd_scaling) # fix errors for ibinx, ibiny in itertools.product( range(1, qcd_hist.GetNbinsX() + 1), range(1, qcd_hist.GetNbinsY() + 1)): qcd_hist.SetBinError(ibinx, ibiny, qcd_hist.GetBinContent(ibinx, ibiny) * math.sqrt(new_errors_sq[(ibinx, ibiny)])) # store and style yield_error = c_double(0.) qcd_hist.cmt_yield = qcd_hist.IntegralAndError( 0, qcd_hist.GetNbinsX() + 1, 0, qcd_hist.GetNbinsY() + 1, yield_error) qcd_hist.cmt_yield_error = yield_error.value qcd_hist.cmt_bin_yield = [] qcd_hist.cmt_bin_yield_error = [] for ibiny in range(1, qcd_hist.GetNbinsY() + 1): qcd_hist.cmt_bin_yield.append([ qcd_hist.GetBinContent(ibinx, ibiny) for ibinx in range(1, qcd_hist.GetNbinsX() + 1) ]) qcd_hist.cmt_bin_yield_error.append([ qcd_hist.GetBinError(ibinx, ibiny) for ibinx in range(1, qcd_hist.GetNbinsX() + 1) ]) qcd_hist.cmt_scale = 1. qcd_hist.cmt_process_name = "qcd" qcd_hist.process_label = "QCD" qcd_hist.SetTitle("QCD") background_hists.append(qcd_hist) all_hists.append(qcd_hist) if not self.hide_data: all_hists += data_hists bkg_histo = None if not self.stack: for hist in all_hists: scale = 1. / (hist.Integral() or 1.) hist.Scale(scale) hist.scale = scale draw_hists = all_hists else: draw_hists = all_hists for hist in background_hists: if not bkg_histo: bkg_histo = hist.Clone() else: bkg_histo.Add(hist.Clone()) if not self.hide_data: draw_hists.extend(data_hists) dummy_hist = ROOT.TH2F(randomize("dummy"), hist_title, *binning_args) r.setup_hist(dummy_hist) dummy_hist.GetZaxis().SetMaxDigits(4) dummy_hist.GetZaxis().SetTitleOffset(1.22) # FIXME if self.log_z: dummy_hist.SetMinimum(0.0011) else: dummy_hist.SetMinimum(0.001) inner_text=[self.category.label + " category"] if self.region: if isinstance(self.region.label, list): inner_text += self.region.label else: inner_text.append(self.region.label) if not (self.hide_data or not len(data_hists) > 0) or self.stack: upper_right="{}, {} TeV ({:.1f} ".format( self.config.year, self.config.ecm, self.config.lumi_fb ) + "fb^{-1})" else: upper_right="{} Simulation ({} TeV)".format( self.config.year, self.config.ecm, ) draw_labels = get_labels( upper_right=upper_right, inner_text=inner_text ) for ih, hist in enumerate(draw_hists): c = Canvas() if self.log_z: c.SetLogz() dummy_hist.SetMaximum(hist.GetMaximum()) dummy_hist.Draw() hist.Draw("COLZ,SAME") for label in draw_labels: label.Draw("same") outputs = [] if self.save_png: outputs.append(self.output()["png"].targets[feature_pair_name][hist.cmt_process_name].path) if self.save_pdf: outputs.append(self.output()["pdf"].targets[feature_pair_name][hist.cmt_process_name].path) for output in outputs: c.SaveAs(create_file_dir(output)) if self.save_root: f = ROOT.TFile.Open(create_file_dir( self.output()["root"].targets[feature_pair_name].path), "RECREATE") f.cd() # c.Write("canvas") #FIXME hist_dir = f.mkdir("histograms") hist_dir.cd() for hist in all_hists: hist.Write(hist.cmt_process_name) if bkg_histo: bkg_histo.Write("background") if self.store_systematics: for syst_dir, shape_hists in self.histos["shape"].items(): for hist in shape_hists: hist.Write("%s_%s" % (hist.cmt_process_name, syst_dir)) f.Close() if self.save_yields: yields = OrderedDict() for hist in signal_hists + background_hists + data_hists: yields[hist.cmt_process_name] = { "Total yield": hist.cmt_yield, "Total yield error": hist.cmt_yield_error, "Entries": getattr(hist, "cmt_entries", hist.GetEntries()), "Binned yield": hist.cmt_bin_yield, "Binned yield error": hist.cmt_bin_yield_error, } if bkg_histo: yields["background"] = { "Total yield": sum([hist.cmt_yield for hist in background_hists]), "Total yield error": math.sqrt(sum([(hist.cmt_yield_error)**2 for hist in background_hists])), "Entries": getattr(bkg_histo, "cmt_entries", bkg_histo.GetEntries()), "Binned yield": [[ sum([hist.cmt_bin_yield[j][i] for hist in background_hists]) for i in range(0, background_hists[0].GetNbinsX())] for j in range(0, background_hists[0].GetNbinsY())], "Binned yield error": [[ math.sqrt(sum([(hist.cmt_bin_yield_error[j][i]) ** 2 for hist in background_hists])) for i in range(0, background_hists[0].GetNbinsX())] for j in range(0, background_hists[0].GetNbinsY())], } with open(create_file_dir(self.output()["yields"].targets[feature_pair_name].path), "w") as f: json.dump(yields, f, indent=4) @law.decorator.notify @law.decorator.safe_output def run(self): """ Splits processes into data, signal and background. Creates histograms from each process loading them from the input files. Scales the histograms and applies the correct format to them. """ ROOT.gStyle.SetOptStat(0) # create root tchains for inputs inputs = self.input() self.data_names = [p.name for p in self.processes_datasets.keys() if p.isData] self.signal_names = [p.name for p in self.processes_datasets.keys() if p.isSignal] self.background_names = [p.name for p in self.processes_datasets.keys() if not p.isData and not p.isSignal] self.nevents = self.get_nevents() for feature_pair in self.features: self.histos = {"background": [], "signal": [], "data": [], "all": []} binning_args, _ = self.get_binning(feature_pair) x_title = (str(feature_pair[0].get_aux("x_title")) + (" [%s]" % feature_pair[0].get_aux("units") if feature_pair[0].get_aux("units") else "")) y_title = (str(feature_pair[1].get_aux("x_title")) + (" [%s]" % feature_pair[1].get_aux("units") if feature_pair[1].get_aux("units") else "")) z_title = ("Events" if self.stack else "Normalized Events") hist_title = "; %s; %s: %s" % (x_title, y_title, z_title) systs_directions = [("central", "")] if self.plot_systematics: self.histos["bkg_histo_syst"] = ROOT.TH2D( randomize("syst"), hist_title, *binning_args) if self.store_systematics: self.histos["shape"] = {} shape_systematics = self.get_unique_systs( self.get_systs(feature_pair[0], True) \ + self.get_systs(feature_pair[1], True) \ + self.config.get_weights_systematics(self.config.weights[self.category.name], True)) systs_directions += list(itertools.product(shape_systematics, directions)) feature_pair_name = "_".join([feature.name for feature in feature_pair]) for (syst, d) in systs_directions: feature_name = feature_pair_name if syst == "central" else "%s_%s_%s" % ( feature_pair_name, syst, d) if syst != "central": self.histos["shape"]["%s_%s" % (syst, d)] = [] for iproc, (process, datasets) in enumerate(self.processes_datasets.items()): if syst != "central" and process.isData: continue process_histo = ROOT.TH2D(randomize(process.name), hist_title, *binning_args) process_histo.process_label = str(process.label) process_histo.cmt_process_name = process.name process_histo.Sumw2() for dataset in datasets: dataset_histo = ROOT.TH2D(randomize("tmp"), hist_title, *binning_args) dataset_histo.Sumw2() for category in self.expand_category(): inp = inputs["data"][ (dataset.name, category.name)].collection.targets.values() for elem in inp: rootfile = ROOT.TFile.Open(elem.path) histo = copy(rootfile.Get(feature_name)) rootfile.Close() dataset_histo.Add(histo) if not process.isData and not self.avoid_normalization: elem = ("central" if syst == "central" or syst not in self.norm_syst_list else f"{syst}_{d}") if self.nevents[dataset.name][elem] != 0: dataset_histo.Scale( self.get_normalization_factor(dataset, elem)) scaling = dataset.get_aux("scaling", None) if scaling: print(" ### Scaling {} histo by {} +- {}".format( dataset.name, scaling[0], scaling[1])) old_errors = [[dataset_histo.GetBinError(ibinx, ibiny)\ / dataset_histo.GetBinContent(ibinx, ibiny) if dataset_histo.GetBinContent(ibinx, ibiny) != 0 else 0 for ibinx in range(1, dataset_histo.GetNbinsX() + 1)] for ibiny in range(1, dataset_histo.GetNbinsY() + 1) ] new_errors = [[ math.sqrt(elem ** 2 + (scaling[1] / scaling[0]) ** 2) for elem in line] for line in old_errors] dataset_histo.Scale(scaling[0]) for ibinx, ibiny in itertools.product( range(1, dataset_histo.GetNbinsX() + 1), range(1, dataset_histo.GetNbinsY() + 1)): dataset_histo.SetBinError( ibinx, ibiny, dataset_histo.GetBinContent(ibinx, ibiny) * new_errors[ibiny - 1][ibinx - 1]) process_histo.Add(dataset_histo) if self.plot_systematics and not process.isData and not process.isSignal \ and syst == "central": dataset_histo_syst = dataset_histo.Clone() self.histos["bkg_histo_syst"].Add(dataset_histo_syst) yield_error = c_double(0.) process_histo.cmt_yield = process_histo.IntegralAndError( 0, process_histo.GetNbinsX() + 1, 0, process_histo.GetNbinsY() + 1, yield_error) process_histo.cmt_yield_error = yield_error.value process_histo.cmt_bin_yield = [] process_histo.cmt_bin_yield_error = [] for ibiny in range(1, process_histo.GetNbinsY() + 1): process_histo.cmt_bin_yield.append( [process_histo.GetBinContent(ibinx, ibiny) for ibinx in range(1, process_histo.GetNbinsX() + 1)] ) process_histo.cmt_bin_yield_error.append( [process_histo.GetBinError(ibinx, ibiny) for ibinx in range(1, process_histo.GetNbinsX() + 1)] ) if syst == "central": if process.isSignal: self.histos["signal"].append(process_histo) elif process.isData: self.histos["data"].append(process_histo) else: self.histos["background"].append(process_histo) if not process.isData: #or not self.hide_data: self.histos["all"].append(process_histo) else: self.histos["shape"]["%s_%s" % (syst, d)].append(process_histo) self.plot(feature_pair)