Source code for MPF.multiHistDrawer

#!/usr/bin/env python

"""
This module tries to implement a `TTree::Draw
<https://root.cern.ch/doc/master/classTTree.html#ac4016b174665a086fe16695aad3356e2>`_
like functionality with the addition that multiple histograms can be
filled at once.

.. note::

  For most use cases it is better to use the functionality via
  :py:class:`~MPF.histProjector.HistProjector` or
  :py:class:`~MPF.processProjector.ProcessProjector` instead of
  directly invoking :py:class:`~MPF.multiHistDrawer.MultiHistDrawer`

The code is rather experimental - it uses the `TTree::MakeClass
<https://root.cern.ch/doc/master/classTTree.html#ac4ceaf4ae0b87412acf94093043cc2de>`_
code generator and pastes in the nescessary additions to fill the
required histograms. Everything from generating the code and executing
it is done automatically.

*Whatever, it runs fine for now. - So does a burning bus.* [1]_

Example::

  d = MultiHistDrawer(path, treeName)
  hist1 = d.addHist(varexp=var1, xmin=xmin1, xmax=xmax1, nbins=nbins1)
  hist2 = d.addHist(varexp=var2, xmin=xmin2, xmax=xmax2, nbins=nbins2)
  d.run()

  # now hist1 and hist2 should be filled

It should be noted that there are no checks on the input
performed. This makes code injection possible, so don't pass arbitrary
user input to
:py:meth:`~MPF.multiHistDrawer.MultiHistDrawer.addHist`. On the other
hand it can also be used at your benefit::

  d.addHist(varexp=\"\"\"1);
  line_10: printf("LOOK AROUND YOU ");
  line_20: goto line_10;
  //\"\"\", cut="1", weight="1")


.. [1] https://xkcd.com/1695/

"""

import collections
import glob
import os
import uuid
import atexit
import re
from array import array
import tempfile

import ROOT

from .meme import cache
from .commonHelpers.logger import logger
from . import pyrootHelpers as PH
from . import IOHelpers as IO
from .atlasStyle import setAtlasStyle
logger = logger.getChild(__name__)

generatedClasses = []

[docs]class MultiHistDrawer: classdir = None """Directory to work in for the temporary classes. If None, the systems tmp directory is used""" skipCleanup = False """for debug purposes - set true to keep generated c files""" def __init__(self, path, treeName): self.path = path self.treeName = treeName self.rootfile = ROOT.TFile.Open(path) self.tree = self.rootfile.Get(self.treeName) if not self.tree: raise KeyError("{} not found in file {}, try: {}".format(treeName, path, PH.getTreeNames(self.rootfile))) self.hists = [] self.classnumber = uuid.uuid4().hex ## hehe self.classname = "_tmpClass_{}".format(self.classnumber) generatedClasses.append(self.classname) setAtlasStyle()
[docs] def get_2D_expr(self, varexp): """ Parse expressions like x:y for drawing 2D histograms, also enabling the possibility to have expressions containing "::" """ match = re.match("(.*?)([^:]):([^:])(.*)", varexp) if match is None: return match else: x = "".join(match.groups()[0:2]) y = "".join(match.groups()[2:4]) return x, y
[docs] def addHist(self, cut, weight, **kwargs): """ Add a histogram to be filled later. Returns the histogram (still unfilled). :param cut: selection expression :param varexp: expression to be filled into the histogram, default "1" :param weight: additional expression, to be multiplied with cut, default "1" :param xmin: minimum value to be filled into the histogram, default 0.5 :param xmax: maximum value to be filled into the histogram, default 1.5 :param nbins: number of bins between xmin and xmax, default 1 :param binLowEdges: list of low edges for variable binning, the first value is the lower bound of the first bin, the last value the upper bound of the last bin. If given, xmin, xmax and nbins are ignored. :param ymin: minimum y-value to be filled into the histogram, default 0.5 :param ymax: maximum y-value to be filled into the histogram, default 1.5 :param nbinsy: number of bins between ymin and ymax, default 1 :param yBinLowEdges: list of low edges for variable binning of the y-axis, the first value is the lower bound of the first bin, the last value the upper bound of the last bin. If given, ymin, ymax and nbinsy are ignored. """ varexp = kwargs.get("varexp", "1") xmin = kwargs.get("xmin", 0.5) xmax = kwargs.get("xmax", 1.5) nbins = kwargs.get("nbins", 1) binLowEdges = kwargs.get("binLowEdges", None) its2d = False match_2d = self.get_2D_expr(varexp) if match_2d is not None: its2d = True xvar, yvar = match_2d ymin = kwargs.get("ymin", 0.5) ymax = kwargs.get("ymax", 1.5) nbinsy = kwargs.get("nbinsy", 1) yBinLowEdges = kwargs.get("yBinLowEdges", None) name = "hist_"+uuid.uuid4().hex if not its2d: if not binLowEdges: roothist = ROOT.TH1D(name, "", nbins, xmin, xmax) else: xbins = array('d', binLowEdges) roothist = ROOT.TH1D(name, "", len(binLowEdges)-1, xbins) roothist.GetXaxis().SetTitle(varexp) else: if not binLowEdges and not yBinLowEdges: roothist = ROOT.TH2D(name, "", nbins, xmin, xmax, nbinsy, ymin, ymax) elif not binLowEdges and yBinLowEdges: roothist = ROOT.TH2D(name, "", nbins, xmin, xmax, len(yBinLowEdges)-1, array('d', yBinLowEdges)) elif binLowEdges and not yBinLowEdges: roothist = ROOT.TH2D(name, "", len(binLowEdges)-1, array('d', binLowEdges), nbinsy, ymin, ymax) else: roothist = ROOT.TH2D(name, "", len(binLowEdges)-1, array('d', binLowEdges), len(yBinLowEdges)-1, array('d', yBinLowEdges)) roothist.GetXaxis().SetTitle(xvar) roothist.GetYaxis().SetTitle(yvar) roothist.Sumw2() # merge cut and weight cut = "({})*({})".format(cut, weight) # support for simple vector branch expressions varexp = re.sub("\[([0-9]+)\]", "->at(\\1)", varexp) cut = re.sub("\[([0-9]+)\]", "->at(\\1)", cut) hist = {"name" : name, "roothist" : roothist, "varexp" : varexp, "cut" : cut} self.hists.append(hist) hist["roothist"].SetDirectory(0) # seems important return hist["roothist"]
[docs] def getOverallOR(self): "Return an expression that evaluates if any of the used selections passed" return "||".join(["({})".format(_hist["cut"]) for _hist in self.hists])
[docs] def generate(self): "Generate the c++ code" self.tree.MakeClass(self.classname) headerlines = None with open("{}.h".format(self.classname)) as f: headerlines = f.readlines() with open("{}.h".format(self.classname), "w") as of: for line in headerlines: # if "Loop();" in line: # of.write(" virtual TH1D* Loop();\n") if "class {} {{".format(self.classname) in line: of.write("#include \"TH1.h\"\n#include \"TH2.h\"\n") of.write("#include \"TLorentzVector.h\"\n") of.write(line) continue if "public" in line: of.write(line) for hist in self.hists: if hist["roothist"].ClassName() == "TH1D": of.write("TH1D* {};\n".format(hist["name"])) elif hist["roothist"].ClassName() == "TH2D": of.write("TH2D* {};\n".format(hist["name"])) else: raise NotImplementedError("I can only fill TH1Ds and TH2Ds. Sorry!") continue of.write(line) with open("{}.C".format(self.classname), "w") as of: of.write("""#define {classname}_cxx #include "{classname}.h" #include <TH2.h> #include <TStyle.h> #include <TCanvas.h> #include <iostream> void {classname}::Loop() {{ if (fChain == 0) return; Long64_t nentries = fChain->GetEntriesFast(); fChain->SetBranchStatus(\"*\", 0); """.format(classname=self.classname)) for branchname in [_b.GetName() for _b in self.tree.GetListOfBranches()]: if self.branchUsed(branchname): logger.debug("Activating branch {}".format(branchname)) of.write("{:>10}fChain->SetBranchStatus(\"{}\", 1);\n".format("", branchname)) of.write(""" Long64_t nbytes = 0, nb = 0; for (Long64_t jentry=0; jentry<nentries;jentry++) { Long64_t ientry = LoadTree(jentry); if (ientry < 0) break; nb = fChain->GetEntry(jentry); nbytes += nb; """) of.write("{:>14}if (!({})) continue;\n".format("", self.getOverallOR())) for hist in self.hists: match_2d = self.get_2D_expr(hist.get("varexp")) if match_2d is not None: y, x = match_2d of.write("{:>14}if ({cut}) {name}->Fill({}, {}, {cut});\n".format("", x, y, **dict(hist))) else: of.write("{:>14}if ({cut}) {name}->Fill({varexp}, {cut});\n".format("", **dict(hist))) of.write(""" } } """)
[docs] def branchUsed(self, branchName): """Determine wether a branch is used for any cut or varexp. This is not perfect - it might activate too many branches. But it shouldn't miss any (i hope)""" for hist in self.hists: if branchName in hist["varexp"]: return True if branchName in hist["cut"]: return True return False
[docs] def run(self, compile=False): """ Finally generate and run the code which will fill the hists. :param compile: If True, use "++" for loading the code. Often the overhead for this is larger as the speed improvement. """ with IO.workInTempDir(baseDir=self.classdir, skipCleanup=self.skipCleanup, prefix="mhd_", cleanAtExit=True): logger.debug("Current directory: {}".format(os.getcwd())) self.generate() logger.debug("Loading generated c++ file") if compile: ROOT.gROOT.ProcessLine(".L {}.C++".format(self.classname)) else: ROOT.gROOT.ProcessLine(".L {}.C".format(self.classname)) logger.debug("Loading done") looper = ROOT.__getattr__(self.classname)(self.tree) for hist in self.hists: looper.__setattr__(hist["name"], hist["roothist"]) logger.info("Loop over tree {} in file {}".format(self.treeName, self.path)) looper.Loop() for hist in self.hists: if hist["roothist"].GetEntries() == 0: logger.warning("no events extracted for tree {} in file {} (varexp=\"{}\", cut=\"{}\")" .format(self.treeName, self.path, hist["varexp"], hist["cut"]))
[docs]@cache("_cache", useJSON=True) def getYields(cutsDict, weight, path, treeName): drawer = MultiHistDrawer(path, treeName) yieldHists = {} for region, cut in cutsDict.items(): logger.debug("Adding hist for cut region {} with cut {}".format(region, cut)) yieldHists[region] = drawer.addHist(cut, weight) logger.debug("Running drawer") #drawer.run(compile=True) drawer.run(compile=False) regionYields = {} for region, hist in yieldHists.items(): logger.debug("Got a hist: {}".format(hist)) logger.debug("Integral: {}".format(hist.Integral())) err = ROOT.Double() yie = hist.IntegralAndError(0, hist.GetNbinsX(), err) n = hist.GetEntries() regionYields[region] = (n, yie, float(err)) return regionYields
[docs]@cache("_cache", useJSON=True) def getHists(histsConfDict, path, treeName, compile=True): """ Wrapper to get all hists for a like dict:: { "hist1" : {nbins=nbins1, xmin=xmin1, xmax=xmax1, varexp=var1, cut=cut1, weight=weight1}, "hist2" : {nbins=nbins2, xmin=xmin2, xmax=xmax2, varexp=var2, cut=cut2, weight=weight2}, ... } Returns a dict with histograms """ drawer = MultiHistDrawer(path, treeName) histsDict = {} for histname, kwargs in histsConfDict.items(): logger.debug("Adding hist {} with kwargs {}".format(histname, kwargs)) histsDict[histname] = drawer.addHist(**kwargs) logger.debug("Running drawer") drawer.run(compile=compile) return histsDict
[docs]def getHistsPaths(histConfDict, treeName, *paths, **kwargs): """ call getHists and merges the returned histsDicts for all given paths """ compile = kwargs.pop("compile", True) if kwargs: raise KeyError("Got Unexpected kwargs: {}".format(kwargs)) histListDict = {} for path in paths: histsDict = getHists(histConfDict, path, treeName, compile=compile) for histname, hist in histsDict.items(): if not histname in histListDict: histListDict[histname] = [] histListDict[histname].append(hist) mergedHistDict = {} for histname, histlist in histListDict.items(): mergedHistDict[histname] = PH.getMergedHist(histlist) return mergedHistDict