Source code for MPF.pyrootHelpers

"""
Set of utility functions used in the plotting scripts
"""
import ROOT
import os
import shutil
import itertools
import math
import collections
from array import array

from .commonHelpers.logger import logger
from .commonHelpers.pathHelpers import ensurePathExists
from . import IOHelpers as IO
from .commonHelpers.logger import logger
logger = logger.getChild(__name__)

[docs]def rootStyleColor(color): """ function to convert different color expressions to the standard ROOT int """ try: logger.debug("try to return int: {}".format(color)) return int(color) except ValueError: logger.debug("{} no int".format(color)) pass if color[:1] == '#': logger.debug("return from RGB Hex code: {}".format(color)) return ROOT.TColor.GetColor(color) else: logger.debug("return from ROOT color name: {}".format(color)) return eval('ROOT.'+color) logger.warning("{} color format is not implemented: {}".format(color, type(color)))
[docs]class SolarizedColors: yellow = rootStyleColor("#b58900") orange = rootStyleColor("#cb4b16") red = rootStyleColor("#dc322f") magenta = rootStyleColor("#d33682") violet = rootStyleColor("#6c71c4") blue = rootStyleColor("#268bd2") cyan = rootStyleColor("#2aa198") green = rootStyleColor("#859900")
[docs]class TangoColors: butter1 = rootStyleColor("#fce94f") butter2 = rootStyleColor("#edd400") butter3 = rootStyleColor("#c4a000") orange1 = rootStyleColor("#fcaf3e") orange2 = rootStyleColor("#f57900") orange3 = rootStyleColor("#ce5c00") chocolate1 = rootStyleColor("#e9b96e") chocolate2 = rootStyleColor("#c17d11") chocolate3 = rootStyleColor("#8f5902") green1 = rootStyleColor("#8ae234") green2 = rootStyleColor("#73d216") green3 = rootStyleColor("#4e9a06") blue1 = rootStyleColor("#729fcf") blue2 = rootStyleColor("#3465a4") blue3 = rootStyleColor("#204a87") plum1 = rootStyleColor("#ad7fa8") plum2 = rootStyleColor("#75507b") plum3 = rootStyleColor("#5c3566") red1 = rootStyleColor("#ef2929") red2 = rootStyleColor("#cc0000") red3 = rootStyleColor("#a40000") grey1 = rootStyleColor("#eeeeec") grey2 = rootStyleColor("#d3d7cf") grey3 = rootStyleColor("#babdb6") dark1 = rootStyleColor("#888a85") dark2 = rootStyleColor("#555753") dark3 = rootStyleColor("#2e3436")
[docs]def yieldColors(): # start with Tango colors order = [ "blue", "red", "green", "chocolate", "butter", "plum", "orange", "grey", "dark", ] for i in [2, 1, 3]: for colname in order: yield getattr(TangoColors, "{}{}".format(colname, i)) # afterwards root colors for i in itertools.count(): yield i+2
# until I understand how pyROOT works give histograms unique names import uuid
[docs]def tempNameGenerator(): while True: yield 'PHtemp{}'.format(uuid.uuid4().hex)
tempNames = tempNameGenerator()
[docs]def pdfUnite(pathList, outputfile): if len(pathList) < 2: logger.warning("not enough pdfs to merge {}".format(pathList)) return False if outputfile[-4:] != '.pdf': outputfile += '.pdf' from subprocess import call ensurePathExists(os.path.dirname(outputfile), ask=True) try: call(['pdfunite'] + pathList + [outputfile]) except OSError: logger.error("pdfunite not found or not working") logger.error("Tried to unite {} to {}".format(pathList, outputfile)) return False logger.info('Created {}'.format(outputfile)) return True
[docs]def pdfUniteOrMove(pathList, outputfile): if len(pathList) < 2: logger.info("Renaming {} to {}".format(pathList[0], outputfile)) shutil.move(pathList[0], outputfile) else: pdfUnite(pathList, outputfile)
[docs]def getMergedHist(hists): """returns a merged histogram""" if len(hists) == 1: return hists[0] comb = hists[0].Clone('comb{}'.format(next(tempNames))) comb.Reset() l = ROOT.TList() for hist in hists: l.Add(hist) comb.Merge(l) return comb
[docs]def getHistFromYieldsDict(yieldsDict): """Fills a histogram from a dictionary - keys will be the labels - the values can be given as a tuple (rawYield, yield, error) or (yield, error) or just a yield. """ histname = next(tempNames) nbins = len(yieldsDict) hist = ROOT.TH1D(histname, histname, nbins, 0, nbins) for label, yieldTuple in yieldsDict.items(): if issubclass(type(yieldTuple), (list, tuple)): if len(yieldTuple) == 3: _, y, dy = yieldTuple elif len(yieldTuple) == 2: y, dy = yieldTuple else: raise ValueError("Yield tuples have to have 2 or 3 entries") else: y = yieldTuple dy = 0 hist.Fill(label, y) index = hist.GetXaxis().FindFixBin(label) hist.SetBinError(index, dy) return hist
[docs]def getMergedYieldsDict(*yieldsDicts): mergedDict = collections.OrderedDict() for yieldsDict in yieldsDicts: for name, yieldTuple in yieldsDict.items(): if not name in mergedDict: mergedDict[name] = (0, 0., 0.) n, y, dy = mergedDict[name] n_new, y_new, dy_new = yieldTuple n += n_new y += y_new dy = math.sqrt(dy**2+dy_new**2) mergedDict[name] = (n, y, dy) return mergedDict
[docs]def scaleYieldsDict(yieldsDict, factor): for label, yieldTuple in yieldsDict.items(): n, y, dy = yieldTuple yieldsDict[label] = (n, factor*y, factor*dy)
[docs]def getBranchNames(tree): branches = [] for branch in tree.GetListOfBranches(): branches.append( branch.GetName()) return branches
[docs]def getTreeNamesFromFile(fileName, getAll=False): with IO.ROpen(fileName) as tFile: return getTreeNames(tFile, getAll)
[docs]def getTreeNames(tFile, getAll=False): if not getAll: logger.warning("Only fetching 10 tree names") trees = [] if getAll: for key in tFile.GetListOfKeys(): if issubclass(type(tFile.Get(key.GetName())), ROOT.TTree): trees.append(key.GetName()) else: for i, key in zip(range(10), tFile.GetListOfKeys()): if issubclass(type(tFile.Get(key.GetName())), ROOT.TTree): trees.append(key.GetName()) return trees
[docs]def calcPoissonCLLower(q, obs): """Calculate lower confidence limit e.g. to calculate the 68% lower limit for 2 observed events: calcPoissonCLLower(0.68, 2.) cf. plot_data_Poisson.C""" LL = 0. if obs >= 0.: a = (1. - q) / 2. # = 0.025 for 95% confidence interval LL = ROOT.TMath.ChisquareQuantile(a, 2.*obs) / 2. return LL
[docs]def calcPoissonCLUpper(q, obs): """Calculate upper confidence limit e.g. to calculate the 68% upper limit for 2 observed events: calcPoissonCLUpper(0.68, 2.)""" UL = 0. if obs >= 0.: a = 1. - (1. - q) / 2. # = 0.025 for 95% confidence interval UL = ROOT.TMath.ChisquareQuantile(a, 2.* (obs + 1.)) / 2. return UL
[docs]def getRelErrHist(hist): relhist = hist.Clone(next(tempNames)) for i in range(1, relhist.GetNbinsX()+1): content = relhist.GetBinContent(i) error = relhist.GetBinError(i) relhist.SetBinContent(i, 1.) try: relhist.SetBinError(i, error/content) except ZeroDivisionError: relhist.SetBinError(i, 0) return relhist
[docs]def getIntegralAndError(hist): dy = ROOT.Double() y = hist.IntegralAndError(0, hist.GetNbinsX(), dy) return hist.GetEntries(), float(y), float(dy)
[docs]def addOverflowToLastBin(hist): indexOverflow = hist.GetNbinsX()+1 overflowContent = hist.GetBinContent(indexOverflow) overflowError = hist.GetBinError(indexOverflow) lastBinContent = hist.GetBinContent(indexOverflow-1) lastBinError = hist.GetBinError(indexOverflow-1) sumContent = lastBinContent+overflowContent sumError = math.sqrt(lastBinError**2+overflowError**2) hist.SetBinContent(indexOverflow-1, sumContent) hist.SetBinError(indexOverflow-1, sumError) hist.SetBinContent(indexOverflow, 0.) hist.SetBinError(indexOverflow, 0.) return hist
[docs]def yieldBinNumbers(hist, overFlow=False, underFlow=False): for i in range(hist.GetNbinsX()+2): if i == 0 and not underFlow: continue if i == hist.GetNbinsX()+1 and not overFlow: continue yield i
[docs]def calculateMarginMaximum(relMargin, minimum, maximum, logy=False): """Calculate a new maximum for the given range, taking into account a relative top margin inside the pad :param relMargin: relative top margin inside the pad - for example 0.2 will leave 20% space to the top :param minimum: minimum to be used on the y-axis :param maximum: histograms current maximum :param logy: calculate for log scale? """ if not logy: maximum = (maximum-minimum)*(1+relMargin) + minimum else: logger.debug("Calculating new maximum for log scale to leave {} top margin".format(relMargin)) if minimum <= 0 or maximum <= 0: logger.warning("Minumum = {}, Maximum = {} - can't make logarithmic top margin" .format(minimum, maximum)) return maximum emax = math.log10(maximum) emin = math.log10(minimum) maximum = 10**((emax-emin)*(1+relMargin) + emin) logger.debug("Maximum (scaled): {}".format(maximum)) return maximum
[docs]def getMinMaxWithErrors(*histsOrStack, **kwargs): overFlow = kwargs.pop("overFlow", False) underFlow = kwargs.pop("underFlow", False) maximumWithErrors = kwargs.pop("maximumWithErrors", False) minimumWithErrors = kwargs.pop("minimumWithErrors", False) if kwargs: raise KeyError("Got unexpected kwargs: {}".format(kwargs)) hists = [] logger.debug("Looking for minimum and maximum in {}".format(histsOrStack)) if not histsOrStack: return None, None for hist in histsOrStack: if issubclass(type(hist), ROOT.TH1): hists.append(hist) elif issubclass(type(hist), ROOT.THStack): hists += [h for h in hist.GetStack()] maxima = [] minima = [] for hist in hists: for i in yieldBinNumbers(hist, underFlow=underFlow, overFlow=overFlow): logger.debug("bin {}: {}".format(i, hist.GetBinContent(i)+hist.GetBinError(i))) maximum = hist.GetBinContent(i) if maximumWithErrors: maximum += hist.GetBinError(i) minimum = hist.GetBinContent(i) if minimumWithErrors: minimum -= hist.GetBinError(i) maxima.append(maximum) minima.append(minimum) return min(minima), max(maxima)
[docs]def histFromGraph(g): """ Convert a TGraphAsymmErrors to a histogram. If asymmetric errors are given, they are symmetrised. The bin boundaries are determined by the x-errors """ bins = [] logger.debug("Creating histogram from TGraph {}".format(g)) for i in range(g.GetN()): x, y = ROOT.Double(), ROOT.Double() g.GetPoint(i, x, y) try: delxLow = g.GetErrorXlow(i) delxHigh = g.GetErrorXhigh(i) except AttributeError: raise TypeError("This graph is not a TGraphAsymmErrors - can't create histogram from this") if not (delxLow-delxHigh)/delxLow < 1e-5: raise ValueError("This TGraph has asymmetric x-errors - can't create histogram from this") delx = delxLow if not bins: bins.append(x-delx) bins.append(x+delx) logger.debug("bins: {}".format(bins)) xbins = array('d', bins) h = ROOT.TH1F(g.GetName(), g.GetName(), len(bins)-1, xbins) for i in range(g.GetN()): x = ROOT.Double() y = ROOT.Double() ey = g.GetErrorY(i) g.GetPoint(i, x, y) logger.debug("bin {}: {} +/- {}".format(x, y, ey)) h.SetBinContent(i+1, y) if ey > 0: h.SetBinError(i+1, ey) else: h.SetBinError(i+1, 0.) h.GetXaxis().SetTitle(g.GetHistogram().GetXaxis().GetTitle()) return h
[docs]def setBatchMode(): """Set ROOT to batch and ignore command line options""" ROOT.gROOT.SetBatch() ROOT.PyConfig.IgnoreCommandLineOptions = True
[docs]def getExpCLsAsimov(s, b, db): """ Calculate the expected CLs (CLs=p_(s+b)/p_b) value based on the asymptotic approximation. See Eur.Phys.J.C71, `arXiv:1007.1727 <https://arxiv.org/abs/1007.1727>`_ ("CCGV paper") for the formulas. The derivation follows the same procedure as described for p0 in <http://www.pp.rhul.ac.uk/~cowan/stat/medsig/medsigNote.pdf> """ def ll(n, m, mu, s, b, tau): "Log likelihood without factorials (cancel in ratio)" return n*math.log(mu*s+b)-(mu*s+b)+m*math.log(tau*b)-tau*b def getPsb(s, b, tau): "Expected p-value for s+b hypothesis" mu = 1 # Asimov dataset for mu=0 (expected exclusion) n = b m = tau*b # MLEs from CCGV paper muhat = (n-m/tau)/s bhat = m/tau bhathat = (n+m-(1+tau)*mu*s+math.sqrt((n+m-(1+tau)*mu*s)**2\ +4*(1+tau)*m*mu*s))/(2*(1+tau)) condll = ll(n, m, mu, s, bhathat, tau) uncondll = ll(n, m, muhat, s, bhat, tau) return ROOT.RooStats.SignificanceToPValue(math.sqrt(-2.*(condll-uncondll))) # in the asymptotic limit, the expected p_b = 0.5 return 2*getPsb(s, b, b/(db**2))