Source code for MPF.pyrootHelpers

"""
Set of utility functions used in the plotting scripts
"""
import ROOT
import os
import shutil
import itertools
import math
import collections
from array import array

from .commonHelpers.logger import logger
from .commonHelpers.pathHelpers import ensurePathExists
from . import IOHelpers as IO
from .commonHelpers.logger import logger
logger = logger.getChild(__name__)

[docs]def rootStyleColor(color):
    """ function to convert different color expressions to the standard ROOT int """
    try:
        logger.debug("try to return int: {}".format(color))
        return int(color)
    except ValueError:
        logger.debug("{} no int".format(color))
        pass

    if color[:1] == '#':
        logger.debug("return from RGB Hex code: {}".format(color))
        return ROOT.TColor.GetColor(color)
    else:
        logger.debug("return from ROOT color name: {}".format(color))
        return eval('ROOT.'+color)
    logger.warning("{} color format is not implemented: {}".format(color, type(color)))

[docs]class SolarizedColors:
    yellow = rootStyleColor("#b58900")
    orange = rootStyleColor("#cb4b16")
    red = rootStyleColor("#dc322f")
    magenta = rootStyleColor("#d33682")
    violet = rootStyleColor("#6c71c4")
    blue = rootStyleColor("#268bd2")
    cyan = rootStyleColor("#2aa198")
    green = rootStyleColor("#859900")

[docs]class TangoColors:
    butter1 = rootStyleColor("#fce94f")
    butter2 = rootStyleColor("#edd400")
    butter3 = rootStyleColor("#c4a000")
    orange1 = rootStyleColor("#fcaf3e")
    orange2 = rootStyleColor("#f57900")
    orange3 = rootStyleColor("#ce5c00")
    chocolate1 = rootStyleColor("#e9b96e")
    chocolate2 = rootStyleColor("#c17d11")
    chocolate3 = rootStyleColor("#8f5902")
    green1 = rootStyleColor("#8ae234")
    green2 = rootStyleColor("#73d216")
    green3 = rootStyleColor("#4e9a06")
    blue1 = rootStyleColor("#729fcf")
    blue2 = rootStyleColor("#3465a4")
    blue3 = rootStyleColor("#204a87")
    plum1 = rootStyleColor("#ad7fa8")
    plum2 = rootStyleColor("#75507b")
    plum3 = rootStyleColor("#5c3566")
    red1 = rootStyleColor("#ef2929")
    red2 = rootStyleColor("#cc0000")
    red3 = rootStyleColor("#a40000")
    grey1 = rootStyleColor("#eeeeec")
    grey2 = rootStyleColor("#d3d7cf")
    grey3 = rootStyleColor("#babdb6")
    dark1 = rootStyleColor("#888a85")
    dark2 = rootStyleColor("#555753")
    dark3 = rootStyleColor("#2e3436")

[docs]def yieldColors():
    # start with Tango colors
    order = [
        "blue",
        "red",
        "green",
        "chocolate",
        "butter",
        "plum",
        "orange",
        "grey",
        "dark",
    ]
    for i in [2, 1, 3]:
        for colname in order:
            yield getattr(TangoColors, "{}{}".format(colname, i))
    # afterwards root colors
    for i in itertools.count():
        yield i+2

# until I understand how pyROOT works give histograms unique names
import uuid
[docs]def tempNameGenerator():
    while True:
        yield 'PHtemp{}'.format(uuid.uuid4().hex)
tempNames = tempNameGenerator()

[docs]def pdfUnite(pathList, outputfile):
    if len(pathList) < 2:
        logger.warning("not enough pdfs to merge {}".format(pathList))
        return False
    if outputfile[-4:] != '.pdf':
        outputfile += '.pdf'
    from subprocess import call
    ensurePathExists(os.path.dirname(outputfile), ask=True)
    try:
        call(['pdfunite'] + pathList + [outputfile])
    except OSError:
        logger.error("pdfunite not found or not working")
        logger.error("Tried to unite {} to {}".format(pathList, outputfile))
        return False
    logger.info('Created {}'.format(outputfile))
    return True

[docs]def pdfUniteOrMove(pathList, outputfile):
    if len(pathList) < 2:
        logger.info("Renaming {} to {}".format(pathList[0], outputfile))
        shutil.move(pathList[0], outputfile)
    else:
        pdfUnite(pathList, outputfile)

[docs]def getMergedHist(hists):
    """returns a merged histogram"""
    if len(hists) == 1:
        return hists[0]
    comb = hists[0].Clone('comb{}'.format(next(tempNames)))
    comb.Reset()
    l = ROOT.TList()
    for hist in hists:
        l.Add(hist)
    comb.Merge(l)
    return comb

[docs]def getHistFromYieldsDict(yieldsDict):
    """Fills a histogram from a dictionary - keys will be the labels - the
    values can be given as a tuple (rawYield, yield, error) or
    (yield, error) or just a yield.
    """
    histname = next(tempNames)
    nbins = len(yieldsDict)
    hist = ROOT.TH1D(histname, histname, nbins, 0, nbins)
    for label, yieldTuple in yieldsDict.items():
        if issubclass(type(yieldTuple), (list, tuple)):
            if len(yieldTuple) == 3:
                _, y, dy = yieldTuple
            elif len(yieldTuple) == 2:
                y, dy = yieldTuple
            else:
                raise ValueError("Yield tuples have to have 2 or 3 entries")
        else:
            y = yieldTuple
            dy = 0
        hist.Fill(label, y)
        index = hist.GetXaxis().FindFixBin(label)
        hist.SetBinError(index, dy)
    return hist

[docs]def getMergedYieldsDict(*yieldsDicts):
    mergedDict = collections.OrderedDict()
    for yieldsDict in yieldsDicts:
        for name, yieldTuple in yieldsDict.items():
            if not name in mergedDict:
                mergedDict[name] = (0, 0., 0.)
            n, y, dy = mergedDict[name]
            n_new, y_new, dy_new = yieldTuple
            n += n_new
            y += y_new
            dy = math.sqrt(dy**2+dy_new**2)
            mergedDict[name] = (n, y, dy)
    return mergedDict

[docs]def scaleYieldsDict(yieldsDict, factor):
    for label, yieldTuple in yieldsDict.items():
        n, y, dy = yieldTuple
        yieldsDict[label] = (n, factor*y, factor*dy)

[docs]def getBranchNames(tree):
    branches = []
    for branch in tree.GetListOfBranches():
        branches.append( branch.GetName())
    return branches

[docs]def getTreeNamesFromFile(fileName, getAll=False):
    with IO.ROpen(fileName) as tFile:
        return getTreeNames(tFile, getAll)

[docs]def getTreeNames(tFile, getAll=False):
    if not getAll:
        logger.warning("Only fetching 10 tree names")
    trees = []
    if getAll:
        for key in tFile.GetListOfKeys():
            if issubclass(type(tFile.Get(key.GetName())), ROOT.TTree):
                trees.append(key.GetName())
    else:
        for i, key in zip(range(10), tFile.GetListOfKeys()):
            if issubclass(type(tFile.Get(key.GetName())), ROOT.TTree):
                trees.append(key.GetName())
    return trees


[docs]def calcPoissonCLLower(q, obs):
    """Calculate lower confidence limit
       e.g. to calculate the 68% lower limit for 2 observed events:
       calcPoissonCLLower(0.68, 2.)
       cf. plot_data_Poisson.C"""
    LL = 0.
    if obs >= 0.:
        a = (1. - q) / 2. # = 0.025 for 95% confidence interval
        LL = ROOT.TMath.ChisquareQuantile(a, 2.*obs) / 2.
    return LL
[docs]def calcPoissonCLUpper(q, obs):
    """Calculate upper confidence limit
       e.g. to calculate the 68% upper limit for 2 observed events:
       calcPoissonCLUpper(0.68, 2.)"""
    UL = 0.
    if obs >= 0.:
        a = 1. - (1. - q) / 2. # = 0.025 for 95% confidence interval
        UL = ROOT.TMath.ChisquareQuantile(a, 2.* (obs + 1.)) / 2.
    return UL


[docs]def getRelErrHist(hist):
    relhist = hist.Clone(next(tempNames))
    for i in range(1, relhist.GetNbinsX()+1):
        content = relhist.GetBinContent(i)
        error = relhist.GetBinError(i)
        relhist.SetBinContent(i, 1.)
        try:
            relhist.SetBinError(i, error/content)
        except ZeroDivisionError:
            relhist.SetBinError(i, 0)
    return relhist

[docs]def getIntegralAndError(hist):
    dy = ROOT.Double()
    y = hist.IntegralAndError(0, hist.GetNbinsX(), dy)
    return hist.GetEntries(), float(y), float(dy)

[docs]def addOverflowToLastBin(hist):
    indexOverflow = hist.GetNbinsX()+1
    overflowContent = hist.GetBinContent(indexOverflow)
    overflowError = hist.GetBinError(indexOverflow)
    lastBinContent = hist.GetBinContent(indexOverflow-1)
    lastBinError = hist.GetBinError(indexOverflow-1)
    sumContent = lastBinContent+overflowContent
    sumError = math.sqrt(lastBinError**2+overflowError**2)
    hist.SetBinContent(indexOverflow-1, sumContent)
    hist.SetBinError(indexOverflow-1, sumError)
    hist.SetBinContent(indexOverflow, 0.)
    hist.SetBinError(indexOverflow, 0.)
    return hist


[docs]def yieldBinNumbers(hist, overFlow=False, underFlow=False):
    for i in range(hist.GetNbinsX()+2):
        if i == 0 and not underFlow:
            continue
        if i == hist.GetNbinsX()+1 and not overFlow:
            continue
        yield i


[docs]def calculateMarginMaximum(relMargin, minimum, maximum, logy=False):
    """Calculate a new maximum for the given range, taking into account a
    relative top margin inside the pad

    :param relMargin: relative top margin inside the pad - for example
                      0.2 will leave 20% space to the top

    :param minimum: minimum to be used on the y-axis

    :param maximum: histograms current maximum

    :param logy: calculate for log scale?

    """
    if not logy:
        maximum = (maximum-minimum)*(1+relMargin) + minimum
    else:
        logger.debug("Calculating new maximum for log scale to leave {} top margin".format(relMargin))
        if minimum <= 0 or maximum <= 0:
            logger.warning("Minumum = {}, Maximum = {} - can't make logarithmic top margin"
                           .format(minimum, maximum))
            return maximum
        emax = math.log10(maximum)
        emin = math.log10(minimum)
        maximum = 10**((emax-emin)*(1+relMargin) + emin)
    logger.debug("Maximum (scaled): {}".format(maximum))
    return maximum


[docs]def getMinMaxWithErrors(*histsOrStack, **kwargs):

    overFlow = kwargs.pop("overFlow", False)
    underFlow = kwargs.pop("underFlow", False)
    maximumWithErrors = kwargs.pop("maximumWithErrors", False)
    minimumWithErrors = kwargs.pop("minimumWithErrors", False)
    if kwargs:
        raise KeyError("Got unexpected kwargs: {}".format(kwargs))

    hists = []
    logger.debug("Looking for minimum and maximum in {}".format(histsOrStack))
    if not histsOrStack:
        return None, None
    for hist in histsOrStack:
        if issubclass(type(hist), ROOT.TH1):
            hists.append(hist)
        elif issubclass(type(hist), ROOT.THStack):
            hists += [h for h in hist.GetStack()]
    maxima = []
    minima = []
    for hist in hists:
        for i in yieldBinNumbers(hist, underFlow=underFlow, overFlow=overFlow):
            logger.debug("bin {}: {}".format(i, hist.GetBinContent(i)+hist.GetBinError(i)))
            maximum = hist.GetBinContent(i)
            if maximumWithErrors:
                maximum += hist.GetBinError(i)
            minimum = hist.GetBinContent(i)
            if minimumWithErrors:
                minimum -= hist.GetBinError(i)
            maxima.append(maximum)
            minima.append(minimum)
    return min(minima), max(maxima)


[docs]def histFromGraph(g):
    """
    Convert a TGraphAsymmErrors to a histogram. If asymmetric errors are given,
    they are symmetrised. The bin boundaries are determined by the x-errors
    """
    bins = []
    logger.debug("Creating histogram from TGraph {}".format(g))
    for i in range(g.GetN()):
        x, y  = ROOT.Double(), ROOT.Double()
        g.GetPoint(i, x, y)
        try:
            delxLow = g.GetErrorXlow(i)
            delxHigh = g.GetErrorXhigh(i)
        except AttributeError:
            raise TypeError("This graph is not a TGraphAsymmErrors - can't create histogram from this")
        if not (delxLow-delxHigh)/delxLow < 1e-5:
            raise ValueError("This TGraph has asymmetric x-errors - can't create histogram from this")
        delx = delxLow
        if not bins:
            bins.append(x-delx)
        bins.append(x+delx)
    logger.debug("bins: {}".format(bins))
    xbins = array('d', bins)
    h = ROOT.TH1F(g.GetName(), g.GetName(), len(bins)-1, xbins)
    for i in range(g.GetN()):
        x = ROOT.Double()
        y = ROOT.Double()
        ey = g.GetErrorY(i)
        g.GetPoint(i, x, y)
        logger.debug("bin {}: {} +/- {}".format(x, y, ey))
        h.SetBinContent(i+1, y)
        if ey > 0:
            h.SetBinError(i+1, ey)
        else:
            h.SetBinError(i+1, 0.)
    h.GetXaxis().SetTitle(g.GetHistogram().GetXaxis().GetTitle())
    return h


[docs]def setBatchMode():
    """Set ROOT to batch and ignore command line options"""
    ROOT.gROOT.SetBatch()
    ROOT.PyConfig.IgnoreCommandLineOptions = True


[docs]def getExpCLsAsimov(s, b, db):

    """
    Calculate the expected CLs (CLs=p_(s+b)/p_b) value based on the asymptotic approximation.
    See Eur.Phys.J.C71, `arXiv:1007.1727 <https://arxiv.org/abs/1007.1727>`_ ("CCGV paper") for the formulas.
    The derivation follows the same procedure as described for p0 in  <http://www.pp.rhul.ac.uk/~cowan/stat/medsig/medsigNote.pdf>
    """

    def ll(n, m, mu, s, b, tau):
        "Log likelihood without factorials (cancel in ratio)"
        return n*math.log(mu*s+b)-(mu*s+b)+m*math.log(tau*b)-tau*b

    def getPsb(s, b, tau):
        "Expected p-value for s+b hypothesis"

        mu = 1

        # Asimov dataset for mu=0 (expected exclusion)
        n = b
        m = tau*b

        # MLEs from CCGV paper
        muhat = (n-m/tau)/s
        bhat = m/tau
        bhathat = (n+m-(1+tau)*mu*s+math.sqrt((n+m-(1+tau)*mu*s)**2\
                   +4*(1+tau)*m*mu*s))/(2*(1+tau))

        condll = ll(n, m, mu, s, bhathat, tau)
        uncondll = ll(n, m, muhat, s, bhat, tau)

        return ROOT.RooStats.SignificanceToPValue(math.sqrt(-2.*(condll-uncondll)))

    # in the asymptotic limit, the expected p_b = 0.5
    return 2*getPsb(s, b, b/(db**2))