"""
Set of utility functions used in the plotting scripts
"""
import ROOT
import os
import shutil
import itertools
import math
import collections
from array import array
from .commonHelpers.logger import logger
from .commonHelpers.pathHelpers import ensurePathExists
from . import IOHelpers as IO
from .commonHelpers.logger import logger
logger = logger.getChild(__name__)
[docs]def rootStyleColor(color):
""" function to convert different color expressions to the standard ROOT int """
try:
logger.debug("try to return int: {}".format(color))
return int(color)
except ValueError:
logger.debug("{} no int".format(color))
pass
if color[:1] == '#':
logger.debug("return from RGB Hex code: {}".format(color))
return ROOT.TColor.GetColor(color)
else:
logger.debug("return from ROOT color name: {}".format(color))
return eval('ROOT.'+color)
logger.warning("{} color format is not implemented: {}".format(color, type(color)))
[docs]class SolarizedColors:
yellow = rootStyleColor("#b58900")
orange = rootStyleColor("#cb4b16")
red = rootStyleColor("#dc322f")
magenta = rootStyleColor("#d33682")
violet = rootStyleColor("#6c71c4")
blue = rootStyleColor("#268bd2")
cyan = rootStyleColor("#2aa198")
green = rootStyleColor("#859900")
[docs]class TangoColors:
butter1 = rootStyleColor("#fce94f")
butter2 = rootStyleColor("#edd400")
butter3 = rootStyleColor("#c4a000")
orange1 = rootStyleColor("#fcaf3e")
orange2 = rootStyleColor("#f57900")
orange3 = rootStyleColor("#ce5c00")
chocolate1 = rootStyleColor("#e9b96e")
chocolate2 = rootStyleColor("#c17d11")
chocolate3 = rootStyleColor("#8f5902")
green1 = rootStyleColor("#8ae234")
green2 = rootStyleColor("#73d216")
green3 = rootStyleColor("#4e9a06")
blue1 = rootStyleColor("#729fcf")
blue2 = rootStyleColor("#3465a4")
blue3 = rootStyleColor("#204a87")
plum1 = rootStyleColor("#ad7fa8")
plum2 = rootStyleColor("#75507b")
plum3 = rootStyleColor("#5c3566")
red1 = rootStyleColor("#ef2929")
red2 = rootStyleColor("#cc0000")
red3 = rootStyleColor("#a40000")
grey1 = rootStyleColor("#eeeeec")
grey2 = rootStyleColor("#d3d7cf")
grey3 = rootStyleColor("#babdb6")
dark1 = rootStyleColor("#888a85")
dark2 = rootStyleColor("#555753")
dark3 = rootStyleColor("#2e3436")
[docs]def yieldColors():
# start with Tango colors
order = [
"blue",
"red",
"green",
"chocolate",
"butter",
"plum",
"orange",
"grey",
"dark",
]
for i in [2, 1, 3]:
for colname in order:
yield getattr(TangoColors, "{}{}".format(colname, i))
# afterwards root colors
for i in itertools.count():
yield i+2
# until I understand how pyROOT works give histograms unique names
import uuid
[docs]def tempNameGenerator():
while True:
yield 'PHtemp{}'.format(uuid.uuid4().hex)
tempNames = tempNameGenerator()
[docs]def pdfUnite(pathList, outputfile):
if len(pathList) < 2:
logger.warning("not enough pdfs to merge {}".format(pathList))
return False
if outputfile[-4:] != '.pdf':
outputfile += '.pdf'
from subprocess import call
ensurePathExists(os.path.dirname(outputfile), ask=True)
try:
call(['pdfunite'] + pathList + [outputfile])
except OSError:
logger.error("pdfunite not found or not working")
logger.error("Tried to unite {} to {}".format(pathList, outputfile))
return False
logger.info('Created {}'.format(outputfile))
return True
[docs]def pdfUniteOrMove(pathList, outputfile):
if len(pathList) < 2:
logger.info("Renaming {} to {}".format(pathList[0], outputfile))
shutil.move(pathList[0], outputfile)
else:
pdfUnite(pathList, outputfile)
[docs]def getMergedHist(hists):
"""returns a merged histogram"""
if len(hists) == 1:
return hists[0]
comb = hists[0].Clone('comb{}'.format(next(tempNames)))
comb.Reset()
l = ROOT.TList()
for hist in hists:
l.Add(hist)
comb.Merge(l)
return comb
[docs]def getHistFromYieldsDict(yieldsDict):
"""Fills a histogram from a dictionary - keys will be the labels - the
values can be given as a tuple (rawYield, yield, error) or
(yield, error) or just a yield.
"""
histname = next(tempNames)
nbins = len(yieldsDict)
hist = ROOT.TH1D(histname, histname, nbins, 0, nbins)
for label, yieldTuple in yieldsDict.items():
if issubclass(type(yieldTuple), (list, tuple)):
if len(yieldTuple) == 3:
_, y, dy = yieldTuple
elif len(yieldTuple) == 2:
y, dy = yieldTuple
else:
raise ValueError("Yield tuples have to have 2 or 3 entries")
else:
y = yieldTuple
dy = 0
hist.Fill(label, y)
index = hist.GetXaxis().FindFixBin(label)
hist.SetBinError(index, dy)
return hist
[docs]def getMergedYieldsDict(*yieldsDicts):
mergedDict = collections.OrderedDict()
for yieldsDict in yieldsDicts:
for name, yieldTuple in yieldsDict.items():
if not name in mergedDict:
mergedDict[name] = (0, 0., 0.)
n, y, dy = mergedDict[name]
n_new, y_new, dy_new = yieldTuple
n += n_new
y += y_new
dy = math.sqrt(dy**2+dy_new**2)
mergedDict[name] = (n, y, dy)
return mergedDict
[docs]def scaleYieldsDict(yieldsDict, factor):
for label, yieldTuple in yieldsDict.items():
n, y, dy = yieldTuple
yieldsDict[label] = (n, factor*y, factor*dy)
[docs]def getBranchNames(tree):
branches = []
for branch in tree.GetListOfBranches():
branches.append( branch.GetName())
return branches
[docs]def getTreeNamesFromFile(fileName, getAll=False):
with IO.ROpen(fileName) as tFile:
return getTreeNames(tFile, getAll)
[docs]def getTreeNames(tFile, getAll=False):
if not getAll:
logger.warning("Only fetching 10 tree names")
trees = []
if getAll:
for key in tFile.GetListOfKeys():
if issubclass(type(tFile.Get(key.GetName())), ROOT.TTree):
trees.append(key.GetName())
else:
for i, key in zip(range(10), tFile.GetListOfKeys()):
if issubclass(type(tFile.Get(key.GetName())), ROOT.TTree):
trees.append(key.GetName())
return trees
[docs]def calcPoissonCLLower(q, obs):
"""Calculate lower confidence limit
e.g. to calculate the 68% lower limit for 2 observed events:
calcPoissonCLLower(0.68, 2.)
cf. plot_data_Poisson.C"""
LL = 0.
if obs >= 0.:
a = (1. - q) / 2. # = 0.025 for 95% confidence interval
LL = ROOT.TMath.ChisquareQuantile(a, 2.*obs) / 2.
return LL
[docs]def calcPoissonCLUpper(q, obs):
"""Calculate upper confidence limit
e.g. to calculate the 68% upper limit for 2 observed events:
calcPoissonCLUpper(0.68, 2.)"""
UL = 0.
if obs >= 0.:
a = 1. - (1. - q) / 2. # = 0.025 for 95% confidence interval
UL = ROOT.TMath.ChisquareQuantile(a, 2.* (obs + 1.)) / 2.
return UL
[docs]def getRelErrHist(hist):
relhist = hist.Clone(next(tempNames))
for i in range(1, relhist.GetNbinsX()+1):
content = relhist.GetBinContent(i)
error = relhist.GetBinError(i)
relhist.SetBinContent(i, 1.)
try:
relhist.SetBinError(i, error/content)
except ZeroDivisionError:
relhist.SetBinError(i, 0)
return relhist
[docs]def getIntegralAndError(hist):
dy = ROOT.Double()
y = hist.IntegralAndError(0, hist.GetNbinsX(), dy)
return hist.GetEntries(), float(y), float(dy)
[docs]def addOverflowToLastBin(hist):
indexOverflow = hist.GetNbinsX()+1
overflowContent = hist.GetBinContent(indexOverflow)
overflowError = hist.GetBinError(indexOverflow)
lastBinContent = hist.GetBinContent(indexOverflow-1)
lastBinError = hist.GetBinError(indexOverflow-1)
sumContent = lastBinContent+overflowContent
sumError = math.sqrt(lastBinError**2+overflowError**2)
hist.SetBinContent(indexOverflow-1, sumContent)
hist.SetBinError(indexOverflow-1, sumError)
hist.SetBinContent(indexOverflow, 0.)
hist.SetBinError(indexOverflow, 0.)
return hist
[docs]def yieldBinNumbers(hist, overFlow=False, underFlow=False):
for i in range(hist.GetNbinsX()+2):
if i == 0 and not underFlow:
continue
if i == hist.GetNbinsX()+1 and not overFlow:
continue
yield i
[docs]def calculateMarginMaximum(relMargin, minimum, maximum, logy=False):
"""Calculate a new maximum for the given range, taking into account a
relative top margin inside the pad
:param relMargin: relative top margin inside the pad - for example
0.2 will leave 20% space to the top
:param minimum: minimum to be used on the y-axis
:param maximum: histograms current maximum
:param logy: calculate for log scale?
"""
if not logy:
maximum = (maximum-minimum)*(1+relMargin) + minimum
else:
logger.debug("Calculating new maximum for log scale to leave {} top margin".format(relMargin))
if minimum <= 0 or maximum <= 0:
logger.warning("Minumum = {}, Maximum = {} - can't make logarithmic top margin"
.format(minimum, maximum))
return maximum
emax = math.log10(maximum)
emin = math.log10(minimum)
maximum = 10**((emax-emin)*(1+relMargin) + emin)
logger.debug("Maximum (scaled): {}".format(maximum))
return maximum
[docs]def getMinMaxWithErrors(*histsOrStack, **kwargs):
overFlow = kwargs.pop("overFlow", False)
underFlow = kwargs.pop("underFlow", False)
maximumWithErrors = kwargs.pop("maximumWithErrors", False)
minimumWithErrors = kwargs.pop("minimumWithErrors", False)
if kwargs:
raise KeyError("Got unexpected kwargs: {}".format(kwargs))
hists = []
logger.debug("Looking for minimum and maximum in {}".format(histsOrStack))
if not histsOrStack:
return None, None
for hist in histsOrStack:
if issubclass(type(hist), ROOT.TH1):
hists.append(hist)
elif issubclass(type(hist), ROOT.THStack):
hists += [h for h in hist.GetStack()]
maxima = []
minima = []
for hist in hists:
for i in yieldBinNumbers(hist, underFlow=underFlow, overFlow=overFlow):
logger.debug("bin {}: {}".format(i, hist.GetBinContent(i)+hist.GetBinError(i)))
maximum = hist.GetBinContent(i)
if maximumWithErrors:
maximum += hist.GetBinError(i)
minimum = hist.GetBinContent(i)
if minimumWithErrors:
minimum -= hist.GetBinError(i)
maxima.append(maximum)
minima.append(minimum)
return min(minima), max(maxima)
[docs]def histFromGraph(g):
"""
Convert a TGraphAsymmErrors to a histogram. If asymmetric errors are given,
they are symmetrised. The bin boundaries are determined by the x-errors
"""
bins = []
logger.debug("Creating histogram from TGraph {}".format(g))
for i in range(g.GetN()):
x, y = ROOT.Double(), ROOT.Double()
g.GetPoint(i, x, y)
try:
delxLow = g.GetErrorXlow(i)
delxHigh = g.GetErrorXhigh(i)
except AttributeError:
raise TypeError("This graph is not a TGraphAsymmErrors - can't create histogram from this")
if not (delxLow-delxHigh)/delxLow < 1e-5:
raise ValueError("This TGraph has asymmetric x-errors - can't create histogram from this")
delx = delxLow
if not bins:
bins.append(x-delx)
bins.append(x+delx)
logger.debug("bins: {}".format(bins))
xbins = array('d', bins)
h = ROOT.TH1F(g.GetName(), g.GetName(), len(bins)-1, xbins)
for i in range(g.GetN()):
x = ROOT.Double()
y = ROOT.Double()
ey = g.GetErrorY(i)
g.GetPoint(i, x, y)
logger.debug("bin {}: {} +/- {}".format(x, y, ey))
h.SetBinContent(i+1, y)
if ey > 0:
h.SetBinError(i+1, ey)
else:
h.SetBinError(i+1, 0.)
h.GetXaxis().SetTitle(g.GetHistogram().GetXaxis().GetTitle())
return h
[docs]def setBatchMode():
"""Set ROOT to batch and ignore command line options"""
ROOT.gROOT.SetBatch()
ROOT.PyConfig.IgnoreCommandLineOptions = True
[docs]def getExpCLsAsimov(s, b, db):
"""
Calculate the expected CLs (CLs=p_(s+b)/p_b) value based on the asymptotic approximation.
See Eur.Phys.J.C71, `arXiv:1007.1727 <https://arxiv.org/abs/1007.1727>`_ ("CCGV paper") for the formulas.
The derivation follows the same procedure as described for p0 in <http://www.pp.rhul.ac.uk/~cowan/stat/medsig/medsigNote.pdf>
"""
def ll(n, m, mu, s, b, tau):
"Log likelihood without factorials (cancel in ratio)"
return n*math.log(mu*s+b)-(mu*s+b)+m*math.log(tau*b)-tau*b
def getPsb(s, b, tau):
"Expected p-value for s+b hypothesis"
mu = 1
# Asimov dataset for mu=0 (expected exclusion)
n = b
m = tau*b
# MLEs from CCGV paper
muhat = (n-m/tau)/s
bhat = m/tau
bhathat = (n+m-(1+tau)*mu*s+math.sqrt((n+m-(1+tau)*mu*s)**2\
+4*(1+tau)*m*mu*s))/(2*(1+tau))
condll = ll(n, m, mu, s, bhathat, tau)
uncondll = ll(n, m, muhat, s, bhat, tau)
return ROOT.RooStats.SignificanceToPValue(math.sqrt(-2.*(condll-uncondll)))
# in the asymptotic limit, the expected p_b = 0.5
return 2*getPsb(s, b, b/(db**2))