Source code for MPF.signalGridProjector

import re
import math

import ROOT

from .meme import cache

from .processProjector import ProcessProjector
from . import IOHelpers as IO
from .commonHelpers.options import checkUpdateOpt, checkUpdateDict
from . import pyrootHelpers as PH
from .process import Process

from .commonHelpers.logger import logger
logger = logger.getChild(__name__)

[docs]class SignalGridProjector(ProcessProjector): """ Use :py:class:`~MPF.processProjector.ProcessProjector` for a whole signal grid where each signal point corresponds to one tree where the grid parameters can be parsed from the name. .. inheritance-diagram:: MPF.signalGridProjector.SignalGridProjector Currently no further functionality of automatically creating plots is implemented, but the dictionaries created by :py:class:`~MPF.signalGridProjector.SignalGridProjector.getHarvestList` can be used to make for example signal acceptance/efficiency plots or simple sensitivity projections for a whole signal grid. """ defaults = dict( pValueName = "p0exp", pValueMode = "BinomialExpP", BinomialExpPFlatSys = None, customFunction = None, ) def __init__(self, **kwargs): """ The following parameters can also be given in :py:class:`~MPF.signalGridProjector.SignalGridProjector.getHarvestList` and :py:class:`~MPF.signalGridProjector.SignalGridProjector.registerHarvestList` and will overwrite the defaults for the particular list. Example usage:: sp = SignalGridProjector(weight="eventWeight*genWeight") sp.addProcessTree("ttbar", treePath, "ttbar_NoSys", style="background") sp.addProcessTree("wjets", treePath, "wjets_NoSys", style="background") sp.addSignalProcessesByRegex("GG_oneStep_(?P<mg>.*?)_(?P<mch>.*?)_(?P<mn>.*?)_NoSys", "GG_oneStep", treePathSignal) harvest = sp.getHarvestList("GG_oneStep", cut="met>200&&mt>150") :param pValueName: dictionary key for the "pValue" (default: "p0exp") :param pValueMode: which kind of "pValue" should be calculated. Currently *BinomialExpP*, *SOverB* and *S* is supported (default: "BinomialExpP") :param BinomialExpPFlatSys: add this flat (relative) systematic uncertainty to the MC stat. uncertainty when using *BinomialExpP* :param customFunction: instead of using one of the predefined pValueModes use this custom function. The parameters given to the function are (*signal*, *signalMCStatError*, *background*, *backgroundMCStatError*). """ super(SignalGridProjector, self).__init__() # update defaults dict from base class self.defaults = dict(super(SignalGridProjector, self).defaults, **self.defaults) # update defaults from kwargs (checking for valid options) self.defaults = checkUpdateDict(self.defaults, **kwargs) self.signalGrids = {} self.registeredHarvestLists = []
[docs] @staticmethod @cache() def getSignalPoints(regex, *paths): """Returns a dictionary of matching group dicts (and rootfile paths where they were found) corresponding to the signal tree names that were found. For example:: regex = "w_(?P<m1>.*?)_(?P<m2>.*?)$" will return a dict like:: { "w_2000_1000" : {"paths" : {"path1", "path2", ...}, "m1" : "2000", "m2" : "1000"}, ... } ToDo: Add option to match by path name instead of tree name """ sigpoints = {} regex = re.compile(regex) for path in paths: with IO.ROpen(path) as rf: for k in rf.GetListOfKeys(): match = regex.match(k.GetName()) pathlist = set() if not match: continue if k.GetName() in sigpoints: pathlist = sigpoints[k.GetName()]["paths"] if path in pathlist: logger.warning("Found multiple cycles for key {}".format(k.GetName())) pathlist.add(path) sigpoints[k.GetName()] = dict(match.groupdict(), paths=pathlist) return sigpoints
[docs] def addSignalProcessesByRegex(self, regex, gridName, *paths, **processOpts): """Add all signal points matching a regex. The regex should be able to return a groupdict containing the grid parameters and will be tried to match the tree names found in the given paths (matching the path names is not supported yet). The grid and its parameters are referenced later by the given gridName """ signalPointDict = self.getSignalPoints(regex, *paths) for signalTree, parameterDict in signalPointDict.items(): process = Process(signalTree, style="signal", **processOpts) for path in parameterDict["paths"]: process.addTree(path, signalTree) self.addProcess(process) self.signalGrids[gridName] = signalPointDict
[docs] def addCalculatedVariable(self, varname, gridName, fun): """Add a variable that is supposed to be calculated from the signal point parameters to the given signal grid. .. warning:: Not implemented yet Note: cuts on parameters are not supposed to be introduced here (better in the plotting step) """ raise NotImplementedError
[docs] def getTotalBkgHist(self): hists = [] for p in self.getProcesses("background"): hists.append(p.hist) return PH.getMergedHist(hists)
[docs] def getSignalProcessesGrid(self, gridName): for p in self.getProcesses("signal"): if not p.name in self.signalGrids[gridName]: continue yield p
[docs] def registerHarvestList(self, gridName, registerKey, *selection, **kwargs): """Register a plot to be plotted later. You can give a selection. For example if you pass "background" and use multiHistDraw later only the background processes will be projected by multiHistDraw (the rest will just use "usual" TTree::Draw). When you call getAllHarvestLists later the list will be referenced by the registerKey you have given. """ opt = checkUpdateOpt(self.defaults, **kwargs) self.registerToProjector(*selection, opt=opt) self.registeredHarvestLists.append((tuple([gridName, registerKey]), opt))
[docs] def getAllHarvestLists(self, **kwargs): """Finally create all registered harvest lists. Returns a dict gridName -> registerKey -> harvestList""" compile = kwargs.pop("compile", False) opt = checkUpdateOpt(self.defaults, **kwargs) harvestListDictDict = {} if opt.useMultiHistDraw: self.projector.fillHists(compile=compile) for rArgs, rOpt in self.registeredHarvestLists: gridName, registerKey = rArgs if not gridName in harvestListDictDict: harvestListDict = {} harvestListDictDict[gridName] = harvestListDict else: harvestListDict = harvestListDictDict[gridName] if registerKey in harvestListDict: logger.warning("key {} registered multiple times! - final harvestListDict will only " "contain the last harvest list that was registered!".format(registerKey)) harvestListDict[registerKey] = self.getHarvestList(*rArgs[:-1], opt=rOpt) return harvestListDictDict
[docs] def getHarvestList(self, gridName, **kwargs): """Calculates the pValues for the given grid. Returns a "harvest list" of the form:: [ {<pValueName> : pValue1, "parameter1" : "value1", "parameter2" : "value2", ...}, {<pValueName> : pValue2, "parameter1" : "value1", "parameter2" : "value2", ...}, ... ] By default the pValue is calculated by BinomialExpP, using the MC stat uncertainty for the background. A flat background systematic can be given optionally. Alternatively you can specify a custom Function for p-value calculation with parameters (signal, signalMCStatError, background, backgroundMCStatError). """ # for usage in getAllHarvestLists() opt = kwargs.pop("opt", None) if opt is None: opt = checkUpdateOpt(self.defaults, **kwargs) def getPValue(s, ds, b, db): if opt.customFunction is not None: return opt.customFunction(s, ds, b, db) elif opt.pValueMode == "BinomialExpP": if opt.BinomialExpPFlatSys is not None: db = math.sqrt(db**2+(opt.BinomialExpPFlatSys*b)**2) return ROOT.RooStats.NumberCountingUtils.BinomialExpP(s, b, db/b) elif opt.pValueMode == "SOverB": return s/b elif opt.pValueMode == "S": return s else: raise ValueError("pValue mode {} unknown!".format(mode)) self.fillHists(opt) nb, b, db = PH.getIntegralAndError(self.getTotalBkgHist()) harvest = [] signalGrid = self.signalGrids[gridName] for signalProcess in self.getSignalProcessesGrid(gridName): ns, s, ds = PH.getIntegralAndError(signalProcess.hist) logger.debug("Calculating p-value for signal {} - " "s, ds, b, db = {}".format(signalProcess.name, (s, ds, b, db))) p = getPValue(s, ds, b, db) logger.debug("P-Value = {}".format(p)) paramDict = signalGrid[signalProcess.name].copy() paramDict.pop("paths") paramDict[opt.pValueName] = p harvest.append(paramDict) return harvest