Source code for MPF.signalGridProjector

import re
import math

import ROOT

from .meme import cache

from .processProjector import ProcessProjector
from . import IOHelpers as IO
from .commonHelpers.options import checkUpdateOpt, checkUpdateDict
from . import pyrootHelpers as PH
from .process import Process

from .commonHelpers.logger import logger
logger = logger.getChild(__name__)

[docs]class SignalGridProjector(ProcessProjector):

    """
    Use :py:class:`~MPF.processProjector.ProcessProjector` for a whole
    signal grid where each signal point corresponds to one tree where
    the grid parameters can be parsed from the name.

    .. inheritance-diagram:: MPF.signalGridProjector.SignalGridProjector

    Currently no further functionality of automatically
    creating plots is implemented, but the dictionaries created by
    :py:class:`~MPF.signalGridProjector.SignalGridProjector.getHarvestList`
    can be used to make for example signal acceptance/efficiency plots
    or simple sensitivity projections for a whole signal grid.

    """

    defaults = dict(
        pValueName = "p0exp",
        pValueMode = "BinomialExpP",
        BinomialExpPFlatSys = None,
        customFunction = None,
    )

    def __init__(self, **kwargs):
        """
        The following parameters can also be given in
        :py:class:`~MPF.signalGridProjector.SignalGridProjector.getHarvestList`
        and
        :py:class:`~MPF.signalGridProjector.SignalGridProjector.registerHarvestList`
        and will overwrite the defaults for the particular list.

        Example usage::

          sp = SignalGridProjector(weight="eventWeight*genWeight")
          sp.addProcessTree("ttbar", treePath, "ttbar_NoSys", style="background")
          sp.addProcessTree("wjets", treePath, "wjets_NoSys", style="background")
          sp.addSignalProcessesByRegex("GG_oneStep_(?P<mg>.*?)_(?P<mch>.*?)_(?P<mn>.*?)_NoSys", "GG_oneStep", treePathSignal)

          harvest = sp.getHarvestList("GG_oneStep", cut="met>200&&mt>150")

        :param pValueName: dictionary key for the "pValue" (default: "p0exp")
        :param pValueMode: which kind of "pValue" should be calculated. Currently
                           *BinomialExpP*, *SOverB* and *S* is supported (default: "BinomialExpP")
        :param BinomialExpPFlatSys: add this flat (relative) systematic uncertainty
                                    to the MC stat. uncertainty when using *BinomialExpP*
        :param customFunction: instead of using one of the predefined pValueModes use this custom
                               function. The parameters given to the function are (*signal*,
                               *signalMCStatError*, *background*, *backgroundMCStatError*).
        """
        super(SignalGridProjector, self).__init__()

        # update defaults dict from base class
        self.defaults = dict(super(SignalGridProjector, self).defaults, **self.defaults)

        # update defaults from kwargs (checking for valid options)
        self.defaults = checkUpdateDict(self.defaults, **kwargs)

        self.signalGrids = {}
        self.registeredHarvestLists = []


[docs]    @staticmethod
    @cache()
    def getSignalPoints(regex, *paths):
        """Returns a dictionary of matching group dicts (and rootfile paths
        where they were found) corresponding to the signal tree names that
        were found. For example::

          regex = "w_(?P<m1>.*?)_(?P<m2>.*?)$"

        will return a dict like::

            {
              "w_2000_1000" : {"paths" : {"path1", "path2", ...}, "m1" : "2000", "m2" : "1000"},
              ...
            }

        ToDo: Add option to match by path name instead of tree name
        """
        sigpoints = {}
        regex = re.compile(regex)
        for path in paths:
            with IO.ROpen(path) as rf:
                for k in rf.GetListOfKeys():
                    match = regex.match(k.GetName())
                    pathlist = set()
                    if not match:
                        continue
                    if k.GetName() in sigpoints:
                        pathlist = sigpoints[k.GetName()]["paths"]
                        if path in pathlist:
                            logger.warning("Found multiple cycles for key {}".format(k.GetName()))
                    pathlist.add(path)
                    sigpoints[k.GetName()] = dict(match.groupdict(), paths=pathlist)
        return sigpoints


[docs]    def addSignalProcessesByRegex(self, regex, gridName, *paths, **processOpts):
        """Add all signal points matching a regex. The regex should be
        able to return a groupdict containing the grid parameters and
        will be tried to match the tree names found in the given paths
        (matching the path names is not supported yet). The grid and
        its parameters are referenced later by the given gridName
        """
        signalPointDict = self.getSignalPoints(regex, *paths)
        for signalTree, parameterDict in signalPointDict.items():
            process = Process(signalTree, style="signal", **processOpts)
            for path in parameterDict["paths"]:
                process.addTree(path, signalTree)
            self.addProcess(process)
        self.signalGrids[gridName] = signalPointDict


[docs]    def addCalculatedVariable(self, varname, gridName, fun):
        """Add a variable that is supposed to be calculated from the signal
        point parameters to the given signal grid.

        .. warning:: Not implemented yet

        Note: cuts on parameters are not supposed to be introduced here (better in the plotting step)
        """
        raise NotImplementedError


[docs]    def getTotalBkgHist(self):
        hists = []
        for p in self.getProcesses("background"):
            hists.append(p.hist)
        return PH.getMergedHist(hists)


[docs]    def getSignalProcessesGrid(self, gridName):
        for p in self.getProcesses("signal"):
            if not p.name in self.signalGrids[gridName]:
                continue
            yield p


[docs]    def registerHarvestList(self, gridName, registerKey, *selection, **kwargs):
        """Register a plot to be plotted later. You can give a selection. For
        example if you pass "background" and use multiHistDraw later
        only the background processes will be projected by
        multiHistDraw (the rest will just use "usual"
        TTree::Draw). When you call getAllHarvestLists later the list will
        be referenced by the registerKey you have given.
        """
        opt = checkUpdateOpt(self.defaults, **kwargs)
        self.registerToProjector(*selection, opt=opt)
        self.registeredHarvestLists.append((tuple([gridName, registerKey]), opt))


[docs]    def getAllHarvestLists(self, **kwargs):
        """Finally create all registered harvest lists. Returns a dict
        gridName -> registerKey -> harvestList"""
        compile = kwargs.pop("compile", False)
        opt = checkUpdateOpt(self.defaults, **kwargs)
        harvestListDictDict = {}
        if opt.useMultiHistDraw:
            self.projector.fillHists(compile=compile)
        for rArgs, rOpt in self.registeredHarvestLists:
            gridName, registerKey = rArgs
            if not gridName in harvestListDictDict:
                harvestListDict = {}
                harvestListDictDict[gridName] = harvestListDict
            else:
                harvestListDict = harvestListDictDict[gridName]
            if registerKey in harvestListDict:
                logger.warning("key {} registered multiple times! - final harvestListDict will only "
                               "contain the last harvest list that was registered!".format(registerKey))
            harvestListDict[registerKey] = self.getHarvestList(*rArgs[:-1], opt=rOpt)
        return harvestListDictDict


[docs]    def getHarvestList(self, gridName, **kwargs):
        """Calculates the pValues for the given grid. Returns a "harvest list" of the form::

            [
              {<pValueName> : pValue1, "parameter1" : "value1", "parameter2" : "value2", ...},
              {<pValueName> : pValue2, "parameter1" : "value1", "parameter2" : "value2", ...},
              ...
            ]

        By default the pValue is calculated by BinomialExpP, using the
        MC stat uncertainty for the background. A flat background
        systematic can be given optionally.
        Alternatively you can specify a custom Function for p-value calculation with parameters
        (signal, signalMCStatError, background, backgroundMCStatError).
        """

        # for usage in getAllHarvestLists()
        opt = kwargs.pop("opt", None)

        if opt is None:
            opt = checkUpdateOpt(self.defaults, **kwargs)

        def getPValue(s, ds, b, db):
            if opt.customFunction is not None:
                return opt.customFunction(s, ds, b, db)
            elif opt.pValueMode == "BinomialExpP":
                if opt.BinomialExpPFlatSys is not None:
                    db = math.sqrt(db**2+(opt.BinomialExpPFlatSys*b)**2)
                return ROOT.RooStats.NumberCountingUtils.BinomialExpP(s, b, db/b)
            elif opt.pValueMode == "SOverB":
                return s/b
            elif opt.pValueMode == "S":
                return s
            else:
                raise ValueError("pValue mode {} unknown!".format(mode))

        self.fillHists(opt)
        nb, b, db = PH.getIntegralAndError(self.getTotalBkgHist())
        harvest = []
        signalGrid = self.signalGrids[gridName]
        for signalProcess in self.getSignalProcessesGrid(gridName):
            ns, s, ds = PH.getIntegralAndError(signalProcess.hist)
            logger.debug("Calculating p-value for signal {} - "
                         "s, ds, b, db = {}".format(signalProcess.name, (s, ds, b, db)))
            p = getPValue(s, ds, b, db)
            logger.debug("P-Value = {}".format(p))
            paramDict = signalGrid[signalProcess.name].copy()
            paramDict.pop("paths")
            paramDict[opt.pValueName] = p
            harvest.append(paramDict)
        return harvest