Merge pull request cms-analysis#51 from cms-analysis/HIG-16-006-dev
Merging HIG-16-006
ajgilbert authored Jul 11, 2016
2 parents 03d4e06 + 67b9877 commit f3e5bf5
Showing 48 changed files with 4,123 additions and 725 deletions.
12 changes: 11 additions & 1 deletion CombinePdfs/python/MSSM.py
@@ -214,6 +214,16 @@ def safeTH2DivideForKappa(self, h1, h2):
for y in xrange(1, h2.GetNbinsY() +1):
val_h1 = h1.GetBinContent(x, y)
val_h2 = h2.GetBinContent(x, y)
# if val_h2 < 0.:
# print ('Warning: denominator histogram %s has a negative value %g at bin (%i,%i)=(%g, %g)' % (
# h2.GetName(), val_h2, x, y, h1.GetXaxis().GetBinCenter(x),
# h1.GetYaxis().GetBinCenter(y)))
# if val_h1 < 0.:
# print ('Warning: numerator histogram %s has a negative value %g at bin (%i,%i)=(%g, %g)' % (
# h1.GetName(), val_h1, x, y, h1.GetXaxis().GetBinCenter(x),
# h1.GetYaxis().GetBinCenter(y)))
# val_h1 = val_h2 * 0.5
# print ('>> Setting value to 0.5 * denominator: %g' % val_h1)
if val_h1 == 0. or val_h2 == 0.:
print ('Warning: dividing histograms %s and %s at bin (%i,%i)=(%g, %g) '
'with values: %g/%g, will set the kappa to 1.0 here' % (
@@ -368,7 +378,7 @@ def buildModel(self):
self.doHistFunc('br_AZh_%s'%era, f.Get(hd['br_AZh']), pars)
self.PROC_SETS.append((['ggA'], ['AZhLLtautau'], [era]))
# And the SM terms
for X in ['ggH', 'qqH', 'VH']:
for X in ['ggH', 'qqH', 'VH', 'ZH', 'WminusH', 'WplusH']:
self.PROC_SETS.append((['%s'%X], ['SM125'], [era]))

def preProcessNuisances(self,nuisances):
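A hedged, illustrative PyROOT sketch of the safe division pattern in safeTH2DivideForKappa above: any bin where either input is zero falls back to kappa = 1.0 instead of a division. The histogram names and contents below are hypothetical; requires ROOT with PyROOT.

import ROOT

def safe_divide_for_kappa(h1, h2):
    # clone the numerator so the output keeps its binning
    out = h1.Clone(h1.GetName() + '_kappa')
    for x in range(1, h1.GetNbinsX() + 1):
        for y in range(1, h1.GetNbinsY() + 1):
            val_h1 = h1.GetBinContent(x, y)
            val_h2 = h2.GetBinContent(x, y)
            if val_h1 == 0. or val_h2 == 0.:
                out.SetBinContent(x, y, 1.0)  # empty bin on either side: kappa -> 1.0
            else:
                out.SetBinContent(x, y, val_h1 / val_h2)
    return out

h_up = ROOT.TH2D('h_up', '', 2, 0., 2., 2, 0., 2.)    # hypothetical shifted template
h_nom = ROOT.TH2D('h_nom', '', 2, 0., 2., 2, 0., 2.)  # hypothetical nominal template
h_up.SetBinContent(1, 1, 1.2)
h_nom.SetBinContent(1, 1, 1.0)
kappa = safe_divide_for_kappa(h_up, h_nom)
print(kappa.GetBinContent(1, 1), kappa.GetBinContent(2, 2))  # 1.2 and the 1.0 fallback
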
4 changes: 2 additions & 2 deletions CombinePdfs/python/ModelIndependent.py
@@ -55,7 +55,7 @@ def getYieldScale(self,bin,process):
foundDecay = D
if not foundDecay: raise RuntimeError, "Validation Error: decay string %s does not contain any known decay name" % decaySource
foundEnergy = None
for D in [ "7TeV", "8TeV" ]:
for D in [ "7TeV", "8TeV", "13TeV" ]:
if D in decaySource:
if foundEnergy: raise RuntimeError, "Validation Error: decay string %s contains multiple known energies" % decaySource
foundEnergy = D
@@ -128,7 +128,7 @@ def doParametersOfInterest(self):
self.modelBuilder.out.var("MH").setRange(float(self.mHRange[0]),float(self.mHRange[1]))
self.modelBuilder.out.var("MH").setConstant(False)
poi+=',MH'
else:
elif self.options.mass != 0:
print 'MH will be assumed to be', self.options.mass
self.modelBuilder.out.var("MH").removeRange()
self.modelBuilder.out.var("MH").setVal(self.options.mass)
8 changes: 7 additions & 1 deletion CombinePdfs/src/MorphFunctions.cc
@@ -6,6 +6,7 @@
#include "boost/lexical_cast.hpp"
#include "boost/format.hpp"
#include "boost/multi_array.hpp"
#include "TGraphErrors.h"
#include "RooFitResult.h"
#include "RooRealVar.h"
#include "RooDataHist.h"
@@ -280,6 +281,9 @@ void BuildRooMorphing(RooWorkspace& ws, CombineHarvester& cb,
// We also need the array of process yields vs mass, because this will have to
// be interpolated too
multi_array<double, 1> rate_arr(extents[m]);
// Also store the yield uncertainty - we don't actually need this for the signal
// model, but will include it in the debug TGraph
multi_array<double, 1> rate_unc_arr(extents[m]);
// The vertical-interpolation PDF needs the TH1 inputs in the format of a TList
multi_array<std::shared_ptr<TList>, 1> list_arr(extents[m]);
// Combine always treats the normalisation part of shape systematics as
@@ -319,6 +323,8 @@ void BuildRooMorphing(RooWorkspace& ws, CombineHarvester& cb,
}
// Store the process rate
rate_arr[mi] = pr_arr[mi]->rate();
auto proc_hist = pr_arr[mi]->ClonedScaledShape();
proc_hist->IntegralAndError(1, proc_hist->GetNbinsX(), rate_unc_arr[mi]);
// Do the same for the Up and Down shapes
for (unsigned ssi = 0; ssi < ss; ++ssi) {
hist_arr[mi][1 + 2 * ssi] =
@@ -439,7 +445,7 @@ void BuildRooMorphing(RooWorkspace& ws, CombineHarvester& cb,
//! [part4]

if (file) {
TGraph tmp(m, m_vec.data(), rate_arr.data());
TGraphErrors tmp(m, m_vec.data(), rate_arr.data(), nullptr, rate_unc_arr.data());
file->WriteTObject(&tmp, "interp_rate_"+key);
}
// Collect all terms that will go into the total normalisation:
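A hedged PyROOT sketch of the debug graph written above: the per-mass-point yield from IntegralAndError provides both the point value and its error bar in a TGraphErrors. The mass points and histograms are hypothetical, and on older ROOT versions the error argument may need ROOT.Double instead of ctypes.c_double.

import ctypes
from array import array
import ROOT

masses = array('d', [120., 125., 130.])  # hypothetical signal mass points
hists = [ROOT.TH1F('proc_m%d' % int(m), '', 10, 0., 10.) for m in masses]
for h in hists:
    h.FillRandom('gaus', 500)  # stand-in for the scaled process shapes

rates, rate_uncs = array('d'), array('d')
for h in hists:
    err = ctypes.c_double(0.)
    rates.append(h.IntegralAndError(1, h.GetNbinsX(), err))  # yield and its uncertainty
    rate_uncs.append(err.value)

# zero x-errors; the y-errors carry the statistical yield uncertainty
gr = ROOT.TGraphErrors(len(masses), masses, rates,
                       array('d', [0.] * len(masses)), rate_uncs)
gr.SetName('interp_rate_sketch')
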
27 changes: 25 additions & 2 deletions CombineTools/bin/PostFitShapesFromWorkspace.cpp
@@ -2,6 +2,7 @@
#include "boost/program_options.hpp"
#include "boost/format.hpp"
#include "TSystem.h"
#include "TH2F.h"
#include "CombineHarvester/CombineTools/interface/CombineHarvester.h"
#include "CombineHarvester/CombineTools/interface/ParseCombineWorkspace.h"
#include "CombineHarvester/CombineTools/interface/TFileIO.h"
@@ -25,6 +26,8 @@ int main(int argc, char* argv[]) {
bool factors = false;
unsigned samples = 500;
std::string freeze_arg = "";
bool covariance = false;
string data = "data_obs";

po::options_description help_config("Help");
help_config.add_options()
@@ -35,6 +38,9 @@
("workspace,w",
po::value<string>(&workspace)->required(),
"The input workspace-containing file [REQUIRED]")
("dataset",
po::value<string>(&data)->default_value(data),
"The input dataset name")
("datacard,d",
po::value<string>(&datacard),
"The input datacard, only used for rebinning")
@@ -62,7 +68,10 @@
"Print tables of background shifts and relative uncertainties")
("freeze",
po::value<string>(&freeze_arg)->default_value(freeze_arg),
"Format PARAM1,PARAM2=X,PARAM3=Y where the values X and Y are optional");
"Format PARAM1,PARAM2=X,PARAM3=Y where the values X and Y are optional")
("covariance",
po::value<bool>(&covariance)->default_value(covariance)->implicit_value(true),
"Save the covariance and correlation matrices of the process yields");

if (sampling && !postfit) {
throw logic_error(
@@ -99,7 +108,7 @@
// Create CH instance and parse the workspace
ch::CombineHarvester cmb;
cmb.SetFlag("workspaces-use-clone", true);
ch::ParseCombineWorkspace(cmb, *ws, "ModelConfig", "data_obs", false);
ch::ParseCombineWorkspace(cmb, *ws, "ModelConfig", data, false);

// Only evaluate in case parameters to freeze are provided
if(! freeze_arg.empty())
@@ -226,6 +235,8 @@
}

map<string, map<string, TH1F>> post_shapes;
map<string, TH2F> post_yield_cov;
map<string, TH2F> post_yield_cor;
// As we calculate the post-fit yields we can also print out the post/pre scale
// factors
if (factors) {
@@ -253,6 +264,10 @@
: 1.0);
}
}
if (sampling && covariance) {
post_yield_cov[bin] = cmb_bin.GetRateCovariance(res, samples);
post_yield_cor[bin] = cmb_bin.GetRateCorrelation(res, samples);
}
// Fill the total sig. and total bkg. hists
auto cmb_bkgs = cmb_bin.cp().backgrounds();
auto cmb_sigs = cmb_bin.cp().signals();
@@ -279,6 +294,14 @@
ch::WriteToTFile(&(iter.second), &outfile,
bin + "_postfit/" + iter.first);
}
for (auto & iter : post_yield_cov) {
ch::WriteToTFile(&(iter.second), &outfile,
iter.first+"_cov");
}
for (auto & iter : post_yield_cor) {
ch::WriteToTFile(&(iter.second), &outfile,
iter.first+"_cor");
}
}
}
// And we're done!
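A hedged sketch of reading back what the new --covariance option stores: one TH2F covariance and one correlation matrix per analysis bin, written alongside the per-bin shape directories under the names <bin>_cov and <bin>_cor. The file and bin names below are hypothetical.

import ROOT

f = ROOT.TFile.Open('postfit_shapes.root')  # hypothetical PostFitShapesFromWorkspace output
bin_name = 'htt_mt_8_13TeV'                 # hypothetical analysis category
cov = f.Get(bin_name + '_cov')
cor = f.Get(bin_name + '_cor')
if cov and cor:
    n = cov.GetNbinsX()
    print('%d x %d yield covariance, corr(1,2) = %.2f' % (n, n, cor.GetBinContent(1, 2)))
f.Close()
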
7 changes: 7 additions & 0 deletions CombineTools/interface/Algorithm.h
@@ -23,6 +23,13 @@ bool contains(const Range &r, T p) {
return std::find(boost::begin(r), boost::end(r), p) != boost::end(r);
}

// This version is needed for cases like contains({"A", "B"}, "B"),
// which are not implicitly convertible in the above generic version
template<typename R, typename T>
bool contains(const std::initializer_list<R> &r, T p) {
return std::find(boost::begin(r), boost::end(r), p) != boost::end(r);
}

template <typename T>
bool contains_rgx(const std::vector<boost::regex>& r, T p) {
for (auto const& rgx : r)
18 changes: 18 additions & 0 deletions CombineTools/interface/BinByBin.h
@@ -126,12 +126,30 @@ class BinByBinFactory {
return *this;
}

/**
* Construct approximate Poisson uncertainties instead of default Gaussian
*/
inline BinByBinFactory& SetPoissonErrors(bool poisson_errors) {
poisson_errors_ = poisson_errors;
return *this;
}

/**
* Set whether bins with zero content can also be merged
*/
inline BinByBinFactory& SetMergeZeroBins(bool merge) {
merge_zero_bins_ = merge;
return *this;
}

private:
std::string pattern_;
unsigned v_;
double bbb_threshold_;
double merge_threshold_;
bool fix_norm_;
bool poisson_errors_;
bool merge_zero_bins_;
};
}

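A hedged python sketch of how the two new BinByBinFactory options might be used, assuming the setters are exposed to python in the same way as the existing ones (SetAddThreshold, SetFixNorm, AddBinByBin):

import CombineHarvester.CombineTools.ch as ch

cb = ch.CombineHarvester()
# ... datacards would be parsed into cb here ...

bbb = ch.BinByBinFactory()
bbb.SetAddThreshold(0.05)
bbb.SetFixNorm(True)
bbb.SetPoissonErrors(True)   # approximate Poisson instead of the default Gaussian errors
bbb.SetMergeZeroBins(False)  # assumption: controls whether zero-content bins may be merged
bbb.AddBinByBin(cb.cp().backgrounds(), cb)
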
3 changes: 3 additions & 0 deletions CombineTools/interface/CombineHarvester.h
@@ -347,6 +347,9 @@ class CombineHarvester {
TH1F GetShapeWithUncertainty(RooFitResult const* fit, unsigned n_samples);
TH1F GetShapeWithUncertainty(RooFitResult const& fit, unsigned n_samples);
TH1F GetObservedShape();

TH2F GetRateCovariance(RooFitResult const& fit, unsigned n_samples);
TH2F GetRateCorrelation(RooFitResult const& fit, unsigned n_samples);
/**@}*/

/**
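An illustrative numpy sketch, not the CombineHarvester implementation, of what a sampling-based rate covariance and correlation mean: draw n_samples parameter sets from the post-fit result, evaluate every process yield for each draw, then take the covariance and correlation over the draws.

import numpy as np

rng = np.random.default_rng(1)
n_samples, n_procs = 500, 3

def sample_yields():
    # stand-in for "set all parameters to one sampled point and re-evaluate the yields"
    return rng.normal(loc=[10.0, 50.0, 5.0], scale=[1.0, 4.0, 0.5])

yields = np.array([sample_yields() for _ in range(n_samples)])  # (n_samples, n_procs)
cov = np.cov(yields, rowvar=False)        # n_procs x n_procs yield covariance
cor = np.corrcoef(yields, rowvar=False)   # corresponding correlation matrix
print(cov.shape, np.round(cor, 2))
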
4 changes: 0 additions & 4 deletions CombineTools/interface/HttSystematics.h
@@ -28,10 +28,6 @@ void AddMSSMUpdateSystematics_mm(CombineHarvester& cb);
void AddMSSMUpdateSystematics_tt(CombineHarvester& cb, CombineHarvester src);
void AddMSSMUpdateSystematics_tt(CombineHarvester& cb);

// Run2 MSSM analysis systematics
// Implemented in src/HttSystematics_MSSMRun2.cc
void AddMSSMRun2Systematics(CombineHarvester& cb);

// Hhh systematics
// Implemented in src/HhhSystematics.cc
void AddSystematics_hhh_et_mt(CombineHarvester& cb, CombineHarvester src);
2 changes: 1 addition & 1 deletion CombineTools/python/combine/CombineToolBase.py
@@ -153,7 +153,7 @@ def create_job_script(self, commands, script_filename, do_log = False):
tee = 'tee' if i == 0 else 'tee -a'
log_part = '\n'
if do_log: log_part = ' 2>&1 | %s ' % tee + logname + log_part
if command.startswith('combine'):
if command.startswith('combine') or command.startswith('pushd'):
text_file.write(
'eval ' + command + log_part)
else:
46 changes: 41 additions & 5 deletions CombineTools/python/combine/EnhancedCombine.py
@@ -2,6 +2,7 @@
import CombineHarvester.CombineTools.combine.utils as utils
import json
import os
import bisect
from CombineHarvester.CombineTools.combine.opts import OPTS
from CombineHarvester.CombineTools.combine.CombineToolBase import CombineToolBase

@@ -33,10 +34,11 @@ def attach_intercept_args(self, group):
'-s', '--seed', help='Supports range strings for multiple RNG seeds, uses the same format as the --mass argument')
group.add_argument(
'-d', '--datacard', nargs='*', default=[], help='Operate on multiple datacards')
group.add_argument(
'--boundlist', help='Name of json-file which contains the ranges of physical parameters depending on the given mass and given physics model')
group.add_argument('--name', '-n', default='.Test',
help='Name used to label the combine output file, can be modified by other options')
group.add_argument(
'--setPhysicsModelParameterRanges', help='Some other options will modify or add to the list of parameter ranges')


def attach_args(self, group):
CombineToolBase.attach_args(self, group)
@@ -46,6 +48,8 @@ def attach_args(self, group):
'--there', action='store_true', help='Run combine in the same directory as the workspace')
group.add_argument('--split-points', type=int, default=0,
help='When used in conjunction with --points will create multiple combine calls that each run at most the number of points specified here.')
group.add_argument(
'--boundlist', help='Name of json-file which contains the ranges of physical parameters depending on the given mass and given physics model')

def set_args(self, known, unknown):
CombineToolBase.set_args(self, known, unknown)
@@ -109,9 +113,26 @@ def run_method(self):
# elif len(self.args.datacard) == 1:
# self.passthru.extend(['-d', self.args.datacard[0]])

current_ranges = self.args.setPhysicsModelParameterRanges
put_back_ranges = current_ranges is not None

if self.args.boundlist is not None:
# We definitely don't need to put the parameter ranges back
# into the args because they're going in via the boundlist
# option instead
put_back_ranges = False
with open(self.args.boundlist) as json_file:
bnd = json.load(json_file)
bound_pars = list(bnd.keys())
print 'Found bounds for parameters %s' % ','.join(bound_pars)
# Fill a dictionary of the bound info of the form:
# { 'PAR1' : [(MASS, LOWER, UPPER), ...], ...}
bound_vals = {}
for par in bound_pars:
bound_vals[par] = list()
for mass, bounds in bnd[par].iteritems():
bound_vals[par].append((float(mass), bounds[0], bounds[1]))
bound_vals[par].sort(key=lambda x: x[0])
# find the subbed_vars entry containing the mass
# We will extend it to also specify the ranges
dict_key = None
@@ -124,16 +145,31 @@
new_list = []
for entry in subbed_vars[dict_key]:
command = []
if current_ranges is not None:
command.append(current_ranges)
mval = entry[mass_idx]
for model in bnd:
command.append(model+'=0,'+str(bnd[model][mval]))
new_list.append(entry + (':'.join(command),))
for par in bound_pars:
# The (mass, None, None) is just a trick to make bisect_left do the comparison
# with the list of tuples in bound_vals[par]. The +1E-5 is to avoid float rounding
# issues
lower_bound = bisect.bisect_left(bound_vals[par], (float(mval)+1E-5, None, None))
# If lower_bound == 0 this means we are at or below the lowest mass point,
# in which case we should increase by one to take the bounds from this lowest
# point
if lower_bound == 0:
lower_bound += 1
command.append('%s=%g,%g' % (par, bound_vals[par][lower_bound-1][1], bound_vals[par][lower_bound-1][2]))
new_list.append(entry + (str(':'.join(command)),))
# now remove the current mass information from subbed_vars
# and replace it with the updated one
del subbed_vars[dict_key]
subbed_vars[new_key] = new_list
self.passthru.extend(['--setPhysicsModelParameterRanges', '%(MODELBOUND)s'])

# We might need to put the intercepted --setPhysicsModelParameterRanges arg back in
if put_back_ranges:
self.put_back_arg('setPhysicsModelParameterRanges', '--setPhysicsModelParameterRanges')

if self.args.points is not None:
self.passthru.extend(['--points', self.args.points])
if (self.args.split_points is not None and
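A standalone sketch of the bound lookup above, with hypothetical parameter names and bounds: for each parameter, pick the (lower, upper) range from the nearest tabulated mass at or below the requested one, and join the results into the string handed to --setPhysicsModelParameterRanges.

import bisect

# hypothetical bounds as they would come out of the --boundlist json:
# parameter -> sorted list of (mass, lower, upper)
bound_vals = {'r_ggH': [(100.0, 0.0, 20.0), (300.0, 0.0, 5.0), (1000.0, 0.0, 1.0)]}

def ranges_for(mass):
    parts = []
    for par, vals in bound_vals.items():
        # (mass, None, None) makes bisect_left compare against the (mass, lo, hi) tuples;
        # the +1E-5 avoids float rounding when the requested mass equals a tabulated point
        idx = bisect.bisect_left(vals, (float(mass) + 1E-5, None, None))
        if idx == 0:  # at or below the lowest tabulated mass: take the first entry
            idx = 1
        parts.append('%s=%g,%g' % (par, vals[idx - 1][1], vals[idx - 1][2]))
    return ':'.join(parts)

print(ranges_for(90))   # -> r_ggH=0,20
print(ranges_for(450))  # -> r_ggH=0,5
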
6 changes: 6 additions & 0 deletions CombineTools/python/combine/Impacts.py
@@ -27,6 +27,8 @@ def attach_args(self, group):
group.add_argument('--named', metavar='PARAM1,PARAM2,...', help=""" By
default the list of nuisance parameters will be loaded from the
input workspace. Use this option to specify a different list""")
group.add_argument('--exclude', metavar='PARAM1,PARAM2,...', help=""" Skip
these nuisances""")
group.add_argument('--doInitialFit', action='store_true', help="""Find
the crossings of all the POIs. Must have the output from this
before running with --doFits""")
@@ -80,6 +82,10 @@ def run_method(self):
else:
paramList = utils.list_from_workspace(
ws, 'w', 'ModelConfig_NuisParams')
if self.args.exclude is not None:
exclude = self.args.exclude.split(',')
paramList = [x for x in paramList if x not in exclude]

print 'Have nuisance parameters: ' + str(len(paramList))
prefit = utils.prefit_from_workspace(ws, 'w', paramList)
res = {}