• Main Page
  • Namespaces
  • Classes
  • Files
  • File List
  • File Members

BATCalculator.cxx

Go to the documentation of this file.
00001 // Author: Kyle Cranmer, Lorenzo Moneta, Gregory Schott, Wouter Verkerke, Stefan A. Schmitz
00002 
00003 
00004 // include other header files
00005 
00006 #include <TAxis.h>
00007 #include <TFile.h>
00008 #include <TCanvas.h>
00009 #include <TVector.h>
00010 
00011 #include <RooAbsFunc.h>
00012 //#include <RooAbsReal.h>
00013 #include <RooRealVar.h>
00014 //#include <RooArgSet.h>
00015 #include <RooBrentRootFinder.h>
00016 #include <RooFormulaVar.h>
00017 #include <RooGenericPdf.h>
00018 //#include <RooPlot.h>
00019 #include <RooProdPdf.h>
00020 #include <RooDataHist.h>
00021 //#include <RooAbsPdf.h>
00022 #include <RooHistPdf.h>
00023 
00024 // include header file of this class
00025 //#include <RooStats/ModelConfig.h>
00026 
00027 #include <BAT/BCLog.h>
00028 #include <BAT/BCAux.h>
00029 #include <BAT/BCH1D.h>
00030 
00031 #include "BCRooInterface.h"
00032 #include "BATCalculator.h"
00033 
00034 #include <algorithm>
00035 #include <cassert>
00036 
00037 
00038 
00039 ClassImp(RooStats::BATCalculator)
00040 
00041 namespace RooStats {
00042 
00043 // ---------------------------------------------------------
00044 BATCalculator::BATCalculator()
00045    : fData(0)
00046    , fPdf(0)
00047    , fPrior(0)
00048    , fProductPdf(0)
00049    , fLogLike(0)
00050    , fLikelihood(0)
00051    , fIntegratedLikelihood(0)
00052    , fPosteriorPdf(0)
00053    , fLower(0)
00054    , fUpper(0)
00055    , fBrfPrecision(0.00005)
00056    , fValidInterval(false)
00057    , fSize(0.05)
00058 {
00059    // default constructor
00060    _myRooInterface = new BCRooInterface();
00061 }
00062 
00063 // ---------------------------------------------------------
00064 BATCalculator::BATCalculator( /* const char * name,  const char * title, */
00065                               RooAbsData & data,
00066                               RooAbsPdf  & pdf,
00067                               RooArgSet  & POI,
00068                               RooAbsPdf  & prior,
00069                               RooArgSet  * params)
00070    : fData(&data)
00071    , fPdf(&pdf)
00072    , fPOI(POI)
00073    , fPrior(&prior)
00074    , fparams(params)
00075    , fProductPdf(0)
00076    , fLogLike(0)
00077    , fLikelihood(0)
00078    , fIntegratedLikelihood(0)
00079    , fPosteriorPdf(0)
00080    , fLower(0)
00081    , fUpper(0)
00082    , fBrfPrecision(0.00005)
00083    , fValidInterval(false)
00084    , _nMCMC(1000000)
00085    , fSize(0.05)
00086    //, TNamed( TString(name), TString(title) )
00087 {
00088    // constructor
00089    //if (nuisanceParameters)
00090    //   fNuisanceParameters.add(*nuisanceParameters);
00091    _myRooInterface = new BCRooInterface();
00092 }
00093 
00094 // ---------------------------------------------------------
00095 BATCalculator::BATCalculator( RooAbsData & data, ModelConfig & model)
00096    : fData(&data)
00097    , fPdf(model.GetPdf())
00098    , fPOI(*model.GetParametersOfInterest())
00099    , fPrior(model.GetPriorPdf())
00100    , fparams(model.GetNuisanceParameters())
00101    , fProductPdf(0)
00102    , fLogLike(0)
00103    , fLikelihood(0)
00104    , fIntegratedLikelihood(0)
00105    , fPosteriorPdf(0)
00106    , fLower(0)
00107    , fUpper(0)
00108    , fBrfPrecision(0.00005)
00109    , fValidInterval(false)
00110    , _nMCMC(1000000)
00111    , fSize(0.05)
00112 {
00113    // constructor from Model Config
00114  //  SetModel(model);
00115    _myRooInterface = new BCRooInterface();
00116    cout << "BATCalculator constructed" << endl;
00117 }
00118 
00119 // ---------------------------------------------------------
00120 BATCalculator::~BATCalculator()
00121 {
00122    // destructor
00123    ClearAll();
00124    delete _myRooInterface;
00125 }
00126 
00127 // ---------------------------------------------------------
00128 void BATCalculator::ClearAll() const
00129 {
00130    // clear cached pdf objects
00131    if (fProductPdf)
00132       delete fProductPdf;
00133    if (fLogLike)
00134       delete fLogLike;
00135    if (fLikelihood)
00136       delete fLikelihood;
00137    if (fIntegratedLikelihood)
00138       delete fIntegratedLikelihood;
00139    if (fPosteriorPdf)
00140       delete fPosteriorPdf;
00141    fPosteriorPdf = 0;
00142    fProductPdf = 0;
00143    fLogLike = 0;
00144    fLikelihood = 0;
00145    fIntegratedLikelihood = 0;
00146    fLower = 0;
00147    fUpper = 0;
00148    fValidInterval = false;
00149 }
00150 
00151 // ---------------------------------------------------------
00152 void BATCalculator::SetModel(const ModelConfig & model)
00153 {
00154    // set the model
00155    //fPdf = model.GetPdf();
00156    //fPriorPOI =  model.GetPriorPdf();
00157    // assignment operator = does not do a real copy the sets (must use add method)
00158    //fPOI.removeAll();
00159    //*fparams.removeAll();
00160    //if (model.GetParametersOfInterest())
00161    //   fPOI.add( *(model.GetParametersOfInterest()) );
00162    //if (model.GetNuisanceParameters())
00163    //   fNuisanceParameters.add( *(model.GetNuisanceParameters() ) );
00164 
00165    // invalidate the cached pointers
00166    //ClearAll();
00167 }
00168 
00169 // ---------------------------------------------------------
00170 RooArgSet * BATCalculator::GetMode(RooArgSet * /* parameters */) const
00171 {
00172   /// Returns the value of the parameters for the point in
00173   /// parameter-space that is the most likely.
00174   // Should cover multi-dimensional cases...
00175   // How do we do if there are points that are equi-probable?
00176 
00177   return 0;
00178 }
00179 
00180 // ---------------------------------------------------------
00181 //returns posterior with respect to first entry in the POI ArgSet
00182 RooAbsPdf * BATCalculator::GetPosteriorPdf1D() const
00183 {
00184    const char * POIname = fPOI.first()->GetName();
00185    return GetPosteriorPdf1D(POIname);
00186 }
00187 
00188 // ---------------------------------------------------------
00189 // returns posterior with respect to provided name in the POI ArgSet
00190 RooAbsPdf * BATCalculator::GetPosteriorPdf1D(const char * POIname) const
00191 {
00192 
00193    // run some sanity checks
00194    if (!fPdf ) {
00195      std::cerr << "BATCalculator::GetPosteriorPdf - missing pdf model" << std::endl;
00196      return 0;
00197    }
00198 
00199    if (!fPrior) {
00200       std::cerr << "BATCalculator::GetPosteriorPdf - missing prior pdf" << std::endl;
00201    }
00202 
00203    if (fPOI.getSize() == 0) {
00204      std::cerr << "BATCalculator::GetPosteriorPdf - missing parameter of interest" << std::endl;
00205      return 0;
00206    }
00207 
00208    if (fPOI.getSize() > 1) {
00209       std::cerr << "BATCalculator::GetPosteriorPdf - current implementation works only on 1D intervals" << std::endl;
00210       return 0;
00211    }
00212 
00213     //initialize the RooInterface
00214    _myRooInterface->Initialize(*fData,*fPdf,*fPrior,fparams,fPOI);
00215    _myRooInterface->MCMCSetNIterationsRun(_nMCMC);
00216    _myRooInterface->MarginalizeAll();
00217    _myRooInterface->FindMode();
00218    BCParameter * myPOI = _myRooInterface->GetParameter(POIname);
00219    BCH1D * myPosterior =_myRooInterface->GetMarginalized(myPOI);
00220    TH1D * posteriorTH1D = myPosterior->GetHistogram();
00221    _posteriorTH1D = static_cast<TH1D *>(posteriorTH1D->Clone("_posteriorTH1D"));
00222    RooDataHist * posteriorRooDataHist = new RooDataHist("posteriorhist","", fPOI,posteriorTH1D);
00223    fPosteriorPdf = new RooHistPdf("posteriorPdf","",fPOI,*posteriorRooDataHist);
00224 
00225    /*
00226    // plots for debugging
00227    TFile * debugfile = new TFile( "debug_posterior.root" ,"RECREATE");
00228    if ( debugfile->IsOpen() )
00229       cout << "File debug_posterior opened successfully" << endl;
00230    debugfile->cd();
00231    //posteriorTH1D->Write("posteriorTH1D");
00232    //posteriorRooDataHist->Write("posteriorRooDataHist");
00233    fPosteriorPdf->Write("fPosteriorPdf");
00234    TCanvas myCanvas("canvasforfposterior","canvas for fposterior");
00235    myCanvas.cd();
00236    //fPosteriorPdf->Draw();
00237    //myCanvas.Write();
00238    RooRealVar nSig("nSig","",0.0001,200.);
00239    RooPlot * pl = nSig.frame();
00240    fPosteriorPdf->plotOn(pl);
00241    pl->Draw();
00242    pl->Write();
00243    myCanvas.Write();
00244    debugfile->Close();
00245 
00246    // just for testing: create plots with BAT
00247    char * outputFile = "bat_plots_debugging.ps";
00248    _myRooInterface -> PrintAllMarginalized(outputFile);
00249    //*/
00250 
00251    return fPosteriorPdf; // is of type RooAbsPdf*
00252 }
00253 
00254 // ---------------------------------------------------------
00255 RooPlot * BATCalculator::GetPosteriorPlot1D() const
00256 {
00257   /// return a RooPlot with the posterior PDF and the credibility region
00258 
00259    if (!fPosteriorPdf)
00260       GetPosteriorPdf1D();
00261    if (!fValidInterval)
00262       GetInterval();
00263 
00264    RooAbsRealLValue * poi = dynamic_cast<RooAbsRealLValue *>( fPOI.first() );
00265    assert(poi);
00266 
00267    RooPlot* plot = poi->frame();
00268 
00269    plot->SetTitle(TString("Posterior probability of parameter \"")+TString(poi->GetName())+TString("\""));
00270    fPosteriorPdf->plotOn(plot,RooFit::Range(fLower,fUpper,kFALSE),RooFit::VLines(),RooFit::DrawOption("F"),RooFit::MoveToBack(),RooFit::FillColor(kGray));
00271    fPosteriorPdf->plotOn(plot);
00272    plot->GetYaxis()->SetTitle("posterior probability");
00273 
00274    return plot;
00275 }
00276 
00277 // ---------------------------------------------------------
00278 // returns central interval for first POI in the POI ArgSet
00279 SimpleInterval * BATCalculator::GetInterval() const
00280 {
00281    const char * POIname = fPOI.first()->GetName();
00282    return GetInterval(POIname); //is of type RooAbsPdf *
00283 }
00284 
00285 // ---------------------------------------------------------
00286 // returns central interval for requested POI
00287 SimpleInterval * BATCalculator::GetInterval(const char * POIname) const
00288 {
00289   /// returns a SimpleInterval with lower and upper bounds on the
00290   /// parameter of interest. Applies the central ordering rule to
00291   /// compute the credibility interval. Covers only the case with one
00292   /// single parameter of interest
00293 
00294    if (fValidInterval)
00295       std::cout << "BATCalculator::GetInterval:"
00296                 << "Warning : recomputing interval for the same CL and same model" << std::endl;
00297 
00298    RooRealVar * poi = dynamic_cast<RooRealVar *>( fPOI.find(POIname) );
00299    assert(poi);
00300 
00301    if (!fPosteriorPdf)
00302       fPosteriorPdf = (RooAbsPdf*) GetPosteriorPdf1D();
00303 
00304    RooAbsReal * cdf = fPosteriorPdf->createCdf(fPOI,RooFit::ScanParameters(100,2));
00305    //RooAbsReal* cdf = fPosteriorPdf->createCdf(fPOI,RooFit::ScanNoCdf());
00306    RooAbsFunc * cdf_bind = cdf->bindVars(fPOI,&fPOI);
00307    RooBrentRootFinder brf(*cdf_bind);
00308    brf.setTol(fBrfPrecision); // set the brf precision
00309 
00310    double tmpVal = poi->getVal();  // patch used because findRoot changes the value of poi
00311 
00312    double y = fSize/2;
00313    brf.findRoot(fLower,poi->getMin(),poi->getMax(),y);
00314 
00315    y = 1-fSize/2;
00316    bool ret = brf.findRoot(fUpper,poi->getMin(),poi->getMax(),y);
00317    if (!ret)
00318       std::cout << "BATCalculator::GetInterval: Warning:"
00319                 << "Error returned from Root finder, estimated interval is not fully correct"
00320                 << std::endl;
00321 
00322    poi->setVal(tmpVal); // patch: restore the original value of poi
00323 
00324    delete cdf_bind;
00325    delete cdf;
00326    fValidInterval = true;
00327    fConnectedInterval = true;
00328 
00329    TString interval_name = TString("CentralBayesianInterval_a") + TString(this->GetName());
00330    SimpleInterval * interval = new SimpleInterval(interval_name,*poi,fLower,fUpper,ConfidenceLevel());
00331    interval->SetTitle("SimpleInterval from BATCalculator");
00332 
00333    return interval;
00334 }
00335 
00336 // ---------------------------------------------------------
00337 SimpleInterval * BATCalculator::GetShortestInterval1D() const
00338 {
00339    const char * POIname = fPOI.first()->GetName();
00340    bool checkConnected = true;
00341    return GetShortestInterval1D(POIname, checkConnected);
00342 }
00343 
00344 // ---------------------------------------------------------
00345 /// returns a SimpleInterval with lower and upper bounds on the
00346 /// parameter of interest. Applies the shortest interval rule to
00347 /// compute the credibility interval. The resulting interval is not necessarily connected.
00348 /// Methods for constructing the shortest region with respect given CL are now available in
00349 /// different places (here; MCMCCalculator, BAT)-> this should be cleaned.
00350 /// Result is approximate as CL is not reached exactly (due to finite bin number/bin size in posterior histogram)-> make CL/interval a bit smaller or bigger ?
00351 SimpleInterval * BATCalculator::GetShortestInterval1D(const char * POIname, bool & checkConnected) const
00352 {
00353 
00354    if (fValidInterval)
00355       std::cout << "BATCalculator::GetInterval:"
00356                 << "Warning : recomputing interval for the same CL and same model" << std::endl;
00357 
00358    // get pointer to selected parameter of interest
00359    RooRealVar * poi = dynamic_cast<RooRealVar*>( fPOI.find(POIname) );
00360    assert(poi);
00361 
00362    // get pointer to poserior pdf
00363    if (!fPosteriorPdf)
00364       fPosteriorPdf = (RooAbsPdf*) GetPosteriorPdf1D();
00365    //RooAbsReal * cdf = fPosteriorPdf->createCdf(fPOI,RooFit::ScanParameters(100,2));
00366 
00367    // range of poi
00368    Double_t minpoi = poi->getMin();
00369    Double_t maxpoi = poi->getMax();
00370 
00371    // bin number of histogram for posterior
00372    Int_t stepnumber = _posteriorTH1D->GetNbinsX();
00373    //cout << "stepnumber is: " << stepnumber << endl;
00374 
00375    // width of one bin in histogram of posterior
00376    Double_t stepsize = (maxpoi-minpoi)/stepnumber;
00377    //cout << "stepsize is: " << stepsize << endl;
00378 
00379    // pair: first entry: bin number , second entry: value of posterior
00380    vector < pair< Int_t,Double_t > > posteriorVector;
00381 
00382    // for normalization
00383    Double_t histoIntegral = 0;
00384    // give posteriorVector the right length
00385    posteriorVector.resize(stepnumber);
00386 
00387    // see in BayesianCalculator for details about this "feature"
00388    Double_t tmpVal = poi->getVal();
00389 
00390    // initialize elements of posteriorVector
00391    int i = 0;
00392    vector < pair< Int_t,Double_t > >::iterator vecit = posteriorVector.begin();
00393    vector < pair< Int_t,Double_t > >::iterator vecit_end = posteriorVector.begin();
00394    for( ; vecit != vecit_end ; ++vecit) {
00395       poi->setVal(poi->getMin()+i*stepsize);
00396       posteriorVector[i] = make_pair(i, _posteriorTH1D->GetBinContent(i) ); // hope this is working
00397       histoIntegral+=_posteriorTH1D->GetBinContent(i); // better to get this directly from the histogram!
00398       //cout << "pair with binnumber: " << i << " and postriorprob: " << _posteriorTH1D->GetBinContent(i) << endl;
00399       i++;
00400    }
00401 
00402    //cout << "histoIntegral is: " << histoIntegral << endl;
00403 
00404    // sort posteriorVector with respect to posterior pdf
00405    std::sort(posteriorVector.begin(), posteriorVector.end(), sortbyposterior);
00406 
00407    // keep track of integrated posterior in the interval
00408    Double_t integratedposterior = 0.;
00409 
00410    // keep track of lowerLim and upperLim
00411    Double_t lowerLim=posteriorVector.size();
00412    Double_t upperLim=0;
00413 
00414    // store the bins in the intervall
00415    vector<bool> inInterval;
00416    inInterval.resize(posteriorVector.size());
00417 
00418    // set all values in inInterval to false
00419    for (unsigned int k = 0; k < inInterval.size(); k++)
00420       inInterval[k] = false;
00421 
00422    unsigned int j = 0;
00423    // add bins to interval while CL not reached
00424    while(((integratedposterior/histoIntegral) < (1-fSize)) && (j < posteriorVector.size())) {
00425       integratedposterior+=posteriorVector[j].second;
00426 
00427       //cout << "bin number: " << posteriorVector[j].first << "with posterior prob.: " << posteriorVector[j].second << endl;
00428 
00429       // update vector with bins included in the interval
00430       inInterval[posteriorVector[j].first] = true;
00431 
00432       if(posteriorVector[j].first < lowerLim) {
00433          lowerLim = posteriorVector[j].first; // update lowerLim
00434       }
00435       if(posteriorVector[j].first > upperLim) {
00436          upperLim = posteriorVector[j].first; // update upperLim
00437       }
00438 
00439       fLower = (lowerLim-1) * stepsize; // bin 0 of histo is underflow! Hence -1
00440       fUpper = (upperLim-1) * stepsize;
00441       j++;
00442    }
00443 
00444    // initialize vector with interval borders
00445 
00446    bool runInside = false;
00447    for (unsigned int l = 0; l < inInterval.size(); l++) {
00448       if ( (runInside == false) && (inInterval[l] == true) ) {
00449          _intervalBorders1D.push_back(static_cast<double>(l-1)* stepsize);
00450          runInside = true;
00451       }
00452       if ( ( runInside == true) && (l < (inInterval.size()-1) ) && (inInterval[l+1] == false) ) {
00453          _intervalBorders1D.push_back(static_cast<double>(l-1)* stepsize);
00454          runInside = false;
00455       }
00456       if ( ( runInside == true) && (l == (inInterval.size()-1)) ) {
00457          _intervalBorders1D.push_back(static_cast<double>(l-1)* stepsize);
00458       }
00459    }
00460 
00461    // check if the intervall is connected
00462    if(checkConnected) {
00463       if (_intervalBorders1D.size() > 2) {
00464          fConnectedInterval = false;
00465       }
00466       else {
00467          fConnectedInterval = true;
00468       }
00469    }
00470 
00471    poi->setVal(tmpVal); // patch: restore the original value of poi
00472 
00473    fValidInterval = true;
00474 
00475    TString interval_name = TString("ShortestBayesianInterval_a") + TString(this->GetName());
00476    SimpleInterval * interval = new SimpleInterval(interval_name,*poi,fLower,fUpper,ConfidenceLevel());
00477    interval->SetTitle("Shortest SimpleInterval from BATCalculator");
00478 
00479    //if(assert)
00480 
00481    return interval;
00482 
00483 }
00484 
00485 // ---------------------------------------------------------
00486 void BATCalculator::SetNbins(const char * parname, int nbins)
00487 {
00488    _myRooInterface->SetNbins(parname, nbins);
00489 }
00490 
00491 // ---------------------------------------------------------
00492 // temporary solution for upper Limit
00493 Double_t BATCalculator::GetOneSidedUperLim()
00494 {
00495    double safeVal = fSize;
00496    fSize = safeVal/2.;
00497    return GetInterval()->UpperLimit();
00498 }
00499 
00500 /*
00501 // ---------------------------------------------------------
00502 int BATCalculator::GetNbins(const char * parname)
00503 {
00504    ;
00505 }
00506 */
00507 
00508 // ---------------------------------------------------------
00509 bool sortbyposterior(pair< Int_t,Double_t > pair1, pair< Int_t,Double_t > pair2)
00510 {
00511    return (pair1.second > pair2.second);
00512 }
00513 
00514 
00515 
00516 } // end namespace RooStats

Generated on Mon Aug 30 2010 22:14:54 for Bayesian Analysis Toolkit by  doxygen 1.7.1