• Main Page
  • Namespaces
  • Classes
  • Files
  • File List
  • File Members

BCEfficiencyFitter.cxx

Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2008, Daniel Kollar and Kevin Kroeninger.
00003  * All rights reserved.
00004  *
00005  * For the licensing terms see doc/COPYING.
00006  */
00007 
00008 // ---------------------------------------------------------
00009 
00010 #include <TH1D.h>
00011 #include <TH2D.h>
00012 #include <TF1.h>
00013 #include <TGraph.h>
00014 #include <TGraphAsymmErrors.h>
00015 #include <TString.h>
00016 #include <TPad.h>
00017 #include <TRandom3.h>
00018 #include <TLegend.h>
00019 #include <TMath.h>
00020 
00021 #include "BAT/BCLog.h"
00022 #include "BAT/BCDataSet.h"
00023 #include "BAT/BCDataPoint.h"
00024 #include "BAT/BCMath.h"
00025 
00026 #include "BCEfficiencyFitter.h"
00027 
00028 // ---------------------------------------------------------
00029 
00030 BCEfficiencyFitter::BCEfficiencyFitter() : BCModel("EfficiencyFitter")
00031 {
00032    fHistogram1 = 0;
00033    fHistogram2 = 0;
00034    fHistogramBinomial = 0;
00035    fHistogramRatio = 0;
00036    fFitFunction = 0;
00037 
00038    // set default options and values
00039    this -> MCMCSetNIterationsRun(2000);
00040    this -> MCMCSetRValueCriterion(0.01);
00041    this -> SetFillErrorBand();
00042    fFlagIntegration = false;
00043 }
00044 
00045 // ---------------------------------------------------------
00046 
00047 BCEfficiencyFitter::BCEfficiencyFitter(TH1D * hist1, TH1D * hist2, TF1 * func) : BCModel("EfficiencyFitter")
00048 {
00049    fHistogram1 = 0;
00050    fHistogram2 = 0;
00051    fHistogramBinomial = 0;
00052    fHistogramRatio = 0;
00053    fFitFunction = 0;
00054 
00055    this -> SetHistograms(hist1, hist2);
00056    this -> SetFitFunction(func);
00057 
00058    this -> MCMCSetNIterationsRun(2000);
00059    this -> MCMCSetRValueCriterion(0.01);
00060    this -> SetFillErrorBand();
00061    fFlagIntegration = false;
00062 }
00063 
00064 // ---------------------------------------------------------
00065 
00066 int BCEfficiencyFitter::SetHistograms(TH1D * hist1, TH1D * hist2)
00067 {
00068    // check if histogram exists
00069    if (!hist1 || !hist2)
00070    {
00071       BCLog::Out(BCLog::error,BCLog::error,"BCEfficiencyFitter::SetHistograms() : TH1D not created.");
00072       return 0;
00073    }
00074 
00075    // check compatibility of both histograms : number of bins
00076    if (hist1 -> GetNbinsX() != hist2 -> GetNbinsX())
00077    {
00078       BCLog::Out(BCLog::error,BCLog::error,"BCEfficiencyFitter::SetHistograms() : Histograms do not have the same number of bins.");
00079       return 0;
00080    }
00081 
00082    // check compatibility of both histograms : bin content
00083    for (int i = 1; i <= hist1 -> GetNbinsX(); ++i)
00084    {
00085       if (hist1 -> GetBinContent(i) < hist2 -> GetBinContent(i))
00086       {
00087          BCLog::Out(BCLog::error,BCLog::error,"BCEfficiencyFitter::SetHistograms() : Histogram 1 has fewer entries than histogram 2.");
00088          return 0;
00089       }
00090    }
00091 
00092    // set pointer to histograms
00093    fHistogram1 = hist1;
00094    fHistogram2 = hist2;
00095    
00096    // create efficiency histogram
00097    if (fHistogramRatio)
00098       delete fHistogramRatio;
00099 
00100    fHistogramRatio = new TGraphAsymmErrors();
00101    fHistogramRatio -> SetMarkerStyle(20);
00102    fHistogramRatio -> SetMarkerSize(1.5);
00103 
00104    int npoints = 0;
00105 
00106    // set points
00107    for (int i = 1; i <= hist1 -> GetNbinsX(); ++i)
00108    {
00109       // calculate uncertainties
00110       double xmin;
00111       double xmax;
00112       int flag = this -> GetUncertainties(
00113             int(hist1 -> GetBinContent(i)),
00114             int(hist2 -> GetBinContent(i)),
00115             0.68, xmin, xmax);
00116 
00117       if (flag == 1)
00118       {
00119          fHistogramRatio -> SetPoint(
00120                npoints,
00121                hist1 -> GetBinCenter(i),
00122                hist2 -> GetBinContent(i) / hist1 -> GetBinContent(i));
00123          // set uncertainties
00124          fHistogramRatio -> SetPointEXhigh(npoints, 0.);
00125          fHistogramRatio -> SetPointEXlow(npoints, 0.);
00126          fHistogramRatio -> SetPointEYhigh(npoints, xmax - hist2 -> GetBinContent(i) / hist1 -> GetBinContent(i));
00127          fHistogramRatio -> SetPointEYlow(npoints++, hist2 -> GetBinContent(i) / hist1 -> GetBinContent(i) - xmin);
00128       }
00129       else if (flag == -2)
00130       {
00131          fHistogramRatio -> SetPoint(npoints, hist1 -> GetBinCenter(i), 0.);
00132          // set uncertainties
00133          fHistogramRatio -> SetPointEXhigh(npoints, 0.);
00134          fHistogramRatio -> SetPointEXlow(npoints, 0.);
00135          fHistogramRatio -> SetPointEYhigh(npoints, xmax);
00136          fHistogramRatio -> SetPointEYlow(npoints++, 0.);
00137       }
00138       else if (flag == -1)
00139       {
00140          fHistogramRatio -> SetPoint(npoints, hist1 -> GetBinCenter(i), 1.);
00141          // set uncertainties
00142          fHistogramRatio -> SetPointEXhigh(npoints, 0.);
00143          fHistogramRatio -> SetPointEXlow(npoints, 0.);
00144          fHistogramRatio -> SetPointEYhigh(npoints, 0.);
00145          fHistogramRatio -> SetPointEYlow(npoints++, 1.-xmin);
00146       }
00147    }
00148 
00149    // create a data set. this is necessary in order to calculate the
00150    // error band. the data set contains as many data points as there
00151    // are bins. for now, the data points are empty.
00152    BCDataSet * ds = new BCDataSet();
00153 
00154    // create data points and add them to the data set.
00155    int nbins = fHistogram1 -> GetNbinsX();
00156    for (int i = 0; i < nbins; ++i)
00157    {
00158       BCDataPoint* dp = new BCDataPoint(2);
00159       ds -> AddDataPoint(dp);
00160    }
00161 
00162    // set the new data set.
00163    this -> SetDataSet(ds);
00164 
00165    // calculate the lower and upper edge in x.
00166    double xmin = hist1 -> GetBinLowEdge(1);
00167    double xmax = hist1 -> GetBinLowEdge(nbins+1);
00168 
00169 // // calculate the minimum and maximum range in y.
00170 // double histymin = hist2 -> GetMinimum();
00171 // double histymax = hist1 -> GetMaximum();
00172 
00173 // // calculate the minimum and maximum of the function value based on
00174 // // the minimum and maximum value in y.
00175 // double ymin = TMath::Max(0., histymin - 5.*sqrt(histymin));
00176 // double ymax = histymax + 5.*sqrt(histymax);
00177 
00178    // set the data boundaries for x and y values.
00179    this -> SetDataBoundaries(0, xmin, xmax);
00180    this -> SetDataBoundaries(1, 0.0, 1.0);
00181 
00182    // set the indeces for fitting.
00183    this -> SetFitFunctionIndices(0, 1);
00184 
00185    // no error
00186    return 1;
00187 }
00188 
00189 // ---------------------------------------------------------
00190 
00191 int BCEfficiencyFitter::SetFitFunction(TF1 * func)
00192 {
00193    // check if function exists
00194    if(!func)
00195    {
00196       BCLog::Out(BCLog::error,BCLog::error,"BCEfficiencyFitter::SetFitFunction() : TF1 not created.");
00197       return 0;
00198    }
00199 
00200    // set the function
00201    fFitFunction = func;
00202 
00203    // update the model name to contain the function name
00204    this -> SetName(TString::Format("BCEfficiencyFitter with %s",fFitFunction->GetName()));
00205 
00206    // reset parameters
00207    fParameterSet -> clear();
00208 
00209    // get the new number of parameters
00210    int n = func -> GetNpar();
00211 
00212    // add parameters
00213    for (int i = 0; i < n; ++i)
00214    {
00215       double xmin;
00216       double xmax;
00217 
00218       func -> GetParLimits(i, xmin, xmax);
00219 
00220       this -> AddParameter(func->GetParName(i), xmin, xmax);
00221    }
00222 
00223    // no error
00224    return 1;
00225 }
00226 
00227 // ---------------------------------------------------------
00228 
00229 BCEfficiencyFitter::~BCEfficiencyFitter()
00230 {
00231    if (fHistogram1)
00232       delete fHistogram1;
00233 
00234    if (fHistogram2)
00235       delete fHistogram2;
00236 
00237    if (fHistogramBinomial)
00238       delete fHistogramBinomial;
00239 
00240    if (fHistogramRatio)
00241       delete fHistogramRatio;
00242 }
00243 
00244 // ---------------------------------------------------------
00245 
00246 double BCEfficiencyFitter::LogAPrioriProbability(std::vector <double> parameters)
00247 {
00248    // using flat probability in all parameters
00249    double logprob = 0.;
00250    for(unsigned int i=0; i < this -> GetNParameters(); i++)
00251       logprob -= log(this -> GetParameter(i) -> GetRangeWidth());
00252 
00253    return logprob;
00254 }
00255 
00256 // ---------------------------------------------------------
00257 
00258 double BCEfficiencyFitter::LogLikelihood(std::vector <double> params)
00259 {
00260 
00261    // initialize probability
00262    double loglikelihood = 0;
00263 
00264    // set the parameters of the function
00265    fFitFunction -> SetParameters(&params[0]);
00266 
00267    // get the number of bins
00268    int nbins = fHistogram1 -> GetNbinsX();
00269 
00270    // get bin width
00271    double dx = fHistogram1 -> GetXaxis() -> GetBinWidth(0);
00272 
00273    // loop over all bins
00274    for (int ibin = 1; ibin <= nbins; ++ibin)
00275    {
00276       // get n
00277       int n = int(fHistogram1 -> GetBinContent(ibin));
00278 
00279       // get k
00280       int k = int(fHistogram2 -> GetBinContent(ibin));
00281 
00282       // get x
00283       double x = fHistogram1 -> GetBinCenter(ibin);
00284 
00285       double eff = 0;
00286 
00287       // use ROOT's TH1D::Integral method
00288       if (fFlagIntegration)
00289          eff = fFitFunction -> Integral(x - dx/2., x + dx/2.) / dx;
00290 
00291       // use linear interpolation
00292       else
00293          eff = (fFitFunction -> Eval(x + dx/2.) + fFitFunction -> Eval(x - dx/2.)) / 2.;
00294 
00295       // get the value of the Poisson distribution
00296       loglikelihood += BCMath::LogApproxBinomial(n, k, eff);
00297    }
00298 
00299    return loglikelihood;
00300 }
00301 
00302 // ---------------------------------------------------------
00303 
00304 double BCEfficiencyFitter::FitFunction(std::vector <double> x, std::vector <double> params)
00305 {
00306    // set the parameters of the function
00307    fFitFunction -> SetParameters(&params[0]);
00308 
00309    return fFitFunction -> Eval(x[0]);
00310 }
00311 
00312 // ---------------------------------------------------------
00313 
00314 int BCEfficiencyFitter::Fit(TH1D * hist1, TH1D * hist2, TF1 * func)
00315 {
00316    // set histogram
00317    if (hist1 && hist2)
00318       this -> SetHistograms(hist1, hist2);
00319    else
00320    {
00321       BCLog::Out(BCLog::warning, BCLog::warning,"BCEfficiencyFitter::Fit() : Histogram(s) not defined.");
00322       return 0;
00323    }
00324 
00325    // set function
00326    if (func)
00327       this -> SetFitFunction(func);
00328    else
00329    {
00330       BCLog::Out(BCLog::warning, BCLog::warning,"BCEfficiencyFitter::Fit() : Fit function not defined.");
00331       return 0;
00332    }
00333 
00334    // perform marginalization
00335    this -> MarginalizeAll();
00336 
00337    // maximize posterior probability, using the best-fit values close
00338    // to the global maximum from the MCMC
00339    this -> FindModeMinuit( this -> GetBestFitParameters() , -1);
00340 
00341    // calculate the p-value using the fast MCMC algorithm
00342    double pvalue;
00343    if (this -> CalculatePValueFast(this -> GetBestFitParameters(), pvalue))
00344          fPValue = pvalue;
00345    else
00346       BCLog::Out(BCLog::warning, BCLog::warning,"BCEfficiencyFitter::Fit() : Could not use the fast p-value evaluation.");
00347 
00348    // print summary to screen
00349    this -> PrintShortFitSummary();
00350 
00351    // no error
00352    return 1;
00353 }
00354 
00355 // ---------------------------------------------------------
00356 
00357 void BCEfficiencyFitter::DrawFit(const char * options, bool flaglegend)
00358 {
00359    if (!fHistogram1 || !fHistogram2 || !fHistogramRatio)
00360    {
00361       BCLog::Out(BCLog::warning, BCLog::warning,"BCEfficiencyFitter::DrawFit() : Histogram(s) not defined.");
00362       return;
00363    }
00364 
00365    if (!fFitFunction)
00366    {
00367       BCLog::Out(BCLog::warning, BCLog::warning,"BCEfficiencyFitter::DrawFit() : Fit function not defined.");
00368       return;
00369    }
00370 
00371    // check wheather options contain "same"
00372    TString opt = options;
00373    opt.ToLower();
00374 
00375    // if not same, draw the histogram first to get the axes
00376    if(!opt.Contains("same"))
00377    {
00378       // create new histogram
00379       TH2D * hist_axes = new TH2D("hist_axes",
00380             Form(";%s;ratio", fHistogram1 -> GetXaxis() -> GetTitle()),
00381             fHistogram1 -> GetNbinsX(),
00382             fHistogram1 -> GetXaxis() -> GetBinLowEdge(1),
00383             fHistogram1 -> GetXaxis() -> GetBinLowEdge(fHistogram1 -> GetNbinsX()+1),
00384             1, 0., 1.);
00385       hist_axes -> SetStats(kFALSE);
00386       hist_axes -> Draw();
00387 
00388       fHistogramRatio -> Draw(TString::Format("%sp",opt.Data()));
00389    }
00390 
00391    // draw the error band as central 68% probability interval
00392    fErrorBand = this -> GetErrorBandGraph(0.16, 0.84);
00393    fErrorBand -> Draw("f same");
00394 
00395    // now draw the histogram again since it was covered by the band
00396    fHistogramRatio -> Draw(TString::Format("%ssamep",opt.Data()));
00397 
00398    // draw the fit function on top
00399    fGraphFitFunction = this -> GetFitFunctionGraph( this->GetBestFitParameters() );
00400    fGraphFitFunction -> SetLineColor(kRed);
00401    fGraphFitFunction -> SetLineWidth(2);
00402    fGraphFitFunction -> Draw("l same");
00403 
00404    // draw legend
00405    if (flaglegend)
00406    {
00407       TLegend * legend = new TLegend(0.25, 0.75, 0.55, 0.95);
00408       legend -> SetBorderSize(0);
00409       legend -> SetFillColor(kWhite);
00410       legend -> AddEntry(fHistogramRatio, "Data", "L");
00411       legend -> AddEntry(fGraphFitFunction, "Best fit", "L");
00412       legend -> AddEntry(fErrorBand, "Error band", "F");
00413       legend -> Draw();
00414    }
00415    gPad -> RedrawAxis();
00416 }
00417 
00418 // ---------------------------------------------------------
00419 int BCEfficiencyFitter::CalculatePValueFast(std::vector<double> par, double &pvalue)
00420 {
00421    // check size of parameter vector
00422    if (par.size() != this -> GetNParameters())
00423    {
00424       BCLog::Out(BCLog::warning, BCLog::warning,"BCEfficiencyFitter::CalculatePValueFast() : Number of parameters is inconsistent.");
00425       return 0;
00426    }
00427 
00428    // check if histogram exists
00429    if (!fHistogram1 || !fHistogram2)
00430    {
00431       BCLog::Out(BCLog::warning, BCLog::warning,"BCEfficiencyFitter::CalculatePValueFast() : Histogram not defined.");
00432       return 0;
00433    }
00434 
00435    // define temporary variables
00436    int nbins = fHistogram1 -> GetNbinsX();
00437 
00438    std::vector <int> histogram;
00439    std::vector <double> expectation;
00440    histogram.assign(nbins, 0);
00441    expectation.assign(nbins, 0);
00442 
00443    double logp = 0;
00444    double logp_start = 0;
00445    int counter_pvalue = 0;
00446 
00447    // define starting distribution
00448    for (int ibin = 0; ibin < nbins; ++ibin)
00449    {
00450       // get bin boundaries
00451       double xmin = fHistogram1 -> GetBinLowEdge(ibin+1);
00452       double xmax = fHistogram1 -> GetBinLowEdge(ibin+2);
00453 
00454       // get the number of expected events
00455       double yexp = fFitFunction -> Integral(xmin, xmax);
00456 
00457       // get n
00458       int n = int(fHistogram1 -> GetBinContent(ibin));
00459 
00460       // get k
00461       int k = int(fHistogram2 -> GetBinContent(ibin));
00462 
00463       histogram[ibin]   = k;
00464       expectation[ibin] = n * yexp;
00465 
00466       // calculate p;
00467       logp += BCMath::LogApproxBinomial(n, k, yexp);
00468    }
00469    logp_start = logp;
00470 
00471    int niter = 100000;
00472 
00473    // loop over iterations
00474    for (int iiter = 0; iiter < niter; ++iiter)
00475    {
00476       // loop over bins
00477       for (int ibin = 0; ibin < nbins; ++ibin)
00478       {
00479          // get n
00480          int n = int(fHistogram1 -> GetBinContent(ibin));
00481 
00482          // get k
00483          int k = histogram.at(ibin);
00484 
00485          // get expectation
00486          double yexp = 0;
00487          if (n > 0)
00488             yexp = expectation.at(ibin)/n;
00489 
00490          // random step up or down in statistics for this bin
00491          double ptest = fRandom -> Rndm() - 0.5;
00492 
00493          // continue, if efficiency is at limit
00494          if (!(yexp > 0. || yexp < 1.0))
00495             continue;
00496 
00497          // increase statistics by 1
00498          if (ptest > 0 && (k < n))
00499          {
00500             // calculate factor of probability
00501             double r = (double(n)-double(k))/(double(k)+1.) * yexp / (1. - yexp);
00502 
00503             // walk, or don't (this is the Metropolis part)
00504             if (fRandom -> Rndm() < r)
00505             {
00506                histogram[ibin] = k + 1;
00507                logp += log(r);
00508             }
00509          }
00510 
00511          // decrease statistics by 1
00512          else if (ptest <= 0 && (k > 0))
00513          {
00514             // calculate factor of probability
00515             double r = double(k) / (double(n)-(double(k)-1)) * (1-yexp)/yexp;
00516 
00517             // walk, or don't (this is the Metropolis part)
00518             if (fRandom -> Rndm() < r)
00519             {
00520                histogram[ibin] = k - 1;
00521                logp += log(r);
00522             }
00523          }
00524 
00525       } // end of looping over bins
00526 
00527       // increase counter
00528       if (logp < logp_start)
00529          counter_pvalue++;
00530 
00531    } // end of looping over iterations
00532 
00533    // calculate p-value
00534    pvalue = double(counter_pvalue) / double(niter);
00535 
00536    // no error
00537    return 1;
00538 }
00539 
00540 // ---------------------------------------------------------
00541 int BCEfficiencyFitter::GetUncertainties(int n, int k, double p, double &xmin, double &xmax)
00542 {
00543    // create a histogram with binomial distribution
00544    if (fHistogramBinomial)
00545       fHistogramBinomial -> Reset();
00546    else
00547       fHistogramBinomial = new TH1D("hist_binomial", "", 1000, 0., 1.);
00548 
00549    // loop over bins and fill histogram
00550    for (int i = 1; i <= 1000; ++i)
00551    {
00552       double x   = fHistogramBinomial -> GetBinCenter(i);
00553       double val = TMath::Binomial(n, k) * TMath::Power(x, double(k)) * TMath::Power(1-x, double(n-k));
00554       fHistogramBinomial -> SetBinContent(i, val);
00555    }
00556 
00557    // normalize
00558    fHistogramBinomial -> Scale(1.0 / fHistogramBinomial -> Integral());
00559 
00560    // calculate quantiles
00561    int nprobSum = 4;
00562    double q[4];
00563    double probSum[4];
00564    probSum[0] = (1.-p)/2.;
00565    probSum[1] = 1.-(1.-p)/2.;
00566    probSum[2] = 0.05;
00567    probSum[3] = 0.95;
00568 
00569    fHistogramBinomial -> GetQuantiles(nprobSum, q, probSum);
00570 
00571    double xexp = double(k)/double(n);
00572 
00573    // calculate uncertainties
00574    if (n == 0)
00575    {
00576       xmin = 0.0;
00577       xmax = 0.0;
00578       return -3;
00579    }
00580    else if (xexp < q[0])
00581    {
00582       xmin = 0;
00583       xmax = q[3];
00584       return -2;
00585    }
00586 
00587    else if (xexp > q[1])
00588    {
00589       xmin = q[2];
00590       xmax = 1.0;
00591       return -1;
00592    }
00593    else
00594    {
00595       xmin = q[0];
00596       xmax = q[1];
00597       return 1;
00598    }
00599 
00600 }
00601 
00602 // ---------------------------------------------------------

Generated on Mon Aug 30 2010 22:14:54 for Bayesian Analysis Toolkit by  doxygen 1.7.1