• Main Page
  • Namespaces
  • Classes
  • Files
  • File List
  • File Members

BCHistogramFitter.cxx

Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2008, Daniel Kollar and Kevin Kroeninger.
00003  * All rights reserved.
00004  *
00005  * For the licensing terms see doc/COPYING.
00006  */
00007 
00008 // ---------------------------------------------------------
00009 
00010 #include <TH1D.h>
00011 #include <TF1.h>
00012 #include <TGraph.h>
00013 #include <TString.h>
00014 #include <TPad.h>
00015 #include <TRandom3.h>
00016 #include <TLegend.h>
00017 #include <TMath.h>
00018 #include <Math/ProbFuncMathCore.h> //for ROOT::Math namespace
00019 #include <TString.h>
00020 
00021 #include "BAT/BCLog.h"
00022 #include "BAT/BCDataSet.h"
00023 #include "BAT/BCDataPoint.h"
00024 #include "BAT/BCMath.h"
00025 
00026 #include "BCHistogramFitter.h"
00027 
00028 // ---------------------------------------------------------
00029 
00030 BCHistogramFitter::BCHistogramFitter() :
00031    BCModel("HistogramFitter")
00032 {
00033    fHistogram = 0;
00034    fFitFunction = 0;
00035 
00036    // set default options and values
00037    this -> MCMCSetNIterationsRun(2000);
00038    this -> SetFillErrorBand();
00039    fFlagIntegration = true;
00040    flag_discrete = true;
00041 }
00042 
00043 // ---------------------------------------------------------
00044 
00045 BCHistogramFitter::BCHistogramFitter(TH1D * hist, TF1 * func) :
00046    BCModel("HistogramFitter")
00047 {
00048    fHistogram = 0;
00049    fFitFunction = 0;
00050 
00051    this -> SetHistogram(hist);
00052    this -> SetFitFunction(func);
00053 
00054    this -> MCMCSetNIterationsRun(2000);
00055    this -> SetFillErrorBand();
00056    fFlagIntegration = true;
00057    flag_discrete = true;
00058 
00059    fHistogramExpected = 0;
00060 }
00061 
00062 // ---------------------------------------------------------
00063 
00064 int BCHistogramFitter::SetHistogram(TH1D * hist)
00065 {
00066    // check if histogram exists
00067    if (!hist) {
00068       BCLog::Out(BCLog::error, BCLog::error,
00069             "BCHistogramFitter::SetHistogram() : TH1D not created.");
00070       return 0;
00071    }
00072 
00073    // set pointer to histogram
00074    fHistogram = hist;
00075 
00076    // create a data set. this is necessary in order to calculate the
00077    // error band. the data set contains as many data points as there
00078    // are bins.
00079    BCDataSet * ds = new BCDataSet();
00080 
00081    // create data points and add them to the data set.
00082    // the x value is the lower edge of the bin, and
00083    // the y value is the bin count
00084    int nbins = fHistogram -> GetNbinsX();
00085    for (int i = 0; i < nbins; ++i) {
00086       BCDataPoint* dp = new BCDataPoint(2);
00087       dp ->SetValue(0, fHistogram -> GetBinLowEdge(i+1));
00088       dp ->SetValue(1, fHistogram -> GetBinContent(i+1));
00089       ds -> AddDataPoint(dp);
00090    }
00091 
00092    // set the new data set.
00093    this -> SetDataSet(ds);
00094 
00095    // calculate the lower and upper edge in x.
00096    double xmin = hist -> GetBinLowEdge(1);
00097    double xmax = hist -> GetBinLowEdge(nbins + 1);
00098 
00099    // calculate the minimum and maximum range in y.
00100    double histymin = hist -> GetMinimum();
00101    double histymax = hist -> GetMaximum();
00102 
00103    // calculate the minimum and maximum of the function value based on
00104    // the minimum and maximum value in y.
00105    double ymin = TMath::Max(0., histymin - 5. * sqrt(histymin));
00106    double ymax = histymax + 5. * sqrt(histymax);
00107 
00108    // set the data boundaries for x and y values.
00109    this -> SetDataBoundaries(0, xmin, xmax);
00110    this -> SetDataBoundaries(1, ymin, ymax);
00111 
00112    // set the indeces for fitting.
00113    this -> SetFitFunctionIndices(0, 1);
00114 
00115    // no error
00116    return 1;
00117 }
00118 
00119 // ---------------------------------------------------------
00120 
00121 int BCHistogramFitter::SetHistogramExpected(const std::vector <double>& parameters)
00122 {
00123    //clear up previous memory
00124    if(fHistogramExpected){
00125       delete fHistogramExpected;
00126    }
00127    //copy all properties from the data histogram
00128    fHistogramExpected = new TH1D(*fHistogram);
00129 
00130    // get the number of bins
00131    int nBins = fHistogramExpected -> GetNbinsX();
00132 
00133    // get bin width
00134    double dx = fHistogramExpected -> GetBinWidth(1);
00135 
00136    //set the parameters of fit function
00137    fFitFunction -> SetParameters(&parameters[0]);
00138 
00139    // get function value of lower bin edge
00140    double fedgelow = fFitFunction -> Eval(fHistogramExpected -> GetBinLowEdge(1));
00141 
00142    // loop over all bins, skip underflow
00143    for (int ibin = 1; ibin <= nBins; ++ibin) {
00144       // get upper bin edge
00145       double xedgehi = fHistogramExpected -> GetBinLowEdge(ibin) + dx;
00146 
00147       //expected count
00148       double yexp = 0.;
00149 
00150       // use ROOT's TH1D::Integral method
00151       if (fFlagIntegration)
00152          yexp = fFitFunction -> Integral(xedgehi - dx, xedgehi);
00153 
00154       // use linear interpolation
00155       else {
00156          // get function value at upper bin edge
00157          double fedgehi = fFitFunction -> Eval(xedgehi);
00158          yexp = fedgelow * dx + (fedgehi - fedgelow) * dx / 2.;
00159 
00160          // make the upper edge the lower edge for the next iteration
00161          fedgelow = fedgehi;
00162       }
00163 
00164       //write the expectation for the bin
00165       fHistogramExpected -> SetBinContent(ibin, yexp);
00166 
00167       //avoid automatic error as sqrt(yexp), used e.g. in Kolmogorov correction factor
00168       fHistogramExpected -> SetBinError(ibin, 0.0);
00169 
00170       // but the data under this model have that sqrt(yexp) uncertainty
00171       fHistogram -> SetBinError(ibin, sqrt(yexp));
00172 
00173 
00174    }
00175    return 1;
00176 }
00177 
00178 // ---------------------------------------------------------
00179 
00180 int BCHistogramFitter::SetFitFunction(TF1 * func)
00181 {
00182    // check if function exists
00183    if (!func) {
00184       BCLog::Out(BCLog::error, BCLog::error,
00185             "BCHistogramFitter::SetFitFunction() : TF1 not created.");
00186       return 0;
00187    }
00188 
00189    // set the function
00190    fFitFunction = func;
00191 
00192    // update the model name to contain the function name
00193    this -> SetName(TString::Format("HistogramFitter with %s",
00194          fFitFunction->GetName()));
00195 
00196    // reset parameters
00197    fParameterSet -> clear();
00198 
00199    // get the new number of parameters
00200    int n = func -> GetNpar();
00201 
00202    // add parameters
00203    for (int i = 0; i < n; ++i) {
00204       double xmin;
00205       double xmax;
00206 
00207       func -> GetParLimits(i, xmin, xmax);
00208 
00209       this -> AddParameter(func->GetParName(i), xmin, xmax);
00210    }
00211 
00212    // no error
00213    return 1;
00214 }
00215 
00216 // ---------------------------------------------------------
00217 
00218 BCHistogramFitter::~BCHistogramFitter()
00219 {
00220    delete fHistogramExpected;
00221 }
00222 
00223 // ---------------------------------------------------------
00224 
00225 double BCHistogramFitter::LogAPrioriProbability(std::vector<double> parameters)
00226 {
00227    // using flat probability in all parameters
00228    double logprob = 0.;
00229    for (unsigned int i = 0; i < this -> GetNParameters(); i++)
00230       logprob -= log(this -> GetParameter(i) -> GetRangeWidth());
00231 
00232    return logprob;
00233 }
00234 
00235 // ---------------------------------------------------------
00236 
00237 double BCHistogramFitter::LogLikelihood(std::vector<double> params)
00238 {
00239    // initialize probability
00240    double loglikelihood = 0;
00241 
00242    // set the parameters of the function
00243    fFitFunction -> SetParameters(&params[0]);
00244 
00245    // get the number of bins
00246    int nbins = fHistogram -> GetNbinsX();
00247 
00248    // get bin width
00249    double dx = fHistogram -> GetBinWidth(1);
00250 
00251    // get function value of lower bin edge
00252    double fedgelow = fFitFunction -> Eval(fHistogram -> GetBinLowEdge(1));
00253 
00254    // loop over all bins
00255    for (int ibin = 1; ibin <= nbins; ++ibin) {
00256       // get upper bin edge
00257       double xedgehi = fHistogram -> GetBinLowEdge(ibin) + dx;
00258 
00259       // get function value at upper bin edge
00260       double fedgehi = fFitFunction -> Eval(xedgehi);
00261 
00262       // get the number of observed events
00263       double y = fHistogram -> GetBinContent(ibin);
00264 
00265       double yexp = 0.;
00266 
00267       // use ROOT's TH1D::Integral method
00268       if (fFlagIntegration)
00269          yexp = fFitFunction -> Integral(xedgehi - dx, xedgehi);
00270 
00271       // use linear interpolation
00272       else {
00273          yexp = fedgelow * dx + (fedgehi - fedgelow) * dx / 2.;
00274 
00275          // make the upper edge the lower edge for the next iteration
00276          fedgelow = fedgehi;
00277       }
00278 
00279       // get the value of the Poisson distribution
00280       loglikelihood += BCMath::LogPoisson(y, yexp);
00281    }
00282 
00283    // // get bin boundaries
00284    // double xmin = fHistogram -> GetBinLowEdge(ibin);
00285    // double xmax = fHistogram -> GetBinLowEdge(ibin+1);
00286 
00287    // // get the number of observed events
00288    // double y = fHistogram -> GetBinContent(ibin);
00289 
00290    // // get the number of expected events
00291    // double yexp = fFitFunction -> Integral(xmin, xmax);
00292 
00293    // // get the value of the Poisson distribution
00294    // loglikelihood += BCMath::LogPoisson(y, yexp);
00295 
00296    return loglikelihood;
00297 }
00298 
00299 // ---------------------------------------------------------
00300 
00301 double BCHistogramFitter::FitFunction(std::vector<double> x,
00302       std::vector<double> params)
00303 {
00304    // set the parameters of the function
00305    fFitFunction -> SetParameters(&params[0]);
00306 
00307    return fFitFunction -> Eval(x[0]) * fHistogram -> GetBinWidth(
00308          fHistogram -> FindBin(x[0]));
00309 }
00310 
00311 // ---------------------------------------------------------
00312 
00313 int BCHistogramFitter::Fit(TH1D * hist, TF1 * func)
00314 {
00315    // set histogram
00316    if (hist)
00317       this -> SetHistogram(hist);
00318    else {
00319       BCLog::Out(BCLog::warning, BCLog::warning,
00320             "BCHistogramFitter::Fit() : Histogram not defined.");
00321       return 0;
00322    }
00323 
00324    // set function
00325    if (func)
00326       this -> SetFitFunction(func);
00327    else {
00328       BCLog::Out(BCLog::warning, BCLog::warning,
00329             "BCHistogramFitter::Fit() : Fit function not defined.");
00330       return 0;
00331    }
00332 
00333    // perform marginalization
00334    this -> MarginalizeAll();
00335 
00336    // maximize posterior probability, using the best-fit values close
00337    // to the global maximum from the MCMC
00338    this -> FindModeMinuit(this -> GetBestFitParameters(), -1);
00339 
00340    // calculate the p-value using the fast MCMC algorithm
00341    double pvalue;
00342    if (this -> CalculatePValueFast(this -> GetBestFitParameters(), pvalue))
00343       fPValue = pvalue;
00344    else
00345       BCLog::Out(BCLog::warning, BCLog::warning,
00346             "BCHistogramFitter::Fit() : Could not use the fast p-value evaluation.");
00347 
00348    // print summary to screen
00349    this -> PrintShortFitSummary();
00350 
00351    // no error
00352    return 1;
00353 }
00354 
00355 // ---------------------------------------------------------
00356 
00357 void BCHistogramFitter::DrawFit(const char * options, bool flaglegend)
00358 {
00359    if (!fHistogram) {
00360       BCLog::Out(BCLog::warning, BCLog::warning,
00361             "BCHistogramFitter::DrawFit() : Histogram not defined.");
00362       return;
00363    }
00364 
00365    if (!fFitFunction) {
00366       BCLog::Out(BCLog::warning, BCLog::warning,
00367             "BCHistogramFitter::DrawFit() : Fit function not defined.");
00368       return;
00369    }
00370 
00371    if (!fErrorBandXY || fBestFitParameters.empty() ) {
00372       BCLog::Out(BCLog::warning, BCLog::warning,
00373             "BCHistogramFitter::DrawFit() : Fit not performed yet.");
00374       return;
00375    }
00376 
00377    // check wheather options contain "same"
00378    TString opt = options;
00379    opt.ToLower();
00380 
00381    // if not same, draw the histogram first to get the axes
00382    if (!opt.Contains("same"))
00383       fHistogram -> Draw(opt.Data());
00384 
00385    // draw the error band as central 68% probability interval
00386    fErrorBand = this -> GetErrorBandGraph(0.16, 0.84);
00387    fErrorBand -> Draw("f same");
00388 
00389    // now draw the histogram again since it was covered by the band
00390    fHistogram -> Draw(TString::Format("%ssame", opt.Data()));
00391 
00392    // draw the fit function on top
00393    fGraphFitFunction
00394          = this -> GetFitFunctionGraph(this->GetBestFitParameters());
00395    fGraphFitFunction -> SetLineColor(kRed);
00396    fGraphFitFunction -> SetLineWidth(2);
00397    fGraphFitFunction -> Draw("l same");
00398 
00399    // draw legend
00400    if (flaglegend) {
00401       TLegend * legend = new TLegend(0.25, 0.75, 0.55, 0.95);
00402       legend -> SetBorderSize(0);
00403       legend -> SetFillColor(kWhite);
00404       legend -> AddEntry(fHistogram, "Data", "L");
00405       legend -> AddEntry(fGraphFitFunction, "Best fit", "L");
00406       legend -> AddEntry(fErrorBand, "Error band", "F");
00407       legend -> Draw();
00408    }
00409 
00410    gPad -> RedrawAxis();
00411 }
00412 
00413 // ---------------------------------------------------------
00414 
00415 int BCHistogramFitter::CalculatePValueFast(std::vector<double> par,
00416       double &pvalue, int nIterations)
00417 {
00418    // check size of parameter vector
00419    if (par.size() != this -> GetNParameters()) {
00420       BCLog::Out(
00421             BCLog::warning,
00422             BCLog::warning,
00423             "BCHistogramFitter::CalculatePValueFast() : Number of parameters is inconsistent.");
00424       return 0;
00425    }
00426 
00427    // check if histogram exists
00428    if (!fHistogram) {
00429       BCLog::Out(BCLog::warning, BCLog::warning,
00430             "BCHistogramFitter::CalculatePValueFast() : Histogram not defined.");
00431       return 0;
00432    }
00433 
00434    // define temporary variables
00435    int nbins = fHistogram -> GetNbinsX();
00436 
00437    std::vector<int> histogram;
00438    std::vector<double> expectation;
00439    histogram.assign(nbins, 0);
00440    expectation.assign(nbins, 0);
00441 
00442    double logp = 0;
00443    double logp_start = 0;
00444    int counter_pvalue = 0;
00445 
00446    //update the expected number of events for all bins
00447    this -> SetHistogramExpected(par);
00448 
00449    // define starting distribution as histogram with most likely entries
00450    for (int ibin = 0; ibin < nbins; ++ibin) {
00451 
00452      // get the number of expected events
00453      double yexp = fHistogramExpected -> GetBinContent(ibin +1);
00454 
00455      //most likely observed value = int(expected value)
00456       histogram[ibin] = int(yexp);
00457       expectation[ibin] = yexp;
00458 
00459       // calculate test statistic (= likelihood of entire histogram) for starting distr.
00460       logp += BCMath::LogPoisson(double(int(yexp)), yexp);
00461       //statistic of the observed data set
00462       logp_start += BCMath::LogPoisson(fHistogram -> GetBinContent(ibin + 1),
00463             yexp);
00464    }
00465 
00466    // loop over iterations
00467    for (int iiter = 0; iiter < nIterations; ++iiter) {
00468       // loop over bins
00469       for (int ibin = 0; ibin < nbins; ++ibin) {
00470          // random step up or down in statistics for this bin
00471          double ptest = fRandom -> Rndm() - 0.5;
00472 
00473          // increase statistics by 1
00474          if (ptest > 0) {
00475             // calculate factor of probability
00476             double r = expectation.at(ibin) / double(histogram.at(ibin) + 1);
00477 
00478             // walk, or don't (this is the Metropolis part)
00479             if (fRandom -> Rndm() < r) {
00480                histogram[ibin] = histogram.at(ibin) + 1;
00481                logp += log(r);
00482             }
00483          }
00484 
00485          // decrease statistics by 1
00486          else if (ptest <= 0 && histogram[ibin] > 0) {
00487             // calculate factor of probability
00488             double r = double(histogram.at(ibin)) / expectation.at(ibin);
00489 
00490             // walk, or don't (this is the Metropolis part)
00491             if (fRandom -> Rndm() < r) {
00492                histogram[ibin] = histogram.at(ibin) - 1;
00493                logp += log(r);
00494             }
00495          }
00496       } // end of looping over bins
00497 
00498       // increase counter
00499       if (logp < logp_start)
00500          counter_pvalue++;
00501 
00502    } // end of looping over iterations
00503 
00504    // calculate p-value
00505    pvalue = double(counter_pvalue) / double(nIterations);
00506 
00507    // no error
00508    return 1;
00509 }
00510 
00511 // ---------------------------------------------------------
00512 
00513 int BCHistogramFitter::CalculatePValueLikelihood(std::vector<double> par,
00514       double &pvalue)
00515 {
00516    // initialize test statistic -2*lambda
00517    double logLambda = 0.0;
00518 
00519    //Calculate expected counts to compare with
00520    this -> SetHistogramExpected(par);
00521 
00522    for (int ibin = 1; ibin <= fHistogram->GetNbinsX(); ++ibin) {
00523 
00524       // get the number of observed events
00525       double y = fHistogram -> GetBinContent(ibin);
00526 
00527       // get the number of expected events
00528       double yexp = fHistogramExpected -> GetBinContent(ibin);
00529 
00530       // get the contribution from this datapoint
00531       if (y == 0)
00532          logLambda += yexp;
00533       else
00534          logLambda += yexp - y + y * log(y / yexp);
00535    }
00536 
00537    // rescale
00538    logLambda *= 2.0;
00539 
00540    // compute degrees of freedom for Poisson case
00541    int nDoF = this->GetNDataPoints() - this->GetNParameters();
00542 
00543    //p value from chi^2 distribution, returns zero if logLambda < 0
00544    fPValueNDoF = TMath::Prob(logLambda, nDoF);
00545    pvalue = fPValueNDoF;
00546 
00547    // no error
00548    return 1;
00549 }
00550 
00551 int BCHistogramFitter::CalculatePValueLeastSquares(std::vector<double> par,
00552       double &pvalue, bool weightExpect)
00553 {
00554    // initialize test statistic chi^2
00555    double chi2 = 0.0;
00556 
00557    //Calculate expected counts to compare with
00558    this -> SetHistogramExpected(par);
00559 
00560    for (int ibin = 1; ibin <= fHistogram->GetNbinsX(); ++ibin) {
00561 
00562       // get the number of observed events
00563       double y = fHistogram -> GetBinContent(ibin);
00564 
00565       // get the number of expected events
00566       double yexp = fHistogramExpected -> GetBinContent(ibin);
00567 
00568       //convert 1/0.0 into 1 for weighted sum
00569       double weight;
00570       if (weightExpect)
00571          weight = (yexp > 0) ? yexp : 1.0;
00572       else
00573          weight = (y > 0) ? y : 1.0;
00574 
00575       // get the contribution from this datapoint
00576       chi2 += (y - yexp) * (y - yexp) / weight;
00577    }
00578 
00579    // compute degrees of freedom for Poisson case
00580    int nDoF = this->GetNDataPoints() - this->GetNParameters();
00581 
00582    // p value from chi^2 distribution, returns zero if logLambda < 0
00583    fPValueNDoF = TMath::Prob(chi2, nDoF);
00584    pvalue = fPValueNDoF;
00585 
00586    // no error
00587    return 1;
00588 }
00589 
00590 int BCHistogramFitter::CalculatePValueKolmogorov(std::vector<double> par, double& pvalue)
00591 {
00592    if (!fHistogramExpected || !fHistogram){
00593       BCLog::OutError("BCHistogramFitter::CalculatePValueKolmogorov: "
00594             "Please define the reference distribution by calling \n"
00595             "BCHistogramFitter::SetHistogramExpected() first!");
00596       return 0;
00597    }
00598 
00599    //update expected counts with current parameters
00600    this -> SetHistogramExpected(par);
00601 
00602    //perform the test
00603    pvalue = fHistogramExpected -> KolmogorovTest(fHistogram);
00604 
00605    fPValue = pvalue;
00606 
00607    // no error
00608    return 1;
00609 }
00610 
00611 double BCHistogramFitter::CDF(const std::vector<double>& parameters, int index,
00612       bool lower)
00613 {
00614 
00615    if (parameters.empty())
00616       BCLog::OutWarning("BCHistogramFitter::CDF: parameter vector empty!");
00617    //histogram stores underflow in bin 0, so datapoint 0 is in bin 1
00618    index++;
00619 
00620    // get the number of observed events (should be integer)
00621    double yObs = fHistogram -> GetBinContent(index);
00622 
00623    // if(double( (unsigned int)yObs)==yObs)
00624    //    BCLog::OutWarning(Form(
00625    //          "BCHistogramFitter::CDF: using bin count %f that  is not an integer!",yObs));
00626 
00627    // get function value of lower bin edge
00628    double edgeLow = fHistogram -> GetBinLowEdge(index);
00629    double edgeHigh = edgeLow + fHistogram -> GetBinWidth(index);
00630 
00631    // expectation value of this bin
00632    double yExp = 0.0;
00633 
00634    // use ROOT's TH1D::Integral method
00635    if (fFlagIntegration)
00636       yExp = fFitFunction -> Integral(edgeLow, edgeHigh, &parameters[0]);
00637 
00638    // use linear interpolation
00639    else {
00640       double dx = fHistogram -> GetBinWidth(index);
00641       double fEdgeLow = fFitFunction -> Eval(edgeLow);
00642       double fEdgeHigh = fFitFunction -> Eval(edgeHigh);
00643       yExp = fEdgeLow * dx + (fEdgeHigh - fEdgeLow) * dx / 2.;
00644    }
00645 
00646    // usually Poisson bins do not agree with fixed probability bins
00647    // so choose where it should belong
00648 
00649    if (lower) {
00650       if ((int) yObs >= 1)
00651          return ROOT::Math::poisson_cdf((unsigned int) (yObs - 1), yExp);
00652       else
00653          return 0.0;
00654    }
00655    // what if yObs as double doesn't reprepsent a whole number? exceptioN?
00656    if ((double) (unsigned int) yObs != yObs){
00657       BCLog::OutWarning(Form(
00658             "BCHistogramFitter::CDF: Histogram values should be integer!\n"
00659                " Bin %d = %f", index, yObs));
00660 
00661       //convert randomly to integer
00662       // ex. yObs = 9.785 =>
00663 //      yObs --> 10 with P = 78.5%
00664 //      yObs --> 9  with P = 21.5%
00665       double U = fRandom -> Rndm() ;
00666       double yObsLower = (unsigned int)yObs;
00667       if( U > (yObs - yObsLower) )
00668          yObs = yObsLower;
00669       else
00670          yObs = yObsLower + 1;
00671    }
00672 
00673 // BCLog::OutDebug(Form("yExp= %f yObs= %f par2=%",yExp, yObs,parameters.at(2)));
00674 // BCLog::Out(TString::Format("yExp= %f yObs= %f par2=%",yExp, yObs,parameters.at(2)));
00675 
00676    return ROOT::Math::poisson_cdf((unsigned int)yObs, yExp);
00677 }
00678 
00679 // ---------------------------------------------------------

Generated on Mon Aug 30 2010 22:14:54 for Bayesian Analysis Toolkit by  doxygen 1.7.1