Bayesian Analysis Toolkit: BCMath.cxx Source File

Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2008-2010, Daniel Kollar, Kevin Kroeninger and Jing Liu
00003  * All rights reserved.
00004  *
00005  * For the licensing terms see doc/COPYING.
00006  */
00007 
00008 // ---------------------------------------------------------
00009 
00010 #include "BAT/BCMath.h"
00011 #include "BAT/BCLog.h"
00012 
00013 #include <math.h>
00014 
00015 #include <set>
00016 
00017 #include <TMath.h>
00018 #include <TF1.h>
00019 #include <TH1D.h>
00020 
00021 #include <Math/PdfFuncMathCore.h>
00022 
00023 // ---------------------------------------------------------
00024 
00025 double BCMath::LogGaus(double x, double mean, double sigma, bool norm)
00026 {
00027    // if we have a delta function, return fixed value
00028    if (sigma == 0.)
00029       return 0;
00030 
00031    // if sigma is negative use absolute value
00032    if (sigma < 0.)
00033       sigma *= -1.;
00034 
00035    double arg = (x - mean) / sigma;
00036    double result = -.5 * arg * arg;
00037 
00038    // check if we should add the normalization constant
00039    if (!norm)
00040       return result;
00041 
00042    // subtract the log of the denominator of the normalization constant
00043    // and return
00044    return result - log(sqrt(2. * M_PI) * sigma);
00045 }
00046 
00047 // ---------------------------------------------------------
00048 
00049 double BCMath::LogPoisson(double x, double par)
00050 {
00051    if (par > 899)
00052       return LogGaus(x, par, sqrt(par), true);
00053 
00054    if (x < 0)
00055       return 0;
00056 
00057    if (x == 0.)
00058       return -par;
00059 
00060    return x * log(par) - par - ApproxLogFact(x);
00061 }
00062 
00063 // ---------------------------------------------------------
00064 
00065 double BCMath::ApproxBinomial(int n, int k, double p)
00066 {
00067    return exp(LogApproxBinomial(n, k, p));
00068 }
00069 
00070 // ---------------------------------------------------------
00071 
00072 double BCMath::LogApproxBinomial(int n, int k, double p)
00073 {
00074    // check p
00075    if (p == 0)
00076       return -1e99;
00077 
00078    else if (p == 1)
00079       return 0;
00080 
00081    // switch parameters if n < k
00082    if (n < k) {
00083       int a = n;
00084       n = k;
00085       k = a;
00086    }
00087 
00088    return LogBinomFactor(n, k) + (double) k * log(p) + (double) (n - k) * log(1. - p);
00089 }
00090 
00091 // ---------------------------------------------------------
00092 
00093 double BCMath::LogBinomFactor(int n, int k)
00094 {
00095    // switch parameters if n < k
00096    if (n < k) {
00097       int a = n;
00098       n = k;
00099       k = a;
00100    }
00101 
00102    if (n == k || k == 0)
00103       return 0.;
00104    if (k == 1 || k == n - 1)
00105       return log((double) n);
00106 
00107    // if no approximation needed
00108    if (n < BCMATH_NFACT_ALIMIT || (n - k) < 5)
00109       return LogNoverK(n, k);
00110 
00111    // calculate final log(n over k) using approximations if necessary
00112    return ApproxLogFact((double)n) - ApproxLogFact((double)k) - ApproxLogFact((double)(n - k));
00113 }
00114 
00115 // ---------------------------------------------------------
00116 
00117 double BCMath::ApproxLogFact(double x)
00118 {
00119    if (x > BCMATH_NFACT_ALIMIT)
00120       return x * log(x) - x + log(x * (1. + 4. * x * (1. + 2. * x))) / 6. + log(M_PI) / 2.;
00121 
00122    else
00123       return LogFact((int) x);
00124 }
00125 
00126 // ---------------------------------------------------------
00127 double BCMath::LogFact(int n)
00128 {
00129    double ln = 0.;
00130 
00131    for (int i = 1; i <= n; i++)
00132       ln += log((double) i);
00133 
00134    return ln;
00135 }
00136 
00137 // ---------------------------------------------------------
00138 
00139 double BCMath::LogNoverK(int n, int k)
00140 {
00141    // switch parameters if n < k
00142    if (n < k) {
00143       int a = n;
00144       n = k;
00145       k = a;
00146    }
00147 
00148    if (n == k || k == 0)
00149       return 0.;
00150    if (k == 1 || k == n - 1)
00151       return log((double) n);
00152 
00153    int lmax = Max(k, n - k);
00154    int lmin = Min(k, n - k);
00155 
00156    double ln = 0.;
00157 
00158    for (int i = n; i > lmax; i--)
00159       ln += log((double) i);
00160    ln -= LogFact(lmin);
00161 
00162    return ln;
00163 }
00164 
00165 // ---------------------------------------------------------
00166 
00167 int BCMath::Nint(double x)
00168 {
00169    // round to integer
00170    int i;
00171 
00172    if (x >= 0) {
00173       i = (int) (x + .5);
00174       if (x + .5 == (double) i && i&1)
00175          i--;
00176    }
00177    else {
00178       i = int(x - 0.5);
00179       if (x - 0.5 == double(i) && i&1)
00180          i++;
00181    }
00182 
00183    return i;
00184 }
00185 
00186 // ---------------------------------------------------------
00187 
00188 double BCMath::rms(int n, const double *a)
00189 {
00190    if (n <= 0 || !a)
00191       return 0;
00192 
00193    double sum = 0., sum2 = 0.;
00194 
00195    for (int i = 0; i < n; i++) {
00196       sum += a[i];
00197       sum2 += a[i] * a[i];
00198    }
00199 
00200    double n1 = 1. / (double) n;
00201    double mean = sum * n1;
00202 
00203    return sqrt(fabs(sum2 * n1 - mean * mean));
00204 }
00205 
00206 // ---------------------------------------------------------
00207 
00208 double BCMath::LogBreitWignerNonRel(double x, double mean, double Gamma, bool norm)
00209 {
00210    double bw = log(Gamma) - log((x - mean) * (x - mean) + Gamma*Gamma / 4.);
00211 
00212    if (norm)
00213       bw -= log(2. * M_PI);
00214 
00215    return bw;
00216 }
00217 
00218 // ---------------------------------------------------------
00219 
00220 double BCMath::LogBreitWignerRel(double x, double mean, double Gamma)
00221 {
00222    return -log((x*x - mean*mean) * (x*x - mean*mean) + mean*mean * Gamma*Gamma);
00223 }
00224 
00225 // ---------------------------------------------------------
00226 
00227 double BCMath::LogChi2(double x, int n)
00228 {
00229    if (x < 0) {
00230       BCLog::OutWarning("BCMath::LogChi2 : parameter cannot be negative!");
00231       return -1e99;
00232    }
00233 
00234    if (x == 0 && n == 1) {
00235       BCLog::OutWarning("BCMath::LogChi2 : returned value is infinity!");
00236       return 1e99;
00237    }
00238 
00239    double nOver2 = ((double) n) / 2.;
00240 
00241    return (nOver2 - 1.) * log(x) - x / 2. - nOver2 * log(2) - log(TMath::Gamma(nOver2));
00242 }
00243 
00244 // ---------------------------------------------------------
00245 double BCMath::LogVoigtian(double x, double sigma, double gamma)
00246 {
00247    if (sigma <= 0 || gamma <= 0) {
00248       BCLog::OutWarning("BCMath::LogVoigtian : widths are negative or zero!");
00249       return -1e99;
00250    }
00251 
00252    return log(TMath::Voigt(x, sigma, gamma));
00253 }
00254 
00255 // ---------------------------------------------------------
00256 double BCMath::SplitGaussian(double* x, double* par)
00257 {
00258    double mean = par[0]; 
00259    double sigmadown = par[1]; 
00260    double sigmaup = par[2];
00261 
00262    double sigma = sigmadown;
00263 
00264    if (x[0] > mean)
00265       sigma = sigmaup; 
00266    
00267    return 1.0/sqrt(2.0*TMath::Pi())/sigma * exp(- (x[0]-mean)*(x[0]-mean)/2./sigma/sigma);
00268 }
00269 
00270 // ---------------------------------------------------------
00271 // wrapper with signature to construct a TF1
00272 double chi2(double *x, double *par)
00273 {
00274    return ROOT::Math::chisquared_pdf(x[0], par[0]);
00275 }
00276 
00277 // ---------------------------------------------------------
00278 void BCMath::RandomChi2(std::vector<double> &randoms, int K)
00279 {
00280    // fixed upper cutoff to 1000, might be too small
00281    TF1 *f = new TF1("chi2", chi2, 0.0, 1000, 1);
00282    f->SetParameter(0, K);
00283    f->SetNpx(500);
00284    // uses inverse-transform method
00285    // fortunately CDF only built once
00286    for (unsigned int i = 0; i < randoms.size(); i++)
00287       randoms.at(i) = f->GetRandom();
00288 
00289    delete f;
00290 }
00291 
00292 // ---------------------------------------------------------
00293 TH1D * BCMath::ECDF(const std::vector<double> & data)
00294 {
00295    int N = data.size();
00296 
00297    std::set<double> uniqueObservations;
00298    // sort and filter out multiple instances
00299    for (int i = 0; i < N; ++i)
00300       uniqueObservations.insert(data[i]);
00301 
00302    // extract lower edges for CDF histogram
00303    int nUnique = uniqueObservations.size();
00304    double lowerEdges[nUnique];
00305 
00306    // traverse the set
00307    std::set<double>::iterator iter;
00308    int counter = 0;
00309    for (iter = uniqueObservations.begin(); iter != uniqueObservations.end(); ++iter) {
00310       lowerEdges[counter] = *iter;
00311       counter++;
00312    }
00313 
00314    // create histogram where
00315    // lower edge of first bin = min. data
00316    // upper edge of last bin = max. data
00317    TH1D * ECDF = new TH1D("ECDF", "Empirical cumulative distribution function", nUnique - 1, lowerEdges);
00318 
00319    // fill the data in to find multiplicities
00320    for (int i = 0; i < N; ++i)
00321       ECDF -> Fill(data[i]);
00322 
00323    // just in case, empty the underflow
00324    ECDF -> SetBinContent(0, 0.0);
00325 
00326    // construct the ecdf
00327    for (int nBin = 1; nBin <= ECDF->GetNbinsX(); nBin++) {
00328       double previousBin = ECDF -> GetBinContent(nBin - 1);
00329       // BCLog::OutDebug(Form("n_%d = %.2f", nBin, ECDF -> GetBinContent(nBin) ));
00330       // BCLog::OutDebug(Form("previous_%d = %.2f", nBin, previousBin));
00331       double thisBin = ECDF -> GetBinContent(nBin) / double(N);
00332       ECDF -> SetBinContent(nBin, thisBin + previousBin);
00333 
00334       // the uncertainty is only correctly estimated in the model
00335       ECDF -> SetBinError(nBin, 0.0);
00336    }
00337 
00338    // set the endpoint to 1, so all larger values are at CDF=1
00339    ECDF -> SetBinContent(ECDF->GetNbinsX() + 1, 1.);
00340 
00341    // adjust for nice plotting
00342    ECDF -> SetMinimum(0.);
00343    ECDF -> SetMaximum(1.);
00344 
00345    return ECDF;
00346 }
00347 
00348 // ---------------------------------------------------------
00349 
00350 std::vector<int> BCMath::longestRuns(const std::vector<bool> &bitStream)
00351 {
00352    // initialize counter variables
00353    unsigned int maxRunAbove, maxRunBelow, currRun;
00354    maxRunAbove = 0;
00355    maxRunBelow = 0;
00356    currRun = 1;
00357    // set both entries to zero
00358    std::vector<int> runs(2, 0);
00359 
00360    if (bitStream.empty())
00361       return runs;
00362 
00363    // flag about kind of the currently considered run
00364    bool aboveRun = bitStream.at(0);
00365 
00366    // start at second variable
00367    std::vector<bool>::const_iterator iter = bitStream.begin();
00368    ++iter;
00369    while (iter != bitStream.end()) {
00370 
00371       // increase counter if run continues
00372       if (*(iter - 1) == *iter)
00373          currRun++;
00374       else {
00375          // compare terminated run to maximum
00376          if (aboveRun)
00377             maxRunAbove = TMath::Max(maxRunAbove, currRun);
00378          else
00379             maxRunBelow = TMath::Max(maxRunBelow, currRun);
00380          // set flag to run of opposite kind
00381          aboveRun = !aboveRun;
00382          // restart at length one
00383          currRun = 1;
00384       }
00385       // move to next bit
00386       ++iter;
00387    }
00388 
00389    // check last run
00390    if (aboveRun)
00391       maxRunAbove = TMath::Max(maxRunAbove, currRun);
00392    else
00393       maxRunBelow = TMath::Max(maxRunBelow, currRun);
00394 
00395    // save the longest runs
00396    runs.at(0) = maxRunBelow;
00397    runs.at(1) = maxRunAbove;
00398 
00399    return runs;
00400 }
00401 // ---------------------------------------------------------
00402 
00403 std::vector<double> BCMath::longestRunsChi2(
00404       const std::vector<double>& yMeasured,
00405       const std::vector<double>& yExpected, const std::vector<double>& sigma)
00406 {
00407    //initialize counter variables
00408    double maxRunAbove, maxRunBelow, currRun;
00409    maxRunAbove = 0;
00410    maxRunBelow = 0;
00411    currRun = 0;
00412    //set both entries to zero
00413    std::vector<double> runs(2, 0);
00414 
00415    //check input size
00416    if (yMeasured.size() != yExpected.size() || yMeasured.size() != sigma.size()
00417          || yExpected.size() != sigma.size()) {
00418       //should throw exception
00419       return runs;
00420    }
00421 
00422    //exclude zero uncertainty
00423    //...
00424 
00425     int N = yMeasured.size();
00426     if ( N<=0)
00427        return runs;
00428    //BCLog::OutDebug(Form("N = %d", N));
00429 
00430 
00431    //flag about kind of the currently considered run
00432    double residue = (yMeasured.at(0) - yExpected.at(0)) / sigma.at(0);
00433    bool aboveRun = residue >= 0 ? true : false;
00434    currRun = residue * residue;
00435 
00436    //start at second variable
00437    for (int i = 1; i < N; i++) {
00438       residue = (yMeasured.at(i) - yExpected.at(i)) / sigma.at(i);
00439       //run continues
00440       if ((residue >= 0) == aboveRun) {
00441          currRun += residue * residue;
00442       } else {
00443          //compare terminated run to maximum
00444          if (aboveRun)
00445             maxRunAbove = TMath::Max(maxRunAbove, currRun);
00446          else
00447             maxRunBelow = TMath::Max(maxRunBelow, currRun);
00448          //set flag to run of opposite kind
00449          aboveRun = !aboveRun;
00450          //restart at current residual
00451          currRun = residue * residue;
00452       }
00453       //BCLog::OutDebug(Form("maxRunBelow = %g", maxRunBelow));
00454       //BCLog::OutDebug(Form("maxRunAbove = %g", maxRunAbove));
00455       //BCLog::OutDebug(Form("currRun = %g", currRun));
00456 
00457    }
00458 
00459    //BCLog::OutDebug(Form("maxRunBelow = %g", maxRunBelow));
00460    //BCLog::OutDebug(Form("maxRunAbove = %g", maxRunAbove));
00461    //BCLog::OutDebug(Form("currRun = %g", currRun));
00462 
00463    //check last run
00464    if (aboveRun)
00465       maxRunAbove = TMath::Max(maxRunAbove, currRun);
00466    else
00467       maxRunBelow = TMath::Max(maxRunBelow, currRun);
00468 
00469    //BCLog::OutDebug(Form("maxRunBelow = %g", maxRunBelow));
00470    //BCLog::OutDebug(Form("maxRunAbove = %g", maxRunAbove));
00471 
00472    //save the longest runs
00473    runs.at(0) = maxRunBelow;
00474    runs.at(1) = maxRunAbove;
00475 
00476    return runs;
00477 }
00478 // ---------------------------------------------------------
00479 double BCMath::longestRunFrequency(unsigned longestObserved, unsigned int nTrials)
00480 {
00481    // can't observe run that's longer than the whole sequence
00482    if (longestObserved >= nTrials)
00483       return 0.;
00484 
00485    // return value
00486    double prob = 0.;
00487 
00488    // short cuts
00489    typedef unsigned int uint;
00490    uint Lobs = longestObserved;
00491    uint n = nTrials;
00492 
00493    // the result of the inner loop is the cond. P given r successes
00494    double conditionalProb;
00495 
00496    // first method: use the gamma function for the factorials: bit slower and more inaccurate
00497    // in fact may return NaN for n >= 1000
00498 
00499    //   double Gup, Gdown1, Gdown2, Gdown3;
00500    //
00501    //   for (uint r = 0; r <= n; r++) {
00502    //      conditionalProb = 0.0;
00503    //      for (uint i = 1; ( i <= n-r+1) && (i <= uint(r / double(Lobs + 1)) ); i++) {
00504    //
00505    //         Gup =  TMath::Gamma(1 - i * (Lobs + 1) + n);
00506    //         Gdown1 = TMath::Gamma(1 + i);
00507    //         Gdown2 = TMath::Gamma(2 - i + n - r);
00508    //         Gdown3 = TMath::Gamma(1 - i * (Lobs + 1) + r);
00509    //
00510    //         //consider the sign of contribution
00511    //         Gup = i%2 ? Gup : - Gup;
00512    //
00513    //         conditionalProb += Gup/(Gdown1 * Gdown2 * Gdown3);
00514    //
00515    ////         printf("G(%d,%d)= %.2f %.2f %.2f %.2f",r,i,Gup, Gdown1, Gdown2, Gdown3);
00516    //
00517    ////         prob += TMath::Gamma(1 - i * (Lobs + 1) + n)
00518    ////               / ( TMath::Gamma(1 + i) * TMath::Gamma(2 - i + n - r)
00519    ////                     * TMath::Gamma(1 - i * (Lobs + 1) + r) );
00520    ////         printf("prob inside (i=%d) = %.2f",i, prob);
00521    //      }
00522    //      prob += (1 + n - r)*conditionalProb;
00523    ////      printf("prob outside (r=%d) = %.2f",r, prob);
00524    //   }
00525 
00526    // alternative using log factorial approximations, is faster and more accurate
00527 
00528    double tempLog = 0;
00529    for (uint r = 0; r <= n; r++) {
00530       conditionalProb = 0.0;
00531 
00532       for (uint i = 1; (i <= n - r + 1) && (i <= uint(r / double(Lobs + 1))); i++) {
00533          tempLog = ApproxLogFact(n - i * (Lobs + 1)) - ApproxLogFact(i)
00534                - ApproxLogFact(n - r - i + 1)
00535                - ApproxLogFact(r - i * (Lobs + 1));
00536          if (i % 2)
00537             conditionalProb += exp(tempLog);
00538          else
00539             conditionalProb -= exp(tempLog);
00540          //         printf("tempLog inside = %.2f",prob);
00541       }
00542       //      printf("tempLog outside = %.2f",prob);
00543       prob += (1 + n - r) * conditionalProb;
00544    }
00545 
00546    //Bernoulli probability of each permutation
00547    prob *= TMath::Power(2., -double(n));
00548 
00549    return prob;
00550 }
00551