/*
Copyright 2009-2012 Andreas Biegert, Christof Angermueller
This file is part of the CS-BLAST package.
The CS-BLAST package is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
The CS-BLAST package is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see .
*/
#include "globals.h"
#include "utils.h"
#ifndef CS_PSEUDOCOUNTS_H_
#define CS_PSEUDOCOUNTS_H_
namespace cs {
// Forward declarations
template
class Sequence;
template
class Profile;
template
class CountProfile;
// Calculates pseudocount admixture for profile column.
struct Admix {
public:
Admix() {}
virtual ~Admix() {}
// Returns the admixture fraction tau given the Neff.
virtual double operator() (double neff) const = 0;
// Gets the parameter value adjusted to obtain a certain target Neff.
virtual double GetTargetNeffParam() const = 0;
// Sets the parameter value adjusted to obtain a certain target Neff.
virtual void SetTargetNeffParam(double p) = 0;
};
// Calculates constant pseudocount admixture independent of number of effective
// sequences.
struct ConstantAdmix : public Admix {
ConstantAdmix(double a) : pca(a) {}
virtual ~ConstantAdmix() {}
virtual double operator() (double) const {
return pca;
}
virtual double GetTargetNeffParam() const {
return pca;
}
virtual void SetTargetNeffParam(double p) {
pca = p;
}
double pca;
};
// Calculates divergence-dependent pseudocount admixture as in CS-BLAST.
// tau = A * (B + 1) / (B + Neff)
struct CSBlastAdmix : public Admix {
CSBlastAdmix(double a, double b) : pca(a), pcb(b) {}
virtual ~CSBlastAdmix() {}
virtual double operator() (double neff) const {
return MIN(1.0, pca * (pcb + 1.0) / (pcb + neff));
}
virtual double GetTargetNeffParam() const {
return pca;
}
virtual void SetTargetNeffParam(double p) {
pca = p;
}
double pca, pcb;
};
// Calculates divergence-dependent pseudocount admixture as in HHsearch.
struct HHsearchAdmix : public Admix {
HHsearchAdmix(double a, double b, double c = 1.0) : pca(a), pcb(b), pcc(c) {}
virtual ~HHsearchAdmix() {}
virtual double operator() (double neff) const {
double rv = 0.0;
if (pcc == 1.0)
rv = MIN(1.0, pca / (1.0 + neff / pcb));
else
rv = MIN(1.0, pca / (1.0 + pow(neff / pcb, pcc)));
return rv;
}
virtual inline double GetTargetNeffParam() const {
return pca;
}
virtual inline void SetTargetNeffParam(double p) {
pca = p;
}
double pca, pcb, pcc;
};
//const double kNormalize = 1e-5; // Normalization threshold.
const double kTargetNeffParamMin = 0.0; // Minimal paramater value for adjusting to the target Neff.
const double kTargetNeffParamMax = 1.0; // Maximal parameter value for adjusting to the target Neff.
const double kTargetNeffParamInit = 0.5; // Initial parameter value for adjusting to the target Neff.
const double kTargetNeffEps = 0.01; // Convergence threshold for adjusting to the target Neff.
// An abstract base class for pseudocount factories.
template
class Pseudocounts {
public:
Pseudocounts() : target_neff_(0.0), target_neff_delta_(0.025) {}
virtual ~Pseudocounts() {}
// Adds pseudocounts to sequence using admixture and returns normalized profile.
Profile AddTo(const Sequence& seq, Admix& admix) const;
// Adds pseudocounts to sequence using admixture and returns normalized profile.
Profile AddTo(const CountProfile& cp, Admix& admix) const;
// Gets the target Neff in the resulting profile after admixing pseudocounts.
double GetTargetNeff() const {
return target_neff_;
}
// Sets the target Neff in the resulting profile after admixing pseudocounts.
void SetTargetNeff(double target_neff) {
target_neff_ = target_neff;
}
// Gets maximal deviation from the target Neff.
double GetTargetNeffDelta() const {
return target_neff_delta_;
}
// Sets maximal deviation from the target Neff.
void SetTargetNeffDelta(double target_neff_delta) {
target_neff_delta_ = target_neff_delta;
}
private:
// Adds pseudocounts to sequence and stores resulting frequencies in given profile.
virtual void AddToSequence(const Sequence& seq, Profile& p) const = 0;
// Adds pseudocounts to alignment derived profile.
virtual void AddToProfile(const CountProfile& cp, Profile& p) const = 0;
// Admixes Sequence q to Profile p.
void AdmixTo(const Sequence& q, Profile& p, const Admix& admix) const;
// Admixes CountProfile q to Profile q.
void AdmixTo(const CountProfile& q, Profile& p, const Admix& admix) const;
// Admixes q to Profile p such that the Neff in p converges to the target Neff.
template
double AdmixToTargetNeff(const T& q, Profile& p, Admix& admix) const;
private:
double target_neff_; // Target Neff in the resulting profile.
double target_neff_delta_; // Maximal deviation from the target Neff.
private:
DISALLOW_COPY_AND_ASSIGN(Pseudocounts);
}; // Pseudocounts
} // namespace cs
#endif // CS_PSEUDOCOUNTS_H_