/* Copyright 2009-2012 Andreas Biegert, Christof Angermueller This file is part of the CS-BLAST package. The CS-BLAST package is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. The CS-BLAST package is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifndef CS_CRF_PSEUDOCOUNTS_INL_H_ #define CS_CRF_PSEUDOCOUNTS_INL_H_ #include "crf_pseudocounts.h" namespace cs { template CrfPseudocounts::CrfPseudocounts(const Crf& crf) : crf_(crf) {} template void CrfPseudocounts::AddToSequence(const Sequence& seq, Profile& p) const { assert_eq(seq.length(), p.length()); LOG(INFO) << "Adding CRF pseudocounts to sequence ..."; const size_t center = crf_.center(); Matrix pp(seq.length(), crf_.size(), 0.0); // posterior probabilities int len = static_cast(seq.length()); // Calculate and add pseudocounts for each sequence window X_i separately #pragma omp parallel for schedule(static) for (int i = 0; i < len; ++i) { double* ppi = &pp[i][0]; // Calculate posterior probability ppi[k] of state k given sequence window // around position 'i' double max = -DBL_MAX; for (size_t k = 0; k < crf_.size(); ++k) { ppi[k] = crf_[k].bias_weight + ContextScore(crf_[k].context_weights, seq, i, center); if (ppi[k] > max) max = ppi[k]; // needed for log-sum-exp trick } // Log-sum-exp trick begins here double sum = 0.0; for (size_t k = 0; k < crf_.size(); ++k) sum += exp(ppi[k] - max); double tmp = max + log(sum); // Calculate pseudocount vector P(a|X_i) double* pc = p[i]; for (size_t a = 0; a < Abc::kSize; ++a) pc[a] = 0.0; for (size_t k = 0; k < crf_.size(); ++k) { ppi[k] = exp(ppi[k] - tmp); for(size_t a = 0; a < Abc::kSize; ++a) pc[a] += ppi[k] * crf_[k].pc[a]; } Normalize(&pc[0], Abc::kSize); } } template void CrfPseudocounts::AddToProfile(const CountProfile& cp, Profile& p) const { assert_eq(cp.counts.length(), p.length()); LOG(INFO) << "Adding library pseudocounts to profile ..."; const size_t center = crf_.center(); Matrix pp(cp.counts.length(), crf_.size(), 0.0); // posterior probs int len = static_cast(cp.length()); // Calculate and add pseudocounts for each sequence window X_i separately #pragma omp parallel for schedule(static) for (int i = 0; i < len; ++i) { double* ppi = &pp[i][0]; // Calculate posterior probability ppi[k] of state k given sequence window // around position 'i' double max = -DBL_MAX; for (size_t k = 0; k < crf_.size(); ++k) { ppi[k] = crf_[k].bias_weight + ContextScore(crf_[k].context_weights, cp, i, center); if (ppi[k] > max) max = ppi[k]; // needed for log-sum-exp trick } // Log-sum-exp trick begins here double sum = 0.0; for (size_t k = 0; k < crf_.size(); ++k) sum += exp(ppi[k] - max); double tmp = max + log(sum); // Calculate pseudocount vector P(a|X_i) double* pc = p[i]; for (size_t a = 0; a < Abc::kSize; ++a) pc[a] = 0.0; for (size_t k = 0; k < crf_.size(); ++k) { ppi[k] = exp(ppi[k] - tmp); for(size_t a = 0; a < Abc::kSize; ++a) pc[a] += ppi[k] * crf_[k].pc[a]; } Normalize(&pc[0], Abc::kSize); } } } // namespace cs #endif // CS_LIBRARY_PSEUDOCOUNTS_INL_H_