/* Copyright 2009 Andreas Biegert This file is part of the CS-BLAST package. The CS-BLAST package is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. The CS-BLAST package is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifndef CS_CONTEXT_LIBRARY_INL_H_ #define CS_CONTEXT_LIBRARY_INL_H_ #include "context_profile-inl.h" #include "pseudocounts-inl.h" #include "context_library.h" #include "ran.h" namespace cs { template ContextLibrary::ContextLibrary(size_t size, size_t wlen) : wlen_(wlen), profiles_(size, ContextProfile(wlen)) {} template ContextLibrary::ContextLibrary(FILE* fin) : wlen_(0), profiles_() { Read(fin); } template ContextLibrary::ContextLibrary(size_t size, size_t wlen, const LibraryInit& init) : wlen_(wlen), profiles_(size, ContextProfile(wlen)) { init(*this); } template void ContextLibrary::Read(FILE* fin) { // Parse and check header information if (!StreamStartsWith(fin, "ContextLibrary")) throw Exception("Stream does not start with class id 'ContextLibrary'!"); char buffer[KB]; size_t size = 0; if (cs::fgetline(buffer, KB, fin)) size = ReadInt(buffer, "SIZE", "Unable to parse context library 'SIZE'!"); if (cs::fgetline(buffer, KB, fin)) wlen_ = ReadInt(buffer, "LENG", "Unable to parse context library 'LENG'!"); // Read context profiles profiles_.Resize(size); size_t k = 0; for (; k < size && !feof(fin); ++k) profiles_[k] = ContextProfile(fin); if (k != size) throw Exception("Serialized context library should have %i profiles but" "actually has %i!", size, k); } template void ContextLibrary::Write(FILE* fout) const { // Write header fputs("ContextLibrary\n", fout); fprintf(fout, "SIZE\t%d\n", static_cast(size())); fprintf(fout, "LENG\t%d\n", static_cast(wlen())); // Serialize profiles for (size_t k = 0; k < profiles_.size(); ++k) profiles_[k].Write(fout); } // Transforms probabilites in context profiles to log-space and sets 'is_log' flag. template inline void TransformToLog(ContextLibrary& lib) { for (size_t k = 0; k < lib.size(); ++k) TransformToLog(lib[k]); } // Transforms probabilites in context profiles to lin-space and sets 'is_log' flag. template inline void TransformToLin(ContextLibrary& lib) { for (size_t k = 0; k < lib.size(); ++k) TransformToLin(lib[k]); } template double CalculatePosteriorProbs(const ContextLibrary& lib, const Emission& emission, const CountsInput& input, CenterPos i, double* pp) { // Calculate posterior probability ppi[k] of state k given context window // around position 'i' double max = -FLT_MAX; for (size_t k = 0; k < lib.size(); ++k) { assert(lib[k].is_log); pp[k] = lib[k].prior + emission(lib[k].probs, input, i); if (pp[k] > max) max = pp[k]; // needed for log-sum-exp trick } // Log-sum-exp trick begins here double sum = 0.0; for (size_t k = 0; k < lib.size(); ++k) sum += exp(pp[k] - max); double tmp = max + log(sum); for (size_t k = 0; k < lib.size(); ++k) pp[k] = exp(pp[k] - tmp); return tmp; } template Sequence TranslateIntoStateSequence(const CountsInput& input, const ContextLibrary& lib, const Emission& emission) { Sequence as_seq(input.length()); Vector pp(AS::kSize); for (size_t i = 0; i < input.length(); ++i) { // Calculate posterior probabilities given sequence window around 'i' CalculatePosteriorProbs(lib, emission, input, i, &pp[0]); // Find state with maximal posterior prob and assign it to as_seq[i] size_t k_max = 0; double p_max = pp[0]; for (size_t k = 1; k < AS::kSize; ++k) { if (pp[k] > p_max) { k_max = k; p_max = pp[k]; } } as_seq[i] = k_max; } return as_seq; } } // namespace cs #endif // CS_CONTEXT_LIBRARY_INL_H_