/*
Copyright 2009 Andreas Biegert
This file is part of the CS-BLAST package.
The CS-BLAST package is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
The CS-BLAST package is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef CS_CONTEXT_LIBRARY_H_
#define CS_CONTEXT_LIBRARY_H_
#include "context_profile.h"
#include "pseudocounts-inl.h"
namespace cs {
// Forward declarations of class templates referenced in this header.
// NOTE(review): the template parameter lists were missing in this copy of the
// file. Each template is assumed to take a single type parameter (the
// alphabet); the name 'Abc' is a reconstruction -- confirm against upstream.
template<class Abc>
class ContextLibrary;
template<class Abc>
class Emission;
template<class Abc>
class Crf;
template<class Abc>
class CrfState;
// Strategy class for initializing a context library
template
class LibraryInit {
public:
LibraryInit() {}
virtual ~LibraryInit() {}
virtual void operator() (ContextLibrary& lib) const = 0;
};
// A container of K context profiles representing the most common
// sequence motifs in a training database of proteins/DNA sequences.
template
class ContextLibrary {
public:
typedef ContextProfile* ProfileIter;
typedef const ContextProfile* ConstProfileIter;
// Constructs an empty profile library of given dimenions.
ContextLibrary(size_t size, size_t wlen);
// Constructs a profile library from serialized data read from input stream.
explicit ContextLibrary(FILE* fin);
// Constructs profile library with a specific init-strategy encapsulated by an
// initializer.
ContextLibrary(size_t size, size_t wlen, const LibraryInit& init);
// Nothing to do here
virtual ~ContextLibrary() {}
// Returns the number of profiles in the fully assembled profile library
size_t size() const { return profiles_.size(); }
// Returns the number of columns in each context profile.
size_t wlen() const { return wlen_; }
// Returns index of central profile column.
size_t center() const { return (wlen_ - 1) / 2; }
// Accessor methods for state i, where i is from interval [0,size].
ContextProfile& operator[](size_t i) { return profiles_[i]; }
const ContextProfile& operator[](size_t i) const { return profiles_[i]; }
// Returns an iterator pointing to beginning of profiles.
ProfileIter begin() { return &profiles_[0]; }
// Returns an iterator pointing past the end of profiles.
ProfileIter end() { return &profiles_[0] + profiles_.size(); }
// Returns a const iterator pointing to beginning of profiles.
ConstProfileIter begin() const { return &profiles_[0]; }
// Returns a const iterator pointing past the end of profiles.
ConstProfileIter end() const { return &profiles_[0] + profiles_.size(); }
// Writes the profile library in serialization format to output stream.
void Write(FILE* fout) const;
private:
// Initializes the library from serialized data read from stream.
void Read(FILE* fin);
size_t wlen_; // size of context window.
Vector > profiles_; // context profiles ordered by index.
}; // ContextLibrary
// Prints the library in human-readable format for debugging.
template
std::ostream& operator<< (std::ostream& out, const ContextLibrary& lib) {
out << "ContextLibrary" << std::endl;
out << "size:\t" << lib.size() << std::endl;
out << "wlen:\t" << lib.wlen() << std::endl;
for (size_t k = 0; k < lib.size(); ++k) out << lib[k];
return out;
}
// Transforms probabilites in context profiles to log-space and sets 'is_log' flag.
template
void TransformToLog(ContextLibrary& lib);
// Transforms probabilites in context profiles to lin-space and sets 'is_log' flag.
template
void TransformToLin(ContextLibrary& lib);
// Calculates posterior probs for a context library and sequence window X_i
// centered at index 'i' and writes them to array 'pp'. Caller is responsible for
// making sure that 'pp' has sufficient length. Return value is log sum of all
// individual emission terms. The third template parameter specifies the central
// position of the context window. Note: For PO-HMMs this is not an ordinary size_t
// but a vertex descriptor.
template
double CalculatePosteriorProbs(const ContextLibrary& lib,
const Emission& emission,
const ContextInput& input,
CenterPos i,
double* pp);
// Translate a sequence or count profile into an abstract state sequence.
// Takes the input to translate, the context library and the emission functor,
// and returns the resulting sequence by value (declaration only; defined
// elsewhere).
// NOTE(review): the template parameter list and the template arguments of
// Sequence, ContextLibrary and Emission are missing in this copy of the file,
// so this declaration does not compile as written. The list presumably names
// the alphabet of the returned state sequence, the alphabet of the library, and
// the 'CountsInput' type -- restore from upstream rather than guessing the
// order.
template
Sequence TranslateIntoStateSequence(const CountsInput& input,
const ContextLibrary& lib,
const Emission& emission);
} // namespace cs
#endif // CS_CONTEXT_LIBRARY_H_