/*
Copyright 2009-2012 Andreas Biegert, Christof Angermueller
This file is part of the CS-BLAST package.
The CS-BLAST package is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
The CS-BLAST package is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef CS_CRF_H_
#define CS_CRF_H_
#include "count_profile.h"
#include "context_profile-inl.h"
#include "context_library-inl.h"
#include "crf_state.h"
#include "pseudocounts-inl.h"
#include "sequence.h"
namespace cs {
// Forward declaration: CrfInit refers to Crf before Crf is defined.
template<class Abc>
class Crf;

// Strategy interface for initializing a CRF. Concrete initializers derive
// from this class and implement operator(), which receives the CRF to be
// initialized by non-const reference.
template<class Abc>
class CrfInit {
 public:
  CrfInit() {}

  // Virtual so that initializers can be destroyed through a base pointer.
  virtual ~CrfInit() {}

  // Initializes the given CRF in place. Pure virtual: every concrete
  // initializer has to provide its own implementation.
  virtual void operator() (Crf<Abc>& crf) = 0;
};
// A container class for CRF states to represent the most common
// sequence motifs in a training database of proteins/DNA sequences.
template
class Crf {
public:
typedef CrfState* StateIter;
typedef const CrfState* ConstStateIter;
// Constructs an empty CRF of given dimenions.
Crf(size_t size, size_t wlen);
// Constructs a CRF from serialized data read from input stream.
explicit Crf(FILE* fin);
// Constructs CRF with a specific init-strategy encapsulated by an
// initializer.
Crf(size_t size, size_t wlen, CrfInit& init);
// Constructs CRF using a context library.
Crf(const ContextLibrary& lib, double weight_center = 1.6, double weight_decay = 0.85)
: wlen_(lib.wlen()), states_(lib.size(), CrfState(lib.wlen())) {
for (size_t i = 0; i < lib.size(); ++i)
states_[i] = CrfState(lib[i], weight_center, weight_decay);
}
// Deallocates states in profile vector
virtual ~Crf() {}
// Returns the number of profiles in the fully assembled CRF
size_t size() const { return states_.size(); }
// Returns the number of columns in each context profile.
size_t wlen() const { return wlen_; }
// Returns total number of weights in this CRF. Note that context weights
// and pseudocount weights of letter ANY are not accounted for since these are
// held fix at zero anyway.
size_t nweights() const { return size() * (1 + (wlen() + 1) * Abc::kSize); }
// Returns index of central profile column.
size_t center() const { return (wlen_ - 1) / 2; }
// Accessor methods for state i, where i is from interval [0,size].
CrfState& operator[](size_t i) { return states_[i]; }
const CrfState& operator[](size_t i) const { return states_[i]; }
// Initializes profile at index 'idx' with given profile.
void SetState(size_t idx, const CrfState& s);
// Returns an iterator to a list of pointers to profiles.
StateIter begin() { return &states_[0]; }
// Returns an iterator pointing past the end of pointers to profiles.
StateIter end() { return &states_[0] + states_.size(); }
// Returns a const iterator over pointers of profiles.
ConstStateIter begin() const { return &states_[0]; }
// Returns a const iterator pointing past the end of pointers to profiles.
ConstStateIter end() const { return &states_[0] + states_.size(); }
// Writes the CRF in serialization format to output stream.
void Write(FILE* fout) const;
private:
// Initializes the library from serialized data read from stream.
void Read(FILE* fin);
size_t wlen_; // size of context window.
Vector< CrfState > states_; // states ordered by index.
}; // Crf
// Prints the library in human-readable format for debugging.
template
std::ostream& operator<< (std::ostream& out, const Crf& crf) {
out << "CRF" << std::endl;
out << "size:\t" << crf.size() << std::endl;
out << "wlen:\t" << crf.wlen() << std::endl;
for (size_t k = 0; k < crf.size(); ++k) out << crf[k];
return out;
}
} // namespace cs
#endif // CS_CRF_H_