// Copyright 2009, Andreas Biegert #ifndef CS_COUNT_PROFILE_H_ #define CS_COUNT_PROFILE_H_ #include "alignment.h" #include "profile.h" #include "sequence.h" #include "substitution_matrix-inl.h" namespace cs { template struct CountProfile { // Constructs a count profile with 'len' columns explicit CountProfile(size_t len = 0) : counts(len, 0.0), neff(len, 1.0) {} // Construction from serialized profile read from input stream. explicit CountProfile(FILE* fin) { Read(fin); } // Constructs a profile representation of given sequence. explicit CountProfile(const Sequence& seq) : counts(seq.length(), 0.0f), neff(seq.length(), 1.0f) { for (size_t i = 0; i < seq.length(); ++i) counts[i][seq[i]] = 1.0f; } // Constructs a count profile from a profile and sets effective number // of sequences to one. Usefull for constructing a count profile from // a pseudocount factory. CountProfile(const Profile& p) : counts(p), neff(p.length(), 1.0) {} // Construction from alignment with specified sequence weighting method CountProfile(const Alignment& ali, bool pos_weights = true, bool neff_sum_pairs = false); // Creates profile by copying subprofile starting at index 'idx' for 'len' cols CountProfile(const CountProfile& other, size_t idx, size_t len) : counts(len), neff(len) { for (size_t i = 0; i < len; ++i) { neff[i] = other.neff[idx + i]; for (size_t a = 0; a < Abc::kSizeAny; ++a) counts[i][a] = other.counts[idx + i][a]; } } // Initializes count profile with a serialized profile read from stream. void Read(FILE* fin); // Writes serialized count profile to stream. void Write(FILE* fout) const; // Returns number of columns. size_t length() const { return counts.length(); } std::string name; // optional name descriptor Profile counts; // absolute counts of alphabet letters Vector neff; // effective number of sequences at column i }; // Returns the average number of effective sequences in given count profile template double Neff(const CountProfile& cp); // Builds and returns a consensus sequence of the given count profile by // calculating at each position the alphabet character that deviates most strongly // from its background probability. template std::string ConsensusSequence(const CountProfile& cp, const SubstitutionMatrix& sm); // Builds and returns a conservation string for given count profile that // indicates conservation of residues by uppercase, lowercase, and '~' template std::string ConservationSequence(const CountProfile& cp, const SubstitutionMatrix& sm); // Prints counts and neff in human-readable format for debugging. template std::ostream& operator<< (std::ostream& out, const CountProfile& cp); } // namespace cs #endif // CS_COUNT_PROFILE_H_