/* Copyright 2009 Andreas Biegert This file is part of the CS-BLAST package. The CS-BLAST package is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. The CS-BLAST package is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifndef CS_PSSM_H_ #define CS_PSSM_H_ #include "profile-inl.h" #include "sequence-inl.h" namespace cs { // Container class for a PSI-BLAST position specific scoring matrix (PSSM) and its // associated query sequence. struct Pssm { // Constructor to create a PSSM from query string and a sequence profile. Pssm(const Sequence& seq, const Profile& prof) : query(seq), profile(prof) {} // Constructor to create a PSSM from a PSI-BLAST checkpoint file. Pssm(FILE* fin); // Overwrites existing PSSM with PSSM from PSI-BLAST checkpoint. void Read(FILE* fin) { size_t count = 0; int query_length = 0; count += fread(reinterpret_cast(&query_length), kIntSize, 1, fin); assert(count == 1); assert(query_length > 0); query.Resize(query_length); profile.Resize(query_length); for (int i = 0; i < query_length; ++i) { char c; count += fread(&c, kCharSize, 1, fin); assert(AA::kValidChar[c]); query[i] = AA::kCharToInt[static_cast(c)]; } for(int i = 0; i < query_length; ++i) { for(size_t a = 0; a < AA::kSize; ++a) { double p; count += fread(reinterpret_cast(&p), kDoubleSize, 1, fin); profile[i][a] = p; } } assert_eq(1 + query_length + query_length * AA::kSize, count); } // Writes PSSM in PSI-BLAST binary checkpoint format to stream. void Write(FILE* fout) const { LOG(INFO) << "Writing PSI-BLAST checkpoint ..."; LOG(INFO) << query; LOG(INFO) << profile; int query_length = query.length(); size_t count = 0; count += fwrite(reinterpret_cast(&query_length), kIntSize, 1, fout); for(int i = 0; i < query_length; ++i) { char c = query.chr(i); count += fwrite(&c, kCharSize, 1, fout); } for(int i = 0; i < query_length; ++i) { for(size_t a = 0; a < AA::kSize; ++a) { double p = profile[i][a]; count += fwrite(reinterpret_cast(&p), kDoubleSize, 1, fout); } } assert_eq(1 + query_length + query_length * AA::kSize, count); } // Shifts PSIBLAST subsitution scores log_2(P(y|x)/P(y)) by muliplying the // conditional probabilities P(y|x) by log_2(bits). void Shift(double bits) { const double kFactor = pow(2, bits); for (size_t i = 0; i < profile.length(); ++i) { for (size_t a = 0; a < AA::kSize; ++a) profile[i][a] *= kFactor; } } // Query sequence with which search was started Sequence query; // Evolving sequence profile Profile profile; }; // class Pssm } // namespace cs #endif // CS_PSSM_H_