/* Copyright 2009 Andreas Biegert This file is part of the CS-BLAST package. The CS-BLAST package is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. The CS-BLAST package is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifndef CS_SEQUENCE_INL_H_ #define CS_SEQUENCE_INL_H_ #include "assert_helpers.h" #include "sequence.h" #include "exception.h" #include "io.h" #include "as.h" namespace cs { template Sequence::Sequence(size_t length) : length_(length), seq_(length_ > 0 ? new value_type[length_] : NULL) {} template Sequence::Sequence(FILE* in) : length_(0), seq_(NULL) { Read(in); } template Sequence::Sequence(const std::string& sequence, const std::string& header) : length_(0), seq_(NULL) { Init(sequence, header); } template Sequence::Sequence(const Sequence& other) : length_(other.length_), seq_(length_ > 0 ? new value_type[length_] : NULL), header_(other.header_) { for (size_t i = 0; i < length_; ++i) seq_[i] = other[i]; } template Sequence::Sequence(const Sequence& other, size_t idx, size_t len) : length_(len), seq_(len > 0 ? new value_type[len] : NULL), header_(other.header_) { for (size_t i = 0; i < length_; ++i) seq_[i] = other[idx + i]; } template Sequence& Sequence::operator= (const Sequence& rhs) { if (this == &rhs) return *this; // handle self assignment delete[] seq_; length_ = rhs.length_; if (length_ > 0) { seq_ = new value_type[length_]; for (size_t i = 0; i < length_; ++i) seq_[i] = rhs[i]; } else { seq_ = NULL; } header_ = rhs.header_; return *this; } template void Sequence::Init(std::string sequence, std::string header) { assert(seq_ == NULL); // Init header using swap trick to trim excess capacity std::string(header).swap(header_); // Strip whitespace and newlines from sequence. sequence.erase(remove_if(sequence.begin(), sequence.end(), isspace), sequence.end()); // Strip gap characters from sequence. sequence.erase(remove_if(sequence.begin(), sequence.end(), isgap), sequence.end()); // First validate each character before copying const size_t seqlen = sequence.length(); for (size_t i = 0; i < seqlen; ++i) { if (!Abc::kValidChar[static_cast(sequence[i])]) throw Exception("Invalid character with ASCII number %i at position %zu of sequence '%s'", static_cast(sequence[i]), i, header_.c_str()); } // Copy character sequence in packed format into sequence array length_ = seqlen; seq_ = new value_type[length_]; for (size_t i = 0; i < seqlen; ++i) seq_[i] = Abc::kCharToInt[static_cast(sequence[i])]; } template void Sequence::Read(FILE* fin) { delete [] seq_; const size_t kBuffSize = MB; char buffer[kBuffSize]; int c = '\0'; std::string header; std::string sequence; // Read header while (cs::fgetline(buffer, kBuffSize, fin)) { if (!strscn(buffer)) continue; if (buffer[0] == '>') { header.append(buffer + 1); break; } else { throw Exception("Sequence header does not start with '>'!"); } } // Read sequence and stop if either a new header or delimiter is found while (cs::fgetline(buffer, kBuffSize, fin) && !(buffer[0] == '/' && buffer[1] == '/')) { if (strscn(buffer)) sequence.append(buffer); c = getc(fin); if (c == EOF) break; ungetc(c, fin); if (static_cast(c) == '>') break; } Init(sequence, header); } template std::string Sequence::ToString() const { std::string s(length_, '\0'); for (size_t i = 0; i < length_; ++i) s[i] = Abc::kIntToChar[seq_[i]]; return s; } template void Sequence::Resize(size_t newlen) { if (newlen != length_) { if (seq_ != NULL) delete[] seq_; length_ = newlen; seq_ = length_ > 0 ? new value_type[length_] : NULL; } } } // namespace cs #endif // CS_SEQUENCE_INL_H_