// Copyright 2009, Andreas Biegert #ifndef CS_SEQUENCE_INL_H_ #define CS_SEQUENCE_INL_H_ #include "sequence.h" namespace cs { template Sequence::Sequence(size_t length) : length_(length), seq_(length_ > 0 ? new value_type[length_] : NULL) {} template Sequence::Sequence(FILE* in) : length_(0), seq_(NULL) { Read(in); } template Sequence::Sequence(const std::string& sequence, const std::string& header) : length_(0), seq_(NULL) { Init(sequence, header); } template Sequence::Sequence(const Sequence& other) : length_(other.length_), seq_(length_ > 0 ? new value_type[length_] : NULL), header_(other.header_) { for (size_t i = 0; i < length_; ++i) seq_[i] = other[i]; } template Sequence::Sequence(const Sequence& other, size_t idx, size_t len) : length_(len), seq_(len > 0 ? new value_type[len] : NULL), header_(other.header_) { for (size_t i = 0; i < length_; ++i) seq_[i] = other[idx + i]; } template Sequence& Sequence::operator= (const Sequence& rhs) { if (this == &rhs) return *this; // handle self assignment delete[] seq_; length_ = rhs.length_; if (length_ > 0) { seq_ = new value_type[length_]; for (size_t i = 0; i < length_; ++i) seq_[i] = rhs[i]; } else { seq_ = NULL; } header_ = rhs.header_; return *this; } template void Sequence::Init(std::string sequence, std::string header) { assert(seq_ == NULL); // Init header using swap trick to trim excess capacity std::string(header).swap(header_); // Strip whitespace and newlines from sequence. sequence.erase(remove_if(sequence.begin(), sequence.end(), isspace), sequence.end()); // Strip gap characters from sequence. sequence.erase(remove_if(sequence.begin(), sequence.end(), isgap), sequence.end()); // First validate each character before copying const size_t seqlen = sequence.length(); for (size_t i = 0; i < seqlen; ++i) { if (!Abc::kValidChar[static_cast(sequence[i])]) throw Exception("Invalid character with ASCII number %i at position %zu of sequence '%s'", static_cast(sequence[i]), i, header_.c_str()); } // Copy character sequence in packed format into sequence array length_ = seqlen; seq_ = new value_type[length_]; for (size_t i = 0; i < seqlen; ++i) seq_[i] = Abc::kCharToInt[static_cast(sequence[i])]; } template void Sequence::Read(FILE* fin) { delete [] seq_; const size_t kBuffSize = MB; char buffer[kBuffSize]; int c = '\0'; std::string header; std::string sequence; // Read header while (fgetline(buffer, kBuffSize, fin)) { if (!strscn(buffer)) continue; if (buffer[0] == '>') { header.append(buffer + 1); break; } else { throw Exception("Sequence header does not start with '>'!"); } } // Read sequence and stop if either a new header or delimiter is found while (fgetline(buffer, kBuffSize, fin) && buffer[0] != '/' && buffer[1] != '/') { if (strscn(buffer)) sequence.append(buffer); c = getc(fin); if (c == EOF) break; ungetc(c, fin); if (static_cast(c) == '>') break; } Init(sequence, header); } template void Sequence::Write(FILE* fout, size_t width) const { fprintf(fout, ">%s\n", header_.c_str()); for (size_t i = 0; i < length(); ++i) { // fprintf(stdout, "%i\n", chr(i)); fputc(chr(i), fout); if ((i+1) % width == 0) fputc('\n', fout); } if (length() % width != 0) fputc('\n', fout); } template std::string Sequence::ToString() const { std::string s(length_, '\0'); for (size_t i = 0; i < length_; ++i) s[i] = Abc::kIntToChar[seq_[i]]; return s; } template void Sequence::Resize(size_t newlen) { if (newlen != length_) { if (seq_ != NULL) delete[] seq_; length_ = newlen; seq_ = length_ > 0 ? new value_type[length_] : NULL; } } } // namespace cs #endif // CS_SEQUENCE_INL_H_