const char* docstring="" "realignMSA uniclust.fasta metaclust.fasta\n" " realign metaclust.fasta to uniclust.fasta according to the match\n" " state of the first sequence in uniclust.fasta\n" "\n" "realignMSA uniclust.fasta metaclust.fasta realign.fasta\n" " output the result to realign,fasta\n" ; #include #include #include #include #include #include #include using namespace std; /* parse match state of first sequence in input fasta file, * return the sequence length and * match_state_vec: index of match state positions in query [a-zA-Z] * del_state_vec: index of positions that should be deleted from homolog */ int parse_match_state_of_first_fasta( vector & match_state_vec, // [A-Z] position in [a-zA-Z] sequence vector & del_state_vec, // [-] position in [-A-Z] sequence const char *filename) { ifstream fp; fp.open(filename,ios::in); string sequence; // including [-a-zA-Z] excluding [.] string line; int i; int L=0; // protein length, including [a-zA-Z] excluding [-.] /* read [-a-zA-Z] sequence */ while (fp.good()) { getline(fp,line); if (line.length()==0) continue; if (line[0]=='>') { if (sequence.length()==0) continue; break; } for (i=0;i &match_state_vec,int L,string sequence) { string realigned_seq=""; int i; for (i=0;i& match_state_vec, const vector& del_state_vec, int L, string infile="-", string outfile="-") { ifstream fp_in; ofstream fp_out; if (infile!="-") fp_in.open(infile.c_str(),ios::in); if (outfile!="-") fp_out.open(outfile.c_str(),ofstream::out); string sequence,line; int nseqs=0; int i; while ((infile!="-")?fp_in.good():cin.good()) { if (infile!="-") getline(fp_in,line); else getline(cin,line); if (line.length()==0) continue; if (line[0]=='>') { if (sequence.length()>0) { if (outfile!="-") fp_out< match_state_vec; // based on query sequence residue index vector del_state_vec; // based on (gapped) homolog position index int L=parse_match_state_of_first_fasta( match_state_vec,del_state_vec,argv[1]); /* re-align sequence profile */ string outfile=(argc<=3)?"-":argv[3]; realignMSA(match_state_vec,del_state_vec,L,argv[2],outfile); return 0; }