const char* docstring="" "trimMSA symfrac seq.aln pos_map trim.aln\n" " remove positions with > symfrac gaps from seq.aln\n" " save trimmed aligment to trim.aln. save position mapping between\n" " trim.aln and seq.aln to pos_map\n" "\n" "trimMSA symfrac seq.aln pos_map trim.aln 1\n" " apart from first sequence, set every position with > symfrac gaps\n" " to lower case\n" ; #include #include #include #include #include #include using namespace std; /* parse PSICOV format MSA */ int parse_aln(const char *filename, vector & aln) { ifstream fp; fp.open(filename,ios::in); string sequence; while (fp.good()) { getline(fp,sequence); if (sequence.length()==0) continue; aln.push_back(sequence); } return aln.size(); } /* calculate fraction of gaps for each position */ int calGapFrac(vector & aln, vector & gapfrac_vec) { int nseqs=aln.size(); int L=aln[0].length(); gapfrac_vec.assign(L,0.); int n,i; for (n=0;n symfrac fraction of gaps */ int trimMSA(vector & aln, vector & gapfrac_vec, vector & pos_map,const float symfrac,const int output_mode=0) { int L=calGapFrac(aln,gapfrac_vec); int nseqs=aln.size(); int i,j,n; vector nonpos_map; for (i=0,j=-1;i aln; vector gapfrac_vec; vector pos_map; int output_mode=0; if (argc>5) output_mode=atoi(argv[5]); int nseqs=parse_aln(argv[2],aln); trimMSA(aln,gapfrac_vec,pos_map,symfrac,output_mode); ofstream fp; fp.open(argv[3],ios::out); for (int j=0;j