#include #include #include #include #include "molecule.h" //#include "/afs/pdc.kth.se/home/b/bjornw/bjorn/modules/numrecipies/nr.h" //#include "/afs/pdc.kth.se/home/b/bjornw/bjorn/modules/numrecipies/nrutil.h" //#include "/afs/pdc.kth.se/home/b/bjornw/bjorn/source/c/nets/nets.h" #include "nr.h" #include "nrutil.h" #include "nets.h" /* changed input to (molecule *m,char atomflag) atomflag == a -> read all atoms (except H) atomflag == c -> CA atoms atomflag == b -> backbone CA,C,N,O atoms */ int read_molecules_dynamic(dyn_molecule *m,char atomflag,int *ignore_res) /* Reads in molecules to be superimposed */ { int i,j,k,atoms,residues; /* Counter variables */ char buff[3000]; /* Input string */ char temp[3000]; /* Throwaway string */ char line_flag[3000]; /* PDB file line mode */ char desc_flag[3000]; char residue[4]; /* PDB atom info for output */ char name[4]; int resnum; char resname[9]; char old_resname[9]="noname"; char alt_loc_check[2]=" "; char x_temp[11]; char y_temp[11]; char z_temp[11]; //char number[8]; int number; char chain[2]; char inscode[2]=" "; char alt_loc[2]=" "; double x,y,z; /* Temporary coordinates values */ double bfactor=9.99; FILE *fp; char temp_number[11]; char temp_resnum[11]; int ss_flag=0; i=0; /* It is only one molecule to be read this was done for the moment instead of changing all "i" to 0 */ //for (i=0;i<2;i++) //{ //#ifdef NOZLIB*/ //fp=fopen(m[i].filename,"r"); /* Does file exist? */ //#else //fp=gzopen(m[i].filename,"r"); /* Does file exist? */ //#endif //printf("%s\n",m[0].filename); //strcpy(m[0].method,"undef"); m[0].rank=-1; m[0].score=-9999; fp=fopen(m[0].filename,"r"); /* Does file exist? */ m[0].sequence='\0'; m[0].ss='\0'; m[0].method='\0'; m[0].atm='\0'; m[0].CA_ref='\0'; m[0].residues=0; m[0].atoms=0; if (fp!=NULL) /* If yes, read in coordinates */ { /* Initialize things */ //m[0].xcen=m[0].ycen=m[0].zcen=0; atoms=0; residues=0; //#ifdef NOZLIB //while(fgets(buff,255,fp)!=NULL) //#else //while(gzgets(fp,buff,255)!=NULL) //#endif while(fgets(buff,1000,fp)!=NULL) { //sscanf(buff,"%-6s %4d %-3s%1s%3s %1s%5s %7.3lf %7.3lf %7.3lf",line_flag,&number,name,residue,chain,&resnum,inscode,&x,&y,&z); //sscanf(buff,"%s %d %s %s %s",line_flag,&number,name,residue,resname); strcpy(line_flag,"undef"); strcpy(desc_flag,"undef"); strcpy(temp,"undef"); sscanf(buff,"%s",line_flag); // printf("%s %s %s",m[0].filename,line_flag,&buff); if(strcmp("REMARK",line_flag)==0) { sscanf(buff,"%s %s %s",line_flag,desc_flag,temp); if(strcmp("SS",desc_flag)==0) { m[0].ss=malloc(sizeof(char)*(strlen(temp)+1)); strcpy(m[0].ss,temp); ss_flag=1; //printf("%s\n",m[0].ss); } if(strcmp("METHOD",desc_flag)==0) { m[0].method=malloc(sizeof(char)*(strlen(temp)+1)); strcpy(m[0].method,temp); //printf("%s\n",m[0].method); } if(strcmp("SCORE",desc_flag)==0) { m[0].score=atof(temp); //strcpy(m[0].method,temp); //printf("%lf\n",m[0].score); } } if(strcmp("MODEL",line_flag)==0) { sscanf(buff,"%s %s",line_flag,temp); m[0].rank=atoi(temp); } if(strcmp("ATOM",line_flag)==0 && (buff[12] != 'H' || buff[13] != 'H' )) /* Is it an ATOM entry? */ { //printf("Heja: %s %s",m[0].filename,&buff); strncpy_NULL(temp_number,&buff[6],5); strncpy_NULL(name,&buff[13],3); if(atomflag == 'a' || (atomflag == 'c' && strcmp("CA ",name) == 0) || (atomflag == 'b' && (strcmp("CA ",name) == 0 || strcmp("C ",name) == 0 || strcmp("O ",name) == 0 || strcmp("N ",name) ==0))) { //( printf("%s\n",name); //printf("%s %s",m[0].filename,&buff); strncpy_NULL(alt_loc,&buff[16],1); strncpy_NULL(residue,&buff[17],3); strncpy_NULL(chain,&buff[21],1); strncpy_NULL(temp_resnum,&buff[22],4); strncpy_NULL(resname,&buff[22],5); strncpy_NULL(x_temp,&buff[30],8); strncpy_NULL(y_temp,&buff[38],8); strncpy_NULL(z_temp,&buff[46],8); number=atoi(temp_number); resnum=atoi(temp_resnum); x=atof(x_temp); y=atof(y_temp); z=atof(z_temp); //printf("test: %s %d %s %s %s %s %d %s %lf %lf %lf\n",line_flag,number,name,alt_loc,residue,chain,resnum,resname,x,y,z); //if (strcmp("N",name)==0) /* Is it an N atom => new residue? */ //printf("%s %d %d\n",m[0].filename,resnum,ignore_res[resnum]); // printf("%d %d\n",residues,ignore_res[0]); if(ignore_res[resnum]!=1) { if(strcmp(old_resname,resname)!=0) { m[0].sequence=realloc(m[0].sequence,sizeof(char)*(residues+1)); //m[0].sequence=malloc(sizeof(char)*2000); m[0].sequence[residues]=aa321(residue); residues++; strcpy(alt_loc_check,alt_loc); } //sscanf(&buff[22],"%d %lf %lf %lf",&resnum,&x,&y,&z); //printf("test %d %s %s %d %lf %lf %lf\n",number,name,residue,resnum,x,y,z); if(strcmp(alt_loc_check,alt_loc)==0) { //printf("test: %s %s",m[0].filename,&buff); m[0].atm=realloc(m[0].atm,sizeof(atm)*(atoms+1)); m[0].atm[atoms].x=x; m[0].atm[atoms].y=y; m[0].atm[atoms].z=z; m[0].atm[atoms].resnum=resnum; m[0].atm[atoms].number=number; //atoi(number); m[0].atm[atoms].rescount=residues; m[0].atm[atoms].selected=TRUE; strcpy(m[0].atm[atoms].name,name); strcpy(m[0].atm[atoms].residue,residue); strcpy(m[0].atm[atoms].resname,resname); if(strcmp("CA ",name) == 0) { //printf("%s %s %s %s %d\n",m[0].filename,resname,residue,alt_loc,residues); m[0].CA_ref=realloc(m[0].CA_ref,sizeof(int)*(residues+1)); m[0].CA_ref[residues-1]=atoms; } //if(strcmp(old_resname,resname)!=0) // { // m[0].sequence[residues-1]=aa321(residue); // } m[0].xcen+=x; m[0].ycen+=y; m[0].zcen+=z; atoms++; strcpy(old_resname,resname); } } } } } if(atoms>0) { m[0].sequence=realloc(m[0].sequence,sizeof(char)*(residues+1)); m[0].sequence[residues]='\0'; m[0].atoms=atoms; m[0].residues=residues; m[0].xcen=m[0].xcen/atoms; m[0].ycen=m[0].ycen/atoms; m[0].zcen=m[0].zcen/atoms; if(ss_flag == 0) { //fprintf(stderr,"No secondary structure information in file!\n"); } } fclose(fp); //#ifdef NOZLIB // fclose(fp); //#else // gzclose(fp); //#endif // if (atoms!=m[0].atoms) /* Are file sizes indentical? */ //{ // printf("Inconsistent number of atoms in file %s\n",m[0].filename); // return(1); //} } else { printf("Couldn't open file \"%s\"\n",m[0].filename); exit(1); } return(0); } int free_dyn_molecule(dyn_molecule *m) { free(m[0].sequence); free(m[0].ss); free(m[0].method); free(m[0].CA_ref); free(m[0].atm); } int read_molecules(molecule *m,char atomflag) /* Reads in molecule */ { int i,j,k,atoms,residues; /* Counter variables */ char buff[3000]; /* Input string */ char temp[3000]; /* Throwaway string */ char line_flag[3000]; /* PDB file line mode */ char desc_flag[3000]; char residue[4]; /* PDB atom info for output */ char name[4]; int resnum; char resname[9]; char old_resname[9]="noname"; char alt_loc_check[2]=" "; char x_temp[11]; char y_temp[11]; char z_temp[11]; //char number[8]; int number; char chain[2]; char inscode[2]=" "; char alt_loc[2]=" "; double x,y,z; /* Temporary coordinates values */ FILE *fp; double bfactor=9.99; char temp_number[11]; char temp_resnum[11]; char temp_bfactor[11]; int ss_flag=0; i=0; /* It is only one molecule to be read this was done for the moment instead of changing all "i" to 0 */ //for (i=0;i<2;i++) //{ //#ifdef NOZLIB*/ //fp=fopen(m[i].filename,"r"); /* Does file exist? */ //#else //fp=gzopen(m[i].filename,"r"); /* Does file exist? */ //#endif //printf("%s\n",m[0].filename); strcpy(m[0].method,"undef"); m[0].rank=-1; m[0].score=-9999; fp=fopen(m[0].filename,"r"); /* Does file exist? */ if (fp!=NULL) /* If yes, read in coordinates */ { /* Initialize things */ //m[0].xcen=m[0].ycen=m[0].zcen=0; atoms=0; residues=0; //#ifdef NOZLIB //while(fgets(buff,255,fp)!=NULL) //#else //while(gzgets(fp,buff,255)!=NULL) //#endif //printf("file: %s\n",m[0].filename); while(fgets(buff,1000,fp)!=NULL) { //sscanf(buff,"%-6s %4d %-3s%1s%3s %1s%5s %7.3lf %7.3lf %7.3lf",line_flag,&number,name,residue,chain,&resnum,inscode,&x,&y,&z); //sscanf(buff,"%s %d %s %s %s",line_flag,&number,name,residue,resname); strcpy(line_flag,"undef"); strcpy(desc_flag,"undef"); sscanf(buff,"%s",line_flag); if(strcmp("REMARK",line_flag)==0) { //printf("%s %s %s",m[0].filename,line_flag,&buff); sscanf(buff,"%s %s %s",line_flag,desc_flag,temp); //printf("%s %s %s\n",line_flag,desc_flag,temp); if(strcmp("SS",desc_flag)==0) { strcpy(m[0].ss,temp); ss_flag=1; //printf("%s\n",m[0].ss); } if(strcmp("METHOD",desc_flag)==0) { strcpy(m[0].method,temp); //printf("%s\n",m[0].method); } if(strcmp("SCORE",desc_flag)==0) { m[0].score=atof(temp); //strcpy(m[0].method,temp); //printf("%lf\n",m[0].score); } } if(strcmp("MODEL",line_flag)==0) { sscanf(buff,"%s %s",line_flag,temp); m[0].rank=atoi(temp); } if(strcmp("ATOM",line_flag)==0 && (buff[12] != 'H' || buff[13] != 'H' )) /* Is it an ATOM entry? */ { //printf("Heja: %s %s",m[0].filename,&buff); strncpy_NULL(temp_number,&buff[6],5); strncpy_NULL(name,&buff[13],3); if(atomflag == 'a' || (atomflag == 'c' && strcmp("CA ",name) == 0) || (atomflag == 'b' && (strcmp("CA ",name) == 0 || strcmp("C ",name) == 0 || strcmp("O ",name) == 0 || strcmp("N ",name) ==0))) { //( printf("%s\n",name); strncpy_NULL(alt_loc,&buff[16],1); strncpy_NULL(residue,&buff[17],3); strncpy_NULL(chain,&buff[21],1); strncpy_NULL(temp_resnum,&buff[22],4); strncpy_NULL(resname,&buff[22],5); strncpy_NULL(x_temp,&buff[30],8); strncpy_NULL(y_temp,&buff[38],8); strncpy_NULL(z_temp,&buff[46],8); number=atoi(temp_number); resnum=atoi(temp_resnum); if(strlen(buff)>=66) { strncpy_NULL(temp_bfactor,&buff[60],6); bfactor=atof(temp_bfactor); // printf("%lf\n",bfactor); } x=atof(x_temp); y=atof(y_temp); z=atof(z_temp); //printf("test: %s %d %s %s %s %s %d %s %lf %lf %lf\n",line_flag,number,name,alt_loc,residue,chain,resnum,resname,x,y,z); //if (strcmp("N",name)==0) /* Is it an N atom => new residue? */ //printf("%s %s\n",old_resname,resname); if(strcmp(old_resname,resname)!=0) { m[0].sequence[residues]=aa321(residue); residues++; strcpy(alt_loc_check,alt_loc); //printf("%s %s\n",resname,residue); } //sscanf(&buff[22],"%d %lf %lf %lf",&resnum,&x,&y,&z); //printf("test %d %s %s %d %lf %lf %lf\n",number,name,residue,resnum,x,y,z); if(strcmp(alt_loc_check,alt_loc)==0) { m[0].atm[atoms].x=x; m[0].atm[atoms].y=y; m[0].atm[atoms].z=z; m[0].atm[atoms].bfactor=bfactor; m[0].atm[atoms].resnum=resnum; m[0].atm[atoms].number=number; //atoi(number); m[0].atm[atoms].rescount=residues; m[0].atm[atoms].selected=TRUE; m[0].atm[atoms].deleted=FALSE; strcpy(m[0].atm[atoms].name,name); strcpy(m[0].atm[atoms].residue,residue); strcpy(m[0].atm[atoms].resname,resname); strcpy(m[0].atm[atoms].chain,chain); if(strcmp("CA ",name) == 0) { //printf("%s %s %s %s %d\n",m[0].filename,resname,residue,alt_loc,residues); m[0].CA_ref[residues-1]=atoms; } //if(strcmp(old_resname,resname)!=0) // { // m[0].sequence[residues-1]=aa321(residue); // } m[0].xcen+=x; m[0].ycen+=y; m[0].zcen+=z; atoms++; strcpy(old_resname,resname); } } } } m[0].sequence[residues]='\0'; m[0].atoms=atoms; m[0].residues=residues; m[0].xcen=m[0].xcen/atoms; m[0].ycen=m[0].ycen/atoms; m[0].zcen=m[0].zcen/atoms; if(ss_flag == 0) { //fprintf(stderr,"No secondary structure information in file!\n"); } fclose(fp); //#ifdef NOZLIB // fclose(fp); //#else // gzclose(fp); //#endif // if (atoms!=m[0].atoms) /* Are file sizes indentical? */ //{ // printf("Inconsistent number of atoms in file %s\n",m[0].filename); // return(1); //} } else { printf("Couldn't open file \"%s\"\n",m[0].filename); return(1); //exit(1); } return(0); } int read_molecules_ca(molecule *m) /* Reads in molecules to be superimposed */ { int i,j,k,atoms,residues; /* Counter variables */ char buff[1200]; /* Input string */ char temp[1000]; /* Throwaway string */ char line_flag[11]; /* PDB file line mode */ char desc_flag[20]; char residue[4]; /* PDB atom info for output */ char name[4]; int resnum; char resname[9]; char old_resname[9]="noname"; char alt_loc_check[2]=" "; char x_temp[11]; char y_temp[11]; char z_temp[11]; //char number[8]; int number; char chain[2]; char inscode[2]=" "; char alt_loc[2]=" "; double x,y,z; /* Temporary coordinates values */ FILE *fp; char temp_number[11]; char temp_resnum[11]; i=0; /* It is only one molecule to be read this was done for the moment instead of changing all "i" to 0 */ //for (i=0;i<2;i++) //{ //#ifdef NOZLIB*/ //fp=fopen(m[i].filename,"r"); /* Does file exist? */ //#else //fp=gzopen(m[i].filename,"r"); /* Does file exist? */ //#endif //printf("%s\n",m[0].filename); strcpy(m[0].method,"undef"); m[0].rank=-1; m[0].score=-9999; fp=fopen(m[0].filename,"r"); /* Does file exist? */ if (fp!=NULL) /* If yes, read in coordinates */ { /* Initialize things */ //m[0].xcen=m[0].ycen=m[0].zcen=0; atoms=0; residues=0; //#ifdef NOZLIB //while(fgets(buff,255,fp)!=NULL) //#else //while(gzgets(fp,buff,255)!=NULL) //#endif while(fgets(buff,1000,fp)!=NULL) { //sscanf(buff,"%-6s %4d %-3s%1s%3s %1s%5s %7.3lf %7.3lf %7.3lf",line_flag,&number,name,residue,chain,&resnum,inscode,&x,&y,&z); //sscanf(buff,"%s %d %s %s %s",line_flag,&number,name,residue,resname); sscanf(buff,"%s",line_flag); if(strcmp("REMARK",line_flag)==0) { //printf("%s",buff); sscanf(buff,"%s %s %s",line_flag,desc_flag,temp); if(strcmp("SS",desc_flag)==0) { //printf("%s\n%d\n\n",buff,strlen(buff)); strcpy(m[0].ss,temp); //printf("%s\n%d\n",m[0].ss,strlen(m[0].ss)); } if(strcmp("METHOD",desc_flag)==0) { strcpy(m[0].method,temp); //printf("%s\n",m[0].method); } if(strcmp("SCORE",desc_flag)==0) { m[0].score=atof(temp); //strcpy(m[0].method,temp); //printf("%lf\n",m[0].score); } } if(strcmp("MODEL",line_flag)==0) { sscanf(buff,"%s %s",line_flag,temp); m[0].rank=atoi(temp); } if (strcmp("ATOM",line_flag)==0 && buff[13] != 'H') /* Is it an ATOM entry? */ { strncpy_NULL(temp_number,&buff[6],5); strncpy_NULL(name,&buff[13],3); //printf("%s",&buff[6]); if(strcmp("CA ",name) == 0) { //printf("%s",&buff[6]); strncpy_NULL(alt_loc,&buff[16],1); strncpy_NULL(residue,&buff[17],3); strncpy_NULL(chain,&buff[21],1); strncpy_NULL(temp_resnum,&buff[22],4); strncpy_NULL(resname,&buff[22],5); strncpy_NULL(x_temp,&buff[30],8); strncpy_NULL(y_temp,&buff[38],8); strncpy_NULL(z_temp,&buff[46],8); number=atoi(temp_number); resnum=atoi(temp_resnum); x=atof(x_temp); y=atof(y_temp); z=atof(z_temp); //printf("test: %s %d %s %s %s %s %d %s %lf %lf %lf\n",line_flag,number,name,alt_loc,residue,chain,resnum,resname,x,y,z); //if (strcmp("N",name)==0) /* Is it an N atom => new residue? */ //printf("%s %s\n",old_resname,resname); if(strcmp(old_resname,resname)!=0) { m[0].sequence[residues]=aa321(residue); residues++; strcpy(alt_loc_check,alt_loc); //printf("%s %s\n",resname,residue); } //sscanf(&buff[22],"%d %lf %lf %lf",&resnum,&x,&y,&z); //printf("test %d %s %s %d %lf %lf %lf\n",number,name,residue,resnum,x,y,z); if(strcmp(alt_loc_check,alt_loc)==0) { m[0].atm[atoms].x=x; m[0].atm[atoms].y=y; m[0].atm[atoms].z=z; m[0].atm[atoms].resnum=resnum; m[0].atm[atoms].number=number; //atoi(number); m[0].atm[atoms].rescount=residues; m[0].atm[atoms].selected=TRUE; strcpy(m[0].atm[atoms].name,name); strcpy(m[0].atm[atoms].residue,residue); //if(strcmp(old_resname,resname)!=0) // { // m[0].sequence[residues-1]=aa321(residue); // } m[0].xcen+=x; m[0].ycen+=y; m[0].zcen+=z; atoms++; strcpy(old_resname,resname); } } } } m[0].sequence[residues]='\0'; m[0].atoms=atoms; m[0].residues=residues; m[0].xcen=m[0].xcen/atoms; m[0].ycen=m[0].ycen/atoms; m[0].zcen=m[0].zcen/atoms; fclose(fp); //#ifdef NOZLIB // fclose(fp); //#else // gzclose(fp); //#endif // if (atoms!=m[0].atoms) /* Are file sizes indentical? */ //{ // printf("Inconsistent number of atoms in file %s\n",m[0].filename); // return(1); //} } else { printf("Couldn't open file \"%s\"\n",m[0].filename); exit(1); } return(0); } int read_molecules_backbone(molecule *m) /* Reads in molecules to be superimposed */ { int i,j,k,atoms,residues; /* Counter variables */ char buff[1200]; /* Input string */ char temp[1000]; /* Throwaway string */ char line_flag[11]; /* PDB file line mode */ char desc_flag[20]; char residue[4]; /* PDB atom info for output */ char name[4]; int resnum; char resname[9]; char old_resname[9]="noname"; char x_temp[11]; char y_temp[11]; char z_temp[11]; //char number[8]; int number; char chain[2]; char inscode[2]=" "; char alt_loc[2]=" "; double x,y,z; /* Temporary coordinates values */ FILE *fp; char temp_number[11]; char temp_resnum[11]; i=0; /* It is only one molecule to be read this was done for the moment instead of changing all "i" to 0 */ //printf("%s\n",m[0].filename); strcpy(m[0].method,"undef"); m[0].rank=-1; m[0].score=-9999; fp=fopen(m[0].filename,"r"); /* Does file exist? */ if (fp!=NULL) /* If yes, read in coordinates */ { /* Initialize things */ atoms=0; residues=0; while(fgets(buff,1000,fp)!=NULL) { //sscanf(buff,"%-6s %4d %-3s%1s%3s %1s%5s %7.3lf %7.3lf %7.3lf",line_flag,&number,name,residue,chain,&resnum,inscode,&x,&y,&z); //sscanf(buff,"%s %d %s %s %s",line_flag,&number,name,residue,resname); sscanf(buff,"%s",line_flag); if(strcmp("REMARK",line_flag)==0) { sscanf(buff,"%s %s %s",line_flag,desc_flag,temp); if(strcmp("SS",desc_flag)==0) { //strcpy(m[0].ss,temp); strncpy_NULL(m[0].ss,temp,strlen(temp)); // printf("%s\n",m[0].ss); } if(strcmp("METHOD",desc_flag)==0) { //strcpy(m[0].method,temp); strncpy_NULL(m[0].method,temp,strlen(temp)); //printf("%s\n",m[0].method); } if(strcmp("SCORE",desc_flag)==0) { m[0].score=atof(temp); //strcpy(m[0].method,temp); //printf("%lf\n",m[0].score); } } if(strcmp("MODEL",line_flag)==0) { sscanf(buff,"%s %s",line_flag,temp); m[0].rank=atoi(temp); } if(strcmp("ATOM",line_flag)==0) /* Is it an ATOM entry? */ { //printf("%s",&buff[6]); strncpy_NULL(temp_number,&buff[6],5); strncpy_NULL(name,&buff[13],3); //printf("%s",&buff[6]); if(strcmp("CA ",name) == 0 || strcmp("C ",name) == 0 || strcmp("O ",name) == 0 || strcmp("N ",name) ==0) { //printf("%d %s",2,&buff[6]); strncpy_NULL(alt_loc,&buff[16],1); strncpy_NULL(residue,&buff[17],3); strncpy_NULL(chain,&buff[21],1); strncpy_NULL(temp_resnum,&buff[22],4); strncpy_NULL(resname,&buff[22],5); strncpy_NULL(x_temp,&buff[30],8); strncpy_NULL(y_temp,&buff[38],8); strncpy_NULL(z_temp,&buff[46],8); //(strcmp("CA ",name) !=0) // number=atoi(temp_number); resnum=atoi(temp_resnum); x=atof(x_temp); y=atof(y_temp); z=atof(z_temp); //printf("test: %s %d %s %s %s %s %d %s %lf %lf %lf\n",line_flag,number,name,alt_loc,residue,chain,resnum,resname,x,y,z); //if (strcmp("N",name)==0) /* Is it an N atom => new residue? */ //printf("%s %s\n",old_resname,resname); if(strcmp(old_resname,resname)!=0) { m[0].sequence[residues]=aa321(residue); m[0].res_ref[residues]=atoms; residues++; //printf("%s %s\n",resname,residue); } //printf("%s %s\n",resname,residue); // } //sscanf(&buff[22],"%d %lf %lf %lf",&resnum,&x,&y,&z); //printf("test %d %s %s %d %lf %lf %lf\n",number,name,residue,resnum,x,y,z); m[0].atm[atoms].x=x; m[0].atm[atoms].y=y; m[0].atm[atoms].z=z; m[0].atm[atoms].resnum=resnum; //printf("%d\n",m[0].atm[atoms].resnum); m[0].atm[atoms].number=number; //atoi(number); m[0].atm[atoms].rescount=residues; m[0].atm[atoms].selected=TRUE; strcpy(m[0].atm[atoms].name,name); strcpy(m[0].atm[atoms].residue,residue); if(strcmp("CA ",name) == 0) { //printf("%s %s %s %d\n",resname,residue,alt_loc,residues); m[0].CA_ref[residues-1]=atoms; } // if(strcmp(old_resname,resname)!=0) // { // m[0].sequence[residues-1]=aa321(residue); // } m[0].xcen+=x; m[0].ycen+=y; m[0].zcen+=z; atoms++; strcpy(old_resname,resname); //} } } } m[0].sequence[residues]='\0'; m[0].atoms=atoms; m[0].residues=residues; m[0].xcen=m[0].xcen/atoms; m[0].ycen=m[0].ycen/atoms; m[0].zcen=m[0].zcen/atoms; fclose(fp); //#ifdef NOZLIB // fclose(fp); //#else // gzclose(fp); //#endif // if (atoms!=m[0].atoms) /* Are file sizes indentical? */ //{ // printf("Inconsistent number of atoms in file %s\n",m[0].filename); // return(1); //} } else { printf("Couldn't open file \"%s\"\n",m[0].filename); exit(1); } return(0); } int read_to_molecule(molecule *m,char** atom_vec,char** residue_vec,int* number_vec,rvec* coord_vec,size_t n) { int i,atoms,residues; /* Counter variables */ int resnum; int old_resnum=-991999; atoms=0; residues=0; for(i=0;imax) max=b; if(c>max) max=c; //printf("%5.3f %5.3f %5.3f\n=======\n",a,b,c); //printf("%5.3f",max/min); //printf("unsorted eigenvectors:\n"); //for (i=1;i<=NP;i++) { // printf("eigenvalue %3d = %12.6f\n",i,d[i]); // printf("eigenvector:\n"); // for (j=1;j<=NP;j++) { // printf("%12.6f",v[j][i]); // if ((j % 5) == 0) printf("\n"); // } // printf("\n"); //} //printf("\n****** Sorting Eigenvectors ******\n\n"); //eigsrt(d,v,NP); //printf("sorted eigenvectors:\n"); //for (i=1;i<=NP;i++) { // printf("eigenvalue %3d = %12.6f\n",i,d[i]); // printf("eigenvector:\n"); // for (j=1;j<=NP;j++) { // printf("%12.6f",v[j][i]); // if ((j % 5) == 0) printf("\n"); // } // printf("\n"); //} free_convert_matrix(e,1,NP,1,NP); free_matrix(v,1,NP,1,NP); free_vector(d,1,NP); return max/min; } double fatness2(molecule *m) { int i,j,nrot; float trans_x,trans_y,trans_z,trans_x2,trans_y2,trans_z2; float a,b,c,max,min; float V[6]={}; float I[3][3]={}; float *d,**v,**e; int NP=3; for(i=0;imax) max=b; if(c>max) max=c; //printf("%5.3f %5.3f %5.3f\n=======\n",a,b,c); //printf("%5.3f",max/min); //printf("unsorted eigenvectors:\n"); //for (i=1;i<=NP;i++) { // printf("eigenvalue %3d = %12.6f\n",i,d[i]); // printf("eigenvector:\n"); // for (j=1;j<=NP;j++) { // printf("%12.6f",v[j][i]); // if ((j % 5) == 0) printf("\n"); // } // printf("\n"); //} //printf("\n****** Sorting Eigenvectors ******\n\n"); //eigsrt(d,v,NP); //printf("sorted eigenvectors:\n"); //for (i=1;i<=NP;i++) { // printf("eigenvalue %3d = %12.6lf\n",i,d[i]); // printf("eigenvector:\n"); // for (j=1;j<=NP;j++) { // printf("%12.6f",v[j][i]); // if ((j % 5) == 0) printf("\n"); // } // printf("\n"); //} free_convert_matrix(e,1,NP,1,NP); free_matrix(v,1,NP,1,NP); free_vector(d,1,NP); return max; } char aa321(const char* res) { //char check_res[4]; //check_res[0]=res[0]; //check_res[1]=res[1]; //check_res[2]=res[2]; //res[3]='\0'; //printf("%s %s\n",res,res); if(strcmp("ALA",res)==0) return 'A'; if(strcmp("ARG",res)==0) return 'R'; if(strcmp("ASN",res)==0) return 'N'; if(strcmp("ASP",res)==0) return 'D'; if(strcmp("CYS",res)==0) return 'C'; if(strcmp("GLN",res)==0) return 'Q'; if(strcmp("GLU",res)==0) return 'E'; if(strcmp("GLY",res)==0) return 'G'; if(strcmp("HIS",res)==0) return 'H'; if(strcmp("ILE",res)==0) return 'I'; if(strcmp("LEU",res)==0) return 'L'; if(strcmp("LYS",res)==0) return 'K'; if(strcmp("MET",res)==0) return 'M'; if(strcmp("PHE",res)==0) return 'F'; if(strcmp("PRO",res)==0) return 'P'; if(strcmp("SER",res)==0) return 'S'; if(strcmp("THR",res)==0) return 'T'; if(strcmp("TRP",res)==0) return 'W'; if(strcmp("TYR",res)==0) return 'Y'; if(strcmp("VAL",res)==0) return 'V'; return 'X'; } char* assign_ss(molecule *m,float cutoff,float angle) { char *ss; int *hb; int j,i; int number_of_res; int *alphatemp,*betatemp1,*betatemp2,*alphatest,*betatest; int hbond_check=0; int hbond_error=0; //printf("Assigning Secondary structure\n"); //printf("Residues: %d\n",m[0].residues); ss=malloc(m[0].residues*sizeof(char)+1); alphatemp=malloc(m[0].residues*sizeof(int)); betatemp1=malloc(m[0].residues*sizeof(int)); betatemp2=malloc(m[0].residues*sizeof(int)); alphatest=malloc(m[0].residues*sizeof(int)); betatest=malloc(m[0].residues*sizeof(int)); hb=malloc(m[0].residues*m[0].residues*sizeof(int)); //printf("Leffe\n"); for(i=0;i %d %d\n",i,j); } else { if(hbond_check==-1) hbond_error++; // if(hbond_error>m[0].residues*m[0].residues) // { // //fprintf(stderr,"hej %s\n",m[0].filename,hbond_error); // fprintf(stderr,"%s Unable to assign ss to structure... %d %d %5.3f\n",m[0].filename,hbond_error,m[0].residues,hbond_error/m[0].residues); // // fprintf(stderr,"%s Unable to assign ss to structure... %d \n",m[0].filename,hbond_error); // free(hb); // free(alphatemp); // free(betatemp1); // free(betatemp2); // free(alphatest); // free(betatest); // ss[0]='\0'; // return ss; // } } } } if(hbond_error==m[0].residues*m[0].residues) fprintf(stderr,"file: %s contains no hydrogen bonds (might be a CA-model)\n",m[0].filename); for(i=1;i2) { //printf("%d %d %d\n",i,j,m[0].residues); if((hb[calc_index(m[0].residues,max(i-1,0),j)] && hb[calc_index(m[0].residues,j,min(i+1,m[0].residues-1))]) || (hb[calc_index(m[0].residues,max(j-1,0),i)] && hb[calc_index(m[0].residues,i,min(j+1,m[0].residues-1))])) { betatemp1[max(i-1,0)]=1; betatemp1[i]=1; betatemp1[min(m[0].residues-1,i+1)]=1; betatemp1[max(j-1,0)]=1; betatemp1[j]=1; betatemp1[min(m[0].residues-1,j+1)]=1; } if((hb[calc_index(m[0].residues,i,j)] && hb[calc_index(m[0].residues,j,i)])|| (hb[calc_index(m[0].residues,max(i-1,0),min(j+1,m[0].residues-1))] && hb[calc_index(m[0].residues,max(j-1,0),min(i+1,m[0].residues-1))])) { //printf("BETA\n"); betatemp2[max(i-1,0)]=1; betatemp2[i]=1; betatemp2[min(m[0].residues-1,i+1)]=1; betatemp2[max(j-1,0)]=1; betatemp2[j]=1; betatemp2[min(m[0].residues-1,j+1)]=1; } } } } for(i=0;i1) { alphatest[i]=1; //printf("INFO> Alpha-helix %d\n",i); } if((betatemp1[max(0,i-2)] && betatemp1[i]) || (betatemp2[max(0,i-2)] && betatemp2[i])) { betatest[i]=1; betatest[max(0,i-1)]=1; betatest[max(0,i-2)]=1; //printf("BETA\n"); } else { if((betatemp1[min(m[0].residues-1,i+2)] && betatemp1[i]) || (betatemp2[min(m[0].residues-1,i+2)] && betatemp2[i])) { betatest[i]=1; betatest[min(m[0].residues-1,i+1)]=1; betatest[min(m[0].residues-1,i+2)]=1; //printf("BETA\n"); } } } for(i=0;iN2 // printf("%d %s %s %lf\n",atomno1,m[0].atm[atomno1].name,m[0].atm[atomno1].residue,CO2.x*CO2.x+CO2.y*CO2.y+CO2.z*CO2.z); //nvecN1=sqrt(vecN1.x*vecN1.x+vecN1.y*vecN1.y+vecN1.z*vecN1.z); nvecN2=sqrt(vecN2.x*vecN2.x+vecN2.y*vecN2.y+vecN2.z*vecN2.z); //printf("%lf %lf\n",nvecN1,nvecN2); if(nvecN25 && dist20) //buried { buried_residues[get_res6(m[0].atm[m[0].CA_ref[i]].residue)]++; total_buried++; } } //printf("Exposed Buried\n"); for(i=0;i<6;i++) { //printf("%d/%d %d/%d\n",exposed_residues[i],total_exposed,buried_residues[i],total_buried); if(total_exposed!=0) frac_exposed_residues[i]=(double)exposed_residues[i]/total_exposed; if(total_buried!=0) frac_buried_residues[i]=(double)buried_residues[i]/total_buried; //printf("%5.4f %5.4f\n",frac_exposed_residues[i],frac_buried_residues[i]); } //printf("\n"); for(i=0;i<6;i++) { for(j=0;j<6;j++) { // printf("%d ",res_contacts[i][j]); if(i!=j) { res_contacts[i][j]=2*res_contacts[i][j]; } } //printf("\n"); } for(k=0,i=0;i<6;i++) { for(j=i;j<6;j++,k++) { if(tot_res_contacts != 0) { frac_res_contacts[k]=(double)res_contacts[i][j]/tot_res_contacts; //printf("%f %d %d %d\n",frac_res_contacts[k],res_contacts[i][j],tot_res_contacts,&tot_res_contacts); } } } } //printf("\n"); fat=fatness(m); //FIX THAT SS=0 MEANS ERROR ss=assign_ss(m,hcut,hangle); // if(ss!=0) // { if(strlen(psipred)==strlen(ss)) { for(i=0;i5 && dist20) //buried { buried_residues[get_res6(m[0].atm[m[0].CA_ref[i]].residue)]++; total_buried++; } } //printf("Exposed Buried\n"); for(i=0;i<6;i++) { //printf("%d/%d %d/%d\n",exposed_residues[i],total_exposed,buried_residues[i],total_buried); if(total_exposed!=0) frac_exposed_residues[i]=(double)exposed_residues[i]/total_exposed; if(total_buried!=0) frac_buried_residues[i]=(double)buried_residues[i]/total_buried; //printf("%5.4f %5.4f\n",frac_exposed_residues[i],frac_buried_residues[i]); } //printf("\n"); for(i=0;i<6;i++) { for(j=0;j<6;j++) { // printf("%d ",res_contacts[i][j]); if(i!=j) { res_contacts[i][j]=2*res_contacts[i][j]; } } //printf("\n"); } for(k=0,i=0;i<6;i++) { for(j=i;j<6;j++,k++) { if(tot_res_contacts != 0) { frac_res_contacts[k]=(double)res_contacts[i][j]/tot_res_contacts; //printf("%f %d %d %d\n",frac_res_contacts[k],res_contacts[i][j],tot_res_contacts,&tot_res_contacts); } } } } //printf("\n"); fat=fatness(m); //printf("SS\n"); ss=assign_ss(m,hcut,hangle); //printf("AFTER SS\n"); //printf("%s\nSTR: %s\n",m[0].filename,m[0].ss); if(strlen(m[0].ss)==strlen(ss)) { // printf("PRED: %s\n",ss); for(i=0;i 8) // { // fprintf(stderr,"%s Fatness (%lf) > 8 assigned fatness 8 to avoid strange results\n",m[0].filename,fat); // fat=8; // } parameters[j]=fat; parameters[j+1]=Q3; //printf("%f %f %d",fat,Q3,j); //printf("\n"); //for(i=0;i<35;i++) // printf("%lf ",parameters[i]); //printf("\n"); } else { if(strlen(m[0].ss)!=strlen(ss)) fprintf(stderr,"%s Different strlen\nPDB-SS %s (%lu)\nASSIGN-SS %s (%lu)\nSEQ %s\n",m[0].filename,m[0].ss,strlen(m[0].ss),ss,strlen(ss),m[0].sequence); //if(fat > 8) // fprintf(stderr,"%s Fatness (%lf) > 8 assign quality 0 to avoid strange results\n",m[0].filename,fat); free(ss); quality[0]=0; quality[1]=0; return quality; } free(ss); if(fat > 8) { fprintf(stderr,"%s Fatness (%lf) > 8 assigned quality 0 to avoid strange results\n",m[0].filename,fat); quality[0]=0; quality[1]=0; } else { for(i=0;i<5;i++) quality[0]+=netfwd(parameters,&net_lg[i]); for(i=0;i<5;i++) quality[1]+=netfwd(parameters,&net_mx[i]); quality[0]=quality[0]/5; quality[1]=quality[1]/5; } //printf("%-30s %6.4lf %6.4lf\n",pdbfile,quality[0]/5,quality[1]/5); return quality; } void ProQ(char** atom_vec,char** residue_vec,int* number_vec,rvec* coord_vec,size_t n,char* psipred,double* quality) { molecule m[1]; network net_lg[5]; network net_mx[5]; int i,j,k; int atoms=0; int residues=0; int atom_i=0; int atom_j=0; int exposed_cut; int buried_cut; int total_exposed=0; int total_buried=0; int ss_correct=0; int tot_res_contacts=0; int temp=0; int res_contacts[6][6]={{0}}; int contacts[10000]={0}; int exposed_residues[6]={0}; int buried_residues[6]={0}; double dist=0; double mean=0; double fat=0; double cutoff=196; double hcut=3.6; double hangle=1.2; double Q3=0; double pred_lg=0; double pred_mx=0; double frac_res_contacts[21]={0}; // 6*7/2=21 double frac_exposed_residues[6]={0}; double frac_buried_residues[6]={0}; double parameters[35]; //double *parameters; //double *quality; //(LG,MX) FILE *fp; char *ss; char psipredfiles[2000]; char lg1[200],lg2[200],lg3[200],lg4[200],lg5[200],mx1[200],mx2[200],mx3[200],mx4[200],mx5[200]; //printf("In ProQ\n"); //printf("%s %s\n",pdbfile,psipred); //strcpy(m[0].filename,pdbfile); //quality=(double*)malloc(2*sizeof(double)); quality[0]=0; quality[1]=0; strcpy(lg1,getenv("PROQDIR")); strcpy(lg2,lg1); strcpy(lg3,lg1); strcpy(lg4,lg1); strcpy(lg5,lg1); strcpy(mx1,lg1); strcpy(mx2,lg1); strcpy(mx3,lg1); strcpy(mx4,lg1); strcpy(mx5,lg1); read_net((char*)strcat(lg1,"lg1"),&net_lg[0]); read_net((char*)strcat(lg2,"lg2"),&net_lg[1]); read_net((char*)strcat(lg3,"lg3"),&net_lg[2]); read_net((char*)strcat(lg4,"lg4"),&net_lg[3]); read_net((char*)strcat(lg5,"lg5"),&net_lg[4]); read_net((char*)strcat(mx1,"mx1"),&net_mx[0]); read_net((char*)strcat(mx2,"mx2"),&net_mx[1]); read_net((char*)strcat(mx3,"mx3"),&net_mx[2]); read_net((char*)strcat(mx4,"mx4"),&net_mx[3]); read_net((char*)strcat(mx5,"mx5"),&net_mx[4]); //printf("In ProQ\n"); //printf("%lf %lf %lf\n",coord_vec[0][0],coord_vec[0][1],coord_vec[0][2]); if(read_to_molecule(m,atom_vec,residue_vec,number_vec,coord_vec,n)==0 && strlen(m[0].sequence)>18) { //printf("%d\n",m[0].residues); //if(strlen(m[0].sequence)<19) // { // return quality; // } for(i=0;i5 && dist20) //buried { buried_residues[get_res6(m[0].atm[m[0].CA_ref[i]].residue)]++; total_buried++; } } //printf("Exposed Buried\n"); for(i=0;i<6;i++) { if(total_exposed!=0) frac_exposed_residues[i]=(double)exposed_residues[i]/total_exposed; if(total_buried!=0) frac_buried_residues[i]=(double)buried_residues[i]/total_buried; //printf("%d/%d=%5.4f %d/%d=%5.4f\n",exposed_residues[i],total_exposed,frac_exposed_residues[i],buried_residues[i],total_buried,frac_buried_residues[i]); } //printf("\n"); for(i=0;i<6;i++) { for(j=0;j<6;j++) { // printf("%d ",res_contacts[i][j]); if(i!=j) { res_contacts[i][j]=2*res_contacts[i][j]; } } //printf("\n"); } for(k=0,i=0;i<6;i++) { for(j=i;j<6;j++,k++) { if(tot_res_contacts != 0) { frac_res_contacts[k]=(double)res_contacts[i][j]/tot_res_contacts; //printf("%f %d %d %d\n",frac_res_contacts[k],res_contacts[i][j],tot_res_contacts,&tot_res_contacts); } } } } //printf("\n"); fat=fatness(m); ss=assign_ss(m,hcut,hangle); // printf("PRED: %s\n SS: %s\n",psipred,ss); if(strlen(psipred)==strlen(ss)) { for(i=0;i 8) { fprintf(stderr,"%s Fatness (%lf) > 8 assigned fatness 8 to avoid strange results\n",m[0].filename,fat); fat=8; } parameters[j]=fat; parameters[j+1]=Q3; //printf("%f %f %d",fat,Q3,j); //printf("\n"); // for(i=0;i<35;i++) // printf("%lf ",parameters[i]); // printf("\n"); } else { fprintf(stderr,"Different strlen %lu %lu\nPRED: %s\n SS: %s\n SEQ: %s\n",strlen(psipred),strlen(ss),psipred,ss,m[0].sequence); free(ss); quality[0]=-1; quality[1]=-1; //return quality; } free(ss); for(i=0;i<5;i++) quality[0]+=netfwd(parameters,&net_lg[i]); for(i=0;i<5;i++) quality[1]+=netfwd(parameters,&net_mx[i]); quality[0]=quality[0]/5; quality[1]=quality[1]/5; //printf("LG: %6.4lf MX: %6.4lf\n",quality[0],quality[1]); //printf("%-30s %6.4lf %6.4lf\n",pdbfile,quality[0]/5,quality[1]/5); //return quality; }