/* @source embiep ************************************************************* ** ** Isoelectric point routines ** ** @author Copyright (c) 1999 Alan Bleasby ** @version $Revision: 1.27 $ ** @modified $Date: 2013/06/29 22:38:19 $ by $Author: rice $ ** @@ ** ** This library is free software; you can redistribute it and/or ** modify it under the terms of the GNU Lesser General Public ** License as published by the Free Software Foundation; either ** version 2.1 of the License, or (at your option) any later version. ** ** This library is distributed in the hope that it will be useful, ** but WITHOUT ANY WARRANTY; without even the implied warranty of ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ** Lesser General Public License for more details. ** ** You should have received a copy of the GNU Lesser General Public ** License along with this library; if not, write to the Free Software ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, ** MA 02110-1301, USA. ** ******************************************************************************/ #include "ajlib.h" #include "embiep.h" #include "ajsys.h" #include "ajfiledata.h" #include "ajfileio.h" #include "ajbase.h" #include #include #include #include #define PKFILE "Epk.dat" /* @func embIepPkNew ********************************************************** ** ** Create a pK array to hold amino acid pK data ** ** @return [double**] pK data ** ** @release 6.1.0 ******************************************************************************/ double** embIepPkNew(void) { double **pK = NULL; ajuint i; AJCNEW(pK,EMBIEPSIZE); for(i=0; i < EMBIEPSIZE;++i) AJCNEW(pK[i],3); return pK; } /* @func embIepPkNewFile ****************************************************** ** ** Create a pK array and read the data from a file ** ** @param [u] pkfile [AjPFile] Amino acid pKa data file ** @param [w] pK [double***] Arrays of pK values ** @return [AjBool] True on success ** ** @release 6.6.0 ******************************************************************************/ AjBool embIepPkNewFile(AjPFile pkfile, double ***pK) { if(!*pK) *pK = embIepPkNew(); embIepPkReadFile(*pK, pkfile); /* read pK values */ return ajTrue; } /* @func embIepPkDel ********************************************************* ** ** Delete a pK array and read the data ** ** @param [w] pK [double ***] pKs ** ** @return [void] ** ** @release 6.1.0 ******************************************************************************/ void embIepPkDel(double ***pK) { ajuint i; for(i=0; i < EMBIEPSIZE; ++i) AJFREE((*pK)[i]); AJFREE(*pK); return; } /* @func embIepPhToHconc ***************************************************** ** ** Convert pH to hydrogen ion concontration ** ** @param [r] pH [double] pH ** ** @return [double] hydrogen ion concentrration ** ** @release 4.0.0 ******************************************************************************/ double embIepPhToHconc(double pH) { return pow(10.0,-pH); } /* @func embIepPhFromHconc **************************************************** ** ** Convert hydrogen ion concontration to pH ** ** @param [r] H [double] H ** ** @return [double] pH ** ** @release 4.0.0 ******************************************************************************/ double embIepPhFromHconc(double H) { return -log10(H); } /* @func embIepPkToK ********************************************************* ** ** Convert pK to dissociation constant ** ** @param [r] pK [double] pK ** ** @return [double] dissociation constant ** ** @release 1.0.0 ******************************************************************************/ double embIepPkToK(double pK) { return pow(10.0,-pK); } /* @func embIepPkFromK ******************************************************** ** ** Convert dissociation constant to pK ** ** @param [r] K [double] K ** ** @return [double] pK ** ** @release 4.0.0 ******************************************************************************/ double embIepPkFromK(double K) { return -log10(K); } /* @func embIepPkRead ******************************************************** ** ** Read the pK values from Epk.dat ** ** @param [w] pK [double**] pK ** ** @return [void] ** ** @release 1.0.0 ******************************************************************************/ void embIepPkRead(double **pK) { AjPFile inf = NULL; inf = ajDatafileNewInNameC(PKFILE); embIepPkReadFile(pK, inf); ajFileClose(&inf); return; } /* @func embIepPkReadFile ***************************************************** ** ** Read the pK values from Epk.dat ** ** @param [w] pK [double**] pK ** @param [w] pkfile [AjPFile] pK data file ** ** @return [void] ** ** @release 6.6.0 ******************************************************************************/ void embIepPkReadFile(double **pK, AjPFile pkfile) { AjPStr line; const char *p; double amino = 7.50; double carboxyl = 3.55; double dval = 0.0; char ch; ajuint ich; ajuint i; ajuint j; if(!pkfile) ajFatal("%s file not found",PKFILE); for(j=0; j < 3; ++j) for(i=0; i < EMBIEPSIZE; ++i) pK[i][j] = 0.0; line = ajStrNew(); while(ajReadline(pkfile,&line)) { p = ajStrGetPtr(line); if(*p=='#' || *p=='!' || *p=='\n' || *p=='\r') continue; if(ajStrPrefixCaseC(line,"Amino")) { p = ajSysFuncStrtok(p," \t\n\r"); p = ajSysFuncStrtok(NULL," \t\n\r"); sscanf(p,"%lf",&amino); continue; } if(ajStrPrefixCaseC(line,"Carboxyl")) { p = ajSysFuncStrtok(p," \t\n\r"); p = ajSysFuncStrtok(NULL," \t\n\r"); sscanf(p,"%lf",&carboxyl); continue; } p = ajSysFuncStrtok(p," \t\n\r"); ch = ajSysCastItoc(toupper((ajint)*p)); ich = ajBasecodeToInt(ch); p = ajSysFuncStrtok(NULL," \t\n\r"); sscanf(p, "%lf", &pK[ich][0]); p = ajSysFuncStrtok(NULL," \t\n\r"); if(p && sscanf(p, "%lf", &dval)) pK[ich][1] = dval; else pK[ich][1] = 0.0; p = ajSysFuncStrtok(NULL," \t\n\r"); if(p && sscanf(p, "%lf", &dval)) pK[ich][2] = dval; else pK[ich][2] = 0.0; } pK[EMBIEPAMINO][0] = amino; pK[EMBIEPAMINO][1] = amino; pK[EMBIEPCARBOXYL][0] = carboxyl; pK[EMBIEPCARBOXYL][2] = carboxyl; ajStrDel(&line); return; } /* @func embIepCompC ********************************************************** ** ** Calculate the amino acid composition of a protein sequence ** ** @param [r] s [const char *] protein sequence ** @param [r] amino [ajuint] number of amino termini ** @param [r] carboxyl [ajuint] number of carboxyl termini ** @param [r] sscount [ajuint] number of disulphide bridges ** @param [r] modlysine [ajuint] number of modified lysines ** @param [w] c [ajuint*] array of amino acid composition ** @param [w] resn [ajuint*] N-terminal amino acid ** @param [w] resc [ajuint*] C-terminal amino acid ** ** @return [void] ** ** @release 4.0.0 ******************************************************************************/ void embIepCompC(const char *s, ajuint amino, ajuint carboxyl, ajuint sscount, ajuint modlysine, ajuint *c, ajuint *resn, ajuint *resc) { ajint i; ajint j; const char *p; ajuint ires = 0; for(i=0;i D:%d N:%d\n", c[1], j, c[1]-j); c[1] = 0; } if(c[25]) /* Z = E or Q use Dayhoff freq */ { j = (int) (0.5 + ((float)c[25]) * 6.0 / 9.9); c[4] += j; c[16] += c[25] - j; ajDebug("embIepCompC Z:%d => E:%d Q:%d\n", c[25], j, c[25]-j); c[25] = 0; } c[EMBIEPAMINO] = amino; c[EMBIEPCARBOXYL] = carboxyl; if (sscount > 0) { if(c[EMBIEPCYSTEINE] < 2*sscount) { ajWarn("embIepCompC %d disulphides but only %d cysteines\n", sscount, c[EMBIEPCYSTEINE]+2*sscount); c[EMBIEPCYSTEINE] = 0; } else { c[EMBIEPCYSTEINE] -= 2*sscount; } } if (modlysine > 0) { if(c[EMBIEPLYSINE] < modlysine) { ajWarn("embIepCompC %d modified lysines but only %d lysines\n", sscount, c[EMBIEPLYSINE]); c[EMBIEPLYSINE] = 0; } else { c[EMBIEPLYSINE] -= modlysine; } } return; } /* @func embIepCompS ********************************************************** ** ** Calculate the amino acid composition of a protein sequence ** ** @param [r] str [const AjPStr] protein sequence ** @param [r] amino [ajuint] number of amino termini ** @param [r] carboxyl [ajuint] number of carboxyl termini ** @param [r] sscount [ajuint] number of disulphide bridges ** @param [r] modlysine [ajuint] number of modified lysines ** @param [w] c [ajuint *] Array of amino acid composition ** @param [w] resn [ajuint*] N-terminal amino acid ** @param [w] resc [ajuint*] C-terminal amino acid ** ** @return [void] ** ** @release 4.0.0 ******************************************************************************/ void embIepCompS(const AjPStr str, ajuint amino, ajuint carboxyl, ajuint sscount, ajuint modlysine, ajuint *c, ajuint *resn, ajuint *resc) { embIepCompC(MAJSTRGETPTR(str), amino, carboxyl, sscount, modlysine, c, resn, resc); return; } /* @func embIepCalcK ********************************************************* ** ** Calculate the dissociation constants ** Amino acids for which there is no entry in Epk.dat have K set to 0.0 ** ** @param [w] K [double *] dissociation constants ** @param [w] pK [double **] pK values ** ** @return [void] ** ** @release 1.0.0 ******************************************************************************/ void embIepCalcK(double *K, double **pK) { ajint i; for(i=0;i0.0 && bot>0.0) || (top<0.0 && bot<0.0)) return 0.0; while(bph-tph>0.0001) { mid = ((bph-tph) / 2.0) + tph; H = embIepPhToHconc(mid); embIepGetProto(K,c,op,H,pro); charge = embIepGetCharge(c,pro,&sum); if(charge>0.0) { tph = mid; continue; } if(charge<0.0) { bph = mid; continue; } else return mid; } return tph; } /* @func embIepIepC *********************************************************** ** ** Calculate the pH nearest the IEP. ** ** @param [r] s [const char *] sequence ** @param [r] amino [ajuint] number of N-termini ** @param [r] carboxyl [ajuint] number of C-termini ** @param [r] sscount [ajuint] number of disulphide bridges ** @param [r] modlysine [ajuint] number of modified lysines ** @param [w] pK [double **] pK values ** @param [w] iep [double *] IEP ** @param [r] termini [AjBool] use termini ** ** @return [AjBool] True if IEP exists ** ** @release 4.0.0 ******************************************************************************/ AjBool embIepIepC(const char *s, ajuint amino, ajuint carboxyl, ajuint sscount, ajuint modlysine, double **pK, double *iep, AjBool termini) { ajuint *c = NULL; ajuint *op = NULL; double *K = NULL; double *pro = NULL; ajuint resn = 0; ajuint resc = 0; *iep = 0.0; AJCNEW(c, EMBIEPSIZE); AJCNEW(op, EMBIEPSIZE); AJCNEW(K, EMBIEPSIZE); AJCNEW(pro, EMBIEPSIZE); embIepCalcK(K,pK); /* Convert to dissoc consts */ /* Get sequence composition */ embIepCompC(s,amino,carboxyl,sscount, modlysine, c, &resn, &resc); if(!termini) c[EMBIEPAMINO] = c[EMBIEPCARBOXYL] = 0; else embIepCalcKend(K, pK, resn, resc); *iep = embIepPhConverge(c,K,op,pro); AJFREE(pro); AJFREE(K); AJFREE(op); AJFREE(c); if(!*iep) return ajFalse; return ajTrue; } /* @func embIepIepS *********************************************************** ** ** Calculate the pH nearest the IEP. ** ** @param [r] str [const AjPStr] sequence ** @param [r] amino [ajuint] number of N-termini ** @param [r] carboxyl [ajuint] number of C-termini ** @param [r] sscount [ajuint] number of disulphide bridges ** @param [r] modlysine [ajuint] number of modified lysines ** @param [w] pK [double **] pK values ** @param [w] iep [double *] IEP ** @param [r] termini [AjBool] use termini ** ** @return [AjBool] True if IEP exists ** ** @release 4.0.0 ******************************************************************************/ AjBool embIepIepS(const AjPStr str, ajuint amino, ajuint carboxyl, ajuint sscount, ajuint modlysine, double **pK, double *iep, AjBool termini) { return embIepIepC(ajStrGetPtr(str), amino, carboxyl, sscount, modlysine, pK, iep, termini); } #ifdef AJ_COMPILE_DEPRECATED_BOOK #endif #ifdef AJ_COMPILE_DEPRECATED /* @obsolete embIepComp ** @replace embIepCompC (1,2,3/1,2,0,0,3) */ __deprecated void embIepComp(const char *s, ajint amino, ajint carboxyl, ajint *c) { embIepCompC(s, amino, carboxyl, 0, 0, c); return; } /* @obsolete embIepIEP ** @replace embIepIepC (1,2,3,4/1,2,0,0,3,4) */ __deprecated AjBool embIepIEP(const char *s, ajint amino, ajint carboxyl, double *pK, double *iep, AjBool termini) { return embIepIepC(s, amino, carboxyl, 0, 0, pK, iep, termini); } #endif