/* METAPSICOV - Neural Network Meta-prediction of Residue Contacts */ /* Written by David T. Jones */ /* Copyright (C) 2014 University College London - Created : January 2014 */ /* Original Neural Network code Copyright (C) 1990 David T. Jones */ #include #include #include #include #include #include #define MAXSEQLEN 5000 /* logistic 'squashing' function (+/- 1.0) */ #define logistic(x) (1.0F / (1.0F + (float)exp(-(x)))) #define MAX(x,y) ((x)>(y)?(x):(y)) #include "metapsicov_net.h" char *wtfnm; int nwtsum, fwt_to[TOTAL], lwt_to[TOTAL]; float activation[TOTAL], bias[TOTAL], *weight[TOTAL]; float **cmutmat; int profile[MAXSEQLEN][20]; char seq[MAXSEQLEN]; struct entry { char *id, *seq, **contmap; float **mimat, **mipmat, **potmat, **psicov, **evfold, **ccmpred, *cprob, *hprob, *eprob, *entropy, *psolv, **profile, effnseq; float *aacomp, cmean, hmean, emean, smean, entropmean; int length, nseq; } target; enum aacodes { ALA, ARG, ASN, ASP, CYS, GLN, GLU, GLY, HIS, ILE, LEU, LYS, MET, PHE, PRO, SER, THR, TRP, TYR, VAL, UNK }; void err(char *s) { fprintf(stderr, "%s\n", s); } void fail(char *s) { err(s); exit(1); } /* Allocate matrix */ void *allocmat(int rows, int columns, int size) { int i; void **p, *rp; rp = malloc(rows * sizeof(void *) + sizeof(int)); if (rp == NULL) fail("allocmat: malloc [] failed!"); *((int *)rp) = rows; p = rp + sizeof(int); for (i = 0; i < rows; i++) if ((p[i] = calloc(columns, size)) == NULL) fail("allocmat: malloc [][] failed!"); return p; } /* Allocate vector */ void *allocvec(int columns, int size) { void *p; p = calloc(columns, size); if (p == NULL) fail("allocvec: calloc failed!"); return p; } void compute_output(void) { int i, j; float netinp; for (i = NUM_IN; i < TOTAL; i++) { netinp = bias[i]; for (j = fwt_to[i]; j < lwt_to[i]; j++) netinp += activation[j] * weight[i][j]; /* Trigger neuron */ activation[i] = logistic(netinp); } } /* * load weights - load all link weights from a disk file */ void load_wts(char *fname) { int i, j; double t; FILE *ifp; if (!(ifp = fopen(fname, "r"))) fail("Cannot open weight file!\n"); /* Load input units to hidden layer weights */ for (i = NUM_IN; i < NUM_IN + NUM_HID; i++) for (j = fwt_to[i]; j < lwt_to[i]; j++) { fscanf(ifp, "%lf", &t); weight[i][j] = t; } /* Load hidden layer to output units weights */ for (i = NUM_IN + NUM_HID; i < TOTAL; i++) for (j = fwt_to[i]; j < lwt_to[i]; j++) { fscanf(ifp, "%lf", &t); weight[i][j] = t; } /* Load bias weights */ for (j = NUM_IN; j < TOTAL; j++) { fscanf(ifp, "%lf", &t); bias[j] = t; } fclose(ifp); } /* Initialize network */ void init(void) { int i, j; for (i = NUM_IN; i < TOTAL; i++) if (!(weight[i] = calloc(TOTAL - NUM_OUT, sizeof(float)))) fail("init: Out of Memory!"); /* Connect input units to hidden layer */ for (i = NUM_IN; i < NUM_IN + NUM_HID; i++) { fwt_to[i] = 0; lwt_to[i] = NUM_IN; } /* Connect hidden units to output layer */ for (i = NUM_IN + NUM_HID; i < TOTAL; i++) { fwt_to[i] = NUM_IN; lwt_to[i] = NUM_IN + NUM_HID; } } /* Convert AA letter to numeric code (0-20) */ int aanum(ch) int ch; { static const int aacvs[] = { 999, 0, 20, 4, 3, 6, 13, 7, 8, 9, 20, 11, 10, 12, 2, 20, 14, 5, 1, 15, 16, 20, 19, 17, 20, 18, 20 }; return (isalpha(ch) ? aacvs[ch & 31] : 20); } /* Make 1st level prediction averaged over specified weight sets */ void predict(int argc, char **argv) { int aa, i, j, k, n, winpos, winpos2, midpos, ws, seqsep; float **predsum; double prob, x; predsum = allocmat(target.length, target.length, sizeof(float)); for (ws=8; ws= 0 && j + winpos < target.length) { for (aa = 0; aa < 21; aa++) activation[(j - WINL) * IPERGRP + aa] = target.profile[j + winpos][aa]; activation[(j - WINL) * IPERGRP + 21] = target.cprob[j + winpos]; activation[(j - WINL) * IPERGRP + 22] = target.hprob[j + winpos]; activation[(j - WINL) * IPERGRP + 23] = target.eprob[j + winpos]; activation[(j - WINL) * IPERGRP + 24] = target.psolv[j + winpos]; activation[(j - WINL) * IPERGRP + 25] = target.entropy[j + winpos]; } else activation[(j - WINL) * IPERGRP + 26] = 1.0; for (j = WINL; j <= WINR; j++) if (j + winpos2 >= 0 && j + winpos2 < target.length) { for (aa = 0; aa < 21; aa++) activation[(WINR-WINL+1) * IPERGRP + (j - WINL) * IPERGRP + aa] = target.profile[j + winpos2][aa]; activation[(WINR-WINL+1) * IPERGRP + (j - WINL) * IPERGRP + 21] = target.cprob[j + winpos2]; activation[(WINR-WINL+1) * IPERGRP + (j - WINL) * IPERGRP + 22] = target.hprob[j + winpos2]; activation[(WINR-WINL+1) * IPERGRP + (j - WINL) * IPERGRP + 23] = target.eprob[j + winpos2]; activation[(WINR-WINL+1) * IPERGRP + (j - WINL) * IPERGRP + 24] = target.psolv[j + winpos2]; activation[(WINR-WINL+1) * IPERGRP + (j - WINL) * IPERGRP + 25] = target.entropy[j + winpos2]; } else activation[(WINR-WINL+1) * IPERGRP + (j - WINL) * IPERGRP + 26] = 1.0; midpos = (winpos + winpos2) / 2; for (j = CWINL; j <= CWINR; j++) if (j + midpos >= 0 && j + midpos < target.length) { for (aa = 0; aa < 21; aa++) activation[2*(WINR-WINL+1) * IPERGRP + (j - CWINL) * IPERGRP + aa] = target.profile[j + midpos][aa]; activation[2*(WINR-WINL+1) * IPERGRP + (j - CWINL) * IPERGRP + 21] = target.cprob[j + midpos]; activation[2*(WINR-WINL+1) * IPERGRP + (j - CWINL) * IPERGRP + 22] = target.hprob[j + midpos]; activation[2*(WINR-WINL+1) * IPERGRP + (j - CWINL) * IPERGRP + 23] = target.eprob[j + midpos]; activation[2*(WINR-WINL+1) * IPERGRP + (j - CWINL) * IPERGRP + 24] = target.psolv[j + midpos]; activation[2*(WINR-WINL+1) * IPERGRP + (j - CWINL) * IPERGRP + 25] = target.entropy[j + midpos]; } else activation[2*(WINR-WINL+1) * IPERGRP + (j - CWINL) * IPERGRP + 26] = 1.0; activation[2*(WINR-WINL+1) * IPERGRP + (CWINR-CWINL+1) * IPERGRP] = target.mimat[winpos][winpos2]; activation[2*(WINR-WINL+1) * IPERGRP + (CWINR-CWINL+1) * IPERGRP+1] = target.mipmat[winpos][winpos2]; activation[2*(WINR-WINL+1) * IPERGRP + (CWINR-CWINL+1) * IPERGRP+2] = target.potmat[winpos][winpos2]; activation[2*(WINR-WINL+1) * IPERGRP + (CWINR-CWINL+1) * IPERGRP+3] = target.psicov[winpos][winpos2]; activation[2*(WINR-WINL+1) * IPERGRP + (CWINR-CWINL+1) * IPERGRP+4] = target.evfold[winpos][winpos2]; activation[2*(WINR-WINL+1) * IPERGRP + (CWINR-CWINL+1) * IPERGRP+5] = target.ccmpred[winpos][winpos2]; seqsep = winpos2 - winpos; if (seqsep < 5) activation[2*(WINR-WINL+1) * IPERGRP + (CWINR-CWINL+1) * IPERGRP + 6] = 1.0; else if (seqsep < 14) activation[2*(WINR-WINL+1) * IPERGRP + (CWINR-CWINL+1) * IPERGRP + 2 + seqsep] = 1.0; else if (seqsep < 18) activation[2*(WINR-WINL+1) * IPERGRP + (CWINR-CWINL+1) * IPERGRP + 16] = 1.0; else if (seqsep < 23) activation[2*(WINR-WINL+1) * IPERGRP + (CWINR-CWINL+1) * IPERGRP + 17] = 1.0; else if (seqsep <= 28) activation[2*(WINR-WINL+1) * IPERGRP + (CWINR-CWINL+1) * IPERGRP + 18] = 1.0; else if (seqsep <= 38) activation[2*(WINR-WINL+1) * IPERGRP + (CWINR-CWINL+1) * IPERGRP + 19] = 1.0; else if (seqsep <= 48) activation[2*(WINR-WINL+1) * IPERGRP + (CWINR-CWINL+1) * IPERGRP + 20] = 1.0; else activation[2*(WINR-WINL+1) * IPERGRP + (CWINR-CWINL+1) * IPERGRP + 21] = 1.0; for (aa=0; aa<21; aa++) activation[2*(WINR-WINL+1) * IPERGRP + (CWINR-CWINL+1) * IPERGRP + 22 + aa] = target.aacomp[aa]; activation[2*(WINR-WINL+1) * IPERGRP + (CWINR-CWINL+1) * IPERGRP + 22 + 21] = target.cmean; activation[2*(WINR-WINL+1) * IPERGRP + (CWINR-CWINL+1) * IPERGRP + 22 + 22] = target.hmean; activation[2*(WINR-WINL+1) * IPERGRP + (CWINR-CWINL+1) * IPERGRP + 22 + 23] = target.emean; activation[2*(WINR-WINL+1) * IPERGRP + (CWINR-CWINL+1) * IPERGRP + 22 + 24] = target.smean; activation[2*(WINR-WINL+1) * IPERGRP + (CWINR-CWINL+1) * IPERGRP + 22 + 25] = log((double)target.length); activation[2*(WINR-WINL+1) * IPERGRP + (CWINR-CWINL+1) * IPERGRP + 22 + 26] = log((double)target.nseq); activation[2*(WINR-WINL+1) * IPERGRP + (CWINR-CWINL+1) * IPERGRP + 22 + 27] = log((double)target.effnseq); activation[2*(WINR-WINL+1) * IPERGRP + (CWINR-CWINL+1) * IPERGRP + 22 + 28] = target.entropmean; #if (2*(WINR-WINL+1) * IPERGRP + (CWINR-CWINL+1) * IPERGRP + 22 + 29) != NUM_IN #error "MISMATCHING NUMBER OF INPUTS!" #endif compute_output(); predsum[winpos][winpos2] += activation[TOTAL - NUM_OUT]; } } for (winpos = 0; winpos < target.length; winpos++) for (winpos2 = winpos+5; winpos2 < target.length; winpos2++) { predsum[winpos][winpos2] /= (float)(argc - 8); x = predsum[winpos][winpos2]; #if 0 /* Transform to posterior prob by scaled log transform */ #define A 3.6512190000514688E-01 #define B 1.5491157018510503E+01 #define C 9.8856650825604842E-01 prob = A * log(B * x + C); if (prob < 0.001) prob = 0.001; if (prob > 0.999) prob = 0.999; printf("%d %d 0 8 %f\n", winpos+1, winpos2+1, prob); #else printf("%d %d 0 8 %f\n", winpos+1, winpos2+1, x); #endif } } /* Read PSI AA frequency data */ int getmtx(FILE *lfil) { int aa, i, j, naa; char buf[256], *p; if (fscanf(lfil, "%d", &naa) != 1) fail("Bad mtx file - no sequence length!"); if (naa > MAXSEQLEN) fail("Input sequence too long!"); if (fscanf(lfil, "%s", seq) != 1) fail("Bad mtx file - no sequence!"); while (!feof(lfil)) { if (!fgets(buf, 65536, lfil)) fail("Bad mtx file!"); if (!strncmp(buf, "-32768 ", 7)) { for (j=0; j