/* FreeContact - program to predict protein residue contacts from a sufficiently large multiple alignment * Copyright (C) 2012-2013 Laszlo Kajan Rost Lab, Technical University of Munich, Germany * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include "config.h" #ifdef HAVE_OPENMP #include #endif #include #include #include #include //#include // auto_ptr, unique_ptr #include #include #include #include #include #include "freecontact.h" // lkajan: EVfold-mfDCA is calculate_evolutionary_constraints.m namespace bo = boost; typedef float g_fp_t; // n S L thr maxIt msg warm X W Wd WXj info brk extern "C" void glassofast_(int *, g_fp_t *, g_fp_t *, g_fp_t *, int *, int *, int *, g_fp_t *, g_fp_t *, g_fp_t *, g_fp_t *, int *, int *); extern "C" { // LAPACK Cholesky void spotrf_(const char* UPLO, const int* N, float* A, const int* LDA, int* INFO); // LU decomoposition of a general matrix void sgetrf_(int* M, int* N, float* A, int* LDA, int* IPIV, int* INFO); // generate inverse of a matrix given its LU decomposition void sgetri_(int* N, float* A, int* lda, int* IPIV, float* WORK, int* LWORK, int* INFO); } namespace bo = boost; using namespace std; namespace freecontact { // When you change these, rename BLAS/LAPACK calls for correct precision. typedef float cov_fp_t; typedef float fp_t; template class ct_vector : public std::vector<_Tp> { public: uint16_t alilen; ct_vector(uint16_t __alilen = 0) : std::vector<_Tp>(__alilen*__alilen), alilen(__alilen) {} // Does not check range. inline _Tp& operator()(uint16_t __i, uint16_t __j) { return this->_M_impl._M_start[ __i*alilen + __j ]; } // Does not check range. inline const _Tp& operator()(uint16_t __i, uint16_t __j) const { return this->_M_impl._M_start[ __i*alilen + __j ]; } }; template class af_vector : public std::vector<_Tp> { public: uint16_t alilen; uint8_t q; // letters af_vector(uint16_t __alilen, uint8_t __q, _Tp __v) : std::vector<_Tp>(__alilen*__q, __v), alilen(__alilen), q(__q) {} inline _Tp& operator()(uint16_t __i, uint8_t __ai) { return this->_M_impl._M_start[ __i*q + __ai ]; } inline const _Tp& operator()(uint16_t __i, uint8_t __ai) const { return this->_M_impl._M_start[ + __i*q + __ai ]; } }; class pf_vector : public std::vector { typedef predictor::pairfreq_t _Tp; public: uint16_t alilen; uint8_t q; // letters size_t d3, d2, d1; pf_vector() : std::vector<_Tp>(), alilen(0), q(0), d3(0), d2(0), d1(0) {} pf_vector(uint16_t __alilen, uint8_t __q, _Tp __v) : std::vector<_Tp>(__alilen*__alilen*__q*__q, __v), alilen(__alilen), q(__q), d3(q), d2(q*d3), d1(alilen*d2) {} inline _Tp& operator()(uint16_t __i, uint16_t __j, uint8_t __ai, uint8_t __aj) { return this->_M_impl._M_start[ (__i)*d1 + (__j)*d2 + (__ai)*d3 + (__aj) ]; } inline const _Tp& operator()(uint16_t __i, uint16_t __j, uint8_t __ai, uint8_t __aj) const { return this->_M_impl._M_start[ (__i)*d1 + (__j)*d2 + (__ai)*d3 + (__aj) ]; } }; template class cov_vector : public std::vector<_Tp> { public: uint16_t alilen; uint8_t q; // letters size_t d1; cov_vector() : std::vector<_Tp>(), alilen(0), q(0), d1(0) {} cov_vector(uint16_t __alilen, uint8_t __q) : std::vector<_Tp>(__alilen*__q*__alilen*__q), alilen(__alilen), q(__q), d1(alilen*q) {} cov_vector(uint16_t __alilen, uint8_t __q, _Tp __v) : std::vector<_Tp>(__alilen*__q*__alilen*__q, __v), alilen(__alilen), q(__q), d1(alilen*q) {} // 2D inline _Tp& operator()(size_t __row, size_t __col) { return this->_M_impl._M_start[ __row*d1 + __col ]; } inline const _Tp& operator()(size_t __row, size_t __col) const { return this->_M_impl._M_start[ __row*d1 + __col ]; } // 4D inline _Tp& operator()(uint16_t __i, uint16_t __j, uint8_t __ai, uint8_t __aj) { return this->_M_impl._M_start[ (__i*q+__ai)*d1 + __j*q+__aj ]; } inline const _Tp& operator()(uint16_t __i, uint16_t __j, uint8_t __ai, uint8_t __aj) const { return this->_M_impl._M_start[ (__i*q+__ai)*d1 + __j*q+__aj ]; } }; /// Calculates raw score for an i,j contact class _rawscore_calc_t { public: virtual double operator()(const uint16_t i, const uint16_t j) const = 0; }; /// 2-dimensional matrix. template class d2matrix : public std::vector<_Tp> { public: _Tri rows; _Tci cols; d2matrix(_Tri __rows, _Tci __cols) : std::vector<_Tp>(__rows*__cols), rows(__rows), cols(__cols) { memset(this->_M_impl._M_start, 0, rows*cols*sizeof(_Tp)); } inline _Tp& operator()(_Tri __r, _Tci __c) { return this->_M_impl._M_start[ __r*cols + __c ]; } inline const _Tp& operator()(_Tri __r, _Tci __c) const { return this->_M_impl._M_start[ __r*cols + __c ]; } }; typedef d2matrix simcnt_t; static void _raw_score_matrix(map > &__raw_ctscore, map > &__apc_bg, map &__apc_mean, const uint16_t __alilen, const string &__key, const _rawscore_calc_t &__fo ); static vector _apc( const ct_vector& __raw_ctscore, const vector& __apc_bg, const double __apc_mean, const uint16_t __mincontsep, bool __filt ); static inline vector _raw_as_is( const ct_vector& __raw_ctscore, const uint16_t __mincontsep ); static inline uint32_t _cache_holds_nseq(uint16_t __seqsize); #ifndef __SSE2__ static inline __m128i _mm_setzero_si128(){ __m128i m; long long *mp = (long long *)&m; mp[1] = mp[0] = 0; return m; } #endif class _glasso_timer { timer_t _timerid; static void _brk(sigval_t __sigval) { *static_cast(__sigval.sival_ptr) = 1; } // this is a resource - disable copy contructor and copy assignment _glasso_timer(const _glasso_timer&){} _glasso_timer& operator=(const _glasso_timer&){return *this;} public: bool dbg; _glasso_timer(int *__sival_ptr, time_t __tv_sec, bool __dbg = false) : dbg(__dbg) { struct sigevent sev; sev.sigev_notify = SIGEV_THREAD; sev.sigev_notify_function = _brk; sev.sigev_value.sival_ptr = __sival_ptr; sev.sigev_notify_attributes = NULL; if (timer_create(CLOCK_PROCESS_CPUTIME_ID, &sev, &_timerid) == -1) { if (dbg) cerr << "timer_create(CLOCK_PROCESS_CPUTIME_ID, ...): " << strerror(errno) << " at " << __FILE__ << ":" << __LINE__ << "\n"; // In case timer_create fails with CLOCK_PROCESS_CPUTIME_ID because clock_getcpuclockid() is not supported (e.g. on kfreebsd): if (timer_create(CLOCK_MONOTONIC, &sev, &_timerid) == -1) throw std::runtime_error(bo::str(bo::format("%s at %s:%d") % strerror(errno) % __FILE__ % __LINE__ )); } struct itimerspec its; its.it_value.tv_sec = __tv_sec; its.it_value.tv_nsec = 0; its.it_interval.tv_sec = 0; its.it_interval.tv_nsec = 0; if (timer_settime(_timerid, 0, &its, NULL) == -1) throw std::runtime_error(bo::str(bo::format("%s at %s:%d") % strerror(errno) % __FILE__ % __LINE__ )); } virtual ~_glasso_timer() { if (timer_delete(_timerid) == -1) throw std::runtime_error(strerror(errno)); } }; void predictor::get_seq_weights( freq_vec_t &__aliw, double &__wtot, const ali_t& __ali, double __cp, bool __veczw, int __num_threads ) throw (alilen_error, icme_timeout_error, std::range_error, std::runtime_error) { if (__ali.seqcnt < 2) throw alilen_error(bo::str(bo::format("alignment size (%d) < 2") % __ali.seqcnt)); // lkajan: the present implementation is limited to alilen 4080. if (__ali.alilenpad > 4080) throw alilen_error(bo::str(bo::format("alignment (%d) longer than 4080") % __ali.alilenpad)); if (__cp < 0 || __cp > 1) throw range_error(bo::str(bo::format("clustering threshold is out of range [0-1] %g") % __cp )); if (__num_threads < 0) throw range_error(bo::str(bo::format("number of threads is out of range [0-) %d") % __num_threads )); #ifdef HAVE_OPENMP int num_threads = __num_threads ? __num_threads : omp_get_max_threads(); if(dbg) cerr << "will use " << num_threads << " OMP threads\n"; #else int num_threads = 1; if(dbg) cerr << "no OMP thread support\n"; #endif // Calculate BLOSUM sequence weights at clustering threshold __cp: "sequence segments that are identical for at least that percentage of amino acids are grouped together". // A modification of the method in "Amino acid substitution matrices from protein blocks. Henikoff and Henikoff 1992. PNAS.", because the weight is not established for all members of // the cluster from the size of the __cp cluster, but instead individually, from the number of sequences found at >= __cp. This is the method of mfDCA and PSICOV. // Gaps and Xs also contribute to matches/mismatches, notably X matches only [JOUX]. time_t t0 = time(NULL); uint16_t mismthresh = __ali.alilen - ceil( __ali.alilen * __cp ); int matchthresh = __ali.alilenpad - mismthresh; // minimum number of matches for clustering seq uint32_t cache_holds_nseq = _cache_holds_nseq(__ali.alilenpad); simcnt_t simcnt(__ali.seqcnt, num_threads); // lkajan: allow for other vectorizations as well #define CAN_VECW 0 #ifdef __SSE2__ #undef CAN_VECW #define CAN_VECW 1 #endif if(!CAN_VECW || !__veczw) { // lkajan: The SSE2 implementation came first. Aim was to keep changes to the minimum here. That's why this implementation looks a bit weird. // lkajan: Look out: the iterations take less and less time, so a division by first-half/second-half, as seems to be the default, leads to thread starvation. #ifdef HAVE_OPENMP #pragma omp parallel for num_threads(num_threads), schedule(dynamic) #endif for(uint32_t i=0; i<__ali.seqcnt-1; ++i) { if(dbg #ifdef HAVE_OPENMP && omp_get_thread_num() == 0 #endif ) { time_t t1 = time(NULL); if( t1 != t0 ) { t0 = t1; fprintf(stderr, "%s%d/%d", "[1G[K", i, __ali.seqcnt); } } // xterm const uint8_t *a0 = &__ali(i,0); for(uint32_t j=i+1; j<__ali.seqcnt; ++j) { const uint8_t *a = a0, *b = &__ali(j,0); uint16_t matc128 = 0; for(uint16_t k = 0, k_end = __ali.alilenpad; k < k_end; ++k) matc128 += ( a[k] == b[k] ); if( matc128 >= matchthresh ) { #ifndef HAVE_OPENMP int thread_num = 0; #else int thread_num = omp_get_thread_num(); #endif ++simcnt(i,thread_num); ++simcnt(j,thread_num); } } } } #ifdef __SSE2__ else { // lkajan: Dynamically set chunk size according to (L1) cache size (cat /sys/devices/system/cpu/cpu0/cache/index*/{coherency_line_size,level,size,type}). // lkajan: Would prefetching with _mm_prefetch(ptr, _MM_HINT_T0) help? uint32_t wchunk = cache_holds_nseq / 2; if(dbg) cerr << "SSE2 veczweight, wchunk = "<< wchunk <<"\n"; __m128i m0 = _mm_setzero_si128(); __m128i m1 = _mm_cmpeq_epi8( m0, m0 ); uint32_t chunk_e = (__ali.seqcnt-2)/wchunk + 1; #ifdef HAVE_OPENMP #pragma omp parallel for num_threads(num_threads), schedule(dynamic) #endif for(uint32_t chunki=0; chunki= matchthresh || (k<<4) - matc > mismthresh) have failed. // matc128 = _mm_add_epi8( m, matc128 ): look out, good only till seq len <= 4080 #define _count_matches_sse2_helper(__omp_get_thread_num) {\ __m128i *a = a0, *b = (__m128i*)&__ali(j,0);\ __m128i matc128 = m1;\ \ for(uint16_t k = 0, k_end = __ali.alilen16; k < k_end; ++k) \ {\ __m128i m = _mm_cmpeq_epi8( a[k], b[k] );\ matc128 = _mm_add_epi8( m, matc128 );\ }\ \ matc128 = _mm_sad_epu8( m1, matc128 );\ matc128 = _mm_add_epi32( matc128,\ _mm_srli_si128( matc128, 8 ) );\ \ if( _mm_cvtsi128_si32(matc128) >= matchthresh )\ {\ int thread_num = (__omp_get_thread_num);\ ++simcnt(i,thread_num);\ ++simcnt(j,thread_num);\ } } #ifdef HAVE_OPENMP #define _count_matches_sse2() _count_matches_sse2_helper(omp_get_thread_num()) #else #define _count_matches_sse2() _count_matches_sse2_helper(0) #endif for(uint32_t j = i+1; j 1) throw range_error(bo::str(bo::format("contact density is out of range [0-1] %g") % __dens )); if(__gapth < 0 || __gapth > 1) throw range_error(bo::str(bo::format("gap threshold is out of range [0-1] %g") % __gapth )); if(__mincontsep < 1) throw range_error(bo::str(bo::format("minimum contact separation is out of range [1-) %d") % __mincontsep )); if(__num_threads < 0) throw range_error(bo::str(bo::format("number of threads is out of range [0-) %d") % __num_threads )); if(__pseudocnt < 0) throw range_error(bo::str(bo::format("pseudo count is out of range [0-) %g") % __pseudocnt )); if(__pscnt_weight < 0 || __pscnt_weight > 1) throw range_error(bo::str(bo::format("pseudo count weight is out of range [0-1] %g") % __pscnt_weight )); if(__shrink_lambda < 0 || __shrink_lambda > 1) throw range_error(bo::str(bo::format("shrinkage lambda is out of range [0-1] %g") % __shrink_lambda )); #ifdef HAVE_OPENMP int num_threads = __num_threads ? __num_threads : omp_get_max_threads(); if(dbg) cerr << "will use " << num_threads << " OMP threads\n"; #else int num_threads = 1; if(dbg) cerr << "no OMP thread support\n"; #endif if(__timing) (*__timing)["num_threads"] = num_threads; //timeval t_run; gettimeofday(&t_run, NULL); // Calculate column-wise AA frequencies with pseudocount and weighting, 21-letter alphabet. const double ps_wtot = __pseudocnt * 21 + __wtot; // lkajan: sum( pa[i][*] ) and sum( pab[i][j][*][*] ) is __wtot, if no pseudocounts are used af_vector aafreq( __ali.alilen, 21, __pseudocnt / ps_wtot ); // aafreq[__ali.alilen][21], this is EVfold-mfDCA::Pi_true with __pseudocnt == 0 freq_t ps_aliw[__ali.seqcnt]; memcpy(ps_aliw, __aliw.data(), __ali.seqcnt*sizeof(freq_t)); cblas_dscal(__ali.seqcnt, 1/ps_wtot, ps_aliw, 1); // scale vector by a constant for(uint32_t k = 0; k < __ali.seqcnt; ++k) { for(uint16_t j = 0; j < __ali.alilen; ++j) { uint8_t aa = __ali(k,j); if(aa < 21) aafreq(j,aa) += ps_aliw[k]; // if aa is not X or alike } } vector gap_cols; if(__gapth<1) for(uint16_t i = 0; i < __ali.alilen; ++i) { if( aafreq(i,aamap_gapidx) > __gapth ) gap_cols.push_back(i); } if(dbg) cerr << "calculated column aa frequencies, gap cols = " << gap_cols.size() << "\n"; // Calculate AA-pair frequencies with pseudocount and weighting, 21-letter alphabet. pf_vector pairfreq( __ali.alilen, 21, __pseudocnt/21.0 / ps_wtot ); // pairfreq[__ali.alilen][__ali.alilen][21][21], this is EVfold-mfDCA::Pij_true with __pseudocnt == 0 timeval t_before; gettimeofday(&t_before, NULL); { for(uint32_t k = 0; k<__ali.seqcnt; ++k) { freq_t ps_aliwk = ps_aliw[k]; const uint8_t *alik = &__ali(k,0); #ifdef HAVE_OPENMP #pragma omp parallel for num_threads(num_threads), schedule(static) #endif for(uint16_t i = 0; i < (__ali.alilen-2)/2+1; ++i) { #define _fill_pfi(__i) {\ pairfreq_t *pfi = &pairfreq(__i,0,0,0);\ for(uint16_t j = __i+1; j < __ali.alilen; ++j)\ {\ uint8_t ai = alik[__i];\ uint8_t aj = alik[j];\ if(ai < 21 && aj < 21)\ pfi[ j*pairfreq.d2 + ai*pairfreq.d3 + aj ] += ps_aliwk;\ } } // _fill_pfi(i); uint16_t ri = __ali.alilen-2-i; if(ri!=i) _fill_pfi(ri); #undef _fill_pfi } } for(uint16_t i = 0; i < __ali.alilen-1; ++i) for(uint16_t j = i+1; j < __ali.alilen; ++j) { pairfreq_t *pfij = &pairfreq(i,j,0,0), *pfji = &pairfreq(j,i,0,0); // lkajan: this is faster for(uint8_t ai = 0; ai < 21; ++ai) // -funroll-loops to unroll such loops? vectorize? for(uint8_t aj = 0; aj < 21; ++aj) pfji[ aj*pairfreq.d3 + ai ] = pfij[ ai*pairfreq.d3 + aj ]; } for(uint16_t i = 0; i < __ali.alilen; ++i) { // zero out 21x21 matrix memset(pairfreq.data() + i*pairfreq.d1 + i*pairfreq.d2, 0, 21*21*sizeof(pairfreq_t)); for(uint8_t aa = 0; aa < 21; ++aa) pairfreq(i,i,aa,aa) = aafreq(i,aa); } } { timeval t_now, t_diff; gettimeofday(&t_now, NULL); timersub(&t_now, &t_before, &t_diff); if(__timing) (*__timing)["pairfreq"] = t_diff.tv_sec+t_diff.tv_usec/1e6; if(dbg) fprintf(stderr, "calculated pair frequency table in %g secs\n", t_diff.tv_sec+t_diff.tv_usec/1e6 ); } // lkajan: Do MI_true calculation before changing aafreq and pairfreq: map > raw_ctscore; map > apc_bg; map apc_mean; // EVfold-mfDCA MI_true class _rawscore_MI_true_t : public _rawscore_calc_t { const af_vector &_Pi_true; const pf_vector &_Pij_true; public: _rawscore_MI_true_t(const af_vector &__Pi_true, const pf_vector &__Pij_true ) : _Pi_true(__Pi_true), _Pij_true(__Pij_true) {} virtual double operator()(const uint16_t i, const uint16_t j) const { double raw_ij = 0; for(uint8_t ai = 0; ai < 21; ++ai) for(uint8_t aj = 0; aj < 21; ++aj) if(_Pij_true(i,j,ai,aj) > 0) raw_ij += _Pij_true(i,j,ai,aj) * log( _Pij_true(i,j,ai,aj) / _Pi_true(i,ai) / _Pi_true(j,aj) ); return raw_ij; } } _rawscore_MI_true(aafreq, pairfreq); _raw_score_matrix(raw_ctscore, apc_bg, apc_mean, __ali.alilen, "MI", _rawscore_MI_true ); if(dbg) fprintf(stderr, "collected apc_mean[MI] = %16.15g\n", apc_mean["MI"]); // EVfold-mfDCA::with_pc() pseudocount weight function implementation { pf_vector Pij_true = pairfreq; if( __pscnt_weight != 0.0 ) { for(size_t i = 0; i < pairfreq.size(); ++i) pairfreq[i] = (1-__pscnt_weight)*pairfreq[i] + __pscnt_weight/21/21; // vectorize? for(size_t i = 0; i < aafreq.size(); ++i) aafreq[i] = (1-__pscnt_weight)*aafreq[i] + __pscnt_weight/21; // vectorize? for(uint16_t i = 0; i < __ali.alilen; ++i) for(uint8_t ai = 0; ai < 21; ++ai) for(uint8_t aj = 0; aj < 21; ++aj) pairfreq(i,i,ai,aj) = ai==aj ? (1-__pscnt_weight)*Pij_true(i,i,ai,aj) + __pscnt_weight/21 : 0; } } if(dbg) { // compute sum of pairfreq matrix double aafs = 0; // aafreq sum double pfs = 0; for(uint16_t i = 0; i < __ali.alilen; ++i) for(uint16_t j = 0; j < __ali.alilen; ++j) for(uint8_t ai = 0; ai < 21; ++ai) { if(j==0) aafs += aafreq(i,ai); for(uint8_t aj = 0; aj < 21; ++aj) pfs += pairfreq(i,j,ai,aj); } fprintf(stderr, "aa freq sum (cell) = %.15g, pairfreq sum (cell) = %.15g\n", aafs/__ali.alilen, pfs/__ali.alilen/__ali.alilen); } #ifdef RETURN_AFTER_PAIRFREQ cerr << "returning because of RETURN_AFTER_PAIRFREQ " << __FILE__ << ":" << __LINE__ << "\n"; return map >(); #endif // Forming the covariance matrix. // lkajan: EVfold-mfDCA: covq = 20, the last letter ('Y') is simply left off. In this code '-' (gap) is left off. // lkajan: Gaps and the actual inversion of the covariance matrix (as opposed to estimation with glasso): // lkajan: * Leave ( aafreq(i,aamap_gapidx) > __gapth || aafreq(j,aamap_gapidx) > __gapth ) columns and rows entirely out of the cov matrix. cov_vector covm( __apply_gapth ? __ali.alilen - gap_cols.size() : __ali.alilen, __cov20 ? 20 : 21, 0.0 ); uint16_t i = 0, covi = 0; vector::const_iterator gc_bi = ( __apply_gapth ? gap_cols.begin() : gap_cols.end() ), gc_ei = gap_cols.end(); for(; i < __ali.alilen; ++i) { if(gc_bi!=gc_ei && *gc_bi == i){ ++gc_bi; continue; } // too many gaps in this row uint16_t j = 0, covj = 0; vector::const_iterator gc_bj = ( __apply_gapth ? gap_cols.begin() : gap_cols.end() ), gc_ej = gap_cols.end(); for(; j < __ali.alilen; ++j) { if(gc_bj!=gc_ej && *gc_bj == j){ ++gc_bj; continue; } // too many gaps in this col for(uint8_t ai = 0; ai < covm.q; ++ai) for(uint8_t aj = 0; aj < covm.q; ++aj) covm(covi,covj,ai,aj) = pairfreq(i,j,ai,aj) - aafreq(i,ai) * aafreq(j,aj); ++covj; } ++covi; } { pf_vector _empty; pairfreq.swap(_empty); } // lkajan: this trick releases memory, unlike clear() // lkajan: psicov shrinks covm in infinite loop without zeroing those cells out: if(__estimate_ivcov) // lkajan: control it by matrix inversion/estimation method for(uint16_t i = 0; i < covm.alilen; ++i) for(uint8_t ai = 0; ai < covm.q; ++ai) for(uint8_t aj = 0; aj < covm.q; ++aj) if(ai != aj) covm(i,i,ai,aj) = 0; if(dbg) cerr<<"formed covariance matrix ("<0) { gettimeofday(&t_before, NULL); double dmean = 0; for(size_t i = 0; i < covm.d1; ++i) dmean += covm(i, i); dmean /= covm.d1; const double& lambda = __shrink_lambda; // lkajan: A constant lambda seems crude. See if a mathematician can think of a better solution. for (;;) { if(dbg) cerr << "cov matrix Cholesky..."; int cholres; { vector tempmat = covm; int N = covm.d1; spotrf_("L",&N,tempmat.data(),&N,&cholres); if(cholres < 0) throw std::runtime_error(bo::str(bo::format("illegal value for argument %d") % -cholres )); } if(cholres==0) break; if(dbg) cerr << " not positive definite"; for(uint16_t i = 0; i < covm.alilen; ++i) for(uint16_t j = 0; j < covm.alilen; ++j) for(uint8_t ai = 0; ai < covm.q; ++ai) for(uint8_t aj = 0; aj < covm.q; ++aj) if(i != j) covm(i,j,ai,aj) *= (1.0 - lambda); else if(ai == aj) covm(i,j,ai,aj) = lambda*dmean + (1.0-lambda)*covm(i,j,ai,aj); if(dbg) { double dmean_dev = 0; for(size_t i = 0; i < covm.d1; ++i) dmean_dev += pow(dmean - covm(i,i), 2); dmean_dev /= covm.d1; dmean_dev = sqrt(dmean_dev); fprintf(stderr, ", dmean dev: %g\n", dmean_dev ); } } { timeval t_now, t_diff; gettimeofday(&t_now, NULL); timersub(&t_now, &t_before, &t_diff); if(__timing) (*__timing)["shrink"] = t_diff.tv_sec+t_diff.tv_usec/1e6; if(dbg) fprintf(stderr, "\nshrank covariance matrix in %g secs\n", t_diff.tv_sec+t_diff.tv_usec/1e6 ); } } #ifdef LIBFREEC_CHKPT { FILE *f = fopen("covm.dat", "w"); do { if( fwrite(&covd, sizeof(covd), 1, f) != 1 ) { cerr << "error writing 'covm.dat': " << strerror(errno) << "\n"; fclose(f); unlink("covm.dat"); break; } if( fwrite(&galilen, sizeof(galilen), 1, f) != 1 ) { cerr << "error writing 'covm.dat': " << strerror(errno) << "\n"; fclose(f); unlink("covm.dat"); break; } if( fwrite( covm.data(), sizeof(*covm.data()), covm.size(), f ) != covm.size() ) { cerr << "error writing 'covm.dat': " << strerror(errno) << "\n"; fclose(f); unlink("covm.dat"); break; } cerr << "wrote 'covm.dat'\n"; fclose(f); } while(false); } #endif // Invert the covariance matrix, aiming for __dens precision matrix density. fp_t rho = __rho; fp_t rhofac = 0; cov_vector wwi( covm.alilen, covm.q ); // inverse covariance matrix gettimeofday(&t_before, NULL); if(__estimate_ivcov) { fp_t density = -1, prev_dens = -1; do { // Set new rho - the bigger rho, the smaller density gets if( rho < 0 ) rho = std::max(0.001, 1.0 / __wtot); else if(density >= 0) { if( density == 0 ) rhofac = 0.5; else { if( rhofac != 0 && prev_dens > 0 && density > 0 ) rhofac = pow( rhofac, log( __dens / density ) / log( density / prev_dens ) ); else rhofac = __dens > density ? 0.9 : 1.1; // lkajan: be more aggressive here? 0.5 : 2? } rho *= rhofac; } int g_ia = 0; // exact solution if(dbg) fprintf(stderr, "will try rho %g, %s solution\n", rho, (g_ia ? "approximate" : "exact")); if(rho <= 0 || rho >= 1) throw std::runtime_error(bo::str(bo::format("regularization parameter rho is out of expected range (0-1): %g") % rho )); cov_vector rho_m( covm.alilen, covm.q, rho ); for(uint16_t i = 0; i < rho_m.alilen; ++i) for(uint16_t j = 0; j < rho_m.alilen; ++j) for(uint8_t ai = 0; ai < rho_m.q; ++ai) for(uint8_t aj = 0; aj < rho_m.q; ++aj) // Watch out, __ali.alilen may != galilen; __gapth filtering does not seem to make much sense with 0.5 __pscnt_weight. if( (__ali.alilen==rho_m.alilen && ( aafreq(i,aamap_gapidx) > __gapth || aafreq(j,aamap_gapidx) > __gapth )) || (i == j && ai != aj ) ) rho_m(i,j,ai,aj) = 1e9; #ifdef LIBFREEC_CHKPT { FILE *f = fopen("rho_m.dat", "w"); do { if( fwrite( rho_m.data(), sizeof(*rho_m.data()), rho_m.size(), f ) != rho_m.size() ) { cerr << "error writing 'rho_m.dat': " << strerror(errno) << "\n"; fclose(f); unlink("rho_m.dat"); break; } cerr << "wrote 'rho_m.dat'\n"; fclose(f); } while(false); } #endif // lkajan: estimate inverse covariance matrix { int g_n = covm.d1, g_is = 0, g_msg = dbg, g_maxit = 1e5, g_jerr, g_brk = 0; int g_niter; g_fp_t g_thr = 1e-4; vector ww( g_n*g_n ); // estimated covariance matrix timeval t_before; if(dbg) gettimeofday(&t_before, NULL); { // Set up a timer to break this process if it overruns some process cputime limit _glasso_timer _gt(&g_brk, __icme_timeout, dbg); // No need to transpose for Fortran, matrix is symmetric. { vector g_Wd(g_n), g_WXj(g_n); glassofast_(&g_n, covm.data(), rho_m.data(), &g_thr, &g_maxit, &g_msg, &g_is, wwi.data(), ww.data(), g_Wd.data(), g_WXj.data(), &g_jerr, &g_brk); switch (g_jerr) { case 0: break; case 256: throw icme_timeout_error(bo::str(bo::format("inverse covariance matrix estimation exceeded time limit (%d sec)") % __icme_timeout )); break; default: throw std::runtime_error("an error occurred in glassofast_"); } g_niter = g_maxit; } } if(dbg){ timeval t_now, t_diff; gettimeofday(&t_now, NULL); timersub(&t_now, &t_before, &t_diff); fprintf(stderr, "%d iterations of glassofast took %g secs\n", g_niter, t_diff.tv_sec+t_diff.tv_usec/1e6 ); } } if( __dens == 0 ) break; // Contact density target is not set, no need to calculate density. prev_dens = density; { size_t nz = 0; for(size_t i = 0; i < wwi.d1; ++i) for(size_t j = i+1; j < wwi.d1; ++j) if( wwi(i, j) != 0 ) ++nz; density = (fp_t)nz / ( wwi.d1 * (wwi.d1-1) / 2 ); // triangle w/o diagonal } if(dbg) fprintf(stderr, "density = %7g, target sp = %g, |density-target sp|/target sp = %g\n", density, __dens, fabs(density - __dens)/__dens); } while( __dens == 0 || fabs(__dens - density)/__dens > 0.01 ); // lkajan: Is 0.01 strict enough? 1% of the 3% still means 17 contacts for a 340-long protein. But does that matter? Perhaps make 0.01 a parameter. } else { // lkajan: LAPACK matrix inversion: fast, but does not disregard gaps like glasso* do through high regularization, and no control over density. wwi = covm; { cov_vector _empty; covm.swap(_empty); } // lkajan: this trick releases memory, unlike clear() int N = wwi.d1, INFO; int IPIV[N+1]; timeval t_before; if(dbg) gettimeofday(&t_before, NULL); sgetrf_(&N,&N,wwi.data(),&N,IPIV,&INFO); if(INFO) throw std::runtime_error(bo::str(bo::format("LU factorization error %d") % INFO )); // lkajan: we could handle the singular matrix case if we wanted to, e.g. by increasing __pscnt_weight. if(dbg){ timeval t_now, t_diff; gettimeofday(&t_now, NULL); timersub(&t_now, &t_before, &t_diff); fprintf(stderr, "LU factorization took %g secs, ", t_diff.tv_sec+t_diff.tv_usec/1e6 ); } int LWORK = -1; vector WORK(1); sgetri_(&N,wwi.data(),&N,IPIV,WORK.data(),&LWORK,&INFO); LWORK = N*N < WORK[0] ? N*N : WORK[0]; WORK.resize(LWORK); sgetri_(&N,wwi.data(),&N,IPIV,WORK.data(),&LWORK,&INFO); if(INFO) throw std::runtime_error(bo::str(bo::format("matrix inversion error %d") % INFO )); if(dbg){ timeval t_now, t_diff; gettimeofday(&t_now, NULL); timersub(&t_now, &t_before, &t_diff); fprintf(stderr, "inverted matrix (incl LUf) in %g secs\n", t_diff.tv_sec+t_diff.tv_usec/1e6 ); } } { cov_vector _empty; covm.swap(_empty); } // lkajan: this trick releases memory, unlike clear() if(__timing){ timeval t_now, t_diff; gettimeofday(&t_now, NULL); timersub(&t_now, &t_before, &t_diff); (*__timing)["inv"] = t_diff.tv_sec+t_diff.tv_usec/1e6; } if(dbg) { size_t nz = 0; double checksum = 0; for(size_t i = 0; i < wwi.d1; ++i) for(size_t j = i+1; j < wwi.d1; ++j) if( wwi(i, j) != 0 ){ ++nz; checksum += wwi(i, j); } fp_t density = (fp_t)nz / wwi.d1 / (wwi.d1-1) * 2; // upper triangle w/o diagonal fprintf(stderr, "density of inverse covariance matrix = %g (cksum %.7g)\n", density, checksum); } // lkajan: Go back to __ali.alilen*covq x __ali.alilen*covq wwi matrix. // lkajan: Well, we could go back to __ali.alilen*21 x __ali.alilen*21 even? cov_vector wwiwg( __ali.alilen, wwi.q, 0 ); // wwi with gaps (if any) if(wwi.alilen != wwiwg.alilen) { uint16_t i = 0, covi = 0; vector::const_iterator gc_bi = ( __apply_gapth ? gap_cols.begin() : gap_cols.end() ), gc_ei = gap_cols.end(); for(; i < __ali.alilen; ++i) { if(gc_bi!=gc_ei && *gc_bi == i){ ++gc_bi; continue; } // gapped row uint16_t j = 0, covj = 0; vector::const_iterator gc_bj = ( __apply_gapth ? gap_cols.begin() : gap_cols.end() ), gc_ej = gap_cols.end(); for(; j < __ali.alilen; ++j) { if(gc_bj!=gc_ej && *gc_bj == j){ ++gc_bj; continue; } // gapped col for(uint8_t ai = 0; ai < wwiwg.q; ++ai) for(uint8_t aj = 0; aj < wwiwg.q; ++aj) wwiwg(i,j,ai,aj) = wwi(covi,covj,ai,aj); ++covj; } ++covi; } if(dbg) cerr<<"went back to gapped ("< _empty; wwi.swap(_empty); } // lkajan: this trick releases memory, unlike clear() // lkajan: Idea: how about doing APC over the scores of contacts beyond __mincontsep, instead of 1 (excluding diagonal only)? // PSICOV raw scores and background for average product correction (APC) [Dunn et al. 2008] // l1-norm (PSICOV) raw scores class _rawscore_l1norm_t : public _rawscore_calc_t { const cov_vector &_wwiwg; public: _rawscore_l1norm_t(const cov_vector &__wwiwg ) : _wwiwg(__wwiwg) {} virtual double operator()(const uint16_t i, const uint16_t j) const { double raw_ij = 0; for(uint8_t ai = 0; ai < 20; ++ai) // disregard gap (index 20) - works also if __cov20 is in effect for(uint8_t aj = 0; aj < 20; ++aj) raw_ij += fabs(_wwiwg(i,j,ai,aj)); return raw_ij; } } _rawscore_l1norm(wwiwg); _raw_score_matrix(raw_ctscore, apc_bg, apc_mean, __ali.alilen, "l1norm", _rawscore_l1norm ); if(dbg) fprintf(stderr, "collected apc_mean[l1norm] = %16.15g\n", apc_mean["l1norm"]); // Frobenius norm // EVfold-mfDCA::calculate_cn_scores() up to APC bit: obtain EVfold-mfDCA::fn_scores - courtesy of Thomas Hopf // Quoting Thomas: transform to zero-sum gauge according to Ekeberg et al. // Quoting Thomas: calculate Frobenius norm for each pair of columns i, j class _rawscore_fro_t : public _rawscore_calc_t { const cov_vector &_wwiwg; public: _rawscore_fro_t(const cov_vector &__wwiwg) : _wwiwg(__wwiwg) {} virtual double operator()(const uint16_t i, const uint16_t j) const { vector W_mf( 21*21, 0.0 ); vector rmean_W_mf( 21, 0.0 ); // row mean W_mf vector cmean_W_mf( 21, 0.0 ); // col mean W_mf double mean2_W_mf = 0; for(uint8_t ai = 0; ai < 20; ++ai) // lkajan: look out, 20, not covq! for(uint8_t aj = 0; aj < 20; ++aj) { const double cell = ( W_mf[ ai*21+aj ] = -_wwiwg(i,j,ai,aj) ); mean2_W_mf += cell; rmean_W_mf[ai] += cell/21; cmean_W_mf[aj] += cell/21; } mean2_W_mf /= W_mf.size(); // transform to zero-sum gauge according to Ekeberg et al. double sum_sum_W_mf_zsg2 = 0.0; for(uint8_t ai = 0; ai < 21; ++ai) for(uint8_t aj = 0; aj < 21; ++aj) { double W_mf_zsg = (double)W_mf[ ai*21+aj ] - rmean_W_mf[ai] - cmean_W_mf[aj] + mean2_W_mf; sum_sum_W_mf_zsg2 += W_mf_zsg*W_mf_zsg; } // calculate Frobenius norm for each pair of columns i, j double raw_ij = sqrt(sum_sum_W_mf_zsg2); return raw_ij; } } _rawscore_fro(wwiwg); _raw_score_matrix(raw_ctscore, apc_bg, apc_mean, __ali.alilen, "fro", _rawscore_fro ); if(dbg) fprintf(stderr, "collected apc_mean[fro] = %16.15g\n", apc_mean["fro"]); { cov_vector _empty; wwiwg.swap(_empty); } // lkajan: this trick releases memory, unlike clear() // Calculate final scores with average product correction (APC, Dunn et al., 2008) map > res; // l1norm res["l1norm"] = _apc( raw_ctscore["l1norm"], apc_bg["l1norm"], apc_mean["l1norm"], __mincontsep, true ); // MI_true - report raw values, not apc res["MI"] = _raw_as_is( raw_ctscore["MI"], __mincontsep ); // fro res["fro"] = _apc( raw_ctscore["fro"], apc_bg["fro"], apc_mean["fro"], __mincontsep, false ); // //{ timeval t_now, t_diff; gettimeofday(&t_now, NULL); timersub(&t_now, &t_run, &t_diff); // if(__timing) (*__timing)["run"] = t_diff.tv_sec+t_diff.tv_usec/1e6; if(dbg) fprintf(stderr, "run done in %g secs\n", t_diff.tv_sec+t_diff.tv_usec/1e6); } return res; } predictor::cont_res_t predictor::run(const ali_t& __ali, double __clustpc, double __density, double __gapth, uint16_t __mincontsep, double __pseudocnt, double __pscnt_weight, bool __estimate_ivcov, double __shrink_lambda, bool __cov20, bool __apply_gapth, double __rho, bool __veczw, int __num_threads, time_t __icme_timeout, time_res_t *__timing ) throw (alilen_error, icme_timeout_error, std::range_error, std::runtime_error) { timeval t_top; gettimeofday(&t_top, NULL); timeval t_before; gettimeofday(&t_before, NULL); freq_vec_t aliw; double wtot; get_seq_weights(aliw, wtot, __ali, __clustpc, __veczw, __num_threads); { timeval t_now, t_diff; gettimeofday(&t_now, NULL); timersub(&t_now, &t_before, &t_diff); if(__timing) (*__timing)["seqw"] = t_diff.tv_sec+t_diff.tv_usec/1e6; if(dbg) fprintf(stderr, "\nseq weight loop for %d seqs took %g secs\n", __ali.seqcnt, t_diff.tv_sec+t_diff.tv_usec/1e6 ); } cont_res_t ret = run(__ali, aliw, wtot, __density, __gapth, __mincontsep, __pseudocnt, __pscnt_weight, __estimate_ivcov, __shrink_lambda, __cov20, __apply_gapth, __rho, __num_threads, __icme_timeout, __timing); { timeval t_now, t_diff; gettimeofday(&t_now, NULL); timersub(&t_now, &t_top, &t_diff); if(__timing) (*__timing)["all"] = t_diff.tv_sec+t_diff.tv_usec/1e6; if(dbg) fprintf(stderr, "all done in %g secs\n", t_diff.tv_sec+t_diff.tv_usec/1e6); } return ret; } ali_t& ali_t::push(const std::vector& __al) throw (alilen_error) { if( __al.size() != alilen ) throw alilen_error(bo::str(bo::format("alignment length mismatch, expected %d, got %d") % alilen % __al.size() )); resize( ++seqcnt*alilen16, _mm_setzero_si128() ); uint8_t *alptr = reinterpret_cast(data()) + (seqcnt-1) * alilenpad; memcpy(alptr, __al.data(), __al.size()); if( capacity() == size() ) reserve( size() + 1024*alilen16 ); return *this; } /// Calculate raw contact scores using given function and collect row/column/overall mean for APC calculation. /** \param [out] __raw_ctscore * \param [out] __apc_bg * \param [out] __apc_mean * \param [in] __alilen * \param [in] __key * \param [in] __fo */ void _raw_score_matrix(map > &__raw_ctscore, map > &__apc_bg, map &__apc_mean, const uint16_t __alilen, const string &__key, const _rawscore_calc_t &__fo ) { ct_vector& raw_cts = __raw_ctscore[__key] = ct_vector( __alilen ); vector& apc_bg = __apc_bg[__key] = vector( __alilen ); double& mean = __apc_mean[__key] = 0; for(uint16_t i = 0; i < __alilen; ++i) for(uint16_t j = i+1; j < __alilen; ++j) { double raw_ij = __fo(i, j); raw_cts[ i*__alilen + j ] = raw_cts[ j*__alilen + i ] = raw_ij; mean += raw_ij; double bg_ij = raw_ij / (__alilen-1); apc_bg[i] += bg_ij; apc_bg[j] += bg_ij; } mean /= __alilen * (__alilen-1) / 2; } vector _apc( const ct_vector& __raw_ctscore, const vector& __apc_bg, const double __apc_mean, const uint16_t __mincontsep, bool __filt ) { const uint16_t alilen = __apc_bg.size(); // we expect that __raw_ctscore is a alilen*alilen matrix vector res; res.reserve( alilen*(alilen-1)/2*0.03 ); // reserve space for 3% contacts for(int i = 0, e = alilen-__mincontsep; i < e; ++i) for(uint16_t j = i+__mincontsep; j < alilen; ++j) if(!__filt || __raw_ctscore(i,j) > 0 ) // lkajan: psicov has this - it does strip off a few predictions. res.push_back( contact_t( i, j, __raw_ctscore(i,j) - __apc_bg[i] * __apc_bg[j] / __apc_mean ) ); return res; } vector _raw_as_is( const ct_vector& __raw_ctscore, const uint16_t __mincontsep ) { const uint16_t alilen = __raw_ctscore.alilen; vector res; res.reserve( alilen*(alilen-1)/2 ); for(int i = 0, e = alilen-__mincontsep; i < e; ++i) for(uint16_t j = i+__mincontsep; j < alilen; ++j) res.push_back( contact_t( i, j, __raw_ctscore(i,j) ) ); return res; } static inline uint32_t _cache_holds_nseq(uint16_t __seqsize) { long l1_dcache_size = sysconf(_SC_LEVEL1_DCACHE_SIZE); //long l1_dcache_linesize = sysconf(_SC_LEVEL1_DCACHE_LINESIZE); long l2_cache_size = sysconf(_SC_LEVEL2_CACHE_SIZE); //long l2_cache_linesize = sysconf(_SC_LEVEL2_CACHE_LINESIZE); long l3_cache_size = sysconf(_SC_LEVEL3_CACHE_SIZE); //long l3_cache_linesize = sysconf(_SC_LEVEL3_CACHE_LINESIZE); uint32_t wchunk = 0; if(l1_dcache_size > 0) wchunk = 0.5*l1_dcache_size / __seqsize; if(wchunk < 4 && l2_cache_size > 0) wchunk = 0.5*l2_cache_size / __seqsize; if(wchunk < 4 && l3_cache_size > 0) wchunk = 0.5*l3_cache_size / __seqsize; if(wchunk < 4){ wchunk = 0.5*1048576 / __seqsize; // assume there is a 1MB cache somewhere, even if we can not detect it if(wchunk < 4) wchunk = 4; } // jobtest2: // 100000 => 86s //wchunk = 100000 // L3 cache enough for 49152 // 20000 => 50s //wchunk = 20000 // L2 cache enough for 4096 // 2000 => 42s //wchunk = 2000 // L1 cache enough for 512 // 200 => 42s //wchunk = 200 // the below is the fastest so far, 50 is slower // 100 => 40.5s //wchunk = 100 // emak: // 100000 => 67s //wchunk = 100000 // L2 cache enough for 24576 // 10000 => 36s //wchunk = 10000 // L1 cache enough for 256 // 100 => 34.6s //wchunk = 100 // the below is the fastest so far, 25 is slower // 50 => 34.4s //wchunk = 50 return wchunk; } } // namespace freecontact // vim:et:ts=4:ai:sw=4: