/*********************************************************** * Given a MSA file, calculate the DI * * Input: a MSA file * Output: profile_file and DCA_file * * Tong Liu * last update: 1/5/2016 * ***********************************************************/ #include #include #include #include #include #include #include #include #include #include "Eigen/Dense" #include "Eigen/Eigenvalues" template class vector_4d { public: typedef typename std::vector::size_type size_type; typedef typename std::vector::iterator iterator; typedef typename std::vector::const_iterator const_iterator; vector_4d(size_type x, size_type y, size_type z, size_type w) :x_(x) ,y_(y) ,z_(z) ,w_(w) ,data_(x_*y_*z_*w_) {} const T& operator()(size_type x_pos, size_type y_pos, size_type z_pos, size_type w_pos) const { size_type pos_in_4d_plane = x_ * y_ * z_ * w_pos; size_type pos_in_3d_plane = x_ * y_ * z_pos; size_type pos_in_2d_plane = x_ * y_pos; size_type pos_in_1d_plane = x_pos; return data_[pos_in_4d_plane + pos_in_3d_plane + pos_in_2d_plane + pos_in_1d_plane]; } T& operator()(size_type x_pos, size_type y_pos, size_type z_pos, size_type w_pos) { size_type pos_in_4d_plane = x_ * y_ * z_ * w_pos; size_type pos_in_3d_plane = x_ * y_ * z_pos; size_type pos_in_2d_plane = x_ * y_pos; size_type pos_in_1d_plane = x_pos; return data_[pos_in_4d_plane + pos_in_3d_plane + pos_in_2d_plane + pos_in_1d_plane]; } iterator begin() { return data_.begin(); } iterator end() { return data_.end(); } const_iterator begin() const { return data_.begin(); } const_iterator end() const { return data_.end(); } private: size_type x_; size_type y_; size_type z_; size_type w_; std::vector data_; }; template struct incrementor : std::unary_function { incrementor(const T& initial_value=T()) :value_(initial_value) {} T operator()(T) { return value_++; } private: T value_; }; using namespace Eigen; using namespace std; void get_dimension(char *, int &, int &); double letter2number(char c); void return_alignment(MatrixXd &, char *); void compute_true_frequencies(MatrixXd &, int, int, int, float, vector_4d &, MatrixXd &, double &); void with_pc(vector_4d &, MatrixXd &, float, int, int, vector_4d &, MatrixXd &); void Compute_C(vector_4d &, MatrixXd &, int, int, MatrixXd &); int mapkey(int, int, int); void Compute_Results(vector_4d &, MatrixXd &, vector_4d &, MatrixXd &, MatrixXd &, int, int, MatrixXd &); void ReturnW(MatrixXd &, int, int, int, MatrixXd &); void bp_link(int, int, MatrixXd &, MatrixXd &, int, double &); void compute_mu(int, int, MatrixXd &, MatrixXd &, int, MatrixXd &, MatrixXd &); double compute_di(int, int, MatrixXd &, MatrixXd &, MatrixXd &, MatrixXd &); int main(int argc, char **argv){ char *msa_file = argv[1]; char *profile_file = argv[2]; char *DI_file = argv[3]; if(argc != 4){ cout << endl << "Need three arguments: " << endl; cout << "\t(1) MSA file; (2) Profile file; (3) DI Output file." << endl; cout << endl; cout << "Reference: " << endl; cout << "\tMorcos,F. Direct-coupling analysis of residue coevolution captures native contacts across many protein families. (2011)" << endl; cout << endl << endl; exit(1); } // relative weight of pseudp count float pseudocount_weight = 0.5; // threshold for sequence id in reweighting float theta = 0.2; //---------------------------------- // function: return_alignment int M = 0; int N = 0; int q; get_dimension(msa_file, M, N); MatrixXd align(M, N); return_alignment(align, msa_file); q = align.maxCoeff(); //cout << "M=" << M << " N=" << N << " q=" << q << endl; //----------------------------------------- // function: compute_true_frequencies double Meff = 0.0; vector_4d Pij_true(N,N,q,q); vector_4d Pij(N,N,q,q); //std::transform(Pij_true.begin(), Pij_true.end(), Pij_true.begin(), incrementor(0)); //std::transform(Pij.begin(), Pij.end(), Pij.begin(), incrementor(0)); for(int i=0; i epsilon){ MatrixXd W_adjoint = W.adjoint(); MatrixXd scra1 = mu2*W_adjoint; MatrixXd scra2 = mu1*W; MatrixXd new1 = pi.cwiseQuotient(scra1); double tmp_1 = new1.sum(); new1 = new1/tmp_1; MatrixXd new2 = pj.cwiseQuotient(scra2); double tmp_2 = new2.sum(); new2 = new2/tmp_2; double d1 = (new1 - mu1).cwiseAbs().maxCoeff(); double d2 = (new2 - mu2).cwiseAbs().maxCoeff(); if(d1 >= d2){ diff = d1; }else{ diff = d2; } mu1 = new1; mu2 = new2; } } double compute_di(int i, int j, MatrixXd &W, MatrixXd &mu1, MatrixXd &mu2, MatrixXd &Pi){ double tiny = 1.0e-100; MatrixXd mu1_adjoint = mu1.adjoint(); MatrixXd Pdir = (W.cwiseProduct(mu1_adjoint*mu2)).matrix(); Pdir = Pdir/(Pdir.sum()); MatrixXd Pii = Pi.row(i).adjoint(); MatrixXd Pfac = Pii*(Pi.row(j)); MatrixXd Pdir_adjoint = Pdir.adjoint(); int q = Pdir.rows(); Pdir.array() += tiny; Pfac.array() += tiny; MatrixXd Pdirfac = ((Pdir.cwiseQuotient(Pfac)).array().log()).matrix(); MatrixXd tmp = Pdir_adjoint * Pdirfac; double tmp_di = tmp.trace(); return tmp_di; } void bp_link(int i, int j, MatrixXd &W, MatrixXd &Pi, int q, double &DI_mf_pc){ MatrixXd mu1 = MatrixXd::Ones(1,q); mu1 /= q; MatrixXd mu2 = MatrixXd::Ones(1,q); mu2 /= q; compute_mu(i, j, W, Pi, q, mu1, mu2); DI_mf_pc = compute_di(i, j, W, mu1, mu2, Pi); } void Compute_Results(vector_4d &Pij, MatrixXd &Pi, vector_4d &Pij_true, MatrixXd &Pi_true, MatrixXd &invC, int n, int q, MatrixXd & DI){ for(int i=0; i<(n-1); i++){ for(int j=(i+1); j &Pij, MatrixXd &Pi, int n, int q, MatrixXd &C){ for(int i=0; i &Pij_true, MatrixXd &Pi_true, float p_w, int n, int q, vector_4d &Pij, MatrixXd &Pi){ Pi = (1.0 - p_w)*Pi_true + (p_w/q)*MatrixXd::Ones(n,q); for(int i=0; i &Pij_true, MatrixXd &Pi_true, double &Meff){ double W[m]; for(int i=0; i 0.0){ for(int i=0; i'){ continue; } int nums = line.length(); line.clear(); n = nums; m++; line.clear(); } myfile.close(); } double letter2number(char c){ double x; switch(c){ case '-': x=1; break; case 'A': x=2; break; case 'C': x=3; break; case 'D': x=4; break; case 'E': x=5; break; case 'F': x=6; break; case 'G': x=7; break; case 'H': x=8; break; case 'I': x=9; break; case 'K': x=10; break; case 'L': x=11; break; case 'M': x=12; break; case 'N': x=13; break; case 'P': x=14; break; case 'Q': x=15; break; case 'R': x=16; break; case 'S': x=17; break; case 'T': x=18; break; case 'V': x=19; break; case 'W': x=20; break; case 'Y': x=21; break; default: x=1; break; } return x; } void return_alignment(MatrixXd &align, char *msa_file){ ifstream myfile; myfile.open(msa_file); string line; int rows = 0; while(getline(myfile, line)){ if(line.at(0) == '>'){ continue; } int cols = 0; int num = line.length(); while(cols < num){ align(rows, cols) = letter2number(line.at(cols)); cols++; } rows++; line.clear(); } myfile.close(); }