diff --git a/.gitignore b/.gitignore index 59a0fef..9dba05a 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,8 @@ .*.sw* # binary executables +addChainID +biounitasym cif2pdb TMalign TMalignc @@ -25,4 +27,5 @@ ca2rr *.zip .idea +.gitignore cmake-build-debug diff --git a/MMalign.cpp b/MMalign.cpp index 72f4b45..5d8e4b4 100644 --- a/MMalign.cpp +++ b/MMalign.cpp @@ -6,131 +6,130 @@ using namespace std; void print_version() { - cout << -"\n" -" **********************************************************************\n" -" * MM-align (Version 20231222): complex structure alignment *\n" -" * References: S Mukherjee, Y Zhang. Nucl Acids Res 37(11):e83 (2009) *\n" -" * Please email comments and suggestions to yangzhanglab@umich.edu *\n" -" **********************************************************************" - << endl; + cout << "\n" + " **********************************************************************\n" + " * MM-align (Version 20231222): complex structure alignment *\n" + " * References: S Mukherjee, Y Zhang. Nucl Acids Res 37(11):e83 (2009) *\n" + " * Please email comments and suggestions to yangzhanglab@umich.edu *\n" + " **********************************************************************" + << endl; } void print_extra_help() { - cout << -"Additional options:\n" -" -fast Fast but slightly inaccurate alignment\n" -"\n" -" -dir1 Use a list of PDB chains listed by 'chain1_list' under\n" -" 'chain1_folder' as all chains for the first complex.\n" -" Note that the slash is necessary.\n" -" $ MMalign -dir1 chain1_folder/ chain1_list complex2\n" -"\n" -" -dir2 Use a list of PDB chains listed by'chain2_list'\n" -" under 'chain2_folder' as all chains for the second complex.\n" -" $ MMalign complex1 -dir2 chain2_folder/ chain2_list\n" -"\n" -" -suffix (Only when -dir1 and/or -dir2 are set, default is empty)\n" -" add file name suffix to files listed by chain1_list or chain2_list\n" -"\n" -" -atom 4-character atom name used to represent a residue.\n" -" Default is \" C3'\" for RNA/DNA and \" CA \" for proteins\n" -" (note the spaces before and after CA).\n" -"\n" -" -mol Types of molecules to align\n""Molecule type: RNA or protein\n" -" auto : (default) align both proteins and nucleic acids\n" -" protein: only align proteins\n" -" RNA : only align nucleic acids (RNA and DNA)\n" -"\n" -" -split Whether to split PDB file into multiple chains\n" -" 2: (default) treat each chain as a seperate chain (-ter should be <=1)\n" -" 1: treat each MODEL as a separate chain (-ter should be 0)\n" -" and joins all chains in a MODEL into a single chain.\n" -"\n" -" -outfmt Output format\n" -" 0: (default) full output\n" -" 1: fasta format compact output\n" -" 2: tabular format very compact output\n" -" -1: full output, but without version or citation information\n" -"\n" -" -TMcut -1: (default) do not consider TMcut\n" -" Values in [0.5,1): Do not proceed with TM-align for this\n" -" structure pair if TM-score is unlikely to reach TMcut.\n" -" TMcut is normalized is set by -a option:\n" -" -2: normalized by longer structure length\n" -" -1: normalized by shorter structure length\n" -" 0: (default, same as F) normalized by second structure\n" -" 1: same as T, normalized by average structure length\n" -"\n" -" -mirror Whether to align the mirror image of input structure\n" -" 0: (default) do not align mirrored structure\n" -" 1: align mirror of chain1 to origin chain2\n" -"\n" -" -het Whether to align residues marked as 'HETATM' in addition to 'ATOM '\n" -" 0: (default) only align 'ATOM ' residues\n" -" 1: align both 'ATOM ' and 'HETATM' residues\n" -"\n" -" -infmt1 Input format for complex1\n" -" -infmt2 Input format for complex2\n" -" -1: (default) automatically detect PDB or PDBx/mmCIF format\n" -" 0: PDB format\n" -" 1: SPICKER format\n" -" 2: xyz format\n" -" 3: PDBx/mmCIF format\n" - < sequence; // get value from alignment file - double d0_scale =0; + string xname = ""; + string yname = ""; + string fname_super = ""; // file name for superposed structure + string fname_lign = ""; // file name for user alignment + string fname_matrix = ""; // file name for output matrix + vector sequence; // get value from alignment file + double d0_scale = 0; bool h_opt = false; // print full help message bool v_opt = false; // print version bool m_opt = false; // flag for -m, output rotation matrix bool o_opt = false; // flag for -o, output superposed structure - int a_opt = 0; // flag for -a, do not normalized by average length + int a_opt = 0; // flag for -a, do not normalized by average length bool d_opt = false; // flag for -d, user specified d0 - bool full_opt = false;// do not show chain level alignment - double TMcut =-1; - int infmt1_opt=-1; // PDB or PDBx/mmCIF format for chain_1 - int infmt2_opt=-1; // PDB or PDBx/mmCIF format for chain_2 - int ter_opt =1; // ENDMDL or END - int split_opt =2; // split by chain - int outfmt_opt=0; // set -outfmt to full output - bool fast_opt =false; // flags for -fast, fTM-align algorithm - int mirror_opt=0; // do not align mirror - int het_opt =0; // do not read HETATM residues - string atom_opt ="auto";// use C alpha atom for protein and C3' for RNA - string mol_opt ="auto";// auto-detect the molecule type as protein/RNA - string suffix_opt=""; // set -suffix to empty - string dir1_opt =""; // set -dir1 to empty - string dir2_opt =""; // set -dir2 to empty + bool full_opt = false; // do not show chain level alignment + double TMcut = -1; + int infmt1_opt = -1; // PDB or PDBx/mmCIF format for chain_1 + int infmt2_opt = -1; // PDB or PDBx/mmCIF format for chain_2 + int ter_opt = 1; // ENDMDL or END + int split_opt = 2; // split by chain + int outfmt_opt = 0; // set -outfmt to full output + bool fast_opt = false; // flags for -fast, fTM-align algorithm + int mirror_opt = 0; // do not align mirror + int het_opt = 0; // do not read HETATM residues + string atom_opt = "auto"; // use C alpha atom for protein and C3' for RNA + string mol_opt = "auto"; // auto-detect the molecule type as protein/RNA + string suffix_opt = ""; // set -suffix to empty + string dir1_opt = ""; // set -dir1 to empty + string dir2_opt = ""; // set -dir2 to empty vector chain1_list; // only when -dir1 is set vector chain2_list; // only when -dir2 is set vector chain2parse1; @@ -175,266 +174,299 @@ int main(int argc, char *argv[]) vector model2parse1; vector model2parse2; - for(int i = 1; i < argc; i++) + for (int i = 1; i < argc; i++) { - if ( !strcmp(argv[i],"-o") && i < (argc-1) ) + if (!strcmp(argv[i], "-o") && i < (argc - 1)) { - fname_super = argv[i + 1]; o_opt = true; i++; + fname_super = argv[i + 1]; + o_opt = true; + i++; } - else if ( !strcmp(argv[i],"-a") && i < (argc-1) ) + else if (!strcmp(argv[i], "-a") && i < (argc - 1)) { - if (!strcmp(argv[i + 1], "T")) a_opt=true; - else if (!strcmp(argv[i + 1], "F")) a_opt=false; - else + if (!strcmp(argv[i + 1], "T")) + a_opt = true; + else if (!strcmp(argv[i + 1], "F")) + a_opt = false; + else { - a_opt=atoi(argv[i + 1]); - if (a_opt!=-2 && a_opt!=-1 && a_opt!=1) + a_opt = atoi(argv[i + 1]); + if (a_opt != -2 && a_opt != -1 && a_opt != 1) PrintErrorAndQuit("-a must be -2, -1, 1, T or F"); } i++; } - else if ( !strcmp(argv[i],"-full") && i < (argc-1) ) + else if (!strcmp(argv[i], "-full") && i < (argc - 1)) { - if (!strcmp(argv[i + 1], "T")) full_opt=true; - else if (!strcmp(argv[i + 1], "F")) full_opt=false; - else PrintErrorAndQuit("-full must be T or F"); + if (!strcmp(argv[i + 1], "T")) + full_opt = true; + else if (!strcmp(argv[i + 1], "F")) + full_opt = false; + else + PrintErrorAndQuit("-full must be T or F"); i++; } - else if ( !strcmp(argv[i],"-d") && i < (argc-1) ) + else if (!strcmp(argv[i], "-d") && i < (argc - 1)) { - d0_scale = atof(argv[i + 1]); d_opt = true; i++; + d0_scale = atof(argv[i + 1]); + d_opt = true; + i++; } - else if ( !strcmp(argv[i],"-v") ) + else if (!strcmp(argv[i], "-v")) { v_opt = true; } - else if ( !strcmp(argv[i],"-h") ) + else if (!strcmp(argv[i], "-h")) { h_opt = true; } - else if (!strcmp(argv[i], "-chain1") ) + else if (!strcmp(argv[i], "-chain1")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -chain1"); - split(argv[i+1],chain2parse1,','); + split(argv[i + 1], chain2parse1, ','); i++; } - else if (!strcmp(argv[i], "-chain2") ) + else if (!strcmp(argv[i], "-chain2")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -chain2"); - split(argv[i+1],chain2parse2,','); + split(argv[i + 1], chain2parse2, ','); i++; } - else if (!strcmp(argv[i], "-model1") ) + else if (!strcmp(argv[i], "-model1")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -model1"); - split(argv[i+1],model2parse1,','); + split(argv[i + 1], model2parse1, ','); i++; } - else if (!strcmp(argv[i], "-model2") ) + else if (!strcmp(argv[i], "-model2")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -model2"); - split(argv[i+1],model2parse2,','); + split(argv[i + 1], model2parse2, ','); i++; } - else if (!strcmp(argv[i], "-m") && i < (argc-1) ) + else if (!strcmp(argv[i], "-m") && i < (argc - 1)) { - fname_matrix = argv[i + 1]; m_opt = true; i++; - }// get filename for rotation matrix + fname_matrix = argv[i + 1]; + m_opt = true; + i++; + } // get filename for rotation matrix else if (!strcmp(argv[i], "-fast")) { fast_opt = true; } - else if ( !strcmp(argv[i],"-infmt1") && i < (argc-1) ) + else if (!strcmp(argv[i], "-infmt1") && i < (argc - 1)) { - infmt1_opt=atoi(argv[i + 1]); i++; + infmt1_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-infmt2") && i < (argc-1) ) + else if (!strcmp(argv[i], "-infmt2") && i < (argc - 1)) { - infmt2_opt=atoi(argv[i + 1]); i++; + infmt2_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-ter") && i < (argc-1) ) + else if (!strcmp(argv[i], "-ter") && i < (argc - 1)) { - ter_opt=atoi(argv[i + 1]); i++; + ter_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-split") && i < (argc-1) ) + else if (!strcmp(argv[i], "-split") && i < (argc - 1)) { - split_opt=atoi(argv[i + 1]); i++; + split_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-atom") && i < (argc-1) ) + else if (!strcmp(argv[i], "-atom") && i < (argc - 1)) { - atom_opt=argv[i + 1]; i++; + atom_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-mol") && i < (argc-1) ) + else if (!strcmp(argv[i], "-mol") && i < (argc - 1)) { - mol_opt=argv[i + 1]; i++; + mol_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-dir1") && i < (argc-1) ) + else if (!strcmp(argv[i], "-dir1") && i < (argc - 1)) { - dir1_opt=argv[i + 1]; i++; + dir1_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-dir2") && i < (argc-1) ) + else if (!strcmp(argv[i], "-dir2") && i < (argc - 1)) { - dir2_opt=argv[i + 1]; i++; + dir2_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-suffix") && i < (argc-1) ) + else if (!strcmp(argv[i], "-suffix") && i < (argc - 1)) { - suffix_opt=argv[i + 1]; i++; + suffix_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-outfmt") && i < (argc-1) ) + else if (!strcmp(argv[i], "-outfmt") && i < (argc - 1)) { - outfmt_opt=atoi(argv[i + 1]); i++; + outfmt_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-TMcut") && i < (argc-1) ) + else if (!strcmp(argv[i], "-TMcut") && i < (argc - 1)) { - TMcut=atof(argv[i + 1]); i++; + TMcut = atof(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-het") && i < (argc-1) ) + else if (!strcmp(argv[i], "-het") && i < (argc - 1)) { - het_opt=atoi(argv[i + 1]); i++; + het_opt = atoi(argv[i + 1]); + i++; } - else if (xname.size() == 0) xname=argv[i]; - else if (yname.size() == 0) yname=argv[i]; - else PrintErrorAndQuit(string("ERROR! Undefined option ")+argv[i]); + else if (xname.size() == 0) + xname = argv[i]; + else if (yname.size() == 0) + yname = argv[i]; + else + PrintErrorAndQuit(string("ERROR! Undefined option ") + argv[i]); } - if(yname.size()==0) + if (yname.size() == 0) { - if (h_opt) print_help(h_opt); + if (h_opt) + print_help(h_opt); if (v_opt) { print_version(); exit(EXIT_FAILURE); } - if (xname.size()==0) + if (xname.size() == 0) PrintErrorAndQuit("Please provide input structures"); PrintErrorAndQuit("Please provide the second input structure"); } - if (suffix_opt.size() && dir1_opt.size()+dir2_opt.size()==0) + if (suffix_opt.size() && dir1_opt.size() + dir2_opt.size() == 0) PrintErrorAndQuit("-suffix is only valid if -dir1 or -dir2 is set"); if ((dir1_opt.size() || dir2_opt.size()) && (m_opt || o_opt)) PrintErrorAndQuit("-m or -o cannot be set with -dir1 or -dir2"); - if (atom_opt.size()!=4) + if (atom_opt.size() != 4) PrintErrorAndQuit("ERROR! Atom name must have 4 characters, including space."); - if (mol_opt!="auto" && mol_opt!="protein" && mol_opt!="RNA") + if (mol_opt != "auto" && mol_opt != "protein" && mol_opt != "RNA") PrintErrorAndQuit("ERROR! Molecule type must be either RNA or protein."); - else if (mol_opt=="protein" && atom_opt=="auto") - atom_opt=" CA "; - else if (mol_opt=="RNA" && atom_opt=="auto") - atom_opt=" C3'"; + else if (mol_opt == "protein" && atom_opt == "auto") + atom_opt = " CA "; + else if (mol_opt == "RNA" && atom_opt == "auto") + atom_opt = " C3'"; - if (d_opt && d0_scale<=0) + if (d_opt && d0_scale <= 0) PrintErrorAndQuit("Wrong value for option -d! It should be >0"); - if (outfmt_opt>=2 && (a_opt || d_opt)) + if (outfmt_opt >= 2 && (a_opt || d_opt)) PrintErrorAndQuit("-outfmt 2 cannot be used with -a, -d"); - if (ter_opt!=0 && ter_opt!=1) + if (ter_opt != 0 && ter_opt != 1) PrintErrorAndQuit("-ter should be 1 or 0"); - if (split_opt!=1 && split_opt!=2) + if (split_opt != 1 && split_opt != 2) PrintErrorAndQuit("-split should be 1 or 2"); - else if (split_opt==1 && ter_opt!=0) + else if (split_opt == 1 && ter_opt != 0) PrintErrorAndQuit("-split 1 should be used with -ter 0"); if (m_opt && fname_matrix == "") // Output rotation matrix: matrix.txt PrintErrorAndQuit("ERROR! Please provide a file name for option -m!"); /* parse file list */ - if (dir1_opt.size()==0) chain1_list.push_back(xname); - else file2chainlist(chain1_list, xname, dir1_opt, suffix_opt); + if (dir1_opt.size() == 0) + chain1_list.push_back(xname); + else + file2chainlist(chain1_list, xname, dir1_opt, suffix_opt); - if (dir2_opt.size()==0) chain2_list.push_back(yname); - else file2chainlist(chain2_list, yname, dir2_opt, suffix_opt); + if (dir2_opt.size() == 0) + chain2_list.push_back(yname); + else + file2chainlist(chain2_list, yname, dir2_opt, suffix_opt); - if (outfmt_opt==2) - cout<<"#PDBchain1\tPDBchain2\tTM1\tTM2\t" - <<"RMSD\tID1\tID2\tIDali\tL1\tL2\tLali"< > > xa_vec; // structure of complex1 - vector > > ya_vec; // structure of complex2 - vector >seqx_vec; // sequence of complex1 - vector >seqy_vec; // sequence of complex2 - vector >secx_vec; // secondary structure of complex1 - vector >secy_vec; // secondary structure of complex2 - vector mol_vec1; // molecule type of complex1, RNA if >0 - vector mol_vec2; // molecule type of complex2, RNA if >0 - vector chainID_list1; // list of chainID1 - vector chainID_list2; // list of chainID2 - vector xlen_vec; // length of complex1 - vector ylen_vec; // length of complex2 - int i,j; // chain index - int xlen, ylen; // chain length - double **xa, **ya; // structure of single chain - char *seqx, *seqy; // for the protein sequence - char *secx, *secy; // for the secondary structure - int xlen_aa,ylen_aa; // total length of protein - int xlen_na,ylen_na; // total length of RNA/DNA - vector resi_vec1; // residue index for chain1 - vector resi_vec2; // residue index for chain2 + vector>> xa_vec; // structure of complex1 + vector>> ya_vec; // structure of complex2 + vector> seqx_vec; // sequence of complex1 + vector> seqy_vec; // sequence of complex2 + vector> secx_vec; // secondary structure of complex1 + vector> secy_vec; // secondary structure of complex2 + vector mol_vec1; // molecule type of complex1, RNA if >0 + vector mol_vec2; // molecule type of complex2, RNA if >0 + vector chainID_list1; // list of chainID1 + vector chainID_list2; // list of chainID2 + vector xlen_vec; // length of complex1 + vector ylen_vec; // length of complex2 + int i, j; // chain index + int xlen, ylen; // chain length + double **xa, **ya; // structure of single chain + char *seqx, *seqy; // for the protein sequence + char *secx, *secy; // for the secondary structure + int xlen_aa, ylen_aa; // total length of protein + int xlen_na, ylen_na; // total length of RNA/DNA + vector resi_vec1; // residue index for chain1 + vector resi_vec2; // residue index for chain2 /* parse complex */ parse_chain_list(chain1_list, xa_vec, seqx_vec, secx_vec, mol_vec1, - xlen_vec, chainID_list1, ter_opt, split_opt, mol_opt, infmt1_opt, - atom_opt, false, mirror_opt, het_opt, xlen_aa, xlen_na, o_opt, - resi_vec1, chain2parse1, model2parse1); - if (xa_vec.size()==0) PrintErrorAndQuit("ERROR! 0 chain in complex 1"); + xlen_vec, chainID_list1, ter_opt, split_opt, mol_opt, infmt1_opt, + atom_opt, false, mirror_opt, het_opt, xlen_aa, xlen_na, o_opt, + resi_vec1, chain2parse1, model2parse1); + if (xa_vec.size() == 0) + PrintErrorAndQuit("ERROR! 0 chain in complex 1"); parse_chain_list(chain2_list, ya_vec, seqy_vec, secy_vec, mol_vec2, - ylen_vec, chainID_list2, ter_opt, split_opt, mol_opt, infmt2_opt, - atom_opt, false, 0, het_opt, ylen_aa, ylen_na, o_opt, - resi_vec2, chain2parse2, model2parse2); - if (ya_vec.size()==0) PrintErrorAndQuit("ERROR! 0 chain in complex 2"); - int len_aa=getmin(xlen_aa,ylen_aa); - int len_na=getmin(xlen_na,ylen_na); + ylen_vec, chainID_list2, ter_opt, split_opt, mol_opt, infmt2_opt, + atom_opt, false, 0, het_opt, ylen_aa, ylen_na, o_opt, + resi_vec2, chain2parse2, model2parse2); + if (ya_vec.size() == 0) + PrintErrorAndQuit("ERROR! 0 chain in complex 2"); + int len_aa = getmin(xlen_aa, ylen_aa); + int len_na = getmin(xlen_na, ylen_na); if (a_opt) { - len_aa=(xlen_aa+ylen_aa)/2; - len_na=(xlen_na+ylen_na)/2; + len_aa = (xlen_aa + ylen_aa) / 2; + len_na = (xlen_na + ylen_na) / 2; } - map chainmap; + map chainmap; /* perform monomer alignment if there is only one chain */ - if (xa_vec.size()==1 && ya_vec.size()==1) + if (xa_vec.size() == 1 && ya_vec.size() == 1) { xlen = xlen_vec[0]; ylen = ylen_vec[0]; - seqx = new char[xlen+1]; - seqy = new char[ylen+1]; - secx = new char[xlen+1]; - secy = new char[ylen+1]; + seqx = new char[xlen + 1]; + seqy = new char[ylen + 1]; + secx = new char[xlen + 1]; + secy = new char[ylen + 1]; NewArray(&xa, xlen, 3); NewArray(&ya, ylen, 3); - copy_chain_data(xa_vec[0],seqx_vec[0],secx_vec[0], xlen,xa,seqx,secx); - copy_chain_data(ya_vec[0],seqy_vec[0],secy_vec[0], ylen,ya,seqy,secy); - + copy_chain_data(xa_vec[0], seqx_vec[0], secx_vec[0], xlen, xa, seqx, secx); + copy_chain_data(ya_vec[0], seqy_vec[0], secy_vec[0], ylen, ya, seqy, secy); + /* declare variable specific to this pair of TMalign */ double t0[3], u0[3][3]; double TM1, TM2; - double TM3, TM4, TM5; // for a_opt, u_opt, d_opt + double TM3, TM4, TM5; // for a_opt, u_opt, d_opt double d0_0, TM_0; double d0A, d0B, d0u, d0a; - double d0_out=5.0; - string seqM, seqxA, seqyA;// for output alignment + double d0_out = 5.0; + string seqM, seqxA, seqyA; // for output alignment double rmsd0 = 0.0; - int L_ali; // Aligned length in standard_TMscore - double Liden=0; - double TM_ali, rmsd_ali; // TMscore and rmsd in standard_TMscore - int n_ali=0; - int n_ali8=0; + int L_ali; // Aligned length in standard_TMscore + double Liden = 0; + double TM_ali, rmsd_ali; // TMscore and rmsd in standard_TMscore + int n_ali = 0; + int n_ali8 = 0; vector do_vec; /* entry function for structure alignment */ TMalign_main(xa, ya, seqx, seqy, secx, secy, - t0, u0, TM1, TM2, TM3, TM4, TM5, - d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, - seqM, seqxA, seqyA, do_vec, - rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, - xlen, ylen, sequence, 0, d0_scale, - 0, a_opt, false, d_opt, fast_opt, - mol_vec1[0]+mol_vec2[0],TMcut); + t0, u0, TM1, TM2, TM3, TM4, TM5, + d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, + seqM, seqxA, seqyA, do_vec, + rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, + xlen, ylen, sequence, 0, d0_scale, + 0, a_opt, false, d_opt, fast_opt, + mol_vec1[0] + mol_vec2[0], TMcut, 0); /* print result */ output_results( @@ -444,7 +476,7 @@ int main(int argc, char *argv[]) xlen, ylen, t0, u0, TM1, TM2, TM3, TM4, TM5, rmsd0, d0_out, seqM.c_str(), seqxA.c_str(), seqyA.c_str(), Liden, n_ali8, L_ali, TM_ali, rmsd_ali, TM_0, d0_0, d0A, d0B, - 0, d0_scale, d0a, d0u, (m_opt?fname_matrix:"").c_str(), + 0, d0_scale, d0a, d0u, (m_opt ? fname_matrix : "").c_str(), outfmt_opt, ter_opt, true, split_opt, o_opt, fname_super, 0, a_opt, false, d_opt, mirror_opt, resi_vec1, resi_vec2); @@ -452,135 +484,141 @@ int main(int argc, char *argv[]) seqM.clear(); seqxA.clear(); seqyA.clear(); - delete[]seqx; - delete[]seqy; - delete[]secx; - delete[]secy; - DeleteArray(&xa,xlen); - DeleteArray(&ya,ylen); + delete[] seqx; + delete[] seqy; + delete[] secx; + delete[] secy; + DeleteArray(&xa, xlen); + DeleteArray(&ya, ylen); chain1_list.clear(); chain2_list.clear(); sequence.clear(); do_vec.clear(); - vector > >().swap(xa_vec); // structure of complex1 - vector > >().swap(ya_vec); // structure of complex2 - vector >().swap(seqx_vec); // sequence of complex1 - vector >().swap(seqy_vec); // sequence of complex2 - vector >().swap(secx_vec); // secondary structure of complex1 - vector >().swap(secy_vec); // secondary structure of complex2 - mol_vec1.clear(); // molecule type of complex1, RNA if >0 - mol_vec2.clear(); // molecule type of complex2, RNA if >0 - chainID_list1.clear(); // list of chainID1 - chainID_list2.clear(); // list of chainID2 - xlen_vec.clear(); // length of complex1 - ylen_vec.clear(); // length of complex2 + vector>>().swap(xa_vec); // structure of complex1 + vector>>().swap(ya_vec); // structure of complex2 + vector>().swap(seqx_vec); // sequence of complex1 + vector>().swap(seqy_vec); // sequence of complex2 + vector>().swap(secx_vec); // secondary structure of complex1 + vector>().swap(secy_vec); // secondary structure of complex2 + mol_vec1.clear(); // molecule type of complex1, RNA if >0 + mol_vec2.clear(); // molecule type of complex2, RNA if >0 + chainID_list1.clear(); // list of chainID1 + chainID_list2.clear(); // list of chainID2 + xlen_vec.clear(); // length of complex1 + ylen_vec.clear(); // length of complex2 t2 = clock(); - float diff = ((float)t2 - (float)t1)/CLOCKS_PER_SEC; + float diff = ((float)t2 - (float)t1) / CLOCKS_PER_SEC; printf("#Total CPU time is %5.2f seconds\n", diff); return 0; } /* declare TM-score tables */ - int chain1_num=xa_vec.size(); - int chain2_num=ya_vec.size(); - vector tmp_str_vec(chain2_num,""); + int chain1_num = xa_vec.size(); + int chain2_num = ya_vec.size(); + vector tmp_str_vec(chain2_num, ""); double **TMave_mat; double **ut_mat; // rotation matrices for all-against-all alignment - int ui,uj,ut_idx; - NewArray(&TMave_mat,chain1_num,chain2_num); - NewArray(&ut_mat,chain1_num*chain2_num,4*3); - vector >seqxA_mat(chain1_num,tmp_str_vec); - vector > seqM_mat(chain1_num,tmp_str_vec); - vector >seqyA_mat(chain1_num,tmp_str_vec); + int ui, uj, ut_idx; + NewArray(&TMave_mat, chain1_num, chain2_num); + NewArray(&ut_mat, chain1_num * chain2_num, 4 * 3); + vector> seqxA_mat(chain1_num, tmp_str_vec); + vector> seqM_mat(chain1_num, tmp_str_vec); + vector> seqyA_mat(chain1_num, tmp_str_vec); - double maxTMmono=-1; - int maxTMmono_i,maxTMmono_j; + double maxTMmono = -1; + int maxTMmono_i, maxTMmono_j; /* get all-against-all alignment */ - if (len_aa+len_na>500) fast_opt=true; - for (i=0;i 500) + fast_opt = true; + for (i = 0; i < chain1_num; i++) { - xlen=xlen_vec[i]; - if (xlen<3) + xlen = xlen_vec[i]; + if (xlen < 3) { - for (j=0;j do_vec; - int Lnorm_tmp=len_aa; - if (mol_vec1[i]+mol_vec2[j]>0) Lnorm_tmp=len_na; + int Lnorm_tmp = len_aa; + if (mol_vec1[i] + mol_vec2[j] > 0) + Lnorm_tmp = len_na; /* entry function for structure alignment */ TMalign_main(xa, ya, seqx, seqy, secx, secy, - t0, u0, TM1, TM2, TM3, TM4, TM5, - d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, - seqM, seqxA, seqyA, do_vec, - rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, - xlen, ylen, sequence, Lnorm_tmp, d0_scale, - 0, false, true, false, fast_opt, - mol_vec1[i]+mol_vec2[j],TMcut); + t0, u0, TM1, TM2, TM3, TM4, TM5, + d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, + seqM, seqxA, seqyA, do_vec, + rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, + xlen, ylen, sequence, Lnorm_tmp, d0_scale, + 0, false, true, false, fast_opt, + mol_vec1[i] + mol_vec2[j], TMcut, 0); /* store result */ - for (ui=0;ui<3;ui++) - for (uj=0;uj<3;uj++) ut_mat[ut_idx][ui*3+uj]=u0[ui][uj]; - for (uj=0;uj<3;uj++) ut_mat[ut_idx][9+uj]=t0[uj]; - seqxA_mat[i][j]=seqxA; - seqyA_mat[i][j]=seqyA; - TMave_mat[i][j]=TM4*Lnorm_tmp; - if (TMave_mat[i][j]>maxTMmono) + for (ui = 0; ui < 3; ui++) + for (uj = 0; uj < 3; uj++) + ut_mat[ut_idx][ui * 3 + uj] = u0[ui][uj]; + for (uj = 0; uj < 3; uj++) + ut_mat[ut_idx][9 + uj] = t0[uj]; + seqxA_mat[i][j] = seqxA; + seqyA_mat[i][j] = seqyA; + TMave_mat[i][j] = TM4 * Lnorm_tmp; + if (TMave_mat[i][j] > maxTMmono) { - maxTMmono=TMave_mat[i][j]; - maxTMmono_i=i; - maxTMmono_j=j; + maxTMmono = TMave_mat[i][j]; + maxTMmono_i = i; + maxTMmono_j = j; } /* clean up */ @@ -588,133 +626,136 @@ int main(int argc, char *argv[]) seqxA.clear(); seqyA.clear(); - delete[]seqy; - delete[]secy; - DeleteArray(&ya,ylen); + delete[] seqy; + delete[] secy; + DeleteArray(&ya, ylen); do_vec.clear(); } - delete[]seqx; - delete[]secx; - DeleteArray(&xa,xlen); + delete[] seqx; + delete[] secx; + DeleteArray(&xa, xlen); } /* calculate initial chain-chain assignment */ int *assign1_list; // value is index of assigned chain2 int *assign2_list; // value is index of assigned chain1 - assign1_list=new int[chain1_num]; - assign2_list=new int[chain2_num]; - double total_score=enhanced_greedy_search(TMave_mat, assign1_list, - assign2_list, chain1_num, chain2_num); - if (total_score<=0) PrintErrorAndQuit("ERROR! No assignable chain"); + assign1_list = new int[chain1_num]; + assign2_list = new int[chain2_num]; + double total_score = enhanced_greedy_search(TMave_mat, assign1_list, + assign2_list, chain1_num, chain2_num); + if (total_score <= 0) + PrintErrorAndQuit("ERROR! No assignable chain"); /* refine alignment for large oligomers */ - int aln_chain_num=count_assign_pair(assign1_list,chain1_num); - bool is_oligomer=(aln_chain_num>=3); - if (aln_chain_num==2) // dimer alignment + int aln_chain_num = count_assign_pair(assign1_list, chain1_num); + bool is_oligomer = (aln_chain_num >= 3); + if (aln_chain_num == 2) // dimer alignment { - int na_chain_num1,na_chain_num2,aa_chain_num1,aa_chain_num2; - count_na_aa_chain_num(na_chain_num1,aa_chain_num1,mol_vec1); - count_na_aa_chain_num(na_chain_num2,aa_chain_num2,mol_vec2); + int na_chain_num1, na_chain_num2, aa_chain_num1, aa_chain_num2; + count_na_aa_chain_num(na_chain_num1, aa_chain_num1, mol_vec1); + count_na_aa_chain_num(na_chain_num2, aa_chain_num2, mol_vec2); /* align protein-RNA hybrid dimer to another hybrid dimer */ - if (na_chain_num1==1 && na_chain_num2==1 && - aa_chain_num1==1 && aa_chain_num2==1) is_oligomer=false; + if (na_chain_num1 == 1 && na_chain_num2 == 1 && + aa_chain_num1 == 1 && aa_chain_num2 == 1) + is_oligomer = false; /* align pure protein dimer or pure RNA dimer */ - else if ((getmin(na_chain_num1,na_chain_num2)==0 && - aa_chain_num1==2 && aa_chain_num2==2) || - (getmin(aa_chain_num1,aa_chain_num2)==0 && - na_chain_num1==2 && na_chain_num2==2)) + else if ((getmin(na_chain_num1, na_chain_num2) == 0 && + aa_chain_num1 == 2 && aa_chain_num2 == 2) || + (getmin(aa_chain_num1, aa_chain_num2) == 0 && + na_chain_num1 == 2 && na_chain_num2 == 2)) { - adjust_dimer_assignment(xa_vec,ya_vec,xlen_vec,ylen_vec,mol_vec1, - mol_vec2,assign1_list,assign2_list,seqxA_mat,seqyA_mat); - is_oligomer=false; // cannot refiner further + adjust_dimer_assignment(xa_vec, ya_vec, xlen_vec, ylen_vec, mol_vec1, + mol_vec2, assign1_list, assign2_list, seqxA_mat, seqyA_mat); + is_oligomer = false; // cannot refiner further } - else is_oligomer=true; /* align oligomers to dimer */ + else + is_oligomer = true; /* align oligomers to dimer */ } - if (aln_chain_num>=3 || is_oligomer) // oligomer alignment + if (aln_chain_num >= 3 || is_oligomer) // oligomer alignment { /* extract centroid coordinates */ double **xcentroids; double **ycentroids; NewArray(&xcentroids, chain1_num, 3); NewArray(&ycentroids, chain2_num, 3); - double d0MM=getmin( + double d0MM = getmin( calculate_centroids(xa_vec, chain1_num, xcentroids), calculate_centroids(ya_vec, chain2_num, ycentroids)); /* refine enhanced greedy search with centroid superposition */ - //double het_deg=check_heterooligomer(TMave_mat, chain1_num, chain2_num); + // double het_deg=check_heterooligomer(TMave_mat, chain1_num, chain2_num); homo_refined_greedy_search(TMave_mat, assign1_list, - assign2_list, chain1_num, chain2_num, xcentroids, - ycentroids, d0MM, len_aa+len_na, ut_mat); + assign2_list, chain1_num, chain2_num, xcentroids, + ycentroids, d0MM, len_aa + len_na, ut_mat); hetero_refined_greedy_search(TMave_mat, assign1_list, - assign2_list, chain1_num, chain2_num, xcentroids, - ycentroids, d0MM, len_aa+len_na); - + assign2_list, chain1_num, chain2_num, xcentroids, + ycentroids, d0MM, len_aa + len_na); + /* clean up */ DeleteArray(&xcentroids, chain1_num); DeleteArray(&ycentroids, chain2_num); } /* store initial assignment */ - int init_pair_num=count_assign_pair(assign1_list,chain1_num); + int init_pair_num = count_assign_pair(assign1_list, chain1_num); int *assign1_init, *assign2_init; - assign1_init=new int[chain1_num]; - assign2_init=new int[chain2_num]; + assign1_init = new int[chain1_num]; + assign2_init = new int[chain2_num]; double **TMave_init; - NewArray(&TMave_init,chain1_num,chain2_num); - vector >seqxA_init(chain1_num,tmp_str_vec); - vector >seqyA_init(chain1_num,tmp_str_vec); + NewArray(&TMave_init, chain1_num, chain2_num); + vector> seqxA_init(chain1_num, tmp_str_vec); + vector> seqyA_init(chain1_num, tmp_str_vec); vector sequence_init; copy_chain_assign_data(chain1_num, chain2_num, sequence_init, - seqxA_mat, seqyA_mat, assign1_list, assign2_list, TMave_mat, - seqxA_init, seqyA_init, assign1_init, assign2_init, TMave_init); + seqxA_mat, seqyA_mat, assign1_list, assign2_list, TMave_mat, + seqxA_init, seqyA_init, assign1_init, assign2_init, TMave_init); /* perform iterative alignment */ - double max_total_score=0; // ignore old total_score because previous - // score was from monomeric chain superpositions - int max_iter=5-(int)((len_aa+len_na)/200); - if (max_iter<2) max_iter=2; + double max_total_score = 0; // ignore old total_score because previous + // score was from monomeric chain superpositions + int max_iter = 5 - (int)((len_aa + len_na) / 200); + if (max_iter < 2) + max_iter = 2; MMalign_iter(max_total_score, max_iter, xa_vec, ya_vec, - seqx_vec, seqy_vec, secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, - ylen_vec, xa, ya, seqx, seqy, secx, secy, len_aa, len_na, chain1_num, - chain2_num, TMave_mat, seqxA_mat, seqyA_mat, assign1_list, assign2_list, - sequence, d0_scale, fast_opt, chainmap); + seqx_vec, seqy_vec, secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, + ylen_vec, xa, ya, seqx, seqy, secx, secy, len_aa, len_na, chain1_num, + chain2_num, TMave_mat, seqxA_mat, seqyA_mat, assign1_list, assign2_list, + sequence, d0_scale, fast_opt, chainmap); - if (aln_chain_num>=4 && is_oligomer && chainmap.size()==0) // oligomer alignment + if (aln_chain_num >= 4 && is_oligomer && chainmap.size() == 0) // oligomer alignment { MMalign_final(xname.substr(dir1_opt.size()), yname.substr(dir2_opt.size()), - chainID_list1, chainID_list2, - fname_super, fname_lign, fname_matrix, - xa_vec, ya_vec, seqx_vec, seqy_vec, - secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec, - xa, ya, seqx, seqy, secx, secy, len_aa, len_na, - chain1_num, chain2_num, TMave_mat, - seqxA_mat, seqM_mat, seqyA_mat, assign1_list, assign2_list, sequence, - d0_scale, 1, 0, 5, ter_opt, split_opt, - 0, 0, true, true, mirror_opt, resi_vec1, resi_vec2); - + chainID_list1, chainID_list2, + fname_super, fname_lign, fname_matrix, + xa_vec, ya_vec, seqx_vec, seqy_vec, + secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec, + xa, ya, seqx, seqy, secx, secy, len_aa, len_na, + chain1_num, chain2_num, TMave_mat, + seqxA_mat, seqM_mat, seqyA_mat, assign1_list, assign2_list, sequence, + d0_scale, 1, 0, 5, ter_opt, split_opt, + 0, 0, true, true, mirror_opt, resi_vec1, resi_vec2); /* extract centroid coordinates */ double **xcentroids; double **ycentroids; NewArray(&xcentroids, chain1_num, 3); NewArray(&ycentroids, chain2_num, 3); - double d0MM=getmin( + double d0MM = getmin( calculate_centroids(xa_vec, chain1_num, xcentroids), calculate_centroids(ya_vec, chain2_num, ycentroids)); /* refine enhanced greedy search with centroid superposition */ - //double het_deg=check_heterooligomer(TMave_mat, chain1_num, chain2_num); + // double het_deg=check_heterooligomer(TMave_mat, chain1_num, chain2_num); homo_refined_greedy_search(TMave_mat, assign1_list, - assign2_list, chain1_num, chain2_num, xcentroids, - ycentroids, d0MM, len_aa+len_na, ut_mat); + assign2_list, chain1_num, chain2_num, xcentroids, + ycentroids, d0MM, len_aa + len_na, ut_mat); hetero_refined_greedy_search(TMave_mat, assign1_list, - assign2_list, chain1_num, chain2_num, xcentroids, - ycentroids, d0MM, len_aa+len_na); + assign2_list, chain1_num, chain2_num, xcentroids, + ycentroids, d0MM, len_aa + len_na); /* clean up */ DeleteArray(&xcentroids, chain1_num); @@ -722,116 +763,122 @@ int main(int argc, char *argv[]) } /* sometime MMalign_iter is even worse than monomer alignment */ - if (max_total_score=init_pair_num) copy_chain_assign_data( - chain1_num, chain2_num, sequence_init, - seqxA_mat, seqyA_mat, assign1_list, assign2_list, TMave_mat, - seqxA_init, seqyA_init, assign1_init, assign2_init, TMave_init); - double max_total_score_cross=max_total_score; - - //if (init_pair_num!=2 && is_oligomer==false) MMalign_cross( - //max_total_score_cross, max_iter, xa_vec, ya_vec, seqx_vec, seqy_vec, - //secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec, - //xa, ya, seqx, seqy, secx, secy, len_aa, len_na, chain1_num, chain2_num, - //TMave_init, seqxA_init, seqyA_init, assign1_init, assign2_init, sequence_init, - //d0_scale, true); - //else - if (len_aa+len_na<10000) + int iter_pair_num = count_assign_pair(assign1_list, chain1_num); + if (iter_pair_num >= init_pair_num) + copy_chain_assign_data( + chain1_num, chain2_num, sequence_init, + seqxA_mat, seqyA_mat, assign1_list, assign2_list, TMave_mat, + seqxA_init, seqyA_init, assign1_init, assign2_init, TMave_init); + double max_total_score_cross = max_total_score; + + // if (init_pair_num!=2 && is_oligomer==false) MMalign_cross( + // max_total_score_cross, max_iter, xa_vec, ya_vec, seqx_vec, seqy_vec, + // secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec, + // xa, ya, seqx, seqy, secx, secy, len_aa, len_na, chain1_num, chain2_num, + // TMave_init, seqxA_init, seqyA_init, assign1_init, assign2_init, sequence_init, + // d0_scale, true); + // else + if (len_aa + len_na < 10000) { MMalign_dimer(max_total_score_cross, xa_vec, ya_vec, seqx_vec, seqy_vec, - secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec, - xa, ya, seqx, seqy, secx, secy, len_aa, len_na, chain1_num, chain2_num, - TMave_init, seqxA_init, seqyA_init, assign1_init, assign2_init, - sequence_init, d0_scale, fast_opt); - if (max_total_score_cross>max_total_score) + secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec, + xa, ya, seqx, seqy, secx, secy, len_aa, len_na, chain1_num, chain2_num, + TMave_init, seqxA_init, seqyA_init, assign1_init, assign2_init, + sequence_init, d0_scale, fast_opt); + if (max_total_score_cross > max_total_score) { - max_total_score=max_total_score_cross; + max_total_score = max_total_score_cross; copy_chain_assign_data(chain1_num, chain2_num, sequence, - seqxA_init, seqyA_init, assign1_init, assign2_init, TMave_init, - seqxA_mat, seqyA_mat, assign1_list, assign2_list, TMave_mat); + seqxA_init, seqyA_init, assign1_init, assign2_init, TMave_init, + seqxA_mat, seqyA_mat, assign1_list, assign2_list, TMave_mat); } - } + } /* final alignment */ - if (outfmt_opt==0) print_version(); + if (outfmt_opt == 0) + print_version(); MMalign_final(xname.substr(dir1_opt.size()), yname.substr(dir2_opt.size()), - chainID_list1, chainID_list2, - fname_super, fname_lign, fname_matrix, - xa_vec, ya_vec, seqx_vec, seqy_vec, - secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec, - xa, ya, seqx, seqy, secx, secy, len_aa, len_na, - chain1_num, chain2_num, TMave_mat, - seqxA_mat, seqM_mat, seqyA_mat, assign1_list, assign2_list, sequence, - d0_scale, m_opt, o_opt, outfmt_opt, ter_opt, split_opt, - a_opt, d_opt, fast_opt, full_opt, mirror_opt, resi_vec1, resi_vec2); + chainID_list1, chainID_list2, + fname_super, fname_lign, fname_matrix, + xa_vec, ya_vec, seqx_vec, seqy_vec, + secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec, + xa, ya, seqx, seqy, secx, secy, len_aa, len_na, + chain1_num, chain2_num, TMave_mat, + seqxA_mat, seqM_mat, seqyA_mat, assign1_list, assign2_list, sequence, + d0_scale, m_opt, o_opt, outfmt_opt, ter_opt, split_opt, + a_opt, d_opt, fast_opt, full_opt, mirror_opt, resi_vec1, resi_vec2); /* clean up everything */ - delete [] assign1_list; - delete [] assign2_list; - DeleteArray(&TMave_mat,chain1_num); - DeleteArray(&ut_mat, chain1_num*chain2_num); - vector >().swap(seqxA_mat); - vector >().swap(seqM_mat); - vector >().swap(seqyA_mat); + delete[] assign1_list; + delete[] assign2_list; + DeleteArray(&TMave_mat, chain1_num); + DeleteArray(&ut_mat, chain1_num * chain2_num); + vector>().swap(seqxA_mat); + vector>().swap(seqM_mat); + vector>().swap(seqyA_mat); vector().swap(tmp_str_vec); - delete [] assign1_init; - delete [] assign2_init; - DeleteArray(&TMave_init,chain1_num); - vector >().swap(seqxA_init); - vector >().swap(seqyA_init); - - vector > >().swap(xa_vec); // structure of complex1 - vector > >().swap(ya_vec); // structure of complex2 - vector >().swap(seqx_vec); // sequence of complex1 - vector >().swap(seqy_vec); // sequence of complex2 - vector >().swap(secx_vec); // secondary structure of complex1 - vector >().swap(secy_vec); // secondary structure of complex2 - mol_vec1.clear(); // molecule type of complex1, RNA if >0 - mol_vec2.clear(); // molecule type of complex2, RNA if >0 - vector().swap(chainID_list1); // list of chainID1 - vector().swap(chainID_list2); // list of chainID2 - xlen_vec.clear(); // length of complex1 - ylen_vec.clear(); // length of complex2 + delete[] assign1_init; + delete[] assign2_init; + DeleteArray(&TMave_init, chain1_num); + vector>().swap(seqxA_init); + vector>().swap(seqyA_init); + + vector>>().swap(xa_vec); // structure of complex1 + vector>>().swap(ya_vec); // structure of complex2 + vector>().swap(seqx_vec); // sequence of complex1 + vector>().swap(seqy_vec); // sequence of complex2 + vector>().swap(secx_vec); // secondary structure of complex1 + vector>().swap(secy_vec); // secondary structure of complex2 + mol_vec1.clear(); // molecule type of complex1, RNA if >0 + mol_vec2.clear(); // molecule type of complex2, RNA if >0 + vector().swap(chainID_list1); // list of chainID1 + vector().swap(chainID_list2); // list of chainID2 + xlen_vec.clear(); // length of complex1 + ylen_vec.clear(); // length of complex2 vector().swap(chain1_list); vector().swap(chain2_list); vector().swap(sequence); - vector().swap(resi_vec1); // residue index for chain1 - vector().swap(resi_vec2); // residue index for chain2 + vector().swap(resi_vec1); // residue index for chain1 + vector().swap(resi_vec2); // residue index for chain2 vector().swap(chain2parse1); vector().swap(chain2parse2); vector().swap(model2parse1); vector().swap(model2parse2); t2 = clock(); - float diff = ((float)t2 - (float)t1)/CLOCKS_PER_SEC; + float diff = ((float)t2 - (float)t1) / CLOCKS_PER_SEC; printf("#Total CPU time is %5.2f seconds\n", diff); return 0; } diff --git a/MMalign.h b/MMalign.h index 231cfa3..2ea5059 100644 --- a/MMalign.h +++ b/MMalign.h @@ -1279,7 +1279,7 @@ double MMalign_search( d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA, do_vec, rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, xlen, ylen, sequence, Lnorm_ass, d0_scale, - i_opt, false, true, false, fast_opt, mol_type, -1); + i_opt, false, true, false, fast_opt, mol_type, -1, 0); /* clean up */ delete [] seqx; @@ -1460,7 +1460,7 @@ void MMalign_final( d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA, do_vec, rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, xlen, ylen, sequence, Lnorm_ass, d0_scale, - 3, a_opt, false, d_opt, fast_opt, mol_type, -1); + 3, a_opt, false, d_opt, fast_opt, mol_type, -1, 0); /* prepare full complex alignment */ string chainID1=""; diff --git a/TMalign.cpp b/TMalign.cpp index e2539d0..3dba166 100644 --- a/TMalign.cpp +++ b/TMalign.cpp @@ -6,161 +6,159 @@ using namespace std; void print_version() { - cout << -"\n" -" **********************************************************************\n" -" * TM-align (Version 20240303): protein and RNA structure alignment *\n" -" * References: Y Zhang, J Skolnick. Nucl Acids Res 33, 2302-9 (2005) *\n" -" * S Gong, C Zhang, Y Zhang. Bioinformatics, bz282 (2019) *\n" -" * Please email comments and suggestions to yangzhanglab@umich.edu *\n" -" **********************************************************************" - << endl; + cout << "\n" + " **********************************************************************\n" + " * TM-align (Version 20240303): protein and RNA structure alignment *\n" + " * References: Y Zhang, J Skolnick. Nucl Acids Res 33, 2302-9 (2005) *\n" + " * S Gong, C Zhang, Y Zhang. Bioinformatics, bz282 (2019) *\n" + " * Please email comments and suggestions to yangzhanglab@umich.edu *\n" + " **********************************************************************" + << endl; } void print_extra_help() { - cout << -"Additional options:\n" -" -fast Fast but slightly inaccurate alignment\n" -"\n" -" -dir Perform all-against-all alignment among the list of PDB\n" -" chains listed by 'chain_list' under 'chain_folder'. Note\n" -" that the slash is necessary.\n" -" $ TMalign -dir chain_folder/ chain_list\n" -"\n" -" -dir1 Use chain2 to search a list of PDB chains listed by 'chain1_list'\n" -" under 'chain1_folder'. Note that the slash is necessary.\n" -" $ TMalign -dir1 chain1_folder/ chain1_list chain2\n" -"\n" -" -dir2 Use chain1 to search a list of PDB chains listed by 'chain2_list'\n" -" under 'chain2_folder'\n" -" $ TMalign chain1 -dir2 chain2_folder/ chain2_list\n" -"\n" -" -pair (Only when -dir1 and -dir2 are set, default is no) whether to\n" -" perform pair alignment rather than all-against-all alignment\n" -"\n" -" -suffix (Only when -dir1 and/or -dir2 are set, default is empty)\n" -" add file name suffix to files listed by chain1_list or chain2_list\n" -"\n" -" -atom 4-character atom name used to represent a residue.\n" -" Default is \" C3'\" for RNA/DNA and \" CA \" for proteins\n" -" (note the spaces before and after CA).\n" -"\n" -" -mol Molecule type: RNA or protein\n" -" Default is detect molecule type automatically\n" -"\n" -" -ter Strings to mark the end of a chain\n" -" 3: (default) TER, ENDMDL, END or different chain ID\n" -" 2: ENDMDL, END, or different chain ID\n" -" 1: ENDMDL or END\n" -" 0: (default in the first C++ TMalign) end of file\n" -"\n" -" -split Whether to split PDB file into multiple chains\n" -" 0: (default) treat the whole structure as one single chain\n" -" 1: treat each MODEL as a separate chain (-ter should be 0)\n" -" 2: treat each chain as a seperate chain (-ter should be <=1)\n" -"\n" -" -outfmt Output format\n" -" 0: (default) full output\n" -" 1: fasta format compact output\n" -" 2: tabular format very compact output\n" -" -1: full output, but without version or citation information\n" -"\n" -" -byresi Whether to assume residue index correspondence between the\n" -" two structures. The same as -TMscore.\n" -" 0: (default) sequence independent alignment\n" -" 1: (same as TMscore program) sequence-dependent superposition,\n" -" i.e. align by residue index\n" -" 2: (same as TMscore -c, should be used with -ter <=1)\n" -" align by residue index and chain ID\n" -" 3: (similar to TMscore -c, should be used with -ter <=1)\n" -" align by residue index and order of chain\n" -//" 4: sequence dependent alignment: perform Needleman-Wunsch\n" -//" global sequence alignment, followed by TM-score superposition\n" -" 5: sequence dependent alignment: perform glocal sequence\n" -" alignment followed by TM-score superposition.\n" -" -byresi 5 is thee same as -seq\n" -"\n" -" -TMcut -1: (default) do not consider TMcut\n" -" Values in [0.5,1): Do not proceed with TM-align for this\n" -" structure pair if TM-score is unlikely to reach TMcut.\n" -" TMcut is normalized is set by -a option:\n" -" -2: normalized by longer structure length\n" -" -1: normalized by shorter structure length\n" -" 0: (default, same as F) normalized by second structure\n" -" 1: same as T, normalized by average structure length\n" -"\n" -" -cp ALignment with circular permutation\n" -"\n" -" -mirror Whether to align the mirror image of input structure\n" -" 0: (default) do not align mirrored structure\n" -" 1: align mirror of chain1 to origin chain2\n" -"\n" -" -het Whether to align residues marked as 'HETATM' in addition to 'ATOM '\n" -" 0: (default) only align 'ATOM ' residues\n" -" 1: align both 'ATOM ' and 'HETATM' residues\n" -"\n" -" -infmt1 Input format for chain1\n" -" -infmt2 Input format for chain2\n" -" -1: (default) automatically detect PDB or PDBx/mmCIF format\n" -" 0: PDB format\n" -" 1: SPICKER format\n" -" 2: xyz format\n" -" 3: PDBx/mmCIF format\n" - <= minimum length of the two structures\n" -" otherwise, TM-score may be >1\n" -"\n" -" -a TM-score normalized by the average length of two structures\n" -" T or F, (default F)\n" -"\n" -" -i Start with an alignment specified in fasta file 'align.txt'\n" -"\n" -" -I Stick to the alignment specified in 'align.txt'\n" -"\n" -" -m Output TM-align rotation matrix\n" -"\n" -" -d TM-score scaled by an assigned d0, e.g. 5 Angstroms\n" -"\n" -" -o Output the superposition of PDB1.pdb to TM_sup.pdb\n" -" $ TMalign PDB1.pdb PDB2.pdb -o TM_sup.pdb\n" -" To view superposed full-atom structures:\n" -" $ pymol TM_sup.pdb PDB2.pdb\n" -"\n" -" -v Print the version of TM-align\n" -"\n" -" -h Print the full help message, including options not available\n" -" in standard TM-align program\n" -"\n" -" (Options -u, -a, -d, -o won't change the final structure alignment)\n\n" -"Example usages:\n" -" TMalign PDB1.pdb PDB2.pdb\n" -" TMalign PDB1.pdb PDB2.pdb -u 100 -d 5.0\n" -" TMalign PDB1.pdb PDB2.pdb -a T -o PDB1.sup\n" -" TMalign PDB1.pdb PDB2.pdb -i align.txt\n" -" TMalign PDB1.pdb PDB2.pdb -m matrix.txt\n" - <= minimum length of the two structures\n" + " otherwise, TM-score may be >1\n" + "\n" + " -a TM-score normalized by the average length of two structures\n" + " T or F, (default F)\n" + "\n" + " -i Start with an alignment specified in fasta file 'align.txt'\n" + "\n" + " -I Stick to the alignment specified in 'align.txt'\n" + "\n" + " -m Output TM-align rotation matrix\n" + "\n" + " -d TM-score scaled by an assigned d0, e.g. 5 Angstroms\n" + "\n" + " -o Output the superposition of PDB1.pdb to TM_sup.pdb\n" + " $ TMalign PDB1.pdb PDB2.pdb -o TM_sup.pdb\n" + " To view superposed full-atom structures:\n" + " $ pymol TM_sup.pdb PDB2.pdb\n" + "\n" + " -v Print the version of TM-align\n" + "\n" + " -h Print the full help message, including options not available\n" + " in standard TM-align program\n" + "\n" + " (Options -u, -a, -d, -o won't change the final structure alignment)\n\n" + "Example usages:\n" + " TMalign PDB1.pdb PDB2.pdb\n" + " TMalign PDB1.pdb PDB2.pdb -u 100 -d 5.0\n" + " TMalign PDB1.pdb PDB2.pdb -a T -o PDB1.sup\n" + " TMalign PDB1.pdb PDB2.pdb -i align.txt\n" + " TMalign PDB1.pdb PDB2.pdb -m matrix.txt\n" + << endl; + + if (h_opt) + print_extra_help(); exit(EXIT_SUCCESS); } int main(int argc, char *argv[]) { - if (argc < 2) print_help(); - + if (argc < 2) + print_help(); clock_t t1, t2; t1 = clock(); @@ -168,41 +166,41 @@ int main(int argc, char *argv[]) /**********************/ /* get argument */ /**********************/ - string xname = ""; - string yname = ""; - string fname_super = ""; // file name for superposed structure - string fname_lign = ""; // file name for user alignment - string fname_matrix= ""; // file name for output matrix - vector sequence; // get value from alignment file + string xname = ""; + string yname = ""; + string fname_super = ""; // file name for superposed structure + string fname_lign = ""; // file name for user alignment + string fname_matrix = ""; // file name for output matrix + vector sequence; // get value from alignment file double Lnorm_ass, d0_scale; bool h_opt = false; // print full help message bool v_opt = false; // print version bool m_opt = false; // flag for -m, output rotation matrix - int i_opt = 0; // 1 for -i, 3 for -I + int i_opt = 0; // 1 for -i, 3 for -I bool o_opt = false; // flag for -o, output superposed structure - int a_opt = 0; // flag for -a, do not normalized by average length + int a_opt = 0; // flag for -a, do not normalized by average length bool u_opt = false; // flag for -u, normalized by user specified length bool d_opt = false; // flag for -d, user specified d0 - double TMcut =-1; - int infmt1_opt=-1; // PDB or PDBx/mmCIF format for chain_1 - int infmt2_opt=-1; // PDB or PDBx/mmCIF format for chain_2 - int ter_opt =3; // TER, END, or different chainID - int split_opt =0; // do not split chain - int outfmt_opt=0; // set -outfmt to full output - bool fast_opt =false; // flags for -fast, fTM-align algorithm - int cp_opt =0; // do not check circular permutation - int mirror_opt=0; // do not align mirror - int het_opt =0; // do not read HETATM residues - string atom_opt ="auto";// use C alpha atom for protein and C3' for RNA - string mol_opt ="auto";// auto-detect the molecule type as protein/RNA - string suffix_opt=""; // set -suffix to empty - string dir_opt =""; // set -dir to empty - string dir1_opt =""; // set -dir1 to empty - string dir2_opt =""; // set -dir2 to empty - bool pair_opt=false; // pair alignment - int byresi_opt=0; // set -byresi to 0 + double TMcut = -1; + int infmt1_opt = -1; // PDB or PDBx/mmCIF format for chain_1 + int infmt2_opt = -1; // PDB or PDBx/mmCIF format for chain_2 + int ter_opt = 3; // TER, END, or different chainID + int split_opt = 0; // do not split chain + int outfmt_opt = 0; // set -outfmt to full output + bool fast_opt = false; // flags for -fast, fTM-align algorithm + int cp_opt = 0; // do not check circular permutation + int mirror_opt = 0; // do not align mirror + int het_opt = 0; // do not read HETATM residues + string atom_opt = "auto"; // use C alpha atom for protein and C3' for RNA + string mol_opt = "auto"; // auto-detect the molecule type as protein/RNA + string suffix_opt = ""; // set -suffix to empty + string dir_opt = ""; // set -dir to empty + string dir1_opt = ""; // set -dir1 to empty + string dir2_opt = ""; // set -dir2 to empty + bool pair_opt = false; // pair alignment + int byresi_opt = 0; // set -byresi to 0 vector chain1_list; // only when -dir1 is set vector chain2_list; // only when -dir2 is set vector chain2parse1; @@ -210,185 +208,220 @@ int main(int argc, char *argv[]) vector model2parse1; vector model2parse2; - for(int i = 1; i < argc; i++) + for (int i = 1; i < argc; i++) { - if ( !strcmp(argv[i],"-o") && i < (argc-1) ) + if (!strcmp(argv[i], "-o") && i < (argc - 1)) { - fname_super = argv[i + 1]; o_opt = true; i++; + fname_super = argv[i + 1]; + o_opt = true; + i++; } - else if ( (!strcmp(argv[i],"-u") || - !strcmp(argv[i],"-L")) && i < (argc-1) ) + else if ((!strcmp(argv[i], "-u") || + !strcmp(argv[i], "-L")) && + i < (argc - 1)) { - Lnorm_ass = atof(argv[i + 1]); u_opt = true; i++; + Lnorm_ass = atof(argv[i + 1]); + u_opt = true; + i++; } - else if ( !strcmp(argv[i],"-a") && i < (argc-1) ) + else if (!strcmp(argv[i], "-a") && i < (argc - 1)) { - if (!strcmp(argv[i + 1], "T")) a_opt=true; - else if (!strcmp(argv[i + 1], "F")) a_opt=false; - else + if (!strcmp(argv[i + 1], "T")) + a_opt = true; + else if (!strcmp(argv[i + 1], "F")) + a_opt = false; + else { - a_opt=atoi(argv[i + 1]); - if (a_opt!=-2 && a_opt!=-1 && a_opt!=1) + a_opt = atoi(argv[i + 1]); + if (a_opt != -2 && a_opt != -1 && a_opt != 1) PrintErrorAndQuit("-a must be -2, -1, 1, T or F"); } i++; } - else if ( !strcmp(argv[i],"-d") && i < (argc-1) ) + else if (!strcmp(argv[i], "-d") && i < (argc - 1)) { - d0_scale = atof(argv[i + 1]); d_opt = true; i++; + d0_scale = atof(argv[i + 1]); + d_opt = true; + i++; } - else if ( !strcmp(argv[i],"-v") ) + else if (!strcmp(argv[i], "-v")) { v_opt = true; } - else if ( !strcmp(argv[i],"-h") ) + else if (!strcmp(argv[i], "-h")) { h_opt = true; } - else if ( !strcmp(argv[i],"-i") && i < (argc-1) ) + else if (!strcmp(argv[i], "-i") && i < (argc - 1)) { - if (i_opt==3) + if (i_opt == 3) PrintErrorAndQuit("ERROR! -i and -I cannot be used together"); - fname_lign = argv[i + 1]; i_opt = 1; i++; + fname_lign = argv[i + 1]; + i_opt = 1; + i++; } - else if (!strcmp(argv[i], "-I") && i < (argc-1) ) + else if (!strcmp(argv[i], "-I") && i < (argc - 1)) { - if (i_opt==1) + if (i_opt == 1) PrintErrorAndQuit("ERROR! -I and -i cannot be used together"); - fname_lign = argv[i + 1]; i_opt = 3; i++; + fname_lign = argv[i + 1]; + i_opt = 3; + i++; } - else if (!strcmp(argv[i], "-chain1") ) + else if (!strcmp(argv[i], "-chain1")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -chain1"); - split(argv[i+1],chain2parse1,','); + split(argv[i + 1], chain2parse1, ','); i++; } - else if (!strcmp(argv[i], "-chain2") ) + else if (!strcmp(argv[i], "-chain2")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -chain2"); - split(argv[i+1],chain2parse2,','); + split(argv[i + 1], chain2parse2, ','); i++; } - else if (!strcmp(argv[i], "-model1") ) + else if (!strcmp(argv[i], "-model1")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -model1"); - split(argv[i+1],model2parse1,','); + split(argv[i + 1], model2parse1, ','); i++; } - else if (!strcmp(argv[i], "-model2") ) + else if (!strcmp(argv[i], "-model2")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -model2"); - split(argv[i+1],model2parse2,','); + split(argv[i + 1], model2parse2, ','); i++; } - else if (!strcmp(argv[i], "-m") && i < (argc-1) ) + else if (!strcmp(argv[i], "-m") && i < (argc - 1)) { - fname_matrix = argv[i + 1]; m_opt = true; i++; - }// get filename for rotation matrix + fname_matrix = argv[i + 1]; + m_opt = true; + i++; + } // get filename for rotation matrix else if (!strcmp(argv[i], "-fast")) { fast_opt = true; } - else if ( !strcmp(argv[i],"-infmt1") && i < (argc-1) ) + else if (!strcmp(argv[i], "-infmt1") && i < (argc - 1)) { - infmt1_opt=atoi(argv[i + 1]); i++; + infmt1_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-infmt2") && i < (argc-1) ) + else if (!strcmp(argv[i], "-infmt2") && i < (argc - 1)) { - infmt2_opt=atoi(argv[i + 1]); i++; + infmt2_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-ter") && i < (argc-1) ) + else if (!strcmp(argv[i], "-ter") && i < (argc - 1)) { - ter_opt=atoi(argv[i + 1]); i++; + ter_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-split") && i < (argc-1) ) + else if (!strcmp(argv[i], "-split") && i < (argc - 1)) { - split_opt=atoi(argv[i + 1]); i++; + split_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-atom") && i < (argc-1) ) + else if (!strcmp(argv[i], "-atom") && i < (argc - 1)) { - atom_opt=argv[i + 1]; i++; + atom_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-mol") && i < (argc-1) ) + else if (!strcmp(argv[i], "-mol") && i < (argc - 1)) { - mol_opt=argv[i + 1]; i++; + mol_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-dir") && i < (argc-1) ) + else if (!strcmp(argv[i], "-dir") && i < (argc - 1)) { - dir_opt=argv[i + 1]; i++; + dir_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-dir1") && i < (argc-1) ) + else if (!strcmp(argv[i], "-dir1") && i < (argc - 1)) { - dir1_opt=argv[i + 1]; i++; + dir1_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-dir2") && i < (argc-1) ) + else if (!strcmp(argv[i], "-dir2") && i < (argc - 1)) { - dir2_opt=argv[i + 1]; i++; + dir2_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-pair") ) + else if (!strcmp(argv[i], "-pair")) { - pair_opt=true; + pair_opt = true; } - else if ( !strcmp(argv[i],"-suffix") && i < (argc-1) ) + else if (!strcmp(argv[i], "-suffix") && i < (argc - 1)) { - suffix_opt=argv[i + 1]; i++; + suffix_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-outfmt") && i < (argc-1) ) + else if (!strcmp(argv[i], "-outfmt") && i < (argc - 1)) { - outfmt_opt=atoi(argv[i + 1]); i++; + outfmt_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-TMcut") && i < (argc-1) ) + else if (!strcmp(argv[i], "-TMcut") && i < (argc - 1)) { - TMcut=atof(argv[i + 1]); i++; + TMcut = atof(argv[i + 1]); + i++; } - else if ((!strcmp(argv[i],"-byresi") || !strcmp(argv[i],"-tmscore") || - !strcmp(argv[i],"-TMscore")) && i < (argc-1) ) + else if ((!strcmp(argv[i], "-byresi") || !strcmp(argv[i], "-tmscore") || + !strcmp(argv[i], "-TMscore")) && + i < (argc - 1)) { - byresi_opt=atoi(argv[i + 1]); i++; + byresi_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-seq") ) + else if (!strcmp(argv[i], "-seq")) { - byresi_opt=5; + byresi_opt = 5; } - else if ( !strcmp(argv[i],"-cp") ) + else if (!strcmp(argv[i], "-cp")) { - cp_opt=1; + cp_opt = 1; } - else if ( !strcmp(argv[i],"-mirror") && i < (argc-1) ) + else if (!strcmp(argv[i], "-mirror") && i < (argc - 1)) { - mirror_opt=atoi(argv[i + 1]); i++; + mirror_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-het") && i < (argc-1) ) + else if (!strcmp(argv[i], "-het") && i < (argc - 1)) { - het_opt=atoi(argv[i + 1]); i++; + het_opt = atoi(argv[i + 1]); + i++; } - else if (xname.size() == 0) xname=argv[i]; - else if (yname.size() == 0) yname=argv[i]; - else PrintErrorAndQuit(string("ERROR! Undefined option ")+argv[i]); + else if (xname.size() == 0) + xname = argv[i]; + else if (yname.size() == 0) + yname = argv[i]; + else + PrintErrorAndQuit(string("ERROR! Undefined option ") + argv[i]); } - if(xname.size()==0 || (yname.size()==0 && dir_opt.size()==0) || - (yname.size() && dir_opt.size())) + if (xname.size() == 0 || (yname.size() == 0 && dir_opt.size() == 0) || + (yname.size() && dir_opt.size())) { - if (h_opt) print_help(h_opt); + if (h_opt) + print_help(h_opt); if (v_opt) { print_version(); exit(EXIT_FAILURE); } - if (xname.size()==0) + if (xname.size() == 0) PrintErrorAndQuit("Please provide input structures"); - else if (yname.size()==0 && dir_opt.size()==0) + else if (yname.size() == 0 && dir_opt.size() == 0) PrintErrorAndQuit("Please provide structure B"); else if (yname.size() && dir_opt.size()) PrintErrorAndQuit("Please provide only one file name if -dir is set"); } - if (suffix_opt.size() && dir_opt.size()+dir1_opt.size()+dir2_opt.size()==0) + if (suffix_opt.size() && dir_opt.size() + dir1_opt.size() + dir2_opt.size() == 0) PrintErrorAndQuit("-suffix is only valid if -dir, -dir1 or -dir2 is set"); if ((dir_opt.size() || dir1_opt.size() || dir2_opt.size())) { @@ -397,239 +430,260 @@ int main(int argc, char *argv[]) else if (dir_opt.size() && (dir1_opt.size() || dir2_opt.size())) PrintErrorAndQuit("-dir cannot be set with -dir1 or -dir2"); } - if (atom_opt.size()!=4) + if (atom_opt.size() != 4) PrintErrorAndQuit("ERROR! Atom name must have 4 characters, including space."); - if (mol_opt!="auto" && mol_opt!="protein" && mol_opt!="RNA") + if (mol_opt != "auto" && mol_opt != "protein" && mol_opt != "RNA") PrintErrorAndQuit("ERROR! Molecule type must be either RNA or protein."); - else if (mol_opt=="protein" && atom_opt=="auto") - atom_opt=" CA "; - else if (mol_opt=="RNA" && atom_opt=="auto") - atom_opt=" C3'"; + else if (mol_opt == "protein" && atom_opt == "auto") + atom_opt = " CA "; + else if (mol_opt == "RNA" && atom_opt == "auto") + atom_opt = " C3'"; - if (u_opt && Lnorm_ass<=0) + if (u_opt && Lnorm_ass <= 0) PrintErrorAndQuit("Wrong value for option -u! It should be >0"); - if (d_opt && d0_scale<=0) + if (d_opt && d0_scale <= 0) PrintErrorAndQuit("Wrong value for option -d! It should be >0"); - if (outfmt_opt>=2 && (a_opt || u_opt || d_opt)) + if (outfmt_opt >= 2 && (a_opt || u_opt || d_opt)) PrintErrorAndQuit("-outfmt 2 cannot be used with -a, -u, -L, -d"); - if (byresi_opt!=0) + if (byresi_opt != 0) { if (i_opt) PrintErrorAndQuit("-byresi >=1 cannot be used with -i or -I"); - if (byresi_opt<0 || byresi_opt>5) + if (byresi_opt < 0 || byresi_opt > 5) PrintErrorAndQuit("-byresi can only be 0, 1, 2, 3, 4, or 5"); - if (byresi_opt>=2 && byresi_opt<=3 && ter_opt>=2) + if (byresi_opt >= 2 && byresi_opt <= 3 && ter_opt >= 2) PrintErrorAndQuit("-byresi 2 and -byresi 3 should be used with -ter <=1"); } - if (split_opt==1 && ter_opt!=0) + if (split_opt == 1 && ter_opt != 0) PrintErrorAndQuit("-split 1 should be used with -ter 0"); - else if (split_opt==2 && ter_opt!=0 && ter_opt!=1) + else if (split_opt == 2 && ter_opt != 0 && ter_opt != 1) PrintErrorAndQuit("-split 2 should be used with -ter 0 or 1"); - if (split_opt<0 || split_opt>2) + if (split_opt < 0 || split_opt > 2) PrintErrorAndQuit("-split can only be 0, 1 or 2"); - if (cp_opt!=0 && cp_opt!=1) + if (cp_opt != 0 && cp_opt != 1) PrintErrorAndQuit("-cp can only be 0 or 1"); if (cp_opt && i_opt) PrintErrorAndQuit("-cp cannot be used with -i or -I"); /* read initial alignment file from 'align.txt' */ - if (i_opt) read_user_alignment(sequence, fname_lign, i_opt); + if (i_opt) + read_user_alignment(sequence, fname_lign, i_opt); - if (byresi_opt) i_opt=3; + if (byresi_opt) + i_opt = 3; if (m_opt && fname_matrix == "") // Output rotation matrix: matrix.txt PrintErrorAndQuit("ERROR! Please provide a file name for option -m!"); /* parse file list */ - if (dir1_opt.size()+dir_opt.size()==0) chain1_list.push_back(xname); - else file2chainlist(chain1_list, xname, dir_opt+dir1_opt, suffix_opt); + if (dir1_opt.size() + dir_opt.size() == 0) + chain1_list.push_back(xname); + else + file2chainlist(chain1_list, xname, dir_opt + dir1_opt, suffix_opt); if (dir_opt.size()) - for (int i=0;i >PDB_lines1; // text of chain1 - vector >PDB_lines2; // text of chain2 + vector> PDB_lines1; // text of chain1 + vector> PDB_lines2; // text of chain2 vector mol_vec1; // molecule type of chain1, RNA if >0 vector mol_vec2; // molecule type of chain2, RNA if >0 vector chainID_list1; // list of chainID1 vector chainID_list2; // list of chainID2 - int i,j; // file index - int chain_i,chain_j; // chain index - int r; // residue index - int xlen, ylen; // chain length - int xchainnum,ychainnum;// number of chains in a PDB file - char *seqx, *seqy; // for the protein sequence - char *secx, *secy; // for the secondary structure - double **xa, **ya; // for input vectors xa[0...xlen-1][0..2] and - // ya[0...ylen-1][0..2], in general, - // ya is regarded as native structure - // --> superpose xa onto ya - vector resi_vec1; // residue index for chain1 - vector resi_vec2; // residue index for chain2 - int read_resi=byresi_opt; // whether to read residue index - if (byresi_opt==0 && o_opt) read_resi=2; + int i, j; // file index + int chain_i, chain_j; // chain index + int r; // residue index + int xlen, ylen; // chain length + int xchainnum, ychainnum; // number of chains in a PDB file + char *seqx, *seqy; // for the protein sequence + char *secx, *secy; // for the secondary structure + double **xa, **ya; // for input vectors xa[0...xlen-1][0..2] and + // ya[0...ylen-1][0..2], in general, + // ya is regarded as native structure + // --> superpose xa onto ya + vector resi_vec1; // residue index for chain1 + vector resi_vec2; // residue index for chain2 + int read_resi = byresi_opt; // whether to read residue index + if (byresi_opt == 0 && o_opt) + read_resi = 2; /* loop over file names */ - for (i=0;i0) make_sec(seqx,xa, xlen, secx,atom_opt); - else make_sec(xa, xlen, secx); // secondary structure assignment - - for (j=(dir_opt.size()>0)*(i+1);j 0) + make_sec(seqx, xa, xlen, secx, atom_opt); + else + make_sec(xa, xlen, secx); // secondary structure assignment + + for (j = (dir_opt.size() > 0) * (i + 1); j < chain2_list.size(); j++) { - if (pair_opt && j!=i) continue; + if (pair_opt && j != i) + continue; /* parse chain 2 */ - if (PDB_lines2.size()==0) + if (PDB_lines2.size() == 0) { - yname=chain2_list[j]; - ychainnum=get_PDB_lines(yname, PDB_lines2, chainID_list2, - mol_vec2, ter_opt, infmt2_opt, atom_opt, false, - split_opt, het_opt, chain2parse2, model2parse2); + yname = chain2_list[j]; + ychainnum = get_PDB_lines(yname, PDB_lines2, chainID_list2, + mol_vec2, ter_opt, infmt2_opt, atom_opt, false, + split_opt, het_opt, chain2parse2, model2parse2); if (!ychainnum) { - cerr<<"Warning! Cannot parse file: "<0) - make_sec(seqy, ya, ylen, secy, atom_opt); - else make_sec(ya, ylen, secy); + resi_vec2, read_resi); + if (mol_vec2[chain_j] > 0) + make_sec(seqy, ya, ylen, secy, atom_opt); + else + make_sec(ya, ylen, secy); - if (byresi_opt) extract_aln_from_resi(sequence, - seqx,seqy,resi_vec1,resi_vec2,byresi_opt); + if (byresi_opt) + extract_aln_from_resi(sequence, + seqx, seqy, resi_vec1, resi_vec2, byresi_opt); /* declare variable specific to this pair of TMalign */ double t0[3], u0[3][3]; double TM1, TM2; - double TM3, TM4, TM5; // for a_opt, u_opt, d_opt + double TM3, TM4, TM5; // for a_opt, u_opt, d_opt double d0_0, TM_0; double d0A, d0B, d0u, d0a; - double d0_out=5.0; - string seqM, seqxA, seqyA;// for output alignment + double d0_out = 5.0; + string seqM, seqxA, seqyA; // for output alignment double rmsd0 = 0.0; - int L_ali; // Aligned length in standard_TMscore - double Liden=0; - double TM_ali, rmsd_ali; // TMscore and rmsd in standard_TMscore - int n_ali=0; - int n_ali8=0; + int L_ali; // Aligned length in standard_TMscore + double Liden = 0; + double TM_ali, rmsd_ali; // TMscore and rmsd in standard_TMscore + int n_ali = 0; + int n_ali8 = 0; vector do_vec; /* entry function for structure alignment */ - if (cp_opt) CPalign_main( - xa, ya, seqx, seqy, secx, secy, - t0, u0, TM1, TM2, TM3, TM4, TM5, - d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, - seqM, seqxA, seqyA, do_vec, - rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, - xlen, ylen, sequence, Lnorm_ass, d0_scale, - i_opt, a_opt, u_opt, d_opt, fast_opt, - mol_vec1[chain_i]+mol_vec2[chain_j],TMcut); - else TMalign_main( - xa, ya, seqx, seqy, secx, secy, - t0, u0, TM1, TM2, TM3, TM4, TM5, - d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, - seqM, seqxA, seqyA, do_vec, - rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, - xlen, ylen, sequence, Lnorm_ass, d0_scale, - i_opt, a_opt, u_opt, d_opt, fast_opt, - mol_vec1[chain_i]+mol_vec2[chain_j],TMcut); + if (cp_opt) + CPalign_main( + xa, ya, seqx, seqy, secx, secy, + t0, u0, TM1, TM2, TM3, TM4, TM5, + d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, + seqM, seqxA, seqyA, do_vec, + rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, + xlen, ylen, sequence, Lnorm_ass, d0_scale, + i_opt, a_opt, u_opt, d_opt, fast_opt, + mol_vec1[chain_i] + mol_vec2[chain_j], TMcut); + else + TMalign_main( + xa, ya, seqx, seqy, secx, secy, + t0, u0, TM1, TM2, TM3, TM4, TM5, + d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, + seqM, seqxA, seqyA, do_vec, + rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, + xlen, ylen, sequence, Lnorm_ass, d0_scale, + i_opt, a_opt, u_opt, d_opt, fast_opt, + mol_vec1[chain_i] + mol_vec2[chain_j], TMcut, 0); /* print result */ - if (outfmt_opt==0) print_version(); + if (outfmt_opt == 0) + print_version(); output_results( - xname.substr(dir1_opt.size()+dir_opt.size()), - yname.substr(dir2_opt.size()+dir_opt.size()), + xname.substr(dir1_opt.size() + dir_opt.size()), + yname.substr(dir2_opt.size() + dir_opt.size()), chainID_list1[chain_i].c_str(), chainID_list2[chain_j].c_str(), - xlen, ylen, t0, u0, TM1, TM2, + xlen, ylen, t0, u0, TM1, TM2, TM3, TM4, TM5, rmsd0, d0_out, seqM.c_str(), seqxA.c_str(), seqyA.c_str(), Liden, n_ali8, L_ali, TM_ali, rmsd_ali, TM_0, d0_0, d0A, d0B, - Lnorm_ass, d0_scale, d0a, d0u, - (m_opt?fname_matrix:"").c_str(), + Lnorm_ass, d0_scale, d0a, d0u, + (m_opt ? fname_matrix : "").c_str(), outfmt_opt, ter_opt, 0, split_opt, o_opt, - (o_opt?fname_super:"").c_str(), - i_opt, a_opt, u_opt, d_opt,mirror_opt, - resi_vec1, resi_vec2 ); + (o_opt ? fname_super : "").c_str(), + i_opt, a_opt, u_opt, d_opt, mirror_opt, + resi_vec1, resi_vec2); /* Done! Free memory */ seqM.clear(); seqxA.clear(); seqyA.clear(); DeleteArray(&ya, ylen); - delete [] seqy; - delete [] secy; + delete[] seqy; + delete[] secy; resi_vec2.clear(); do_vec.clear(); } // chain_j - if (chain2_list.size()>1) + if (chain2_list.size() > 1) { yname.clear(); - for (chain_j=0;chain_j().swap(model2parse2); t2 = clock(); - float diff = ((float)t2 - (float)t1)/CLOCKS_PER_SEC; + float diff = ((float)t2 - (float)t1) / CLOCKS_PER_SEC; printf("#Total CPU time is %5.2f seconds\n", diff); return 0; } diff --git a/TMalign.h b/TMalign.h index 383eaf5..e8009a0 100644 --- a/TMalign.h +++ b/TMalign.h @@ -10,88 +10,93 @@ // 1, collect those residues with dis3) + // there are not enough feasible pairs, relieve the threshold + if (n_cut < 3 && n_ali > 3) { inc++; - double dinc=(d+inc*0.5); + double dinc = (d + inc * 0.5); d_tmp = dinc * dinc; } - else break; - } + else + break; + } - *score1=score_sum/Lnorm; + *score1 = score_sum / Lnorm; return n_cut; } int score_fun8_standard(double **xa, double **ya, int n_ali, double d, - int i_ali[], double *score1, int score_sum_method, - double score_d8, double d0) + int i_ali[], double *score1, int score_sum_method, + double score_d8, double d0) { double score_sum = 0, di; - double d_tmp = d*d; - double d02 = d0*d0; - double score_d8_cut = score_d8*score_d8; + double d_tmp = d * d; + double d02 = d0 * d0; + double score_d8_cut = score_d8 * score_d8; int i, n_cut, inc = 0; while (1) { n_cut = 0; score_sum = 0; - for (i = 0; i3) + // there are not enough feasible pairs, relieve the threshold + if (n_cut < 3 && n_ali > 3) { inc++; - double dinc = (d + inc*0.5); + double dinc = (d + inc * 0.5); d_tmp = dinc * dinc; } - else break; + else + break; } *score1 = score_sum / n_ali; @@ -99,164 +104,167 @@ int score_fun8_standard(double **xa, double **ya, int n_ali, double d, } double TMscore8_search(double **r1, double **r2, double **xtm, double **ytm, - double **xt, int Lali, double t0[3], double u0[3][3], int simplify_step, - int score_sum_method, double *Rcomm, double local_d0_search, double Lnorm, - double score_d8, double d0) + double **xt, int Lali, double t0[3], double u0[3][3], int simplify_step, + int score_sum_method, double *Rcomm, double local_d0_search, double Lnorm, + double score_d8, double d0) { int i, m; - double score_max, score, rmsd; - const int kmax=Lali; + double score_max, score, rmsd; + const int kmax = Lali; int k_ali[kmax], ka, k; double t[3]; double u[3][3]; double d; - - //iterative parameters - int n_it=20; //maximum number of iterations - int n_init_max=6; //maximum number of different fragment length - int L_ini[n_init_max]; //fragment lengths, Lali, Lali/2, Lali/4 ... 4 - int L_ini_min=4; - if(Laliscore_max) - { - score_max=score; - - //save the rotation matrix - for(k=0; k<3; k++) + n_cut = score_fun8(xt, ytm, Lali, d, i_ali, &score, + score_sum_method, Lnorm, score_d8, d0); + if (score > score_max) + { + score_max = score; + + // save the rotation matrix + for (k = 0; k < 3; k++) { - t0[k]=t[k]; - u0[k][0]=u[k][0]; - u0[k][1]=u[k][1]; - u0[k][2]=u[k][2]; + t0[k] = t[k]; + u0[k][0] = u[k][0]; + u0[k][1] = u[k][1]; + u0[k][2] = u[k][2]; } } - - //try to extend the alignment iteratively + + // try to extend the alignment iteratively d = local_d0_search + 1; - for(int it=0; itscore_max) + n_cut = score_fun8(xt, ytm, Lali, d, i_ali, &score, + score_sum_method, Lnorm, score_d8, d0); + if (score > score_max) { - score_max=score; + score_max = score; - //save the rotation matrix - for(k=0; k<3; k++) + // save the rotation matrix + for (k = 0; k < 3; k++) { - t0[k]=t[k]; - u0[k][0]=u[k][0]; - u0[k][1]=u[k][1]; - u0[k][2]=u[k][2]; - } + t0[k] = t[k]; + u0[k][0] = u[k][0]; + u0[k][1] = u[k][1]; + u0[k][2] = u[k][2]; + } } - - //check if it converges - if(n_cut==ka) - { - for(k=0; kiL_max) i=iL_max; //do this to use the last missed fragment + i = i + simplify_step; // shift the fragment + if (i > iL_max) + i = iL_max; // do this to use the last missed fragment } - else if(i>=iL_max) break; - }//while(1) - //end of one fragment - }//for(i_init + else if (i >= iL_max) + break; + } // while(1) + // end of one fragment + } // for(i_init return score_max; } - -double TMscore8_search_standard( double **r1, double **r2, - double **xtm, double **ytm, double **xt, int Lali, - double t0[3], double u0[3][3], int simplify_step, int score_sum_method, - double *Rcomm, double local_d0_search, double score_d8, double d0) +double TMscore8_search_standard(double **r1, double **r2, + double **xtm, double **ytm, double **xt, int Lali, + double t0[3], double u0[3][3], int simplify_step, int score_sum_method, + double *Rcomm, double local_d0_search, double score_d8, double d0) { int i, m; double score_max, score, rmsd; @@ -266,15 +274,16 @@ double TMscore8_search_standard( double **r1, double **r2, double u[3][3]; double d; - //iterative parameters - int n_it = 20; //maximum number of iterations - int n_init_max = 6; //maximum number of different fragment length - int L_ini[n_init_max]; //fragment lengths, Lali, Lali/2, Lali/4 ... 4 + // iterative parameters + int n_it = 20; // maximum number of iterations + int n_init_max = 6; // maximum number of different fragment length + int L_ini[n_init_max]; // fragment lengths, Lali, Lali/2, Lali/4 ... 4 int L_ini_min = 4; - if (Laliscore_max) + if (score > score_max) { score_max = score; - //save the rotation matrix - for (k = 0; k<3; k++) + // save the rotation matrix + for (k = 0; k < 3; k++) { t0[k] = t[k]; u0[k][0] = u[k][0]; @@ -345,12 +354,12 @@ double TMscore8_search_standard( double **r1, double **r2, } } - //try to extend the alignment iteratively + // try to extend the alignment iteratively d = local_d0_search + 1; - for (int it = 0; itscore_max) + score_sum_method, score_d8, d0); + if (score > score_max) { score_max = score; - //save the rotation matrix - for (k = 0; k<3; k++) + // save the rotation matrix + for (k = 0; k < 3; k++) { t0[k] = t[k]; u0[k][0] = u[k][0]; @@ -383,309 +392,325 @@ double TMscore8_search_standard( double **r1, double **r2, } } - //check if it converges + // check if it converges if (n_cut == ka) { - for (k = 0; kiL_max) i = iL_max; //do this to use the last missed fragment + i = i + simplify_step; // shift the fragment + if (i > iL_max) + i = iL_max; // do this to use the last missed fragment } - else if (i >= iL_max) break; - }//while(1) - //end of one fragment - }//for(i_init + else if (i >= iL_max) + break; + } // while(1) + // end of one fragment + } // for(i_init return score_max; } -//Comprehensive TMscore search engine -// input: two vector sets: x, y -// an alignment invmap0[] between x and y -// simplify_step: 1 or 40 or other integers -// score_sum_method: 0 for score over all pairs -// 8 for socre over the pairs with dist=0) //aligned + j = invmap0[i]; + if (j >= 0) // aligned { - xtm[k][0]=x[j][0]; - xtm[k][1]=x[j][1]; - xtm[k][2]=x[j][2]; - - ytm[k][0]=y[i][0]; - ytm[k][1]=y[i][1]; - ytm[k][2]=y[i][2]; + xtm[k][0] = x[j][0]; + xtm[k][1] = x[j][1]; + xtm[k][2] = x[j][2]; + + ytm[k][0] = y[i][0]; + ytm[k][1] = y[i][1]; + ytm[k][2] = y[i][2]; k++; } } - //detailed search 40-->1 + // detailed search 40-->1 tmscore = TMscore8_search(r1, r2, xtm, ytm, xt, k, t, u, simplify_step, - score_sum_method, &rmsd, local_d0_search, Lnorm, score_d8, d0); + score_sum_method, &rmsd, local_d0_search, Lnorm, score_d8, d0); return tmscore; } -double detailed_search_standard( double **r1, double **r2, - double **xtm, double **ytm, double **xt, double **x, double **y, - int xlen, int ylen, int invmap0[], double t[3], double u[3][3], - int simplify_step, int score_sum_method, double local_d0_search, - const bool& bNormalize, double Lnorm, double score_d8, double d0) +double detailed_search_standard(double **r1, double **r2, + double **xtm, double **ytm, double **xt, double **x, double **y, + int xlen, int ylen, int invmap0[], double t[3], double u[3][3], + int simplify_step, int score_sum_method, double local_d0_search, + const bool &bNormalize, double Lnorm, double score_d8, double d0) { - //x is model, y is template, try to superpose onto y - int i, j, k; + // x is model, y is template, try to superpose onto y + int i, j, k; double tmscore; double rmsd; - k=0; - for(i=0; i=0) //aligned + j = invmap0[i]; + if (j >= 0) // aligned { - xtm[k][0]=x[j][0]; - xtm[k][1]=x[j][1]; - xtm[k][2]=x[j][2]; - - ytm[k][0]=y[i][0]; - ytm[k][1]=y[i][1]; - ytm[k][2]=y[i][2]; + xtm[k][0] = x[j][0]; + xtm[k][1] = x[j][1]; + xtm[k][2] = x[j][2]; + + ytm[k][0] = y[i][0]; + ytm[k][1] = y[i][1]; + ytm[k][2] = y[i][2]; k++; } } - //detailed search 40-->1 - tmscore = TMscore8_search_standard( r1, r2, xtm, ytm, xt, k, t, u, - simplify_step, score_sum_method, &rmsd, local_d0_search, score_d8, d0); - if (bNormalize)// "-i", to use standard_TMscore, then bNormalize=true, else bNormalize=false; + // detailed search 40-->1 + tmscore = TMscore8_search_standard(r1, r2, xtm, ytm, xt, k, t, u, + simplify_step, score_sum_method, &rmsd, local_d0_search, score_d8, d0); + if (bNormalize) // "-i", to use standard_TMscore, then bNormalize=true, else bNormalize=false; tmscore = tmscore * k / Lnorm; return tmscore; } -//compute the score quickly in three iterations -double get_score_fast( double **r1, double **r2, double **xtm, double **ytm, - double **x, double **y, int xlen, int ylen, int invmap[], - double d0, double d0_search, double t[3], double u[3][3]) +// compute the score quickly in three iterations +double get_score_fast(double **r1, double **r2, double **xtm, double **ytm, + double **x, double **y, int xlen, int ylen, int invmap[], + double d0, double d0_search, double t[3], double u[3][3]) { double rms, tmscore, tmscore1, tmscore2; int i, j, k; - k=0; - for(j=0; j=0) - { - r1[k][0]=x[i][0]; - r1[k][1]=x[i][1]; - r1[k][2]=x[i][2]; - - r2[k][0]=y[j][0]; - r2[k][1]=y[j][1]; - r2[k][2]=y[j][2]; - - xtm[k][0]=x[i][0]; - xtm[k][1]=x[i][1]; - xtm[k][2]=x[i][2]; - - ytm[k][0]=y[j][0]; - ytm[k][1]=y[j][1]; - ytm[k][2]=y[j][2]; - + k = 0; + for (j = 0; j < ylen; j++) + { + i = invmap[j]; + if (i >= 0) + { + r1[k][0] = x[i][0]; + r1[k][1] = x[i][1]; + r1[k][2] = x[i][2]; + + r2[k][0] = y[j][0]; + r2[k][1] = y[j][1]; + r2[k][2] = y[j][2]; + + xtm[k][0] = x[i][0]; + xtm[k][1] = x[i][1]; + xtm[k][2] = x[i][2]; + + ytm[k][0] = y[j][0]; + ytm[k][1] = y[j][1]; + ytm[k][2] = y[j][2]; + k++; } - else if(i!=-1) PrintErrorAndQuit("Wrong map!\n"); + else if (i != -1) + PrintErrorAndQuit("Wrong map!\n"); } Kabsch(r1, r2, k, 1, &rms, t, u); - - //evaluate score + + // evaluate score double di; - const int len=k; - double dis[len]; - double d00=d0_search; - double d002=d00*d00; - double d02=d0*d0; - - int n_ali=k; + const int len = k; + double dis[len]; + double d00 = d0_search; + double d002 = d00 * d00; + double d02 = d0 * d0; + + int n_ali = k; double xrot[3]; - tmscore=0; - for(k=0; k dis_vec(dis, dis+n_ali); + tmscore = 0; + for (k = 0; k < n_ali; k++) + { + transform(t, u, &xtm[k][0], xrot); + di = dist(xrot, &ytm[k][0]); + dis[k] = di; + tmscore += 1 / (1 + di / d02); + } + + // second iteration + double d002t = d002; + vector dis_vec(dis, dis + n_ali); sort(dis_vec.begin(), dis_vec.end()); - if (d002t3) d002t += 0.5; - else break; + // there are not enough feasible pairs, relieve the threshold + if (j < 3 && n_ali > 3) + d002t += 0.5; + else + break; } - - if(n_ali!=j) + + if (n_ali != j) { Kabsch(r1, r2, j, 1, &rms, t, u); - tmscore1=0; - for(k=0; k dis_vec(dis, dis+n_ali); + tmscore1 = 0; + for (k = 0; k < n_ali; k++) + { + transform(t, u, &xtm[k][0], xrot); + di = dist(xrot, &ytm[k][0]); + dis[k] = di; + tmscore1 += 1 / (1 + di / d02); + } + + // third iteration + d002t = d002 + 1; + vector dis_vec(dis, dis + n_ali); sort(dis_vec.begin(), dis_vec.end()); - if (d002t3) d002t += 0.5; - else break; + // there are not enough feasible pairs, relieve the threshold + if (j < 3 && n_ali > 3) + d002t += 0.5; + else + break; } - //evaluate the score + // evaluate the score Kabsch(r1, r2, j, 1, &rms, t, u); - tmscore2=0; - for(k=0; k=tmscore) tmscore=tmscore1; - if(tmscore2>=tmscore) tmscore=tmscore2; + + if (tmscore1 >= tmscore) + tmscore = tmscore1; + if (tmscore2 >= tmscore) + tmscore = tmscore2; return tmscore; // no need to normalize this score because it will not be used for latter scoring } - -//perform gapless threading to find the best initial alignment -//input: x, y, xlen, ylen -//output: y2x0 stores the best alignment: e.g., -//y2x0[j]=i means: -//the jth element in y is aligned to the ith element in x if i>=0 -//the jth element in y is aligned to a gap in x if i==-1 +// perform gapless threading to find the best initial alignment +// input: x, y, xlen, ylen +// output: y2x0 stores the best alignment: e.g., +// y2x0[j]=i means: +// the jth element in y is aligned to the ith element in x if i>=0 +// the jth element in y is aligned to a gap in x if i==-1 double get_initial(double **r1, double **r2, double **xtm, double **ytm, - double **x, double **y, int xlen, int ylen, int *y2x, - double d0, double d0_search, const bool fast_opt, - double t[3], double u[3][3]) + double **x, double **y, int xlen, int ylen, int *y2x, + double d0, double d0_search, const bool fast_opt, + double t[3], double u[3][3]) { - int min_len=getmin(xlen, ylen); - if(min_len<3) PrintErrorAndQuit("Sequence is too short <3!\n"); - - int min_ali= min_len/2; //minimum size of considered fragment - if(min_ali<=5) min_ali=5; + int min_len = getmin(xlen, ylen); + if (min_len < 3) + PrintErrorAndQuit("Sequence is too short <3!\n"); + + int min_ali = min_len / 2; // minimum size of considered fragment + if (min_ali <= 5) + min_ali = 5; int n1, n2; - n1 = -ylen+min_ali; - n2 = xlen-min_ali; + n1 = -ylen + min_ali; + n2 = xlen - min_ali; int i, j, k, k_best; - double tmscore, tmscore_max=-1; + double tmscore, tmscore_max = -1; - k_best=n1; - for(k=n1; k<=n2; k+=(fast_opt)?5:1) + k_best = n1; + for (k = n1; k <= n2; k += (fast_opt) ? 5 : 1) { - //get the map - for(j=0; j=0 && i= 0 && i < xlen) + y2x[j] = i; + else + y2x[j] = -1; } - - //evaluate the map quickly in three iterations - //this is not real tmscore, it is used to evaluate the goodness of the initial alignment - tmscore=get_score_fast(r1, r2, xtm, ytm, - x, y, xlen, ylen, y2x, d0,d0_search, t, u); - if(tmscore>=tmscore_max) + + // evaluate the map quickly in three iterations + // this is not real tmscore, it is used to evaluate the goodness of the initial alignment + tmscore = get_score_fast(r1, r2, xtm, ytm, + x, y, xlen, ylen, y2x, d0, d0_search, t, u); + if (tmscore >= tmscore_max) { - tmscore_max=tmscore; - k_best=k; + tmscore_max = tmscore; + k_best = k; } } - - //extract the best map - k=k_best; - for(j=0; j=0 && i= 0 && i < xlen) + y2x[j] = i; + else + y2x[j] = -1; + } return tmscore_max; } @@ -693,179 +718,205 @@ double get_initial(double **r1, double **r2, double **xtm, double **ytm, void smooth(int *sec, int len) { int i, j; - //smooth single --x-- => ----- - for (i=2; i ----- + for (i = 2; i < len - 2; i++) { - if(sec[i]==2 || sec[i]==4) + if (sec[i] == 2 || sec[i] == 4) { - j=sec[i]; - if (sec[i-2]!=j && sec[i-1]!=j && sec[i+1]!=j && sec[i+2]!=j) - sec[i]=1; + j = sec[i]; + if (sec[i - 2] != j && sec[i - 1] != j && sec[i + 1] != j && sec[i + 2] != j) + sec[i] = 1; } } - // smooth double + // smooth double // --xx-- => ------ - for (i=0; icoil, 2->helix, 3->turn, 4->strand */ void make_sec(double **x, int len, char *sec) { int j1, j2, j3, j4, j5; double d13, d14, d15, d24, d25, d35; - for(int i=0; i=0 && j5= 0 && j5 < len) + { + d13 = sqrt(dist(x[j1], x[j3])); + d14 = sqrt(dist(x[j1], x[j4])); + d15 = sqrt(dist(x[j1], x[j5])); + d24 = sqrt(dist(x[j2], x[j4])); + d25 = sqrt(dist(x[j2], x[j5])); + d35 = sqrt(dist(x[j3], x[j5])); + sec[i] = sec_str(d13, d14, d15, d24, d25, d35); + } + } + sec[len] = 0; } /* a c d b: a paired to b, c paired to d */ -bool overlap(const int a1,const int b1,const int c1,const int d1, - const int a2,const int b2,const int c2,const int d2) +bool overlap(const int a1, const int b1, const int c1, const int d1, + const int a2, const int b2, const int c2, const int d2) { - return (a2>=a1&&a2<=c1)||(c2>=a1&&c2<=c1)|| - (d2>=a1&&d2<=c1)||(b2>=a1&&b2<=c1)|| - (a2>=d1&&a2<=b1)||(c2>=d1&&c2<=b1)|| - (d2>=d1&&d2<=b1)||(b2>=d1&&b2<=b1); + return (a2 >= a1 && a2 <= c1) || (c2 >= a1 && c2 <= c1) || + (d2 >= a1 && d2 <= c1) || (b2 >= a1 && b2 <= c1) || + (a2 >= d1 && a2 <= b1) || (c2 >= d1 && c2 <= b1) || + (d2 >= d1 && d2 <= b1) || (b2 >= d1 && b2 <= b1); } /* find base pairing stacks in RNA*/ -void sec_str(int len,char *seq, const vector >&bp, - int a, int b,int &c, int &d) +void sec_str(int len, char *seq, const vector> &bp, + int a, int b, int &c, int &d) { int i; - - for (i=0;i0) + if (a + i < len - 3 && b - i > 0) { - if (a+iunpair, 2->paired with upstream, 3->paired with downstream */ -void make_sec(char *seq, double **x, int len, char *sec,const string atom_opt) +void make_sec(char *seq, double **x, int len, char *sec, const string atom_opt) { - int ii,jj,i,j; + int ii, jj, i, j; - float lb=12.5; // lower bound for " C3'" - float ub=15.0; // upper bound for " C3'" - if (atom_opt==" C4'") {lb=14.0;ub=16.0;} - else if(atom_opt==" C5'") {lb=16.0;ub=18.0;} - else if(atom_opt==" O3'") {lb=13.5;ub=16.5;} - else if(atom_opt==" O5'") {lb=15.5;ub=18.5;} - else if(atom_opt==" P ") {lb=16.5;ub=21.0;} + float lb = 12.5; // lower bound for " C3'" + float ub = 15.0; // upper bound for " C3'" + if (atom_opt == " C4'") + { + lb = 14.0; + ub = 16.0; + } + else if (atom_opt == " C5'") + { + lb = 16.0; + ub = 18.0; + } + else if (atom_opt == " O3'") + { + lb = 13.5; + ub = 16.5; + } + else if (atom_opt == " O5'") + { + lb = 15.5; + ub = 18.5; + } + else if (atom_opt == " P ") + { + lb = 16.5; + ub = 21.0; + } float dis; - vector bp_tmp(len,false); - vector > bp(len,bp_tmp); + vector bp_tmp(len, false); + vector> bp(len, bp_tmp); bp_tmp.clear(); - for (i=0; ilb && dis lb && dis < ub); } } } - + // From 5' to 3': A0_var C0_var D0_var B0_var: A0_var paired to B0_var, C0_var paired to D0_var - vector A0_var,B0_var,C0_var,D0_var; - for (i=0; i A0_var, B0_var, C0_var, D0_var; + for (i = 0; i < len - 2; i++) { - for (j=i+3; j0 && j+1 0 && j + 1 < len && bp[i - 1][j + 1]) + continue; + if (!bp[i + 1][j - 1]) + continue; + sec_str(len, seq, bp, i, j, ii, jj); + if (jj < i || j < ii) { - ii=i; - jj=j; + ii = i; + jj = j; } A0_var.push_back(i); B0_var.push_back(j); @@ -873,9 +924,9 @@ void make_sec(char *seq, double **x, int len, char *sec,const string atom_opt) D0_var.push_back(jj); } } - - //int sign; - for (i=0;iC0_var[i]) break; - sec[A0_var[i]+j]='<'; - sec[D0_var[i]+j]='>'; + if (A0_var[i] + j > C0_var[i]) + break; + sec[A0_var[i] + j] = '<'; + sec[D0_var[i] + j] = '>'; } } - sec[len]=0; + sec[len] = 0; /* clean up */ A0_var.clear(); @@ -919,39 +971,39 @@ void make_sec(char *seq, double **x, int len, char *sec,const string atom_opt) bp.clear(); } -//get initial alignment from secondary structure alignment -//input: x, y, xlen, ylen -//output: y2x stores the best alignment: e.g., -//y2x[j]=i means: -//the jth element in y is aligned to the ith element in x if i>=0 -//the jth element in y is aligned to a gap in x if i==-1 +// get initial alignment from secondary structure alignment +// input: x, y, xlen, ylen +// output: y2x stores the best alignment: e.g., +// y2x[j]=i means: +// the jth element in y is aligned to the ith element in x if i>=0 +// the jth element in y is aligned to a gap in x if i==-1 void get_initial_ss(bool **path, double **val, - const char *secx, const char *secy, int xlen, int ylen, int *y2x) + const char *secx, const char *secy, int xlen, int ylen, int *y2x) { - double gap_open=-1.0; + double gap_open = -1.0; NWDP_TM(path, val, secx, secy, xlen, ylen, gap_open, y2x); } - // get_initial5 in TMalign fortran, get_initial_local in TMalign c by yangji -//get initial alignment of local structure superposition -//input: x, y, xlen, ylen -//output: y2x stores the best alignment: e.g., -//y2x[j]=i means: -//the jth element in y is aligned to the ith element in x if i>=0 -//the jth element in y is aligned to a gap in x if i==-1 -bool get_initial5( double **r1, double **r2, double **xtm, double **ytm, - bool **path, double **val, - double **x, double **y, int xlen, int ylen, int *y2x, - double d0, double d0_search, const bool fast_opt, const double D0_MIN) +// get initial alignment of local structure superposition +// input: x, y, xlen, ylen +// output: y2x stores the best alignment: e.g., +// y2x[j]=i means: +// the jth element in y is aligned to the ith element in x if i>=0 +// the jth element in y is aligned to a gap in x if i==-1 +bool get_initial5(double **r1, double **r2, double **xtm, double **ytm, + bool **path, double **val, + double **x, double **y, int xlen, int ylen, int *y2x, + double d0, double d0_search, const bool fast_opt, const double D0_MIN) { double GL, rmsd; double t[3]; double u[3][3]; double d01 = d0 + 1.5; - if (d01 < D0_MIN) d01 = D0_MIN; - double d02 = d01*d01; + if (d01 < D0_MIN) + d01 = D0_MIN; + double d02 = d01 * d01; double GLmax = 0; int aL = getmin(xlen, ylen); @@ -984,7 +1036,7 @@ bool get_initial5( double **r1, double **r2, double **xtm, double **ytm, n_jump2 = ylen / 3; // fragment to superimpose--------------> - int n_frag[2] = { 20, 100 }; + int n_frag[2] = {20, 100}; if (n_frag[0] > (aL / 3)) n_frag[0] = aL / 3; if (n_frag[1] > (aL / 2)) @@ -993,8 +1045,8 @@ bool get_initial5( double **r1, double **r2, double **xtm, double **ytm, // start superimpose search--------------> if (fast_opt) { - n_jump1*=5; - n_jump2*=5; + n_jump1 *= 5; + n_jump2 *= 5; } bool flag = false; for (int i_frag = 0; i_frag < 2; i_frag++) @@ -1002,11 +1054,11 @@ bool get_initial5( double **r1, double **r2, double **xtm, double **ytm, int m1 = xlen - n_frag[i_frag] + 1; int m2 = ylen - n_frag[i_frag] + 1; - for (int i = 0; iGLmax) + invmap, d0, d0_search, t, u); + if (GL > GLmax) { GLmax = GL; - for (int ii = 0; ii=0) - { - r1[k][0]=x[i][0]; - r1[k][1]=x[i][1]; - r1[k][2]=x[i][2]; - - r2[k][0]=y[j][0]; - r2[k][1]=y[j][1]; - r2[k][2]=y[j][2]; - + int i, k = 0; + for (int j = 0; j < ylen; j++) + { + i = y2x[j]; + if (i >= 0) + { + r1[k][0] = x[i][0]; + r1[k][1] = x[i][1]; + r1[k][2] = x[i][2]; + + r2[k][0] = y[j][0]; + r2[k][1] = y[j][1]; + r2[k][2] = y[j][2]; + k++; } } Kabsch(r1, r2, k, 1, &rmsd, t, u); - - for(int ii=0; ii=0 -//the jth element in y is aligned to a gap in x if i==-1 +// get initial alignment from secondary structure and previous alignments +// input: x, y, xlen, ylen +// output: y2x stores the best alignment: e.g., +// y2x[j]=i means: +// the jth element in y is aligned to the ith element in x if i>=0 +// the jth element in y is aligned to a gap in x if i==-1 void get_initial_ssplus(double **r1, double **r2, double **score, bool **path, - double **val, const char *secx, const char *secy, double **x, double **y, - int xlen, int ylen, int *y2x0, int *y2x, const double D0_MIN, double d0) + double **val, const char *secx, const char *secy, double **x, double **y, + int xlen, int ylen, int *y2x0, int *y2x, const double D0_MIN, double d0) { - //create score matrix for DP + // create score matrix for DP score_matrix_rmsd_sec(r1, r2, score, secx, secy, x, y, xlen, ylen, - y2x0, D0_MIN,d0); - - double gap_open=-1.0; + y2x0, D0_MIN, d0); + + double gap_open = -1.0; NWDP_TM(score, path, val, xlen, ylen, gap_open, y2x); } - void find_max_frag(double **x, int len, int *start_max, - int *end_max, double dcu0, const bool fast_opt) + int *end_max, double dcu0, const bool fast_opt) { - int r_min, fra_min=4; //minimum fragment for search - if (fast_opt) fra_min=8; + int r_min, fra_min = 4; // minimum fragment for search + if (fast_opt) + fra_min = 8; int start; - int Lfr_max=0; + int Lfr_max = 0; - r_min= (int) (len*1.0/3.0); //minimum fragment, in case too small protein - if(r_min > fra_min) r_min=fra_min; - - int inc=0; - double dcu0_cut=dcu0*dcu0;; - double dcu_cut=dcu0_cut; + r_min = (int)(len * 1.0 / 3.0); // minimum fragment, in case too small protein + if (r_min > fra_min) + r_min = fra_min; - while(Lfr_max < r_min) - { - Lfr_max=0; - int j=1; //number of residues at nf-fragment - start=0; - for(int i=1; i Lfr_max) + if (j > Lfr_max) { - Lfr_max=j; - *start_max=start; - *end_max=i; + Lfr_max = j; + *start_max = start; + *end_max = i; } - j=1; + j = 1; } } else { - if(j>Lfr_max) + if (j > Lfr_max) { - Lfr_max=j; - *start_max=start; - *end_max=i-1; + Lfr_max = j; + *start_max = start; + *end_max = i - 1; } - j=1; - start=i; + j = 1; + start = i; } - }// for i; - - if(Lfr_max < r_min) + } // for i; + + if (Lfr_max < r_min) { inc++; - double dinc=pow(1.1, (double) inc) * dcu0; - dcu_cut= dinc*dinc; + double dinc = pow(1.1, (double)inc) * dcu0; + dcu_cut = dinc * dinc; } - }//while <; + } // while <; } -//perform fragment gapless threading to find the best initial alignment -//input: x, y, xlen, ylen -//output: y2x0 stores the best alignment: e.g., -//y2x0[j]=i means: -//the jth element in y is aligned to the ith element in x if i>=0 -//the jth element in y is aligned to a gap in x if i==-1 +// perform fragment gapless threading to find the best initial alignment +// input: x, y, xlen, ylen +// output: y2x0 stores the best alignment: e.g., +// y2x0[j]=i means: +// the jth element in y is aligned to the ith element in x if i>=0 +// the jth element in y is aligned to a gap in x if i==-1 double get_initial_fgt(double **r1, double **r2, double **xtm, double **ytm, - double **x, double **y, int xlen, int ylen, - int *y2x, double d0, double d0_search, - double dcu0, const bool fast_opt, double t[3], double u[3][3]) + double **x, double **y, int xlen, int ylen, + int *y2x, double d0, double d0_search, + double dcu0, const bool fast_opt, double t[3], double u[3][3]) { - int fra_min=4; //minimum fragment for search - if (fast_opt) fra_min=8; - int fra_min1=fra_min-1; //cutoff for shift, save time + int fra_min = 4; // minimum fragment for search + if (fast_opt) + fra_min = 8; + int fra_min1 = fra_min - 1; // cutoff for shift, save time - int xstart=0, ystart=0, xend=0, yend=0; + int xstart = 0, ystart = 0, xend = 0, yend = 0; find_max_frag(x, xlen, &xstart, &xend, dcu0, fast_opt); find_max_frag(y, ylen, &ystart, ¥d, dcu0, fast_opt); - - int Lx = xend-xstart+1; - int Ly = yend-ystart+1; + int Lx = xend - xstart + 1; + int Ly = yend - ystart + 1; int *ifr, *y2x_; - int L_fr=getmin(Lx, Ly); - ifr= new int[L_fr]; - y2x_= new int[ylen+1]; + int L_fr = getmin(Lx, Ly); + ifr = new int[L_fr]; + y2x_ = new int[ylen + 1]; - //select what piece will be used. The original implement may cause - //asymetry, but only when xlen==ylen and Lx==Ly - //if L1=Lfr1 and L2=Lfr2 (normal proteins), it will be the same as initial1 + // select what piece will be used. The original implement may cause + // asymetry, but only when xlen==ylen and Lx==Ly + // if L1=Lfr1 and L2=Lfr2 (normal proteins), it will be the same as initial1 - if(LxLy || (Lx==Ly && xlen>ylen)) - { - for(int i=0; i Ly || (Lx == Ly && xlen > ylen)) + { + for (int i = 0; i < L_fr; i++) + ifr[i] = ystart + i; } else // solve asymetric for 1x5gA vs 2q7nA5 { /* In this case, L0==xlen==ylen; L_fr==Lx==Ly */ - int L0=xlen; - double tmscore, tmscore_max=-1; + int L0 = xlen; + double tmscore, tmscore_max = -1; int i, j, k; int n1, n2; int min_len; int min_ali; /* part 1, normalized by xlen */ - for(i=0; i=0 && i= 0 && i < L1) + y2x_[j] = ifr[i]; + else + y2x_[j] = -1; } - //evaluate the map quickly in three iterations - tmscore=get_score_fast(r1, r2, xtm, ytm, x, y, xlen, ylen, y2x_, - d0, d0_search, t, u); + // evaluate the map quickly in three iterations + tmscore = get_score_fast(r1, r2, xtm, ytm, x, y, xlen, ylen, y2x_, + d0, d0_search, t, u); - if(tmscore>=tmscore_max) + if (tmscore >= tmscore_max) { - tmscore_max=tmscore; - for(j=0; j=0 && i= 0 && i < xlen) + y2x_[ifr[j]] = i; } - - //evaluate the map quickly in three iterations - tmscore=get_score_fast(r1, r2, xtm, ytm, - x, y, xlen, ylen, y2x_, d0,d0_search, t, u); - if(tmscore>=tmscore_max) + + // evaluate the map quickly in three iterations + tmscore = get_score_fast(r1, r2, xtm, ytm, + x, y, xlen, ylen, y2x_, d0, d0_search, t, u); + if (tmscore >= tmscore_max) { - tmscore_max=tmscore; - for(j=0; j=0 && i= 0 && i < L1) + y2x_[j] = ifr[i]; + else + y2x_[j] = -1; } - //evaluate the map quickly in three iterations - tmscore=get_score_fast(r1, r2, xtm, ytm, x, y, xlen, ylen, y2x_, - d0, d0_search, t, u); + // evaluate the map quickly in three iterations + tmscore = get_score_fast(r1, r2, xtm, ytm, x, y, xlen, ylen, y2x_, + d0, d0_search, t, u); - if(tmscore>=tmscore_max) + if (tmscore >= tmscore_max) { - tmscore_max=tmscore; - for(j=0; j=0 && i= 0 && i < xlen) + y2x_[ifr[j]] = i; } - - //evaluate the map quickly in three iterations - tmscore=get_score_fast(r1, r2, xtm, ytm, - x, y, xlen, ylen, y2x_, d0,d0_search, t, u); - if(tmscore>=tmscore_max) + + // evaluate the map quickly in three iterations + tmscore = get_score_fast(r1, r2, xtm, ytm, + x, y, xlen, ylen, y2x_, d0, d0_search, t, u); + if (tmscore >= tmscore_max) { - tmscore_max=tmscore; - for(j=0; j=0) //aligned + if (i >= 0) // aligned { - xtm[k][0]=x[i][0]; - xtm[k][1]=x[i][1]; - xtm[k][2]=x[i][2]; - - ytm[k][0]=y[j][0]; - ytm[k][1]=y[j][1]; - ytm[k][2]=y[j][2]; + xtm[k][0] = x[i][0]; + xtm[k][1] = x[i][1]; + xtm[k][2] = x[i][2]; + + ytm[k][0] = y[j][0]; + ytm[k][1] = y[j][1]; + ytm[k][2] = y[j][2]; k++; } } tmscore = TMscore8_search(r1, r2, xtm, ytm, xt, k, t, u, - simplify_step, score_sum_method, &rmsd, local_d0_search, - Lnorm, score_d8, d0); + simplify_step, score_sum_method, &rmsd, local_d0_search, + Lnorm, score_d8, d0); - - if(tmscore>tmscore_max) + if (tmscore > tmscore_max) { - tmscore_max=tmscore; - for(i=0; i0) + + if (iteration > 0) { - if(fabs(tmscore_old-tmscore)<0.000001) break; + if (fabs(tmscore_old - tmscore) < 0.000001) + break; } - tmscore_old=tmscore; - }// for iteration - - }//for gapopen - - - delete []invmap; + tmscore_old = tmscore; + } // for iteration + + } // for gapopen + + delete[] invmap; return tmscore_max; } - /* script format: 0 - no script; 1 - pymol; 3 - chimerax */ void output_pymol(const string xname, const string yname, - const string fname_super, double t[3], double u[3][3], const int ter_opt, - const int mm_opt, const int split_opt, const int mirror_opt, - const char *seqM, const char *seqxA, const char *seqyA, - const vector&resi_vec1, const vector&resi_vec2, - const string chainID1, const string chainID2, const int o_opt=1) + const string fname_super, double t[3], double u[3][3], const int ter_opt, + const int mm_opt, const int split_opt, const int mirror_opt, + const char *seqM, const char *seqxA, const char *seqyA, + const vector &resi_vec1, const vector &resi_vec2, + const string chainID1, const string chainID2, const int o_opt = 1) { - int compress_type=0; // uncompressed file + int compress_type = 0; // uncompressed file ifstream fin; #ifndef REDI_PSTREAM_H_SEEN ifstream fin_gz; #else redi::ipstream fin_gz; // if file is compressed - if (xname.size()>=3 && - xname.substr(xname.size()-3,3)==".gz") + if (xname.size() >= 3 && + xname.substr(xname.size() - 3, 3) == ".gz") { - fin_gz.open("gunzip -c "+xname); - compress_type=1; + fin_gz.open("gunzip -c " + xname); + compress_type = 1; } - else if (xname.size()>=4 && - xname.substr(xname.size()-4,4)==".bz2") + else if (xname.size() >= 4 && + xname.substr(xname.size() - 4, 4) == ".bz2") { - fin_gz.open("bzcat "+xname); - compress_type=2; + fin_gz.open("bzcat " + xname); + compress_type = 2; } else #endif - fin.open(xname.c_str()); + fin.open(xname.c_str()); stringstream buf; stringstream buf_pymol; @@ -1510,132 +1580,160 @@ void output_pymol(const string xname, const string yname, double x1[3]; // after transform /* for PDBx/mmCIF only */ - map _atom_site; + map _atom_site; size_t atom_site_pos; vector line_vec; - int infmt=-1; // 0 - PDB, 3 - PDBx/mmCIF + int infmt = -1; // 0 - PDB, 3 - PDBx/mmCIF - while (compress_type?fin_gz.good():fin.good()) + while (compress_type ? fin_gz.good() : fin.good()) { - if (compress_type) getline(fin_gz, line); - else getline(fin, line); - if (line.compare(0, 6, "ATOM ")==0 || - line.compare(0, 6, "HETATM")==0) // PDB format + if (compress_type) + getline(fin_gz, line); + else + getline(fin, line); + if (line.compare(0, 6, "ATOM ") == 0 || + line.compare(0, 6, "HETATM") == 0) // PDB format { - infmt=0; - x[0]=atof(line.substr(30,8).c_str()); - x[1]=atof(line.substr(38,8).c_str()); - x[2]=atof(line.substr(46,8).c_str()); - if (mirror_opt) x[2]=-x[2]; + infmt = 0; + x[0] = atof(line.substr(30, 8).c_str()); + x[1] = atof(line.substr(38, 8).c_str()); + x[2] = atof(line.substr(46, 8).c_str()); + if (mirror_opt) + x[2] = -x[2]; transform(t, u, x, x1); - buf<=1 && line.compare(0,3,"END")==0) break; + buf << line << '\n'; + if (ter_opt >= 1 && line.compare(0, 3, "END") == 0) + break; } } - if (compress_type) fin_gz.close(); - else fin.close(); + if (compress_type) + fin_gz.close(); + else + fin.close(); - string fname_super_full=fname_super; - if (infmt==0) fname_super_full+=".pdb"; - else if (infmt==3) fname_super_full+=".cif"; + string fname_super_full = fname_super; + if (infmt == 0) + fname_super_full += ".pdb"; + else if (infmt == 3) + fname_super_full += ".cif"; ofstream fp; fp.open(fname_super_full.c_str()); - fp<=1) // align one chain from model 1 + if (split_opt == 2 && ter_opt >= 1) // align one chain from model 1 { - if (o_opt==1) + if (o_opt == 1) { - chain1_sele=" and c. "+chainID1.substr(1); - chain2_sele=" and c. "+chainID2.substr(1); + chain1_sele = " and c. " + chainID1.substr(1); + chain2_sele = " and c. " + chainID2.substr(1); } - else if (o_opt==3) + else if (o_opt == 3) { - chain1_sele="/"+chainID1.substr(1); - chain2_sele="/"+chainID2.substr(1); + chain1_sele = "/" + chainID1.substr(1); + chain2_sele = "/" + chainID2.substr(1); } } - else if (split_opt==2 && ter_opt==0) // align one chain from each model + else if (split_opt == 2 && ter_opt == 0) // align one chain from each model { - for (i=1;i pml_list; - pml_list.push_back(fname_super+""); - pml_list.push_back(fname_super+"_atm"); - pml_list.push_back(fname_super+"_all"); - pml_list.push_back(fname_super+"_all_atm"); - pml_list.push_back(fname_super+"_all_atm_lig"); + pml_list.push_back(fname_super + ""); + pml_list.push_back(fname_super + "_atm"); + pml_list.push_back(fname_super + "_all"); + pml_list.push_back(fname_super + "_all_atm"); + pml_list.push_back(fname_super + "_all_atm_lig"); - for (int p=0;p&chain_list, - const int infmt_opt, double **ut_mat, const string &fname_super, - const int o_opt=1) +void output_mTMalign_pymol(const vector &chain_list, + const int infmt_opt, double **ut_mat, const string &fname_super, + const int o_opt = 1) { - int compress_type=0; // uncompressed file + int compress_type = 0; // uncompressed file size_t m; string name; double t[3]; double u[3][3]; - int ui,uj; + int ui, uj; string filename; vector color_list; color_list.push_back("red"); @@ -1918,262 +2045,294 @@ void output_mTMalign_pymol(const vector&chain_list, color_list.push_back("grey"); stringstream buf_pymol; - if (o_opt==1) - buf_pymol<<"#!/usr/bin/env pymol\n"; - else if (o_opt==3) - buf_pymol<<"#!/usr/bin/env chimerax --script\n"; - for (m=0;m=3 && - name.substr(name.size()-3,3)==".gz") + if (name.size() >= 3 && + name.substr(name.size() - 3, 3) == ".gz") { - fin_gz.open("gunzip -c "+name); - compress_type=1; + fin_gz.open("gunzip -c " + name); + compress_type = 1; } - else if (name.size()>=4 && - name.substr(name.size()-4,4)==".bz2") + else if (name.size() >= 4 && + name.substr(name.size() - 4, 4) == ".bz2") { - fin_gz.open("bzcat "+name); - compress_type=2; + fin_gz.open("bzcat " + name); + compress_type = 2; } else #endif fin.open(name.c_str()); stringstream buf; - buf< _atom_site; + map _atom_site; size_t atom_site_pos; vector line_vec; - int infmt=-1; // 0 - PDB, 3 - PDBx/mmCIF + int infmt = -1; // 0 - PDB, 3 - PDBx/mmCIF - while (compress_type?fin_gz.good():fin.good()) + while (compress_type ? fin_gz.good() : fin.good()) { - if (compress_type) getline(fin_gz, line); - else getline(fin, line); - if (line.compare(0, 6, "ATOM ")==0 || - line.compare(0, 6, "HETATM")==0) // PDB format + if (compress_type) + getline(fin_gz, line); + else + getline(fin, line); + if (line.compare(0, 6, "ATOM ") == 0 || + line.compare(0, 6, "HETATM") == 0) // PDB format { - infmt=0; - x[0]=atof(line.substr(30,8).c_str()); - x[1]=atof(line.substr(38,8).c_str()); - x[2]=atof(line.substr(46,8).c_str()); + infmt = 0; + x[0] = atof(line.substr(30, 8).c_str()); + x[1] = atof(line.substr(38, 8).c_str()); + x[2] = atof(line.substr(46, 8).c_str()); transform(t, u, x, x1); - buf< ().swap(color_list); + vector().swap(color_list); } void output_rasmol(const string xname, const string yname, - const string fname_super, double t[3], double u[3][3], const int ter_opt, - const int mm_opt, const int split_opt, const int mirror_opt, - const char *seqM, const char *seqxA, const char *seqyA, - const vector&resi_vec1, const vector&resi_vec2, - const string chainID1, const string chainID2, - const int xlen, const int ylen, const double d0A, const int n_ali8, - const double rmsd, const double TM1, const double Liden) + const string fname_super, double t[3], double u[3][3], const int ter_opt, + const int mm_opt, const int split_opt, const int mirror_opt, + const char *seqM, const char *seqxA, const char *seqyA, + const vector &resi_vec1, const vector &resi_vec2, + const string chainID1, const string chainID2, + const int xlen, const int ylen, const double d0A, const int n_ali8, + const double rmsd, const double TM1, const double Liden) { stringstream buf; stringstream buf_all; stringstream buf_atm; stringstream buf_all_atm; stringstream buf_all_atm_lig; - //stringstream buf_pdb; + // stringstream buf_pdb; stringstream buf_tm; string line; - double x[3]; // before transform - double x1[3]; // after transform + double x[3]; // before transform + double x1[3]; // after transform bool after_ter; // true if passed the "TER" line in PDB string asym_id; // chain ID - buf_tm<<"REMARK US-align" - <<"\nREMARK Structure 1:"<=1) // align one chain from model 1 + if (split_opt == 2 && ter_opt >= 1) // align one chain from model 1 { - chain1_sele=chainID1.substr(1); - chain2_sele=chainID2.substr(1); + chain1_sele = chainID1.substr(1); + chain2_sele = chainID2.substr(1); } - else if (split_opt==2 && ter_opt==0) // align one chain from each model + else if (split_opt == 2 && ter_opt == 0) // align one chain from each model { - for (i=1;i _atom_site; + map _atom_site; int atom_site_pos; vector line_vec; - string atom; // 4-character atom name - string AA; // 3-character residue name - string resi; // 4-character residue sequence number - string inscode; // 1-character insertion code + string atom; // 4-character atom name + string AA; // 3-character residue name + string resi; // 4-character residue sequence number + string inscode; // 1-character insertion code string model_index; // model index - bool is_mmcif=false; + bool is_mmcif = false; /* used for CONECT record of chain1 */ - int ca_idx1=0; // all CA atoms - int lig_idx1=0; // all atoms - vector idx_vec; + int ca_idx1 = 0; // all CA atoms + int lig_idx1 = 0; // all atoms + vector idx_vec; /* used for CONECT record of chain2 */ - int ca_idx2=0; // all CA atoms - int lig_idx2=0; // all atoms + int ca_idx2 = 0; // all CA atoms + int lig_idx2 = 0; // all atoms /* extract aligned region */ vector resi_aln1; vector resi_aln2; - int i1=-1; - int i2=-1; + int i1 = -1; + int i2 = -1; if (!mm_opt) { - for (i=0;i=3 && line.compare(0,3,"TER")==0) after_ter=true; - if (is_mmcif==false && line.size()>=54 && - (line.compare(0, 6, "ATOM ")==0 || - line.compare(0, 6, "HETATM")==0)) // PDB format - { - if (line[16]!='A' && line[16]!=' ') continue; - x[0]=atof(line.substr(30,8).c_str()); - x[1]=atof(line.substr(38,8).c_str()); - x[2]=atof(line.substr(46,8).c_str()); - if (mirror_opt) x[2]=-x[2]; + if (ter_opt >= 3 && line.compare(0, 3, "TER") == 0) + after_ter = true; + if (is_mmcif == false && line.size() >= 54 && + (line.compare(0, 6, "ATOM ") == 0 || + line.compare(0, 6, "HETATM") == 0)) // PDB format + { + if (line[16] != 'A' && line[16] != ' ') + continue; + x[0] = atof(line.substr(30, 8).c_str()); + x[1] = atof(line.substr(38, 8).c_str()); + x[2] = atof(line.substr(46, 8).c_str()); + if (mirror_opt) + x[2] = -x[2]; transform(t, u, x, x1); - //buf_pdb<=2) - { - if (ca_idx1 && asym_id.size() && asym_id!=line.substr(21,1)) + buf_all_atm_lig << line.substr(0, 6) << setw(5) << lig_idx1 + << line.substr(11, 9) << " A" << line.substr(22, 8) + << setiosflags(ios::fixed) << setprecision(3) + << setw(8) << x1[0] << setw(8) << x1[1] << setw(8) << x1[2] << '\n'; + if (chain1_sele.size() && line[21] != chain1_sele[0]) + continue; + if (after_ter || line.compare(0, 6, "ATOM ")) + continue; + if (ter_opt >= 2) + { + if (ca_idx1 && asym_id.size() && asym_id != line.substr(21, 1)) { - after_ter=true; + after_ter = true; continue; } - asym_id=line[21]; + asym_id = line[21]; } - buf_all_atm<<"ATOM "<=5) atom=atom.substr(0,4); - - AA=line_vec[_atom_site["label_comp_id"]]; // residue name - if (AA.size()==1) AA=" "+AA; - else if (AA.size()==2) AA=" " +AA; - else if (AA.size()>=4) AA=AA.substr(0,3); - + atom = line_vec[_atom_site["label_atom_id"]]; + if (atom[0] == '"') + atom = atom.substr(1); + if (atom.size() && atom[atom.size() - 1] == '"') + atom = atom.substr(0, atom.size() - 1); + if (atom.size() == 0) + atom = " "; + else if (atom.size() == 1) + atom = " " + atom + " "; + else if (atom.size() == 2) + atom = " " + atom + " "; + else if (atom.size() == 3) + atom = " " + atom; + else if (atom.size() >= 5) + atom = atom.substr(0, 4); + + AA = line_vec[_atom_site["label_comp_id"]]; // residue name + if (AA.size() == 1) + AA = " " + AA; + else if (AA.size() == 2) + AA = " " + AA; + else if (AA.size() >= 4) + AA = AA.substr(0, 3); + if (_atom_site.count("auth_seq_id")) - resi=line_vec[_atom_site["auth_seq_id"]]; - else resi=line_vec[_atom_site["label_seq_id"]]; - while (resi.size()<4) resi=' '+resi; - if (resi.size()>4) resi=resi.substr(0,4); - - inscode=' '; - if (_atom_site.count("pdbx_PDB_ins_code") && - line_vec[_atom_site["pdbx_PDB_ins_code"]]!="?") - inscode=line_vec[_atom_site["pdbx_PDB_ins_code"]][0]; + resi = line_vec[_atom_site["auth_seq_id"]]; + else + resi = line_vec[_atom_site["label_seq_id"]]; + while (resi.size() < 4) + resi = ' ' + resi; + if (resi.size() > 4) + resi = resi.substr(0, 4); + + inscode = ' '; + if (_atom_site.count("pdbx_PDB_ins_code") && + line_vec[_atom_site["pdbx_PDB_ins_code"]] != "?") + inscode = line_vec[_atom_site["pdbx_PDB_ins_code"]][0]; if (_atom_site.count("auth_asym_id")) { - if (chain1_sele.size()) after_ter - =line_vec[_atom_site["auth_asym_id"]]!=chain1_sele; - else if (ter_opt>=2 && ca_idx1 && asym_id.size() && - asym_id!=line_vec[_atom_site["auth_asym_id"]]) - after_ter=true; - asym_id=line_vec[_atom_site["auth_asym_id"]]; + if (chain1_sele.size()) + after_ter = line_vec[_atom_site["auth_asym_id"]] != chain1_sele; + else if (ter_opt >= 2 && ca_idx1 && asym_id.size() && + asym_id != line_vec[_atom_site["auth_asym_id"]]) + after_ter = true; + asym_id = line_vec[_atom_site["auth_asym_id"]]; } else if (_atom_site.count("label_asym_id")) { - if (chain1_sele.size()) after_ter - =line_vec[_atom_site["label_asym_id"]]!=chain1_sele; - if (ter_opt>=2 && ca_idx1 && asym_id.size() && - asym_id!=line_vec[_atom_site["label_asym_id"]]) - after_ter=true; - asym_id=line_vec[_atom_site["label_asym_id"]]; + if (chain1_sele.size()) + after_ter = line_vec[_atom_site["label_asym_id"]] != chain1_sele; + if (ter_opt >= 2 && ca_idx1 && asym_id.size() && + asym_id != line_vec[_atom_site["label_asym_id"]]) + after_ter = true; + asym_id = line_vec[_atom_site["label_asym_id"]]; } - //buf_pdb<=1 && line.compare(0,3,"END")==0) break; + // buf_pdb<= 1 && line.compare(0, 3, "END") == 0) + break; } } fin.close(); - if (!mm_opt) buf<<"TER\n"; - buf_all<<"TER\n"; - if (!mm_opt) buf_atm<<"TER\n"; - buf_all_atm<<"TER\n"; - buf_all_atm_lig<<"TER\n"; - for (i=1;i=3 && line.compare(0,3,"TER")==0) after_ter=true; - if (line.size()>=54 && (line.compare(0, 6, "ATOM ")==0 || - line.compare(0, 6, "HETATM")==0)) // PDB format + if (ter_opt >= 3 && line.compare(0, 3, "TER") == 0) + after_ter = true; + if (line.size() >= 54 && (line.compare(0, 6, "ATOM ") == 0 || + line.compare(0, 6, "HETATM") == 0)) // PDB format { - if (line[16]!='A' && line[16]!=' ') continue; - if (after_ter && line.compare(0,6,"ATOM ")==0) continue; + if (line[16] != 'A' && line[16] != ' ') + continue; + if (after_ter && line.compare(0, 6, "ATOM ") == 0) + continue; lig_idx2++; - buf_all_atm_lig<=2) + buf_all_atm_lig << line.substr(0, 6) << setw(5) << lig_idx1 + lig_idx2 + << line.substr(11, 9) << " B" << line.substr(22, 32) << '\n'; + if (chain2_sele.size() && line[21] != chain2_sele[0]) + continue; + if (after_ter || line.compare(0, 6, "ATOM ")) + continue; + if (ter_opt >= 2) { - if (ca_idx2 && asym_id.size() && asym_id!=line.substr(21,1)) + if (ca_idx2 && asym_id.size() && asym_id != line.substr(21, 1)) { - after_ter=true; + after_ter = true; continue; } - asym_id=line[21]; + asym_id = line[21]; } - buf_all_atm<<"ATOM "<=5) atom=atom.substr(0,4); - - AA=line_vec[_atom_site["label_comp_id"]]; // residue name - if (AA.size()==1) AA=" "+AA; - else if (AA.size()==2) AA=" " +AA; - else if (AA.size()>=4) AA=AA.substr(0,3); - + atom = line_vec[_atom_site["label_atom_id"]]; + if (atom[0] == '"') + atom = atom.substr(1); + if (atom.size() && atom[atom.size() - 1] == '"') + atom = atom.substr(0, atom.size() - 1); + if (atom.size() == 0) + atom = " "; + else if (atom.size() == 1) + atom = " " + atom + " "; + else if (atom.size() == 2) + atom = " " + atom + " "; + else if (atom.size() == 3) + atom = " " + atom; + else if (atom.size() >= 5) + atom = atom.substr(0, 4); + + AA = line_vec[_atom_site["label_comp_id"]]; // residue name + if (AA.size() == 1) + AA = " " + AA; + else if (AA.size() == 2) + AA = " " + AA; + else if (AA.size() >= 4) + AA = AA.substr(0, 3); + if (_atom_site.count("auth_seq_id")) - resi=line_vec[_atom_site["auth_seq_id"]]; - else resi=line_vec[_atom_site["label_seq_id"]]; - while (resi.size()<4) resi=' '+resi; - if (resi.size()>4) resi=resi.substr(0,4); - - inscode=' '; - if (_atom_site.count("pdbx_PDB_ins_code") && - line_vec[_atom_site["pdbx_PDB_ins_code"]]!="?") - inscode=line_vec[_atom_site["pdbx_PDB_ins_code"]][0]; - + resi = line_vec[_atom_site["auth_seq_id"]]; + else + resi = line_vec[_atom_site["label_seq_id"]]; + while (resi.size() < 4) + resi = ' ' + resi; + if (resi.size() > 4) + resi = resi.substr(0, 4); + + inscode = ' '; + if (_atom_site.count("pdbx_PDB_ins_code") && + line_vec[_atom_site["pdbx_PDB_ins_code"]] != "?") + inscode = line_vec[_atom_site["pdbx_PDB_ins_code"]][0]; + if (_atom_site.count("auth_asym_id")) { - if (chain2_sele.size()) after_ter - =line_vec[_atom_site["auth_asym_id"]]!=chain2_sele; - if (ter_opt>=2 && ca_idx2 && asym_id.size() && - asym_id!=line_vec[_atom_site["auth_asym_id"]]) - after_ter=true; - asym_id=line_vec[_atom_site["auth_asym_id"]]; + if (chain2_sele.size()) + after_ter = line_vec[_atom_site["auth_asym_id"]] != chain2_sele; + if (ter_opt >= 2 && ca_idx2 && asym_id.size() && + asym_id != line_vec[_atom_site["auth_asym_id"]]) + after_ter = true; + asym_id = line_vec[_atom_site["auth_asym_id"]]; } else if (_atom_site.count("label_asym_id")) { - if (chain2_sele.size()) after_ter - =line_vec[_atom_site["label_asym_id"]]!=chain2_sele; - if (ter_opt>=2 && ca_idx2 && asym_id.size() && - asym_id!=line_vec[_atom_site["label_asym_id"]]) - after_ter=true; - asym_id=line_vec[_atom_site["label_asym_id"]]; + if (chain2_sele.size()) + after_ter = line_vec[_atom_site["label_asym_id"]] != chain2_sele; + if (ter_opt >= 2 && ca_idx2 && asym_id.size() && + asym_id != line_vec[_atom_site["label_asym_id"]]) + after_ter = true; + asym_id = line_vec[_atom_site["label_asym_id"]]; } - if (after_ter==false || - line_vec[_atom_site["group_PDB"]]=="HETATM") + if (after_ter == false || + line_vec[_atom_site["group_PDB"]] == "HETATM") { lig_idx2++; - buf_all_atm_lig<=1 && line.compare(0,3,"END")==0) break; + if (ter_opt >= 1 && line.compare(0, 3, "END") == 0) + break; } } fin.close(); - if (!mm_opt) buf<<"TER\n"; - buf_all<<"TER\n"; - if (!mm_opt) buf_atm<<"TER\n"; - buf_all_atm<<"TER\n"; - buf_all_atm_lig<<"TER\n"; - for (i=ca_idx1+1;i&resi_vec1, const vector&resi_vec2) + const string chainID1, const string chainID2, + const int xlen, const int ylen, double t[3], double u[3][3], + const double TM1, const double TM2, + const double TM3, const double TM4, const double TM5, + const double rmsd, const double d0_out, const char *seqM, + const char *seqxA, const char *seqyA, const double Liden, + const int n_ali8, const int L_ali, const double TM_ali, + const double rmsd_ali, const double TM_0, const double d0_0, + const double d0A, const double d0B, const double Lnorm_ass, + const double d0_scale, const double d0a, const double d0u, + const char *fname_matrix, const int outfmt_opt, const int ter_opt, + const int mm_opt, const int split_opt, const int o_opt, + const string fname_super, const int i_opt, const int a_opt, + const bool u_opt, const bool d_opt, const int mirror_opt, + const vector &resi_vec1, const vector &resi_vec2) { - if (outfmt_opt<=0) + if (outfmt_opt <= 0) { printf("\nName of Structure_1: %s%s (to be superimposed onto Structure_2)\n", - xname.c_str(), chainID1.c_str()); + xname.c_str(), chainID1.c_str()); printf("Name of Structure_2: %s%s\n", yname.c_str(), chainID2.c_str()); printf("Length of Structure_1: %d residues\n", xlen); printf("Length of Structure_2: %d residues\n\n", ylen); @@ -2856,178 +3098,190 @@ void output_results(const string xname, const string yname, if (i_opt) printf("User-specified initial alignment: TM/Lali/rmsd = %7.5lf, %4d, %6.3lf\n", TM_ali, L_ali, rmsd_ali); - printf("Aligned length= %d, RMSD= %6.2f, Seq_ID=n_identical/n_aligned= %4.3f\n", n_ali8, rmsd, (n_ali8>0)?Liden/n_ali8:0); + printf("Aligned length= %d, RMSD= %6.2f, Seq_ID=n_identical/n_aligned= %4.3f\n", n_ali8, rmsd, (n_ali8 > 0) ? Liden / n_ali8 : 0); printf("TM-score= %6.5f (normalized by length of Structure_1: L=%d, d0=%.2f)\n", TM2, xlen, d0B); printf("TM-score= %6.5f (normalized by length of Structure_2: L=%d, d0=%.2f)\n", TM1, ylen, d0A); - if (a_opt==1) - printf("TM-score= %6.5f (if normalized by average length of two structures: L=%.1f, d0=%.2f)\n", TM3, (xlen+ylen)*0.5, d0a); + if (a_opt == 1) + printf("TM-score= %6.5f (if normalized by average length of two structures: L=%.1f, d0=%.2f)\n", TM3, (xlen + ylen) * 0.5, d0a); if (u_opt) printf("TM-score= %6.5f (normalized by user-specified L=%.2f and d0=%.2f)\n", TM4, Lnorm_ass, d0u); if (d_opt) printf("TM-score= %6.5f (scaled by user-specified d0=%.2f, and L=%d)\n", TM5, d0_scale, ylen); printf("(You should use TM-score normalized by length of the reference structure)\n"); - - //output alignment + + // output alignment printf("\n(\":\" denotes residue pairs of d <%4.1f Angstrom, ", d0_out); printf("\".\" denotes other aligned residues)\n"); printf("%s\n", seqxA); printf("%s\n", seqM); printf("%s\n", seqyA); } - else if (outfmt_opt==1) + else if (outfmt_opt == 1) { printf(">%s%s\tL=%d\td0=%.2f\tseqID=%.3f\tTM-score=%.5f\n", - xname.c_str(), chainID1.c_str(), xlen, d0B, Liden/xlen, TM2); + xname.c_str(), chainID1.c_str(), xlen, d0B, Liden / xlen, TM2); printf("%s\n", seqxA); printf(">%s%s\tL=%d\td0=%.2f\tseqID=%.3f\tTM-score=%.5f\n", - yname.c_str(), chainID2.c_str(), ylen, d0A, Liden/ylen, TM1); + yname.c_str(), chainID2.c_str(), ylen, d0A, Liden / ylen, TM1); printf("%s\n", seqyA); printf("# Lali=%d\tRMSD=%.2f\tseqID_ali=%.3f\n", - n_ali8, rmsd, (n_ali8>0)?Liden/n_ali8:0); + n_ali8, rmsd, (n_ali8 > 0) ? Liden / n_ali8 : 0); if (i_opt) printf("# User-specified initial alignment: TM=%.5lf\tLali=%4d\trmsd=%.3lf\n", TM_ali, L_ali, rmsd_ali); - if(a_opt) - printf("# TM-score=%.5f (normalized by average length of two structures: L=%.1f\td0=%.2f)\n", TM3, (xlen+ylen)*0.5, d0a); + if (a_opt) + printf("# TM-score=%.5f (normalized by average length of two structures: L=%.1f\td0=%.2f)\n", TM3, (xlen + ylen) * 0.5, d0a); - if(u_opt) + if (u_opt) printf("# TM-score=%.5f (normalized by user-specified L=%.2f\td0=%.2f)\n", TM4, Lnorm_ass, d0u); - if(d_opt) + if (d_opt) printf("# TM-score=%.5f (scaled by user-specified d0=%.2f\tL=%d)\n", TM5, d0_scale, ylen); printf("$$$$\n"); } - else if (outfmt_opt==2) + else if (outfmt_opt == 2) { printf("%s%s\t%s%s\t%.4f\t%.4f\t%.2f\t%4.3f\t%4.3f\t%4.3f\t%d\t%d\t%d", - xname.c_str(), chainID1.c_str(), yname.c_str(), chainID2.c_str(), - TM2, TM1, rmsd, Liden/xlen, Liden/ylen, (n_ali8>0)?Liden/n_ali8:0, - xlen, ylen, n_ali8); + xname.c_str(), chainID1.c_str(), yname.c_str(), chainID2.c_str(), + TM2, TM1, rmsd, Liden / xlen, Liden / ylen, (n_ali8 > 0) ? Liden / n_ali8 : 0, + xlen, ylen, n_ali8); } - if (outfmt_opt<5) cout << endl; + if (outfmt_opt < 5) + cout << endl; - if (strlen(fname_matrix)) output_rotation_matrix(fname_matrix, t, u); + if (strlen(fname_matrix)) + output_rotation_matrix(fname_matrix, t, u); - if (o_opt==1 || o_opt==3) + if (o_opt == 1 || o_opt == 3) output_pymol(xname, yname, fname_super, t, u, ter_opt, - mm_opt, split_opt, mirror_opt, seqM, seqxA, seqyA, - resi_vec1, resi_vec2, chainID1, chainID2, o_opt); - else if (o_opt==2) + mm_opt, split_opt, mirror_opt, seqM, seqxA, seqyA, + resi_vec1, resi_vec2, chainID1, chainID2, o_opt); + else if (o_opt == 2) output_rasmol(xname, yname, fname_super, t, u, ter_opt, - mm_opt, split_opt, mirror_opt, seqM, seqxA, seqyA, - resi_vec1, resi_vec2, chainID1, chainID2, - xlen, ylen, d0A, n_ali8, rmsd, TM1, Liden); + mm_opt, split_opt, mirror_opt, seqM, seqxA, seqyA, + resi_vec1, resi_vec2, chainID1, chainID2, + xlen, ylen, d0A, n_ali8, rmsd, TM1, Liden); } void output_mTMalign_results(const string xname, const string yname, - const string chainID1, const string chainID2, - const int xlen, const int ylen, double t[3], double u[3][3], - const double TM1, const double TM2, - const double TM3, const double TM4, const double TM5, - const double rmsd, const double d0_out, const char *seqM, - const char *seqxA, const char *seqyA, const double Liden, - const int n_ali8, const int L_ali, const double TM_ali, - const double rmsd_ali, const double TM_0, const double d0_0, - const double d0A, const double d0B, const double Lnorm_ass, - const double d0_scale, const double d0a, const double d0u, - const char* fname_matrix, const int outfmt_opt, const int ter_opt, - const int mm_opt, const int split_opt, const int o_opt, - const string fname_super, const int i_opt, const int a_opt, - const bool u_opt, const bool d_opt, const int mirror_opt, - const vector&resi_vec1, const vector&resi_vec2) + const string chainID1, const string chainID2, + const int xlen, const int ylen, double t[3], double u[3][3], + const double TM1, const double TM2, + const double TM3, const double TM4, const double TM5, + const double rmsd, const double d0_out, const char *seqM, + const char *seqxA, const char *seqyA, const double Liden, + const int n_ali8, const int L_ali, const double TM_ali, + const double rmsd_ali, const double TM_0, const double d0_0, + const double d0A, const double d0B, const double Lnorm_ass, + const double d0_scale, const double d0a, const double d0u, + const char *fname_matrix, const int outfmt_opt, const int ter_opt, + const int mm_opt, const int split_opt, const int o_opt, + const string fname_super, const int i_opt, const int a_opt, + const bool u_opt, const bool d_opt, const int mirror_opt, + const vector &resi_vec1, const vector &resi_vec2) { - if (outfmt_opt<=0) + if (outfmt_opt <= 0) { - printf("Average aligned length= %d, RMSD= %6.2f, Seq_ID=n_identical/n_aligned= %4.3f\n", n_ali8, rmsd, (n_ali8>0)?Liden/n_ali8:0); + printf("Average aligned length= %d, RMSD= %6.2f, Seq_ID=n_identical/n_aligned= %4.3f\n", n_ali8, rmsd, (n_ali8 > 0) ? Liden / n_ali8 : 0); printf("Average TM-score= %6.5f (normalized by length of shorter structure: L=%d, d0=%.2f)\n", TM2, xlen, d0B); printf("Average TM-score= %6.5f (normalized by length of longer structure: L=%d, d0=%.2f)\n", TM1, ylen, d0A); - if (a_opt==1) - printf("Average TM-score= %6.5f (if normalized by average length of two structures: L=%.1f, d0=%.2f)\n", TM3, (xlen+ylen)*0.5, d0a); + if (a_opt == 1) + printf("Average TM-score= %6.5f (if normalized by average length of two structures: L=%.1f, d0=%.2f)\n", TM3, (xlen + ylen) * 0.5, d0a); if (u_opt) printf("Average TM-score= %6.5f (normalized by average L=%.2f and d0=%.2f)\n", TM4, Lnorm_ass, d0u); if (d_opt) printf("Average TM-score= %6.5f (scaled by user-specified d0=%.2f, and L=%d)\n", TM5, d0_scale, ylen); - - //output alignment + + // output alignment printf("In the following, seqID=n_identical/L.\n\n%s\n", seqM); } - else if (outfmt_opt==1) + else if (outfmt_opt == 1) { printf("%s\n", seqM); printf("# Lali=%d\tRMSD=%.2f\tseqID_ali=%.3f\n", - n_ali8, rmsd, (n_ali8>0)?Liden/n_ali8:0); + n_ali8, rmsd, (n_ali8 > 0) ? Liden / n_ali8 : 0); if (i_opt) printf("# User-specified initial alignment: TM=%.5lf\tLali=%4d\trmsd=%.3lf\n", TM_ali, L_ali, rmsd_ali); - if(a_opt) - printf("# TM-score=%.5f (normalized by average length of two structures: L=%.1f\td0=%.2f)\n", TM3, (xlen+ylen)*0.5, d0a); + if (a_opt) + printf("# TM-score=%.5f (normalized by average length of two structures: L=%.1f\td0=%.2f)\n", TM3, (xlen + ylen) * 0.5, d0a); - if(u_opt) + if (u_opt) printf("# TM-score=%.5f (normalized by average L=%.2f\td0=%.2f)\n", TM4, Lnorm_ass, d0u); - if(d_opt) + if (d_opt) printf("# TM-score=%.5f (scaled by user-specified d0=%.2f\tL=%d)\n", TM5, d0_scale, ylen); printf("$$$$\n"); } - else if (outfmt_opt==2) + else if (outfmt_opt == 2) { printf("%s%s\t%s%s\t%.4f\t%.4f\t%.2f\t%4.3f\t%4.3f\t%4.3f\t%d\t%d\t%d", - xname.c_str(), chainID1.c_str(), yname.c_str(), chainID2.c_str(), - TM2, TM1, rmsd, Liden/xlen, Liden/ylen, (n_ali8>0)?Liden/n_ali8:0, - xlen, ylen, n_ali8); + xname.c_str(), chainID1.c_str(), yname.c_str(), chainID2.c_str(), + TM2, TM1, rmsd, Liden / xlen, Liden / ylen, (n_ali8 > 0) ? Liden / n_ali8 : 0, + xlen, ylen, n_ali8); } cout << endl; - if (strlen(fname_matrix)) output_rotation_matrix(fname_matrix, t, u); + if (strlen(fname_matrix)) + output_rotation_matrix(fname_matrix, t, u); - if (o_opt==1 || o_opt==3) + if (o_opt == 1 || o_opt == 3) output_pymol(xname, yname, fname_super, t, u, ter_opt, - mm_opt, split_opt, mirror_opt, seqM, seqxA, seqyA, - resi_vec1, resi_vec2, chainID1, chainID2, o_opt); - else if (o_opt==2) + mm_opt, split_opt, mirror_opt, seqM, seqxA, seqyA, + resi_vec1, resi_vec2, chainID1, chainID2, o_opt); + else if (o_opt == 2) output_rasmol(xname, yname, fname_super, t, u, ter_opt, - mm_opt, split_opt, mirror_opt, seqM, seqxA, seqyA, - resi_vec1, resi_vec2, chainID1, chainID2, - xlen, ylen, d0A, n_ali8, rmsd, TM1, Liden); + mm_opt, split_opt, mirror_opt, seqM, seqxA, seqyA, + resi_vec1, resi_vec2, chainID1, chainID2, + xlen, ylen, d0A, n_ali8, rmsd, TM1, Liden); } double standard_TMscore(double **r1, double **r2, double **xtm, double **ytm, - double **xt, double **x, double **y, int xlen, int ylen, int invmap[], - int& L_ali, double& RMSD, double D0_MIN, double Lnorm, double d0, - double d0_search, double score_d8, double t[3], double u[3][3], - const int mol_type) + double **xt, double **x, double **y, int xlen, int ylen, int invmap[], + int &L_ali, double &RMSD, double D0_MIN, double Lnorm, double d0, + double d0_search, double score_d8, double t[3], double u[3][3], + const int mol_type) { D0_MIN = 0.5; Lnorm = ylen; - if (mol_type>0) // RNA + if (mol_type > 0) // RNA { - if (Lnorm<=11) d0=0.3; - else if(Lnorm>11 && Lnorm<=15) d0=0.4; - else if(Lnorm>15 && Lnorm<=19) d0=0.5; - else if(Lnorm>19 && Lnorm<=23) d0=0.6; - else if(Lnorm>23 && Lnorm<30) d0=0.7; - else d0=(0.6*pow((Lnorm*1.0-0.5), 1.0/2)-2.5); + if (Lnorm <= 11) + d0 = 0.3; + else if (Lnorm > 11 && Lnorm <= 15) + d0 = 0.4; + else if (Lnorm > 15 && Lnorm <= 19) + d0 = 0.5; + else if (Lnorm > 19 && Lnorm <= 23) + d0 = 0.6; + else if (Lnorm > 23 && Lnorm < 30) + d0 = 0.7; + else + d0 = (0.6 * pow((Lnorm * 1.0 - 0.5), 1.0 / 2) - 2.5); } else { - if (Lnorm > 21) d0=(1.24*pow((Lnorm*1.0-15), 1.0/3) -1.8); - else d0 = D0_MIN; - if (d0 < D0_MIN) d0 = D0_MIN; + if (Lnorm > 21) + d0 = (1.24 * pow((Lnorm * 1.0 - 15), 1.0 / 3) - 1.8); + else + d0 = D0_MIN; + if (d0 < D0_MIN) + d0 = D0_MIN; } - double d0_input = d0;// Scaled by seq_min + double d0_input = d0; // Scaled by seq_min - double tmscore;// collected alined residues from invmap + double tmscore; // collected alined residues from invmap int n_al = 0; int i; - for (int j = 0; j= 0) @@ -3050,21 +3304,22 @@ double standard_TMscore(double **r1, double **r2, double **xtm, double **ytm, n_al++; } - else if (i != -1) PrintErrorAndQuit("Wrong map!\n"); + else if (i != -1) + PrintErrorAndQuit("Wrong map!\n"); } L_ali = n_al; Kabsch(r1, r2, n_al, 0, &RMSD, t, u); - RMSD = sqrt( RMSD/(1.0*n_al) ); - + RMSD = sqrt(RMSD / (1.0 * n_al)); + int temp_simplify_step = 1; int temp_score_sum_method = 0; d0_search = d0_input; double rms = 0.0; tmscore = TMscore8_search_standard(r1, r2, xtm, ytm, xt, n_al, t, u, - temp_simplify_step, temp_score_sum_method, &rms, d0_input, - score_d8, d0); - tmscore = tmscore * n_al / (1.0*Lnorm); + temp_simplify_step, temp_score_sum_method, &rms, d0_input, + score_d8, d0); + tmscore = tmscore * n_al / (1.0 * Lnorm); return tmscore; } @@ -3072,57 +3327,61 @@ double standard_TMscore(double **r1, double **r2, double **xtm, double **ytm, /* copy the value of t and u into t0,u0 */ void copy_t_u(double t[3], double u[3][3], double t0[3], double u0[3][3]) { - int i,j; - for (i=0;i<3;i++) + int i, j; + for (i = 0; i < 3; i++) { - t0[i]=t[i]; - for (j=0;j<3;j++) u0[i][j]=u[i][j]; + t0[i] = t[i]; + for (j = 0; j < 3; j++) + u0[i][j] = u[i][j]; } } /* calculate approximate TM-score given rotation matrix */ double approx_TM(const int xlen, const int ylen, const int a_opt, - double **xa, double **ya, double t[3], double u[3][3], - const int invmap0[], const int mol_type) + double **xa, double **ya, double t[3], double u[3][3], + const int invmap0[], const int mol_type) { - double Lnorm_0=ylen; // normalized by the second protein - if (a_opt==-2 && xlen>ylen) Lnorm_0=xlen; // longer - else if (a_opt==-1 && xlen ylen) + Lnorm_0 = xlen; // longer + else if (a_opt == -1 && xlen < ylen) + Lnorm_0 = xlen; // shorter + else if (a_opt == 1) + Lnorm_0 = (xlen + ylen) / 2.; // average + double D0_MIN; double Lnorm; double d0; double d0_search; parameter_set4final(Lnorm_0, D0_MIN, Lnorm, d0, d0_search, mol_type); - double TMtmp=0; + double TMtmp = 0; double d; - double xtmp[3]={0,0,0}; + double xtmp[3] = {0, 0, 0}; - for(int i=0,j=0; j=0)//aligned + i = invmap0[j]; + if (i >= 0) // aligned { transform(t, u, &xa[i][0], &xtmp[0]); - d=sqrt(dist(&xtmp[0], &ya[j][0])); - TMtmp+=1/(1+(d/d0)*(d/d0)); - //if (d <= score_d8) TMtmp+=1/(1+(d/d0)*(d/d0)); + d = sqrt(dist(&xtmp[0], &ya[j][0])); + TMtmp += 1 / (1 + (d / d0) * (d / d0)); + // if (d <= score_d8) TMtmp+=1/(1+(d/d0)*(d/d0)); } } - TMtmp/=Lnorm_0; + TMtmp /= Lnorm_0; return TMtmp; } void clean_up_after_approx_TM(int *invmap0, int *invmap, - double **score, bool **path, double **val, double **xtm, double **ytm, - double **xt, double **r1, double **r2, const int xlen, const int minlen) + double **score, bool **path, double **val, double **xtm, double **ytm, + double **xt, double **r1, double **r2, const int xlen, const int minlen) { - delete [] invmap0; - delete [] invmap; - DeleteArray(&score, xlen+1); - DeleteArray(&path, xlen+1); - DeleteArray(&val, xlen+1); + delete[] invmap0; + delete[] invmap; + DeleteArray(&score, xlen + 1); + DeleteArray(&path, xlen + 1); + DeleteArray(&val, xlen + 1); DeleteArray(&xtm, minlen); DeleteArray(&ytm, minlen); DeleteArray(&xt, xlen); @@ -3132,42 +3391,42 @@ void clean_up_after_approx_TM(int *invmap0, int *invmap, } /* Entry function for TM-align. Return TM-score calculation status: - * 0 - full TM-score calculation + * 0 - full TM-score calculation * 1 - terminated due to exception * 2-7 - pre-terminated due to low TM-score */ int TMalign_main(double **xa, double **ya, - const char *seqx, const char *seqy, const char *secx, const char *secy, - double t0[3], double u0[3][3], - double &TM1, double &TM2, double &TM3, double &TM4, double &TM5, - double &d0_0, double &TM_0, - double &d0A, double &d0B, double &d0u, double &d0a, double &d0_out, - string &seqM, string &seqxA, string &seqyA, vector&do_vec, - double &rmsd0, int &L_ali, double &Liden, - double &TM_ali, double &rmsd_ali, int &n_ali, int &n_ali8, - const int xlen, const int ylen, - const vector sequence, const double Lnorm_ass, - const double d0_scale, const int i_opt, const int a_opt, - const bool u_opt, const bool d_opt, const bool fast_opt, - const int mol_type, const double TMcut=-1) + const char *seqx, const char *seqy, const char *secx, const char *secy, + double t0[3], double u0[3][3], + double &TM1, double &TM2, double &TM3, double &TM4, double &TM5, + double &d0_0, double &TM_0, + double &d0A, double &d0B, double &d0u, double &d0a, double &d0_out, + string &seqM, string &seqxA, string &seqyA, vector &do_vec, + double &rmsd0, int &L_ali, double &Liden, + double &TM_ali, double &rmsd_ali, int &n_ali, int &n_ali8, + const int xlen, const int ylen, + const vector sequence, const double Lnorm_ass, + const double d0_scale, const int i_opt, const int a_opt, + const bool u_opt, const bool d_opt, const bool fast_opt, + const int mol_type, const double TMcut, const int ss_opt) { - double D0_MIN; //for d0 - double Lnorm; //normalization length - double score_d8,d0,d0_search,dcu0;//for TMscore search - double t[3], u[3][3]; //Kabsch translation vector and rotation matrix - double **score; // Input score table for dynamic programming - bool **path; // for dynamic programming - double **val; // for dynamic programming - double **xtm, **ytm; // for TMscore search engine - double **xt; //for saving the superposed version of r_1 or xtm - double **r1, **r2; // for Kabsch rotation + double D0_MIN; // for d0 + double Lnorm; // normalization length + double score_d8, d0, d0_search, dcu0; // for TMscore search + double t[3], u[3][3]; // Kabsch translation vector and rotation matrix + double **score; // Input score table for dynamic programming + bool **path; // for dynamic programming + double **val; // for dynamic programming + double **xtm, **ytm; // for TMscore search engine + double **xt; // for saving the superposed version of r_1 or xtm + double **r1, **r2; // for Kabsch rotation /***********************/ /* allocate memory */ /***********************/ int minlen = min(xlen, ylen); - NewArray(&score, xlen+1, ylen+1); - NewArray(&path, xlen+1, ylen+1); - NewArray(&val, xlen+1, ylen+1); + NewArray(&score, xlen + 1, ylen + 1); + NewArray(&path, xlen + 1, ylen + 1); + NewArray(&val, xlen + 1, ylen + 1); NewArray(&xtm, minlen, 3); NewArray(&ytm, minlen, 3); NewArray(&xt, xlen, 3); @@ -3175,188 +3434,213 @@ int TMalign_main(double **xa, double **ya, NewArray(&r2, minlen, 3); /***********************/ - /* parameter set */ + /* parameter set */ /***********************/ - parameter_set4search(xlen, ylen, D0_MIN, Lnorm, - score_d8, d0, d0_search, dcu0); - int simplify_step = 40; //for simplified search engine - int score_sum_method = 8; //for scoring method, whether only sum over pairs with dis= ylen || i1 >= xlen) kk1 = L; - else if (sequence[0][kk1] != '-') invmap[i2] = i1; + if (i2 >= ylen || i1 >= xlen) + kk1 = L; + else if (sequence[0][kk1] != '-') + invmap[i2] = i1; } } //--------------- 2. Align proteins from original alignment - double prevD0_MIN = D0_MIN;// stored for later use + double prevD0_MIN = D0_MIN; // stored for later use int prevLnorm = Lnorm; double prevd0 = d0; TM_ali = standard_TMscore(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, - invmap, L_ali, rmsd_ali, D0_MIN, Lnorm, d0, d0_search, score_d8, - t, u, mol_type); + invmap, L_ali, rmsd_ali, D0_MIN, Lnorm, d0, d0_search, score_d8, + t, u, mol_type); D0_MIN = prevD0_MIN; Lnorm = prevLnorm; d0 = prevd0; TM = detailed_search_standard(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, - invmap, t, u, 40, 8, local_d0_search, true, Lnorm, score_d8, d0); + invmap, t, u, 40, 8, local_d0_search, true, Lnorm, score_d8, d0); if (TM > TMmax) { TMmax = TM; - for (i = 0; iTMmax) TMmax = TM; - if (TMcut>0) copy_t_u(t, u, t0, u0); - //run dynamic programing iteratively to find the best alignment + t, u, simplify_step, score_sum_method, local_d0_search, Lnorm, + score_d8, d0); + if (TM > TMmax) + TMmax = TM; + if (TMcut > 0) + copy_t_u(t, u, t0, u0); + // run dynamic programing iteratively to find the best alignment TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya, xlen, ylen, - t, u, invmap, 0, 2, (fast_opt)?2:30, local_d0_search, - D0_MIN, Lnorm, d0, score_d8); - if (TM>TMmax) + t, u, invmap, 0, 2, (fast_opt) ? 2 : 30, local_d0_search, + D0_MIN, Lnorm, d0, score_d8); + if (TM > TMmax) { TMmax = TM; - for (int i = 0; i0) copy_t_u(t, u, t0, u0); + for (int i = 0; i < ylen; i++) + invmap0[i] = invmap[i]; + if (TMcut > 0) + copy_t_u(t, u, t0, u0); } - if (TMcut>0) // pre-terminate if TM-score is too low + if (TMcut > 0) // pre-terminate if TM-score is too low { - double TMtmp=approx_TM(xlen, ylen, a_opt, - xa, ya, t0, u0, invmap0, mol_type); + double TMtmp = approx_TM(xlen, ylen, a_opt, + xa, ya, t0, u0, invmap0, mol_type); - if (TMtmp<0.5*TMcut) + if (TMtmp < 0.5 * TMcut) { - TM1=TM2=TM3=TM4=TM5=TMtmp; + TM1 = TM2 = TM3 = TM4 = TM5 = TMtmp; clean_up_after_approx_TM(invmap0, invmap, score, path, val, - xtm, ytm, xt, r1, r2, xlen, minlen); + xtm, ytm, xt, r1, r2, xlen, minlen); return 2; } } /************************************************************/ - /* get initial alignment based on secondary structure */ + /* get initial alignment based on secondary structure */ /************************************************************/ - get_initial_ss(path, val, secx, secy, xlen, ylen, invmap); - TM = detailed_search(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, invmap, - t, u, simplify_step, score_sum_method, local_d0_search, Lnorm, - score_d8, d0); - if (TM>TMmax) + if (ss_opt != 1) { - TMmax = TM; - for (int i = 0; i0) copy_t_u(t, u, t0, u0); - } - if (TM > TMmax*0.2) - { - TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya, - xlen, ylen, t, u, invmap, 0, 2, (fast_opt)?2:30, - local_d0_search, D0_MIN, Lnorm, d0, score_d8); - if (TM>TMmax) + get_initial_ss(path, val, secx, secy, xlen, ylen, invmap); + TM = detailed_search(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, invmap, + t, u, simplify_step, score_sum_method, local_d0_search, Lnorm, + score_d8, d0); + if (TM > TMmax) { TMmax = TM; - for (int i = 0; i0) copy_t_u(t, u, t0, u0); + for (int i = 0; i < ylen; i++) + invmap0[i] = invmap[i]; + if (TMcut > 0) + copy_t_u(t, u, t0, u0); + } + if (TM > TMmax * 0.2) + { + TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya, + xlen, ylen, t, u, invmap, 0, 2, (fast_opt) ? 2 : 30, + local_d0_search, D0_MIN, Lnorm, d0, score_d8); + if (TM > TMmax) + { + TMmax = TM; + for (int i = 0; i < ylen; i++) + invmap0[i] = invmap[i]; + if (TMcut > 0) + copy_t_u(t, u, t0, u0); + } } - } - - if (TMcut>0) // pre-terminate if TM-score is too low - { - double TMtmp=approx_TM(xlen, ylen, a_opt, - xa, ya, t0, u0, invmap0, mol_type); - if (TMtmp<0.52*TMcut) + if (TMcut > 0) // pre-terminate if TM-score is too low { - TM1=TM2=TM3=TM4=TM5=TMtmp; - clean_up_after_approx_TM(invmap0, invmap, score, path, val, - xtm, ytm, xt, r1, r2, xlen, minlen); - return 3; + double TMtmp = approx_TM(xlen, ylen, a_opt, + xa, ya, t0, u0, invmap0, mol_type); + + if (TMtmp < 0.52 * TMcut) + { + TM1 = TM2 = TM3 = TM4 = TM5 = TMtmp; + clean_up_after_approx_TM(invmap0, invmap, score, path, val, + xtm, ytm, xt, r1, r2, xlen, minlen); + return 3; + } } } /************************************************************/ - /* get initial alignment based on local superposition */ + /* get initial alignment based on local superposition */ /************************************************************/ //=initial5 in original TM-align - if (get_initial5( r1, r2, xtm, ytm, path, val, xa, ya, - xlen, ylen, invmap, d0, d0_search, fast_opt, D0_MIN)) + if (ss_opt != 1) { - TM = detailed_search(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, - invmap, t, u, simplify_step, score_sum_method, - local_d0_search, Lnorm, score_d8, d0); - if (TM>TMmax) + if (get_initial5(r1, r2, xtm, ytm, path, val, xa, ya, + xlen, ylen, invmap, d0, d0_search, fast_opt, D0_MIN)) { - TMmax = TM; - for (int i = 0; i0) copy_t_u(t, u, t0, u0); - } - if (TM > TMmax*ddcc) - { - TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya, - xlen, ylen, t, u, invmap, 0, 2, 2, local_d0_search, - D0_MIN, Lnorm, d0, score_d8); - if (TM>TMmax) + TM = detailed_search(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, + invmap, t, u, simplify_step, score_sum_method, + local_d0_search, Lnorm, score_d8, d0); + if (TM > TMmax) { TMmax = TM; - for (int i = 0; i0) copy_t_u(t, u, t0, u0); + for (int i = 0; i < ylen; i++) + invmap0[i] = invmap[i]; + if (TMcut > 0) + copy_t_u(t, u, t0, u0); + } + if (TM > TMmax * ddcc) + { + TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya, + xlen, ylen, t, u, invmap, 0, 2, 2, local_d0_search, + D0_MIN, Lnorm, d0, score_d8); + if (TM > TMmax) + { + TMmax = TM; + for (int i = 0; i < ylen; i++) + invmap0[i] = invmap[i]; + if (TMcut > 0) + copy_t_u(t, u, t0, u0); + } } } + else + cerr << "\n\nWarning: initial alignment from local superposition fail!\n\n" + << endl; } - else - cerr << "\n\nWarning: initial alignment from local superposition fail!\n\n" << endl; - if (TMcut>0) // pre-terminate if TM-score is too low + if (TMcut > 0) // pre-terminate if TM-score is too low { - double TMtmp=approx_TM(xlen, ylen, a_opt, - xa, ya, t0, u0, invmap0, mol_type); + double TMtmp = approx_TM(xlen, ylen, a_opt, + xa, ya, t0, u0, invmap0, mol_type); - if (TMtmp<0.54*TMcut) + if (TMtmp < 0.54 * TMcut) { - TM1=TM2=TM3=TM4=TM5=TMtmp; + TM1 = TM2 = TM3 = TM4 = TM5 = TMtmp; clean_up_after_approx_TM(invmap0, invmap, score, path, val, - xtm, ytm, xt, r1, r2, xlen, minlen); + xtm, ytm, xt, r1, r2, xlen, minlen); return 4; } } @@ -3365,82 +3649,93 @@ int TMalign_main(double **xa, double **ya, /* get initial alignment by local superposition+secondary structure */ /********************************************************************/ //=initial3 in original TM-align - get_initial_ssplus(r1, r2, score, path, val, secx, secy, xa, ya, - xlen, ylen, invmap0, invmap, D0_MIN, d0); - TM = detailed_search(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, invmap, - t, u, simplify_step, score_sum_method, local_d0_search, Lnorm, - score_d8, d0); - if (TM>TMmax) + if (ss_opt != 1) { - TMmax = TM; - for (i = 0; i0) copy_t_u(t, u, t0, u0); - } - if (TM > TMmax*ddcc) - { - TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya, - xlen, ylen, t, u, invmap, 0, 2, (fast_opt)?2:30, - local_d0_search, D0_MIN, Lnorm, d0, score_d8); - if (TM>TMmax) + get_initial_ssplus(r1, r2, score, path, val, secx, secy, xa, ya, + xlen, ylen, invmap0, invmap, D0_MIN, d0); + TM = detailed_search(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, invmap, + t, u, simplify_step, score_sum_method, local_d0_search, Lnorm, + score_d8, d0); + if (TM > TMmax) { TMmax = TM; - for (i = 0; i0) copy_t_u(t, u, t0, u0); + for (i = 0; i < ylen; i++) + invmap0[i] = invmap[i]; + if (TMcut > 0) + copy_t_u(t, u, t0, u0); + } + if (TM > TMmax * ddcc) + { + TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya, + xlen, ylen, t, u, invmap, 0, 2, (fast_opt) ? 2 : 30, + local_d0_search, D0_MIN, Lnorm, d0, score_d8); + if (TM > TMmax) + { + TMmax = TM; + for (i = 0; i < ylen; i++) + invmap0[i] = invmap[i]; + if (TMcut > 0) + copy_t_u(t, u, t0, u0); + } } - } - - if (TMcut>0) // pre-terminate if TM-score is too low - { - double TMtmp=approx_TM(xlen, ylen, a_opt, - xa, ya, t0, u0, invmap0, mol_type); - if (TMtmp<0.56*TMcut) + if (TMcut > 0) // pre-terminate if TM-score is too low { - TM1=TM2=TM3=TM4=TM5=TMtmp; - clean_up_after_approx_TM(invmap0, invmap, score, path, val, - xtm, ytm, xt, r1, r2, xlen, minlen); - return 5; + double TMtmp = approx_TM(xlen, ylen, a_opt, + xa, ya, t0, u0, invmap0, mol_type); + + if (TMtmp < 0.56 * TMcut) + { + TM1 = TM2 = TM3 = TM4 = TM5 = TMtmp; + clean_up_after_approx_TM(invmap0, invmap, score, path, val, + xtm, ytm, xt, r1, r2, xlen, minlen); + return 5; + } } } /*******************************************************************/ - /* get initial alignment based on fragment gapless threading */ + /* get initial alignment based on fragment gapless threading */ /*******************************************************************/ //=initial4 in original TM-align get_initial_fgt(r1, r2, xtm, ytm, xa, ya, xlen, ylen, - invmap, d0, d0_search, dcu0, fast_opt, t, u); + invmap, d0, d0_search, dcu0, fast_opt, t, u); TM = detailed_search(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, invmap, - t, u, simplify_step, score_sum_method, local_d0_search, Lnorm, - score_d8, d0); - if (TM>TMmax) + t, u, simplify_step, score_sum_method, local_d0_search, Lnorm, + score_d8, d0); + if (TM > TMmax) { TMmax = TM; - for (i = 0; i0) copy_t_u(t, u, t0, u0); + for (i = 0; i < ylen; i++) + invmap0[i] = invmap[i]; + if (TMcut > 0) + copy_t_u(t, u, t0, u0); } - if (TM > TMmax*ddcc) + if (TM > TMmax * ddcc) { TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya, - xlen, ylen, t, u, invmap, 1, 2, 2, local_d0_search, D0_MIN, - Lnorm, d0, score_d8); - if (TM>TMmax) + xlen, ylen, t, u, invmap, 1, 2, 2, local_d0_search, D0_MIN, + Lnorm, d0, score_d8); + if (TM > TMmax) { TMmax = TM; - for (i = 0; i0) copy_t_u(t, u, t0, u0); + for (i = 0; i < ylen; i++) + invmap0[i] = invmap[i]; + if (TMcut > 0) + copy_t_u(t, u, t0, u0); } } - if (TMcut>0) // pre-terminate if TM-score is too low + if (TMcut > 0) // pre-terminate if TM-score is too low { - double TMtmp=approx_TM(xlen, ylen, a_opt, - xa, ya, t0, u0, invmap0, mol_type); + double TMtmp = approx_TM(xlen, ylen, a_opt, + xa, ya, t0, u0, invmap0, mol_type); - if (TMtmp<0.58*TMcut) + if (TMtmp < 0.58 * TMcut) { - TM1=TM2=TM3=TM4=TM5=TMtmp; + TM1 = TM2 = TM3 = TM4 = TM5 = TMtmp; clean_up_after_approx_TM(invmap0, invmap, score, path, val, - xtm, ytm, xt, r1, r2, xlen, minlen); + xtm, ytm, xt, r1, r2, xlen, minlen); return 6; } } @@ -3449,16 +3744,16 @@ int TMalign_main(double **xa, double **ya, //************************************************// // get initial alignment from user's input: // //************************************************// - if (i_opt>=1 && i_opt<=2)// if input has set parameter for "-i" + if (i_opt >= 1 && i_opt <= 2) // if input has set parameter for "-i" { - for (int j = 0; j < ylen; j++)// Set aligned position to be "-1" + for (int j = 0; j < ylen; j++) // Set aligned position to be "-1" invmap[j] = -1; - int i1 = -1;// in C version, index starts from zero, not from one + int i1 = -1; // in C version, index starts from zero, not from one int i2 = -1; int L1 = sequence[0].size(); int L2 = sequence[1].size(); - int L = min(L1, L2);// Get positions for aligned residues + int L = min(L1, L2); // Get positions for aligned residues for (int kk1 = 0; kk1 < L; kk1++) { if (sequence[0][kk1] != '-') @@ -3466,75 +3761,77 @@ int TMalign_main(double **xa, double **ya, if (sequence[1][kk1] != '-') { i2++; - if (i2 >= ylen || i1 >= xlen) kk1 = L; - else if (sequence[0][kk1] != '-') invmap[i2] = i1; + if (i2 >= ylen || i1 >= xlen) + kk1 = L; + else if (sequence[0][kk1] != '-') + invmap[i2] = i1; } } //--------------- 2. Align proteins from original alignment - double prevD0_MIN = D0_MIN;// stored for later use + double prevD0_MIN = D0_MIN; // stored for later use int prevLnorm = Lnorm; double prevd0 = d0; TM_ali = standard_TMscore(r1, r2, xtm, ytm, xt, xa, ya, - xlen, ylen, invmap, L_ali, rmsd_ali, D0_MIN, Lnorm, d0, - d0_search, score_d8, t, u, mol_type); + xlen, ylen, invmap, L_ali, rmsd_ali, D0_MIN, Lnorm, d0, + d0_search, score_d8, t, u, mol_type); D0_MIN = prevD0_MIN; Lnorm = prevLnorm; d0 = prevd0; TM = detailed_search_standard(r1, r2, xtm, ytm, xt, xa, ya, - xlen, ylen, invmap, t, u, 40, 8, local_d0_search, true, Lnorm, - score_d8, d0); + xlen, ylen, invmap, t, u, 40, 8, local_d0_search, true, Lnorm, + score_d8, d0); if (TM > TMmax) { TMmax = TM; - for (i = 0; iTMmax) + xlen, ylen, t, u, invmap, 0, 2, (fast_opt) ? 2 : 30, + local_d0_search, D0_MIN, Lnorm, d0, score_d8); + if (TM > TMmax) { TMmax = TM; - for (i = 0; i=0) + if (invmap0[i] >= 0) { - flag=true; + flag = true; break; } } - if(!flag) + if (!flag) { cout << "There is no alignment between the two structures! " << "Program stop with no result!" << endl; - TM1=TM2=TM3=TM4=TM5=0; + TM1 = TM2 = TM3 = TM4 = TM5 = 0; return 1; } /* last TM-score pre-termination */ - if (TMcut>0) + if (TMcut > 0) { - double TMtmp=approx_TM(xlen, ylen, a_opt, - xa, ya, t0, u0, invmap0, mol_type); + double TMtmp = approx_TM(xlen, ylen, a_opt, + xa, ya, t0, u0, invmap0, mol_type); - if (TMtmp<0.6*TMcut) + if (TMtmp < 0.6 * TMcut) { - TM1=TM2=TM3=TM4=TM5=TMtmp; + TM1 = TM2 = TM3 = TM4 = TM5 = TMtmp; clean_up_after_approx_TM(invmap0, invmap, score, path, val, - xtm, ytm, xt, r1, r2, xlen, minlen); + xtm, ytm, xt, r1, r2, xlen, minlen); return 7; } } @@ -3542,42 +3839,43 @@ int TMalign_main(double **xa, double **ya, //********************************************************************// // Detailed TMscore search engine --> prepare for final TMscore // //********************************************************************// - //run detailed TMscore search engine for the best alignment, and - //extract the best rotation matrix (t, u) for the best alignment - simplify_step=1; - if (fast_opt) simplify_step=40; - score_sum_method=8; + // run detailed TMscore search engine for the best alignment, and + // extract the best rotation matrix (t, u) for the best alignment + simplify_step = 1; + if (fast_opt) + simplify_step = 40; + score_sum_method = 8; TM = detailed_search_standard(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, - invmap0, t, u, simplify_step, score_sum_method, local_d0_search, - false, Lnorm, score_d8, d0); + invmap0, t, u, simplify_step, score_sum_method, local_d0_search, + false, Lnorm, score_d8, d0); - //select pairs with dis=0)//aligned + i = invmap0[j]; + if (i >= 0) // aligned { n_ali++; - d=sqrt(dist(&xt[i][0], &ya[j][0])); + d = sqrt(dist(&xt[i][0], &ya[j][0])); if (d <= score_d8 || (i_opt == 3)) { - m1[k]=i; - m2[k]=j; + m1[k] = i; + m2[k] = j; - xtm[k][0]=xa[i][0]; - xtm[k][1]=xa[i][1]; - xtm[k][2]=xa[i][2]; + xtm[k][0] = xa[i][0]; + xtm[k][1] = xa[i][1]; + xtm[k][2] = xa[i][2]; - ytm[k][0]=ya[j][0]; - ytm[k][1]=ya[j][1]; - ytm[k][2]=ya[j][2]; + ytm[k][0] = ya[j][0]; + ytm[k][1] = ya[j][1]; + ytm[k][2] = ya[j][2]; r1[k][0] = xt[i][0]; r1[k][1] = xt[i][1]; @@ -3590,243 +3888,242 @@ int TMalign_main(double **xa, double **ya, } } } - n_ali8=k; + n_ali8 = k; - Kabsch(r1, r2, n_ali8, 0, &rmsd0, t, u);// rmsd0 is used for final output, only recalculate rmsd0, not t & u + Kabsch(r1, r2, n_ali8, 0, &rmsd0, t, u); // rmsd0 is used for final output, only recalculate rmsd0, not t & u rmsd0 = sqrt(rmsd0 / n_ali8); - //****************************************// // Final TMscore // // Please set parameters for output // //****************************************// double rmsd; - simplify_step=1; - score_sum_method=0; - double Lnorm_0=ylen; - + simplify_step = 1; + score_sum_method = 0; + double Lnorm_0 = ylen; - //normalized by length of structure A + // normalized by length of structure A parameter_set4final(Lnorm_0, D0_MIN, Lnorm, d0, d0_search, mol_type); - d0A=d0; - d0_0=d0A; + d0A = d0; + d0_0 = d0A; local_d0_search = d0_search; TM1 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t0, u0, simplify_step, - score_sum_method, &rmsd, local_d0_search, Lnorm, score_d8, d0); + score_sum_method, &rmsd, local_d0_search, Lnorm, score_d8, d0); TM_0 = TM1; - //normalized by length of structure B - parameter_set4final(xlen+0.0, D0_MIN, Lnorm, d0, d0_search, mol_type); - d0B=d0; + // normalized by length of structure B + parameter_set4final(xlen + 0.0, D0_MIN, Lnorm, d0, d0_search, mol_type); + d0B = d0; local_d0_search = d0_search; TM2 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t, u, simplify_step, - score_sum_method, &rmsd, local_d0_search, Lnorm, score_d8, d0); + score_sum_method, &rmsd, local_d0_search, Lnorm, score_d8, d0); double Lnorm_d0; - if (a_opt>0) + if (a_opt > 0) { - //normalized by average length of structures A, B - Lnorm_0=(xlen+ylen)*0.5; + // normalized by average length of structures A, B + Lnorm_0 = (xlen + ylen) * 0.5; parameter_set4final(Lnorm_0, D0_MIN, Lnorm, d0, d0_search, mol_type); - d0a=d0; - d0_0=d0a; + d0a = d0; + d0_0 = d0a; local_d0_search = d0_search; TM3 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t0, u0, - simplify_step, score_sum_method, &rmsd, local_d0_search, Lnorm, - score_d8, d0); - TM_0=TM3; + simplify_step, score_sum_method, &rmsd, local_d0_search, Lnorm, + score_d8, d0); + TM_0 = TM3; } if (u_opt) { - //normalized by user assigned length + // normalized by user assigned length parameter_set4final(Lnorm_ass, D0_MIN, Lnorm, - d0, d0_search, mol_type); - d0u=d0; - d0_0=d0u; - Lnorm_0=Lnorm_ass; + d0, d0_search, mol_type); + d0u = d0; + d0_0 = d0u; + Lnorm_0 = Lnorm_ass; local_d0_search = d0_search; TM4 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t0, u0, - simplify_step, score_sum_method, &rmsd, local_d0_search, Lnorm, - score_d8, d0); - TM_0=TM4; + simplify_step, score_sum_method, &rmsd, local_d0_search, Lnorm, + score_d8, d0); + TM_0 = TM4; } if (d_opt) { - //scaled by user assigned d0 + // scaled by user assigned d0 parameter_set4scale(ylen, d0_scale, Lnorm, d0, d0_search); - d0_out=d0_scale; - d0_0=d0_scale; - //Lnorm_0=ylen; - Lnorm_d0=Lnorm_0; + d0_out = d0_scale; + d0_0 = d0_scale; + // Lnorm_0=ylen; + Lnorm_d0 = Lnorm_0; local_d0_search = d0_search; TM5 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t0, u0, - simplify_step, score_sum_method, &rmsd, local_d0_search, Lnorm, - score_d8, d0); - TM_0=TM5; + simplify_step, score_sum_method, &rmsd, local_d0_search, Lnorm, + score_d8, d0); + TM_0 = TM5; } /* derive alignment from superposition */ - int ali_len=xlen+ylen; //maximum length of alignment - seqxA.assign(ali_len,'-'); - seqM.assign( ali_len,' '); - seqyA.assign(ali_len,'-'); + int ali_len = xlen + ylen; // maximum length of alignment + seqxA.assign(ali_len, '-'); + seqM.assign(ali_len, ' '); + seqyA.assign(ali_len, '-'); do_vec.clear(); - do_vec.assign(ali_len,0); - - //do_rotation(xa, xt, xlen, t, u); + do_vec.assign(ali_len, 0); + + // do_rotation(xa, xt, xlen, t, u); do_rotation(xa, xt, xlen, t0, u0); - int kk=0, i_old=0, j_old=0; - d=0; - Liden=0; - //double SO=0; - for(int k=0; k &do_vec, - double &rmsd0, int &L_ali, double &Liden, - double &TM_ali, double &rmsd_ali, int &n_ali, int &n_ali8, - const int xlen, const int ylen, - const vector sequence, const double Lnorm_ass, - const double d0_scale, const int i_opt, const int a_opt, - const bool u_opt, const bool d_opt, const bool fast_opt, - const int mol_type, const double TMcut=-1) + const char *seqx, const char *seqy, const char *secx, const char *secy, + double t0[3], double u0[3][3], + double &TM1, double &TM2, double &TM3, double &TM4, double &TM5, + double &d0_0, double &TM_0, + double &d0A, double &d0B, double &d0u, double &d0a, double &d0_out, + string &seqM, string &seqxA, string &seqyA, vector &do_vec, + double &rmsd0, int &L_ali, double &Liden, + double &TM_ali, double &rmsd_ali, int &n_ali, int &n_ali8, + const int xlen, const int ylen, + const vector sequence, const double Lnorm_ass, + const double d0_scale, const int i_opt, const int a_opt, + const bool u_opt, const bool d_opt, const bool fast_opt, + const int mol_type, const double TMcut = -1) { - char *seqx_cp; // for the protein sequence - char *secx_cp; // for the secondary structure - double **xa_cp; // coordinates - string seqxA_cp,seqyA_cp; // alignment - int i,r; - int cp_point=0; // position of circular permutation - int cp_aln_best=0; // amount of aligned residue in sliding window - int cp_aln_current;// amount of aligned residue in sliding window + char *seqx_cp; // for the protein sequence + char *secx_cp; // for the secondary structure + double **xa_cp; // coordinates + string seqxA_cp, seqyA_cp; // alignment + int i, r; + int cp_point = 0; // position of circular permutation + int cp_aln_best = 0; // amount of aligned residue in sliding window + int cp_aln_current; // amount of aligned residue in sliding window /* duplicate structure */ - NewArray(&xa_cp, xlen*2, 3); - seqx_cp = new char[xlen*2 + 1]; - secx_cp = new char[xlen*2 + 1]; - for (r=0;rcp_aln_best) + if (cp_aln_current > cp_aln_best) { - cp_aln_best=cp_aln_current; - cp_point=r; + cp_aln_best = cp_aln_current; + cp_point = r; } } seqM.clear(); @@ -3834,146 +4131,153 @@ int CPalign_main(double **xa, double **ya, seqyA.clear(); seqxA_cp.clear(); seqyA_cp.clear(); - rmsd0=Liden=n_ali=n_ali8=0; + rmsd0 = Liden = n_ali = n_ali8 = 0; /* fTM-align alignment */ TMalign_main(xa, ya, seqx, seqy, secx, secy, - t0, u0, TM1, TM2, TM3, TM4, TM5, - d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA, - do_vec, rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, - xlen, ylen, sequence, Lnorm_tmp, d0_scale, - 0, false, true, false, true, mol_type, -1); + t0, u0, TM1, TM2, TM3, TM4, TM5, + d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA, + do_vec, rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, + xlen, ylen, sequence, Lnorm_tmp, d0_scale, + 0, false, true, false, true, mol_type, -1, 0); /* do not use circular permutation of number of aligned residues is not * larger than sequence-order dependent alignment */ - //cout<<"cp: aln="<= length\n" -" of protein to avoid TM-score >1. -u does not change final alignment.\n" -"\n" -" -o Output superposed structure1 to sup.* for PyMOL viewing.\n" -" $ USalign structure1.pdb structure2.pdb -o sup\n" -" $ pymol -d @sup.pml # C-alpha trace aligned region\n" -" $ pymol -d @sup_all.pml # C-alpha trace whole chain\n" -" $ pymol -d @sup_atm.pml # full-atom aligned region\n" -" $ pymol -d @sup_all_atm.pml # full-atom whole chain\n" -" $ pymol -d @sup_all_atm_lig.pml # full-atom with all molecules\n" -"\n" -" -rasmol Output superposed structure1 to sup.* for RasMol viewing.\n" -" $ USalign structure1.pdb structure2.pdb -rasmol sup\n" -" $ rasmol -script sup # C-alpha trace aligned region\n" -" $ rasmol -script sup_all # C-alpha trace whole chain\n" -" $ rasmol -script sup_atm # full-atom aligned region\n" -" $ rasmol -script sup_all_atm # full-atom whole chain\n" -" $ rasmol -script sup_all_atm_lig # full-atom with all molecules\n" -"\n" -"-chimerax Output superposed structure1 to sup.* for ChimeraX viewing.\n" -" $ USalign structure1.pdb structure2.pdb -chimerax sup\n" -" $ chimerax --script sup.cxc # C-alpha trace aligned region\n" -" $ chimerax --script sup_all.cxc # C-alpha trace whole chain\n" -" $ chimerax --script sup_atm.cxc # full-atom aligned region\n" -" $ chimerax --script sup_all_atm.cxc # full-atom whole chain\n" -" $ chimerax --script sup_all_atm_lig.cxc # full-atom with all molecules\n" -"\n" -" -do Output distance of aligned residue pairs\n" -"\n" -//" -h Print the full help message, including additional options\n" -//"\n" -"Example usages ('gunzip' program is needed to read .gz compressed files):\n" -" USalign 101m.cif.gz 1mba.pdb # pairwise monomeric protein alignment\n" -" USalign 1qf6.cif 5yyn.pdb.gz -mol RNA # pairwise monomeric RNA alignment\n" -" USalign model.pdb native.pdb -TMscore 1 # calculate TM-score between two conformations of a monomer\n" -" USalign 4v4a.cif 4v49.cif -mm 1 -ter 1 # oligomeric alignment for asymmetic units\n" -" USalign 3ksc.pdb1 4lej.pdb1 -mm 1 -ter 0 # oligomeric alignment for biological units\n" -" USalign 1ajk.pdb.gz 2ayh.pdb.gz -mm 3 # circular permutation alignment\n" - <= length\n" + " of protein to avoid TM-score >1. -u does not change final alignment.\n" + "\n" + " -o Output superposed structure1 to sup.* for PyMOL viewing.\n" + " $ USalign structure1.pdb structure2.pdb -o sup\n" + " $ pymol -d @sup.pml # C-alpha trace aligned region\n" + " $ pymol -d @sup_all.pml # C-alpha trace whole chain\n" + " $ pymol -d @sup_atm.pml # full-atom aligned region\n" + " $ pymol -d @sup_all_atm.pml # full-atom whole chain\n" + " $ pymol -d @sup_all_atm_lig.pml # full-atom with all molecules\n" + "\n" + " -rasmol Output superposed structure1 to sup.* for RasMol viewing.\n" + " $ USalign structure1.pdb structure2.pdb -rasmol sup\n" + " $ rasmol -script sup # C-alpha trace aligned region\n" + " $ rasmol -script sup_all # C-alpha trace whole chain\n" + " $ rasmol -script sup_atm # full-atom aligned region\n" + " $ rasmol -script sup_all_atm # full-atom whole chain\n" + " $ rasmol -script sup_all_atm_lig # full-atom with all molecules\n" + "\n" + "-chimerax Output superposed structure1 to sup.* for ChimeraX viewing.\n" + " $ USalign structure1.pdb structure2.pdb -chimerax sup\n" + " $ chimerax --script sup.cxc # C-alpha trace aligned region\n" + " $ chimerax --script sup_all.cxc # C-alpha trace whole chain\n" + " $ chimerax --script sup_atm.cxc # full-atom aligned region\n" + " $ chimerax --script sup_all_atm.cxc # full-atom whole chain\n" + " $ chimerax --script sup_all_atm_lig.cxc # full-atom with all molecules\n" + "\n" + " -do Output distance of aligned residue pairs\n" + "\n" + //" -h Print the full help message, including additional options\n" + //"\n" + "Example usages ('gunzip' program is needed to read .gz compressed files):\n" + " USalign 101m.cif.gz 1mba.pdb # pairwise monomeric protein alignment\n" + " USalign 1qf6.cif 5yyn.pdb.gz -mol RNA # pairwise monomeric RNA alignment\n" + " USalign model.pdb native.pdb -TMscore 1 # calculate TM-score between two conformations of a monomer\n" + " USalign 4v4a.cif 4v49.cif -mm 1 -ter 1 # oligomeric alignment for asymmetic units\n" + " USalign 3ksc.pdb1 4lej.pdb1 -mm 1 -ter 0 # oligomeric alignment for biological units\n" + " USalign 1ajk.pdb.gz 2ayh.pdb.gz -mm 3 # circular permutation alignment\n" + << endl; + + // if (h_opt) + print_extra_help(); exit(EXIT_SUCCESS); } /* TMalign, RNAalign, CPalign, TMscore */ int TMalign(string &xname, string &yname, const string &fname_super, - const string &fname_lign, const string &fname_matrix, - vector &sequence, const double Lnorm_ass, const double d0_scale, - const bool m_opt, const int i_opt, const int o_opt, const int a_opt, - const bool u_opt, const bool d_opt, const double TMcut, - const int infmt1_opt, const int infmt2_opt, const int ter_opt, - const int split_opt, const int outfmt_opt, const bool fast_opt, - const int cp_opt, const int mirror_opt, const int het_opt, - const string &atom_opt, const bool autojustify, const string &mol_opt, - const string &dir_opt, const string &dirpair_opt, const string &dir1_opt, - const string &dir2_opt, const vector &chain2parse1, - const vector &chain2parse2, const vector &model2parse1, - const vector &model2parse2, const int byresi_opt, - const vector &chain1_list, const vector &chain2_list, - const bool se_opt, const bool do_opt) + const string &fname_lign, const string &fname_matrix, + vector &sequence, const double Lnorm_ass, const double d0_scale, + const bool m_opt, const int i_opt, const int o_opt, const int a_opt, + const bool u_opt, const bool d_opt, const double TMcut, + const int infmt1_opt, const int infmt2_opt, const int ter_opt, + const int split_opt, const int outfmt_opt, const bool fast_opt, + const int cp_opt, const int mirror_opt, const int het_opt, + const string &atom_opt, const bool autojustify, const string &mol_opt, + const string &dir_opt, const string &dirpair_opt, const string &dir1_opt, + const string &dir2_opt, const vector &chain2parse1, + const vector &chain2parse2, const vector &model2parse1, + const vector &model2parse2, const int byresi_opt, + const vector &chain1_list, const vector &chain2_list, + const bool se_opt, const bool do_opt) { /* declare previously global variables */ - vector >PDB_lines1; // text of chain1 - vector >PDB_lines2; // text of chain2 + vector> PDB_lines1; // text of chain1 + vector> PDB_lines2; // text of chain2 vector mol_vec1; // molecule type of chain1, RNA if >0 vector mol_vec2; // molecule type of chain2, RNA if >0 vector chainID_list1; // list of chainID1 vector chainID_list2; // list of chainID2 - int i,j; // file index - int chain_i,chain_j; // chain index - int r; // residue index - int xlen, ylen; // chain length - int xchainnum,ychainnum;// number of chains in a PDB file - char *seqx, *seqy; // for the protein sequence - char *secx, *secy; // for the secondary structure - double **xa, **ya; // for input vectors xa[0...xlen-1][0..2] and - // ya[0...ylen-1][0..2], in general, - // ya is regarded as native structure - // --> superpose xa onto ya - vector resi_vec1; // residue index for chain1 - vector resi_vec2; // residue index for chain2 - int read_resi=byresi_opt; // whether to read residue index - if (byresi_opt==0 && o_opt) read_resi=2; + int i, j; // file index + int chain_i, chain_j; // chain index + int r; // residue index + int xlen, ylen; // chain length + int xchainnum, ychainnum; // number of chains in a PDB file + char *seqx, *seqy; // for the protein sequence + char *secx, *secy; // for the secondary structure + double **xa, **ya; // for input vectors xa[0...xlen-1][0..2] and + // ya[0...ylen-1][0..2], in general, + // ya is regarded as native structure + // --> superpose xa onto ya + vector resi_vec1; // residue index for chain1 + vector resi_vec2; // residue index for chain2 + int read_resi = byresi_opt; // whether to read residue index + if (byresi_opt == 0 && o_opt) + read_resi = 2; /* loop over file names */ - for (i=0;i0) make_sec(seqx,xa, xlen, secx,atom_opt); - else make_sec(xa, xlen, secx); // secondary structure assignment + xlen = read_PDB(PDB_lines1[chain_i], xa, seqx, + resi_vec1, read_resi); + if (mirror_opt) + for (r = 0; r < xlen; r++) + xa[r][2] = -xa[r][2]; + if (mol_vec1[chain_i] > 0) + make_sec(seqx, xa, xlen, secx, atom_opt); + else + make_sec(xa, xlen, secx); // secondary structure assignment - for (j=(dir_opt.size()>0)*(i+1);j 0) * (i + 1); j < chain2_list.size(); j++) { - if (dirpair_opt.size() && j!=i) continue; + if (dirpair_opt.size() && j != i) + continue; /* parse chain 2 */ - if (PDB_lines2.size()==0) + if (PDB_lines2.size() == 0) { - yname=chain2_list[j]; - ychainnum=get_PDB_lines(yname, PDB_lines2, chainID_list2, - mol_vec2, ter_opt, infmt2_opt, atom_opt, autojustify, - split_opt, het_opt, chain2parse2, model2parse2); + yname = chain2_list[j]; + ychainnum = get_PDB_lines(yname, PDB_lines2, chainID_list2, + mol_vec2, ter_opt, infmt2_opt, atom_opt, autojustify, + split_opt, het_opt, chain2parse2, model2parse2); if (!ychainnum) { - cerr<<"Warning! Cannot parse file: "<0) - make_sec(seqy, ya, ylen, secy, atom_opt); - else make_sec(ya, ylen, secy); + resi_vec2, read_resi); + if (mol_vec2[chain_j] > 0) + make_sec(seqy, ya, ylen, secy, atom_opt); + else + make_sec(ya, ylen, secy); - if (byresi_opt) extract_aln_from_resi(sequence, - seqx,seqy,resi_vec1,resi_vec2,byresi_opt); + if (byresi_opt) + extract_aln_from_resi(sequence, + seqx, seqy, resi_vec1, resi_vec2, byresi_opt); /* declare variable specific to this pair of TMalign */ double t0[3], u0[3][3]; double TM1, TM2; - double TM3, TM4, TM5; // for a_opt, u_opt, d_opt + double TM3, TM4, TM5; // for a_opt, u_opt, d_opt double d0_0, TM_0; double d0A, d0B, d0u, d0a; - double d0_out=5.0; - string seqM, seqxA, seqyA;// for output alignment + double d0_out = 5.0; + string seqM, seqxA, seqyA; // for output alignment double rmsd0 = 0.0; - int L_ali; // Aligned length in standard_TMscore - double Liden=0; - double TM_ali, rmsd_ali; // TMscore and rmsd in standard_TMscore - int n_ali=0; - int n_ali8=0; - bool force_fast_opt=(getmin(xlen,ylen)>1500)?true:fast_opt; + int L_ali; // Aligned length in standard_TMscore + double Liden = 0; + double TM_ali, rmsd_ali; // TMscore and rmsd in standard_TMscore + int n_ali = 0; + int n_ali8 = 0; + bool force_fast_opt = (getmin(xlen, ylen) > 1500) ? true : fast_opt; vector do_vec; /* entry function for structure alignment */ - if (cp_opt) CPalign_main( - xa, ya, seqx, seqy, secx, secy, - t0, u0, TM1, TM2, TM3, TM4, TM5, - d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, - seqM, seqxA, seqyA, do_vec, - rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, - xlen, ylen, sequence, Lnorm_ass, d0_scale, - i_opt, a_opt, u_opt, d_opt, force_fast_opt, - mol_vec1[chain_i]+mol_vec2[chain_j],TMcut); - else if (se_opt) - { - int *invmap = new int[ylen+1]; - u0[0][0]=u0[1][1]=u0[2][2]=1; - u0[0][1]= u0[0][2]= - u0[1][0]= u0[1][2]= - u0[2][0]= u0[2][1]= - t0[0] =t0[1] =t0[2] =0; - se_main(xa, ya, seqx, seqy, TM1, TM2, TM3, TM4, TM5, + if (cp_opt) + CPalign_main( + xa, ya, seqx, seqy, secx, secy, + t0, u0, TM1, TM2, TM3, TM4, TM5, d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA, do_vec, rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, xlen, ylen, sequence, Lnorm_ass, d0_scale, - i_opt, a_opt, u_opt, d_opt, - mol_vec1[chain_i]+mol_vec2[chain_j], - outfmt_opt, invmap); - if (outfmt_opt>=2) + i_opt, a_opt, u_opt, d_opt, force_fast_opt, + mol_vec1[chain_i] + mol_vec2[chain_j], TMcut); + else if (se_opt) + { + int *invmap = new int[ylen + 1]; + u0[0][0] = u0[1][1] = u0[2][2] = 1; + u0[0][1] = u0[0][2] = + u0[1][0] = u0[1][2] = + u0[2][0] = u0[2][1] = + t0[0] = t0[1] = t0[2] = 0; + se_main(xa, ya, seqx, seqy, TM1, TM2, TM3, TM4, TM5, + d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, + seqM, seqxA, seqyA, do_vec, + rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, + xlen, ylen, sequence, Lnorm_ass, d0_scale, + i_opt, a_opt, u_opt, d_opt, + mol_vec1[chain_i] + mol_vec2[chain_j], + outfmt_opt, invmap); + if (outfmt_opt >= 2) { - Liden=L_ali=0; - int r1,r2; - for (r2=0;r21) + if (chain2_list.size() > 1) { yname.clear(); - for (chain_j=0;chain_j &sequence, - const double d0_scale, const bool m_opt, const int o_opt, - const int a_opt, const bool d_opt, const bool full_opt, - const double TMcut, const int infmt1_opt, const int infmt2_opt, - const int ter_opt, const int split_opt, const int outfmt_opt, - bool fast_opt, const int mirror_opt, const int het_opt, - const string &atom_opt, const bool autojustify, const string &mol_opt, - const string &dir1_opt, const string &dir2_opt, - const vector &chain2parse1, const vector &chain2parse2, - const vector &model2parse1, const vector &model2parse2, - const vector &chain1_list, const vector &chain2_list, - const int byresi_opt,const string&chainmapfile, const bool se_opt) + const string &fname_super, const string &fname_lign, + const string &fname_matrix, vector &sequence, + const double d0_scale, const bool m_opt, const int o_opt, + const int a_opt, const bool d_opt, const bool full_opt, + const double TMcut, const int infmt1_opt, const int infmt2_opt, + const int ter_opt, const int split_opt, const int outfmt_opt, + bool fast_opt, const int mirror_opt, const int het_opt, + const string &atom_opt, const bool autojustify, const string &mol_opt, + const string &dir1_opt, const string &dir2_opt, + const vector &chain2parse1, const vector &chain2parse2, + const vector &model2parse1, const vector &model2parse2, + const vector &chain1_list, const vector &chain2_list, + const int byresi_opt, const string &chainmapfile, const bool se_opt) { /* declare previously global variables */ - vector > > xa_vec; // structure of complex1 - vector > > ya_vec; // structure of complex2 - vector >seqx_vec; // sequence of complex1 - vector >seqy_vec; // sequence of complex2 - vector >secx_vec; // secondary structure of complex1 - vector >secy_vec; // secondary structure of complex2 - vector mol_vec1; // molecule type of complex1, RNA if >0 - vector mol_vec2; // molecule type of complex2, RNA if >0 - vector chainID_list1; // list of chainID1 - vector chainID_list2; // list of chainID2 - vector xlen_vec; // length of complex1 - vector ylen_vec; // length of complex2 - int i,j; // chain index - int xlen, ylen; // chain length - double **xa, **ya; // structure of single chain - char *seqx, *seqy; // for the protein sequence - char *secx, *secy; // for the secondary structure - int xlen_aa,ylen_aa; // total length of protein - int xlen_na,ylen_na; // total length of RNA/DNA - vector resi_vec1; // residue index for chain1 - vector resi_vec2; // residue index for chain2 + vector>> xa_vec; // structure of complex1 + vector>> ya_vec; // structure of complex2 + vector> seqx_vec; // sequence of complex1 + vector> seqy_vec; // sequence of complex2 + vector> secx_vec; // secondary structure of complex1 + vector> secy_vec; // secondary structure of complex2 + vector mol_vec1; // molecule type of complex1, RNA if >0 + vector mol_vec2; // molecule type of complex2, RNA if >0 + vector chainID_list1; // list of chainID1 + vector chainID_list2; // list of chainID2 + vector xlen_vec; // length of complex1 + vector ylen_vec; // length of complex2 + int i, j; // chain index + int xlen, ylen; // chain length + double **xa, **ya; // structure of single chain + char *seqx, *seqy; // for the protein sequence + char *secx, *secy; // for the secondary structure + int xlen_aa, ylen_aa; // total length of protein + int xlen_na, ylen_na; // total length of RNA/DNA + vector resi_vec1; // residue index for chain1 + vector resi_vec2; // residue index for chain2 /* parse complex */ parse_chain_list(chain1_list, xa_vec, seqx_vec, secx_vec, mol_vec1, - xlen_vec, chainID_list1, ter_opt, split_opt, mol_opt, infmt1_opt, - atom_opt, autojustify, mirror_opt, het_opt, xlen_aa, xlen_na, o_opt, - resi_vec1, chain2parse1, model2parse1); - if (xa_vec.size()==0) PrintErrorAndQuit("ERROR! 0 chain in complex 1"); + xlen_vec, chainID_list1, ter_opt, split_opt, mol_opt, infmt1_opt, + atom_opt, autojustify, mirror_opt, het_opt, xlen_aa, xlen_na, o_opt, + resi_vec1, chain2parse1, model2parse1); + if (xa_vec.size() == 0) + PrintErrorAndQuit("ERROR! 0 chain in complex 1"); parse_chain_list(chain2_list, ya_vec, seqy_vec, secy_vec, mol_vec2, - ylen_vec, chainID_list2, ter_opt, split_opt, mol_opt, infmt2_opt, - atom_opt, autojustify, 0, het_opt, ylen_aa, ylen_na, o_opt, - resi_vec2, chain2parse2, model2parse2); - if (ya_vec.size()==0) PrintErrorAndQuit("ERROR! 0 chain in complex 2"); - int len_aa=getmin(xlen_aa,ylen_aa); - int len_na=getmin(xlen_na,ylen_na); + ylen_vec, chainID_list2, ter_opt, split_opt, mol_opt, infmt2_opt, + atom_opt, autojustify, 0, het_opt, ylen_aa, ylen_na, o_opt, + resi_vec2, chain2parse2, model2parse2); + if (ya_vec.size() == 0) + PrintErrorAndQuit("ERROR! 0 chain in complex 2"); + int len_aa = getmin(xlen_aa, ylen_aa); + int len_na = getmin(xlen_na, ylen_na); if (a_opt) { - len_aa=(xlen_aa+ylen_aa)/2; - len_na=(xlen_na+ylen_na)/2; + len_aa = (xlen_aa + ylen_aa) / 2; + len_na = (xlen_na + ylen_na) / 2; } - int i_opt=0; - if (byresi_opt) i_opt=3; + int i_opt = 0; + if (byresi_opt) + i_opt = 3; - map chainmap; + map chainmap; if (chainmapfile.size()) { string line; - int chainidx1,chainidx2; + int chainidx1, chainidx2; vector line_vec; ifstream fin; - bool fromStdin=(chainmapfile=="-"); - if (!fromStdin) fin.open(chainmapfile.c_str()); - while (fromStdin?cin.good():fin.good()) - { - if (fromStdin) getline(cin,line); - else getline(fin,line); - if (line.size()==0 || line[0]=='#') continue; - split(line,line_vec,'\t'); - if (line_vec.size()==2) + bool fromStdin = (chainmapfile == "-"); + if (!fromStdin) + fin.open(chainmapfile.c_str()); + while (fromStdin ? cin.good() : fin.good()) + { + if (fromStdin) + getline(cin, line); + else + getline(fin, line); + if (line.size() == 0 || line[0] == '#') + continue; + split(line, line_vec, '\t'); + if (line_vec.size() == 2) { - chainidx1=-1; - chainidx2=-1; - - for (i=0;i=0 && chainidx2>=0) + if (chainidx1 >= 0 && chainidx2 >= 0) { if (chainmap.count(chainidx1)) - cerr<<"ERROR! "< do_vec; - - if (byresi_opt) extract_aln_from_resi(sequence, - seqx,seqy,resi_vec1,resi_vec2,byresi_opt); + + if (byresi_opt) + extract_aln_from_resi(sequence, + seqx, seqy, resi_vec1, resi_vec2, byresi_opt); /* entry function for structure alignment */ if (se_opt) { - int *invmap = new int[ylen+1]; - u0[0][0]=u0[1][1]=u0[2][2]=1; - u0[0][1]= u0[0][2]= - u0[1][0]= u0[1][2]= - u0[2][0]= u0[2][1]= - t0[0] =t0[1] =t0[2] =0; + int *invmap = new int[ylen + 1]; + u0[0][0] = u0[1][1] = u0[2][2] = 1; + u0[0][1] = u0[0][2] = + u0[1][0] = u0[1][2] = + u0[2][0] = u0[2][1] = + t0[0] = t0[1] = t0[2] = 0; se_main(xa, ya, seqx, seqy, TM1, TM2, TM3, TM4, TM5, - d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, - seqM, seqxA, seqyA, do_vec, - rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, - xlen, ylen, sequence, 0, d0_scale, - i_opt, a_opt, false, d_opt, - mol_vec1[0]+mol_vec2[0], outfmt_opt, invmap); - if (outfmt_opt>=2) + d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, + seqM, seqxA, seqyA, do_vec, + rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, + xlen, ylen, sequence, 0, d0_scale, + i_opt, a_opt, false, d_opt, + mol_vec1[0] + mol_vec2[0], outfmt_opt, invmap); + if (outfmt_opt >= 2) { - Liden=L_ali=0; - int r1,r2; - for (r2=0;r2 > >().swap(xa_vec); // structure of complex1 - vector > >().swap(ya_vec); // structure of complex2 - vector >().swap(seqx_vec); // sequence of complex1 - vector >().swap(seqy_vec); // sequence of complex2 - vector >().swap(secx_vec); // secondary structure of complex1 - vector >().swap(secy_vec); // secondary structure of complex2 - mol_vec1.clear(); // molecule type of complex1, RNA if >0 - mol_vec2.clear(); // molecule type of complex2, RNA if >0 - chainID_list1.clear(); // list of chainID1 - chainID_list2.clear(); // list of chainID2 - xlen_vec.clear(); // length of complex1 - ylen_vec.clear(); // length of complex2 + vector>>().swap(xa_vec); // structure of complex1 + vector>>().swap(ya_vec); // structure of complex2 + vector>().swap(seqx_vec); // sequence of complex1 + vector>().swap(seqy_vec); // sequence of complex2 + vector>().swap(secx_vec); // secondary structure of complex1 + vector>().swap(secy_vec); // secondary structure of complex2 + mol_vec1.clear(); // molecule type of complex1, RNA if >0 + mol_vec2.clear(); // molecule type of complex2, RNA if >0 + chainID_list1.clear(); // list of chainID1 + chainID_list2.clear(); // list of chainID2 + xlen_vec.clear(); // length of complex1 + ylen_vec.clear(); // length of complex2 return 0; } /* declare TM-score tables */ - int chain1_num=xa_vec.size(); - int chain2_num=ya_vec.size(); - int chain_num =MAX(chain1_num,chain2_num); - vector tmp_str_vec(chain2_num,""); + int chain1_num = xa_vec.size(); + int chain2_num = ya_vec.size(); + int chain_num = MAX(chain1_num, chain2_num); + vector tmp_str_vec(chain2_num, ""); double **TMave_mat; double **ut_mat; // rotation matrices for all-against-all alignment - int ui,uj,ut_idx; - NewArray(&TMave_mat,chain_num,chain_num); - NewArray(&ut_mat,chain1_num*chain2_num,4*3); - vector >seqxA_mat(chain1_num,tmp_str_vec); - vector > seqM_mat(chain1_num,tmp_str_vec); - vector >seqyA_mat(chain1_num,tmp_str_vec); + int ui, uj, ut_idx; + NewArray(&TMave_mat, chain_num, chain_num); + NewArray(&ut_mat, chain1_num * chain2_num, 4 * 3); + vector> seqxA_mat(chain1_num, tmp_str_vec); + vector> seqM_mat(chain1_num, tmp_str_vec); + vector> seqyA_mat(chain1_num, tmp_str_vec); - double maxTMmono=-1; - int maxTMmono_i,maxTMmono_j; + double maxTMmono = -1; + int maxTMmono_i, maxTMmono_j; /* get all-against-all alignment */ - if (len_aa+len_na>500) fast_opt=true; - for (i=0;i 500) + fast_opt = true; + for (i = 0; i < chain1_num; i++) { - xlen=xlen_vec[i]; - if (xlen<3) + xlen = xlen_vec[i]; + if (xlen < 3) { - for (j=0;j do_vec; - int Lnorm_tmp=len_aa; - if (mol_vec1[i]+mol_vec2[j]>0) Lnorm_tmp=len_na; - + int Lnorm_tmp = len_aa; + if (mol_vec1[i] + mol_vec2[j] > 0) + Lnorm_tmp = len_na; + if (byresi_opt) { - int total_aln=extract_aln_from_resi(sequence, seqx,seqy, - resi_vec1,resi_vec2,xlen_vec,ylen_vec, i, j, byresi_opt); - seqxA_mat[i][j]=sequence[0]; - seqyA_mat[i][j]=sequence[1]; - if (total_aln>xlen+ylen-3) + int total_aln = extract_aln_from_resi(sequence, seqx, seqy, + resi_vec1, resi_vec2, xlen_vec, ylen_vec, i, j, byresi_opt); + seqxA_mat[i][j] = sequence[0]; + seqyA_mat[i][j] = sequence[1]; + if (total_aln > xlen + ylen - 3) { - for (ui=0;ui<3;ui++) for (uj=0;uj<3;uj++) - ut_mat[ut_idx][ui*3+uj]=(ui==uj)?1:0; - for (uj=0;uj<3;uj++) ut_mat[ut_idx][9+uj]=0; - TMave_mat[i][j]=TMave_mat[j][i]=0; + for (ui = 0; ui < 3; ui++) + for (uj = 0; uj < 3; uj++) + ut_mat[ut_idx][ui * 3 + uj] = (ui == uj) ? 1 : 0; + for (uj = 0; uj < 3; uj++) + ut_mat[ut_idx][9 + uj] = 0; + TMave_mat[i][j] = TMave_mat[j][i] = 0; seqM.clear(); seqxA.clear(); seqyA.clear(); - delete[]seqy; - delete[]secy; - DeleteArray(&ya,ylen); + delete[] seqy; + delete[] secy; + DeleteArray(&ya, ylen); continue; } } @@ -874,54 +911,58 @@ int MMalign(const string &xname, const string &yname, /* entry function for structure alignment */ if (se_opt) { - int *invmap = new int[ylen+1]; - u0[0][0]=u0[1][1]=u0[2][2]=1; - u0[0][1]= u0[0][2]= - u0[1][0]= u0[1][2]= - u0[2][0]= u0[2][1]= - t0[0] =t0[1] =t0[2] =0; + int *invmap = new int[ylen + 1]; + u0[0][0] = u0[1][1] = u0[2][2] = 1; + u0[0][1] = u0[0][2] = + u0[1][0] = u0[1][2] = + u0[2][0] = u0[2][1] = + t0[0] = t0[1] = t0[2] = 0; se_main(xa, ya, seqx, seqy, TM1, TM2, TM3, TM4, TM5, - d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, - seqM, seqxA, seqyA, do_vec, - rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, - xlen, ylen, sequence, Lnorm_tmp, d0_scale, - i_opt, false, true, false, - mol_vec1[i]+mol_vec2[j], outfmt_opt, invmap); - if (outfmt_opt>=2) + d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, + seqM, seqxA, seqyA, do_vec, + rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, + xlen, ylen, sequence, Lnorm_tmp, d0_scale, + i_opt, false, true, false, + mol_vec1[i] + mol_vec2[j], outfmt_opt, invmap); + if (outfmt_opt >= 2) { - Liden=L_ali=0; - int r1,r2; - for (r2=0;r2maxTMmono) + for (ui = 0; ui < 3; ui++) + for (uj = 0; uj < 3; uj++) + ut_mat[ut_idx][ui * 3 + uj] = u0[ui][uj]; + for (uj = 0; uj < 3; uj++) + ut_mat[ut_idx][9 + uj] = t0[uj]; + seqxA_mat[i][j] = seqxA; + seqyA_mat[i][j] = seqyA; + TMave_mat[i][j] = TMave_mat[j][i] = TM4 * Lnorm_tmp; + if (TMave_mat[i][j] > maxTMmono) { - maxTMmono=TMave_mat[i][j]; - maxTMmono_i=i; - maxTMmono_j=j; + maxTMmono = TMave_mat[i][j]; + maxTMmono_i = i; + maxTMmono_j = j; } /* clean up */ @@ -929,79 +970,82 @@ int MMalign(const string &xname, const string &yname, seqxA.clear(); seqyA.clear(); - delete[]seqy; - delete[]secy; - DeleteArray(&ya,ylen); + delete[] seqy; + delete[] secy; + DeleteArray(&ya, ylen); do_vec.clear(); } - delete[]seqx; - delete[]secx; - DeleteArray(&xa,xlen); + delete[] seqx; + delete[] secx; + DeleteArray(&xa, xlen); } /* calculate initial chain-chain assignment */ int *assign1_list; // value is index of assigned chain2 int *assign2_list; // value is index of assigned chain1 - assign1_list=new int[chain1_num]; - assign2_list=new int[chain2_num]; - double total_score=enhanced_greedy_search(TMave_mat, assign1_list, - assign2_list, chain1_num, chain2_num); - if (total_score<=0) PrintErrorAndQuit("ERROR! No assignable chain"); + assign1_list = new int[chain1_num]; + assign2_list = new int[chain2_num]; + double total_score = enhanced_greedy_search(TMave_mat, assign1_list, + assign2_list, chain1_num, chain2_num); + if (total_score <= 0) + PrintErrorAndQuit("ERROR! No assignable chain"); /* refine alignment for large oligomers */ - int aln_chain_num=count_assign_pair(assign1_list,chain1_num); - bool is_oligomer=(aln_chain_num>=3); - if (aln_chain_num==2 && chainmap.size()==0 && !se_opt) // dimer alignment + int aln_chain_num = count_assign_pair(assign1_list, chain1_num); + bool is_oligomer = (aln_chain_num >= 3); + if (aln_chain_num == 2 && chainmap.size() == 0 && !se_opt) // dimer alignment { - int na_chain_num1,na_chain_num2,aa_chain_num1,aa_chain_num2; - count_na_aa_chain_num(na_chain_num1,aa_chain_num1,mol_vec1); - count_na_aa_chain_num(na_chain_num2,aa_chain_num2,mol_vec2); + int na_chain_num1, na_chain_num2, aa_chain_num1, aa_chain_num2; + count_na_aa_chain_num(na_chain_num1, aa_chain_num1, mol_vec1); + count_na_aa_chain_num(na_chain_num2, aa_chain_num2, mol_vec2); /* align protein-RNA hybrid dimer to another hybrid dimer */ - if (na_chain_num1==1 && na_chain_num2==1 && - aa_chain_num1==1 && aa_chain_num2==1) is_oligomer=false; + if (na_chain_num1 == 1 && na_chain_num2 == 1 && + aa_chain_num1 == 1 && aa_chain_num2 == 1) + is_oligomer = false; /* align pure protein dimer or pure RNA dimer */ - else if ((getmin(na_chain_num1,na_chain_num2)==0 && - aa_chain_num1==2 && aa_chain_num2==2) || - (getmin(aa_chain_num1,aa_chain_num2)==0 && - na_chain_num1==2 && na_chain_num2==2)) + else if ((getmin(na_chain_num1, na_chain_num2) == 0 && + aa_chain_num1 == 2 && aa_chain_num2 == 2) || + (getmin(aa_chain_num1, aa_chain_num2) == 0 && + na_chain_num1 == 2 && na_chain_num2 == 2)) { - adjust_dimer_assignment(xa_vec,ya_vec,xlen_vec,ylen_vec,mol_vec1, - mol_vec2,assign1_list,assign2_list,seqxA_mat,seqyA_mat); - is_oligomer=false; // cannot refiner further + adjust_dimer_assignment(xa_vec, ya_vec, xlen_vec, ylen_vec, mol_vec1, + mol_vec2, assign1_list, assign2_list, seqxA_mat, seqyA_mat); + is_oligomer = false; // cannot refiner further } - else is_oligomer=true; /* align oligomers to dimer */ + else + is_oligomer = true; /* align oligomers to dimer */ } - if ((aln_chain_num>=3 || is_oligomer) && chainmap.size()==0 && !se_opt) // oligomer alignment + if ((aln_chain_num >= 3 || is_oligomer) && chainmap.size() == 0 && !se_opt) // oligomer alignment { /* extract centroid coordinates */ double **xcentroids; double **ycentroids; NewArray(&xcentroids, chain1_num, 3); NewArray(&ycentroids, chain2_num, 3); - double d0MM=getmin( + double d0MM = getmin( calculate_centroids(xa_vec, chain1_num, xcentroids), calculate_centroids(ya_vec, chain2_num, ycentroids)); /* refine enhanced greedy search with centroid superposition */ - //double het_deg=check_heterooligomer(TMave_mat, chain1_num, chain2_num); + // double het_deg=check_heterooligomer(TMave_mat, chain1_num, chain2_num); homo_refined_greedy_search(TMave_mat, assign1_list, - assign2_list, chain1_num, chain2_num, xcentroids, - ycentroids, d0MM, len_aa+len_na, ut_mat); + assign2_list, chain1_num, chain2_num, xcentroids, + ycentroids, d0MM, len_aa + len_na, ut_mat); - if (chain1_num<=chain2_num) + if (chain1_num <= chain2_num) { hetero_refined_greedy_search(TMave_mat, assign1_list, - assign2_list, chain1_num, chain2_num, xcentroids, - ycentroids, d0MM, len_aa+len_na); + assign2_list, chain1_num, chain2_num, xcentroids, + ycentroids, d0MM, len_aa + len_na); } else { hetero_refined_greedy_search(TMave_mat, assign2_list, - assign1_list, chain2_num, chain1_num, ycentroids, - xcentroids, d0MM, len_aa+len_na); + assign1_list, chain2_num, chain1_num, ycentroids, + xcentroids, d0MM, len_aa + len_na); } /* clean up */ @@ -1010,64 +1054,64 @@ int MMalign(const string &xname, const string &yname, } /* store initial assignment */ - int init_pair_num=count_assign_pair(assign1_list,chain1_num); + int init_pair_num = count_assign_pair(assign1_list, chain1_num); int *assign1_init, *assign2_init; - assign1_init=new int[chain1_num]; - assign2_init=new int[chain2_num]; + assign1_init = new int[chain1_num]; + assign2_init = new int[chain2_num]; double **TMave_init; - NewArray(&TMave_init,chain1_num,chain2_num); - vector >seqxA_init(chain1_num,tmp_str_vec); - vector >seqyA_init(chain1_num,tmp_str_vec); + NewArray(&TMave_init, chain1_num, chain2_num); + vector> seqxA_init(chain1_num, tmp_str_vec); + vector> seqyA_init(chain1_num, tmp_str_vec); vector sequence_init; copy_chain_assign_data(chain1_num, chain2_num, sequence_init, - seqxA_mat, seqyA_mat, assign1_list, assign2_list, TMave_mat, - seqxA_init, seqyA_init, assign1_init, assign2_init, TMave_init); + seqxA_mat, seqyA_mat, assign1_list, assign2_list, TMave_mat, + seqxA_init, seqyA_init, assign1_init, assign2_init, TMave_init); /* perform iterative alignment */ - double max_total_score=0; // ignore old total_score because previous - // score was from monomeric chain superpositions - int max_iter=5-(int)((len_aa+len_na)/200); - if (max_iter<2) max_iter=2; - //if (byresi_opt==0) + double max_total_score = 0; // ignore old total_score because previous + // score was from monomeric chain superpositions + int max_iter = 5 - (int)((len_aa + len_na) / 200); + if (max_iter < 2) + max_iter = 2; + // if (byresi_opt==0) if (!se_opt) MMalign_iter(max_total_score, max_iter, xa_vec, ya_vec, - seqx_vec, seqy_vec, secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, - ylen_vec, xa, ya, seqx, seqy, secx, secy, len_aa, len_na, chain1_num, - chain2_num, TMave_mat, seqxA_mat, seqyA_mat, assign1_list, assign2_list, - sequence, d0_scale, fast_opt, chainmap, byresi_opt); - - if (byresi_opt && aln_chain_num>=4 && is_oligomer && chainmap.size()==0 && !se_opt) // oligomer alignment + seqx_vec, seqy_vec, secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, + ylen_vec, xa, ya, seqx, seqy, secx, secy, len_aa, len_na, chain1_num, + chain2_num, TMave_mat, seqxA_mat, seqyA_mat, assign1_list, assign2_list, + sequence, d0_scale, fast_opt, chainmap, byresi_opt); + + if (byresi_opt && aln_chain_num >= 4 && is_oligomer && chainmap.size() == 0 && !se_opt) // oligomer alignment { MMalign_final(xname.substr(dir1_opt.size()), yname.substr(dir2_opt.size()), - chainID_list1, chainID_list2, - fname_super, fname_lign, fname_matrix, - xa_vec, ya_vec, seqx_vec, seqy_vec, - secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec, - xa, ya, seqx, seqy, secx, secy, len_aa, len_na, - chain1_num, chain2_num, TMave_mat, - seqxA_mat, seqM_mat, seqyA_mat, assign1_list, assign2_list, sequence, - d0_scale, 1, 0, 5, ter_opt, split_opt, - 0, 0, true, true, mirror_opt, resi_vec1, resi_vec2); - + chainID_list1, chainID_list2, + fname_super, fname_lign, fname_matrix, + xa_vec, ya_vec, seqx_vec, seqy_vec, + secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec, + xa, ya, seqx, seqy, secx, secy, len_aa, len_na, + chain1_num, chain2_num, TMave_mat, + seqxA_mat, seqM_mat, seqyA_mat, assign1_list, assign2_list, sequence, + d0_scale, 1, 0, 5, ter_opt, split_opt, + 0, 0, true, true, mirror_opt, resi_vec1, resi_vec2); /* extract centroid coordinates */ double **xcentroids; double **ycentroids; NewArray(&xcentroids, chain1_num, 3); NewArray(&ycentroids, chain2_num, 3); - double d0MM=getmin( + double d0MM = getmin( calculate_centroids(xa_vec, chain1_num, xcentroids), calculate_centroids(ya_vec, chain2_num, ycentroids)); /* refine enhanced greedy search with centroid superposition */ - //double het_deg=check_heterooligomer(TMave_mat, chain1_num, chain2_num); + // double het_deg=check_heterooligomer(TMave_mat, chain1_num, chain2_num); homo_refined_greedy_search(TMave_mat, assign1_list, - assign2_list, chain1_num, chain2_num, xcentroids, - ycentroids, d0MM, len_aa+len_na, ut_mat); + assign2_list, chain1_num, chain2_num, xcentroids, + ycentroids, d0MM, len_aa + len_na, ut_mat); hetero_refined_greedy_search(TMave_mat, assign1_list, - assign2_list, chain1_num, chain2_num, xcentroids, - ycentroids, d0MM, len_aa+len_na); + assign2_list, chain1_num, chain2_num, xcentroids, + ycentroids, d0MM, len_aa + len_na); /* clean up */ DeleteArray(&xcentroids, chain1_num); @@ -1075,210 +1119,219 @@ int MMalign(const string &xname, const string &yname, } /* sometime MMalign_iter is even worse than monomer alignment */ - if (byresi_opt==0 && max_total_score=init_pair_num) copy_chain_assign_data( - chain1_num, chain2_num, sequence_init, - seqxA_mat, seqyA_mat, assign1_list, assign2_list, TMave_mat, - seqxA_init, seqyA_init, assign1_init, assign2_init, TMave_init); - double max_total_score_cross=max_total_score; - if (byresi_opt==0 && len_aa+len_na<10000) + int iter_pair_num = count_assign_pair(assign1_list, chain1_num); + if (iter_pair_num >= init_pair_num) + copy_chain_assign_data( + chain1_num, chain2_num, sequence_init, + seqxA_mat, seqyA_mat, assign1_list, assign2_list, TMave_mat, + seqxA_init, seqyA_init, assign1_init, assign2_init, TMave_init); + double max_total_score_cross = max_total_score; + if (byresi_opt == 0 && len_aa + len_na < 10000) { MMalign_dimer(max_total_score_cross, xa_vec, ya_vec, seqx_vec, seqy_vec, - secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec, - xa, ya, seqx, seqy, secx, secy, len_aa, len_na, chain1_num, chain2_num, - TMave_init, seqxA_init, seqyA_init, assign1_init, assign2_init, - sequence_init, d0_scale, fast_opt); - if (max_total_score_cross>max_total_score) + secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec, + xa, ya, seqx, seqy, secx, secy, len_aa, len_na, chain1_num, chain2_num, + TMave_init, seqxA_init, seqyA_init, assign1_init, assign2_init, + sequence_init, d0_scale, fast_opt); + if (max_total_score_cross > max_total_score) { - max_total_score=max_total_score_cross; + max_total_score = max_total_score_cross; copy_chain_assign_data(chain1_num, chain2_num, sequence, - seqxA_init, seqyA_init, assign1_init, assign2_init, TMave_init, - seqxA_mat, seqyA_mat, assign1_list, assign2_list, TMave_mat); + seqxA_init, seqyA_init, assign1_init, assign2_init, TMave_init, + seqxA_mat, seqyA_mat, assign1_list, assign2_list, TMave_mat); } - } + } /* final alignment */ - if (outfmt_opt==0) print_version(); - if (se_opt) MMalign_se_final(xname.substr(dir1_opt.size()), yname.substr(dir2_opt.size()), - chainID_list1, chainID_list2, - fname_super, fname_lign, fname_matrix, - xa_vec, ya_vec, seqx_vec, seqy_vec, - secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec, - xa, ya, seqx, seqy, secx, secy, len_aa, len_na, - chain1_num, chain2_num, TMave_mat, - seqxA_mat, seqM_mat, seqyA_mat, assign1_list, assign2_list, sequence, - d0_scale, m_opt, o_opt, outfmt_opt, ter_opt, split_opt, - a_opt, d_opt, fast_opt, full_opt, mirror_opt, resi_vec1, resi_vec2); - else MMalign_final(xname.substr(dir1_opt.size()), yname.substr(dir2_opt.size()), - chainID_list1, chainID_list2, - fname_super, fname_lign, fname_matrix, - xa_vec, ya_vec, seqx_vec, seqy_vec, - secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec, - xa, ya, seqx, seqy, secx, secy, len_aa, len_na, - chain1_num, chain2_num, TMave_mat, - seqxA_mat, seqM_mat, seqyA_mat, assign1_list, assign2_list, sequence, - d0_scale, m_opt, o_opt, outfmt_opt, ter_opt, split_opt, - a_opt, d_opt, fast_opt, full_opt, mirror_opt, resi_vec1, resi_vec2); + if (outfmt_opt == 0) + print_version(); + if (se_opt) + MMalign_se_final(xname.substr(dir1_opt.size()), yname.substr(dir2_opt.size()), + chainID_list1, chainID_list2, + fname_super, fname_lign, fname_matrix, + xa_vec, ya_vec, seqx_vec, seqy_vec, + secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec, + xa, ya, seqx, seqy, secx, secy, len_aa, len_na, + chain1_num, chain2_num, TMave_mat, + seqxA_mat, seqM_mat, seqyA_mat, assign1_list, assign2_list, sequence, + d0_scale, m_opt, o_opt, outfmt_opt, ter_opt, split_opt, + a_opt, d_opt, fast_opt, full_opt, mirror_opt, resi_vec1, resi_vec2); + else + MMalign_final(xname.substr(dir1_opt.size()), yname.substr(dir2_opt.size()), + chainID_list1, chainID_list2, + fname_super, fname_lign, fname_matrix, + xa_vec, ya_vec, seqx_vec, seqy_vec, + secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec, + xa, ya, seqx, seqy, secx, secy, len_aa, len_na, + chain1_num, chain2_num, TMave_mat, + seqxA_mat, seqM_mat, seqyA_mat, assign1_list, assign2_list, sequence, + d0_scale, m_opt, o_opt, outfmt_opt, ter_opt, split_opt, + a_opt, d_opt, fast_opt, full_opt, mirror_opt, resi_vec1, resi_vec2); /* clean up everything */ - delete [] assign1_list; - delete [] assign2_list; - DeleteArray(&TMave_mat,chain_num); - DeleteArray(&ut_mat, chain1_num*chain2_num); - vector >().swap(seqxA_mat); - vector >().swap(seqM_mat); - vector >().swap(seqyA_mat); + delete[] assign1_list; + delete[] assign2_list; + DeleteArray(&TMave_mat, chain_num); + DeleteArray(&ut_mat, chain1_num * chain2_num); + vector>().swap(seqxA_mat); + vector>().swap(seqM_mat); + vector>().swap(seqyA_mat); vector().swap(tmp_str_vec); - delete [] assign1_init; - delete [] assign2_init; - DeleteArray(&TMave_init,chain1_num); - vector >().swap(seqxA_init); - vector >().swap(seqyA_init); - - vector > >().swap(xa_vec); // structure of complex1 - vector > >().swap(ya_vec); // structure of complex2 - vector >().swap(seqx_vec); // sequence of complex1 - vector >().swap(seqy_vec); // sequence of complex2 - vector >().swap(secx_vec); // secondary structure of complex1 - vector >().swap(secy_vec); // secondary structure of complex2 - mol_vec1.clear(); // molecule type of complex1, RNA if >0 - mol_vec2.clear(); // molecule type of complex2, RNA if >0 - vector().swap(chainID_list1); // list of chainID1 - vector().swap(chainID_list2); // list of chainID2 - xlen_vec.clear(); // length of complex1 - ylen_vec.clear(); // length of complex2 - vector ().swap(resi_vec1); // residue index for chain1 - vector ().swap(resi_vec2); // residue index for chain2 - map ().swap(chainmap); + delete[] assign1_init; + delete[] assign2_init; + DeleteArray(&TMave_init, chain1_num); + vector>().swap(seqxA_init); + vector>().swap(seqyA_init); + + vector>>().swap(xa_vec); // structure of complex1 + vector>>().swap(ya_vec); // structure of complex2 + vector>().swap(seqx_vec); // sequence of complex1 + vector>().swap(seqy_vec); // sequence of complex2 + vector>().swap(secx_vec); // secondary structure of complex1 + vector>().swap(secy_vec); // secondary structure of complex2 + mol_vec1.clear(); // molecule type of complex1, RNA if >0 + mol_vec2.clear(); // molecule type of complex2, RNA if >0 + vector().swap(chainID_list1); // list of chainID1 + vector().swap(chainID_list2); // list of chainID2 + xlen_vec.clear(); // length of complex1 + ylen_vec.clear(); // length of complex2 + vector().swap(resi_vec1); // residue index for chain1 + vector().swap(resi_vec2); // residue index for chain2 + map().swap(chainmap); return 1; } - /* alignment individual chains to a complex. */ -int MMdock(const string &xname, const string &yname, const string &fname_super, - const string &fname_matrix, vector &sequence, const double Lnorm_ass, - const double d0_scale, const bool m_opt, const int o_opt, - const int a_opt, const bool u_opt, const bool d_opt, - const double TMcut, const int infmt1_opt, const int infmt2_opt, - const int ter_opt, const int split_opt, const int outfmt_opt, - bool fast_opt, const int mirror_opt, const int het_opt, - const string &atom_opt, const bool autojustify, const string &mol_opt, - const string &dir1_opt, const string &dir2_opt, - const vector &chain2parse1, const vector &chain2parse2, - const vector &model2parse1, const vector &model2parse2, - const vector &chain1_list, const vector &chain2_list, - const bool do_opt) +int MMdock(const string &xname, const string &yname, const string &fname_super, + const string &fname_matrix, vector &sequence, const double Lnorm_ass, + const double d0_scale, const bool m_opt, const int o_opt, + const int a_opt, const bool u_opt, const bool d_opt, + const double TMcut, const int infmt1_opt, const int infmt2_opt, + const int ter_opt, const int split_opt, const int outfmt_opt, + bool fast_opt, const int mirror_opt, const int het_opt, + const string &atom_opt, const bool autojustify, const string &mol_opt, + const string &dir1_opt, const string &dir2_opt, + const vector &chain2parse1, const vector &chain2parse2, + const vector &model2parse1, const vector &model2parse2, + const vector &chain1_list, const vector &chain2_list, + const bool do_opt) { /* declare previously global variables */ - vector > > xa_vec; // structure of complex1 - vector > > ya_vec; // structure of complex2 - vector >seqx_vec; // sequence of complex1 - vector >seqy_vec; // sequence of complex2 - vector >secx_vec; // secondary structure of complex1 - vector >secy_vec; // secondary structure of complex2 - vector mol_vec1; // molecule type of complex1, RNA if >0 - vector mol_vec2; // molecule type of complex2, RNA if >0 - vector chainID_list1; // list of chainID1 - vector chainID_list2; // list of chainID2 - vector xlen_vec; // length of complex1 - vector ylen_vec; // length of complex2 - int i,j; // chain index - int xlen, ylen; // chain length - double **xa, **ya; // structure of single chain - char *seqx, *seqy; // for the protein sequence - char *secx, *secy; // for the secondary structure - int xlen_aa,ylen_aa; // total length of protein - int xlen_na,ylen_na; // total length of RNA/DNA - vector resi_vec1; // residue index for chain1 - vector resi_vec2; // residue index for chain2 + vector>> xa_vec; // structure of complex1 + vector>> ya_vec; // structure of complex2 + vector> seqx_vec; // sequence of complex1 + vector> seqy_vec; // sequence of complex2 + vector> secx_vec; // secondary structure of complex1 + vector> secy_vec; // secondary structure of complex2 + vector mol_vec1; // molecule type of complex1, RNA if >0 + vector mol_vec2; // molecule type of complex2, RNA if >0 + vector chainID_list1; // list of chainID1 + vector chainID_list2; // list of chainID2 + vector xlen_vec; // length of complex1 + vector ylen_vec; // length of complex2 + int i, j; // chain index + int xlen, ylen; // chain length + double **xa, **ya; // structure of single chain + char *seqx, *seqy; // for the protein sequence + char *secx, *secy; // for the secondary structure + int xlen_aa, ylen_aa; // total length of protein + int xlen_na, ylen_na; // total length of RNA/DNA + vector resi_vec1; // residue index for chain1 + vector resi_vec2; // residue index for chain2 /* parse complex */ parse_chain_list(chain1_list, xa_vec, seqx_vec, secx_vec, mol_vec1, - xlen_vec, chainID_list1, ter_opt, split_opt, mol_opt, infmt1_opt, - atom_opt, autojustify, mirror_opt, het_opt, xlen_aa, xlen_na, o_opt, - resi_vec1, chain2parse1, model2parse1); - if (xa_vec.size()==0) PrintErrorAndQuit("ERROR! 0 individual chain"); + xlen_vec, chainID_list1, ter_opt, split_opt, mol_opt, infmt1_opt, + atom_opt, autojustify, mirror_opt, het_opt, xlen_aa, xlen_na, o_opt, + resi_vec1, chain2parse1, model2parse1); + if (xa_vec.size() == 0) + PrintErrorAndQuit("ERROR! 0 individual chain"); parse_chain_list(chain2_list, ya_vec, seqy_vec, secy_vec, mol_vec2, - ylen_vec, chainID_list2, ter_opt, split_opt, mol_opt, infmt2_opt, - atom_opt, autojustify, 0, het_opt, ylen_aa, ylen_na, o_opt, resi_vec2, - chain2parse2, model2parse2); - if (xa_vec.size()>ya_vec.size()) PrintErrorAndQuit( - "ERROR! more individual chains to align than number of chains in complex template"); - int len_aa=getmin(xlen_aa,ylen_aa); - int len_na=getmin(xlen_na,ylen_na); + ylen_vec, chainID_list2, ter_opt, split_opt, mol_opt, infmt2_opt, + atom_opt, autojustify, 0, het_opt, ylen_aa, ylen_na, o_opt, resi_vec2, + chain2parse2, model2parse2); + if (xa_vec.size() > ya_vec.size()) + PrintErrorAndQuit( + "ERROR! more individual chains to align than number of chains in complex template"); + int len_aa = getmin(xlen_aa, ylen_aa); + int len_na = getmin(xlen_na, ylen_na); if (a_opt) { - len_aa=(xlen_aa+ylen_aa)/2; - len_na=(xlen_na+ylen_na)/2; + len_aa = (xlen_aa + ylen_aa) / 2; + len_na = (xlen_na + ylen_na) / 2; } /* perform monomer alignment if there is only one chain */ - if (xa_vec.size()==1 && ya_vec.size()==1) + if (xa_vec.size() == 1 && ya_vec.size() == 1) { xlen = xlen_vec[0]; ylen = ylen_vec[0]; - seqx = new char[xlen+1]; - seqy = new char[ylen+1]; - secx = new char[xlen+1]; - secy = new char[ylen+1]; + seqx = new char[xlen + 1]; + seqy = new char[ylen + 1]; + secx = new char[xlen + 1]; + secy = new char[ylen + 1]; NewArray(&xa, xlen, 3); NewArray(&ya, ylen, 3); - copy_chain_data(xa_vec[0],seqx_vec[0],secx_vec[0], xlen,xa,seqx,secx); - copy_chain_data(ya_vec[0],seqy_vec[0],secy_vec[0], ylen,ya,seqy,secy); - + copy_chain_data(xa_vec[0], seqx_vec[0], secx_vec[0], xlen, xa, seqx, secx); + copy_chain_data(ya_vec[0], seqy_vec[0], secy_vec[0], ylen, ya, seqy, secy); + /* declare variable specific to this pair of TMalign */ double t0[3], u0[3][3]; double TM1, TM2; - double TM3, TM4, TM5; // for a_opt, u_opt, d_opt + double TM3, TM4, TM5; // for a_opt, u_opt, d_opt double d0_0, TM_0; double d0A, d0B, d0u, d0a; - double d0_out=5.0; - string seqM, seqxA, seqyA;// for output alignment + double d0_out = 5.0; + string seqM, seqxA, seqyA; // for output alignment double rmsd0 = 0.0; - int L_ali; // Aligned length in standard_TMscore - double Liden=0; - double TM_ali, rmsd_ali; // TMscore and rmsd in standard_TMscore - int n_ali=0; - int n_ali8=0; + int L_ali; // Aligned length in standard_TMscore + double Liden = 0; + double TM_ali, rmsd_ali; // TMscore and rmsd in standard_TMscore + int n_ali = 0; + int n_ali8 = 0; vector do_vec; /* entry function for structure alignment */ TMalign_main(xa, ya, seqx, seqy, secx, secy, - t0, u0, TM1, TM2, TM3, TM4, TM5, - d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, - seqM, seqxA, seqyA, do_vec, - rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, - xlen, ylen, sequence, Lnorm_ass, d0_scale, - 0, a_opt, u_opt, d_opt, fast_opt, - mol_vec1[0]+mol_vec2[0],TMcut); + t0, u0, TM1, TM2, TM3, TM4, TM5, + d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, + seqM, seqxA, seqyA, do_vec, + rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, + xlen, ylen, sequence, Lnorm_ass, d0_scale, + 0, a_opt, u_opt, d_opt, fast_opt, + mol_vec1[0] + mol_vec2[0], TMcut, 0); /* print result */ output_results( @@ -1288,191 +1341,199 @@ int MMdock(const string &xname, const string &yname, const string &fname_super, xlen, ylen, t0, u0, TM1, TM2, TM3, TM4, TM5, rmsd0, d0_out, seqM.c_str(), seqxA.c_str(), seqyA.c_str(), Liden, n_ali8, L_ali, TM_ali, rmsd_ali, TM_0, d0_0, d0A, d0B, - Lnorm_ass, d0_scale, d0a, d0u, (m_opt?fname_matrix:"").c_str(), - (outfmt_opt==2?outfmt_opt:3), ter_opt, true, split_opt, o_opt, fname_super, + Lnorm_ass, d0_scale, d0a, d0u, (m_opt ? fname_matrix : "").c_str(), + (outfmt_opt == 2 ? outfmt_opt : 3), ter_opt, true, split_opt, o_opt, fname_super, 0, a_opt, false, d_opt, mirror_opt, resi_vec1, resi_vec2); - if (outfmt_opt==2) printf("%s%s\t%s%s\t%.4f\n", - xname.substr(dir1_opt.size()).c_str(), chainID_list1[0].c_str(), - yname.substr(dir2_opt.size()).c_str(), chainID_list2[0].c_str(), - sqrt((TM1*TM1+TM2*TM2)/2)); + if (outfmt_opt == 2) + printf("%s%s\t%s%s\t%.4f\n", + xname.substr(dir1_opt.size()).c_str(), chainID_list1[0].c_str(), + yname.substr(dir2_opt.size()).c_str(), chainID_list2[0].c_str(), + sqrt((TM1 * TM1 + TM2 * TM2) / 2)); /* clean up */ seqM.clear(); seqxA.clear(); seqyA.clear(); - delete[]seqx; - delete[]seqy; - delete[]secx; - delete[]secy; - DeleteArray(&xa,xlen); - DeleteArray(&ya,ylen); + delete[] seqx; + delete[] seqy; + delete[] secx; + delete[] secy; + DeleteArray(&xa, xlen); + DeleteArray(&ya, ylen); do_vec.clear(); - vector > >().swap(xa_vec); // structure of complex1 - vector > >().swap(ya_vec); // structure of complex2 - vector >().swap(seqx_vec); // sequence of complex1 - vector >().swap(seqy_vec); // sequence of complex2 - vector >().swap(secx_vec); // secondary structure of complex1 - vector >().swap(secy_vec); // secondary structure of complex2 - mol_vec1.clear(); // molecule type of complex1, RNA if >0 - mol_vec2.clear(); // molecule type of complex2, RNA if >0 - chainID_list1.clear(); // list of chainID1 - chainID_list2.clear(); // list of chainID2 - xlen_vec.clear(); // length of complex1 - ylen_vec.clear(); // length of complex2 + vector>>().swap(xa_vec); // structure of complex1 + vector>>().swap(ya_vec); // structure of complex2 + vector>().swap(seqx_vec); // sequence of complex1 + vector>().swap(seqy_vec); // sequence of complex2 + vector>().swap(secx_vec); // secondary structure of complex1 + vector>().swap(secy_vec); // secondary structure of complex2 + mol_vec1.clear(); // molecule type of complex1, RNA if >0 + mol_vec2.clear(); // molecule type of complex2, RNA if >0 + chainID_list1.clear(); // list of chainID1 + chainID_list2.clear(); // list of chainID2 + xlen_vec.clear(); // length of complex1 + ylen_vec.clear(); // length of complex2 return 0; } /* declare TM-score tables */ - int chain1_num=xa_vec.size(); - int chain2_num=ya_vec.size(); - vector tmp_str_vec(chain2_num,""); + int chain1_num = xa_vec.size(); + int chain2_num = ya_vec.size(); + vector tmp_str_vec(chain2_num, ""); double **TMave_mat; - NewArray(&TMave_mat,chain1_num,chain2_num); - vector >seqxA_mat(chain1_num,tmp_str_vec); - vector > seqM_mat(chain1_num,tmp_str_vec); - vector >seqyA_mat(chain1_num,tmp_str_vec); + NewArray(&TMave_mat, chain1_num, chain2_num); + vector> seqxA_mat(chain1_num, tmp_str_vec); + vector> seqM_mat(chain1_num, tmp_str_vec); + vector> seqyA_mat(chain1_num, tmp_str_vec); /* trimComplex */ - vector > > ya_trim_vec; // structure of complex2 - vector >seqy_trim_vec; // sequence of complex2 - vector >secy_trim_vec; // secondary structure of complex2 - vector ylen_trim_vec; // length of complex2 - int Lchain_aa_max1=0; - int Lchain_na_max1=0; - for (i=0;i>> ya_trim_vec; // structure of complex2 + vector> seqy_trim_vec; // sequence of complex2 + vector> secy_trim_vec; // secondary structure of complex2 + vector ylen_trim_vec; // length of complex2 + int Lchain_aa_max1 = 0; + int Lchain_na_max1 = 0; + for (i = 0; i < chain1_num; i++) { - xlen=xlen_vec[i]; - if (mol_vec1[i]>0 && xlen>Lchain_na_max1) Lchain_na_max1=xlen; - else if (mol_vec1[i]<=0 && xlen>Lchain_aa_max1) Lchain_aa_max1=xlen; + xlen = xlen_vec[i]; + if (mol_vec1[i] > 0 && xlen > Lchain_na_max1) + Lchain_na_max1 = xlen; + else if (mol_vec1[i] <= 0 && xlen > Lchain_aa_max1) + Lchain_aa_max1 = xlen; } - int trim_chain_count=trimComplex(ya_trim_vec,seqy_trim_vec, - secy_trim_vec,ylen_trim_vec,ya_vec,seqy_vec,secy_vec,ylen_vec, - mol_vec2,Lchain_aa_max1,Lchain_na_max1); - int ylen_trim; // chain length - double **ya_trim; // structure of single chain - char *seqy_trim; // for the protein sequence - char *secy_trim; // for the secondary structure + int trim_chain_count = trimComplex(ya_trim_vec, seqy_trim_vec, + secy_trim_vec, ylen_trim_vec, ya_vec, seqy_vec, secy_vec, ylen_vec, + mol_vec2, Lchain_aa_max1, Lchain_na_max1); + int ylen_trim; // chain length + double **ya_trim; // structure of single chain + char *seqy_trim; // for the protein sequence + char *secy_trim; // for the secondary structure double **xt; /* get all-against-all alignment */ - if (len_aa+len_na>500) fast_opt=true; - for (i=0;i 500) + fast_opt = true; + for (i = 0; i < chain1_num; i++) { - xlen=xlen_vec[i]; - if (xlen<3) + xlen = xlen_vec[i]; + if (xlen < 3) { - for (j=0;j do_vec; - int Lnorm_tmp=len_aa; - if (mol_vec1[i]+mol_vec2[j]>0) Lnorm_tmp=len_na; + int Lnorm_tmp = len_aa; + if (mol_vec1[i] + mol_vec2[j] > 0) + Lnorm_tmp = len_na; /* entry function for structure alignment */ - if (trim_chain_count && ylen_trim_vec[j] > >().swap(ya_trim_vec); - vector >().swap(seqy_trim_vec); - vector >().swap(secy_trim_vec); - vector ().swap(ylen_trim_vec); + vector>>().swap(ya_trim_vec); + vector>().swap(seqy_trim_vec); + vector>().swap(secy_trim_vec); + vector().swap(ylen_trim_vec); /* calculate initial chain-chain assignment */ int *assign1_list; // value is index of assigned chain2 int *assign2_list; // value is index of assigned chain1 - assign1_list=new int[chain1_num]; - assign2_list=new int[chain2_num]; + assign1_list = new int[chain1_num]; + assign2_list = new int[chain2_num]; enhanced_greedy_search(TMave_mat, assign1_list, - assign2_list, chain1_num, chain2_num); + assign2_list, chain1_num, chain2_num); /* final alignment */ - if (outfmt_opt==0) print_version(); + if (outfmt_opt == 0) + print_version(); double **ut_mat; // rotation matrices for all-against-all alignment - NewArray(&ut_mat,chain1_num,4*3); - int ui,uj; - vectorxname_vec; - vectoryname_vec; - vectorTM_vec; - for (i=0;i xname_vec; + vector yname_vec; + vector TM_vec; + for (i = 0; i < chain1_num; i++) { - j=assign1_list[i]; - xname_vec.push_back(xname+chainID_list1[i]); - if (j<0) + j = assign1_list[i]; + xname_vec.push_back(xname + chainID_list1[i]); + if (j < 0) { - cerr<<"Warning! "< do_vec; int c; - for (c=0; c().swap(TM_vec); vector().swap(xname_vec); vector().swap(yname_vec); - delete [] assign1_list; - delete [] assign2_list; - DeleteArray(&TMave_mat,chain1_num); - DeleteArray(&ut_mat, chain1_num); - vector >().swap(seqxA_mat); - vector >().swap(seqM_mat); - vector >().swap(seqyA_mat); + delete[] assign1_list; + delete[] assign2_list; + DeleteArray(&TMave_mat, chain1_num); + DeleteArray(&ut_mat, chain1_num); + vector>().swap(seqxA_mat); + vector>().swap(seqM_mat); + vector>().swap(seqyA_mat); vector().swap(tmp_str_vec); - vector > >().swap(xa_vec); // structure of complex1 - vector > >().swap(ya_vec); // structure of complex2 - vector >().swap(seqx_vec); // sequence of complex1 - vector >().swap(seqy_vec); // sequence of complex2 - vector >().swap(secx_vec); // secondary structure of complex1 - vector >().swap(secy_vec); // secondary structure of complex2 - mol_vec1.clear(); // molecule type of complex1, RNA if >0 - mol_vec2.clear(); // molecule type of complex2, RNA if >0 - vector().swap(chainID_list1); // list of chainID1 - vector().swap(chainID_list2); // list of chainID2 - xlen_vec.clear(); // length of complex1 - ylen_vec.clear(); // length of complex2 + vector>>().swap(xa_vec); // structure of complex1 + vector>>().swap(ya_vec); // structure of complex2 + vector>().swap(seqx_vec); // sequence of complex1 + vector>().swap(seqy_vec); // sequence of complex2 + vector>().swap(secx_vec); // secondary structure of complex1 + vector>().swap(secy_vec); // secondary structure of complex2 + mol_vec1.clear(); // molecule type of complex1, RNA if >0 + mol_vec2.clear(); // molecule type of complex2, RNA if >0 + vector().swap(chainID_list1); // list of chainID1 + vector().swap(chainID_list2); // list of chainID2 + xlen_vec.clear(); // length of complex1 + ylen_vec.clear(); // length of complex2 return 1; } int mTMalign(string &xname, string &yname, const string &fname_super, - const string &fname_matrix, - vector &sequence, double Lnorm_ass, const double d0_scale, - const bool m_opt, const int i_opt, const int o_opt, const int a_opt, - bool u_opt, const bool d_opt, const bool full_opt, const double TMcut, - const int infmt_opt, const int ter_opt, - const int split_opt, const int outfmt_opt, bool fast_opt, - const int het_opt, const string &atom_opt, const bool autojustify, - const string &mol_opt, const string &dir_opt, const int byresi_opt, - const vector &chain_list, const vector &chain2parse, - const vector &model2parse, const bool se_opt) + const string &fname_matrix, + vector &sequence, double Lnorm_ass, const double d0_scale, + const bool m_opt, const int i_opt, const int o_opt, const int a_opt, + bool u_opt, const bool d_opt, const bool full_opt, const double TMcut, + const int infmt_opt, const int ter_opt, + const int split_opt, const int outfmt_opt, bool fast_opt, + const int het_opt, const string &atom_opt, const bool autojustify, + const string &mol_opt, const string &dir_opt, const int byresi_opt, + const vector &chain_list, const vector &chain2parse, + const vector &model2parse, const bool se_opt) { /* declare previously global variables */ - vector > >a_vec; // atomic structure - vector > >ua_vec; // unchanged atomic structure - vector >seq_vec; // sequence of complex - vector >sec_vec; // secondary structure of complex - vector mol_vec; // molecule type of complex1, RNA if >0 - vector chainID_list; // list of chainID - vector len_vec; // length of complex - int i,j; // chain index - int xlen, ylen; // chain length - double **xa, **ya; // structure of single chain - char *seqx, *seqy; // for the protein sequence - char *secx, *secy; // for the secondary structure - int len_aa,len_na; // total length of protein and RNA/DNA - vector resi_vec; // residue index for chain + vector>> a_vec; // atomic structure + vector>> ua_vec; // unchanged atomic structure + vector> seq_vec; // sequence of complex + vector> sec_vec; // secondary structure of complex + vector mol_vec; // molecule type of complex1, RNA if >0 + vector chainID_list; // list of chainID + vector len_vec; // length of complex + int i, j; // chain index + int xlen, ylen; // chain length + double **xa, **ya; // structure of single chain + char *seqx, *seqy; // for the protein sequence + char *secx, *secy; // for the secondary structure + int len_aa, len_na; // total length of protein and RNA/DNA + vector resi_vec; // residue index for chain /* parse chain list */ parse_chain_list(chain_list, a_vec, seq_vec, sec_vec, mol_vec, - len_vec, chainID_list, ter_opt, split_opt, mol_opt, infmt_opt, - atom_opt, autojustify, false, het_opt, len_aa, len_na, o_opt, - resi_vec, chain2parse, model2parse); - int chain_num=a_vec.size(); - if (chain_num<=1) PrintErrorAndQuit("ERROR! <2 chains for multiple alignment"); - if (m_opt||o_opt) for (i=0;ixlen) xlen=len_vec[i]; - total_len+=len_vec[i]; - mol_type+=mol_vec[i]; + if (len_vec[i] > xlen) + xlen = len_vec[i]; + total_len += len_vec[i]; + mol_type += mol_vec[i]; } - if (!u_opt) Lnorm_ass=total_len/chain_num; - u_opt=true; - total_len-=xlen; - if (total_len>750) fast_opt=true; + if (!u_opt) + Lnorm_ass = total_len / chain_num; + u_opt = true; + total_len -= xlen; + if (total_len > 750) + fast_opt = true; /* get all-against-all alignment */ double **TMave_mat; - NewArray(&TMave_mat,chain_num,chain_num); - vector tmp_str_vec(chain_num,""); - vector >seqxA_mat(chain_num,tmp_str_vec); - vector >seqyA_mat(chain_num,tmp_str_vec); - for (i=0;i tmp_str_vec(chain_num, ""); + vector> seqxA_mat(chain_num, tmp_str_vec); + vector> seqyA_mat(chain_num, tmp_str_vec); + for (i = 0; i < chain_num; i++) + for (j = 0; j < chain_num; j++) + TMave_mat[i][j] = 0; + for (i = 0; i < chain_num; i++) { - xlen=len_vec[i]; - if (xlen<3) continue; - seqx = new char[xlen+1]; - secx = new char[xlen+1]; + xlen = len_vec[i]; + if (xlen < 3) + continue; + seqx = new char[xlen + 1]; + secx = new char[xlen + 1]; NewArray(&xa, xlen, 3); - copy_chain_data(a_vec[i],seq_vec[i],sec_vec[i],xlen,xa,seqx,secx); - seqxA_mat[i][i]=seqyA_mat[i][i]=(string)(seqx); - for (j=i+1;j do_vec; /* entry function for structure alignment */ if (se_opt) { - int *invmap = new int[ylen+1]; - u0[0][0]=u0[1][1]=u0[2][2]=1; - u0[0][1]= u0[0][2]= - u0[1][0]= u0[1][2]= - u0[2][0]= u0[2][1]= - t0[0] =t0[1] =t0[2] =0; + int *invmap = new int[ylen + 1]; + u0[0][0] = u0[1][1] = u0[2][2] = 1; + u0[0][1] = u0[0][2] = + u0[1][0] = u0[1][2] = + u0[2][0] = u0[2][1] = + t0[0] = t0[1] = t0[2] = 0; se_main(xa, ya, seqx, seqy, TM1, TM2, TM3, TM4, TM5, - d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, - seqM, seqxA, seqyA, do_vec, - rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, - xlen, ylen, sequence, Lnorm_ass, d0_scale, - 0, false, u_opt, false, mol_type, outfmt_opt, invmap); - if (outfmt_opt>=2) + d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, + seqM, seqxA, seqyA, do_vec, + rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, + xlen, ylen, sequence, Lnorm_ass, d0_scale, + 0, false, u_opt, false, mol_type, outfmt_opt, invmap); + if (outfmt_opt >= 2) { - Liden=L_ali=0; - int r1,r2; - for (r2=0;r2xname_vec; - for (i=0;iyname_vec; + int repr_idx = 0; + vector xname_vec; + for (i = 0; i < chain_num; i++) + xname_vec.push_back( + chain_list[i].substr(dir_opt.size()) + chainID_list[i]); + vector yname_vec; double *TMave_list; TMave_list = new double[chain_num]; int *assign_list; - assign_list=new int[chain_num]; - vector msa(ylen,""); // row is position along msa; column is sequence + assign_list = new int[chain_num]; + vector msa(ylen, ""); // row is position along msa; column is sequence int compare_num; double TM1_total, TM2_total; - double TM3_total, TM4_total, TM5_total; // for a_opt, u_opt, d_opt + double TM3_total, TM4_total, TM5_total; // for a_opt, u_opt, d_opt double d0_0_total, TM_0_total; double d0A_total, d0B_total, d0u_total, d0a_total; double d0_out_total; double rmsd0_total; - int L_ali_total; // Aligned length in standard_TMscore + int L_ali_total; // Aligned length in standard_TMscore double Liden_total; - double TM_ali_total, rmsd_ali_total; // TMscore and rmsd in standard_TMscore + double TM_ali_total, rmsd_ali_total; // TMscore and rmsd in standard_TMscore int n_ali_total; int n_ali8_total; int xlen_total, ylen_total; - double TM4_total_max=0; - - int max_iter=5-(int)(total_len/200); - if (max_iter<2) max_iter=2; - int iter=0; - vector TM_vec(chain_num,0); - vector d0_vec(chain_num,0); - vector seqID_vec(chain_num,0); - vector > TM_mat(chain_num,TM_vec); - vector > d0_mat(chain_num,d0_vec); - vector > seqID_mat(chain_num,seqID_vec); - for (iter=0; iter TM_vec(chain_num, 0); + vector d0_vec(chain_num, 0); + vector seqID_vec(chain_num, 0); + vector> TM_mat(chain_num, TM_vec); + vector> d0_mat(chain_num, d0_vec); + vector> seqID_mat(chain_num, seqID_vec); + for (iter = 0; iter < max_iter; iter++) { - /* select representative */ - for (j=0; j do_vec; /* entry function for structure alignment */ if (se_opt) { - int *invmap = new int[ylen+1]; - u0[0][0]=u0[1][1]=u0[2][2]=1; - u0[0][1]= u0[0][2]= - u0[1][0]= u0[1][2]= - u0[2][0]= u0[2][1]= - t0[0] =t0[1] =t0[2] =0; + int *invmap = new int[ylen + 1]; + u0[0][0] = u0[1][1] = u0[2][2] = 1; + u0[0][1] = u0[0][2] = + u0[1][0] = u0[1][2] = + u0[2][0] = u0[2][1] = + t0[0] = t0[1] = t0[2] = 0; se_main(xa, ya, seqx, seqy, TM1, TM2, TM3, TM4, TM5, - d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, - seqM, seqxA, seqyA, do_vec, - rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, - xlen, ylen, sequence, Lnorm_ass, d0_scale, - 2, a_opt, u_opt, d_opt, mol_type, outfmt_opt, invmap); - if (outfmt_opt>=2) + d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, + seqM, seqxA, seqyA, do_vec, + rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, + xlen, ylen, sequence, Lnorm_ass, d0_scale, + 2, a_opt, u_opt, d_opt, mol_type, outfmt_opt, invmap); + if (outfmt_opt >= 2) { - Liden=L_ali=0; - int r1,r2; - for (r2=0;r2 msa_ext; // row is position along msa; column is sequence - for (r=0;r msa_ext; // row is position along msa; column is sequence + for (r = 0; r < ylen; r++) + msa[r] = seqy[r]; + // for (r=0;r do_vec; se_main(xa, ya, seqx, seqy, TM1, TM2, TM3, TM4, TM5, - d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA, - do_vec, rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, - xlen, ylen, sequence, Lnorm_ass, d0_scale, - 0, a_opt, u_opt, d_opt, mol_type, 1, invmap); - - int rx=0,ry=0; - ylen_ext=seqxA.size(); - NewArray(&ya_ext, ylen_ext, 3); // structure of single chain - seqy_ext= new char[ylen_ext+1]; // for the protein sequence - secy_ext= new char[ylen_ext+1]; // for the secondary structure - string tmp_gap=""; - for (r=0;r().swap(msa_ext); - vector >().swap(TM_pair_vec); - for (i=0; i>().swap(TM_pair_vec); + for (i = 0; i < chain_num; i++) { - tm_idx=assign_list[i]; - if (tm_idx<0) continue; - seqyA_mat[i][i]=""; - for (r=0 ;r do_vec; se_main(xa, ya, seqx, seqy, TM1, TM2, TM3, TM4, TM5, - d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA, - do_vec, rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, - xlen, ylen, sequence, Lnorm_ass, d0_scale, - true, a_opt, u_opt, d_opt, mol_type, 1, invmap); + d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA, + do_vec, rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, + xlen, ylen, sequence, Lnorm_ass, d0_scale, + true, a_opt, u_opt, d_opt, mol_type, 1, invmap); - if (xlen<=ylen) + if (xlen <= ylen) { - xlen_total+=xlen; - ylen_total+=ylen; - TM1_total+=TM1; - TM2_total+=TM2; - d0A_total+=d0A; - d0B_total+=d0B; + xlen_total += xlen; + ylen_total += ylen; + TM1_total += TM1; + TM2_total += TM2; + d0A_total += d0A; + d0B_total += d0B; } else { - xlen_total+=ylen; - ylen_total+=xlen; - TM1_total+=TM2; - TM2_total+=TM1; - d0A_total+=d0B; - d0B_total+=d0A; + xlen_total += ylen; + ylen_total += xlen; + TM1_total += TM2; + TM2_total += TM1; + d0A_total += d0B; + d0B_total += d0A; } - TM_mat[i][j]=TM2; - TM_mat[j][i]=TM1; - d0_mat[i][j]=d0B; - d0_mat[j][i]=d0A; - seqID_mat[i][j]=1.*Liden/xlen; - seqID_mat[j][i]=1.*Liden/ylen; - - TM3_total+=TM3; - TM4_total+=TM4; - TM5_total+=TM5; - d0_0_total+=d0_0; - TM_0_total+=TM_0; - d0u_total+=d0u; - d0_out_total+=d0_out; - rmsd0_total+=rmsd0; - L_ali_total+=L_ali; // Aligned length in standard_TMscore - Liden_total+=Liden; - TM_ali_total+=TM_ali; - rmsd_ali_total+=rmsd_ali; // TMscore and rmsd in standard_TMscore - n_ali_total+=n_ali; - n_ali8_total+=n_ali8; + TM_mat[i][j] = TM2; + TM_mat[j][i] = TM1; + d0_mat[i][j] = d0B; + d0_mat[j][i] = d0A; + seqID_mat[i][j] = 1. * Liden / xlen; + seqID_mat[j][i] = 1. * Liden / ylen; + + TM3_total += TM3; + TM4_total += TM4; + TM5_total += TM5; + d0_0_total += d0_0; + TM_0_total += TM_0; + d0u_total += d0u; + d0_out_total += d0_out; + rmsd0_total += rmsd0; + L_ali_total += L_ali; // Aligned length in standard_TMscore + Liden_total += Liden; + TM_ali_total += TM_ali; + rmsd_ali_total += rmsd_ali; // TMscore and rmsd in standard_TMscore + n_ali_total += n_ali; + n_ali8_total += n_ali8; /* clean up */ - delete[]invmap; + delete[] invmap; seqM.clear(); seqxA.clear(); seqyA.clear(); - delete[]seqy; - delete[]secy; - DeleteArray(&ya,ylen); + delete[] seqy; + delete[] secy; + DeleteArray(&ya, ylen); do_vec.clear(); } - delete[]seqx; - delete[]secx; - DeleteArray(&xa,xlen); + delete[] seqx; + delete[] secx; + DeleteArray(&xa, xlen); } - if (TM4_total<=TM4_total_max) break; - TM4_total_max=TM4_total; + if (TM4_total <= TM4_total_max) + break; + TM4_total_max = TM4_total; } - for (i=0;i"<" << xname_vec[i] << "\tL=" << len_vec[i] + << "\td0=" << setiosflags(ios::fixed) << setprecision(2) << d0_vec[i] + << "\tseqID=" << setiosflags(ios::fixed) << setprecision(3) << seqID_vec[i] + << "\tTM-score=" << setiosflags(ios::fixed) << setprecision(5) << TM_vec[i]; + if (i == repr_idx) + buf << "\t*"; + buf << '\n' + << seqxA_mat[i][i] << endl; } - seqM=buf.str(); - seqM=seqM.substr(0,seqM.size()-1); + seqM = buf.str(); + seqM = seqM.substr(0, seqM.size() - 1); buf.str(string()); - //MergeAlign(seqxA_mat,seqyA_mat,repr_idx,xname_vec,chain_num,seqM); - if (outfmt_opt==0) print_version(); - output_mTMalign_results( xname,yname, "","", - xlen_total, ylen_total, t0, u0, TM1_total, TM2_total, - TM3_total, TM4_total, TM5_total, rmsd0_total, d0_out_total, - seqM.c_str(), seqxA.c_str(), seqyA.c_str(), Liden_total, - n_ali8_total, L_ali_total, TM_ali_total, rmsd_ali_total, - TM_0_total, d0_0_total, d0A_total, d0B_total, - Lnorm_ass, d0_scale, d0a_total, d0u_total, - "", outfmt_opt, ter_opt, 0, split_opt, false, - "", false, a_opt, u_opt, d_opt, false, - resi_vec, resi_vec ); + // MergeAlign(seqxA_mat,seqyA_mat,repr_idx,xname_vec,chain_num,seqM); + if (outfmt_opt == 0) + print_version(); + output_mTMalign_results(xname, yname, "", "", + xlen_total, ylen_total, t0, u0, TM1_total, TM2_total, + TM3_total, TM4_total, TM5_total, rmsd0_total, d0_out_total, + seqM.c_str(), seqxA.c_str(), seqyA.c_str(), Liden_total, + n_ali8_total, L_ali_total, TM_ali_total, rmsd_ali_total, + TM_0_total, d0_0_total, d0A_total, d0B_total, + Lnorm_ass, d0_scale, d0a_total, d0u_total, + "", outfmt_opt, ter_opt, 0, split_opt, false, + "", false, a_opt, u_opt, d_opt, false, + resi_vec, resi_vec); if (m_opt || o_opt) { double **ut_mat; // rotation matrices for all-against-all alignment - int ui,uj; + int ui, uj; double t[3], u[3][3]; double rmsd; - NewArray(&ut_mat,chain_num,4*3); - for (i=0;i > >().swap(ua_vec); + vector>>().swap(ua_vec); if (m_opt) { - assign_list[repr_idx]=-1; + assign_list[repr_idx] = -1; output_dock_rotation_matrix(fname_matrix.c_str(), - xname_vec,yname_vec, ut_mat, assign_list); + xname_vec, yname_vec, ut_mat, assign_list); } - //if (o_opt) output_dock(chain_list, ter_opt, split_opt, - //infmt_opt, atom_opt, false, ut_mat, fname_super); - if (o_opt) output_mTMalign_pymol(chain_list, - infmt_opt, ut_mat, fname_super, o_opt); - - DeleteArray(&ut_mat,chain_num); + // if (o_opt) output_dock(chain_list, ter_opt, split_opt, + // infmt_opt, atom_opt, false, ut_mat, fname_super); + if (o_opt) + output_mTMalign_pymol(chain_list, + infmt_opt, ut_mat, fname_super, o_opt); + + DeleteArray(&ut_mat, chain_num); } /* clean up */ vector().swap(msa); vector().swap(tmp_str_vec); - vector >().swap(seqxA_mat); - vector >().swap(seqyA_mat); + vector>().swap(seqxA_mat); + vector>().swap(seqyA_mat); vector().swap(xname_vec); vector().swap(yname_vec); - delete[]TMave_list; - DeleteArray(&TMave_mat,chain_num); - vector > >().swap(a_vec); // structure of complex - vector >().swap(seq_vec); // sequence of complex - vector >().swap(sec_vec); // secondary structure of complex - vector().swap(mol_vec); // molecule type of complex1, RNA if >0 - vector().swap(chainID_list); // list of chainID - vector().swap(len_vec); // length of complex + delete[] TMave_list; + DeleteArray(&TMave_mat, chain_num); + vector>>().swap(a_vec); // structure of complex + vector>().swap(seq_vec); // sequence of complex + vector>().swap(sec_vec); // secondary structure of complex + vector().swap(mol_vec); // molecule type of complex1, RNA if >0 + vector().swap(chainID_list); // list of chainID + vector().swap(len_vec); // length of complex vector().swap(TM_vec); vector().swap(d0_vec); vector().swap(seqID_vec); - vector >().swap(TM_mat); - vector >().swap(d0_mat); - vector >().swap(seqID_mat); + vector>().swap(TM_mat); + vector>().swap(d0_mat); + vector>().swap(seqID_mat); return 1; } /* sequence order independent alignment */ int SOIalign(string &xname, string &yname, const string &fname_super, - const string &fname_lign, const string &fname_matrix, - vector &sequence, const double Lnorm_ass, const double d0_scale, - const bool m_opt, const int i_opt, const int o_opt, const int a_opt, - const bool u_opt, const bool d_opt, const double TMcut, - const int infmt1_opt, const int infmt2_opt, const int ter_opt, - const int split_opt, const int outfmt_opt, const bool fast_opt, - const int cp_opt, const int mirror_opt, const int het_opt, - const string &atom_opt, const bool autojustify, const string &mol_opt, - const string &dir_opt, const string &dirpair_opt, const string &dir1_opt, - const string &dir2_opt, const vector &chain2parse1, - const vector &chain2parse2, const vector &model2parse1, - const vector &model2parse2, const vector &chain1_list, - const vector &chain2_list, const bool se_opt, - const int closeK_opt, const int mm_opt) + const string &fname_lign, const string &fname_matrix, + vector &sequence, const double Lnorm_ass, const double d0_scale, + const bool m_opt, const int i_opt, const int o_opt, const int a_opt, + const bool u_opt, const bool d_opt, const double TMcut, + const int infmt1_opt, const int infmt2_opt, const int ter_opt, + const int split_opt, const int outfmt_opt, const bool fast_opt, + const int cp_opt, const int mirror_opt, const int het_opt, + const string &atom_opt, const bool autojustify, const string &mol_opt, + const string &dir_opt, const string &dirpair_opt, const string &dir1_opt, + const string &dir2_opt, const vector &chain2parse1, + const vector &chain2parse2, const vector &model2parse1, + const vector &model2parse2, const vector &chain1_list, + const vector &chain2_list, const bool se_opt, + const int closeK_opt, const int mm_opt) { /* declare previously global variables */ - vector >PDB_lines1; // text of chain1 - vector >PDB_lines2; // text of chain2 + vector> PDB_lines1; // text of chain1 + vector> PDB_lines2; // text of chain2 vector mol_vec1; // molecule type of chain1, RNA if >0 vector mol_vec2; // molecule type of chain2, RNA if >0 vector chainID_list1; // list of chainID1 vector chainID_list2; // list of chainID2 - int i,j; // file index - int chain_i,chain_j; // chain index - int r; // residue index - int xlen, ylen; // chain length - int xchainnum,ychainnum;// number of chains in a PDB file - char *seqx, *seqy; // for the protein sequence - char *secx, *secy; // for the secondary structure - int **secx_bond; // boundary of secondary structure - int **secy_bond; // boundary of secondary structure - double **xa, **ya; // for input vectors xa[0...xlen-1][0..2] and - // ya[0...ylen-1][0..2], in general, - // ya is regarded as native structure - // --> superpose xa onto ya - double **xk, **yk; // k closest residues - vector resi_vec1; // residue index for chain1 - vector resi_vec2; // residue index for chain2 - int read_resi=0; // whether to read residue index - if (o_opt) read_resi=2; + int i, j; // file index + int chain_i, chain_j; // chain index + int r; // residue index + int xlen, ylen; // chain length + int xchainnum, ychainnum; // number of chains in a PDB file + char *seqx, *seqy; // for the protein sequence + char *secx, *secy; // for the secondary structure + int **secx_bond; // boundary of secondary structure + int **secy_bond; // boundary of secondary structure + double **xa, **ya; // for input vectors xa[0...xlen-1][0..2] and + // ya[0...ylen-1][0..2], in general, + // ya is regarded as native structure + // --> superpose xa onto ya + double **xk, **yk; // k closest residues + vector resi_vec1; // residue index for chain1 + vector resi_vec2; // residue index for chain2 + int read_resi = 0; // whether to read residue index + if (o_opt) + read_resi = 2; /* loop over file names */ - for (i=0;i=3) NewArray(&xk, xlen*closeK_opt, 3); + if (closeK_opt >= 3) + NewArray(&xk, xlen * closeK_opt, 3); seqx = new char[xlen + 1]; secx = new char[xlen + 1]; - xlen = read_PDB(PDB_lines1[chain_i], xa, seqx, - resi_vec1, read_resi); - if (mirror_opt) for (r=0;r0) make_sec(seqx,xa, xlen, secx,atom_opt); - else make_sec(xa, xlen, secx); // secondary structure assignment - if (closeK_opt>=3) getCloseK(xa, xlen, closeK_opt, xk); - if (mm_opt==6) + xlen = read_PDB(PDB_lines1[chain_i], xa, seqx, + resi_vec1, read_resi); + if (mirror_opt) + for (r = 0; r < xlen; r++) + xa[r][2] = -xa[r][2]; + if (mol_vec1[chain_i] > 0) + make_sec(seqx, xa, xlen, secx, atom_opt); + else + make_sec(xa, xlen, secx); // secondary structure assignment + if (closeK_opt >= 3) + getCloseK(xa, xlen, closeK_opt, xk); + if (mm_opt == 6) { NewArray(&secx_bond, xlen, 2); assign_sec_bond(secx_bond, secx, xlen); } - for (j=(dir_opt.size()>0)*(i+1);j 0) * (i + 1); j < chain2_list.size(); j++) { - if (dirpair_opt.size() && i!=j) continue; + if (dirpair_opt.size() && i != j) + continue; /* parse chain 2 */ - if (PDB_lines2.size()==0) + if (PDB_lines2.size() == 0) { - yname=chain2_list[j]; - ychainnum=get_PDB_lines(yname, PDB_lines2, chainID_list2, - mol_vec2, ter_opt, infmt2_opt, atom_opt, autojustify, - split_opt, het_opt, chain2parse2, model2parse2); + yname = chain2_list[j]; + ychainnum = get_PDB_lines(yname, PDB_lines2, chainID_list2, + mol_vec2, ter_opt, infmt2_opt, atom_opt, autojustify, + split_opt, het_opt, chain2parse2, model2parse2); if (!ychainnum) { - cerr<<"Warning! Cannot parse file: "<=3) NewArray(&yk, ylen*closeK_opt, 3); + if (closeK_opt >= 3) + NewArray(&yk, ylen * closeK_opt, 3); seqy = new char[ylen + 1]; secy = new char[ylen + 1]; ylen = read_PDB(PDB_lines2[chain_j], ya, seqy, - resi_vec2, read_resi); - if (mol_vec2[chain_j]>0) - make_sec(seqy, ya, ylen, secy, atom_opt); - else make_sec(ya, ylen, secy); - if (closeK_opt>=3) getCloseK(ya, ylen, closeK_opt, yk); - if (mm_opt==6) + resi_vec2, read_resi); + if (mol_vec2[chain_j] > 0) + make_sec(seqy, ya, ylen, secy, atom_opt); + else + make_sec(ya, ylen, secy); + if (closeK_opt >= 3) + getCloseK(ya, ylen, closeK_opt, yk); + if (mm_opt == 6) { NewArray(&secy_bond, ylen, 2); assign_sec_bond(secy_bond, secy, ylen); @@ -2575,111 +2711,117 @@ int SOIalign(string &xname, string &yname, const string &fname_super, /* declare variable specific to this pair of TMalign */ double t0[3], u0[3][3]; double TM1, TM2; - double TM3, TM4, TM5; // for a_opt, u_opt, d_opt + double TM3, TM4, TM5; // for a_opt, u_opt, d_opt double d0_0, TM_0; double d0A, d0B, d0u, d0a; - double d0_out=5.0; - string seqM, seqxA, seqyA;// for output alignment + double d0_out = 5.0; + string seqM, seqxA, seqyA; // for output alignment double rmsd0 = 0.0; - int L_ali; // Aligned length in standard_TMscore - double Liden=0; - double TM_ali, rmsd_ali; // TMscore and rmsd in standard_TMscore - int n_ali=0; - int n_ali8=0; - bool force_fast_opt=(getmin(xlen,ylen)>1500)?true:fast_opt; - int *invmap = new int[ylen+1]; - double *dist_list = new double[ylen+1]; + int L_ali; // Aligned length in standard_TMscore + double Liden = 0; + double TM_ali, rmsd_ali; // TMscore and rmsd in standard_TMscore + int n_ali = 0; + int n_ali8 = 0; + bool force_fast_opt = (getmin(xlen, ylen) > 1500) ? true : fast_opt; + int *invmap = new int[ylen + 1]; + double *dist_list = new double[ylen + 1]; /* entry function for structure alignment */ - if (se_opt) + if (se_opt) { - u0[0][0]=u0[1][1]=u0[2][2]=1; - u0[0][1]= u0[0][2]= - u0[1][0]= u0[1][2]= - u0[2][0]= u0[2][1]= - t0[0] =t0[1] =t0[2] =0; + u0[0][0] = u0[1][1] = u0[2][2] = 1; + u0[0][1] = u0[0][2] = + u0[1][0] = u0[1][2] = + u0[2][0] = u0[2][1] = + t0[0] = t0[1] = t0[2] = 0; soi_se_main(xa, ya, seqx, seqy, TM1, TM2, TM3, TM4, TM5, - d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, - seqM, seqxA, seqyA, - rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, - xlen, ylen, Lnorm_ass, d0_scale, - i_opt, a_opt, u_opt, d_opt, - mol_vec1[chain_i]+mol_vec2[chain_j], - outfmt_opt, invmap, dist_list, - secx_bond, secy_bond, mm_opt); - if (outfmt_opt>=2) + d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, + seqM, seqxA, seqyA, + rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, + xlen, ylen, Lnorm_ass, d0_scale, + i_opt, a_opt, u_opt, d_opt, + mol_vec1[chain_i] + mol_vec2[chain_j], + outfmt_opt, invmap, dist_list, + secx_bond, secy_bond, mm_opt); + if (outfmt_opt >= 2) { - Liden=L_ali=0; - int r1,r2; - for (r2=0;r2=3) DeleteArray(&yk, ylen*closeK_opt); - delete [] seqy; - delete [] secy; + if (closeK_opt >= 3) + DeleteArray(&yk, ylen * closeK_opt); + delete[] seqy; + delete[] secy; resi_vec2.clear(); - if (mm_opt==6) DeleteArray(&secy_bond, ylen); + if (mm_opt == 6) + DeleteArray(&secy_bond, ylen); } // chain_j - if (chain2_list.size()>1) + if (chain2_list.size() > 1) { yname.clear(); - for (chain_j=0;chain_j=3) DeleteArray(&xk, xlen*closeK_opt); - delete [] seqx; - delete [] secx; + if (closeK_opt >= 3) + DeleteArray(&xk, xlen * closeK_opt); + delete[] seqx; + delete[] secx; resi_vec1.clear(); - if (mm_opt==6) DeleteArray(&secx_bond, xlen); + if (mm_opt == 6) + DeleteArray(&secx_bond, xlen); } // chain_i xname.clear(); PDB_lines1.clear(); chainID_list1.clear(); mol_vec1.clear(); } // i - if (chain2_list.size()==1) + if (chain2_list.size() == 1) { yname.clear(); - for (chain_j=0;chain_j &sequence, const double Lnorm_ass, const double d0_scale, - const bool m_opt, const int i_opt, const int o_opt, const int a_opt, - const bool u_opt, const bool d_opt, const double TMcut, - const int infmt1_opt, const int infmt2_opt, const int ter_opt, - const int split_opt, const int outfmt_opt, const bool fast_opt, - const int mirror_opt, const int het_opt, const string &atom_opt, - const bool autojustify, const string &mol_opt, const string &dir_opt, - const string &dirpair_opt, const string &dir1_opt, const string &dir2_opt, - const vector &chain2parse1, const vector &chain2parse2, - const vector &model2parse1, const vector &model2parse2, - const int byresi_opt, const vector &chain1_list, - const vector &chain2_list, const int hinge_opt) +// Unified engine replacing flexalign_greedy and flexalign_fatcat +int flexalign_unified(string &xname, string &yname, const string &fname_super, + const string &fname_lign, const string &fname_matrix, + vector &sequence, const double Lnorm_ass, const double d0_scale, + const bool m_opt, const int i_opt, const int o_opt, const int a_opt, + const bool u_opt, const bool d_opt, const double TMcut, + const int infmt1_opt, const int infmt2_opt, const int ter_opt, + const int split_opt, const int outfmt_opt, const bool fast_opt, + const int mirror_opt, const int het_opt, const string &atom_opt, + const bool autojustify, const string &mol_opt, const string &dir_opt, + const string &dirpair_opt, const string &dir1_opt, const string &dir2_opt, + const vector &chain2parse1, const vector &chain2parse2, + const vector &model2parse1, const vector &model2parse2, + const int byresi_opt, const vector &chain1_list, + const vector &chain2_list, const int hinge_opt, const int ss_opt, + FlexAlignMode mode = FLEX_STANDARD, bool hinge_set = false) { - /* declare previously global variables */ - vector >PDB_lines1; // text of chain1 - vector >PDB_lines2; // text of chain2 - vector mol_vec1; // molecule type of chain1, RNA if >0 - vector mol_vec2; // molecule type of chain2, RNA if >0 - vector chainID_list1; // list of chainID1 - vector chainID_list2; // list of chainID2 - int i,j; // file index - int chain_i,chain_j; // chain index - int r; // residue index - int xlen, ylen; // chain length - int xchainnum,ychainnum;// number of chains in a PDB file - char *seqx, *seqy; // for the protein sequence - char *secx, *secy; // for the secondary structure - double **xa, **ya; // for input vectors xa[0...xlen-1][0..2] and - // ya[0...ylen-1][0..2], in general, - // ya is regarded as native structure - // --> superpose xa onto ya - vector resi_vec1; // residue index for chain1 - vector resi_vec2; // residue index for chain2 - int read_resi=byresi_opt; // whether to read residue index - if (byresi_opt==0 && o_opt) read_resi=2; - - /* loop over file names */ - for (i=0;i> PDB_lines1; + vector> PDB_lines2; + vector mol_vec1; + vector mol_vec2; + vector chainID_list1; + vector chainID_list2; + int i, j, chain_i, chain_j, r, xlen, ylen, xchainnum, ychainnum; + char *seqx, *seqy, *secx, *secy; + double **xa, **ya; + vector resi_vec1; + vector resi_vec2; + int read_resi = byresi_opt; + if (byresi_opt == 0 && o_opt) + read_resi = 2; + + for (i = 0; i < chain1_list.size(); i++) { - /* parse chain 1 */ - xname=chain1_list[i]; - xchainnum=get_PDB_lines(xname, PDB_lines1, chainID_list1, - mol_vec1, ter_opt, infmt1_opt, atom_opt, autojustify, - split_opt, het_opt, chain2parse1, model2parse1); + xname = chain1_list[i]; + xchainnum = get_PDB_lines(xname, PDB_lines1, chainID_list1, + mol_vec1, ter_opt, infmt1_opt, atom_opt, autojustify, + split_opt, het_opt, chain2parse1, model2parse1); if (!xchainnum) { - cerr<<"Warning! Cannot parse file: "<0) make_sec(seqx,xa, xlen, secx,atom_opt); - else make_sec(xa, xlen, secx); // secondary structure assignment + read_PDB(PDB_lines1[chain_i], xa, seqx, resi_vec1, read_resi); + if (mirror_opt) + for (r = 0; r < xlen; r++) + xa[r][2] = -xa[r][2]; + (mol_vec1[chain_i] > 0) ? make_sec(seqx, xa, xlen, secx, atom_opt) : make_sec(xa, xlen, secx); - for (j=(dir_opt.size()>0)*(i+1);j 0) * (i + 1); j < chain2_list.size(); j++) { - if (dirpair_opt.size() && i!=j) continue; - /* parse chain 2 */ - if (PDB_lines2.size()==0) + if (dirpair_opt.size() && i != j) + continue; + if (PDB_lines2.size() == 0) { - yname=chain2_list[j]; - ychainnum=get_PDB_lines(yname, PDB_lines2, chainID_list2, - mol_vec2, ter_opt, infmt2_opt, atom_opt, autojustify, - split_opt, het_opt, chain2parse2, model2parse2); + yname = chain2_list[j]; + ychainnum = get_PDB_lines(yname, PDB_lines2, chainID_list2, + mol_vec2, ter_opt, infmt2_opt, atom_opt, autojustify, + split_opt, het_opt, chain2parse2, model2parse2); if (!ychainnum) - { - cerr<<"Warning! Cannot parse file: "<0) - make_sec(seqy, ya, ylen, secy, atom_opt); - else make_sec(ya, ylen, secy); + read_PDB(PDB_lines2[chain_j], ya, seqy, resi_vec2, read_resi); + (mol_vec2[chain_j] > 0) ? make_sec(seqy, ya, ylen, secy, atom_opt) : make_sec(ya, ylen, secy); - if (byresi_opt) extract_aln_from_resi(sequence, - seqx,seqy,resi_vec1,resi_vec2,byresi_opt); - - /* declare variable specific to this pair of TMalign */ - double t0[3], u0[3][3]; - double TM1, TM2; - double TM3, TM4, TM5; // for a_opt, u_opt, d_opt - double d0_0, TM_0; - double d0A, d0B, d0u, d0a; - double d0_out=5.0; - string seqM, seqxA, seqyA;// for output alignment - double rmsd0 = 0.0; - int L_ali; // Aligned length in standard_TMscore - double Liden=0; - double TM_ali, rmsd_ali; // TMscore and rmsd in standard_TMscore - int n_ali=0; - int n_ali8=0; - bool force_fast_opt=(getmin(xlen,ylen)>1500)?true:fast_opt; - vector >tu_vec; - vector do_vec; + if (byresi_opt) + extract_aln_from_resi(sequence, seqx, seqy, resi_vec1, resi_vec2, byresi_opt); - /* entry function for structure alignment */ - int hingeNum=flexalign_main( - xa, ya, seqx, seqy, secx, secy, - t0, u0, tu_vec, TM1, TM2, TM3, TM4, TM5, - d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, - seqM, seqxA, seqyA, do_vec, - rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, - xlen, ylen, sequence, Lnorm_ass, d0_scale, - i_opt, a_opt, u_opt, d_opt, force_fast_opt, - mol_vec1[chain_i]+mol_vec2[chain_j],hinge_opt); - - if (hinge_opt && hingeNum<=1 && - n_ali8<0.6*getmin(xlen,ylen)) + // --- CORE DISPATCH LOGIC START --- + if (mode == FLEX_FATCAT) { - double t0_h[3], u0_h[3][3]; - double TM1_h, TM2_h; - double TM3_h, TM4_h, TM5_h; - double d0_0_h, TM_0_h; - double d0_out_h=5.0; - string seqM_h, seqxA_h, seqyA_h; - double rmsd0_h = 0.0; - int L_ali_h; - double Liden_h=0; - double TM_ali_h, rmsd_ali_h; - int n_ali_h=0; - int n_ali8_h=0; - vector >tu_vec_h(1,tu_vec[0]); - vector do_vec_h; - tu2t_u(tu_vec[0],t0_h,u0_h); - - int hingeNum_h=flexalign_main( + FlexAlignResult fatcat_res; + bool force_fast_opt = (getmin(xlen, ylen) > 1500) ? true : fast_opt; + + fatcat_res.hingeNum = flexalign_fatcat_main( xa, ya, seqx, seqy, secx, secy, - t0_h, u0_h, tu_vec_h, - TM1_h, TM2_h, TM3_h, TM4_h, TM5_h, - d0_0_h, TM_0_h, d0A, d0B, d0u, d0a, d0_out_h, - seqM_h, seqxA_h, seqyA_h, do_vec_h, rmsd0_h, L_ali_h, - Liden_h, TM_ali_h, rmsd_ali_h, n_ali_h, n_ali8_h, - xlen, ylen, sequence, Lnorm_ass, d0_scale, i_opt, - a_opt, u_opt, d_opt, force_fast_opt, - mol_vec1[chain_i]+mol_vec2[chain_j],hinge_opt); - - double TM =(TM1 >TM2 )?TM1 :TM2; - double TM_h=(TM1_h>TM2_h)?TM1_h:TM2_h; - if (TM_h>TM) + fatcat_res.t0, fatcat_res.u0, fatcat_res.tu_vec, + fatcat_res.TM1, fatcat_res.TM2, fatcat_res.TM3, fatcat_res.TM4, fatcat_res.TM5, + fatcat_res.d0_0, fatcat_res.TM_0, + fatcat_res.d0A, fatcat_res.d0B, fatcat_res.d0u, fatcat_res.d0a, fatcat_res.d0_out, + fatcat_res.seqM, fatcat_res.seqxA, fatcat_res.seqyA, fatcat_res.do_vec, + fatcat_res.rmsd0, fatcat_res.L_ali, fatcat_res.Liden, + fatcat_res.TM_ali, fatcat_res.rmsd_ali, fatcat_res.n_ali, fatcat_res.n_ali8, + xlen, ylen, sequence, Lnorm_ass, d0_scale, + i_opt, a_opt, u_opt, d_opt, force_fast_opt, + mol_vec1[chain_i] + mol_vec2[chain_j], hinge_opt, ss_opt, 0, hinge_set); + + if (outfmt_opt == 0) + print_version(); + output_flexalign_results( + xname.substr(dir1_opt.size() + dir_opt.size() + dirpair_opt.size()), + yname.substr(dir2_opt.size() + dir_opt.size() + dirpair_opt.size()), + chainID_list1[chain_i], chainID_list2[chain_j], + xlen, ylen, fatcat_res.t0, fatcat_res.u0, fatcat_res.tu_vec, + fatcat_res.TM1, fatcat_res.TM2, fatcat_res.TM3, fatcat_res.TM4, fatcat_res.TM5, + fatcat_res.rmsd0, fatcat_res.d0_out, fatcat_res.seqM.c_str(), + fatcat_res.seqxA.c_str(), fatcat_res.seqyA.c_str(), fatcat_res.Liden, + fatcat_res.n_ali8, fatcat_res.L_ali, fatcat_res.TM_ali, fatcat_res.rmsd_ali, + fatcat_res.TM_0, fatcat_res.d0_0, + fatcat_res.d0A, fatcat_res.d0B, Lnorm_ass, d0_scale, fatcat_res.d0a, fatcat_res.d0u, + (m_opt ? fname_matrix : "").c_str(), + outfmt_opt, ter_opt, false, split_opt, o_opt, + fname_super, i_opt, a_opt, u_opt, d_opt, mirror_opt, + resi_vec1, resi_vec2); + } + else + { + // === Standard & Best specific logic === + FlexAlignResult best_res; + double global_max_TM = -1.0; + + int start_ss = (mode == FLEX_BEST) ? 0 : ss_opt; + int end_ss = (mode == FLEX_BEST) ? 1 : ss_opt; + + bool force_fast_opt = (getmin(xlen, ylen) > 1500) ? true : fast_opt; + + for (int cur_ss_opt = start_ss; cur_ss_opt <= end_ss; cur_ss_opt++) { - hingeNum=hingeNum_h; - tu2t_u(tu_vec_h[0],t0,u0); - TM1=TM1_h; - TM2=TM2_h; - TM3=TM3_h; - TM4=TM4_h; - TM5=TM5_h; - d0_0=d0_0_h; - TM_0=TM_0_h; - d0_out=d0_out_h; - seqM=seqM_h; - seqxA=seqxA_h; - seqyA=seqyA_h; - rmsd0=rmsd0_h; - L_ali=L_ali_h; - Liden=Liden_h; - TM_ali=TM_ali_h; - rmsd_ali=rmsd_ali_h; - n_ali=n_ali_h; - n_ali8=n_ali8_h; - for (int hinge=0;hinge cur_res.TM2) ? cur_res.TM1 : cur_res.TM2; + if (cur_max_TM > global_max_TM) + { + global_max_TM = cur_max_TM; + best_res = cur_res; + } } - else tu2t_u(tu_vec[0],t0,u0); - do_vec_h.clear(); - } - /* print result */ - if (outfmt_opt==0) print_version(); - output_flexalign_results( - xname.substr(dir1_opt.size()+dir_opt.size()+dirpair_opt.size()), - yname.substr(dir2_opt.size()+dir_opt.size()+dirpair_opt.size()), - chainID_list1[chain_i], chainID_list2[chain_j], - xlen, ylen, t0, u0, tu_vec, TM1, TM2, TM3, TM4, TM5, - rmsd0, d0_out, seqM.c_str(), - seqxA.c_str(), seqyA.c_str(), Liden, - n_ali8, L_ali, TM_ali, rmsd_ali, TM_0, d0_0, - d0A, d0B, Lnorm_ass, d0_scale, d0a, d0u, - (m_opt?fname_matrix:"").c_str(), - outfmt_opt, ter_opt, false, split_opt, o_opt, - fname_super, i_opt, a_opt, u_opt, d_opt, mirror_opt, - resi_vec1, resi_vec2); + if (outfmt_opt == 0) + print_version(); + output_flexalign_results( + xname.substr(dir1_opt.size() + dir_opt.size() + dirpair_opt.size()), + yname.substr(dir2_opt.size() + dir_opt.size() + dirpair_opt.size()), + chainID_list1[chain_i], chainID_list2[chain_j], + xlen, ylen, best_res.t0, best_res.u0, best_res.tu_vec, best_res.TM1, best_res.TM2, best_res.TM3, best_res.TM4, best_res.TM5, + best_res.rmsd0, best_res.d0_out, best_res.seqM.c_str(), + best_res.seqxA.c_str(), best_res.seqyA.c_str(), best_res.Liden, + best_res.n_ali8, best_res.L_ali, best_res.TM_ali, best_res.rmsd_ali, best_res.TM_0, best_res.d0_0, + best_res.d0A, best_res.d0B, Lnorm_ass, d0_scale, best_res.d0a, best_res.d0u, + (m_opt ? fname_matrix : "").c_str(), + outfmt_opt, ter_opt, false, split_opt, o_opt, + fname_super, i_opt, a_opt, u_opt, d_opt, mirror_opt, + resi_vec1, resi_vec2); + } + // --- CORE DISPATCH LOGIC END --- - /* Done! Free memory */ - tu_vec.clear(); - seqM.clear(); - seqxA.clear(); - seqyA.clear(); + // Cleanup memory DeleteArray(&ya, ylen); - delete [] seqy; - delete [] secy; + delete[] seqy; + delete[] secy; resi_vec2.clear(); - do_vec.clear(); - } // chain_j - if (chain2_list.size()>1) + } + if (chain2_list.size() > 1) { yname.clear(); - for (chain_j=0;chain_j &sequence, const double Lnorm_ass, const double d0_scale, const bool m_opt, const int i_opt, const int o_opt, const int a_opt, const bool u_opt, const bool d_opt, const double TMcut, const int infmt1_opt, const int infmt2_opt, const int ter_opt, const int split_opt, const int outfmt_opt, const bool fast_opt, const int mirror_opt, const int het_opt, const string &atom_opt, const bool autojustify, const string &mol_opt, const string &dir_opt, const string &dirpair_opt, const string &dir1_opt, const string &dir2_opt, const vector &chain2parse1, const vector &chain2parse2, const vector &model2parse1, const vector &model2parse2, const int byresi_opt, const vector &chain1_list, const vector &chain2_list, const int hinge_opt, const int ss_opt) +// { +// return flexalign_unified(xname, yname, fname_super, fname_lign, fname_matrix, sequence, Lnorm_ass, d0_scale, m_opt, i_opt, o_opt, a_opt, u_opt, d_opt, TMcut, infmt1_opt, infmt2_opt, ter_opt, split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt, atom_opt, autojustify, mol_opt, dir_opt, dirpair_opt, dir1_opt, dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, byresi_opt, chain1_list, chain2_list, hinge_opt, ss_opt, FLEX_STANDARD); +// } + +int flexalign_greedy(string &xname, string &yname, const string &fname_super, const string &fname_lign, const string &fname_matrix, vector &sequence, const double Lnorm_ass, const double d0_scale, const bool m_opt, const int i_opt, const int o_opt, const int a_opt, const bool u_opt, const bool d_opt, const double TMcut, const int infmt1_opt, const int infmt2_opt, const int ter_opt, const int split_opt, const int outfmt_opt, const bool fast_opt, const int mirror_opt, const int het_opt, const string &atom_opt, const bool autojustify, const string &mol_opt, const string &dir_opt, const string &dirpair_opt, const string &dir1_opt, const string &dir2_opt, const vector &chain2parse1, const vector &chain2parse2, const vector &model2parse1, const vector &model2parse2, const int byresi_opt, const vector &chain1_list, const vector &chain2_list, const int hinge_opt) { - if (argc < 2) print_help(); + return flexalign_unified(xname, yname, fname_super, fname_lign, fname_matrix, sequence, Lnorm_ass, d0_scale, m_opt, i_opt, o_opt, a_opt, u_opt, d_opt, TMcut, infmt1_opt, infmt2_opt, ter_opt, split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt, atom_opt, autojustify, mol_opt, dir_opt, dirpair_opt, dir1_opt, dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, byresi_opt, chain1_list, chain2_list, hinge_opt, 0 /* ss_opt is ignored in BEST mode */, FLEX_BEST); +} + +int flexalign_fatcat(string &xname, string &yname, const string &fname_super, const string &fname_lign, const string &fname_matrix, vector &sequence, const double Lnorm_ass, const double d0_scale, const bool m_opt, const int i_opt, const int o_opt, const int a_opt, const bool u_opt, const bool d_opt, const double TMcut, const int infmt1_opt, const int infmt2_opt, const int ter_opt, const int split_opt, const int outfmt_opt, const bool fast_opt, const int mirror_opt, const int het_opt, const string &atom_opt, const bool autojustify, const string &mol_opt, const string &dir_opt, const string &dirpair_opt, const string &dir1_opt, const string &dir2_opt, const vector &chain2parse1, const vector &chain2parse2, const vector &model2parse1, const vector &model2parse2, const int byresi_opt, const vector &chain1_list, const vector &chain2_list, const int hinge_opt, bool hinge_set = false) +{ + return flexalign_unified(xname, yname, fname_super, fname_lign, fname_matrix, sequence, Lnorm_ass, d0_scale, m_opt, i_opt, o_opt, a_opt, u_opt, d_opt, TMcut, infmt1_opt, infmt2_opt, ter_opt, split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt, atom_opt, autojustify, mol_opt, dir_opt, dirpair_opt, dir1_opt, dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, byresi_opt, chain1_list, chain2_list, hinge_opt, 0 /* ss_opt ignore */, FLEX_FATCAT, hinge_set); +} +int main(int argc, char *argv[]) +{ + if (argc < 2) + print_help(); clock_t t1, t2; t1 = clock(); @@ -3003,67 +3102,69 @@ int main(int argc, char *argv[]) /**********************/ /* get argument */ /**********************/ - string xname = ""; - string yname = ""; - string fname_super = ""; // file name for superposed structure - string fname_lign = ""; // file name for user alignment - string fname_matrix= ""; // file name for output matrix - vector sequence; // get value from alignment file + string xname = ""; + string yname = ""; + string fname_super = ""; // file name for superposed structure + string fname_lign = ""; // file name for user alignment + string fname_matrix = ""; // file name for output matrix + vector sequence; // get value from alignment file double Lnorm_ass, d0_scale; - bool h_opt = false; // print full help message - bool v_opt = false; // print version - bool m_opt = false; // flag for -m, output rotation matrix - int i_opt = 0; // 1 for -i, 3 for -I - int o_opt = 0; // 1 for -o, 2 for -rasmol, 3 for -chimerax - int a_opt = 0; // flag for -a, do not normalized by average length - bool u_opt = false; // flag for -u, normalized by user specified length - bool d_opt = false; // flag for -d, user specified d0 - bool do_opt= false; // flag for -do, output distance of i-th aligned pair - - bool full_opt = false;// do not show chain level alignment - double TMcut =-1; - bool se_opt =false; - int infmt1_opt=-1; // PDB or PDBx/mmCIF format for chain_1 - int infmt2_opt=-1; // PDB or PDBx/mmCIF format for chain_2 - int ter_opt =-1; // default change to 2 (END, or different chainID) - int split_opt =-1; // default change to 2 (split each chains) - int outfmt_opt=0; // set -outfmt to full output - bool fast_opt =false; // flags for -fast, fTM-align algorithm - int cp_opt =0; // do not check circular permutation - int closeK_opt=-1; // number of atoms for SOI initial alignment. - // 5 and 0 for -mm 5 and 6 - int hinge_opt =9; // maximum number of hinge allowed for flexible - int mirror_opt=0; // do not align mirror - int het_opt=0; // do not read HETATM residues - int mm_opt=0; // do not perform MM-align - string atom_opt ="auto";// use C alpha atom for protein and C3' for RNA - string mol_opt ="auto";// auto-detect the molecule type as protein/RNA - string suffix_opt=""; // set -suffix to empty - string dir_opt =""; // set -dir to empty - string dirpair_opt=""; // set -dirpair to empty - string dir1_opt =""; // set -dir1 to empty - string dir2_opt =""; // set -dir2 to empty - string chainmapfile=""; // chain mapping between two complexes - int byresi_opt=0; // set -byresi to 0 + bool h_opt = false; // print full help message + bool v_opt = false; // print version + bool m_opt = false; // flag for -m, output rotation matrix + int i_opt = 0; // 1 for -i, 3 for -I + int o_opt = 0; // 1 for -o, 2 for -rasmol, 3 for -chimerax + int a_opt = 0; // flag for -a, do not normalized by average length + bool u_opt = false; // flag for -u, normalized by user specified length + bool d_opt = false; // flag for -d, user specified d0 + bool do_opt = false; // flag for -do, output distance of i-th aligned pair + + bool full_opt = false; // do not show chain level alignment + double TMcut = -1; + bool se_opt = false; + int infmt1_opt = -1; // PDB or PDBx/mmCIF format for chain_1 + int infmt2_opt = -1; // PDB or PDBx/mmCIF format for chain_2 + int ter_opt = -1; // default change to 2 (END, or different chainID) + int split_opt = -1; // default change to 2 (split each chains) + int outfmt_opt = 0; // set -outfmt to full output + bool fast_opt = false; // flags for -fast, fTM-align algorithm + int cp_opt = 0; // do not check circular permutation + int closeK_opt = -1; // number of atoms for SOI initial alignment. + // 5 and 0 for -mm 5 and 6 + int hinge_opt = 9; // maximum number of hinge allowed for flexible + bool hinge_set = false; + int mirror_opt = 0; // do not align mirror + int het_opt = 0; // do not read HETATM residues + int mm_opt = 0; // do not perform MM-align + bool fatcat_opt = false; // flag for -fatcat, only valid with -mm 7 + string atom_opt = "auto"; // use C alpha atom for protein and C3' for RNA + string mol_opt = "auto"; // auto-detect the molecule type as protein/RNA + string suffix_opt = ""; // set -suffix to empty + string dir_opt = ""; // set -dir to empty + string dirpair_opt = ""; // set -dirpair to empty + string dir1_opt = ""; // set -dir1 to empty + string dir2_opt = ""; // set -dir2 to empty + string chainmapfile = ""; // chain mapping between two complexes + int byresi_opt = 0; // set -byresi to 0 vector chain1_list; // only when -dir1 is set vector chain2_list; // only when -dir2 is set vector chain2parse1; vector chain2parse2; vector model2parse1; vector model2parse2; - vector > chain_pair_list; // only when -dirpair is set + vector> chain_pair_list; // only when -dirpair is set - for(int i = 1; i < argc; i++) + for (int i = 1; i < argc; i++) { - if ( !strcmp(argv[i],"-o") ) + if (!strcmp(argv[i], "-o")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -o"); - if (o_opt==2) - cerr<<"Warning! -rasmol is already set. Ignore -o"<=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -rasmol"); - if (o_opt==1) - cerr<<"Warning! -o is already set. Ignore -rasmol"<=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -chimerax"); - if (o_opt==1) - cerr<<"Warning! -o is already set. Ignore -chimerax"<=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -u or -L"); - Lnorm_ass = atof(argv[i + 1]); u_opt = true; i++; - if (Lnorm_ass<=0) PrintErrorAndQuit( - "ERROR! The value for -u or -L should be >0"); + Lnorm_ass = atof(argv[i + 1]); + u_opt = true; + i++; + if (Lnorm_ass <= 0) + PrintErrorAndQuit( + "ERROR! The value for -u or -L should be >0"); } - else if ( !strcmp(argv[i],"-a") ) + else if (!strcmp(argv[i], "-a")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -a"); - if (!strcmp(argv[i + 1], "T")) a_opt=true; - else if (!strcmp(argv[i + 1], "F")) a_opt=false; - else + if (!strcmp(argv[i + 1], "T")) + a_opt = true; + else if (!strcmp(argv[i + 1], "F")) + a_opt = false; + else { - a_opt=atoi(argv[i + 1]); - if (a_opt!=-2 && a_opt!=-1 && a_opt!=1) + a_opt = atoi(argv[i + 1]); + if (a_opt != -2 && a_opt != -1 && a_opt != 1) PrintErrorAndQuit("-a must be -2, -1, 1, T or F"); } i++; } - else if ( !strcmp(argv[i],"-full") ) + else if (!strcmp(argv[i], "-full")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -full"); - if (!strcmp(argv[i + 1], "T")) full_opt=true; - else if (!strcmp(argv[i + 1], "F")) full_opt=false; - else PrintErrorAndQuit("-full must be T or F"); + if (!strcmp(argv[i + 1], "T")) + full_opt = true; + else if (!strcmp(argv[i + 1], "F")) + full_opt = false; + else + PrintErrorAndQuit("-full must be T or F"); i++; } - else if ( !strcmp(argv[i],"-d") ) + else if (!strcmp(argv[i], "-d")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -d"); - d0_scale = atof(argv[i + 1]); d_opt = true; i++; + d0_scale = atof(argv[i + 1]); + d_opt = true; + i++; } - else if ( !strcmp(argv[i],"-closeK") ) + else if (!strcmp(argv[i], "-closeK")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -closeK"); - closeK_opt = atoi(argv[i + 1]); i++; + closeK_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-hinge") ) + else if (!strcmp(argv[i], "-hinge")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -hinge"); - hinge_opt = atoi(argv[i + 1]); i++; + hinge_set = true; + hinge_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-v") ) + else if (!strcmp(argv[i], "-v")) { v_opt = true; } - else if ( !strcmp(argv[i],"-do") ) + else if (!strcmp(argv[i], "-do")) { do_opt = true; } - else if ( !strcmp(argv[i],"-h") ) + else if (!strcmp(argv[i], "-h")) { h_opt = true; } - else if ( !strcmp(argv[i],"-i") ) + else if (!strcmp(argv[i], "-i")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -i"); - if (i_opt==3) + if (i_opt == 3) PrintErrorAndQuit("ERROR! -i and -I cannot be used together"); - fname_lign = argv[i + 1]; i_opt = 1; i++; + fname_lign = argv[i + 1]; + i_opt = 1; + i++; } - else if (!strcmp(argv[i], "-I") ) + else if (!strcmp(argv[i], "-I")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -I"); - if (i_opt==1) + if (i_opt == 1) PrintErrorAndQuit("ERROR! -I and -i cannot be used together"); - fname_lign = argv[i + 1]; i_opt = 3; i++; + fname_lign = argv[i + 1]; + i_opt = 3; + i++; } - else if (!strcmp(argv[i], "-chainmap") ) + else if (!strcmp(argv[i], "-chainmap")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -chainmap"); - chainmapfile = argv[i + 1]; i++; + chainmapfile = argv[i + 1]; + i++; } - else if (!strcmp(argv[i], "-chain1") ) + else if (!strcmp(argv[i], "-chain1")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -chain1"); - split(argv[i+1],chain2parse1,','); + split(argv[i + 1], chain2parse1, ','); i++; } - else if (!strcmp(argv[i], "-chain2") ) + else if (!strcmp(argv[i], "-chain2")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -chain2"); - split(argv[i+1],chain2parse2,','); + split(argv[i + 1], chain2parse2, ','); i++; } - else if (!strcmp(argv[i], "-model1") ) + else if (!strcmp(argv[i], "-model1")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -model1"); - split(argv[i+1],model2parse1,','); + split(argv[i + 1], model2parse1, ','); i++; } - else if (!strcmp(argv[i], "-model2") ) + else if (!strcmp(argv[i], "-model2")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -model2"); - split(argv[i+1],model2parse2,','); + split(argv[i + 1], model2parse2, ','); i++; } - else if (!strcmp(argv[i], "-m") ) + else if (!strcmp(argv[i], "-m")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -m"); - fname_matrix = argv[i + 1]; m_opt = true; i++; - }// get filename for rotation matrix + fname_matrix = argv[i + 1]; + m_opt = true; + i++; + } // get filename for rotation matrix else if (!strcmp(argv[i], "-fast")) { fast_opt = true; @@ -3226,394 +3347,476 @@ int main(int argc, char *argv[]) { se_opt = true; } - else if ( !strcmp(argv[i],"-infmt1") ) + else if (!strcmp(argv[i], "-infmt1")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -infmt1"); - infmt1_opt=atoi(argv[i + 1]); i++; - if (infmt1_opt<-1 || infmt1_opt>3) + infmt1_opt = atoi(argv[i + 1]); + i++; + if (infmt1_opt < -1 || infmt1_opt > 3) PrintErrorAndQuit("ERROR! -infmt1 can only be -1, 0, 1, 2, or 3"); } - else if ( !strcmp(argv[i],"-infmt2") ) + else if (!strcmp(argv[i], "-infmt2")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -infmt2"); - infmt2_opt=atoi(argv[i + 1]); i++; - if (infmt2_opt<-1 || infmt2_opt>3) + infmt2_opt = atoi(argv[i + 1]); + i++; + if (infmt2_opt < -1 || infmt2_opt > 3) PrintErrorAndQuit("ERROR! -infmt2 can only be -1, 0, 1, 2, or 3"); } - else if ( !strcmp(argv[i],"-ter") ) + else if (!strcmp(argv[i], "-ter")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -ter"); - ter_opt=atoi(argv[i + 1]); i++; + ter_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-split") ) + else if (!strcmp(argv[i], "-split")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -split"); - split_opt=atoi(argv[i + 1]); i++; + split_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-atom") ) + else if (!strcmp(argv[i], "-atom")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -atom"); - atom_opt=argv[i + 1]; i++; + atom_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-mol") ) + else if (!strcmp(argv[i], "-mol")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -mol"); - mol_opt=argv[i + 1]; i++; - if (mol_opt=="prot") mol_opt="protein"; - else if (mol_opt=="DNA") mol_opt="RNA"; - if (mol_opt!="auto" && mol_opt!="protein" && mol_opt!="RNA") + mol_opt = argv[i + 1]; + i++; + if (mol_opt == "prot") + mol_opt = "protein"; + else if (mol_opt == "DNA") + mol_opt = "RNA"; + if (mol_opt != "auto" && mol_opt != "protein" && mol_opt != "RNA") PrintErrorAndQuit("ERROR! Molecule type must be one of the " - "following:\nauto, prot (the same as 'protein'), and " - "RNA (the same as 'DNA')."); + "following:\nauto, prot (the same as 'protein'), and " + "RNA (the same as 'DNA')."); } - else if ( !strcmp(argv[i],"-dir") ) + else if (!strcmp(argv[i], "-dir")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -dir"); - dir_opt=argv[i + 1]; i++; + dir_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-dirpair") ) + else if (!strcmp(argv[i], "-dirpair")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -dirpair"); - dirpair_opt=argv[i + 1]; i++; + dirpair_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-dir1") ) + else if (!strcmp(argv[i], "-dir1")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -dir1"); - dir1_opt=argv[i + 1]; i++; + dir1_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-dir2") ) + else if (!strcmp(argv[i], "-dir2")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -dir2"); - dir2_opt=argv[i + 1]; i++; + dir2_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-suffix") ) + else if (!strcmp(argv[i], "-suffix")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -suffix"); - suffix_opt=argv[i + 1]; i++; + suffix_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-outfmt") ) + else if (!strcmp(argv[i], "-outfmt")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -outfmt"); - outfmt_opt=atoi(argv[i + 1]); i++; + outfmt_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-TMcut") ) + else if (!strcmp(argv[i], "-TMcut")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -TMcut"); - TMcut=atof(argv[i + 1]); i++; + TMcut = atof(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-byresi") || - !strcmp(argv[i],"-tmscore") || - !strcmp(argv[i],"-TMscore")) + else if (!strcmp(argv[i], "-byresi") || + !strcmp(argv[i], "-tmscore") || + !strcmp(argv[i], "-TMscore")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -byresi"); - byresi_opt=atoi(argv[i + 1]); i++; + byresi_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-seq") ) + else if (!strcmp(argv[i], "-seq")) { - byresi_opt=5; + byresi_opt = 5; } - else if ( !strcmp(argv[i],"-cp") ) + else if (!strcmp(argv[i], "-cp")) { - mm_opt=3; + mm_opt = 3; } - else if ( !strcmp(argv[i],"-mirror") ) + else if (!strcmp(argv[i], "-mirror")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -mirror"); - mirror_opt=atoi(argv[i + 1]); i++; + mirror_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-het") ) + else if (!strcmp(argv[i], "-het")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -het"); - het_opt=atoi(argv[i + 1]); i++; - if (het_opt!=0 && het_opt!=1 && het_opt!=2) + het_opt = atoi(argv[i + 1]); + i++; + if (het_opt != 0 && het_opt != 1 && het_opt != 2) PrintErrorAndQuit("-het must be 0, 1, or 2"); } - else if ( !strcmp(argv[i],"-mm") ) + else if (!strcmp(argv[i], "-mm")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -mm"); - mm_opt=atoi(argv[i + 1]); i++; + mm_opt = atoi(argv[i + 1]); + i++; } - else if (xname.size() == 0) xname=argv[i]; - else if (yname.size() == 0) yname=argv[i]; - else PrintErrorAndQuit(string("ERROR! Undefined option ")+argv[i]); + else if (!strcmp(argv[i], "-fatcat")) + { + fatcat_opt = true; + } + else if (xname.size() == 0) + xname = argv[i]; + else if (yname.size() == 0) + yname = argv[i]; + else + PrintErrorAndQuit(string("ERROR! Undefined option ") + argv[i]); } - if (xname.size()==0 || (yname.size() && dir_opt.size()) || + if (xname.size() == 0 || (yname.size() && dir_opt.size()) || (yname.size() && dirpair_opt.size()) || - (yname.size()==0 && dir_opt.size()==0 && dirpair_opt.size()==0)) + (yname.size() == 0 && dir_opt.size() == 0 && dirpair_opt.size() == 0)) { - if (h_opt) print_help(h_opt); + if (h_opt) + print_help(h_opt); if (v_opt) { print_version(); exit(EXIT_FAILURE); } - if (xname.size()==0) + if (xname.size() == 0) PrintErrorAndQuit("Please provide input structures"); - else if (yname.size()==0 && dir_opt.size()==0 && dirpair_opt.size()==0 && mm_opt!=4) + else if (yname.size() == 0 && dir_opt.size() == 0 && dirpair_opt.size() == 0 && mm_opt != 4) PrintErrorAndQuit("Please provide structure B"); - else if (yname.size() && dir_opt.size()+dirpair_opt.size()) + else if (yname.size() && dir_opt.size() + dirpair_opt.size()) PrintErrorAndQuit("Please provide only one file name if -dir is set"); } - if (suffix_opt.size() && dir_opt.size()+dirpair_opt.size()+dir1_opt.size()+dir2_opt.size()==0) + if (suffix_opt.size() && dir_opt.size() + dirpair_opt.size() + dir1_opt.size() + dir2_opt.size() == 0) PrintErrorAndQuit("-suffix is only valid if -dir, -dir1 or -dir2 is set"); if ((dir_opt.size() || dirpair_opt.size() || dir1_opt.size() || dir2_opt.size())) { - if (mm_opt!=2 && mm_opt!=4) + if (mm_opt != 2 && mm_opt != 4) { if (o_opt) PrintErrorAndQuit("-o cannot be set with -dir, -dir1 or -dir2"); - if (m_opt && fname_matrix!="-") + if (m_opt && fname_matrix != "-") PrintErrorAndQuit("-m can only be - or unset when using -dir, -dir1 or -dir2"); } - else if ((dir_opt.size() || dirpair_opt.size() )&& (dir1_opt.size() || dir2_opt.size())) + else if ((dir_opt.size() || dirpair_opt.size()) && (dir1_opt.size() || dir2_opt.size())) PrintErrorAndQuit("-dir cannot be set with -dir1 or -dir2"); else if (dir_opt.size() && dirpair_opt.size()) PrintErrorAndQuit("-dir cannot be set with -dirpair"); } - if (o_opt && (infmt1_opt!=-1 && infmt1_opt!=0 && infmt1_opt!=3)) + if (o_opt && (infmt1_opt != -1 && infmt1_opt != 0 && infmt1_opt != 3)) PrintErrorAndQuit("-o can only be used with -infmt1 -1, 0 or 3"); - bool autojustify=(atom_opt=="auto" || atom_opt=="PC4'"); // auto re-pad atom name - if (mol_opt=="protein" && atom_opt=="auto") - atom_opt=" CA "; - else if (mol_opt=="RNA" && atom_opt=="auto") - atom_opt=" C3'"; - if (atom_opt.size()!=4) + bool autojustify = (atom_opt == "auto" || atom_opt == "PC4'"); // auto re-pad atom name + if (mol_opt == "protein" && atom_opt == "auto") + atom_opt = " CA "; + else if (mol_opt == "RNA" && atom_opt == "auto") + atom_opt = " C3'"; + if (atom_opt.size() != 4) { - cerr<<"ERROR! Atom name must have 4 characters, including space.\n" - "For example, C alpha, C3' and P atoms should be specified by\n" - "-atom \" CA \", -atom \" P \" and -atom \" C3'\", respectively."<=5 || atom_opt.size()==0) return 1; - else if (atom_opt.size()==1) atom_opt=" "+atom_opt+" "; - else if (atom_opt.size()==2) atom_opt=" "+atom_opt+" "; - else if (atom_opt.size()==3) atom_opt=" "+atom_opt; - cerr<<"Change -atom to \""<= 5 || atom_opt.size() == 0) + return 1; + else if (atom_opt.size() == 1) + atom_opt = " " + atom_opt + " "; + else if (atom_opt.size() == 2) + atom_opt = " " + atom_opt + " "; + else if (atom_opt.size() == 3) + atom_opt = " " + atom_opt; + cerr << "Change -atom to \"" << atom_opt << "\"" << endl; } - if (d_opt && d0_scale<=0) + if (d_opt && d0_scale <= 0) PrintErrorAndQuit("Wrong value for option -d! It should be >0"); - if (outfmt_opt>=2 && (a_opt || u_opt || d_opt)) + if (outfmt_opt >= 2 && (a_opt || u_opt || d_opt)) PrintErrorAndQuit("-outfmt 2 cannot be used with -a, -u, -L, -d"); - if (byresi_opt!=0) + if (byresi_opt != 0) { if (i_opt) PrintErrorAndQuit("-TMscore >=1 cannot be used with -i or -I"); - if (byresi_opt<0 || byresi_opt>7) + if (byresi_opt < 0 || byresi_opt > 7) PrintErrorAndQuit("-TMscore can only be 0 to 7"); - if ((byresi_opt==2 || byresi_opt==3 || byresi_opt==6) && ter_opt>=2) + if ((byresi_opt == 2 || byresi_opt == 3 || byresi_opt == 6) && ter_opt >= 2) PrintErrorAndQuit("-TMscore 2 and 6 must be used with -ter <=1"); } - //if (split_opt==1 && ter_opt!=0) - //PrintErrorAndQuit("-split 1 should be used with -ter 0"); - //else if (split_opt==2 && ter_opt!=0 && ter_opt!=1) - //PrintErrorAndQuit("-split 2 should be used with -ter 0 or 1"); - if (split_opt<0) - if (byresi_opt==2 || byresi_opt==3) split_opt=0; - else split_opt=2; - else if (split_opt>2) + // if (split_opt==1 && ter_opt!=0) + // PrintErrorAndQuit("-split 1 should be used with -ter 0"); + // else if (split_opt==2 && ter_opt!=0 && ter_opt!=1) + // PrintErrorAndQuit("-split 2 should be used with -ter 0 or 1"); + if (split_opt < 0) + if (byresi_opt == 2 || byresi_opt == 3) + split_opt = 0; + else + split_opt = 2; + else if (split_opt > 2) PrintErrorAndQuit("-split can only be 0, 1 or 2"); - if (mm_opt==3) + if (mm_opt == 3) { - cp_opt=true; - mm_opt=0; + cp_opt = true; + mm_opt = 0; } if (cp_opt && i_opt) PrintErrorAndQuit("-mm 3 cannot be used with -i or -I"); - if (mirror_opt && het_opt!=1) - cerr<<"WARNING! -mirror was not used with -het 1. " - <<"D amino acids may not be correctly aligned."<=2 && (mm_opt==1 || mm_opt==2)) PrintErrorAndQuit("-mm 1 or 2 must be used with -ter 0 or -ter 1"); - if (mm_opt==4 && (yname.size() || dir2_opt.size())) - cerr<<"WARNING! structure_2 is ignored for -mm 4"<= 2 && (mm_opt == 1 || mm_opt == 2)) + PrintErrorAndQuit("-mm 1 or 2 must be used with -ter 0 or -ter 1"); + if (mm_opt == 4 && (yname.size() || dir2_opt.size())) + cerr << "WARNING! structure_2 is ignored for -mm 4" << endl; + if (dirpair_opt.size() && (mm_opt == 2 || mm_opt == 4)) PrintErrorAndQuit("-mm 2 or 4 cannot be used with -dirpair"); } - else if (full_opt) PrintErrorAndQuit("-full can only be used with -mm"); + else if (full_opt) + PrintErrorAndQuit("-full can only be used with -mm"); - if (o_opt && ter_opt<=1 && split_opt==2) + if (o_opt && ter_opt <= 1 && split_opt == 2) { - if (mm_opt && o_opt==2) cerr<<"WARNING! -mm may generate incorrect" - <<" RasMol output due to limitations in PDB file format. " - <<"When -mm is used, -o is recommended over -rasmol"<=10) + if (mm_opt >= 7 && hinge_opt >= 10) PrintErrorAndQuit("ERROR! -hinge must be <10"); - if (chainmapfile.size() && mm_opt!=1) - PrintErrorAndQuit("ERROR! -chainmap must be used with -mm 1"); + if (fatcat_opt && mm_opt != 7) + PrintErrorAndQuit("ERROR! -fatcat parameter can only be used when -mm 7 is set"); + if (chainmapfile.size() && mm_opt != 1) + PrintErrorAndQuit("ERROR! -chainmap must be used with -mm 1"); /* read initial alignment file from 'align.txt' */ - if (i_opt) read_user_alignment(sequence, fname_lign, i_opt); + if (i_opt) + read_user_alignment(sequence, fname_lign, i_opt); - if (byresi_opt==6 || byresi_opt==7) mm_opt=1; - else if (byresi_opt) i_opt=3; + if (byresi_opt == 6 || byresi_opt == 7) + mm_opt = 1; + else if (byresi_opt) + i_opt = 3; if (m_opt && fname_matrix == "") // Output rotation matrix: matrix.txt PrintErrorAndQuit("ERROR! Please provide a file name for option -m!"); /* parse file list */ - int i; + int i; if (dirpair_opt.size()) - file2chainpairlist(chain1_list,chain2_list, xname, dirpair_opt, suffix_opt); + file2chainpairlist(chain1_list, chain2_list, xname, dirpair_opt, suffix_opt); else { - if (dir1_opt.size()+dir_opt.size()==0) chain1_list.push_back(xname); - else file2chainlist(chain1_list, xname, dir_opt+dir1_opt, suffix_opt); + if (dir1_opt.size() + dir_opt.size() == 0) + chain1_list.push_back(xname); + else + file2chainlist(chain1_list, xname, dir_opt + dir1_opt, suffix_opt); if (dir_opt.size()) - for (i=0;i= 7) + cout << "#PDBchain1\tPDBchain2\tTM1\tTM2\t" + << "RMSD\tID1\tID2\tIDali\tL1\tL2\tLali\tNblk" << endl; + else + cout << "#PDBchain1\tPDBchain2\tTM1\tTM2\t" + << "RMSD\tID1\tID2\tIDali\tL1\tL2\tLali" << endl; } - /* real alignment. entry functions are MMalign_main and + /* real alignment. entry functions are MMalign_main and * TMalign_main */ - if (mm_opt==0) TMalign(xname, yname, fname_super, fname_lign, fname_matrix, - sequence, Lnorm_ass, d0_scale, m_opt, i_opt, o_opt, a_opt, - u_opt, d_opt, TMcut, infmt1_opt, infmt2_opt, ter_opt, - split_opt, outfmt_opt, fast_opt, cp_opt, mirror_opt, het_opt, - atom_opt, autojustify, mol_opt, dir_opt, dirpair_opt, dir1_opt, - dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, - byresi_opt, chain1_list, chain2_list, se_opt, do_opt); - else if (mm_opt==1) - { - if (dir_opt.size()>0 || dir1_opt.size()>0 || dir2_opt.size()>0) - { - for (int ii=0; ii 0 || dir1_opt.size() > 0 || dir2_opt.size() > 0) + { + for (int ii = 0; ii < chain1_list.size(); ii++) { xname = chain1_list[ii]; vector tmp_vec1(1, xname); - for (int jj=0; jj0 && jj<=ii) continue; + if (dir_opt.size() > 0 && jj <= ii) + continue; yname = chain2_list[jj]; vector tmp_vec2(1, yname); MMalign(xname, yname, fname_super, - fname_lign, fname_matrix, sequence, d0_scale, m_opt, o_opt, - a_opt, d_opt, full_opt, TMcut, infmt1_opt, infmt2_opt, - ter_opt, split_opt, outfmt_opt, fast_opt, mirror_opt, - het_opt, atom_opt, autojustify, mol_opt, - dir_opt+dir1_opt, dir_opt+dir2_opt, - chain2parse1, chain2parse2, model2parse1, model2parse2, - tmp_vec1, tmp_vec2, byresi_opt, chainmapfile, se_opt); + fname_lign, fname_matrix, sequence, d0_scale, m_opt, o_opt, + a_opt, d_opt, full_opt, TMcut, infmt1_opt, infmt2_opt, + ter_opt, split_opt, outfmt_opt, fast_opt, mirror_opt, + het_opt, atom_opt, autojustify, mol_opt, + dir_opt + dir1_opt, dir_opt + dir2_opt, + chain2parse1, chain2parse2, model2parse1, model2parse2, + tmp_vec1, tmp_vec2, byresi_opt, chainmapfile, se_opt); vector().swap(tmp_vec2); } vector().swap(tmp_vec1); } } - else if (dirpair_opt.size()==0) MMalign(xname, yname, fname_super, - fname_lign, fname_matrix, sequence, d0_scale, m_opt, o_opt, - a_opt, d_opt, full_opt, TMcut, infmt1_opt, infmt2_opt, - ter_opt, split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt, - atom_opt, autojustify, mol_opt, dir1_opt, dir2_opt, - chain2parse1, chain2parse2, model2parse1, model2parse2, - chain1_list, chain2_list, byresi_opt,chainmapfile, se_opt); + else if (dirpair_opt.size() == 0) + MMalign(xname, yname, fname_super, + fname_lign, fname_matrix, sequence, d0_scale, m_opt, o_opt, + a_opt, d_opt, full_opt, TMcut, infmt1_opt, infmt2_opt, + ter_opt, split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt, + atom_opt, autojustify, mol_opt, dir1_opt, dir2_opt, + chain2parse1, chain2parse2, model2parse1, model2parse2, + chain1_list, chain2_list, byresi_opt, chainmapfile, se_opt); else { vector tmp_vec1; vector tmp_vec2; - for (i=0;i().swap(chain1_list); @@ -3623,10 +3826,11 @@ int main(int argc, char *argv[]) vector().swap(model2parse1); vector().swap(model2parse2); vector().swap(sequence); - vector >().swap(chain_pair_list); + vector>().swap(chain_pair_list); t2 = clock(); - float diff = ((float)t2 - (float)t1)/CLOCKS_PER_SEC; - if (outfmt_opt<2) printf("#Total CPU time is %5.2f seconds\n", diff); + float diff = ((float)t2 - (float)t1) / CLOCKS_PER_SEC; + if (outfmt_opt < 2) + printf("#Total CPU time is %5.2f seconds\n", diff); return 0; } diff --git a/flexalign.h b/flexalign.h index e2f8db0..e5134d5 100644 --- a/flexalign.h +++ b/flexalign.h @@ -5,378 +5,397 @@ #include "TMalign.h" -void t_u2tu(double t0[3],double u0[3][3], vector &tu_tmp) +void t_u2tu(double t0[3], double u0[3][3], vector &tu_tmp) { - int i,j,k; - for (i=0;i<3;i++) tu_tmp[i]=t0[i]; - k=3; - for (i=0;i<3;i++) for (j=0;j<3;j++) - { - tu_tmp[k]=u0[i][j]; - k++; - } + int i, j, k; + for (i = 0; i < 3; i++) + tu_tmp[i] = t0[i]; + k = 3; + for (i = 0; i < 3; i++) + for (j = 0; j < 3; j++) + { + tu_tmp[k] = u0[i][j]; + k++; + } } -void tu2t_u(vector tu_tmp, double t0[3],double u0[3][3]) +void tu2t_u(vector tu_tmp, double t0[3], double u0[3][3]) { - int i,j,k; - for (i=0;i<3;i++) t0[i]=tu_tmp[i]; - k=3; - for (i=0;i<3;i++) for (j=0;j<3;j++) - { - u0[i][j]=tu_tmp[k]; - k++; - } + int i, j, k; + for (i = 0; i < 3; i++) + t0[i] = tu_tmp[i]; + k = 3; + for (i = 0; i < 3; i++) + for (j = 0; j < 3; j++) + { + u0[i][j] = tu_tmp[k]; + k++; + } } void aln2invmap(const string &seqxA, const string &seqyA, int *invmap) { - int i,j,r; - int ylen=0; - for (r=0;r >&tu_vec, - double &TM1, double &TM2, double &TM3, double &TM4, double &TM5, - double &d0_0, double &TM_0, - double &d0A, double &d0B, double &d0u, double &d0a, double &d0_out, - string &seqM, string &seqxA, string &seqyA, vector&do_vec, - double &rmsd0, int &L_ali, double &Liden, - double &TM_ali, double &rmsd_ali, int &n_ali, int &n_ali8, - const int xlen, const int ylen, - const vector sequence, const double Lnorm_ass, - const double d0_scale, const int i_opt, const int a_opt, - const bool u_opt, const bool d_opt, const bool fast_opt, - const int mol_type, const int hinge_opt) + const char *seqx, const char *seqy, const char *secx, const char *secy, + double t0[3], double u0[3][3], vector> &tu_vec, + double &TM1, double &TM2, double &TM3, double &TM4, double &TM5, + double &d0_0, double &TM_0, + double &d0A, double &d0B, double &d0u, double &d0a, double &d0_out, + string &seqM, string &seqxA, string &seqyA, vector &do_vec, + double &rmsd0, int &L_ali, double &Liden, + double &TM_ali, double &rmsd_ali, int &n_ali, int &n_ali8, + const int xlen, const int ylen, + const vector sequence, const double Lnorm_ass, + const double d0_scale, const int i_opt, const int a_opt, + const bool u_opt, const bool d_opt, const bool fast_opt, + const int mol_type, const int hinge_opt, const int ss_opt) { - vector tu_tmp(12,0); - int round2=tu_vec.size(); - if (round2==0) + vector tu_tmp(12, 0); + int round2 = tu_vec.size(); + if (round2 == 0) { TMalign_main(xa, ya, seqx, seqy, secx, secy, t0, u0, - TM1, TM2, TM3, TM4, TM5, d0_0, TM_0, - d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA, do_vec, - rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, - xlen, ylen, sequence, Lnorm_ass, - d0_scale, i_opt, a_opt, u_opt, d_opt, fast_opt, mol_type); - - t_u2tu(t0,u0,tu_tmp); + TM1, TM2, TM3, TM4, TM5, d0_0, TM_0, + d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA, do_vec, + rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, + xlen, ylen, sequence, Lnorm_ass, + d0_scale, i_opt, a_opt, u_opt, d_opt, fast_opt, mol_type, -1, ss_opt); + + t_u2tu(t0, u0, tu_tmp); tu_vec.push_back(tu_tmp); } - - int i,j,r; - int* invmap=new int[ylen+1]; - for (j=0;j TM2_h) ? TM1_h : TM2_h; + double TM = (TM1 > TM2) ? TM1 : TM2; + if (TM_h > TM) + { + TM1 = TM1_h; + TM2 = TM2_h; + TM3 = TM3_h; + TM4 = TM4_h; + TM5 = TM5_h; + seqM = seqM_h; + seqxA = seqxA_h; + seqyA = seqyA_h; + rmsd0 = rmsd0_h; + n_ali = n_ali_h; + n_ali8 = n_ali8_h; + for (j = 0; j < ylen + 1; j++) + invmap[j] = invmap_h[j]; + } + else + t_u2tu(t0, u0, tu_vec[0]); - double TM_h=(TM1_h>TM2_h)?TM1_h:TM2_h; - double TM =(TM1 >TM2 )?TM1 :TM2 ; - if (TM_h>TM) - { - TM1=TM1_h; - TM2=TM2_h; - TM3=TM3_h; - TM4=TM4_h; - TM5=TM5_h; - seqM=seqM_h; - seqxA=seqxA_h; - seqyA=seqyA_h; - rmsd0=rmsd0_h; - n_ali=n_ali_h; - n_ali8=n_ali8_h; - for (j=0;j r1toi(xlen_h,0); - vector r2toj(ylen_h,0); - - int r1,r2; - i=j=-1; - r1=r2=0; - for (r=0;r r1toi(xlen_h, 0); + vector r2toj(ylen_h, 0); + + int r1, r2; + i = j = -1; + r1 = r2 = 0; + for (r = 0; r < seqxA.size(); r++) + { + i += (seqxA[r] != '-'); + j += (seqyA[r] != '-'); + if (seqyA[r] == '-') + { + seqx_h[r1] = seqx[i]; + secx_h[r1] = secx[i]; + xa_h[r1][0] = xa[i][0]; + xa_h[r1][1] = xa[i][1]; + xa_h[r1][2] = xa[i][2]; + r1toi[r1] = i; r1++; } - if (seqxA[r]=='-') + if (seqxA[r] == '-') { - seqy_h[r2]=seqx[j]; - secy_h[r2]=secx[j]; - ya_h[r2][0]=ya[j][0]; - ya_h[r2][1]=ya[j][1]; - ya_h[r2][2]=ya[j][2]; - r2toj[r2]=j; + seqy_h[r2] = seqx[j]; + secy_h[r2] = secx[j]; + ya_h[r2][0] = ya[j][0]; + ya_h[r2][1] = ya[j][1]; + ya_h[r2][2] = ya[j][2]; + r2toj[r2] = j; r2++; } } - + double TM1_h, TM2_h; - double TM3_h, TM4_h, TM5_h; // for a_opt, u_opt, d_opt + double TM3_h, TM4_h, TM5_h; // for a_opt, u_opt, d_opt double d0_0_h, TM_0_h; double d0A_h, d0B_h, d0u_h, d0a_h; - double d0_out_h=5.0; - string seqM_h, seqxA_h, seqyA_h;// for output alignment + double d0_out_h = 5.0; + string seqM_h, seqxA_h, seqyA_h; // for output alignment double rmsd0_h = 0.0; - int L_ali_h=0; // Aligned length in standard_TMscore - double Liden_h=0; - double TM_ali_h, rmsd_ali_h; // TMscore and rmsd in standard_TMscore - int n_ali_h=0; - int n_ali8_h=0; + int L_ali_h = 0; // Aligned length in standard_TMscore + double Liden_h = 0; + double TM_ali_h, rmsd_ali_h; // TMscore and rmsd in standard_TMscore + int n_ali_h = 0; + int n_ali8_h = 0; TMalign_main(xa_h, ya_h, seqx_h, seqy_h, secx_h, secy_h, t0, u0, - TM1_h, TM2_h, TM3_h, TM4_h, TM5_h, d0_0_h, TM_0_h, d0A_h, d0B_h, - d0u_h, d0a_h, d0_out_h, seqM_h, seqxA_h, seqyA_h, do_vec, - rmsd0_h, L_ali_h, Liden_h, TM_ali_h, rmsd_ali_h, n_ali_h, n_ali8_h, - xlen_h, ylen_h, sequence, Lnorm_ass, - d0_scale, i_opt, a_opt, u_opt, d_opt, fast_opt, mol_type); - + TM1_h, TM2_h, TM3_h, TM4_h, TM5_h, d0_0_h, TM_0_h, d0A_h, d0B_h, + d0u_h, d0a_h, d0_out_h, seqM_h, seqxA_h, seqyA_h, do_vec, + rmsd0_h, L_ali_h, Liden_h, TM_ali_h, rmsd_ali_h, n_ali_h, n_ali8_h, + xlen_h, ylen_h, sequence, Lnorm_ass, + d0_scale, i_opt, a_opt, u_opt, d_opt, fast_opt, mol_type, -1, ss_opt); + do_rotation(xa, xt, xlen, t0, u0); - - TM1_h=TM1; - TM2_h=TM2; - TM3_h=TM3; - TM4_h=TM4; - TM5_h=TM5; - seqM_h=seqM; - seqxA_h=seqxA; - seqyA_h=seqyA; - rmsd0_h=rmsd0; - n_ali_h=n_ali; - n_ali8_h=n_ali8; - int* invmap_h=new int[ylen+1]; - for (j=0;j=5) - { - TM1=TM1_h; - TM2=TM2_h; - TM3=TM3_h; - TM4=TM4_h; - TM5=TM5_h; - seqM=seqM_h; - seqxA=seqxA_h; - seqyA=seqyA_h; - rmsd0=rmsd0_h; - n_ali=n_ali_h; - n_ali8=n_ali8_h; - t_u2tu(t0,u0,tu_tmp); + d0A, d0B, d0u, d0a, d0_out, seqM_h, seqxA_h, seqyA_h, do_vec, + rmsd0_h, L_ali, Liden, TM_ali, rmsd_ali, n_ali_h, n_ali8_h, + xlen, ylen, sequence, Lnorm_ass, d0_scale, i_opt, + a_opt, u_opt, d_opt, mol_type, 0, invmap_h, hinge + 1); + int new_ali = 0; + for (r = 0; r < seqM_h.size(); r++) + new_ali += (seqM_h[r] == hinge + '1'); + if (n_ali8_h - n_ali8 < 5) + new_ali = 0; + if (new_ali >= 5) + { + TM1 = TM1_h; + TM2 = TM2_h; + TM3 = TM3_h; + TM4 = TM4_h; + TM5 = TM5_h; + seqM = seqM_h; + seqxA = seqxA_h; + seqyA = seqyA_h; + rmsd0 = rmsd0_h; + n_ali = n_ali_h; + n_ali8 = n_ali8_h; + t_u2tu(t0, u0, tu_tmp); tu_vec.push_back(tu_tmp); - for (j=0;jhinge="<=0) cout<<"("<hinge="<=0) cout<<"("< seqM_char(ylen,' '); - vector di_vec(ylen,-1); + vector seqM_char(ylen, ' '); + vector di_vec(ylen, -1); double d; - for (hinge=tu_vec.size()-1;hinge>=0;hinge--) + for (hinge = tu_vec.size() - 1; hinge >= 0; hinge--) { - tu2t_u(tu_vec[hinge],t0,u0); + tu2t_u(tu_vec[hinge], t0, u0); do_rotation(xa, xt, xlen, t0, u0); - for (j=0;j=0;hinge--) + for (hinge = tu_vec.size() - 1; hinge >= 0; hinge--) { - j=-1; - for (r=0;r0 && (seqM[r-1]==hinge+'0' || seqM[r-1]==' ')) continue; - if (r0 && seqM[r-1]!=seqM[r+1]) continue; - if (r>0) seqM[r]=seqM_char[j]=seqM[r-1]; - else seqM[r]=seqM_char[j]=seqM[r+1]; + if (r < seqM.size() - 1 && (seqM[r + 1] == hinge + '0' || seqM[r + 1] == ' ')) + continue; + if (r > 0 && (seqM[r - 1] == hinge + '0' || seqM[r - 1] == ' ')) + continue; + if (r < seqM.size() - 1 && r > 0 && seqM[r - 1] != seqM[r + 1]) + continue; + if (r > 0) + seqM[r] = seqM_char[j] = seqM[r - 1]; + else + seqM[r] = seqM_char[j] = seqM[r + 1]; } } /* smooth out AFP assignment: remove singleton at the end of fragment */ - char left_hinge=' '; - char right_hinge=' '; - for (hinge=tu_vec.size()-1;hinge>=0;hinge--) + char left_hinge = ' '; + char right_hinge = ' '; + for (hinge = tu_vec.size() - 1; hinge >= 0; hinge--) { - j=-1; - for (r=0;r0 && seqM[r-1]==' ' && r 0 && seqM[r - 1] == ' ' && r < seqM.size() - 1 && seqM[r + 1] == ' ') continue; - - left_hinge=' '; - for (i=r-1;i>=0;i--) + + left_hinge = ' '; + for (i = r - 1; i >= 0; i--) { - if (seqM[i]==' ') continue; - left_hinge=seqM[i]; + if (seqM[i] == ' ') + continue; + left_hinge = seqM[i]; break; } - if (left_hinge==hinge+'0') continue; - - right_hinge=' '; - for (i=r+1;i=0;hinge--) + for (hinge = tu_vec.size() - 1; hinge >= 0; hinge--) { - j=-1; - for (r=0;r 0 && (seqM[r - 1] == ' ' || seqM[r - 1] == hinge + '0')) + continue; + if (r < seqM.size() - 2 && r > 0 && seqM[r - 1] != seqM[r + 2]) continue; - if (r>0 && (seqM[r-1]==' ' || seqM[r-1]==hinge+'0')) continue; - if (r0 && seqM[r-1]!=seqM[r+2]) continue; - if (r>0) seqM[r]=seqM_char[j]=seqM[r+1]=seqM_char[j+1]=seqM[r-1]; - else seqM[r]=seqM_char[j]=seqM[r+1]=seqM_char[j+1]=seqM[r+2]; + if (r > 0) + seqM[r] = seqM_char[j] = seqM[r + 1] = seqM_char[j + 1] = seqM[r - 1]; + else + seqM[r] = seqM_char[j] = seqM[r + 1] = seqM_char[j + 1] = seqM[r + 2]; } } /* smooth out AFP assignment: remove disconnected singleton */ - int i1,i2; - for (hinge=tu_vec.size()-1;hinge>=0;hinge--) + int i1, i2; + for (hinge = tu_vec.size() - 1; hinge >= 0; hinge--) { - j=-1; - for (r=0;r=0;i--) - { - if (seqM[i]==' ') continue; - left_hinge=seqM[i]; - i1=(r-i); + if (seqM[r] != hinge + '0') + continue; + + left_hinge = ' '; + for (i = r - 1; i >= 0; i--) + { + if (seqM[i] == ' ') + continue; + left_hinge = seqM[i]; + i1 = (r - i); break; } - if (left_hinge==hinge+'0') continue; - - right_hinge=' '; - for (i=r+1;i=0;hinge--) + for (hinge = tu_vec.size() - 1; hinge >= 0; hinge--) { - tu2t_u(tu_vec[hinge],t0,u0); + tu2t_u(tu_vec[hinge], t0, u0); do_rotation(xa, xt, xlen, t0, u0); - for (j=0;j0;hinge--) + TM2 /= xlen; + TM1 /= ylen; + TM3 /= (xlen + ylen) * 0.5; + TM4 /= Lnorm_ass; + TM5 /= ylen; + if (n_ali8) + rmsd0 = sqrt(rmsd0 / n_ali8); + for (hinge = tu_vec.size() - 1; hinge > 0; hinge--) { - int afp_len=0; - for (r=0;r >&tu_vec, double t[3], double u[3][3]) +void output_flexalign_rotation_matrix(const char *fname_matrix, + const vector> &tu_vec, double t[3], double u[3][3]) { stringstream ss; char dest[1000]; - for (int hinge=0;hinge >&tu_vec, - double t[3], double u[3][3], const int ter_opt, - const int mm_opt, const int split_opt, const int mirror_opt, - const char *seqM, const char *seqxA, const char *seqyA, - const vector&resi_vec1, const vector&resi_vec2, - const string chainID1, const string chainID2, - const int xlen, const int ylen, const double d0A, const int n_ali8, - const double rmsd, const double TM1, const double Liden) + const string fname_super, const vector> &tu_vec, + double t[3], double u[3][3], const int ter_opt, + const int mm_opt, const int split_opt, const int mirror_opt, + const char *seqM, const char *seqxA, const char *seqyA, + const vector &resi_vec1, const vector &resi_vec2, + const string chainID1, const string chainID2, + const int xlen, const int ylen, const double d0A, const int n_ali8, + const double rmsd, const double TM1, const double Liden) { stringstream buf; stringstream buf_all; stringstream buf_atm; stringstream buf_all_atm; stringstream buf_all_atm_lig; - //stringstream buf_pdb; + // stringstream buf_pdb; stringstream buf_tm; string line; - double x[3]; // before transform - double x1[3]; // after transform + double x[3]; // before transform + double x1[3]; // after transform bool after_ter; // true if passed the "TER" line in PDB string asym_id; // chain ID - - map resi2hinge_dict; - int r,i,j; - j=-1; - char hinge_char=0; - int ali_len=strlen(seqM); - for (r=0;r resi2hinge_dict; + int r, i, j; + j = -1; + char hinge_char = 0; + int ali_len = strlen(seqM); + for (r = 0; r < strlen(seqxA); r++) { - if (seqxA[r]=='-') continue; + if (seqxA[r] == '-') + continue; j++; - hinge_char=seqM[r]; - if (hinge_char==' ') + hinge_char = seqM[r]; + if (hinge_char == ' ') { - for (i=1;i=0 && seqM[r-i]!=' ') - hinge_char=seqM[r-i]; - else if (r+i= 0 && seqM[r - i] != ' ') + hinge_char = seqM[r - i]; + else if (r + i < xlen && seqM[r + i] != ' ') + hinge_char = seqM[r + i]; + if (hinge_char != ' ') + break; } } - resi2hinge_dict[resi_vec1[j]]=hinge_char-'0'; + int hinge_idx = 0; + if (hinge_char >= '0' && hinge_char <= '9') + { + hinge_idx = hinge_char - '0'; + } + else if (hinge_char >= 'a' && hinge_char <= 'z') + { + hinge_idx = hinge_char - 'a' + 10; + } + else if (hinge_char >= 'A' && hinge_char <= 'Z') + { + hinge_idx = hinge_char - 'A' + 36; + } + resi2hinge_dict[resi_vec1[j]] = hinge_idx; } - string resi=resi_vec1[0]; - int read_resi=resi.size()-4; - - buf_tm<<"REMARK US-align" - <<"\nREMARK Structure 1:"<=1) // align one chain from model 1 + if (split_opt == 2 && ter_opt >= 1) // align one chain from model 1 { - chain1_sele=chainID1.substr(1); - chain2_sele=chainID2.substr(1); + chain1_sele = chainID1.substr(1); + chain2_sele = chainID2.substr(1); } - else if (split_opt==2 && ter_opt==0) // align one chain from each model + else if (split_opt == 2 && ter_opt == 0) // align one chain from each model { - for (i=1;i _atom_site; + map _atom_site; int atom_site_pos; vector line_vec; - string atom; // 4-character atom name - string AA; // 3-character residue name - string inscode; // 1-character insertion code + string atom; // 4-character atom name + string AA; // 3-character residue name + string inscode; // 1-character insertion code string model_index; // model index - bool is_mmcif=false; + bool is_mmcif = false; /* used for CONECT record of chain1 */ - int ca_idx1=0; // all CA atoms - int lig_idx1=0; // all atoms - vector idx_vec; + int ca_idx1 = 0; // all CA atoms + int lig_idx1 = 0; // all atoms + vector idx_vec; /* used for CONECT record of chain2 */ - int ca_idx2=0; // all CA atoms - int lig_idx2=0; // all atoms + int ca_idx2 = 0; // all CA atoms + int lig_idx2 = 0; // all atoms /* extract aligned region */ vector resi_aln1; vector resi_aln2; - int i1=-1; - int i2=-1; + int i1 = -1; + int i2 = -1; if (!mm_opt) { - for (i=0;i=3 && line.compare(0,3,"TER")==0) after_ter=true; - if (is_mmcif==false && line.size()>=54 && - (line.compare(0, 6, "ATOM ")==0 || - line.compare(0, 6, "HETATM")==0)) // PDB format - { - if (line[16]!='A' && line[16]!=' ') continue; - x[0]=atof(line.substr(30,8).c_str()); - x[1]=atof(line.substr(38,8).c_str()); - x[2]=atof(line.substr(46,8).c_str()); - if (mirror_opt) x[2]=-x[2]; - if (read_resi==1) resi=line.substr(22,5); - else resi=line.substr(22,5)+line[21]; - hinge=0; - if (resi2hinge_dict.count(resi)) hinge=resi2hinge_dict[resi]; - tu2t_u(tu_vec[hinge],t,u); + if (ter_opt >= 3 && line.compare(0, 3, "TER") == 0) + after_ter = true; + if (is_mmcif == false && line.size() >= 54 && + (line.compare(0, 6, "ATOM ") == 0 || + line.compare(0, 6, "HETATM") == 0)) // PDB format + { + if (line[16] != 'A' && line[16] != ' ') + continue; + x[0] = atof(line.substr(30, 8).c_str()); + x[1] = atof(line.substr(38, 8).c_str()); + x[2] = atof(line.substr(46, 8).c_str()); + if (mirror_opt) + x[2] = -x[2]; + if (read_resi == 1) + resi = line.substr(22, 5); + else + resi = line.substr(22, 5) + line[21]; + hinge = 0; + if (resi2hinge_dict.count(resi)) + hinge = resi2hinge_dict[resi]; + tu2t_u(tu_vec[hinge], t, u); transform(t, u, x, x1); - //buf_pdb<=2) - { - if (ca_idx1 && asym_id.size() && asym_id!=line.substr(21,1)) + buf_all_atm_lig << line.substr(0, 6) << setw(5) << lig_idx1 + << line.substr(11, 9) << " A" << line.substr(22, 8) + << setiosflags(ios::fixed) << setprecision(3) + << setw(8) << x1[0] << setw(8) << x1[1] << setw(8) << x1[2] << '\n'; + if (chain1_sele.size() && line[21] != chain1_sele[0]) + continue; + if (after_ter || line.compare(0, 6, "ATOM ")) + continue; + if (ter_opt >= 2) + { + if (ca_idx1 && asym_id.size() && asym_id != line.substr(21, 1)) { - after_ter=true; + after_ter = true; continue; } - asym_id=line[21]; + asym_id = line[21]; } - buf_all_atm<<"ATOM "<=2) + resi = line_vec[_atom_site["auth_seq_id"]]; + else + resi = line_vec[_atom_site["label_seq_id"]]; + if (_atom_site.count("pdbx_PDB_ins_code") && + line_vec[_atom_site["pdbx_PDB_ins_code"]] != "?") + resi += line_vec[_atom_site["pdbx_PDB_ins_code"]][0]; + else + resi += " "; + if (read_resi >= 2) { if (_atom_site.count("auth_asym_id")) - asym_id=line_vec[_atom_site["auth_asym_id"]]; - else asym_id=line_vec[_atom_site["label_asym_id"]]; - if (asym_id==".") asym_id=" "; - resi+=asym_id[0]; + asym_id = line_vec[_atom_site["auth_asym_id"]]; + else + asym_id = line_vec[_atom_site["label_asym_id"]]; + if (asym_id == ".") + asym_id = " "; + resi += asym_id[0]; } - hinge=0; - if (resi2hinge_dict.count(resi)) hinge=resi2hinge_dict[resi]; - tu2t_u(tu_vec[hinge],t,u); + hinge = 0; + if (resi2hinge_dict.count(resi)) + hinge = resi2hinge_dict[resi]; + tu2t_u(tu_vec[hinge], t, u); transform(t, u, x, x1); - if (_atom_site.count("label_alt_id")==0 || - line_vec[_atom_site["label_alt_id"]]=="." || - line_vec[_atom_site["label_alt_id"]]=="A") + if (_atom_site.count("label_alt_id") == 0 || + line_vec[_atom_site["label_alt_id"]] == "." || + line_vec[_atom_site["label_alt_id"]] == "A") { - atom=line_vec[_atom_site["label_atom_id"]]; - if (atom[0]=='"') atom=atom.substr(1); - if (atom.size() && atom[atom.size()-1]=='"') - atom=atom.substr(0,atom.size()-1); - if (atom.size()==0) atom=" "; - else if (atom.size()==1) atom=" "+atom+" "; - else if (atom.size()==2) atom=" "+atom+" "; - else if (atom.size()==3) atom=" "+atom; - else if (atom.size()>=5) atom=atom.substr(0,4); - - AA=line_vec[_atom_site["label_comp_id"]]; // residue name - if (AA.size()==1) AA=" "+AA; - else if (AA.size()==2) AA=" " +AA; - else if (AA.size()>=4) AA=AA.substr(0,3); - + atom = line_vec[_atom_site["label_atom_id"]]; + if (atom[0] == '"') + atom = atom.substr(1); + if (atom.size() && atom[atom.size() - 1] == '"') + atom = atom.substr(0, atom.size() - 1); + if (atom.size() == 0) + atom = " "; + else if (atom.size() == 1) + atom = " " + atom + " "; + else if (atom.size() == 2) + atom = " " + atom + " "; + else if (atom.size() == 3) + atom = " " + atom; + else if (atom.size() >= 5) + atom = atom.substr(0, 4); + + AA = line_vec[_atom_site["label_comp_id"]]; // residue name + if (AA.size() == 1) + AA = " " + AA; + else if (AA.size() == 2) + AA = " " + AA; + else if (AA.size() >= 4) + AA = AA.substr(0, 3); + if (_atom_site.count("auth_seq_id")) - resi=line_vec[_atom_site["auth_seq_id"]]; - else resi=line_vec[_atom_site["label_seq_id"]]; - while (resi.size()<4) resi=' '+resi; - if (resi.size()>4) resi=resi.substr(0,4); - - inscode=' '; - if (_atom_site.count("pdbx_PDB_ins_code") && - line_vec[_atom_site["pdbx_PDB_ins_code"]]!="?") - inscode=line_vec[_atom_site["pdbx_PDB_ins_code"]][0]; + resi = line_vec[_atom_site["auth_seq_id"]]; + else + resi = line_vec[_atom_site["label_seq_id"]]; + while (resi.size() < 4) + resi = ' ' + resi; + if (resi.size() > 4) + resi = resi.substr(0, 4); + + inscode = ' '; + if (_atom_site.count("pdbx_PDB_ins_code") && + line_vec[_atom_site["pdbx_PDB_ins_code"]] != "?") + inscode = line_vec[_atom_site["pdbx_PDB_ins_code"]][0]; if (_atom_site.count("auth_asym_id")) { - if (chain1_sele.size()) after_ter - =line_vec[_atom_site["auth_asym_id"]]!=chain1_sele; - else if (ter_opt>=2 && ca_idx1 && asym_id.size() && - asym_id!=line_vec[_atom_site["auth_asym_id"]]) - after_ter=true; - asym_id=line_vec[_atom_site["auth_asym_id"]]; + if (chain1_sele.size()) + after_ter = line_vec[_atom_site["auth_asym_id"]] != chain1_sele; + else if (ter_opt >= 2 && ca_idx1 && asym_id.size() && + asym_id != line_vec[_atom_site["auth_asym_id"]]) + after_ter = true; + asym_id = line_vec[_atom_site["auth_asym_id"]]; } else if (_atom_site.count("label_asym_id")) { - if (chain1_sele.size()) after_ter - =line_vec[_atom_site["label_asym_id"]]!=chain1_sele; - if (ter_opt>=2 && ca_idx1 && asym_id.size() && - asym_id!=line_vec[_atom_site["label_asym_id"]]) - after_ter=true; - asym_id=line_vec[_atom_site["label_asym_id"]]; + if (chain1_sele.size()) + after_ter = line_vec[_atom_site["label_asym_id"]] != chain1_sele; + if (ter_opt >= 2 && ca_idx1 && asym_id.size() && + asym_id != line_vec[_atom_site["label_asym_id"]]) + after_ter = true; + asym_id = line_vec[_atom_site["label_asym_id"]]; } - //buf_pdb<=1 && line.compare(0,3,"END")==0) break; + // buf_pdb<= 1 && line.compare(0, 3, "END") == 0) + break; } } fin.close(); - if (!mm_opt) buf<<"TER\n"; - buf_all<<"TER\n"; - if (!mm_opt) buf_atm<<"TER\n"; - buf_all_atm<<"TER\n"; - buf_all_atm_lig<<"TER\n"; - for (i=1;i=3 && line.compare(0,3,"TER")==0) after_ter=true; - if (line.size()>=54 && (line.compare(0, 6, "ATOM ")==0 || - line.compare(0, 6, "HETATM")==0)) // PDB format + if (ter_opt >= 3 && line.compare(0, 3, "TER") == 0) + after_ter = true; + if (line.size() >= 54 && (line.compare(0, 6, "ATOM ") == 0 || + line.compare(0, 6, "HETATM") == 0)) // PDB format { - if (line[16]!='A' && line[16]!=' ') continue; - if (after_ter && line.compare(0,6,"ATOM ")==0) continue; + if (line[16] != 'A' && line[16] != ' ') + continue; + if (after_ter && line.compare(0, 6, "ATOM ") == 0) + continue; lig_idx2++; - buf_all_atm_lig<=2) + buf_all_atm_lig << line.substr(0, 6) << setw(5) << lig_idx1 + lig_idx2 + << line.substr(11, 9) << " B" << line.substr(22, 32) << '\n'; + if (chain1_sele.size() && line[21] != chain1_sele[0]) + continue; + if (after_ter || line.compare(0, 6, "ATOM ")) + continue; + if (ter_opt >= 2) { - if (ca_idx2 && asym_id.size() && asym_id!=line.substr(21,1)) + if (ca_idx2 && asym_id.size() && asym_id != line.substr(21, 1)) { - after_ter=true; + after_ter = true; continue; } - asym_id=line[21]; + asym_id = line[21]; } - buf_all_atm<<"ATOM "<=5) atom=atom.substr(0,4); - - AA=line_vec[_atom_site["label_comp_id"]]; // residue name - if (AA.size()==1) AA=" "+AA; - else if (AA.size()==2) AA=" " +AA; - else if (AA.size()>=4) AA=AA.substr(0,3); - + atom = line_vec[_atom_site["label_atom_id"]]; + if (atom[0] == '"') + atom = atom.substr(1); + if (atom.size() && atom[atom.size() - 1] == '"') + atom = atom.substr(0, atom.size() - 1); + if (atom.size() == 0) + atom = " "; + else if (atom.size() == 1) + atom = " " + atom + " "; + else if (atom.size() == 2) + atom = " " + atom + " "; + else if (atom.size() == 3) + atom = " " + atom; + else if (atom.size() >= 5) + atom = atom.substr(0, 4); + + AA = line_vec[_atom_site["label_comp_id"]]; // residue name + if (AA.size() == 1) + AA = " " + AA; + else if (AA.size() == 2) + AA = " " + AA; + else if (AA.size() >= 4) + AA = AA.substr(0, 3); + if (_atom_site.count("auth_seq_id")) - resi=line_vec[_atom_site["auth_seq_id"]]; - else resi=line_vec[_atom_site["label_seq_id"]]; - while (resi.size()<4) resi=' '+resi; - if (resi.size()>4) resi=resi.substr(0,4); - - inscode=' '; - if (_atom_site.count("pdbx_PDB_ins_code") && - line_vec[_atom_site["pdbx_PDB_ins_code"]]!="?") - inscode=line_vec[_atom_site["pdbx_PDB_ins_code"]][0]; - + resi = line_vec[_atom_site["auth_seq_id"]]; + else + resi = line_vec[_atom_site["label_seq_id"]]; + while (resi.size() < 4) + resi = ' ' + resi; + if (resi.size() > 4) + resi = resi.substr(0, 4); + + inscode = ' '; + if (_atom_site.count("pdbx_PDB_ins_code") && + line_vec[_atom_site["pdbx_PDB_ins_code"]] != "?") + inscode = line_vec[_atom_site["pdbx_PDB_ins_code"]][0]; + if (_atom_site.count("auth_asym_id")) { - if (chain2_sele.size()) after_ter - =line_vec[_atom_site["auth_asym_id"]]!=chain2_sele; - if (ter_opt>=2 && ca_idx2 && asym_id.size() && - asym_id!=line_vec[_atom_site["auth_asym_id"]]) - after_ter=true; - asym_id=line_vec[_atom_site["auth_asym_id"]]; + if (chain2_sele.size()) + after_ter = line_vec[_atom_site["auth_asym_id"]] != chain2_sele; + if (ter_opt >= 2 && ca_idx2 && asym_id.size() && + asym_id != line_vec[_atom_site["auth_asym_id"]]) + after_ter = true; + asym_id = line_vec[_atom_site["auth_asym_id"]]; } else if (_atom_site.count("label_asym_id")) { - if (chain2_sele.size()) after_ter - =line_vec[_atom_site["label_asym_id"]]!=chain2_sele; - if (ter_opt>=2 && ca_idx2 && asym_id.size() && - asym_id!=line_vec[_atom_site["label_asym_id"]]) - after_ter=true; - asym_id=line_vec[_atom_site["label_asym_id"]]; + if (chain2_sele.size()) + after_ter = line_vec[_atom_site["label_asym_id"]] != chain2_sele; + if (ter_opt >= 2 && ca_idx2 && asym_id.size() && + asym_id != line_vec[_atom_site["label_asym_id"]]) + after_ter = true; + asym_id = line_vec[_atom_site["label_asym_id"]]; } - if (after_ter==false || - line_vec[_atom_site["group_PDB"]]=="HETATM") + if (after_ter == false || + line_vec[_atom_site["group_PDB"]] == "HETATM") { lig_idx2++; - buf_all_atm_lig<=1 && line.compare(0,3,"END")==0) break; + if (ter_opt >= 1 && line.compare(0, 3, "END") == 0) + break; } } fin.close(); - if (!mm_opt) buf<<"TER\n"; - buf_all<<"TER\n"; - if (!mm_opt) buf_atm<<"TER\n"; - buf_all_atm<<"TER\n"; - buf_all_atm_lig<<"TER\n"; - for (i=ca_idx1+1;i >&tu_vec, - double t[3], double u[3][3], const int ter_opt, - const int mm_opt, const int split_opt, const int mirror_opt, - const char *seqM, const char *seqxA, const char *seqyA, - const vector&resi_vec1, const vector&resi_vec2, - const string chainID1, const string chainID2) + const string fname_super, const vector> &tu_vec, + double t[3], double u[3][3], const int ter_opt, + const int mm_opt, const int split_opt, const int mirror_opt, + const char *seqM, const char *seqxA, const char *seqyA, + const vector &resi_vec1, const vector &resi_vec2, + const string chainID1, const string chainID2) { - int compress_type=0; // uncompressed file + int compress_type = 0; // uncompressed file ifstream fin; #ifndef REDI_PSTREAM_H_SEEN ifstream fin_gz; #else redi::ipstream fin_gz; // if file is compressed - if (xname.size()>=3 && - xname.substr(xname.size()-3,3)==".gz") + if (xname.size() >= 3 && + xname.substr(xname.size() - 3, 3) == ".gz") { - fin_gz.open("gunzip -c "+xname); - compress_type=1; + fin_gz.open("gunzip -c " + xname); + compress_type = 1; } - else if (xname.size()>=4 && - xname.substr(xname.size()-4,4)==".bz2") + else if (xname.size() >= 4 && + xname.substr(xname.size() - 4, 4) == ".bz2") { - fin_gz.open("bzcat "+xname); - compress_type=2; + fin_gz.open("bzcat " + xname); + compress_type = 2; } else #endif - fin.open(xname.c_str()); - - map resi2hinge_dict; - int r,i,j; - j=-1; - char hinge_char=0; - int xlen=resi_vec1.size(); - int ali_len=strlen(seqM); - for (r=0;r resi2hinge_dict; + int r, i, j; + j = -1; + char hinge_char = 0; + int xlen = resi_vec1.size(); + int ali_len = strlen(seqM); + for (r = 0; r < strlen(seqxA); r++) { - if (seqxA[r]=='-') continue; + if (seqxA[r] == '-') + continue; j++; - hinge_char=seqM[r]; - if (hinge_char==' ') + hinge_char = seqM[r]; + if (hinge_char == ' ') { - for (i=1;i=0 && seqM[r-i]!=' ') - hinge_char=seqM[r-i]; - else if (r+i= 0 && seqM[r - i] != ' ') + hinge_char = seqM[r - i]; + else if (r + i < xlen && seqM[r + i] != ' ') + hinge_char = seqM[r + i]; + if (hinge_char != ' ') + break; } } - resi2hinge_dict[resi_vec1[j]]=hinge_char-'0'; + int hinge_idx = 0; + if (hinge_char >= '0' && hinge_char <= '9') + { + hinge_idx = hinge_char - '0'; + } + else if (hinge_char >= 'a' && hinge_char <= 'z') + { + hinge_idx = hinge_char - 'a' + 10; + } + else if (hinge_char >= 'A' && hinge_char <= 'Z') + { + hinge_idx = hinge_char - 'A' + 36; + } + resi2hinge_dict[resi_vec1[j]] = hinge_idx; } - string resi=resi_vec1[0]; - int read_resi=resi.size()-4; + string resi = resi_vec1[0]; + int read_resi = resi.size() - 4; stringstream buf; stringstream buf_pymol; @@ -1409,161 +1594,195 @@ void output_flexalign_pymol(const string xname, const string yname, double x1[3]; // after transform /* for PDBx/mmCIF only */ - map _atom_site; + map _atom_site; size_t atom_site_pos; vector line_vec; - int infmt=-1; // 0 - PDB, 3 - PDBx/mmCIF - int hinge=0; - string asym_id="."; // this is similar to chainID, except that - // chainID is char while asym_id is a string - // with possibly multiple char - while (compress_type?fin_gz.good():fin.good()) + int infmt = -1; // 0 - PDB, 3 - PDBx/mmCIF + int hinge = 0; + string asym_id = "."; // this is similar to chainID, except that + // chainID is char while asym_id is a string + // with possibly multiple char + while (compress_type ? fin_gz.good() : fin.good()) { - if (compress_type) getline(fin_gz, line); - else getline(fin, line); - if (line.compare(0, 6, "ATOM ")==0 || - line.compare(0, 6, "HETATM")==0) // PDB format - { - infmt=0; - x[0]=atof(line.substr(30,8).c_str()); - x[1]=atof(line.substr(38,8).c_str()); - x[2]=atof(line.substr(46,8).c_str()); - if (mirror_opt) x[2]=-x[2]; - if (read_resi==1) resi=line.substr(22,5); - else resi=line.substr(22,5)+line[21]; - hinge=0; - if (resi2hinge_dict.count(resi)) hinge=resi2hinge_dict[resi]; - tu2t_u(tu_vec[hinge],t,u); + if (compress_type) + getline(fin_gz, line); + else + getline(fin, line); + if (line.compare(0, 6, "ATOM ") == 0 || + line.compare(0, 6, "HETATM") == 0) // PDB format + { + infmt = 0; + x[0] = atof(line.substr(30, 8).c_str()); + x[1] = atof(line.substr(38, 8).c_str()); + x[2] = atof(line.substr(46, 8).c_str()); + if (mirror_opt) + x[2] = -x[2]; + if (read_resi == 1) + resi = line.substr(22, 5); + else + resi = line.substr(22, 5) + line[21]; + hinge = 0; + if (resi2hinge_dict.count(resi)) + hinge = resi2hinge_dict[resi]; + tu2t_u(tu_vec[hinge], t, u); transform(t, u, x, x1); - buf<=2) + resi = line_vec[_atom_site["auth_seq_id"]]; + else + resi = line_vec[_atom_site["label_seq_id"]]; + if (_atom_site.count("pdbx_PDB_ins_code") && + line_vec[_atom_site["pdbx_PDB_ins_code"]] != "?") + resi += line_vec[_atom_site["pdbx_PDB_ins_code"]][0]; + else + resi += " "; + if (read_resi >= 2) { if (_atom_site.count("auth_asym_id")) - asym_id=line_vec[_atom_site["auth_asym_id"]]; - else asym_id=line_vec[_atom_site["label_asym_id"]]; - if (asym_id==".") asym_id=" "; - resi+=asym_id[0]; + asym_id = line_vec[_atom_site["auth_asym_id"]]; + else + asym_id = line_vec[_atom_site["label_asym_id"]]; + if (asym_id == ".") + asym_id = " "; + resi += asym_id[0]; } - hinge=0; - if (resi2hinge_dict.count(resi)) hinge=resi2hinge_dict[resi]; - tu2t_u(tu_vec[hinge],t,u); + hinge = 0; + if (resi2hinge_dict.count(resi)) + hinge = resi2hinge_dict[resi]; + tu2t_u(tu_vec[hinge], t, u); transform(t, u, x, x1); - for (atom_site_pos=0; atom_site_pos<_atom_site.size(); atom_site_pos++) + for (atom_site_pos = 0; atom_site_pos < _atom_site.size(); atom_site_pos++) { - if (atom_site_pos==_atom_site["Cartn_x"]) - buf<=1 && line.compare(0,3,"END")==0) break; + buf << line << '\n'; + if (ter_opt >= 1 && line.compare(0, 3, "END") == 0) + break; } } - if (compress_type) fin_gz.close(); - else fin.close(); + if (compress_type) + fin_gz.close(); + else + fin.close(); - string fname_super_full=fname_super; - if (infmt==0) fname_super_full+=".pdb"; - else if (infmt==3) fname_super_full+=".cif"; + string fname_super_full = fname_super; + if (infmt == 0) + fname_super_full += ".pdb"; + else if (infmt == 3) + fname_super_full += ".cif"; ofstream fp; fp.open(fname_super_full.c_str()); - fp<=1) // align one chain from model 1 + if (split_opt == 2 && ter_opt >= 1) // align one chain from model 1 { - chain1_sele=" and c. "+chainID1.substr(1); - chain2_sele=" and c. "+chainID2.substr(1); + chain1_sele = " and c. " + chainID1.substr(1); + chain2_sele = " and c. " + chainID2.substr(1); } - else if (split_opt==2 && ter_opt==0) // align one chain from each model + else if (split_opt == 2 && ter_opt == 0) // align one chain from each model { - for (i=1;i pml_list; - pml_list.push_back(fname_super+""); - pml_list.push_back(fname_super+"_atm"); - pml_list.push_back(fname_super+"_all"); - pml_list.push_back(fname_super+"_all_atm"); - pml_list.push_back(fname_super+"_all_atm_lig"); + pml_list.push_back(fname_super + ""); + pml_list.push_back(fname_super + "_atm"); + pml_list.push_back(fname_super + "_all"); + pml_list.push_back(fname_super + "_all_atm"); + pml_list.push_back(fname_super + "_all_atm_lig"); - for (int p=0;p >&tu_vec, const double TM1, const double TM2, - const double TM3, const double TM4, const double TM5, - const double rmsd, const double d0_out, const char *seqM, - const char *seqxA, const char *seqyA, const double Liden, - const int n_ali8, const int L_ali, const double TM_ali, - const double rmsd_ali, const double TM_0, const double d0_0, - const double d0A, const double d0B, const double Lnorm_ass, - const double d0_scale, const double d0a, const double d0u, - const char* fname_matrix, const int outfmt_opt, const int ter_opt, - const int mm_opt, const int split_opt, const int o_opt, - const string fname_super, const int i_opt, const int a_opt, - const bool u_opt, const bool d_opt, const int mirror_opt, - const vector&resi_vec1, const vector&resi_vec2) + const string chainID1, const string chainID2, + const int xlen, const int ylen, double t[3], double u[3][3], + const vector> &tu_vec, const double TM1, const double TM2, + const double TM3, const double TM4, const double TM5, + const double rmsd, const double d0_out, const char *seqM, + const char *seqxA, const char *seqyA, const double Liden, + const int n_ali8, const int L_ali, const double TM_ali, + const double rmsd_ali, const double TM_0, const double d0_0, + const double d0A, const double d0B, const double Lnorm_ass, + const double d0_scale, const double d0a, const double d0u, + const char *fname_matrix, const int outfmt_opt, const int ter_opt, + const int mm_opt, const int split_opt, const int o_opt, + const string fname_super, const int i_opt, const int a_opt, + const bool u_opt, const bool d_opt, const int mirror_opt, + const vector &resi_vec1, const vector &resi_vec2) { - if (outfmt_opt<=0) + if (outfmt_opt <= 0) { printf("\nName of Structure_1: %s%s (to be superimposed onto Structure_2)\n", - xname.c_str(), chainID1.c_str()); + xname.c_str(), chainID1.c_str()); printf("Name of Structure_2: %s%s\n", yname.c_str(), chainID2.c_str()); printf("Length of Structure_1: %d residues\n", xlen); printf("Length of Structure_2: %d residues\n\n", ylen); @@ -1757,70 +1986,1348 @@ void output_flexalign_results(const string xname, const string yname, if (i_opt) printf("User-specified initial alignment: TM/Lali/rmsd = %7.5lf, %4d, %6.3lf\n", TM_ali, L_ali, rmsd_ali); - printf("Aligned length= %d, RMSD= %6.2f, Seq_ID=n_identical/n_aligned= %4.3f\n", n_ali8, rmsd, (n_ali8>0)?Liden/n_ali8:0); + printf("Aligned length= %d, RMSD= %6.2f, Seq_ID=n_identical/n_aligned= %4.3f\n", n_ali8, rmsd, (n_ali8 > 0) ? Liden / n_ali8 : 0); printf("TM-score= %6.5f (normalized by length of Structure_1: L=%d, d0=%.2f)\n", TM2, xlen, d0B); printf("TM-score= %6.5f (normalized by length of Structure_2: L=%d, d0=%.2f)\n", TM1, ylen, d0A); - if (a_opt==1) - printf("TM-score= %6.5f (if normalized by average length of two structures: L=%.1f, d0=%.2f)\n", TM3, (xlen+ylen)*0.5, d0a); + if (a_opt == 1) + printf("TM-score= %6.5f (if normalized by average length of two structures: L=%.1f, d0=%.2f)\n", TM3, (xlen + ylen) * 0.5, d0a); if (u_opt) printf("TM-score= %6.5f (normalized by user-specified L=%.2f and d0=%.2f)\n", TM4, Lnorm_ass, d0u); if (d_opt) printf("TM-score= %6.5f (scaled by user-specified d0=%.2f, and L=%d)\n", TM5, d0_scale, ylen); printf("(You should use TM-score normalized by length of the reference structure)\n"); - - //output alignment - printf("\n([0-9] denote different aligned fragment pairs separated by different hinges)\n"); + + // output alignment + printf("\n([0-9,a-z,A-Z] denote different aligned fragment pairs separated by different hinges)\n"); printf("%s\n", seqxA); printf("%s\n", seqM); printf("%s\n", seqyA); } - else if (outfmt_opt==1) + else if (outfmt_opt == 1) { printf(">%s%s\tL=%d\td0=%.2f\tseqID=%.3f\tTM-score=%.5f\n", - xname.c_str(), chainID1.c_str(), xlen, d0B, Liden/xlen, TM2); + xname.c_str(), chainID1.c_str(), xlen, d0B, Liden / xlen, TM2); printf("%s\n", seqxA); printf(">%s%s\tL=%d\td0=%.2f\tseqID=%.3f\tTM-score=%.5f\n", - yname.c_str(), chainID2.c_str(), ylen, d0A, Liden/ylen, TM1); + yname.c_str(), chainID2.c_str(), ylen, d0A, Liden / ylen, TM1); printf("%s\n", seqyA); printf("# Lali=%d\tRMSD=%.2f\tseqID_ali=%.3f\n", - n_ali8, rmsd, (n_ali8>0)?Liden/n_ali8:0); + n_ali8, rmsd, (n_ali8 > 0) ? Liden / n_ali8 : 0); if (i_opt) printf("# User-specified initial alignment: TM=%.5lf\tLali=%4d\trmsd=%.3lf\n", TM_ali, L_ali, rmsd_ali); - if(a_opt) - printf("# TM-score=%.5f (normalized by average length of two structures: L=%.1f\td0=%.2f)\n", TM3, (xlen+ylen)*0.5, d0a); + if (a_opt) + printf("# TM-score=%.5f (normalized by average length of two structures: L=%.1f\td0=%.2f)\n", TM3, (xlen + ylen) * 0.5, d0a); - if(u_opt) + if (u_opt) printf("# TM-score=%.5f (normalized by user-specified L=%.2f\td0=%.2f)\n", TM4, Lnorm_ass, d0u); - if(d_opt) + if (d_opt) printf("# TM-score=%.5f (scaled by user-specified d0=%.2f\tL=%d)\n", TM5, d0_scale, ylen); printf("$$$$\n"); } - else if (outfmt_opt==2) + else if (outfmt_opt == 2) { - printf("%s%s\t%s%s\t%.4f\t%.4f\t%.2f\t%4.3f\t%4.3f\t%4.3f\t%d\t%d\t%d", - xname.c_str(), chainID1.c_str(), yname.c_str(), chainID2.c_str(), - TM2, TM1, rmsd, Liden/xlen, Liden/ylen, (n_ali8>0)?Liden/n_ali8:0, - xlen, ylen, n_ali8); + printf("%s%s\t%s%s\t%.4f\t%.4f\t%.2f\t%4.3f\t%4.3f\t%4.3f\t%d\t%d\t%d\t%d", + xname.c_str(), chainID1.c_str(), yname.c_str(), chainID2.c_str(), + TM2, TM1, rmsd, Liden / xlen, Liden / ylen, (n_ali8 > 0) ? Liden / n_ali8 : 0, + xlen, ylen, n_ali8, (int)tu_vec.size()); } cout << endl; - if (strlen(fname_matrix)) output_flexalign_rotation_matrix( + if (strlen(fname_matrix)) + output_flexalign_rotation_matrix( fname_matrix, tu_vec, t, u); - if (o_opt==1) output_flexalign_pymol(xname, yname, fname_super, tu_vec, - t, u, ter_opt, mm_opt, split_opt, mirror_opt, seqM, seqxA, seqyA, - resi_vec1, resi_vec2, chainID1, chainID2); - else if (o_opt==2) + if (o_opt == 1) + output_flexalign_pymol(xname, yname, fname_super, tu_vec, + t, u, ter_opt, mm_opt, split_opt, mirror_opt, seqM, seqxA, seqyA, + resi_vec1, resi_vec2, chainID1, chainID2); + else if (o_opt == 2) output_flexalign_rasmol(xname, yname, fname_super, tu_vec, - t, u, ter_opt, mm_opt, split_opt, mirror_opt, seqM, seqxA, seqyA, - resi_vec1, resi_vec2, chainID1, chainID2, - xlen, ylen, d0A, n_ali8, rmsd, TM1, Liden); + t, u, ter_opt, mm_opt, split_opt, mirror_opt, seqM, seqxA, seqyA, + resi_vec1, resi_vec2, chainID1, chainID2, + xlen, ylen, d0A, n_ali8, rmsd, TM1, Liden); +} + +// Data structure to hold outputs of flexalign_main to avoid parameter clutter +struct FlexAlignResult +{ + double t0[3]; + double u0[3][3]; + vector> tu_vec; + double TM1, TM2, TM3, TM4, TM5; + double d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out; + string seqM, seqxA, seqyA; + vector do_vec; + double rmsd0, Liden, TM_ali, rmsd_ali; + int L_ali, n_ali, n_ali8, hingeNum; + + FlexAlignResult() : TM1(-1.0), TM2(-1.0), TM3(-1.0), TM4(-1.0), TM5(-1.0), + d0_0(0.0), TM_0(0.0), d0A(0.0), d0B(0.0), d0u(0.0), d0a(0.0), d0_out(5.0), + rmsd0(0.0), Liden(0.0), TM_ali(0.0), rmsd_ali(0.0), + L_ali(0), n_ali(0), n_ali8(0), hingeNum(0) + { + for (int i = 0; i < 3; i++) + { + t0[i] = 0.0; + for (int j = 0; j < 3; j++) + u0[i][j] = (i == j) ? 1.0 : 0.0; + } + } +}; + +enum FlexAlignMode +{ + FLEX_STANDARD = 0, + FLEX_BEST = 1, + FLEX_FATCAT = 2 +}; + +// Encapsulates the execution of flexalign_main and its fallback refinement logic +void execute_flexalign_with_fallback( + double **xa, double **ya, char *seqx, char *seqy, char *secx, char *secy, + int xlen, int ylen, vector &sequence, const double Lnorm_ass, const double d0_scale, + const int i_opt, const int a_opt, const bool u_opt, const bool d_opt, const bool force_fast_opt, + const int mol_type, const int hinge_opt, const int ss_opt, FlexAlignResult &res) +{ + res.hingeNum = flexalign_main( + xa, ya, seqx, seqy, secx, secy, + res.t0, res.u0, res.tu_vec, res.TM1, res.TM2, res.TM3, res.TM4, res.TM5, + res.d0_0, res.TM_0, res.d0A, res.d0B, res.d0u, res.d0a, res.d0_out, + res.seqM, res.seqxA, res.seqyA, res.do_vec, + res.rmsd0, res.L_ali, res.Liden, res.TM_ali, res.rmsd_ali, res.n_ali, res.n_ali8, + xlen, ylen, sequence, Lnorm_ass, d0_scale, + i_opt, a_opt, u_opt, d_opt, force_fast_opt, + mol_type, hinge_opt, ss_opt); + + // Fallback compensation when too few hinges are found + if (hinge_opt && res.hingeNum <= 1 && res.n_ali8 < 0.6 * getmin(xlen, ylen)) + { + FlexAlignResult res_h; + res_h.tu_vec.push_back(res.tu_vec[0]); + tu2t_u(res.tu_vec[0], res_h.t0, res_h.u0); + + res_h.hingeNum = flexalign_main( + xa, ya, seqx, seqy, secx, secy, + res_h.t0, res_h.u0, res_h.tu_vec, + res_h.TM1, res_h.TM2, res_h.TM3, res_h.TM4, res_h.TM5, + res_h.d0_0, res_h.TM_0, res.d0A, res.d0B, res.d0u, res.d0a, res_h.d0_out, + res_h.seqM, res_h.seqxA, res_h.seqyA, res_h.do_vec, + res_h.rmsd0, res_h.L_ali, res_h.Liden, res_h.TM_ali, res_h.rmsd_ali, + res_h.n_ali, res_h.n_ali8, + xlen, ylen, sequence, Lnorm_ass, d0_scale, i_opt, + a_opt, u_opt, d_opt, force_fast_opt, + mol_type, hinge_opt, ss_opt); + + double TM = (res.TM1 > res.TM2) ? res.TM1 : res.TM2; + double TM_h = (res_h.TM1 > res_h.TM2) ? res_h.TM1 : res_h.TM2; + if (TM_h > TM) + { + res = res_h; // Safely overwrite with the better refined results + } + } +} + +// ========================================== +// FATCAT Core Algorithm (flexalign_fatcat_main) +// ========================================== +struct FATCAT_AFP +{ + int i, j, len; + double score; +}; + +int flexalign_fatcat_main(double **xa, double **ya, + const char *seqx, const char *seqy, const char *secx, const char *secy, + double t0[3], double u0[3][3], std::vector> &tu_vec, + double &TM1, double &TM2, double &TM3, double &TM4, double &TM5, + double &d0_0, double &TM_0, + double &d0A, double &d0B, double &d0u, double &d0a, double &d0_out, + std::string &seqM, std::string &seqxA, std::string &seqyA, std::vector &do_vec, + double &rmsd0, int &L_ali, double &Liden, + double &TM_ali, double &rmsd_ali, int &n_ali, int &n_ali8, + const int xlen, const int ylen, + const std::vector sequence, const double Lnorm_ass, + const double d0_scale, const int i_opt, const int a_opt, + const bool u_opt, const bool d_opt, const bool fast_opt, + const int mol_type, const int hinge_opt, const int ss_opt, + int sparse_val = 0, bool hinge_set = false) +{ + // ========================================== + // TRUE flexalign_greedy BASELINE (Defender) + // Run full sequence without generate_bounds slicing! + // This perfectly simulates FLEX_BEST (flexalign_greedy) behavior. + // ========================================== + double best_global_max_TM = -1.0; + std::vector> best_tu_vec; + double best_t0[3], best_u0[3][3]; + double best_TM1 = 0.0, best_TM2 = 0.0, best_TM3 = 0.0, best_TM4 = 0.0, best_TM5 = 0.0; + double best_rmsd0 = 0.0, best_Liden = 0.0, best_TM_ali = 0.0, best_rmsd_ali = 0.0; + int best_L_ali = 0, best_n_ali = 0, best_n_ali8 = 0; + std::string best_seqM = "", best_seqxA = "", best_seqyA = ""; + std::vector best_do_vec; + double best_d0A = 0.0, best_d0B = 0.0, best_d0a = 0.0, best_d0u = 0.0; + + bool force_fast_opt_global = (std::min(xlen, ylen) > 1500) ? true : fast_opt; + std::vector local_sequence = sequence; + + for (int cur_ss_opt = 0; cur_ss_opt <= 1; cur_ss_opt++) + { + FlexAlignResult base_res; + // Pass full unbroken sequences directly to flexalign (identical to flexalign_greedy) + execute_flexalign_with_fallback( + xa, ya, (char *)seqx, (char *)seqy, (char *)secx, (char *)secy, + xlen, ylen, local_sequence, Lnorm_ass, d0_scale, + i_opt, a_opt, u_opt, d_opt, force_fast_opt_global, + mol_type, hinge_opt, cur_ss_opt, base_res); // flexalign_greedy explicitly uses 9 hinges + + double cur_max_TM = (base_res.TM1 > base_res.TM2) ? base_res.TM1 : base_res.TM2; + if (cur_max_TM > best_global_max_TM) + { + best_global_max_TM = cur_max_TM; + for (int a = 0; a < 3; a++) + { + best_t0[a] = base_res.t0[a]; + for (int b = 0; b < 3; b++) + best_u0[a][b] = base_res.u0[a][b]; + } + best_tu_vec = base_res.tu_vec; + best_TM1 = base_res.TM1; + best_TM2 = base_res.TM2; + best_TM3 = base_res.TM3; + best_TM4 = base_res.TM4; + best_TM5 = base_res.TM5; + best_rmsd0 = base_res.rmsd0; + best_Liden = base_res.Liden; + best_TM_ali = base_res.TM_ali; + best_rmsd_ali = base_res.rmsd_ali; + best_L_ali = base_res.L_ali; + best_n_ali = base_res.n_ali; + best_n_ali8 = base_res.n_ali8; + best_seqM = base_res.seqM; + best_seqxA = base_res.seqxA; + best_seqyA = base_res.seqyA; + best_do_vec = base_res.do_vec; + best_d0A = base_res.d0A; + best_d0B = base_res.d0B; + best_d0a = base_res.d0a; + best_d0u = base_res.d0u; + } + } + + // Early exit if the true flexalign_greedy baseline is already excellent + if (best_global_max_TM >= 0.85) + { + // <--- ADD DEBUG HERE: Output early exit confirmation + // std::cout << "[DEBUG] MM9" << std::endl; + + TM1 = best_TM1; + TM2 = best_TM2; + TM3 = best_TM3; + TM4 = best_TM4; + TM5 = best_TM5; + rmsd0 = best_rmsd0; + Liden = best_Liden; + TM_ali = best_TM_ali; + rmsd_ali = best_rmsd_ali; + L_ali = best_L_ali; + n_ali = best_n_ali; + n_ali8 = best_n_ali8; + seqM = best_seqM; + seqxA = best_seqxA; + seqyA = best_seqyA; + do_vec = best_do_vec; + tu_vec = best_tu_vec; + d0A = best_d0A; + d0B = best_d0B; + d0a = best_d0a; + d0u = best_d0u; + for (int a = 0; a < 3; a++) + { + t0[a] = best_t0[a]; + for (int b = 0; b < 3; b++) + u0[a][b] = best_u0[a][b]; + } + return tu_vec.size(); + } + + // ========================================== + // Proceed to FATCAT sliced bounds logic... + // ========================================== + + // FATCAT base parameters + int fragLen = 8; + double resScore = 3.0; + double gap_ext = -0.5; + double disCut = 5.0; + double disSmooth = 4.0; + double twist_pen = -25.0; + int max_gap = 40; + double max_penalty = -5.0; + int misCut = 2 * fragLen; + int maxGapFrag = fragLen + max_gap; + double afp_dis_cut = fragLen * fragLen * (disCut * disCut); + int max_twists = hinge_opt; + + // OPTIMIZATION 1: Precompute local intra-protein distance matrices + int max_dist_window = max_gap + 2 * fragLen + 1; + std::vector> disTable1(xlen, std::vector(max_dist_window, 0.0)); + std::vector> disTable2(ylen, std::vector(max_dist_window, 0.0)); + + for (int i = 0; i < xlen; i++) + { + for (int j = i; j < std::min(xlen, i + max_dist_window); j++) + disTable1[i][j - i] = std::sqrt(dist(xa[i], xa[j])); + } + for (int i = 0; i < ylen; i++) + { + for (int j = i; j < std::min(ylen, i + max_dist_window); j++) + disTable2[i][j - i] = std::sqrt(dist(ya[i], ya[j])); + } + + // Wrapper for generating bounds + auto generate_bounds = [&](double cur_rmsdCut, double cur_badRmsd, double cur_local_badRmsd) -> std::pair, std::vector> + { + // Step 1: Extract initial AFPs in batches + std::vector initial_afps; + int step = sparse_val + 1; + + double r1_static[8][3], r2_static[8][3]; + double *r1[8], *r2[8]; + for (int k = 0; k < 8; k++) + { + r1[k] = r1_static[k]; + r2[k] = r2_static[k]; + } + + for (int i = 0; i <= xlen - fragLen; i += step) + { + for (int j = 0; j <= ylen - fragLen; j += step) + { + int d3_term = std::min(i, j) + std::min(xlen - (i + fragLen - 1), ylen - (j + fragLen)) + fragLen; + if (d3_term < 0.3 * std::min(xlen, ylen)) + continue; + + double dist1 = disTable1[i][fragLen - 1]; + double dist2 = disTable2[j][fragLen - 1]; + + if (std::fabs(dist1 - dist2) > 2.0 * cur_rmsdCut) + continue; + + for (int k = 0; k < fragLen; k++) + { + r1[k][0] = xa[i + k][0]; + r1[k][1] = xa[i + k][1]; + r1[k][2] = xa[i + k][2]; + r2[k][0] = ya[j + k][0]; + r2[k][1] = ya[j + k][1]; + r2[k][2] = ya[j + k][2]; + } + + double rms_sum_sq, t_tmp[3], u_tmp[3][3]; + Kabsch(r1, r2, fragLen, 0, &rms_sum_sq, t_tmp, u_tmp); + double rmsd_tmp = std::sqrt(rms_sum_sq / fragLen); + + if (rmsd_tmp < cur_rmsdCut) + { + FATCAT_AFP afp; + afp.i = i; + afp.j = j; + afp.len = fragLen; + afp.score = resScore * fragLen * (1.0 - (rmsd_tmp / cur_badRmsd) * (rmsd_tmp / cur_badRmsd)); + initial_afps.push_back(afp); + } + } + } + + // Step 2: Merge diagonal AFPs + int max_diagonal_idx = xlen + ylen + 1; + std::vector> diagonals(max_diagonal_idx); + for (size_t k = 0; k < initial_afps.size(); k++) + { + diagonals[initial_afps[k].i - initial_afps[k].j + ylen].push_back(initial_afps[k]); + } + + std::vector merged_afps; + int max_merge_len = std::min(xlen, ylen); + double **r1_merge, **r2_merge; + NewArray(&r1_merge, max_merge_len, 3); + NewArray(&r2_merge, max_merge_len, 3); + + for (int d = 0; d < max_diagonal_idx; d++) + { + if (diagonals[d].empty()) + continue; + std::vector &group = diagonals[d]; + + std::sort(group.begin(), group.end(), [](const FATCAT_AFP &a, const FATCAT_AFP &b) + { return a.i < b.i; }); + + int n_group = group.size(); + std::vector invalid(n_group, false); + for (int idx = 0; idx < n_group; idx++) + { + if (invalid[idx]) + continue; + FATCAT_AFP curr = group[idx]; + for (int nxt_idx = idx + 1; nxt_idx < n_group; nxt_idx++) + { + FATCAT_AFP nxt = group[nxt_idx]; + if (nxt.i > curr.i + curr.len) + break; + + if (nxt.i + nxt.len > curr.i + curr.len) + { + int new_len = (nxt.i + nxt.len) - curr.i; + + for (int k = 0; k < new_len; k++) + { + r1_merge[k][0] = xa[curr.i + k][0]; + r1_merge[k][1] = xa[curr.i + k][1]; + r1_merge[k][2] = xa[curr.i + k][2]; + r2_merge[k][0] = ya[curr.j + k][0]; + r2_merge[k][1] = ya[curr.j + k][1]; + r2_merge[k][2] = ya[curr.j + k][2]; + } + + double rms_sum_sq, t_tmp[3], u_tmp[3][3]; + Kabsch(r1_merge, r2_merge, new_len, 0, &rms_sum_sq, t_tmp, u_tmp); + double rmsd_tmp = std::sqrt(rms_sum_sq / new_len); + + if (rmsd_tmp < cur_rmsdCut) + { + curr.len = new_len; + curr.score = resScore * new_len * (1.0 - (rmsd_tmp / cur_badRmsd) * (rmsd_tmp / cur_badRmsd)); + invalid[nxt_idx] = true; + } + } + } + merged_afps.push_back(curr); + } + } + DeleteArray(&r1_merge, max_merge_len); + DeleteArray(&r2_merge, max_merge_len); + + std::sort(merged_afps.begin(), merged_afps.end(), [](const FATCAT_AFP &a, const FATCAT_AFP &b) + { + if (a.i == b.i) return a.j < b.j; + return a.i < b.i; }); + + int n_afps = merged_afps.size(); + std::vector ret_b1, ret_b2; + if (n_afps == 0) + return std::make_pair(ret_b1, ret_b2); + + // Step 3 & 4: Dual Dynamic Programming and Domain Splitting + std::vector afp_aft_index(xlen * ylen, -1); + std::vector afp_bef_index(xlen * ylen, -1); + + std::vector>> i_to_j(xlen); + for (int m = 0; m < n_afps; m++) + { + i_to_j[merged_afps[m].i].push_back(std::make_pair(merged_afps[m].j, m)); + } + + for (int i_val = 0; i_val < xlen; i_val++) + { + if (i_to_j[i_val].empty()) + continue; + for (size_t p = 0; p < i_to_j[i_val].size(); p++) + { + int j_val = i_to_j[i_val][p].first; + afp_aft_index[i_val * ylen + j_val] = i_to_j[i_val][p].second; + afp_bef_index[i_val * ylen + j_val] = i_to_j[i_val][p].second; + } + int curr_bef = -1; + for (int j_val = 0; j_val < ylen; j_val++) + { + if (afp_bef_index[i_val * ylen + j_val] != -1) + curr_bef = afp_bef_index[i_val * ylen + j_val]; + else + afp_bef_index[i_val * ylen + j_val] = curr_bef; + } + int curr_aft = -1; + for (int j_val = ylen - 1; j_val >= 0; j_val--) + { + if (afp_aft_index[i_val * ylen + j_val] != -1) + curr_aft = afp_aft_index[i_val * ylen + j_val]; + else + afp_aft_index[i_val * ylen + j_val] = curr_aft; + } + } + + auto get_dvar = [&](const FATCAT_AFP &prv, const FATCAT_AFP &curr) -> double + { + double rms_sq = 0; + for (int i_idx = 0; i_idx < fragLen; i_idx++) + { + for (int j_idx = 0; j_idx < fragLen; j_idx++) + { + double dist1, dist2; + int idx1_a = curr.i + i_idx, idx1_b = prv.i + j_idx; + if (idx1_a >= idx1_b) + dist1 = disTable1[idx1_b][idx1_a - idx1_b]; + else + dist1 = disTable1[idx1_a][idx1_b - idx1_a]; + + int idx2_a = curr.j + i_idx, idx2_b = prv.j + j_idx; + if (idx2_a >= idx2_b) + dist2 = disTable2[idx2_b][idx2_a - idx2_b]; + else + dist2 = disTable2[idx2_a][idx2_b - idx2_a]; + + rms_sq += (dist1 - dist2) * (dist1 - dist2); + } + } + if (rms_sq > afp_dis_cut) + return 1e9; + return std::sqrt(rms_sq / (fragLen * fragLen)); + }; + + auto calc_block_rmsd = [&](const std::vector &afp_list) -> double + { + std::vector r1, r2; + for (size_t a = 0; a < afp_list.size(); a++) + { + for (int l = 0; l < afp_list[a].len; l++) + { + r1.push_back(afp_list[a].i + l); + r2.push_back(afp_list[a].j + l); + } + } + int n = r1.size(); + if (n < 3) + return 0.0; + double **p1; + NewArray(&p1, n, 3); + double **p2; + NewArray(&p2, n, 3); + for (int i = 0; i < n; i++) + { + p1[i][0] = xa[r1[i]][0]; + p1[i][1] = xa[r1[i]][1]; + p1[i][2] = xa[r1[i]][2]; + p2[i][0] = ya[r2[i]][0]; + p2[i][1] = ya[r2[i]][1]; + p2[i][2] = ya[r2[i]][2]; + } + double rms_sq_sum, t_tmp[3], u_tmp[3][3]; + Kabsch(p1, p2, n, 0, &rms_sq_sum, t_tmp, u_tmp); + DeleteArray(&p1, n); + DeleteArray(&p2, n); + return std::sqrt(rms_sq_sum / n); + }; + + struct Region + { + int s1, e1, s2, e2; + }; + + std::vector sco(n_afps); + std::vector twi(n_afps, 0); + std::vector pre(n_afps, -1); + for (int m = 0; m < n_afps; m++) + sco[m] = merged_afps[m].score; + + for (int m = 0; m < n_afps; m++) + { + int curr_i = merged_afps[m].i; + int curr_j = merged_afps[m].j; + int a3 = curr_i - fragLen; + int a2 = std::max(0, a3 - misCut); + int a1 = std::max(0, curr_i - maxGapFrag); + int b3 = curr_j - fragLen; + int b2 = std::max(0, b3 - misCut); + int b1 = std::max(0, curr_j - maxGapFrag); + + std::vector valid_prevs; + for (int st = 0; st < 2; st++) + { + int a_s, a_e, b_s, b_e; + if (st == 0) + { + a_s = std::max(a1, 0); + a_e = std::min(a3, xlen - 1); + b_s = std::max(b2, 0); + b_e = std::min(b3, ylen - 1); + } + else + { + a_s = std::max(a2, 0); + a_e = std::min(a3, xlen - 1); + b_s = std::max(b1, 0); + b_e = std::min(b2 - 1, ylen - 1); + } + + if (b_s >= ylen || b_e < 0) + continue; + for (int prev_i = a_s; prev_i <= a_e; prev_i++) + { + int s1 = afp_aft_index[prev_i * ylen + b_s]; + int s2 = afp_bef_index[prev_i * ylen + b_e]; + if (s1 != -1 && s2 != -1 && s1 <= s2) + for (int s = s1; s <= s2; s++) + valid_prevs.push_back(s); + } + } + + double curr_sco = merged_afps[m].score; + for (size_t v = 0; v < valid_prevs.size(); v++) + { + int prev = valid_prevs[v]; + int prev_twi = twi[prev]; + if (prev_twi > max_twists) + continue; + + int gap_i = curr_i - (merged_afps[prev].i + merged_afps[prev].len); + int gap_j = curr_j - (merged_afps[prev].j + merged_afps[prev].len); + int m_gap = std::max(gap_i, gap_j); + + double gp = 0.0; + int m_mis = 0; + if (gap_i < 0 || gap_j < 0) + m_mis = (gap_i < gap_j) ? -gap_i : -gap_j; + gp = gap_ext * m_mis; + if (m_gap > 0) + gp += gap_ext * m_gap; + if (gp < max_penalty) + gp = max_penalty; + + double rms_sq = 0; + for (int k = 0; k < fragLen; k++) + { + for (int l = 0; l < fragLen; l++) + { + double dist1, dist2; + int idx1_a = curr_i + k, idx1_b = merged_afps[prev].i + l; + if (idx1_a >= idx1_b) + dist1 = disTable1[idx1_b][idx1_a - idx1_b]; + else + dist1 = disTable1[idx1_a][idx1_b - idx1_a]; + + int idx2_a = curr_j + k, idx2_b = merged_afps[prev].j + l; + if (idx2_a >= idx2_b) + dist2 = disTable2[idx2_b][idx2_a - idx2_b]; + else + dist2 = disTable2[idx2_a][idx2_b - idx2_a]; + + rms_sq += (dist1 - dist2) * (dist1 - dist2); + } + } + + double tp = 0.0; + int is_twist = 0; + if (rms_sq >= afp_dis_cut) + { + tp = twist_pen; + is_twist = 1; + } + else + { + double dvar = std::sqrt(rms_sq / (fragLen * fragLen)); + if (dvar > disCut - disSmooth) + tp = twist_pen * std::sqrt((dvar - disCut + disSmooth) / disSmooth); + } + + if (prev_twi + is_twist > max_twists) + continue; + + double stmp = sco[prev] + curr_sco + tp + gp; + if (stmp > sco[m]) + { + sco[m] = stmp; + pre[m] = prev; + twi[m] = prev_twi + is_twist; + } + } + } + + int best_m = 0; + for (int m = 1; m < n_afps; m++) + if (sco[m] > sco[best_m]) + best_m = m; + + std::vector path; + int curr_m = best_m; + while (curr_m != -1) + { + path.push_back(curr_m); + curr_m = pre[curr_m]; + } + std::reverse(path.begin(), path.end()); + + if (path.empty()) + return std::make_pair(ret_b1, ret_b2); + + struct Block + { + std::vector afps; + std::vector dvars; + }; + std::vector candidate_blocks; + Block curr_block; + curr_block.afps.push_back(merged_afps[path[0]]); + curr_block.dvars.push_back(0.0); + + for (size_t k = 1; k < path.size(); k++) + { + FATCAT_AFP curr = merged_afps[path[k]]; + FATCAT_AFP prv = merged_afps[path[k - 1]]; + double dvar = get_dvar(prv, curr); + + if (dvar >= disCut) + { + candidate_blocks.push_back(curr_block); + curr_block.afps.clear(); + curr_block.dvars.clear(); + curr_block.afps.push_back(curr); + curr_block.dvars.push_back(0.0); + } + else + { + curr_block.afps.push_back(curr); + curr_block.dvars.push_back(dvar); + } + } + if (!curr_block.afps.empty()) + candidate_blocks.push_back(curr_block); + + bool splitted = true; + while (splitted && candidate_blocks.size() < (size_t)(max_twists + 1)) + { + splitted = false; + double max_rmsd = 0.0; + int target_b = -1; + + for (size_t b = 0; b < candidate_blocks.size(); b++) + { + if (candidate_blocks[b].afps.size() > 2) + { + double cur_rmsd = calc_block_rmsd(candidate_blocks[b].afps); + if (cur_rmsd > max_rmsd) + { + max_rmsd = cur_rmsd; + target_b = b; + } + } + } + + if (max_rmsd >= cur_local_badRmsd && target_b != -1) + { + double max_t = 0; + int cut_idx = 0; + for (size_t i = 1; i < candidate_blocks[target_b].afps.size(); i++) + { + if (candidate_blocks[target_b].dvars[i] > max_t) + { + max_t = candidate_blocks[target_b].dvars[i]; + cut_idx = i; + } + } + + if (cut_idx > 0) + { + Block right_blk; + right_blk.afps.assign(candidate_blocks[target_b].afps.begin() + cut_idx, candidate_blocks[target_b].afps.end()); + right_blk.dvars.assign(candidate_blocks[target_b].dvars.begin() + cut_idx, candidate_blocks[target_b].dvars.end()); + right_blk.dvars[0] = 0.0; + candidate_blocks[target_b].afps.erase(candidate_blocks[target_b].afps.begin() + cut_idx, candidate_blocks[target_b].afps.end()); + candidate_blocks[target_b].dvars.erase(candidate_blocks[target_b].dvars.begin() + cut_idx, candidate_blocks[target_b].dvars.end()); + candidate_blocks.insert(candidate_blocks.begin() + target_b + 1, right_blk); + splitted = true; + } + } + } + + for (int b = 0; b < (int)candidate_blocks.size(); b++) + { + if (candidate_blocks[b].afps.size() <= 1) + { + int e1 = (b < (int)candidate_blocks.size() - 1) ? candidate_blocks[b + 1].afps.front().i : xlen; + int e2 = (b < (int)candidate_blocks.size() - 1) ? candidate_blocks[b + 1].afps.front().j : ylen; + int b1 = (b > 0) ? candidate_blocks[b - 1].afps.back().i + candidate_blocks[b - 1].afps.back().len : 0; + int b2 = (b > 0) ? candidate_blocks[b - 1].afps.back().j + candidate_blocks[b - 1].afps.back().len : 0; + int span = std::min(e1 - b1, e2 - b2); + if (span < 2 * fragLen) + { + candidate_blocks.erase(candidate_blocks.begin() + b); + b--; + } + } + } + + bool merged = true; + while (merged && candidate_blocks.size() > 1) + { + merged = false; + double min_rmsd = 1e9; + int min_b = -1; + for (size_t b = 0; b < candidate_blocks.size() - 1; b++) + { + std::vector temp_merged = candidate_blocks[b].afps; + temp_merged.insert(temp_merged.end(), candidate_blocks[b + 1].afps.begin(), candidate_blocks[b + 1].afps.end()); + double cur_rmsd = calc_block_rmsd(temp_merged); + if (cur_rmsd < min_rmsd) + { + min_rmsd = cur_rmsd; + min_b = b; + } + } + + if (min_rmsd < cur_local_badRmsd && min_b != -1) + { + candidate_blocks[min_b].afps.insert(candidate_blocks[min_b].afps.end(), candidate_blocks[min_b + 1].afps.begin(), candidate_blocks[min_b + 1].afps.end()); + candidate_blocks.erase(candidate_blocks.begin() + min_b + 1); + merged = true; + } + } + + std::vector fatcat_domains; + int last_i = 0, last_j = 0; + for (size_t b = 0; b < candidate_blocks.size(); b++) + { + int b_s1 = -1, b_e1 = -1, b_s2 = -1, b_e2 = -1; + for (size_t a = 0; a < candidate_blocks[b].afps.size(); a++) + { + FATCAT_AFP afp = candidate_blocks[b].afps[a]; + int skip = std::max(std::max(last_i - afp.i, last_j - afp.j), 0); + if (skip >= afp.len) + continue; + + int eff_i = afp.i + skip; + int eff_j = afp.j + skip; + int eff_L = afp.len - skip; + if (b_s1 == -1) + { + b_s1 = eff_i; + b_s2 = eff_j; + } + b_e1 = eff_i + eff_L; + b_e2 = eff_j + eff_L; + last_i = b_e1; + last_j = b_e2; + } + if (b_s1 != -1) + { + if (b_e1 - b_s1 >= 4 && b_e2 - b_s2 >= 4) + { + Region r = {b_s1, b_e1, b_s2, b_e2}; + fatcat_domains.push_back(r); + } + } + } + + if (fatcat_domains.empty()) + return std::make_pair(ret_b1, ret_b2); + + ret_b1.push_back(0); + ret_b2.push_back(0); + for (size_t k = 0; k < fatcat_domains.size() - 1; k++) + { + ret_b1.push_back((fatcat_domains[k].e1 + fatcat_domains[k + 1].s1) / 2); + ret_b2.push_back((fatcat_domains[k].e2 + fatcat_domains[k + 1].s2) / 2); + } + ret_b1.push_back(xlen); + ret_b2.push_back(ylen); + + return std::make_pair(ret_b1, ret_b2); + }; + + auto bounds_fatcat = generate_bounds(3.0, 4.0, 4.0); + auto bounds_strict = generate_bounds(2.0, 3.0, 2.0); + + std::vector, std::vector>> all_bounds; + all_bounds.push_back(bounds_fatcat); + if (bounds_strict.first != bounds_fatcat.first || bounds_strict.second != bounds_fatcat.second) + { + all_bounds.push_back(bounds_strict); + } + + // Loop through both bound sets, updating best_global_max_TM if we beat the flexalign_greedy defender + for (size_t b_idx = 0; b_idx < all_bounds.size(); b_idx++) + { + std::vector &bounds1 = all_bounds[b_idx].first; + std::vector &bounds2 = all_bounds[b_idx].second; + + // Skip if only one interval (block) is generated, as the full unbroken sequence + // has already been processed by the baseline (flexalign_greedy) above. + if (bounds1.size() <= 2) + continue; + + // ================== DEBUG START ================== + // Output the interval mapping for the current boundary set + // std::cout << "\n[DEBUG] --- Region Mapping Table ---" << std::endl; + // std::cout << "[DEBUG] Mode: " << (b_idx == 0 ? "FATCAT Bounds" : "Strict Bounds") << std::endl; + // std::cout << "[DEBUG] Total Blocks: " << (bounds1.size() - 1) << std::endl; + + // for (size_t k = 0; k < bounds1.size() - 1; k++) + // { + // std::cout << "[DEBUG] Block " << (k + 1) << ": " + // << "Chain1 [" << bounds1[k] << " -> " << bounds1[k + 1] << "] <==> " + // << "Chain2 [" << bounds2[k] << " -> " << bounds2[k + 1] << "]" + // << std::endl; + // } + // std::cout << "[DEBUG] ----------------------------\n" << std::endl; + // =================== DEBUG END =================== + + // Precalculate distributed local_hinge_opt for each block when hinge_set is true + int num_blocks = bounds1.size() - 1; + std::vector precalc_local_hinge(num_blocks, 0); + + if (hinge_set) + { + struct BlockMeta + { + int index; + double rmsd; + }; + std::vector valid_blocks; + + // Calculate target hinges to distribute based on requested hinge_opt and current implicit blocks + int target_total_hinges = std::max(0, hinge_opt + 1 - num_blocks); + + // Calculate base amount of hinges per block + int base_hinge = (hinge_opt + 1) / num_blocks - 1; + if (base_hinge < 0) + base_hinge = 0; + + for (int k = 0; k < num_blocks; k++) + { + int L1_sub = bounds1[k + 1] - bounds1[k]; + int L2_sub = bounds2[k + 1] - bounds2[k]; + int min_L = std::min(L1_sub, L2_sub); + + if (min_L < 2 * fragLen) + { + precalc_local_hinge[k] = 0; // Length < 2*fragLen gets 0 + } + else + { + // Calculate rough RMSD for this unaligned block section + double block_rmsd = 0.0; + if (min_L >= 3) + { + double **p1, **p2; + NewArray(&p1, min_L, 3); + NewArray(&p2, min_L, 3); + for (int i = 0; i < min_L; i++) + { + p1[i][0] = xa[bounds1[k] + i][0]; + p1[i][1] = xa[bounds1[k] + i][1]; + p1[i][2] = xa[bounds1[k] + i][2]; + p2[i][0] = ya[bounds2[k] + i][0]; + p2[i][1] = ya[bounds2[k] + i][1]; + p2[i][2] = ya[bounds2[k] + i][2]; + } + double rms_sum_sq, t_tmp[3], u_tmp[3][3]; + Kabsch(p1, p2, min_L, 0, &rms_sum_sq, t_tmp, u_tmp); + block_rmsd = std::sqrt(rms_sum_sq / min_L); + DeleteArray(&p1, min_L); + DeleteArray(&p2, min_L); + } + valid_blocks.push_back({k, block_rmsd}); + precalc_local_hinge[k] = base_hinge; // Assign base hinges to valid blocks + } + } + + // Distribute remaining hinges strictly prioritizing top RMSD blocks + int assigned = valid_blocks.size() * base_hinge; + int remainder = target_total_hinges - assigned; + + if (remainder > 0 && !valid_blocks.empty()) + { + // Sort valid blocks by RMSD descending + std::sort(valid_blocks.begin(), valid_blocks.end(), [](const BlockMeta &a, const BlockMeta &b) + { return a.rmsd > b.rmsd; }); + + int v_idx = 0; + while (remainder > 0) + { + precalc_local_hinge[valid_blocks[v_idx].index]++; // Give +1 to the front runners + remainder--; + v_idx = (v_idx + 1) % valid_blocks.size(); + } + } + } + + // Step 5: Iteratively align each block + std::string cur_global_seqM = "", cur_global_seqxA = "", cur_global_seqyA = ""; + cur_global_seqM.reserve(xlen + ylen + max_gap); + cur_global_seqxA.reserve(xlen + ylen + max_gap); + cur_global_seqyA.reserve(xlen + ylen + max_gap); + + std::vector> cur_tu_vec; + std::vector cur_global_res_tu(xlen, -1); + + for (size_t k = 0; k < bounds1.size() - 1; k++) + { + int x_s = bounds1[k], x_e = bounds1[k + 1]; + int y_s = bounds2[k], y_e = bounds2[k + 1]; + int L1_sub = x_e - x_s; + int L2_sub = y_e - y_s; + + if (L1_sub < 3 || L2_sub < 3) + { + for (int i = 0; i < L1_sub; i++) + { + cur_global_seqxA += seqx[x_s + i]; + cur_global_seqyA += '-'; + cur_global_seqM += ' '; + } + for (int i = 0; i < L2_sub; i++) + { + cur_global_seqxA += '-'; + cur_global_seqyA += seqy[y_s + i]; + cur_global_seqM += ' '; + } + continue; + } + + double **xa_sub, **ya_sub; + NewArray(&xa_sub, L1_sub, 3); + NewArray(&ya_sub, L2_sub, 3); + char *seqx_sub = new char[L1_sub + 1]; + char *seqy_sub = new char[L2_sub + 1]; + char *secx_sub = new char[L1_sub + 1]; + char *secy_sub = new char[L2_sub + 1]; + + for (int i = 0; i < L1_sub; i++) + { + xa_sub[i][0] = xa[x_s + i][0]; + xa_sub[i][1] = xa[x_s + i][1]; + xa_sub[i][2] = xa[x_s + i][2]; + seqx_sub[i] = seqx[x_s + i]; + secx_sub[i] = secx[x_s + i]; + } + seqx_sub[L1_sub] = '\0'; + secx_sub[L1_sub] = '\0'; + + for (int i = 0; i < L2_sub; i++) + { + ya_sub[i][0] = ya[y_s + i][0]; + ya_sub[i][1] = ya[y_s + i][1]; + ya_sub[i][2] = ya[y_s + i][2]; + seqy_sub[i] = seqy[y_s + i]; + secy_sub[i] = secy[y_s + i]; + } + seqy_sub[L2_sub] = '\0'; + secy_sub[L2_sub] = '\0'; + + double t0_best[3], u0_best[3][3]; + double TM_best_max = -1.0; + std::string seqM_best, seqxA_best, seqyA_best; + std::vector> tu_vec_best; + + bool force_fast_opt = (std::min(L1_sub, L2_sub) > 1500) ? true : fast_opt; + + // Determine local_hinge_opt based on user requirements. + // If hinge_set is true, we use the precalculated distributed hinges. + // Otherwise, set to 0 if the block length is less than 2 * fragLen, else 2. + int local_hinge_opt; + if (hinge_set) + { + local_hinge_opt = precalc_local_hinge[k]; + } + else + { + local_hinge_opt = (std::min(L1_sub, L2_sub) < 2 * fragLen) ? 0 : 2; + } + + for (int cur_ss_opt = 0; cur_ss_opt <= 1; cur_ss_opt++) + { + FlexAlignResult cur_res; + execute_flexalign_with_fallback( + xa_sub, ya_sub, seqx_sub, seqy_sub, secx_sub, secy_sub, + L1_sub, L2_sub, local_sequence, Lnorm_ass, d0_scale, + i_opt, a_opt, u_opt, d_opt, force_fast_opt, + mol_type, local_hinge_opt, cur_ss_opt, cur_res); + + double cur_max_TM = (cur_res.TM1 > cur_res.TM2) ? cur_res.TM1 : cur_res.TM2; + if (cur_max_TM > TM_best_max) + { + TM_best_max = cur_max_TM; + for (int a = 0; a < 3; a++) + { + t0_best[a] = cur_res.t0[a]; + for (int b = 0; b < 3; b++) + u0_best[a][b] = cur_res.u0[a][b]; + } + seqM_best = cur_res.seqM; + seqxA_best = cur_res.seqxA; + seqyA_best = cur_res.seqyA; + tu_vec_best = cur_res.tu_vec; + } + } + + if (TM_best_max <= 0) + { + for (int i = 0; i < L1_sub; i++) + { + cur_global_seqxA += seqx_sub[i]; + cur_global_seqyA += '-'; + cur_global_seqM += ' '; + } + for (int i = 0; i < L2_sub; i++) + { + cur_global_seqxA += '-'; + cur_global_seqyA += seqy_sub[i]; + cur_global_seqM += ' '; + } + DeleteArray(&xa_sub, L1_sub); + DeleteArray(&ya_sub, L2_sub); + delete[] seqx_sub; + delete[] seqy_sub; + delete[] secx_sub; + delete[] secy_sub; + continue; + } + + if (tu_vec_best.empty()) + { + std::vector tu_tmp(12); + t_u2tu(t0_best, u0_best, tu_tmp); + tu_vec_best.push_back(tu_tmp); + } + + int base_tu_idx = cur_tu_vec.size(); + for (size_t m = 0; m < tu_vec_best.size(); m++) + cur_tu_vec.push_back(tu_vec_best[m]); + + int rx = x_s; + int current_global_idx = base_tu_idx; + + for (size_t i = 0; i < seqxA_best.length(); i++) + { + char c = seqM_best[i]; + + if (c != ' ' && c != '.' && c != ':') + { + int local_hinge_idx = -1; + if (c >= '0' && c <= '9') + local_hinge_idx = c - '0'; + else if (c >= 'a' && c <= 'z') + local_hinge_idx = c - 'a' + 10; + else if (c >= 'A' && c <= 'Z') + local_hinge_idx = c - 'A' + 36; + if (local_hinge_idx >= 0 && local_hinge_idx < tu_vec_best.size()) + current_global_idx = base_tu_idx + local_hinge_idx; + } + + if (seqxA_best[i] != '-') + { + cur_global_res_tu[rx] = current_global_idx; + rx++; + } + + if (seqxA_best[i] != '-' && seqyA_best[i] != '-') + { + if (c != ' ' && c != '.' && c != ':') + { + char global_c; + if (current_global_idx < 10) + global_c = '0' + current_global_idx; + else if (current_global_idx < 36) + global_c = 'a' + (current_global_idx - 10); + else if (current_global_idx < 62) + global_c = 'A' + (current_global_idx - 36); + else + global_c = '*'; + seqM_best[i] = global_c; + } + else + { + seqM_best[i] = c; + } + } + else + { + seqM_best[i] = ' '; + } + } + + cur_global_seqM += seqM_best; + cur_global_seqxA += seqxA_best; + cur_global_seqyA += seqyA_best; + + DeleteArray(&xa_sub, L1_sub); + DeleteArray(&ya_sub, L2_sub); + delete[] seqx_sub; + delete[] seqy_sub; + delete[] secx_sub; + delete[] secy_sub; + } + + // Step 6: Recalculate global metrics correctly for current DP boundary + // Variables to receive dummy outputs from parameter_set4final + double dummy_D0_MIN, dummy_Lnorm, dummy_d0_search; + double cur_d0A, cur_d0B, cur_d0a, cur_d0u = 0.0; + + // Calculate d0 using parameter_set4final to correctly handle both proteins and RNA/DNA, + // and to prevent std::pow domain errors (NaN) when sequence length <= 15. + parameter_set4final(ylen, dummy_D0_MIN, dummy_Lnorm, cur_d0A, dummy_d0_search, mol_type); + parameter_set4final(xlen, dummy_D0_MIN, dummy_Lnorm, cur_d0B, dummy_d0_search, mol_type); + parameter_set4final((xlen + ylen) * 0.5, dummy_D0_MIN, dummy_Lnorm, cur_d0a, dummy_d0_search, mol_type); + + if (u_opt) + { + parameter_set4final(Lnorm_ass, dummy_D0_MIN, dummy_Lnorm, cur_d0u, dummy_d0_search, mol_type); + } + + double cur_TM1 = 0.0, cur_TM2 = 0.0, cur_TM3 = 0.0, cur_TM4 = 0.0, cur_TM5 = 0.0; + double cur_rmsd0 = 0.0, cur_Liden = 0.0; + int cur_n_ali8 = 0, cur_n_ali = 0; + std::vector cur_do_vec; + + int i_res = 0, j_res = 0; + for (size_t r = 0; r < cur_global_seqxA.length(); r++) + { + bool x_valid = (cur_global_seqxA[r] != '-'); + bool y_valid = (cur_global_seqyA[r] != '-'); + + if (x_valid && y_valid) + { + int matrix_idx = cur_global_res_tu[i_res]; + if (matrix_idx >= 0 && matrix_idx < cur_tu_vec.size()) + { + double t_k[3], u_k[3][3]; + tu2t_u(cur_tu_vec[matrix_idx], t_k, u_k); + + double x_rot[3]; + transform(t_k, u_k, xa[i_res], x_rot); + double dist2 = dist(x_rot, ya[j_res]); + double d = std::sqrt(dist2); + + cur_TM2 += 1.0 / (1.0 + dist2 / (cur_d0B * cur_d0B)); + cur_TM1 += 1.0 / (1.0 + dist2 / (cur_d0A * cur_d0A)); + if (a_opt) + cur_TM3 += 1.0 / (1.0 + dist2 / (cur_d0a * cur_d0a)); + if (u_opt) + cur_TM4 += 1.0 / (1.0 + dist2 / (cur_d0u * cur_d0u)); + if (d_opt) + cur_TM5 += 1.0 / (1.0 + dist2 / (d0_scale * d0_scale)); + + cur_n_ali++; + cur_do_vec.push_back(d); + + if (d <= d0_out) + { + cur_rmsd0 += dist2; + cur_n_ali8++; + if (seqx[i_res] == seqy[j_res]) + cur_Liden += 1.0; + } + } + else + { + cur_do_vec.push_back(-1); + } + } + else + { + cur_do_vec.push_back(-1); + } + + if (x_valid) + i_res++; + if (y_valid) + j_res++; + } + + // Normalize TM-scores + cur_TM2 /= xlen; + cur_TM1 /= ylen; + if (a_opt) + cur_TM3 /= (xlen + ylen) * 0.5; + if (u_opt) + cur_TM4 /= Lnorm_ass; + if (d_opt) + cur_TM5 /= ylen; + if (cur_n_ali8 > 0) + cur_rmsd0 = std::sqrt(cur_rmsd0 / cur_n_ali8); + else + cur_rmsd0 = 0.0; + + // Compare against the flexalign_greedy defender! + double cur_global_max_TM = (cur_TM1 > cur_TM2) ? cur_TM1 : cur_TM2; + + if (cur_global_max_TM > best_global_max_TM) + { + // <--- ADD DEBUG HERE + // if (b_idx == 1) + // { + // std::cout << "[DEBUG] strict" << std::endl; + // } + + best_global_max_TM = cur_global_max_TM; + best_tu_vec = cur_tu_vec; + best_TM1 = cur_TM1; + best_TM2 = cur_TM2; + best_TM3 = cur_TM3; + best_TM4 = cur_TM4; + best_TM5 = cur_TM5; + best_rmsd0 = cur_rmsd0; + best_Liden = cur_Liden; + best_TM_ali = cur_TM1; + best_rmsd_ali = cur_rmsd0; + best_L_ali = cur_n_ali; + best_n_ali = cur_n_ali; + best_n_ali8 = cur_n_ali8; + best_seqM = cur_global_seqM; + best_seqxA = cur_global_seqxA; + best_seqyA = cur_global_seqyA; + best_do_vec = cur_do_vec; + best_d0A = cur_d0A; + best_d0B = cur_d0B; + best_d0a = cur_d0a; + best_d0u = cur_d0u; + + if (!best_tu_vec.empty()) + { + tu2t_u(best_tu_vec[0], best_t0, best_u0); + } + } + } + + // Safety check + if (best_global_max_TM < 0) + return 0; + + // Output best values back to the reference parameters + TM1 = best_TM1; + TM2 = best_TM2; + TM3 = best_TM3; + TM4 = best_TM4; + TM5 = best_TM5; + rmsd0 = best_rmsd0; + Liden = best_Liden; + TM_ali = best_TM_ali; + rmsd_ali = best_rmsd_ali; + L_ali = best_L_ali; + n_ali = best_n_ali; + n_ali8 = best_n_ali8; + seqM = best_seqM; + seqxA = best_seqxA; + seqyA = best_seqyA; + do_vec = best_do_vec; + tu_vec = best_tu_vec; + d0A = best_d0A; + d0B = best_d0B; + d0a = best_d0a; + d0u = best_d0u; + + for (int a = 0; a < 3; a++) + { + t0[a] = best_t0[a]; + for (int b = 0; b < 3; b++) + u0[a][b] = best_u0[a][b]; + } + + return tu_vec.size(); } #endif diff --git a/param_set.h b/param_set.h index 9300404..1cc4807 100644 --- a/param_set.h +++ b/param_set.h @@ -7,71 +7,89 @@ #include "basic_fun.h" void parameter_set4search(const int xlen, const int ylen, - double &D0_MIN, double &Lnorm, - double &score_d8, double &d0, double &d0_search, double &dcu0) + double &D0_MIN, double &Lnorm, + double &score_d8, double &d0, double &d0_search, double &dcu0) { - //parameter initialization for searching: D0_MIN, Lnorm, d0, d0_search, score_d8 - D0_MIN=0.5; - dcu0=4.25; //update 3.85-->4.25 - - Lnorm=getmin(xlen, ylen); //normalize TMscore by this in searching - if (Lnorm<=19) //update 15-->19 - d0=0.168; //update 0.5-->0.168 - else d0=(1.24*pow((Lnorm*1.0-15), 1.0/3)-1.8); - D0_MIN=d0+0.8; //this should be moved to above - d0=D0_MIN; //update: best for search + // parameter initialization for searching: D0_MIN, Lnorm, d0, d0_search, score_d8 + D0_MIN = 0.5; + dcu0 = 4.25; // update 3.85-->4.25 - d0_search=d0; - if (d0_search>8) d0_search=8; - if (d0_search<4.5) d0_search=4.5; + Lnorm = getmin(xlen, ylen); // normalize TMscore by this in searching + if (Lnorm <= 19) // update 15-->19 + d0 = 0.168; // update 0.5-->0.168 + else + d0 = (1.24 * pow((Lnorm * 1.0 - 15), 1.0 / 3) - 1.8); + D0_MIN = d0 + 0.8; // this should be moved to above + d0 = D0_MIN; // update: best for search - score_d8=1.5*pow(Lnorm*1.0, 0.3)+3.5; //remove pairs with dis>d8 during search & final + d0_search = d0; + if (d0_search > 8) + d0_search = 8; + if (d0_search < 4.5) + d0_search = 4.5; + + score_d8 = 1.5 * pow(Lnorm * 1.0, 0.3) + 3.5; // remove pairs with dis>d8 during search & final } void parameter_set4final_C3prime(const double len, double &D0_MIN, - double &Lnorm, double &d0, double &d0_search) + double &Lnorm, double &d0, double &d0_search) { - D0_MIN=0.3; - - Lnorm=len; //normalize TMscore by this in searching - if(Lnorm<=11) d0=0.3; - else if(Lnorm>11&&Lnorm<=15) d0=0.4; - else if(Lnorm>15&&Lnorm<=19) d0=0.5; - else if(Lnorm>19&&Lnorm<=23) d0=0.6; - else if(Lnorm>23&&Lnorm<30) d0=0.7; - else d0=(0.6*pow((Lnorm*1.0-0.5), 1.0/2)-2.5); + D0_MIN = 0.3; + + Lnorm = len; // normalize TMscore by this in searching + if (Lnorm <= 11) + d0 = 0.3; + else if (Lnorm > 11 && Lnorm <= 15) + d0 = 0.4; + else if (Lnorm > 15 && Lnorm <= 19) + d0 = 0.5; + else if (Lnorm > 19 && Lnorm <= 23) + d0 = 0.6; + else if (Lnorm > 23 && Lnorm < 30) + d0 = 0.7; + else + d0 = (0.6 * pow((Lnorm * 1.0 - 0.5), 1.0 / 2) - 2.5); - d0_search=d0; - if (d0_search>8) d0_search=8; - if (d0_search<4.5) d0_search=4.5; + d0_search = d0; + if (d0_search > 8) + d0_search = 8; + if (d0_search < 4.5) + d0_search = 4.5; } void parameter_set4final(const double len, double &D0_MIN, double &Lnorm, - double &d0, double &d0_search, const int mol_type) + double &d0, double &d0_search, const int mol_type) { - if (mol_type>0) // RNA + if (mol_type > 0) // RNA { parameter_set4final_C3prime(len, D0_MIN, Lnorm, - d0, d0_search); + d0, d0_search); return; } - D0_MIN=0.5; - - Lnorm=len; //normalize TMscore by this in searching - if (Lnorm<=21) d0=0.5; - else d0=(1.24*pow((Lnorm*1.0-15), 1.0/3)-1.8); - if (d08) d0_search=8; - if (d0_search<4.5) d0_search=4.5; + D0_MIN = 0.5; + + Lnorm = len; // normalize TMscore by this in searching + if (Lnorm <= 21) + d0 = 0.5; + else + d0 = (1.24 * pow((Lnorm * 1.0 - 15), 1.0 / 3) - 1.8); + if (d0 < D0_MIN) + d0 = D0_MIN; + d0_search = d0; + if (d0_search > 8) + d0_search = 8; + if (d0_search < 4.5) + d0_search = 4.5; } void parameter_set4scale(const int len, const double d_s, double &Lnorm, - double &d0, double &d0_search) + double &d0, double &d0_search) { - d0=d_s; - Lnorm=len; //normalize TMscore by this in searching - d0_search=d0; - if (d0_search>8) d0_search=8; - if (d0_search<4.5) d0_search=4.5; + d0 = d_s; + Lnorm = len; // normalize TMscore by this in searching + d0_search = d0; + if (d0_search > 8) + d0_search = 8; + if (d0_search < 4.5) + d0_search = 4.5; } diff --git a/qTMclust.cpp b/qTMclust.cpp index 1bd3e2d..ef7368a 100644 --- a/qTMclust.cpp +++ b/qTMclust.cpp @@ -10,133 +10,136 @@ using namespace std; void print_extra_help() { - cout << -"Additional options:\n" -" -fast Fast but slightly inaccurate final alignment\n" -"\n" -" -atom 4-character atom name used to represent a residue.\n" -" Default is \" C3'\" for RNA/DNA and \" CA \" for proteins\n" -" (note the spaces before and after CA).\n" -"\n" -" -mol Molecule type: RNA or protein\n" -" Default is detect molecule type automatically\n" -"\n" -" -het Whether to align residues marked as 'HETATM' in addition to 'ATOM '\n" -" 0: (default) only align 'ATOM ' residues\n" -" 1: align both 'ATOM ' and 'HETATM' residues\n" -"\n" -" -infmt Input format\n" -" -1: (default) automatically detect PDB or PDBx/mmCIF format\n" -" 0: PDB format\n" -" 1: SPICKER format\n" -" 2: xyz format\n" -" 3: PDBx/mmCIF format\n" -" -chain Chains to parse in structure_2. Use _ for a chain without chain ID.\n" -" Multiple chains can be separated by commas, e.g.,\n" -" USalign -chain1 C,D,E,F 5jdo.pdb -chain2 A,B,C,D 3wtg.pdb -ter 0\n" -"\n" - <0) // RNA + if (mol_type > 0) // RNA { - lb_HwRMSD=0.02*TMcut; - lb_TMfast=0.60*TMcut; + lb_HwRMSD = 0.02 * TMcut; + lb_TMfast = 0.60 * TMcut; } else // protein { - lb_HwRMSD=0.25*TMcut; - lb_TMfast=0.80*TMcut; + lb_HwRMSD = 0.25 * TMcut; + lb_TMfast = 0.80 * TMcut; } } return; } -void read_init_cluster(const string&filename, - map > &init_cluster) +void read_init_cluster(const string &filename, + map> &init_cluster) { ifstream fin; string line; vector line_vec; map tmp_map; - size_t i,j; + size_t i, j; fin.open(filename.c_str()); while (fin.good()) { - getline(fin,line); - split(line,line_vec,'\t'); - for (i=0;i ().swap(tmp_map); - } - for (i=0;i().swap(tmp_map); + } + for (i = 0; i < line_vec.size(); i++) + line_vec[i].clear(); + line_vec.clear(); } fin.close(); vector().swap(line_vec); @@ -144,8 +147,8 @@ void read_init_cluster(const string&filename, int main(int argc, char *argv[]) { - if (argc < 2) print_help(); - + if (argc < 2) + print_help(); clock_t t1, t2; t1 = clock(); @@ -153,67 +156,75 @@ int main(int argc, char *argv[]) /**********************/ /* get argument */ /**********************/ - string xname = ""; - double TMcut = 0.5; + string xname = ""; + double TMcut = 0.5; string fname_clust = ""; // file name for output cluster result - string fname_init = ""; - string fname_lign = ""; // file name for user alignment + string fname_init = ""; + string fname_lign = ""; // file name for user alignment vector sequence; // get value from alignment file double Lnorm_ass, d0_scale; bool h_opt = false; // print full help message - int i_opt = 0; // 3 for -I, stick to user given alignment - int a_opt = 0; // flag for -a, do not normalized by average length - int s_opt = 2; // flag for -s, normalized by longer length + int i_opt = 0; // 3 for -I, stick to user given alignment + int a_opt = 0; // flag for -a, do not normalized by average length + int s_opt = 2; // flag for -s, normalized by longer length bool u_opt = false; // flag for -u, normalized by user specified length bool d_opt = false; // flag for -d, user specified d0 - int infmt_opt =-1; // PDB or PDBx/mmCIF format - int ter_opt =3; // TER, END, or different chainID - int split_opt =0; // do not split chain - bool fast_opt =false; // flags for -fast, fTM-align algorithm - int het_opt =0; // do not read HETATM residues - string atom_opt ="auto";// use C alpha atom for protein and C3' for RNA - string mol_opt ="auto";// auto-detect the molecule type as protein/RNA - string suffix_opt=""; // set -suffix to empty - string dir_opt =""; // set -dir to empty - int byresi_opt=0; // set -byresi to 0 + int infmt_opt = -1; // PDB or PDBx/mmCIF format + int ter_opt = 3; // TER, END, or different chainID + int split_opt = 0; // do not split chain + bool fast_opt = false; // flags for -fast, fTM-align algorithm + int het_opt = 0; // do not read HETATM residues + string atom_opt = "auto"; // use C alpha atom for protein and C3' for RNA + string mol_opt = "auto"; // auto-detect the molecule type as protein/RNA + string suffix_opt = ""; // set -suffix to empty + string dir_opt = ""; // set -dir to empty + int byresi_opt = 0; // set -byresi to 0 vector chain_list; vector chain2parse; vector model2parse; - map > init_cluster; + map> init_cluster; - for(int i = 1; i < argc; i++) + for (int i = 1; i < argc; i++) { - if ( (!strcmp(argv[i],"-u")||!strcmp(argv[i],"-L")) && i < (argc-1) ) + if ((!strcmp(argv[i], "-u") || !strcmp(argv[i], "-L")) && i < (argc - 1)) { PrintErrorAndQuit("Sorry! -u has not been implemented yet"); - Lnorm_ass = atof(argv[i + 1]); u_opt = true; i++; + Lnorm_ass = atof(argv[i + 1]); + u_opt = true; + i++; } - else if ( !strcmp(argv[i],"-d") && i < (argc-1) ) + else if (!strcmp(argv[i], "-d") && i < (argc - 1)) { PrintErrorAndQuit("Sorry! -d has not been implemented yet"); - d0_scale = atof(argv[i + 1]); d_opt = true; i++; + d0_scale = atof(argv[i + 1]); + d_opt = true; + i++; } - else if (!strcmp(argv[i], "-I") && i < (argc-1) ) + else if (!strcmp(argv[i], "-I") && i < (argc - 1)) { - fname_lign = argv[i + 1]; i_opt = 3; i++; + fname_lign = argv[i + 1]; + i_opt = 3; + i++; } - else if ( !strcmp(argv[i],"-o") && i < (argc-1) ) + else if (!strcmp(argv[i], "-o") && i < (argc - 1)) { - fname_clust = argv[i + 1]; i++; + fname_clust = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-a") && i < (argc-1)) + else if (!strcmp(argv[i], "-a") && i < (argc - 1)) { PrintErrorAndQuit("Sorry! -a is not used for clustering"); } - else if ( !strcmp(argv[i],"-s") && i < (argc-1) ) + else if (!strcmp(argv[i], "-s") && i < (argc - 1)) { - s_opt=atoi(argv[i + 1]); i++; - if (s_opt<1 || s_opt>6) + s_opt = atoi(argv[i + 1]); + i++; + if (s_opt < 1 || s_opt > 6) PrintErrorAndQuit("-s must be within 1 to 6"); } - else if ( !strcmp(argv[i],"-h") ) + else if (!strcmp(argv[i], "-h")) { h_opt = true; } @@ -221,136 +232,157 @@ int main(int argc, char *argv[]) { fast_opt = true; } - else if ( !strcmp(argv[i],"-infmt") && i < (argc-1) ) + else if (!strcmp(argv[i], "-infmt") && i < (argc - 1)) { - infmt_opt=atoi(argv[i + 1]); i++; + infmt_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-ter") && i < (argc-1) ) + else if (!strcmp(argv[i], "-ter") && i < (argc - 1)) { - ter_opt=atoi(argv[i + 1]); i++; + ter_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-split") && i < (argc-1) ) + else if (!strcmp(argv[i], "-split") && i < (argc - 1)) { - split_opt=atoi(argv[i + 1]); i++; + split_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-atom") && i < (argc-1) ) + else if (!strcmp(argv[i], "-atom") && i < (argc - 1)) { - atom_opt=argv[i + 1]; i++; + atom_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-mol") && i < (argc-1) ) + else if (!strcmp(argv[i], "-mol") && i < (argc - 1)) { - mol_opt=argv[i + 1]; i++; + mol_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-dir") && i < (argc-1) ) + else if (!strcmp(argv[i], "-dir") && i < (argc - 1)) { - dir_opt=argv[i + 1]; i++; + dir_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-suffix") && i < (argc-1) ) + else if (!strcmp(argv[i], "-suffix") && i < (argc - 1)) { - suffix_opt=argv[i + 1]; i++; + suffix_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-TMcut") && i < (argc-1) ) + else if (!strcmp(argv[i], "-TMcut") && i < (argc - 1)) { - TMcut=atof(argv[i + 1]); i++; - if (TMcut>1 or TMcut<0.45) + TMcut = atof(argv[i + 1]); + i++; + if (TMcut > 1 or TMcut < 0.45) PrintErrorAndQuit("TMcut must be in the range of [0.45,1)"); } - else if ( !strcmp(argv[i],"-byresi") && i < (argc-1) ) + else if (!strcmp(argv[i], "-byresi") && i < (argc - 1)) { PrintErrorAndQuit("Sorry! -byresi has not been implemented yet"); - byresi_opt=atoi(argv[i + 1]); i++; + byresi_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-het") && i < (argc-1) ) + else if (!strcmp(argv[i], "-het") && i < (argc - 1)) { - het_opt=atoi(argv[i + 1]); i++; + het_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-init") && i < (argc-1) ) + else if (!strcmp(argv[i], "-init") && i < (argc - 1)) { - read_init_cluster(argv[i+1],init_cluster); i++; + read_init_cluster(argv[i + 1], init_cluster); + i++; } - else if (!strcmp(argv[i], "-chain") ) + else if (!strcmp(argv[i], "-chain")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -chain"); - split(argv[i+1],chain2parse,','); + split(argv[i + 1], chain2parse, ','); i++; } - else if (!strcmp(argv[i], "-model") ) + else if (!strcmp(argv[i], "-model")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -model"); - split(argv[i+1],model2parse,','); + split(argv[i + 1], model2parse, ','); i++; } - else if (xname.size() == 0) xname=argv[i]; - else PrintErrorAndQuit(string("ERROR! Undefined option ")+argv[i]); + else if (xname.size() == 0) + xname = argv[i]; + else + PrintErrorAndQuit(string("ERROR! Undefined option ") + argv[i]); } - if(xname.size()==0) print_help(h_opt); + if (xname.size() == 0) + print_help(h_opt); - if (suffix_opt.size() && dir_opt.size()==0) + if (suffix_opt.size() && dir_opt.size() == 0) PrintErrorAndQuit("-suffix is only valid if -dir, -dir1 or -dir2 is set"); - if (atom_opt.size()!=4) + if (atom_opt.size() != 4) PrintErrorAndQuit("ERROR! Atom name must have 4 characters, including space."); - if (mol_opt!="auto" && mol_opt!="protein" && mol_opt!="RNA") + if (mol_opt != "auto" && mol_opt != "protein" && mol_opt != "RNA") PrintErrorAndQuit("ERROR! Molecule type must be either RNA or protein."); - else if (mol_opt=="protein" && atom_opt=="auto") - atom_opt=" CA "; - else if (mol_opt=="RNA" && atom_opt=="auto") - atom_opt=" C3'"; + else if (mol_opt == "protein" && atom_opt == "auto") + atom_opt = " CA "; + else if (mol_opt == "RNA" && atom_opt == "auto") + atom_opt = " C3'"; - if (u_opt && Lnorm_ass<=0) + if (u_opt && Lnorm_ass <= 0) PrintErrorAndQuit("Wrong value for option -u! It should be >0"); - if (d_opt && d0_scale<=0) + if (d_opt && d0_scale <= 0) PrintErrorAndQuit("Wrong value for option -d! It should be >0"); - if (split_opt==1 && ter_opt!=0) + if (split_opt == 1 && ter_opt != 0) PrintErrorAndQuit("-split 1 should be used with -ter 0"); - else if (split_opt==2 && ter_opt!=0 && ter_opt!=1) + else if (split_opt == 2 && ter_opt != 0 && ter_opt != 1) PrintErrorAndQuit("-split 2 should be used with -ter 0 or 1"); - if (split_opt<0 || split_opt>2) + if (split_opt < 0 || split_opt > 2) PrintErrorAndQuit("-split can only be 0, 1 or 2"); /* read initial alignment file from 'align.txt' */ - if (i_opt) read_user_alignment(sequence, fname_lign, i_opt); + if (i_opt) + read_user_alignment(sequence, fname_lign, i_opt); - if (byresi_opt) i_opt=3; + if (byresi_opt) + i_opt = 3; /* parse file list */ - if (dir_opt.size()==0) chain_list.push_back(xname); - else file2chainlist(chain_list, xname, dir_opt, suffix_opt); + if (dir_opt.size() == 0) + chain_list.push_back(xname); + else + file2chainlist(chain_list, xname, dir_opt, suffix_opt); /* declare previously global variables */ - vector >PDB_lines; // text of chain - vector mol_vec; // molecule type of chain1, RNA if >0 - vector chainID_list; // list of chainID - size_t xchainnum=0; // number of chains in a PDB file - size_t i,j; // number of residues/chains in a PDB is - // usually quite limited. Yet, the number of - // files can be very large. size_t is safer - // than int for very long list of files - int xlen,ylen; // chain length - double **xa,**ya; // xyz coordinate - vector resi_vec; // residue index for chain, dummy variable - vector >chainLen_list; // vector of (length,index) pair - vector > seq_vec; - vector > sec_vec; - vector > >xyz_vec; + vector> PDB_lines; // text of chain + vector mol_vec; // molecule type of chain1, RNA if >0 + vector chainID_list; // list of chainID + size_t xchainnum = 0; // number of chains in a PDB file + size_t i, j; // number of residues/chains in a PDB is + // usually quite limited. Yet, the number of + // files can be very large. size_t is safer + // than int for very long list of files + int xlen, ylen; // chain length + double **xa, **ya; // xyz coordinate + vector resi_vec; // residue index for chain, dummy variable + vector> chainLen_list; // vector of (length,index) pair + vector> seq_vec; + vector> sec_vec; + vector>> xyz_vec; /* parse files */ string chain_name; - vector seq_tmp; - vector sec_tmp; - vector flt_tmp(3,0); - vector >xyz_tmp; + vector seq_tmp; + vector sec_tmp; + vector flt_tmp(3, 0); + vector> xyz_tmp; int r; // residue index size_t newchainnum; - double ub_HwRMSD=0.90*TMcut+0.10; - double lb_HwRMSD=0.5*TMcut; - double ub_TMfast=0.90*TMcut+0.10; - double lb_TMfast=0.9*TMcut; - if (s_opt==2 || s_opt==4 || s_opt==5) a_opt=-2; // normalized by longer length, i.e. smaller TM - else if (s_opt==1 || s_opt==5) a_opt=-1; // normalized by shorter length, i.e. larger TM - else if (s_opt==3) a_opt= 1; // normalized by average length + double ub_HwRMSD = 0.90 * TMcut + 0.10; + double lb_HwRMSD = 0.5 * TMcut; + double ub_TMfast = 0.90 * TMcut + 0.10; + double lb_TMfast = 0.9 * TMcut; + if (s_opt == 2 || s_opt == 4 || s_opt == 5) + a_opt = -2; // normalized by longer length, i.e. smaller TM + else if (s_opt == 1 || s_opt == 5) + a_opt = -1; // normalized by shorter length, i.e. larger TM + else if (s_opt == 3) + a_opt = 1; // normalized by average length #ifdef TMalign_HwRMSD_h /* These parameters controls HwRMSD filter. iter_opt typically should be @@ -361,50 +393,54 @@ int main(int argc, char *argv[]) * After HwRMSD filter, at least min_repr_num and at most max_repr_num * are used for subsequent TMalign. The actual number of representatives * are decided by xlen */ - const int glocal =0; // global alignment - const int iter_opt =10; - const int min_repr_num=10; - const int max_repr_num=50; + const int glocal = 0; // global alignment + const int iter_opt = 10; + const int min_repr_num = 10; + const int max_repr_num = 50; #endif - for (i=0;i >().swap(PDB_lines); - size_t Nstruct=chainLen_list.size(); + vector>().swap(PDB_lines); + size_t Nstruct = chainLen_list.size(); /* sort by chain length */ - stable_sort(chainLen_list.begin(),chainLen_list.end(), - greater >()); - cout<<"Clustering "<="<>()); + cout << "Clustering " << chainLen_list.size() + << " chains with TM-score cutoff >=" << TMcut << '\n' + << "Longest chain " << chainID_list[chainLen_list[0].second] << '\t' + << chainLen_list[0].first << " residues.\n" + << "Shortest chain " << chainID_list[chainLen_list.back().second] << '\t' + << chainLen_list.back().first << " residues." << endl; /* set the first cluster */ - vector clust_mem_vec(Nstruct,-1); // cluster membership - vector clust_repr_vec; // the same as number of clusters - size_t chain_i=chainLen_list[0].second; + vector clust_mem_vec(Nstruct, -1); // cluster membership + vector clust_repr_vec; // the same as number of clusters + size_t chain_i = chainLen_list[0].second; clust_repr_vec.push_back(chain_i); - clust_mem_vec[chain_i]=0; - map clust_repr_map; + clust_mem_vec[chain_i] = 0; + map clust_repr_map; /* perform alignment */ size_t chain_j; - const double fast_lb=50.; // proteins shorter than fast_lb never use -fast - const double fast_ub=1000.;// proteins longer than fast_ub always use -fast - double Lave; // average protein length for chain_i and chain_j - size_t sizePROT; // number of representatives for current chain - vector index_vec; // index of cluster representatives for the chain - bool found_clust; // whether current chain hit previous cluster - - for (i=1;i index_vec; // index of cluster representatives for the chain + bool found_clust; // whether current chain hit previous cluster + + for (i = 1; i < Nstruct; i++) { - chain_i=chainLen_list[i].second; - xlen=xyz_vec[chain_i].size(); - if (xlen<=5) // TMalign cannot handle L<=5 + chain_i = chainLen_list[i].second; + xlen = xyz_vec[chain_i].size(); + if (xlen <= 5) // TMalign cannot handle L<=5 { - clust_mem_vec[chain_i]=clust_repr_vec.size(); + clust_mem_vec[chain_i] = clust_repr_vec.size(); clust_repr_vec.push_back(clust_repr_vec.size()); continue; } NewArray(&xa, xlen, 3); - for (r=0;r0;j--) - { - chain_j=clust_repr_vec[j-1]; - ylen=xyz_vec[chain_j].size(); - if (mol_vec[chain_i]*mol_vec[chain_j]<0) continue; - else if (s_opt==2 && xlen 0; j--) + { + chain_j = clust_repr_vec[j - 1]; + ylen = xyz_vec[chain_j].size(); + if (mol_vec[chain_i] * mol_vec[chain_j] < 0) + continue; + else if (s_opt == 2 && xlen < TMcut * ylen) + continue; + else if (s_opt == 3 && xlen < (2 * TMcut - 1) * ylen) + continue; + else if (s_opt == 4 && xlen * (2 / TMcut - 1) < ylen) + continue; + else if (s_opt == 5 && xlen < TMcut * TMcut * ylen) + continue; + else if (s_opt == 6 && xlen * xlen < (2 * TMcut * TMcut - 1) * ylen * ylen) + continue; index_vec.push_back(chain_j); } - sizePROT=index_vec.size(); + sizePROT = index_vec.size(); - string key=chainID_list[chain_i]; - cout<<'>'<' << chainID_list[chain_i] << '\t' << xlen << '\t' + << setiosflags(ios::fixed) << setprecision(2) + << 100. * i / Nstruct << "%(#" << i << ")\t" + << "#repr=" << sizePROT << "/" << clust_repr_vec.size() << endl; #ifdef TMalign_HwRMSD_h - vector > HwRMSDscore_list; + vector> HwRMSDscore_list; double TM; - size_t init_count=0; - for (j=0;j=2 && - HwRMSDscore_list.size()>=init_cluster[key].size() && !init_cluster[key].count(value)) + chain_j = index_vec[j]; + string value = chainID_list[chain_j]; + if (init_cluster.count(key) && init_count >= 2 && + HwRMSDscore_list.size() >= init_cluster[key].size() && !init_cluster[key].count(value)) + continue; + ylen = xyz_vec[chain_j].size(); + if (mol_vec[chain_i] * mol_vec[chain_j] < 0) + continue; + else if (s_opt == 2 && xlen < TMcut * ylen) + continue; + else if (s_opt == 3 && xlen < (2 * TMcut - 1) * ylen) + continue; + else if (s_opt == 4 && xlen * (2 / TMcut - 1) < ylen) continue; - ylen=xyz_vec[chain_j].size(); - if (mol_vec[chain_i]*mol_vec[chain_j]<0) continue; - else if (s_opt==2 && xlen "< "<=lb_HwRMSD || Lave<=fast_lb) + TM = TM3; // average length + if (s_opt == 1) + TM = TM2; // shorter length + else if (s_opt == 2) + TM = TM1; // longer length + else if (s_opt == 3) + TM = (TM1 + TM2) / 2; // average TM + else if (s_opt == 4) + TM = 2 / (1 / TM1 + 1 / TM2); // harmonic average + else if (s_opt == 5) + TM = sqrt(TM1 * TM2); // geometric average + else if (s_opt == 6) + TM = sqrt((TM1 * TM1 + TM2 * TM2) / 2); // root mean square + + Lave = sqrt(xlen * ylen); // geometry average because O(L1*L2) + if (TM >= lb_HwRMSD || Lave <= fast_lb) { if (init_cluster.count(key) && init_cluster[key].count(value)) { - HwRMSDscore_list.push_back(make_pair(TM+1,index_vec[j])); + HwRMSDscore_list.push_back(make_pair(TM + 1, index_vec[j])); init_count++; - if (init_count==init_cluster[key].size()) break; + if (init_count == init_cluster[key].size()) + break; } else - HwRMSDscore_list.push_back(make_pair(TM,index_vec[j])); + HwRMSDscore_list.push_back(make_pair(TM, index_vec[j])); } /* clean up after each HwRMSD */ @@ -588,82 +644,93 @@ int main(int argc, char *argv[]) seqxA.clear(); seqyA.clear(); DeleteArray(&ya, ylen); - delete [] invmap; + delete[] invmap; /* if a good hit is guaranteed to be found, stop the loop */ - if (TM>=ub_HwRMSD) break; + if (TM >= ub_HwRMSD) + break; } - stable_sort(HwRMSDscore_list.begin(),HwRMSDscore_list.end(), - greater >()); + stable_sort(HwRMSDscore_list.begin(), HwRMSDscore_list.end(), + greater>()); - int cur_repr_num_cutoff=min_repr_num; - if (xlen<=fast_lb) cur_repr_num_cutoff=max_repr_num; - else if (xlen>fast_lb && xlen=2) cur_repr_num_cutoff=init_count; + int cur_repr_num_cutoff = min_repr_num; + if (xlen <= fast_lb) + cur_repr_num_cutoff = max_repr_num; + else if (xlen > fast_lb && xlen < fast_ub) + cur_repr_num_cutoff += + (fast_ub - xlen) / (fast_ub - fast_lb) * (max_repr_num - min_repr_num); + // if (init_count>=2) cur_repr_num_cutoff=init_count; index_vec.clear(); - for (j=0;jfast_lb && TM=cur_repr_num_cutoff) break; + for (j = 0; j < HwRMSDscore_list.size(); j++) + { + TM = HwRMSDscore_list[j].first; + chain_j = HwRMSDscore_list[j].second; + ylen = xyz_vec[chain_j].size(); + Lave = sqrt(xlen * ylen); // geometry average because O(L1*L2) + if (Lave > fast_lb && TM < TMcut * 0.5 && + index_vec.size() >= cur_repr_num_cutoff) + break; index_vec.push_back(chain_j); - cout<<"#"<=fast_ub); - + Lave = sqrt(xlen * ylen); // geometry average because O(L1*L2) + bool overwrite_fast_opt = (fast_opt == true || Lave >= fast_ub); + /* declare variable specific to this pair of TMalign */ double t0[3], u0[3][3]; double TM1, TM2; - double TM3, TM4, TM5; // for s_opt, u_opt, d_opt + double TM3, TM4, TM5; // for s_opt, u_opt, d_opt double d0_0, TM_0; double d0A, d0B, d0u, d0a; - double d0_out=5.0; - string seqM, seqxA, seqyA;// for output alignment + double d0_out = 5.0; + string seqM, seqxA, seqyA; // for output alignment double rmsd0 = 0.0; - int L_ali; // Aligned length in standard_TMscore - double Liden=0; - double TM_ali, rmsd_ali; // TMscore and rmsd in standard_TMscore - int n_ali=0; - int n_ali8=0; + int L_ali; // Aligned length in standard_TMscore + double Liden = 0; + double TM_ali, rmsd_ali; // TMscore and rmsd in standard_TMscore + int n_ali = 0; + int n_ali8 = 0; vector do_vec; - + /* entry function for structure alignment */ - int status=TMalign_main( + int status = TMalign_main( xa, ya, &seq_vec[chain_i][0], &seq_vec[chain_j][0], &sec_vec[chain_i][0], &sec_vec[chain_j][0], t0, u0, TM1, TM2, TM3, TM4, TM5, @@ -672,42 +739,48 @@ int main(int argc, char *argv[]) rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, xlen, ylen, sequence, Lnorm_ass, d0_scale, i_opt, a_opt, u_opt, d_opt, overwrite_fast_opt, - mol_vec[chain_i]+mol_vec[chain_j],TMcut); + mol_vec[chain_i] + mol_vec[chain_j], TMcut, 0); - cout<=ub_TMfast || - (TM>=TMcut && (fast_opt || overwrite_fast_opt==false))) + if (TM >= ub_TMfast || + (TM >= TMcut && (fast_opt || overwrite_fast_opt == false))) { - clust_mem_vec[chain_i]=clust_repr_map[chain_j]; + clust_mem_vec[chain_i] = clust_repr_map[chain_j]; DeleteArray(&ya, ylen); - found_clust=true; + found_clust = true; break; } - if (TM=TMcut) + + TM = TM3; // average length + if (s_opt == 1) + TM = TM2; // shorter length + else if (s_opt == 2) + TM = TM1; // longer length + else if (s_opt == 3) + TM = (TM1 + TM2) / 2; // average TM + else if (s_opt == 4) + TM = 2 / (1 / TM1 + 1 / TM2); // harmonic average + else if (s_opt == 5) + TM = sqrt(TM1 * TM2); // geometric average + else if (s_opt == 6) + TM = sqrt((TM1 * TM1 + TM2 * TM2) / 2); // root mean square + cout << "*\t" << chainID_list[chain_j] << '\t' << TM2 << '\t' << TM1 << endl; + if (TM >= TMcut) { - clust_mem_vec[chain_i]=clust_repr_map[chain_j]; - found_clust=true; + clust_mem_vec[chain_i] = clust_repr_map[chain_j]; + found_clust = true; break; } } @@ -746,15 +825,15 @@ int main(int argc, char *argv[]) if (!found_clust) // new cluster { - clust_mem_vec[chain_i]=clust_repr_vec.size(); - clust_repr_map[chain_i]=clust_repr_vec.size(); + clust_mem_vec[chain_i] = clust_repr_vec.size(); + clust_repr_map[chain_i] = clust_repr_vec.size(); clust_repr_vec.push_back(chain_i); } else // member structures are not used further { - vector ().swap(seq_vec[chain_i]); - vector ().swap(sec_vec[chain_i]); - vector > ().swap(xyz_vec[chain_i]); + vector().swap(seq_vec[chain_i]); + vector().swap(sec_vec[chain_i]); + vector>().swap(xyz_vec[chain_i]); } } @@ -766,24 +845,25 @@ int main(int argc, char *argv[]) /* print out cluster */ stringstream txt; - for (j=0;j().swap(chain2parse); vector().swap(model2parse); - map >().swap(init_cluster); + map>().swap(init_cluster); t2 = clock(); - float diff = ((float)t2 - (float)t1)/CLOCKS_PER_SEC; + float diff = ((float)t2 - (float)t1) / CLOCKS_PER_SEC; printf("#Total CPU time is %5.2f seconds\n", diff); return 0; }