From 655963e61621a60481fb8b6b1d031fce540fc34c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A5=9D=E4=BA=91=E7=AF=AA?= Date: Thu, 12 Mar 2026 06:34:51 +0800 Subject: [PATCH 01/23] add -mm 8/9 --- .gitignore | 2 + MMalign.cpp | 1051 ++++++----- MMalign.h | 4 +- TMalign.cpp | 816 ++++---- TMalign.h | 4716 ++++++++++++++++++++++++---------------------- USalign.cpp | 5126 ++++++++++++++++++++++++++++---------------------- flexalign.h | 2449 +++++++++++++----------- qTMclust.cpp | 944 +++++----- 8 files changed, 8200 insertions(+), 6908 deletions(-) diff --git a/.gitignore b/.gitignore index 32d1bd3..321adfa 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ .*.sw* # binary executables +addChainID cif2pdb TMalign TMalignc @@ -24,4 +25,5 @@ pdbAtomName *.zip .idea +.gitignore cmake-build-debug diff --git a/MMalign.cpp b/MMalign.cpp index 72f4b45..5d8e4b4 100644 --- a/MMalign.cpp +++ b/MMalign.cpp @@ -6,131 +6,130 @@ using namespace std; void print_version() { - cout << -"\n" -" **********************************************************************\n" -" * MM-align (Version 20231222): complex structure alignment *\n" -" * References: S Mukherjee, Y Zhang. Nucl Acids Res 37(11):e83 (2009) *\n" -" * Please email comments and suggestions to yangzhanglab@umich.edu *\n" -" **********************************************************************" - << endl; + cout << "\n" + " **********************************************************************\n" + " * MM-align (Version 20231222): complex structure alignment *\n" + " * References: S Mukherjee, Y Zhang. 
Nucl Acids Res 37(11):e83 (2009) *\n" + " * Please email comments and suggestions to yangzhanglab@umich.edu *\n" + " **********************************************************************" + << endl; } void print_extra_help() { - cout << -"Additional options:\n" -" -fast Fast but slightly inaccurate alignment\n" -"\n" -" -dir1 Use a list of PDB chains listed by 'chain1_list' under\n" -" 'chain1_folder' as all chains for the first complex.\n" -" Note that the slash is necessary.\n" -" $ MMalign -dir1 chain1_folder/ chain1_list complex2\n" -"\n" -" -dir2 Use a list of PDB chains listed by'chain2_list'\n" -" under 'chain2_folder' as all chains for the second complex.\n" -" $ MMalign complex1 -dir2 chain2_folder/ chain2_list\n" -"\n" -" -suffix (Only when -dir1 and/or -dir2 are set, default is empty)\n" -" add file name suffix to files listed by chain1_list or chain2_list\n" -"\n" -" -atom 4-character atom name used to represent a residue.\n" -" Default is \" C3'\" for RNA/DNA and \" CA \" for proteins\n" -" (note the spaces before and after CA).\n" -"\n" -" -mol Types of molecules to align\n""Molecule type: RNA or protein\n" -" auto : (default) align both proteins and nucleic acids\n" -" protein: only align proteins\n" -" RNA : only align nucleic acids (RNA and DNA)\n" -"\n" -" -split Whether to split PDB file into multiple chains\n" -" 2: (default) treat each chain as a seperate chain (-ter should be <=1)\n" -" 1: treat each MODEL as a separate chain (-ter should be 0)\n" -" and joins all chains in a MODEL into a single chain.\n" -"\n" -" -outfmt Output format\n" -" 0: (default) full output\n" -" 1: fasta format compact output\n" -" 2: tabular format very compact output\n" -" -1: full output, but without version or citation information\n" -"\n" -" -TMcut -1: (default) do not consider TMcut\n" -" Values in [0.5,1): Do not proceed with TM-align for this\n" -" structure pair if TM-score is unlikely to reach TMcut.\n" -" TMcut is normalized is set by -a option:\n" -" -2: 
normalized by longer structure length\n" -" -1: normalized by shorter structure length\n" -" 0: (default, same as F) normalized by second structure\n" -" 1: same as T, normalized by average structure length\n" -"\n" -" -mirror Whether to align the mirror image of input structure\n" -" 0: (default) do not align mirrored structure\n" -" 1: align mirror of chain1 to origin chain2\n" -"\n" -" -het Whether to align residues marked as 'HETATM' in addition to 'ATOM '\n" -" 0: (default) only align 'ATOM ' residues\n" -" 1: align both 'ATOM ' and 'HETATM' residues\n" -"\n" -" -infmt1 Input format for complex1\n" -" -infmt2 Input format for complex2\n" -" -1: (default) automatically detect PDB or PDBx/mmCIF format\n" -" 0: PDB format\n" -" 1: SPICKER format\n" -" 2: xyz format\n" -" 3: PDBx/mmCIF format\n" - < sequence; // get value from alignment file - double d0_scale =0; + string xname = ""; + string yname = ""; + string fname_super = ""; // file name for superposed structure + string fname_lign = ""; // file name for user alignment + string fname_matrix = ""; // file name for output matrix + vector sequence; // get value from alignment file + double d0_scale = 0; bool h_opt = false; // print full help message bool v_opt = false; // print version bool m_opt = false; // flag for -m, output rotation matrix bool o_opt = false; // flag for -o, output superposed structure - int a_opt = 0; // flag for -a, do not normalized by average length + int a_opt = 0; // flag for -a, do not normalized by average length bool d_opt = false; // flag for -d, user specified d0 - bool full_opt = false;// do not show chain level alignment - double TMcut =-1; - int infmt1_opt=-1; // PDB or PDBx/mmCIF format for chain_1 - int infmt2_opt=-1; // PDB or PDBx/mmCIF format for chain_2 - int ter_opt =1; // ENDMDL or END - int split_opt =2; // split by chain - int outfmt_opt=0; // set -outfmt to full output - bool fast_opt =false; // flags for -fast, fTM-align algorithm - int mirror_opt=0; // do not 
align mirror - int het_opt =0; // do not read HETATM residues - string atom_opt ="auto";// use C alpha atom for protein and C3' for RNA - string mol_opt ="auto";// auto-detect the molecule type as protein/RNA - string suffix_opt=""; // set -suffix to empty - string dir1_opt =""; // set -dir1 to empty - string dir2_opt =""; // set -dir2 to empty + bool full_opt = false; // do not show chain level alignment + double TMcut = -1; + int infmt1_opt = -1; // PDB or PDBx/mmCIF format for chain_1 + int infmt2_opt = -1; // PDB or PDBx/mmCIF format for chain_2 + int ter_opt = 1; // ENDMDL or END + int split_opt = 2; // split by chain + int outfmt_opt = 0; // set -outfmt to full output + bool fast_opt = false; // flags for -fast, fTM-align algorithm + int mirror_opt = 0; // do not align mirror + int het_opt = 0; // do not read HETATM residues + string atom_opt = "auto"; // use C alpha atom for protein and C3' for RNA + string mol_opt = "auto"; // auto-detect the molecule type as protein/RNA + string suffix_opt = ""; // set -suffix to empty + string dir1_opt = ""; // set -dir1 to empty + string dir2_opt = ""; // set -dir2 to empty vector chain1_list; // only when -dir1 is set vector chain2_list; // only when -dir2 is set vector chain2parse1; @@ -175,266 +174,299 @@ int main(int argc, char *argv[]) vector model2parse1; vector model2parse2; - for(int i = 1; i < argc; i++) + for (int i = 1; i < argc; i++) { - if ( !strcmp(argv[i],"-o") && i < (argc-1) ) + if (!strcmp(argv[i], "-o") && i < (argc - 1)) { - fname_super = argv[i + 1]; o_opt = true; i++; + fname_super = argv[i + 1]; + o_opt = true; + i++; } - else if ( !strcmp(argv[i],"-a") && i < (argc-1) ) + else if (!strcmp(argv[i], "-a") && i < (argc - 1)) { - if (!strcmp(argv[i + 1], "T")) a_opt=true; - else if (!strcmp(argv[i + 1], "F")) a_opt=false; - else + if (!strcmp(argv[i + 1], "T")) + a_opt = true; + else if (!strcmp(argv[i + 1], "F")) + a_opt = false; + else { - a_opt=atoi(argv[i + 1]); - if (a_opt!=-2 && a_opt!=-1 && 
a_opt!=1) + a_opt = atoi(argv[i + 1]); + if (a_opt != -2 && a_opt != -1 && a_opt != 1) PrintErrorAndQuit("-a must be -2, -1, 1, T or F"); } i++; } - else if ( !strcmp(argv[i],"-full") && i < (argc-1) ) + else if (!strcmp(argv[i], "-full") && i < (argc - 1)) { - if (!strcmp(argv[i + 1], "T")) full_opt=true; - else if (!strcmp(argv[i + 1], "F")) full_opt=false; - else PrintErrorAndQuit("-full must be T or F"); + if (!strcmp(argv[i + 1], "T")) + full_opt = true; + else if (!strcmp(argv[i + 1], "F")) + full_opt = false; + else + PrintErrorAndQuit("-full must be T or F"); i++; } - else if ( !strcmp(argv[i],"-d") && i < (argc-1) ) + else if (!strcmp(argv[i], "-d") && i < (argc - 1)) { - d0_scale = atof(argv[i + 1]); d_opt = true; i++; + d0_scale = atof(argv[i + 1]); + d_opt = true; + i++; } - else if ( !strcmp(argv[i],"-v") ) + else if (!strcmp(argv[i], "-v")) { v_opt = true; } - else if ( !strcmp(argv[i],"-h") ) + else if (!strcmp(argv[i], "-h")) { h_opt = true; } - else if (!strcmp(argv[i], "-chain1") ) + else if (!strcmp(argv[i], "-chain1")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -chain1"); - split(argv[i+1],chain2parse1,','); + split(argv[i + 1], chain2parse1, ','); i++; } - else if (!strcmp(argv[i], "-chain2") ) + else if (!strcmp(argv[i], "-chain2")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -chain2"); - split(argv[i+1],chain2parse2,','); + split(argv[i + 1], chain2parse2, ','); i++; } - else if (!strcmp(argv[i], "-model1") ) + else if (!strcmp(argv[i], "-model1")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -model1"); - split(argv[i+1],model2parse1,','); + split(argv[i + 1], model2parse1, ','); i++; } - else if (!strcmp(argv[i], "-model2") ) + else if (!strcmp(argv[i], "-model2")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! 
Missing value for -model2"); - split(argv[i+1],model2parse2,','); + split(argv[i + 1], model2parse2, ','); i++; } - else if (!strcmp(argv[i], "-m") && i < (argc-1) ) + else if (!strcmp(argv[i], "-m") && i < (argc - 1)) { - fname_matrix = argv[i + 1]; m_opt = true; i++; - }// get filename for rotation matrix + fname_matrix = argv[i + 1]; + m_opt = true; + i++; + } // get filename for rotation matrix else if (!strcmp(argv[i], "-fast")) { fast_opt = true; } - else if ( !strcmp(argv[i],"-infmt1") && i < (argc-1) ) + else if (!strcmp(argv[i], "-infmt1") && i < (argc - 1)) { - infmt1_opt=atoi(argv[i + 1]); i++; + infmt1_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-infmt2") && i < (argc-1) ) + else if (!strcmp(argv[i], "-infmt2") && i < (argc - 1)) { - infmt2_opt=atoi(argv[i + 1]); i++; + infmt2_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-ter") && i < (argc-1) ) + else if (!strcmp(argv[i], "-ter") && i < (argc - 1)) { - ter_opt=atoi(argv[i + 1]); i++; + ter_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-split") && i < (argc-1) ) + else if (!strcmp(argv[i], "-split") && i < (argc - 1)) { - split_opt=atoi(argv[i + 1]); i++; + split_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-atom") && i < (argc-1) ) + else if (!strcmp(argv[i], "-atom") && i < (argc - 1)) { - atom_opt=argv[i + 1]; i++; + atom_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-mol") && i < (argc-1) ) + else if (!strcmp(argv[i], "-mol") && i < (argc - 1)) { - mol_opt=argv[i + 1]; i++; + mol_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-dir1") && i < (argc-1) ) + else if (!strcmp(argv[i], "-dir1") && i < (argc - 1)) { - dir1_opt=argv[i + 1]; i++; + dir1_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-dir2") && i < (argc-1) ) + else if (!strcmp(argv[i], "-dir2") && i < (argc - 1)) { - dir2_opt=argv[i + 1]; i++; + dir2_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-suffix") && i < (argc-1) ) + else 
if (!strcmp(argv[i], "-suffix") && i < (argc - 1)) { - suffix_opt=argv[i + 1]; i++; + suffix_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-outfmt") && i < (argc-1) ) + else if (!strcmp(argv[i], "-outfmt") && i < (argc - 1)) { - outfmt_opt=atoi(argv[i + 1]); i++; + outfmt_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-TMcut") && i < (argc-1) ) + else if (!strcmp(argv[i], "-TMcut") && i < (argc - 1)) { - TMcut=atof(argv[i + 1]); i++; + TMcut = atof(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-het") && i < (argc-1) ) + else if (!strcmp(argv[i], "-het") && i < (argc - 1)) { - het_opt=atoi(argv[i + 1]); i++; + het_opt = atoi(argv[i + 1]); + i++; } - else if (xname.size() == 0) xname=argv[i]; - else if (yname.size() == 0) yname=argv[i]; - else PrintErrorAndQuit(string("ERROR! Undefined option ")+argv[i]); + else if (xname.size() == 0) + xname = argv[i]; + else if (yname.size() == 0) + yname = argv[i]; + else + PrintErrorAndQuit(string("ERROR! Undefined option ") + argv[i]); } - if(yname.size()==0) + if (yname.size() == 0) { - if (h_opt) print_help(h_opt); + if (h_opt) + print_help(h_opt); if (v_opt) { print_version(); exit(EXIT_FAILURE); } - if (xname.size()==0) + if (xname.size() == 0) PrintErrorAndQuit("Please provide input structures"); PrintErrorAndQuit("Please provide the second input structure"); } - if (suffix_opt.size() && dir1_opt.size()+dir2_opt.size()==0) + if (suffix_opt.size() && dir1_opt.size() + dir2_opt.size() == 0) PrintErrorAndQuit("-suffix is only valid if -dir1 or -dir2 is set"); if ((dir1_opt.size() || dir2_opt.size()) && (m_opt || o_opt)) PrintErrorAndQuit("-m or -o cannot be set with -dir1 or -dir2"); - if (atom_opt.size()!=4) + if (atom_opt.size() != 4) PrintErrorAndQuit("ERROR! Atom name must have 4 characters, including space."); - if (mol_opt!="auto" && mol_opt!="protein" && mol_opt!="RNA") + if (mol_opt != "auto" && mol_opt != "protein" && mol_opt != "RNA") PrintErrorAndQuit("ERROR! 
Molecule type must be either RNA or protein."); - else if (mol_opt=="protein" && atom_opt=="auto") - atom_opt=" CA "; - else if (mol_opt=="RNA" && atom_opt=="auto") - atom_opt=" C3'"; + else if (mol_opt == "protein" && atom_opt == "auto") + atom_opt = " CA "; + else if (mol_opt == "RNA" && atom_opt == "auto") + atom_opt = " C3'"; - if (d_opt && d0_scale<=0) + if (d_opt && d0_scale <= 0) PrintErrorAndQuit("Wrong value for option -d! It should be >0"); - if (outfmt_opt>=2 && (a_opt || d_opt)) + if (outfmt_opt >= 2 && (a_opt || d_opt)) PrintErrorAndQuit("-outfmt 2 cannot be used with -a, -d"); - if (ter_opt!=0 && ter_opt!=1) + if (ter_opt != 0 && ter_opt != 1) PrintErrorAndQuit("-ter should be 1 or 0"); - if (split_opt!=1 && split_opt!=2) + if (split_opt != 1 && split_opt != 2) PrintErrorAndQuit("-split should be 1 or 2"); - else if (split_opt==1 && ter_opt!=0) + else if (split_opt == 1 && ter_opt != 0) PrintErrorAndQuit("-split 1 should be used with -ter 0"); if (m_opt && fname_matrix == "") // Output rotation matrix: matrix.txt PrintErrorAndQuit("ERROR! 
Please provide a file name for option -m!"); /* parse file list */ - if (dir1_opt.size()==0) chain1_list.push_back(xname); - else file2chainlist(chain1_list, xname, dir1_opt, suffix_opt); + if (dir1_opt.size() == 0) + chain1_list.push_back(xname); + else + file2chainlist(chain1_list, xname, dir1_opt, suffix_opt); - if (dir2_opt.size()==0) chain2_list.push_back(yname); - else file2chainlist(chain2_list, yname, dir2_opt, suffix_opt); + if (dir2_opt.size() == 0) + chain2_list.push_back(yname); + else + file2chainlist(chain2_list, yname, dir2_opt, suffix_opt); - if (outfmt_opt==2) - cout<<"#PDBchain1\tPDBchain2\tTM1\tTM2\t" - <<"RMSD\tID1\tID2\tIDali\tL1\tL2\tLali"< > > xa_vec; // structure of complex1 - vector > > ya_vec; // structure of complex2 - vector >seqx_vec; // sequence of complex1 - vector >seqy_vec; // sequence of complex2 - vector >secx_vec; // secondary structure of complex1 - vector >secy_vec; // secondary structure of complex2 - vector mol_vec1; // molecule type of complex1, RNA if >0 - vector mol_vec2; // molecule type of complex2, RNA if >0 - vector chainID_list1; // list of chainID1 - vector chainID_list2; // list of chainID2 - vector xlen_vec; // length of complex1 - vector ylen_vec; // length of complex2 - int i,j; // chain index - int xlen, ylen; // chain length - double **xa, **ya; // structure of single chain - char *seqx, *seqy; // for the protein sequence - char *secx, *secy; // for the secondary structure - int xlen_aa,ylen_aa; // total length of protein - int xlen_na,ylen_na; // total length of RNA/DNA - vector resi_vec1; // residue index for chain1 - vector resi_vec2; // residue index for chain2 + vector>> xa_vec; // structure of complex1 + vector>> ya_vec; // structure of complex2 + vector> seqx_vec; // sequence of complex1 + vector> seqy_vec; // sequence of complex2 + vector> secx_vec; // secondary structure of complex1 + vector> secy_vec; // secondary structure of complex2 + vector mol_vec1; // molecule type of complex1, RNA if >0 + 
vector mol_vec2; // molecule type of complex2, RNA if >0 + vector chainID_list1; // list of chainID1 + vector chainID_list2; // list of chainID2 + vector xlen_vec; // length of complex1 + vector ylen_vec; // length of complex2 + int i, j; // chain index + int xlen, ylen; // chain length + double **xa, **ya; // structure of single chain + char *seqx, *seqy; // for the protein sequence + char *secx, *secy; // for the secondary structure + int xlen_aa, ylen_aa; // total length of protein + int xlen_na, ylen_na; // total length of RNA/DNA + vector resi_vec1; // residue index for chain1 + vector resi_vec2; // residue index for chain2 /* parse complex */ parse_chain_list(chain1_list, xa_vec, seqx_vec, secx_vec, mol_vec1, - xlen_vec, chainID_list1, ter_opt, split_opt, mol_opt, infmt1_opt, - atom_opt, false, mirror_opt, het_opt, xlen_aa, xlen_na, o_opt, - resi_vec1, chain2parse1, model2parse1); - if (xa_vec.size()==0) PrintErrorAndQuit("ERROR! 0 chain in complex 1"); + xlen_vec, chainID_list1, ter_opt, split_opt, mol_opt, infmt1_opt, + atom_opt, false, mirror_opt, het_opt, xlen_aa, xlen_na, o_opt, + resi_vec1, chain2parse1, model2parse1); + if (xa_vec.size() == 0) + PrintErrorAndQuit("ERROR! 0 chain in complex 1"); parse_chain_list(chain2_list, ya_vec, seqy_vec, secy_vec, mol_vec2, - ylen_vec, chainID_list2, ter_opt, split_opt, mol_opt, infmt2_opt, - atom_opt, false, 0, het_opt, ylen_aa, ylen_na, o_opt, - resi_vec2, chain2parse2, model2parse2); - if (ya_vec.size()==0) PrintErrorAndQuit("ERROR! 0 chain in complex 2"); - int len_aa=getmin(xlen_aa,ylen_aa); - int len_na=getmin(xlen_na,ylen_na); + ylen_vec, chainID_list2, ter_opt, split_opt, mol_opt, infmt2_opt, + atom_opt, false, 0, het_opt, ylen_aa, ylen_na, o_opt, + resi_vec2, chain2parse2, model2parse2); + if (ya_vec.size() == 0) + PrintErrorAndQuit("ERROR! 
0 chain in complex 2"); + int len_aa = getmin(xlen_aa, ylen_aa); + int len_na = getmin(xlen_na, ylen_na); if (a_opt) { - len_aa=(xlen_aa+ylen_aa)/2; - len_na=(xlen_na+ylen_na)/2; + len_aa = (xlen_aa + ylen_aa) / 2; + len_na = (xlen_na + ylen_na) / 2; } - map chainmap; + map chainmap; /* perform monomer alignment if there is only one chain */ - if (xa_vec.size()==1 && ya_vec.size()==1) + if (xa_vec.size() == 1 && ya_vec.size() == 1) { xlen = xlen_vec[0]; ylen = ylen_vec[0]; - seqx = new char[xlen+1]; - seqy = new char[ylen+1]; - secx = new char[xlen+1]; - secy = new char[ylen+1]; + seqx = new char[xlen + 1]; + seqy = new char[ylen + 1]; + secx = new char[xlen + 1]; + secy = new char[ylen + 1]; NewArray(&xa, xlen, 3); NewArray(&ya, ylen, 3); - copy_chain_data(xa_vec[0],seqx_vec[0],secx_vec[0], xlen,xa,seqx,secx); - copy_chain_data(ya_vec[0],seqy_vec[0],secy_vec[0], ylen,ya,seqy,secy); - + copy_chain_data(xa_vec[0], seqx_vec[0], secx_vec[0], xlen, xa, seqx, secx); + copy_chain_data(ya_vec[0], seqy_vec[0], secy_vec[0], ylen, ya, seqy, secy); + /* declare variable specific to this pair of TMalign */ double t0[3], u0[3][3]; double TM1, TM2; - double TM3, TM4, TM5; // for a_opt, u_opt, d_opt + double TM3, TM4, TM5; // for a_opt, u_opt, d_opt double d0_0, TM_0; double d0A, d0B, d0u, d0a; - double d0_out=5.0; - string seqM, seqxA, seqyA;// for output alignment + double d0_out = 5.0; + string seqM, seqxA, seqyA; // for output alignment double rmsd0 = 0.0; - int L_ali; // Aligned length in standard_TMscore - double Liden=0; - double TM_ali, rmsd_ali; // TMscore and rmsd in standard_TMscore - int n_ali=0; - int n_ali8=0; + int L_ali; // Aligned length in standard_TMscore + double Liden = 0; + double TM_ali, rmsd_ali; // TMscore and rmsd in standard_TMscore + int n_ali = 0; + int n_ali8 = 0; vector do_vec; /* entry function for structure alignment */ TMalign_main(xa, ya, seqx, seqy, secx, secy, - t0, u0, TM1, TM2, TM3, TM4, TM5, - d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, - seqM, 
seqxA, seqyA, do_vec, - rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, - xlen, ylen, sequence, 0, d0_scale, - 0, a_opt, false, d_opt, fast_opt, - mol_vec1[0]+mol_vec2[0],TMcut); + t0, u0, TM1, TM2, TM3, TM4, TM5, + d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, + seqM, seqxA, seqyA, do_vec, + rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, + xlen, ylen, sequence, 0, d0_scale, + 0, a_opt, false, d_opt, fast_opt, + mol_vec1[0] + mol_vec2[0], TMcut, 0); /* print result */ output_results( @@ -444,7 +476,7 @@ int main(int argc, char *argv[]) xlen, ylen, t0, u0, TM1, TM2, TM3, TM4, TM5, rmsd0, d0_out, seqM.c_str(), seqxA.c_str(), seqyA.c_str(), Liden, n_ali8, L_ali, TM_ali, rmsd_ali, TM_0, d0_0, d0A, d0B, - 0, d0_scale, d0a, d0u, (m_opt?fname_matrix:"").c_str(), + 0, d0_scale, d0a, d0u, (m_opt ? fname_matrix : "").c_str(), outfmt_opt, ter_opt, true, split_opt, o_opt, fname_super, 0, a_opt, false, d_opt, mirror_opt, resi_vec1, resi_vec2); @@ -452,135 +484,141 @@ int main(int argc, char *argv[]) seqM.clear(); seqxA.clear(); seqyA.clear(); - delete[]seqx; - delete[]seqy; - delete[]secx; - delete[]secy; - DeleteArray(&xa,xlen); - DeleteArray(&ya,ylen); + delete[] seqx; + delete[] seqy; + delete[] secx; + delete[] secy; + DeleteArray(&xa, xlen); + DeleteArray(&ya, ylen); chain1_list.clear(); chain2_list.clear(); sequence.clear(); do_vec.clear(); - vector > >().swap(xa_vec); // structure of complex1 - vector > >().swap(ya_vec); // structure of complex2 - vector >().swap(seqx_vec); // sequence of complex1 - vector >().swap(seqy_vec); // sequence of complex2 - vector >().swap(secx_vec); // secondary structure of complex1 - vector >().swap(secy_vec); // secondary structure of complex2 - mol_vec1.clear(); // molecule type of complex1, RNA if >0 - mol_vec2.clear(); // molecule type of complex2, RNA if >0 - chainID_list1.clear(); // list of chainID1 - chainID_list2.clear(); // list of chainID2 - xlen_vec.clear(); // length of complex1 - ylen_vec.clear(); // length of complex2 
+ vector>>().swap(xa_vec); // structure of complex1 + vector>>().swap(ya_vec); // structure of complex2 + vector>().swap(seqx_vec); // sequence of complex1 + vector>().swap(seqy_vec); // sequence of complex2 + vector>().swap(secx_vec); // secondary structure of complex1 + vector>().swap(secy_vec); // secondary structure of complex2 + mol_vec1.clear(); // molecule type of complex1, RNA if >0 + mol_vec2.clear(); // molecule type of complex2, RNA if >0 + chainID_list1.clear(); // list of chainID1 + chainID_list2.clear(); // list of chainID2 + xlen_vec.clear(); // length of complex1 + ylen_vec.clear(); // length of complex2 t2 = clock(); - float diff = ((float)t2 - (float)t1)/CLOCKS_PER_SEC; + float diff = ((float)t2 - (float)t1) / CLOCKS_PER_SEC; printf("#Total CPU time is %5.2f seconds\n", diff); return 0; } /* declare TM-score tables */ - int chain1_num=xa_vec.size(); - int chain2_num=ya_vec.size(); - vector tmp_str_vec(chain2_num,""); + int chain1_num = xa_vec.size(); + int chain2_num = ya_vec.size(); + vector tmp_str_vec(chain2_num, ""); double **TMave_mat; double **ut_mat; // rotation matrices for all-against-all alignment - int ui,uj,ut_idx; - NewArray(&TMave_mat,chain1_num,chain2_num); - NewArray(&ut_mat,chain1_num*chain2_num,4*3); - vector >seqxA_mat(chain1_num,tmp_str_vec); - vector > seqM_mat(chain1_num,tmp_str_vec); - vector >seqyA_mat(chain1_num,tmp_str_vec); + int ui, uj, ut_idx; + NewArray(&TMave_mat, chain1_num, chain2_num); + NewArray(&ut_mat, chain1_num * chain2_num, 4 * 3); + vector> seqxA_mat(chain1_num, tmp_str_vec); + vector> seqM_mat(chain1_num, tmp_str_vec); + vector> seqyA_mat(chain1_num, tmp_str_vec); - double maxTMmono=-1; - int maxTMmono_i,maxTMmono_j; + double maxTMmono = -1; + int maxTMmono_i, maxTMmono_j; /* get all-against-all alignment */ - if (len_aa+len_na>500) fast_opt=true; - for (i=0;i 500) + fast_opt = true; + for (i = 0; i < chain1_num; i++) { - xlen=xlen_vec[i]; - if (xlen<3) + xlen = xlen_vec[i]; + if (xlen < 3) { - for (j=0;j 
do_vec; - int Lnorm_tmp=len_aa; - if (mol_vec1[i]+mol_vec2[j]>0) Lnorm_tmp=len_na; + int Lnorm_tmp = len_aa; + if (mol_vec1[i] + mol_vec2[j] > 0) + Lnorm_tmp = len_na; /* entry function for structure alignment */ TMalign_main(xa, ya, seqx, seqy, secx, secy, - t0, u0, TM1, TM2, TM3, TM4, TM5, - d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, - seqM, seqxA, seqyA, do_vec, - rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, - xlen, ylen, sequence, Lnorm_tmp, d0_scale, - 0, false, true, false, fast_opt, - mol_vec1[i]+mol_vec2[j],TMcut); + t0, u0, TM1, TM2, TM3, TM4, TM5, + d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, + seqM, seqxA, seqyA, do_vec, + rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, + xlen, ylen, sequence, Lnorm_tmp, d0_scale, + 0, false, true, false, fast_opt, + mol_vec1[i] + mol_vec2[j], TMcut, 0); /* store result */ - for (ui=0;ui<3;ui++) - for (uj=0;uj<3;uj++) ut_mat[ut_idx][ui*3+uj]=u0[ui][uj]; - for (uj=0;uj<3;uj++) ut_mat[ut_idx][9+uj]=t0[uj]; - seqxA_mat[i][j]=seqxA; - seqyA_mat[i][j]=seqyA; - TMave_mat[i][j]=TM4*Lnorm_tmp; - if (TMave_mat[i][j]>maxTMmono) + for (ui = 0; ui < 3; ui++) + for (uj = 0; uj < 3; uj++) + ut_mat[ut_idx][ui * 3 + uj] = u0[ui][uj]; + for (uj = 0; uj < 3; uj++) + ut_mat[ut_idx][9 + uj] = t0[uj]; + seqxA_mat[i][j] = seqxA; + seqyA_mat[i][j] = seqyA; + TMave_mat[i][j] = TM4 * Lnorm_tmp; + if (TMave_mat[i][j] > maxTMmono) { - maxTMmono=TMave_mat[i][j]; - maxTMmono_i=i; - maxTMmono_j=j; + maxTMmono = TMave_mat[i][j]; + maxTMmono_i = i; + maxTMmono_j = j; } /* clean up */ @@ -588,133 +626,136 @@ int main(int argc, char *argv[]) seqxA.clear(); seqyA.clear(); - delete[]seqy; - delete[]secy; - DeleteArray(&ya,ylen); + delete[] seqy; + delete[] secy; + DeleteArray(&ya, ylen); do_vec.clear(); } - delete[]seqx; - delete[]secx; - DeleteArray(&xa,xlen); + delete[] seqx; + delete[] secx; + DeleteArray(&xa, xlen); } /* calculate initial chain-chain assignment */ int *assign1_list; // value is index of assigned chain2 int *assign2_list; // 
value is index of assigned chain1 - assign1_list=new int[chain1_num]; - assign2_list=new int[chain2_num]; - double total_score=enhanced_greedy_search(TMave_mat, assign1_list, - assign2_list, chain1_num, chain2_num); - if (total_score<=0) PrintErrorAndQuit("ERROR! No assignable chain"); + assign1_list = new int[chain1_num]; + assign2_list = new int[chain2_num]; + double total_score = enhanced_greedy_search(TMave_mat, assign1_list, + assign2_list, chain1_num, chain2_num); + if (total_score <= 0) + PrintErrorAndQuit("ERROR! No assignable chain"); /* refine alignment for large oligomers */ - int aln_chain_num=count_assign_pair(assign1_list,chain1_num); - bool is_oligomer=(aln_chain_num>=3); - if (aln_chain_num==2) // dimer alignment + int aln_chain_num = count_assign_pair(assign1_list, chain1_num); + bool is_oligomer = (aln_chain_num >= 3); + if (aln_chain_num == 2) // dimer alignment { - int na_chain_num1,na_chain_num2,aa_chain_num1,aa_chain_num2; - count_na_aa_chain_num(na_chain_num1,aa_chain_num1,mol_vec1); - count_na_aa_chain_num(na_chain_num2,aa_chain_num2,mol_vec2); + int na_chain_num1, na_chain_num2, aa_chain_num1, aa_chain_num2; + count_na_aa_chain_num(na_chain_num1, aa_chain_num1, mol_vec1); + count_na_aa_chain_num(na_chain_num2, aa_chain_num2, mol_vec2); /* align protein-RNA hybrid dimer to another hybrid dimer */ - if (na_chain_num1==1 && na_chain_num2==1 && - aa_chain_num1==1 && aa_chain_num2==1) is_oligomer=false; + if (na_chain_num1 == 1 && na_chain_num2 == 1 && + aa_chain_num1 == 1 && aa_chain_num2 == 1) + is_oligomer = false; /* align pure protein dimer or pure RNA dimer */ - else if ((getmin(na_chain_num1,na_chain_num2)==0 && - aa_chain_num1==2 && aa_chain_num2==2) || - (getmin(aa_chain_num1,aa_chain_num2)==0 && - na_chain_num1==2 && na_chain_num2==2)) + else if ((getmin(na_chain_num1, na_chain_num2) == 0 && + aa_chain_num1 == 2 && aa_chain_num2 == 2) || + (getmin(aa_chain_num1, aa_chain_num2) == 0 && + na_chain_num1 == 2 && na_chain_num2 == 2)) { - 
adjust_dimer_assignment(xa_vec,ya_vec,xlen_vec,ylen_vec,mol_vec1, - mol_vec2,assign1_list,assign2_list,seqxA_mat,seqyA_mat); - is_oligomer=false; // cannot refiner further + adjust_dimer_assignment(xa_vec, ya_vec, xlen_vec, ylen_vec, mol_vec1, + mol_vec2, assign1_list, assign2_list, seqxA_mat, seqyA_mat); + is_oligomer = false; // cannot refiner further } - else is_oligomer=true; /* align oligomers to dimer */ + else + is_oligomer = true; /* align oligomers to dimer */ } - if (aln_chain_num>=3 || is_oligomer) // oligomer alignment + if (aln_chain_num >= 3 || is_oligomer) // oligomer alignment { /* extract centroid coordinates */ double **xcentroids; double **ycentroids; NewArray(&xcentroids, chain1_num, 3); NewArray(&ycentroids, chain2_num, 3); - double d0MM=getmin( + double d0MM = getmin( calculate_centroids(xa_vec, chain1_num, xcentroids), calculate_centroids(ya_vec, chain2_num, ycentroids)); /* refine enhanced greedy search with centroid superposition */ - //double het_deg=check_heterooligomer(TMave_mat, chain1_num, chain2_num); + // double het_deg=check_heterooligomer(TMave_mat, chain1_num, chain2_num); homo_refined_greedy_search(TMave_mat, assign1_list, - assign2_list, chain1_num, chain2_num, xcentroids, - ycentroids, d0MM, len_aa+len_na, ut_mat); + assign2_list, chain1_num, chain2_num, xcentroids, + ycentroids, d0MM, len_aa + len_na, ut_mat); hetero_refined_greedy_search(TMave_mat, assign1_list, - assign2_list, chain1_num, chain2_num, xcentroids, - ycentroids, d0MM, len_aa+len_na); - + assign2_list, chain1_num, chain2_num, xcentroids, + ycentroids, d0MM, len_aa + len_na); + /* clean up */ DeleteArray(&xcentroids, chain1_num); DeleteArray(&ycentroids, chain2_num); } /* store initial assignment */ - int init_pair_num=count_assign_pair(assign1_list,chain1_num); + int init_pair_num = count_assign_pair(assign1_list, chain1_num); int *assign1_init, *assign2_init; - assign1_init=new int[chain1_num]; - assign2_init=new int[chain2_num]; + assign1_init = new 
int[chain1_num]; + assign2_init = new int[chain2_num]; double **TMave_init; - NewArray(&TMave_init,chain1_num,chain2_num); - vector >seqxA_init(chain1_num,tmp_str_vec); - vector >seqyA_init(chain1_num,tmp_str_vec); + NewArray(&TMave_init, chain1_num, chain2_num); + vector> seqxA_init(chain1_num, tmp_str_vec); + vector> seqyA_init(chain1_num, tmp_str_vec); vector sequence_init; copy_chain_assign_data(chain1_num, chain2_num, sequence_init, - seqxA_mat, seqyA_mat, assign1_list, assign2_list, TMave_mat, - seqxA_init, seqyA_init, assign1_init, assign2_init, TMave_init); + seqxA_mat, seqyA_mat, assign1_list, assign2_list, TMave_mat, + seqxA_init, seqyA_init, assign1_init, assign2_init, TMave_init); /* perform iterative alignment */ - double max_total_score=0; // ignore old total_score because previous - // score was from monomeric chain superpositions - int max_iter=5-(int)((len_aa+len_na)/200); - if (max_iter<2) max_iter=2; + double max_total_score = 0; // ignore old total_score because previous + // score was from monomeric chain superpositions + int max_iter = 5 - (int)((len_aa + len_na) / 200); + if (max_iter < 2) + max_iter = 2; MMalign_iter(max_total_score, max_iter, xa_vec, ya_vec, - seqx_vec, seqy_vec, secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, - ylen_vec, xa, ya, seqx, seqy, secx, secy, len_aa, len_na, chain1_num, - chain2_num, TMave_mat, seqxA_mat, seqyA_mat, assign1_list, assign2_list, - sequence, d0_scale, fast_opt, chainmap); + seqx_vec, seqy_vec, secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, + ylen_vec, xa, ya, seqx, seqy, secx, secy, len_aa, len_na, chain1_num, + chain2_num, TMave_mat, seqxA_mat, seqyA_mat, assign1_list, assign2_list, + sequence, d0_scale, fast_opt, chainmap); - if (aln_chain_num>=4 && is_oligomer && chainmap.size()==0) // oligomer alignment + if (aln_chain_num >= 4 && is_oligomer && chainmap.size() == 0) // oligomer alignment { MMalign_final(xname.substr(dir1_opt.size()), yname.substr(dir2_opt.size()), - chainID_list1, 
chainID_list2, - fname_super, fname_lign, fname_matrix, - xa_vec, ya_vec, seqx_vec, seqy_vec, - secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec, - xa, ya, seqx, seqy, secx, secy, len_aa, len_na, - chain1_num, chain2_num, TMave_mat, - seqxA_mat, seqM_mat, seqyA_mat, assign1_list, assign2_list, sequence, - d0_scale, 1, 0, 5, ter_opt, split_opt, - 0, 0, true, true, mirror_opt, resi_vec1, resi_vec2); - + chainID_list1, chainID_list2, + fname_super, fname_lign, fname_matrix, + xa_vec, ya_vec, seqx_vec, seqy_vec, + secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec, + xa, ya, seqx, seqy, secx, secy, len_aa, len_na, + chain1_num, chain2_num, TMave_mat, + seqxA_mat, seqM_mat, seqyA_mat, assign1_list, assign2_list, sequence, + d0_scale, 1, 0, 5, ter_opt, split_opt, + 0, 0, true, true, mirror_opt, resi_vec1, resi_vec2); /* extract centroid coordinates */ double **xcentroids; double **ycentroids; NewArray(&xcentroids, chain1_num, 3); NewArray(&ycentroids, chain2_num, 3); - double d0MM=getmin( + double d0MM = getmin( calculate_centroids(xa_vec, chain1_num, xcentroids), calculate_centroids(ya_vec, chain2_num, ycentroids)); /* refine enhanced greedy search with centroid superposition */ - //double het_deg=check_heterooligomer(TMave_mat, chain1_num, chain2_num); + // double het_deg=check_heterooligomer(TMave_mat, chain1_num, chain2_num); homo_refined_greedy_search(TMave_mat, assign1_list, - assign2_list, chain1_num, chain2_num, xcentroids, - ycentroids, d0MM, len_aa+len_na, ut_mat); + assign2_list, chain1_num, chain2_num, xcentroids, + ycentroids, d0MM, len_aa + len_na, ut_mat); hetero_refined_greedy_search(TMave_mat, assign1_list, - assign2_list, chain1_num, chain2_num, xcentroids, - ycentroids, d0MM, len_aa+len_na); + assign2_list, chain1_num, chain2_num, xcentroids, + ycentroids, d0MM, len_aa + len_na); /* clean up */ DeleteArray(&xcentroids, chain1_num); @@ -722,116 +763,122 @@ int main(int argc, char *argv[]) } /* sometime MMalign_iter is even worse than 
monomer alignment */ - if (max_total_score=init_pair_num) copy_chain_assign_data( - chain1_num, chain2_num, sequence_init, - seqxA_mat, seqyA_mat, assign1_list, assign2_list, TMave_mat, - seqxA_init, seqyA_init, assign1_init, assign2_init, TMave_init); - double max_total_score_cross=max_total_score; - - //if (init_pair_num!=2 && is_oligomer==false) MMalign_cross( - //max_total_score_cross, max_iter, xa_vec, ya_vec, seqx_vec, seqy_vec, - //secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec, - //xa, ya, seqx, seqy, secx, secy, len_aa, len_na, chain1_num, chain2_num, - //TMave_init, seqxA_init, seqyA_init, assign1_init, assign2_init, sequence_init, - //d0_scale, true); - //else - if (len_aa+len_na<10000) + int iter_pair_num = count_assign_pair(assign1_list, chain1_num); + if (iter_pair_num >= init_pair_num) + copy_chain_assign_data( + chain1_num, chain2_num, sequence_init, + seqxA_mat, seqyA_mat, assign1_list, assign2_list, TMave_mat, + seqxA_init, seqyA_init, assign1_init, assign2_init, TMave_init); + double max_total_score_cross = max_total_score; + + // if (init_pair_num!=2 && is_oligomer==false) MMalign_cross( + // max_total_score_cross, max_iter, xa_vec, ya_vec, seqx_vec, seqy_vec, + // secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec, + // xa, ya, seqx, seqy, secx, secy, len_aa, len_na, chain1_num, chain2_num, + // TMave_init, seqxA_init, seqyA_init, assign1_init, assign2_init, sequence_init, + // d0_scale, true); + // else + if (len_aa + len_na < 10000) { MMalign_dimer(max_total_score_cross, xa_vec, ya_vec, seqx_vec, seqy_vec, - secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec, - xa, ya, seqx, seqy, secx, secy, len_aa, len_na, chain1_num, chain2_num, - TMave_init, seqxA_init, seqyA_init, assign1_init, assign2_init, - sequence_init, d0_scale, fast_opt); - if (max_total_score_cross>max_total_score) + secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec, + xa, ya, seqx, seqy, secx, secy, len_aa, len_na, chain1_num, chain2_num, + 
TMave_init, seqxA_init, seqyA_init, assign1_init, assign2_init, + sequence_init, d0_scale, fast_opt); + if (max_total_score_cross > max_total_score) { - max_total_score=max_total_score_cross; + max_total_score = max_total_score_cross; copy_chain_assign_data(chain1_num, chain2_num, sequence, - seqxA_init, seqyA_init, assign1_init, assign2_init, TMave_init, - seqxA_mat, seqyA_mat, assign1_list, assign2_list, TMave_mat); + seqxA_init, seqyA_init, assign1_init, assign2_init, TMave_init, + seqxA_mat, seqyA_mat, assign1_list, assign2_list, TMave_mat); } - } + } /* final alignment */ - if (outfmt_opt==0) print_version(); + if (outfmt_opt == 0) + print_version(); MMalign_final(xname.substr(dir1_opt.size()), yname.substr(dir2_opt.size()), - chainID_list1, chainID_list2, - fname_super, fname_lign, fname_matrix, - xa_vec, ya_vec, seqx_vec, seqy_vec, - secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec, - xa, ya, seqx, seqy, secx, secy, len_aa, len_na, - chain1_num, chain2_num, TMave_mat, - seqxA_mat, seqM_mat, seqyA_mat, assign1_list, assign2_list, sequence, - d0_scale, m_opt, o_opt, outfmt_opt, ter_opt, split_opt, - a_opt, d_opt, fast_opt, full_opt, mirror_opt, resi_vec1, resi_vec2); + chainID_list1, chainID_list2, + fname_super, fname_lign, fname_matrix, + xa_vec, ya_vec, seqx_vec, seqy_vec, + secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec, + xa, ya, seqx, seqy, secx, secy, len_aa, len_na, + chain1_num, chain2_num, TMave_mat, + seqxA_mat, seqM_mat, seqyA_mat, assign1_list, assign2_list, sequence, + d0_scale, m_opt, o_opt, outfmt_opt, ter_opt, split_opt, + a_opt, d_opt, fast_opt, full_opt, mirror_opt, resi_vec1, resi_vec2); /* clean up everything */ - delete [] assign1_list; - delete [] assign2_list; - DeleteArray(&TMave_mat,chain1_num); - DeleteArray(&ut_mat, chain1_num*chain2_num); - vector >().swap(seqxA_mat); - vector >().swap(seqM_mat); - vector >().swap(seqyA_mat); + delete[] assign1_list; + delete[] assign2_list; + DeleteArray(&TMave_mat, 
chain1_num); + DeleteArray(&ut_mat, chain1_num * chain2_num); + vector>().swap(seqxA_mat); + vector>().swap(seqM_mat); + vector>().swap(seqyA_mat); vector().swap(tmp_str_vec); - delete [] assign1_init; - delete [] assign2_init; - DeleteArray(&TMave_init,chain1_num); - vector >().swap(seqxA_init); - vector >().swap(seqyA_init); - - vector > >().swap(xa_vec); // structure of complex1 - vector > >().swap(ya_vec); // structure of complex2 - vector >().swap(seqx_vec); // sequence of complex1 - vector >().swap(seqy_vec); // sequence of complex2 - vector >().swap(secx_vec); // secondary structure of complex1 - vector >().swap(secy_vec); // secondary structure of complex2 - mol_vec1.clear(); // molecule type of complex1, RNA if >0 - mol_vec2.clear(); // molecule type of complex2, RNA if >0 - vector().swap(chainID_list1); // list of chainID1 - vector().swap(chainID_list2); // list of chainID2 - xlen_vec.clear(); // length of complex1 - ylen_vec.clear(); // length of complex2 + delete[] assign1_init; + delete[] assign2_init; + DeleteArray(&TMave_init, chain1_num); + vector>().swap(seqxA_init); + vector>().swap(seqyA_init); + + vector>>().swap(xa_vec); // structure of complex1 + vector>>().swap(ya_vec); // structure of complex2 + vector>().swap(seqx_vec); // sequence of complex1 + vector>().swap(seqy_vec); // sequence of complex2 + vector>().swap(secx_vec); // secondary structure of complex1 + vector>().swap(secy_vec); // secondary structure of complex2 + mol_vec1.clear(); // molecule type of complex1, RNA if >0 + mol_vec2.clear(); // molecule type of complex2, RNA if >0 + vector().swap(chainID_list1); // list of chainID1 + vector().swap(chainID_list2); // list of chainID2 + xlen_vec.clear(); // length of complex1 + ylen_vec.clear(); // length of complex2 vector().swap(chain1_list); vector().swap(chain2_list); vector().swap(sequence); - vector().swap(resi_vec1); // residue index for chain1 - vector().swap(resi_vec2); // residue index for chain2 + vector().swap(resi_vec1); // 
residue index for chain1 + vector().swap(resi_vec2); // residue index for chain2 vector().swap(chain2parse1); vector().swap(chain2parse2); vector().swap(model2parse1); vector().swap(model2parse2); t2 = clock(); - float diff = ((float)t2 - (float)t1)/CLOCKS_PER_SEC; + float diff = ((float)t2 - (float)t1) / CLOCKS_PER_SEC; printf("#Total CPU time is %5.2f seconds\n", diff); return 0; } diff --git a/MMalign.h b/MMalign.h index 231cfa3..2ea5059 100644 --- a/MMalign.h +++ b/MMalign.h @@ -1279,7 +1279,7 @@ double MMalign_search( d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA, do_vec, rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, xlen, ylen, sequence, Lnorm_ass, d0_scale, - i_opt, false, true, false, fast_opt, mol_type, -1); + i_opt, false, true, false, fast_opt, mol_type, -1, 0); /* clean up */ delete [] seqx; @@ -1460,7 +1460,7 @@ void MMalign_final( d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA, do_vec, rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, xlen, ylen, sequence, Lnorm_ass, d0_scale, - 3, a_opt, false, d_opt, fast_opt, mol_type, -1); + 3, a_opt, false, d_opt, fast_opt, mol_type, -1, 0); /* prepare full complex alignment */ string chainID1=""; diff --git a/TMalign.cpp b/TMalign.cpp index e2539d0..3dba166 100644 --- a/TMalign.cpp +++ b/TMalign.cpp @@ -6,161 +6,159 @@ using namespace std; void print_version() { - cout << -"\n" -" **********************************************************************\n" -" * TM-align (Version 20240303): protein and RNA structure alignment *\n" -" * References: Y Zhang, J Skolnick. Nucl Acids Res 33, 2302-9 (2005) *\n" -" * S Gong, C Zhang, Y Zhang. 
Bioinformatics, bz282 (2019) *\n" -" * Please email comments and suggestions to yangzhanglab@umich.edu *\n" -" **********************************************************************" - << endl; + cout << "\n" + " **********************************************************************\n" + " * TM-align (Version 20240303): protein and RNA structure alignment *\n" + " * References: Y Zhang, J Skolnick. Nucl Acids Res 33, 2302-9 (2005) *\n" + " * S Gong, C Zhang, Y Zhang. Bioinformatics, bz282 (2019) *\n" + " * Please email comments and suggestions to yangzhanglab@umich.edu *\n" + " **********************************************************************" + << endl; } void print_extra_help() { - cout << -"Additional options:\n" -" -fast Fast but slightly inaccurate alignment\n" -"\n" -" -dir Perform all-against-all alignment among the list of PDB\n" -" chains listed by 'chain_list' under 'chain_folder'. Note\n" -" that the slash is necessary.\n" -" $ TMalign -dir chain_folder/ chain_list\n" -"\n" -" -dir1 Use chain2 to search a list of PDB chains listed by 'chain1_list'\n" -" under 'chain1_folder'. 
Note that the slash is necessary.\n" -" $ TMalign -dir1 chain1_folder/ chain1_list chain2\n" -"\n" -" -dir2 Use chain1 to search a list of PDB chains listed by 'chain2_list'\n" -" under 'chain2_folder'\n" -" $ TMalign chain1 -dir2 chain2_folder/ chain2_list\n" -"\n" -" -pair (Only when -dir1 and -dir2 are set, default is no) whether to\n" -" perform pair alignment rather than all-against-all alignment\n" -"\n" -" -suffix (Only when -dir1 and/or -dir2 are set, default is empty)\n" -" add file name suffix to files listed by chain1_list or chain2_list\n" -"\n" -" -atom 4-character atom name used to represent a residue.\n" -" Default is \" C3'\" for RNA/DNA and \" CA \" for proteins\n" -" (note the spaces before and after CA).\n" -"\n" -" -mol Molecule type: RNA or protein\n" -" Default is detect molecule type automatically\n" -"\n" -" -ter Strings to mark the end of a chain\n" -" 3: (default) TER, ENDMDL, END or different chain ID\n" -" 2: ENDMDL, END, or different chain ID\n" -" 1: ENDMDL or END\n" -" 0: (default in the first C++ TMalign) end of file\n" -"\n" -" -split Whether to split PDB file into multiple chains\n" -" 0: (default) treat the whole structure as one single chain\n" -" 1: treat each MODEL as a separate chain (-ter should be 0)\n" -" 2: treat each chain as a seperate chain (-ter should be <=1)\n" -"\n" -" -outfmt Output format\n" -" 0: (default) full output\n" -" 1: fasta format compact output\n" -" 2: tabular format very compact output\n" -" -1: full output, but without version or citation information\n" -"\n" -" -byresi Whether to assume residue index correspondence between the\n" -" two structures. The same as -TMscore.\n" -" 0: (default) sequence independent alignment\n" -" 1: (same as TMscore program) sequence-dependent superposition,\n" -" i.e. 
align by residue index\n" -" 2: (same as TMscore -c, should be used with -ter <=1)\n" -" align by residue index and chain ID\n" -" 3: (similar to TMscore -c, should be used with -ter <=1)\n" -" align by residue index and order of chain\n" -//" 4: sequence dependent alignment: perform Needleman-Wunsch\n" -//" global sequence alignment, followed by TM-score superposition\n" -" 5: sequence dependent alignment: perform glocal sequence\n" -" alignment followed by TM-score superposition.\n" -" -byresi 5 is thee same as -seq\n" -"\n" -" -TMcut -1: (default) do not consider TMcut\n" -" Values in [0.5,1): Do not proceed with TM-align for this\n" -" structure pair if TM-score is unlikely to reach TMcut.\n" -" TMcut is normalized is set by -a option:\n" -" -2: normalized by longer structure length\n" -" -1: normalized by shorter structure length\n" -" 0: (default, same as F) normalized by second structure\n" -" 1: same as T, normalized by average structure length\n" -"\n" -" -cp ALignment with circular permutation\n" -"\n" -" -mirror Whether to align the mirror image of input structure\n" -" 0: (default) do not align mirrored structure\n" -" 1: align mirror of chain1 to origin chain2\n" -"\n" -" -het Whether to align residues marked as 'HETATM' in addition to 'ATOM '\n" -" 0: (default) only align 'ATOM ' residues\n" -" 1: align both 'ATOM ' and 'HETATM' residues\n" -"\n" -" -infmt1 Input format for chain1\n" -" -infmt2 Input format for chain2\n" -" -1: (default) automatically detect PDB or PDBx/mmCIF format\n" -" 0: PDB format\n" -" 1: SPICKER format\n" -" 2: xyz format\n" -" 3: PDBx/mmCIF format\n" - <= minimum length of the two structures\n" -" otherwise, TM-score may be >1\n" -"\n" -" -a TM-score normalized by the average length of two structures\n" -" T or F, (default F)\n" -"\n" -" -i Start with an alignment specified in fasta file 'align.txt'\n" -"\n" -" -I Stick to the alignment specified in 'align.txt'\n" -"\n" -" -m Output TM-align rotation matrix\n" -"\n" -" -d 
TM-score scaled by an assigned d0, e.g. 5 Angstroms\n" -"\n" -" -o Output the superposition of PDB1.pdb to TM_sup.pdb\n" -" $ TMalign PDB1.pdb PDB2.pdb -o TM_sup.pdb\n" -" To view superposed full-atom structures:\n" -" $ pymol TM_sup.pdb PDB2.pdb\n" -"\n" -" -v Print the version of TM-align\n" -"\n" -" -h Print the full help message, including options not available\n" -" in standard TM-align program\n" -"\n" -" (Options -u, -a, -d, -o won't change the final structure alignment)\n\n" -"Example usages:\n" -" TMalign PDB1.pdb PDB2.pdb\n" -" TMalign PDB1.pdb PDB2.pdb -u 100 -d 5.0\n" -" TMalign PDB1.pdb PDB2.pdb -a T -o PDB1.sup\n" -" TMalign PDB1.pdb PDB2.pdb -i align.txt\n" -" TMalign PDB1.pdb PDB2.pdb -m matrix.txt\n" - <= minimum length of the two structures\n" + " otherwise, TM-score may be >1\n" + "\n" + " -a TM-score normalized by the average length of two structures\n" + " T or F, (default F)\n" + "\n" + " -i Start with an alignment specified in fasta file 'align.txt'\n" + "\n" + " -I Stick to the alignment specified in 'align.txt'\n" + "\n" + " -m Output TM-align rotation matrix\n" + "\n" + " -d TM-score scaled by an assigned d0, e.g. 
5 Angstroms\n" + "\n" + " -o Output the superposition of PDB1.pdb to TM_sup.pdb\n" + " $ TMalign PDB1.pdb PDB2.pdb -o TM_sup.pdb\n" + " To view superposed full-atom structures:\n" + " $ pymol TM_sup.pdb PDB2.pdb\n" + "\n" + " -v Print the version of TM-align\n" + "\n" + " -h Print the full help message, including options not available\n" + " in standard TM-align program\n" + "\n" + " (Options -u, -a, -d, -o won't change the final structure alignment)\n\n" + "Example usages:\n" + " TMalign PDB1.pdb PDB2.pdb\n" + " TMalign PDB1.pdb PDB2.pdb -u 100 -d 5.0\n" + " TMalign PDB1.pdb PDB2.pdb -a T -o PDB1.sup\n" + " TMalign PDB1.pdb PDB2.pdb -i align.txt\n" + " TMalign PDB1.pdb PDB2.pdb -m matrix.txt\n" + << endl; + + if (h_opt) + print_extra_help(); exit(EXIT_SUCCESS); } int main(int argc, char *argv[]) { - if (argc < 2) print_help(); - + if (argc < 2) + print_help(); clock_t t1, t2; t1 = clock(); @@ -168,41 +166,41 @@ int main(int argc, char *argv[]) /**********************/ /* get argument */ /**********************/ - string xname = ""; - string yname = ""; - string fname_super = ""; // file name for superposed structure - string fname_lign = ""; // file name for user alignment - string fname_matrix= ""; // file name for output matrix - vector sequence; // get value from alignment file + string xname = ""; + string yname = ""; + string fname_super = ""; // file name for superposed structure + string fname_lign = ""; // file name for user alignment + string fname_matrix = ""; // file name for output matrix + vector sequence; // get value from alignment file double Lnorm_ass, d0_scale; bool h_opt = false; // print full help message bool v_opt = false; // print version bool m_opt = false; // flag for -m, output rotation matrix - int i_opt = 0; // 1 for -i, 3 for -I + int i_opt = 0; // 1 for -i, 3 for -I bool o_opt = false; // flag for -o, output superposed structure - int a_opt = 0; // flag for -a, do not normalized by average length + int a_opt = 0; // flag for -a, do 
not normalized by average length bool u_opt = false; // flag for -u, normalized by user specified length bool d_opt = false; // flag for -d, user specified d0 - double TMcut =-1; - int infmt1_opt=-1; // PDB or PDBx/mmCIF format for chain_1 - int infmt2_opt=-1; // PDB or PDBx/mmCIF format for chain_2 - int ter_opt =3; // TER, END, or different chainID - int split_opt =0; // do not split chain - int outfmt_opt=0; // set -outfmt to full output - bool fast_opt =false; // flags for -fast, fTM-align algorithm - int cp_opt =0; // do not check circular permutation - int mirror_opt=0; // do not align mirror - int het_opt =0; // do not read HETATM residues - string atom_opt ="auto";// use C alpha atom for protein and C3' for RNA - string mol_opt ="auto";// auto-detect the molecule type as protein/RNA - string suffix_opt=""; // set -suffix to empty - string dir_opt =""; // set -dir to empty - string dir1_opt =""; // set -dir1 to empty - string dir2_opt =""; // set -dir2 to empty - bool pair_opt=false; // pair alignment - int byresi_opt=0; // set -byresi to 0 + double TMcut = -1; + int infmt1_opt = -1; // PDB or PDBx/mmCIF format for chain_1 + int infmt2_opt = -1; // PDB or PDBx/mmCIF format for chain_2 + int ter_opt = 3; // TER, END, or different chainID + int split_opt = 0; // do not split chain + int outfmt_opt = 0; // set -outfmt to full output + bool fast_opt = false; // flags for -fast, fTM-align algorithm + int cp_opt = 0; // do not check circular permutation + int mirror_opt = 0; // do not align mirror + int het_opt = 0; // do not read HETATM residues + string atom_opt = "auto"; // use C alpha atom for protein and C3' for RNA + string mol_opt = "auto"; // auto-detect the molecule type as protein/RNA + string suffix_opt = ""; // set -suffix to empty + string dir_opt = ""; // set -dir to empty + string dir1_opt = ""; // set -dir1 to empty + string dir2_opt = ""; // set -dir2 to empty + bool pair_opt = false; // pair alignment + int byresi_opt = 0; // set -byresi to 0 
vector chain1_list; // only when -dir1 is set vector chain2_list; // only when -dir2 is set vector chain2parse1; @@ -210,185 +208,220 @@ int main(int argc, char *argv[]) vector model2parse1; vector model2parse2; - for(int i = 1; i < argc; i++) + for (int i = 1; i < argc; i++) { - if ( !strcmp(argv[i],"-o") && i < (argc-1) ) + if (!strcmp(argv[i], "-o") && i < (argc - 1)) { - fname_super = argv[i + 1]; o_opt = true; i++; + fname_super = argv[i + 1]; + o_opt = true; + i++; } - else if ( (!strcmp(argv[i],"-u") || - !strcmp(argv[i],"-L")) && i < (argc-1) ) + else if ((!strcmp(argv[i], "-u") || + !strcmp(argv[i], "-L")) && + i < (argc - 1)) { - Lnorm_ass = atof(argv[i + 1]); u_opt = true; i++; + Lnorm_ass = atof(argv[i + 1]); + u_opt = true; + i++; } - else if ( !strcmp(argv[i],"-a") && i < (argc-1) ) + else if (!strcmp(argv[i], "-a") && i < (argc - 1)) { - if (!strcmp(argv[i + 1], "T")) a_opt=true; - else if (!strcmp(argv[i + 1], "F")) a_opt=false; - else + if (!strcmp(argv[i + 1], "T")) + a_opt = true; + else if (!strcmp(argv[i + 1], "F")) + a_opt = false; + else { - a_opt=atoi(argv[i + 1]); - if (a_opt!=-2 && a_opt!=-1 && a_opt!=1) + a_opt = atoi(argv[i + 1]); + if (a_opt != -2 && a_opt != -1 && a_opt != 1) PrintErrorAndQuit("-a must be -2, -1, 1, T or F"); } i++; } - else if ( !strcmp(argv[i],"-d") && i < (argc-1) ) + else if (!strcmp(argv[i], "-d") && i < (argc - 1)) { - d0_scale = atof(argv[i + 1]); d_opt = true; i++; + d0_scale = atof(argv[i + 1]); + d_opt = true; + i++; } - else if ( !strcmp(argv[i],"-v") ) + else if (!strcmp(argv[i], "-v")) { v_opt = true; } - else if ( !strcmp(argv[i],"-h") ) + else if (!strcmp(argv[i], "-h")) { h_opt = true; } - else if ( !strcmp(argv[i],"-i") && i < (argc-1) ) + else if (!strcmp(argv[i], "-i") && i < (argc - 1)) { - if (i_opt==3) + if (i_opt == 3) PrintErrorAndQuit("ERROR! 
-i and -I cannot be used together"); - fname_lign = argv[i + 1]; i_opt = 1; i++; + fname_lign = argv[i + 1]; + i_opt = 1; + i++; } - else if (!strcmp(argv[i], "-I") && i < (argc-1) ) + else if (!strcmp(argv[i], "-I") && i < (argc - 1)) { - if (i_opt==1) + if (i_opt == 1) PrintErrorAndQuit("ERROR! -I and -i cannot be used together"); - fname_lign = argv[i + 1]; i_opt = 3; i++; + fname_lign = argv[i + 1]; + i_opt = 3; + i++; } - else if (!strcmp(argv[i], "-chain1") ) + else if (!strcmp(argv[i], "-chain1")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -chain1"); - split(argv[i+1],chain2parse1,','); + split(argv[i + 1], chain2parse1, ','); i++; } - else if (!strcmp(argv[i], "-chain2") ) + else if (!strcmp(argv[i], "-chain2")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -chain2"); - split(argv[i+1],chain2parse2,','); + split(argv[i + 1], chain2parse2, ','); i++; } - else if (!strcmp(argv[i], "-model1") ) + else if (!strcmp(argv[i], "-model1")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -model1"); - split(argv[i+1],model2parse1,','); + split(argv[i + 1], model2parse1, ','); i++; } - else if (!strcmp(argv[i], "-model2") ) + else if (!strcmp(argv[i], "-model2")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! 
Missing value for -model2"); - split(argv[i+1],model2parse2,','); + split(argv[i + 1], model2parse2, ','); i++; } - else if (!strcmp(argv[i], "-m") && i < (argc-1) ) + else if (!strcmp(argv[i], "-m") && i < (argc - 1)) { - fname_matrix = argv[i + 1]; m_opt = true; i++; - }// get filename for rotation matrix + fname_matrix = argv[i + 1]; + m_opt = true; + i++; + } // get filename for rotation matrix else if (!strcmp(argv[i], "-fast")) { fast_opt = true; } - else if ( !strcmp(argv[i],"-infmt1") && i < (argc-1) ) + else if (!strcmp(argv[i], "-infmt1") && i < (argc - 1)) { - infmt1_opt=atoi(argv[i + 1]); i++; + infmt1_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-infmt2") && i < (argc-1) ) + else if (!strcmp(argv[i], "-infmt2") && i < (argc - 1)) { - infmt2_opt=atoi(argv[i + 1]); i++; + infmt2_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-ter") && i < (argc-1) ) + else if (!strcmp(argv[i], "-ter") && i < (argc - 1)) { - ter_opt=atoi(argv[i + 1]); i++; + ter_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-split") && i < (argc-1) ) + else if (!strcmp(argv[i], "-split") && i < (argc - 1)) { - split_opt=atoi(argv[i + 1]); i++; + split_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-atom") && i < (argc-1) ) + else if (!strcmp(argv[i], "-atom") && i < (argc - 1)) { - atom_opt=argv[i + 1]; i++; + atom_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-mol") && i < (argc-1) ) + else if (!strcmp(argv[i], "-mol") && i < (argc - 1)) { - mol_opt=argv[i + 1]; i++; + mol_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-dir") && i < (argc-1) ) + else if (!strcmp(argv[i], "-dir") && i < (argc - 1)) { - dir_opt=argv[i + 1]; i++; + dir_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-dir1") && i < (argc-1) ) + else if (!strcmp(argv[i], "-dir1") && i < (argc - 1)) { - dir1_opt=argv[i + 1]; i++; + dir1_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-dir2") && i < (argc-1) ) + else if 
(!strcmp(argv[i], "-dir2") && i < (argc - 1)) { - dir2_opt=argv[i + 1]; i++; + dir2_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-pair") ) + else if (!strcmp(argv[i], "-pair")) { - pair_opt=true; + pair_opt = true; } - else if ( !strcmp(argv[i],"-suffix") && i < (argc-1) ) + else if (!strcmp(argv[i], "-suffix") && i < (argc - 1)) { - suffix_opt=argv[i + 1]; i++; + suffix_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-outfmt") && i < (argc-1) ) + else if (!strcmp(argv[i], "-outfmt") && i < (argc - 1)) { - outfmt_opt=atoi(argv[i + 1]); i++; + outfmt_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-TMcut") && i < (argc-1) ) + else if (!strcmp(argv[i], "-TMcut") && i < (argc - 1)) { - TMcut=atof(argv[i + 1]); i++; + TMcut = atof(argv[i + 1]); + i++; } - else if ((!strcmp(argv[i],"-byresi") || !strcmp(argv[i],"-tmscore") || - !strcmp(argv[i],"-TMscore")) && i < (argc-1) ) + else if ((!strcmp(argv[i], "-byresi") || !strcmp(argv[i], "-tmscore") || + !strcmp(argv[i], "-TMscore")) && + i < (argc - 1)) { - byresi_opt=atoi(argv[i + 1]); i++; + byresi_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-seq") ) + else if (!strcmp(argv[i], "-seq")) { - byresi_opt=5; + byresi_opt = 5; } - else if ( !strcmp(argv[i],"-cp") ) + else if (!strcmp(argv[i], "-cp")) { - cp_opt=1; + cp_opt = 1; } - else if ( !strcmp(argv[i],"-mirror") && i < (argc-1) ) + else if (!strcmp(argv[i], "-mirror") && i < (argc - 1)) { - mirror_opt=atoi(argv[i + 1]); i++; + mirror_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-het") && i < (argc-1) ) + else if (!strcmp(argv[i], "-het") && i < (argc - 1)) { - het_opt=atoi(argv[i + 1]); i++; + het_opt = atoi(argv[i + 1]); + i++; } - else if (xname.size() == 0) xname=argv[i]; - else if (yname.size() == 0) yname=argv[i]; - else PrintErrorAndQuit(string("ERROR! 
Undefined option ")+argv[i]); + else if (xname.size() == 0) + xname = argv[i]; + else if (yname.size() == 0) + yname = argv[i]; + else + PrintErrorAndQuit(string("ERROR! Undefined option ") + argv[i]); } - if(xname.size()==0 || (yname.size()==0 && dir_opt.size()==0) || - (yname.size() && dir_opt.size())) + if (xname.size() == 0 || (yname.size() == 0 && dir_opt.size() == 0) || + (yname.size() && dir_opt.size())) { - if (h_opt) print_help(h_opt); + if (h_opt) + print_help(h_opt); if (v_opt) { print_version(); exit(EXIT_FAILURE); } - if (xname.size()==0) + if (xname.size() == 0) PrintErrorAndQuit("Please provide input structures"); - else if (yname.size()==0 && dir_opt.size()==0) + else if (yname.size() == 0 && dir_opt.size() == 0) PrintErrorAndQuit("Please provide structure B"); else if (yname.size() && dir_opt.size()) PrintErrorAndQuit("Please provide only one file name if -dir is set"); } - if (suffix_opt.size() && dir_opt.size()+dir1_opt.size()+dir2_opt.size()==0) + if (suffix_opt.size() && dir_opt.size() + dir1_opt.size() + dir2_opt.size() == 0) PrintErrorAndQuit("-suffix is only valid if -dir, -dir1 or -dir2 is set"); if ((dir_opt.size() || dir1_opt.size() || dir2_opt.size())) { @@ -397,239 +430,260 @@ int main(int argc, char *argv[]) else if (dir_opt.size() && (dir1_opt.size() || dir2_opt.size())) PrintErrorAndQuit("-dir cannot be set with -dir1 or -dir2"); } - if (atom_opt.size()!=4) + if (atom_opt.size() != 4) PrintErrorAndQuit("ERROR! Atom name must have 4 characters, including space."); - if (mol_opt!="auto" && mol_opt!="protein" && mol_opt!="RNA") + if (mol_opt != "auto" && mol_opt != "protein" && mol_opt != "RNA") PrintErrorAndQuit("ERROR! 
Molecule type must be either RNA or protein."); - else if (mol_opt=="protein" && atom_opt=="auto") - atom_opt=" CA "; - else if (mol_opt=="RNA" && atom_opt=="auto") - atom_opt=" C3'"; + else if (mol_opt == "protein" && atom_opt == "auto") + atom_opt = " CA "; + else if (mol_opt == "RNA" && atom_opt == "auto") + atom_opt = " C3'"; - if (u_opt && Lnorm_ass<=0) + if (u_opt && Lnorm_ass <= 0) PrintErrorAndQuit("Wrong value for option -u! It should be >0"); - if (d_opt && d0_scale<=0) + if (d_opt && d0_scale <= 0) PrintErrorAndQuit("Wrong value for option -d! It should be >0"); - if (outfmt_opt>=2 && (a_opt || u_opt || d_opt)) + if (outfmt_opt >= 2 && (a_opt || u_opt || d_opt)) PrintErrorAndQuit("-outfmt 2 cannot be used with -a, -u, -L, -d"); - if (byresi_opt!=0) + if (byresi_opt != 0) { if (i_opt) PrintErrorAndQuit("-byresi >=1 cannot be used with -i or -I"); - if (byresi_opt<0 || byresi_opt>5) + if (byresi_opt < 0 || byresi_opt > 5) PrintErrorAndQuit("-byresi can only be 0, 1, 2, 3, 4, or 5"); - if (byresi_opt>=2 && byresi_opt<=3 && ter_opt>=2) + if (byresi_opt >= 2 && byresi_opt <= 3 && ter_opt >= 2) PrintErrorAndQuit("-byresi 2 and -byresi 3 should be used with -ter <=1"); } - if (split_opt==1 && ter_opt!=0) + if (split_opt == 1 && ter_opt != 0) PrintErrorAndQuit("-split 1 should be used with -ter 0"); - else if (split_opt==2 && ter_opt!=0 && ter_opt!=1) + else if (split_opt == 2 && ter_opt != 0 && ter_opt != 1) PrintErrorAndQuit("-split 2 should be used with -ter 0 or 1"); - if (split_opt<0 || split_opt>2) + if (split_opt < 0 || split_opt > 2) PrintErrorAndQuit("-split can only be 0, 1 or 2"); - if (cp_opt!=0 && cp_opt!=1) + if (cp_opt != 0 && cp_opt != 1) PrintErrorAndQuit("-cp can only be 0 or 1"); if (cp_opt && i_opt) PrintErrorAndQuit("-cp cannot be used with -i or -I"); /* read initial alignment file from 'align.txt' */ - if (i_opt) read_user_alignment(sequence, fname_lign, i_opt); + if (i_opt) + read_user_alignment(sequence, fname_lign, i_opt); - if 
(byresi_opt) i_opt=3; + if (byresi_opt) + i_opt = 3; if (m_opt && fname_matrix == "") // Output rotation matrix: matrix.txt PrintErrorAndQuit("ERROR! Please provide a file name for option -m!"); /* parse file list */ - if (dir1_opt.size()+dir_opt.size()==0) chain1_list.push_back(xname); - else file2chainlist(chain1_list, xname, dir_opt+dir1_opt, suffix_opt); + if (dir1_opt.size() + dir_opt.size() == 0) + chain1_list.push_back(xname); + else + file2chainlist(chain1_list, xname, dir_opt + dir1_opt, suffix_opt); if (dir_opt.size()) - for (int i=0;i >PDB_lines1; // text of chain1 - vector >PDB_lines2; // text of chain2 + vector> PDB_lines1; // text of chain1 + vector> PDB_lines2; // text of chain2 vector mol_vec1; // molecule type of chain1, RNA if >0 vector mol_vec2; // molecule type of chain2, RNA if >0 vector chainID_list1; // list of chainID1 vector chainID_list2; // list of chainID2 - int i,j; // file index - int chain_i,chain_j; // chain index - int r; // residue index - int xlen, ylen; // chain length - int xchainnum,ychainnum;// number of chains in a PDB file - char *seqx, *seqy; // for the protein sequence - char *secx, *secy; // for the secondary structure - double **xa, **ya; // for input vectors xa[0...xlen-1][0..2] and - // ya[0...ylen-1][0..2], in general, - // ya is regarded as native structure - // --> superpose xa onto ya - vector resi_vec1; // residue index for chain1 - vector resi_vec2; // residue index for chain2 - int read_resi=byresi_opt; // whether to read residue index - if (byresi_opt==0 && o_opt) read_resi=2; + int i, j; // file index + int chain_i, chain_j; // chain index + int r; // residue index + int xlen, ylen; // chain length + int xchainnum, ychainnum; // number of chains in a PDB file + char *seqx, *seqy; // for the protein sequence + char *secx, *secy; // for the secondary structure + double **xa, **ya; // for input vectors xa[0...xlen-1][0..2] and + // ya[0...ylen-1][0..2], in general, + // ya is regarded as native structure + // --> 
superpose xa onto ya + vector resi_vec1; // residue index for chain1 + vector resi_vec2; // residue index for chain2 + int read_resi = byresi_opt; // whether to read residue index + if (byresi_opt == 0 && o_opt) + read_resi = 2; /* loop over file names */ - for (i=0;i0) make_sec(seqx,xa, xlen, secx,atom_opt); - else make_sec(xa, xlen, secx); // secondary structure assignment - - for (j=(dir_opt.size()>0)*(i+1);j 0) + make_sec(seqx, xa, xlen, secx, atom_opt); + else + make_sec(xa, xlen, secx); // secondary structure assignment + + for (j = (dir_opt.size() > 0) * (i + 1); j < chain2_list.size(); j++) { - if (pair_opt && j!=i) continue; + if (pair_opt && j != i) + continue; /* parse chain 2 */ - if (PDB_lines2.size()==0) + if (PDB_lines2.size() == 0) { - yname=chain2_list[j]; - ychainnum=get_PDB_lines(yname, PDB_lines2, chainID_list2, - mol_vec2, ter_opt, infmt2_opt, atom_opt, false, - split_opt, het_opt, chain2parse2, model2parse2); + yname = chain2_list[j]; + ychainnum = get_PDB_lines(yname, PDB_lines2, chainID_list2, + mol_vec2, ter_opt, infmt2_opt, atom_opt, false, + split_opt, het_opt, chain2parse2, model2parse2); if (!ychainnum) { - cerr<<"Warning! 
Cannot parse file: "<0) - make_sec(seqy, ya, ylen, secy, atom_opt); - else make_sec(ya, ylen, secy); + resi_vec2, read_resi); + if (mol_vec2[chain_j] > 0) + make_sec(seqy, ya, ylen, secy, atom_opt); + else + make_sec(ya, ylen, secy); - if (byresi_opt) extract_aln_from_resi(sequence, - seqx,seqy,resi_vec1,resi_vec2,byresi_opt); + if (byresi_opt) + extract_aln_from_resi(sequence, + seqx, seqy, resi_vec1, resi_vec2, byresi_opt); /* declare variable specific to this pair of TMalign */ double t0[3], u0[3][3]; double TM1, TM2; - double TM3, TM4, TM5; // for a_opt, u_opt, d_opt + double TM3, TM4, TM5; // for a_opt, u_opt, d_opt double d0_0, TM_0; double d0A, d0B, d0u, d0a; - double d0_out=5.0; - string seqM, seqxA, seqyA;// for output alignment + double d0_out = 5.0; + string seqM, seqxA, seqyA; // for output alignment double rmsd0 = 0.0; - int L_ali; // Aligned length in standard_TMscore - double Liden=0; - double TM_ali, rmsd_ali; // TMscore and rmsd in standard_TMscore - int n_ali=0; - int n_ali8=0; + int L_ali; // Aligned length in standard_TMscore + double Liden = 0; + double TM_ali, rmsd_ali; // TMscore and rmsd in standard_TMscore + int n_ali = 0; + int n_ali8 = 0; vector do_vec; /* entry function for structure alignment */ - if (cp_opt) CPalign_main( - xa, ya, seqx, seqy, secx, secy, - t0, u0, TM1, TM2, TM3, TM4, TM5, - d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, - seqM, seqxA, seqyA, do_vec, - rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, - xlen, ylen, sequence, Lnorm_ass, d0_scale, - i_opt, a_opt, u_opt, d_opt, fast_opt, - mol_vec1[chain_i]+mol_vec2[chain_j],TMcut); - else TMalign_main( - xa, ya, seqx, seqy, secx, secy, - t0, u0, TM1, TM2, TM3, TM4, TM5, - d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, - seqM, seqxA, seqyA, do_vec, - rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, - xlen, ylen, sequence, Lnorm_ass, d0_scale, - i_opt, a_opt, u_opt, d_opt, fast_opt, - mol_vec1[chain_i]+mol_vec2[chain_j],TMcut); + if (cp_opt) + CPalign_main( + xa, ya, seqx, 
seqy, secx, secy, + t0, u0, TM1, TM2, TM3, TM4, TM5, + d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, + seqM, seqxA, seqyA, do_vec, + rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, + xlen, ylen, sequence, Lnorm_ass, d0_scale, + i_opt, a_opt, u_opt, d_opt, fast_opt, + mol_vec1[chain_i] + mol_vec2[chain_j], TMcut); + else + TMalign_main( + xa, ya, seqx, seqy, secx, secy, + t0, u0, TM1, TM2, TM3, TM4, TM5, + d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, + seqM, seqxA, seqyA, do_vec, + rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, + xlen, ylen, sequence, Lnorm_ass, d0_scale, + i_opt, a_opt, u_opt, d_opt, fast_opt, + mol_vec1[chain_i] + mol_vec2[chain_j], TMcut, 0); /* print result */ - if (outfmt_opt==0) print_version(); + if (outfmt_opt == 0) + print_version(); output_results( - xname.substr(dir1_opt.size()+dir_opt.size()), - yname.substr(dir2_opt.size()+dir_opt.size()), + xname.substr(dir1_opt.size() + dir_opt.size()), + yname.substr(dir2_opt.size() + dir_opt.size()), chainID_list1[chain_i].c_str(), chainID_list2[chain_j].c_str(), - xlen, ylen, t0, u0, TM1, TM2, + xlen, ylen, t0, u0, TM1, TM2, TM3, TM4, TM5, rmsd0, d0_out, seqM.c_str(), seqxA.c_str(), seqyA.c_str(), Liden, n_ali8, L_ali, TM_ali, rmsd_ali, TM_0, d0_0, d0A, d0B, - Lnorm_ass, d0_scale, d0a, d0u, - (m_opt?fname_matrix:"").c_str(), + Lnorm_ass, d0_scale, d0a, d0u, + (m_opt ? fname_matrix : "").c_str(), outfmt_opt, ter_opt, 0, split_opt, o_opt, - (o_opt?fname_super:"").c_str(), - i_opt, a_opt, u_opt, d_opt,mirror_opt, - resi_vec1, resi_vec2 ); + (o_opt ? fname_super : "").c_str(), + i_opt, a_opt, u_opt, d_opt, mirror_opt, + resi_vec1, resi_vec2); /* Done! 
Free memory */ seqM.clear(); seqxA.clear(); seqyA.clear(); DeleteArray(&ya, ylen); - delete [] seqy; - delete [] secy; + delete[] seqy; + delete[] secy; resi_vec2.clear(); do_vec.clear(); } // chain_j - if (chain2_list.size()>1) + if (chain2_list.size() > 1) { yname.clear(); - for (chain_j=0;chain_j().swap(model2parse2); t2 = clock(); - float diff = ((float)t2 - (float)t1)/CLOCKS_PER_SEC; + float diff = ((float)t2 - (float)t1) / CLOCKS_PER_SEC; printf("#Total CPU time is %5.2f seconds\n", diff); return 0; } diff --git a/TMalign.h b/TMalign.h index 383eaf5..e8009a0 100644 --- a/TMalign.h +++ b/TMalign.h @@ -10,88 +10,93 @@ // 1, collect those residues with dis3) + // there are not enough feasible pairs, relieve the threshold + if (n_cut < 3 && n_ali > 3) { inc++; - double dinc=(d+inc*0.5); + double dinc = (d + inc * 0.5); d_tmp = dinc * dinc; } - else break; - } + else + break; + } - *score1=score_sum/Lnorm; + *score1 = score_sum / Lnorm; return n_cut; } int score_fun8_standard(double **xa, double **ya, int n_ali, double d, - int i_ali[], double *score1, int score_sum_method, - double score_d8, double d0) + int i_ali[], double *score1, int score_sum_method, + double score_d8, double d0) { double score_sum = 0, di; - double d_tmp = d*d; - double d02 = d0*d0; - double score_d8_cut = score_d8*score_d8; + double d_tmp = d * d; + double d02 = d0 * d0; + double score_d8_cut = score_d8 * score_d8; int i, n_cut, inc = 0; while (1) { n_cut = 0; score_sum = 0; - for (i = 0; i3) + // there are not enough feasible pairs, relieve the threshold + if (n_cut < 3 && n_ali > 3) { inc++; - double dinc = (d + inc*0.5); + double dinc = (d + inc * 0.5); d_tmp = dinc * dinc; } - else break; + else + break; } *score1 = score_sum / n_ali; @@ -99,164 +104,167 @@ int score_fun8_standard(double **xa, double **ya, int n_ali, double d, } double TMscore8_search(double **r1, double **r2, double **xtm, double **ytm, - double **xt, int Lali, double t0[3], double u0[3][3], int simplify_step, - int 
score_sum_method, double *Rcomm, double local_d0_search, double Lnorm, - double score_d8, double d0) + double **xt, int Lali, double t0[3], double u0[3][3], int simplify_step, + int score_sum_method, double *Rcomm, double local_d0_search, double Lnorm, + double score_d8, double d0) { int i, m; - double score_max, score, rmsd; - const int kmax=Lali; + double score_max, score, rmsd; + const int kmax = Lali; int k_ali[kmax], ka, k; double t[3]; double u[3][3]; double d; - - //iterative parameters - int n_it=20; //maximum number of iterations - int n_init_max=6; //maximum number of different fragment length - int L_ini[n_init_max]; //fragment lengths, Lali, Lali/2, Lali/4 ... 4 - int L_ini_min=4; - if(Laliscore_max) - { - score_max=score; - - //save the rotation matrix - for(k=0; k<3; k++) + n_cut = score_fun8(xt, ytm, Lali, d, i_ali, &score, + score_sum_method, Lnorm, score_d8, d0); + if (score > score_max) + { + score_max = score; + + // save the rotation matrix + for (k = 0; k < 3; k++) { - t0[k]=t[k]; - u0[k][0]=u[k][0]; - u0[k][1]=u[k][1]; - u0[k][2]=u[k][2]; + t0[k] = t[k]; + u0[k][0] = u[k][0]; + u0[k][1] = u[k][1]; + u0[k][2] = u[k][2]; } } - - //try to extend the alignment iteratively + + // try to extend the alignment iteratively d = local_d0_search + 1; - for(int it=0; itscore_max) + n_cut = score_fun8(xt, ytm, Lali, d, i_ali, &score, + score_sum_method, Lnorm, score_d8, d0); + if (score > score_max) { - score_max=score; + score_max = score; - //save the rotation matrix - for(k=0; k<3; k++) + // save the rotation matrix + for (k = 0; k < 3; k++) { - t0[k]=t[k]; - u0[k][0]=u[k][0]; - u0[k][1]=u[k][1]; - u0[k][2]=u[k][2]; - } + t0[k] = t[k]; + u0[k][0] = u[k][0]; + u0[k][1] = u[k][1]; + u0[k][2] = u[k][2]; + } } - - //check if it converges - if(n_cut==ka) - { - for(k=0; kiL_max) i=iL_max; //do this to use the last missed fragment + i = i + simplify_step; // shift the fragment + if (i > iL_max) + i = iL_max; // do this to use the last missed fragment } - else 
if(i>=iL_max) break; - }//while(1) - //end of one fragment - }//for(i_init + else if (i >= iL_max) + break; + } // while(1) + // end of one fragment + } // for(i_init return score_max; } - -double TMscore8_search_standard( double **r1, double **r2, - double **xtm, double **ytm, double **xt, int Lali, - double t0[3], double u0[3][3], int simplify_step, int score_sum_method, - double *Rcomm, double local_d0_search, double score_d8, double d0) +double TMscore8_search_standard(double **r1, double **r2, + double **xtm, double **ytm, double **xt, int Lali, + double t0[3], double u0[3][3], int simplify_step, int score_sum_method, + double *Rcomm, double local_d0_search, double score_d8, double d0) { int i, m; double score_max, score, rmsd; @@ -266,15 +274,16 @@ double TMscore8_search_standard( double **r1, double **r2, double u[3][3]; double d; - //iterative parameters - int n_it = 20; //maximum number of iterations - int n_init_max = 6; //maximum number of different fragment length - int L_ini[n_init_max]; //fragment lengths, Lali, Lali/2, Lali/4 ... 4 + // iterative parameters + int n_it = 20; // maximum number of iterations + int n_init_max = 6; // maximum number of different fragment length + int L_ini[n_init_max]; // fragment lengths, Lali, Lali/2, Lali/4 ... 
4 int L_ini_min = 4; - if (Laliscore_max) + if (score > score_max) { score_max = score; - //save the rotation matrix - for (k = 0; k<3; k++) + // save the rotation matrix + for (k = 0; k < 3; k++) { t0[k] = t[k]; u0[k][0] = u[k][0]; @@ -345,12 +354,12 @@ double TMscore8_search_standard( double **r1, double **r2, } } - //try to extend the alignment iteratively + // try to extend the alignment iteratively d = local_d0_search + 1; - for (int it = 0; itscore_max) + score_sum_method, score_d8, d0); + if (score > score_max) { score_max = score; - //save the rotation matrix - for (k = 0; k<3; k++) + // save the rotation matrix + for (k = 0; k < 3; k++) { t0[k] = t[k]; u0[k][0] = u[k][0]; @@ -383,309 +392,325 @@ double TMscore8_search_standard( double **r1, double **r2, } } - //check if it converges + // check if it converges if (n_cut == ka) { - for (k = 0; kiL_max) i = iL_max; //do this to use the last missed fragment + i = i + simplify_step; // shift the fragment + if (i > iL_max) + i = iL_max; // do this to use the last missed fragment } - else if (i >= iL_max) break; - }//while(1) - //end of one fragment - }//for(i_init + else if (i >= iL_max) + break; + } // while(1) + // end of one fragment + } // for(i_init return score_max; } -//Comprehensive TMscore search engine -// input: two vector sets: x, y -// an alignment invmap0[] between x and y -// simplify_step: 1 or 40 or other integers -// score_sum_method: 0 for score over all pairs -// 8 for socre over the pairs with dist=0) //aligned + j = invmap0[i]; + if (j >= 0) // aligned { - xtm[k][0]=x[j][0]; - xtm[k][1]=x[j][1]; - xtm[k][2]=x[j][2]; - - ytm[k][0]=y[i][0]; - ytm[k][1]=y[i][1]; - ytm[k][2]=y[i][2]; + xtm[k][0] = x[j][0]; + xtm[k][1] = x[j][1]; + xtm[k][2] = x[j][2]; + + ytm[k][0] = y[i][0]; + ytm[k][1] = y[i][1]; + ytm[k][2] = y[i][2]; k++; } } - //detailed search 40-->1 + // detailed search 40-->1 tmscore = TMscore8_search(r1, r2, xtm, ytm, xt, k, t, u, simplify_step, - score_sum_method, &rmsd, 
local_d0_search, Lnorm, score_d8, d0); + score_sum_method, &rmsd, local_d0_search, Lnorm, score_d8, d0); return tmscore; } -double detailed_search_standard( double **r1, double **r2, - double **xtm, double **ytm, double **xt, double **x, double **y, - int xlen, int ylen, int invmap0[], double t[3], double u[3][3], - int simplify_step, int score_sum_method, double local_d0_search, - const bool& bNormalize, double Lnorm, double score_d8, double d0) +double detailed_search_standard(double **r1, double **r2, + double **xtm, double **ytm, double **xt, double **x, double **y, + int xlen, int ylen, int invmap0[], double t[3], double u[3][3], + int simplify_step, int score_sum_method, double local_d0_search, + const bool &bNormalize, double Lnorm, double score_d8, double d0) { - //x is model, y is template, try to superpose onto y - int i, j, k; + // x is model, y is template, try to superpose onto y + int i, j, k; double tmscore; double rmsd; - k=0; - for(i=0; i=0) //aligned + j = invmap0[i]; + if (j >= 0) // aligned { - xtm[k][0]=x[j][0]; - xtm[k][1]=x[j][1]; - xtm[k][2]=x[j][2]; - - ytm[k][0]=y[i][0]; - ytm[k][1]=y[i][1]; - ytm[k][2]=y[i][2]; + xtm[k][0] = x[j][0]; + xtm[k][1] = x[j][1]; + xtm[k][2] = x[j][2]; + + ytm[k][0] = y[i][0]; + ytm[k][1] = y[i][1]; + ytm[k][2] = y[i][2]; k++; } } - //detailed search 40-->1 - tmscore = TMscore8_search_standard( r1, r2, xtm, ytm, xt, k, t, u, - simplify_step, score_sum_method, &rmsd, local_d0_search, score_d8, d0); - if (bNormalize)// "-i", to use standard_TMscore, then bNormalize=true, else bNormalize=false; + // detailed search 40-->1 + tmscore = TMscore8_search_standard(r1, r2, xtm, ytm, xt, k, t, u, + simplify_step, score_sum_method, &rmsd, local_d0_search, score_d8, d0); + if (bNormalize) // "-i", to use standard_TMscore, then bNormalize=true, else bNormalize=false; tmscore = tmscore * k / Lnorm; return tmscore; } -//compute the score quickly in three iterations -double get_score_fast( double **r1, double **r2, double **xtm, 
double **ytm, - double **x, double **y, int xlen, int ylen, int invmap[], - double d0, double d0_search, double t[3], double u[3][3]) +// compute the score quickly in three iterations +double get_score_fast(double **r1, double **r2, double **xtm, double **ytm, + double **x, double **y, int xlen, int ylen, int invmap[], + double d0, double d0_search, double t[3], double u[3][3]) { double rms, tmscore, tmscore1, tmscore2; int i, j, k; - k=0; - for(j=0; j=0) - { - r1[k][0]=x[i][0]; - r1[k][1]=x[i][1]; - r1[k][2]=x[i][2]; - - r2[k][0]=y[j][0]; - r2[k][1]=y[j][1]; - r2[k][2]=y[j][2]; - - xtm[k][0]=x[i][0]; - xtm[k][1]=x[i][1]; - xtm[k][2]=x[i][2]; - - ytm[k][0]=y[j][0]; - ytm[k][1]=y[j][1]; - ytm[k][2]=y[j][2]; - + k = 0; + for (j = 0; j < ylen; j++) + { + i = invmap[j]; + if (i >= 0) + { + r1[k][0] = x[i][0]; + r1[k][1] = x[i][1]; + r1[k][2] = x[i][2]; + + r2[k][0] = y[j][0]; + r2[k][1] = y[j][1]; + r2[k][2] = y[j][2]; + + xtm[k][0] = x[i][0]; + xtm[k][1] = x[i][1]; + xtm[k][2] = x[i][2]; + + ytm[k][0] = y[j][0]; + ytm[k][1] = y[j][1]; + ytm[k][2] = y[j][2]; + k++; } - else if(i!=-1) PrintErrorAndQuit("Wrong map!\n"); + else if (i != -1) + PrintErrorAndQuit("Wrong map!\n"); } Kabsch(r1, r2, k, 1, &rms, t, u); - - //evaluate score + + // evaluate score double di; - const int len=k; - double dis[len]; - double d00=d0_search; - double d002=d00*d00; - double d02=d0*d0; - - int n_ali=k; + const int len = k; + double dis[len]; + double d00 = d0_search; + double d002 = d00 * d00; + double d02 = d0 * d0; + + int n_ali = k; double xrot[3]; - tmscore=0; - for(k=0; k dis_vec(dis, dis+n_ali); + tmscore = 0; + for (k = 0; k < n_ali; k++) + { + transform(t, u, &xtm[k][0], xrot); + di = dist(xrot, &ytm[k][0]); + dis[k] = di; + tmscore += 1 / (1 + di / d02); + } + + // second iteration + double d002t = d002; + vector dis_vec(dis, dis + n_ali); sort(dis_vec.begin(), dis_vec.end()); - if (d002t3) d002t += 0.5; - else break; + // there are not enough feasible pairs, relieve the threshold 
+ if (j < 3 && n_ali > 3) + d002t += 0.5; + else + break; } - - if(n_ali!=j) + + if (n_ali != j) { Kabsch(r1, r2, j, 1, &rms, t, u); - tmscore1=0; - for(k=0; k dis_vec(dis, dis+n_ali); + tmscore1 = 0; + for (k = 0; k < n_ali; k++) + { + transform(t, u, &xtm[k][0], xrot); + di = dist(xrot, &ytm[k][0]); + dis[k] = di; + tmscore1 += 1 / (1 + di / d02); + } + + // third iteration + d002t = d002 + 1; + vector dis_vec(dis, dis + n_ali); sort(dis_vec.begin(), dis_vec.end()); - if (d002t3) d002t += 0.5; - else break; + // there are not enough feasible pairs, relieve the threshold + if (j < 3 && n_ali > 3) + d002t += 0.5; + else + break; } - //evaluate the score + // evaluate the score Kabsch(r1, r2, j, 1, &rms, t, u); - tmscore2=0; - for(k=0; k=tmscore) tmscore=tmscore1; - if(tmscore2>=tmscore) tmscore=tmscore2; + + if (tmscore1 >= tmscore) + tmscore = tmscore1; + if (tmscore2 >= tmscore) + tmscore = tmscore2; return tmscore; // no need to normalize this score because it will not be used for latter scoring } - -//perform gapless threading to find the best initial alignment -//input: x, y, xlen, ylen -//output: y2x0 stores the best alignment: e.g., -//y2x0[j]=i means: -//the jth element in y is aligned to the ith element in x if i>=0 -//the jth element in y is aligned to a gap in x if i==-1 +// perform gapless threading to find the best initial alignment +// input: x, y, xlen, ylen +// output: y2x0 stores the best alignment: e.g., +// y2x0[j]=i means: +// the jth element in y is aligned to the ith element in x if i>=0 +// the jth element in y is aligned to a gap in x if i==-1 double get_initial(double **r1, double **r2, double **xtm, double **ytm, - double **x, double **y, int xlen, int ylen, int *y2x, - double d0, double d0_search, const bool fast_opt, - double t[3], double u[3][3]) + double **x, double **y, int xlen, int ylen, int *y2x, + double d0, double d0_search, const bool fast_opt, + double t[3], double u[3][3]) { - int min_len=getmin(xlen, ylen); - if(min_len<3) 
PrintErrorAndQuit("Sequence is too short <3!\n"); - - int min_ali= min_len/2; //minimum size of considered fragment - if(min_ali<=5) min_ali=5; + int min_len = getmin(xlen, ylen); + if (min_len < 3) + PrintErrorAndQuit("Sequence is too short <3!\n"); + + int min_ali = min_len / 2; // minimum size of considered fragment + if (min_ali <= 5) + min_ali = 5; int n1, n2; - n1 = -ylen+min_ali; - n2 = xlen-min_ali; + n1 = -ylen + min_ali; + n2 = xlen - min_ali; int i, j, k, k_best; - double tmscore, tmscore_max=-1; + double tmscore, tmscore_max = -1; - k_best=n1; - for(k=n1; k<=n2; k+=(fast_opt)?5:1) + k_best = n1; + for (k = n1; k <= n2; k += (fast_opt) ? 5 : 1) { - //get the map - for(j=0; j=0 && i= 0 && i < xlen) + y2x[j] = i; + else + y2x[j] = -1; } - - //evaluate the map quickly in three iterations - //this is not real tmscore, it is used to evaluate the goodness of the initial alignment - tmscore=get_score_fast(r1, r2, xtm, ytm, - x, y, xlen, ylen, y2x, d0,d0_search, t, u); - if(tmscore>=tmscore_max) + + // evaluate the map quickly in three iterations + // this is not real tmscore, it is used to evaluate the goodness of the initial alignment + tmscore = get_score_fast(r1, r2, xtm, ytm, + x, y, xlen, ylen, y2x, d0, d0_search, t, u); + if (tmscore >= tmscore_max) { - tmscore_max=tmscore; - k_best=k; + tmscore_max = tmscore; + k_best = k; } } - - //extract the best map - k=k_best; - for(j=0; j=0 && i= 0 && i < xlen) + y2x[j] = i; + else + y2x[j] = -1; + } return tmscore_max; } @@ -693,179 +718,205 @@ double get_initial(double **r1, double **r2, double **xtm, double **ytm, void smooth(int *sec, int len) { int i, j; - //smooth single --x-- => ----- - for (i=2; i ----- + for (i = 2; i < len - 2; i++) { - if(sec[i]==2 || sec[i]==4) + if (sec[i] == 2 || sec[i] == 4) { - j=sec[i]; - if (sec[i-2]!=j && sec[i-1]!=j && sec[i+1]!=j && sec[i+2]!=j) - sec[i]=1; + j = sec[i]; + if (sec[i - 2] != j && sec[i - 1] != j && sec[i + 1] != j && sec[i + 2] != j) + sec[i] = 1; } } - // 
smooth double + // smooth double // --xx-- => ------ - for (i=0; icoil, 2->helix, 3->turn, 4->strand */ void make_sec(double **x, int len, char *sec) { int j1, j2, j3, j4, j5; double d13, d14, d15, d24, d25, d35; - for(int i=0; i=0 && j5= 0 && j5 < len) + { + d13 = sqrt(dist(x[j1], x[j3])); + d14 = sqrt(dist(x[j1], x[j4])); + d15 = sqrt(dist(x[j1], x[j5])); + d24 = sqrt(dist(x[j2], x[j4])); + d25 = sqrt(dist(x[j2], x[j5])); + d35 = sqrt(dist(x[j3], x[j5])); + sec[i] = sec_str(d13, d14, d15, d24, d25, d35); + } + } + sec[len] = 0; } /* a c d b: a paired to b, c paired to d */ -bool overlap(const int a1,const int b1,const int c1,const int d1, - const int a2,const int b2,const int c2,const int d2) +bool overlap(const int a1, const int b1, const int c1, const int d1, + const int a2, const int b2, const int c2, const int d2) { - return (a2>=a1&&a2<=c1)||(c2>=a1&&c2<=c1)|| - (d2>=a1&&d2<=c1)||(b2>=a1&&b2<=c1)|| - (a2>=d1&&a2<=b1)||(c2>=d1&&c2<=b1)|| - (d2>=d1&&d2<=b1)||(b2>=d1&&b2<=b1); + return (a2 >= a1 && a2 <= c1) || (c2 >= a1 && c2 <= c1) || + (d2 >= a1 && d2 <= c1) || (b2 >= a1 && b2 <= c1) || + (a2 >= d1 && a2 <= b1) || (c2 >= d1 && c2 <= b1) || + (d2 >= d1 && d2 <= b1) || (b2 >= d1 && b2 <= b1); } /* find base pairing stacks in RNA*/ -void sec_str(int len,char *seq, const vector >&bp, - int a, int b,int &c, int &d) +void sec_str(int len, char *seq, const vector> &bp, + int a, int b, int &c, int &d) { int i; - - for (i=0;i0) + if (a + i < len - 3 && b - i > 0) { - if (a+iunpair, 2->paired with upstream, 3->paired with downstream */ -void make_sec(char *seq, double **x, int len, char *sec,const string atom_opt) +void make_sec(char *seq, double **x, int len, char *sec, const string atom_opt) { - int ii,jj,i,j; + int ii, jj, i, j; - float lb=12.5; // lower bound for " C3'" - float ub=15.0; // upper bound for " C3'" - if (atom_opt==" C4'") {lb=14.0;ub=16.0;} - else if(atom_opt==" C5'") {lb=16.0;ub=18.0;} - else if(atom_opt==" O3'") {lb=13.5;ub=16.5;} - else 
if(atom_opt==" O5'") {lb=15.5;ub=18.5;} - else if(atom_opt==" P ") {lb=16.5;ub=21.0;} + float lb = 12.5; // lower bound for " C3'" + float ub = 15.0; // upper bound for " C3'" + if (atom_opt == " C4'") + { + lb = 14.0; + ub = 16.0; + } + else if (atom_opt == " C5'") + { + lb = 16.0; + ub = 18.0; + } + else if (atom_opt == " O3'") + { + lb = 13.5; + ub = 16.5; + } + else if (atom_opt == " O5'") + { + lb = 15.5; + ub = 18.5; + } + else if (atom_opt == " P ") + { + lb = 16.5; + ub = 21.0; + } float dis; - vector bp_tmp(len,false); - vector > bp(len,bp_tmp); + vector bp_tmp(len, false); + vector> bp(len, bp_tmp); bp_tmp.clear(); - for (i=0; ilb && dis lb && dis < ub); } } } - + // From 5' to 3': A0_var C0_var D0_var B0_var: A0_var paired to B0_var, C0_var paired to D0_var - vector A0_var,B0_var,C0_var,D0_var; - for (i=0; i A0_var, B0_var, C0_var, D0_var; + for (i = 0; i < len - 2; i++) { - for (j=i+3; j0 && j+1 0 && j + 1 < len && bp[i - 1][j + 1]) + continue; + if (!bp[i + 1][j - 1]) + continue; + sec_str(len, seq, bp, i, j, ii, jj); + if (jj < i || j < ii) { - ii=i; - jj=j; + ii = i; + jj = j; } A0_var.push_back(i); B0_var.push_back(j); @@ -873,9 +924,9 @@ void make_sec(char *seq, double **x, int len, char *sec,const string atom_opt) D0_var.push_back(jj); } } - - //int sign; - for (i=0;iC0_var[i]) break; - sec[A0_var[i]+j]='<'; - sec[D0_var[i]+j]='>'; + if (A0_var[i] + j > C0_var[i]) + break; + sec[A0_var[i] + j] = '<'; + sec[D0_var[i] + j] = '>'; } } - sec[len]=0; + sec[len] = 0; /* clean up */ A0_var.clear(); @@ -919,39 +971,39 @@ void make_sec(char *seq, double **x, int len, char *sec,const string atom_opt) bp.clear(); } -//get initial alignment from secondary structure alignment -//input: x, y, xlen, ylen -//output: y2x stores the best alignment: e.g., -//y2x[j]=i means: -//the jth element in y is aligned to the ith element in x if i>=0 -//the jth element in y is aligned to a gap in x if i==-1 +// get initial alignment from secondary structure alignment +// 
input: x, y, xlen, ylen +// output: y2x stores the best alignment: e.g., +// y2x[j]=i means: +// the jth element in y is aligned to the ith element in x if i>=0 +// the jth element in y is aligned to a gap in x if i==-1 void get_initial_ss(bool **path, double **val, - const char *secx, const char *secy, int xlen, int ylen, int *y2x) + const char *secx, const char *secy, int xlen, int ylen, int *y2x) { - double gap_open=-1.0; + double gap_open = -1.0; NWDP_TM(path, val, secx, secy, xlen, ylen, gap_open, y2x); } - // get_initial5 in TMalign fortran, get_initial_local in TMalign c by yangji -//get initial alignment of local structure superposition -//input: x, y, xlen, ylen -//output: y2x stores the best alignment: e.g., -//y2x[j]=i means: -//the jth element in y is aligned to the ith element in x if i>=0 -//the jth element in y is aligned to a gap in x if i==-1 -bool get_initial5( double **r1, double **r2, double **xtm, double **ytm, - bool **path, double **val, - double **x, double **y, int xlen, int ylen, int *y2x, - double d0, double d0_search, const bool fast_opt, const double D0_MIN) +// get initial alignment of local structure superposition +// input: x, y, xlen, ylen +// output: y2x stores the best alignment: e.g., +// y2x[j]=i means: +// the jth element in y is aligned to the ith element in x if i>=0 +// the jth element in y is aligned to a gap in x if i==-1 +bool get_initial5(double **r1, double **r2, double **xtm, double **ytm, + bool **path, double **val, + double **x, double **y, int xlen, int ylen, int *y2x, + double d0, double d0_search, const bool fast_opt, const double D0_MIN) { double GL, rmsd; double t[3]; double u[3][3]; double d01 = d0 + 1.5; - if (d01 < D0_MIN) d01 = D0_MIN; - double d02 = d01*d01; + if (d01 < D0_MIN) + d01 = D0_MIN; + double d02 = d01 * d01; double GLmax = 0; int aL = getmin(xlen, ylen); @@ -984,7 +1036,7 @@ bool get_initial5( double **r1, double **r2, double **xtm, double **ytm, n_jump2 = ylen / 3; // fragment to 
superimpose--------------> - int n_frag[2] = { 20, 100 }; + int n_frag[2] = {20, 100}; if (n_frag[0] > (aL / 3)) n_frag[0] = aL / 3; if (n_frag[1] > (aL / 2)) @@ -993,8 +1045,8 @@ bool get_initial5( double **r1, double **r2, double **xtm, double **ytm, // start superimpose search--------------> if (fast_opt) { - n_jump1*=5; - n_jump2*=5; + n_jump1 *= 5; + n_jump2 *= 5; } bool flag = false; for (int i_frag = 0; i_frag < 2; i_frag++) @@ -1002,11 +1054,11 @@ bool get_initial5( double **r1, double **r2, double **xtm, double **ytm, int m1 = xlen - n_frag[i_frag] + 1; int m2 = ylen - n_frag[i_frag] + 1; - for (int i = 0; iGLmax) + invmap, d0, d0_search, t, u); + if (GL > GLmax) { GLmax = GL; - for (int ii = 0; ii=0) - { - r1[k][0]=x[i][0]; - r1[k][1]=x[i][1]; - r1[k][2]=x[i][2]; - - r2[k][0]=y[j][0]; - r2[k][1]=y[j][1]; - r2[k][2]=y[j][2]; - + int i, k = 0; + for (int j = 0; j < ylen; j++) + { + i = y2x[j]; + if (i >= 0) + { + r1[k][0] = x[i][0]; + r1[k][1] = x[i][1]; + r1[k][2] = x[i][2]; + + r2[k][0] = y[j][0]; + r2[k][1] = y[j][1]; + r2[k][2] = y[j][2]; + k++; } } Kabsch(r1, r2, k, 1, &rmsd, t, u); - - for(int ii=0; ii=0 -//the jth element in y is aligned to a gap in x if i==-1 +// get initial alignment from secondary structure and previous alignments +// input: x, y, xlen, ylen +// output: y2x stores the best alignment: e.g., +// y2x[j]=i means: +// the jth element in y is aligned to the ith element in x if i>=0 +// the jth element in y is aligned to a gap in x if i==-1 void get_initial_ssplus(double **r1, double **r2, double **score, bool **path, - double **val, const char *secx, const char *secy, double **x, double **y, - int xlen, int ylen, int *y2x0, int *y2x, const double D0_MIN, double d0) + double **val, const char *secx, const char *secy, double **x, double **y, + int xlen, int ylen, int *y2x0, int *y2x, const double D0_MIN, double d0) { - //create score matrix for DP + // create score matrix for DP score_matrix_rmsd_sec(r1, r2, score, secx, secy, x, y, xlen, 
ylen, - y2x0, D0_MIN,d0); - - double gap_open=-1.0; + y2x0, D0_MIN, d0); + + double gap_open = -1.0; NWDP_TM(score, path, val, xlen, ylen, gap_open, y2x); } - void find_max_frag(double **x, int len, int *start_max, - int *end_max, double dcu0, const bool fast_opt) + int *end_max, double dcu0, const bool fast_opt) { - int r_min, fra_min=4; //minimum fragment for search - if (fast_opt) fra_min=8; + int r_min, fra_min = 4; // minimum fragment for search + if (fast_opt) + fra_min = 8; int start; - int Lfr_max=0; + int Lfr_max = 0; - r_min= (int) (len*1.0/3.0); //minimum fragment, in case too small protein - if(r_min > fra_min) r_min=fra_min; - - int inc=0; - double dcu0_cut=dcu0*dcu0;; - double dcu_cut=dcu0_cut; + r_min = (int)(len * 1.0 / 3.0); // minimum fragment, in case too small protein + if (r_min > fra_min) + r_min = fra_min; - while(Lfr_max < r_min) - { - Lfr_max=0; - int j=1; //number of residues at nf-fragment - start=0; - for(int i=1; i Lfr_max) + if (j > Lfr_max) { - Lfr_max=j; - *start_max=start; - *end_max=i; + Lfr_max = j; + *start_max = start; + *end_max = i; } - j=1; + j = 1; } } else { - if(j>Lfr_max) + if (j > Lfr_max) { - Lfr_max=j; - *start_max=start; - *end_max=i-1; + Lfr_max = j; + *start_max = start; + *end_max = i - 1; } - j=1; - start=i; + j = 1; + start = i; } - }// for i; - - if(Lfr_max < r_min) + } // for i; + + if (Lfr_max < r_min) { inc++; - double dinc=pow(1.1, (double) inc) * dcu0; - dcu_cut= dinc*dinc; + double dinc = pow(1.1, (double)inc) * dcu0; + dcu_cut = dinc * dinc; } - }//while <; + } // while <; } -//perform fragment gapless threading to find the best initial alignment -//input: x, y, xlen, ylen -//output: y2x0 stores the best alignment: e.g., -//y2x0[j]=i means: -//the jth element in y is aligned to the ith element in x if i>=0 -//the jth element in y is aligned to a gap in x if i==-1 +// perform fragment gapless threading to find the best initial alignment +// input: x, y, xlen, ylen +// output: y2x0 stores the best 
alignment: e.g., +// y2x0[j]=i means: +// the jth element in y is aligned to the ith element in x if i>=0 +// the jth element in y is aligned to a gap in x if i==-1 double get_initial_fgt(double **r1, double **r2, double **xtm, double **ytm, - double **x, double **y, int xlen, int ylen, - int *y2x, double d0, double d0_search, - double dcu0, const bool fast_opt, double t[3], double u[3][3]) + double **x, double **y, int xlen, int ylen, + int *y2x, double d0, double d0_search, + double dcu0, const bool fast_opt, double t[3], double u[3][3]) { - int fra_min=4; //minimum fragment for search - if (fast_opt) fra_min=8; - int fra_min1=fra_min-1; //cutoff for shift, save time + int fra_min = 4; // minimum fragment for search + if (fast_opt) + fra_min = 8; + int fra_min1 = fra_min - 1; // cutoff for shift, save time - int xstart=0, ystart=0, xend=0, yend=0; + int xstart = 0, ystart = 0, xend = 0, yend = 0; find_max_frag(x, xlen, &xstart, &xend, dcu0, fast_opt); find_max_frag(y, ylen, &ystart, ¥d, dcu0, fast_opt); - - int Lx = xend-xstart+1; - int Ly = yend-ystart+1; + int Lx = xend - xstart + 1; + int Ly = yend - ystart + 1; int *ifr, *y2x_; - int L_fr=getmin(Lx, Ly); - ifr= new int[L_fr]; - y2x_= new int[ylen+1]; + int L_fr = getmin(Lx, Ly); + ifr = new int[L_fr]; + y2x_ = new int[ylen + 1]; - //select what piece will be used. The original implement may cause - //asymetry, but only when xlen==ylen and Lx==Ly - //if L1=Lfr1 and L2=Lfr2 (normal proteins), it will be the same as initial1 + // select what piece will be used. 
The original implement may cause + // asymetry, but only when xlen==ylen and Lx==Ly + // if L1=Lfr1 and L2=Lfr2 (normal proteins), it will be the same as initial1 - if(LxLy || (Lx==Ly && xlen>ylen)) - { - for(int i=0; i Ly || (Lx == Ly && xlen > ylen)) + { + for (int i = 0; i < L_fr; i++) + ifr[i] = ystart + i; } else // solve asymetric for 1x5gA vs 2q7nA5 { /* In this case, L0==xlen==ylen; L_fr==Lx==Ly */ - int L0=xlen; - double tmscore, tmscore_max=-1; + int L0 = xlen; + double tmscore, tmscore_max = -1; int i, j, k; int n1, n2; int min_len; int min_ali; /* part 1, normalized by xlen */ - for(i=0; i=0 && i= 0 && i < L1) + y2x_[j] = ifr[i]; + else + y2x_[j] = -1; } - //evaluate the map quickly in three iterations - tmscore=get_score_fast(r1, r2, xtm, ytm, x, y, xlen, ylen, y2x_, - d0, d0_search, t, u); + // evaluate the map quickly in three iterations + tmscore = get_score_fast(r1, r2, xtm, ytm, x, y, xlen, ylen, y2x_, + d0, d0_search, t, u); - if(tmscore>=tmscore_max) + if (tmscore >= tmscore_max) { - tmscore_max=tmscore; - for(j=0; j=0 && i= 0 && i < xlen) + y2x_[ifr[j]] = i; } - - //evaluate the map quickly in three iterations - tmscore=get_score_fast(r1, r2, xtm, ytm, - x, y, xlen, ylen, y2x_, d0,d0_search, t, u); - if(tmscore>=tmscore_max) + + // evaluate the map quickly in three iterations + tmscore = get_score_fast(r1, r2, xtm, ytm, + x, y, xlen, ylen, y2x_, d0, d0_search, t, u); + if (tmscore >= tmscore_max) { - tmscore_max=tmscore; - for(j=0; j=0 && i= 0 && i < L1) + y2x_[j] = ifr[i]; + else + y2x_[j] = -1; } - //evaluate the map quickly in three iterations - tmscore=get_score_fast(r1, r2, xtm, ytm, x, y, xlen, ylen, y2x_, - d0, d0_search, t, u); + // evaluate the map quickly in three iterations + tmscore = get_score_fast(r1, r2, xtm, ytm, x, y, xlen, ylen, y2x_, + d0, d0_search, t, u); - if(tmscore>=tmscore_max) + if (tmscore >= tmscore_max) { - tmscore_max=tmscore; - for(j=0; j=0 && i= 0 && i < xlen) + y2x_[ifr[j]] = i; } - - //evaluate the map quickly 
in three iterations - tmscore=get_score_fast(r1, r2, xtm, ytm, - x, y, xlen, ylen, y2x_, d0,d0_search, t, u); - if(tmscore>=tmscore_max) + + // evaluate the map quickly in three iterations + tmscore = get_score_fast(r1, r2, xtm, ytm, + x, y, xlen, ylen, y2x_, d0, d0_search, t, u); + if (tmscore >= tmscore_max) { - tmscore_max=tmscore; - for(j=0; j=0) //aligned + if (i >= 0) // aligned { - xtm[k][0]=x[i][0]; - xtm[k][1]=x[i][1]; - xtm[k][2]=x[i][2]; - - ytm[k][0]=y[j][0]; - ytm[k][1]=y[j][1]; - ytm[k][2]=y[j][2]; + xtm[k][0] = x[i][0]; + xtm[k][1] = x[i][1]; + xtm[k][2] = x[i][2]; + + ytm[k][0] = y[j][0]; + ytm[k][1] = y[j][1]; + ytm[k][2] = y[j][2]; k++; } } tmscore = TMscore8_search(r1, r2, xtm, ytm, xt, k, t, u, - simplify_step, score_sum_method, &rmsd, local_d0_search, - Lnorm, score_d8, d0); + simplify_step, score_sum_method, &rmsd, local_d0_search, + Lnorm, score_d8, d0); - - if(tmscore>tmscore_max) + if (tmscore > tmscore_max) { - tmscore_max=tmscore; - for(i=0; i0) + + if (iteration > 0) { - if(fabs(tmscore_old-tmscore)<0.000001) break; + if (fabs(tmscore_old - tmscore) < 0.000001) + break; } - tmscore_old=tmscore; - }// for iteration - - }//for gapopen - - - delete []invmap; + tmscore_old = tmscore; + } // for iteration + + } // for gapopen + + delete[] invmap; return tmscore_max; } - /* script format: 0 - no script; 1 - pymol; 3 - chimerax */ void output_pymol(const string xname, const string yname, - const string fname_super, double t[3], double u[3][3], const int ter_opt, - const int mm_opt, const int split_opt, const int mirror_opt, - const char *seqM, const char *seqxA, const char *seqyA, - const vector&resi_vec1, const vector&resi_vec2, - const string chainID1, const string chainID2, const int o_opt=1) + const string fname_super, double t[3], double u[3][3], const int ter_opt, + const int mm_opt, const int split_opt, const int mirror_opt, + const char *seqM, const char *seqxA, const char *seqyA, + const vector &resi_vec1, const vector &resi_vec2, + 
const string chainID1, const string chainID2, const int o_opt = 1) { - int compress_type=0; // uncompressed file + int compress_type = 0; // uncompressed file ifstream fin; #ifndef REDI_PSTREAM_H_SEEN ifstream fin_gz; #else redi::ipstream fin_gz; // if file is compressed - if (xname.size()>=3 && - xname.substr(xname.size()-3,3)==".gz") + if (xname.size() >= 3 && + xname.substr(xname.size() - 3, 3) == ".gz") { - fin_gz.open("gunzip -c "+xname); - compress_type=1; + fin_gz.open("gunzip -c " + xname); + compress_type = 1; } - else if (xname.size()>=4 && - xname.substr(xname.size()-4,4)==".bz2") + else if (xname.size() >= 4 && + xname.substr(xname.size() - 4, 4) == ".bz2") { - fin_gz.open("bzcat "+xname); - compress_type=2; + fin_gz.open("bzcat " + xname); + compress_type = 2; } else #endif - fin.open(xname.c_str()); + fin.open(xname.c_str()); stringstream buf; stringstream buf_pymol; @@ -1510,132 +1580,160 @@ void output_pymol(const string xname, const string yname, double x1[3]; // after transform /* for PDBx/mmCIF only */ - map _atom_site; + map _atom_site; size_t atom_site_pos; vector line_vec; - int infmt=-1; // 0 - PDB, 3 - PDBx/mmCIF + int infmt = -1; // 0 - PDB, 3 - PDBx/mmCIF - while (compress_type?fin_gz.good():fin.good()) + while (compress_type ? 
fin_gz.good() : fin.good()) { - if (compress_type) getline(fin_gz, line); - else getline(fin, line); - if (line.compare(0, 6, "ATOM ")==0 || - line.compare(0, 6, "HETATM")==0) // PDB format + if (compress_type) + getline(fin_gz, line); + else + getline(fin, line); + if (line.compare(0, 6, "ATOM ") == 0 || + line.compare(0, 6, "HETATM") == 0) // PDB format { - infmt=0; - x[0]=atof(line.substr(30,8).c_str()); - x[1]=atof(line.substr(38,8).c_str()); - x[2]=atof(line.substr(46,8).c_str()); - if (mirror_opt) x[2]=-x[2]; + infmt = 0; + x[0] = atof(line.substr(30, 8).c_str()); + x[1] = atof(line.substr(38, 8).c_str()); + x[2] = atof(line.substr(46, 8).c_str()); + if (mirror_opt) + x[2] = -x[2]; transform(t, u, x, x1); - buf<=1 && line.compare(0,3,"END")==0) break; + buf << line << '\n'; + if (ter_opt >= 1 && line.compare(0, 3, "END") == 0) + break; } } - if (compress_type) fin_gz.close(); - else fin.close(); + if (compress_type) + fin_gz.close(); + else + fin.close(); - string fname_super_full=fname_super; - if (infmt==0) fname_super_full+=".pdb"; - else if (infmt==3) fname_super_full+=".cif"; + string fname_super_full = fname_super; + if (infmt == 0) + fname_super_full += ".pdb"; + else if (infmt == 3) + fname_super_full += ".cif"; ofstream fp; fp.open(fname_super_full.c_str()); - fp<=1) // align one chain from model 1 + if (split_opt == 2 && ter_opt >= 1) // align one chain from model 1 { - if (o_opt==1) + if (o_opt == 1) { - chain1_sele=" and c. "+chainID1.substr(1); - chain2_sele=" and c. "+chainID2.substr(1); + chain1_sele = " and c. " + chainID1.substr(1); + chain2_sele = " and c. 
" + chainID2.substr(1); } - else if (o_opt==3) + else if (o_opt == 3) { - chain1_sele="/"+chainID1.substr(1); - chain2_sele="/"+chainID2.substr(1); + chain1_sele = "/" + chainID1.substr(1); + chain2_sele = "/" + chainID2.substr(1); } } - else if (split_opt==2 && ter_opt==0) // align one chain from each model + else if (split_opt == 2 && ter_opt == 0) // align one chain from each model { - for (i=1;i pml_list; - pml_list.push_back(fname_super+""); - pml_list.push_back(fname_super+"_atm"); - pml_list.push_back(fname_super+"_all"); - pml_list.push_back(fname_super+"_all_atm"); - pml_list.push_back(fname_super+"_all_atm_lig"); + pml_list.push_back(fname_super + ""); + pml_list.push_back(fname_super + "_atm"); + pml_list.push_back(fname_super + "_all"); + pml_list.push_back(fname_super + "_all_atm"); + pml_list.push_back(fname_super + "_all_atm_lig"); - for (int p=0;p&chain_list, - const int infmt_opt, double **ut_mat, const string &fname_super, - const int o_opt=1) +void output_mTMalign_pymol(const vector &chain_list, + const int infmt_opt, double **ut_mat, const string &fname_super, + const int o_opt = 1) { - int compress_type=0; // uncompressed file + int compress_type = 0; // uncompressed file size_t m; string name; double t[3]; double u[3][3]; - int ui,uj; + int ui, uj; string filename; vector color_list; color_list.push_back("red"); @@ -1918,262 +2045,294 @@ void output_mTMalign_pymol(const vector&chain_list, color_list.push_back("grey"); stringstream buf_pymol; - if (o_opt==1) - buf_pymol<<"#!/usr/bin/env pymol\n"; - else if (o_opt==3) - buf_pymol<<"#!/usr/bin/env chimerax --script\n"; - for (m=0;m=3 && - name.substr(name.size()-3,3)==".gz") + if (name.size() >= 3 && + name.substr(name.size() - 3, 3) == ".gz") { - fin_gz.open("gunzip -c "+name); - compress_type=1; + fin_gz.open("gunzip -c " + name); + compress_type = 1; } - else if (name.size()>=4 && - name.substr(name.size()-4,4)==".bz2") + else if (name.size() >= 4 && + name.substr(name.size() - 4, 4) == 
".bz2") { - fin_gz.open("bzcat "+name); - compress_type=2; + fin_gz.open("bzcat " + name); + compress_type = 2; } else #endif fin.open(name.c_str()); stringstream buf; - buf< _atom_site; + map _atom_site; size_t atom_site_pos; vector line_vec; - int infmt=-1; // 0 - PDB, 3 - PDBx/mmCIF + int infmt = -1; // 0 - PDB, 3 - PDBx/mmCIF - while (compress_type?fin_gz.good():fin.good()) + while (compress_type ? fin_gz.good() : fin.good()) { - if (compress_type) getline(fin_gz, line); - else getline(fin, line); - if (line.compare(0, 6, "ATOM ")==0 || - line.compare(0, 6, "HETATM")==0) // PDB format + if (compress_type) + getline(fin_gz, line); + else + getline(fin, line); + if (line.compare(0, 6, "ATOM ") == 0 || + line.compare(0, 6, "HETATM") == 0) // PDB format { - infmt=0; - x[0]=atof(line.substr(30,8).c_str()); - x[1]=atof(line.substr(38,8).c_str()); - x[2]=atof(line.substr(46,8).c_str()); + infmt = 0; + x[0] = atof(line.substr(30, 8).c_str()); + x[1] = atof(line.substr(38, 8).c_str()); + x[2] = atof(line.substr(46, 8).c_str()); transform(t, u, x, x1); - buf< ().swap(color_list); + vector().swap(color_list); } void output_rasmol(const string xname, const string yname, - const string fname_super, double t[3], double u[3][3], const int ter_opt, - const int mm_opt, const int split_opt, const int mirror_opt, - const char *seqM, const char *seqxA, const char *seqyA, - const vector&resi_vec1, const vector&resi_vec2, - const string chainID1, const string chainID2, - const int xlen, const int ylen, const double d0A, const int n_ali8, - const double rmsd, const double TM1, const double Liden) + const string fname_super, double t[3], double u[3][3], const int ter_opt, + const int mm_opt, const int split_opt, const int mirror_opt, + const char *seqM, const char *seqxA, const char *seqyA, + const vector &resi_vec1, const vector &resi_vec2, + const string chainID1, const string chainID2, + const int xlen, const int ylen, const double d0A, const int n_ali8, + const double rmsd, const 
double TM1, const double Liden) { stringstream buf; stringstream buf_all; stringstream buf_atm; stringstream buf_all_atm; stringstream buf_all_atm_lig; - //stringstream buf_pdb; + // stringstream buf_pdb; stringstream buf_tm; string line; - double x[3]; // before transform - double x1[3]; // after transform + double x[3]; // before transform + double x1[3]; // after transform bool after_ter; // true if passed the "TER" line in PDB string asym_id; // chain ID - buf_tm<<"REMARK US-align" - <<"\nREMARK Structure 1:"<=1) // align one chain from model 1 + if (split_opt == 2 && ter_opt >= 1) // align one chain from model 1 { - chain1_sele=chainID1.substr(1); - chain2_sele=chainID2.substr(1); + chain1_sele = chainID1.substr(1); + chain2_sele = chainID2.substr(1); } - else if (split_opt==2 && ter_opt==0) // align one chain from each model + else if (split_opt == 2 && ter_opt == 0) // align one chain from each model { - for (i=1;i _atom_site; + map _atom_site; int atom_site_pos; vector line_vec; - string atom; // 4-character atom name - string AA; // 3-character residue name - string resi; // 4-character residue sequence number - string inscode; // 1-character insertion code + string atom; // 4-character atom name + string AA; // 3-character residue name + string resi; // 4-character residue sequence number + string inscode; // 1-character insertion code string model_index; // model index - bool is_mmcif=false; + bool is_mmcif = false; /* used for CONECT record of chain1 */ - int ca_idx1=0; // all CA atoms - int lig_idx1=0; // all atoms - vector idx_vec; + int ca_idx1 = 0; // all CA atoms + int lig_idx1 = 0; // all atoms + vector idx_vec; /* used for CONECT record of chain2 */ - int ca_idx2=0; // all CA atoms - int lig_idx2=0; // all atoms + int ca_idx2 = 0; // all CA atoms + int lig_idx2 = 0; // all atoms /* extract aligned region */ vector resi_aln1; vector resi_aln2; - int i1=-1; - int i2=-1; + int i1 = -1; + int i2 = -1; if (!mm_opt) { - for (i=0;i=3 && 
line.compare(0,3,"TER")==0) after_ter=true; - if (is_mmcif==false && line.size()>=54 && - (line.compare(0, 6, "ATOM ")==0 || - line.compare(0, 6, "HETATM")==0)) // PDB format - { - if (line[16]!='A' && line[16]!=' ') continue; - x[0]=atof(line.substr(30,8).c_str()); - x[1]=atof(line.substr(38,8).c_str()); - x[2]=atof(line.substr(46,8).c_str()); - if (mirror_opt) x[2]=-x[2]; + if (ter_opt >= 3 && line.compare(0, 3, "TER") == 0) + after_ter = true; + if (is_mmcif == false && line.size() >= 54 && + (line.compare(0, 6, "ATOM ") == 0 || + line.compare(0, 6, "HETATM") == 0)) // PDB format + { + if (line[16] != 'A' && line[16] != ' ') + continue; + x[0] = atof(line.substr(30, 8).c_str()); + x[1] = atof(line.substr(38, 8).c_str()); + x[2] = atof(line.substr(46, 8).c_str()); + if (mirror_opt) + x[2] = -x[2]; transform(t, u, x, x1); - //buf_pdb<=2) - { - if (ca_idx1 && asym_id.size() && asym_id!=line.substr(21,1)) + buf_all_atm_lig << line.substr(0, 6) << setw(5) << lig_idx1 + << line.substr(11, 9) << " A" << line.substr(22, 8) + << setiosflags(ios::fixed) << setprecision(3) + << setw(8) << x1[0] << setw(8) << x1[1] << setw(8) << x1[2] << '\n'; + if (chain1_sele.size() && line[21] != chain1_sele[0]) + continue; + if (after_ter || line.compare(0, 6, "ATOM ")) + continue; + if (ter_opt >= 2) + { + if (ca_idx1 && asym_id.size() && asym_id != line.substr(21, 1)) { - after_ter=true; + after_ter = true; continue; } - asym_id=line[21]; + asym_id = line[21]; } - buf_all_atm<<"ATOM "<=5) atom=atom.substr(0,4); - - AA=line_vec[_atom_site["label_comp_id"]]; // residue name - if (AA.size()==1) AA=" "+AA; - else if (AA.size()==2) AA=" " +AA; - else if (AA.size()>=4) AA=AA.substr(0,3); - + atom = line_vec[_atom_site["label_atom_id"]]; + if (atom[0] == '"') + atom = atom.substr(1); + if (atom.size() && atom[atom.size() - 1] == '"') + atom = atom.substr(0, atom.size() - 1); + if (atom.size() == 0) + atom = " "; + else if (atom.size() == 1) + atom = " " + atom + " "; + else if (atom.size() 
== 2) + atom = " " + atom + " "; + else if (atom.size() == 3) + atom = " " + atom; + else if (atom.size() >= 5) + atom = atom.substr(0, 4); + + AA = line_vec[_atom_site["label_comp_id"]]; // residue name + if (AA.size() == 1) + AA = " " + AA; + else if (AA.size() == 2) + AA = " " + AA; + else if (AA.size() >= 4) + AA = AA.substr(0, 3); + if (_atom_site.count("auth_seq_id")) - resi=line_vec[_atom_site["auth_seq_id"]]; - else resi=line_vec[_atom_site["label_seq_id"]]; - while (resi.size()<4) resi=' '+resi; - if (resi.size()>4) resi=resi.substr(0,4); - - inscode=' '; - if (_atom_site.count("pdbx_PDB_ins_code") && - line_vec[_atom_site["pdbx_PDB_ins_code"]]!="?") - inscode=line_vec[_atom_site["pdbx_PDB_ins_code"]][0]; + resi = line_vec[_atom_site["auth_seq_id"]]; + else + resi = line_vec[_atom_site["label_seq_id"]]; + while (resi.size() < 4) + resi = ' ' + resi; + if (resi.size() > 4) + resi = resi.substr(0, 4); + + inscode = ' '; + if (_atom_site.count("pdbx_PDB_ins_code") && + line_vec[_atom_site["pdbx_PDB_ins_code"]] != "?") + inscode = line_vec[_atom_site["pdbx_PDB_ins_code"]][0]; if (_atom_site.count("auth_asym_id")) { - if (chain1_sele.size()) after_ter - =line_vec[_atom_site["auth_asym_id"]]!=chain1_sele; - else if (ter_opt>=2 && ca_idx1 && asym_id.size() && - asym_id!=line_vec[_atom_site["auth_asym_id"]]) - after_ter=true; - asym_id=line_vec[_atom_site["auth_asym_id"]]; + if (chain1_sele.size()) + after_ter = line_vec[_atom_site["auth_asym_id"]] != chain1_sele; + else if (ter_opt >= 2 && ca_idx1 && asym_id.size() && + asym_id != line_vec[_atom_site["auth_asym_id"]]) + after_ter = true; + asym_id = line_vec[_atom_site["auth_asym_id"]]; } else if (_atom_site.count("label_asym_id")) { - if (chain1_sele.size()) after_ter - =line_vec[_atom_site["label_asym_id"]]!=chain1_sele; - if (ter_opt>=2 && ca_idx1 && asym_id.size() && - asym_id!=line_vec[_atom_site["label_asym_id"]]) - after_ter=true; - asym_id=line_vec[_atom_site["label_asym_id"]]; + if (chain1_sele.size()) + 
after_ter = line_vec[_atom_site["label_asym_id"]] != chain1_sele; + if (ter_opt >= 2 && ca_idx1 && asym_id.size() && + asym_id != line_vec[_atom_site["label_asym_id"]]) + after_ter = true; + asym_id = line_vec[_atom_site["label_asym_id"]]; } - //buf_pdb<=1 && line.compare(0,3,"END")==0) break; + // buf_pdb<= 1 && line.compare(0, 3, "END") == 0) + break; } } fin.close(); - if (!mm_opt) buf<<"TER\n"; - buf_all<<"TER\n"; - if (!mm_opt) buf_atm<<"TER\n"; - buf_all_atm<<"TER\n"; - buf_all_atm_lig<<"TER\n"; - for (i=1;i=3 && line.compare(0,3,"TER")==0) after_ter=true; - if (line.size()>=54 && (line.compare(0, 6, "ATOM ")==0 || - line.compare(0, 6, "HETATM")==0)) // PDB format + if (ter_opt >= 3 && line.compare(0, 3, "TER") == 0) + after_ter = true; + if (line.size() >= 54 && (line.compare(0, 6, "ATOM ") == 0 || + line.compare(0, 6, "HETATM") == 0)) // PDB format { - if (line[16]!='A' && line[16]!=' ') continue; - if (after_ter && line.compare(0,6,"ATOM ")==0) continue; + if (line[16] != 'A' && line[16] != ' ') + continue; + if (after_ter && line.compare(0, 6, "ATOM ") == 0) + continue; lig_idx2++; - buf_all_atm_lig<=2) + buf_all_atm_lig << line.substr(0, 6) << setw(5) << lig_idx1 + lig_idx2 + << line.substr(11, 9) << " B" << line.substr(22, 32) << '\n'; + if (chain2_sele.size() && line[21] != chain2_sele[0]) + continue; + if (after_ter || line.compare(0, 6, "ATOM ")) + continue; + if (ter_opt >= 2) { - if (ca_idx2 && asym_id.size() && asym_id!=line.substr(21,1)) + if (ca_idx2 && asym_id.size() && asym_id != line.substr(21, 1)) { - after_ter=true; + after_ter = true; continue; } - asym_id=line[21]; + asym_id = line[21]; } - buf_all_atm<<"ATOM "<=5) atom=atom.substr(0,4); - - AA=line_vec[_atom_site["label_comp_id"]]; // residue name - if (AA.size()==1) AA=" "+AA; - else if (AA.size()==2) AA=" " +AA; - else if (AA.size()>=4) AA=AA.substr(0,3); - + atom = line_vec[_atom_site["label_atom_id"]]; + if (atom[0] == '"') + atom = atom.substr(1); + if (atom.size() && 
atom[atom.size() - 1] == '"') + atom = atom.substr(0, atom.size() - 1); + if (atom.size() == 0) + atom = " "; + else if (atom.size() == 1) + atom = " " + atom + " "; + else if (atom.size() == 2) + atom = " " + atom + " "; + else if (atom.size() == 3) + atom = " " + atom; + else if (atom.size() >= 5) + atom = atom.substr(0, 4); + + AA = line_vec[_atom_site["label_comp_id"]]; // residue name + if (AA.size() == 1) + AA = " " + AA; + else if (AA.size() == 2) + AA = " " + AA; + else if (AA.size() >= 4) + AA = AA.substr(0, 3); + if (_atom_site.count("auth_seq_id")) - resi=line_vec[_atom_site["auth_seq_id"]]; - else resi=line_vec[_atom_site["label_seq_id"]]; - while (resi.size()<4) resi=' '+resi; - if (resi.size()>4) resi=resi.substr(0,4); - - inscode=' '; - if (_atom_site.count("pdbx_PDB_ins_code") && - line_vec[_atom_site["pdbx_PDB_ins_code"]]!="?") - inscode=line_vec[_atom_site["pdbx_PDB_ins_code"]][0]; - + resi = line_vec[_atom_site["auth_seq_id"]]; + else + resi = line_vec[_atom_site["label_seq_id"]]; + while (resi.size() < 4) + resi = ' ' + resi; + if (resi.size() > 4) + resi = resi.substr(0, 4); + + inscode = ' '; + if (_atom_site.count("pdbx_PDB_ins_code") && + line_vec[_atom_site["pdbx_PDB_ins_code"]] != "?") + inscode = line_vec[_atom_site["pdbx_PDB_ins_code"]][0]; + if (_atom_site.count("auth_asym_id")) { - if (chain2_sele.size()) after_ter - =line_vec[_atom_site["auth_asym_id"]]!=chain2_sele; - if (ter_opt>=2 && ca_idx2 && asym_id.size() && - asym_id!=line_vec[_atom_site["auth_asym_id"]]) - after_ter=true; - asym_id=line_vec[_atom_site["auth_asym_id"]]; + if (chain2_sele.size()) + after_ter = line_vec[_atom_site["auth_asym_id"]] != chain2_sele; + if (ter_opt >= 2 && ca_idx2 && asym_id.size() && + asym_id != line_vec[_atom_site["auth_asym_id"]]) + after_ter = true; + asym_id = line_vec[_atom_site["auth_asym_id"]]; } else if (_atom_site.count("label_asym_id")) { - if (chain2_sele.size()) after_ter - =line_vec[_atom_site["label_asym_id"]]!=chain2_sele; - if 
(ter_opt>=2 && ca_idx2 && asym_id.size() && - asym_id!=line_vec[_atom_site["label_asym_id"]]) - after_ter=true; - asym_id=line_vec[_atom_site["label_asym_id"]]; + if (chain2_sele.size()) + after_ter = line_vec[_atom_site["label_asym_id"]] != chain2_sele; + if (ter_opt >= 2 && ca_idx2 && asym_id.size() && + asym_id != line_vec[_atom_site["label_asym_id"]]) + after_ter = true; + asym_id = line_vec[_atom_site["label_asym_id"]]; } - if (after_ter==false || - line_vec[_atom_site["group_PDB"]]=="HETATM") + if (after_ter == false || + line_vec[_atom_site["group_PDB"]] == "HETATM") { lig_idx2++; - buf_all_atm_lig<=1 && line.compare(0,3,"END")==0) break; + if (ter_opt >= 1 && line.compare(0, 3, "END") == 0) + break; } } fin.close(); - if (!mm_opt) buf<<"TER\n"; - buf_all<<"TER\n"; - if (!mm_opt) buf_atm<<"TER\n"; - buf_all_atm<<"TER\n"; - buf_all_atm_lig<<"TER\n"; - for (i=ca_idx1+1;i&resi_vec1, const vector&resi_vec2) + const string chainID1, const string chainID2, + const int xlen, const int ylen, double t[3], double u[3][3], + const double TM1, const double TM2, + const double TM3, const double TM4, const double TM5, + const double rmsd, const double d0_out, const char *seqM, + const char *seqxA, const char *seqyA, const double Liden, + const int n_ali8, const int L_ali, const double TM_ali, + const double rmsd_ali, const double TM_0, const double d0_0, + const double d0A, const double d0B, const double Lnorm_ass, + const double d0_scale, const double d0a, const double d0u, + const char *fname_matrix, const int outfmt_opt, const int ter_opt, + const int mm_opt, const int split_opt, const int o_opt, + const string fname_super, const int i_opt, const int a_opt, + const bool u_opt, const bool d_opt, const int mirror_opt, + const vector &resi_vec1, const vector &resi_vec2) { - if (outfmt_opt<=0) + if (outfmt_opt <= 0) { printf("\nName of Structure_1: %s%s (to be superimposed onto Structure_2)\n", - xname.c_str(), chainID1.c_str()); + xname.c_str(), chainID1.c_str()); 
printf("Name of Structure_2: %s%s\n", yname.c_str(), chainID2.c_str()); printf("Length of Structure_1: %d residues\n", xlen); printf("Length of Structure_2: %d residues\n\n", ylen); @@ -2856,178 +3098,190 @@ void output_results(const string xname, const string yname, if (i_opt) printf("User-specified initial alignment: TM/Lali/rmsd = %7.5lf, %4d, %6.3lf\n", TM_ali, L_ali, rmsd_ali); - printf("Aligned length= %d, RMSD= %6.2f, Seq_ID=n_identical/n_aligned= %4.3f\n", n_ali8, rmsd, (n_ali8>0)?Liden/n_ali8:0); + printf("Aligned length= %d, RMSD= %6.2f, Seq_ID=n_identical/n_aligned= %4.3f\n", n_ali8, rmsd, (n_ali8 > 0) ? Liden / n_ali8 : 0); printf("TM-score= %6.5f (normalized by length of Structure_1: L=%d, d0=%.2f)\n", TM2, xlen, d0B); printf("TM-score= %6.5f (normalized by length of Structure_2: L=%d, d0=%.2f)\n", TM1, ylen, d0A); - if (a_opt==1) - printf("TM-score= %6.5f (if normalized by average length of two structures: L=%.1f, d0=%.2f)\n", TM3, (xlen+ylen)*0.5, d0a); + if (a_opt == 1) + printf("TM-score= %6.5f (if normalized by average length of two structures: L=%.1f, d0=%.2f)\n", TM3, (xlen + ylen) * 0.5, d0a); if (u_opt) printf("TM-score= %6.5f (normalized by user-specified L=%.2f and d0=%.2f)\n", TM4, Lnorm_ass, d0u); if (d_opt) printf("TM-score= %6.5f (scaled by user-specified d0=%.2f, and L=%d)\n", TM5, d0_scale, ylen); printf("(You should use TM-score normalized by length of the reference structure)\n"); - - //output alignment + + // output alignment printf("\n(\":\" denotes residue pairs of d <%4.1f Angstrom, ", d0_out); printf("\".\" denotes other aligned residues)\n"); printf("%s\n", seqxA); printf("%s\n", seqM); printf("%s\n", seqyA); } - else if (outfmt_opt==1) + else if (outfmt_opt == 1) { printf(">%s%s\tL=%d\td0=%.2f\tseqID=%.3f\tTM-score=%.5f\n", - xname.c_str(), chainID1.c_str(), xlen, d0B, Liden/xlen, TM2); + xname.c_str(), chainID1.c_str(), xlen, d0B, Liden / xlen, TM2); printf("%s\n", seqxA); 
printf(">%s%s\tL=%d\td0=%.2f\tseqID=%.3f\tTM-score=%.5f\n", - yname.c_str(), chainID2.c_str(), ylen, d0A, Liden/ylen, TM1); + yname.c_str(), chainID2.c_str(), ylen, d0A, Liden / ylen, TM1); printf("%s\n", seqyA); printf("# Lali=%d\tRMSD=%.2f\tseqID_ali=%.3f\n", - n_ali8, rmsd, (n_ali8>0)?Liden/n_ali8:0); + n_ali8, rmsd, (n_ali8 > 0) ? Liden / n_ali8 : 0); if (i_opt) printf("# User-specified initial alignment: TM=%.5lf\tLali=%4d\trmsd=%.3lf\n", TM_ali, L_ali, rmsd_ali); - if(a_opt) - printf("# TM-score=%.5f (normalized by average length of two structures: L=%.1f\td0=%.2f)\n", TM3, (xlen+ylen)*0.5, d0a); + if (a_opt) + printf("# TM-score=%.5f (normalized by average length of two structures: L=%.1f\td0=%.2f)\n", TM3, (xlen + ylen) * 0.5, d0a); - if(u_opt) + if (u_opt) printf("# TM-score=%.5f (normalized by user-specified L=%.2f\td0=%.2f)\n", TM4, Lnorm_ass, d0u); - if(d_opt) + if (d_opt) printf("# TM-score=%.5f (scaled by user-specified d0=%.2f\tL=%d)\n", TM5, d0_scale, ylen); printf("$$$$\n"); } - else if (outfmt_opt==2) + else if (outfmt_opt == 2) { printf("%s%s\t%s%s\t%.4f\t%.4f\t%.2f\t%4.3f\t%4.3f\t%4.3f\t%d\t%d\t%d", - xname.c_str(), chainID1.c_str(), yname.c_str(), chainID2.c_str(), - TM2, TM1, rmsd, Liden/xlen, Liden/ylen, (n_ali8>0)?Liden/n_ali8:0, - xlen, ylen, n_ali8); + xname.c_str(), chainID1.c_str(), yname.c_str(), chainID2.c_str(), + TM2, TM1, rmsd, Liden / xlen, Liden / ylen, (n_ali8 > 0) ? 
Liden / n_ali8 : 0, + xlen, ylen, n_ali8); } - if (outfmt_opt<5) cout << endl; + if (outfmt_opt < 5) + cout << endl; - if (strlen(fname_matrix)) output_rotation_matrix(fname_matrix, t, u); + if (strlen(fname_matrix)) + output_rotation_matrix(fname_matrix, t, u); - if (o_opt==1 || o_opt==3) + if (o_opt == 1 || o_opt == 3) output_pymol(xname, yname, fname_super, t, u, ter_opt, - mm_opt, split_opt, mirror_opt, seqM, seqxA, seqyA, - resi_vec1, resi_vec2, chainID1, chainID2, o_opt); - else if (o_opt==2) + mm_opt, split_opt, mirror_opt, seqM, seqxA, seqyA, + resi_vec1, resi_vec2, chainID1, chainID2, o_opt); + else if (o_opt == 2) output_rasmol(xname, yname, fname_super, t, u, ter_opt, - mm_opt, split_opt, mirror_opt, seqM, seqxA, seqyA, - resi_vec1, resi_vec2, chainID1, chainID2, - xlen, ylen, d0A, n_ali8, rmsd, TM1, Liden); + mm_opt, split_opt, mirror_opt, seqM, seqxA, seqyA, + resi_vec1, resi_vec2, chainID1, chainID2, + xlen, ylen, d0A, n_ali8, rmsd, TM1, Liden); } void output_mTMalign_results(const string xname, const string yname, - const string chainID1, const string chainID2, - const int xlen, const int ylen, double t[3], double u[3][3], - const double TM1, const double TM2, - const double TM3, const double TM4, const double TM5, - const double rmsd, const double d0_out, const char *seqM, - const char *seqxA, const char *seqyA, const double Liden, - const int n_ali8, const int L_ali, const double TM_ali, - const double rmsd_ali, const double TM_0, const double d0_0, - const double d0A, const double d0B, const double Lnorm_ass, - const double d0_scale, const double d0a, const double d0u, - const char* fname_matrix, const int outfmt_opt, const int ter_opt, - const int mm_opt, const int split_opt, const int o_opt, - const string fname_super, const int i_opt, const int a_opt, - const bool u_opt, const bool d_opt, const int mirror_opt, - const vector&resi_vec1, const vector&resi_vec2) + const string chainID1, const string chainID2, + const int xlen, const int ylen, 
double t[3], double u[3][3], + const double TM1, const double TM2, + const double TM3, const double TM4, const double TM5, + const double rmsd, const double d0_out, const char *seqM, + const char *seqxA, const char *seqyA, const double Liden, + const int n_ali8, const int L_ali, const double TM_ali, + const double rmsd_ali, const double TM_0, const double d0_0, + const double d0A, const double d0B, const double Lnorm_ass, + const double d0_scale, const double d0a, const double d0u, + const char *fname_matrix, const int outfmt_opt, const int ter_opt, + const int mm_opt, const int split_opt, const int o_opt, + const string fname_super, const int i_opt, const int a_opt, + const bool u_opt, const bool d_opt, const int mirror_opt, + const vector &resi_vec1, const vector &resi_vec2) { - if (outfmt_opt<=0) + if (outfmt_opt <= 0) { - printf("Average aligned length= %d, RMSD= %6.2f, Seq_ID=n_identical/n_aligned= %4.3f\n", n_ali8, rmsd, (n_ali8>0)?Liden/n_ali8:0); + printf("Average aligned length= %d, RMSD= %6.2f, Seq_ID=n_identical/n_aligned= %4.3f\n", n_ali8, rmsd, (n_ali8 > 0) ? 
Liden / n_ali8 : 0); printf("Average TM-score= %6.5f (normalized by length of shorter structure: L=%d, d0=%.2f)\n", TM2, xlen, d0B); printf("Average TM-score= %6.5f (normalized by length of longer structure: L=%d, d0=%.2f)\n", TM1, ylen, d0A); - if (a_opt==1) - printf("Average TM-score= %6.5f (if normalized by average length of two structures: L=%.1f, d0=%.2f)\n", TM3, (xlen+ylen)*0.5, d0a); + if (a_opt == 1) + printf("Average TM-score= %6.5f (if normalized by average length of two structures: L=%.1f, d0=%.2f)\n", TM3, (xlen + ylen) * 0.5, d0a); if (u_opt) printf("Average TM-score= %6.5f (normalized by average L=%.2f and d0=%.2f)\n", TM4, Lnorm_ass, d0u); if (d_opt) printf("Average TM-score= %6.5f (scaled by user-specified d0=%.2f, and L=%d)\n", TM5, d0_scale, ylen); - - //output alignment + + // output alignment printf("In the following, seqID=n_identical/L.\n\n%s\n", seqM); } - else if (outfmt_opt==1) + else if (outfmt_opt == 1) { printf("%s\n", seqM); printf("# Lali=%d\tRMSD=%.2f\tseqID_ali=%.3f\n", - n_ali8, rmsd, (n_ali8>0)?Liden/n_ali8:0); + n_ali8, rmsd, (n_ali8 > 0) ? 
Liden / n_ali8 : 0); if (i_opt) printf("# User-specified initial alignment: TM=%.5lf\tLali=%4d\trmsd=%.3lf\n", TM_ali, L_ali, rmsd_ali); - if(a_opt) - printf("# TM-score=%.5f (normalized by average length of two structures: L=%.1f\td0=%.2f)\n", TM3, (xlen+ylen)*0.5, d0a); + if (a_opt) + printf("# TM-score=%.5f (normalized by average length of two structures: L=%.1f\td0=%.2f)\n", TM3, (xlen + ylen) * 0.5, d0a); - if(u_opt) + if (u_opt) printf("# TM-score=%.5f (normalized by average L=%.2f\td0=%.2f)\n", TM4, Lnorm_ass, d0u); - if(d_opt) + if (d_opt) printf("# TM-score=%.5f (scaled by user-specified d0=%.2f\tL=%d)\n", TM5, d0_scale, ylen); printf("$$$$\n"); } - else if (outfmt_opt==2) + else if (outfmt_opt == 2) { printf("%s%s\t%s%s\t%.4f\t%.4f\t%.2f\t%4.3f\t%4.3f\t%4.3f\t%d\t%d\t%d", - xname.c_str(), chainID1.c_str(), yname.c_str(), chainID2.c_str(), - TM2, TM1, rmsd, Liden/xlen, Liden/ylen, (n_ali8>0)?Liden/n_ali8:0, - xlen, ylen, n_ali8); + xname.c_str(), chainID1.c_str(), yname.c_str(), chainID2.c_str(), + TM2, TM1, rmsd, Liden / xlen, Liden / ylen, (n_ali8 > 0) ? 
Liden / n_ali8 : 0, + xlen, ylen, n_ali8); } cout << endl; - if (strlen(fname_matrix)) output_rotation_matrix(fname_matrix, t, u); + if (strlen(fname_matrix)) + output_rotation_matrix(fname_matrix, t, u); - if (o_opt==1 || o_opt==3) + if (o_opt == 1 || o_opt == 3) output_pymol(xname, yname, fname_super, t, u, ter_opt, - mm_opt, split_opt, mirror_opt, seqM, seqxA, seqyA, - resi_vec1, resi_vec2, chainID1, chainID2, o_opt); - else if (o_opt==2) + mm_opt, split_opt, mirror_opt, seqM, seqxA, seqyA, + resi_vec1, resi_vec2, chainID1, chainID2, o_opt); + else if (o_opt == 2) output_rasmol(xname, yname, fname_super, t, u, ter_opt, - mm_opt, split_opt, mirror_opt, seqM, seqxA, seqyA, - resi_vec1, resi_vec2, chainID1, chainID2, - xlen, ylen, d0A, n_ali8, rmsd, TM1, Liden); + mm_opt, split_opt, mirror_opt, seqM, seqxA, seqyA, + resi_vec1, resi_vec2, chainID1, chainID2, + xlen, ylen, d0A, n_ali8, rmsd, TM1, Liden); } double standard_TMscore(double **r1, double **r2, double **xtm, double **ytm, - double **xt, double **x, double **y, int xlen, int ylen, int invmap[], - int& L_ali, double& RMSD, double D0_MIN, double Lnorm, double d0, - double d0_search, double score_d8, double t[3], double u[3][3], - const int mol_type) + double **xt, double **x, double **y, int xlen, int ylen, int invmap[], + int &L_ali, double &RMSD, double D0_MIN, double Lnorm, double d0, + double d0_search, double score_d8, double t[3], double u[3][3], + const int mol_type) { D0_MIN = 0.5; Lnorm = ylen; - if (mol_type>0) // RNA + if (mol_type > 0) // RNA { - if (Lnorm<=11) d0=0.3; - else if(Lnorm>11 && Lnorm<=15) d0=0.4; - else if(Lnorm>15 && Lnorm<=19) d0=0.5; - else if(Lnorm>19 && Lnorm<=23) d0=0.6; - else if(Lnorm>23 && Lnorm<30) d0=0.7; - else d0=(0.6*pow((Lnorm*1.0-0.5), 1.0/2)-2.5); + if (Lnorm <= 11) + d0 = 0.3; + else if (Lnorm > 11 && Lnorm <= 15) + d0 = 0.4; + else if (Lnorm > 15 && Lnorm <= 19) + d0 = 0.5; + else if (Lnorm > 19 && Lnorm <= 23) + d0 = 0.6; + else if (Lnorm > 23 && Lnorm < 30) + d0 = 
0.7; + else + d0 = (0.6 * pow((Lnorm * 1.0 - 0.5), 1.0 / 2) - 2.5); } else { - if (Lnorm > 21) d0=(1.24*pow((Lnorm*1.0-15), 1.0/3) -1.8); - else d0 = D0_MIN; - if (d0 < D0_MIN) d0 = D0_MIN; + if (Lnorm > 21) + d0 = (1.24 * pow((Lnorm * 1.0 - 15), 1.0 / 3) - 1.8); + else + d0 = D0_MIN; + if (d0 < D0_MIN) + d0 = D0_MIN; } - double d0_input = d0;// Scaled by seq_min + double d0_input = d0; // Scaled by seq_min - double tmscore;// collected alined residues from invmap + double tmscore; // collected alined residues from invmap int n_al = 0; int i; - for (int j = 0; j= 0) @@ -3050,21 +3304,22 @@ double standard_TMscore(double **r1, double **r2, double **xtm, double **ytm, n_al++; } - else if (i != -1) PrintErrorAndQuit("Wrong map!\n"); + else if (i != -1) + PrintErrorAndQuit("Wrong map!\n"); } L_ali = n_al; Kabsch(r1, r2, n_al, 0, &RMSD, t, u); - RMSD = sqrt( RMSD/(1.0*n_al) ); - + RMSD = sqrt(RMSD / (1.0 * n_al)); + int temp_simplify_step = 1; int temp_score_sum_method = 0; d0_search = d0_input; double rms = 0.0; tmscore = TMscore8_search_standard(r1, r2, xtm, ytm, xt, n_al, t, u, - temp_simplify_step, temp_score_sum_method, &rms, d0_input, - score_d8, d0); - tmscore = tmscore * n_al / (1.0*Lnorm); + temp_simplify_step, temp_score_sum_method, &rms, d0_input, + score_d8, d0); + tmscore = tmscore * n_al / (1.0 * Lnorm); return tmscore; } @@ -3072,57 +3327,61 @@ double standard_TMscore(double **r1, double **r2, double **xtm, double **ytm, /* copy the value of t and u into t0,u0 */ void copy_t_u(double t[3], double u[3][3], double t0[3], double u0[3][3]) { - int i,j; - for (i=0;i<3;i++) + int i, j; + for (i = 0; i < 3; i++) { - t0[i]=t[i]; - for (j=0;j<3;j++) u0[i][j]=u[i][j]; + t0[i] = t[i]; + for (j = 0; j < 3; j++) + u0[i][j] = u[i][j]; } } /* calculate approximate TM-score given rotation matrix */ double approx_TM(const int xlen, const int ylen, const int a_opt, - double **xa, double **ya, double t[3], double u[3][3], - const int invmap0[], const int mol_type) + double 
**xa, double **ya, double t[3], double u[3][3], + const int invmap0[], const int mol_type) { - double Lnorm_0=ylen; // normalized by the second protein - if (a_opt==-2 && xlen>ylen) Lnorm_0=xlen; // longer - else if (a_opt==-1 && xlen ylen) + Lnorm_0 = xlen; // longer + else if (a_opt == -1 && xlen < ylen) + Lnorm_0 = xlen; // shorter + else if (a_opt == 1) + Lnorm_0 = (xlen + ylen) / 2.; // average + double D0_MIN; double Lnorm; double d0; double d0_search; parameter_set4final(Lnorm_0, D0_MIN, Lnorm, d0, d0_search, mol_type); - double TMtmp=0; + double TMtmp = 0; double d; - double xtmp[3]={0,0,0}; + double xtmp[3] = {0, 0, 0}; - for(int i=0,j=0; j=0)//aligned + i = invmap0[j]; + if (i >= 0) // aligned { transform(t, u, &xa[i][0], &xtmp[0]); - d=sqrt(dist(&xtmp[0], &ya[j][0])); - TMtmp+=1/(1+(d/d0)*(d/d0)); - //if (d <= score_d8) TMtmp+=1/(1+(d/d0)*(d/d0)); + d = sqrt(dist(&xtmp[0], &ya[j][0])); + TMtmp += 1 / (1 + (d / d0) * (d / d0)); + // if (d <= score_d8) TMtmp+=1/(1+(d/d0)*(d/d0)); } } - TMtmp/=Lnorm_0; + TMtmp /= Lnorm_0; return TMtmp; } void clean_up_after_approx_TM(int *invmap0, int *invmap, - double **score, bool **path, double **val, double **xtm, double **ytm, - double **xt, double **r1, double **r2, const int xlen, const int minlen) + double **score, bool **path, double **val, double **xtm, double **ytm, + double **xt, double **r1, double **r2, const int xlen, const int minlen) { - delete [] invmap0; - delete [] invmap; - DeleteArray(&score, xlen+1); - DeleteArray(&path, xlen+1); - DeleteArray(&val, xlen+1); + delete[] invmap0; + delete[] invmap; + DeleteArray(&score, xlen + 1); + DeleteArray(&path, xlen + 1); + DeleteArray(&val, xlen + 1); DeleteArray(&xtm, minlen); DeleteArray(&ytm, minlen); DeleteArray(&xt, xlen); @@ -3132,42 +3391,42 @@ void clean_up_after_approx_TM(int *invmap0, int *invmap, } /* Entry function for TM-align. 
Return TM-score calculation status: - * 0 - full TM-score calculation + * 0 - full TM-score calculation * 1 - terminated due to exception * 2-7 - pre-terminated due to low TM-score */ int TMalign_main(double **xa, double **ya, - const char *seqx, const char *seqy, const char *secx, const char *secy, - double t0[3], double u0[3][3], - double &TM1, double &TM2, double &TM3, double &TM4, double &TM5, - double &d0_0, double &TM_0, - double &d0A, double &d0B, double &d0u, double &d0a, double &d0_out, - string &seqM, string &seqxA, string &seqyA, vector&do_vec, - double &rmsd0, int &L_ali, double &Liden, - double &TM_ali, double &rmsd_ali, int &n_ali, int &n_ali8, - const int xlen, const int ylen, - const vector sequence, const double Lnorm_ass, - const double d0_scale, const int i_opt, const int a_opt, - const bool u_opt, const bool d_opt, const bool fast_opt, - const int mol_type, const double TMcut=-1) + const char *seqx, const char *seqy, const char *secx, const char *secy, + double t0[3], double u0[3][3], + double &TM1, double &TM2, double &TM3, double &TM4, double &TM5, + double &d0_0, double &TM_0, + double &d0A, double &d0B, double &d0u, double &d0a, double &d0_out, + string &seqM, string &seqxA, string &seqyA, vector &do_vec, + double &rmsd0, int &L_ali, double &Liden, + double &TM_ali, double &rmsd_ali, int &n_ali, int &n_ali8, + const int xlen, const int ylen, + const vector sequence, const double Lnorm_ass, + const double d0_scale, const int i_opt, const int a_opt, + const bool u_opt, const bool d_opt, const bool fast_opt, + const int mol_type, const double TMcut, const int ss_opt) { - double D0_MIN; //for d0 - double Lnorm; //normalization length - double score_d8,d0,d0_search,dcu0;//for TMscore search - double t[3], u[3][3]; //Kabsch translation vector and rotation matrix - double **score; // Input score table for dynamic programming - bool **path; // for dynamic programming - double **val; // for dynamic programming - double **xtm, **ytm; // for TMscore 
search engine - double **xt; //for saving the superposed version of r_1 or xtm - double **r1, **r2; // for Kabsch rotation + double D0_MIN; // for d0 + double Lnorm; // normalization length + double score_d8, d0, d0_search, dcu0; // for TMscore search + double t[3], u[3][3]; // Kabsch translation vector and rotation matrix + double **score; // Input score table for dynamic programming + bool **path; // for dynamic programming + double **val; // for dynamic programming + double **xtm, **ytm; // for TMscore search engine + double **xt; // for saving the superposed version of r_1 or xtm + double **r1, **r2; // for Kabsch rotation /***********************/ /* allocate memory */ /***********************/ int minlen = min(xlen, ylen); - NewArray(&score, xlen+1, ylen+1); - NewArray(&path, xlen+1, ylen+1); - NewArray(&val, xlen+1, ylen+1); + NewArray(&score, xlen + 1, ylen + 1); + NewArray(&path, xlen + 1, ylen + 1); + NewArray(&val, xlen + 1, ylen + 1); NewArray(&xtm, minlen, 3); NewArray(&ytm, minlen, 3); NewArray(&xt, xlen, 3); @@ -3175,188 +3434,213 @@ int TMalign_main(double **xa, double **ya, NewArray(&r2, minlen, 3); /***********************/ - /* parameter set */ + /* parameter set */ /***********************/ - parameter_set4search(xlen, ylen, D0_MIN, Lnorm, - score_d8, d0, d0_search, dcu0); - int simplify_step = 40; //for simplified search engine - int score_sum_method = 8; //for scoring method, whether only sum over pairs with dis= ylen || i1 >= xlen) kk1 = L; - else if (sequence[0][kk1] != '-') invmap[i2] = i1; + if (i2 >= ylen || i1 >= xlen) + kk1 = L; + else if (sequence[0][kk1] != '-') + invmap[i2] = i1; } } //--------------- 2. 
Align proteins from original alignment - double prevD0_MIN = D0_MIN;// stored for later use + double prevD0_MIN = D0_MIN; // stored for later use int prevLnorm = Lnorm; double prevd0 = d0; TM_ali = standard_TMscore(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, - invmap, L_ali, rmsd_ali, D0_MIN, Lnorm, d0, d0_search, score_d8, - t, u, mol_type); + invmap, L_ali, rmsd_ali, D0_MIN, Lnorm, d0, d0_search, score_d8, + t, u, mol_type); D0_MIN = prevD0_MIN; Lnorm = prevLnorm; d0 = prevd0; TM = detailed_search_standard(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, - invmap, t, u, 40, 8, local_d0_search, true, Lnorm, score_d8, d0); + invmap, t, u, 40, 8, local_d0_search, true, Lnorm, score_d8, d0); if (TM > TMmax) { TMmax = TM; - for (i = 0; iTMmax) TMmax = TM; - if (TMcut>0) copy_t_u(t, u, t0, u0); - //run dynamic programing iteratively to find the best alignment + t, u, simplify_step, score_sum_method, local_d0_search, Lnorm, + score_d8, d0); + if (TM > TMmax) + TMmax = TM; + if (TMcut > 0) + copy_t_u(t, u, t0, u0); + // run dynamic programing iteratively to find the best alignment TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya, xlen, ylen, - t, u, invmap, 0, 2, (fast_opt)?2:30, local_d0_search, - D0_MIN, Lnorm, d0, score_d8); - if (TM>TMmax) + t, u, invmap, 0, 2, (fast_opt) ? 
2 : 30, local_d0_search, + D0_MIN, Lnorm, d0, score_d8); + if (TM > TMmax) { TMmax = TM; - for (int i = 0; i0) copy_t_u(t, u, t0, u0); + for (int i = 0; i < ylen; i++) + invmap0[i] = invmap[i]; + if (TMcut > 0) + copy_t_u(t, u, t0, u0); } - if (TMcut>0) // pre-terminate if TM-score is too low + if (TMcut > 0) // pre-terminate if TM-score is too low { - double TMtmp=approx_TM(xlen, ylen, a_opt, - xa, ya, t0, u0, invmap0, mol_type); + double TMtmp = approx_TM(xlen, ylen, a_opt, + xa, ya, t0, u0, invmap0, mol_type); - if (TMtmp<0.5*TMcut) + if (TMtmp < 0.5 * TMcut) { - TM1=TM2=TM3=TM4=TM5=TMtmp; + TM1 = TM2 = TM3 = TM4 = TM5 = TMtmp; clean_up_after_approx_TM(invmap0, invmap, score, path, val, - xtm, ytm, xt, r1, r2, xlen, minlen); + xtm, ytm, xt, r1, r2, xlen, minlen); return 2; } } /************************************************************/ - /* get initial alignment based on secondary structure */ + /* get initial alignment based on secondary structure */ /************************************************************/ - get_initial_ss(path, val, secx, secy, xlen, ylen, invmap); - TM = detailed_search(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, invmap, - t, u, simplify_step, score_sum_method, local_d0_search, Lnorm, - score_d8, d0); - if (TM>TMmax) + if (ss_opt != 1) { - TMmax = TM; - for (int i = 0; i0) copy_t_u(t, u, t0, u0); - } - if (TM > TMmax*0.2) - { - TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya, - xlen, ylen, t, u, invmap, 0, 2, (fast_opt)?2:30, - local_d0_search, D0_MIN, Lnorm, d0, score_d8); - if (TM>TMmax) + get_initial_ss(path, val, secx, secy, xlen, ylen, invmap); + TM = detailed_search(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, invmap, + t, u, simplify_step, score_sum_method, local_d0_search, Lnorm, + score_d8, d0); + if (TM > TMmax) { TMmax = TM; - for (int i = 0; i0) copy_t_u(t, u, t0, u0); + for (int i = 0; i < ylen; i++) + invmap0[i] = invmap[i]; + if (TMcut > 0) + copy_t_u(t, u, t0, u0); + } + if (TM > TMmax * 0.2) + { + TM = DP_iter(r1, 
r2, xtm, ytm, xt, path, val, xa, ya, + xlen, ylen, t, u, invmap, 0, 2, (fast_opt) ? 2 : 30, + local_d0_search, D0_MIN, Lnorm, d0, score_d8); + if (TM > TMmax) + { + TMmax = TM; + for (int i = 0; i < ylen; i++) + invmap0[i] = invmap[i]; + if (TMcut > 0) + copy_t_u(t, u, t0, u0); + } } - } - - if (TMcut>0) // pre-terminate if TM-score is too low - { - double TMtmp=approx_TM(xlen, ylen, a_opt, - xa, ya, t0, u0, invmap0, mol_type); - if (TMtmp<0.52*TMcut) + if (TMcut > 0) // pre-terminate if TM-score is too low { - TM1=TM2=TM3=TM4=TM5=TMtmp; - clean_up_after_approx_TM(invmap0, invmap, score, path, val, - xtm, ytm, xt, r1, r2, xlen, minlen); - return 3; + double TMtmp = approx_TM(xlen, ylen, a_opt, + xa, ya, t0, u0, invmap0, mol_type); + + if (TMtmp < 0.52 * TMcut) + { + TM1 = TM2 = TM3 = TM4 = TM5 = TMtmp; + clean_up_after_approx_TM(invmap0, invmap, score, path, val, + xtm, ytm, xt, r1, r2, xlen, minlen); + return 3; + } } } /************************************************************/ - /* get initial alignment based on local superposition */ + /* get initial alignment based on local superposition */ /************************************************************/ //=initial5 in original TM-align - if (get_initial5( r1, r2, xtm, ytm, path, val, xa, ya, - xlen, ylen, invmap, d0, d0_search, fast_opt, D0_MIN)) + if (ss_opt != 1) { - TM = detailed_search(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, - invmap, t, u, simplify_step, score_sum_method, - local_d0_search, Lnorm, score_d8, d0); - if (TM>TMmax) + if (get_initial5(r1, r2, xtm, ytm, path, val, xa, ya, + xlen, ylen, invmap, d0, d0_search, fast_opt, D0_MIN)) { - TMmax = TM; - for (int i = 0; i0) copy_t_u(t, u, t0, u0); - } - if (TM > TMmax*ddcc) - { - TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya, - xlen, ylen, t, u, invmap, 0, 2, 2, local_d0_search, - D0_MIN, Lnorm, d0, score_d8); - if (TM>TMmax) + TM = detailed_search(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, + invmap, t, u, simplify_step, score_sum_method, + 
local_d0_search, Lnorm, score_d8, d0); + if (TM > TMmax) { TMmax = TM; - for (int i = 0; i0) copy_t_u(t, u, t0, u0); + for (int i = 0; i < ylen; i++) + invmap0[i] = invmap[i]; + if (TMcut > 0) + copy_t_u(t, u, t0, u0); + } + if (TM > TMmax * ddcc) + { + TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya, + xlen, ylen, t, u, invmap, 0, 2, 2, local_d0_search, + D0_MIN, Lnorm, d0, score_d8); + if (TM > TMmax) + { + TMmax = TM; + for (int i = 0; i < ylen; i++) + invmap0[i] = invmap[i]; + if (TMcut > 0) + copy_t_u(t, u, t0, u0); + } } } + else + cerr << "\n\nWarning: initial alignment from local superposition fail!\n\n" + << endl; } - else - cerr << "\n\nWarning: initial alignment from local superposition fail!\n\n" << endl; - if (TMcut>0) // pre-terminate if TM-score is too low + if (TMcut > 0) // pre-terminate if TM-score is too low { - double TMtmp=approx_TM(xlen, ylen, a_opt, - xa, ya, t0, u0, invmap0, mol_type); + double TMtmp = approx_TM(xlen, ylen, a_opt, + xa, ya, t0, u0, invmap0, mol_type); - if (TMtmp<0.54*TMcut) + if (TMtmp < 0.54 * TMcut) { - TM1=TM2=TM3=TM4=TM5=TMtmp; + TM1 = TM2 = TM3 = TM4 = TM5 = TMtmp; clean_up_after_approx_TM(invmap0, invmap, score, path, val, - xtm, ytm, xt, r1, r2, xlen, minlen); + xtm, ytm, xt, r1, r2, xlen, minlen); return 4; } } @@ -3365,82 +3649,93 @@ int TMalign_main(double **xa, double **ya, /* get initial alignment by local superposition+secondary structure */ /********************************************************************/ //=initial3 in original TM-align - get_initial_ssplus(r1, r2, score, path, val, secx, secy, xa, ya, - xlen, ylen, invmap0, invmap, D0_MIN, d0); - TM = detailed_search(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, invmap, - t, u, simplify_step, score_sum_method, local_d0_search, Lnorm, - score_d8, d0); - if (TM>TMmax) + if (ss_opt != 1) { - TMmax = TM; - for (i = 0; i0) copy_t_u(t, u, t0, u0); - } - if (TM > TMmax*ddcc) - { - TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya, - xlen, ylen, t, u, 
invmap, 0, 2, (fast_opt)?2:30, - local_d0_search, D0_MIN, Lnorm, d0, score_d8); - if (TM>TMmax) + get_initial_ssplus(r1, r2, score, path, val, secx, secy, xa, ya, + xlen, ylen, invmap0, invmap, D0_MIN, d0); + TM = detailed_search(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, invmap, + t, u, simplify_step, score_sum_method, local_d0_search, Lnorm, + score_d8, d0); + if (TM > TMmax) { TMmax = TM; - for (i = 0; i0) copy_t_u(t, u, t0, u0); + for (i = 0; i < ylen; i++) + invmap0[i] = invmap[i]; + if (TMcut > 0) + copy_t_u(t, u, t0, u0); + } + if (TM > TMmax * ddcc) + { + TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya, + xlen, ylen, t, u, invmap, 0, 2, (fast_opt) ? 2 : 30, + local_d0_search, D0_MIN, Lnorm, d0, score_d8); + if (TM > TMmax) + { + TMmax = TM; + for (i = 0; i < ylen; i++) + invmap0[i] = invmap[i]; + if (TMcut > 0) + copy_t_u(t, u, t0, u0); + } } - } - - if (TMcut>0) // pre-terminate if TM-score is too low - { - double TMtmp=approx_TM(xlen, ylen, a_opt, - xa, ya, t0, u0, invmap0, mol_type); - if (TMtmp<0.56*TMcut) + if (TMcut > 0) // pre-terminate if TM-score is too low { - TM1=TM2=TM3=TM4=TM5=TMtmp; - clean_up_after_approx_TM(invmap0, invmap, score, path, val, - xtm, ytm, xt, r1, r2, xlen, minlen); - return 5; + double TMtmp = approx_TM(xlen, ylen, a_opt, + xa, ya, t0, u0, invmap0, mol_type); + + if (TMtmp < 0.56 * TMcut) + { + TM1 = TM2 = TM3 = TM4 = TM5 = TMtmp; + clean_up_after_approx_TM(invmap0, invmap, score, path, val, + xtm, ytm, xt, r1, r2, xlen, minlen); + return 5; + } } } /*******************************************************************/ - /* get initial alignment based on fragment gapless threading */ + /* get initial alignment based on fragment gapless threading */ /*******************************************************************/ //=initial4 in original TM-align get_initial_fgt(r1, r2, xtm, ytm, xa, ya, xlen, ylen, - invmap, d0, d0_search, dcu0, fast_opt, t, u); + invmap, d0, d0_search, dcu0, fast_opt, t, u); TM = detailed_search(r1, 
r2, xtm, ytm, xt, xa, ya, xlen, ylen, invmap, - t, u, simplify_step, score_sum_method, local_d0_search, Lnorm, - score_d8, d0); - if (TM>TMmax) + t, u, simplify_step, score_sum_method, local_d0_search, Lnorm, + score_d8, d0); + if (TM > TMmax) { TMmax = TM; - for (i = 0; i0) copy_t_u(t, u, t0, u0); + for (i = 0; i < ylen; i++) + invmap0[i] = invmap[i]; + if (TMcut > 0) + copy_t_u(t, u, t0, u0); } - if (TM > TMmax*ddcc) + if (TM > TMmax * ddcc) { TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya, - xlen, ylen, t, u, invmap, 1, 2, 2, local_d0_search, D0_MIN, - Lnorm, d0, score_d8); - if (TM>TMmax) + xlen, ylen, t, u, invmap, 1, 2, 2, local_d0_search, D0_MIN, + Lnorm, d0, score_d8); + if (TM > TMmax) { TMmax = TM; - for (i = 0; i0) copy_t_u(t, u, t0, u0); + for (i = 0; i < ylen; i++) + invmap0[i] = invmap[i]; + if (TMcut > 0) + copy_t_u(t, u, t0, u0); } } - if (TMcut>0) // pre-terminate if TM-score is too low + if (TMcut > 0) // pre-terminate if TM-score is too low { - double TMtmp=approx_TM(xlen, ylen, a_opt, - xa, ya, t0, u0, invmap0, mol_type); + double TMtmp = approx_TM(xlen, ylen, a_opt, + xa, ya, t0, u0, invmap0, mol_type); - if (TMtmp<0.58*TMcut) + if (TMtmp < 0.58 * TMcut) { - TM1=TM2=TM3=TM4=TM5=TMtmp; + TM1 = TM2 = TM3 = TM4 = TM5 = TMtmp; clean_up_after_approx_TM(invmap0, invmap, score, path, val, - xtm, ytm, xt, r1, r2, xlen, minlen); + xtm, ytm, xt, r1, r2, xlen, minlen); return 6; } } @@ -3449,16 +3744,16 @@ int TMalign_main(double **xa, double **ya, //************************************************// // get initial alignment from user's input: // //************************************************// - if (i_opt>=1 && i_opt<=2)// if input has set parameter for "-i" + if (i_opt >= 1 && i_opt <= 2) // if input has set parameter for "-i" { - for (int j = 0; j < ylen; j++)// Set aligned position to be "-1" + for (int j = 0; j < ylen; j++) // Set aligned position to be "-1" invmap[j] = -1; - int i1 = -1;// in C version, index starts from zero, not from one 
+ int i1 = -1; // in C version, index starts from zero, not from one int i2 = -1; int L1 = sequence[0].size(); int L2 = sequence[1].size(); - int L = min(L1, L2);// Get positions for aligned residues + int L = min(L1, L2); // Get positions for aligned residues for (int kk1 = 0; kk1 < L; kk1++) { if (sequence[0][kk1] != '-') @@ -3466,75 +3761,77 @@ int TMalign_main(double **xa, double **ya, if (sequence[1][kk1] != '-') { i2++; - if (i2 >= ylen || i1 >= xlen) kk1 = L; - else if (sequence[0][kk1] != '-') invmap[i2] = i1; + if (i2 >= ylen || i1 >= xlen) + kk1 = L; + else if (sequence[0][kk1] != '-') + invmap[i2] = i1; } } //--------------- 2. Align proteins from original alignment - double prevD0_MIN = D0_MIN;// stored for later use + double prevD0_MIN = D0_MIN; // stored for later use int prevLnorm = Lnorm; double prevd0 = d0; TM_ali = standard_TMscore(r1, r2, xtm, ytm, xt, xa, ya, - xlen, ylen, invmap, L_ali, rmsd_ali, D0_MIN, Lnorm, d0, - d0_search, score_d8, t, u, mol_type); + xlen, ylen, invmap, L_ali, rmsd_ali, D0_MIN, Lnorm, d0, + d0_search, score_d8, t, u, mol_type); D0_MIN = prevD0_MIN; Lnorm = prevLnorm; d0 = prevd0; TM = detailed_search_standard(r1, r2, xtm, ytm, xt, xa, ya, - xlen, ylen, invmap, t, u, 40, 8, local_d0_search, true, Lnorm, - score_d8, d0); + xlen, ylen, invmap, t, u, 40, 8, local_d0_search, true, Lnorm, + score_d8, d0); if (TM > TMmax) { TMmax = TM; - for (i = 0; iTMmax) + xlen, ylen, t, u, invmap, 0, 2, (fast_opt) ? 2 : 30, + local_d0_search, D0_MIN, Lnorm, d0, score_d8); + if (TM > TMmax) { TMmax = TM; - for (i = 0; i=0) + if (invmap0[i] >= 0) { - flag=true; + flag = true; break; } } - if(!flag) + if (!flag) { cout << "There is no alignment between the two structures! " << "Program stop with no result!" 
<< endl; - TM1=TM2=TM3=TM4=TM5=0; + TM1 = TM2 = TM3 = TM4 = TM5 = 0; return 1; } /* last TM-score pre-termination */ - if (TMcut>0) + if (TMcut > 0) { - double TMtmp=approx_TM(xlen, ylen, a_opt, - xa, ya, t0, u0, invmap0, mol_type); + double TMtmp = approx_TM(xlen, ylen, a_opt, + xa, ya, t0, u0, invmap0, mol_type); - if (TMtmp<0.6*TMcut) + if (TMtmp < 0.6 * TMcut) { - TM1=TM2=TM3=TM4=TM5=TMtmp; + TM1 = TM2 = TM3 = TM4 = TM5 = TMtmp; clean_up_after_approx_TM(invmap0, invmap, score, path, val, - xtm, ytm, xt, r1, r2, xlen, minlen); + xtm, ytm, xt, r1, r2, xlen, minlen); return 7; } } @@ -3542,42 +3839,43 @@ int TMalign_main(double **xa, double **ya, //********************************************************************// // Detailed TMscore search engine --> prepare for final TMscore // //********************************************************************// - //run detailed TMscore search engine for the best alignment, and - //extract the best rotation matrix (t, u) for the best alignment - simplify_step=1; - if (fast_opt) simplify_step=40; - score_sum_method=8; + // run detailed TMscore search engine for the best alignment, and + // extract the best rotation matrix (t, u) for the best alignment + simplify_step = 1; + if (fast_opt) + simplify_step = 40; + score_sum_method = 8; TM = detailed_search_standard(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, - invmap0, t, u, simplify_step, score_sum_method, local_d0_search, - false, Lnorm, score_d8, d0); + invmap0, t, u, simplify_step, score_sum_method, local_d0_search, + false, Lnorm, score_d8, d0); - //select pairs with dis=0)//aligned + i = invmap0[j]; + if (i >= 0) // aligned { n_ali++; - d=sqrt(dist(&xt[i][0], &ya[j][0])); + d = sqrt(dist(&xt[i][0], &ya[j][0])); if (d <= score_d8 || (i_opt == 3)) { - m1[k]=i; - m2[k]=j; + m1[k] = i; + m2[k] = j; - xtm[k][0]=xa[i][0]; - xtm[k][1]=xa[i][1]; - xtm[k][2]=xa[i][2]; + xtm[k][0] = xa[i][0]; + xtm[k][1] = xa[i][1]; + xtm[k][2] = xa[i][2]; - ytm[k][0]=ya[j][0]; - 
ytm[k][1]=ya[j][1]; - ytm[k][2]=ya[j][2]; + ytm[k][0] = ya[j][0]; + ytm[k][1] = ya[j][1]; + ytm[k][2] = ya[j][2]; r1[k][0] = xt[i][0]; r1[k][1] = xt[i][1]; @@ -3590,243 +3888,242 @@ int TMalign_main(double **xa, double **ya, } } } - n_ali8=k; + n_ali8 = k; - Kabsch(r1, r2, n_ali8, 0, &rmsd0, t, u);// rmsd0 is used for final output, only recalculate rmsd0, not t & u + Kabsch(r1, r2, n_ali8, 0, &rmsd0, t, u); // rmsd0 is used for final output, only recalculate rmsd0, not t & u rmsd0 = sqrt(rmsd0 / n_ali8); - //****************************************// // Final TMscore // // Please set parameters for output // //****************************************// double rmsd; - simplify_step=1; - score_sum_method=0; - double Lnorm_0=ylen; - + simplify_step = 1; + score_sum_method = 0; + double Lnorm_0 = ylen; - //normalized by length of structure A + // normalized by length of structure A parameter_set4final(Lnorm_0, D0_MIN, Lnorm, d0, d0_search, mol_type); - d0A=d0; - d0_0=d0A; + d0A = d0; + d0_0 = d0A; local_d0_search = d0_search; TM1 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t0, u0, simplify_step, - score_sum_method, &rmsd, local_d0_search, Lnorm, score_d8, d0); + score_sum_method, &rmsd, local_d0_search, Lnorm, score_d8, d0); TM_0 = TM1; - //normalized by length of structure B - parameter_set4final(xlen+0.0, D0_MIN, Lnorm, d0, d0_search, mol_type); - d0B=d0; + // normalized by length of structure B + parameter_set4final(xlen + 0.0, D0_MIN, Lnorm, d0, d0_search, mol_type); + d0B = d0; local_d0_search = d0_search; TM2 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t, u, simplify_step, - score_sum_method, &rmsd, local_d0_search, Lnorm, score_d8, d0); + score_sum_method, &rmsd, local_d0_search, Lnorm, score_d8, d0); double Lnorm_d0; - if (a_opt>0) + if (a_opt > 0) { - //normalized by average length of structures A, B - Lnorm_0=(xlen+ylen)*0.5; + // normalized by average length of structures A, B + Lnorm_0 = (xlen + ylen) * 0.5; parameter_set4final(Lnorm_0, D0_MIN, 
Lnorm, d0, d0_search, mol_type); - d0a=d0; - d0_0=d0a; + d0a = d0; + d0_0 = d0a; local_d0_search = d0_search; TM3 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t0, u0, - simplify_step, score_sum_method, &rmsd, local_d0_search, Lnorm, - score_d8, d0); - TM_0=TM3; + simplify_step, score_sum_method, &rmsd, local_d0_search, Lnorm, + score_d8, d0); + TM_0 = TM3; } if (u_opt) { - //normalized by user assigned length + // normalized by user assigned length parameter_set4final(Lnorm_ass, D0_MIN, Lnorm, - d0, d0_search, mol_type); - d0u=d0; - d0_0=d0u; - Lnorm_0=Lnorm_ass; + d0, d0_search, mol_type); + d0u = d0; + d0_0 = d0u; + Lnorm_0 = Lnorm_ass; local_d0_search = d0_search; TM4 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t0, u0, - simplify_step, score_sum_method, &rmsd, local_d0_search, Lnorm, - score_d8, d0); - TM_0=TM4; + simplify_step, score_sum_method, &rmsd, local_d0_search, Lnorm, + score_d8, d0); + TM_0 = TM4; } if (d_opt) { - //scaled by user assigned d0 + // scaled by user assigned d0 parameter_set4scale(ylen, d0_scale, Lnorm, d0, d0_search); - d0_out=d0_scale; - d0_0=d0_scale; - //Lnorm_0=ylen; - Lnorm_d0=Lnorm_0; + d0_out = d0_scale; + d0_0 = d0_scale; + // Lnorm_0=ylen; + Lnorm_d0 = Lnorm_0; local_d0_search = d0_search; TM5 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t0, u0, - simplify_step, score_sum_method, &rmsd, local_d0_search, Lnorm, - score_d8, d0); - TM_0=TM5; + simplify_step, score_sum_method, &rmsd, local_d0_search, Lnorm, + score_d8, d0); + TM_0 = TM5; } /* derive alignment from superposition */ - int ali_len=xlen+ylen; //maximum length of alignment - seqxA.assign(ali_len,'-'); - seqM.assign( ali_len,' '); - seqyA.assign(ali_len,'-'); + int ali_len = xlen + ylen; // maximum length of alignment + seqxA.assign(ali_len, '-'); + seqM.assign(ali_len, ' '); + seqyA.assign(ali_len, '-'); do_vec.clear(); - do_vec.assign(ali_len,0); - - //do_rotation(xa, xt, xlen, t, u); + do_vec.assign(ali_len, 0); + + // do_rotation(xa, xt, xlen, t, u); 
do_rotation(xa, xt, xlen, t0, u0); - int kk=0, i_old=0, j_old=0; - d=0; - Liden=0; - //double SO=0; - for(int k=0; k &do_vec, - double &rmsd0, int &L_ali, double &Liden, - double &TM_ali, double &rmsd_ali, int &n_ali, int &n_ali8, - const int xlen, const int ylen, - const vector sequence, const double Lnorm_ass, - const double d0_scale, const int i_opt, const int a_opt, - const bool u_opt, const bool d_opt, const bool fast_opt, - const int mol_type, const double TMcut=-1) + const char *seqx, const char *seqy, const char *secx, const char *secy, + double t0[3], double u0[3][3], + double &TM1, double &TM2, double &TM3, double &TM4, double &TM5, + double &d0_0, double &TM_0, + double &d0A, double &d0B, double &d0u, double &d0a, double &d0_out, + string &seqM, string &seqxA, string &seqyA, vector &do_vec, + double &rmsd0, int &L_ali, double &Liden, + double &TM_ali, double &rmsd_ali, int &n_ali, int &n_ali8, + const int xlen, const int ylen, + const vector sequence, const double Lnorm_ass, + const double d0_scale, const int i_opt, const int a_opt, + const bool u_opt, const bool d_opt, const bool fast_opt, + const int mol_type, const double TMcut = -1) { - char *seqx_cp; // for the protein sequence - char *secx_cp; // for the secondary structure - double **xa_cp; // coordinates - string seqxA_cp,seqyA_cp; // alignment - int i,r; - int cp_point=0; // position of circular permutation - int cp_aln_best=0; // amount of aligned residue in sliding window - int cp_aln_current;// amount of aligned residue in sliding window + char *seqx_cp; // for the protein sequence + char *secx_cp; // for the secondary structure + double **xa_cp; // coordinates + string seqxA_cp, seqyA_cp; // alignment + int i, r; + int cp_point = 0; // position of circular permutation + int cp_aln_best = 0; // amount of aligned residue in sliding window + int cp_aln_current; // amount of aligned residue in sliding window /* duplicate structure */ - NewArray(&xa_cp, xlen*2, 3); - seqx_cp = new char[xlen*2 + 
1]; - secx_cp = new char[xlen*2 + 1]; - for (r=0;rcp_aln_best) + if (cp_aln_current > cp_aln_best) { - cp_aln_best=cp_aln_current; - cp_point=r; + cp_aln_best = cp_aln_current; + cp_point = r; } } seqM.clear(); @@ -3834,146 +4131,153 @@ int CPalign_main(double **xa, double **ya, seqyA.clear(); seqxA_cp.clear(); seqyA_cp.clear(); - rmsd0=Liden=n_ali=n_ali8=0; + rmsd0 = Liden = n_ali = n_ali8 = 0; /* fTM-align alignment */ TMalign_main(xa, ya, seqx, seqy, secx, secy, - t0, u0, TM1, TM2, TM3, TM4, TM5, - d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA, - do_vec, rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, - xlen, ylen, sequence, Lnorm_tmp, d0_scale, - 0, false, true, false, true, mol_type, -1); + t0, u0, TM1, TM2, TM3, TM4, TM5, + d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA, + do_vec, rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, + xlen, ylen, sequence, Lnorm_tmp, d0_scale, + 0, false, true, false, true, mol_type, -1, 0); /* do not use circular permutation of number of aligned residues is not * larger than sequence-order dependent alignment */ - //cout<<"cp: aln="<= length\n" -" of protein to avoid TM-score >1. 
-u does not change final alignment.\n" -"\n" -" -o Output superposed structure1 to sup.* for PyMOL viewing.\n" -" $ USalign structure1.pdb structure2.pdb -o sup\n" -" $ pymol -d @sup.pml # C-alpha trace aligned region\n" -" $ pymol -d @sup_all.pml # C-alpha trace whole chain\n" -" $ pymol -d @sup_atm.pml # full-atom aligned region\n" -" $ pymol -d @sup_all_atm.pml # full-atom whole chain\n" -" $ pymol -d @sup_all_atm_lig.pml # full-atom with all molecules\n" -"\n" -" -rasmol Output superposed structure1 to sup.* for RasMol viewing.\n" -" $ USalign structure1.pdb structure2.pdb -rasmol sup\n" -" $ rasmol -script sup # C-alpha trace aligned region\n" -" $ rasmol -script sup_all # C-alpha trace whole chain\n" -" $ rasmol -script sup_atm # full-atom aligned region\n" -" $ rasmol -script sup_all_atm # full-atom whole chain\n" -" $ rasmol -script sup_all_atm_lig # full-atom with all molecules\n" -"\n" -"-chimerax Output superposed structure1 to sup.* for ChimeraX viewing.\n" -" $ USalign structure1.pdb structure2.pdb -chimerax sup\n" -" $ chimerax --script sup.cxc # C-alpha trace aligned region\n" -" $ chimerax --script sup_all.cxc # C-alpha trace whole chain\n" -" $ chimerax --script sup_atm.cxc # full-atom aligned region\n" -" $ chimerax --script sup_all_atm.cxc # full-atom whole chain\n" -" $ chimerax --script sup_all_atm_lig.cxc # full-atom with all molecules\n" -"\n" -" -do Output distance of aligned residue pairs\n" -"\n" -//" -h Print the full help message, including additional options\n" -//"\n" -"Example usages ('gunzip' program is needed to read .gz compressed files):\n" -" USalign 101m.cif.gz 1mba.pdb # pairwise monomeric protein alignment\n" -" USalign 1qf6.cif 5yyn.pdb.gz -mol RNA # pairwise monomeric RNA alignment\n" -" USalign model.pdb native.pdb -TMscore 1 # calculate TM-score between two conformations of a monomer\n" -" USalign 4v4a.cif 4v49.cif -mm 1 -ter 1 # oligomeric alignment for asymmetic units\n" -" USalign 3ksc.pdb1 4lej.pdb1 -mm 1 -ter 0 # 
oligomeric alignment for biological units\n" -" USalign 1ajk.pdb.gz 2ayh.pdb.gz -mm 3 # circular permutation alignment\n" - <= length\n" + " of protein to avoid TM-score >1. -u does not change final alignment.\n" + "\n" + " -o Output superposed structure1 to sup.* for PyMOL viewing.\n" + " $ USalign structure1.pdb structure2.pdb -o sup\n" + " $ pymol -d @sup.pml # C-alpha trace aligned region\n" + " $ pymol -d @sup_all.pml # C-alpha trace whole chain\n" + " $ pymol -d @sup_atm.pml # full-atom aligned region\n" + " $ pymol -d @sup_all_atm.pml # full-atom whole chain\n" + " $ pymol -d @sup_all_atm_lig.pml # full-atom with all molecules\n" + "\n" + " -rasmol Output superposed structure1 to sup.* for RasMol viewing.\n" + " $ USalign structure1.pdb structure2.pdb -rasmol sup\n" + " $ rasmol -script sup # C-alpha trace aligned region\n" + " $ rasmol -script sup_all # C-alpha trace whole chain\n" + " $ rasmol -script sup_atm # full-atom aligned region\n" + " $ rasmol -script sup_all_atm # full-atom whole chain\n" + " $ rasmol -script sup_all_atm_lig # full-atom with all molecules\n" + "\n" + "-chimerax Output superposed structure1 to sup.* for ChimeraX viewing.\n" + " $ USalign structure1.pdb structure2.pdb -chimerax sup\n" + " $ chimerax --script sup.cxc # C-alpha trace aligned region\n" + " $ chimerax --script sup_all.cxc # C-alpha trace whole chain\n" + " $ chimerax --script sup_atm.cxc # full-atom aligned region\n" + " $ chimerax --script sup_all_atm.cxc # full-atom whole chain\n" + " $ chimerax --script sup_all_atm_lig.cxc # full-atom with all molecules\n" + "\n" + " -do Output distance of aligned residue pairs\n" + "\n" + //" -h Print the full help message, including additional options\n" + //"\n" + "Example usages ('gunzip' program is needed to read .gz compressed files):\n" + " USalign 101m.cif.gz 1mba.pdb # pairwise monomeric protein alignment\n" + " USalign 1qf6.cif 5yyn.pdb.gz -mol RNA # pairwise monomeric RNA alignment\n" + " USalign model.pdb native.pdb 
-TMscore 1 # calculate TM-score between two conformations of a monomer\n" + " USalign 4v4a.cif 4v49.cif -mm 1 -ter 1 # oligomeric alignment for asymmetic units\n" + " USalign 3ksc.pdb1 4lej.pdb1 -mm 1 -ter 0 # oligomeric alignment for biological units\n" + " USalign 1ajk.pdb.gz 2ayh.pdb.gz -mm 3 # circular permutation alignment\n" + << endl; + + // if (h_opt) + print_extra_help(); exit(EXIT_SUCCESS); } /* TMalign, RNAalign, CPalign, TMscore */ int TMalign(string &xname, string &yname, const string &fname_super, - const string &fname_lign, const string &fname_matrix, - vector &sequence, const double Lnorm_ass, const double d0_scale, - const bool m_opt, const int i_opt, const int o_opt, const int a_opt, - const bool u_opt, const bool d_opt, const double TMcut, - const int infmt1_opt, const int infmt2_opt, const int ter_opt, - const int split_opt, const int outfmt_opt, const bool fast_opt, - const int cp_opt, const int mirror_opt, const int het_opt, - const string &atom_opt, const bool autojustify, const string &mol_opt, - const string &dir_opt, const string &dirpair_opt, const string &dir1_opt, - const string &dir2_opt, const vector &chain2parse1, - const vector &chain2parse2, const vector &model2parse1, - const vector &model2parse2, const int byresi_opt, - const vector &chain1_list, const vector &chain2_list, - const bool se_opt, const bool do_opt) + const string &fname_lign, const string &fname_matrix, + vector &sequence, const double Lnorm_ass, const double d0_scale, + const bool m_opt, const int i_opt, const int o_opt, const int a_opt, + const bool u_opt, const bool d_opt, const double TMcut, + const int infmt1_opt, const int infmt2_opt, const int ter_opt, + const int split_opt, const int outfmt_opt, const bool fast_opt, + const int cp_opt, const int mirror_opt, const int het_opt, + const string &atom_opt, const bool autojustify, const string &mol_opt, + const string &dir_opt, const string &dirpair_opt, const string &dir1_opt, + const string &dir2_opt, const 
vector &chain2parse1, + const vector &chain2parse2, const vector &model2parse1, + const vector &model2parse2, const int byresi_opt, + const vector &chain1_list, const vector &chain2_list, + const bool se_opt, const bool do_opt) { /* declare previously global variables */ - vector >PDB_lines1; // text of chain1 - vector >PDB_lines2; // text of chain2 + vector> PDB_lines1; // text of chain1 + vector> PDB_lines2; // text of chain2 vector mol_vec1; // molecule type of chain1, RNA if >0 vector mol_vec2; // molecule type of chain2, RNA if >0 vector chainID_list1; // list of chainID1 vector chainID_list2; // list of chainID2 - int i,j; // file index - int chain_i,chain_j; // chain index - int r; // residue index - int xlen, ylen; // chain length - int xchainnum,ychainnum;// number of chains in a PDB file - char *seqx, *seqy; // for the protein sequence - char *secx, *secy; // for the secondary structure - double **xa, **ya; // for input vectors xa[0...xlen-1][0..2] and - // ya[0...ylen-1][0..2], in general, - // ya is regarded as native structure - // --> superpose xa onto ya - vector resi_vec1; // residue index for chain1 - vector resi_vec2; // residue index for chain2 - int read_resi=byresi_opt; // whether to read residue index - if (byresi_opt==0 && o_opt) read_resi=2; + int i, j; // file index + int chain_i, chain_j; // chain index + int r; // residue index + int xlen, ylen; // chain length + int xchainnum, ychainnum; // number of chains in a PDB file + char *seqx, *seqy; // for the protein sequence + char *secx, *secy; // for the secondary structure + double **xa, **ya; // for input vectors xa[0...xlen-1][0..2] and + // ya[0...ylen-1][0..2], in general, + // ya is regarded as native structure + // --> superpose xa onto ya + vector resi_vec1; // residue index for chain1 + vector resi_vec2; // residue index for chain2 + int read_resi = byresi_opt; // whether to read residue index + if (byresi_opt == 0 && o_opt) + read_resi = 2; /* loop over file names */ - for (i=0;i0) 
make_sec(seqx,xa, xlen, secx,atom_opt); - else make_sec(xa, xlen, secx); // secondary structure assignment + xlen = read_PDB(PDB_lines1[chain_i], xa, seqx, + resi_vec1, read_resi); + if (mirror_opt) + for (r = 0; r < xlen; r++) + xa[r][2] = -xa[r][2]; + if (mol_vec1[chain_i] > 0) + make_sec(seqx, xa, xlen, secx, atom_opt); + else + make_sec(xa, xlen, secx); // secondary structure assignment - for (j=(dir_opt.size()>0)*(i+1);j 0) * (i + 1); j < chain2_list.size(); j++) { - if (dirpair_opt.size() && j!=i) continue; + if (dirpair_opt.size() && j != i) + continue; /* parse chain 2 */ - if (PDB_lines2.size()==0) + if (PDB_lines2.size() == 0) { - yname=chain2_list[j]; - ychainnum=get_PDB_lines(yname, PDB_lines2, chainID_list2, - mol_vec2, ter_opt, infmt2_opt, atom_opt, autojustify, - split_opt, het_opt, chain2parse2, model2parse2); + yname = chain2_list[j]; + ychainnum = get_PDB_lines(yname, PDB_lines2, chainID_list2, + mol_vec2, ter_opt, infmt2_opt, atom_opt, autojustify, + split_opt, het_opt, chain2parse2, model2parse2); if (!ychainnum) { - cerr<<"Warning! 
Cannot parse file: "<0) - make_sec(seqy, ya, ylen, secy, atom_opt); - else make_sec(ya, ylen, secy); + resi_vec2, read_resi); + if (mol_vec2[chain_j] > 0) + make_sec(seqy, ya, ylen, secy, atom_opt); + else + make_sec(ya, ylen, secy); - if (byresi_opt) extract_aln_from_resi(sequence, - seqx,seqy,resi_vec1,resi_vec2,byresi_opt); + if (byresi_opt) + extract_aln_from_resi(sequence, + seqx, seqy, resi_vec1, resi_vec2, byresi_opt); /* declare variable specific to this pair of TMalign */ double t0[3], u0[3][3]; double TM1, TM2; - double TM3, TM4, TM5; // for a_opt, u_opt, d_opt + double TM3, TM4, TM5; // for a_opt, u_opt, d_opt double d0_0, TM_0; double d0A, d0B, d0u, d0a; - double d0_out=5.0; - string seqM, seqxA, seqyA;// for output alignment + double d0_out = 5.0; + string seqM, seqxA, seqyA; // for output alignment double rmsd0 = 0.0; - int L_ali; // Aligned length in standard_TMscore - double Liden=0; - double TM_ali, rmsd_ali; // TMscore and rmsd in standard_TMscore - int n_ali=0; - int n_ali8=0; - bool force_fast_opt=(getmin(xlen,ylen)>1500)?true:fast_opt; + int L_ali; // Aligned length in standard_TMscore + double Liden = 0; + double TM_ali, rmsd_ali; // TMscore and rmsd in standard_TMscore + int n_ali = 0; + int n_ali8 = 0; + bool force_fast_opt = (getmin(xlen, ylen) > 1500) ? 
true : fast_opt; vector do_vec; /* entry function for structure alignment */ - if (cp_opt) CPalign_main( - xa, ya, seqx, seqy, secx, secy, - t0, u0, TM1, TM2, TM3, TM4, TM5, - d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, - seqM, seqxA, seqyA, do_vec, - rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, - xlen, ylen, sequence, Lnorm_ass, d0_scale, - i_opt, a_opt, u_opt, d_opt, force_fast_opt, - mol_vec1[chain_i]+mol_vec2[chain_j],TMcut); - else if (se_opt) - { - int *invmap = new int[ylen+1]; - u0[0][0]=u0[1][1]=u0[2][2]=1; - u0[0][1]= u0[0][2]= - u0[1][0]= u0[1][2]= - u0[2][0]= u0[2][1]= - t0[0] =t0[1] =t0[2] =0; - se_main(xa, ya, seqx, seqy, TM1, TM2, TM3, TM4, TM5, + if (cp_opt) + CPalign_main( + xa, ya, seqx, seqy, secx, secy, + t0, u0, TM1, TM2, TM3, TM4, TM5, d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA, do_vec, rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, xlen, ylen, sequence, Lnorm_ass, d0_scale, - i_opt, a_opt, u_opt, d_opt, - mol_vec1[chain_i]+mol_vec2[chain_j], - outfmt_opt, invmap); - if (outfmt_opt>=2) + i_opt, a_opt, u_opt, d_opt, force_fast_opt, + mol_vec1[chain_i] + mol_vec2[chain_j], TMcut); + else if (se_opt) + { + int *invmap = new int[ylen + 1]; + u0[0][0] = u0[1][1] = u0[2][2] = 1; + u0[0][1] = u0[0][2] = + u0[1][0] = u0[1][2] = + u0[2][0] = u0[2][1] = + t0[0] = t0[1] = t0[2] = 0; + se_main(xa, ya, seqx, seqy, TM1, TM2, TM3, TM4, TM5, + d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, + seqM, seqxA, seqyA, do_vec, + rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, + xlen, ylen, sequence, Lnorm_ass, d0_scale, + i_opt, a_opt, u_opt, d_opt, + mol_vec1[chain_i] + mol_vec2[chain_j], + outfmt_opt, invmap); + if (outfmt_opt >= 2) { - Liden=L_ali=0; - int r1,r2; - for (r2=0;r21) + if (chain2_list.size() > 1) { yname.clear(); - for (chain_j=0;chain_j &sequence, - const double d0_scale, const bool m_opt, const int o_opt, - const int a_opt, const bool d_opt, const bool full_opt, - const double TMcut, const int infmt1_opt, const int 
infmt2_opt, - const int ter_opt, const int split_opt, const int outfmt_opt, - bool fast_opt, const int mirror_opt, const int het_opt, - const string &atom_opt, const bool autojustify, const string &mol_opt, - const string &dir1_opt, const string &dir2_opt, - const vector &chain2parse1, const vector &chain2parse2, - const vector &model2parse1, const vector &model2parse2, - const vector &chain1_list, const vector &chain2_list, - const int byresi_opt,const string&chainmapfile, const bool se_opt) + const string &fname_super, const string &fname_lign, + const string &fname_matrix, vector &sequence, + const double d0_scale, const bool m_opt, const int o_opt, + const int a_opt, const bool d_opt, const bool full_opt, + const double TMcut, const int infmt1_opt, const int infmt2_opt, + const int ter_opt, const int split_opt, const int outfmt_opt, + bool fast_opt, const int mirror_opt, const int het_opt, + const string &atom_opt, const bool autojustify, const string &mol_opt, + const string &dir1_opt, const string &dir2_opt, + const vector &chain2parse1, const vector &chain2parse2, + const vector &model2parse1, const vector &model2parse2, + const vector &chain1_list, const vector &chain2_list, + const int byresi_opt, const string &chainmapfile, const bool se_opt) { /* declare previously global variables */ - vector > > xa_vec; // structure of complex1 - vector > > ya_vec; // structure of complex2 - vector >seqx_vec; // sequence of complex1 - vector >seqy_vec; // sequence of complex2 - vector >secx_vec; // secondary structure of complex1 - vector >secy_vec; // secondary structure of complex2 - vector mol_vec1; // molecule type of complex1, RNA if >0 - vector mol_vec2; // molecule type of complex2, RNA if >0 - vector chainID_list1; // list of chainID1 - vector chainID_list2; // list of chainID2 - vector xlen_vec; // length of complex1 - vector ylen_vec; // length of complex2 - int i,j; // chain index - int xlen, ylen; // chain length - double **xa, **ya; // structure of single 
chain - char *seqx, *seqy; // for the protein sequence - char *secx, *secy; // for the secondary structure - int xlen_aa,ylen_aa; // total length of protein - int xlen_na,ylen_na; // total length of RNA/DNA - vector resi_vec1; // residue index for chain1 - vector resi_vec2; // residue index for chain2 + vector>> xa_vec; // structure of complex1 + vector>> ya_vec; // structure of complex2 + vector> seqx_vec; // sequence of complex1 + vector> seqy_vec; // sequence of complex2 + vector> secx_vec; // secondary structure of complex1 + vector> secy_vec; // secondary structure of complex2 + vector mol_vec1; // molecule type of complex1, RNA if >0 + vector mol_vec2; // molecule type of complex2, RNA if >0 + vector chainID_list1; // list of chainID1 + vector chainID_list2; // list of chainID2 + vector xlen_vec; // length of complex1 + vector ylen_vec; // length of complex2 + int i, j; // chain index + int xlen, ylen; // chain length + double **xa, **ya; // structure of single chain + char *seqx, *seqy; // for the protein sequence + char *secx, *secy; // for the secondary structure + int xlen_aa, ylen_aa; // total length of protein + int xlen_na, ylen_na; // total length of RNA/DNA + vector resi_vec1; // residue index for chain1 + vector resi_vec2; // residue index for chain2 /* parse complex */ parse_chain_list(chain1_list, xa_vec, seqx_vec, secx_vec, mol_vec1, - xlen_vec, chainID_list1, ter_opt, split_opt, mol_opt, infmt1_opt, - atom_opt, autojustify, mirror_opt, het_opt, xlen_aa, xlen_na, o_opt, - resi_vec1, chain2parse1, model2parse1); - if (xa_vec.size()==0) PrintErrorAndQuit("ERROR! 0 chain in complex 1"); + xlen_vec, chainID_list1, ter_opt, split_opt, mol_opt, infmt1_opt, + atom_opt, autojustify, mirror_opt, het_opt, xlen_aa, xlen_na, o_opt, + resi_vec1, chain2parse1, model2parse1); + if (xa_vec.size() == 0) + PrintErrorAndQuit("ERROR! 
0 chain in complex 1"); parse_chain_list(chain2_list, ya_vec, seqy_vec, secy_vec, mol_vec2, - ylen_vec, chainID_list2, ter_opt, split_opt, mol_opt, infmt2_opt, - atom_opt, autojustify, 0, het_opt, ylen_aa, ylen_na, o_opt, - resi_vec2, chain2parse2, model2parse2); - if (ya_vec.size()==0) PrintErrorAndQuit("ERROR! 0 chain in complex 2"); - int len_aa=getmin(xlen_aa,ylen_aa); - int len_na=getmin(xlen_na,ylen_na); + ylen_vec, chainID_list2, ter_opt, split_opt, mol_opt, infmt2_opt, + atom_opt, autojustify, 0, het_opt, ylen_aa, ylen_na, o_opt, + resi_vec2, chain2parse2, model2parse2); + if (ya_vec.size() == 0) + PrintErrorAndQuit("ERROR! 0 chain in complex 2"); + int len_aa = getmin(xlen_aa, ylen_aa); + int len_na = getmin(xlen_na, ylen_na); if (a_opt) { - len_aa=(xlen_aa+ylen_aa)/2; - len_na=(xlen_na+ylen_na)/2; + len_aa = (xlen_aa + ylen_aa) / 2; + len_na = (xlen_na + ylen_na) / 2; } - int i_opt=0; - if (byresi_opt) i_opt=3; + int i_opt = 0; + if (byresi_opt) + i_opt = 3; - map chainmap; + map chainmap; if (chainmapfile.size()) { string line; - int chainidx1,chainidx2; + int chainidx1, chainidx2; vector line_vec; ifstream fin; - bool fromStdin=(chainmapfile=="-"); - if (!fromStdin) fin.open(chainmapfile.c_str()); - while (fromStdin?cin.good():fin.good()) - { - if (fromStdin) getline(cin,line); - else getline(fin,line); - if (line.size()==0 || line[0]=='#') continue; - split(line,line_vec,'\t'); - if (line_vec.size()==2) + bool fromStdin = (chainmapfile == "-"); + if (!fromStdin) + fin.open(chainmapfile.c_str()); + while (fromStdin ? cin.good() : fin.good()) + { + if (fromStdin) + getline(cin, line); + else + getline(fin, line); + if (line.size() == 0 || line[0] == '#') + continue; + split(line, line_vec, '\t'); + if (line_vec.size() == 2) { - chainidx1=-1; - chainidx2=-1; - - for (i=0;i=0 && chainidx2>=0) + if (chainidx1 >= 0 && chainidx2 >= 0) { if (chainmap.count(chainidx1)) - cerr<<"ERROR! 
"< do_vec; - - if (byresi_opt) extract_aln_from_resi(sequence, - seqx,seqy,resi_vec1,resi_vec2,byresi_opt); + + if (byresi_opt) + extract_aln_from_resi(sequence, + seqx, seqy, resi_vec1, resi_vec2, byresi_opt); /* entry function for structure alignment */ if (se_opt) { - int *invmap = new int[ylen+1]; - u0[0][0]=u0[1][1]=u0[2][2]=1; - u0[0][1]= u0[0][2]= - u0[1][0]= u0[1][2]= - u0[2][0]= u0[2][1]= - t0[0] =t0[1] =t0[2] =0; + int *invmap = new int[ylen + 1]; + u0[0][0] = u0[1][1] = u0[2][2] = 1; + u0[0][1] = u0[0][2] = + u0[1][0] = u0[1][2] = + u0[2][0] = u0[2][1] = + t0[0] = t0[1] = t0[2] = 0; se_main(xa, ya, seqx, seqy, TM1, TM2, TM3, TM4, TM5, - d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, - seqM, seqxA, seqyA, do_vec, - rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, - xlen, ylen, sequence, 0, d0_scale, - i_opt, a_opt, false, d_opt, - mol_vec1[0]+mol_vec2[0], outfmt_opt, invmap); - if (outfmt_opt>=2) + d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, + seqM, seqxA, seqyA, do_vec, + rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, + xlen, ylen, sequence, 0, d0_scale, + i_opt, a_opt, false, d_opt, + mol_vec1[0] + mol_vec2[0], outfmt_opt, invmap); + if (outfmt_opt >= 2) { - Liden=L_ali=0; - int r1,r2; - for (r2=0;r2 > >().swap(xa_vec); // structure of complex1 - vector > >().swap(ya_vec); // structure of complex2 - vector >().swap(seqx_vec); // sequence of complex1 - vector >().swap(seqy_vec); // sequence of complex2 - vector >().swap(secx_vec); // secondary structure of complex1 - vector >().swap(secy_vec); // secondary structure of complex2 - mol_vec1.clear(); // molecule type of complex1, RNA if >0 - mol_vec2.clear(); // molecule type of complex2, RNA if >0 - chainID_list1.clear(); // list of chainID1 - chainID_list2.clear(); // list of chainID2 - xlen_vec.clear(); // length of complex1 - ylen_vec.clear(); // length of complex2 + vector>>().swap(xa_vec); // structure of complex1 + vector>>().swap(ya_vec); // structure of complex2 + vector>().swap(seqx_vec); // 
sequence of complex1 + vector>().swap(seqy_vec); // sequence of complex2 + vector>().swap(secx_vec); // secondary structure of complex1 + vector>().swap(secy_vec); // secondary structure of complex2 + mol_vec1.clear(); // molecule type of complex1, RNA if >0 + mol_vec2.clear(); // molecule type of complex2, RNA if >0 + chainID_list1.clear(); // list of chainID1 + chainID_list2.clear(); // list of chainID2 + xlen_vec.clear(); // length of complex1 + ylen_vec.clear(); // length of complex2 return 0; } /* declare TM-score tables */ - int chain1_num=xa_vec.size(); - int chain2_num=ya_vec.size(); - vector tmp_str_vec(chain2_num,""); + int chain1_num = xa_vec.size(); + int chain2_num = ya_vec.size(); + vector tmp_str_vec(chain2_num, ""); double **TMave_mat; double **ut_mat; // rotation matrices for all-against-all alignment - int ui,uj,ut_idx; - NewArray(&TMave_mat,chain1_num,chain2_num); - NewArray(&ut_mat,chain1_num*chain2_num,4*3); - vector >seqxA_mat(chain1_num,tmp_str_vec); - vector > seqM_mat(chain1_num,tmp_str_vec); - vector >seqyA_mat(chain1_num,tmp_str_vec); + int ui, uj, ut_idx; + NewArray(&TMave_mat, chain1_num, chain2_num); + NewArray(&ut_mat, chain1_num * chain2_num, 4 * 3); + vector> seqxA_mat(chain1_num, tmp_str_vec); + vector> seqM_mat(chain1_num, tmp_str_vec); + vector> seqyA_mat(chain1_num, tmp_str_vec); - double maxTMmono=-1; - int maxTMmono_i,maxTMmono_j; + double maxTMmono = -1; + int maxTMmono_i, maxTMmono_j; /* get all-against-all alignment */ - if (len_aa+len_na>500) fast_opt=true; - for (i=0;i 500) + fast_opt = true; + for (i = 0; i < chain1_num; i++) { - xlen=xlen_vec[i]; - if (xlen<3) + xlen = xlen_vec[i]; + if (xlen < 3) { - for (j=0;j do_vec; - int Lnorm_tmp=len_aa; - if (mol_vec1[i]+mol_vec2[j]>0) Lnorm_tmp=len_na; - + int Lnorm_tmp = len_aa; + if (mol_vec1[i] + mol_vec2[j] > 0) + Lnorm_tmp = len_na; + if (byresi_opt) { - int total_aln=extract_aln_from_resi(sequence, seqx,seqy, - resi_vec1,resi_vec2,xlen_vec,ylen_vec, i, j, byresi_opt); - 
seqxA_mat[i][j]=sequence[0]; - seqyA_mat[i][j]=sequence[1]; - if (total_aln>xlen+ylen-3) + int total_aln = extract_aln_from_resi(sequence, seqx, seqy, + resi_vec1, resi_vec2, xlen_vec, ylen_vec, i, j, byresi_opt); + seqxA_mat[i][j] = sequence[0]; + seqyA_mat[i][j] = sequence[1]; + if (total_aln > xlen + ylen - 3) { - for (ui=0;ui<3;ui++) for (uj=0;uj<3;uj++) - ut_mat[ut_idx][ui*3+uj]=(ui==uj)?1:0; - for (uj=0;uj<3;uj++) ut_mat[ut_idx][9+uj]=0; - TMave_mat[i][j]=0; + for (ui = 0; ui < 3; ui++) + for (uj = 0; uj < 3; uj++) + ut_mat[ut_idx][ui * 3 + uj] = (ui == uj) ? 1 : 0; + for (uj = 0; uj < 3; uj++) + ut_mat[ut_idx][9 + uj] = 0; + TMave_mat[i][j] = 0; seqM.clear(); seqxA.clear(); seqyA.clear(); - delete[]seqy; - delete[]secy; - DeleteArray(&ya,ylen); + delete[] seqy; + delete[] secy; + DeleteArray(&ya, ylen); continue; } } @@ -872,54 +906,58 @@ int MMalign(const string &xname, const string &yname, /* entry function for structure alignment */ if (se_opt) { - int *invmap = new int[ylen+1]; - u0[0][0]=u0[1][1]=u0[2][2]=1; - u0[0][1]= u0[0][2]= - u0[1][0]= u0[1][2]= - u0[2][0]= u0[2][1]= - t0[0] =t0[1] =t0[2] =0; + int *invmap = new int[ylen + 1]; + u0[0][0] = u0[1][1] = u0[2][2] = 1; + u0[0][1] = u0[0][2] = + u0[1][0] = u0[1][2] = + u0[2][0] = u0[2][1] = + t0[0] = t0[1] = t0[2] = 0; se_main(xa, ya, seqx, seqy, TM1, TM2, TM3, TM4, TM5, - d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, - seqM, seqxA, seqyA, do_vec, - rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, - xlen, ylen, sequence, Lnorm_tmp, d0_scale, - i_opt, false, true, false, - mol_vec1[i]+mol_vec2[j], outfmt_opt, invmap); - if (outfmt_opt>=2) + d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, + seqM, seqxA, seqyA, do_vec, + rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, + xlen, ylen, sequence, Lnorm_tmp, d0_scale, + i_opt, false, true, false, + mol_vec1[i] + mol_vec2[j], outfmt_opt, invmap); + if (outfmt_opt >= 2) { - Liden=L_ali=0; - int r1,r2; - for (r2=0;r2maxTMmono) + for (ui = 0; ui < 3; ui++) + for (uj = 
0; uj < 3; uj++) + ut_mat[ut_idx][ui * 3 + uj] = u0[ui][uj]; + for (uj = 0; uj < 3; uj++) + ut_mat[ut_idx][9 + uj] = t0[uj]; + seqxA_mat[i][j] = seqxA; + seqyA_mat[i][j] = seqyA; + TMave_mat[i][j] = TM4 * Lnorm_tmp; + if (TMave_mat[i][j] > maxTMmono) { - maxTMmono=TMave_mat[i][j]; - maxTMmono_i=i; - maxTMmono_j=j; + maxTMmono = TMave_mat[i][j]; + maxTMmono_i = i; + maxTMmono_j = j; } /* clean up */ @@ -927,70 +965,73 @@ int MMalign(const string &xname, const string &yname, seqxA.clear(); seqyA.clear(); - delete[]seqy; - delete[]secy; - DeleteArray(&ya,ylen); + delete[] seqy; + delete[] secy; + DeleteArray(&ya, ylen); do_vec.clear(); } - delete[]seqx; - delete[]secx; - DeleteArray(&xa,xlen); + delete[] seqx; + delete[] secx; + DeleteArray(&xa, xlen); } /* calculate initial chain-chain assignment */ int *assign1_list; // value is index of assigned chain2 int *assign2_list; // value is index of assigned chain1 - assign1_list=new int[chain1_num]; - assign2_list=new int[chain2_num]; - double total_score=enhanced_greedy_search(TMave_mat, assign1_list, - assign2_list, chain1_num, chain2_num); - if (total_score<=0) PrintErrorAndQuit("ERROR! No assignable chain"); + assign1_list = new int[chain1_num]; + assign2_list = new int[chain2_num]; + double total_score = enhanced_greedy_search(TMave_mat, assign1_list, + assign2_list, chain1_num, chain2_num); + if (total_score <= 0) + PrintErrorAndQuit("ERROR! 
No assignable chain"); /* refine alignment for large oligomers */ - int aln_chain_num=count_assign_pair(assign1_list,chain1_num); - bool is_oligomer=(aln_chain_num>=3); - if (aln_chain_num==2 && chainmap.size()==0 && !se_opt) // dimer alignment + int aln_chain_num = count_assign_pair(assign1_list, chain1_num); + bool is_oligomer = (aln_chain_num >= 3); + if (aln_chain_num == 2 && chainmap.size() == 0 && !se_opt) // dimer alignment { - int na_chain_num1,na_chain_num2,aa_chain_num1,aa_chain_num2; - count_na_aa_chain_num(na_chain_num1,aa_chain_num1,mol_vec1); - count_na_aa_chain_num(na_chain_num2,aa_chain_num2,mol_vec2); + int na_chain_num1, na_chain_num2, aa_chain_num1, aa_chain_num2; + count_na_aa_chain_num(na_chain_num1, aa_chain_num1, mol_vec1); + count_na_aa_chain_num(na_chain_num2, aa_chain_num2, mol_vec2); /* align protein-RNA hybrid dimer to another hybrid dimer */ - if (na_chain_num1==1 && na_chain_num2==1 && - aa_chain_num1==1 && aa_chain_num2==1) is_oligomer=false; + if (na_chain_num1 == 1 && na_chain_num2 == 1 && + aa_chain_num1 == 1 && aa_chain_num2 == 1) + is_oligomer = false; /* align pure protein dimer or pure RNA dimer */ - else if ((getmin(na_chain_num1,na_chain_num2)==0 && - aa_chain_num1==2 && aa_chain_num2==2) || - (getmin(aa_chain_num1,aa_chain_num2)==0 && - na_chain_num1==2 && na_chain_num2==2)) + else if ((getmin(na_chain_num1, na_chain_num2) == 0 && + aa_chain_num1 == 2 && aa_chain_num2 == 2) || + (getmin(aa_chain_num1, aa_chain_num2) == 0 && + na_chain_num1 == 2 && na_chain_num2 == 2)) { - adjust_dimer_assignment(xa_vec,ya_vec,xlen_vec,ylen_vec,mol_vec1, - mol_vec2,assign1_list,assign2_list,seqxA_mat,seqyA_mat); - is_oligomer=false; // cannot refiner further + adjust_dimer_assignment(xa_vec, ya_vec, xlen_vec, ylen_vec, mol_vec1, + mol_vec2, assign1_list, assign2_list, seqxA_mat, seqyA_mat); + is_oligomer = false; // cannot refiner further } - else is_oligomer=true; /* align oligomers to dimer */ + else + is_oligomer = true; /* align oligomers 
to dimer */ } - if ((aln_chain_num>=3 || is_oligomer) && chainmap.size()==0 && !se_opt) // oligomer alignment + if ((aln_chain_num >= 3 || is_oligomer) && chainmap.size() == 0 && !se_opt) // oligomer alignment { /* extract centroid coordinates */ double **xcentroids; double **ycentroids; NewArray(&xcentroids, chain1_num, 3); NewArray(&ycentroids, chain2_num, 3); - double d0MM=getmin( + double d0MM = getmin( calculate_centroids(xa_vec, chain1_num, xcentroids), calculate_centroids(ya_vec, chain2_num, ycentroids)); /* refine enhanced greedy search with centroid superposition */ - //double het_deg=check_heterooligomer(TMave_mat, chain1_num, chain2_num); + // double het_deg=check_heterooligomer(TMave_mat, chain1_num, chain2_num); homo_refined_greedy_search(TMave_mat, assign1_list, - assign2_list, chain1_num, chain2_num, xcentroids, - ycentroids, d0MM, len_aa+len_na, ut_mat); + assign2_list, chain1_num, chain2_num, xcentroids, + ycentroids, d0MM, len_aa + len_na, ut_mat); hetero_refined_greedy_search(TMave_mat, assign1_list, - assign2_list, chain1_num, chain2_num, xcentroids, - ycentroids, d0MM, len_aa+len_na); + assign2_list, chain1_num, chain2_num, xcentroids, + ycentroids, d0MM, len_aa + len_na); /* clean up */ DeleteArray(&xcentroids, chain1_num); @@ -998,64 +1039,64 @@ int MMalign(const string &xname, const string &yname, } /* store initial assignment */ - int init_pair_num=count_assign_pair(assign1_list,chain1_num); + int init_pair_num = count_assign_pair(assign1_list, chain1_num); int *assign1_init, *assign2_init; - assign1_init=new int[chain1_num]; - assign2_init=new int[chain2_num]; + assign1_init = new int[chain1_num]; + assign2_init = new int[chain2_num]; double **TMave_init; - NewArray(&TMave_init,chain1_num,chain2_num); - vector >seqxA_init(chain1_num,tmp_str_vec); - vector >seqyA_init(chain1_num,tmp_str_vec); + NewArray(&TMave_init, chain1_num, chain2_num); + vector> seqxA_init(chain1_num, tmp_str_vec); + vector> seqyA_init(chain1_num, tmp_str_vec); vector 
sequence_init; copy_chain_assign_data(chain1_num, chain2_num, sequence_init, - seqxA_mat, seqyA_mat, assign1_list, assign2_list, TMave_mat, - seqxA_init, seqyA_init, assign1_init, assign2_init, TMave_init); + seqxA_mat, seqyA_mat, assign1_list, assign2_list, TMave_mat, + seqxA_init, seqyA_init, assign1_init, assign2_init, TMave_init); /* perform iterative alignment */ - double max_total_score=0; // ignore old total_score because previous - // score was from monomeric chain superpositions - int max_iter=5-(int)((len_aa+len_na)/200); - if (max_iter<2) max_iter=2; - //if (byresi_opt==0) + double max_total_score = 0; // ignore old total_score because previous + // score was from monomeric chain superpositions + int max_iter = 5 - (int)((len_aa + len_na) / 200); + if (max_iter < 2) + max_iter = 2; + // if (byresi_opt==0) if (!se_opt) MMalign_iter(max_total_score, max_iter, xa_vec, ya_vec, - seqx_vec, seqy_vec, secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, - ylen_vec, xa, ya, seqx, seqy, secx, secy, len_aa, len_na, chain1_num, - chain2_num, TMave_mat, seqxA_mat, seqyA_mat, assign1_list, assign2_list, - sequence, d0_scale, fast_opt, chainmap, byresi_opt); - - if (byresi_opt && aln_chain_num>=4 && is_oligomer && chainmap.size()==0 && !se_opt) // oligomer alignment + seqx_vec, seqy_vec, secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, + ylen_vec, xa, ya, seqx, seqy, secx, secy, len_aa, len_na, chain1_num, + chain2_num, TMave_mat, seqxA_mat, seqyA_mat, assign1_list, assign2_list, + sequence, d0_scale, fast_opt, chainmap, byresi_opt); + + if (byresi_opt && aln_chain_num >= 4 && is_oligomer && chainmap.size() == 0 && !se_opt) // oligomer alignment { MMalign_final(xname.substr(dir1_opt.size()), yname.substr(dir2_opt.size()), - chainID_list1, chainID_list2, - fname_super, fname_lign, fname_matrix, - xa_vec, ya_vec, seqx_vec, seqy_vec, - secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec, - xa, ya, seqx, seqy, secx, secy, len_aa, len_na, - chain1_num, chain2_num, 
TMave_mat, - seqxA_mat, seqM_mat, seqyA_mat, assign1_list, assign2_list, sequence, - d0_scale, 1, 0, 5, ter_opt, split_opt, - 0, 0, true, true, mirror_opt, resi_vec1, resi_vec2); - + chainID_list1, chainID_list2, + fname_super, fname_lign, fname_matrix, + xa_vec, ya_vec, seqx_vec, seqy_vec, + secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec, + xa, ya, seqx, seqy, secx, secy, len_aa, len_na, + chain1_num, chain2_num, TMave_mat, + seqxA_mat, seqM_mat, seqyA_mat, assign1_list, assign2_list, sequence, + d0_scale, 1, 0, 5, ter_opt, split_opt, + 0, 0, true, true, mirror_opt, resi_vec1, resi_vec2); /* extract centroid coordinates */ double **xcentroids; double **ycentroids; NewArray(&xcentroids, chain1_num, 3); NewArray(&ycentroids, chain2_num, 3); - double d0MM=getmin( + double d0MM = getmin( calculate_centroids(xa_vec, chain1_num, xcentroids), calculate_centroids(ya_vec, chain2_num, ycentroids)); /* refine enhanced greedy search with centroid superposition */ - //double het_deg=check_heterooligomer(TMave_mat, chain1_num, chain2_num); + // double het_deg=check_heterooligomer(TMave_mat, chain1_num, chain2_num); homo_refined_greedy_search(TMave_mat, assign1_list, - assign2_list, chain1_num, chain2_num, xcentroids, - ycentroids, d0MM, len_aa+len_na, ut_mat); + assign2_list, chain1_num, chain2_num, xcentroids, + ycentroids, d0MM, len_aa + len_na, ut_mat); hetero_refined_greedy_search(TMave_mat, assign1_list, - assign2_list, chain1_num, chain2_num, xcentroids, - ycentroids, d0MM, len_aa+len_na); + assign2_list, chain1_num, chain2_num, xcentroids, + ycentroids, d0MM, len_aa + len_na); /* clean up */ DeleteArray(&xcentroids, chain1_num); @@ -1063,210 +1104,219 @@ int MMalign(const string &xname, const string &yname, } /* sometime MMalign_iter is even worse than monomer alignment */ - if (byresi_opt==0 && max_total_score=init_pair_num) copy_chain_assign_data( - chain1_num, chain2_num, sequence_init, - seqxA_mat, seqyA_mat, assign1_list, assign2_list, TMave_mat, - 
seqxA_init, seqyA_init, assign1_init, assign2_init, TMave_init); - double max_total_score_cross=max_total_score; - if (byresi_opt==0 && len_aa+len_na<10000) + int iter_pair_num = count_assign_pair(assign1_list, chain1_num); + if (iter_pair_num >= init_pair_num) + copy_chain_assign_data( + chain1_num, chain2_num, sequence_init, + seqxA_mat, seqyA_mat, assign1_list, assign2_list, TMave_mat, + seqxA_init, seqyA_init, assign1_init, assign2_init, TMave_init); + double max_total_score_cross = max_total_score; + if (byresi_opt == 0 && len_aa + len_na < 10000) { MMalign_dimer(max_total_score_cross, xa_vec, ya_vec, seqx_vec, seqy_vec, - secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec, - xa, ya, seqx, seqy, secx, secy, len_aa, len_na, chain1_num, chain2_num, - TMave_init, seqxA_init, seqyA_init, assign1_init, assign2_init, - sequence_init, d0_scale, fast_opt); - if (max_total_score_cross>max_total_score) + secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec, + xa, ya, seqx, seqy, secx, secy, len_aa, len_na, chain1_num, chain2_num, + TMave_init, seqxA_init, seqyA_init, assign1_init, assign2_init, + sequence_init, d0_scale, fast_opt); + if (max_total_score_cross > max_total_score) { - max_total_score=max_total_score_cross; + max_total_score = max_total_score_cross; copy_chain_assign_data(chain1_num, chain2_num, sequence, - seqxA_init, seqyA_init, assign1_init, assign2_init, TMave_init, - seqxA_mat, seqyA_mat, assign1_list, assign2_list, TMave_mat); + seqxA_init, seqyA_init, assign1_init, assign2_init, TMave_init, + seqxA_mat, seqyA_mat, assign1_list, assign2_list, TMave_mat); } - } + } /* final alignment */ - if (outfmt_opt==0) print_version(); - if (se_opt) MMalign_se_final(xname.substr(dir1_opt.size()), yname.substr(dir2_opt.size()), - chainID_list1, chainID_list2, - fname_super, fname_lign, fname_matrix, - xa_vec, ya_vec, seqx_vec, seqy_vec, - secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec, - xa, ya, seqx, seqy, secx, secy, len_aa, len_na, - 
chain1_num, chain2_num, TMave_mat, - seqxA_mat, seqM_mat, seqyA_mat, assign1_list, assign2_list, sequence, - d0_scale, m_opt, o_opt, outfmt_opt, ter_opt, split_opt, - a_opt, d_opt, fast_opt, full_opt, mirror_opt, resi_vec1, resi_vec2); - else MMalign_final(xname.substr(dir1_opt.size()), yname.substr(dir2_opt.size()), - chainID_list1, chainID_list2, - fname_super, fname_lign, fname_matrix, - xa_vec, ya_vec, seqx_vec, seqy_vec, - secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec, - xa, ya, seqx, seqy, secx, secy, len_aa, len_na, - chain1_num, chain2_num, TMave_mat, - seqxA_mat, seqM_mat, seqyA_mat, assign1_list, assign2_list, sequence, - d0_scale, m_opt, o_opt, outfmt_opt, ter_opt, split_opt, - a_opt, d_opt, fast_opt, full_opt, mirror_opt, resi_vec1, resi_vec2); + if (outfmt_opt == 0) + print_version(); + if (se_opt) + MMalign_se_final(xname.substr(dir1_opt.size()), yname.substr(dir2_opt.size()), + chainID_list1, chainID_list2, + fname_super, fname_lign, fname_matrix, + xa_vec, ya_vec, seqx_vec, seqy_vec, + secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec, + xa, ya, seqx, seqy, secx, secy, len_aa, len_na, + chain1_num, chain2_num, TMave_mat, + seqxA_mat, seqM_mat, seqyA_mat, assign1_list, assign2_list, sequence, + d0_scale, m_opt, o_opt, outfmt_opt, ter_opt, split_opt, + a_opt, d_opt, fast_opt, full_opt, mirror_opt, resi_vec1, resi_vec2); + else + MMalign_final(xname.substr(dir1_opt.size()), yname.substr(dir2_opt.size()), + chainID_list1, chainID_list2, + fname_super, fname_lign, fname_matrix, + xa_vec, ya_vec, seqx_vec, seqy_vec, + secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec, + xa, ya, seqx, seqy, secx, secy, len_aa, len_na, + chain1_num, chain2_num, TMave_mat, + seqxA_mat, seqM_mat, seqyA_mat, assign1_list, assign2_list, sequence, + d0_scale, m_opt, o_opt, outfmt_opt, ter_opt, split_opt, + a_opt, d_opt, fast_opt, full_opt, mirror_opt, resi_vec1, resi_vec2); /* clean up everything */ - delete [] assign1_list; - delete [] 
assign2_list; - DeleteArray(&TMave_mat,chain1_num); - DeleteArray(&ut_mat, chain1_num*chain2_num); - vector >().swap(seqxA_mat); - vector >().swap(seqM_mat); - vector >().swap(seqyA_mat); + delete[] assign1_list; + delete[] assign2_list; + DeleteArray(&TMave_mat, chain1_num); + DeleteArray(&ut_mat, chain1_num * chain2_num); + vector>().swap(seqxA_mat); + vector>().swap(seqM_mat); + vector>().swap(seqyA_mat); vector().swap(tmp_str_vec); - delete [] assign1_init; - delete [] assign2_init; - DeleteArray(&TMave_init,chain1_num); - vector >().swap(seqxA_init); - vector >().swap(seqyA_init); - - vector > >().swap(xa_vec); // structure of complex1 - vector > >().swap(ya_vec); // structure of complex2 - vector >().swap(seqx_vec); // sequence of complex1 - vector >().swap(seqy_vec); // sequence of complex2 - vector >().swap(secx_vec); // secondary structure of complex1 - vector >().swap(secy_vec); // secondary structure of complex2 - mol_vec1.clear(); // molecule type of complex1, RNA if >0 - mol_vec2.clear(); // molecule type of complex2, RNA if >0 - vector().swap(chainID_list1); // list of chainID1 - vector().swap(chainID_list2); // list of chainID2 - xlen_vec.clear(); // length of complex1 - ylen_vec.clear(); // length of complex2 - vector ().swap(resi_vec1); // residue index for chain1 - vector ().swap(resi_vec2); // residue index for chain2 - map ().swap(chainmap); + delete[] assign1_init; + delete[] assign2_init; + DeleteArray(&TMave_init, chain1_num); + vector>().swap(seqxA_init); + vector>().swap(seqyA_init); + + vector>>().swap(xa_vec); // structure of complex1 + vector>>().swap(ya_vec); // structure of complex2 + vector>().swap(seqx_vec); // sequence of complex1 + vector>().swap(seqy_vec); // sequence of complex2 + vector>().swap(secx_vec); // secondary structure of complex1 + vector>().swap(secy_vec); // secondary structure of complex2 + mol_vec1.clear(); // molecule type of complex1, RNA if >0 + mol_vec2.clear(); // molecule type of complex2, RNA if >0 + 
vector().swap(chainID_list1); // list of chainID1 + vector().swap(chainID_list2); // list of chainID2 + xlen_vec.clear(); // length of complex1 + ylen_vec.clear(); // length of complex2 + vector().swap(resi_vec1); // residue index for chain1 + vector().swap(resi_vec2); // residue index for chain2 + map().swap(chainmap); return 1; } - /* alignment individual chains to a complex. */ -int MMdock(const string &xname, const string &yname, const string &fname_super, - const string &fname_matrix, vector &sequence, const double Lnorm_ass, - const double d0_scale, const bool m_opt, const int o_opt, - const int a_opt, const bool u_opt, const bool d_opt, - const double TMcut, const int infmt1_opt, const int infmt2_opt, - const int ter_opt, const int split_opt, const int outfmt_opt, - bool fast_opt, const int mirror_opt, const int het_opt, - const string &atom_opt, const bool autojustify, const string &mol_opt, - const string &dir1_opt, const string &dir2_opt, - const vector &chain2parse1, const vector &chain2parse2, - const vector &model2parse1, const vector &model2parse2, - const vector &chain1_list, const vector &chain2_list, - const bool do_opt) +int MMdock(const string &xname, const string &yname, const string &fname_super, + const string &fname_matrix, vector &sequence, const double Lnorm_ass, + const double d0_scale, const bool m_opt, const int o_opt, + const int a_opt, const bool u_opt, const bool d_opt, + const double TMcut, const int infmt1_opt, const int infmt2_opt, + const int ter_opt, const int split_opt, const int outfmt_opt, + bool fast_opt, const int mirror_opt, const int het_opt, + const string &atom_opt, const bool autojustify, const string &mol_opt, + const string &dir1_opt, const string &dir2_opt, + const vector &chain2parse1, const vector &chain2parse2, + const vector &model2parse1, const vector &model2parse2, + const vector &chain1_list, const vector &chain2_list, + const bool do_opt) { /* declare previously global variables */ - vector > > xa_vec; // 
structure of complex1 - vector > > ya_vec; // structure of complex2 - vector >seqx_vec; // sequence of complex1 - vector >seqy_vec; // sequence of complex2 - vector >secx_vec; // secondary structure of complex1 - vector >secy_vec; // secondary structure of complex2 - vector mol_vec1; // molecule type of complex1, RNA if >0 - vector mol_vec2; // molecule type of complex2, RNA if >0 - vector chainID_list1; // list of chainID1 - vector chainID_list2; // list of chainID2 - vector xlen_vec; // length of complex1 - vector ylen_vec; // length of complex2 - int i,j; // chain index - int xlen, ylen; // chain length - double **xa, **ya; // structure of single chain - char *seqx, *seqy; // for the protein sequence - char *secx, *secy; // for the secondary structure - int xlen_aa,ylen_aa; // total length of protein - int xlen_na,ylen_na; // total length of RNA/DNA - vector resi_vec1; // residue index for chain1 - vector resi_vec2; // residue index for chain2 + vector>> xa_vec; // structure of complex1 + vector>> ya_vec; // structure of complex2 + vector> seqx_vec; // sequence of complex1 + vector> seqy_vec; // sequence of complex2 + vector> secx_vec; // secondary structure of complex1 + vector> secy_vec; // secondary structure of complex2 + vector mol_vec1; // molecule type of complex1, RNA if >0 + vector mol_vec2; // molecule type of complex2, RNA if >0 + vector chainID_list1; // list of chainID1 + vector chainID_list2; // list of chainID2 + vector xlen_vec; // length of complex1 + vector ylen_vec; // length of complex2 + int i, j; // chain index + int xlen, ylen; // chain length + double **xa, **ya; // structure of single chain + char *seqx, *seqy; // for the protein sequence + char *secx, *secy; // for the secondary structure + int xlen_aa, ylen_aa; // total length of protein + int xlen_na, ylen_na; // total length of RNA/DNA + vector resi_vec1; // residue index for chain1 + vector resi_vec2; // residue index for chain2 /* parse complex */ parse_chain_list(chain1_list, 
xa_vec, seqx_vec, secx_vec, mol_vec1, - xlen_vec, chainID_list1, ter_opt, split_opt, mol_opt, infmt1_opt, - atom_opt, autojustify, mirror_opt, het_opt, xlen_aa, xlen_na, o_opt, - resi_vec1, chain2parse1, model2parse1); - if (xa_vec.size()==0) PrintErrorAndQuit("ERROR! 0 individual chain"); + xlen_vec, chainID_list1, ter_opt, split_opt, mol_opt, infmt1_opt, + atom_opt, autojustify, mirror_opt, het_opt, xlen_aa, xlen_na, o_opt, + resi_vec1, chain2parse1, model2parse1); + if (xa_vec.size() == 0) + PrintErrorAndQuit("ERROR! 0 individual chain"); parse_chain_list(chain2_list, ya_vec, seqy_vec, secy_vec, mol_vec2, - ylen_vec, chainID_list2, ter_opt, split_opt, mol_opt, infmt2_opt, - atom_opt, autojustify, 0, het_opt, ylen_aa, ylen_na, o_opt, resi_vec2, - chain2parse2, model2parse2); - if (xa_vec.size()>ya_vec.size()) PrintErrorAndQuit( - "ERROR! more individual chains to align than number of chains in complex template"); - int len_aa=getmin(xlen_aa,ylen_aa); - int len_na=getmin(xlen_na,ylen_na); + ylen_vec, chainID_list2, ter_opt, split_opt, mol_opt, infmt2_opt, + atom_opt, autojustify, 0, het_opt, ylen_aa, ylen_na, o_opt, resi_vec2, + chain2parse2, model2parse2); + if (xa_vec.size() > ya_vec.size()) + PrintErrorAndQuit( + "ERROR! 
more individual chains to align than number of chains in complex template"); + int len_aa = getmin(xlen_aa, ylen_aa); + int len_na = getmin(xlen_na, ylen_na); if (a_opt) { - len_aa=(xlen_aa+ylen_aa)/2; - len_na=(xlen_na+ylen_na)/2; + len_aa = (xlen_aa + ylen_aa) / 2; + len_na = (xlen_na + ylen_na) / 2; } /* perform monomer alignment if there is only one chain */ - if (xa_vec.size()==1 && ya_vec.size()==1) + if (xa_vec.size() == 1 && ya_vec.size() == 1) { xlen = xlen_vec[0]; ylen = ylen_vec[0]; - seqx = new char[xlen+1]; - seqy = new char[ylen+1]; - secx = new char[xlen+1]; - secy = new char[ylen+1]; + seqx = new char[xlen + 1]; + seqy = new char[ylen + 1]; + secx = new char[xlen + 1]; + secy = new char[ylen + 1]; NewArray(&xa, xlen, 3); NewArray(&ya, ylen, 3); - copy_chain_data(xa_vec[0],seqx_vec[0],secx_vec[0], xlen,xa,seqx,secx); - copy_chain_data(ya_vec[0],seqy_vec[0],secy_vec[0], ylen,ya,seqy,secy); - + copy_chain_data(xa_vec[0], seqx_vec[0], secx_vec[0], xlen, xa, seqx, secx); + copy_chain_data(ya_vec[0], seqy_vec[0], secy_vec[0], ylen, ya, seqy, secy); + /* declare variable specific to this pair of TMalign */ double t0[3], u0[3][3]; double TM1, TM2; - double TM3, TM4, TM5; // for a_opt, u_opt, d_opt + double TM3, TM4, TM5; // for a_opt, u_opt, d_opt double d0_0, TM_0; double d0A, d0B, d0u, d0a; - double d0_out=5.0; - string seqM, seqxA, seqyA;// for output alignment + double d0_out = 5.0; + string seqM, seqxA, seqyA; // for output alignment double rmsd0 = 0.0; - int L_ali; // Aligned length in standard_TMscore - double Liden=0; - double TM_ali, rmsd_ali; // TMscore and rmsd in standard_TMscore - int n_ali=0; - int n_ali8=0; + int L_ali; // Aligned length in standard_TMscore + double Liden = 0; + double TM_ali, rmsd_ali; // TMscore and rmsd in standard_TMscore + int n_ali = 0; + int n_ali8 = 0; vector do_vec; /* entry function for structure alignment */ TMalign_main(xa, ya, seqx, seqy, secx, secy, - t0, u0, TM1, TM2, TM3, TM4, TM5, - d0_0, TM_0, d0A, d0B, d0u, 
d0a, d0_out, - seqM, seqxA, seqyA, do_vec, - rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, - xlen, ylen, sequence, Lnorm_ass, d0_scale, - 0, a_opt, u_opt, d_opt, fast_opt, - mol_vec1[0]+mol_vec2[0],TMcut); + t0, u0, TM1, TM2, TM3, TM4, TM5, + d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, + seqM, seqxA, seqyA, do_vec, + rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, + xlen, ylen, sequence, Lnorm_ass, d0_scale, + 0, a_opt, u_opt, d_opt, fast_opt, + mol_vec1[0] + mol_vec2[0], TMcut, 0); /* print result */ output_results( @@ -1276,191 +1326,199 @@ int MMdock(const string &xname, const string &yname, const string &fname_super, xlen, ylen, t0, u0, TM1, TM2, TM3, TM4, TM5, rmsd0, d0_out, seqM.c_str(), seqxA.c_str(), seqyA.c_str(), Liden, n_ali8, L_ali, TM_ali, rmsd_ali, TM_0, d0_0, d0A, d0B, - Lnorm_ass, d0_scale, d0a, d0u, (m_opt?fname_matrix:"").c_str(), - (outfmt_opt==2?outfmt_opt:3), ter_opt, true, split_opt, o_opt, fname_super, + Lnorm_ass, d0_scale, d0a, d0u, (m_opt ? fname_matrix : "").c_str(), + (outfmt_opt == 2 ? 
outfmt_opt : 3), ter_opt, true, split_opt, o_opt, fname_super, 0, a_opt, false, d_opt, mirror_opt, resi_vec1, resi_vec2); - if (outfmt_opt==2) printf("%s%s\t%s%s\t%.4f\n", - xname.substr(dir1_opt.size()).c_str(), chainID_list1[0].c_str(), - yname.substr(dir2_opt.size()).c_str(), chainID_list2[0].c_str(), - sqrt((TM1*TM1+TM2*TM2)/2)); + if (outfmt_opt == 2) + printf("%s%s\t%s%s\t%.4f\n", + xname.substr(dir1_opt.size()).c_str(), chainID_list1[0].c_str(), + yname.substr(dir2_opt.size()).c_str(), chainID_list2[0].c_str(), + sqrt((TM1 * TM1 + TM2 * TM2) / 2)); /* clean up */ seqM.clear(); seqxA.clear(); seqyA.clear(); - delete[]seqx; - delete[]seqy; - delete[]secx; - delete[]secy; - DeleteArray(&xa,xlen); - DeleteArray(&ya,ylen); + delete[] seqx; + delete[] seqy; + delete[] secx; + delete[] secy; + DeleteArray(&xa, xlen); + DeleteArray(&ya, ylen); do_vec.clear(); - vector > >().swap(xa_vec); // structure of complex1 - vector > >().swap(ya_vec); // structure of complex2 - vector >().swap(seqx_vec); // sequence of complex1 - vector >().swap(seqy_vec); // sequence of complex2 - vector >().swap(secx_vec); // secondary structure of complex1 - vector >().swap(secy_vec); // secondary structure of complex2 - mol_vec1.clear(); // molecule type of complex1, RNA if >0 - mol_vec2.clear(); // molecule type of complex2, RNA if >0 - chainID_list1.clear(); // list of chainID1 - chainID_list2.clear(); // list of chainID2 - xlen_vec.clear(); // length of complex1 - ylen_vec.clear(); // length of complex2 + vector>>().swap(xa_vec); // structure of complex1 + vector>>().swap(ya_vec); // structure of complex2 + vector>().swap(seqx_vec); // sequence of complex1 + vector>().swap(seqy_vec); // sequence of complex2 + vector>().swap(secx_vec); // secondary structure of complex1 + vector>().swap(secy_vec); // secondary structure of complex2 + mol_vec1.clear(); // molecule type of complex1, RNA if >0 + mol_vec2.clear(); // molecule type of complex2, RNA if >0 + chainID_list1.clear(); // list of 
chainID1 + chainID_list2.clear(); // list of chainID2 + xlen_vec.clear(); // length of complex1 + ylen_vec.clear(); // length of complex2 return 0; } /* declare TM-score tables */ - int chain1_num=xa_vec.size(); - int chain2_num=ya_vec.size(); - vector tmp_str_vec(chain2_num,""); + int chain1_num = xa_vec.size(); + int chain2_num = ya_vec.size(); + vector tmp_str_vec(chain2_num, ""); double **TMave_mat; - NewArray(&TMave_mat,chain1_num,chain2_num); - vector >seqxA_mat(chain1_num,tmp_str_vec); - vector > seqM_mat(chain1_num,tmp_str_vec); - vector >seqyA_mat(chain1_num,tmp_str_vec); + NewArray(&TMave_mat, chain1_num, chain2_num); + vector> seqxA_mat(chain1_num, tmp_str_vec); + vector> seqM_mat(chain1_num, tmp_str_vec); + vector> seqyA_mat(chain1_num, tmp_str_vec); /* trimComplex */ - vector > > ya_trim_vec; // structure of complex2 - vector >seqy_trim_vec; // sequence of complex2 - vector >secy_trim_vec; // secondary structure of complex2 - vector ylen_trim_vec; // length of complex2 - int Lchain_aa_max1=0; - int Lchain_na_max1=0; - for (i=0;i>> ya_trim_vec; // structure of complex2 + vector> seqy_trim_vec; // sequence of complex2 + vector> secy_trim_vec; // secondary structure of complex2 + vector ylen_trim_vec; // length of complex2 + int Lchain_aa_max1 = 0; + int Lchain_na_max1 = 0; + for (i = 0; i < chain1_num; i++) { - xlen=xlen_vec[i]; - if (mol_vec1[i]>0 && xlen>Lchain_na_max1) Lchain_na_max1=xlen; - else if (mol_vec1[i]<=0 && xlen>Lchain_aa_max1) Lchain_aa_max1=xlen; + xlen = xlen_vec[i]; + if (mol_vec1[i] > 0 && xlen > Lchain_na_max1) + Lchain_na_max1 = xlen; + else if (mol_vec1[i] <= 0 && xlen > Lchain_aa_max1) + Lchain_aa_max1 = xlen; } - int trim_chain_count=trimComplex(ya_trim_vec,seqy_trim_vec, - secy_trim_vec,ylen_trim_vec,ya_vec,seqy_vec,secy_vec,ylen_vec, - mol_vec2,Lchain_aa_max1,Lchain_na_max1); - int ylen_trim; // chain length - double **ya_trim; // structure of single chain - char *seqy_trim; // for the protein sequence - char *secy_trim; // for 
the secondary structure + int trim_chain_count = trimComplex(ya_trim_vec, seqy_trim_vec, + secy_trim_vec, ylen_trim_vec, ya_vec, seqy_vec, secy_vec, ylen_vec, + mol_vec2, Lchain_aa_max1, Lchain_na_max1); + int ylen_trim; // chain length + double **ya_trim; // structure of single chain + char *seqy_trim; // for the protein sequence + char *secy_trim; // for the secondary structure double **xt; /* get all-against-all alignment */ - if (len_aa+len_na>500) fast_opt=true; - for (i=0;i 500) + fast_opt = true; + for (i = 0; i < chain1_num; i++) { - xlen=xlen_vec[i]; - if (xlen<3) + xlen = xlen_vec[i]; + if (xlen < 3) { - for (j=0;j do_vec; - int Lnorm_tmp=len_aa; - if (mol_vec1[i]+mol_vec2[j]>0) Lnorm_tmp=len_na; + int Lnorm_tmp = len_aa; + if (mol_vec1[i] + mol_vec2[j] > 0) + Lnorm_tmp = len_na; /* entry function for structure alignment */ - if (trim_chain_count && ylen_trim_vec[j] > >().swap(ya_trim_vec); - vector >().swap(seqy_trim_vec); - vector >().swap(secy_trim_vec); - vector ().swap(ylen_trim_vec); + vector>>().swap(ya_trim_vec); + vector>().swap(seqy_trim_vec); + vector>().swap(secy_trim_vec); + vector().swap(ylen_trim_vec); /* calculate initial chain-chain assignment */ int *assign1_list; // value is index of assigned chain2 int *assign2_list; // value is index of assigned chain1 - assign1_list=new int[chain1_num]; - assign2_list=new int[chain2_num]; + assign1_list = new int[chain1_num]; + assign2_list = new int[chain2_num]; enhanced_greedy_search(TMave_mat, assign1_list, - assign2_list, chain1_num, chain2_num); + assign2_list, chain1_num, chain2_num); /* final alignment */ - if (outfmt_opt==0) print_version(); + if (outfmt_opt == 0) + print_version(); double **ut_mat; // rotation matrices for all-against-all alignment - NewArray(&ut_mat,chain1_num,4*3); - int ui,uj; - vectorxname_vec; - vectoryname_vec; - vectorTM_vec; - for (i=0;i xname_vec; + vector yname_vec; + vector TM_vec; + for (i = 0; i < chain1_num; i++) { - j=assign1_list[i]; - 
xname_vec.push_back(xname+chainID_list1[i]); - if (j<0) + j = assign1_list[i]; + xname_vec.push_back(xname + chainID_list1[i]); + if (j < 0) { - cerr<<"Warning! "< do_vec; int c; - for (c=0; c().swap(TM_vec); vector().swap(xname_vec); vector().swap(yname_vec); - delete [] assign1_list; - delete [] assign2_list; - DeleteArray(&TMave_mat,chain1_num); - DeleteArray(&ut_mat, chain1_num); - vector >().swap(seqxA_mat); - vector >().swap(seqM_mat); - vector >().swap(seqyA_mat); + delete[] assign1_list; + delete[] assign2_list; + DeleteArray(&TMave_mat, chain1_num); + DeleteArray(&ut_mat, chain1_num); + vector>().swap(seqxA_mat); + vector>().swap(seqM_mat); + vector>().swap(seqyA_mat); vector().swap(tmp_str_vec); - vector > >().swap(xa_vec); // structure of complex1 - vector > >().swap(ya_vec); // structure of complex2 - vector >().swap(seqx_vec); // sequence of complex1 - vector >().swap(seqy_vec); // sequence of complex2 - vector >().swap(secx_vec); // secondary structure of complex1 - vector >().swap(secy_vec); // secondary structure of complex2 - mol_vec1.clear(); // molecule type of complex1, RNA if >0 - mol_vec2.clear(); // molecule type of complex2, RNA if >0 - vector().swap(chainID_list1); // list of chainID1 - vector().swap(chainID_list2); // list of chainID2 - xlen_vec.clear(); // length of complex1 - ylen_vec.clear(); // length of complex2 + vector>>().swap(xa_vec); // structure of complex1 + vector>>().swap(ya_vec); // structure of complex2 + vector>().swap(seqx_vec); // sequence of complex1 + vector>().swap(seqy_vec); // sequence of complex2 + vector>().swap(secx_vec); // secondary structure of complex1 + vector>().swap(secy_vec); // secondary structure of complex2 + mol_vec1.clear(); // molecule type of complex1, RNA if >0 + mol_vec2.clear(); // molecule type of complex2, RNA if >0 + vector().swap(chainID_list1); // list of chainID1 + vector().swap(chainID_list2); // list of chainID2 + xlen_vec.clear(); // length of complex1 + ylen_vec.clear(); // length of 
complex2 return 1; } int mTMalign(string &xname, string &yname, const string &fname_super, - const string &fname_matrix, - vector &sequence, double Lnorm_ass, const double d0_scale, - const bool m_opt, const int i_opt, const int o_opt, const int a_opt, - bool u_opt, const bool d_opt, const bool full_opt, const double TMcut, - const int infmt_opt, const int ter_opt, - const int split_opt, const int outfmt_opt, bool fast_opt, - const int het_opt, const string &atom_opt, const bool autojustify, - const string &mol_opt, const string &dir_opt, const int byresi_opt, - const vector &chain_list, const vector &chain2parse, - const vector &model2parse, const bool se_opt) + const string &fname_matrix, + vector &sequence, double Lnorm_ass, const double d0_scale, + const bool m_opt, const int i_opt, const int o_opt, const int a_opt, + bool u_opt, const bool d_opt, const bool full_opt, const double TMcut, + const int infmt_opt, const int ter_opt, + const int split_opt, const int outfmt_opt, bool fast_opt, + const int het_opt, const string &atom_opt, const bool autojustify, + const string &mol_opt, const string &dir_opt, const int byresi_opt, + const vector &chain_list, const vector &chain2parse, + const vector &model2parse, const bool se_opt) { /* declare previously global variables */ - vector > >a_vec; // atomic structure - vector > >ua_vec; // unchanged atomic structure - vector >seq_vec; // sequence of complex - vector >sec_vec; // secondary structure of complex - vector mol_vec; // molecule type of complex1, RNA if >0 - vector chainID_list; // list of chainID - vector len_vec; // length of complex - int i,j; // chain index - int xlen, ylen; // chain length - double **xa, **ya; // structure of single chain - char *seqx, *seqy; // for the protein sequence - char *secx, *secy; // for the secondary structure - int len_aa,len_na; // total length of protein and RNA/DNA - vector resi_vec; // residue index for chain + vector>> a_vec; // atomic structure + vector>> ua_vec; // 
unchanged atomic structure + vector> seq_vec; // sequence of complex + vector> sec_vec; // secondary structure of complex + vector mol_vec; // molecule type of complex1, RNA if >0 + vector chainID_list; // list of chainID + vector len_vec; // length of complex + int i, j; // chain index + int xlen, ylen; // chain length + double **xa, **ya; // structure of single chain + char *seqx, *seqy; // for the protein sequence + char *secx, *secy; // for the secondary structure + int len_aa, len_na; // total length of protein and RNA/DNA + vector resi_vec; // residue index for chain /* parse chain list */ parse_chain_list(chain_list, a_vec, seq_vec, sec_vec, mol_vec, - len_vec, chainID_list, ter_opt, split_opt, mol_opt, infmt_opt, - atom_opt, autojustify, false, het_opt, len_aa, len_na, o_opt, - resi_vec, chain2parse, model2parse); - int chain_num=a_vec.size(); - if (chain_num<=1) PrintErrorAndQuit("ERROR! <2 chains for multiple alignment"); - if (m_opt||o_opt) for (i=0;ixlen) xlen=len_vec[i]; - total_len+=len_vec[i]; - mol_type+=mol_vec[i]; + if (len_vec[i] > xlen) + xlen = len_vec[i]; + total_len += len_vec[i]; + mol_type += mol_vec[i]; } - if (!u_opt) Lnorm_ass=total_len/chain_num; - u_opt=true; - total_len-=xlen; - if (total_len>750) fast_opt=true; + if (!u_opt) + Lnorm_ass = total_len / chain_num; + u_opt = true; + total_len -= xlen; + if (total_len > 750) + fast_opt = true; /* get all-against-all alignment */ double **TMave_mat; - NewArray(&TMave_mat,chain_num,chain_num); - vector tmp_str_vec(chain_num,""); - vector >seqxA_mat(chain_num,tmp_str_vec); - vector >seqyA_mat(chain_num,tmp_str_vec); - for (i=0;i tmp_str_vec(chain_num, ""); + vector> seqxA_mat(chain_num, tmp_str_vec); + vector> seqyA_mat(chain_num, tmp_str_vec); + for (i = 0; i < chain_num; i++) + for (j = 0; j < chain_num; j++) + TMave_mat[i][j] = 0; + for (i = 0; i < chain_num; i++) { - xlen=len_vec[i]; - if (xlen<3) continue; - seqx = new char[xlen+1]; - secx = new char[xlen+1]; + xlen = len_vec[i]; + if 
(xlen < 3) + continue; + seqx = new char[xlen + 1]; + secx = new char[xlen + 1]; NewArray(&xa, xlen, 3); - copy_chain_data(a_vec[i],seq_vec[i],sec_vec[i],xlen,xa,seqx,secx); - seqxA_mat[i][i]=seqyA_mat[i][i]=(string)(seqx); - for (j=i+1;j do_vec; /* entry function for structure alignment */ if (se_opt) { - int *invmap = new int[ylen+1]; - u0[0][0]=u0[1][1]=u0[2][2]=1; - u0[0][1]= u0[0][2]= - u0[1][0]= u0[1][2]= - u0[2][0]= u0[2][1]= - t0[0] =t0[1] =t0[2] =0; + int *invmap = new int[ylen + 1]; + u0[0][0] = u0[1][1] = u0[2][2] = 1; + u0[0][1] = u0[0][2] = + u0[1][0] = u0[1][2] = + u0[2][0] = u0[2][1] = + t0[0] = t0[1] = t0[2] = 0; se_main(xa, ya, seqx, seqy, TM1, TM2, TM3, TM4, TM5, - d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, - seqM, seqxA, seqyA, do_vec, - rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, - xlen, ylen, sequence, Lnorm_ass, d0_scale, - 0, false, u_opt, false, mol_type, outfmt_opt, invmap); - if (outfmt_opt>=2) + d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, + seqM, seqxA, seqyA, do_vec, + rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, + xlen, ylen, sequence, Lnorm_ass, d0_scale, + 0, false, u_opt, false, mol_type, outfmt_opt, invmap); + if (outfmt_opt >= 2) { - Liden=L_ali=0; - int r1,r2; - for (r2=0;r2xname_vec; - for (i=0;iyname_vec; + int repr_idx = 0; + vector xname_vec; + for (i = 0; i < chain_num; i++) + xname_vec.push_back( + chain_list[i].substr(dir_opt.size()) + chainID_list[i]); + vector yname_vec; double *TMave_list; TMave_list = new double[chain_num]; int *assign_list; - assign_list=new int[chain_num]; - vector msa(ylen,""); // row is position along msa; column is sequence + assign_list = new int[chain_num]; + vector msa(ylen, ""); // row is position along msa; column is sequence int compare_num; double TM1_total, TM2_total; - double TM3_total, TM4_total, TM5_total; // for a_opt, u_opt, d_opt + double TM3_total, TM4_total, TM5_total; // for a_opt, u_opt, d_opt double d0_0_total, TM_0_total; double d0A_total, d0B_total, d0u_total, 
d0a_total; double d0_out_total; double rmsd0_total; - int L_ali_total; // Aligned length in standard_TMscore + int L_ali_total; // Aligned length in standard_TMscore double Liden_total; - double TM_ali_total, rmsd_ali_total; // TMscore and rmsd in standard_TMscore + double TM_ali_total, rmsd_ali_total; // TMscore and rmsd in standard_TMscore int n_ali_total; int n_ali8_total; int xlen_total, ylen_total; - double TM4_total_max=0; - - int max_iter=5-(int)(total_len/200); - if (max_iter<2) max_iter=2; - int iter=0; - vector TM_vec(chain_num,0); - vector d0_vec(chain_num,0); - vector seqID_vec(chain_num,0); - vector > TM_mat(chain_num,TM_vec); - vector > d0_mat(chain_num,d0_vec); - vector > seqID_mat(chain_num,seqID_vec); - for (iter=0; iter TM_vec(chain_num, 0); + vector d0_vec(chain_num, 0); + vector seqID_vec(chain_num, 0); + vector> TM_mat(chain_num, TM_vec); + vector> d0_mat(chain_num, d0_vec); + vector> seqID_mat(chain_num, seqID_vec); + for (iter = 0; iter < max_iter; iter++) { - /* select representative */ - for (j=0; j do_vec; /* entry function for structure alignment */ if (se_opt) { - int *invmap = new int[ylen+1]; - u0[0][0]=u0[1][1]=u0[2][2]=1; - u0[0][1]= u0[0][2]= - u0[1][0]= u0[1][2]= - u0[2][0]= u0[2][1]= - t0[0] =t0[1] =t0[2] =0; + int *invmap = new int[ylen + 1]; + u0[0][0] = u0[1][1] = u0[2][2] = 1; + u0[0][1] = u0[0][2] = + u0[1][0] = u0[1][2] = + u0[2][0] = u0[2][1] = + t0[0] = t0[1] = t0[2] = 0; se_main(xa, ya, seqx, seqy, TM1, TM2, TM3, TM4, TM5, - d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, - seqM, seqxA, seqyA, do_vec, - rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, - xlen, ylen, sequence, Lnorm_ass, d0_scale, - 2, a_opt, u_opt, d_opt, mol_type, outfmt_opt, invmap); - if (outfmt_opt>=2) + d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, + seqM, seqxA, seqyA, do_vec, + rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, + xlen, ylen, sequence, Lnorm_ass, d0_scale, + 2, a_opt, u_opt, d_opt, mol_type, outfmt_opt, invmap); + if (outfmt_opt >= 2) { - 
Liden=L_ali=0; - int r1,r2; - for (r2=0;r2 msa_ext; // row is position along msa; column is sequence - for (r=0;r msa_ext; // row is position along msa; column is sequence + for (r = 0; r < ylen; r++) + msa[r] = seqy[r]; + // for (r=0;r do_vec; se_main(xa, ya, seqx, seqy, TM1, TM2, TM3, TM4, TM5, - d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA, - do_vec, rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, - xlen, ylen, sequence, Lnorm_ass, d0_scale, - 0, a_opt, u_opt, d_opt, mol_type, 1, invmap); - - int rx=0,ry=0; - ylen_ext=seqxA.size(); - NewArray(&ya_ext, ylen_ext, 3); // structure of single chain - seqy_ext= new char[ylen_ext+1]; // for the protein sequence - secy_ext= new char[ylen_ext+1]; // for the secondary structure - string tmp_gap=""; - for (r=0;r().swap(msa_ext); - vector >().swap(TM_pair_vec); - for (i=0; i>().swap(TM_pair_vec); + for (i = 0; i < chain_num; i++) { - tm_idx=assign_list[i]; - if (tm_idx<0) continue; - seqyA_mat[i][i]=""; - for (r=0 ;r do_vec; se_main(xa, ya, seqx, seqy, TM1, TM2, TM3, TM4, TM5, - d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA, - do_vec, rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, - xlen, ylen, sequence, Lnorm_ass, d0_scale, - true, a_opt, u_opt, d_opt, mol_type, 1, invmap); + d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA, + do_vec, rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, + xlen, ylen, sequence, Lnorm_ass, d0_scale, + true, a_opt, u_opt, d_opt, mol_type, 1, invmap); - if (xlen<=ylen) + if (xlen <= ylen) { - xlen_total+=xlen; - ylen_total+=ylen; - TM1_total+=TM1; - TM2_total+=TM2; - d0A_total+=d0A; - d0B_total+=d0B; + xlen_total += xlen; + ylen_total += ylen; + TM1_total += TM1; + TM2_total += TM2; + d0A_total += d0A; + d0B_total += d0B; } else { - xlen_total+=ylen; - ylen_total+=xlen; - TM1_total+=TM2; - TM2_total+=TM1; - d0A_total+=d0B; - d0B_total+=d0A; + xlen_total += ylen; + ylen_total += xlen; + TM1_total += TM2; + TM2_total += TM1; + d0A_total += d0B; 
+ d0B_total += d0A; } - TM_mat[i][j]=TM2; - TM_mat[j][i]=TM1; - d0_mat[i][j]=d0B; - d0_mat[j][i]=d0A; - seqID_mat[i][j]=1.*Liden/xlen; - seqID_mat[j][i]=1.*Liden/ylen; - - TM3_total+=TM3; - TM4_total+=TM4; - TM5_total+=TM5; - d0_0_total+=d0_0; - TM_0_total+=TM_0; - d0u_total+=d0u; - d0_out_total+=d0_out; - rmsd0_total+=rmsd0; - L_ali_total+=L_ali; // Aligned length in standard_TMscore - Liden_total+=Liden; - TM_ali_total+=TM_ali; - rmsd_ali_total+=rmsd_ali; // TMscore and rmsd in standard_TMscore - n_ali_total+=n_ali; - n_ali8_total+=n_ali8; + TM_mat[i][j] = TM2; + TM_mat[j][i] = TM1; + d0_mat[i][j] = d0B; + d0_mat[j][i] = d0A; + seqID_mat[i][j] = 1. * Liden / xlen; + seqID_mat[j][i] = 1. * Liden / ylen; + + TM3_total += TM3; + TM4_total += TM4; + TM5_total += TM5; + d0_0_total += d0_0; + TM_0_total += TM_0; + d0u_total += d0u; + d0_out_total += d0_out; + rmsd0_total += rmsd0; + L_ali_total += L_ali; // Aligned length in standard_TMscore + Liden_total += Liden; + TM_ali_total += TM_ali; + rmsd_ali_total += rmsd_ali; // TMscore and rmsd in standard_TMscore + n_ali_total += n_ali; + n_ali8_total += n_ali8; /* clean up */ - delete[]invmap; + delete[] invmap; seqM.clear(); seqxA.clear(); seqyA.clear(); - delete[]seqy; - delete[]secy; - DeleteArray(&ya,ylen); + delete[] seqy; + delete[] secy; + DeleteArray(&ya, ylen); do_vec.clear(); } - delete[]seqx; - delete[]secx; - DeleteArray(&xa,xlen); + delete[] seqx; + delete[] secx; + DeleteArray(&xa, xlen); } - if (TM4_total<=TM4_total_max) break; - TM4_total_max=TM4_total; + if (TM4_total <= TM4_total_max) + break; + TM4_total_max = TM4_total; } - for (i=0;i"<" << xname_vec[i] << "\tL=" << len_vec[i] + << "\td0=" << setiosflags(ios::fixed) << setprecision(2) << d0_vec[i] + << "\tseqID=" << setiosflags(ios::fixed) << setprecision(3) << seqID_vec[i] + << "\tTM-score=" << setiosflags(ios::fixed) << setprecision(5) << TM_vec[i]; + if (i == repr_idx) + buf << "\t*"; + buf << '\n' + << seqxA_mat[i][i] << endl; } - seqM=buf.str(); - 
seqM=seqM.substr(0,seqM.size()-1); + seqM = buf.str(); + seqM = seqM.substr(0, seqM.size() - 1); buf.str(string()); - //MergeAlign(seqxA_mat,seqyA_mat,repr_idx,xname_vec,chain_num,seqM); - if (outfmt_opt==0) print_version(); - output_mTMalign_results( xname,yname, "","", - xlen_total, ylen_total, t0, u0, TM1_total, TM2_total, - TM3_total, TM4_total, TM5_total, rmsd0_total, d0_out_total, - seqM.c_str(), seqxA.c_str(), seqyA.c_str(), Liden_total, - n_ali8_total, L_ali_total, TM_ali_total, rmsd_ali_total, - TM_0_total, d0_0_total, d0A_total, d0B_total, - Lnorm_ass, d0_scale, d0a_total, d0u_total, - "", outfmt_opt, ter_opt, 0, split_opt, false, - "", false, a_opt, u_opt, d_opt, false, - resi_vec, resi_vec ); + // MergeAlign(seqxA_mat,seqyA_mat,repr_idx,xname_vec,chain_num,seqM); + if (outfmt_opt == 0) + print_version(); + output_mTMalign_results(xname, yname, "", "", + xlen_total, ylen_total, t0, u0, TM1_total, TM2_total, + TM3_total, TM4_total, TM5_total, rmsd0_total, d0_out_total, + seqM.c_str(), seqxA.c_str(), seqyA.c_str(), Liden_total, + n_ali8_total, L_ali_total, TM_ali_total, rmsd_ali_total, + TM_0_total, d0_0_total, d0A_total, d0B_total, + Lnorm_ass, d0_scale, d0a_total, d0u_total, + "", outfmt_opt, ter_opt, 0, split_opt, false, + "", false, a_opt, u_opt, d_opt, false, + resi_vec, resi_vec); if (m_opt || o_opt) { double **ut_mat; // rotation matrices for all-against-all alignment - int ui,uj; + int ui, uj; double t[3], u[3][3]; double rmsd; - NewArray(&ut_mat,chain_num,4*3); - for (i=0;i > >().swap(ua_vec); + vector>>().swap(ua_vec); if (m_opt) { - assign_list[repr_idx]=-1; + assign_list[repr_idx] = -1; output_dock_rotation_matrix(fname_matrix.c_str(), - xname_vec,yname_vec, ut_mat, assign_list); + xname_vec, yname_vec, ut_mat, assign_list); } - //if (o_opt) output_dock(chain_list, ter_opt, split_opt, - //infmt_opt, atom_opt, false, ut_mat, fname_super); - if (o_opt) output_mTMalign_pymol(chain_list, - infmt_opt, ut_mat, fname_super, o_opt); - - 
DeleteArray(&ut_mat,chain_num); + // if (o_opt) output_dock(chain_list, ter_opt, split_opt, + // infmt_opt, atom_opt, false, ut_mat, fname_super); + if (o_opt) + output_mTMalign_pymol(chain_list, + infmt_opt, ut_mat, fname_super, o_opt); + + DeleteArray(&ut_mat, chain_num); } /* clean up */ vector().swap(msa); vector().swap(tmp_str_vec); - vector >().swap(seqxA_mat); - vector >().swap(seqyA_mat); + vector>().swap(seqxA_mat); + vector>().swap(seqyA_mat); vector().swap(xname_vec); vector().swap(yname_vec); - delete[]TMave_list; - DeleteArray(&TMave_mat,chain_num); - vector > >().swap(a_vec); // structure of complex - vector >().swap(seq_vec); // sequence of complex - vector >().swap(sec_vec); // secondary structure of complex - vector().swap(mol_vec); // molecule type of complex1, RNA if >0 - vector().swap(chainID_list); // list of chainID - vector().swap(len_vec); // length of complex + delete[] TMave_list; + DeleteArray(&TMave_mat, chain_num); + vector>>().swap(a_vec); // structure of complex + vector>().swap(seq_vec); // sequence of complex + vector>().swap(sec_vec); // secondary structure of complex + vector().swap(mol_vec); // molecule type of complex1, RNA if >0 + vector().swap(chainID_list); // list of chainID + vector().swap(len_vec); // length of complex vector().swap(TM_vec); vector().swap(d0_vec); vector().swap(seqID_vec); - vector >().swap(TM_mat); - vector >().swap(d0_mat); - vector >().swap(seqID_mat); + vector>().swap(TM_mat); + vector>().swap(d0_mat); + vector>().swap(seqID_mat); return 1; } /* sequence order independent alignment */ int SOIalign(string &xname, string &yname, const string &fname_super, - const string &fname_lign, const string &fname_matrix, - vector &sequence, const double Lnorm_ass, const double d0_scale, - const bool m_opt, const int i_opt, const int o_opt, const int a_opt, - const bool u_opt, const bool d_opt, const double TMcut, - const int infmt1_opt, const int infmt2_opt, const int ter_opt, - const int split_opt, const int 
outfmt_opt, const bool fast_opt, - const int cp_opt, const int mirror_opt, const int het_opt, - const string &atom_opt, const bool autojustify, const string &mol_opt, - const string &dir_opt, const string &dirpair_opt, const string &dir1_opt, - const string &dir2_opt, const vector &chain2parse1, - const vector &chain2parse2, const vector &model2parse1, - const vector &model2parse2, const vector &chain1_list, - const vector &chain2_list, const bool se_opt, - const int closeK_opt, const int mm_opt) + const string &fname_lign, const string &fname_matrix, + vector &sequence, const double Lnorm_ass, const double d0_scale, + const bool m_opt, const int i_opt, const int o_opt, const int a_opt, + const bool u_opt, const bool d_opt, const double TMcut, + const int infmt1_opt, const int infmt2_opt, const int ter_opt, + const int split_opt, const int outfmt_opt, const bool fast_opt, + const int cp_opt, const int mirror_opt, const int het_opt, + const string &atom_opt, const bool autojustify, const string &mol_opt, + const string &dir_opt, const string &dirpair_opt, const string &dir1_opt, + const string &dir2_opt, const vector &chain2parse1, + const vector &chain2parse2, const vector &model2parse1, + const vector &model2parse2, const vector &chain1_list, + const vector &chain2_list, const bool se_opt, + const int closeK_opt, const int mm_opt) { /* declare previously global variables */ - vector >PDB_lines1; // text of chain1 - vector >PDB_lines2; // text of chain2 + vector> PDB_lines1; // text of chain1 + vector> PDB_lines2; // text of chain2 vector mol_vec1; // molecule type of chain1, RNA if >0 vector mol_vec2; // molecule type of chain2, RNA if >0 vector chainID_list1; // list of chainID1 vector chainID_list2; // list of chainID2 - int i,j; // file index - int chain_i,chain_j; // chain index - int r; // residue index - int xlen, ylen; // chain length - int xchainnum,ychainnum;// number of chains in a PDB file - char *seqx, *seqy; // for the protein sequence - char *secx, 
*secy; // for the secondary structure - int **secx_bond; // boundary of secondary structure - int **secy_bond; // boundary of secondary structure - double **xa, **ya; // for input vectors xa[0...xlen-1][0..2] and - // ya[0...ylen-1][0..2], in general, - // ya is regarded as native structure - // --> superpose xa onto ya - double **xk, **yk; // k closest residues - vector resi_vec1; // residue index for chain1 - vector resi_vec2; // residue index for chain2 - int read_resi=0; // whether to read residue index - if (o_opt) read_resi=2; + int i, j; // file index + int chain_i, chain_j; // chain index + int r; // residue index + int xlen, ylen; // chain length + int xchainnum, ychainnum; // number of chains in a PDB file + char *seqx, *seqy; // for the protein sequence + char *secx, *secy; // for the secondary structure + int **secx_bond; // boundary of secondary structure + int **secy_bond; // boundary of secondary structure + double **xa, **ya; // for input vectors xa[0...xlen-1][0..2] and + // ya[0...ylen-1][0..2], in general, + // ya is regarded as native structure + // --> superpose xa onto ya + double **xk, **yk; // k closest residues + vector resi_vec1; // residue index for chain1 + vector resi_vec2; // residue index for chain2 + int read_resi = 0; // whether to read residue index + if (o_opt) + read_resi = 2; /* loop over file names */ - for (i=0;i=3) NewArray(&xk, xlen*closeK_opt, 3); + if (closeK_opt >= 3) + NewArray(&xk, xlen * closeK_opt, 3); seqx = new char[xlen + 1]; secx = new char[xlen + 1]; - xlen = read_PDB(PDB_lines1[chain_i], xa, seqx, - resi_vec1, read_resi); - if (mirror_opt) for (r=0;r0) make_sec(seqx,xa, xlen, secx,atom_opt); - else make_sec(xa, xlen, secx); // secondary structure assignment - if (closeK_opt>=3) getCloseK(xa, xlen, closeK_opt, xk); - if (mm_opt==6) + xlen = read_PDB(PDB_lines1[chain_i], xa, seqx, + resi_vec1, read_resi); + if (mirror_opt) + for (r = 0; r < xlen; r++) + xa[r][2] = -xa[r][2]; + if (mol_vec1[chain_i] > 0) + 
make_sec(seqx, xa, xlen, secx, atom_opt); + else + make_sec(xa, xlen, secx); // secondary structure assignment + if (closeK_opt >= 3) + getCloseK(xa, xlen, closeK_opt, xk); + if (mm_opt == 6) { NewArray(&secx_bond, xlen, 2); assign_sec_bond(secx_bond, secx, xlen); } - for (j=(dir_opt.size()>0)*(i+1);j 0) * (i + 1); j < chain2_list.size(); j++) { - if (dirpair_opt.size() && i!=j) continue; + if (dirpair_opt.size() && i != j) + continue; /* parse chain 2 */ - if (PDB_lines2.size()==0) + if (PDB_lines2.size() == 0) { - yname=chain2_list[j]; - ychainnum=get_PDB_lines(yname, PDB_lines2, chainID_list2, - mol_vec2, ter_opt, infmt2_opt, atom_opt, autojustify, - split_opt, het_opt, chain2parse2, model2parse2); + yname = chain2_list[j]; + ychainnum = get_PDB_lines(yname, PDB_lines2, chainID_list2, + mol_vec2, ter_opt, infmt2_opt, atom_opt, autojustify, + split_opt, het_opt, chain2parse2, model2parse2); if (!ychainnum) { - cerr<<"Warning! Cannot parse file: "<=3) NewArray(&yk, ylen*closeK_opt, 3); + if (closeK_opt >= 3) + NewArray(&yk, ylen * closeK_opt, 3); seqy = new char[ylen + 1]; secy = new char[ylen + 1]; ylen = read_PDB(PDB_lines2[chain_j], ya, seqy, - resi_vec2, read_resi); - if (mol_vec2[chain_j]>0) - make_sec(seqy, ya, ylen, secy, atom_opt); - else make_sec(ya, ylen, secy); - if (closeK_opt>=3) getCloseK(ya, ylen, closeK_opt, yk); - if (mm_opt==6) + resi_vec2, read_resi); + if (mol_vec2[chain_j] > 0) + make_sec(seqy, ya, ylen, secy, atom_opt); + else + make_sec(ya, ylen, secy); + if (closeK_opt >= 3) + getCloseK(ya, ylen, closeK_opt, yk); + if (mm_opt == 6) { NewArray(&secy_bond, ylen, 2); assign_sec_bond(secy_bond, secy, ylen); @@ -2563,111 +2696,117 @@ int SOIalign(string &xname, string &yname, const string &fname_super, /* declare variable specific to this pair of TMalign */ double t0[3], u0[3][3]; double TM1, TM2; - double TM3, TM4, TM5; // for a_opt, u_opt, d_opt + double TM3, TM4, TM5; // for a_opt, u_opt, d_opt double d0_0, TM_0; double d0A, d0B, d0u, d0a; - 
double d0_out=5.0; - string seqM, seqxA, seqyA;// for output alignment + double d0_out = 5.0; + string seqM, seqxA, seqyA; // for output alignment double rmsd0 = 0.0; - int L_ali; // Aligned length in standard_TMscore - double Liden=0; - double TM_ali, rmsd_ali; // TMscore and rmsd in standard_TMscore - int n_ali=0; - int n_ali8=0; - bool force_fast_opt=(getmin(xlen,ylen)>1500)?true:fast_opt; - int *invmap = new int[ylen+1]; - double *dist_list = new double[ylen+1]; + int L_ali; // Aligned length in standard_TMscore + double Liden = 0; + double TM_ali, rmsd_ali; // TMscore and rmsd in standard_TMscore + int n_ali = 0; + int n_ali8 = 0; + bool force_fast_opt = (getmin(xlen, ylen) > 1500) ? true : fast_opt; + int *invmap = new int[ylen + 1]; + double *dist_list = new double[ylen + 1]; /* entry function for structure alignment */ - if (se_opt) + if (se_opt) { - u0[0][0]=u0[1][1]=u0[2][2]=1; - u0[0][1]= u0[0][2]= - u0[1][0]= u0[1][2]= - u0[2][0]= u0[2][1]= - t0[0] =t0[1] =t0[2] =0; + u0[0][0] = u0[1][1] = u0[2][2] = 1; + u0[0][1] = u0[0][2] = + u0[1][0] = u0[1][2] = + u0[2][0] = u0[2][1] = + t0[0] = t0[1] = t0[2] = 0; soi_se_main(xa, ya, seqx, seqy, TM1, TM2, TM3, TM4, TM5, - d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, - seqM, seqxA, seqyA, - rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, - xlen, ylen, Lnorm_ass, d0_scale, - i_opt, a_opt, u_opt, d_opt, - mol_vec1[chain_i]+mol_vec2[chain_j], - outfmt_opt, invmap, dist_list, - secx_bond, secy_bond, mm_opt); - if (outfmt_opt>=2) + d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, + seqM, seqxA, seqyA, + rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, + xlen, ylen, Lnorm_ass, d0_scale, + i_opt, a_opt, u_opt, d_opt, + mol_vec1[chain_i] + mol_vec2[chain_j], + outfmt_opt, invmap, dist_list, + secx_bond, secy_bond, mm_opt); + if (outfmt_opt >= 2) { - Liden=L_ali=0; - int r1,r2; - for (r2=0;r2=3) DeleteArray(&yk, ylen*closeK_opt); - delete [] seqy; - delete [] secy; + if (closeK_opt >= 3) + DeleteArray(&yk, ylen * closeK_opt); + 
delete[] seqy; + delete[] secy; resi_vec2.clear(); - if (mm_opt==6) DeleteArray(&secy_bond, ylen); + if (mm_opt == 6) + DeleteArray(&secy_bond, ylen); } // chain_j - if (chain2_list.size()>1) + if (chain2_list.size() > 1) { yname.clear(); - for (chain_j=0;chain_j=3) DeleteArray(&xk, xlen*closeK_opt); - delete [] seqx; - delete [] secx; + if (closeK_opt >= 3) + DeleteArray(&xk, xlen * closeK_opt); + delete[] seqx; + delete[] secx; resi_vec1.clear(); - if (mm_opt==6) DeleteArray(&secx_bond, xlen); + if (mm_opt == 6) + DeleteArray(&secx_bond, xlen); } // chain_i xname.clear(); PDB_lines1.clear(); chainID_list1.clear(); mol_vec1.clear(); } // i - if (chain2_list.size()==1) + if (chain2_list.size() == 1) { yname.clear(); - for (chain_j=0;chain_j &sequence, const double Lnorm_ass, const double d0_scale, - const bool m_opt, const int i_opt, const int o_opt, const int a_opt, - const bool u_opt, const bool d_opt, const double TMcut, - const int infmt1_opt, const int infmt2_opt, const int ter_opt, - const int split_opt, const int outfmt_opt, const bool fast_opt, - const int mirror_opt, const int het_opt, const string &atom_opt, - const bool autojustify, const string &mol_opt, const string &dir_opt, - const string &dirpair_opt, const string &dir1_opt, const string &dir2_opt, - const vector &chain2parse1, const vector &chain2parse2, - const vector &model2parse1, const vector &model2parse2, - const int byresi_opt, const vector &chain1_list, - const vector &chain2_list, const int hinge_opt) + const string &fname_lign, const string &fname_matrix, + vector &sequence, const double Lnorm_ass, const double d0_scale, + const bool m_opt, const int i_opt, const int o_opt, const int a_opt, + const bool u_opt, const bool d_opt, const double TMcut, + const int infmt1_opt, const int infmt2_opt, const int ter_opt, + const int split_opt, const int outfmt_opt, const bool fast_opt, + const int mirror_opt, const int het_opt, const string &atom_opt, + const bool autojustify, const string 
&mol_opt, const string &dir_opt, + const string &dirpair_opt, const string &dir1_opt, const string &dir2_opt, + const vector &chain2parse1, const vector &chain2parse2, + const vector &model2parse1, const vector &model2parse2, + const int byresi_opt, const vector &chain1_list, + const vector &chain2_list, const int hinge_opt, const int ss_opt) { /* declare previously global variables */ - vector >PDB_lines1; // text of chain1 - vector >PDB_lines2; // text of chain2 + vector> PDB_lines1; // text of chain1 + vector> PDB_lines2; // text of chain2 vector mol_vec1; // molecule type of chain1, RNA if >0 vector mol_vec2; // molecule type of chain2, RNA if >0 vector chainID_list1; // list of chainID1 vector chainID_list2; // list of chainID2 - int i,j; // file index - int chain_i,chain_j; // chain index - int r; // residue index - int xlen, ylen; // chain length - int xchainnum,ychainnum;// number of chains in a PDB file - char *seqx, *seqy; // for the protein sequence - char *secx, *secy; // for the secondary structure - double **xa, **ya; // for input vectors xa[0...xlen-1][0..2] and - // ya[0...ylen-1][0..2], in general, - // ya is regarded as native structure - // --> superpose xa onto ya - vector resi_vec1; // residue index for chain1 - vector resi_vec2; // residue index for chain2 - int read_resi=byresi_opt; // whether to read residue index - if (byresi_opt==0 && o_opt) read_resi=2; + int i, j; // file index + int chain_i, chain_j; // chain index + int r; // residue index + int xlen, ylen; // chain length + int xchainnum, ychainnum; // number of chains in a PDB file + char *seqx, *seqy; // for the protein sequence + char *secx, *secy; // for the secondary structure + double **xa, **ya; // for input vectors xa[0...xlen-1][0..2] and + // ya[0...ylen-1][0..2], in general, + // ya is regarded as native structure + // --> superpose xa onto ya + vector resi_vec1; // residue index for chain1 + vector resi_vec2; // residue index for chain2 + int read_resi = byresi_opt; // 
whether to read residue index + if (byresi_opt == 0 && o_opt) + read_resi = 2; /* loop over file names */ - for (i=0;i0) make_sec(seqx,xa, xlen, secx,atom_opt); - else make_sec(xa, xlen, secx); // secondary structure assignment + xlen = read_PDB(PDB_lines1[chain_i], xa, seqx, + resi_vec1, read_resi); + if (mirror_opt) + for (r = 0; r < xlen; r++) + xa[r][2] = -xa[r][2]; + if (mol_vec1[chain_i] > 0) + make_sec(seqx, xa, xlen, secx, atom_opt); + else + make_sec(xa, xlen, secx); // secondary structure assignment - for (j=(dir_opt.size()>0)*(i+1);j 0) * (i + 1); j < chain2_list.size(); j++) { - if (dirpair_opt.size() && i!=j) continue; + if (dirpair_opt.size() && i != j) + continue; /* parse chain 2 */ - if (PDB_lines2.size()==0) + if (PDB_lines2.size() == 0) { - yname=chain2_list[j]; - ychainnum=get_PDB_lines(yname, PDB_lines2, chainID_list2, - mol_vec2, ter_opt, infmt2_opt, atom_opt, autojustify, - split_opt, het_opt, chain2parse2, model2parse2); + yname = chain2_list[j]; + ychainnum = get_PDB_lines(yname, PDB_lines2, chainID_list2, + mol_vec2, ter_opt, infmt2_opt, atom_opt, autojustify, + split_opt, het_opt, chain2parse2, model2parse2); if (!ychainnum) { - cerr<<"Warning! 
Cannot parse file: "<0) - make_sec(seqy, ya, ylen, secy, atom_opt); - else make_sec(ya, ylen, secy); + resi_vec2, read_resi); + if (mol_vec2[chain_j] > 0) + make_sec(seqy, ya, ylen, secy, atom_opt); + else + make_sec(ya, ylen, secy); - if (byresi_opt) extract_aln_from_resi(sequence, - seqx,seqy,resi_vec1,resi_vec2,byresi_opt); + if (byresi_opt) + extract_aln_from_resi(sequence, + seqx, seqy, resi_vec1, resi_vec2, byresi_opt); /* declare variable specific to this pair of TMalign */ double t0[3], u0[3][3]; double TM1, TM2; - double TM3, TM4, TM5; // for a_opt, u_opt, d_opt + double TM3, TM4, TM5; // for a_opt, u_opt, d_opt double d0_0, TM_0; double d0A, d0B, d0u, d0a; - double d0_out=5.0; - string seqM, seqxA, seqyA;// for output alignment + double d0_out = 5.0; + string seqM, seqxA, seqyA; // for output alignment double rmsd0 = 0.0; - int L_ali; // Aligned length in standard_TMscore - double Liden=0; - double TM_ali, rmsd_ali; // TMscore and rmsd in standard_TMscore - int n_ali=0; - int n_ali8=0; - bool force_fast_opt=(getmin(xlen,ylen)>1500)?true:fast_opt; - vector >tu_vec; + int L_ali; // Aligned length in standard_TMscore + double Liden = 0; + double TM_ali, rmsd_ali; // TMscore and rmsd in standard_TMscore + int n_ali = 0; + int n_ali8 = 0; + bool force_fast_opt = (getmin(xlen, ylen) > 1500) ? 
true : fast_opt; + vector> tu_vec; vector do_vec; /* entry function for structure alignment */ - int hingeNum=flexalign_main( + int hingeNum = flexalign_main( xa, ya, seqx, seqy, secx, secy, t0, u0, tu_vec, TM1, TM2, TM3, TM4, TM5, d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, @@ -2849,28 +3002,28 @@ int flexalign(string &xname, string &yname, const string &fname_super, rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, xlen, ylen, sequence, Lnorm_ass, d0_scale, i_opt, a_opt, u_opt, d_opt, force_fast_opt, - mol_vec1[chain_i]+mol_vec2[chain_j],hinge_opt); - - if (hinge_opt && hingeNum<=1 && - n_ali8<0.6*getmin(xlen,ylen)) + mol_vec1[chain_i] + mol_vec2[chain_j], hinge_opt, ss_opt); + + if (hinge_opt && hingeNum <= 1 && + n_ali8 < 0.6 * getmin(xlen, ylen)) { double t0_h[3], u0_h[3][3]; double TM1_h, TM2_h; double TM3_h, TM4_h, TM5_h; double d0_0_h, TM_0_h; - double d0_out_h=5.0; + double d0_out_h = 5.0; string seqM_h, seqxA_h, seqyA_h; double rmsd0_h = 0.0; int L_ali_h; - double Liden_h=0; + double Liden_h = 0; double TM_ali_h, rmsd_ali_h; - int n_ali_h=0; - int n_ali8_h=0; - vector >tu_vec_h(1,tu_vec[0]); + int n_ali_h = 0; + int n_ali8_h = 0; + vector> tu_vec_h(1, tu_vec[0]); vector do_vec_h; - tu2t_u(tu_vec[0],t0_h,u0_h); + tu2t_u(tu_vec[0], t0_h, u0_h); - int hingeNum_h=flexalign_main( + int hingeNum_h = flexalign_main( xa, ya, seqx, seqy, secx, secy, t0_h, u0_h, tu_vec_h, TM1_h, TM2_h, TM3_h, TM4_h, TM5_h, @@ -2879,55 +3032,59 @@ int flexalign(string &xname, string &yname, const string &fname_super, Liden_h, TM_ali_h, rmsd_ali_h, n_ali_h, n_ali8_h, xlen, ylen, sequence, Lnorm_ass, d0_scale, i_opt, a_opt, u_opt, d_opt, force_fast_opt, - mol_vec1[chain_i]+mol_vec2[chain_j],hinge_opt); - - double TM =(TM1 >TM2 )?TM1 :TM2; - double TM_h=(TM1_h>TM2_h)?TM1_h:TM2_h; - if (TM_h>TM) + mol_vec1[chain_i] + mol_vec2[chain_j], hinge_opt, ss_opt); + + double TM = (TM1 > TM2) ? TM1 : TM2; + double TM_h = (TM1_h > TM2_h) ? 
TM1_h : TM2_h; + if (TM_h > TM) { - hingeNum=hingeNum_h; - tu2t_u(tu_vec_h[0],t0,u0); - TM1=TM1_h; - TM2=TM2_h; - TM3=TM3_h; - TM4=TM4_h; - TM5=TM5_h; - d0_0=d0_0_h; - TM_0=TM_0_h; - d0_out=d0_out_h; - seqM=seqM_h; - seqxA=seqxA_h; - seqyA=seqyA_h; - rmsd0=rmsd0_h; - L_ali=L_ali_h; - Liden=Liden_h; - TM_ali=TM_ali_h; - rmsd_ali=rmsd_ali_h; - n_ali=n_ali_h; - n_ali8=n_ali8_h; + hingeNum = hingeNum_h; + tu2t_u(tu_vec_h[0], t0, u0); + TM1 = TM1_h; + TM2 = TM2_h; + TM3 = TM3_h; + TM4 = TM4_h; + TM5 = TM5_h; + d0_0 = d0_0_h; + TM_0 = TM_0_h; + d0_out = d0_out_h; + seqM = seqM_h; + seqxA = seqxA_h; + seqyA = seqyA_h; + rmsd0 = rmsd0_h; + L_ali = L_ali_h; + Liden = Liden_h; + TM_ali = TM_ali_h; + rmsd_ali = rmsd_ali_h; + n_ali = n_ali_h; + n_ali8 = n_ali8_h; + for (int hinge = 0; hinge < tu_vec.size(); hinge++) + tu_vec[hinge].clear(); tu_vec.clear(); - for (int hinge=0;hinge1) + if (chain2_list.size() > 1) { yname.clear(); - for (chain_j=0;chain_j &sequence, const double Lnorm_ass, const double d0_scale, + const bool m_opt, const int i_opt, const int o_opt, const int a_opt, + const bool u_opt, const bool d_opt, const double TMcut, + const int infmt1_opt, const int infmt2_opt, const int ter_opt, + const int split_opt, const int outfmt_opt, const bool fast_opt, + const int mirror_opt, const int het_opt, const string &atom_opt, + const bool autojustify, const string &mol_opt, const string &dir_opt, + const string &dirpair_opt, const string &dir1_opt, const string &dir2_opt, + const vector &chain2parse1, const vector &chain2parse2, + const vector &model2parse1, const vector &model2parse2, + const int byresi_opt, const vector &chain1_list, + const vector &chain2_list, const int hinge_opt) +{ + /* declare previously global variables */ + vector> PDB_lines1; // text of chain1 + vector> PDB_lines2; // text of chain2 + vector mol_vec1; // molecule type of chain1, RNA if >0 + vector mol_vec2; // molecule type of chain2, RNA if >0 + vector chainID_list1; // list of chainID1 + 
vector chainID_list2; // list of chainID2 + int i, j; // file index + int chain_i, chain_j; // chain index + int r; // residue index + int xlen, ylen; // chain length + int xchainnum, ychainnum; // number of chains in a PDB file + char *seqx, *seqy; // for the protein sequence + char *secx, *secy; // for the secondary structure + double **xa, **ya; // for input vectors xa[0...xlen-1][0..2] and + // ya[0...ylen-1][0..2], in general, + // ya is regarded as native structure + // --> superpose xa onto ya + vector resi_vec1; // residue index for chain1 + vector resi_vec2; // residue index for chain2 + int read_resi = byresi_opt; // whether to read residue index + if (byresi_opt == 0 && o_opt) + read_resi = 2; + + /* loop over file names */ + for (i = 0; i < chain1_list.size(); i++) + { + /* parse chain 1 */ + xname = chain1_list[i]; + xchainnum = get_PDB_lines(xname, PDB_lines1, chainID_list1, + mol_vec1, ter_opt, infmt1_opt, atom_opt, autojustify, + split_opt, het_opt, chain2parse1, model2parse1); + if (!xchainnum) + { + cerr << "Warning! Cannot parse file: " << xname + << ". Chain number 0." << endl; + continue; + } + for (chain_i = 0; chain_i < xchainnum; chain_i++) + { + xlen = PDB_lines1[chain_i].size(); + if (mol_opt == "RNA") + mol_vec1[chain_i] = 1; + else if (mol_opt == "protein") + mol_vec1[chain_i] = -1; + if (!xlen) + { + cerr << "Warning! Cannot parse file: " << xname + << ". Chain length 0." 
<< endl; + continue; + } + else if (xlen < 3) + { + cerr << "Sequence is too short <3!: " << xname << endl; + continue; + } + NewArray(&xa, xlen, 3); + seqx = new char[xlen + 1]; + secx = new char[xlen + 1]; + xlen = read_PDB(PDB_lines1[chain_i], xa, seqx, + resi_vec1, read_resi); + if (mirror_opt) + for (r = 0; r < xlen; r++) + xa[r][2] = -xa[r][2]; + if (mol_vec1[chain_i] > 0) + make_sec(seqx, xa, xlen, secx, atom_opt); + else + make_sec(xa, xlen, secx); // secondary structure assignment + + for (j = (dir_opt.size() > 0) * (i + 1); j < chain2_list.size(); j++) + { + if (dirpair_opt.size() && i != j) + continue; + /* parse chain 2 */ + if (PDB_lines2.size() == 0) + { + yname = chain2_list[j]; + ychainnum = get_PDB_lines(yname, PDB_lines2, chainID_list2, + mol_vec2, ter_opt, infmt2_opt, atom_opt, autojustify, + split_opt, het_opt, chain2parse2, model2parse2); + if (!ychainnum) + { + cerr << "Warning! Cannot parse file: " << yname + << ". Chain number 0." << endl; + continue; + } + } + for (chain_j = 0; chain_j < ychainnum; chain_j++) + { + ylen = PDB_lines2[chain_j].size(); + if (mol_opt == "RNA") + mol_vec2[chain_j] = 1; + else if (mol_opt == "protein") + mol_vec2[chain_j] = -1; + if (!ylen) + { + cerr << "Warning! Cannot parse file: " << yname + << ". Chain length 0." 
<< endl; + continue; + } + else if (ylen < 3) + { + cerr << "Sequence is too short <3!: " << yname << endl; + continue; + } + NewArray(&ya, ylen, 3); + seqy = new char[ylen + 1]; + secy = new char[ylen + 1]; + ylen = read_PDB(PDB_lines2[chain_j], ya, seqy, + resi_vec2, read_resi); + if (mol_vec2[chain_j] > 0) + make_sec(seqy, ya, ylen, secy, atom_opt); + else + make_sec(ya, ylen, secy); + + if (byresi_opt) + extract_aln_from_resi(sequence, + seqx, seqy, resi_vec1, resi_vec2, byresi_opt); + + /* declare variables to hold the best result among ss_opt true/false */ + double best_t0[3], best_u0[3][3]; + double best_TM1 = -1.0, best_TM2 = -1.0, best_TM3 = -1.0, best_TM4 = -1.0, best_TM5 = -1.0; + double best_d0_0 = 0.0, best_TM_0 = 0.0, best_d0A = 0.0, best_d0B = 0.0, best_d0u = 0.0, best_d0a = 0.0, best_d0_out = 5.0; + string best_seqM, best_seqxA, best_seqyA; + double best_rmsd0 = 0.0, best_Liden = 0.0, best_TM_ali = 0.0, best_rmsd_ali = 0.0; + int best_L_ali = 0, best_n_ali = 0, best_n_ali8 = 0; + vector> best_tu_vec; + vector best_do_vec; + double global_max_TM = -1.0; + + /* loop to test both true and false for ss_opt */ + for (int cur_ss_opt = 0; cur_ss_opt < 2; cur_ss_opt++) + { + /* declare variables specific to this pair and iteration */ + double t0[3], u0[3][3]; + double TM1, TM2, TM3, TM4, TM5; + double d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out = 5.0; + string seqM, seqxA, seqyA; + double rmsd0 = 0.0; + int L_ali; + double Liden = 0; + double TM_ali, rmsd_ali; + int n_ali = 0, n_ali8 = 0; + bool force_fast_opt = (getmin(xlen, ylen) > 1500) ? 
true : fast_opt; + vector> tu_vec; + vector do_vec; + + /* entry function for structure alignment */ + int hingeNum = flexalign_main( + xa, ya, seqx, seqy, secx, secy, + t0, u0, tu_vec, TM1, TM2, TM3, TM4, TM5, + d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, + seqM, seqxA, seqyA, do_vec, + rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, + xlen, ylen, sequence, Lnorm_ass, d0_scale, + i_opt, a_opt, u_opt, d_opt, force_fast_opt, + mol_vec1[chain_i] + mol_vec2[chain_j], hinge_opt, cur_ss_opt); + + if (hinge_opt && hingeNum <= 1 && + n_ali8 < 0.6 * getmin(xlen, ylen)) + { + double t0_h[3], u0_h[3][3]; + double TM1_h, TM2_h, TM3_h, TM4_h, TM5_h; + double d0_0_h, TM_0_h; + double d0_out_h = 5.0; + string seqM_h, seqxA_h, seqyA_h; + double rmsd0_h = 0.0, Liden_h = 0, TM_ali_h, rmsd_ali_h; + int L_ali_h, n_ali_h = 0, n_ali8_h = 0; + vector> tu_vec_h(1, tu_vec[0]); + vector do_vec_h; + tu2t_u(tu_vec[0], t0_h, u0_h); + + int hingeNum_h = flexalign_main( + xa, ya, seqx, seqy, secx, secy, + t0_h, u0_h, tu_vec_h, + TM1_h, TM2_h, TM3_h, TM4_h, TM5_h, + d0_0_h, TM_0_h, d0A, d0B, d0u, d0a, d0_out_h, + seqM_h, seqxA_h, seqyA_h, do_vec_h, rmsd0_h, L_ali_h, + Liden_h, TM_ali_h, rmsd_ali_h, n_ali_h, n_ali8_h, + xlen, ylen, sequence, Lnorm_ass, d0_scale, i_opt, + a_opt, u_opt, d_opt, force_fast_opt, + mol_vec1[chain_i] + mol_vec2[chain_j], hinge_opt, cur_ss_opt); + + double TM = (TM1 > TM2) ? TM1 : TM2; + double TM_h = (TM1_h > TM2_h) ? 
TM1_h : TM2_h; + if (TM_h > TM) + { + hingeNum = hingeNum_h; + tu2t_u(tu_vec_h[0], t0, u0); + TM1 = TM1_h; + TM2 = TM2_h; + TM3 = TM3_h; + TM4 = TM4_h; + TM5 = TM5_h; + d0_0 = d0_0_h; + TM_0 = TM_0_h; + d0_out = d0_out_h; + seqM = seqM_h; + seqxA = seqxA_h; + seqyA = seqyA_h; + rmsd0 = rmsd0_h; + L_ali = L_ali_h; + Liden = Liden_h; + TM_ali = TM_ali_h; + rmsd_ali = rmsd_ali_h; + n_ali = n_ali_h; + n_ali8 = n_ali8_h; + + for (int hinge = 0; hinge < tu_vec.size(); hinge++) + tu_vec[hinge].clear(); + tu_vec.clear(); + for (int hinge = 0; hinge < tu_vec_h.size(); hinge++) + tu_vec.push_back(tu_vec_h[hinge]); + do_vec.clear(); + for (int r = 0; r < do_vec_h.size(); r++) + do_vec.push_back(do_vec_h[r]); + } + else + { + tu2t_u(tu_vec[0], t0, u0); + } + do_vec_h.clear(); + } + + /* Compare current run max TM-score with the global best */ + double cur_max_TM = (TM1 > TM2) ? TM1 : TM2; + if (cur_max_TM > global_max_TM) + { + global_max_TM = cur_max_TM; + + /* copy primitive types to best cache */ + for (int k = 0; k < 3; k++) + best_t0[k] = t0[k]; + for (int k = 0; k < 3; k++) + for (int l = 0; l < 3; l++) + best_u0[k][l] = u0[k][l]; + best_TM1 = TM1; + best_TM2 = TM2; + best_TM3 = TM3; + best_TM4 = TM4; + best_TM5 = TM5; + best_d0_0 = d0_0; + best_TM_0 = TM_0; + best_d0A = d0A; + best_d0B = d0B; + best_d0u = d0u; + best_d0a = d0a; + best_d0_out = d0_out; + best_rmsd0 = rmsd0; + best_Liden = Liden; + best_TM_ali = TM_ali; + best_rmsd_ali = rmsd_ali; + best_L_ali = L_ali; + best_n_ali = n_ali; + best_n_ali8 = n_ali8; + + /* copy complex objects to best cache */ + best_seqM = seqM; + best_seqxA = seqxA; + best_seqyA = seqyA; + + best_tu_vec.clear(); + for (int k = 0; k < tu_vec.size(); k++) + best_tu_vec.push_back(tu_vec[k]); + + best_do_vec.clear(); + for (int k = 0; k < do_vec.size(); k++) + best_do_vec.push_back(do_vec[k]); + } + } /* end of ss_opt loop */ + + /* print result using the best run */ + if (outfmt_opt == 0) + print_version(); + output_flexalign_results( + 
xname.substr(dir1_opt.size() + dir_opt.size() + dirpair_opt.size()), + yname.substr(dir2_opt.size() + dir_opt.size() + dirpair_opt.size()), + chainID_list1[chain_i], chainID_list2[chain_j], + xlen, ylen, best_t0, best_u0, best_tu_vec, best_TM1, best_TM2, best_TM3, best_TM4, best_TM5, + best_rmsd0, best_d0_out, best_seqM.c_str(), + best_seqxA.c_str(), best_seqyA.c_str(), best_Liden, + best_n_ali8, best_L_ali, best_TM_ali, best_rmsd_ali, best_TM_0, best_d0_0, + best_d0A, best_d0B, Lnorm_ass, d0_scale, best_d0a, best_d0u, + (m_opt ? fname_matrix : "").c_str(), + outfmt_opt, ter_opt, false, split_opt, o_opt, + fname_super, i_opt, a_opt, u_opt, d_opt, mirror_opt, + resi_vec1, resi_vec2); + + /* Done! Free memory */ + best_tu_vec.clear(); + best_seqM.clear(); + best_seqxA.clear(); + best_seqyA.clear(); + best_do_vec.clear(); + DeleteArray(&ya, ylen); + delete[] seqy; + delete[] secy; + resi_vec2.clear(); + } + if (chain2_list.size() > 1) + { + yname.clear(); + for (chain_j = 0; chain_j < ychainnum; chain_j++) + PDB_lines2[chain_j].clear(); + PDB_lines2.clear(); + chainID_list2.clear(); + mol_vec2.clear(); + } + } + PDB_lines1[chain_i].clear(); + DeleteArray(&xa, xlen); + delete[] seqx; + delete[] secx; + resi_vec1.clear(); + } + xname.clear(); + PDB_lines1.clear(); + chainID_list1.clear(); + mol_vec1.clear(); + } + if (chain2_list.size() == 1) + { + yname.clear(); + for (chain_j = 0; chain_j < ychainnum; chain_j++) + PDB_lines2[chain_j].clear(); + PDB_lines2.clear(); + resi_vec2.clear(); + chainID_list2.clear(); + mol_vec2.clear(); + } + return 0; +} int main(int argc, char *argv[]) { - if (argc < 2) print_help(); - + if (argc < 2) + print_help(); clock_t t1, t2; t1 = clock(); @@ -2989,67 +3493,67 @@ int main(int argc, char *argv[]) /**********************/ /* get argument */ /**********************/ - string xname = ""; - string yname = ""; - string fname_super = ""; // file name for superposed structure - string fname_lign = ""; // file name for user alignment - string 
fname_matrix= ""; // file name for output matrix - vector sequence; // get value from alignment file + string xname = ""; + string yname = ""; + string fname_super = ""; // file name for superposed structure + string fname_lign = ""; // file name for user alignment + string fname_matrix = ""; // file name for output matrix + vector sequence; // get value from alignment file double Lnorm_ass, d0_scale; - bool h_opt = false; // print full help message - bool v_opt = false; // print version - bool m_opt = false; // flag for -m, output rotation matrix - int i_opt = 0; // 1 for -i, 3 for -I - int o_opt = 0; // 1 for -o, 2 for -rasmol, 3 for -chimerax - int a_opt = 0; // flag for -a, do not normalized by average length - bool u_opt = false; // flag for -u, normalized by user specified length - bool d_opt = false; // flag for -d, user specified d0 - bool do_opt= false; // flag for -do, output distance of i-th aligned pair - - bool full_opt = false;// do not show chain level alignment - double TMcut =-1; - bool se_opt =false; - int infmt1_opt=-1; // PDB or PDBx/mmCIF format for chain_1 - int infmt2_opt=-1; // PDB or PDBx/mmCIF format for chain_2 - int ter_opt =-1; // default change to 2 (END, or different chainID) - int split_opt =-1; // default change to 2 (split each chains) - int outfmt_opt=0; // set -outfmt to full output - bool fast_opt =false; // flags for -fast, fTM-align algorithm - int cp_opt =0; // do not check circular permutation - int closeK_opt=-1; // number of atoms for SOI initial alignment. 
- // 5 and 0 for -mm 5 and 6 - int hinge_opt =9; // maximum number of hinge allowed for flexible - int mirror_opt=0; // do not align mirror - int het_opt=0; // do not read HETATM residues - int mm_opt=0; // do not perform MM-align - string atom_opt ="auto";// use C alpha atom for protein and C3' for RNA - string mol_opt ="auto";// auto-detect the molecule type as protein/RNA - string suffix_opt=""; // set -suffix to empty - string dir_opt =""; // set -dir to empty - string dirpair_opt=""; // set -dirpair to empty - string dir1_opt =""; // set -dir1 to empty - string dir2_opt =""; // set -dir2 to empty - string chainmapfile=""; // chain mapping between two complexes - int byresi_opt=0; // set -byresi to 0 + bool h_opt = false; // print full help message + bool v_opt = false; // print version + bool m_opt = false; // flag for -m, output rotation matrix + int i_opt = 0; // 1 for -i, 3 for -I + int o_opt = 0; // 1 for -o, 2 for -rasmol, 3 for -chimerax + int a_opt = 0; // flag for -a, do not normalized by average length + bool u_opt = false; // flag for -u, normalized by user specified length + bool d_opt = false; // flag for -d, user specified d0 + bool do_opt = false; // flag for -do, output distance of i-th aligned pair + + bool full_opt = false; // do not show chain level alignment + double TMcut = -1; + bool se_opt = false; + int infmt1_opt = -1; // PDB or PDBx/mmCIF format for chain_1 + int infmt2_opt = -1; // PDB or PDBx/mmCIF format for chain_2 + int ter_opt = -1; // default change to 2 (END, or different chainID) + int split_opt = -1; // default change to 2 (split each chains) + int outfmt_opt = 0; // set -outfmt to full output + bool fast_opt = false; // flags for -fast, fTM-align algorithm + int cp_opt = 0; // do not check circular permutation + int closeK_opt = -1; // number of atoms for SOI initial alignment. 
+ // 5 and 0 for -mm 5 and 6 + int hinge_opt = 9; // maximum number of hinge allowed for flexible + int mirror_opt = 0; // do not align mirror + int het_opt = 0; // do not read HETATM residues + int mm_opt = 0; // do not perform MM-align + string atom_opt = "auto"; // use C alpha atom for protein and C3' for RNA + string mol_opt = "auto"; // auto-detect the molecule type as protein/RNA + string suffix_opt = ""; // set -suffix to empty + string dir_opt = ""; // set -dir to empty + string dirpair_opt = ""; // set -dirpair to empty + string dir1_opt = ""; // set -dir1 to empty + string dir2_opt = ""; // set -dir2 to empty + string chainmapfile = ""; // chain mapping between two complexes + int byresi_opt = 0; // set -byresi to 0 vector chain1_list; // only when -dir1 is set vector chain2_list; // only when -dir2 is set vector chain2parse1; vector chain2parse2; vector model2parse1; vector model2parse2; - vector > chain_pair_list; // only when -dirpair is set + vector> chain_pair_list; // only when -dirpair is set - for(int i = 1; i < argc; i++) + for (int i = 1; i < argc; i++) { - if ( !strcmp(argv[i],"-o") ) + if (!strcmp(argv[i], "-o")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -o"); - if (o_opt==2) - cerr<<"Warning! -rasmol is already set. Ignore -o"<=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -rasmol"); - if (o_opt==1) - cerr<<"Warning! -o is already set. Ignore -rasmol"<=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -chimerax"); - if (o_opt==1) - cerr<<"Warning! -o is already set. Ignore -chimerax"<=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -u or -L"); - Lnorm_ass = atof(argv[i + 1]); u_opt = true; i++; - if (Lnorm_ass<=0) PrintErrorAndQuit( - "ERROR! The value for -u or -L should be >0"); + Lnorm_ass = atof(argv[i + 1]); + u_opt = true; + i++; + if (Lnorm_ass <= 0) + PrintErrorAndQuit( + "ERROR! 
The value for -u or -L should be >0"); } - else if ( !strcmp(argv[i],"-a") ) + else if (!strcmp(argv[i], "-a")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -a"); - if (!strcmp(argv[i + 1], "T")) a_opt=true; - else if (!strcmp(argv[i + 1], "F")) a_opt=false; - else + if (!strcmp(argv[i + 1], "T")) + a_opt = true; + else if (!strcmp(argv[i + 1], "F")) + a_opt = false; + else { - a_opt=atoi(argv[i + 1]); - if (a_opt!=-2 && a_opt!=-1 && a_opt!=1) + a_opt = atoi(argv[i + 1]); + if (a_opt != -2 && a_opt != -1 && a_opt != 1) PrintErrorAndQuit("-a must be -2, -1, 1, T or F"); } i++; } - else if ( !strcmp(argv[i],"-full") ) + else if (!strcmp(argv[i], "-full")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -full"); - if (!strcmp(argv[i + 1], "T")) full_opt=true; - else if (!strcmp(argv[i + 1], "F")) full_opt=false; - else PrintErrorAndQuit("-full must be T or F"); + if (!strcmp(argv[i + 1], "T")) + full_opt = true; + else if (!strcmp(argv[i + 1], "F")) + full_opt = false; + else + PrintErrorAndQuit("-full must be T or F"); i++; } - else if ( !strcmp(argv[i],"-d") ) + else if (!strcmp(argv[i], "-d")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -d"); - d0_scale = atof(argv[i + 1]); d_opt = true; i++; + d0_scale = atof(argv[i + 1]); + d_opt = true; + i++; } - else if ( !strcmp(argv[i],"-closeK") ) + else if (!strcmp(argv[i], "-closeK")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -closeK"); - closeK_opt = atoi(argv[i + 1]); i++; + closeK_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-hinge") ) + else if (!strcmp(argv[i], "-hinge")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! 
Missing value for -hinge"); - hinge_opt = atoi(argv[i + 1]); i++; + hinge_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-v") ) + else if (!strcmp(argv[i], "-v")) { v_opt = true; } - else if ( !strcmp(argv[i],"-do") ) + else if (!strcmp(argv[i], "-do")) { do_opt = true; } - else if ( !strcmp(argv[i],"-h") ) + else if (!strcmp(argv[i], "-h")) { h_opt = true; } - else if ( !strcmp(argv[i],"-i") ) + else if (!strcmp(argv[i], "-i")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -i"); - if (i_opt==3) + if (i_opt == 3) PrintErrorAndQuit("ERROR! -i and -I cannot be used together"); - fname_lign = argv[i + 1]; i_opt = 1; i++; + fname_lign = argv[i + 1]; + i_opt = 1; + i++; } - else if (!strcmp(argv[i], "-I") ) + else if (!strcmp(argv[i], "-I")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -I"); - if (i_opt==1) + if (i_opt == 1) PrintErrorAndQuit("ERROR! -I and -i cannot be used together"); - fname_lign = argv[i + 1]; i_opt = 3; i++; + fname_lign = argv[i + 1]; + i_opt = 3; + i++; } - else if (!strcmp(argv[i], "-chainmap") ) + else if (!strcmp(argv[i], "-chainmap")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -chainmap"); - chainmapfile = argv[i + 1]; i++; + chainmapfile = argv[i + 1]; + i++; } - else if (!strcmp(argv[i], "-chain1") ) + else if (!strcmp(argv[i], "-chain1")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -chain1"); - split(argv[i+1],chain2parse1,','); + split(argv[i + 1], chain2parse1, ','); i++; } - else if (!strcmp(argv[i], "-chain2") ) + else if (!strcmp(argv[i], "-chain2")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! 
Missing value for -chain2"); - split(argv[i+1],chain2parse2,','); + split(argv[i + 1], chain2parse2, ','); i++; } - else if (!strcmp(argv[i], "-model1") ) + else if (!strcmp(argv[i], "-model1")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -model1"); - split(argv[i+1],model2parse1,','); + split(argv[i + 1], model2parse1, ','); i++; } - else if (!strcmp(argv[i], "-model2") ) + else if (!strcmp(argv[i], "-model2")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -model2"); - split(argv[i+1],model2parse2,','); + split(argv[i + 1], model2parse2, ','); i++; } - else if (!strcmp(argv[i], "-m") ) + else if (!strcmp(argv[i], "-m")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -m"); - fname_matrix = argv[i + 1]; m_opt = true; i++; - }// get filename for rotation matrix + fname_matrix = argv[i + 1]; + m_opt = true; + i++; + } // get filename for rotation matrix else if (!strcmp(argv[i], "-fast")) { fast_opt = true; @@ -3212,370 +3735,446 @@ int main(int argc, char *argv[]) { se_opt = true; } - else if ( !strcmp(argv[i],"-infmt1") ) + else if (!strcmp(argv[i], "-infmt1")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -infmt1"); - infmt1_opt=atoi(argv[i + 1]); i++; - if (infmt1_opt<-1 || infmt1_opt>3) + infmt1_opt = atoi(argv[i + 1]); + i++; + if (infmt1_opt < -1 || infmt1_opt > 3) PrintErrorAndQuit("ERROR! -infmt1 can only be -1, 0, 1, 2, or 3"); } - else if ( !strcmp(argv[i],"-infmt2") ) + else if (!strcmp(argv[i], "-infmt2")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -infmt2"); - infmt2_opt=atoi(argv[i + 1]); i++; - if (infmt2_opt<-1 || infmt2_opt>3) + infmt2_opt = atoi(argv[i + 1]); + i++; + if (infmt2_opt < -1 || infmt2_opt > 3) PrintErrorAndQuit("ERROR! 
-infmt2 can only be -1, 0, 1, 2, or 3"); } - else if ( !strcmp(argv[i],"-ter") ) + else if (!strcmp(argv[i], "-ter")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -ter"); - ter_opt=atoi(argv[i + 1]); i++; + ter_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-split") ) + else if (!strcmp(argv[i], "-split")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -split"); - split_opt=atoi(argv[i + 1]); i++; + split_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-atom") ) + else if (!strcmp(argv[i], "-atom")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -atom"); - atom_opt=argv[i + 1]; i++; + atom_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-mol") ) + else if (!strcmp(argv[i], "-mol")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -mol"); - mol_opt=argv[i + 1]; i++; - if (mol_opt=="prot") mol_opt="protein"; - else if (mol_opt=="DNA") mol_opt="RNA"; - if (mol_opt!="auto" && mol_opt!="protein" && mol_opt!="RNA") + mol_opt = argv[i + 1]; + i++; + if (mol_opt == "prot") + mol_opt = "protein"; + else if (mol_opt == "DNA") + mol_opt = "RNA"; + if (mol_opt != "auto" && mol_opt != "protein" && mol_opt != "RNA") PrintErrorAndQuit("ERROR! Molecule type must be one of the " - "following:\nauto, prot (the same as 'protein'), and " - "RNA (the same as 'DNA')."); + "following:\nauto, prot (the same as 'protein'), and " + "RNA (the same as 'DNA')."); } - else if ( !strcmp(argv[i],"-dir") ) + else if (!strcmp(argv[i], "-dir")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -dir"); - dir_opt=argv[i + 1]; i++; + dir_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-dirpair") ) + else if (!strcmp(argv[i], "-dirpair")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! 
Missing value for -dirpair"); - dirpair_opt=argv[i + 1]; i++; + dirpair_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-dir1") ) + else if (!strcmp(argv[i], "-dir1")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -dir1"); - dir1_opt=argv[i + 1]; i++; + dir1_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-dir2") ) + else if (!strcmp(argv[i], "-dir2")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -dir2"); - dir2_opt=argv[i + 1]; i++; + dir2_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-suffix") ) + else if (!strcmp(argv[i], "-suffix")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -suffix"); - suffix_opt=argv[i + 1]; i++; + suffix_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-outfmt") ) + else if (!strcmp(argv[i], "-outfmt")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -outfmt"); - outfmt_opt=atoi(argv[i + 1]); i++; + outfmt_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-TMcut") ) + else if (!strcmp(argv[i], "-TMcut")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -TMcut"); - TMcut=atof(argv[i + 1]); i++; + TMcut = atof(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-byresi") || - !strcmp(argv[i],"-tmscore") || - !strcmp(argv[i],"-TMscore")) + else if (!strcmp(argv[i], "-byresi") || + !strcmp(argv[i], "-tmscore") || + !strcmp(argv[i], "-TMscore")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! 
Missing value for -byresi"); - byresi_opt=atoi(argv[i + 1]); i++; + byresi_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-seq") ) + else if (!strcmp(argv[i], "-seq")) { - byresi_opt=5; + byresi_opt = 5; } - else if ( !strcmp(argv[i],"-cp") ) + else if (!strcmp(argv[i], "-cp")) { - mm_opt=3; + mm_opt = 3; } - else if ( !strcmp(argv[i],"-mirror") ) + else if (!strcmp(argv[i], "-mirror")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -mirror"); - mirror_opt=atoi(argv[i + 1]); i++; + mirror_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-het") ) + else if (!strcmp(argv[i], "-het")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -het"); - het_opt=atoi(argv[i + 1]); i++; - if (het_opt!=0 && het_opt!=1 && het_opt!=2) + het_opt = atoi(argv[i + 1]); + i++; + if (het_opt != 0 && het_opt != 1 && het_opt != 2) PrintErrorAndQuit("-het must be 0, 1, or 2"); } - else if ( !strcmp(argv[i],"-mm") ) + else if (!strcmp(argv[i], "-mm")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -mm"); - mm_opt=atoi(argv[i + 1]); i++; + mm_opt = atoi(argv[i + 1]); + i++; } - else if (xname.size() == 0) xname=argv[i]; - else if (yname.size() == 0) yname=argv[i]; - else PrintErrorAndQuit(string("ERROR! Undefined option ")+argv[i]); + else if (xname.size() == 0) + xname = argv[i]; + else if (yname.size() == 0) + yname = argv[i]; + else + PrintErrorAndQuit(string("ERROR! 
Undefined option ") + argv[i]); } - if (xname.size()==0 || (yname.size() && dir_opt.size()) || + if (xname.size() == 0 || (yname.size() && dir_opt.size()) || (yname.size() && dirpair_opt.size()) || - (yname.size()==0 && dir_opt.size()==0 && dirpair_opt.size()==0)) + (yname.size() == 0 && dir_opt.size() == 0 && dirpair_opt.size() == 0)) { - if (h_opt) print_help(h_opt); + if (h_opt) + print_help(h_opt); if (v_opt) { print_version(); exit(EXIT_FAILURE); } - if (xname.size()==0) + if (xname.size() == 0) PrintErrorAndQuit("Please provide input structures"); - else if (yname.size()==0 && dir_opt.size()==0 && dirpair_opt.size()==0 && mm_opt!=4) + else if (yname.size() == 0 && dir_opt.size() == 0 && dirpair_opt.size() == 0 && mm_opt != 4) PrintErrorAndQuit("Please provide structure B"); - else if (yname.size() && dir_opt.size()+dirpair_opt.size()) + else if (yname.size() && dir_opt.size() + dirpair_opt.size()) PrintErrorAndQuit("Please provide only one file name if -dir is set"); } - if (suffix_opt.size() && dir_opt.size()+dirpair_opt.size()+dir1_opt.size()+dir2_opt.size()==0) + if (suffix_opt.size() && dir_opt.size() + dirpair_opt.size() + dir1_opt.size() + dir2_opt.size() == 0) PrintErrorAndQuit("-suffix is only valid if -dir, -dir1 or -dir2 is set"); if ((dir_opt.size() || dirpair_opt.size() || dir1_opt.size() || dir2_opt.size())) { - if (mm_opt!=2 && mm_opt!=4) + if (mm_opt != 2 && mm_opt != 4) { if (o_opt) PrintErrorAndQuit("-o cannot be set with -dir, -dir1 or -dir2"); - if (m_opt && fname_matrix!="-") + if (m_opt && fname_matrix != "-") PrintErrorAndQuit("-m can only be - or unset when using -dir, -dir1 or -dir2"); } - else if ((dir_opt.size() || dirpair_opt.size() )&& (dir1_opt.size() || dir2_opt.size())) + else if ((dir_opt.size() || dirpair_opt.size()) && (dir1_opt.size() || dir2_opt.size())) PrintErrorAndQuit("-dir cannot be set with -dir1 or -dir2"); else if (dir_opt.size() && dirpair_opt.size()) PrintErrorAndQuit("-dir cannot be set with -dirpair"); } - if 
(o_opt && (infmt1_opt!=-1 && infmt1_opt!=0 && infmt1_opt!=3)) + if (o_opt && (infmt1_opt != -1 && infmt1_opt != 0 && infmt1_opt != 3)) PrintErrorAndQuit("-o can only be used with -infmt1 -1, 0 or 3"); - bool autojustify=(atom_opt=="auto" || atom_opt=="PC4'"); // auto re-pad atom name - if (mol_opt=="protein" && atom_opt=="auto") - atom_opt=" CA "; - else if (mol_opt=="RNA" && atom_opt=="auto") - atom_opt=" C3'"; - if (atom_opt.size()!=4) + bool autojustify = (atom_opt == "auto" || atom_opt == "PC4'"); // auto re-pad atom name + if (mol_opt == "protein" && atom_opt == "auto") + atom_opt = " CA "; + else if (mol_opt == "RNA" && atom_opt == "auto") + atom_opt = " C3'"; + if (atom_opt.size() != 4) { - cerr<<"ERROR! Atom name must have 4 characters, including space.\n" - "For example, C alpha, C3' and P atoms should be specified by\n" - "-atom \" CA \", -atom \" P \" and -atom \" C3'\", respectively."<=5 || atom_opt.size()==0) return 1; - else if (atom_opt.size()==1) atom_opt=" "+atom_opt+" "; - else if (atom_opt.size()==2) atom_opt=" "+atom_opt+" "; - else if (atom_opt.size()==3) atom_opt=" "+atom_opt; - cerr<<"Change -atom to \""<= 5 || atom_opt.size() == 0) + return 1; + else if (atom_opt.size() == 1) + atom_opt = " " + atom_opt + " "; + else if (atom_opt.size() == 2) + atom_opt = " " + atom_opt + " "; + else if (atom_opt.size() == 3) + atom_opt = " " + atom_opt; + cerr << "Change -atom to \"" << atom_opt << "\"" << endl; } - if (d_opt && d0_scale<=0) + if (d_opt && d0_scale <= 0) PrintErrorAndQuit("Wrong value for option -d! 
It should be >0"); - if (outfmt_opt>=2 && (a_opt || u_opt || d_opt)) + if (outfmt_opt >= 2 && (a_opt || u_opt || d_opt)) PrintErrorAndQuit("-outfmt 2 cannot be used with -a, -u, -L, -d"); - if (byresi_opt!=0) + if (byresi_opt != 0) { if (i_opt) PrintErrorAndQuit("-TMscore >=1 cannot be used with -i or -I"); - if (byresi_opt<0 || byresi_opt>7) + if (byresi_opt < 0 || byresi_opt > 7) PrintErrorAndQuit("-TMscore can only be 0 to 7"); - if ((byresi_opt==2 || byresi_opt==3 || byresi_opt==6) && ter_opt>=2) + if ((byresi_opt == 2 || byresi_opt == 3 || byresi_opt == 6) && ter_opt >= 2) PrintErrorAndQuit("-TMscore 2 and 6 must be used with -ter <=1"); } - //if (split_opt==1 && ter_opt!=0) - //PrintErrorAndQuit("-split 1 should be used with -ter 0"); - //else if (split_opt==2 && ter_opt!=0 && ter_opt!=1) - //PrintErrorAndQuit("-split 2 should be used with -ter 0 or 1"); - if (split_opt<0) - if (byresi_opt==2 || byresi_opt==3) split_opt=0; - else split_opt=2; - else if (split_opt>2) + // if (split_opt==1 && ter_opt!=0) + // PrintErrorAndQuit("-split 1 should be used with -ter 0"); + // else if (split_opt==2 && ter_opt!=0 && ter_opt!=1) + // PrintErrorAndQuit("-split 2 should be used with -ter 0 or 1"); + if (split_opt < 0) + if (byresi_opt == 2 || byresi_opt == 3) + split_opt = 0; + else + split_opt = 2; + else if (split_opt > 2) PrintErrorAndQuit("-split can only be 0, 1 or 2"); - if (mm_opt==3) + if (mm_opt == 3) { - cp_opt=true; - mm_opt=0; + cp_opt = true; + mm_opt = 0; } if (cp_opt && i_opt) PrintErrorAndQuit("-mm 3 cannot be used with -i or -I"); - if (mirror_opt && het_opt!=1) - cerr<<"WARNING! -mirror was not used with -het 1. " - <<"D amino acids may not be correctly aligned."<=2 && (mm_opt==1 || mm_opt==2)) PrintErrorAndQuit("-mm 1 or 2 must be used with -ter 0 or -ter 1"); - if (mm_opt==4 && (yname.size() || dir2_opt.size())) - cerr<<"WARNING! 
structure_2 is ignored for -mm 4"<= 2 && (mm_opt == 1 || mm_opt == 2)) + PrintErrorAndQuit("-mm 1 or 2 must be used with -ter 0 or -ter 1"); + if (mm_opt == 4 && (yname.size() || dir2_opt.size())) + cerr << "WARNING! structure_2 is ignored for -mm 4" << endl; + if (dirpair_opt.size() && (mm_opt == 2 || mm_opt == 4)) PrintErrorAndQuit("-mm 2 or 4 cannot be used with -dirpair"); } - else if (full_opt) PrintErrorAndQuit("-full can only be used with -mm"); + else if (full_opt) + PrintErrorAndQuit("-full can only be used with -mm"); - if (o_opt && ter_opt<=1 && split_opt==2) + if (o_opt && ter_opt <= 1 && split_opt == 2) { - if (mm_opt && o_opt==2) cerr<<"WARNING! -mm may generate incorrect" - <<" RasMol output due to limitations in PDB file format. " - <<"When -mm is used, -o is recommended over -rasmol"<=10) + if (mm_opt == 7 && hinge_opt >= 10) PrintErrorAndQuit("ERROR! -hinge must be <10"); - if (chainmapfile.size() && mm_opt!=1) + if (chainmapfile.size() && mm_opt != 1) PrintErrorAndQuit("ERROR! -chainmap must be used with -mm 1"); - /* read initial alignment file from 'align.txt' */ - if (i_opt) read_user_alignment(sequence, fname_lign, i_opt); + if (i_opt) + read_user_alignment(sequence, fname_lign, i_opt); - if (byresi_opt==6 || byresi_opt==7) mm_opt=1; - else if (byresi_opt) i_opt=3; + if (byresi_opt == 6 || byresi_opt == 7) + mm_opt = 1; + else if (byresi_opt) + i_opt = 3; if (m_opt && fname_matrix == "") // Output rotation matrix: matrix.txt PrintErrorAndQuit("ERROR! 
Please provide a file name for option -m!"); /* parse file list */ - int i; + int i; if (dirpair_opt.size()) - file2chainpairlist(chain1_list,chain2_list, xname, dirpair_opt, suffix_opt); + file2chainpairlist(chain1_list, chain2_list, xname, dirpair_opt, suffix_opt); else { - if (dir1_opt.size()+dir_opt.size()==0) chain1_list.push_back(xname); - else file2chainlist(chain1_list, xname, dir_opt+dir1_opt, suffix_opt); + if (dir1_opt.size() + dir_opt.size() == 0) + chain1_list.push_back(xname); + else + file2chainlist(chain1_list, xname, dir_opt + dir1_opt, suffix_opt); if (dir_opt.size()) - for (i=0;i tmp_vec1; vector tmp_vec2; - for (i=0;i().swap(chain1_list); @@ -3585,10 +4184,11 @@ int main(int argc, char *argv[]) vector().swap(model2parse1); vector().swap(model2parse2); vector().swap(sequence); - vector >().swap(chain_pair_list); + vector>().swap(chain_pair_list); t2 = clock(); - float diff = ((float)t2 - (float)t1)/CLOCKS_PER_SEC; - if (outfmt_opt<2) printf("#Total CPU time is %5.2f seconds\n", diff); + float diff = ((float)t2 - (float)t1) / CLOCKS_PER_SEC; + if (outfmt_opt < 2) + printf("#Total CPU time is %5.2f seconds\n", diff); return 0; } diff --git a/flexalign.h b/flexalign.h index e2f8db0..ec3d0d5 100644 --- a/flexalign.h +++ b/flexalign.h @@ -5,378 +5,397 @@ #include "TMalign.h" -void t_u2tu(double t0[3],double u0[3][3], vector &tu_tmp) +void t_u2tu(double t0[3], double u0[3][3], vector &tu_tmp) { - int i,j,k; - for (i=0;i<3;i++) tu_tmp[i]=t0[i]; - k=3; - for (i=0;i<3;i++) for (j=0;j<3;j++) - { - tu_tmp[k]=u0[i][j]; - k++; - } + int i, j, k; + for (i = 0; i < 3; i++) + tu_tmp[i] = t0[i]; + k = 3; + for (i = 0; i < 3; i++) + for (j = 0; j < 3; j++) + { + tu_tmp[k] = u0[i][j]; + k++; + } } -void tu2t_u(vector tu_tmp, double t0[3],double u0[3][3]) +void tu2t_u(vector tu_tmp, double t0[3], double u0[3][3]) { - int i,j,k; - for (i=0;i<3;i++) t0[i]=tu_tmp[i]; - k=3; - for (i=0;i<3;i++) for (j=0;j<3;j++) - { - u0[i][j]=tu_tmp[k]; - k++; - } + int i, j, k; + for 
(i = 0; i < 3; i++) + t0[i] = tu_tmp[i]; + k = 3; + for (i = 0; i < 3; i++) + for (j = 0; j < 3; j++) + { + u0[i][j] = tu_tmp[k]; + k++; + } } void aln2invmap(const string &seqxA, const string &seqyA, int *invmap) { - int i,j,r; - int ylen=0; - for (r=0;r >&tu_vec, - double &TM1, double &TM2, double &TM3, double &TM4, double &TM5, - double &d0_0, double &TM_0, - double &d0A, double &d0B, double &d0u, double &d0a, double &d0_out, - string &seqM, string &seqxA, string &seqyA, vector&do_vec, - double &rmsd0, int &L_ali, double &Liden, - double &TM_ali, double &rmsd_ali, int &n_ali, int &n_ali8, - const int xlen, const int ylen, - const vector sequence, const double Lnorm_ass, - const double d0_scale, const int i_opt, const int a_opt, - const bool u_opt, const bool d_opt, const bool fast_opt, - const int mol_type, const int hinge_opt) + const char *seqx, const char *seqy, const char *secx, const char *secy, + double t0[3], double u0[3][3], vector> &tu_vec, + double &TM1, double &TM2, double &TM3, double &TM4, double &TM5, + double &d0_0, double &TM_0, + double &d0A, double &d0B, double &d0u, double &d0a, double &d0_out, + string &seqM, string &seqxA, string &seqyA, vector &do_vec, + double &rmsd0, int &L_ali, double &Liden, + double &TM_ali, double &rmsd_ali, int &n_ali, int &n_ali8, + const int xlen, const int ylen, + const vector sequence, const double Lnorm_ass, + const double d0_scale, const int i_opt, const int a_opt, + const bool u_opt, const bool d_opt, const bool fast_opt, + const int mol_type, const int hinge_opt, const int ss_opt) { - vector tu_tmp(12,0); - int round2=tu_vec.size(); - if (round2==0) + vector tu_tmp(12, 0); + int round2 = tu_vec.size(); + if (round2 == 0) { TMalign_main(xa, ya, seqx, seqy, secx, secy, t0, u0, - TM1, TM2, TM3, TM4, TM5, d0_0, TM_0, - d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA, do_vec, - rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, - xlen, ylen, sequence, Lnorm_ass, - d0_scale, i_opt, a_opt, u_opt, d_opt, fast_opt, 
mol_type); - - t_u2tu(t0,u0,tu_tmp); + TM1, TM2, TM3, TM4, TM5, d0_0, TM_0, + d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA, do_vec, + rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, + xlen, ylen, sequence, Lnorm_ass, + d0_scale, i_opt, a_opt, u_opt, d_opt, fast_opt, mol_type, -1, ss_opt); + + t_u2tu(t0, u0, tu_tmp); tu_vec.push_back(tu_tmp); } - - int i,j,r; - int* invmap=new int[ylen+1]; - for (j=0;jTM2_h)?TM1_h:TM2_h; - double TM =(TM1 >TM2 )?TM1 :TM2 ; - if (TM_h>TM) + for (j = 0; j < ylen + 1; j++) + invmap[j] = -1; + TM1 = TM2 = TM3 = TM4 = TM5 = rmsd0 = 0; + seqM = ""; + seqxA = ""; + seqyA = ""; + n_ali = n_ali8 = 0; + se_main(xt, ya, seqx, seqy, TM1, TM2, TM3, TM4, TM5, d0_0, + TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA, do_vec, + rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, + xlen, ylen, sequence, Lnorm_ass, d0_scale, i_opt, + a_opt, u_opt, d_opt, mol_type, 0, invmap, 1); + + double TM_h = (TM1_h > TM2_h) ? TM1_h : TM2_h; + double TM = (TM1 > TM2) ? TM1 : TM2; + if (TM_h > TM) { - TM1=TM1_h; - TM2=TM2_h; - TM3=TM3_h; - TM4=TM4_h; - TM5=TM5_h; - seqM=seqM_h; - seqxA=seqxA_h; - seqyA=seqyA_h; - rmsd0=rmsd0_h; - n_ali=n_ali_h; - n_ali8=n_ali8_h; - for (j=0;j r1toi(xlen_h,0); - vector r2toj(ylen_h,0); + vector r1toi(xlen_h, 0); + vector r2toj(ylen_h, 0); - int r1,r2; - i=j=-1; - r1=r2=0; - for (r=0;r=5) + d0A, d0B, d0u, d0a, d0_out, seqM_h, seqxA_h, seqyA_h, do_vec, + rmsd0_h, L_ali, Liden, TM_ali, rmsd_ali, n_ali_h, n_ali8_h, + xlen, ylen, sequence, Lnorm_ass, d0_scale, i_opt, + a_opt, u_opt, d_opt, mol_type, 0, invmap_h, hinge + 1); + int new_ali = 0; + for (r = 0; r < seqM_h.size(); r++) + new_ali += (seqM_h[r] == hinge + '1'); + if (n_ali8_h - n_ali8 < 5) + new_ali = 0; + if (new_ali >= 5) { - TM1=TM1_h; - TM2=TM2_h; - TM3=TM3_h; - TM4=TM4_h; - TM5=TM5_h; - seqM=seqM_h; - seqxA=seqxA_h; - seqyA=seqyA_h; - rmsd0=rmsd0_h; - n_ali=n_ali_h; - n_ali8=n_ali8_h; - t_u2tu(t0,u0,tu_tmp); + TM1 = TM1_h; + TM2 = TM2_h; + TM3 = TM3_h; + TM4 
= TM4_h; + TM5 = TM5_h; + seqM = seqM_h; + seqxA = seqxA_h; + seqyA = seqyA_h; + rmsd0 = rmsd0_h; + n_ali = n_ali_h; + n_ali8 = n_ali8_h; + t_u2tu(t0, u0, tu_tmp); tu_vec.push_back(tu_tmp); - for (j=0;jhinge="<=0) cout<<"("<hinge="<=0) cout<<"("< seqM_char(ylen,' '); - vector di_vec(ylen,-1); + vector seqM_char(ylen, ' '); + vector di_vec(ylen, -1); double d; - for (hinge=tu_vec.size()-1;hinge>=0;hinge--) + for (hinge = tu_vec.size() - 1; hinge >= 0; hinge--) { - tu2t_u(tu_vec[hinge],t0,u0); + tu2t_u(tu_vec[hinge], t0, u0); do_rotation(xa, xt, xlen, t0, u0); - for (j=0;j=0;hinge--) + for (hinge = tu_vec.size() - 1; hinge >= 0; hinge--) { - j=-1; - for (r=0;r 0 && (seqM[r - 1] == hinge + '0' || seqM[r - 1] == ' ')) + continue; + if (r < seqM.size() - 1 && r > 0 && seqM[r - 1] != seqM[r + 1]) continue; - if (r>0 && (seqM[r-1]==hinge+'0' || seqM[r-1]==' ')) continue; - if (r0 && seqM[r-1]!=seqM[r+1]) continue; - if (r>0) seqM[r]=seqM_char[j]=seqM[r-1]; - else seqM[r]=seqM_char[j]=seqM[r+1]; + if (r > 0) + seqM[r] = seqM_char[j] = seqM[r - 1]; + else + seqM[r] = seqM_char[j] = seqM[r + 1]; } } /* smooth out AFP assignment: remove singleton at the end of fragment */ - char left_hinge=' '; - char right_hinge=' '; - for (hinge=tu_vec.size()-1;hinge>=0;hinge--) + char left_hinge = ' '; + char right_hinge = ' '; + for (hinge = tu_vec.size() - 1; hinge >= 0; hinge--) { - j=-1; - for (r=0;r0 && seqM[r-1]==' ' && r 0 && seqM[r - 1] == ' ' && r < seqM.size() - 1 && seqM[r + 1] == ' ') continue; - - left_hinge=' '; - for (i=r-1;i>=0;i--) + + left_hinge = ' '; + for (i = r - 1; i >= 0; i--) { - if (seqM[i]==' ') continue; - left_hinge=seqM[i]; + if (seqM[i] == ' ') + continue; + left_hinge = seqM[i]; break; } - if (left_hinge==hinge+'0') continue; - - right_hinge=' '; - for (i=r+1;i=0;hinge--) + for (hinge = tu_vec.size() - 1; hinge >= 0; hinge--) { - j=-1; - for (r=0;r 0 && (seqM[r - 1] == ' ' || seqM[r - 1] == hinge + '0')) + continue; + if (r < seqM.size() - 2 && r > 0 && 
seqM[r - 1] != seqM[r + 2]) continue; - if (r>0 && (seqM[r-1]==' ' || seqM[r-1]==hinge+'0')) continue; - if (r0 && seqM[r-1]!=seqM[r+2]) continue; - if (r>0) seqM[r]=seqM_char[j]=seqM[r+1]=seqM_char[j+1]=seqM[r-1]; - else seqM[r]=seqM_char[j]=seqM[r+1]=seqM_char[j+1]=seqM[r+2]; + if (r > 0) + seqM[r] = seqM_char[j] = seqM[r + 1] = seqM_char[j + 1] = seqM[r - 1]; + else + seqM[r] = seqM_char[j] = seqM[r + 1] = seqM_char[j + 1] = seqM[r + 2]; } } /* smooth out AFP assignment: remove disconnected singleton */ - int i1,i2; - for (hinge=tu_vec.size()-1;hinge>=0;hinge--) + int i1, i2; + for (hinge = tu_vec.size() - 1; hinge >= 0; hinge--) { - j=-1; - for (r=0;r=0;i--) + if (seqM[r] != hinge + '0') + continue; + + left_hinge = ' '; + for (i = r - 1; i >= 0; i--) { - if (seqM[i]==' ') continue; - left_hinge=seqM[i]; - i1=(r-i); + if (seqM[i] == ' ') + continue; + left_hinge = seqM[i]; + i1 = (r - i); break; } - if (left_hinge==hinge+'0') continue; - - right_hinge=' '; - for (i=r+1;i=0;hinge--) + for (hinge = tu_vec.size() - 1; hinge >= 0; hinge--) { - tu2t_u(tu_vec[hinge],t0,u0); + tu2t_u(tu_vec[hinge], t0, u0); do_rotation(xa, xt, xlen, t0, u0); - for (j=0;j0;hinge--) + TM2 /= xlen; + TM1 /= ylen; + TM3 /= (xlen + ylen) * 0.5; + TM4 /= Lnorm_ass; + TM5 /= ylen; + if (n_ali8) + rmsd0 = sqrt(rmsd0 / n_ali8); + for (hinge = tu_vec.size() - 1; hinge > 0; hinge--) { - int afp_len=0; - for (r=0;r >&tu_vec, double t[3], double u[3][3]) +void output_flexalign_rotation_matrix(const char *fname_matrix, + const vector> &tu_vec, double t[3], double u[3][3]) { stringstream ss; char dest[1000]; - for (int hinge=0;hinge >&tu_vec, - double t[3], double u[3][3], const int ter_opt, - const int mm_opt, const int split_opt, const int mirror_opt, - const char *seqM, const char *seqxA, const char *seqyA, - const vector&resi_vec1, const vector&resi_vec2, - const string chainID1, const string chainID2, - const int xlen, const int ylen, const double d0A, const int n_ali8, - const double rmsd, 
const double TM1, const double Liden) + const string fname_super, const vector> &tu_vec, + double t[3], double u[3][3], const int ter_opt, + const int mm_opt, const int split_opt, const int mirror_opt, + const char *seqM, const char *seqxA, const char *seqyA, + const vector &resi_vec1, const vector &resi_vec2, + const string chainID1, const string chainID2, + const int xlen, const int ylen, const double d0A, const int n_ali8, + const double rmsd, const double TM1, const double Liden) { stringstream buf; stringstream buf_all; stringstream buf_atm; stringstream buf_all_atm; stringstream buf_all_atm_lig; - //stringstream buf_pdb; + // stringstream buf_pdb; stringstream buf_tm; string line; - double x[3]; // before transform - double x1[3]; // after transform + double x[3]; // before transform + double x1[3]; // after transform bool after_ter; // true if passed the "TER" line in PDB string asym_id; // chain ID - - map resi2hinge_dict; - int r,i,j; - j=-1; - char hinge_char=0; - int ali_len=strlen(seqM); - for (r=0;r resi2hinge_dict; + int r, i, j; + j = -1; + char hinge_char = 0; + int ali_len = strlen(seqM); + for (r = 0; r < strlen(seqxA); r++) { - if (seqxA[r]=='-') continue; + if (seqxA[r] == '-') + continue; j++; - hinge_char=seqM[r]; - if (hinge_char==' ') + hinge_char = seqM[r]; + if (hinge_char == ' ') { - for (i=1;i=0 && seqM[r-i]!=' ') - hinge_char=seqM[r-i]; - else if (r+i= 0 && seqM[r - i] != ' ') + hinge_char = seqM[r - i]; + else if (r + i < xlen && seqM[r + i] != ' ') + hinge_char = seqM[r + i]; + if (hinge_char != ' ') + break; } } - resi2hinge_dict[resi_vec1[j]]=hinge_char-'0'; + resi2hinge_dict[resi_vec1[j]] = hinge_char - '0'; } - string resi=resi_vec1[0]; - int read_resi=resi.size()-4; - - buf_tm<<"REMARK US-align" - <<"\nREMARK Structure 1:"<=1) // align one chain from model 1 + if (split_opt == 2 && ter_opt >= 1) // align one chain from model 1 { - chain1_sele=chainID1.substr(1); - chain2_sele=chainID2.substr(1); + chain1_sele = 
chainID1.substr(1); + chain2_sele = chainID2.substr(1); } - else if (split_opt==2 && ter_opt==0) // align one chain from each model + else if (split_opt == 2 && ter_opt == 0) // align one chain from each model { - for (i=1;i _atom_site; + map _atom_site; int atom_site_pos; vector line_vec; - string atom; // 4-character atom name - string AA; // 3-character residue name - string inscode; // 1-character insertion code + string atom; // 4-character atom name + string AA; // 3-character residue name + string inscode; // 1-character insertion code string model_index; // model index - bool is_mmcif=false; + bool is_mmcif = false; /* used for CONECT record of chain1 */ - int ca_idx1=0; // all CA atoms - int lig_idx1=0; // all atoms - vector idx_vec; + int ca_idx1 = 0; // all CA atoms + int lig_idx1 = 0; // all atoms + vector idx_vec; /* used for CONECT record of chain2 */ - int ca_idx2=0; // all CA atoms - int lig_idx2=0; // all atoms + int ca_idx2 = 0; // all CA atoms + int lig_idx2 = 0; // all atoms /* extract aligned region */ vector resi_aln1; vector resi_aln2; - int i1=-1; - int i2=-1; + int i1 = -1; + int i2 = -1; if (!mm_opt) { - for (i=0;i=3 && line.compare(0,3,"TER")==0) after_ter=true; - if (is_mmcif==false && line.size()>=54 && - (line.compare(0, 6, "ATOM ")==0 || - line.compare(0, 6, "HETATM")==0)) // PDB format + if (ter_opt >= 3 && line.compare(0, 3, "TER") == 0) + after_ter = true; + if (is_mmcif == false && line.size() >= 54 && + (line.compare(0, 6, "ATOM ") == 0 || + line.compare(0, 6, "HETATM") == 0)) // PDB format { - if (line[16]!='A' && line[16]!=' ') continue; - x[0]=atof(line.substr(30,8).c_str()); - x[1]=atof(line.substr(38,8).c_str()); - x[2]=atof(line.substr(46,8).c_str()); - if (mirror_opt) x[2]=-x[2]; - if (read_resi==1) resi=line.substr(22,5); - else resi=line.substr(22,5)+line[21]; - hinge=0; - if (resi2hinge_dict.count(resi)) hinge=resi2hinge_dict[resi]; - tu2t_u(tu_vec[hinge],t,u); + if (line[16] != 'A' && line[16] != ' ') + continue; + 
x[0] = atof(line.substr(30, 8).c_str()); + x[1] = atof(line.substr(38, 8).c_str()); + x[2] = atof(line.substr(46, 8).c_str()); + if (mirror_opt) + x[2] = -x[2]; + if (read_resi == 1) + resi = line.substr(22, 5); + else + resi = line.substr(22, 5) + line[21]; + hinge = 0; + if (resi2hinge_dict.count(resi)) + hinge = resi2hinge_dict[resi]; + tu2t_u(tu_vec[hinge], t, u); transform(t, u, x, x1); - //buf_pdb<=2) + buf_all_atm_lig << line.substr(0, 6) << setw(5) << lig_idx1 + << line.substr(11, 9) << " A" << line.substr(22, 8) + << setiosflags(ios::fixed) << setprecision(3) + << setw(8) << x1[0] << setw(8) << x1[1] << setw(8) << x1[2] << '\n'; + if (chain1_sele.size() && line[21] != chain1_sele[0]) + continue; + if (after_ter || line.compare(0, 6, "ATOM ")) + continue; + if (ter_opt >= 2) { - if (ca_idx1 && asym_id.size() && asym_id!=line.substr(21,1)) + if (ca_idx1 && asym_id.size() && asym_id != line.substr(21, 1)) { - after_ter=true; + after_ter = true; continue; } - asym_id=line[21]; + asym_id = line[21]; } - buf_all_atm<<"ATOM "<=2) + resi = line_vec[_atom_site["auth_seq_id"]]; + else + resi = line_vec[_atom_site["label_seq_id"]]; + if (_atom_site.count("pdbx_PDB_ins_code") && + line_vec[_atom_site["pdbx_PDB_ins_code"]] != "?") + resi += line_vec[_atom_site["pdbx_PDB_ins_code"]][0]; + else + resi += " "; + if (read_resi >= 2) { if (_atom_site.count("auth_asym_id")) - asym_id=line_vec[_atom_site["auth_asym_id"]]; - else asym_id=line_vec[_atom_site["label_asym_id"]]; - if (asym_id==".") asym_id=" "; - resi+=asym_id[0]; + asym_id = line_vec[_atom_site["auth_asym_id"]]; + else + asym_id = line_vec[_atom_site["label_asym_id"]]; + if (asym_id == ".") + asym_id = " "; + resi += asym_id[0]; } - hinge=0; - if (resi2hinge_dict.count(resi)) hinge=resi2hinge_dict[resi]; - tu2t_u(tu_vec[hinge],t,u); + hinge = 0; + if (resi2hinge_dict.count(resi)) + hinge = resi2hinge_dict[resi]; + tu2t_u(tu_vec[hinge], t, u); transform(t, u, x, x1); - if (_atom_site.count("label_alt_id")==0 || - 
line_vec[_atom_site["label_alt_id"]]=="." || - line_vec[_atom_site["label_alt_id"]]=="A") + if (_atom_site.count("label_alt_id") == 0 || + line_vec[_atom_site["label_alt_id"]] == "." || + line_vec[_atom_site["label_alt_id"]] == "A") { - atom=line_vec[_atom_site["label_atom_id"]]; - if (atom[0]=='"') atom=atom.substr(1); - if (atom.size() && atom[atom.size()-1]=='"') - atom=atom.substr(0,atom.size()-1); - if (atom.size()==0) atom=" "; - else if (atom.size()==1) atom=" "+atom+" "; - else if (atom.size()==2) atom=" "+atom+" "; - else if (atom.size()==3) atom=" "+atom; - else if (atom.size()>=5) atom=atom.substr(0,4); - - AA=line_vec[_atom_site["label_comp_id"]]; // residue name - if (AA.size()==1) AA=" "+AA; - else if (AA.size()==2) AA=" " +AA; - else if (AA.size()>=4) AA=AA.substr(0,3); - + atom = line_vec[_atom_site["label_atom_id"]]; + if (atom[0] == '"') + atom = atom.substr(1); + if (atom.size() && atom[atom.size() - 1] == '"') + atom = atom.substr(0, atom.size() - 1); + if (atom.size() == 0) + atom = " "; + else if (atom.size() == 1) + atom = " " + atom + " "; + else if (atom.size() == 2) + atom = " " + atom + " "; + else if (atom.size() == 3) + atom = " " + atom; + else if (atom.size() >= 5) + atom = atom.substr(0, 4); + + AA = line_vec[_atom_site["label_comp_id"]]; // residue name + if (AA.size() == 1) + AA = " " + AA; + else if (AA.size() == 2) + AA = " " + AA; + else if (AA.size() >= 4) + AA = AA.substr(0, 3); + if (_atom_site.count("auth_seq_id")) - resi=line_vec[_atom_site["auth_seq_id"]]; - else resi=line_vec[_atom_site["label_seq_id"]]; - while (resi.size()<4) resi=' '+resi; - if (resi.size()>4) resi=resi.substr(0,4); - - inscode=' '; - if (_atom_site.count("pdbx_PDB_ins_code") && - line_vec[_atom_site["pdbx_PDB_ins_code"]]!="?") - inscode=line_vec[_atom_site["pdbx_PDB_ins_code"]][0]; + resi = line_vec[_atom_site["auth_seq_id"]]; + else + resi = line_vec[_atom_site["label_seq_id"]]; + while (resi.size() < 4) + resi = ' ' + resi; + if (resi.size() > 4) + 
resi = resi.substr(0, 4); + + inscode = ' '; + if (_atom_site.count("pdbx_PDB_ins_code") && + line_vec[_atom_site["pdbx_PDB_ins_code"]] != "?") + inscode = line_vec[_atom_site["pdbx_PDB_ins_code"]][0]; if (_atom_site.count("auth_asym_id")) { - if (chain1_sele.size()) after_ter - =line_vec[_atom_site["auth_asym_id"]]!=chain1_sele; - else if (ter_opt>=2 && ca_idx1 && asym_id.size() && - asym_id!=line_vec[_atom_site["auth_asym_id"]]) - after_ter=true; - asym_id=line_vec[_atom_site["auth_asym_id"]]; + if (chain1_sele.size()) + after_ter = line_vec[_atom_site["auth_asym_id"]] != chain1_sele; + else if (ter_opt >= 2 && ca_idx1 && asym_id.size() && + asym_id != line_vec[_atom_site["auth_asym_id"]]) + after_ter = true; + asym_id = line_vec[_atom_site["auth_asym_id"]]; } else if (_atom_site.count("label_asym_id")) { - if (chain1_sele.size()) after_ter - =line_vec[_atom_site["label_asym_id"]]!=chain1_sele; - if (ter_opt>=2 && ca_idx1 && asym_id.size() && - asym_id!=line_vec[_atom_site["label_asym_id"]]) - after_ter=true; - asym_id=line_vec[_atom_site["label_asym_id"]]; + if (chain1_sele.size()) + after_ter = line_vec[_atom_site["label_asym_id"]] != chain1_sele; + if (ter_opt >= 2 && ca_idx1 && asym_id.size() && + asym_id != line_vec[_atom_site["label_asym_id"]]) + after_ter = true; + asym_id = line_vec[_atom_site["label_asym_id"]]; } - //buf_pdb<=1 && line.compare(0,3,"END")==0) break; + // buf_pdb<= 1 && line.compare(0, 3, "END") == 0) + break; } } fin.close(); - if (!mm_opt) buf<<"TER\n"; - buf_all<<"TER\n"; - if (!mm_opt) buf_atm<<"TER\n"; - buf_all_atm<<"TER\n"; - buf_all_atm_lig<<"TER\n"; - for (i=1;i=3 && line.compare(0,3,"TER")==0) after_ter=true; - if (line.size()>=54 && (line.compare(0, 6, "ATOM ")==0 || - line.compare(0, 6, "HETATM")==0)) // PDB format + if (ter_opt >= 3 && line.compare(0, 3, "TER") == 0) + after_ter = true; + if (line.size() >= 54 && (line.compare(0, 6, "ATOM ") == 0 || + line.compare(0, 6, "HETATM") == 0)) // PDB format { - if (line[16]!='A' && 
line[16]!=' ') continue; - if (after_ter && line.compare(0,6,"ATOM ")==0) continue; + if (line[16] != 'A' && line[16] != ' ') + continue; + if (after_ter && line.compare(0, 6, "ATOM ") == 0) + continue; lig_idx2++; - buf_all_atm_lig<=2) + buf_all_atm_lig << line.substr(0, 6) << setw(5) << lig_idx1 + lig_idx2 + << line.substr(11, 9) << " B" << line.substr(22, 32) << '\n'; + if (chain1_sele.size() && line[21] != chain1_sele[0]) + continue; + if (after_ter || line.compare(0, 6, "ATOM ")) + continue; + if (ter_opt >= 2) { - if (ca_idx2 && asym_id.size() && asym_id!=line.substr(21,1)) + if (ca_idx2 && asym_id.size() && asym_id != line.substr(21, 1)) { - after_ter=true; + after_ter = true; continue; } - asym_id=line[21]; + asym_id = line[21]; } - buf_all_atm<<"ATOM "<=5) atom=atom.substr(0,4); - - AA=line_vec[_atom_site["label_comp_id"]]; // residue name - if (AA.size()==1) AA=" "+AA; - else if (AA.size()==2) AA=" " +AA; - else if (AA.size()>=4) AA=AA.substr(0,3); - + atom = line_vec[_atom_site["label_atom_id"]]; + if (atom[0] == '"') + atom = atom.substr(1); + if (atom.size() && atom[atom.size() - 1] == '"') + atom = atom.substr(0, atom.size() - 1); + if (atom.size() == 0) + atom = " "; + else if (atom.size() == 1) + atom = " " + atom + " "; + else if (atom.size() == 2) + atom = " " + atom + " "; + else if (atom.size() == 3) + atom = " " + atom; + else if (atom.size() >= 5) + atom = atom.substr(0, 4); + + AA = line_vec[_atom_site["label_comp_id"]]; // residue name + if (AA.size() == 1) + AA = " " + AA; + else if (AA.size() == 2) + AA = " " + AA; + else if (AA.size() >= 4) + AA = AA.substr(0, 3); + if (_atom_site.count("auth_seq_id")) - resi=line_vec[_atom_site["auth_seq_id"]]; - else resi=line_vec[_atom_site["label_seq_id"]]; - while (resi.size()<4) resi=' '+resi; - if (resi.size()>4) resi=resi.substr(0,4); - - inscode=' '; - if (_atom_site.count("pdbx_PDB_ins_code") && - line_vec[_atom_site["pdbx_PDB_ins_code"]]!="?") - 
inscode=line_vec[_atom_site["pdbx_PDB_ins_code"]][0]; - + resi = line_vec[_atom_site["auth_seq_id"]]; + else + resi = line_vec[_atom_site["label_seq_id"]]; + while (resi.size() < 4) + resi = ' ' + resi; + if (resi.size() > 4) + resi = resi.substr(0, 4); + + inscode = ' '; + if (_atom_site.count("pdbx_PDB_ins_code") && + line_vec[_atom_site["pdbx_PDB_ins_code"]] != "?") + inscode = line_vec[_atom_site["pdbx_PDB_ins_code"]][0]; + if (_atom_site.count("auth_asym_id")) { - if (chain2_sele.size()) after_ter - =line_vec[_atom_site["auth_asym_id"]]!=chain2_sele; - if (ter_opt>=2 && ca_idx2 && asym_id.size() && - asym_id!=line_vec[_atom_site["auth_asym_id"]]) - after_ter=true; - asym_id=line_vec[_atom_site["auth_asym_id"]]; + if (chain2_sele.size()) + after_ter = line_vec[_atom_site["auth_asym_id"]] != chain2_sele; + if (ter_opt >= 2 && ca_idx2 && asym_id.size() && + asym_id != line_vec[_atom_site["auth_asym_id"]]) + after_ter = true; + asym_id = line_vec[_atom_site["auth_asym_id"]]; } else if (_atom_site.count("label_asym_id")) { - if (chain2_sele.size()) after_ter - =line_vec[_atom_site["label_asym_id"]]!=chain2_sele; - if (ter_opt>=2 && ca_idx2 && asym_id.size() && - asym_id!=line_vec[_atom_site["label_asym_id"]]) - after_ter=true; - asym_id=line_vec[_atom_site["label_asym_id"]]; + if (chain2_sele.size()) + after_ter = line_vec[_atom_site["label_asym_id"]] != chain2_sele; + if (ter_opt >= 2 && ca_idx2 && asym_id.size() && + asym_id != line_vec[_atom_site["label_asym_id"]]) + after_ter = true; + asym_id = line_vec[_atom_site["label_asym_id"]]; } - if (after_ter==false || - line_vec[_atom_site["group_PDB"]]=="HETATM") + if (after_ter == false || + line_vec[_atom_site["group_PDB"]] == "HETATM") { lig_idx2++; - buf_all_atm_lig<=1 && line.compare(0,3,"END")==0) break; + if (ter_opt >= 1 && line.compare(0, 3, "END") == 0) + break; } } fin.close(); - if (!mm_opt) buf<<"TER\n"; - buf_all<<"TER\n"; - if (!mm_opt) buf_atm<<"TER\n"; - buf_all_atm<<"TER\n"; - 
buf_all_atm_lig<<"TER\n"; - for (i=ca_idx1+1;i >&tu_vec, - double t[3], double u[3][3], const int ter_opt, - const int mm_opt, const int split_opt, const int mirror_opt, - const char *seqM, const char *seqxA, const char *seqyA, - const vector&resi_vec1, const vector&resi_vec2, - const string chainID1, const string chainID2) + const string fname_super, const vector> &tu_vec, + double t[3], double u[3][3], const int ter_opt, + const int mm_opt, const int split_opt, const int mirror_opt, + const char *seqM, const char *seqxA, const char *seqyA, + const vector &resi_vec1, const vector &resi_vec2, + const string chainID1, const string chainID2) { - int compress_type=0; // uncompressed file + int compress_type = 0; // uncompressed file ifstream fin; #ifndef REDI_PSTREAM_H_SEEN ifstream fin_gz; #else redi::ipstream fin_gz; // if file is compressed - if (xname.size()>=3 && - xname.substr(xname.size()-3,3)==".gz") + if (xname.size() >= 3 && + xname.substr(xname.size() - 3, 3) == ".gz") { - fin_gz.open("gunzip -c "+xname); - compress_type=1; + fin_gz.open("gunzip -c " + xname); + compress_type = 1; } - else if (xname.size()>=4 && - xname.substr(xname.size()-4,4)==".bz2") + else if (xname.size() >= 4 && + xname.substr(xname.size() - 4, 4) == ".bz2") { - fin_gz.open("bzcat "+xname); - compress_type=2; + fin_gz.open("bzcat " + xname); + compress_type = 2; } else #endif - fin.open(xname.c_str()); - - map resi2hinge_dict; - int r,i,j; - j=-1; - char hinge_char=0; - int xlen=resi_vec1.size(); - int ali_len=strlen(seqM); - for (r=0;r resi2hinge_dict; + int r, i, j; + j = -1; + char hinge_char = 0; + int xlen = resi_vec1.size(); + int ali_len = strlen(seqM); + for (r = 0; r < strlen(seqxA); r++) { - if (seqxA[r]=='-') continue; + if (seqxA[r] == '-') + continue; j++; - hinge_char=seqM[r]; - if (hinge_char==' ') + hinge_char = seqM[r]; + if (hinge_char == ' ') { - for (i=1;i=0 && seqM[r-i]!=' ') - hinge_char=seqM[r-i]; - else if (r+i= 0 && seqM[r - i] != ' ') + hinge_char = seqM[r - 
i]; + else if (r + i < xlen && seqM[r + i] != ' ') + hinge_char = seqM[r + i]; + if (hinge_char != ' ') + break; } } - resi2hinge_dict[resi_vec1[j]]=hinge_char-'0'; + resi2hinge_dict[resi_vec1[j]] = hinge_char - '0'; } - string resi=resi_vec1[0]; - int read_resi=resi.size()-4; + string resi = resi_vec1[0]; + int read_resi = resi.size() - 4; stringstream buf; stringstream buf_pymol; @@ -1409,161 +1568,195 @@ void output_flexalign_pymol(const string xname, const string yname, double x1[3]; // after transform /* for PDBx/mmCIF only */ - map _atom_site; + map _atom_site; size_t atom_site_pos; vector line_vec; - int infmt=-1; // 0 - PDB, 3 - PDBx/mmCIF - int hinge=0; - string asym_id="."; // this is similar to chainID, except that - // chainID is char while asym_id is a string - // with possibly multiple char - while (compress_type?fin_gz.good():fin.good()) + int infmt = -1; // 0 - PDB, 3 - PDBx/mmCIF + int hinge = 0; + string asym_id = "."; // this is similar to chainID, except that + // chainID is char while asym_id is a string + // with possibly multiple char + while (compress_type ? 
fin_gz.good() : fin.good()) { - if (compress_type) getline(fin_gz, line); - else getline(fin, line); - if (line.compare(0, 6, "ATOM ")==0 || - line.compare(0, 6, "HETATM")==0) // PDB format + if (compress_type) + getline(fin_gz, line); + else + getline(fin, line); + if (line.compare(0, 6, "ATOM ") == 0 || + line.compare(0, 6, "HETATM") == 0) // PDB format { - infmt=0; - x[0]=atof(line.substr(30,8).c_str()); - x[1]=atof(line.substr(38,8).c_str()); - x[2]=atof(line.substr(46,8).c_str()); - if (mirror_opt) x[2]=-x[2]; - if (read_resi==1) resi=line.substr(22,5); - else resi=line.substr(22,5)+line[21]; - hinge=0; - if (resi2hinge_dict.count(resi)) hinge=resi2hinge_dict[resi]; - tu2t_u(tu_vec[hinge],t,u); + infmt = 0; + x[0] = atof(line.substr(30, 8).c_str()); + x[1] = atof(line.substr(38, 8).c_str()); + x[2] = atof(line.substr(46, 8).c_str()); + if (mirror_opt) + x[2] = -x[2]; + if (read_resi == 1) + resi = line.substr(22, 5); + else + resi = line.substr(22, 5) + line[21]; + hinge = 0; + if (resi2hinge_dict.count(resi)) + hinge = resi2hinge_dict[resi]; + tu2t_u(tu_vec[hinge], t, u); transform(t, u, x, x1); - buf<=2) + resi = line_vec[_atom_site["auth_seq_id"]]; + else + resi = line_vec[_atom_site["label_seq_id"]]; + if (_atom_site.count("pdbx_PDB_ins_code") && + line_vec[_atom_site["pdbx_PDB_ins_code"]] != "?") + resi += line_vec[_atom_site["pdbx_PDB_ins_code"]][0]; + else + resi += " "; + if (read_resi >= 2) { if (_atom_site.count("auth_asym_id")) - asym_id=line_vec[_atom_site["auth_asym_id"]]; - else asym_id=line_vec[_atom_site["label_asym_id"]]; - if (asym_id==".") asym_id=" "; - resi+=asym_id[0]; + asym_id = line_vec[_atom_site["auth_asym_id"]]; + else + asym_id = line_vec[_atom_site["label_asym_id"]]; + if (asym_id == ".") + asym_id = " "; + resi += asym_id[0]; } - hinge=0; - if (resi2hinge_dict.count(resi)) hinge=resi2hinge_dict[resi]; - tu2t_u(tu_vec[hinge],t,u); + hinge = 0; + if (resi2hinge_dict.count(resi)) + hinge = resi2hinge_dict[resi]; + 
tu2t_u(tu_vec[hinge], t, u); transform(t, u, x, x1); - for (atom_site_pos=0; atom_site_pos<_atom_site.size(); atom_site_pos++) + for (atom_site_pos = 0; atom_site_pos < _atom_site.size(); atom_site_pos++) { - if (atom_site_pos==_atom_site["Cartn_x"]) - buf<=1 && line.compare(0,3,"END")==0) break; + buf << line << '\n'; + if (ter_opt >= 1 && line.compare(0, 3, "END") == 0) + break; } } - if (compress_type) fin_gz.close(); - else fin.close(); + if (compress_type) + fin_gz.close(); + else + fin.close(); - string fname_super_full=fname_super; - if (infmt==0) fname_super_full+=".pdb"; - else if (infmt==3) fname_super_full+=".cif"; + string fname_super_full = fname_super; + if (infmt == 0) + fname_super_full += ".pdb"; + else if (infmt == 3) + fname_super_full += ".cif"; ofstream fp; fp.open(fname_super_full.c_str()); - fp<=1) // align one chain from model 1 + if (split_opt == 2 && ter_opt >= 1) // align one chain from model 1 { - chain1_sele=" and c. "+chainID1.substr(1); - chain2_sele=" and c. "+chainID2.substr(1); + chain1_sele = " and c. " + chainID1.substr(1); + chain2_sele = " and c. 
" + chainID2.substr(1); } - else if (split_opt==2 && ter_opt==0) // align one chain from each model + else if (split_opt == 2 && ter_opt == 0) // align one chain from each model { - for (i=1;i pml_list; - pml_list.push_back(fname_super+""); - pml_list.push_back(fname_super+"_atm"); - pml_list.push_back(fname_super+"_all"); - pml_list.push_back(fname_super+"_all_atm"); - pml_list.push_back(fname_super+"_all_atm_lig"); + pml_list.push_back(fname_super + ""); + pml_list.push_back(fname_super + "_atm"); + pml_list.push_back(fname_super + "_all"); + pml_list.push_back(fname_super + "_all_atm"); + pml_list.push_back(fname_super + "_all_atm_lig"); - for (int p=0;p >&tu_vec, const double TM1, const double TM2, - const double TM3, const double TM4, const double TM5, - const double rmsd, const double d0_out, const char *seqM, - const char *seqxA, const char *seqyA, const double Liden, - const int n_ali8, const int L_ali, const double TM_ali, - const double rmsd_ali, const double TM_0, const double d0_0, - const double d0A, const double d0B, const double Lnorm_ass, - const double d0_scale, const double d0a, const double d0u, - const char* fname_matrix, const int outfmt_opt, const int ter_opt, - const int mm_opt, const int split_opt, const int o_opt, - const string fname_super, const int i_opt, const int a_opt, - const bool u_opt, const bool d_opt, const int mirror_opt, - const vector&resi_vec1, const vector&resi_vec2) + const string chainID1, const string chainID2, + const int xlen, const int ylen, double t[3], double u[3][3], + const vector> &tu_vec, const double TM1, const double TM2, + const double TM3, const double TM4, const double TM5, + const double rmsd, const double d0_out, const char *seqM, + const char *seqxA, const char *seqyA, const double Liden, + const int n_ali8, const int L_ali, const double TM_ali, + const double rmsd_ali, const double TM_0, const double d0_0, + const double d0A, const double d0B, const double Lnorm_ass, + const double d0_scale, const double 
d0a, const double d0u, + const char *fname_matrix, const int outfmt_opt, const int ter_opt, + const int mm_opt, const int split_opt, const int o_opt, + const string fname_super, const int i_opt, const int a_opt, + const bool u_opt, const bool d_opt, const int mirror_opt, + const vector &resi_vec1, const vector &resi_vec2) { - if (outfmt_opt<=0) + if (outfmt_opt <= 0) { printf("\nName of Structure_1: %s%s (to be superimposed onto Structure_2)\n", - xname.c_str(), chainID1.c_str()); + xname.c_str(), chainID1.c_str()); printf("Name of Structure_2: %s%s\n", yname.c_str(), chainID2.c_str()); printf("Length of Structure_1: %d residues\n", xlen); printf("Length of Structure_2: %d residues\n\n", ylen); @@ -1757,70 +1960,72 @@ void output_flexalign_results(const string xname, const string yname, if (i_opt) printf("User-specified initial alignment: TM/Lali/rmsd = %7.5lf, %4d, %6.3lf\n", TM_ali, L_ali, rmsd_ali); - printf("Aligned length= %d, RMSD= %6.2f, Seq_ID=n_identical/n_aligned= %4.3f\n", n_ali8, rmsd, (n_ali8>0)?Liden/n_ali8:0); + printf("Aligned length= %d, RMSD= %6.2f, Seq_ID=n_identical/n_aligned= %4.3f\n", n_ali8, rmsd, (n_ali8 > 0) ? 
Liden / n_ali8 : 0); printf("TM-score= %6.5f (normalized by length of Structure_1: L=%d, d0=%.2f)\n", TM2, xlen, d0B); printf("TM-score= %6.5f (normalized by length of Structure_2: L=%d, d0=%.2f)\n", TM1, ylen, d0A); - if (a_opt==1) - printf("TM-score= %6.5f (if normalized by average length of two structures: L=%.1f, d0=%.2f)\n", TM3, (xlen+ylen)*0.5, d0a); + if (a_opt == 1) + printf("TM-score= %6.5f (if normalized by average length of two structures: L=%.1f, d0=%.2f)\n", TM3, (xlen + ylen) * 0.5, d0a); if (u_opt) printf("TM-score= %6.5f (normalized by user-specified L=%.2f and d0=%.2f)\n", TM4, Lnorm_ass, d0u); if (d_opt) printf("TM-score= %6.5f (scaled by user-specified d0=%.2f, and L=%d)\n", TM5, d0_scale, ylen); printf("(You should use TM-score normalized by length of the reference structure)\n"); - - //output alignment + + // output alignment printf("\n([0-9] denote different aligned fragment pairs separated by different hinges)\n"); printf("%s\n", seqxA); printf("%s\n", seqM); printf("%s\n", seqyA); } - else if (outfmt_opt==1) + else if (outfmt_opt == 1) { printf(">%s%s\tL=%d\td0=%.2f\tseqID=%.3f\tTM-score=%.5f\n", - xname.c_str(), chainID1.c_str(), xlen, d0B, Liden/xlen, TM2); + xname.c_str(), chainID1.c_str(), xlen, d0B, Liden / xlen, TM2); printf("%s\n", seqxA); printf(">%s%s\tL=%d\td0=%.2f\tseqID=%.3f\tTM-score=%.5f\n", - yname.c_str(), chainID2.c_str(), ylen, d0A, Liden/ylen, TM1); + yname.c_str(), chainID2.c_str(), ylen, d0A, Liden / ylen, TM1); printf("%s\n", seqyA); printf("# Lali=%d\tRMSD=%.2f\tseqID_ali=%.3f\n", - n_ali8, rmsd, (n_ali8>0)?Liden/n_ali8:0); + n_ali8, rmsd, (n_ali8 > 0) ? 
Liden / n_ali8 : 0); if (i_opt) printf("# User-specified initial alignment: TM=%.5lf\tLali=%4d\trmsd=%.3lf\n", TM_ali, L_ali, rmsd_ali); - if(a_opt) - printf("# TM-score=%.5f (normalized by average length of two structures: L=%.1f\td0=%.2f)\n", TM3, (xlen+ylen)*0.5, d0a); + if (a_opt) + printf("# TM-score=%.5f (normalized by average length of two structures: L=%.1f\td0=%.2f)\n", TM3, (xlen + ylen) * 0.5, d0a); - if(u_opt) + if (u_opt) printf("# TM-score=%.5f (normalized by user-specified L=%.2f\td0=%.2f)\n", TM4, Lnorm_ass, d0u); - if(d_opt) + if (d_opt) printf("# TM-score=%.5f (scaled by user-specified d0=%.2f\tL=%d)\n", TM5, d0_scale, ylen); printf("$$$$\n"); } - else if (outfmt_opt==2) + else if (outfmt_opt == 2) { printf("%s%s\t%s%s\t%.4f\t%.4f\t%.2f\t%4.3f\t%4.3f\t%4.3f\t%d\t%d\t%d", - xname.c_str(), chainID1.c_str(), yname.c_str(), chainID2.c_str(), - TM2, TM1, rmsd, Liden/xlen, Liden/ylen, (n_ali8>0)?Liden/n_ali8:0, - xlen, ylen, n_ali8); + xname.c_str(), chainID1.c_str(), yname.c_str(), chainID2.c_str(), + TM2, TM1, rmsd, Liden / xlen, Liden / ylen, (n_ali8 > 0) ? 
Liden / n_ali8 : 0, + xlen, ylen, n_ali8); } cout << endl; - if (strlen(fname_matrix)) output_flexalign_rotation_matrix( + if (strlen(fname_matrix)) + output_flexalign_rotation_matrix( fname_matrix, tu_vec, t, u); - if (o_opt==1) output_flexalign_pymol(xname, yname, fname_super, tu_vec, - t, u, ter_opt, mm_opt, split_opt, mirror_opt, seqM, seqxA, seqyA, - resi_vec1, resi_vec2, chainID1, chainID2); - else if (o_opt==2) + if (o_opt == 1) + output_flexalign_pymol(xname, yname, fname_super, tu_vec, + t, u, ter_opt, mm_opt, split_opt, mirror_opt, seqM, seqxA, seqyA, + resi_vec1, resi_vec2, chainID1, chainID2); + else if (o_opt == 2) output_flexalign_rasmol(xname, yname, fname_super, tu_vec, - t, u, ter_opt, mm_opt, split_opt, mirror_opt, seqM, seqxA, seqyA, - resi_vec1, resi_vec2, chainID1, chainID2, - xlen, ylen, d0A, n_ali8, rmsd, TM1, Liden); + t, u, ter_opt, mm_opt, split_opt, mirror_opt, seqM, seqxA, seqyA, + resi_vec1, resi_vec2, chainID1, chainID2, + xlen, ylen, d0A, n_ali8, rmsd, TM1, Liden); } #endif diff --git a/qTMclust.cpp b/qTMclust.cpp index 1bd3e2d..ef7368a 100644 --- a/qTMclust.cpp +++ b/qTMclust.cpp @@ -10,133 +10,136 @@ using namespace std; void print_extra_help() { - cout << -"Additional options:\n" -" -fast Fast but slightly inaccurate final alignment\n" -"\n" -" -atom 4-character atom name used to represent a residue.\n" -" Default is \" C3'\" for RNA/DNA and \" CA \" for proteins\n" -" (note the spaces before and after CA).\n" -"\n" -" -mol Molecule type: RNA or protein\n" -" Default is detect molecule type automatically\n" -"\n" -" -het Whether to align residues marked as 'HETATM' in addition to 'ATOM '\n" -" 0: (default) only align 'ATOM ' residues\n" -" 1: align both 'ATOM ' and 'HETATM' residues\n" -"\n" -" -infmt Input format\n" -" -1: (default) automatically detect PDB or PDBx/mmCIF format\n" -" 0: PDB format\n" -" 1: SPICKER format\n" -" 2: xyz format\n" -" 3: PDBx/mmCIF format\n" -" -chain Chains to parse in structure_2. 
Use _ for a chain without chain ID.\n" -" Multiple chains can be separated by commas, e.g.,\n" -" USalign -chain1 C,D,E,F 5jdo.pdb -chain2 A,B,C,D 3wtg.pdb -ter 0\n" -"\n" - <0) // RNA + if (mol_type > 0) // RNA { - lb_HwRMSD=0.02*TMcut; - lb_TMfast=0.60*TMcut; + lb_HwRMSD = 0.02 * TMcut; + lb_TMfast = 0.60 * TMcut; } else // protein { - lb_HwRMSD=0.25*TMcut; - lb_TMfast=0.80*TMcut; + lb_HwRMSD = 0.25 * TMcut; + lb_TMfast = 0.80 * TMcut; } } return; } -void read_init_cluster(const string&filename, - map > &init_cluster) +void read_init_cluster(const string &filename, + map> &init_cluster) { ifstream fin; string line; vector line_vec; map tmp_map; - size_t i,j; + size_t i, j; fin.open(filename.c_str()); while (fin.good()) { - getline(fin,line); - split(line,line_vec,'\t'); - for (i=0;i ().swap(tmp_map); - } - for (i=0;i().swap(tmp_map); + } + for (i = 0; i < line_vec.size(); i++) + line_vec[i].clear(); + line_vec.clear(); } fin.close(); vector().swap(line_vec); @@ -144,8 +147,8 @@ void read_init_cluster(const string&filename, int main(int argc, char *argv[]) { - if (argc < 2) print_help(); - + if (argc < 2) + print_help(); clock_t t1, t2; t1 = clock(); @@ -153,67 +156,75 @@ int main(int argc, char *argv[]) /**********************/ /* get argument */ /**********************/ - string xname = ""; - double TMcut = 0.5; + string xname = ""; + double TMcut = 0.5; string fname_clust = ""; // file name for output cluster result - string fname_init = ""; - string fname_lign = ""; // file name for user alignment + string fname_init = ""; + string fname_lign = ""; // file name for user alignment vector sequence; // get value from alignment file double Lnorm_ass, d0_scale; bool h_opt = false; // print full help message - int i_opt = 0; // 3 for -I, stick to user given alignment - int a_opt = 0; // flag for -a, do not normalized by average length - int s_opt = 2; // flag for -s, normalized by longer length + int i_opt = 0; // 3 for -I, stick to user given alignment + int a_opt 
= 0; // flag for -a, do not normalized by average length + int s_opt = 2; // flag for -s, normalized by longer length bool u_opt = false; // flag for -u, normalized by user specified length bool d_opt = false; // flag for -d, user specified d0 - int infmt_opt =-1; // PDB or PDBx/mmCIF format - int ter_opt =3; // TER, END, or different chainID - int split_opt =0; // do not split chain - bool fast_opt =false; // flags for -fast, fTM-align algorithm - int het_opt =0; // do not read HETATM residues - string atom_opt ="auto";// use C alpha atom for protein and C3' for RNA - string mol_opt ="auto";// auto-detect the molecule type as protein/RNA - string suffix_opt=""; // set -suffix to empty - string dir_opt =""; // set -dir to empty - int byresi_opt=0; // set -byresi to 0 + int infmt_opt = -1; // PDB or PDBx/mmCIF format + int ter_opt = 3; // TER, END, or different chainID + int split_opt = 0; // do not split chain + bool fast_opt = false; // flags for -fast, fTM-align algorithm + int het_opt = 0; // do not read HETATM residues + string atom_opt = "auto"; // use C alpha atom for protein and C3' for RNA + string mol_opt = "auto"; // auto-detect the molecule type as protein/RNA + string suffix_opt = ""; // set -suffix to empty + string dir_opt = ""; // set -dir to empty + int byresi_opt = 0; // set -byresi to 0 vector chain_list; vector chain2parse; vector model2parse; - map > init_cluster; + map> init_cluster; - for(int i = 1; i < argc; i++) + for (int i = 1; i < argc; i++) { - if ( (!strcmp(argv[i],"-u")||!strcmp(argv[i],"-L")) && i < (argc-1) ) + if ((!strcmp(argv[i], "-u") || !strcmp(argv[i], "-L")) && i < (argc - 1)) { PrintErrorAndQuit("Sorry! -u has not been implemented yet"); - Lnorm_ass = atof(argv[i + 1]); u_opt = true; i++; + Lnorm_ass = atof(argv[i + 1]); + u_opt = true; + i++; } - else if ( !strcmp(argv[i],"-d") && i < (argc-1) ) + else if (!strcmp(argv[i], "-d") && i < (argc - 1)) { PrintErrorAndQuit("Sorry! 
-d has not been implemented yet"); - d0_scale = atof(argv[i + 1]); d_opt = true; i++; + d0_scale = atof(argv[i + 1]); + d_opt = true; + i++; } - else if (!strcmp(argv[i], "-I") && i < (argc-1) ) + else if (!strcmp(argv[i], "-I") && i < (argc - 1)) { - fname_lign = argv[i + 1]; i_opt = 3; i++; + fname_lign = argv[i + 1]; + i_opt = 3; + i++; } - else if ( !strcmp(argv[i],"-o") && i < (argc-1) ) + else if (!strcmp(argv[i], "-o") && i < (argc - 1)) { - fname_clust = argv[i + 1]; i++; + fname_clust = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-a") && i < (argc-1)) + else if (!strcmp(argv[i], "-a") && i < (argc - 1)) { PrintErrorAndQuit("Sorry! -a is not used for clustering"); } - else if ( !strcmp(argv[i],"-s") && i < (argc-1) ) + else if (!strcmp(argv[i], "-s") && i < (argc - 1)) { - s_opt=atoi(argv[i + 1]); i++; - if (s_opt<1 || s_opt>6) + s_opt = atoi(argv[i + 1]); + i++; + if (s_opt < 1 || s_opt > 6) PrintErrorAndQuit("-s must be within 1 to 6"); } - else if ( !strcmp(argv[i],"-h") ) + else if (!strcmp(argv[i], "-h")) { h_opt = true; } @@ -221,136 +232,157 @@ int main(int argc, char *argv[]) { fast_opt = true; } - else if ( !strcmp(argv[i],"-infmt") && i < (argc-1) ) + else if (!strcmp(argv[i], "-infmt") && i < (argc - 1)) { - infmt_opt=atoi(argv[i + 1]); i++; + infmt_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-ter") && i < (argc-1) ) + else if (!strcmp(argv[i], "-ter") && i < (argc - 1)) { - ter_opt=atoi(argv[i + 1]); i++; + ter_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-split") && i < (argc-1) ) + else if (!strcmp(argv[i], "-split") && i < (argc - 1)) { - split_opt=atoi(argv[i + 1]); i++; + split_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-atom") && i < (argc-1) ) + else if (!strcmp(argv[i], "-atom") && i < (argc - 1)) { - atom_opt=argv[i + 1]; i++; + atom_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-mol") && i < (argc-1) ) + else if (!strcmp(argv[i], "-mol") && i < (argc - 1)) { - 
mol_opt=argv[i + 1]; i++; + mol_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-dir") && i < (argc-1) ) + else if (!strcmp(argv[i], "-dir") && i < (argc - 1)) { - dir_opt=argv[i + 1]; i++; + dir_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-suffix") && i < (argc-1) ) + else if (!strcmp(argv[i], "-suffix") && i < (argc - 1)) { - suffix_opt=argv[i + 1]; i++; + suffix_opt = argv[i + 1]; + i++; } - else if ( !strcmp(argv[i],"-TMcut") && i < (argc-1) ) + else if (!strcmp(argv[i], "-TMcut") && i < (argc - 1)) { - TMcut=atof(argv[i + 1]); i++; - if (TMcut>1 or TMcut<0.45) + TMcut = atof(argv[i + 1]); + i++; + if (TMcut > 1 or TMcut < 0.45) PrintErrorAndQuit("TMcut must be in the range of [0.45,1)"); } - else if ( !strcmp(argv[i],"-byresi") && i < (argc-1) ) + else if (!strcmp(argv[i], "-byresi") && i < (argc - 1)) { PrintErrorAndQuit("Sorry! -byresi has not been implemented yet"); - byresi_opt=atoi(argv[i + 1]); i++; + byresi_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-het") && i < (argc-1) ) + else if (!strcmp(argv[i], "-het") && i < (argc - 1)) { - het_opt=atoi(argv[i + 1]); i++; + het_opt = atoi(argv[i + 1]); + i++; } - else if ( !strcmp(argv[i],"-init") && i < (argc-1) ) + else if (!strcmp(argv[i], "-init") && i < (argc - 1)) { - read_init_cluster(argv[i+1],init_cluster); i++; + read_init_cluster(argv[i + 1], init_cluster); + i++; } - else if (!strcmp(argv[i], "-chain") ) + else if (!strcmp(argv[i], "-chain")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -chain"); - split(argv[i+1],chain2parse,','); + split(argv[i + 1], chain2parse, ','); i++; } - else if (!strcmp(argv[i], "-model") ) + else if (!strcmp(argv[i], "-model")) { - if (i>=(argc-1)) + if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -model"); - split(argv[i+1],model2parse,','); + split(argv[i + 1], model2parse, ','); i++; } - else if (xname.size() == 0) xname=argv[i]; - else PrintErrorAndQuit(string("ERROR! 
Undefined option ")+argv[i]); + else if (xname.size() == 0) + xname = argv[i]; + else + PrintErrorAndQuit(string("ERROR! Undefined option ") + argv[i]); } - if(xname.size()==0) print_help(h_opt); + if (xname.size() == 0) + print_help(h_opt); - if (suffix_opt.size() && dir_opt.size()==0) + if (suffix_opt.size() && dir_opt.size() == 0) PrintErrorAndQuit("-suffix is only valid if -dir, -dir1 or -dir2 is set"); - if (atom_opt.size()!=4) + if (atom_opt.size() != 4) PrintErrorAndQuit("ERROR! Atom name must have 4 characters, including space."); - if (mol_opt!="auto" && mol_opt!="protein" && mol_opt!="RNA") + if (mol_opt != "auto" && mol_opt != "protein" && mol_opt != "RNA") PrintErrorAndQuit("ERROR! Molecule type must be either RNA or protein."); - else if (mol_opt=="protein" && atom_opt=="auto") - atom_opt=" CA "; - else if (mol_opt=="RNA" && atom_opt=="auto") - atom_opt=" C3'"; + else if (mol_opt == "protein" && atom_opt == "auto") + atom_opt = " CA "; + else if (mol_opt == "RNA" && atom_opt == "auto") + atom_opt = " C3'"; - if (u_opt && Lnorm_ass<=0) + if (u_opt && Lnorm_ass <= 0) PrintErrorAndQuit("Wrong value for option -u! It should be >0"); - if (d_opt && d0_scale<=0) + if (d_opt && d0_scale <= 0) PrintErrorAndQuit("Wrong value for option -d! 
It should be >0"); - if (split_opt==1 && ter_opt!=0) + if (split_opt == 1 && ter_opt != 0) PrintErrorAndQuit("-split 1 should be used with -ter 0"); - else if (split_opt==2 && ter_opt!=0 && ter_opt!=1) + else if (split_opt == 2 && ter_opt != 0 && ter_opt != 1) PrintErrorAndQuit("-split 2 should be used with -ter 0 or 1"); - if (split_opt<0 || split_opt>2) + if (split_opt < 0 || split_opt > 2) PrintErrorAndQuit("-split can only be 0, 1 or 2"); /* read initial alignment file from 'align.txt' */ - if (i_opt) read_user_alignment(sequence, fname_lign, i_opt); + if (i_opt) + read_user_alignment(sequence, fname_lign, i_opt); - if (byresi_opt) i_opt=3; + if (byresi_opt) + i_opt = 3; /* parse file list */ - if (dir_opt.size()==0) chain_list.push_back(xname); - else file2chainlist(chain_list, xname, dir_opt, suffix_opt); + if (dir_opt.size() == 0) + chain_list.push_back(xname); + else + file2chainlist(chain_list, xname, dir_opt, suffix_opt); /* declare previously global variables */ - vector >PDB_lines; // text of chain - vector mol_vec; // molecule type of chain1, RNA if >0 - vector chainID_list; // list of chainID - size_t xchainnum=0; // number of chains in a PDB file - size_t i,j; // number of residues/chains in a PDB is - // usually quite limited. Yet, the number of - // files can be very large. size_t is safer - // than int for very long list of files - int xlen,ylen; // chain length - double **xa,**ya; // xyz coordinate - vector resi_vec; // residue index for chain, dummy variable - vector >chainLen_list; // vector of (length,index) pair - vector > seq_vec; - vector > sec_vec; - vector > >xyz_vec; + vector> PDB_lines; // text of chain + vector mol_vec; // molecule type of chain1, RNA if >0 + vector chainID_list; // list of chainID + size_t xchainnum = 0; // number of chains in a PDB file + size_t i, j; // number of residues/chains in a PDB is + // usually quite limited. Yet, the number of + // files can be very large. 
size_t is safer + // than int for very long list of files + int xlen, ylen; // chain length + double **xa, **ya; // xyz coordinate + vector resi_vec; // residue index for chain, dummy variable + vector> chainLen_list; // vector of (length,index) pair + vector> seq_vec; + vector> sec_vec; + vector>> xyz_vec; /* parse files */ string chain_name; - vector seq_tmp; - vector sec_tmp; - vector flt_tmp(3,0); - vector >xyz_tmp; + vector seq_tmp; + vector sec_tmp; + vector flt_tmp(3, 0); + vector> xyz_tmp; int r; // residue index size_t newchainnum; - double ub_HwRMSD=0.90*TMcut+0.10; - double lb_HwRMSD=0.5*TMcut; - double ub_TMfast=0.90*TMcut+0.10; - double lb_TMfast=0.9*TMcut; - if (s_opt==2 || s_opt==4 || s_opt==5) a_opt=-2; // normalized by longer length, i.e. smaller TM - else if (s_opt==1 || s_opt==5) a_opt=-1; // normalized by shorter length, i.e. larger TM - else if (s_opt==3) a_opt= 1; // normalized by average length + double ub_HwRMSD = 0.90 * TMcut + 0.10; + double lb_HwRMSD = 0.5 * TMcut; + double ub_TMfast = 0.90 * TMcut + 0.10; + double lb_TMfast = 0.9 * TMcut; + if (s_opt == 2 || s_opt == 4 || s_opt == 5) + a_opt = -2; // normalized by longer length, i.e. smaller TM + else if (s_opt == 1 || s_opt == 5) + a_opt = -1; // normalized by shorter length, i.e. larger TM + else if (s_opt == 3) + a_opt = 1; // normalized by average length #ifdef TMalign_HwRMSD_h /* These parameters controls HwRMSD filter. iter_opt typically should be @@ -361,50 +393,54 @@ int main(int argc, char *argv[]) * After HwRMSD filter, at least min_repr_num and at most max_repr_num * are used for subsequent TMalign. 
The actual number of representatives * are decided by xlen */ - const int glocal =0; // global alignment - const int iter_opt =10; - const int min_repr_num=10; - const int max_repr_num=50; + const int glocal = 0; // global alignment + const int iter_opt = 10; + const int min_repr_num = 10; + const int max_repr_num = 50; #endif - for (i=0;i >().swap(PDB_lines); - size_t Nstruct=chainLen_list.size(); + vector>().swap(PDB_lines); + size_t Nstruct = chainLen_list.size(); /* sort by chain length */ - stable_sort(chainLen_list.begin(),chainLen_list.end(), - greater >()); - cout<<"Clustering "<="<>()); + cout << "Clustering " << chainLen_list.size() + << " chains with TM-score cutoff >=" << TMcut << '\n' + << "Longest chain " << chainID_list[chainLen_list[0].second] << '\t' + << chainLen_list[0].first << " residues.\n" + << "Shortest chain " << chainID_list[chainLen_list.back().second] << '\t' + << chainLen_list.back().first << " residues." << endl; /* set the first cluster */ - vector clust_mem_vec(Nstruct,-1); // cluster membership - vector clust_repr_vec; // the same as number of clusters - size_t chain_i=chainLen_list[0].second; + vector clust_mem_vec(Nstruct, -1); // cluster membership + vector clust_repr_vec; // the same as number of clusters + size_t chain_i = chainLen_list[0].second; clust_repr_vec.push_back(chain_i); - clust_mem_vec[chain_i]=0; - map clust_repr_map; + clust_mem_vec[chain_i] = 0; + map clust_repr_map; /* perform alignment */ size_t chain_j; - const double fast_lb=50.; // proteins shorter than fast_lb never use -fast - const double fast_ub=1000.;// proteins longer than fast_ub always use -fast - double Lave; // average protein length for chain_i and chain_j - size_t sizePROT; // number of representatives for current chain - vector index_vec; // index of cluster representatives for the chain - bool found_clust; // whether current chain hit previous cluster - - for (i=1;i index_vec; // index of cluster representatives for the chain + bool 
found_clust; // whether current chain hit previous cluster + + for (i = 1; i < Nstruct; i++) { - chain_i=chainLen_list[i].second; - xlen=xyz_vec[chain_i].size(); - if (xlen<=5) // TMalign cannot handle L<=5 + chain_i = chainLen_list[i].second; + xlen = xyz_vec[chain_i].size(); + if (xlen <= 5) // TMalign cannot handle L<=5 { - clust_mem_vec[chain_i]=clust_repr_vec.size(); + clust_mem_vec[chain_i] = clust_repr_vec.size(); clust_repr_vec.push_back(clust_repr_vec.size()); continue; } NewArray(&xa, xlen, 3); - for (r=0;r0;j--) - { - chain_j=clust_repr_vec[j-1]; - ylen=xyz_vec[chain_j].size(); - if (mol_vec[chain_i]*mol_vec[chain_j]<0) continue; - else if (s_opt==2 && xlen 0; j--) + { + chain_j = clust_repr_vec[j - 1]; + ylen = xyz_vec[chain_j].size(); + if (mol_vec[chain_i] * mol_vec[chain_j] < 0) + continue; + else if (s_opt == 2 && xlen < TMcut * ylen) + continue; + else if (s_opt == 3 && xlen < (2 * TMcut - 1) * ylen) + continue; + else if (s_opt == 4 && xlen * (2 / TMcut - 1) < ylen) + continue; + else if (s_opt == 5 && xlen < TMcut * TMcut * ylen) + continue; + else if (s_opt == 6 && xlen * xlen < (2 * TMcut * TMcut - 1) * ylen * ylen) + continue; index_vec.push_back(chain_j); } - sizePROT=index_vec.size(); + sizePROT = index_vec.size(); - string key=chainID_list[chain_i]; - cout<<'>'<' << chainID_list[chain_i] << '\t' << xlen << '\t' + << setiosflags(ios::fixed) << setprecision(2) + << 100. 
* i / Nstruct << "%(#" << i << ")\t" + << "#repr=" << sizePROT << "/" << clust_repr_vec.size() << endl; #ifdef TMalign_HwRMSD_h - vector > HwRMSDscore_list; + vector> HwRMSDscore_list; double TM; - size_t init_count=0; - for (j=0;j=2 && - HwRMSDscore_list.size()>=init_cluster[key].size() && !init_cluster[key].count(value)) + chain_j = index_vec[j]; + string value = chainID_list[chain_j]; + if (init_cluster.count(key) && init_count >= 2 && + HwRMSDscore_list.size() >= init_cluster[key].size() && !init_cluster[key].count(value)) + continue; + ylen = xyz_vec[chain_j].size(); + if (mol_vec[chain_i] * mol_vec[chain_j] < 0) + continue; + else if (s_opt == 2 && xlen < TMcut * ylen) + continue; + else if (s_opt == 3 && xlen < (2 * TMcut - 1) * ylen) + continue; + else if (s_opt == 4 && xlen * (2 / TMcut - 1) < ylen) continue; - ylen=xyz_vec[chain_j].size(); - if (mol_vec[chain_i]*mol_vec[chain_j]<0) continue; - else if (s_opt==2 && xlen "< "<=lb_HwRMSD || Lave<=fast_lb) + TM = TM3; // average length + if (s_opt == 1) + TM = TM2; // shorter length + else if (s_opt == 2) + TM = TM1; // longer length + else if (s_opt == 3) + TM = (TM1 + TM2) / 2; // average TM + else if (s_opt == 4) + TM = 2 / (1 / TM1 + 1 / TM2); // harmonic average + else if (s_opt == 5) + TM = sqrt(TM1 * TM2); // geometric average + else if (s_opt == 6) + TM = sqrt((TM1 * TM1 + TM2 * TM2) / 2); // root mean square + + Lave = sqrt(xlen * ylen); // geometry average because O(L1*L2) + if (TM >= lb_HwRMSD || Lave <= fast_lb) { if (init_cluster.count(key) && init_cluster[key].count(value)) { - HwRMSDscore_list.push_back(make_pair(TM+1,index_vec[j])); + HwRMSDscore_list.push_back(make_pair(TM + 1, index_vec[j])); init_count++; - if (init_count==init_cluster[key].size()) break; + if (init_count == init_cluster[key].size()) + break; } else - HwRMSDscore_list.push_back(make_pair(TM,index_vec[j])); + HwRMSDscore_list.push_back(make_pair(TM, index_vec[j])); } /* clean up after each HwRMSD */ @@ -588,82 +644,93 @@ int 
main(int argc, char *argv[]) seqxA.clear(); seqyA.clear(); DeleteArray(&ya, ylen); - delete [] invmap; + delete[] invmap; /* if a good hit is guaranteed to be found, stop the loop */ - if (TM>=ub_HwRMSD) break; + if (TM >= ub_HwRMSD) + break; } - stable_sort(HwRMSDscore_list.begin(),HwRMSDscore_list.end(), - greater >()); + stable_sort(HwRMSDscore_list.begin(), HwRMSDscore_list.end(), + greater>()); - int cur_repr_num_cutoff=min_repr_num; - if (xlen<=fast_lb) cur_repr_num_cutoff=max_repr_num; - else if (xlen>fast_lb && xlen=2) cur_repr_num_cutoff=init_count; + int cur_repr_num_cutoff = min_repr_num; + if (xlen <= fast_lb) + cur_repr_num_cutoff = max_repr_num; + else if (xlen > fast_lb && xlen < fast_ub) + cur_repr_num_cutoff += + (fast_ub - xlen) / (fast_ub - fast_lb) * (max_repr_num - min_repr_num); + // if (init_count>=2) cur_repr_num_cutoff=init_count; index_vec.clear(); - for (j=0;jfast_lb && TM=cur_repr_num_cutoff) break; + for (j = 0; j < HwRMSDscore_list.size(); j++) + { + TM = HwRMSDscore_list[j].first; + chain_j = HwRMSDscore_list[j].second; + ylen = xyz_vec[chain_j].size(); + Lave = sqrt(xlen * ylen); // geometry average because O(L1*L2) + if (Lave > fast_lb && TM < TMcut * 0.5 && + index_vec.size() >= cur_repr_num_cutoff) + break; index_vec.push_back(chain_j); - cout<<"#"<=fast_ub); - + Lave = sqrt(xlen * ylen); // geometry average because O(L1*L2) + bool overwrite_fast_opt = (fast_opt == true || Lave >= fast_ub); + /* declare variable specific to this pair of TMalign */ double t0[3], u0[3][3]; double TM1, TM2; - double TM3, TM4, TM5; // for s_opt, u_opt, d_opt + double TM3, TM4, TM5; // for s_opt, u_opt, d_opt double d0_0, TM_0; double d0A, d0B, d0u, d0a; - double d0_out=5.0; - string seqM, seqxA, seqyA;// for output alignment + double d0_out = 5.0; + string seqM, seqxA, seqyA; // for output alignment double rmsd0 = 0.0; - int L_ali; // Aligned length in standard_TMscore - double Liden=0; - double TM_ali, rmsd_ali; // TMscore and rmsd in 
standard_TMscore - int n_ali=0; - int n_ali8=0; + int L_ali; // Aligned length in standard_TMscore + double Liden = 0; + double TM_ali, rmsd_ali; // TMscore and rmsd in standard_TMscore + int n_ali = 0; + int n_ali8 = 0; vector do_vec; - + /* entry function for structure alignment */ - int status=TMalign_main( + int status = TMalign_main( xa, ya, &seq_vec[chain_i][0], &seq_vec[chain_j][0], &sec_vec[chain_i][0], &sec_vec[chain_j][0], t0, u0, TM1, TM2, TM3, TM4, TM5, @@ -672,42 +739,48 @@ int main(int argc, char *argv[]) rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, xlen, ylen, sequence, Lnorm_ass, d0_scale, i_opt, a_opt, u_opt, d_opt, overwrite_fast_opt, - mol_vec[chain_i]+mol_vec[chain_j],TMcut); + mol_vec[chain_i] + mol_vec[chain_j], TMcut, 0); - cout<=ub_TMfast || - (TM>=TMcut && (fast_opt || overwrite_fast_opt==false))) + if (TM >= ub_TMfast || + (TM >= TMcut && (fast_opt || overwrite_fast_opt == false))) { - clust_mem_vec[chain_i]=clust_repr_map[chain_j]; + clust_mem_vec[chain_i] = clust_repr_map[chain_j]; DeleteArray(&ya, ylen); - found_clust=true; + found_clust = true; break; } - if (TM=TMcut) + + TM = TM3; // average length + if (s_opt == 1) + TM = TM2; // shorter length + else if (s_opt == 2) + TM = TM1; // longer length + else if (s_opt == 3) + TM = (TM1 + TM2) / 2; // average TM + else if (s_opt == 4) + TM = 2 / (1 / TM1 + 1 / TM2); // harmonic average + else if (s_opt == 5) + TM = sqrt(TM1 * TM2); // geometric average + else if (s_opt == 6) + TM = sqrt((TM1 * TM1 + TM2 * TM2) / 2); // root mean square + cout << "*\t" << chainID_list[chain_j] << '\t' << TM2 << '\t' << TM1 << endl; + if (TM >= TMcut) { - clust_mem_vec[chain_i]=clust_repr_map[chain_j]; - found_clust=true; + clust_mem_vec[chain_i] = clust_repr_map[chain_j]; + found_clust = true; break; } } @@ -746,15 +825,15 @@ int main(int argc, char *argv[]) if (!found_clust) // new cluster { - clust_mem_vec[chain_i]=clust_repr_vec.size(); - clust_repr_map[chain_i]=clust_repr_vec.size(); + 
clust_mem_vec[chain_i] = clust_repr_vec.size(); + clust_repr_map[chain_i] = clust_repr_vec.size(); clust_repr_vec.push_back(chain_i); } else // member structures are not used further { - vector ().swap(seq_vec[chain_i]); - vector ().swap(sec_vec[chain_i]); - vector > ().swap(xyz_vec[chain_i]); + vector().swap(seq_vec[chain_i]); + vector().swap(sec_vec[chain_i]); + vector>().swap(xyz_vec[chain_i]); } } @@ -766,24 +845,25 @@ int main(int argc, char *argv[]) /* print out cluster */ stringstream txt; - for (j=0;j().swap(chain2parse); vector().swap(model2parse); - map >().swap(init_cluster); + map>().swap(init_cluster); t2 = clock(); - float diff = ((float)t2 - (float)t1)/CLOCKS_PER_SEC; + float diff = ((float)t2 - (float)t1) / CLOCKS_PER_SEC; printf("#Total CPU time is %5.2f seconds\n", diff); return 0; } From f5edc01984fa9cde866c98488b58b382d208c549 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A5=9D=E4=BA=91=E7=AF=AA?= Date: Tue, 21 Apr 2026 10:15:56 +0800 Subject: [PATCH 02/23] -mm 7/8/9 allow -u/-L --- USalign.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/USalign.cpp b/USalign.cpp index cfb1bd2..0891caf 100644 --- a/USalign.cpp +++ b/USalign.cpp @@ -2989,7 +2989,7 @@ int flexalign(string &xname, string &yname, const string &fname_super, double TM_ali, rmsd_ali; // TMscore and rmsd in standard_TMscore int n_ali = 0; int n_ali8 = 0; - bool force_fast_opt = (getmin(xlen, ylen) > 1500) ? true : fast_opt; + bool force_fast_opt = (getmin(xlen, ylen) > 2000) ? 
true : fast_opt; vector> tu_vec; vector do_vec; @@ -3996,7 +3996,7 @@ int main(int argc, char *argv[]) { if (i_opt) PrintErrorAndQuit("-mm cannot be used with -i or -I"); - if (u_opt) + if (u_opt && mm_opt < 7) PrintErrorAndQuit("-mm cannot be used with -u or -L"); // if (cp_opt) PrintErrorAndQuit("-mm cannot be used with -cp"); if (dir_opt.size() && (mm_opt == 1 || mm_opt == 2)) From d5f7abc1c35106db367aa96d62e8b2837b039685 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A5=9D=E4=BA=91=E7=AF=AA?= Date: Wed, 29 Apr 2026 09:02:19 +0800 Subject: [PATCH 03/23] flex_bisection init --- USalign.cpp | 321 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 321 insertions(+) diff --git a/USalign.cpp b/USalign.cpp index 0891caf..385a0bf 100644 --- a/USalign.cpp +++ b/USalign.cpp @@ -3482,6 +3482,319 @@ int flexalign_best(string &xname, string &yname, const string &fname_super, return 0; } +// Needleman-Wunsch 序列比对并补齐 Gap 生成全映射 +void get_full_mapping(const string& seq1, const string& seq2, vector& map1, vector& map2) { + int n = seq1.length(); + int m = seq2.length(); + int match = 2, mismatch = -1, gap = -1; + vector> score(n + 1, vector(m + 1, 0)); + + for (int i = 0; i <= n; i++) score[i][0] = gap * i; + for (int j = 0; j <= m; j++) score[0][j] = gap * j; + + for (int i = 1; i <= n; i++) { + for (int j = 1; j <= m; j++) { + int diag = score[i-1][j-1] + (seq1[i-1] == seq2[j-1] ? match : mismatch); + int up = score[i-1][j] + gap; + int left = score[i][j-1] + gap; + score[i][j] = max({diag, up, left}); + } + } + + map raw_map1, raw_map2; + int i = n, j = m; + while (i > 0 && j > 0) { + int current = score[i][j]; + int diag = score[i-1][j-1] + (seq1[i-1] == seq2[j-1] ? 
match : mismatch); + int left = score[i][j-1] + gap; + + if (current == diag) { + raw_map1[i-1] = j-1; + raw_map2[j-1] = i-1; + i--; j--; + } else if (current == left) { + j--; + } else { + i--; + } + } + + // 填补 Gap:向左右寻找最近的已比对位点 + map1.assign(n, 0); + for (int i = 0; i < n; i++) { + if (raw_map1.count(i)) map1[i] = raw_map1[i]; + else { + int left = i - 1, right = i + 1; + while (left >= 0 && !raw_map1.count(left)) left--; + while (right < n && !raw_map1.count(right)) right++; + if (left >= 0 && right < n) map1[i] = (i - left) <= (right - i) ? raw_map1[left] : raw_map1[right]; + else if (left >= 0) map1[i] = raw_map1[left]; + else if (right < n) map1[i] = raw_map1[right]; + } + } + + map2.assign(m, 0); + for (int i = 0; i < m; i++) { + if (raw_map2.count(i)) map2[i] = raw_map2[i]; + else { + int left = i - 1, right = i + 1; + while (left >= 0 && !raw_map2.count(left)) left--; + while (right < m && !raw_map2.count(right)) right++; + if (left >= 0 && right < m) map2[i] = (i - left) <= (right - i) ? raw_map2[left] : raw_map2[right]; + else if (left >= 0) map2[i] = raw_map2[left]; + else if (right < m) map2[i] = raw_map2[right]; + } + } +} + +// 缓存单次切片的最佳结果结构体 +struct BisectRes { + int start1, end1, start2, end2; + double TM_u, avg_TM; + string seqxA, seqyA, seqM; + vector> tu_vec; + int L_ali; + double Liden, TM_ali, rmsd_ali; +}; + +void recursive_bisection( + double **xa_full, double **ya_full, const string& seqx_full, const string& seqy_full, + const string& secx_full, const string& secy_full, + int start1, int end1, int start2, int end2, + const vector& map1, const vector& map2, + double Lnorm_ass, double tm_threshold, int min_length, + int mol_type, int hinge_opt, int i_opt, int a_opt, bool u_opt, bool d_opt, + double d0_scale, bool fast_opt, vector& sequence, + vector& results +) { + int len1 = end1 - start1 + 1; + int len2 = end2 - start2 + 1; + int shorter_len = min(len1, len2); + + // 1. 
内存切片构造 + double **xa, **ya; + char *seqx = new char[len1 + 1]; + char *secx = new char[len1 + 1]; + char *seqy = new char[len2 + 1]; + char *secy = new char[len2 + 1]; + NewArray(&xa, len1, 3); + NewArray(&ya, len2, 3); + + for (int i = 0; i < len1; i++) { + xa[i][0] = xa_full[start1 + i][0]; xa[i][1] = xa_full[start1 + i][1]; xa[i][2] = xa_full[start1 + i][2]; + seqx[i] = seqx_full[start1 + i]; secx[i] = secx_full[start1 + i]; + } + for (int i = 0; i < len2; i++) { + ya[i][0] = ya_full[start2 + i][0]; ya[i][1] = ya_full[start2 + i][1]; ya[i][2] = ya_full[start2 + i][2]; + seqy[i] = seqy_full[start2 + i]; secy[i] = secy_full[start2 + i]; + } + seqx[len1] = '\0'; secx[len1] = '\0'; + seqy[len2] = '\0'; secy[len2] = '\0'; + + // 2. 调用 flexalign_best 的核心评估逻辑 (ss_opt = 0 and 1) + double global_max_TM = -1.0; + BisectRes best_res; + best_res.start1 = start1; best_res.end1 = end1; + best_res.start2 = start2; best_res.end2 = end2; + + for (int cur_ss_opt = 0; cur_ss_opt < 2; cur_ss_opt++) { + double t0[3], u0[3][3]; + double TM1, TM2, TM3, TM4, TM5, d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out = 5.0; + string seqM, seqxA, seqyA; + double rmsd0 = 0.0, Liden = 0, TM_ali, rmsd_ali; + int L_ali, n_ali = 0, n_ali8 = 0; + vector> tu_vec; + vector do_vec; + + bool force_fast_opt = (min(len1, len2) > 1500) ? 
true : fast_opt; + + flexalign_main( + xa, ya, seqx, seqy, secx, secy, + t0, u0, tu_vec, TM1, TM2, TM3, TM4, TM5, + d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, + seqM, seqxA, seqyA, do_vec, + rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, + len1, len2, sequence, Lnorm_ass, d0_scale, + i_opt, a_opt, u_opt, d_opt, force_fast_opt, mol_type, hinge_opt, cur_ss_opt + ); + + double cur_avg_TM = (TM1 + TM2) / 2.0; + if (cur_avg_TM > global_max_TM) { + global_max_TM = cur_avg_TM; + best_res.avg_TM = cur_avg_TM; + best_res.TM_u = TM4; // TM4 承载基于 Lnorm_ass (user-specified) 的归一化分数 + best_res.seqxA = seqxA; best_res.seqyA = seqyA; best_res.seqM = seqM; + best_res.L_ali = L_ali; best_res.Liden = Liden; + best_res.TM_ali = TM_ali; best_res.rmsd_ali = rmsd_ali; + + best_res.tu_vec.clear(); + for(auto& t : tu_vec) best_res.tu_vec.push_back(t); + } + } + + // 清理当前切片内存 + delete[] seqx; delete[] secx; delete[] seqy; delete[] secy; + DeleteArray(&xa, len1); DeleteArray(&ya, len2); + + // 3. 递归终止条件 + if (best_res.avg_TM >= tm_threshold || shorter_len < min_length) { + results.push_back(best_res); + return; + } + + // 4. 
计算中点并二分 + int mid1, mid2; + if (len1 <= len2) { + mid1 = start1 + len1 / 2 - 1; + mid2 = map1[mid1]; + mid2 = max(start2, min(mid2, end2 - 1)); + } else { + mid2 = start2 + len2 / 2 - 1; + mid1 = map2[mid2]; + mid1 = max(start1, min(mid1, end1 - 1)); + } + + recursive_bisection(xa_full, ya_full, seqx_full, seqy_full, secx_full, secy_full, + start1, mid1, start2, mid2, map1, map2, Lnorm_ass, tm_threshold, min_length, + mol_type, hinge_opt, i_opt, a_opt, u_opt, d_opt, d0_scale, fast_opt, sequence, results); + + recursive_bisection(xa_full, ya_full, seqx_full, seqy_full, secx_full, secy_full, + mid1 + 1, end1, mid2 + 1, end2, map1, map2, Lnorm_ass, tm_threshold, min_length, + mol_type, hinge_opt, i_opt, a_opt, u_opt, d_opt, d0_scale, fast_opt, sequence, results); +} + +int flexalign_bisection(string &xname, string &yname, const string &fname_super, + const string &fname_lign, const string &fname_matrix, + vector &sequence, double Lnorm_ass, const double d0_scale, + const bool m_opt, const int i_opt, const int o_opt, const int a_opt, + const bool u_opt, const bool d_opt, const double TMcut, + const int infmt1_opt, const int infmt2_opt, const int ter_opt, + const int split_opt, const int outfmt_opt, const bool fast_opt, + const int mirror_opt, const int het_opt, const string &atom_opt, + const bool autojustify, const string &mol_opt, const string &dir_opt, + const string &dirpair_opt, const string &dir1_opt, const string &dir2_opt, + const vector &chain2parse1, const vector &chain2parse2, + const vector &model2parse1, const vector &model2parse2, + const int byresi_opt, const vector &chain1_list, + const vector &chain2_list, const int hinge_opt, + double tm_threshold = 0.6, int min_length = 50) +{ + vector> PDB_lines1, PDB_lines2; + vector mol_vec1, mol_vec2; + vector chainID_list1, chainID_list2; + int read_resi = byresi_opt; + if (byresi_opt == 0 && o_opt) read_resi = 2; + + for (int i = 0; i < chain1_list.size(); i++) { + xname = chain1_list[i]; + int xchainnum = 
get_PDB_lines(xname, PDB_lines1, chainID_list1, mol_vec1, ter_opt, infmt1_opt, atom_opt, autojustify, split_opt, het_opt, chain2parse1, model2parse1); + if (!xchainnum) continue; + + for (int chain_i = 0; chain_i < xchainnum; chain_i++) { + int xlen = PDB_lines1[chain_i].size(); + if (mol_opt == "RNA") mol_vec1[chain_i] = 1; else if (mol_opt == "protein") mol_vec1[chain_i] = -1; + if (xlen < 3) continue; + + double **xa; NewArray(&xa, xlen, 3); + char *seqx = new char[xlen + 1]; char *secx = new char[xlen + 1]; + vector resi_vec1; + read_PDB(PDB_lines1[chain_i], xa, seqx, resi_vec1, read_resi); + if (mirror_opt) for (int r = 0; r < xlen; r++) xa[r][2] = -xa[r][2]; + (mol_vec1[chain_i] > 0) ? make_sec(seqx, xa, xlen, secx, atom_opt) : make_sec(xa, xlen, secx); + + for (int j = (dir_opt.size() > 0) * (i + 1); j < chain2_list.size(); j++) { + if (dirpair_opt.size() && i != j) continue; + if (PDB_lines2.size() == 0) { + yname = chain2_list[j]; + int ychainnum = get_PDB_lines(yname, PDB_lines2, chainID_list2, mol_vec2, ter_opt, infmt2_opt, atom_opt, autojustify, split_opt, het_opt, chain2parse2, model2parse2); + if (!ychainnum) continue; + } + + for (int chain_j = 0; chain_j < PDB_lines2.size(); chain_j++) { + int ylen = PDB_lines2[chain_j].size(); + if (mol_opt == "RNA") mol_vec2[chain_j] = 1; else if (mol_opt == "protein") mol_vec2[chain_j] = -1; + if (ylen < 3) continue; + + double **ya; NewArray(&ya, ylen, 3); + char *seqy = new char[ylen + 1]; char *secy = new char[ylen + 1]; + vector resi_vec2; + read_PDB(PDB_lines2[chain_j], ya, seqy, resi_vec2, read_resi); + (mol_vec2[chain_j] > 0) ? 
make_sec(seqy, ya, ylen, secy, atom_opt) : make_sec(ya, ylen, secy); + + // ======================================= + // === Bisection 专属逻辑与数据流合并 === + // ======================================= + int global_short_L = min(xlen, ylen); + if (!u_opt) Lnorm_ass = global_short_L; // 强制启用 User-specified 归一化统计累加分数 + + vector map1, map2; + get_full_mapping(string(seqx), string(seqy), map1, map2); + + vector results; + recursive_bisection( + xa, ya, string(seqx), string(seqy), string(secx), string(secy), + 0, xlen - 1, 0, ylen - 1, map1, map2, Lnorm_ass, tm_threshold, min_length, + mol_vec1[chain_i] + mol_vec2[chain_j], hinge_opt, i_opt, a_opt, + true /* 锁定 u_opt 为 true 获取全局分布 */, d_opt, d0_scale, fast_opt, sequence, results + ); + + // 结果拼接 + double final_TM_u = 0.0; + string final_seqxA = "", final_seqyA = "", final_seqM = ""; + vector> final_tu_vec; + int final_L_ali = 0; + double final_Liden = 0, final_TM_ali = 0, final_rmsd_ali = 0; + + for (size_t rIdx = 0; rIdx < results.size(); rIdx++) { + BisectRes& res = results[rIdx]; + final_TM_u += res.TM_u; // TM 累加 + final_L_ali += res.L_ali; + final_Liden += res.Liden; + final_TM_ali += res.TM_ali; + final_rmsd_ali += res.rmsd_ali; // 简单累加,严格意义上需要整体重新 Kabsch,这里为了输出妥协 + + for (auto& t : res.tu_vec) final_tu_vec.push_back(t); + + if (rIdx > 0) { + final_seqxA += "*"; final_seqyA += "*"; final_seqM += "*"; + } + final_seqxA += res.seqxA; final_seqyA += res.seqyA; final_seqM += res.seqM; + } + + // 取第一个有效切片的位移矩阵作为 t0, u0 兼容打印(仅作基准展示) + double best_t0[3] = {0}, best_u0[3][3] = {{1,0,0},{0,1,0},{0,0,1}}; + if (!final_tu_vec.empty()) { + for(int k=0; k<3; k++) for(int l=0; l<3; l++) best_u0[k][l] = final_tu_vec[0][k*3+l]; + for(int k=0; k<3; k++) best_t0[k] = final_tu_vec[0][9+k]; + } + + if (outfmt_opt == 0) print_version(); + + // 输出调用 + output_flexalign_results( + xname.substr(dir1_opt.size() + dir_opt.size() + dirpair_opt.size()), + yname.substr(dir2_opt.size() + dir_opt.size() + dirpair_opt.size()), + chainID_list1[chain_i], 
chainID_list2[chain_j], + xlen, ylen, best_t0, best_u0, final_tu_vec, + final_TM_u, final_TM_u, final_TM_u, final_TM_u, final_TM_u, // 占位传导 TM Score + 0.0 /*RMSD placeholder*/, 5.0, final_seqM.c_str(), final_seqxA.c_str(), final_seqyA.c_str(), + final_Liden, final_L_ali, final_L_ali, final_TM_ali, final_rmsd_ali, + 0.0, 0.0, 0.0, 0.0, Lnorm_ass, d0_scale, 0.0, 0.0, + (m_opt ? fname_matrix : "").c_str(), outfmt_opt, ter_opt, false, split_opt, o_opt, + fname_super, i_opt, a_opt, u_opt, d_opt, mirror_opt, resi_vec1, resi_vec2 + ); + + // Memory Cleanup + delete[] seqy; delete[] secy; DeleteArray(&ya, ylen); + } + } + delete[] seqx; delete[] secx; DeleteArray(&xa, xlen); + } + PDB_lines1.clear(); + } + return 0; +} + int main(int argc, char *argv[]) { if (argc < 2) @@ -4173,6 +4486,14 @@ int main(int argc, char *argv[]) atom_opt, autojustify, mol_opt, dir_opt, dirpair_opt, dir1_opt, dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, byresi_opt, chain1_list, chain2_list, hinge_opt); + else if (mm_opt == 10) + flexalign_bisection(xname, yname, fname_super, fname_lign, + fname_matrix, sequence, Lnorm_ass, d0_scale, m_opt, i_opt, o_opt, + a_opt, u_opt, d_opt, TMcut, infmt1_opt, infmt2_opt, ter_opt, + split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt, + atom_opt, autojustify, mol_opt, dir_opt, dirpair_opt, dir1_opt, + dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, + byresi_opt, chain1_list, chain2_list, hinge_opt, 0.6, 50); else cerr << "WARNING! 
-mm " << mm_opt << " not implemented" << endl; From 521925f32e37679116922c03b8c9c0c3a4ed596a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A5=9D=E4=BA=91=E7=AF=AA?= Date: Wed, 29 Apr 2026 09:58:32 +0800 Subject: [PATCH 04/23] flex_bisection init --- USalign.cpp | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/USalign.cpp b/USalign.cpp index 385a0bf..ed9586c 100644 --- a/USalign.cpp +++ b/USalign.cpp @@ -3554,6 +3554,7 @@ struct BisectRes { string seqxA, seqyA, seqM; vector> tu_vec; int L_ali; + int n_ali8; // <--- 新增:保存距离小于 8 Angstrom 的比对对数 double Liden, TM_ali, rmsd_ali; }; @@ -3624,7 +3625,9 @@ void recursive_bisection( best_res.avg_TM = cur_avg_TM; best_res.TM_u = TM4; // TM4 承载基于 Lnorm_ass (user-specified) 的归一化分数 best_res.seqxA = seqxA; best_res.seqyA = seqyA; best_res.seqM = seqM; - best_res.L_ali = L_ali; best_res.Liden = Liden; + best_res.L_ali = L_ali; + best_res.n_ali8 = n_ali8; // <--- 新增:记录 n_ali8 + best_res.Liden = Liden; best_res.TM_ali = TM_ali; best_res.rmsd_ali = rmsd_ali; best_res.tu_vec.clear(); @@ -3743,15 +3746,20 @@ int flexalign_bisection(string &xname, string &yname, const string &fname_super, string final_seqxA = "", final_seqyA = "", final_seqM = ""; vector> final_tu_vec; int final_L_ali = 0; - double final_Liden = 0, final_TM_ali = 0, final_rmsd_ali = 0; + int final_n_ali8 = 0; + double final_Liden = 0, final_TM_ali = 0; + double sum_sq_dist = 0.0; // 用于正确计算全局 RMSD for (size_t rIdx = 0; rIdx < results.size(); rIdx++) { BisectRes& res = results[rIdx]; final_TM_u += res.TM_u; // TM 累加 final_L_ali += res.L_ali; + final_n_ali8 += res.n_ali8; final_Liden += res.Liden; final_TM_ali += res.TM_ali; - final_rmsd_ali += res.rmsd_ali; // 简单累加,严格意义上需要整体重新 Kabsch,这里为了输出妥协 + + // 正确的 RMSD 聚合:基于每个 block 的平方和累加 + sum_sq_dist += res.L_ali * res.rmsd_ali * res.rmsd_ali; for (auto& t : res.tu_vec) final_tu_vec.push_back(t); @@ -3761,6 +3769,14 @@ int flexalign_bisection(string &xname, string &yname, const 
string &fname_super, final_seqxA += res.seqxA; final_seqyA += res.seqyA; final_seqM += res.seqM; } + // 计算真正的合并后 RMSD + double final_rmsd_ali = (final_L_ali > 0) ? sqrt(sum_sq_dist / final_L_ali) : 0.0; + + // 还原 TM1 和 TM2 分数(考虑到 xlen 和 ylen 不同的情况) + double sum_tm_raw = final_TM_u * Lnorm_ass; + double final_TM1 = sum_tm_raw / xlen; + double final_TM2 = sum_tm_raw / ylen; + // 取第一个有效切片的位移矩阵作为 t0, u0 兼容打印(仅作基准展示) double best_t0[3] = {0}, best_u0[3][3] = {{1,0,0},{0,1,0},{0,0,1}}; if (!final_tu_vec.empty()) { @@ -3770,15 +3786,16 @@ int flexalign_bisection(string &xname, string &yname, const string &fname_super, if (outfmt_opt == 0) print_version(); - // 输出调用 + // 输出调用(修正了参数顺序错误问题) output_flexalign_results( xname.substr(dir1_opt.size() + dir_opt.size() + dirpair_opt.size()), yname.substr(dir2_opt.size() + dir_opt.size() + dirpair_opt.size()), chainID_list1[chain_i], chainID_list2[chain_j], xlen, ylen, best_t0, best_u0, final_tu_vec, - final_TM_u, final_TM_u, final_TM_u, final_TM_u, final_TM_u, // 占位传导 TM Score - 0.0 /*RMSD placeholder*/, 5.0, final_seqM.c_str(), final_seqxA.c_str(), final_seqyA.c_str(), - final_Liden, final_L_ali, final_L_ali, final_TM_ali, final_rmsd_ali, + final_TM1, final_TM2, final_TM_u, final_TM_u, final_TM_u, // 正确缩放的 TM1 和 TM2 + final_rmsd_ali /*用真实 RMSD 作为 placeholder*/, 5.0, + final_seqM.c_str(), final_seqxA.c_str(), final_seqyA.c_str(), + final_Liden, final_n_ali8, final_L_ali, final_TM_ali, final_rmsd_ali, // 修正参数位置 0.0, 0.0, 0.0, 0.0, Lnorm_ass, d0_scale, 0.0, 0.0, (m_opt ? 
fname_matrix : "").c_str(), outfmt_opt, ter_opt, false, split_opt, o_opt, fname_super, i_opt, a_opt, u_opt, d_opt, mirror_opt, resi_vec1, resi_vec2 From 8eaf54a0b43584895936bba9f27f82f3ae500903 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A5=9D=E4=BA=91=E7=AF=AA?= Date: Thu, 30 Apr 2026 10:06:30 +0800 Subject: [PATCH 05/23] flexalign code improve --- USalign.cpp | 1124 +++++++++++++++------------------------------------ 1 file changed, 325 insertions(+), 799 deletions(-) diff --git a/USalign.cpp b/USalign.cpp index ed9586c..89848d3 100644 --- a/USalign.cpp +++ b/USalign.cpp @@ -2841,648 +2841,11 @@ int SOIalign(string &xname, string &yname, const string &fname_super, return 0; } -int flexalign(string &xname, string &yname, const string &fname_super, - const string &fname_lign, const string &fname_matrix, - vector &sequence, const double Lnorm_ass, const double d0_scale, - const bool m_opt, const int i_opt, const int o_opt, const int a_opt, - const bool u_opt, const bool d_opt, const double TMcut, - const int infmt1_opt, const int infmt2_opt, const int ter_opt, - const int split_opt, const int outfmt_opt, const bool fast_opt, - const int mirror_opt, const int het_opt, const string &atom_opt, - const bool autojustify, const string &mol_opt, const string &dir_opt, - const string &dirpair_opt, const string &dir1_opt, const string &dir2_opt, - const vector &chain2parse1, const vector &chain2parse2, - const vector &model2parse1, const vector &model2parse2, - const int byresi_opt, const vector &chain1_list, - const vector &chain2_list, const int hinge_opt, const int ss_opt) -{ - /* declare previously global variables */ - vector> PDB_lines1; // text of chain1 - vector> PDB_lines2; // text of chain2 - vector mol_vec1; // molecule type of chain1, RNA if >0 - vector mol_vec2; // molecule type of chain2, RNA if >0 - vector chainID_list1; // list of chainID1 - vector chainID_list2; // list of chainID2 - int i, j; // file index - int chain_i, chain_j; // chain index - int r; // 
residue index - int xlen, ylen; // chain length - int xchainnum, ychainnum; // number of chains in a PDB file - char *seqx, *seqy; // for the protein sequence - char *secx, *secy; // for the secondary structure - double **xa, **ya; // for input vectors xa[0...xlen-1][0..2] and - // ya[0...ylen-1][0..2], in general, - // ya is regarded as native structure - // --> superpose xa onto ya - vector resi_vec1; // residue index for chain1 - vector resi_vec2; // residue index for chain2 - int read_resi = byresi_opt; // whether to read residue index - if (byresi_opt == 0 && o_opt) - read_resi = 2; - - /* loop over file names */ - for (i = 0; i < chain1_list.size(); i++) - { - /* parse chain 1 */ - xname = chain1_list[i]; - xchainnum = get_PDB_lines(xname, PDB_lines1, chainID_list1, - mol_vec1, ter_opt, infmt1_opt, atom_opt, autojustify, - split_opt, het_opt, chain2parse1, model2parse1); - if (!xchainnum) - { - cerr << "Warning! Cannot parse file: " << xname - << ". Chain number 0." << endl; - continue; - } - for (chain_i = 0; chain_i < xchainnum; chain_i++) - { - xlen = PDB_lines1[chain_i].size(); - if (mol_opt == "RNA") - mol_vec1[chain_i] = 1; - else if (mol_opt == "protein") - mol_vec1[chain_i] = -1; - if (!xlen) - { - cerr << "Warning! Cannot parse file: " << xname - << ". Chain length 0." 
<< endl; - continue; - } - else if (xlen < 3) - { - cerr << "Sequence is too short <3!: " << xname << endl; - continue; - } - NewArray(&xa, xlen, 3); - seqx = new char[xlen + 1]; - secx = new char[xlen + 1]; - xlen = read_PDB(PDB_lines1[chain_i], xa, seqx, - resi_vec1, read_resi); - if (mirror_opt) - for (r = 0; r < xlen; r++) - xa[r][2] = -xa[r][2]; - if (mol_vec1[chain_i] > 0) - make_sec(seqx, xa, xlen, secx, atom_opt); - else - make_sec(xa, xlen, secx); // secondary structure assignment - - for (j = (dir_opt.size() > 0) * (i + 1); j < chain2_list.size(); j++) - { - if (dirpair_opt.size() && i != j) - continue; - /* parse chain 2 */ - if (PDB_lines2.size() == 0) - { - yname = chain2_list[j]; - ychainnum = get_PDB_lines(yname, PDB_lines2, chainID_list2, - mol_vec2, ter_opt, infmt2_opt, atom_opt, autojustify, - split_opt, het_opt, chain2parse2, model2parse2); - if (!ychainnum) - { - cerr << "Warning! Cannot parse file: " << yname - << ". Chain number 0." << endl; - continue; - } - } - for (chain_j = 0; chain_j < ychainnum; chain_j++) - { - ylen = PDB_lines2[chain_j].size(); - if (mol_opt == "RNA") - mol_vec2[chain_j] = 1; - else if (mol_opt == "protein") - mol_vec2[chain_j] = -1; - if (!ylen) - { - cerr << "Warning! Cannot parse file: " << yname - << ". Chain length 0." 
<< endl; - continue; - } - else if (ylen < 3) - { - cerr << "Sequence is too short <3!: " << yname << endl; - continue; - } - NewArray(&ya, ylen, 3); - seqy = new char[ylen + 1]; - secy = new char[ylen + 1]; - ylen = read_PDB(PDB_lines2[chain_j], ya, seqy, - resi_vec2, read_resi); - if (mol_vec2[chain_j] > 0) - make_sec(seqy, ya, ylen, secy, atom_opt); - else - make_sec(ya, ylen, secy); - - if (byresi_opt) - extract_aln_from_resi(sequence, - seqx, seqy, resi_vec1, resi_vec2, byresi_opt); - - /* declare variable specific to this pair of TMalign */ - double t0[3], u0[3][3]; - double TM1, TM2; - double TM3, TM4, TM5; // for a_opt, u_opt, d_opt - double d0_0, TM_0; - double d0A, d0B, d0u, d0a; - double d0_out = 5.0; - string seqM, seqxA, seqyA; // for output alignment - double rmsd0 = 0.0; - int L_ali; // Aligned length in standard_TMscore - double Liden = 0; - double TM_ali, rmsd_ali; // TMscore and rmsd in standard_TMscore - int n_ali = 0; - int n_ali8 = 0; - bool force_fast_opt = (getmin(xlen, ylen) > 2000) ? 
true : fast_opt; - vector> tu_vec; - vector do_vec; - - /* entry function for structure alignment */ - int hingeNum = flexalign_main( - xa, ya, seqx, seqy, secx, secy, - t0, u0, tu_vec, TM1, TM2, TM3, TM4, TM5, - d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, - seqM, seqxA, seqyA, do_vec, - rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, - xlen, ylen, sequence, Lnorm_ass, d0_scale, - i_opt, a_opt, u_opt, d_opt, force_fast_opt, - mol_vec1[chain_i] + mol_vec2[chain_j], hinge_opt, ss_opt); - - if (hinge_opt && hingeNum <= 1 && - n_ali8 < 0.6 * getmin(xlen, ylen)) - { - double t0_h[3], u0_h[3][3]; - double TM1_h, TM2_h; - double TM3_h, TM4_h, TM5_h; - double d0_0_h, TM_0_h; - double d0_out_h = 5.0; - string seqM_h, seqxA_h, seqyA_h; - double rmsd0_h = 0.0; - int L_ali_h; - double Liden_h = 0; - double TM_ali_h, rmsd_ali_h; - int n_ali_h = 0; - int n_ali8_h = 0; - vector> tu_vec_h(1, tu_vec[0]); - vector do_vec_h; - tu2t_u(tu_vec[0], t0_h, u0_h); - - int hingeNum_h = flexalign_main( - xa, ya, seqx, seqy, secx, secy, - t0_h, u0_h, tu_vec_h, - TM1_h, TM2_h, TM3_h, TM4_h, TM5_h, - d0_0_h, TM_0_h, d0A, d0B, d0u, d0a, d0_out_h, - seqM_h, seqxA_h, seqyA_h, do_vec_h, rmsd0_h, L_ali_h, - Liden_h, TM_ali_h, rmsd_ali_h, n_ali_h, n_ali8_h, - xlen, ylen, sequence, Lnorm_ass, d0_scale, i_opt, - a_opt, u_opt, d_opt, force_fast_opt, - mol_vec1[chain_i] + mol_vec2[chain_j], hinge_opt, ss_opt); - - double TM = (TM1 > TM2) ? TM1 : TM2; - double TM_h = (TM1_h > TM2_h) ? 
TM1_h : TM2_h; - if (TM_h > TM) - { - hingeNum = hingeNum_h; - tu2t_u(tu_vec_h[0], t0, u0); - TM1 = TM1_h; - TM2 = TM2_h; - TM3 = TM3_h; - TM4 = TM4_h; - TM5 = TM5_h; - d0_0 = d0_0_h; - TM_0 = TM_0_h; - d0_out = d0_out_h; - seqM = seqM_h; - seqxA = seqxA_h; - seqyA = seqyA_h; - rmsd0 = rmsd0_h; - L_ali = L_ali_h; - Liden = Liden_h; - TM_ali = TM_ali_h; - rmsd_ali = rmsd_ali_h; - n_ali = n_ali_h; - n_ali8 = n_ali8_h; - for (int hinge = 0; hinge < tu_vec.size(); hinge++) - tu_vec[hinge].clear(); - tu_vec.clear(); - for (int hinge = 0; hinge < tu_vec_h.size(); hinge++) - tu_vec.push_back(tu_vec_h[hinge]); - do_vec.clear(); - for (int r = 0; r < do_vec_h.size(); r++) - do_vec.push_back(do_vec_h[r]); - } - else - tu2t_u(tu_vec[0], t0, u0); - do_vec_h.clear(); - } - - /* print result */ - if (outfmt_opt == 0) - print_version(); - output_flexalign_results( - xname.substr(dir1_opt.size() + dir_opt.size() + dirpair_opt.size()), - yname.substr(dir2_opt.size() + dir_opt.size() + dirpair_opt.size()), - chainID_list1[chain_i], chainID_list2[chain_j], - xlen, ylen, t0, u0, tu_vec, TM1, TM2, TM3, TM4, TM5, - rmsd0, d0_out, seqM.c_str(), - seqxA.c_str(), seqyA.c_str(), Liden, - n_ali8, L_ali, TM_ali, rmsd_ali, TM_0, d0_0, - d0A, d0B, Lnorm_ass, d0_scale, d0a, d0u, - (m_opt ? fname_matrix : "").c_str(), - outfmt_opt, ter_opt, false, split_opt, o_opt, - fname_super, i_opt, a_opt, u_opt, d_opt, mirror_opt, - resi_vec1, resi_vec2); - - /* Done! 
Free memory */ - tu_vec.clear(); - seqM.clear(); - seqxA.clear(); - seqyA.clear(); - DeleteArray(&ya, ylen); - delete[] seqy; - delete[] secy; - resi_vec2.clear(); - do_vec.clear(); - } // chain_j - if (chain2_list.size() > 1) - { - yname.clear(); - for (chain_j = 0; chain_j < ychainnum; chain_j++) - PDB_lines2[chain_j].clear(); - PDB_lines2.clear(); - chainID_list2.clear(); - mol_vec2.clear(); - } - } // j - PDB_lines1[chain_i].clear(); - DeleteArray(&xa, xlen); - delete[] seqx; - delete[] secx; - resi_vec1.clear(); - } // chain_i - xname.clear(); - PDB_lines1.clear(); - chainID_list1.clear(); - mol_vec1.clear(); - } // i - if (chain2_list.size() == 1) - { - yname.clear(); - for (chain_j = 0; chain_j < ychainnum; chain_j++) - PDB_lines2[chain_j].clear(); - PDB_lines2.clear(); - resi_vec2.clear(); - chainID_list2.clear(); - mol_vec2.clear(); - } - return 0; -} - -int flexalign_best(string &xname, string &yname, const string &fname_super, - const string &fname_lign, const string &fname_matrix, - vector &sequence, const double Lnorm_ass, const double d0_scale, - const bool m_opt, const int i_opt, const int o_opt, const int a_opt, - const bool u_opt, const bool d_opt, const double TMcut, - const int infmt1_opt, const int infmt2_opt, const int ter_opt, - const int split_opt, const int outfmt_opt, const bool fast_opt, - const int mirror_opt, const int het_opt, const string &atom_opt, - const bool autojustify, const string &mol_opt, const string &dir_opt, - const string &dirpair_opt, const string &dir1_opt, const string &dir2_opt, - const vector &chain2parse1, const vector &chain2parse2, - const vector &model2parse1, const vector &model2parse2, - const int byresi_opt, const vector &chain1_list, - const vector &chain2_list, const int hinge_opt) -{ - /* declare previously global variables */ - vector> PDB_lines1; // text of chain1 - vector> PDB_lines2; // text of chain2 - vector mol_vec1; // molecule type of chain1, RNA if >0 - vector mol_vec2; // molecule type of chain2, 
RNA if >0 - vector chainID_list1; // list of chainID1 - vector chainID_list2; // list of chainID2 - int i, j; // file index - int chain_i, chain_j; // chain index - int r; // residue index - int xlen, ylen; // chain length - int xchainnum, ychainnum; // number of chains in a PDB file - char *seqx, *seqy; // for the protein sequence - char *secx, *secy; // for the secondary structure - double **xa, **ya; // for input vectors xa[0...xlen-1][0..2] and - // ya[0...ylen-1][0..2], in general, - // ya is regarded as native structure - // --> superpose xa onto ya - vector resi_vec1; // residue index for chain1 - vector resi_vec2; // residue index for chain2 - int read_resi = byresi_opt; // whether to read residue index - if (byresi_opt == 0 && o_opt) - read_resi = 2; - - /* loop over file names */ - for (i = 0; i < chain1_list.size(); i++) - { - /* parse chain 1 */ - xname = chain1_list[i]; - xchainnum = get_PDB_lines(xname, PDB_lines1, chainID_list1, - mol_vec1, ter_opt, infmt1_opt, atom_opt, autojustify, - split_opt, het_opt, chain2parse1, model2parse1); - if (!xchainnum) - { - cerr << "Warning! Cannot parse file: " << xname - << ". Chain number 0." << endl; - continue; - } - for (chain_i = 0; chain_i < xchainnum; chain_i++) - { - xlen = PDB_lines1[chain_i].size(); - if (mol_opt == "RNA") - mol_vec1[chain_i] = 1; - else if (mol_opt == "protein") - mol_vec1[chain_i] = -1; - if (!xlen) - { - cerr << "Warning! Cannot parse file: " << xname - << ". Chain length 0." 
<< endl; - continue; - } - else if (xlen < 3) - { - cerr << "Sequence is too short <3!: " << xname << endl; - continue; - } - NewArray(&xa, xlen, 3); - seqx = new char[xlen + 1]; - secx = new char[xlen + 1]; - xlen = read_PDB(PDB_lines1[chain_i], xa, seqx, - resi_vec1, read_resi); - if (mirror_opt) - for (r = 0; r < xlen; r++) - xa[r][2] = -xa[r][2]; - if (mol_vec1[chain_i] > 0) - make_sec(seqx, xa, xlen, secx, atom_opt); - else - make_sec(xa, xlen, secx); // secondary structure assignment +// ======================================================================= +// Data structures and Helpers for flexalign unified pipeline +// ======================================================================= - for (j = (dir_opt.size() > 0) * (i + 1); j < chain2_list.size(); j++) - { - if (dirpair_opt.size() && i != j) - continue; - /* parse chain 2 */ - if (PDB_lines2.size() == 0) - { - yname = chain2_list[j]; - ychainnum = get_PDB_lines(yname, PDB_lines2, chainID_list2, - mol_vec2, ter_opt, infmt2_opt, atom_opt, autojustify, - split_opt, het_opt, chain2parse2, model2parse2); - if (!ychainnum) - { - cerr << "Warning! Cannot parse file: " << yname - << ". Chain number 0." << endl; - continue; - } - } - for (chain_j = 0; chain_j < ychainnum; chain_j++) - { - ylen = PDB_lines2[chain_j].size(); - if (mol_opt == "RNA") - mol_vec2[chain_j] = 1; - else if (mol_opt == "protein") - mol_vec2[chain_j] = -1; - if (!ylen) - { - cerr << "Warning! Cannot parse file: " << yname - << ". Chain length 0." 
<< endl; - continue; - } - else if (ylen < 3) - { - cerr << "Sequence is too short <3!: " << yname << endl; - continue; - } - NewArray(&ya, ylen, 3); - seqy = new char[ylen + 1]; - secy = new char[ylen + 1]; - ylen = read_PDB(PDB_lines2[chain_j], ya, seqy, - resi_vec2, read_resi); - if (mol_vec2[chain_j] > 0) - make_sec(seqy, ya, ylen, secy, atom_opt); - else - make_sec(ya, ylen, secy); - - if (byresi_opt) - extract_aln_from_resi(sequence, - seqx, seqy, resi_vec1, resi_vec2, byresi_opt); - - /* declare variables to hold the best result among ss_opt true/false */ - double best_t0[3], best_u0[3][3]; - double best_TM1 = -1.0, best_TM2 = -1.0, best_TM3 = -1.0, best_TM4 = -1.0, best_TM5 = -1.0; - double best_d0_0 = 0.0, best_TM_0 = 0.0, best_d0A = 0.0, best_d0B = 0.0, best_d0u = 0.0, best_d0a = 0.0, best_d0_out = 5.0; - string best_seqM, best_seqxA, best_seqyA; - double best_rmsd0 = 0.0, best_Liden = 0.0, best_TM_ali = 0.0, best_rmsd_ali = 0.0; - int best_L_ali = 0, best_n_ali = 0, best_n_ali8 = 0; - vector> best_tu_vec; - vector best_do_vec; - double global_max_TM = -1.0; - - /* loop to test both true and false for ss_opt */ - for (int cur_ss_opt = 0; cur_ss_opt < 2; cur_ss_opt++) - { - /* declare variables specific to this pair and iteration */ - double t0[3], u0[3][3]; - double TM1, TM2, TM3, TM4, TM5; - double d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out = 5.0; - string seqM, seqxA, seqyA; - double rmsd0 = 0.0; - int L_ali; - double Liden = 0; - double TM_ali, rmsd_ali; - int n_ali = 0, n_ali8 = 0; - bool force_fast_opt = (getmin(xlen, ylen) > 1500) ? 
true : fast_opt; - vector> tu_vec; - vector do_vec; - - /* entry function for structure alignment */ - int hingeNum = flexalign_main( - xa, ya, seqx, seqy, secx, secy, - t0, u0, tu_vec, TM1, TM2, TM3, TM4, TM5, - d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, - seqM, seqxA, seqyA, do_vec, - rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, - xlen, ylen, sequence, Lnorm_ass, d0_scale, - i_opt, a_opt, u_opt, d_opt, force_fast_opt, - mol_vec1[chain_i] + mol_vec2[chain_j], hinge_opt, cur_ss_opt); - - if (hinge_opt && hingeNum <= 1 && - n_ali8 < 0.6 * getmin(xlen, ylen)) - { - double t0_h[3], u0_h[3][3]; - double TM1_h, TM2_h, TM3_h, TM4_h, TM5_h; - double d0_0_h, TM_0_h; - double d0_out_h = 5.0; - string seqM_h, seqxA_h, seqyA_h; - double rmsd0_h = 0.0, Liden_h = 0, TM_ali_h, rmsd_ali_h; - int L_ali_h, n_ali_h = 0, n_ali8_h = 0; - vector> tu_vec_h(1, tu_vec[0]); - vector do_vec_h; - tu2t_u(tu_vec[0], t0_h, u0_h); - - int hingeNum_h = flexalign_main( - xa, ya, seqx, seqy, secx, secy, - t0_h, u0_h, tu_vec_h, - TM1_h, TM2_h, TM3_h, TM4_h, TM5_h, - d0_0_h, TM_0_h, d0A, d0B, d0u, d0a, d0_out_h, - seqM_h, seqxA_h, seqyA_h, do_vec_h, rmsd0_h, L_ali_h, - Liden_h, TM_ali_h, rmsd_ali_h, n_ali_h, n_ali8_h, - xlen, ylen, sequence, Lnorm_ass, d0_scale, i_opt, - a_opt, u_opt, d_opt, force_fast_opt, - mol_vec1[chain_i] + mol_vec2[chain_j], hinge_opt, cur_ss_opt); - - double TM = (TM1 > TM2) ? TM1 : TM2; - double TM_h = (TM1_h > TM2_h) ? 
TM1_h : TM2_h; - if (TM_h > TM) - { - hingeNum = hingeNum_h; - tu2t_u(tu_vec_h[0], t0, u0); - TM1 = TM1_h; - TM2 = TM2_h; - TM3 = TM3_h; - TM4 = TM4_h; - TM5 = TM5_h; - d0_0 = d0_0_h; - TM_0 = TM_0_h; - d0_out = d0_out_h; - seqM = seqM_h; - seqxA = seqxA_h; - seqyA = seqyA_h; - rmsd0 = rmsd0_h; - L_ali = L_ali_h; - Liden = Liden_h; - TM_ali = TM_ali_h; - rmsd_ali = rmsd_ali_h; - n_ali = n_ali_h; - n_ali8 = n_ali8_h; - - for (int hinge = 0; hinge < tu_vec.size(); hinge++) - tu_vec[hinge].clear(); - tu_vec.clear(); - for (int hinge = 0; hinge < tu_vec_h.size(); hinge++) - tu_vec.push_back(tu_vec_h[hinge]); - do_vec.clear(); - for (int r = 0; r < do_vec_h.size(); r++) - do_vec.push_back(do_vec_h[r]); - } - else - { - tu2t_u(tu_vec[0], t0, u0); - } - do_vec_h.clear(); - } - - /* Compare current run max TM-score with the global best */ - double cur_max_TM = (TM1 > TM2) ? TM1 : TM2; - if (cur_max_TM > global_max_TM) - { - global_max_TM = cur_max_TM; - - /* copy primitive types to best cache */ - for (int k = 0; k < 3; k++) - best_t0[k] = t0[k]; - for (int k = 0; k < 3; k++) - for (int l = 0; l < 3; l++) - best_u0[k][l] = u0[k][l]; - best_TM1 = TM1; - best_TM2 = TM2; - best_TM3 = TM3; - best_TM4 = TM4; - best_TM5 = TM5; - best_d0_0 = d0_0; - best_TM_0 = TM_0; - best_d0A = d0A; - best_d0B = d0B; - best_d0u = d0u; - best_d0a = d0a; - best_d0_out = d0_out; - best_rmsd0 = rmsd0; - best_Liden = Liden; - best_TM_ali = TM_ali; - best_rmsd_ali = rmsd_ali; - best_L_ali = L_ali; - best_n_ali = n_ali; - best_n_ali8 = n_ali8; - - /* copy complex objects to best cache */ - best_seqM = seqM; - best_seqxA = seqxA; - best_seqyA = seqyA; - - best_tu_vec.clear(); - for (int k = 0; k < tu_vec.size(); k++) - best_tu_vec.push_back(tu_vec[k]); - - best_do_vec.clear(); - for (int k = 0; k < do_vec.size(); k++) - best_do_vec.push_back(do_vec[k]); - } - } /* end of ss_opt loop */ - - /* print result using the best run */ - if (outfmt_opt == 0) - print_version(); - output_flexalign_results( - 
xname.substr(dir1_opt.size() + dir_opt.size() + dirpair_opt.size()), - yname.substr(dir2_opt.size() + dir_opt.size() + dirpair_opt.size()), - chainID_list1[chain_i], chainID_list2[chain_j], - xlen, ylen, best_t0, best_u0, best_tu_vec, best_TM1, best_TM2, best_TM3, best_TM4, best_TM5, - best_rmsd0, best_d0_out, best_seqM.c_str(), - best_seqxA.c_str(), best_seqyA.c_str(), best_Liden, - best_n_ali8, best_L_ali, best_TM_ali, best_rmsd_ali, best_TM_0, best_d0_0, - best_d0A, best_d0B, Lnorm_ass, d0_scale, best_d0a, best_d0u, - (m_opt ? fname_matrix : "").c_str(), - outfmt_opt, ter_opt, false, split_opt, o_opt, - fname_super, i_opt, a_opt, u_opt, d_opt, mirror_opt, - resi_vec1, resi_vec2); - - /* Done! Free memory */ - best_tu_vec.clear(); - best_seqM.clear(); - best_seqxA.clear(); - best_seqyA.clear(); - best_do_vec.clear(); - DeleteArray(&ya, ylen); - delete[] seqy; - delete[] secy; - resi_vec2.clear(); - } - if (chain2_list.size() > 1) - { - yname.clear(); - for (chain_j = 0; chain_j < ychainnum; chain_j++) - PDB_lines2[chain_j].clear(); - PDB_lines2.clear(); - chainID_list2.clear(); - mol_vec2.clear(); - } - } - PDB_lines1[chain_i].clear(); - DeleteArray(&xa, xlen); - delete[] seqx; - delete[] secx; - resi_vec1.clear(); - } - xname.clear(); - PDB_lines1.clear(); - chainID_list1.clear(); - mol_vec1.clear(); - } - if (chain2_list.size() == 1) - { - yname.clear(); - for (chain_j = 0; chain_j < ychainnum; chain_j++) - PDB_lines2[chain_j].clear(); - PDB_lines2.clear(); - resi_vec2.clear(); - chainID_list2.clear(); - mol_vec2.clear(); - } - return 0; -} - -// Needleman-Wunsch 序列比对并补齐 Gap 生成全映射 +// Needleman-Wunsch sequence alignment and gap filling to generate full mapping void get_full_mapping(const string& seq1, const string& seq2, vector& map1, vector& map2) { int n = seq1.length(); int m = seq2.length(); @@ -3519,7 +2882,7 @@ void get_full_mapping(const string& seq1, const string& seq2, vector& map1, } } - // 填补 Gap:向左右寻找最近的已比对位点 + // Fill Gaps: Look left and right for 
the nearest aligned positions map1.assign(n, 0); for (int i = 0; i < n; i++) { if (raw_map1.count(i)) map1[i] = raw_map1[i]; @@ -3547,17 +2910,86 @@ void get_full_mapping(const string& seq1, const string& seq2, vector& map1, } } -// 缓存单次切片的最佳结果结构体 +// Data structure to hold outputs of flexalign_main to avoid parameter clutter +struct FlexAlignResult { + double t0[3]; + double u0[3][3]; + vector> tu_vec; + double TM1, TM2, TM3, TM4, TM5; + double d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out; + string seqM, seqxA, seqyA; + vector do_vec; + double rmsd0, Liden, TM_ali, rmsd_ali; + int L_ali, n_ali, n_ali8, hingeNum; + + FlexAlignResult() : TM1(-1.0), TM2(-1.0), TM3(-1.0), TM4(-1.0), TM5(-1.0), + d0_0(0.0), TM_0(0.0), d0A(0.0), d0B(0.0), d0u(0.0), d0a(0.0), d0_out(5.0), + rmsd0(0.0), Liden(0.0), TM_ali(0.0), rmsd_ali(0.0), + L_ali(0), n_ali(0), n_ali8(0), hingeNum(0) { + for(int i=0; i<3; i++) { + t0[i] = 0.0; + for(int j=0; j<3; j++) u0[i][j] = (i==j)?1.0:0.0; + } + } +}; + +// Structure to cache the best result of a single slice struct BisectRes { int start1, end1, start2, end2; - double TM_u, avg_TM; - string seqxA, seqyA, seqM; - vector> tu_vec; - int L_ali; - int n_ali8; // <--- 新增:保存距离小于 8 Angstrom 的比对对数 - double Liden, TM_ali, rmsd_ali; + double avg_TM; + FlexAlignResult flex_res; }; +enum FlexAlignMode { + FLEX_STANDARD = 0, + FLEX_BEST = 1, + FLEX_BISECTION = 2 +}; + +// Encapsulates the execution of flexalign_main and its fallback refinement logic +void execute_flexalign_with_fallback( + double **xa, double **ya, char *seqx, char *seqy, char *secx, char *secy, + int xlen, int ylen, vector &sequence, const double Lnorm_ass, const double d0_scale, + const int i_opt, const int a_opt, const bool u_opt, const bool d_opt, const bool force_fast_opt, + const int mol_type, const int hinge_opt, const int ss_opt, FlexAlignResult &res) +{ + res.hingeNum = flexalign_main( + xa, ya, seqx, seqy, secx, secy, + res.t0, res.u0, res.tu_vec, res.TM1, res.TM2, res.TM3, res.TM4, 
res.TM5, + res.d0_0, res.TM_0, res.d0A, res.d0B, res.d0u, res.d0a, res.d0_out, + res.seqM, res.seqxA, res.seqyA, res.do_vec, + res.rmsd0, res.L_ali, res.Liden, res.TM_ali, res.rmsd_ali, res.n_ali, res.n_ali8, + xlen, ylen, sequence, Lnorm_ass, d0_scale, + i_opt, a_opt, u_opt, d_opt, force_fast_opt, + mol_type, hinge_opt, ss_opt); + + // Fallback compensation when too few hinges are found + if (hinge_opt && res.hingeNum <= 1 && res.n_ali8 < 0.6 * getmin(xlen, ylen)) + { + FlexAlignResult res_h; + res_h.tu_vec.push_back(res.tu_vec[0]); + tu2t_u(res.tu_vec[0], res_h.t0, res_h.u0); + + res_h.hingeNum = flexalign_main( + xa, ya, seqx, seqy, secx, secy, + res_h.t0, res_h.u0, res_h.tu_vec, + res_h.TM1, res_h.TM2, res_h.TM3, res_h.TM4, res_h.TM5, + res_h.d0_0, res_h.TM_0, res.d0A, res.d0B, res.d0u, res.d0a, res_h.d0_out, + res_h.seqM, res_h.seqxA, res_h.seqyA, res_h.do_vec, + res_h.rmsd0, res_h.L_ali, res_h.Liden, res_h.TM_ali, res_h.rmsd_ali, + res_h.n_ali, res_h.n_ali8, + xlen, ylen, sequence, Lnorm_ass, d0_scale, i_opt, + a_opt, u_opt, d_opt, force_fast_opt, + mol_type, hinge_opt, ss_opt); + + double TM = (res.TM1 > res.TM2) ? res.TM1 : res.TM2; + double TM_h = (res_h.TM1 > res_h.TM2) ? res_h.TM1 : res_h.TM2; + if (TM_h > TM) { + res = res_h; // Safely overwrite with the better refined results + } + } +} + void recursive_bisection( double **xa_full, double **ya_full, const string& seqx_full, const string& seqy_full, const string& secx_full, const string& secy_full, @@ -3572,7 +3004,7 @@ void recursive_bisection( int len2 = end2 - start2 + 1; int shorter_len = min(len1, len2); - // 1. 内存切片构造 + // 1. Construct memory slices double **xa, **ya; char *seqx = new char[len1 + 1]; char *secx = new char[len1 + 1]; @@ -3592,60 +3024,40 @@ void recursive_bisection( seqx[len1] = '\0'; secx[len1] = '\0'; seqy[len2] = '\0'; secy[len2] = '\0'; - // 2. 调用 flexalign_best 的核心评估逻辑 (ss_opt = 0 and 1) + // 2. 
Call core evaluation logic of flexalign (test both ss_opts) double global_max_TM = -1.0; BisectRes best_res; best_res.start1 = start1; best_res.end1 = end1; best_res.start2 = start2; best_res.end2 = end2; for (int cur_ss_opt = 0; cur_ss_opt < 2; cur_ss_opt++) { - double t0[3], u0[3][3]; - double TM1, TM2, TM3, TM4, TM5, d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out = 5.0; - string seqM, seqxA, seqyA; - double rmsd0 = 0.0, Liden = 0, TM_ali, rmsd_ali; - int L_ali, n_ali = 0, n_ali8 = 0; - vector> tu_vec; - vector do_vec; - + FlexAlignResult cur_res; bool force_fast_opt = (min(len1, len2) > 1500) ? true : fast_opt; - flexalign_main( - xa, ya, seqx, seqy, secx, secy, - t0, u0, tu_vec, TM1, TM2, TM3, TM4, TM5, - d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, - seqM, seqxA, seqyA, do_vec, - rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, - len1, len2, sequence, Lnorm_ass, d0_scale, - i_opt, a_opt, u_opt, d_opt, force_fast_opt, mol_type, hinge_opt, cur_ss_opt + execute_flexalign_with_fallback( + xa, ya, seqx, seqy, secx, secy, len1, len2, sequence, Lnorm_ass, d0_scale, + i_opt, a_opt, u_opt, d_opt, force_fast_opt, mol_type, hinge_opt, cur_ss_opt, cur_res ); - double cur_avg_TM = (TM1 + TM2) / 2.0; + double cur_avg_TM = (cur_res.TM1 + cur_res.TM2) / 2.0; if (cur_avg_TM > global_max_TM) { global_max_TM = cur_avg_TM; best_res.avg_TM = cur_avg_TM; - best_res.TM_u = TM4; // TM4 承载基于 Lnorm_ass (user-specified) 的归一化分数 - best_res.seqxA = seqxA; best_res.seqyA = seqyA; best_res.seqM = seqM; - best_res.L_ali = L_ali; - best_res.n_ali8 = n_ali8; // <--- 新增:记录 n_ali8 - best_res.Liden = Liden; - best_res.TM_ali = TM_ali; best_res.rmsd_ali = rmsd_ali; - - best_res.tu_vec.clear(); - for(auto& t : tu_vec) best_res.tu_vec.push_back(t); + best_res.flex_res = cur_res; } } - // 清理当前切片内存 + // Clean up current slice memory delete[] seqx; delete[] secx; delete[] seqy; delete[] secy; DeleteArray(&xa, len1); DeleteArray(&ya, len2); - // 3. 递归终止条件 + // 3. 
Recursive termination condition if (best_res.avg_TM >= tm_threshold || shorter_len < min_length) { results.push_back(best_res); return; } - // 4. 计算中点并二分 + // 4. Calculate midpoint and bisect int mid1, mid2; if (len1 <= len2) { mid1 = start1 + len1 / 2 - 1; @@ -3666,152 +3078,266 @@ void recursive_bisection( mol_type, hinge_opt, i_opt, a_opt, u_opt, d_opt, d0_scale, fast_opt, sequence, results); } -int flexalign_bisection(string &xname, string &yname, const string &fname_super, - const string &fname_lign, const string &fname_matrix, - vector &sequence, double Lnorm_ass, const double d0_scale, - const bool m_opt, const int i_opt, const int o_opt, const int a_opt, - const bool u_opt, const bool d_opt, const double TMcut, - const int infmt1_opt, const int infmt2_opt, const int ter_opt, - const int split_opt, const int outfmt_opt, const bool fast_opt, - const int mirror_opt, const int het_opt, const string &atom_opt, - const bool autojustify, const string &mol_opt, const string &dir_opt, - const string &dirpair_opt, const string &dir1_opt, const string &dir2_opt, - const vector &chain2parse1, const vector &chain2parse2, - const vector &model2parse1, const vector &model2parse2, - const int byresi_opt, const vector &chain1_list, - const vector &chain2_list, const int hinge_opt, - double tm_threshold = 0.6, int min_length = 50) +// Unified engine replacing flexalign, flexalign_best, and flexalign_bisection +int flexalign_unified(string &xname, string &yname, const string &fname_super, + const string &fname_lign, const string &fname_matrix, + vector &sequence, const double Lnorm_ass, const double d0_scale, + const bool m_opt, const int i_opt, const int o_opt, const int a_opt, + const bool u_opt, const bool d_opt, const double TMcut, + const int infmt1_opt, const int infmt2_opt, const int ter_opt, + const int split_opt, const int outfmt_opt, const bool fast_opt, + const int mirror_opt, const int het_opt, const string &atom_opt, + const bool autojustify, const string 
&mol_opt, const string &dir_opt, + const string &dirpair_opt, const string &dir1_opt, const string &dir2_opt, + const vector &chain2parse1, const vector &chain2parse2, + const vector &model2parse1, const vector &model2parse2, + const int byresi_opt, const vector &chain1_list, + const vector &chain2_list, const int hinge_opt, const int ss_opt, + FlexAlignMode mode = FLEX_STANDARD, + double tm_threshold = 0.6, int min_length = 50) { - vector> PDB_lines1, PDB_lines2; - vector mol_vec1, mol_vec2; - vector chainID_list1, chainID_list2; - int read_resi = byresi_opt; + vector> PDB_lines1; + vector> PDB_lines2; + vector mol_vec1; + vector mol_vec2; + vector chainID_list1; + vector chainID_list2; + int i, j, chain_i, chain_j, r, xlen, ylen, xchainnum, ychainnum; + char *seqx, *seqy, *secx, *secy; + double **xa, **ya; + vector resi_vec1; + vector resi_vec2; + int read_resi = byresi_opt; if (byresi_opt == 0 && o_opt) read_resi = 2; - for (int i = 0; i < chain1_list.size(); i++) { + for (i = 0; i < chain1_list.size(); i++) { xname = chain1_list[i]; - int xchainnum = get_PDB_lines(xname, PDB_lines1, chainID_list1, mol_vec1, ter_opt, infmt1_opt, atom_opt, autojustify, split_opt, het_opt, chain2parse1, model2parse1); - if (!xchainnum) continue; - - for (int chain_i = 0; chain_i < xchainnum; chain_i++) { - int xlen = PDB_lines1[chain_i].size(); - if (mol_opt == "RNA") mol_vec1[chain_i] = 1; else if (mol_opt == "protein") mol_vec1[chain_i] = -1; + xchainnum = get_PDB_lines(xname, PDB_lines1, chainID_list1, + mol_vec1, ter_opt, infmt1_opt, atom_opt, autojustify, + split_opt, het_opt, chain2parse1, model2parse1); + if (!xchainnum) { + cerr << "Warning! Cannot parse file: " << xname << ". Chain number 0." 
<< endl; + continue; + } + for (chain_i = 0; chain_i < xchainnum; chain_i++) { + xlen = PDB_lines1[chain_i].size(); + if (mol_opt == "RNA") mol_vec1[chain_i] = 1; + else if (mol_opt == "protein") mol_vec1[chain_i] = -1; if (xlen < 3) continue; - double **xa; NewArray(&xa, xlen, 3); - char *seqx = new char[xlen + 1]; char *secx = new char[xlen + 1]; - vector resi_vec1; + NewArray(&xa, xlen, 3); + seqx = new char[xlen + 1]; secx = new char[xlen + 1]; read_PDB(PDB_lines1[chain_i], xa, seqx, resi_vec1, read_resi); - if (mirror_opt) for (int r = 0; r < xlen; r++) xa[r][2] = -xa[r][2]; + if (mirror_opt) for (r = 0; r < xlen; r++) xa[r][2] = -xa[r][2]; (mol_vec1[chain_i] > 0) ? make_sec(seqx, xa, xlen, secx, atom_opt) : make_sec(xa, xlen, secx); - for (int j = (dir_opt.size() > 0) * (i + 1); j < chain2_list.size(); j++) { + for (j = (dir_opt.size() > 0) * (i + 1); j < chain2_list.size(); j++) { if (dirpair_opt.size() && i != j) continue; if (PDB_lines2.size() == 0) { yname = chain2_list[j]; - int ychainnum = get_PDB_lines(yname, PDB_lines2, chainID_list2, mol_vec2, ter_opt, infmt2_opt, atom_opt, autojustify, split_opt, het_opt, chain2parse2, model2parse2); + ychainnum = get_PDB_lines(yname, PDB_lines2, chainID_list2, + mol_vec2, ter_opt, infmt2_opt, atom_opt, autojustify, + split_opt, het_opt, chain2parse2, model2parse2); if (!ychainnum) continue; } - - for (int chain_j = 0; chain_j < PDB_lines2.size(); chain_j++) { - int ylen = PDB_lines2[chain_j].size(); - if (mol_opt == "RNA") mol_vec2[chain_j] = 1; else if (mol_opt == "protein") mol_vec2[chain_j] = -1; + for (chain_j = 0; chain_j < ychainnum; chain_j++) { + ylen = PDB_lines2[chain_j].size(); + if (mol_opt == "RNA") mol_vec2[chain_j] = 1; + else if (mol_opt == "protein") mol_vec2[chain_j] = -1; if (ylen < 3) continue; - double **ya; NewArray(&ya, ylen, 3); - char *seqy = new char[ylen + 1]; char *secy = new char[ylen + 1]; - vector resi_vec2; + NewArray(&ya, ylen, 3); + seqy = new char[ylen + 1]; secy = new char[ylen + 
1]; read_PDB(PDB_lines2[chain_j], ya, seqy, resi_vec2, read_resi); (mol_vec2[chain_j] > 0) ? make_sec(seqy, ya, ylen, secy, atom_opt) : make_sec(ya, ylen, secy); - // ======================================= - // === Bisection 专属逻辑与数据流合并 === - // ======================================= - int global_short_L = min(xlen, ylen); - if (!u_opt) Lnorm_ass = global_short_L; // 强制启用 User-specified 归一化统计累加分数 - - vector map1, map2; - get_full_mapping(string(seqx), string(seqy), map1, map2); - - vector results; - recursive_bisection( - xa, ya, string(seqx), string(seqy), string(secx), string(secy), - 0, xlen - 1, 0, ylen - 1, map1, map2, Lnorm_ass, tm_threshold, min_length, - mol_vec1[chain_i] + mol_vec2[chain_j], hinge_opt, i_opt, a_opt, - true /* 锁定 u_opt 为 true 获取全局分布 */, d_opt, d0_scale, fast_opt, sequence, results - ); - - // 结果拼接 - double final_TM_u = 0.0; - string final_seqxA = "", final_seqyA = "", final_seqM = ""; - vector> final_tu_vec; - int final_L_ali = 0; - int final_n_ali8 = 0; - double final_Liden = 0, final_TM_ali = 0; - double sum_sq_dist = 0.0; // 用于正确计算全局 RMSD - - for (size_t rIdx = 0; rIdx < results.size(); rIdx++) { - BisectRes& res = results[rIdx]; - final_TM_u += res.TM_u; // TM 累加 - final_L_ali += res.L_ali; - final_n_ali8 += res.n_ali8; - final_Liden += res.Liden; - final_TM_ali += res.TM_ali; + if (byresi_opt) extract_aln_from_resi(sequence, seqx, seqy, resi_vec1, resi_vec2, byresi_opt); + + // --- CORE DISPATCH LOGIC START --- + if (mode == FLEX_BISECTION) { + int global_short_L = min(xlen, ylen); + double cur_Lnorm_ass = u_opt ? 
Lnorm_ass : global_short_L; + + vector map1, map2; + get_full_mapping(string(seqx), string(seqy), map1, map2); + + vector results; + recursive_bisection( + xa, ya, string(seqx), string(seqy), string(secx), string(secy), + 0, xlen - 1, 0, ylen - 1, map1, map2, cur_Lnorm_ass, tm_threshold, min_length, + mol_vec1[chain_i] + mol_vec2[chain_j], hinge_opt, i_opt, a_opt, + true, d_opt, d0_scale, fast_opt, sequence, results + ); + + // Result concatenation variables + double final_TM_u = 0.0; + string final_seqxA = "", final_seqyA = "", final_seqM = ""; + vector> final_tu_vec; + int final_L_ali = 0, final_n_ali = 0, final_n_ali8 = 0; - // 正确的 RMSD 聚合:基于每个 block 的平方和累加 - sum_sq_dist += res.L_ali * res.rmsd_ali * res.rmsd_ali; + // Using correctly weighted statistical aggregates for accurate flexible properties + double final_Liden = 0, sum_TM_ali_L = 0.0, sum_sq_dist_ali = 0.0, sum_sq_dist_0 = 0.0; + + for (size_t rIdx = 0; rIdx < results.size(); rIdx++) { + FlexAlignResult& res = results[rIdx].flex_res; + + // Accumulate unnormalized TM score component + double tm_raw = res.TM4 * cur_Lnorm_ass; + final_TM_u += tm_raw; + + final_L_ali += res.L_ali; + final_n_ali += res.n_ali; + final_n_ali8 += res.n_ali8; + final_Liden += res.Liden; + + // Correct calculation for overall flexible RMSD utilizing respective block rigid transforms + sum_sq_dist_ali += res.L_ali * res.rmsd_ali * res.rmsd_ali; + sum_sq_dist_0 += res.n_ali * res.rmsd0 * res.rmsd0; + + // Correct calculation to prevent accumulating ratios > 1.0 for TM_ali + sum_TM_ali_L += res.L_ali * res.TM_ali; + + if (rIdx > 0) { + // CRITICAL SEGFAULT FIX: Using dual '-' gap instead of '*'. + // This visually and logically separates the hinge blocks for output_flexalign_results + // WITHOUT incrementing the actual sequence position index (rx, ry) out of bounds. 
+ final_seqxA += "-"; + final_seqyA += "-"; + final_seqM += " "; + } + final_seqxA += res.seqxA; + final_seqyA += res.seqyA; + final_seqM += res.seqM; + + for (auto& t : res.tu_vec) final_tu_vec.push_back(t); + } - for (auto& t : res.tu_vec) final_tu_vec.push_back(t); + // Derive true merged mathematical properties + double final_rmsd_ali = (final_L_ali > 0) ? sqrt(sum_sq_dist_ali / final_L_ali) : 0.0; + double final_rmsd0 = (final_n_ali > 0) ? sqrt(sum_sq_dist_0 / final_n_ali) : 0.0; + double final_TM_ali = (final_L_ali > 0) ? (sum_TM_ali_L / final_L_ali) : 0.0; + + // Restore TM1 and TM2 scores utilizing the accumulated component sum + double final_TM1 = final_TM_u / xlen; + double final_TM2 = final_TM_u / ylen; + double final_TM_norm = final_TM_u / cur_Lnorm_ass; + + // Take the translation/rotation matrix of the first valid slice as a base placeholder + double best_t0[3] = {0}, best_u0[3][3] = {{1,0,0},{0,1,0},{0,0,1}}; + if (!final_tu_vec.empty()) { + for(int k=0; k<3; k++) for(int l=0; l<3; l++) best_u0[k][l] = final_tu_vec[0][k*3+l]; + for(int k=0; k<3; k++) best_t0[k] = final_tu_vec[0][9+k]; + } + + // Retain scale constants generated from the very first aligned slice + double final_d0_0 = results.empty() ? 0.0 : results[0].flex_res.d0_0; + double final_TM_0 = results.empty() ? 0.0 : results[0].flex_res.TM_0; + double final_d0A = results.empty() ? 0.0 : results[0].flex_res.d0A; + double final_d0B = results.empty() ? 0.0 : results[0].flex_res.d0B; + double final_d0a = results.empty() ? 0.0 : results[0].flex_res.d0a; + double final_d0u = results.empty() ? 
0.0 : results[0].flex_res.d0u; + double final_d0_out = 5.0; + + if (outfmt_opt == 0) print_version(); - if (rIdx > 0) { - final_seqxA += "*"; final_seqyA += "*"; final_seqM += "*"; + output_flexalign_results( + xname.substr(dir1_opt.size() + dir_opt.size() + dirpair_opt.size()), + yname.substr(dir2_opt.size() + dir_opt.size() + dirpair_opt.size()), + chainID_list1[chain_i], chainID_list2[chain_j], + xlen, ylen, best_t0, best_u0, final_tu_vec, + final_TM1, final_TM2, final_TM_norm, final_TM_norm, final_TM_norm, + final_rmsd0, final_d0_out, + final_seqM.c_str(), final_seqxA.c_str(), final_seqyA.c_str(), + final_Liden, final_n_ali8, final_L_ali, final_TM_ali, final_rmsd_ali, + final_TM_0, final_d0_0, final_d0A, final_d0B, cur_Lnorm_ass, d0_scale, final_d0a, final_d0u, + (m_opt ? fname_matrix : "").c_str(), outfmt_opt, ter_opt, false, split_opt, o_opt, + fname_super, i_opt, a_opt, u_opt, d_opt, mirror_opt, resi_vec1, resi_vec2 + ); + } + else { + // === Standard & Best specific logic === + FlexAlignResult best_res; + double global_max_TM = -1.0; + + int start_ss = (mode == FLEX_BEST) ? 0 : ss_opt; + int end_ss = (mode == FLEX_BEST) ? 1 : ss_opt; + + bool force_fast_opt = (getmin(xlen, ylen) > ((mode == FLEX_STANDARD) ? 2000 : 1500)) ? true : fast_opt; + + for (int cur_ss_opt = start_ss; cur_ss_opt <= end_ss; cur_ss_opt++) { + FlexAlignResult cur_res; + execute_flexalign_with_fallback( + xa, ya, seqx, seqy, secx, secy, xlen, ylen, sequence, Lnorm_ass, d0_scale, + i_opt, a_opt, u_opt, d_opt, force_fast_opt, mol_vec1[chain_i] + mol_vec2[chain_j], + hinge_opt, cur_ss_opt, cur_res + ); + + double cur_max_TM = (cur_res.TM1 > cur_res.TM2) ? cur_res.TM1 : cur_res.TM2; + if (cur_max_TM > global_max_TM) { + global_max_TM = cur_max_TM; + best_res = cur_res; + } } - final_seqxA += res.seqxA; final_seqyA += res.seqyA; final_seqM += res.seqM; - } - // 计算真正的合并后 RMSD - double final_rmsd_ali = (final_L_ali > 0) ? 
sqrt(sum_sq_dist / final_L_ali) : 0.0; - - // 还原 TM1 和 TM2 分数(考虑到 xlen 和 ylen 不同的情况) - double sum_tm_raw = final_TM_u * Lnorm_ass; - double final_TM1 = sum_tm_raw / xlen; - double final_TM2 = sum_tm_raw / ylen; - - // 取第一个有效切片的位移矩阵作为 t0, u0 兼容打印(仅作基准展示) - double best_t0[3] = {0}, best_u0[3][3] = {{1,0,0},{0,1,0},{0,0,1}}; - if (!final_tu_vec.empty()) { - for(int k=0; k<3; k++) for(int l=0; l<3; l++) best_u0[k][l] = final_tu_vec[0][k*3+l]; - for(int k=0; k<3; k++) best_t0[k] = final_tu_vec[0][9+k]; + if (outfmt_opt == 0) print_version(); + output_flexalign_results( + xname.substr(dir1_opt.size() + dir_opt.size() + dirpair_opt.size()), + yname.substr(dir2_opt.size() + dir_opt.size() + dirpair_opt.size()), + chainID_list1[chain_i], chainID_list2[chain_j], + xlen, ylen, best_res.t0, best_res.u0, best_res.tu_vec, best_res.TM1, best_res.TM2, best_res.TM3, best_res.TM4, best_res.TM5, + best_res.rmsd0, best_res.d0_out, best_res.seqM.c_str(), + best_res.seqxA.c_str(), best_res.seqyA.c_str(), best_res.Liden, + best_res.n_ali8, best_res.L_ali, best_res.TM_ali, best_res.rmsd_ali, best_res.TM_0, best_res.d0_0, + best_res.d0A, best_res.d0B, Lnorm_ass, d0_scale, best_res.d0a, best_res.d0u, + (m_opt ? 
fname_matrix : "").c_str(), + outfmt_opt, ter_opt, false, split_opt, o_opt, + fname_super, i_opt, a_opt, u_opt, d_opt, mirror_opt, + resi_vec1, resi_vec2); } + // --- CORE DISPATCH LOGIC END --- - if (outfmt_opt == 0) print_version(); - - // 输出调用(修正了参数顺序错误问题) - output_flexalign_results( - xname.substr(dir1_opt.size() + dir_opt.size() + dirpair_opt.size()), - yname.substr(dir2_opt.size() + dir_opt.size() + dirpair_opt.size()), - chainID_list1[chain_i], chainID_list2[chain_j], - xlen, ylen, best_t0, best_u0, final_tu_vec, - final_TM1, final_TM2, final_TM_u, final_TM_u, final_TM_u, // 正确缩放的 TM1 和 TM2 - final_rmsd_ali /*用真实 RMSD 作为 placeholder*/, 5.0, - final_seqM.c_str(), final_seqxA.c_str(), final_seqyA.c_str(), - final_Liden, final_n_ali8, final_L_ali, final_TM_ali, final_rmsd_ali, // 修正参数位置 - 0.0, 0.0, 0.0, 0.0, Lnorm_ass, d0_scale, 0.0, 0.0, - (m_opt ? fname_matrix : "").c_str(), outfmt_opt, ter_opt, false, split_opt, o_opt, - fname_super, i_opt, a_opt, u_opt, d_opt, mirror_opt, resi_vec1, resi_vec2 - ); - - // Memory Cleanup - delete[] seqy; delete[] secy; DeleteArray(&ya, ylen); + // Cleanup memory + DeleteArray(&ya, ylen); + delete[] seqy; delete[] secy; + resi_vec2.clear(); + } + if (chain2_list.size() > 1) { + yname.clear(); + for (chain_j = 0; chain_j < ychainnum; chain_j++) PDB_lines2[chain_j].clear(); + PDB_lines2.clear(); chainID_list2.clear(); mol_vec2.clear(); } } - delete[] seqx; delete[] secx; DeleteArray(&xa, xlen); + PDB_lines1[chain_i].clear(); + DeleteArray(&xa, xlen); + delete[] seqx; delete[] secx; + resi_vec1.clear(); } - PDB_lines1.clear(); + xname.clear(); PDB_lines1.clear(); chainID_list1.clear(); mol_vec1.clear(); + } + if (chain2_list.size() == 1) { + yname.clear(); + for (chain_j = 0; chain_j < ychainnum; chain_j++) PDB_lines2[chain_j].clear(); + PDB_lines2.clear(); resi_vec2.clear(); chainID_list2.clear(); mol_vec2.clear(); } return 0; } +// ======================================================================= +// Direct Drop-in 
Wrappers (No changes needed in main() bindings) +// ======================================================================= + +int flexalign(string &xname, string &yname, const string &fname_super, const string &fname_lign, const string &fname_matrix, vector &sequence, const double Lnorm_ass, const double d0_scale, const bool m_opt, const int i_opt, const int o_opt, const int a_opt, const bool u_opt, const bool d_opt, const double TMcut, const int infmt1_opt, const int infmt2_opt, const int ter_opt, const int split_opt, const int outfmt_opt, const bool fast_opt, const int mirror_opt, const int het_opt, const string &atom_opt, const bool autojustify, const string &mol_opt, const string &dir_opt, const string &dirpair_opt, const string &dir1_opt, const string &dir2_opt, const vector &chain2parse1, const vector &chain2parse2, const vector &model2parse1, const vector &model2parse2, const int byresi_opt, const vector &chain1_list, const vector &chain2_list, const int hinge_opt, const int ss_opt) { + return flexalign_unified(xname, yname, fname_super, fname_lign, fname_matrix, sequence, Lnorm_ass, d0_scale, m_opt, i_opt, o_opt, a_opt, u_opt, d_opt, TMcut, infmt1_opt, infmt2_opt, ter_opt, split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt, atom_opt, autojustify, mol_opt, dir_opt, dirpair_opt, dir1_opt, dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, byresi_opt, chain1_list, chain2_list, hinge_opt, ss_opt, FLEX_STANDARD); +} + +int flexalign_best(string &xname, string &yname, const string &fname_super, const string &fname_lign, const string &fname_matrix, vector &sequence, const double Lnorm_ass, const double d0_scale, const bool m_opt, const int i_opt, const int o_opt, const int a_opt, const bool u_opt, const bool d_opt, const double TMcut, const int infmt1_opt, const int infmt2_opt, const int ter_opt, const int split_opt, const int outfmt_opt, const bool fast_opt, const int mirror_opt, const int het_opt, const string &atom_opt, const bool autojustify, 
const string &mol_opt, const string &dir_opt, const string &dirpair_opt, const string &dir1_opt, const string &dir2_opt, const vector &chain2parse1, const vector &chain2parse2, const vector &model2parse1, const vector &model2parse2, const int byresi_opt, const vector &chain1_list, const vector &chain2_list, const int hinge_opt) { + return flexalign_unified(xname, yname, fname_super, fname_lign, fname_matrix, sequence, Lnorm_ass, d0_scale, m_opt, i_opt, o_opt, a_opt, u_opt, d_opt, TMcut, infmt1_opt, infmt2_opt, ter_opt, split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt, atom_opt, autojustify, mol_opt, dir_opt, dirpair_opt, dir1_opt, dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, byresi_opt, chain1_list, chain2_list, hinge_opt, 0 /* ss_opt is ignored in BEST mode */, FLEX_BEST); +} + +int flexalign_bisection(string &xname, string &yname, const string &fname_super, const string &fname_lign, const string &fname_matrix, vector &sequence, double Lnorm_ass, const double d0_scale, const bool m_opt, const int i_opt, const int o_opt, const int a_opt, const bool u_opt, const bool d_opt, const double TMcut, const int infmt1_opt, const int infmt2_opt, const int ter_opt, const int split_opt, const int outfmt_opt, const bool fast_opt, const int mirror_opt, const int het_opt, const string &atom_opt, const bool autojustify, const string &mol_opt, const string &dir_opt, const string &dirpair_opt, const string &dir1_opt, const string &dir2_opt, const vector &chain2parse1, const vector &chain2parse2, const vector &model2parse1, const vector &model2parse2, const int byresi_opt, const vector &chain1_list, const vector &chain2_list, const int hinge_opt, double tm_threshold = 0.6, int min_length = 50) { + return flexalign_unified(xname, yname, fname_super, fname_lign, fname_matrix, sequence, Lnorm_ass, d0_scale, m_opt, i_opt, o_opt, a_opt, u_opt, d_opt, TMcut, infmt1_opt, infmt2_opt, ter_opt, split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt, atom_opt, 
autojustify, mol_opt, dir_opt, dirpair_opt, dir1_opt, dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, byresi_opt, chain1_list, chain2_list, hinge_opt, 0 /* ss_opt ignored */, FLEX_BISECTION, tm_threshold, min_length); +} + int main(int argc, char *argv[]) { if (argc < 2) @@ -4505,12 +4031,12 @@ int main(int argc, char *argv[]) byresi_opt, chain1_list, chain2_list, hinge_opt); else if (mm_opt == 10) flexalign_bisection(xname, yname, fname_super, fname_lign, - fname_matrix, sequence, Lnorm_ass, d0_scale, m_opt, i_opt, o_opt, - a_opt, u_opt, d_opt, TMcut, infmt1_opt, infmt2_opt, ter_opt, - split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt, - atom_opt, autojustify, mol_opt, dir_opt, dirpair_opt, dir1_opt, - dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, - byresi_opt, chain1_list, chain2_list, hinge_opt, 0.6, 50); + fname_matrix, sequence, Lnorm_ass, d0_scale, m_opt, i_opt, o_opt, + a_opt, u_opt, d_opt, TMcut, infmt1_opt, infmt2_opt, ter_opt, + split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt, + atom_opt, autojustify, mol_opt, dir_opt, dirpair_opt, dir1_opt, + dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, + byresi_opt, chain1_list, chain2_list, hinge_opt, 0.7, 50); else cerr << "WARNING! 
-mm " << mm_opt << " not implemented" << endl; From 83cc8600f1642fdcb5679b6959475835618bc098 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A5=9D=E4=BA=91=E7=AF=AA?= Date: Thu, 30 Apr 2026 13:30:30 +0800 Subject: [PATCH 06/23] add -mm 10 --- USalign.cpp | 475 +++++++++++++++++++++++++++++++++------------------- flexalign.h | 30 +++- 2 files changed, 329 insertions(+), 176 deletions(-) diff --git a/USalign.cpp b/USalign.cpp index 89848d3..aa9b183 100644 --- a/USalign.cpp +++ b/USalign.cpp @@ -2846,72 +2846,101 @@ int SOIalign(string &xname, string &yname, const string &fname_super, // ======================================================================= // Needleman-Wunsch sequence alignment and gap filling to generate full mapping -void get_full_mapping(const string& seq1, const string& seq2, vector& map1, vector& map2) { +void get_full_mapping(const string &seq1, const string &seq2, vector &map1, vector &map2) +{ int n = seq1.length(); int m = seq2.length(); int match = 2, mismatch = -1, gap = -1; vector> score(n + 1, vector(m + 1, 0)); - - for (int i = 0; i <= n; i++) score[i][0] = gap * i; - for (int j = 0; j <= m; j++) score[0][j] = gap * j; - - for (int i = 1; i <= n; i++) { - for (int j = 1; j <= m; j++) { - int diag = score[i-1][j-1] + (seq1[i-1] == seq2[j-1] ? match : mismatch); - int up = score[i-1][j] + gap; - int left = score[i][j-1] + gap; + + for (int i = 0; i <= n; i++) + score[i][0] = gap * i; + for (int j = 0; j <= m; j++) + score[0][j] = gap * j; + + for (int i = 1; i <= n; i++) + { + for (int j = 1; j <= m; j++) + { + int diag = score[i - 1][j - 1] + (seq1[i - 1] == seq2[j - 1] ? match : mismatch); + int up = score[i - 1][j] + gap; + int left = score[i][j - 1] + gap; score[i][j] = max({diag, up, left}); } } map raw_map1, raw_map2; int i = n, j = m; - while (i > 0 && j > 0) { + while (i > 0 && j > 0) + { int current = score[i][j]; - int diag = score[i-1][j-1] + (seq1[i-1] == seq2[j-1] ? 
match : mismatch); - int left = score[i][j-1] + gap; - - if (current == diag) { - raw_map1[i-1] = j-1; - raw_map2[j-1] = i-1; - i--; j--; - } else if (current == left) { + int diag = score[i - 1][j - 1] + (seq1[i - 1] == seq2[j - 1] ? match : mismatch); + int left = score[i][j - 1] + gap; + + if (current == diag) + { + raw_map1[i - 1] = j - 1; + raw_map2[j - 1] = i - 1; + i--; + j--; + } + else if (current == left) + { j--; - } else { + } + else + { i--; } } // Fill Gaps: Look left and right for the nearest aligned positions map1.assign(n, 0); - for (int i = 0; i < n; i++) { - if (raw_map1.count(i)) map1[i] = raw_map1[i]; - else { + for (int i = 0; i < n; i++) + { + if (raw_map1.count(i)) + map1[i] = raw_map1[i]; + else + { int left = i - 1, right = i + 1; - while (left >= 0 && !raw_map1.count(left)) left--; - while (right < n && !raw_map1.count(right)) right++; - if (left >= 0 && right < n) map1[i] = (i - left) <= (right - i) ? raw_map1[left] : raw_map1[right]; - else if (left >= 0) map1[i] = raw_map1[left]; - else if (right < n) map1[i] = raw_map1[right]; + while (left >= 0 && !raw_map1.count(left)) + left--; + while (right < n && !raw_map1.count(right)) + right++; + if (left >= 0 && right < n) + map1[i] = (i - left) <= (right - i) ? raw_map1[left] : raw_map1[right]; + else if (left >= 0) + map1[i] = raw_map1[left]; + else if (right < n) + map1[i] = raw_map1[right]; } } map2.assign(m, 0); - for (int i = 0; i < m; i++) { - if (raw_map2.count(i)) map2[i] = raw_map2[i]; - else { + for (int i = 0; i < m; i++) + { + if (raw_map2.count(i)) + map2[i] = raw_map2[i]; + else + { int left = i - 1, right = i + 1; - while (left >= 0 && !raw_map2.count(left)) left--; - while (right < m && !raw_map2.count(right)) right++; - if (left >= 0 && right < m) map2[i] = (i - left) <= (right - i) ? 
raw_map2[left] : raw_map2[right]; - else if (left >= 0) map2[i] = raw_map2[left]; - else if (right < m) map2[i] = raw_map2[right]; + while (left >= 0 && !raw_map2.count(left)) + left--; + while (right < m && !raw_map2.count(right)) + right++; + if (left >= 0 && right < m) + map2[i] = (i - left) <= (right - i) ? raw_map2[left] : raw_map2[right]; + else if (left >= 0) + map2[i] = raw_map2[left]; + else if (right < m) + map2[i] = raw_map2[right]; } } } // Data structure to hold outputs of flexalign_main to avoid parameter clutter -struct FlexAlignResult { +struct FlexAlignResult +{ double t0[3]; double u0[3][3]; vector> tu_vec; @@ -2925,22 +2954,27 @@ struct FlexAlignResult { FlexAlignResult() : TM1(-1.0), TM2(-1.0), TM3(-1.0), TM4(-1.0), TM5(-1.0), d0_0(0.0), TM_0(0.0), d0A(0.0), d0B(0.0), d0u(0.0), d0a(0.0), d0_out(5.0), rmsd0(0.0), Liden(0.0), TM_ali(0.0), rmsd_ali(0.0), - L_ali(0), n_ali(0), n_ali8(0), hingeNum(0) { - for(int i=0; i<3; i++) { + L_ali(0), n_ali(0), n_ali8(0), hingeNum(0) + { + for (int i = 0; i < 3; i++) + { t0[i] = 0.0; - for(int j=0; j<3; j++) u0[i][j] = (i==j)?1.0:0.0; + for (int j = 0; j < 3; j++) + u0[i][j] = (i == j) ? 1.0 : 0.0; } } }; // Structure to cache the best result of a single slice -struct BisectRes { +struct BisectRes +{ int start1, end1, start2, end2; double avg_TM; FlexAlignResult flex_res; }; -enum FlexAlignMode { +enum FlexAlignMode +{ FLEX_STANDARD = 0, FLEX_BEST = 1, FLEX_BISECTION = 2 @@ -2984,22 +3018,23 @@ void execute_flexalign_with_fallback( double TM = (res.TM1 > res.TM2) ? res.TM1 : res.TM2; double TM_h = (res_h.TM1 > res_h.TM2) ? 
res_h.TM1 : res_h.TM2; - if (TM_h > TM) { + if (TM_h > TM) + { res = res_h; // Safely overwrite with the better refined results } } } void recursive_bisection( - double **xa_full, double **ya_full, const string& seqx_full, const string& seqy_full, - const string& secx_full, const string& secy_full, + double **xa_full, double **ya_full, const string &seqx_full, const string &seqy_full, + const string &secx_full, const string &secy_full, int start1, int end1, int start2, int end2, - const vector& map1, const vector& map2, + const vector &map1, const vector &map2, double Lnorm_ass, double tm_threshold, int min_length, - int mol_type, int hinge_opt, int i_opt, int a_opt, bool u_opt, bool d_opt, - double d0_scale, bool fast_opt, vector& sequence, - vector& results -) { + int mol_type, int hinge_opt, int i_opt, int a_opt, bool u_opt, bool d_opt, + double d0_scale, bool fast_opt, vector &sequence, + vector &results) +{ int len1 = end1 - start1 + 1; int len2 = end2 - start2 + 1; int shorter_len = min(len1, len2); @@ -3013,34 +3048,47 @@ void recursive_bisection( NewArray(&xa, len1, 3); NewArray(&ya, len2, 3); - for (int i = 0; i < len1; i++) { - xa[i][0] = xa_full[start1 + i][0]; xa[i][1] = xa_full[start1 + i][1]; xa[i][2] = xa_full[start1 + i][2]; - seqx[i] = seqx_full[start1 + i]; secx[i] = secx_full[start1 + i]; + for (int i = 0; i < len1; i++) + { + xa[i][0] = xa_full[start1 + i][0]; + xa[i][1] = xa_full[start1 + i][1]; + xa[i][2] = xa_full[start1 + i][2]; + seqx[i] = seqx_full[start1 + i]; + secx[i] = secx_full[start1 + i]; } - for (int i = 0; i < len2; i++) { - ya[i][0] = ya_full[start2 + i][0]; ya[i][1] = ya_full[start2 + i][1]; ya[i][2] = ya_full[start2 + i][2]; - seqy[i] = seqy_full[start2 + i]; secy[i] = secy_full[start2 + i]; + for (int i = 0; i < len2; i++) + { + ya[i][0] = ya_full[start2 + i][0]; + ya[i][1] = ya_full[start2 + i][1]; + ya[i][2] = ya_full[start2 + i][2]; + seqy[i] = seqy_full[start2 + i]; + secy[i] = secy_full[start2 + i]; } - seqx[len1] = '\0'; 
secx[len1] = '\0'; - seqy[len2] = '\0'; secy[len2] = '\0'; + seqx[len1] = '\0'; + secx[len1] = '\0'; + seqy[len2] = '\0'; + secy[len2] = '\0'; // 2. Call core evaluation logic of flexalign (test both ss_opts) double global_max_TM = -1.0; BisectRes best_res; - best_res.start1 = start1; best_res.end1 = end1; - best_res.start2 = start2; best_res.end2 = end2; + best_res.start1 = start1; + best_res.end1 = end1; + best_res.start2 = start2; + best_res.end2 = end2; - for (int cur_ss_opt = 0; cur_ss_opt < 2; cur_ss_opt++) { + for (int cur_ss_opt = 0; cur_ss_opt < 2; cur_ss_opt++) + { FlexAlignResult cur_res; bool force_fast_opt = (min(len1, len2) > 1500) ? true : fast_opt; execute_flexalign_with_fallback( xa, ya, seqx, seqy, secx, secy, len1, len2, sequence, Lnorm_ass, d0_scale, - i_opt, a_opt, u_opt, d_opt, force_fast_opt, mol_type, hinge_opt, cur_ss_opt, cur_res - ); + i_opt, a_opt, u_opt, d_opt, force_fast_opt, mol_type, hinge_opt, cur_ss_opt, cur_res); double cur_avg_TM = (cur_res.TM1 + cur_res.TM2) / 2.0; - if (cur_avg_TM > global_max_TM) { + if (cur_avg_TM > global_max_TM) + { global_max_TM = cur_avg_TM; best_res.avg_TM = cur_avg_TM; best_res.flex_res = cur_res; @@ -3048,22 +3096,30 @@ void recursive_bisection( } // Clean up current slice memory - delete[] seqx; delete[] secx; delete[] seqy; delete[] secy; - DeleteArray(&xa, len1); DeleteArray(&ya, len2); + delete[] seqx; + delete[] secx; + delete[] seqy; + delete[] secy; + DeleteArray(&xa, len1); + DeleteArray(&ya, len2); // 3. Recursive termination condition - if (best_res.avg_TM >= tm_threshold || shorter_len < min_length) { + if (best_res.avg_TM >= tm_threshold || shorter_len < min_length) + { results.push_back(best_res); return; } // 4. 
Calculate midpoint and bisect int mid1, mid2; - if (len1 <= len2) { + if (len1 <= len2) + { mid1 = start1 + len1 / 2 - 1; mid2 = map1[mid1]; mid2 = max(start2, min(mid2, end2 - 1)); - } else { + } + else + { mid2 = start2 + len2 / 2 - 1; mid1 = map2[mid2]; mid1 = max(start1, min(mid1, end1 - 1)); @@ -3072,7 +3128,7 @@ void recursive_bisection( recursive_bisection(xa_full, ya_full, seqx_full, seqy_full, secx_full, secy_full, start1, mid1, start2, mid2, map1, map2, Lnorm_ass, tm_threshold, min_length, mol_type, hinge_opt, i_opt, a_opt, u_opt, d_opt, d0_scale, fast_opt, sequence, results); - + recursive_bisection(xa_full, ya_full, seqx_full, seqy_full, secx_full, secy_full, mid1 + 1, end1, mid2 + 1, end2, map1, map2, Lnorm_ass, tm_threshold, min_length, mol_type, hinge_opt, i_opt, a_opt, u_opt, d_opt, d0_scale, fast_opt, sequence, results); @@ -3080,84 +3136,105 @@ void recursive_bisection( // Unified engine replacing flexalign, flexalign_best, and flexalign_bisection int flexalign_unified(string &xname, string &yname, const string &fname_super, - const string &fname_lign, const string &fname_matrix, - vector &sequence, const double Lnorm_ass, const double d0_scale, - const bool m_opt, const int i_opt, const int o_opt, const int a_opt, - const bool u_opt, const bool d_opt, const double TMcut, - const int infmt1_opt, const int infmt2_opt, const int ter_opt, - const int split_opt, const int outfmt_opt, const bool fast_opt, - const int mirror_opt, const int het_opt, const string &atom_opt, - const bool autojustify, const string &mol_opt, const string &dir_opt, - const string &dirpair_opt, const string &dir1_opt, const string &dir2_opt, - const vector &chain2parse1, const vector &chain2parse2, - const vector &model2parse1, const vector &model2parse2, - const int byresi_opt, const vector &chain1_list, - const vector &chain2_list, const int hinge_opt, const int ss_opt, - FlexAlignMode mode = FLEX_STANDARD, - double tm_threshold = 0.6, int min_length = 50) + const string 
&fname_lign, const string &fname_matrix, + vector &sequence, const double Lnorm_ass, const double d0_scale, + const bool m_opt, const int i_opt, const int o_opt, const int a_opt, + const bool u_opt, const bool d_opt, const double TMcut, + const int infmt1_opt, const int infmt2_opt, const int ter_opt, + const int split_opt, const int outfmt_opt, const bool fast_opt, + const int mirror_opt, const int het_opt, const string &atom_opt, + const bool autojustify, const string &mol_opt, const string &dir_opt, + const string &dirpair_opt, const string &dir1_opt, const string &dir2_opt, + const vector &chain2parse1, const vector &chain2parse2, + const vector &model2parse1, const vector &model2parse2, + const int byresi_opt, const vector &chain1_list, + const vector &chain2_list, const int hinge_opt, const int ss_opt, + FlexAlignMode mode = FLEX_STANDARD, + double tm_threshold = 0.6, int min_length = 50) { - vector> PDB_lines1; - vector> PDB_lines2; - vector mol_vec1; - vector mol_vec2; - vector chainID_list1; - vector chainID_list2; + vector> PDB_lines1; + vector> PDB_lines2; + vector mol_vec1; + vector mol_vec2; + vector chainID_list1; + vector chainID_list2; int i, j, chain_i, chain_j, r, xlen, ylen, xchainnum, ychainnum; char *seqx, *seqy, *secx, *secy; double **xa, **ya; vector resi_vec1; vector resi_vec2; int read_resi = byresi_opt; - if (byresi_opt == 0 && o_opt) read_resi = 2; + if (byresi_opt == 0 && o_opt) + read_resi = 2; - for (i = 0; i < chain1_list.size(); i++) { + for (i = 0; i < chain1_list.size(); i++) + { xname = chain1_list[i]; xchainnum = get_PDB_lines(xname, PDB_lines1, chainID_list1, mol_vec1, ter_opt, infmt1_opt, atom_opt, autojustify, split_opt, het_opt, chain2parse1, model2parse1); - if (!xchainnum) { + if (!xchainnum) + { cerr << "Warning! Cannot parse file: " << xname << ". Chain number 0." 
<< endl; continue; } - for (chain_i = 0; chain_i < xchainnum; chain_i++) { + for (chain_i = 0; chain_i < xchainnum; chain_i++) + { xlen = PDB_lines1[chain_i].size(); - if (mol_opt == "RNA") mol_vec1[chain_i] = 1; - else if (mol_opt == "protein") mol_vec1[chain_i] = -1; - if (xlen < 3) continue; + if (mol_opt == "RNA") + mol_vec1[chain_i] = 1; + else if (mol_opt == "protein") + mol_vec1[chain_i] = -1; + if (xlen < 3) + continue; NewArray(&xa, xlen, 3); - seqx = new char[xlen + 1]; secx = new char[xlen + 1]; + seqx = new char[xlen + 1]; + secx = new char[xlen + 1]; read_PDB(PDB_lines1[chain_i], xa, seqx, resi_vec1, read_resi); - if (mirror_opt) for (r = 0; r < xlen; r++) xa[r][2] = -xa[r][2]; + if (mirror_opt) + for (r = 0; r < xlen; r++) + xa[r][2] = -xa[r][2]; (mol_vec1[chain_i] > 0) ? make_sec(seqx, xa, xlen, secx, atom_opt) : make_sec(xa, xlen, secx); - for (j = (dir_opt.size() > 0) * (i + 1); j < chain2_list.size(); j++) { - if (dirpair_opt.size() && i != j) continue; - if (PDB_lines2.size() == 0) { + for (j = (dir_opt.size() > 0) * (i + 1); j < chain2_list.size(); j++) + { + if (dirpair_opt.size() && i != j) + continue; + if (PDB_lines2.size() == 0) + { yname = chain2_list[j]; ychainnum = get_PDB_lines(yname, PDB_lines2, chainID_list2, mol_vec2, ter_opt, infmt2_opt, atom_opt, autojustify, split_opt, het_opt, chain2parse2, model2parse2); - if (!ychainnum) continue; + if (!ychainnum) + continue; } - for (chain_j = 0; chain_j < ychainnum; chain_j++) { + for (chain_j = 0; chain_j < ychainnum; chain_j++) + { ylen = PDB_lines2[chain_j].size(); - if (mol_opt == "RNA") mol_vec2[chain_j] = 1; - else if (mol_opt == "protein") mol_vec2[chain_j] = -1; - if (ylen < 3) continue; + if (mol_opt == "RNA") + mol_vec2[chain_j] = 1; + else if (mol_opt == "protein") + mol_vec2[chain_j] = -1; + if (ylen < 3) + continue; NewArray(&ya, ylen, 3); - seqy = new char[ylen + 1]; secy = new char[ylen + 1]; + seqy = new char[ylen + 1]; + secy = new char[ylen + 1]; 
read_PDB(PDB_lines2[chain_j], ya, seqy, resi_vec2, read_resi); (mol_vec2[chain_j] > 0) ? make_sec(seqy, ya, ylen, secy, atom_opt) : make_sec(ya, ylen, secy); - if (byresi_opt) extract_aln_from_resi(sequence, seqx, seqy, resi_vec1, resi_vec2, byresi_opt); + if (byresi_opt) + extract_aln_from_resi(sequence, seqx, seqy, resi_vec1, resi_vec2, byresi_opt); // --- CORE DISPATCH LOGIC START --- - if (mode == FLEX_BISECTION) { + if (mode == FLEX_BISECTION) + { int global_short_L = min(xlen, ylen); double cur_Lnorm_ass = u_opt ? Lnorm_ass : global_short_L; - + vector map1, map2; get_full_mapping(string(seqx), string(seqy), map1, map2); @@ -3165,57 +3242,83 @@ int flexalign_unified(string &xname, string &yname, const string &fname_super, recursive_bisection( xa, ya, string(seqx), string(seqy), string(secx), string(secy), 0, xlen - 1, 0, ylen - 1, map1, map2, cur_Lnorm_ass, tm_threshold, min_length, - mol_vec1[chain_i] + mol_vec2[chain_j], hinge_opt, i_opt, a_opt, - true, d_opt, d0_scale, fast_opt, sequence, results - ); + mol_vec1[chain_i] + mol_vec2[chain_j], hinge_opt, i_opt, a_opt, + true, d_opt, d0_scale, fast_opt, sequence, results); // Result concatenation variables double final_TM_u = 0.0; string final_seqxA = "", final_seqyA = "", final_seqM = ""; vector> final_tu_vec; int final_L_ali = 0, final_n_ali = 0, final_n_ali8 = 0; - + // Using correctly weighted statistical aggregates for accurate flexible properties double final_Liden = 0, sum_TM_ali_L = 0.0, sum_sq_dist_ali = 0.0, sum_sq_dist_0 = 0.0; - for (size_t rIdx = 0; rIdx < results.size(); rIdx++) { - FlexAlignResult& res = results[rIdx].flex_res; - + for (size_t rIdx = 0; rIdx < results.size(); rIdx++) + { + FlexAlignResult &res = results[rIdx].flex_res; + // Accumulate unnormalized TM score component - double tm_raw = res.TM4 * cur_Lnorm_ass; - final_TM_u += tm_raw; + double tm_raw = res.TM4 * cur_Lnorm_ass; + final_TM_u += tm_raw; final_L_ali += res.L_ali; final_n_ali += res.n_ali; final_n_ali8 += res.n_ali8; 
final_Liden += res.Liden; - + // Correct calculation for overall flexible RMSD utilizing respective block rigid transforms sum_sq_dist_ali += res.L_ali * res.rmsd_ali * res.rmsd_ali; - sum_sq_dist_0 += res.n_ali * res.rmsd0 * res.rmsd0; - + sum_sq_dist_0 += res.n_ali * res.rmsd0 * res.rmsd0; + // Correct calculation to prevent accumulating ratios > 1.0 for TM_ali - sum_TM_ali_L += res.L_ali * res.TM_ali; - - if (rIdx > 0) { + sum_TM_ali_L += res.L_ali * res.TM_ali; + + if (rIdx > 0) + { // CRITICAL SEGFAULT FIX: Using dual '-' gap instead of '*'. // This visually and logically separates the hinge blocks for output_flexalign_results // WITHOUT incrementing the actual sequence position index (rx, ry) out of bounds. - final_seqxA += "-"; - final_seqyA += "-"; - final_seqM += " "; + final_seqxA += "-"; + final_seqyA += "-"; + final_seqM += " "; } - final_seqxA += res.seqxA; - final_seqyA += res.seqyA; - final_seqM += res.seqM; - - for (auto& t : res.tu_vec) final_tu_vec.push_back(t); + final_seqxA += res.seqxA; + final_seqyA += res.seqyA; + int current_tu_offset = final_tu_vec.size(); + string shifted_seqM = res.seqM; + + for (char &c : shifted_seqM) + { + if (c >= '0' && c <= '9') + { + int val = c - '0'; + int new_val = val + current_tu_offset; + + if (new_val <= 9) + { + c = '0' + new_val; + } + else if (new_val < 36) + { + c = 'a' + (new_val - 10); + } + else + { + c = 'A' + (new_val - 36); + } + } + } + final_seqM += shifted_seqM; + + for (auto &t : res.tu_vec) + final_tu_vec.push_back(t); } // Derive true merged mathematical properties double final_rmsd_ali = (final_L_ali > 0) ? sqrt(sum_sq_dist_ali / final_L_ali) : 0.0; - double final_rmsd0 = (final_n_ali > 0) ? sqrt(sum_sq_dist_0 / final_n_ali) : 0.0; - double final_TM_ali = (final_L_ali > 0) ? (sum_TM_ali_L / final_L_ali) : 0.0; + double final_rmsd0 = (final_n_ali > 0) ? sqrt(sum_sq_dist_0 / final_n_ali) : 0.0; + double final_TM_ali = (final_L_ali > 0) ? 
(sum_TM_ali_L / final_L_ali) : 0.0; // Restore TM1 and TM2 scores utilizing the accumulated component sum double final_TM1 = final_TM_u / xlen; @@ -3223,63 +3326,70 @@ int flexalign_unified(string &xname, string &yname, const string &fname_super, double final_TM_norm = final_TM_u / cur_Lnorm_ass; // Take the translation/rotation matrix of the first valid slice as a base placeholder - double best_t0[3] = {0}, best_u0[3][3] = {{1,0,0},{0,1,0},{0,0,1}}; - if (!final_tu_vec.empty()) { - for(int k=0; k<3; k++) for(int l=0; l<3; l++) best_u0[k][l] = final_tu_vec[0][k*3+l]; - for(int k=0; k<3; k++) best_t0[k] = final_tu_vec[0][9+k]; + double best_t0[3] = {0}, best_u0[3][3] = {{1, 0, 0}, {0, 1, 0}, {0, 0, 1}}; + if (!final_tu_vec.empty()) + { + for (int k = 0; k < 3; k++) + for (int l = 0; l < 3; l++) + best_u0[k][l] = final_tu_vec[0][k * 3 + l]; + for (int k = 0; k < 3; k++) + best_t0[k] = final_tu_vec[0][9 + k]; } // Retain scale constants generated from the very first aligned slice double final_d0_0 = results.empty() ? 0.0 : results[0].flex_res.d0_0; double final_TM_0 = results.empty() ? 0.0 : results[0].flex_res.TM_0; - double final_d0A = results.empty() ? 0.0 : results[0].flex_res.d0A; - double final_d0B = results.empty() ? 0.0 : results[0].flex_res.d0B; - double final_d0a = results.empty() ? 0.0 : results[0].flex_res.d0a; - double final_d0u = results.empty() ? 0.0 : results[0].flex_res.d0u; + double final_d0A = results.empty() ? 0.0 : results[0].flex_res.d0A; + double final_d0B = results.empty() ? 0.0 : results[0].flex_res.d0B; + double final_d0a = results.empty() ? 0.0 : results[0].flex_res.d0a; + double final_d0u = results.empty() ? 
0.0 : results[0].flex_res.d0u; double final_d0_out = 5.0; - if (outfmt_opt == 0) print_version(); - + if (outfmt_opt == 0) + print_version(); + output_flexalign_results( xname.substr(dir1_opt.size() + dir_opt.size() + dirpair_opt.size()), yname.substr(dir2_opt.size() + dir_opt.size() + dirpair_opt.size()), chainID_list1[chain_i], chainID_list2[chain_j], - xlen, ylen, best_t0, best_u0, final_tu_vec, + xlen, ylen, best_t0, best_u0, final_tu_vec, final_TM1, final_TM2, final_TM_norm, final_TM_norm, final_TM_norm, - final_rmsd0, final_d0_out, - final_seqM.c_str(), final_seqxA.c_str(), final_seqyA.c_str(), + final_rmsd0, final_d0_out, + final_seqM.c_str(), final_seqxA.c_str(), final_seqyA.c_str(), final_Liden, final_n_ali8, final_L_ali, final_TM_ali, final_rmsd_ali, final_TM_0, final_d0_0, final_d0A, final_d0B, cur_Lnorm_ass, d0_scale, final_d0a, final_d0u, (m_opt ? fname_matrix : "").c_str(), outfmt_opt, ter_opt, false, split_opt, o_opt, - fname_super, i_opt, a_opt, u_opt, d_opt, mirror_opt, resi_vec1, resi_vec2 - ); - } - else { + fname_super, i_opt, a_opt, u_opt, d_opt, mirror_opt, resi_vec1, resi_vec2); + } + else + { // === Standard & Best specific logic === FlexAlignResult best_res; double global_max_TM = -1.0; int start_ss = (mode == FLEX_BEST) ? 0 : ss_opt; - int end_ss = (mode == FLEX_BEST) ? 1 : ss_opt; - + int end_ss = (mode == FLEX_BEST) ? 1 : ss_opt; + bool force_fast_opt = (getmin(xlen, ylen) > ((mode == FLEX_STANDARD) ? 2000 : 1500)) ? 
true : fast_opt; - for (int cur_ss_opt = start_ss; cur_ss_opt <= end_ss; cur_ss_opt++) { + for (int cur_ss_opt = start_ss; cur_ss_opt <= end_ss; cur_ss_opt++) + { FlexAlignResult cur_res; execute_flexalign_with_fallback( xa, ya, seqx, seqy, secx, secy, xlen, ylen, sequence, Lnorm_ass, d0_scale, - i_opt, a_opt, u_opt, d_opt, force_fast_opt, mol_vec1[chain_i] + mol_vec2[chain_j], - hinge_opt, cur_ss_opt, cur_res - ); + i_opt, a_opt, u_opt, d_opt, force_fast_opt, mol_vec1[chain_i] + mol_vec2[chain_j], + hinge_opt, cur_ss_opt, cur_res); double cur_max_TM = (cur_res.TM1 > cur_res.TM2) ? cur_res.TM1 : cur_res.TM2; - if (cur_max_TM > global_max_TM) { + if (cur_max_TM > global_max_TM) + { global_max_TM = cur_max_TM; best_res = cur_res; } } - if (outfmt_opt == 0) print_version(); + if (outfmt_opt == 0) + print_version(); output_flexalign_results( xname.substr(dir1_opt.size() + dir_opt.size() + dirpair_opt.size()), yname.substr(dir2_opt.size() + dir_opt.size() + dirpair_opt.size()), @@ -3298,26 +3408,40 @@ int flexalign_unified(string &xname, string &yname, const string &fname_super, // Cleanup memory DeleteArray(&ya, ylen); - delete[] seqy; delete[] secy; + delete[] seqy; + delete[] secy; resi_vec2.clear(); } - if (chain2_list.size() > 1) { + if (chain2_list.size() > 1) + { yname.clear(); - for (chain_j = 0; chain_j < ychainnum; chain_j++) PDB_lines2[chain_j].clear(); - PDB_lines2.clear(); chainID_list2.clear(); mol_vec2.clear(); + for (chain_j = 0; chain_j < ychainnum; chain_j++) + PDB_lines2[chain_j].clear(); + PDB_lines2.clear(); + chainID_list2.clear(); + mol_vec2.clear(); } } PDB_lines1[chain_i].clear(); DeleteArray(&xa, xlen); - delete[] seqx; delete[] secx; + delete[] seqx; + delete[] secx; resi_vec1.clear(); } - xname.clear(); PDB_lines1.clear(); chainID_list1.clear(); mol_vec1.clear(); + xname.clear(); + PDB_lines1.clear(); + chainID_list1.clear(); + mol_vec1.clear(); } - if (chain2_list.size() == 1) { + if (chain2_list.size() == 1) + { yname.clear(); - for 
(chain_j = 0; chain_j < ychainnum; chain_j++) PDB_lines2[chain_j].clear(); - PDB_lines2.clear(); resi_vec2.clear(); chainID_list2.clear(); mol_vec2.clear(); + for (chain_j = 0; chain_j < ychainnum; chain_j++) + PDB_lines2[chain_j].clear(); + PDB_lines2.clear(); + resi_vec2.clear(); + chainID_list2.clear(); + mol_vec2.clear(); } return 0; } @@ -3326,15 +3450,18 @@ int flexalign_unified(string &xname, string &yname, const string &fname_super, // Direct Drop-in Wrappers (No changes needed in main() bindings) // ======================================================================= -int flexalign(string &xname, string &yname, const string &fname_super, const string &fname_lign, const string &fname_matrix, vector &sequence, const double Lnorm_ass, const double d0_scale, const bool m_opt, const int i_opt, const int o_opt, const int a_opt, const bool u_opt, const bool d_opt, const double TMcut, const int infmt1_opt, const int infmt2_opt, const int ter_opt, const int split_opt, const int outfmt_opt, const bool fast_opt, const int mirror_opt, const int het_opt, const string &atom_opt, const bool autojustify, const string &mol_opt, const string &dir_opt, const string &dirpair_opt, const string &dir1_opt, const string &dir2_opt, const vector &chain2parse1, const vector &chain2parse2, const vector &model2parse1, const vector &model2parse2, const int byresi_opt, const vector &chain1_list, const vector &chain2_list, const int hinge_opt, const int ss_opt) { +int flexalign(string &xname, string &yname, const string &fname_super, const string &fname_lign, const string &fname_matrix, vector &sequence, const double Lnorm_ass, const double d0_scale, const bool m_opt, const int i_opt, const int o_opt, const int a_opt, const bool u_opt, const bool d_opt, const double TMcut, const int infmt1_opt, const int infmt2_opt, const int ter_opt, const int split_opt, const int outfmt_opt, const bool fast_opt, const int mirror_opt, const int het_opt, const string &atom_opt, const bool autojustify, 
const string &mol_opt, const string &dir_opt, const string &dirpair_opt, const string &dir1_opt, const string &dir2_opt, const vector &chain2parse1, const vector &chain2parse2, const vector &model2parse1, const vector &model2parse2, const int byresi_opt, const vector &chain1_list, const vector &chain2_list, const int hinge_opt, const int ss_opt) +{ return flexalign_unified(xname, yname, fname_super, fname_lign, fname_matrix, sequence, Lnorm_ass, d0_scale, m_opt, i_opt, o_opt, a_opt, u_opt, d_opt, TMcut, infmt1_opt, infmt2_opt, ter_opt, split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt, atom_opt, autojustify, mol_opt, dir_opt, dirpair_opt, dir1_opt, dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, byresi_opt, chain1_list, chain2_list, hinge_opt, ss_opt, FLEX_STANDARD); } -int flexalign_best(string &xname, string &yname, const string &fname_super, const string &fname_lign, const string &fname_matrix, vector &sequence, const double Lnorm_ass, const double d0_scale, const bool m_opt, const int i_opt, const int o_opt, const int a_opt, const bool u_opt, const bool d_opt, const double TMcut, const int infmt1_opt, const int infmt2_opt, const int ter_opt, const int split_opt, const int outfmt_opt, const bool fast_opt, const int mirror_opt, const int het_opt, const string &atom_opt, const bool autojustify, const string &mol_opt, const string &dir_opt, const string &dirpair_opt, const string &dir1_opt, const string &dir2_opt, const vector &chain2parse1, const vector &chain2parse2, const vector &model2parse1, const vector &model2parse2, const int byresi_opt, const vector &chain1_list, const vector &chain2_list, const int hinge_opt) { +int flexalign_best(string &xname, string &yname, const string &fname_super, const string &fname_lign, const string &fname_matrix, vector &sequence, const double Lnorm_ass, const double d0_scale, const bool m_opt, const int i_opt, const int o_opt, const int a_opt, const bool u_opt, const bool d_opt, const double TMcut, const 
int infmt1_opt, const int infmt2_opt, const int ter_opt, const int split_opt, const int outfmt_opt, const bool fast_opt, const int mirror_opt, const int het_opt, const string &atom_opt, const bool autojustify, const string &mol_opt, const string &dir_opt, const string &dirpair_opt, const string &dir1_opt, const string &dir2_opt, const vector &chain2parse1, const vector &chain2parse2, const vector &model2parse1, const vector &model2parse2, const int byresi_opt, const vector &chain1_list, const vector &chain2_list, const int hinge_opt) +{ return flexalign_unified(xname, yname, fname_super, fname_lign, fname_matrix, sequence, Lnorm_ass, d0_scale, m_opt, i_opt, o_opt, a_opt, u_opt, d_opt, TMcut, infmt1_opt, infmt2_opt, ter_opt, split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt, atom_opt, autojustify, mol_opt, dir_opt, dirpair_opt, dir1_opt, dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, byresi_opt, chain1_list, chain2_list, hinge_opt, 0 /* ss_opt is ignored in BEST mode */, FLEX_BEST); } -int flexalign_bisection(string &xname, string &yname, const string &fname_super, const string &fname_lign, const string &fname_matrix, vector &sequence, double Lnorm_ass, const double d0_scale, const bool m_opt, const int i_opt, const int o_opt, const int a_opt, const bool u_opt, const bool d_opt, const double TMcut, const int infmt1_opt, const int infmt2_opt, const int ter_opt, const int split_opt, const int outfmt_opt, const bool fast_opt, const int mirror_opt, const int het_opt, const string &atom_opt, const bool autojustify, const string &mol_opt, const string &dir_opt, const string &dirpair_opt, const string &dir1_opt, const string &dir2_opt, const vector &chain2parse1, const vector &chain2parse2, const vector &model2parse1, const vector &model2parse2, const int byresi_opt, const vector &chain1_list, const vector &chain2_list, const int hinge_opt, double tm_threshold = 0.6, int min_length = 50) { +int flexalign_bisection(string &xname, string &yname, const 
string &fname_super, const string &fname_lign, const string &fname_matrix, vector &sequence, double Lnorm_ass, const double d0_scale, const bool m_opt, const int i_opt, const int o_opt, const int a_opt, const bool u_opt, const bool d_opt, const double TMcut, const int infmt1_opt, const int infmt2_opt, const int ter_opt, const int split_opt, const int outfmt_opt, const bool fast_opt, const int mirror_opt, const int het_opt, const string &atom_opt, const bool autojustify, const string &mol_opt, const string &dir_opt, const string &dirpair_opt, const string &dir1_opt, const string &dir2_opt, const vector &chain2parse1, const vector &chain2parse2, const vector &model2parse1, const vector &model2parse2, const int byresi_opt, const vector &chain1_list, const vector &chain2_list, const int hinge_opt, double tm_threshold = 0.6, int min_length = 50) +{ return flexalign_unified(xname, yname, fname_super, fname_lign, fname_matrix, sequence, Lnorm_ass, d0_scale, m_opt, i_opt, o_opt, a_opt, u_opt, d_opt, TMcut, infmt1_opt, infmt2_opt, ter_opt, split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt, atom_opt, autojustify, mol_opt, dir_opt, dirpair_opt, dir1_opt, dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, byresi_opt, chain1_list, chain2_list, hinge_opt, 0 /* ss_opt ignored */, FLEX_BISECTION, tm_threshold, min_length); } diff --git a/flexalign.h b/flexalign.h index ec3d0d5..439ba51 100644 --- a/flexalign.h +++ b/flexalign.h @@ -749,7 +749,20 @@ void output_flexalign_rasmol(const string xname, const string yname, break; } } - resi2hinge_dict[resi_vec1[j]] = hinge_char - '0'; + int hinge_idx = 0; + if (hinge_char >= '0' && hinge_char <= '9') + { + hinge_idx = hinge_char - '0'; + } + else if (hinge_char >= 'a' && hinge_char <= 'z') + { + hinge_idx = hinge_char - 'a' + 10; + } + else if (hinge_char >= 'A' && hinge_char <= 'Z') + { + hinge_idx = hinge_char - 'A' + 36; + } + resi2hinge_dict[resi_vec1[j]] = hinge_idx; } string resi = resi_vec1[0]; int read_resi = 
resi.size() - 4; @@ -1556,7 +1569,20 @@ void output_flexalign_pymol(const string xname, const string yname, break; } } - resi2hinge_dict[resi_vec1[j]] = hinge_char - '0'; + int hinge_idx = 0; + if (hinge_char >= '0' && hinge_char <= '9') + { + hinge_idx = hinge_char - '0'; + } + else if (hinge_char >= 'a' && hinge_char <= 'z') + { + hinge_idx = hinge_char - 'a' + 10; + } + else if (hinge_char >= 'A' && hinge_char <= 'Z') + { + hinge_idx = hinge_char - 'A' + 36; + } + resi2hinge_dict[resi_vec1[j]] = hinge_idx; } string resi = resi_vec1[0]; int read_resi = resi.size() - 4; From 1601e488c27a9ebb37003a4e9d199a0eea59a5da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A5=9D=E4=BA=91=E7=AF=AA?= Date: Fri, 1 May 2026 09:56:04 +0800 Subject: [PATCH 07/23] add -TMflex --- .gitignore | 1 + USalign.cpp | 15 +++++++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 321adfa..4f55d8c 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ # binary executables addChainID +biounitasym cif2pdb TMalign TMalignc diff --git a/USalign.cpp b/USalign.cpp index 7890f43..33c0e8e 100644 --- a/USalign.cpp +++ b/USalign.cpp @@ -95,6 +95,9 @@ void print_extra_help() //"\n" //" -hinge Maximum number of hinge allowed in flexible alignment. default: 9\n" "\n" + " -TMflex TM-score threshold for bisection flexible alignment\n" + " (-mm 10). Default is 0.6.\n" + "\n" " -se Do not perform superposition. Useful for extracting alignment from\n" " superposed structure pairs\n" "\n" @@ -3378,7 +3381,7 @@ int flexalign_unified(string &xname, string &yname, const string &fname_super, int start_ss = (mode == FLEX_BEST) ? 0 : ss_opt; int end_ss = (mode == FLEX_BEST) ? 1 : ss_opt; - bool force_fast_opt = (getmin(xlen, ylen) > ((mode == FLEX_STANDARD) ? 2000 : 1500)) ? true : fast_opt; + bool force_fast_opt = (getmin(xlen, ylen) > 1500) ? 
true : fast_opt; for (int cur_ss_opt = start_ss; cur_ss_opt <= end_ss; cur_ss_opt++) { @@ -3515,6 +3518,7 @@ int main(int argc, char *argv[]) int closeK_opt = -1; // number of atoms for SOI initial alignment. // 5 and 0 for -mm 5 and 6 int hinge_opt = 9; // maximum number of hinge allowed for flexible + double tmflex_opt = 0.6; // TM-score threshold for -mm 10 int mirror_opt = 0; // do not align mirror int het_opt = 0; // do not read HETATM residues int mm_opt = 0; // do not perform MM-align @@ -3643,6 +3647,13 @@ int main(int argc, char *argv[]) hinge_opt = atoi(argv[i + 1]); i++; } + else if (!strcmp(argv[i], "-TMflex")) + { + if (i >= (argc - 1)) + PrintErrorAndQuit("ERROR! Missing value for -TMflex"); + tmflex_opt = atof(argv[i + 1]); + i++; + } else if (!strcmp(argv[i], "-v")) { v_opt = true; @@ -4171,7 +4182,7 @@ int main(int argc, char *argv[]) split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt, atom_opt, autojustify, mol_opt, dir_opt, dirpair_opt, dir1_opt, dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, - byresi_opt, chain1_list, chain2_list, hinge_opt, 0.7, 50); + byresi_opt, chain1_list, chain2_list, hinge_opt, tmflex_opt, 50); else cerr << "WARNING! -mm " << mm_opt << " not implemented" << endl; From a7f58587c930284911974081e9285e0777c58fdb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A5=9D=E4=BA=91=E7=AF=AA?= Date: Tue, 12 May 2026 08:12:10 +0800 Subject: [PATCH 08/23] rewrite -mm 10 --- USalign.cpp | 474 +++++++--------------------------------------------- 1 file changed, 58 insertions(+), 416 deletions(-) diff --git a/USalign.cpp b/USalign.cpp index 33c0e8e..7a55af3 100644 --- a/USalign.cpp +++ b/USalign.cpp @@ -8,17 +8,16 @@ using namespace std; void print_version() { - cout << -"\n" -" ********************************************************************\n" -" * US-align (Version 20260329) *\n" -" * Universal Structure Alignment of Proteins and Nucleic Acids *\n" -" * Reference: C Zhang, L Freddolino, Y Zhang. 
(2026) Nat Protoc *\n" -" * C Zhang, M Shine, AM Pyle, Y Zhang. (2022) Nat Methods*\n" -" * C Zhang, AM Pyle (2022) iScience. *\n" -" * Please email comments and suggestions to zhang@zhanggroup.org *\n" -" ********************************************************************" - << endl; + cout << "\n" + " ********************************************************************\n" + " * US-align (Version 20260329) *\n" + " * Universal Structure Alignment of Proteins and Nucleic Acids *\n" + " * Reference: C Zhang, L Freddolino, Y Zhang. (2026) Nat Protoc *\n" + " * C Zhang, M Shine, AM Pyle, Y Zhang. (2022) Nat Methods*\n" + " * C Zhang, AM Pyle (2022) iScience. *\n" + " * Please email comments and suggestions to zhang@zhanggroup.org *\n" + " ********************************************************************" + << endl; } void print_extra_help() @@ -95,9 +94,6 @@ void print_extra_help() //"\n" //" -hinge Maximum number of hinge allowed in flexible alignment. default: 9\n" "\n" - " -TMflex TM-score threshold for bisection flexible alignment\n" - " (-mm 10). Default is 0.6.\n" - "\n" " -se Do not perform superposition. 
Useful for extracting alignment from\n" " superposed structure pairs\n" "\n" @@ -797,18 +793,18 @@ int MMalign(const string &xname, const string &yname, } /* declare TM-score tables */ - int chain1_num=xa_vec.size(); - int chain2_num=ya_vec.size(); - int chain_num =MAX(chain1_num,chain2_num); - vector tmp_str_vec(chain2_num,""); + int chain1_num = xa_vec.size(); + int chain2_num = ya_vec.size(); + int chain_num = MAX(chain1_num, chain2_num); + vector tmp_str_vec(chain2_num, ""); double **TMave_mat; double **ut_mat; // rotation matrices for all-against-all alignment - int ui,uj,ut_idx; - NewArray(&TMave_mat,chain_num,chain_num); - NewArray(&ut_mat,chain1_num*chain2_num,4*3); - vector >seqxA_mat(chain1_num,tmp_str_vec); - vector > seqM_mat(chain1_num,tmp_str_vec); - vector >seqyA_mat(chain1_num,tmp_str_vec); + int ui, uj, ut_idx; + NewArray(&TMave_mat, chain_num, chain_num); + NewArray(&ut_mat, chain1_num * chain2_num, 4 * 3); + vector> seqxA_mat(chain1_num, tmp_str_vec); + vector> seqM_mat(chain1_num, tmp_str_vec); + vector> seqyA_mat(chain1_num, tmp_str_vec); double maxTMmono = -1; int maxTMmono_i, maxTMmono_j; @@ -821,7 +817,8 @@ int MMalign(const string &xname, const string &yname, xlen = xlen_vec[i]; if (xlen < 3) { - for (j=0;j xlen + ylen - 3) { - for (ui=0;ui<3;ui++) for (uj=0;uj<3;uj++) - ut_mat[ut_idx][ui*3+uj]=(ui==uj)?1:0; - for (uj=0;uj<3;uj++) ut_mat[ut_idx][9+uj]=0; - TMave_mat[i][j]=TMave_mat[j][i]=0; + for (ui = 0; ui < 3; ui++) + for (uj = 0; uj < 3; uj++) + ut_mat[ut_idx][ui * 3 + uj] = (ui == uj) ? 
1 : 0; + for (uj = 0; uj < 3; uj++) + ut_mat[ut_idx][9 + uj] = 0; + TMave_mat[i][j] = TMave_mat[j][i] = 0; seqM.clear(); seqxA.clear(); seqyA.clear(); @@ -948,13 +947,15 @@ int MMalign(const string &xname, const string &yname, mol_vec1[i] + mol_vec2[j], TMcut, 0); /* store result */ - for (ui=0;ui<3;ui++) - for (uj=0;uj<3;uj++) ut_mat[ut_idx][ui*3+uj]=u0[ui][uj]; - for (uj=0;uj<3;uj++) ut_mat[ut_idx][9+uj]=t0[uj]; - seqxA_mat[i][j]=seqxA; - seqyA_mat[i][j]=seqyA; - TMave_mat[i][j]=TMave_mat[j][i]=TM4*Lnorm_tmp; - if (TMave_mat[i][j]>maxTMmono) + for (ui = 0; ui < 3; ui++) + for (uj = 0; uj < 3; uj++) + ut_mat[ut_idx][ui * 3 + uj] = u0[ui][uj]; + for (uj = 0; uj < 3; uj++) + ut_mat[ut_idx][9 + uj] = t0[uj]; + seqxA_mat[i][j] = seqxA; + seqyA_mat[i][j] = seqyA; + TMave_mat[i][j] = TMave_mat[j][i] = TM4 * Lnorm_tmp; + if (TMave_mat[i][j] > maxTMmono) { maxTMmono = TMave_mat[i][j]; maxTMmono_i = i; @@ -1028,20 +1029,20 @@ int MMalign(const string &xname, const string &yname, /* refine enhanced greedy search with centroid superposition */ // double het_deg=check_heterooligomer(TMave_mat, chain1_num, chain2_num); homo_refined_greedy_search(TMave_mat, assign1_list, - assign2_list, chain1_num, chain2_num, xcentroids, - ycentroids, d0MM, len_aa+len_na, ut_mat); + assign2_list, chain1_num, chain2_num, xcentroids, + ycentroids, d0MM, len_aa + len_na, ut_mat); - if (chain1_num<=chain2_num) + if (chain1_num <= chain2_num) { hetero_refined_greedy_search(TMave_mat, assign1_list, - assign2_list, chain1_num, chain2_num, xcentroids, - ycentroids, d0MM, len_aa+len_na); + assign2_list, chain1_num, chain2_num, xcentroids, + ycentroids, d0MM, len_aa + len_na); } else { hetero_refined_greedy_search(TMave_mat, assign2_list, - assign1_list, chain2_num, chain1_num, ycentroids, - xcentroids, d0MM, len_aa+len_na); + assign1_list, chain2_num, chain1_num, ycentroids, + xcentroids, d0MM, len_aa + len_na); } /* clean up */ @@ -1196,13 +1197,13 @@ int MMalign(const string &xname, const string 
&yname, a_opt, d_opt, fast_opt, full_opt, mirror_opt, resi_vec1, resi_vec2); /* clean up everything */ - delete [] assign1_list; - delete [] assign2_list; - DeleteArray(&TMave_mat,chain_num); - DeleteArray(&ut_mat, chain1_num*chain2_num); - vector >().swap(seqxA_mat); - vector >().swap(seqM_mat); - vector >().swap(seqyA_mat); + delete[] assign1_list; + delete[] assign2_list; + DeleteArray(&TMave_mat, chain_num); + DeleteArray(&ut_mat, chain1_num * chain2_num); + vector>().swap(seqxA_mat); + vector>().swap(seqM_mat); + vector>().swap(seqyA_mat); vector().swap(tmp_str_vec); delete[] assign1_init; @@ -2856,99 +2857,6 @@ int SOIalign(string &xname, string &yname, const string &fname_super, // Data structures and Helpers for flexalign unified pipeline // ======================================================================= -// Needleman-Wunsch sequence alignment and gap filling to generate full mapping -void get_full_mapping(const string &seq1, const string &seq2, vector &map1, vector &map2) -{ - int n = seq1.length(); - int m = seq2.length(); - int match = 2, mismatch = -1, gap = -1; - vector> score(n + 1, vector(m + 1, 0)); - - for (int i = 0; i <= n; i++) - score[i][0] = gap * i; - for (int j = 0; j <= m; j++) - score[0][j] = gap * j; - - for (int i = 1; i <= n; i++) - { - for (int j = 1; j <= m; j++) - { - int diag = score[i - 1][j - 1] + (seq1[i - 1] == seq2[j - 1] ? match : mismatch); - int up = score[i - 1][j] + gap; - int left = score[i][j - 1] + gap; - score[i][j] = max({diag, up, left}); - } - } - - map raw_map1, raw_map2; - int i = n, j = m; - while (i > 0 && j > 0) - { - int current = score[i][j]; - int diag = score[i - 1][j - 1] + (seq1[i - 1] == seq2[j - 1] ? 
match : mismatch); - int left = score[i][j - 1] + gap; - - if (current == diag) - { - raw_map1[i - 1] = j - 1; - raw_map2[j - 1] = i - 1; - i--; - j--; - } - else if (current == left) - { - j--; - } - else - { - i--; - } - } - - // Fill Gaps: Look left and right for the nearest aligned positions - map1.assign(n, 0); - for (int i = 0; i < n; i++) - { - if (raw_map1.count(i)) - map1[i] = raw_map1[i]; - else - { - int left = i - 1, right = i + 1; - while (left >= 0 && !raw_map1.count(left)) - left--; - while (right < n && !raw_map1.count(right)) - right++; - if (left >= 0 && right < n) - map1[i] = (i - left) <= (right - i) ? raw_map1[left] : raw_map1[right]; - else if (left >= 0) - map1[i] = raw_map1[left]; - else if (right < n) - map1[i] = raw_map1[right]; - } - } - - map2.assign(m, 0); - for (int i = 0; i < m; i++) - { - if (raw_map2.count(i)) - map2[i] = raw_map2[i]; - else - { - int left = i - 1, right = i + 1; - while (left >= 0 && !raw_map2.count(left)) - left--; - while (right < m && !raw_map2.count(right)) - right++; - if (left >= 0 && right < m) - map2[i] = (i - left) <= (right - i) ? 
raw_map2[left] : raw_map2[right]; - else if (left >= 0) - map2[i] = raw_map2[left]; - else if (right < m) - map2[i] = raw_map2[right]; - } - } -} - // Data structure to hold outputs of flexalign_main to avoid parameter clutter struct FlexAlignResult { @@ -2976,19 +2884,11 @@ struct FlexAlignResult } }; -// Structure to cache the best result of a single slice -struct BisectRes -{ - int start1, end1, start2, end2; - double avg_TM; - FlexAlignResult flex_res; -}; - enum FlexAlignMode { FLEX_STANDARD = 0, FLEX_BEST = 1, - FLEX_BISECTION = 2 + FLEX_FATCAT = 2 }; // Encapsulates the execution of flexalign_main and its fallback refinement logic @@ -3036,115 +2936,6 @@ void execute_flexalign_with_fallback( } } -void recursive_bisection( - double **xa_full, double **ya_full, const string &seqx_full, const string &seqy_full, - const string &secx_full, const string &secy_full, - int start1, int end1, int start2, int end2, - const vector &map1, const vector &map2, - double Lnorm_ass, double tm_threshold, int min_length, - int mol_type, int hinge_opt, int i_opt, int a_opt, bool u_opt, bool d_opt, - double d0_scale, bool fast_opt, vector &sequence, - vector &results) -{ - int len1 = end1 - start1 + 1; - int len2 = end2 - start2 + 1; - int shorter_len = min(len1, len2); - - // 1. 
Construct memory slices - double **xa, **ya; - char *seqx = new char[len1 + 1]; - char *secx = new char[len1 + 1]; - char *seqy = new char[len2 + 1]; - char *secy = new char[len2 + 1]; - NewArray(&xa, len1, 3); - NewArray(&ya, len2, 3); - - for (int i = 0; i < len1; i++) - { - xa[i][0] = xa_full[start1 + i][0]; - xa[i][1] = xa_full[start1 + i][1]; - xa[i][2] = xa_full[start1 + i][2]; - seqx[i] = seqx_full[start1 + i]; - secx[i] = secx_full[start1 + i]; - } - for (int i = 0; i < len2; i++) - { - ya[i][0] = ya_full[start2 + i][0]; - ya[i][1] = ya_full[start2 + i][1]; - ya[i][2] = ya_full[start2 + i][2]; - seqy[i] = seqy_full[start2 + i]; - secy[i] = secy_full[start2 + i]; - } - seqx[len1] = '\0'; - secx[len1] = '\0'; - seqy[len2] = '\0'; - secy[len2] = '\0'; - - // 2. Call core evaluation logic of flexalign (test both ss_opts) - double global_max_TM = -1.0; - BisectRes best_res; - best_res.start1 = start1; - best_res.end1 = end1; - best_res.start2 = start2; - best_res.end2 = end2; - - for (int cur_ss_opt = 0; cur_ss_opt < 2; cur_ss_opt++) - { - FlexAlignResult cur_res; - bool force_fast_opt = (min(len1, len2) > 1500) ? true : fast_opt; - - execute_flexalign_with_fallback( - xa, ya, seqx, seqy, secx, secy, len1, len2, sequence, Lnorm_ass, d0_scale, - i_opt, a_opt, u_opt, d_opt, force_fast_opt, mol_type, hinge_opt, cur_ss_opt, cur_res); - - double cur_avg_TM = (cur_res.TM1 + cur_res.TM2) / 2.0; - if (cur_avg_TM > global_max_TM) - { - global_max_TM = cur_avg_TM; - best_res.avg_TM = cur_avg_TM; - best_res.flex_res = cur_res; - } - } - - // Clean up current slice memory - delete[] seqx; - delete[] secx; - delete[] seqy; - delete[] secy; - DeleteArray(&xa, len1); - DeleteArray(&ya, len2); - - // 3. Recursive termination condition - if (best_res.avg_TM >= tm_threshold || shorter_len < min_length) - { - results.push_back(best_res); - return; - } - - // 4. 
Calculate midpoint and bisect - int mid1, mid2; - if (len1 <= len2) - { - mid1 = start1 + len1 / 2 - 1; - mid2 = map1[mid1]; - mid2 = max(start2, min(mid2, end2 - 1)); - } - else - { - mid2 = start2 + len2 / 2 - 1; - mid1 = map2[mid2]; - mid1 = max(start1, min(mid1, end1 - 1)); - } - - recursive_bisection(xa_full, ya_full, seqx_full, seqy_full, secx_full, secy_full, - start1, mid1, start2, mid2, map1, map2, Lnorm_ass, tm_threshold, min_length, - mol_type, hinge_opt, i_opt, a_opt, u_opt, d_opt, d0_scale, fast_opt, sequence, results); - - recursive_bisection(xa_full, ya_full, seqx_full, seqy_full, secx_full, secy_full, - mid1 + 1, end1, mid2 + 1, end2, map1, map2, Lnorm_ass, tm_threshold, min_length, - mol_type, hinge_opt, i_opt, a_opt, u_opt, d_opt, d0_scale, fast_opt, sequence, results); -} - // Unified engine replacing flexalign, flexalign_best, and flexalign_bisection int flexalign_unified(string &xname, string &yname, const string &fname_super, const string &fname_lign, const string &fname_matrix, @@ -3160,8 +2951,7 @@ int flexalign_unified(string &xname, string &yname, const string &fname_super, const vector &model2parse1, const vector &model2parse2, const int byresi_opt, const vector &chain1_list, const vector &chain2_list, const int hinge_opt, const int ss_opt, - FlexAlignMode mode = FLEX_STANDARD, - double tm_threshold = 0.6, int min_length = 50) + FlexAlignMode mode = FLEX_STANDARD) { vector> PDB_lines1; vector> PDB_lines2; @@ -3241,136 +3031,9 @@ int flexalign_unified(string &xname, string &yname, const string &fname_super, extract_aln_from_resi(sequence, seqx, seqy, resi_vec1, resi_vec2, byresi_opt); // --- CORE DISPATCH LOGIC START --- - if (mode == FLEX_BISECTION) + if (mode == FLEX_FATCAT) { - int global_short_L = min(xlen, ylen); - double cur_Lnorm_ass = u_opt ? 
Lnorm_ass : global_short_L; - - vector map1, map2; - get_full_mapping(string(seqx), string(seqy), map1, map2); - - vector results; - recursive_bisection( - xa, ya, string(seqx), string(seqy), string(secx), string(secy), - 0, xlen - 1, 0, ylen - 1, map1, map2, cur_Lnorm_ass, tm_threshold, min_length, - mol_vec1[chain_i] + mol_vec2[chain_j], hinge_opt, i_opt, a_opt, - true, d_opt, d0_scale, fast_opt, sequence, results); - - // Result concatenation variables - double final_TM_u = 0.0; - string final_seqxA = "", final_seqyA = "", final_seqM = ""; - vector> final_tu_vec; - int final_L_ali = 0, final_n_ali = 0, final_n_ali8 = 0; - - // Using correctly weighted statistical aggregates for accurate flexible properties - double final_Liden = 0, sum_TM_ali_L = 0.0, sum_sq_dist_ali = 0.0, sum_sq_dist_0 = 0.0; - - for (size_t rIdx = 0; rIdx < results.size(); rIdx++) - { - FlexAlignResult &res = results[rIdx].flex_res; - - // Accumulate unnormalized TM score component - double tm_raw = res.TM4 * cur_Lnorm_ass; - final_TM_u += tm_raw; - - final_L_ali += res.L_ali; - final_n_ali += res.n_ali; - final_n_ali8 += res.n_ali8; - final_Liden += res.Liden; - - // Correct calculation for overall flexible RMSD utilizing respective block rigid transforms - sum_sq_dist_ali += res.L_ali * res.rmsd_ali * res.rmsd_ali; - sum_sq_dist_0 += res.n_ali * res.rmsd0 * res.rmsd0; - - // Correct calculation to prevent accumulating ratios > 1.0 for TM_ali - sum_TM_ali_L += res.L_ali * res.TM_ali; - - if (rIdx > 0) - { - // CRITICAL SEGFAULT FIX: Using dual '-' gap instead of '*'. - // This visually and logically separates the hinge blocks for output_flexalign_results - // WITHOUT incrementing the actual sequence position index (rx, ry) out of bounds. 
- final_seqxA += "-"; - final_seqyA += "-"; - final_seqM += " "; - } - final_seqxA += res.seqxA; - final_seqyA += res.seqyA; - int current_tu_offset = final_tu_vec.size(); - string shifted_seqM = res.seqM; - - for (char &c : shifted_seqM) - { - if (c >= '0' && c <= '9') - { - int val = c - '0'; - int new_val = val + current_tu_offset; - - if (new_val <= 9) - { - c = '0' + new_val; - } - else if (new_val < 36) - { - c = 'a' + (new_val - 10); - } - else - { - c = 'A' + (new_val - 36); - } - } - } - final_seqM += shifted_seqM; - - for (auto &t : res.tu_vec) - final_tu_vec.push_back(t); - } - - // Derive true merged mathematical properties - double final_rmsd_ali = (final_L_ali > 0) ? sqrt(sum_sq_dist_ali / final_L_ali) : 0.0; - double final_rmsd0 = (final_n_ali > 0) ? sqrt(sum_sq_dist_0 / final_n_ali) : 0.0; - double final_TM_ali = (final_L_ali > 0) ? (sum_TM_ali_L / final_L_ali) : 0.0; - - // Restore TM1 and TM2 scores utilizing the accumulated component sum - double final_TM1 = final_TM_u / xlen; - double final_TM2 = final_TM_u / ylen; - double final_TM_norm = final_TM_u / cur_Lnorm_ass; - - // Take the translation/rotation matrix of the first valid slice as a base placeholder - double best_t0[3] = {0}, best_u0[3][3] = {{1, 0, 0}, {0, 1, 0}, {0, 0, 1}}; - if (!final_tu_vec.empty()) - { - for (int k = 0; k < 3; k++) - for (int l = 0; l < 3; l++) - best_u0[k][l] = final_tu_vec[0][k * 3 + l]; - for (int k = 0; k < 3; k++) - best_t0[k] = final_tu_vec[0][9 + k]; - } - - // Retain scale constants generated from the very first aligned slice - double final_d0_0 = results.empty() ? 0.0 : results[0].flex_res.d0_0; - double final_TM_0 = results.empty() ? 0.0 : results[0].flex_res.TM_0; - double final_d0A = results.empty() ? 0.0 : results[0].flex_res.d0A; - double final_d0B = results.empty() ? 0.0 : results[0].flex_res.d0B; - double final_d0a = results.empty() ? 0.0 : results[0].flex_res.d0a; - double final_d0u = results.empty() ? 
0.0 : results[0].flex_res.d0u; - double final_d0_out = 5.0; - - if (outfmt_opt == 0) - print_version(); - - output_flexalign_results( - xname.substr(dir1_opt.size() + dir_opt.size() + dirpair_opt.size()), - yname.substr(dir2_opt.size() + dir_opt.size() + dirpair_opt.size()), - chainID_list1[chain_i], chainID_list2[chain_j], - xlen, ylen, best_t0, best_u0, final_tu_vec, - final_TM1, final_TM2, final_TM_norm, final_TM_norm, final_TM_norm, - final_rmsd0, final_d0_out, - final_seqM.c_str(), final_seqxA.c_str(), final_seqyA.c_str(), - final_Liden, final_n_ali8, final_L_ali, final_TM_ali, final_rmsd_ali, - final_TM_0, final_d0_0, final_d0A, final_d0B, cur_Lnorm_ass, d0_scale, final_d0a, final_d0u, - (m_opt ? fname_matrix : "").c_str(), outfmt_opt, ter_opt, false, split_opt, o_opt, - fname_super, i_opt, a_opt, u_opt, d_opt, mirror_opt, resi_vec1, resi_vec2); + continue; } else { @@ -3471,11 +3134,6 @@ int flexalign_best(string &xname, string &yname, const string &fname_super, cons return flexalign_unified(xname, yname, fname_super, fname_lign, fname_matrix, sequence, Lnorm_ass, d0_scale, m_opt, i_opt, o_opt, a_opt, u_opt, d_opt, TMcut, infmt1_opt, infmt2_opt, ter_opt, split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt, atom_opt, autojustify, mol_opt, dir_opt, dirpair_opt, dir1_opt, dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, byresi_opt, chain1_list, chain2_list, hinge_opt, 0 /* ss_opt is ignored in BEST mode */, FLEX_BEST); } -int flexalign_bisection(string &xname, string &yname, const string &fname_super, const string &fname_lign, const string &fname_matrix, vector &sequence, double Lnorm_ass, const double d0_scale, const bool m_opt, const int i_opt, const int o_opt, const int a_opt, const bool u_opt, const bool d_opt, const double TMcut, const int infmt1_opt, const int infmt2_opt, const int ter_opt, const int split_opt, const int outfmt_opt, const bool fast_opt, const int mirror_opt, const int het_opt, const string &atom_opt, const bool 
autojustify, const string &mol_opt, const string &dir_opt, const string &dirpair_opt, const string &dir1_opt, const string &dir2_opt, const vector &chain2parse1, const vector &chain2parse2, const vector &model2parse1, const vector &model2parse2, const int byresi_opt, const vector &chain1_list, const vector &chain2_list, const int hinge_opt, double tm_threshold = 0.6, int min_length = 50) -{ - return flexalign_unified(xname, yname, fname_super, fname_lign, fname_matrix, sequence, Lnorm_ass, d0_scale, m_opt, i_opt, o_opt, a_opt, u_opt, d_opt, TMcut, infmt1_opt, infmt2_opt, ter_opt, split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt, atom_opt, autojustify, mol_opt, dir_opt, dirpair_opt, dir1_opt, dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, byresi_opt, chain1_list, chain2_list, hinge_opt, 0 /* ss_opt ignored */, FLEX_BISECTION, tm_threshold, min_length); -} - int main(int argc, char *argv[]) { if (argc < 2) @@ -3518,7 +3176,6 @@ int main(int argc, char *argv[]) int closeK_opt = -1; // number of atoms for SOI initial alignment. // 5 and 0 for -mm 5 and 6 int hinge_opt = 9; // maximum number of hinge allowed for flexible - double tmflex_opt = 0.6; // TM-score threshold for -mm 10 int mirror_opt = 0; // do not align mirror int het_opt = 0; // do not read HETATM residues int mm_opt = 0; // do not perform MM-align @@ -3647,13 +3304,6 @@ int main(int argc, char *argv[]) hinge_opt = atoi(argv[i + 1]); i++; } - else if (!strcmp(argv[i], "-TMflex")) - { - if (i >= (argc - 1)) - PrintErrorAndQuit("ERROR! 
Missing value for -TMflex"); - tmflex_opt = atof(argv[i + 1]); - i++; - } else if (!strcmp(argv[i], "-v")) { v_opt = true; @@ -4175,14 +3825,6 @@ int main(int argc, char *argv[]) atom_opt, autojustify, mol_opt, dir_opt, dirpair_opt, dir1_opt, dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, byresi_opt, chain1_list, chain2_list, hinge_opt); - else if (mm_opt == 10) - flexalign_bisection(xname, yname, fname_super, fname_lign, - fname_matrix, sequence, Lnorm_ass, d0_scale, m_opt, i_opt, o_opt, - a_opt, u_opt, d_opt, TMcut, infmt1_opt, infmt2_opt, ter_opt, - split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt, - atom_opt, autojustify, mol_opt, dir_opt, dirpair_opt, dir1_opt, - dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, - byresi_opt, chain1_list, chain2_list, hinge_opt, tmflex_opt, 50); else cerr << "WARNING! -mm " << mm_opt << " not implemented" << endl; From 692f13ad6b31fd39e1069ebdeea4c26d4a6d7a52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A5=9D=E4=BA=91=E7=AF=AA?= Date: Tue, 12 May 2026 18:12:55 +0800 Subject: [PATCH 09/23] rewrite -mm 10 --- USalign.cpp | 674 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 672 insertions(+), 2 deletions(-) diff --git a/USalign.cpp b/USalign.cpp index 7a55af3..4614ea3 100644 --- a/USalign.cpp +++ b/USalign.cpp @@ -2936,7 +2936,631 @@ void execute_flexalign_with_fallback( } } -// Unified engine replacing flexalign, flexalign_best, and flexalign_bisection + +// ========================================== +// FATCAT Core Algorithm (flexalign_fatcat_main) +// ========================================== +struct FATCAT_AFP { + int i, j, len; + double score; + double R[3][3]; + double t[3]; +}; + +int flexalign_fatcat_main(double **xa, double **ya, + const char *seqx, const char *seqy, const char *secx, const char *secy, + double t0[3], double u0[3][3], vector> &tu_vec, + double &TM1, double &TM2, double &TM3, double &TM4, double &TM5, + double &d0_0, double &TM_0, + double 
&d0A, double &d0B, double &d0u, double &d0a, double &d0_out, + string &seqM, string &seqxA, string &seqyA, vector &do_vec, + double &rmsd0, int &L_ali, double &Liden, + double &TM_ali, double &rmsd_ali, int &n_ali, int &n_ali8, + const int xlen, const int ylen, + const vector sequence, const double Lnorm_ass, + const double d0_scale, const int i_opt, const int a_opt, + const bool u_opt, const bool d_opt, const bool fast_opt, + const int mol_type, const int hinge_opt, const int ss_opt, + int sparse_val = 0) +{ + // FATCAT base parameters + int fragLen = 8; + double rmsdCut = 3.0; + double badRmsd = 4.0; + double resScore = 3.0; + double gap_ext = -0.5; + double disCut = 5.0; + double disSmooth = 4.0; + double twist_pen = -25.0; + int max_twists = 9; + int max_gap = 40; + double max_penalty = -5.0; + int misCut = 2 * fragLen; + int maxGapFrag = fragLen + max_gap; + double afp_dis_cut = fragLen * fragLen * (disCut * disCut); + + // ========================================== + // Step 1: Extract initial AFPs in batches + // ========================================== + vector initial_afps; + int step = sparse_val + 1; + + // Optimization: Use stack memory for the tight O(N^2) loop + double r1_static[8][3], r2_static[8][3]; + double *r1[8], *r2[8]; + for (int k = 0; k < 8; k++) { + r1[k] = r1_static[k]; + r2[k] = r2_static[k]; + } + + for (int i = 0; i <= xlen - fragLen; i += step) { + for (int j = 0; j <= ylen - fragLen; j += step) { + int d3_term = min(i, j) + min(xlen - (i + fragLen), ylen - (j + fragLen)) + fragLen; + if (d3_term < 0.3 * min(xlen, ylen)) continue; + + // Explicit Euclidean distance math + double dx1 = xa[i + fragLen - 1][0] - xa[i][0]; + double dy1 = xa[i + fragLen - 1][1] - xa[i][1]; + double dz1 = xa[i + fragLen - 1][2] - xa[i][2]; + double d1 = sqrt(dx1*dx1 + dy1*dy1 + dz1*dz1); + + double dx2 = ya[j + fragLen - 1][0] - ya[j][0]; + double dy2 = ya[j + fragLen - 1][1] - ya[j][1]; + double dz2 = ya[j + fragLen - 1][2] - ya[j][2]; + double d2 = 
sqrt(dx2*dx2 + dy2*dy2 + dz2*dz2); + + // Use fabs() instead of abs() + if (fabs(d1 - d2) > 2.0 * rmsdCut) continue; + + for (int k = 0; k < fragLen; k++) { + r1[k][0] = xa[i + k][0]; r1[k][1] = xa[i + k][1]; r1[k][2] = xa[i + k][2]; + r2[k][0] = ya[j + k][0]; r2[k][1] = ya[j + k][1]; r2[k][2] = ya[j + k][2]; + } + + // Mode=0 to compute correct error, then map to RMSD manually + double rms_sum_sq, t_tmp[3], u_tmp[3][3]; + Kabsch(r1, r2, fragLen, 0, &rms_sum_sq, t_tmp, u_tmp); + double rmsd_tmp = sqrt(rms_sum_sq / fragLen); + + if (rmsd_tmp < rmsdCut) { + FATCAT_AFP afp; + afp.i = i; afp.j = j; afp.len = fragLen; + afp.score = resScore * fragLen * (1.0 - (rmsd_tmp / badRmsd) * (rmsd_tmp / badRmsd)); + for (int a = 0; a < 3; a++) { + afp.t[a] = t_tmp[a]; + for (int b = 0; b < 3; b++) afp.R[a][b] = u_tmp[a][b]; + } + initial_afps.push_back(afp); + } + } + } + + // ========================================== + // Step 2: Merge diagonal AFPs + // ========================================== + map> diagonals; + for (size_t k = 0; k < initial_afps.size(); k++) { + diagonals[initial_afps[k].i - initial_afps[k].j].push_back(initial_afps[k]); + } + + vector merged_afps; + for (map>::iterator it = diagonals.begin(); it != diagonals.end(); ++it) { + vector& group = it->second; + for (size_t a = 0; a < group.size(); a++) { + for (size_t b = a + 1; b < group.size(); b++) { + if (group[b].i < group[a].i) { + FATCAT_AFP tmp = group[a]; group[a] = group[b]; group[b] = tmp; + } + } + } + int n_group = group.size(); + vector invalid(n_group, false); + for (int idx = 0; idx < n_group; idx++) { + if (invalid[idx]) continue; + FATCAT_AFP curr = group[idx]; + for (int nxt_idx = idx + 1; nxt_idx < n_group; nxt_idx++) { + FATCAT_AFP nxt = group[nxt_idx]; + if (nxt.i > curr.i + curr.len) break; + + if (nxt.i + nxt.len > curr.i + curr.len) { + int new_len = (nxt.i + nxt.len) - curr.i; + double **r1_merge, **r2_merge; + NewArray(&r1_merge, new_len, 3); + NewArray(&r2_merge, new_len, 3); + for 
(int k = 0; k < new_len; k++) { + r1_merge[k][0] = xa[curr.i + k][0]; r1_merge[k][1] = xa[curr.i + k][1]; r1_merge[k][2] = xa[curr.i + k][2]; + r2_merge[k][0] = ya[curr.j + k][0]; r2_merge[k][1] = ya[curr.j + k][1]; r2_merge[k][2] = ya[curr.j + k][2]; + } + + // Mode=0 to compute correct error + double rms_sum_sq, t_tmp[3], u_tmp[3][3]; + Kabsch(r1_merge, r2_merge, new_len, 0, &rms_sum_sq, t_tmp, u_tmp); + double rmsd_tmp = sqrt(rms_sum_sq / new_len); + + DeleteArray(&r1_merge, new_len); DeleteArray(&r2_merge, new_len); + + if (rmsd_tmp < rmsdCut) { + curr.len = new_len; + for (int a = 0; a < 3; a++) { + curr.t[a] = t_tmp[a]; + for (int b = 0; b < 3; b++) curr.R[a][b] = u_tmp[a][b]; + } + curr.score = resScore * new_len * (1.0 - (rmsd_tmp / badRmsd) * (rmsd_tmp / badRmsd)); + invalid[nxt_idx] = true; + } + } + } + merged_afps.push_back(curr); + } + } + + for (size_t a = 0; a < merged_afps.size(); a++) { + for (size_t b = a + 1; b < merged_afps.size(); b++) { + if (merged_afps[b].i < merged_afps[a].i || (merged_afps[b].i == merged_afps[a].i && merged_afps[b].j < merged_afps[a].j)) { + FATCAT_AFP tmp = merged_afps[a]; merged_afps[a] = merged_afps[b]; merged_afps[b] = tmp; + } + } + } + int n_afps = merged_afps.size(); + if (n_afps == 0) return 0; + + // ========================================== + // Step 3: Global Dynamic Programming (DP) + // ========================================== + vector> afp_aft_index(xlen, vector(ylen, -1)); + vector> afp_bef_index(xlen, vector(ylen, -1)); + map>> i_to_j; + + for (int m = 0; m < n_afps; m++) { + i_to_j[merged_afps[m].i].push_back(make_pair(merged_afps[m].j, m)); + } + + for (map>>::iterator it = i_to_j.begin(); it != i_to_j.end(); ++it) { + int i_val = it->first; + for (size_t p = 0; p < it->second.size(); p++) { + afp_aft_index[i_val][it->second[p].first] = it->second[p].second; + afp_bef_index[i_val][it->second[p].first] = it->second[p].second; + } + int curr_bef = -1; + for (int j_val = 0; j_val < ylen; j_val++) { + if 
(afp_bef_index[i_val][j_val] != -1) curr_bef = afp_bef_index[i_val][j_val]; + else afp_bef_index[i_val][j_val] = curr_bef; + } + int curr_aft = -1; + for (int j_val = ylen - 1; j_val >= 0; j_val--) { + if (afp_aft_index[i_val][j_val] != -1) curr_aft = afp_aft_index[i_val][j_val]; + else afp_aft_index[i_val][j_val] = curr_aft; + } + } + + vector sco(n_afps); + vector twi(n_afps, 0); + vector pre(n_afps, -1); + for (int m = 0; m < n_afps; m++) sco[m] = merged_afps[m].score; + + for (int m = 0; m < n_afps; m++) { + int curr_i = merged_afps[m].i; + int curr_j = merged_afps[m].j; + int a3 = curr_i - fragLen; + int a2 = max(0, a3 - misCut); + int a1 = max(0, curr_i - maxGapFrag); + int b3 = curr_j - fragLen; + int b2 = max(0, b3 - misCut); + int b1 = max(0, curr_j - maxGapFrag); + + vector valid_prevs; + for (int step = 0; step < 2; step++) { + int a_s, a_e, b_s, b_e; + if (step == 0) { a_s = max(a1, 0); a_e = min(a3, xlen - 1); b_s = max(b2, 0); b_e = min(b3, ylen - 1); } + else { a_s = max(a2, 0); a_e = min(a3, xlen - 1); b_s = max(b1, 0); b_e = min(b2 - 1, ylen - 1); } + + if (b_s >= ylen || b_e < 0) continue; + for (int prev_i = a_s; prev_i <= a_e; prev_i++) { + int s1 = afp_aft_index[prev_i][b_s]; + int s2 = afp_bef_index[prev_i][b_e]; + if (s1 != -1 && s2 != -1 && s1 <= s2) { + for (int s = s1; s <= s2; s++) valid_prevs.push_back(s); + } + } + } + + double curr_sco = merged_afps[m].score; + for (size_t v = 0; v < valid_prevs.size(); v++) { + int prev = valid_prevs[v]; + int prev_twi = twi[prev]; + if (prev_twi > max_twists) continue; + + int gap_i = curr_i - (merged_afps[prev].i + merged_afps[prev].len); + int gap_j = curr_j - (merged_afps[prev].j + merged_afps[prev].len); + int m_gap = max(gap_i, gap_j); + int m_mis = 0; + if (gap_i < 0 || gap_j < 0) m_mis = (gap_i < gap_j) ? 
-gap_i : -gap_j; + + double gp = gap_ext * m_mis; + if (m_gap > 0) gp += gap_ext * m_gap; + if (gp < max_penalty) gp = max_penalty; + + // Explicit Euclidean math for distance differences + double rms_sq = 0; + for (int k = 0; k < fragLen; k++) { + for (int l = 0; l < fragLen; l++) { + double dx1 = xa[curr_i + k][0] - xa[merged_afps[prev].i + l][0]; + double dy1 = xa[curr_i + k][1] - xa[merged_afps[prev].i + l][1]; + double dz1 = xa[curr_i + k][2] - xa[merged_afps[prev].i + l][2]; + double dist1 = sqrt(dx1*dx1 + dy1*dy1 + dz1*dz1); + + double dx2 = ya[curr_j + k][0] - ya[merged_afps[prev].j + l][0]; + double dy2 = ya[curr_j + k][1] - ya[merged_afps[prev].j + l][1]; + double dz2 = ya[curr_j + k][2] - ya[merged_afps[prev].j + l][2]; + double dist2 = sqrt(dx2*dx2 + dy2*dy2 + dz2*dz2); + + rms_sq += (dist1 - dist2) * (dist1 - dist2); + } + } + + double tp = 0.0; + int is_twist = 0; + if (rms_sq >= afp_dis_cut) { + tp = twist_pen; is_twist = 1; + } else { + double dvar = sqrt(rms_sq / (fragLen * fragLen)); + if (dvar > disCut - disSmooth) tp = twist_pen * sqrt((dvar - disCut + disSmooth) / disSmooth); + } + + if (prev_twi + is_twist > max_twists) continue; + + double stmp = sco[prev] + curr_sco + tp + gp; + if (stmp > sco[m]) { + sco[m] = stmp; pre[m] = prev; twi[m] = prev_twi + is_twist; + } + } + } + + int best_m = 0; + for (int m = 1; m < n_afps; m++) if (sco[m] > sco[best_m]) best_m = m; + + vector path; + int curr_m = best_m; + while (curr_m != -1) { path.push_back(curr_m); curr_m = pre[curr_m]; } + reverse(path.begin(), path.end()); + + // ========================================== + // Step 4: Split structure based on twists + // ========================================== + vector> blocks; + vector curr_block; + curr_block.push_back(merged_afps[path[0]]); + for (size_t k = 1; k < path.size(); k++) { + FATCAT_AFP curr = merged_afps[path[k]]; + FATCAT_AFP prv = merged_afps[path[k - 1]]; + + double rms_sq = 0; + for (int i_idx = 0; i_idx < fragLen; i_idx++) { + for 
(int j_idx = 0; j_idx < fragLen; j_idx++) { + double dx1 = xa[curr.i + i_idx][0] - xa[prv.i + j_idx][0]; + double dy1 = xa[curr.i + i_idx][1] - xa[prv.i + j_idx][1]; + double dz1 = xa[curr.i + i_idx][2] - xa[prv.i + j_idx][2]; + double dist1 = sqrt(dx1*dx1 + dy1*dy1 + dz1*dz1); + + double dx2 = ya[curr.j + i_idx][0] - ya[prv.j + j_idx][0]; + double dy2 = ya[curr.j + i_idx][1] - ya[prv.j + j_idx][1]; + double dz2 = ya[curr.j + i_idx][2] - ya[prv.j + j_idx][2]; + double dist2 = sqrt(dx2*dx2 + dy2*dy2 + dz2*dz2); + + rms_sq += (dist1 - dist2) * (dist1 - dist2); + } + } + + double dvar = (rms_sq > afp_dis_cut) ? 1e9 : sqrt(rms_sq / (fragLen * fragLen)); + if (dvar >= disCut) { blocks.push_back(curr_block); curr_block.clear(); } + curr_block.push_back(curr); + } + blocks.push_back(curr_block); + + struct Region { int s1, e1, s2, e2; }; + vector real_blocks; + int last_i = 0, last_j = 0; + + for (size_t b = 0; b < blocks.size(); b++) { + int b_s1 = -1, b_e1 = -1, b_s2 = -1, b_e2 = -1; + for (size_t a = 0; a < blocks[b].size(); a++) { + FATCAT_AFP afp = blocks[b][a]; + int skip = max(max(last_i - afp.i, last_j - afp.j), 0); + if (skip >= afp.len) continue; + + int eff_i = afp.i + skip; + int eff_j = afp.j + skip; + int eff_L = afp.len - skip; + if (b_s1 == -1) { b_s1 = eff_i; b_s2 = eff_j; } + b_e1 = eff_i + eff_L; b_e2 = eff_j + eff_L; + last_i = b_e1; last_j = b_e2; + } + if (b_s1 != -1) { + Region r = {b_s1, b_e1, b_s2, b_e2}; + real_blocks.push_back(r); + } + } + if (real_blocks.empty()) return 0; + + // Calculate bounds using middle split strategy (0-based) + vector bounds1, bounds2; + bounds1.push_back(0); bounds2.push_back(0); + for (size_t k = 0; k < real_blocks.size() - 1; k++) { + bounds1.push_back((real_blocks[k].e1 + real_blocks[k + 1].s1) / 2); + bounds2.push_back((real_blocks[k].e2 + real_blocks[k + 1].s2) / 2); + } + bounds1.push_back(xlen); bounds2.push_back(ylen); + + // ========================================== + // [DEBUG] TEMPORARY DEBUG OUTPUT + // 
========================================== + cout << "\n========================================" << endl; + cout << "PDB1 Interval: "; + for (size_t k = 0; k < bounds1.size() - 1; k++) { + cout << (bounds1[k] + 1) << "-" << bounds1[k + 1]; + if (k < bounds1.size() - 2) cout << ","; + } + cout << "\nPDB2 Interval: "; + for (size_t k = 0; k < bounds2.size() - 1; k++) { + cout << (bounds2[k] + 1) << "-" << bounds2[k + 1]; + if (k < bounds2.size() - 2) cout << ","; + } + cout << "\n========================================\n" << endl; + + // ========================================== + // Step 5: Iteratively align each block using TRUE flexalign_best logic + // ========================================== + string global_seqM = "", global_seqxA = "", global_seqyA = ""; + tu_vec.clear(); + + // Array to map each global residue explicitly to its underlying rotation matrix + vector global_res_tu(xlen, -1); + + for (size_t k = 0; k < bounds1.size() - 1; k++) { + int x_s = bounds1[k], x_e = bounds1[k + 1]; + int y_s = bounds2[k], y_e = bounds2[k + 1]; + int L1_sub = x_e - x_s; + int L2_sub = y_e - y_s; + + // Pad unaligned sequences to prevent coordinate desynchronization + if (L1_sub < 3 || L2_sub < 3) { + for (int i = 0; i < L1_sub; i++) { + global_seqxA += seqx[x_s + i]; global_seqyA += '-'; global_seqM += ' '; + } + for (int i = 0; i < L2_sub; i++) { + global_seqxA += '-'; global_seqyA += seqy[y_s + i]; global_seqM += ' '; + } + continue; + } + + double **xa_sub, **ya_sub; + NewArray(&xa_sub, L1_sub, 3); + NewArray(&ya_sub, L2_sub, 3); + char *seqx_sub = new char[L1_sub + 1]; + char *seqy_sub = new char[L2_sub + 1]; + char *secx_sub = new char[L1_sub + 1]; + char *secy_sub = new char[L2_sub + 1]; + + for (int i = 0; i < L1_sub; i++) { + xa_sub[i][0] = xa[x_s + i][0]; xa_sub[i][1] = xa[x_s + i][1]; xa_sub[i][2] = xa[x_s + i][2]; + seqx_sub[i] = seqx[x_s + i]; secx_sub[i] = secx[x_s + i]; + } + seqx_sub[L1_sub] = '\0'; secx_sub[L1_sub] = '\0'; + + for (int i = 0; i < 
L2_sub; i++) { + ya_sub[i][0] = ya[y_s + i][0]; ya_sub[i][1] = ya[y_s + i][1]; ya_sub[i][2] = ya[y_s + i][2]; + seqy_sub[i] = seqy[y_s + i]; secy_sub[i] = secy[y_s + i]; + } + seqy_sub[L2_sub] = '\0'; secy_sub[L2_sub] = '\0'; + + double t0_best[3], u0_best[3][3]; + double TM_best_max = -1.0; + string seqM_best, seqxA_best, seqyA_best; + vector> tu_vec_best; + + bool force_fast_opt = (getmin(L1_sub, L2_sub) > 1500) ? true : fast_opt; + for (int cur_ss_opt = 0; cur_ss_opt <= 1; cur_ss_opt++) { + double t0_s[3], u0_s[3][3]; + vector> tu_vec_s; + double TM1_s=0, TM2_s=0, TM3_s=0, TM4_s=0, TM5_s=0; + double d0_0_s=0, TM_0_s=0, d0A_s=0, d0B_s=0, d0u_s=0, d0a_s=0, d0_out_s=5.0; + string seqM_s, seqxA_s, seqyA_s; + vector do_vec_s; + double rmsd0_s=0; int L_ali_s=0; double Liden_s=0; + double TM_ali_s=0, rmsd_ali_s=0; int n_ali_s=0, n_ali8_s=0; + + flexalign_main( + xa_sub, ya_sub, seqx_sub, seqy_sub, secx_sub, secy_sub, + t0_s, u0_s, tu_vec_s, TM1_s, TM2_s, TM3_s, TM4_s, TM5_s, + d0_0_s, TM_0_s, d0A_s, d0B_s, d0u_s, d0a_s, d0_out_s, + seqM_s, seqxA_s, seqyA_s, do_vec_s, + rmsd0_s, L_ali_s, Liden_s, TM_ali_s, rmsd_ali_s, n_ali_s, n_ali8_s, + L1_sub, L2_sub, sequence, Lnorm_ass, d0_scale, + i_opt, a_opt, u_opt, d_opt, force_fast_opt, + mol_type, hinge_opt, cur_ss_opt); + + double cur_max_TM = (TM1_s > TM2_s) ? 
TM1_s : TM2_s; + if (cur_max_TM > TM_best_max) { + TM_best_max = cur_max_TM; + for(int a=0; a<3; a++) { + t0_best[a] = t0_s[a]; + for(int b=0; b<3; b++) u0_best[a][b] = u0_s[a][b]; + } + seqM_best = seqM_s; + seqxA_best = seqxA_s; + seqyA_best = seqyA_s; + tu_vec_best = tu_vec_s; + } + } + + if (TM_best_max < 0) { + for (int i = 0; i < L1_sub; i++) { + global_seqxA += seqx_sub[i]; global_seqyA += '-'; global_seqM += ' '; + } + for (int i = 0; i < L2_sub; i++) { + global_seqxA += '-'; global_seqyA += seqy_sub[i]; global_seqM += ' '; + } + DeleteArray(&xa_sub, L1_sub); DeleteArray(&ya_sub, L2_sub); + delete[] seqx_sub; delete[] seqy_sub; delete[] secx_sub; delete[] secy_sub; + continue; + } + + if (tu_vec_best.empty()) { + vector tu_tmp(12); + t_u2tu(t0_best, u0_best, tu_tmp); + tu_vec_best.push_back(tu_tmp); + } + + int base_tu_idx = tu_vec.size(); + for (size_t m = 0; m < tu_vec_best.size(); m++) { + tu_vec.push_back(tu_vec_best[m]); + } + + // ========================================== + // NEW FIX: Global numbering logic for 0-9, a-z, A-Z + // ========================================== + int rx = x_s; + for (size_t i = 0; i < seqxA_best.length(); i++) { + int current_global_idx = base_tu_idx; + + // Extract the true internal matrix map + if (seqxA_best[i] != '-') { + char c = seqM_best[i]; + if (c != ' ') { + int local_hinge_idx = -1; + if (c >= '1' && c <= '9') local_hinge_idx = c - '1'; // 1-based flexalign -> 0-based offset + else if (c >= 'a' && c <= 'z') local_hinge_idx = c - 'a' + 9; + + if (local_hinge_idx >= 0 && local_hinge_idx < tu_vec_best.size()) { + current_global_idx = base_tu_idx + local_hinge_idx; + } + } + global_res_tu[rx] = current_global_idx; + rx++; + } else { + // Determine ID strictly for the visual formatting of Y-insertions + char c = seqM_best[i]; + if (c != ' ') { + int local_hinge_idx = -1; + if (c >= '1' && c <= '9') local_hinge_idx = c - '1'; + else if (c >= 'a' && c <= 'z') local_hinge_idx = c - 'a' + 9; + + if (local_hinge_idx >= 0 
&& local_hinge_idx < tu_vec_best.size()) { + current_global_idx = base_tu_idx + local_hinge_idx; + } + } + } + + // Re-label matched areas starting sequentially from '0' + if (seqxA_best[i] != '-' && seqyA_best[i] != '-') { + char global_c; + if (current_global_idx < 10) global_c = '0' + current_global_idx; + else if (current_global_idx < 36) global_c = 'a' + (current_global_idx - 10); + else if (current_global_idx < 62) global_c = 'A' + (current_global_idx - 36); + else global_c = '*'; + + seqM_best[i] = global_c; + } else { + seqM_best[i] = ' '; // Standard gap alignment remains blank + } + } + + global_seqM += seqM_best; + global_seqxA += seqxA_best; + global_seqyA += seqyA_best; + + DeleteArray(&xa_sub, L1_sub); + DeleteArray(&ya_sub, L2_sub); + delete[] seqx_sub; delete[] seqy_sub; delete[] secx_sub; delete[] secy_sub; + } + + // ========================================== + // Step 6: Recalculate global metrics correctly + // ========================================== + seqM = global_seqM; + seqxA = global_seqxA; + seqyA = global_seqyA; + + d0A = 1.24 * pow(ylen * 1.0 - 15.0, 1.0 / 3.0) - 1.8; + if (d0A < 0.5) d0A = 0.5; + d0B = 1.24 * pow(xlen * 1.0 - 15.0, 1.0 / 3.0) - 1.8; + if (d0B < 0.5) d0B = 0.5; + d0a = 1.24 * pow((xlen + ylen) * 0.5 - 15.0, 1.0 / 3.0) - 1.8; + if (d0a < 0.5) d0a = 0.5; + if (u_opt) { + d0u = 1.24 * pow(Lnorm_ass - 15.0, 1.0 / 3.0) - 1.8; + if (d0u < 0.5) d0u = 0.5; + } + + TM1 = TM2 = TM3 = TM4 = TM5 = rmsd0 = 0.0; + Liden = 0; n_ali8 = 0; n_ali = 0; + do_vec.clear(); + + int i_res = 0, j_res = 0; + for (size_t r = 0; r < seqxA.length(); r++) { + bool x_valid = (seqxA[r] != '-'); + bool y_valid = (seqyA[r] != '-'); + + if (x_valid && y_valid) { + if (seqxA[r] == seqyA[r]) Liden++; + + int matrix_idx = global_res_tu[i_res]; + + if (matrix_idx >= 0 && matrix_idx < tu_vec.size()) { + double t_k[3], u_k[3][3]; + tu2t_u(tu_vec[matrix_idx], t_k, u_k); + + double x_rot[3]; + x_rot[0] = t_k[0] + u_k[0][0]*xa[i_res][0] + u_k[0][1]*xa[i_res][1] 
+ u_k[0][2]*xa[i_res][2]; + x_rot[1] = t_k[1] + u_k[1][0]*xa[i_res][0] + u_k[1][1]*xa[i_res][1] + u_k[1][2]*xa[i_res][2]; + x_rot[2] = t_k[2] + u_k[2][0]*xa[i_res][0] + u_k[2][1]*xa[i_res][1] + u_k[2][2]*xa[i_res][2]; + + double dist2 = dist(x_rot, ya[j_res]); + double d = sqrt(dist2); + + TM2 += 1.0 / (1.0 + dist2 / (d0B * d0B)); + TM1 += 1.0 / (1.0 + dist2 / (d0A * d0A)); + if (a_opt) TM3 += 1.0 / (1.0 + dist2 / (d0a * d0a)); + if (u_opt) TM4 += 1.0 / (1.0 + dist2 / (d0u * d0u)); + if (d_opt) TM5 += 1.0 / (1.0 + dist2 / (d0_scale * d0_scale)); + + n_ali++; + do_vec.push_back(d); + + if (d <= d0_out) { + rmsd0 += dist2; + n_ali8++; + } + } else { + do_vec.push_back(-1); + } + } else { + do_vec.push_back(-1); + } + + if (x_valid) i_res++; + if (y_valid) j_res++; + } + + TM2 /= xlen; + TM1 /= ylen; + if (a_opt) TM3 /= (xlen + ylen) * 0.5; + if (u_opt) TM4 /= Lnorm_ass; + if (d_opt) TM5 /= ylen; + + if (n_ali8 > 0) rmsd0 = sqrt(rmsd0 / n_ali8); + else rmsd0 = 0.0; + + L_ali = n_ali; + TM_ali = TM1; + rmsd_ali = rmsd0; + + if (!tu_vec.empty()) tu2t_u(tu_vec[0], t0, u0); + + return tu_vec.size(); +} + +// Unified engine replacing flexalign, flexalign_best, and flexalign_fatcat int flexalign_unified(string &xname, string &yname, const string &fname_super, const string &fname_lign, const string &fname_matrix, vector &sequence, const double Lnorm_ass, const double d0_scale, @@ -3033,7 +3657,40 @@ int flexalign_unified(string &xname, string &yname, const string &fname_super, // --- CORE DISPATCH LOGIC START --- if (mode == FLEX_FATCAT) { - continue; + FlexAlignResult fatcat_res; + bool force_fast_opt = (getmin(xlen, ylen) > 1500) ? 
true : fast_opt; + + fatcat_res.hingeNum = flexalign_fatcat_main( + xa, ya, seqx, seqy, secx, secy, + fatcat_res.t0, fatcat_res.u0, fatcat_res.tu_vec, + fatcat_res.TM1, fatcat_res.TM2, fatcat_res.TM3, fatcat_res.TM4, fatcat_res.TM5, + fatcat_res.d0_0, fatcat_res.TM_0, + fatcat_res.d0A, fatcat_res.d0B, fatcat_res.d0u, fatcat_res.d0a, fatcat_res.d0_out, + fatcat_res.seqM, fatcat_res.seqxA, fatcat_res.seqyA, fatcat_res.do_vec, + fatcat_res.rmsd0, fatcat_res.L_ali, fatcat_res.Liden, + fatcat_res.TM_ali, fatcat_res.rmsd_ali, fatcat_res.n_ali, fatcat_res.n_ali8, + xlen, ylen, sequence, Lnorm_ass, d0_scale, + i_opt, a_opt, u_opt, d_opt, force_fast_opt, + mol_vec1[chain_i] + mol_vec2[chain_j], hinge_opt, ss_opt, 0 /* sparse_val */ + ); + + if (outfmt_opt == 0) + print_version(); + output_flexalign_results( + xname.substr(dir1_opt.size() + dir_opt.size() + dirpair_opt.size()), + yname.substr(dir2_opt.size() + dir_opt.size() + dirpair_opt.size()), + chainID_list1[chain_i], chainID_list2[chain_j], + xlen, ylen, fatcat_res.t0, fatcat_res.u0, fatcat_res.tu_vec, + fatcat_res.TM1, fatcat_res.TM2, fatcat_res.TM3, fatcat_res.TM4, fatcat_res.TM5, + fatcat_res.rmsd0, fatcat_res.d0_out, fatcat_res.seqM.c_str(), + fatcat_res.seqxA.c_str(), fatcat_res.seqyA.c_str(), fatcat_res.Liden, + fatcat_res.n_ali8, fatcat_res.L_ali, fatcat_res.TM_ali, fatcat_res.rmsd_ali, + fatcat_res.TM_0, fatcat_res.d0_0, + fatcat_res.d0A, fatcat_res.d0B, Lnorm_ass, d0_scale, fatcat_res.d0a, fatcat_res.d0u, + (m_opt ? 
fname_matrix : "").c_str(), + outfmt_opt, ter_opt, false, split_opt, o_opt, + fname_super, i_opt, a_opt, u_opt, d_opt, mirror_opt, + resi_vec1, resi_vec2); } else { @@ -3134,6 +3791,11 @@ int flexalign_best(string &xname, string &yname, const string &fname_super, cons return flexalign_unified(xname, yname, fname_super, fname_lign, fname_matrix, sequence, Lnorm_ass, d0_scale, m_opt, i_opt, o_opt, a_opt, u_opt, d_opt, TMcut, infmt1_opt, infmt2_opt, ter_opt, split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt, atom_opt, autojustify, mol_opt, dir_opt, dirpair_opt, dir1_opt, dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, byresi_opt, chain1_list, chain2_list, hinge_opt, 0 /* ss_opt is ignored in BEST mode */, FLEX_BEST); } +int flexalign_fatcat(string &xname, string &yname, const string &fname_super, const string &fname_lign, const string &fname_matrix, vector &sequence, const double Lnorm_ass, const double d0_scale, const bool m_opt, const int i_opt, const int o_opt, const int a_opt, const bool u_opt, const bool d_opt, const double TMcut, const int infmt1_opt, const int infmt2_opt, const int ter_opt, const int split_opt, const int outfmt_opt, const bool fast_opt, const int mirror_opt, const int het_opt, const string &atom_opt, const bool autojustify, const string &mol_opt, const string &dir_opt, const string &dirpair_opt, const string &dir1_opt, const string &dir2_opt, const vector &chain2parse1, const vector &chain2parse2, const vector &model2parse1, const vector &model2parse2, const int byresi_opt, const vector &chain1_list, const vector &chain2_list, const int hinge_opt) +{ + return flexalign_unified(xname, yname, fname_super, fname_lign, fname_matrix, sequence, Lnorm_ass, d0_scale, m_opt, i_opt, o_opt, a_opt, u_opt, d_opt, TMcut, infmt1_opt, infmt2_opt, ter_opt, split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt, atom_opt, autojustify, mol_opt, dir_opt, dirpair_opt, dir1_opt, dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, 
byresi_opt, chain1_list, chain2_list, hinge_opt, 0 /* ss_opt ignore */, FLEX_FATCAT); +} + int main(int argc, char *argv[]) { if (argc < 2) @@ -3825,6 +4487,14 @@ int main(int argc, char *argv[]) atom_opt, autojustify, mol_opt, dir_opt, dirpair_opt, dir1_opt, dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, byresi_opt, chain1_list, chain2_list, hinge_opt); + else if (mm_opt == 10) + flexalign_fatcat(xname, yname, fname_super, fname_lign, + fname_matrix, sequence, Lnorm_ass, d0_scale, m_opt, i_opt, o_opt, + a_opt, u_opt, d_opt, TMcut, infmt1_opt, infmt2_opt, ter_opt, + split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt, + atom_opt, autojustify, mol_opt, dir_opt, dirpair_opt, dir1_opt, + dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, + byresi_opt, chain1_list, chain2_list, hinge_opt); else cerr << "WARNING! -mm " << mm_opt << " not implemented" << endl; From 610fcbe13c9c54be46d4bf30d1a34115101057d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A5=9D=E4=BA=91=E7=AF=AA?= Date: Tue, 12 May 2026 20:11:45 +0800 Subject: [PATCH 10/23] rewrite -mm 10 --- USalign.cpp | 913 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 580 insertions(+), 333 deletions(-) diff --git a/USalign.cpp b/USalign.cpp index 4614ea3..8a63739 100644 --- a/USalign.cpp +++ b/USalign.cpp @@ -2936,11 +2936,11 @@ void execute_flexalign_with_fallback( } } - // ========================================== // FATCAT Core Algorithm (flexalign_fatcat_main) // ========================================== -struct FATCAT_AFP { +struct FATCAT_AFP +{ int i, j, len; double score; double R[3][3]; @@ -2948,20 +2948,20 @@ struct FATCAT_AFP { }; int flexalign_fatcat_main(double **xa, double **ya, - const char *seqx, const char *seqy, const char *secx, const char *secy, - double t0[3], double u0[3][3], vector> &tu_vec, - double &TM1, double &TM2, double &TM3, double &TM4, double &TM5, - double &d0_0, double &TM_0, - double &d0A, double &d0B, double &d0u, double &d0a, 
double &d0_out, - string &seqM, string &seqxA, string &seqyA, vector &do_vec, - double &rmsd0, int &L_ali, double &Liden, - double &TM_ali, double &rmsd_ali, int &n_ali, int &n_ali8, - const int xlen, const int ylen, - const vector sequence, const double Lnorm_ass, - const double d0_scale, const int i_opt, const int a_opt, - const bool u_opt, const bool d_opt, const bool fast_opt, - const int mol_type, const int hinge_opt, const int ss_opt, - int sparse_val = 0) + const char *seqx, const char *seqy, const char *secx, const char *secy, + double t0[3], double u0[3][3], std::vector> &tu_vec, + double &TM1, double &TM2, double &TM3, double &TM4, double &TM5, + double &d0_0, double &TM_0, + double &d0A, double &d0B, double &d0u, double &d0a, double &d0_out, + std::string &seqM, std::string &seqxA, std::string &seqyA, std::vector &do_vec, + double &rmsd0, int &L_ali, double &Liden, + double &TM_ali, double &rmsd_ali, int &n_ali, int &n_ali8, + const int xlen, const int ylen, + const std::vector sequence, const double Lnorm_ass, + const double d0_scale, const int i_opt, const int a_opt, + const bool u_opt, const bool d_opt, const bool fast_opt, + const int mol_type, const int hinge_opt, const int ss_opt, + int sparse_val = 0) { // FATCAT base parameters int fragLen = 8; @@ -2979,56 +2979,89 @@ int flexalign_fatcat_main(double **xa, double **ya, int maxGapFrag = fragLen + max_gap; double afp_dis_cut = fragLen * fragLen * (disCut * disCut); + // ========================================== + // OPTIMIZATION 1: Precompute local intra-protein distance matrices + // This entirely eliminates the costly sqrt() calls inside the O(N^2 * fragLen^2) DP loop. 
+ // ========================================== + int max_dist_window = max_gap + 2 * fragLen + 1; + std::vector> disTable1(xlen, std::vector(max_dist_window, 0.0)); + std::vector> disTable2(ylen, std::vector(max_dist_window, 0.0)); + + for (int i = 0; i < xlen; i++) + { + for (int j = i; j < std::min(xlen, i + max_dist_window); j++) + { + double dx = xa[i][0] - xa[j][0]; + double dy = xa[i][1] - xa[j][1]; + double dz = xa[i][2] - xa[j][2]; + disTable1[i][j - i] = std::sqrt(dx * dx + dy * dy + dz * dz); + } + } + for (int i = 0; i < ylen; i++) + { + for (int j = i; j < std::min(ylen, i + max_dist_window); j++) + { + double dx = ya[i][0] - ya[j][0]; + double dy = ya[i][1] - ya[j][1]; + double dz = ya[i][2] - ya[j][2]; + disTable2[i][j - i] = std::sqrt(dx * dx + dy * dy + dz * dz); + } + } + // ========================================== // Step 1: Extract initial AFPs in batches // ========================================== - vector initial_afps; + std::vector initial_afps; int step = sparse_val + 1; - - // Optimization: Use stack memory for the tight O(N^2) loop + double r1_static[8][3], r2_static[8][3]; double *r1[8], *r2[8]; - for (int k = 0; k < 8; k++) { + for (int k = 0; k < 8; k++) + { r1[k] = r1_static[k]; r2[k] = r2_static[k]; } - for (int i = 0; i <= xlen - fragLen; i += step) { - for (int j = 0; j <= ylen - fragLen; j += step) { - int d3_term = min(i, j) + min(xlen - (i + fragLen), ylen - (j + fragLen)) + fragLen; - if (d3_term < 0.3 * min(xlen, ylen)) continue; - - // Explicit Euclidean distance math - double dx1 = xa[i + fragLen - 1][0] - xa[i][0]; - double dy1 = xa[i + fragLen - 1][1] - xa[i][1]; - double dz1 = xa[i + fragLen - 1][2] - xa[i][2]; - double d1 = sqrt(dx1*dx1 + dy1*dy1 + dz1*dz1); + for (int i = 0; i <= xlen - fragLen; i += step) + { + for (int j = 0; j <= ylen - fragLen; j += step) + { + int d3_term = std::min(i, j) + std::min(xlen - (i + fragLen), ylen - (j + fragLen)) + fragLen; + if (d3_term < 0.3 * std::min(xlen, ylen)) + continue; - 
double dx2 = ya[j + fragLen - 1][0] - ya[j][0]; - double dy2 = ya[j + fragLen - 1][1] - ya[j][1]; - double dz2 = ya[j + fragLen - 1][2] - ya[j][2]; - double d2 = sqrt(dx2*dx2 + dy2*dy2 + dz2*dz2); + double dist1 = disTable1[i][fragLen - 1]; // Precomputed end-to-end distance + double dist2 = disTable2[j][fragLen - 1]; - // Use fabs() instead of abs() - if (fabs(d1 - d2) > 2.0 * rmsdCut) continue; + if (std::fabs(dist1 - dist2) > 2.0 * rmsdCut) + continue; - for (int k = 0; k < fragLen; k++) { - r1[k][0] = xa[i + k][0]; r1[k][1] = xa[i + k][1]; r1[k][2] = xa[i + k][2]; - r2[k][0] = ya[j + k][0]; r2[k][1] = ya[j + k][1]; r2[k][2] = ya[j + k][2]; + for (int k = 0; k < fragLen; k++) + { + r1[k][0] = xa[i + k][0]; + r1[k][1] = xa[i + k][1]; + r1[k][2] = xa[i + k][2]; + r2[k][0] = ya[j + k][0]; + r2[k][1] = ya[j + k][1]; + r2[k][2] = ya[j + k][2]; } - - // Mode=0 to compute correct error, then map to RMSD manually + double rms_sum_sq, t_tmp[3], u_tmp[3][3]; Kabsch(r1, r2, fragLen, 0, &rms_sum_sq, t_tmp, u_tmp); - double rmsd_tmp = sqrt(rms_sum_sq / fragLen); + double rmsd_tmp = std::sqrt(rms_sum_sq / fragLen); - if (rmsd_tmp < rmsdCut) { + if (rmsd_tmp < rmsdCut) + { FATCAT_AFP afp; - afp.i = i; afp.j = j; afp.len = fragLen; + afp.i = i; + afp.j = j; + afp.len = fragLen; afp.score = resScore * fragLen * (1.0 - (rmsd_tmp / badRmsd) * (rmsd_tmp / badRmsd)); - for (int a = 0; a < 3; a++) { + for (int a = 0; a < 3; a++) + { afp.t[a] = t_tmp[a]; - for (int b = 0; b < 3; b++) afp.R[a][b] = u_tmp[a][b]; + for (int b = 0; b < 3; b++) + afp.R[a][b] = u_tmp[a][b]; } initial_afps.push_back(afp); } @@ -3038,52 +3071,72 @@ int flexalign_fatcat_main(double **xa, double **ya, // ========================================== // Step 2: Merge diagonal AFPs // ========================================== - map> diagonals; - for (size_t k = 0; k < initial_afps.size(); k++) { - diagonals[initial_afps[k].i - initial_afps[k].j].push_back(initial_afps[k]); + // OPTIMIZATION 2: Flat vector instead 
of std::map + int max_diagonal_idx = xlen + ylen + 1; + std::vector> diagonals(max_diagonal_idx); + for (size_t k = 0; k < initial_afps.size(); k++) + { + diagonals[initial_afps[k].i - initial_afps[k].j + ylen].push_back(initial_afps[k]); } - - vector merged_afps; - for (map>::iterator it = diagonals.begin(); it != diagonals.end(); ++it) { - vector& group = it->second; - for (size_t a = 0; a < group.size(); a++) { - for (size_t b = a + 1; b < group.size(); b++) { - if (group[b].i < group[a].i) { - FATCAT_AFP tmp = group[a]; group[a] = group[b]; group[b] = tmp; - } - } - } + + std::vector merged_afps; + + // OPTIMIZATION 4: Pre-allocate max buffers for merge checking + int max_merge_len = std::min(xlen, ylen); + double **r1_merge, **r2_merge; + NewArray(&r1_merge, max_merge_len, 3); + NewArray(&r2_merge, max_merge_len, 3); + + for (int d = 0; d < max_diagonal_idx; d++) + { + if (diagonals[d].empty()) + continue; + std::vector &group = diagonals[d]; + + // OPTIMIZATION 3: O(N log N) std::sort instead of O(N^2) loops + std::sort(group.begin(), group.end(), [](const FATCAT_AFP &a, const FATCAT_AFP &b) + { return a.i < b.i; }); + int n_group = group.size(); - vector invalid(n_group, false); - for (int idx = 0; idx < n_group; idx++) { - if (invalid[idx]) continue; + std::vector invalid(n_group, false); + for (int idx = 0; idx < n_group; idx++) + { + if (invalid[idx]) + continue; FATCAT_AFP curr = group[idx]; - for (int nxt_idx = idx + 1; nxt_idx < n_group; nxt_idx++) { + for (int nxt_idx = idx + 1; nxt_idx < n_group; nxt_idx++) + { FATCAT_AFP nxt = group[nxt_idx]; - if (nxt.i > curr.i + curr.len) break; - - if (nxt.i + nxt.len > curr.i + curr.len) { + if (nxt.i > curr.i + curr.len) + break; + + if (nxt.i + nxt.len > curr.i + curr.len) + { int new_len = (nxt.i + nxt.len) - curr.i; - double **r1_merge, **r2_merge; - NewArray(&r1_merge, new_len, 3); - NewArray(&r2_merge, new_len, 3); - for (int k = 0; k < new_len; k++) { - r1_merge[k][0] = xa[curr.i + k][0]; r1_merge[k][1] 
= xa[curr.i + k][1]; r1_merge[k][2] = xa[curr.i + k][2]; - r2_merge[k][0] = ya[curr.j + k][0]; r2_merge[k][1] = ya[curr.j + k][1]; r2_merge[k][2] = ya[curr.j + k][2]; + + // Directly use pre-allocated buffers + for (int k = 0; k < new_len; k++) + { + r1_merge[k][0] = xa[curr.i + k][0]; + r1_merge[k][1] = xa[curr.i + k][1]; + r1_merge[k][2] = xa[curr.i + k][2]; + r2_merge[k][0] = ya[curr.j + k][0]; + r2_merge[k][1] = ya[curr.j + k][1]; + r2_merge[k][2] = ya[curr.j + k][2]; } - - // Mode=0 to compute correct error + double rms_sum_sq, t_tmp[3], u_tmp[3][3]; Kabsch(r1_merge, r2_merge, new_len, 0, &rms_sum_sq, t_tmp, u_tmp); - double rmsd_tmp = sqrt(rms_sum_sq / new_len); - - DeleteArray(&r1_merge, new_len); DeleteArray(&r2_merge, new_len); + double rmsd_tmp = std::sqrt(rms_sum_sq / new_len); - if (rmsd_tmp < rmsdCut) { + if (rmsd_tmp < rmsdCut) + { curr.len = new_len; - for (int a = 0; a < 3; a++) { + for (int a = 0; a < 3; a++) + { curr.t[a] = t_tmp[a]; - for (int b = 0; b < 3; b++) curr.R[a][b] = u_tmp[a][b]; + for (int b = 0; b < 3; b++) + curr.R[a][b] = u_tmp[a][b]; } curr.score = resScore * new_len * (1.0 - (rmsd_tmp / badRmsd) * (rmsd_tmp / badRmsd)); invalid[nxt_idx] = true; @@ -3093,106 +3146,150 @@ int flexalign_fatcat_main(double **xa, double **ya, merged_afps.push_back(curr); } } - - for (size_t a = 0; a < merged_afps.size(); a++) { - for (size_t b = a + 1; b < merged_afps.size(); b++) { - if (merged_afps[b].i < merged_afps[a].i || (merged_afps[b].i == merged_afps[a].i && merged_afps[b].j < merged_afps[a].j)) { - FATCAT_AFP tmp = merged_afps[a]; merged_afps[a] = merged_afps[b]; merged_afps[b] = tmp; - } - } - } + DeleteArray(&r1_merge, max_merge_len); + DeleteArray(&r2_merge, max_merge_len); + + // Sort final merged afps + std::sort(merged_afps.begin(), merged_afps.end(), [](const FATCAT_AFP &a, const FATCAT_AFP &b) + { + if (a.i == b.i) return a.j < b.j; + return a.i < b.i; }); + int n_afps = merged_afps.size(); - if (n_afps == 0) return 0; + if (n_afps == 
0) + return 0; // ========================================== // Step 3: Global Dynamic Programming (DP) // ========================================== - vector> afp_aft_index(xlen, vector(ylen, -1)); - vector> afp_bef_index(xlen, vector(ylen, -1)); - map>> i_to_j; - - for (int m = 0; m < n_afps; m++) { - i_to_j[merged_afps[m].i].push_back(make_pair(merged_afps[m].j, m)); + // OPTIMIZATION 5: Flat 1D vectors for 2D DP cache + std::vector afp_aft_index(xlen * ylen, -1); + std::vector afp_bef_index(xlen * ylen, -1); + + // Flat mapping instead of std::map + std::vector>> i_to_j(xlen); + for (int m = 0; m < n_afps; m++) + { + i_to_j[merged_afps[m].i].push_back(std::make_pair(merged_afps[m].j, m)); } - for (map>>::iterator it = i_to_j.begin(); it != i_to_j.end(); ++it) { - int i_val = it->first; - for (size_t p = 0; p < it->second.size(); p++) { - afp_aft_index[i_val][it->second[p].first] = it->second[p].second; - afp_bef_index[i_val][it->second[p].first] = it->second[p].second; + for (int i_val = 0; i_val < xlen; i_val++) + { + if (i_to_j[i_val].empty()) + continue; + for (size_t p = 0; p < i_to_j[i_val].size(); p++) + { + int j_val = i_to_j[i_val][p].first; + afp_aft_index[i_val * ylen + j_val] = i_to_j[i_val][p].second; + afp_bef_index[i_val * ylen + j_val] = i_to_j[i_val][p].second; } int curr_bef = -1; - for (int j_val = 0; j_val < ylen; j_val++) { - if (afp_bef_index[i_val][j_val] != -1) curr_bef = afp_bef_index[i_val][j_val]; - else afp_bef_index[i_val][j_val] = curr_bef; + for (int j_val = 0; j_val < ylen; j_val++) + { + if (afp_bef_index[i_val * ylen + j_val] != -1) + curr_bef = afp_bef_index[i_val * ylen + j_val]; + else + afp_bef_index[i_val * ylen + j_val] = curr_bef; } int curr_aft = -1; - for (int j_val = ylen - 1; j_val >= 0; j_val--) { - if (afp_aft_index[i_val][j_val] != -1) curr_aft = afp_aft_index[i_val][j_val]; - else afp_aft_index[i_val][j_val] = curr_aft; + for (int j_val = ylen - 1; j_val >= 0; j_val--) + { + if (afp_aft_index[i_val * ylen + j_val] 
!= -1) + curr_aft = afp_aft_index[i_val * ylen + j_val]; + else + afp_aft_index[i_val * ylen + j_val] = curr_aft; } } - vector sco(n_afps); - vector twi(n_afps, 0); - vector pre(n_afps, -1); - for (int m = 0; m < n_afps; m++) sco[m] = merged_afps[m].score; + std::vector sco(n_afps); + std::vector twi(n_afps, 0); + std::vector pre(n_afps, -1); + for (int m = 0; m < n_afps; m++) + sco[m] = merged_afps[m].score; - for (int m = 0; m < n_afps; m++) { + for (int m = 0; m < n_afps; m++) + { int curr_i = merged_afps[m].i; int curr_j = merged_afps[m].j; int a3 = curr_i - fragLen; - int a2 = max(0, a3 - misCut); - int a1 = max(0, curr_i - maxGapFrag); + int a2 = std::max(0, a3 - misCut); + int a1 = std::max(0, curr_i - maxGapFrag); int b3 = curr_j - fragLen; - int b2 = max(0, b3 - misCut); - int b1 = max(0, curr_j - maxGapFrag); + int b2 = std::max(0, b3 - misCut); + int b1 = std::max(0, curr_j - maxGapFrag); - vector valid_prevs; - for (int step = 0; step < 2; step++) { + std::vector valid_prevs; + for (int step = 0; step < 2; step++) + { int a_s, a_e, b_s, b_e; - if (step == 0) { a_s = max(a1, 0); a_e = min(a3, xlen - 1); b_s = max(b2, 0); b_e = min(b3, ylen - 1); } - else { a_s = max(a2, 0); a_e = min(a3, xlen - 1); b_s = max(b1, 0); b_e = min(b2 - 1, ylen - 1); } - - if (b_s >= ylen || b_e < 0) continue; - for (int prev_i = a_s; prev_i <= a_e; prev_i++) { - int s1 = afp_aft_index[prev_i][b_s]; - int s2 = afp_bef_index[prev_i][b_e]; - if (s1 != -1 && s2 != -1 && s1 <= s2) { - for (int s = s1; s <= s2; s++) valid_prevs.push_back(s); + if (step == 0) + { + a_s = std::max(a1, 0); + a_e = std::min(a3, xlen - 1); + b_s = std::max(b2, 0); + b_e = std::min(b3, ylen - 1); + } + else + { + a_s = std::max(a2, 0); + a_e = std::min(a3, xlen - 1); + b_s = std::max(b1, 0); + b_e = std::min(b2 - 1, ylen - 1); + } + + if (b_s >= ylen || b_e < 0) + continue; + for (int prev_i = a_s; prev_i <= a_e; prev_i++) + { + int s1 = afp_aft_index[prev_i * ylen + b_s]; + int s2 = afp_bef_index[prev_i 
* ylen + b_e]; + if (s1 != -1 && s2 != -1 && s1 <= s2) + { + for (int s = s1; s <= s2; s++) + valid_prevs.push_back(s); } } } double curr_sco = merged_afps[m].score; - for (size_t v = 0; v < valid_prevs.size(); v++) { + for (size_t v = 0; v < valid_prevs.size(); v++) + { int prev = valid_prevs[v]; int prev_twi = twi[prev]; - if (prev_twi > max_twists) continue; + if (prev_twi > max_twists) + continue; int gap_i = curr_i - (merged_afps[prev].i + merged_afps[prev].len); int gap_j = curr_j - (merged_afps[prev].j + merged_afps[prev].len); - int m_gap = max(gap_i, gap_j); + int m_gap = std::max(gap_i, gap_j); int m_mis = 0; - if (gap_i < 0 || gap_j < 0) m_mis = (gap_i < gap_j) ? -gap_i : -gap_j; + if (gap_i < 0 || gap_j < 0) + m_mis = (gap_i < gap_j) ? -gap_i : -gap_j; double gp = gap_ext * m_mis; - if (m_gap > 0) gp += gap_ext * m_gap; - if (gp < max_penalty) gp = max_penalty; + if (m_gap > 0) + gp += gap_ext * m_gap; + if (gp < max_penalty) + gp = max_penalty; - // Explicit Euclidean math for distance differences + // USE PRECOMPUTED DISTANCES - O(1) inside loop double rms_sq = 0; - for (int k = 0; k < fragLen; k++) { - for (int l = 0; l < fragLen; l++) { - double dx1 = xa[curr_i + k][0] - xa[merged_afps[prev].i + l][0]; - double dy1 = xa[curr_i + k][1] - xa[merged_afps[prev].i + l][1]; - double dz1 = xa[curr_i + k][2] - xa[merged_afps[prev].i + l][2]; - double dist1 = sqrt(dx1*dx1 + dy1*dy1 + dz1*dz1); - - double dx2 = ya[curr_j + k][0] - ya[merged_afps[prev].j + l][0]; - double dy2 = ya[curr_j + k][1] - ya[merged_afps[prev].j + l][1]; - double dz2 = ya[curr_j + k][2] - ya[merged_afps[prev].j + l][2]; - double dist2 = sqrt(dx2*dx2 + dy2*dy2 + dz2*dz2); + for (int k = 0; k < fragLen; k++) + { + for (int l = 0; l < fragLen; l++) + { + double dist1, dist2; + int idx1_a = curr_i + k, idx1_b = merged_afps[prev].i + l; + if (idx1_a >= idx1_b) + dist1 = disTable1[idx1_b][idx1_a - idx1_b]; + else + dist1 = disTable1[idx1_a][idx1_b - idx1_a]; + + int idx2_a = curr_j + k, 
idx2_b = merged_afps[prev].j + l; + if (idx2_a >= idx2_b) + dist2 = disTable2[idx2_b][idx2_a - idx2_b]; + else + dist2 = disTable2[idx2_a][idx2_b - idx2_a]; rms_sq += (dist1 - dist2) * (dist1 - dist2); } @@ -3200,139 +3297,196 @@ int flexalign_fatcat_main(double **xa, double **ya, double tp = 0.0; int is_twist = 0; - if (rms_sq >= afp_dis_cut) { - tp = twist_pen; is_twist = 1; - } else { - double dvar = sqrt(rms_sq / (fragLen * fragLen)); - if (dvar > disCut - disSmooth) tp = twist_pen * sqrt((dvar - disCut + disSmooth) / disSmooth); + if (rms_sq >= afp_dis_cut) + { + tp = twist_pen; + is_twist = 1; + } + else + { + double dvar = std::sqrt(rms_sq / (fragLen * fragLen)); + if (dvar > disCut - disSmooth) + tp = twist_pen * std::sqrt((dvar - disCut + disSmooth) / disSmooth); } - if (prev_twi + is_twist > max_twists) continue; + if (prev_twi + is_twist > max_twists) + continue; double stmp = sco[prev] + curr_sco + tp + gp; - if (stmp > sco[m]) { - sco[m] = stmp; pre[m] = prev; twi[m] = prev_twi + is_twist; + if (stmp > sco[m]) + { + sco[m] = stmp; + pre[m] = prev; + twi[m] = prev_twi + is_twist; } } } int best_m = 0; - for (int m = 1; m < n_afps; m++) if (sco[m] > sco[best_m]) best_m = m; + for (int m = 1; m < n_afps; m++) + if (sco[m] > sco[best_m]) + best_m = m; - vector path; + std::vector path; int curr_m = best_m; - while (curr_m != -1) { path.push_back(curr_m); curr_m = pre[curr_m]; } - reverse(path.begin(), path.end()); + while (curr_m != -1) + { + path.push_back(curr_m); + curr_m = pre[curr_m]; + } + std::reverse(path.begin(), path.end()); // ========================================== // Step 4: Split structure based on twists // ========================================== - vector> blocks; - vector curr_block; + std::vector> blocks; + std::vector curr_block; curr_block.push_back(merged_afps[path[0]]); - for (size_t k = 1; k < path.size(); k++) { + for (size_t k = 1; k < path.size(); k++) + { FATCAT_AFP curr = merged_afps[path[k]]; FATCAT_AFP prv = 
merged_afps[path[k - 1]]; - + + // USE PRECOMPUTED DISTANCES - O(1) inside loop double rms_sq = 0; - for (int i_idx = 0; i_idx < fragLen; i_idx++) { - for (int j_idx = 0; j_idx < fragLen; j_idx++) { - double dx1 = xa[curr.i + i_idx][0] - xa[prv.i + j_idx][0]; - double dy1 = xa[curr.i + i_idx][1] - xa[prv.i + j_idx][1]; - double dz1 = xa[curr.i + i_idx][2] - xa[prv.i + j_idx][2]; - double dist1 = sqrt(dx1*dx1 + dy1*dy1 + dz1*dz1); - - double dx2 = ya[curr.j + i_idx][0] - ya[prv.j + j_idx][0]; - double dy2 = ya[curr.j + i_idx][1] - ya[prv.j + j_idx][1]; - double dz2 = ya[curr.j + i_idx][2] - ya[prv.j + j_idx][2]; - double dist2 = sqrt(dx2*dx2 + dy2*dy2 + dz2*dz2); + for (int i_idx = 0; i_idx < fragLen; i_idx++) + { + for (int j_idx = 0; j_idx < fragLen; j_idx++) + { + double dist1, dist2; + int idx1_a = curr.i + i_idx, idx1_b = prv.i + j_idx; + if (idx1_a >= idx1_b) + dist1 = disTable1[idx1_b][idx1_a - idx1_b]; + else + dist1 = disTable1[idx1_a][idx1_b - idx1_a]; + + int idx2_a = curr.j + i_idx, idx2_b = prv.j + j_idx; + if (idx2_a >= idx2_b) + dist2 = disTable2[idx2_b][idx2_a - idx2_b]; + else + dist2 = disTable2[idx2_a][idx2_b - idx2_a]; rms_sq += (dist1 - dist2) * (dist1 - dist2); } } - - double dvar = (rms_sq > afp_dis_cut) ? 1e9 : sqrt(rms_sq / (fragLen * fragLen)); - if (dvar >= disCut) { blocks.push_back(curr_block); curr_block.clear(); } + + double dvar = (rms_sq > afp_dis_cut) ? 
1e9 : std::sqrt(rms_sq / (fragLen * fragLen)); + if (dvar >= disCut) + { + blocks.push_back(curr_block); + curr_block.clear(); + } curr_block.push_back(curr); } blocks.push_back(curr_block); - struct Region { int s1, e1, s2, e2; }; - vector real_blocks; + struct Region + { + int s1, e1, s2, e2; + }; + std::vector real_blocks; int last_i = 0, last_j = 0; - - for (size_t b = 0; b < blocks.size(); b++) { + + for (size_t b = 0; b < blocks.size(); b++) + { int b_s1 = -1, b_e1 = -1, b_s2 = -1, b_e2 = -1; - for (size_t a = 0; a < blocks[b].size(); a++) { + for (size_t a = 0; a < blocks[b].size(); a++) + { FATCAT_AFP afp = blocks[b][a]; - int skip = max(max(last_i - afp.i, last_j - afp.j), 0); - if (skip >= afp.len) continue; - + int skip = std::max(std::max(last_i - afp.i, last_j - afp.j), 0); + if (skip >= afp.len) + continue; + int eff_i = afp.i + skip; int eff_j = afp.j + skip; int eff_L = afp.len - skip; - if (b_s1 == -1) { b_s1 = eff_i; b_s2 = eff_j; } - b_e1 = eff_i + eff_L; b_e2 = eff_j + eff_L; - last_i = b_e1; last_j = b_e2; + if (b_s1 == -1) + { + b_s1 = eff_i; + b_s2 = eff_j; + } + b_e1 = eff_i + eff_L; + b_e2 = eff_j + eff_L; + last_i = b_e1; + last_j = b_e2; } - if (b_s1 != -1) { + if (b_s1 != -1) + { Region r = {b_s1, b_e1, b_s2, b_e2}; real_blocks.push_back(r); } } - if (real_blocks.empty()) return 0; + if (real_blocks.empty()) + return 0; - // Calculate bounds using middle split strategy (0-based) - vector bounds1, bounds2; - bounds1.push_back(0); bounds2.push_back(0); - for (size_t k = 0; k < real_blocks.size() - 1; k++) { + std::vector bounds1, bounds2; + bounds1.push_back(0); + bounds2.push_back(0); + for (size_t k = 0; k < real_blocks.size() - 1; k++) + { bounds1.push_back((real_blocks[k].e1 + real_blocks[k + 1].s1) / 2); bounds2.push_back((real_blocks[k].e2 + real_blocks[k + 1].s2) / 2); } - bounds1.push_back(xlen); bounds2.push_back(ylen); - + bounds1.push_back(xlen); + bounds2.push_back(ylen); // ========================================== // [DEBUG] 
TEMPORARY DEBUG OUTPUT // ========================================== - cout << "\n========================================" << endl; - cout << "PDB1 Interval: "; - for (size_t k = 0; k < bounds1.size() - 1; k++) { - cout << (bounds1[k] + 1) << "-" << bounds1[k + 1]; - if (k < bounds1.size() - 2) cout << ","; - } - cout << "\nPDB2 Interval: "; - for (size_t k = 0; k < bounds2.size() - 1; k++) { - cout << (bounds2[k] + 1) << "-" << bounds2[k + 1]; - if (k < bounds2.size() - 2) cout << ","; - } - cout << "\n========================================\n" << endl; - + // cout << "\n========================================" << endl; + // cout << "PDB1 Interval: "; + // for (size_t k = 0; k < bounds1.size() - 1; k++) + // { + // cout << (bounds1[k] + 1) << "-" << bounds1[k + 1]; + // if (k < bounds1.size() - 2) + // cout << ","; + // } + // cout << "\nPDB2 Interval: "; + // for (size_t k = 0; k < bounds2.size() - 1; k++) + // { + // cout << (bounds2[k] + 1) << "-" << bounds2[k + 1]; + // if (k < bounds2.size() - 2) + // cout << ","; + // } + // cout << "\n========================================\n" + // << endl; // ========================================== // Step 5: Iteratively align each block using TRUE flexalign_best logic // ========================================== - string global_seqM = "", global_seqxA = "", global_seqyA = ""; + std::string global_seqM = "", global_seqxA = "", global_seqyA = ""; + // OPTIMIZATION 6: String capacity reservation to avoid reallocation overhead + global_seqM.reserve(xlen + ylen + max_gap); + global_seqxA.reserve(xlen + ylen + max_gap); + global_seqyA.reserve(xlen + ylen + max_gap); + tu_vec.clear(); - - // Array to map each global residue explicitly to its underlying rotation matrix - vector global_res_tu(xlen, -1); + std::vector global_res_tu(xlen, -1); - for (size_t k = 0; k < bounds1.size() - 1; k++) { + for (size_t k = 0; k < bounds1.size() - 1; k++) + { int x_s = bounds1[k], x_e = bounds1[k + 1]; int y_s = bounds2[k], y_e = 
bounds2[k + 1]; int L1_sub = x_e - x_s; int L2_sub = y_e - y_s; - // Pad unaligned sequences to prevent coordinate desynchronization - if (L1_sub < 3 || L2_sub < 3) { - for (int i = 0; i < L1_sub; i++) { - global_seqxA += seqx[x_s + i]; global_seqyA += '-'; global_seqM += ' '; + // If the sub-region is too short, just fill with gaps + if (L1_sub < 3 || L2_sub < 3) + { + for (int i = 0; i < L1_sub; i++) + { + global_seqxA += seqx[x_s + i]; + global_seqyA += '-'; + global_seqM += ' '; } - for (int i = 0; i < L2_sub; i++) { - global_seqxA += '-'; global_seqyA += seqy[y_s + i]; global_seqM += ' '; + for (int i = 0; i < L2_sub; i++) + { + global_seqxA += '-'; + global_seqyA += seqy[y_s + i]; + global_seqM += ' '; } continue; } + // Allocate memory for sub-structures double **xa_sub, **ya_sub; NewArray(&xa_sub, L1_sub, 3); NewArray(&ya_sub, L2_sub, 3); @@ -3341,33 +3495,52 @@ int flexalign_fatcat_main(double **xa, double **ya, char *secx_sub = new char[L1_sub + 1]; char *secy_sub = new char[L2_sub + 1]; - for (int i = 0; i < L1_sub; i++) { - xa_sub[i][0] = xa[x_s + i][0]; xa_sub[i][1] = xa[x_s + i][1]; xa_sub[i][2] = xa[x_s + i][2]; - seqx_sub[i] = seqx[x_s + i]; secx_sub[i] = secx[x_s + i]; + // Copy data for structure 1 + for (int i = 0; i < L1_sub; i++) + { + xa_sub[i][0] = xa[x_s + i][0]; + xa_sub[i][1] = xa[x_s + i][1]; + xa_sub[i][2] = xa[x_s + i][2]; + seqx_sub[i] = seqx[x_s + i]; + secx_sub[i] = secx[x_s + i]; } - seqx_sub[L1_sub] = '\0'; secx_sub[L1_sub] = '\0'; + seqx_sub[L1_sub] = '\0'; + secx_sub[L1_sub] = '\0'; - for (int i = 0; i < L2_sub; i++) { - ya_sub[i][0] = ya[y_s + i][0]; ya_sub[i][1] = ya[y_s + i][1]; ya_sub[i][2] = ya[y_s + i][2]; - seqy_sub[i] = seqy[y_s + i]; secy_sub[i] = secy[y_s + i]; + // Copy data for structure 2 + for (int i = 0; i < L2_sub; i++) + { + ya_sub[i][0] = ya[y_s + i][0]; + ya_sub[i][1] = ya[y_s + i][1]; + ya_sub[i][2] = ya[y_s + i][2]; + seqy_sub[i] = seqy[y_s + i]; + secy_sub[i] = secy[y_s + i]; } - seqy_sub[L2_sub] = '\0'; 
secy_sub[L2_sub] = '\0'; + seqy_sub[L2_sub] = '\0'; + secy_sub[L2_sub] = '\0'; + // Variables to store the best results for this sub-block double t0_best[3], u0_best[3][3]; double TM_best_max = -1.0; - string seqM_best, seqxA_best, seqyA_best; - vector> tu_vec_best; + std::string seqM_best, seqxA_best, seqyA_best; + std::vector> tu_vec_best; - bool force_fast_opt = (getmin(L1_sub, L2_sub) > 1500) ? true : fast_opt; - for (int cur_ss_opt = 0; cur_ss_opt <= 1; cur_ss_opt++) { + bool force_fast_opt = (std::min(L1_sub, L2_sub) > 1500) ? true : fast_opt; + + // Test different secondary structure options (flexalign_best behavior) + for (int cur_ss_opt = 0; cur_ss_opt <= 1; cur_ss_opt++) + { double t0_s[3], u0_s[3][3]; - vector> tu_vec_s; - double TM1_s=0, TM2_s=0, TM3_s=0, TM4_s=0, TM5_s=0; - double d0_0_s=0, TM_0_s=0, d0A_s=0, d0B_s=0, d0u_s=0, d0a_s=0, d0_out_s=5.0; - string seqM_s, seqxA_s, seqyA_s; - vector do_vec_s; - double rmsd0_s=0; int L_ali_s=0; double Liden_s=0; - double TM_ali_s=0, rmsd_ali_s=0; int n_ali_s=0, n_ali8_s=0; + std::vector> tu_vec_s; + double TM1_s = 0, TM2_s = 0, TM3_s = 0, TM4_s = 0, TM5_s = 0; + double d0_0_s = 0, TM_0_s = 0, d0A_s = 0, d0B_s = 0, d0u_s = 0, d0a_s = 0, d0_out_s = 5.0; + std::string seqM_s, seqxA_s, seqyA_s; + std::vector do_vec_s; + double rmsd0_s = 0; + int L_ali_s = 0; + double Liden_s = 0; + double TM_ali_s = 0, rmsd_ali_s = 0; + int n_ali_s = 0, n_ali8_s = 0; flexalign_main( xa_sub, ya_sub, seqx_sub, seqy_sub, secx_sub, secy_sub, @@ -3380,11 +3553,14 @@ int flexalign_fatcat_main(double **xa, double **ya, mol_type, hinge_opt, cur_ss_opt); double cur_max_TM = (TM1_s > TM2_s) ? 
TM1_s : TM2_s; - if (cur_max_TM > TM_best_max) { + if (cur_max_TM > TM_best_max) + { TM_best_max = cur_max_TM; - for(int a=0; a<3; a++) { + for (int a = 0; a < 3; a++) + { t0_best[a] = t0_s[a]; - for(int b=0; b<3; b++) u0_best[a][b] = u0_s[a][b]; + for (int b = 0; b < 3; b++) + u0_best[a][b] = u0_s[a][b]; } seqM_best = seqM_s; seqxA_best = seqxA_s; @@ -3393,169 +3569,240 @@ int flexalign_fatcat_main(double **xa, double **ya, } } - if (TM_best_max < 0) { - for (int i = 0; i < L1_sub; i++) { - global_seqxA += seqx_sub[i]; global_seqyA += '-'; global_seqM += ' '; + // If alignment completely failed + if (TM_best_max < 0) + { + for (int i = 0; i < L1_sub; i++) + { + global_seqxA += seqx_sub[i]; + global_seqyA += '-'; + global_seqM += ' '; } - for (int i = 0; i < L2_sub; i++) { - global_seqxA += '-'; global_seqyA += seqy_sub[i]; global_seqM += ' '; + for (int i = 0; i < L2_sub; i++) + { + global_seqxA += '-'; + global_seqyA += seqy_sub[i]; + global_seqM += ' '; } - DeleteArray(&xa_sub, L1_sub); DeleteArray(&ya_sub, L2_sub); - delete[] seqx_sub; delete[] seqy_sub; delete[] secx_sub; delete[] secy_sub; - continue; + DeleteArray(&xa_sub, L1_sub); + DeleteArray(&ya_sub, L2_sub); + delete[] seqx_sub; + delete[] seqy_sub; + delete[] secx_sub; + delete[] secy_sub; + continue; } - if (tu_vec_best.empty()) { - vector tu_tmp(12); + // Ensure we have at least one valid transform matrix + if (tu_vec_best.empty()) + { + std::vector tu_tmp(12); t_u2tu(t0_best, u0_best, tu_tmp); tu_vec_best.push_back(tu_tmp); } + // Incorporate local transformation matrices into the global list int base_tu_idx = tu_vec.size(); - for (size_t m = 0; m < tu_vec_best.size(); m++) { + for (size_t m = 0; m < tu_vec_best.size(); m++) + { tu_vec.push_back(tu_vec_best[m]); } - // ========================================== - // NEW FIX: Global numbering logic for 0-9, a-z, A-Z - // ========================================== int rx = x_s; - for (size_t i = 0; i < seqxA_best.length(); i++) { - int 
current_global_idx = base_tu_idx; - - // Extract the true internal matrix map - if (seqxA_best[i] != '-') { - char c = seqM_best[i]; - if (c != ' ') { - int local_hinge_idx = -1; - if (c >= '1' && c <= '9') local_hinge_idx = c - '1'; // 1-based flexalign -> 0-based offset - else if (c >= 'a' && c <= 'z') local_hinge_idx = c - 'a' + 9; - - if (local_hinge_idx >= 0 && local_hinge_idx < tu_vec_best.size()) { - current_global_idx = base_tu_idx + local_hinge_idx; - } + + // FIX: current_global_idx must stay outside the character loop + // to maintain the last known state across gaps or unaligned regions + int current_global_idx = base_tu_idx; + + for (size_t i = 0; i < seqxA_best.length(); i++) + { + char c = seqM_best[i]; + + // Parse US-align standard hinge characters to update current global matrix index + // Valid hinge chars are '0'-'9', 'a'-'z', 'A'-'Z'. Exclude spaces and weak alignment chars. + if (c != ' ' && c != '.' && c != ':') + { + int local_hinge_idx = -1; + if (c >= '0' && c <= '9') + local_hinge_idx = c - '0'; + else if (c >= 'a' && c <= 'z') + local_hinge_idx = c - 'a' + 10; + else if (c >= 'A' && c <= 'Z') + local_hinge_idx = c - 'A' + 36; + + // Safely update the global index tracking + if (local_hinge_idx >= 0 && local_hinge_idx < tu_vec_best.size()) + { + current_global_idx = base_tu_idx + local_hinge_idx; } + } + + // Assign the corresponding rotation matrix index to the original residue + if (seqxA_best[i] != '-') + { global_res_tu[rx] = current_global_idx; rx++; - } else { - // Determine ID strictly for the visual formatting of Y-insertions - char c = seqM_best[i]; - if (c != ' ') { - int local_hinge_idx = -1; - if (c >= '1' && c <= '9') local_hinge_idx = c - '1'; - else if (c >= 'a' && c <= 'z') local_hinge_idx = c - 'a' + 9; - - if (local_hinge_idx >= 0 && local_hinge_idx < tu_vec_best.size()) { - current_global_idx = base_tu_idx + local_hinge_idx; - } + } + + // Construct the final stitched string format correctly + if (seqxA_best[i] != '-' 
&& seqyA_best[i] != '-') + { + // Only assign a global hinge character if the local alignment considered it a true match + if (c != ' ' && c != '.' && c != ':') + { + char global_c; + if (current_global_idx < 10) + global_c = '0' + current_global_idx; + else if (current_global_idx < 36) + global_c = 'a' + (current_global_idx - 10); + else if (current_global_idx < 62) + global_c = 'A' + (current_global_idx - 36); + else + global_c = '*'; // Fallback if hinges exceed 62 + + seqM_best[i] = global_c; + } + else + { + // Preserve weak matches ('.' or ':') or spaces + seqM_best[i] = c; } } - - // Re-label matched areas starting sequentially from '0' - if (seqxA_best[i] != '-' && seqyA_best[i] != '-') { - char global_c; - if (current_global_idx < 10) global_c = '0' + current_global_idx; - else if (current_global_idx < 36) global_c = 'a' + (current_global_idx - 10); - else if (current_global_idx < 62) global_c = 'A' + (current_global_idx - 36); - else global_c = '*'; - - seqM_best[i] = global_c; - } else { - seqM_best[i] = ' '; // Standard gap alignment remains blank + else + { + seqM_best[i] = ' '; // Ensure gap positions correctly get space } } + // Append to the global alignment strings global_seqM += seqM_best; global_seqxA += seqxA_best; global_seqyA += seqyA_best; + // Clean up sub-block memory DeleteArray(&xa_sub, L1_sub); DeleteArray(&ya_sub, L2_sub); - delete[] seqx_sub; delete[] seqy_sub; delete[] secx_sub; delete[] secy_sub; + delete[] seqx_sub; + delete[] seqy_sub; + delete[] secx_sub; + delete[] secy_sub; } // ========================================== - // Step 6: Recalculate global metrics correctly + // Step 6: Recalculate global metrics correctly // ========================================== seqM = global_seqM; seqxA = global_seqxA; seqyA = global_seqyA; - d0A = 1.24 * pow(ylen * 1.0 - 15.0, 1.0 / 3.0) - 1.8; - if (d0A < 0.5) d0A = 0.5; - d0B = 1.24 * pow(xlen * 1.0 - 15.0, 1.0 / 3.0) - 1.8; - if (d0B < 0.5) d0B = 0.5; - d0a = 1.24 * pow((xlen + ylen) * 
0.5 - 15.0, 1.0 / 3.0) - 1.8; - if (d0a < 0.5) d0a = 0.5; - if (u_opt) { - d0u = 1.24 * pow(Lnorm_ass - 15.0, 1.0 / 3.0) - 1.8; - if (d0u < 0.5) d0u = 0.5; + d0A = 1.24 * std::pow(ylen * 1.0 - 15.0, 1.0 / 3.0) - 1.8; + if (d0A < 0.5) + d0A = 0.5; + d0B = 1.24 * std::pow(xlen * 1.0 - 15.0, 1.0 / 3.0) - 1.8; + if (d0B < 0.5) + d0B = 0.5; + d0a = 1.24 * std::pow((xlen + ylen) * 0.5 - 15.0, 1.0 / 3.0) - 1.8; + if (d0a < 0.5) + d0a = 0.5; + if (u_opt) + { + d0u = 1.24 * std::pow(Lnorm_ass - 15.0, 1.0 / 3.0) - 1.8; + if (d0u < 0.5) + d0u = 0.5; } TM1 = TM2 = TM3 = TM4 = TM5 = rmsd0 = 0.0; - Liden = 0; n_ali8 = 0; n_ali = 0; + Liden = 0.0; // FIX: Reset to absolute 0 + n_ali8 = 0; + n_ali = 0; do_vec.clear(); int i_res = 0, j_res = 0; - for (size_t r = 0; r < seqxA.length(); r++) { + for (size_t r = 0; r < seqxA.length(); r++) + { bool x_valid = (seqxA[r] != '-'); bool y_valid = (seqyA[r] != '-'); - if (x_valid && y_valid) { - if (seqxA[r] == seqyA[r]) Liden++; - + if (x_valid && y_valid) + { int matrix_idx = global_res_tu[i_res]; - - if (matrix_idx >= 0 && matrix_idx < tu_vec.size()) { + + if (matrix_idx >= 0 && matrix_idx < tu_vec.size()) + { double t_k[3], u_k[3][3]; - tu2t_u(tu_vec[matrix_idx], t_k, u_k); - + tu2t_u(tu_vec[matrix_idx], t_k, u_k); + double x_rot[3]; - x_rot[0] = t_k[0] + u_k[0][0]*xa[i_res][0] + u_k[0][1]*xa[i_res][1] + u_k[0][2]*xa[i_res][2]; - x_rot[1] = t_k[1] + u_k[1][0]*xa[i_res][0] + u_k[1][1]*xa[i_res][1] + u_k[1][2]*xa[i_res][2]; - x_rot[2] = t_k[2] + u_k[2][0]*xa[i_res][0] + u_k[2][1]*xa[i_res][1] + u_k[2][2]*xa[i_res][2]; + x_rot[0] = t_k[0] + u_k[0][0] * xa[i_res][0] + u_k[0][1] * xa[i_res][1] + u_k[0][2] * xa[i_res][2]; + x_rot[1] = t_k[1] + u_k[1][0] * xa[i_res][0] + u_k[1][1] * xa[i_res][1] + u_k[1][2] * xa[i_res][2]; + x_rot[2] = t_k[2] + u_k[2][0] * xa[i_res][0] + u_k[2][1] * xa[i_res][1] + u_k[2][2] * xa[i_res][2]; double dist2 = dist(x_rot, ya[j_res]); - double d = sqrt(dist2); - + double d = std::sqrt(dist2); + TM2 += 1.0 / (1.0 + 
dist2 / (d0B * d0B)); TM1 += 1.0 / (1.0 + dist2 / (d0A * d0A)); - if (a_opt) TM3 += 1.0 / (1.0 + dist2 / (d0a * d0a)); - if (u_opt) TM4 += 1.0 / (1.0 + dist2 / (d0u * d0u)); - if (d_opt) TM5 += 1.0 / (1.0 + dist2 / (d0_scale * d0_scale)); + if (a_opt) + TM3 += 1.0 / (1.0 + dist2 / (d0a * d0a)); + if (u_opt) + TM4 += 1.0 / (1.0 + dist2 / (d0u * d0u)); + if (d_opt) + TM5 += 1.0 / (1.0 + dist2 / (d0_scale * d0_scale)); n_ali++; do_vec.push_back(d); - - if (d <= d0_out) { + + if (d <= d0_out) + { rmsd0 += dist2; n_ali8++; + + // FIX: ONLY increment Liden if the pair is structurally aligned (d <= d0_out) + // This matches the denominator (n_ali8) used in output_flexalign_results. + if (seqx[i_res] == seqy[j_res]) + { + Liden += 1.0; + } } - } else { + } + else + { do_vec.push_back(-1); } - } else { + } + else + { do_vec.push_back(-1); } - if (x_valid) i_res++; - if (y_valid) j_res++; + if (x_valid) + i_res++; + if (y_valid) + j_res++; } TM2 /= xlen; TM1 /= ylen; - if (a_opt) TM3 /= (xlen + ylen) * 0.5; - if (u_opt) TM4 /= Lnorm_ass; - if (d_opt) TM5 /= ylen; - - if (n_ali8 > 0) rmsd0 = sqrt(rmsd0 / n_ali8); - else rmsd0 = 0.0; - + if (a_opt) + TM3 /= (xlen + ylen) * 0.5; + if (u_opt) + TM4 /= Lnorm_ass; + if (d_opt) + TM5 /= ylen; + + if (n_ali8 > 0) + rmsd0 = std::sqrt(rmsd0 / n_ali8); + else + rmsd0 = 0.0; + L_ali = n_ali; TM_ali = TM1; rmsd_ali = rmsd0; - if (!tu_vec.empty()) tu2t_u(tu_vec[0], t0, u0); + // Notice: NO Division by L_ali here. We leave it as an absolute count! 
+ + if (!tu_vec.empty()) + tu2t_u(tu_vec[0], t0, u0); return tu_vec.size(); } From 5abc15dcfff003388c09ecdd171398bb58de2214 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A5=9D=E4=BA=91=E7=AF=AA?= Date: Wed, 13 May 2026 10:26:18 +0800 Subject: [PATCH 11/23] -mm 10 bugfix --- USalign.cpp | 61 ++++++++++++++++++++++++----------------------------- 1 file changed, 27 insertions(+), 34 deletions(-) diff --git a/USalign.cpp b/USalign.cpp index 8a63739..1431c9a 100644 --- a/USalign.cpp +++ b/USalign.cpp @@ -2978,6 +2978,7 @@ int flexalign_fatcat_main(double **xa, double **ya, int misCut = 2 * fragLen; int maxGapFrag = fragLen + max_gap; double afp_dis_cut = fragLen * fragLen * (disCut * disCut); + int min_block_len = fragLen; // ========================================== // OPTIMIZATION 1: Precompute local intra-protein distance matrices @@ -3412,8 +3413,13 @@ int flexalign_fatcat_main(double **xa, double **ya, } if (b_s1 != -1) { - Region r = {b_s1, b_e1, b_s2, b_e2}; - real_blocks.push_back(r); + int block_len_1 = b_e1 - b_s1; + int block_len_2 = b_e2 - b_s2; + if (block_len_1 >= min_block_len && block_len_2 >= min_block_len) + { + Region r = {b_s1, b_e1, b_s2, b_e2}; + real_blocks.push_back(r); + } } } if (real_blocks.empty()) @@ -3526,49 +3532,36 @@ int flexalign_fatcat_main(double **xa, double **ya, std::vector> tu_vec_best; bool force_fast_opt = (std::min(L1_sub, L2_sub) > 1500) ? 
true : fast_opt; - + std::vector local_sequence = sequence; + // Test different secondary structure options (flexalign_best behavior) for (int cur_ss_opt = 0; cur_ss_opt <= 1; cur_ss_opt++) { - double t0_s[3], u0_s[3][3]; - std::vector> tu_vec_s; - double TM1_s = 0, TM2_s = 0, TM3_s = 0, TM4_s = 0, TM5_s = 0; - double d0_0_s = 0, TM_0_s = 0, d0A_s = 0, d0B_s = 0, d0u_s = 0, d0a_s = 0, d0_out_s = 5.0; - std::string seqM_s, seqxA_s, seqyA_s; - std::vector do_vec_s; - double rmsd0_s = 0; - int L_ali_s = 0; - double Liden_s = 0; - double TM_ali_s = 0, rmsd_ali_s = 0; - int n_ali_s = 0, n_ali8_s = 0; - - flexalign_main( + FlexAlignResult cur_res; + + // This ensures that the fallback compensation runs if too few hinges are found. + execute_flexalign_with_fallback( xa_sub, ya_sub, seqx_sub, seqy_sub, secx_sub, secy_sub, - t0_s, u0_s, tu_vec_s, TM1_s, TM2_s, TM3_s, TM4_s, TM5_s, - d0_0_s, TM_0_s, d0A_s, d0B_s, d0u_s, d0a_s, d0_out_s, - seqM_s, seqxA_s, seqyA_s, do_vec_s, - rmsd0_s, L_ali_s, Liden_s, TM_ali_s, rmsd_ali_s, n_ali_s, n_ali8_s, - L1_sub, L2_sub, sequence, Lnorm_ass, d0_scale, + L1_sub, L2_sub, local_sequence, Lnorm_ass, d0_scale, i_opt, a_opt, u_opt, d_opt, force_fast_opt, - mol_type, hinge_opt, cur_ss_opt); + mol_type, hinge_opt, cur_ss_opt, cur_res); - double cur_max_TM = (TM1_s > TM2_s) ? TM1_s : TM2_s; + double cur_max_TM = (cur_res.TM1 > cur_res.TM2) ? 
cur_res.TM1 : cur_res.TM2; if (cur_max_TM > TM_best_max) { TM_best_max = cur_max_TM; for (int a = 0; a < 3; a++) { - t0_best[a] = t0_s[a]; + t0_best[a] = cur_res.t0[a]; for (int b = 0; b < 3; b++) - u0_best[a][b] = u0_s[a][b]; + u0_best[a][b] = cur_res.u0[a][b]; } - seqM_best = seqM_s; - seqxA_best = seqxA_s; - seqyA_best = seqyA_s; - tu_vec_best = tu_vec_s; + seqM_best = cur_res.seqM; + seqxA_best = cur_res.seqxA; + seqyA_best = cur_res.seqyA; + tu_vec_best = cur_res.tu_vec; } } - // If alignment completely failed if (TM_best_max < 0) { @@ -3609,8 +3602,8 @@ int flexalign_fatcat_main(double **xa, double **ya, } int rx = x_s; - - // FIX: current_global_idx must stay outside the character loop + + // FIX: current_global_idx must stay outside the character loop // to maintain the last known state across gaps or unaligned regions int current_global_idx = base_tu_idx; @@ -3712,7 +3705,7 @@ int flexalign_fatcat_main(double **xa, double **ya, } TM1 = TM2 = TM3 = TM4 = TM5 = rmsd0 = 0.0; - Liden = 0.0; // FIX: Reset to absolute 0 + Liden = 0.0; // FIX: Reset to absolute 0 n_ali8 = 0; n_ali = 0; do_vec.clear(); @@ -3756,7 +3749,7 @@ int flexalign_fatcat_main(double **xa, double **ya, { rmsd0 += dist2; n_ali8++; - + // FIX: ONLY increment Liden if the pair is structurally aligned (d <= d0_out) // This matches the denominator (n_ali8) used in output_flexalign_results. 
if (seqx[i_res] == seqy[j_res]) From 94d07dab23362b9fef5e61e286df1c79e1fb2146 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A5=9D=E4=BA=91=E7=AF=AA?= Date: Wed, 13 May 2026 20:07:46 +0800 Subject: [PATCH 12/23] -mm 10 bugfix --- USalign.cpp | 253 +++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 183 insertions(+), 70 deletions(-) diff --git a/USalign.cpp b/USalign.cpp index 1431c9a..f621260 100644 --- a/USalign.cpp +++ b/USalign.cpp @@ -3338,123 +3338,236 @@ int flexalign_fatcat_main(double **xa, double **ya, std::reverse(path.begin(), path.end()); // ========================================== - // Step 4: Split structure based on twists + // Step 4: Split structure based on FATCAT exact heuristics // ========================================== - std::vector> blocks; - std::vector curr_block; - curr_block.push_back(merged_afps[path[0]]); - for (size_t k = 1; k < path.size(); k++) - { - FATCAT_AFP curr = merged_afps[path[k]]; - FATCAT_AFP prv = merged_afps[path[k - 1]]; - // USE PRECOMPUTED DISTANCES - O(1) inside loop + // Lambda 1: Calculate Distance Variation (dvar) exactly as FATCAT's CalAfpDis + auto get_dvar = [&](const FATCAT_AFP& prv, const FATCAT_AFP& curr) -> double { double rms_sq = 0; - for (int i_idx = 0; i_idx < fragLen; i_idx++) - { - for (int j_idx = 0; j_idx < fragLen; j_idx++) - { + for (int i_idx = 0; i_idx < fragLen; i_idx++) { + for (int j_idx = 0; j_idx < fragLen; j_idx++) { double dist1, dist2; int idx1_a = curr.i + i_idx, idx1_b = prv.i + j_idx; - if (idx1_a >= idx1_b) - dist1 = disTable1[idx1_b][idx1_a - idx1_b]; - else - dist1 = disTable1[idx1_a][idx1_b - idx1_a]; + if (idx1_a >= idx1_b) dist1 = disTable1[idx1_b][idx1_a - idx1_b]; + else dist1 = disTable1[idx1_a][idx1_b - idx1_a]; int idx2_a = curr.j + i_idx, idx2_b = prv.j + j_idx; - if (idx2_a >= idx2_b) - dist2 = disTable2[idx2_b][idx2_a - idx2_b]; - else - dist2 = disTable2[idx2_a][idx2_b - idx2_a]; + if (idx2_a >= idx2_b) dist2 = disTable2[idx2_b][idx2_a - idx2_b]; + 
else dist2 = disTable2[idx2_a][idx2_b - idx2_a]; rms_sq += (dist1 - dist2) * (dist1 - dist2); } } + if (rms_sq > afp_dis_cut) return 1e9; // Trigger twist + return std::sqrt(rms_sq / (fragLen * fragLen)); + }; - double dvar = (rms_sq > afp_dis_cut) ? 1e9 : std::sqrt(rms_sq / (fragLen * fragLen)); - if (dvar >= disCut) - { + // Lambda 2: Calculate fast rigid-body Kabsch RMSD for a block + auto calc_block_rmsd = [&](const std::vector& afp_list) -> double { + std::vector r1, r2; + for (size_t a = 0; a < afp_list.size(); a++) { + for (int l = 0; l < afp_list[a].len; l++) { + r1.push_back(afp_list[a].i + l); + r2.push_back(afp_list[a].j + l); + } + } + int n = r1.size(); + if (n < 3) return 0.0; + double** p1; NewArray(&p1, n, 3); + double** p2; NewArray(&p2, n, 3); + for(int i = 0; i < n; i++) { + p1[i][0] = xa[r1[i]][0]; p1[i][1] = xa[r1[i]][1]; p1[i][2] = xa[r1[i]][2]; + p2[i][0] = ya[r2[i]][0]; p2[i][1] = ya[r2[i]][1]; p2[i][2] = ya[r2[i]][2]; + } + double rms_sq_sum, t_tmp[3], u_tmp[3][3]; + Kabsch(p1, p2, n, 0, &rms_sq_sum, t_tmp, u_tmp); + DeleteArray(&p1, n); + DeleteArray(&p2, n); + return std::sqrt(rms_sq_sum / n); + }; + + // --- Phase 1: Initial AFP Chaining (Simulating FATCAT TraceBack) --- + struct Block { + std::vector afps; + std::vector dvars; // records dvar between afps[i-1] and afps[i] + }; + std::vector blocks; + Block curr_block; + curr_block.afps.push_back(merged_afps[path[0]]); + curr_block.dvars.push_back(0.0); // First AFP has no previous connection + + for (size_t k = 1; k < path.size(); k++) { + FATCAT_AFP curr = merged_afps[path[k]]; + FATCAT_AFP prv = merged_afps[path[k - 1]]; + double dvar = get_dvar(prv, curr); + + if (dvar >= disCut) { // Twist detected, start new block blocks.push_back(curr_block); - curr_block.clear(); + curr_block.afps.clear(); + curr_block.dvars.clear(); + curr_block.afps.push_back(curr); + curr_block.dvars.push_back(0.0); + } else { + curr_block.afps.push_back(curr); + curr_block.dvars.push_back(dvar); // Record 
dvar for SplitBlock later + } + } + if (!curr_block.afps.empty()) blocks.push_back(curr_block); + + double local_badRmsd = 4.0; // FATCAT's defined threshold + + // --- Phase 2: SplitBlock (Exact FATCAT Logic) --- + // Finds the block with highest RMSD > 4.0 and cuts it at the connection with max dvar + bool splitted = true; + while (splitted && blocks.size() < (size_t)(max_twists + 1)) { + splitted = false; + double max_rmsd = 0.0; + int target_b = -1; + + for (size_t b = 0; b < blocks.size(); b++) { + if (blocks[b].afps.size() > 2) { + double cur_rmsd = calc_block_rmsd(blocks[b].afps); + if (cur_rmsd > max_rmsd) { + max_rmsd = cur_rmsd; + target_b = b; + } + } + } + + if (max_rmsd >= local_badRmsd && target_b != -1) { + double max_t = 0; + int cut_idx = 0; + for (size_t i = 1; i < blocks[target_b].afps.size(); i++) { + if (blocks[target_b].dvars[i] > max_t) { + max_t = blocks[target_b].dvars[i]; + cut_idx = i; + } + } + + if (cut_idx > 0) { + // Execute split at cut_idx + Block right_blk; + right_blk.afps.assign(blocks[target_b].afps.begin() + cut_idx, blocks[target_b].afps.end()); + right_blk.dvars.assign(blocks[target_b].dvars.begin() + cut_idx, blocks[target_b].dvars.end()); + right_blk.dvars[0] = 0.0; // Clean the break point + + blocks[target_b].afps.erase(blocks[target_b].afps.begin() + cut_idx, blocks[target_b].afps.end()); + blocks[target_b].dvars.erase(blocks[target_b].dvars.begin() + cut_idx, blocks[target_b].dvars.end()); + + blocks.insert(blocks.begin() + target_b + 1, right_blk); + splitted = true; + } } - curr_block.push_back(curr); } - blocks.push_back(curr_block); - struct Region - { - int s1, e1, s2, e2; - }; + // --- Phase 3: DeleteBlock (Exact FATCAT Logic) --- + // Remove isolated single-AFP blocks that don't span enough length + for (int b = 0; b < (int)blocks.size(); b++) { + if (blocks[b].afps.size() <= 1) { + int e1 = (b < (int)blocks.size() - 1) ? blocks[b+1].afps.front().i : xlen; + int e2 = (b < (int)blocks.size() - 1) ? 
blocks[b+1].afps.front().j : ylen; + int b1 = (b > 0) ? blocks[b-1].afps.back().i + blocks[b-1].afps.back().len : 0; + int b2 = (b > 0) ? blocks[b-1].afps.back().j + blocks[b-1].afps.back().len : 0; + int span = std::min(e1 - b1, e2 - b2); + if (span < 2 * fragLen) { + blocks.erase(blocks.begin() + b); + b--; + } + } + } + + // --- Phase 4: MergeBlock (Exact FATCAT Logic) --- + // Re-merge adjacent blocks if their combined rigid body RMSD is < 4.0 + bool merged = true; + while (merged && blocks.size() > 1) { + merged = false; + double min_rmsd = 1e9; + int min_b = -1; + for (size_t b = 0; b < blocks.size() - 1; b++) { + std::vector temp_merged = blocks[b].afps; + temp_merged.insert(temp_merged.end(), blocks[b+1].afps.begin(), blocks[b+1].afps.end()); + double cur_rmsd = calc_block_rmsd(temp_merged); + if (cur_rmsd < min_rmsd) { + min_rmsd = cur_rmsd; + min_b = b; + } + } + + if (min_rmsd < local_badRmsd && min_b != -1) { + blocks[min_b].afps.insert(blocks[min_b].afps.end(), blocks[min_b+1].afps.begin(), blocks[min_b+1].afps.end()); + blocks.erase(blocks.begin() + min_b + 1); + merged = true; + } + } + + // --- Phase 5: Build strictly contiguous boundaries for Step 5 TM-align --- + // Applies US-align's overlap removal (skip) and midpoint boundary assignment + struct Region { int s1, e1, s2, e2; }; std::vector real_blocks; int last_i = 0, last_j = 0; - for (size_t b = 0; b < blocks.size(); b++) - { + for (size_t b = 0; b < blocks.size(); b++) { int b_s1 = -1, b_e1 = -1, b_s2 = -1, b_e2 = -1; - for (size_t a = 0; a < blocks[b].size(); a++) - { - FATCAT_AFP afp = blocks[b][a]; + for (size_t a = 0; a < blocks[b].afps.size(); a++) { + FATCAT_AFP afp = blocks[b].afps[a]; + + // Core safety: strip overlaps to ensure monotonicity int skip = std::max(std::max(last_i - afp.i, last_j - afp.j), 0); - if (skip >= afp.len) - continue; + if (skip >= afp.len) continue; int eff_i = afp.i + skip; int eff_j = afp.j + skip; int eff_L = afp.len - skip; - if (b_s1 == -1) - { - b_s1 = 
eff_i; - b_s2 = eff_j; - } + + if (b_s1 == -1) { b_s1 = eff_i; b_s2 = eff_j; } b_e1 = eff_i + eff_L; b_e2 = eff_j + eff_L; last_i = b_e1; last_j = b_e2; } - if (b_s1 != -1) - { - int block_len_1 = b_e1 - b_s1; - int block_len_2 = b_e2 - b_s2; - if (block_len_1 >= min_block_len && block_len_2 >= min_block_len) - { + if (b_s1 != -1) { + // Keep block only if its "non-overlapping" core is large enough + if (b_e1 - b_s1 >= min_block_len && b_e2 - b_s2 >= min_block_len) { Region r = {b_s1, b_e1, b_s2, b_e2}; real_blocks.push_back(r); } } } - if (real_blocks.empty()) - return 0; + + if (real_blocks.empty()) return 0; std::vector bounds1, bounds2; bounds1.push_back(0); bounds2.push_back(0); - for (size_t k = 0; k < real_blocks.size() - 1; k++) - { + for (size_t k = 0; k < real_blocks.size() - 1; k++) { + // Find exact midpoints between valid, non-overlapping blocks bounds1.push_back((real_blocks[k].e1 + real_blocks[k + 1].s1) / 2); bounds2.push_back((real_blocks[k].e2 + real_blocks[k + 1].s2) / 2); } bounds1.push_back(xlen); bounds2.push_back(ylen); + // ========================================== // [DEBUG] TEMPORARY DEBUG OUTPUT // ========================================== - // cout << "\n========================================" << endl; - // cout << "PDB1 Interval: "; - // for (size_t k = 0; k < bounds1.size() - 1; k++) - // { - // cout << (bounds1[k] + 1) << "-" << bounds1[k + 1]; - // if (k < bounds1.size() - 2) - // cout << ","; - // } - // cout << "\nPDB2 Interval: "; - // for (size_t k = 0; k < bounds2.size() - 1; k++) - // { - // cout << (bounds2[k] + 1) << "-" << bounds2[k + 1]; - // if (k < bounds2.size() - 2) - // cout << ","; - // } - // cout << "\n========================================\n" - // << endl; + cout << "\n========================================" << endl; + cout << "PDB1 Interval: "; + for (size_t k = 0; k < bounds1.size() - 1; k++) + { + cout << (bounds1[k] + 1) << "-" << bounds1[k + 1]; + if (k < bounds1.size() - 2) + cout << ","; + } + cout 
<< "\nPDB2 Interval: "; + for (size_t k = 0; k < bounds2.size() - 1; k++) + { + cout << (bounds2[k] + 1) << "-" << bounds2[k + 1]; + if (k < bounds2.size() - 2) + cout << ","; + } + cout << "\n========================================\n" + << endl; + // ========================================== // Step 5: Iteratively align each block using TRUE flexalign_best logic // ========================================== From 40fece5c2e6eab2fa5e70d764b35bab082610516 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A5=9D=E4=BA=91=E7=AF=AA?= Date: Thu, 14 May 2026 14:05:20 +0800 Subject: [PATCH 13/23] -mm 10 penalty update --- USalign.cpp | 715 +++++++++++++++++++++++++++++----------------------- 1 file changed, 403 insertions(+), 312 deletions(-) diff --git a/USalign.cpp b/USalign.cpp index f621260..eb62c4b 100644 --- a/USalign.cpp +++ b/USalign.cpp @@ -2978,7 +2978,6 @@ int flexalign_fatcat_main(double **xa, double **ya, int misCut = 2 * fragLen; int maxGapFrag = fragLen + max_gap; double afp_dis_cut = fragLen * fragLen * (disCut * disCut); - int min_block_len = fragLen; // ========================================== // OPTIMIZATION 1: Precompute local intra-protein distance matrices @@ -3027,7 +3026,7 @@ int flexalign_fatcat_main(double **xa, double **ya, { for (int j = 0; j <= ylen - fragLen; j += step) { - int d3_term = std::min(i, j) + std::min(xlen - (i + fragLen), ylen - (j + fragLen)) + fragLen; + int d3_term = std::min(i, j) + std::min(xlen - (i + fragLen - 1), ylen - (j + fragLen)) + fragLen; if (d3_term < 0.3 * std::min(xlen, ylen)) continue; @@ -3161,9 +3160,14 @@ int flexalign_fatcat_main(double **xa, double **ya, return 0; // ========================================== - // Step 3: Global Dynamic Programming (DP) + // Step 3 & 4: Dual Dynamic Programming and Domain Splitting + // We run two competing gap penalty logics: + // 1. Original FATCAT (allows overlaps/rewards) + // 2. 
Strict Penalty (penalizes overlaps) + // The winner is the one that identifies more rigid domains (blocks). // ========================================== - // OPTIMIZATION 5: Flat 1D vectors for 2D DP cache + + // OPTIMIZATION 5: Flat 1D vectors for 2D DP cache (Pre-computation) std::vector afp_aft_index(xlen * ylen, -1); std::vector afp_bef_index(xlen * ylen, -1); @@ -3202,182 +3206,62 @@ int flexalign_fatcat_main(double **xa, double **ya, } } - std::vector sco(n_afps); - std::vector twi(n_afps, 0); - std::vector pre(n_afps, -1); - for (int m = 0; m < n_afps; m++) - sco[m] = merged_afps[m].score; - - for (int m = 0; m < n_afps; m++) + // Lambda 1: Calculate Distance Variation (dvar) exactly as FATCAT's CalAfpDis + auto get_dvar = [&](const FATCAT_AFP &prv, const FATCAT_AFP &curr) -> double { - int curr_i = merged_afps[m].i; - int curr_j = merged_afps[m].j; - int a3 = curr_i - fragLen; - int a2 = std::max(0, a3 - misCut); - int a1 = std::max(0, curr_i - maxGapFrag); - int b3 = curr_j - fragLen; - int b2 = std::max(0, b3 - misCut); - int b1 = std::max(0, curr_j - maxGapFrag); - - std::vector valid_prevs; - for (int step = 0; step < 2; step++) - { - int a_s, a_e, b_s, b_e; - if (step == 0) - { - a_s = std::max(a1, 0); - a_e = std::min(a3, xlen - 1); - b_s = std::max(b2, 0); - b_e = std::min(b3, ylen - 1); - } - else - { - a_s = std::max(a2, 0); - a_e = std::min(a3, xlen - 1); - b_s = std::max(b1, 0); - b_e = std::min(b2 - 1, ylen - 1); - } - - if (b_s >= ylen || b_e < 0) - continue; - for (int prev_i = a_s; prev_i <= a_e; prev_i++) - { - int s1 = afp_aft_index[prev_i * ylen + b_s]; - int s2 = afp_bef_index[prev_i * ylen + b_e]; - if (s1 != -1 && s2 != -1 && s1 <= s2) - { - for (int s = s1; s <= s2; s++) - valid_prevs.push_back(s); - } - } - } - - double curr_sco = merged_afps[m].score; - for (size_t v = 0; v < valid_prevs.size(); v++) + double rms_sq = 0; + for (int i_idx = 0; i_idx < fragLen; i_idx++) { - int prev = valid_prevs[v]; - int prev_twi = twi[prev]; - if 
(prev_twi > max_twists) - continue; - - int gap_i = curr_i - (merged_afps[prev].i + merged_afps[prev].len); - int gap_j = curr_j - (merged_afps[prev].j + merged_afps[prev].len); - int m_gap = std::max(gap_i, gap_j); - int m_mis = 0; - if (gap_i < 0 || gap_j < 0) - m_mis = (gap_i < gap_j) ? -gap_i : -gap_j; - - double gp = gap_ext * m_mis; - if (m_gap > 0) - gp += gap_ext * m_gap; - if (gp < max_penalty) - gp = max_penalty; - - // USE PRECOMPUTED DISTANCES - O(1) inside loop - double rms_sq = 0; - for (int k = 0; k < fragLen; k++) + for (int j_idx = 0; j_idx < fragLen; j_idx++) { - for (int l = 0; l < fragLen; l++) - { - double dist1, dist2; - int idx1_a = curr_i + k, idx1_b = merged_afps[prev].i + l; - if (idx1_a >= idx1_b) - dist1 = disTable1[idx1_b][idx1_a - idx1_b]; - else - dist1 = disTable1[idx1_a][idx1_b - idx1_a]; - - int idx2_a = curr_j + k, idx2_b = merged_afps[prev].j + l; - if (idx2_a >= idx2_b) - dist2 = disTable2[idx2_b][idx2_a - idx2_b]; - else - dist2 = disTable2[idx2_a][idx2_b - idx2_a]; - - rms_sq += (dist1 - dist2) * (dist1 - dist2); - } - } - - double tp = 0.0; - int is_twist = 0; - if (rms_sq >= afp_dis_cut) - { - tp = twist_pen; - is_twist = 1; - } - else - { - double dvar = std::sqrt(rms_sq / (fragLen * fragLen)); - if (dvar > disCut - disSmooth) - tp = twist_pen * std::sqrt((dvar - disCut + disSmooth) / disSmooth); - } - - if (prev_twi + is_twist > max_twists) - continue; - - double stmp = sco[prev] + curr_sco + tp + gp; - if (stmp > sco[m]) - { - sco[m] = stmp; - pre[m] = prev; - twi[m] = prev_twi + is_twist; - } - } - } - - int best_m = 0; - for (int m = 1; m < n_afps; m++) - if (sco[m] > sco[best_m]) - best_m = m; - - std::vector path; - int curr_m = best_m; - while (curr_m != -1) - { - path.push_back(curr_m); - curr_m = pre[curr_m]; - } - std::reverse(path.begin(), path.end()); - - // ========================================== - // Step 4: Split structure based on FATCAT exact heuristics - // ========================================== - - 
// Lambda 1: Calculate Distance Variation (dvar) exactly as FATCAT's CalAfpDis - auto get_dvar = [&](const FATCAT_AFP& prv, const FATCAT_AFP& curr) -> double { - double rms_sq = 0; - for (int i_idx = 0; i_idx < fragLen; i_idx++) { - for (int j_idx = 0; j_idx < fragLen; j_idx++) { double dist1, dist2; int idx1_a = curr.i + i_idx, idx1_b = prv.i + j_idx; - if (idx1_a >= idx1_b) dist1 = disTable1[idx1_b][idx1_a - idx1_b]; - else dist1 = disTable1[idx1_a][idx1_b - idx1_a]; + if (idx1_a >= idx1_b) + dist1 = disTable1[idx1_b][idx1_a - idx1_b]; + else + dist1 = disTable1[idx1_a][idx1_b - idx1_a]; int idx2_a = curr.j + i_idx, idx2_b = prv.j + j_idx; - if (idx2_a >= idx2_b) dist2 = disTable2[idx2_b][idx2_a - idx2_b]; - else dist2 = disTable2[idx2_a][idx2_b - idx2_a]; + if (idx2_a >= idx2_b) + dist2 = disTable2[idx2_b][idx2_a - idx2_b]; + else + dist2 = disTable2[idx2_a][idx2_b - idx2_a]; rms_sq += (dist1 - dist2) * (dist1 - dist2); } } - if (rms_sq > afp_dis_cut) return 1e9; // Trigger twist + if (rms_sq > afp_dis_cut) + return 1e9; // Trigger twist return std::sqrt(rms_sq / (fragLen * fragLen)); }; // Lambda 2: Calculate fast rigid-body Kabsch RMSD for a block - auto calc_block_rmsd = [&](const std::vector& afp_list) -> double { + auto calc_block_rmsd = [&](const std::vector &afp_list) -> double + { std::vector r1, r2; - for (size_t a = 0; a < afp_list.size(); a++) { - for (int l = 0; l < afp_list[a].len; l++) { + for (size_t a = 0; a < afp_list.size(); a++) + { + for (int l = 0; l < afp_list[a].len; l++) + { r1.push_back(afp_list[a].i + l); r2.push_back(afp_list[a].j + l); } } int n = r1.size(); - if (n < 3) return 0.0; - double** p1; NewArray(&p1, n, 3); - double** p2; NewArray(&p2, n, 3); - for(int i = 0; i < n; i++) { - p1[i][0] = xa[r1[i]][0]; p1[i][1] = xa[r1[i]][1]; p1[i][2] = xa[r1[i]][2]; - p2[i][0] = ya[r2[i]][0]; p2[i][1] = ya[r2[i]][1]; p2[i][2] = ya[r2[i]][2]; + if (n < 3) + return 0.0; + double **p1; + NewArray(&p1, n, 3); + double **p2; + NewArray(&p2, n, 
3); + for (int i = 0; i < n; i++) + { + p1[i][0] = xa[r1[i]][0]; + p1[i][1] = xa[r1[i]][1]; + p1[i][2] = xa[r1[i]][2]; + p2[i][0] = ya[r2[i]][0]; + p2[i][1] = ya[r2[i]][1]; + p2[i][2] = ya[r2[i]][2]; } double rms_sq_sum, t_tmp[3], u_tmp[3][3]; Kabsch(p1, p2, n, 0, &rms_sq_sum, t_tmp, u_tmp); @@ -3386,187 +3270,394 @@ int flexalign_fatcat_main(double **xa, double **ya, return std::sqrt(rms_sq_sum / n); }; - // --- Phase 1: Initial AFP Chaining (Simulating FATCAT TraceBack) --- - struct Block { - std::vector afps; - std::vector dvars; // records dvar between afps[i-1] and afps[i] + struct Region + { + int s1, e1, s2, e2; }; - std::vector blocks; - Block curr_block; - curr_block.afps.push_back(merged_afps[path[0]]); - curr_block.dvars.push_back(0.0); // First AFP has no previous connection - for (size_t k = 1; k < path.size(); k++) { - FATCAT_AFP curr = merged_afps[path[k]]; - FATCAT_AFP prv = merged_afps[path[k - 1]]; - double dvar = get_dvar(prv, curr); + // Lambda 3: The core DP and Splitting logic, taking logic_type as parameter + // logic_type 0 = FATCAT original (allows reward), logic_type 1 = Strict Penalty + auto run_dp_and_split = [&](int logic_type) -> std::pair, std::vector> + { + std::vector sco(n_afps); + std::vector twi(n_afps, 0); + std::vector pre(n_afps, -1); + for (int m = 0; m < n_afps; m++) + sco[m] = merged_afps[m].score; + + // --- Step 3 Execution --- + for (int m = 0; m < n_afps; m++) + { + int curr_i = merged_afps[m].i; + int curr_j = merged_afps[m].j; + int a3 = curr_i - fragLen; + int a2 = std::max(0, a3 - misCut); + int a1 = std::max(0, curr_i - maxGapFrag); + int b3 = curr_j - fragLen; + int b2 = std::max(0, b3 - misCut); + int b1 = std::max(0, curr_j - maxGapFrag); + + std::vector valid_prevs; + for (int step = 0; step < 2; step++) + { + int a_s, a_e, b_s, b_e; + if (step == 0) + { + a_s = std::max(a1, 0); + a_e = std::min(a3, xlen - 1); + b_s = std::max(b2, 0); + b_e = std::min(b3, ylen - 1); + } + else + { + a_s = std::max(a2, 0); + a_e 
= std::min(a3, xlen - 1); + b_s = std::max(b1, 0); + b_e = std::min(b2 - 1, ylen - 1); + } - if (dvar >= disCut) { // Twist detected, start new block - blocks.push_back(curr_block); - curr_block.afps.clear(); - curr_block.dvars.clear(); - curr_block.afps.push_back(curr); - curr_block.dvars.push_back(0.0); - } else { - curr_block.afps.push_back(curr); - curr_block.dvars.push_back(dvar); // Record dvar for SplitBlock later - } - } - if (!curr_block.afps.empty()) blocks.push_back(curr_block); - - double local_badRmsd = 4.0; // FATCAT's defined threshold - - // --- Phase 2: SplitBlock (Exact FATCAT Logic) --- - // Finds the block with highest RMSD > 4.0 and cuts it at the connection with max dvar - bool splitted = true; - while (splitted && blocks.size() < (size_t)(max_twists + 1)) { - splitted = false; - double max_rmsd = 0.0; - int target_b = -1; - - for (size_t b = 0; b < blocks.size(); b++) { - if (blocks[b].afps.size() > 2) { - double cur_rmsd = calc_block_rmsd(blocks[b].afps); - if (cur_rmsd > max_rmsd) { - max_rmsd = cur_rmsd; - target_b = b; + if (b_s >= ylen || b_e < 0) + continue; + for (int prev_i = a_s; prev_i <= a_e; prev_i++) + { + int s1 = afp_aft_index[prev_i * ylen + b_s]; + int s2 = afp_bef_index[prev_i * ylen + b_e]; + if (s1 != -1 && s2 != -1 && s1 <= s2) + for (int s = s1; s <= s2; s++) + valid_prevs.push_back(s); } } - } - - if (max_rmsd >= local_badRmsd && target_b != -1) { - double max_t = 0; - int cut_idx = 0; - for (size_t i = 1; i < blocks[target_b].afps.size(); i++) { - if (blocks[target_b].dvars[i] > max_t) { - max_t = blocks[target_b].dvars[i]; - cut_idx = i; + + double curr_sco = merged_afps[m].score; + for (size_t v = 0; v < valid_prevs.size(); v++) + { + int prev = valid_prevs[v]; + int prev_twi = twi[prev]; + if (prev_twi > max_twists) + continue; + + int gap_i = curr_i - (merged_afps[prev].i + merged_afps[prev].len); + int gap_j = curr_j - (merged_afps[prev].j + merged_afps[prev].len); + int m_gap = std::max(gap_i, gap_j); + + double 
gp = 0.0; + // SWITCH: Apply different gap penalties based on the chosen logic + if (logic_type == 0) + { + // FATCAT Original Logic (Reward allowed if m_gap < 0) + gp = gap_ext * m_gap; + } + else + { + // Strict Penalty Logic (No rewards for overlaps) + int m_mis = 0; + if (gap_i < 0 || gap_j < 0) + m_mis = (gap_i < gap_j) ? -gap_i : -gap_j; + gp = gap_ext * m_mis; + if (m_gap > 0) + gp += gap_ext * m_gap; + } + + if (gp < max_penalty) + gp = max_penalty; + + // Fast distance variation check + double rms_sq = 0; + for (int k = 0; k < fragLen; k++) + { + for (int l = 0; l < fragLen; l++) + { + double dist1, dist2; + int idx1_a = curr_i + k, idx1_b = merged_afps[prev].i + l; + if (idx1_a >= idx1_b) + dist1 = disTable1[idx1_b][idx1_a - idx1_b]; + else + dist1 = disTable1[idx1_a][idx1_b - idx1_a]; + + int idx2_a = curr_j + k, idx2_b = merged_afps[prev].j + l; + if (idx2_a >= idx2_b) + dist2 = disTable2[idx2_b][idx2_a - idx2_b]; + else + dist2 = disTable2[idx2_a][idx2_b - idx2_a]; + + rms_sq += (dist1 - dist2) * (dist1 - dist2); + } + } + + double tp = 0.0; + int is_twist = 0; + if (rms_sq >= afp_dis_cut) + { + tp = twist_pen; + is_twist = 1; + } + else + { + double dvar = std::sqrt(rms_sq / (fragLen * fragLen)); + if (dvar > disCut - disSmooth) + tp = twist_pen * std::sqrt((dvar - disCut + disSmooth) / disSmooth); + } + + if (prev_twi + is_twist > max_twists) + continue; + + double stmp = sco[prev] + curr_sco + tp + gp; + if (stmp > sco[m]) + { + sco[m] = stmp; + pre[m] = prev; + twi[m] = prev_twi + is_twist; } - } - - if (cut_idx > 0) { - // Execute split at cut_idx - Block right_blk; - right_blk.afps.assign(blocks[target_b].afps.begin() + cut_idx, blocks[target_b].afps.end()); - right_blk.dvars.assign(blocks[target_b].dvars.begin() + cut_idx, blocks[target_b].dvars.end()); - right_blk.dvars[0] = 0.0; // Clean the break point - - blocks[target_b].afps.erase(blocks[target_b].afps.begin() + cut_idx, blocks[target_b].afps.end()); - 
blocks[target_b].dvars.erase(blocks[target_b].dvars.begin() + cut_idx, blocks[target_b].dvars.end()); - - blocks.insert(blocks.begin() + target_b + 1, right_blk); - splitted = true; } } - } - // --- Phase 3: DeleteBlock (Exact FATCAT Logic) --- - // Remove isolated single-AFP blocks that don't span enough length - for (int b = 0; b < (int)blocks.size(); b++) { - if (blocks[b].afps.size() <= 1) { - int e1 = (b < (int)blocks.size() - 1) ? blocks[b+1].afps.front().i : xlen; - int e2 = (b < (int)blocks.size() - 1) ? blocks[b+1].afps.front().j : ylen; - int b1 = (b > 0) ? blocks[b-1].afps.back().i + blocks[b-1].afps.back().len : 0; - int b2 = (b > 0) ? blocks[b-1].afps.back().j + blocks[b-1].afps.back().len : 0; - int span = std::min(e1 - b1, e2 - b2); - if (span < 2 * fragLen) { - blocks.erase(blocks.begin() + b); - b--; + int best_m = 0; + for (int m = 1; m < n_afps; m++) + if (sco[m] > sco[best_m]) + best_m = m; + + std::vector path; + int curr_m = best_m; + while (curr_m != -1) + { + path.push_back(curr_m); + curr_m = pre[curr_m]; + } + std::reverse(path.begin(), path.end()); + + // Return empty if no path + std::vector b1, b2; + if (path.empty()) + return std::make_pair(b1, b2); + + // --- Step 4 Execution (Block Splitting & Merging) --- + struct Block + { + std::vector afps; + std::vector dvars; + }; + std::vector blocks; + Block curr_block; + curr_block.afps.push_back(merged_afps[path[0]]); + curr_block.dvars.push_back(0.0); + + for (size_t k = 1; k < path.size(); k++) + { + FATCAT_AFP curr = merged_afps[path[k]]; + FATCAT_AFP prv = merged_afps[path[k - 1]]; + double dvar = get_dvar(prv, curr); + + if (dvar >= disCut) + { + blocks.push_back(curr_block); + curr_block.afps.clear(); + curr_block.dvars.clear(); + curr_block.afps.push_back(curr); + curr_block.dvars.push_back(0.0); + } + else + { + curr_block.afps.push_back(curr); + curr_block.dvars.push_back(dvar); } } - } + if (!curr_block.afps.empty()) + blocks.push_back(curr_block); + + double local_badRmsd = 4.0; 
+ + // SplitBlock + bool splitted = true; + while (splitted && blocks.size() < (size_t)(max_twists + 1)) + { + splitted = false; + double max_rmsd = 0.0; + int target_b = -1; - // --- Phase 4: MergeBlock (Exact FATCAT Logic) --- - // Re-merge adjacent blocks if their combined rigid body RMSD is < 4.0 - bool merged = true; - while (merged && blocks.size() > 1) { - merged = false; - double min_rmsd = 1e9; - int min_b = -1; - for (size_t b = 0; b < blocks.size() - 1; b++) { - std::vector temp_merged = blocks[b].afps; - temp_merged.insert(temp_merged.end(), blocks[b+1].afps.begin(), blocks[b+1].afps.end()); - double cur_rmsd = calc_block_rmsd(temp_merged); - if (cur_rmsd < min_rmsd) { - min_rmsd = cur_rmsd; - min_b = b; + for (size_t b = 0; b < blocks.size(); b++) + { + if (blocks[b].afps.size() > 2) + { + double cur_rmsd = calc_block_rmsd(blocks[b].afps); + if (cur_rmsd > max_rmsd) + { + max_rmsd = cur_rmsd; + target_b = b; + } + } + } + + if (max_rmsd >= local_badRmsd && target_b != -1) + { + double max_t = 0; + int cut_idx = 0; + for (size_t i = 1; i < blocks[target_b].afps.size(); i++) + { + if (blocks[target_b].dvars[i] > max_t) + { + max_t = blocks[target_b].dvars[i]; + cut_idx = i; + } + } + + if (cut_idx > 0) + { + Block right_blk; + right_blk.afps.assign(blocks[target_b].afps.begin() + cut_idx, blocks[target_b].afps.end()); + right_blk.dvars.assign(blocks[target_b].dvars.begin() + cut_idx, blocks[target_b].dvars.end()); + right_blk.dvars[0] = 0.0; + + blocks[target_b].afps.erase(blocks[target_b].afps.begin() + cut_idx, blocks[target_b].afps.end()); + blocks[target_b].dvars.erase(blocks[target_b].dvars.begin() + cut_idx, blocks[target_b].dvars.end()); + + blocks.insert(blocks.begin() + target_b + 1, right_blk); + splitted = true; + } } } - - if (min_rmsd < local_badRmsd && min_b != -1) { - blocks[min_b].afps.insert(blocks[min_b].afps.end(), blocks[min_b+1].afps.begin(), blocks[min_b+1].afps.end()); - blocks.erase(blocks.begin() + min_b + 1); - merged = true; + 
+ // DeleteBlock + for (int b = 0; b < (int)blocks.size(); b++) + { + if (blocks[b].afps.size() <= 1) + { + int e1 = (b < (int)blocks.size() - 1) ? blocks[b + 1].afps.front().i : xlen; + int e2 = (b < (int)blocks.size() - 1) ? blocks[b + 1].afps.front().j : ylen; + int b1 = (b > 0) ? blocks[b - 1].afps.back().i + blocks[b - 1].afps.back().len : 0; + int b2 = (b > 0) ? blocks[b - 1].afps.back().j + blocks[b - 1].afps.back().len : 0; + int span = std::min(e1 - b1, e2 - b2); + if (span < 2 * fragLen) + { + blocks.erase(blocks.begin() + b); + b--; + } + } } - } - // --- Phase 5: Build strictly contiguous boundaries for Step 5 TM-align --- - // Applies US-align's overlap removal (skip) and midpoint boundary assignment - struct Region { int s1, e1, s2, e2; }; - std::vector real_blocks; - int last_i = 0, last_j = 0; - - for (size_t b = 0; b < blocks.size(); b++) { - int b_s1 = -1, b_e1 = -1, b_s2 = -1, b_e2 = -1; - for (size_t a = 0; a < blocks[b].afps.size(); a++) { - FATCAT_AFP afp = blocks[b].afps[a]; - - // Core safety: strip overlaps to ensure monotonicity - int skip = std::max(std::max(last_i - afp.i, last_j - afp.j), 0); - if (skip >= afp.len) continue; - - int eff_i = afp.i + skip; - int eff_j = afp.j + skip; - int eff_L = afp.len - skip; - - if (b_s1 == -1) { b_s1 = eff_i; b_s2 = eff_j; } - b_e1 = eff_i + eff_L; - b_e2 = eff_j + eff_L; - last_i = b_e1; - last_j = b_e2; + // MergeBlock + bool merged = true; + while (merged && blocks.size() > 1) + { + merged = false; + double min_rmsd = 1e9; + int min_b = -1; + for (size_t b = 0; b < blocks.size() - 1; b++) + { + std::vector temp_merged = blocks[b].afps; + temp_merged.insert(temp_merged.end(), blocks[b + 1].afps.begin(), blocks[b + 1].afps.end()); + double cur_rmsd = calc_block_rmsd(temp_merged); + if (cur_rmsd < min_rmsd) + { + min_rmsd = cur_rmsd; + min_b = b; + } + } + + if (min_rmsd < local_badRmsd && min_b != -1) + { + blocks[min_b].afps.insert(blocks[min_b].afps.end(), blocks[min_b + 1].afps.begin(), 
blocks[min_b + 1].afps.end()); + blocks.erase(blocks.begin() + min_b + 1); + merged = true; + } } - if (b_s1 != -1) { - // Keep block only if its "non-overlapping" core is large enough - if (b_e1 - b_s1 >= min_block_len && b_e2 - b_s2 >= min_block_len) { + + // Compile contiguous boundaries + std::vector real_blocks; + int last_i = 0, last_j = 0; + for (size_t b = 0; b < blocks.size(); b++) + { + int b_s1 = -1, b_e1 = -1, b_s2 = -1, b_e2 = -1; + for (size_t a = 0; a < blocks[b].afps.size(); a++) + { + FATCAT_AFP afp = blocks[b].afps[a]; + int skip = std::max(std::max(last_i - afp.i, last_j - afp.j), 0); + if (skip >= afp.len) + continue; + + int eff_i = afp.i + skip; + int eff_j = afp.j + skip; + int eff_L = afp.len - skip; + + if (b_s1 == -1) + { + b_s1 = eff_i; + b_s2 = eff_j; + } + b_e1 = eff_i + eff_L; + b_e2 = eff_j + eff_L; + last_i = b_e1; + last_j = b_e2; + } + if (b_s1 != -1) + { + if (b_e1 - b_s1 >= 3 && b_e2 - b_s2 >= 3) { Region r = {b_s1, b_e1, b_s2, b_e2}; real_blocks.push_back(r); + } } } - } - - if (real_blocks.empty()) return 0; - std::vector bounds1, bounds2; - bounds1.push_back(0); - bounds2.push_back(0); - for (size_t k = 0; k < real_blocks.size() - 1; k++) { - // Find exact midpoints between valid, non-overlapping blocks - bounds1.push_back((real_blocks[k].e1 + real_blocks[k + 1].s1) / 2); - bounds2.push_back((real_blocks[k].e2 + real_blocks[k + 1].s2) / 2); - } - bounds1.push_back(xlen); - bounds2.push_back(ylen); - + if (real_blocks.empty()) + return std::make_pair(b1, b2); + + b1.push_back(0); + b2.push_back(0); + for (size_t k = 0; k < real_blocks.size() - 1; k++) + { + b1.push_back((real_blocks[k].e1 + real_blocks[k + 1].s1) / 2); + b2.push_back((real_blocks[k].e2 + real_blocks[k + 1].s2) / 2); + } + b1.push_back(xlen); + b2.push_back(ylen); + + return std::make_pair(b1, b2); + }; + // ========================================== - // [DEBUG] TEMPORARY DEBUG OUTPUT + // Run competing strategies and select the winner // 
========================================== - cout << "\n========================================" << endl; - cout << "PDB1 Interval: "; - for (size_t k = 0; k < bounds1.size() - 1; k++) + auto bounds_fatcat = run_dp_and_split(0); // FATCAT Reward Logic + auto bounds_strict = run_dp_and_split(1); // Strict Penalty Logic + + std::vector bounds1, bounds2; + int domains_fatcat = bounds_fatcat.first.empty() ? 0 : bounds_fatcat.first.size() - 1; + int domains_strict = bounds_strict.first.empty() ? 0 : bounds_strict.first.size() - 1; + + // We favor the logic that produces a higher number of rigid domains (blocks) + if (domains_strict > domains_fatcat) { - cout << (bounds1[k] + 1) << "-" << bounds1[k + 1]; - if (k < bounds1.size() - 2) - cout << ","; + bounds1 = bounds_strict.first; + bounds2 = bounds_strict.second; } - cout << "\nPDB2 Interval: "; - for (size_t k = 0; k < bounds2.size() - 1; k++) + else { - cout << (bounds2[k] + 1) << "-" << bounds2[k + 1]; - if (k < bounds2.size() - 2) - cout << ","; + bounds1 = bounds_fatcat.first; + bounds2 = bounds_fatcat.second; } - cout << "\n========================================\n" - << endl; + + if (bounds1.empty()) + return 0; + + // ========================================== + // [DEBUG] TEMPORARY DEBUG OUTPUT + // ========================================== + // cout << "\n========================================" << endl; + // cout << "PDB1 Interval: "; + // for (size_t k = 0; k < bounds1.size() - 1; k++) + // { + // cout << (bounds1[k] + 1) << "-" << bounds1[k + 1]; + // if (k < bounds1.size() - 2) + // cout << ","; + // } + // cout << "\nPDB2 Interval: "; + // for (size_t k = 0; k < bounds2.size() - 1; k++) + // { + // cout << (bounds2[k] + 1) << "-" << bounds2[k + 1]; + // if (k < bounds2.size() - 2) + // cout << ","; + // } + // cout << "\n========================================\n" + // << endl; // ========================================== // Step 5: Iteratively align each block using TRUE flexalign_best logic From 
938dda2035e020dffeb12f9f8b6ebb08187ce997 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A5=9D=E4=BA=91=E7=AF=AA?= Date: Sat, 16 May 2026 13:56:43 +0800 Subject: [PATCH 14/23] -mm 10 polish --- USalign.cpp | 114 ++++++++-------------------------------------------- 1 file changed, 16 insertions(+), 98 deletions(-) diff --git a/USalign.cpp b/USalign.cpp index eb62c4b..34e4d85 100644 --- a/USalign.cpp +++ b/USalign.cpp @@ -2981,7 +2981,7 @@ int flexalign_fatcat_main(double **xa, double **ya, // ========================================== // OPTIMIZATION 1: Precompute local intra-protein distance matrices - // This entirely eliminates the costly sqrt() calls inside the O(N^2 * fragLen^2) DP loop. + // Utilizes basic_fun.h dist() function to replace manual distance calculation // ========================================== int max_dist_window = max_gap + 2 * fragLen + 1; std::vector> disTable1(xlen, std::vector(max_dist_window, 0.0)); @@ -2991,20 +2991,16 @@ int flexalign_fatcat_main(double **xa, double **ya, { for (int j = i; j < std::min(xlen, i + max_dist_window); j++) { - double dx = xa[i][0] - xa[j][0]; - double dy = xa[i][1] - xa[j][1]; - double dz = xa[i][2] - xa[j][2]; - disTable1[i][j - i] = std::sqrt(dx * dx + dy * dy + dz * dz); + // Use dist() from basic_fun.h which computes squared distance + disTable1[i][j - i] = std::sqrt(dist(xa[i], xa[j])); } } for (int i = 0; i < ylen; i++) { for (int j = i; j < std::min(ylen, i + max_dist_window); j++) { - double dx = ya[i][0] - ya[j][0]; - double dy = ya[i][1] - ya[j][1]; - double dz = ya[i][2] - ya[j][2]; - disTable2[i][j - i] = std::sqrt(dx * dx + dy * dy + dz * dz); + // Use dist() from basic_fun.h + disTable2[i][j - i] = std::sqrt(dist(ya[i], ya[j])); } } @@ -3071,7 +3067,6 @@ int flexalign_fatcat_main(double **xa, double **ya, // ========================================== // Step 2: Merge diagonal AFPs // ========================================== - // OPTIMIZATION 2: Flat vector instead of std::map int 
max_diagonal_idx = xlen + ylen + 1; std::vector> diagonals(max_diagonal_idx); for (size_t k = 0; k < initial_afps.size(); k++) @@ -3080,8 +3075,6 @@ int flexalign_fatcat_main(double **xa, double **ya, } std::vector merged_afps; - - // OPTIMIZATION 4: Pre-allocate max buffers for merge checking int max_merge_len = std::min(xlen, ylen); double **r1_merge, **r2_merge; NewArray(&r1_merge, max_merge_len, 3); @@ -3093,7 +3086,6 @@ int flexalign_fatcat_main(double **xa, double **ya, continue; std::vector &group = diagonals[d]; - // OPTIMIZATION 3: O(N log N) std::sort instead of O(N^2) loops std::sort(group.begin(), group.end(), [](const FATCAT_AFP &a, const FATCAT_AFP &b) { return a.i < b.i; }); @@ -3114,7 +3106,6 @@ int flexalign_fatcat_main(double **xa, double **ya, { int new_len = (nxt.i + nxt.len) - curr.i; - // Directly use pre-allocated buffers for (int k = 0; k < new_len; k++) { r1_merge[k][0] = xa[curr.i + k][0]; @@ -3149,7 +3140,6 @@ int flexalign_fatcat_main(double **xa, double **ya, DeleteArray(&r1_merge, max_merge_len); DeleteArray(&r2_merge, max_merge_len); - // Sort final merged afps std::sort(merged_afps.begin(), merged_afps.end(), [](const FATCAT_AFP &a, const FATCAT_AFP &b) { if (a.i == b.i) return a.j < b.j; @@ -3161,17 +3151,10 @@ int flexalign_fatcat_main(double **xa, double **ya, // ========================================== // Step 3 & 4: Dual Dynamic Programming and Domain Splitting - // We run two competing gap penalty logics: - // 1. Original FATCAT (allows overlaps/rewards) - // 2. Strict Penalty (penalizes overlaps) - // The winner is the one that identifies more rigid domains (blocks). 
// ========================================== - - // OPTIMIZATION 5: Flat 1D vectors for 2D DP cache (Pre-computation) std::vector afp_aft_index(xlen * ylen, -1); std::vector afp_bef_index(xlen * ylen, -1); - // Flat mapping instead of std::map std::vector>> i_to_j(xlen); for (int m = 0; m < n_afps; m++) { @@ -3206,7 +3189,6 @@ int flexalign_fatcat_main(double **xa, double **ya, } } - // Lambda 1: Calculate Distance Variation (dvar) exactly as FATCAT's CalAfpDis auto get_dvar = [&](const FATCAT_AFP &prv, const FATCAT_AFP &curr) -> double { double rms_sq = 0; @@ -3231,11 +3213,10 @@ int flexalign_fatcat_main(double **xa, double **ya, } } if (rms_sq > afp_dis_cut) - return 1e9; // Trigger twist + return 1e9; return std::sqrt(rms_sq / (fragLen * fragLen)); }; - // Lambda 2: Calculate fast rigid-body Kabsch RMSD for a block auto calc_block_rmsd = [&](const std::vector &afp_list) -> double { std::vector r1, r2; @@ -3275,8 +3256,6 @@ int flexalign_fatcat_main(double **xa, double **ya, int s1, e1, s2, e2; }; - // Lambda 3: The core DP and Splitting logic, taking logic_type as parameter - // logic_type 0 = FATCAT original (allows reward), logic_type 1 = Strict Penalty auto run_dp_and_split = [&](int logic_type) -> std::pair, std::vector> { std::vector sco(n_afps); @@ -3285,7 +3264,6 @@ int flexalign_fatcat_main(double **xa, double **ya, for (int m = 0; m < n_afps; m++) sco[m] = merged_afps[m].score; - // --- Step 3 Execution --- for (int m = 0; m < n_afps; m++) { int curr_i = merged_afps[m].i; @@ -3341,15 +3319,12 @@ int flexalign_fatcat_main(double **xa, double **ya, int m_gap = std::max(gap_i, gap_j); double gp = 0.0; - // SWITCH: Apply different gap penalties based on the chosen logic if (logic_type == 0) { - // FATCAT Original Logic (Reward allowed if m_gap < 0) gp = gap_ext * m_gap; } else { - // Strict Penalty Logic (No rewards for overlaps) int m_mis = 0; if (gap_i < 0 || gap_j < 0) m_mis = (gap_i < gap_j) ? 
-gap_i : -gap_j; @@ -3361,7 +3336,6 @@ int flexalign_fatcat_main(double **xa, double **ya, if (gp < max_penalty) gp = max_penalty; - // Fast distance variation check double rms_sq = 0; for (int k = 0; k < fragLen; k++) { @@ -3425,12 +3399,10 @@ int flexalign_fatcat_main(double **xa, double **ya, } std::reverse(path.begin(), path.end()); - // Return empty if no path std::vector b1, b2; if (path.empty()) return std::make_pair(b1, b2); - // --- Step 4 Execution (Block Splitting & Merging) --- struct Block { std::vector afps; @@ -3466,7 +3438,6 @@ int flexalign_fatcat_main(double **xa, double **ya, double local_badRmsd = 4.0; - // SplitBlock bool splitted = true; while (splitted && blocks.size() < (size_t)(max_twists + 1)) { @@ -3516,7 +3487,6 @@ int flexalign_fatcat_main(double **xa, double **ya, } } - // DeleteBlock for (int b = 0; b < (int)blocks.size(); b++) { if (blocks[b].afps.size() <= 1) @@ -3534,7 +3504,6 @@ int flexalign_fatcat_main(double **xa, double **ya, } } - // MergeBlock bool merged = true; while (merged && blocks.size() > 1) { @@ -3561,7 +3530,6 @@ int flexalign_fatcat_main(double **xa, double **ya, } } - // Compile contiguous boundaries std::vector real_blocks; int last_i = 0, last_j = 0; for (size_t b = 0; b < blocks.size(); b++) @@ -3613,17 +3581,13 @@ int flexalign_fatcat_main(double **xa, double **ya, return std::make_pair(b1, b2); }; - // ========================================== - // Run competing strategies and select the winner - // ========================================== - auto bounds_fatcat = run_dp_and_split(0); // FATCAT Reward Logic - auto bounds_strict = run_dp_and_split(1); // Strict Penalty Logic + auto bounds_fatcat = run_dp_and_split(0); + auto bounds_strict = run_dp_and_split(1); std::vector bounds1, bounds2; int domains_fatcat = bounds_fatcat.first.empty() ? 0 : bounds_fatcat.first.size() - 1; int domains_strict = bounds_strict.first.empty() ? 
0 : bounds_strict.first.size() - 1; - // We favor the logic that produces a higher number of rigid domains (blocks) if (domains_strict > domains_fatcat) { bounds1 = bounds_strict.first; @@ -3638,32 +3602,10 @@ int flexalign_fatcat_main(double **xa, double **ya, if (bounds1.empty()) return 0; - // ========================================== - // [DEBUG] TEMPORARY DEBUG OUTPUT - // ========================================== - // cout << "\n========================================" << endl; - // cout << "PDB1 Interval: "; - // for (size_t k = 0; k < bounds1.size() - 1; k++) - // { - // cout << (bounds1[k] + 1) << "-" << bounds1[k + 1]; - // if (k < bounds1.size() - 2) - // cout << ","; - // } - // cout << "\nPDB2 Interval: "; - // for (size_t k = 0; k < bounds2.size() - 1; k++) - // { - // cout << (bounds2[k] + 1) << "-" << bounds2[k + 1]; - // if (k < bounds2.size() - 2) - // cout << ","; - // } - // cout << "\n========================================\n" - // << endl; - // ========================================== // Step 5: Iteratively align each block using TRUE flexalign_best logic // ========================================== std::string global_seqM = "", global_seqxA = "", global_seqyA = ""; - // OPTIMIZATION 6: String capacity reservation to avoid reallocation overhead global_seqM.reserve(xlen + ylen + max_gap); global_seqxA.reserve(xlen + ylen + max_gap); global_seqyA.reserve(xlen + ylen + max_gap); @@ -3678,7 +3620,6 @@ int flexalign_fatcat_main(double **xa, double **ya, int L1_sub = x_e - x_s; int L2_sub = y_e - y_s; - // If the sub-region is too short, just fill with gaps if (L1_sub < 3 || L2_sub < 3) { for (int i = 0; i < L1_sub; i++) @@ -3696,7 +3637,6 @@ int flexalign_fatcat_main(double **xa, double **ya, continue; } - // Allocate memory for sub-structures double **xa_sub, **ya_sub; NewArray(&xa_sub, L1_sub, 3); NewArray(&ya_sub, L2_sub, 3); @@ -3705,7 +3645,6 @@ int flexalign_fatcat_main(double **xa, double **ya, char *secx_sub = new char[L1_sub + 1]; 
char *secy_sub = new char[L2_sub + 1]; - // Copy data for structure 1 for (int i = 0; i < L1_sub; i++) { xa_sub[i][0] = xa[x_s + i][0]; @@ -3717,7 +3656,6 @@ int flexalign_fatcat_main(double **xa, double **ya, seqx_sub[L1_sub] = '\0'; secx_sub[L1_sub] = '\0'; - // Copy data for structure 2 for (int i = 0; i < L2_sub; i++) { ya_sub[i][0] = ya[y_s + i][0]; @@ -3729,7 +3667,6 @@ int flexalign_fatcat_main(double **xa, double **ya, seqy_sub[L2_sub] = '\0'; secy_sub[L2_sub] = '\0'; - // Variables to store the best results for this sub-block double t0_best[3], u0_best[3][3]; double TM_best_max = -1.0; std::string seqM_best, seqxA_best, seqyA_best; @@ -3738,12 +3675,10 @@ int flexalign_fatcat_main(double **xa, double **ya, bool force_fast_opt = (std::min(L1_sub, L2_sub) > 1500) ? true : fast_opt; std::vector local_sequence = sequence; - // Test different secondary structure options (flexalign_best behavior) for (int cur_ss_opt = 0; cur_ss_opt <= 1; cur_ss_opt++) { FlexAlignResult cur_res; - // This ensures that the fallback compensation runs if too few hinges are found. 
execute_flexalign_with_fallback( xa_sub, ya_sub, seqx_sub, seqy_sub, secx_sub, secy_sub, L1_sub, L2_sub, local_sequence, Lnorm_ass, d0_scale, @@ -3766,7 +3701,7 @@ int flexalign_fatcat_main(double **xa, double **ya, tu_vec_best = cur_res.tu_vec; } } - // If alignment completely failed + if (TM_best_max < 0) { for (int i = 0; i < L1_sub; i++) @@ -3790,7 +3725,6 @@ int flexalign_fatcat_main(double **xa, double **ya, continue; } - // Ensure we have at least one valid transform matrix if (tu_vec_best.empty()) { std::vector tu_tmp(12); @@ -3798,7 +3732,6 @@ int flexalign_fatcat_main(double **xa, double **ya, tu_vec_best.push_back(tu_tmp); } - // Incorporate local transformation matrices into the global list int base_tu_idx = tu_vec.size(); for (size_t m = 0; m < tu_vec_best.size(); m++) { @@ -3806,17 +3739,12 @@ int flexalign_fatcat_main(double **xa, double **ya, } int rx = x_s; - - // FIX: current_global_idx must stay outside the character loop - // to maintain the last known state across gaps or unaligned regions int current_global_idx = base_tu_idx; for (size_t i = 0; i < seqxA_best.length(); i++) { char c = seqM_best[i]; - // Parse US-align standard hinge characters to update current global matrix index - // Valid hinge chars are '0'-'9', 'a'-'z', 'A'-'Z'. Exclude spaces and weak alignment chars. if (c != ' ' && c != '.' 
&& c != ':') { int local_hinge_idx = -1; @@ -3827,24 +3755,20 @@ int flexalign_fatcat_main(double **xa, double **ya, else if (c >= 'A' && c <= 'Z') local_hinge_idx = c - 'A' + 36; - // Safely update the global index tracking if (local_hinge_idx >= 0 && local_hinge_idx < tu_vec_best.size()) { current_global_idx = base_tu_idx + local_hinge_idx; } } - // Assign the corresponding rotation matrix index to the original residue if (seqxA_best[i] != '-') { global_res_tu[rx] = current_global_idx; rx++; } - // Construct the final stitched string format correctly if (seqxA_best[i] != '-' && seqyA_best[i] != '-') { - // Only assign a global hinge character if the local alignment considered it a true match if (c != ' ' && c != '.' && c != ':') { char global_c; @@ -3855,28 +3779,25 @@ int flexalign_fatcat_main(double **xa, double **ya, else if (current_global_idx < 62) global_c = 'A' + (current_global_idx - 36); else - global_c = '*'; // Fallback if hinges exceed 62 + global_c = '*'; seqM_best[i] = global_c; } else { - // Preserve weak matches ('.' 
or ':') or spaces seqM_best[i] = c; } } else { - seqM_best[i] = ' '; // Ensure gap positions correctly get space + seqM_best[i] = ' '; } } - // Append to the global alignment strings global_seqM += seqM_best; global_seqxA += seqxA_best; global_seqyA += seqyA_best; - // Clean up sub-block memory DeleteArray(&xa_sub, L1_sub); DeleteArray(&ya_sub, L2_sub); delete[] seqx_sub; @@ -3887,6 +3808,7 @@ int flexalign_fatcat_main(double **xa, double **ya, // ========================================== // Step 6: Recalculate global metrics correctly + // Utilizing basic_fun.h transform() and dist() functions for matrix rotations and distance // ========================================== seqM = global_seqM; seqxA = global_seqxA; @@ -3909,7 +3831,7 @@ int flexalign_fatcat_main(double **xa, double **ya, } TM1 = TM2 = TM3 = TM4 = TM5 = rmsd0 = 0.0; - Liden = 0.0; // FIX: Reset to absolute 0 + Liden = 0.0; n_ali8 = 0; n_ali = 0; do_vec.clear(); @@ -3929,11 +3851,11 @@ int flexalign_fatcat_main(double **xa, double **ya, double t_k[3], u_k[3][3]; tu2t_u(tu_vec[matrix_idx], t_k, u_k); + // Use the transform() from basic_fun.h double x_rot[3]; - x_rot[0] = t_k[0] + u_k[0][0] * xa[i_res][0] + u_k[0][1] * xa[i_res][1] + u_k[0][2] * xa[i_res][2]; - x_rot[1] = t_k[1] + u_k[1][0] * xa[i_res][0] + u_k[1][1] * xa[i_res][1] + u_k[1][2] * xa[i_res][2]; - x_rot[2] = t_k[2] + u_k[2][0] * xa[i_res][0] + u_k[2][1] * xa[i_res][1] + u_k[2][2] * xa[i_res][2]; + transform(t_k, u_k, xa[i_res], x_rot); + // Use the dist() from basic_fun.h double dist2 = dist(x_rot, ya[j_res]); double d = std::sqrt(dist2); @@ -3954,8 +3876,6 @@ int flexalign_fatcat_main(double **xa, double **ya, rmsd0 += dist2; n_ali8++; - // FIX: ONLY increment Liden if the pair is structurally aligned (d <= d0_out) - // This matches the denominator (n_ali8) used in output_flexalign_results. 
if (seqx[i_res] == seqy[j_res]) { Liden += 1.0; @@ -3996,8 +3916,6 @@ int flexalign_fatcat_main(double **xa, double **ya, TM_ali = TM1; rmsd_ali = rmsd0; - // Notice: NO Division by L_ali here. We leave it as an absolute count! - if (!tu_vec.empty()) tu2t_u(tu_vec[0], t0, u0); From 990b3e5a089cbf5e30c8ce4ca6a7fc6f8816965f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A5=9D=E4=BA=91=E7=AF=AA?= Date: Tue, 19 May 2026 09:35:40 +0800 Subject: [PATCH 15/23] -mm 10 Run through both sets of bounds --- USalign.cpp | 605 +++++++++++++++++++++++++++++----------------------- 1 file changed, 337 insertions(+), 268 deletions(-) diff --git a/USalign.cpp b/USalign.cpp index 34e4d85..33f9fe3 100644 --- a/USalign.cpp +++ b/USalign.cpp @@ -3584,340 +3584,409 @@ int flexalign_fatcat_main(double **xa, double **ya, auto bounds_fatcat = run_dp_and_split(0); auto bounds_strict = run_dp_and_split(1); - std::vector bounds1, bounds2; - int domains_fatcat = bounds_fatcat.first.empty() ? 0 : bounds_fatcat.first.size() - 1; - int domains_strict = bounds_strict.first.empty() ? 0 : bounds_strict.first.size() - 1; - - if (domains_strict > domains_fatcat) - { - bounds1 = bounds_strict.first; - bounds2 = bounds_strict.second; - } - else - { - bounds1 = bounds_fatcat.first; - bounds2 = bounds_fatcat.second; - } - - if (bounds1.empty()) - return 0; - // ========================================== - // Step 5: Iteratively align each block using TRUE flexalign_best logic + // NEW LOGIC: Run through both sets of bounds (fatcat and strict) + // Calculate final TM-scores for both, and select the one with the higher TM-score. 
// ========================================== - std::string global_seqM = "", global_seqxA = "", global_seqyA = ""; - global_seqM.reserve(xlen + ylen + max_gap); - global_seqxA.reserve(xlen + ylen + max_gap); - global_seqyA.reserve(xlen + ylen + max_gap); - - tu_vec.clear(); - std::vector global_res_tu(xlen, -1); - - for (size_t k = 0; k < bounds1.size() - 1; k++) + std::vector, std::vector>> all_bounds; + + // Add the loose logic bounds + all_bounds.push_back(bounds_fatcat); + + // Add the strict logic bounds only if they differ from the loose one to save computation time + if (bounds_strict.first != bounds_fatcat.first || bounds_strict.second != bounds_fatcat.second) { - int x_s = bounds1[k], x_e = bounds1[k + 1]; - int y_s = bounds2[k], y_e = bounds2[k + 1]; - int L1_sub = x_e - x_s; - int L2_sub = y_e - y_s; - - if (L1_sub < 3 || L2_sub < 3) - { - for (int i = 0; i < L1_sub; i++) - { - global_seqxA += seqx[x_s + i]; - global_seqyA += '-'; - global_seqM += ' '; - } - for (int i = 0; i < L2_sub; i++) - { - global_seqxA += '-'; - global_seqyA += seqy[y_s + i]; - global_seqM += ' '; - } - continue; - } - - double **xa_sub, **ya_sub; - NewArray(&xa_sub, L1_sub, 3); - NewArray(&ya_sub, L2_sub, 3); - char *seqx_sub = new char[L1_sub + 1]; - char *seqy_sub = new char[L2_sub + 1]; - char *secx_sub = new char[L1_sub + 1]; - char *secy_sub = new char[L2_sub + 1]; + all_bounds.push_back(bounds_strict); + } - for (int i = 0; i < L1_sub; i++) - { - xa_sub[i][0] = xa[x_s + i][0]; - xa_sub[i][1] = xa[x_s + i][1]; - xa_sub[i][2] = xa[x_s + i][2]; - seqx_sub[i] = seqx[x_s + i]; - secx_sub[i] = secx[x_s + i]; - } - seqx_sub[L1_sub] = '\0'; - secx_sub[L1_sub] = '\0'; + // Variables to store the best global metrics across all tested bounds + double best_global_max_TM = -1.0; + std::vector> best_tu_vec; + double best_t0[3], best_u0[3][3]; + double best_TM1 = 0.0, best_TM2 = 0.0, best_TM3 = 0.0, best_TM4 = 0.0, best_TM5 = 0.0; + double best_rmsd0 = 0.0, best_Liden = 0.0, best_TM_ali 
= 0.0, best_rmsd_ali = 0.0; + int best_L_ali = 0, best_n_ali = 0, best_n_ali8 = 0; + std::string best_seqM = "", best_seqxA = "", best_seqyA = ""; + std::vector best_do_vec; + + // To store best normalization factors + double best_d0A = 0.0, best_d0B = 0.0, best_d0a = 0.0, best_d0u = 0.0; + + // Loop through both bound sets + for (size_t b_idx = 0; b_idx < all_bounds.size(); b_idx++) + { + std::vector& bounds1 = all_bounds[b_idx].first; + std::vector& bounds2 = all_bounds[b_idx].second; - for (int i = 0; i < L2_sub; i++) - { - ya_sub[i][0] = ya[y_s + i][0]; - ya_sub[i][1] = ya[y_s + i][1]; - ya_sub[i][2] = ya[y_s + i][2]; - seqy_sub[i] = seqy[y_s + i]; - secy_sub[i] = secy[y_s + i]; - } - seqy_sub[L2_sub] = '\0'; - secy_sub[L2_sub] = '\0'; + // Skip if no valid bounds were found in this logic + if (bounds1.empty()) continue; - double t0_best[3], u0_best[3][3]; - double TM_best_max = -1.0; - std::string seqM_best, seqxA_best, seqyA_best; - std::vector> tu_vec_best; + // ========================================== + // Step 5: Iteratively align each block using TRUE flexalign_best logic + // ========================================== + std::string cur_global_seqM = "", cur_global_seqxA = "", cur_global_seqyA = ""; + cur_global_seqM.reserve(xlen + ylen + max_gap); + cur_global_seqxA.reserve(xlen + ylen + max_gap); + cur_global_seqyA.reserve(xlen + ylen + max_gap); - bool force_fast_opt = (std::min(L1_sub, L2_sub) > 1500) ? 
true : fast_opt; - std::vector local_sequence = sequence; + std::vector> cur_tu_vec; + std::vector cur_global_res_tu(xlen, -1); - for (int cur_ss_opt = 0; cur_ss_opt <= 1; cur_ss_opt++) + for (size_t k = 0; k < bounds1.size() - 1; k++) { - FlexAlignResult cur_res; + int x_s = bounds1[k], x_e = bounds1[k + 1]; + int y_s = bounds2[k], y_e = bounds2[k + 1]; + int L1_sub = x_e - x_s; + int L2_sub = y_e - y_s; - execute_flexalign_with_fallback( - xa_sub, ya_sub, seqx_sub, seqy_sub, secx_sub, secy_sub, - L1_sub, L2_sub, local_sequence, Lnorm_ass, d0_scale, - i_opt, a_opt, u_opt, d_opt, force_fast_opt, - mol_type, hinge_opt, cur_ss_opt, cur_res); - - double cur_max_TM = (cur_res.TM1 > cur_res.TM2) ? cur_res.TM1 : cur_res.TM2; - if (cur_max_TM > TM_best_max) + // If the block is too short, just assign gaps + if (L1_sub < 3 || L2_sub < 3) { - TM_best_max = cur_max_TM; - for (int a = 0; a < 3; a++) + for (int i = 0; i < L1_sub; i++) { - t0_best[a] = cur_res.t0[a]; - for (int b = 0; b < 3; b++) - u0_best[a][b] = cur_res.u0[a][b]; + cur_global_seqxA += seqx[x_s + i]; + cur_global_seqyA += '-'; + cur_global_seqM += ' '; } - seqM_best = cur_res.seqM; - seqxA_best = cur_res.seqxA; - seqyA_best = cur_res.seqyA; - tu_vec_best = cur_res.tu_vec; + for (int i = 0; i < L2_sub; i++) + { + cur_global_seqxA += '-'; + cur_global_seqyA += seqy[y_s + i]; + cur_global_seqM += ' '; + } + continue; } - } - if (TM_best_max < 0) - { + // Extract sub-sequences and coordinates + double **xa_sub, **ya_sub; + NewArray(&xa_sub, L1_sub, 3); + NewArray(&ya_sub, L2_sub, 3); + char *seqx_sub = new char[L1_sub + 1]; + char *seqy_sub = new char[L2_sub + 1]; + char *secx_sub = new char[L1_sub + 1]; + char *secy_sub = new char[L2_sub + 1]; + for (int i = 0; i < L1_sub; i++) { - global_seqxA += seqx_sub[i]; - global_seqyA += '-'; - global_seqM += ' '; + xa_sub[i][0] = xa[x_s + i][0]; + xa_sub[i][1] = xa[x_s + i][1]; + xa_sub[i][2] = xa[x_s + i][2]; + seqx_sub[i] = seqx[x_s + i]; + secx_sub[i] = secx[x_s + i]; 
} + seqx_sub[L1_sub] = '\0'; + secx_sub[L1_sub] = '\0'; + for (int i = 0; i < L2_sub; i++) { - global_seqxA += '-'; - global_seqyA += seqy_sub[i]; - global_seqM += ' '; + ya_sub[i][0] = ya[y_s + i][0]; + ya_sub[i][1] = ya[y_s + i][1]; + ya_sub[i][2] = ya[y_s + i][2]; + seqy_sub[i] = seqy[y_s + i]; + secy_sub[i] = secy[y_s + i]; } - DeleteArray(&xa_sub, L1_sub); - DeleteArray(&ya_sub, L2_sub); - delete[] seqx_sub; - delete[] seqy_sub; - delete[] secx_sub; - delete[] secy_sub; - continue; - } + seqy_sub[L2_sub] = '\0'; + secy_sub[L2_sub] = '\0'; - if (tu_vec_best.empty()) - { - std::vector tu_tmp(12); - t_u2tu(t0_best, u0_best, tu_tmp); - tu_vec_best.push_back(tu_tmp); - } + double t0_best[3], u0_best[3][3]; + double TM_best_max = -1.0; + std::string seqM_best, seqxA_best, seqyA_best; + std::vector> tu_vec_best; - int base_tu_idx = tu_vec.size(); - for (size_t m = 0; m < tu_vec_best.size(); m++) - { - tu_vec.push_back(tu_vec_best[m]); - } + bool force_fast_opt = (std::min(L1_sub, L2_sub) > 1500) ? true : fast_opt; + std::vector local_sequence = sequence; - int rx = x_s; - int current_global_idx = base_tu_idx; + // Iterate over Secondary Structure optimization choices + for (int cur_ss_opt = 0; cur_ss_opt <= 1; cur_ss_opt++) + { + FlexAlignResult cur_res; - for (size_t i = 0; i < seqxA_best.length(); i++) - { - char c = seqM_best[i]; + execute_flexalign_with_fallback( + xa_sub, ya_sub, seqx_sub, seqy_sub, secx_sub, secy_sub, + L1_sub, L2_sub, local_sequence, Lnorm_ass, d0_scale, + i_opt, a_opt, u_opt, d_opt, force_fast_opt, + mol_type, hinge_opt, cur_ss_opt, cur_res); - if (c != ' ' && c != '.' && c != ':') + double cur_max_TM = (cur_res.TM1 > cur_res.TM2) ? 
cur_res.TM1 : cur_res.TM2; + if (cur_max_TM > TM_best_max) + { + TM_best_max = cur_max_TM; + for (int a = 0; a < 3; a++) + { + t0_best[a] = cur_res.t0[a]; + for (int b = 0; b < 3; b++) + u0_best[a][b] = cur_res.u0[a][b]; + } + seqM_best = cur_res.seqM; + seqxA_best = cur_res.seqxA; + seqyA_best = cur_res.seqyA; + tu_vec_best = cur_res.tu_vec; + } + } + + // Fallback for current block + if (TM_best_max < 0) { - int local_hinge_idx = -1; - if (c >= '0' && c <= '9') - local_hinge_idx = c - '0'; - else if (c >= 'a' && c <= 'z') - local_hinge_idx = c - 'a' + 10; - else if (c >= 'A' && c <= 'Z') - local_hinge_idx = c - 'A' + 36; - - if (local_hinge_idx >= 0 && local_hinge_idx < tu_vec_best.size()) + for (int i = 0; i < L1_sub; i++) { - current_global_idx = base_tu_idx + local_hinge_idx; + cur_global_seqxA += seqx_sub[i]; + cur_global_seqyA += '-'; + cur_global_seqM += ' '; } + for (int i = 0; i < L2_sub; i++) + { + cur_global_seqxA += '-'; + cur_global_seqyA += seqy_sub[i]; + cur_global_seqM += ' '; + } + DeleteArray(&xa_sub, L1_sub); + DeleteArray(&ya_sub, L2_sub); + delete[] seqx_sub; + delete[] seqy_sub; + delete[] secx_sub; + delete[] secy_sub; + continue; } - if (seqxA_best[i] != '-') + // Ensure tu_vec_best is populated + if (tu_vec_best.empty()) { - global_res_tu[rx] = current_global_idx; - rx++; + std::vector tu_tmp(12); + t_u2tu(t0_best, u0_best, tu_tmp); + tu_vec_best.push_back(tu_tmp); } - if (seqxA_best[i] != '-' && seqyA_best[i] != '-') + int base_tu_idx = cur_tu_vec.size(); + for (size_t m = 0; m < tu_vec_best.size(); m++) { + cur_tu_vec.push_back(tu_vec_best[m]); + } + + int rx = x_s; + int current_global_idx = base_tu_idx; + + // Merge current block sequences to the global sequence + for (size_t i = 0; i < seqxA_best.length(); i++) + { + char c = seqM_best[i]; + if (c != ' ' && c != '.' 
&& c != ':') { - char global_c; - if (current_global_idx < 10) - global_c = '0' + current_global_idx; - else if (current_global_idx < 36) - global_c = 'a' + (current_global_idx - 10); - else if (current_global_idx < 62) - global_c = 'A' + (current_global_idx - 36); - else - global_c = '*'; + int local_hinge_idx = -1; + if (c >= '0' && c <= '9') + local_hinge_idx = c - '0'; + else if (c >= 'a' && c <= 'z') + local_hinge_idx = c - 'a' + 10; + else if (c >= 'A' && c <= 'Z') + local_hinge_idx = c - 'A' + 36; + + if (local_hinge_idx >= 0 && local_hinge_idx < tu_vec_best.size()) + { + current_global_idx = base_tu_idx + local_hinge_idx; + } + } - seqM_best[i] = global_c; + if (seqxA_best[i] != '-') + { + cur_global_res_tu[rx] = current_global_idx; + rx++; + } + + if (seqxA_best[i] != '-' && seqyA_best[i] != '-') + { + if (c != ' ' && c != '.' && c != ':') + { + char global_c; + if (current_global_idx < 10) + global_c = '0' + current_global_idx; + else if (current_global_idx < 36) + global_c = 'a' + (current_global_idx - 10); + else if (current_global_idx < 62) + global_c = 'A' + (current_global_idx - 36); + else + global_c = '*'; + + seqM_best[i] = global_c; + } + else + { + seqM_best[i] = c; + } } else { - seqM_best[i] = c; + seqM_best[i] = ' '; } } - else - { - seqM_best[i] = ' '; - } - } - global_seqM += seqM_best; - global_seqxA += seqxA_best; - global_seqyA += seqyA_best; + cur_global_seqM += seqM_best; + cur_global_seqxA += seqxA_best; + cur_global_seqyA += seqyA_best; - DeleteArray(&xa_sub, L1_sub); - DeleteArray(&ya_sub, L2_sub); - delete[] seqx_sub; - delete[] seqy_sub; - delete[] secx_sub; - delete[] secy_sub; - } - - // ========================================== - // Step 6: Recalculate global metrics correctly - // Utilizing basic_fun.h transform() and dist() functions for matrix rotations and distance - // ========================================== - seqM = global_seqM; - seqxA = global_seqxA; - seqyA = global_seqyA; - - d0A = 1.24 * std::pow(ylen * 1.0 - 
15.0, 1.0 / 3.0) - 1.8; - if (d0A < 0.5) - d0A = 0.5; - d0B = 1.24 * std::pow(xlen * 1.0 - 15.0, 1.0 / 3.0) - 1.8; - if (d0B < 0.5) - d0B = 0.5; - d0a = 1.24 * std::pow((xlen + ylen) * 0.5 - 15.0, 1.0 / 3.0) - 1.8; - if (d0a < 0.5) - d0a = 0.5; - if (u_opt) - { - d0u = 1.24 * std::pow(Lnorm_ass - 15.0, 1.0 / 3.0) - 1.8; - if (d0u < 0.5) - d0u = 0.5; - } + DeleteArray(&xa_sub, L1_sub); + DeleteArray(&ya_sub, L2_sub); + delete[] seqx_sub; + delete[] seqy_sub; + delete[] secx_sub; + delete[] secy_sub; + } - TM1 = TM2 = TM3 = TM4 = TM5 = rmsd0 = 0.0; - Liden = 0.0; - n_ali8 = 0; - n_ali = 0; - do_vec.clear(); + // ========================================== + // Step 6: Recalculate global metrics correctly for current DP boundary + // Utilizing basic_fun.h transform() and dist() functions for matrix rotations and distance + // ========================================== + double cur_d0A = 1.24 * std::pow(ylen * 1.0 - 15.0, 1.0 / 3.0) - 1.8; + if (cur_d0A < 0.5) cur_d0A = 0.5; + double cur_d0B = 1.24 * std::pow(xlen * 1.0 - 15.0, 1.0 / 3.0) - 1.8; + if (cur_d0B < 0.5) cur_d0B = 0.5; + double cur_d0a = 1.24 * std::pow((xlen + ylen) * 0.5 - 15.0, 1.0 / 3.0) - 1.8; + if (cur_d0a < 0.5) cur_d0a = 0.5; + + double cur_d0u = 0.0; + if (u_opt) + { + cur_d0u = 1.24 * std::pow(Lnorm_ass - 15.0, 1.0 / 3.0) - 1.8; + if (cur_d0u < 0.5) cur_d0u = 0.5; + } - int i_res = 0, j_res = 0; - for (size_t r = 0; r < seqxA.length(); r++) - { - bool x_valid = (seqxA[r] != '-'); - bool y_valid = (seqyA[r] != '-'); + double cur_TM1 = 0.0, cur_TM2 = 0.0, cur_TM3 = 0.0, cur_TM4 = 0.0, cur_TM5 = 0.0; + double cur_rmsd0 = 0.0, cur_Liden = 0.0; + int cur_n_ali8 = 0, cur_n_ali = 0; + std::vector cur_do_vec; - if (x_valid && y_valid) + int i_res = 0, j_res = 0; + for (size_t r = 0; r < cur_global_seqxA.length(); r++) { - int matrix_idx = global_res_tu[i_res]; + bool x_valid = (cur_global_seqxA[r] != '-'); + bool y_valid = (cur_global_seqyA[r] != '-'); - if (matrix_idx >= 0 && matrix_idx < tu_vec.size()) + 
if (x_valid && y_valid) { - double t_k[3], u_k[3][3]; - tu2t_u(tu_vec[matrix_idx], t_k, u_k); - - // Use the transform() from basic_fun.h - double x_rot[3]; - transform(t_k, u_k, xa[i_res], x_rot); - - // Use the dist() from basic_fun.h - double dist2 = dist(x_rot, ya[j_res]); - double d = std::sqrt(dist2); - - TM2 += 1.0 / (1.0 + dist2 / (d0B * d0B)); - TM1 += 1.0 / (1.0 + dist2 / (d0A * d0A)); - if (a_opt) - TM3 += 1.0 / (1.0 + dist2 / (d0a * d0a)); - if (u_opt) - TM4 += 1.0 / (1.0 + dist2 / (d0u * d0u)); - if (d_opt) - TM5 += 1.0 / (1.0 + dist2 / (d0_scale * d0_scale)); - - n_ali++; - do_vec.push_back(d); - - if (d <= d0_out) + int matrix_idx = cur_global_res_tu[i_res]; + + if (matrix_idx >= 0 && matrix_idx < cur_tu_vec.size()) { - rmsd0 += dist2; - n_ali8++; + double t_k[3], u_k[3][3]; + tu2t_u(cur_tu_vec[matrix_idx], t_k, u_k); + + // Use the transform() and dist() from basic_fun.h + double x_rot[3]; + transform(t_k, u_k, xa[i_res], x_rot); + double dist2 = dist(x_rot, ya[j_res]); + double d = std::sqrt(dist2); + + cur_TM2 += 1.0 / (1.0 + dist2 / (cur_d0B * cur_d0B)); + cur_TM1 += 1.0 / (1.0 + dist2 / (cur_d0A * cur_d0A)); + if (a_opt) cur_TM3 += 1.0 / (1.0 + dist2 / (cur_d0a * cur_d0a)); + if (u_opt) cur_TM4 += 1.0 / (1.0 + dist2 / (cur_d0u * cur_d0u)); + if (d_opt) cur_TM5 += 1.0 / (1.0 + dist2 / (d0_scale * d0_scale)); + + cur_n_ali++; + cur_do_vec.push_back(d); - if (seqx[i_res] == seqy[j_res]) + if (d <= d0_out) { - Liden += 1.0; + cur_rmsd0 += dist2; + cur_n_ali8++; + + if (seqx[i_res] == seqy[j_res]) + { + cur_Liden += 1.0; + } } } + else + { + cur_do_vec.push_back(-1); + } } else { - do_vec.push_back(-1); + cur_do_vec.push_back(-1); } + + if (x_valid) i_res++; + if (y_valid) j_res++; } + + // Normalize TM-scores + cur_TM2 /= xlen; + cur_TM1 /= ylen; + if (a_opt) cur_TM3 /= (xlen + ylen) * 0.5; + if (u_opt) cur_TM4 /= Lnorm_ass; + if (d_opt) cur_TM5 /= ylen; + + if (cur_n_ali8 > 0) + cur_rmsd0 = std::sqrt(cur_rmsd0 / cur_n_ali8); else + cur_rmsd0 = 0.0; 
+ + // Compare the current iteration against the best found so far + double cur_global_max_TM = (cur_TM1 > cur_TM2) ? cur_TM1 : cur_TM2; + + if (cur_global_max_TM > best_global_max_TM) { - do_vec.push_back(-1); + best_global_max_TM = cur_global_max_TM; + best_tu_vec = cur_tu_vec; + best_TM1 = cur_TM1; + best_TM2 = cur_TM2; + best_TM3 = cur_TM3; + best_TM4 = cur_TM4; + best_TM5 = cur_TM5; + best_rmsd0 = cur_rmsd0; + best_Liden = cur_Liden; + best_TM_ali = cur_TM1; + best_rmsd_ali = cur_rmsd0; + best_L_ali = cur_n_ali; + best_n_ali = cur_n_ali; + best_n_ali8 = cur_n_ali8; + best_seqM = cur_global_seqM; + best_seqxA = cur_global_seqxA; + best_seqyA = cur_global_seqyA; + best_do_vec = cur_do_vec; + + best_d0A = cur_d0A; + best_d0B = cur_d0B; + best_d0a = cur_d0a; + best_d0u = cur_d0u; + + if (!best_tu_vec.empty()) { + tu2t_u(best_tu_vec[0], best_t0, best_u0); + } } - - if (x_valid) - i_res++; - if (y_valid) - j_res++; } - TM2 /= xlen; - TM1 /= ylen; - if (a_opt) - TM3 /= (xlen + ylen) * 0.5; - if (u_opt) - TM4 /= Lnorm_ass; - if (d_opt) - TM5 /= ylen; - - if (n_ali8 > 0) - rmsd0 = std::sqrt(rmsd0 / n_ali8); - else - rmsd0 = 0.0; - - L_ali = n_ali; - TM_ali = TM1; - rmsd_ali = rmsd0; - - if (!tu_vec.empty()) - tu2t_u(tu_vec[0], t0, u0); + // Safety check if both attempts somehow failed + if (best_global_max_TM < 0) return 0; + + // Output best values back to the reference parameters + TM1 = best_TM1; + TM2 = best_TM2; + TM3 = best_TM3; + TM4 = best_TM4; + TM5 = best_TM5; + rmsd0 = best_rmsd0; + Liden = best_Liden; + TM_ali = best_TM_ali; + rmsd_ali = best_rmsd_ali; + L_ali = best_L_ali; + n_ali = best_n_ali; + n_ali8 = best_n_ali8; + seqM = best_seqM; + seqxA = best_seqxA; + seqyA = best_seqyA; + do_vec = best_do_vec; + tu_vec = best_tu_vec; + + d0A = best_d0A; + d0B = best_d0B; + d0a = best_d0a; + d0u = best_d0u; + + for (int a = 0; a < 3; a++) { + t0[a] = best_t0[a]; + for (int b = 0; b < 3; b++) u0[a][b] = best_u0[a][b]; + } return tu_vec.size(); } From 
48785505101a5fef8b705ae439c05fc8ffbd3020 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A5=9D=E4=BA=91=E7=AF=AA?= Date: Fri, 22 May 2026 07:43:00 +0800 Subject: [PATCH 16/23] DEBUG: Print the end-to-end continuous regions sent to flexalign --- USalign.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/USalign.cpp b/USalign.cpp index 33f9fe3..3e66c1d 100644 --- a/USalign.cpp +++ b/USalign.cpp @@ -3621,6 +3621,23 @@ int flexalign_fatcat_main(double **xa, double **ya, // Skip if no valid bounds were found in this logic if (bounds1.empty()) continue; + // ========================================== + // INSERT NEW DEBUG CODE HERE + // ========================================== + // DEBUG: Print the end-to-end continuous regions sent to flexalign + // std::cout << "DEBUG (Mode " << (b_idx == 0 ? "FATCAT" : "STRICT") << "): " + // << "Sequence partitioned into " << bounds1.size() - 1 << " continuous sub-regions.\n"; + // for (size_t k = 0; k < bounds1.size() - 1; k++) { + // int L1_sub = bounds1[k + 1] - bounds1[k]; + // int L2_sub = bounds2[k + 1] - bounds2[k]; + // bool will_skip = (L1_sub < 3 || L2_sub < 3); + // std::cout << " Sub-region " << k + 1 << ": " + // << "Seq1 [" << bounds1[k] << " to " << bounds1[k + 1] << ") <-> " + // << "Seq2 [" << bounds2[k] << " to " << bounds2[k + 1] << ") " + // << (will_skip ? 
"[SKIPPED: fill gaps]" : "[SENT TO flexalign]") << "\n"; + // } + // ========================================== + // ========================================== // Step 5: Iteratively align each block using TRUE flexalign_best logic // ========================================== From 9c3c424f26d7cace968113fbcb867b19116c3245 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A5=9D=E4=BA=91=E7=AF=AA?= Date: Tue, 26 May 2026 09:45:17 +0800 Subject: [PATCH 17/23] add -hinge --- USalign.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/USalign.cpp b/USalign.cpp index 3e66c1d..3596ad2 100644 --- a/USalign.cpp +++ b/USalign.cpp @@ -91,8 +91,8 @@ void print_extra_help() //"\n" //" -closeK Number of closest atoms used for sequence order independent\n" //" initial alignment. default: 5\n" - //"\n" - //" -hinge Maximum number of hinge allowed in flexible alignment. default: 9\n" + "\n" + " -hinge Maximum number of hinge allowed in flexible alignment. default: 9\n" "\n" " -se Do not perform superposition. Useful for extracting alignment from\n" " superposed structure pairs\n" @@ -2972,13 +2972,14 @@ int flexalign_fatcat_main(double **xa, double **ya, double disCut = 5.0; double disSmooth = 4.0; double twist_pen = -25.0; - int max_twists = 9; int max_gap = 40; double max_penalty = -5.0; int misCut = 2 * fragLen; int maxGapFrag = fragLen + max_gap; double afp_dis_cut = fragLen * fragLen * (disCut * disCut); + int max_twists = hinge_opt; + // ========================================== // OPTIMIZATION 1: Precompute local intra-protein distance matrices // Utilizes basic_fun.h dist() function to replace manual distance calculation @@ -4794,7 +4795,7 @@ int main(int argc, char *argv[]) closeK_opt = 0; } - if (mm_opt == 7 && hinge_opt >= 10) + if (mm_opt >= 7 && hinge_opt >= 10) PrintErrorAndQuit("ERROR! 
-hinge must be <10"); if (chainmapfile.size() && mm_opt != 1) From e0cb6b178ad345cc454d4ffba8f7a4f05af66149 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A5=9D=E4=BA=91=E7=AF=AA?= Date: Wed, 3 Jun 2026 09:15:58 +0800 Subject: [PATCH 18/23] Wrapper for generating bounds with specific parameter sets --- USalign.cpp | 466 ++++++++++++++++++++++++++-------------------------- 1 file changed, 229 insertions(+), 237 deletions(-) diff --git a/USalign.cpp b/USalign.cpp index 718b3f3..8e02235 100644 --- a/USalign.cpp +++ b/USalign.cpp @@ -2966,8 +2966,6 @@ int flexalign_fatcat_main(double **xa, double **ya, { // FATCAT base parameters int fragLen = 8; - double rmsdCut = 3.0; - double badRmsd = 4.0; double resScore = 3.0; double gap_ext = -0.5; double disCut = 5.0; @@ -3007,259 +3005,263 @@ int flexalign_fatcat_main(double **xa, double **ya, } // ========================================== - // Step 1: Extract initial AFPs in batches + // NEW LOGIC: Wrapper for generating bounds with specific parameter sets // ========================================== - std::vector initial_afps; - int step = sparse_val + 1; - - double r1_static[8][3], r2_static[8][3]; - double *r1[8], *r2[8]; - for (int k = 0; k < 8; k++) + auto generate_bounds = [&](double cur_rmsdCut, double cur_badRmsd, double cur_local_badRmsd) -> std::pair, std::vector> { - r1[k] = r1_static[k]; - r2[k] = r2_static[k]; - } + // ========================================== + // Step 1: Extract initial AFPs in batches + // ========================================== + std::vector initial_afps; + int step = sparse_val + 1; - for (int i = 0; i <= xlen - fragLen; i += step) - { - for (int j = 0; j <= ylen - fragLen; j += step) + double r1_static[8][3], r2_static[8][3]; + double *r1[8], *r2[8]; + for (int k = 0; k < 8; k++) { - int d3_term = std::min(i, j) + std::min(xlen - (i + fragLen - 1), ylen - (j + fragLen)) + fragLen; - if (d3_term < 0.3 * std::min(xlen, ylen)) - continue; + r1[k] = r1_static[k]; + r2[k] = r2_static[k]; + } 
- double dist1 = disTable1[i][fragLen - 1]; // Precomputed end-to-end distance - double dist2 = disTable2[j][fragLen - 1]; + for (int i = 0; i <= xlen - fragLen; i += step) + { + for (int j = 0; j <= ylen - fragLen; j += step) + { + int d3_term = std::min(i, j) + std::min(xlen - (i + fragLen - 1), ylen - (j + fragLen)) + fragLen; + if (d3_term < 0.3 * std::min(xlen, ylen)) + continue; - if (std::fabs(dist1 - dist2) > 2.0 * rmsdCut) - continue; + double dist1 = disTable1[i][fragLen - 1]; // Precomputed end-to-end distance + double dist2 = disTable2[j][fragLen - 1]; - for (int k = 0; k < fragLen; k++) - { - r1[k][0] = xa[i + k][0]; - r1[k][1] = xa[i + k][1]; - r1[k][2] = xa[i + k][2]; - r2[k][0] = ya[j + k][0]; - r2[k][1] = ya[j + k][1]; - r2[k][2] = ya[j + k][2]; - } + if (std::fabs(dist1 - dist2) > 2.0 * cur_rmsdCut) + continue; - double rms_sum_sq, t_tmp[3], u_tmp[3][3]; - Kabsch(r1, r2, fragLen, 0, &rms_sum_sq, t_tmp, u_tmp); - double rmsd_tmp = std::sqrt(rms_sum_sq / fragLen); + for (int k = 0; k < fragLen; k++) + { + r1[k][0] = xa[i + k][0]; + r1[k][1] = xa[i + k][1]; + r1[k][2] = xa[i + k][2]; + r2[k][0] = ya[j + k][0]; + r2[k][1] = ya[j + k][1]; + r2[k][2] = ya[j + k][2]; + } - if (rmsd_tmp < rmsdCut) - { - FATCAT_AFP afp; - afp.i = i; - afp.j = j; - afp.len = fragLen; - afp.score = resScore * fragLen * (1.0 - (rmsd_tmp / badRmsd) * (rmsd_tmp / badRmsd)); - for (int a = 0; a < 3; a++) + double rms_sum_sq, t_tmp[3], u_tmp[3][3]; + Kabsch(r1, r2, fragLen, 0, &rms_sum_sq, t_tmp, u_tmp); + double rmsd_tmp = std::sqrt(rms_sum_sq / fragLen); + + if (rmsd_tmp < cur_rmsdCut) { - afp.t[a] = t_tmp[a]; - for (int b = 0; b < 3; b++) - afp.R[a][b] = u_tmp[a][b]; + FATCAT_AFP afp; + afp.i = i; + afp.j = j; + afp.len = fragLen; + afp.score = resScore * fragLen * (1.0 - (rmsd_tmp / cur_badRmsd) * (rmsd_tmp / cur_badRmsd)); + for (int a = 0; a < 3; a++) + { + afp.t[a] = t_tmp[a]; + for (int b = 0; b < 3; b++) + afp.R[a][b] = u_tmp[a][b]; + } + initial_afps.push_back(afp); } - 
initial_afps.push_back(afp); } } - } - - // ========================================== - // Step 2: Merge diagonal AFPs - // ========================================== - int max_diagonal_idx = xlen + ylen + 1; - std::vector> diagonals(max_diagonal_idx); - for (size_t k = 0; k < initial_afps.size(); k++) - { - diagonals[initial_afps[k].i - initial_afps[k].j + ylen].push_back(initial_afps[k]); - } - std::vector merged_afps; - int max_merge_len = std::min(xlen, ylen); - double **r1_merge, **r2_merge; - NewArray(&r1_merge, max_merge_len, 3); - NewArray(&r2_merge, max_merge_len, 3); - - for (int d = 0; d < max_diagonal_idx; d++) - { - if (diagonals[d].empty()) - continue; - std::vector &group = diagonals[d]; + // ========================================== + // Step 2: Merge diagonal AFPs + // ========================================== + int max_diagonal_idx = xlen + ylen + 1; + std::vector> diagonals(max_diagonal_idx); + for (size_t k = 0; k < initial_afps.size(); k++) + { + diagonals[initial_afps[k].i - initial_afps[k].j + ylen].push_back(initial_afps[k]); + } - std::sort(group.begin(), group.end(), [](const FATCAT_AFP &a, const FATCAT_AFP &b) - { return a.i < b.i; }); + std::vector merged_afps; + int max_merge_len = std::min(xlen, ylen); + double **r1_merge, **r2_merge; + NewArray(&r1_merge, max_merge_len, 3); + NewArray(&r2_merge, max_merge_len, 3); - int n_group = group.size(); - std::vector invalid(n_group, false); - for (int idx = 0; idx < n_group; idx++) + for (int d = 0; d < max_diagonal_idx; d++) { - if (invalid[idx]) + if (diagonals[d].empty()) continue; - FATCAT_AFP curr = group[idx]; - for (int nxt_idx = idx + 1; nxt_idx < n_group; nxt_idx++) - { - FATCAT_AFP nxt = group[nxt_idx]; - if (nxt.i > curr.i + curr.len) - break; + std::vector &group = diagonals[d]; + + std::sort(group.begin(), group.end(), [](const FATCAT_AFP &a, const FATCAT_AFP &b) + { return a.i < b.i; }); - if (nxt.i + nxt.len > curr.i + curr.len) + int n_group = group.size(); + std::vector 
invalid(n_group, false); + for (int idx = 0; idx < n_group; idx++) + { + if (invalid[idx]) + continue; + FATCAT_AFP curr = group[idx]; + for (int nxt_idx = idx + 1; nxt_idx < n_group; nxt_idx++) { - int new_len = (nxt.i + nxt.len) - curr.i; + FATCAT_AFP nxt = group[nxt_idx]; + if (nxt.i > curr.i + curr.len) + break; - for (int k = 0; k < new_len; k++) + if (nxt.i + nxt.len > curr.i + curr.len) { - r1_merge[k][0] = xa[curr.i + k][0]; - r1_merge[k][1] = xa[curr.i + k][1]; - r1_merge[k][2] = xa[curr.i + k][2]; - r2_merge[k][0] = ya[curr.j + k][0]; - r2_merge[k][1] = ya[curr.j + k][1]; - r2_merge[k][2] = ya[curr.j + k][2]; - } + int new_len = (nxt.i + nxt.len) - curr.i; - double rms_sum_sq, t_tmp[3], u_tmp[3][3]; - Kabsch(r1_merge, r2_merge, new_len, 0, &rms_sum_sq, t_tmp, u_tmp); - double rmsd_tmp = std::sqrt(rms_sum_sq / new_len); + for (int k = 0; k < new_len; k++) + { + r1_merge[k][0] = xa[curr.i + k][0]; + r1_merge[k][1] = xa[curr.i + k][1]; + r1_merge[k][2] = xa[curr.i + k][2]; + r2_merge[k][0] = ya[curr.j + k][0]; + r2_merge[k][1] = ya[curr.j + k][1]; + r2_merge[k][2] = ya[curr.j + k][2]; + } - if (rmsd_tmp < rmsdCut) - { - curr.len = new_len; - for (int a = 0; a < 3; a++) + double rms_sum_sq, t_tmp[3], u_tmp[3][3]; + Kabsch(r1_merge, r2_merge, new_len, 0, &rms_sum_sq, t_tmp, u_tmp); + double rmsd_tmp = std::sqrt(rms_sum_sq / new_len); + + if (rmsd_tmp < cur_rmsdCut) { - curr.t[a] = t_tmp[a]; - for (int b = 0; b < 3; b++) - curr.R[a][b] = u_tmp[a][b]; + curr.len = new_len; + for (int a = 0; a < 3; a++) + { + curr.t[a] = t_tmp[a]; + for (int b = 0; b < 3; b++) + curr.R[a][b] = u_tmp[a][b]; + } + curr.score = resScore * new_len * (1.0 - (rmsd_tmp / cur_badRmsd) * (rmsd_tmp / cur_badRmsd)); + invalid[nxt_idx] = true; } - curr.score = resScore * new_len * (1.0 - (rmsd_tmp / badRmsd) * (rmsd_tmp / badRmsd)); - invalid[nxt_idx] = true; } } + merged_afps.push_back(curr); } - merged_afps.push_back(curr); } - } - DeleteArray(&r1_merge, max_merge_len); - 
DeleteArray(&r2_merge, max_merge_len); - - std::sort(merged_afps.begin(), merged_afps.end(), [](const FATCAT_AFP &a, const FATCAT_AFP &b) - { - if (a.i == b.i) return a.j < b.j; - return a.i < b.i; }); + DeleteArray(&r1_merge, max_merge_len); + DeleteArray(&r2_merge, max_merge_len); - int n_afps = merged_afps.size(); - if (n_afps == 0) - return 0; + std::sort(merged_afps.begin(), merged_afps.end(), [](const FATCAT_AFP &a, const FATCAT_AFP &b) + { + if (a.i == b.i) return a.j < b.j; + return a.i < b.i; }); - // ========================================== - // Step 3 & 4: Dual Dynamic Programming and Domain Splitting - // ========================================== - std::vector afp_aft_index(xlen * ylen, -1); - std::vector afp_bef_index(xlen * ylen, -1); + int n_afps = merged_afps.size(); + std::vector ret_b1, ret_b2; + if (n_afps == 0) + return std::make_pair(ret_b1, ret_b2); - std::vector>> i_to_j(xlen); - for (int m = 0; m < n_afps; m++) - { - i_to_j[merged_afps[m].i].push_back(std::make_pair(merged_afps[m].j, m)); - } + // ========================================== + // Step 3 & 4: Dual Dynamic Programming and Domain Splitting + // ========================================== + std::vector afp_aft_index(xlen * ylen, -1); + std::vector afp_bef_index(xlen * ylen, -1); - for (int i_val = 0; i_val < xlen; i_val++) - { - if (i_to_j[i_val].empty()) - continue; - for (size_t p = 0; p < i_to_j[i_val].size(); p++) - { - int j_val = i_to_j[i_val][p].first; - afp_aft_index[i_val * ylen + j_val] = i_to_j[i_val][p].second; - afp_bef_index[i_val * ylen + j_val] = i_to_j[i_val][p].second; - } - int curr_bef = -1; - for (int j_val = 0; j_val < ylen; j_val++) + std::vector>> i_to_j(xlen); + for (int m = 0; m < n_afps; m++) { - if (afp_bef_index[i_val * ylen + j_val] != -1) - curr_bef = afp_bef_index[i_val * ylen + j_val]; - else - afp_bef_index[i_val * ylen + j_val] = curr_bef; + i_to_j[merged_afps[m].i].push_back(std::make_pair(merged_afps[m].j, m)); } - int curr_aft = -1; - for 
(int j_val = ylen - 1; j_val >= 0; j_val--) - { - if (afp_aft_index[i_val * ylen + j_val] != -1) - curr_aft = afp_aft_index[i_val * ylen + j_val]; - else - afp_aft_index[i_val * ylen + j_val] = curr_aft; - } - } - auto get_dvar = [&](const FATCAT_AFP &prv, const FATCAT_AFP &curr) -> double - { - double rms_sq = 0; - for (int i_idx = 0; i_idx < fragLen; i_idx++) + for (int i_val = 0; i_val < xlen; i_val++) { - for (int j_idx = 0; j_idx < fragLen; j_idx++) + if (i_to_j[i_val].empty()) + continue; + for (size_t p = 0; p < i_to_j[i_val].size(); p++) + { + int j_val = i_to_j[i_val][p].first; + afp_aft_index[i_val * ylen + j_val] = i_to_j[i_val][p].second; + afp_bef_index[i_val * ylen + j_val] = i_to_j[i_val][p].second; + } + int curr_bef = -1; + for (int j_val = 0; j_val < ylen; j_val++) { - double dist1, dist2; - int idx1_a = curr.i + i_idx, idx1_b = prv.i + j_idx; - if (idx1_a >= idx1_b) - dist1 = disTable1[idx1_b][idx1_a - idx1_b]; + if (afp_bef_index[i_val * ylen + j_val] != -1) + curr_bef = afp_bef_index[i_val * ylen + j_val]; else - dist1 = disTable1[idx1_a][idx1_b - idx1_a]; - - int idx2_a = curr.j + i_idx, idx2_b = prv.j + j_idx; - if (idx2_a >= idx2_b) - dist2 = disTable2[idx2_b][idx2_a - idx2_b]; + afp_bef_index[i_val * ylen + j_val] = curr_bef; + } + int curr_aft = -1; + for (int j_val = ylen - 1; j_val >= 0; j_val--) + { + if (afp_aft_index[i_val * ylen + j_val] != -1) + curr_aft = afp_aft_index[i_val * ylen + j_val]; else - dist2 = disTable2[idx2_a][idx2_b - idx2_a]; - - rms_sq += (dist1 - dist2) * (dist1 - dist2); + afp_aft_index[i_val * ylen + j_val] = curr_aft; } } - if (rms_sq > afp_dis_cut) - return 1e9; - return std::sqrt(rms_sq / (fragLen * fragLen)); - }; - auto calc_block_rmsd = [&](const std::vector &afp_list) -> double - { - std::vector r1, r2; - for (size_t a = 0; a < afp_list.size(); a++) + auto get_dvar = [&](const FATCAT_AFP &prv, const FATCAT_AFP &curr) -> double { - for (int l = 0; l < afp_list[a].len; l++) + double rms_sq = 0; + for (int 
i_idx = 0; i_idx < fragLen; i_idx++) { - r1.push_back(afp_list[a].i + l); - r2.push_back(afp_list[a].j + l); + for (int j_idx = 0; j_idx < fragLen; j_idx++) + { + double dist1, dist2; + int idx1_a = curr.i + i_idx, idx1_b = prv.i + j_idx; + if (idx1_a >= idx1_b) + dist1 = disTable1[idx1_b][idx1_a - idx1_b]; + else + dist1 = disTable1[idx1_a][idx1_b - idx1_a]; + + int idx2_a = curr.j + i_idx, idx2_b = prv.j + j_idx; + if (idx2_a >= idx2_b) + dist2 = disTable2[idx2_b][idx2_a - idx2_b]; + else + dist2 = disTable2[idx2_a][idx2_b - idx2_a]; + + rms_sq += (dist1 - dist2) * (dist1 - dist2); + } } - } - int n = r1.size(); - if (n < 3) - return 0.0; - double **p1; - NewArray(&p1, n, 3); - double **p2; - NewArray(&p2, n, 3); - for (int i = 0; i < n; i++) + if (rms_sq > afp_dis_cut) + return 1e9; + return std::sqrt(rms_sq / (fragLen * fragLen)); + }; + + auto calc_block_rmsd = [&](const std::vector &afp_list) -> double { - p1[i][0] = xa[r1[i]][0]; - p1[i][1] = xa[r1[i]][1]; - p1[i][2] = xa[r1[i]][2]; - p2[i][0] = ya[r2[i]][0]; - p2[i][1] = ya[r2[i]][1]; - p2[i][2] = ya[r2[i]][2]; - } - double rms_sq_sum, t_tmp[3], u_tmp[3][3]; - Kabsch(p1, p2, n, 0, &rms_sq_sum, t_tmp, u_tmp); - DeleteArray(&p1, n); - DeleteArray(&p2, n); - return std::sqrt(rms_sq_sum / n); - }; + std::vector r1, r2; + for (size_t a = 0; a < afp_list.size(); a++) + { + for (int l = 0; l < afp_list[a].len; l++) + { + r1.push_back(afp_list[a].i + l); + r2.push_back(afp_list[a].j + l); + } + } + int n = r1.size(); + if (n < 3) + return 0.0; + double **p1; + NewArray(&p1, n, 3); + double **p2; + NewArray(&p2, n, 3); + for (int i = 0; i < n; i++) + { + p1[i][0] = xa[r1[i]][0]; + p1[i][1] = xa[r1[i]][1]; + p1[i][2] = xa[r1[i]][2]; + p2[i][0] = ya[r2[i]][0]; + p2[i][1] = ya[r2[i]][1]; + p2[i][2] = ya[r2[i]][2]; + } + double rms_sq_sum, t_tmp[3], u_tmp[3][3]; + Kabsch(p1, p2, n, 0, &rms_sq_sum, t_tmp, u_tmp); + DeleteArray(&p1, n); + DeleteArray(&p2, n); + return std::sqrt(rms_sq_sum / n); + }; - struct Region - { - 
int s1, e1, s2, e2; - }; + struct Region + { + int s1, e1, s2, e2; + }; - auto run_dp_and_split = [&](int logic_type) -> std::pair, std::vector> - { std::vector sco(n_afps); std::vector twi(n_afps, 0); std::vector pre(n_afps, -1); @@ -3278,10 +3280,10 @@ int flexalign_fatcat_main(double **xa, double **ya, int b1 = std::max(0, curr_j - maxGapFrag); std::vector valid_prevs; - for (int step = 0; step < 2; step++) + for (int st = 0; st < 2; st++) { int a_s, a_e, b_s, b_e; - if (step == 0) + if (st == 0) { a_s = std::max(a1, 0); a_e = std::min(a3, xlen - 1); @@ -3320,20 +3322,14 @@ int flexalign_fatcat_main(double **xa, double **ya, int gap_j = curr_j - (merged_afps[prev].j + merged_afps[prev].len); int m_gap = std::max(gap_i, gap_j); + // unified gap penalty logic double gp = 0.0; - if (logic_type == 0) - { - gp = gap_ext * m_gap; - } - else - { - int m_mis = 0; - if (gap_i < 0 || gap_j < 0) - m_mis = (gap_i < gap_j) ? -gap_i : -gap_j; - gp = gap_ext * m_mis; - if (m_gap > 0) - gp += gap_ext * m_gap; - } + int m_mis = 0; + if (gap_i < 0 || gap_j < 0) + m_mis = (gap_i < gap_j) ? 
-gap_i : -gap_j; + gp = gap_ext * m_mis; + if (m_gap > 0) + gp += gap_ext * m_gap; if (gp < max_penalty) gp = max_penalty; @@ -3401,9 +3397,8 @@ int flexalign_fatcat_main(double **xa, double **ya, } std::reverse(path.begin(), path.end()); - std::vector b1, b2; if (path.empty()) - return std::make_pair(b1, b2); + return std::make_pair(ret_b1, ret_b2); struct Block { @@ -3438,8 +3433,6 @@ int flexalign_fatcat_main(double **xa, double **ya, if (!curr_block.afps.empty()) blocks.push_back(curr_block); - double local_badRmsd = 4.0; - bool splitted = true; while (splitted && blocks.size() < (size_t)(max_twists + 1)) { @@ -3460,7 +3453,7 @@ int flexalign_fatcat_main(double **xa, double **ya, } } - if (max_rmsd >= local_badRmsd && target_b != -1) + if (max_rmsd >= cur_local_badRmsd && target_b != -1) { double max_t = 0; int cut_idx = 0; @@ -3524,7 +3517,7 @@ int flexalign_fatcat_main(double **xa, double **ya, } } - if (min_rmsd < local_badRmsd && min_b != -1) + if (min_rmsd < cur_local_badRmsd && min_b != -1) { blocks[min_b].afps.insert(blocks[min_b].afps.end(), blocks[min_b + 1].afps.begin(), blocks[min_b + 1].afps.end()); blocks.erase(blocks.begin() + min_b + 1); @@ -3568,23 +3561,23 @@ int flexalign_fatcat_main(double **xa, double **ya, } if (real_blocks.empty()) - return std::make_pair(b1, b2); + return std::make_pair(ret_b1, ret_b2); - b1.push_back(0); - b2.push_back(0); + ret_b1.push_back(0); + ret_b2.push_back(0); for (size_t k = 0; k < real_blocks.size() - 1; k++) { - b1.push_back((real_blocks[k].e1 + real_blocks[k + 1].s1) / 2); - b2.push_back((real_blocks[k].e2 + real_blocks[k + 1].s2) / 2); + ret_b1.push_back((real_blocks[k].e1 + real_blocks[k + 1].s1) / 2); + ret_b2.push_back((real_blocks[k].e2 + real_blocks[k + 1].s2) / 2); } - b1.push_back(xlen); - b2.push_back(ylen); + ret_b1.push_back(xlen); + ret_b2.push_back(ylen); - return std::make_pair(b1, b2); + return std::make_pair(ret_b1, ret_b2); }; - auto bounds_fatcat = run_dp_and_split(0); - auto bounds_strict = 
run_dp_and_split(1); + auto bounds_fatcat = generate_bounds(2.5, 3.0, 5.0); + auto bounds_strict = generate_bounds(3.0, 4.0, 4.0); // ========================================== // NEW LOGIC: Run through both sets of bounds (fatcat and strict) @@ -4009,7 +4002,6 @@ int flexalign_fatcat_main(double **xa, double **ya, return tu_vec.size(); } - // Unified engine replacing flexalign, flexalign_best, and flexalign_fatcat int flexalign_unified(string &xname, string &yname, const string &fname_super, const string &fname_lign, const string &fname_matrix, From 601e5f40ccd49f64ff40ea5a07d23d370926e90a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A5=9D=E4=BA=91=E7=AF=AA?= Date: Wed, 3 Jun 2026 16:36:17 +0800 Subject: [PATCH 19/23] added the logic to skip boundary processing and set hinge_opt to 0 for short sub-blocks --- USalign.cpp | 595 +++++++++++++++++++--------------------------------- 1 file changed, 214 insertions(+), 381 deletions(-) diff --git a/USalign.cpp b/USalign.cpp index 8e02235..468b199 100644 --- a/USalign.cpp +++ b/USalign.cpp @@ -2964,6 +2964,75 @@ int flexalign_fatcat_main(double **xa, double **ya, const int mol_type, const int hinge_opt, const int ss_opt, int sparse_val = 0) { + // ========================================== + // TRUE -mm 9 BASELINE (Defender) + // Run full sequence without generate_bounds slicing! + // This perfectly simulates FLEX_BEST (-mm 9) behavior. 
+ // ========================================== + double best_global_max_TM = -1.0; + std::vector> best_tu_vec; + double best_t0[3], best_u0[3][3]; + double best_TM1 = 0.0, best_TM2 = 0.0, best_TM3 = 0.0, best_TM4 = 0.0, best_TM5 = 0.0; + double best_rmsd0 = 0.0, best_Liden = 0.0, best_TM_ali = 0.0, best_rmsd_ali = 0.0; + int best_L_ali = 0, best_n_ali = 0, best_n_ali8 = 0; + std::string best_seqM = "", best_seqxA = "", best_seqyA = ""; + std::vector best_do_vec; + double best_d0A = 0.0, best_d0B = 0.0, best_d0a = 0.0, best_d0u = 0.0; + + bool force_fast_opt_global = (std::min(xlen, ylen) > 1500) ? true : fast_opt; + std::vector local_sequence = sequence; + + for (int cur_ss_opt = 0; cur_ss_opt <= 1; cur_ss_opt++) + { + FlexAlignResult base_res; + // Pass full unbroken sequences directly to flexalign (identical to -mm 9) + execute_flexalign_with_fallback( + xa, ya, (char*)seqx, (char*)seqy, (char*)secx, (char*)secy, + xlen, ylen, local_sequence, Lnorm_ass, d0_scale, + i_opt, a_opt, u_opt, d_opt, force_fast_opt_global, + mol_type, 9, cur_ss_opt, base_res); // -mm 9 explicitly uses 9 hinges + + double cur_max_TM = (base_res.TM1 > base_res.TM2) ? 
base_res.TM1 : base_res.TM2; + if (cur_max_TM > best_global_max_TM) + { + best_global_max_TM = cur_max_TM; + for (int a = 0; a < 3; a++) { + best_t0[a] = base_res.t0[a]; + for (int b = 0; b < 3; b++) best_u0[a][b] = base_res.u0[a][b]; + } + best_tu_vec = base_res.tu_vec; + best_TM1 = base_res.TM1; best_TM2 = base_res.TM2; best_TM3 = base_res.TM3; + best_TM4 = base_res.TM4; best_TM5 = base_res.TM5; + best_rmsd0 = base_res.rmsd0; best_Liden = base_res.Liden; + best_TM_ali = base_res.TM_ali; best_rmsd_ali = base_res.rmsd_ali; + best_L_ali = base_res.L_ali; best_n_ali = base_res.n_ali; best_n_ali8 = base_res.n_ali8; + best_seqM = base_res.seqM; best_seqxA = base_res.seqxA; best_seqyA = base_res.seqyA; + best_do_vec = base_res.do_vec; + best_d0A = base_res.d0A; best_d0B = base_res.d0B; + best_d0a = base_res.d0a; best_d0u = base_res.d0u; + } + } + + // Early exit if the true -mm 9 baseline is already excellent + if (best_global_max_TM >= 0.85) + { + TM1 = best_TM1; TM2 = best_TM2; TM3 = best_TM3; TM4 = best_TM4; TM5 = best_TM5; + rmsd0 = best_rmsd0; Liden = best_Liden; TM_ali = best_TM_ali; rmsd_ali = best_rmsd_ali; + L_ali = best_L_ali; n_ali = best_n_ali; n_ali8 = best_n_ali8; + seqM = best_seqM; seqxA = best_seqxA; seqyA = best_seqyA; + do_vec = best_do_vec; tu_vec = best_tu_vec; + d0A = best_d0A; d0B = best_d0B; d0a = best_d0a; d0u = best_d0u; + for (int a = 0; a < 3; a++) { + t0[a] = best_t0[a]; + for (int b = 0; b < 3; b++) u0[a][b] = best_u0[a][b]; + } + return tu_vec.size(); + } + + // ========================================== + // Proceed to FATCAT sliced bounds logic... 
+ // ========================================== + // FATCAT base parameters int fragLen = 8; double resScore = 3.0; @@ -2976,13 +3045,9 @@ int flexalign_fatcat_main(double **xa, double **ya, int misCut = 2 * fragLen; int maxGapFrag = fragLen + max_gap; double afp_dis_cut = fragLen * fragLen * (disCut * disCut); - int max_twists = hinge_opt; - // ========================================== // OPTIMIZATION 1: Precompute local intra-protein distance matrices - // Utilizes basic_fun.h dist() function to replace manual distance calculation - // ========================================== int max_dist_window = max_gap + 2 * fragLen + 1; std::vector> disTable1(xlen, std::vector(max_dist_window, 0.0)); std::vector> disTable2(ylen, std::vector(max_dist_window, 0.0)); @@ -2990,28 +3055,18 @@ int flexalign_fatcat_main(double **xa, double **ya, for (int i = 0; i < xlen; i++) { for (int j = i; j < std::min(xlen, i + max_dist_window); j++) - { - // Use dist() from basic_fun.h which computes squared distance disTable1[i][j - i] = std::sqrt(dist(xa[i], xa[j])); - } } for (int i = 0; i < ylen; i++) { for (int j = i; j < std::min(ylen, i + max_dist_window); j++) - { - // Use dist() from basic_fun.h disTable2[i][j - i] = std::sqrt(dist(ya[i], ya[j])); - } } - // ========================================== - // NEW LOGIC: Wrapper for generating bounds with specific parameter sets - // ========================================== + // Wrapper for generating bounds auto generate_bounds = [&](double cur_rmsdCut, double cur_badRmsd, double cur_local_badRmsd) -> std::pair, std::vector> { - // ========================================== // Step 1: Extract initial AFPs in batches - // ========================================== std::vector initial_afps; int step = sparse_val + 1; @@ -3031,7 +3086,7 @@ int flexalign_fatcat_main(double **xa, double **ya, if (d3_term < 0.3 * std::min(xlen, ylen)) continue; - double dist1 = disTable1[i][fragLen - 1]; // Precomputed end-to-end distance + double dist1 = 
disTable1[i][fragLen - 1]; double dist2 = disTable2[j][fragLen - 1]; if (std::fabs(dist1 - dist2) > 2.0 * cur_rmsdCut) @@ -3039,12 +3094,8 @@ int flexalign_fatcat_main(double **xa, double **ya, for (int k = 0; k < fragLen; k++) { - r1[k][0] = xa[i + k][0]; - r1[k][1] = xa[i + k][1]; - r1[k][2] = xa[i + k][2]; - r2[k][0] = ya[j + k][0]; - r2[k][1] = ya[j + k][1]; - r2[k][2] = ya[j + k][2]; + r1[k][0] = xa[i + k][0]; r1[k][1] = xa[i + k][1]; r1[k][2] = xa[i + k][2]; + r2[k][0] = ya[j + k][0]; r2[k][1] = ya[j + k][1]; r2[k][2] = ya[j + k][2]; } double rms_sum_sq, t_tmp[3], u_tmp[3][3]; @@ -3054,24 +3105,19 @@ int flexalign_fatcat_main(double **xa, double **ya, if (rmsd_tmp < cur_rmsdCut) { FATCAT_AFP afp; - afp.i = i; - afp.j = j; - afp.len = fragLen; + afp.i = i; afp.j = j; afp.len = fragLen; afp.score = resScore * fragLen * (1.0 - (rmsd_tmp / cur_badRmsd) * (rmsd_tmp / cur_badRmsd)); for (int a = 0; a < 3; a++) { afp.t[a] = t_tmp[a]; - for (int b = 0; b < 3; b++) - afp.R[a][b] = u_tmp[a][b]; + for (int b = 0; b < 3; b++) afp.R[a][b] = u_tmp[a][b]; } initial_afps.push_back(afp); } } } - // ========================================== // Step 2: Merge diagonal AFPs - // ========================================== int max_diagonal_idx = xlen + ylen + 1; std::vector> diagonals(max_diagonal_idx); for (size_t k = 0; k < initial_afps.size(); k++) @@ -3091,8 +3137,7 @@ int flexalign_fatcat_main(double **xa, double **ya, continue; std::vector &group = diagonals[d]; - std::sort(group.begin(), group.end(), [](const FATCAT_AFP &a, const FATCAT_AFP &b) - { return a.i < b.i; }); + std::sort(group.begin(), group.end(), [](const FATCAT_AFP &a, const FATCAT_AFP &b) { return a.i < b.i; }); int n_group = group.size(); std::vector invalid(n_group, false); @@ -3113,12 +3158,8 @@ int flexalign_fatcat_main(double **xa, double **ya, for (int k = 0; k < new_len; k++) { - r1_merge[k][0] = xa[curr.i + k][0]; - r1_merge[k][1] = xa[curr.i + k][1]; - r1_merge[k][2] = xa[curr.i + k][2]; - 
r2_merge[k][0] = ya[curr.j + k][0]; - r2_merge[k][1] = ya[curr.j + k][1]; - r2_merge[k][2] = ya[curr.j + k][2]; + r1_merge[k][0] = xa[curr.i + k][0]; r1_merge[k][1] = xa[curr.i + k][1]; r1_merge[k][2] = xa[curr.i + k][2]; + r2_merge[k][0] = ya[curr.j + k][0]; r2_merge[k][1] = ya[curr.j + k][1]; r2_merge[k][2] = ya[curr.j + k][2]; } double rms_sum_sq, t_tmp[3], u_tmp[3][3]; @@ -3131,8 +3172,7 @@ int flexalign_fatcat_main(double **xa, double **ya, for (int a = 0; a < 3; a++) { curr.t[a] = t_tmp[a]; - for (int b = 0; b < 3; b++) - curr.R[a][b] = u_tmp[a][b]; + for (int b = 0; b < 3; b++) curr.R[a][b] = u_tmp[a][b]; } curr.score = resScore * new_len * (1.0 - (rmsd_tmp / cur_badRmsd) * (rmsd_tmp / cur_badRmsd)); invalid[nxt_idx] = true; @@ -3145,19 +3185,17 @@ int flexalign_fatcat_main(double **xa, double **ya, DeleteArray(&r1_merge, max_merge_len); DeleteArray(&r2_merge, max_merge_len); - std::sort(merged_afps.begin(), merged_afps.end(), [](const FATCAT_AFP &a, const FATCAT_AFP &b) - { + std::sort(merged_afps.begin(), merged_afps.end(), [](const FATCAT_AFP &a, const FATCAT_AFP &b) { if (a.i == b.i) return a.j < b.j; - return a.i < b.i; }); + return a.i < b.i; + }); int n_afps = merged_afps.size(); std::vector ret_b1, ret_b2; if (n_afps == 0) return std::make_pair(ret_b1, ret_b2); - // ========================================== // Step 3 & 4: Dual Dynamic Programming and Domain Splitting - // ========================================== std::vector afp_aft_index(xlen * ylen, -1); std::vector afp_bef_index(xlen * ylen, -1); @@ -3169,8 +3207,7 @@ int flexalign_fatcat_main(double **xa, double **ya, for (int i_val = 0; i_val < xlen; i_val++) { - if (i_to_j[i_val].empty()) - continue; + if (i_to_j[i_val].empty()) continue; for (size_t p = 0; p < i_to_j[i_val].size(); p++) { int j_val = i_to_j[i_val][p].first; @@ -3180,18 +3217,14 @@ int flexalign_fatcat_main(double **xa, double **ya, int curr_bef = -1; for (int j_val = 0; j_val < ylen; j_val++) { - if (afp_bef_index[i_val * 
ylen + j_val] != -1) - curr_bef = afp_bef_index[i_val * ylen + j_val]; - else - afp_bef_index[i_val * ylen + j_val] = curr_bef; + if (afp_bef_index[i_val * ylen + j_val] != -1) curr_bef = afp_bef_index[i_val * ylen + j_val]; + else afp_bef_index[i_val * ylen + j_val] = curr_bef; } int curr_aft = -1; for (int j_val = ylen - 1; j_val >= 0; j_val--) { - if (afp_aft_index[i_val * ylen + j_val] != -1) - curr_aft = afp_aft_index[i_val * ylen + j_val]; - else - afp_aft_index[i_val * ylen + j_val] = curr_aft; + if (afp_aft_index[i_val * ylen + j_val] != -1) curr_aft = afp_aft_index[i_val * ylen + j_val]; + else afp_aft_index[i_val * ylen + j_val] = curr_aft; } } @@ -3204,22 +3237,17 @@ int flexalign_fatcat_main(double **xa, double **ya, { double dist1, dist2; int idx1_a = curr.i + i_idx, idx1_b = prv.i + j_idx; - if (idx1_a >= idx1_b) - dist1 = disTable1[idx1_b][idx1_a - idx1_b]; - else - dist1 = disTable1[idx1_a][idx1_b - idx1_a]; + if (idx1_a >= idx1_b) dist1 = disTable1[idx1_b][idx1_a - idx1_b]; + else dist1 = disTable1[idx1_a][idx1_b - idx1_a]; int idx2_a = curr.j + i_idx, idx2_b = prv.j + j_idx; - if (idx2_a >= idx2_b) - dist2 = disTable2[idx2_b][idx2_a - idx2_b]; - else - dist2 = disTable2[idx2_a][idx2_b - idx2_a]; + if (idx2_a >= idx2_b) dist2 = disTable2[idx2_b][idx2_a - idx2_b]; + else dist2 = disTable2[idx2_a][idx2_b - idx2_a]; rms_sq += (dist1 - dist2) * (dist1 - dist2); } } - if (rms_sq > afp_dis_cut) - return 1e9; + if (rms_sq > afp_dis_cut) return 1e9; return std::sqrt(rms_sq / (fragLen * fragLen)); }; @@ -3235,38 +3263,26 @@ int flexalign_fatcat_main(double **xa, double **ya, } } int n = r1.size(); - if (n < 3) - return 0.0; - double **p1; - NewArray(&p1, n, 3); - double **p2; - NewArray(&p2, n, 3); + if (n < 3) return 0.0; + double **p1; NewArray(&p1, n, 3); + double **p2; NewArray(&p2, n, 3); for (int i = 0; i < n; i++) { - p1[i][0] = xa[r1[i]][0]; - p1[i][1] = xa[r1[i]][1]; - p1[i][2] = xa[r1[i]][2]; - p2[i][0] = ya[r2[i]][0]; - p2[i][1] = ya[r2[i]][1]; - 
p2[i][2] = ya[r2[i]][2]; + p1[i][0] = xa[r1[i]][0]; p1[i][1] = xa[r1[i]][1]; p1[i][2] = xa[r1[i]][2]; + p2[i][0] = ya[r2[i]][0]; p2[i][1] = ya[r2[i]][1]; p2[i][2] = ya[r2[i]][2]; } double rms_sq_sum, t_tmp[3], u_tmp[3][3]; Kabsch(p1, p2, n, 0, &rms_sq_sum, t_tmp, u_tmp); - DeleteArray(&p1, n); - DeleteArray(&p2, n); + DeleteArray(&p1, n); DeleteArray(&p2, n); return std::sqrt(rms_sq_sum / n); }; - struct Region - { - int s1, e1, s2, e2; - }; + struct Region { int s1, e1, s2, e2; }; std::vector sco(n_afps); std::vector twi(n_afps, 0); std::vector pre(n_afps, -1); - for (int m = 0; m < n_afps; m++) - sco[m] = merged_afps[m].score; + for (int m = 0; m < n_afps; m++) sco[m] = merged_afps[m].score; for (int m = 0; m < n_afps; m++) { @@ -3285,28 +3301,22 @@ int flexalign_fatcat_main(double **xa, double **ya, int a_s, a_e, b_s, b_e; if (st == 0) { - a_s = std::max(a1, 0); - a_e = std::min(a3, xlen - 1); - b_s = std::max(b2, 0); - b_e = std::min(b3, ylen - 1); + a_s = std::max(a1, 0); a_e = std::min(a3, xlen - 1); + b_s = std::max(b2, 0); b_e = std::min(b3, ylen - 1); } else { - a_s = std::max(a2, 0); - a_e = std::min(a3, xlen - 1); - b_s = std::max(b1, 0); - b_e = std::min(b2 - 1, ylen - 1); + a_s = std::max(a2, 0); a_e = std::min(a3, xlen - 1); + b_s = std::max(b1, 0); b_e = std::min(b2 - 1, ylen - 1); } - if (b_s >= ylen || b_e < 0) - continue; + if (b_s >= ylen || b_e < 0) continue; for (int prev_i = a_s; prev_i <= a_e; prev_i++) { int s1 = afp_aft_index[prev_i * ylen + b_s]; int s2 = afp_bef_index[prev_i * ylen + b_e]; if (s1 != -1 && s2 != -1 && s1 <= s2) - for (int s = s1; s <= s2; s++) - valid_prevs.push_back(s); + for (int s = s1; s <= s2; s++) valid_prevs.push_back(s); } } @@ -3315,24 +3325,18 @@ int flexalign_fatcat_main(double **xa, double **ya, { int prev = valid_prevs[v]; int prev_twi = twi[prev]; - if (prev_twi > max_twists) - continue; + if (prev_twi > max_twists) continue; int gap_i = curr_i - (merged_afps[prev].i + merged_afps[prev].len); int gap_j = 
curr_j - (merged_afps[prev].j + merged_afps[prev].len); int m_gap = std::max(gap_i, gap_j); - // unified gap penalty logic double gp = 0.0; int m_mis = 0; - if (gap_i < 0 || gap_j < 0) - m_mis = (gap_i < gap_j) ? -gap_i : -gap_j; + if (gap_i < 0 || gap_j < 0) m_mis = (gap_i < gap_j) ? -gap_i : -gap_j; gp = gap_ext * m_mis; - if (m_gap > 0) - gp += gap_ext * m_gap; - - if (gp < max_penalty) - gp = max_penalty; + if (m_gap > 0) gp += gap_ext * m_gap; + if (gp < max_penalty) gp = max_penalty; double rms_sq = 0; for (int k = 0; k < fragLen; k++) @@ -3341,16 +3345,12 @@ int flexalign_fatcat_main(double **xa, double **ya, { double dist1, dist2; int idx1_a = curr_i + k, idx1_b = merged_afps[prev].i + l; - if (idx1_a >= idx1_b) - dist1 = disTable1[idx1_b][idx1_a - idx1_b]; - else - dist1 = disTable1[idx1_a][idx1_b - idx1_a]; + if (idx1_a >= idx1_b) dist1 = disTable1[idx1_b][idx1_a - idx1_b]; + else dist1 = disTable1[idx1_a][idx1_b - idx1_a]; int idx2_a = curr_j + k, idx2_b = merged_afps[prev].j + l; - if (idx2_a >= idx2_b) - dist2 = disTable2[idx2_b][idx2_a - idx2_b]; - else - dist2 = disTable2[idx2_a][idx2_b - idx2_a]; + if (idx2_a >= idx2_b) dist2 = disTable2[idx2_b][idx2_a - idx2_b]; + else dist2 = disTable2[idx2_a][idx2_b - idx2_a]; rms_sq += (dist1 - dist2) * (dist1 - dist2); } @@ -3360,45 +3360,37 @@ int flexalign_fatcat_main(double **xa, double **ya, int is_twist = 0; if (rms_sq >= afp_dis_cut) { - tp = twist_pen; - is_twist = 1; + tp = twist_pen; is_twist = 1; } else { double dvar = std::sqrt(rms_sq / (fragLen * fragLen)); - if (dvar > disCut - disSmooth) - tp = twist_pen * std::sqrt((dvar - disCut + disSmooth) / disSmooth); + if (dvar > disCut - disSmooth) tp = twist_pen * std::sqrt((dvar - disCut + disSmooth) / disSmooth); } - if (prev_twi + is_twist > max_twists) - continue; + if (prev_twi + is_twist > max_twists) continue; double stmp = sco[prev] + curr_sco + tp + gp; if (stmp > sco[m]) { - sco[m] = stmp; - pre[m] = prev; - twi[m] = prev_twi + is_twist; + 
sco[m] = stmp; pre[m] = prev; twi[m] = prev_twi + is_twist; } } } int best_m = 0; for (int m = 1; m < n_afps; m++) - if (sco[m] > sco[best_m]) - best_m = m; + if (sco[m] > sco[best_m]) best_m = m; std::vector path; int curr_m = best_m; while (curr_m != -1) { - path.push_back(curr_m); - curr_m = pre[curr_m]; + path.push_back(curr_m); curr_m = pre[curr_m]; } std::reverse(path.begin(), path.end()); - if (path.empty()) - return std::make_pair(ret_b1, ret_b2); + if (path.empty()) return std::make_pair(ret_b1, ret_b2); struct Block { @@ -3419,19 +3411,15 @@ int flexalign_fatcat_main(double **xa, double **ya, if (dvar >= disCut) { blocks.push_back(curr_block); - curr_block.afps.clear(); - curr_block.dvars.clear(); - curr_block.afps.push_back(curr); - curr_block.dvars.push_back(0.0); + curr_block.afps.clear(); curr_block.dvars.clear(); + curr_block.afps.push_back(curr); curr_block.dvars.push_back(0.0); } else { - curr_block.afps.push_back(curr); - curr_block.dvars.push_back(dvar); + curr_block.afps.push_back(curr); curr_block.dvars.push_back(dvar); } } - if (!curr_block.afps.empty()) - blocks.push_back(curr_block); + if (!curr_block.afps.empty()) blocks.push_back(curr_block); bool splitted = true; while (splitted && blocks.size() < (size_t)(max_twists + 1)) @@ -3445,25 +3433,16 @@ int flexalign_fatcat_main(double **xa, double **ya, if (blocks[b].afps.size() > 2) { double cur_rmsd = calc_block_rmsd(blocks[b].afps); - if (cur_rmsd > max_rmsd) - { - max_rmsd = cur_rmsd; - target_b = b; - } + if (cur_rmsd > max_rmsd) { max_rmsd = cur_rmsd; target_b = b; } } } if (max_rmsd >= cur_local_badRmsd && target_b != -1) { - double max_t = 0; - int cut_idx = 0; + double max_t = 0; int cut_idx = 0; for (size_t i = 1; i < blocks[target_b].afps.size(); i++) { - if (blocks[target_b].dvars[i] > max_t) - { - max_t = blocks[target_b].dvars[i]; - cut_idx = i; - } + if (blocks[target_b].dvars[i] > max_t) { max_t = blocks[target_b].dvars[i]; cut_idx = i; } } if (cut_idx > 0) @@ -3472,10 +3451,8 
@@ int flexalign_fatcat_main(double **xa, double **ya, right_blk.afps.assign(blocks[target_b].afps.begin() + cut_idx, blocks[target_b].afps.end()); right_blk.dvars.assign(blocks[target_b].dvars.begin() + cut_idx, blocks[target_b].dvars.end()); right_blk.dvars[0] = 0.0; - blocks[target_b].afps.erase(blocks[target_b].afps.begin() + cut_idx, blocks[target_b].afps.end()); blocks[target_b].dvars.erase(blocks[target_b].dvars.begin() + cut_idx, blocks[target_b].dvars.end()); - blocks.insert(blocks.begin() + target_b + 1, right_blk); splitted = true; } @@ -3491,11 +3468,7 @@ int flexalign_fatcat_main(double **xa, double **ya, int b1 = (b > 0) ? blocks[b - 1].afps.back().i + blocks[b - 1].afps.back().len : 0; int b2 = (b > 0) ? blocks[b - 1].afps.back().j + blocks[b - 1].afps.back().len : 0; int span = std::min(e1 - b1, e2 - b2); - if (span < 2 * fragLen) - { - blocks.erase(blocks.begin() + b); - b--; - } + if (span < 2 * fragLen) { blocks.erase(blocks.begin() + b); b--; } } } @@ -3510,11 +3483,7 @@ int flexalign_fatcat_main(double **xa, double **ya, std::vector temp_merged = blocks[b].afps; temp_merged.insert(temp_merged.end(), blocks[b + 1].afps.begin(), blocks[b + 1].afps.end()); double cur_rmsd = calc_block_rmsd(temp_merged); - if (cur_rmsd < min_rmsd) - { - min_rmsd = cur_rmsd; - min_b = b; - } + if (cur_rmsd < min_rmsd) { min_rmsd = cur_rmsd; min_b = b; } } if (min_rmsd < cur_local_badRmsd && min_b != -1) @@ -3534,108 +3503,72 @@ int flexalign_fatcat_main(double **xa, double **ya, { FATCAT_AFP afp = blocks[b].afps[a]; int skip = std::max(std::max(last_i - afp.i, last_j - afp.j), 0); - if (skip >= afp.len) - continue; - - int eff_i = afp.i + skip; - int eff_j = afp.j + skip; - int eff_L = afp.len - skip; + if (skip >= afp.len) continue; - if (b_s1 == -1) - { - b_s1 = eff_i; - b_s2 = eff_j; - } - b_e1 = eff_i + eff_L; - b_e2 = eff_j + eff_L; - last_i = b_e1; - last_j = b_e2; + int eff_i = afp.i + skip; int eff_j = afp.j + skip; int eff_L = afp.len - skip; + if (b_s1 == 
-1) { b_s1 = eff_i; b_s2 = eff_j; } + b_e1 = eff_i + eff_L; b_e2 = eff_j + eff_L; + last_i = b_e1; last_j = b_e2; } if (b_s1 != -1) { - if (b_e1 - b_s1 >= 3 && b_e2 - b_s2 >= 3) { - Region r = {b_s1, b_e1, b_s2, b_e2}; - real_blocks.push_back(r); + if (b_e1 - b_s1 >= 4 && b_e2 - b_s2 >= 4) { + Region r = {b_s1, b_e1, b_s2, b_e2}; + real_blocks.push_back(r); } } } - if (real_blocks.empty()) - return std::make_pair(ret_b1, ret_b2); + if (real_blocks.empty()) return std::make_pair(ret_b1, ret_b2); - ret_b1.push_back(0); - ret_b2.push_back(0); + ret_b1.push_back(0); ret_b2.push_back(0); for (size_t k = 0; k < real_blocks.size() - 1; k++) { ret_b1.push_back((real_blocks[k].e1 + real_blocks[k + 1].s1) / 2); ret_b2.push_back((real_blocks[k].e2 + real_blocks[k + 1].s2) / 2); } - ret_b1.push_back(xlen); - ret_b2.push_back(ylen); + ret_b1.push_back(xlen); ret_b2.push_back(ylen); return std::make_pair(ret_b1, ret_b2); }; - auto bounds_fatcat = generate_bounds(2.5, 3.0, 5.0); - auto bounds_strict = generate_bounds(3.0, 4.0, 4.0); + auto bounds_fatcat = generate_bounds(3.0, 4.0, 4.0); + auto bounds_strict = generate_bounds(2.0, 3.0, 2.0); - // ========================================== - // NEW LOGIC: Run through both sets of bounds (fatcat and strict) - // Calculate final TM-scores for both, and select the one with the higher TM-score. 
- // ========================================== std::vector, std::vector>> all_bounds; - - // Add the loose logic bounds all_bounds.push_back(bounds_fatcat); - - // Add the strict logic bounds only if they differ from the loose one to save computation time if (bounds_strict.first != bounds_fatcat.first || bounds_strict.second != bounds_fatcat.second) { all_bounds.push_back(bounds_strict); } - // Variables to store the best global metrics across all tested bounds - double best_global_max_TM = -1.0; - std::vector> best_tu_vec; - double best_t0[3], best_u0[3][3]; - double best_TM1 = 0.0, best_TM2 = 0.0, best_TM3 = 0.0, best_TM4 = 0.0, best_TM5 = 0.0; - double best_rmsd0 = 0.0, best_Liden = 0.0, best_TM_ali = 0.0, best_rmsd_ali = 0.0; - int best_L_ali = 0, best_n_ali = 0, best_n_ali8 = 0; - std::string best_seqM = "", best_seqxA = "", best_seqyA = ""; - std::vector best_do_vec; - - // To store best normalization factors - double best_d0A = 0.0, best_d0B = 0.0, best_d0a = 0.0, best_d0u = 0.0; - - // Loop through both bound sets + // Loop through both bound sets, updating best_global_max_TM if we beat the -mm 9 defender for (size_t b_idx = 0; b_idx < all_bounds.size(); b_idx++) { std::vector& bounds1 = all_bounds[b_idx].first; std::vector& bounds2 = all_bounds[b_idx].second; - // Skip if no valid bounds were found in this logic - if (bounds1.empty()) continue; - - // ========================================== - // INSERT NEW DEBUG CODE HERE - // ========================================== - // DEBUG: Print the end-to-end continuous regions sent to flexalign - // std::cout << "DEBUG (Mode " << (b_idx == 0 ? 
"FATCAT" : "STRICT") << "): " - // << "Sequence partitioned into " << bounds1.size() - 1 << " continuous sub-regions.\n"; - // for (size_t k = 0; k < bounds1.size() - 1; k++) { - // int L1_sub = bounds1[k + 1] - bounds1[k]; - // int L2_sub = bounds2[k + 1] - bounds2[k]; - // bool will_skip = (L1_sub < 3 || L2_sub < 3); - // std::cout << " Sub-region " << k + 1 << ": " - // << "Seq1 [" << bounds1[k] << " to " << bounds1[k + 1] << ") <-> " - // << "Seq2 [" << bounds2[k] << " to " << bounds2[k + 1] << ") " - // << (will_skip ? "[SKIPPED: fill gaps]" : "[SENT TO flexalign]") << "\n"; + // Skip if only one interval (block) is generated, as the full unbroken sequence + // has already been processed by the baseline (-mm 9) above. + if (bounds1.size() <= 2) continue; + + // ================== DEBUG START ================== + // Output the interval mapping for the current boundary set + // std::cout << "\n[DEBUG] --- Region Mapping Table ---" << std::endl; + // std::cout << "[DEBUG] Mode: " << (b_idx == 0 ? 
"FATCAT Bounds" : "Strict Bounds") << std::endl; + // std::cout << "[DEBUG] Total Blocks: " << (bounds1.size() - 1) << std::endl; + + // for (size_t k = 0; k < bounds1.size() - 1; k++) + // { + // std::cout << "[DEBUG] Block " << (k + 1) << ": " + // << "Chain1 [" << bounds1[k] << " -> " << bounds1[k + 1] << "] <==> " + // << "Chain2 [" << bounds2[k] << " -> " << bounds2[k + 1] << "]" + // << std::endl; // } - // ========================================== + // std::cout << "[DEBUG] ----------------------------\n" << std::endl; + // =================== DEBUG END =================== - // ========================================== - // Step 5: Iteratively align each block using TRUE flexalign_best logic - // ========================================== + // Step 5: Iteratively align each block std::string cur_global_seqM = "", cur_global_seqxA = "", cur_global_seqyA = ""; cur_global_seqM.reserve(xlen + ylen + max_gap); cur_global_seqxA.reserve(xlen + ylen + max_gap); @@ -3651,25 +3584,19 @@ int flexalign_fatcat_main(double **xa, double **ya, int L1_sub = x_e - x_s; int L2_sub = y_e - y_s; - // If the block is too short, just assign gaps if (L1_sub < 3 || L2_sub < 3) { for (int i = 0; i < L1_sub; i++) { - cur_global_seqxA += seqx[x_s + i]; - cur_global_seqyA += '-'; - cur_global_seqM += ' '; + cur_global_seqxA += seqx[x_s + i]; cur_global_seqyA += '-'; cur_global_seqM += ' '; } for (int i = 0; i < L2_sub; i++) { - cur_global_seqxA += '-'; - cur_global_seqyA += seqy[y_s + i]; - cur_global_seqM += ' '; + cur_global_seqxA += '-'; cur_global_seqyA += seqy[y_s + i]; cur_global_seqM += ' '; } continue; } - // Extract sub-sequences and coordinates double **xa_sub, **ya_sub; NewArray(&xa_sub, L1_sub, 3); NewArray(&ya_sub, L2_sub, 3); @@ -3680,25 +3607,17 @@ int flexalign_fatcat_main(double **xa, double **ya, for (int i = 0; i < L1_sub; i++) { - xa_sub[i][0] = xa[x_s + i][0]; - xa_sub[i][1] = xa[x_s + i][1]; - xa_sub[i][2] = xa[x_s + i][2]; - seqx_sub[i] = seqx[x_s + i]; - 
secx_sub[i] = secx[x_s + i]; + xa_sub[i][0] = xa[x_s + i][0]; xa_sub[i][1] = xa[x_s + i][1]; xa_sub[i][2] = xa[x_s + i][2]; + seqx_sub[i] = seqx[x_s + i]; secx_sub[i] = secx[x_s + i]; } - seqx_sub[L1_sub] = '\0'; - secx_sub[L1_sub] = '\0'; + seqx_sub[L1_sub] = '\0'; secx_sub[L1_sub] = '\0'; for (int i = 0; i < L2_sub; i++) { - ya_sub[i][0] = ya[y_s + i][0]; - ya_sub[i][1] = ya[y_s + i][1]; - ya_sub[i][2] = ya[y_s + i][2]; - seqy_sub[i] = seqy[y_s + i]; - secy_sub[i] = secy[y_s + i]; + ya_sub[i][0] = ya[y_s + i][0]; ya_sub[i][1] = ya[y_s + i][1]; ya_sub[i][2] = ya[y_s + i][2]; + seqy_sub[i] = seqy[y_s + i]; secy_sub[i] = secy[y_s + i]; } - seqy_sub[L2_sub] = '\0'; - secy_sub[L2_sub] = '\0'; + seqy_sub[L2_sub] = '\0'; secy_sub[L2_sub] = '\0'; double t0_best[3], u0_best[3][3]; double TM_best_max = -1.0; @@ -3706,18 +3625,18 @@ int flexalign_fatcat_main(double **xa, double **ya, std::vector> tu_vec_best; bool force_fast_opt = (std::min(L1_sub, L2_sub) > 1500) ? true : fast_opt; - std::vector local_sequence = sequence; - // Iterate over Secondary Structure optimization choices + // Set hinge_opt to 0 if the block length is less than 2 * fragLen + int local_hinge_opt = (std::min(L1_sub, L2_sub) < 2 * fragLen) ? 0 : hinge_opt; + for (int cur_ss_opt = 0; cur_ss_opt <= 1; cur_ss_opt++) { FlexAlignResult cur_res; - execute_flexalign_with_fallback( xa_sub, ya_sub, seqx_sub, seqy_sub, secx_sub, secy_sub, L1_sub, L2_sub, local_sequence, Lnorm_ass, d0_scale, i_opt, a_opt, u_opt, d_opt, force_fast_opt, - mol_type, hinge_opt, cur_ss_opt, cur_res); + mol_type, local_hinge_opt, cur_ss_opt, cur_res); double cur_max_TM = (cur_res.TM1 > cur_res.TM2) ? 
cur_res.TM1 : cur_res.TM2; if (cur_max_TM > TM_best_max) @@ -3726,41 +3645,28 @@ int flexalign_fatcat_main(double **xa, double **ya, for (int a = 0; a < 3; a++) { t0_best[a] = cur_res.t0[a]; - for (int b = 0; b < 3; b++) - u0_best[a][b] = cur_res.u0[a][b]; + for (int b = 0; b < 3; b++) u0_best[a][b] = cur_res.u0[a][b]; } - seqM_best = cur_res.seqM; - seqxA_best = cur_res.seqxA; - seqyA_best = cur_res.seqyA; + seqM_best = cur_res.seqM; seqxA_best = cur_res.seqxA; seqyA_best = cur_res.seqyA; tu_vec_best = cur_res.tu_vec; } } - // Fallback for current block - if (TM_best_max < 0) + if (TM_best_max <= 0) { for (int i = 0; i < L1_sub; i++) { - cur_global_seqxA += seqx_sub[i]; - cur_global_seqyA += '-'; - cur_global_seqM += ' '; + cur_global_seqxA += seqx_sub[i]; cur_global_seqyA += '-'; cur_global_seqM += ' '; } for (int i = 0; i < L2_sub; i++) { - cur_global_seqxA += '-'; - cur_global_seqyA += seqy_sub[i]; - cur_global_seqM += ' '; + cur_global_seqxA += '-'; cur_global_seqyA += seqy_sub[i]; cur_global_seqM += ' '; } - DeleteArray(&xa_sub, L1_sub); - DeleteArray(&ya_sub, L2_sub); - delete[] seqx_sub; - delete[] seqy_sub; - delete[] secx_sub; - delete[] secy_sub; + DeleteArray(&xa_sub, L1_sub); DeleteArray(&ya_sub, L2_sub); + delete[] seqx_sub; delete[] seqy_sub; delete[] secx_sub; delete[] secy_sub; continue; } - // Ensure tu_vec_best is populated if (tu_vec_best.empty()) { std::vector tu_tmp(12); @@ -3769,15 +3675,11 @@ int flexalign_fatcat_main(double **xa, double **ya, } int base_tu_idx = cur_tu_vec.size(); - for (size_t m = 0; m < tu_vec_best.size(); m++) - { - cur_tu_vec.push_back(tu_vec_best[m]); - } + for (size_t m = 0; m < tu_vec_best.size(); m++) cur_tu_vec.push_back(tu_vec_best[m]); int rx = x_s; int current_global_idx = base_tu_idx; - // Merge current block sequences to the global sequence for (size_t i = 0; i < seqxA_best.length(); i++) { char c = seqM_best[i]; @@ -3785,17 +3687,10 @@ int flexalign_fatcat_main(double **xa, double **ya, if (c != ' ' && c != 
'.' && c != ':') { int local_hinge_idx = -1; - if (c >= '0' && c <= '9') - local_hinge_idx = c - '0'; - else if (c >= 'a' && c <= 'z') - local_hinge_idx = c - 'a' + 10; - else if (c >= 'A' && c <= 'Z') - local_hinge_idx = c - 'A' + 36; - - if (local_hinge_idx >= 0 && local_hinge_idx < tu_vec_best.size()) - { - current_global_idx = base_tu_idx + local_hinge_idx; - } + if (c >= '0' && c <= '9') local_hinge_idx = c - '0'; + else if (c >= 'a' && c <= 'z') local_hinge_idx = c - 'a' + 10; + else if (c >= 'A' && c <= 'Z') local_hinge_idx = c - 'A' + 36; + if (local_hinge_idx >= 0 && local_hinge_idx < tu_vec_best.size()) current_global_idx = base_tu_idx + local_hinge_idx; } if (seqxA_best[i] != '-') @@ -3809,15 +3704,10 @@ int flexalign_fatcat_main(double **xa, double **ya, if (c != ' ' && c != '.' && c != ':') { char global_c; - if (current_global_idx < 10) - global_c = '0' + current_global_idx; - else if (current_global_idx < 36) - global_c = 'a' + (current_global_idx - 10); - else if (current_global_idx < 62) - global_c = 'A' + (current_global_idx - 36); - else - global_c = '*'; - + if (current_global_idx < 10) global_c = '0' + current_global_idx; + else if (current_global_idx < 36) global_c = 'a' + (current_global_idx - 10); + else if (current_global_idx < 62) global_c = 'A' + (current_global_idx - 36); + else global_c = '*'; seqM_best[i] = global_c; } else @@ -3825,28 +3715,16 @@ int flexalign_fatcat_main(double **xa, double **ya, seqM_best[i] = c; } } - else - { - seqM_best[i] = ' '; - } + else { seqM_best[i] = ' '; } } - cur_global_seqM += seqM_best; - cur_global_seqxA += seqxA_best; - cur_global_seqyA += seqyA_best; + cur_global_seqM += seqM_best; cur_global_seqxA += seqxA_best; cur_global_seqyA += seqyA_best; - DeleteArray(&xa_sub, L1_sub); - DeleteArray(&ya_sub, L2_sub); - delete[] seqx_sub; - delete[] seqy_sub; - delete[] secx_sub; - delete[] secy_sub; + DeleteArray(&xa_sub, L1_sub); DeleteArray(&ya_sub, L2_sub); + delete[] seqx_sub; delete[] seqy_sub; delete[] 
secx_sub; delete[] secy_sub; } - // ========================================== // Step 6: Recalculate global metrics correctly for current DP boundary - // Utilizing basic_fun.h transform() and dist() functions for matrix rotations and distance - // ========================================== double cur_d0A = 1.24 * std::pow(ylen * 1.0 - 15.0, 1.0 / 3.0) - 1.8; if (cur_d0A < 0.5) cur_d0A = 0.5; double cur_d0B = 1.24 * std::pow(xlen * 1.0 - 15.0, 1.0 / 3.0) - 1.8; @@ -3875,13 +3753,11 @@ int flexalign_fatcat_main(double **xa, double **ya, if (x_valid && y_valid) { int matrix_idx = cur_global_res_tu[i_res]; - if (matrix_idx >= 0 && matrix_idx < cur_tu_vec.size()) { double t_k[3], u_k[3][3]; tu2t_u(cur_tu_vec[matrix_idx], t_k, u_k); - // Use the transform() and dist() from basic_fun.h double x_rot[3]; transform(t_k, u_k, xa[i_res], x_rot); double dist2 = dist(x_rot, ya[j_res]); @@ -3900,22 +3776,12 @@ int flexalign_fatcat_main(double **xa, double **ya, { cur_rmsd0 += dist2; cur_n_ali8++; - - if (seqx[i_res] == seqy[j_res]) - { - cur_Liden += 1.0; - } + if (seqx[i_res] == seqy[j_res]) cur_Liden += 1.0; } } - else - { - cur_do_vec.push_back(-1); - } - } - else - { - cur_do_vec.push_back(-1); + else { cur_do_vec.push_back(-1); } } + else { cur_do_vec.push_back(-1); } if (x_valid) i_res++; if (y_valid) j_res++; @@ -3927,40 +3793,22 @@ int flexalign_fatcat_main(double **xa, double **ya, if (a_opt) cur_TM3 /= (xlen + ylen) * 0.5; if (u_opt) cur_TM4 /= Lnorm_ass; if (d_opt) cur_TM5 /= ylen; + if (cur_n_ali8 > 0) cur_rmsd0 = std::sqrt(cur_rmsd0 / cur_n_ali8); + else cur_rmsd0 = 0.0; - if (cur_n_ali8 > 0) - cur_rmsd0 = std::sqrt(cur_rmsd0 / cur_n_ali8); - else - cur_rmsd0 = 0.0; - - // Compare the current iteration against the best found so far + // Compare against the -mm 9 defender! double cur_global_max_TM = (cur_TM1 > cur_TM2) ? 
cur_TM1 : cur_TM2; if (cur_global_max_TM > best_global_max_TM) { best_global_max_TM = cur_global_max_TM; best_tu_vec = cur_tu_vec; - best_TM1 = cur_TM1; - best_TM2 = cur_TM2; - best_TM3 = cur_TM3; - best_TM4 = cur_TM4; - best_TM5 = cur_TM5; - best_rmsd0 = cur_rmsd0; - best_Liden = cur_Liden; - best_TM_ali = cur_TM1; - best_rmsd_ali = cur_rmsd0; - best_L_ali = cur_n_ali; - best_n_ali = cur_n_ali; - best_n_ali8 = cur_n_ali8; - best_seqM = cur_global_seqM; - best_seqxA = cur_global_seqxA; - best_seqyA = cur_global_seqyA; + best_TM1 = cur_TM1; best_TM2 = cur_TM2; best_TM3 = cur_TM3; best_TM4 = cur_TM4; best_TM5 = cur_TM5; + best_rmsd0 = cur_rmsd0; best_Liden = cur_Liden; best_TM_ali = cur_TM1; best_rmsd_ali = cur_rmsd0; + best_L_ali = cur_n_ali; best_n_ali = cur_n_ali; best_n_ali8 = cur_n_ali8; + best_seqM = cur_global_seqM; best_seqxA = cur_global_seqxA; best_seqyA = cur_global_seqyA; best_do_vec = cur_do_vec; - - best_d0A = cur_d0A; - best_d0B = cur_d0B; - best_d0a = cur_d0a; - best_d0u = cur_d0u; + best_d0A = cur_d0A; best_d0B = cur_d0B; best_d0a = cur_d0a; best_d0u = cur_d0u; if (!best_tu_vec.empty()) { tu2t_u(best_tu_vec[0], best_t0, best_u0); @@ -3968,32 +3816,16 @@ int flexalign_fatcat_main(double **xa, double **ya, } } - // Safety check if both attempts somehow failed + // Safety check if (best_global_max_TM < 0) return 0; // Output best values back to the reference parameters - TM1 = best_TM1; - TM2 = best_TM2; - TM3 = best_TM3; - TM4 = best_TM4; - TM5 = best_TM5; - rmsd0 = best_rmsd0; - Liden = best_Liden; - TM_ali = best_TM_ali; - rmsd_ali = best_rmsd_ali; - L_ali = best_L_ali; - n_ali = best_n_ali; - n_ali8 = best_n_ali8; - seqM = best_seqM; - seqxA = best_seqxA; - seqyA = best_seqyA; - do_vec = best_do_vec; - tu_vec = best_tu_vec; - - d0A = best_d0A; - d0B = best_d0B; - d0a = best_d0a; - d0u = best_d0u; + TM1 = best_TM1; TM2 = best_TM2; TM3 = best_TM3; TM4 = best_TM4; TM5 = best_TM5; + rmsd0 = best_rmsd0; Liden = best_Liden; TM_ali = best_TM_ali; rmsd_ali 
= best_rmsd_ali; + L_ali = best_L_ali; n_ali = best_n_ali; n_ali8 = best_n_ali8; + seqM = best_seqM; seqxA = best_seqxA; seqyA = best_seqyA; + do_vec = best_do_vec; tu_vec = best_tu_vec; + d0A = best_d0A; d0B = best_d0B; d0a = best_d0a; d0u = best_d0u; for (int a = 0; a < 3; a++) { t0[a] = best_t0[a]; @@ -4002,6 +3834,7 @@ int flexalign_fatcat_main(double **xa, double **ya, return tu_vec.size(); } + // Unified engine replacing flexalign, flexalign_best, and flexalign_fatcat int flexalign_unified(string &xname, string &yname, const string &fname_super, const string &fname_lign, const string &fname_matrix, From fe6d9d25f0571f62dd020a356d716b8071abf027 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A5=9D=E4=BA=91=E7=AF=AA?= Date: Sat, 6 Jun 2026 15:41:05 +0800 Subject: [PATCH 20/23] strict hinge --- USalign.cpp | 107 +++++++++++++++++++++++++++++++++++++++++++++++----- flexalign.h | 4 +- 2 files changed, 100 insertions(+), 11 deletions(-) diff --git a/USalign.cpp b/USalign.cpp index 468b199..a0b7808 100644 --- a/USalign.cpp +++ b/USalign.cpp @@ -2962,7 +2962,7 @@ int flexalign_fatcat_main(double **xa, double **ya, const double d0_scale, const int i_opt, const int a_opt, const bool u_opt, const bool d_opt, const bool fast_opt, const int mol_type, const int hinge_opt, const int ss_opt, - int sparse_val = 0) + int sparse_val = 0, bool hinge_set = false) { // ========================================== // TRUE -mm 9 BASELINE (Defender) @@ -2990,7 +2990,7 @@ int flexalign_fatcat_main(double **xa, double **ya, xa, ya, (char*)seqx, (char*)seqy, (char*)secx, (char*)secy, xlen, ylen, local_sequence, Lnorm_ass, d0_scale, i_opt, a_opt, u_opt, d_opt, force_fast_opt_global, - mol_type, 9, cur_ss_opt, base_res); // -mm 9 explicitly uses 9 hinges + mol_type, hinge_opt, cur_ss_opt, base_res); // -mm 9 explicitly uses 9 hinges double cur_max_TM = (base_res.TM1 > base_res.TM2) ? 
base_res.TM1 : base_res.TM2; if (cur_max_TM > best_global_max_TM) @@ -3568,6 +3568,80 @@ int flexalign_fatcat_main(double **xa, double **ya, // std::cout << "[DEBUG] ----------------------------\n" << std::endl; // =================== DEBUG END =================== + // Precalculate distributed local_hinge_opt for each block when hinge_set is true + int num_blocks = bounds1.size() - 1; + std::vector precalc_local_hinge(num_blocks, 0); + + if (hinge_set) + { + struct BlockMeta { + int index; + double rmsd; + }; + std::vector valid_blocks; + + // Calculate target hinges to distribute based on requested hinge_opt and current implicit blocks + int target_total_hinges = std::max(0, hinge_opt + 1 - num_blocks); + + // Calculate base amount of hinges per block + int base_hinge = (hinge_opt + 1) / num_blocks - 1; + if (base_hinge < 0) base_hinge = 0; + + for (int k = 0; k < num_blocks; k++) + { + int L1_sub = bounds1[k + 1] - bounds1[k]; + int L2_sub = bounds2[k + 1] - bounds2[k]; + int min_L = std::min(L1_sub, L2_sub); + + if (min_L < 2 * fragLen) + { + precalc_local_hinge[k] = 0; // Length < 2*fragLen gets 0 + } + else + { + // Calculate rough RMSD for this unaligned block section + double block_rmsd = 0.0; + if (min_L >= 3) + { + double **p1, **p2; + NewArray(&p1, min_L, 3); + NewArray(&p2, min_L, 3); + for (int i = 0; i < min_L; i++) { + p1[i][0] = xa[bounds1[k] + i][0]; p1[i][1] = xa[bounds1[k] + i][1]; p1[i][2] = xa[bounds1[k] + i][2]; + p2[i][0] = ya[bounds2[k] + i][0]; p2[i][1] = ya[bounds2[k] + i][1]; p2[i][2] = ya[bounds2[k] + i][2]; + } + double rms_sum_sq, t_tmp[3], u_tmp[3][3]; + Kabsch(p1, p2, min_L, 0, &rms_sum_sq, t_tmp, u_tmp); + block_rmsd = std::sqrt(rms_sum_sq / min_L); + DeleteArray(&p1, min_L); + DeleteArray(&p2, min_L); + } + valid_blocks.push_back({k, block_rmsd}); + precalc_local_hinge[k] = base_hinge; // Assign base hinges to valid blocks + } + } + + // Distribute remaining hinges strictly prioritizing top RMSD blocks + int assigned = 
valid_blocks.size() * base_hinge; + int remainder = target_total_hinges - assigned; + + if (remainder > 0 && !valid_blocks.empty()) + { + // Sort valid blocks by RMSD descending + std::sort(valid_blocks.begin(), valid_blocks.end(), [](const BlockMeta& a, const BlockMeta& b) { + return a.rmsd > b.rmsd; + }); + + int v_idx = 0; + while (remainder > 0) + { + precalc_local_hinge[valid_blocks[v_idx].index]++; // Give +1 to the front runners + remainder--; + v_idx = (v_idx + 1) % valid_blocks.size(); + } + } + } + // Step 5: Iteratively align each block std::string cur_global_seqM = "", cur_global_seqxA = "", cur_global_seqyA = ""; cur_global_seqM.reserve(xlen + ylen + max_gap); @@ -3626,8 +3700,18 @@ int flexalign_fatcat_main(double **xa, double **ya, bool force_fast_opt = (std::min(L1_sub, L2_sub) > 1500) ? true : fast_opt; - // Set hinge_opt to 0 if the block length is less than 2 * fragLen - int local_hinge_opt = (std::min(L1_sub, L2_sub) < 2 * fragLen) ? 0 : hinge_opt; + // Determine local_hinge_opt based on user requirements. + // If hinge_set is true, we use the precalculated distributed hinges. + // Otherwise, set to 0 if the block length is less than 2 * fragLen, else 2. + int local_hinge_opt; + if (hinge_set) + { + local_hinge_opt = precalc_local_hinge[k]; + } + else + { + local_hinge_opt = (std::min(L1_sub, L2_sub) < 2 * fragLen) ? 
0 : 2; + } for (int cur_ss_opt = 0; cur_ss_opt <= 1; cur_ss_opt++) { @@ -3850,7 +3934,7 @@ int flexalign_unified(string &xname, string &yname, const string &fname_super, const vector &model2parse1, const vector &model2parse2, const int byresi_opt, const vector &chain1_list, const vector &chain2_list, const int hinge_opt, const int ss_opt, - FlexAlignMode mode = FLEX_STANDARD) + FlexAlignMode mode = FLEX_STANDARD, bool hinge_set = false) { vector> PDB_lines1; vector> PDB_lines2; @@ -3946,7 +4030,7 @@ int flexalign_unified(string &xname, string &yname, const string &fname_super, fatcat_res.TM_ali, fatcat_res.rmsd_ali, fatcat_res.n_ali, fatcat_res.n_ali8, xlen, ylen, sequence, Lnorm_ass, d0_scale, i_opt, a_opt, u_opt, d_opt, force_fast_opt, - mol_vec1[chain_i] + mol_vec2[chain_j], hinge_opt, ss_opt, 0 /* sparse_val */ + mol_vec1[chain_i] + mol_vec2[chain_j], hinge_opt, ss_opt, 0, hinge_set ); if (outfmt_opt == 0) @@ -4066,9 +4150,9 @@ int flexalign_best(string &xname, string &yname, const string &fname_super, cons return flexalign_unified(xname, yname, fname_super, fname_lign, fname_matrix, sequence, Lnorm_ass, d0_scale, m_opt, i_opt, o_opt, a_opt, u_opt, d_opt, TMcut, infmt1_opt, infmt2_opt, ter_opt, split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt, atom_opt, autojustify, mol_opt, dir_opt, dirpair_opt, dir1_opt, dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, byresi_opt, chain1_list, chain2_list, hinge_opt, 0 /* ss_opt is ignored in BEST mode */, FLEX_BEST); } -int flexalign_fatcat(string &xname, string &yname, const string &fname_super, const string &fname_lign, const string &fname_matrix, vector &sequence, const double Lnorm_ass, const double d0_scale, const bool m_opt, const int i_opt, const int o_opt, const int a_opt, const bool u_opt, const bool d_opt, const double TMcut, const int infmt1_opt, const int infmt2_opt, const int ter_opt, const int split_opt, const int outfmt_opt, const bool fast_opt, const int mirror_opt, const int het_opt, 
const string &atom_opt, const bool autojustify, const string &mol_opt, const string &dir_opt, const string &dirpair_opt, const string &dir1_opt, const string &dir2_opt, const vector &chain2parse1, const vector &chain2parse2, const vector &model2parse1, const vector &model2parse2, const int byresi_opt, const vector &chain1_list, const vector &chain2_list, const int hinge_opt) +int flexalign_fatcat(string &xname, string &yname, const string &fname_super, const string &fname_lign, const string &fname_matrix, vector &sequence, const double Lnorm_ass, const double d0_scale, const bool m_opt, const int i_opt, const int o_opt, const int a_opt, const bool u_opt, const bool d_opt, const double TMcut, const int infmt1_opt, const int infmt2_opt, const int ter_opt, const int split_opt, const int outfmt_opt, const bool fast_opt, const int mirror_opt, const int het_opt, const string &atom_opt, const bool autojustify, const string &mol_opt, const string &dir_opt, const string &dirpair_opt, const string &dir1_opt, const string &dir2_opt, const vector &chain2parse1, const vector &chain2parse2, const vector &model2parse1, const vector &model2parse2, const int byresi_opt, const vector &chain1_list, const vector &chain2_list, const int hinge_opt, bool hinge_set = false) { - return flexalign_unified(xname, yname, fname_super, fname_lign, fname_matrix, sequence, Lnorm_ass, d0_scale, m_opt, i_opt, o_opt, a_opt, u_opt, d_opt, TMcut, infmt1_opt, infmt2_opt, ter_opt, split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt, atom_opt, autojustify, mol_opt, dir_opt, dirpair_opt, dir1_opt, dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, byresi_opt, chain1_list, chain2_list, hinge_opt, 0 /* ss_opt ignore */, FLEX_FATCAT); + return flexalign_unified(xname, yname, fname_super, fname_lign, fname_matrix, sequence, Lnorm_ass, d0_scale, m_opt, i_opt, o_opt, a_opt, u_opt, d_opt, TMcut, infmt1_opt, infmt2_opt, ter_opt, split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt, atom_opt, 
autojustify, mol_opt, dir_opt, dirpair_opt, dir1_opt, dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, byresi_opt, chain1_list, chain2_list, hinge_opt, 0 /* ss_opt ignore */, FLEX_FATCAT, hinge_set); } int main(int argc, char *argv[]) @@ -4113,6 +4197,7 @@ int main(int argc, char *argv[]) int closeK_opt = -1; // number of atoms for SOI initial alignment. // 5 and 0 for -mm 5 and 6 int hinge_opt = 9; // maximum number of hinge allowed for flexible + bool hinge_set = false; int mirror_opt = 0; // do not align mirror int het_opt = 0; // do not read HETATM residues int mm_opt = 0; // do not perform MM-align @@ -4238,6 +4323,7 @@ int main(int argc, char *argv[]) { if (i >= (argc - 1)) PrintErrorAndQuit("ERROR! Missing value for -hinge"); + hinge_set = true; hinge_opt = atoi(argv[i + 1]); i++; } @@ -4658,6 +4744,9 @@ int main(int argc, char *argv[]) { if (mm_opt == 2) cout << "#Query\tTemplate\tTM" << endl; + else if (mm_opt >= 7) + cout << "#PDBchain1\tPDBchain2\tTM1\tTM2\t" + << "RMSD\tID1\tID2\tIDali\tL1\tL2\tLali\tNblk" << endl; else cout << "#PDBchain1\tPDBchain2\tTM1\tTM2\t" << "RMSD\tID1\tID2\tIDali\tL1\tL2\tLali" << endl; @@ -4786,7 +4875,7 @@ int main(int argc, char *argv[]) split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt, atom_opt, autojustify, mol_opt, dir_opt, dirpair_opt, dir1_opt, dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, - byresi_opt, chain1_list, chain2_list, hinge_opt); + byresi_opt, chain1_list, chain2_list, hinge_opt, hinge_set); else cerr << "WARNING! 
-mm " << mm_opt << " not implemented" << endl; diff --git a/flexalign.h b/flexalign.h index 439ba51..9423d7d 100644 --- a/flexalign.h +++ b/flexalign.h @@ -2032,10 +2032,10 @@ void output_flexalign_results(const string xname, const string yname, } else if (outfmt_opt == 2) { - printf("%s%s\t%s%s\t%.4f\t%.4f\t%.2f\t%4.3f\t%4.3f\t%4.3f\t%d\t%d\t%d", + printf("%s%s\t%s%s\t%.4f\t%.4f\t%.2f\t%4.3f\t%4.3f\t%4.3f\t%d\t%d\t%d\t%d", xname.c_str(), chainID1.c_str(), yname.c_str(), chainID2.c_str(), TM2, TM1, rmsd, Liden / xlen, Liden / ylen, (n_ali8 > 0) ? Liden / n_ali8 : 0, - xlen, ylen, n_ali8); + xlen, ylen, n_ali8, (int)tu_vec.size()); } cout << endl; From b2c1da9addc6bdfe75752f679ef03015b60b8070 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A5=9D=E4=BA=91=E7=AF=AA?= Date: Sun, 7 Jun 2026 07:43:39 +0800 Subject: [PATCH 21/23] add debug --- USalign.cpp | 686 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 462 insertions(+), 224 deletions(-) diff --git a/USalign.cpp b/USalign.cpp index a0b7808..77e78b9 100644 --- a/USalign.cpp +++ b/USalign.cpp @@ -8,17 +8,16 @@ using namespace std; void print_version() { - cout << -"\n" -" ********************************************************************\n" -" * US-align (Version 20260527) *\n" -" * Universal Structure Alignment of Proteins and Nucleic Acids *\n" -" * Reference: C Zhang, L Freddolino, Y Zhang. (2026) Nat Protoc *\n" -" * C Zhang, M Shine, AM Pyle, Y Zhang. (2022) Nat Methods*\n" -" * C Zhang, AM Pyle (2022) iScience. *\n" -" * Please email comments and suggestions to zhang@zhanggroup.org *\n" -" ********************************************************************" - << endl; + cout << "\n" + " ********************************************************************\n" + " * US-align (Version 20260527) *\n" + " * Universal Structure Alignment of Proteins and Nucleic Acids *\n" + " * Reference: C Zhang, L Freddolino, Y Zhang. (2026) Nat Protoc *\n" + " * C Zhang, M Shine, AM Pyle, Y Zhang. 
(2022) Nat Methods*\n" + " * C Zhang, AM Pyle (2022) iScience. *\n" + " * Please email comments and suggestions to zhang@zhanggroup.org *\n" + " ********************************************************************" + << endl; } void print_extra_help() @@ -2987,7 +2986,7 @@ int flexalign_fatcat_main(double **xa, double **ya, FlexAlignResult base_res; // Pass full unbroken sequences directly to flexalign (identical to -mm 9) execute_flexalign_with_fallback( - xa, ya, (char*)seqx, (char*)seqy, (char*)secx, (char*)secy, + xa, ya, (char *)seqx, (char *)seqy, (char *)secx, (char *)secy, xlen, ylen, local_sequence, Lnorm_ass, d0_scale, i_opt, a_opt, u_opt, d_opt, force_fast_opt_global, mol_type, hinge_opt, cur_ss_opt, base_res); // -mm 9 explicitly uses 9 hinges @@ -2996,35 +2995,68 @@ int flexalign_fatcat_main(double **xa, double **ya, if (cur_max_TM > best_global_max_TM) { best_global_max_TM = cur_max_TM; - for (int a = 0; a < 3; a++) { + for (int a = 0; a < 3; a++) + { best_t0[a] = base_res.t0[a]; - for (int b = 0; b < 3; b++) best_u0[a][b] = base_res.u0[a][b]; + for (int b = 0; b < 3; b++) + best_u0[a][b] = base_res.u0[a][b]; } best_tu_vec = base_res.tu_vec; - best_TM1 = base_res.TM1; best_TM2 = base_res.TM2; best_TM3 = base_res.TM3; - best_TM4 = base_res.TM4; best_TM5 = base_res.TM5; - best_rmsd0 = base_res.rmsd0; best_Liden = base_res.Liden; - best_TM_ali = base_res.TM_ali; best_rmsd_ali = base_res.rmsd_ali; - best_L_ali = base_res.L_ali; best_n_ali = base_res.n_ali; best_n_ali8 = base_res.n_ali8; - best_seqM = base_res.seqM; best_seqxA = base_res.seqxA; best_seqyA = base_res.seqyA; + best_TM1 = base_res.TM1; + best_TM2 = base_res.TM2; + best_TM3 = base_res.TM3; + best_TM4 = base_res.TM4; + best_TM5 = base_res.TM5; + best_rmsd0 = base_res.rmsd0; + best_Liden = base_res.Liden; + best_TM_ali = base_res.TM_ali; + best_rmsd_ali = base_res.rmsd_ali; + best_L_ali = base_res.L_ali; + best_n_ali = base_res.n_ali; + best_n_ali8 = base_res.n_ali8; + best_seqM = base_res.seqM; 
+ best_seqxA = base_res.seqxA; + best_seqyA = base_res.seqyA; best_do_vec = base_res.do_vec; - best_d0A = base_res.d0A; best_d0B = base_res.d0B; - best_d0a = base_res.d0a; best_d0u = base_res.d0u; + best_d0A = base_res.d0A; + best_d0B = base_res.d0B; + best_d0a = base_res.d0a; + best_d0u = base_res.d0u; } } // Early exit if the true -mm 9 baseline is already excellent if (best_global_max_TM >= 0.85) { - TM1 = best_TM1; TM2 = best_TM2; TM3 = best_TM3; TM4 = best_TM4; TM5 = best_TM5; - rmsd0 = best_rmsd0; Liden = best_Liden; TM_ali = best_TM_ali; rmsd_ali = best_rmsd_ali; - L_ali = best_L_ali; n_ali = best_n_ali; n_ali8 = best_n_ali8; - seqM = best_seqM; seqxA = best_seqxA; seqyA = best_seqyA; - do_vec = best_do_vec; tu_vec = best_tu_vec; - d0A = best_d0A; d0B = best_d0B; d0a = best_d0a; d0u = best_d0u; - for (int a = 0; a < 3; a++) { + // <--- ADD DEBUG HERE: Output early exit confirmation + // std::cout << "[DEBUG] MM9" << std::endl; + + TM1 = best_TM1; + TM2 = best_TM2; + TM3 = best_TM3; + TM4 = best_TM4; + TM5 = best_TM5; + rmsd0 = best_rmsd0; + Liden = best_Liden; + TM_ali = best_TM_ali; + rmsd_ali = best_rmsd_ali; + L_ali = best_L_ali; + n_ali = best_n_ali; + n_ali8 = best_n_ali8; + seqM = best_seqM; + seqxA = best_seqxA; + seqyA = best_seqyA; + do_vec = best_do_vec; + tu_vec = best_tu_vec; + d0A = best_d0A; + d0B = best_d0B; + d0a = best_d0a; + d0u = best_d0u; + for (int a = 0; a < 3; a++) + { t0[a] = best_t0[a]; - for (int b = 0; b < 3; b++) u0[a][b] = best_u0[a][b]; + for (int b = 0; b < 3; b++) + u0[a][b] = best_u0[a][b]; } return tu_vec.size(); } @@ -3086,7 +3118,7 @@ int flexalign_fatcat_main(double **xa, double **ya, if (d3_term < 0.3 * std::min(xlen, ylen)) continue; - double dist1 = disTable1[i][fragLen - 1]; + double dist1 = disTable1[i][fragLen - 1]; double dist2 = disTable2[j][fragLen - 1]; if (std::fabs(dist1 - dist2) > 2.0 * cur_rmsdCut) @@ -3094,8 +3126,12 @@ int flexalign_fatcat_main(double **xa, double **ya, for (int k = 0; k < fragLen; k++) { 
- r1[k][0] = xa[i + k][0]; r1[k][1] = xa[i + k][1]; r1[k][2] = xa[i + k][2]; - r2[k][0] = ya[j + k][0]; r2[k][1] = ya[j + k][1]; r2[k][2] = ya[j + k][2]; + r1[k][0] = xa[i + k][0]; + r1[k][1] = xa[i + k][1]; + r1[k][2] = xa[i + k][2]; + r2[k][0] = ya[j + k][0]; + r2[k][1] = ya[j + k][1]; + r2[k][2] = ya[j + k][2]; } double rms_sum_sq, t_tmp[3], u_tmp[3][3]; @@ -3105,12 +3141,15 @@ int flexalign_fatcat_main(double **xa, double **ya, if (rmsd_tmp < cur_rmsdCut) { FATCAT_AFP afp; - afp.i = i; afp.j = j; afp.len = fragLen; + afp.i = i; + afp.j = j; + afp.len = fragLen; afp.score = resScore * fragLen * (1.0 - (rmsd_tmp / cur_badRmsd) * (rmsd_tmp / cur_badRmsd)); for (int a = 0; a < 3; a++) { afp.t[a] = t_tmp[a]; - for (int b = 0; b < 3; b++) afp.R[a][b] = u_tmp[a][b]; + for (int b = 0; b < 3; b++) + afp.R[a][b] = u_tmp[a][b]; } initial_afps.push_back(afp); } @@ -3137,7 +3176,8 @@ int flexalign_fatcat_main(double **xa, double **ya, continue; std::vector &group = diagonals[d]; - std::sort(group.begin(), group.end(), [](const FATCAT_AFP &a, const FATCAT_AFP &b) { return a.i < b.i; }); + std::sort(group.begin(), group.end(), [](const FATCAT_AFP &a, const FATCAT_AFP &b) + { return a.i < b.i; }); int n_group = group.size(); std::vector invalid(n_group, false); @@ -3158,8 +3198,12 @@ int flexalign_fatcat_main(double **xa, double **ya, for (int k = 0; k < new_len; k++) { - r1_merge[k][0] = xa[curr.i + k][0]; r1_merge[k][1] = xa[curr.i + k][1]; r1_merge[k][2] = xa[curr.i + k][2]; - r2_merge[k][0] = ya[curr.j + k][0]; r2_merge[k][1] = ya[curr.j + k][1]; r2_merge[k][2] = ya[curr.j + k][2]; + r1_merge[k][0] = xa[curr.i + k][0]; + r1_merge[k][1] = xa[curr.i + k][1]; + r1_merge[k][2] = xa[curr.i + k][2]; + r2_merge[k][0] = ya[curr.j + k][0]; + r2_merge[k][1] = ya[curr.j + k][1]; + r2_merge[k][2] = ya[curr.j + k][2]; } double rms_sum_sq, t_tmp[3], u_tmp[3][3]; @@ -3172,7 +3216,8 @@ int flexalign_fatcat_main(double **xa, double **ya, for (int a = 0; a < 3; a++) { curr.t[a] = t_tmp[a]; 
- for (int b = 0; b < 3; b++) curr.R[a][b] = u_tmp[a][b]; + for (int b = 0; b < 3; b++) + curr.R[a][b] = u_tmp[a][b]; } curr.score = resScore * new_len * (1.0 - (rmsd_tmp / cur_badRmsd) * (rmsd_tmp / cur_badRmsd)); invalid[nxt_idx] = true; @@ -3185,10 +3230,10 @@ int flexalign_fatcat_main(double **xa, double **ya, DeleteArray(&r1_merge, max_merge_len); DeleteArray(&r2_merge, max_merge_len); - std::sort(merged_afps.begin(), merged_afps.end(), [](const FATCAT_AFP &a, const FATCAT_AFP &b) { + std::sort(merged_afps.begin(), merged_afps.end(), [](const FATCAT_AFP &a, const FATCAT_AFP &b) + { if (a.i == b.i) return a.j < b.j; - return a.i < b.i; - }); + return a.i < b.i; }); int n_afps = merged_afps.size(); std::vector ret_b1, ret_b2; @@ -3207,7 +3252,8 @@ int flexalign_fatcat_main(double **xa, double **ya, for (int i_val = 0; i_val < xlen; i_val++) { - if (i_to_j[i_val].empty()) continue; + if (i_to_j[i_val].empty()) + continue; for (size_t p = 0; p < i_to_j[i_val].size(); p++) { int j_val = i_to_j[i_val][p].first; @@ -3217,14 +3263,18 @@ int flexalign_fatcat_main(double **xa, double **ya, int curr_bef = -1; for (int j_val = 0; j_val < ylen; j_val++) { - if (afp_bef_index[i_val * ylen + j_val] != -1) curr_bef = afp_bef_index[i_val * ylen + j_val]; - else afp_bef_index[i_val * ylen + j_val] = curr_bef; + if (afp_bef_index[i_val * ylen + j_val] != -1) + curr_bef = afp_bef_index[i_val * ylen + j_val]; + else + afp_bef_index[i_val * ylen + j_val] = curr_bef; } int curr_aft = -1; for (int j_val = ylen - 1; j_val >= 0; j_val--) { - if (afp_aft_index[i_val * ylen + j_val] != -1) curr_aft = afp_aft_index[i_val * ylen + j_val]; - else afp_aft_index[i_val * ylen + j_val] = curr_aft; + if (afp_aft_index[i_val * ylen + j_val] != -1) + curr_aft = afp_aft_index[i_val * ylen + j_val]; + else + afp_aft_index[i_val * ylen + j_val] = curr_aft; } } @@ -3237,17 +3287,22 @@ int flexalign_fatcat_main(double **xa, double **ya, { double dist1, dist2; int idx1_a = curr.i + i_idx, idx1_b = prv.i 
+ j_idx; - if (idx1_a >= idx1_b) dist1 = disTable1[idx1_b][idx1_a - idx1_b]; - else dist1 = disTable1[idx1_a][idx1_b - idx1_a]; + if (idx1_a >= idx1_b) + dist1 = disTable1[idx1_b][idx1_a - idx1_b]; + else + dist1 = disTable1[idx1_a][idx1_b - idx1_a]; int idx2_a = curr.j + i_idx, idx2_b = prv.j + j_idx; - if (idx2_a >= idx2_b) dist2 = disTable2[idx2_b][idx2_a - idx2_b]; - else dist2 = disTable2[idx2_a][idx2_b - idx2_a]; + if (idx2_a >= idx2_b) + dist2 = disTable2[idx2_b][idx2_a - idx2_b]; + else + dist2 = disTable2[idx2_a][idx2_b - idx2_a]; rms_sq += (dist1 - dist2) * (dist1 - dist2); } } - if (rms_sq > afp_dis_cut) return 1e9; + if (rms_sq > afp_dis_cut) + return 1e9; return std::sqrt(rms_sq / (fragLen * fragLen)); }; @@ -3263,26 +3318,38 @@ int flexalign_fatcat_main(double **xa, double **ya, } } int n = r1.size(); - if (n < 3) return 0.0; - double **p1; NewArray(&p1, n, 3); - double **p2; NewArray(&p2, n, 3); + if (n < 3) + return 0.0; + double **p1; + NewArray(&p1, n, 3); + double **p2; + NewArray(&p2, n, 3); for (int i = 0; i < n; i++) { - p1[i][0] = xa[r1[i]][0]; p1[i][1] = xa[r1[i]][1]; p1[i][2] = xa[r1[i]][2]; - p2[i][0] = ya[r2[i]][0]; p2[i][1] = ya[r2[i]][1]; p2[i][2] = ya[r2[i]][2]; + p1[i][0] = xa[r1[i]][0]; + p1[i][1] = xa[r1[i]][1]; + p1[i][2] = xa[r1[i]][2]; + p2[i][0] = ya[r2[i]][0]; + p2[i][1] = ya[r2[i]][1]; + p2[i][2] = ya[r2[i]][2]; } double rms_sq_sum, t_tmp[3], u_tmp[3][3]; Kabsch(p1, p2, n, 0, &rms_sq_sum, t_tmp, u_tmp); - DeleteArray(&p1, n); DeleteArray(&p2, n); + DeleteArray(&p1, n); + DeleteArray(&p2, n); return std::sqrt(rms_sq_sum / n); }; - struct Region { int s1, e1, s2, e2; }; + struct Region + { + int s1, e1, s2, e2; + }; std::vector sco(n_afps); std::vector twi(n_afps, 0); std::vector pre(n_afps, -1); - for (int m = 0; m < n_afps; m++) sco[m] = merged_afps[m].score; + for (int m = 0; m < n_afps; m++) + sco[m] = merged_afps[m].score; for (int m = 0; m < n_afps; m++) { @@ -3301,22 +3368,28 @@ int flexalign_fatcat_main(double **xa, 
double **ya, int a_s, a_e, b_s, b_e; if (st == 0) { - a_s = std::max(a1, 0); a_e = std::min(a3, xlen - 1); - b_s = std::max(b2, 0); b_e = std::min(b3, ylen - 1); + a_s = std::max(a1, 0); + a_e = std::min(a3, xlen - 1); + b_s = std::max(b2, 0); + b_e = std::min(b3, ylen - 1); } else { - a_s = std::max(a2, 0); a_e = std::min(a3, xlen - 1); - b_s = std::max(b1, 0); b_e = std::min(b2 - 1, ylen - 1); + a_s = std::max(a2, 0); + a_e = std::min(a3, xlen - 1); + b_s = std::max(b1, 0); + b_e = std::min(b2 - 1, ylen - 1); } - if (b_s >= ylen || b_e < 0) continue; + if (b_s >= ylen || b_e < 0) + continue; for (int prev_i = a_s; prev_i <= a_e; prev_i++) { int s1 = afp_aft_index[prev_i * ylen + b_s]; int s2 = afp_bef_index[prev_i * ylen + b_e]; if (s1 != -1 && s2 != -1 && s1 <= s2) - for (int s = s1; s <= s2; s++) valid_prevs.push_back(s); + for (int s = s1; s <= s2; s++) + valid_prevs.push_back(s); } } @@ -3325,7 +3398,8 @@ int flexalign_fatcat_main(double **xa, double **ya, { int prev = valid_prevs[v]; int prev_twi = twi[prev]; - if (prev_twi > max_twists) continue; + if (prev_twi > max_twists) + continue; int gap_i = curr_i - (merged_afps[prev].i + merged_afps[prev].len); int gap_j = curr_j - (merged_afps[prev].j + merged_afps[prev].len); @@ -3333,10 +3407,13 @@ int flexalign_fatcat_main(double **xa, double **ya, double gp = 0.0; int m_mis = 0; - if (gap_i < 0 || gap_j < 0) m_mis = (gap_i < gap_j) ? -gap_i : -gap_j; + if (gap_i < 0 || gap_j < 0) + m_mis = (gap_i < gap_j) ? 
-gap_i : -gap_j; gp = gap_ext * m_mis; - if (m_gap > 0) gp += gap_ext * m_gap; - if (gp < max_penalty) gp = max_penalty; + if (m_gap > 0) + gp += gap_ext * m_gap; + if (gp < max_penalty) + gp = max_penalty; double rms_sq = 0; for (int k = 0; k < fragLen; k++) @@ -3345,12 +3422,16 @@ int flexalign_fatcat_main(double **xa, double **ya, { double dist1, dist2; int idx1_a = curr_i + k, idx1_b = merged_afps[prev].i + l; - if (idx1_a >= idx1_b) dist1 = disTable1[idx1_b][idx1_a - idx1_b]; - else dist1 = disTable1[idx1_a][idx1_b - idx1_a]; + if (idx1_a >= idx1_b) + dist1 = disTable1[idx1_b][idx1_a - idx1_b]; + else + dist1 = disTable1[idx1_a][idx1_b - idx1_a]; int idx2_a = curr_j + k, idx2_b = merged_afps[prev].j + l; - if (idx2_a >= idx2_b) dist2 = disTable2[idx2_b][idx2_a - idx2_b]; - else dist2 = disTable2[idx2_a][idx2_b - idx2_a]; + if (idx2_a >= idx2_b) + dist2 = disTable2[idx2_b][idx2_a - idx2_b]; + else + dist2 = disTable2[idx2_a][idx2_b - idx2_a]; rms_sq += (dist1 - dist2) * (dist1 - dist2); } @@ -3360,37 +3441,45 @@ int flexalign_fatcat_main(double **xa, double **ya, int is_twist = 0; if (rms_sq >= afp_dis_cut) { - tp = twist_pen; is_twist = 1; + tp = twist_pen; + is_twist = 1; } else { double dvar = std::sqrt(rms_sq / (fragLen * fragLen)); - if (dvar > disCut - disSmooth) tp = twist_pen * std::sqrt((dvar - disCut + disSmooth) / disSmooth); + if (dvar > disCut - disSmooth) + tp = twist_pen * std::sqrt((dvar - disCut + disSmooth) / disSmooth); } - if (prev_twi + is_twist > max_twists) continue; + if (prev_twi + is_twist > max_twists) + continue; double stmp = sco[prev] + curr_sco + tp + gp; if (stmp > sco[m]) { - sco[m] = stmp; pre[m] = prev; twi[m] = prev_twi + is_twist; + sco[m] = stmp; + pre[m] = prev; + twi[m] = prev_twi + is_twist; } } } int best_m = 0; for (int m = 1; m < n_afps; m++) - if (sco[m] > sco[best_m]) best_m = m; + if (sco[m] > sco[best_m]) + best_m = m; std::vector path; int curr_m = best_m; while (curr_m != -1) { - path.push_back(curr_m); curr_m = 
pre[curr_m]; + path.push_back(curr_m); + curr_m = pre[curr_m]; } std::reverse(path.begin(), path.end()); - if (path.empty()) return std::make_pair(ret_b1, ret_b2); + if (path.empty()) + return std::make_pair(ret_b1, ret_b2); struct Block { @@ -3411,15 +3500,19 @@ int flexalign_fatcat_main(double **xa, double **ya, if (dvar >= disCut) { blocks.push_back(curr_block); - curr_block.afps.clear(); curr_block.dvars.clear(); - curr_block.afps.push_back(curr); curr_block.dvars.push_back(0.0); + curr_block.afps.clear(); + curr_block.dvars.clear(); + curr_block.afps.push_back(curr); + curr_block.dvars.push_back(0.0); } else { - curr_block.afps.push_back(curr); curr_block.dvars.push_back(dvar); + curr_block.afps.push_back(curr); + curr_block.dvars.push_back(dvar); } } - if (!curr_block.afps.empty()) blocks.push_back(curr_block); + if (!curr_block.afps.empty()) + blocks.push_back(curr_block); bool splitted = true; while (splitted && blocks.size() < (size_t)(max_twists + 1)) @@ -3433,16 +3526,25 @@ int flexalign_fatcat_main(double **xa, double **ya, if (blocks[b].afps.size() > 2) { double cur_rmsd = calc_block_rmsd(blocks[b].afps); - if (cur_rmsd > max_rmsd) { max_rmsd = cur_rmsd; target_b = b; } + if (cur_rmsd > max_rmsd) + { + max_rmsd = cur_rmsd; + target_b = b; + } } } if (max_rmsd >= cur_local_badRmsd && target_b != -1) { - double max_t = 0; int cut_idx = 0; + double max_t = 0; + int cut_idx = 0; for (size_t i = 1; i < blocks[target_b].afps.size(); i++) { - if (blocks[target_b].dvars[i] > max_t) { max_t = blocks[target_b].dvars[i]; cut_idx = i; } + if (blocks[target_b].dvars[i] > max_t) + { + max_t = blocks[target_b].dvars[i]; + cut_idx = i; + } } if (cut_idx > 0) @@ -3468,7 +3570,11 @@ int flexalign_fatcat_main(double **xa, double **ya, int b1 = (b > 0) ? blocks[b - 1].afps.back().i + blocks[b - 1].afps.back().len : 0; int b2 = (b > 0) ? 
blocks[b - 1].afps.back().j + blocks[b - 1].afps.back().len : 0; int span = std::min(e1 - b1, e2 - b2); - if (span < 2 * fragLen) { blocks.erase(blocks.begin() + b); b--; } + if (span < 2 * fragLen) + { + blocks.erase(blocks.begin() + b); + b--; + } } } @@ -3483,7 +3589,11 @@ int flexalign_fatcat_main(double **xa, double **ya, std::vector temp_merged = blocks[b].afps; temp_merged.insert(temp_merged.end(), blocks[b + 1].afps.begin(), blocks[b + 1].afps.end()); double cur_rmsd = calc_block_rmsd(temp_merged); - if (cur_rmsd < min_rmsd) { min_rmsd = cur_rmsd; min_b = b; } + if (cur_rmsd < min_rmsd) + { + min_rmsd = cur_rmsd; + min_b = b; + } } if (min_rmsd < cur_local_badRmsd && min_b != -1) @@ -3503,31 +3613,44 @@ int flexalign_fatcat_main(double **xa, double **ya, { FATCAT_AFP afp = blocks[b].afps[a]; int skip = std::max(std::max(last_i - afp.i, last_j - afp.j), 0); - if (skip >= afp.len) continue; + if (skip >= afp.len) + continue; - int eff_i = afp.i + skip; int eff_j = afp.j + skip; int eff_L = afp.len - skip; - if (b_s1 == -1) { b_s1 = eff_i; b_s2 = eff_j; } - b_e1 = eff_i + eff_L; b_e2 = eff_j + eff_L; - last_i = b_e1; last_j = b_e2; + int eff_i = afp.i + skip; + int eff_j = afp.j + skip; + int eff_L = afp.len - skip; + if (b_s1 == -1) + { + b_s1 = eff_i; + b_s2 = eff_j; + } + b_e1 = eff_i + eff_L; + b_e2 = eff_j + eff_L; + last_i = b_e1; + last_j = b_e2; } if (b_s1 != -1) { - if (b_e1 - b_s1 >= 4 && b_e2 - b_s2 >= 4) { + if (b_e1 - b_s1 >= 4 && b_e2 - b_s2 >= 4) + { Region r = {b_s1, b_e1, b_s2, b_e2}; real_blocks.push_back(r); } } } - if (real_blocks.empty()) return std::make_pair(ret_b1, ret_b2); + if (real_blocks.empty()) + return std::make_pair(ret_b1, ret_b2); - ret_b1.push_back(0); ret_b2.push_back(0); + ret_b1.push_back(0); + ret_b2.push_back(0); for (size_t k = 0; k < real_blocks.size() - 1; k++) { ret_b1.push_back((real_blocks[k].e1 + real_blocks[k + 1].s1) / 2); ret_b2.push_back((real_blocks[k].e2 + real_blocks[k + 1].s2) / 2); } - 
ret_b1.push_back(xlen); ret_b2.push_back(ylen); + ret_b1.push_back(xlen); + ret_b2.push_back(ylen); return std::make_pair(ret_b1, ret_b2); }; @@ -3545,24 +3668,25 @@ int flexalign_fatcat_main(double **xa, double **ya, // Loop through both bound sets, updating best_global_max_TM if we beat the -mm 9 defender for (size_t b_idx = 0; b_idx < all_bounds.size(); b_idx++) { - std::vector& bounds1 = all_bounds[b_idx].first; - std::vector& bounds2 = all_bounds[b_idx].second; + std::vector &bounds1 = all_bounds[b_idx].first; + std::vector &bounds2 = all_bounds[b_idx].second; // Skip if only one interval (block) is generated, as the full unbroken sequence // has already been processed by the baseline (-mm 9) above. - if (bounds1.size() <= 2) continue; + if (bounds1.size() <= 2) + continue; // ================== DEBUG START ================== // Output the interval mapping for the current boundary set // std::cout << "\n[DEBUG] --- Region Mapping Table ---" << std::endl; // std::cout << "[DEBUG] Mode: " << (b_idx == 0 ? 
"FATCAT Bounds" : "Strict Bounds") << std::endl; // std::cout << "[DEBUG] Total Blocks: " << (bounds1.size() - 1) << std::endl; - + // for (size_t k = 0; k < bounds1.size() - 1; k++) // { // std::cout << "[DEBUG] Block " << (k + 1) << ": " // << "Chain1 [" << bounds1[k] << " -> " << bounds1[k + 1] << "] <==> " - // << "Chain2 [" << bounds2[k] << " -> " << bounds2[k + 1] << "]" + // << "Chain2 [" << bounds2[k] << " -> " << bounds2[k + 1] << "]" // << std::endl; // } // std::cout << "[DEBUG] ----------------------------\n" << std::endl; @@ -3574,18 +3698,20 @@ int flexalign_fatcat_main(double **xa, double **ya, if (hinge_set) { - struct BlockMeta { + struct BlockMeta + { int index; double rmsd; }; std::vector valid_blocks; - + // Calculate target hinges to distribute based on requested hinge_opt and current implicit blocks int target_total_hinges = std::max(0, hinge_opt + 1 - num_blocks); - + // Calculate base amount of hinges per block int base_hinge = (hinge_opt + 1) / num_blocks - 1; - if (base_hinge < 0) base_hinge = 0; + if (base_hinge < 0) + base_hinge = 0; for (int k = 0; k < num_blocks; k++) { @@ -3606,9 +3732,14 @@ int flexalign_fatcat_main(double **xa, double **ya, double **p1, **p2; NewArray(&p1, min_L, 3); NewArray(&p2, min_L, 3); - for (int i = 0; i < min_L; i++) { - p1[i][0] = xa[bounds1[k] + i][0]; p1[i][1] = xa[bounds1[k] + i][1]; p1[i][2] = xa[bounds1[k] + i][2]; - p2[i][0] = ya[bounds2[k] + i][0]; p2[i][1] = ya[bounds2[k] + i][1]; p2[i][2] = ya[bounds2[k] + i][2]; + for (int i = 0; i < min_L; i++) + { + p1[i][0] = xa[bounds1[k] + i][0]; + p1[i][1] = xa[bounds1[k] + i][1]; + p1[i][2] = xa[bounds1[k] + i][2]; + p2[i][0] = ya[bounds2[k] + i][0]; + p2[i][1] = ya[bounds2[k] + i][1]; + p2[i][2] = ya[bounds2[k] + i][2]; } double rms_sum_sq, t_tmp[3], u_tmp[3][3]; Kabsch(p1, p2, min_L, 0, &rms_sum_sq, t_tmp, u_tmp); @@ -3628,10 +3759,9 @@ int flexalign_fatcat_main(double **xa, double **ya, if (remainder > 0 && !valid_blocks.empty()) { // Sort valid blocks 
by RMSD descending - std::sort(valid_blocks.begin(), valid_blocks.end(), [](const BlockMeta& a, const BlockMeta& b) { - return a.rmsd > b.rmsd; - }); - + std::sort(valid_blocks.begin(), valid_blocks.end(), [](const BlockMeta &a, const BlockMeta &b) + { return a.rmsd > b.rmsd; }); + int v_idx = 0; while (remainder > 0) { @@ -3662,11 +3792,15 @@ int flexalign_fatcat_main(double **xa, double **ya, { for (int i = 0; i < L1_sub; i++) { - cur_global_seqxA += seqx[x_s + i]; cur_global_seqyA += '-'; cur_global_seqM += ' '; + cur_global_seqxA += seqx[x_s + i]; + cur_global_seqyA += '-'; + cur_global_seqM += ' '; } for (int i = 0; i < L2_sub; i++) { - cur_global_seqxA += '-'; cur_global_seqyA += seqy[y_s + i]; cur_global_seqM += ' '; + cur_global_seqxA += '-'; + cur_global_seqyA += seqy[y_s + i]; + cur_global_seqM += ' '; } continue; } @@ -3681,17 +3815,25 @@ int flexalign_fatcat_main(double **xa, double **ya, for (int i = 0; i < L1_sub; i++) { - xa_sub[i][0] = xa[x_s + i][0]; xa_sub[i][1] = xa[x_s + i][1]; xa_sub[i][2] = xa[x_s + i][2]; - seqx_sub[i] = seqx[x_s + i]; secx_sub[i] = secx[x_s + i]; + xa_sub[i][0] = xa[x_s + i][0]; + xa_sub[i][1] = xa[x_s + i][1]; + xa_sub[i][2] = xa[x_s + i][2]; + seqx_sub[i] = seqx[x_s + i]; + secx_sub[i] = secx[x_s + i]; } - seqx_sub[L1_sub] = '\0'; secx_sub[L1_sub] = '\0'; + seqx_sub[L1_sub] = '\0'; + secx_sub[L1_sub] = '\0'; for (int i = 0; i < L2_sub; i++) { - ya_sub[i][0] = ya[y_s + i][0]; ya_sub[i][1] = ya[y_s + i][1]; ya_sub[i][2] = ya[y_s + i][2]; - seqy_sub[i] = seqy[y_s + i]; secy_sub[i] = secy[y_s + i]; + ya_sub[i][0] = ya[y_s + i][0]; + ya_sub[i][1] = ya[y_s + i][1]; + ya_sub[i][2] = ya[y_s + i][2]; + seqy_sub[i] = seqy[y_s + i]; + secy_sub[i] = secy[y_s + i]; } - seqy_sub[L2_sub] = '\0'; secy_sub[L2_sub] = '\0'; + seqy_sub[L2_sub] = '\0'; + secy_sub[L2_sub] = '\0'; double t0_best[3], u0_best[3][3]; double TM_best_max = -1.0; @@ -3729,9 +3871,12 @@ int flexalign_fatcat_main(double **xa, double **ya, for (int a = 0; a < 3; a++) { 
t0_best[a] = cur_res.t0[a]; - for (int b = 0; b < 3; b++) u0_best[a][b] = cur_res.u0[a][b]; + for (int b = 0; b < 3; b++) + u0_best[a][b] = cur_res.u0[a][b]; } - seqM_best = cur_res.seqM; seqxA_best = cur_res.seqxA; seqyA_best = cur_res.seqyA; + seqM_best = cur_res.seqM; + seqxA_best = cur_res.seqxA; + seqyA_best = cur_res.seqyA; tu_vec_best = cur_res.tu_vec; } } @@ -3740,14 +3885,22 @@ int flexalign_fatcat_main(double **xa, double **ya, { for (int i = 0; i < L1_sub; i++) { - cur_global_seqxA += seqx_sub[i]; cur_global_seqyA += '-'; cur_global_seqM += ' '; + cur_global_seqxA += seqx_sub[i]; + cur_global_seqyA += '-'; + cur_global_seqM += ' '; } for (int i = 0; i < L2_sub; i++) { - cur_global_seqxA += '-'; cur_global_seqyA += seqy_sub[i]; cur_global_seqM += ' '; + cur_global_seqxA += '-'; + cur_global_seqyA += seqy_sub[i]; + cur_global_seqM += ' '; } - DeleteArray(&xa_sub, L1_sub); DeleteArray(&ya_sub, L2_sub); - delete[] seqx_sub; delete[] seqy_sub; delete[] secx_sub; delete[] secy_sub; + DeleteArray(&xa_sub, L1_sub); + DeleteArray(&ya_sub, L2_sub); + delete[] seqx_sub; + delete[] seqy_sub; + delete[] secx_sub; + delete[] secy_sub; continue; } @@ -3759,7 +3912,8 @@ int flexalign_fatcat_main(double **xa, double **ya, } int base_tu_idx = cur_tu_vec.size(); - for (size_t m = 0; m < tu_vec_best.size(); m++) cur_tu_vec.push_back(tu_vec_best[m]); + for (size_t m = 0; m < tu_vec_best.size(); m++) + cur_tu_vec.push_back(tu_vec_best[m]); int rx = x_s; int current_global_idx = base_tu_idx; @@ -3771,10 +3925,14 @@ int flexalign_fatcat_main(double **xa, double **ya, if (c != ' ' && c != '.' 
&& c != ':') { int local_hinge_idx = -1; - if (c >= '0' && c <= '9') local_hinge_idx = c - '0'; - else if (c >= 'a' && c <= 'z') local_hinge_idx = c - 'a' + 10; - else if (c >= 'A' && c <= 'Z') local_hinge_idx = c - 'A' + 36; - if (local_hinge_idx >= 0 && local_hinge_idx < tu_vec_best.size()) current_global_idx = base_tu_idx + local_hinge_idx; + if (c >= '0' && c <= '9') + local_hinge_idx = c - '0'; + else if (c >= 'a' && c <= 'z') + local_hinge_idx = c - 'a' + 10; + else if (c >= 'A' && c <= 'Z') + local_hinge_idx = c - 'A' + 36; + if (local_hinge_idx >= 0 && local_hinge_idx < tu_vec_best.size()) + current_global_idx = base_tu_idx + local_hinge_idx; } if (seqxA_best[i] != '-') @@ -3788,10 +3946,14 @@ int flexalign_fatcat_main(double **xa, double **ya, if (c != ' ' && c != '.' && c != ':') { char global_c; - if (current_global_idx < 10) global_c = '0' + current_global_idx; - else if (current_global_idx < 36) global_c = 'a' + (current_global_idx - 10); - else if (current_global_idx < 62) global_c = 'A' + (current_global_idx - 36); - else global_c = '*'; + if (current_global_idx < 10) + global_c = '0' + current_global_idx; + else if (current_global_idx < 36) + global_c = 'a' + (current_global_idx - 10); + else if (current_global_idx < 62) + global_c = 'A' + (current_global_idx - 36); + else + global_c = '*'; seqM_best[i] = global_c; } else @@ -3799,28 +3961,41 @@ int flexalign_fatcat_main(double **xa, double **ya, seqM_best[i] = c; } } - else { seqM_best[i] = ' '; } + else + { + seqM_best[i] = ' '; + } } - cur_global_seqM += seqM_best; cur_global_seqxA += seqxA_best; cur_global_seqyA += seqyA_best; + cur_global_seqM += seqM_best; + cur_global_seqxA += seqxA_best; + cur_global_seqyA += seqyA_best; - DeleteArray(&xa_sub, L1_sub); DeleteArray(&ya_sub, L2_sub); - delete[] seqx_sub; delete[] seqy_sub; delete[] secx_sub; delete[] secy_sub; + DeleteArray(&xa_sub, L1_sub); + DeleteArray(&ya_sub, L2_sub); + delete[] seqx_sub; + delete[] seqy_sub; + delete[] secx_sub; + 
delete[] secy_sub; } // Step 6: Recalculate global metrics correctly for current DP boundary double cur_d0A = 1.24 * std::pow(ylen * 1.0 - 15.0, 1.0 / 3.0) - 1.8; - if (cur_d0A < 0.5) cur_d0A = 0.5; + if (cur_d0A < 0.5) + cur_d0A = 0.5; double cur_d0B = 1.24 * std::pow(xlen * 1.0 - 15.0, 1.0 / 3.0) - 1.8; - if (cur_d0B < 0.5) cur_d0B = 0.5; + if (cur_d0B < 0.5) + cur_d0B = 0.5; double cur_d0a = 1.24 * std::pow((xlen + ylen) * 0.5 - 15.0, 1.0 / 3.0) - 1.8; - if (cur_d0a < 0.5) cur_d0a = 0.5; - + if (cur_d0a < 0.5) + cur_d0a = 0.5; + double cur_d0u = 0.0; if (u_opt) { cur_d0u = 1.24 * std::pow(Lnorm_ass - 15.0, 1.0 / 3.0) - 1.8; - if (cur_d0u < 0.5) cur_d0u = 0.5; + if (cur_d0u < 0.5) + cur_d0u = 0.5; } double cur_TM1 = 0.0, cur_TM2 = 0.0, cur_TM3 = 0.0, cur_TM4 = 0.0, cur_TM5 = 0.0; @@ -3849,9 +4024,12 @@ int flexalign_fatcat_main(double **xa, double **ya, cur_TM2 += 1.0 / (1.0 + dist2 / (cur_d0B * cur_d0B)); cur_TM1 += 1.0 / (1.0 + dist2 / (cur_d0A * cur_d0A)); - if (a_opt) cur_TM3 += 1.0 / (1.0 + dist2 / (cur_d0a * cur_d0a)); - if (u_opt) cur_TM4 += 1.0 / (1.0 + dist2 / (cur_d0u * cur_d0u)); - if (d_opt) cur_TM5 += 1.0 / (1.0 + dist2 / (d0_scale * d0_scale)); + if (a_opt) + cur_TM3 += 1.0 / (1.0 + dist2 / (cur_d0a * cur_d0a)); + if (u_opt) + cur_TM4 += 1.0 / (1.0 + dist2 / (cur_d0u * cur_d0u)); + if (d_opt) + cur_TM5 += 1.0 / (1.0 + dist2 / (d0_scale * d0_scale)); cur_n_ali++; cur_do_vec.push_back(d); @@ -3860,60 +4038,113 @@ int flexalign_fatcat_main(double **xa, double **ya, { cur_rmsd0 += dist2; cur_n_ali8++; - if (seqx[i_res] == seqy[j_res]) cur_Liden += 1.0; + if (seqx[i_res] == seqy[j_res]) + cur_Liden += 1.0; } } - else { cur_do_vec.push_back(-1); } + else + { + cur_do_vec.push_back(-1); + } + } + else + { + cur_do_vec.push_back(-1); } - else { cur_do_vec.push_back(-1); } - if (x_valid) i_res++; - if (y_valid) j_res++; + if (x_valid) + i_res++; + if (y_valid) + j_res++; } // Normalize TM-scores cur_TM2 /= xlen; cur_TM1 /= ylen; - if (a_opt) cur_TM3 /= (xlen 
+ ylen) * 0.5; - if (u_opt) cur_TM4 /= Lnorm_ass; - if (d_opt) cur_TM5 /= ylen; - if (cur_n_ali8 > 0) cur_rmsd0 = std::sqrt(cur_rmsd0 / cur_n_ali8); - else cur_rmsd0 = 0.0; + if (a_opt) + cur_TM3 /= (xlen + ylen) * 0.5; + if (u_opt) + cur_TM4 /= Lnorm_ass; + if (d_opt) + cur_TM5 /= ylen; + if (cur_n_ali8 > 0) + cur_rmsd0 = std::sqrt(cur_rmsd0 / cur_n_ali8); + else + cur_rmsd0 = 0.0; // Compare against the -mm 9 defender! double cur_global_max_TM = (cur_TM1 > cur_TM2) ? cur_TM1 : cur_TM2; if (cur_global_max_TM > best_global_max_TM) { + // <--- ADD DEBUG HERE + // if (b_idx == 1) + // { + // std::cout << "[DEBUG] strict" << std::endl; + // } + best_global_max_TM = cur_global_max_TM; best_tu_vec = cur_tu_vec; - best_TM1 = cur_TM1; best_TM2 = cur_TM2; best_TM3 = cur_TM3; best_TM4 = cur_TM4; best_TM5 = cur_TM5; - best_rmsd0 = cur_rmsd0; best_Liden = cur_Liden; best_TM_ali = cur_TM1; best_rmsd_ali = cur_rmsd0; - best_L_ali = cur_n_ali; best_n_ali = cur_n_ali; best_n_ali8 = cur_n_ali8; - best_seqM = cur_global_seqM; best_seqxA = cur_global_seqxA; best_seqyA = cur_global_seqyA; + best_TM1 = cur_TM1; + best_TM2 = cur_TM2; + best_TM3 = cur_TM3; + best_TM4 = cur_TM4; + best_TM5 = cur_TM5; + best_rmsd0 = cur_rmsd0; + best_Liden = cur_Liden; + best_TM_ali = cur_TM1; + best_rmsd_ali = cur_rmsd0; + best_L_ali = cur_n_ali; + best_n_ali = cur_n_ali; + best_n_ali8 = cur_n_ali8; + best_seqM = cur_global_seqM; + best_seqxA = cur_global_seqxA; + best_seqyA = cur_global_seqyA; best_do_vec = cur_do_vec; - best_d0A = cur_d0A; best_d0B = cur_d0B; best_d0a = cur_d0a; best_d0u = cur_d0u; + best_d0A = cur_d0A; + best_d0B = cur_d0B; + best_d0a = cur_d0a; + best_d0u = cur_d0u; - if (!best_tu_vec.empty()) { + if (!best_tu_vec.empty()) + { tu2t_u(best_tu_vec[0], best_t0, best_u0); } } } // Safety check - if (best_global_max_TM < 0) return 0; + if (best_global_max_TM < 0) + return 0; // Output best values back to the reference parameters - TM1 = best_TM1; TM2 = best_TM2; TM3 = best_TM3; TM4 = 
best_TM4; TM5 = best_TM5; - rmsd0 = best_rmsd0; Liden = best_Liden; TM_ali = best_TM_ali; rmsd_ali = best_rmsd_ali; - L_ali = best_L_ali; n_ali = best_n_ali; n_ali8 = best_n_ali8; - seqM = best_seqM; seqxA = best_seqxA; seqyA = best_seqyA; - do_vec = best_do_vec; tu_vec = best_tu_vec; - d0A = best_d0A; d0B = best_d0B; d0a = best_d0a; d0u = best_d0u; - - for (int a = 0; a < 3; a++) { + TM1 = best_TM1; + TM2 = best_TM2; + TM3 = best_TM3; + TM4 = best_TM4; + TM5 = best_TM5; + rmsd0 = best_rmsd0; + Liden = best_Liden; + TM_ali = best_TM_ali; + rmsd_ali = best_rmsd_ali; + L_ali = best_L_ali; + n_ali = best_n_ali; + n_ali8 = best_n_ali8; + seqM = best_seqM; + seqxA = best_seqxA; + seqyA = best_seqyA; + do_vec = best_do_vec; + tu_vec = best_tu_vec; + d0A = best_d0A; + d0B = best_d0B; + d0a = best_d0a; + d0u = best_d0u; + + for (int a = 0; a < 3; a++) + { t0[a] = best_t0[a]; - for (int b = 0; b < 3; b++) u0[a][b] = best_u0[a][b]; + for (int b = 0; b < 3; b++) + u0[a][b] = best_u0[a][b]; } return tu_vec.size(); @@ -4030,8 +4261,7 @@ int flexalign_unified(string &xname, string &yname, const string &fname_super, fatcat_res.TM_ali, fatcat_res.rmsd_ali, fatcat_res.n_ali, fatcat_res.n_ali8, xlen, ylen, sequence, Lnorm_ass, d0_scale, i_opt, a_opt, u_opt, d_opt, force_fast_opt, - mol_vec1[chain_i] + mol_vec2[chain_j], hinge_opt, ss_opt, 0, hinge_set - ); + mol_vec1[chain_i] + mol_vec2[chain_j], hinge_opt, ss_opt, 0, hinge_set); if (outfmt_opt == 0) print_version(); @@ -4187,16 +4417,16 @@ int main(int argc, char *argv[]) bool full_opt = false; // do not show chain level alignment double TMcut = -1; bool se_opt = false; - int infmt1_opt = -1; // PDB or PDBx/mmCIF format for chain_1 - int infmt2_opt = -1; // PDB or PDBx/mmCIF format for chain_2 - int ter_opt = -1; // default change to 2 (END, or different chainID) - int split_opt = -1; // default change to 2 (split each chains) - int outfmt_opt = 0; // set -outfmt to full output - bool fast_opt = false; // flags for -fast, fTM-align 
algorithm - int cp_opt = 0; // do not check circular permutation - int closeK_opt = -1; // number of atoms for SOI initial alignment. - // 5 and 0 for -mm 5 and 6 - int hinge_opt = 9; // maximum number of hinge allowed for flexible + int infmt1_opt = -1; // PDB or PDBx/mmCIF format for chain_1 + int infmt2_opt = -1; // PDB or PDBx/mmCIF format for chain_2 + int ter_opt = -1; // default change to 2 (END, or different chainID) + int split_opt = -1; // default change to 2 (split each chains) + int outfmt_opt = 0; // set -outfmt to full output + bool fast_opt = false; // flags for -fast, fTM-align algorithm + int cp_opt = 0; // do not check circular permutation + int closeK_opt = -1; // number of atoms for SOI initial alignment. + // 5 and 0 for -mm 5 and 6 + int hinge_opt = 9; // maximum number of hinge allowed for flexible bool hinge_set = false; int mirror_opt = 0; // do not align mirror int het_opt = 0; // do not read HETATM residues @@ -4669,15 +4899,20 @@ int main(int argc, char *argv[]) if (mm_opt) { - if (i_opt) PrintErrorAndQuit("-mm cannot be used with -i or -I"); - if (u_opt) PrintErrorAndQuit("-mm cannot be used with -u or -L"); - //if (cp_opt) PrintErrorAndQuit("-mm cannot be used with -cp"); - if (dir_opt.size() && mm_opt==2) PrintErrorAndQuit("-mm 2 cannot be used with -dir"); - if (byresi_opt) PrintErrorAndQuit("-mm cannot be used with -byresi"); - if (ter_opt>=2 && (mm_opt==1 || mm_opt==2)) PrintErrorAndQuit("-mm 1 or 2 must be used with -ter 0 or -ter 1"); - if (mm_opt==4 && (yname.size() || dir2_opt.size())) - cerr<<"WARNING! structure_2 is ignored for -mm 4"<= 2 && (mm_opt == 1 || mm_opt == 2)) + PrintErrorAndQuit("-mm 1 or 2 must be used with -ter 0 or -ter 1"); + if (mm_opt == 4 && (yname.size() || dir2_opt.size())) + cerr << "WARNING! 
structure_2 is ignored for -mm 4" << endl; + if (dirpair_opt.size() && (mm_opt == 2 || mm_opt == 4)) PrintErrorAndQuit("-mm 2 or 4 cannot be used with -dirpair"); } else if (full_opt) @@ -4754,46 +4989,49 @@ int main(int argc, char *argv[]) /* real alignment. entry functions are MMalign_main and * TMalign_main */ - if (mm_opt==0) TMalign(xname, yname, fname_super, fname_lign, fname_matrix, - sequence, Lnorm_ass, d0_scale, m_opt, i_opt, o_opt, a_opt, - u_opt, d_opt, TMcut, infmt1_opt, infmt2_opt, ter_opt, - split_opt, outfmt_opt, fast_opt, cp_opt, mirror_opt, het_opt, - atom_opt, autojustify, mol_opt, dir_opt, dirpair_opt, dir1_opt, - dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, - byresi_opt, chain1_list, chain2_list, se_opt, do_opt); - else if (mm_opt==1) - { - if (dir_opt.size()>0 || dir1_opt.size()>0 || dir2_opt.size()>0) + if (mm_opt == 0) + TMalign(xname, yname, fname_super, fname_lign, fname_matrix, + sequence, Lnorm_ass, d0_scale, m_opt, i_opt, o_opt, a_opt, + u_opt, d_opt, TMcut, infmt1_opt, infmt2_opt, ter_opt, + split_opt, outfmt_opt, fast_opt, cp_opt, mirror_opt, het_opt, + atom_opt, autojustify, mol_opt, dir_opt, dirpair_opt, dir1_opt, + dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, + byresi_opt, chain1_list, chain2_list, se_opt, do_opt); + else if (mm_opt == 1) + { + if (dir_opt.size() > 0 || dir1_opt.size() > 0 || dir2_opt.size() > 0) { - for (int ii=0; ii tmp_vec1(1, xname); - for (int jj=0; jj0 && jj<=ii) continue; + if (dir_opt.size() > 0 && jj <= ii) + continue; yname = chain2_list[jj]; vector tmp_vec2(1, yname); MMalign(xname, yname, fname_super, - fname_lign, fname_matrix, sequence, d0_scale, m_opt, o_opt, - a_opt, d_opt, full_opt, TMcut, infmt1_opt, infmt2_opt, - ter_opt, split_opt, outfmt_opt, fast_opt, mirror_opt, - het_opt, atom_opt, autojustify, mol_opt, - dir_opt+dir1_opt, dir_opt+dir2_opt, - chain2parse1, chain2parse2, model2parse1, model2parse2, - tmp_vec1, tmp_vec2, byresi_opt, chainmapfile, 
se_opt); + fname_lign, fname_matrix, sequence, d0_scale, m_opt, o_opt, + a_opt, d_opt, full_opt, TMcut, infmt1_opt, infmt2_opt, + ter_opt, split_opt, outfmt_opt, fast_opt, mirror_opt, + het_opt, atom_opt, autojustify, mol_opt, + dir_opt + dir1_opt, dir_opt + dir2_opt, + chain2parse1, chain2parse2, model2parse1, model2parse2, + tmp_vec1, tmp_vec2, byresi_opt, chainmapfile, se_opt); vector().swap(tmp_vec2); } vector().swap(tmp_vec1); } } - else if (dirpair_opt.size()==0) MMalign(xname, yname, fname_super, - fname_lign, fname_matrix, sequence, d0_scale, m_opt, o_opt, - a_opt, d_opt, full_opt, TMcut, infmt1_opt, infmt2_opt, - ter_opt, split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt, - atom_opt, autojustify, mol_opt, dir1_opt, dir2_opt, - chain2parse1, chain2parse2, model2parse1, model2parse2, - chain1_list, chain2_list, byresi_opt,chainmapfile, se_opt); + else if (dirpair_opt.size() == 0) + MMalign(xname, yname, fname_super, + fname_lign, fname_matrix, sequence, d0_scale, m_opt, o_opt, + a_opt, d_opt, full_opt, TMcut, infmt1_opt, infmt2_opt, + ter_opt, split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt, + atom_opt, autojustify, mol_opt, dir1_opt, dir2_opt, + chain2parse1, chain2parse2, model2parse1, model2parse2, + chain1_list, chain2_list, byresi_opt, chainmapfile, se_opt); else { vector tmp_vec1; From e3646e9b4b743b481db40546c2628a6e0e444ac8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A5=9D=E4=BA=91=E7=AF=AA?= Date: Tue, 16 Jun 2026 10:23:47 +0800 Subject: [PATCH 22/23] rename symbol --- USalign.cpp | 74 ++++++++++++++++++++++++++--------------------------- flexalign.h | 2 +- 2 files changed, 38 insertions(+), 38 deletions(-) diff --git a/USalign.cpp b/USalign.cpp index 77e78b9..25c9039 100644 --- a/USalign.cpp +++ b/USalign.cpp @@ -3486,7 +3486,7 @@ int flexalign_fatcat_main(double **xa, double **ya, std::vector afps; std::vector dvars; }; - std::vector blocks; + std::vector candidate_blocks; Block curr_block; curr_block.afps.push_back(merged_afps[path[0]]); 
curr_block.dvars.push_back(0.0); @@ -3499,7 +3499,7 @@ int flexalign_fatcat_main(double **xa, double **ya, if (dvar >= disCut) { - blocks.push_back(curr_block); + candidate_blocks.push_back(curr_block); curr_block.afps.clear(); curr_block.dvars.clear(); curr_block.afps.push_back(curr); @@ -3512,20 +3512,20 @@ int flexalign_fatcat_main(double **xa, double **ya, } } if (!curr_block.afps.empty()) - blocks.push_back(curr_block); + candidate_blocks.push_back(curr_block); bool splitted = true; - while (splitted && blocks.size() < (size_t)(max_twists + 1)) + while (splitted && candidate_blocks.size() < (size_t)(max_twists + 1)) { splitted = false; double max_rmsd = 0.0; int target_b = -1; - for (size_t b = 0; b < blocks.size(); b++) + for (size_t b = 0; b < candidate_blocks.size(); b++) { - if (blocks[b].afps.size() > 2) + if (candidate_blocks[b].afps.size() > 2) { - double cur_rmsd = calc_block_rmsd(blocks[b].afps); + double cur_rmsd = calc_block_rmsd(candidate_blocks[b].afps); if (cur_rmsd > max_rmsd) { max_rmsd = cur_rmsd; @@ -3538,11 +3538,11 @@ int flexalign_fatcat_main(double **xa, double **ya, { double max_t = 0; int cut_idx = 0; - for (size_t i = 1; i < blocks[target_b].afps.size(); i++) + for (size_t i = 1; i < candidate_blocks[target_b].afps.size(); i++) { - if (blocks[target_b].dvars[i] > max_t) + if (candidate_blocks[target_b].dvars[i] > max_t) { - max_t = blocks[target_b].dvars[i]; + max_t = candidate_blocks[target_b].dvars[i]; cut_idx = i; } } @@ -3550,44 +3550,44 @@ int flexalign_fatcat_main(double **xa, double **ya, if (cut_idx > 0) { Block right_blk; - right_blk.afps.assign(blocks[target_b].afps.begin() + cut_idx, blocks[target_b].afps.end()); - right_blk.dvars.assign(blocks[target_b].dvars.begin() + cut_idx, blocks[target_b].dvars.end()); + right_blk.afps.assign(candidate_blocks[target_b].afps.begin() + cut_idx, candidate_blocks[target_b].afps.end()); + right_blk.dvars.assign(candidate_blocks[target_b].dvars.begin() + cut_idx, 
candidate_blocks[target_b].dvars.end()); right_blk.dvars[0] = 0.0; - blocks[target_b].afps.erase(blocks[target_b].afps.begin() + cut_idx, blocks[target_b].afps.end()); - blocks[target_b].dvars.erase(blocks[target_b].dvars.begin() + cut_idx, blocks[target_b].dvars.end()); - blocks.insert(blocks.begin() + target_b + 1, right_blk); + candidate_blocks[target_b].afps.erase(candidate_blocks[target_b].afps.begin() + cut_idx, candidate_blocks[target_b].afps.end()); + candidate_blocks[target_b].dvars.erase(candidate_blocks[target_b].dvars.begin() + cut_idx, candidate_blocks[target_b].dvars.end()); + candidate_blocks.insert(candidate_blocks.begin() + target_b + 1, right_blk); splitted = true; } } } - for (int b = 0; b < (int)blocks.size(); b++) + for (int b = 0; b < (int)candidate_blocks.size(); b++) { - if (blocks[b].afps.size() <= 1) + if (candidate_blocks[b].afps.size() <= 1) { - int e1 = (b < (int)blocks.size() - 1) ? blocks[b + 1].afps.front().i : xlen; - int e2 = (b < (int)blocks.size() - 1) ? blocks[b + 1].afps.front().j : ylen; - int b1 = (b > 0) ? blocks[b - 1].afps.back().i + blocks[b - 1].afps.back().len : 0; - int b2 = (b > 0) ? blocks[b - 1].afps.back().j + blocks[b - 1].afps.back().len : 0; + int e1 = (b < (int)candidate_blocks.size() - 1) ? candidate_blocks[b + 1].afps.front().i : xlen; + int e2 = (b < (int)candidate_blocks.size() - 1) ? candidate_blocks[b + 1].afps.front().j : ylen; + int b1 = (b > 0) ? candidate_blocks[b - 1].afps.back().i + candidate_blocks[b - 1].afps.back().len : 0; + int b2 = (b > 0) ? 
candidate_blocks[b - 1].afps.back().j + candidate_blocks[b - 1].afps.back().len : 0; int span = std::min(e1 - b1, e2 - b2); if (span < 2 * fragLen) { - blocks.erase(blocks.begin() + b); + candidate_blocks.erase(candidate_blocks.begin() + b); b--; } } } bool merged = true; - while (merged && blocks.size() > 1) + while (merged && candidate_blocks.size() > 1) { merged = false; double min_rmsd = 1e9; int min_b = -1; - for (size_t b = 0; b < blocks.size() - 1; b++) + for (size_t b = 0; b < candidate_blocks.size() - 1; b++) { - std::vector temp_merged = blocks[b].afps; - temp_merged.insert(temp_merged.end(), blocks[b + 1].afps.begin(), blocks[b + 1].afps.end()); + std::vector temp_merged = candidate_blocks[b].afps; + temp_merged.insert(temp_merged.end(), candidate_blocks[b + 1].afps.begin(), candidate_blocks[b + 1].afps.end()); double cur_rmsd = calc_block_rmsd(temp_merged); if (cur_rmsd < min_rmsd) { @@ -3598,20 +3598,20 @@ int flexalign_fatcat_main(double **xa, double **ya, if (min_rmsd < cur_local_badRmsd && min_b != -1) { - blocks[min_b].afps.insert(blocks[min_b].afps.end(), blocks[min_b + 1].afps.begin(), blocks[min_b + 1].afps.end()); - blocks.erase(blocks.begin() + min_b + 1); + candidate_blocks[min_b].afps.insert(candidate_blocks[min_b].afps.end(), candidate_blocks[min_b + 1].afps.begin(), candidate_blocks[min_b + 1].afps.end()); + candidate_blocks.erase(candidate_blocks.begin() + min_b + 1); merged = true; } } - std::vector real_blocks; + std::vector fatcat_domains; int last_i = 0, last_j = 0; - for (size_t b = 0; b < blocks.size(); b++) + for (size_t b = 0; b < candidate_blocks.size(); b++) { int b_s1 = -1, b_e1 = -1, b_s2 = -1, b_e2 = -1; - for (size_t a = 0; a < blocks[b].afps.size(); a++) + for (size_t a = 0; a < candidate_blocks[b].afps.size(); a++) { - FATCAT_AFP afp = blocks[b].afps[a]; + FATCAT_AFP afp = candidate_blocks[b].afps[a]; int skip = std::max(std::max(last_i - afp.i, last_j - afp.j), 0); if (skip >= afp.len) continue; @@ -3634,20 +3634,20 @@ 
int flexalign_fatcat_main(double **xa, double **ya, if (b_e1 - b_s1 >= 4 && b_e2 - b_s2 >= 4) { Region r = {b_s1, b_e1, b_s2, b_e2}; - real_blocks.push_back(r); + fatcat_domains.push_back(r); } } } - if (real_blocks.empty()) + if (fatcat_domains.empty()) return std::make_pair(ret_b1, ret_b2); ret_b1.push_back(0); ret_b2.push_back(0); - for (size_t k = 0; k < real_blocks.size() - 1; k++) + for (size_t k = 0; k < fatcat_domains.size() - 1; k++) { - ret_b1.push_back((real_blocks[k].e1 + real_blocks[k + 1].s1) / 2); - ret_b2.push_back((real_blocks[k].e2 + real_blocks[k + 1].s2) / 2); + ret_b1.push_back((fatcat_domains[k].e1 + fatcat_domains[k + 1].s1) / 2); + ret_b2.push_back((fatcat_domains[k].e2 + fatcat_domains[k + 1].s2) / 2); } ret_b1.push_back(xlen); ret_b2.push_back(ylen); diff --git a/flexalign.h b/flexalign.h index 9423d7d..0eb1d1c 100644 --- a/flexalign.h +++ b/flexalign.h @@ -1999,7 +1999,7 @@ void output_flexalign_results(const string xname, const string yname, printf("(You should use TM-score normalized by length of the reference structure)\n"); // output alignment - printf("\n([0-9] denote different aligned fragment pairs separated by different hinges)\n"); + printf("\n([0-9,a-z,A-Z] denote different aligned fragment pairs separated by different hinges)\n"); printf("%s\n", seqxA); printf("%s\n", seqM); printf("%s\n", seqyA); From 9b42a79f90d163eac16fbc23cfa4a3efa17b883d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A5=9D=E4=BA=91=E7=AF=AA?= Date: Mon, 29 Jun 2026 17:25:21 +0800 Subject: [PATCH 23/23] merge all flexalign to -mm 7 --- USalign.cpp | 1369 ++------------------------------------------------- flexalign.h | 1276 +++++++++++++++++++++++++++++++++++++++++++++++ param_set.h | 112 +++-- 3 files changed, 1376 insertions(+), 1381 deletions(-) diff --git a/USalign.cpp b/USalign.cpp index 25c9039..9b0933b 100644 --- a/USalign.cpp +++ b/USalign.cpp @@ -94,6 +94,9 @@ void print_extra_help() "\n" " -hinge Maximum number of hinge allowed in flexible 
alignment. default: 9\n" "\n" + " -fatcat Enable FATCAT-based alignment mechanism. Only functional\n" + " when used in combination with '-mm 7' (calls flexalign_fatcat).\n" + "\n" " -se Do not perform superposition. Useful for extracting alignment from\n" " superposed structure pairs\n" "\n" @@ -2853,1304 +2856,7 @@ int SOIalign(string &xname, string &yname, const string &fname_super, return 0; } -// ======================================================================= -// Data structures and Helpers for flexalign unified pipeline -// ======================================================================= - -// Data structure to hold outputs of flexalign_main to avoid parameter clutter -struct FlexAlignResult -{ - double t0[3]; - double u0[3][3]; - vector> tu_vec; - double TM1, TM2, TM3, TM4, TM5; - double d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out; - string seqM, seqxA, seqyA; - vector do_vec; - double rmsd0, Liden, TM_ali, rmsd_ali; - int L_ali, n_ali, n_ali8, hingeNum; - - FlexAlignResult() : TM1(-1.0), TM2(-1.0), TM3(-1.0), TM4(-1.0), TM5(-1.0), - d0_0(0.0), TM_0(0.0), d0A(0.0), d0B(0.0), d0u(0.0), d0a(0.0), d0_out(5.0), - rmsd0(0.0), Liden(0.0), TM_ali(0.0), rmsd_ali(0.0), - L_ali(0), n_ali(0), n_ali8(0), hingeNum(0) - { - for (int i = 0; i < 3; i++) - { - t0[i] = 0.0; - for (int j = 0; j < 3; j++) - u0[i][j] = (i == j) ? 
1.0 : 0.0; - } - } -}; - -enum FlexAlignMode -{ - FLEX_STANDARD = 0, - FLEX_BEST = 1, - FLEX_FATCAT = 2 -}; - -// Encapsulates the execution of flexalign_main and its fallback refinement logic -void execute_flexalign_with_fallback( - double **xa, double **ya, char *seqx, char *seqy, char *secx, char *secy, - int xlen, int ylen, vector &sequence, const double Lnorm_ass, const double d0_scale, - const int i_opt, const int a_opt, const bool u_opt, const bool d_opt, const bool force_fast_opt, - const int mol_type, const int hinge_opt, const int ss_opt, FlexAlignResult &res) -{ - res.hingeNum = flexalign_main( - xa, ya, seqx, seqy, secx, secy, - res.t0, res.u0, res.tu_vec, res.TM1, res.TM2, res.TM3, res.TM4, res.TM5, - res.d0_0, res.TM_0, res.d0A, res.d0B, res.d0u, res.d0a, res.d0_out, - res.seqM, res.seqxA, res.seqyA, res.do_vec, - res.rmsd0, res.L_ali, res.Liden, res.TM_ali, res.rmsd_ali, res.n_ali, res.n_ali8, - xlen, ylen, sequence, Lnorm_ass, d0_scale, - i_opt, a_opt, u_opt, d_opt, force_fast_opt, - mol_type, hinge_opt, ss_opt); - - // Fallback compensation when too few hinges are found - if (hinge_opt && res.hingeNum <= 1 && res.n_ali8 < 0.6 * getmin(xlen, ylen)) - { - FlexAlignResult res_h; - res_h.tu_vec.push_back(res.tu_vec[0]); - tu2t_u(res.tu_vec[0], res_h.t0, res_h.u0); - - res_h.hingeNum = flexalign_main( - xa, ya, seqx, seqy, secx, secy, - res_h.t0, res_h.u0, res_h.tu_vec, - res_h.TM1, res_h.TM2, res_h.TM3, res_h.TM4, res_h.TM5, - res_h.d0_0, res_h.TM_0, res.d0A, res.d0B, res.d0u, res.d0a, res_h.d0_out, - res_h.seqM, res_h.seqxA, res_h.seqyA, res_h.do_vec, - res_h.rmsd0, res_h.L_ali, res_h.Liden, res_h.TM_ali, res_h.rmsd_ali, - res_h.n_ali, res_h.n_ali8, - xlen, ylen, sequence, Lnorm_ass, d0_scale, i_opt, - a_opt, u_opt, d_opt, force_fast_opt, - mol_type, hinge_opt, ss_opt); - - double TM = (res.TM1 > res.TM2) ? res.TM1 : res.TM2; - double TM_h = (res_h.TM1 > res_h.TM2) ? 
res_h.TM1 : res_h.TM2; - if (TM_h > TM) - { - res = res_h; // Safely overwrite with the better refined results - } - } -} - -// ========================================== -// FATCAT Core Algorithm (flexalign_fatcat_main) -// ========================================== -struct FATCAT_AFP -{ - int i, j, len; - double score; - double R[3][3]; - double t[3]; -}; - -int flexalign_fatcat_main(double **xa, double **ya, - const char *seqx, const char *seqy, const char *secx, const char *secy, - double t0[3], double u0[3][3], std::vector> &tu_vec, - double &TM1, double &TM2, double &TM3, double &TM4, double &TM5, - double &d0_0, double &TM_0, - double &d0A, double &d0B, double &d0u, double &d0a, double &d0_out, - std::string &seqM, std::string &seqxA, std::string &seqyA, std::vector &do_vec, - double &rmsd0, int &L_ali, double &Liden, - double &TM_ali, double &rmsd_ali, int &n_ali, int &n_ali8, - const int xlen, const int ylen, - const std::vector sequence, const double Lnorm_ass, - const double d0_scale, const int i_opt, const int a_opt, - const bool u_opt, const bool d_opt, const bool fast_opt, - const int mol_type, const int hinge_opt, const int ss_opt, - int sparse_val = 0, bool hinge_set = false) -{ - // ========================================== - // TRUE -mm 9 BASELINE (Defender) - // Run full sequence without generate_bounds slicing! - // This perfectly simulates FLEX_BEST (-mm 9) behavior. 
- // ========================================== - double best_global_max_TM = -1.0; - std::vector> best_tu_vec; - double best_t0[3], best_u0[3][3]; - double best_TM1 = 0.0, best_TM2 = 0.0, best_TM3 = 0.0, best_TM4 = 0.0, best_TM5 = 0.0; - double best_rmsd0 = 0.0, best_Liden = 0.0, best_TM_ali = 0.0, best_rmsd_ali = 0.0; - int best_L_ali = 0, best_n_ali = 0, best_n_ali8 = 0; - std::string best_seqM = "", best_seqxA = "", best_seqyA = ""; - std::vector best_do_vec; - double best_d0A = 0.0, best_d0B = 0.0, best_d0a = 0.0, best_d0u = 0.0; - - bool force_fast_opt_global = (std::min(xlen, ylen) > 1500) ? true : fast_opt; - std::vector local_sequence = sequence; - - for (int cur_ss_opt = 0; cur_ss_opt <= 1; cur_ss_opt++) - { - FlexAlignResult base_res; - // Pass full unbroken sequences directly to flexalign (identical to -mm 9) - execute_flexalign_with_fallback( - xa, ya, (char *)seqx, (char *)seqy, (char *)secx, (char *)secy, - xlen, ylen, local_sequence, Lnorm_ass, d0_scale, - i_opt, a_opt, u_opt, d_opt, force_fast_opt_global, - mol_type, hinge_opt, cur_ss_opt, base_res); // -mm 9 explicitly uses 9 hinges - - double cur_max_TM = (base_res.TM1 > base_res.TM2) ? 
base_res.TM1 : base_res.TM2; - if (cur_max_TM > best_global_max_TM) - { - best_global_max_TM = cur_max_TM; - for (int a = 0; a < 3; a++) - { - best_t0[a] = base_res.t0[a]; - for (int b = 0; b < 3; b++) - best_u0[a][b] = base_res.u0[a][b]; - } - best_tu_vec = base_res.tu_vec; - best_TM1 = base_res.TM1; - best_TM2 = base_res.TM2; - best_TM3 = base_res.TM3; - best_TM4 = base_res.TM4; - best_TM5 = base_res.TM5; - best_rmsd0 = base_res.rmsd0; - best_Liden = base_res.Liden; - best_TM_ali = base_res.TM_ali; - best_rmsd_ali = base_res.rmsd_ali; - best_L_ali = base_res.L_ali; - best_n_ali = base_res.n_ali; - best_n_ali8 = base_res.n_ali8; - best_seqM = base_res.seqM; - best_seqxA = base_res.seqxA; - best_seqyA = base_res.seqyA; - best_do_vec = base_res.do_vec; - best_d0A = base_res.d0A; - best_d0B = base_res.d0B; - best_d0a = base_res.d0a; - best_d0u = base_res.d0u; - } - } - - // Early exit if the true -mm 9 baseline is already excellent - if (best_global_max_TM >= 0.85) - { - // <--- ADD DEBUG HERE: Output early exit confirmation - // std::cout << "[DEBUG] MM9" << std::endl; - - TM1 = best_TM1; - TM2 = best_TM2; - TM3 = best_TM3; - TM4 = best_TM4; - TM5 = best_TM5; - rmsd0 = best_rmsd0; - Liden = best_Liden; - TM_ali = best_TM_ali; - rmsd_ali = best_rmsd_ali; - L_ali = best_L_ali; - n_ali = best_n_ali; - n_ali8 = best_n_ali8; - seqM = best_seqM; - seqxA = best_seqxA; - seqyA = best_seqyA; - do_vec = best_do_vec; - tu_vec = best_tu_vec; - d0A = best_d0A; - d0B = best_d0B; - d0a = best_d0a; - d0u = best_d0u; - for (int a = 0; a < 3; a++) - { - t0[a] = best_t0[a]; - for (int b = 0; b < 3; b++) - u0[a][b] = best_u0[a][b]; - } - return tu_vec.size(); - } - - // ========================================== - // Proceed to FATCAT sliced bounds logic... 
- // ========================================== - - // FATCAT base parameters - int fragLen = 8; - double resScore = 3.0; - double gap_ext = -0.5; - double disCut = 5.0; - double disSmooth = 4.0; - double twist_pen = -25.0; - int max_gap = 40; - double max_penalty = -5.0; - int misCut = 2 * fragLen; - int maxGapFrag = fragLen + max_gap; - double afp_dis_cut = fragLen * fragLen * (disCut * disCut); - int max_twists = hinge_opt; - - // OPTIMIZATION 1: Precompute local intra-protein distance matrices - int max_dist_window = max_gap + 2 * fragLen + 1; - std::vector> disTable1(xlen, std::vector(max_dist_window, 0.0)); - std::vector> disTable2(ylen, std::vector(max_dist_window, 0.0)); - - for (int i = 0; i < xlen; i++) - { - for (int j = i; j < std::min(xlen, i + max_dist_window); j++) - disTable1[i][j - i] = std::sqrt(dist(xa[i], xa[j])); - } - for (int i = 0; i < ylen; i++) - { - for (int j = i; j < std::min(ylen, i + max_dist_window); j++) - disTable2[i][j - i] = std::sqrt(dist(ya[i], ya[j])); - } - - // Wrapper for generating bounds - auto generate_bounds = [&](double cur_rmsdCut, double cur_badRmsd, double cur_local_badRmsd) -> std::pair, std::vector> - { - // Step 1: Extract initial AFPs in batches - std::vector initial_afps; - int step = sparse_val + 1; - - double r1_static[8][3], r2_static[8][3]; - double *r1[8], *r2[8]; - for (int k = 0; k < 8; k++) - { - r1[k] = r1_static[k]; - r2[k] = r2_static[k]; - } - - for (int i = 0; i <= xlen - fragLen; i += step) - { - for (int j = 0; j <= ylen - fragLen; j += step) - { - int d3_term = std::min(i, j) + std::min(xlen - (i + fragLen - 1), ylen - (j + fragLen)) + fragLen; - if (d3_term < 0.3 * std::min(xlen, ylen)) - continue; - - double dist1 = disTable1[i][fragLen - 1]; - double dist2 = disTable2[j][fragLen - 1]; - - if (std::fabs(dist1 - dist2) > 2.0 * cur_rmsdCut) - continue; - - for (int k = 0; k < fragLen; k++) - { - r1[k][0] = xa[i + k][0]; - r1[k][1] = xa[i + k][1]; - r1[k][2] = xa[i + k][2]; - r2[k][0] = ya[j + 
k][0]; - r2[k][1] = ya[j + k][1]; - r2[k][2] = ya[j + k][2]; - } - - double rms_sum_sq, t_tmp[3], u_tmp[3][3]; - Kabsch(r1, r2, fragLen, 0, &rms_sum_sq, t_tmp, u_tmp); - double rmsd_tmp = std::sqrt(rms_sum_sq / fragLen); - - if (rmsd_tmp < cur_rmsdCut) - { - FATCAT_AFP afp; - afp.i = i; - afp.j = j; - afp.len = fragLen; - afp.score = resScore * fragLen * (1.0 - (rmsd_tmp / cur_badRmsd) * (rmsd_tmp / cur_badRmsd)); - for (int a = 0; a < 3; a++) - { - afp.t[a] = t_tmp[a]; - for (int b = 0; b < 3; b++) - afp.R[a][b] = u_tmp[a][b]; - } - initial_afps.push_back(afp); - } - } - } - - // Step 2: Merge diagonal AFPs - int max_diagonal_idx = xlen + ylen + 1; - std::vector> diagonals(max_diagonal_idx); - for (size_t k = 0; k < initial_afps.size(); k++) - { - diagonals[initial_afps[k].i - initial_afps[k].j + ylen].push_back(initial_afps[k]); - } - - std::vector merged_afps; - int max_merge_len = std::min(xlen, ylen); - double **r1_merge, **r2_merge; - NewArray(&r1_merge, max_merge_len, 3); - NewArray(&r2_merge, max_merge_len, 3); - - for (int d = 0; d < max_diagonal_idx; d++) - { - if (diagonals[d].empty()) - continue; - std::vector &group = diagonals[d]; - - std::sort(group.begin(), group.end(), [](const FATCAT_AFP &a, const FATCAT_AFP &b) - { return a.i < b.i; }); - - int n_group = group.size(); - std::vector invalid(n_group, false); - for (int idx = 0; idx < n_group; idx++) - { - if (invalid[idx]) - continue; - FATCAT_AFP curr = group[idx]; - for (int nxt_idx = idx + 1; nxt_idx < n_group; nxt_idx++) - { - FATCAT_AFP nxt = group[nxt_idx]; - if (nxt.i > curr.i + curr.len) - break; - - if (nxt.i + nxt.len > curr.i + curr.len) - { - int new_len = (nxt.i + nxt.len) - curr.i; - - for (int k = 0; k < new_len; k++) - { - r1_merge[k][0] = xa[curr.i + k][0]; - r1_merge[k][1] = xa[curr.i + k][1]; - r1_merge[k][2] = xa[curr.i + k][2]; - r2_merge[k][0] = ya[curr.j + k][0]; - r2_merge[k][1] = ya[curr.j + k][1]; - r2_merge[k][2] = ya[curr.j + k][2]; - } - - double rms_sum_sq, t_tmp[3], 
u_tmp[3][3]; - Kabsch(r1_merge, r2_merge, new_len, 0, &rms_sum_sq, t_tmp, u_tmp); - double rmsd_tmp = std::sqrt(rms_sum_sq / new_len); - - if (rmsd_tmp < cur_rmsdCut) - { - curr.len = new_len; - for (int a = 0; a < 3; a++) - { - curr.t[a] = t_tmp[a]; - for (int b = 0; b < 3; b++) - curr.R[a][b] = u_tmp[a][b]; - } - curr.score = resScore * new_len * (1.0 - (rmsd_tmp / cur_badRmsd) * (rmsd_tmp / cur_badRmsd)); - invalid[nxt_idx] = true; - } - } - } - merged_afps.push_back(curr); - } - } - DeleteArray(&r1_merge, max_merge_len); - DeleteArray(&r2_merge, max_merge_len); - - std::sort(merged_afps.begin(), merged_afps.end(), [](const FATCAT_AFP &a, const FATCAT_AFP &b) - { - if (a.i == b.i) return a.j < b.j; - return a.i < b.i; }); - - int n_afps = merged_afps.size(); - std::vector ret_b1, ret_b2; - if (n_afps == 0) - return std::make_pair(ret_b1, ret_b2); - - // Step 3 & 4: Dual Dynamic Programming and Domain Splitting - std::vector afp_aft_index(xlen * ylen, -1); - std::vector afp_bef_index(xlen * ylen, -1); - - std::vector>> i_to_j(xlen); - for (int m = 0; m < n_afps; m++) - { - i_to_j[merged_afps[m].i].push_back(std::make_pair(merged_afps[m].j, m)); - } - - for (int i_val = 0; i_val < xlen; i_val++) - { - if (i_to_j[i_val].empty()) - continue; - for (size_t p = 0; p < i_to_j[i_val].size(); p++) - { - int j_val = i_to_j[i_val][p].first; - afp_aft_index[i_val * ylen + j_val] = i_to_j[i_val][p].second; - afp_bef_index[i_val * ylen + j_val] = i_to_j[i_val][p].second; - } - int curr_bef = -1; - for (int j_val = 0; j_val < ylen; j_val++) - { - if (afp_bef_index[i_val * ylen + j_val] != -1) - curr_bef = afp_bef_index[i_val * ylen + j_val]; - else - afp_bef_index[i_val * ylen + j_val] = curr_bef; - } - int curr_aft = -1; - for (int j_val = ylen - 1; j_val >= 0; j_val--) - { - if (afp_aft_index[i_val * ylen + j_val] != -1) - curr_aft = afp_aft_index[i_val * ylen + j_val]; - else - afp_aft_index[i_val * ylen + j_val] = curr_aft; - } - } - - auto get_dvar = [&](const FATCAT_AFP 
&prv, const FATCAT_AFP &curr) -> double - { - double rms_sq = 0; - for (int i_idx = 0; i_idx < fragLen; i_idx++) - { - for (int j_idx = 0; j_idx < fragLen; j_idx++) - { - double dist1, dist2; - int idx1_a = curr.i + i_idx, idx1_b = prv.i + j_idx; - if (idx1_a >= idx1_b) - dist1 = disTable1[idx1_b][idx1_a - idx1_b]; - else - dist1 = disTable1[idx1_a][idx1_b - idx1_a]; - - int idx2_a = curr.j + i_idx, idx2_b = prv.j + j_idx; - if (idx2_a >= idx2_b) - dist2 = disTable2[idx2_b][idx2_a - idx2_b]; - else - dist2 = disTable2[idx2_a][idx2_b - idx2_a]; - - rms_sq += (dist1 - dist2) * (dist1 - dist2); - } - } - if (rms_sq > afp_dis_cut) - return 1e9; - return std::sqrt(rms_sq / (fragLen * fragLen)); - }; - - auto calc_block_rmsd = [&](const std::vector &afp_list) -> double - { - std::vector r1, r2; - for (size_t a = 0; a < afp_list.size(); a++) - { - for (int l = 0; l < afp_list[a].len; l++) - { - r1.push_back(afp_list[a].i + l); - r2.push_back(afp_list[a].j + l); - } - } - int n = r1.size(); - if (n < 3) - return 0.0; - double **p1; - NewArray(&p1, n, 3); - double **p2; - NewArray(&p2, n, 3); - for (int i = 0; i < n; i++) - { - p1[i][0] = xa[r1[i]][0]; - p1[i][1] = xa[r1[i]][1]; - p1[i][2] = xa[r1[i]][2]; - p2[i][0] = ya[r2[i]][0]; - p2[i][1] = ya[r2[i]][1]; - p2[i][2] = ya[r2[i]][2]; - } - double rms_sq_sum, t_tmp[3], u_tmp[3][3]; - Kabsch(p1, p2, n, 0, &rms_sq_sum, t_tmp, u_tmp); - DeleteArray(&p1, n); - DeleteArray(&p2, n); - return std::sqrt(rms_sq_sum / n); - }; - - struct Region - { - int s1, e1, s2, e2; - }; - - std::vector sco(n_afps); - std::vector twi(n_afps, 0); - std::vector pre(n_afps, -1); - for (int m = 0; m < n_afps; m++) - sco[m] = merged_afps[m].score; - - for (int m = 0; m < n_afps; m++) - { - int curr_i = merged_afps[m].i; - int curr_j = merged_afps[m].j; - int a3 = curr_i - fragLen; - int a2 = std::max(0, a3 - misCut); - int a1 = std::max(0, curr_i - maxGapFrag); - int b3 = curr_j - fragLen; - int b2 = std::max(0, b3 - misCut); - int b1 = std::max(0, 
curr_j - maxGapFrag); - - std::vector valid_prevs; - for (int st = 0; st < 2; st++) - { - int a_s, a_e, b_s, b_e; - if (st == 0) - { - a_s = std::max(a1, 0); - a_e = std::min(a3, xlen - 1); - b_s = std::max(b2, 0); - b_e = std::min(b3, ylen - 1); - } - else - { - a_s = std::max(a2, 0); - a_e = std::min(a3, xlen - 1); - b_s = std::max(b1, 0); - b_e = std::min(b2 - 1, ylen - 1); - } - - if (b_s >= ylen || b_e < 0) - continue; - for (int prev_i = a_s; prev_i <= a_e; prev_i++) - { - int s1 = afp_aft_index[prev_i * ylen + b_s]; - int s2 = afp_bef_index[prev_i * ylen + b_e]; - if (s1 != -1 && s2 != -1 && s1 <= s2) - for (int s = s1; s <= s2; s++) - valid_prevs.push_back(s); - } - } - - double curr_sco = merged_afps[m].score; - for (size_t v = 0; v < valid_prevs.size(); v++) - { - int prev = valid_prevs[v]; - int prev_twi = twi[prev]; - if (prev_twi > max_twists) - continue; - - int gap_i = curr_i - (merged_afps[prev].i + merged_afps[prev].len); - int gap_j = curr_j - (merged_afps[prev].j + merged_afps[prev].len); - int m_gap = std::max(gap_i, gap_j); - - double gp = 0.0; - int m_mis = 0; - if (gap_i < 0 || gap_j < 0) - m_mis = (gap_i < gap_j) ? 
-gap_i : -gap_j; - gp = gap_ext * m_mis; - if (m_gap > 0) - gp += gap_ext * m_gap; - if (gp < max_penalty) - gp = max_penalty; - - double rms_sq = 0; - for (int k = 0; k < fragLen; k++) - { - for (int l = 0; l < fragLen; l++) - { - double dist1, dist2; - int idx1_a = curr_i + k, idx1_b = merged_afps[prev].i + l; - if (idx1_a >= idx1_b) - dist1 = disTable1[idx1_b][idx1_a - idx1_b]; - else - dist1 = disTable1[idx1_a][idx1_b - idx1_a]; - - int idx2_a = curr_j + k, idx2_b = merged_afps[prev].j + l; - if (idx2_a >= idx2_b) - dist2 = disTable2[idx2_b][idx2_a - idx2_b]; - else - dist2 = disTable2[idx2_a][idx2_b - idx2_a]; - - rms_sq += (dist1 - dist2) * (dist1 - dist2); - } - } - - double tp = 0.0; - int is_twist = 0; - if (rms_sq >= afp_dis_cut) - { - tp = twist_pen; - is_twist = 1; - } - else - { - double dvar = std::sqrt(rms_sq / (fragLen * fragLen)); - if (dvar > disCut - disSmooth) - tp = twist_pen * std::sqrt((dvar - disCut + disSmooth) / disSmooth); - } - - if (prev_twi + is_twist > max_twists) - continue; - - double stmp = sco[prev] + curr_sco + tp + gp; - if (stmp > sco[m]) - { - sco[m] = stmp; - pre[m] = prev; - twi[m] = prev_twi + is_twist; - } - } - } - - int best_m = 0; - for (int m = 1; m < n_afps; m++) - if (sco[m] > sco[best_m]) - best_m = m; - - std::vector path; - int curr_m = best_m; - while (curr_m != -1) - { - path.push_back(curr_m); - curr_m = pre[curr_m]; - } - std::reverse(path.begin(), path.end()); - - if (path.empty()) - return std::make_pair(ret_b1, ret_b2); - - struct Block - { - std::vector afps; - std::vector dvars; - }; - std::vector candidate_blocks; - Block curr_block; - curr_block.afps.push_back(merged_afps[path[0]]); - curr_block.dvars.push_back(0.0); - - for (size_t k = 1; k < path.size(); k++) - { - FATCAT_AFP curr = merged_afps[path[k]]; - FATCAT_AFP prv = merged_afps[path[k - 1]]; - double dvar = get_dvar(prv, curr); - - if (dvar >= disCut) - { - candidate_blocks.push_back(curr_block); - curr_block.afps.clear(); - 
curr_block.dvars.clear(); - curr_block.afps.push_back(curr); - curr_block.dvars.push_back(0.0); - } - else - { - curr_block.afps.push_back(curr); - curr_block.dvars.push_back(dvar); - } - } - if (!curr_block.afps.empty()) - candidate_blocks.push_back(curr_block); - - bool splitted = true; - while (splitted && candidate_blocks.size() < (size_t)(max_twists + 1)) - { - splitted = false; - double max_rmsd = 0.0; - int target_b = -1; - - for (size_t b = 0; b < candidate_blocks.size(); b++) - { - if (candidate_blocks[b].afps.size() > 2) - { - double cur_rmsd = calc_block_rmsd(candidate_blocks[b].afps); - if (cur_rmsd > max_rmsd) - { - max_rmsd = cur_rmsd; - target_b = b; - } - } - } - - if (max_rmsd >= cur_local_badRmsd && target_b != -1) - { - double max_t = 0; - int cut_idx = 0; - for (size_t i = 1; i < candidate_blocks[target_b].afps.size(); i++) - { - if (candidate_blocks[target_b].dvars[i] > max_t) - { - max_t = candidate_blocks[target_b].dvars[i]; - cut_idx = i; - } - } - - if (cut_idx > 0) - { - Block right_blk; - right_blk.afps.assign(candidate_blocks[target_b].afps.begin() + cut_idx, candidate_blocks[target_b].afps.end()); - right_blk.dvars.assign(candidate_blocks[target_b].dvars.begin() + cut_idx, candidate_blocks[target_b].dvars.end()); - right_blk.dvars[0] = 0.0; - candidate_blocks[target_b].afps.erase(candidate_blocks[target_b].afps.begin() + cut_idx, candidate_blocks[target_b].afps.end()); - candidate_blocks[target_b].dvars.erase(candidate_blocks[target_b].dvars.begin() + cut_idx, candidate_blocks[target_b].dvars.end()); - candidate_blocks.insert(candidate_blocks.begin() + target_b + 1, right_blk); - splitted = true; - } - } - } - - for (int b = 0; b < (int)candidate_blocks.size(); b++) - { - if (candidate_blocks[b].afps.size() <= 1) - { - int e1 = (b < (int)candidate_blocks.size() - 1) ? candidate_blocks[b + 1].afps.front().i : xlen; - int e2 = (b < (int)candidate_blocks.size() - 1) ? candidate_blocks[b + 1].afps.front().j : ylen; - int b1 = (b > 0) ? 
candidate_blocks[b - 1].afps.back().i + candidate_blocks[b - 1].afps.back().len : 0; - int b2 = (b > 0) ? candidate_blocks[b - 1].afps.back().j + candidate_blocks[b - 1].afps.back().len : 0; - int span = std::min(e1 - b1, e2 - b2); - if (span < 2 * fragLen) - { - candidate_blocks.erase(candidate_blocks.begin() + b); - b--; - } - } - } - - bool merged = true; - while (merged && candidate_blocks.size() > 1) - { - merged = false; - double min_rmsd = 1e9; - int min_b = -1; - for (size_t b = 0; b < candidate_blocks.size() - 1; b++) - { - std::vector temp_merged = candidate_blocks[b].afps; - temp_merged.insert(temp_merged.end(), candidate_blocks[b + 1].afps.begin(), candidate_blocks[b + 1].afps.end()); - double cur_rmsd = calc_block_rmsd(temp_merged); - if (cur_rmsd < min_rmsd) - { - min_rmsd = cur_rmsd; - min_b = b; - } - } - - if (min_rmsd < cur_local_badRmsd && min_b != -1) - { - candidate_blocks[min_b].afps.insert(candidate_blocks[min_b].afps.end(), candidate_blocks[min_b + 1].afps.begin(), candidate_blocks[min_b + 1].afps.end()); - candidate_blocks.erase(candidate_blocks.begin() + min_b + 1); - merged = true; - } - } - - std::vector fatcat_domains; - int last_i = 0, last_j = 0; - for (size_t b = 0; b < candidate_blocks.size(); b++) - { - int b_s1 = -1, b_e1 = -1, b_s2 = -1, b_e2 = -1; - for (size_t a = 0; a < candidate_blocks[b].afps.size(); a++) - { - FATCAT_AFP afp = candidate_blocks[b].afps[a]; - int skip = std::max(std::max(last_i - afp.i, last_j - afp.j), 0); - if (skip >= afp.len) - continue; - - int eff_i = afp.i + skip; - int eff_j = afp.j + skip; - int eff_L = afp.len - skip; - if (b_s1 == -1) - { - b_s1 = eff_i; - b_s2 = eff_j; - } - b_e1 = eff_i + eff_L; - b_e2 = eff_j + eff_L; - last_i = b_e1; - last_j = b_e2; - } - if (b_s1 != -1) - { - if (b_e1 - b_s1 >= 4 && b_e2 - b_s2 >= 4) - { - Region r = {b_s1, b_e1, b_s2, b_e2}; - fatcat_domains.push_back(r); - } - } - } - - if (fatcat_domains.empty()) - return std::make_pair(ret_b1, ret_b2); - - 
ret_b1.push_back(0); - ret_b2.push_back(0); - for (size_t k = 0; k < fatcat_domains.size() - 1; k++) - { - ret_b1.push_back((fatcat_domains[k].e1 + fatcat_domains[k + 1].s1) / 2); - ret_b2.push_back((fatcat_domains[k].e2 + fatcat_domains[k + 1].s2) / 2); - } - ret_b1.push_back(xlen); - ret_b2.push_back(ylen); - - return std::make_pair(ret_b1, ret_b2); - }; - - auto bounds_fatcat = generate_bounds(3.0, 4.0, 4.0); - auto bounds_strict = generate_bounds(2.0, 3.0, 2.0); - - std::vector, std::vector>> all_bounds; - all_bounds.push_back(bounds_fatcat); - if (bounds_strict.first != bounds_fatcat.first || bounds_strict.second != bounds_fatcat.second) - { - all_bounds.push_back(bounds_strict); - } - - // Loop through both bound sets, updating best_global_max_TM if we beat the -mm 9 defender - for (size_t b_idx = 0; b_idx < all_bounds.size(); b_idx++) - { - std::vector &bounds1 = all_bounds[b_idx].first; - std::vector &bounds2 = all_bounds[b_idx].second; - - // Skip if only one interval (block) is generated, as the full unbroken sequence - // has already been processed by the baseline (-mm 9) above. - if (bounds1.size() <= 2) - continue; - - // ================== DEBUG START ================== - // Output the interval mapping for the current boundary set - // std::cout << "\n[DEBUG] --- Region Mapping Table ---" << std::endl; - // std::cout << "[DEBUG] Mode: " << (b_idx == 0 ? 
"FATCAT Bounds" : "Strict Bounds") << std::endl; - // std::cout << "[DEBUG] Total Blocks: " << (bounds1.size() - 1) << std::endl; - - // for (size_t k = 0; k < bounds1.size() - 1; k++) - // { - // std::cout << "[DEBUG] Block " << (k + 1) << ": " - // << "Chain1 [" << bounds1[k] << " -> " << bounds1[k + 1] << "] <==> " - // << "Chain2 [" << bounds2[k] << " -> " << bounds2[k + 1] << "]" - // << std::endl; - // } - // std::cout << "[DEBUG] ----------------------------\n" << std::endl; - // =================== DEBUG END =================== - - // Precalculate distributed local_hinge_opt for each block when hinge_set is true - int num_blocks = bounds1.size() - 1; - std::vector precalc_local_hinge(num_blocks, 0); - - if (hinge_set) - { - struct BlockMeta - { - int index; - double rmsd; - }; - std::vector valid_blocks; - - // Calculate target hinges to distribute based on requested hinge_opt and current implicit blocks - int target_total_hinges = std::max(0, hinge_opt + 1 - num_blocks); - - // Calculate base amount of hinges per block - int base_hinge = (hinge_opt + 1) / num_blocks - 1; - if (base_hinge < 0) - base_hinge = 0; - - for (int k = 0; k < num_blocks; k++) - { - int L1_sub = bounds1[k + 1] - bounds1[k]; - int L2_sub = bounds2[k + 1] - bounds2[k]; - int min_L = std::min(L1_sub, L2_sub); - - if (min_L < 2 * fragLen) - { - precalc_local_hinge[k] = 0; // Length < 2*fragLen gets 0 - } - else - { - // Calculate rough RMSD for this unaligned block section - double block_rmsd = 0.0; - if (min_L >= 3) - { - double **p1, **p2; - NewArray(&p1, min_L, 3); - NewArray(&p2, min_L, 3); - for (int i = 0; i < min_L; i++) - { - p1[i][0] = xa[bounds1[k] + i][0]; - p1[i][1] = xa[bounds1[k] + i][1]; - p1[i][2] = xa[bounds1[k] + i][2]; - p2[i][0] = ya[bounds2[k] + i][0]; - p2[i][1] = ya[bounds2[k] + i][1]; - p2[i][2] = ya[bounds2[k] + i][2]; - } - double rms_sum_sq, t_tmp[3], u_tmp[3][3]; - Kabsch(p1, p2, min_L, 0, &rms_sum_sq, t_tmp, u_tmp); - block_rmsd = std::sqrt(rms_sum_sq / 
min_L); - DeleteArray(&p1, min_L); - DeleteArray(&p2, min_L); - } - valid_blocks.push_back({k, block_rmsd}); - precalc_local_hinge[k] = base_hinge; // Assign base hinges to valid blocks - } - } - - // Distribute remaining hinges strictly prioritizing top RMSD blocks - int assigned = valid_blocks.size() * base_hinge; - int remainder = target_total_hinges - assigned; - - if (remainder > 0 && !valid_blocks.empty()) - { - // Sort valid blocks by RMSD descending - std::sort(valid_blocks.begin(), valid_blocks.end(), [](const BlockMeta &a, const BlockMeta &b) - { return a.rmsd > b.rmsd; }); - - int v_idx = 0; - while (remainder > 0) - { - precalc_local_hinge[valid_blocks[v_idx].index]++; // Give +1 to the front runners - remainder--; - v_idx = (v_idx + 1) % valid_blocks.size(); - } - } - } - - // Step 5: Iteratively align each block - std::string cur_global_seqM = "", cur_global_seqxA = "", cur_global_seqyA = ""; - cur_global_seqM.reserve(xlen + ylen + max_gap); - cur_global_seqxA.reserve(xlen + ylen + max_gap); - cur_global_seqyA.reserve(xlen + ylen + max_gap); - - std::vector> cur_tu_vec; - std::vector cur_global_res_tu(xlen, -1); - - for (size_t k = 0; k < bounds1.size() - 1; k++) - { - int x_s = bounds1[k], x_e = bounds1[k + 1]; - int y_s = bounds2[k], y_e = bounds2[k + 1]; - int L1_sub = x_e - x_s; - int L2_sub = y_e - y_s; - - if (L1_sub < 3 || L2_sub < 3) - { - for (int i = 0; i < L1_sub; i++) - { - cur_global_seqxA += seqx[x_s + i]; - cur_global_seqyA += '-'; - cur_global_seqM += ' '; - } - for (int i = 0; i < L2_sub; i++) - { - cur_global_seqxA += '-'; - cur_global_seqyA += seqy[y_s + i]; - cur_global_seqM += ' '; - } - continue; - } - - double **xa_sub, **ya_sub; - NewArray(&xa_sub, L1_sub, 3); - NewArray(&ya_sub, L2_sub, 3); - char *seqx_sub = new char[L1_sub + 1]; - char *seqy_sub = new char[L2_sub + 1]; - char *secx_sub = new char[L1_sub + 1]; - char *secy_sub = new char[L2_sub + 1]; - - for (int i = 0; i < L1_sub; i++) - { - xa_sub[i][0] = xa[x_s + i][0]; - 
xa_sub[i][1] = xa[x_s + i][1]; - xa_sub[i][2] = xa[x_s + i][2]; - seqx_sub[i] = seqx[x_s + i]; - secx_sub[i] = secx[x_s + i]; - } - seqx_sub[L1_sub] = '\0'; - secx_sub[L1_sub] = '\0'; - - for (int i = 0; i < L2_sub; i++) - { - ya_sub[i][0] = ya[y_s + i][0]; - ya_sub[i][1] = ya[y_s + i][1]; - ya_sub[i][2] = ya[y_s + i][2]; - seqy_sub[i] = seqy[y_s + i]; - secy_sub[i] = secy[y_s + i]; - } - seqy_sub[L2_sub] = '\0'; - secy_sub[L2_sub] = '\0'; - - double t0_best[3], u0_best[3][3]; - double TM_best_max = -1.0; - std::string seqM_best, seqxA_best, seqyA_best; - std::vector> tu_vec_best; - - bool force_fast_opt = (std::min(L1_sub, L2_sub) > 1500) ? true : fast_opt; - - // Determine local_hinge_opt based on user requirements. - // If hinge_set is true, we use the precalculated distributed hinges. - // Otherwise, set to 0 if the block length is less than 2 * fragLen, else 2. - int local_hinge_opt; - if (hinge_set) - { - local_hinge_opt = precalc_local_hinge[k]; - } - else - { - local_hinge_opt = (std::min(L1_sub, L2_sub) < 2 * fragLen) ? 0 : 2; - } - - for (int cur_ss_opt = 0; cur_ss_opt <= 1; cur_ss_opt++) - { - FlexAlignResult cur_res; - execute_flexalign_with_fallback( - xa_sub, ya_sub, seqx_sub, seqy_sub, secx_sub, secy_sub, - L1_sub, L2_sub, local_sequence, Lnorm_ass, d0_scale, - i_opt, a_opt, u_opt, d_opt, force_fast_opt, - mol_type, local_hinge_opt, cur_ss_opt, cur_res); - - double cur_max_TM = (cur_res.TM1 > cur_res.TM2) ? 
cur_res.TM1 : cur_res.TM2; - if (cur_max_TM > TM_best_max) - { - TM_best_max = cur_max_TM; - for (int a = 0; a < 3; a++) - { - t0_best[a] = cur_res.t0[a]; - for (int b = 0; b < 3; b++) - u0_best[a][b] = cur_res.u0[a][b]; - } - seqM_best = cur_res.seqM; - seqxA_best = cur_res.seqxA; - seqyA_best = cur_res.seqyA; - tu_vec_best = cur_res.tu_vec; - } - } - - if (TM_best_max <= 0) - { - for (int i = 0; i < L1_sub; i++) - { - cur_global_seqxA += seqx_sub[i]; - cur_global_seqyA += '-'; - cur_global_seqM += ' '; - } - for (int i = 0; i < L2_sub; i++) - { - cur_global_seqxA += '-'; - cur_global_seqyA += seqy_sub[i]; - cur_global_seqM += ' '; - } - DeleteArray(&xa_sub, L1_sub); - DeleteArray(&ya_sub, L2_sub); - delete[] seqx_sub; - delete[] seqy_sub; - delete[] secx_sub; - delete[] secy_sub; - continue; - } - - if (tu_vec_best.empty()) - { - std::vector tu_tmp(12); - t_u2tu(t0_best, u0_best, tu_tmp); - tu_vec_best.push_back(tu_tmp); - } - - int base_tu_idx = cur_tu_vec.size(); - for (size_t m = 0; m < tu_vec_best.size(); m++) - cur_tu_vec.push_back(tu_vec_best[m]); - - int rx = x_s; - int current_global_idx = base_tu_idx; - - for (size_t i = 0; i < seqxA_best.length(); i++) - { - char c = seqM_best[i]; - - if (c != ' ' && c != '.' && c != ':') - { - int local_hinge_idx = -1; - if (c >= '0' && c <= '9') - local_hinge_idx = c - '0'; - else if (c >= 'a' && c <= 'z') - local_hinge_idx = c - 'a' + 10; - else if (c >= 'A' && c <= 'Z') - local_hinge_idx = c - 'A' + 36; - if (local_hinge_idx >= 0 && local_hinge_idx < tu_vec_best.size()) - current_global_idx = base_tu_idx + local_hinge_idx; - } - - if (seqxA_best[i] != '-') - { - cur_global_res_tu[rx] = current_global_idx; - rx++; - } - - if (seqxA_best[i] != '-' && seqyA_best[i] != '-') - { - if (c != ' ' && c != '.' 
&& c != ':') - { - char global_c; - if (current_global_idx < 10) - global_c = '0' + current_global_idx; - else if (current_global_idx < 36) - global_c = 'a' + (current_global_idx - 10); - else if (current_global_idx < 62) - global_c = 'A' + (current_global_idx - 36); - else - global_c = '*'; - seqM_best[i] = global_c; - } - else - { - seqM_best[i] = c; - } - } - else - { - seqM_best[i] = ' '; - } - } - - cur_global_seqM += seqM_best; - cur_global_seqxA += seqxA_best; - cur_global_seqyA += seqyA_best; - - DeleteArray(&xa_sub, L1_sub); - DeleteArray(&ya_sub, L2_sub); - delete[] seqx_sub; - delete[] seqy_sub; - delete[] secx_sub; - delete[] secy_sub; - } - - // Step 6: Recalculate global metrics correctly for current DP boundary - double cur_d0A = 1.24 * std::pow(ylen * 1.0 - 15.0, 1.0 / 3.0) - 1.8; - if (cur_d0A < 0.5) - cur_d0A = 0.5; - double cur_d0B = 1.24 * std::pow(xlen * 1.0 - 15.0, 1.0 / 3.0) - 1.8; - if (cur_d0B < 0.5) - cur_d0B = 0.5; - double cur_d0a = 1.24 * std::pow((xlen + ylen) * 0.5 - 15.0, 1.0 / 3.0) - 1.8; - if (cur_d0a < 0.5) - cur_d0a = 0.5; - - double cur_d0u = 0.0; - if (u_opt) - { - cur_d0u = 1.24 * std::pow(Lnorm_ass - 15.0, 1.0 / 3.0) - 1.8; - if (cur_d0u < 0.5) - cur_d0u = 0.5; - } - - double cur_TM1 = 0.0, cur_TM2 = 0.0, cur_TM3 = 0.0, cur_TM4 = 0.0, cur_TM5 = 0.0; - double cur_rmsd0 = 0.0, cur_Liden = 0.0; - int cur_n_ali8 = 0, cur_n_ali = 0; - std::vector cur_do_vec; - - int i_res = 0, j_res = 0; - for (size_t r = 0; r < cur_global_seqxA.length(); r++) - { - bool x_valid = (cur_global_seqxA[r] != '-'); - bool y_valid = (cur_global_seqyA[r] != '-'); - - if (x_valid && y_valid) - { - int matrix_idx = cur_global_res_tu[i_res]; - if (matrix_idx >= 0 && matrix_idx < cur_tu_vec.size()) - { - double t_k[3], u_k[3][3]; - tu2t_u(cur_tu_vec[matrix_idx], t_k, u_k); - - double x_rot[3]; - transform(t_k, u_k, xa[i_res], x_rot); - double dist2 = dist(x_rot, ya[j_res]); - double d = std::sqrt(dist2); - - cur_TM2 += 1.0 / (1.0 + dist2 / (cur_d0B * 
cur_d0B)); - cur_TM1 += 1.0 / (1.0 + dist2 / (cur_d0A * cur_d0A)); - if (a_opt) - cur_TM3 += 1.0 / (1.0 + dist2 / (cur_d0a * cur_d0a)); - if (u_opt) - cur_TM4 += 1.0 / (1.0 + dist2 / (cur_d0u * cur_d0u)); - if (d_opt) - cur_TM5 += 1.0 / (1.0 + dist2 / (d0_scale * d0_scale)); - - cur_n_ali++; - cur_do_vec.push_back(d); - - if (d <= d0_out) - { - cur_rmsd0 += dist2; - cur_n_ali8++; - if (seqx[i_res] == seqy[j_res]) - cur_Liden += 1.0; - } - } - else - { - cur_do_vec.push_back(-1); - } - } - else - { - cur_do_vec.push_back(-1); - } - - if (x_valid) - i_res++; - if (y_valid) - j_res++; - } - - // Normalize TM-scores - cur_TM2 /= xlen; - cur_TM1 /= ylen; - if (a_opt) - cur_TM3 /= (xlen + ylen) * 0.5; - if (u_opt) - cur_TM4 /= Lnorm_ass; - if (d_opt) - cur_TM5 /= ylen; - if (cur_n_ali8 > 0) - cur_rmsd0 = std::sqrt(cur_rmsd0 / cur_n_ali8); - else - cur_rmsd0 = 0.0; - - // Compare against the -mm 9 defender! - double cur_global_max_TM = (cur_TM1 > cur_TM2) ? cur_TM1 : cur_TM2; - - if (cur_global_max_TM > best_global_max_TM) - { - // <--- ADD DEBUG HERE - // if (b_idx == 1) - // { - // std::cout << "[DEBUG] strict" << std::endl; - // } - - best_global_max_TM = cur_global_max_TM; - best_tu_vec = cur_tu_vec; - best_TM1 = cur_TM1; - best_TM2 = cur_TM2; - best_TM3 = cur_TM3; - best_TM4 = cur_TM4; - best_TM5 = cur_TM5; - best_rmsd0 = cur_rmsd0; - best_Liden = cur_Liden; - best_TM_ali = cur_TM1; - best_rmsd_ali = cur_rmsd0; - best_L_ali = cur_n_ali; - best_n_ali = cur_n_ali; - best_n_ali8 = cur_n_ali8; - best_seqM = cur_global_seqM; - best_seqxA = cur_global_seqxA; - best_seqyA = cur_global_seqyA; - best_do_vec = cur_do_vec; - best_d0A = cur_d0A; - best_d0B = cur_d0B; - best_d0a = cur_d0a; - best_d0u = cur_d0u; - - if (!best_tu_vec.empty()) - { - tu2t_u(best_tu_vec[0], best_t0, best_u0); - } - } - } - - // Safety check - if (best_global_max_TM < 0) - return 0; - - // Output best values back to the reference parameters - TM1 = best_TM1; - TM2 = best_TM2; - TM3 = best_TM3; - TM4 = 
best_TM4; - TM5 = best_TM5; - rmsd0 = best_rmsd0; - Liden = best_Liden; - TM_ali = best_TM_ali; - rmsd_ali = best_rmsd_ali; - L_ali = best_L_ali; - n_ali = best_n_ali; - n_ali8 = best_n_ali8; - seqM = best_seqM; - seqxA = best_seqxA; - seqyA = best_seqyA; - do_vec = best_do_vec; - tu_vec = best_tu_vec; - d0A = best_d0A; - d0B = best_d0B; - d0a = best_d0a; - d0u = best_d0u; - - for (int a = 0; a < 3; a++) - { - t0[a] = best_t0[a]; - for (int b = 0; b < 3; b++) - u0[a][b] = best_u0[a][b]; - } - - return tu_vec.size(); -} - -// Unified engine replacing flexalign, flexalign_best, and flexalign_fatcat +// Unified engine replacing flexalign_greedy and flexalign_fatcat int flexalign_unified(string &xname, string &yname, const string &fname_super, const string &fname_lign, const string &fname_matrix, vector &sequence, const double Lnorm_ass, const double d0_scale, @@ -4370,12 +3076,12 @@ int flexalign_unified(string &xname, string &yname, const string &fname_super, // Direct Drop-in Wrappers (No changes needed in main() bindings) // ======================================================================= -int flexalign(string &xname, string &yname, const string &fname_super, const string &fname_lign, const string &fname_matrix, vector &sequence, const double Lnorm_ass, const double d0_scale, const bool m_opt, const int i_opt, const int o_opt, const int a_opt, const bool u_opt, const bool d_opt, const double TMcut, const int infmt1_opt, const int infmt2_opt, const int ter_opt, const int split_opt, const int outfmt_opt, const bool fast_opt, const int mirror_opt, const int het_opt, const string &atom_opt, const bool autojustify, const string &mol_opt, const string &dir_opt, const string &dirpair_opt, const string &dir1_opt, const string &dir2_opt, const vector &chain2parse1, const vector &chain2parse2, const vector &model2parse1, const vector &model2parse2, const int byresi_opt, const vector &chain1_list, const vector &chain2_list, const int hinge_opt, const int ss_opt) -{ - 
return flexalign_unified(xname, yname, fname_super, fname_lign, fname_matrix, sequence, Lnorm_ass, d0_scale, m_opt, i_opt, o_opt, a_opt, u_opt, d_opt, TMcut, infmt1_opt, infmt2_opt, ter_opt, split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt, atom_opt, autojustify, mol_opt, dir_opt, dirpair_opt, dir1_opt, dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, byresi_opt, chain1_list, chain2_list, hinge_opt, ss_opt, FLEX_STANDARD); -} +// int flexalign(string &xname, string &yname, const string &fname_super, const string &fname_lign, const string &fname_matrix, vector &sequence, const double Lnorm_ass, const double d0_scale, const bool m_opt, const int i_opt, const int o_opt, const int a_opt, const bool u_opt, const bool d_opt, const double TMcut, const int infmt1_opt, const int infmt2_opt, const int ter_opt, const int split_opt, const int outfmt_opt, const bool fast_opt, const int mirror_opt, const int het_opt, const string &atom_opt, const bool autojustify, const string &mol_opt, const string &dir_opt, const string &dirpair_opt, const string &dir1_opt, const string &dir2_opt, const vector &chain2parse1, const vector &chain2parse2, const vector &model2parse1, const vector &model2parse2, const int byresi_opt, const vector &chain1_list, const vector &chain2_list, const int hinge_opt, const int ss_opt) +// { +// return flexalign_unified(xname, yname, fname_super, fname_lign, fname_matrix, sequence, Lnorm_ass, d0_scale, m_opt, i_opt, o_opt, a_opt, u_opt, d_opt, TMcut, infmt1_opt, infmt2_opt, ter_opt, split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt, atom_opt, autojustify, mol_opt, dir_opt, dirpair_opt, dir1_opt, dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, byresi_opt, chain1_list, chain2_list, hinge_opt, ss_opt, FLEX_STANDARD); +// } -int flexalign_best(string &xname, string &yname, const string &fname_super, const string &fname_lign, const string &fname_matrix, vector &sequence, const double Lnorm_ass, const double d0_scale, const 
bool m_opt, const int i_opt, const int o_opt, const int a_opt, const bool u_opt, const bool d_opt, const double TMcut, const int infmt1_opt, const int infmt2_opt, const int ter_opt, const int split_opt, const int outfmt_opt, const bool fast_opt, const int mirror_opt, const int het_opt, const string &atom_opt, const bool autojustify, const string &mol_opt, const string &dir_opt, const string &dirpair_opt, const string &dir1_opt, const string &dir2_opt, const vector &chain2parse1, const vector &chain2parse2, const vector &model2parse1, const vector &model2parse2, const int byresi_opt, const vector &chain1_list, const vector &chain2_list, const int hinge_opt) +int flexalign_greedy(string &xname, string &yname, const string &fname_super, const string &fname_lign, const string &fname_matrix, vector &sequence, const double Lnorm_ass, const double d0_scale, const bool m_opt, const int i_opt, const int o_opt, const int a_opt, const bool u_opt, const bool d_opt, const double TMcut, const int infmt1_opt, const int infmt2_opt, const int ter_opt, const int split_opt, const int outfmt_opt, const bool fast_opt, const int mirror_opt, const int het_opt, const string &atom_opt, const bool autojustify, const string &mol_opt, const string &dir_opt, const string &dirpair_opt, const string &dir1_opt, const string &dir2_opt, const vector &chain2parse1, const vector &chain2parse2, const vector &model2parse1, const vector &model2parse2, const int byresi_opt, const vector &chain1_list, const vector &chain2_list, const int hinge_opt) { return flexalign_unified(xname, yname, fname_super, fname_lign, fname_matrix, sequence, Lnorm_ass, d0_scale, m_opt, i_opt, o_opt, a_opt, u_opt, d_opt, TMcut, infmt1_opt, infmt2_opt, ter_opt, split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt, atom_opt, autojustify, mol_opt, dir_opt, dirpair_opt, dir1_opt, dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, byresi_opt, chain1_list, chain2_list, hinge_opt, 0 /* ss_opt is ignored in BEST mode */, 
FLEX_BEST); } @@ -4431,6 +3137,7 @@ int main(int argc, char *argv[]) int mirror_opt = 0; // do not align mirror int het_opt = 0; // do not read HETATM residues int mm_opt = 0; // do not perform MM-align + bool fatcat_opt = false; // flag for -fatcat, only valid with -mm 7 string atom_opt = "auto"; // use C alpha atom for protein and C3' for RNA string mol_opt = "auto"; // auto-detect the molecule type as protein/RNA string suffix_opt = ""; // set -suffix to empty @@ -4783,6 +3490,10 @@ int main(int argc, char *argv[]) mm_opt = atoi(argv[i + 1]); i++; } + else if (!strcmp(argv[i], "-fatcat")) + { + fatcat_opt = true; + } else if (xname.size() == 0) xname = argv[i]; else if (yname.size() == 0) @@ -4940,6 +3651,9 @@ int main(int argc, char *argv[]) if (mm_opt >= 7 && hinge_opt >= 10) PrintErrorAndQuit("ERROR! -hinge must be <10"); + if (fatcat_opt && mm_opt != 7) + PrintErrorAndQuit("ERROR! -fatcat parameter can only be used when -mm 7 is set"); + if (chainmapfile.size() && mm_opt != 1) PrintErrorAndQuit("ERROR! 
-chainmap must be used with -mm 1"); @@ -5083,37 +3797,24 @@ int main(int argc, char *argv[]) dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, chain1_list, chain2_list, se_opt, closeK_opt, mm_opt); else if (mm_opt == 7) - flexalign(xname, yname, fname_super, fname_lign, - fname_matrix, sequence, Lnorm_ass, d0_scale, m_opt, i_opt, o_opt, - a_opt, u_opt, d_opt, TMcut, infmt1_opt, infmt2_opt, ter_opt, - split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt, - atom_opt, autojustify, mol_opt, dir_opt, dirpair_opt, dir1_opt, - dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, - byresi_opt, chain1_list, chain2_list, hinge_opt, 0); - else if (mm_opt == 8) - flexalign(xname, yname, fname_super, fname_lign, - fname_matrix, sequence, Lnorm_ass, d0_scale, m_opt, i_opt, o_opt, - a_opt, u_opt, d_opt, TMcut, infmt1_opt, infmt2_opt, ter_opt, - split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt, - atom_opt, autojustify, mol_opt, dir_opt, dirpair_opt, dir1_opt, - dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, - byresi_opt, chain1_list, chain2_list, hinge_opt, 1); - else if (mm_opt == 9) - flexalign_best(xname, yname, fname_super, fname_lign, - fname_matrix, sequence, Lnorm_ass, d0_scale, m_opt, i_opt, o_opt, - a_opt, u_opt, d_opt, TMcut, infmt1_opt, infmt2_opt, ter_opt, - split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt, - atom_opt, autojustify, mol_opt, dir_opt, dirpair_opt, dir1_opt, - dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, - byresi_opt, chain1_list, chain2_list, hinge_opt); - else if (mm_opt == 10) - flexalign_fatcat(xname, yname, fname_super, fname_lign, - fname_matrix, sequence, Lnorm_ass, d0_scale, m_opt, i_opt, o_opt, - a_opt, u_opt, d_opt, TMcut, infmt1_opt, infmt2_opt, ter_opt, - split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt, - atom_opt, autojustify, mol_opt, dir_opt, dirpair_opt, dir1_opt, - dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, - byresi_opt, chain1_list, 
chain2_list, hinge_opt, hinge_set); + { + if (fatcat_opt) + flexalign_fatcat(xname, yname, fname_super, fname_lign, + fname_matrix, sequence, Lnorm_ass, d0_scale, m_opt, i_opt, o_opt, + a_opt, u_opt, d_opt, TMcut, infmt1_opt, infmt2_opt, ter_opt, + split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt, + atom_opt, autojustify, mol_opt, dir_opt, dirpair_opt, dir1_opt, + dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, + byresi_opt, chain1_list, chain2_list, hinge_opt, hinge_set); + else + flexalign_greedy(xname, yname, fname_super, fname_lign, + fname_matrix, sequence, Lnorm_ass, d0_scale, m_opt, i_opt, o_opt, + a_opt, u_opt, d_opt, TMcut, infmt1_opt, infmt2_opt, ter_opt, + split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt, + atom_opt, autojustify, mol_opt, dir_opt, dirpair_opt, dir1_opt, + dir2_opt, chain2parse1, chain2parse2, model2parse1, model2parse2, + byresi_opt, chain1_list, chain2_list, hinge_opt); + } else cerr << "WARNING! -mm " << mm_opt << " not implemented" << endl; diff --git a/flexalign.h b/flexalign.h index 0eb1d1c..e5134d5 100644 --- a/flexalign.h +++ b/flexalign.h @@ -2054,4 +2054,1280 @@ void output_flexalign_results(const string xname, const string yname, xlen, ylen, d0A, n_ali8, rmsd, TM1, Liden); } +// Data structure to hold outputs of flexalign_main to avoid parameter clutter +struct FlexAlignResult +{ + double t0[3]; + double u0[3][3]; + vector> tu_vec; + double TM1, TM2, TM3, TM4, TM5; + double d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out; + string seqM, seqxA, seqyA; + vector do_vec; + double rmsd0, Liden, TM_ali, rmsd_ali; + int L_ali, n_ali, n_ali8, hingeNum; + + FlexAlignResult() : TM1(-1.0), TM2(-1.0), TM3(-1.0), TM4(-1.0), TM5(-1.0), + d0_0(0.0), TM_0(0.0), d0A(0.0), d0B(0.0), d0u(0.0), d0a(0.0), d0_out(5.0), + rmsd0(0.0), Liden(0.0), TM_ali(0.0), rmsd_ali(0.0), + L_ali(0), n_ali(0), n_ali8(0), hingeNum(0) + { + for (int i = 0; i < 3; i++) + { + t0[i] = 0.0; + for (int j = 0; j < 3; j++) + u0[i][j] = (i == j) ? 
1.0 : 0.0; + } + } +}; + +enum FlexAlignMode +{ + FLEX_STANDARD = 0, + FLEX_BEST = 1, + FLEX_FATCAT = 2 +}; + +// Runs flexalign_main into res; when hinge_opt is set, at most one hinge was found and fewer than 60% of the shorter chain is aligned, reruns it seeded with res.tu_vec[0] and keeps whichever run has the larger max(TM1, TM2) +void execute_flexalign_with_fallback( + double **xa, double **ya, char *seqx, char *seqy, char *secx, char *secy, + int xlen, int ylen, vector &sequence, const double Lnorm_ass, const double d0_scale, + const int i_opt, const int a_opt, const bool u_opt, const bool d_opt, const bool force_fast_opt, + const int mol_type, const int hinge_opt, const int ss_opt, FlexAlignResult &res) +{ + res.hingeNum = flexalign_main( + xa, ya, seqx, seqy, secx, secy, + res.t0, res.u0, res.tu_vec, res.TM1, res.TM2, res.TM3, res.TM4, res.TM5, + res.d0_0, res.TM_0, res.d0A, res.d0B, res.d0u, res.d0a, res.d0_out, + res.seqM, res.seqxA, res.seqyA, res.do_vec, + res.rmsd0, res.L_ali, res.Liden, res.TM_ali, res.rmsd_ali, res.n_ali, res.n_ali8, + xlen, ylen, sequence, Lnorm_ass, d0_scale, + i_opt, a_opt, u_opt, d_opt, force_fast_opt, + mol_type, hinge_opt, ss_opt); + + // Fallback compensation when too few hinges are found (skipped if the first run produced no transform to seed from); res_h receives its own d0A/d0B/d0u/d0a so that copying res_h into res does not reset them to 0 + if (hinge_opt && res.hingeNum <= 1 && !res.tu_vec.empty() && res.n_ali8 < 0.6 * getmin(xlen, ylen)) + { + FlexAlignResult res_h; + res_h.tu_vec.push_back(res.tu_vec[0]); + tu2t_u(res.tu_vec[0], res_h.t0, res_h.u0); + + res_h.hingeNum = flexalign_main( + xa, ya, seqx, seqy, secx, secy, + res_h.t0, res_h.u0, res_h.tu_vec, + res_h.TM1, res_h.TM2, res_h.TM3, res_h.TM4, res_h.TM5, + res_h.d0_0, res_h.TM_0, res_h.d0A, res_h.d0B, res_h.d0u, res_h.d0a, res_h.d0_out, + res_h.seqM, res_h.seqxA, res_h.seqyA, res_h.do_vec, + res_h.rmsd0, res_h.L_ali, res_h.Liden, res_h.TM_ali, res_h.rmsd_ali, + res_h.n_ali, res_h.n_ali8, + xlen, ylen, sequence, Lnorm_ass, d0_scale, i_opt, + a_opt, u_opt, d_opt, force_fast_opt, + mol_type, hinge_opt, ss_opt); + + double TM = (res.TM1 > res.TM2) ? res.TM1 : res.TM2; + double TM_h = (res_h.TM1 > res_h.TM2) ?
res_h.TM1 : res_h.TM2; + if (TM_h > TM) + { + res = res_h; // Safely overwrite with the better refined results + } + } +} + +// ========================================== +// FATCAT Core Algorithm (flexalign_fatcat_main) +// ========================================== +struct FATCAT_AFP +{ + int i, j, len; + double score; +}; + +int flexalign_fatcat_main(double **xa, double **ya, + const char *seqx, const char *seqy, const char *secx, const char *secy, + double t0[3], double u0[3][3], std::vector> &tu_vec, + double &TM1, double &TM2, double &TM3, double &TM4, double &TM5, + double &d0_0, double &TM_0, + double &d0A, double &d0B, double &d0u, double &d0a, double &d0_out, + std::string &seqM, std::string &seqxA, std::string &seqyA, std::vector &do_vec, + double &rmsd0, int &L_ali, double &Liden, + double &TM_ali, double &rmsd_ali, int &n_ali, int &n_ali8, + const int xlen, const int ylen, + const std::vector sequence, const double Lnorm_ass, + const double d0_scale, const int i_opt, const int a_opt, + const bool u_opt, const bool d_opt, const bool fast_opt, + const int mol_type, const int hinge_opt, const int ss_opt, + int sparse_val = 0, bool hinge_set = false) +{ + // ========================================== + // TRUE flexalign_greedy BASELINE (Defender) + // Run full sequence without generate_bounds slicing! + // This perfectly simulates FLEX_BEST (flexalign_greedy) behavior. 
+ // ========================================== + double best_global_max_TM = -1.0; + std::vector> best_tu_vec; + double best_t0[3], best_u0[3][3]; + double best_TM1 = 0.0, best_TM2 = 0.0, best_TM3 = 0.0, best_TM4 = 0.0, best_TM5 = 0.0; + double best_rmsd0 = 0.0, best_Liden = 0.0, best_TM_ali = 0.0, best_rmsd_ali = 0.0; + int best_L_ali = 0, best_n_ali = 0, best_n_ali8 = 0; + std::string best_seqM = "", best_seqxA = "", best_seqyA = ""; + std::vector best_do_vec; + double best_d0A = 0.0, best_d0B = 0.0, best_d0a = 0.0, best_d0u = 0.0; + + bool force_fast_opt_global = (std::min(xlen, ylen) > 1500) ? true : fast_opt; + std::vector local_sequence = sequence; + + for (int cur_ss_opt = 0; cur_ss_opt <= 1; cur_ss_opt++) + { + FlexAlignResult base_res; + // Pass full unbroken sequences directly to flexalign (identical to flexalign_greedy) + execute_flexalign_with_fallback( + xa, ya, (char *)seqx, (char *)seqy, (char *)secx, (char *)secy, + xlen, ylen, local_sequence, Lnorm_ass, d0_scale, + i_opt, a_opt, u_opt, d_opt, force_fast_opt_global, + mol_type, hinge_opt, cur_ss_opt, base_res); // flexalign_greedy explicitly uses 9 hinges + + double cur_max_TM = (base_res.TM1 > base_res.TM2) ? 
base_res.TM1 : base_res.TM2; + if (cur_max_TM > best_global_max_TM) + { + best_global_max_TM = cur_max_TM; + for (int a = 0; a < 3; a++) + { + best_t0[a] = base_res.t0[a]; + for (int b = 0; b < 3; b++) + best_u0[a][b] = base_res.u0[a][b]; + } + best_tu_vec = base_res.tu_vec; + best_TM1 = base_res.TM1; + best_TM2 = base_res.TM2; + best_TM3 = base_res.TM3; + best_TM4 = base_res.TM4; + best_TM5 = base_res.TM5; + best_rmsd0 = base_res.rmsd0; + best_Liden = base_res.Liden; + best_TM_ali = base_res.TM_ali; + best_rmsd_ali = base_res.rmsd_ali; + best_L_ali = base_res.L_ali; + best_n_ali = base_res.n_ali; + best_n_ali8 = base_res.n_ali8; + best_seqM = base_res.seqM; + best_seqxA = base_res.seqxA; + best_seqyA = base_res.seqyA; + best_do_vec = base_res.do_vec; + best_d0A = base_res.d0A; + best_d0B = base_res.d0B; + best_d0a = base_res.d0a; + best_d0u = base_res.d0u; + } + } + + // Early exit if the true flexalign_greedy baseline is already excellent + if (best_global_max_TM >= 0.85) + { + // <--- ADD DEBUG HERE: Output early exit confirmation + // std::cout << "[DEBUG] MM9" << std::endl; + + TM1 = best_TM1; + TM2 = best_TM2; + TM3 = best_TM3; + TM4 = best_TM4; + TM5 = best_TM5; + rmsd0 = best_rmsd0; + Liden = best_Liden; + TM_ali = best_TM_ali; + rmsd_ali = best_rmsd_ali; + L_ali = best_L_ali; + n_ali = best_n_ali; + n_ali8 = best_n_ali8; + seqM = best_seqM; + seqxA = best_seqxA; + seqyA = best_seqyA; + do_vec = best_do_vec; + tu_vec = best_tu_vec; + d0A = best_d0A; + d0B = best_d0B; + d0a = best_d0a; + d0u = best_d0u; + for (int a = 0; a < 3; a++) + { + t0[a] = best_t0[a]; + for (int b = 0; b < 3; b++) + u0[a][b] = best_u0[a][b]; + } + return tu_vec.size(); + } + + // ========================================== + // Proceed to FATCAT sliced bounds logic... 
+ // ========================================== + + // FATCAT base parameters + int fragLen = 8; + double resScore = 3.0; + double gap_ext = -0.5; + double disCut = 5.0; + double disSmooth = 4.0; + double twist_pen = -25.0; + int max_gap = 40; + double max_penalty = -5.0; + int misCut = 2 * fragLen; + int maxGapFrag = fragLen + max_gap; + double afp_dis_cut = fragLen * fragLen * (disCut * disCut); + int max_twists = hinge_opt; + + // OPTIMIZATION 1: Precompute local intra-protein distance matrices + int max_dist_window = max_gap + 2 * fragLen + 1; + std::vector> disTable1(xlen, std::vector(max_dist_window, 0.0)); + std::vector> disTable2(ylen, std::vector(max_dist_window, 0.0)); + + for (int i = 0; i < xlen; i++) + { + for (int j = i; j < std::min(xlen, i + max_dist_window); j++) + disTable1[i][j - i] = std::sqrt(dist(xa[i], xa[j])); + } + for (int i = 0; i < ylen; i++) + { + for (int j = i; j < std::min(ylen, i + max_dist_window); j++) + disTable2[i][j - i] = std::sqrt(dist(ya[i], ya[j])); + } + + // Wrapper for generating bounds + auto generate_bounds = [&](double cur_rmsdCut, double cur_badRmsd, double cur_local_badRmsd) -> std::pair, std::vector> + { + // Step 1: Extract initial AFPs in batches + std::vector initial_afps; + int step = sparse_val + 1; + + double r1_static[8][3], r2_static[8][3]; + double *r1[8], *r2[8]; + for (int k = 0; k < 8; k++) + { + r1[k] = r1_static[k]; + r2[k] = r2_static[k]; + } + + for (int i = 0; i <= xlen - fragLen; i += step) + { + for (int j = 0; j <= ylen - fragLen; j += step) + { + int d3_term = std::min(i, j) + std::min(xlen - (i + fragLen - 1), ylen - (j + fragLen)) + fragLen; + if (d3_term < 0.3 * std::min(xlen, ylen)) + continue; + + double dist1 = disTable1[i][fragLen - 1]; + double dist2 = disTable2[j][fragLen - 1]; + + if (std::fabs(dist1 - dist2) > 2.0 * cur_rmsdCut) + continue; + + for (int k = 0; k < fragLen; k++) + { + r1[k][0] = xa[i + k][0]; + r1[k][1] = xa[i + k][1]; + r1[k][2] = xa[i + k][2]; + r2[k][0] = ya[j + 
k][0]; + r2[k][1] = ya[j + k][1]; + r2[k][2] = ya[j + k][2]; + } + + double rms_sum_sq, t_tmp[3], u_tmp[3][3]; + Kabsch(r1, r2, fragLen, 0, &rms_sum_sq, t_tmp, u_tmp); + double rmsd_tmp = std::sqrt(rms_sum_sq / fragLen); + + if (rmsd_tmp < cur_rmsdCut) + { + FATCAT_AFP afp; + afp.i = i; + afp.j = j; + afp.len = fragLen; + afp.score = resScore * fragLen * (1.0 - (rmsd_tmp / cur_badRmsd) * (rmsd_tmp / cur_badRmsd)); + initial_afps.push_back(afp); + } + } + } + + // Step 2: Merge diagonal AFPs + int max_diagonal_idx = xlen + ylen + 1; + std::vector> diagonals(max_diagonal_idx); + for (size_t k = 0; k < initial_afps.size(); k++) + { + diagonals[initial_afps[k].i - initial_afps[k].j + ylen].push_back(initial_afps[k]); + } + + std::vector merged_afps; + int max_merge_len = std::min(xlen, ylen); + double **r1_merge, **r2_merge; + NewArray(&r1_merge, max_merge_len, 3); + NewArray(&r2_merge, max_merge_len, 3); + + for (int d = 0; d < max_diagonal_idx; d++) + { + if (diagonals[d].empty()) + continue; + std::vector &group = diagonals[d]; + + std::sort(group.begin(), group.end(), [](const FATCAT_AFP &a, const FATCAT_AFP &b) + { return a.i < b.i; }); + + int n_group = group.size(); + std::vector invalid(n_group, false); + for (int idx = 0; idx < n_group; idx++) + { + if (invalid[idx]) + continue; + FATCAT_AFP curr = group[idx]; + for (int nxt_idx = idx + 1; nxt_idx < n_group; nxt_idx++) + { + FATCAT_AFP nxt = group[nxt_idx]; + if (nxt.i > curr.i + curr.len) + break; + + if (nxt.i + nxt.len > curr.i + curr.len) + { + int new_len = (nxt.i + nxt.len) - curr.i; + + for (int k = 0; k < new_len; k++) + { + r1_merge[k][0] = xa[curr.i + k][0]; + r1_merge[k][1] = xa[curr.i + k][1]; + r1_merge[k][2] = xa[curr.i + k][2]; + r2_merge[k][0] = ya[curr.j + k][0]; + r2_merge[k][1] = ya[curr.j + k][1]; + r2_merge[k][2] = ya[curr.j + k][2]; + } + + double rms_sum_sq, t_tmp[3], u_tmp[3][3]; + Kabsch(r1_merge, r2_merge, new_len, 0, &rms_sum_sq, t_tmp, u_tmp); + double rmsd_tmp = 
std::sqrt(rms_sum_sq / new_len); + + if (rmsd_tmp < cur_rmsdCut) + { + curr.len = new_len; + curr.score = resScore * new_len * (1.0 - (rmsd_tmp / cur_badRmsd) * (rmsd_tmp / cur_badRmsd)); + invalid[nxt_idx] = true; + } + } + } + merged_afps.push_back(curr); + } + } + DeleteArray(&r1_merge, max_merge_len); + DeleteArray(&r2_merge, max_merge_len); + + std::sort(merged_afps.begin(), merged_afps.end(), [](const FATCAT_AFP &a, const FATCAT_AFP &b) + { + if (a.i == b.i) return a.j < b.j; + return a.i < b.i; }); + + int n_afps = merged_afps.size(); + std::vector ret_b1, ret_b2; + if (n_afps == 0) + return std::make_pair(ret_b1, ret_b2); + + // Step 3 & 4: Dual Dynamic Programming and Domain Splitting + std::vector afp_aft_index(xlen * ylen, -1); + std::vector afp_bef_index(xlen * ylen, -1); + + std::vector>> i_to_j(xlen); + for (int m = 0; m < n_afps; m++) + { + i_to_j[merged_afps[m].i].push_back(std::make_pair(merged_afps[m].j, m)); + } + + for (int i_val = 0; i_val < xlen; i_val++) + { + if (i_to_j[i_val].empty()) + continue; + for (size_t p = 0; p < i_to_j[i_val].size(); p++) + { + int j_val = i_to_j[i_val][p].first; + afp_aft_index[i_val * ylen + j_val] = i_to_j[i_val][p].second; + afp_bef_index[i_val * ylen + j_val] = i_to_j[i_val][p].second; + } + int curr_bef = -1; + for (int j_val = 0; j_val < ylen; j_val++) + { + if (afp_bef_index[i_val * ylen + j_val] != -1) + curr_bef = afp_bef_index[i_val * ylen + j_val]; + else + afp_bef_index[i_val * ylen + j_val] = curr_bef; + } + int curr_aft = -1; + for (int j_val = ylen - 1; j_val >= 0; j_val--) + { + if (afp_aft_index[i_val * ylen + j_val] != -1) + curr_aft = afp_aft_index[i_val * ylen + j_val]; + else + afp_aft_index[i_val * ylen + j_val] = curr_aft; + } + } + + auto get_dvar = [&](const FATCAT_AFP &prv, const FATCAT_AFP &curr) -> double + { + double rms_sq = 0; + for (int i_idx = 0; i_idx < fragLen; i_idx++) + { + for (int j_idx = 0; j_idx < fragLen; j_idx++) + { + double dist1, dist2; + int idx1_a = curr.i + i_idx, 
idx1_b = prv.i + j_idx; + if (idx1_a >= idx1_b) + dist1 = disTable1[idx1_b][idx1_a - idx1_b]; + else + dist1 = disTable1[idx1_a][idx1_b - idx1_a]; + + int idx2_a = curr.j + i_idx, idx2_b = prv.j + j_idx; + if (idx2_a >= idx2_b) + dist2 = disTable2[idx2_b][idx2_a - idx2_b]; + else + dist2 = disTable2[idx2_a][idx2_b - idx2_a]; + + rms_sq += (dist1 - dist2) * (dist1 - dist2); + } + } + if (rms_sq > afp_dis_cut) + return 1e9; + return std::sqrt(rms_sq / (fragLen * fragLen)); + }; + + auto calc_block_rmsd = [&](const std::vector &afp_list) -> double + { + std::vector r1, r2; + for (size_t a = 0; a < afp_list.size(); a++) + { + for (int l = 0; l < afp_list[a].len; l++) + { + r1.push_back(afp_list[a].i + l); + r2.push_back(afp_list[a].j + l); + } + } + int n = r1.size(); + if (n < 3) + return 0.0; + double **p1; + NewArray(&p1, n, 3); + double **p2; + NewArray(&p2, n, 3); + for (int i = 0; i < n; i++) + { + p1[i][0] = xa[r1[i]][0]; + p1[i][1] = xa[r1[i]][1]; + p1[i][2] = xa[r1[i]][2]; + p2[i][0] = ya[r2[i]][0]; + p2[i][1] = ya[r2[i]][1]; + p2[i][2] = ya[r2[i]][2]; + } + double rms_sq_sum, t_tmp[3], u_tmp[3][3]; + Kabsch(p1, p2, n, 0, &rms_sq_sum, t_tmp, u_tmp); + DeleteArray(&p1, n); + DeleteArray(&p2, n); + return std::sqrt(rms_sq_sum / n); + }; + + struct Region + { + int s1, e1, s2, e2; + }; + + std::vector sco(n_afps); + std::vector twi(n_afps, 0); + std::vector pre(n_afps, -1); + for (int m = 0; m < n_afps; m++) + sco[m] = merged_afps[m].score; + + for (int m = 0; m < n_afps; m++) + { + int curr_i = merged_afps[m].i; + int curr_j = merged_afps[m].j; + int a3 = curr_i - fragLen; + int a2 = std::max(0, a3 - misCut); + int a1 = std::max(0, curr_i - maxGapFrag); + int b3 = curr_j - fragLen; + int b2 = std::max(0, b3 - misCut); + int b1 = std::max(0, curr_j - maxGapFrag); + + std::vector valid_prevs; + for (int st = 0; st < 2; st++) + { + int a_s, a_e, b_s, b_e; + if (st == 0) + { + a_s = std::max(a1, 0); + a_e = std::min(a3, xlen - 1); + b_s = std::max(b2, 0); + b_e = 
std::min(b3, ylen - 1); + } + else + { + a_s = std::max(a2, 0); + a_e = std::min(a3, xlen - 1); + b_s = std::max(b1, 0); + b_e = std::min(b2 - 1, ylen - 1); + } + + if (b_s >= ylen || b_e < 0) + continue; + for (int prev_i = a_s; prev_i <= a_e; prev_i++) + { + int s1 = afp_aft_index[prev_i * ylen + b_s]; + int s2 = afp_bef_index[prev_i * ylen + b_e]; + if (s1 != -1 && s2 != -1 && s1 <= s2) + for (int s = s1; s <= s2; s++) + valid_prevs.push_back(s); + } + } + + double curr_sco = merged_afps[m].score; + for (size_t v = 0; v < valid_prevs.size(); v++) + { + int prev = valid_prevs[v]; + int prev_twi = twi[prev]; + if (prev_twi > max_twists) + continue; + + int gap_i = curr_i - (merged_afps[prev].i + merged_afps[prev].len); + int gap_j = curr_j - (merged_afps[prev].j + merged_afps[prev].len); + int m_gap = std::max(gap_i, gap_j); + + double gp = 0.0; + int m_mis = 0; + if (gap_i < 0 || gap_j < 0) + m_mis = (gap_i < gap_j) ? -gap_i : -gap_j; + gp = gap_ext * m_mis; + if (m_gap > 0) + gp += gap_ext * m_gap; + if (gp < max_penalty) + gp = max_penalty; + + double rms_sq = 0; + for (int k = 0; k < fragLen; k++) + { + for (int l = 0; l < fragLen; l++) + { + double dist1, dist2; + int idx1_a = curr_i + k, idx1_b = merged_afps[prev].i + l; + if (idx1_a >= idx1_b) + dist1 = disTable1[idx1_b][idx1_a - idx1_b]; + else + dist1 = disTable1[idx1_a][idx1_b - idx1_a]; + + int idx2_a = curr_j + k, idx2_b = merged_afps[prev].j + l; + if (idx2_a >= idx2_b) + dist2 = disTable2[idx2_b][idx2_a - idx2_b]; + else + dist2 = disTable2[idx2_a][idx2_b - idx2_a]; + + rms_sq += (dist1 - dist2) * (dist1 - dist2); + } + } + + double tp = 0.0; + int is_twist = 0; + if (rms_sq >= afp_dis_cut) + { + tp = twist_pen; + is_twist = 1; + } + else + { + double dvar = std::sqrt(rms_sq / (fragLen * fragLen)); + if (dvar > disCut - disSmooth) + tp = twist_pen * std::sqrt((dvar - disCut + disSmooth) / disSmooth); + } + + if (prev_twi + is_twist > max_twists) + continue; + + double stmp = sco[prev] + curr_sco + tp 
+ gp; + if (stmp > sco[m]) + { + sco[m] = stmp; + pre[m] = prev; + twi[m] = prev_twi + is_twist; + } + } + } + + int best_m = 0; + for (int m = 1; m < n_afps; m++) + if (sco[m] > sco[best_m]) + best_m = m; + + std::vector path; + int curr_m = best_m; + while (curr_m != -1) + { + path.push_back(curr_m); + curr_m = pre[curr_m]; + } + std::reverse(path.begin(), path.end()); + + if (path.empty()) + return std::make_pair(ret_b1, ret_b2); + + struct Block + { + std::vector afps; + std::vector dvars; + }; + std::vector candidate_blocks; + Block curr_block; + curr_block.afps.push_back(merged_afps[path[0]]); + curr_block.dvars.push_back(0.0); + + for (size_t k = 1; k < path.size(); k++) + { + FATCAT_AFP curr = merged_afps[path[k]]; + FATCAT_AFP prv = merged_afps[path[k - 1]]; + double dvar = get_dvar(prv, curr); + + if (dvar >= disCut) + { + candidate_blocks.push_back(curr_block); + curr_block.afps.clear(); + curr_block.dvars.clear(); + curr_block.afps.push_back(curr); + curr_block.dvars.push_back(0.0); + } + else + { + curr_block.afps.push_back(curr); + curr_block.dvars.push_back(dvar); + } + } + if (!curr_block.afps.empty()) + candidate_blocks.push_back(curr_block); + + bool splitted = true; + while (splitted && candidate_blocks.size() < (size_t)(max_twists + 1)) + { + splitted = false; + double max_rmsd = 0.0; + int target_b = -1; + + for (size_t b = 0; b < candidate_blocks.size(); b++) + { + if (candidate_blocks[b].afps.size() > 2) + { + double cur_rmsd = calc_block_rmsd(candidate_blocks[b].afps); + if (cur_rmsd > max_rmsd) + { + max_rmsd = cur_rmsd; + target_b = b; + } + } + } + + if (max_rmsd >= cur_local_badRmsd && target_b != -1) + { + double max_t = 0; + int cut_idx = 0; + for (size_t i = 1; i < candidate_blocks[target_b].afps.size(); i++) + { + if (candidate_blocks[target_b].dvars[i] > max_t) + { + max_t = candidate_blocks[target_b].dvars[i]; + cut_idx = i; + } + } + + if (cut_idx > 0) + { + Block right_blk; + 
right_blk.afps.assign(candidate_blocks[target_b].afps.begin() + cut_idx, candidate_blocks[target_b].afps.end()); + right_blk.dvars.assign(candidate_blocks[target_b].dvars.begin() + cut_idx, candidate_blocks[target_b].dvars.end()); + right_blk.dvars[0] = 0.0; + candidate_blocks[target_b].afps.erase(candidate_blocks[target_b].afps.begin() + cut_idx, candidate_blocks[target_b].afps.end()); + candidate_blocks[target_b].dvars.erase(candidate_blocks[target_b].dvars.begin() + cut_idx, candidate_blocks[target_b].dvars.end()); + candidate_blocks.insert(candidate_blocks.begin() + target_b + 1, right_blk); + splitted = true; + } + } + } + + for (int b = 0; b < (int)candidate_blocks.size(); b++) + { + if (candidate_blocks[b].afps.size() <= 1) + { + int e1 = (b < (int)candidate_blocks.size() - 1) ? candidate_blocks[b + 1].afps.front().i : xlen; + int e2 = (b < (int)candidate_blocks.size() - 1) ? candidate_blocks[b + 1].afps.front().j : ylen; + int b1 = (b > 0) ? candidate_blocks[b - 1].afps.back().i + candidate_blocks[b - 1].afps.back().len : 0; + int b2 = (b > 0) ? 
candidate_blocks[b - 1].afps.back().j + candidate_blocks[b - 1].afps.back().len : 0; + int span = std::min(e1 - b1, e2 - b2); + if (span < 2 * fragLen) + { + candidate_blocks.erase(candidate_blocks.begin() + b); + b--; + } + } + } + + bool merged = true; + while (merged && candidate_blocks.size() > 1) + { + merged = false; + double min_rmsd = 1e9; + int min_b = -1; + for (size_t b = 0; b < candidate_blocks.size() - 1; b++) + { + std::vector temp_merged = candidate_blocks[b].afps; + temp_merged.insert(temp_merged.end(), candidate_blocks[b + 1].afps.begin(), candidate_blocks[b + 1].afps.end()); + double cur_rmsd = calc_block_rmsd(temp_merged); + if (cur_rmsd < min_rmsd) + { + min_rmsd = cur_rmsd; + min_b = b; + } + } + + if (min_rmsd < cur_local_badRmsd && min_b != -1) + { + candidate_blocks[min_b].afps.insert(candidate_blocks[min_b].afps.end(), candidate_blocks[min_b + 1].afps.begin(), candidate_blocks[min_b + 1].afps.end()); + candidate_blocks.erase(candidate_blocks.begin() + min_b + 1); + merged = true; + } + } + + std::vector fatcat_domains; + int last_i = 0, last_j = 0; + for (size_t b = 0; b < candidate_blocks.size(); b++) + { + int b_s1 = -1, b_e1 = -1, b_s2 = -1, b_e2 = -1; + for (size_t a = 0; a < candidate_blocks[b].afps.size(); a++) + { + FATCAT_AFP afp = candidate_blocks[b].afps[a]; + int skip = std::max(std::max(last_i - afp.i, last_j - afp.j), 0); + if (skip >= afp.len) + continue; + + int eff_i = afp.i + skip; + int eff_j = afp.j + skip; + int eff_L = afp.len - skip; + if (b_s1 == -1) + { + b_s1 = eff_i; + b_s2 = eff_j; + } + b_e1 = eff_i + eff_L; + b_e2 = eff_j + eff_L; + last_i = b_e1; + last_j = b_e2; + } + if (b_s1 != -1) + { + if (b_e1 - b_s1 >= 4 && b_e2 - b_s2 >= 4) + { + Region r = {b_s1, b_e1, b_s2, b_e2}; + fatcat_domains.push_back(r); + } + } + } + + if (fatcat_domains.empty()) + return std::make_pair(ret_b1, ret_b2); + + ret_b1.push_back(0); + ret_b2.push_back(0); + for (size_t k = 0; k < fatcat_domains.size() - 1; k++) + { + 
ret_b1.push_back((fatcat_domains[k].e1 + fatcat_domains[k + 1].s1) / 2); + ret_b2.push_back((fatcat_domains[k].e2 + fatcat_domains[k + 1].s2) / 2); + } + ret_b1.push_back(xlen); + ret_b2.push_back(ylen); + + return std::make_pair(ret_b1, ret_b2); + }; + + auto bounds_fatcat = generate_bounds(3.0, 4.0, 4.0); + auto bounds_strict = generate_bounds(2.0, 3.0, 2.0); + + std::vector, std::vector>> all_bounds; + all_bounds.push_back(bounds_fatcat); + if (bounds_strict.first != bounds_fatcat.first || bounds_strict.second != bounds_fatcat.second) + { + all_bounds.push_back(bounds_strict); + } + + // Loop through both bound sets, updating best_global_max_TM if we beat the flexalign_greedy defender + for (size_t b_idx = 0; b_idx < all_bounds.size(); b_idx++) + { + std::vector &bounds1 = all_bounds[b_idx].first; + std::vector &bounds2 = all_bounds[b_idx].second; + + // Skip if only one interval (block) is generated, as the full unbroken sequence + // has already been processed by the baseline (flexalign_greedy) above. + if (bounds1.size() <= 2) + continue; + + // ================== DEBUG START ================== + // Output the interval mapping for the current boundary set + // std::cout << "\n[DEBUG] --- Region Mapping Table ---" << std::endl; + // std::cout << "[DEBUG] Mode: " << (b_idx == 0 ? 
"FATCAT Bounds" : "Strict Bounds") << std::endl; + // std::cout << "[DEBUG] Total Blocks: " << (bounds1.size() - 1) << std::endl; + + // for (size_t k = 0; k < bounds1.size() - 1; k++) + // { + // std::cout << "[DEBUG] Block " << (k + 1) << ": " + // << "Chain1 [" << bounds1[k] << " -> " << bounds1[k + 1] << "] <==> " + // << "Chain2 [" << bounds2[k] << " -> " << bounds2[k + 1] << "]" + // << std::endl; + // } + // std::cout << "[DEBUG] ----------------------------\n" << std::endl; + // =================== DEBUG END =================== + + // Precalculate distributed local_hinge_opt for each block when hinge_set is true + int num_blocks = bounds1.size() - 1; + std::vector precalc_local_hinge(num_blocks, 0); + + if (hinge_set) + { + struct BlockMeta + { + int index; + double rmsd; + }; + std::vector valid_blocks; + + // Calculate target hinges to distribute based on requested hinge_opt and current implicit blocks + int target_total_hinges = std::max(0, hinge_opt + 1 - num_blocks); + + // Calculate base amount of hinges per block + int base_hinge = (hinge_opt + 1) / num_blocks - 1; + if (base_hinge < 0) + base_hinge = 0; + + for (int k = 0; k < num_blocks; k++) + { + int L1_sub = bounds1[k + 1] - bounds1[k]; + int L2_sub = bounds2[k + 1] - bounds2[k]; + int min_L = std::min(L1_sub, L2_sub); + + if (min_L < 2 * fragLen) + { + precalc_local_hinge[k] = 0; // Length < 2*fragLen gets 0 + } + else + { + // Calculate rough RMSD for this unaligned block section + double block_rmsd = 0.0; + if (min_L >= 3) + { + double **p1, **p2; + NewArray(&p1, min_L, 3); + NewArray(&p2, min_L, 3); + for (int i = 0; i < min_L; i++) + { + p1[i][0] = xa[bounds1[k] + i][0]; + p1[i][1] = xa[bounds1[k] + i][1]; + p1[i][2] = xa[bounds1[k] + i][2]; + p2[i][0] = ya[bounds2[k] + i][0]; + p2[i][1] = ya[bounds2[k] + i][1]; + p2[i][2] = ya[bounds2[k] + i][2]; + } + double rms_sum_sq, t_tmp[3], u_tmp[3][3]; + Kabsch(p1, p2, min_L, 0, &rms_sum_sq, t_tmp, u_tmp); + block_rmsd = std::sqrt(rms_sum_sq / 
min_L); + DeleteArray(&p1, min_L); + DeleteArray(&p2, min_L); + } + valid_blocks.push_back({k, block_rmsd}); + precalc_local_hinge[k] = base_hinge; // Assign base hinges to valid blocks + } + } + + // Distribute remaining hinges strictly prioritizing top RMSD blocks + int assigned = valid_blocks.size() * base_hinge; + int remainder = target_total_hinges - assigned; + + if (remainder > 0 && !valid_blocks.empty()) + { + // Sort valid blocks by RMSD descending + std::sort(valid_blocks.begin(), valid_blocks.end(), [](const BlockMeta &a, const BlockMeta &b) + { return a.rmsd > b.rmsd; }); + + int v_idx = 0; + while (remainder > 0) + { + precalc_local_hinge[valid_blocks[v_idx].index]++; // Give +1 to the front runners + remainder--; + v_idx = (v_idx + 1) % valid_blocks.size(); + } + } + } + + // Step 5: Iteratively align each block + std::string cur_global_seqM = "", cur_global_seqxA = "", cur_global_seqyA = ""; + cur_global_seqM.reserve(xlen + ylen + max_gap); + cur_global_seqxA.reserve(xlen + ylen + max_gap); + cur_global_seqyA.reserve(xlen + ylen + max_gap); + + std::vector> cur_tu_vec; + std::vector cur_global_res_tu(xlen, -1); + + for (size_t k = 0; k < bounds1.size() - 1; k++) + { + int x_s = bounds1[k], x_e = bounds1[k + 1]; + int y_s = bounds2[k], y_e = bounds2[k + 1]; + int L1_sub = x_e - x_s; + int L2_sub = y_e - y_s; + + if (L1_sub < 3 || L2_sub < 3) + { + for (int i = 0; i < L1_sub; i++) + { + cur_global_seqxA += seqx[x_s + i]; + cur_global_seqyA += '-'; + cur_global_seqM += ' '; + } + for (int i = 0; i < L2_sub; i++) + { + cur_global_seqxA += '-'; + cur_global_seqyA += seqy[y_s + i]; + cur_global_seqM += ' '; + } + continue; + } + + double **xa_sub, **ya_sub; + NewArray(&xa_sub, L1_sub, 3); + NewArray(&ya_sub, L2_sub, 3); + char *seqx_sub = new char[L1_sub + 1]; + char *seqy_sub = new char[L2_sub + 1]; + char *secx_sub = new char[L1_sub + 1]; + char *secy_sub = new char[L2_sub + 1]; + + for (int i = 0; i < L1_sub; i++) + { + xa_sub[i][0] = xa[x_s + i][0]; + 
xa_sub[i][1] = xa[x_s + i][1]; + xa_sub[i][2] = xa[x_s + i][2]; + seqx_sub[i] = seqx[x_s + i]; + secx_sub[i] = secx[x_s + i]; + } + seqx_sub[L1_sub] = '\0'; + secx_sub[L1_sub] = '\0'; + + for (int i = 0; i < L2_sub; i++) + { + ya_sub[i][0] = ya[y_s + i][0]; + ya_sub[i][1] = ya[y_s + i][1]; + ya_sub[i][2] = ya[y_s + i][2]; + seqy_sub[i] = seqy[y_s + i]; + secy_sub[i] = secy[y_s + i]; + } + seqy_sub[L2_sub] = '\0'; + secy_sub[L2_sub] = '\0'; + + double t0_best[3], u0_best[3][3]; + double TM_best_max = -1.0; + std::string seqM_best, seqxA_best, seqyA_best; + std::vector> tu_vec_best; + + bool force_fast_opt = (std::min(L1_sub, L2_sub) > 1500) ? true : fast_opt; + + // Determine local_hinge_opt based on user requirements. + // If hinge_set is true, we use the precalculated distributed hinges. + // Otherwise, set to 0 if the block length is less than 2 * fragLen, else 2. + int local_hinge_opt; + if (hinge_set) + { + local_hinge_opt = precalc_local_hinge[k]; + } + else + { + local_hinge_opt = (std::min(L1_sub, L2_sub) < 2 * fragLen) ? 0 : 2; + } + + for (int cur_ss_opt = 0; cur_ss_opt <= 1; cur_ss_opt++) + { + FlexAlignResult cur_res; + execute_flexalign_with_fallback( + xa_sub, ya_sub, seqx_sub, seqy_sub, secx_sub, secy_sub, + L1_sub, L2_sub, local_sequence, Lnorm_ass, d0_scale, + i_opt, a_opt, u_opt, d_opt, force_fast_opt, + mol_type, local_hinge_opt, cur_ss_opt, cur_res); + + double cur_max_TM = (cur_res.TM1 > cur_res.TM2) ? 
cur_res.TM1 : cur_res.TM2; + if (cur_max_TM > TM_best_max) + { + TM_best_max = cur_max_TM; + for (int a = 0; a < 3; a++) + { + t0_best[a] = cur_res.t0[a]; + for (int b = 0; b < 3; b++) + u0_best[a][b] = cur_res.u0[a][b]; + } + seqM_best = cur_res.seqM; + seqxA_best = cur_res.seqxA; + seqyA_best = cur_res.seqyA; + tu_vec_best = cur_res.tu_vec; + } + } + + if (TM_best_max <= 0) + { + for (int i = 0; i < L1_sub; i++) + { + cur_global_seqxA += seqx_sub[i]; + cur_global_seqyA += '-'; + cur_global_seqM += ' '; + } + for (int i = 0; i < L2_sub; i++) + { + cur_global_seqxA += '-'; + cur_global_seqyA += seqy_sub[i]; + cur_global_seqM += ' '; + } + DeleteArray(&xa_sub, L1_sub); + DeleteArray(&ya_sub, L2_sub); + delete[] seqx_sub; + delete[] seqy_sub; + delete[] secx_sub; + delete[] secy_sub; + continue; + } + + if (tu_vec_best.empty()) + { + std::vector tu_tmp(12); + t_u2tu(t0_best, u0_best, tu_tmp); + tu_vec_best.push_back(tu_tmp); + } + + int base_tu_idx = cur_tu_vec.size(); + for (size_t m = 0; m < tu_vec_best.size(); m++) + cur_tu_vec.push_back(tu_vec_best[m]); + + int rx = x_s; + int current_global_idx = base_tu_idx; + + for (size_t i = 0; i < seqxA_best.length(); i++) + { + char c = seqM_best[i]; + + if (c != ' ' && c != '.' && c != ':') + { + int local_hinge_idx = -1; + if (c >= '0' && c <= '9') + local_hinge_idx = c - '0'; + else if (c >= 'a' && c <= 'z') + local_hinge_idx = c - 'a' + 10; + else if (c >= 'A' && c <= 'Z') + local_hinge_idx = c - 'A' + 36; + if (local_hinge_idx >= 0 && local_hinge_idx < tu_vec_best.size()) + current_global_idx = base_tu_idx + local_hinge_idx; + } + + if (seqxA_best[i] != '-') + { + cur_global_res_tu[rx] = current_global_idx; + rx++; + } + + if (seqxA_best[i] != '-' && seqyA_best[i] != '-') + { + if (c != ' ' && c != '.' 
&& c != ':') + { + char global_c; + if (current_global_idx < 10) + global_c = '0' + current_global_idx; + else if (current_global_idx < 36) + global_c = 'a' + (current_global_idx - 10); + else if (current_global_idx < 62) + global_c = 'A' + (current_global_idx - 36); + else + global_c = '*'; + seqM_best[i] = global_c; + } + else + { + seqM_best[i] = c; + } + } + else + { + seqM_best[i] = ' '; + } + } + + cur_global_seqM += seqM_best; + cur_global_seqxA += seqxA_best; + cur_global_seqyA += seqyA_best; + + DeleteArray(&xa_sub, L1_sub); + DeleteArray(&ya_sub, L2_sub); + delete[] seqx_sub; + delete[] seqy_sub; + delete[] secx_sub; + delete[] secy_sub; + } + + // Step 6: Recalculate global metrics correctly for current DP boundary + // Variables to receive dummy outputs from parameter_set4final + double dummy_D0_MIN, dummy_Lnorm, dummy_d0_search; + double cur_d0A, cur_d0B, cur_d0a, cur_d0u = 0.0; + + // Calculate d0 using parameter_set4final to correctly handle both proteins and RNA/DNA, + // and to prevent std::pow domain errors (NaN) when sequence length <= 15. 
+ parameter_set4final(ylen, dummy_D0_MIN, dummy_Lnorm, cur_d0A, dummy_d0_search, mol_type); + parameter_set4final(xlen, dummy_D0_MIN, dummy_Lnorm, cur_d0B, dummy_d0_search, mol_type); + parameter_set4final((xlen + ylen) * 0.5, dummy_D0_MIN, dummy_Lnorm, cur_d0a, dummy_d0_search, mol_type); + + if (u_opt) + { + parameter_set4final(Lnorm_ass, dummy_D0_MIN, dummy_Lnorm, cur_d0u, dummy_d0_search, mol_type); + } + + double cur_TM1 = 0.0, cur_TM2 = 0.0, cur_TM3 = 0.0, cur_TM4 = 0.0, cur_TM5 = 0.0; + double cur_rmsd0 = 0.0, cur_Liden = 0.0; + int cur_n_ali8 = 0, cur_n_ali = 0; + std::vector cur_do_vec; + + int i_res = 0, j_res = 0; + for (size_t r = 0; r < cur_global_seqxA.length(); r++) + { + bool x_valid = (cur_global_seqxA[r] != '-'); + bool y_valid = (cur_global_seqyA[r] != '-'); + + if (x_valid && y_valid) + { + int matrix_idx = cur_global_res_tu[i_res]; + if (matrix_idx >= 0 && matrix_idx < cur_tu_vec.size()) + { + double t_k[3], u_k[3][3]; + tu2t_u(cur_tu_vec[matrix_idx], t_k, u_k); + + double x_rot[3]; + transform(t_k, u_k, xa[i_res], x_rot); + double dist2 = dist(x_rot, ya[j_res]); + double d = std::sqrt(dist2); + + cur_TM2 += 1.0 / (1.0 + dist2 / (cur_d0B * cur_d0B)); + cur_TM1 += 1.0 / (1.0 + dist2 / (cur_d0A * cur_d0A)); + if (a_opt) + cur_TM3 += 1.0 / (1.0 + dist2 / (cur_d0a * cur_d0a)); + if (u_opt) + cur_TM4 += 1.0 / (1.0 + dist2 / (cur_d0u * cur_d0u)); + if (d_opt) + cur_TM5 += 1.0 / (1.0 + dist2 / (d0_scale * d0_scale)); + + cur_n_ali++; + cur_do_vec.push_back(d); + + if (d <= d0_out) + { + cur_rmsd0 += dist2; + cur_n_ali8++; + if (seqx[i_res] == seqy[j_res]) + cur_Liden += 1.0; + } + } + else + { + cur_do_vec.push_back(-1); + } + } + else + { + cur_do_vec.push_back(-1); + } + + if (x_valid) + i_res++; + if (y_valid) + j_res++; + } + + // Normalize TM-scores + cur_TM2 /= xlen; + cur_TM1 /= ylen; + if (a_opt) + cur_TM3 /= (xlen + ylen) * 0.5; + if (u_opt) + cur_TM4 /= Lnorm_ass; + if (d_opt) + cur_TM5 /= ylen; + if (cur_n_ali8 > 0) + cur_rmsd0 = 
std::sqrt(cur_rmsd0 / cur_n_ali8); + else + cur_rmsd0 = 0.0; + + // Compare against the flexalign_greedy defender! + double cur_global_max_TM = (cur_TM1 > cur_TM2) ? cur_TM1 : cur_TM2; + + if (cur_global_max_TM > best_global_max_TM) + { + // <--- ADD DEBUG HERE + // if (b_idx == 1) + // { + // std::cout << "[DEBUG] strict" << std::endl; + // } + + best_global_max_TM = cur_global_max_TM; + best_tu_vec = cur_tu_vec; + best_TM1 = cur_TM1; + best_TM2 = cur_TM2; + best_TM3 = cur_TM3; + best_TM4 = cur_TM4; + best_TM5 = cur_TM5; + best_rmsd0 = cur_rmsd0; + best_Liden = cur_Liden; + best_TM_ali = cur_TM1; + best_rmsd_ali = cur_rmsd0; + best_L_ali = cur_n_ali; + best_n_ali = cur_n_ali; + best_n_ali8 = cur_n_ali8; + best_seqM = cur_global_seqM; + best_seqxA = cur_global_seqxA; + best_seqyA = cur_global_seqyA; + best_do_vec = cur_do_vec; + best_d0A = cur_d0A; + best_d0B = cur_d0B; + best_d0a = cur_d0a; + best_d0u = cur_d0u; + + if (!best_tu_vec.empty()) + { + tu2t_u(best_tu_vec[0], best_t0, best_u0); + } + } + } + + // Safety check + if (best_global_max_TM < 0) + return 0; + + // Output best values back to the reference parameters + TM1 = best_TM1; + TM2 = best_TM2; + TM3 = best_TM3; + TM4 = best_TM4; + TM5 = best_TM5; + rmsd0 = best_rmsd0; + Liden = best_Liden; + TM_ali = best_TM_ali; + rmsd_ali = best_rmsd_ali; + L_ali = best_L_ali; + n_ali = best_n_ali; + n_ali8 = best_n_ali8; + seqM = best_seqM; + seqxA = best_seqxA; + seqyA = best_seqyA; + do_vec = best_do_vec; + tu_vec = best_tu_vec; + d0A = best_d0A; + d0B = best_d0B; + d0a = best_d0a; + d0u = best_d0u; + + for (int a = 0; a < 3; a++) + { + t0[a] = best_t0[a]; + for (int b = 0; b < 3; b++) + u0[a][b] = best_u0[a][b]; + } + + return tu_vec.size(); +} + #endif diff --git a/param_set.h b/param_set.h index 9300404..1cc4807 100644 --- a/param_set.h +++ b/param_set.h @@ -7,71 +7,89 @@ #include "basic_fun.h" void parameter_set4search(const int xlen, const int ylen, - double &D0_MIN, double &Lnorm, - double &score_d8, double 
&d0, double &d0_search, double &dcu0) + double &D0_MIN, double &Lnorm, + double &score_d8, double &d0, double &d0_search, double &dcu0) { - //parameter initialization for searching: D0_MIN, Lnorm, d0, d0_search, score_d8 - D0_MIN=0.5; - dcu0=4.25; //update 3.85-->4.25 - - Lnorm=getmin(xlen, ylen); //normalize TMscore by this in searching - if (Lnorm<=19) //update 15-->19 - d0=0.168; //update 0.5-->0.168 - else d0=(1.24*pow((Lnorm*1.0-15), 1.0/3)-1.8); - D0_MIN=d0+0.8; //this should be moved to above - d0=D0_MIN; //update: best for search + // parameter initialization for searching: D0_MIN, Lnorm, d0, d0_search, score_d8 + D0_MIN = 0.5; + dcu0 = 4.25; // update 3.85-->4.25 - d0_search=d0; - if (d0_search>8) d0_search=8; - if (d0_search<4.5) d0_search=4.5; + Lnorm = getmin(xlen, ylen); // normalize TMscore by this in searching + if (Lnorm <= 19) // update 15-->19 + d0 = 0.168; // update 0.5-->0.168 + else + d0 = (1.24 * pow((Lnorm * 1.0 - 15), 1.0 / 3) - 1.8); + D0_MIN = d0 + 0.8; // this should be moved to above + d0 = D0_MIN; // update: best for search - score_d8=1.5*pow(Lnorm*1.0, 0.3)+3.5; //remove pairs with dis>d8 during search & final + d0_search = d0; + if (d0_search > 8) + d0_search = 8; + if (d0_search < 4.5) + d0_search = 4.5; + + score_d8 = 1.5 * pow(Lnorm * 1.0, 0.3) + 3.5; // remove pairs with dis>d8 during search & final } void parameter_set4final_C3prime(const double len, double &D0_MIN, - double &Lnorm, double &d0, double &d0_search) + double &Lnorm, double &d0, double &d0_search) { - D0_MIN=0.3; - - Lnorm=len; //normalize TMscore by this in searching - if(Lnorm<=11) d0=0.3; - else if(Lnorm>11&&Lnorm<=15) d0=0.4; - else if(Lnorm>15&&Lnorm<=19) d0=0.5; - else if(Lnorm>19&&Lnorm<=23) d0=0.6; - else if(Lnorm>23&&Lnorm<30) d0=0.7; - else d0=(0.6*pow((Lnorm*1.0-0.5), 1.0/2)-2.5); + D0_MIN = 0.3; + + Lnorm = len; // normalize TMscore by this in searching + if (Lnorm <= 11) + d0 = 0.3; + else if (Lnorm > 11 && Lnorm <= 15) + d0 = 0.4; + else if (Lnorm > 15 
&& Lnorm <= 19) + d0 = 0.5; + else if (Lnorm > 19 && Lnorm <= 23) + d0 = 0.6; + else if (Lnorm > 23 && Lnorm < 30) + d0 = 0.7; + else + d0 = (0.6 * pow((Lnorm * 1.0 - 0.5), 1.0 / 2) - 2.5); - d0_search=d0; - if (d0_search>8) d0_search=8; - if (d0_search<4.5) d0_search=4.5; + d0_search = d0; + if (d0_search > 8) + d0_search = 8; + if (d0_search < 4.5) + d0_search = 4.5; } void parameter_set4final(const double len, double &D0_MIN, double &Lnorm, - double &d0, double &d0_search, const int mol_type) + double &d0, double &d0_search, const int mol_type) { - if (mol_type>0) // RNA + if (mol_type > 0) // RNA { parameter_set4final_C3prime(len, D0_MIN, Lnorm, - d0, d0_search); + d0, d0_search); return; } - D0_MIN=0.5; - - Lnorm=len; //normalize TMscore by this in searching - if (Lnorm<=21) d0=0.5; - else d0=(1.24*pow((Lnorm*1.0-15), 1.0/3)-1.8); - if (d0<D0_MIN) d0=D0_MIN; - d0_search=d0; - if (d0_search>8) d0_search=8; - if (d0_search<4.5) d0_search=4.5; + D0_MIN = 0.5; + + Lnorm = len; // normalize TMscore by this in searching + if (Lnorm <= 21) + d0 = 0.5; + else + d0 = (1.24 * pow((Lnorm * 1.0 - 15), 1.0 / 3) - 1.8); + if (d0 < D0_MIN) + d0 = D0_MIN; + d0_search = d0; + if (d0_search > 8) + d0_search = 8; + if (d0_search < 4.5) + d0_search = 4.5; } void parameter_set4scale(const int len, const double d_s, double &Lnorm, - double &d0, double &d0_search) + double &d0, double &d0_search) { - d0=d_s; - Lnorm=len; //normalize TMscore by this in searching - d0_search=d0; - if (d0_search>8) d0_search=8; - if (d0_search<4.5) d0_search=4.5; + d0 = d_s; + Lnorm = len; // normalize TMscore by this in searching + d0_search = d0; + if (d0_search > 8) + d0_search = 8; + if (d0_search < 4.5) + d0_search = 4.5; }