diff --git a/source/source_hsolver/test/diago_cg_float_test.cpp b/source/source_hsolver/test/diago_cg_float_test.cpp index 32514c92e3..1b0de9b613 100644 --- a/source/source_hsolver/test/diago_cg_float_test.cpp +++ b/source/source_hsolver/test/diago_cg_float_test.cpp @@ -58,6 +58,35 @@ void lapackEigen(int &npw, std::vector> &hm, float *e, bool delete[] work2; } +// LAPACK reference for generalized eigenproblem when S is diagonal (complex) +void lapackGeneralEigen(int &npw, std::vector> &hm, const std::vector> &sdiag, float *e, bool outtime = false) +{ + // build transformed matrix tmp = S^{-1/2} H S^{-1/2} + std::vector> tmp(npw * npw); + for (int i = 0; i < npw; ++i) { + std::complex si = std::sqrt(sdiag[i]); + for (int j = 0; j < npw; ++j) { + std::complex sj = std::sqrt(sdiag[j]); + tmp[i * npw + j] = hm[i * npw + j] / (si * sj); + } + } + + // call cheev_ on transformed matrix + clock_t start = clock(), end; + int lwork = 2 * npw; + std::complex *work2 = new std::complex[lwork]; + float *rwork = new float[3 * npw - 2]; + int info = 0; + char tmp_c1 = 'V', tmp_c2 = 'U'; + cheev_(&tmp_c1, &tmp_c2, &npw, tmp.data(), &npw, e, work2, &lwork, rwork, &info); + end = clock(); + if (outtime) { + std::cout << "Lapack General Run time: " << (float)(end - start) / CLOCKS_PER_SEC << " S" << std::endl; + } + delete[] rwork; + delete[] work2; +} + class DiagoCGPrepare { public: @@ -85,8 +114,14 @@ class DiagoCGPrepare float *e_lapack = new float[npw]; auto ev = DIAGOTEST::hmatrix_f; - if(mypnum == 0) { lapackEigen(npw, ev, e_lapack, false); -} + if(mypnum == 0) { + if (DIAGOTEST::sdiag_f.empty()) { + lapackEigen(npw, ev, e_lapack); + } else { + auto hm_copy = ev; // operate on a copy + lapackGeneralEigen(npw, hm_copy, DIAGOTEST::sdiag_f, e_lapack); + } + } // initial guess of psi by perturbing lapack psi std::vector> psiguess(nband * npw); @@ -135,6 +170,7 @@ class DiagoCGPrepare precondition_local = new float[DIAGOTEST::npw]; for(int i=0;i> cg(precondition_local); psi_local.fix_k(0); // cg.diag(ha,psi_local,en); @@ -228,12 +264,33 @@ TEST_P(DiagoCGFloatTest, RandomHamilt) //std::cout<<"eps "<>::PW_DIAG_THR<> hpsi(dcp.nband, dcp.npw, dcp.sparsity); DIAGOTEST::hmatrix_f = hpsi.hamilt(); + DIAGOTEST::sdiag_f.clear(); // ensure sdiag is empty for this test DIAGOTEST::npw = dcp.npw; // ModuleBase::ComplexMatrix psi = hpsi.psi(); dcp.CompareEigen(hpsi.precond()); } +TEST_P(DiagoCGFloatTest, RandomHamiltAndS) +{ + DiagoCGPrepare dcp = GetParam(); + hsolver::DiagoIterAssist>::PW_DIAG_NMAX = dcp.maxiter; + hsolver::DiagoIterAssist>::PW_DIAG_THR = dcp.eps; + HPsi> hpsi(dcp.nband, dcp.npw, dcp.sparsity); + DIAGOTEST::hmatrix_f = hpsi.hamilt(); + + DIAGOTEST::npw = dcp.npw; + + DIAGOTEST::sdiag_f.resize(dcp.npw); + std::default_random_engine eng(123); + std::uniform_real_distribution ud(0.5f, 1.5f); + for (int i = 0; i < dcp.npw; ++i) { + DIAGOTEST::sdiag_f[i] = std::complex(ud(eng), 0.0f); + } + + dcp.CompareEigen(hpsi.precond()); +} + INSTANTIATE_TEST_SUITE_P(VerifyCG, DiagoCGFloatTest, ::testing::Values( diff --git a/source/source_hsolver/test/diago_cg_real_test.cpp b/source/source_hsolver/test/diago_cg_real_test.cpp index eacb1341eb..90c7bd440c 100644 --- a/source/source_hsolver/test/diago_cg_real_test.cpp +++ b/source/source_hsolver/test/diago_cg_real_test.cpp @@ -136,6 +136,7 @@ class DiagoCGPrepare precondition_local = new double[DIAGOTEST::npw]; for (int i = 0;i < DIAGOTEST::npw;i++) precondition_local[i] = precondition[i]; #endif + DIAGOTEST::sync_sdiag(DIAGOTEST::sdiag_d, DIAGOTEST::sdiag_local_d); // hsolver::DiagoCG cg(precondition_local); psi_local.fix_k(0); // double start, end; diff --git a/source/source_hsolver/test/diago_cg_test.cpp b/source/source_hsolver/test/diago_cg_test.cpp index bc373a4d80..a7d7ada928 100644 --- a/source/source_hsolver/test/diago_cg_test.cpp +++ b/source/source_hsolver/test/diago_cg_test.cpp @@ -58,6 +58,36 @@ void lapackEigen(int &npw, std::vector> &hm, double *e, boo delete[] work2; } +// LAPACK reference for generalized eigenproblem when S is diagonal (complex) +void lapackGeneralEigen(int &npw, std::vector> &hm, const std::vector> &sdiag, double *e, bool outtime = false) +{ + // build transformed matrix tmp = S^{-1/2} H S^{-1/2} + std::vector> tmp(npw * npw); + for (int i = 0; i < npw; ++i) { + std::complex si = std::sqrt(sdiag[i]); + for (int j = 0; j < npw; ++j) { + std::complex sj = std::sqrt(sdiag[j]); + tmp[i * npw + j] = hm[i * npw + j] / (si * sj); + } + } + + // call cheev_ on transformed matrix + clock_t start = clock(), end; + int lwork = 2 * npw; + std::complex *work2 = new std::complex[lwork]; + double *rwork = new double[3 * npw - 2]; + int info = 0; + char tmp_c1 = 'V', tmp_c2 = 'U'; + zheev_(&tmp_c1, &tmp_c2, &npw, tmp.data(), &npw, e, work2, &lwork, rwork, &info); + end = clock(); + if (outtime) { + std::cout << "Lapack General Run time: " << (double)(end - start) / CLOCKS_PER_SEC << " S" << std::endl; + } + delete[] rwork; + delete[] work2; +} + + class DiagoCGPrepare { public: @@ -84,8 +114,14 @@ class DiagoCGPrepare // calculate eigenvalues by LAPACK; double *e_lapack = new double[npw]; auto ev = DIAGOTEST::hmatrix; - if(mypnum == 0) { lapackEigen(npw, ev, e_lapack, false); -} + if(mypnum == 0) { + if (DIAGOTEST::sdiag.empty()) { + lapackEigen(npw, ev, e_lapack); + } else { + auto hm_copy = ev; // operate on a copy + lapackGeneralEigen(npw, hm_copy, DIAGOTEST::sdiag, e_lapack); + } + } // initial guess of psi by perturbing lapack psi ModuleBase::ComplexMatrix psiguess(nband, npw); std::default_random_engine p(1); @@ -131,6 +167,7 @@ class DiagoCGPrepare precondition_local = new double[DIAGOTEST::npw]; for(int i=0;i>::PW_DIAG_THR<> hpsi(dcp.nband, dcp.npw, dcp.sparsity); DIAGOTEST::hmatrix = hpsi.hamilt(); + DIAGOTEST::sdiag.clear(); // ensure sdiag is empty for this test DIAGOTEST::npw = dcp.npw; // ModuleBase::ComplexMatrix psi = hpsi.psi(); dcp.CompareEigen(hpsi.precond()); } +TEST_P(DiagoCGTest, RandomHamiltAndS) +{ + DiagoCGPrepare dcp = GetParam(); + hsolver::DiagoIterAssist>::PW_DIAG_NMAX = dcp.maxiter; + hsolver::DiagoIterAssist>::PW_DIAG_THR = dcp.eps; + HPsi> hpsi(dcp.nband, dcp.npw, dcp.sparsity); + DIAGOTEST::hmatrix = hpsi.hamilt(); + DIAGOTEST::sdiag.clear(); //ensure sdiag is empty before generating a new one + + DIAGOTEST::npw = dcp.npw; + + DIAGOTEST::sdiag.resize(dcp.npw); + std::default_random_engine eng(123); + std::uniform_real_distribution ud(0.5f, 1.5f); + for (int i = 0; i < dcp.npw; ++i) { + DIAGOTEST::sdiag[i] = std::complex(ud(eng), 0.0L); + } + + dcp.CompareEigen(hpsi.precond()); +} + + INSTANTIATE_TEST_SUITE_P(VerifyCG, DiagoCGTest, ::testing::Values( diff --git a/source/source_hsolver/test/diago_mock.h b/source/source_hsolver/test/diago_mock.h index 75cced8409..0628ece161 100644 --- a/source/source_hsolver/test/diago_mock.h +++ b/source/source_hsolver/test/diago_mock.h @@ -11,6 +11,15 @@ namespace DIAGOTEST std::vector> hmatrix_local; std::vector> hmatrix_f; std::vector> hmatrix_local_f; + + // diagonal representation of overlap (S) for simple mock of generalized eigenproblem + // if empty, sPsi will treat S as identity + std::vector sdiag_d; + std::vector sdiag_local_d; + std::vector > sdiag_f; + std::vector> sdiag_local_f; + std::vector> sdiag; + std::vector> sdiag_local; int h_nr; int h_nc; int npw; @@ -121,6 +130,31 @@ namespace DIAGOTEST template void divide_psi(double* psi, double* psi_local); template void divide_psi>(std::complex* psi, std::complex* psi_local); template void divide_psi>(std::complex* psi, std::complex* psi_local); + + template + void sync_sdiag(const std::vector& sdiag, std::vector& sdiag_local) + { + if (sdiag.empty()) + { + sdiag_local.clear(); + return; + } + + int nprocs = 1, mypnum = 0; +#ifdef __MPI + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &mypnum); +#endif + + sdiag_local.resize(npw_local[mypnum]); + divide_psi(const_cast(sdiag.data()), sdiag_local.data()); + } + + template void sync_sdiag(const std::vector& sdiag, std::vector& sdiag_local); + template void sync_sdiag>(const std::vector>& sdiag, + std::vector>& sdiag_local); + template void sync_sdiag>(const std::vector>& sdiag, + std::vector>& sdiag_local); #ifdef __MPI void cal_division(int &npw) { @@ -409,11 +443,21 @@ void hamilt::HamiltPW::sPsi(const double* psi_i const int npw, const int nbands) const { - for (size_t i = 0; i < static_cast(nbands * nrow); i++) - { - spsi[i] = psi_in[i]; + std::vector& sdiag_local +#ifdef __MPI + = DIAGOTEST::sdiag_local_d; +#else + = DIAGOTEST::sdiag_d; +#endif + if (sdiag_local.size() < static_cast(nrow)) { + sdiag_local.assign(nrow, 1.0); + } + for (int v = 0; v < nbands; ++v) { + for (int i = 0; i < nrow; ++i) { + size_t idx = static_cast(v) * nrow + i; + spsi[idx] = psi_in[idx] * sdiag_local[i]; + } } - return; } template <> void hamilt::HamiltPW, base_device::DEVICE_CPU>::sPsi(const std::complex* psi_in, @@ -422,11 +466,21 @@ void hamilt::HamiltPW, base_device::DEVICE_CPU>::sPsi(const const int npw, const int nbands) const { - for (size_t i = 0; i < static_cast(nbands * nrow); i++) - { - spsi[i] = psi_in[i]; + std::vector>& sdiag_local +#ifdef __MPI + = DIAGOTEST::sdiag_local; +#else + = DIAGOTEST::sdiag; +#endif + if (sdiag_local.size() < static_cast(nrow)) { + sdiag_local.assign(nrow, std::complex(1.0, 0.0)); + } + for (int v = 0; v < nbands; ++v) { + for (int i = 0; i < nrow; ++i) { + size_t idx = static_cast(v) * nrow + i; + spsi[idx] = psi_in[idx] * sdiag_local[i]; + } } - return; } template <> void hamilt::HamiltPW, base_device::DEVICE_CPU>::sPsi(const std::complex* psi_in, @@ -435,11 +489,21 @@ void hamilt::HamiltPW, base_device::DEVICE_CPU>::sPsi(const const int npw, const int nbands) const { - for (size_t i = 0; i < static_cast(nbands * nrow); i++) - { - spsi[i] = psi_in[i]; + std::vector>& sdiag_local +#ifdef __MPI + = DIAGOTEST::sdiag_local_f; +#else + = DIAGOTEST::sdiag_f; +#endif + if (sdiag_local.size() < static_cast(nrow)) { + sdiag_local.assign(nrow, std::complex(1.0f, 0.0f)); + } + for (int v = 0; v < nbands; ++v) { + for (int i = 0; i < nrow; ++i) { + size_t idx = static_cast(v) * nrow + i; + spsi[idx] = psi_in[idx] * sdiag_local[i]; + } } - return; } //Mock function h_psi