From 9536fcfd5b50a8b913fddfa5cb393962ad206c81 Mon Sep 17 00:00:00 2001 From: dyzheng Date: Sat, 30 May 2026 16:18:24 +0800 Subject: [PATCH] feat: add PPCG eigenvalue solver implementation --- source/source_hsolver/CMakeLists.txt | 1 + source/source_hsolver/diago_ppcg.cpp | 622 ++++++++++++++++++ source/source_hsolver/diago_ppcg.h | 179 +++++ source/source_hsolver/hsolver_pw.cpp | 36 +- source/source_hsolver/test/CMakeLists.txt | 8 + .../source_hsolver/test/diago_ppcg_test.cpp | 238 +++++++ .../read_input_item_elec_stru.cpp | 2 +- 7 files changed, 1084 insertions(+), 2 deletions(-) create mode 100644 source/source_hsolver/diago_ppcg.cpp create mode 100644 source/source_hsolver/diago_ppcg.h create mode 100644 source/source_hsolver/test/diago_ppcg_test.cpp diff --git a/source/source_hsolver/CMakeLists.txt b/source/source_hsolver/CMakeLists.txt index b115d6d4cd2..95f7e23e230 100644 --- a/source/source_hsolver/CMakeLists.txt +++ b/source/source_hsolver/CMakeLists.txt @@ -4,6 +4,7 @@ list(APPEND objects diago_david.cpp diago_dav_subspace.cpp diago_bpcg.cpp + diago_ppcg.cpp para_linear_transform.cpp hsolver_pw.cpp hsolver_lcaopw.cpp diff --git a/source/source_hsolver/diago_ppcg.cpp b/source/source_hsolver/diago_ppcg.cpp new file mode 100644 index 00000000000..54336200629 --- /dev/null +++ b/source/source_hsolver/diago_ppcg.cpp @@ -0,0 +1,622 @@ +#include "source_hsolver/diago_ppcg.h" + +#include "diago_iter_assist.h" +#include "source_base/constants.h" +#include "source_base/global_function.h" +#include "source_base/memory_recorder.h" +#include "source_base/parallel_reduce.h" +#include "source_base/timer.h" +#include "source_base/tool_title.h" + +#include +#include +#include +#include +#include +#include + +namespace hsolver { + +template +DiagoPPCG::DiagoPPCG(const std::string& basis_type, const std::string& calculation) +{ + basis_type_ = basis_type; + calculation_ = calculation; + this->one_ = new T(static_cast(1.0)); + this->zero_ = new T(static_cast(0.0)); + this->neg_one_ = new T(static_cast(-1.0)); +} + +template +DiagoPPCG::DiagoPPCG(const std::string& basis_type, + const std::string& calculation, + const bool& need_subspace, + const SubspaceFunc& subspace_func, + const Real& pw_diag_thr, + const int& pw_diag_nmax, + const int& nproc_in_pool) +{ + basis_type_ = basis_type; + calculation_ = calculation; + need_subspace_ = need_subspace; + subspace_func_ = subspace_func; + pw_diag_thr_ = pw_diag_thr; + pw_diag_nmax_ = pw_diag_nmax; + nproc_in_pool_ = nproc_in_pool; + this->one_ = new T(static_cast(1.0)); + this->zero_ = new T(static_cast(0.0)); + this->neg_one_ = new T(static_cast(-1.0)); +} + +template +DiagoPPCG::~DiagoPPCG() +{ + delete this->one_; + delete this->zero_; + delete this->neg_one_; +} + +template +void DiagoPPCG::compute_gradient(const ct::Tensor& prec, + const ct::Tensor& hpsi, + const ct::Tensor& spsi, + const Real& eigenvalue, + ct::Tensor& grad, + ct::Tensor& ppsi) +{ + // Following ABACUS DiagoCG::calc_grad pattern: + // grad = P * H|ψ> + ModuleBase::vector_div_vector_op()(this->n_basis_, grad.data(), hpsi.data(), prec.data()); + // ppsi = P * S|ψ> + ModuleBase::vector_div_vector_op()(this->n_basis_, ppsi.data(), spsi.data(), prec.data()); + + // λ_c = / + Real eh = ModuleBase::dot_real_op()(this->n_basis_, spsi.data(), grad.data()); + Real es = ModuleBase::dot_real_op()(this->n_basis_, spsi.data(), ppsi.data()); + + // g = P*H|ψ> - λ_c * P*S|ψ> (gradient orthogonal to S|ψ>) + Real lambda_c = (es > std::numeric_limits::epsilon()) ? eh / es : 0.0; + ModuleBase::vector_add_vector_op()(this->n_basis_, + grad.data(), + grad.data(), + 1.0, + ppsi.data(), + (-lambda_c)); +} + +template +void DiagoPPCG::orth_gradient(const ct::Tensor& psi, + const int& m, + ct::Tensor& grad, + ct::Tensor& s_grad, + ct::Tensor& lagrange) +{ + // s_grad = S|grad> + this->spsi_func_(grad.data(), s_grad.data(), this->n_basis_, 1); + + // lagrange[i] = <ψ_i|S|grad> for i = 0..m-1 + ModuleBase::gemv_op()('C', + this->n_basis_, + m, + this->one_, + psi.data(), + this->ld_psi_, + s_grad.data(), + 1, + this->zero_, + lagrange.data(), + 1); + + // All-reduce over MPI pools + Parallel_Reduce::reduce_pool(lagrange.data(), m); + + // |grad> -= Σ ψ_i * lagrange_i + ModuleBase::gemv_op()('N', + this->n_basis_, + m, + this->neg_one_, + psi.data(), + this->ld_psi_, + lagrange.data(), + 1, + this->one_, + grad.data(), + 1); + + // |s_grad> -= Σ ψ_i * lagrange_i + ModuleBase::gemv_op()('N', + this->n_basis_, + m, + this->neg_one_, + psi.data(), + this->ld_psi_, + lagrange.data(), + 1, + this->one_, + s_grad.data(), + 1); +} + +template +typename DiagoPPCG::Real DiagoPPCG::update_cg_direction( + ct::Tensor& cg, + ct::Tensor& scg, + const ct::Tensor& grad, + const ct::Tensor& s_grad, + const ct::Tensor& prec, + ct::Tensor& g0, + Real& gg_last, + const int& iter) +{ + // Following ABACUS DiagoCG::calc_gamma_cg pattern: + // g0 is persistent between iterations + // First compute gg_inter using g0 from previous iteration + Real gg_inter = 0.0; + if (iter > 0) + { + gg_inter = ModuleBase::dot_real_op()(this->n_basis_, grad.data(), g0.data()); + } + + // g0 = P * S|grad> = prec * scg (element-wise multiply) + for (int i = 0; i < this->n_basis_; i++) + { + g0.data()[i] = s_grad.data()[i] * prec.data()[i]; + } + + const Real gg_now = ModuleBase::dot_real_op()(this->n_basis_, grad.data(), g0.data()); + + Real gamma = 0.0; + if (iter == 0) + { + gg_last = gg_now; + // cg = grad + cg.sync(grad); + scg.sync(s_grad); + } + else + { + // Polak-Ribiere: gamma = (gg_now - gg_inter) / gg_last + if (gg_last > std::numeric_limits::epsilon()) + { + gamma = (gg_now - gg_inter) / gg_last; + if (gamma < 0.0) gamma = 0.0; // Safety: restart if negative + } + gg_last = gg_now; + + // cg = gamma * cg + grad + ModuleBase::vector_add_vector_op()(this->n_basis_, + cg.data(), + cg.data(), + gamma, + grad.data(), + 1.0); + // scg = gamma * scg + s_grad + ModuleBase::vector_add_vector_op()(this->n_basis_, + scg.data(), + scg.data(), + gamma, + s_grad.data(), + 1.0); + } + return gamma; +} + +template +bool DiagoPPCG::line_minimization(const ct::Tensor& ppsi, + const ct::Tensor& cg, + const ct::Tensor& scg, + const double& ethreshold, + Real& cg_norm, + Real& theta, + Real& eigenvalue, + ct::Tensor& psi_m, + ct::Tensor& spsi_m, + ct::Tensor& hpsi_m) +{ + // Compute for normalization + cg_norm = std::sqrt(ModuleBase::dot_real_op()(this->n_basis_, cg.data(), scg.data())); + + if (cg_norm < 1.0e-12) + { + return true; + } + + // Compute Rayleigh-Ritz coefficients in the 2D subspace [ψ, cg] + // a0 = 2 * <ψ|H|cg> / ||cg||_S + // b0 = / ||cg||_S^2 + // e0 = <ψ|H|ψ> (current eigenvalue) + const Real a0 = ModuleBase::dot_real_op()(this->n_basis_, psi_m.data(), ppsi.data()) * 2.0 / cg_norm; + const Real b0 = ModuleBase::dot_real_op()(this->n_basis_, cg.data(), ppsi.data()) / (cg_norm * cg_norm); + + const Real e0 = eigenvalue; + // Use atan matching ABACUS CG exactly: theta = atan(a0 / (e0 - b0)) / 2 + const Real denom = e0 - b0; + theta = (std::abs(denom) > std::numeric_limits::epsilon()) + ? std::atan(a0 / denom) / 2.0 + : 0.0; + + const Real new_e = (e0 - b0) * std::cos(2.0 * theta) + a0 * std::sin(2.0 * theta); + const Real e1 = (e0 + b0 + new_e) / 2.0; + const Real e2 = (e0 + b0 - new_e) / 2.0; + + // Select the smaller eigenvalue + if (e1 > e2) + { + theta += ModuleBase::PI_HALF; + } + eigenvalue = std::min(e1, e2); + + const Real cost = std::cos(theta); + const Real sint_norm = std::sin(theta) / cg_norm; + + // ψ_new = ψ * cos(θ) + cg * sin(θ)/||cg|| + ModuleBase::vector_add_vector_op()(this->n_basis_, + psi_m.data(), + psi_m.data(), + cost, + cg.data(), + sint_norm); + + if (std::abs(eigenvalue - e0) < ethreshold) + { + return true; + } + else + { + // Update S|ψ> and H|ψ> for the next iteration + ModuleBase::vector_add_vector_op()(this->n_basis_, + spsi_m.data(), + spsi_m.data(), + cost, + scg.data(), + sint_norm); + ModuleBase::vector_add_vector_op()(this->n_basis_, + hpsi_m.data(), + hpsi_m.data(), + cost, + ppsi.data(), + sint_norm); + return false; + } +} + +template +void DiagoPPCG::schmidt_orth(const int& m, + const ct::Tensor& psi, + const ct::Tensor& spsi_m, + ct::Tensor& psi_m) +{ + REQUIRES_OK(m >= 0, "DiagoPPCG::schmidt_orth: m < 0"); + REQUIRES_OK(this->n_band_ >= m, "DiagoPPCG::schmidt_orth: n_band < m"); + + ct::Tensor lagrange(ct::DataTypeToEnum::value, + ct::DeviceTypeToEnum::value, + {m + 1}); + + // lagrange[i] = <ψ_i|S|ψ_m> for i = 0..m + ModuleBase::gemv_op()('C', + this->n_basis_, + m + 1, + this->one_, + psi.data(), + this->ld_psi_, + spsi_m.data(), + 1, + this->zero_, + lagrange.data(), + 1); + + Parallel_Reduce::reduce_pool(lagrange.data(), m + 1); + + // Gram-Schmidt: ψ_m -= Σ ψ_i * lagrange_i (i = 0..m-1) + ModuleBase::gemv_op()('N', + this->n_basis_, + m, + this->neg_one_, + psi.data(), + this->ld_psi_, + lagrange.data(), + 1, + this->one_, + psi_m.data(), + 1); + + // Compute new norm and renormalize + Real psi_norm = ModuleBase::dot_real_op()(this->n_basis_, psi_m.data(), psi_m.data()); + if (psi_norm <= 0.0) + { + std::cout << " DiagoPPGC: psi_norm <= 0.0, m = " << m << std::endl; + ModuleBase::WARNING_QUIT("schmidt_orth", "psi_norm <= 0.0"); + } + psi_norm = std::sqrt(psi_norm); + + // Normalize + ModuleBase::vector_mul_real_op()(this->n_basis_, psi_m.data(), psi_m.data(), Real(1.0 / psi_norm)); +} + +template +bool DiagoPPCG::test_exit_cond(const int& ntry, const int& notconv) const +{ + const bool scf = calculation_ != "nscf"; + const bool f1 = ntry <= 5; + const bool f2 = !scf && notconv > 0; + const bool f3 = scf && notconv > 5; + return f1 && (f2 || f3); +} + +template +double DiagoPPCG::diag(const HPsiFunc& hpsi_func, + const SPsiFunc& spsi_func, + const int ld_psi, + const int nband, + const int dim, + T* psi_in, + Real* eigenvalue_in, + const std::vector& ethr_band, + const Real* prec) +{ + ModuleBase::TITLE("DiagoPPCG", "diag"); + ModuleBase::timer::start("DiagoPPCG", "diag"); + + REQUIRES_OK(ld_psi >= dim, "DiagoPPCG::diag: ld_psi must be >= dim"); + REQUIRES_OK(static_cast(ethr_band.size()) >= nband, + "DiagoPPCG::diag: ethr_band size must be >= nband"); + + // Set up tensor views + auto psi = ct::TensorMap(psi_in, + ct::DataTypeToEnum::value, + ct::DeviceTypeToEnum::value, + ct::TensorShape({nband, ld_psi})); + auto eigen = ct::TensorMap(eigenvalue_in, + ct::DataTypeToEnum::value, + ct::DeviceTypeToEnum::value, + ct::TensorShape({nband})); + + // Store parameters + this->n_band_ = nband; + this->n_basis_ = dim; + this->ld_psi_ = ld_psi; + this->notconv_ = 0; + + this->hpsi_func_ = hpsi_func; + this->spsi_func_ = spsi_func; + + // Get convergence parameters from DiagoIterAssist + this->pw_diag_nmax_ = DiagoIterAssist::PW_DIAG_NMAX; + this->pw_diag_thr_ = DiagoIterAssist::PW_DIAG_THR; + + // Allocate working arrays + // psi_m: current band vector + auto psi_m = ct::Tensor(ct::DataTypeToEnum::value, + ct::DeviceTypeToEnum::value, + {this->n_basis_}); + // hpsi_m: H|psi_m> + auto hpsi_m = ct::Tensor(ct::DataTypeToEnum::value, + ct::DeviceTypeToEnum::value, + {this->n_basis_}); + // spsi_m: S|psi_m> + auto spsi_m = ct::Tensor(ct::DataTypeToEnum::value, + ct::DeviceTypeToEnum::value, + {this->n_basis_}); + // ppsi: preconditioned workspace / H|cg> + auto ppsi = ct::Tensor(ct::DataTypeToEnum::value, + ct::DeviceTypeToEnum::value, + {this->n_basis_}); + // gradient + auto grad = ct::Tensor(ct::DataTypeToEnum::value, + ct::DeviceTypeToEnum::value, + {this->n_basis_}); + // s_grad: S|grad> + auto s_grad = ct::Tensor(ct::DataTypeToEnum::value, + ct::DeviceTypeToEnum::value, + {this->n_basis_}); + // cg: CG direction + auto cg = ct::Tensor(ct::DataTypeToEnum::value, + ct::DeviceTypeToEnum::value, + {this->n_basis_}); + // scg: S|cg> + auto scg = ct::Tensor(ct::DataTypeToEnum::value, + ct::DeviceTypeToEnum::value, + {this->n_basis_}); + // lagrange: for orthogonalization + auto lagrange = ct::Tensor(ct::DataTypeToEnum::value, + ct::DeviceTypeToEnum::value, + {this->n_band_}); + + // Set up preconditioner tensor + ct::Tensor prec_tensor; + if (prec != nullptr) + { + prec_tensor = ct::TensorMap(const_cast(prec), + ct::DataTypeToEnum::value, + ct::DeviceTypeToEnum::value, + ct::TensorShape({dim})) + .template to_device(); + } + else + { + prec_tensor = ct::Tensor(ct::DataTypeToEnum::value, + ct::DeviceTypeToEnum::value, + {this->n_basis_}); + prec_tensor.set_value(static_cast(1.0)); + } + + ModuleBase::Memory::record("DiagoPPCG", this->n_basis_ * 12 * sizeof(T)); + + // Outer retry loop matching ABACUS CG pattern + int ntry = 0; + this->notconv_ = 0; + int avg = 0; + eigen.zero(); + auto eigen_pack = eigen.accessor(); + + // Slice of psi for subspace function + auto psi_temp = psi.slice({0, 0}, {nband, dim}); + + do + { + // Reset for this try + this->notconv_ = 0; + avg = 0; + + // Subspace diagonalization to improve initial guess (matching ABACUS CG) + if (ntry > 0 && subspace_func_) + { + ct::TensorMap psi_map(psi.data(), + ct::DataTypeToEnum::value, + ct::DeviceTypeToEnum::value, + ct::TensorShape({nband, dim})); + const bool assume_S_orthogonal = true; + this->subspace_func_(psi_temp.data(), + psi_map.data(), + dim, + nband, + assume_S_orthogonal); + psi_temp.sync(psi_map); + } + else if (ntry == 0 && need_subspace_ && subspace_func_) + { + ct::TensorMap psi_map(psi.data(), + ct::DataTypeToEnum::value, + ct::DeviceTypeToEnum::value, + ct::TensorShape({nband, dim})); + const bool assume_S_orthogonal = false; + this->subspace_func_(psi_temp.data(), + psi_map.data(), + dim, + nband, + assume_S_orthogonal); + psi_temp.sync(psi_map); + } + + // Main band-by-band loop + for (int m = 0; m < this->n_band_; m++) + { + // Copy initial guess + psi_m.sync(psi[m]); + + // Compute S|psi_m> + this->spsi_func_(psi_m.data(), spsi_m.data(), this->n_basis_, 1); + + // Schmidt orthogonalize against lower bands + this->schmidt_orth(m, psi, spsi_m, psi_m); + + // Recompute S|psi_m> after orthogonalization + this->spsi_func_(psi_m.data(), spsi_m.data(), this->n_basis_, 1); + + // Compute H|psi_m> + this->hpsi_func_(psi_m.data(), hpsi_m.data(), this->n_basis_, 1); + + // Initial eigenvalue estimate: λ = <ψ|H|ψ> + eigen_pack[m] = ModuleBase::dot_real_op()(this->n_basis_, psi_m.data(), hpsi_m.data()); + + int iter = 0; + Real gg_last = 0.0; + Real cg_norm = 0.0; + Real theta = 0.0; + bool converged = false; + // g0 = P * S|grad> for CG update (persistent between iterations) + auto g0_tensor = ct::Tensor(ct::DataTypeToEnum::value, + ct::DeviceTypeToEnum::value, + {this->n_basis_}); + + do + { + // Compute preconditioned gradient (ABACUS calc_grad) + this->compute_gradient(prec_tensor, hpsi_m, spsi_m, eigen_pack[m], grad, ppsi); + + // Orthogonalize gradient against lower bands (ABACUS orth_grad) + this->orth_gradient(psi, m, grad, s_grad, lagrange); + + // Update CG direction with persistent g0 and Polak-Ribiere (ABACUS calc_gamma_cg) + Real gamma = this->update_cg_direction(cg, scg, grad, s_grad, prec_tensor, g0_tensor, gg_last, iter); + + // ABACUS CG correction: cg -= gamma * cg_norm * sin(theta) * psi_m + if (iter > 0 && std::abs(gamma) > std::numeric_limits::epsilon()) + { + const Real norma = gamma * cg_norm * std::sin(theta); + if (std::abs(norma) > std::numeric_limits::epsilon()) + { + T znorma = static_cast(-norma); + ModuleBase::axpy_op()(this->n_basis_, &znorma, psi_m.data(), 1, cg.data(), 1); + ModuleBase::axpy_op()(this->n_basis_, &znorma, spsi_m.data(), 1, scg.data(), 1); + } + } + + // Compute H|cg> and S|cg> + this->hpsi_func_(cg.data(), ppsi.data(), this->n_basis_, 1); + this->spsi_func_(cg.data(), scg.data(), this->n_basis_, 1); + + // Line minimization in [ψ, cg] subspace (ABACUS update_psi) + converged = this->line_minimization(ppsi, + cg, + scg, + ethr_band[m], + cg_norm, + theta, + eigen_pack[m], + psi_m, + spsi_m, + hpsi_m); + + iter++; + } while (!converged && iter < this->pw_diag_nmax_); + + // Store converged eigenvector + psi[m].sync(psi_m); + + if (!converged) + { + ++this->notconv_; + } + + avg += iter; + + // Reorder eigenvalues if necessary + if (m > 0) + { + ModuleBase::GlobalFunc::NOTE("reorder bands!"); + if (eigen_pack[m] - eigen_pack[m - 1] < -2.0 * this->pw_diag_thr_) + { + int ii = 0; + for (ii = m - 2; ii >= 0; ii--) + { + if (eigen_pack[m] - eigen_pack[ii] > 2.0 * this->pw_diag_thr_) + { + break; + } + } + ii++; + Real e0 = eigen_pack[m]; + ppsi.sync(psi[m]); + + for (int jj = m; jj >= ii + 1; jj--) + { + eigen_pack[jj] = eigen_pack[jj - 1]; + psi[jj].sync(psi[jj - 1]); + } + eigen_pack[ii] = e0; + psi[ii].sync(ppsi); + } + } + } + + avg = (this->n_band_ > 0) ? avg / this->n_band_ : 0; + avg_iter_ += avg; + ntry++; + + } while (this->test_exit_cond(ntry, this->notconv_)); + + ModuleBase::timer::end("DiagoPPCG", "diag"); + return avg; +} + +// Template instantiations +template class DiagoPPCG, base_device::DEVICE_CPU>; +template class DiagoPPCG, base_device::DEVICE_CPU>; +#if ((defined __CUDA) || (defined __ROCM)) +template class DiagoPPCG, base_device::DEVICE_GPU>; +template class DiagoPPCG, base_device::DEVICE_GPU>; +#endif + +} // namespace hsolver diff --git a/source/source_hsolver/diago_ppcg.h b/source/source_hsolver/diago_ppcg.h new file mode 100644 index 00000000000..71e84403702 --- /dev/null +++ b/source/source_hsolver/diago_ppcg.h @@ -0,0 +1,179 @@ +#ifndef DIAGO_PPCG_H_ +#define DIAGO_PPCG_H_ + +#include "source_base/kernels/math_kernel_op.h" +#include "source_base/module_device/memory_op.h" +#include "source_base/module_device/types.h" +#include "source_base/parallel_reduce.h" + +#include +#include +#include +#include +#include +#include + +namespace hsolver { + +/** + * @class DiagoPPCG + * @brief Projected Preconditioned Conjugate Gradient method for eigenvalue problems. + * + * PPCG solves the generalized eigenvalue problem H*psi = lambda*psi + * using a preconditioned conjugate gradient approach with explicit + * projection to maintain orthogonality against converged eigenvectors. + * + * @tparam T The floating-point type (float, double, complex, complex) + * @tparam Device The device type (CPU or GPU) + */ +template , typename Device = base_device::DEVICE_CPU> +class DiagoPPCG +{ + private: + using Real = typename GetTypeReal::type; + using ct_Device = typename ct::PsiToContainer::type; + + public: + using HPsiFunc = std::function; + using SPsiFunc = std::function; + using SubspaceFunc = std::function; + + /** + * @brief Constructor. + * @param basis_type The basis type ("pw" or "lcao") + * @param calculation The calculation type + */ + DiagoPPCG(const std::string& basis_type, const std::string& calculation); + + /** + * @brief Constructor with subspace function support. + * @param basis_type The basis type ("pw" or "lcao") + * @param calculation The calculation type + * @param need_subspace Whether to use subspace diagonalization + * @param subspace_func Subspace diagonalization function + * @param pw_diag_thr Convergence threshold + * @param pw_diag_nmax Max iterations + * @param nproc_in_pool Number of processors in pool + */ + DiagoPPCG(const std::string& basis_type, + const std::string& calculation, + const bool& need_subspace, + const SubspaceFunc& subspace_func, + const Real& pw_diag_thr, + const int& pw_diag_nmax, + const int& nproc_in_pool); + + /// Destructor + ~DiagoPPCG(); + + /** + * @brief Solve eigenvalue problem using PPCG method. + * + * @param hpsi_func Function to compute H|psi> + * @param spsi_func Function to compute S|psi> (identity for norm-conserving) + * @param ld_psi Leading dimension of psi matrix + * @param nband Number of bands + * @param dim Dimension of basis + * @param psi_in Input/output wavefunction matrix [dim: nband x ld_psi] + * @param eigenvalue_in Output eigenvalues [dim: nband] + * @param ethr_band Convergence threshold per band + * @param prec Preconditioner array [dim: dim] + * @return Average number of iterations + */ + double diag(const HPsiFunc& hpsi_func, + const SPsiFunc& spsi_func, + const int ld_psi, + const int nband, + const int dim, + T* psi_in, + Real* eigenvalue_in, + const std::vector& ethr_band, + const Real* prec = nullptr); + + private: + Device* ctx_ = {}; + int n_band_ = 0; + int n_basis_ = 0; + int ld_psi_ = 0; + + double avg_iter_ = 0.0; + int notconv_ = 0; + int pw_diag_nmax_ = 0; + Real pw_diag_thr_ = 1e-5; + + std::string basis_type_; + std::string calculation_; + + HPsiFunc hpsi_func_ = nullptr; + SPsiFunc spsi_func_ = nullptr; + SubspaceFunc subspace_func_ = nullptr; + + bool need_subspace_ = false; + int nproc_in_pool_ = 0; + + /// Constants + const T* one_ = nullptr; + const T* zero_ = nullptr; + const T* neg_one_ = nullptr; + + /** + * @brief Project gradient to be orthogonal to all lower states. + */ + void orth_gradient(const ct::Tensor& psi, + const int& m, + ct::Tensor& grad, + ct::Tensor& s_grad, + ct::Tensor& lagrange); + + /** + * @brief Compute gradient: g = H|ψ> - λ*S|ψ>, precondition and project. + */ + void compute_gradient(const ct::Tensor& prec, + const ct::Tensor& hpsi, + const ct::Tensor& spsi, + const Real& eigenvalue, + ct::Tensor& grad, + ct::Tensor& ppsi); + + /** + * @brief Update CG direction using Polak-Ribiere formula. + * @return gamma coefficient for correction term + */ + Real update_cg_direction(ct::Tensor& cg, + ct::Tensor& scg, + const ct::Tensor& grad, + const ct::Tensor& s_grad, + const ct::Tensor& prec, + ct::Tensor& g0, + Real& gg_last, + const int& iter); + + /** + * @brief Perform line minimization in [ψ, cg] subspace and update ψ. + * @return true if converged + */ + bool line_minimization(const ct::Tensor& ppsi, + const ct::Tensor& cg, + const ct::Tensor& scg, + const double& ethreshold, + Real& cg_norm, + Real& theta, + Real& eigenvalue, + ct::Tensor& psi_m, + ct::Tensor& spsi_m, + ct::Tensor& hpsi_m); + + /** + * @brief Schmidt orthogonalize ψ_m against previously converged states. + */ + void schmidt_orth(const int& m, + const ct::Tensor& psi, + const ct::Tensor& spsi_m, + ct::Tensor& psi_m); + + bool test_exit_cond(const int& ntry, const int& notconv) const; +}; + +} // namespace hsolver + +#endif // DIAGO_PPCG_H_ diff --git a/source/source_hsolver/hsolver_pw.cpp b/source/source_hsolver/hsolver_pw.cpp index b88bc3b90dd..363ac0ec006 100644 --- a/source/source_hsolver/hsolver_pw.cpp +++ b/source/source_hsolver/hsolver_pw.cpp @@ -8,6 +8,7 @@ #include "source_hamilt/hamilt.h" #include "source_hsolver/diag_comm_info.h" #include "source_hsolver/diago_bpcg.h" +#include "source_hsolver/diago_ppcg.h" #include "source_hsolver/diago_cg.h" #include "source_hsolver/diago_dav_subspace.h" #include "source_hsolver/diago_david.h" @@ -83,7 +84,7 @@ void HSolverPW::solve(hamilt::Hamilt* pHamilt, this->nproc_in_pool = nproc_in_pool_in; // report if the specified diagonalization method is not supported - const std::initializer_list _methods = {"cg", "dav", "dav_subspace", "bpcg"}; + const std::initializer_list _methods = {"cg", "dav", "dav_subspace", "bpcg", "ppcg"}; if (std::find(std::begin(_methods), std::end(_methods), this->method) == std::end(_methods)) { ModuleBase::WARNING_QUIT("HSolverPW::solve", "This type of eigensolver is not supported!"); @@ -323,6 +324,39 @@ void HSolverPW::hamiltSolvePsiK(hamilt::Hamilt* hm, bpcg.init_iter(PARAM.inp.nbands, nband_l, nbasis, ndim); bpcg.diag(hpsi_func, psi.get_pointer(), eigenvalue, this->ethr_band); } + else if (this->method == "ppcg") + { + // wrap the subspace_func into a lambda function (matching CG pattern) + auto subspace_func = [hm, cur_nbasis](T* psi_in, + T* psi_out, + const int ld_psi, + const int nband, + const bool S_orth) { + auto psi_in_wrapper = psi::Psi(psi_in, 1, nband, ld_psi, cur_nbasis); + auto psi_out_wrapper = psi::Psi(psi_out, 1, nband, ld_psi, cur_nbasis); + std::vector eigen(nband, 0.0); + DiagoIterAssist::diag_subspace(hm, psi_in_wrapper, psi_out_wrapper, eigen.data()); + }; + DiagoPPCG ppcg(this->basis_type, + this->calculation_type, + this->need_subspace, + subspace_func, + this->diag_thr, + this->diag_iter_max, + this->nproc_in_pool); + + DiagoIterAssist::avg_iter += static_cast( + ppcg.diag(hpsi_func, + spsi_func, + psi.get_nbasis(), + psi.get_nbands(), + psi.get_current_ngk(), + psi.get_pointer(), + eigenvalue, + this->ethr_band, + pre_condition.data()) + ); + } else if (this->method == "dav_subspace") { bool scf = this->calculation_type == "nscf" ? false : true; diff --git a/source/source_hsolver/test/CMakeLists.txt b/source/source_hsolver/test/CMakeLists.txt index 1b1529adb4a..843041cf7c1 100644 --- a/source/source_hsolver/test/CMakeLists.txt +++ b/source/source_hsolver/test/CMakeLists.txt @@ -16,6 +16,14 @@ if (ENABLE_MPI) ../../source_hamilt/operator.cpp ../../source_pw/module_pwdft/op_pw.cpp ) + AddTest( + TARGET MODULE_HSOLVER_ppcg + LIBS parameter ${math_libs} base psi device container + SOURCES diago_ppcg_test.cpp ../diago_ppcg.cpp ../diago_iter_assist.cpp ../diag_const_nums.cpp + ../../source_basis/module_pw/test/test_tool.cpp + ../../source_hamilt/operator.cpp + ../../source_pw/module_pwdft/op_pw.cpp + ) AddTest( TARGET MODULE_HSOLVER_cg LIBS parameter ${math_libs} base psi device container diff --git a/source/source_hsolver/test/diago_ppcg_test.cpp b/source/source_hsolver/test/diago_ppcg_test.cpp new file mode 100644 index 00000000000..361f87dd55e --- /dev/null +++ b/source/source_hsolver/test/diago_ppcg_test.cpp @@ -0,0 +1,238 @@ +#include "source_base/inverse_matrix.h" +#include "source_base/module_external/lapack_connector.h" +#include "source_hamilt/hamilt.h" +#include "source_psi/psi.h" +#include "../diago_iter_assist.h" +#include "../diago_ppcg.h" +#include "diago_mock.h" + +#include +#include +#include + +/************************************************ + * unit test of functions in DiagoPPCG + ***********************************************/ + +/** + * Test the PPCG (Projected Preconditioned Conjugate Gradient) method + * for eigenvalue problems. + * + * The test generates a random Hermitian matrix, computes eigenvalues + * using LAPACK as reference, and compares with PPCG results. + */ + +// call lapack to get reference eigenvalues +void lapackEigen(int &npw, std::vector> &hm, double *e) +{ + int lwork = 2 * npw; + std::complex *work2 = new std::complex[lwork]; + double *rwork = new double[3 * npw - 2]; + int info = 0; + char tmp_c1 = 'V', tmp_c2 = 'U'; + zheev_(&tmp_c1, &tmp_c2, &npw, hm.data(), &npw, e, work2, &lwork, rwork, &info); + delete[] rwork; + delete[] work2; +} + +class DiagoPPCGPrepare +{ + public: + DiagoPPCGPrepare(int nband, int npw, int sparsity, double eps, int maxiter, double threshold) + : nband(nband), npw(npw), sparsity(sparsity), eps(eps), maxiter(maxiter), threshold(threshold) + { +#ifdef __MPI + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &mypnum); +#endif + } + + int nband, npw, sparsity, maxiter; + double eps, avg_iter; + double threshold; + int nprocs = 1, mypnum = 0; + + void CompareEigen(double *precondition) + { + // Calculate eigenvalues by LAPACK + double *e_lapack = new double[npw]; + auto ev = DIAGOTEST::hmatrix; + if (mypnum == 0) + { + lapackEigen(npw, ev, e_lapack); + } + + // Initial guess of psi by perturbing lapack psi + ModuleBase::ComplexMatrix psiguess(nband, npw); + std::default_random_engine p(1); + std::uniform_int_distribution u(1, 10); + for (int i = 0; i < nband; i++) + { + for (int j = 0; j < npw; j++) + { + double rand = static_cast(u(p)) / 10.; + psiguess(i, j) = ev[j * DIAGOTEST::h_nc + i] * rand; + } + } + + double *en = new double[npw]; + hamilt::Hamilt> *ha = new hamilt::HamiltPW>( + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr); + + psi::Psi> psi; + psi.resize(1, nband, npw); + for (int i = 0; i < nband; i++) + { + for (int j = 0; j < npw; j++) + { + psi(i, j) = psiguess(i, j); + } + } + + psi::Psi> psi_local; + double *precondition_local; + DIAGOTEST::npw_local = new int[nprocs]; +#ifdef __MPI + DIAGOTEST::cal_division(DIAGOTEST::npw); + DIAGOTEST::divide_hpsi(psi, psi_local, DIAGOTEST::hmatrix, DIAGOTEST::hmatrix_local); + precondition_local = new double[DIAGOTEST::npw_local[mypnum]]; + DIAGOTEST::divide_psi(precondition, precondition_local); +#else + DIAGOTEST::hmatrix_local = DIAGOTEST::hmatrix; + DIAGOTEST::npw_local[0] = DIAGOTEST::npw; + psi_local = psi; + precondition_local = new double[DIAGOTEST::npw]; + for (int i = 0; i < DIAGOTEST::npw; i++) + precondition_local[i] = precondition[i]; +#endif + + // Run PPCG + psi_local.fix_k(0); + using T = std::complex; + const int dim = DIAGOTEST::npw; + const std::vector &h_mat = DIAGOTEST::hmatrix_local; + + auto hpsi_func = [h_mat, dim](T *psi_in, T *hpsi_out, + const int ld_psi, const int nvec) + { + auto one = std::make_unique(1.0); + auto zero = std::make_unique(0.0); + const T *one_ = one.get(); + const T *zero_ = zero.get(); + + base_device::DEVICE_CPU *ctx = {}; + ModuleBase::gemm_op()( + 'N', 'N', + dim, nvec, dim, + one_, + h_mat.data(), dim, + psi_in, ld_psi, + zero_, + hpsi_out, ld_psi); + }; + + // For PPCG (non-generalized), S = I + auto spsi_func = [](T *psi_in, T *spsi_out, + const int ld_psi, const int nvec) + { + for (int i = 0; i < ld_psi * nvec; i++) + { + spsi_out[i] = psi_in[i]; + } + }; + + hsolver::DiagoPPCG ppcg("pw", "scf"); + std::vector ethr_band(nband, 1e-5); + + double avg_iter = ppcg.diag(hpsi_func, + spsi_func, + npw, + nband, + dim, + psi_local.get_pointer(), + en, + ethr_band, + precondition_local); + + // Run multiple times to ensure convergence + for (int r = 0; r < 3; r++) + { + avg_iter = ppcg.diag(hpsi_func, + spsi_func, + npw, + nband, + dim, + psi_local.get_pointer(), + en, + ethr_band, + precondition_local); + } + + // Compare with LAPACK reference + for (int i = 0; i < nband; i++) + { + EXPECT_NEAR(en[i], e_lapack[i], threshold); + } + + delete[] DIAGOTEST::npw_local; + delete[] precondition_local; + delete[] en; + delete[] e_lapack; + delete ha; + } +}; + +class DiagoPPCGTest : public ::testing::TestWithParam +{ +}; + +TEST_P(DiagoPPCGTest, RandomHamilt) +{ + DiagoPPCGPrepare dcp = GetParam(); + hsolver::DiagoIterAssist>::PW_DIAG_NMAX = dcp.maxiter; + hsolver::DiagoIterAssist>::PW_DIAG_THR = dcp.eps; + + HPsi> hpsi(dcp.nband, dcp.npw, dcp.sparsity); + DIAGOTEST::hmatrix = hpsi.hamilt(); + DIAGOTEST::npw = dcp.npw; + + dcp.CompareEigen(hpsi.precond()); +} + +INSTANTIATE_TEST_SUITE_P(VerifyPPCG, + DiagoPPCGTest, + ::testing::Values( + // nband, npw, sparsity, eps, maxiter, threshold + DiagoPPCGPrepare(4, 100, 0, 1e-5, 300, 5e-2) + )); + +int main(int argc, char **argv) +{ + int nproc = 1, myrank = 0; + +#ifdef __MPI + int nproc_in_pool, kpar = 1, mypool, rank_in_pool; + setupmpi(argc, argv, nproc, myrank); + divide_pools(nproc, myrank, nproc_in_pool, kpar, mypool, rank_in_pool); + MPI_Comm_split(MPI_COMM_WORLD, myrank, 0, &BP_WORLD); + GlobalV::NPROC_IN_POOL = nproc; +#else + MPI_Init(&argc, &argv); +#endif + + testing::InitGoogleTest(&argc, argv); + ::testing::TestEventListeners &listeners = ::testing::UnitTest::GetInstance()->listeners(); + if (myrank != 0) + { + delete listeners.Release(listeners.default_result_printer()); + } + + int result = RUN_ALL_TESTS(); + if (myrank == 0 && result != 0) + { + std::cout << "Some tests failed. Check the output above for details." << std::endl; + } + + MPI_Finalize(); + return result; +} diff --git a/source/source_io/module_parameter/read_input_item_elec_stru.cpp b/source/source_io/module_parameter/read_input_item_elec_stru.cpp index 39f37febc54..a5d0bf98219 100644 --- a/source/source_io/module_parameter/read_input_item_elec_stru.cpp +++ b/source/source_io/module_parameter/read_input_item_elec_stru.cpp @@ -131,7 +131,7 @@ Then the user has to correct the input file and restart the calculation.)"; }; item.check_value = [](const Input_Item& item, const Parameter& para) { const std::string& ks_solver = para.input.ks_solver; - const std::vector pw_solvers = {"cg", "dav", "bpcg", "dav_subspace"}; + const std::vector pw_solvers = {"cg", "dav", "bpcg", "dav_subspace", "ppcg"}; const std::vector lcao_solvers = { "genelpa", "elpa",