From 9536fcfd5b50a8b913fddfa5cb393962ad206c81 Mon Sep 17 00:00:00 2001
From: dyzheng <zhengdy@bjaisi.com>
Date: Sat, 30 May 2026 16:18:24 +0800
Subject: [PATCH] feat: add PPCG eigenvalue solver implementation

---
 source/source_hsolver/CMakeLists.txt          |   1 +
 source/source_hsolver/diago_ppcg.cpp          | 622 ++++++++++++++++++
 source/source_hsolver/diago_ppcg.h            | 179 +++++
 source/source_hsolver/hsolver_pw.cpp          |  36 +-
 source/source_hsolver/test/CMakeLists.txt     |   8 +
 .../source_hsolver/test/diago_ppcg_test.cpp   | 238 +++++++
 .../read_input_item_elec_stru.cpp             |   2 +-
 7 files changed, 1084 insertions(+), 2 deletions(-)
 create mode 100644 source/source_hsolver/diago_ppcg.cpp
 create mode 100644 source/source_hsolver/diago_ppcg.h
 create mode 100644 source/source_hsolver/test/diago_ppcg_test.cpp
diff --git a/source/source_hsolver/CMakeLists.txt b/source/source_hsolver/CMakeLists.txt
index b115d6d4cd2..95f7e23e230 100644
--- a/source/source_hsolver/CMakeLists.txt
+++ b/source/source_hsolver/CMakeLists.txt
@@ -4,6 +4,7 @@ list(APPEND objects
     diago_david.cpp
     diago_dav_subspace.cpp
     diago_bpcg.cpp
+    diago_ppcg.cpp
     para_linear_transform.cpp
     hsolver_pw.cpp
     hsolver_lcaopw.cpp
diff --git a/source/source_hsolver/diago_ppcg.cpp b/source/source_hsolver/diago_ppcg.cpp
new file mode 100644
index 00000000000..54336200629
--- /dev/null
+++ b/source/source_hsolver/diago_ppcg.cpp
@@ -0,0 +1,622 @@
+#include "source_hsolver/diago_ppcg.h"
+
+#include "diago_iter_assist.h"
+#include "source_base/constants.h"
+#include "source_base/global_function.h"
+#include "source_base/memory_recorder.h"
+#include "source_base/parallel_reduce.h"
+#include "source_base/timer.h"
+#include "source_base/tool_title.h"
+
+#include <ATen/core/tensor_map.h>
+#include <ATen/core/tensor_utils.h>
+#include <ATen/kernels/lapack.h>
+#include <ATen/kernels/memory.h>
+#include <cmath>
+#include <limits>
+
+namespace hsolver {
+
+template <typename T, typename Device>
+DiagoPPCG<T, Device>::DiagoPPCG(const std::string& basis_type, const std::string& calculation)
+{
+    basis_type_ = basis_type;
+    calculation_ = calculation;
+    this->one_ = new T(static_cast<T>(1.0));
+    this->zero_ = new T(static_cast<T>(0.0));
+    this->neg_one_ = new T(static_cast<T>(-1.0));
+}
+
+template <typename T, typename Device>
+DiagoPPCG<T, Device>::DiagoPPCG(const std::string& basis_type,
+                                const std::string& calculation,
+                                const bool& need_subspace,
+                                const SubspaceFunc& subspace_func,
+                                const Real& pw_diag_thr,
+                                const int& pw_diag_nmax,
+                                const int& nproc_in_pool)
+{
+    basis_type_ = basis_type;
+    calculation_ = calculation;
+    need_subspace_ = need_subspace;
+    subspace_func_ = subspace_func;
+    pw_diag_thr_ = pw_diag_thr;
+    pw_diag_nmax_ = pw_diag_nmax;
+    nproc_in_pool_ = nproc_in_pool;
+    this->one_ = new T(static_cast<T>(1.0));
+    this->zero_ = new T(static_cast<T>(0.0));
+    this->neg_one_ = new T(static_cast<T>(-1.0));
+}
+
+template <typename T, typename Device>
+DiagoPPCG<T, Device>::~DiagoPPCG()
+{
+    delete this->one_;
+    delete this->zero_;
+    delete this->neg_one_;
+}
+
+template <typename T, typename Device>
+void DiagoPPCG<T, Device>::compute_gradient(const ct::Tensor& prec,
+                                            const ct::Tensor& hpsi,
+                                            const ct::Tensor& spsi,
+                                            const Real& eigenvalue,
+                                            ct::Tensor& grad,
+                                            ct::Tensor& ppsi)
+{
+    // Following ABACUS DiagoCG::calc_grad pattern:
+    // grad = P * H|ψ>
+    ModuleBase::vector_div_vector_op<T, Device>()(this->n_basis_, grad.data<T>(), hpsi.data<T>(), prec.data<Real>());
+    // ppsi = P * S|ψ>
+    ModuleBase::vector_div_vector_op<T, Device>()(this->n_basis_, ppsi.data<T>(), spsi.data<T>(), prec.data<Real>());
+
+    // λ_c = <Sψ|P*Hψ> / <Sψ|P*Sψ>
+    Real eh = ModuleBase::dot_real_op<T, Device>()(this->n_basis_, spsi.data<T>(), grad.data<T>());
+    Real es = ModuleBase::dot_real_op<T, Device>()(this->n_basis_, spsi.data<T>(), ppsi.data<T>());
+
+    // g = P*H|ψ> - λ_c * P*S|ψ>  (gradient orthogonal to S|ψ>)
+    Real lambda_c = (es > std::numeric_limits<Real>::epsilon()) ? eh / es : 0.0;
+    ModuleBase::vector_add_vector_op<T, Device>()(this->n_basis_,
+                                                  grad.data<T>(),
+                                                  grad.data<T>(),
+                                                  1.0,
+                                                  ppsi.data<T>(),
+                                                  (-lambda_c));
+}
+
+template <typename T, typename Device>
+void DiagoPPCG<T, Device>::orth_gradient(const ct::Tensor& psi,
+                                         const int& m,
+                                         ct::Tensor& grad,
+                                         ct::Tensor& s_grad,
+                                         ct::Tensor& lagrange)
+{
+    // s_grad = S|grad>
+    this->spsi_func_(grad.data<T>(), s_grad.data<T>(), this->n_basis_, 1);
+
+    // lagrange[i] = <ψ_i|S|grad> for i = 0..m-1
+    ModuleBase::gemv_op<T, Device>()('C',
+                                     this->n_basis_,
+                                     m,
+                                     this->one_,
+                                     psi.data<T>(),
+                                     this->ld_psi_,
+                                     s_grad.data<T>(),
+                                     1,
+                                     this->zero_,
+                                     lagrange.data<T>(),
+                                     1);
+
+    // All-reduce over MPI pools
+    Parallel_Reduce::reduce_pool(lagrange.data<T>(), m);
+
+    // |grad> -= Σ ψ_i * lagrange_i
+    ModuleBase::gemv_op<T, Device>()('N',
+                                     this->n_basis_,
+                                     m,
+                                     this->neg_one_,
+                                     psi.data<T>(),
+                                     this->ld_psi_,
+                                     lagrange.data<T>(),
+                                     1,
+                                     this->one_,
+                                     grad.data<T>(),
+                                     1);
+
+    // |s_grad> -= Σ ψ_i * lagrange_i
+    ModuleBase::gemv_op<T, Device>()('N',
+                                     this->n_basis_,
+                                     m,
+                                     this->neg_one_,
+                                     psi.data<T>(),
+                                     this->ld_psi_,
+                                     lagrange.data<T>(),
+                                     1,
+                                     this->one_,
+                                     s_grad.data<T>(),
+                                     1);
+}
+
+template <typename T, typename Device>
+typename DiagoPPCG<T, Device>::Real DiagoPPCG<T, Device>::update_cg_direction(
+    ct::Tensor& cg,
+    ct::Tensor& scg,
+    const ct::Tensor& grad,
+    const ct::Tensor& s_grad,
+    const ct::Tensor& prec,
+    ct::Tensor& g0,
+    Real& gg_last,
+    const int& iter)
+{
+    // Following ABACUS DiagoCG::calc_gamma_cg pattern:
+    // g0 is persistent between iterations
+    // First compute gg_inter using g0 from previous iteration
+    Real gg_inter = 0.0;
+    if (iter > 0)
+    {
+        gg_inter = ModuleBase::dot_real_op<T, Device>()(this->n_basis_, grad.data<T>(), g0.data<T>());
+    }
+
+    // g0 = P * S|grad> = prec * scg (element-wise multiply)
+    for (int i = 0; i < this->n_basis_; i++)
+    {
+        g0.data<T>()[i] = s_grad.data<T>()[i] * prec.data<Real>()[i];
+    }
+
+    const Real gg_now = ModuleBase::dot_real_op<T, Device>()(this->n_basis_, grad.data<T>(), g0.data<T>());
+
+    Real gamma = 0.0;
+    if (iter == 0)
+    {
+        gg_last = gg_now;
+        // cg = grad
+        cg.sync(grad);
+        scg.sync(s_grad);
+    }
+    else
+    {
+        // Polak-Ribiere: gamma = (gg_now - gg_inter) / gg_last
+        if (gg_last > std::numeric_limits<Real>::epsilon())
+        {
+            gamma = (gg_now - gg_inter) / gg_last;
+            if (gamma < 0.0) gamma = 0.0; // Safety: restart if negative
+        }
+        gg_last = gg_now;
+
+        // cg = gamma * cg + grad
+        ModuleBase::vector_add_vector_op<T, Device>()(this->n_basis_,
+                                                      cg.data<T>(),
+                                                      cg.data<T>(),
+                                                      gamma,
+                                                      grad.data<T>(),
+                                                      1.0);
+        // scg = gamma * scg + s_grad
+        ModuleBase::vector_add_vector_op<T, Device>()(this->n_basis_,
+                                                      scg.data<T>(),
+                                                      scg.data<T>(),
+                                                      gamma,
+                                                      s_grad.data<T>(),
+                                                      1.0);
+    }
+    return gamma;
+}
+
+template <typename T, typename Device>
+bool DiagoPPCG<T, Device>::line_minimization(const ct::Tensor& ppsi,
+                                             const ct::Tensor& cg,
+                                             const ct::Tensor& scg,
+                                             const double& ethreshold,
+                                             Real& cg_norm,
+                                             Real& theta,
+                                             Real& eigenvalue,
+                                             ct::Tensor& psi_m,
+                                             ct::Tensor& spsi_m,
+                                             ct::Tensor& hpsi_m)
+{
+    // Compute <cg|S|cg> for normalization
+    cg_norm = std::sqrt(ModuleBase::dot_real_op<T, Device>()(this->n_basis_, cg.data<T>(), scg.data<T>()));
+
+    if (cg_norm < 1.0e-12)
+    {
+        return true;
+    }
+
+    // Compute Rayleigh-Ritz coefficients in the 2D subspace [ψ, cg]
+    // a0 = 2 * <ψ|H|cg> / ||cg||_S
+    // b0 = <cg|H|cg> / ||cg||_S^2
+    // e0 = <ψ|H|ψ> (current eigenvalue)
+    const Real a0 = ModuleBase::dot_real_op<T, Device>()(this->n_basis_, psi_m.data<T>(), ppsi.data<T>()) * 2.0 / cg_norm;
+    const Real b0 = ModuleBase::dot_real_op<T, Device>()(this->n_basis_, cg.data<T>(), ppsi.data<T>()) / (cg_norm * cg_norm);
+
+    const Real e0 = eigenvalue;
+    // Use atan matching ABACUS CG exactly: theta = atan(a0 / (e0 - b0)) / 2
+    const Real denom = e0 - b0;
+    theta = (std::abs(denom) > std::numeric_limits<Real>::epsilon()) 
+            ? std::atan(a0 / denom) / 2.0 
+            : 0.0;
+
+    const Real new_e = (e0 - b0) * std::cos(2.0 * theta) + a0 * std::sin(2.0 * theta);
+    const Real e1 = (e0 + b0 + new_e) / 2.0;
+    const Real e2 = (e0 + b0 - new_e) / 2.0;
+
+    // Select the smaller eigenvalue
+    if (e1 > e2)
+    {
+        theta += ModuleBase::PI_HALF;
+    }
+    eigenvalue = std::min(e1, e2);
+
+    const Real cost = std::cos(theta);
+    const Real sint_norm = std::sin(theta) / cg_norm;
+
+    // ψ_new = ψ * cos(θ) + cg * sin(θ)/||cg||
+    ModuleBase::vector_add_vector_op<T, Device>()(this->n_basis_,
+                                                  psi_m.data<T>(),
+                                                  psi_m.data<T>(),
+                                                  cost,
+                                                  cg.data<T>(),
+                                                  sint_norm);
+
+    if (std::abs(eigenvalue - e0) < ethreshold)
+    {
+        return true;
+    }
+    else
+    {
+        // Update S|ψ> and H|ψ> for the next iteration
+        ModuleBase::vector_add_vector_op<T, Device>()(this->n_basis_,
+                                                      spsi_m.data<T>(),
+                                                      spsi_m.data<T>(),
+                                                      cost,
+                                                      scg.data<T>(),
+                                                      sint_norm);
+        ModuleBase::vector_add_vector_op<T, Device>()(this->n_basis_,
+                                                      hpsi_m.data<T>(),
+                                                      hpsi_m.data<T>(),
+                                                      cost,
+                                                      ppsi.data<T>(),
+                                                      sint_norm);
+        return false;
+    }
+}
+
+template <typename T, typename Device>
+void DiagoPPCG<T, Device>::schmidt_orth(const int& m,
+                                        const ct::Tensor& psi,
+                                        const ct::Tensor& spsi_m,
+                                        ct::Tensor& psi_m)
+{
+    REQUIRES_OK(m >= 0, "DiagoPPCG::schmidt_orth: m < 0");
+    REQUIRES_OK(this->n_band_ >= m, "DiagoPPCG::schmidt_orth: n_band < m");
+
+    ct::Tensor lagrange(ct::DataTypeToEnum<T>::value,
+                        ct::DeviceTypeToEnum<ct_Device>::value,
+                        {m + 1});
+
+    // lagrange[i] = <ψ_i|S|ψ_m> for i = 0..m
+    ModuleBase::gemv_op<T, Device>()('C',
+                                     this->n_basis_,
+                                     m + 1,
+                                     this->one_,
+                                     psi.data<T>(),
+                                     this->ld_psi_,
+                                     spsi_m.data<T>(),
+                                     1,
+                                     this->zero_,
+                                     lagrange.data<T>(),
+                                     1);
+
+    Parallel_Reduce::reduce_pool(lagrange.data<T>(), m + 1);
+
+    // Gram-Schmidt: ψ_m -= Σ ψ_i * lagrange_i  (i = 0..m-1)
+    ModuleBase::gemv_op<T, Device>()('N',
+                                     this->n_basis_,
+                                     m,
+                                     this->neg_one_,
+                                     psi.data<T>(),
+                                     this->ld_psi_,
+                                     lagrange.data<T>(),
+                                     1,
+                                     this->one_,
+                                     psi_m.data<T>(),
+                                     1);
+
+    // Compute new norm and renormalize
+    Real psi_norm = ModuleBase::dot_real_op<T, Device>()(this->n_basis_, psi_m.data<T>(), psi_m.data<T>());
+    if (psi_norm <= 0.0)
+    {
+        std::cout << " DiagoPPGC: psi_norm <= 0.0, m = " << m << std::endl;
+        ModuleBase::WARNING_QUIT("schmidt_orth", "psi_norm <= 0.0");
+    }
+    psi_norm = std::sqrt(psi_norm);
+
+    // Normalize
+    ModuleBase::vector_mul_real_op<T, Device>()(this->n_basis_, psi_m.data<T>(), psi_m.data<T>(), Real(1.0 / psi_norm));
+}
+
+template <typename T, typename Device>
+bool DiagoPPCG<T, Device>::test_exit_cond(const int& ntry, const int& notconv) const
+{
+    const bool scf = calculation_ != "nscf";
+    const bool f1 = ntry <= 5;
+    const bool f2 = !scf && notconv > 0;
+    const bool f3 = scf && notconv > 5;
+    return f1 && (f2 || f3);
+}
+
+template <typename T, typename Device>
+double DiagoPPCG<T, Device>::diag(const HPsiFunc& hpsi_func,
+                                  const SPsiFunc& spsi_func,
+                                  const int ld_psi,
+                                  const int nband,
+                                  const int dim,
+                                  T* psi_in,
+                                  Real* eigenvalue_in,
+                                  const std::vector<double>& ethr_band,
+                                  const Real* prec)
+{
+    ModuleBase::TITLE("DiagoPPCG", "diag");
+    ModuleBase::timer::start("DiagoPPCG", "diag");
+
+    REQUIRES_OK(ld_psi >= dim, "DiagoPPCG::diag: ld_psi must be >= dim");
+    REQUIRES_OK(static_cast<int>(ethr_band.size()) >= nband,
+                "DiagoPPCG::diag: ethr_band size must be >= nband");
+
+    // Set up tensor views
+    auto psi = ct::TensorMap(psi_in,
+                             ct::DataTypeToEnum<T>::value,
+                             ct::DeviceTypeToEnum<ct_Device>::value,
+                             ct::TensorShape({nband, ld_psi}));
+    auto eigen = ct::TensorMap(eigenvalue_in,
+                               ct::DataTypeToEnum<Real>::value,
+                               ct::DeviceTypeToEnum<ct::DEVICE_CPU>::value,
+                               ct::TensorShape({nband}));
+
+    // Store parameters
+    this->n_band_ = nband;
+    this->n_basis_ = dim;
+    this->ld_psi_ = ld_psi;
+    this->notconv_ = 0;
+
+    this->hpsi_func_ = hpsi_func;
+    this->spsi_func_ = spsi_func;
+
+    // Get convergence parameters from DiagoIterAssist
+    this->pw_diag_nmax_ = DiagoIterAssist<T, Device>::PW_DIAG_NMAX;
+    this->pw_diag_thr_ = DiagoIterAssist<T, Device>::PW_DIAG_THR;
+
+    // Allocate working arrays
+    // psi_m: current band vector
+    auto psi_m = ct::Tensor(ct::DataTypeToEnum<T>::value,
+                            ct::DeviceTypeToEnum<ct_Device>::value,
+                            {this->n_basis_});
+    // hpsi_m: H|psi_m>
+    auto hpsi_m = ct::Tensor(ct::DataTypeToEnum<T>::value,
+                             ct::DeviceTypeToEnum<ct_Device>::value,
+                             {this->n_basis_});
+    // spsi_m: S|psi_m>
+    auto spsi_m = ct::Tensor(ct::DataTypeToEnum<T>::value,
+                             ct::DeviceTypeToEnum<ct_Device>::value,
+                             {this->n_basis_});
+    // ppsi: preconditioned workspace / H|cg>
+    auto ppsi = ct::Tensor(ct::DataTypeToEnum<T>::value,
+                           ct::DeviceTypeToEnum<ct_Device>::value,
+                           {this->n_basis_});
+    // gradient
+    auto grad = ct::Tensor(ct::DataTypeToEnum<T>::value,
+                           ct::DeviceTypeToEnum<ct_Device>::value,
+                           {this->n_basis_});
+    // s_grad: S|grad>
+    auto s_grad = ct::Tensor(ct::DataTypeToEnum<T>::value,
+                             ct::DeviceTypeToEnum<ct_Device>::value,
+                             {this->n_basis_});
+    // cg: CG direction
+    auto cg = ct::Tensor(ct::DataTypeToEnum<T>::value,
+                         ct::DeviceTypeToEnum<ct_Device>::value,
+                         {this->n_basis_});
+    // scg: S|cg>
+    auto scg = ct::Tensor(ct::DataTypeToEnum<T>::value,
+                          ct::DeviceTypeToEnum<ct_Device>::value,
+                          {this->n_basis_});
+    // lagrange: for orthogonalization
+    auto lagrange = ct::Tensor(ct::DataTypeToEnum<T>::value,
+                               ct::DeviceTypeToEnum<ct_Device>::value,
+                               {this->n_band_});
+
+    // Set up preconditioner tensor
+    ct::Tensor prec_tensor;
+    if (prec != nullptr)
+    {
+        prec_tensor = ct::TensorMap(const_cast<Real*>(prec),
+                                    ct::DataTypeToEnum<Real>::value,
+                                    ct::DeviceTypeToEnum<ct::DEVICE_CPU>::value,
+                                    ct::TensorShape({dim}))
+                          .template to_device<ct_Device>();
+    }
+    else
+    {
+        prec_tensor = ct::Tensor(ct::DataTypeToEnum<Real>::value,
+                                 ct::DeviceTypeToEnum<ct_Device>::value,
+                                 {this->n_basis_});
+        prec_tensor.set_value(static_cast<Real>(1.0));
+    }
+
+    ModuleBase::Memory::record("DiagoPPCG", this->n_basis_ * 12 * sizeof(T));
+
+    // Outer retry loop matching ABACUS CG pattern
+    int ntry = 0;
+    this->notconv_ = 0;
+    int avg = 0;
+    eigen.zero();
+    auto eigen_pack = eigen.accessor<Real, 1>();
+
+    // Slice of psi for subspace function
+    auto psi_temp = psi.slice({0, 0}, {nband, dim});
+
+    do
+    {
+        // Reset for this try
+        this->notconv_ = 0;
+        avg = 0;
+
+        // Subspace diagonalization to improve initial guess (matching ABACUS CG)
+        if (ntry > 0 && subspace_func_)
+        {
+            ct::TensorMap psi_map(psi.data<T>(),
+                                  ct::DataTypeToEnum<T>::value,
+                                  ct::DeviceTypeToEnum<ct_Device>::value,
+                                  ct::TensorShape({nband, dim}));
+            const bool assume_S_orthogonal = true;
+            this->subspace_func_(psi_temp.data<T>(),
+                                 psi_map.data<T>(),
+                                 dim,
+                                 nband,
+                                 assume_S_orthogonal);
+            psi_temp.sync(psi_map);
+        }
+        else if (ntry == 0 && need_subspace_ && subspace_func_)
+        {
+            ct::TensorMap psi_map(psi.data<T>(),
+                                  ct::DataTypeToEnum<T>::value,
+                                  ct::DeviceTypeToEnum<ct_Device>::value,
+                                  ct::TensorShape({nband, dim}));
+            const bool assume_S_orthogonal = false;
+            this->subspace_func_(psi_temp.data<T>(),
+                                 psi_map.data<T>(),
+                                 dim,
+                                 nband,
+                                 assume_S_orthogonal);
+            psi_temp.sync(psi_map);
+        }
+
+        // Main band-by-band loop
+        for (int m = 0; m < this->n_band_; m++)
+        {
+            // Copy initial guess
+            psi_m.sync(psi[m]);
+
+            // Compute S|psi_m>
+            this->spsi_func_(psi_m.data<T>(), spsi_m.data<T>(), this->n_basis_, 1);
+
+            // Schmidt orthogonalize against lower bands
+            this->schmidt_orth(m, psi, spsi_m, psi_m);
+
+            // Recompute S|psi_m> after orthogonalization
+            this->spsi_func_(psi_m.data<T>(), spsi_m.data<T>(), this->n_basis_, 1);
+
+            // Compute H|psi_m>
+            this->hpsi_func_(psi_m.data<T>(), hpsi_m.data<T>(), this->n_basis_, 1);
+
+            // Initial eigenvalue estimate: λ = <ψ|H|ψ>
+            eigen_pack[m] = ModuleBase::dot_real_op<T, Device>()(this->n_basis_, psi_m.data<T>(), hpsi_m.data<T>());
+
+            int iter = 0;
+            Real gg_last = 0.0;
+            Real cg_norm = 0.0;
+            Real theta = 0.0;
+            bool converged = false;
+            // g0 = P * S|grad> for CG update (persistent between iterations)
+            auto g0_tensor = ct::Tensor(ct::DataTypeToEnum<T>::value,
+                                        ct::DeviceTypeToEnum<ct_Device>::value,
+                                        {this->n_basis_});
+
+            do
+            {
+                // Compute preconditioned gradient (ABACUS calc_grad)
+                this->compute_gradient(prec_tensor, hpsi_m, spsi_m, eigen_pack[m], grad, ppsi);
+
+                // Orthogonalize gradient against lower bands (ABACUS orth_grad)
+                this->orth_gradient(psi, m, grad, s_grad, lagrange);
+
+                // Update CG direction with persistent g0 and Polak-Ribiere (ABACUS calc_gamma_cg)
+                Real gamma = this->update_cg_direction(cg, scg, grad, s_grad, prec_tensor, g0_tensor, gg_last, iter);
+
+                // ABACUS CG correction: cg -= gamma * cg_norm * sin(theta) * psi_m
+                if (iter > 0 && std::abs(gamma) > std::numeric_limits<Real>::epsilon())
+                {
+                    const Real norma = gamma * cg_norm * std::sin(theta);
+                    if (std::abs(norma) > std::numeric_limits<Real>::epsilon())
+                    {
+                        T znorma = static_cast<T>(-norma);
+                        ModuleBase::axpy_op<T, Device>()(this->n_basis_, &znorma, psi_m.data<T>(), 1, cg.data<T>(), 1);
+                        ModuleBase::axpy_op<T, Device>()(this->n_basis_, &znorma, spsi_m.data<T>(), 1, scg.data<T>(), 1);
+                    }
+                }
+
+                // Compute H|cg> and S|cg>
+                this->hpsi_func_(cg.data<T>(), ppsi.data<T>(), this->n_basis_, 1);
+                this->spsi_func_(cg.data<T>(), scg.data<T>(), this->n_basis_, 1);
+
+                // Line minimization in [ψ, cg] subspace (ABACUS update_psi)
+                converged = this->line_minimization(ppsi,
+                                                    cg,
+                                                    scg,
+                                                    ethr_band[m],
+                                                    cg_norm,
+                                                    theta,
+                                                    eigen_pack[m],
+                                                    psi_m,
+                                                    spsi_m,
+                                                    hpsi_m);
+
+                iter++;
+            } while (!converged && iter < this->pw_diag_nmax_);
+
+            // Store converged eigenvector
+            psi[m].sync(psi_m);
+
+            if (!converged)
+            {
+                ++this->notconv_;
+            }
+
+            avg += iter;
+
+            // Reorder eigenvalues if necessary
+            if (m > 0)
+            {
+                ModuleBase::GlobalFunc::NOTE("reorder bands!");
+                if (eigen_pack[m] - eigen_pack[m - 1] < -2.0 * this->pw_diag_thr_)
+                {
+                    int ii = 0;
+                    for (ii = m - 2; ii >= 0; ii--)
+                    {
+                        if (eigen_pack[m] - eigen_pack[ii] > 2.0 * this->pw_diag_thr_)
+                        {
+                            break;
+                        }
+                    }
+                    ii++;
+                    Real e0 = eigen_pack[m];
+                    ppsi.sync(psi[m]);
+
+                    for (int jj = m; jj >= ii + 1; jj--)
+                    {
+                        eigen_pack[jj] = eigen_pack[jj - 1];
+                        psi[jj].sync(psi[jj - 1]);
+                    }
+                    eigen_pack[ii] = e0;
+                    psi[ii].sync(ppsi);
+                }
+            }
+        }
+
+        avg = (this->n_band_ > 0) ? avg / this->n_band_ : 0;
+        avg_iter_ += avg;
+        ntry++;
+
+    } while (this->test_exit_cond(ntry, this->notconv_));
+
+    ModuleBase::timer::end("DiagoPPCG", "diag");
+    return avg;
+}
+
+// Template instantiations
+template class DiagoPPCG<std::complex<float>, base_device::DEVICE_CPU>;
+template class DiagoPPCG<std::complex<double>, base_device::DEVICE_CPU>;
+#if ((defined __CUDA) || (defined __ROCM))
+template class DiagoPPCG<std::complex<float>, base_device::DEVICE_GPU>;
+template class DiagoPPCG<std::complex<double>, base_device::DEVICE_GPU>;
+#endif
+
+} // namespace hsolver
diff --git a/source/source_hsolver/diago_ppcg.h b/source/source_hsolver/diago_ppcg.h
new file mode 100644
index 00000000000..71e84403702
--- /dev/null
+++ b/source/source_hsolver/diago_ppcg.h
@@ -0,0 +1,179 @@
+#ifndef DIAGO_PPCG_H_
+#define DIAGO_PPCG_H_
+
+#include "source_base/kernels/math_kernel_op.h"
+#include "source_base/module_device/memory_op.h"
+#include "source_base/module_device/types.h"
+#include "source_base/parallel_reduce.h"
+
+#include <ATen/core/tensor.h>
+#include <ATen/core/tensor_map.h>
+#include <ATen/kernels/blas.h>
+#include <ATen/kernels/lapack.h>
+#include <functional>
+#include <vector>
+
+namespace hsolver {
+
+/**
+ * @class DiagoPPCG
+ * @brief Projected Preconditioned Conjugate Gradient method for eigenvalue problems.
+ *
+ * PPCG solves the generalized eigenvalue problem H*psi = lambda*psi
+ * using a preconditioned conjugate gradient approach with explicit
+ * projection to maintain orthogonality against converged eigenvectors.
+ *
+ * @tparam T The floating-point type (float, double, complex<float>, complex<double>)
+ * @tparam Device The device type (CPU or GPU)
+ */
+template <typename T = std::complex<double>, typename Device = base_device::DEVICE_CPU>
+class DiagoPPCG
+{
+  private:
+    using Real = typename GetTypeReal<T>::type;
+    using ct_Device = typename ct::PsiToContainer<Device>::type;
+
+  public:
+    using HPsiFunc = std::function<void(T*, T*, const int, const int)>;
+    using SPsiFunc = std::function<void(T*, T*, const int, const int)>;
+    using SubspaceFunc = std::function<void(T*, T*, const int, const int, const bool)>;
+
+    /**
+     * @brief Constructor.
+     * @param basis_type The basis type ("pw" or "lcao")
+     * @param calculation The calculation type
+     */
+    DiagoPPCG(const std::string& basis_type, const std::string& calculation);
+
+    /**
+     * @brief Constructor with subspace function support.
+     * @param basis_type The basis type ("pw" or "lcao")
+     * @param calculation The calculation type
+     * @param need_subspace Whether to use subspace diagonalization
+     * @param subspace_func Subspace diagonalization function
+     * @param pw_diag_thr Convergence threshold
+     * @param pw_diag_nmax Max iterations
+     * @param nproc_in_pool Number of processors in pool
+     */
+    DiagoPPCG(const std::string& basis_type,
+              const std::string& calculation,
+              const bool& need_subspace,
+              const SubspaceFunc& subspace_func,
+              const Real& pw_diag_thr,
+              const int& pw_diag_nmax,
+              const int& nproc_in_pool);
+
+    /// Destructor
+    ~DiagoPPCG();
+
+    /**
+     * @brief Solve eigenvalue problem using PPCG method.
+     *
+     * @param hpsi_func Function to compute H|psi>
+     * @param spsi_func Function to compute S|psi> (identity for norm-conserving)
+     * @param ld_psi Leading dimension of psi matrix
+     * @param nband Number of bands
+     * @param dim Dimension of basis
+     * @param psi_in Input/output wavefunction matrix [dim: nband x ld_psi]
+     * @param eigenvalue_in Output eigenvalues [dim: nband]
+     * @param ethr_band Convergence threshold per band
+     * @param prec Preconditioner array [dim: dim]
+     * @return Average number of iterations
+     */
+    double diag(const HPsiFunc& hpsi_func,
+                const SPsiFunc& spsi_func,
+                const int ld_psi,
+                const int nband,
+                const int dim,
+                T* psi_in,
+                Real* eigenvalue_in,
+                const std::vector<double>& ethr_band,
+                const Real* prec = nullptr);
+
+  private:
+    Device* ctx_ = {};
+    int n_band_ = 0;
+    int n_basis_ = 0;
+    int ld_psi_ = 0;
+
+    double avg_iter_ = 0.0;
+    int notconv_ = 0;
+    int pw_diag_nmax_ = 0;
+    Real pw_diag_thr_ = 1e-5;
+
+    std::string basis_type_;
+    std::string calculation_;
+
+    HPsiFunc hpsi_func_ = nullptr;
+    SPsiFunc spsi_func_ = nullptr;
+    SubspaceFunc subspace_func_ = nullptr;
+
+    bool need_subspace_ = false;
+    int nproc_in_pool_ = 0;
+
+    /// Constants
+    const T* one_ = nullptr;
+    const T* zero_ = nullptr;
+    const T* neg_one_ = nullptr;
+
+    /**
+     * @brief Project gradient to be orthogonal to all lower states.
+     */
+    void orth_gradient(const ct::Tensor& psi,
+                       const int& m,
+                       ct::Tensor& grad,
+                       ct::Tensor& s_grad,
+                       ct::Tensor& lagrange);
+
+    /**
+     * @brief Compute gradient: g = H|ψ> - λ*S|ψ>, precondition and project.
+     */
+    void compute_gradient(const ct::Tensor& prec,
+                          const ct::Tensor& hpsi,
+                          const ct::Tensor& spsi,
+                          const Real& eigenvalue,
+                          ct::Tensor& grad,
+                          ct::Tensor& ppsi);
+
+    /**
+     * @brief Update CG direction using Polak-Ribiere formula.
+     * @return gamma coefficient for correction term
+     */
+    Real update_cg_direction(ct::Tensor& cg,
+                             ct::Tensor& scg,
+                             const ct::Tensor& grad,
+                             const ct::Tensor& s_grad,
+                             const ct::Tensor& prec,
+                             ct::Tensor& g0,
+                             Real& gg_last,
+                             const int& iter);
+
+    /**
+     * @brief Perform line minimization in [ψ, cg] subspace and update ψ.
+     * @return true if converged
+     */
+    bool line_minimization(const ct::Tensor& ppsi,
+                           const ct::Tensor& cg,
+                           const ct::Tensor& scg,
+                           const double& ethreshold,
+                           Real& cg_norm,
+                           Real& theta,
+                           Real& eigenvalue,
+                           ct::Tensor& psi_m,
+                           ct::Tensor& spsi_m,
+                           ct::Tensor& hpsi_m);
+
+    /**
+     * @brief Schmidt orthogonalize ψ_m against previously converged states.
+     */
+    void schmidt_orth(const int& m,
+                      const ct::Tensor& psi,
+                      const ct::Tensor& spsi_m,
+                      ct::Tensor& psi_m);
+
+    bool test_exit_cond(const int& ntry, const int& notconv) const;
+};
+
+} // namespace hsolver
+
+#endif // DIAGO_PPCG_H_
diff --git a/source/source_hsolver/hsolver_pw.cpp b/source/source_hsolver/hsolver_pw.cpp
index b88bc3b90dd..363ac0ec006 100644
--- a/source/source_hsolver/hsolver_pw.cpp
+++ b/source/source_hsolver/hsolver_pw.cpp
@@ -8,6 +8,7 @@
 #include "source_hamilt/hamilt.h"
 #include "source_hsolver/diag_comm_info.h"
 #include "source_hsolver/diago_bpcg.h"
+#include "source_hsolver/diago_ppcg.h"
 #include "source_hsolver/diago_cg.h"
 #include "source_hsolver/diago_dav_subspace.h"
 #include "source_hsolver/diago_david.h"
@@ -83,7 +84,7 @@ void HSolverPW<T, Device>::solve(hamilt::Hamilt<T, Device>* pHamilt,
     this->nproc_in_pool = nproc_in_pool_in;
 
     // report if the specified diagonalization method is not supported
-    const std::initializer_list<std::string> _methods = {"cg", "dav", "dav_subspace", "bpcg"};
+    const std::initializer_list<std::string> _methods = {"cg", "dav", "dav_subspace", "bpcg", "ppcg"};
     if (std::find(std::begin(_methods), std::end(_methods), this->method) == std::end(_methods))
     {
         ModuleBase::WARNING_QUIT("HSolverPW::solve", "This type of eigensolver is not supported!");
@@ -323,6 +324,39 @@ void HSolverPW<T, Device>::hamiltSolvePsiK(hamilt::Hamilt<T, Device>* hm,
         bpcg.init_iter(PARAM.inp.nbands, nband_l, nbasis, ndim);
         bpcg.diag(hpsi_func, psi.get_pointer(), eigenvalue, this->ethr_band);
     }
+    else if (this->method == "ppcg")
+    {
+        // wrap the subspace_func into a lambda function (matching CG pattern)
+        auto subspace_func = [hm, cur_nbasis](T* psi_in,
+                                              T* psi_out,
+                                              const int ld_psi,
+                                              const int nband,
+                                              const bool S_orth) {
+            auto psi_in_wrapper = psi::Psi<T, Device>(psi_in, 1, nband, ld_psi, cur_nbasis);
+            auto psi_out_wrapper = psi::Psi<T, Device>(psi_out, 1, nband, ld_psi, cur_nbasis);
+            std::vector<Real> eigen(nband, 0.0);
+            DiagoIterAssist<T, Device>::diag_subspace(hm, psi_in_wrapper, psi_out_wrapper, eigen.data());
+        };
+        DiagoPPCG<T, Device> ppcg(this->basis_type,
+                                  this->calculation_type,
+                                  this->need_subspace,
+                                  subspace_func,
+                                  this->diag_thr,
+                                  this->diag_iter_max,
+                                  this->nproc_in_pool);
+
+        DiagoIterAssist<T, Device>::avg_iter += static_cast<double>(
+            ppcg.diag(hpsi_func,
+                      spsi_func,
+                      psi.get_nbasis(),
+                      psi.get_nbands(),
+                      psi.get_current_ngk(),
+                      psi.get_pointer(),
+                      eigenvalue,
+                      this->ethr_band,
+                      pre_condition.data())
+        );
+    }
     else if (this->method == "dav_subspace")
     {
         bool scf = this->calculation_type == "nscf" ? false : true;
diff --git a/source/source_hsolver/test/CMakeLists.txt b/source/source_hsolver/test/CMakeLists.txt
index 1b1529adb4a..843041cf7c1 100644
--- a/source/source_hsolver/test/CMakeLists.txt
+++ b/source/source_hsolver/test/CMakeLists.txt
@@ -16,6 +16,14 @@ if (ENABLE_MPI)
             ../../source_hamilt/operator.cpp
             ../../source_pw/module_pwdft/op_pw.cpp
   )
+  AddTest(
+    TARGET MODULE_HSOLVER_ppcg
+    LIBS parameter  ${math_libs} base psi device container
+    SOURCES diago_ppcg_test.cpp ../diago_ppcg.cpp ../diago_iter_assist.cpp  ../diag_const_nums.cpp
+            ../../source_basis/module_pw/test/test_tool.cpp
+            ../../source_hamilt/operator.cpp
+            ../../source_pw/module_pwdft/op_pw.cpp
+  )
   AddTest(
     TARGET MODULE_HSOLVER_cg
     LIBS parameter  ${math_libs} base psi device container
diff --git a/source/source_hsolver/test/diago_ppcg_test.cpp b/source/source_hsolver/test/diago_ppcg_test.cpp
new file mode 100644
index 00000000000..361f87dd55e
--- /dev/null
+++ b/source/source_hsolver/test/diago_ppcg_test.cpp
@@ -0,0 +1,238 @@
+#include "source_base/inverse_matrix.h"
+#include "source_base/module_external/lapack_connector.h"
+#include "source_hamilt/hamilt.h"
+#include "source_psi/psi.h"
+#include "../diago_iter_assist.h"
+#include "../diago_ppcg.h"
+#include "diago_mock.h"
+
+#include <gtest/gtest.h>
+#include <complex>
+#include <random>
+
+/************************************************
+ *  unit test of functions in DiagoPPCG
+ ***********************************************/
+
+/**
+ * Test the PPCG (Projected Preconditioned Conjugate Gradient) method
+ * for eigenvalue problems.
+ *
+ * The test generates a random Hermitian matrix, computes eigenvalues
+ * using LAPACK as reference, and compares with PPCG results.
+ */
+
+// call lapack to get reference eigenvalues
+void lapackEigen(int &npw, std::vector<std::complex<double>> &hm, double *e)
+{
+    int lwork = 2 * npw;
+    std::complex<double> *work2 = new std::complex<double>[lwork];
+    double *rwork = new double[3 * npw - 2];
+    int info = 0;
+    char tmp_c1 = 'V', tmp_c2 = 'U';
+    zheev_(&tmp_c1, &tmp_c2, &npw, hm.data(), &npw, e, work2, &lwork, rwork, &info);
+    delete[] rwork;
+    delete[] work2;
+}
+
+class DiagoPPCGPrepare
+{
+  public:
+    DiagoPPCGPrepare(int nband, int npw, int sparsity, double eps, int maxiter, double threshold)
+        : nband(nband), npw(npw), sparsity(sparsity), eps(eps), maxiter(maxiter), threshold(threshold)
+    {
+#ifdef __MPI
+        MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
+        MPI_Comm_rank(MPI_COMM_WORLD, &mypnum);
+#endif
+    }
+
+    int nband, npw, sparsity, maxiter;
+    double eps, avg_iter;
+    double threshold;
+    int nprocs = 1, mypnum = 0;
+
+    void CompareEigen(double *precondition)
+    {
+        // Calculate eigenvalues by LAPACK
+        double *e_lapack = new double[npw];
+        auto ev = DIAGOTEST::hmatrix;
+        if (mypnum == 0)
+        {
+            lapackEigen(npw, ev, e_lapack);
+        }
+
+        // Initial guess of psi by perturbing lapack psi
+        ModuleBase::ComplexMatrix psiguess(nband, npw);
+        std::default_random_engine p(1);
+        std::uniform_int_distribution<unsigned> u(1, 10);
+        for (int i = 0; i < nband; i++)
+        {
+            for (int j = 0; j < npw; j++)
+            {
+                double rand = static_cast<double>(u(p)) / 10.;
+                psiguess(i, j) = ev[j * DIAGOTEST::h_nc + i] * rand;
+            }
+        }
+
+        double *en = new double[npw];
+        hamilt::Hamilt<std::complex<double>> *ha = new hamilt::HamiltPW<std::complex<double>>(
+            nullptr, nullptr, nullptr, nullptr, nullptr, nullptr);
+
+        psi::Psi<std::complex<double>> psi;
+        psi.resize(1, nband, npw);
+        for (int i = 0; i < nband; i++)
+        {
+            for (int j = 0; j < npw; j++)
+            {
+                psi(i, j) = psiguess(i, j);
+            }
+        }
+
+        psi::Psi<std::complex<double>> psi_local;
+        double *precondition_local;
+        DIAGOTEST::npw_local = new int[nprocs];
+#ifdef __MPI
+        DIAGOTEST::cal_division(DIAGOTEST::npw);
+        DIAGOTEST::divide_hpsi(psi, psi_local, DIAGOTEST::hmatrix, DIAGOTEST::hmatrix_local);
+        precondition_local = new double[DIAGOTEST::npw_local[mypnum]];
+        DIAGOTEST::divide_psi<double>(precondition, precondition_local);
+#else
+        DIAGOTEST::hmatrix_local = DIAGOTEST::hmatrix;
+        DIAGOTEST::npw_local[0] = DIAGOTEST::npw;
+        psi_local = psi;
+        precondition_local = new double[DIAGOTEST::npw];
+        for (int i = 0; i < DIAGOTEST::npw; i++)
+            precondition_local[i] = precondition[i];
+#endif
+
+        // Run PPCG
+        psi_local.fix_k(0);
+        using T = std::complex<double>;
+        const int dim = DIAGOTEST::npw;
+        const std::vector<T> &h_mat = DIAGOTEST::hmatrix_local;
+
+        auto hpsi_func = [h_mat, dim](T *psi_in, T *hpsi_out,
+                                      const int ld_psi, const int nvec)
+        {
+            auto one = std::make_unique<T>(1.0);
+            auto zero = std::make_unique<T>(0.0);
+            const T *one_ = one.get();
+            const T *zero_ = zero.get();
+
+            base_device::DEVICE_CPU *ctx = {};
+            ModuleBase::gemm_op<T, base_device::DEVICE_CPU>()(
+                'N', 'N',
+                dim, nvec, dim,
+                one_,
+                h_mat.data(), dim,
+                psi_in, ld_psi,
+                zero_,
+                hpsi_out, ld_psi);
+        };
+
+        // For PPCG (non-generalized), S = I
+        auto spsi_func = [](T *psi_in, T *spsi_out,
+                            const int ld_psi, const int nvec)
+        {
+            for (int i = 0; i < ld_psi * nvec; i++)
+            {
+                spsi_out[i] = psi_in[i];
+            }
+        };
+
+        hsolver::DiagoPPCG<T> ppcg("pw", "scf");
+        std::vector<double> ethr_band(nband, 1e-5);
+
+        double avg_iter = ppcg.diag(hpsi_func,
+                                    spsi_func,
+                                    npw,
+                                    nband,
+                                    dim,
+                                    psi_local.get_pointer(),
+                                    en,
+                                    ethr_band,
+                                    precondition_local);
+
+        // Run multiple times to ensure convergence
+        for (int r = 0; r < 3; r++)
+        {
+            avg_iter = ppcg.diag(hpsi_func,
+                                 spsi_func,
+                                 npw,
+                                 nband,
+                                 dim,
+                                 psi_local.get_pointer(),
+                                 en,
+                                 ethr_band,
+                                 precondition_local);
+        }
+
+        // Compare with LAPACK reference
+        for (int i = 0; i < nband; i++)
+        {
+            EXPECT_NEAR(en[i], e_lapack[i], threshold);
+        }
+
+        delete[] DIAGOTEST::npw_local;
+        delete[] precondition_local;
+        delete[] en;
+        delete[] e_lapack;
+        delete ha;
+    }
+};
+
+class DiagoPPCGTest : public ::testing::TestWithParam<DiagoPPCGPrepare>
+{
+};
+
+TEST_P(DiagoPPCGTest, RandomHamilt)
+{
+    DiagoPPCGPrepare dcp = GetParam();
+    hsolver::DiagoIterAssist<std::complex<double>>::PW_DIAG_NMAX = dcp.maxiter;
+    hsolver::DiagoIterAssist<std::complex<double>>::PW_DIAG_THR = dcp.eps;
+
+    HPsi<std::complex<double>> hpsi(dcp.nband, dcp.npw, dcp.sparsity);
+    DIAGOTEST::hmatrix = hpsi.hamilt();
+    DIAGOTEST::npw = dcp.npw;
+
+    dcp.CompareEigen(hpsi.precond());
+}
+
+INSTANTIATE_TEST_SUITE_P(VerifyPPCG,
+                         DiagoPPCGTest,
+                         ::testing::Values(
+                             // nband, npw, sparsity, eps, maxiter, threshold
+                             DiagoPPCGPrepare(4, 100, 0, 1e-5, 300, 5e-2)
+                             ));
+
+int main(int argc, char **argv)
+{
+    int nproc = 1, myrank = 0;
+
+#ifdef __MPI
+    int nproc_in_pool, kpar = 1, mypool, rank_in_pool;
+    setupmpi(argc, argv, nproc, myrank);
+    divide_pools(nproc, myrank, nproc_in_pool, kpar, mypool, rank_in_pool);
+    MPI_Comm_split(MPI_COMM_WORLD, myrank, 0, &BP_WORLD);
+    GlobalV::NPROC_IN_POOL = nproc;
+#else
+    MPI_Init(&argc, &argv);
+#endif
+
+    testing::InitGoogleTest(&argc, argv);
+    ::testing::TestEventListeners &listeners = ::testing::UnitTest::GetInstance()->listeners();
+    if (myrank != 0)
+    {
+        delete listeners.Release(listeners.default_result_printer());
+    }
+
+    int result = RUN_ALL_TESTS();
+    if (myrank == 0 && result != 0)
+    {
+        std::cout << "Some tests failed. Check the output above for details." << std::endl;
+    }
+
+    MPI_Finalize();
+    return result;
+}
diff --git a/source/source_io/module_parameter/read_input_item_elec_stru.cpp b/source/source_io/module_parameter/read_input_item_elec_stru.cpp
index 39f37febc54..a5d0bf98219 100644
--- a/source/source_io/module_parameter/read_input_item_elec_stru.cpp
+++ b/source/source_io/module_parameter/read_input_item_elec_stru.cpp
@@ -131,7 +131,7 @@ Then the user has to correct the input file and restart the calculation.)";
         };
         item.check_value = [](const Input_Item& item, const Parameter& para) {
             const std::string& ks_solver = para.input.ks_solver;
-            const std::vector<std::string> pw_solvers = {"cg", "dav", "bpcg", "dav_subspace"};
+            const std::vector<std::string> pw_solvers = {"cg", "dav", "bpcg", "dav_subspace", "ppcg"};
             const std::vector<std::string> lcao_solvers = {
                 "genelpa",
                 "elpa",