deepmodeling · Absolutely-Daisy · May 29, 2026
diff --git a/source/Makefile.Objects b/source/Makefile.Objects
@@ -379,6 +379,7 @@ OBJS_HCONTAINER=base_matrix.o\
     transfer.o\
 
 OBJS_HSOLVER=diago_cg.o\
+    diago_cg_mixed.o\
     diago_david.o\
     diago_dav_subspace.o\
     diago_bpcg.o\

diff --git a/source/source_hsolver/CMakeLists.txt b/source/source_hsolver/CMakeLists.txt
@@ -1,6 +1,7 @@
 list(APPEND objects
     diag_const_nums.cpp
     diago_cg.cpp
+    diago_cg_mixed.cpp
     diago_david.cpp
     diago_dav_subspace.cpp
     diago_bpcg.cpp

diff --git a/source/source_hsolver/diago_cg_mixed.cpp b/source/source_hsolver/diago_cg_mixed.cpp
@@ -0,0 +1,285 @@
+#include <source_hsolver/diago_cg_mixed.h>
+#include <ATen/core/tensor_map.h>
+#include <ATen/core/tensor_utils.h>
+#include <ATen/kernels/memory.h>
+#include <source_base/constants.h>
+#include <source_base/memory.h>
+#include <source_base/parallel_reduce.h>
+#include <source_base/timer.h>
+#include <source_base/tool_title.h>
+#include <source_base/global_function.h>
+
+using namespace hsolver;
+
+template <typename T, typename Device>
+DiagoCGMixed<T, Device>::DiagoCGMixed(const std::string& basis_type, const std::string& calculation)
+{
+    basis_type_ = basis_type; calculation_ = calculation;
+}
+
+template <typename T, typename Device>
+DiagoCGMixed<T, Device>::DiagoCGMixed(const std::string& basis_type, const std::string& calculation,
+                                       const bool& need_subspace, const Func& subspace_func,
+                                       const Real& pw_diag_thr, const int& pw_diag_nmax, const int& nproc_in_pool)
+{
+    basis_type_ = basis_type; calculation_ = calculation;
+    need_subspace_ = need_subspace; subspace_func_ = subspace_func;
+    pw_diag_thr_ = pw_diag_thr; pw_diag_nmax_ = pw_diag_nmax; nproc_in_pool_ = nproc_in_pool;
+}
+
+template <typename T, typename Device>
+void DiagoCGMixed<T, Device>::convert_d2f(const ct::Tensor& d_src, ct::Tensor& f_dst)
+{
+    const int n = d_src.NumElements();
+    const T* d = d_src.data<T>();
+    T_float* f = f_dst.data<T_float>();
+    for (int i = 0; i < n; i++) f[i] = static_cast<T_float>(d[i]);
+}
+
+template <typename T, typename Device>
+void DiagoCGMixed<T, Device>::convert_f2d(const ct::Tensor& f_src, ct::Tensor& d_dst)
+{
+    const int n = f_src.NumElements();
+    const T_float* f = f_src.data<T_float>();
+    T* d = d_dst.data<T>();
+    for (int i = 0; i < n; i++) d[i] = static_cast<T>(f[i]);
+}
+
+template <typename T, typename Device>
+void DiagoCGMixed<T, Device>::diag_mock(const ct::Tensor& prec_in, ct::Tensor& psi,
+                                         ct::Tensor& eigen, const std::vector<double>& ethr_band)
+{
+    ModuleBase::TITLE("DiagoCGMixed", "diag_once");
+    ModuleBase::timer::tick("DiagoCGMixed", "diag_once");
+
+    notconv_ = 0;
+    n_band_ = psi.shape().dim_size(0);
+    n_basis_ = psi.shape().dim_size(1);
+    int avg = 0;
+
+    auto dt = ct::DataTypeToEnum<T>::value;
+    auto dtf = ct::DataTypeToEnum<T_float>::value;
+    auto dev = ct::DeviceTypeToEnum<ct_Device>::value;
+    auto dtr = ct::DataTypeToEnum<Real>::value;
+    auto dtfr = ct::DataTypeToEnum<Real_float>::value;
+
+    auto phi_m = ct::Tensor(dt, dev, {n_basis_}), hphi = ct::Tensor(dt, dev, {n_basis_});
+    auto sphi = ct::Tensor(dt, dev, {n_basis_}), pphi = ct::Tensor(dt, dev, {n_basis_});
+    auto cg = ct::Tensor(dt, dev, {n_basis_}), scg = ct::Tensor(dt, dev, {n_basis_});
+    auto grad = ct::Tensor(dt, dev, {n_basis_}), g0 = ct::Tensor(dt, dev, {n_basis_});
+    auto lagrange = ct::Tensor(dt, dev, {n_band_});
+
+    auto phi_m_f = ct::Tensor(dtf, dev, {n_basis_}), hphi_f = ct::Tensor(dtf, dev, {n_basis_});
+    auto sphi_f = ct::Tensor(dtf, dev, {n_basis_});
+
+    auto prec = prec_in;
+    if (prec.NumElements() == 0) {
+        prec = ct::Tensor(ct::DataTypeToEnum<Real>::value, ct::DeviceTypeToEnum<ct_Device>::value, {n_basis_});
+        prec.set_value(static_cast<Real>(1.0));
+    }
+    auto prec_f = ct::Tensor(ct::DataTypeToEnum<Real_float>::value, ct::DeviceTypeToEnum<ct_Device>::value, {n_basis_});
+    {
+        auto* p = prec.data<Real>();
+        auto* q = prec_f.data<Real_float>();
+        for (int i = 0; i < n_basis_; i++) q[i] = static_cast<Real_float>(p[i]);
+    }
+
+    ModuleBase::Memory::record("DiagoCGMixed", n_basis_ * 12);
+    eigen.zero();
+    auto eig = eigen.accessor<Real, 1>();
+
+    for (int m = 0; m < n_band_; m++)
+    {
+        phi_m.sync(psi[m]);
+        spsi_func_(phi_m, sphi);
+        schmit_orth(m, psi, sphi, phi_m);
+
+        convert_d2f(phi_m, phi_m_f);
+        hpsi_func_(phi_m_f, hphi_f);
+        spsi_func_(phi_m_f, sphi_f);
+        convert_f2d(hphi_f, hphi);
+        convert_f2d(sphi_f, sphi);
+
+        eig[m] = ModuleBase::dot_real_op<T, Device>()(n_basis_, phi_m.data<T>(), hphi.data<T>());
+
+        int iter = 0;
+        Real gg_last = 0, cg_norm = 0, theta = 0;
+        bool converged = false;
+
+        do {
+            calc_grad(prec_f, grad, hphi, sphi, pphi);
+            orth_grad(psi, m, grad, scg, lagrange);
+            calc_gamma_cg(iter, cg_norm, theta, prec_f, scg, grad, phi_m, gg_last, g0, cg);
+
+            {
+                auto cg_f = ct::Tensor(dtf, dev, {n_basis_}), pphi_f = ct::Tensor(dtf, dev, {n_basis_}), scg_f = ct::Tensor(dtf, dev, {n_basis_});
+                convert_d2f(cg, cg_f);
+                hpsi_func_(cg_f, pphi_f);
+                spsi_func_(cg_f, scg_f);
+                convert_f2d(pphi_f, pphi);
+                convert_f2d(scg_f, scg);
+            }
+
+            converged = update_psi(pphi, cg, scg, ethr_band[m], cg_norm, theta, eig[m], phi_m, sphi, hphi);
+        } while (!converged && ++iter < pw_diag_nmax_);
+
+        psi[m].sync(phi_m);
+        if (!converged) ++notconv_;
+        avg += static_cast<Real>(iter) + 1;
+
+        if (m > 0 && eig[m] - eig[m - 1] < -2.0 * pw_diag_thr_) {
+            int ii = m - 2;
+            while (ii >= 0 && eig[m] - eig[ii] <= 2.0 * pw_diag_thr_) ii--;
+            ii++;
+            Real e0 = eig[m]; pphi.sync(psi[m]);
+            for (int jj = m; jj > ii; jj--) { eig[jj] = eig[jj - 1]; psi[jj].sync(psi[jj - 1]); }
+            eig[ii] = e0; psi[ii].sync(pphi);
+        }
+    }
+    avg /= n_band_; avg_iter_ += avg;
+    ModuleBase::timer::tick("DiagoCGMixed", "diag_once");
+}
+
+template <typename T, typename Device>
+void DiagoCGMixed<T, Device>::calc_grad(const ct::Tensor& prec_f, ct::Tensor& grad,
+                                         ct::Tensor& hphi, ct::Tensor& sphi, ct::Tensor& pphi)
+{
+    auto dtf = ct::DataTypeToEnum<T_float>::value;
+    auto dev = ct::DeviceTypeToEnum<ct_Device>::value;
+    auto hphi_f = ct::Tensor(dtf, dev, {n_basis_}), sphi_f = ct::Tensor(dtf, dev, {n_basis_});
+    auto grad_f = ct::Tensor(dtf, dev, {n_basis_}), pphi_f = ct::Tensor(dtf, dev, {n_basis_});
+    convert_d2f(hphi, hphi_f);
+    convert_d2f(sphi, sphi_f);
+
+    ModuleBase::vector_div_vector_op<T_float, Device>()(n_basis_, grad_f.data<T_float>(), hphi_f.data<T_float>(), prec_f.data<Real_float>());
+    ModuleBase::vector_div_vector_op<T_float, Device>()(n_basis_, pphi_f.data<T_float>(), sphi_f.data<T_float>(), prec_f.data<Real_float>());
+
+    convert_f2d(grad_f, grad);
+    convert_f2d(pphi_f, pphi);
+
+    const Real eh = ModuleBase::dot_real_op<T, Device>()(n_basis_, sphi.data<T>(), grad.data<T>());
+    const Real es = ModuleBase::dot_real_op<T, Device>()(n_basis_, sphi.data<T>(), pphi.data<T>());
+    ModuleBase::vector_add_vector_op<T, Device>()(n_basis_, grad.data<T>(), grad.data<T>(), 1.0, pphi.data<T>(), -(eh / es));
+}
+
+template <typename T, typename Device>
+void DiagoCGMixed<T, Device>::orth_grad(const ct::Tensor& psi, const int& m,
+                                         ct::Tensor& grad, ct::Tensor& scg, ct::Tensor& lagrange)
+{
+    const T one(1.0), zero(0.0), neg_one(-1.0);
+    spsi_func_(grad, scg);
+
+    ModuleBase::gemv_op<T, Device>()('C', n_basis_, m, &one, psi.data<T>(), n_basis_, scg.data<T>(), 1, &zero, lagrange.data<T>(), 1);
+    Parallel_Reduce::reduce_pool(lagrange.data<T>(), m);
+
+    ModuleBase::gemv_op<T, Device>()('N', n_basis_, m, &neg_one, psi.data<T>(), n_basis_, lagrange.data<T>(), 1, &one, grad.data<T>(), 1);
+    ModuleBase::gemv_op<T, Device>()('N', n_basis_, m, &neg_one, psi.data<T>(), n_basis_, lagrange.data<T>(), 1, &one, scg.data<T>(), 1);
+}
+
+template <typename T, typename Device>
+void DiagoCGMixed<T, Device>::calc_gamma_cg(const int& iter, const Real& cg_norm, const Real& theta,
+                                             const ct::Tensor& prec_f, const ct::Tensor& scg,
+                                             const ct::Tensor& grad, const ct::Tensor& phi_m,
+                                             Real& gg_last, ct::Tensor& g0, ct::Tensor& cg)
+{
+    Real gg_inter;
+    if (iter > 0) gg_inter = ModuleBase::dot_real_op<T, Device>()(n_basis_, grad.data<T>(), g0.data<T>());
+
+    auto dtf = ct::DataTypeToEnum<T_float>::value;
+    auto dev = ct::DeviceTypeToEnum<ct_Device>::value;
+    auto scg_f = ct::Tensor(dtf, dev, {n_basis_}), g0_f = ct::Tensor(dtf, dev, {n_basis_});
+    convert_d2f(scg, scg_f);
+    ModuleBase::vector_mul_vector_op<T_float, Device>()(n_basis_, g0_f.data<T_float>(), scg_f.data<T_float>(), prec_f.data<Real_float>());
+    convert_f2d(g0_f, g0);
+
+    const Real gg_now = ModuleBase::dot_real_op<T, Device>()(n_basis_, grad.data<T>(), g0.data<T>());
+
+    if (iter == 0) {
+        gg_last = gg_now;
+        cg.sync(grad);
+    } else {
+        const Real gamma = (gg_now - gg_inter) / gg_last;
+        gg_last = gg_now;
+        ModuleBase::vector_add_vector_op<T, Device>()(n_basis_, cg.data<T>(), cg.data<T>(), gamma, grad.data<T>(), 1.0);
+        T znorma = static_cast<T>(-gamma * cg_norm * sin(theta));
+        ModuleBase::axpy_op<T, Device>()(n_basis_, &znorma, phi_m.data<T>(), 1, cg.data<T>(), 1);
+    }
+}
+
+template <typename T, typename Device>
+bool DiagoCGMixed<T, Device>::update_psi(const ct::Tensor& pphi, const ct::Tensor& cg, const ct::Tensor& scg,
+                                          const double& ethreshold, Real& cg_norm, Real& theta, Real& eigen,
+                                          ct::Tensor& phi_m, ct::Tensor& sphi, ct::Tensor& hphi)
+{
+    cg_norm = sqrt(ModuleBase::dot_real_op<T, Device>()(n_basis_, cg.data<T>(), scg.data<T>()));
+    if (cg_norm < 1e-10) return true;
+
+    const Real a0 = ModuleBase::dot_real_op<T, Device>()(n_basis_, phi_m.data<T>(), pphi.data<T>()) * 2.0 / cg_norm;
+    const Real b0 = ModuleBase::dot_real_op<T, Device>()(n_basis_, cg.data<T>(), pphi.data<T>()) / (cg_norm * cg_norm);
+    const Real e0 = eigen;
+    theta = atan(a0 / (e0 - b0)) / 2.0;
+    const Real new_e = (e0 - b0) * cos(2.0 * theta) + a0 * sin(2.0 * theta);
+    const Real e1 = (e0 + b0 + new_e) / 2.0, e2 = (e0 + b0 - new_e) / 2.0;
+    if (e1 > e2) theta += ModuleBase::PI_HALF;
+    eigen = std::min(e1, e2);
+
+    const Real cost = cos(theta), sint_norm = sin(theta) / cg_norm;
+    ModuleBase::vector_add_vector_op<T, Device>()(n_basis_, phi_m.data<T>(), phi_m.data<T>(), cost, cg.data<T>(), sint_norm);
+
+    if (std::abs(eigen - e0) < ethreshold) return true;
+
+    ModuleBase::vector_add_vector_op<T, Device>()(n_basis_, sphi.data<T>(), sphi.data<T>(), cost, scg.data<T>(), sint_norm);
+    ModuleBase::vector_add_vector_op<T, Device>()(n_basis_, hphi.data<T>(), hphi.data<T>(), cost, pphi.data<T>(), sint_norm);
+    return false;
+}
+
+template <typename T, typename Device>
+void DiagoCGMixed<T, Device>::schmit_orth(const int& m, const ct::Tensor& psi,
+                                           const ct::Tensor& sphi, ct::Tensor& phi_m)
+{
+    const T one(1.0), zero(0.0), neg_one(-1.0);
+    ct::Tensor lagrange_so(ct::DataTypeToEnum<T>::value, ct::DeviceTypeToEnum<ct_Device>::value, {m + 1});
+
+    ModuleBase::gemv_op<T, Device>()('C', n_basis_, m + 1, &one, psi.data<T>(), n_basis_, sphi.data<T>(), 1, &zero, lagrange_so.data<T>(), 1);
+    Parallel_Reduce::reduce_pool(lagrange_so.data<T>(), m + 1);
+    ModuleBase::gemv_op<T, Device>()('N', n_basis_, m, &neg_one, psi.data<T>(), n_basis_, lagrange_so.data<T>(), 1, &one, phi_m.data<T>(), 1);
+
+    auto psi_norm = ct::extract<Real>(lagrange_so[m])
+                    - ModuleBase::dot_real_op<T, Device>()(m, lagrange_so.data<T>(), lagrange_so.data<T>(), false);
+    ModuleBase::vector_div_constant_op<T, Device>()(n_basis_, phi_m.data<T>(), phi_m.data<T>(), sqrt(psi_norm));
+}
+
+template <typename T, typename Device>
+bool DiagoCGMixed<T, Device>::test_exit_cond(const int& ntry, const int& notconv) const
+{
+    return ntry <= 5 && (calculation_ != "nscf" ? notconv > 5 : notconv > 0);
+}
+
+template <typename T, typename Device>
+void DiagoCGMixed<T, Device>::diag(const Func& hpsi_func, const Func& spsi_func,
+                                    ct::Tensor& psi, ct::Tensor& eigen,
+                                    const std::vector<double>& ethr_band, const ct::Tensor& prec)
+{
+    int ntry = 0; notconv_ = 0;
+    hpsi_func_ = hpsi_func; spsi_func_ = spsi_func;
+    ct::Tensor psi_temp = psi.slice({0, 0}, {int(psi.shape().dim_size(0)), int(prec.shape().dim_size(0))});
+    do {
+        if (need_subspace_ || ntry > 0) {
+            ct::TensorMap psi_map = ct::TensorMap(psi.data(), psi_temp);
+            subspace_func_(psi_temp, psi_map);
+            psi_temp.sync(psi_map);
+        }
+        ++ntry; avg_iter_ += 1.0;
+        diag_mock(prec, psi_temp, eigen, ethr_band);
+    } while (test_exit_cond(ntry, notconv_));
+    psi.zero(); psi.sync(psi_temp);
+}
+
+namespace hsolver {
+template class DiagoCGMixed<std::complex<double>, base_device::DEVICE_CPU>;
+template class DiagoCGMixed<std::complex<float>, base_device::DEVICE_CPU>;
+#if ((defined __CUDA) || (defined __ROCM))
+template class DiagoCGMixed<std::complex<double>, base_device::DEVICE_GPU>;
+template class DiagoCGMixed<std::complex<float>, base_device::DEVICE_GPU>;
+#endif
+} // namespace hsolver
diff --git a/source/source_hsolver/diago_cg_mixed.h b/source/source_hsolver/diago_cg_mixed.h
@@ -0,0 +1,69 @@
+#ifndef SOURCE_HSOLVER_DIAGO_CG_MIXED_H_
+#define SOURCE_HSOLVER_DIAGO_CG_MIXED_H_
+
+#include <functional>
+#include <source_base/macros.h>
+#include <source_base/kernels/math_kernel_op.h>
+#include <ATen/core/tensor.h>
+#include <ATen/core/tensor_types.h>
+
+namespace hsolver {
+
+template <typename T> struct GetFloatType { using type = T; };
+template <> struct GetFloatType<std::complex<double>> { using type = std::complex<float>; };
+template <> struct GetFloatType<double> { using type = float; };
+
+template <typename T> struct GetFloatRealType { using type = typename GetTypeReal<T>::type; };
+template <> struct GetFloatRealType<std::complex<double>> { using type = float; };
+template <> struct GetFloatRealType<double> { using type = float; };
+
+template <typename T, typename Device = base_device::DEVICE_CPU>
+class DiagoCGMixed final
+{
+    using Real = typename GetTypeReal<T>::type;
+    using ct_Device = typename ct::PsiToContainer<Device>::type;
+    using T_float = typename GetFloatType<T>::type;
+    using Real_float = typename GetFloatRealType<T>::type;
+
+  public:
+    using Func = std::function<void(const ct::Tensor&, ct::Tensor&)>;
+
+    DiagoCGMixed(const std::string& basis_type, const std::string& calculation);
+    DiagoCGMixed(const std::string& basis_type, const std::string& calculation,
+                 const bool& need_subspace, const Func& subspace_func,
+                 const Real& pw_diag_thr, const int& pw_diag_nmax, const int& nproc_in_pool);
+    ~DiagoCGMixed() = default;
+
+    void diag(const Func& hpsi_func, const Func& spsi_func,
+              ct::Tensor& psi, ct::Tensor& eigen,
+              const std::vector<double>& ethr_band, const ct::Tensor& prec = {});
+
+  private:
+    int notconv_ = 0, n_band_ = 0, n_basis_ = 0, avg_iter_ = 0, pw_diag_nmax_ = 0, nproc_in_pool_ = 0;
+    Real pw_diag_thr_ = 1e-5;
+    std::string basis_type_, calculation_;
+    bool need_subspace_ = false;
+    Func hpsi_func_, spsi_func_, subspace_func_;
+
+    void convert_d2f(const ct::Tensor& d, ct::Tensor& f);
+    void convert_f2d(const ct::Tensor& f, ct::Tensor& d);
+
+    void calc_grad(const ct::Tensor& prec_f, ct::Tensor& grad, ct::Tensor& hphi,
+                   ct::Tensor& sphi, ct::Tensor& pphi);
+    void orth_grad(const ct::Tensor& psi, const int& m, ct::Tensor& grad,
+                   ct::Tensor& scg, ct::Tensor& lagrange);
+    void calc_gamma_cg(const int& iter, const Real& cg_norm, const Real& theta,
+                       const ct::Tensor& prec_f, const ct::Tensor& scg,
+                       const ct::Tensor& grad, const ct::Tensor& phi_m,
+                       Real& gg_last, ct::Tensor& g0, ct::Tensor& cg);
+    bool update_psi(const ct::Tensor& pphi, const ct::Tensor& cg, const ct::Tensor& scg,
+                    const double& ethreshold, Real& cg_norm, Real& theta, Real& eigen,
+                    ct::Tensor& phi_m, ct::Tensor& sphi, ct::Tensor& hphi);
+    void schmit_orth(const int& m, const ct::Tensor& psi, const ct::Tensor& sphi, ct::Tensor& phi_m);
+    void diag_mock(const ct::Tensor& prec, ct::Tensor& psi, ct::Tensor& eigen,
+                   const std::vector<double>& ethr_band);
+    bool test_exit_cond(const int& ntry, const int& notconv) const;
+};
+
+} // namespace hsolver
+#endif