From 96d5e833562b7e007ec9371edc30e91bb8591e53 Mon Sep 17 00:00:00 2001 From: someone Date: Sat, 31 Jan 2026 02:32:45 +0800 Subject: [PATCH 1/5] Life Savior --- source/source_esolver/esolver_ks_pw.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/source/source_esolver/esolver_ks_pw.cpp b/source/source_esolver/esolver_ks_pw.cpp index 511ccf3b5fa..d017c64cf99 100644 --- a/source/source_esolver/esolver_ks_pw.cpp +++ b/source/source_esolver/esolver_ks_pw.cpp @@ -307,10 +307,7 @@ void ESolver_KS_PW::iter_finish(UnitCell& ucell, const int istep, int if (PARAM.inp.exx_thr_type == "energy") { dexx = exx_helper.cal_exx_energy(this->stp.psi_t); - } - exx_helper.set_psi(this->stp.psi_t); - if (PARAM.inp.exx_thr_type == "energy") - { + exx_helper.set_psi(this->stp.psi_t); dexx -= exx_helper.cal_exx_energy(this->stp.psi_t); // std::cout << "dexx = " << dexx << std::endl; } @@ -319,6 +316,10 @@ void ESolver_KS_PW::iter_finish(UnitCell& ucell, const int istep, int conv_esolver = exx_helper.exx_after_converge(iter, conv_ene); if (!conv_esolver) { + if (PARAM.inp.exx_thr_type != "energy") + { + exx_helper.set_psi(this->stp.psi_t); + } auto duration = std::chrono::high_resolution_clock::now() - start; std::cout << " Setting Psi for EXX PW Inner Loop took " << std::chrono::duration_cast(duration).count() / 1000.0 << "s" From 18ed2ddee204c094e2561651c852ff199ce6efe8 Mon Sep 17 00:00:00 2001 From: someone Date: Mon, 2 Mar 2026 18:46:13 +0800 Subject: [PATCH 2/5] INIT MGGA VELOCITY --- source/source_pw/module_pwdft/elecond.cpp | 21 ++++- source/source_pw/module_pwdft/op_pw_vel.cpp | 83 ++++++++++++++++++- source/source_pw/module_pwdft/op_pw_vel.h | 16 +++- .../source_pw/module_stodft/sto_elecond.cpp | 36 +++++++- 4 files changed, 147 insertions(+), 9 deletions(-) diff --git a/source/source_pw/module_pwdft/elecond.cpp b/source/source_pw/module_pwdft/elecond.cpp index dbf78ea2a40..57cf6cf38b4 100644 --- a/source/source_pw/module_pwdft/elecond.cpp +++ b/source/source_pw/module_pwdft/elecond.cpp @@ -5,6 +5,8 @@ #include "source_base/kernels/math_kernel_op.h" #include "source_base/parallel_device.h" #include "source_estate/occupy.h" +#include "source_estate/module_pot/potential_new.h" +#include "source_base/module_device/types.h" #include "source_io/binstream.h" #include "source_io/module_parameter/parameter.h" @@ -84,7 +86,24 @@ void EleCond::KG(const int& smear_type, std::vector ct12(nt, 0); std::vector ct22(nt, 0); - hamilt::Velocity velop(this->p_wfcpw, this->p_kv->isk.data(), this->p_ppcell, this->p_ucell, nonlocal); + using Real = typename GetTypeReal::type; + const Real* vtau_ptr = (this->p_elec != nullptr && this->p_elec->pot != nullptr) + ? this->p_elec->pot->template get_vofk_smooth_data() + : nullptr; + const int vtau_col = (this->p_elec != nullptr && this->p_elec->pot != nullptr) + ? this->p_elec->pot->get_vofk_smooth().nc + : 0; + const int vtau_row = (this->p_elec != nullptr && this->p_elec->pot != nullptr) + ? this->p_elec->pot->get_vofk_smooth().nr + : 0; + hamilt::Velocity velop(this->p_wfcpw, + this->p_kv->isk.data(), + this->p_ppcell, + this->p_ucell, + nonlocal, + vtau_ptr, + vtau_col, + vtau_row); double decut = (wcut + fwhmin) / ModuleBase::Ry_to_eV; std::cout << "Recommended dt: " << 0.25 * M_PI / decut << " a.u." << std::endl; for (int ik = 0; ik < nk; ++ik) diff --git a/source/source_pw/module_pwdft/op_pw_vel.cpp b/source/source_pw/module_pwdft/op_pw_vel.cpp index b4f01275866..925473b3427 100644 --- a/source/source_pw/module_pwdft/op_pw_vel.cpp +++ b/source/source_pw/module_pwdft/op_pw_vel.cpp @@ -3,6 +3,8 @@ #include "source_base/kernels/math_kernel_op.h" #include "source_base/parallel_reduce.h" #include "source_base/timer.h" +#include "source_hamilt/module_xc/xc_functional.h" +#include "source_pw/module_pwdft/kernels/meta_op.h" namespace hamilt { @@ -11,7 +13,10 @@ Velocity::Velocity(const ModulePW::PW_Basis_K* wfcpw_in, const int* isk_in, pseudopot_cell_vnl* ppcell_in, const UnitCell* ucell_in, - const bool nonlocal_in) + const bool nonlocal_in, + const typename GetTypeReal::type* vtau_in, + const int vtau_col_in, + const int vtau_row_in) { if (wfcpw_in == nullptr || isk_in == nullptr || ppcell_in == nullptr || ucell_in == nullptr) { @@ -23,10 +28,16 @@ Velocity::Velocity(const ModulePW::PW_Basis_K* wfcpw_in, this->ucell = ucell_in; this->nonlocal = nonlocal_in; this->tpiba = ucell_in->tpiba; + this->vtau_ = vtau_in; + this->vtau_col_ = vtau_col_in; + this->vtau_row_ = vtau_row_in; if (this->nonlocal) { this->ppcell->initgradq_vnl(*this->ucell); } + // workspace for meta-GGA correction (size follows wfcpw grids) + resmem_complex_op()(porter1_, this->wfcpw->nmaxgr); + resmem_complex_op()(porter2_, this->wfcpw->nmaxgr); } template @@ -37,6 +48,8 @@ Velocity::~Velocity() delmem_var_op()(this->gz_); delmem_complex_op()(vkb_); delmem_complex_op()(gradvkb_); + delmem_complex_op()(porter1_); + delmem_complex_op()(porter2_); } template @@ -93,6 +106,7 @@ void Velocity::act(const psi::Psi, Device>* const int npw = this->wfcpw->npwk[this->ik]; const int max_npw = this->wfcpw->npwk_max; const int npol = psi_in->get_npol(); + using Real = typename GetTypeReal::type; std::vector gtmp_ptr = {this->gx_, this->gy_, this->gz_}; // ------------- @@ -110,6 +124,71 @@ void Velocity::act(const psi::Psi, Device>* } } + // --------------------------------------------- + // meta-GGA velocity correction: i[V_tau, r] + // V_tau implemented in ABACUS as -∇·(v_tau ∇), + // so [V_tau, r]_j = -[ ∂_j(v_tau ψ) + v_tau ∂_j ψ ]. + // The contribution to velocity is -i times the bracket. + // --------------------------------------------- + if (this->vtau_ != nullptr && this->vtau_col_ > 0 && XC_Functional::get_func_type() == 3) + { + const int current_spin = (this->vtau_row_ > 1 && psi_in->get_npol() == 2) ? this->isk[this->ik] : 0; + const Real* vtau_spin = this->vtau_ + current_spin * this->vtau_col_; + Complex minus_i(0.0, -1.0); + for (int ib = 0; ib < n_npwx; ++ib) + { + const Complex* bandpsi = psi0 + ib * max_npw; + for (int id = 0; id < 3; ++id) + { + // term1: ∂_id (v_tau * psi) + this->wfcpw->recip_to_real(this->ctx, bandpsi, this->porter1_, this->ik); + ModuleBase::vector_mul_vector_op()(this->vtau_col_, this->porter1_, this->porter1_, vtau_spin); + this->wfcpw->real_to_recip(this->ctx, this->porter1_, this->porter1_, this->ik); + meta_pw_op()(this->ctx, + this->ik, + id, + npw, + max_npw, + this->tpiba, + this->wfcpw->template get_gcar_data(), + this->wfcpw->template get_kvec_c_data(), + this->porter1_, + this->porter2_, + false); + + // term2: v_tau * ∂_id psi (reuse porter1_) + meta_pw_op()(this->ctx, + this->ik, + id, + npw, + max_npw, + this->tpiba, + this->wfcpw->template get_gcar_data(), + this->wfcpw->template get_kvec_c_data(), + bandpsi, + this->porter1_, + false); + this->wfcpw->recip_to_real(this->ctx, this->porter1_, this->porter1_, this->ik); + ModuleBase::vector_mul_vector_op()(this->vtau_col_, this->porter1_, this->porter1_, vtau_spin); + this->wfcpw->real_to_recip(this->ctx, this->porter1_, this->porter1_, this->ik); + + // sum and apply -i + ModuleBase::vector_add_vector_op()(npw, + this->porter1_, + this->porter1_, + static_cast(1.0), + this->porter2_, + static_cast(1.0)); + ModuleBase::scal_op()(npw, &minus_i, this->porter1_, 1); + + // accumulate to vpsi (component id, band ib) + Complex* vpsi_slice = vpsi + id * n_npwx * max_npw + ib * max_npw; + Complex one = 1.0; + ModuleBase::axpy_op()(npw, &one, this->porter1_, 1, vpsi_slice, 1); + } + } + } + // --------------------------------------------- // i[V_NL, r] = (\nabla_q+\nabla_q')V_{NL}(q,q') // |\beta><\beta|\psi> @@ -334,4 +413,4 @@ template class Velocity; template class Velocity; #endif -} // namespace hamilt \ No newline at end of file +} // namespace hamilt diff --git a/source/source_pw/module_pwdft/op_pw_vel.h b/source/source_pw/module_pwdft/op_pw_vel.h index 211ce4007e0..b5aa6a42a45 100644 --- a/source/source_pw/module_pwdft/op_pw_vel.h +++ b/source/source_pw/module_pwdft/op_pw_vel.h @@ -2,6 +2,7 @@ #define VELOCITY_PW_H #include "op_pw.h" #include "source_cell/unitcell.h" +#include "source_base/module_device/types.h" #include "source_pw/module_pwdft/vnl_pw.h" #include "source_basis/module_pw/pw_basis_k.h" namespace hamilt @@ -17,7 +18,10 @@ class Velocity const int* isk_in, pseudopot_cell_vnl* ppcell_in, const UnitCell* ucell_in, - const bool nonlocal_in = true + const bool nonlocal_in = true, + const typename GetTypeReal::type* vtau_in = nullptr, + const int vtau_col_in = 0, + const int vtau_row_in = 0 ); ~Velocity(); @@ -54,7 +58,13 @@ class Velocity int ik=0; double tpiba=0.0; - + const typename GetTypeReal::type* vtau_ = nullptr; ///< [CPU] meta-GGA vtau on real grid (nspin x nrxx_smooth) + int vtau_col_ = 0; ///< number of grid points per spin for vtau + int vtau_row_ = 0; ///< number of spin channels stored in vtau_ + std::complex* porter1_ = nullptr; ///< workspace on real grid + std::complex* porter2_ = nullptr; ///< workspace on real grid / recip grid + base_device::DEVICE_CPU* ctx = {}; + private: FPTYPE* gx_ = nullptr; ///<[Device, npwx] x component of G+K FPTYPE* gy_ = nullptr; ///<[Device, npwx] y component of G+K @@ -76,4 +86,4 @@ class Velocity using syncmem_complex_h2d_op = base_device::memory::synchronize_memory_op, Device, base_device::DEVICE_CPU>; }; } -#endif \ No newline at end of file +#endif diff --git a/source/source_pw/module_stodft/sto_elecond.cpp b/source/source_pw/module_stodft/sto_elecond.cpp index 1a32e9db83b..f9d9d99e2c6 100644 --- a/source/source_pw/module_stodft/sto_elecond.cpp +++ b/source/source_pw/module_stodft/sto_elecond.cpp @@ -7,6 +7,8 @@ #include "source_base/parallel_device.h" #include "source_base/timer.h" #include "source_base/vector3.h" +#include "source_estate/module_pot/potential_new.h" +#include "source_base/module_device/types.h" #include "source_io/module_parameter/parameter.h" #include "sto_tool.h" @@ -614,8 +616,37 @@ void Sto_EleCond::sKG(const int& smear_type, // ik loop ModuleBase::timer::tick("Sto_EleCond", "kloop"); - hamilt::Velocity velop(this->p_wfcpw, this->p_kv->isk.data(), this->p_ppcell, this->p_ucell, nonlocal); - hamilt::Velocity low_velop(this->p_wfcpw, this->p_kv->isk.data(), this->p_ppcell, this->p_ucell, nonlocal); + using Real = typename GetTypeReal::type; + using LowReal = typename GetTypeReal::type; + const Real* vtau_ptr = (this->p_elec != nullptr && this->p_elec->pot != nullptr) + ? this->p_elec->pot->template get_vofk_smooth_data() + : nullptr; + const LowReal* vtau_ptr_low = (this->p_elec != nullptr && this->p_elec->pot != nullptr) + ? this->p_elec->pot->template get_vofk_smooth_data() + : nullptr; + const int vtau_col = (this->p_elec != nullptr && this->p_elec->pot != nullptr) + ? this->p_elec->pot->get_vofk_smooth().nc + : 0; + const int vtau_row = (this->p_elec != nullptr && this->p_elec->pot != nullptr) + ? this->p_elec->pot->get_vofk_smooth().nr + : 0; + + hamilt::Velocity velop(this->p_wfcpw, + this->p_kv->isk.data(), + this->p_ppcell, + this->p_ucell, + nonlocal, + vtau_ptr, + vtau_col, + vtau_row); + hamilt::Velocity low_velop(this->p_wfcpw, + this->p_kv->isk.data(), + this->p_ppcell, + this->p_ucell, + nonlocal, + vtau_ptr_low, + vtau_col, + vtau_row); for (int ik = 0; ik < nk; ++ik) { velop.init(ik); @@ -1078,4 +1109,3 @@ template class Sto_EleCond; #if ((defined __CUDA) || (defined __ROCM)) template class Sto_EleCond; #endif - From bed1ff65afc50546b788691d65e50bfcb12e18b3 Mon Sep 17 00:00:00 2001 From: someone Date: Sat, 11 Apr 2026 17:38:17 +0800 Subject: [PATCH 3/5] Test --- source/source_pw/module_pwdft/op_pw_vel.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/source/source_pw/module_pwdft/op_pw_vel.cpp b/source/source_pw/module_pwdft/op_pw_vel.cpp index 925473b3427..c5a5859a515 100644 --- a/source/source_pw/module_pwdft/op_pw_vel.cpp +++ b/source/source_pw/module_pwdft/op_pw_vel.cpp @@ -123,7 +123,11 @@ void Velocity::act(const psi::Psi, Device>* tmpsi_in += max_npw; } } - + if (this->ppcell->nkb <= 0 || !this->nonlocal) + { + ModuleBase::timer::tick("Operator", "Velocity"); + return; + } // --------------------------------------------- // meta-GGA velocity correction: i[V_tau, r] // V_tau implemented in ABACUS as -∇·(v_tau ∇), @@ -188,7 +192,8 @@ void Velocity::act(const psi::Psi, Device>* } } } - + ModuleBase::timer::tick("Operator", "Velocity"); + return; // --------------------------------------------- // i[V_NL, r] = (\nabla_q+\nabla_q')V_{NL}(q,q') // |\beta><\beta|\psi> From 60d9423a900e8c3ee52006acdc73c46e3c4bedb8 Mon Sep 17 00:00:00 2001 From: someone Date: Sat, 11 Apr 2026 18:13:57 +0800 Subject: [PATCH 4/5] Revert "Test" This reverts commit bed1ff65afc50546b788691d65e50bfcb12e18b3. --- source/source_pw/module_pwdft/op_pw_vel.cpp | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/source/source_pw/module_pwdft/op_pw_vel.cpp b/source/source_pw/module_pwdft/op_pw_vel.cpp index c5a5859a515..925473b3427 100644 --- a/source/source_pw/module_pwdft/op_pw_vel.cpp +++ b/source/source_pw/module_pwdft/op_pw_vel.cpp @@ -123,11 +123,7 @@ void Velocity::act(const psi::Psi, Device>* tmpsi_in += max_npw; } } - if (this->ppcell->nkb <= 0 || !this->nonlocal) - { - ModuleBase::timer::tick("Operator", "Velocity"); - return; - } + // --------------------------------------------- // meta-GGA velocity correction: i[V_tau, r] // V_tau implemented in ABACUS as -∇·(v_tau ∇), @@ -192,8 +188,7 @@ void Velocity::act(const psi::Psi, Device>* } } } - ModuleBase::timer::tick("Operator", "Velocity"); - return; + // --------------------------------------------- // i[V_NL, r] = (\nabla_q+\nabla_q')V_{NL}(q,q') // |\beta><\beta|\psi> From 8c57ab0a979eb915b39a0cf8bf9a91f3a63993cd Mon Sep 17 00:00:00 2001 From: someone Date: Sat, 11 Apr 2026 23:27:17 +0800 Subject: [PATCH 5/5] Fix times 2 --- source/source_pw/module_pwdft/op_pw_vel.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/source/source_pw/module_pwdft/op_pw_vel.cpp b/source/source_pw/module_pwdft/op_pw_vel.cpp index 925473b3427..537eb3d8735 100644 --- a/source/source_pw/module_pwdft/op_pw_vel.cpp +++ b/source/source_pw/module_pwdft/op_pw_vel.cpp @@ -126,15 +126,18 @@ void Velocity::act(const psi::Psi, Device>* // --------------------------------------------- // meta-GGA velocity correction: i[V_tau, r] - // V_tau implemented in ABACUS as -∇·(v_tau ∇), - // so [V_tau, r]_j = -[ ∂_j(v_tau ψ) + v_tau ∂_j ψ ]. + // Libxc returns d(exc)/d(tau) with tau = |∇ψ|^2 / 2, while the PW path + // applies the operator through the unscaled |∇ψ|^2 channel. Restrict the + // fix to the velocity path by adding the missing 1/2 here: + // V_tau = -(1/2) ∇·(v_tau ∇), so + // [V_tau, r]_j = -(1/2) [ ∂_j(v_tau ψ) + v_tau ∂_j ψ ]. // The contribution to velocity is -i times the bracket. // --------------------------------------------- if (this->vtau_ != nullptr && this->vtau_col_ > 0 && XC_Functional::get_func_type() == 3) { const int current_spin = (this->vtau_row_ > 1 && psi_in->get_npol() == 2) ? this->isk[this->ik] : 0; const Real* vtau_spin = this->vtau_ + current_spin * this->vtau_col_; - Complex minus_i(0.0, -1.0); + Complex minus_half_i(0.0, -0.5); for (int ib = 0; ib < n_npwx; ++ib) { const Complex* bandpsi = psi0 + ib * max_npw; @@ -172,14 +175,14 @@ void Velocity::act(const psi::Psi, Device>* ModuleBase::vector_mul_vector_op()(this->vtau_col_, this->porter1_, this->porter1_, vtau_spin); this->wfcpw->real_to_recip(this->ctx, this->porter1_, this->porter1_, this->ik); - // sum and apply -i + // sum and apply -(i/2) ModuleBase::vector_add_vector_op()(npw, this->porter1_, this->porter1_, static_cast(1.0), this->porter2_, static_cast(1.0)); - ModuleBase::scal_op()(npw, &minus_i, this->porter1_, 1); + ModuleBase::scal_op()(npw, &minus_half_i, this->porter1_, 1); // accumulate to vpsi (component id, band ib) Complex* vpsi_slice = vpsi + id * n_npwx * max_npw + ib * max_npw;