From f8bb7ab8f962051105f6b734da6121be212f11be Mon Sep 17 00:00:00 2001 From: ubuntu <3158793232@qq.com> Date: Fri, 30 Jan 2026 13:22:41 +0800 Subject: [PATCH 1/8] fix comupte bug --- source/source_pw/module_pwdft/vl_pw.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/source/source_pw/module_pwdft/vl_pw.cpp b/source/source_pw/module_pwdft/vl_pw.cpp index 672fbd6185..c1a3f4e8d0 100644 --- a/source/source_pw/module_pwdft/vl_pw.cpp +++ b/source/source_pw/module_pwdft/vl_pw.cpp @@ -3,7 +3,7 @@ #include "source_base/libm/libm.h" #include "source_base/math_integral.h" #include "source_base/timer.h" - +#include "source_base/tool_quit.h" pseudopot_cell_vl::pseudopot_cell_vl() { numeric = nullptr; @@ -226,8 +226,14 @@ void pseudopot_cell_vl::vloc_of_g(const int& msh, aux [ir] = aux1 [ir] * ModuleBase::libm::sin(gx * r [ir]) / gx; } ModuleBase::Integral::Simpson_Integral(msh, aux, rab, vloc_1d[ig] ); - // here we add the analytic fourier transform of the erf function - vloc_1d[ig] -= fac * ModuleBase::libm::exp(- gx2 * 0.25)/ gx2; + // here we add the analytic fourier transform of the erf function + // here exp(-gx2*0.25) < 1e-100, skip it to avoid + // float underflow and abundant calculation + if (gx2 > 921) + { + continue; + } + vloc_1d[ig] -= fac * std::exp(- gx2 * 0.25)/ gx2; } // enddo const double d_fpi_omega = ModuleBase::FOUR_PI/ucell.omega;//mohan add 2008-06-04 From d6aaca7f7b624f7d92abb48152010b7bcf4a69f0 Mon Sep 17 00:00:00 2001 From: ubuntu <3158793232@qq.com> Date: Fri, 30 Jan 2026 13:29:08 +0800 Subject: [PATCH 2/8] clean format --- source/source_pw/module_pwdft/vl_pw.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/source_pw/module_pwdft/vl_pw.cpp b/source/source_pw/module_pwdft/vl_pw.cpp index c1a3f4e8d0..fb949bc20a 100644 --- a/source/source_pw/module_pwdft/vl_pw.cpp +++ b/source/source_pw/module_pwdft/vl_pw.cpp @@ -3,7 +3,7 @@ #include "source_base/libm/libm.h" #include "source_base/math_integral.h" #include "source_base/timer.h" -#include "source_base/tool_quit.h" + pseudopot_cell_vl::pseudopot_cell_vl() { numeric = nullptr; From 64f07a30b56c227b57bd252c2efc2f7426410f8b Mon Sep 17 00:00:00 2001 From: ubuntu <3158793232@qq.com> Date: Fri, 30 Jan 2026 13:31:22 +0800 Subject: [PATCH 3/8] add change --- source/source_pw/module_pwdft/vl_pw.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/source_pw/module_pwdft/vl_pw.cpp b/source/source_pw/module_pwdft/vl_pw.cpp index fb949bc20a..5ff21a9aaf 100644 --- a/source/source_pw/module_pwdft/vl_pw.cpp +++ b/source/source_pw/module_pwdft/vl_pw.cpp @@ -233,7 +233,7 @@ void pseudopot_cell_vl::vloc_of_g(const int& msh, { continue; } - vloc_1d[ig] -= fac * std::exp(- gx2 * 0.25)/ gx2; + vloc_1d[ig] -= fac * ModuleBase::libm::exp(- gx2 * 0.25)/ gx2; } // enddo const double d_fpi_omega = ModuleBase::FOUR_PI/ucell.omega;//mohan add 2008-06-04 From 9fc712b1e9f84631b9047143cf0d0b5df61e2599 Mon Sep 17 00:00:00 2001 From: ubuntu <3158793232@qq.com> Date: Fri, 30 Jan 2026 16:11:01 +0800 Subject: [PATCH 4/8] add exp --- source/source_base/libm/libm.h | 27 ++++++++++++++++++++++++ source/source_pw/module_pwdft/forces.cpp | 17 +++++++++++++-- 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/source/source_base/libm/libm.h b/source/source_base/libm/libm.h index 13bf65de5c..65996ad43b 100644 --- a/source/source_base/libm/libm.h +++ b/source/source_base/libm/libm.h @@ -49,6 +49,33 @@ template<> inline double cos(double x) { return __cos(x); } template<> inline double sin(double x) { return __sin(x); } template<> inline void sincos(double x, double *s, double *c) { __sincos(x, s, c); } template<> inline std::complex exp(const std::complex &x) { return __cexp(x); } +// A safe exponential function that truncates to zero for very small values +// to avoid underflow and expensive calculations. +// Threshold: exp(-230) approx 1e-100. +inline std::complex truncated_exp(const std::complex &x) +{ + if (x.real() < -230) return std::complex(0.0, 0.0); + return __cexp(x); +} + +inline std::complex truncated_exp(const std::complex &x) +{ + if (x.real() < -230) return std::complex(0.0, 0.0); + return __cexpf(x); +} + +inline double truncated_exp(const double &x) +{ + if (x < -230) return 0.0; + return __exp(x); +} + +inline float truncated_exp(const float &x) +{ + if (x < -230) return 0.0; + return __expf(x); +} + template<> inline float exp(float x) { return __expf(x); } template<> inline float cos(float x) { return __cosf(x); } diff --git a/source/source_pw/module_pwdft/forces.cpp b/source/source_pw/module_pwdft/forces.cpp index fbcebe6304..5a9604dc41 100644 --- a/source/source_pw/module_pwdft/forces.cpp +++ b/source/source_pw/module_pwdft/forces.cpp @@ -537,8 +537,21 @@ void Forces::cal_force_ew(const UnitCell& ucell, { ModuleBase::WARNING_QUIT("ewald", "Can't find optimal alpha."); } + const double damping_factor = ucell.tpiba2 * rho_basis->ggecut / 4.0 / alpha; + // damping_factor corresponds to the exponent (G^2 / 4alpha) + // in the Ewald error estimate. + // It measures how strongly the terms are damped at the plane-wave cutoff. + // Asymptotic behavior: erfc(x) ~ exp(-x^2) for large x. + // Here x^2 = damping_factor. + // If damping_factor > 230.0, exp(-230.0) approx 1e-100, + // meaning the error is negligible. + if (damping_factor > 230.0) + { + upperbound = 0.0; + break; + } upperbound = 2.0 * charge * charge * sqrt(2.0 * alpha / ModuleBase::TWO_PI) - * erfc(sqrt(ucell.tpiba2 * rho_basis->ggecut / 4.0 / alpha)); + * erfc(sqrt(damping_factor)); } while (upperbound > 1.0e-6); const int ig0 = rho_basis->ig_gge0; #pragma omp parallel for @@ -548,7 +561,7 @@ void Forces::cal_force_ew(const UnitCell& ucell, { continue; // skip G=0 } - aux[ig] *= ModuleBase::libm::exp(-1.0 * rho_basis->gg[ig] * ucell.tpiba2 / alpha / 4.0) + aux[ig] *= ModuleBase::libm::truncated_exp(std::complex(-1.0 * rho_basis->gg[ig] * ucell.tpiba2 / alpha / 4.0, 0.0)) / (rho_basis->gg[ig] * ucell.tpiba2); } From 25fc4357e8c12a72eb25615183a7e7a064ab2b08 Mon Sep 17 00:00:00 2001 From: ubuntu <3158793232@qq.com> Date: Mon, 2 Feb 2026 17:54:26 +0800 Subject: [PATCH 5/8] fix bug --- .../module_pwdft/kernels/force_op.cpp | 24 +++++++++++++- .../module_pwdft/kernels/stress_op.cpp | 32 +++++++++++++++++-- 2 files changed, 53 insertions(+), 3 deletions(-) diff --git a/source/source_pw/module_pwdft/kernels/force_op.cpp b/source/source_pw/module_pwdft/kernels/force_op.cpp index 029afdbf7c..85ac655148 100644 --- a/source/source_pw/module_pwdft/kernels/force_op.cpp +++ b/source/source_pw/module_pwdft/kernels/force_op.cpp @@ -109,8 +109,30 @@ struct cal_force_nl_op for (int ipol = 0; ipol < 3; ipol++) { + #ifdef __SW + const std::complex& dbecp_val = dbecp[ipol * nbands * nkb + ib * nkb + inkb]; + uint64_t* parts = reinterpret_cast(const_cast*>(&dbecp_val)); + uint64_t exp_real = (parts[0] >> 52) & 0x7FF; + uint64_t exp_imag = (parts[1] >> 52) & 0x7FF; + if ((exp_real <= 923) || (exp_imag <= 923)) { + continue; + } + const std::complex& becp_val = becp[ib * nkb + inkb]; + uint64_t* becp_parts = reinterpret_cast(const_cast*>(&becp_val)); + uint64_t becp_exp_real = (becp_parts[0] >> 52) & 0x7FF; + uint64_t becp_exp_imag = (becp_parts[1] >> 52) & 0x7FF; + if ((becp_exp_real <= 923) || (becp_exp_imag <= 923)) { + continue; + } + uint64_t* lf_parts = reinterpret_cast(const_cast(&local_force[ipol])); + uint64_t lf_exp = (lf_parts[0] >> 52) & 0x7FF; + if (lf_exp <= 923) { + continue; + } + #endif const FPTYPE dbb - = (conj(dbecp[ipol * nbands * nkb + ib * nkb + inkb]) * becp[ib * nkb + inkb]) + = (conj(dbecp[ipol * nbands * nkb + ib * nkb + inkb]) + * becp[ib * nkb + inkb]) .real(); local_force[ipol] -= ps * fac * dbb; // cf[iat*3+ipol] += ps * fac * dbb; diff --git a/source/source_pw/module_pwdft/kernels/stress_op.cpp b/source/source_pw/module_pwdft/kernels/stress_op.cpp index 46fccdeed6..08eb2d0d0b 100644 --- a/source/source_pw/module_pwdft/kernels/stress_op.cpp +++ b/source/source_pw/module_pwdft/kernels/stress_op.cpp @@ -140,8 +140,28 @@ struct cal_stress_nl_op FPTYPE ps = deeq[((spin * deeq_2 + iat + ia) * deeq_3 + ip1) * deeq_4 + ip2] + ps_qq; const int inkb1 = sum + ia * nproj + ip1; const int inkb2 = sum + ia * nproj + ip2; - // out<<"\n ps = "<& dbecp_val = dbecp[ib * nkb + inkb1]; + uint64_t* parts = reinterpret_cast(const_cast*>(&dbecp_val)); + uint64_t exp_real = (parts[0] >> 52) & 0x7FF; + uint64_t exp_imag = (parts[1] >> 52) & 0x7FF; + if ((exp_real <= 923) || (exp_imag <= 923)) { + continue; + } + const std::complex& becp_val = becp[ib * nkb + inkb2]; + uint64_t* becp_parts = reinterpret_cast(const_cast*>(&becp_val)); + uint64_t becp_exp_real = (becp_parts[0] >> 52) & 0x7FF; + uint64_t becp_exp_imag = (becp_parts[1] >> 52) & 0x7FF; + if ((becp_exp_real <= 923) || (becp_exp_imag <= 923)) { + continue; + } + uint64_t* ls_parts = reinterpret_cast(const_cast(&local_stress)); + uint64_t ls_exp = (ls_parts[0] >> 52) & 0x7FF; + if (ls_exp <= 923) { + continue; + } + #endif const FPTYPE dbb = (conj(dbecp[ib * nkb + inkb1]) * becp[ib * nkb + inkb2]).real(); local_stress -= ps * fac * dbb; } @@ -618,7 +638,8 @@ struct cal_stress_drhoc_aux_op { { rhocg1 *= ModuleBase::FOUR_PI / omega / 2.0 / gx_arr[igl]; FPTYPE g2a = (gx_arr[igl]*gx_arr[igl]) / 4.0; - rhocg1 += ModuleBase::FOUR_PI / omega * gx_arr[ngg] * ModuleBase::libm::exp(-g2a) * (g2a + 1) + rhocg1 += ModuleBase::FOUR_PI / omega * gx_arr[ngg] * + ModuleBase::libm::truncated_exp(-g2a) * (g2a + 1) / pow(gx_arr[igl] * gx_arr[igl], 2); drhocg [igl] = rhocg1; } @@ -644,6 +665,13 @@ struct cal_multi_dot_op { #endif for (int i = 0; i < npw; i++) { + #ifdef __SW + const std::complex& psi_i = psi[i]; + uint64_t* parts = reinterpret_cast(const_cast*>(&psi_i)); + uint64_t exp_real = (parts[0] >> 52) & 0x7FF; + uint64_t exp_imag = (parts[1] >> 52) & 0x7FF; + if ((exp_real <= 923) || (exp_imag <= 923)) {continue;} + #endif sum += fac * gk1[i] * gk2[i] * d_kfac[i] * std::norm(psi[i]); } return sum; From 65af1da4b704e607ab0c3143f29d2f07ffb60c2f Mon Sep 17 00:00:00 2001 From: ubuntu <3158793232@qq.com> Date: Tue, 3 Feb 2026 13:36:29 +0800 Subject: [PATCH 6/8] fix compute bug --- source/source_pw/module_pwdft/kernels/stress_op.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/source/source_pw/module_pwdft/kernels/stress_op.cpp b/source/source_pw/module_pwdft/kernels/stress_op.cpp index 08eb2d0d0b..a3b555a110 100644 --- a/source/source_pw/module_pwdft/kernels/stress_op.cpp +++ b/source/source_pw/module_pwdft/kernels/stress_op.cpp @@ -156,11 +156,6 @@ struct cal_stress_nl_op if ((becp_exp_real <= 923) || (becp_exp_imag <= 923)) { continue; } - uint64_t* ls_parts = reinterpret_cast(const_cast(&local_stress)); - uint64_t ls_exp = (ls_parts[0] >> 52) & 0x7FF; - if (ls_exp <= 923) { - continue; - } #endif const FPTYPE dbb = (conj(dbecp[ib * nkb + inkb1]) * becp[ib * nkb + inkb2]).real(); local_stress -= ps * fac * dbb; From ae01d71d9f82648bc59874bef70c5cd2dd00c18f Mon Sep 17 00:00:00 2001 From: ubuntu <3158793232@qq.com> Date: Fri, 6 Feb 2026 22:51:04 +0800 Subject: [PATCH 7/8] fix avoid compute in libm --- source/source_base/libm/libm.h | 27 ------------------- source/source_pw/module_pwdft/forces.cpp | 7 ++++- .../module_pwdft/kernels/stress_op.cpp | 6 ++++- 3 files changed, 11 insertions(+), 29 deletions(-) diff --git a/source/source_base/libm/libm.h b/source/source_base/libm/libm.h index 65996ad43b..13bf65de5c 100644 --- a/source/source_base/libm/libm.h +++ b/source/source_base/libm/libm.h @@ -49,33 +49,6 @@ template<> inline double cos(double x) { return __cos(x); } template<> inline double sin(double x) { return __sin(x); } template<> inline void sincos(double x, double *s, double *c) { __sincos(x, s, c); } template<> inline std::complex exp(const std::complex &x) { return __cexp(x); } -// A safe exponential function that truncates to zero for very small values -// to avoid underflow and expensive calculations. -// Threshold: exp(-230) approx 1e-100. -inline std::complex truncated_exp(const std::complex &x) -{ - if (x.real() < -230) return std::complex(0.0, 0.0); - return __cexp(x); -} - -inline std::complex truncated_exp(const std::complex &x) -{ - if (x.real() < -230) return std::complex(0.0, 0.0); - return __cexpf(x); -} - -inline double truncated_exp(const double &x) -{ - if (x < -230) return 0.0; - return __exp(x); -} - -inline float truncated_exp(const float &x) -{ - if (x < -230) return 0.0; - return __expf(x); -} - template<> inline float exp(float x) { return __expf(x); } template<> inline float cos(float x) { return __cosf(x); } diff --git a/source/source_pw/module_pwdft/forces.cpp b/source/source_pw/module_pwdft/forces.cpp index b9e6823d8b..8f8b5e7650 100644 --- a/source/source_pw/module_pwdft/forces.cpp +++ b/source/source_pw/module_pwdft/forces.cpp @@ -561,7 +561,12 @@ void Forces::cal_force_ew(const UnitCell& ucell, { continue; // skip G=0 } - aux[ig] *= ModuleBase::libm::truncated_exp(std::complex(-1.0 * rho_basis->gg[ig] * ucell.tpiba2 / alpha / 4.0, 0.0)) + const double damping_factor = -1.0 * rho_basis->gg[ig] * ucell.tpiba2 / alpha / 4.0; + if (damping_factor < -230.0) + { + continue;// if the damping factor is too small, the contribution is negligible + } + aux[ig] *= ModuleBase::libm::exp(std::complex(damping_factor, 0.0)) / (rho_basis->gg[ig] * ucell.tpiba2); } diff --git a/source/source_pw/module_pwdft/kernels/stress_op.cpp b/source/source_pw/module_pwdft/kernels/stress_op.cpp index a3b555a110..7c3489e335 100644 --- a/source/source_pw/module_pwdft/kernels/stress_op.cpp +++ b/source/source_pw/module_pwdft/kernels/stress_op.cpp @@ -633,8 +633,12 @@ struct cal_stress_drhoc_aux_op { { rhocg1 *= ModuleBase::FOUR_PI / omega / 2.0 / gx_arr[igl]; FPTYPE g2a = (gx_arr[igl]*gx_arr[igl]) / 4.0; + if (-g2a < -230.0) + { + continue; + } rhocg1 += ModuleBase::FOUR_PI / omega * gx_arr[ngg] * - ModuleBase::libm::truncated_exp(-g2a) * (g2a + 1) + ModuleBase::libm::exp(-g2a) * (g2a + 1) / pow(gx_arr[igl] * gx_arr[igl], 2); drhocg [igl] = rhocg1; } From 7a739ecf6ced322f773b0b7ede7f2ddf295c7871 Mon Sep 17 00:00:00 2001 From: ubuntu <3158793232@qq.com> Date: Sun, 8 Feb 2026 22:36:12 +0800 Subject: [PATCH 8/8] add truncated func --- source/source_base/truncated_func.h | 116 ++++++++++++++++++ source/source_pw/module_pwdft/forces.cpp | 25 +--- .../module_pwdft/kernels/force_op.cpp | 33 ++--- .../module_pwdft/kernels/stress_op.cpp | 38 ++---- source/source_pw/module_pwdft/vl_pw.cpp | 10 +- 5 files changed, 139 insertions(+), 83 deletions(-) create mode 100644 source/source_base/truncated_func.h diff --git a/source/source_base/truncated_func.h b/source/source_base/truncated_func.h new file mode 100644 index 0000000000..55ce64953a --- /dev/null +++ b/source/source_base/truncated_func.h @@ -0,0 +1,116 @@ +#ifndef MODULE_BASE_TRUNCATED_FUNC_H +#define MODULE_BASE_TRUNCATED_FUNC_H + +#include "source_base/libm/libm.h" +#include +#include +#include + +namespace ModuleBase +{ + +/** + * @brief Truncated exponential function to avoid underflow. + * + * This function returns 0 if the real part of the input is less than -230.0, + * otherwise it calls ModuleBase::libm::exp(x). + * + * @tparam FPTYPE The floating point type (float, double, or complex). + * @param x The input value. + * @return FPTYPE The result of the exponential function. + */ +template +inline FPTYPE truncated_exp(FPTYPE x) +{ + if (std::real(x) < -230.0) + { + return static_cast(0.0); + } + return ModuleBase::libm::exp(x); +} + +/** + * @brief Truncated complementary error function to avoid underflow for large arguments. + * + * This function returns 0 if the real part of the input is greater than 20.0, + * otherwise it calls std::erfc(x). + * + * @tparam FPTYPE The floating point type (float, double, or complex). + * @param x The input value. + * @return FPTYPE The result of the erfc function. + */ +template +inline FPTYPE truncated_erfc(FPTYPE x) +{ + if (std::real(x) > 20.0) + { + return static_cast(0.0); + } + return std::erfc(x); +} + +/** + * @brief Truncated value function to avoid underflow. + * + * This function returns 0 if the absolute value of the input is less than 1.0e-30, + * otherwise it returns the input x. + * + * @tparam FPTYPE The floating point type (float, double, or complex). + * @param x The input value. + * @return FPTYPE The result of the truncation. + */ +/** + * @brief Truncated value function to avoid underflow. + * + * This function modifies the input to 0 if its absolute value is less than 1.0e-30. + * + * @tparam FPTYPE The floating point type (float, double, or complex). + * @param x The input value to be checked and possibly truncated. + */ +template +inline void truncated_underflow(FPTYPE& x) +{ + if (std::abs(x) < 1.0e-30) + { + x = static_cast(0.0); + } +} + +template <> +inline void truncated_underflow(double& x) +{ + const uint64_t u = *reinterpret_cast(&x); + // The exponent bits are 52-62 (11 bits). The bias is 1023. + // 1e-30 corresponds to -100 in base-2 exponent roughly. + // 923 = 1023 - 100. + if (((u >> 52) & 0x7FF) <= 923) + { + x = 0.0; + } +} + +template <> +inline void truncated_underflow(float& x) +{ + const uint32_t u = *reinterpret_cast(&x); + // The exponent bits are 23-30 (8 bits). The bias is 127. + // 1e-30 corresponds to -100 in base-2 exponent roughly. + // 27 = 127 - 100. + if (((u >> 23) & 0xFF) <= 27) + { + x = 0.0f; + } +} + +template +inline void truncated_underflow(std::complex& x) +{ + T* ptr = reinterpret_cast(&x); + truncated_underflow(ptr[0]); + truncated_underflow(ptr[1]); +} + + +} // namespace ModuleBase + +#endif // MODULE_BASE_TRUNCATED_FUNC_H \ No newline at end of file diff --git a/source/source_pw/module_pwdft/forces.cpp b/source/source_pw/module_pwdft/forces.cpp index 8f8b5e7650..a6894c49ca 100644 --- a/source/source_pw/module_pwdft/forces.cpp +++ b/source/source_pw/module_pwdft/forces.cpp @@ -5,6 +5,7 @@ // new #include "source_base/complexmatrix.h" #include "source_base/libm/libm.h" +#include "source_base/truncated_func.h" #include "source_base/math_integral.h" #include "source_base/mathzone.h" #include "source_base/timer.h" @@ -537,21 +538,7 @@ void Forces::cal_force_ew(const UnitCell& ucell, { ModuleBase::WARNING_QUIT("ewald", "Can't find optimal alpha."); } - const double damping_factor = ucell.tpiba2 * rho_basis->ggecut / 4.0 / alpha; - // damping_factor corresponds to the exponent (G^2 / 4alpha) - // in the Ewald error estimate. - // It measures how strongly the terms are damped at the plane-wave cutoff. - // Asymptotic behavior: erfc(x) ~ exp(-x^2) for large x. - // Here x^2 = damping_factor. - // If damping_factor > 230.0, exp(-230.0) approx 1e-100, - // meaning the error is negligible. - if (damping_factor > 230.0) - { - upperbound = 0.0; - break; - } - upperbound = 2.0 * charge * charge * sqrt(2.0 * alpha / ModuleBase::TWO_PI) - * erfc(sqrt(damping_factor)); + upperbound = 2.0 * charge * charge * sqrt(2.0 * alpha / ModuleBase::TWO_PI)* ModuleBase::truncated_erfc(sqrt( ucell.tpiba2 * rho_basis->ggecut / 4.0 / alpha)); } while (upperbound > 1.0e-6); const int ig0 = rho_basis->ig_gge0; #pragma omp parallel for @@ -561,12 +548,8 @@ void Forces::cal_force_ew(const UnitCell& ucell, { continue; // skip G=0 } - const double damping_factor = -1.0 * rho_basis->gg[ig] * ucell.tpiba2 / alpha / 4.0; - if (damping_factor < -230.0) - { - continue;// if the damping factor is too small, the contribution is negligible - } - aux[ig] *= ModuleBase::libm::exp(std::complex(damping_factor, 0.0)) + aux[ig] *= ModuleBase::truncated_exp + (-1.0 * rho_basis->gg[ig] * ucell.tpiba2 / alpha / 4.0) / (rho_basis->gg[ig] * ucell.tpiba2); } diff --git a/source/source_pw/module_pwdft/kernels/force_op.cpp b/source/source_pw/module_pwdft/kernels/force_op.cpp index 85ac655148..0e0c34ccdd 100644 --- a/source/source_pw/module_pwdft/kernels/force_op.cpp +++ b/source/source_pw/module_pwdft/kernels/force_op.cpp @@ -1,5 +1,7 @@ #include "source_pw/module_pwdft/kernels/force_op.h" +#include "source_base/truncated_func.h" + #ifdef _OPENMP #include #endif @@ -109,31 +111,12 @@ struct cal_force_nl_op for (int ipol = 0; ipol < 3; ipol++) { - #ifdef __SW - const std::complex& dbecp_val = dbecp[ipol * nbands * nkb + ib * nkb + inkb]; - uint64_t* parts = reinterpret_cast(const_cast*>(&dbecp_val)); - uint64_t exp_real = (parts[0] >> 52) & 0x7FF; - uint64_t exp_imag = (parts[1] >> 52) & 0x7FF; - if ((exp_real <= 923) || (exp_imag <= 923)) { - continue; - } - const std::complex& becp_val = becp[ib * nkb + inkb]; - uint64_t* becp_parts = reinterpret_cast(const_cast*>(&becp_val)); - uint64_t becp_exp_real = (becp_parts[0] >> 52) & 0x7FF; - uint64_t becp_exp_imag = (becp_parts[1] >> 52) & 0x7FF; - if ((becp_exp_real <= 923) || (becp_exp_imag <= 923)) { - continue; - } - uint64_t* lf_parts = reinterpret_cast(const_cast(&local_force[ipol])); - uint64_t lf_exp = (lf_parts[0] >> 52) & 0x7FF; - if (lf_exp <= 923) { - continue; - } - #endif - const FPTYPE dbb - = (conj(dbecp[ipol * nbands * nkb + ib * nkb + inkb]) - * becp[ib * nkb + inkb]) - .real(); +#ifdef __SW + ModuleBase::truncated_underflow(dbecp[ipol * nbands * nkb + ib * nkb + inkb]); + ModuleBase::truncated_underflow(becp[ib * nkb + inkb]); + ModuleBase::truncated_underflow(local_force[ipol]); +#endif + const FPTYPE dbb = (conj(dbecp[ipol * nbands * nkb + ib * nkb + inkb]) * becp[ib * nkb + inkb]).real(); local_force[ipol] -= ps * fac * dbb; // cf[iat*3+ipol] += ps * fac * dbb; } diff --git a/source/source_pw/module_pwdft/kernels/stress_op.cpp b/source/source_pw/module_pwdft/kernels/stress_op.cpp index 7c3489e335..6034a52710 100644 --- a/source/source_pw/module_pwdft/kernels/stress_op.cpp +++ b/source/source_pw/module_pwdft/kernels/stress_op.cpp @@ -1,5 +1,6 @@ #include "source_pw/module_pwdft/kernels/stress_op.h" +#include "source_base/truncated_func.h" #include "source_base/constants.h" #include "source_base/libm/libm.h" #include "source_base/math_polyint.h" @@ -140,23 +141,10 @@ struct cal_stress_nl_op FPTYPE ps = deeq[((spin * deeq_2 + iat + ia) * deeq_3 + ip1) * deeq_4 + ip2] + ps_qq; const int inkb1 = sum + ia * nproj + ip1; const int inkb2 = sum + ia * nproj + ip2; - - #ifdef __SW - const std::complex& dbecp_val = dbecp[ib * nkb + inkb1]; - uint64_t* parts = reinterpret_cast(const_cast*>(&dbecp_val)); - uint64_t exp_real = (parts[0] >> 52) & 0x7FF; - uint64_t exp_imag = (parts[1] >> 52) & 0x7FF; - if ((exp_real <= 923) || (exp_imag <= 923)) { - continue; - } - const std::complex& becp_val = becp[ib * nkb + inkb2]; - uint64_t* becp_parts = reinterpret_cast(const_cast*>(&becp_val)); - uint64_t becp_exp_real = (becp_parts[0] >> 52) & 0x7FF; - uint64_t becp_exp_imag = (becp_parts[1] >> 52) & 0x7FF; - if ((becp_exp_real <= 923) || (becp_exp_imag <= 923)) { - continue; - } - #endif +#ifdef __SW + ModuleBase::truncated_underflow(dbecp[ib * nkb + inkb1]); + ModuleBase::truncated_underflow(becp[ib * nkb + inkb2]); +#endif const FPTYPE dbb = (conj(dbecp[ib * nkb + inkb1]) * becp[ib * nkb + inkb2]).real(); local_stress -= ps * fac * dbb; } @@ -633,12 +621,8 @@ struct cal_stress_drhoc_aux_op { { rhocg1 *= ModuleBase::FOUR_PI / omega / 2.0 / gx_arr[igl]; FPTYPE g2a = (gx_arr[igl]*gx_arr[igl]) / 4.0; - if (-g2a < -230.0) - { - continue; - } rhocg1 += ModuleBase::FOUR_PI / omega * gx_arr[ngg] * - ModuleBase::libm::exp(-g2a) * (g2a + 1) + ModuleBase::truncated_exp(-g2a) * (g2a + 1) / pow(gx_arr[igl] * gx_arr[igl], 2); drhocg [igl] = rhocg1; } @@ -664,13 +648,9 @@ struct cal_multi_dot_op { #endif for (int i = 0; i < npw; i++) { - #ifdef __SW - const std::complex& psi_i = psi[i]; - uint64_t* parts = reinterpret_cast(const_cast*>(&psi_i)); - uint64_t exp_real = (parts[0] >> 52) & 0x7FF; - uint64_t exp_imag = (parts[1] >> 52) & 0x7FF; - if ((exp_real <= 923) || (exp_imag <= 923)) {continue;} - #endif +#ifdef __SW + ModuleBase::truncated_underflow(psi[i]); +#endif sum += fac * gk1[i] * gk2[i] * d_kfac[i] * std::norm(psi[i]); } return sum; diff --git a/source/source_pw/module_pwdft/vl_pw.cpp b/source/source_pw/module_pwdft/vl_pw.cpp index 5ff21a9aaf..76ee526fbc 100644 --- a/source/source_pw/module_pwdft/vl_pw.cpp +++ b/source/source_pw/module_pwdft/vl_pw.cpp @@ -1,6 +1,7 @@ #include "vl_pw.h" #include "source_io/module_parameter/parameter.h" #include "source_base/libm/libm.h" +#include "source_base/truncated_func.h" #include "source_base/math_integral.h" #include "source_base/timer.h" @@ -226,14 +227,7 @@ void pseudopot_cell_vl::vloc_of_g(const int& msh, aux [ir] = aux1 [ir] * ModuleBase::libm::sin(gx * r [ir]) / gx; } ModuleBase::Integral::Simpson_Integral(msh, aux, rab, vloc_1d[ig] ); - // here we add the analytic fourier transform of the erf function - // here exp(-gx2*0.25) < 1e-100, skip it to avoid - // float underflow and abundant calculation - if (gx2 > 921) - { - continue; - } - vloc_1d[ig] -= fac * ModuleBase::libm::exp(- gx2 * 0.25)/ gx2; + vloc_1d[ig] -= fac * ModuleBase::truncated_exp(- gx2 * 0.25)/ gx2; } // enddo const double d_fpi_omega = ModuleBase::FOUR_PI/ucell.omega;//mohan add 2008-06-04