Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions source/source_esolver/esolver_ks_pw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -307,10 +307,7 @@ void ESolver_KS_PW<T, Device>::iter_finish(UnitCell& ucell, const int istep, int
if (PARAM.inp.exx_thr_type == "energy")
{
dexx = exx_helper.cal_exx_energy(this->stp.psi_t);
}
exx_helper.set_psi(this->stp.psi_t);
if (PARAM.inp.exx_thr_type == "energy")
{
exx_helper.set_psi(this->stp.psi_t);
dexx -= exx_helper.cal_exx_energy(this->stp.psi_t);
// std::cout << "dexx = " << dexx << std::endl;
}
Expand All @@ -319,6 +316,10 @@ void ESolver_KS_PW<T, Device>::iter_finish(UnitCell& ucell, const int istep, int
conv_esolver = exx_helper.exx_after_converge(iter, conv_ene);
if (!conv_esolver)
{
if (PARAM.inp.exx_thr_type != "energy")
{
exx_helper.set_psi(this->stp.psi_t);
}
auto duration = std::chrono::high_resolution_clock::now() - start;
std::cout << " Setting Psi for EXX PW Inner Loop took "
<< std::chrono::duration_cast<std::chrono::milliseconds>(duration).count() / 1000.0 << "s"
Expand Down
21 changes: 20 additions & 1 deletion source/source_pw/module_pwdft/elecond.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
#include "source_base/kernels/math_kernel_op.h"
#include "source_base/parallel_device.h"
#include "source_estate/occupy.h"
#include "source_estate/module_pot/potential_new.h"
#include "source_base/module_device/types.h"
#include "source_io/binstream.h"
#include "source_io/module_parameter/parameter.h"

Expand Down Expand Up @@ -84,7 +86,24 @@ void EleCond<FPTYPE, Device>::KG(const int& smear_type,
std::vector<double> ct12(nt, 0);
std::vector<double> ct22(nt, 0);

hamilt::Velocity<FPTYPE, Device> velop(this->p_wfcpw, this->p_kv->isk.data(), this->p_ppcell, this->p_ucell, nonlocal);
using Real = typename GetTypeReal<FPTYPE>::type;
const Real* vtau_ptr = (this->p_elec != nullptr && this->p_elec->pot != nullptr)
? this->p_elec->pot->template get_vofk_smooth_data<Real>()
: nullptr;
const int vtau_col = (this->p_elec != nullptr && this->p_elec->pot != nullptr)
? this->p_elec->pot->get_vofk_smooth().nc
: 0;
const int vtau_row = (this->p_elec != nullptr && this->p_elec->pot != nullptr)
? this->p_elec->pot->get_vofk_smooth().nr
: 0;
hamilt::Velocity<FPTYPE, Device> velop(this->p_wfcpw,
this->p_kv->isk.data(),
this->p_ppcell,
this->p_ucell,
nonlocal,
vtau_ptr,
vtau_col,
vtau_row);
double decut = (wcut + fwhmin) / ModuleBase::Ry_to_eV;
std::cout << "Recommended dt: " << 0.25 * M_PI / decut << " a.u." << std::endl;
for (int ik = 0; ik < nk; ++ik)
Expand Down
86 changes: 84 additions & 2 deletions source/source_pw/module_pwdft/op_pw_vel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
#include "source_base/kernels/math_kernel_op.h"
#include "source_base/parallel_reduce.h"
#include "source_base/timer.h"
#include "source_hamilt/module_xc/xc_functional.h"
#include "source_pw/module_pwdft/kernels/meta_op.h"
namespace hamilt
{

Expand All @@ -11,7 +13,10 @@ Velocity<FPTYPE, Device>::Velocity(const ModulePW::PW_Basis_K* wfcpw_in,
const int* isk_in,
pseudopot_cell_vnl* ppcell_in,
const UnitCell* ucell_in,
const bool nonlocal_in)
const bool nonlocal_in,
const typename GetTypeReal<FPTYPE>::type* vtau_in,
const int vtau_col_in,
const int vtau_row_in)
{
if (wfcpw_in == nullptr || isk_in == nullptr || ppcell_in == nullptr || ucell_in == nullptr)
{
Expand All @@ -23,10 +28,16 @@ Velocity<FPTYPE, Device>::Velocity(const ModulePW::PW_Basis_K* wfcpw_in,
this->ucell = ucell_in;
this->nonlocal = nonlocal_in;
this->tpiba = ucell_in->tpiba;
this->vtau_ = vtau_in;
this->vtau_col_ = vtau_col_in;
this->vtau_row_ = vtau_row_in;
if (this->nonlocal)
{
this->ppcell->initgradq_vnl(*this->ucell);
}
// workspace for meta-GGA correction (size follows wfcpw grids)
resmem_complex_op()(porter1_, this->wfcpw->nmaxgr);
resmem_complex_op()(porter2_, this->wfcpw->nmaxgr);
Comment on lines 34 to +40
}

template <typename FPTYPE, typename Device>
Expand All @@ -37,6 +48,8 @@ Velocity<FPTYPE, Device>::~Velocity()
delmem_var_op()(this->gz_);
delmem_complex_op()(vkb_);
delmem_complex_op()(gradvkb_);
delmem_complex_op()(porter1_);
delmem_complex_op()(porter2_);
}

template <typename FPTYPE, typename Device>
Expand Down Expand Up @@ -93,6 +106,7 @@ void Velocity<FPTYPE, Device>::act(const psi::Psi<std::complex<FPTYPE>, Device>*
const int npw = this->wfcpw->npwk[this->ik];
const int max_npw = this->wfcpw->npwk_max;
const int npol = psi_in->get_npol();
using Real = typename GetTypeReal<FPTYPE>::type;

std::vector<FPTYPE*> gtmp_ptr = {this->gx_, this->gy_, this->gz_};
// -------------
Expand All @@ -110,6 +124,74 @@ void Velocity<FPTYPE, Device>::act(const psi::Psi<std::complex<FPTYPE>, Device>*
}
}

// ---------------------------------------------
// meta-GGA velocity correction: i[V_tau, r]
// Libxc returns d(exc)/d(tau) with tau = |∇ψ|^2 / 2, while the PW path
// applies the operator through the unscaled |∇ψ|^2 channel. Restrict the
// fix to the velocity path by adding the missing 1/2 here:
// V_tau = -(1/2) ∇·(v_tau ∇), so
// [V_tau, r]_j = -(1/2) [ ∂_j(v_tau ψ) + v_tau ∂_j ψ ].
// The contribution to velocity is -i times the bracket.
// ---------------------------------------------
if (this->vtau_ != nullptr && this->vtau_col_ > 0 && XC_Functional::get_func_type() == 3)
{
const int current_spin = (this->vtau_row_ > 1 && psi_in->get_npol() == 2) ? this->isk[this->ik] : 0;
const Real* vtau_spin = this->vtau_ + current_spin * this->vtau_col_;
Complex minus_half_i(0.0, -0.5);
for (int ib = 0; ib < n_npwx; ++ib)
{
const Complex* bandpsi = psi0 + ib * max_npw;
for (int id = 0; id < 3; ++id)
{
// term1: ∂_id (v_tau * psi)
this->wfcpw->recip_to_real(this->ctx, bandpsi, this->porter1_, this->ik);
ModuleBase::vector_mul_vector_op<Complex, Device>()(this->vtau_col_, this->porter1_, this->porter1_, vtau_spin);
this->wfcpw->real_to_recip(this->ctx, this->porter1_, this->porter1_, this->ik);
meta_pw_op<Real, Device>()(this->ctx,
Comment on lines +146 to +150
this->ik,
id,
npw,
max_npw,
this->tpiba,
this->wfcpw->template get_gcar_data<Real>(),
this->wfcpw->template get_kvec_c_data<Real>(),
this->porter1_,
this->porter2_,
false);

// term2: v_tau * ∂_id psi (reuse porter1_)
meta_pw_op<Real, Device>()(this->ctx,
this->ik,
id,
npw,
max_npw,
this->tpiba,
this->wfcpw->template get_gcar_data<Real>(),
this->wfcpw->template get_kvec_c_data<Real>(),
bandpsi,
this->porter1_,
false);
this->wfcpw->recip_to_real(this->ctx, this->porter1_, this->porter1_, this->ik);
ModuleBase::vector_mul_vector_op<Complex, Device>()(this->vtau_col_, this->porter1_, this->porter1_, vtau_spin);
this->wfcpw->real_to_recip(this->ctx, this->porter1_, this->porter1_, this->ik);

// sum and apply -(i/2)
ModuleBase::vector_add_vector_op<Complex, Device>()(npw,
this->porter1_,
this->porter1_,
static_cast<Real>(1.0),
this->porter2_,
static_cast<Real>(1.0));
ModuleBase::scal_op<Real, Device>()(npw, &minus_half_i, this->porter1_, 1);

// accumulate to vpsi (component id, band ib)
Complex* vpsi_slice = vpsi + id * n_npwx * max_npw + ib * max_npw;
Complex one = 1.0;
ModuleBase::axpy_op<Complex, Device>()(npw, &one, this->porter1_, 1, vpsi_slice, 1);
}
}
}

// ---------------------------------------------
// i[V_NL, r] = (\nabla_q+\nabla_q')V_{NL}(q,q')
// |\beta><\beta|\psi>
Expand Down Expand Up @@ -334,4 +416,4 @@ template class Velocity<double, base_device::DEVICE_GPU>;
template class Velocity<float, base_device::DEVICE_GPU>;
#endif

} // namespace hamilt
} // namespace hamilt
16 changes: 13 additions & 3 deletions source/source_pw/module_pwdft/op_pw_vel.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define VELOCITY_PW_H
#include "op_pw.h"
#include "source_cell/unitcell.h"
#include "source_base/module_device/types.h"
#include "source_pw/module_pwdft/vnl_pw.h"
#include "source_basis/module_pw/pw_basis_k.h"
namespace hamilt
Expand All @@ -17,7 +18,10 @@ class Velocity
const int* isk_in,
pseudopot_cell_vnl* ppcell_in,
const UnitCell* ucell_in,
const bool nonlocal_in = true
const bool nonlocal_in = true,
const typename GetTypeReal<FPTYPE>::type* vtau_in = nullptr,
const int vtau_col_in = 0,
const int vtau_row_in = 0
);

~Velocity();
Expand Down Expand Up @@ -54,7 +58,13 @@ class Velocity
int ik=0;

double tpiba=0.0;

const typename GetTypeReal<FPTYPE>::type* vtau_ = nullptr; ///< [CPU] meta-GGA vtau on real grid (nspin x nrxx_smooth)
int vtau_col_ = 0; ///< number of grid points per spin for vtau
int vtau_row_ = 0; ///< number of spin channels stored in vtau_
std::complex<FPTYPE>* porter1_ = nullptr; ///< workspace on real grid
std::complex<FPTYPE>* porter2_ = nullptr; ///< workspace on real grid / recip grid
base_device::DEVICE_CPU* ctx = {};

private:
FPTYPE* gx_ = nullptr; ///<[Device, npwx] x component of G+K
FPTYPE* gy_ = nullptr; ///<[Device, npwx] y component of G+K
Expand All @@ -76,4 +86,4 @@ class Velocity
using syncmem_complex_h2d_op = base_device::memory::synchronize_memory_op<std::complex<FPTYPE>, Device, base_device::DEVICE_CPU>;
};
}
#endif
#endif
36 changes: 33 additions & 3 deletions source/source_pw/module_stodft/sto_elecond.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
#include "source_base/parallel_device.h"
#include "source_base/timer.h"
#include "source_base/vector3.h"
#include "source_estate/module_pot/potential_new.h"
#include "source_base/module_device/types.h"
#include "source_io/module_parameter/parameter.h"
#include "sto_tool.h"

Expand Down Expand Up @@ -614,8 +616,37 @@ void Sto_EleCond<FPTYPE, Device>::sKG(const int& smear_type,

// ik loop
ModuleBase::timer::tick("Sto_EleCond", "kloop");
hamilt::Velocity<FPTYPE, Device> velop(this->p_wfcpw, this->p_kv->isk.data(), this->p_ppcell, this->p_ucell, nonlocal);
hamilt::Velocity<lowTYPE, Device> low_velop(this->p_wfcpw, this->p_kv->isk.data(), this->p_ppcell, this->p_ucell, nonlocal);
using Real = typename GetTypeReal<FPTYPE>::type;
using LowReal = typename GetTypeReal<lowTYPE>::type;
const Real* vtau_ptr = (this->p_elec != nullptr && this->p_elec->pot != nullptr)
? this->p_elec->pot->template get_vofk_smooth_data<Real>()
: nullptr;
const LowReal* vtau_ptr_low = (this->p_elec != nullptr && this->p_elec->pot != nullptr)
? this->p_elec->pot->template get_vofk_smooth_data<LowReal>()
: nullptr;
const int vtau_col = (this->p_elec != nullptr && this->p_elec->pot != nullptr)
? this->p_elec->pot->get_vofk_smooth().nc
: 0;
const int vtau_row = (this->p_elec != nullptr && this->p_elec->pot != nullptr)
? this->p_elec->pot->get_vofk_smooth().nr
: 0;

hamilt::Velocity<FPTYPE, Device> velop(this->p_wfcpw,
this->p_kv->isk.data(),
this->p_ppcell,
this->p_ucell,
nonlocal,
vtau_ptr,
vtau_col,
vtau_row);
hamilt::Velocity<lowTYPE, Device> low_velop(this->p_wfcpw,
this->p_kv->isk.data(),
this->p_ppcell,
this->p_ucell,
nonlocal,
vtau_ptr_low,
vtau_col,
vtau_row);
for (int ik = 0; ik < nk; ++ik)
{
velop.init(ik);
Expand Down Expand Up @@ -1078,4 +1109,3 @@ template class Sto_EleCond<double, base_device::DEVICE_CPU>;
#if ((defined __CUDA) || (defined __ROCM))
template class Sto_EleCond<double, base_device::DEVICE_GPU>;
#endif