Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions source/Makefile.Objects
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,7 @@ OBJS_HCONTAINER=base_matrix.o\
transfer.o\

OBJS_HSOLVER=diago_cg.o\
diago_cg_mixed.o\
diago_david.o\
diago_dav_subspace.o\
diago_bpcg.o\
Expand Down
1 change: 1 addition & 0 deletions source/source_hsolver/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
list(APPEND objects
diag_const_nums.cpp
diago_cg.cpp
diago_cg_mixed.cpp
diago_david.cpp
diago_dav_subspace.cpp
diago_bpcg.cpp
Expand Down
285 changes: 285 additions & 0 deletions source/source_hsolver/diago_cg_mixed.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,285 @@
#include <source_hsolver/diago_cg_mixed.h>
#include <ATen/core/tensor_map.h>
#include <ATen/core/tensor_utils.h>
#include <ATen/kernels/memory.h>
#include <source_base/constants.h>
#include <source_base/memory.h>
#include <source_base/parallel_reduce.h>
#include <source_base/timer.h>
#include <source_base/tool_title.h>
#include <source_base/global_function.h>

using namespace hsolver;

template <typename T, typename Device>
DiagoCGMixed<T, Device>::DiagoCGMixed(const std::string& basis_type, const std::string& calculation)
{
basis_type_ = basis_type; calculation_ = calculation;
}

template <typename T, typename Device>
DiagoCGMixed<T, Device>::DiagoCGMixed(const std::string& basis_type, const std::string& calculation,
const bool& need_subspace, const Func& subspace_func,
const Real& pw_diag_thr, const int& pw_diag_nmax, const int& nproc_in_pool)
{
basis_type_ = basis_type; calculation_ = calculation;
need_subspace_ = need_subspace; subspace_func_ = subspace_func;
pw_diag_thr_ = pw_diag_thr; pw_diag_nmax_ = pw_diag_nmax; nproc_in_pool_ = nproc_in_pool;
}

template <typename T, typename Device>
void DiagoCGMixed<T, Device>::convert_d2f(const ct::Tensor& d_src, ct::Tensor& f_dst)
{
const int n = d_src.NumElements();
const T* d = d_src.data<T>();
T_float* f = f_dst.data<T_float>();
for (int i = 0; i < n; i++) f[i] = static_cast<T_float>(d[i]);
}

template <typename T, typename Device>
void DiagoCGMixed<T, Device>::convert_f2d(const ct::Tensor& f_src, ct::Tensor& d_dst)
{
const int n = f_src.NumElements();
const T_float* f = f_src.data<T_float>();
T* d = d_dst.data<T>();
for (int i = 0; i < n; i++) d[i] = static_cast<T>(f[i]);
}

template <typename T, typename Device>
void DiagoCGMixed<T, Device>::diag_mock(const ct::Tensor& prec_in, ct::Tensor& psi,
ct::Tensor& eigen, const std::vector<double>& ethr_band)
{
ModuleBase::TITLE("DiagoCGMixed", "diag_once");
ModuleBase::timer::tick("DiagoCGMixed", "diag_once");

notconv_ = 0;
n_band_ = psi.shape().dim_size(0);
n_basis_ = psi.shape().dim_size(1);
int avg = 0;

auto dt = ct::DataTypeToEnum<T>::value;
auto dtf = ct::DataTypeToEnum<T_float>::value;
auto dev = ct::DeviceTypeToEnum<ct_Device>::value;
auto dtr = ct::DataTypeToEnum<Real>::value;
auto dtfr = ct::DataTypeToEnum<Real_float>::value;

auto phi_m = ct::Tensor(dt, dev, {n_basis_}), hphi = ct::Tensor(dt, dev, {n_basis_});
auto sphi = ct::Tensor(dt, dev, {n_basis_}), pphi = ct::Tensor(dt, dev, {n_basis_});
auto cg = ct::Tensor(dt, dev, {n_basis_}), scg = ct::Tensor(dt, dev, {n_basis_});
auto grad = ct::Tensor(dt, dev, {n_basis_}), g0 = ct::Tensor(dt, dev, {n_basis_});
auto lagrange = ct::Tensor(dt, dev, {n_band_});

auto phi_m_f = ct::Tensor(dtf, dev, {n_basis_}), hphi_f = ct::Tensor(dtf, dev, {n_basis_});
auto sphi_f = ct::Tensor(dtf, dev, {n_basis_});

auto prec = prec_in;
if (prec.NumElements() == 0) {
prec = ct::Tensor(ct::DataTypeToEnum<Real>::value, ct::DeviceTypeToEnum<ct_Device>::value, {n_basis_});
prec.set_value(static_cast<Real>(1.0));
}
auto prec_f = ct::Tensor(ct::DataTypeToEnum<Real_float>::value, ct::DeviceTypeToEnum<ct_Device>::value, {n_basis_});
{
auto* p = prec.data<Real>();
auto* q = prec_f.data<Real_float>();
for (int i = 0; i < n_basis_; i++) q[i] = static_cast<Real_float>(p[i]);
}

ModuleBase::Memory::record("DiagoCGMixed", n_basis_ * 12);
eigen.zero();
auto eig = eigen.accessor<Real, 1>();

for (int m = 0; m < n_band_; m++)
{
phi_m.sync(psi[m]);
spsi_func_(phi_m, sphi);
schmit_orth(m, psi, sphi, phi_m);

convert_d2f(phi_m, phi_m_f);
hpsi_func_(phi_m_f, hphi_f);
spsi_func_(phi_m_f, sphi_f);
convert_f2d(hphi_f, hphi);
convert_f2d(sphi_f, sphi);

eig[m] = ModuleBase::dot_real_op<T, Device>()(n_basis_, phi_m.data<T>(), hphi.data<T>());

int iter = 0;
Real gg_last = 0, cg_norm = 0, theta = 0;
bool converged = false;

do {
calc_grad(prec_f, grad, hphi, sphi, pphi);
orth_grad(psi, m, grad, scg, lagrange);
calc_gamma_cg(iter, cg_norm, theta, prec_f, scg, grad, phi_m, gg_last, g0, cg);

{
auto cg_f = ct::Tensor(dtf, dev, {n_basis_}), pphi_f = ct::Tensor(dtf, dev, {n_basis_}), scg_f = ct::Tensor(dtf, dev, {n_basis_});
convert_d2f(cg, cg_f);
hpsi_func_(cg_f, pphi_f);
spsi_func_(cg_f, scg_f);
convert_f2d(pphi_f, pphi);
convert_f2d(scg_f, scg);
}

converged = update_psi(pphi, cg, scg, ethr_band[m], cg_norm, theta, eig[m], phi_m, sphi, hphi);
} while (!converged && ++iter < pw_diag_nmax_);

psi[m].sync(phi_m);
if (!converged) ++notconv_;
avg += static_cast<Real>(iter) + 1;

if (m > 0 && eig[m] - eig[m - 1] < -2.0 * pw_diag_thr_) {
int ii = m - 2;
while (ii >= 0 && eig[m] - eig[ii] <= 2.0 * pw_diag_thr_) ii--;
ii++;
Real e0 = eig[m]; pphi.sync(psi[m]);
for (int jj = m; jj > ii; jj--) { eig[jj] = eig[jj - 1]; psi[jj].sync(psi[jj - 1]); }
eig[ii] = e0; psi[ii].sync(pphi);
}
}
avg /= n_band_; avg_iter_ += avg;
ModuleBase::timer::tick("DiagoCGMixed", "diag_once");
}

template <typename T, typename Device>
void DiagoCGMixed<T, Device>::calc_grad(const ct::Tensor& prec_f, ct::Tensor& grad,
ct::Tensor& hphi, ct::Tensor& sphi, ct::Tensor& pphi)
{
auto dtf = ct::DataTypeToEnum<T_float>::value;
auto dev = ct::DeviceTypeToEnum<ct_Device>::value;
auto hphi_f = ct::Tensor(dtf, dev, {n_basis_}), sphi_f = ct::Tensor(dtf, dev, {n_basis_});
auto grad_f = ct::Tensor(dtf, dev, {n_basis_}), pphi_f = ct::Tensor(dtf, dev, {n_basis_});
convert_d2f(hphi, hphi_f);
convert_d2f(sphi, sphi_f);

ModuleBase::vector_div_vector_op<T_float, Device>()(n_basis_, grad_f.data<T_float>(), hphi_f.data<T_float>(), prec_f.data<Real_float>());
ModuleBase::vector_div_vector_op<T_float, Device>()(n_basis_, pphi_f.data<T_float>(), sphi_f.data<T_float>(), prec_f.data<Real_float>());

convert_f2d(grad_f, grad);
convert_f2d(pphi_f, pphi);

const Real eh = ModuleBase::dot_real_op<T, Device>()(n_basis_, sphi.data<T>(), grad.data<T>());
const Real es = ModuleBase::dot_real_op<T, Device>()(n_basis_, sphi.data<T>(), pphi.data<T>());
ModuleBase::vector_add_vector_op<T, Device>()(n_basis_, grad.data<T>(), grad.data<T>(), 1.0, pphi.data<T>(), -(eh / es));
}

template <typename T, typename Device>
void DiagoCGMixed<T, Device>::orth_grad(const ct::Tensor& psi, const int& m,
ct::Tensor& grad, ct::Tensor& scg, ct::Tensor& lagrange)
{
const T one(1.0), zero(0.0), neg_one(-1.0);
spsi_func_(grad, scg);

ModuleBase::gemv_op<T, Device>()('C', n_basis_, m, &one, psi.data<T>(), n_basis_, scg.data<T>(), 1, &zero, lagrange.data<T>(), 1);
Parallel_Reduce::reduce_pool(lagrange.data<T>(), m);

ModuleBase::gemv_op<T, Device>()('N', n_basis_, m, &neg_one, psi.data<T>(), n_basis_, lagrange.data<T>(), 1, &one, grad.data<T>(), 1);
ModuleBase::gemv_op<T, Device>()('N', n_basis_, m, &neg_one, psi.data<T>(), n_basis_, lagrange.data<T>(), 1, &one, scg.data<T>(), 1);
}

template <typename T, typename Device>
void DiagoCGMixed<T, Device>::calc_gamma_cg(const int& iter, const Real& cg_norm, const Real& theta,
const ct::Tensor& prec_f, const ct::Tensor& scg,
const ct::Tensor& grad, const ct::Tensor& phi_m,
Real& gg_last, ct::Tensor& g0, ct::Tensor& cg)
{
Real gg_inter;
if (iter > 0) gg_inter = ModuleBase::dot_real_op<T, Device>()(n_basis_, grad.data<T>(), g0.data<T>());

auto dtf = ct::DataTypeToEnum<T_float>::value;
auto dev = ct::DeviceTypeToEnum<ct_Device>::value;
auto scg_f = ct::Tensor(dtf, dev, {n_basis_}), g0_f = ct::Tensor(dtf, dev, {n_basis_});
convert_d2f(scg, scg_f);
ModuleBase::vector_mul_vector_op<T_float, Device>()(n_basis_, g0_f.data<T_float>(), scg_f.data<T_float>(), prec_f.data<Real_float>());
convert_f2d(g0_f, g0);

const Real gg_now = ModuleBase::dot_real_op<T, Device>()(n_basis_, grad.data<T>(), g0.data<T>());

if (iter == 0) {
gg_last = gg_now;
cg.sync(grad);
} else {
const Real gamma = (gg_now - gg_inter) / gg_last;
gg_last = gg_now;
ModuleBase::vector_add_vector_op<T, Device>()(n_basis_, cg.data<T>(), cg.data<T>(), gamma, grad.data<T>(), 1.0);
T znorma = static_cast<T>(-gamma * cg_norm * sin(theta));
ModuleBase::axpy_op<T, Device>()(n_basis_, &znorma, phi_m.data<T>(), 1, cg.data<T>(), 1);
}
}

template <typename T, typename Device>
bool DiagoCGMixed<T, Device>::update_psi(const ct::Tensor& pphi, const ct::Tensor& cg, const ct::Tensor& scg,
const double& ethreshold, Real& cg_norm, Real& theta, Real& eigen,
ct::Tensor& phi_m, ct::Tensor& sphi, ct::Tensor& hphi)
{
cg_norm = sqrt(ModuleBase::dot_real_op<T, Device>()(n_basis_, cg.data<T>(), scg.data<T>()));
if (cg_norm < 1e-10) return true;

const Real a0 = ModuleBase::dot_real_op<T, Device>()(n_basis_, phi_m.data<T>(), pphi.data<T>()) * 2.0 / cg_norm;
const Real b0 = ModuleBase::dot_real_op<T, Device>()(n_basis_, cg.data<T>(), pphi.data<T>()) / (cg_norm * cg_norm);
const Real e0 = eigen;
theta = atan(a0 / (e0 - b0)) / 2.0;
const Real new_e = (e0 - b0) * cos(2.0 * theta) + a0 * sin(2.0 * theta);
const Real e1 = (e0 + b0 + new_e) / 2.0, e2 = (e0 + b0 - new_e) / 2.0;
if (e1 > e2) theta += ModuleBase::PI_HALF;
eigen = std::min(e1, e2);

const Real cost = cos(theta), sint_norm = sin(theta) / cg_norm;
ModuleBase::vector_add_vector_op<T, Device>()(n_basis_, phi_m.data<T>(), phi_m.data<T>(), cost, cg.data<T>(), sint_norm);

if (std::abs(eigen - e0) < ethreshold) return true;

ModuleBase::vector_add_vector_op<T, Device>()(n_basis_, sphi.data<T>(), sphi.data<T>(), cost, scg.data<T>(), sint_norm);
ModuleBase::vector_add_vector_op<T, Device>()(n_basis_, hphi.data<T>(), hphi.data<T>(), cost, pphi.data<T>(), sint_norm);
return false;
}

template <typename T, typename Device>
void DiagoCGMixed<T, Device>::schmit_orth(const int& m, const ct::Tensor& psi,
const ct::Tensor& sphi, ct::Tensor& phi_m)
{
const T one(1.0), zero(0.0), neg_one(-1.0);
ct::Tensor lagrange_so(ct::DataTypeToEnum<T>::value, ct::DeviceTypeToEnum<ct_Device>::value, {m + 1});

ModuleBase::gemv_op<T, Device>()('C', n_basis_, m + 1, &one, psi.data<T>(), n_basis_, sphi.data<T>(), 1, &zero, lagrange_so.data<T>(), 1);
Parallel_Reduce::reduce_pool(lagrange_so.data<T>(), m + 1);
ModuleBase::gemv_op<T, Device>()('N', n_basis_, m, &neg_one, psi.data<T>(), n_basis_, lagrange_so.data<T>(), 1, &one, phi_m.data<T>(), 1);

auto psi_norm = ct::extract<Real>(lagrange_so[m])
- ModuleBase::dot_real_op<T, Device>()(m, lagrange_so.data<T>(), lagrange_so.data<T>(), false);
ModuleBase::vector_div_constant_op<T, Device>()(n_basis_, phi_m.data<T>(), phi_m.data<T>(), sqrt(psi_norm));
}

template <typename T, typename Device>
bool DiagoCGMixed<T, Device>::test_exit_cond(const int& ntry, const int& notconv) const
{
return ntry <= 5 && (calculation_ != "nscf" ? notconv > 5 : notconv > 0);
}

template <typename T, typename Device>
void DiagoCGMixed<T, Device>::diag(const Func& hpsi_func, const Func& spsi_func,
ct::Tensor& psi, ct::Tensor& eigen,
const std::vector<double>& ethr_band, const ct::Tensor& prec)
{
int ntry = 0; notconv_ = 0;
hpsi_func_ = hpsi_func; spsi_func_ = spsi_func;
ct::Tensor psi_temp = psi.slice({0, 0}, {int(psi.shape().dim_size(0)), int(prec.shape().dim_size(0))});
do {
if (need_subspace_ || ntry > 0) {
ct::TensorMap psi_map = ct::TensorMap(psi.data(), psi_temp);
subspace_func_(psi_temp, psi_map);
psi_temp.sync(psi_map);
}
++ntry; avg_iter_ += 1.0;
diag_mock(prec, psi_temp, eigen, ethr_band);
} while (test_exit_cond(ntry, notconv_));
psi.zero(); psi.sync(psi_temp);
}

namespace hsolver {
template class DiagoCGMixed<std::complex<double>, base_device::DEVICE_CPU>;
template class DiagoCGMixed<std::complex<float>, base_device::DEVICE_CPU>;
#if ((defined __CUDA) || (defined __ROCM))
template class DiagoCGMixed<std::complex<double>, base_device::DEVICE_GPU>;
template class DiagoCGMixed<std::complex<float>, base_device::DEVICE_GPU>;
#endif
} // namespace hsolver
69 changes: 69 additions & 0 deletions source/source_hsolver/diago_cg_mixed.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#ifndef SOURCE_HSOLVER_DIAGO_CG_MIXED_H_
#define SOURCE_HSOLVER_DIAGO_CG_MIXED_H_

#include <functional>
#include <source_base/macros.h>
#include <source_base/kernels/math_kernel_op.h>
#include <ATen/core/tensor.h>
#include <ATen/core/tensor_types.h>

namespace hsolver {

template <typename T> struct GetFloatType { using type = T; };
template <> struct GetFloatType<std::complex<double>> { using type = std::complex<float>; };
template <> struct GetFloatType<double> { using type = float; };

template <typename T> struct GetFloatRealType { using type = typename GetTypeReal<T>::type; };
template <> struct GetFloatRealType<std::complex<double>> { using type = float; };
template <> struct GetFloatRealType<double> { using type = float; };

template <typename T, typename Device = base_device::DEVICE_CPU>
class DiagoCGMixed final
{
using Real = typename GetTypeReal<T>::type;
using ct_Device = typename ct::PsiToContainer<Device>::type;
using T_float = typename GetFloatType<T>::type;
using Real_float = typename GetFloatRealType<T>::type;

public:
using Func = std::function<void(const ct::Tensor&, ct::Tensor&)>;

DiagoCGMixed(const std::string& basis_type, const std::string& calculation);
DiagoCGMixed(const std::string& basis_type, const std::string& calculation,
const bool& need_subspace, const Func& subspace_func,
const Real& pw_diag_thr, const int& pw_diag_nmax, const int& nproc_in_pool);
~DiagoCGMixed() = default;

void diag(const Func& hpsi_func, const Func& spsi_func,
ct::Tensor& psi, ct::Tensor& eigen,
const std::vector<double>& ethr_band, const ct::Tensor& prec = {});

private:
int notconv_ = 0, n_band_ = 0, n_basis_ = 0, avg_iter_ = 0, pw_diag_nmax_ = 0, nproc_in_pool_ = 0;
Real pw_diag_thr_ = 1e-5;
std::string basis_type_, calculation_;
bool need_subspace_ = false;
Func hpsi_func_, spsi_func_, subspace_func_;

void convert_d2f(const ct::Tensor& d, ct::Tensor& f);
void convert_f2d(const ct::Tensor& f, ct::Tensor& d);

void calc_grad(const ct::Tensor& prec_f, ct::Tensor& grad, ct::Tensor& hphi,
ct::Tensor& sphi, ct::Tensor& pphi);
void orth_grad(const ct::Tensor& psi, const int& m, ct::Tensor& grad,
ct::Tensor& scg, ct::Tensor& lagrange);
void calc_gamma_cg(const int& iter, const Real& cg_norm, const Real& theta,
const ct::Tensor& prec_f, const ct::Tensor& scg,
const ct::Tensor& grad, const ct::Tensor& phi_m,
Real& gg_last, ct::Tensor& g0, ct::Tensor& cg);
bool update_psi(const ct::Tensor& pphi, const ct::Tensor& cg, const ct::Tensor& scg,
const double& ethreshold, Real& cg_norm, Real& theta, Real& eigen,
ct::Tensor& phi_m, ct::Tensor& sphi, ct::Tensor& hphi);
void schmit_orth(const int& m, const ct::Tensor& psi, const ct::Tensor& sphi, ct::Tensor& phi_m);
void diag_mock(const ct::Tensor& prec, ct::Tensor& psi, ct::Tensor& eigen,
const std::vector<double>& ethr_band);
bool test_exit_cond(const int& ntry, const int& notconv) const;
};

} // namespace hsolver
#endif
Loading