From 0989c6ced7bcfa8358f11f9d7b338e3aa6dfc862 Mon Sep 17 00:00:00 2001 From: abacus_fixer Date: Thu, 9 Apr 2026 09:28:19 +0800 Subject: [PATCH 1/3] refactor(device): separate DSP memory operations into memory_op_dsp files This refactoring separates DSP-related memory operations from the main memory_op files: - Created memory_op_dsp.h: Contains DSP-specific struct declarations (resize_memory_op_mt, set_memory_op_mt, delete_memory_op_mt) - Created memory_op_dsp.cpp: Contains DSP-specific implementation and template instantiations - Modified memory_op.h: Added include for memory_op_dsp.h and removed DSP-related code - Modified memory_op.cpp: Removed DSP-related headers and implementation - Updated Makefile.Objects: Added memory_op_dsp.o to OBJS_BASE - Removed unused CMakeLists.txt The refactoring maintains all existing functionality while improving code organization by separating DSP-specific code from general memory operations. Modified files: - source/Makefile.Objects - source/source_base/module_device/memory_op.cpp - source/source_base/module_device/memory_op.h New files: - source/source_base/module_device/memory_op_dsp.cpp - source/source_base/module_device/memory_op_dsp.h Removed files: - source/source_base/module_device/CMakeLists.txt (unused) --- source/Makefile.Objects | 1 + .../source_base/module_device/CMakeLists.txt | 44 --------- .../source_base/module_device/memory_op.cpp | 74 --------------- source/source_base/module_device/memory_op.h | 41 +-------- .../module_device/memory_op_dsp.cpp | 91 +++++++++++++++++++ .../source_base/module_device/memory_op_dsp.h | 60 ++++++++++++ 6 files changed, 153 insertions(+), 158 deletions(-) delete mode 100644 source/source_base/module_device/CMakeLists.txt create mode 100644 source/source_base/module_device/memory_op_dsp.cpp create mode 100644 source/source_base/module_device/memory_op_dsp.h diff --git a/source/Makefile.Objects b/source/Makefile.Objects index 9ad0ebe509..776dbfd73a 100644 --- a/source/Makefile.Objects +++ b/source/Makefile.Objects @@ -175,6 +175,7 @@ OBJS_BASE=abfs-vector3_order.o\ pulay_mixing.o\ broyden_mixing.o\ memory_op.o\ + memory_op_dsp.o\ device.o\ device_helpers.o\ output_device.o\ diff --git a/source/source_base/module_device/CMakeLists.txt b/source/source_base/module_device/CMakeLists.txt deleted file mode 100644 index 0aede5e57b..0000000000 --- a/source/source_base/module_device/CMakeLists.txt +++ /dev/null @@ -1,44 +0,0 @@ -# list(APPEND module_device_srcs -# memory_op.cpp -# device.cpp -# ) - -# if(USE_CUDA) -# list(APPEND module_device_srcs -# cuda/memory_op.cu -# ) -# endif() - - -# if(USE_ROCM) -# hip_add_library(module_device_rocm STATIC -# rocm/memory_op.hip.cu -# ) -# target_link_libraries( -# device -# module_device_rocm -# hip::host -# hip::device -# hip::hipfft -# roc::hipblas -# roc::hipsolver -# ) -# endif() - -# add_library(device OBJECT ${device_srcs}) - -# if(USE_CUDA) -# target_link_libraries( -# device -# ) -# elseif(USE_ROCM) -# target_link_libraries( -# device -# device_rocm -# hip::host -# hip::device -# hip::hipfft -# roc::hipblas -# roc::hipsolver -# ) -# endif() \ No newline at end of file diff --git a/source/source_base/module_device/memory_op.cpp b/source/source_base/module_device/memory_op.cpp index bff9234f64..7d8d47a7e2 100644 --- a/source/source_base/module_device/memory_op.cpp +++ b/source/source_base/module_device/memory_op.cpp @@ -2,11 +2,6 @@ #include "source_base/memory.h" #include "source_base/tool_threading.h" -#ifdef __DSP -#include "source_base/kernels/dsp/dsp_connector.h" -#include "source_base/global_variable.h" -#include "source_io/module_parameter/parameter.h" -#endif #include #include @@ -442,76 +437,7 @@ template struct delete_memory_op, base_device::DEVICE_GPU>; template struct delete_memory_op, base_device::DEVICE_GPU>; #endif -#ifdef __DSP - -template -struct resize_memory_op_mt -{ - void operator()(FPTYPE*& arr, const size_t size, const char* record_in) - { - if (arr != nullptr) - { - mtfunc::free_ht(arr); - } - arr = (FPTYPE*)mtfunc::malloc_ht(sizeof(FPTYPE) * size, GlobalV::MY_RANK % PARAM.inp.dsp_count); - std::string record_string; - if (record_in != nullptr) - { - record_string = record_in; - } - else - { - record_string = "no_record"; - } - - if (record_string != "no_record") - { - ModuleBase::Memory::record(record_string, sizeof(FPTYPE) * size); - } - } -}; - -template -struct set_memory_op_mt -{ - void operator()(FPTYPE* arr, const int var, const size_t size) - { - ModuleBase::OMP_PARALLEL([&](int num_thread, int thread_id) { - int beg = 0, len = 0; - ModuleBase::BLOCK_TASK_DIST_1D(num_thread, thread_id, size, (size_t)4096 / sizeof(FPTYPE), beg, len); - memset(arr + beg, var, sizeof(FPTYPE) * len); - }); - } -}; - -template -struct delete_memory_op_mt -{ - void operator()(FPTYPE* arr) - { - mtfunc::free_ht(arr); - } -}; - -template struct resize_memory_op_mt; -template struct resize_memory_op_mt; -template struct resize_memory_op_mt; -template struct resize_memory_op_mt, base_device::DEVICE_CPU>; -template struct resize_memory_op_mt, base_device::DEVICE_CPU>; - -template struct set_memory_op_mt; -template struct set_memory_op_mt; -template struct set_memory_op_mt; -template struct set_memory_op_mt, base_device::DEVICE_CPU>; -template struct set_memory_op_mt, base_device::DEVICE_CPU>; - -template struct delete_memory_op_mt; -template struct delete_memory_op_mt; -template struct delete_memory_op_mt; -template struct delete_memory_op_mt, base_device::DEVICE_CPU>; -template struct delete_memory_op_mt, base_device::DEVICE_CPU>; -#endif template void resize_memory(FPTYPE* arr, const size_t size, base_device::AbacusDevice_t device_type) diff --git a/source/source_base/module_device/memory_op.h b/source/source_base/module_device/memory_op.h index 004468f410..2961efbb1f 100644 --- a/source/source_base/module_device/memory_op.h +++ b/source/source_base/module_device/memory_op.h @@ -2,6 +2,7 @@ #define MODULE_DEVICE_MEMORY_H_ #include "types.h" +#include "memory_op_dsp.h" #include #include @@ -218,47 +219,7 @@ struct delete_memory_op }; #endif // __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM -#ifdef __DSP -template -struct resize_memory_op_mt -{ - /// @brief Allocate memory for a given pointer. Note this op will free the pointer first. - /// - /// Input Parameters - /// \param size : array size - /// \param record_string : label for memory record - /// - /// Output Parameters - /// \param arr : allocated array - void operator()(FPTYPE*& arr, const size_t size, const char* record_in = nullptr); -}; - -template -struct set_memory_op_mt -{ - /// @brief memset for DSP memory allocated by mt allocator. - /// - /// Input Parameters - /// \param var : the specified constant byte value - /// \param size : array size - /// - /// Output Parameters - /// \param arr : output array initialized by the input value - void operator()(FPTYPE* arr, const int var, const size_t size); -}; - -template -struct delete_memory_op_mt -{ - /// @brief free memory for multi-device - /// - /// Input Parameters - /// \param arr : the input array - void operator()(FPTYPE* arr); -}; - -#endif // __DSP } // end of namespace memory } // end of namespace base_device diff --git a/source/source_base/module_device/memory_op_dsp.cpp b/source/source_base/module_device/memory_op_dsp.cpp new file mode 100644 index 0000000000..d8c5e0eb96 --- /dev/null +++ b/source/source_base/module_device/memory_op_dsp.cpp @@ -0,0 +1,91 @@ +#include "memory_op_dsp.h" + +#include "source_base/memory.h" +#include "source_base/tool_threading.h" +#ifdef __DSP +#include "source_base/kernels/dsp/dsp_connector.h" +#include "source_base/global_variable.h" +#include "source_io/module_parameter/parameter.h" +#endif + +#include +#include + +namespace base_device +{ +namespace memory +{ + +#ifdef __DSP + +template +struct resize_memory_op_mt +{ + void operator()(FPTYPE*& arr, const size_t size, const char* record_in) + { + if (arr != nullptr) + { + mtfunc::free_ht(arr); + } + arr = (FPTYPE*)mtfunc::malloc_ht(sizeof(FPTYPE) * size, GlobalV::MY_RANK % PARAM.inp.dsp_count); + std::string record_string; + if (record_in != nullptr) + { + record_string = record_in; + } + else + { + record_string = "no_record"; + } + + if (record_string != "no_record") + { + ModuleBase::Memory::record(record_string, sizeof(FPTYPE) * size); + } + } +}; + +template +struct set_memory_op_mt +{ + void operator()(FPTYPE* arr, const int var, const size_t size) + { + ModuleBase::OMP_PARALLEL([&](int num_thread, int thread_id) { + int beg = 0, len = 0; + ModuleBase::BLOCK_TASK_DIST_1D(num_thread, thread_id, size, (size_t)4096 / sizeof(FPTYPE), beg, len); + memset(arr + beg, var, sizeof(FPTYPE) * len); + }); + } +}; + +template +struct delete_memory_op_mt +{ + void operator()(FPTYPE* arr) + { + mtfunc::free_ht(arr); + } +}; + + +template struct resize_memory_op_mt; +template struct resize_memory_op_mt; +template struct resize_memory_op_mt; +template struct resize_memory_op_mt, base_device::DEVICE_CPU>; +template struct resize_memory_op_mt, base_device::DEVICE_CPU>; + +template struct set_memory_op_mt; +template struct set_memory_op_mt; +template struct set_memory_op_mt; +template struct set_memory_op_mt, base_device::DEVICE_CPU>; +template struct set_memory_op_mt, base_device::DEVICE_CPU>; + +template struct delete_memory_op_mt; +template struct delete_memory_op_mt; +template struct delete_memory_op_mt; +template struct delete_memory_op_mt, base_device::DEVICE_CPU>; +template struct delete_memory_op_mt, base_device::DEVICE_CPU>; +#endif + +} // namespace memory +} // namespace base_device \ No newline at end of file diff --git a/source/source_base/module_device/memory_op_dsp.h b/source/source_base/module_device/memory_op_dsp.h new file mode 100644 index 0000000000..28659b77da --- /dev/null +++ b/source/source_base/module_device/memory_op_dsp.h @@ -0,0 +1,60 @@ +#ifndef MODULE_DEVICE_MEMORY_DSP_H_ +#define MODULE_DEVICE_MEMORY_DSP_H_ + +#include "types.h" + +#include +#include + +namespace base_device +{ + +namespace memory +{ + +#ifdef __DSP + +template +struct resize_memory_op_mt +{ + /// @brief Allocate memory for a given pointer. Note this op will free the pointer first. + /// + /// Input Parameters + /// \param size : array size + /// \param record_string : label for memory record + /// + /// Output Parameters + /// \param arr : allocated array + void operator()(FPTYPE*& arr, const size_t size, const char* record_in = nullptr); +}; + +template +struct set_memory_op_mt +{ + /// @brief memset for DSP memory allocated by mt allocator. + /// + /// Input Parameters + /// \param var : the specified constant byte value + /// \param size : array size + /// + /// Output Parameters + /// \param arr : output array initialized by the input value + void operator()(FPTYPE* arr, const int var, const size_t size); +}; + +template +struct delete_memory_op_mt +{ + /// @brief free memory for multi-device + /// + /// Input Parameters + /// \param arr : the input array + void operator()(FPTYPE* arr); +}; + +#endif // __DSP + +} // end of namespace memory +} // end of namespace base_device + +#endif // MODULE_DEVICE_MEMORY_DSP_H_ \ No newline at end of file From 36fed9ef8726cfb9af834d669a909f44a9a7816a Mon Sep 17 00:00:00 2001 From: abacus_fixer Date: Thu, 9 Apr 2026 12:09:56 +0800 Subject: [PATCH 2/3] refactor(device): implement dependency inversion for DSP selector This refactoring implements dependency inversion for DSP selector: - Created dsp_selector.h and dsp_selector.cpp: Define abstract interface and default implementation for DSP selector - Created dsp_config.cpp: Initialize DSP selector with rank calculation - Modified memory_op_dsp.cpp: Use new DSP selector instead of direct PARAM dependency - Modified input_conv.cpp: Call init_dsp_selector during parameter conversion - Modified parameter.h: Add init_dsp_selector declaration - Updated CMakeLists.txt and Makefile.Objects: Add new files to build system The refactoring eliminates direct dependency between module_device and source_io by using an abstract interface for DSP rank selection. Changes made: - Implemented abstract DspSelector interface - Created DefaultDspSelector implementation - Added create_default_selector factory function - Modified code to use standard C++ exceptions instead of ModuleBase::WARNING_QUIT - Updated build configuration to include new files This change improves code modularity and maintainability by following the dependency inversion principle. --- source/CMakeLists.txt | 2 + source/Makefile.Objects | 4 ++ .../module_device/dsp_selector.cpp | 64 +++++++++++++++++++ .../source_base/module_device/dsp_selector.h | 35 ++++++++++ .../module_device/memory_op_dsp.cpp | 9 +-- .../source_io/module_parameter/CMakeLists.txt | 7 ++ .../source_io/module_parameter/dsp_config.cpp | 37 +++++++++++ .../source_io/module_parameter/input_conv.cpp | 10 +++ source/source_io/module_parameter/parameter.h | 4 ++ 9 files changed, 168 insertions(+), 4 deletions(-) create mode 100644 source/source_base/module_device/dsp_selector.cpp create mode 100644 source/source_base/module_device/dsp_selector.h create mode 100644 source/source_io/module_parameter/dsp_config.cpp diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt index 6f127a1722..16a7c26aa7 100644 --- a/source/CMakeLists.txt +++ b/source/CMakeLists.txt @@ -46,6 +46,8 @@ list(APPEND device_srcs source_base/module_device/device_helpers.cpp source_base/module_device/output_device.cpp source_base/module_device/memory_op.cpp + source_base/module_device/memory_op_dsp.cpp + source_base/module_device/dsp_selector.cpp source_base/kernels/math_kernel_op.cpp source_base/kernels/math_kernel_op_vec.cpp diff --git a/source/Makefile.Objects b/source/Makefile.Objects index 776dbfd73a..465151d758 100644 --- a/source/Makefile.Objects +++ b/source/Makefile.Objects @@ -176,11 +176,13 @@ OBJS_BASE=abfs-vector3_order.o\ broyden_mixing.o\ memory_op.o\ memory_op_dsp.o\ + dsp_selector.o\ device.o\ device_helpers.o\ output_device.o\ parallel_2d.o\ + OBJS_CELL=atom_pseudo.o\ atom_spec.o\ pseudo.o\ @@ -516,6 +518,7 @@ OBJS_XC=xc_functional.o\ exx_info.o\ OBJS_IO=module_parameter/input_conv.o\ + module_parameter/dsp_config.o\ module_unk/berryphase.o\ module_bessel/bessel_basis.o\ cal_test.o\ @@ -593,6 +596,7 @@ OBJS_IO=module_parameter/input_conv.o\ filename.o\ ucell_io.o\ + OBJS_IO_LCAO=cal_r_overlap_R.o\ write_orb_info.o\ write_dos_lcao.o\ diff --git a/source/source_base/module_device/dsp_selector.cpp b/source/source_base/module_device/dsp_selector.cpp new file mode 100644 index 0000000000..bdc5e761d7 --- /dev/null +++ b/source/source_base/module_device/dsp_selector.cpp @@ -0,0 +1,64 @@ +#include "dsp_selector.h" +#include +#include + +#ifdef __DSP + +namespace base_device +{ +namespace memory +{ + +// Global selector instance +std::unique_ptr dsp_selector = nullptr; + +// Get current DSP selector +DspSelector* get_dsp_selector() +{ + if (!dsp_selector) + { + throw std::runtime_error( + "ModuleBase::memory::get_dsp_selector: " + "DSP selector not initialized. Call init_dsp_selector first." + ); + } + return dsp_selector.get(); +} + +// Default DSP selector implementation +class DefaultDspSelector : public DspSelector +{ +private: + int rank_ = 0; + +public: + int get_rank() const override + { + return rank_; + } + + void set_rank(const int rank) override + { + if (rank < 0) + { + throw std::runtime_error( + "ModuleBase::memory::DspSelector: " + "DSP rank must be non-negative" + ); + } + rank_ = rank; + } +}; + + +// Create default DSP selector and set it as global +void create_default_selector(const int rank) +{ + dsp_selector = std::unique_ptr(new DefaultDspSelector()); + dsp_selector->set_rank(rank); +} + +} // namespace memory +} // namespace base_device + +#endif diff --git a/source/source_base/module_device/dsp_selector.h b/source/source_base/module_device/dsp_selector.h new file mode 100644 index 0000000000..0ef1d0c86b --- /dev/null +++ b/source/source_base/module_device/dsp_selector.h @@ -0,0 +1,35 @@ +#ifndef MODULE_DEVICE_DSP_SELECTOR_H_ +#define MODULE_DEVICE_DSP_SELECTOR_H_ + +#ifdef __DSP + +#include + +namespace base_device { +namespace memory { + +// DSP selector interface +class DspSelector { +public: + virtual ~DspSelector() = default; + // Get DSP rank + virtual int get_rank() const = 0; + // Set DSP rank + virtual void set_rank(const int rank) = 0; +}; + +// Global selector instance +extern std::unique_ptr dsp_selector; + +// Get current DSP selector +DspSelector* get_dsp_selector(); + +// Create default DSP selector and set it as global +void create_default_selector(const int rank); + +} // namespace memory +} // namespace base_device + +#endif // end __DSP + +#endif // MODULE_DEVICE_DSP_SELECTOR_H_ diff --git a/source/source_base/module_device/memory_op_dsp.cpp b/source/source_base/module_device/memory_op_dsp.cpp index d8c5e0eb96..21fb67ae68 100644 --- a/source/source_base/module_device/memory_op_dsp.cpp +++ b/source/source_base/module_device/memory_op_dsp.cpp @@ -1,11 +1,10 @@ #include "memory_op_dsp.h" +#include "dsp_selector.h" #include "source_base/memory.h" #include "source_base/tool_threading.h" #ifdef __DSP #include "source_base/kernels/dsp/dsp_connector.h" -#include "source_base/global_variable.h" -#include "source_io/module_parameter/parameter.h" #endif #include @@ -27,7 +26,8 @@ struct resize_memory_op_mt { mtfunc::free_ht(arr); } - arr = (FPTYPE*)mtfunc::malloc_ht(sizeof(FPTYPE) * size, GlobalV::MY_RANK % PARAM.inp.dsp_count); + int rank = get_dsp_selector()->get_rank(); + arr = (FPTYPE*)mtfunc::malloc_ht(sizeof(FPTYPE) * size, rank); std::string record_string; if (record_in != nullptr) { @@ -50,7 +50,8 @@ struct set_memory_op_mt { void operator()(FPTYPE* arr, const int var, const size_t size) { - ModuleBase::OMP_PARALLEL([&](int num_thread, int thread_id) { + ModuleBase::OMP_PARALLEL([&](int num_thread, int thread_id) + { int beg = 0, len = 0; ModuleBase::BLOCK_TASK_DIST_1D(num_thread, thread_id, size, (size_t)4096 / sizeof(FPTYPE), beg, len); memset(arr + beg, var, sizeof(FPTYPE) * len); diff --git a/source/source_io/module_parameter/CMakeLists.txt b/source/source_io/module_parameter/CMakeLists.txt index 03ffaa1218..6d28ea07b7 100644 --- a/source/source_io/module_parameter/CMakeLists.txt +++ b/source/source_io/module_parameter/CMakeLists.txt @@ -3,8 +3,15 @@ add_library( parameter OBJECT parameter.cpp + dsp_config.cpp ) +# Link with base library for WARNING_QUIT +target_link_libraries(parameter PUBLIC base) + +# Ensure device library is built before parameter +add_dependencies(parameter device) + # if(ENABLE_COVERAGE) # add_coverage(parameter) # endif() diff --git a/source/source_io/module_parameter/dsp_config.cpp b/source/source_io/module_parameter/dsp_config.cpp new file mode 100644 index 0000000000..27252e2e0f --- /dev/null +++ b/source/source_io/module_parameter/dsp_config.cpp @@ -0,0 +1,37 @@ +#include "source_base/module_device/dsp_selector.h" +#include +#include + +#ifdef __DSP + +namespace ModuleIO { + +// Initialize DSP selector +void init_dsp_selector(const int my_rank, const int dsp_count) +{ + // Validate parameters + if (my_rank < 0) + { + throw std::runtime_error( + "ModuleIO::init_dsp_selector: " + "my_rank must be non-negative" + ); + } + if (dsp_count <= 0) + { + throw std::runtime_error( + "ModuleIO::init_dsp_selector: " + "dsp_count must be positive" + ); + } + + // Calculate DSP rank + const int rank = my_rank % dsp_count; + + // Create default DSP selector and set it as global + base_device::memory::create_default_selector(rank); +} + +} // namespace ModuleIO + +#endif diff --git a/source/source_io/module_parameter/input_conv.cpp b/source/source_io/module_parameter/input_conv.cpp index 2a2ccf6b8b..113dcc3740 100644 --- a/source/source_io/module_parameter/input_conv.cpp +++ b/source/source_io/module_parameter/input_conv.cpp @@ -665,6 +665,16 @@ void Input_Conv::Convert() hsolver::DiagoElpaNative::elpa_num_thread = PARAM.inp.elpa_num_thread; ; #endif + + +#ifdef __DSP + // Initialize DSP selector if DSP is enabled + if (PARAM.inp.dsp_count > 0) + { + ModuleIO::init_dsp_selector(GlobalV::MY_RANK, PARAM.inp.dsp_count); + } +#endif + ModuleBase::timer::end("Input_Conv", "Convert"); return; } diff --git a/source/source_io/module_parameter/parameter.h b/source/source_io/module_parameter/parameter.h index 138407f607..dd2235fdc4 100644 --- a/source/source_io/module_parameter/parameter.h +++ b/source/source_io/module_parameter/parameter.h @@ -1,10 +1,14 @@ #ifndef PARAMETER_H #define PARAMETER_H + #include "input_parameter.h" #include "system_parameter.h" + namespace ModuleIO { class ReadInput; + // Initialize DSP selector + void init_dsp_selector(const int my_rank, const int dsp_count); } class CalAtomInfo; From 6536f820dc443b72533c1e88fe9832e033e8992c Mon Sep 17 00:00:00 2001 From: abacus_fixer Date: Thu, 9 Apr 2026 16:54:30 +0800 Subject: [PATCH 3/3] Fix pyabacus build issues: specify scikit-build-core version and remove unnecessary device dependency --- python/pyabacus/pyproject.toml | 2 +- source/source_io/module_parameter/CMakeLists.txt | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/python/pyabacus/pyproject.toml b/python/pyabacus/pyproject.toml index 6b28b35d71..634cb462ef 100644 --- a/python/pyabacus/pyproject.toml +++ b/python/pyabacus/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["scikit-build-core>=0.3.3", "pybind11>=2.10.0"] +requires = ["scikit-build-core<0.10", "pybind11>=2.10.0"] build-backend = "scikit_build_core.build" diff --git a/source/source_io/module_parameter/CMakeLists.txt b/source/source_io/module_parameter/CMakeLists.txt index 6d28ea07b7..fe2fa66a6f 100644 --- a/source/source_io/module_parameter/CMakeLists.txt +++ b/source/source_io/module_parameter/CMakeLists.txt @@ -9,8 +9,7 @@ add_library( # Link with base library for WARNING_QUIT target_link_libraries(parameter PUBLIC base) -# Ensure device library is built before parameter -add_dependencies(parameter device) +# device library dependency removed for pyabacus build # if(ENABLE_COVERAGE) # add_coverage(parameter)