From e169734940e8dcfb1e264651cb417e2019b73d53 Mon Sep 17 00:00:00 2001 From: LwhJesse <256257451+LwhJesse@users.noreply.github.com> Date: Tue, 26 May 2026 05:27:13 +0800 Subject: [PATCH 1/2] Harden host sparse index width and allocation safety --- .../basic_types/datatype_structure.hpp | 3 + Common/include/geometry/CGeometry.hpp | 2 +- .../include/linear_algebra/CPastixWrapper.hpp | 40 ++++++--- Common/include/linear_algebra/CSysMatrix.hpp | 29 +++---- Common/include/linear_algebra/CSysVector.hpp | 51 ++++++------ Common/include/toolboxes/graph_toolbox.hpp | 5 +- Common/src/geometry/CGeometry.cpp | 2 +- Common/src/linear_algebra/CPastixWrapper.cpp | 82 ++++++++++++------- Common/src/linear_algebra/CSysMatrix.cpp | 80 ++++++++++++++---- Common/src/linear_algebra/CSysVector.cpp | 44 ++++++++-- 10 files changed, 230 insertions(+), 108 deletions(-) diff --git a/Common/include/basic_types/datatype_structure.hpp b/Common/include/basic_types/datatype_structure.hpp index 623c410e551..252b722c399 100644 --- a/Common/include/basic_types/datatype_structure.hpp +++ b/Common/include/basic_types/datatype_structure.hpp @@ -29,11 +29,14 @@ #include #include +#include #include #include "../code_config.hpp" #include "ad_structure.hpp" +using su2_index_t = std::uint64_t; + /*! * \namespace SU2_TYPE * \brief Namespace for defining the datatype wrapper routines, this acts as a base diff --git a/Common/include/geometry/CGeometry.hpp b/Common/include/geometry/CGeometry.hpp index 7958110fb23..137316fb54c 100644 --- a/Common/include/geometry/CGeometry.hpp +++ b/Common/include/geometry/CGeometry.hpp @@ -1882,7 +1882,7 @@ class CGeometry { * \param[in] type - Finite volume or finite element. * \return Reference to the map. */ - const su2vector& GetTransposeSparsePatternMap(ConnectivityType type); + const su2vector& GetTransposeSparsePatternMap(ConnectivityType type); /*! * \brief Get the edge coloring. diff --git a/Common/include/linear_algebra/CPastixWrapper.hpp b/Common/include/linear_algebra/CPastixWrapper.hpp index dcc320debe1..6418112d3bd 100644 --- a/Common/include/linear_algebra/CPastixWrapper.hpp +++ b/Common/include/linear_algebra/CPastixWrapper.hpp @@ -36,8 +36,11 @@ #include #include +#include #include +#include "../basic_types/datatype_structure.hpp" + using namespace std; class CConfig; @@ -66,14 +69,19 @@ class CPastixWrapper { double dparm[DPARM_SIZE]; /*!< \brief Floating point parameters for PaStiX. */ struct { - unsigned long nVar = 0; - unsigned long nPoint = 0; - unsigned long nPointDomain = 0; - const unsigned long* rowptr = nullptr; - const unsigned long* colidx = nullptr; + su2_index_t nVar = 0; + su2_index_t nPoint = 0; + su2_index_t nPointDomain = 0; + const su2_index_t* rowptr = nullptr; + const su2_index_t* colidx = nullptr; const ScalarType* values = nullptr; - unsigned long size_rhs() const { return nPointDomain * nVar; } + su2_index_t size_rhs() const { + if (nPointDomain != 0 && nVar > std::numeric_limits::max() / nPointDomain) { + SU2_MPI::Error("Overflow while computing PaStiX rhs size.", CURRENT_FUNCTION); + } + return nPointDomain * nVar; + } } matrix; /*!< \brief Pointers and sizes of the input matrix. */ bool issetup{}; /*!< \brief Signals that the matrix data has been provided. */ @@ -85,8 +93,8 @@ class CPastixWrapper { const int mpi_size = SU2_MPI::GetSize(); const int mpi_rank = SU2_MPI::GetRank(); - vector sort_rows; /*!< \brief List of rows with halo points. */ - vector> sort_order; /*!< \brief How each of those rows needs to be sorted. */ + vector sort_rows; /*!< \brief List of rows with halo points. */ + vector> sort_order; /*!< \brief How each of those rows needs to be sorted. */ /*! * \brief Run the "clean" task, releases all memory, leaves object in unusable state. @@ -133,8 +141,8 @@ class CPastixWrapper { * \param[in] colidx - Non zeros column indices. * \param[in] values - Matrix coefficients. */ - void SetMatrix(unsigned long nVar, unsigned long nPoint, unsigned long nPointDomain, const unsigned long* rowptr, - const unsigned long* colidx, const ScalarType* values) { + void SetMatrix(su2_index_t nVar, su2_index_t nPoint, su2_index_t nPointDomain, const su2_index_t* rowptr, + const su2_index_t* colidx, const ScalarType* values) { if (issetup) return; matrix.nVar = nVar; matrix.nPoint = nPoint; @@ -176,11 +184,17 @@ class CPastixWrapper { } iparm[IPARM_VERBOSE] = PastixVerboseNot; - for (auto i = 0ul; i < matrix.size_rhs(); ++i) workvec[i] = rhs[i]; - if (pastix_task_solve(state, matrix.size_rhs(), 1, workvec.data(), matrix.size_rhs()) != PASTIX_SUCCESS) { + const auto rhs_size = matrix.size_rhs(); + if (rhs_size > static_cast(std::numeric_limits::max())) { + SU2_MPI::Error("Overflow while converting PaStiX rhs size.", CURRENT_FUNCTION); + } + const auto pastix_rhs_size = static_cast(rhs_size); + + for (su2_index_t i = 0; i < rhs_size; ++i) workvec[i] = rhs[i]; + if (pastix_task_solve(state, pastix_rhs_size, 1, workvec.data(), pastix_rhs_size) != PASTIX_SUCCESS) { SU2_MPI::Error("Error solving linear system.", CURRENT_FUNCTION); } - for (auto i = 0ul; i < matrix.size_rhs(); ++i) sol[i] = workvec[i]; + for (su2_index_t i = 0; i < rhs_size; ++i) sol[i] = workvec[i]; } }; #endif diff --git a/Common/include/linear_algebra/CSysMatrix.hpp b/Common/include/linear_algebra/CSysMatrix.hpp index ecb26959a06..05aaaa37eee 100644 --- a/Common/include/linear_algebra/CSysMatrix.hpp +++ b/Common/include/linear_algebra/CSysMatrix.hpp @@ -139,11 +139,11 @@ class CSysMatrix { unsigned long nEqn; /*!< \brief Number of equations (and columns of the blocks). */ ScalarType* matrix; /*!< \brief Entries of the sparse matrix. */ - unsigned long nnz; /*!< \brief Number of possible nonzero entries in the matrix. */ - const unsigned long* row_ptr; /*!< \brief Pointers to the first element in each row. */ - const unsigned long* dia_ptr; /*!< \brief Pointers to the diagonal element in each row. */ - const unsigned long* col_ind; /*!< \brief Column index for each of the elements in val(). */ - const unsigned long* col_ptr; /*!< \brief The transpose of col_ind, pointer to blocks with the same column index. */ + su2_index_t nnz; /*!< \brief Number of possible nonzero entries in the matrix. */ + const su2_index_t* row_ptr; /*!< \brief Pointers to the first element in each row. */ + const su2_index_t* dia_ptr; /*!< \brief Pointers to the diagonal element in each row. */ + const su2_index_t* col_ind; /*!< \brief Column index for each of the elements in val(). */ + const su2_index_t* col_ptr; /*!< \brief The transpose of col_ind, pointer to blocks with the same column index. */ ScalarType* d_matrix; /*!< \brief Device Pointer to store the matrix values on the GPU. */ const unsigned long* d_row_ptr; /*!< \brief Device Pointers to the first element in each row. */ @@ -152,10 +152,10 @@ class CSysMatrix { Mainly used to conditionally free GPU memory in the class destructor. */ ScalarType* ILU_matrix; /*!< \brief Entries of the ILU sparse matrix. */ - unsigned long nnz_ilu; /*!< \brief Number of possible nonzero entries in the matrix (ILU). */ - const unsigned long* row_ptr_ilu; /*!< \brief Pointers to the first element in each row (ILU). */ - const unsigned long* dia_ptr_ilu; /*!< \brief Pointers to the diagonal element in each row (ILU). */ - const unsigned long* col_ind_ilu; /*!< \brief Column index for each of the elements in val() (ILU). */ + su2_index_t nnz_ilu; /*!< \brief Number of possible nonzero entries in the matrix (ILU). */ + const su2_index_t* row_ptr_ilu; /*!< \brief Pointers to the first element in each row (ILU). */ + const su2_index_t* dia_ptr_ilu; /*!< \brief Pointers to the diagonal element in each row (ILU). */ + const su2_index_t* col_ind_ilu; /*!< \brief Column index for each of the elements in val() (ILU). */ unsigned short ilu_fill_in; /*!< \brief Fill in level for the ILU preconditioner. */ /*!< \brief Level structure for alternative shared memory parallelization of ILU. */ @@ -192,14 +192,14 @@ class CSysMatrix { * \brief Auxilary object to wrap the edge map pointer used in fast block updates, i.e. without linear searches. */ struct { - const unsigned long* ptr = nullptr; + const su2_index_t* ptr = nullptr; unsigned long nEdge = 0; operator bool() { return nEdge != 0; } - inline unsigned long operator()(unsigned long edge, unsigned long node) const { return ptr[2 * edge + node]; } - inline unsigned long ij(unsigned long edge) const { return ptr[2 * edge]; } - inline unsigned long ji(unsigned long edge) const { return ptr[2 * edge + 1]; } + inline su2_index_t operator()(unsigned long edge, unsigned long node) const { return ptr[2 * edge + node]; } + inline su2_index_t ij(unsigned long edge) const { return ptr[2 * edge]; } + inline su2_index_t ji(unsigned long edge) const { return ptr[2 * edge + 1]; } } edge_ptr; @@ -817,7 +817,8 @@ class CSysMatrix { */ template inline void SetVal2Diag(unsigned long block_i, OtherType val_matrix) { - unsigned long iVar, index = dia_ptr[block_i] * nVar * nVar; + unsigned long iVar; + const su2_index_t index = dia_ptr[block_i] * nVar * nVar; /*--- Clear entire block before setting its diagonal. ---*/ SU2_OMP_SIMD diff --git a/Common/include/linear_algebra/CSysVector.hpp b/Common/include/linear_algebra/CSysVector.hpp index 1498b549bbb..6ce30a9c250 100644 --- a/Common/include/linear_algebra/CSysVector.hpp +++ b/Common/include/linear_algebra/CSysVector.hpp @@ -34,6 +34,7 @@ #include "../parallelization/mpi_structure.hpp" #include "../parallelization/omp_structure.hpp" #include "../parallelization/vectorization.hpp" +#include "../basic_types/datatype_structure.hpp" #include "vector_expressions.hpp" #include "../../include/CConfig.hpp" @@ -72,9 +73,9 @@ class CSysVector : public VecExpr::CVecExpr, ScalarType> /// NOTE: Update swap() if you add member variables. unsigned long omp_chunk_size = OMP_MAX_SIZE; /*!< \brief Static chunk size used in loops. */ ScalarType* vec_val = nullptr; /*!< \brief Storage, 64 byte aligned (do not use normal new/delete). */ - unsigned long nElm = 0; /*!< \brief Total number of elements (or number elements on this processor). */ - unsigned long nElmDomain = 0; /*!< \brief Total number of elements without Ghost cells. */ - unsigned long nVar = 1; /*!< \brief Number of elements in a block. */ + su2_index_t nElm = 0; /*!< \brief Total number of elements (or number elements on this processor). */ + su2_index_t nElmDomain = 0; /*!< \brief Total number of elements without Ghost cells. */ + su2_index_t nVar = 1; /*!< \brief Number of elements in a block. */ ScalarType* d_vec_val = nullptr; /*!< \brief Device Pointer to store the vector values on the GPU. */ @@ -95,7 +96,7 @@ class CSysVector : public VecExpr::CVecExpr, ScalarType> * \param[in] valIsArray - If true val is treated as array. * \param[in] errorIfParallel - Throw error if within parallel region (all ctors except the default one do this). */ - void Initialize(unsigned long numBlk, unsigned long numBlkDomain, unsigned long numVar, const ScalarType* val, + void Initialize(su2_index_t numBlk, su2_index_t numBlkDomain, su2_index_t numVar, const ScalarType* val, bool valIsArray, bool errorIfParallel = true); /*! @@ -128,7 +129,7 @@ class CSysVector : public VecExpr::CVecExpr, ScalarType> * \param[in] size - Number of elements locally. * \param[in] val - Default value for elements. */ - explicit CSysVector(unsigned long size, ScalarType val = 0.0) { Initialize(size, size, 1, &val, false); } + explicit CSysVector(su2_index_t size, ScalarType val = 0.0) { Initialize(size, size, 1, &val, false); } /*! * \brief Construct from size and value (block version). @@ -137,7 +138,7 @@ class CSysVector : public VecExpr::CVecExpr, ScalarType> * \param[in] numVar - Number of variables in each block. * \param[in] val - Default value for elements. */ - CSysVector(unsigned long numBlk, unsigned long numBlkDomain, unsigned long numVar, ScalarType val = 0.0) { + CSysVector(su2_index_t numBlk, su2_index_t numBlkDomain, su2_index_t numVar, ScalarType val = 0.0) { Initialize(numBlk, numBlkDomain, numVar, &val, false); } @@ -146,7 +147,7 @@ class CSysVector : public VecExpr::CVecExpr, ScalarType> * \param[in] size - Number of elements locally. * \param[in] u_array - Vector stored as array being copied. */ - CSysVector(unsigned long size, const ScalarType* u_array) { Initialize(size, size, 1, u_array, true); } + CSysVector(su2_index_t size, const ScalarType* u_array) { Initialize(size, size, 1, u_array, true); } /*! * \brief Constructor from array (block version). @@ -155,7 +156,7 @@ class CSysVector : public VecExpr::CVecExpr, ScalarType> * \param[in] numVar - number of variables in each block * \param[in] u_array - vector stored as array being copied */ - CSysVector(unsigned long numBlk, unsigned long numBlkDomain, unsigned long numVar, const ScalarType* u_array) { + CSysVector(su2_index_t numBlk, su2_index_t numBlkDomain, su2_index_t numVar, const ScalarType* u_array) { Initialize(numBlk, numBlkDomain, numVar, u_array, true); } @@ -186,7 +187,7 @@ class CSysVector : public VecExpr::CVecExpr, ScalarType> * \param[in] numVar - number of variables in each block * \param[in] val - default value for elements */ - void Initialize(unsigned long numBlk, unsigned long numBlkDomain, unsigned long numVar, ScalarType val = 0.0) { + void Initialize(su2_index_t numBlk, su2_index_t numBlkDomain, su2_index_t numVar, ScalarType val = 0.0) { Initialize(numBlk, numBlkDomain, numVar, &val, false, false); } @@ -198,7 +199,7 @@ class CSysVector : public VecExpr::CVecExpr, ScalarType> * \param[in] numVar - number of variables in each block * \param[in] ptr - pointer to data with which to initialize the vector */ - void Initialize(unsigned long numBlk, unsigned long numBlkDomain, unsigned long numVar, const ScalarType* ptr) { + void Initialize(su2_index_t numBlk, su2_index_t numBlkDomain, su2_index_t numVar, const ScalarType* ptr) { Initialize(numBlk, numBlkDomain, numVar, ptr, true, false); } @@ -218,7 +219,7 @@ class CSysVector : public VecExpr::CVecExpr, ScalarType> Initialize(other.GetNBlk(), other.GetNBlkDomain(), other.GetNVar(), nullptr, true, false);) CSYSVEC_PARFOR - for (auto i = 0ul; i < nElm; i++) vec_val[i] = SU2_TYPE::GetValue(other[i]); + for (su2_index_t i = 0; i < nElm; i++) vec_val[i] = SU2_TYPE::GetValue(other[i]); END_CSYSVEC_PARFOR } @@ -248,35 +249,35 @@ class CSysVector : public VecExpr::CVecExpr, ScalarType> /*! * \brief return the number of local elements in the CSysVector */ - inline unsigned long GetLocSize() const { return nElm; } + inline su2_index_t GetLocSize() const { return nElm; } /*! * \brief return the number of local elements in the CSysVector without ghost cells */ - inline unsigned long GetNElmDomain() const { return nElmDomain; } + inline su2_index_t GetNElmDomain() const { return nElmDomain; } /*! * \brief return the number of variables at each block (typically number per node) */ - inline unsigned long GetNVar() const { return nVar; } + inline su2_index_t GetNVar() const { return nVar; } /*! * \brief return the number of blocks (typically number of nodes locally) */ - inline unsigned long GetNBlk() const { return nElm / nVar; } + inline su2_index_t GetNBlk() const { return nElm / nVar; } /*! * \brief return the number of blocks (typically number of nodes locally) */ - inline unsigned long GetNBlkDomain() const { return nElmDomain / nVar; } + inline su2_index_t GetNBlkDomain() const { return nElmDomain / nVar; } /*! * \brief Access operator with assignment permitted. * \param[in] i - Local index to access. * \return Value at position i. */ - inline ScalarType& operator[](unsigned long i) { return vec_val[i]; } - inline const ScalarType& operator[](unsigned long i) const { return vec_val[i]; } + inline ScalarType& operator[](su2_index_t i) { return vec_val[i]; } + inline const ScalarType& operator[](su2_index_t i) const { return vec_val[i]; } /*! * \brief Iterators for range for loops. @@ -302,7 +303,7 @@ class CSysVector : public VecExpr::CVecExpr, ScalarType> */ CSysVector& operator=(const CSysVector& other) { CSYSVEC_PARFOR - for (auto i = 0ul; i < nElm; ++i) vec_val[i] = other.vec_val[i]; + for (su2_index_t i = 0; i < nElm; ++i) vec_val[i] = other.vec_val[i]; END_CSYSVEC_PARFOR return *this; } @@ -314,14 +315,14 @@ class CSysVector : public VecExpr::CVecExpr, ScalarType> #define MAKE_COMPOUND(OP) \ CSysVector& operator OP(ScalarType val) { \ CSYSVEC_PARFOR \ - for (auto i = 0ul; i < nElm; ++i) vec_val[i] OP val; \ + for (su2_index_t i = 0; i < nElm; ++i) vec_val[i] OP val; \ END_CSYSVEC_PARFOR \ return *this; \ } \ template \ CSysVector& operator OP(const VecExpr::CVecExpr& expr) { \ CSYSVEC_PARFOR \ - for (auto i = 0ul; i < nElm; ++i) vec_val[i] OP expr.derived()[i]; \ + for (su2_index_t i = 0; i < nElm; ++i) vec_val[i] OP expr.derived()[i]; \ END_CSYSVEC_PARFOR \ return *this; \ } @@ -351,7 +352,7 @@ class CSysVector : public VecExpr::CVecExpr, ScalarType> ScalarType sum = 0.0; CSYSVEC_PARFOR - for (auto i = 0ul; i < nElmDomain; ++i) { + for (su2_index_t i = 0; i < nElmDomain; ++i) { sum += vec_val[i] * expr.derived()[i]; } END_CSYSVEC_PARFOR @@ -409,7 +410,7 @@ class CSysVector : public VecExpr::CVecExpr, ScalarType> * \param[in] iPoint - Index of the block being set to zero. */ inline void SetBlock_Zero(unsigned long iPoint) { - for (auto iVar = 0ul; iVar < nVar; iVar++) vec_val[iPoint * nVar + iVar] = 0.0; + for (su2_index_t iVar = 0; iVar < nVar; iVar++) vec_val[iPoint * nVar + iVar] = 0.0; } /*! @@ -422,9 +423,9 @@ class CSysVector : public VecExpr::CVecExpr, ScalarType> template FORCEINLINE void SetBlock(unsigned long iPoint, const VectorType& block, ScalarType alpha = 1) { if (Overwrite) { - for (auto i = 0ul; i < nVar; ++i) vec_val[iPoint * nVar + i] = alpha * block[i]; + for (su2_index_t i = 0; i < nVar; ++i) vec_val[iPoint * nVar + i] = alpha * block[i]; } else { - for (auto i = 0ul; i < nVar; ++i) vec_val[iPoint * nVar + i] += alpha * block[i]; + for (su2_index_t i = 0; i < nVar; ++i) vec_val[iPoint * nVar + i] += alpha * block[i]; } } diff --git a/Common/include/toolboxes/graph_toolbox.hpp b/Common/include/toolboxes/graph_toolbox.hpp index 344e08ccf7c..e0941f82a10 100644 --- a/Common/include/toolboxes/graph_toolbox.hpp +++ b/Common/include/toolboxes/graph_toolbox.hpp @@ -27,6 +27,7 @@ #pragma once +#include "../basic_types/datatype_structure.hpp" #include "../containers/C2DContainer.hpp" #include "../parallelization/omp_structure.hpp" @@ -333,9 +334,9 @@ class CCompressedSparsePattern { template using CEdgeToNonZeroMap = C2DContainer; -using CCompressedSparsePatternUL = CCompressedSparsePattern; +using CCompressedSparsePatternUL = CCompressedSparsePattern; using CCompressedSparsePatternL = CCompressedSparsePattern; -using CEdgeToNonZeroMapUL = CEdgeToNonZeroMap; +using CEdgeToNonZeroMapUL = CEdgeToNonZeroMap; /*! * \brief Build a sparse pattern from geometry information, of type FVM or FEM, diff --git a/Common/src/geometry/CGeometry.cpp b/Common/src/geometry/CGeometry.cpp index 258e5612be8..11442cfe844 100644 --- a/Common/src/geometry/CGeometry.cpp +++ b/Common/src/geometry/CGeometry.cpp @@ -4151,7 +4151,7 @@ const CEdgeToNonZeroMapUL& CGeometry::GetEdgeToSparsePatternMap() { return edgeToCSRMap; } -const su2vector& CGeometry::GetTransposeSparsePatternMap(ConnectivityType type) { +const su2vector& CGeometry::GetTransposeSparsePatternMap(ConnectivityType type) { /*--- Yes the const cast is weird but it is still better than repeating code. ---*/ auto& pattern = const_cast(GetSparsePattern(type)); pattern.buildTransposePtr(); diff --git a/Common/src/linear_algebra/CPastixWrapper.cpp b/Common/src/linear_algebra/CPastixWrapper.cpp index f4db581a879..b3b88eccce8 100644 --- a/Common/src/linear_algebra/CPastixWrapper.cpp +++ b/Common/src/linear_algebra/CPastixWrapper.cpp @@ -34,26 +34,48 @@ #include "../../include/geometry/CGeometry.hpp" #include "../../include/linear_algebra/CPastixWrapper.hpp" +#include #include +#include + +namespace { + +template +TargetType CheckedCast(su2_index_t value, const char* what) { + if (value > static_cast(std::numeric_limits::max())) { + SU2_MPI::Error(std::string("Overflow while converting ") + what + " for PaStiX.", CURRENT_FUNCTION); + } + return static_cast(value); +} + +su2_index_t CheckedMul(su2_index_t lhs, su2_index_t rhs, const char* what) { + if (lhs != 0 && rhs > std::numeric_limits::max() / lhs) { + SU2_MPI::Error(std::string("Overflow while computing ") + what + ".", CURRENT_FUNCTION); + } + return lhs * rhs; +} + +} // namespace template void CPastixWrapper::Initialize(CGeometry* geometry, const CConfig* config) { if (isinitialized) return; // only need to do this once - const unsigned long nVar = matrix.nVar, nPoint = matrix.nPoint, nPointDomain = matrix.nPointDomain; - const unsigned long *row_ptr = matrix.rowptr, *col_ind = matrix.colidx; - const unsigned long nNonZero = row_ptr[nPointDomain]; + const auto nVar = matrix.nVar, nPoint = matrix.nPoint, nPointDomain = matrix.nPointDomain; + const auto* row_ptr = matrix.rowptr; + const auto* col_ind = matrix.colidx; + const auto nNonZero = row_ptr[nPointDomain]; /*--- Allocate ---*/ - nCols = static_cast(nPointDomain); + nCols = CheckedCast(nPointDomain, "nPointDomain"); colptr.resize(nPointDomain + 1); rowidx.clear(); rowidx.reserve(nNonZero); - values.resize(nNonZero * nVar * nVar); + values.resize(CheckedMul(CheckedMul(nNonZero, nVar, "PaStiX value storage size"), nVar, "PaStiX value storage size")); loc2glb.resize(nPointDomain); perm.resize(nPointDomain); - workvec.resize(nPointDomain * nVar); + workvec.resize(CheckedMul(nPointDomain, nVar, "PaStiX work vector size")); /*--- Set default parameter values ---*/ @@ -90,14 +112,16 @@ void CPastixWrapper::Initialize(CGeometry* geometry, const CConfig* /*--- 1 - Determine position in the linear partitioning ---*/ - unsigned long offset = 0; + su2_index_t offset = 0; #ifdef HAVE_MPI - vector domain_sizes(mpi_size); - MPI_Allgather(&nPointDomain, 1, MPI_UNSIGNED_LONG, domain_sizes.data(), 1, MPI_UNSIGNED_LONG, SU2_MPI::GetComm()); + vector domain_sizes(mpi_size); + MPI_Allgather(&nPointDomain, 1, MPI_UINT64_T, domain_sizes.data(), 1, MPI_UINT64_T, SU2_MPI::GetComm()); for (int i = 0; i < mpi_rank; ++i) offset += domain_sizes[i]; #endif - iota(loc2glb.begin(), loc2glb.end(), offset + 1); + for (su2_index_t i = 0; i < nPointDomain; ++i) { + loc2glb[i] = CheckedCast(offset + i + 1, "PaStiX global mapped index"); + } /*--- 2 - Communicate global indices of halo points to then renumber column indices from local to global when unpacking halos. ---*/ @@ -116,35 +140,36 @@ void CPastixWrapper::Initialize(CGeometry* geometry, const CConfig* unsigned long nVertexR = geometry->nVertex[MarkerR]; /*--- Allocate Send/Receive buffers ---*/ - vector Buffer_Recv(nVertexR), Buffer_Send(nVertexS); + vector Buffer_Recv(nVertexR), Buffer_Send(nVertexS); /*--- Prepare data to send ---*/ for (unsigned long iVertex = 0; iVertex < nVertexS; iVertex++) Buffer_Send[iVertex] = geometry->vertex[MarkerS][iVertex]->GetNode() + offset; /*--- Send and Receive data ---*/ - MPI_Sendrecv(Buffer_Send.data(), nVertexS, MPI_UNSIGNED_LONG, sender, 0, Buffer_Recv.data(), nVertexR, - MPI_UNSIGNED_LONG, recver, 0, SU2_MPI::GetComm(), MPI_STATUS_IGNORE); + MPI_Sendrecv(Buffer_Send.data(), nVertexS, MPI_UINT64_T, sender, 0, Buffer_Recv.data(), nVertexR, MPI_UINT64_T, + recver, 0, SU2_MPI::GetComm(), MPI_STATUS_IGNORE); /*--- Store received data---*/ for (unsigned long iVertex = 0; iVertex < nVertexR; iVertex++) - map[geometry->vertex[MarkerR][iVertex]->GetNode() - nPointDomain] = Buffer_Recv[iVertex]; + map[geometry->vertex[MarkerR][iVertex]->GetNode() - nPointDomain] = + CheckedCast(Buffer_Recv[iVertex], "PaStiX halo global mapped index"); } } #endif /*--- 3 - Copy, map the sparsity, and put it in Fortran numbering ---*/ - for (auto iPoint = 0ul; iPoint < nPointDomain; ++iPoint) { - colptr[iPoint] = static_cast(row_ptr[iPoint] + 1); + for (su2_index_t iPoint = 0; iPoint < nPointDomain; ++iPoint) { + colptr[iPoint] = CheckedCast(row_ptr[iPoint] + 1, "PaStiX row_ptr entry"); - const unsigned long begin = row_ptr[iPoint], end = row_ptr[iPoint + 1]; + const su2_index_t begin = row_ptr[iPoint], end = row_ptr[iPoint + 1]; /*--- If last point of row is halo ---*/ const bool sort_required = (col_ind[end - 1] >= nPointDomain); if (sort_required) { - const unsigned long nnz_row = end - begin; + const su2_index_t nnz_row = end - begin; sort_rows.push_back(iPoint); sort_order.emplace_back(nnz_row); @@ -152,28 +177,28 @@ void CPastixWrapper::Initialize(CGeometry* geometry, const CConfig* /*--- Sort mapped indices ("first") and keep track of source ("second") for when we later need to swap blocks for these rows. ---*/ - vector > aux(nnz_row); + vector > aux(nnz_row); for (auto j = begin; j < end; ++j) { if (col_ind[j] < nPointDomain) { - aux[j - begin].first = static_cast(offset + col_ind[j] + 1); + aux[j - begin].first = CheckedCast(offset + col_ind[j] + 1, "PaStiX column index"); } else { - aux[j - begin].first = static_cast(map[col_ind[j] - nPointDomain] + 1); + aux[j - begin].first = CheckedCast(map[col_ind[j] - nPointDomain] + 1, "PaStiX halo column index"); } aux[j - begin].second = j; } sort(aux.begin(), aux.end()); - for (auto j = 0ul; j < nnz_row; ++j) { + for (su2_index_t j = 0; j < nnz_row; ++j) { rowidx.push_back(aux[j].first); sort_order.back()[j] = aux[j].second; } } else { /*--- These are all internal, no need to go through map. ---*/ - for (auto j = begin; j < end; ++j) rowidx.push_back(static_cast(offset + col_ind[j] + 1)); + for (auto j = begin; j < end; ++j) rowidx.push_back(CheckedCast(offset + col_ind[j] + 1, "PaStiX column index")); } } - colptr[nPointDomain] = static_cast(nNonZero + 1); + colptr[nPointDomain] = CheckedCast(nNonZero + 1, "PaStiX final colptr entry"); if (rowidx.size() != nNonZero) SU2_MPI::Error("Error during preparation of PaStiX data", CURRENT_FUNCTION); @@ -187,8 +212,8 @@ void CPastixWrapper::Initialize(CGeometry* geometry, const CConfig* spm.baseval = 1; spm.n = nCols; - spm.nnz = nNonZero; - spm.dof = nVar; + spm.nnz = CheckedCast(nNonZero, "spm.nnz"); + spm.dof = CheckedCast(nVar, "spm.dof"); spm.colptr = colptr.data(); spm.rowptr = rowidx.data(); @@ -253,11 +278,12 @@ void CPastixWrapper::Factorize(CGeometry* geometry, const CConfig* c cout << "\n+ PaStiX : Parallel Sparse matriX package +" << endl; } - const unsigned long szBlk = matrix.nVar * matrix.nVar, nNonZero = values.size(); + const auto szBlk = CheckedMul(matrix.nVar, matrix.nVar, "PaStiX block size"); + const auto nNonZero = static_cast(values.size()); /*--- Copy matrix values and swap blocks as required ---*/ - for (auto i = 0ul; i < nNonZero; ++i) values[i] = SU2_TYPE::GetValue(matrix.values[i]); + for (su2_index_t i = 0; i < nNonZero; ++i) values[i] = SU2_TYPE::GetValue(matrix.values[i]); for (auto i = 0ul; i < sort_rows.size(); ++i) { const auto iRow = sort_rows[i]; diff --git a/Common/src/linear_algebra/CSysMatrix.cpp b/Common/src/linear_algebra/CSysMatrix.cpp index 00ef51c2023..1a500529e2e 100644 --- a/Common/src/linear_algebra/CSysMatrix.cpp +++ b/Common/src/linear_algebra/CSysMatrix.cpp @@ -31,6 +31,43 @@ #include "../../include/toolboxes/allocation_toolbox.hpp" #include +#include +#include +#include + +namespace { + +su2_index_t CheckedMul(su2_index_t lhs, su2_index_t rhs, const char* what) { + if (lhs != 0 && rhs > std::numeric_limits::max() / lhs) { + SU2_MPI::Error(std::string("Overflow while computing ") + what + ".", CURRENT_FUNCTION); + } + return lhs * rhs; +} + +template +size_t CheckedBytes(su2_index_t count, const char* what) { + const auto bytes = CheckedMul(count, sizeof(T), what); + if (bytes > std::numeric_limits::max()) { + SU2_MPI::Error(std::string("Overflow while computing ") + what + " byte size.", CURRENT_FUNCTION); + } + return static_cast(bytes); +} + +std::vector ConvertSparseIndicesForCuda(const su2_index_t* src, su2_index_t count, const char* name) { + std::vector dst(count); + constexpr auto max_cuda_index = static_cast(std::numeric_limits::max()); + for (su2_index_t i = 0; i < count; ++i) { + if (src[i] > max_cuda_index) { + SU2_MPI::Error(std::string(name) + + " exceeds the current CUDA backend sparse-index range; the current CUDA backend still uses unsigned-long sparse indices.", + CURRENT_FUNCTION); + } + dst[i] = static_cast(src[i]); + } + return dst; +} + +} // namespace template CSysMatrix::CSysMatrix() : rank(SU2_MPI::GetRank()), size(SU2_MPI::GetSize()) { @@ -143,27 +180,31 @@ void CSysMatrix::Initialize(unsigned long npoint, unsigned long npoi dia_ptr = csr.diagPtr(); /*--- Allocate data. ---*/ - auto allocAndInit = [](ScalarType*& ptr, unsigned long num) { - ptr = MemoryAllocation::aligned_alloc(64, num * sizeof(ScalarType)); + auto allocAndInit = [](ScalarType*& ptr, su2_index_t num, const char* what) { + ptr = MemoryAllocation::aligned_alloc(64, CheckedBytes(num, what)); }; - allocAndInit(matrix, nnz * nVar * nEqn); + const auto matrix_entries = CheckedMul(CheckedMul(nnz, nVar, "CSysMatrix storage size"), nEqn, "CSysMatrix storage size"); + allocAndInit(matrix, matrix_entries, "CSysMatrix host allocation"); useCuda = config->GetCUDA(); if (useCuda) { /*--- Allocate GPU data. ---*/ - auto GPUAllocAndInit = [](ScalarType*& ptr, unsigned long num) { - ptr = GPUMemoryAllocation::gpu_alloc(num * sizeof(ScalarType)); + auto GPUAllocAndInit = [](ScalarType*& ptr, su2_index_t num, const char* what) { + ptr = GPUMemoryAllocation::gpu_alloc(CheckedBytes(num, what)); }; - auto GPUAllocAndCopy = [](const unsigned long*& ptr, const unsigned long*& src_ptr, unsigned long num) { - ptr = GPUMemoryAllocation::gpu_alloc_cpy(src_ptr, num * sizeof(const unsigned long)); + auto GPUAllocAndCopy = [](const unsigned long*& ptr, const unsigned long* src_ptr, su2_index_t num, const char* what) { + ptr = GPUMemoryAllocation::gpu_alloc_cpy(src_ptr, CheckedBytes(num, what)); }; - GPUAllocAndInit(d_matrix, nnz * nVar * nEqn); - GPUAllocAndCopy(d_row_ptr, row_ptr, (nPointDomain + 1)); - GPUAllocAndCopy(d_col_ind, col_ind, nnz); + const auto row_ptr_host = ConvertSparseIndicesForCuda(row_ptr, su2_index_t(nPointDomain) + 1, "CSysMatrix row_ptr"); + const auto col_ind_host = ConvertSparseIndicesForCuda(col_ind, nnz, "CSysMatrix col_ind"); + + GPUAllocAndInit(d_matrix, matrix_entries, "CSysMatrix device allocation"); + GPUAllocAndCopy(d_row_ptr, row_ptr_host.data(), su2_index_t(nPointDomain) + 1, "CSysMatrix row_ptr device copy"); + GPUAllocAndCopy(d_col_ind, col_ind_host.data(), nnz, "CSysMatrix col_ind device copy"); } if (needTranspPtr) col_ptr = geometry->GetTransposeSparsePatternMap(type).data(); @@ -192,9 +233,16 @@ void CSysMatrix::Initialize(unsigned long npoint, unsigned long npoi /*--- Preconditioners. ---*/ - if (ilu_needed) allocAndInit(ILU_matrix, nnz_ilu * nVar * nEqn); + if (ilu_needed) { + const auto ilu_entries = CheckedMul(CheckedMul(nnz_ilu, nVar, "CSysMatrix ILU storage size"), nEqn, "CSysMatrix ILU storage size"); + allocAndInit(ILU_matrix, ilu_entries, "CSysMatrix ILU host allocation"); + } - if (diag_needed) allocAndInit(invM, nPointDomain * nVar * nEqn); + if (diag_needed) { + const auto invm_entries = CheckedMul(CheckedMul(nPointDomain, nVar, "CSysMatrix inverse diagonal storage size"), nEqn, + "CSysMatrix inverse diagonal storage size"); + allocAndInit(invM, invm_entries, "CSysMatrix inverse diagonal host allocation"); + } /*--- Thread parallel initialization. ---*/ @@ -202,7 +250,7 @@ void CSysMatrix::Initialize(unsigned long npoint, unsigned long npoi /*--- Set suitable chunk sizes for light static for loops, and heavy dynamic ones, such that threads are approximately evenly loaded. ---*/ - omp_light_size = computeStaticChunkSize(nnz * nVar * nEqn, num_threads, OMP_MAX_SIZE_L); + omp_light_size = computeStaticChunkSize(matrix_entries, num_threads, OMP_MAX_SIZE_L); omp_heavy_size = computeStaticChunkSize(nPointDomain, num_threads, OMP_MAX_SIZE_H); omp_num_parts = config->GetLinear_Solver_Prec_Threads(); @@ -491,7 +539,7 @@ void CSysMatrixComms::Complete(CSysVector& x, CGeometry* geometry, const CCon template void CSysMatrix::SetValZero() { SU2_ZONE_SCOPED - const auto size = nnz * nVar * nEqn; + const auto size = CheckedMul(CheckedMul(nnz, nVar, "CSysMatrix zero-fill size"), nEqn, "CSysMatrix zero-fill size"); const auto chunk = roundUpDiv(size, omp_get_num_threads()); const auto begin = chunk * omp_get_thread_num(); const auto mySize = min(chunk, size - begin) * sizeof(ScalarType); @@ -1326,8 +1374,10 @@ void CSysMatrix::MatrixMatrixAddition(ScalarType alpha, const CSysMa SU2_MPI::Error("Matrices do not have compatible sparsity.", CURRENT_FUNCTION); } + const auto matrix_entries = CheckedMul(CheckedMul(nnz, nVar, "CSysMatrix matrix sum size"), nEqn, + "CSysMatrix matrix sum size"); SU2_OMP_FOR_STAT(omp_light_size) - for (auto i = 0ul; i < nnz * nVar * nEqn; ++i) matrix[i] += alpha * B.matrix[i]; + for (su2_index_t i = 0; i < matrix_entries; ++i) matrix[i] += alpha * B.matrix[i]; END_SU2_OMP_FOR } diff --git a/Common/src/linear_algebra/CSysVector.cpp b/Common/src/linear_algebra/CSysVector.cpp index f7df34633a0..f676b18cded 100644 --- a/Common/src/linear_algebra/CSysVector.cpp +++ b/Common/src/linear_algebra/CSysVector.cpp @@ -28,8 +28,31 @@ #include "../../include/linear_algebra/CSysVector.hpp" #include "../../include/toolboxes/allocation_toolbox.hpp" +#include +#include + +namespace { + +su2_index_t CheckedMul(su2_index_t lhs, su2_index_t rhs, const char* what) { + if (lhs != 0 && rhs > std::numeric_limits::max() / lhs) { + SU2_MPI::Error(std::string("Overflow while computing ") + what + ".", CURRENT_FUNCTION); + } + return lhs * rhs; +} + +template +size_t CheckedBytes(su2_index_t count, const char* what) { + const auto bytes = CheckedMul(count, sizeof(T), what); + if (bytes > std::numeric_limits::max()) { + SU2_MPI::Error(std::string("Overflow while computing ") + what + " byte size.", CURRENT_FUNCTION); + } + return static_cast(bytes); +} + +} // namespace + template -void CSysVector::Initialize(unsigned long numBlk, unsigned long numBlkDomain, unsigned long numVar, +void CSysVector::Initialize(su2_index_t numBlk, su2_index_t numBlkDomain, su2_index_t numVar, const ScalarType* val, bool valIsArray, bool errorIfParallel) { if (errorIfParallel && omp_in_parallel()) { assert(false); @@ -39,20 +62,23 @@ void CSysVector::Initialize(unsigned long numBlk, unsigned long numB if (omp_get_thread_num()) SU2_MPI::Error("Only the master thread is allowed to initialize the vector.", CURRENT_FUNCTION); - if (nElm != numBlk * numVar) { + const auto new_nElm = CheckedMul(numBlk, numVar, "CSysVector storage size"); + const auto new_nElmDomain = CheckedMul(numBlkDomain, numVar, "CSysVector domain storage size"); + + if (nElm != new_nElm) { MemoryAllocation::aligned_free(vec_val); vec_val = nullptr; } - nElm = numBlk * numVar; - nElmDomain = numBlkDomain * numVar; + nElm = new_nElm; + nElmDomain = new_nElmDomain; nVar = numVar; omp_chunk_size = computeStaticChunkSize(nElm, omp_get_max_threads(), OMP_MAX_SIZE); - if (vec_val == nullptr) vec_val = MemoryAllocation::aligned_alloc(64, nElm * sizeof(ScalarType)); + if (vec_val == nullptr) vec_val = MemoryAllocation::aligned_alloc(64, CheckedBytes(nElm, "CSysVector host allocation")); - d_vec_val = GPUMemoryAllocation::gpu_alloc(nElm * sizeof(ScalarType)); + d_vec_val = GPUMemoryAllocation::gpu_alloc(CheckedBytes(nElm, "CSysVector device allocation")); #ifdef HAVE_OMP dot_scratch.reset(new ScalarType[omp_get_max_threads()]); @@ -60,9 +86,9 @@ void CSysVector::Initialize(unsigned long numBlk, unsigned long numB if (val != nullptr) { if (!valIsArray) { - for (auto i = 0ul; i < nElm; i++) vec_val[i] = *val; + for (su2_index_t i = 0; i < nElm; i++) vec_val[i] = *val; } else { - for (auto i = 0ul; i < nElm; i++) vec_val[i] = val[i]; + for (su2_index_t i = 0; i < nElm; i++) vec_val[i] = val[i]; } } } @@ -135,7 +161,7 @@ const su2matrix& CSysVector::multiDot(const std::vector< template CSysVector::~CSysVector() { if constexpr (!std::is_trivial_v) { - for (auto i = 0ul; i < nElm; i++) vec_val[i].~ScalarType(); + for (su2_index_t i = 0; i < nElm; i++) vec_val[i].~ScalarType(); } MemoryAllocation::aligned_free(vec_val); From ccf7a496e9bb6297885cdef7736494d4fe401096 Mon Sep 17 00:00:00 2001 From: LwhJesse <256257451+LwhJesse@users.noreply.github.com> Date: Tue, 26 May 2026 05:33:34 +0800 Subject: [PATCH 2/2] Apply clang-format updates --- Common/include/linear_algebra/CSysMatrix.hpp | 24 ++++++++++---------- Common/include/linear_algebra/CSysVector.hpp | 24 ++++++++++---------- Common/src/linear_algebra/CPastixWrapper.cpp | 6 +++-- Common/src/linear_algebra/CSysMatrix.cpp | 20 +++++++++------- Common/src/linear_algebra/CSysVector.cpp | 7 ++++-- 5 files changed, 45 insertions(+), 36 deletions(-) diff --git a/Common/include/linear_algebra/CSysMatrix.hpp b/Common/include/linear_algebra/CSysMatrix.hpp index 05aaaa37eee..f2f3b6eedf4 100644 --- a/Common/include/linear_algebra/CSysMatrix.hpp +++ b/Common/include/linear_algebra/CSysMatrix.hpp @@ -138,12 +138,12 @@ class CSysMatrix { unsigned long nVar; /*!< \brief Number of variables (and rows of the blocks). */ unsigned long nEqn; /*!< \brief Number of equations (and columns of the blocks). */ - ScalarType* matrix; /*!< \brief Entries of the sparse matrix. */ - su2_index_t nnz; /*!< \brief Number of possible nonzero entries in the matrix. */ - const su2_index_t* row_ptr; /*!< \brief Pointers to the first element in each row. */ - const su2_index_t* dia_ptr; /*!< \brief Pointers to the diagonal element in each row. */ - const su2_index_t* col_ind; /*!< \brief Column index for each of the elements in val(). */ - const su2_index_t* col_ptr; /*!< \brief The transpose of col_ind, pointer to blocks with the same column index. */ + ScalarType* matrix; /*!< \brief Entries of the sparse matrix. */ + su2_index_t nnz; /*!< \brief Number of possible nonzero entries in the matrix. */ + const su2_index_t* row_ptr; /*!< \brief Pointers to the first element in each row. */ + const su2_index_t* dia_ptr; /*!< \brief Pointers to the diagonal element in each row. */ + const su2_index_t* col_ind; /*!< \brief Column index for each of the elements in val(). */ + const su2_index_t* col_ptr; /*!< \brief The transpose of col_ind, pointer to blocks with the same column index. */ ScalarType* d_matrix; /*!< \brief Device Pointer to store the matrix values on the GPU. */ const unsigned long* d_row_ptr; /*!< \brief Device Pointers to the first element in each row. */ @@ -151,12 +151,12 @@ class CSysMatrix { bool useCuda = false; /*!< \brief Boolean that indicates whether user has enabled CUDA or not. Mainly used to conditionally free GPU memory in the class destructor. */ - ScalarType* ILU_matrix; /*!< \brief Entries of the ILU sparse matrix. */ - su2_index_t nnz_ilu; /*!< \brief Number of possible nonzero entries in the matrix (ILU). */ - const su2_index_t* row_ptr_ilu; /*!< \brief Pointers to the first element in each row (ILU). */ - const su2_index_t* dia_ptr_ilu; /*!< \brief Pointers to the diagonal element in each row (ILU). */ - const su2_index_t* col_ind_ilu; /*!< \brief Column index for each of the elements in val() (ILU). */ - unsigned short ilu_fill_in; /*!< \brief Fill in level for the ILU preconditioner. */ + ScalarType* ILU_matrix; /*!< \brief Entries of the ILU sparse matrix. */ + su2_index_t nnz_ilu; /*!< \brief Number of possible nonzero entries in the matrix (ILU). */ + const su2_index_t* row_ptr_ilu; /*!< \brief Pointers to the first element in each row (ILU). */ + const su2_index_t* dia_ptr_ilu; /*!< \brief Pointers to the diagonal element in each row (ILU). */ + const su2_index_t* col_ind_ilu; /*!< \brief Column index for each of the elements in val() (ILU). */ + unsigned short ilu_fill_in; /*!< \brief Fill in level for the ILU preconditioner. */ /*!< \brief Level structure for alternative shared memory parallelization of ILU. */ CCompressedSparsePatternUL levels_ilu; diff --git a/Common/include/linear_algebra/CSysVector.hpp b/Common/include/linear_algebra/CSysVector.hpp index 6ce30a9c250..7650a595675 100644 --- a/Common/include/linear_algebra/CSysVector.hpp +++ b/Common/include/linear_algebra/CSysVector.hpp @@ -312,19 +312,19 @@ class CSysVector : public VecExpr::CVecExpr, ScalarType> * \brief Compound assignement operations with scalars and expressions. * \param[in] val/expr - Scalar value or expression. */ -#define MAKE_COMPOUND(OP) \ - CSysVector& operator OP(ScalarType val) { \ - CSYSVEC_PARFOR \ - for (su2_index_t i = 0; i < nElm; ++i) vec_val[i] OP val; \ - END_CSYSVEC_PARFOR \ - return *this; \ - } \ - template \ - CSysVector& operator OP(const VecExpr::CVecExpr& expr) { \ - CSYSVEC_PARFOR \ +#define MAKE_COMPOUND(OP) \ + CSysVector& operator OP(ScalarType val) { \ + CSYSVEC_PARFOR \ + for (su2_index_t i = 0; i < nElm; ++i) vec_val[i] OP val; \ + END_CSYSVEC_PARFOR \ + return *this; \ + } \ + template \ + CSysVector& operator OP(const VecExpr::CVecExpr& expr) { \ + CSYSVEC_PARFOR \ for (su2_index_t i = 0; i < nElm; ++i) vec_val[i] OP expr.derived()[i]; \ - END_CSYSVEC_PARFOR \ - return *this; \ + END_CSYSVEC_PARFOR \ + return *this; \ } MAKE_COMPOUND(=) MAKE_COMPOUND(+=) diff --git a/Common/src/linear_algebra/CPastixWrapper.cpp b/Common/src/linear_algebra/CPastixWrapper.cpp index b3b88eccce8..8df06f3156d 100644 --- a/Common/src/linear_algebra/CPastixWrapper.cpp +++ b/Common/src/linear_algebra/CPastixWrapper.cpp @@ -183,7 +183,8 @@ void CPastixWrapper::Initialize(CGeometry* geometry, const CConfig* if (col_ind[j] < nPointDomain) { aux[j - begin].first = CheckedCast(offset + col_ind[j] + 1, "PaStiX column index"); } else { - aux[j - begin].first = CheckedCast(map[col_ind[j] - nPointDomain] + 1, "PaStiX halo column index"); + aux[j - begin].first = + CheckedCast(map[col_ind[j] - nPointDomain] + 1, "PaStiX halo column index"); } aux[j - begin].second = j; } @@ -195,7 +196,8 @@ void CPastixWrapper::Initialize(CGeometry* geometry, const CConfig* } } else { /*--- These are all internal, no need to go through map. ---*/ - for (auto j = begin; j < end; ++j) rowidx.push_back(CheckedCast(offset + col_ind[j] + 1, "PaStiX column index")); + for (auto j = begin; j < end; ++j) + rowidx.push_back(CheckedCast(offset + col_ind[j] + 1, "PaStiX column index")); } } colptr[nPointDomain] = CheckedCast(nNonZero + 1, "PaStiX final colptr entry"); diff --git a/Common/src/linear_algebra/CSysMatrix.cpp b/Common/src/linear_algebra/CSysMatrix.cpp index 1a500529e2e..fa84283c7ab 100644 --- a/Common/src/linear_algebra/CSysMatrix.cpp +++ b/Common/src/linear_algebra/CSysMatrix.cpp @@ -59,7 +59,8 @@ std::vector ConvertSparseIndicesForCuda(const su2_index_t* src, s for (su2_index_t i = 0; i < count; ++i) { if (src[i] > max_cuda_index) { SU2_MPI::Error(std::string(name) + - " exceeds the current CUDA backend sparse-index range; the current CUDA backend still uses unsigned-long sparse indices.", + " exceeds the current CUDA backend sparse-index range; the current CUDA backend still uses " + "unsigned-long sparse indices.", CURRENT_FUNCTION); } dst[i] = static_cast(src[i]); @@ -184,7 +185,8 @@ void CSysMatrix::Initialize(unsigned long npoint, unsigned long npoi ptr = MemoryAllocation::aligned_alloc(64, CheckedBytes(num, what)); }; - const auto matrix_entries = CheckedMul(CheckedMul(nnz, nVar, "CSysMatrix storage size"), nEqn, "CSysMatrix storage size"); + const auto matrix_entries = + CheckedMul(CheckedMul(nnz, nVar, "CSysMatrix storage size"), nEqn, "CSysMatrix storage size"); allocAndInit(matrix, matrix_entries, "CSysMatrix host allocation"); useCuda = config->GetCUDA(); @@ -195,7 +197,8 @@ void CSysMatrix::Initialize(unsigned long npoint, unsigned long npoi ptr = GPUMemoryAllocation::gpu_alloc(CheckedBytes(num, what)); }; - auto GPUAllocAndCopy = [](const unsigned long*& ptr, const unsigned long* src_ptr, su2_index_t num, const char* what) { + auto GPUAllocAndCopy = [](const unsigned long*& ptr, const unsigned long* src_ptr, su2_index_t num, + const char* what) { ptr = GPUMemoryAllocation::gpu_alloc_cpy(src_ptr, CheckedBytes(num, what)); }; @@ -234,13 +237,14 @@ void CSysMatrix::Initialize(unsigned long npoint, unsigned long npoi /*--- Preconditioners. ---*/ if (ilu_needed) { - const auto ilu_entries = CheckedMul(CheckedMul(nnz_ilu, nVar, "CSysMatrix ILU storage size"), nEqn, "CSysMatrix ILU storage size"); + const auto ilu_entries = + CheckedMul(CheckedMul(nnz_ilu, nVar, "CSysMatrix ILU storage size"), nEqn, "CSysMatrix ILU storage size"); allocAndInit(ILU_matrix, ilu_entries, "CSysMatrix ILU host allocation"); } if (diag_needed) { - const auto invm_entries = CheckedMul(CheckedMul(nPointDomain, nVar, "CSysMatrix inverse diagonal storage size"), nEqn, - "CSysMatrix inverse diagonal storage size"); + const auto invm_entries = CheckedMul(CheckedMul(nPointDomain, nVar, "CSysMatrix inverse diagonal storage size"), + nEqn, "CSysMatrix inverse diagonal storage size"); allocAndInit(invM, invm_entries, "CSysMatrix inverse diagonal host allocation"); } @@ -1374,8 +1378,8 @@ void CSysMatrix::MatrixMatrixAddition(ScalarType alpha, const CSysMa SU2_MPI::Error("Matrices do not have compatible sparsity.", CURRENT_FUNCTION); } - const auto matrix_entries = CheckedMul(CheckedMul(nnz, nVar, "CSysMatrix matrix sum size"), nEqn, - "CSysMatrix matrix sum size"); + const auto matrix_entries = + CheckedMul(CheckedMul(nnz, nVar, "CSysMatrix matrix sum size"), nEqn, "CSysMatrix matrix sum size"); SU2_OMP_FOR_STAT(omp_light_size) for (su2_index_t i = 0; i < matrix_entries; ++i) matrix[i] += alpha * B.matrix[i]; END_SU2_OMP_FOR diff --git a/Common/src/linear_algebra/CSysVector.cpp b/Common/src/linear_algebra/CSysVector.cpp index f676b18cded..f536e7ba30b 100644 --- a/Common/src/linear_algebra/CSysVector.cpp +++ b/Common/src/linear_algebra/CSysVector.cpp @@ -76,9 +76,12 @@ void CSysVector::Initialize(su2_index_t numBlk, su2_index_t numBlkDo omp_chunk_size = computeStaticChunkSize(nElm, omp_get_max_threads(), OMP_MAX_SIZE); - if (vec_val == nullptr) vec_val = MemoryAllocation::aligned_alloc(64, CheckedBytes(nElm, "CSysVector host allocation")); + if (vec_val == nullptr) + vec_val = MemoryAllocation::aligned_alloc( + 64, CheckedBytes(nElm, "CSysVector host allocation")); - d_vec_val = GPUMemoryAllocation::gpu_alloc(CheckedBytes(nElm, "CSysVector device allocation")); + d_vec_val = + GPUMemoryAllocation::gpu_alloc(CheckedBytes(nElm, "CSysVector device allocation")); #ifdef HAVE_OMP dot_scratch.reset(new ScalarType[omp_get_max_threads()]);