From ebf59ffe830dddb57b5a3918e5b1fa9f366da3d0 Mon Sep 17 00:00:00 2001 From: Damien Dooley Date: Thu, 4 Jun 2026 15:56:37 +0100 Subject: [PATCH 1/3] SME2/NEON heuristic to handle difficult tensor shapes at high thread count Signed-off-by: Damien Dooley --- arm_compute/core/CPP/CPPTypes.h | 10 ++++++++++ src/core/CPP/CPPTypes.cpp | 35 ++++++++++++++++++++++++++------- 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/arm_compute/core/CPP/CPPTypes.h b/arm_compute/core/CPP/CPPTypes.h index 1a931dd829e..f6fb103d365 100644 --- a/arm_compute/core/CPP/CPPTypes.h +++ b/arm_compute/core/CPP/CPPTypes.h @@ -168,6 +168,16 @@ class CPUInfo final * @return true if the cpu supports sme_b16f32, false otherwise */ bool has_sme_b16f32() const; + /** Sets whether SME and SME2 implementations are allowed to be selected at runtime. + * + * @param[in] is_allowed True to expose detected SME/SME2 support, false to hide SME/SME2 from runtime selectors. + */ + void set_sme_allowed(bool is_allowed); + /** Returns whether SME and SME2 implementations are allowed to be selected at runtime. + * + * @return true if detected SME/SME2 support is exposed to runtime selectors, false otherwise. + */ + bool is_sme_allowed() const; /** Gets the cpu model for a given cpuid. 
* * @param[in] cpuid the id of the cpu core to be retrieved, diff --git a/src/core/CPP/CPPTypes.cpp b/src/core/CPP/CPPTypes.cpp index fafadc0aac3..fe8304949c3 100644 --- a/src/core/CPP/CPPTypes.cpp +++ b/src/core/CPP/CPPTypes.cpp @@ -39,6 +39,7 @@ struct CPUInfo::Impl cpuinfo::CpuInfo info{}; unsigned int L1_cache_size = 32768; unsigned int L2_cache_size = 262144; + bool sme_allowed = true; }; CPUInfo &CPUInfo::get() @@ -111,32 +112,42 @@ bool CPUInfo::has_sve2() const bool CPUInfo::has_sme() const { - return _impl->info.has_sme(); + return _impl->sme_allowed && _impl->info.has_sme(); } bool CPUInfo::has_sme2() const { - return _impl->info.has_sme2(); + return _impl->sme_allowed && _impl->info.has_sme2(); } bool CPUInfo::has_sme_i8i32() const { - return _impl->info.has_sme_i8i32(); + return _impl->sme_allowed && _impl->info.has_sme_i8i32(); } bool CPUInfo::has_sme_f16f32() const { - return _impl->info.has_sme_f16f32(); + return _impl->sme_allowed && _impl->info.has_sme_f16f32(); } bool CPUInfo::has_sme_f32f32() const { - return _impl->info.has_sme_f32f32(); + return _impl->sme_allowed && _impl->info.has_sme_f32f32(); } bool CPUInfo::has_sme_b16f32() const { - return _impl->info.has_sme_b16f32(); + return _impl->sme_allowed && _impl->info.has_sme_b16f32(); +} + +void CPUInfo::set_sme_allowed(bool is_allowed) +{ + _impl->sme_allowed = is_allowed; +} + +bool CPUInfo::is_sme_allowed() const +{ + return _impl->sme_allowed; } CPUModel CPUInfo::get_cpu_model() const @@ -151,7 +162,17 @@ CPUModel CPUInfo::get_cpu_model(unsigned int cpuid) const cpuinfo::CpuIsaInfo CPUInfo::get_isa() const { - return _impl->info.isa(); + cpuinfo::CpuIsaInfo isa = _impl->info.isa(); + if (!_impl->sme_allowed) + { + isa.sme = false; + isa.sme2 = false; + isa.sme_b16f32 = false; + isa.sme_f16f32 = false; + isa.sme_f32f32 = false; + isa.sme_i8i32 = false; + } + return isa; } unsigned int CPUInfo::get_L1_cache_size() const From 05a0c37df05156422479bb0f5dce6f8d5c07bc07 Mon Sep 17 00:00:00 2001 
From: Damien Dooley Date: Mon, 8 Jun 2026 14:52:42 +0100 Subject: [PATCH 2/3] Updated heuristic for better coverage of FP16 and FP32 use cases --- arm_compute/core/CPP/CPPTypes.h | 10 ++++++++++ src/core/CPP/CPPTypes.cpp | 29 ++++++++++++++++++++++++----- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/arm_compute/core/CPP/CPPTypes.h b/arm_compute/core/CPP/CPPTypes.h index f6fb103d365..b7c3498c037 100644 --- a/arm_compute/core/CPP/CPPTypes.h +++ b/arm_compute/core/CPP/CPPTypes.h @@ -168,6 +168,16 @@ class CPUInfo final * @return true if the cpu supports sme_b16f32, false otherwise */ bool has_sme_b16f32() const; + /** Sets whether SVE and SVE2 implementations are allowed to be selected at runtime. + * + * @param[in] is_allowed True to expose detected SVE/SVE2 support, false to hide SVE/SVE2 from runtime selectors. + */ + void set_sve_allowed(bool is_allowed); + /** Returns whether SVE and SVE2 implementations are allowed to be selected at runtime. + * + * @return true if detected SVE/SVE2 support is exposed to runtime selectors, false otherwise. + */ + bool is_sve_allowed() const; /** Sets whether SME and SME2 implementations are allowed to be selected at runtime. * * @param[in] is_allowed True to expose detected SME/SME2 support, false to hide SME/SME2 from runtime selectors. 
diff --git a/src/core/CPP/CPPTypes.cpp b/src/core/CPP/CPPTypes.cpp index fe8304949c3..4328415f653 100644 --- a/src/core/CPP/CPPTypes.cpp +++ b/src/core/CPP/CPPTypes.cpp @@ -39,6 +39,7 @@ struct CPUInfo::Impl cpuinfo::CpuInfo info{}; unsigned int L1_cache_size = 32768; unsigned int L2_cache_size = 262144; + bool sve_allowed = true; bool sme_allowed = true; }; @@ -72,7 +73,7 @@ bool CPUInfo::has_bf16() const bool CPUInfo::has_svebf16() const { - return _impl->info.has_svebf16(); + return _impl->sve_allowed && _impl->info.has_svebf16(); } bool CPUInfo::has_dotprod() const @@ -82,7 +83,7 @@ bool CPUInfo::has_dotprod() const bool CPUInfo::has_svef32mm() const { - return _impl->info.has_svef32mm(); + return _impl->sve_allowed && _impl->info.has_svef32mm(); } bool CPUInfo::has_i8mm() const @@ -92,7 +93,7 @@ bool CPUInfo::has_i8mm() const bool CPUInfo::has_svei8mm() const { - return _impl->info.has_svei8mm(); + return _impl->sve_allowed && _impl->info.has_svei8mm(); } bool CPUInfo::has_fhm() const @@ -102,12 +103,12 @@ bool CPUInfo::has_fhm() const bool CPUInfo::has_sve() const { - return _impl->info.has_sve(); + return _impl->sve_allowed && _impl->info.has_sve(); } bool CPUInfo::has_sve2() const { - return _impl->info.has_sve2(); + return _impl->sve_allowed && _impl->info.has_sve2(); } bool CPUInfo::has_sme() const @@ -140,6 +141,16 @@ bool CPUInfo::has_sme_b16f32() const return _impl->sme_allowed && _impl->info.has_sme_b16f32(); } +void CPUInfo::set_sve_allowed(bool is_allowed) +{ + _impl->sve_allowed = is_allowed; +} + +bool CPUInfo::is_sve_allowed() const +{ + return _impl->sve_allowed; +} + void CPUInfo::set_sme_allowed(bool is_allowed) { _impl->sme_allowed = is_allowed; @@ -163,6 +174,14 @@ CPUModel CPUInfo::get_cpu_model(unsigned int cpuid) const cpuinfo::CpuIsaInfo CPUInfo::get_isa() const { cpuinfo::CpuIsaInfo isa = _impl->info.isa(); + if (!_impl->sve_allowed) + { + isa.sve = false; + isa.sve2 = false; + isa.svebf16 = false; + isa.svei8mm = false; + isa.svef32mm 
= false; + } if (!_impl->sme_allowed) { isa.sme = false; From 992d4028fc3e4b0a383bcf127bb7387d028f076a Mon Sep 17 00:00:00 2001 From: Damien Dooley Date: Fri, 12 Jun 2026 11:36:06 +0100 Subject: [PATCH 3/3] Tightened up policy flags --- arm_compute/core/CPP/CPPTypes.h | 5 ----- src/core/CPP/CPPTypes.cpp | 5 ----- 2 files changed, 10 deletions(-) diff --git a/arm_compute/core/CPP/CPPTypes.h b/arm_compute/core/CPP/CPPTypes.h index b7c3498c037..fcd1b301e3a 100644 --- a/arm_compute/core/CPP/CPPTypes.h +++ b/arm_compute/core/CPP/CPPTypes.h @@ -173,11 +173,6 @@ class CPUInfo final * @param[in] is_allowed True to expose detected SVE/SVE2 support, false to hide SVE/SVE2 from runtime selectors. */ void set_sve_allowed(bool is_allowed); - /** Returns whether SVE and SVE2 implementations are allowed to be selected at runtime. - * - * @return true if detected SVE/SVE2 support is exposed to runtime selectors, false otherwise. - */ - bool is_sve_allowed() const; /** Sets whether SME and SME2 implementations are allowed to be selected at runtime. * * @param[in] is_allowed True to expose detected SME/SME2 support, false to hide SME/SME2 from runtime selectors. diff --git a/src/core/CPP/CPPTypes.cpp b/src/core/CPP/CPPTypes.cpp index 4328415f653..a489fcfca37 100644 --- a/src/core/CPP/CPPTypes.cpp +++ b/src/core/CPP/CPPTypes.cpp @@ -146,11 +146,6 @@ void CPUInfo::set_sve_allowed(bool is_allowed) _impl->sve_allowed = is_allowed; } -bool CPUInfo::is_sve_allowed() const -{ - return _impl->sve_allowed; -} - void CPUInfo::set_sme_allowed(bool is_allowed) { _impl->sme_allowed = is_allowed;