From 2c76593485674c07ac293af0ac3701428ce3b3fd Mon Sep 17 00:00:00 2001
From: Francesco Bertolotti <francesco.bertolotti@igenius.ai>
Date: Fri, 29 May 2026 19:18:07 +0200
Subject: [PATCH] Increase fp16 tolerance in KV cache test

Signed-off-by: Francesco Bertolotti <francesco.bertolotti@igenius.ai>
---
 tests/pytorch/attention/test_kv_cache.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tests/pytorch/attention/test_kv_cache.py b/tests/pytorch/attention/test_kv_cache.py
index c662252f9e..56a6261236 100644
--- a/tests/pytorch/attention/test_kv_cache.py
+++ b/tests/pytorch/attention/test_kv_cache.py
@@ -381,8 +381,13 @@ def get_tols(config, module, backend, dtype):
                     torch.bfloat16: (1.2e-1, 1e-1),
                 }
             else:
+                # head_dim > 128 in fp16 is the worst case for accumulated rounding error, and
+                # the full-sequence and incremental-KV-cache paths use different kernels/mask
+                # types. On sm80 with older cuDNN, the mismatch on a single element reaches
+                # ~1e-2, so the fp16 tolerance is widened slightly. These tolerances were
+                # originally calibrated on Hopper/Blackwell with newer cuDNN.
                 tols = {
-                    torch.half: (1e-2, 1e-2),
+                    torch.half: (1.5e-2, 1.5e-2),
                     torch.bfloat16: (8e-2, 7e-2),
                 }
     if module == "DotProductAttention":