diff --git a/benchmark/CDT.Benchmarks/PredicatesBenchmarks.cs b/benchmark/CDT.Benchmarks/PredicatesBenchmarks.cs new file mode 100644 index 0000000..942eb7d --- /dev/null +++ b/benchmark/CDT.Benchmarks/PredicatesBenchmarks.cs @@ -0,0 +1,418 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +using BenchmarkDotNet.Attributes; +using BenchmarkDotNet.Configs; +using BenchmarkDotNet.Diagnosers; +using BenchmarkDotNet.Running; +using CDT.Predicates; + +// --------------------------------------------------------------------------- +// Predicates microbenchmarks +// --------------------------------------------------------------------------- + +/// +/// Microbenchmarks that compare and +/// head-to-head for every predicate variant. +/// +/// +/// Two input scenarios are covered for each method: +/// +/// General – random coordinates in general position. +/// The adaptive fast-path (Stage A) returns immediately without any +/// expansion arithmetic, so these numbers reflect the absolute best +/// case for . +/// NearDegenerate – nearly-collinear / nearly-co-circular +/// points that force the adaptive predicate through Stage B–D (and +/// therefore all the way to the exact expansion code). These numbers +/// show the cost of the slow path and are comparable to what +/// always pays. +/// +/// +/// +[MemoryDiagnoser] +[EventPipeProfiler(EventPipeProfile.CpuSampling)] +[GroupBenchmarksBy(BenchmarkLogicalGroupRule.ByCategory)] +[CategoriesColumn] +[ShortRunJob] +public class PredicatesBenchmarks +{ + // Number of predicate calls per benchmark iteration – large enough to + // dominate timer resolution, small enough for a short-run job. + private const int N = 4_000; + + // ── General-position input arrays ────────────────────────────────────── + // Orient2d uses 3 points (6 doubles / 6 floats) per call. 
+ // InCircle uses 4 points (8 doubles / 8 floats) per call. + private double[] _orient2dD = null!; + private float[] _orient2dF = null!; + private double[] _inCircleD = null!; + private float[] _inCircleF = null!; + + // ── Near-degenerate input arrays ─────────────────────────────────────── + // Orient2d: nearly-collinear triples. + // InCircle: points nearly on a common circumcircle. + private double[] _orient2dDeg = null!; + private float[] _orient2dFDeg = null!; + private double[] _inCircleDeg = null!; + private float[] _inCircleFDeg = null!; + + [GlobalSetup] + public void Setup() + { + var rng = new Random(unchecked((int)0xDEAD_BEEF)); + + // ── general-position ─────────────────────────────────────────────── + _orient2dD = FillDoubles(rng, 6 * N, scale: 1000.0); + _orient2dF = ToFloats(_orient2dD); + _inCircleD = FillDoubles(rng, 8 * N, scale: 1000.0); + _inCircleF = ToFloats(_inCircleD); + + // ── near-degenerate ──────────────────────────────────────────────── + // Orient2d: triple (A, B, C) where C is nearly on line AB. + // We construct A=(0,0), B=(t,t), C=(2t, 2t+ε) for varying t and ε≈1e-14. + _orient2dDeg = BuildNearCollinear(N); + _orient2dFDeg = ToFloats(_orient2dDeg); + + // InCircle: quadruple (A,B,C,D) where D is nearly on the + // circumcircle of A, B, C. 
+ _inCircleDeg = BuildNearCircle(N); + _inCircleFDeg = ToFloats(_inCircleDeg); + } + + // ========================================================================= + // Orient2d – double + // ========================================================================= + + [Benchmark(Description = "Adaptive – Orient2d double (general)")] + [BenchmarkCategory("Orient2d-double")] + public double Adaptive_Orient2d_Double_General() + { + double acc = 0.0; + double[] pts = _orient2dD; + for (int i = 0; i < N; i++) + { + int j = i * 6; + acc += PredicatesAdaptive.Orient2d( + pts[j], pts[j + 1], pts[j + 2], pts[j + 3], pts[j + 4], pts[j + 5]); + } + return acc; + } + + [Benchmark(Description = "Exact – Orient2d double (general)")] + [BenchmarkCategory("Orient2d-double")] + public double Exact_Orient2d_Double_General() + { + double acc = 0.0; + double[] pts = _orient2dD; + for (int i = 0; i < N; i++) + { + int j = i * 6; + acc += PredicatesExact.Orient2d( + pts[j], pts[j + 1], pts[j + 2], pts[j + 3], pts[j + 4], pts[j + 5]); + } + return acc; + } + + [Benchmark(Description = "Adaptive – Orient2d double (near-degenerate)")] + [BenchmarkCategory("Orient2d-double-hard")] + public double Adaptive_Orient2d_Double_NearDegenerate() + { + double acc = 0.0; + double[] pts = _orient2dDeg; + for (int i = 0; i < N; i++) + { + int j = i * 6; + acc += PredicatesAdaptive.Orient2d( + pts[j], pts[j + 1], pts[j + 2], pts[j + 3], pts[j + 4], pts[j + 5]); + } + return acc; + } + + [Benchmark(Description = "Exact – Orient2d double (near-degenerate)")] + [BenchmarkCategory("Orient2d-double-hard")] + public double Exact_Orient2d_Double_NearDegenerate() + { + double acc = 0.0; + double[] pts = _orient2dDeg; + for (int i = 0; i < N; i++) + { + int j = i * 6; + acc += PredicatesExact.Orient2d( + pts[j], pts[j + 1], pts[j + 2], pts[j + 3], pts[j + 4], pts[j + 5]); + } + return acc; + } + + // ========================================================================= + // Orient2d – float + // 
========================================================================= + + [Benchmark(Description = "Adaptive – Orient2d float (general)")] + [BenchmarkCategory("Orient2d-float")] + public float Adaptive_Orient2d_Float_General() + { + float acc = 0f; + float[] pts = _orient2dF; + for (int i = 0; i < N; i++) + { + int j = i * 6; + acc += PredicatesAdaptive.Orient2d( + pts[j], pts[j + 1], pts[j + 2], pts[j + 3], pts[j + 4], pts[j + 5]); + } + return acc; + } + + [Benchmark(Description = "Exact – Orient2d float (general)")] + [BenchmarkCategory("Orient2d-float")] + public float Exact_Orient2d_Float_General() + { + float acc = 0f; + float[] pts = _orient2dF; + for (int i = 0; i < N; i++) + { + int j = i * 6; + acc += PredicatesExact.Orient2d( + pts[j], pts[j + 1], pts[j + 2], pts[j + 3], pts[j + 4], pts[j + 5]); + } + return acc; + } + + [Benchmark(Description = "Adaptive – Orient2d float (near-degenerate)")] + [BenchmarkCategory("Orient2d-float-hard")] + public float Adaptive_Orient2d_Float_NearDegenerate() + { + float acc = 0f; + float[] pts = _orient2dFDeg; + for (int i = 0; i < N; i++) + { + int j = i * 6; + acc += PredicatesAdaptive.Orient2d( + pts[j], pts[j + 1], pts[j + 2], pts[j + 3], pts[j + 4], pts[j + 5]); + } + return acc; + } + + [Benchmark(Description = "Exact – Orient2d float (near-degenerate)")] + [BenchmarkCategory("Orient2d-float-hard")] + public float Exact_Orient2d_Float_NearDegenerate() + { + float acc = 0f; + float[] pts = _orient2dFDeg; + for (int i = 0; i < N; i++) + { + int j = i * 6; + acc += PredicatesExact.Orient2d( + pts[j], pts[j + 1], pts[j + 2], pts[j + 3], pts[j + 4], pts[j + 5]); + } + return acc; + } + + // ========================================================================= + // InCircle – double + // ========================================================================= + + [Benchmark(Description = "Adaptive – InCircle double (general)")] + [BenchmarkCategory("InCircle-double")] + public double 
Adaptive_InCircle_Double_General() + { + double acc = 0.0; + double[] pts = _inCircleD; + for (int i = 0; i < N; i++) + { + int j = i * 8; + acc += PredicatesAdaptive.InCircle( + pts[j], pts[j + 1], pts[j + 2], pts[j + 3], + pts[j + 4], pts[j + 5], pts[j + 6], pts[j + 7]); + } + return acc; + } + + [Benchmark(Description = "Exact – InCircle double (general)")] + [BenchmarkCategory("InCircle-double")] + public double Exact_InCircle_Double_General() + { + double acc = 0.0; + double[] pts = _inCircleD; + for (int i = 0; i < N; i++) + { + int j = i * 8; + acc += PredicatesExact.InCircle( + pts[j], pts[j + 1], pts[j + 2], pts[j + 3], + pts[j + 4], pts[j + 5], pts[j + 6], pts[j + 7]); + } + return acc; + } + + [Benchmark(Description = "Adaptive – InCircle double (near-degenerate)")] + [BenchmarkCategory("InCircle-double-hard")] + public double Adaptive_InCircle_Double_NearDegenerate() + { + double acc = 0.0; + double[] pts = _inCircleDeg; + for (int i = 0; i < N; i++) + { + int j = i * 8; + acc += PredicatesAdaptive.InCircle( + pts[j], pts[j + 1], pts[j + 2], pts[j + 3], + pts[j + 4], pts[j + 5], pts[j + 6], pts[j + 7]); + } + return acc; + } + + [Benchmark(Description = "Exact – InCircle double (near-degenerate)")] + [BenchmarkCategory("InCircle-double-hard")] + public double Exact_InCircle_Double_NearDegenerate() + { + double acc = 0.0; + double[] pts = _inCircleDeg; + for (int i = 0; i < N; i++) + { + int j = i * 8; + acc += PredicatesExact.InCircle( + pts[j], pts[j + 1], pts[j + 2], pts[j + 3], + pts[j + 4], pts[j + 5], pts[j + 6], pts[j + 7]); + } + return acc; + } + + // ========================================================================= + // InCircle – float + // ========================================================================= + + [Benchmark(Description = "Adaptive – InCircle float (general)")] + [BenchmarkCategory("InCircle-float")] + public float Adaptive_InCircle_Float_General() + { + float acc = 0f; + float[] pts = _inCircleF; + for (int i = 0; 
i < N; i++) + { + int j = i * 8; + acc += PredicatesAdaptive.InCircle( + pts[j], pts[j + 1], pts[j + 2], pts[j + 3], + pts[j + 4], pts[j + 5], pts[j + 6], pts[j + 7]); + } + return acc; + } + + [Benchmark(Description = "Exact – InCircle float (general)")] + [BenchmarkCategory("InCircle-float")] + public float Exact_InCircle_Float_General() + { + float acc = 0f; + float[] pts = _inCircleF; + for (int i = 0; i < N; i++) + { + int j = i * 8; + acc += PredicatesExact.InCircle( + pts[j], pts[j + 1], pts[j + 2], pts[j + 3], + pts[j + 4], pts[j + 5], pts[j + 6], pts[j + 7]); + } + return acc; + } + + [Benchmark(Description = "Adaptive – InCircle float (near-degenerate)")] + [BenchmarkCategory("InCircle-float-hard")] + public float Adaptive_InCircle_Float_NearDegenerate() + { + float acc = 0f; + float[] pts = _inCircleFDeg; + for (int i = 0; i < N; i++) + { + int j = i * 8; + acc += PredicatesAdaptive.InCircle( + pts[j], pts[j + 1], pts[j + 2], pts[j + 3], + pts[j + 4], pts[j + 5], pts[j + 6], pts[j + 7]); + } + return acc; + } + + [Benchmark(Description = "Exact – InCircle float (near-degenerate)")] + [BenchmarkCategory("InCircle-float-hard")] + public float Exact_InCircle_Float_NearDegenerate() + { + float acc = 0f; + float[] pts = _inCircleFDeg; + for (int i = 0; i < N; i++) + { + int j = i * 8; + acc += PredicatesExact.InCircle( + pts[j], pts[j + 1], pts[j + 2], pts[j + 3], + pts[j + 4], pts[j + 5], pts[j + 6], pts[j + 7]); + } + return acc; + } + + // ========================================================================= + // Helpers + // ========================================================================= + + private static double[] FillDoubles(Random rng, int count, double scale) + { + var arr = new double[count]; + for (int i = 0; i < arr.Length; i++) + arr[i] = (rng.NextDouble() - 0.5) * 2.0 * scale; + return arr; + } + + private static float[] ToFloats(double[] src) + { + var arr = new float[src.Length]; + for (int i = 0; i < src.Length; i++) + arr[i] = 
(float)src[i]; + return arr; + } + + /// + /// Builds an array of nearly-collinear Orient2d triples. + /// A=(0,0), B=(t,t), C=(2t, 2t+ε) – nearly collinear because ε is tiny. + /// Varies t across [1,100] and ε across [1e-14, 1e-13] to keep results + /// diverse while reliably triggering Stage B/C of the adaptive predicate. + /// + private static double[] BuildNearCollinear(int n) + { + var arr = new double[6 * n]; + for (int i = 0; i < n; i++) + { + double t = 1.0 + i * (99.0 / n); // t in [1, 100] + double eps = 1e-14 * (1.0 + i % 10); // ε in [1e-14, 1e-13] + int j = i * 6; + arr[j] = 0.0; arr[j + 1] = 0.0; // A + arr[j + 2] = t; arr[j + 3] = t; // B + arr[j + 4] = 2 * t; arr[j + 5] = 2 * t + eps; // C (nearly collinear) + } + return arr; + } + + /// + /// Builds nearly-co-circular InCircle quadruples. + /// A, B, C are vertices of an equilateral triangle inscribed in a circle of + /// radius r ∈ [100, 1000]; D is placed at radius r + ε so it is just barely outside – + /// but close enough that the fast floating-point test cannot decide. 
+ /// + private static double[] BuildNearCircle(int n) + { + var arr = new double[8 * n]; + for (int i = 0; i < n; i++) + { + double r = 100.0 + i * (900.0 / n); // radius in [100, 1000] + double eps = 1e-11 * r * (1.0 + i % 5); // ε scaled to radius + + double ax = r, ay = 0.0; + double bx = r * Math.Cos(2 * Math.PI / 3), + by = r * Math.Sin(2 * Math.PI / 3); + double cx = r * Math.Cos(4 * Math.PI / 3), + cy = r * Math.Sin(4 * Math.PI / 3); + double dx = r + eps, dy = 0.0; // D just outside + + int j = i * 8; + arr[j] = ax; arr[j + 1] = ay; + arr[j + 2] = bx; arr[j + 3] = by; + arr[j + 4] = cx; arr[j + 5] = cy; + arr[j + 6] = dx; arr[j + 7] = dy; + } + return arr; + } +} diff --git a/src/CDT.Core/Predicates/PredicatesAdaptive.cs b/src/CDT.Core/Predicates/PredicatesAdaptive.cs index 41b19d3..153b709 100644 --- a/src/CDT.Core/Predicates/PredicatesAdaptive.cs +++ b/src/CDT.Core/Predicates/PredicatesAdaptive.cs @@ -6,6 +6,8 @@ // artem-ogre/CDT (predicates.h, predicates::adaptive namespace). using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; namespace CDT.Predicates; @@ -59,7 +61,8 @@ public static class PredicatesAdaptive /// zero if collinear, or a negative value if to the right. /// /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] + [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] // opt-15: aggressive JIT opt for fast-path Stage A + [SkipLocalsInit] public static double Orient2d( double ax, double ay, double bx, double by, double cx, double cy) { @@ -192,7 +195,8 @@ public static float Orient2d( /// zero if on, or a negative value if outside. 
/// /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] // opt-16: keep only AggressiveInlining (AggressiveOptimization inflated the Stage-B frame and hurt Stage-A) + [SkipLocalsInit] public static double InCircle( double ax, double ay, double bx, double by, double cx, double cy, double dx, double dy) @@ -367,8 +371,14 @@ internal static (double aHi, double aLo) Split(double a) [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static double MultTail(double a, double b, double p) { - var (aHi, aLo) = Split(a); - var (bHi, bLo) = Split(b); + double c = SplitterD * a; + double aBig = c - a; + double aHi = c - aBig; + double aLo = a - aHi; + c = SplitterD * b; + double bBig = c - b; + double bHi = c - bBig; + double bLo = b - bHi; double y = p - aHi * bHi; y -= aLo * bHi; y -= aHi * bLo; @@ -379,6 +389,8 @@ internal static double MultTail(double a, double b, double p) /// Exact expansion of ax*by - ay*bx (up to 4 non-zero terms). /// Matches Lenthe ExpansionBase::TwoTwoDiff. /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + [SkipLocalsInit] // opt-11: x0..x3 are all unconditionally computed before conditional write internal static int TwoTwoDiff(double ax, double by, double ay, double bx, Span h) { double axby1 = ax * by; @@ -408,6 +420,8 @@ internal static int TwoTwoDiff(double ax, double by, double ay, double bx, Span< /// Matches Lenthe ExpansionBase::ScaleExpansion. /// Output has up to 2*elen terms. /// + [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] // opt-7, opt-18 + [SkipLocalsInit] // opt-8: locals (hIdx, Q, hh, Ti, ti, Qi) are all written before read internal static int ScaleExpansion(Span e, int elen, double b, Span h) { if (elen == 0 || b == 0.0) @@ -416,31 +430,40 @@ internal static int ScaleExpansion(Span e, int elen, double b, Spane*s*s + e*t*t as an expansion (two ScaleExpansion calls each, then sum). 
/// Max output: 32 terms for 4-term input (used for InCircle Stage B lift terms). /// + [SkipLocalsInit] // keep SkipLocalsInit; remove AggressiveInlining (inlining 3× into AdaptiveInCircle bloats Stage-A JIT frame) internal static int ScaleExpansionSum(Span e, int elen, double s, double t, Span h) { Span es = stackalloc double[8]; @@ -470,63 +494,148 @@ internal static int ScaleExpansionSum(Span e, int elen, double s, double /// Merge-then-accumulate two expansions. Matches Lenthe ExpansionBase::ExpansionSum: /// std::merge by |value| (stable), then sequential grow-expansion accumulation. /// + [MethodImpl(MethodImplOptions.AggressiveOptimization)] // opt-20: aggressive JIT optimization for this hot method + [SkipLocalsInit] internal static int ExpansionSum(Span e, int elen, Span f, int flen, Span h) { if (elen == 0 && flen == 0) { return 0; } - if (elen == 0) { f[..flen].CopyTo(h); return flen; } - if (flen == 0) { e[..elen].CopyTo(h); return elen; } + if (elen == 0) + { + // opt-5: Unsafe.CopyBlockUnaligned replaces Span.CopyTo (eliminates Memmove call overhead) + Unsafe.CopyBlockUnaligned( + ref Unsafe.As(ref MemoryMarshal.GetReference(h)), + ref Unsafe.As(ref MemoryMarshal.GetReference(f)), + (uint)(flen * sizeof(double))); + return flen; + } + if (flen == 0) + { + // opt-5: same as above for flen==0 fast path + Unsafe.CopyBlockUnaligned( + ref Unsafe.As(ref MemoryMarshal.GetReference(h)), + ref Unsafe.As(ref MemoryMarshal.GetReference(e)), + (uint)(elen * sizeof(double))); + return elen; + } int total = elen + flen; - // Merge sorted by |value| into temporary buffer. - // Maximum merged size for InCircle Stage D is 192+192=384 ≤ 400, so - // the stackalloc path is always taken for that call site. - Span merged = total <= 400 ? stackalloc double[400] : new double[total]; + // opt-1: Tiered stackalloc — allocate only as much as the actual input size requires. 
+ // Each size tier stackallocs its scratch buffer in its own branch and passes it + // straight to ExpansionSumCore, since a stack-allocated Span cannot be assigned + // to a variable declared in an outer scope (it could then outlive its stack frame). + if (total <= 16) + { + Span merged16 = stackalloc double[16]; + return ExpansionSumCore(e, elen, f, flen, h, merged16); + } + if (total <= 64) + { + Span merged64 = stackalloc double[64]; + return ExpansionSumCore(e, elen, f, flen, h, merged64); + } + if (total <= 400) + { + Span merged400 = stackalloc double[400]; + return ExpansionSumCore(e, elen, f, flen, h, merged400); + } + return ExpansionSumCore(e, elen, f, flen, h, new double[total]); + } + + // opt-2, opt-3, opt-4: Core merge+accumulate logic — receives a pre-sized scratch buffer. + // All span accesses use MemoryMarshal.GetReference + Unsafe.Add to eliminate bounds checks. + [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] + private static int ExpansionSumCore( + Span e, int elen, Span f, int flen, Span h, Span merged) + { + ref double eRef = ref MemoryMarshal.GetReference(e); + ref double fRef = ref MemoryMarshal.GetReference(f); + ref double mRef = ref MemoryMarshal.GetReference(merged); + int ei = 0, fi = 0, mi = 0; while (ei < elen && fi < flen) { - if (Math.Abs(f[fi]) < Math.Abs(e[ei])) + double eVal = Unsafe.Add(ref eRef, ei); + double fVal = Unsafe.Add(ref fRef, fi); + if (Math.Abs(fVal) < Math.Abs(eVal)) { - merged[mi++] = f[fi++]; + Unsafe.Add(ref mRef, mi++) = fVal; + fi++; } else { - merged[mi++] = e[ei++]; + Unsafe.Add(ref mRef, mi++) = eVal; + ei++; } } - while (ei < elen) { merged[mi++] = e[ei++]; } - while (fi < flen) { merged[mi++] = f[fi++]; } + // opt-4: tail copy loops → Unsafe.CopyBlockUnaligned + if (ei < elen) + { + int rem = elen - ei; + Unsafe.CopyBlockUnaligned( + ref Unsafe.As(ref Unsafe.Add(ref mRef, mi)), + ref Unsafe.As(ref Unsafe.Add(ref eRef, ei)), + (uint)(rem * sizeof(double))); + mi += rem; + } + if (fi < 
flen) + { + int rem = flen - fi; + Unsafe.CopyBlockUnaligned( + ref Unsafe.As(ref Unsafe.Add(ref mRef, mi)), + ref Unsafe.As(ref Unsafe.Add(ref fRef, fi)), + (uint)(rem * sizeof(double))); + mi += rem; + } - // Sequential accumulation + // opt-3: bounds-check-free accumulation loop using ref locals + ref double hRef = ref MemoryMarshal.GetReference(h); int hIdx = 0; - double Q = merged[0]; - double Qnew = merged[1] + Q; - double hh = FastPlusTail(merged[1], Q, Qnew); + double Q = Unsafe.Add(ref mRef, 0); + double m1 = Unsafe.Add(ref mRef, 1); + double Qnew = m1 + Q; + double hh = FastPlusTail(m1, Q, Qnew); Q = Qnew; - if (hh != 0.0) { h[hIdx++] = hh; } + if (hh != 0.0) { Unsafe.Add(ref hRef, hIdx++) = hh; } for (int g = 2; g < mi; g++) { - Qnew = Q + merged[g]; - hh = PlusTail(Q, merged[g], Qnew); + double mg = Unsafe.Add(ref mRef, g); + Qnew = Q + mg; + hh = PlusTail(Q, mg, Qnew); Q = Qnew; - if (hh != 0.0) { h[hIdx++] = hh; } + if (hh != 0.0) { Unsafe.Add(ref hRef, hIdx++) = hh; } } - if (Q != 0.0) { h[hIdx++] = Q; } + if (Q != 0.0) { Unsafe.Add(ref hRef, hIdx++) = Q; } return hIdx; } [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static double Estimate(Span e, int elen) { - double sum = 0.0; - for (int i = 0; i < elen; i++) { sum += e[i]; } - return sum; + if (Vector256.IsHardwareAccelerated && elen >= 4) + { + ref double eRef = ref MemoryMarshal.GetReference(e); + Vector256 acc = Vector256.Zero; + int i = 0; + for (; i <= elen - 4; i += 4) + acc = Vector256.Add(acc, Vector256.LoadUnsafe(ref eRef, (nuint)i)); + double sum = Vector256.Sum(acc); + // opt-13: bounds-check-free scalar tail using Unsafe.Add + for (; i < elen; i++) sum += Unsafe.Add(ref eRef, i); + return sum; + } + + // opt-13: bounds-check-free scalar loop + ref double sRef = ref MemoryMarshal.GetReference(e); + double s = 0.0; + for (int i = 0; i < elen; i++) { s += Unsafe.Add(ref sRef, i); } + return s; } - [MethodImpl(MethodImplOptions.AggressiveInlining)] + 
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] // opt-12 internal static double MostSignificant(Span e, int elen) { for (int i = elen - 1; i >= 0; i--) @@ -536,8 +645,21 @@ internal static double MostSignificant(Span e, int elen) return 0.0; } + [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] // opt-14 internal static void NegateInto(Span src, int len, Span dst) { + if (Vector256.IsHardwareAccelerated && len >= 4) + { + ref double srcRef = ref MemoryMarshal.GetReference(src); + ref double dstRef = ref MemoryMarshal.GetReference(dst); + int i = 0; + for (; i <= len - 4; i += 4) + Vector256.Negate(Vector256.LoadUnsafe(ref srcRef, (nuint)i)) + .StoreUnsafe(ref dstRef, (nuint)i); + for (; i < len; i++) dst[i] = -src[i]; + return; + } + for (int i = 0; i < len; i++) { dst[i] = -src[i]; } } } diff --git a/src/CDT.Core/Predicates/PredicatesExact.cs b/src/CDT.Core/Predicates/PredicatesExact.cs index b99ceb1..11f7360 100644 --- a/src/CDT.Core/Predicates/PredicatesExact.cs +++ b/src/CDT.Core/Predicates/PredicatesExact.cs @@ -4,6 +4,8 @@ // // Geometric predicates — exact (arbitrary-precision) paths. +using System.Runtime.CompilerServices; + namespace CDT.Predicates; /// @@ -41,6 +43,8 @@ public static class PredicatesExact /// zero if collinear, or a negative value if to the right. /// /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] // opt-17: inline into Adaptive.Orient2d Stage D + [SkipLocalsInit] public static double Orient2d( double ax, double ay, double bx, double by, double cx, double cy) { @@ -122,6 +126,8 @@ public static float Orient2d( /// zero if on, or a negative value if outside. 
/// /// + [MethodImpl(MethodImplOptions.AggressiveOptimization)] // opt-20: aggressive JIT opt for this hot method + [SkipLocalsInit] public static double InCircle( double ax, double ay, double bx, double by, double cx, double cy, double dx, double dy) @@ -172,22 +178,22 @@ public static double InCircle( int adetLen = PredicatesAdaptive.ScaleExpansionSum(bcd, bcdLen, ax, ay, adet); // bdet = -(cda*bx*bx + cda*by*by) - Span bdetPos = stackalloc double[96]; - int bdetPosLen = PredicatesAdaptive.ScaleExpansionSum(cda, cdaLen, bx, by, bdetPos); + // opt-18: eliminate bdetPos[96] by computing ScaleExpansionSum directly into bdet, + // then negating in-place. Saves 768 bytes of stack per call. Span bdet = stackalloc double[96]; - PredicatesAdaptive.NegateInto(bdetPos, bdetPosLen, bdet); - int bdetLen = bdetPosLen; + int bdetLen = PredicatesAdaptive.ScaleExpansionSum(cda, cdaLen, bx, by, bdet); + PredicatesAdaptive.NegateInto(bdet, bdetLen, bdet); // cdet = dab*cx*cx + dab*cy*cy Span cdet = stackalloc double[96]; int cdetLen = PredicatesAdaptive.ScaleExpansionSum(dab, dabLen, cx, cy, cdet); // ddet = -(abc*dx*dx + abc*dy*dy) - Span ddetPos = stackalloc double[96]; - int ddetPosLen = PredicatesAdaptive.ScaleExpansionSum(abc, abcLen, dx, dy, ddetPos); + // opt-19: same as opt-18 — eliminate ddetPos[96] stackalloc with in-place negate. + // Saves another 768 bytes of stack per call (total savings: 1 536 bytes). Span ddet = stackalloc double[96]; - PredicatesAdaptive.NegateInto(ddetPos, ddetPosLen, ddet); - int ddetLen = ddetPosLen; + int ddetLen = PredicatesAdaptive.ScaleExpansionSum(abc, abcLen, dx, dy, ddet); + PredicatesAdaptive.NegateInto(ddet, ddetLen, ddet); // deter = (adet + bdet) + (cdet + ddet) Span ab2 = stackalloc double[192];