From 0ed82fbc922120882dc973a991483646fb9800bd Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Wed, 24 Jun 2026 14:36:49 -0400 Subject: [PATCH 1/4] Add SubSequence.startsWith / equalsIgnoreCase (CharSequence-friendly comparisons) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These are the ops a real classification parse needs but CharSequence lacks — surfaced by porting SQLCommenter.getFirstWord (firstWord.startsWith("{"), firstWord.equalsIgnoreCase("call")). Lets transient parse views be compared without materializing a String. + JUnit 5 tests. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../java/datadog/trace/util/SubSequence.java | 31 +++++++++++++++++++ .../datadog/trace/util/SubSequenceTest.java | 26 ++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/internal-api/src/main/java/datadog/trace/util/SubSequence.java b/internal-api/src/main/java/datadog/trace/util/SubSequence.java index 3cda505d1db..37e18327d8e 100644 --- a/internal-api/src/main/java/datadog/trace/util/SubSequence.java +++ b/internal-api/src/main/java/datadog/trace/util/SubSequence.java @@ -109,6 +109,37 @@ public final boolean equals(CharSequence that) { return true; } + /** Case-insensitive content comparison; mirrors {@link String#equalsIgnoreCase(String)}. */ + public final boolean equalsIgnoreCase(CharSequence that) { + int len = this.length(); + if (that == null || len != that.length()) return false; + + for (int i = 0; i < len; ++i) { + char a = this.charAt(i); + char b = that.charAt(i); + if (a != b) { + // Same two-way fold String.regionMatches(ignoreCase) uses (handles locale edge cases). + char au = Character.toUpperCase(a); + char bu = Character.toUpperCase(b); + if (au != bu && Character.toLowerCase(au) != Character.toLowerCase(bu)) { + return false; + } + } + } + return true; + } + + /** True if this sub-sequence begins with {@code prefix} (content comparison, no allocation). */ + public final boolean startsWith(CharSequence prefix) { + int prefixLen = prefix.length(); + if (prefixLen > this.length()) return false; + + for (int i = 0; i < prefixLen; ++i) { + if (this.charAt(i) != prefix.charAt(i)) return false; + } + return true; + } + @Override public String toString() { String cached = this.cachedSubstr; diff --git a/internal-api/src/test/java/datadog/trace/util/SubSequenceTest.java b/internal-api/src/test/java/datadog/trace/util/SubSequenceTest.java index 78d52fa6931..86e27039de6 100644 --- a/internal-api/src/test/java/datadog/trace/util/SubSequenceTest.java +++ b/internal-api/src/test/java/datadog/trace/util/SubSequenceTest.java @@ -1,6 +1,7 @@ package datadog.trace.util; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertSame; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -108,4 +109,29 @@ public void appendToBuilder() { subSeq.appendTo(builder1); assertEquals(expectedStr, builder1.toString()); } + + @Test + public void equalsIgnoreCase() { + SubSequence call = SubSequence.of("xx CALL yy", 3, 7); // "CALL" + assertTrue(call.equalsIgnoreCase("call")); + assertTrue(call.equalsIgnoreCase("CALL")); + assertTrue(call.equalsIgnoreCase("CaLl")); + assertFalse(call.equalsIgnoreCase("calls")); // length differs + assertFalse(call.equalsIgnoreCase("cant")); // same length, content differs + + // case-sensitive equals stays case-sensitive + assertFalse(call.equals("call")); + assertTrue(call.equals("CALL")); + } + + @Test + public void startsWith() { + SubSequence braceCall = SubSequence.of("xx{call} yy", 2, 7); // "{call" + assertTrue(braceCall.startsWith("")); + assertTrue(braceCall.startsWith("{")); + assertTrue(braceCall.startsWith("{ca")); + assertTrue(braceCall.startsWith("{call")); + assertFalse(braceCall.startsWith("call")); // not the prefix + assertFalse(braceCall.startsWith("{calls and more")); // prefix longer than sequence + } } From ea771f32f3b3f259e578f9651151efe92f0828ae Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Wed, 24 Jun 2026 17:41:58 -0400 Subject: [PATCH 2/4] Return SubSequence from SQLCommenter.getFirstWord to avoid per-inject substring getFirstWord's result is only consumed by startsWith("{") and equalsIgnoreCase("call"), so materializing a substring on every inject() was pure waste. Return a zero-copy SubSequence view instead and use its startsWith/equalsIgnoreCase. Behavior is unchanged (91 SQLCommenterTest cases pass); the test compares getFirstWord(sql).toString(). This is the motivating consumer for SubSequence.startsWith/equalsIgnoreCase (retracted from #10640 and carried here so they land with their use case). Co-Authored-By: Claude Opus 4.8 --- .../instrumentation/jdbc/SQLCommenter.java | 17 ++++++++++++++--- .../instrumentation/jdbc/SQLCommenterTest.java | 2 +- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/dd-java-agent/instrumentation/jdbc/src/main/java/datadog/trace/instrumentation/jdbc/SQLCommenter.java b/dd-java-agent/instrumentation/jdbc/src/main/java/datadog/trace/instrumentation/jdbc/SQLCommenter.java index 3171550aba3..87261cdcd63 100644 --- a/dd-java-agent/instrumentation/jdbc/src/main/java/datadog/trace/instrumentation/jdbc/SQLCommenter.java +++ b/dd-java-agent/instrumentation/jdbc/src/main/java/datadog/trace/instrumentation/jdbc/SQLCommenter.java @@ -1,6 +1,7 @@ package datadog.trace.instrumentation.jdbc; import datadog.trace.bootstrap.instrumentation.dbm.SharedDBCommenter; +import datadog.trace.util.SubSequence; public class SQLCommenter { // SQL-specific constants, rest defined in SharedDBCommenter @@ -15,7 +16,17 @@ public class SQLCommenter { private static final int BUFFER_EXTRA = 4; private static final int SQL_COMMENT_OVERHEAD = SPACE_CHARS + COMMENT_DELIMITERS + BUFFER_EXTRA; - protected static String getFirstWord(String sql) { + /** + * Returns the first whitespace-delimited word of {@code sql} as a zero-copy {@link SubSequence} + * -- avoiding a substring allocation on every {@link #inject} call, since the callers only need + * to {@code startsWith}/{@code equalsIgnoreCase} it. + * + *

Transient view -- do not retain. The returned {@code SubSequence} references the + * entire {@code sql} string, so holding onto it keeps the whole query reachable (a memory hazard + * for large SQL). Consume it in place and discard; if the value must be retained, call {@link + * SubSequence#toString()} to detach a standalone copy. + */ + protected static SubSequence getFirstWord(String sql) { int beginIndex = 0; while (beginIndex < sql.length() && Character.isWhitespace(sql.charAt(beginIndex))) { beginIndex++; @@ -24,7 +35,7 @@ protected static String getFirstWord(String sql) { while (endIndex < sql.length() && !Character.isWhitespace(sql.charAt(endIndex))) { endIndex++; } - return sql.substring(beginIndex, endIndex); + return SubSequence.of(sql, beginIndex, endIndex); } public static String inject( @@ -40,7 +51,7 @@ public static String inject( } boolean appendComment = preferAppend; if (dbType != null) { - final String firstWord = getFirstWord(sql); + final SubSequence firstWord = getFirstWord(sql); // The Postgres JDBC parser doesn't allow SQL comments anywhere in a JDBC // callable statements diff --git a/dd-java-agent/instrumentation/jdbc/src/test/java/datadog/trace/instrumentation/jdbc/SQLCommenterTest.java b/dd-java-agent/instrumentation/jdbc/src/test/java/datadog/trace/instrumentation/jdbc/SQLCommenterTest.java index 5383bf03921..c006fef5b89 100644 --- a/dd-java-agent/instrumentation/jdbc/src/test/java/datadog/trace/instrumentation/jdbc/SQLCommenterTest.java +++ b/dd-java-agent/instrumentation/jdbc/src/test/java/datadog/trace/instrumentation/jdbc/SQLCommenterTest.java @@ -29,7 +29,7 @@ class SQLCommenterTest extends AbstractInstrumentationTest { @MethodSource("testFindFirstWordArguments") void testFindFirstWord(String scenario, String sql, String firstWord) { // when - String word = SQLCommenter.getFirstWord(sql); + String word = SQLCommenter.getFirstWord(sql).toString(); // then assertEquals(firstWord, word); From e9f9aea3b2ee832fa91b3fb38bff21a7d247203a Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Wed, 24 Jun 2026 18:11:30 -0400 Subject: [PATCH 3/4] Add SQLCommenterGetFirstWordBenchmark (getFirstWord alloc: 48 -> 0 B/op) Benchmarks the getFirstWord first-word scan, consuming the result via the real transient pattern (startsWith, returning the boolean) so EA isn't faked away. @Threads(8), @Fork(2), -prof gc: the old substring allocated 48 B/op per call; the SubSequence view is fully EA-elided (~0 B/op), ~1.6x throughput. Numbers in the class Javadoc. Co-Authored-By: Claude Opus 4.8 --- .../SQLCommenterGetFirstWordBenchmark.java | 81 +++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 dd-java-agent/instrumentation/jdbc/src/jmh/java/datadog/trace/instrumentation/jdbc/SQLCommenterGetFirstWordBenchmark.java diff --git a/dd-java-agent/instrumentation/jdbc/src/jmh/java/datadog/trace/instrumentation/jdbc/SQLCommenterGetFirstWordBenchmark.java b/dd-java-agent/instrumentation/jdbc/src/jmh/java/datadog/trace/instrumentation/jdbc/SQLCommenterGetFirstWordBenchmark.java new file mode 100644 index 00000000000..8899073f363 --- /dev/null +++ b/dd-java-agent/instrumentation/jdbc/src/jmh/java/datadog/trace/instrumentation/jdbc/SQLCommenterGetFirstWordBenchmark.java @@ -0,0 +1,81 @@ +package datadog.trace.instrumentation.jdbc; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Warmup; + +/** + * Benchmark for {@link SQLCommenter#getFirstWord(String)} -- the per-{@code inject} first-word + * scan. + * + *

What we're measuring. {@code getFirstWord} used to return {@code sql.substring(b, e)} -- + * a fresh {@code String} (with its own backing array) allocated on every {@code inject} call, just + * to {@code startsWith}/{@code equalsIgnoreCase} it. It now returns a zero-copy {@code SubSequence} + * view. The question is empirical: does escape analysis elide the view in the transient consumption + * (→ 0 B/op), while the old {@code substring} always allocated? + * + *

Honest EA measurement. The view is consumed exactly as {@code inject} consumes it -- a + * boolean decision ({@code startsWith("{")}) -- and the benchmark returns that boolean. It does NOT + * return/Blackhole the view itself, which would force it to escape and fake away the very EA win + * under test. The chained {@code getFirstWord(sql).startsWith("{")} (no typed local) also lets one + * source compile both before (String.startsWith) and after (SubSequence.startsWith), so before/after + * is a clean toggle of the production method. + * + *

Run at {@code @Threads(8)} so the allocation delta surfaces as throughput; {@code -prof gc} + * (gc.alloc.rate.norm) is the headline mechanism and is fork-stable. + * + *

+ *   ./gradlew :dd-java-agent:instrumentation:jdbc:jmh   # add -prof gc
+ * 
+ * + *

Results (JDK 25, MacBook M-series, {@code @Threads(8)}, {@code @Fork(2)}, {@code -prof + * gc}): + * + *

+ *                        throughput            gc.alloc.rate.norm
+ *   before (substring)   234.8M ± 23.2M ops/s   48 B/op
+ *   after  (SubSequence) 378.2M ± 30.4M ops/s   ~0 B/op  (0.001)
+ * 
+ * + * Escape analysis fully elides the view in the transient consumption (it never escapes the + * decision), so the per-call 48 B/op of the old {@code substring} (a String + its backing array) + * drops to ~0 and throughput rises ~1.6x at {@code @Threads(8)}. The wide {@code @Fork(2)} error is + * bimodal JIT, not the signal: the allocation delta is exact and the throughput gap clears it. + */ +@Fork(2) +@Warmup(iterations = 2) +@Measurement(iterations = 5) +@Threads(8) +public class SQLCommenterGetFirstWordBenchmark { + + // Representative first-word shapes: plain keywords, a stored-proc brace, a CALL, leading space. + static final String[] SQL = { + "SELECT * FROM foo WHERE id = 42", + "{call dogshelterProc(?, ?)}", + "CALL dogshelterProc(?, ?)", + "UPDATE accounts SET balance = balance - 100 WHERE id = 42", + " INSERT INTO logs VALUES (?)", + }; + + /** Per-thread cursor so threads don't contend on a shared index under {@code @Threads(8)}. */ + @State(Scope.Thread) + public static class Cursor { + int index = 0; + + String next() { + int i = index; + index = (i + 1) % SQL.length; + return SQL[i]; + } + } + + @Benchmark + public boolean firstWordCheck(Cursor cursor) { + // Mirrors inject(): take the first word, make a boolean decision, discard it. + return SQLCommenter.getFirstWord(cursor.next()).startsWith("{"); + } +} From 2f648c75a5095e5a74007355c99d7da69059ce28 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Thu, 25 Jun 2026 07:33:48 -0400 Subject: [PATCH 4/4] Refresh SQLCommenterGetFirstWordBenchmark results to JDK 17 @Fork(5) zulu-17 @Fork(5), -prof gc: 258.1M -> 508.0M ops/s (~2x), 48 -> ~0 B/op. @Fork(5) tightens the earlier bimodal @Fork(2) spread. Co-Authored-By: Claude Opus 4.8 --- .../jdbc/SQLCommenterGetFirstWordBenchmark.java | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/dd-java-agent/instrumentation/jdbc/src/jmh/java/datadog/trace/instrumentation/jdbc/SQLCommenterGetFirstWordBenchmark.java b/dd-java-agent/instrumentation/jdbc/src/jmh/java/datadog/trace/instrumentation/jdbc/SQLCommenterGetFirstWordBenchmark.java index 8899073f363..1c556646167 100644 --- a/dd-java-agent/instrumentation/jdbc/src/jmh/java/datadog/trace/instrumentation/jdbc/SQLCommenterGetFirstWordBenchmark.java +++ b/dd-java-agent/instrumentation/jdbc/src/jmh/java/datadog/trace/instrumentation/jdbc/SQLCommenterGetFirstWordBenchmark.java @@ -32,21 +32,22 @@ * ./gradlew :dd-java-agent:instrumentation:jdbc:jmh # add -prof gc * * - *

Results (JDK 25, MacBook M-series, {@code @Threads(8)}, {@code @Fork(2)}, {@code -prof + *

Results (JDK 17, MacBook M-series, {@code @Threads(8)}, {@code @Fork(5)}, {@code -prof * gc}): * *

  *                        throughput            gc.alloc.rate.norm
- *   before (substring)   234.8M ± 23.2M ops/s   48 B/op
- *   after  (SubSequence) 378.2M ± 30.4M ops/s   ~0 B/op  (0.001)
+ *   before (substring)   258.1M ± 21.0M ops/s   48 B/op
+ *   after  (SubSequence) 508.0M ± 21.6M ops/s   ~0 B/op  (10^-4)
  * 
* * Escape analysis fully elides the view in the transient consumption (it never escapes the * decision), so the per-call 48 B/op of the old {@code substring} (a String + its backing array) - * drops to ~0 and throughput rises ~1.6x at {@code @Threads(8)}. The wide {@code @Fork(2)} error is - * bimodal JIT, not the signal: the allocation delta is exact and the throughput gap clears it. + * drops to ~0 and throughput rises ~2x at {@code @Threads(8)} — the allocation win surfacing as + * throughput. At {@code @Fork(5)} the error tightens (the earlier {@code @Fork(2)} spread was + * bimodal JIT, not signal); the allocation delta is exact and the throughput gap clears it. */ -@Fork(2) +@Fork(5) @Warmup(iterations = 2) @Measurement(iterations = 5) @Threads(8)