diff --git a/dd-java-agent/instrumentation/jdbc/src/jmh/java/datadog/trace/instrumentation/jdbc/SQLCommenterGetFirstWordBenchmark.java b/dd-java-agent/instrumentation/jdbc/src/jmh/java/datadog/trace/instrumentation/jdbc/SQLCommenterGetFirstWordBenchmark.java new file mode 100644 index 00000000000..1c556646167 --- /dev/null +++ b/dd-java-agent/instrumentation/jdbc/src/jmh/java/datadog/trace/instrumentation/jdbc/SQLCommenterGetFirstWordBenchmark.java @@ -0,0 +1,82 @@ +package datadog.trace.instrumentation.jdbc; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Warmup; + +/** + * Benchmark for {@link SQLCommenter#getFirstWord(String)} -- the per-{@code inject} first-word + * scan. + * + *

What we're measuring. {@code getFirstWord} used to return {@code sql.substring(b, e)} -- + * a fresh {@code String} (with its own backing array) allocated on every {@code inject} call, just + * to {@code startsWith}/{@code equalsIgnoreCase} it. It now returns a zero-copy {@code SubSequence} + * view. The question is empirical: does escape analysis elide the view in the transient consumption + * (→ 0 B/op), while the old {@code substring} always allocated? + * + *

Honest EA measurement. The view is consumed exactly as {@code inject} consumes it -- a + * boolean decision ({@code startsWith("{")}) -- and the benchmark returns that boolean. It does NOT + * return/Blackhole the view itself, which would force it to escape and fake away the very EA win + * under test. The chained {@code getFirstWord(sql).startsWith("{")} (no typed local) also lets one + * source compile both before (String.startsWith) and after (SubSequence.startsWith), so before/after + * is a clean toggle of the production method. + * + *

Run at {@code @Threads(8)} so the allocation delta surfaces as throughput; {@code -prof gc} + * (gc.alloc.rate.norm) is the headline mechanism and is fork-stable. + * + *

+ *   ./gradlew :dd-java-agent:instrumentation:jdbc:jmh   # add -prof gc
+ * 
+ * + *

Results (JDK 17, MacBook M-series, {@code @Threads(8)}, {@code @Fork(5)}, {@code -prof + * gc}): + * + *

+ *                        throughput            gc.alloc.rate.norm
+ *   before (substring)   258.1M ± 21.0M ops/s   48 B/op
+ *   after  (SubSequence) 508.0M ± 21.6M ops/s   ~0 B/op  (10^-4)
+ * 
+ * + * Escape analysis fully elides the view in the transient consumption (it never escapes the + * decision), so the per-call 48 B/op of the old {@code substring} (a String + its backing array) + * drops to ~0 and throughput rises ~2x at {@code @Threads(8)} — the allocation win surfacing as + * throughput. At {@code @Fork(5)} the error tightens (the earlier {@code @Fork(2)} spread was + * bimodal JIT, not signal); the allocation delta is exact and the throughput gap clears it. + */ +@Fork(5) +@Warmup(iterations = 2) +@Measurement(iterations = 5) +@Threads(8) +public class SQLCommenterGetFirstWordBenchmark { + + // Representative first-word shapes: plain keywords, a stored-proc brace, a CALL, leading space. + static final String[] SQL = { + "SELECT * FROM foo WHERE id = 42", + "{call dogshelterProc(?, ?)}", + "CALL dogshelterProc(?, ?)", + "UPDATE accounts SET balance = balance - 100 WHERE id = 42", + " INSERT INTO logs VALUES (?)", + }; + + /** Per-thread cursor so threads don't contend on a shared index under {@code @Threads(8)}. */ + @State(Scope.Thread) + public static class Cursor { + int index = 0; + + String next() { + int i = index; + index = (i + 1) % SQL.length; + return SQL[i]; + } + } + + @Benchmark + public boolean firstWordCheck(Cursor cursor) { + // Mirrors inject(): take the first word, make a boolean decision, discard it. + return SQLCommenter.getFirstWord(cursor.next()).startsWith("{"); + } +} diff --git a/dd-java-agent/instrumentation/jdbc/src/main/java/datadog/trace/instrumentation/jdbc/SQLCommenter.java b/dd-java-agent/instrumentation/jdbc/src/main/java/datadog/trace/instrumentation/jdbc/SQLCommenter.java index 3171550aba3..87261cdcd63 100644 --- a/dd-java-agent/instrumentation/jdbc/src/main/java/datadog/trace/instrumentation/jdbc/SQLCommenter.java +++ b/dd-java-agent/instrumentation/jdbc/src/main/java/datadog/trace/instrumentation/jdbc/SQLCommenter.java @@ -1,6 +1,7 @@ package datadog.trace.instrumentation.jdbc; import datadog.trace.bootstrap.instrumentation.dbm.SharedDBCommenter; +import datadog.trace.util.SubSequence; public class SQLCommenter { // SQL-specific constants, rest defined in SharedDBCommenter @@ -15,7 +16,17 @@ public class SQLCommenter { private static final int BUFFER_EXTRA = 4; private static final int SQL_COMMENT_OVERHEAD = SPACE_CHARS + COMMENT_DELIMITERS + BUFFER_EXTRA; - protected static String getFirstWord(String sql) { + /** + * Returns the first whitespace-delimited word of {@code sql} as a zero-copy {@link SubSequence} + * -- avoiding a substring allocation on every {@link #inject} call, since the callers only need + * to {@code startsWith}/{@code equalsIgnoreCase} it. + * + *

Transient view -- do not retain. The returned {@code SubSequence} references the + * entire {@code sql} string, so holding onto it keeps the whole query reachable (a memory hazard + * for large SQL). Consume it in place and discard; if the value must be retained, call {@link + * SubSequence#toString()} to detach a standalone copy. + */ + protected static SubSequence getFirstWord(String sql) { int beginIndex = 0; while (beginIndex < sql.length() && Character.isWhitespace(sql.charAt(beginIndex))) { beginIndex++; @@ -24,7 +35,7 @@ protected static String getFirstWord(String sql) { while (endIndex < sql.length() && !Character.isWhitespace(sql.charAt(endIndex))) { endIndex++; } - return sql.substring(beginIndex, endIndex); + return SubSequence.of(sql, beginIndex, endIndex); } public static String inject( @@ -40,7 +51,7 @@ public static String inject( } boolean appendComment = preferAppend; if (dbType != null) { - final String firstWord = getFirstWord(sql); + final SubSequence firstWord = getFirstWord(sql); // The Postgres JDBC parser doesn't allow SQL comments anywhere in a JDBC // callable statements diff --git a/dd-java-agent/instrumentation/jdbc/src/test/java/datadog/trace/instrumentation/jdbc/SQLCommenterTest.java b/dd-java-agent/instrumentation/jdbc/src/test/java/datadog/trace/instrumentation/jdbc/SQLCommenterTest.java index 5383bf03921..c006fef5b89 100644 --- a/dd-java-agent/instrumentation/jdbc/src/test/java/datadog/trace/instrumentation/jdbc/SQLCommenterTest.java +++ b/dd-java-agent/instrumentation/jdbc/src/test/java/datadog/trace/instrumentation/jdbc/SQLCommenterTest.java @@ -29,7 +29,7 @@ class SQLCommenterTest extends AbstractInstrumentationTest { @MethodSource("testFindFirstWordArguments") void testFindFirstWord(String scenario, String sql, String firstWord) { // when - String word = SQLCommenter.getFirstWord(sql); + String word = SQLCommenter.getFirstWord(sql).toString(); // then assertEquals(firstWord, word); diff --git a/internal-api/src/main/java/datadog/trace/util/SubSequence.java b/internal-api/src/main/java/datadog/trace/util/SubSequence.java index 3cda505d1db..37e18327d8e 100644 --- a/internal-api/src/main/java/datadog/trace/util/SubSequence.java +++ b/internal-api/src/main/java/datadog/trace/util/SubSequence.java @@ -109,6 +109,37 @@ public final boolean equals(CharSequence that) { return true; } + /** Case-insensitive content comparison; mirrors {@link String#equalsIgnoreCase(String)}. */ + public final boolean equalsIgnoreCase(CharSequence that) { + int len = this.length(); + if (that == null || len != that.length()) return false; + + for (int i = 0; i < len; ++i) { + char a = this.charAt(i); + char b = that.charAt(i); + if (a != b) { + // Same two-way fold String.regionMatches(ignoreCase) uses (handles locale edge cases). + char au = Character.toUpperCase(a); + char bu = Character.toUpperCase(b); + if (au != bu && Character.toLowerCase(au) != Character.toLowerCase(bu)) { + return false; + } + } + } + return true; + } + + /** True if this sub-sequence begins with {@code prefix} (content comparison, no allocation). */ + public final boolean startsWith(CharSequence prefix) { + int prefixLen = prefix.length(); + if (prefixLen > this.length()) return false; + + for (int i = 0; i < prefixLen; ++i) { + if (this.charAt(i) != prefix.charAt(i)) return false; + } + return true; + } + @Override public String toString() { String cached = this.cachedSubstr; diff --git a/internal-api/src/test/java/datadog/trace/util/SubSequenceTest.java b/internal-api/src/test/java/datadog/trace/util/SubSequenceTest.java index 78d52fa6931..86e27039de6 100644 --- a/internal-api/src/test/java/datadog/trace/util/SubSequenceTest.java +++ b/internal-api/src/test/java/datadog/trace/util/SubSequenceTest.java @@ -1,6 +1,7 @@ package datadog.trace.util; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertSame; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -108,4 +109,29 @@ public void appendToBuilder() { subSeq.appendTo(builder1); assertEquals(expectedStr, builder1.toString()); } + + @Test + public void equalsIgnoreCase() { + SubSequence call = SubSequence.of("xx CALL yy", 3, 7); // "CALL" + assertTrue(call.equalsIgnoreCase("call")); + assertTrue(call.equalsIgnoreCase("CALL")); + assertTrue(call.equalsIgnoreCase("CaLl")); + assertFalse(call.equalsIgnoreCase("calls")); // length differs + assertFalse(call.equalsIgnoreCase("cant")); // same length, content differs + + // case-sensitive equals stays case-sensitive + assertFalse(call.equals("call")); + assertTrue(call.equals("CALL")); + } + + @Test + public void startsWith() { + SubSequence braceCall = SubSequence.of("xx{call} yy", 2, 7); // "{call" + assertTrue(braceCall.startsWith("")); + assertTrue(braceCall.startsWith("{")); + assertTrue(braceCall.startsWith("{ca")); + assertTrue(braceCall.startsWith("{call")); + assertFalse(braceCall.startsWith("call")); // not the prefix + assertFalse(braceCall.startsWith("{calls and more")); // prefix longer than sequence + } }