Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
e729013
Adding to StringUtiils
dougqh Feb 19, 2026
b0a04fc
Checking hashCode
dougqh Feb 19, 2026
f75da1c
Reducing whitespace
dougqh Feb 19, 2026
ab70701
Fixing ugly spotless formatting
dougqh Feb 19, 2026
fb3ae00
Fixing ugly spotless formatting
dougqh Feb 19, 2026
a71b449
Renaming benchmark method
dougqh Feb 19, 2026
aa070b4
Clarifying Javadoc
dougqh Feb 19, 2026
fa1b8bd
grammar
dougqh Feb 19, 2026
2822711
Allowing forbidden API usage
dougqh Feb 23, 2026
6f9ca53
Merge branch 'master' into dougqh/strings-improvements
dougqh Feb 23, 2026
d6623eb
Merge branch 'master' into dougqh/strings-improvements
dougqh Feb 24, 2026
1beb704
Adding String.replace to the benchmark
dougqh Feb 24, 2026
f516f32
grammar
dougqh Feb 24, 2026
a149be6
Added comment explaining implementation approach
dougqh Feb 25, 2026
479a3e0
Clean-up imports
dougqh Feb 26, 2026
c03a85a
spotless
dougqh Feb 27, 2026
018496b
Merge branch 'master' into dougqh/strings-improvements
dougqh Feb 27, 2026
0bfdbd4
Fixing package after moving code from stand-alone reproduction
dougqh Feb 27, 2026
80b454c
Merge branch 'dougqh/strings-improvements' of github.com:DataDog/dd-t…
dougqh Feb 27, 2026
606c5fd
Add motivation to SubSequence Javadoc (why avoid the substring alloca…
dougqh Jun 24, 2026
db6ebcd
Merge remote-tracking branch 'origin/master' into dougqh/strings-impr…
dougqh Jun 24, 2026
9e35178
Hoist containsTraceComment "<key>=" needles to constants
dougqh Jun 24, 2026
046ed91
Add SharedDBCommenterBenchmark for containsTraceComment
dougqh Jun 24, 2026
7ba4558
Merge branch 'master' into dougqh/strings-improvements
dougqh Jun 24, 2026
2480046
Check the DBM comment region in place, dropping the extractCommentCon…
dougqh Jun 24, 2026
d1a0e4f
Merge remote-tracking branch 'origin/dougqh/strings-improvements' int…
dougqh Jun 24, 2026
e79eb97
Read the comment-region check as a SubSequence view (the followable i…
dougqh Jun 24, 2026
454802b
Add SQLCommenterDuplicateCommentBenchmark (substring 140 -> 0 B/op)
dougqh Jun 24, 2026
bd0983f
Refresh SQLCommenterDuplicateCommentBenchmark results to JDK 17 @Fork(5)
dougqh Jun 25, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
package datadog.trace.bootstrap.instrumentation.dbm;

import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Threads;
import org.openjdk.jmh.annotations.Warmup;

/**
* Benchmark for {@link SharedDBCommenter#containsTraceComment(String)} — the per-query check run
* during inject (via {@code hasDDComment}) to avoid double-commenting an already-tagged statement.
*
* <p><b>What we're measuring.</b> {@code containsTraceComment} currently does {@code
* commentContent.contains(KEY + "=")} for nine keys. The keys are {@code static final} but assigned
* via {@code encode(...)}, so they are <i>not</i> compile-time constants — each {@code KEY + "="}
* is a fresh {@code StringBuilder} concat on every call. A non-matching comment runs all nine
* checks = nine throwaway Strings per call. The proposed fix precomputes nine {@code KEY_EQ}
* constants once.
*
* <p><b>How we make the win visible (our usual approach).</b> Run at {@code @Threads(8)} so the
* allocation churn manifests as a <i>throughput</i> delta — GC is a shared-heap tax, so a
* single-threaded run (cheap TLAB bumps) hides it, while concurrent allocation across threads
* drives GC pauses that every thread pays. Read the ops/s delta as the headline win. Corroborate
* the mechanism with the GC profiler: {@code -prof gc} → {@code gc.alloc.rate.norm} (B/op) should
* drop by ~nine small Strings per call on the non-matching path.
*
* <p><b>Protocol.</b> Run this on the current code (baseline), then after the {@code
* KEY_EQ}-constant fix, and compare. The input mix is mostly non-DD comments (the common case —
* they run all nine checks, the exact all-nine-concats path the fix removes); the DD comment
* short-circuits on the first check.
*
* <pre>
* # agent-bootstrap has no -Pjmh.includes wiring yet (a generalization is in flight), so for now
* # either run the whole module (only a handful of benchmarks) ...
* ./gradlew :dd-java-agent:agent-bootstrap:jmh
* # ... or hack a temporary filter into agent-bootstrap/build.gradle: jmh { includes = ['SharedDBCommenter.*'] }
* # add -prof gc (gc.alloc.rate.norm) to corroborate the allocation delta.
* </pre>
*
* <p><b>Results</b> (JDK 25, MacBook M-series, {@code @Threads(8)}, {@code @Fork(5)}, {@code -prof
* gc}):
*
* <pre>
* throughput gc.alloc.rate.norm
* before (concat) 29.7M ± 1.7M ops/s 156 B/op
* after (*_EQ) 55.4M ± 2.5M ops/s ~0 B/op (10^-5)
* </pre>
*
* Removing the per-call concatenation drops allocation to ~0 and lifts throughput ~1.9x at
* {@code @Threads(8)} — the allocation win surfacing as throughput, exactly as intended; {@code
* -prof gc} confirms the mechanism (156 -> 0 B/op).
*/
@Fork(5)
@Warmup(iterations = 2)
@Measurement(iterations = 5)
@Threads(8)
public class SharedDBCommenterBenchmark {

// Inner comment content (the surrounding "/*" "*/" already stripped by extractCommentContent),
// as a realistic mix: most queries carry a non-DD comment (or none); some already have ours.
static final String[] COMMENT_CONTENTS = {
"app generated comment", // non-DD -> all 9 contains checks (9 concats)
"route='/api/v1/users',batch=true", // non-DD
"framework='hibernate',layer='orm'", // non-DD
"ddps='web',dddbs='orders',traceparent='00-abc-def-01'", // DD -> short-circuits on 1st check
};

/** Per-thread cursor so threads don't contend on a shared index under {@code @Threads(8)}. */
@State(Scope.Thread)
public static class Cursor {
int index = 0;

String next() {
int i = index;
index = (i + 1) % COMMENT_CONTENTS.length;
return COMMENT_CONTENTS[i];
}
}

@Benchmark
public boolean containsTraceComment(Cursor cursor) {
return SharedDBCommenter.containsTraceComment(cursor.next());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import datadog.trace.api.internal.VisibleForTesting;
import datadog.trace.bootstrap.instrumentation.api.AgentSpan;
import datadog.trace.bootstrap.instrumentation.api.Tags;
import datadog.trace.util.SubSequence;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
Expand All @@ -33,25 +34,49 @@ public class SharedDBCommenter {
private static final String TRACEPARENT = encode("traceparent");
private static final String DD_SERVICE_HASH = encode("ddsh");

// Pre-built "<key>=" needles for containsTraceComment, computed once at class init. The keys
// are assigned via encode(...), so "KEY + =" is a runtime concat, not a compile-time constant;
// doing it per call allocated nine throwaway Strings on every non-matching check.
private static final String PARENT_SERVICE_EQ = PARENT_SERVICE + "=";
private static final String DATABASE_SERVICE_EQ = DATABASE_SERVICE + "=";
private static final String DD_HOSTNAME_EQ = DD_HOSTNAME + "=";
private static final String DD_DB_NAME_EQ = DD_DB_NAME + "=";
private static final String DD_PEER_SERVICE_EQ = DD_PEER_SERVICE + "=";
private static final String DD_ENV_EQ = DD_ENV + "=";
private static final String DD_VERSION_EQ = DD_VERSION + "=";
private static final String TRACEPARENT_EQ = TRACEPARENT + "=";
private static final String DD_SERVICE_HASH_EQ = DD_SERVICE_HASH + "=";

// Pre-encoded "key='encoded_value'" fragments for the invariant fields (the values
// come from Config are effectively immutable post-init in production).
// Note about the visibility: needs to be visible but can tolerate races (reason why it's not
// atomic)
private static volatile boolean staticPrefixComputed = false;
private static volatile String staticPrefix;

// Used by SQLCommenter and MongoCommentInjector to avoid duplicate comment injection
// Note: this should be "better" done and avoid this bunch of string contains/concatenation
// Used by SQLCommenter and MongoCommentInjector to avoid duplicate comment injection. Mongo
// passes the already-extracted comment body; SQLCommenter uses the range overload to check it
// in place. Both run the same nine "<key>=" needle checks.
public static boolean containsTraceComment(String commentContent) {
return commentContent.contains(PARENT_SERVICE + "=")
|| commentContent.contains(DATABASE_SERVICE + "=")
|| commentContent.contains(DD_HOSTNAME + "=")
|| commentContent.contains(DD_DB_NAME + "=")
|| commentContent.contains(DD_PEER_SERVICE + "=")
|| commentContent.contains(DD_ENV + "=")
|| commentContent.contains(DD_VERSION + "=")
|| commentContent.contains(TRACEPARENT + "=")
|| commentContent.contains(DD_SERVICE_HASH + "=");
return containsTraceComment(commentContent, 0, commentContent.length());
}

/**
* Range overload: true if {@code sql} contains a trace-comment needle fully within {@code [from,
* to)} -- checks the comment body in place, with no substring allocation of the region.
*/
public static boolean containsTraceComment(String sql, int from, int to) {
// Zero-copy view of the comment body; reads like ordinary String.contains, no substring.
SubSequence comment = SubSequence.of(sql, from, to);
return comment.contains(PARENT_SERVICE_EQ)
|| comment.contains(DATABASE_SERVICE_EQ)
|| comment.contains(DD_HOSTNAME_EQ)
|| comment.contains(DD_DB_NAME_EQ)
|| comment.contains(DD_PEER_SERVICE_EQ)
|| comment.contains(DD_ENV_EQ)
|| comment.contains(DD_VERSION_EQ)
|| comment.contains(TRACEPARENT_EQ)
|| comment.contains(DD_SERVICE_HASH_EQ);
}

// Build database comment content without comment delimiters such as /* */
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package datadog.trace.bootstrap.instrumentation.dbm;

import static datadog.trace.bootstrap.instrumentation.dbm.SharedDBCommenter.containsTraceComment;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;

import org.junit.jupiter.api.Test;

/**
* DB-level behavior of {@link SharedDBCommenter#containsTraceComment}: the nine "<key>=" needle
* set, the {@code String} delegate, and the range overload checking the comment body in place. (The
* {@code [from, to)} boundary semantics are unit-tested on {@code Strings.regionContains}.)
*/
class SharedDBCommenterContainsTraceCommentTest {

@Test
void delegate_wholeString() {
assertTrue(containsTraceComment("ddps='svc',dde='test'"));
assertFalse(containsTraceComment("just a plain comment"));
assertFalse(containsTraceComment(""));
}

@Test
void range_needleInsideCommentBody() {
String sql = "SELECT 1 /*ddps='svc',dde='test'*/";
int from = sql.indexOf("/*") + 2;
int to = sql.indexOf("*/");
assertTrue(containsTraceComment(sql, from, to));
}

@Test
void range_nonDdCommentBody() {
String sql = "SELECT 1 /* just a customer comment */";
int from = sql.indexOf("/*") + 2;
int to = sql.indexOf("*/");
assertFalse(containsTraceComment(sql, from, to));
}

@Test
void range_ddNeedleOutsideCommentRegionNotMatched() {
// The DD needle sits in the statement body, not the comment region we pass -- a whole-string
// contains would false-positive; the range check must scope to [from, to).
String sql = "ddps='x' /* clean */";
int from = sql.indexOf("/*") + 2;
int to = sql.indexOf("*/");
assertFalse(containsTraceComment(sql, from, to));
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
package datadog.trace.instrumentation.jdbc;

import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Threads;
import org.openjdk.jmh.annotations.Warmup;

/**
* Benchmark for the duplicate-comment guard in {@link SQLCommenter#inject} -- the {@code
* hasDDComment} path that avoids double-commenting an already-instrumented statement.
*
* <p><b>What we're measuring.</b> The guard used to materialize {@code sql.substring(commentStart,
* commentEnd)} (the comment body) just to scan it for trace-comment needles. (B) checks the comment
* region in place via {@code SharedDBCommenter.containsTraceComment(sql, from, to)} -- no
* substring.
*
* <p><b>Isolation.</b> The substring only happens when the SQL already carries a comment in the
* checked position; for a DD comment {@code inject} then returns early. Passing {@code dbType=null}
* skips the first-word scan (benchmarked separately for the {@code getFirstWord} change), so over
* already-DD-commented SQL the <i>only</i> allocation left in {@code inject} is the substring (B)
* removes. Run at {@code @Threads(8)} with {@code -prof gc}.
*
* <pre>
* ./gradlew :dd-java-agent:instrumentation:jdbc:jmh # add -prof gc
* </pre>
*
* <p><b>Results</b> (JDK 17, MacBook M-series, {@code @Threads(8)}, {@code @Fork(5)}, {@code -prof
* gc}):
*
* <pre>
* throughput gc.alloc.rate.norm
* before (substring) 23.5M ± 1.1M ops/s 140 B/op
* after (range/view) 26.2M ± 1.5M ops/s ~0 B/op (10^-5)
* </pre>
*
* The extractCommentContent substring (140 B/op) is gone -- the in-place range scan and the
* SubSequence view it flows through are both EA-elided. The allocation delta is exact and
* fork-stable; that's the win. At {@code @Fork(5)} the spread tightens and a small throughput
* uplift (~1.1x) resolves -- but this path is dominated by the nine indexOf scans (CPU the
* alloc-removal doesn't touch), so the headline win is the allocation, a small cut that compounds
* across comment-bearing injects, not a per-call throughput jump.
*/
@Fork(5)
@Warmup(iterations = 2)
@Measurement(iterations = 5)
@Threads(8)
public class SQLCommenterDuplicateCommentBenchmark {

// Already-DD-commented SQL (append style, comment at the end). First needle hits at different
// depths: ddps first (cheap), traceparent-only (scans 8 before the match).
static final String[] SQL = {
"SELECT * FROM foo /*ddps='svc',dde='test',dddbs='mydb',ddh='h',dddb='n',traceparent='00-00000000000000007fffffffffffffff-000000024cb016ea-00'*/",
"SELECT * FROM bar WHERE id = 42 /*traceparent='00-00000000000000007fffffffffffffff-000000024cb016ea-01'*/",
};

/** Per-thread cursor so threads don't contend on a shared index under {@code @Threads(8)}. */
@State(Scope.Thread)
public static class Cursor {
int index = 0;

String next() {
int i = index;
index = (i + 1) % SQL.length;
return SQL[i];
}
}

@Benchmark
public boolean alreadyCommented(Cursor cursor) {
// dbType=null skips the first-word scan; the DD comment makes inject return early after the
// duplicate-comment check -- the path (B) optimizes. Returns the input sql (no new String).
return SQLCommenter.inject(cursor.next(), "mydb", null, "h", "n", null, true) != null;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -112,11 +112,6 @@ private static boolean hasDDComment(String sql, boolean appendComment) {
return false;
}

String commentContent = extractCommentContent(sql, appendComment);
return SharedDBCommenter.containsTraceComment(commentContent);
}

private static String extractCommentContent(String sql, boolean appendComment) {
int startIdx;
int endIdx;
if (appendComment) {
Expand All @@ -127,9 +122,10 @@ private static String extractCommentContent(String sql, boolean appendComment) {
endIdx = sql.indexOf(CLOSE_COMMENT);
}
if (startIdx != -1 && endIdx != -1 && endIdx > startIdx) {
return sql.substring(startIdx + OPEN_COMMENT_LEN, endIdx);
// Check the comment body in place -- no substring of the comment region.
return SharedDBCommenter.containsTraceComment(sql, startIdx + OPEN_COMMENT_LEN, endIdx);
}
return "";
return false;
}

/**
Expand Down
Loading