diff --git a/dd-java-agent/agent-bootstrap/src/jmh/java/datadog/trace/bootstrap/instrumentation/dbm/SharedDBCommenterBenchmark.java b/dd-java-agent/agent-bootstrap/src/jmh/java/datadog/trace/bootstrap/instrumentation/dbm/SharedDBCommenterBenchmark.java new file mode 100644 index 00000000000..20276db1412 --- /dev/null +++ b/dd-java-agent/agent-bootstrap/src/jmh/java/datadog/trace/bootstrap/instrumentation/dbm/SharedDBCommenterBenchmark.java @@ -0,0 +1,86 @@ +package datadog.trace.bootstrap.instrumentation.dbm; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Warmup; + +/** + * Benchmark for {@link SharedDBCommenter#containsTraceComment(String)} — the per-query check run + * during inject (via {@code hasDDComment}) to avoid double-commenting an already-tagged statement. + * + *
What we're measuring. {@code containsTraceComment} currently does {@code + * commentContent.contains(KEY + "=")} for nine keys. The keys are {@code static final} but assigned + * via {@code encode(...)}, so they are not compile-time constants — each {@code KEY + "="} + * is a fresh {@code StringBuilder} concat on every call. A non-matching comment runs all nine + * checks = nine throwaway Strings per call. The proposed fix precomputes nine {@code KEY_EQ} + * constants once. + * + *
How we make the win visible (our usual approach). Run at {@code @Threads(8)} so the + * allocation churn manifests as a throughput delta — GC is a shared-heap tax, so a + * single-threaded run (cheap TLAB bumps) hides it, while concurrent allocation across threads + * drives GC pauses that every thread pays. Read the ops/s delta as the headline win. Corroborate + * the mechanism with the GC profiler: {@code -prof gc} → {@code gc.alloc.rate.norm} (B/op) should + * drop by ~nine small Strings per call on the non-matching path. + * + *
Protocol. Run this on the current code (baseline), then after the {@code + * KEY_EQ}-constant fix, and compare. The input mix is mostly non-DD comments (the common case — + * they run all nine checks, the exact all-nine-concats path the fix removes); the DD comment + * short-circuits on the first check. + * + *
+ * # agent-bootstrap has no -Pjmh.includes wiring yet (a generalization is in flight), so for now
+ * # either run the whole module (only a handful of benchmarks) ...
+ * ./gradlew :dd-java-agent:agent-bootstrap:jmh
+ * # ... or hack a temporary filter into agent-bootstrap/build.gradle: jmh { includes = ['SharedDBCommenter.*'] }
+ * # add -prof gc (gc.alloc.rate.norm) to corroborate the allocation delta.
+ *
+ *
+ * Results (JDK 25, MacBook M-series, {@code @Threads(8)}, {@code @Fork(5)}, {@code -prof + * gc}): + * + *
+ * throughput gc.alloc.rate.norm + * before (concat) 29.7M ± 1.7M ops/s 156 B/op + * after (*_EQ) 55.4M ± 2.5M ops/s ~0 B/op (10^-5) + *+ * + * Removing the per-call concatenation drops allocation to ~0 and lifts throughput ~1.9x at + * {@code @Threads(8)} — the allocation win surfacing as throughput, exactly as intended; {@code + * -prof gc} confirms the mechanism (156 -> 0 B/op). + */ +@Fork(5) +@Warmup(iterations = 2) +@Measurement(iterations = 5) +@Threads(8) +public class SharedDBCommenterBenchmark { + + // Inner comment content (the surrounding "/*" "*/" already stripped by extractCommentContent), + // as a realistic mix: most queries carry a non-DD comment (or none); some already have ours. + static final String[] COMMENT_CONTENTS = { + "app generated comment", // non-DD -> all 9 contains checks (9 concats) + "route='/api/v1/users',batch=true", // non-DD + "framework='hibernate',layer='orm'", // non-DD + "ddps='web',dddbs='orders',traceparent='00-abc-def-01'", // DD -> short-circuits on 1st check + }; + + /** Per-thread cursor so threads don't contend on a shared index under {@code @Threads(8)}. 
*/ + @State(Scope.Thread) + public static class Cursor { + int index = 0; + + String next() { + int i = index; + index = (i + 1) % COMMENT_CONTENTS.length; + return COMMENT_CONTENTS[i]; + } + } + + @Benchmark + public boolean containsTraceComment(Cursor cursor) { + return SharedDBCommenter.containsTraceComment(cursor.next()); + } +} diff --git a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/dbm/SharedDBCommenter.java b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/dbm/SharedDBCommenter.java index 3dbf916362c..ccd3bad8505 100644 --- a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/dbm/SharedDBCommenter.java +++ b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/dbm/SharedDBCommenter.java @@ -7,6 +7,7 @@ import datadog.trace.api.internal.VisibleForTesting; import datadog.trace.bootstrap.instrumentation.api.AgentSpan; import datadog.trace.bootstrap.instrumentation.api.Tags; +import datadog.trace.util.SubSequence; import java.io.UnsupportedEncodingException; import java.net.URLEncoder; import java.nio.charset.StandardCharsets; @@ -33,6 +34,19 @@ public class SharedDBCommenter { private static final String TRACEPARENT = encode("traceparent"); private static final String DD_SERVICE_HASH = encode("ddsh"); + // Pre-built "
What we're measuring. The guard used to materialize {@code sql.substring(commentStart, + * commentEnd)} (the comment body) just to scan it for trace-comment needles. (B) checks the comment + * region in place via {@code SharedDBCommenter.containsTraceComment(sql, from, to)} -- no + * substring. + * + *
Isolation. The substring only happens when the SQL already carries a comment in the + * checked position; for a DD comment {@code inject} then returns early. Passing {@code dbType=null} + * skips the first-word scan (benchmarked separately for the {@code getFirstWord} change), so over + * already-DD-commented SQL the only allocation left in {@code inject} is the substring (B) + * removes. Run at {@code @Threads(8)} with {@code -prof gc}. + * + *
+ * ./gradlew :dd-java-agent:instrumentation:jdbc:jmh # add -prof gc + *+ * + *
Results (JDK 17, MacBook M-series, {@code @Threads(8)}, {@code @Fork(5)}, {@code -prof + * gc}): + * + *
+ * throughput gc.alloc.rate.norm + * before (substring) 23.5M ± 1.1M ops/s 140 B/op + * after (range/view) 26.2M ± 1.5M ops/s ~0 B/op (10^-5) + *+ * + * The extractCommentContent substring (140 B/op) is gone -- the in-place range scan and the + * SubSequence view it flows through are both EA-elided. The allocation delta is exact and + * fork-stable; that's the win. At {@code @Fork(5)} the spread tightens and a small throughput + * uplift (~1.1x) resolves -- but this path is dominated by the nine indexOf scans (CPU the + * alloc-removal doesn't touch), so the headline win is the allocation, a small cut that compounds + * across comment-bearing injects, not a per-call throughput jump. + */ +@Fork(5) +@Warmup(iterations = 2) +@Measurement(iterations = 5) +@Threads(8) +public class SQLCommenterDuplicateCommentBenchmark { + + // Already-DD-commented SQL (append style, comment at the end). First needle hits at different + // depths: ddps first (cheap), traceparent-only (scans 8 before the match). + static final String[] SQL = { + "SELECT * FROM foo /*ddps='svc',dde='test',dddbs='mydb',ddh='h',dddb='n',traceparent='00-00000000000000007fffffffffffffff-000000024cb016ea-00'*/", + "SELECT * FROM bar WHERE id = 42 /*traceparent='00-00000000000000007fffffffffffffff-000000024cb016ea-01'*/", + }; + + /** Per-thread cursor so threads don't contend on a shared index under {@code @Threads(8)}. */ + @State(Scope.Thread) + public static class Cursor { + int index = 0; + + String next() { + int i = index; + index = (i + 1) % SQL.length; + return SQL[i]; + } + } + + @Benchmark + public boolean alreadyCommented(Cursor cursor) { + // dbType=null skips the first-word scan; the DD comment makes inject return early after the + // duplicate-comment check -- the path (B) optimizes. Returns the input sql (no new String). 
+ return SQLCommenter.inject(cursor.next(), "mydb", null, "h", "n", null, true) != null; + } +} diff --git a/dd-java-agent/instrumentation/jdbc/src/main/java/datadog/trace/instrumentation/jdbc/SQLCommenter.java b/dd-java-agent/instrumentation/jdbc/src/main/java/datadog/trace/instrumentation/jdbc/SQLCommenter.java index 3171550aba3..9c8e81c3036 100644 --- a/dd-java-agent/instrumentation/jdbc/src/main/java/datadog/trace/instrumentation/jdbc/SQLCommenter.java +++ b/dd-java-agent/instrumentation/jdbc/src/main/java/datadog/trace/instrumentation/jdbc/SQLCommenter.java @@ -112,11 +112,6 @@ private static boolean hasDDComment(String sql, boolean appendComment) { return false; } - String commentContent = extractCommentContent(sql, appendComment); - return SharedDBCommenter.containsTraceComment(commentContent); - } - - private static String extractCommentContent(String sql, boolean appendComment) { int startIdx; int endIdx; if (appendComment) { @@ -127,9 +122,10 @@ private static String extractCommentContent(String sql, boolean appendComment) { endIdx = sql.indexOf(CLOSE_COMMENT); } if (startIdx != -1 && endIdx != -1 && endIdx > startIdx) { - return sql.substring(startIdx + OPEN_COMMENT_LEN, endIdx); + // Check the comment body in place -- no substring of the comment region. 
+ return SharedDBCommenter.containsTraceComment(sql, startIdx + OPEN_COMMENT_LEN, endIdx); } - return ""; + return false; } /** diff --git a/internal-api/src/jmh/java/datadog/trace/util/StringReplaceAllBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/StringReplaceAllBenchmark.java new file mode 100644 index 00000000000..492b56bb654 --- /dev/null +++ b/internal-api/src/jmh/java/datadog/trace/util/StringReplaceAllBenchmark.java @@ -0,0 +1,109 @@ +package datadog.trace.util; + +import de.thetaphi.forbiddenapis.SuppressForbidden; +import java.util.regex.Pattern; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Warmup; + +/** + *
For simple replacements, Strings.replaceAll is recommended. + * + *
+ * For simple replacements, Strings.replaceAll or String.replace out performs the regex based + * methods String.replaceAll and regex.Matcher.replaceAll by 3x in terms of throughput. + * + *
String.replace and Strings.replaceAll also require less allocation. + * + *
Strings.replaceAll outperforms String.replace by 1.2x in terms of throughput, + * but results may vary depending on the JVM version being used. + * + *
When pattern matching is needed, compiling the regex to Pattern slightly improves overhead,
+ * but dramatically reduces memory allocation to 1/4x of String.replaceAll. NOTE: The higher allocation rate is misleading because 5x the work was performed. After
+ * accounting for the 5x throughput difference, the actual allocation rate is 0.25x that of
+ * String.substring or String.subSequence / SubSequence.of. {@code indexOf} returns the earliest occurrence at or after {@code beginIndex}; if that one
+ * overshoots {@code endIndex} there is no earlier full occurrence in range, so the bound check is
+ * exact.
+ */
+  public static boolean regionContains(String s, int beginIndex, int endIndex, String needle) {
+    // Earliest match at or after beginIndex; any later match would end even further right.
+    final int matchStart = s.indexOf(needle, beginIndex);
+    if (matchStart < 0) {
+      return false;
+    }
+    // The match only counts when it ends at or before endIndex (exclusive bound).
+    final int matchEnd = matchStart + needle.length();
+    return matchEnd <= endIndex;
+  }
+
+  /**
+   * Low overhead replaceAll: replaces every occurrence of the literal {@code needle} in {@code
+   * input} with {@code replacement}, scanning left to right without any regex machinery.
+   *
+   * <p>An empty {@code needle} matches before every character and at the end of the input, so
+   * {@code replaceAll("abc", "", "-")} yields {@code "-a-b-c-"} -- the same result {@code
+   * String.replace} produces for an empty target.
+   *
+   * @param input the string to search; returned as-is (same instance) when nothing matches
+   * @param needle the literal text to look for
+   * @param replacement the text substituted for each occurrence
+   * @return {@code input} with every non-overlapping occurrence of {@code needle} replaced
+   */
+  public static final String replaceAll(String input, String needle, String replacement) {
+    int index = input.indexOf(needle);
+    if (index == -1) return input;
+
+    int inputLen = input.length();
+    int needleLen = needle.length();
+    // Always advance the search by at least one char: with an empty needle, indexOf(needle, index)
+    // returns index again and the scan would never terminate.
+    int searchStep = Math.max(needleLen, 1);
+
+    StringBuilder builder = new StringBuilder(inputLen + 10);
+    int copyFrom = 0; // start of the part of input not yet copied into builder
+    while (index != -1) {
+      builder.append(input, copyFrom, index).append(replacement);
+      copyFrom = index + needleLen;
+      // Only reachable with an empty needle matching at the very end; nothing is left to scan.
+      if (index >= inputLen) break;
+      index = input.indexOf(needle, index + searchStep);
+    }
+    builder.append(input, copyFrom, inputLen);
+
+    return builder.toString();
+  }
+
+  /**
+   * Creates a {@link SubSequence} that is a view into {@code str}, running from {@code beginIndex}
+   * to the end of the string. Unlike String.subSequence (which is usually just a wrapper around
+   * String.substring), this routine doesn't allocate a new String or byte[] / char[].
+   */
+  public static final SubSequence subSequence(String str, int beginIndex) {
+    final int endIndex = str.length();
+    return new SubSequence(str, beginIndex, endIndex);
+  }
+
+ /**
+ * Provides a SubSequence which a view into the provided String Unlike String.subSequence (which
+ * is usually just a wrapper around String.substring), this routine doesn't allocate a new Why that matters:
+ * MacBook M1 with 8 threads (Java 21)
+ *
+ *
+ * MacBook M1 - 8 Threads - Java 21
+ *
+ * StringReplaceAllBenchmark.regex_replaceAll thrpt 6 15500559.098 ± 8640183.754 ops/s
+ * StringReplaceAllBenchmark.regex_replaceAll:gc.alloc.rate thrpt 6 4516.464 ± 2561.063 MB/sec
+ *
+ * StringReplaceAllBenchmark.string_replace thrpt 6 35429131.963 ± 3203548.932 ops/s
+ * StringReplaceAllBenchmark.string_replace:gc.alloc.rate thrpt 6 3185.108 ± 152.601 MB/sec
+ *
+ * StringReplaceAllBenchmark.string_replaceAll thrpt 6 14253964.929 ± 4060225.866 ops/s
+ * StringReplaceAllBenchmark.string_replaceAll:gc.alloc.rate thrpt 6 11114.939 ± 3129.891 MB/sec
+ *
+ * StringReplaceAllBenchmark.strings_replaceAll thrpt 6 43789250.524 ± 1910948.420 ops/s
+ * StringReplaceAllBenchmark.strings_replaceAll:gc.alloc.rate thrpt 6 3079.973 ± 134.617 MB/sec
+ *
+ */
+@Fork(2)
+@Warmup(iterations = 2)
+@Measurement(iterations = 3)
+@Threads(8)
+@SuppressForbidden
+public class StringReplaceAllBenchmark {
+  // Inputs cycled through by every benchmark: strings with zero, one and several occurrences of
+  // the needle "foo".
+  static final String[] INPUTS = {
+    "foo",
+    "baz",
+    "foobar",
+    "foobaz",
+    "foo=baz",
+    "bar=foo",
+    "foo=foo&bar=foo",
+    "lorem ipsum",
+    "datadog"
+  };
+
+  // Cursor into INPUTS shared by all benchmark threads; it is not synchronized.
+  static int sharedInputIndex = 0;
+
+  /**
+   * Returns the next input, wrapping around at the end of {@link #INPUTS}.
+   *
+   * <p>Under {@code @Threads(8)} the updates of the shared cursor race. The bounds check runs on
+   * the thread-local copy of the index, so a lost or duplicated update only changes which input a
+   * thread sees -- it can never index past the end of the array.
+   */
+  static String nextInput() {
+    int localIndex = ++sharedInputIndex;
+    if (localIndex >= INPUTS.length) {
+      sharedInputIndex = localIndex = 0;
+    }
+    return INPUTS[localIndex];
+  }
+
+  /** Baseline: regex-based {@code String.replaceAll}. */
+  @Benchmark
+  public String string_replaceAll() {
+    return _string_replaceAll(nextInput());
+  }
+
+  static String _string_replaceAll(String input) {
+    // Underneath, this does Pattern.compile("foo").matcher(str).replaceAll()
+    return input.replaceAll("foo", "*redacted*");
+  }
+
+  /** Literal (non-regex) {@code String.replace}. */
+  @Benchmark
+  public String string_replace() {
+    return _string_replace(nextInput());
+  }
+
+  static String _string_replace(String input) {
+    return input.replace("foo", "*redacted*");
+  }
+
+  // Compiled once up front so regex_replaceAll measures matching/replacing only, not compilation.
+  static final Pattern REGEX_COMPILED = Pattern.compile("foo");
+
+  /** Regex replacement through a precompiled {@link Pattern}. */
+  @Benchmark
+  public String regex_replaceAll() {
+    return _regex_replaceAll(nextInput());
+  }
+
+  static String _regex_replaceAll(String input) {
+    // Same replacement text as the other variants so all four benchmarks produce identical output.
+    return REGEX_COMPILED.matcher(input).replaceAll("*redacted*");
+  }
+
+  /** The candidate under test: {@code Strings.replaceAll}. */
+  @Benchmark
+  public String strings_replaceAll() {
+    return _strings_replaceAll(nextInput());
+  }
+
+  static String _strings_replaceAll(String input) {
+    return Strings.replaceAll(input, "foo", "*redacted*");
+  }
+}
diff --git a/internal-api/src/jmh/java/datadog/trace/util/StringSplitBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/StringSplitBenchmark.java
new file mode 100644
index 00000000000..584c375cdea
--- /dev/null
+++ b/internal-api/src/jmh/java/datadog/trace/util/StringSplitBenchmark.java
@@ -0,0 +1,94 @@
+package datadog.trace.util;
+
+import de.thetaphi.forbiddenapis.SuppressForbidden;
+import java.util.regex.Pattern;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Threads;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+
+/**
+ * Strings.split is generally faster for String processing, since it creates SubSequences that are
+ * views into the backing String rather than new String objects.
+ * Benchmark (testStr) Mode Cnt Score Error Units
+ * StringSplitBenchmark.pattern_split EMPTY thrpt 6 291274421.621 ± 14834420.899 ops/s
+ * StringSplitBenchmark.string_split EMPTY thrpt 6 1035461179.368 ± 60212686.921 ops/s
+ * StringSplitBenchmark.strings_split EMPTY thrpt 6 8161781738.019 ± 178530888.497 ops/s
+ *
+ * StringSplitBenchmark.pattern_split TRIVIAL thrpt 6 83982270.075 ± 10250565.633 ops/s
+ * StringSplitBenchmark.string_split TRIVIAL thrpt 6 848615850.339 ± 42453569.634 ops/s
+ * StringSplitBenchmark.strings_split TRIVIAL thrpt 6 1765290890.948 ± 160053487.111 ops/s
+ *
+ * StringSplitBenchmark.pattern_split SMALL thrpt 6 27383819.756 ± 5454020.100 ops/s
+ * StringSplitBenchmark.string_split SMALL thrpt 6 149047480.037 ± 6124271.615 ops/s
+ * StringSplitBenchmark.strings_split SMALL thrpt 6 564058097.162 ± 49305418.971 ops/s
+ *
+ * StringSplitBenchmark.pattern_split MEDIUM thrpt 6 14879131.729 ± 1981850.920 ops/s
+ * StringSplitBenchmark.string_split MEDIUM thrpt 6 51237769.598 ± 1808521.138 ops/s
+ * StringSplitBenchmark.strings_split MEDIUM thrpt 6 176976970.705 ± 6813886.658 ops/s
+ *
+ * StringSplitBenchmark.pattern_split LARGE thrpt 6 482340.838 ± 24903.187 ops/s
+ * StringSplitBenchmark.string_split LARGE thrpt 6 2460212.879 ± 86911.652 ops/s
+ * StringSplitBenchmark.strings_split LARGE thrpt 6 4023658.103 ± 30305.699 ops/s
+ *
+ */
+@Fork(2)
+@Warmup(iterations = 2)
+@Measurement(iterations = 3)
+@Threads(8)
+@State(Scope.Benchmark)
+@SuppressForbidden
+public class StringSplitBenchmark {
+  /** The '&'-separated inputs the benchmarks are parameterized over, from empty to large. */
+  public enum TestString {
+    EMPTY(""),
+    TRIVIAL("app_key=1111"),
+    SMALL("app_key=1111&foo=bar&baz=quux"),
+    MEDIUM(repeat("app_key=1111", '&', 100)),
+    LARGE(repeat("app_key=1111&application_key=2222&token=0894-4832", '&', 4096));
+
+    // The string handed to each split implementation.
+    final String str;
+
+    TestString(String str) {
+      this.str = str;
+    }
+  };
+
+  // Populated by JMH with each TestString constant in turn.
+  @Param TestString testStr;
+
+  /**
+   * Builds a test input made of copies of {@code repeat} joined by {@code separator}.
+   *
+   * @param repeat the segment to repeat; always appended at least once
+   * @param separator the character placed between consecutive copies
+   * @param length the length bound; copies are added only while the result stays below it
+   * @return the joined string
+   */
+  static final String repeat(String repeat, char separator, int length) {
+    StringBuilder builder = new StringBuilder(length);
+    builder.append(repeat);
+    // Stop as soon as one more separator + segment would reach or exceed the requested length.
+    while (builder.length() + repeat.length() + 1 < length) {
+      builder.append(separator).append(repeat);
+    }
+    return builder.toString();
+  }
+
+  /** {@code String.split} with the regex {@code \&}; every piece is fed to the Blackhole. */
+  @Benchmark
+  public void string_split(Blackhole bh) {
+    for (String substr : this.testStr.str.split("\\&")) {
+      bh.consume(substr);
+    }
+  }
+
+  // Same regex as string_split, compiled once up front.
+  static final Pattern PATTERN = Pattern.compile("\\&");
+
+  /** {@code Pattern.split} through the precompiled {@link #PATTERN}. */
+  @Benchmark
+  public void pattern_split(Blackhole bh) {
+    for (String str : PATTERN.split(this.testStr.str)) {
+      bh.consume(str);
+    }
+  }
+
+  /** {@code Strings.split} on the char {@code '&'}, iterating the resulting SubSequences. */
+  @Benchmark
+  public void strings_split(Blackhole bh) {
+    for (SubSequence subSeq : Strings.split(this.testStr.str, '&')) {
+      bh.consume(subSeq);
+    }
+  }
+}
diff --git a/internal-api/src/jmh/java/datadog/trace/util/StringSubSequenceBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/StringSubSequenceBenchmark.java
new file mode 100644
index 00000000000..d24755e950b
--- /dev/null
+++ b/internal-api/src/jmh/java/datadog/trace/util/StringSubSequenceBenchmark.java
@@ -0,0 +1,64 @@
+package datadog.trace.util;
+
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Threads;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+
+/**
+ * SubSequence.of has roughly 5x the throughput of String.substring and String.subSequence. This
+ * is primarily achieved through less allocation.
+ *
+ *
+ * Benchmark Mode Cnt Score Error Units
+ * StringSubSequenceBenchmark.string_subSequence thrpt 6 140369998.493 ± 4387855.861 ops/s
+ * StringSubSequenceBenchmark.string_subSequence:gc.alloc.rate thrpt 6 88880.463 ± 2778.032 MB/sec
+ *
+ * StringSubSequenceBenchmark.string_substring thrpt 6 136916708.207 ± 12299226.575 ops/s
+ * StringSubSequenceBenchmark.string_substring:gc.alloc.rate thrpt 6 86689.852 ± 7777.642 MB/sec
+ *
+ * StringSubSequenceBenchmark.subSequence thrpt 6 679669385.260 ± 7194043.619 ops/s
+ * StringSubSequenceBenchmark.subSequence:gc.alloc.rate thrpt 6 103702.745 ± 1095.741 MB/sec
+ *
+ */
+@Fork(2)
+@Warmup(iterations = 2)
+@Measurement(iterations = 3)
+@Threads(8)
+public class StringSubSequenceBenchmark {
+  // Backing text that every benchmark slices into chunks of up to 100 chars.
+  static final String LOREM_IPSUM =
+      "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.";
+
+  /** Slices the text with {@code String.substring}; each slice is fed to the Blackhole. */
+  @Benchmark
+  public void string_substring(Blackhole bh) {
+    String str = LOREM_IPSUM;
+    int len = str.length();
+
+    // Math.min clamps the final chunk to the end of the text.
+    for (int i = 0; i < str.length(); i += 100) {
+      bh.consume(str.substring(i, Math.min(i + 100, len)));
+    }
+  }
+
+  /** Slices the text with {@code String.subSequence}; same chunking as above. */
+  @Benchmark
+  public void string_subSequence(Blackhole bh) {
+    String str = LOREM_IPSUM;
+    int len = str.length();
+
+    for (int i = 0; i < str.length(); i += 100) {
+      bh.consume(str.subSequence(i, Math.min(i + 100, len)));
+    }
+  }
+
+  /** Slices the text with {@code SubSequence.of}; same chunking as above. */
+  @Benchmark
+  public void subSequence(Blackhole bh) {
+    String str = LOREM_IPSUM;
+    int len = str.length();
+
+    for (int i = 0; i < str.length(); i += 100) {
+      bh.consume(SubSequence.of(str, i, Math.min(i + 100, len)));
+    }
+  }
+}
diff --git a/internal-api/src/main/java/datadog/trace/util/Strings.java b/internal-api/src/main/java/datadog/trace/util/Strings.java
index adf54c90fb2..18dc83a37b3 100644
--- a/internal-api/src/main/java/datadog/trace/util/Strings.java
+++ b/internal-api/src/main/java/datadog/trace/util/Strings.java
@@ -6,6 +6,9 @@
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
import java.util.concurrent.ThreadLocalRandom;
import javax.annotation.Nullable;
@@ -193,4 +196,161 @@ public static String coalesce(@Nullable final String first, @Nullable final Stri
return null;
}
}
+
+ /**
+ * True if {@code needle} occurs fully within {@code s[beginIndex, endIndex)} -- a range-limited,
+ * allocation-free alternative to {@code s.substring(beginIndex, endIndex).contains(needle)}.
+ *
+ *
+ * String or byte[] / char[].
+ */
+  public static final SubSequence subSequence(String str, int beginIndex, int endIndex) {
+    // Same view the SubSequence factory hands out: [beginIndex, endIndex) over the backing str.
+    return SubSequence.of(str, beginIndex, endIndex);
+  }
+
+ /**
+ * Provides an IterablesplitChar
+ * . Unlike other approaches to splitting, this routine doesn't allocate any new
+ * String or byte[] / char[]
+ */
+ public static final IterableCharSequence that is a view into a sub-sequence of a String. Unlike
+ * String.subSequence, this class doesn't allocate an additional String,
+ * char[], or byte[].
+ *
+ * String.substring / subSequence copy the selected
+ * range into a fresh backing array on every call, so scanning or splitting a string into many
+ * pieces — parsing headers, tags, or query strings on a hot path — allocates one intermediate
+ * String per slice. A SubSequence is a zero-copy window over the original
+ * (an offset + length into the existing backing array), so the same parse allocates nothing per
+ * slice. Use it for transient, read-only views; materialize a real String only when
+ * the value must be retained or handed off.
+ */
+public final class SubSequence implements CharSequence {
+  /** Shared empty view, backed by the empty string. */
+  public static final SubSequence EMPTY = new SubSequence("", 0, 0);
+
+  /**
+   * SubSequence from {@code startIndex} to the end of {@code str}. Covers the same characters as
+   * {@code str.subSequence(startIndex, str.length())}.
+   */
+  public static final SubSequence of(String str, int startIndex) {
+    return new SubSequence(str, startIndex, str.length());
+  }
+
+  /**
+   * SubSequence from {@code startIndex} inclusive to {@code endIndex} exclusive of {@code str}.
+   * Covers the same characters as {@code str.subSequence(startIndex, endIndex)}.
+   */
+  public static final SubSequence of(String str, int startIndex, int endIndex) {
+    return new SubSequence(str, startIndex, endIndex);
+  }
+
+  private final String str;
+  private final int beginIndex;
+  private final int endIndex;
+
+  // Lazily materialized by toString(). Plain (non-volatile) field: concurrent callers may each
+  // build the substring once, but all of them end up with an equal String.
+  private String cachedSubstr = null;
+
+  // Stores the window as given; no range validation is performed here.
+  SubSequence(String str, int startIndex, int endIndex) {
+    this.str = str;
+    this.beginIndex = startIndex;
+    this.endIndex = endIndex;
+  }
+
+  /** Beginning index of the subsequence in the backing String - can be useful in text processing */
+  public int beginIndex() {
+    return this.beginIndex;
+  }
+
+  /** Ending index of the subsequence in the backing String - can be useful in text processing */
+  public int endIndex() {
+    return this.endIndex;
+  }
+
+  /**
+   * Returns the char at {@code index}, counted from the start of this view. The lookup is
+   * delegated straight to the backing String, so only the backing String's own bounds apply.
+   */
+  @Override
+  public char charAt(int index) {
+    return this.str.charAt(this.beginIndex + index);
+  }
+
+  @Override
+  public int length() {
+    return this.endIndex - this.beginIndex;
+  }
+
+  /**
+   * Returns a narrower view over the same backing String. As specified by {@link
+   * CharSequence#subSequence(int, int)}, both {@code start} (inclusive) and {@code end}
+   * (exclusive) are indices relative to this view.
+   *
+   * @throws IndexOutOfBoundsException if {@code start} or {@code end} is negative, {@code end} is
+   *     greater than {@link #length()}, or {@code start} is greater than {@code end}
+   */
+  @Override
+  public SubSequence subSequence(int start, int end) {
+    int length = this.length();
+    if (start < 0 || start > end || end > length) {
+      throw new IndexOutOfBoundsException(
+          "start=" + start + ", end=" + end + ", length=" + length);
+    }
+
+    // end is an index into this view, not a length -- both bounds shift by beginIndex only.
+    int newBeginIndex = this.beginIndex + start;
+    int newEndIndex = this.beginIndex + end;
+
+    return new SubSequence(this.str, newBeginIndex, newEndIndex);
+  }
+
+  /** Appends this SubSequence to the StringBuilder Equivalent to builder.append(this) but faster */
+  public void appendTo(StringBuilder builder) {
+    int beginIndex = this.beginIndex;
+    int endIndex = this.endIndex;
+
+    // Guards against the special case empty SubSequence at this.str.length
+    if (beginIndex != endIndex) builder.append(this.str, beginIndex, endIndex);
+  }
+
+  /** Returns the hash code as backingStr.substring(beginIndex, endIndex).hashCode() */
+  @Override
+  public int hashCode() {
+    return this.toString().hashCode();
+  }
+
+  /**
+   * Also handles String comparisons: this.equals(backingStr.substring(beginIndex, endIndex)) is
+   * true. Note the comparison is one-directional -- {@code String.equals} only accepts Strings,
+   * so {@code someString.equals(thisView)} is false.
+   */
+  @Override
+  public boolean equals(Object obj) {
+    if (!(obj instanceof CharSequence)) return false;
+
+    return this.equals((CharSequence) obj);
+  }
+
+  /** True if {@code that} has the same length and the same chars, position by position. */
+  public final boolean equals(CharSequence that) {
+    int thisLen = this.length();
+    int thatLen = that.length();
+
+    if (thisLen != thatLen) return false;
+
+    // Lengths are equal at this point, so a single bound covers both sequences.
+    for (int i = 0; i < thisLen; ++i) {
+      if (this.charAt(i) != that.charAt(i)) return false;
+    }
+    return true;
+  }
+
+  /**
+   * True if this sub-sequence contains {@code needle} -- the zero-copy equivalent of {@code
+   * toString().contains(needle)}, with no substring materialized.
+   */
+  public final boolean contains(String needle) {
+    return Strings.regionContains(this.str, this.beginIndex, this.endIndex, needle);
+  }
+
+  /** Materializes the viewed range as a String; the result is cached for later calls. */
+  @Override
+  public String toString() {
+    String cached = this.cachedSubstr;
+    if (cached != null) return cached;
+
+    int beginIndex = this.beginIndex;
+    int endIndex = this.endIndex;
+
+    String substr = (beginIndex == endIndex) ? "" : this.str.substring(beginIndex, endIndex);
+    this.cachedSubstr = substr;
+    return substr;
+  }
+}
diff --git a/internal-api/src/test/java/datadog/trace/util/StringsRegionContainsTest.java b/internal-api/src/test/java/datadog/trace/util/StringsRegionContainsTest.java
new file mode 100644
index 00000000000..115fcd79a31
--- /dev/null
+++ b/internal-api/src/test/java/datadog/trace/util/StringsRegionContainsTest.java
@@ -0,0 +1,53 @@
+package datadog.trace.util;
+
+import static datadog.trace.util.Strings.regionContains;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import org.junit.jupiter.api.Test;
+
+/** Boundary semantics of {@link Strings#regionContains(String, int, int, String)}. */
+class StringsRegionContainsTest {
+
+ // "abXYZcd": a0 b1 X2 Y3 Z4 c5 d6 -> "XYZ" spans [2,5).
+ private static final String S = "abXYZcd";
+
+ @Test
+ void foundFullyInside() {
+ assertTrue(regionContains(S, 0, S.length(), "XYZ"));
+ }
+
+ @Test
+ void notPresent() {
+ assertFalse(regionContains(S, 0, S.length(), "QQ"));
+ }
+
+ @Test
+ void exactFit() {
+ // idx == 2, idx + len == 5 == endIndex -> included.
+ assertTrue(regionContains(S, 2, 5, "XYZ"));
+ }
+
+ @Test
+ void straddlingEndIndexExcluded() {
+ // endIndex == 4 cuts off the trailing 'Z' -> not fully inside.
+ assertFalse(regionContains(S, 2, 4, "XYZ"));
+ }
+
+ @Test
+ void occurrenceBeforeBeginIndexExcluded() {
+ // beginIndex == 3 starts past the needle's first char -> no occurrence at/after beginIndex.
+ assertFalse(regionContains(S, 3, S.length(), "XYZ"));
+ }
+
+ @Test
+ void emptyRegion() {
+ assertFalse(regionContains(S, 2, 2, "XYZ"));
+ }
+
+ @Test
+ void matchesWholeStringContains() {
+ assertTrue(regionContains("hello", 0, 5, "ll"));
+ assertFalse(regionContains("hello", 0, 5, "z"));
+ }
+}
diff --git a/internal-api/src/test/java/datadog/trace/util/StringsTest2.java b/internal-api/src/test/java/datadog/trace/util/StringsTest2.java
new file mode 100644
index 00000000000..c949da41a5d
--- /dev/null
+++ b/internal-api/src/test/java/datadog/trace/util/StringsTest2.java
@@ -0,0 +1,116 @@
+package datadog.trace.util;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.Iterator;
+import org.junit.jupiter.api.DisplayName;
+import org.junit.jupiter.api.Test;
+
+public class StringsTest2 {
+  @Test
+  @DisplayName("replaceAll - no replace")
+  public void replaceAllNoReplace() {
+    // "dne" does not occur in the input, so the input must come back unchanged.
+    assertEquals("foobar", Strings.replaceAll("foobar", "dne", "unchanged"));
+  }
+
+ @Test
+ @DisplayName("replaceAll - single replace")
+ public void replaceAllSingleReplace() {
+ assertEquals("foobaz", Strings.replaceAll("foobar", "bar", "baz"));
+ }
+
+  @Test
+  @DisplayName("replaceAll - multi replace")
+  public void replaceAllMultiReplace() {
+    // Both occurrences of "bar" must be replaced, not just the first one.
+    assertEquals("foo=baz&quux=baz", Strings.replaceAll("foo=bar&quux=bar", "bar", "baz"));
+  }
+
+ @Test
+ @DisplayName("split - empty")
+ public void splitEmpty() {
+ Iterator