From e729013a359db59fd8c1ca60bb6c00c3fb08842d Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Thu, 19 Feb 2026 10:34:48 -0500 Subject: [PATCH 01/22] Adding to StringUtiils - fast replaceAll for a fixed string & replacement, 3x throughput compared to regex based solutions, 1/2x allocation compared to regex solutions - added SubSequence which provides a view into a subsequence of a String without incurring extra allocation - Strings.spliit returns an Iterable can be used to do light weight processing of a String --- .../util/StringReplacementBenchmark.java | 85 +++++++++++ .../trace/util/StringSplitBenchmark.java | 92 ++++++++++++ .../util/StringSubSequenceBenchmark.java | 62 ++++++++ .../main/java/datadog/trace/util/Strings.java | 142 ++++++++++++++++++ .../java/datadog/trace/util/SubSequence.java | 116 ++++++++++++++ .../java/datadog/trace/util/StringsTest2.java | 116 ++++++++++++++ .../datadog/trace/util/SubSequenceTest.java | 111 ++++++++++++++ 7 files changed, 724 insertions(+) create mode 100644 internal-api/src/jmh/java/datadog/trace/util/StringReplacementBenchmark.java create mode 100644 internal-api/src/jmh/java/datadog/trace/util/StringSplitBenchmark.java create mode 100644 internal-api/src/jmh/java/datadog/trace/util/StringSubSequenceBenchmark.java create mode 100644 internal-api/src/main/java/datadog/trace/util/SubSequence.java create mode 100644 internal-api/src/test/java/datadog/trace/util/StringsTest2.java create mode 100644 internal-api/src/test/java/datadog/trace/util/SubSequenceTest.java diff --git a/internal-api/src/jmh/java/datadog/trace/util/StringReplacementBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/StringReplacementBenchmark.java new file mode 100644 index 00000000000..7acb31353c5 --- /dev/null +++ b/internal-api/src/jmh/java/datadog/trace/util/StringReplacementBenchmark.java @@ -0,0 +1,85 @@ +package datadog.trace.util; + +import java.util.regex.Pattern; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Warmup; + +/** + * For simple replacements, Strings.replaceAll out performs String.replaceAll and + * regex.Matcher.replaceAll by 3x. Strings.replaceAll also requires less allocation. + * + *

When pattern matching is needed, compiling the regex to Pattern slightly improves overhead, + * but dramatically reduces memory allocation to 1/4x of String.replaceAll. + * MacBook M1 with 8 threads (Java 21) + * + * Benchmark Mode Cnt Score Error Units + * StringReplacementBenchmark.regex_replaceAll thrpt 6 13795837.811 ± 3635087.691 ops/s + * StringReplacementBenchmark.regex_replaceAll:gc.alloc.rate thrpt 6 3988.955 ± 1148.316 MB/sec + * + * StringReplacementBenchmark.string_replaceAll thrpt 6 14611046.391 ± 4865682.875 ops/s + * StringReplacementBenchmark.string_replaceAll:gc.alloc.rate thrpt 6 11391.346 ± 3790.917 MB/sec + * + * StringReplacementBenchmark.strings_replaceAll thrpt 6 39514695.575 ± 7169844.210 ops/s + * StringReplacementBenchmark.strings_replaceAll:gc.alloc.rate thrpt 6 2777.083 ± 506.909 MB/sec + * + */ +@Fork(2) +@Warmup(iterations = 2) +@Measurement(iterations = 3) +@Threads(8) +public class StringReplacementBenchmark { + static final String[] INPUTS = { + "foo", + "baz", + "foobar", + "foobaz", + "foo=baz", + "bar=foo", + "foo=foo&bar=foo", + "lorem ipsum", + "datadog" + }; + + static int sharedInputIndex = 0; + + static String nextInput() { + int localIndex = ++sharedInputIndex; + if (localIndex >= INPUTS.length) { + sharedInputIndex = localIndex = 0; + } + return INPUTS[localIndex]; + } + + @Benchmark + public String string_replaceAll() { + return _string_replaceAll(nextInput()); + } + + static String _string_replaceAll(String input) { + // Underneath, this does Pattern.compile("foo").matcher(str).replaceAll() + return input.replaceAll("foo", "*redacted*"); + } + + static final Pattern REGEX_COMPILED = Pattern.compile("foo"); + + @Benchmark + public String regex_replaceAll() { + return _regex_replaceAll(nextInput()); + } + + static String _regex_replaceAll(String input) { + return REGEX_COMPILED.matcher(input).replaceAll("*redcated*"); + } + + @Benchmark + public String strings_replaceAll() { + return _strings_replaceAll(nextInput()); + } + + static String _strings_replaceAll(String input) { + return Strings.replaceAll(input, "foo", "*redacted*"); + } +} diff --git a/internal-api/src/jmh/java/datadog/trace/util/StringSplitBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/StringSplitBenchmark.java new file mode 100644 index 00000000000..3c21d341b55 --- /dev/null +++ b/internal-api/src/jmh/java/datadog/trace/util/StringSplitBenchmark.java @@ -0,0 +1,92 @@ +package datadog.trace.util; + +import java.util.regex.Pattern; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +/** + * Strings.split is generally faster for String processing, since it create SubSequences that are + * view into the backing String rather than new String objects. + * + *

Benchmark (testStr) Mode Cnt Score Error Units StringSplitBenchmark.pattern_split EMPTY thrpt + * 6 291274421.621 ± 14834420.899 ops/s StringSplitBenchmark.string_split EMPTY thrpt 6 + * 1035461179.368 ± 60212686.921 ops/s StringSplitBenchmark.strings_split EMPTY thrpt 6 + * 8161781738.019 ± 178530888.497 ops/s + * + *

StringSplitBenchmark.pattern_split TRIVIAL thrpt 6 83982270.075 ± 10250565.633 ops/s + * StringSplitBenchmark.string_split TRIVIAL thrpt 6 848615850.339 ± 42453569.634 ops/s + * StringSplitBenchmark.strings_split TRIVIAL thrpt 6 1765290890.948 ± 160053487.111 ops/s + * + *

StringSplitBenchmark.pattern_split SMALL thrpt 6 27383819.756 ± 5454020.100 ops/s + * StringSplitBenchmark.string_split SMALL thrpt 6 149047480.037 ± 6124271.615 ops/s + * StringSplitBenchmark.strings_split SMALL thrpt 6 564058097.162 ± 49305418.971 ops/s + * + *

StringSplitBenchmark.pattern_split MEDIUM thrpt 6 14879131.729 ± 1981850.920 ops/s + * StringSplitBenchmark.string_split MEDIUM thrpt 6 51237769.598 ± 1808521.138 ops/s + * StringSplitBenchmark.strings_split MEDIUM thrpt 6 176976970.705 ± 6813886.658 ops/s + * + *

StringSplitBenchmark.pattern_split LARGE thrpt 6 482340.838 ± 24903.187 ops/s + * StringSplitBenchmark.string_split LARGE thrpt 6 2460212.879 ± 86911.652 ops/s + * StringSplitBenchmark.strings_split LARGE thrpt 6 4023658.103 ± 30305.699 ops/s + */ +@Fork(2) +@Warmup(iterations = 2) +@Measurement(iterations = 3) +@Threads(8) +@State(Scope.Benchmark) +public class StringSplitBenchmark { + public enum TestString { + EMPTY(""), + TRIVIAL("app_key=1111"), + SMALL("app_key=1111&foo=bar&baz=quux"), + MEDIUM(repeat("app_key=1111", '&', 100)), + LARGE(repeat("app_key=1111&application_key=2222&token=0894-4832", '&', 4096)); + + final String str; + + TestString(String str) { + this.str = str; + } + }; + + @Param TestString testStr; + + static final String repeat(String repeat, char separator, int length) { + StringBuilder builder = new StringBuilder(length); + builder.append(repeat); + while (builder.length() + repeat.length() + 1 < length) { + builder.append(separator).append(repeat); + } + return builder.toString(); + } + + @Benchmark + public void string_split(Blackhole bh) { + for (String substr : this.testStr.str.split("\\&")) { + bh.consume(substr); + } + } + + static final Pattern PATTERN = Pattern.compile("\\&"); + + @Benchmark + public void pattern_split(Blackhole bh) { + for (String str : PATTERN.split(this.testStr.str)) { + bh.consume(str); + } + } + + @Benchmark + public void strings_split(Blackhole bh) { + for (SubSequence subSeq : Strings.split(this.testStr.str, '&')) { + bh.consume(subSeq); + } + } +} diff --git a/internal-api/src/jmh/java/datadog/trace/util/StringSubSequenceBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/StringSubSequenceBenchmark.java new file mode 100644 index 00000000000..99e8fbef0cb --- /dev/null +++ b/internal-api/src/jmh/java/datadog/trace/util/StringSubSequenceBenchmark.java @@ -0,0 +1,62 @@ +package datadog.trace.util; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +/** + * Strings.substring has 5x throughput. This is primarily achieved through less allocation. NOTE: + * The higher allocation rate is misleading because 5x the work was performed. After accounting for + * the 5x, the actual allocation rate is 0.25x that of String.substring or String.subSequence. + * + *

Benchmark Mode Cnt Score Error Units StringSubSequenceBenchmark.string_subSequence thrpt 6 + * 140369998.493 ± 4387855.861 ops/s StringSubSequenceBenchmark.string_subSequence:gc.alloc.rate + * thrpt 6 88880.463 ± 2778.032 MB/sec + * + *

StringSubSequenceBenchmark.string_substring thrpt 6 136916708.207 ± 12299226.575 ops/s + * StringSubSequenceBenchmark.string_substring:gc.alloc.rate thrpt 6 86689.852 ± 7777.642 MB/sec + * + *

StringSubSequenceBenchmark.strings_substring thrpt 6 679669385.260 ± 7194043.619 ops/s + * StringSubSequenceBenchmark.strings_substring:gc.alloc.rate thrpt 6 103702.745 ± 1095.741 MB/sec + */ +@Fork(2) +@Warmup(iterations = 2) +@Measurement(iterations = 3) +@Threads(8) +public class StringSubSequenceBenchmark { + static final String LOREM_IPSUM = + "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."; + + @Benchmark + public void string_substring(Blackhole bh) { + String str = LOREM_IPSUM; + int len = str.length(); + + for (int i = 0; i < str.length(); i += 100) { + bh.consume(str.substring(i, Math.min(i + 100, len))); + } + } + + @Benchmark + public void string_subSequence(Blackhole bh) { + String str = LOREM_IPSUM; + int len = str.length(); + + for (int i = 0; i < str.length(); i += 100) { + bh.consume(str.subSequence(i, Math.min(i + 100, len))); + } + } + + @Benchmark + public void strings_substring(Blackhole bh) { + String str = LOREM_IPSUM; + int len = str.length(); + + for (int i = 0; i < str.length(); i += 100) { + bh.consume(SubSequence.of(str, i, Math.min(i + 100, len))); + } + } +} diff --git a/internal-api/src/main/java/datadog/trace/util/Strings.java b/internal-api/src/main/java/datadog/trace/util/Strings.java index efca9430007..5a8b1997121 100644 --- a/internal-api/src/main/java/datadog/trace/util/Strings.java +++ b/internal-api/src/main/java/datadog/trace/util/Strings.java @@ -5,6 +5,9 @@ import java.nio.charset.StandardCharsets; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.util.Collections; +import java.util.Iterator; +import java.util.NoSuchElementException; import java.util.concurrent.ThreadLocalRandom; import javax.annotation.Nullable; @@ -180,4 +183,143 @@ public static String coalesce(@Nullable final String first, @Nullable final Stri return null; } } + + /** Low overhead replaceAll */ + public static String replaceAll(String input, String needle, String replacement) { + int index = input.indexOf(needle); + if (index == -1) return input; + + int needleLen = needle.length(); + + StringBuilder builder = new StringBuilder(input.length() + 10); + builder.append(input, 0, index); + builder.append(replacement); + + int prevIndex = index; + index = input.indexOf(needle, index + needleLen); + for (; index != -1; prevIndex = index, index = input.indexOf(needle, index + needleLen)) { + builder.append(input, prevIndex + needleLen, index); + builder.append(replacement); + } + builder.append(input, prevIndex + needleLen, input.length()); + + return builder.toString(); + } + + /** + * Provides a SubSequence which a view into the provided String Unlike String.subSequence (which + * is usually just a wrapper around String.substring), this routine doesn't allocate a new String + * or byte[]/char[]. + */ + public static final SubSequence subSequence(String str, int beginIndex) { + return new SubSequence(str, beginIndex, str.length()); + } + + /** + * Provides a SubSequence which a view into the provided String Unlike String.subSequence (which + * is usually just a wrapper around String.substring), this routine doesn't allocate a new + * String or byte[] / char[]. + */ + public static final SubSequence subSequence(String str, int beginIndex, int endIndex) { + return new SubSequence(str, beginIndex, endIndex); + } + + /** + * Provides an Iterable where the sub-sequences are separated by splitChar + * . Unlike other approaches to splitting, this routine doesn't allocate any new + * String or byte[] / char[] + */ + public static final Iterable split(String str, char splitChar) { + if (str.isEmpty()) { + return Collections.emptyList(); + } + + int firstIndex = str.indexOf(splitChar); + if (firstIndex == -1) { + return Collections.singletonList(subSequence(str, 0)); + } + + return new SplitIterable(str, splitChar, firstIndex); + } + + static final class SplitIterable implements Iterable { + private final String str; + private final int len; + private final char splitChar; + private final int firstIndex; + + SplitIterable(String str, char splitChar, int firstIndex) { + this.str = str; + this.len = str.length(); + this.splitChar = splitChar; + this.firstIndex = firstIndex; + } + + @Override + public SplitIterator iterator() { + return new SplitIterator(this.str, this.len, this.splitChar, this.firstIndex); + } + } + + static final class SplitIterator implements Iterator { + private final String str; + private final int len; + private final char splitChar; + + private int curIndex; + private int nextIndex; + + SplitIterator(String str, int len, char splitChar, int firstIndex) { + this.str = str; + this.len = len; + this.splitChar = splitChar; + + this.curIndex = 0; + this.nextIndex = firstIndex == -1 ? len : firstIndex; + } + + @Override + public boolean hasNext() { + return (this.curIndex <= this.len); + } + + @Override + public SubSequence next() { + int curIndex = this.curIndex; + int len = this.len; + + if (curIndex > len) throw new NoSuchElementException(); + + SubSequence subSeq; + + int nextIndex = this.nextIndex; + if (nextIndex == len - 1) { + // Handles the case where there's a trailing separator, + // curIndex is moved to len to represent the empty string + // after the trailing separator + + // Next call then goes into the special case below + subSeq = new SubSequence(this.str, curIndex, nextIndex); + this.curIndex = len; + this.nextIndex = len; + } else if (curIndex == len) { + // Handles the empty string after the trailing separator + // curIndex is given the terminating value `len + 1` + + // Don't use SubSequence.EMPTY because it wouldn't have + // the correct beginIndex + subSeq = new SubSequence(this.str, len, len); + this.curIndex = len + 1; + } else { + subSeq = new SubSequence(this.str, curIndex, nextIndex); + + // core advancing logic + this.curIndex = nextIndex + 1; + int searchIndex = this.str.indexOf(this.splitChar, nextIndex + 1); + this.nextIndex = (searchIndex == -1) ? len : searchIndex; + } + + return subSeq; + } + } } diff --git a/internal-api/src/main/java/datadog/trace/util/SubSequence.java b/internal-api/src/main/java/datadog/trace/util/SubSequence.java new file mode 100644 index 00000000000..280dae23365 --- /dev/null +++ b/internal-api/src/main/java/datadog/trace/util/SubSequence.java @@ -0,0 +1,116 @@ +package datadog.trace.util; + +/** + * A CharSequence that is view into a sub-sequencce of a String Unlike + * String.subSequence, this class doesn't allocate an additional String, + * char[], or byte[] + */ +public final class SubSequence implements CharSequence { + public static final SubSequence EMPTY = new SubSequence("", 0, 0); + + /** + * SubSequence from beginIndex to end of str Equivalent to + * str.subSequence(str, startIndex) + */ + public static final SubSequence of(String str, int startIndex) { + return new SubSequence(str, startIndex, str.length()); + } + + /** + * SubSequence from beginIndex inclusive to endIndex exclusive of + * str Equivalent to str.subSequence(str, startIndex, endIndex) + */ + public static final SubSequence of(String str, int startIndex, int endIndex) { + return new SubSequence(str, startIndex, endIndex); + } + + private final String str; + private final int beginIndex; + private final int endIndex; + + private String cachedSubstr = null; + + SubSequence(String str, int startIndex, int endIndex) { + this.str = str; + this.beginIndex = startIndex; + this.endIndex = endIndex; + } + + /** Beginning index of the subseqence in the backing String - can be useful in text processing */ + public int beginIndex() { + return this.beginIndex; + } + + /** Ending index of the subsequence in the backing String - can be useful in text processing */ + public int endIndex() { + return this.endIndex; + } + + @Override + public char charAt(int index) { + return this.str.charAt(this.beginIndex + index); + } + + @Override + public int length() { + return this.endIndex - this.beginIndex; + } + + @Override + public SubSequence subSequence(int start, int end) { + int newBeginIndex = this.beginIndex + start; + int newEndIndex = this.beginIndex + start + end; + + return new SubSequence(this.str, newBeginIndex, newEndIndex); + } + + /** Appends this SubSequence to the StringBuilder Equivalent to builder.append(this) but faster */ + public void appendTo(StringBuilder builder) { + int beginIndex = this.beginIndex; + int endIndex = this.endIndex; + + // Guards against the special case empty SubSequence at this.str.length + if (beginIndex != endIndex) builder.append(this.str, beginIndex, endIndex); + } + + /** Returns the hash code as backingStr.substr(beginIndex, endIndex).hashCode() */ + @Override + public int hashCode() { + return this.toString().hashCode(); + } + + /** + * Also handles String comparisons this.equals(backingStr.substr(beginIndex, endIndex)) is true + */ + @Override + public boolean equals(Object obj) { + if (!(obj instanceof CharSequence)) return false; + + return this.equals((CharSequence) obj); + } + + public final boolean equals(CharSequence that) { + int thisLen = this.length(); + int thatLen = that.length(); + + if (thisLen != thatLen) return false; + + for (int i = 0; i < Math.min(this.length(), that.length()); ++i) { + if (this.charAt(i) != that.charAt(i)) return false; + } + return true; + } + + @Override + public String toString() { + String cached = this.cachedSubstr; + if (cached != null) return cached; + + int beginIndex = this.beginIndex; + int endIndex = this.endIndex; + + String substr = (beginIndex == endIndex) ? "" : this.str.substring(beginIndex, endIndex); + this.cachedSubstr = substr; + return substr; + } +} diff --git a/internal-api/src/test/java/datadog/trace/util/StringsTest2.java b/internal-api/src/test/java/datadog/trace/util/StringsTest2.java new file mode 100644 index 00000000000..c949da41a5d --- /dev/null +++ b/internal-api/src/test/java/datadog/trace/util/StringsTest2.java @@ -0,0 +1,116 @@ +package datadog.trace.util; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.Iterator; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +public class StringsTest2 { + @Test + @DisplayName("replaceAll - single replace") + public void replaceAllNoReplace() { + assertEquals("foobar", Strings.replaceAll("foobar", "dne", "unchanged")); + } + + @Test + @DisplayName("replaceAll - single replace") + public void replaceAllSingleReplace() { + assertEquals("foobaz", Strings.replaceAll("foobar", "bar", "baz")); + } + + @Test + @DisplayName("replaceAll - single replace") + public void replaceAllMultiReplace() { + assertEquals("foo=baz&quux=baz", Strings.replaceAll("foo=bar&quux=bar", "bar", "baz")); + } + + @Test + @DisplayName("split - empty") + public void splitEmpty() { + Iterator iter = Strings.split("", '&').iterator(); + assertFalse(iter.hasNext()); + } + + @Test + @DisplayName("split - no separator") + public void splitNoSeparator() { + Iterator iter = Strings.split("foo=bar", '&').iterator(); + assertContentEquals("foo=bar", iter.next()); + assertFalse(iter.hasNext()); + } + + @Test + @DisplayName("split - with separator") + public void splitWithSeparator() { + Iterator iter = Strings.split("foo=bar&hello=world", '&').iterator(); + assertContentEquals("foo=bar", iter.next()); + assertContentEquals("hello=world", iter.next()); + assertFalse(iter.hasNext()); + } + + @Test + @DisplayName("split - leading separator") + public void splitLeadingSeparator() { + Iterator iter = Strings.split("&foo=bar", '&').iterator(); + assertSubSeq("", 0, 0, iter.next()); // empty string before the separator + assertSubSeq("foo=bar", 1, 8, iter.next()); + assertFalse(iter.hasNext()); + } + + @Test + @DisplayName("split - trailing separator") + public void splitTrailingSeparator() { + Iterator iter = Strings.split("foo=bar&", '&').iterator(); + assertSubSeq("foo=bar", 0, 7, iter.next()); + assertSubSeq("", 8, 8, iter.next()); // empty string after the separator + assertFalse(iter.hasNext()); + } + + @Test + @DisplayName("split - only separator") + public void splitOnlySeparator() { + Iterator iter = Strings.split("&", '&').iterator(); + assertSubSeq("", 0, 0, iter.next()); // empty string before the separator + assertSubSeq("", 1, 1, iter.next()); // empty string after the separator + assertFalse(iter.hasNext()); + } + + @Test + @DisplayName("split - sanity check") + public void splitSanityCheck() { + String testStr = "foo=bar&hello=world&baz=quux&firstName=Jon&lastName=Doe"; + + String[] strSplit = testStr.split("\\&"); + Iterable iterable = Strings.split(testStr, '&'); + + Iterator iter = iterable.iterator(); + for (String expected : strSplit) { + assertTrue(iter.hasNext()); + assertContentEquals(expected, iter.next()); + } + assertFalse(iter.hasNext()); + + // repeat, just to check iterable functionality + Iterator iter2 = iterable.iterator(); + for (String expected : strSplit) { + assertTrue(iter2.hasNext()); + assertContentEquals(expected, iter2.next()); + } + assertFalse(iter2.hasNext()); + } + + static void assertContentEquals(String expectedStr, SubSequence actualSeq) { + assertTrue(actualSeq.equals(expectedStr), "equals String"); + assertEquals(expectedStr, actualSeq.toString(), "toString"); + } + + static void assertSubSeq( + String expected, int expectedBeginIndex, int expectedEndIndex, SubSequence actualSeq) { + assertContentEquals(expected, actualSeq); + assertEquals(expectedBeginIndex, actualSeq.beginIndex(), "beginIndex"); + assertEquals(expectedEndIndex, actualSeq.endIndex(), "endIndex"); + } +} diff --git a/internal-api/src/test/java/datadog/trace/util/SubSequenceTest.java b/internal-api/src/test/java/datadog/trace/util/SubSequenceTest.java new file mode 100644 index 00000000000..65d7c33ff1e --- /dev/null +++ b/internal-api/src/test/java/datadog/trace/util/SubSequenceTest.java @@ -0,0 +1,111 @@ +package datadog.trace.util; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import org.junit.jupiter.api.Test; + +public class SubSequenceTest { + static final String LOREM_IPSUM = + "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."; + + @Test + public void empty() { + SubSequence subSeq = SubSequence.EMPTY; + assertEquals("", subSeq.toString()); + + StringBuilder builder0 = new StringBuilder(); + builder0.append(subSeq); + assertEquals("", builder0.toString()); + + StringBuilder builder1 = new StringBuilder(); + subSeq.appendTo(builder1); + assertEquals("", builder1.toString()); + } + + @Test + public void emptyTail() { + // This is allowed to represent the logical empty string after a match at the end of + // backing string. There is an important distinction from the canonical empty + // SubSequence which wouldn't have the correct beginIndex / endIndex pair. + SubSequence subSeq = new SubSequence("foo", "foo".length(), "foo".length()); + assertEquals("foo".length(), subSeq.beginIndex()); + assertEquals("foo".length(), subSeq.endIndex()); + assertEquals("", subSeq.toString()); + + StringBuilder builder0 = new StringBuilder(); + builder0.append(subSeq); + assertEquals("", builder0.toString()); + + StringBuilder builder1 = new StringBuilder(); + subSeq.appendTo(builder1); + assertEquals("", builder1.toString()); + } + + @Test + public void subSequence() { + String str = LOREM_IPSUM; + int len = str.length(); + + for (int i = 0; i < str.length(); i += 100) { + int endIndex = Math.min(i + 100, len); + + String subStr = str.substring(i, endIndex); + CharSequence subCharSeq = str.subSequence(i, endIndex); + SubSequence subSeq = SubSequence.of(str, i, endIndex); + SubSequence altSubSeq = Strings.subSequence(str, i, endIndex); + + assertTrue(subSeq.equals(subStr)); + assertEquals(subStr, subSeq.toString()); + assertEquals(subStr.hashCode(), subSeq.hashCode()); + + assertTrue(subSeq.equals(subCharSeq)); + assertEquals(subCharSeq.toString(), subSeq.toString()); + + assertEquals(subSeq, altSubSeq); + + assertSame(subSeq.toString(), subSeq.toString()); + } + } + + @Test + public void subSequenceToEnd() { + String str = LOREM_IPSUM; + int len = str.length(); + + for (int i = 0; i < str.length(); i += 100) { + String subStr = str.substring(i); + SubSequence subSeq = SubSequence.of(str, i); + SubSequence altSubSeq = Strings.subSequence(str, i); + + assertTrue(subSeq.equals(subStr)); + assertEquals(subStr, subSeq.toString()); + assertEquals(subStr.hashCode(), subSeq.hashCode()); + + assertEquals(subSeq, altSubSeq); + + assertSame(subSeq.toString(), subSeq.toString()); + } + } + + @Test + public void appendToBuilder() { + SubSequence subSeq = SubSequence.of(LOREM_IPSUM, 50, 150); + + StringBuilder expectedBuilder = new StringBuilder(); + expectedBuilder.append(LOREM_IPSUM, 50, 150); + + String expectedStr = expectedBuilder.toString(); + + StringBuilder builder0 = new StringBuilder(); + builder0.append(subSeq); + + assertEquals(expectedStr, builder0.toString()); + + StringBuilder builder1 = new StringBuilder(); + subSeq.appendTo(builder1); + + assertEquals(expectedStr, builder1.toString()); + } +} From b0a04fcb53a541cf9456dd0ecb5ee65bbaa63815 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Thu, 19 Feb 2026 10:39:56 -0500 Subject: [PATCH 02/22] Checking hashCode --- .../src/test/java/datadog/trace/util/SubSequenceTest.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/internal-api/src/test/java/datadog/trace/util/SubSequenceTest.java b/internal-api/src/test/java/datadog/trace/util/SubSequenceTest.java index 65d7c33ff1e..fdb58e737ad 100644 --- a/internal-api/src/test/java/datadog/trace/util/SubSequenceTest.java +++ b/internal-api/src/test/java/datadog/trace/util/SubSequenceTest.java @@ -14,6 +14,7 @@ public class SubSequenceTest { public void empty() { SubSequence subSeq = SubSequence.EMPTY; assertEquals("", subSeq.toString()); + assertEquals("".hashCode(), subSeq.hashCode()); StringBuilder builder0 = new StringBuilder(); builder0.append(subSeq); @@ -33,6 +34,7 @@ public void emptyTail() { assertEquals("foo".length(), subSeq.beginIndex()); assertEquals("foo".length(), subSeq.endIndex()); assertEquals("", subSeq.toString()); + assertEquals("".hashCode(), subSeq.hashCode()); StringBuilder builder0 = new StringBuilder(); builder0.append(subSeq); From f75da1cf662be252349fe3653fd5c85804c3119d Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Thu, 19 Feb 2026 10:41:38 -0500 Subject: [PATCH 03/22] Reducing whitespace --- .../src/test/java/datadog/trace/util/SubSequenceTest.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/internal-api/src/test/java/datadog/trace/util/SubSequenceTest.java b/internal-api/src/test/java/datadog/trace/util/SubSequenceTest.java index fdb58e737ad..78d52fa6931 100644 --- a/internal-api/src/test/java/datadog/trace/util/SubSequenceTest.java +++ b/internal-api/src/test/java/datadog/trace/util/SubSequenceTest.java @@ -102,12 +102,10 @@ public void appendToBuilder() { StringBuilder builder0 = new StringBuilder(); builder0.append(subSeq); - assertEquals(expectedStr, builder0.toString()); StringBuilder builder1 = new StringBuilder(); subSeq.appendTo(builder1); - assertEquals(expectedStr, builder1.toString()); } } From ab707017fba8876bcabced606b48cbfaa0924eee Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Thu, 19 Feb 2026 10:45:40 -0500 Subject: [PATCH 04/22] Fixing ugly spotless formatting --- .../util/StringSubSequenceBenchmark.java | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/internal-api/src/jmh/java/datadog/trace/util/StringSubSequenceBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/StringSubSequenceBenchmark.java index 99e8fbef0cb..c08b6f888dd 100644 --- a/internal-api/src/jmh/java/datadog/trace/util/StringSubSequenceBenchmark.java +++ b/internal-api/src/jmh/java/datadog/trace/util/StringSubSequenceBenchmark.java @@ -8,19 +8,21 @@ import org.openjdk.jmh.infra.Blackhole; /** - * Strings.substring has 5x throughput. This is primarily achieved through less allocation. NOTE: - * The higher allocation rate is misleading because 5x the work was performed. After accounting for - * the 5x, the actual allocation rate is 0.25x that of String.substring or String.subSequence. + * Strings.substring has 5x throughput. This is primarily achieved through less allocation. * - *

Benchmark Mode Cnt Score Error Units StringSubSequenceBenchmark.string_subSequence thrpt 6 - * 140369998.493 ± 4387855.861 ops/s StringSubSequenceBenchmark.string_subSequence:gc.alloc.rate - * thrpt 6 88880.463 ± 2778.032 MB/sec + *

NOTE: The higher allocation rate is misleading because 5x the work was performed. After + * accounting for the 5x throughput difference, the actual allocation rate is 0.25x that of + * String.substring or String.subSequence. + * Benchmark Mode Cnt Score Error Units + * StringSubSequenceBenchmark.string_subSequence thrpt 6 140369998.493 ± 4387855.861 ops/s + * StringSubSequenceBenchmark.string_subSequence:gc.alloc.rate thrpt 6 88880.463 ± 2778.032 MB/sec * - *

StringSubSequenceBenchmark.string_substring thrpt 6 136916708.207 ± 12299226.575 ops/s - * StringSubSequenceBenchmark.string_substring:gc.alloc.rate thrpt 6 86689.852 ± 7777.642 MB/sec + * StringSubSequenceBenchmark.string_substring thrpt 6 136916708.207 ± 12299226.575 ops/s + * StringSubSequenceBenchmark.string_substring:gc.alloc.rate thrpt 6 86689.852 ± 7777.642 MB/sec * - *

StringSubSequenceBenchmark.strings_substring thrpt 6 679669385.260 ± 7194043.619 ops/s - * StringSubSequenceBenchmark.strings_substring:gc.alloc.rate thrpt 6 103702.745 ± 1095.741 MB/sec + * StringSubSequenceBenchmark.strings_substring thrpt 6 679669385.260 ± 7194043.619 ops/s + * StringSubSequenceBenchmark.strings_substring:gc.alloc.rate thrpt 6 103702.745 ± 1095.741 MB/sec + * */ @Fork(2) @Warmup(iterations = 2) From fb3ae00807b267649e28143e28d877fd3acf9fdd Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Thu, 19 Feb 2026 10:47:36 -0500 Subject: [PATCH 05/22] Fixing ugly spotless formatting --- .../trace/util/StringSplitBenchmark.java | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/internal-api/src/jmh/java/datadog/trace/util/StringSplitBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/StringSplitBenchmark.java index 3c21d341b55..ecab152cdc3 100644 --- a/internal-api/src/jmh/java/datadog/trace/util/StringSplitBenchmark.java +++ b/internal-api/src/jmh/java/datadog/trace/util/StringSplitBenchmark.java @@ -13,28 +13,28 @@ /** * Strings.split is generally faster for String processing, since it create SubSequences that are - * view into the backing String rather than new String objects. + * view into the backing String rather than new String objects. + * Benchmark (testStr) Mode Cnt Score Error Units + * StringSplitBenchmark.pattern_split EMPTY thrpt 6 291274421.621 ± 14834420.899 ops/s + * StringSplitBenchmark.string_split EMPTY thrpt 6 1035461179.368 ± 60212686.921 ops/s + * StringSplitBenchmark.strings_split EMPTY thrpt 6 8161781738.019 ± 178530888.497 ops/s * - *

Benchmark (testStr) Mode Cnt Score Error Units StringSplitBenchmark.pattern_split EMPTY thrpt - * 6 291274421.621 ± 14834420.899 ops/s StringSplitBenchmark.string_split EMPTY thrpt 6 - * 1035461179.368 ± 60212686.921 ops/s StringSplitBenchmark.strings_split EMPTY thrpt 6 - * 8161781738.019 ± 178530888.497 ops/s + * StringSplitBenchmark.pattern_split TRIVIAL thrpt 6 83982270.075 ± 10250565.633 ops/s + * StringSplitBenchmark.string_split TRIVIAL thrpt 6 848615850.339 ± 42453569.634 ops/s + * StringSplitBenchmark.strings_split TRIVIAL thrpt 6 1765290890.948 ± 160053487.111 ops/s * - *

StringSplitBenchmark.pattern_split TRIVIAL thrpt 6 83982270.075 ± 10250565.633 ops/s - * StringSplitBenchmark.string_split TRIVIAL thrpt 6 848615850.339 ± 42453569.634 ops/s - * StringSplitBenchmark.strings_split TRIVIAL thrpt 6 1765290890.948 ± 160053487.111 ops/s + * StringSplitBenchmark.pattern_split SMALL thrpt 6 27383819.756 ± 5454020.100 ops/s + * StringSplitBenchmark.string_split SMALL thrpt 6 149047480.037 ± 6124271.615 ops/s + * StringSplitBenchmark.strings_split SMALL thrpt 6 564058097.162 ± 49305418.971 ops/s * - *

StringSplitBenchmark.pattern_split SMALL thrpt 6 27383819.756 ± 5454020.100 ops/s - * StringSplitBenchmark.string_split SMALL thrpt 6 149047480.037 ± 6124271.615 ops/s - * StringSplitBenchmark.strings_split SMALL thrpt 6 564058097.162 ± 49305418.971 ops/s + * StringSplitBenchmark.pattern_split MEDIUM thrpt 6 14879131.729 ± 1981850.920 ops/s + * StringSplitBenchmark.string_split MEDIUM thrpt 6 51237769.598 ± 1808521.138 ops/s + * StringSplitBenchmark.strings_split MEDIUM thrpt 6 176976970.705 ± 6813886.658 ops/s * - *

StringSplitBenchmark.pattern_split MEDIUM thrpt 6 14879131.729 ± 1981850.920 ops/s - * StringSplitBenchmark.string_split MEDIUM thrpt 6 51237769.598 ± 1808521.138 ops/s - * StringSplitBenchmark.strings_split MEDIUM thrpt 6 176976970.705 ± 6813886.658 ops/s - * - *

StringSplitBenchmark.pattern_split LARGE thrpt 6 482340.838 ± 24903.187 ops/s - * StringSplitBenchmark.string_split LARGE thrpt 6 2460212.879 ± 86911.652 ops/s - * StringSplitBenchmark.strings_split LARGE thrpt 6 4023658.103 ± 30305.699 ops/s + * StringSplitBenchmark.pattern_split LARGE thrpt 6 482340.838 ± 24903.187 ops/s + * StringSplitBenchmark.string_split LARGE thrpt 6 2460212.879 ± 86911.652 ops/s + * StringSplitBenchmark.strings_split LARGE thrpt 6 4023658.103 ± 30305.699 ops/s + * */ @Fork(2) @Warmup(iterations = 2) From a71b449cd77aabe52d05560804ee8fcf9dc87569 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Thu, 19 Feb 2026 10:52:07 -0500 Subject: [PATCH 06/22] Renaming benchmark method --- .../java/datadog/trace/util/StringSubSequenceBenchmark.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/internal-api/src/jmh/java/datadog/trace/util/StringSubSequenceBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/StringSubSequenceBenchmark.java index c08b6f888dd..d05959861a7 100644 --- a/internal-api/src/jmh/java/datadog/trace/util/StringSubSequenceBenchmark.java +++ b/internal-api/src/jmh/java/datadog/trace/util/StringSubSequenceBenchmark.java @@ -20,8 +20,8 @@ * StringSubSequenceBenchmark.string_substring thrpt 6 136916708.207 ± 12299226.575 ops/s * StringSubSequenceBenchmark.string_substring:gc.alloc.rate thrpt 6 86689.852 ± 7777.642 MB/sec * - * StringSubSequenceBenchmark.strings_substring thrpt 6 679669385.260 ± 7194043.619 ops/s - * StringSubSequenceBenchmark.strings_substring:gc.alloc.rate thrpt 6 103702.745 ± 1095.741 MB/sec + * StringSubSequenceBenchmark.subSequence thrpt 6 679669385.260 ± 7194043.619 ops/s + * StringSubSequenceBenchmark.subSequence:gc.alloc.rate thrpt 6 103702.745 ± 1095.741 MB/sec * */ @Fork(2) @@ -53,7 +53,7 @@ public void string_subSequence(Blackhole bh) { } @Benchmark - public void strings_substring(Blackhole bh) { + public void subSequence(Blackhole bh) { String str = LOREM_IPSUM; int len = str.length(); From aa070b4807c707c92170c0a671c47489b59c9431 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Thu, 19 Feb 2026 10:52:46 -0500 Subject: [PATCH 07/22] Clarifying Javadoc --- .../jmh/java/datadog/trace/util/StringSubSequenceBenchmark.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal-api/src/jmh/java/datadog/trace/util/StringSubSequenceBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/StringSubSequenceBenchmark.java index d05959861a7..d24755e950b 100644 --- a/internal-api/src/jmh/java/datadog/trace/util/StringSubSequenceBenchmark.java +++ b/internal-api/src/jmh/java/datadog/trace/util/StringSubSequenceBenchmark.java @@ -12,7 +12,7 @@ * *

NOTE: The higher allocation rate is misleading because 5x the work was performed. After * accounting for the 5x throughput difference, the actual allocation rate is 0.25x that of - * String.substring or String.subSequence. + * String.substring or String.subSequence / SubSequence.of. * Benchmark Mode Cnt Score Error Units * StringSubSequenceBenchmark.string_subSequence thrpt 6 140369998.493 ± 4387855.861 ops/s * StringSubSequenceBenchmark.string_subSequence:gc.alloc.rate thrpt 6 88880.463 ± 2778.032 MB/sec From fa1b8bd1493f2b6aed9b3ddb231b6646cb66b028 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Thu, 19 Feb 2026 10:55:19 -0500 Subject: [PATCH 08/22] grammar --- .../src/jmh/java/datadog/trace/util/StringSplitBenchmark.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal-api/src/jmh/java/datadog/trace/util/StringSplitBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/StringSplitBenchmark.java index ecab152cdc3..1c20fe1cfec 100644 --- a/internal-api/src/jmh/java/datadog/trace/util/StringSplitBenchmark.java +++ b/internal-api/src/jmh/java/datadog/trace/util/StringSplitBenchmark.java @@ -13,7 +13,7 @@ /** * Strings.split is generally faster for String processing, since it create SubSequences that are - * view into the backing String rather than new String objects. + * views into the backing String rather than new String objects. * Benchmark (testStr) Mode Cnt Score Error Units * StringSplitBenchmark.pattern_split EMPTY thrpt 6 291274421.621 ± 14834420.899 ops/s * StringSplitBenchmark.string_split EMPTY thrpt 6 1035461179.368 ± 60212686.921 ops/s From 2822711f03aa838036b47f5d111bc602a4894610 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Mon, 23 Feb 2026 15:44:19 -0500 Subject: [PATCH 09/22] Allowing forbidden API usage These benchmarks exist to show why the APIs are forbidden --- .../jmh/java/datadog/trace/util/StringReplacementBenchmark.java | 2 ++ .../src/jmh/java/datadog/trace/util/StringSplitBenchmark.java | 2 ++ 2 files changed, 4 insertions(+) diff --git a/internal-api/src/jmh/java/datadog/trace/util/StringReplacementBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/StringReplacementBenchmark.java index 7acb31353c5..2ca079b3ce8 100644 --- a/internal-api/src/jmh/java/datadog/trace/util/StringReplacementBenchmark.java +++ b/internal-api/src/jmh/java/datadog/trace/util/StringReplacementBenchmark.java @@ -1,5 +1,6 @@ package datadog.trace.util; +import de.thetaphi.forbiddenapis.SuppressForbidden; import java.util.regex.Pattern; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.Fork; @@ -30,6 +31,7 @@ @Warmup(iterations = 2) @Measurement(iterations = 3) @Threads(8) +@SuppressForbidden public class StringReplacementBenchmark { static final String[] INPUTS = { "foo", diff --git a/internal-api/src/jmh/java/datadog/trace/util/StringSplitBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/StringSplitBenchmark.java index 1c20fe1cfec..221912f4091 100644 --- a/internal-api/src/jmh/java/datadog/trace/util/StringSplitBenchmark.java +++ b/internal-api/src/jmh/java/datadog/trace/util/StringSplitBenchmark.java @@ -1,5 +1,6 @@ package datadog.trace.util; +import de.thetaphi.forbiddenapis.SuppressForbidden; import java.util.regex.Pattern; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.Fork; @@ -41,6 +42,7 @@ @Measurement(iterations = 3) @Threads(8) @State(Scope.Benchmark) +@SuppressForbidden public class StringSplitBenchmark { public enum TestString { EMPTY(""), From 1beb7046ea5c7ee8dda3c197961ed0c360ab0939 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Tue, 24 Feb 2026 14:01:12 -0500 Subject: [PATCH 10/22] Adding String.replace to the benchmark --- ...rk.java => StringReplaceAllBenchmark.java} | 44 ++++++++++++++----- 1 file changed, 33 insertions(+), 11 deletions(-) rename internal-api/src/jmh/java/datadog/trace/util/{StringReplacementBenchmark.java => StringReplaceAllBenchmark.java} (51%) diff --git a/internal-api/src/jmh/java/datadog/trace/util/StringReplacementBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/StringReplaceAllBenchmark.java similarity index 51% rename from internal-api/src/jmh/java/datadog/trace/util/StringReplacementBenchmark.java rename to internal-api/src/jmh/java/datadog/trace/util/StringReplaceAllBenchmark.java index 2ca079b3ce8..aa84b90cdf7 100644 --- a/internal-api/src/jmh/java/datadog/trace/util/StringReplacementBenchmark.java +++ b/internal-api/src/jmh/java/datadog/trace/util/StringReplaceAllBenchmark.java @@ -1,4 +1,4 @@ -package datadog.trace.util; +package benchmarks.reference; import de.thetaphi.forbiddenapis.SuppressForbidden; import java.util.regex.Pattern; @@ -9,22 +9,35 @@ import org.openjdk.jmh.annotations.Warmup; /** - * For simple replacements, Strings.replaceAll out performs String.replaceAll and - * regex.Matcher.replaceAll by 3x. Strings.replaceAll also requires less allocation. + *

For simple replacements, Strings.replaceAll is recommened. + * + *

+ * For simple replacements, Strings.replaceAll or String.replace out performs the regex based + * methods String.replaceAll and regex.Matcher.replaceAll by 3x in terms of throughput. + * + *

String.replace and Strings.replaceAll also require less allocation. + * + *

Strings.replaceAll out performs String.replace by 1.2x in terms of throughput, + * but results may vary depending on the JVM version being used. * *

When pattern matching is needed, compiling the regex to Pattern slightly improves overhead, * but dramatically reduces memory allocation to 1/4x of String.replaceAll. * MacBook M1 with 8 threads (Java 21) * - * Benchmark Mode Cnt Score Error Units - * StringReplacementBenchmark.regex_replaceAll thrpt 6 13795837.811 ± 3635087.691 ops/s - * StringReplacementBenchmark.regex_replaceAll:gc.alloc.rate thrpt 6 3988.955 ± 1148.316 MB/sec + * + * MacBook M1 - 8 Threads - Java 21 + * + * StringReplaceAllBenchmark.regex_replaceAll thrpt 6 15500559.098 ± 8640183.754 ops/s + * StringReplaceAllBenchmark.regex_replaceAll:gc.alloc.rate thrpt 6 4516.464 ± 2561.063 MB/sec + * + * StringReplaceAllBenchmark.string_replace thrpt 6 35429131.963 ± 3203548.932 ops/s + * StringReplaceAllBenchmark.string_replace:gc.alloc.rate thrpt 6 3185.108 ± 152.601 MB/sec * - * StringReplacementBenchmark.string_replaceAll thrpt 6 14611046.391 ± 4865682.875 ops/s - * StringReplacementBenchmark.string_replaceAll:gc.alloc.rate thrpt 6 11391.346 ± 3790.917 MB/sec + * StringReplaceAllBenchmark.string_replaceAll thrpt 6 14253964.929 ± 4060225.866 ops/s + * StringReplaceAllBenchmark.string_replaceAll:gc.alloc.rate thrpt 6 11114.939 ± 3129.891 MB/sec * - * StringReplacementBenchmark.strings_replaceAll thrpt 6 39514695.575 ± 7169844.210 ops/s - * StringReplacementBenchmark.strings_replaceAll:gc.alloc.rate thrpt 6 2777.083 ± 506.909 MB/sec + * StringReplaceAllBenchmark.strings_replaceAll thrpt 6 43789250.524 ± 1910948.420 ops/s + * StringReplaceAllBenchmark.strings_replaceAll:gc.alloc.rate thrpt 6 3079.973 ± 134.617 MB/sec * */ @Fork(2) @@ -32,7 +45,7 @@ @Measurement(iterations = 3) @Threads(8) @SuppressForbidden -public class StringReplacementBenchmark { +public class StringReplaceAllBenchmark { static final String[] INPUTS = { "foo", "baz", @@ -65,6 +78,15 @@ static String _string_replaceAll(String input) { return input.replaceAll("foo", "*redacted*"); } + @Benchmark + public String string_replace() { + return _string_replace(nextInput()); + } + + static String _string_replace(String input) { + return input.replace("foo", "*redacted*"); + } + static final Pattern REGEX_COMPILED = Pattern.compile("foo"); @Benchmark From f516f3203c71f6ad909b6985b8787156fcb2d710 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Tue, 24 Feb 2026 15:06:51 -0500 Subject: [PATCH 11/22] grammar --- .../src/jmh/java/datadog/trace/util/StringSplitBenchmark.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal-api/src/jmh/java/datadog/trace/util/StringSplitBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/StringSplitBenchmark.java index 221912f4091..584c375cdea 100644 --- a/internal-api/src/jmh/java/datadog/trace/util/StringSplitBenchmark.java +++ b/internal-api/src/jmh/java/datadog/trace/util/StringSplitBenchmark.java @@ -13,7 +13,7 @@ import org.openjdk.jmh.infra.Blackhole; /** - * Strings.split is generally faster for String processing, since it create SubSequences that are + * Strings.split is generally faster for String processing, since it creates SubSequences that are * views into the backing String rather than new String objects. * Benchmark (testStr) Mode Cnt Score Error Units * StringSplitBenchmark.pattern_split EMPTY thrpt 6 291274421.621 ± 14834420.899 ops/s From a149be6159288d46f70525b1c5c6d0eb384927c8 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Wed, 25 Feb 2026 10:32:10 -0500 Subject: [PATCH 12/22] Added comment explaining implementation approach --- internal-api/src/main/java/datadog/trace/util/Strings.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/internal-api/src/main/java/datadog/trace/util/Strings.java b/internal-api/src/main/java/datadog/trace/util/Strings.java index 5a8b1997121..62de2622637 100644 --- a/internal-api/src/main/java/datadog/trace/util/Strings.java +++ b/internal-api/src/main/java/datadog/trace/util/Strings.java @@ -290,6 +290,11 @@ public SubSequence next() { if (curIndex > len) throw new NoSuchElementException(); + // NOTE: Experimented with returning a single mutable SubSequence + // where the index range is updated each time. In typical usage, + // that was slightly worse -- likely because escape analysis was + // able to eliminate the allocation, but that hasn't been directly + // confirmed. SubSequence subSeq; int nextIndex = this.nextIndex; From 479a3e0e27d46a3145a3cbb17969fdbac07750cd Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Thu, 26 Feb 2026 16:59:35 -0500 Subject: [PATCH 13/22] Clean-up imports --- .../jmh/java/datadog/trace/util/StringReplaceAllBenchmark.java | 2 ++ internal-api/src/main/java/datadog/trace/util/Strings.java | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/internal-api/src/jmh/java/datadog/trace/util/StringReplaceAllBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/StringReplaceAllBenchmark.java index aa84b90cdf7..12a0be99f66 100644 --- a/internal-api/src/jmh/java/datadog/trace/util/StringReplaceAllBenchmark.java +++ b/internal-api/src/jmh/java/datadog/trace/util/StringReplaceAllBenchmark.java @@ -1,7 +1,9 @@ package benchmarks.reference; import de.thetaphi.forbiddenapis.SuppressForbidden; + import java.util.regex.Pattern; + import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.Fork; import org.openjdk.jmh.annotations.Measurement; diff --git a/internal-api/src/main/java/datadog/trace/util/Strings.java b/internal-api/src/main/java/datadog/trace/util/Strings.java index 62de2622637..4a8b40db7db 100644 --- a/internal-api/src/main/java/datadog/trace/util/Strings.java +++ b/internal-api/src/main/java/datadog/trace/util/Strings.java @@ -185,7 +185,7 @@ public static String coalesce(@Nullable final String first, @Nullable final Stri } /** Low overhead replaceAll */ - public static String replaceAll(String input, String needle, String replacement) { + public static final String replaceAll(String input, String needle, String replacement) { int index = input.indexOf(needle); if (index == -1) return input; From c03a85a2e071d4c3fa9653afb1c7bafd12f7eaf4 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Fri, 27 Feb 2026 09:33:56 -0500 Subject: [PATCH 14/22] spotless --- .../jmh/java/datadog/trace/util/StringReplaceAllBenchmark.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/internal-api/src/jmh/java/datadog/trace/util/StringReplaceAllBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/StringReplaceAllBenchmark.java index 12a0be99f66..aa84b90cdf7 100644 --- a/internal-api/src/jmh/java/datadog/trace/util/StringReplaceAllBenchmark.java +++ b/internal-api/src/jmh/java/datadog/trace/util/StringReplaceAllBenchmark.java @@ -1,9 +1,7 @@ package benchmarks.reference; import de.thetaphi.forbiddenapis.SuppressForbidden; - import java.util.regex.Pattern; - import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.Fork; import org.openjdk.jmh.annotations.Measurement; From 0bfdbd4bf9df369f4becd8085ac92cfb406ffd46 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Fri, 27 Feb 2026 09:58:32 -0500 Subject: [PATCH 15/22] Fixing package after moving code from stand-alone reproduction --- .../jmh/java/datadog/trace/util/StringReplaceAllBenchmark.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal-api/src/jmh/java/datadog/trace/util/StringReplaceAllBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/StringReplaceAllBenchmark.java index aa84b90cdf7..492b56bb654 100644 --- a/internal-api/src/jmh/java/datadog/trace/util/StringReplaceAllBenchmark.java +++ b/internal-api/src/jmh/java/datadog/trace/util/StringReplaceAllBenchmark.java @@ -1,4 +1,4 @@ -package benchmarks.reference; +package datadog.trace.util; import de.thetaphi.forbiddenapis.SuppressForbidden; import java.util.regex.Pattern; From 606c5fdb42e2e8a4987a123ae2a7dc020bf3af50 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Wed, 24 Jun 2026 13:56:17 -0400 Subject: [PATCH 16/22] Add motivation to SubSequence Javadoc (why avoid the substring allocation) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per bric3 review: the doc said what SubSequence avoids allocating but not why it matters. Explain the use case — allocation-free lightweight parsing: substring/ subSequence copy per call, so splitting a string into many pieces on a hot path allocates O(pieces) Strings; SubSequence is a zero-copy view. Also fixes a typo. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../main/java/datadog/trace/util/SubSequence.java | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/internal-api/src/main/java/datadog/trace/util/SubSequence.java b/internal-api/src/main/java/datadog/trace/util/SubSequence.java index 280dae23365..3cda505d1db 100644 --- a/internal-api/src/main/java/datadog/trace/util/SubSequence.java +++ b/internal-api/src/main/java/datadog/trace/util/SubSequence.java @@ -1,9 +1,17 @@ package datadog.trace.util; /** - * A CharSequence that is view into a sub-sequencce of a String Unlike + * A CharSequence that is a view into a sub-sequence of a String. Unlike * String.subSequence, this class doesn't allocate an additional String, - * char[], or byte[] + * char[], or byte[]. + * + *

Why that matters: String.substring / subSequence copy the selected + * range into a fresh backing array on every call, so scanning or splitting a string into many + * pieces — parsing headers, tags, or query strings on a hot path — allocates one intermediate + * String per slice. A SubSequence is a zero-copy window over the original + * (an offset + length into the existing backing array), so the same parse allocates nothing per + * slice. Use it for transient, read-only views; materialize a real String only when + * the value must be retained or handed off. */ public final class SubSequence implements CharSequence { public static final SubSequence EMPTY = new SubSequence("", 0, 0); From 9e3517840257ca41ae6620ed43bdc5a468415a7e Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Wed, 24 Jun 2026 15:56:36 -0400 Subject: [PATCH 17/22] Hoist containsTraceComment "=" needles to constants SharedDBCommenter.containsTraceComment built nine "KEY + =" strings on every call: the keys are encode()-derived (not compile-time constants), so each "KEY + =" is a runtime concat. A non-matching comment ran all nine checks and allocated nine throwaway Strings per call. Precompute the needles once at class init as *_EQ constants; the contains-chain is unchanged. Co-Authored-By: Claude Opus 4.8 --- .../dbm/SharedDBCommenter.java | 36 +++++++++++++------ 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/dbm/SharedDBCommenter.java b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/dbm/SharedDBCommenter.java index 3dbf916362c..610ab9ca31f 100644 --- a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/dbm/SharedDBCommenter.java +++ b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/dbm/SharedDBCommenter.java @@ -33,6 +33,19 @@ public class SharedDBCommenter { private static final String TRACEPARENT = encode("traceparent"); private static final String DD_SERVICE_HASH = encode("ddsh"); + // Pre-built "=" needles for containsTraceComment, computed once at class init. The keys + // are assigned via encode(...), so "KEY + =" is a runtime concat, not a compile-time constant; + // doing it per call allocated nine throwaway Strings on every non-matching check. + private static final String PARENT_SERVICE_EQ = PARENT_SERVICE + "="; + private static final String DATABASE_SERVICE_EQ = DATABASE_SERVICE + "="; + private static final String DD_HOSTNAME_EQ = DD_HOSTNAME + "="; + private static final String DD_DB_NAME_EQ = DD_DB_NAME + "="; + private static final String DD_PEER_SERVICE_EQ = DD_PEER_SERVICE + "="; + private static final String DD_ENV_EQ = DD_ENV + "="; + private static final String DD_VERSION_EQ = DD_VERSION + "="; + private static final String TRACEPARENT_EQ = TRACEPARENT + "="; + private static final String DD_SERVICE_HASH_EQ = DD_SERVICE_HASH + "="; + // Pre-encoded "key='encoded_value'" fragments for the invariant fields (the values // come from Config are effectively immutable post-init in production). // Note about the visibility: needs to be visible but can tolerate races (reason why it's not @@ -40,18 +53,19 @@ public class SharedDBCommenter { private static volatile boolean staticPrefixComputed = false; private static volatile String staticPrefix; - // Used by SQLCommenter and MongoCommentInjector to avoid duplicate comment injection - // Note: this should be "better" done and avoid this bunch of string contains/concatenation + // Used by SQLCommenter and MongoCommentInjector to avoid duplicate comment injection. + // Note: the contains-chain could still be done "better" (a single scan), but the per-call + // "KEY + =" concatenation -- the allocating part -- is now hoisted to the *_EQ constants above. public static boolean containsTraceComment(String commentContent) { - return commentContent.contains(PARENT_SERVICE + "=") - || commentContent.contains(DATABASE_SERVICE + "=") - || commentContent.contains(DD_HOSTNAME + "=") - || commentContent.contains(DD_DB_NAME + "=") - || commentContent.contains(DD_PEER_SERVICE + "=") - || commentContent.contains(DD_ENV + "=") - || commentContent.contains(DD_VERSION + "=") - || commentContent.contains(TRACEPARENT + "=") - || commentContent.contains(DD_SERVICE_HASH + "="); + return commentContent.contains(PARENT_SERVICE_EQ) + || commentContent.contains(DATABASE_SERVICE_EQ) + || commentContent.contains(DD_HOSTNAME_EQ) + || commentContent.contains(DD_DB_NAME_EQ) + || commentContent.contains(DD_PEER_SERVICE_EQ) + || commentContent.contains(DD_ENV_EQ) + || commentContent.contains(DD_VERSION_EQ) + || commentContent.contains(TRACEPARENT_EQ) + || commentContent.contains(DD_SERVICE_HASH_EQ); } // Build database comment content without comment delimiters such as /* */ From 046ed916b242399c3cae70393d129889ac5cd173 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Wed, 24 Jun 2026 15:58:08 -0400 Subject: [PATCH 18/22] Add SharedDBCommenterBenchmark for containsTraceComment Throughput benchmark at @Threads(8) over a realistic comment-content mix (mostly non-DD, the all-nine-checks worst case) to surface the per-call allocation removed by hoisting the "=" needles to constants. Run the parent commit (baseline) vs this branch and read the ops/s delta; -prof gc (gc.alloc.rate.norm) corroborates the mechanism. Co-Authored-By: Claude Opus 4.8 --- .../dbm/SharedDBCommenterBenchmark.java | 86 +++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 dd-java-agent/agent-bootstrap/src/jmh/java/datadog/trace/bootstrap/instrumentation/dbm/SharedDBCommenterBenchmark.java diff --git a/dd-java-agent/agent-bootstrap/src/jmh/java/datadog/trace/bootstrap/instrumentation/dbm/SharedDBCommenterBenchmark.java b/dd-java-agent/agent-bootstrap/src/jmh/java/datadog/trace/bootstrap/instrumentation/dbm/SharedDBCommenterBenchmark.java new file mode 100644 index 00000000000..20276db1412 --- /dev/null +++ b/dd-java-agent/agent-bootstrap/src/jmh/java/datadog/trace/bootstrap/instrumentation/dbm/SharedDBCommenterBenchmark.java @@ -0,0 +1,86 @@ +package datadog.trace.bootstrap.instrumentation.dbm; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Warmup; + +/** + * Benchmark for {@link SharedDBCommenter#containsTraceComment(String)} — the per-query check run + * during inject (via {@code hasDDComment}) to avoid double-commenting an already-tagged statement. + * + *

What we're measuring. {@code containsTraceComment} currently does {@code + * commentContent.contains(KEY + "=")} for nine keys. The keys are {@code static final} but assigned + * via {@code encode(...)}, so they are not compile-time constants — each {@code KEY + "="} + * is a fresh {@code StringBuilder} concat on every call. A non-matching comment runs all nine + * checks = nine throwaway Strings per call. The proposed fix precomputes nine {@code KEY_EQ} + * constants once. + * + *

How we make the win visible (our usual approach). Run at {@code @Threads(8)} so the + * allocation churn manifests as a throughput delta — GC is a shared-heap tax, so a + * single-threaded run (cheap TLAB bumps) hides it, while concurrent allocation across threads + * drives GC pauses that every thread pays. Read the ops/s delta as the headline win. Corroborate + * the mechanism with the GC profiler: {@code -prof gc} → {@code gc.alloc.rate.norm} (B/op) should + * drop by ~nine small Strings per call on the non-matching path. + * + *

Protocol. Run this on the current code (baseline), then after the {@code + * KEY_EQ}-constant fix, and compare. The input mix is mostly non-DD comments (the common case — + * they run all nine checks, the exact all-nine-concats path the fix removes); the DD comment + * short-circuits on the first check. + * + *

+ *   # agent-bootstrap has no -Pjmh.includes wiring yet (a generalization is in flight), so for now
+ *   # either run the whole module (only a handful of benchmarks) ...
+ *   ./gradlew :dd-java-agent:agent-bootstrap:jmh
+ *   # ... or hack a temporary filter into agent-bootstrap/build.gradle: jmh { includes = ['SharedDBCommenter.*'] }
+ *   # add -prof gc (gc.alloc.rate.norm) to corroborate the allocation delta.
+ * 
+ * + *

Results (JDK 25, MacBook M-series, {@code @Threads(8)}, {@code @Fork(5)}, {@code -prof + * gc}): + * + *

+ *                    throughput            gc.alloc.rate.norm
+ *   before (concat)  29.7M ± 1.7M ops/s    156 B/op
+ *   after  (*_EQ)    55.4M ± 2.5M ops/s    ~0  B/op  (10^-5)
+ * 
+ * + * Removing the per-call concatenation drops allocation to ~0 and lifts throughput ~1.9x at + * {@code @Threads(8)} — the allocation win surfacing as throughput, exactly as intended; {@code + * -prof gc} confirms the mechanism (156 -> 0 B/op). + */ +@Fork(5) +@Warmup(iterations = 2) +@Measurement(iterations = 5) +@Threads(8) +public class SharedDBCommenterBenchmark { + + // Inner comment content (the surrounding "/*" "*/" already stripped by extractCommentContent), + // as a realistic mix: most queries carry a non-DD comment (or none); some already have ours. + static final String[] COMMENT_CONTENTS = { + "app generated comment", // non-DD -> all 9 contains checks (9 concats) + "route='/api/v1/users',batch=true", // non-DD + "framework='hibernate',layer='orm'", // non-DD + "ddps='web',dddbs='orders',traceparent='00-abc-def-01'", // DD -> short-circuits on 1st check + }; + + /** Per-thread cursor so threads don't contend on a shared index under {@code @Threads(8)}. */ + @State(Scope.Thread) + public static class Cursor { + int index = 0; + + String next() { + int i = index; + index = (i + 1) % COMMENT_CONTENTS.length; + return COMMENT_CONTENTS[i]; + } + } + + @Benchmark + public boolean containsTraceComment(Cursor cursor) { + return SharedDBCommenter.containsTraceComment(cursor.next()); + } +} From 24800461ef03cd2e92f9c3849ffee1f918497b78 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Wed, 24 Jun 2026 18:33:15 -0400 Subject: [PATCH 19/22] Check the DBM comment region in place, dropping the extractCommentContent substring SQLCommenter.hasDDComment materialized sql.substring(commentStart, commentEnd) on every duplicate-comment check just to feed containsTraceComment. Add a SharedDBCommenter.containsTraceComment(sql, from, to) range overload that scans the comment body in place, and a Strings.regionContains primitive it (and the String delegate) build on -- no per-call substring. The String overload now delegates to the range form, so Mongo behavior is unchanged. regionContains is the allocation-free, copy-free primitive; the natural-reading SubSequence.contains layer can delegate to it later. Boundary semantics unit- tested on Strings.regionContains; DB needle behavior on the overloads; existing SQLCommenter/SharedDBCommenter suites unchanged and green. Co-Authored-By: Claude Opus 4.8 --- .../dbm/SharedDBCommenter.java | 33 +++++++----- ...edDBCommenterContainsTraceCommentTest.java | 48 +++++++++++++++++ .../instrumentation/jdbc/SQLCommenter.java | 10 ++-- .../main/java/datadog/trace/util/Strings.java | 13 +++++ .../trace/util/StringsRegionContainsTest.java | 53 +++++++++++++++++++ 5 files changed, 138 insertions(+), 19 deletions(-) create mode 100644 dd-java-agent/agent-bootstrap/src/test/java/datadog/trace/bootstrap/instrumentation/dbm/SharedDBCommenterContainsTraceCommentTest.java create mode 100644 internal-api/src/test/java/datadog/trace/util/StringsRegionContainsTest.java diff --git a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/dbm/SharedDBCommenter.java b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/dbm/SharedDBCommenter.java index 610ab9ca31f..12cd68ccacf 100644 --- a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/dbm/SharedDBCommenter.java +++ b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/dbm/SharedDBCommenter.java @@ -1,6 +1,7 @@ package datadog.trace.bootstrap.instrumentation.dbm; import static datadog.trace.bootstrap.instrumentation.api.AgentTracer.activeSpan; +import static datadog.trace.util.Strings.regionContains; import datadog.trace.api.BaseHash; import datadog.trace.api.Config; @@ -53,19 +54,27 @@ public class SharedDBCommenter { private static volatile boolean staticPrefixComputed = false; private static volatile String staticPrefix; - // Used by SQLCommenter and MongoCommentInjector to avoid duplicate comment injection. - // Note: the contains-chain could still be done "better" (a single scan), but the per-call - // "KEY + =" concatenation -- the allocating part -- is now hoisted to the *_EQ constants above. + // Used by SQLCommenter and MongoCommentInjector to avoid duplicate comment injection. Mongo passes + // the already-extracted comment body; SQLCommenter uses the range overload to check the comment in + // place. Both run the same nine "=" needle checks. public static boolean containsTraceComment(String commentContent) { - return commentContent.contains(PARENT_SERVICE_EQ) - || commentContent.contains(DATABASE_SERVICE_EQ) - || commentContent.contains(DD_HOSTNAME_EQ) - || commentContent.contains(DD_DB_NAME_EQ) - || commentContent.contains(DD_PEER_SERVICE_EQ) - || commentContent.contains(DD_ENV_EQ) - || commentContent.contains(DD_VERSION_EQ) - || commentContent.contains(TRACEPARENT_EQ) - || commentContent.contains(DD_SERVICE_HASH_EQ); + return containsTraceComment(commentContent, 0, commentContent.length()); + } + + /** + * Range overload: true if {@code sql} contains a trace-comment needle fully within {@code [from, + * to)} -- checks the comment body in place, with no substring allocation of the region. + */ + public static boolean containsTraceComment(String sql, int from, int to) { + return regionContains(sql, from, to, PARENT_SERVICE_EQ) + || regionContains(sql, from, to, DATABASE_SERVICE_EQ) + || regionContains(sql, from, to, DD_HOSTNAME_EQ) + || regionContains(sql, from, to, DD_DB_NAME_EQ) + || regionContains(sql, from, to, DD_PEER_SERVICE_EQ) + || regionContains(sql, from, to, DD_ENV_EQ) + || regionContains(sql, from, to, DD_VERSION_EQ) + || regionContains(sql, from, to, TRACEPARENT_EQ) + || regionContains(sql, from, to, DD_SERVICE_HASH_EQ); } // Build database comment content without comment delimiters such as /* */ diff --git a/dd-java-agent/agent-bootstrap/src/test/java/datadog/trace/bootstrap/instrumentation/dbm/SharedDBCommenterContainsTraceCommentTest.java b/dd-java-agent/agent-bootstrap/src/test/java/datadog/trace/bootstrap/instrumentation/dbm/SharedDBCommenterContainsTraceCommentTest.java new file mode 100644 index 00000000000..35256b29d2c --- /dev/null +++ b/dd-java-agent/agent-bootstrap/src/test/java/datadog/trace/bootstrap/instrumentation/dbm/SharedDBCommenterContainsTraceCommentTest.java @@ -0,0 +1,48 @@ +package datadog.trace.bootstrap.instrumentation.dbm; + +import static datadog.trace.bootstrap.instrumentation.dbm.SharedDBCommenter.containsTraceComment; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import org.junit.jupiter.api.Test; + +/** + * DB-level behavior of {@link SharedDBCommenter#containsTraceComment}: the nine "=" needle set, + * the {@code String} delegate, and the range overload checking the comment body in place. (The + * {@code [from, to)} boundary semantics are unit-tested on {@code Strings.regionContains}.) + */ +class SharedDBCommenterContainsTraceCommentTest { + + @Test + void delegate_wholeString() { + assertTrue(containsTraceComment("ddps='svc',dde='test'")); + assertFalse(containsTraceComment("just a plain comment")); + assertFalse(containsTraceComment("")); + } + + @Test + void range_needleInsideCommentBody() { + String sql = "SELECT 1 /*ddps='svc',dde='test'*/"; + int from = sql.indexOf("/*") + 2; + int to = sql.indexOf("*/"); + assertTrue(containsTraceComment(sql, from, to)); + } + + @Test + void range_nonDdCommentBody() { + String sql = "SELECT 1 /* just a customer comment */"; + int from = sql.indexOf("/*") + 2; + int to = sql.indexOf("*/"); + assertFalse(containsTraceComment(sql, from, to)); + } + + @Test + void range_ddNeedleOutsideCommentRegionNotMatched() { + // The DD needle sits in the statement body, not the comment region we pass -- a whole-string + // contains would false-positive; the range check must scope to [from, to). + String sql = "ddps='x' /* clean */"; + int from = sql.indexOf("/*") + 2; + int to = sql.indexOf("*/"); + assertFalse(containsTraceComment(sql, from, to)); + } +} diff --git a/dd-java-agent/instrumentation/jdbc/src/main/java/datadog/trace/instrumentation/jdbc/SQLCommenter.java b/dd-java-agent/instrumentation/jdbc/src/main/java/datadog/trace/instrumentation/jdbc/SQLCommenter.java index 3171550aba3..9c8e81c3036 100644 --- a/dd-java-agent/instrumentation/jdbc/src/main/java/datadog/trace/instrumentation/jdbc/SQLCommenter.java +++ b/dd-java-agent/instrumentation/jdbc/src/main/java/datadog/trace/instrumentation/jdbc/SQLCommenter.java @@ -112,11 +112,6 @@ private static boolean hasDDComment(String sql, boolean appendComment) { return false; } - String commentContent = extractCommentContent(sql, appendComment); - return SharedDBCommenter.containsTraceComment(commentContent); - } - - private static String extractCommentContent(String sql, boolean appendComment) { int startIdx; int endIdx; if (appendComment) { @@ -127,9 +122,10 @@ private static String extractCommentContent(String sql, boolean appendComment) { endIdx = sql.indexOf(CLOSE_COMMENT); } if (startIdx != -1 && endIdx != -1 && endIdx > startIdx) { - return sql.substring(startIdx + OPEN_COMMENT_LEN, endIdx); + // Check the comment body in place -- no substring of the comment region. + return SharedDBCommenter.containsTraceComment(sql, startIdx + OPEN_COMMENT_LEN, endIdx); } - return ""; + return false; } /** diff --git a/internal-api/src/main/java/datadog/trace/util/Strings.java b/internal-api/src/main/java/datadog/trace/util/Strings.java index adf54c90fb2..e1a6ec3f24c 100644 --- a/internal-api/src/main/java/datadog/trace/util/Strings.java +++ b/internal-api/src/main/java/datadog/trace/util/Strings.java @@ -193,4 +193,17 @@ public static String coalesce(@Nullable final String first, @Nullable final Stri return null; } } + + /** + * True if {@code needle} occurs fully within {@code s[beginIndex, endIndex)} -- a range-limited, + * allocation-free alternative to {@code s.substring(beginIndex, endIndex).contains(needle)}. + * + *

{@code indexOf} returns the earliest occurrence at or after {@code beginIndex}; if that one + * overshoots {@code endIndex} there is no earlier full occurrence in range, so the bound check is + * exact. + */ + public static boolean regionContains(String s, int beginIndex, int endIndex, String needle) { + int idx = s.indexOf(needle, beginIndex); + return idx >= 0 && idx + needle.length() <= endIndex; + } } diff --git a/internal-api/src/test/java/datadog/trace/util/StringsRegionContainsTest.java b/internal-api/src/test/java/datadog/trace/util/StringsRegionContainsTest.java new file mode 100644 index 00000000000..115fcd79a31 --- /dev/null +++ b/internal-api/src/test/java/datadog/trace/util/StringsRegionContainsTest.java @@ -0,0 +1,53 @@ +package datadog.trace.util; + +import static datadog.trace.util.Strings.regionContains; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import org.junit.jupiter.api.Test; + +/** Boundary semantics of {@link Strings#regionContains(String, int, int, String)}. */ +class StringsRegionContainsTest { + + // "abXYZcd": a0 b1 X2 Y3 Z4 c5 d6 -> "XYZ" spans [2,5). + private static final String S = "abXYZcd"; + + @Test + void foundFullyInside() { + assertTrue(regionContains(S, 0, S.length(), "XYZ")); + } + + @Test + void notPresent() { + assertFalse(regionContains(S, 0, S.length(), "QQ")); + } + + @Test + void exactFit() { + // idx == 2, idx + len == 5 == endIndex -> included. + assertTrue(regionContains(S, 2, 5, "XYZ")); + } + + @Test + void straddlingEndIndexExcluded() { + // endIndex == 4 cuts off the trailing 'Z' -> not fully inside. + assertFalse(regionContains(S, 2, 4, "XYZ")); + } + + @Test + void occurrenceBeforeBeginIndexExcluded() { + // beginIndex == 3 starts past the needle's first char -> no occurrence at/after beginIndex. + assertFalse(regionContains(S, 3, S.length(), "XYZ")); + } + + @Test + void emptyRegion() { + assertFalse(regionContains(S, 2, 2, "XYZ")); + } + + @Test + void matchesWholeStringContains() { + assertTrue(regionContains("hello", 0, 5, "ll")); + assertFalse(regionContains("hello", 0, 5, "z")); + } +} From e79eb974cb53b4341f05aa3655a2caa6bd253918 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Wed, 24 Jun 2026 18:41:55 -0400 Subject: [PATCH 20/22] Read the comment-region check as a SubSequence view (the followable idiom) Add SubSequence.contains (delegating to the Strings.regionContains primitive) and rewrite containsTraceComment(sql, from, to) as SubSequence.of(sql, from, to).contains(...) -- a 1-to-1 substitution for what you'd idiomatically write on a substring, with no copy. EA elides the view in the transient consumption; even if it doesn't, the string copy is still avoided. Couples this branch to the SubSequence base (#10640); regionContains stays as the allocation-free primitive the view delegates to. Co-Authored-By: Claude Opus 4.8 --- .../dbm/SharedDBCommenter.java | 28 ++++++++++--------- ...edDBCommenterContainsTraceCommentTest.java | 4 +-- .../java/datadog/trace/util/SubSequence.java | 8 ++++++ .../datadog/trace/util/SubSequenceTest.java | 15 ++++++++++ 4 files changed, 40 insertions(+), 15 deletions(-) diff --git a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/dbm/SharedDBCommenter.java b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/dbm/SharedDBCommenter.java index 12cd68ccacf..ccd3bad8505 100644 --- a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/dbm/SharedDBCommenter.java +++ b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/instrumentation/dbm/SharedDBCommenter.java @@ -1,13 +1,13 @@ package datadog.trace.bootstrap.instrumentation.dbm; import static datadog.trace.bootstrap.instrumentation.api.AgentTracer.activeSpan; -import static datadog.trace.util.Strings.regionContains; import datadog.trace.api.BaseHash; import datadog.trace.api.Config; import datadog.trace.api.internal.VisibleForTesting; import datadog.trace.bootstrap.instrumentation.api.AgentSpan; import datadog.trace.bootstrap.instrumentation.api.Tags; +import datadog.trace.util.SubSequence; import java.io.UnsupportedEncodingException; import java.net.URLEncoder; import java.nio.charset.StandardCharsets; @@ -54,9 +54,9 @@ public class SharedDBCommenter { private static volatile boolean staticPrefixComputed = false; private static volatile String staticPrefix; - // Used by SQLCommenter and MongoCommentInjector to avoid duplicate comment injection. Mongo passes - // the already-extracted comment body; SQLCommenter uses the range overload to check the comment in - // place. Both run the same nine "=" needle checks. + // Used by SQLCommenter and MongoCommentInjector to avoid duplicate comment injection. Mongo + // passes the already-extracted comment body; SQLCommenter uses the range overload to check it + // in place. Both run the same nine "=" needle checks. public static boolean containsTraceComment(String commentContent) { return containsTraceComment(commentContent, 0, commentContent.length()); } @@ -66,15 +66,17 @@ public static boolean containsTraceComment(String commentContent) { * to)} -- checks the comment body in place, with no substring allocation of the region. */ public static boolean containsTraceComment(String sql, int from, int to) { - return regionContains(sql, from, to, PARENT_SERVICE_EQ) - || regionContains(sql, from, to, DATABASE_SERVICE_EQ) - || regionContains(sql, from, to, DD_HOSTNAME_EQ) - || regionContains(sql, from, to, DD_DB_NAME_EQ) - || regionContains(sql, from, to, DD_PEER_SERVICE_EQ) - || regionContains(sql, from, to, DD_ENV_EQ) - || regionContains(sql, from, to, DD_VERSION_EQ) - || regionContains(sql, from, to, TRACEPARENT_EQ) - || regionContains(sql, from, to, DD_SERVICE_HASH_EQ); + // Zero-copy view of the comment body; reads like ordinary String.contains, no substring. + SubSequence comment = SubSequence.of(sql, from, to); + return comment.contains(PARENT_SERVICE_EQ) + || comment.contains(DATABASE_SERVICE_EQ) + || comment.contains(DD_HOSTNAME_EQ) + || comment.contains(DD_DB_NAME_EQ) + || comment.contains(DD_PEER_SERVICE_EQ) + || comment.contains(DD_ENV_EQ) + || comment.contains(DD_VERSION_EQ) + || comment.contains(TRACEPARENT_EQ) + || comment.contains(DD_SERVICE_HASH_EQ); } // Build database comment content without comment delimiters such as /* */ diff --git a/dd-java-agent/agent-bootstrap/src/test/java/datadog/trace/bootstrap/instrumentation/dbm/SharedDBCommenterContainsTraceCommentTest.java b/dd-java-agent/agent-bootstrap/src/test/java/datadog/trace/bootstrap/instrumentation/dbm/SharedDBCommenterContainsTraceCommentTest.java index 35256b29d2c..8a1e8edcb99 100644 --- a/dd-java-agent/agent-bootstrap/src/test/java/datadog/trace/bootstrap/instrumentation/dbm/SharedDBCommenterContainsTraceCommentTest.java +++ b/dd-java-agent/agent-bootstrap/src/test/java/datadog/trace/bootstrap/instrumentation/dbm/SharedDBCommenterContainsTraceCommentTest.java @@ -7,8 +7,8 @@ import org.junit.jupiter.api.Test; /** - * DB-level behavior of {@link SharedDBCommenter#containsTraceComment}: the nine "=" needle set, - * the {@code String} delegate, and the range overload checking the comment body in place. (The + * DB-level behavior of {@link SharedDBCommenter#containsTraceComment}: the nine "=" needle + * set, the {@code String} delegate, and the range overload checking the comment body in place. (The * {@code [from, to)} boundary semantics are unit-tested on {@code Strings.regionContains}.) */ class SharedDBCommenterContainsTraceCommentTest { diff --git a/internal-api/src/main/java/datadog/trace/util/SubSequence.java b/internal-api/src/main/java/datadog/trace/util/SubSequence.java index 3cda505d1db..8a0ade178d3 100644 --- a/internal-api/src/main/java/datadog/trace/util/SubSequence.java +++ b/internal-api/src/main/java/datadog/trace/util/SubSequence.java @@ -109,6 +109,14 @@ public final boolean equals(CharSequence that) { return true; } + /** + * True if this sub-sequence contains {@code needle} -- the zero-copy equivalent of {@code + * toString().contains(needle)}, with no substring materialized. + */ + public final boolean contains(String needle) { + return Strings.regionContains(this.str, this.beginIndex, this.endIndex, needle); + } + @Override public String toString() { String cached = this.cachedSubstr; diff --git a/internal-api/src/test/java/datadog/trace/util/SubSequenceTest.java b/internal-api/src/test/java/datadog/trace/util/SubSequenceTest.java index 78d52fa6931..9796a586cd4 100644 --- a/internal-api/src/test/java/datadog/trace/util/SubSequenceTest.java +++ b/internal-api/src/test/java/datadog/trace/util/SubSequenceTest.java @@ -1,6 +1,7 @@ package datadog.trace.util; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertSame; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -108,4 +109,18 @@ public void appendToBuilder() { subSeq.appendTo(builder1); assertEquals(expectedStr, builder1.toString()); } + + @Test + public void contains() { + // "/*ddps='svc',dde='x'*/ rest" -- the comment body "ddps='svc',dde='x'" spans [2, 20). + String s = "/*ddps='svc',dde='x'*/ rest"; + SubSequence comment = SubSequence.of(s, 2, 20); + assertTrue(comment.contains("ddps=")); + assertTrue(comment.contains("dde=")); + assertFalse(comment.contains("ddh=")); + + // View-relative: a needle present in the backing string but outside this view is not found. + SubSequence dde = SubSequence.of(s, 13, 20); // "dde='x'" + assertFalse(dde.contains("ddps=")); // ddps= is before this view's range + } } From 454802b741378f863d58fe817a825101a41151db Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Wed, 24 Jun 2026 18:55:40 -0400 Subject: [PATCH 21/22] Add SQLCommenterDuplicateCommentBenchmark (substring 140 -> 0 B/op) Isolates the duplicate-comment guard (dbType=null skips the first-word scan; already-DD-commented SQL makes inject return early). The extractCommentContent substring allocated 140 B/op; the in-place range/view scan is EA-elided (~0). @Threads(8), @Fork(2), -prof gc; numbers in the class Javadoc. Throughput is flat-to-slightly-up but within @Fork(2) noise -- this path is scan-CPU-dominated, so the win is the allocation, not throughput. Co-Authored-By: Claude Opus 4.8 --- ...SQLCommenterDuplicateCommentBenchmark.java | 77 +++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 dd-java-agent/instrumentation/jdbc/src/jmh/java/datadog/trace/instrumentation/jdbc/SQLCommenterDuplicateCommentBenchmark.java diff --git a/dd-java-agent/instrumentation/jdbc/src/jmh/java/datadog/trace/instrumentation/jdbc/SQLCommenterDuplicateCommentBenchmark.java b/dd-java-agent/instrumentation/jdbc/src/jmh/java/datadog/trace/instrumentation/jdbc/SQLCommenterDuplicateCommentBenchmark.java new file mode 100644 index 00000000000..8a2752a7cf9 --- /dev/null +++ b/dd-java-agent/instrumentation/jdbc/src/jmh/java/datadog/trace/instrumentation/jdbc/SQLCommenterDuplicateCommentBenchmark.java @@ -0,0 +1,77 @@ +package datadog.trace.instrumentation.jdbc; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Warmup; + +/** + * Benchmark for the duplicate-comment guard in {@link SQLCommenter#inject} -- the {@code + * hasDDComment} path that avoids double-commenting an already-instrumented statement. + * + *

What we're measuring. The guard used to materialize {@code sql.substring(commentStart, + * commentEnd)} (the comment body) just to scan it for trace-comment needles. (B) checks the comment + * region in place via {@code SharedDBCommenter.containsTraceComment(sql, from, to)} -- no + * substring. + * + *

Isolation. The substring only happens when the SQL already carries a comment in the + * checked position; for a DD comment {@code inject} then returns early. Passing {@code dbType=null} + * skips the first-word scan (benchmarked separately for the {@code getFirstWord} change), so over + * already-DD-commented SQL the only allocation left in {@code inject} is the substring (B) + * removes. Run at {@code @Threads(8)} with {@code -prof gc}. + * + *

+ *   ./gradlew :dd-java-agent:instrumentation:jdbc:jmh   # add -prof gc
+ * 
+ * + *

Results (JDK 25, MacBook M-series, {@code @Threads(8)}, {@code @Fork(2)}, {@code -prof + * gc}): + * + *

+ *                        throughput            gc.alloc.rate.norm
+ *   before (substring)   15.4M ± 3.1M ops/s    140 B/op
+ *   after  (range/view)  16.8M ± 1.3M ops/s    ~0  B/op  (10^-5)
+ * 
+ * + * The extractCommentContent substring (140 B/op) is gone -- the in-place range scan and the + * SubSequence view it flows through are both EA-elided. The allocation delta is exact and + * fork-stable; that's the win. Throughput is flat-to-slightly-up but not cleanly resolved at + * {@code @Fork(2)} (the "before" +-20% is bimodal) -- this path is dominated by the nine indexOf + * scans (CPU the alloc-removal doesn't touch), so the win here is the allocation, a small cut that + * compounds across comment-bearing injects, not a per-call throughput jump. + */ +@Fork(2) +@Warmup(iterations = 2) +@Measurement(iterations = 5) +@Threads(8) +public class SQLCommenterDuplicateCommentBenchmark { + + // Already-DD-commented SQL (append style, comment at the end). First needle hits at different + // depths: ddps first (cheap), traceparent-only (scans 8 before the match). + static final String[] SQL = { + "SELECT * FROM foo /*ddps='svc',dde='test',dddbs='mydb',ddh='h',dddb='n',traceparent='00-00000000000000007fffffffffffffff-000000024cb016ea-00'*/", + "SELECT * FROM bar WHERE id = 42 /*traceparent='00-00000000000000007fffffffffffffff-000000024cb016ea-01'*/", + }; + + /** Per-thread cursor so threads don't contend on a shared index under {@code @Threads(8)}. */ + @State(Scope.Thread) + public static class Cursor { + int index = 0; + + String next() { + int i = index; + index = (i + 1) % SQL.length; + return SQL[i]; + } + } + + @Benchmark + public boolean alreadyCommented(Cursor cursor) { + // dbType=null skips the first-word scan; the DD comment makes inject return early after the + // duplicate-comment check -- the path (B) optimizes. Returns the input sql (no new String). + return SQLCommenter.inject(cursor.next(), "mydb", null, "h", "n", null, true) != null; + } +} From bd0983fd1a7887a966a1ce17b19322de16fb1767 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Thu, 25 Jun 2026 07:34:26 -0400 Subject: [PATCH 22/22] Refresh SQLCommenterDuplicateCommentBenchmark results to JDK 17 @Fork(5) zulu-17 @Fork(5), -prof gc: 23.5M -> 26.2M ops/s (~1.1x), 140 -> ~0 B/op. Headline win is the allocation; @Fork(5) tightens the earlier bimodal spread. Co-Authored-By: Claude Opus 4.8 --- .../SQLCommenterDuplicateCommentBenchmark.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/dd-java-agent/instrumentation/jdbc/src/jmh/java/datadog/trace/instrumentation/jdbc/SQLCommenterDuplicateCommentBenchmark.java b/dd-java-agent/instrumentation/jdbc/src/jmh/java/datadog/trace/instrumentation/jdbc/SQLCommenterDuplicateCommentBenchmark.java index 8a2752a7cf9..9c9a8e09a39 100644 --- a/dd-java-agent/instrumentation/jdbc/src/jmh/java/datadog/trace/instrumentation/jdbc/SQLCommenterDuplicateCommentBenchmark.java +++ b/dd-java-agent/instrumentation/jdbc/src/jmh/java/datadog/trace/instrumentation/jdbc/SQLCommenterDuplicateCommentBenchmark.java @@ -27,23 +27,23 @@ * ./gradlew :dd-java-agent:instrumentation:jdbc:jmh # add -prof gc * * - *

Results (JDK 25, MacBook M-series, {@code @Threads(8)}, {@code @Fork(2)}, {@code -prof + *

Results (JDK 17, MacBook M-series, {@code @Threads(8)}, {@code @Fork(5)}, {@code -prof * gc}): * *

  *                        throughput            gc.alloc.rate.norm
- *   before (substring)   15.4M ± 3.1M ops/s    140 B/op
- *   after  (range/view)  16.8M ± 1.3M ops/s    ~0  B/op  (10^-5)
+ *   before (substring)   23.5M ± 1.1M ops/s    140 B/op
+ *   after  (range/view)  26.2M ± 1.5M ops/s    ~0  B/op  (10^-5)
  * 
* * The extractCommentContent substring (140 B/op) is gone -- the in-place range scan and the * SubSequence view it flows through are both EA-elided. The allocation delta is exact and - * fork-stable; that's the win. Throughput is flat-to-slightly-up but not cleanly resolved at - * {@code @Fork(2)} (the "before" +-20% is bimodal) -- this path is dominated by the nine indexOf - * scans (CPU the alloc-removal doesn't touch), so the win here is the allocation, a small cut that - * compounds across comment-bearing injects, not a per-call throughput jump. + * fork-stable; that's the win. At {@code @Fork(5)} the spread tightens and a small throughput + * uplift (~1.1x) resolves -- but this path is dominated by the nine indexOf scans (CPU the + * alloc-removal doesn't touch), so the headline win is the allocation, a small cut that compounds + * across comment-bearing injects, not a per-call throughput jump. */ -@Fork(2) +@Fork(5) @Warmup(iterations = 2) @Measurement(iterations = 5) @Threads(8)