From 4e19e0d169b9e100a0e7e141410e47e086c8f3f2 Mon Sep 17 00:00:00 2001 From: Sergey Chernov Date: Wed, 17 Jun 2026 21:51:02 -0700 Subject: [PATCH 01/10] client v2 binary string initial impl --- .../ClickHouseBinaryFormatWriter.java | 4 + .../RowBinaryFormatSerializer.java | 8 + .../data_formats/RowBinaryFormatWriter.java | 10 + .../client/api/data_formats/StringValue.java | 251 ++++++++++++++++++ .../internal/AbstractBinaryFormatReader.java | 28 +- .../internal/BinaryStreamReader.java | 67 ++++- .../internal/MapBackedRecord.java | 13 + .../internal/SerializerUtils.java | 37 ++- .../api/internal/DataTypeConverter.java | 5 +- .../api/data_formats/StringValueTests.java | 224 ++++++++++++++++ .../internal/BaseReaderTests.java | 75 ++++++ .../datatypes/RowBinaryFormatWriterTest.java | 67 +++++ .../clickhouse/client/insert/InsertTests.java | 14 +- .../clickhouse/client/insert/SamplePOJO.java | 3 + 14 files changed, 793 insertions(+), 13 deletions(-) create mode 100644 client-v2/src/main/java/com/clickhouse/client/api/data_formats/StringValue.java create mode 100644 client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/ClickHouseBinaryFormatWriter.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/ClickHouseBinaryFormatWriter.java index 8494c16c3..a9a212f6a 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/ClickHouseBinaryFormatWriter.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/ClickHouseBinaryFormatWriter.java @@ -84,6 +84,10 @@ public interface ClickHouseBinaryFormatWriter { void setString(int colIndex, String value); + void setString(String column, byte[] value); + + void setString(int colIndex, byte[] value); + void setDate(String column, LocalDate value); void setDate(int colIndex, LocalDate value); diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatSerializer.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatSerializer.java index ad8ee680a..303a5e7f4 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatSerializer.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatSerializer.java @@ -126,10 +126,18 @@ public void writeString(String value) throws IOException { BinaryStreamUtils.writeString(out, value); } + public void writeString(byte[] value) throws IOException { + BinaryStreamUtils.writeString(out, value); + } + public void writeFixedString(String value, int len) throws IOException { BinaryStreamUtils.writeFixedString(out, value, len); } + public void writeFixedString(byte[] value, int len) throws IOException { + SerializerUtils.writeFixedStringBytes(out, value, len); + } + public void writeDate(ZonedDateTime value) throws IOException { SerializerUtils.writeDate(out, value, value.getZone()); } diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatWriter.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatWriter.java index a487da1b9..2a2ecedd3 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatWriter.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatWriter.java @@ -203,6 +203,16 @@ public void setString(int colIndex, String value) { setValue(colIndex, value); } + @Override + public void setString(String column, byte[] value) { + setValue(column, value); + } + + @Override + public void setString(int colIndex, byte[] value) { + setValue(colIndex, value); + } + @Override public void setDate(String column, LocalDate value) { setValue(column, value); diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/StringValue.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/StringValue.java new file mode 100644 index 000000000..8a78455b2 --- /dev/null +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/StringValue.java @@ -0,0 +1,251 @@ +package com.clickhouse.client.api.data_formats; + +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; + +/** + * Holder for a ClickHouse {@code String} (or {@code FixedString}) value that keeps the original bytes + * as they were received from the server instead of eagerly decoding them into a {@link String}. + *

+ * ClickHouse {@code String} columns are arbitrary byte sequences and are not guaranteed to be valid + * text in any particular encoding (for example a {@code String} may store a hash, a serialized blob or + * text in a non UTF-8 charset). Decoding such values as UTF-8 is lossy. This class preserves the raw + * bytes so that: + *

+ *

+ * The value is backed by a {@link ByteBuffer} which exposes a richer API to callers and allows the + * implementation to use direct (off-heap) memory in the future without changing this contract. + * Instances are immutable: the backing buffer is never mutated and callers receive read-only views or + * copies. The {@link String} produced by {@link #asString()} is cached so repeated access (for example + * inside a row loop) does not allocate a new object every time. + */ +public final class StringValue { + + /** Charset used by {@link #asString()} and {@link #toString()} when no charset is provided. */ + public static final Charset DEFAULT_CHARSET = StandardCharsets.UTF_8; + + private final ByteBuffer buffer; + + private final Charset defaultCharset; + + private volatile String cached; + + /** + * Creates a value backed by the given bytes. The array is wrapped, not copied, so it must not be + * modified after being passed in. + * + * @param bytes raw value bytes (not null) + */ + public StringValue(byte[] bytes) { + this(ByteBuffer.wrap(bytes), DEFAULT_CHARSET); + } + + /** + * Creates a value backed by the given bytes using the provided default charset. The array is wrapped, + * not copied, so it must not be modified after being passed in. + * + * @param bytes raw value bytes (not null) + * @param defaultCharset charset used by {@link #asString()} and {@link #toString()} (not null) + */ + public StringValue(byte[] bytes, Charset defaultCharset) { + this(ByteBuffer.wrap(bytes), defaultCharset); + } + + /** + * Creates a value backed by a region of the given array. The array is referenced, not copied. + * + * @param bytes raw value bytes (not null) + * @param offset start offset in the array + * @param length number of bytes + */ + public StringValue(byte[] bytes, int offset, int length) { + this(ByteBuffer.wrap(bytes, offset, length), DEFAULT_CHARSET); + } + + /** + * Creates a value backed by the remaining content of the given buffer. + * + * @param buffer backing buffer (not null); its remaining bytes define the value + */ + public StringValue(ByteBuffer buffer) { + this(buffer, DEFAULT_CHARSET); + } + + /** + * Creates a value backed by the remaining content of the given buffer using the provided default charset. + * + * @param buffer backing buffer (not null); its remaining bytes define the value + * @param defaultCharset charset used by {@link #asString()} and {@link #toString()} (not null) + */ + public StringValue(ByteBuffer buffer, Charset defaultCharset) { + if (buffer == null) { + throw new NullPointerException("buffer is null"); + } + if (defaultCharset == null) { + throw new NullPointerException("defaultCharset is null"); + } + // Keep an independent view so external position/limit changes do not affect this value. + this.buffer = buffer.slice(); + this.defaultCharset = defaultCharset; + } + + /** + * Creates a value from a Java string encoded with UTF-8. + * + * @param value source string (not null) + * @return new value + */ + public static StringValue of(String value) { + return of(value, DEFAULT_CHARSET); + } + + /** + * Creates a value from a Java string encoded with the given charset. + * + * @param value source string (not null) + * @param charset charset used to encode the string (not null) + * @return new value + */ + public static StringValue of(String value, Charset charset) { + StringValue sv = new StringValue(value.getBytes(charset), charset); + if (charset.equals(DEFAULT_CHARSET)) { + sv.cached = value; + } + return sv; + } + + /** + * Creates a value from the given bytes. The array is wrapped, not copied. + * + * @param bytes raw value bytes (not null) + * @return new value + */ + public static StringValue of(byte[] bytes) { + return new StringValue(bytes); + } + + /** + * Returns a read-only view over the raw bytes of this value. The returned buffer is independent + * (its own position/limit) and shares no mutable state with this value. + * + * @return read-only buffer positioned at the first byte of the value + */ + public ByteBuffer asByteBuffer() { + return buffer.asReadOnlyBuffer(); + } + + /** + * Returns a fresh copy of the raw bytes of this value. + * + * @return new byte array with the value bytes + */ + public byte[] toByteArray() { + ByteBuffer view = buffer.duplicate(); + if (view.hasArray()) { + int start = view.arrayOffset() + view.position(); + return Arrays.copyOfRange(view.array(), start, start + view.remaining()); + } + byte[] out = new byte[view.remaining()]; + view.get(out); + return out; + } + + /** + * @return number of bytes in this value + */ + public int size() { + return buffer.remaining(); + } + + /** + * @return {@code true} if the value has no bytes + */ + public boolean isEmpty() { + return buffer.remaining() == 0; + } + + /** + * Decodes the value using the default charset (UTF-8 unless another was provided at construction). + * The result is cached so repeated calls do not allocate a new string. + * + * @return decoded string + */ + public String asString() { + String s = cached; + if (s == null) { + s = decode(defaultCharset); + cached = s; + } + return s; + } + + /** + * Decodes the value using the given charset. The result is cached only when the charset matches the + * default charset of this value. + * + * @param charset charset to decode with (not null) + * @return decoded string + */ + public String asString(Charset charset) { + if (charset == null) { + throw new NullPointerException("charset is null"); + } + if (charset.equals(defaultCharset)) { + return asString(); + } + return decode(charset); + } + + /** + * Returns a stream over the raw bytes of this value. Useful for JDBC binary/ascii stream access. + * + * @return input stream over the value bytes + */ + public InputStream asInputStream() { + ByteBuffer view = buffer.duplicate(); + if (view.hasArray()) { + int start = view.arrayOffset() + view.position(); + return new ByteArrayInputStream(view.array(), start, view.remaining()); + } + return new ByteArrayInputStream(toByteArray()); + } + + private String decode(Charset charset) { + ByteBuffer view = buffer.duplicate(); + if (view.hasArray()) { + return new String(view.array(), view.arrayOffset() + view.position(), view.remaining(), charset); + } + byte[] tmp = new byte[view.remaining()]; + view.get(tmp); + return new String(tmp, charset); + } + + @Override + public String toString() { + return asString(); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof StringValue)) { + return false; + } + return buffer.equals(((StringValue) o).buffer); + } + + @Override + public int hashCode() { + return buffer.hashCode(); + } +} diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/AbstractBinaryFormatReader.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/AbstractBinaryFormatReader.java index e5892748d..54b907749 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/AbstractBinaryFormatReader.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/AbstractBinaryFormatReader.java @@ -4,6 +4,7 @@ import com.clickhouse.client.api.ClientException; import com.clickhouse.client.api.DataTypeUtils; import com.clickhouse.client.api.data_formats.ClickHouseBinaryFormatReader; +import com.clickhouse.client.api.data_formats.StringValue; import com.clickhouse.client.api.internal.DataTypeConverter; import com.clickhouse.client.api.internal.MapUtils; import com.clickhouse.client.api.internal.ServerSettings; @@ -532,8 +533,9 @@ private T getPrimitiveArray(int index, Class componentType) { } return (T)array; } else if (componentType == byte.class) { - if (value instanceof String) { - return (T) ((String) value).getBytes(StandardCharsets.UTF_8); + byte[] bytes = stringLikeToBytes(value); + if (bytes != null) { + return (T) bytes; } else if (value instanceof InetAddress) { return (T) ((InetAddress) value).getAddress(); } @@ -676,6 +678,24 @@ public Instant getInstant(int index) { throw new ClientException("Column of type " + column.getDataType() + " cannot be converted to Instant"); } + /** + * Converts a string-like value into its raw bytes. For a {@link StringValue} the original bytes are + * returned without re-encoding (so binary content is preserved). For a {@link String} the bytes are + * produced using UTF-8, matching the historical behaviour. Returns {@code null} when the value is not + * a string-like type so callers can fall back to other handling. + * + * @param value value to convert + * @return raw bytes or {@code null} if the value is not string-like + */ + public static byte[] stringLikeToBytes(Object value) { + if (value instanceof StringValue) { + return ((StringValue) value).toByteArray(); + } else if (value instanceof String) { + return ((String) value).getBytes(StandardCharsets.UTF_8); + } + return null; + } + static Instant objectToInstant(Object value) { if (value instanceof LocalDateTime) { LocalDateTime dateTime = (LocalDateTime) value; @@ -866,6 +886,10 @@ public String[] getStringArray(int index) { BinaryStreamReader.ArrayValue array = (BinaryStreamReader.ArrayValue) value; if (array.itemType == String.class) { return (String[]) array.getArray(); + } else if (array.itemType == StringValue.class) { + StringValue[] stringValues = (StringValue[]) array.getArray(); + return Arrays.stream(stringValues) + .map(sv -> sv == null ? null : sv.asString()).toArray(String[]::new); } else if (array.itemType == BinaryStreamReader.EnumValue.class) { BinaryStreamReader.EnumValue[] enumValues = (BinaryStreamReader.EnumValue[]) array.getArray(); return Arrays.stream(enumValues).map(BinaryStreamReader.EnumValue::getName).toArray(String[]::new); diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/BinaryStreamReader.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/BinaryStreamReader.java index 8a6b76a5a..fda7a88e2 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/BinaryStreamReader.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/BinaryStreamReader.java @@ -2,6 +2,7 @@ import com.clickhouse.client.api.ClientException; import com.clickhouse.client.api.DataTypeUtils; +import com.clickhouse.client.api.data_formats.StringValue; import com.clickhouse.data.ClickHouseColumn; import com.clickhouse.data.ClickHouseDataType; import com.clickhouse.data.ClickHouseEnum; @@ -55,6 +56,8 @@ public class BinaryStreamReader { private final Class arrayDefaultTypeHint; + private final boolean stringAsBinaryDefault; + private static final int SB_INIT_SIZE = 100; private ClickHouseColumn lastDataColumn = null; @@ -69,7 +72,7 @@ public class BinaryStreamReader { * @param jsonAsString - use string to serialize/deserialize JSON columns * @param typeHintMapping - what type use as hint if hint is not set or may not be known. */ - BinaryStreamReader(InputStream input, TimeZone timeZone, Logger log, ByteBufferAllocator bufferAllocator, boolean jsonAsString, Map> typeHintMapping) { + public BinaryStreamReader(InputStream input, TimeZone timeZone, Logger log, ByteBufferAllocator bufferAllocator, boolean jsonAsString, Map> typeHintMapping) { this.log = log == null ? NOPLogger.NOP_LOGGER : log; this.timeZone = timeZone; this.input = input; @@ -78,6 +81,26 @@ public class BinaryStreamReader { this.arrayDefaultTypeHint = typeHintMapping == null || typeHintMapping.isEmpty()? NO_TYPE_HINT : typeHintMapping.get(ClickHouseDataType.Array); + this.stringAsBinaryDefault = typeHintMapping != null && + typeHintMapping.get(ClickHouseDataType.String) == StringValue.class; + } + + /** + * Decides whether a {@code String}/{@code FixedString} value should be read as a {@link StringValue} + * (preserving raw bytes) instead of a {@link String}. A per-call type hint takes precedence over the + * default type hint mapping configured for the reader. + * + * @param typeHint per-call type hint or {@code null} + * @return {@code true} when the value should be read as {@link StringValue} + */ + private boolean readStringAsBinary(Class typeHint) { + if (typeHint == StringValue.class) { + return true; + } + if (typeHint == String.class) { + return false; + } + return stringAsBinaryDefault; } /** @@ -121,12 +144,18 @@ public T readValue(ClickHouseColumn column, Class typeHint) throws IOExce switch (dataType) { // Primitives case FixedString: { + if (readStringAsBinary(typeHint)) { + return (T) new StringValue(readStringBytes(input, precision)); + } byte[] bytes = precision > STRING_BUFF.length ? new byte[precision] : STRING_BUFF; readNBytes(input, bytes, 0, precision); return (T) new String(bytes, 0, precision, StandardCharsets.UTF_8); } case String: { + if (readStringAsBinary(typeHint)) { + return (T) readStringValue(); + } return (T) readString(); } case Int8: @@ -1119,17 +1148,41 @@ public String readString() throws IOException { } /** - * Reads a decimal value from input stream. + * Reads a string from the internal input stream preserving the raw bytes as a {@link StringValue}. + * Unlike {@link #readString()} this does not decode bytes into a {@link String} and never reuses the + * shared buffer, so the value is safe to keep after the next read. + * + * @return string value holding the raw bytes + * @throws IOException when IO error occurs + */ + public StringValue readStringValue() throws IOException { + return new StringValue(readStringBytes(input, readVarInt(input))); + } + + /** + * Reads the raw bytes of a string from the input stream given its length. + * * @param input - source of bytes - * @return String + * @param len - number of bytes to read + * @return byte[] containing the raw string bytes * @throws IOException when IO error occurs */ - public static String readString(InputStream input) throws IOException { - int len = readVarInt(input); + public static byte[] readStringBytes(InputStream input, int len) throws IOException { if (len == 0) { - return ""; + return new byte[0]; } - return new String(readNBytes(input, len), StandardCharsets.UTF_8); + return readNBytes(input, len); + } + + /** + * Reads a string value from input stream. + * @param input - source of bytes + * @return String + * @throws IOException when IO error occurs + */ + public static String readString(InputStream input) throws IOException { + byte[] bytes = readStringBytes(input, readVarInt(input)); + return bytes.length == 0 ? "" : new String(bytes, StandardCharsets.UTF_8); } public static int readByteOrEOF(InputStream input) throws IOException { diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/MapBackedRecord.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/MapBackedRecord.java index 75f7ea314..fe963417c 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/MapBackedRecord.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/MapBackedRecord.java @@ -2,6 +2,7 @@ import com.clickhouse.client.api.ClientException; import com.clickhouse.client.api.DataTypeUtils; +import com.clickhouse.client.api.data_formats.StringValue; import com.clickhouse.client.api.internal.DataTypeConverter; import com.clickhouse.client.api.metadata.NoSuchColumnException; import com.clickhouse.client.api.metadata.TableSchema; @@ -276,6 +277,14 @@ private T getPrimitiveArray(String colName) { @Override public byte[] getByteArray(String colName) { + Object value = readValue(colName); + if (value == null) { + return null; + } + byte[] bytes = AbstractBinaryFormatReader.stringLikeToBytes(value); + if (bytes != null) { + return bytes; + } return getPrimitiveArray(colName); } @@ -319,6 +328,10 @@ public String[] getStringArray(String colName) { BinaryStreamReader.ArrayValue array = (BinaryStreamReader.ArrayValue) value; if (array.itemType == String.class) { return (String[]) array.getArray(); + } else if (array.itemType == StringValue.class) { + StringValue[] stringValues = (StringValue[]) array.getArray(); + return Arrays.stream(stringValues) + .map(sv -> sv == null ? null : sv.asString()).toArray(String[]::new); } else if (array.itemType == BinaryStreamReader.EnumValue.class) { BinaryStreamReader.EnumValue[] enumValues = (BinaryStreamReader.EnumValue[]) array.getArray(); return Arrays.stream(enumValues).map(BinaryStreamReader.EnumValue::getName).toArray(String[]::new); diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/SerializerUtils.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/SerializerUtils.java index cc7e91792..a4e8fb598 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/SerializerUtils.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/SerializerUtils.java @@ -2,6 +2,7 @@ import com.clickhouse.client.api.Client; import com.clickhouse.client.api.ClientException; +import com.clickhouse.client.api.data_formats.StringValue; import com.clickhouse.client.api.serde.POJOFieldDeserializer; import com.clickhouse.data.ClickHouseAggregateFunction; import com.clickhouse.data.ClickHouseColumn; @@ -552,10 +553,22 @@ private static void serializePrimitiveData(OutputStream stream, Object value, Cl BinaryStreamUtils.writeBoolean(stream, (Boolean) value); break; case String: - BinaryStreamUtils.writeString(stream, convertToString(value)); + if (value instanceof byte[]) { + BinaryStreamUtils.writeString(stream, (byte[]) value); + } else if (value instanceof StringValue) { + BinaryStreamUtils.writeString(stream, ((StringValue) value).toByteArray()); + } else { + BinaryStreamUtils.writeString(stream, convertToString(value)); + } break; case FixedString: - BinaryStreamUtils.writeFixedString(stream, convertToString(value), column.getPrecision()); + if (value instanceof byte[]) { + writeFixedStringBytes(stream, (byte[]) value, column.getPrecision()); + } else if (value instanceof StringValue) { + writeFixedStringBytes(stream, ((StringValue) value).toByteArray(), column.getPrecision()); + } else { + BinaryStreamUtils.writeFixedString(stream, convertToString(value), column.getPrecision()); + } break; case Date: writeDate(stream, value, ZoneId.of("UTC")); // TODO: check @@ -912,6 +925,26 @@ public static String convertToString(Object value) { return java.lang.String.valueOf(value); } + /** + * Writes raw bytes as a ClickHouse {@code FixedString(length)} value. The bytes are written as-is and + * right-padded with zero bytes when shorter than {@code length}. + * + * @param stream output stream + * @param value raw bytes + * @param length fixed string length + * @throws IOException when failed to write to the stream + */ + public static void writeFixedStringBytes(OutputStream stream, byte[] value, int length) throws IOException { + if (value.length > length) { + throw new IllegalArgumentException("Value of length " + value.length + + " is longer than FixedString(" + length + ")"); + } + stream.write(value); + for (int i = value.length; i < length; i++) { + stream.write(0); + } + } + public static > Set parseEnumList(String value, Class enumType) { Set values = new HashSet<>(); for (StringTokenizer causes = new StringTokenizer(value, Client.VALUES_LIST_DELIMITER); causes.hasMoreTokens(); ) { diff --git a/client-v2/src/main/java/com/clickhouse/client/api/internal/DataTypeConverter.java b/client-v2/src/main/java/com/clickhouse/client/api/internal/DataTypeConverter.java index b1f6a8520..0faa2a7a8 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/internal/DataTypeConverter.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/internal/DataTypeConverter.java @@ -2,6 +2,7 @@ import com.clickhouse.client.api.ClickHouseException; import com.clickhouse.client.api.DataTypeUtils; +import com.clickhouse.client.api.data_formats.StringValue; import com.clickhouse.client.api.data_formats.internal.BinaryStreamReader; import com.clickhouse.data.ClickHouseColumn; import com.clickhouse.data.ClickHouseDataType; @@ -85,7 +86,9 @@ public String stringToString(Object bytesOrString, ClickHouseColumn column) { if (column.isArray()) { sb.append(QUOTE); } - if (bytesOrString instanceof CharSequence) { + if (bytesOrString instanceof StringValue) { + sb.append(((StringValue) bytesOrString).asString()); + } else if (bytesOrString instanceof CharSequence) { sb.append(((CharSequence) bytesOrString)); } else if (bytesOrString instanceof byte[]) { sb.append(new String((byte[]) bytesOrString)); diff --git a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java new file mode 100644 index 000000000..1d5528594 --- /dev/null +++ b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java @@ -0,0 +1,224 @@ +package com.clickhouse.client.api.data_formats; + +import com.clickhouse.client.api.data_formats.internal.AbstractBinaryFormatReader; +import com.clickhouse.client.api.data_formats.internal.BinaryStreamReader; +import com.clickhouse.client.api.data_formats.internal.SerializerUtils; +import com.clickhouse.data.ClickHouseColumn; +import com.clickhouse.data.ClickHouseDataType; +import com.clickhouse.data.format.BinaryStreamUtils; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.Collections; +import java.util.Map; +import java.util.TimeZone; + +public class StringValueTests { + + private static final Map> STRING_AS_BINARY = + Collections.singletonMap(ClickHouseDataType.String, (Class) StringValue.class); + + private static BinaryStreamReader reader(byte[] input, Map> hints) { + return new BinaryStreamReader(new ByteArrayInputStream(input), TimeZone.getTimeZone("UTC"), null, + new BinaryStreamReader.DefaultByteBufferAllocator(), false, hints); + } + + // ---- StringValue API ---- + + @Test + public void testStringValueApiBasics() { + byte[] bytes = "hello world".getBytes(StandardCharsets.UTF_8); + StringValue sv = new StringValue(bytes); + + Assert.assertEquals(sv.size(), bytes.length); + Assert.assertFalse(sv.isEmpty()); + Assert.assertEquals(sv.asString(), "hello world"); + Assert.assertEquals(sv.toString(), "hello world"); + Assert.assertEquals(sv.toByteArray(), bytes); + } + + @Test + public void testToByteArrayReturnsIndependentCopy() { + byte[] bytes = {1, 2, 3, 4}; + StringValue sv = new StringValue(bytes); + byte[] copy = sv.toByteArray(); + copy[0] = 42; + Assert.assertEquals(sv.toByteArray()[0], 1, "Mutating the returned array must not affect the value"); + } + + @Test + public void testAsByteBufferIsReadOnly() { + StringValue sv = new StringValue(new byte[]{1, 2, 3}); + ByteBuffer buffer = sv.asByteBuffer(); + Assert.assertTrue(buffer.isReadOnly()); + Assert.assertEquals(buffer.remaining(), 3); + } + + @Test + public void testAsStringIsCached() { + StringValue sv = new StringValue("cached".getBytes(StandardCharsets.UTF_8)); + String first = sv.asString(); + String second = sv.asString(); + Assert.assertSame(first, second, "asString() should cache and return the same instance"); + } + + @Test + public void testAsStringWithCharset() { + String original = "Привет, мир"; + StringValue sv = new StringValue(original.getBytes(StandardCharsets.UTF_16)); + Assert.assertEquals(sv.asString(StandardCharsets.UTF_16), original); + } + + @Test + public void testAsInputStream() throws IOException { + byte[] bytes = {(byte) 0x00, (byte) 0xFF, (byte) 0x10, (byte) 0x7F}; + StringValue sv = new StringValue(bytes); + try (InputStream is = sv.asInputStream()) { + byte[] read = new byte[bytes.length]; + int n = is.read(read); + Assert.assertEquals(n, bytes.length); + Assert.assertEquals(read, bytes); + } + } + + @Test + public void testEqualsAndHashCode() { + StringValue a = StringValue.of("abc"); + StringValue b = new StringValue("abc".getBytes(StandardCharsets.UTF_8)); + StringValue c = StringValue.of("abd"); + Assert.assertEquals(a, b); + Assert.assertEquals(a.hashCode(), b.hashCode()); + Assert.assertNotEquals(a, c); + } + + @Test + public void testOfStringCachesValue() { + StringValue sv = StringValue.of("preset"); + Assert.assertSame(sv.asString(), sv.asString()); + Assert.assertEquals(sv.asString(), "preset"); + } + + @Test + public void testEmptyValue() { + StringValue sv = new StringValue(new byte[0]); + Assert.assertTrue(sv.isEmpty()); + Assert.assertEquals(sv.size(), 0); + Assert.assertEquals(sv.asString(), ""); + Assert.assertEquals(sv.toByteArray().length, 0); + } + + // ---- Reading String columns as StringValue ---- + + @DataProvider(name = "charsetStrings") + private Object[][] charsetStrings() { + return new Object[][]{ + {"plain ascii", StandardCharsets.UTF_8}, + {"unicode: Привет 你好 🚀", StandardCharsets.UTF_8}, + {"latin1 café", StandardCharsets.ISO_8859_1}, + {"utf16 текст", StandardCharsets.UTF_16}, + {" leading and trailing ", StandardCharsets.UTF_8}, + {"", StandardCharsets.UTF_8}, + }; + } + + @Test(dataProvider = "charsetStrings") + public void testReadStringAsStringValuePreservesBytes(String value, Charset charset) throws IOException { + byte[] encoded = value.getBytes(charset); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + BinaryStreamUtils.writeString(baos, encoded); // binary string write (raw bytes) + + ClickHouseColumn column = ClickHouseColumn.of("s", "String"); + Object read = reader(baos.toByteArray(), STRING_AS_BINARY).readValue(column); + + Assert.assertTrue(read instanceof StringValue, "Expected StringValue but got " + read.getClass()); + StringValue sv = (StringValue) read; + Assert.assertEquals(sv.toByteArray(), encoded, "Raw bytes must be preserved"); + Assert.assertEquals(sv.asString(charset), value, "Decoding with the source charset must round-trip"); + } + + @Test + public void testReadBinaryNonUtf8IsPreserved() throws IOException { + // Bytes that are not valid UTF-8 (e.g. a binary hash). Decoding as UTF-8 would be lossy. + byte[] binary = new byte[]{(byte) 0xDE, (byte) 0xAD, (byte) 0xBE, (byte) 0xEF, + (byte) 0xFF, (byte) 0x00, (byte) 0x80, (byte) 0xC0, (byte) 0xFE}; + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + BinaryStreamUtils.writeString(baos, binary); + + ClickHouseColumn column = ClickHouseColumn.of("s", "String"); + StringValue sv = reader(baos.toByteArray(), STRING_AS_BINARY).readValue(column); + + Assert.assertEquals(sv.toByteArray(), binary, "Binary content must be preserved exactly"); + Assert.assertEquals(AbstractBinaryFormatReader.stringLikeToBytes(sv), binary, + "Shared string->bytes conversion must preserve binary content"); + } + + @Test + public void testFixedStringAsStringValue() throws IOException { + byte[] binary = new byte[]{(byte) 0x01, (byte) 0xFF, (byte) 0x00, (byte) 0x10, (byte) 0x80}; + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + baos.write(binary); // FixedString(5) is written as exactly 5 raw bytes + + ClickHouseColumn column = ClickHouseColumn.of("s", "FixedString(5)"); + Object read = reader(baos.toByteArray(), STRING_AS_BINARY).readValue(column); + + Assert.assertTrue(read instanceof StringValue); + Assert.assertEquals(((StringValue) read).toByteArray(), binary); + } + + @Test + public void testDefaultBehaviorReturnsString() throws IOException { + byte[] encoded = "still a string".getBytes(StandardCharsets.UTF_8); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + BinaryStreamUtils.writeString(baos, encoded); + + ClickHouseColumn column = ClickHouseColumn.of("s", "String"); + Object read = reader(baos.toByteArray(), AbstractBinaryFormatReader.NO_TYPE_HINT_MAPPING).readValue(column); + + Assert.assertTrue(read instanceof String, "Without a type hint Strings must still be returned as String"); + Assert.assertEquals(read, "still a string"); + } + + // ---- Writing binary String values ---- + + @Test + public void testWriteByteArrayToStringRoundTrip() throws IOException { + byte[] binary = new byte[]{(byte) 0x00, (byte) 0xFF, (byte) 0xAB, (byte) 0xCD, (byte) 0x7F}; + ClickHouseColumn column = ClickHouseColumn.of("s", "String"); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + SerializerUtils.serializeData(baos, binary, column); + StringValue read = reader(baos.toByteArray(), STRING_AS_BINARY).readValue(column); + Assert.assertEquals(read.toByteArray(), binary); + } + + @Test + public void testWriteStringValueToStringRoundTrip() throws IOException { + byte[] binary = new byte[]{(byte) 0x10, (byte) 0x20, (byte) 0xFE, (byte) 0xFF, (byte) 0x00}; + StringValue value = new StringValue(binary); + ClickHouseColumn column = ClickHouseColumn.of("s", "String"); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + SerializerUtils.serializeData(baos, value, column); + StringValue read = reader(baos.toByteArray(), STRING_AS_BINARY).readValue(column); + Assert.assertEquals(read.toByteArray(), binary); + } + + @Test + public void testWriteByteArrayToFixedStringRoundTrip() throws IOException { + byte[] binary = new byte[]{(byte) 0xAA, (byte) 0xBB, (byte) 0xCC}; + ClickHouseColumn column = ClickHouseColumn.of("s", "FixedString(3)"); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + SerializerUtils.serializeData(baos, binary, column); + StringValue read = reader(baos.toByteArray(), STRING_AS_BINARY).readValue(column); + Assert.assertEquals(read.toByteArray(), binary); + } +} diff --git a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BaseReaderTests.java b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BaseReaderTests.java index b3e9f0676..7cd08d957 100644 --- a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BaseReaderTests.java +++ b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BaseReaderTests.java @@ -7,6 +7,7 @@ import com.clickhouse.client.api.Client; import com.clickhouse.client.api.command.CommandSettings; import com.clickhouse.client.api.data_formats.ClickHouseBinaryFormatReader; +import com.clickhouse.client.api.data_formats.StringValue; import com.clickhouse.client.api.enums.Protocol; import com.clickhouse.client.api.query.GenericRecord; import com.clickhouse.client.api.query.QueryResponse; @@ -572,4 +573,78 @@ private Client.Builder newClient() { .setPassword(ClickHouseServerForTest.getPassword()); } + @Test(groups = {"integration"}) + public void testReadingStringValue() throws Exception { + final String table = "test_reading_stringvalue"; + + client.execute("DROP TABLE IF EXISTS " + table).get(); + client.execute("CREATE TABLE " + table + " (id Int32, s String, fs FixedString(5), e FixedString(1)) ENGINE = Memory").get(); + client.execute("INSERT INTO " + table + " VALUES (1, 'hello', 'world', 'a'), (2, 'ClickHouse', 'Rocks', 'b')").get(); + + java.util.Map> typeHints = new java.util.HashMap<>(); + typeHints.put(ClickHouseDataType.String, StringValue.class); + typeHints.put(ClickHouseDataType.FixedString, StringValue.class); + + Client customClient = newClient() + .typeHintMapping(typeHints) + .build(); + + try { + try (QueryResponse response = customClient.query("SELECT * FROM " + table + " ORDER BY id").get()) { + ClickHouseBinaryFormatReader reader = customClient.newBinaryFormatReader(response); + + // Test reading multiple strings in a row and check that their content differs + Assert.assertNotNull(reader.next()); + Assert.assertEquals(reader.getInteger("id"), 1); + StringValue s1 = (StringValue) reader.readValue("s"); + StringValue fs1 = (StringValue) reader.readValue("fs"); + StringValue e1 = (StringValue) reader.readValue("e"); + + Assert.assertEquals(s1.asString(), "hello"); + Assert.assertEquals(fs1.asString(), "world"); + Assert.assertEquals(e1.asString(), "a"); + + // Test getting read value multiple times + Assert.assertSame(s1, reader.readValue("s"), "Consecutive reads for the same row should return the same instance or equal value"); + Assert.assertEquals(reader.getString("s"), "hello"); + // Test reading byte[] from String columns + Assert.assertEquals(reader.getByteArray("s"), "hello".getBytes()); + Assert.assertEquals(reader.getByteArray("fs"), "world".getBytes()); + Assert.assertEquals(reader.getByteArray("e"), "a".getBytes()); + + Assert.assertNotNull(reader.next()); + Assert.assertEquals(reader.getInteger("id"), 2); + StringValue s2 = (StringValue) reader.readValue("s"); + StringValue fs2 = (StringValue) reader.readValue("fs"); + StringValue e2 = (StringValue) reader.readValue("e"); + + Assert.assertEquals(s2.asString(), "ClickHouse"); + Assert.assertEquals(fs2.asString(), "Rocks"); + Assert.assertEquals(e2.asString(), "b"); + + Assert.assertNotEquals(s1.asString(), s2.asString()); + Assert.assertNotEquals(fs1.asString(), fs2.asString()); + } + + // test queryAll with string value + List records = customClient.queryAll("SELECT * FROM " + table + " ORDER BY id"); + Assert.assertEquals(records.size(), 2); + + Assert.assertEquals(records.get(0).getInteger("id"), 1); + Assert.assertEquals(records.get(0).getString("s"), "hello"); + Assert.assertEquals(records.get(0).getString("fs"), "world"); + Assert.assertEquals(records.get(0).getByteArray("s"), "hello".getBytes()); + Assert.assertEquals(records.get(0).getByteArray("fs"), "world".getBytes()); + Assert.assertEquals(records.get(0).getByteArray("e"), "a".getBytes()); + + Assert.assertEquals(records.get(1).getInteger("id"), 2); + Assert.assertEquals(records.get(1).getString("s"), "ClickHouse"); + Assert.assertEquals(records.get(1).getString("fs"), "Rocks"); + Assert.assertEquals(records.get(1).getByteArray("s"), "ClickHouse".getBytes()); + Assert.assertEquals(records.get(1).getByteArray("fs"), "Rocks".getBytes()); + Assert.assertEquals(records.get(1).getByteArray("e"), "b".getBytes()); + } finally { + customClient.close(); + } + } } \ No newline at end of file diff --git a/client-v2/src/test/java/com/clickhouse/client/datatypes/RowBinaryFormatWriterTest.java b/client-v2/src/test/java/com/clickhouse/client/datatypes/RowBinaryFormatWriterTest.java index 1a0ee2287..e36f8a480 100644 --- a/client-v2/src/test/java/com/clickhouse/client/datatypes/RowBinaryFormatWriterTest.java +++ b/client-v2/src/test/java/com/clickhouse/client/datatypes/RowBinaryFormatWriterTest.java @@ -158,6 +158,11 @@ private static void assertEqualsKinda(Object actual, Object expected) { expected = ((BigDecimal) expected).stripTrailingZeros(); } + if (actual instanceof byte[] && expected instanceof byte[]) { + org.testng.Assert.assertEquals((byte[]) actual, (byte[]) expected); + return; + } + assertEquals(String.valueOf(actual), String.valueOf(expected)); } @@ -376,6 +381,68 @@ public void writeStringsTest() throws Exception { writeTest(tableName, tableCreate, rows); } + @Test (groups = { "integration" }) + public void writeBinaryStringsTest() throws Exception { + String tableName = "rowBinaryFormatWriterTest_writeBinaryStringsTests_" + UUID.randomUUID().toString().replace('-', '_'); + String tableCreate = "CREATE TABLE \"" + tableName + "\" " + + " (id Int32, " + + " string String, " + + " fixed_string FixedString(5), " + + " fixed_string_one FixedString(1) " + + " ) Engine = MergeTree ORDER BY id"; + + byte[] binaryData = new byte[]{(byte) 0xDE, (byte) 0xAD, (byte) 0xBE, (byte) 0xEF, (byte) 0x00, (byte) 0xFF, (byte) 0x80}; + byte[] fixedStringData = new byte[]{(byte) 0xAA, (byte) 0xBB, (byte) 0xCC, (byte) 0xDD, (byte) 0xEE}; + byte[] fixedStringOneData = new byte[]{(byte) 0x7F}; + + // Instead of writeTest which reads back using default string decoding, we write manually + // and query back using typeHintMapping to preserve raw bytes + initTable(tableName, tableCreate, new CommandSettings()); + TableSchema schema = client.getTableSchema(tableName); + + ClickHouseFormat format = ClickHouseFormat.RowBinaryWithDefaults; + try (InsertResponse response = client.insert(tableName, out -> { + RowBinaryFormatWriter w = new RowBinaryFormatWriter(out, schema, format); + w.setValue(schema.nameToColumnIndex("id"), 1); + w.setValue(schema.nameToColumnIndex("string"), binaryData); + w.setValue(schema.nameToColumnIndex("fixed_string"), fixedStringData); + w.setValue(schema.nameToColumnIndex("fixed_string_one"), fixedStringOneData); + w.commitRow(); + }, format, settings).get()) { + System.out.println("Rows written (Field-like): " + response.getWrittenRows()); + } + + // Also test inserting with byte[] directly via RowBinaryFormatWriter + try (InsertResponse response = client.insert(tableName, out -> { + RowBinaryFormatWriter w = new RowBinaryFormatWriter(out, schema, format); + w.setValue(schema.nameToColumnIndex("id"), 2); + w.setString("string", binaryData); + w.setString("fixed_string", fixedStringData); + w.setString("fixed_string_one", fixedStringOneData); + w.commitRow(); + }, format, settings).get()) { + System.out.println("Rows written (manual): " + response.getWrittenRows()); + } + + java.util.Map> typeHints = new java.util.HashMap<>(); + typeHints.put(com.clickhouse.data.ClickHouseDataType.String, com.clickhouse.client.api.data_formats.StringValue.class); + typeHints.put(com.clickhouse.data.ClickHouseDataType.FixedString, com.clickhouse.client.api.data_formats.StringValue.class); + + Client customClient = newClient() + .typeHintMapping(typeHints) + .build(); + + List records = customClient.queryAll("SELECT * FROM \"" + tableName + "\" ORDER BY id" ); + assertEquals(records.size(), 2); + + for (GenericRecord record : records) { + org.testng.Assert.assertEquals(record.getByteArray("string"), binaryData); + org.testng.Assert.assertEquals(record.getByteArray("fixed_string"), fixedStringData); + org.testng.Assert.assertEquals(record.getByteArray("fixed_string_one"), fixedStringOneData); + } + + customClient.close(); + } @Test (groups = { "integration" }) public void writeDatetimeTests() throws Exception { diff --git a/client-v2/src/test/java/com/clickhouse/client/insert/InsertTests.java b/client-v2/src/test/java/com/clickhouse/client/insert/InsertTests.java index 6ffdfea5e..5045e290c 100644 --- a/client-v2/src/test/java/com/clickhouse/client/insert/InsertTests.java +++ b/client-v2/src/test/java/com/clickhouse/client/insert/InsertTests.java @@ -199,7 +199,14 @@ public void insertPOJOAndReadBack() throws Exception { try (QueryResponse queryResponse = client.query("SELECT * FROM " + tableName + " LIMIT 1").get(EXECUTE_CMD_TIMEOUT, TimeUnit.SECONDS)) { - ClickHouseBinaryFormatReader reader = client.newBinaryFormatReader(queryResponse); + // To read the binaryString properly as raw bytes, we must map String to StringValue + Client readerClient = client; + if (pojo.getBinaryString() != null) { + readerClient = newClient() + .typeHintMapping(java.util.Collections.singletonMap(com.clickhouse.data.ClickHouseDataType.String, com.clickhouse.client.api.data_formats.StringValue.class)) + .build(); + } + ClickHouseBinaryFormatReader reader = readerClient.newBinaryFormatReader(queryResponse); Assert.assertNotNull(reader.next()); Assert.assertEquals(reader.getByte("byteValue"), pojo.getByteValue()); @@ -212,12 +219,17 @@ public void insertPOJOAndReadBack() throws Exception { Assert.assertEquals(reader.getDouble("float64"), pojo.getFloat64()); Assert.assertEquals(reader.getString("string"), pojo.getString()); Assert.assertEquals(reader.getString("fixedString"), pojo.getFixedString()); + Assert.assertEquals(reader.getByteArray("binaryString"), pojo.getBinaryString()); Assert.assertTrue(reader.getZonedDateTime("zonedDateTime").isEqual(pojo.getZonedDateTime().withNano(0))); Assert.assertTrue(reader.getZonedDateTime("zonedDateTime64").isEqual(pojo.getZonedDateTime64())); Assert.assertTrue(reader.getOffsetDateTime("offsetDateTime").isEqual(pojo.getOffsetDateTime().withNano(0))); Assert.assertTrue(reader.getOffsetDateTime("offsetDateTime64").isEqual(pojo.getOffsetDateTime64())); Assert.assertEquals(reader.getInstant("instant"), pojo.getInstant().with(ChronoField.MICRO_OF_SECOND, 0)); Assert.assertEquals(reader.getInstant("instant64"), pojo.getInstant64()); + + if (readerClient != client) { + readerClient.close(); + } } } diff --git a/client-v2/src/test/java/com/clickhouse/client/insert/SamplePOJO.java b/client-v2/src/test/java/com/clickhouse/client/insert/SamplePOJO.java index 6661b94bc..4d0de854e 100644 --- a/client-v2/src/test/java/com/clickhouse/client/insert/SamplePOJO.java +++ b/client-v2/src/test/java/com/clickhouse/client/insert/SamplePOJO.java @@ -63,6 +63,7 @@ public class SamplePOJO { private String string; private String fixedString; + private byte[] binaryString; private LocalDate date; private LocalDate date32; @@ -145,6 +146,7 @@ public SamplePOJO() { string = RandomStringUtils.randomAlphabetic(1, 256); fixedString = RandomStringUtils.randomAlphabetic(3); + binaryString = new byte[] { (byte) 0xDE, (byte) 0xAD, (byte) 0xBE, (byte) 0xEF }; date = LocalDate.now(); date32 = LocalDate.now(); @@ -308,6 +310,7 @@ public static String generateTableCreateSQL(String tableName) { // "boxedBool UInt8, " + "string String, " + "fixedString FixedString(3), " + + "binaryString String, " + "date Date, " + "date32 Date, " + "dateTime DateTime, " + From 3012f42a4ff4efd2aceeba9266c2a11be62835ea Mon Sep 17 00:00:00 2001 From: Sergey Chernov Date: Wed, 24 Jun 2026 00:34:07 -0700 Subject: [PATCH 02/10] Cleaned code and added tests --- .../client/api/data_formats/StringValue.java | 150 ++++------------- .../internal/BinaryStreamReader.java | 20 +-- .../internal/MapBackedRecord.java | 2 - .../api/data_formats/StringValueTests.java | 154 +++++++++++++++--- .../internal/BaseReaderTests.java | 43 +++++ .../datatypes/RowBinaryFormatWriterTest.java | 73 +++++++++ .../clickhouse/client/insert/SamplePOJO.java | 23 ++- .../src/test/resources/clickhouse-logo.png | Bin 0 -> 9874 bytes 8 files changed, 310 insertions(+), 155 deletions(-) create mode 100644 client-v2/src/test/resources/clickhouse-logo.png diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/StringValue.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/StringValue.java index 8a78455b2..3ca94c923 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/StringValue.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/StringValue.java @@ -1,33 +1,28 @@ package com.clickhouse.client.api.data_formats; -import java.io.ByteArrayInputStream; -import java.io.InputStream; import java.nio.ByteBuffer; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; -import java.util.Arrays; +import java.util.Objects; /** - * Holder for a ClickHouse {@code String} (or {@code FixedString}) value that keeps the original bytes - * as they were received from the server instead of eagerly decoding them into a {@link String}. + * Holder for ClickHouse {@code String} or {@code FixedString} values that preserves raw bytes + * to avoid lossy decoding and unnecessary allocations. *

- * ClickHouse {@code String} columns are arbitrary byte sequences and are not guaranteed to be valid - * text in any particular encoding (for example a {@code String} may store a hash, a serialized blob or - * text in a non UTF-8 charset). Decoding such values as UTF-8 is lossy. This class preserves the raw - * bytes so that: - *

+ * This is a mutable structure and must be used with care. To avoid copying, it does not + * duplicate the bytes it is given: the constructor wraps the supplied array/buffer instead of + * copying it, and {@link #toByteArray()} returns a direct reference to the backing array rather + * than a defensive copy. Consequently, mutating the source array, the array returned by + * {@link #toByteArray()}, or reading the same value concurrently while it is being modified will + * change the observed value. Callers that need an independent snapshot must copy the bytes + * themselves. *

- * The value is backed by a {@link ByteBuffer} which exposes a richer API to callers and allows the - * implementation to use direct (off-heap) memory in the future without changing this contract. - * Instances are immutable: the backing buffer is never mutated and callers receive read-only views or - * copies. The {@link String} produced by {@link #asString()} is cached so repeated access (for example - * inside a row loop) does not allocate a new object every time. + * Backed by a {@link ByteBuffer} for a richer API and future off-heap memory support. Only heap + * buffers (with an accessible backing array) are supported today; constructing a value from a + * direct (off-heap) buffer is rejected. The decoded {@link String} produced by {@link #asString()} + * is cached. */ -public final class StringValue { +public class StringValue { /** Charset used by {@link #asString()} and {@link #toString()} when no charset is provided. */ public static final Charset DEFAULT_CHARSET = StandardCharsets.UTF_8; @@ -45,7 +40,7 @@ public final class StringValue { * @param bytes raw value bytes (not null) */ public StringValue(byte[] bytes) { - this(ByteBuffer.wrap(bytes), DEFAULT_CHARSET); + this(bytes, DEFAULT_CHARSET); } /** @@ -59,79 +54,27 @@ public StringValue(byte[] bytes, Charset defaultCharset) { this(ByteBuffer.wrap(bytes), defaultCharset); } - /** - * Creates a value backed by a region of the given array. The array is referenced, not copied. - * - * @param bytes raw value bytes (not null) - * @param offset start offset in the array - * @param length number of bytes - */ - public StringValue(byte[] bytes, int offset, int length) { - this(ByteBuffer.wrap(bytes, offset, length), DEFAULT_CHARSET); - } - - /** - * Creates a value backed by the remaining content of the given buffer. - * - * @param buffer backing buffer (not null); its remaining bytes define the value - */ - public StringValue(ByteBuffer buffer) { - this(buffer, DEFAULT_CHARSET); - } - /** * Creates a value backed by the remaining content of the given buffer using the provided default charset. + * The buffer is referenced, not copied, so its content must not be modified afterwards. * - * @param buffer backing buffer (not null); its remaining bytes define the value + * @param buffer backing heap buffer (not null); its remaining bytes define the value * @param defaultCharset charset used by {@link #asString()} and {@link #toString()} (not null) + * @throws IllegalArgumentException if the buffer is a direct (off-heap) buffer with no accessible array */ public StringValue(ByteBuffer buffer, Charset defaultCharset) { - if (buffer == null) { - throw new NullPointerException("buffer is null"); - } - if (defaultCharset == null) { - throw new NullPointerException("defaultCharset is null"); + Objects.requireNonNull(buffer, "buffer cannot be null"); + Objects.requireNonNull(defaultCharset, "charset is required to convert buffer to String"); + + if (!buffer.hasArray()) { + throw new IllegalArgumentException("Can work only with heap buffer."); } + // Keep an independent view so external position/limit changes do not affect this value. this.buffer = buffer.slice(); this.defaultCharset = defaultCharset; } - /** - * Creates a value from a Java string encoded with UTF-8. - * - * @param value source string (not null) - * @return new value - */ - public static StringValue of(String value) { - return of(value, DEFAULT_CHARSET); - } - - /** - * Creates a value from a Java string encoded with the given charset. - * - * @param value source string (not null) - * @param charset charset used to encode the string (not null) - * @return new value - */ - public static StringValue of(String value, Charset charset) { - StringValue sv = new StringValue(value.getBytes(charset), charset); - if (charset.equals(DEFAULT_CHARSET)) { - sv.cached = value; - } - return sv; - } - - /** - * Creates a value from the given bytes. The array is wrapped, not copied. - * - * @param bytes raw value bytes (not null) - * @return new value - */ - public static StringValue of(byte[] bytes) { - return new StringValue(bytes); - } - /** * Returns a read-only view over the raw bytes of this value. The returned buffer is independent * (its own position/limit) and shares no mutable state with this value. @@ -143,19 +86,16 @@ public ByteBuffer asByteBuffer() { } /** - * Returns a fresh copy of the raw bytes of this value. + * Returns a direct reference to the backing byte array of this value (no copy is made). + *

+ * The returned array is the live backing storage: mutating it mutates this value, and any change + * to the underlying bytes is reflected here. Callers that need an independent, immutable snapshot + * must copy the result themselves. * - * @return new byte array with the value bytes + * @return the backing array holding the value bytes */ public byte[] toByteArray() { - ByteBuffer view = buffer.duplicate(); - if (view.hasArray()) { - int start = view.arrayOffset() + view.position(); - return Arrays.copyOfRange(view.array(), start, start + view.remaining()); - } - byte[] out = new byte[view.remaining()]; - view.get(out); - return out; + return buffer.array(); } /** @@ -195,37 +135,15 @@ public String asString() { * @return decoded string */ public String asString(Charset charset) { - if (charset == null) { - throw new NullPointerException("charset is null"); - } + Objects.requireNonNull(charset, "charset cannot be null"); if (charset.equals(defaultCharset)) { return asString(); } return decode(charset); } - /** - * Returns a stream over the raw bytes of this value. Useful for JDBC binary/ascii stream access. - * - * @return input stream over the value bytes - */ - public InputStream asInputStream() { - ByteBuffer view = buffer.duplicate(); - if (view.hasArray()) { - int start = view.arrayOffset() + view.position(); - return new ByteArrayInputStream(view.array(), start, view.remaining()); - } - return new ByteArrayInputStream(toByteArray()); - } - private String decode(Charset charset) { - ByteBuffer view = buffer.duplicate(); - if (view.hasArray()) { - return new String(view.array(), view.arrayOffset() + view.position(), view.remaining(), charset); - } - byte[] tmp = new byte[view.remaining()]; - view.get(tmp); - return new String(tmp, charset); + return new String(buffer.array(), charset); } @Override diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/BinaryStreamReader.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/BinaryStreamReader.java index fda7a88e2..6d0f19971 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/BinaryStreamReader.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/BinaryStreamReader.java @@ -85,20 +85,14 @@ public BinaryStreamReader(InputStream input, TimeZone timeZone, Logger log, Byte typeHintMapping.get(ClickHouseDataType.String) == StringValue.class; } - /** - * Decides whether a {@code String}/{@code FixedString} value should be read as a {@link StringValue} - * (preserving raw bytes) instead of a {@link String}. A per-call type hint takes precedence over the - * default type hint mapping configured for the reader. - * - * @param typeHint per-call type hint or {@code null} - * @return {@code true} when the value should be read as {@link StringValue} - */ private boolean readStringAsBinary(Class typeHint) { - if (typeHint == StringValue.class) { - return true; - } - if (typeHint == String.class) { - return false; + if (typeHint != null) { + if (typeHint == StringValue.class) { + return true; + } + if (typeHint == String.class) { + return false; + } } return stringAsBinaryDefault; } diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/MapBackedRecord.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/MapBackedRecord.java index fe963417c..a8708cbfb 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/MapBackedRecord.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/MapBackedRecord.java @@ -4,7 +4,6 @@ import com.clickhouse.client.api.DataTypeUtils; import com.clickhouse.client.api.data_formats.StringValue; import com.clickhouse.client.api.internal.DataTypeConverter; -import com.clickhouse.client.api.metadata.NoSuchColumnException; import com.clickhouse.client.api.metadata.TableSchema; import com.clickhouse.client.api.query.GenericRecord; import com.clickhouse.client.api.query.NullValueException; @@ -14,7 +13,6 @@ import com.clickhouse.data.value.ClickHouseGeoPointValue; import com.clickhouse.data.value.ClickHouseGeoPolygonValue; import com.clickhouse.data.value.ClickHouseGeoRingValue; -import com.google.common.collect.ImmutableList; import java.math.BigDecimal; import java.math.BigInteger; diff --git a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java index 1d5528594..feecf1d9f 100644 --- a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java +++ b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java @@ -13,7 +13,6 @@ import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; -import java.io.InputStream; import java.nio.ByteBuffer; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; @@ -46,12 +45,14 @@ public void testStringValueApiBasics() { } @Test - public void testToByteArrayReturnsIndependentCopy() { + public void testToByteArrayReturnsBackingArrayReference() { byte[] bytes = {1, 2, 3, 4}; StringValue sv = new StringValue(bytes); - byte[] copy = sv.toByteArray(); - copy[0] = 42; - Assert.assertEquals(sv.toByteArray()[0], 1, "Mutating the returned array must not affect the value"); + byte[] backing = sv.toByteArray(); + // No copy is made: the returned array is the live backing storage and mutating it mutates the value. + Assert.assertSame(backing, bytes, "toByteArray() must return the backing array without copying"); + backing[0] = 42; + Assert.assertEquals(sv.toByteArray()[0], 42, "Mutating the returned array mutates the value (no copy)"); } @Test @@ -77,33 +78,69 @@ public void testAsStringWithCharset() { Assert.assertEquals(sv.asString(StandardCharsets.UTF_16), original); } - @Test - public void testAsInputStream() throws IOException { - byte[] bytes = {(byte) 0x00, (byte) 0xFF, (byte) 0x10, (byte) 0x7F}; - StringValue sv = new StringValue(bytes); - try (InputStream is = sv.asInputStream()) { - byte[] read = new byte[bytes.length]; - int n = is.read(read); - Assert.assertEquals(n, bytes.length); - Assert.assertEquals(read, bytes); - } - } - @Test public void testEqualsAndHashCode() { - StringValue a = StringValue.of("abc"); + StringValue a = new StringValue("abc".getBytes(StandardCharsets.UTF_8)); StringValue b = new StringValue("abc".getBytes(StandardCharsets.UTF_8)); - StringValue c = StringValue.of("abd"); + StringValue c = new StringValue("abd".getBytes(StandardCharsets.UTF_8)); + + // Reflexive + Assert.assertEquals(a, a); + // Equal content -> equal value and equal hash code Assert.assertEquals(a, b); + Assert.assertEquals(b, a, "equals must be symmetric"); Assert.assertEquals(a.hashCode(), b.hashCode()); + // Different content -> not equal Assert.assertNotEquals(a, c); } @Test - public void testOfStringCachesValue() { - StringValue sv = StringValue.of("preset"); - Assert.assertSame(sv.asString(), sv.asString()); - Assert.assertEquals(sv.asString(), "preset"); + public void testEqualsRejectsNullAndOtherTypes() { + StringValue a = new StringValue("abc".getBytes(StandardCharsets.UTF_8)); + Assert.assertFalse(a.equals(null), "A value must never equal null"); + Assert.assertFalse(a.equals("abc"), "A value must not equal a raw String of the same text"); + Assert.assertNotEquals(a, new Object()); + } + + @Test + public void testEqualsIgnoresDefaultCharset() { + // equals/hashCode are defined on the raw bytes, so the default charset must not affect them. + byte[] bytes = "abc".getBytes(StandardCharsets.UTF_8); + StringValue utf8 = new StringValue(bytes, StandardCharsets.UTF_8); + StringValue latin1 = new StringValue("abc".getBytes(StandardCharsets.UTF_8), StandardCharsets.ISO_8859_1); + Assert.assertEquals(utf8, latin1, "Values with identical bytes must be equal regardless of default charset"); + Assert.assertEquals(utf8.hashCode(), latin1.hashCode()); + } + + @Test + public void testEqualsDistinguishesByContentAndLength() { + StringValue ab = new StringValue(new byte[]{1, 2}); + StringValue abc = new StringValue(new byte[]{1, 2, 3}); + StringValue empty = new StringValue(new byte[0]); + + // Same prefix but different length must not be equal. + Assert.assertNotEquals(ab, abc); + Assert.assertNotEquals(abc, ab); + // Empty values are only equal to other empty values. + Assert.assertEquals(empty, new StringValue(new byte[0])); + Assert.assertNotEquals(empty, ab); + } + + @Test + public void testEqualsIsConsistentWithBinaryReads() throws IOException { + // Two independently read StringValues over the same bytes must compare equal. + byte[] binary = new byte[]{(byte) 0x00, (byte) 0xFF, (byte) 0x80, (byte) 0x7F}; + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + BinaryStreamUtils.writeString(baos, binary); + byte[] wire = baos.toByteArray(); + + ClickHouseColumn column = ClickHouseColumn.of("s", "String"); + StringValue first = reader(wire, STRING_AS_BINARY).readValue(column); + StringValue second = reader(wire, STRING_AS_BINARY).readValue(column); + + Assert.assertEquals(first, second); + Assert.assertEquals(first.hashCode(), second.hashCode()); + Assert.assertEquals(first, new StringValue(binary)); } @Test @@ -173,6 +210,77 @@ public void testFixedStringAsStringValue() throws IOException { Assert.assertEquals(((StringValue) read).toByteArray(), binary); } + @Test + public void testReadStringArrayAsStringValue() throws IOException { + // Array(String) elements must be preserved as StringValue (including non-UTF-8 content). + byte[][] elements = { + "plain".getBytes(StandardCharsets.UTF_8), + "Привет".getBytes(StandardCharsets.UTF_8), + new byte[]{(byte) 0xDE, (byte) 0xAD, (byte) 0xBE, (byte) 0xEF}, + new byte[0], + }; + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + BinaryStreamUtils.writeVarInt(baos, elements.length); + for (byte[] element : elements) { + BinaryStreamUtils.writeString(baos, element); + } + + ClickHouseColumn column = ClickHouseColumn.of("a", "Array(String)"); + Object read = reader(baos.toByteArray(), STRING_AS_BINARY).readValue(column); + + Assert.assertTrue(read instanceof BinaryStreamReader.ArrayValue, + "Expected ArrayValue but got " + read.getClass()); + BinaryStreamReader.ArrayValue array = (BinaryStreamReader.ArrayValue) read; + Assert.assertEquals(array.length(), elements.length); + + Object raw = array.getArray(); + Assert.assertTrue(raw instanceof StringValue[], "Array items must be StringValue, got " + raw.getClass()); + StringValue[] values = (StringValue[]) raw; + for (int i = 0; i < elements.length; i++) { + Assert.assertEquals(values[i].toByteArray(), elements[i], "Element " + i + " bytes must be preserved"); + } + } + + @Test + public void testReadStringMapAsStringValue() throws IOException { + // Map(String, String) keys and values must be preserved as StringValue. + byte[][] keys = { + "k1".getBytes(StandardCharsets.UTF_8), + "ключ".getBytes(StandardCharsets.UTF_8), + }; + byte[][] vals = { + "v1".getBytes(StandardCharsets.UTF_8), + new byte[]{(byte) 0x00, (byte) 0xFF, (byte) 0x80}, + }; + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + BinaryStreamUtils.writeVarInt(baos, keys.length); + for (int i = 0; i < keys.length; i++) { + BinaryStreamUtils.writeString(baos, keys[i]); + BinaryStreamUtils.writeString(baos, vals[i]); + } + + ClickHouseColumn column = ClickHouseColumn.of("m", "Map(String, String)"); + Object read = reader(baos.toByteArray(), STRING_AS_BINARY).readValue(column); + + Assert.assertTrue(read instanceof Map, "Expected Map but got " + read.getClass()); + Map map = (Map) read; + Assert.assertEquals(map.size(), keys.length); + + int i = 0; + for (Map.Entry entry : map.entrySet()) { + Assert.assertTrue(entry.getKey() instanceof StringValue, "Map key must be a StringValue"); + Assert.assertTrue(entry.getValue() instanceof StringValue, "Map value must be a StringValue"); + Assert.assertEquals(((StringValue) entry.getKey()).toByteArray(), keys[i], "Key " + i + " bytes"); + Assert.assertEquals(((StringValue) entry.getValue()).toByteArray(), vals[i], "Value " + i + " bytes"); + i++; + } + + // Lookup by an equal StringValue key must work (relies on equals/hashCode over raw bytes). + Assert.assertEquals(((StringValue) map.get(new StringValue(keys[0]))).toByteArray(), vals[0]); + } + @Test public void testDefaultBehaviorReturnsString() throws IOException { byte[] encoded = "still a string".getBytes(StandardCharsets.UTF_8); diff --git a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BaseReaderTests.java b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BaseReaderTests.java index 7cd08d957..1ffdb0782 100644 --- a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BaseReaderTests.java +++ b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BaseReaderTests.java @@ -647,4 +647,47 @@ public void testReadingStringValue() throws Exception { customClient.close(); } } + + /** + * Regression test for https://github.com/ClickHouse/clickhouse-java/issues/1397: a String value that holds + * arbitrary binary content (here a SHA-512 hash, which is almost never valid UTF-8) must be read back byte + * for byte instead of being mangled by lossy UTF-8 decoding. + */ + @Test(groups = {"integration"}) + public void testReadingBinaryStringFromHash() throws Exception { + final String message = "abc"; + final byte[] expectedHash = java.security.MessageDigest.getInstance("SHA-512") + .digest(message.getBytes(java.nio.charset.StandardCharsets.UTF_8)); + Assert.assertEquals(expectedHash.length, 64); + + java.util.Map> typeHints = new java.util.HashMap<>(); + typeHints.put(ClickHouseDataType.String, StringValue.class); + typeHints.put(ClickHouseDataType.FixedString, StringValue.class); + + Client customClient = newClient() + .typeHintMapping(typeHints) + .build(); + + final String query = "SELECT SHA512('" + message + "') AS hash"; + try { + try (QueryResponse response = customClient.query(query).get()) { + ClickHouseBinaryFormatReader reader = customClient.newBinaryFormatReader(response); + Assert.assertNotNull(reader.next()); + + StringValue hash = (StringValue) reader.readValue("hash"); + Assert.assertEquals(hash.size(), expectedHash.length); + Assert.assertEquals(hash.toByteArray(), expectedHash, + "Binary hash bytes must be preserved exactly"); + // getByteArray must agree with the raw StringValue bytes + Assert.assertEquals(reader.getByteArray("hash"), expectedHash); + } + + List records = customClient.queryAll(query); + Assert.assertEquals(records.size(), 1); + Assert.assertEquals(records.get(0).getByteArray("hash"), expectedHash, + "Binary hash read via queryAll must match the locally computed digest"); + } finally { + customClient.close(); + } + } } \ No newline at end of file diff --git a/client-v2/src/test/java/com/clickhouse/client/datatypes/RowBinaryFormatWriterTest.java b/client-v2/src/test/java/com/clickhouse/client/datatypes/RowBinaryFormatWriterTest.java index e36f8a480..2ed1889c9 100644 --- a/client-v2/src/test/java/com/clickhouse/client/datatypes/RowBinaryFormatWriterTest.java +++ b/client-v2/src/test/java/com/clickhouse/client/datatypes/RowBinaryFormatWriterTest.java @@ -444,6 +444,79 @@ public void writeBinaryStringsTest() throws Exception { customClient.close(); } + @Test (groups = { "integration" }) + public void writeAndReadImageTest() throws Exception { + // Demonstrates that large binary blobs (here a ~10KB PNG) survive a full write/read round-trip + // through a String column without being corrupted by lossy UTF-8 decoding. + byte[] imageData = readResource("clickhouse-logo.png"); + org.testng.Assert.assertTrue(imageData.length > 1024, "Expected a non-trivial binary payload"); + + String tableName = "rowBinaryFormatWriterTest_writeAndReadImageTest_" + UUID.randomUUID().toString().replace('-', '_'); + String tableCreate = "CREATE TABLE \"" + tableName + "\" " + + " (id Int32, image String) Engine = MergeTree ORDER BY id"; + + initTable(tableName, tableCreate, new CommandSettings()); + TableSchema schema = client.getTableSchema(tableName); + + ClickHouseFormat format = ClickHouseFormat.RowBinaryWithDefaults; + try (InsertResponse response = client.insert(tableName, out -> { + RowBinaryFormatWriter w = new RowBinaryFormatWriter(out, schema, format); + w.setValue(schema.nameToColumnIndex("id"), 1); + w.setValue(schema.nameToColumnIndex("image"), imageData); + w.commitRow(); + }, format, settings).get()) { + System.out.println("Image bytes written: " + imageData.length + ", rows: " + response.getWrittenRows()); + } + + Map> typeHints = new HashMap<>(); + typeHints.put(com.clickhouse.data.ClickHouseDataType.String, + com.clickhouse.client.api.data_formats.StringValue.class); + + try (Client customClient = newClient().typeHintMapping(typeHints).build()) { + // Idiomatic path: stream rows and read the binary payload via the index-based getByteArray(int). + try (com.clickhouse.client.api.query.QueryResponse response = + customClient.query("SELECT * FROM \"" + tableName + "\" ORDER BY id").get()) { + com.clickhouse.client.api.data_formats.ClickHouseBinaryFormatReader reader = + customClient.newBinaryFormatReader(response); + org.testng.Assert.assertNotNull(reader.next()); + + int imageIndex = reader.getSchema().nameToColumnIndex("image"); + byte[] streamed = reader.getByteArray(imageIndex); + org.testng.Assert.assertEquals(streamed, imageData, + "Image bytes read via getByteArray(int) must match the source exactly"); + // The name-based overload must agree with the index-based one. + org.testng.Assert.assertEquals(reader.getByteArray("image"), streamed); + } + + List records = customClient.queryAll("SELECT * FROM \"" + tableName + "\" ORDER BY id"); + assertEquals(records.size(), 1); + + GenericRecord record = records.get(0); + // Raw bytes must be preserved exactly, regardless of how they are accessed. + org.testng.Assert.assertEquals(record.getByteArray("image"), imageData, + "Image bytes read back via getByteArray must match the source exactly"); + + com.clickhouse.client.api.data_formats.StringValue value = + (com.clickhouse.client.api.data_formats.StringValue) record.getObject("image"); + org.testng.Assert.assertEquals(value.size(), imageData.length); + org.testng.Assert.assertEquals(value.toByteArray(), imageData, + "StringValue must preserve the full binary payload"); + } + } + + private byte[] readResource(String name) throws IOException { + try (java.io.InputStream is = getClass().getClassLoader().getResourceAsStream(name)) { + org.testng.Assert.assertNotNull(is, "Test resource not found on classpath: " + name); + java.io.ByteArrayOutputStream buffer = new java.io.ByteArrayOutputStream(); + byte[] chunk = new byte[8192]; + int read; + while ((read = is.read(chunk)) != -1) { + buffer.write(chunk, 0, read); + } + return buffer.toByteArray(); + } + } + @Test (groups = { "integration" }) public void writeDatetimeTests() throws Exception { String tableName = "rowBinaryFormatWriterTest_writeDatetimeTests_" + UUID.randomUUID().toString().replace('-', '_'); diff --git a/client-v2/src/test/java/com/clickhouse/client/insert/SamplePOJO.java b/client-v2/src/test/java/com/clickhouse/client/insert/SamplePOJO.java index 4d0de854e..920f86317 100644 --- a/client-v2/src/test/java/com/clickhouse/client/insert/SamplePOJO.java +++ b/client-v2/src/test/java/com/clickhouse/client/insert/SamplePOJO.java @@ -5,6 +5,9 @@ import lombok.Setter; import org.apache.commons.lang3.RandomStringUtils; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; import java.math.BigDecimal; import java.math.BigInteger; import java.net.Inet4Address; @@ -146,7 +149,8 @@ public SamplePOJO() { string = RandomStringUtils.randomAlphabetic(1, 256); fixedString = RandomStringUtils.randomAlphabetic(3); - binaryString = new byte[] { (byte) 0xDE, (byte) 0xAD, (byte) 0xBE, (byte) 0xEF }; + // Use a real binary blob (a PNG image) to exercise inserting/reading large non-UTF-8 String values. + binaryString = loadClickHouseLogo(); date = LocalDate.now(); date32 = LocalDate.now(); @@ -209,6 +213,23 @@ public SamplePOJO() { keyword = "database"; } + private static byte[] loadClickHouseLogo() { + try (InputStream is = SamplePOJO.class.getClassLoader().getResourceAsStream("clickhouse-logo.png")) { + if (is == null) { + throw new IllegalStateException("Test resource not found on classpath: clickhouse-logo.png"); + } + ByteArrayOutputStream buffer = new ByteArrayOutputStream(); + byte[] chunk = new byte[8192]; + int read; + while ((read = is.read(chunk)) != -1) { + buffer.write(chunk, 0, read); + } + return buffer.toByteArray(); + } catch (IOException e) { + throw new IllegalStateException("Failed to read test resource clickhouse-logo.png", e); + } + } + @Override public String toString() { return "SamplePOJO{" + diff --git a/client-v2/src/test/resources/clickhouse-logo.png b/client-v2/src/test/resources/clickhouse-logo.png new file mode 100644 index 0000000000000000000000000000000000000000..d68e65e1107ac8b7ab5b091164df05b8551b0120 GIT binary patch literal 9874 zcmd6N`9G9x^#3(880!#OvNeTdZ?h(*EGeNR%OGpEq3m0h`;jHd^0bgWvc{mt3?us> z$`&$&7!nm(LYAoSP0#21dVQbQ_Ye5oKX`dvbKmE>&N=V1UMKdl;du^LK~?|&#|1rY zV*pTC`2PrICiri|rBW^cj#ymKK5G(q{o4mLMebTCqvw4+9P+Mt1avXwFxM5XHpm>u zsKdw|sV#db8Q7aK=RVEl<~aHz>#iP`gZtC|NCA#Mf!Mnc$0=Q%D`$^KQoa)d)DmgN*lb-N-|5x(l=YtAl%hy8cZgR6^MHQ3!v_WJY^+Ftc^0}e{?wE- zuyi4sfFBAfGm4;U5MK<0Iq7_IYHf?aiow>Jc*6;Pl=wkLSv_5w-wl3Mo@O6wNP}Or zDrn=*d`>n7<~&VAylmfDd~X;0xXXjU!XIHgJ8`s&jC4)1d2VAL{PuP_pT2K2?IMD{ zPQDziIS{-{*vY3EA^rx@y8B%zA3EtMn(HL|RN{qkxx%>J)h;(L5bF(&?=F|L{cn0% zb|2K(&*{?|PT-hR0KfBY*|rXlqEk~-QF}{P`vjoCr znzshvwNwjU4E72JOH%~|A3M z_fS{9;#h&I%rNL;Vow3^4;A9oyFCQo6yD~}L#ZJOFdD>L0e?ML+71e=6bHQjK zbnIc`IjE3>D#3Pk!{iws5y$~iTG)HY=@nvn4zJ$VLQXcA0sk~1T4AIKdhg=tg)S&% zq)XUQdXpRjx}1e^4S8)m-^$4DC(ucZ7%Z$STGF1Xcu^Jn5Mh-=6#D&k_JKW2Q4FS7 z|No(Q7-SusF=fAUrQHjDD;2Na96(Bt>qR*Jg}RHno2obsxIY1V3sA~I-`Og_FbA;N zSZ15uJAh8|voPZ1TIMD?bou}bI+%hnz@fSe0rP*}vdn0RYWe4Cl$%ttV;SfdKv*H# zf@xjH;yhtjiF_)M3iQN6YY!C3k(m~N|8x5L-0Yb1-3Dx`SAJz}jTv{VL0k|DSmmSf z5iL{W&D{sroITY3!>5kdB0HF!6S8PF$QfAAefwfOxTfTXokNQ?)fzHGY%oA`WZTxC zq?-vabdjDh6`)CGBLifJC`oVYgZ2LKTdw~(4Id-jHlrOYDH(;E9aa-@krZVX!OR z|B+}XvO+6LRiKeYtMmwl=%zoMx^oLLCNQ!bxT9PnjGIaS4^)mImu_?TPh4;1`Ax&u z>f3!P)BL#7*Vy?9oS69UX|9}gw)C?1?F$nMy|$F5041eEe~jXr!3zhRAc;* zCIX9f^S@9V1c8YSt_OaFiMF6sp)TeBNS=snO13*$%gs+S$5)pN<3<>5<$#cUknfg- z_x}rrVQEljGf|LlNqYlxYYUsLK250K4DTNCU;I zO9_2=yaMdIHIaG(n4e5w$PFc9+a4F7Up4g6U&1%d=ZwW1iKA&|CCH8Hh&&wrWH1AkIw2-V$F z&)>lk#>dpKOJvy?D@zvw=6 zlW_^3HA=Sj@B*)S)F$f0wkCor4r~}aQ3wwAaJHmgrX&)y@UL>*PrFz?*o$PttQ=u4 z4i=GT9-}%O?I?o>qO_t$JTt0}>)0TfA#1kv$;q0+K<1Id=%kw{JqjT(#ExDXKXLpt zL1>f?9we~U0Sb<@AGO`{`>Mw+kBqA*y4sn*^BcEcPhJmNDcFk`?#nxM786AbDKI;* z0jdEZ1jU?FT}%DYxY=lGE+-;V%Z|0($0rN-_}XV-+)FmB-8G7WE6Qm;Tt&)k6rYaL z69nP=OH)QWKM2v{G{1{@lqvlSQ-4p$C>q_~<+I3iv%>U5p3#ew z0Z%kiV4NiLD`mOAEJB~A3sYD9LHYIXn<|K=VmscFI1p03G_}w|`d|?CxS^*HC1I~Q zV+GI#miNWnRhtZiHq%bkw5&?vZ~w`^#L)D!TlNw!t7&Ng7_7`lQ>K49y@F@qUc+Nr z){*(ZTAyK#G_w~+RQ1B;&krHQ)g`lz`7E=JH{i?RG}jc>F}iA-8pcO$Te#4I<4dUV z8djc?!ONn|kiki`n^c5j5Ev&i@+|*+jCeHhINp>|Q1y#((7N>`T+R`_02Bz2QR0Y+QfIP1NXS=k?S# z`;-Wlg1UiI)|S7m6Xaog>tDYv_XIMNw)_^U3Xynl)V!5`AA#AiIU1)eEz!oqyB*fET2(ZwV+Vd%$8M!b4Qq3AV@10zD00sS1B{)f2(yiVYgMEk?){2u*v6y>Dz04CaH#NJyRr<+= zC$bkAGd*LgSK@DmY5Ks%hn!qkdT56A-A(om)I=EaXXg36vh6~M%c3n#97(ONs_qL8 zHCd{?ydMYZOxPsU@B}gprZCv$KNS_we*fPFEju8=QhEQdyE$1BntMiH?6q(@vdI@8QDLdil^`N4c+W7S> zPY9my*b}7QZwV%uw^ou~B}GOZ2sD#uCy)*080|jKvajM}qJ(h;J}{%y_zJv5XyM{3 zLD&fyQ!c*IC!1!IVUHwiwW0PZC8V?n3w5QBAf_(_Lk#Le-*U)qD9`c;nFdlV?F}3T zES|`ud`u+2N&+euvoABu?huRp+*o|_8w$|z<~vsO-5mCWJ)lX47*v`bOXC;AVB1|M z8O+{^N{&@2p_pUjWKX7^jMgtbQtov14oYtg7@t`GD1tAPW7G=!EK7XYiEumt762!Isj%~DrRJ4zx5>O5)gLrxUbBl=|=$9d{AKc$*>yW!KCX0~ipfY%8=(32;KioQ zKA~@}{gEiWL16q(zsbi<-Wo6_F&6Z?&qdWAo%EVASH~Ae$Iwr^W;V!-EsEc+Y&Hg% z!8_;c%-B?*OS7;>w)^|}rJFhM&a%3KxQjo233R@5=Z@2b#!0mynS>f(|X@g$!Z2Ughd@g03`%*_p6c&FHhhg#Mh7RGs62d$qkZYFgi z^6(&t!e{82M*DJc(s4DEWGD?YL(RxobZus+t|%W^%jL~yJeEzJoelHYb&AvqiGg#QsvukW0^%*ofNU>RbI^{>5?CTUct{ zyHXRtsr;ojB!zcA%8KkL<8ga5?;U2Ht;UYQ4kGmy#7)yIm^sYGecGqPadUZaUM>n5 zwCI-QQ@)OOQr_p$7Va#KuOt{G^sFD087^9Rny)zzPGrIWSavK_FhMq;?@-z-N10I_ zf8mccx7uCU#eh;u_Cz`T^w)M!WsP^H3FDqKa>*Y_a+qncWEO}1+Cn4vpHzf$8`Q3D|TXtx-Z2gzKnN_yctE)+sy+% zJ6jh-v3X_R^Fau|ospj^2J@!h8n<~Cz` zJK3?-QQ4D}&Bmd|OP)edJ&7Zf4vZxr_aYshx(jo@L{Ev*+(VHVtSfNm<|}q-$p@G( zTV>3UGC^ym)iPP*0-PENtT66{E#ZLfa>{yQH@G4^aHW z7Z(InArltt{HYl=m~{L@7xERBd(qZvP~JyRBf$Ultn^EbOZY{{EJmo3gz%Zl)X&cQ zqZ%~b2PPl*H|@ZkeN0JX))|fYd(@Aq3zvqQ)7AZ3Nph9bdk24}!(;M|lng2wm86@; z@se=8k#)>5O>5fwM?pv1zI}`Rj7n~(r3+`4GCaX+Iym7S)hMQuot`5E3nqf^{zd@t zxOggTD&1kl;pU$|jU>YbWHC3+pO(v#^~~=j6aC6a5d+#Kb3X=qLnQ#!RJaN~_PSbM zO(M(sHj84lCc%T8A~9I%nyl+`8_vX&Ot5ZiO3=LFw|<;;0>xj#5!)}SoK0b*G9-N< zyF?VaWtMqef1o6Nwy%E_gYAY>&pTLepW*#ps54 zNfgHyg{2-cXdKWrSrr3DeGVzeC4)0f|09VET#sr+>yY)XkP@O78oW!-l<73?Z;h|@Are5@S^O895zBrK|>tPa+Niw{LU`%=tbo?Y6HjX-1dk{*D z&U{OldQ~zvcl;`1qm=Ky%l7H_s&LJ5mXV8qh6Yv*E;a>W<8QTLUNfVZQyjX;_5n+W zp6|y|nrZh8>VbOck|FaQeaX3HqbXoqSc#(R$_X!P5c?ECA>kSK*tNMlfy-c0H?+n{ z6qk&wIBqtIS`zVaK`jn_%tLyX@x>Jd%YTCd9oz$5-ciR*a@aURCTsx%~v z_cjv`2%GS}XL=L0gpk|sWjRz!R-W1GxP@q{T0NcS`xZl{sl%3{dXDYLu+bd5&K>Zb zXZ+jS;QfXh!Vj+5AITIzC%GY}KjKxU@VgwDxjs8w_D--ER-R#>uK1JJi#HQ0OjadE zmM&QZ$=v2W18rxzb?g6#-n}?sZJApIbzA z90o=Vo@9-`*eB2x$+S$SrX0NZ+1ZJx_TTD4s5Nt&5_TLJ#RG#D~w1LFxu`B%9-dTJGm&c@1^D)SdG$22C zIQBZs4U9oah-+5QW%;gKdk=3zyH5q`y|3oov7QAM{4;|np_Bbs6m!(%m71KnTD)Wd zJ>Vk~2zaMf|7i~xGKgWziBk4mAhqHO(wV|7{+WzXSV6HE5ZHM_0K%_=pp3NS`rv9(&3(e z<`>-gr?Yc~B6Y~iCP{=eu71E31VMZTO+DY8@Fj~cnX&tm>wD^{dKvQ;q}g+DpXE6s z&yoMzL$cv0P}dUro9>5~J>OQwrt8KNsqNZPaJd>K_`*Hb_w65RV2~uAxf_voXapT zL+ibLe^1jcs>*7Dczm(kwtT;u$s7sM6g-?`*6KF5KDytS?8o#34C94DP0e+m&pIb%LI2!rAbv=rL& zrK?Y&b~sQ@_WtF5=b(I=^p!x#B_#2xWlQbBQ>(z+CiL7sWw-eKQTm!I0maK7s%d{v zu5I<*aDsAaERguyhWh|qM-|x?wdMx65gWel_R+OSF1d?{;mdP_FfNp#%agk0{3H=N zqV6nb8iWY^I+I=u)GgyAZPC8`#A|Hp9}r6(XIu($&`DyeEIRuY^ezvF*JlaUxe>4b zX~V@*>JFdg+nTvC75E{gDwNw`-%TmyuNCp?kT9|BWJcba8=Dro#&M=_ZPB>%Tj*&jw-aLc*<8c{babtU{cw%;7p3uzp26B^b&viyg+rsL5LH%@mScw6A^jN z+)1ye?e(u(*ZL#EI6ND6G|KP>%S~=dCasJ2N6zce@Ra8#)*Yl4g!0g@n=Z@?xprH@ z_oL=S_cTF-qm+E&Z)%kpg!D8ydu+!bs*jOVPGuq2CdR+I!CSc38elkuR>%1ge!J}i z$o%opCOMTuj*3Ya0?nc_k!6^c8{x`B9}RiRp16w|3WkeB=jK^u1^`zqyHUr7dVlV$ zieVOe^RdkIr(a$ibx(MeplbF-^$7d5Dc(1(=_wp1Ag=PmUlH71I=Q+cP(G_#5e6Ge z&5SY5xV@|dYTZ6OS-Ra?LY^;uJhg5l)RHAyhJMo;B^)UK9!D_ZSy2BjAHINDl5#^Q z>8J;IdnL{evWwZ}M2=1T)VZb9Lv+LOb2wy20aBxs-eE7zT=OZ<2%ydY=_A z@%&x;;OBv_($LZILzBA6b+#RgC$D#WJjXQF8T-4tkDWcwU?Eb@FRiMP1z1WZIO(+w z-DxDGM9=oJsk1L^-?~nkY%>_YGBxh3*?-V*8sxTd!Wk{E6oLA@wxM5U_qqNNMJHMVV zG<~#8b1;p_oPWG>9n@L|$bQjq&w+w(KeNY`O&UqCW@DT)Cbnr;`@;v02~(f5xlrY^_t6sFO~#Y--gF;yh~ zdL}ia2rXd4Kjk>GIXDYlI-OzVt8uW96QInCgl;T9fsPUQ{ zoLTrr)`-+lo*VQrIbpCLn7CMHP2?YT=KDLxKJ=>|{NcyZl-=Vg3M9qLt6DwG^QG1n zy6PCYn9dnG-m06uSaxjwCvVim9cxd}XCEt!D`j2KW257M&0FO<$-qxew>$#ikpG3M zZ7gqN?xR({UtQHZ{>K@Tf$Ek3?&z4E^p#T=-XZdSf`6Es5>Dbj-{C1ubUEQ$URB|{ zz&rAm&+F*e3qYe)$-Xg{r(6J&HLFQXJ1<8lZZwUNo-zbHd0@ezBwHHK&qhJNpN_W` z*`288H;Xz3}r$64lO?*}gMnBJd^zMMayUrcfCNcSz$E*3>|G1d07 z{QmuP&0ZMi#E;`c1ZtlT*suC3D|5k*%n=Ic2_Q=(TATdI*-orwZFNKXnGt+_pUfJ7 zn@FocnsLle+W=b|kAc#toQcBdnxmCXrybAx5Re|D(kC=z;qC@2Y*LcT6v@NJEb=Fd+#P)aCuBS)+VWZwtxUI ztb}MYP9am9Cnnwb=d<^XYxx6t_Ql0P{qX<|mBPDh!b>Y&#-BqrLaX}h3y+m@x7Bvm zJ@vWah$i^It115E3^Fiu&zq|SelgKndDOBQ$;ii38#~RsB?X%oD1lzO0^_H4zODg4 z;fHow7!QoI%q)p#iShRq);aVn_RqEa{=P!hoNar`iBOU7eI8Uz?Cbwf$%mBAMhDc) zRve8kevpH&(|dKObu2F_yF8|{N>m|skIU55_qZtAMlXRhD*DbvUzNT(mYoM2EjscI zm~`(ItDOKP44@%A!{raK)4e7a2LGOHK14NQ+P!0r+SE<=>d2-^{i&mN$AV1ZdKQj4 z@GwoPh1_0tvb*X;{xcY}yA0=fyW Date: Mon, 29 Jun 2026 22:12:37 -0700 Subject: [PATCH 03/10] implemented feature flag to enable only top level strings to be converted to StringValue --- .../com/clickhouse/client/api/Client.java | 25 +++++- .../client/api/ClientConfigProperties.java | 8 ++ .../api/data_formats/NativeFormatReader.java | 9 +- .../data_formats/RowBinaryFormatReader.java | 11 ++- ...owBinaryWithNamesAndTypesFormatReader.java | 10 ++- .../RowBinaryWithNamesFormatReader.java | 11 ++- .../internal/AbstractBinaryFormatReader.java | 8 +- .../internal/BinaryStreamReader.java | 60 +++++++------- .../com/clickhouse/client/ClientTests.java | 9 +- .../api/data_formats/StringValueTests.java | 82 ++++++++----------- .../internal/BaseReaderTests.java | 12 +-- .../internal/BinaryStreamReaderTests.java | 3 +- .../internal/SerializerUtilsTest.java | 2 +- .../datatypes/RowBinaryFormatWriterTest.java | 12 +-- .../clickhouse/client/insert/InsertTests.java | 4 +- 15 files changed, 152 insertions(+), 114 deletions(-) diff --git a/client-v2/src/main/java/com/clickhouse/client/api/Client.java b/client-v2/src/main/java/com/clickhouse/client/api/Client.java index 5cf21a52e..19ee54ac2 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/Client.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/Client.java @@ -135,6 +135,8 @@ public class Client implements AutoCloseable { private final Map> typeHintMapping; + private final boolean binaryStringSupport; + // Server context private String dbUser; private String serverVersion; @@ -200,6 +202,7 @@ private Client(Collection endpoints, Map configuration, this.serverVersion = configuration.getOrDefault(ClientConfigProperties.SERVER_VERSION.getKey(), "unknown"); this.dbUser = configuration.getOrDefault(ClientConfigProperties.USER.getKey(), ClientConfigProperties.USER.getDefObjVal()); this.typeHintMapping = (Map>) this.configuration.get(ClientConfigProperties.TYPE_HINT_MAPPING.getKey()); + this.binaryStringSupport = ClientConfigProperties.BINARY_STRING_SUPPORT.getOrDefault(this.configuration); } /** @@ -1119,6 +1122,20 @@ public Builder typeHintMapping(Map> typeHintMapping return this; } + /** + * Enables reading top-level {@code String} and {@code FixedString} columns as + * {@link com.clickhouse.client.api.data_formats.StringValue}, preserving the raw bytes instead of + * decoding them into a {@link String}. Values nested inside containers (Array, Map, Tuple, Nested, + * Variant) are still read as {@link String}. + * + * @param enable - if the feature is enabled + * @return this builder instance + */ + public Builder binaryStringSupport(boolean enable) { + this.configuration.put(ClientConfigProperties.BINARY_STRING_SUPPORT.getKey(), String.valueOf(enable)); + return this; + } + /** * SNI SSL parameter that will be set for each outbound SSL socket. @@ -2109,17 +2126,17 @@ public ClickHouseBinaryFormatReader newBinaryFormatReader(QueryResponse response switch (response.getFormat()) { case Native: reader = new NativeFormatReader(response.getInputStream(), response.getSettings(), - byteBufferPool, typeHintMapping); + byteBufferPool, typeHintMapping, binaryStringSupport); break; case RowBinaryWithNamesAndTypes: - reader = new RowBinaryWithNamesAndTypesFormatReader(response.getInputStream(), response.getSettings(), byteBufferPool, typeHintMapping); + reader = new RowBinaryWithNamesAndTypesFormatReader(response.getInputStream(), response.getSettings(), byteBufferPool, typeHintMapping, binaryStringSupport); break; case RowBinaryWithNames: - reader = new RowBinaryWithNamesFormatReader(response.getInputStream(), response.getSettings(), schema, byteBufferPool, typeHintMapping); + reader = new RowBinaryWithNamesFormatReader(response.getInputStream(), response.getSettings(), schema, byteBufferPool, typeHintMapping, binaryStringSupport); break; case RowBinary: reader = new RowBinaryFormatReader(response.getInputStream(), response.getSettings(), schema, - byteBufferPool, typeHintMapping); + byteBufferPool, typeHintMapping, binaryStringSupport); break; default: throw new IllegalArgumentException("Binary readers doesn't support format: " + response.getFormat()); diff --git a/client-v2/src/main/java/com/clickhouse/client/api/ClientConfigProperties.java b/client-v2/src/main/java/com/clickhouse/client/api/ClientConfigProperties.java index 9a38232ba..605d92d86 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/ClientConfigProperties.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/ClientConfigProperties.java @@ -181,6 +181,14 @@ public Object parseValue(String value) { */ TYPE_HINT_MAPPING("type_hint_mapping", Map.class), + /** + * When enabled, top-level {@code String} and {@code FixedString} columns are read into a + * {@link com.clickhouse.client.api.data_formats.StringValue} that preserves the raw bytes instead of + * decoding them into a {@link String}. Values nested inside containers (Array, Map, Tuple, Nested, Variant) + * are still read as {@link String}, since those types are not expected to carry large/binary strings. + */ + BINARY_STRING_SUPPORT("binary_string_support", Boolean.class, "false"), + /** * SNI SSL parameter that will be set for each outbound SSL socket. */ diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/NativeFormatReader.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/NativeFormatReader.java index ddcf0049b..e097e9b16 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/NativeFormatReader.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/NativeFormatReader.java @@ -29,7 +29,14 @@ public class NativeFormatReader extends AbstractBinaryFormatReader { public NativeFormatReader(InputStream inputStream, QuerySettings settings, BinaryStreamReader.ByteBufferAllocator byteBufferAllocator, Map> typeHintMapping) { - super(inputStream, settings, null, byteBufferAllocator, typeHintMapping); + this(inputStream, settings, byteBufferAllocator, typeHintMapping, false); + } + + public NativeFormatReader(InputStream inputStream, QuerySettings settings, + BinaryStreamReader.ByteBufferAllocator byteBufferAllocator, + Map> typeHintMapping, + boolean binaryStringSupport) { + super(inputStream, settings, null, byteBufferAllocator, typeHintMapping, binaryStringSupport); try { readBlock(); } catch (IOException e) { diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatReader.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatReader.java index 0d8575bb9..70507d8f9 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatReader.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatReader.java @@ -19,7 +19,16 @@ public RowBinaryFormatReader(InputStream inputStream, TableSchema schema, BinaryStreamReader.ByteBufferAllocator byteBufferAllocator, Map> typeHintMapping) { - super(inputStream, querySettings, schema, byteBufferAllocator, typeHintMapping); + this(inputStream, querySettings, schema, byteBufferAllocator, typeHintMapping, false); + } + + public RowBinaryFormatReader(InputStream inputStream, + QuerySettings querySettings, + TableSchema schema, + BinaryStreamReader.ByteBufferAllocator byteBufferAllocator, + Map> typeHintMapping, + boolean binaryStringSupport) { + super(inputStream, querySettings, schema, byteBufferAllocator, typeHintMapping, binaryStringSupport); readNextRecord(); } diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryWithNamesAndTypesFormatReader.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryWithNamesAndTypesFormatReader.java index 67bda8b84..b20acd926 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryWithNamesAndTypesFormatReader.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryWithNamesAndTypesFormatReader.java @@ -22,7 +22,15 @@ public RowBinaryWithNamesAndTypesFormatReader(InputStream inputStream, QuerySettings querySettings, BinaryStreamReader.ByteBufferAllocator byteBufferAllocator, Map> typeHintMapping) { - super(inputStream, querySettings, null, byteBufferAllocator, typeHintMapping); + this(inputStream, querySettings, byteBufferAllocator, typeHintMapping, false); + } + + public RowBinaryWithNamesAndTypesFormatReader(InputStream inputStream, + QuerySettings querySettings, + BinaryStreamReader.ByteBufferAllocator byteBufferAllocator, + Map> typeHintMapping, + boolean binaryStringSupport) { + super(inputStream, querySettings, null, byteBufferAllocator, typeHintMapping, binaryStringSupport); readSchema(); } diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryWithNamesFormatReader.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryWithNamesFormatReader.java index fcfaa7625..850861dd4 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryWithNamesFormatReader.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryWithNamesFormatReader.java @@ -23,7 +23,16 @@ public RowBinaryWithNamesFormatReader(InputStream inputStream, TableSchema schema, BinaryStreamReader.ByteBufferAllocator byteBufferAllocator, Map> typeHintMapping) { - super(inputStream, querySettings, schema, byteBufferAllocator, typeHintMapping); + this(inputStream, querySettings, schema, byteBufferAllocator, typeHintMapping, false); + } + + public RowBinaryWithNamesFormatReader(InputStream inputStream, + QuerySettings querySettings, + TableSchema schema, + BinaryStreamReader.ByteBufferAllocator byteBufferAllocator, + Map> typeHintMapping, + boolean binaryStringSupport) { + super(inputStream, querySettings, schema, byteBufferAllocator, typeHintMapping, binaryStringSupport); int nCol = 0; try { nCol = BinaryStreamReader.readVarInt(input); diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/AbstractBinaryFormatReader.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/AbstractBinaryFormatReader.java index 54b907749..e1bb37b37 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/AbstractBinaryFormatReader.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/AbstractBinaryFormatReader.java @@ -77,7 +77,11 @@ public abstract class AbstractBinaryFormatReader implements ClickHouseBinaryForm private long row = -1; // before first row private long lastNextCallTs; // for exception to detect slow reader - protected AbstractBinaryFormatReader(InputStream inputStream, QuerySettings querySettings, TableSchema schema,BinaryStreamReader.ByteBufferAllocator byteBufferAllocator, Map> defaultTypeHintMap) { + protected AbstractBinaryFormatReader(InputStream inputStream, QuerySettings querySettings, TableSchema schema, BinaryStreamReader.ByteBufferAllocator byteBufferAllocator, Map> defaultTypeHintMap) { + this(inputStream, querySettings, schema, byteBufferAllocator, defaultTypeHintMap, false); + } + + protected AbstractBinaryFormatReader(InputStream inputStream, QuerySettings querySettings, TableSchema schema, BinaryStreamReader.ByteBufferAllocator byteBufferAllocator, Map> defaultTypeHintMap, boolean binaryStringSupport) { this.input = inputStream; Map settings = querySettings == null ? Collections.emptyMap() : querySettings.getAllSettings(); Boolean useServerTimeZone = (Boolean) settings.get(ClientConfigProperties.USE_SERVER_TIMEZONE.getKey()); @@ -90,7 +94,7 @@ protected AbstractBinaryFormatReader(InputStream inputStream, QuerySettings quer boolean jsonAsString = MapUtils.getFlag(settings, ClientConfigProperties.serverSetting(ServerSettings.OUTPUT_FORMAT_BINARY_WRITE_JSON_AS_STRING), false); this.binaryStreamReader = new BinaryStreamReader(inputStream, timeZone, LOG, byteBufferAllocator, jsonAsString, - defaultTypeHintMap); + defaultTypeHintMap, binaryStringSupport); if (schema != null) { setSchema(schema); } diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/BinaryStreamReader.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/BinaryStreamReader.java index 6d0f19971..84e029133 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/BinaryStreamReader.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/BinaryStreamReader.java @@ -56,7 +56,7 @@ public class BinaryStreamReader { private final Class arrayDefaultTypeHint; - private final boolean stringAsBinaryDefault; + private final boolean binaryStringSupport; private static final int SB_INIT_SIZE = 100; @@ -71,8 +71,10 @@ public class BinaryStreamReader { * @param bufferAllocator - byte buffer allocator * @param jsonAsString - use string to serialize/deserialize JSON columns * @param typeHintMapping - what type use as hint if hint is not set or may not be known. + * @param binaryStringSupport - when {@code true}, top-level {@code String}/{@code FixedString} columns are read + * as {@link StringValue} preserving raw bytes; nested string values stay {@link String}. */ - public BinaryStreamReader(InputStream input, TimeZone timeZone, Logger log, ByteBufferAllocator bufferAllocator, boolean jsonAsString, Map> typeHintMapping) { + public BinaryStreamReader(InputStream input, TimeZone timeZone, Logger log, ByteBufferAllocator bufferAllocator, boolean jsonAsString, Map> typeHintMapping, boolean binaryStringSupport) { this.log = log == null ? NOPLogger.NOP_LOGGER : log; this.timeZone = timeZone; this.input = input; @@ -81,20 +83,7 @@ public BinaryStreamReader(InputStream input, TimeZone timeZone, Logger log, Byte this.arrayDefaultTypeHint = typeHintMapping == null || typeHintMapping.isEmpty()? NO_TYPE_HINT : typeHintMapping.get(ClickHouseDataType.Array); - this.stringAsBinaryDefault = typeHintMapping != null && - typeHintMapping.get(ClickHouseDataType.String) == StringValue.class; - } - - private boolean readStringAsBinary(Class typeHint) { - if (typeHint != null) { - if (typeHint == StringValue.class) { - return true; - } - if (typeHint == String.class) { - return false; - } - } - return stringAsBinaryDefault; + this.binaryStringSupport = binaryStringSupport; } /** @@ -118,8 +107,14 @@ public T readValue(ClickHouseColumn column) throws IOException { * @param - target type of the value * @throws IOException when IO error occurs */ - @SuppressWarnings("unchecked") public T readValue(ClickHouseColumn column, Class typeHint) throws IOException { + // Top-level reads honor the binary-string feature flag. Values nested inside containers always read + // strings as String (see readArray/readMap/readTuple/readNested/readVariant). + return readValue(column, typeHint, binaryStringSupport); + } + + @SuppressWarnings("unchecked") + private T readValue(ClickHouseColumn column, Class typeHint, boolean stringAsValue) throws IOException { if (column.isNullable()) { int isNull = readByteOrEOF(input); if (isNull == 1) { // is Null? @@ -138,7 +133,7 @@ public T readValue(ClickHouseColumn column, Class typeHint) throws IOExce switch (dataType) { // Primitives case FixedString: { - if (readStringAsBinary(typeHint)) { + if (stringAsValue) { return (T) new StringValue(readStringBytes(input, precision)); } byte[] bytes = precision > STRING_BUFF.length ? @@ -147,7 +142,7 @@ public T readValue(ClickHouseColumn column, Class typeHint) throws IOExce return (T) new String(bytes, 0, precision, StandardCharsets.UTF_8); } case String: { - if (readStringAsBinary(typeHint)) { + if (stringAsValue) { return (T) readStringValue(); } return (T) readString(); @@ -272,7 +267,7 @@ public T readValue(ClickHouseColumn column, Class typeHint) throws IOExce case Geometry: return (T) readVariant(actualColumn); case Dynamic: - return (T) readValue(actualColumn, typeHint); + return (T) readValue(actualColumn, typeHint, stringAsValue); case Nested: return convertArray(readNested(actualColumn), typeHint); default: @@ -654,10 +649,10 @@ public ArrayValue readArrayItem(ClickHouseColumn itemTypeColumn, int len) throws || itemTypeColumn.getDataType() == ClickHouseDataType.Geometry) { array = new ArrayValue(Object.class, len); for (int i = 0; i < len; i++) { - array.set(i, readValue(itemTypeColumn)); + array.set(i, readNestedValue(itemTypeColumn)); } } else { - Object firstValue = readValue(itemTypeColumn); + Object firstValue = readNestedValue(itemTypeColumn); Class itemClass = firstValue.getClass(); if (firstValue instanceof Byte) { itemClass = byte.class; @@ -684,12 +679,21 @@ public ArrayValue readArrayItem(ClickHouseColumn itemTypeColumn, int len) throws array = new ArrayValue(itemClass, len); array.set(0, firstValue); for (int i = 1; i < len; i++) { - array.set(i, readValue(itemTypeColumn)); + array.set(i, readNestedValue(itemTypeColumn)); } } return array; } + /** + * Reads a value nested inside a container (Array, Map, Tuple, Nested, Variant). Strings are always + * decoded into {@link String} here, regardless of the binary-string feature flag, because nested types + * are not expected to carry large/binary strings. + */ + private Object readNestedValue(ClickHouseColumn column) throws IOException { + return readValue(column, null, false); + } + public void skipValue(ClickHouseColumn column) throws IOException { readValue(column, null); } @@ -862,8 +866,8 @@ public String toString() { ClickHouseColumn valueType = column.getValueInfo(); LinkedHashMap map = new LinkedHashMap<>(len); for (int i = 0; i < len; i++) { - Object key = readValue(keyType); - Object value = readValue(valueType); + Object key = readNestedValue(keyType); + Object value = readNestedValue(valueType); map.put(key, value); } return map; @@ -879,7 +883,7 @@ public Object[] readTuple(ClickHouseColumn column) throws IOException { int len = column.getNestedColumns().size(); Object[] tuple = new Object[len]; for (int i = 0; i < len; i++) { - tuple[i] = readValue(column.getNestedColumns().get(i)); + tuple[i] = readNestedValue(column.getNestedColumns().get(i)); } return tuple; @@ -903,7 +907,7 @@ public ArrayValue readNested(ClickHouseColumn column) throws IOException { int tupleLen = column.getNestedColumns().size(); Object[] tuple = new Object[tupleLen]; for (int j = 0; j < tupleLen; j++) { - tuple[j] = readValue(column.getNestedColumns().get(j)); + tuple[j] = readNestedValue(column.getNestedColumns().get(j)); } array.set(i, tuple); @@ -917,7 +921,7 @@ public Object readVariant(ClickHouseColumn column) throws IOException { if (ordNum == 0xFF) { return null; } - return readValue(column.getNestedColumns().get(ordNum)); + return readNestedValue(column.getNestedColumns().get(ordNum)); } /** diff --git a/client-v2/src/test/java/com/clickhouse/client/ClientTests.java b/client-v2/src/test/java/com/clickhouse/client/ClientTests.java index 0b7b622d6..b50d3700b 100644 --- a/client-v2/src/test/java/com/clickhouse/client/ClientTests.java +++ b/client-v2/src/test/java/com/clickhouse/client/ClientTests.java @@ -333,7 +333,7 @@ public void testDefaultSettings() { Assert.assertEquals(config.get(p.getKey()), p.getDefaultValue(), "Default value doesn't match"); } } - Assert.assertEquals(config.size(), 36); // to check everything is set. Increment when new added. + Assert.assertEquals(config.size(), 37); // to check everything is set. Increment when new added. } try (Client client = new Client.Builder() @@ -364,9 +364,10 @@ public void testDefaultSettings() { .setSocketTimeout(20, SECONDS) .setSocketRcvbuf(100000) .setSocketSndbuf(100000) + .binaryStringSupport(true) .build()) { Map config = client.getConfiguration(); - Assert.assertEquals(config.size(), 37); // to check everything is set. Increment when new added. + Assert.assertEquals(config.size(), 38); // to check everything is set. Increment when new added. Assert.assertEquals(config.get(ClientConfigProperties.DATABASE.getKey()), "mydb"); Assert.assertEquals(config.get(ClientConfigProperties.MAX_EXECUTION_TIME.getKey()), "10"); Assert.assertEquals(config.get(ClientConfigProperties.COMPRESSION_LZ4_UNCOMPRESSED_BUF_SIZE.getKey()), "300000"); @@ -391,6 +392,8 @@ public void testDefaultSettings() { Assert.assertEquals(config.get(ClientConfigProperties.SOCKET_RCVBUF_OPT.getKey()), "100000"); Assert.assertEquals(config.get(ClientConfigProperties.SOCKET_SNDBUF_OPT.getKey()), "100000"); Assert.assertEquals(config.get(ClientConfigProperties.SSL_MODE.getKey()), "STRICT"); + Assert.assertEquals(config.get(ClientConfigProperties.BINARY_STRING_SUPPORT.getKey()), "true"); + } } @@ -434,7 +437,7 @@ public void testWithOldDefaults() { Assert.assertEquals(config.get(p.getKey()), p.getDefaultValue(), "Default value doesn't match"); } } - Assert.assertEquals(config.size(), 36); // to check everything is set. Increment when new added. + Assert.assertEquals(config.size(), 37); // to check everything is set. Increment when new added. } } diff --git a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java index feecf1d9f..144a0228c 100644 --- a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java +++ b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java @@ -4,7 +4,6 @@ import com.clickhouse.client.api.data_formats.internal.BinaryStreamReader; import com.clickhouse.client.api.data_formats.internal.SerializerUtils; import com.clickhouse.data.ClickHouseColumn; -import com.clickhouse.data.ClickHouseDataType; import com.clickhouse.data.format.BinaryStreamUtils; import org.testng.Assert; import org.testng.annotations.DataProvider; @@ -16,18 +15,14 @@ import java.nio.ByteBuffer; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; -import java.util.Collections; import java.util.Map; import java.util.TimeZone; public class StringValueTests { - private static final Map> STRING_AS_BINARY = - Collections.singletonMap(ClickHouseDataType.String, (Class) StringValue.class); - - private static BinaryStreamReader reader(byte[] input, Map> hints) { + private static BinaryStreamReader reader(byte[] input, boolean binaryStringSupport) { return new BinaryStreamReader(new ByteArrayInputStream(input), TimeZone.getTimeZone("UTC"), null, - new BinaryStreamReader.DefaultByteBufferAllocator(), false, hints); + new BinaryStreamReader.DefaultByteBufferAllocator(), false, null, binaryStringSupport); } // ---- StringValue API ---- @@ -135,8 +130,8 @@ public void testEqualsIsConsistentWithBinaryReads() throws IOException { byte[] wire = baos.toByteArray(); ClickHouseColumn column = ClickHouseColumn.of("s", "String"); - StringValue first = reader(wire, STRING_AS_BINARY).readValue(column); - StringValue second = reader(wire, STRING_AS_BINARY).readValue(column); + StringValue first = reader(wire, true).readValue(column); + StringValue second = reader(wire, true).readValue(column); Assert.assertEquals(first, second); Assert.assertEquals(first.hashCode(), second.hashCode()); @@ -173,7 +168,7 @@ public void testReadStringAsStringValuePreservesBytes(String value, Charset char BinaryStreamUtils.writeString(baos, encoded); // binary string write (raw bytes) ClickHouseColumn column = ClickHouseColumn.of("s", "String"); - Object read = reader(baos.toByteArray(), STRING_AS_BINARY).readValue(column); + Object read = reader(baos.toByteArray(), true).readValue(column); Assert.assertTrue(read instanceof StringValue, "Expected StringValue but got " + read.getClass()); StringValue sv = (StringValue) read; @@ -190,7 +185,7 @@ public void testReadBinaryNonUtf8IsPreserved() throws IOException { BinaryStreamUtils.writeString(baos, binary); ClickHouseColumn column = ClickHouseColumn.of("s", "String"); - StringValue sv = reader(baos.toByteArray(), STRING_AS_BINARY).readValue(column); + StringValue sv = reader(baos.toByteArray(), true).readValue(column); Assert.assertEquals(sv.toByteArray(), binary, "Binary content must be preserved exactly"); Assert.assertEquals(AbstractBinaryFormatReader.stringLikeToBytes(sv), binary, @@ -204,30 +199,26 @@ public void testFixedStringAsStringValue() throws IOException { baos.write(binary); // FixedString(5) is written as exactly 5 raw bytes ClickHouseColumn column = ClickHouseColumn.of("s", "FixedString(5)"); - Object read = reader(baos.toByteArray(), STRING_AS_BINARY).readValue(column); + Object read = reader(baos.toByteArray(), true).readValue(column); Assert.assertTrue(read instanceof StringValue); Assert.assertEquals(((StringValue) read).toByteArray(), binary); } @Test - public void testReadStringArrayAsStringValue() throws IOException { - // Array(String) elements must be preserved as StringValue (including non-UTF-8 content). - byte[][] elements = { - "plain".getBytes(StandardCharsets.UTF_8), - "Привет".getBytes(StandardCharsets.UTF_8), - new byte[]{(byte) 0xDE, (byte) 0xAD, (byte) 0xBE, (byte) 0xEF}, - new byte[0], - }; + public void testReadStringArrayKeepsStringWhenBinarySupportEnabled() throws IOException { + // Even with the binary-string feature enabled, nested Array(String) elements are read as String: + // nested types are not expected to carry large/binary strings. + String[] elements = {"plain", "Привет", "中文", ""}; ByteArrayOutputStream baos = new ByteArrayOutputStream(); BinaryStreamUtils.writeVarInt(baos, elements.length); - for (byte[] element : elements) { - BinaryStreamUtils.writeString(baos, element); + for (String element : elements) { + BinaryStreamUtils.writeString(baos, element.getBytes(StandardCharsets.UTF_8)); } ClickHouseColumn column = ClickHouseColumn.of("a", "Array(String)"); - Object read = reader(baos.toByteArray(), STRING_AS_BINARY).readValue(column); + Object read = reader(baos.toByteArray(), true).readValue(column); Assert.assertTrue(read instanceof BinaryStreamReader.ArrayValue, "Expected ArrayValue but got " + read.getClass()); @@ -235,34 +226,28 @@ public void testReadStringArrayAsStringValue() throws IOException { Assert.assertEquals(array.length(), elements.length); Object raw = array.getArray(); - Assert.assertTrue(raw instanceof StringValue[], "Array items must be StringValue, got " + raw.getClass()); - StringValue[] values = (StringValue[]) raw; + Assert.assertTrue(raw instanceof String[], "Nested array items must be String, got " + raw.getClass()); + String[] values = (String[]) raw; for (int i = 0; i < elements.length; i++) { - Assert.assertEquals(values[i].toByteArray(), elements[i], "Element " + i + " bytes must be preserved"); + Assert.assertEquals(values[i], elements[i], "Element " + i + " must round-trip as String"); } } @Test - public void testReadStringMapAsStringValue() throws IOException { - // Map(String, String) keys and values must be preserved as StringValue. - byte[][] keys = { - "k1".getBytes(StandardCharsets.UTF_8), - "ключ".getBytes(StandardCharsets.UTF_8), - }; - byte[][] vals = { - "v1".getBytes(StandardCharsets.UTF_8), - new byte[]{(byte) 0x00, (byte) 0xFF, (byte) 0x80}, - }; + public void testReadStringMapKeepsStringWhenBinarySupportEnabled() throws IOException { + // Even with the binary-string feature enabled, nested Map(String, String) keys and values are read as String. + String[] keys = {"k1", "ключ"}; + String[] vals = {"v1", "значение"}; ByteArrayOutputStream baos = new ByteArrayOutputStream(); BinaryStreamUtils.writeVarInt(baos, keys.length); for (int i = 0; i < keys.length; i++) { - BinaryStreamUtils.writeString(baos, keys[i]); - BinaryStreamUtils.writeString(baos, vals[i]); + BinaryStreamUtils.writeString(baos, keys[i].getBytes(StandardCharsets.UTF_8)); + BinaryStreamUtils.writeString(baos, vals[i].getBytes(StandardCharsets.UTF_8)); } ClickHouseColumn column = ClickHouseColumn.of("m", "Map(String, String)"); - Object read = reader(baos.toByteArray(), STRING_AS_BINARY).readValue(column); + Object read = reader(baos.toByteArray(), true).readValue(column); Assert.assertTrue(read instanceof Map, "Expected Map but got " + read.getClass()); Map map = (Map) read; @@ -270,15 +255,14 @@ public void testReadStringMapAsStringValue() throws IOException { int i = 0; for (Map.Entry entry : map.entrySet()) { - Assert.assertTrue(entry.getKey() instanceof StringValue, "Map key must be a StringValue"); - Assert.assertTrue(entry.getValue() instanceof StringValue, "Map value must be a StringValue"); - Assert.assertEquals(((StringValue) entry.getKey()).toByteArray(), keys[i], "Key " + i + " bytes"); - Assert.assertEquals(((StringValue) entry.getValue()).toByteArray(), vals[i], "Value " + i + " bytes"); + Assert.assertTrue(entry.getKey() instanceof String, "Nested map key must be a String"); + Assert.assertTrue(entry.getValue() instanceof String, "Nested map value must be a String"); + Assert.assertEquals(entry.getKey(), keys[i], "Key " + i); + Assert.assertEquals(entry.getValue(), vals[i], "Value " + i); i++; } - // Lookup by an equal StringValue key must work (relies on equals/hashCode over raw bytes). - Assert.assertEquals(((StringValue) map.get(new StringValue(keys[0]))).toByteArray(), vals[0]); + Assert.assertEquals(map.get(keys[0]), vals[0]); } @Test @@ -288,7 +272,7 @@ public void testDefaultBehaviorReturnsString() throws IOException { BinaryStreamUtils.writeString(baos, encoded); ClickHouseColumn column = ClickHouseColumn.of("s", "String"); - Object read = reader(baos.toByteArray(), AbstractBinaryFormatReader.NO_TYPE_HINT_MAPPING).readValue(column); + Object read = reader(baos.toByteArray(), false).readValue(column); Assert.assertTrue(read instanceof String, "Without a type hint Strings must still be returned as String"); Assert.assertEquals(read, "still a string"); @@ -303,7 +287,7 @@ public void testWriteByteArrayToStringRoundTrip() throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); SerializerUtils.serializeData(baos, binary, column); - StringValue read = reader(baos.toByteArray(), STRING_AS_BINARY).readValue(column); + StringValue read = reader(baos.toByteArray(), true).readValue(column); Assert.assertEquals(read.toByteArray(), binary); } @@ -315,7 +299,7 @@ public void testWriteStringValueToStringRoundTrip() throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); SerializerUtils.serializeData(baos, value, column); - StringValue read = reader(baos.toByteArray(), STRING_AS_BINARY).readValue(column); + StringValue read = reader(baos.toByteArray(), true).readValue(column); Assert.assertEquals(read.toByteArray(), binary); } @@ -326,7 +310,7 @@ public void testWriteByteArrayToFixedStringRoundTrip() throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); SerializerUtils.serializeData(baos, binary, column); - StringValue read = reader(baos.toByteArray(), STRING_AS_BINARY).readValue(column); + StringValue read = reader(baos.toByteArray(), true).readValue(column); Assert.assertEquals(read.toByteArray(), binary); } } diff --git a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BaseReaderTests.java b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BaseReaderTests.java index 1ffdb0782..93ded550e 100644 --- a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BaseReaderTests.java +++ b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BaseReaderTests.java @@ -581,12 +581,8 @@ public void testReadingStringValue() throws Exception { client.execute("CREATE TABLE " + table + " (id Int32, s String, fs FixedString(5), e FixedString(1)) ENGINE = Memory").get(); client.execute("INSERT INTO " + table + " VALUES (1, 'hello', 'world', 'a'), (2, 'ClickHouse', 'Rocks', 'b')").get(); - java.util.Map> typeHints = new java.util.HashMap<>(); - typeHints.put(ClickHouseDataType.String, StringValue.class); - typeHints.put(ClickHouseDataType.FixedString, StringValue.class); - Client customClient = newClient() - .typeHintMapping(typeHints) + .binaryStringSupport(true) .build(); try { @@ -660,12 +656,8 @@ public void testReadingBinaryStringFromHash() throws Exception { .digest(message.getBytes(java.nio.charset.StandardCharsets.UTF_8)); Assert.assertEquals(expectedHash.length, 64); - java.util.Map> typeHints = new java.util.HashMap<>(); - typeHints.put(ClickHouseDataType.String, StringValue.class); - typeHints.put(ClickHouseDataType.FixedString, StringValue.class); - Client customClient = newClient() - .typeHintMapping(typeHints) + .binaryStringSupport(true) .build(); final String query = "SELECT SHA512('" + message + "') AS hash"; diff --git a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BinaryStreamReaderTests.java b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BinaryStreamReaderTests.java index 0d94e0a5f..a0a8a7adb 100644 --- a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BinaryStreamReaderTests.java +++ b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BinaryStreamReaderTests.java @@ -200,7 +200,8 @@ public void testReadNullVariantReturnsNull() throws Exception { null, new BinaryStreamReader.CachingByteBufferAllocator(), false, - null); + null, + false); Assert.assertNull(reader.readValue(column)); } diff --git a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/SerializerUtilsTest.java b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/SerializerUtilsTest.java index 265cd1cfb..eb2a83fb8 100644 --- a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/SerializerUtilsTest.java +++ b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/SerializerUtilsTest.java @@ -25,7 +25,7 @@ public class SerializerUtilsTest { private BinaryStreamReader newReader(byte[] data) { return new BinaryStreamReader(new ByteArrayInputStream(data), TimeZone.getTimeZone("UTC"), null, - new BinaryStreamReader.DefaultByteBufferAllocator(), false, null); + new BinaryStreamReader.DefaultByteBufferAllocator(), false, null, false); } @Test diff --git a/client-v2/src/test/java/com/clickhouse/client/datatypes/RowBinaryFormatWriterTest.java b/client-v2/src/test/java/com/clickhouse/client/datatypes/RowBinaryFormatWriterTest.java index 2ed1889c9..b355cb785 100644 --- a/client-v2/src/test/java/com/clickhouse/client/datatypes/RowBinaryFormatWriterTest.java +++ b/client-v2/src/test/java/com/clickhouse/client/datatypes/RowBinaryFormatWriterTest.java @@ -424,12 +424,8 @@ public void writeBinaryStringsTest() throws Exception { System.out.println("Rows written (manual): " + response.getWrittenRows()); } - java.util.Map> typeHints = new java.util.HashMap<>(); - typeHints.put(com.clickhouse.data.ClickHouseDataType.String, com.clickhouse.client.api.data_formats.StringValue.class); - typeHints.put(com.clickhouse.data.ClickHouseDataType.FixedString, com.clickhouse.client.api.data_formats.StringValue.class); - Client customClient = newClient() - .typeHintMapping(typeHints) + .binaryStringSupport(true) .build(); List records = customClient.queryAll("SELECT * FROM \"" + tableName + "\" ORDER BY id" ); @@ -468,11 +464,7 @@ public void writeAndReadImageTest() throws Exception { System.out.println("Image bytes written: " + imageData.length + ", rows: " + response.getWrittenRows()); } - Map> typeHints = new HashMap<>(); - typeHints.put(com.clickhouse.data.ClickHouseDataType.String, - com.clickhouse.client.api.data_formats.StringValue.class); - - try (Client customClient = newClient().typeHintMapping(typeHints).build()) { + try (Client customClient = newClient().binaryStringSupport(true).build()) { // Idiomatic path: stream rows and read the binary payload via the index-based getByteArray(int). try (com.clickhouse.client.api.query.QueryResponse response = customClient.query("SELECT * FROM \"" + tableName + "\" ORDER BY id").get()) { diff --git a/client-v2/src/test/java/com/clickhouse/client/insert/InsertTests.java b/client-v2/src/test/java/com/clickhouse/client/insert/InsertTests.java index 5045e290c..7bb632ca3 100644 --- a/client-v2/src/test/java/com/clickhouse/client/insert/InsertTests.java +++ b/client-v2/src/test/java/com/clickhouse/client/insert/InsertTests.java @@ -199,11 +199,11 @@ public void insertPOJOAndReadBack() throws Exception { try (QueryResponse queryResponse = client.query("SELECT * FROM " + tableName + " LIMIT 1").get(EXECUTE_CMD_TIMEOUT, TimeUnit.SECONDS)) { - // To read the binaryString properly as raw bytes, we must map String to StringValue + // To read the binaryString properly as raw bytes, we must enable binary string support Client readerClient = client; if (pojo.getBinaryString() != null) { readerClient = newClient() - .typeHintMapping(java.util.Collections.singletonMap(com.clickhouse.data.ClickHouseDataType.String, com.clickhouse.client.api.data_formats.StringValue.class)) + .binaryStringSupport(true) .build(); } ClickHouseBinaryFormatReader reader = readerClient.newBinaryFormatReader(queryResponse); From b3b12754f404f62c10086592e28a83b7f31a2976 Mon Sep 17 00:00:00 2001 From: Sergey Chernov Date: Mon, 29 Jun 2026 22:42:42 -0700 Subject: [PATCH 04/10] Added support of binary string to JDBC --- .../com/clickhouse/jdbc/ResultSetImpl.java | 23 +++- .../clickhouse/jdbc/JdbcDataTypeTests.java | 112 ++++++++++++++++++ 2 files changed, 130 insertions(+), 5 deletions(-) diff --git a/jdbc-v2/src/main/java/com/clickhouse/jdbc/ResultSetImpl.java b/jdbc-v2/src/main/java/com/clickhouse/jdbc/ResultSetImpl.java index 62259be99..08bd9be25 100644 --- a/jdbc-v2/src/main/java/com/clickhouse/jdbc/ResultSetImpl.java +++ b/jdbc-v2/src/main/java/com/clickhouse/jdbc/ResultSetImpl.java @@ -287,7 +287,23 @@ public InputStream getUnicodeStream(int columnIndex) throws SQLException { @Override public InputStream getBinaryStream(int columnIndex) throws SQLException { - return getBinaryStream(columnIndexToName(columnIndex)); + try { + if (reader.hasValue(columnIndex)) { + byte[] bytes = reader.getByteArray(columnIndex); + if (bytes == null) { + wasNull = true; + return null; + } + wasNull = false; + return new ByteArrayInputStream(bytes); + } else { + wasNull = true; + return null; + } + } catch (Exception e) { + throw ExceptionUtils.toSqlState(String.format("Method: getBinaryStream(\"%d\") encountered an exception.", columnIndex), + String.format("SQL: [%s]", parentStatement.getLastStatementSql()), e); + } } @Override @@ -470,10 +486,7 @@ public InputStream getUnicodeStream(String columnLabel) throws SQLException { @Override public InputStream getBinaryStream(String columnLabel) throws SQLException { - checkClosed(); - featureManager.unsupportedFeatureThrow("getBinaryStream"); - - return null; + return getBinaryStream(getSchema().nameToColumnIndex(columnLabel)); } @Override diff --git a/jdbc-v2/src/test/java/com/clickhouse/jdbc/JdbcDataTypeTests.java b/jdbc-v2/src/test/java/com/clickhouse/jdbc/JdbcDataTypeTests.java index ed0b00cb9..30b2f2ccd 100644 --- a/jdbc-v2/src/test/java/com/clickhouse/jdbc/JdbcDataTypeTests.java +++ b/jdbc-v2/src/test/java/com/clickhouse/jdbc/JdbcDataTypeTests.java @@ -18,6 +18,7 @@ import org.testng.annotations.Test; import java.io.IOException; +import java.io.InputStream; import java.math.BigDecimal; import java.math.BigInteger; import java.net.Inet4Address; @@ -1544,6 +1545,117 @@ public void testStringsUsedAsBytes() throws Exception { } } + private static byte[] readClickHouseLogo() throws IOException { + try (InputStream is = JdbcDataTypeTests.class.getResourceAsStream("/ch_logo.png")) { + Assert.assertNotNull(is, "ch_logo.png not found in test resources"); + return is.readAllBytes(); + } + } + + /** + * Asserts that {@code actual} matches the {@code ch_logo.png} source file byte-for-byte. The source file is + * read fresh from test resources so the comparison is against the file content rather than an in-memory copy. + */ + private static void assertEqualsToClickHouseLogo(byte[] actual) throws IOException { + byte[] expected = readClickHouseLogo(); + Assert.assertNotNull(actual, "Read bytes must not be null"); + assertEquals(actual.length, expected.length, "Read byte count must match ch_logo.png size"); + assertEquals(actual, expected, "Read bytes must match ch_logo.png content"); + } + + @Test(groups = { "integration" }) + public void testBinaryStringSupportGetBytes() throws Exception { + // ch_logo.png is real binary content that is not valid UTF-8, so it must survive a + // round-trip through a String column byte-for-byte when binary_string_support is enabled. + byte[] imageBytes = readClickHouseLogo(); + + runQuery("CREATE TABLE test_binary_string_get_bytes (id Int8, str String) ENGINE = MergeTree ORDER BY ()"); + + try (Connection conn = getJdbcConnection(); + PreparedStatement insert = conn.prepareStatement("INSERT INTO test_binary_string_get_bytes VALUES (?, ?)")) { + insert.setInt(1, 1); + insert.setBytes(2, imageBytes); + insert.executeUpdate(); + } + + Properties props = new Properties(); + props.put(ClientConfigProperties.BINARY_STRING_SUPPORT.getKey(), "true"); + + try (Connection conn = getJdbcConnection(props); + Statement stmt = conn.createStatement(); + ResultSet rs = stmt.executeQuery("SELECT * FROM test_binary_string_get_bytes ORDER BY id")) { + assertTrue(rs.next()); + assertEqualsToClickHouseLogo(rs.getBytes("str")); + assertEqualsToClickHouseLogo(rs.getBytes(2)); + assertFalse(rs.wasNull()); + assertFalse(rs.next()); + } + } + + @Test(groups = { "integration" }) + public void testBinaryStringSupportGetBinaryStream() throws Exception { + byte[] imageBytes = readClickHouseLogo(); + + runQuery("CREATE TABLE test_binary_string_stream (id Int8, str String) ENGINE = MergeTree ORDER BY ()"); + + try (Connection conn = getJdbcConnection(); + PreparedStatement insert = conn.prepareStatement("INSERT INTO test_binary_string_stream VALUES (?, ?)")) { + insert.setInt(1, 1); + insert.setBytes(2, imageBytes); + insert.executeUpdate(); + } + + Properties props = new Properties(); + props.put(ClientConfigProperties.BINARY_STRING_SUPPORT.getKey(), "true"); + + try (Connection conn = getJdbcConnection(props); + Statement stmt = conn.createStatement(); + ResultSet rs = stmt.executeQuery("SELECT * FROM test_binary_string_stream ORDER BY id")) { + assertTrue(rs.next()); + + // by column label + try (InputStream stream = rs.getBinaryStream("str")) { + Assert.assertNotNull(stream); + assertEqualsToClickHouseLogo(stream.readAllBytes()); + } + assertFalse(rs.wasNull()); + + // by column index (delegates to the label-based implementation) + try (InputStream stream = rs.getBinaryStream(2)) { + Assert.assertNotNull(stream); + assertEqualsToClickHouseLogo(stream.readAllBytes()); + } + + assertFalse(rs.next()); + } + } + + @Test(groups = { "integration" }) + public void testBinaryStringSupportGetBinaryStreamNull() throws Exception { + runQuery("CREATE TABLE test_binary_string_stream_null (id Int8, str Nullable(String)) ENGINE = MergeTree ORDER BY ()"); + + try (Connection conn = getJdbcConnection(); + PreparedStatement insert = conn.prepareStatement("INSERT INTO test_binary_string_stream_null VALUES (?, ?)")) { + insert.setInt(1, 1); + insert.setNull(2, Types.VARCHAR); + insert.executeUpdate(); + } + + Properties props = new Properties(); + props.put(ClientConfigProperties.BINARY_STRING_SUPPORT.getKey(), "true"); + + try (Connection conn = getJdbcConnection(props); + Statement stmt = conn.createStatement(); + ResultSet rs = stmt.executeQuery("SELECT * FROM test_binary_string_stream_null ORDER BY id")) { + assertTrue(rs.next()); + assertNull(rs.getBinaryStream("str")); + assertTrue(rs.wasNull()); + assertNull(rs.getBytes("str")); + assertTrue(rs.wasNull()); + assertFalse(rs.next()); + } + } + @Test(groups = { "integration" }) public void testNestedArrays() throws Exception { try (Connection conn = getJdbcConnection()) { From e6b72ad99f54c8369d136b2f6f7484e33877cb66 Mon Sep 17 00:00:00 2001 From: Sergey Chernov Date: Mon, 29 Jun 2026 22:47:18 -0700 Subject: [PATCH 05/10] updated changelog and features.md --- CHANGELOG.md | 2 ++ docs/features.md | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 320805a55..8c1ee9047 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,8 @@ - **[client-v2, jdbc-v2]** Added support for ClickHouse `Geometry` type for ClickHouse `25.11+`, where `Geometry` changed from a `String` alias to `Variant(Point, Ring, LineString, MultiLineString, Polygon, MultiPolygon)` (client still compatible with older versions). Includes client read/write handling and JDBC type mapping for retrieving and inserting geometry values. Current writes infer the target geometry variant from array nesting depth, so `Ring` vs `LineString` and `Polygon` vs `MultiLineString` are not yet distinguishable through the generic `Geometry` write path. (https://github.com/ClickHouse/clickhouse-java/pull/2815) +- **[client-v2, jdbc-v2]** Added opt-in binary string support through the `binary_string_support` configuration property (or `Client.Builder#binaryStringSupport(boolean)`), disabled by default. When enabled, top-level `String` and `FixedString` columns are read into a `StringValue` that preserves the raw bytes instead of decoding them into a `String`, allowing non-UTF-8/binary content to round-trip byte-for-byte. `StringValue` exposes the bytes via `toByteArray()`/`asByteBuffer()` and lazily decodes a `String` via `asString()` (UTF-8 by default, or a caller-supplied `Charset`). Values nested inside containers (`Array`, `Map`, `Tuple`, `Nested`, `Variant`) continue to be read as `String`, since those types are not expected to carry large/binary strings. On the JDBC side, `ResultSet#getBinaryStream(int)` and `ResultSet#getBinaryStream(String)` are now implemented (previously unsupported) and, together with `getBytes(...)`, return the raw column bytes. + - **[jdbc-v2]** `ResultSet#getObject(int|String, Map>)` now accepts ClickHouse type names as map keys in addition to the JDBC `SQLType` names it has always accepted. Only unwrapped type names are used for the lookup — `Nullable(...)` and `LowCardinality(...)` wrappers are stripped and do not affect resolution, so a key like `"Int32"` matches both `Int32` and `Nullable(Int32)` columns; keys like `"Nullable(Int32)"` are not recognized. Lookup order is the `ClickHouseDataType` enum name (e.g. `"Int32"`, `"String"`, `"DateTime"`) then the JDBC `SQLType` name (e.g. `"INTEGER"`, `"VARCHAR"`, `"TIMESTAMP"`); a missing entry leaves the value uncoerced. The feature is supported for primitive ClickHouse types only — `Array`, `Tuple`, `Map`, `Nested`, and geometry types are not supported and continue to be returned in their native form regardless of the user-supplied map. Existing maps keyed only by JDBC `SQLType` names continue to work unchanged. ### Bug Fixes diff --git a/docs/features.md b/docs/features.md index 11a0e4ac4..39d37c691 100644 --- a/docs/features.md +++ b/docs/features.md @@ -16,6 +16,7 @@ This document lists stable, user-visible behavior in `client-v2` and `jdbc-v2` t - Parameterized SQL: Accepts named query parameters and can send them through supported HTTP request encodings. - Result materialization helpers: Provides streaming `Records`, generic row access, and convenience APIs that materialize all rows into generic records or typed POJOs. - Binary format readers: Reads ClickHouse binary result formats including `Native`, `RowBinary`, `RowBinaryWithNames`, and `RowBinaryWithNamesAndTypes`. +- Binary string support: Opt-in through the `binary_string_support` property (or `Client.Builder#binaryStringSupport(boolean)`), disabled by default. When enabled, top-level `String` and `FixedString` columns are read as `StringValue`, which preserves the raw bytes (`toByteArray()`/`asByteBuffer()`) and lazily decodes a `String` (`asString()`), instead of eagerly decoding to a `String`. Strings nested inside containers (`Array`, `Map`, `Tuple`, `Nested`, `Variant`) are still read as `String`. - JSONEachRow text reader: Can stream `JSONEachRow` responses through a caller-supplied `JsonParser`, with Jackson and Gson parser factory implementations available as optional classpath dependencies, and infers a best-effort schema from the first row. - Data type conversion: Maps ClickHouse types to Java values for binary reads, POJO binding, and SQL parameter formatting, including date/time handling. - Geometry type support: For ClickHouse `25.11+`, where `Geometry` changed from a string alias to `Variant(Point, Ring, LineString, MultiLineString, Polygon, MultiPolygon)`, the client reads and writes `Geometry` values through generic records, binary readers, POJO binding, and SQL parameter formatting, using Java array dimensionality to represent the geometry shape. @@ -47,6 +48,7 @@ Compatibility-sensitive traits: - Certificate-as-content support is compatibility-sensitive: any certificate or key value containing a PEM begin marker (`-----BEGIN`) is treated as inline PEM content, otherwise it is treated as a file path (also searched in the home directory and on the classpath). - JSONEachRow reading depends on the selected parser factory and request format settings: parser materialization determines Java value types, the reader infers minimal schema from the first row, and JSON number server settings are applied only when `QuerySettings` resolves to `ClickHouseFormat.JSONEachRow` and `json_disable_number_quoting` is enabled. - JSONEachRow schema inference is intentionally best-effort: scalar values use Java-to-ClickHouse type mappings, while JSON arrays and objects are identified structurally as `Array` and `Map`. For arrays, maps, and some nested or ambiguous values, the inferred type may not include the most specific element, key, value, or nested ClickHouse type. +- Binary string support is scope-sensitive and off by default: it only changes how top-level `String`/`FixedString` columns are read (to `StringValue` instead of `String`); nested string values are always read as `String`. `StringValue` is intentionally a mutable, zero-copy holder — it wraps (does not copy) the supplied bytes and `toByteArray()` returns the live backing array, so callers needing an independent snapshot must copy the bytes themselves. Only heap buffers are supported; constructing from a direct (off-heap) buffer is rejected. ## `jdbc-v2` @@ -66,6 +68,7 @@ Compatibility-sensitive traits: - SQL parsing and classification: Classifies SQL to distinguish queries, updates, inserts, `USE`, and role-changing statements, with selectable parser backends. - JDBC escape processing: Translates supported JDBC escape syntax for dates, timestamps, and functions before execution. - Result set streaming: Streams result sets from ClickHouse binary formats and `FORMAT JSONEachRow`, enforces max-row limits, and manages result-set lifecycle correctly. +- Binary string reads: `ResultSet#getBytes(int|String)` and `ResultSet#getBinaryStream(int|String)` return the raw bytes of a `String`/`FixedString` column. Combined with the `binary_string_support` connection property, non-UTF-8/binary content stored in `String` columns round-trips byte-for-byte; `NULL` values report `null` with `wasNull()` set. - Result-set metadata: Exposes JDBC `ResultSetMetaData` backed by ClickHouse column schema. - Database metadata: Implements JDBC `DatabaseMetaData` for ClickHouse catalogs, schemas, tables, columns, and related capability reporting. - Parameter metadata: Reports prepared-statement parameter counts. @@ -84,6 +87,7 @@ Compatibility-sensitive traits: - String parameters are escaped with backslash-based escaping: backslashes are doubled and single quotes are backslash-escaped before values are wrapped in single quotes. - `?` placeholder detection is SQL-aware and should not treat question marks inside quoted strings, quoted identifiers, comments, casts, or similar syntax as bind parameters. - String-like ClickHouse values have stable JDBC expectations: `String`, `FixedString`, and `Enum` values are returned as strings, while `UUID` is available both as `getString()` and `getObject(..., UUID.class)`. +- Binary access to `String`/`FixedString` columns is compatibility-sensitive: `getBytes(...)` and `getBinaryStream(...)` expose the raw column bytes (not a re-encoded text literal), and a `NULL` column returns `null` with `wasNull()` reporting `true`. The `binary_string_support` connection property is passed through to the underlying `client-v2` transport. - `Geometry` has a stable JDBC mapping: metadata reports SQL type `ARRAY` with type name `Geometry`, read paths return nested Java arrays rather than custom wrappers, and write paths depend on the caller preserving the intended point/array nesting shape. - JDBC `Geometry` writes share the same ambiguity as the client serializer: variant selection is inferred from nesting depth, so `Ring` versus `LineString` and `Polygon` versus `MultiLineString` are not currently distinguishable when writing through the generic `Geometry` path. - JDBC `FORMAT JSONEachRow` support is opt-in through the `jdbc_json_parser_factory` driver property, whose value must be a fully-qualified `JsonParserFactory` class name with a public no-argument constructor; JSONEachRow numeric and structured value behavior follows the selected parser and configured server output settings. Inferred JSON arrays are returned from `ResultSet.getObject(...)` as parser-native `List` values rather than JDBC `Array` values because JSONEachRow does not include element metadata. JDBC temporal typed accessors such as `getTimestamp(...)` are not guaranteed for JSONEachRow result sets; callers that need stable JDBC temporal conversions should use the binary default format or perform application-level conversion from string/object values. From 5335e5d7f806ddf70b52c99f9819ae4ec92c3cca Mon Sep 17 00:00:00 2001 From: Sergey Chernov Date: Mon, 29 Jun 2026 23:13:14 -0700 Subject: [PATCH 06/10] Updated implementation for MapBackedRecord to match abstract reader logic --- .../internal/MapBackedRecord.java | 2 +- .../api/data_formats/StringValueTests.java | 112 ++++++++++++++++++ .../clickhouse/jdbc/JdbcDataTypeTests.java | 4 +- 3 files changed, 115 insertions(+), 3 deletions(-) diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/MapBackedRecord.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/MapBackedRecord.java index a8708cbfb..4049009d4 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/MapBackedRecord.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/MapBackedRecord.java @@ -476,7 +476,7 @@ public List getList(int index) { @Override public byte[] getByteArray(int index) { - return getPrimitiveArray(schema.columnIndexToName(index)); + return getByteArray(schema.columnIndexToName(index)); } @Override diff --git a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java index 144a0228c..5ce60aa7b 100644 --- a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java +++ b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java @@ -2,7 +2,9 @@ import com.clickhouse.client.api.data_formats.internal.AbstractBinaryFormatReader; import com.clickhouse.client.api.data_formats.internal.BinaryStreamReader; +import com.clickhouse.client.api.data_formats.internal.MapBackedRecord; import com.clickhouse.client.api.data_formats.internal.SerializerUtils; +import com.clickhouse.client.api.query.QuerySettings; import com.clickhouse.data.ClickHouseColumn; import com.clickhouse.data.format.BinaryStreamUtils; import org.testng.Assert; @@ -15,6 +17,8 @@ import java.nio.ByteBuffer; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; +import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; import java.util.TimeZone; @@ -313,4 +317,112 @@ public void testWriteByteArrayToFixedStringRoundTrip() throws IOException { StringValue read = reader(baos.toByteArray(), true).readValue(column); Assert.assertEquals(read.toByteArray(), binary); } + + // ---- MapBackedRecord (queryAll materialization) ---- + + /** + * Materializes a single-row {@link MapBackedRecord} the same way {@code Client.queryAll(...)} does: + * a {@code RowBinaryWithNamesAndTypes} stream is read into a map and wrapped together with the + * reader's converters and schema. {@code binaryStringSupport} mirrors the client property. + */ + private static MapBackedRecord materializeRow(String[] names, String[] types, Object[] values, + boolean binaryStringSupport) throws IOException { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + BinaryStreamUtils.writeVarInt(out, names.length); + for (String name : names) { + BinaryStreamUtils.writeString(out, name); + } + for (String type : types) { + BinaryStreamUtils.writeString(out, type); + } + for (int i = 0; i < names.length; i++) { + SerializerUtils.serializeData(out, values[i], ClickHouseColumn.of(names[i], types[i])); + } + + RowBinaryWithNamesAndTypesFormatReader reader = new RowBinaryWithNamesAndTypesFormatReader( + new ByteArrayInputStream(out.toByteArray()), + new QuerySettings().setUseTimeZone(TimeZone.getTimeZone("UTC").toZoneId().getId()), + new BinaryStreamReader.CachingByteBufferAllocator(), null, binaryStringSupport); + + Map record = new LinkedHashMap<>(); + Assert.assertTrue(reader.readRecord(record), "Expected a row to be read"); + return new MapBackedRecord(record, reader.getConvertions(), reader.getSchema()); + } + + @Test + public void testMapBackedRecordReadsStringAsStringValueWhenEnabled() throws IOException { + byte[] binary = new byte[]{(byte) 0xDE, (byte) 0xAD, (byte) 0x00, (byte) 0xBE, (byte) 0xEF}; + MapBackedRecord record = materializeRow(new String[]{"s"}, new String[]{"String"}, + new Object[]{binary}, true); + + // getObject exposes the raw holder + Object value = record.getObject("s"); + Assert.assertTrue(value instanceof StringValue, "Expected StringValue but got " + value.getClass()); + Assert.assertEquals(((StringValue) value).toByteArray(), binary); + + // getByteArray must preserve the raw bytes through both the label and index overloads + Assert.assertEquals(record.getByteArray("s"), binary, "getByteArray(name) must preserve raw bytes"); + Assert.assertEquals(record.getByteArray(1), binary, "getByteArray(index) must preserve raw bytes"); + } + + @Test + public void testMapBackedRecordGetByteArrayIndexAndLabelParity() throws IOException { + // The index and label overloads of getByteArray must behave identically for a String column. + byte[] binary = new byte[]{(byte) 0x01, (byte) 0x80, (byte) 0xFF, (byte) 0x00}; + MapBackedRecord record = materializeRow(new String[]{"s"}, new String[]{"String"}, + new Object[]{binary}, true); + Assert.assertEquals(record.getByteArray(1), record.getByteArray("s")); + Assert.assertEquals(record.getByteArray(1), binary); + } + + @Test + public void testMapBackedRecordFixedStringByteArray() throws IOException { + byte[] binary = new byte[]{(byte) 0xAA, (byte) 0xBB, (byte) 0xCC}; + MapBackedRecord record = materializeRow(new String[]{"s"}, new String[]{"FixedString(3)"}, + new Object[]{binary}, true); + + Assert.assertTrue(record.getObject("s") instanceof StringValue); + Assert.assertEquals(record.getByteArray("s"), binary); + Assert.assertEquals(record.getByteArray(1), binary); + } + + @Test + public void testMapBackedRecordGetStringDecodesStringValue() throws IOException { + String text = "Привет, мир"; + MapBackedRecord record = materializeRow(new String[]{"s"}, new String[]{"String"}, + new Object[]{text}, true); + Assert.assertEquals(record.getString("s"), text); + Assert.assertEquals(record.getString(1), text); + } + + @Test + public void testMapBackedRecordNestedArrayStaysString() throws IOException { + // Even with the feature enabled, nested Array(String) elements stay String; getStringArray must + // work through both overloads and getList must expose String elements. + String[] elements = {"plain", "Привет", ""}; + MapBackedRecord record = materializeRow(new String[]{"a"}, new String[]{"Array(String)"}, + new Object[]{elements}, true); + + Assert.assertEquals(record.getStringArray("a"), elements); + Assert.assertEquals(record.getStringArray(1), elements); + + List list = record.getList("a"); + Assert.assertEquals(list.size(), elements.length); + for (Object element : list) { + Assert.assertTrue(element instanceof String, "Nested array element must be a String"); + } + } + + @Test + public void testMapBackedRecordDefaultReturnsString() throws IOException { + // Default behavior (feature off): top-level Strings are plain String and byte access re-encodes as UTF-8. + String text = "ascii text"; + MapBackedRecord record = materializeRow(new String[]{"s"}, new String[]{"String"}, + new Object[]{text}, false); + + Assert.assertTrue(record.getObject("s") instanceof String); + Assert.assertEquals(record.getString("s"), text); + Assert.assertEquals(record.getByteArray("s"), text.getBytes(StandardCharsets.UTF_8)); + Assert.assertEquals(record.getByteArray(1), text.getBytes(StandardCharsets.UTF_8)); + } } diff --git a/jdbc-v2/src/test/java/com/clickhouse/jdbc/JdbcDataTypeTests.java b/jdbc-v2/src/test/java/com/clickhouse/jdbc/JdbcDataTypeTests.java index 30b2f2ccd..16f7df71b 100644 --- a/jdbc-v2/src/test/java/com/clickhouse/jdbc/JdbcDataTypeTests.java +++ b/jdbc-v2/src/test/java/com/clickhouse/jdbc/JdbcDataTypeTests.java @@ -1613,14 +1613,14 @@ public void testBinaryStringSupportGetBinaryStream() throws Exception { ResultSet rs = stmt.executeQuery("SELECT * FROM test_binary_string_stream ORDER BY id")) { assertTrue(rs.next()); - // by column label + // by column label (delegates to the index-based implementation) try (InputStream stream = rs.getBinaryStream("str")) { Assert.assertNotNull(stream); assertEqualsToClickHouseLogo(stream.readAllBytes()); } assertFalse(rs.wasNull()); - // by column index (delegates to the label-based implementation) + // by column index try (InputStream stream = rs.getBinaryStream(2)) { Assert.assertNotNull(stream); assertEqualsToClickHouseLogo(stream.readAllBytes()); From c5b3173ff41c9703b365b85ae48feb4c5d2c51f3 Mon Sep 17 00:00:00 2001 From: Sergey Chernov Date: Tue, 30 Jun 2026 08:06:47 -0700 Subject: [PATCH 07/10] Added more tests --- .../RowBinaryFormatStringWriteTest.java | 100 +++++++++ .../api/data_formats/StringValueTests.java | 199 ++++++++++++++++++ .../internal/SerializerUtilsTest.java | 75 +++++++ .../datatypes/RowBinaryFormatWriterTest.java | 56 ++--- 4 files changed, 405 insertions(+), 25 deletions(-) create mode 100644 client-v2/src/test/java/com/clickhouse/client/api/data_formats/RowBinaryFormatStringWriteTest.java diff --git a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/RowBinaryFormatStringWriteTest.java b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/RowBinaryFormatStringWriteTest.java new file mode 100644 index 000000000..da33964fc --- /dev/null +++ b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/RowBinaryFormatStringWriteTest.java @@ -0,0 +1,100 @@ +package com.clickhouse.client.api.data_formats; + +import com.clickhouse.client.api.data_formats.internal.BinaryStreamReader; +import com.clickhouse.client.api.metadata.TableSchema; +import com.clickhouse.data.ClickHouseColumn; +import com.clickhouse.data.ClickHouseFormat; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.Collections; +import java.util.TimeZone; + +/** + * Unit coverage for the {@code writeString} helpers on {@link RowBinaryFormatSerializer} and the + * {@code setString(byte[])} overloads on {@link RowBinaryFormatWriter}, which the integration tests do + * not exercise in isolation. + */ +public class RowBinaryFormatStringWriteTest { + + private static final ClickHouseColumn STRING_COLUMN = ClickHouseColumn.of("s", "String"); + + private static BinaryStreamReader reader(byte[] data) { + return new BinaryStreamReader(new ByteArrayInputStream(data), TimeZone.getTimeZone("UTC"), null, + new BinaryStreamReader.DefaultByteBufferAllocator(), false, null, true); + } + + @Test + public void testSerializerWriteString() throws IOException { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + new RowBinaryFormatSerializer(out).writeString("hello world"); + + StringValue read = reader(out.toByteArray()).readValue(STRING_COLUMN); + Assert.assertEquals(read.asString(), "hello world"); + } + + @Test + public void testSerializerWriteStringUnicode() throws IOException { + String value = "Привет 你好 🚀"; + ByteArrayOutputStream out = new ByteArrayOutputStream(); + new RowBinaryFormatSerializer(out).writeString(value); + + StringValue read = reader(out.toByteArray()).readValue(STRING_COLUMN); + Assert.assertEquals(read.asString(), value); + } + + @Test + public void testSerializerWriteStringFromBytesPreservesBinary() throws IOException { + byte[] binary = new byte[]{(byte) 0x00, (byte) 0xFF, (byte) 0x80, (byte) 0x7F, (byte) 0xAB}; + ByteArrayOutputStream out = new ByteArrayOutputStream(); + new RowBinaryFormatSerializer(out).writeString(binary); + + StringValue read = reader(out.toByteArray()).readValue(STRING_COLUMN); + Assert.assertEquals(read.toByteArray(), binary); + } + + @Test + public void testWriterSetStringBytesByName() throws IOException { + byte[] binary = new byte[]{(byte) 0xDE, (byte) 0xAD, (byte) 0x00, (byte) 0xBE, (byte) 0xEF}; + ByteArrayOutputStream out = writeSingleStringRow(w -> w.setString("s", binary)); + + StringValue read = reader(out.toByteArray()).readValue(STRING_COLUMN); + Assert.assertEquals(read.toByteArray(), binary); + } + + @Test + public void testWriterSetStringBytesByIndex() throws IOException { + byte[] binary = new byte[]{(byte) 0x01, (byte) 0x02, (byte) 0xFE, (byte) 0xFF, (byte) 0x00}; + ByteArrayOutputStream out = writeSingleStringRow(w -> w.setString(1, binary)); + + StringValue read = reader(out.toByteArray()).readValue(STRING_COLUMN); + Assert.assertEquals(read.toByteArray(), binary); + } + + @Test + public void testWriterSetStringBytesUtf8Content() throws IOException { + String text = "ascii and Юникод"; + byte[] binary = text.getBytes(StandardCharsets.UTF_8); + ByteArrayOutputStream out = writeSingleStringRow(w -> w.setString("s", binary)); + + StringValue read = reader(out.toByteArray()).readValue(STRING_COLUMN); + Assert.assertEquals(read.asString(), text); + } + + private interface RowWriter { + void write(RowBinaryFormatWriter writer); + } + + private static ByteArrayOutputStream writeSingleStringRow(RowWriter rowWriter) throws IOException { + TableSchema schema = new TableSchema(Collections.singletonList(STRING_COLUMN)); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + RowBinaryFormatWriter writer = new RowBinaryFormatWriter(out, schema, ClickHouseFormat.RowBinary); + rowWriter.write(writer); + writer.commitRow(); + return out; + } +} diff --git a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java index 5ce60aa7b..bf67f4856 100644 --- a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java +++ b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java @@ -4,6 +4,7 @@ import com.clickhouse.client.api.data_formats.internal.BinaryStreamReader; import com.clickhouse.client.api.data_formats.internal.MapBackedRecord; import com.clickhouse.client.api.data_formats.internal.SerializerUtils; +import com.clickhouse.client.api.metadata.TableSchema; import com.clickhouse.client.api.query.QuerySettings; import com.clickhouse.data.ClickHouseColumn; import com.clickhouse.data.format.BinaryStreamUtils; @@ -17,6 +18,9 @@ import java.nio.ByteBuffer; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -318,6 +322,32 @@ public void testWriteByteArrayToFixedStringRoundTrip() throws IOException { Assert.assertEquals(read.toByteArray(), binary); } + @Test + public void testWriteStringValueToFixedStringRoundTrip() throws IOException { + // FixedString serialization must accept a StringValue and write its raw bytes (padded to the + // column width), mirroring the byte[] branch. + byte[] binary = new byte[]{(byte) 0x10, (byte) 0x20, (byte) 0x00}; + StringValue value = new StringValue(binary); + ClickHouseColumn column = ClickHouseColumn.of("s", "FixedString(3)"); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + SerializerUtils.serializeData(baos, value, column); + StringValue read = reader(baos.toByteArray(), true).readValue(column); + Assert.assertEquals(read.toByteArray(), binary); + } + + @Test + public void testWriteStringValueToFixedStringPadsShorterValue() throws IOException { + // A StringValue shorter than the column width is right-padded with zero bytes. + byte[] binary = new byte[]{(byte) 0xAB, (byte) 0xCD}; + ClickHouseColumn column = ClickHouseColumn.of("s", "FixedString(5)"); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + SerializerUtils.serializeData(baos, new StringValue(binary), column); + StringValue read = reader(baos.toByteArray(), true).readValue(column); + Assert.assertEquals(read.toByteArray(), new byte[]{(byte) 0xAB, (byte) 0xCD, 0, 0, 0}); + } + // ---- MapBackedRecord (queryAll materialization) ---- /** @@ -425,4 +455,173 @@ public void testMapBackedRecordDefaultReturnsString() throws IOException { Assert.assertEquals(record.getByteArray("s"), text.getBytes(StandardCharsets.UTF_8)); Assert.assertEquals(record.getByteArray(1), text.getBytes(StandardCharsets.UTF_8)); } + + // ---- getStringArray over arrays whose items are StringValue ---- + + private static BinaryStreamReader.ArrayValue stringValueArray() { + // Build an array whose item type is StringValue (the branch that is not reachable through the + // wire, where nested array elements are always decoded as String). A null element exercises the + // null-guard inside the StringValue mapping. + BinaryStreamReader.ArrayValue array = new BinaryStreamReader.ArrayValue(StringValue.class, 3); + array.set(0, new StringValue("plain".getBytes(StandardCharsets.UTF_8))); + array.set(1, new StringValue("Привет".getBytes(StandardCharsets.UTF_8))); + array.set(2, null); + return array; + } + + @Test + public void testAbstractReaderGetStringArrayFromStringValueItems() { + TableSchema schema = new TableSchema( + Collections.singletonList(ClickHouseColumn.of("a", "Array(String)"))); + InjectableReader reader = new InjectableReader(schema); + reader.setCurrentRecord(new Object[]{stringValueArray()}); + + String[] expected = {"plain", "Привет", null}; + Assert.assertEquals(reader.getStringArray(1), expected); + Assert.assertEquals(reader.getStringArray("a"), expected); + } + + @Test + public void testMapBackedRecordGetStringArrayFromStringValueItems() { + MapBackedRecord record = singleColumnRecord("a", "Array(String)", stringValueArray()); + + String[] expected = {"plain", "Привет", null}; + Assert.assertEquals(record.getStringArray("a"), expected); + Assert.assertEquals(record.getStringArray(1), expected); + } + + // ---- MapBackedRecord.getByteArray edge cases ---- + + @Test + public void testMapBackedRecordGetByteArrayReturnsNullForNullValue() { + // A null value must short-circuit before any string-like or primitive-array handling. + MapBackedRecord record = singleColumnRecord("s", "String", null); + Assert.assertNull(record.getByteArray("s")); + Assert.assertNull(record.getByteArray(1)); + } + + @Test + public void testMapBackedRecordGetByteArrayFromPrimitiveArray() { + // A non-string, primitive Array(Int8) value is not string-like, so getByteArray falls through to + // getPrimitiveArray and returns the backing byte[] (the branch the string-based tests never reach). + BinaryStreamReader.ArrayValue array = new BinaryStreamReader.ArrayValue(byte.class, 3); + array.set(0, (byte) 1); + array.set(1, (byte) -2); + array.set(2, (byte) 127); + MapBackedRecord record = singleColumnRecord("b", "Array(Int8)", array); + + byte[] expected = {(byte) 1, (byte) -2, (byte) 127}; + Assert.assertEquals(record.getByteArray("b"), expected); + Assert.assertEquals(record.getByteArray(1), expected); + } + + // ---- Nested structures round-tripped through the real read path ---- + + @Test + public void testAbstractReaderArrayOfArrayOfString() throws IOException { + List> value = Arrays.asList( + Arrays.asList("a", "b"), + Collections.singletonList("c"), + Collections.emptyList()); + RowBinaryWithNamesAndTypesFormatReader reader = readerForRow( + new String[]{"a"}, new String[]{"Array(Array(String))"}, new Object[]{value}, true); + + Assert.assertEquals(reader.getList("a"), value); + + Object[] outer = reader.getObjectArray("a"); + Assert.assertEquals(outer.length, 3); + Assert.assertEquals((Object[]) outer[0], new Object[]{"a", "b"}); + Assert.assertEquals((Object[]) outer[1], new Object[]{"c"}); + Assert.assertEquals((Object[]) outer[2], new Object[0]); + } + + @Test + public void testMapBackedRecordArrayOfArrayOfString() throws IOException { + List> value = Arrays.asList( + Arrays.asList("a", "b"), + Collections.singletonList("c"), + Collections.emptyList()); + MapBackedRecord record = materializeRow( + new String[]{"a"}, new String[]{"Array(Array(String))"}, new Object[]{value}, true); + + Assert.assertEquals(record.getList("a"), value); + + Object[] outer = record.getObjectArray("a"); + Assert.assertEquals(outer.length, 3); + Assert.assertEquals((Object[]) outer[0], new Object[]{"a", "b"}); + Assert.assertEquals((Object[]) outer[1], new Object[]{"c"}); + Assert.assertEquals((Object[]) outer[2], new Object[0]); + } + + @Test + public void testAbstractReaderTupleStringIntString() throws IOException { + List value = Arrays.asList("first", 7, "third"); + RowBinaryWithNamesAndTypesFormatReader reader = readerForRow( + new String[]{"t"}, new String[]{"Tuple(String, Int32, String)"}, new Object[]{value}, true); + + Assert.assertEquals(reader.getTuple("t"), new Object[]{"first", 7, "third"}); + Assert.assertEquals(reader.getTuple(1), new Object[]{"first", 7, "third"}); + } + + @Test + public void testMapBackedRecordTupleStringIntString() throws IOException { + List value = Arrays.asList("first", 7, "third"); + MapBackedRecord record = materializeRow( + new String[]{"t"}, new String[]{"Tuple(String, Int32, String)"}, new Object[]{value}, true); + + Assert.assertEquals(record.getTuple("t"), new Object[]{"first", 7, "third"}); + Assert.assertEquals(record.getTuple(1), new Object[]{"first", 7, "third"}); + } + + /** + * Serializes a single row and returns a positioned {@link RowBinaryWithNamesAndTypesFormatReader} + * (which is an {@link AbstractBinaryFormatReader}) so its accessors can be exercised over real data. + */ + private static RowBinaryWithNamesAndTypesFormatReader readerForRow(String[] names, String[] types, + Object[] values, boolean binaryStringSupport) + throws IOException { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + BinaryStreamUtils.writeVarInt(out, names.length); + for (String name : names) { + BinaryStreamUtils.writeString(out, name); + } + for (String type : types) { + BinaryStreamUtils.writeString(out, type); + } + for (int i = 0; i < names.length; i++) { + SerializerUtils.serializeData(out, values[i], ClickHouseColumn.of(names[i], types[i])); + } + + RowBinaryWithNamesAndTypesFormatReader reader = new RowBinaryWithNamesAndTypesFormatReader( + new ByteArrayInputStream(out.toByteArray()), + new QuerySettings().setUseTimeZone(TimeZone.getTimeZone("UTC").toZoneId().getId()), + new BinaryStreamReader.CachingByteBufferAllocator(), null, binaryStringSupport); + Assert.assertNotNull(reader.next(), "Expected a row to be read"); + return reader; + } + + private static MapBackedRecord singleColumnRecord(String name, String type, Object value) { + TableSchema schema = new TableSchema(Collections.singletonList(ClickHouseColumn.of(name, type))); + Map record = new HashMap<>(); + record.put(name, value); + return new MapBackedRecord(record, new Map[]{null}, schema); + } + + /** + * Minimal concrete {@link AbstractBinaryFormatReader} that lets a test inject a materialized record + * directly, so accessor branches can be exercised without a live binary stream. + */ + private static final class InjectableReader extends AbstractBinaryFormatReader { + InjectableReader(TableSchema schema) { + super(new ByteArrayInputStream(new byte[0]), + new QuerySettings().setUseTimeZone("UTC"), + schema, + new BinaryStreamReader.DefaultByteBufferAllocator(), + NO_TYPE_HINT_MAPPING); + } + + void setCurrentRecord(Object[] record) { + this.currentRecord = record; + } + } } diff --git a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/SerializerUtilsTest.java b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/SerializerUtilsTest.java index eb2a83fb8..1a94e9fe9 100644 --- a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/SerializerUtilsTest.java +++ b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/SerializerUtilsTest.java @@ -252,6 +252,81 @@ private static Map newMap(Object... kv) { return map; } + @Test + public void testReadNestedReadsArrayOfTuples() throws Exception { + ClickHouseColumn nested = ClickHouseColumn.of("n", "Nested(a String, b Int32)"); + List fields = nested.getNestedColumns(); + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + SerializerUtils.writeVarInt(out, 2); + SerializerUtils.serializeData(out, "x", fields.get(0)); + SerializerUtils.serializeData(out, 1, fields.get(1)); + SerializerUtils.serializeData(out, "y", fields.get(0)); + SerializerUtils.serializeData(out, 2, fields.get(1)); + + BinaryStreamReader.ArrayValue array = newReader(out.toByteArray()).readNested(nested); + Assert.assertEquals(array.length(), 2); + Assert.assertEquals((Object[]) array.get(0), new Object[]{"x", 1}); + Assert.assertEquals((Object[]) array.get(1), new Object[]{"y", 2}); + } + + @Test + public void testReadNestedEmpty() throws Exception { + ClickHouseColumn nested = ClickHouseColumn.of("n", "Nested(a String, b Int32)"); + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + SerializerUtils.writeVarInt(out, 0); + + BinaryStreamReader.ArrayValue array = newReader(out.toByteArray()).readNested(nested); + Assert.assertEquals(array.length(), 0); + } + + @Test + public void testReadValueOnNestedColumnReturnsArrayOfTuples() throws Exception { + ClickHouseColumn nested = ClickHouseColumn.of("n", "Nested(a String, b Int32)"); + List fields = nested.getNestedColumns(); + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + SerializerUtils.writeVarInt(out, 1); + SerializerUtils.serializeData(out, "only", fields.get(0)); + SerializerUtils.serializeData(out, 42, fields.get(1)); + + Object value = newReader(out.toByteArray()).readValue(nested); + Assert.assertTrue(value instanceof BinaryStreamReader.ArrayValue, + "Nested column must read back as an ArrayValue"); + BinaryStreamReader.ArrayValue array = (BinaryStreamReader.ArrayValue) value; + Assert.assertEquals(array.length(), 1); + Assert.assertEquals((Object[]) array.get(0), new Object[]{"only", 42}); + } + + @Test + public void testWriteFixedStringBytesPadsShorterValue() throws Exception { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + SerializerUtils.writeFixedStringBytes(out, new byte[]{1, 2}, 5); + Assert.assertEquals(out.toByteArray(), new byte[]{1, 2, 0, 0, 0}); + } + + @Test + public void testWriteFixedStringBytesWritesExactLength() throws Exception { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + SerializerUtils.writeFixedStringBytes(out, new byte[]{1, 2, 3}, 3); + Assert.assertEquals(out.toByteArray(), new byte[]{1, 2, 3}); + } + + @Test + public void testWriteFixedStringBytesEmptyValueIsAllPadding() throws Exception { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + SerializerUtils.writeFixedStringBytes(out, new byte[0], 3); + Assert.assertEquals(out.toByteArray(), new byte[]{0, 0, 0}); + } + + @Test + public void testWriteFixedStringBytesRejectsValueLongerThanLength() { + Assert.assertThrows(IllegalArgumentException.class, + () -> SerializerUtils.writeFixedStringBytes(new ByteArrayOutputStream(), + new byte[]{1, 2, 3, 4}, 3)); + } + private void assertCustomGeoTypeTag(String typeName) throws Exception { ByteArrayOutputStream out = new ByteArrayOutputStream(); SerializerUtils.writeDynamicTypeTag(out, ClickHouseColumn.of("v", typeName)); diff --git a/client-v2/src/test/java/com/clickhouse/client/datatypes/RowBinaryFormatWriterTest.java b/client-v2/src/test/java/com/clickhouse/client/datatypes/RowBinaryFormatWriterTest.java index b355cb785..700894d6f 100644 --- a/client-v2/src/test/java/com/clickhouse/client/datatypes/RowBinaryFormatWriterTest.java +++ b/client-v2/src/test/java/com/clickhouse/client/datatypes/RowBinaryFormatWriterTest.java @@ -391,52 +391,58 @@ public void writeBinaryStringsTest() throws Exception { " fixed_string_one FixedString(1) " + " ) Engine = MergeTree ORDER BY id"; - byte[] binaryData = new byte[]{(byte) 0xDE, (byte) 0xAD, (byte) 0xBE, (byte) 0xEF, (byte) 0x00, (byte) 0xFF, (byte) 0x80}; - byte[] fixedStringData = new byte[]{(byte) 0xAA, (byte) 0xBB, (byte) 0xCC, (byte) 0xDD, (byte) 0xEE}; - byte[] fixedStringOneData = new byte[]{(byte) 0x7F}; + // Row 1 is written via setValue(byte[]), row 2 via setString(byte[]); use distinct + // payloads per row so the rows are not identical (identical rows would be collapsed + // by insert deduplication on Cloud's replicated/shared engines). + byte[] binaryData1 = new byte[]{(byte) 0xDE, (byte) 0xAD, (byte) 0xBE, (byte) 0xEF, (byte) 0x00, (byte) 0xFF, (byte) 0x80}; + byte[] fixedStringData1 = new byte[]{(byte) 0xAA, (byte) 0xBB, (byte) 0xCC, (byte) 0xDD, (byte) 0xEE}; + byte[] fixedStringOneData1 = new byte[]{(byte) 0x7F}; + + byte[] binaryData2 = new byte[]{(byte) 0x01, (byte) 0x02, (byte) 0x03, (byte) 0xFE, (byte) 0xFD, (byte) 0x00, (byte) 0x7F}; + byte[] fixedStringData2 = new byte[]{(byte) 0x11, (byte) 0x22, (byte) 0x33, (byte) 0x44, (byte) 0x55}; + byte[] fixedStringOneData2 = new byte[]{(byte) 0x01}; // Instead of writeTest which reads back using default string decoding, we write manually // and query back using typeHintMapping to preserve raw bytes initTable(tableName, tableCreate, new CommandSettings()); TableSchema schema = client.getTableSchema(tableName); + // Write both rows in a single insert: row 1 exercises setValue(byte[]) and row 2 setString(byte[]). ClickHouseFormat format = ClickHouseFormat.RowBinaryWithDefaults; try (InsertResponse response = client.insert(tableName, out -> { RowBinaryFormatWriter w = new RowBinaryFormatWriter(out, schema, format); w.setValue(schema.nameToColumnIndex("id"), 1); - w.setValue(schema.nameToColumnIndex("string"), binaryData); - w.setValue(schema.nameToColumnIndex("fixed_string"), fixedStringData); - w.setValue(schema.nameToColumnIndex("fixed_string_one"), fixedStringOneData); + w.setValue(schema.nameToColumnIndex("string"), binaryData1); + w.setValue(schema.nameToColumnIndex("fixed_string"), fixedStringData1); + w.setValue(schema.nameToColumnIndex("fixed_string_one"), fixedStringOneData1); w.commitRow(); - }, format, settings).get()) { - System.out.println("Rows written (Field-like): " + response.getWrittenRows()); - } - // Also test inserting with byte[] directly via RowBinaryFormatWriter - try (InsertResponse response = client.insert(tableName, out -> { - RowBinaryFormatWriter w = new RowBinaryFormatWriter(out, schema, format); w.setValue(schema.nameToColumnIndex("id"), 2); - w.setString("string", binaryData); - w.setString("fixed_string", fixedStringData); - w.setString("fixed_string_one", fixedStringOneData); + w.setString("string", binaryData2); + w.setString("fixed_string", fixedStringData2); + w.setString("fixed_string_one", fixedStringOneData2); w.commitRow(); }, format, settings).get()) { - System.out.println("Rows written (manual): " + response.getWrittenRows()); + System.out.println("Rows written: " + response.getWrittenRows()); } - + Client customClient = newClient() .binaryStringSupport(true) .build(); - + List records = customClient.queryAll("SELECT * FROM \"" + tableName + "\" ORDER BY id" ); assertEquals(records.size(), 2); - - for (GenericRecord record : records) { - org.testng.Assert.assertEquals(record.getByteArray("string"), binaryData); - org.testng.Assert.assertEquals(record.getByteArray("fixed_string"), fixedStringData); - org.testng.Assert.assertEquals(record.getByteArray("fixed_string_one"), fixedStringOneData); - } - + + GenericRecord row1 = records.get(0); + org.testng.Assert.assertEquals(row1.getByteArray("string"), binaryData1); + org.testng.Assert.assertEquals(row1.getByteArray("fixed_string"), fixedStringData1); + org.testng.Assert.assertEquals(row1.getByteArray("fixed_string_one"), fixedStringOneData1); + + GenericRecord row2 = records.get(1); + org.testng.Assert.assertEquals(row2.getByteArray("string"), binaryData2); + org.testng.Assert.assertEquals(row2.getByteArray("fixed_string"), fixedStringData2); + org.testng.Assert.assertEquals(row2.getByteArray("fixed_string_one"), fixedStringOneData2); + customClient.close(); } From 6262ef971b1ce0a347cee9d1b75b0f551e93a892 Mon Sep 17 00:00:00 2001 From: Sergey Chernov Date: Tue, 30 Jun 2026 08:53:18 -0700 Subject: [PATCH 08/10] Fixed issue with POJO reading when binary strings are enabled --- .../client/api/data_formats/StringValue.java | 10 ++- .../internal/SerializerUtils.java | 64 ++++++++++++++- .../api/data_formats/StringValueTests.java | 79 +++++++++++++++++++ docs/features.md | 2 +- .../com/clickhouse/jdbc/ResultSetImpl.java | 1 + 5 files changed, 153 insertions(+), 3 deletions(-) diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/StringValue.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/StringValue.java index 3ca94c923..8b67e34d3 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/StringValue.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/StringValue.java @@ -6,9 +6,17 @@ import java.util.Objects; /** - * Holder for ClickHouse {@code String} or {@code FixedString} values that preserves raw bytes + * Read-time holder for ClickHouse {@code String} or {@code FixedString} values that preserves raw bytes * to avoid lossy decoding and unnecessary allocations. *

+ * This is an internal value holder, not a general-purpose type for user code. It is produced only + * by the read path when the binary-string feature is enabled (for example {@code GenericRecord.getObject} + * or {@code BinaryStreamReader.readValue} without a type hint), so that callers that need exact bytes can + * obtain them via {@link #toByteArray()} and callers that need text can decode via {@link #asString()}. + * It is not a supported field type for POJO binding: declare POJO fields for String/FixedString + * columns as {@link String} or {@code byte[]} instead. Normal application code should generally consume + * {@link String} or {@code byte[]} rather than holding onto a {@code StringValue}. + *

* This is a mutable structure and must be used with care. To avoid copying, it does not * duplicate the bytes it is given: the constructor wraps the supplied array/buffer instead of * copying it, and {@link #toByteArray()} returns a direct reference to the backing array rather diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/SerializerUtils.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/SerializerUtils.java index 60b6e3d62..355fffd5e 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/SerializerUtils.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/SerializerUtils.java @@ -27,6 +27,7 @@ import java.math.BigInteger; import java.net.Inet4Address; import java.net.Inet6Address; +import java.nio.charset.StandardCharsets; import java.sql.Timestamp; import java.time.Duration; import java.time.Instant; @@ -944,6 +945,48 @@ public static String convertToString(Object value) { return java.lang.String.valueOf(value); } + /** + * Converts a value read for a top-level {@code String}/{@code FixedString} column into a {@link String} + * for POJO binding. With the binary-string feature enabled the reader produces a {@link StringValue} + * holder (a read-time value holder, not a POJO field type); otherwise it produces a plain {@link String}. + * A {@code null} value (e.g. a SQL {@code NULL}) is passed through. Used from the bytecode generated by + * {@link #compilePOJOSetter(Method, ClickHouseColumn)}. + * + * @param value value returned by {@code BinaryStreamReader.readValue} + * @return the decoded string, or {@code null} + */ + public static String stringValueToString(Object value) { + if (value == null) { + return null; + } + if (value instanceof StringValue) { + return ((StringValue) value).asString(); + } + return (String) value; + } + + /** + * Converts a value read for a top-level {@code String}/{@code FixedString} column into the raw bytes for + * POJO binding. A {@link StringValue} yields its preserved bytes (no lossy decoding); a plain + * {@link String} is encoded as UTF-8, matching the historical behaviour. A {@code null} value is passed + * through. Used from the bytecode generated by {@link #compilePOJOSetter(Method, ClickHouseColumn)}. + * + * @param value value returned by {@code BinaryStreamReader.readValue} + * @return the raw bytes, or {@code null} + */ + public static byte[] stringValueToByteArray(Object value) { + if (value == null) { + return null; + } + if (value instanceof StringValue) { + return ((StringValue) value).toByteArray(); + } + if (value instanceof String) { + return ((String) value).getBytes(StandardCharsets.UTF_8); + } + return (byte[]) value; + } + /** * Writes raw bytes as a ClickHouse {@code FixedString(length)} value. The bytes are written as-is and * right-padded with zero bytes when shorter than {@code length}. @@ -1073,7 +1116,26 @@ public static POJOFieldDeserializer compilePOJOSetter(Method setterMethod, Click Type.getType(Class.class)), false); - if (List.class.isAssignableFrom(targetType) && column.getDataType() == ClickHouseDataType.Tuple) { + boolean stringLikeColumn = column.getDataType() == ClickHouseDataType.String + || column.getDataType() == ClickHouseDataType.FixedString; + + if (stringLikeColumn && targetType == String.class) { + // With binary-string support enabled readValue yields a StringValue holder; convert it (or a + // plain String when the feature is off) to the String the setter expects. StringValue is a + // read-time holder, not a supported POJO field type. + mv.visitMethodInsn(INVOKESTATIC, + Type.getInternalName(SerializerUtils.class), + "stringValueToString", + Type.getMethodDescriptor(Type.getType(String.class), Type.getType(Object.class)), + false); + } else if (stringLikeColumn && targetType == byte[].class) { + // Convert the StringValue (or plain String) to raw bytes for a byte[] field. + mv.visitMethodInsn(INVOKESTATIC, + Type.getInternalName(SerializerUtils.class), + "stringValueToByteArray", + Type.getMethodDescriptor(Type.getType(byte[].class), Type.getType(Object.class)), + false); + } else if (List.class.isAssignableFrom(targetType) && column.getDataType() == ClickHouseDataType.Tuple) { mv.visitTypeInsn(CHECKCAST, Type.getInternalName(Object[].class)); mv.visitMethodInsn(INVOKESTATIC, Type.getInternalName(Arrays.class), diff --git a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java index bf67f4856..3086afe24 100644 --- a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java +++ b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java @@ -6,6 +6,7 @@ import com.clickhouse.client.api.data_formats.internal.SerializerUtils; import com.clickhouse.client.api.metadata.TableSchema; import com.clickhouse.client.api.query.QuerySettings; +import com.clickhouse.client.api.serde.POJOFieldDeserializer; import com.clickhouse.data.ClickHouseColumn; import com.clickhouse.data.format.BinaryStreamUtils; import org.testng.Assert; @@ -15,6 +16,7 @@ import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.lang.reflect.Method; import java.nio.ByteBuffer; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; @@ -286,6 +288,83 @@ public void testDefaultBehaviorReturnsString() throws IOException { Assert.assertEquals(read, "still a string"); } + // ---- POJO binding (queryAll/readToPOJO) over String columns with the feature enabled ---- + + /** + * Minimal POJO with the only two field representations supported for top-level String/FixedString + * columns: {@link String} and {@code byte[]}. {@link StringValue} is a read-time holder and is not a + * supported POJO field type. + */ + public static class StringPojo { + private String asString; + private byte[] asBytes; + + public void setAsString(String asString) { this.asString = asString; } + public void setAsBytes(byte[] asBytes) { this.asBytes = asBytes; } + } + + private static byte[] stringWire(byte[] value) throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + BinaryStreamUtils.writeString(baos, value); + return baos.toByteArray(); + } + + private static POJOFieldDeserializer setterFor(String name, ClickHouseColumn column) throws Exception { + Method setter = StringPojo.class.getMethod(name, + name.equals("setAsString") ? String.class : byte[].class); + return SerializerUtils.compilePOJOSetter(setter, column); + } + + @Test + public void testPojoSetterStringFieldDecodesWhenFeatureEnabled() throws Exception { + // Regression: with the feature enabled the reader produces StringValue, but a String setter must + // still receive a decoded String (the compiled setter casts the readValue result to String). + ClickHouseColumn column = ClickHouseColumn.of("s", "String"); + byte[] wire = stringWire("hello".getBytes(StandardCharsets.UTF_8)); + + StringPojo pojo = new StringPojo(); + setterFor("setAsString", column).setValue(pojo, reader(wire, true), column); + Assert.assertEquals(pojo.asString, "hello"); + } + + @Test + public void testPojoSetterByteArrayFieldReceivesRawBytesWhenFeatureEnabled() throws Exception { + // A byte[] setter must receive the raw bytes (preserving non-UTF-8 content) instead of a StringValue. + ClickHouseColumn column = ClickHouseColumn.of("s", "String"); + byte[] binary = {(byte) 0xDE, (byte) 0xAD, (byte) 0x00, (byte) 0xBE, (byte) 0xEF}; + byte[] wire = stringWire(binary); + + StringPojo pojo = new StringPojo(); + setterFor("setAsBytes", column).setValue(pojo, reader(wire, true), column); + Assert.assertEquals(pojo.asBytes, binary); + } + + @Test + public void testPojoSetterFixedStringByteArrayFieldWhenFeatureEnabled() throws Exception { + // A byte[] setter over a FixedString column must receive the raw bytes, preserving binary content. + ClickHouseColumn column = ClickHouseColumn.of("s", "FixedString(3)"); + byte[] binary = {(byte) 0xAA, (byte) 0xBB, (byte) 0xCC}; + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + baos.write(binary); // FixedString(3) is exactly 3 raw bytes on the wire + + StringPojo pojo = new StringPojo(); + setterFor("setAsBytes", column).setValue(pojo, reader(baos.toByteArray(), true), column); + Assert.assertEquals(pojo.asBytes, binary); + } + + @Test + public void testPojoSetterFixedStringStringFieldWhenFeatureEnabled() throws Exception { + // A String setter over a FixedString column must receive the decoded String. + ClickHouseColumn column = ClickHouseColumn.of("s", "FixedString(3)"); + byte[] bytes = "abc".getBytes(StandardCharsets.UTF_8); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + baos.write(bytes); + + StringPojo pojo = new StringPojo(); + setterFor("setAsString", column).setValue(pojo, reader(baos.toByteArray(), true), column); + Assert.assertEquals(pojo.asString, "abc"); + } + // ---- Writing binary String values ---- @Test diff --git a/docs/features.md b/docs/features.md index 39d37c691..f03d0547a 100644 --- a/docs/features.md +++ b/docs/features.md @@ -16,7 +16,7 @@ This document lists stable, user-visible behavior in `client-v2` and `jdbc-v2` t - Parameterized SQL: Accepts named query parameters and can send them through supported HTTP request encodings. - Result materialization helpers: Provides streaming `Records`, generic row access, and convenience APIs that materialize all rows into generic records or typed POJOs. - Binary format readers: Reads ClickHouse binary result formats including `Native`, `RowBinary`, `RowBinaryWithNames`, and `RowBinaryWithNamesAndTypes`. -- Binary string support: Opt-in through the `binary_string_support` property (or `Client.Builder#binaryStringSupport(boolean)`), disabled by default. When enabled, top-level `String` and `FixedString` columns are read as `StringValue`, which preserves the raw bytes (`toByteArray()`/`asByteBuffer()`) and lazily decodes a `String` (`asString()`), instead of eagerly decoding to a `String`. Strings nested inside containers (`Array`, `Map`, `Tuple`, `Nested`, `Variant`) are still read as `String`. +- Binary string support: Opt-in through the `binary_string_support` property (or `Client.Builder#binaryStringSupport(boolean)`), disabled by default. When enabled, untyped reads (e.g. `GenericRecord.getObject(...)`/`BinaryStreamReader.readValue(...)`) of top-level `String` and `FixedString` columns return a `StringValue`, which preserves the raw bytes (`toByteArray()`/`asByteBuffer()`) and lazily decodes a `String` (`asString()`), instead of eagerly decoding to a `String`. `StringValue` is a read-time holder, not a supported POJO field type: typed `queryAll(...)`/`readToPOJO` binding still maps these columns to `String` (decoded) or `byte[]` (raw bytes) according to the POJO field type. Strings nested inside containers (`Array`, `Map`, `Tuple`, `Nested`, `Variant`) are still read as `String`. - JSONEachRow text reader: Can stream `JSONEachRow` responses through a caller-supplied `JsonParser`, with Jackson and Gson parser factory implementations available as optional classpath dependencies, and infers a best-effort schema from the first row. - Data type conversion: Maps ClickHouse types to Java values for binary reads, POJO binding, and SQL parameter formatting, including date/time handling. - Geometry type support: For ClickHouse `25.11+`, where `Geometry` changed from a string alias to `Variant(Point, Ring, LineString, MultiLineString, Polygon, MultiPolygon)`, the client reads and writes `Geometry` values through generic records, binary readers, POJO binding, and SQL parameter formatting, using Java array dimensionality to represent the geometry shape. diff --git a/jdbc-v2/src/main/java/com/clickhouse/jdbc/ResultSetImpl.java b/jdbc-v2/src/main/java/com/clickhouse/jdbc/ResultSetImpl.java index 08bd9be25..cd4406cb6 100644 --- a/jdbc-v2/src/main/java/com/clickhouse/jdbc/ResultSetImpl.java +++ b/jdbc-v2/src/main/java/com/clickhouse/jdbc/ResultSetImpl.java @@ -287,6 +287,7 @@ public InputStream getUnicodeStream(int columnIndex) throws SQLException { @Override public InputStream getBinaryStream(int columnIndex) throws SQLException { + checkClosed(); try { if (reader.hasValue(columnIndex)) { byte[] bytes = reader.getByteArray(columnIndex); From b35806df9a8041d8f34124dd80cf36a4ed42cfe8 Mon Sep 17 00:00:00 2001 From: Sergey Chernov Date: Tue, 30 Jun 2026 15:20:20 -0700 Subject: [PATCH 09/10] Redesigned to expose less. Added tests. Updated docs --- CHANGELOG.md | 4 +- .../com/clickhouse/client/api/Client.java | 20 ++-- .../client/api/ClientConfigProperties.java | 8 +- .../api/data_formats/NativeFormatReader.java | 9 +- .../data_formats/RowBinaryFormatReader.java | 11 +- ...owBinaryWithNamesAndTypesFormatReader.java | 10 +- .../RowBinaryWithNamesFormatReader.java | 11 +- .../internal/AbstractBinaryFormatReader.java | 9 +- .../internal/BinaryStreamReader.java | 17 ++- .../internal/MapBackedRecord.java | 1 - .../internal/SerializerUtils.java | 39 +++---- .../{ => internal}/StringValue.java | 74 ++++++------- .../api/internal/DataTypeConverter.java | 2 +- .../RowBinaryFormatStringWriteTest.java | 100 ------------------ .../internal/BaseReaderTests.java | 88 ++++++++++++++- .../internal/SerializerUtilsTest.java | 49 +++++++++ .../{ => internal}/StringValueTests.java | 48 +++++++-- .../client/datatypes/DataTypeTests.java | 78 ++++++++++++++ .../datatypes/RowBinaryFormatWriterTest.java | 99 ++++++++++++++++- .../clickhouse/client/insert/InsertTests.java | 14 +-- .../clickhouse/client/insert/SamplePOJO.java | 24 ----- docs/features.md | 4 +- .../clickhouse/jdbc/JdbcDataTypeTests.java | 58 ++++------ .../jdbc/metadata/DatabaseMetaDataTest.java | 62 +++++++++++ 24 files changed, 517 insertions(+), 322 deletions(-) rename client-v2/src/main/java/com/clickhouse/client/api/data_formats/{ => internal}/StringValue.java (61%) delete mode 100644 client-v2/src/test/java/com/clickhouse/client/api/data_formats/RowBinaryFormatStringWriteTest.java rename client-v2/src/test/java/com/clickhouse/client/api/data_formats/{ => internal}/StringValueTests.java (93%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8c1ee9047..cc916b92d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,8 @@ - **[client-v2]** `Client.Builder#useBearerTokenAuth(String)` now stores the bearer token under the `access_token` configuration key (with the `Bearer ` prefix) instead of writing it directly into `http_header_authorization`. The HTTP wire format is unchanged, but the token is no longer observable through `Client#getReadOnlyConfig()` under the `http_header_authorization` key. +- **[client-v2]** The public `ClickHouseBinaryFormatWriter` interface gained two methods, `setString(String, byte[])` and `setString(int, byte[])`, for writing raw `String`/`FixedString` bytes. Code that only *uses* the interface is unaffected, but any third party that *implements* `ClickHouseBinaryFormatWriter` directly is source- and binary-incompatible until it adds these methods (recompiling against the new version is required; otherwise an `AbstractMethodError` can occur at runtime). + ### New Features - **[client-v2]** Added `Session` API to encapsulate and manage ClickHouse session settings (`session_id`, `session_check`, `session_timeout`, `session_timezone`) as a reusable object. The `Session` instance can be applied to any request settings using `applyTo()`, and session state can be cleared via `clearSession()`. Additionally, added `resetOption(String)` to `InsertSettings`, `QuerySettings`, and `CommonSettings` to allow removing specific settings. Settings explicitly set to `null` will not be sent to the server, which is useful for overriding global settings. @@ -37,7 +39,7 @@ - **[client-v2, jdbc-v2]** Added support for ClickHouse `Geometry` type for ClickHouse `25.11+`, where `Geometry` changed from a `String` alias to `Variant(Point, Ring, LineString, MultiLineString, Polygon, MultiPolygon)` (client still compatible with older versions). Includes client read/write handling and JDBC type mapping for retrieving and inserting geometry values. Current writes infer the target geometry variant from array nesting depth, so `Ring` vs `LineString` and `Polygon` vs `MultiLineString` are not yet distinguishable through the generic `Geometry` write path. (https://github.com/ClickHouse/clickhouse-java/pull/2815) -- **[client-v2, jdbc-v2]** Added opt-in binary string support through the `binary_string_support` configuration property (or `Client.Builder#binaryStringSupport(boolean)`), disabled by default. When enabled, top-level `String` and `FixedString` columns are read into a `StringValue` that preserves the raw bytes instead of decoding them into a `String`, allowing non-UTF-8/binary content to round-trip byte-for-byte. `StringValue` exposes the bytes via `toByteArray()`/`asByteBuffer()` and lazily decodes a `String` via `asString()` (UTF-8 by default, or a caller-supplied `Charset`). Values nested inside containers (`Array`, `Map`, `Tuple`, `Nested`, `Variant`) continue to be read as `String`, since those types are not expected to carry large/binary strings. On the JDBC side, `ResultSet#getBinaryStream(int)` and `ResultSet#getBinaryStream(String)` are now implemented (previously unsupported) and, together with `getBytes(...)`, return the raw column bytes. +- **[client-v2, jdbc-v2]** Added opt-in binary string support through the `binary_string_support` configuration property (or `Client.Builder#binaryStringSupport(boolean)`), disabled by default. The setting is resolved per operation from the merged client and query settings, so it can be overridden for a single request via the `binary_string_support` operation option (e.g. `QuerySettings#setOption(ClientConfigProperties.BINARY_STRING_SUPPORT.getKey(), true)`) independently of the client-level default. When enabled, top-level `String` and `FixedString` columns are read into a `StringValue` that preserves the raw bytes instead of decoding them into a `String`, allowing non-UTF-8/binary content to round-trip byte-for-byte. `StringValue` exposes the bytes via `toByteArray()`/`asByteBuffer()` and lazily decodes a `String` via `asString()` (UTF-8 by default, or a caller-supplied `Charset`). Values nested inside containers (`Array`, `Map`, `Tuple`, `Nested`, `Variant`) continue to be read as `String`, since those types are not expected to carry large/binary strings. On the JDBC side, `ResultSet#getBinaryStream(int)` and `ResultSet#getBinaryStream(String)` are now implemented (previously unsupported) and, together with `getBytes(...)`, return the raw column bytes. - **[jdbc-v2]** `ResultSet#getObject(int|String, Map>)` now accepts ClickHouse type names as map keys in addition to the JDBC `SQLType` names it has always accepted. Only unwrapped type names are used for the lookup — `Nullable(...)` and `LowCardinality(...)` wrappers are stripped and do not affect resolution, so a key like `"Int32"` matches both `Int32` and `Nullable(Int32)` columns; keys like `"Nullable(Int32)"` are not recognized. Lookup order is the `ClickHouseDataType` enum name (e.g. `"Int32"`, `"String"`, `"DateTime"`) then the JDBC `SQLType` name (e.g. `"INTEGER"`, `"VARCHAR"`, `"TIMESTAMP"`); a missing entry leaves the value uncoerced. The feature is supported for primitive ClickHouse types only — `Array`, `Tuple`, `Map`, `Nested`, and geometry types are not supported and continue to be returned in their native form regardless of the user-supplied map. Existing maps keyed only by JDBC `SQLType` names continue to work unchanged. diff --git a/client-v2/src/main/java/com/clickhouse/client/api/Client.java b/client-v2/src/main/java/com/clickhouse/client/api/Client.java index 19ee54ac2..f13bca530 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/Client.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/Client.java @@ -3,6 +3,7 @@ import com.clickhouse.client.api.command.CommandResponse; import com.clickhouse.client.api.command.CommandSettings; import com.clickhouse.client.api.data_formats.ClickHouseBinaryFormatReader; +import com.clickhouse.client.api.data_formats.ClickHouseFormatReader; import com.clickhouse.client.api.data_formats.NativeFormatReader; import com.clickhouse.client.api.data_formats.RowBinaryFormatReader; import com.clickhouse.client.api.data_formats.RowBinaryWithNamesAndTypesFormatReader; @@ -135,8 +136,6 @@ public class Client implements AutoCloseable { private final Map> typeHintMapping; - private final boolean binaryStringSupport; - // Server context private String dbUser; private String serverVersion; @@ -202,7 +201,6 @@ private Client(Collection endpoints, Map configuration, this.serverVersion = configuration.getOrDefault(ClientConfigProperties.SERVER_VERSION.getKey(), "unknown"); this.dbUser = configuration.getOrDefault(ClientConfigProperties.USER.getKey(), ClientConfigProperties.USER.getDefObjVal()); this.typeHintMapping = (Map>) this.configuration.get(ClientConfigProperties.TYPE_HINT_MAPPING.getKey()); - this.binaryStringSupport = ClientConfigProperties.BINARY_STRING_SUPPORT.getOrDefault(this.configuration); } /** @@ -1123,10 +1121,10 @@ public Builder typeHintMapping(Map> typeHintMapping } /** - * Enables reading top-level {@code String} and {@code FixedString} columns as - * {@link com.clickhouse.client.api.data_formats.StringValue}, preserving the raw bytes instead of - * decoding them into a {@link String}. Values nested inside containers (Array, Map, Tuple, Nested, - * Variant) are still read as {@link String}. + * Enables reading {@code String} and {@code FixedString} columns into an intermediate {@code byte[]} + * (a new array each time) instead of decoding them into a {@link String}. This improves working with + * large strings and allows {@link ClickHouseFormatReader#getByteArray} to be used more effectively. Can also be configured + * per operation. * * @param enable - if the feature is enabled * @return this builder instance @@ -2126,17 +2124,17 @@ public ClickHouseBinaryFormatReader newBinaryFormatReader(QueryResponse response switch (response.getFormat()) { case Native: reader = new NativeFormatReader(response.getInputStream(), response.getSettings(), - byteBufferPool, typeHintMapping, binaryStringSupport); + byteBufferPool, typeHintMapping); break; case RowBinaryWithNamesAndTypes: - reader = new RowBinaryWithNamesAndTypesFormatReader(response.getInputStream(), response.getSettings(), byteBufferPool, typeHintMapping, binaryStringSupport); + reader = new RowBinaryWithNamesAndTypesFormatReader(response.getInputStream(), response.getSettings(), byteBufferPool, typeHintMapping); break; case RowBinaryWithNames: - reader = new RowBinaryWithNamesFormatReader(response.getInputStream(), response.getSettings(), schema, byteBufferPool, typeHintMapping, binaryStringSupport); + reader = new RowBinaryWithNamesFormatReader(response.getInputStream(), response.getSettings(), schema, byteBufferPool, typeHintMapping); break; case RowBinary: reader = new RowBinaryFormatReader(response.getInputStream(), response.getSettings(), schema, - byteBufferPool, typeHintMapping, binaryStringSupport); + byteBufferPool, typeHintMapping); break; default: throw new IllegalArgumentException("Binary readers doesn't support format: " + response.getFormat()); diff --git a/client-v2/src/main/java/com/clickhouse/client/api/ClientConfigProperties.java b/client-v2/src/main/java/com/clickhouse/client/api/ClientConfigProperties.java index 605d92d86..cff97bc68 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/ClientConfigProperties.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/ClientConfigProperties.java @@ -1,5 +1,6 @@ package com.clickhouse.client.api; +import com.clickhouse.client.api.data_formats.ClickHouseFormatReader; import com.clickhouse.client.api.data_formats.internal.AbstractBinaryFormatReader; import com.clickhouse.client.api.enums.SSLMode; import com.clickhouse.client.api.internal.ClickHouseLZ4OutputStream; @@ -182,10 +183,9 @@ public Object parseValue(String value) { TYPE_HINT_MAPPING("type_hint_mapping", Map.class), /** - * When enabled, top-level {@code String} and {@code FixedString} columns are read into a - * {@link com.clickhouse.client.api.data_formats.StringValue} that preserves the raw bytes instead of - * decoding them into a {@link String}. Values nested inside containers (Array, Map, Tuple, Nested, Variant) - * are still read as {@link String}, since those types are not expected to carry large/binary strings. + * When enabled, {@code String} and {@code FixedString} columns are read into an intermediate {@code byte[]} + * instead of decoding them into a {@link String}. Improves working with large strings and lets + * {@link ClickHouseFormatReader#getByteArray} be used more effectively. Can be configured per operation. */ BINARY_STRING_SUPPORT("binary_string_support", Boolean.class, "false"), diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/NativeFormatReader.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/NativeFormatReader.java index e097e9b16..ddcf0049b 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/NativeFormatReader.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/NativeFormatReader.java @@ -29,14 +29,7 @@ public class NativeFormatReader extends AbstractBinaryFormatReader { public NativeFormatReader(InputStream inputStream, QuerySettings settings, BinaryStreamReader.ByteBufferAllocator byteBufferAllocator, Map> typeHintMapping) { - this(inputStream, settings, byteBufferAllocator, typeHintMapping, false); - } - - public NativeFormatReader(InputStream inputStream, QuerySettings settings, - BinaryStreamReader.ByteBufferAllocator byteBufferAllocator, - Map> typeHintMapping, - boolean binaryStringSupport) { - super(inputStream, settings, null, byteBufferAllocator, typeHintMapping, binaryStringSupport); + super(inputStream, settings, null, byteBufferAllocator, typeHintMapping); try { readBlock(); } catch (IOException e) { diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatReader.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatReader.java index 70507d8f9..0d8575bb9 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatReader.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatReader.java @@ -19,16 +19,7 @@ public RowBinaryFormatReader(InputStream inputStream, TableSchema schema, BinaryStreamReader.ByteBufferAllocator byteBufferAllocator, Map> typeHintMapping) { - this(inputStream, querySettings, schema, byteBufferAllocator, typeHintMapping, false); - } - - public RowBinaryFormatReader(InputStream inputStream, - QuerySettings querySettings, - TableSchema schema, - BinaryStreamReader.ByteBufferAllocator byteBufferAllocator, - Map> typeHintMapping, - boolean binaryStringSupport) { - super(inputStream, querySettings, schema, byteBufferAllocator, typeHintMapping, binaryStringSupport); + super(inputStream, querySettings, schema, byteBufferAllocator, typeHintMapping); readNextRecord(); } diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryWithNamesAndTypesFormatReader.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryWithNamesAndTypesFormatReader.java index b20acd926..67bda8b84 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryWithNamesAndTypesFormatReader.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryWithNamesAndTypesFormatReader.java @@ -22,15 +22,7 @@ public RowBinaryWithNamesAndTypesFormatReader(InputStream inputStream, QuerySettings querySettings, BinaryStreamReader.ByteBufferAllocator byteBufferAllocator, Map> typeHintMapping) { - this(inputStream, querySettings, byteBufferAllocator, typeHintMapping, false); - } - - public RowBinaryWithNamesAndTypesFormatReader(InputStream inputStream, - QuerySettings querySettings, - BinaryStreamReader.ByteBufferAllocator byteBufferAllocator, - Map> typeHintMapping, - boolean binaryStringSupport) { - super(inputStream, querySettings, null, byteBufferAllocator, typeHintMapping, binaryStringSupport); + super(inputStream, querySettings, null, byteBufferAllocator, typeHintMapping); readSchema(); } diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryWithNamesFormatReader.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryWithNamesFormatReader.java index 850861dd4..e572441ab 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryWithNamesFormatReader.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryWithNamesFormatReader.java @@ -23,16 +23,7 @@ public RowBinaryWithNamesFormatReader(InputStream inputStream, TableSchema schema, BinaryStreamReader.ByteBufferAllocator byteBufferAllocator, Map> typeHintMapping) { - this(inputStream, querySettings, schema, byteBufferAllocator, typeHintMapping, false); - } - - public RowBinaryWithNamesFormatReader(InputStream inputStream, - QuerySettings querySettings, - TableSchema schema, - BinaryStreamReader.ByteBufferAllocator byteBufferAllocator, - Map> typeHintMapping, - boolean binaryStringSupport) { - super(inputStream, querySettings, schema, byteBufferAllocator, typeHintMapping, binaryStringSupport); + super(inputStream, querySettings, schema, byteBufferAllocator, typeHintMapping); int nCol = 0; try { nCol = BinaryStreamReader.readVarInt(input); diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/AbstractBinaryFormatReader.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/AbstractBinaryFormatReader.java index e1bb37b37..1f5559ae1 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/AbstractBinaryFormatReader.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/AbstractBinaryFormatReader.java @@ -4,7 +4,6 @@ import com.clickhouse.client.api.ClientException; import com.clickhouse.client.api.DataTypeUtils; import com.clickhouse.client.api.data_formats.ClickHouseBinaryFormatReader; -import com.clickhouse.client.api.data_formats.StringValue; import com.clickhouse.client.api.internal.DataTypeConverter; import com.clickhouse.client.api.internal.MapUtils; import com.clickhouse.client.api.internal.ServerSettings; @@ -78,10 +77,6 @@ public abstract class AbstractBinaryFormatReader implements ClickHouseBinaryForm private long lastNextCallTs; // for exception to detect slow reader protected AbstractBinaryFormatReader(InputStream inputStream, QuerySettings querySettings, TableSchema schema, BinaryStreamReader.ByteBufferAllocator byteBufferAllocator, Map> defaultTypeHintMap) { - this(inputStream, querySettings, schema, byteBufferAllocator, defaultTypeHintMap, false); - } - - protected AbstractBinaryFormatReader(InputStream inputStream, QuerySettings querySettings, TableSchema schema, BinaryStreamReader.ByteBufferAllocator byteBufferAllocator, Map> defaultTypeHintMap, boolean binaryStringSupport) { this.input = inputStream; Map settings = querySettings == null ? Collections.emptyMap() : querySettings.getAllSettings(); Boolean useServerTimeZone = (Boolean) settings.get(ClientConfigProperties.USE_SERVER_TIMEZONE.getKey()); @@ -93,6 +88,10 @@ protected AbstractBinaryFormatReader(InputStream inputStream, QuerySettings quer } boolean jsonAsString = MapUtils.getFlag(settings, ClientConfigProperties.serverSetting(ServerSettings.OUTPUT_FORMAT_BINARY_WRITE_JSON_AS_STRING), false); + // Binary string support is resolved from the (already merged client + operation) query settings so it can be + // toggled per operation, not just per client. + boolean binaryStringSupport = MapUtils.getFlag(settings, + ClientConfigProperties.BINARY_STRING_SUPPORT.getKey(), false); this.binaryStreamReader = new BinaryStreamReader(inputStream, timeZone, LOG, byteBufferAllocator, jsonAsString, defaultTypeHintMap, binaryStringSupport); if (schema != null) { diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/BinaryStreamReader.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/BinaryStreamReader.java index 84e029133..9b7c24375 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/BinaryStreamReader.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/BinaryStreamReader.java @@ -2,7 +2,6 @@ import com.clickhouse.client.api.ClientException; import com.clickhouse.client.api.DataTypeUtils; -import com.clickhouse.client.api.data_formats.StringValue; import com.clickhouse.data.ClickHouseColumn; import com.clickhouse.data.ClickHouseDataType; import com.clickhouse.data.ClickHouseEnum; @@ -109,12 +108,12 @@ public T readValue(ClickHouseColumn column) throws IOException { */ public T readValue(ClickHouseColumn column, Class typeHint) throws IOException { // Top-level reads honor the binary-string feature flag. Values nested inside containers always read - // strings as String (see readArray/readMap/readTuple/readNested/readVariant). + // strings as String (see readArray/readMap/readTuple/readNested/readVariant/readJsonData). return readValue(column, typeHint, binaryStringSupport); } @SuppressWarnings("unchecked") - private T readValue(ClickHouseColumn column, Class typeHint, boolean stringAsValue) throws IOException { + private T readValue(ClickHouseColumn column, Class typeHint, boolean stringAsBytes) throws IOException { if (column.isNullable()) { int isNull = readByteOrEOF(input); if (isNull == 1) { // is Null? @@ -133,7 +132,7 @@ private T readValue(ClickHouseColumn column, Class typeHint, boolean stri switch (dataType) { // Primitives case FixedString: { - if (stringAsValue) { + if (stringAsBytes) { return (T) new StringValue(readStringBytes(input, precision)); } byte[] bytes = precision > STRING_BUFF.length ? @@ -142,7 +141,7 @@ private T readValue(ClickHouseColumn column, Class typeHint, boolean stri return (T) new String(bytes, 0, precision, StandardCharsets.UTF_8); } case String: { - if (stringAsValue) { + if (stringAsBytes) { return (T) readStringValue(); } return (T) readString(); @@ -260,14 +259,14 @@ private T readValue(ClickHouseColumn column, Class typeHint, boolean stri case Nothing: return null; case SimpleAggregateFunction: - return (T) readValue(column.getNestedColumns().get(0)); + return (T) readValue(column.getNestedColumns().get(0), typeHint, false); case AggregateFunction: return (T) readBitmap( actualColumn); case Variant: case Geometry: return (T) readVariant(actualColumn); case Dynamic: - return (T) readValue(actualColumn, typeHint, stringAsValue); + return (T) readValue(actualColumn, typeHint, stringAsBytes); case Nested: return convertArray(readNested(actualColumn), typeHint); default: @@ -686,7 +685,7 @@ public ArrayValue readArrayItem(ClickHouseColumn itemTypeColumn, int len) throws } /** - * Reads a value nested inside a container (Array, Map, Tuple, Nested, Variant). Strings are always + * Reads a value nested inside a container (Array, Map, Tuple, Nested, Variant, JSON). Strings are always * decoded into {@link String} here, regardless of the binary-string feature flag, because nested types * are not expected to carry large/binary strings. */ @@ -1441,7 +1440,7 @@ private Map readJsonData(InputStream input, ClickHouseColumn col String path = readString(input); ClickHouseColumn dataColumn = predefinedColumns == null? JSON_PLACEHOLDER_COL : predefinedColumns.getOrDefault(path, JSON_PLACEHOLDER_COL); - Object value = readValue(dataColumn); + Object value = readNestedValue(dataColumn); if (value == null && (lastDataColumn != null && lastDataColumn.getDataType() == ClickHouseDataType.Nothing) ) { continue; } diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/MapBackedRecord.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/MapBackedRecord.java index 4049009d4..0d24063f4 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/MapBackedRecord.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/MapBackedRecord.java @@ -2,7 +2,6 @@ import com.clickhouse.client.api.ClientException; import com.clickhouse.client.api.DataTypeUtils; -import com.clickhouse.client.api.data_formats.StringValue; import com.clickhouse.client.api.internal.DataTypeConverter; import com.clickhouse.client.api.metadata.TableSchema; import com.clickhouse.client.api.query.GenericRecord; diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/SerializerUtils.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/SerializerUtils.java index 355fffd5e..3654b2742 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/SerializerUtils.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/SerializerUtils.java @@ -2,7 +2,6 @@ import com.clickhouse.client.api.Client; import com.clickhouse.client.api.ClientException; -import com.clickhouse.client.api.data_formats.StringValue; import com.clickhouse.client.api.serde.POJOFieldDeserializer; import com.clickhouse.data.ClickHouseAggregateFunction; import com.clickhouse.data.ClickHouseColumn; @@ -946,11 +945,12 @@ public static String convertToString(Object value) { } /** - * Converts a value read for a top-level {@code String}/{@code FixedString} column into a {@link String} - * for POJO binding. With the binary-string feature enabled the reader produces a {@link StringValue} - * holder (a read-time value holder, not a POJO field type); otherwise it produces a plain {@link String}. - * A {@code null} value (e.g. a SQL {@code NULL}) is passed through. Used from the bytecode generated by - * {@link #compilePOJOSetter(Method, ClickHouseColumn)}. + * Converts a value read for a string-backed column into a {@link String} for POJO binding (any column the + * reader may decode to a String, including top-level {@code String}/{@code FixedString} and wrappers such + * as {@code SimpleAggregateFunction(String)}). With the binary-string feature enabled the reader produces a + * {@link StringValue} holder (a read-time value holder, not a POJO field type); otherwise it produces a + * plain {@link String}. A {@code null} value (e.g. a SQL {@code NULL}) is passed through. Used from the + * bytecode generated by {@link #compilePOJOSetter(Method, ClickHouseColumn)}. * * @param value value returned by {@code BinaryStreamReader.readValue} * @return the decoded string, or {@code null} @@ -966,10 +966,10 @@ public static String stringValueToString(Object value) { } /** - * Converts a value read for a top-level {@code String}/{@code FixedString} column into the raw bytes for - * POJO binding. A {@link StringValue} yields its preserved bytes (no lossy decoding); a plain - * {@link String} is encoded as UTF-8, matching the historical behaviour. A {@code null} value is passed - * through. Used from the bytecode generated by {@link #compilePOJOSetter(Method, ClickHouseColumn)}. + * Converts a value read for a string-backed column into the raw bytes for POJO binding. A {@link StringValue} + * yields its preserved bytes (no lossy decoding); a plain {@link String} is encoded as UTF-8, matching the + * historical behaviour; a value that is already a {@code byte[]} is passed through. A {@code null} value is + * passed through. Used from the bytecode generated by {@link #compilePOJOSetter(Method, ClickHouseColumn)}. * * @param value value returned by {@code BinaryStreamReader.readValue} * @return the raw bytes, or {@code null} @@ -1116,20 +1116,21 @@ public static POJOFieldDeserializer compilePOJOSetter(Method setterMethod, Click Type.getType(Class.class)), false); - boolean stringLikeColumn = column.getDataType() == ClickHouseDataType.String - || column.getDataType() == ClickHouseDataType.FixedString; - - if (stringLikeColumn && targetType == String.class) { - // With binary-string support enabled readValue yields a StringValue holder; convert it (or a - // plain String when the feature is off) to the String the setter expects. StringValue is a - // read-time holder, not a supported POJO field type. + if (targetType == String.class) { + // The conversion is driven by the target field type, not the column data type: readValue may + // yield a StringValue holder for any string-backed column (top-level String/FixedString, but + // also wrappers such as SimpleAggregateFunction(String) or a Dynamic resolving to String) when + // binary-string support is enabled. Normalize it (or a plain String when the feature is off) to + // the String the setter expects. StringValue is a read-time holder, not a supported POJO field + // type, so it never reaches the setter directly. mv.visitMethodInsn(INVOKESTATIC, Type.getInternalName(SerializerUtils.class), "stringValueToString", Type.getMethodDescriptor(Type.getType(String.class), Type.getType(Object.class)), false); - } else if (stringLikeColumn && targetType == byte[].class) { - // Convert the StringValue (or plain String) to raw bytes for a byte[] field. + } else if (targetType == byte[].class) { + // Convert the StringValue (or plain String) to raw bytes for a byte[] field. A value that is + // already a byte[] (e.g. Array(UInt8)) passes through unchanged. mv.visitMethodInsn(INVOKESTATIC, Type.getInternalName(SerializerUtils.class), "stringValueToByteArray", diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/StringValue.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/StringValue.java similarity index 61% rename from client-v2/src/main/java/com/clickhouse/client/api/data_formats/StringValue.java rename to client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/StringValue.java index 8b67e34d3..5584c6564 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/StringValue.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/StringValue.java @@ -1,8 +1,9 @@ -package com.clickhouse.client.api.data_formats; +package com.clickhouse.client.api.data_formats.internal; import java.nio.ByteBuffer; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; +import java.util.Arrays; import java.util.Objects; /** @@ -13,22 +14,20 @@ * by the read path when the binary-string feature is enabled (for example {@code GenericRecord.getObject} * or {@code BinaryStreamReader.readValue} without a type hint), so that callers that need exact bytes can * obtain them via {@link #toByteArray()} and callers that need text can decode via {@link #asString()}. - * It is not a supported field type for POJO binding: declare POJO fields for String/FixedString - * columns as {@link String} or {@code byte[]} instead. Normal application code should generally consume - * {@link String} or {@code byte[]} rather than holding onto a {@code StringValue}. + * Instances cannot be created by user code: the constructors are package-private and only the binary + * reader builds them. It is not a supported field type for POJO binding: declare POJO fields for + * String/FixedString columns as {@link String} or {@code byte[]} instead. Normal application code should + * generally consume {@link String} or {@code byte[]} rather than holding onto a {@code StringValue}. *

* This is a mutable structure and must be used with care. To avoid copying, it does not - * duplicate the bytes it is given: the constructor wraps the supplied array/buffer instead of - * copying it, and {@link #toByteArray()} returns a direct reference to the backing array rather - * than a defensive copy. Consequently, mutating the source array, the array returned by - * {@link #toByteArray()}, or reading the same value concurrently while it is being modified will - * change the observed value. Callers that need an independent snapshot must copy the bytes - * themselves. + * duplicate the bytes it is given: the constructor wraps the supplied array instead of copying it, and + * {@link #toByteArray()} returns a direct reference to the backing array when the value spans the whole + * array. Consequently, mutating the source array, the array returned by {@link #toByteArray()}, or reading + * the same value concurrently while it is being modified will change the observed value. Callers that need + * an independent snapshot must copy the bytes themselves. *

- * Backed by a {@link ByteBuffer} for a richer API and future off-heap memory support. Only heap - * buffers (with an accessible backing array) are supported today; constructing a value from a - * direct (off-heap) buffer is rejected. The decoded {@link String} produced by {@link #asString()} - * is cached. + * Backed by a {@link ByteBuffer} for a richer API and future off-heap memory support. The decoded + * {@link String} produced by {@link #asString()} is cached. */ public class StringValue { @@ -47,7 +46,7 @@ public class StringValue { * * @param bytes raw value bytes (not null) */ - public StringValue(byte[] bytes) { + StringValue(byte[] bytes) { this(bytes, DEFAULT_CHARSET); } @@ -58,28 +57,11 @@ public StringValue(byte[] bytes) { * @param bytes raw value bytes (not null) * @param defaultCharset charset used by {@link #asString()} and {@link #toString()} (not null) */ - public StringValue(byte[] bytes, Charset defaultCharset) { - this(ByteBuffer.wrap(bytes), defaultCharset); - } + StringValue(byte[] bytes, Charset defaultCharset) { + Objects.requireNonNull(bytes, "bytes cannot be null"); + Objects.requireNonNull(defaultCharset, "charset is required to convert bytes to String"); - /** - * Creates a value backed by the remaining content of the given buffer using the provided default charset. - * The buffer is referenced, not copied, so its content must not be modified afterwards. - * - * @param buffer backing heap buffer (not null); its remaining bytes define the value - * @param defaultCharset charset used by {@link #asString()} and {@link #toString()} (not null) - * @throws IllegalArgumentException if the buffer is a direct (off-heap) buffer with no accessible array - */ - public StringValue(ByteBuffer buffer, Charset defaultCharset) { - Objects.requireNonNull(buffer, "buffer cannot be null"); - Objects.requireNonNull(defaultCharset, "charset is required to convert buffer to String"); - - if (!buffer.hasArray()) { - throw new IllegalArgumentException("Can work only with heap buffer."); - } - - // Keep an independent view so external position/limit changes do not affect this value. - this.buffer = buffer.slice(); + this.buffer = ByteBuffer.wrap(bytes); this.defaultCharset = defaultCharset; } @@ -94,16 +76,22 @@ public ByteBuffer asByteBuffer() { } /** - * Returns a direct reference to the backing byte array of this value (no copy is made). + * Returns the raw bytes of this value, honoring the backing buffer's offset and position. *

- * The returned array is the live backing storage: mutating it mutates this value, and any change - * to the underlying bytes is reflected here. Callers that need an independent, immutable snapshot - * must copy the result themselves. + * As a zero-copy shortcut, when the value spans the entire backing array the live backing storage is + * returned directly (mutating it mutates this value); otherwise an exact-size copy of the value's bytes + * is returned. Callers that need a guaranteed independent snapshot should copy the result themselves. * - * @return the backing array holding the value bytes + * @return the value bytes (the live backing array when it spans the whole value, otherwise a copy) */ public byte[] toByteArray() { - return buffer.array(); + byte[] array = buffer.array(); + int offset = buffer.arrayOffset() + buffer.position(); + int length = buffer.remaining(); + if (offset == 0 && length == array.length) { + return array; + } + return Arrays.copyOfRange(array, offset, offset + length); } /** @@ -151,7 +139,7 @@ public String asString(Charset charset) { } private String decode(Charset charset) { - return new String(buffer.array(), charset); + return new String(buffer.array(), buffer.arrayOffset() + buffer.position(), buffer.remaining(), charset); } @Override diff --git a/client-v2/src/main/java/com/clickhouse/client/api/internal/DataTypeConverter.java b/client-v2/src/main/java/com/clickhouse/client/api/internal/DataTypeConverter.java index 0faa2a7a8..9ea4bfa25 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/internal/DataTypeConverter.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/internal/DataTypeConverter.java @@ -2,7 +2,7 @@ import com.clickhouse.client.api.ClickHouseException; import com.clickhouse.client.api.DataTypeUtils; -import com.clickhouse.client.api.data_formats.StringValue; +import com.clickhouse.client.api.data_formats.internal.StringValue; import com.clickhouse.client.api.data_formats.internal.BinaryStreamReader; import com.clickhouse.data.ClickHouseColumn; import com.clickhouse.data.ClickHouseDataType; diff --git a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/RowBinaryFormatStringWriteTest.java b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/RowBinaryFormatStringWriteTest.java deleted file mode 100644 index da33964fc..000000000 --- a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/RowBinaryFormatStringWriteTest.java +++ /dev/null @@ -1,100 +0,0 @@ -package com.clickhouse.client.api.data_formats; - -import com.clickhouse.client.api.data_formats.internal.BinaryStreamReader; -import com.clickhouse.client.api.metadata.TableSchema; -import com.clickhouse.data.ClickHouseColumn; -import com.clickhouse.data.ClickHouseFormat; -import org.testng.Assert; -import org.testng.annotations.Test; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.util.Collections; -import java.util.TimeZone; - -/** - * Unit coverage for the {@code writeString} helpers on {@link RowBinaryFormatSerializer} and the - * {@code setString(byte[])} overloads on {@link RowBinaryFormatWriter}, which the integration tests do - * not exercise in isolation. - */ -public class RowBinaryFormatStringWriteTest { - - private static final ClickHouseColumn STRING_COLUMN = ClickHouseColumn.of("s", "String"); - - private static BinaryStreamReader reader(byte[] data) { - return new BinaryStreamReader(new ByteArrayInputStream(data), TimeZone.getTimeZone("UTC"), null, - new BinaryStreamReader.DefaultByteBufferAllocator(), false, null, true); - } - - @Test - public void testSerializerWriteString() throws IOException { - ByteArrayOutputStream out = new ByteArrayOutputStream(); - new RowBinaryFormatSerializer(out).writeString("hello world"); - - StringValue read = reader(out.toByteArray()).readValue(STRING_COLUMN); - Assert.assertEquals(read.asString(), "hello world"); - } - - @Test - public void testSerializerWriteStringUnicode() throws IOException { - String value = "Привет 你好 🚀"; - ByteArrayOutputStream out = new ByteArrayOutputStream(); - new RowBinaryFormatSerializer(out).writeString(value); - - StringValue read = reader(out.toByteArray()).readValue(STRING_COLUMN); - Assert.assertEquals(read.asString(), value); - } - - @Test - public void testSerializerWriteStringFromBytesPreservesBinary() throws IOException { - byte[] binary = new byte[]{(byte) 0x00, (byte) 0xFF, (byte) 0x80, (byte) 0x7F, (byte) 0xAB}; - ByteArrayOutputStream out = new ByteArrayOutputStream(); - new RowBinaryFormatSerializer(out).writeString(binary); - - StringValue read = reader(out.toByteArray()).readValue(STRING_COLUMN); - Assert.assertEquals(read.toByteArray(), binary); - } - - @Test - public void testWriterSetStringBytesByName() throws IOException { - byte[] binary = new byte[]{(byte) 0xDE, (byte) 0xAD, (byte) 0x00, (byte) 0xBE, (byte) 0xEF}; - ByteArrayOutputStream out = writeSingleStringRow(w -> w.setString("s", binary)); - - StringValue read = reader(out.toByteArray()).readValue(STRING_COLUMN); - Assert.assertEquals(read.toByteArray(), binary); - } - - @Test - public void testWriterSetStringBytesByIndex() throws IOException { - byte[] binary = new byte[]{(byte) 0x01, (byte) 0x02, (byte) 0xFE, (byte) 0xFF, (byte) 0x00}; - ByteArrayOutputStream out = writeSingleStringRow(w -> w.setString(1, binary)); - - StringValue read = reader(out.toByteArray()).readValue(STRING_COLUMN); - Assert.assertEquals(read.toByteArray(), binary); - } - - @Test - public void testWriterSetStringBytesUtf8Content() throws IOException { - String text = "ascii and Юникод"; - byte[] binary = text.getBytes(StandardCharsets.UTF_8); - ByteArrayOutputStream out = writeSingleStringRow(w -> w.setString("s", binary)); - - StringValue read = reader(out.toByteArray()).readValue(STRING_COLUMN); - Assert.assertEquals(read.asString(), text); - } - - private interface RowWriter { - void write(RowBinaryFormatWriter writer); - } - - private static ByteArrayOutputStream writeSingleStringRow(RowWriter rowWriter) throws IOException { - TableSchema schema = new TableSchema(Collections.singletonList(STRING_COLUMN)); - ByteArrayOutputStream out = new ByteArrayOutputStream(); - RowBinaryFormatWriter writer = new RowBinaryFormatWriter(out, schema, ClickHouseFormat.RowBinary); - rowWriter.write(writer); - writer.commitRow(); - return out; - } -} diff --git a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BaseReaderTests.java b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BaseReaderTests.java index 93ded550e..9d6c1f125 100644 --- a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BaseReaderTests.java +++ b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BaseReaderTests.java @@ -5,12 +5,13 @@ import com.clickhouse.client.ClickHouseProtocol; import com.clickhouse.client.ClickHouseServerForTest; import com.clickhouse.client.api.Client; +import com.clickhouse.client.api.ClientConfigProperties; import com.clickhouse.client.api.command.CommandSettings; import com.clickhouse.client.api.data_formats.ClickHouseBinaryFormatReader; -import com.clickhouse.client.api.data_formats.StringValue; import com.clickhouse.client.api.enums.Protocol; import com.clickhouse.client.api.query.GenericRecord; import com.clickhouse.client.api.query.QueryResponse; +import com.clickhouse.client.api.query.QuerySettings; import com.clickhouse.data.ClickHouseVersion; import com.clickhouse.data.ClickHouseDataType; import org.testng.Assert; @@ -29,6 +30,7 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.Map; @Test(groups = {"integration"}) public class BaseReaderTests extends BaseIntegrationTest { @@ -682,4 +684,88 @@ public void testReadingBinaryStringFromHash() throws Exception { customClient.close(); } } + + /** + * String values nested inside a JSON column must be read as plain {@link String} even when + * {@code binary_string_support} is enabled, consistent with all other container types. Only top-level + * String/FixedString columns are promoted to {@link StringValue}. + */ + @Test(groups = {"integration"}) + public void testJsonStringPathsStayStringWithBinaryStringSupport() throws Exception { + if (isCloud()) { + return; // TODO: add support on cloud + } + if (isVersionMatch("(,24.8]")) { + return; + } + + final String table = "test_json_binary_string_support"; + CommandSettings commandSettings = new CommandSettings(); + commandSettings.serverSetting("allow_experimental_json_type", "1"); + client.execute("DROP TABLE IF EXISTS " + table, commandSettings).get(); + client.execute("CREATE TABLE " + table + " (id Int32, json JSON) ENGINE = MergeTree ORDER BY id", commandSettings).get(); + client.execute("INSERT INTO " + table + " VALUES (1, '{\"name\" : \"hello\"}')", commandSettings).get(); + + Client customClient = newClient() + .binaryStringSupport(true) + .build(); + + try (QueryResponse response = customClient.query("SELECT json FROM " + table + " ORDER BY id").get()) { + ClickHouseBinaryFormatReader reader = customClient.newBinaryFormatReader(response); + Assert.assertNotNull(reader.next()); + + Map json = reader.readValue("json"); + Object name = json.get("name"); + Assert.assertTrue(name instanceof String, + "String values nested in JSON must stay plain String, but got " + + (name == null ? "null" : name.getClass().getName())); + Assert.assertEquals(name, "hello"); + } finally { + customClient.close(); + } + } + + /** + * Binary string support is resolved per operation from the merged client and query settings, so two reads of the + * same table issued from the same client must honor each operation's {@code binary_string_support} override + * independently. The client default is left disabled here; one query opts in while the other relies on the + * default. + */ + @Test(groups = {"integration"}) + public void testBinaryStringSupportIsPerOperation() throws Exception { + final String table = "test_binary_string_support_per_operation"; + + client.execute("DROP TABLE IF EXISTS " + table).get(); + client.execute("CREATE TABLE " + table + " (id Int32, s String) ENGINE = Memory").get(); + client.execute("INSERT INTO " + table + " VALUES (1, 'hello')").get(); + + // The shared client keeps binary string support disabled (the default). + final String query = "SELECT s FROM " + table + " ORDER BY id"; + + // First operation: opt-in via per-operation QuerySettings -> reads a StringValue. + QuerySettings enabled = new QuerySettings() + .setOption(ClientConfigProperties.BINARY_STRING_SUPPORT.getKey(), true); + try (QueryResponse response = client.query(query, enabled).get()) { + ClickHouseBinaryFormatReader reader = client.newBinaryFormatReader(response); + Assert.assertNotNull(reader.next()); + Object value = reader.readValue("s"); + Assert.assertTrue(value instanceof StringValue, + "With binary_string_support enabled for the operation, top-level String must be a StringValue, but got " + + (value == null ? "null" : value.getClass().getName())); + Assert.assertEquals(((StringValue) value).asString(), "hello"); + } + + // Second operation on the same table/client without the override -> reads a plain String. + QuerySettings disabled = new QuerySettings() + .setOption(ClientConfigProperties.BINARY_STRING_SUPPORT.getKey(), false); + try (QueryResponse response = client.query(query, disabled).get()) { + ClickHouseBinaryFormatReader reader = client.newBinaryFormatReader(response); + Assert.assertNotNull(reader.next()); + Object value = reader.readValue("s"); + Assert.assertTrue(value instanceof String, + "With binary_string_support disabled for the operation, top-level String must stay a plain String, but got " + + (value == null ? "null" : value.getClass().getName())); + Assert.assertEquals(value, "hello"); + } + } } \ No newline at end of file diff --git a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/SerializerUtilsTest.java b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/SerializerUtilsTest.java index 1a94e9fe9..04965cb9a 100644 --- a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/SerializerUtilsTest.java +++ b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/SerializerUtilsTest.java @@ -12,6 +12,7 @@ import java.math.BigDecimal; import java.math.BigInteger; import java.net.InetAddress; +import java.nio.charset.StandardCharsets; import java.time.LocalDate; import java.util.ArrayList; import java.util.Arrays; @@ -327,6 +328,54 @@ public void testWriteFixedStringBytesRejectsValueLongerThanLength() { new byte[]{1, 2, 3, 4}, 3)); } + // stringValueToString / stringValueToByteArray are invoked from the bytecode generated for POJO setters. + // They are exercised end-to-end in StringValueTests, but these unit tests pin every input branch directly + // so the behaviour is locked in even if the set of column types that reach them is extended later. + + @Test + public void testStringValueToStringPassesThroughNull() { + Assert.assertNull(SerializerUtils.stringValueToString(null)); + } + + @Test + public void testStringValueToStringDecodesStringValue() { + StringValue value = new StringValue("héllo".getBytes(StandardCharsets.UTF_8)); + Assert.assertEquals(SerializerUtils.stringValueToString(value), "héllo"); + } + + @Test + public void testStringValueToStringReturnsPlainStringAsIs() { + String value = "plain"; + Assert.assertSame(SerializerUtils.stringValueToString(value), value); + } + + @Test + public void testStringValueToByteArrayPassesThroughNull() { + Assert.assertNull(SerializerUtils.stringValueToByteArray(null)); + } + + @Test + public void testStringValueToByteArrayPreservesStringValueBytes() { + // Non-UTF-8 bytes must survive without re-encoding. + byte[] binary = {(byte) 0xDE, (byte) 0xAD, (byte) 0x00, (byte) 0xBE, (byte) 0xEF}; + StringValue value = new StringValue(binary); + Assert.assertEquals(SerializerUtils.stringValueToByteArray(value), binary); + } + + @Test + public void testStringValueToByteArrayEncodesStringAsUtf8() { + Assert.assertEquals(SerializerUtils.stringValueToByteArray("héllo"), + "héllo".getBytes(StandardCharsets.UTF_8)); + } + + @Test + public void testStringValueToByteArrayPassesThroughByteArray() { + // This is the branch that lets future string-backed columns (e.g. Array(UInt8)) reuse the helper: + // a value that is already a byte[] must be returned unchanged, not re-wrapped or copied. + byte[] bytes = {1, 2, 3}; + Assert.assertSame(SerializerUtils.stringValueToByteArray(bytes), bytes); + } + private void assertCustomGeoTypeTag(String typeName) throws Exception { ByteArrayOutputStream out = new ByteArrayOutputStream(); SerializerUtils.writeDynamicTypeTag(out, ClickHouseColumn.of("v", typeName)); diff --git a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/StringValueTests.java similarity index 93% rename from client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java rename to client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/StringValueTests.java index 3086afe24..03ebfd0da 100644 --- a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java +++ b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/StringValueTests.java @@ -1,9 +1,7 @@ -package com.clickhouse.client.api.data_formats; +package com.clickhouse.client.api.data_formats.internal; -import com.clickhouse.client.api.data_formats.internal.AbstractBinaryFormatReader; -import com.clickhouse.client.api.data_formats.internal.BinaryStreamReader; -import com.clickhouse.client.api.data_formats.internal.MapBackedRecord; -import com.clickhouse.client.api.data_formats.internal.SerializerUtils; +import com.clickhouse.client.api.ClientConfigProperties; +import com.clickhouse.client.api.data_formats.RowBinaryWithNamesAndTypesFormatReader; import com.clickhouse.client.api.metadata.TableSchema; import com.clickhouse.client.api.query.QuerySettings; import com.clickhouse.client.api.serde.POJOFieldDeserializer; @@ -365,6 +363,36 @@ public void testPojoSetterFixedStringStringFieldWhenFeatureEnabled() throws Exce Assert.assertEquals(pojo.asString, "abc"); } + @Test + public void testPojoSetterStringFieldOverSimpleAggregateFunctionWhenFeatureEnabled() throws Exception { + // Regression: a String column wrapped in SimpleAggregateFunction(String) also reads as a StringValue + // when the feature is on. The compiled setter must convert it to String based on the target field type + // (the column data type is SimpleAggregateFunction, not String), otherwise it would ClassCastException. + ClickHouseColumn column = ClickHouseColumn.of("s", "SimpleAggregateFunction(anyLast, String)"); + // On the wire SimpleAggregateFunction(String) is serialized exactly like its nested String. + byte[] wire = stringWire("hello".getBytes(StandardCharsets.UTF_8)); + + StringPojo pojo = new StringPojo(); + setterFor("setAsString", column).setValue(pojo, reader(wire, true), column); + Assert.assertEquals(pojo.asString, "hello"); + } + + @Test + public void testPojoSetterSimpleAggregateFunctionWhenFeatureDisabled() throws Exception { + // With the feature off the reader returns a plain String for SimpleAggregateFunction(String); the + // String field receives it directly and the byte[] field gets the UTF-8 encoding. + ClickHouseColumn column = ClickHouseColumn.of("s", "SimpleAggregateFunction(anyLast, String)"); + byte[] wire = stringWire("world".getBytes(StandardCharsets.UTF_8)); + + StringPojo asString = new StringPojo(); + setterFor("setAsString", column).setValue(asString, reader(wire, false), column); + Assert.assertEquals(asString.asString, "world"); + + StringPojo asBytes = new StringPojo(); + setterFor("setAsBytes", column).setValue(asBytes, reader(wire, false), column); + Assert.assertEquals(asBytes.asBytes, "world".getBytes(StandardCharsets.UTF_8)); + } + // ---- Writing binary String values ---- @Test @@ -450,8 +478,9 @@ private static MapBackedRecord materializeRow(String[] names, String[] types, Ob RowBinaryWithNamesAndTypesFormatReader reader = new RowBinaryWithNamesAndTypesFormatReader( new ByteArrayInputStream(out.toByteArray()), - new QuerySettings().setUseTimeZone(TimeZone.getTimeZone("UTC").toZoneId().getId()), - new BinaryStreamReader.CachingByteBufferAllocator(), null, binaryStringSupport); + new QuerySettings().setUseTimeZone(TimeZone.getTimeZone("UTC").toZoneId().getId()) + .setOption(ClientConfigProperties.BINARY_STRING_SUPPORT.getKey(), binaryStringSupport), + new BinaryStreamReader.CachingByteBufferAllocator(), null); Map record = new LinkedHashMap<>(); Assert.assertTrue(reader.readRecord(record), "Expected a row to be read"); @@ -673,8 +702,9 @@ private static RowBinaryWithNamesAndTypesFormatReader readerForRow(String[] name RowBinaryWithNamesAndTypesFormatReader reader = new RowBinaryWithNamesAndTypesFormatReader( new ByteArrayInputStream(out.toByteArray()), - new QuerySettings().setUseTimeZone(TimeZone.getTimeZone("UTC").toZoneId().getId()), - new BinaryStreamReader.CachingByteBufferAllocator(), null, binaryStringSupport); + new QuerySettings().setUseTimeZone(TimeZone.getTimeZone("UTC").toZoneId().getId()) + .setOption(ClientConfigProperties.BINARY_STRING_SUPPORT.getKey(), binaryStringSupport), + new BinaryStreamReader.CachingByteBufferAllocator(), null); Assert.assertNotNull(reader.next(), "Expected a row to be read"); return reader; } diff --git a/client-v2/src/test/java/com/clickhouse/client/datatypes/DataTypeTests.java b/client-v2/src/test/java/com/clickhouse/client/datatypes/DataTypeTests.java index 8a98ebd93..b07078f09 100644 --- a/client-v2/src/test/java/com/clickhouse/client/datatypes/DataTypeTests.java +++ b/client-v2/src/test/java/com/clickhouse/client/datatypes/DataTypeTests.java @@ -154,6 +154,84 @@ public static String tblCreateSQL(String table) { } } + @Data + @AllArgsConstructor + @NoArgsConstructor + public static class DTOForBinaryStringTests { + private int rowId; + + // Mapped to a ClickHouse String column but holds raw (non-UTF-8) bytes. + private byte[] binaryString; + + // Mapped to a ClickHouse FixedString(N) column, also holding raw bytes. + private byte[] fixedBinary; + + public static String tblCreateSQL(String table, int fixedLength) { + return tableDefinition(table, "rowId Int32", "binaryString String", + "fixedBinary FixedString(" + fixedLength + ")"); + } + } + + /** + * Verifies that raw, non-UTF-8 binary content survives a round-trip through {@code String} and + * {@code FixedString} columns when binary string support is enabled. The check covers both the + * binary format reader ({@link ClickHouseBinaryFormatReader#getByteArray}) and the POJO mapping + * path (a {@code byte[]} field bound to a string-backed column). + */ + @Test(groups = {"integration"}) + public void testBinaryStringRoundTrip() throws Exception { + final String table = "test_binary_string_round_trip"; + final int fixedLength = 16; + + // A blob with every possible byte value guarantees invalid UTF-8 sequences (e.g. lone 0x80). + final byte[] binaryBlob = new byte[256]; + for (int i = 0; i < binaryBlob.length; i++) { + binaryBlob[i] = (byte) i; + } + final byte[] fixedBlob = new byte[fixedLength]; + for (int i = 0; i < fixedLength; i++) { + fixedBlob[i] = (byte) (0xFF - i); + } + + final DTOForBinaryStringTests sample = new DTOForBinaryStringTests(1, binaryBlob, fixedBlob); + + try (Client binClient = newClient().binaryStringSupport(true).build()) { + binClient.execute("DROP TABLE IF EXISTS " + table).get(); + binClient.execute(DTOForBinaryStringTests.tblCreateSQL(table, fixedLength)).get(); + + final TableSchema tableSchema = binClient.getTableSchema(table); + binClient.register(DTOForBinaryStringTests.class, tableSchema); + binClient.insert(table, Collections.singletonList(sample)).get().close(); + + // Reader path: getByteArray must return the original raw bytes. + try (QueryResponse response = binClient.query("SELECT * FROM " + table).get()) { + ClickHouseBinaryFormatReader reader = binClient.newBinaryFormatReader(response); + Assert.assertNotNull(reader.next()); + Assert.assertEquals(reader.getByteArray("binaryString"), binaryBlob); + Assert.assertEquals(reader.getByteArray("fixedBinary"), fixedBlob); + } + + // POJO path: a byte[] field bound to a string-backed column must receive the raw bytes. + List pojos = + binClient.queryAll("SELECT * FROM " + table + " ORDER BY rowId", + DTOForBinaryStringTests.class, tableSchema); + Assert.assertEquals(pojos.size(), 1); + Assert.assertEquals(pojos.get(0).getBinaryString(), binaryBlob); + Assert.assertEquals(pojos.get(0).getFixedBinary(), fixedBlob); + } + + // Negative control: without binary string support the String column is decoded as UTF-8, + // which is lossy for this blob, so the round-tripped bytes must NOT match the original. + try (QueryResponse response = client.query("SELECT * FROM " + table).get()) { + ClickHouseBinaryFormatReader reader = client.newBinaryFormatReader(response); + Assert.assertNotNull(reader.next()); + Assert.assertFalse(Arrays.equals(reader.getByteArray("binaryString"), binaryBlob), + "Expected lossy UTF-8 decoding without binary string support"); + } + + client.execute("DROP TABLE IF EXISTS " + table).get(); + } + @Test(groups = {"integration"}) public void testVariantWithSimpleDataTypes() throws Exception { if (isVersionMatch("(,24.8]")) { diff --git a/client-v2/src/test/java/com/clickhouse/client/datatypes/RowBinaryFormatWriterTest.java b/client-v2/src/test/java/com/clickhouse/client/datatypes/RowBinaryFormatWriterTest.java index 700894d6f..ebdf0fc35 100644 --- a/client-v2/src/test/java/com/clickhouse/client/datatypes/RowBinaryFormatWriterTest.java +++ b/client-v2/src/test/java/com/clickhouse/client/datatypes/RowBinaryFormatWriterTest.java @@ -6,33 +6,42 @@ import com.clickhouse.client.ClickHouseServerForTest; import com.clickhouse.client.api.Client; import com.clickhouse.client.api.command.CommandSettings; +import com.clickhouse.client.api.data_formats.RowBinaryFormatSerializer; import com.clickhouse.client.api.data_formats.RowBinaryFormatWriter; import com.clickhouse.client.api.data_formats.internal.BinaryStreamReader; +import com.clickhouse.client.api.data_formats.internal.StringValue; import com.clickhouse.client.api.enums.Protocol; import com.clickhouse.client.api.insert.InsertResponse; import com.clickhouse.client.api.insert.InsertSettings; import com.clickhouse.client.api.internal.ServerSettings; import com.clickhouse.client.api.metadata.TableSchema; import com.clickhouse.client.api.query.GenericRecord; +import com.clickhouse.data.ClickHouseColumn; import com.clickhouse.data.ClickHouseFormat; import com.clickhouse.data.ClickHouseVersion; import org.apache.commons.lang3.RandomStringUtils; import org.testng.annotations.BeforeMethod; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; import java.io.IOException; import java.math.BigDecimal; import java.math.BigInteger; import java.net.Inet4Address; import java.net.Inet6Address; +import java.nio.charset.StandardCharsets; import java.time.LocalDate; import java.time.ZonedDateTime; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Random; +import java.util.TimeZone; import java.util.UUID; import java.util.concurrent.TimeUnit; @@ -494,8 +503,8 @@ public void writeAndReadImageTest() throws Exception { org.testng.Assert.assertEquals(record.getByteArray("image"), imageData, "Image bytes read back via getByteArray must match the source exactly"); - com.clickhouse.client.api.data_formats.StringValue value = - (com.clickhouse.client.api.data_formats.StringValue) record.getObject("image"); + com.clickhouse.client.api.data_formats.internal.StringValue value = + (com.clickhouse.client.api.data_formats.internal.StringValue) record.getObject("image"); org.testng.Assert.assertEquals(value.size(), imageData.length); org.testng.Assert.assertEquals(value.toByteArray(), imageData, "StringValue must preserve the full binary payload"); @@ -847,4 +856,90 @@ public void writeVariantTests() throws Exception { writeTest(tableName, tableCreate, rows); } + + // ----------------------------------------------------------------------------------------------------------------- + // Unit coverage for the writeString helpers on RowBinaryFormatSerializer and the setString(byte[]) overloads on + // RowBinaryFormatWriter, which the integration tests above do not exercise in isolation. These run without a server. + // ----------------------------------------------------------------------------------------------------------------- + + private static final ClickHouseColumn STRING_COLUMN = ClickHouseColumn.of("s", "String"); + + private enum StringWriteVia { + SERIALIZER, + WRITER_BY_NAME, + WRITER_BY_INDEX + } + + private static BinaryStreamReader stringReader(byte[] data) { + return new BinaryStreamReader(new ByteArrayInputStream(data), TimeZone.getTimeZone("UTC"), null, + new BinaryStreamReader.DefaultByteBufferAllocator(), false, null, true); + } + + private static byte[] writeStringBytes(StringWriteVia via, byte[] payload) throws IOException { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + switch (via) { + case SERIALIZER: + new RowBinaryFormatSerializer(out).writeString(payload); + break; + case WRITER_BY_NAME: { + TableSchema schema = new TableSchema(Collections.singletonList(STRING_COLUMN)); + RowBinaryFormatWriter writer = new RowBinaryFormatWriter(out, schema, ClickHouseFormat.RowBinary); + writer.setString("s", payload); + writer.commitRow(); + break; + } + case WRITER_BY_INDEX: { + TableSchema schema = new TableSchema(Collections.singletonList(STRING_COLUMN)); + RowBinaryFormatWriter writer = new RowBinaryFormatWriter(out, schema, ClickHouseFormat.RowBinary); + writer.setString(1, payload); + writer.commitRow(); + break; + } + } + return out.toByteArray(); + } + + @DataProvider(name = "stringValues") + public static Object[][] stringValues() { + return new Object[][] { + {"ascii", "hello world"}, + {"unicode", "Привет 你好 🚀"}, + }; + } + + @Test(dataProvider = "stringValues") + public void testSerializerWriteString(String description, String value) throws IOException { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + new RowBinaryFormatSerializer(out).writeString(value); + + StringValue read = stringReader(out.toByteArray()).readValue(STRING_COLUMN); + assertEquals(read.asString(), value, description); + assertEquals(read.toByteArray(), value.getBytes(StandardCharsets.UTF_8), description); + } + + @DataProvider(name = "binaryStringWrites") + public static Object[][] binaryStringWrites() { + byte[] arbitraryBinary = new byte[]{(byte) 0x00, (byte) 0xFF, (byte) 0x80, (byte) 0x7F, (byte) 0xAB}; + byte[] utf8Content = "ascii and Юникод".getBytes(StandardCharsets.UTF_8); + + Object[][] cases = new Object[StringWriteVia.values().length * 2][]; + int i = 0; + for (StringWriteVia via : StringWriteVia.values()) { + cases[i++] = new Object[]{via, "arbitrary-binary", arbitraryBinary, null}; + cases[i++] = new Object[]{via, "utf8-content", utf8Content, "ascii and Юникод"}; + } + return cases; + } + + @Test(dataProvider = "binaryStringWrites") + public void testWriteStringFromBytes(StringWriteVia via, String description, byte[] payload, + String expectedString) throws IOException { + byte[] encoded = writeStringBytes(via, payload); + + StringValue read = stringReader(encoded).readValue(STRING_COLUMN); + assertEquals(read.toByteArray(), payload, via + "/" + description); + if (expectedString != null) { + assertEquals(read.asString(), expectedString, via + "/" + description); + } + } } diff --git a/client-v2/src/test/java/com/clickhouse/client/insert/InsertTests.java b/client-v2/src/test/java/com/clickhouse/client/insert/InsertTests.java index 7bb632ca3..6ffdfea5e 100644 --- a/client-v2/src/test/java/com/clickhouse/client/insert/InsertTests.java +++ b/client-v2/src/test/java/com/clickhouse/client/insert/InsertTests.java @@ -199,14 +199,7 @@ public void insertPOJOAndReadBack() throws Exception { try (QueryResponse queryResponse = client.query("SELECT * FROM " + tableName + " LIMIT 1").get(EXECUTE_CMD_TIMEOUT, TimeUnit.SECONDS)) { - // To read the binaryString properly as raw bytes, we must enable binary string support - Client readerClient = client; - if (pojo.getBinaryString() != null) { - readerClient = newClient() - .binaryStringSupport(true) - .build(); - } - ClickHouseBinaryFormatReader reader = readerClient.newBinaryFormatReader(queryResponse); + ClickHouseBinaryFormatReader reader = client.newBinaryFormatReader(queryResponse); Assert.assertNotNull(reader.next()); Assert.assertEquals(reader.getByte("byteValue"), pojo.getByteValue()); @@ -219,17 +212,12 @@ public void insertPOJOAndReadBack() throws Exception { Assert.assertEquals(reader.getDouble("float64"), pojo.getFloat64()); Assert.assertEquals(reader.getString("string"), pojo.getString()); Assert.assertEquals(reader.getString("fixedString"), pojo.getFixedString()); - Assert.assertEquals(reader.getByteArray("binaryString"), pojo.getBinaryString()); Assert.assertTrue(reader.getZonedDateTime("zonedDateTime").isEqual(pojo.getZonedDateTime().withNano(0))); Assert.assertTrue(reader.getZonedDateTime("zonedDateTime64").isEqual(pojo.getZonedDateTime64())); Assert.assertTrue(reader.getOffsetDateTime("offsetDateTime").isEqual(pojo.getOffsetDateTime().withNano(0))); Assert.assertTrue(reader.getOffsetDateTime("offsetDateTime64").isEqual(pojo.getOffsetDateTime64())); Assert.assertEquals(reader.getInstant("instant"), pojo.getInstant().with(ChronoField.MICRO_OF_SECOND, 0)); Assert.assertEquals(reader.getInstant("instant64"), pojo.getInstant64()); - - if (readerClient != client) { - readerClient.close(); - } } } diff --git a/client-v2/src/test/java/com/clickhouse/client/insert/SamplePOJO.java b/client-v2/src/test/java/com/clickhouse/client/insert/SamplePOJO.java index 920f86317..6661b94bc 100644 --- a/client-v2/src/test/java/com/clickhouse/client/insert/SamplePOJO.java +++ b/client-v2/src/test/java/com/clickhouse/client/insert/SamplePOJO.java @@ -5,9 +5,6 @@ import lombok.Setter; import org.apache.commons.lang3.RandomStringUtils; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.InputStream; import java.math.BigDecimal; import java.math.BigInteger; import java.net.Inet4Address; @@ -66,7 +63,6 @@ public class SamplePOJO { private String string; private String fixedString; - private byte[] binaryString; private LocalDate date; private LocalDate date32; @@ -149,8 +145,6 @@ public SamplePOJO() { string = RandomStringUtils.randomAlphabetic(1, 256); fixedString = RandomStringUtils.randomAlphabetic(3); - // Use a real binary blob (a PNG image) to exercise inserting/reading large non-UTF-8 String values. - binaryString = loadClickHouseLogo(); date = LocalDate.now(); date32 = LocalDate.now(); @@ -213,23 +207,6 @@ public SamplePOJO() { keyword = "database"; } - private static byte[] loadClickHouseLogo() { - try (InputStream is = SamplePOJO.class.getClassLoader().getResourceAsStream("clickhouse-logo.png")) { - if (is == null) { - throw new IllegalStateException("Test resource not found on classpath: clickhouse-logo.png"); - } - ByteArrayOutputStream buffer = new ByteArrayOutputStream(); - byte[] chunk = new byte[8192]; - int read; - while ((read = is.read(chunk)) != -1) { - buffer.write(chunk, 0, read); - } - return buffer.toByteArray(); - } catch (IOException e) { - throw new IllegalStateException("Failed to read test resource clickhouse-logo.png", e); - } - } - @Override public String toString() { return "SamplePOJO{" + @@ -331,7 +308,6 @@ public static String generateTableCreateSQL(String tableName) { // "boxedBool UInt8, " + "string String, " + "fixedString FixedString(3), " + - "binaryString String, " + "date Date, " + "date32 Date, " + "dateTime DateTime, " + diff --git a/docs/features.md b/docs/features.md index f03d0547a..a3803d63f 100644 --- a/docs/features.md +++ b/docs/features.md @@ -16,7 +16,7 @@ This document lists stable, user-visible behavior in `client-v2` and `jdbc-v2` t - Parameterized SQL: Accepts named query parameters and can send them through supported HTTP request encodings. - Result materialization helpers: Provides streaming `Records`, generic row access, and convenience APIs that materialize all rows into generic records or typed POJOs. - Binary format readers: Reads ClickHouse binary result formats including `Native`, `RowBinary`, `RowBinaryWithNames`, and `RowBinaryWithNamesAndTypes`. -- Binary string support: Opt-in through the `binary_string_support` property (or `Client.Builder#binaryStringSupport(boolean)`), disabled by default. When enabled, untyped reads (e.g. `GenericRecord.getObject(...)`/`BinaryStreamReader.readValue(...)`) of top-level `String` and `FixedString` columns return a `StringValue`, which preserves the raw bytes (`toByteArray()`/`asByteBuffer()`) and lazily decodes a `String` (`asString()`), instead of eagerly decoding to a `String`. `StringValue` is a read-time holder, not a supported POJO field type: typed `queryAll(...)`/`readToPOJO` binding still maps these columns to `String` (decoded) or `byte[]` (raw bytes) according to the POJO field type. Strings nested inside containers (`Array`, `Map`, `Tuple`, `Nested`, `Variant`) are still read as `String`. +- Binary string support: Opt-in through the `binary_string_support` property (or `Client.Builder#binaryStringSupport(boolean)`), disabled by default. The setting is resolved per operation from the merged client and query settings, so it can be overridden for a single request by setting the `binary_string_support` option on the operation's settings (e.g. `QuerySettings#setOption(ClientConfigProperties.BINARY_STRING_SUPPORT.getKey(), true)`) regardless of the client-level default. When enabled, untyped reads (e.g. `GenericRecord.getObject(...)`/`BinaryStreamReader.readValue(...)`) of top-level `String` and `FixedString` columns return a `StringValue`, which preserves the raw bytes (`toByteArray()`/`asByteBuffer()`) and lazily decodes a `String` (`asString()`), instead of eagerly decoding to a `String`. `StringValue` is a read-time holder, not a supported POJO field type: typed `queryAll(...)`/`readToPOJO` binding still maps these columns to `String` (decoded) or `byte[]` (raw bytes) according to the POJO field type. Strings nested inside containers (`Array`, `Map`, `Tuple`, `Nested`, `Variant`, `JSON`) are still read as `String`. - JSONEachRow text reader: Can stream `JSONEachRow` responses through a caller-supplied `JsonParser`, with Jackson and Gson parser factory implementations available as optional classpath dependencies, and infers a best-effort schema from the first row. - Data type conversion: Maps ClickHouse types to Java values for binary reads, POJO binding, and SQL parameter formatting, including date/time handling. - Geometry type support: For ClickHouse `25.11+`, where `Geometry` changed from a string alias to `Variant(Point, Ring, LineString, MultiLineString, Polygon, MultiPolygon)`, the client reads and writes `Geometry` values through generic records, binary readers, POJO binding, and SQL parameter formatting, using Java array dimensionality to represent the geometry shape. @@ -48,7 +48,7 @@ Compatibility-sensitive traits: - Certificate-as-content support is compatibility-sensitive: any certificate or key value containing a PEM begin marker (`-----BEGIN`) is treated as inline PEM content, otherwise it is treated as a file path (also searched in the home directory and on the classpath). - JSONEachRow reading depends on the selected parser factory and request format settings: parser materialization determines Java value types, the reader infers minimal schema from the first row, and JSON number server settings are applied only when `QuerySettings` resolves to `ClickHouseFormat.JSONEachRow` and `json_disable_number_quoting` is enabled. - JSONEachRow schema inference is intentionally best-effort: scalar values use Java-to-ClickHouse type mappings, while JSON arrays and objects are identified structurally as `Array` and `Map`. For arrays, maps, and some nested or ambiguous values, the inferred type may not include the most specific element, key, value, or nested ClickHouse type. -- Binary string support is scope-sensitive and off by default: it only changes how top-level `String`/`FixedString` columns are read (to `StringValue` instead of `String`); nested string values are always read as `String`. `StringValue` is intentionally a mutable, zero-copy holder — it wraps (does not copy) the supplied bytes and `toByteArray()` returns the live backing array, so callers needing an independent snapshot must copy the bytes themselves. Only heap buffers are supported; constructing from a direct (off-heap) buffer is rejected. +- Binary string support is scope-sensitive and off by default: it only changes how top-level `String`/`FixedString` columns are read (to `StringValue` instead of `String`); nested string values are always read as `String`. `StringValue` is produced only by the read path and has no public constructor (it cannot be instantiated by user code). It is intentionally a mutable, zero-copy holder — it wraps (does not copy) the bytes it is given and `toByteArray()` returns the live backing array when the value spans the whole array (otherwise an exact-size copy), so callers needing an independent snapshot must copy the bytes themselves. ## `jdbc-v2` diff --git a/jdbc-v2/src/test/java/com/clickhouse/jdbc/JdbcDataTypeTests.java b/jdbc-v2/src/test/java/com/clickhouse/jdbc/JdbcDataTypeTests.java index 16f7df71b..dedb8c52c 100644 --- a/jdbc-v2/src/test/java/com/clickhouse/jdbc/JdbcDataTypeTests.java +++ b/jdbc-v2/src/test/java/com/clickhouse/jdbc/JdbcDataTypeTests.java @@ -1545,36 +1545,34 @@ public void testStringsUsedAsBytes() throws Exception { } } - private static byte[] readClickHouseLogo() throws IOException { + private static byte[] readClickHouseLogo() { try (InputStream is = JdbcDataTypeTests.class.getResourceAsStream("/ch_logo.png")) { Assert.assertNotNull(is, "ch_logo.png not found in test resources"); return is.readAllBytes(); + } catch (Exception e) { + throw new RuntimeException("Failed to read test resource", e); } } - /** - * Asserts that {@code actual} matches the {@code ch_logo.png} source file byte-for-byte. The source file is - * read fresh from test resources so the comparison is against the file content rather than an in-memory copy. - */ - private static void assertEqualsToClickHouseLogo(byte[] actual) throws IOException { - byte[] expected = readClickHouseLogo(); + private static void assertEqualsToClickHouseLogo(byte[] actual) { Assert.assertNotNull(actual, "Read bytes must not be null"); - assertEquals(actual.length, expected.length, "Read byte count must match ch_logo.png size"); - assertEquals(actual, expected, "Read bytes must match ch_logo.png content"); + assertEquals(actual.length, CH_LOGO_PNG.length, "Read byte count must match ch_logo.png size"); + assertEquals(actual, CH_LOGO_PNG, "Read bytes must match ch_logo.png content"); } + private static final byte[] CH_LOGO_PNG = readClickHouseLogo(); + @Test(groups = { "integration" }) public void testBinaryStringSupportGetBytes() throws Exception { // ch_logo.png is real binary content that is not valid UTF-8, so it must survive a // round-trip through a String column byte-for-byte when binary_string_support is enabled. - byte[] imageBytes = readClickHouseLogo(); runQuery("CREATE TABLE test_binary_string_get_bytes (id Int8, str String) ENGINE = MergeTree ORDER BY ()"); try (Connection conn = getJdbcConnection(); PreparedStatement insert = conn.prepareStatement("INSERT INTO test_binary_string_get_bytes VALUES (?, ?)")) { insert.setInt(1, 1); - insert.setBytes(2, imageBytes); + insert.setBytes(2, CH_LOGO_PNG); insert.executeUpdate(); } @@ -1594,14 +1592,13 @@ public void testBinaryStringSupportGetBytes() throws Exception { @Test(groups = { "integration" }) public void testBinaryStringSupportGetBinaryStream() throws Exception { - byte[] imageBytes = readClickHouseLogo(); - - runQuery("CREATE TABLE test_binary_string_stream (id Int8, str String) ENGINE = MergeTree ORDER BY ()"); + runQuery("CREATE TABLE test_binary_string_stream (id Int8, str String, nullable_str Nullable(String)) ENGINE = MergeTree ORDER BY ()"); try (Connection conn = getJdbcConnection(); - PreparedStatement insert = conn.prepareStatement("INSERT INTO test_binary_string_stream VALUES (?, ?)")) { + PreparedStatement insert = conn.prepareStatement("INSERT INTO test_binary_string_stream VALUES (?, ?, ?)")) { insert.setInt(1, 1); - insert.setBytes(2, imageBytes); + insert.setBytes(2, CH_LOGO_PNG); + insert.setNull(3, Types.VARCHAR); insert.executeUpdate(); } @@ -1625,33 +1622,14 @@ public void testBinaryStringSupportGetBinaryStream() throws Exception { Assert.assertNotNull(stream); assertEqualsToClickHouseLogo(stream.readAllBytes()); } + assertFalse(rs.wasNull()); - assertFalse(rs.next()); - } - } - - @Test(groups = { "integration" }) - public void testBinaryStringSupportGetBinaryStreamNull() throws Exception { - runQuery("CREATE TABLE test_binary_string_stream_null (id Int8, str Nullable(String)) ENGINE = MergeTree ORDER BY ()"); - - try (Connection conn = getJdbcConnection(); - PreparedStatement insert = conn.prepareStatement("INSERT INTO test_binary_string_stream_null VALUES (?, ?)")) { - insert.setInt(1, 1); - insert.setNull(2, Types.VARCHAR); - insert.executeUpdate(); - } - - Properties props = new Properties(); - props.put(ClientConfigProperties.BINARY_STRING_SUPPORT.getKey(), "true"); - - try (Connection conn = getJdbcConnection(props); - Statement stmt = conn.createStatement(); - ResultSet rs = stmt.executeQuery("SELECT * FROM test_binary_string_stream_null ORDER BY id")) { - assertTrue(rs.next()); - assertNull(rs.getBinaryStream("str")); + // null value on a nullable column + assertNull(rs.getBinaryStream("nullable_str")); assertTrue(rs.wasNull()); - assertNull(rs.getBytes("str")); + assertNull(rs.getBytes("nullable_str")); assertTrue(rs.wasNull()); + assertFalse(rs.next()); } } diff --git a/jdbc-v2/src/test/java/com/clickhouse/jdbc/metadata/DatabaseMetaDataTest.java b/jdbc-v2/src/test/java/com/clickhouse/jdbc/metadata/DatabaseMetaDataTest.java index cb407cbc5..b435d6253 100644 --- a/jdbc-v2/src/test/java/com/clickhouse/jdbc/metadata/DatabaseMetaDataTest.java +++ b/jdbc-v2/src/test/java/com/clickhouse/jdbc/metadata/DatabaseMetaDataTest.java @@ -1,6 +1,7 @@ package com.clickhouse.jdbc.metadata; import com.clickhouse.client.ClickHouseServerForTest; +import com.clickhouse.client.api.ClientConfigProperties; import com.clickhouse.data.ClickHouseDataType; import com.clickhouse.data.ClickHouseVersion; import com.clickhouse.jdbc.ClientInfoProperties; @@ -146,6 +147,67 @@ public void testGetColumns() throws Exception { } } + /** + * Database metadata is materialized internally by querying ClickHouse system tables, whose results contain + * top-level {@code String} columns (e.g. {@code TABLE_NAME}, {@code COLUMN_NAME}, {@code TYPE_NAME}). When the + * connection enables {@code binary_string_support}, those reads must still surface proper {@link String} values + * so the JDBC metadata API keeps working unchanged. + */ + @Test(groups = {"integration"}) + public void testGetColumnsWithBinaryStringSupport() throws Exception { + Properties props = new Properties(); + props.put(ClientConfigProperties.BINARY_STRING_SUPPORT.getKey(), "true"); + + try (Connection conn = getJdbcConnection(props)) { + final String tableName = "get_columns_binary_string_support_test"; + try (Statement stmt = conn.createStatement()) { + stmt.executeUpdate("CREATE TABLE " + tableName + + " (id Int32, name String NOT NULL, v1 Nullable(Int8), v2 Array(Int8)) " + + "ENGINE MergeTree ORDER BY tuple()"); + } + + DatabaseMetaData dbmd = conn.getMetaData(); + + try (ResultSet rs = dbmd.getColumns(null, getDatabase(), tableName, null)) { + assertTrue(rs.next()); + assertEquals(rs.getString("TABLE_SCHEM"), getDatabase()); + assertEquals(rs.getString("TABLE_NAME"), tableName); + assertEquals(rs.getString("COLUMN_NAME"), "id"); + assertEquals(rs.getInt("DATA_TYPE"), Types.INTEGER); + assertEquals(rs.getString("TYPE_NAME"), "Int32"); + assertFalse(rs.getBoolean("NULLABLE")); + + assertTrue(rs.next()); + assertEquals(rs.getString("TABLE_NAME"), tableName); + assertEquals(rs.getString("COLUMN_NAME"), "name"); + assertEquals(rs.getInt("DATA_TYPE"), Types.VARCHAR); + assertEquals(rs.getString("TYPE_NAME"), "String"); + assertFalse(rs.getBoolean("NULLABLE")); + + assertTrue(rs.next()); + assertEquals(rs.getString("COLUMN_NAME"), "v1"); + assertEquals(rs.getString("TYPE_NAME"), "Nullable(Int8)"); + assertTrue(rs.getBoolean("NULLABLE")); + + assertTrue(rs.next()); + assertEquals(rs.getString("COLUMN_NAME"), "v2"); + assertEquals(rs.getInt("DATA_TYPE"), Types.ARRAY); + assertEquals(rs.getString("TYPE_NAME"), "Array(Int8)"); + } + + // getTables exercises a different system-table query whose String columns must also stay String. + try (ResultSet rs = dbmd.getTables(null, getDatabase(), tableName, null)) { + assertTrue(rs.next()); + assertEquals(rs.getString("TABLE_SCHEM"), getDatabase()); + assertEquals(rs.getString("TABLE_NAME"), tableName); + Object tableNameObj = rs.getObject("TABLE_NAME"); + assertTrue(tableNameObj instanceof String, + "Metadata String columns must be plain String even with binary_string_support enabled, but got " + + (tableNameObj == null ? "null" : tableNameObj.getClass().getName())); + } + } + } + @Test(groups = {"integration"}) public void testSupportFlags() throws Exception { try (Connection conn = getJdbcConnection()) { From 2bbb60656d68235728cd0436e273a2aa39caf829 Mon Sep 17 00:00:00 2001 From: Sergey Chernov Date: Tue, 30 Jun 2026 17:59:47 -0700 Subject: [PATCH 10/10] Fix cloud tests --- .../client/api/data_formats/internal/BaseReaderTests.java | 2 +- .../test/java/com/clickhouse/client/metrics/MetricsTest.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BaseReaderTests.java b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BaseReaderTests.java index 9d6c1f125..2cfea137d 100644 --- a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BaseReaderTests.java +++ b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BaseReaderTests.java @@ -580,7 +580,7 @@ public void testReadingStringValue() throws Exception { final String table = "test_reading_stringvalue"; client.execute("DROP TABLE IF EXISTS " + table).get(); - client.execute("CREATE TABLE " + table + " (id Int32, s String, fs FixedString(5), e FixedString(1)) ENGINE = Memory").get(); + client.execute("CREATE TABLE " + table + " (id Int32, s String, fs FixedString(5), e FixedString(1)) ENGINE = MergeTree ORDER BY id").get(); client.execute("INSERT INTO " + table + " VALUES (1, 'hello', 'world', 'a'), (2, 'ClickHouse', 'Rocks', 'b')").get(); Client customClient = newClient() diff --git a/client-v2/src/test/java/com/clickhouse/client/metrics/MetricsTest.java b/client-v2/src/test/java/com/clickhouse/client/metrics/MetricsTest.java index f83f97db0..401917404 100644 --- a/client-v2/src/test/java/com/clickhouse/client/metrics/MetricsTest.java +++ b/client-v2/src/test/java/com/clickhouse/client/metrics/MetricsTest.java @@ -67,8 +67,8 @@ public void testRegisterMetrics() throws Exception { Assert.assertEquals((int) available.value(), 1); Assert.assertEquals((int) leased.value(), 0); + final long maxDelay = isCloud() ? 300 : 15; Runnable task = () -> { - final long maxDelay = 15; long t1 = System.currentTimeMillis(); try (QueryResponse response = client.query("SELECT 1").get()) { long t = System.currentTimeMillis() - t1;