diff --git a/.github/workflows/vector-plugins.yml b/.github/workflows/vector-plugins.yml index cc57e97ffd..957d0f8777 100644 --- a/.github/workflows/vector-plugins.yml +++ b/.github/workflows/vector-plugins.yml @@ -26,7 +26,7 @@ jobs: strategy: fail-fast: false matrix: - java: [ '17' ] + java: [ '17', '21', '25' ] codes: [ 'uncompressed' ] name: Build Parquet with JDK ${{ matrix.java }} and ${{ matrix.codes }} @@ -46,7 +46,7 @@ jobs: run: | EXTRA_JAVA_TEST_ARGS=$(./mvnw help:evaluate -Dexpression=extraJavaTestArgs -q -DforceStdout) export MAVEN_OPTS="$MAVEN_OPTS $EXTRA_JAVA_TEST_ARGS" - ./mvnw install --batch-mode -Pvector-plugins -DskipTests=true -Dmaven.javadoc.skip=true -Dsource.skip=true -Dmaven.buildNumber.skip=true -Djava.version=${{ matrix.java }} -pl parquet-plugins/parquet-encoding-vector,parquet-plugins/parquet-plugins-benchmarks -am + ./mvnw install --batch-mode -Pvector-plugins -DskipTests=true -Dmaven.javadoc.skip=true -Dsource.skip=true -Dmaven.buildNumber.skip=true -Dspotless.check.skip=true -Djava.version=${{ matrix.java }} -pl parquet-plugins/parquet-encoding-vector,parquet-plugins/parquet-plugins-benchmarks -am - name: verify env: TEST_CODECS: ${{ matrix.codes }} @@ -54,4 +54,11 @@ jobs: run: | EXTRA_JAVA_TEST_ARGS=$(./mvnw help:evaluate -Dexpression=extraJavaTestArgs -q -DforceStdout) export MAVEN_OPTS="$MAVEN_OPTS $EXTRA_JAVA_TEST_ARGS" - ./mvnw verify --batch-mode -Pvector-plugins javadoc:javadoc -pl parquet-plugins/parquet-encoding-vector,parquet-plugins/parquet-plugins-benchmarks -am + # Spotless check uses palantir-java-format which relies on internal javac APIs + # that are not available on all JDK versions (e.g. JDK 25+). Since the formatting + # result is JDK-independent, running the check on JDK 17 alone is sufficient. 
+ SPOTLESS_ARGS="" + if [ "${{ matrix.java }}" != "17" ]; then + SPOTLESS_ARGS="-Dspotless.check.skip=true" + fi + ./mvnw verify --batch-mode -Pvector-plugins javadoc:javadoc $SPOTLESS_ARGS -pl parquet-plugins/parquet-encoding-vector,parquet-plugins/parquet-plugins-benchmarks -am diff --git a/.gitignore b/.gitignore index 2fd06049ea..568aa2a323 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ *.class .project .classpath +.factorypath .settings target # Package Files # @@ -20,4 +21,3 @@ target/ mvn_install.log .vscode/* .DS_Store - diff --git a/parquet-plugins/parquet-encoding-vector/src/main/java/org/apache/parquet/column/values/bitpacking/ByteBitPacking512VectorLE.java b/parquet-plugins/parquet-encoding-vector/src/main/java/org/apache/parquet/column/values/bitpacking/ByteBitPacking512VectorLE.java index eb1690a4e1..bdb0f420ef 100644 --- a/parquet-plugins/parquet-encoding-vector/src/main/java/org/apache/parquet/column/values/bitpacking/ByteBitPacking512VectorLE.java +++ b/parquet-plugins/parquet-encoding-vector/src/main/java/org/apache/parquet/column/values/bitpacking/ByteBitPacking512VectorLE.java @@ -177,7 +177,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(BYTE_SPECIES_64, in, inPos, in.order()); + ByteVector byteVector = fromByteBuffer(BYTE_SPECIES_64, in, inPos); ShortVector tempRes = byteVector .castShape(SHORT_SPECIES_512, 0) .reinterpretAsBytes() @@ -260,7 +260,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(BYTE_SPECIES, in, inPos, in.order()); + ByteVector byteVector = fromByteBuffer(BYTE_SPECIES, in, inPos); ShortVector tempRes = 
byteVector .castShape(LONG_SPECIES, 0) .reinterpretAsBytes() @@ -377,9 +377,8 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B128, in, inPos, in.order()) - .castShape(S512, 0) - .reinterpretAsBytes(); + ByteVector byteVector = + fromByteBuffer(B128, in, inPos).castShape(S512, 0).reinterpretAsBytes(); ShortVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsShorts() @@ -466,7 +465,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(BSPECIES, in, inPos, in.order()); + ByteVector byteVector = fromByteBuffer(BSPECIES, in, inPos); ShortVector tempRes = byteVector .castShape(ISPECIES, 0) .reinterpretAsBytes() @@ -582,9 +581,8 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B256, in, inPos, in.order(), inp_mask) - .castShape(S512, 0) - .reinterpretAsBytes(); + ByteVector byteVector = + fromByteBuffer(B256, in, inPos, inp_mask).castShape(S512, 0).reinterpretAsBytes(); ShortVector tempRes1 = byteVector .rearrange(perm_mask0) @@ -705,9 +703,8 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B256, in, inPos, in.order(), inp_mask) - .castShape(S512, 0) - .reinterpretAsBytes(); + ByteVector byteVector = + fromByteBuffer(B256, in, inPos, 
inp_mask).castShape(S512, 0).reinterpretAsBytes(); ShortVector tempRes1 = byteVector .rearrange(perm_mask0) @@ -827,9 +824,8 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B256, in, inPos, in.order(), inp_mask) - .castShape(S512, 0) - .reinterpretAsBytes(); + ByteVector byteVector = + fromByteBuffer(B256, in, inPos, inp_mask).castShape(S512, 0).reinterpretAsBytes(); ShortVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsShorts() @@ -914,7 +910,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order()); + ByteVector byteVector = fromByteBuffer(B512, in, inPos); byteVector .castShape(ISPECIES, 0) .lanewise(VectorOperators.AND, 255) @@ -1004,7 +1000,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); ShortVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsShorts() @@ -1084,7 +1080,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order()); + ByteVector byteVector = fromByteBuffer(B512, in, inPos); ShortVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsShorts() @@ 
-1194,7 +1190,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); ShortVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsShorts() @@ -1280,7 +1276,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order()); + ByteVector byteVector = fromByteBuffer(B512, in, inPos); ShortVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsShorts() @@ -1388,7 +1384,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); ShortVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsShorts() @@ -1512,7 +1508,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); ShortVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsShorts() @@ -1630,7 +1626,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final 
int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); ShortVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsShorts() @@ -1703,7 +1699,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); ShortVector shortVector = byteVector.reinterpretAsShorts(); shortVector .castShape(I512, 0) @@ -1783,7 +1779,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); IntVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsInts() @@ -1866,7 +1862,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); IntVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsInts() @@ -1944,7 +1940,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + 
ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); IntVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsInts() @@ -2022,7 +2018,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); IntVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsInts() @@ -2102,7 +2098,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); IntVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsInts() @@ -2182,7 +2178,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); IntVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsInts() @@ -2261,7 +2257,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); IntVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsInts() @@ -2332,7 +2328,7 @@ 
public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); IntVector tempRes1 = byteVector.rearrange(perm_mask0).reinterpretAsInts().lanewise(VectorOperators.AND, 16777215); tempRes1.intoArray(out, outPos, out_mask); @@ -2407,7 +2403,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); IntVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsInts() @@ -2486,7 +2482,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); IntVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsInts() @@ -2603,7 +2599,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); IntVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsInts() @@ -2718,7 +2714,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina 
public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); IntVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsInts() @@ -2832,7 +2828,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); IntVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsInts() @@ -2960,7 +2956,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); IntVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsInts() @@ -3089,7 +3085,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); IntVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsInts() @@ -3175,13 +3171,56 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = 
ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask);
+      ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask);
       IntVector tempRes1 = byteVector.rearrange(perm_mask0).reinterpretAsInts();
       tempRes1.intoArray(out, outPos, out_mask);
     }
   }
 
+  // TODO Replace these helpers with ByteVector.fromMemorySegment(...) once the project's minimum
+  // supported JDK is >= 22 (where java.lang.foreign.MemorySegment became a permanent API per
+  // JEP 454). fromMemorySegment superseded ByteVector.fromByteBuffer in JDK 19 (JEP 426) and
+  // would remove the byte[] copy that the direct-buffer fallback below performs. Until then the
+  // implementations are constrained to APIs available under --release 17. A multi-release JAR
+  // overlay (src/main/java22) would let JDK 22+ runtimes pick up the fromMemorySegment path
+  // automatically; see GH-3475 discussion.
+  private static ByteVector fromByteBuffer(
+      VectorSpecies<Byte> species, ByteBuffer input, int inPos) {
+    // Array-backed buffers are loaded straight from the backing array: no duplicate(), no copy.
+    if (input.hasArray()) {
+      return ByteVector.fromArray(species, input.array(), input.arrayOffset() + inPos);
+    }
+    // Direct and read-only buffers expose no array: copy the whole window into a scratch array.
+    // An unmasked load reads every lane, so a window crossing input.limit() is an error.
+    int width = species.length();
+    return ByteVector.fromArray(species, readInputBytes(input, inPos, width, width), 0);
+  }
+
+  private static ByteVector fromByteBuffer(
+      VectorSpecies<Byte> species, ByteBuffer input, int inPos, VectorMask<Byte> mask) {
+    // fromArray(..., mask) bounds-checks only the lanes set in mask, so the window may run past
+    // the end of the backing array as long as every set lane is in bounds.
+    if (input.hasArray()) {
+      return ByteVector.fromArray(species, input.array(), input.arrayOffset() + inPos, mask);
+    }
+    // Give direct buffers the same contract: copy only the bytes up to the highest set lane
+    // (lastTrue() is -1 for an empty mask, so nothing is copied) and leave the rest of the
+    // scratch window zeroed. Always copying species.length() bytes would throw for a masked
+    // load at the tail of the buffer even though all of its set lanes are readable.
+    byte[] window = readInputBytes(input, inPos, mask.lastTrue() + 1, species.length());
+    return ByteVector.fromArray(species, window, 0, mask);
+  }
+
+  // Returns a zero-filled byte[length] whose first byteCount bytes are copied from input,
+  // starting at absolute index inPos. The absolute bulk get leaves input's position untouched
+  // and throws IndexOutOfBoundsException when the range is not within [0, input.limit()).
+  private static byte[] readInputBytes(ByteBuffer input, int inPos, int byteCount, int length) {
+    byte[] window = new byte[length];
+    input.get(inPos, window, 0, byteCount);
+    return window;
+  }
+
   private static void notSupport() {
     throw new RuntimeException(
         "ByteBitPacking512VectorLE doesn't support the function, please use ByteBitPackingLE!");