From fcb088cd22963ed2d3a0bff974537f29586487e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Rou=C3=A9l?=
Date: Fri, 17 Apr 2026 21:21:57 +0200
Subject: [PATCH] GH-3484 Eliminate per-page heap allocation for CRC32
 checksums when using direct `ByteBufferAllocator`

Why this is safe

- CRC32.update(ByteBuffer) exists since Java 9, processes bytes from
  position to limit, advancing position.
- toByteBuffer(releaser) returns either a slice() of the internal buffer
  (independent position) or a freshly allocated copy. Either way, the
  original BytesInput is unaffected for the subsequent buf.collect()
  call, because ByteBufferBytesInput.writeInto() uses buffer.duplicate().
- When the allocator is direct, toByteBuffer(releaser) returns the
  direct buffer directly -- zero heap copy. When the allocator is
  heap-based, behavior is functionally equivalent to the old
  toByteArray() path.
- The releaser field already exists on ColumnChunkPageWriter (line 124)
  and manages buffer lifecycle.
---
 .../apache/parquet/hadoop/ColumnChunkPageWriteStore.java | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageWriteStore.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageWriteStore.java
index d9e6ea0990..fd1673673d 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageWriteStore.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageWriteStore.java
@@ -217,7 +217,7 @@ public void writePage(
       }
       if (pageWriteChecksumEnabled) {
         crc.reset();
-        crc.update(compressedBytes.toByteArray());
+        crc.update(compressedBytes.toByteBuffer(releaser));
         parquetMetadataConverter.writeDataPageV1Header(
             (int) uncompressedSize,
             (int) compressedSize,
@@ -322,13 +322,13 @@ public void writePageV2(
       if (pageWriteChecksumEnabled) {
         crc.reset();
         if (repetitionLevels.size() > 0) {
-          crc.update(repetitionLevels.toByteArray());
+          crc.update(repetitionLevels.toByteBuffer(releaser));
         }
         if (definitionLevels.size() > 0) {
-          crc.update(definitionLevels.toByteArray());
+          crc.update(definitionLevels.toByteBuffer(releaser));
         }
         if (compressedData.size() > 0) {
-          crc.update(compressedData.toByteArray());
+          crc.update(compressedData.toByteBuffer(releaser));
         }
         parquetMetadataConverter.writeDataPageV2Header(
             uncompressedSize,