From fcb088cd22963ed2d3a0bff974537f29586487e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Rou=C3=A9l?=
Date: Fri, 17 Apr 2026 21:21:57 +0200
Subject: [PATCH] GH-3484 Eliminate per-page heap allocation for CRC32
 checksums when using direct `ByteBufferAllocator`

Why this is safe

- CRC32.update(ByteBuffer) exists since Java 9, processes bytes from
  position to limit, advancing position.
- toByteBuffer(releaser) returns either a slice() of the internal buffer
  (independent position) or a freshly allocated copy. Either way, the
  original BytesInput is unaffected for the subsequent buf.collect()
  call, because ByteBufferBytesInput.writeInto() uses buffer.duplicate().
- When the allocator is direct, toByteBuffer(releaser) returns the
  direct buffer directly -- zero heap copy. When the allocator is
  heap-based, behavior is functionally equivalent to the old
  toByteArray() path.
- The releaser field already exists on ColumnChunkPageWriter (line 124)
  and manages buffer lifecycle.
---
 .../apache/parquet/hadoop/ColumnChunkPageWriteStore.java | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageWriteStore.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageWriteStore.java
index d9e6ea0990..fd1673673d 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageWriteStore.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageWriteStore.java
@@ -217,7 +217,7 @@ public void writePage(
       }
       if (pageWriteChecksumEnabled) {
         crc.reset();
-        crc.update(compressedBytes.toByteArray());
+        crc.update(compressedBytes.toByteBuffer(releaser));
         parquetMetadataConverter.writeDataPageV1Header(
             (int) uncompressedSize,
             (int) compressedSize,
@@ -322,13 +322,13 @@ public void writePageV2(
       if (pageWriteChecksumEnabled) {
         crc.reset();
         if (repetitionLevels.size() > 0) {
-          crc.update(repetitionLevels.toByteArray());
+          crc.update(repetitionLevels.toByteBuffer(releaser));
         }
         if (definitionLevels.size() > 0) {
-          crc.update(definitionLevels.toByteArray());
+          crc.update(definitionLevels.toByteBuffer(releaser));
         }
         if (compressedData.size() > 0) {
-          crc.update(compressedData.toByteArray());
+          crc.update(compressedData.toByteBuffer(releaser));
         }
         parquetMetadataConverter.writeDataPageV2Header(
             uncompressedSize,