-
Notifications
You must be signed in to change notification settings - Fork 4.1k
GH-47435: [Python][Parquet] Add direct key encryption/decryption API #49667
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -711,3 +711,204 @@ cdef shared_ptr[CDecryptionConfiguration] pyarrow_unwrap_decryptionconfig(object | |
| if isinstance(decryptionconfig, DecryptionConfiguration): | ||
| return (<DecryptionConfiguration> decryptionconfig).unwrap() | ||
| raise TypeError("Expected DecryptionConfiguration, got %s" % type(decryptionconfig)) | ||
|
|
||
|
|
||
| def create_decryption_properties( | ||
| footer_key, | ||
| *, | ||
| aad_prefix=None, | ||
| bint check_footer_integrity=True, | ||
| bint allow_plaintext_files=False, | ||
| ): | ||
| """ | ||
| Create FileDecryptionProperties using a direct footer key. | ||
|
|
||
| This is a low-level API that constructs decryption properties directly | ||
| from a plaintext key, bypassing the KMS-based :class:`CryptoFactory`. | ||
| It is intended for callers that manage key wrapping and storage | ||
| themselves (e.g. an application-level scheme). | ||
|
|
||
| For most use cases, prefer the higher-level :class:`CryptoFactory` | ||
| with :class:`DecryptionConfiguration`, which implements the full | ||
| Parquet key management specification and is interoperable with | ||
| other tools and frameworks. | ||
|
|
||
| .. note:: | ||
| Currently only uniform encryption (single key for footer and all | ||
| columns) is supported with this method. Per-column keys are not | ||
| yet available; files encrypted with per-column keys cannot be | ||
| decrypted using this function. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| footer_key : bytes | ||
| The decryption key for the file footer and all columns (uniform | ||
| encryption). Must be 16, 24, or 32 bytes for AES-128, AES-192, | ||
| or AES-256 respectively. | ||
| aad_prefix : bytes, optional | ||
| Additional Authenticated Data prefix. Must match the AAD prefix | ||
| that was used during encryption. Required if the AAD prefix was | ||
| not stored in the file metadata during encryption. | ||
| check_footer_integrity : bool, default True | ||
| Whether to verify footer integrity using the signature stored | ||
| in the file. Set to False only for debugging. | ||
| allow_plaintext_files : bool, default False | ||
| Whether to allow reading plaintext (unencrypted) files with | ||
| these decryption properties without raising an error. | ||
|
|
||
| Returns | ||
| ------- | ||
| FileDecryptionProperties | ||
| Properties that can be passed to :func:`~pyarrow.parquet.read_table`, | ||
| :class:`~pyarrow.parquet.ParquetFile`, or | ||
| :class:`~pyarrow.dataset.ParquetFragmentScanOptions`. | ||
|
|
||
| Examples | ||
| -------- | ||
| >>> import pyarrow.parquet as pq | ||
| >>> import pyarrow.parquet.encryption as pe | ||
| >>> props = pe.create_decryption_properties( | ||
| ... footer_key=b'0123456789abcdef', | ||
| ... aad_prefix=b'table_id', | ||
| ... ) | ||
| >>> table = pq.read_table('encrypted.parquet', decryption_properties=props) | ||
| """ | ||
| cdef: | ||
| CSecureString c_footer_key | ||
| c_string c_aad_prefix | ||
| CFileDecryptionPropertiesBuilder* builder | ||
| shared_ptr[CFileDecryptionProperties] props | ||
|
|
||
| footer_key_bytes = tobytes(footer_key) | ||
| if len(footer_key_bytes) not in (16, 24, 32): | ||
| raise ValueError( | ||
| f"footer_key must be 16, 24, or 32 bytes, got {len(footer_key_bytes)}" | ||
| ) | ||
|
|
||
| c_footer_key = CSecureString(<c_string>footer_key_bytes) | ||
| builder = new CFileDecryptionPropertiesBuilder() | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why are we using |
||
|
|
||
| try: | ||
| builder.footer_key(c_footer_key) | ||
|
|
||
| if aad_prefix is not None: | ||
| c_aad_prefix = tobytes(aad_prefix) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same here. |
||
| builder.aad_prefix(c_aad_prefix) | ||
|
|
||
| if not check_footer_integrity: | ||
| builder.disable_footer_signature_verification() | ||
|
|
||
| if allow_plaintext_files: | ||
| builder.plaintext_files_allowed() | ||
|
|
||
| props = builder.build() | ||
| finally: | ||
| del builder | ||
|
|
||
| return FileDecryptionProperties.wrap(props) | ||
|
|
||
|
|
||
| def create_encryption_properties( | ||
| footer_key, | ||
| *, | ||
| aad_prefix=None, | ||
| bint store_aad_prefix=True, | ||
| encryption_algorithm="AES_GCM_V1", | ||
| bint plaintext_footer=False, | ||
| ): | ||
| """ | ||
| Create FileEncryptionProperties using a direct footer key. | ||
|
|
||
| This is a low-level API that constructs encryption properties directly | ||
| from a plaintext key, bypassing the KMS-based :class:`CryptoFactory`. | ||
| It is intended for callers that manage key wrapping and storage | ||
| themselves (e.g. an application-level scheme). | ||
|
|
||
| .. warning:: | ||
| The caller is responsible for key management best practices. | ||
| Reusing the same key for multiple files without unique data keys | ||
| weakens AES-GCM security. The higher-level :class:`CryptoFactory` | ||
| with :class:`EncryptionConfiguration` handles this automatically | ||
| and is interoperable with other tools and frameworks -- | ||
| prefer it unless you have a specific reason to manage | ||
| keys yourself. | ||
|
|
||
| .. note:: | ||
| Currently only uniform encryption (single key for footer and all | ||
| columns) is supported with this method. Per-column keys are not | ||
| yet available; the provided key encrypts both the footer and | ||
| every column. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| footer_key : bytes | ||
| The encryption key for the file footer and all columns (uniform | ||
| encryption). Must be 16, 24, or 32 bytes for AES-128, AES-192, | ||
| or AES-256 respectively. | ||
| aad_prefix : bytes, optional | ||
| Additional Authenticated Data prefix for cryptographic binding. | ||
| store_aad_prefix : bool, default True | ||
| Whether to store the AAD prefix in the Parquet file metadata. | ||
| Set to False when the AAD prefix will be supplied externally | ||
| at read time. | ||
| Only meaningful when *aad_prefix* is provided. | ||
| encryption_algorithm : str, default "AES_GCM_V1" | ||
| Encryption algorithm. Either ``"AES_GCM_V1"`` or | ||
| ``"AES_GCM_CTR_V1"``. | ||
| plaintext_footer : bool, default False | ||
| Whether to leave the file footer unencrypted. When True, file | ||
| schema and metadata are readable without a key. | ||
|
|
||
| Returns | ||
| ------- | ||
| FileEncryptionProperties | ||
| Properties that can be passed to :func:`~pyarrow.parquet.write_table` or | ||
| :class:`~pyarrow.parquet.ParquetWriter`. | ||
|
|
||
| Examples | ||
| -------- | ||
| >>> import pyarrow as pa | ||
| >>> import pyarrow.parquet as pq | ||
| >>> import pyarrow.parquet.encryption as pe | ||
| >>> table = pa.table({'col': [1, 2, 3]}) | ||
| >>> props = pe.create_encryption_properties( | ||
| ... footer_key=b'0123456789abcdef', | ||
| ... aad_prefix=b'table_id', | ||
| ... store_aad_prefix=False, | ||
| ... ) | ||
| >>> pq.write_table(table, 'encrypted.parquet', encryption_properties=props) | ||
| """ | ||
| cdef: | ||
| CSecureString c_footer_key | ||
| c_string c_aad_prefix | ||
| CFileEncryptionPropertiesBuilder* builder | ||
| shared_ptr[CFileEncryptionProperties] props | ||
| ParquetCipher cipher | ||
|
|
||
| footer_key_bytes = tobytes(footer_key) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same here and other instances below. |
||
| if len(footer_key_bytes) not in (16, 24, 32): | ||
| raise ValueError( | ||
| f"footer_key must be 16, 24, or 32 bytes, got {len(footer_key_bytes)}" | ||
| ) | ||
|
|
||
| cipher = cipher_from_name(encryption_algorithm) | ||
| c_footer_key = CSecureString(<c_string>footer_key_bytes) | ||
| builder = new CFileEncryptionPropertiesBuilder(c_footer_key) | ||
|
|
||
| try: | ||
| builder.algorithm(cipher) | ||
|
|
||
| if aad_prefix is not None: | ||
| c_aad_prefix = tobytes(aad_prefix) | ||
| builder.aad_prefix(c_aad_prefix) | ||
| if not store_aad_prefix: | ||
| builder.disable_aad_prefix_storage() | ||
|
|
||
| if plaintext_footer: | ||
| builder.set_plaintext_footer() | ||
|
|
||
| props = builder.build() | ||
| finally: | ||
| del builder | ||
|
|
||
| return FileEncryptionProperties.wrap(props) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,7 +22,8 @@ from pyarrow.includes.libarrow cimport (Type, CChunkedArray, CScalar, CSchema, | |
| CStatus, CTable, CMemoryPool, CBuffer, | ||
| CKeyValueMetadata, CRandomAccessFile, | ||
| COutputStream, CCacheOptions, | ||
| TimeUnit, CRecordBatchReader) | ||
| TimeUnit, CRecordBatchReader, | ||
| CSecureString) | ||
|
|
||
|
|
||
| cdef extern from "parquet/api/schema.h" namespace "parquet::schema" nogil: | ||
|
|
@@ -633,6 +634,28 @@ cdef extern from "parquet/encryption/encryption.h" namespace "parquet" nogil: | |
| " parquet::FileDecryptionProperties": | ||
| pass | ||
|
|
||
| cdef cppclass CFileDecryptionPropertiesBuilder\ | ||
| " parquet::FileDecryptionProperties::Builder": | ||
| CFileDecryptionPropertiesBuilder() except + | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If all these APIs can raise C++ exceptions, which kind of exceptions will be raised on the Python side? |
||
| CFileDecryptionPropertiesBuilder* footer_key( | ||
| CSecureString footer_key) except + | ||
| CFileDecryptionPropertiesBuilder* aad_prefix( | ||
| c_string aad_prefix) except + | ||
| CFileDecryptionPropertiesBuilder* disable_footer_signature_verification() except + | ||
| CFileDecryptionPropertiesBuilder* plaintext_files_allowed() except + | ||
| shared_ptr[CFileDecryptionProperties] build() except + | ||
|
|
||
| cdef cppclass CFileEncryptionProperties\ | ||
| " parquet::FileEncryptionProperties": | ||
| pass | ||
|
|
||
| cdef cppclass CFileEncryptionPropertiesBuilder\ | ||
| " parquet::FileEncryptionProperties::Builder": | ||
| CFileEncryptionPropertiesBuilder(CSecureString footer_key) except + | ||
| CFileEncryptionPropertiesBuilder* set_plaintext_footer() except + | ||
| CFileEncryptionPropertiesBuilder* algorithm( | ||
| ParquetCipher parquet_cipher) except + | ||
| CFileEncryptionPropertiesBuilder* aad_prefix( | ||
| c_string aad_prefix) except + | ||
| CFileEncryptionPropertiesBuilder* disable_aad_prefix_storage() except + | ||
| shared_ptr[CFileEncryptionProperties] build() except + | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We shouldn't call `tobytes` as it will UTF-8-encode a str object.