Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 25 additions & 7 deletions pyiceberg/catalog/rest/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -678,6 +678,8 @@ def _split_identifier_for_json(self, identifier: str | Identifier) -> dict[str,
return {"namespace": identifier_tuple[:-1], "name": identifier_tuple[-1]}

def _init_sigv4(self, session: Session) -> None:
import base64
import hashlib
from urllib import parse

import boto3
Expand All @@ -686,6 +688,12 @@ def _init_sigv4(self, session: Session) -> None:
from requests import PreparedRequest
from requests.adapters import HTTPAdapter

class _IcebergSigV4Auth(SigV4Auth):
def canonical_request(self, request: Any) -> str:
cr = super().canonical_request(request)
# Replace the last line (body_checksum) with hex-encoded payload hash.
return cr.rsplit("\n", 1)[0] + "\n" + self.payload(request)

class SigV4Adapter(HTTPAdapter):
def __init__(self, **properties: str):
super().__init__()
Expand All @@ -710,17 +718,27 @@ def add_headers(self, request: PreparedRequest, **kwargs: Any) -> None: # pylin
# remove the connection header as it will be updated after signing
if "connection" in request.headers:
del request.headers["connection"]
# For empty bodies, explicitly set the content hash header to the SHA256 of an empty string
if not request.body:
request.headers["x-amz-content-sha256"] = EMPTY_BODY_SHA256

# Compute the x-amz-content-sha256 header to match Iceberg Java SDK:
# - empty body → hex (EMPTY_BODY_SHA256)
# - non-empty body → base64
if request.body:
body_bytes = request.body.encode("utf-8") if isinstance(request.body, str) else request.body
content_sha256_header = base64.b64encode(hashlib.sha256(body_bytes).digest()).decode()
else:
content_sha256_header = EMPTY_BODY_SHA256

signing_headers = dict(request.headers)
signing_headers["x-amz-content-sha256"] = content_sha256_header

aws_request = AWSRequest(
method=request.method, url=url, params=params, data=request.body, headers=dict(request.headers)
method=request.method, url=url, params=params, data=request.body, headers=signing_headers
)

SigV4Auth(credentials, service, region).add_auth(aws_request)
original_header = request.headers
signed_headers = aws_request.headers
_IcebergSigV4Auth(credentials, service, region).add_auth(aws_request)

original_header = dict(request.headers)
signed_headers = dict(aws_request.headers)
relocated_headers = {}

# relocate headers if there is a conflict with signed headers
Expand Down
72 changes: 69 additions & 3 deletions tests/catalog/test_rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,9 +493,10 @@ def test_sigv4_sign_request_without_body(rest_mock: Mocker) -> None:
assert isinstance(adapter, HTTPAdapter)
adapter.add_headers(prepared)

assert prepared.headers["Authorization"].startswith("AWS4-HMAC-SHA256")
assert prepared.headers["Authorization"].startswith("AWS4-HMAC-SHA256 Credential=")
assert prepared.headers["Original-Authorization"] == f"Bearer {existing_token}"
assert prepared.headers["x-amz-content-sha256"] == EMPTY_BODY_SHA256
assert "SignedHeaders=" in prepared.headers["Authorization"]


def test_sigv4_sign_request_with_body(rest_mock: Mocker) -> None:
Expand Down Expand Up @@ -524,9 +525,74 @@ def test_sigv4_sign_request_with_body(rest_mock: Mocker) -> None:
assert isinstance(adapter, HTTPAdapter)
adapter.add_headers(prepared)

assert prepared.headers["Authorization"].startswith("AWS4-HMAC-SHA256")
assert prepared.headers["Authorization"].startswith("AWS4-HMAC-SHA256 Credential=")
assert "SignedHeaders=" in prepared.headers["Authorization"]
# Conflicting Authorization header is relocated
assert prepared.headers["Original-Authorization"] == f"Bearer {existing_token}"
assert prepared.headers.get("x-amz-content-sha256") != EMPTY_BODY_SHA256
assert prepared.headers["x-amz-content-sha256"] == "nhKdVGKGU3IMGjYlod9xKUVc7/H5K6zTWj60yJOM80k="


def test_sigv4_content_sha256_with_bytes_body(rest_mock: Mocker) -> None:
existing_token = "existing_token"

catalog = RestCatalog(
"rest",
**{
"uri": TEST_URI,
"token": existing_token,
"rest.sigv4-enabled": "true",
"rest.signing-region": "us-west-2",
"client.access-key-id": "id",
"client.secret-access-key": "secret",
},
)

body_content = b'{"namespace": "test_namespace"}'
prepared = catalog._session.prepare_request(
Request(
"POST",
f"{TEST_URI}v1/namespaces",
data=body_content,
)
)
adapter = catalog._session.adapters[catalog.uri]
assert isinstance(adapter, HTTPAdapter)
adapter.add_headers(prepared)

assert prepared.headers["Authorization"].startswith("AWS4-HMAC-SHA256 Credential=")
assert "SignedHeaders=" in prepared.headers["Authorization"]
assert prepared.headers["x-amz-content-sha256"] == "sD20bEQP+WnwKPT7jxn7PIACGciAeWjQPlzFCK5Fifo="


def test_sigv4_conflicting_sigv4_headers(rest_mock: Mocker) -> None:
catalog = RestCatalog(
"rest",
**{
"uri": TEST_URI,
"rest.sigv4-enabled": "true",
"rest.signing-region": "us-west-2",
"client.access-key-id": "id",
"client.secret-access-key": "secret",
},
)

prepared = catalog._session.prepare_request(Request("GET", f"{TEST_URI}v1/config"))
adapter = catalog._session.adapters[catalog.uri]
assert isinstance(adapter, HTTPAdapter)

# Inject conflicting SigV4 headers before signing
prepared.headers["x-amz-content-sha256"] = "fake"
prepared.headers["X-Amz-Date"] = "fake"

adapter.add_headers(prepared)

# Matching Java SDK: conflicting headers are relocated with "Original-" prefix
assert prepared.headers.get("Original-x-amz-content-sha256") == "fake"
assert prepared.headers.get("Original-X-Amz-Date") == "fake"
# SigV4 headers are set correctly after signing
assert prepared.headers["Authorization"].startswith("AWS4-HMAC-SHA256 Credential=")
assert prepared.headers["x-amz-content-sha256"] == EMPTY_BODY_SHA256
assert "X-Amz-Date" in prepared.headers


def test_list_tables_404(rest_mock: Mocker) -> None:
Expand Down