From 0bac3885cf0f14a8b496f8066dab4ef570635755 Mon Sep 17 00:00:00 2001
From: Alex Stephen <alexstephen@google.com>
Date: Mon, 27 Apr 2026 12:57:26 -0700
Subject: [PATCH 1/5] Type promotion - metadata file reading

---
 pyiceberg/conversions.py            |  4 +++
 tests/expressions/test_evaluator.py | 44 +++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+)

diff --git a/pyiceberg/conversions.py b/pyiceberg/conversions.py
index 0311e76d89..d8a280dc9f 100644
--- a/pyiceberg/conversions.py
+++ b/pyiceberg/conversions.py
@@ -350,6 +350,8 @@ def _(_: PrimitiveType, b: bytes) -> int:
 @from_bytes.register(TimestampNanoType)
 @from_bytes.register(TimestamptzNanoType)
 def _(_: PrimitiveType, b: bytes) -> int:
+    if len(b) == 4:
+        return _INT_STRUCT.unpack(b)[0]
     return _LONG_STRUCT.unpack(b)[0]
 
 
@@ -360,6 +362,8 @@ def _(_: FloatType, b: bytes) -> float:
 
 @from_bytes.register(DoubleType)
 def _(_: DoubleType, b: bytes) -> float:
+    if len(b) == 4:
+        return _FLOAT_STRUCT.unpack(b)[0]
     return _DOUBLE_STRUCT.unpack(b)[0]
 
 
diff --git a/tests/expressions/test_evaluator.py b/tests/expressions/test_evaluator.py
index 03a33e4c78..4b4629ea21 100644
--- a/tests/expressions/test_evaluator.py
+++ b/tests/expressions/test_evaluator.py
@@ -50,6 +50,7 @@
     FloatType,
     IcebergType,
     IntegerType,
+    LongType,
     NestedField,
     PrimitiveType,
     StringType,
@@ -1463,3 +1464,46 @@ def test_strict_integer_not_in(strict_data_file_schema: Schema, strict_data_file
 
     should_read = _StrictMetricsEvaluator(strict_data_file_schema, NotIn("no_nulls", {"abc", "def"})).eval(strict_data_file_1)
     assert not should_read, "Should not match: no_nulls field does not have bounds"
+
+
+def test_inclusive_metrics_evaluator_with_type_promotion_crash() -> None:
+    # Schema defines 'id' as LongType (evolved state)
+    schema = Schema(NestedField(1, "id", LongType(), required=True))
+
+    # Historical manifest contains 4-byte integer bounds
+    data_file = DataFile.from_args(
+        file_path="file_1.parquet",
+        file_format=FileFormat.PARQUET,
+        partition={},
+        record_count=100,
+        file_size_in_bytes=1024,
+        lower_bounds={1: to_bytes(IntegerType(), 30)},
+        upper_bounds={1: to_bytes(IntegerType(), 79)},
+    )
+
+    # Predicate: id > 100
+    # Decodes 4-byte bounds correctly and skips the file
+    evaluator_pruning = _InclusiveMetricsEvaluator(schema, GreaterThan("id", 100))
+    assert not evaluator_pruning.eval(data_file)
+
+
+def test_inclusive_metrics_evaluator_with_float_to_double_promotion() -> None:
+    # Schema defines 'val' as DoubleType (evolved state)
+    schema = Schema(NestedField(1, "val", DoubleType(), required=True))
+
+    # Historical manifest contains 4-byte float bounds
+    data_file = DataFile.from_args(
+        file_path="file_1.parquet",
+        file_format=FileFormat.PARQUET,
+        partition={},
+        record_count=100,
+        file_size_in_bytes=1024,
+        lower_bounds={1: to_bytes(FloatType(), 30.0)},
+        upper_bounds={1: to_bytes(FloatType(), 79.0)},
+    )
+
+    # Predicate: val < 50.0
+    evaluator = _InclusiveMetricsEvaluator(schema, LessThan("val", 50.0))
+
+    # Should not crash and should correctly identify that the file might match
+    assert evaluator.eval(data_file)

From 095fd76f98879be1becef9d315f57a33b893e923 Mon Sep 17 00:00:00 2001
From: Alex Stephen <alexstephen@google.com>
Date: Mon, 27 Apr 2026 13:45:49 -0700
Subject: [PATCH 2/5] linter fix

---
 pyiceberg/conversions.py          |  4 ---
 pyiceberg/expressions/visitors.py | 43 +++++++++++++++++++++----------
 2 files changed, 29 insertions(+), 18 deletions(-)

diff --git a/pyiceberg/conversions.py b/pyiceberg/conversions.py
index d8a280dc9f..0311e76d89 100644
--- a/pyiceberg/conversions.py
+++ b/pyiceberg/conversions.py
@@ -350,8 +350,6 @@ def _(_: PrimitiveType, b: bytes) -> int:
 @from_bytes.register(TimestampNanoType)
 @from_bytes.register(TimestamptzNanoType)
 def _(_: PrimitiveType, b: bytes) -> int:
-    if len(b) == 4:
-        return _INT_STRUCT.unpack(b)[0]
     return _LONG_STRUCT.unpack(b)[0]
 
 
@@ -362,8 +360,6 @@ def _(_: FloatType, b: bytes) -> float:
 
 @from_bytes.register(DoubleType)
 def _(_: DoubleType, b: bytes) -> float:
-    if len(b) == 4:
-        return _FLOAT_STRUCT.unpack(b)[0]
     return _DOUBLE_STRUCT.unpack(b)[0]
 
 
diff --git a/pyiceberg/expressions/visitors.py b/pyiceberg/expressions/visitors.py
index 0beb0f3df0..04f0c3f971 100644
--- a/pyiceberg/expressions/visitors.py
+++ b/pyiceberg/expressions/visitors.py
@@ -59,12 +59,16 @@
 from pyiceberg.schema import Schema
 from pyiceberg.typedef import EMPTY_DICT, L, LiteralValue, Record, StructProtocol
 from pyiceberg.types import (
+    DateType,
     DoubleType,
     FloatType,
     IcebergType,
+    IntegerType,
+    LongType,
     NestedField,
     PrimitiveType,
     StructType,
+    TimestampNanoType,
     TimestampType,
     TimestamptzType,
 )
@@ -73,6 +77,17 @@
 T = TypeVar("T")
 
 
+def _from_bytes_with_promotion(field_type: PrimitiveType, b: bytes) -> Any:
+    if len(b) == 4:
+        if isinstance(field_type, LongType):
+            return from_bytes(IntegerType(), b)
+        elif isinstance(field_type, DoubleType):
+            return from_bytes(FloatType(), b)
+        elif isinstance(field_type, (TimestampType, TimestampNanoType)):
+            return from_bytes(DateType(), b)
+    return from_bytes(field_type, b)
+
+
 class BooleanExpressionVisitor(Generic[T], ABC):
     @abstractmethod
     def visit_true(self) -> T:
@@ -1242,7 +1257,7 @@ def visit_less_than(self, term: BoundTerm, literal: LiteralValue) -> bool:
             raise ValueError(f"Expected PrimitiveType: {field.field_type}")
 
         if lower_bound_bytes := self.lower_bounds.get(field_id):
-            lower_bound = from_bytes(field.field_type, lower_bound_bytes)
+            lower_bound = _from_bytes_with_promotion(field.field_type, lower_bound_bytes)
 
             if self._is_nan(lower_bound):
                 # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
@@ -1264,7 +1279,7 @@ def visit_less_than_or_equal(self, term: BoundTerm, literal: LiteralValue) -> bo
             raise ValueError(f"Expected PrimitiveType: {field.field_type}")
 
         if lower_bound_bytes := self.lower_bounds.get(field_id):
-            lower_bound = from_bytes(field.field_type, lower_bound_bytes)
+            lower_bound = _from_bytes_with_promotion(field.field_type, lower_bound_bytes)
             if self._is_nan(lower_bound):
                 # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
                 return ROWS_MIGHT_MATCH
@@ -1285,7 +1300,7 @@ def visit_greater_than(self, term: BoundTerm, literal: LiteralValue) -> bool:
             raise ValueError(f"Expected PrimitiveType: {field.field_type}")
 
         if upper_bound_bytes := self.upper_bounds.get(field_id):
-            upper_bound = from_bytes(field.field_type, upper_bound_bytes)
+            upper_bound = _from_bytes_with_promotion(field.field_type, upper_bound_bytes)
             if upper_bound <= literal.value:
                 if self._is_nan(upper_bound):
                     # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
@@ -1306,7 +1321,7 @@ def visit_greater_than_or_equal(self, term: BoundTerm, literal: LiteralValue) ->
             raise ValueError(f"Expected PrimitiveType: {field.field_type}")
 
         if upper_bound_bytes := self.upper_bounds.get(field_id):
-            upper_bound = from_bytes(field.field_type, upper_bound_bytes)
+            upper_bound = _from_bytes_with_promotion(field.field_type, upper_bound_bytes)
             if upper_bound < literal.value:
                 if self._is_nan(upper_bound):
                     # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
@@ -1327,7 +1342,7 @@ def visit_equal(self, term: BoundTerm, literal: LiteralValue) -> bool:
             raise ValueError(f"Expected PrimitiveType: {field.field_type}")
 
         if lower_bound_bytes := self.lower_bounds.get(field_id):
-            lower_bound = from_bytes(field.field_type, lower_bound_bytes)
+            lower_bound = _from_bytes_with_promotion(field.field_type, lower_bound_bytes)
             if self._is_nan(lower_bound):
                 # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
                 return ROWS_MIGHT_MATCH
@@ -1336,7 +1351,7 @@ def visit_equal(self, term: BoundTerm, literal: LiteralValue) -> bool:
                 return ROWS_CANNOT_MATCH
 
         if upper_bound_bytes := self.upper_bounds.get(field_id):
-            upper_bound = from_bytes(field.field_type, upper_bound_bytes)
+            upper_bound = _from_bytes_with_promotion(field.field_type, upper_bound_bytes)
             if self._is_nan(upper_bound):
                 # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
                 return ROWS_MIGHT_MATCH
@@ -1364,22 +1379,22 @@ def visit_in(self, term: BoundTerm, literals: set[L]) -> bool:
             raise ValueError(f"Expected PrimitiveType: {field.field_type}")
 
         if lower_bound_bytes := self.lower_bounds.get(field_id):
-            lower_bound = from_bytes(field.field_type, lower_bound_bytes)
+            lower_bound = _from_bytes_with_promotion(field.field_type, lower_bound_bytes)
             if self._is_nan(lower_bound):
                 # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
                 return ROWS_MIGHT_MATCH
 
-            literals = {lit for lit in literals if lower_bound <= lit}  # type: ignore[operator]
+            literals = {lit for lit in literals if lower_bound <= lit}
             if len(literals) == 0:
                 return ROWS_CANNOT_MATCH
 
         if upper_bound_bytes := self.upper_bounds.get(field_id):
-            upper_bound = from_bytes(field.field_type, upper_bound_bytes)
+            upper_bound = _from_bytes_with_promotion(field.field_type, upper_bound_bytes)
             # this is different from Java, here NaN is always larger
             if self._is_nan(upper_bound):
                 return ROWS_MIGHT_MATCH
 
-            literals = {lit for lit in literals if upper_bound >= lit}  # type: ignore[operator]
+            literals = {lit for lit in literals if upper_bound >= lit}
             if len(literals) == 0:
                 return ROWS_CANNOT_MATCH
 
@@ -1404,14 +1419,14 @@ def visit_starts_with(self, term: BoundTerm, literal: LiteralValue) -> bool:
         len_prefix = len(prefix)
 
         if lower_bound_bytes := self.lower_bounds.get(field_id):
-            lower_bound = str(from_bytes(field.field_type, lower_bound_bytes))
+            lower_bound = str(_from_bytes_with_promotion(field.field_type, lower_bound_bytes))
 
             # truncate lower bound so that its length is not greater than the length of prefix
             if lower_bound and lower_bound[:len_prefix] > prefix:
                 return ROWS_CANNOT_MATCH
 
         if upper_bound_bytes := self.upper_bounds.get(field_id):
-            upper_bound = str(from_bytes(field.field_type, upper_bound_bytes))
+            upper_bound = str(_from_bytes_with_promotion(field.field_type, upper_bound_bytes))
 
             # truncate upper bound so that its length is not greater than the length of prefix
             if upper_bound is not None and upper_bound[:len_prefix] < prefix:
@@ -1435,8 +1450,8 @@ def visit_not_starts_with(self, term: BoundTerm, literal: LiteralValue) -> bool:
         # not_starts_with will match unless all values must start with the prefix. This happens when
         # the lower and upper bounds both start with the prefix.
         if (lower_bound_bytes := self.lower_bounds.get(field_id)) and (upper_bound_bytes := self.upper_bounds.get(field_id)):
-            lower_bound = str(from_bytes(field.field_type, lower_bound_bytes))
-            upper_bound = str(from_bytes(field.field_type, upper_bound_bytes))
+            lower_bound = str(_from_bytes_with_promotion(field.field_type, lower_bound_bytes))
+            upper_bound = str(_from_bytes_with_promotion(field.field_type, upper_bound_bytes))
 
             # if lower is shorter than the prefix then lower doesn't start with the prefix
             if len(lower_bound) < len_prefix:

From 803ac61d3f6e31c352f4030fbc531da679a70a44 Mon Sep 17 00:00:00 2001
From: Alex Stephen <alexstephen@google.com>
Date: Wed, 29 Apr 2026 13:06:36 -0700
Subject: [PATCH 3/5] PR comments

---
 pyiceberg/expressions/visitors.py   |  9 ++-
 tests/expressions/test_evaluator.py | 96 +++++++++++++++++++++--------
 2 files changed, 76 insertions(+), 29 deletions(-)

diff --git a/pyiceberg/expressions/visitors.py b/pyiceberg/expressions/visitors.py
index 04f0c3f971..9dbe04b564 100644
--- a/pyiceberg/expressions/visitors.py
+++ b/pyiceberg/expressions/visitors.py
@@ -59,7 +59,6 @@
 from pyiceberg.schema import Schema
 from pyiceberg.typedef import EMPTY_DICT, L, LiteralValue, Record, StructProtocol
 from pyiceberg.types import (
-    DateType,
     DoubleType,
     FloatType,
     IcebergType,
@@ -68,7 +67,6 @@
     NestedField,
     PrimitiveType,
     StructType,
-    TimestampNanoType,
     TimestampType,
     TimestamptzType,
 )
@@ -78,13 +76,14 @@
 
 
 def _from_bytes_with_promotion(field_type: PrimitiveType, b: bytes) -> Any:
+    # Integer, Float, Date are 4 bytes
+    # Long, Double, Timestamps are 8 bytes
+    # If we have 4 bytes, we may have to handle type promotion.
     if len(b) == 4:
         if isinstance(field_type, LongType):
             return from_bytes(IntegerType(), b)
         elif isinstance(field_type, DoubleType):
             return from_bytes(FloatType(), b)
-        elif isinstance(field_type, (TimestampType, TimestampNanoType)):
-            return from_bytes(DateType(), b)
     return from_bytes(field_type, b)
 
 
@@ -555,7 +554,7 @@ def visit_or(self, left_result: bool, right_result: bool) -> bool:
 def _from_byte_buffer(field_type: IcebergType, val: bytes) -> Any:
     if not isinstance(field_type, PrimitiveType):
         raise ValueError(f"Expected a PrimitiveType, got: {type(field_type)}")
-    return from_bytes(field_type, val)
+    return _from_bytes_with_promotion(field_type, val)
 
 
 class _ManifestEvalVisitor(BoundBooleanExpressionVisitor[bool]):
diff --git a/tests/expressions/test_evaluator.py b/tests/expressions/test_evaluator.py
index 4b4629ea21..5245c8a27a 100644
--- a/tests/expressions/test_evaluator.py
+++ b/tests/expressions/test_evaluator.py
@@ -41,7 +41,14 @@
     Or,
     StartsWith,
 )
-from pyiceberg.expressions.visitors import _InclusiveMetricsEvaluator, _StrictMetricsEvaluator
+from pyiceberg.expressions.visitors import (
+    ROWS_CANNOT_MATCH,
+    ROWS_MIGHT_MATCH,
+    ROWS_MIGHT_NOT_MATCH,
+    ROWS_MUST_MATCH,
+    _InclusiveMetricsEvaluator,
+    _StrictMetricsEvaluator,
+)
 from pyiceberg.manifest import DataFile, FileFormat
 from pyiceberg.schema import Schema
 from pyiceberg.typedef import Record
@@ -1466,44 +1473,85 @@ def test_strict_integer_not_in(strict_data_file_schema: Schema, strict_data_file
     assert not should_read, "Should not match: no_nulls field does not have bounds"
 
 
-def test_inclusive_metrics_evaluator_with_type_promotion_crash() -> None:
-    # Schema defines 'id' as LongType (evolved state)
-    schema = Schema(NestedField(1, "id", LongType(), required=True))
+@pytest.mark.parametrize(
+    "file_type, evolved_type, lower_bound, upper_bound, op, lit, expected",
+    [
+        # Int -> Long
+        (IntegerType(), LongType(), 30, 79, GreaterThan, 100, ROWS_CANNOT_MATCH),
+        (IntegerType(), LongType(), 30, 79, LessThan, 50, ROWS_MIGHT_MATCH),
+        # Float -> Double
+        (FloatType(), DoubleType(), 30.0, 79.0, GreaterThan, 100.0, ROWS_CANNOT_MATCH),
+        (FloatType(), DoubleType(), 30.0, 79.0, LessThan, 50.0, ROWS_MIGHT_MATCH),
+    ],
+)
+def test_inclusive_metrics_evaluator_with_type_promotion(
+    file_type: PrimitiveType,
+    evolved_type: PrimitiveType,
+    lower_bound: Any,
+    upper_bound: Any,
+    op: Any,
+    lit: Any,
+    expected: bool,
+) -> None:
+    # Schema defines 'col' with evolved state
+    schema = Schema(NestedField(1, "col", evolved_type, required=True))
 
-    # Historical manifest contains 4-byte integer bounds
+    # Historical manifest contains file_type bounds
     data_file = DataFile.from_args(
         file_path="file_1.parquet",
         file_format=FileFormat.PARQUET,
         partition={},
         record_count=100,
         file_size_in_bytes=1024,
-        lower_bounds={1: to_bytes(IntegerType(), 30)},
-        upper_bounds={1: to_bytes(IntegerType(), 79)},
+        lower_bounds={1: to_bytes(file_type, lower_bound)},
+        upper_bounds={1: to_bytes(file_type, upper_bound)},
     )
 
-    # Predicate: id > 100
-    # Decodes 4-byte bounds correctly and skips the file
-    evaluator_pruning = _InclusiveMetricsEvaluator(schema, GreaterThan("id", 100))
-    assert not evaluator_pruning.eval(data_file)
-
-
-def test_inclusive_metrics_evaluator_with_float_to_double_promotion() -> None:
-    # Schema defines 'val' as DoubleType (evolved state)
-    schema = Schema(NestedField(1, "val", DoubleType(), required=True))
+    # Predicate refers to 'col'
+    evaluator = _InclusiveMetricsEvaluator(schema, op("col", lit))
+    assert evaluator.eval(data_file) == expected
+
+
+@pytest.mark.parametrize(
+    "file_type, evolved_type, lower_bound, upper_bound, op, lit, expected",
+    [
+        # Int -> Long
+        (IntegerType(), LongType(), 30, 79, GreaterThan, 20, ROWS_MUST_MATCH),
+        (IntegerType(), LongType(), 30, 79, GreaterThan, 100, ROWS_MIGHT_NOT_MATCH),
+        (IntegerType(), LongType(), 30, 79, LessThan, 100, ROWS_MUST_MATCH),
+        (IntegerType(), LongType(), 30, 79, LessThan, 20, ROWS_MIGHT_NOT_MATCH),
+        # Float -> Double
+        (FloatType(), DoubleType(), 30.0, 79.0, GreaterThan, 20.0, ROWS_MUST_MATCH),
+        (FloatType(), DoubleType(), 30.0, 79.0, GreaterThan, 100.0, ROWS_MIGHT_NOT_MATCH),
+        (FloatType(), DoubleType(), 30.0, 79.0, LessThan, 100.0, ROWS_MUST_MATCH),
+        (FloatType(), DoubleType(), 30.0, 79.0, LessThan, 20.0, ROWS_MIGHT_NOT_MATCH),
+    ],
+)
+def test_strict_metrics_evaluator_with_type_promotion(
+    file_type: PrimitiveType,
+    evolved_type: PrimitiveType,
+    lower_bound: Any,
+    upper_bound: Any,
+    op: Any,
+    lit: Any,
+    expected: bool,
+) -> None:
+    # Schema defines 'col' with evolved state
+    schema = Schema(NestedField(1, "col", evolved_type, required=True))
 
-    # Historical manifest contains 4-byte float bounds
+    # Historical manifest contains file_type bounds
     data_file = DataFile.from_args(
         file_path="file_1.parquet",
         file_format=FileFormat.PARQUET,
         partition={},
         record_count=100,
         file_size_in_bytes=1024,
-        lower_bounds={1: to_bytes(FloatType(), 30.0)},
-        upper_bounds={1: to_bytes(FloatType(), 79.0)},
+        lower_bounds={1: to_bytes(file_type, lower_bound)},
+        upper_bounds={1: to_bytes(file_type, upper_bound)},
+        null_value_counts={1: 0},
+        nan_value_counts={1: 0},
     )
 
-    # Predicate: val < 50.0
-    evaluator = _InclusiveMetricsEvaluator(schema, LessThan("val", 50.0))
-
-    # Should not crash and should correctly identify that the file might match
-    assert evaluator.eval(data_file)
+    # Predicate refers to 'col'
+    evaluator = _StrictMetricsEvaluator(schema, op("col", lit))
+    assert evaluator.eval(data_file) == expected

From ef8e8c9dfe8268951a2c74b0a3c6d443952dec57 Mon Sep 17 00:00:00 2001
From: Alex Stephen <alexstephen@google.com>
Date: Thu, 30 Apr 2026 14:15:43 -0700
Subject: [PATCH 4/5] PR comments

---
 pyiceberg/conversions.py          | 12 ++++++++-
 pyiceberg/expressions/visitors.py | 44 +++++++++++--------------------
 2 files changed, 26 insertions(+), 30 deletions(-)

diff --git a/pyiceberg/conversions.py b/pyiceberg/conversions.py
index 0311e76d89..a071a3c884 100644
--- a/pyiceberg/conversions.py
+++ b/pyiceberg/conversions.py
@@ -343,7 +343,6 @@ def _(_: PrimitiveType, b: bytes) -> int:
     return _INT_STRUCT.unpack(b)[0]
 
 
-@from_bytes.register(LongType)
 @from_bytes.register(TimeType)
 @from_bytes.register(TimestampType)
 @from_bytes.register(TimestamptzType)
@@ -353,6 +352,14 @@ def _(_: PrimitiveType, b: bytes) -> int:
     return _LONG_STRUCT.unpack(b)[0]
 
 
+@from_bytes.register(LongType)
+def _(_: PrimitiveType, b: bytes) -> int:
+    if len(b) == 4:
+        # If the length is 4 bytes, it is a promoted IntegerType
+        return _INT_STRUCT.unpack(b)[0]
+    return _LONG_STRUCT.unpack(b)[0]
+
+
 @from_bytes.register(FloatType)
 def _(_: FloatType, b: bytes) -> float:
     return _FLOAT_STRUCT.unpack(b)[0]
@@ -360,6 +367,9 @@ def _(_: FloatType, b: bytes) -> float:
 
 @from_bytes.register(DoubleType)
 def _(_: DoubleType, b: bytes) -> float:
+    if len(b) == 4:
+        # If the length is 4 bytes, it is a promoted FloatType
+        return _FLOAT_STRUCT.unpack(b)[0]
     return _DOUBLE_STRUCT.unpack(b)[0]
 
 
diff --git a/pyiceberg/expressions/visitors.py b/pyiceberg/expressions/visitors.py
index 9dbe04b564..0beb0f3df0 100644
--- a/pyiceberg/expressions/visitors.py
+++ b/pyiceberg/expressions/visitors.py
@@ -62,8 +62,6 @@
     DoubleType,
     FloatType,
     IcebergType,
-    IntegerType,
-    LongType,
     NestedField,
     PrimitiveType,
     StructType,
@@ -75,18 +73,6 @@
 T = TypeVar("T")
 
 
-def _from_bytes_with_promotion(field_type: PrimitiveType, b: bytes) -> Any:
-    # Integer, Float, Date are 4 bytes
-    # Long, Double, Timestamps are 8 bytes
-    # If we have 4 bytes, we may have to handle type promotion.
-    if len(b) == 4:
-        if isinstance(field_type, LongType):
-            return from_bytes(IntegerType(), b)
-        elif isinstance(field_type, DoubleType):
-            return from_bytes(FloatType(), b)
-    return from_bytes(field_type, b)
-
-
 class BooleanExpressionVisitor(Generic[T], ABC):
     @abstractmethod
     def visit_true(self) -> T:
@@ -554,7 +540,7 @@ def visit_or(self, left_result: bool, right_result: bool) -> bool:
 def _from_byte_buffer(field_type: IcebergType, val: bytes) -> Any:
     if not isinstance(field_type, PrimitiveType):
         raise ValueError(f"Expected a PrimitiveType, got: {type(field_type)}")
-    return _from_bytes_with_promotion(field_type, val)
+    return from_bytes(field_type, val)
 
 
 class _ManifestEvalVisitor(BoundBooleanExpressionVisitor[bool]):
@@ -1256,7 +1242,7 @@ def visit_less_than(self, term: BoundTerm, literal: LiteralValue) -> bool:
             raise ValueError(f"Expected PrimitiveType: {field.field_type}")
 
         if lower_bound_bytes := self.lower_bounds.get(field_id):
-            lower_bound = _from_bytes_with_promotion(field.field_type, lower_bound_bytes)
+            lower_bound = from_bytes(field.field_type, lower_bound_bytes)
 
             if self._is_nan(lower_bound):
                 # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
@@ -1278,7 +1264,7 @@ def visit_less_than_or_equal(self, term: BoundTerm, literal: LiteralValue) -> bo
             raise ValueError(f"Expected PrimitiveType: {field.field_type}")
 
         if lower_bound_bytes := self.lower_bounds.get(field_id):
-            lower_bound = _from_bytes_with_promotion(field.field_type, lower_bound_bytes)
+            lower_bound = from_bytes(field.field_type, lower_bound_bytes)
             if self._is_nan(lower_bound):
                 # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
                 return ROWS_MIGHT_MATCH
@@ -1299,7 +1285,7 @@ def visit_greater_than(self, term: BoundTerm, literal: LiteralValue) -> bool:
             raise ValueError(f"Expected PrimitiveType: {field.field_type}")
 
         if upper_bound_bytes := self.upper_bounds.get(field_id):
-            upper_bound = _from_bytes_with_promotion(field.field_type, upper_bound_bytes)
+            upper_bound = from_bytes(field.field_type, upper_bound_bytes)
             if upper_bound <= literal.value:
                 if self._is_nan(upper_bound):
                     # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
@@ -1320,7 +1306,7 @@ def visit_greater_than_or_equal(self, term: BoundTerm, literal: LiteralValue) ->
             raise ValueError(f"Expected PrimitiveType: {field.field_type}")
 
         if upper_bound_bytes := self.upper_bounds.get(field_id):
-            upper_bound = _from_bytes_with_promotion(field.field_type, upper_bound_bytes)
+            upper_bound = from_bytes(field.field_type, upper_bound_bytes)
             if upper_bound < literal.value:
                 if self._is_nan(upper_bound):
                     # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
@@ -1341,7 +1327,7 @@ def visit_equal(self, term: BoundTerm, literal: LiteralValue) -> bool:
             raise ValueError(f"Expected PrimitiveType: {field.field_type}")
 
         if lower_bound_bytes := self.lower_bounds.get(field_id):
-            lower_bound = _from_bytes_with_promotion(field.field_type, lower_bound_bytes)
+            lower_bound = from_bytes(field.field_type, lower_bound_bytes)
             if self._is_nan(lower_bound):
                 # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
                 return ROWS_MIGHT_MATCH
@@ -1350,7 +1336,7 @@ def visit_equal(self, term: BoundTerm, literal: LiteralValue) -> bool:
                 return ROWS_CANNOT_MATCH
 
         if upper_bound_bytes := self.upper_bounds.get(field_id):
-            upper_bound = _from_bytes_with_promotion(field.field_type, upper_bound_bytes)
+            upper_bound = from_bytes(field.field_type, upper_bound_bytes)
             if self._is_nan(upper_bound):
                 # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
                 return ROWS_MIGHT_MATCH
@@ -1378,22 +1364,22 @@ def visit_in(self, term: BoundTerm, literals: set[L]) -> bool:
             raise ValueError(f"Expected PrimitiveType: {field.field_type}")
 
         if lower_bound_bytes := self.lower_bounds.get(field_id):
-            lower_bound = _from_bytes_with_promotion(field.field_type, lower_bound_bytes)
+            lower_bound = from_bytes(field.field_type, lower_bound_bytes)
             if self._is_nan(lower_bound):
                 # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
                 return ROWS_MIGHT_MATCH
 
-            literals = {lit for lit in literals if lower_bound <= lit}
+            literals = {lit for lit in literals if lower_bound <= lit}  # type: ignore[operator]
             if len(literals) == 0:
                 return ROWS_CANNOT_MATCH
 
         if upper_bound_bytes := self.upper_bounds.get(field_id):
-            upper_bound = _from_bytes_with_promotion(field.field_type, upper_bound_bytes)
+            upper_bound = from_bytes(field.field_type, upper_bound_bytes)
             # this is different from Java, here NaN is always larger
             if self._is_nan(upper_bound):
                 return ROWS_MIGHT_MATCH
 
-            literals = {lit for lit in literals if upper_bound >= lit}
+            literals = {lit for lit in literals if upper_bound >= lit}  # type: ignore[operator]
             if len(literals) == 0:
                 return ROWS_CANNOT_MATCH
 
@@ -1418,14 +1404,14 @@ def visit_starts_with(self, term: BoundTerm, literal: LiteralValue) -> bool:
         len_prefix = len(prefix)
 
         if lower_bound_bytes := self.lower_bounds.get(field_id):
-            lower_bound = str(_from_bytes_with_promotion(field.field_type, lower_bound_bytes))
+            lower_bound = str(from_bytes(field.field_type, lower_bound_bytes))
 
             # truncate lower bound so that its length is not greater than the length of prefix
             if lower_bound and lower_bound[:len_prefix] > prefix:
                 return ROWS_CANNOT_MATCH
 
         if upper_bound_bytes := self.upper_bounds.get(field_id):
-            upper_bound = str(_from_bytes_with_promotion(field.field_type, upper_bound_bytes))
+            upper_bound = str(from_bytes(field.field_type, upper_bound_bytes))
 
             # truncate upper bound so that its length is not greater than the length of prefix
             if upper_bound is not None and upper_bound[:len_prefix] < prefix:
@@ -1449,8 +1435,8 @@ def visit_not_starts_with(self, term: BoundTerm, literal: LiteralValue) -> bool:
         # not_starts_with will match unless all values must start with the prefix. This happens when
         # the lower and upper bounds both start with the prefix.
         if (lower_bound_bytes := self.lower_bounds.get(field_id)) and (upper_bound_bytes := self.upper_bounds.get(field_id)):
-            lower_bound = str(_from_bytes_with_promotion(field.field_type, lower_bound_bytes))
-            upper_bound = str(_from_bytes_with_promotion(field.field_type, upper_bound_bytes))
+            lower_bound = str(from_bytes(field.field_type, lower_bound_bytes))
+            upper_bound = str(from_bytes(field.field_type, upper_bound_bytes))
 
             # if lower is shorter than the prefix then lower doesn't start with the prefix
             if len(lower_bound) < len_prefix:

From 7467c0a8132591150f2803eb3bff4af901995274 Mon Sep 17 00:00:00 2001
From: Alex Stephen <alexstephen@google.com>
Date: Thu, 30 Apr 2026 17:40:08 -0700
Subject: [PATCH 5/5] PR comments

---
 pyiceberg/conversions.py            |  4 ++--
 tests/expressions/test_evaluator.py | 10 ++--------
 2 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/pyiceberg/conversions.py b/pyiceberg/conversions.py
index a071a3c884..42d996f756 100644
--- a/pyiceberg/conversions.py
+++ b/pyiceberg/conversions.py
@@ -354,7 +354,7 @@ def _(_: PrimitiveType, b: bytes) -> int:
 
 @from_bytes.register(LongType)
 def _(_: PrimitiveType, b: bytes) -> int:
-    if len(b) == 4:
+    if len(b) < 8:
         # If the length is 4 bytes, it is a promoted IntegerType
         return _INT_STRUCT.unpack(b)[0]
     return _LONG_STRUCT.unpack(b)[0]
@@ -367,7 +367,7 @@ def _(_: FloatType, b: bytes) -> float:
 
 @from_bytes.register(DoubleType)
 def _(_: DoubleType, b: bytes) -> float:
-    if len(b) == 4:
+    if len(b) < 8:
         # If the length is 4 bytes, it is a promoted FloatType
         return _FLOAT_STRUCT.unpack(b)[0]
     return _DOUBLE_STRUCT.unpack(b)[0]
diff --git a/tests/expressions/test_evaluator.py b/tests/expressions/test_evaluator.py
index 5245c8a27a..b8e4d87044 100644
--- a/tests/expressions/test_evaluator.py
+++ b/tests/expressions/test_evaluator.py
@@ -1484,7 +1484,7 @@ def test_strict_integer_not_in(strict_data_file_schema: Schema, strict_data_file
         (FloatType(), DoubleType(), 30.0, 79.0, LessThan, 50.0, ROWS_MIGHT_MATCH),
     ],
 )
-def test_inclusive_metrics_evaluator_with_type_promotion(
+def test_inclusive_metrics_eval_bounds_after_promotion(
     file_type: PrimitiveType,
     evolved_type: PrimitiveType,
     lower_bound: Any,
@@ -1493,10 +1493,8 @@ def test_inclusive_metrics_evaluator_with_type_promotion(
     lit: Any,
     expected: bool,
 ) -> None:
-    # Schema defines 'col' with evolved state
     schema = Schema(NestedField(1, "col", evolved_type, required=True))
 
-    # Historical manifest contains file_type bounds
     data_file = DataFile.from_args(
         file_path="file_1.parquet",
         file_format=FileFormat.PARQUET,
@@ -1507,7 +1505,6 @@ def test_inclusive_metrics_evaluator_with_type_promotion(
         upper_bounds={1: to_bytes(file_type, upper_bound)},
     )
 
-    # Predicate refers to 'col'
     evaluator = _InclusiveMetricsEvaluator(schema, op("col", lit))
     assert evaluator.eval(data_file) == expected
 
@@ -1527,7 +1524,7 @@ def test_inclusive_metrics_evaluator_with_type_promotion(
         (FloatType(), DoubleType(), 30.0, 79.0, LessThan, 20.0, ROWS_MIGHT_NOT_MATCH),
     ],
 )
-def test_strict_metrics_evaluator_with_type_promotion(
+def test_strict_metrics_eval_bounds_after_promotion(
     file_type: PrimitiveType,
     evolved_type: PrimitiveType,
     lower_bound: Any,
@@ -1536,10 +1533,8 @@ def test_strict_metrics_evaluator_with_type_promotion(
     lit: Any,
     expected: bool,
 ) -> None:
-    # Schema defines 'col' with evolved state
     schema = Schema(NestedField(1, "col", evolved_type, required=True))
 
-    # Historical manifest contains file_type bounds
     data_file = DataFile.from_args(
         file_path="file_1.parquet",
         file_format=FileFormat.PARQUET,
@@ -1552,6 +1547,5 @@ def test_strict_metrics_evaluator_with_type_promotion(
         nan_value_counts={1: 0},
     )
 
-    # Predicate refers to 'col'
     evaluator = _StrictMetricsEvaluator(schema, op("col", lit))
     assert evaluator.eval(data_file) == expected