Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2,442 changes: 1,226 additions & 1,216 deletions Include/internal/pycore_uop_ids.h

Large diffs are not rendered by default.

53 changes: 53 additions & 0 deletions Include/internal/pycore_uop_metadata.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

125 changes: 125 additions & 0 deletions Lib/test/test_capi/test_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -3237,6 +3237,131 @@ def testfunc(args):
uops = get_opnames(ex)
self.assertNotIn("_UNARY_NEGATIVE_FLOAT_INPLACE", uops)

def test_float_truediv_inplace_unique_lhs(self):
    """Check that ``(a + b) / c`` divides in place into the ``a + b`` temporary.

    The sum on the left of ``/`` is a uniquely referenced float, so the
    trace is expected to contain ``_BINARY_OP_TRUEDIV_FLOAT_INPLACE``.
    """
    # The loop body is kept in this exact shape: the uops being asserted
    # below depend on the expression the optimizer sees.
    def testfunc(args):
        a, b, c, n = args
        total = 0.0
        for _ in range(n):
            total += (a + b) / c
        return total

    inputs = (2.0, 3.0, 4.0, TIER2_THRESHOLD)
    result, executor = self._run_with_optimizer(testfunc, inputs)

    # (2.0 + 3.0) / 4.0 == 1.25 per iteration.
    self.assertAlmostEqual(result, TIER2_THRESHOLD * 1.25)
    self.assertIsNotNone(executor)
    self.assertIn("_BINARY_OP_TRUEDIV_FLOAT_INPLACE", get_opnames(executor))

def test_float_truediv_inplace_unique_rhs(self):
    """Check that ``c / (a + b)`` divides in place into the right operand.

    Here the uniquely referenced float is on the right of ``/``, so the
    trace is expected to contain ``_BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT``.
    """
    # The loop body is kept in this exact shape: the uops being asserted
    # below depend on the expression the optimizer sees.
    def testfunc(args):
        a, b, c, n = args
        total = 0.0
        for _ in range(n):
            total += c / (a + b)
        return total

    inputs = (2.0, 3.0, 4.0, TIER2_THRESHOLD)
    result, executor = self._run_with_optimizer(testfunc, inputs)

    # 4.0 / (2.0 + 3.0) == 0.8 per iteration.
    self.assertAlmostEqual(result, TIER2_THRESHOLD * 0.8)
    self.assertIsNotNone(executor)
    self.assertIn(
        "_BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT", get_opnames(executor)
    )

def test_float_truediv_type_propagation(self):
    """Check that float-division results feed the in-place float adds.

    For ``(a / b) + (c / d)`` the optimizer speculatively inserts float
    guards for both divisions and specializes them to
    ``_BINARY_OP_TRUEDIV_FLOAT``.  Their results are unique floats, so
    the following additions are expected to use the in-place variants.
    """
    # The loop body is kept in this exact shape: the uops being asserted
    # below depend on the expression the optimizer sees.
    def testfunc(args):
        a, b, c, d, n = args
        total = 0.0
        for _ in range(n):
            total += (a / b) + (c / d)
        return total

    inputs = (10.0, 3.0, 4.0, 5.0, TIER2_THRESHOLD)
    result, executor = self._run_with_optimizer(testfunc, inputs)

    per_iteration = 10.0 / 3.0 + 4.0 / 5.0
    self.assertAlmostEqual(result, TIER2_THRESHOLD * per_iteration)
    self.assertIsNotNone(executor)

    opnames = get_opnames(executor)
    required_uops = (
        # Both divisions are specialized with speculative guards.
        "_BINARY_OP_TRUEDIV_FLOAT",
        # The + uses inplace (a/b result is unique).
        "_BINARY_OP_ADD_FLOAT_INPLACE",
        # The += uses inplace (+ result is unique).
        "_BINARY_OP_ADD_FLOAT_INPLACE_RIGHT",
    )
    for uop_name in required_uops:
        self.assertIn(uop_name, opnames)

def test_float_truediv_unique_result_enables_inplace(self):
    """Check in-place division for the chained ``(a + b) / c / d``.

    ``(a + b)`` is a unique float, so the first ``/`` can divide in
    place; its result is unique as well, so the second ``/`` can also
    use ``_BINARY_OP_TRUEDIV_FLOAT_INPLACE``.  The assertion below only
    checks that the uop appears in the trace; it does not count
    occurrences.
    """
    # The loop body is kept in this exact shape: the uops being asserted
    # below depend on the expression the optimizer sees.
    def testfunc(args):
        a, b, c, d, n = args
        total = 0.0
        for _ in range(n):
            total += (a + b) / c / d
        return total

    inputs = (2.0, 3.0, 4.0, 5.0, TIER2_THRESHOLD)
    result, executor = self._run_with_optimizer(testfunc, inputs)

    per_iteration = (2.0 + 3.0) / 4.0 / 5.0
    self.assertAlmostEqual(result, TIER2_THRESHOLD * per_iteration)
    self.assertIsNotNone(executor)

    # Chained uniqueness: the divisions should use the inplace uop.
    self.assertIn("_BINARY_OP_TRUEDIV_FLOAT_INPLACE", get_opnames(executor))

def test_float_add_chain_both_unique(self):
    """Check that ``(a + b) + (c + d)`` adds in place into a temporary.

    Both inner sums produce unique floats, so the outer ``+`` should
    reuse one of them; either in-place add variant is accepted.
    """
    # The loop body is kept in this exact shape: the uops being asserted
    # below depend on the expression the optimizer sees.
    def testfunc(args):
        a, b, c, d, n = args
        total = 0.0
        for _ in range(n):
            total += (a + b) + (c + d)
        return total

    inputs = (1.0, 2.0, 3.0, 4.0, TIER2_THRESHOLD)
    result, executor = self._run_with_optimizer(testfunc, inputs)

    # 1.0 + 2.0 + 3.0 + 4.0 == 10.0 per iteration.
    self.assertAlmostEqual(result, TIER2_THRESHOLD * 10.0)
    self.assertIsNotNone(executor)

    opnames = get_opnames(executor)
    # At least one operand of the outer + is unique, so one of the two
    # inplace variants must have been selected.
    inplace_variants = (
        "_BINARY_OP_ADD_FLOAT_INPLACE",
        "_BINARY_OP_ADD_FLOAT_INPLACE_RIGHT",
    )
    used_inplace = any(name in opnames for name in inplace_variants)
    self.assertTrue(
        used_inplace, "Expected inplace add for unique sub-results"
    )

def test_float_truediv_non_float_type_no_crash(self):
    """Check that ``Fraction / Fraction`` still computes correctly.

    Fraction / Fraction goes through _BINARY_OP with NB_TRUE_DIVIDE but
    returns a Fraction, not a float.  The optimizer must not assume a
    float result for non-int/float operands.  See gh-146306.

    Only the numeric result is checked; the executor is not inspected.
    """
    from fractions import Fraction

    def testfunc(args):
        a, b, n = args
        total = Fraction(0)
        for _ in range(n):
            total += a / b
        return float(total)

    inputs = (Fraction(10), Fraction(3), TIER2_THRESHOLD)
    result, _ = self._run_with_optimizer(testfunc, inputs)

    self.assertAlmostEqual(
        result, float(TIER2_THRESHOLD * Fraction(10, 3))
    )

def test_float_truediv_mixed_float_fraction_no_crash(self):
    """Check that ``float / Fraction`` still computes correctly.

    The left operand is a known float from a prior guard, but the right
    operand is a Fraction.  The guard inserted for the right operand
    should deopt cleanly at runtime rather than crash.

    Only the numeric result is checked; the executor is not inspected.
    """
    from fractions import Fraction

    def testfunc(args):
        a, b, c, n = args
        total = 0.0
        for _ in range(n):
            total += (a + b) / c  # (a+b) is float, c is Fraction
        return total

    inputs = (2.0, 3.0, Fraction(4), TIER2_THRESHOLD)
    result, _ = self._run_with_optimizer(testfunc, inputs)

    per_iteration = 5.0 / Fraction(4)
    self.assertAlmostEqual(result, float(TIER2_THRESHOLD * per_iteration))

def test_load_attr_instance_value(self):
def testfunc(n):
class C():
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Specialize float true division in the tier 2 optimizer, with in-place
mutation of uniquely referenced operands.
47 changes: 47 additions & 0 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -838,6 +838,53 @@ dummy_func(
INPUTS_DEAD();
}

// Float true division — not specialized at tier 1, emitted by the
// tier 2 optimizer when both operands are known floats.
//
// Inputs:  left, right -- both asserted to be exact float objects.
// Outputs: res -- a newly created float holding left / right;
//          l, r -- the original left and right operands, passed through.
// Errors:  a zero divisor sets ZeroDivisionError, and a failed float
//          allocation is also an error; both exit via ERROR_NO_POP().
tier2 op(_BINARY_OP_TRUEDIV_FLOAT, (left, right -- res, l, r)) {
PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
assert(PyFloat_CheckExact(left_o));
assert(PyFloat_CheckExact(right_o));
STAT_INC(BINARY_OP, hit);
double divisor = ((PyFloatObject *)right_o)->ob_fval;
// Reject division by zero before computing anything (in C this
// comparison is also true for a divisor of -0.0).
if (divisor == 0.0) {
PyErr_SetString(PyExc_ZeroDivisionError,
"float division by zero");
ERROR_NO_POP();
}
double dres = ((PyFloatObject *)left_o)->ob_fval / divisor;
// Neither operand is modified; the quotient goes into a fresh float.
PyObject *d = PyFloat_FromDouble(dres);
if (d == NULL) {
ERROR_NO_POP();
}
res = PyStackRef_FromPyObjectSteal(d);
// Hand both inputs back as outputs rather than releasing them here.
l = left;
r = right;
INPUTS_DEAD();
}

// Float true division that reuses the left operand's float object for
// the result instead of allocating a new one.  FLOAT_INPLACE_DIVOP is
// invoked with `left` as the write target and reports a zero divisor
// through the `_divop_err` variable that it declares.
tier2 op(_BINARY_OP_TRUEDIV_FLOAT_INPLACE, (left, right -- res, l, r)) {
FLOAT_INPLACE_DIVOP(left, right, left);
if (_divop_err) {
ERROR_NO_POP();
}
// `left` now holds the quotient and is moved into `res`, so the `l`
// output slot is NULL; `right` is passed through unchanged.
res = left;
l = PyStackRef_NULL;
r = right;
INPUTS_DEAD();
}

// Float true division that reuses the right operand's float object for
// the result instead of allocating a new one.  FLOAT_INPLACE_DIVOP is
// invoked with `right` as the write target and reports a zero divisor
// through the `_divop_err` variable that it declares.
tier2 op(_BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT, (left, right -- res, l, r)) {
FLOAT_INPLACE_DIVOP(left, right, right);
if (_divop_err) {
ERROR_NO_POP();
}
// `right` now holds the quotient and is moved into `res`, so the `r`
// output slot is NULL; `left` is passed through unchanged.
res = right;
l = left;
r = PyStackRef_NULL;
INPUTS_DEAD();
}

pure op(_BINARY_OP_ADD_UNICODE, (left, right -- res, l, r)) {
PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
Expand Down
24 changes: 24 additions & 0 deletions Python/ceval_macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -562,3 +562,27 @@ gen_try_set_executing(PyGenObject *gen)
((PyFloatObject *)PyStackRef_AsPyObjectBorrow(TARGET)) \
->ob_fval = _dres; \
} while (0)

// Inplace float true division. Sets _divop_err to 1 on zero division.
// Caller must check _divop_err and call ERROR_NO_POP() if set.
//
// Computes left / right and stores the quotient directly in the ob_fval
// field of TARGET's float object.  `left` and `right` are asserted to be
// exact floats, and TARGET (the callers in bytecodes.c pass either
// `left` or `right`) is asserted to be uniquely referenced.
//
// If the divisor compares equal to 0.0, ZeroDivisionError is set,
// _divop_err becomes 1, and TARGET is left unmodified.
//
// NOTE: `int _divop_err` is declared outside the do/while, directly in
// the expanding scope, so the caller can read it afterwards.  The macro
// therefore expands to two statements: use it at most once per scope,
// and never as the unbraced body of an if/else/loop.
#define FLOAT_INPLACE_DIVOP(left, right, TARGET) \
int _divop_err = 0; \
do { \
PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); \
PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); \
assert(PyFloat_CheckExact(left_o)); \
assert(PyFloat_CheckExact(right_o)); \
assert(_PyObject_IsUniquelyReferenced( \
PyStackRef_AsPyObjectBorrow(TARGET))); \
STAT_INC(BINARY_OP, hit); \
double _divisor = ((PyFloatObject *)right_o)->ob_fval; \
if (_divisor == 0.0) { \
PyErr_SetString(PyExc_ZeroDivisionError, \
"float division by zero"); \
_divop_err = 1; \
break; \
} \
double _dres = ((PyFloatObject *)left_o)->ob_fval / _divisor; \
((PyFloatObject *)PyStackRef_AsPyObjectBorrow(TARGET)) \
->ob_fval = _dres; \
} while (0)
Loading
Loading