Skip to content

Commit 47a9764

Browse files
committed
Optimize the new uops added when recording values during tracing.
* Handle dependencies in the optimizer, not the tracer
* Strengthen some checks to avoid relying on the optimizer for correctness
1 parent ebe02e4 commit 47a9764

17 files changed

+274
-243
lines changed

Include/internal/pycore_optimizer.h

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@ typedef struct _PyJitUopBuffer {
2222
_PyUOpInstruction *end;
2323
} _PyJitUopBuffer;
2424

25+
typedef struct _JitOptRefBuffer {
26+
JitOptRef *used;
27+
JitOptRef *end;
28+
} _JitOptRefBuffer;
2529

2630
typedef struct _JitOptContext {
2731
char done;
@@ -37,10 +41,15 @@ typedef struct _JitOptContext {
3741
// Arena for the symbolic types.
3842
ty_arena t_arena;
3943

40-
JitOptRef *n_consumed;
41-
JitOptRef *limit;
42-
JitOptRef locals_and_stack[MAX_ABSTRACT_INTERP_SIZE];
44+
/* To do -- We could make this more space efficient
45+
* by using a single array and growing the stack and
46+
* locals toward each other. */
47+
_JitOptRefBuffer locals;
48+
_JitOptRefBuffer stack;
49+
JitOptRef locals_array[ABSTRACT_INTERP_LOCALS_SIZE];
50+
JitOptRef stack_array[ABSTRACT_INTERP_STACK_SIZE];
4351
_PyJitUopBuffer out_buffer;
52+
_PyBloomFilter *dependencies;
4453
} JitOptContext;
4554

4655

@@ -83,13 +92,11 @@ typedef struct _PyJitTracerInitialState {
8392
} _PyJitTracerInitialState;
8493

8594
typedef struct _PyJitTracerPreviousState {
86-
bool dependencies_still_valid;
8795
int instr_oparg;
8896
int instr_stacklevel;
8997
_Py_CODEUNIT *instr;
9098
PyCodeObject *instr_code; // Strong
9199
struct _PyInterpreterFrame *instr_frame;
92-
_PyBloomFilter dependencies;
93100
PyObject *recorded_value; // Strong, may be NULL
94101
} _PyJitTracerPreviousState;
95102

@@ -303,25 +310,24 @@ extern void _Py_uop_sym_set_recorded_type(JitOptContext *ctx, JitOptRef sym, PyT
303310
extern void _Py_uop_sym_set_recorded_gen_func(JitOptContext *ctx, JitOptRef ref, PyFunctionObject *value);
304311
extern PyCodeObject *_Py_uop_sym_get_probable_func_code(JitOptRef sym);
305312
extern PyObject *_Py_uop_sym_get_probable_value(JitOptRef sym);
313+
extern JitOptRef *_Py_uop_sym_set_stack_depth(JitOptContext *ctx, int stack_depth, JitOptRef *current_sp);
306314

307-
extern void _Py_uop_abstractcontext_init(JitOptContext *ctx);
315+
extern void _Py_uop_abstractcontext_init(JitOptContext *ctx, _PyBloomFilter *dependencies);
308316
extern void _Py_uop_abstractcontext_fini(JitOptContext *ctx);
309317

310318
extern _Py_UOpsAbstractFrame *_Py_uop_frame_new(
311319
JitOptContext *ctx,
312320
PyCodeObject *co,
313-
int curr_stackentries,
314321
JitOptRef *args,
315322
int arg_len);
316323

317324
extern _Py_UOpsAbstractFrame *_Py_uop_frame_new_from_symbol(
318325
JitOptContext *ctx,
319326
JitOptRef callable,
320-
int curr_stackentries,
321327
JitOptRef *args,
322328
int arg_len);
323329

324-
extern int _Py_uop_frame_pop(JitOptContext *ctx, PyCodeObject *co, int curr_stackentries);
330+
extern int _Py_uop_frame_pop(JitOptContext *ctx, PyCodeObject *co);
325331

326332
PyAPI_FUNC(PyObject *) _Py_uop_symbols_test(PyObject *self, PyObject *ignored);
327333

@@ -357,8 +363,6 @@ PyAPI_FUNC(void) _PyJit_FinalizeTracing(PyThreadState *tstate, int err);
357363
void _PyPrintExecutor(_PyExecutorObject *executor, const _PyUOpInstruction *marker);
358364
void _PyJit_TracerFree(_PyThreadStateImpl *_tstate);
359365

360-
void _PyJit_Tracer_InvalidateDependency(PyThreadState *old_tstate, void *obj);
361-
362366
#ifdef _Py_TIER2
363367
typedef void (*_Py_RecordFuncPtr)(_PyInterpreterFrame *frame, _PyStackRef *stackpointer, int oparg, PyObject **recorded_value);
364368
PyAPI_DATA(const _Py_RecordFuncPtr) _PyOpcode_RecordFunctions[];

Include/internal/pycore_optimizer_types.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,9 @@ extern "C" {
1111
#include <stdbool.h>
1212
#include "pycore_uop.h" // UOP_MAX_TRACE_LENGTH
1313

14-
// Holds locals, stack, locals, stack ... (in that order)
15-
#define MAX_ABSTRACT_INTERP_SIZE 512
14+
#define ABSTRACT_INTERP_STACK_SIZE 256
15+
#define ABSTRACT_INTERP_LOCALS_SIZE 512
16+
1617

1718
#define TY_ARENA_SIZE (UOP_MAX_TRACE_LENGTH * 5)
1819

@@ -138,6 +139,7 @@ typedef struct _Py_UOpsAbstractFrame {
138139
// Max stacklen
139140
int stack_len;
140141
int locals_len;
142+
bool caller; // We have made a call from this frame during the trace
141143
PyFunctionObject *func;
142144
PyCodeObject *code;
143145

Include/internal/pycore_uop_ids.h

Lines changed: 5 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Include/internal/pycore_uop_metadata.h

Lines changed: 16 additions & 16 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Modules/_testinternalcapi/test_cases.c.h

Lines changed: 2 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Objects/codeobject.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2433,7 +2433,6 @@ code_dealloc(PyObject *self)
24332433
PyMem_Free(co_extra);
24342434
}
24352435
#ifdef _Py_TIER2
2436-
_PyJit_Tracer_InvalidateDependency(tstate, self);
24372436
if (co->co_executors != NULL) {
24382437
clear_executors(co);
24392438
}

Objects/frameobject.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,6 @@ framelocalsproxy_setitem(PyObject *self, PyObject *key, PyObject *value)
262262

263263
#if _Py_TIER2
264264
_Py_Executors_InvalidateDependency(_PyInterpreterState_GET(), co, 1);
265-
_PyJit_Tracer_InvalidateDependency(_PyThreadState_GET(), co);
266265
#endif
267266

268267
_PyLocals_Kind kind = _PyLocals_GetKind(co->co_localspluskinds, i);

Objects/funcobject.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
#include "pycore_setobject.h" // _PySet_NextEntry()
1313
#include "pycore_stats.h"
1414
#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS()
15-
#include "pycore_optimizer.h" // _PyJit_Tracer_InvalidateDependency
15+
#include "pycore_optimizer.h" // _Py_Executors_InvalidateDependency
1616

1717
static const char *
1818
func_event_name(PyFunction_WatchEvent event) {
@@ -1128,7 +1128,6 @@ func_dealloc(PyObject *self)
11281128
}
11291129
#if _Py_TIER2
11301130
_Py_Executors_InvalidateDependency(_PyInterpreterState_GET(), self, 1);
1131-
_PyJit_Tracer_InvalidateDependency(_PyThreadState_GET(), self);
11321131
#endif
11331132
_PyObject_GC_UNTRACK(op);
11341133
FT_CLEAR_WEAKREFS(self, op->func_weakreflist);

Python/bytecodes.c

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3125,10 +3125,10 @@ dummy_func(
31253125
assert(executor->vm_data.code == code);
31263126
assert(executor->vm_data.valid);
31273127
assert(tstate->current_executor == NULL);
3128-
/* If the eval breaker is set then stay in tier 1.
3129-
* This avoids any potentially infinite loops
3130-
* involving _RESUME_CHECK */
3131-
if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & _PY_EVAL_EVENTS_MASK) {
3128+
/* If the eval breaker is set, or instrumentation is needed, then stay in tier 1.
3129+
* This avoids any potentially infinite loops involving _RESUME_CHECK */
3130+
uintptr_t iversion = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(code->_co_instrumentation_version);
3131+
if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) != iversion) {
31323132
opcode = executor->vm_data.opcode;
31333133
oparg = (oparg & ~255) | executor->vm_data.oparg;
31343134
next_instr = this_instr;
@@ -5616,9 +5616,9 @@ dummy_func(
56165616
HANDLE_PENDING_AND_DEOPT_IF(_Py_emscripten_signal_clock == 0);
56175617
_Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING;
56185618
#endif
5619+
uintptr_t iversion = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version);
56195620
uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker);
5620-
HANDLE_PENDING_AND_DEOPT_IF(eval_breaker & _PY_EVAL_EVENTS_MASK);
5621-
assert(tstate->tracing || eval_breaker == FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version));
5621+
HANDLE_PENDING_AND_DEOPT_IF(eval_breaker != iversion);
56225622
}
56235623

56245624
tier2 op(_COLD_EXIT, ( -- )) {
@@ -5668,9 +5668,9 @@ dummy_func(
56685668
Py_UNREACHABLE();
56695669
}
56705670

5671-
tier2 op(_GUARD_CODE, (version/2 -- )) {
5671+
tier2 op(_GUARD_CODE_VERSION, (version/2 -- )) {
56725672
PyObject *code = PyStackRef_AsPyObjectBorrow(frame->f_executable);
5673-
EXIT_IF(code == Py_None);
5673+
assert(PyCode_Check(code));
56745674
EXIT_IF(((PyCodeObject *)code)->co_version != version);
56755675
}
56765676

0 commit comments

Comments
 (0)