Commit 6680709

Track reasons for not being able to optimize instructions
This is a little cleaner and makes debugging easier
1 parent dd97d0c commit 6680709
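
The gist of the change: instead of a per-instruction bool (has_killed_refs) that only records that an instruction cannot be optimized, each instruction now gets a uint8_t of LoadFastInstrFlag bits recording why. Below is a minimal standalone sketch of that pattern, assuming only the enum values from the diff; the explain() helper and the driver are hypothetical, added here to show how the recorded reasons could be dumped while debugging:

#include <stdint.h>
#include <stdio.h>

/* Mirrors the LoadFastInstrFlag enum added by this commit: each bit is one
 * reason the instruction cannot be optimized. */
typedef enum {
    LOCAL_KILLED_ON_STACK = 1,
    STORED_AS_LOCAL = 2,
    REF_UNCONSUMED = 4,
} LoadFastInstrFlag;

/* Hypothetical debugging helper (not part of the commit). */
static void
explain(int i, uint8_t flags)
{
    if (flags == 0) {
        printf("instr %d: optimizable\n", i);
        return;
    }
    printf("instr %d: blocked by", i);
    if (flags & LOCAL_KILLED_ON_STACK) printf(" LOCAL_KILLED_ON_STACK");
    if (flags & STORED_AS_LOCAL) printf(" STORED_AS_LOCAL");
    if (flags & REF_UNCONSUMED) printf(" REF_UNCONSUMED");
    printf("\n");
}

int main(void)
{
    uint8_t instr_flags[3] = {0};
    /* Unlike a bool, independent reasons accumulate on one instruction. */
    instr_flags[1] |= STORED_AS_LOCAL;
    instr_flags[1] |= REF_UNCONSUMED;
    for (int i = 0; i < 3; i++) {
        explain(i, instr_flags[i]);
    }
    return 0;
}

An instruction is optimized only when its flags stay zero, which is exactly the if (!instr_flags[i]) test the diff introduces.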

Python/flowgraph.c

Lines changed: 68 additions & 55 deletions
@@ -2415,35 +2415,24 @@ insert_superinstructions(cfg_builder *g)
     return res;
 }

+#define NOT_LOCAL -1
+#define DUMMY_INSTR -1
+#define DUMMY_REF (ref){DUMMY_INSTR, NOT_LOCAL}
+
 typedef struct {
-    // Index of instruction that produced the reference or -1.
+    // Index of instruction that produced the reference or DUMMY_INSTR.
     int instr;

-    // The local to which the reference refers or -1.
+    // The local to which the reference refers or NOT_LOCAL.
     int local;
 } ref;

-#define NOT_LOCAL -1
-
-#define DUMMY_REF (ref){-1, NOT_LOCAL}
-
 typedef struct {
     ref *refs;
     Py_ssize_t size;
     Py_ssize_t capacity;
 } ref_stack;

-static bool
-ref_stack_has_refs_from_instr(ref_stack *stack, int instr)
-{
-    for (Py_ssize_t i = 0; i < stack->size; i++) {
-        if (stack->refs[i].instr == instr) {
-            return true;
-        }
-    }
-    return false;
-}
-
 static int
 ref_stack_push(ref_stack *stack, ref r)
 {
@@ -2505,18 +2494,36 @@ ref_stack_fini(ref_stack *stack)
     stack->size = 0;
 }

+typedef enum {
+    // The loaded reference is still on the stack when the local is killed
+    LOCAL_KILLED_ON_STACK = 1,
+    // The loaded reference is stored into a local
+    STORED_AS_LOCAL = 2,
+    // The loaded reference is still on the stack at the end of the basic block
+    REF_UNCONSUMED = 4,
+} LoadFastInstrFlag;
+
 static void
-kill_local(bool *has_killed_refs, ref_stack *refs, int local)
+kill_local(uint8_t *instr_flags, ref_stack *refs, int local)
 {
     for (Py_ssize_t i = 0; i < refs->size; i++) {
         ref r = ref_stack_at(refs, i);
         if (r.local == local) {
             assert(r.instr >= 0);
-            has_killed_refs[r.instr] = true;
+            instr_flags[r.instr] |= LOCAL_KILLED_ON_STACK;
         }
     }
 }

+static void
+store_local(uint8_t *instr_flags, ref_stack *refs, int local, ref r)
+{
+    kill_local(instr_flags, refs, local);
+    if (r.instr != -1) {
+        instr_flags[r.instr] |= STORED_AS_LOCAL;
+    }
+}
+
 static void
 load_fast_push_block(basicblock ***sp, basicblock *target, int start_depth)
 {
@@ -2537,9 +2544,9 @@ optimize_load_fast(cfg_builder *g)
     for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
         max_instrs = Py_MAX(max_instrs, b->b_iused);
     }
-    size_t has_killed_refs_size = max_instrs * sizeof(bool);
-    bool *has_killed_refs = PyMem_Calloc(max_instrs, has_killed_refs_size);
-    if (has_killed_refs == NULL) {
+    size_t instr_flags_size = max_instrs * sizeof(bool);
+    uint8_t *instr_flags = PyMem_Calloc(max_instrs, instr_flags_size);
+    if (instr_flags == NULL) {
         PyErr_NoMemory();
         return ERROR;
     }
@@ -2558,9 +2565,8 @@ optimize_load_fast(cfg_builder *g)
         basicblock *block = *--sp;
         assert(block->b_startdepth > -1);

-        // Reset state that tracks which instructions produce references to
-        // locals that are on the stack while the local is overwritten.
-        memset(has_killed_refs, 0, has_killed_refs_size);
+        // Reset per-block state.
+        memset(instr_flags, 0, instr_flags_size);

         // Reset the stack of refs. We don't track references on the stack
         // across basic blocks, but the bytecode will expect their
@@ -2595,8 +2601,11 @@ optimize_load_fast(cfg_builder *g)
                }

                case LOAD_FAST_AND_CLEAR: {
-                    kill_local(has_killed_refs, &refs, oparg);
-                    ref_stack_push(&refs, (ref){i, oparg});
+                    kill_local(instr_flags, &refs, oparg);
+                    if (ref_stack_push(&refs, (ref){i, oparg}) < 0) {
+                        status = ERROR;
+                        goto done;
+                    }
                    break;
                }

@@ -2613,35 +2622,30 @@ optimize_load_fast(cfg_builder *g)
                }

                case STORE_FAST: {
-                    kill_local(has_killed_refs, &refs, oparg);
                    ref r = ref_stack_pop(&refs);
-                    if (r.instr != -1) {
-                        has_killed_refs[r.instr] = true;
-                    }
+                    store_local(instr_flags, &refs, oparg, r);
                    break;
                }

                case STORE_FAST_LOAD_FAST: {
-                    kill_local(has_killed_refs, &refs, oparg >> 4);
+                    // STORE_FAST
                    ref r = ref_stack_pop(&refs);
-                    if (r.instr != -1) {
-                        has_killed_refs[r.instr] = true;
+                    store_local(instr_flags, &refs, oparg >> 4, r);
+                    // LOAD_FAST
+                    if (ref_stack_push(&refs, (ref){i, oparg & 15}) < 0) {
+                        status = ERROR;
+                        goto done;
                    }
-                    ref_stack_push(&refs, (ref){i, oparg & 15});
                    break;
                }

                case STORE_FAST_STORE_FAST: {
-                    kill_local(has_killed_refs, &refs, oparg >> 4);
-                    kill_local(has_killed_refs, &refs, oparg & 15);
+                    // STORE_FAST
                    ref r = ref_stack_pop(&refs);
-                    if (r.instr != -1) {
-                        has_killed_refs[r.instr] = true;
-                    }
+                    store_local(instr_flags, &refs, oparg >> 4, r);
+                    // STORE_FAST
                    r = ref_stack_pop(&refs);
-                    if (r.instr != -1) {
-                        has_killed_refs[r.instr] = true;
-                    }
+                    store_local(instr_flags, &refs, oparg & 15, r);
                    break;
                }

@@ -2674,9 +2678,27 @@ optimize_load_fast(cfg_builder *g)
            }
        }

+        // Push fallthrough block
+        cfg_instr *term = basicblock_last_instr(block);
+        if (term != NULL && block->b_next != NULL &&
+            !(IS_UNCONDITIONAL_JUMP_OPCODE(term->i_opcode) ||
+              IS_SCOPE_EXIT_OPCODE(term->i_opcode))) {
+            assert(BB_HAS_FALLTHROUGH(block));
+            load_fast_push_block(&sp, block->b_next, refs.size);
+        }
+
+        // Mark instructions that produce values that are on the stack at the
+        // end of the basic block
+        for (Py_ssize_t i = 0; i < refs.size; i++) {
+            ref r = ref_stack_at(&refs, i);
+            if (r.instr != -1) {
+                instr_flags[r.instr] |= REF_UNCONSUMED;
+            }
+        }
+
        // Optimize instructions
        for (int i = 0; i < block->b_iused; i++) {
-            if (!has_killed_refs[i] && !ref_stack_has_refs_from_instr(&refs, i)) {
+            if (!instr_flags[i]) {
                cfg_instr *instr = &block->b_instr[i];
                switch (instr->i_opcode) {
                    case LOAD_FAST:
@@ -2690,23 +2712,14 @@ optimize_load_fast(cfg_builder *g)
                }
            }
        }
-
-        // Push fallthrough block
-        cfg_instr *term = basicblock_last_instr(block);
-        if (term != NULL && block->b_next != NULL &&
-            !(IS_UNCONDITIONAL_JUMP_OPCODE(term->i_opcode) ||
-              IS_SCOPE_EXIT_OPCODE(term->i_opcode))) {
-            assert(BB_HAS_FALLTHROUGH(block));
-            load_fast_push_block(&sp, block->b_next, refs.size);
-        }
    }

    status = SUCCESS;

 done:
    ref_stack_fini(&refs);
-    if (has_killed_refs != NULL) {
-        PyMem_Free(has_killed_refs);
+    if (instr_flags != NULL) {
+        PyMem_Free(instr_flags);
    }
    if (blocks != NULL) {
        PyMem_Free(blocks);
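
To see how the walk over a basic block sets these bits, here is a hedged, self-contained simulation: ref, kill_local, and store_local follow the shapes in the diff, while the fixed-size stack, the local numbering, and main() are simplified stand-ins for the real ref_stack and cfg machinery.

#include <stdint.h>
#include <stdio.h>

enum {
    LOCAL_KILLED_ON_STACK = 1,
    STORED_AS_LOCAL = 2,
    REF_UNCONSUMED = 4,
};

typedef struct { int instr; int local; } ref;

/* Toy fixed-size stack; the real ref_stack grows on the heap. */
static ref stack[64];
static int sp = 0;

static void
kill_local(uint8_t *instr_flags, int local)
{
    /* Any ref to this local still on the stack pins the instruction that
     * produced it: the value must be owned, not borrowed. */
    for (int i = 0; i < sp; i++) {
        if (stack[i].local == local) {
            instr_flags[stack[i].instr] |= LOCAL_KILLED_ON_STACK;
        }
    }
}

static void
store_local(uint8_t *instr_flags, int local, ref r)
{
    kill_local(instr_flags, local);
    if (r.instr != -1) {
        instr_flags[r.instr] |= STORED_AS_LOCAL;
    }
}

int main(void)
{
    uint8_t instr_flags[4] = {0};

    /* Simulate a block that loads x twice, then stores the top into x. */
    stack[sp++] = (ref){0, 0};   /* instr 0: LOAD_FAST x */
    stack[sp++] = (ref){1, 0};   /* instr 1: LOAD_FAST x */
    ref r = stack[--sp];         /* instr 2: STORE_FAST x pops instr 1's ref */
    store_local(instr_flags, 0, r);

    /* Block end: refs still on the stack are unconsumed. */
    for (int i = 0; i < sp; i++) {
        if (stack[i].instr != -1) {
            instr_flags[stack[i].instr] |= REF_UNCONSUMED;
        }
    }

    printf("instr 0: %d\n", instr_flags[0]);  /* 5 = KILLED | UNCONSUMED */
    printf("instr 1: %d\n", instr_flags[1]);  /* 2 = STORED_AS_LOCAL */
    return 0;
}

Running it prints instr 0: 5 and instr 1: 2: the first load is pinned both because its ref is still on the stack when the same local is overwritten and because the ref survives to the end of the block, while the second is pinned only because its value was stored into a local.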
