@@ -2415,35 +2415,24 @@ insert_superinstructions(cfg_builder *g)
     return res;
 }
 
+#define NOT_LOCAL -1
+#define DUMMY_INSTR -1
+#define DUMMY_REF (ref){DUMMY_INSTR, NOT_LOCAL}
+
 typedef struct {
-    // Index of instruction that produced the reference or -1.
+    // Index of instruction that produced the reference or DUMMY_INSTR.
     int instr;
 
-    // The local to which the reference refers or -1.
+    // The local to which the reference refers or NOT_LOCAL.
     int local;
 } ref;
 
-#define NOT_LOCAL -1
-
-#define DUMMY_REF (ref){-1, NOT_LOCAL}
-
 typedef struct {
     ref *refs;
     Py_ssize_t size;
     Py_ssize_t capacity;
 } ref_stack;
 
-static bool
-ref_stack_has_refs_from_instr(ref_stack *stack, int instr)
-{
-    for (Py_ssize_t i = 0; i < stack->size; i++) {
-        if (stack->refs[i].instr == instr) {
-            return true;
-        }
-    }
-    return false;
-}
-
 static int
 ref_stack_push(ref_stack *stack, ref r)
 {
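A note on this hunk: each `ref` records which instruction produced a stack value and which local it aliases, and hoisting the sentinels above the struct lets `DUMMY_REF` name both fields. The sketch below is not part of the diff: it illustrates how a dummy ref would be used when a basic block is entered and the producers of values already on the operand stack are unknown. The helper name `seed_entry_stack` is hypothetical; it assumes the types above and a `ref_stack_push` that returns a negative value on allocation failure, as its use later in the diff implies.

```c
/* Hypothetical helper, not from the commit: seed the ref stack for
 * values inherited from predecessor blocks. */
static int
seed_entry_stack(ref_stack *refs, int start_depth)
{
    for (int i = 0; i < start_depth; i++) {
        // Producer and local are unknown: DUMMY_INSTR never matches a
        // real instruction index and NOT_LOCAL never matches a local.
        if (ref_stack_push(refs, DUMMY_REF) < 0) {
            return -1;
        }
    }
    return 0;
}
```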
@@ -2505,18 +2494,36 @@ ref_stack_fini(ref_stack *stack)
     stack->size = 0;
 }
 
+typedef enum {
+    // The loaded reference is still on the stack when the local is killed
+    LOCAL_KILLED_ON_STACK = 1,
+    // The loaded reference is stored into a local
+    STORED_AS_LOCAL = 2,
+    // The loaded reference is still on the stack at the end of the basic block
+    REF_UNCONSUMED = 4,
+} LoadFastInstrFlag;
+
 static void
-kill_local(bool *has_killed_refs, ref_stack *refs, int local)
+kill_local(uint8_t *instr_flags, ref_stack *refs, int local)
 {
     for (Py_ssize_t i = 0; i < refs->size; i++) {
         ref r = ref_stack_at(refs, i);
         if (r.local == local) {
             assert(r.instr >= 0);
-            has_killed_refs[r.instr] = true;
+            instr_flags[r.instr] |= LOCAL_KILLED_ON_STACK;
         }
     }
 }
 
+static void
+store_local(uint8_t *instr_flags, ref_stack *refs, int local, ref r)
+{
+    kill_local(instr_flags, refs, local);
+    if (r.instr != DUMMY_INSTR) {
+        instr_flags[r.instr] |= STORED_AS_LOCAL;
+    }
+}
+
 static void
 load_fast_push_block(basicblock ***sp, basicblock *target, int start_depth)
 {
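The enum replaces the single per-instruction `has_killed_refs` boolean with a bitmask: the three conditions that disqualify a `LOAD_FAST` from the rewrite are powers of two, so they accumulate independently in one `uint8_t` per instruction. A minimal self-contained illustration of how the bits combine (the enum is copied from the hunk above; the scenario is illustrative):

```c
#include <assert.h>
#include <stdint.h>

typedef enum {
    LOCAL_KILLED_ON_STACK = 1,
    STORED_AS_LOCAL = 2,
    REF_UNCONSUMED = 4,
} LoadFastInstrFlag;

int main(void)
{
    uint8_t flags = 0;                 // one flag byte per instruction
    flags |= STORED_AS_LOCAL;          // value was stored into a local
    flags |= LOCAL_KILLED_ON_STACK;    // its local was overwritten while
                                       // the value was still on the stack
    assert(flags == (STORED_AS_LOCAL | LOCAL_KILLED_ON_STACK));
    assert(flags != 0);                // any set bit blocks the rewrite
    return 0;
}
```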
@@ -2537,9 +2544,9 @@ optimize_load_fast(cfg_builder *g)
     for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
         max_instrs = Py_MAX(max_instrs, b->b_iused);
     }
-    size_t has_killed_refs_size = max_instrs * sizeof(bool);
-    bool *has_killed_refs = PyMem_Calloc(max_instrs, has_killed_refs_size);
-    if (has_killed_refs == NULL) {
+    size_t instr_flags_size = max_instrs * sizeof(uint8_t);
+    uint8_t *instr_flags = PyMem_Calloc(max_instrs, sizeof(uint8_t));
+    if (instr_flags == NULL) {
         PyErr_NoMemory();
         return ERROR;
     }
@@ -2558,9 +2565,8 @@ optimize_load_fast(cfg_builder *g)
         basicblock *block = *--sp;
         assert(block->b_startdepth > -1);
 
-        // Reset state that tracks which instructions produce references to
-        // locals that are on the stack while the local is overwritten.
-        memset(has_killed_refs, 0, has_killed_refs_size);
+        // Reset per-block state.
+        memset(instr_flags, 0, instr_flags_size);
 
         // Reset the stack of refs. We don't track references on the stack
         // across basic blocks, but the bytecode will expect their
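These two hunks keep the allocate-once, zero-per-block pattern: the flag array is sized for the largest block in the function and reset with `memset` each time a block is taken off the worklist, rather than being reallocated. A simplified standalone sketch of the same pattern, with plain `calloc`/`free` standing in for `PyMem_Calloc`/`PyMem_Free` and the block scan elided:

```c
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

static int
process_blocks(size_t max_instrs, size_t nblocks)
{
    size_t instr_flags_size = max_instrs * sizeof(uint8_t);
    // calloc takes an element count and an element size and returns
    // zeroed memory, so the first block needs no explicit reset.
    uint8_t *instr_flags = calloc(max_instrs, sizeof(uint8_t));
    if (instr_flags == NULL) {
        return -1;
    }
    for (size_t b = 0; b < nblocks; b++) {
        memset(instr_flags, 0, instr_flags_size);  // per-block reset
        /* ... scan one basic block, setting flag bits ... */
    }
    free(instr_flags);
    return 0;
}
```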
@@ -2595,8 +2601,11 @@ optimize_load_fast(cfg_builder *g)
             }
 
             case LOAD_FAST_AND_CLEAR: {
-                kill_local(has_killed_refs, &refs, oparg);
-                ref_stack_push(&refs, (ref){i, oparg});
+                kill_local(instr_flags, &refs, oparg);
+                if (ref_stack_push(&refs, (ref){i, oparg}) < 0) {
+                    status = ERROR;
+                    goto done;
+                }
                 break;
             }
 
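`ref_stack_push` can fail while growing its backing array, and this hunk makes the pass propagate that failure instead of ignoring it, unwinding through the function's `done:` label. That is the usual single-exit cleanup idiom; a self-contained sketch of it follows, with all names illustrative rather than taken from the commit:

```c
#include <stdlib.h>

#define SUCCESS 0
#define ERROR  -1

static int fallible_step(void) { return 0; /* or -1 on failure */ }

static int
run_pass(void)
{
    int status;
    char *scratch = malloc(64);   // stands in for instr_flags etc.
    if (scratch == NULL) {
        return ERROR;
    }
    if (fallible_step() < 0) {
        status = ERROR;
        goto done;                // unwind through one cleanup path
    }
    status = SUCCESS;
done:
    free(scratch);                // runs on success and failure alike
    return status;
}
```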
@@ -2613,35 +2622,30 @@ optimize_load_fast(cfg_builder *g)
             }
 
             case STORE_FAST: {
-                kill_local(has_killed_refs, &refs, oparg);
                 ref r = ref_stack_pop(&refs);
-                if (r.instr != -1) {
-                    has_killed_refs[r.instr] = true;
-                }
+                store_local(instr_flags, &refs, oparg, r);
                 break;
             }
 
             case STORE_FAST_LOAD_FAST: {
-                kill_local(has_killed_refs, &refs, oparg >> 4);
+                // STORE_FAST
                 ref r = ref_stack_pop(&refs);
-                if (r.instr != -1) {
-                    has_killed_refs[r.instr] = true;
+                store_local(instr_flags, &refs, oparg >> 4, r);
+                // LOAD_FAST
+                if (ref_stack_push(&refs, (ref){i, oparg & 15}) < 0) {
+                    status = ERROR;
+                    goto done;
                 }
-                ref_stack_push(&refs, (ref){i, oparg & 15});
                 break;
             }
 
             case STORE_FAST_STORE_FAST: {
-                kill_local(has_killed_refs, &refs, oparg >> 4);
-                kill_local(has_killed_refs, &refs, oparg & 15);
+                // STORE_FAST
                 ref r = ref_stack_pop(&refs);
-                if (r.instr != -1) {
-                    has_killed_refs[r.instr] = true;
-                }
+                store_local(instr_flags, &refs, oparg >> 4, r);
+                // STORE_FAST
                 r = ref_stack_pop(&refs);
-                if (r.instr != -1) {
-                    has_killed_refs[r.instr] = true;
-                }
+                store_local(instr_flags, &refs, oparg & 15, r);
                 break;
             }
 
@@ -2674,9 +2678,27 @@ optimize_load_fast(cfg_builder *g)
             }
         }
 
+        // Push fallthrough block
+        cfg_instr *term = basicblock_last_instr(block);
+        if (term != NULL && block->b_next != NULL &&
+            !(IS_UNCONDITIONAL_JUMP_OPCODE(term->i_opcode) ||
+              IS_SCOPE_EXIT_OPCODE(term->i_opcode))) {
+            assert(BB_HAS_FALLTHROUGH(block));
+            load_fast_push_block(&sp, block->b_next, refs.size);
+        }
+
+        // Mark instructions that produce values that are on the stack at the
+        // end of the basic block
+        for (Py_ssize_t i = 0; i < refs.size; i++) {
+            ref r = ref_stack_at(&refs, i);
+            if (r.instr != DUMMY_INSTR) {
+                instr_flags[r.instr] |= REF_UNCONSUMED;
+            }
+        }
+
         // Optimize instructions
         for (int i = 0; i < block->b_iused; i++) {
-            if (!has_killed_refs[i] && !ref_stack_has_refs_from_instr(&refs, i)) {
+            if (!instr_flags[i]) {
                 cfg_instr *instr = &block->b_instr[i];
                 switch (instr->i_opcode) {
                     case LOAD_FAST:
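By flagging `REF_UNCONSUMED` on producers of values that outlive the block (a successor may still consume them) before the rewrite loop runs, the per-instruction safety test collapses to a single byte compare, which is what lets the commit delete the linear `ref_stack_has_refs_from_instr` scan. A sketch of the resulting predicate; the hunk cuts off before showing the target opcode of the rewrite, so that is left abstract here:

```c
#include <stdbool.h>
#include <stdint.h>

// Sketch of the final test: the LOAD_FAST at index i may be rewritten
// only if no disqualifying bit was recorded during the block scan.
static bool
can_rewrite(const uint8_t *instr_flags, int i)
{
    // zero byte: not killed on the stack, not stored as a local,
    // and not left unconsumed at the end of the block
    return instr_flags[i] == 0;
}
```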
@@ -2690,23 +2712,14 @@ optimize_load_fast(cfg_builder *g)
                 }
             }
         }
-
-        // Push fallthrough block
-        cfg_instr *term = basicblock_last_instr(block);
-        if (term != NULL && block->b_next != NULL &&
-            !(IS_UNCONDITIONAL_JUMP_OPCODE(term->i_opcode) ||
-              IS_SCOPE_EXIT_OPCODE(term->i_opcode))) {
-            assert(BB_HAS_FALLTHROUGH(block));
-            load_fast_push_block(&sp, block->b_next, refs.size);
-        }
     }
 
     status = SUCCESS;
 
 done:
     ref_stack_fini(&refs);
-    if (has_killed_refs != NULL) {
-        PyMem_Free(has_killed_refs);
+    if (instr_flags != NULL) {
+        PyMem_Free(instr_flags);
     }
     if (blocks != NULL) {
         PyMem_Free(blocks);