@@ -27,7 +27,7 @@ use crate::arch::DEFAULT_LIR_BUFFER_CAPACITY;
2727use crate :: ir:: { Function , Initializer , Instruction , Module , Opcode , Pseudo , PseudoId , PseudoKind } ;
2828use crate :: target:: Target ;
2929use crate :: types:: { TypeId , TypeModifiers , TypeTable } ;
30- use std:: collections:: HashMap ;
30+ use std:: collections:: { HashMap , HashSet } ;
3131
3232// ============================================================================
3333// AArch64 Code Generator
@@ -395,7 +395,7 @@ impl Aarch64CodeGen {
395395 }
396396
397397 // Prologue: save frame pointer and link register, allocate stack
398- let ( scratch0, _scratch1) = Reg :: scratch_regs ( ) ;
398+ let ( scratch0, _scratch1, _ ) = Reg :: scratch_regs ( ) ;
399399 let fp = Reg :: fp ( ) ;
400400 let lr = Reg :: lr ( ) ;
401401 // Reference platform_reserved to acknowledge its existence
@@ -626,6 +626,33 @@ impl Aarch64CodeGen {
626626 . count ( ) ;
627627 }
628628
629+ // Emit stores for arguments spilled from caller-saved registers to stack
630+ // These must be stored early before any call can clobber them
631+ for spilled in alloc. spilled_args ( ) {
632+ // spilled.to_stack_offset is negative (e.g., -8, -16, etc.)
633+ // Convert to FP-relative offset
634+ let actual_offset = self . stack_offset ( total_frame, spilled. to_stack_offset ) ;
635+ if let Some ( gp_reg) = spilled. from_gp_reg {
636+ self . push_lir ( Aarch64Inst :: Str {
637+ size : OperandSize :: B64 ,
638+ src : gp_reg,
639+ addr : MemAddr :: BaseOffset {
640+ base : Reg :: X29 , // fp
641+ offset : actual_offset,
642+ } ,
643+ } ) ;
644+ } else if let Some ( fp_reg) = spilled. from_fp_reg {
645+ self . push_lir ( Aarch64Inst :: StrFp {
646+ size : FpSize :: Double ,
647+ src : fp_reg,
648+ addr : MemAddr :: BaseOffset {
649+ base : Reg :: X29 , // fp
650+ offset : actual_offset,
651+ } ,
652+ } ) ;
653+ }
654+ }
655+
629656 // Move arguments from registers to their allocated locations if needed
630657 // Note: On AAPCS64, sret uses X8, so regular args still start at X0
631658 // Complex parameters use two consecutive FP registers (D0+D1, D2+D3, etc.)
@@ -634,6 +661,11 @@ impl Aarch64CodeGen {
634661 let mut int_arg_idx = 0 ;
635662 let mut fp_arg_idx = 0 ;
636663
664+ // Track which pseudos were already spilled via spill_args_across_calls
665+ // to avoid double-storing them here
666+ let spilled_pseudos: HashSet < PseudoId > =
667+ alloc. spilled_args ( ) . iter ( ) . map ( |s| s. pseudo ) . collect ( ) ;
668+
637669 for ( i, ( _name, typ) ) in func. params . iter ( ) . enumerate ( ) {
638670 let is_complex = types. is_complex ( * typ) ;
639671 let is_fp = types. is_float ( * typ) ;
@@ -643,6 +675,18 @@ impl Aarch64CodeGen {
643675 if let PseudoKind :: Arg ( arg_idx) = pseudo. kind {
644676 // With sret, params have arg_idx = i + 1, but still use arg_regs[i]
645677 if arg_idx == ( i as u32 ) + arg_idx_offset {
678+ // Skip pseudos already stored via spilled_args
679+ if spilled_pseudos. contains ( & pseudo. id ) {
680+ // Still need to count this arg for register assignment tracking
681+ if is_complex {
682+ fp_arg_idx += 2 ;
683+ } else if is_fp {
684+ fp_arg_idx += 1 ;
685+ } else {
686+ int_arg_idx += 1 ;
687+ }
688+ break ;
689+ }
646690 if is_complex {
647691 // Complex argument - uses TWO consecutive FP registers
648692 // Look up the local variable for stack location
@@ -919,7 +963,7 @@ impl Aarch64CodeGen {
919963 } ;
920964
921965 let loc = self . get_location ( cond) ;
922- let ( scratch0, _) = Reg :: scratch_regs ( ) ;
966+ let ( scratch0, _, _ ) = Reg :: scratch_regs ( ) ;
923967
924968 match & loc {
925969 Loc :: Reg ( r) => {
@@ -1002,7 +1046,7 @@ impl Aarch64CodeGen {
10021046 let Some ( val) = insn. target else { return } ;
10031047
10041048 let loc = self . get_location ( val) ;
1005- let ( scratch0, scratch1) = Reg :: scratch_regs ( ) ;
1049+ let ( scratch0, scratch1, _ ) = Reg :: scratch_regs ( ) ;
10061050 let size = insn. size . max ( 32 ) ;
10071051 let op_size = OperandSize :: from_bits ( size) ;
10081052
@@ -1167,7 +1211,7 @@ impl Aarch64CodeGen {
11671211 Some ( Loc :: VReg ( v) ) => {
11681212 if let PseudoKind :: FVal ( f) = & pseudo. kind {
11691213 // Load FP constant using integer register
1170- let ( scratch0, _) = Reg :: scratch_regs ( ) ;
1214+ let ( scratch0, _, _ ) = Reg :: scratch_regs ( ) ;
11711215 let bits = if insn. size <= 32 {
11721216 ( * f as f32 ) . to_bits ( ) as i64
11731217 } else {
@@ -2032,7 +2076,7 @@ impl Aarch64CodeGen {
20322076 } else {
20332077 Symbol :: global ( & name)
20342078 } ;
2035- let ( scratch0, _) = Reg :: scratch_regs ( ) ;
2079+ let ( scratch0, _, _ ) = Reg :: scratch_regs ( ) ;
20362080 self . push_lir ( Aarch64Inst :: Adrp {
20372081 sym : sym. clone ( ) ,
20382082 dst : scratch0,
@@ -2252,11 +2296,25 @@ impl Aarch64CodeGen {
22522296 }
22532297
22542298 fn emit_call ( & mut self , insn : & Instruction , frame_size : i32 , types : & TypeTable ) {
2255- let func_name = match & insn. func_name {
2256- Some ( n) => n. clone ( ) ,
2257- None => return ,
2299+ // Check if this is an indirect call (through function pointer)
2300+ let is_indirect = insn. indirect_target . is_some ( ) ;
2301+
2302+ // For direct calls, we need a function name
2303+ let func_name = if is_indirect {
2304+ "<indirect>" . to_string ( )
2305+ } else {
2306+ match & insn. func_name {
2307+ Some ( n) => n. clone ( ) ,
2308+ None => return ,
2309+ }
22582310 } ;
22592311
2312+ // For indirect calls, load function pointer address into X16 (IP0)
2313+ // X16 is the intra-procedure-call scratch register per AAPCS64
2314+ if let Some ( func_addr) = insn. indirect_target {
2315+ self . emit_move ( func_addr, Reg :: X16 , 64 , frame_size) ;
2316+ }
2317+
22602318 // AAPCS64 calling convention:
22612319 // - Integer arguments: X0-X7 (8 registers)
22622320 // - Floating-point arguments: V0-V7 (8 registers)
@@ -2511,9 +2569,17 @@ impl Aarch64CodeGen {
25112569 }
25122570
25132571 // Call the function
2514- self . push_lir ( Aarch64Inst :: Bl {
2515- target : CallTarget :: Direct ( Symbol :: global ( & func_name) ) ,
2516- } ) ;
2572+ if is_indirect {
2573+ // Indirect call through X16 (function pointer was loaded there earlier)
2574+ self . push_lir ( Aarch64Inst :: Bl {
2575+ target : CallTarget :: Indirect ( Reg :: X16 ) ,
2576+ } ) ;
2577+ } else {
2578+ // Direct call to named function
2579+ self . push_lir ( Aarch64Inst :: Bl {
2580+ target : CallTarget :: Direct ( Symbol :: global ( & func_name) ) ,
2581+ } ) ;
2582+ }
25172583
25182584 // Clean up stack arguments
25192585 if stack_args > 0 {
@@ -2897,7 +2963,7 @@ impl Aarch64CodeGen {
28972963 Loc :: FImm ( f, imm_size) => {
28982964 // Load FP constant using integer register
28992965 // Use the size from the FImm for correct constant representation
2900- let ( scratch0, _) = Reg :: scratch_regs ( ) ;
2966+ let ( scratch0, _, _ ) = Reg :: scratch_regs ( ) ;
29012967 let bits = if imm_size <= 32 {
29022968 ( f as f32 ) . to_bits ( ) as i64
29032969 } else {
@@ -2920,7 +2986,7 @@ impl Aarch64CodeGen {
29202986 }
29212987 Loc :: Imm ( v) => {
29222988 // Load integer immediate and move to FP
2923- let ( scratch0, _) = Reg :: scratch_regs ( ) ;
2989+ let ( scratch0, _, _ ) = Reg :: scratch_regs ( ) ;
29242990 self . emit_mov_imm ( scratch0, v, 64 ) ;
29252991 self . push_lir ( Aarch64Inst :: FmovFromGp {
29262992 size : fp_size,
@@ -2930,7 +2996,7 @@ impl Aarch64CodeGen {
29302996 }
29312997 Loc :: Global ( name) => {
29322998 // Load from global - use size matching FP precision
2933- let ( scratch0, _) = Reg :: scratch_regs ( ) ;
2999+ let ( scratch0, _, _ ) = Reg :: scratch_regs ( ) ;
29343000 let load_size = match fp_size {
29353001 FpSize :: Single => OperandSize :: B32 ,
29363002 FpSize :: Double => OperandSize :: B64 ,
@@ -3148,7 +3214,7 @@ impl Aarch64CodeGen {
31483214 } ;
31493215
31503216 // Load source to integer register
3151- let ( scratch0, _) = Reg :: scratch_regs ( ) ;
3217+ let ( scratch0, _, _ ) = Reg :: scratch_regs ( ) ;
31523218 self . emit_move ( src, scratch0, src_size, frame_size) ;
31533219
31543220 // Convert integer to float
@@ -3305,7 +3371,7 @@ impl Aarch64CodeGen {
33053371 } ;
33063372
33073373 let ap_loc = self . get_location ( ap_addr) ;
3308- let ( scratch0, scratch1) = Reg :: scratch_regs ( ) ;
3374+ let ( scratch0, scratch1, _ ) = Reg :: scratch_regs ( ) ;
33093375
33103376 // Compute address of first variadic argument
33113377 let is_darwin = self . target . os == crate :: target:: Os :: MacOS ;
@@ -3377,7 +3443,7 @@ impl Aarch64CodeGen {
33773443
33783444 let ap_loc = self . get_location ( ap_addr) ;
33793445 let dst_loc = self . get_location ( target) ;
3380- let ( scratch0, scratch1) = Reg :: scratch_regs ( ) ;
3446+ let ( scratch0, scratch1, _ ) = Reg :: scratch_regs ( ) ;
33813447
33823448 // ap_loc contains the ADDRESS of the va_list variable (from symaddr)
33833449 // First, get the address of ap into scratch1, then load ap value from there
@@ -3541,7 +3607,7 @@ impl Aarch64CodeGen {
35413607
35423608 let dest_loc = self . get_location ( dest_addr) ;
35433609 let src_loc = self . get_location ( src_addr) ;
3544- let ( scratch0, scratch1) = Reg :: scratch_regs ( ) ;
3610+ let ( scratch0, scratch1, _ ) = Reg :: scratch_regs ( ) ;
35453611
35463612 // Both src_loc and dest_loc contain ADDRESSES of va_list variables
35473613 // Get the address of src va_list into scratch1
@@ -3999,10 +4065,11 @@ impl Aarch64CodeGen {
39994065 None => return ,
40004066 } ;
40014067
4002- // IMPORTANT: Load val first into X1, THEN env into X0.
4003- // If we loaded env into X0 first and val was passed as the first
4004- // function argument (in X0), it would get overwritten.
4005- // Put val argument in X1 (second argument register) FIRST
4068+ // CONSTRAINT: Load val into X1 BEFORE loading env into X0.
4069+ // If env is loaded into X0 first and val happens to be in X0 (first
4070+ // function argument), it would be overwritten. This is a manual constraint
4071+ // that will be expressible through the constraint system when inline asm
4072+ // support is added.
40064073 self . emit_move ( val, Reg :: X1 , 32 , frame_size) ;
40074074
40084075 // Put env argument in X0 (first argument register)
0 commit comments