Skip to content

Commit a13f20c

Browse files
committed
[cc] TypeTable-related fixes
1 parent 6eb76fb commit a13f20c

File tree

11 files changed

+108
-49
lines changed

11 files changed

+108
-49
lines changed

cc/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ Supported:
9696
Not yet implemented:
9797
- goto, longjmp, setjmp
9898
- `inline` and inlining support
99+
- multi-register returns (for structs larger than 8 bytes and up to 16 bytes, which are currently returned via a hidden sret pointer instead)
99100
- -fverbose-asm
100101
- Complex initializers
101102
- VLAs (variable-length arrays)

cc/arch/aarch64/codegen.rs

Lines changed: 5 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@
2020
use crate::arch::aarch64::lir::{Aarch64Inst, CallTarget, Cond, GpOperand, MemAddr};
2121
use crate::arch::codegen::CodeGenerator;
2222
use crate::arch::lir::{Directive, FpSize, Label, OperandSize, Symbol};
23+
use crate::arch::DEFAULT_LIR_BUFFER_CAPACITY;
2324
use crate::ir::{Function, Initializer, Instruction, Module, Opcode, Pseudo, PseudoId, PseudoKind};
24-
use crate::linearize::MAX_REGISTER_AGGREGATE_BITS;
2525
use crate::target::Target;
2626
use crate::types::{TypeId, TypeModifiers, TypeTable};
2727
use std::collections::HashMap;
@@ -1022,7 +1022,7 @@ impl Aarch64CodeGen {
10221022
Self {
10231023
target,
10241024
output: String::new(),
1025-
lir_buffer: Vec::new(),
1025+
lir_buffer: Vec::with_capacity(DEFAULT_LIR_BUFFER_CAPACITY),
10261026
locations: HashMap::new(),
10271027
pseudos: Vec::new(),
10281028
current_fn: String::new(),
@@ -3002,28 +3002,9 @@ impl Aarch64CodeGen {
30023002
let is_darwin_variadic =
30033003
self.target.os == crate::target::Os::MacOS && insn.variadic_arg_start.is_some();
30043004

3005-
// Check if this call returns a large struct
3006-
// If so, the first argument is the sret pointer and goes in X8 (not X0)
3007-
let returns_large_struct = insn.typ.is_some_and(|t| {
3008-
let kind = types.kind(t);
3009-
(kind == crate::types::TypeKind::Struct || kind == crate::types::TypeKind::Union)
3010-
&& types.size_bits(t) > MAX_REGISTER_AGGREGATE_BITS
3011-
});
3012-
3013-
// Also check if return type is a pointer to a large struct (linearizer wraps it)
3014-
let returns_large_struct = returns_large_struct
3015-
|| insn.typ.is_some_and(|t| {
3016-
if let Some(pointee) = types.base_type(t) {
3017-
let kind = types.kind(pointee);
3018-
(kind == crate::types::TypeKind::Struct
3019-
|| kind == crate::types::TypeKind::Union)
3020-
&& types.size_bits(pointee) > MAX_REGISTER_AGGREGATE_BITS
3021-
} else {
3022-
false
3023-
}
3024-
});
3025-
3026-
let args_start = if returns_large_struct && !insn.src.is_empty() {
3005+
// Check if this call returns a large struct via sret (hidden pointer argument).
3006+
// The linearizer sets is_sret_call=true and puts the sret pointer as the first arg.
3007+
let args_start = if insn.is_sret_call && !insn.src.is_empty() {
30273008
// First argument is sret pointer - move to X8
30283009
self.emit_move(insn.src[0], Reg::X8, 64, frame_size);
30293010
1 // Skip first arg in main loop

cc/arch/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99
// Architecture-specific predefined macros and code generators
1010
//
1111

12+
/// Default capacity for LIR instruction buffers (reduces reallocation overhead)
13+
pub const DEFAULT_LIR_BUFFER_CAPACITY: usize = 5000;
14+
1215
pub mod aarch64;
1316
pub mod codegen;
1417
pub mod lir;

cc/arch/x86_64/codegen.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ use crate::arch::lir::{Directive, FpSize, Label, OperandSize, Symbol};
1919
use crate::arch::x86_64::lir::{
2020
CallTarget, GpOperand, IntCC, MemAddr, ShiftCount, X86Inst, XmmOperand,
2121
};
22+
use crate::arch::DEFAULT_LIR_BUFFER_CAPACITY;
2223
use crate::ir::{Function, Initializer, Instruction, Module, Opcode, Pseudo, PseudoId, PseudoKind};
2324
use crate::target::Target;
2425
use crate::types::{TypeId, TypeModifiers, TypeTable};
@@ -879,7 +880,7 @@ impl X86_64CodeGen {
879880
Self {
880881
target,
881882
output: String::new(),
882-
lir_buffer: Vec::new(),
883+
lir_buffer: Vec::with_capacity(DEFAULT_LIR_BUFFER_CAPACITY),
883884
locations: HashMap::new(),
884885
pseudos: Vec::new(),
885886
current_fn: String::new(),

cc/ir.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -444,6 +444,9 @@ pub struct Instruction {
444444
/// For variadic calls: index where variadic arguments start (0-based)
445445
/// All arguments at this index and beyond are variadic (should be passed on stack)
446446
pub variadic_arg_start: Option<usize>,
447+
/// For calls: true if this call returns a large struct via sret (hidden pointer arg).
448+
/// The first element of `src` is the sret pointer when this is true.
449+
pub is_sret_call: bool,
447450
/// Source position for debug info
448451
pub pos: Option<Position>,
449452
}
@@ -466,6 +469,7 @@ impl Default for Instruction {
466469
switch_default: None,
467470
arg_types: Vec::new(),
468471
variadic_arg_start: None,
472+
is_sret_call: false,
469473
pos: None,
470474
}
471475
}

cc/linearize.rs

Lines changed: 29 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,10 @@ use crate::parse::ast::{
2020
};
2121
use crate::ssa::ssa_convert;
2222
use crate::symbol::SymbolTable;
23+
use crate::target::Target;
2324
use crate::types::{MemberInfo, TypeId, TypeKind, TypeModifiers, TypeTable};
2425
use std::collections::HashMap;
2526

26-
/// Maximum size (in bits) for aggregate types (struct/union) to be passed or
27-
/// returned by value in registers. Aggregates larger than this require
28-
/// indirect passing (pointer) or sret (struct return pointer).
29-
pub const MAX_REGISTER_AGGREGATE_BITS: u32 = 64;
30-
3127
/// Information about a local variable
3228
#[derive(Clone)]
3329
struct LocalVarInfo {
@@ -91,11 +87,13 @@ pub struct Linearizer<'a> {
9187
static_locals: HashMap<String, StaticLocalInfo>,
9288
/// Current source position for debug info
9389
current_pos: Option<Position>,
90+
/// Target configuration (architecture, ABI details)
91+
target: &'a Target,
9492
}
9593

9694
impl<'a> Linearizer<'a> {
9795
/// Create a new linearizer
98-
pub fn new(symbols: &'a SymbolTable, types: &'a TypeTable) -> Self {
96+
pub fn new(symbols: &'a SymbolTable, types: &'a TypeTable, target: &'a Target) -> Self {
9997
Self {
10098
module: Module::new(),
10199
current_func: None,
@@ -116,15 +114,16 @@ impl<'a> Linearizer<'a> {
116114
static_local_counter: 0,
117115
static_locals: HashMap::new(),
118116
current_pos: None,
117+
target,
119118
}
120119
}
121120

122121
/// Create a linearizer with SSA conversion disabled (for testing)
123122
#[cfg(test)]
124-
pub fn new_no_ssa(symbols: &'a SymbolTable, types: &'a TypeTable) -> Self {
123+
pub fn new_no_ssa(symbols: &'a SymbolTable, types: &'a TypeTable, target: &'a Target) -> Self {
125124
Self {
126125
run_ssa: false,
127-
..Self::new(symbols, types)
126+
..Self::new(symbols, types, target)
128127
}
129128
}
130129

@@ -510,7 +509,7 @@ impl<'a> Linearizer<'a> {
510509
// Large structs are returned via a hidden first parameter (sret)
511510
// that points to caller-allocated space
512511
let returns_large_struct = (ret_kind == TypeKind::Struct || ret_kind == TypeKind::Union)
513-
&& self.types.size_bits(func.return_type) > MAX_REGISTER_AGGREGATE_BITS;
512+
&& self.types.size_bits(func.return_type) > self.target.max_aggregate_register_bits;
514513

515514
// Argument index offset: if returning large struct, first arg is hidden return pointer
516515
let arg_offset: u32 = if returns_large_struct { 1 } else { 0 };
@@ -570,7 +569,7 @@ impl<'a> Linearizer<'a> {
570569
let typ_size = self.types.size_bits(typ);
571570
// For large structs, arg_pseudo is a pointer to the struct
572571
// We need to copy the data from that pointer to local storage
573-
if typ_size > MAX_REGISTER_AGGREGATE_BITS {
572+
if typ_size > self.target.max_aggregate_register_bits {
574573
// arg_pseudo is a pointer - copy each 8-byte chunk
575574
let struct_size = typ_size / 8;
576575
let mut offset = 0i64;
@@ -2239,7 +2238,7 @@ impl<'a> Linearizer<'a> {
22392238
let typ_kind = self.types.kind(typ);
22402239
let returns_large_struct = (typ_kind == TypeKind::Struct
22412240
|| typ_kind == TypeKind::Union)
2242-
&& self.types.size_bits(typ) > MAX_REGISTER_AGGREGATE_BITS;
2241+
&& self.types.size_bits(typ) > self.target.max_aggregate_register_bits;
22432242

22442243
let (result_sym, mut arg_vals, mut arg_types_vec) = if returns_large_struct {
22452244
// Allocate local storage for the return value
@@ -2282,7 +2281,7 @@ impl<'a> Linearizer<'a> {
22822281
let arg_type = self.expr_type(a);
22832282
let arg_kind = self.types.kind(arg_type);
22842283
let arg_val = if (arg_kind == TypeKind::Struct || arg_kind == TypeKind::Union)
2285-
&& self.types.size_bits(arg_type) > MAX_REGISTER_AGGREGATE_BITS
2284+
&& self.types.size_bits(arg_type) > self.target.max_aggregate_register_bits
22862285
{
22872286
// Large struct: pass address instead of value
22882287
// The argument type becomes a pointer
@@ -2313,6 +2312,7 @@ impl<'a> Linearizer<'a> {
23132312
64, // pointers are 64-bit
23142313
);
23152314
call_insn.variadic_arg_start = variadic_arg_start;
2315+
call_insn.is_sret_call = true;
23162316
self.emit(call_insn);
23172317
// Return the symbol (address) where struct is stored
23182318
result_sym
@@ -3688,19 +3688,25 @@ impl<'a> Linearizer<'a> {
36883688

36893689
/// Linearize an AST to IR (convenience wrapper for tests)
36903690
#[cfg(test)]
3691-
pub fn linearize(tu: &TranslationUnit, symbols: &SymbolTable, types: &TypeTable) -> Module {
3692-
linearize_with_debug(tu, symbols, types, false, None)
3691+
pub fn linearize(
3692+
tu: &TranslationUnit,
3693+
symbols: &SymbolTable,
3694+
types: &TypeTable,
3695+
target: &Target,
3696+
) -> Module {
3697+
linearize_with_debug(tu, symbols, types, target, false, None)
36933698
}
36943699

36953700
/// Linearize an AST to IR with debug info support
36963701
pub fn linearize_with_debug(
36973702
tu: &TranslationUnit,
36983703
symbols: &SymbolTable,
36993704
types: &TypeTable,
3705+
target: &Target,
37003706
debug: bool,
37013707
source_file: Option<&str>,
37023708
) -> Module {
3703-
let mut linearizer = Linearizer::new(symbols, types);
3709+
let mut linearizer = Linearizer::new(symbols, types, target);
37043710
let mut module = linearizer.linearize(tu);
37053711
module.debug = debug;
37063712
if let Some(path) = source_file {
@@ -3732,7 +3738,8 @@ mod tests {
37323738

37333739
fn test_linearize(tu: &TranslationUnit, types: &TypeTable) -> Module {
37343740
let symbols = SymbolTable::new();
3735-
linearize(tu, &symbols, types)
3741+
let target = Target::host();
3742+
linearize(tu, &symbols, types, &target)
37363743
}
37373744

37383745
fn make_simple_func(name: &str, body: Stmt, types: &TypeTable) -> FunctionDef {
@@ -4017,7 +4024,8 @@ mod tests {
40174024

40184025
// Create linearizer and test that expr_type reads from the expression
40194026
let symbols = SymbolTable::new();
4020-
let linearizer = Linearizer::new(&symbols, &types);
4027+
let target = Target::host();
4028+
let linearizer = Linearizer::new(&symbols, &types, &target);
40214029
let typ = linearizer.expr_type(&expr);
40224030
assert_eq!(types.kind(typ), TypeKind::Int);
40234031

@@ -4037,7 +4045,8 @@ mod tests {
40374045
expr.typ = Some(types.double_id);
40384046

40394047
let symbols = SymbolTable::new();
4040-
let linearizer = Linearizer::new(&symbols, &types);
4048+
let target = Target::host();
4049+
let linearizer = Linearizer::new(&symbols, &types, &target);
40414050
let typ = linearizer.expr_type(&expr);
40424051
assert_eq!(types.kind(typ), TypeKind::Double);
40434052
}
@@ -4049,7 +4058,8 @@ mod tests {
40494058
/// Helper to linearize without SSA conversion (for comparing before/after)
40504059
fn linearize_no_ssa(tu: &TranslationUnit, types: &TypeTable) -> Module {
40514060
let symbols = SymbolTable::new();
4052-
let mut linearizer = Linearizer::new_no_ssa(&symbols, types);
4061+
let target = Target::host();
4062+
let mut linearizer = Linearizer::new_no_ssa(&symbols, types, &target);
40534063
linearizer.linearize(tu)
40544064
}
40554065

cc/main.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -218,8 +218,14 @@ fn process_file(
218218
}
219219

220220
// Linearize to IR
221-
let mut module =
222-
linearize::linearize_with_debug(&ast, &symbols, &types, args.debug, Some(display_path));
221+
let mut module = linearize::linearize_with_debug(
222+
&ast,
223+
&symbols,
224+
&types,
225+
target,
226+
args.debug,
227+
Some(display_path),
228+
);
223229

224230
if args.dump_ir {
225231
print!("{}", module);

cc/parse/parser.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5752,7 +5752,7 @@ mod tests {
57525752
#[test]
57535753
fn test_typedef_local_variable() {
57545754
// Typedef used as local variable type inside function body
5755-
let (tu, types) =
5755+
let (tu, _types) =
57565756
parse_tu("typedef int myint; int main(void) { myint x; x = 42; return 0; }").unwrap();
57575757
assert_eq!(tu.items.len(), 2);
57585758

cc/target.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,10 @@ pub struct Target {
6363
pub long_width: u32,
6464
/// char is signed by default
6565
pub char_signed: bool,
66+
/// Maximum size (in bits) for aggregate types (struct/union) that can be
67+
/// passed or returned by value in registers. Aggregates larger than this
68+
/// require indirect passing (pointer) or sret (struct return pointer).
69+
pub max_aggregate_register_bits: u32,
6670
}
6771

6872
impl Target {
@@ -91,12 +95,20 @@ impl Target {
9195
(Arch::X86_64, _) => true,
9296
};
9397

98+
// Maximum aggregate size (in bits) that can be passed or returned in registers.
99+
// Both x86-64 SysV ABI and AAPCS64 technically support returning
100+
// structs of up to 16 bytes in registers (rax+rdx or x0+x1), but pcc currently
101+
// only supports single-register returns. Use sret for >8 byte structs
102+
// until multi-register returns are implemented.
103+
let max_aggregate_register_bits = 64;
104+
94105
Self {
95106
arch,
96107
os,
97108
pointer_width,
98109
long_width,
99110
char_signed,
111+
max_aggregate_register_bits,
100112
}
101113
}
102114

cc/tests/datatypes/struct_type.rs

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,3 +269,44 @@ int main(void) {
269269
"#;
270270
assert_eq!(compile_and_run("struct_pointer", code), 0);
271271
}
272+
273+
// ============================================================================
274+
// Large Struct Return: Return struct >8 bytes requiring sret (hidden pointer)
275+
// This tests the sret ABI where large structs are returned via a hidden pointer.
276+
// On ARM64, the sret pointer goes in X8 (not X0 like other args).
277+
// On x86-64, the sret pointer goes in RDI (first arg register).
278+
// ============================================================================
279+
280+
#[test]
281+
fn struct_return_large() {
282+
let code = r#"
283+
struct large {
284+
long first;
285+
long second;
286+
};
287+
288+
struct large make_large(long a, long b) {
289+
struct large s;
290+
s.first = a;
291+
s.second = b;
292+
return s;
293+
}
294+
295+
int main(void) {
296+
struct large result;
297+
result = make_large(300000, 200000);
298+
299+
// Verify the struct was correctly returned
300+
if (result.first != 300000) return 1;
301+
if (result.second != 200000) return 2;
302+
303+
// Test with different values to ensure correct member mapping
304+
result = make_large(42, 84);
305+
if (result.first != 42) return 3;
306+
if (result.second != 84) return 4;
307+
308+
return 0;
309+
}
310+
"#;
311+
assert_eq!(compile_and_run("struct_return_large", code), 0);
312+
}

0 commit comments

Comments
 (0)