diff --git a/cc/linearize.rs b/cc/linearize.rs index 36103ad4..61fece3a 100644 --- a/cc/linearize.rs +++ b/cc/linearize.rs @@ -19,6 +19,7 @@ use crate::parse::ast::{ FunctionDef, InitElement, Stmt, TranslationUnit, UnaryOp, }; use crate::ssa::ssa_convert; +use crate::strings::{StringId, StringTable}; use crate::symbol::SymbolTable; use crate::target::Target; use crate::types::{MemberInfo, TypeId, TypeKind, TypeModifiers, TypeTable}; @@ -74,6 +75,8 @@ pub struct Linearizer<'a> { symbols: &'a SymbolTable, /// Type table for type information types: &'a TypeTable, + /// String table for converting StringId to String at IR boundary + strings: &'a StringTable, /// Hidden struct return pointer (for functions returning large structs) struct_return_ptr: Option, /// Size of struct being returned (for functions returning large structs) @@ -93,7 +96,12 @@ pub struct Linearizer<'a> { impl<'a> Linearizer<'a> { /// Create a new linearizer - pub fn new(symbols: &'a SymbolTable, types: &'a TypeTable, target: &'a Target) -> Self { + pub fn new( + symbols: &'a SymbolTable, + types: &'a TypeTable, + strings: &'a StringTable, + target: &'a Target, + ) -> Self { Self { module: Module::new(), current_func: None, @@ -108,6 +116,7 @@ impl<'a> Linearizer<'a> { run_ssa: true, // Enable SSA conversion by default symbols, types, + strings, struct_return_ptr: None, struct_return_size: 0, current_func_name: String::new(), @@ -120,13 +129,24 @@ impl<'a> Linearizer<'a> { /// Create a linearizer with SSA conversion disabled (for testing) #[cfg(test)] - pub fn new_no_ssa(symbols: &'a SymbolTable, types: &'a TypeTable, target: &'a Target) -> Self { + pub fn new_no_ssa( + symbols: &'a SymbolTable, + types: &'a TypeTable, + strings: &'a StringTable, + target: &'a Target, + ) -> Self { Self { run_ssa: false, - ..Self::new(symbols, types, target) + ..Self::new(symbols, types, strings, target) } } + /// Convert a StringId to a &str using the string table + #[inline] + fn str(&self, id: StringId) -> &str { + self.strings.get(id) + } + /// Linearize a translation unit pub fn linearize(&mut self, tu: &TranslationUnit) -> Module { for item in &tu.items { @@ -470,8 +490,8 @@ impl<'a> Linearizer<'a> { _ => Initializer::None, }); - self.module - .add_global(&declarator.name, declarator.typ, init); + let name = self.str(declarator.name).to_string(); + self.module.add_global(&name, declarator.typ, init); } } @@ -493,7 +513,7 @@ impl<'a> Linearizer<'a> { self.continue_targets.clear(); self.struct_return_ptr = None; self.struct_return_size = 0; - self.current_func_name = func.name.clone(); + self.current_func_name = self.str(func.name).to_string(); // Note: static_locals is NOT cleared - it persists across functions // Create function @@ -501,7 +521,7 @@ impl<'a> Linearizer<'a> { .types .modifiers(func.return_type) .contains(TypeModifiers::STATIC); - let mut ir_func = Function::new(&func.name, func.return_type); + let mut ir_func = Function::new(self.str(func.name), func.return_type); ir_func.is_static = is_static; let ret_kind = self.types.kind(func.return_type); @@ -529,7 +549,10 @@ impl<'a> Linearizer<'a> { let mut struct_params: Vec<(String, TypeId, PseudoId)> = Vec::new(); for (i, param) in func.params.iter().enumerate() { - let name = param.name.clone().unwrap_or_else(|| format!("arg{}", i)); + let name = param + .name + .map(|id| self.str(id).to_string()) + .unwrap_or_else(|| format!("arg{}", i)); ir_func.add_param(&name, param.typ); // Create argument pseudo (offset by 1 if there's a hidden return pointer) @@ -759,12 +782,14 @@ impl<'a> Linearizer<'a> { } Stmt::Goto(label) => { - let target = self.get_or_create_label(label); + let label_str = self.str(*label).to_string(); + let target = self.get_or_create_label(&label_str); self.emit(Instruction::br(target)); } Stmt::Label { name, stmt } => { - let label_bb = self.get_or_create_label(name); + let name_str = self.str(*name).to_string(); + let label_bb = self.get_or_create_label(&name_str); // If current block is not terminated, branch to label if !self.is_terminated() { @@ -814,8 +839,9 @@ impl<'a> Linearizer<'a> { } // Track in linearizer's locals map + let name_str = self.str(declarator.name).to_string(); self.locals - .insert(declarator.name.clone(), LocalVarInfo { sym: sym_id, typ }); + .insert(name_str, LocalVarInfo { sym: sym_id, typ }); // If there's an initializer, emit Store(s) if let Some(init) = &declarator.init { @@ -841,16 +867,18 @@ impl<'a> Linearizer<'a> { /// They are implemented as globals with unique names like `funcname.varname.N`. /// Initialization happens once at program start (compile-time). fn linearize_static_local(&mut self, declarator: &crate::parse::ast::InitDeclarator) { + let name_str = self.str(declarator.name).to_string(); + // Generate unique global name: funcname.varname.counter let global_name = format!( "{}.{}.{}", - self.current_func_name, declarator.name, self.static_local_counter + self.current_func_name, name_str, self.static_local_counter ); self.static_local_counter += 1; // Track mapping from local name to global name for this function's scope // Use a key that includes function name to handle same-named statics in different functions - let key = format!("{}.{}", self.current_func_name, declarator.name); + let key = format!("{}.{}", self.current_func_name, name_str); self.static_locals.insert( key, StaticLocalInfo { @@ -862,7 +890,7 @@ impl<'a> Linearizer<'a> { // Also insert with just the local name for the current function scope // This is used during expression linearization self.locals.insert( - declarator.name.clone(), + name_str, LocalVarInfo { // Use a sentinel value - we'll handle static locals specially sym: PseudoId(u32::MAX), @@ -1334,7 +1362,7 @@ impl<'a> Linearizer<'a> { ExprKind::CharLit(c) => Some(*c as i64), ExprKind::Ident { name, .. } => { // Check if it's an enum constant - self.symbols.get_enum_value(name) + self.symbols.get_enum_value(*name) } ExprKind::Unary { op, operand } => { let val = self.eval_const_expr(operand)?; @@ -1472,12 +1500,13 @@ impl<'a> Linearizer<'a> { fn linearize_lvalue(&mut self, expr: &Expr) -> PseudoId { match &expr.kind { ExprKind::Ident { name, .. } => { + let name_str = self.str(*name).to_string(); // For local variables, emit SymAddr to get the stack address - if let Some(local) = self.locals.get(name).cloned() { + if let Some(local) = self.locals.get(&name_str).cloned() { // Check if this is a static local (sentinel value) if local.sym.0 == u32::MAX { // Static local - look up the global name - let key = format!("{}.{}", self.current_func_name, name); + let key = format!("{}.{}", self.current_func_name, name_str); if let Some(static_info) = self.static_locals.get(&key).cloned() { let sym_id = self.alloc_pseudo(); let pseudo = Pseudo::sym(sym_id, static_info.global_name); @@ -1503,7 +1532,7 @@ impl<'a> Linearizer<'a> { } else { // Global variable - emit SymAddr to get its address let sym_id = self.alloc_pseudo(); - let pseudo = Pseudo::sym(sym_id, name.clone()); + let pseudo = Pseudo::sym(sym_id, name_str.clone()); if let Some(func) = &mut self.current_func { func.add_pseudo(pseudo); } @@ -1533,7 +1562,7 @@ impl<'a> Linearizer<'a> { let struct_type = self.expr_type(inner); let member_info = self.types - .find_member(struct_type, member) + .find_member(struct_type, *member) .unwrap_or_else(|| MemberInfo { offset: 0, typ: self.expr_type(expr), @@ -1571,7 +1600,7 @@ impl<'a> Linearizer<'a> { .unwrap_or_else(|| self.expr_type(expr)); let member_info = self.types - .find_member(struct_type, member) + .find_member(struct_type, *member) .unwrap_or_else(|| MemberInfo { offset: 0, typ: self.expr_type(expr), @@ -1693,16 +1722,17 @@ impl<'a> Linearizer<'a> { } ExprKind::Ident { name, .. } => { + let name_str = self.str(*name).to_string(); // First check if it's an enum constant - if let Some(value) = self.symbols.get_enum_value(name) { + if let Some(value) = self.symbols.get_enum_value(*name) { self.emit_const(value, self.types.int_id) } // Check if it's a local variable - else if let Some(local) = self.locals.get(name).cloned() { + else if let Some(local) = self.locals.get(&name_str).cloned() { // Check if this is a static local (sentinel value) if local.sym.0 == u32::MAX { // Static local - look up the global name and treat as global - let key = format!("{}.{}", self.current_func_name, name); + let key = format!("{}.{}", self.current_func_name, &name_str); if let Some(static_info) = self.static_locals.get(&key).cloned() { let sym_id = self.alloc_pseudo(); let pseudo = Pseudo::sym(sym_id, static_info.global_name); @@ -1744,13 +1774,13 @@ impl<'a> Linearizer<'a> { result } // Check if it's a parameter (already SSA value) - else if let Some(&pseudo) = self.var_map.get(name) { + else if let Some(&pseudo) = self.var_map.get(&name_str) { pseudo } // Global variable - create symbol reference and load else { let sym_id = self.alloc_pseudo(); - let pseudo = Pseudo::sym(sym_id, name.clone()); + let pseudo = Pseudo::sym(sym_id, name_str.clone()); if let Some(func) = &mut self.current_func { func.add_pseudo(pseudo); } @@ -1824,7 +1854,8 @@ impl<'a> Linearizer<'a> { // Store back to the variable if let ExprKind::Ident { name, .. } = &operand.kind { - if let Some(local) = self.locals.get(name).cloned() { + let name_str = self.str(*name).to_string(); + if let Some(local) = self.locals.get(&name_str).cloned() { let store_size = self.types.size_bits(typ); self.emit(Instruction::store( final_result, @@ -1833,12 +1864,12 @@ impl<'a> Linearizer<'a> { typ, store_size, )); - } else if self.var_map.contains_key(name) { - self.var_map.insert(name.clone(), final_result); + } else if self.var_map.contains_key(&name_str) { + self.var_map.insert(name_str.clone(), final_result); } else { // Global variable - emit store let sym_id = self.alloc_pseudo(); - let pseudo = Pseudo::sym(sym_id, name.clone()); + let pseudo = Pseudo::sym(sym_id, name_str.clone()); if let Some(func) = &mut self.current_func { func.add_pseudo(pseudo); } @@ -2002,7 +2033,7 @@ impl<'a> Linearizer<'a> { // For locals, we need to save the old value before updating // because the pseudo will be reloaded from stack which gets overwritten let is_local = if let ExprKind::Ident { name, .. } = &operand.kind { - self.locals.contains_key(name) + self.locals.contains_key(self.str(*name)) } else { false }; @@ -2058,7 +2089,8 @@ impl<'a> Linearizer<'a> { let store_size = self.types.size_bits(typ); match &operand.kind { ExprKind::Ident { name, .. } => { - if let Some(local) = self.locals.get(name).cloned() { + let name_str = self.str(*name).to_string(); + if let Some(local) = self.locals.get(&name_str).cloned() { self.emit(Instruction::store( final_result, local.sym, @@ -2066,12 +2098,12 @@ impl<'a> Linearizer<'a> { typ, store_size, )); - } else if self.var_map.contains_key(name) { - self.var_map.insert(name.clone(), final_result); + } else if self.var_map.contains_key(&name_str) { + self.var_map.insert(name_str.clone(), final_result); } else { // Global variable - emit store let sym_id = self.alloc_pseudo(); - let pseudo = Pseudo::sym(sym_id, name.clone()); + let pseudo = Pseudo::sym(sym_id, name_str.clone()); if let Some(func) = &mut self.current_func { func.add_pseudo(pseudo); } @@ -2101,7 +2133,7 @@ impl<'a> Linearizer<'a> { // For locals, we need to save the old value before updating // because the pseudo will be reloaded from stack which gets overwritten let is_local = if let ExprKind::Ident { name, .. } = &operand.kind { - self.locals.contains_key(name) + self.locals.contains_key(self.str(*name)) } else { false }; @@ -2157,7 +2189,8 @@ impl<'a> Linearizer<'a> { let store_size = self.types.size_bits(typ); match &operand.kind { ExprKind::Ident { name, .. } => { - if let Some(local) = self.locals.get(name).cloned() { + let name_str = self.str(*name).to_string(); + if let Some(local) = self.locals.get(&name_str).cloned() { self.emit(Instruction::store( final_result, local.sym, @@ -2165,12 +2198,12 @@ impl<'a> Linearizer<'a> { typ, store_size, )); - } else if self.var_map.contains_key(name) { - self.var_map.insert(name.clone(), final_result); + } else if self.var_map.contains_key(&name_str) { + self.var_map.insert(name_str.clone(), final_result); } else { // Global variable - emit store let sym_id = self.alloc_pseudo(); - let pseudo = Pseudo::sym(sym_id, name.clone()); + let pseudo = Pseudo::sym(sym_id, name_str.clone()); if let Some(func) = &mut self.current_func { func.add_pseudo(pseudo); } @@ -2213,7 +2246,7 @@ impl<'a> Linearizer<'a> { ExprKind::Call { func, args } => { // Get function name let func_name = match &func.kind { - ExprKind::Ident { name, .. } => name.clone(), + ExprKind::Ident { name, .. } => self.str(*name).to_string(), _ => "".to_string(), }; @@ -2343,7 +2376,7 @@ impl<'a> Linearizer<'a> { // Look up member offset and type let member_info = self.types - .find_member(struct_type, member) + .find_member(struct_type, *member) .unwrap_or_else(|| MemberInfo { offset: 0, typ: self.expr_type(expr), @@ -2415,7 +2448,7 @@ impl<'a> Linearizer<'a> { // Look up member offset and type let member_info = self.types - .find_member(struct_type, member) + .find_member(struct_type, *member) .unwrap_or_else(|| MemberInfo { offset: 0, typ: self.expr_type(expr), @@ -2650,7 +2683,7 @@ impl<'a> Linearizer<'a> { let insn = Instruction::new(Opcode::VaStart) .with_target(result) .with_src(ap_addr) - .with_func(last_param.clone()) + .with_func(self.str(*last_param).to_string()) .with_type(self.types.void_id); self.emit(insn); result @@ -3478,11 +3511,12 @@ impl<'a> Linearizer<'a> { let target_size = self.types.size_bits(target_typ); match &target.kind { ExprKind::Ident { name, .. } => { - if let Some(local) = self.locals.get(name).cloned() { + let name_str = self.str(*name).to_string(); + if let Some(local) = self.locals.get(&name_str).cloned() { // Check if this is a static local (sentinel value) if local.sym.0 == u32::MAX { // Static local - look up the global name and emit store to global - let key = format!("{}.{}", self.current_func_name, name); + let key = format!("{}.{}", self.current_func_name, &name_str); if let Some(static_info) = self.static_locals.get(&key).cloned() { let sym_id = self.alloc_pseudo(); let pseudo = Pseudo::sym(sym_id, static_info.global_name); @@ -3507,15 +3541,15 @@ impl<'a> Linearizer<'a> { target_size, )); } - } else if self.var_map.contains_key(name) { + } else if self.var_map.contains_key(&name_str) { // Parameter: this is not SSA-correct but parameters // shouldn't be reassigned. If they are, we'd need to // demote them to locals. For now, just update the mapping. - self.var_map.insert(name.clone(), final_val); + self.var_map.insert(name_str.clone(), final_val); } else { // Global variable - emit store let sym_id = self.alloc_pseudo(); - let pseudo = Pseudo::sym(sym_id, name.clone()); + let pseudo = Pseudo::sym(sym_id, name_str); if let Some(func) = &mut self.current_func { func.add_pseudo(pseudo); } @@ -3534,7 +3568,7 @@ impl<'a> Linearizer<'a> { let struct_type = self.expr_type(expr); let member_info = self.types - .find_member(struct_type, member) + .find_member(struct_type, *member) .unwrap_or(MemberInfo { offset: 0, typ: target_typ, @@ -3574,7 +3608,7 @@ impl<'a> Linearizer<'a> { let struct_type = self.types.base_type(ptr_type).unwrap_or(target_typ); let member_info = self.types - .find_member(struct_type, member) + .find_member(struct_type, *member) .unwrap_or(MemberInfo { offset: 0, typ: target_typ, @@ -3692,9 +3726,10 @@ pub fn linearize( tu: &TranslationUnit, symbols: &SymbolTable, types: &TypeTable, + strings: &StringTable, target: &Target, ) -> Module { - linearize_with_debug(tu, symbols, types, target, false, None) + linearize_with_debug(tu, symbols, types, strings, target, false, None) } /// Linearize an AST to IR with debug info support @@ -3702,11 +3737,12 @@ pub fn linearize_with_debug( tu: &TranslationUnit, symbols: &SymbolTable, types: &TypeTable, + strings: &StringTable, target: &Target, debug: bool, source_file: Option<&str>, ) -> Module { - let mut linearizer = Linearizer::new(symbols, types, target); + let mut linearizer = Linearizer::new(symbols, types, strings, target); let mut module = linearizer.linearize(tu); module.debug = debug; if let Some(path) = source_file { @@ -3723,6 +3759,7 @@ pub fn linearize_with_debug( mod tests { use super::*; use crate::parse::ast::{ExternalDecl, FunctionDef, Parameter}; + use crate::strings::StringTable; /// Create a default position for test code fn test_pos() -> Position { @@ -3736,16 +3773,16 @@ mod tests { } } - fn test_linearize(tu: &TranslationUnit, types: &TypeTable) -> Module { + fn test_linearize(tu: &TranslationUnit, types: &TypeTable, strings: &StringTable) -> Module { let symbols = SymbolTable::new(); let target = Target::host(); - linearize(tu, &symbols, types, &target) + linearize(tu, &symbols, types, strings, &target) } - fn make_simple_func(name: &str, body: Stmt, types: &TypeTable) -> FunctionDef { + fn make_simple_func(name: StringId, body: Stmt, types: &TypeTable) -> FunctionDef { FunctionDef { return_type: types.int_id, - name: name.to_string(), + name, params: vec![], body, pos: test_pos(), @@ -3754,13 +3791,15 @@ mod tests { #[test] fn test_linearize_empty_function() { + let mut strings = StringTable::new(); let types = TypeTable::new(); - let func = make_simple_func("test", Stmt::Block(vec![]), &types); + let test_id = strings.intern("test"); + let func = make_simple_func(test_id, Stmt::Block(vec![]), &types); let tu = TranslationUnit { items: vec![ExternalDecl::FunctionDef(func)], }; - let module = test_linearize(&tu, &types); + let module = test_linearize(&tu, &types, &strings); assert_eq!(module.functions.len(), 1); assert_eq!(module.functions[0].name, "test"); assert!(!module.functions[0].blocks.is_empty()); @@ -3768,22 +3807,26 @@ mod tests { #[test] fn test_linearize_return() { + let mut strings = StringTable::new(); let types = TypeTable::new(); - let func = make_simple_func("test", Stmt::Return(Some(Expr::int(42, &types))), &types); + let test_id = strings.intern("test"); + let func = make_simple_func(test_id, Stmt::Return(Some(Expr::int(42, &types))), &types); let tu = TranslationUnit { items: vec![ExternalDecl::FunctionDef(func)], }; - let module = test_linearize(&tu, &types); + let module = test_linearize(&tu, &types, &strings); let ir = format!("{}", module); assert!(ir.contains("ret")); } #[test] fn test_linearize_if() { + let mut strings = StringTable::new(); let types = TypeTable::new(); + let test_id = strings.intern("test"); let func = make_simple_func( - "test", + test_id, Stmt::If { cond: Expr::int(1, &types), then_stmt: Box::new(Stmt::Return(Some(Expr::int(1, &types)))), @@ -3795,16 +3838,18 @@ mod tests { items: vec![ExternalDecl::FunctionDef(func)], }; - let module = test_linearize(&tu, &types); + let module = test_linearize(&tu, &types, &strings); let ir = format!("{}", module); assert!(ir.contains("cbr")); // Conditional branch } #[test] fn test_linearize_while() { + let mut strings = StringTable::new(); let types = TypeTable::new(); + let test_id = strings.intern("test"); let func = make_simple_func( - "test", + test_id, Stmt::While { cond: Expr::int(1, &types), body: Box::new(Stmt::Break), @@ -3815,22 +3860,25 @@ mod tests { items: vec![ExternalDecl::FunctionDef(func)], }; - let module = test_linearize(&tu, &types); + let module = test_linearize(&tu, &types, &strings); assert!(module.functions[0].blocks.len() >= 3); // cond, body, exit } #[test] fn test_linearize_for() { + let mut strings = StringTable::new(); let types = TypeTable::new(); + let test_id = strings.intern("test"); + let i_id = strings.intern("i"); // for (int i = 0; i < 10; i++) { } let int_type = types.int_id; - let i_var = Expr::var_typed("i", int_type); + let i_var = Expr::var_typed(i_id, int_type); let func = make_simple_func( - "test", + test_id, Stmt::For { init: Some(ForInit::Declaration(Declaration { declarators: vec![crate::parse::ast::InitDeclarator { - name: "i".to_string(), + name: i_id, typ: int_type, init: Some(Expr::int(0, &types)), }], @@ -3854,16 +3902,18 @@ mod tests { items: vec![ExternalDecl::FunctionDef(func)], }; - let module = test_linearize(&tu, &types); + let module = test_linearize(&tu, &types, &strings); assert!(module.functions[0].blocks.len() >= 4); // entry, cond, body, post, exit } #[test] fn test_linearize_binary_expr() { + let mut strings = StringTable::new(); let types = TypeTable::new(); + let test_id = strings.intern("test"); // return 1 + 2 * 3; let func = make_simple_func( - "test", + test_id, Stmt::Return(Some(Expr::binary( BinaryOp::Add, Expr::int(1, &types), @@ -3881,7 +3931,7 @@ mod tests { items: vec![ExternalDecl::FunctionDef(func)], }; - let module = test_linearize(&tu, &types); + let module = test_linearize(&tu, &types, &strings); let ir = format!("{}", module); assert!(ir.contains("mul")); assert!(ir.contains("add")); @@ -3889,25 +3939,29 @@ mod tests { #[test] fn test_linearize_function_with_params() { + let mut strings = StringTable::new(); let types = TypeTable::new(); + let add_id = strings.intern("add"); + let a_id = strings.intern("a"); + let b_id = strings.intern("b"); let int_type = types.int_id; let func = FunctionDef { return_type: int_type, - name: "add".to_string(), + name: add_id, params: vec![ Parameter { - name: Some("a".to_string()), + name: Some(a_id), typ: int_type, }, Parameter { - name: Some("b".to_string()), + name: Some(b_id), typ: int_type, }, ], body: Stmt::Return(Some(Expr::binary( BinaryOp::Add, - Expr::var_typed("a", int_type), - Expr::var_typed("b", int_type), + Expr::var_typed(a_id, int_type), + Expr::var_typed(b_id, int_type), &types, ))), pos: test_pos(), @@ -3916,7 +3970,7 @@ mod tests { items: vec![ExternalDecl::FunctionDef(func)], }; - let module = test_linearize(&tu, &types); + let module = test_linearize(&tu, &types, &strings); let ir = format!("{}", module); assert!(ir.contains("add")); assert!(ir.contains("%a")); @@ -3925,11 +3979,14 @@ mod tests { #[test] fn test_linearize_call() { + let mut strings = StringTable::new(); let types = TypeTable::new(); + let test_id = strings.intern("test"); + let foo_id = strings.intern("foo"); let func = make_simple_func( - "test", + test_id, Stmt::Return(Some(Expr::call( - Expr::var("foo"), + Expr::var(foo_id), vec![Expr::int(1, &types), Expr::int(2, &types)], &types, ))), @@ -3939,7 +3996,7 @@ mod tests { items: vec![ExternalDecl::FunctionDef(func)], }; - let module = test_linearize(&tu, &types); + let module = test_linearize(&tu, &types, &strings); let ir = format!("{}", module); assert!(ir.contains("call")); assert!(ir.contains("foo")); @@ -3947,9 +4004,11 @@ mod tests { #[test] fn test_linearize_comparison() { + let mut strings = StringTable::new(); let types = TypeTable::new(); + let test_id = strings.intern("test"); let func = make_simple_func( - "test", + test_id, Stmt::Return(Some(Expr::binary( BinaryOp::Lt, Expr::int(1, &types), @@ -3962,14 +4021,16 @@ mod tests { items: vec![ExternalDecl::FunctionDef(func)], }; - let module = test_linearize(&tu, &types); + let module = test_linearize(&tu, &types, &strings); let ir = format!("{}", module); assert!(ir.contains("setlt")); } #[test] fn test_linearize_unsigned_comparison() { + let mut strings = StringTable::new(); let types = TypeTable::new(); + let test_id = strings.intern("test"); // Create unsigned comparison: (unsigned)1 < (unsigned)2 let uint_type = types.uint_id; @@ -3980,12 +4041,12 @@ mod tests { let mut cmp = Expr::binary(BinaryOp::Lt, left, right, &types); cmp.typ = Some(types.int_id); - let func = make_simple_func("test", Stmt::Return(Some(cmp)), &types); + let func = make_simple_func(test_id, Stmt::Return(Some(cmp)), &types); let tu = TranslationUnit { items: vec![ExternalDecl::FunctionDef(func)], }; - let module = test_linearize(&tu, &types); + let module = test_linearize(&tu, &types, &strings); let ir = format!("{}", module); // Should use unsigned comparison opcode (setb = set if below) assert!( @@ -3997,13 +4058,15 @@ mod tests { #[test] fn test_display_module() { + let mut strings = StringTable::new(); let types = TypeTable::new(); - let func = make_simple_func("main", Stmt::Return(Some(Expr::int(0, &types))), &types); + let main_id = strings.intern("main"); + let func = make_simple_func(main_id, Stmt::Return(Some(Expr::int(0, &types))), &types); let tu = TranslationUnit { items: vec![ExternalDecl::FunctionDef(func)], }; - let module = test_linearize(&tu, &types); + let module = test_linearize(&tu, &types, &strings); let ir = format!("{}", module); // Should have proper structure @@ -4015,6 +4078,7 @@ mod tests { #[test] fn test_type_propagation_expr_type() { + let strings = StringTable::new(); let types = TypeTable::new(); // Create an expression with a type annotation @@ -4025,7 +4089,7 @@ mod tests { // Create linearizer and test that expr_type reads from the expression let symbols = SymbolTable::new(); let target = Target::host(); - let linearizer = Linearizer::new(&symbols, &types, &target); + let linearizer = Linearizer::new(&symbols, &types, &strings, &target); let typ = linearizer.expr_type(&expr); assert_eq!(types.kind(typ), TypeKind::Int); @@ -4038,6 +4102,7 @@ mod tests { #[test] fn test_type_propagation_double_literal() { + let strings = StringTable::new(); let types = TypeTable::new(); // Create a double literal @@ -4046,7 +4111,7 @@ mod tests { let symbols = SymbolTable::new(); let target = Target::host(); - let linearizer = Linearizer::new(&symbols, &types, &target); + let linearizer = Linearizer::new(&symbols, &types, &strings, &target); let typ = linearizer.expr_type(&expr); assert_eq!(types.kind(typ), TypeKind::Double); } @@ -4056,31 +4121,34 @@ mod tests { // ======================================================================== /// Helper to linearize without SSA conversion (for comparing before/after) - fn linearize_no_ssa(tu: &TranslationUnit, types: &TypeTable) -> Module { + fn linearize_no_ssa(tu: &TranslationUnit, types: &TypeTable, strings: &StringTable) -> Module { let symbols = SymbolTable::new(); let target = Target::host(); - let mut linearizer = Linearizer::new_no_ssa(&symbols, types, &target); + let mut linearizer = Linearizer::new_no_ssa(&symbols, types, strings, &target); linearizer.linearize(tu) } #[test] fn test_local_var_emits_load_store() { + let mut strings = StringTable::new(); let types = TypeTable::new(); + let test_id = strings.intern("test"); + let x_id = strings.intern("x"); // int test() { int x = 1; return x; } let int_type = types.int_id; let func = FunctionDef { return_type: int_type, - name: "test".to_string(), + name: test_id, params: vec![], body: Stmt::Block(vec![ BlockItem::Declaration(Declaration { declarators: vec![crate::parse::ast::InitDeclarator { - name: "x".to_string(), + name: x_id, typ: int_type, init: Some(Expr::int(1, &types)), }], }), - BlockItem::Statement(Stmt::Return(Some(Expr::var_typed("x", int_type)))), + BlockItem::Statement(Stmt::Return(Some(Expr::var_typed(x_id, int_type)))), ]), pos: test_pos(), }; @@ -4089,7 +4157,7 @@ mod tests { }; // Without SSA, should have store and load - let module = linearize_no_ssa(&tu, &types); + let module = linearize_no_ssa(&tu, &types, &strings); let ir = format!("{}", module); assert!( ir.contains("store"), @@ -4105,7 +4173,11 @@ mod tests { #[test] fn test_ssa_converts_local_to_phi() { + let mut strings = StringTable::new(); let types = TypeTable::new(); + let test_id = strings.intern("test"); + let cond_id = strings.intern("cond"); + let x_id = strings.intern("x"); // int test(int cond) { // int x = 1; // if (cond) x = 2; @@ -4115,32 +4187,32 @@ mod tests { let func = FunctionDef { return_type: int_type, - name: "test".to_string(), + name: test_id, params: vec![Parameter { - name: Some("cond".to_string()), + name: Some(cond_id), typ: int_type, }], body: Stmt::Block(vec![ // int x = 1; BlockItem::Declaration(Declaration { declarators: vec![crate::parse::ast::InitDeclarator { - name: "x".to_string(), + name: x_id, typ: int_type, init: Some(Expr::int(1, &types)), }], }), // if (cond) x = 2; BlockItem::Statement(Stmt::If { - cond: Expr::var_typed("cond", int_type), + cond: Expr::var_typed(cond_id, int_type), then_stmt: Box::new(Stmt::Expr(Expr::assign( - Expr::var_typed("x", int_type), + Expr::var_typed(x_id, int_type), Expr::int(2, &types), &types, ))), else_stmt: None, }), // return x; - BlockItem::Statement(Stmt::Return(Some(Expr::var_typed("x", int_type)))), + BlockItem::Statement(Stmt::Return(Some(Expr::var_typed(x_id, int_type)))), ]), pos: test_pos(), }; @@ -4149,7 +4221,7 @@ mod tests { }; // With SSA, should have phi node at merge point - let module = test_linearize(&tu, &types); + let module = test_linearize(&tu, &types, &strings); let ir = format!("{}", module); // Should have a phi instruction @@ -4162,7 +4234,10 @@ mod tests { #[test] fn test_ssa_loop_variable() { + let mut strings = StringTable::new(); let types = TypeTable::new(); + let test_id = strings.intern("test"); + let i_id = strings.intern("i"); // int test() { // int i = 0; // while (i < 10) { i = i + 1; } @@ -4170,17 +4245,17 @@ mod tests { // } let int_type = types.int_id; - let i_var = || Expr::var_typed("i", int_type); + let i_var = || Expr::var_typed(i_id, int_type); let func = FunctionDef { return_type: int_type, - name: "test".to_string(), + name: test_id, params: vec![], body: Stmt::Block(vec![ // int i = 0; BlockItem::Declaration(Declaration { declarators: vec![crate::parse::ast::InitDeclarator { - name: "i".to_string(), + name: i_id, typ: int_type, init: Some(Expr::int(0, &types)), }], @@ -4204,7 +4279,7 @@ mod tests { }; // With SSA, should have phi node at loop header - let module = test_linearize(&tu, &types); + let module = test_linearize(&tu, &types, &strings); let ir = format!("{}", module); // Loop should have a phi at the condition block @@ -4213,7 +4288,11 @@ mod tests { #[test] fn test_short_circuit_and() { + let mut strings = StringTable::new(); let types = TypeTable::new(); + let test_id = strings.intern("test"); + let a_id = strings.intern("a"); + let b_id = strings.intern("b"); // int test(int a, int b) { // return a && b; // } @@ -4222,21 +4301,21 @@ mod tests { let func = FunctionDef { return_type: int_type, - name: "test".to_string(), + name: test_id, params: vec![ Parameter { - name: Some("a".to_string()), + name: Some(a_id), typ: int_type, }, Parameter { - name: Some("b".to_string()), + name: Some(b_id), typ: int_type, }, ], body: Stmt::Return(Some(Expr::binary( BinaryOp::LogAnd, - Expr::var_typed("a", int_type), - Expr::var_typed("b", int_type), + Expr::var_typed(a_id, int_type), + Expr::var_typed(b_id, int_type), &types, ))), pos: test_pos(), @@ -4245,7 +4324,7 @@ mod tests { items: vec![ExternalDecl::FunctionDef(func)], }; - let module = test_linearize(&tu, &types); + let module = test_linearize(&tu, &types, &strings); let ir = format!("{}", module); // Short-circuit AND should have: @@ -4265,7 +4344,11 @@ mod tests { #[test] fn test_short_circuit_or() { + let mut strings = StringTable::new(); let types = TypeTable::new(); + let test_id = strings.intern("test"); + let a_id = strings.intern("a"); + let b_id = strings.intern("b"); // int test(int a, int b) { // return a || b; // } @@ -4274,21 +4357,21 @@ mod tests { let func = FunctionDef { return_type: int_type, - name: "test".to_string(), + name: test_id, params: vec![ Parameter { - name: Some("a".to_string()), + name: Some(a_id), typ: int_type, }, Parameter { - name: Some("b".to_string()), + name: Some(b_id), typ: int_type, }, ], body: Stmt::Return(Some(Expr::binary( BinaryOp::LogOr, - Expr::var_typed("a", int_type), - Expr::var_typed("b", int_type), + Expr::var_typed(a_id, int_type), + Expr::var_typed(b_id, int_type), &types, ))), pos: test_pos(), @@ -4297,7 +4380,7 @@ mod tests { items: vec![ExternalDecl::FunctionDef(func)], }; - let module = test_linearize(&tu, &types); + let module = test_linearize(&tu, &types, &strings); let ir = format!("{}", module); // Short-circuit OR should have: diff --git a/cc/main.rs b/cc/main.rs index 81dccb39..ec96c11d 100644 --- a/cc/main.rs +++ b/cc/main.rs @@ -18,6 +18,7 @@ mod lower; mod os; mod parse; mod ssa; +mod strings; mod symbol; mod target; mod token; @@ -31,6 +32,7 @@ use std::path::Path; use std::process::Command; use parse::Parser as CParser; +use strings::StringTable; use symbol::SymbolTable; use target::Target; use token::{preprocess, show_token, token_type_name, StreamTable, Tokenizer}; @@ -144,13 +146,17 @@ fn process_file( // Create stream let stream_id = streams.add(display_path.to_string()); + // Create shared string table for identifier interning + let mut strings = StringTable::new(); + // Tokenize - let mut tokenizer = Tokenizer::new(&buffer, stream_id); - let tokens = tokenizer.tokenize(); + let tokens = { + let mut tokenizer = Tokenizer::new(&buffer, stream_id, &mut strings); + tokenizer.tokenize() + }; // Dump raw tokens if requested if args.dump_tokens && !args.preprocess_only { - let idents = tokenizer.ident_table(); for token in &tokens { if args.verbose { println!( @@ -158,10 +164,10 @@ fn process_file( token.pos.line, token.pos.col, token_type_name(token.typ), - show_token(token, idents) + show_token(token, &strings) ); } else { - let text = show_token(token, idents); + let text = show_token(token, &strings); if !text.starts_with('<') { print!("{} ", text); } @@ -174,8 +180,7 @@ fn process_file( } // Preprocess (may add new identifiers from included files) - let preprocessed = preprocess(tokens, target, tokenizer.ident_table_mut(), path); - let idents = tokenizer.ident_table(); + let preprocessed = preprocess(tokens, target, &mut strings, path); if args.preprocess_only { // Output preprocessed tokens @@ -186,10 +191,10 @@ fn process_file( token.pos.line, token.pos.col, token_type_name(token.typ), - show_token(token, idents) + show_token(token, &strings) ); } else { - let text = show_token(token, idents); + let text = show_token(token, &strings); if !text.starts_with('<') { print!("{} ", text); } @@ -207,7 +212,7 @@ fn process_file( let mut types = types::TypeTable::new(); // Parse (this also binds symbols to the symbol table) - let mut parser = CParser::new(&preprocessed, idents, &mut symbols, &mut types); + let mut parser = CParser::new(&preprocessed, &strings, &mut symbols, &mut types); let ast = parser .parse_translation_unit() .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, format!("parse error: {}", e)))?; @@ -222,6 +227,7 @@ fn process_file( &ast, &symbols, &types, + &strings, target, args.debug, Some(display_path), diff --git a/cc/parse/ast.rs b/cc/parse/ast.rs index 8ae4cb50..5e81d0ec 100644 --- a/cc/parse/ast.rs +++ b/cc/parse/ast.rs @@ -11,6 +11,7 @@ // use crate::diag::Position; +use crate::strings::StringId; use crate::types::TypeId; // ============================================================================ @@ -179,7 +180,7 @@ pub enum ExprKind { /// Identifier (variable reference) Ident { - name: String, + name: StringId, }, /// Unary operation @@ -222,13 +223,13 @@ pub enum ExprKind { /// Member access: expr.member Member { expr: Box, - member: String, + member: StringId, }, /// Pointer member access: expr->member Arrow { expr: Box, - member: String, + member: StringId, }, /// Array subscript: array[index] @@ -266,7 +267,7 @@ pub enum ExprKind { /// The va_list to initialize (lvalue) ap: Box, /// Name of the last named parameter before ... - last_param: String, + last_param: StringId, }, /// __builtin_va_arg(ap, type) @@ -334,7 +335,7 @@ pub enum ExprKind { #[derive(Debug, Clone)] pub enum Designator { /// Field designator: .field_name - Field(String), + Field(StringId), /// Index designator: [constant_expr] - evaluated at parse time Index(i64), } @@ -361,20 +362,13 @@ impl Expr { } /// Create a variable reference (untyped - needs type evaluation) - no position - pub fn var(name: &str) -> Self { - Expr::new_unpositioned(ExprKind::Ident { - name: name.to_string(), - }) + pub fn var(name: StringId) -> Self { + Expr::new_unpositioned(ExprKind::Ident { name }) } /// Create a variable reference with a known type - no position - pub fn var_typed(name: &str, typ: TypeId) -> Self { - Expr::typed_unpositioned( - ExprKind::Ident { - name: name.to_string(), - }, - typ, - ) + pub fn var_typed(name: StringId, typ: TypeId) -> Self { + Expr::typed_unpositioned(ExprKind::Ident { name }, typ) } /// Create a binary expression (using TypeTable for type inference) @@ -496,10 +490,10 @@ pub enum Stmt { Continue, /// Goto statement: goto label; - Goto(String), + Goto(StringId), /// Labeled statement: label: stmt - Label { name: String, stmt: Box }, + Label { name: StringId, stmt: Box }, /// Switch statement: switch (expr) { cases } Switch { expr: Expr, body: Box }, @@ -542,7 +536,7 @@ pub struct Declaration { #[derive(Debug, Clone)] pub struct InitDeclarator { /// The name being declared - pub name: String, + pub name: StringId, /// The complete type (after applying declarator modifiers) - interned TypeId pub typ: TypeId, /// Optional initializer @@ -552,13 +546,9 @@ pub struct InitDeclarator { #[cfg(test)] impl Declaration { /// Create a simple declaration with one variable - pub fn simple(name: &str, typ: TypeId, init: Option) -> Self { + pub fn simple(name: StringId, typ: TypeId, init: Option) -> Self { Declaration { - declarators: vec![InitDeclarator { - name: name.to_string(), - typ, - init, - }], + declarators: vec![InitDeclarator { name, typ, init }], } } } @@ -570,7 +560,7 @@ impl Declaration { /// A function parameter #[derive(Debug, Clone)] pub struct Parameter { - pub name: Option, + pub name: Option, /// Parameter type (interned TypeId) pub typ: TypeId, } @@ -581,7 +571,7 @@ pub struct FunctionDef { /// Return type (interned TypeId) pub return_type: TypeId, /// Function name - pub name: String, + pub name: StringId, /// Parameters pub params: Vec, /// Function body @@ -632,6 +622,7 @@ impl Default for TranslationUnit { #[cfg(test)] mod tests { use super::*; + use crate::strings::StringTable; use crate::types::TypeKind; #[test] @@ -701,15 +692,17 @@ mod tests { #[test] fn test_unary_expr() { + let mut strings = StringTable::new(); let types = TypeTable::new(); + let x_id = strings.intern("x"); // -x - let expr = Expr::unary(UnaryOp::Neg, Expr::var("x"), &types); + let expr = Expr::unary(UnaryOp::Neg, Expr::var(x_id), &types); match expr.kind { ExprKind::Unary { op, operand } => { assert_eq!(op, UnaryOp::Neg); match operand.kind { - ExprKind::Ident { name, .. } => assert_eq!(name, "x"), + ExprKind::Ident { name } => assert_eq!(name, x_id), _ => panic!("Expected Ident"), } } @@ -719,15 +712,17 @@ mod tests { #[test] fn test_assignment() { + let mut strings = StringTable::new(); let types = TypeTable::new(); + let x_id = strings.intern("x"); // x = 5 - let expr = Expr::assign(Expr::var("x"), Expr::int(5, &types), &types); + let expr = Expr::assign(Expr::var(x_id), Expr::int(5, &types), &types); match expr.kind { ExprKind::Assign { op, target, value } => { assert_eq!(op, AssignOp::Assign); match target.kind { - ExprKind::Ident { name, .. } => assert_eq!(name, "x"), + ExprKind::Ident { name } => assert_eq!(name, x_id), _ => panic!("Expected Ident"), } match value.kind { @@ -741,10 +736,12 @@ mod tests { #[test] fn test_function_call() { + let mut strings = StringTable::new(); let types = TypeTable::new(); + let foo_id = strings.intern("foo"); // foo(1, 2) let expr = Expr::call( - Expr::var("foo"), + Expr::var(foo_id), vec![Expr::int(1, &types), Expr::int(2, &types)], &types, ); @@ -752,7 +749,7 @@ mod tests { match expr.kind { ExprKind::Call { func, args } => { match func.kind { - ExprKind::Ident { name, .. } => assert_eq!(name, "foo"), + ExprKind::Ident { name } => assert_eq!(name, foo_id), _ => panic!("Expected Ident"), } assert_eq!(args.len(), 2); @@ -763,10 +760,12 @@ mod tests { #[test] fn test_if_stmt() { + let mut strings = StringTable::new(); let types = TypeTable::new(); + let x_id = strings.intern("x"); // if (x) return 1; let stmt = Stmt::If { - cond: Expr::var("x"), + cond: Expr::var(x_id), then_stmt: Box::new(Stmt::Return(Some(Expr::int(1, &types)))), else_stmt: None, }; @@ -778,7 +777,7 @@ mod tests { else_stmt, } => { match cond.kind { - ExprKind::Ident { name, .. } => assert_eq!(name, "x"), + ExprKind::Ident { name } => assert_eq!(name, x_id), _ => panic!("Expected Ident"), } match *then_stmt { @@ -796,18 +795,20 @@ mod tests { #[test] fn test_while_stmt() { + let mut strings = StringTable::new(); + let x_id = strings.intern("x"); // while (x) x--; let stmt = Stmt::While { - cond: Expr::var("x"), + cond: Expr::var(x_id), body: Box::new(Stmt::Expr(Expr::new_unpositioned(ExprKind::PostDec( - Box::new(Expr::var("x")), + Box::new(Expr::var(x_id)), )))), }; match stmt { Stmt::While { cond, body } => { match cond.kind { - ExprKind::Ident { name, .. } => assert_eq!(name, "x"), + ExprKind::Ident { name } => assert_eq!(name, x_id), _ => panic!("Expected Ident"), } match *body { @@ -824,23 +825,27 @@ mod tests { #[test] fn test_declaration() { + let mut strings = StringTable::new(); let types = TypeTable::new(); + let x_id = strings.intern("x"); // int x = 5; - let decl = Declaration::simple("x", types.int_id, Some(Expr::int(5, &types))); + let decl = Declaration::simple(x_id, types.int_id, Some(Expr::int(5, &types))); assert_eq!(decl.declarators.len(), 1); - assert_eq!(decl.declarators[0].name, "x"); + assert_eq!(decl.declarators[0].name, x_id); assert_eq!(types.kind(decl.declarators[0].typ), TypeKind::Int); assert!(decl.declarators[0].init.is_some()); } #[test] fn test_translation_unit() { + let mut strings = StringTable::new(); let types = TypeTable::new(); + let x_id = strings.intern("x"); let mut tu = TranslationUnit::new(); // Add a declaration - let decl = Declaration::simple("x", types.int_id, None); + let decl = Declaration::simple(x_id, types.int_id, None); tu.add(ExternalDecl::Declaration(decl)); assert_eq!(tu.items.len(), 1); @@ -848,15 +853,17 @@ mod tests { #[test] fn test_for_loop() { + let mut strings = StringTable::new(); let types = TypeTable::new(); + let i_id = strings.intern("i"); // for (int i = 0; i < 10; i++) {} let init = ForInit::Declaration(Declaration::simple( - "i", + i_id, types.int_id, Some(Expr::int(0, &types)), )); - let cond = Expr::binary(BinaryOp::Lt, Expr::var("i"), Expr::int(10, &types), &types); - let post = Expr::new_unpositioned(ExprKind::PostInc(Box::new(Expr::var("i")))); + let cond = Expr::binary(BinaryOp::Lt, Expr::var(i_id), Expr::int(10, &types), &types); + let post = Expr::new_unpositioned(ExprKind::PostInc(Box::new(Expr::var(i_id)))); let stmt = Stmt::For { init: Some(init), diff --git a/cc/parse/parser.rs b/cc/parse/parser.rs index ef981404..d9fa5287 100644 --- a/cc/parse/parser.rs +++ b/cc/parse/parser.rs @@ -15,6 +15,7 @@ use super::ast::{ FunctionDef, InitDeclarator, InitElement, Parameter, Stmt, TranslationUnit, UnaryOp, }; use crate::diag; +use crate::strings::StringId; use crate::symbol::{Namespace, Symbol, SymbolTable}; use crate::token::lexer::{IdentTable, Position, SpecialToken, Token, TokenType, TokenValue}; use crate::types::{ @@ -271,12 +272,27 @@ impl<'a> Parser<'a> { /// Get the identifier name from an Ident token value fn get_ident_name(&self, token: &Token) -> Option { if let TokenValue::Ident(id) = &token.value { - self.idents.get(*id).map(|s| s.to_string()) + self.idents.get_opt(*id).map(|s| s.to_string()) } else { None } } + /// Get the StringId directly from an Ident token + fn get_ident_id(&self, token: &Token) -> Option { + if let TokenValue::Ident(id) = &token.value { + Some(*id) + } else { + None + } + } + + /// Get string value for a StringId + #[inline] + fn str(&self, id: StringId) -> &str { + self.idents.get(id) + } + /// Skip StreamBegin/StreamEnd tokens pub fn skip_stream_tokens(&mut self) { while self.peek() == TokenType::StreamBegin || self.peek() == TokenType::StreamEnd { @@ -1155,8 +1171,9 @@ impl<'a> Parser<'a> { } // Check if this looks like a type name (keyword or typedef) - let name = self.get_ident_name(self.current())?; - if !Self::is_type_keyword(&name) && self.symbols.lookup_typedef(&name).is_none() { + let name_id = self.get_ident_id(self.current())?; + let name = self.str(name_id); + if !Self::is_type_keyword(name) && self.symbols.lookup_typedef(name_id).is_none() { // Not a type keyword and not a typedef return None; } @@ -1171,12 +1188,13 @@ impl<'a> Parser<'a> { break; } - let name = match self.get_ident_name(self.current()) { - Some(n) => n, + let name_id = match self.get_ident_id(self.current()) { + Some(id) => id, None => break, }; + let name = self.str(name_id); - match name.as_str() { + match name { "const" => { self.advance(); modifiers |= TypeModifiers::CONST; @@ -1271,12 +1289,12 @@ impl<'a> Parser<'a> { "struct" => { self.advance(); // consume 'struct' // For struct tag reference, look up directly in symbol table - if let Some(tag_name) = self.get_ident_name(self.current()) { + if let Some(tag_name) = self.get_ident_id(self.current()) { if !self.is_special(b'{') { // This is a tag reference (e.g., "struct Point*") // Look up the existing tag to get its TypeId directly self.advance(); // consume tag name - if let Some(existing) = self.symbols.lookup_tag(&tag_name) { + if let Some(existing) = self.symbols.lookup_tag(tag_name) { let mut result_id = existing.typ; // Handle pointer while self.is_special(b'*') { @@ -1344,11 +1362,11 @@ impl<'a> Parser<'a> { "union" => { self.advance(); // consume 'union' // For union tag reference, look up directly in symbol table - if let Some(tag_name) = self.get_ident_name(self.current()) { + if let Some(tag_name) = self.get_ident_id(self.current()) { if !self.is_special(b'{') { // This is a tag reference self.advance(); // consume tag name - if let Some(existing) = self.symbols.lookup_tag(&tag_name) { + if let Some(existing) = self.symbols.lookup_tag(tag_name) { let mut result_id = existing.typ; while self.is_special(b'*') { self.advance(); @@ -1438,7 +1456,7 @@ impl<'a> Parser<'a> { _ => { // Check if it's a typedef name if base_kind.is_none() { - if let Some(typedef_type_id) = self.symbols.lookup_typedef(&name) { + if let Some(typedef_type_id) = self.symbols.lookup_typedef(name_id) { self.advance(); // For typedef, we already have a TypeId - just apply pointer/array modifiers let mut result_id = typedef_type_id; @@ -1551,7 +1569,7 @@ impl<'a> Parser<'a> { // Get member type from struct type let member_type = expr .typ - .and_then(|t| self.types.find_member(t, &member)) + .and_then(|t| self.types.find_member(t, member)) .map(|info| info.typ) .unwrap_or(self.types.int_id); expr = Self::typed_expr( @@ -1570,7 +1588,7 @@ impl<'a> Parser<'a> { let member_type = expr .typ .and_then(|t| self.types.base_type(t)) - .and_then(|struct_type| self.types.find_member(struct_type, &member)) + .and_then(|struct_type| self.types.find_member(struct_type, member)) .map(|info| info.typ) .unwrap_or(self.types.int_id); expr = Self::typed_expr( @@ -1639,18 +1657,18 @@ impl<'a> Parser<'a> { Ok(args) } - /// Expect and consume an identifier, returning its name - fn expect_identifier(&mut self) -> ParseResult { + /// Expect and consume an identifier, returning its StringId + fn expect_identifier(&mut self) -> ParseResult { if self.peek() != TokenType::Ident { return Err(ParseError::new("expected identifier", self.current_pos())); } - let name = self - .get_ident_name(self.current()) + let id = self + .get_ident_id(self.current()) .ok_or_else(|| ParseError::new("invalid identifier", self.current_pos()))?; self.advance(); - Ok(name) + Ok(id) } /// Check if an expression is const and report error if assigning to it @@ -1837,10 +1855,11 @@ impl<'a> Parser<'a> { let token = self.consume(); let token_pos = token.pos; if let TokenValue::Ident(id) = &token.value { - let name = self.idents.get(*id).unwrap_or("").to_string(); + let name_id = *id; + let name_str = self.idents.get_opt(name_id).unwrap_or(""); // Check for varargs builtins that need special parsing - match name.as_str() { + match name_str { "__builtin_va_start" => { // __builtin_va_start(ap, last_param) self.expect_special(b'(')?; @@ -1984,10 +2003,14 @@ impl<'a> Parser<'a> { // Look up symbol to get type (during parsing, symbol is in scope) let typ = self .symbols - .lookup(&name, Namespace::Ordinary) + .lookup(name_id, Namespace::Ordinary) .map(|s| s.typ) .unwrap_or(self.types.int_id); // Default to int if not found - Ok(Self::typed_expr(ExprKind::Ident { name }, typ, token_pos)) + Ok(Self::typed_expr( + ExprKind::Ident { name: name_id }, + typ, + token_pos, + )) } else { Err(ParseError::new("invalid identifier token", token.pos)) } @@ -2590,10 +2613,11 @@ impl Parser<'_> { return false; } - if let Some(name) = self.get_ident_name(self.current()) { + if let Some(name_id) = self.get_ident_id(self.current()) { + let name = self.str(name_id); // Check for type keywords first if matches!( - name.as_str(), + name, "void" | "char" | "short" @@ -2621,7 +2645,7 @@ impl Parser<'_> { return true; } // Also check for typedef names - self.symbols.lookup_typedef(&name).is_some() + self.symbols.lookup_typedef(name_id).is_some() } else { false } @@ -2701,13 +2725,15 @@ impl Parser<'_> { }; // Bind to symbol table (like sparse's bind_symbol) - if !name.is_empty() { + // Note: StringId is Copy, check for empty by comparing to empty string + let name_str = self.str(name); + if !name_str.is_empty() { if is_typedef { // For typedef, the type being aliased is the declarator type - let sym = Symbol::typedef(name.clone(), typ, self.symbols.depth()); + let sym = Symbol::typedef(name, typ, self.symbols.depth()); let _ = self.symbols.declare(sym); } else { - let sym = Symbol::variable(name.clone(), typ, self.symbols.depth()); + let sym = Symbol::variable(name, typ, self.symbols.depth()); let _ = self.symbols.declare(sym); } } @@ -2740,12 +2766,13 @@ impl Parser<'_> { break; } - let name = match self.get_ident_name(self.current()) { - Some(n) => n, + let name_id = match self.get_ident_id(self.current()) { + Some(id) => id, None => break, }; + let name = self.str(name_id); - match name.as_str() { + match name { // Skip __attribute__ in the type specifier loop "__attribute__" | "__attribute" => { self.skip_extensions(); @@ -2871,7 +2898,7 @@ impl Parser<'_> { // Check if it's a typedef name // Only consume the typedef if we haven't already seen a base type if base_kind.is_none() { - if let Some(typedef_type_id) = self.symbols.lookup_typedef(&name) { + if let Some(typedef_type_id) = self.symbols.lookup_typedef(name_id) { self.advance(); // Get the underlying type and merge in any modifiers we collected let typedef_type = self.types.get(typedef_type_id); @@ -2924,10 +2951,7 @@ impl Parser<'_> { next_value }; - constants.push(EnumConstant { - name: name.clone(), - value, - }); + constants.push(EnumConstant { name, value }); next_value = value + 1; // Register enum constant in symbol table (Ordinary namespace) @@ -2949,7 +2973,7 @@ impl Parser<'_> { self.expect_special(b'}')?; let composite = CompositeType { - tag: tag.clone(), + tag, members: Vec::new(), enum_constants: constants, size: 4, @@ -2960,22 +2984,22 @@ impl Parser<'_> { let enum_type = Type::enum_type(composite); // Register tag if present - if let Some(ref tag_name) = tag { + if let Some(tag_name) = tag { let enum_type_id = self.types.intern(enum_type.clone()); - let sym = Symbol::tag(tag_name.clone(), enum_type_id, self.symbols.depth()); + let sym = Symbol::tag(tag_name, enum_type_id, self.symbols.depth()); let _ = self.symbols.declare(sym); } Ok(enum_type) } else { // Forward reference - look up existing tag - if let Some(ref tag_name) = tag { + if let Some(tag_name) = tag { // Look up or create incomplete type if let Some(existing) = self.symbols.lookup_tag(tag_name) { // Return a clone of the underlying type Ok(self.types.get(existing.typ).clone()) } else { - Ok(Type::incomplete_enum(tag_name.clone())) + Ok(Type::incomplete_enum(tag_name)) } } else { Err(ParseError::new( @@ -3033,7 +3057,7 @@ impl Parser<'_> { let width = self.parse_bitfield_width()?; members.push(StructMember { - name: String::new(), + name: StringId::EMPTY, typ: member_base_type_id, offset: 0, bit_offset: None, @@ -3091,7 +3115,7 @@ impl Parser<'_> { }; let composite = CompositeType { - tag: tag.clone(), + tag, members, enum_constants: Vec::new(), size, @@ -3106,23 +3130,23 @@ impl Parser<'_> { }; // Register tag if present - if let Some(ref tag_name) = tag { + if let Some(tag_name) = tag { let typ_id = self.types.intern(struct_type.clone()); - let sym = Symbol::tag(tag_name.clone(), typ_id, self.symbols.depth()); + let sym = Symbol::tag(tag_name, typ_id, self.symbols.depth()); let _ = self.symbols.declare(sym); } Ok(struct_type) } else { // Forward reference - if let Some(ref tag_name) = tag { + if let Some(tag_name) = tag { // Look up existing tag if let Some(existing) = self.symbols.lookup_tag(tag_name) { Ok(self.types.get(existing.typ).clone()) } else if is_union { - Ok(Type::incomplete_union(tag_name.clone())) + Ok(Type::incomplete_union(tag_name)) } else { - Ok(Type::incomplete_struct(tag_name.clone())) + Ok(Type::incomplete_struct(tag_name)) } } else { Err(ParseError::new( @@ -3140,7 +3164,7 @@ impl Parser<'_> { /// - `(*p)` means p is a pointer /// - `[3]` after the parens means "to array of 3" /// So p is "pointer to array of 3 ints" - fn parse_declarator(&mut self, base_type_id: TypeId) -> ParseResult<(String, TypeId)> { + fn parse_declarator(&mut self, base_type_id: TypeId) -> ParseResult<(StringId, TypeId)> { // Collect pointer modifiers (they bind tighter than array/function) let mut pointer_modifiers: Vec = Vec::new(); while self.is_special(b'*') { @@ -3434,7 +3458,7 @@ impl Parser<'_> { // Bind function to symbol table at current (global) scope // Like sparse's bind_symbol() in parse.c - let func_sym = Symbol::function(name.clone(), func_type_id, self.symbols.depth()); + let func_sym = Symbol::function(name, func_type_id, self.symbols.depth()); let _ = self.symbols.declare(func_sym); // Ignore redefinition errors for now // Enter function scope for parameters and body @@ -3442,9 +3466,8 @@ impl Parser<'_> { // Bind parameters in function scope for param in ¶ms { - if let Some(param_name) = ¶m.name { - let param_sym = - Symbol::parameter(param_name.clone(), param.typ, self.symbols.depth()); + if let Some(param_name) = param.name { + let param_sym = Symbol::parameter(param_name, param.typ, self.symbols.depth()); let _ = self.symbols.declare(param_sym); } } @@ -3543,10 +3566,10 @@ impl Parser<'_> { // Parse optional parameter name let param_name = if self.peek() == TokenType::Ident { // Check if it's actually a name (not a type keyword) - if let Some(name) = self.get_ident_name(self.current()) { + if let Some(name_id) = self.get_ident_id(self.current()) { if !self.is_declaration_start() { self.advance(); - Some(name) + Some(name_id) } else { None } @@ -3663,10 +3686,10 @@ impl Parser<'_> { // Add to symbol table if is_typedef { - let sym = Symbol::typedef(name.clone(), typ, self.symbols.depth()); + let sym = Symbol::typedef(name, typ, self.symbols.depth()); let _ = self.symbols.declare(sym); } else { - let var_sym = Symbol::variable(name.clone(), typ, self.symbols.depth()); + let var_sym = Symbol::variable(name, typ, self.symbols.depth()); let _ = self.symbols.declare(var_sym); } @@ -3750,10 +3773,10 @@ impl Parser<'_> { // Add to symbol table if is_typedef { - let sym = Symbol::typedef(name.clone(), full_typ, self.symbols.depth()); + let sym = Symbol::typedef(name, full_typ, self.symbols.depth()); let _ = self.symbols.declare(sym); } else { - let var_sym = Symbol::variable(name.clone(), full_typ, self.symbols.depth()); + let var_sym = Symbol::variable(name, full_typ, self.symbols.depth()); let _ = self.symbols.declare(var_sym); } @@ -3788,7 +3811,7 @@ impl Parser<'_> { let param_type_ids: Vec = params.iter().map(|p| p.typ).collect(); let func_type = Type::function(typ_id, param_type_ids.clone(), variadic); let func_type_id = self.types.intern(func_type); - let func_sym = Symbol::function(name.clone(), func_type_id, self.symbols.depth()); + let func_sym = Symbol::function(name, func_type_id, self.symbols.depth()); let _ = self.symbols.declare(func_sym); // Enter function scope for parameters @@ -3796,9 +3819,9 @@ impl Parser<'_> { // Bind parameters in function scope for param in ¶ms { - if let Some(param_name) = ¶m.name { + if let Some(param_name) = param.name { let param_sym = - Symbol::parameter(param_name.clone(), param.typ, self.symbols.depth()); + Symbol::parameter(param_name, param.typ, self.symbols.depth()); let _ = self.symbols.declare(param_sym); } } @@ -3824,7 +3847,7 @@ impl Parser<'_> { let func_type_id = self.types.intern(func_type); // Add function declaration to symbol table so the variadic flag // is available when the function is called - let func_sym = Symbol::function(name.clone(), func_type_id, self.symbols.depth()); + let func_sym = Symbol::function(name, func_type_id, self.symbols.depth()); let _ = self.symbols.declare(func_sym); return Ok(ExternalDecl::Declaration(Declaration { declarators: vec![InitDeclarator { @@ -3881,11 +3904,11 @@ impl Parser<'_> { // Add to symbol table if is_typedef { - let sym = Symbol::typedef(name.clone(), var_type_id, self.symbols.depth()); + let sym = Symbol::typedef(name, var_type_id, self.symbols.depth()); let _ = self.symbols.declare(sym); } else { // Add global variable to symbol table so it can be referenced by later code - let var_sym = Symbol::variable(name.clone(), var_type_id, self.symbols.depth()); + let var_sym = Symbol::variable(name, var_type_id, self.symbols.depth()); let _ = self.symbols.declare(var_sym); } @@ -3913,10 +3936,10 @@ impl Parser<'_> { }; // Add to symbol table if is_typedef { - let sym = Symbol::typedef(decl_name.clone(), decl_type, self.symbols.depth()); + let sym = Symbol::typedef(decl_name, decl_type, self.symbols.depth()); let _ = self.symbols.declare(sym); } else { - let var_sym = Symbol::variable(decl_name.clone(), decl_type, self.symbols.depth()); + let var_sym = Symbol::variable(decl_name, decl_type, self.symbols.depth()); let _ = self.symbols.declare(var_sym); } declarators.push(InitDeclarator { @@ -3983,7 +4006,7 @@ impl Parser<'_> { } ExprKind::Ident { name } => { // Check for enum constant - self.symbols.get_enum_value(name) + self.symbols.get_enum_value(*name) } ExprKind::Conditional { cond, @@ -4053,19 +4076,26 @@ impl Parser<'_> { #[cfg(test)] mod tests { use super::*; + use crate::strings::StringTable; use crate::symbol::SymbolTable; use crate::token::lexer::Tokenizer; - fn parse_expr(input: &str) -> ParseResult<(Expr, TypeTable)> { - let mut tokenizer = Tokenizer::new(input.as_bytes(), 0); + fn parse_expr(input: &str) -> ParseResult<(Expr, TypeTable, StringTable)> { + let mut strings = StringTable::new(); + let mut tokenizer = Tokenizer::new(input.as_bytes(), 0, &mut strings); let tokens = tokenizer.tokenize(); - let idents = tokenizer.ident_table(); + drop(tokenizer); let mut symbols = SymbolTable::new(); let mut types = TypeTable::new(); - let mut parser = Parser::new(&tokens, idents, &mut symbols, &mut types); + let mut parser = Parser::new(&tokens, &strings, &mut symbols, &mut types); parser.skip_stream_tokens(); let expr = parser.parse_expression()?; - Ok((expr, types)) + Ok((expr, types, strings)) + } + + /// Helper to compare a StringId with a string literal + fn check_name(strings: &StringTable, id: StringId, expected: &str) { + assert_eq!(strings.get(id), expected); } // ======================================================================== @@ -4074,25 +4104,25 @@ mod tests { #[test] fn test_int_literal() { - let (expr, _types) = parse_expr("42").unwrap(); + let (expr, _types, _strings) = parse_expr("42").unwrap(); assert!(matches!(expr.kind, ExprKind::IntLit(42))); } #[test] fn test_hex_literal() { - let (expr, _types) = parse_expr("0xFF").unwrap(); + let (expr, _types, _strings) = parse_expr("0xFF").unwrap(); assert!(matches!(expr.kind, ExprKind::IntLit(255))); } #[test] fn test_octal_literal() { - let (expr, _types) = parse_expr("0777").unwrap(); + let (expr, _types, _strings) = parse_expr("0777").unwrap(); assert!(matches!(expr.kind, ExprKind::IntLit(511))); } #[test] fn test_float_literal() { - let (expr, _types) = parse_expr("3.14").unwrap(); + let (expr, _types, _strings) = parse_expr("3.14").unwrap(); match expr.kind { ExprKind::FloatLit(v) => assert!((v - 3.14).abs() < 0.001), _ => panic!("Expected FloatLit"), @@ -4101,19 +4131,19 @@ mod tests { #[test] fn test_char_literal() { - let (expr, _types) = parse_expr("'a'").unwrap(); + let (expr, _types, _strings) = parse_expr("'a'").unwrap(); assert!(matches!(expr.kind, ExprKind::CharLit('a'))); } #[test] fn test_char_escape() { - let (expr, _types) = parse_expr("'\\n'").unwrap(); + let (expr, _types, _strings) = parse_expr("'\\n'").unwrap(); assert!(matches!(expr.kind, ExprKind::CharLit('\n'))); } #[test] fn test_string_literal() { - let (expr, _types) = parse_expr("\"hello\"").unwrap(); + let (expr, _types, _strings) = parse_expr("\"hello\"").unwrap(); match expr.kind { ExprKind::StringLit(s) => assert_eq!(s, "hello"), _ => panic!("Expected StringLit"), @@ -4126,91 +4156,91 @@ mod tests { #[test] fn test_char_escape_newline() { - let (expr, _types) = parse_expr("'\\n'").unwrap(); + let (expr, _types, _strings) = parse_expr("'\\n'").unwrap(); assert!(matches!(expr.kind, ExprKind::CharLit('\n'))); } #[test] fn test_char_escape_tab() { - let (expr, _types) = parse_expr("'\\t'").unwrap(); + let (expr, _types, _strings) = parse_expr("'\\t'").unwrap(); assert!(matches!(expr.kind, ExprKind::CharLit('\t'))); } #[test] fn test_char_escape_carriage_return() { - let (expr, _types) = parse_expr("'\\r'").unwrap(); + let (expr, _types, _strings) = parse_expr("'\\r'").unwrap(); assert!(matches!(expr.kind, ExprKind::CharLit('\r'))); } #[test] fn test_char_escape_backslash() { - let (expr, _types) = parse_expr("'\\\\'").unwrap(); + let (expr, _types, _strings) = parse_expr("'\\\\'").unwrap(); assert!(matches!(expr.kind, ExprKind::CharLit('\\'))); } #[test] fn test_char_escape_single_quote() { - let (expr, _types) = parse_expr("'\\''").unwrap(); + let (expr, _types, _strings) = parse_expr("'\\''").unwrap(); assert!(matches!(expr.kind, ExprKind::CharLit('\''))); } #[test] fn test_char_escape_double_quote() { - let (expr, _types) = parse_expr("'\\\"'").unwrap(); + let (expr, _types, _strings) = parse_expr("'\\\"'").unwrap(); assert!(matches!(expr.kind, ExprKind::CharLit('"'))); } #[test] fn test_char_escape_bell() { - let (expr, _types) = parse_expr("'\\a'").unwrap(); + let (expr, _types, _strings) = parse_expr("'\\a'").unwrap(); assert!(matches!(expr.kind, ExprKind::CharLit('\x07'))); } #[test] fn test_char_escape_backspace() { - let (expr, _types) = parse_expr("'\\b'").unwrap(); + let (expr, _types, _strings) = parse_expr("'\\b'").unwrap(); assert!(matches!(expr.kind, ExprKind::CharLit('\x08'))); } #[test] fn test_char_escape_formfeed() { - let (expr, _types) = parse_expr("'\\f'").unwrap(); + let (expr, _types, _strings) = parse_expr("'\\f'").unwrap(); assert!(matches!(expr.kind, ExprKind::CharLit('\x0C'))); } #[test] fn test_char_escape_vertical_tab() { - let (expr, _types) = parse_expr("'\\v'").unwrap(); + let (expr, _types, _strings) = parse_expr("'\\v'").unwrap(); assert!(matches!(expr.kind, ExprKind::CharLit('\x0B'))); } #[test] fn test_char_escape_null() { - let (expr, _types) = parse_expr("'\\0'").unwrap(); + let (expr, _types, _strings) = parse_expr("'\\0'").unwrap(); assert!(matches!(expr.kind, ExprKind::CharLit('\0'))); } #[test] fn test_char_escape_hex() { - let (expr, _types) = parse_expr("'\\x41'").unwrap(); + let (expr, _types, _strings) = parse_expr("'\\x41'").unwrap(); assert!(matches!(expr.kind, ExprKind::CharLit('A'))); } #[test] fn test_char_escape_hex_lowercase() { - let (expr, _types) = parse_expr("'\\x0a'").unwrap(); + let (expr, _types, _strings) = parse_expr("'\\x0a'").unwrap(); assert!(matches!(expr.kind, ExprKind::CharLit('\n'))); } #[test] fn test_char_escape_octal() { - let (expr, _types) = parse_expr("'\\101'").unwrap(); + let (expr, _types, _strings) = parse_expr("'\\101'").unwrap(); assert!(matches!(expr.kind, ExprKind::CharLit('A'))); // octal 101 = 65 = 'A' } #[test] fn test_char_escape_octal_012() { - let (expr, _types) = parse_expr("'\\012'").unwrap(); + let (expr, _types, _strings) = parse_expr("'\\012'").unwrap(); assert!(matches!(expr.kind, ExprKind::CharLit('\n'))); // octal 012 = 10 = '\n' } @@ -4220,7 +4250,7 @@ mod tests { #[test] fn test_string_escape_newline() { - let (expr, _types) = parse_expr("\"hello\\nworld\"").unwrap(); + let (expr, _types, _strings) = parse_expr("\"hello\\nworld\"").unwrap(); match expr.kind { ExprKind::StringLit(s) => assert_eq!(s, "hello\nworld"), _ => panic!("Expected StringLit"), @@ -4229,7 +4259,7 @@ mod tests { #[test] fn test_string_escape_tab() { - let (expr, _types) = parse_expr("\"hello\\tworld\"").unwrap(); + let (expr, _types, _strings) = parse_expr("\"hello\\tworld\"").unwrap(); match expr.kind { ExprKind::StringLit(s) => assert_eq!(s, "hello\tworld"), _ => panic!("Expected StringLit"), @@ -4238,7 +4268,7 @@ mod tests { #[test] fn test_string_escape_carriage_return() { - let (expr, _types) = parse_expr("\"hello\\rworld\"").unwrap(); + let (expr, _types, _strings) = parse_expr("\"hello\\rworld\"").unwrap(); match expr.kind { ExprKind::StringLit(s) => assert_eq!(s, "hello\rworld"), _ => panic!("Expected StringLit"), @@ -4247,7 +4277,7 @@ mod tests { #[test] fn test_string_escape_backslash() { - let (expr, _types) = parse_expr("\"hello\\\\world\"").unwrap(); + let (expr, _types, _strings) = parse_expr("\"hello\\\\world\"").unwrap(); match expr.kind { ExprKind::StringLit(s) => assert_eq!(s, "hello\\world"), _ => panic!("Expected StringLit"), @@ -4256,7 +4286,7 @@ mod tests { #[test] fn test_string_escape_double_quote() { - let (expr, _types) = parse_expr("\"hello\\\"world\"").unwrap(); + let (expr, _types, _strings) = parse_expr("\"hello\\\"world\"").unwrap(); match expr.kind { ExprKind::StringLit(s) => assert_eq!(s, "hello\"world"), _ => panic!("Expected StringLit"), @@ -4265,7 +4295,7 @@ mod tests { #[test] fn test_string_escape_bell() { - let (expr, _types) = parse_expr("\"\\a\"").unwrap(); + let (expr, _types, _strings) = parse_expr("\"\\a\"").unwrap(); match expr.kind { ExprKind::StringLit(s) => assert_eq!(s, "\x07"), _ => panic!("Expected StringLit"), @@ -4274,7 +4304,7 @@ mod tests { #[test] fn test_string_escape_backspace() { - let (expr, _types) = parse_expr("\"\\b\"").unwrap(); + let (expr, _types, _strings) = parse_expr("\"\\b\"").unwrap(); match expr.kind { ExprKind::StringLit(s) => assert_eq!(s, "\x08"), _ => panic!("Expected StringLit"), @@ -4283,7 +4313,7 @@ mod tests { #[test] fn test_string_escape_formfeed() { - let (expr, _types) = parse_expr("\"\\f\"").unwrap(); + let (expr, _types, _strings) = parse_expr("\"\\f\"").unwrap(); match expr.kind { ExprKind::StringLit(s) => assert_eq!(s, "\x0C"), _ => panic!("Expected StringLit"), @@ -4292,7 +4322,7 @@ mod tests { #[test] fn test_string_escape_vertical_tab() { - let (expr, _types) = parse_expr("\"\\v\"").unwrap(); + let (expr, _types, _strings) = parse_expr("\"\\v\"").unwrap(); match expr.kind { ExprKind::StringLit(s) => assert_eq!(s, "\x0B"), _ => panic!("Expected StringLit"), @@ -4301,7 +4331,7 @@ mod tests { #[test] fn test_string_escape_null() { - let (expr, _types) = parse_expr("\"hello\\0world\"").unwrap(); + let (expr, _types, _strings) = parse_expr("\"hello\\0world\"").unwrap(); match expr.kind { ExprKind::StringLit(s) => { assert_eq!(s.len(), 11); @@ -4313,7 +4343,7 @@ mod tests { #[test] fn test_string_escape_hex() { - let (expr, _types) = parse_expr("\"\\x41\\x42\\x43\"").unwrap(); + let (expr, _types, _strings) = parse_expr("\"\\x41\\x42\\x43\"").unwrap(); match expr.kind { ExprKind::StringLit(s) => assert_eq!(s, "ABC"), _ => panic!("Expected StringLit"), @@ -4322,7 +4352,7 @@ mod tests { #[test] fn test_string_escape_octal() { - let (expr, _types) = parse_expr("\"\\101\\102\\103\"").unwrap(); + let (expr, _types, _strings) = parse_expr("\"\\101\\102\\103\"").unwrap(); match expr.kind { ExprKind::StringLit(s) => assert_eq!(s, "ABC"), // octal 101,102,103 = A,B,C _ => panic!("Expected StringLit"), @@ -4331,7 +4361,7 @@ mod tests { #[test] fn test_string_escape_octal_012() { - let (expr, _types) = parse_expr("\"line1\\012line2\"").unwrap(); + let (expr, _types, _strings) = parse_expr("\"line1\\012line2\"").unwrap(); match expr.kind { ExprKind::StringLit(s) => assert_eq!(s, "line1\nline2"), // octal 012 = newline _ => panic!("Expected StringLit"), @@ -4340,7 +4370,7 @@ mod tests { #[test] fn test_string_multiple_escapes() { - let (expr, _types) = parse_expr("\"\\t\\n\\r\\\\\"").unwrap(); + let (expr, _types, _strings) = parse_expr("\"\\t\\n\\r\\\\\"").unwrap(); match expr.kind { ExprKind::StringLit(s) => assert_eq!(s, "\t\n\r\\"), _ => panic!("Expected StringLit"), @@ -4349,7 +4379,7 @@ mod tests { #[test] fn test_string_mixed_content() { - let (expr, _types) = parse_expr("\"Name:\\tJohn\\nAge:\\t30\"").unwrap(); + let (expr, _types, _strings) = parse_expr("\"Name:\\tJohn\\nAge:\\t30\"").unwrap(); match expr.kind { ExprKind::StringLit(s) => assert_eq!(s, "Name:\tJohn\nAge:\t30"), _ => panic!("Expected StringLit"), @@ -4358,7 +4388,7 @@ mod tests { #[test] fn test_string_empty() { - let (expr, _types) = parse_expr("\"\"").unwrap(); + let (expr, _types, _strings) = parse_expr("\"\"").unwrap(); match expr.kind { ExprKind::StringLit(s) => assert_eq!(s, ""), _ => panic!("Expected StringLit"), @@ -4368,60 +4398,60 @@ mod tests { #[test] fn test_integer_literal_suffixes() { // Plain int - let (expr, types) = parse_expr("42").unwrap(); + let (expr, types, _strings) = parse_expr("42").unwrap(); assert_eq!(types.kind(expr.typ.unwrap()), TypeKind::Int); assert!(!types.is_unsigned(expr.typ.unwrap())); // Unsigned int - let (expr, types) = parse_expr("42U").unwrap(); + let (expr, types, _strings) = parse_expr("42U").unwrap(); assert_eq!(types.kind(expr.typ.unwrap()), TypeKind::Int); assert!(types.is_unsigned(expr.typ.unwrap())); // Long - let (expr, types) = parse_expr("42L").unwrap(); + let (expr, types, _strings) = parse_expr("42L").unwrap(); assert_eq!(types.kind(expr.typ.unwrap()), TypeKind::Long); assert!(!types.is_unsigned(expr.typ.unwrap())); // Unsigned long (UL) - let (expr, types) = parse_expr("42UL").unwrap(); + let (expr, types, _strings) = parse_expr("42UL").unwrap(); assert_eq!(types.kind(expr.typ.unwrap()), TypeKind::Long); assert!(types.is_unsigned(expr.typ.unwrap())); // Unsigned long (LU) - let (expr, types) = parse_expr("42LU").unwrap(); + let (expr, types, _strings) = parse_expr("42LU").unwrap(); assert_eq!(types.kind(expr.typ.unwrap()), TypeKind::Long); assert!(types.is_unsigned(expr.typ.unwrap())); // Long long - let (expr, types) = parse_expr("42LL").unwrap(); + let (expr, types, _strings) = parse_expr("42LL").unwrap(); assert_eq!(types.kind(expr.typ.unwrap()), TypeKind::LongLong); assert!(!types.is_unsigned(expr.typ.unwrap())); // Unsigned long long (ULL) - let (expr, types) = parse_expr("42ULL").unwrap(); + let (expr, types, _strings) = parse_expr("42ULL").unwrap(); assert_eq!(types.kind(expr.typ.unwrap()), TypeKind::LongLong); assert!(types.is_unsigned(expr.typ.unwrap())); // Unsigned long long (LLU) - let (expr, types) = parse_expr("42LLU").unwrap(); + let (expr, types, _strings) = parse_expr("42LLU").unwrap(); assert_eq!(types.kind(expr.typ.unwrap()), TypeKind::LongLong); assert!(types.is_unsigned(expr.typ.unwrap())); // Hex with suffix - let (expr, types) = parse_expr("0xFFLL").unwrap(); + let (expr, types, _strings) = parse_expr("0xFFLL").unwrap(); assert_eq!(types.kind(expr.typ.unwrap()), TypeKind::LongLong); assert!(!types.is_unsigned(expr.typ.unwrap())); - let (expr, types) = parse_expr("0xFFULL").unwrap(); + let (expr, types, _strings) = parse_expr("0xFFULL").unwrap(); assert_eq!(types.kind(expr.typ.unwrap()), TypeKind::LongLong); assert!(types.is_unsigned(expr.typ.unwrap())); } #[test] fn test_identifier() { - let (expr, _types) = parse_expr("foo").unwrap(); + let (expr, _types, strings) = parse_expr("foo").unwrap(); match expr.kind { - ExprKind::Ident { name, .. } => assert_eq!(name, "foo"), + ExprKind::Ident { name, .. } => check_name(&strings, name, "foo"), _ => panic!("Expected Ident"), } } @@ -4432,7 +4462,7 @@ mod tests { #[test] fn test_addition() { - let (expr, _types) = parse_expr("1 + 2").unwrap(); + let (expr, _types, _strings) = parse_expr("1 + 2").unwrap(); match expr.kind { ExprKind::Binary { op, left, right } => { assert_eq!(op, BinaryOp::Add); @@ -4445,7 +4475,7 @@ mod tests { #[test] fn test_subtraction() { - let (expr, _types) = parse_expr("5 - 3").unwrap(); + let (expr, _types, _strings) = parse_expr("5 - 3").unwrap(); match expr.kind { ExprKind::Binary { op, .. } => assert_eq!(op, BinaryOp::Sub), _ => panic!("Expected Binary"), @@ -4454,7 +4484,7 @@ mod tests { #[test] fn test_multiplication() { - let (expr, _types) = parse_expr("2 * 3").unwrap(); + let (expr, _types, _strings) = parse_expr("2 * 3").unwrap(); match expr.kind { ExprKind::Binary { op, .. } => assert_eq!(op, BinaryOp::Mul), _ => panic!("Expected Binary"), @@ -4463,7 +4493,7 @@ mod tests { #[test] fn test_division() { - let (expr, _types) = parse_expr("10 / 2").unwrap(); + let (expr, _types, _strings) = parse_expr("10 / 2").unwrap(); match expr.kind { ExprKind::Binary { op, .. } => assert_eq!(op, BinaryOp::Div), _ => panic!("Expected Binary"), @@ -4472,7 +4502,7 @@ mod tests { #[test] fn test_modulo() { - let (expr, _types) = parse_expr("10 % 3").unwrap(); + let (expr, _types, _strings) = parse_expr("10 % 3").unwrap(); match expr.kind { ExprKind::Binary { op, .. } => assert_eq!(op, BinaryOp::Mod), _ => panic!("Expected Binary"), @@ -4482,7 +4512,7 @@ mod tests { #[test] fn test_precedence_mul_add() { // 1 + 2 * 3 should be 1 + (2 * 3) - let (expr, _types) = parse_expr("1 + 2 * 3").unwrap(); + let (expr, _types, _strings) = parse_expr("1 + 2 * 3").unwrap(); match expr.kind { ExprKind::Binary { op, left, right } => { assert_eq!(op, BinaryOp::Add); @@ -4499,7 +4529,7 @@ mod tests { #[test] fn test_left_associativity() { // 1 - 2 - 3 should be (1 - 2) - 3 - let (expr, _types) = parse_expr("1 - 2 - 3").unwrap(); + let (expr, _types, _strings) = parse_expr("1 - 2 - 3").unwrap(); match expr.kind { ExprKind::Binary { op, left, right } => { assert_eq!(op, BinaryOp::Sub); @@ -4519,25 +4549,25 @@ mod tests { #[test] fn test_comparison_ops() { - let (expr, _types) = parse_expr("a < b").unwrap(); + let (expr, _types, _strings) = parse_expr("a < b").unwrap(); match expr.kind { ExprKind::Binary { op, .. } => assert_eq!(op, BinaryOp::Lt), _ => panic!("Expected Binary"), } - let (expr, _types) = parse_expr("a > b").unwrap(); + let (expr, _types, _strings) = parse_expr("a > b").unwrap(); match expr.kind { ExprKind::Binary { op, .. } => assert_eq!(op, BinaryOp::Gt), _ => panic!("Expected Binary"), } - let (expr, _types) = parse_expr("a <= b").unwrap(); + let (expr, _types, _strings) = parse_expr("a <= b").unwrap(); match expr.kind { ExprKind::Binary { op, .. } => assert_eq!(op, BinaryOp::Le), _ => panic!("Expected Binary"), } - let (expr, _types) = parse_expr("a >= b").unwrap(); + let (expr, _types, _strings) = parse_expr("a >= b").unwrap(); match expr.kind { ExprKind::Binary { op, .. } => assert_eq!(op, BinaryOp::Ge), _ => panic!("Expected Binary"), @@ -4546,13 +4576,13 @@ mod tests { #[test] fn test_equality_ops() { - let (expr, _types) = parse_expr("a == b").unwrap(); + let (expr, _types, _strings) = parse_expr("a == b").unwrap(); match expr.kind { ExprKind::Binary { op, .. } => assert_eq!(op, BinaryOp::Eq), _ => panic!("Expected Binary"), } - let (expr, _types) = parse_expr("a != b").unwrap(); + let (expr, _types, _strings) = parse_expr("a != b").unwrap(); match expr.kind { ExprKind::Binary { op, .. } => assert_eq!(op, BinaryOp::Ne), _ => panic!("Expected Binary"), @@ -4561,13 +4591,13 @@ mod tests { #[test] fn test_logical_ops() { - let (expr, _types) = parse_expr("a && b").unwrap(); + let (expr, _types, _strings) = parse_expr("a && b").unwrap(); match expr.kind { ExprKind::Binary { op, .. } => assert_eq!(op, BinaryOp::LogAnd), _ => panic!("Expected Binary"), } - let (expr, _types) = parse_expr("a || b").unwrap(); + let (expr, _types, _strings) = parse_expr("a || b").unwrap(); match expr.kind { ExprKind::Binary { op, .. } => assert_eq!(op, BinaryOp::LogOr), _ => panic!("Expected Binary"), @@ -4576,19 +4606,19 @@ mod tests { #[test] fn test_bitwise_ops() { - let (expr, _types) = parse_expr("a & b").unwrap(); + let (expr, _types, _strings) = parse_expr("a & b").unwrap(); match expr.kind { ExprKind::Binary { op, .. } => assert_eq!(op, BinaryOp::BitAnd), _ => panic!("Expected Binary"), } - let (expr, _types) = parse_expr("a | b").unwrap(); + let (expr, _types, _strings) = parse_expr("a | b").unwrap(); match expr.kind { ExprKind::Binary { op, .. } => assert_eq!(op, BinaryOp::BitOr), _ => panic!("Expected Binary"), } - let (expr, _types) = parse_expr("a ^ b").unwrap(); + let (expr, _types, _strings) = parse_expr("a ^ b").unwrap(); match expr.kind { ExprKind::Binary { op, .. } => assert_eq!(op, BinaryOp::BitXor), _ => panic!("Expected Binary"), @@ -4597,13 +4627,13 @@ mod tests { #[test] fn test_shift_ops() { - let (expr, _types) = parse_expr("a << b").unwrap(); + let (expr, _types, _strings) = parse_expr("a << b").unwrap(); match expr.kind { ExprKind::Binary { op, .. } => assert_eq!(op, BinaryOp::Shl), _ => panic!("Expected Binary"), } - let (expr, _types) = parse_expr("a >> b").unwrap(); + let (expr, _types, _strings) = parse_expr("a >> b").unwrap(); match expr.kind { ExprKind::Binary { op, .. } => assert_eq!(op, BinaryOp::Shr), _ => panic!("Expected Binary"), @@ -4616,12 +4646,12 @@ mod tests { #[test] fn test_unary_neg() { - let (expr, _types) = parse_expr("-x").unwrap(); + let (expr, _types, strings) = parse_expr("-x").unwrap(); match expr.kind { ExprKind::Unary { op, operand } => { assert_eq!(op, UnaryOp::Neg); match operand.kind { - ExprKind::Ident { name, .. } => assert_eq!(name, "x"), + ExprKind::Ident { name, .. } => check_name(&strings, name, "x"), _ => panic!("Expected Ident"), } } @@ -4631,7 +4661,7 @@ mod tests { #[test] fn test_unary_not() { - let (expr, _types) = parse_expr("!x").unwrap(); + let (expr, _types, _strings) = parse_expr("!x").unwrap(); match expr.kind { ExprKind::Unary { op, .. } => assert_eq!(op, UnaryOp::Not), _ => panic!("Expected Unary"), @@ -4640,7 +4670,7 @@ mod tests { #[test] fn test_unary_bitnot() { - let (expr, _types) = parse_expr("~x").unwrap(); + let (expr, _types, _strings) = parse_expr("~x").unwrap(); match expr.kind { ExprKind::Unary { op, .. } => assert_eq!(op, UnaryOp::BitNot), _ => panic!("Expected Unary"), @@ -4649,7 +4679,7 @@ mod tests { #[test] fn test_unary_addr() { - let (expr, _types) = parse_expr("&x").unwrap(); + let (expr, _types, _strings) = parse_expr("&x").unwrap(); match expr.kind { ExprKind::Unary { op, .. } => assert_eq!(op, UnaryOp::AddrOf), _ => panic!("Expected Unary"), @@ -4658,7 +4688,7 @@ mod tests { #[test] fn test_unary_deref() { - let (expr, _types) = parse_expr("*p").unwrap(); + let (expr, _types, _strings) = parse_expr("*p").unwrap(); match expr.kind { ExprKind::Unary { op, .. } => assert_eq!(op, UnaryOp::Deref), _ => panic!("Expected Unary"), @@ -4667,7 +4697,7 @@ mod tests { #[test] fn test_pre_increment() { - let (expr, _types) = parse_expr("++x").unwrap(); + let (expr, _types, _strings) = parse_expr("++x").unwrap(); match expr.kind { ExprKind::Unary { op, .. } => assert_eq!(op, UnaryOp::PreInc), _ => panic!("Expected Unary"), @@ -4676,7 +4706,7 @@ mod tests { #[test] fn test_pre_decrement() { - let (expr, _types) = parse_expr("--x").unwrap(); + let (expr, _types, _strings) = parse_expr("--x").unwrap(); match expr.kind { ExprKind::Unary { op, .. } => assert_eq!(op, UnaryOp::PreDec), _ => panic!("Expected Unary"), @@ -4689,23 +4719,23 @@ mod tests { #[test] fn test_post_increment() { - let (expr, _types) = parse_expr("x++").unwrap(); + let (expr, _types, _strings) = parse_expr("x++").unwrap(); assert!(matches!(expr.kind, ExprKind::PostInc(_))); } #[test] fn test_post_decrement() { - let (expr, _types) = parse_expr("x--").unwrap(); + let (expr, _types, _strings) = parse_expr("x--").unwrap(); assert!(matches!(expr.kind, ExprKind::PostDec(_))); } #[test] fn test_array_subscript() { - let (expr, _types) = parse_expr("arr[5]").unwrap(); + let (expr, _types, strings) = parse_expr("arr[5]").unwrap(); match expr.kind { ExprKind::Index { array, index } => { match array.kind { - ExprKind::Ident { name, .. } => assert_eq!(name, "arr"), + ExprKind::Ident { name, .. } => check_name(&strings, name, "arr"), _ => panic!("Expected Ident"), } assert!(matches!(index.kind, ExprKind::IntLit(5))); @@ -4716,14 +4746,14 @@ mod tests { #[test] fn test_member_access() { - let (expr, _types) = parse_expr("obj.field").unwrap(); + let (expr, _types, strings) = parse_expr("obj.field").unwrap(); match expr.kind { ExprKind::Member { expr, member } => { match expr.kind { - ExprKind::Ident { name, .. } => assert_eq!(name, "obj"), + ExprKind::Ident { name, .. } => check_name(&strings, name, "obj"), _ => panic!("Expected Ident"), } - assert_eq!(member, "field"); + check_name(&strings, member, "field"); } _ => panic!("Expected Member"), } @@ -4731,14 +4761,14 @@ mod tests { #[test] fn test_arrow_access() { - let (expr, _types) = parse_expr("ptr->field").unwrap(); + let (expr, _types, strings) = parse_expr("ptr->field").unwrap(); match expr.kind { ExprKind::Arrow { expr, member } => { match expr.kind { - ExprKind::Ident { name, .. } => assert_eq!(name, "ptr"), + ExprKind::Ident { name, .. } => check_name(&strings, name, "ptr"), _ => panic!("Expected Ident"), } - assert_eq!(member, "field"); + check_name(&strings, member, "field"); } _ => panic!("Expected Arrow"), } @@ -4746,11 +4776,11 @@ mod tests { #[test] fn test_function_call_no_args() { - let (expr, _types) = parse_expr("foo()").unwrap(); + let (expr, _types, strings) = parse_expr("foo()").unwrap(); match expr.kind { ExprKind::Call { func, args } => { match func.kind { - ExprKind::Ident { name, .. } => assert_eq!(name, "foo"), + ExprKind::Ident { name, .. } => check_name(&strings, name, "foo"), _ => panic!("Expected Ident"), } assert!(args.is_empty()); @@ -4761,11 +4791,11 @@ mod tests { #[test] fn test_function_call_with_args() { - let (expr, _types) = parse_expr("foo(1, 2, 3)").unwrap(); + let (expr, _types, strings) = parse_expr("foo(1, 2, 3)").unwrap(); match expr.kind { ExprKind::Call { func, args } => { match func.kind { - ExprKind::Ident { name, .. } => assert_eq!(name, "foo"), + ExprKind::Ident { name, .. } => check_name(&strings, name, "foo"), _ => panic!("Expected Ident"), } assert_eq!(args.len(), 3); @@ -4777,16 +4807,16 @@ mod tests { #[test] fn test_chained_postfix() { // obj.arr[0]->next - let (expr, _types) = parse_expr("obj.arr[0]").unwrap(); + let (expr, _types, strings) = parse_expr("obj.arr[0]").unwrap(); match expr.kind { ExprKind::Index { array, index } => { match array.kind { ExprKind::Member { expr, member } => { match expr.kind { - ExprKind::Ident { name, .. } => assert_eq!(name, "obj"), + ExprKind::Ident { name, .. } => check_name(&strings, name, "obj"), _ => panic!("Expected Ident"), } - assert_eq!(member, "arr"); + check_name(&strings, member, "arr"); } _ => panic!("Expected Member"), } @@ -4802,12 +4832,12 @@ mod tests { #[test] fn test_simple_assignment() { - let (expr, _types) = parse_expr("x = 5").unwrap(); + let (expr, _types, strings) = parse_expr("x = 5").unwrap(); match expr.kind { ExprKind::Assign { op, target, value } => { assert_eq!(op, AssignOp::Assign); match target.kind { - ExprKind::Ident { name, .. } => assert_eq!(name, "x"), + ExprKind::Ident { name, .. } => check_name(&strings, name, "x"), _ => panic!("Expected Ident"), } assert!(matches!(value.kind, ExprKind::IntLit(5))); @@ -4818,19 +4848,19 @@ mod tests { #[test] fn test_compound_assignments() { - let (expr, _types) = parse_expr("x += 5").unwrap(); + let (expr, _types, _strings) = parse_expr("x += 5").unwrap(); match expr.kind { ExprKind::Assign { op, .. } => assert_eq!(op, AssignOp::AddAssign), _ => panic!("Expected Assign"), } - let (expr, _types) = parse_expr("x -= 5").unwrap(); + let (expr, _types, _strings) = parse_expr("x -= 5").unwrap(); match expr.kind { ExprKind::Assign { op, .. } => assert_eq!(op, AssignOp::SubAssign), _ => panic!("Expected Assign"), } - let (expr, _types) = parse_expr("x *= 5").unwrap(); + let (expr, _types, _strings) = parse_expr("x *= 5").unwrap(); match expr.kind { ExprKind::Assign { op, .. } => assert_eq!(op, AssignOp::MulAssign), _ => panic!("Expected Assign"), @@ -4840,16 +4870,16 @@ mod tests { #[test] fn test_assignment_right_associativity() { // a = b = c should be a = (b = c) - let (expr, _types) = parse_expr("a = b = c").unwrap(); + let (expr, _types, strings) = parse_expr("a = b = c").unwrap(); match expr.kind { ExprKind::Assign { target, value, .. } => { match target.kind { - ExprKind::Ident { name, .. } => assert_eq!(name, "a"), + ExprKind::Ident { name, .. } => check_name(&strings, name, "a"), _ => panic!("Expected Ident"), } match value.kind { ExprKind::Assign { target, .. } => match target.kind { - ExprKind::Ident { name, .. } => assert_eq!(name, "b"), + ExprKind::Ident { name, .. } => check_name(&strings, name, "b"), _ => panic!("Expected Ident"), }, _ => panic!("Expected nested Assign"), @@ -4865,7 +4895,7 @@ mod tests { #[test] fn test_ternary() { - let (expr, _types) = parse_expr("a ? b : c").unwrap(); + let (expr, _types, strings) = parse_expr("a ? b : c").unwrap(); match expr.kind { ExprKind::Conditional { cond, @@ -4873,15 +4903,15 @@ mod tests { else_expr, } => { match cond.kind { - ExprKind::Ident { name, .. } => assert_eq!(name, "a"), + ExprKind::Ident { name, .. } => check_name(&strings, name, "a"), _ => panic!("Expected Ident"), } match then_expr.kind { - ExprKind::Ident { name, .. } => assert_eq!(name, "b"), + ExprKind::Ident { name, .. } => check_name(&strings, name, "b"), _ => panic!("Expected Ident"), } match else_expr.kind { - ExprKind::Ident { name, .. } => assert_eq!(name, "c"), + ExprKind::Ident { name, .. } => check_name(&strings, name, "c"), _ => panic!("Expected Ident"), } } @@ -4892,7 +4922,7 @@ mod tests { #[test] fn test_nested_ternary() { // a ? b : c ? d : e should be a ? b : (c ? d : e) - let (expr, _types) = parse_expr("a ? b : c ? d : e").unwrap(); + let (expr, _types, _strings) = parse_expr("a ? b : c ? d : e").unwrap(); match expr.kind { ExprKind::Conditional { else_expr, .. } => { assert!(matches!(else_expr.kind, ExprKind::Conditional { .. })); @@ -4907,7 +4937,7 @@ mod tests { #[test] fn test_comma_expr() { - let (expr, _types) = parse_expr("a, b, c").unwrap(); + let (expr, _types, _strings) = parse_expr("a, b, c").unwrap(); match expr.kind { ExprKind::Comma(exprs) => assert_eq!(exprs.len(), 3), _ => panic!("Expected Comma"), @@ -4920,13 +4950,13 @@ mod tests { #[test] fn test_sizeof_expr() { - let (expr, _types) = parse_expr("sizeof x").unwrap(); + let (expr, _types, _strings) = parse_expr("sizeof x").unwrap(); assert!(matches!(expr.kind, ExprKind::SizeofExpr(_))); } #[test] fn test_sizeof_type() { - let (expr, types) = parse_expr("sizeof(int)").unwrap(); + let (expr, types, _strings) = parse_expr("sizeof(int)").unwrap(); match expr.kind { ExprKind::SizeofType(typ) => assert_eq!(types.kind(typ), TypeKind::Int), _ => panic!("Expected SizeofType"), @@ -4936,7 +4966,7 @@ mod tests { #[test] fn test_sizeof_paren_expr() { // sizeof(x) where x is not a type - let (expr, _types) = parse_expr("sizeof(x)").unwrap(); + let (expr, _types, _strings) = parse_expr("sizeof(x)").unwrap(); assert!(matches!(expr.kind, ExprKind::SizeofExpr(_))); } @@ -4946,12 +4976,12 @@ mod tests { #[test] fn test_cast() { - let (expr, types) = parse_expr("(int)x").unwrap(); + let (expr, types, strings) = parse_expr("(int)x").unwrap(); match expr.kind { ExprKind::Cast { cast_type, expr } => { assert_eq!(types.kind(cast_type), TypeKind::Int); match expr.kind { - ExprKind::Ident { name, .. } => assert_eq!(name, "x"), + ExprKind::Ident { name, .. } => check_name(&strings, name, "x"), _ => panic!("Expected Ident"), } } @@ -4961,7 +4991,7 @@ mod tests { #[test] fn test_cast_unsigned_char() { - let (expr, types) = parse_expr("(unsigned char)x").unwrap(); + let (expr, types, _strings) = parse_expr("(unsigned char)x").unwrap(); match expr.kind { ExprKind::Cast { cast_type, .. } => { assert_eq!(types.kind(cast_type), TypeKind::Char); @@ -4976,7 +5006,7 @@ mod tests { #[test] fn test_cast_signed_int() { - let (expr, types) = parse_expr("(signed int)x").unwrap(); + let (expr, types, _strings) = parse_expr("(signed int)x").unwrap(); match expr.kind { ExprKind::Cast { cast_type, .. } => { assert_eq!(types.kind(cast_type), TypeKind::Int); @@ -4991,7 +5021,7 @@ mod tests { #[test] fn test_cast_unsigned_long() { - let (expr, types) = parse_expr("(unsigned long)x").unwrap(); + let (expr, types, _strings) = parse_expr("(unsigned long)x").unwrap(); match expr.kind { ExprKind::Cast { cast_type, .. } => { assert_eq!(types.kind(cast_type), TypeKind::Long); @@ -5006,7 +5036,7 @@ mod tests { #[test] fn test_cast_long_long() { - let (expr, types) = parse_expr("(long long)x").unwrap(); + let (expr, types, _strings) = parse_expr("(long long)x").unwrap(); match expr.kind { ExprKind::Cast { cast_type, .. } => { assert_eq!(types.kind(cast_type), TypeKind::LongLong); @@ -5017,7 +5047,7 @@ mod tests { #[test] fn test_cast_unsigned_long_long() { - let (expr, types) = parse_expr("(unsigned long long)x").unwrap(); + let (expr, types, _strings) = parse_expr("(unsigned long long)x").unwrap(); match expr.kind { ExprKind::Cast { cast_type, .. } => { assert_eq!(types.kind(cast_type), TypeKind::LongLong); @@ -5032,7 +5062,7 @@ mod tests { #[test] fn test_cast_pointer() { - let (expr, types) = parse_expr("(int*)x").unwrap(); + let (expr, types, _strings) = parse_expr("(int*)x").unwrap(); match expr.kind { ExprKind::Cast { cast_type, .. } => { assert_eq!(types.kind(cast_type), TypeKind::Pointer); @@ -5045,7 +5075,7 @@ mod tests { #[test] fn test_cast_void_pointer() { - let (expr, types) = parse_expr("(void*)x").unwrap(); + let (expr, types, _strings) = parse_expr("(void*)x").unwrap(); match expr.kind { ExprKind::Cast { cast_type, .. } => { assert_eq!(types.kind(cast_type), TypeKind::Pointer); @@ -5058,7 +5088,7 @@ mod tests { #[test] fn test_cast_unsigned_char_pointer() { - let (expr, types) = parse_expr("(unsigned char*)x").unwrap(); + let (expr, types, _strings) = parse_expr("(unsigned char*)x").unwrap(); match expr.kind { ExprKind::Cast { cast_type, .. } => { assert_eq!(types.kind(cast_type), TypeKind::Pointer); @@ -5072,7 +5102,7 @@ mod tests { #[test] fn test_cast_const_int() { - let (expr, types) = parse_expr("(const int)x").unwrap(); + let (expr, types, _strings) = parse_expr("(const int)x").unwrap(); match expr.kind { ExprKind::Cast { cast_type, .. } => { assert_eq!(types.kind(cast_type), TypeKind::Int); @@ -5087,7 +5117,7 @@ mod tests { #[test] fn test_cast_double_pointer() { - let (expr, types) = parse_expr("(int**)x").unwrap(); + let (expr, types, _strings) = parse_expr("(int**)x").unwrap(); match expr.kind { ExprKind::Cast { cast_type, .. } => { assert_eq!(types.kind(cast_type), TypeKind::Pointer); @@ -5102,7 +5132,7 @@ mod tests { #[test] fn test_sizeof_compound_type() { - let (expr, types) = parse_expr("sizeof(unsigned long long)").unwrap(); + let (expr, types, _strings) = parse_expr("sizeof(unsigned long long)").unwrap(); match expr.kind { ExprKind::SizeofType(typ) => { assert_eq!(types.kind(typ), TypeKind::LongLong); @@ -5114,7 +5144,7 @@ mod tests { #[test] fn test_sizeof_pointer_type() { - let (expr, types) = parse_expr("sizeof(int*)").unwrap(); + let (expr, types, _strings) = parse_expr("sizeof(int*)").unwrap(); match expr.kind { ExprKind::SizeofType(typ) => { assert_eq!(types.kind(typ), TypeKind::Pointer); @@ -5129,7 +5159,7 @@ mod tests { #[test] fn test_parentheses() { - let (expr, _types) = parse_expr("(1 + 2) * 3").unwrap(); + let (expr, _types, _strings) = parse_expr("(1 + 2) * 3").unwrap(); match expr.kind { ExprKind::Binary { op, left, .. } => { assert_eq!(op, BinaryOp::Mul); @@ -5149,14 +5179,14 @@ mod tests { #[test] fn test_complex_expr() { // x = a + b * c - d / e - let (expr, _types) = parse_expr("x = a + b * c - d / e").unwrap(); + let (expr, _types, _strings) = parse_expr("x = a + b * c - d / e").unwrap(); assert!(matches!(expr.kind, ExprKind::Assign { .. })); } #[test] fn test_function_call_complex() { // foo(a + b, c * d) - let (expr, _types) = parse_expr("foo(a + b, c * d)").unwrap(); + let (expr, _types, _strings) = parse_expr("foo(a + b, c * d)").unwrap(); match expr.kind { ExprKind::Call { args, .. } => { assert_eq!(args.len(), 2); @@ -5182,7 +5212,7 @@ mod tests { #[test] fn test_pointer_arithmetic() { // *p++ - let (expr, _types) = parse_expr("*p++").unwrap(); + let (expr, _types, _strings) = parse_expr("*p++").unwrap(); match expr.kind { ExprKind::Unary { op: UnaryOp::Deref, @@ -5198,32 +5228,34 @@ mod tests { // Statement tests // ======================================================================== - fn parse_stmt(input: &str) -> ParseResult { - let mut tokenizer = Tokenizer::new(input.as_bytes(), 0); + fn parse_stmt(input: &str) -> ParseResult<(Stmt, StringTable)> { + let mut strings = StringTable::new(); + let mut tokenizer = Tokenizer::new(input.as_bytes(), 0, &mut strings); let tokens = tokenizer.tokenize(); - let idents = tokenizer.ident_table(); + drop(tokenizer); let mut symbols = SymbolTable::new(); let mut types = TypeTable::new(); - let mut parser = Parser::new(&tokens, idents, &mut symbols, &mut types); + let mut parser = Parser::new(&tokens, &strings, &mut symbols, &mut types); parser.skip_stream_tokens(); - parser.parse_statement() + let stmt = parser.parse_statement()?; + Ok((stmt, strings)) } #[test] fn test_empty_stmt() { - let stmt = parse_stmt(";").unwrap(); + let (stmt, _strings) = parse_stmt(";").unwrap(); assert!(matches!(stmt, Stmt::Empty)); } #[test] fn test_expr_stmt() { - let stmt = parse_stmt("x = 5;").unwrap(); + let (stmt, _strings) = parse_stmt("x = 5;").unwrap(); assert!(matches!(stmt, Stmt::Expr(_))); } #[test] fn test_if_stmt() { - let stmt = parse_stmt("if (x) y = 1;").unwrap(); + let (stmt, _strings) = parse_stmt("if (x) y = 1;").unwrap(); match stmt { Stmt::If { cond, @@ -5240,7 +5272,7 @@ mod tests { #[test] fn test_if_else_stmt() { - let stmt = parse_stmt("if (x) y = 1; else y = 2;").unwrap(); + let (stmt, _strings) = parse_stmt("if (x) y = 1; else y = 2;").unwrap(); match stmt { Stmt::If { else_stmt, .. } => { assert!(else_stmt.is_some()); @@ -5251,7 +5283,7 @@ mod tests { #[test] fn test_while_stmt() { - let stmt = parse_stmt("while (x) x--;").unwrap(); + let (stmt, _strings) = parse_stmt("while (x) x--;").unwrap(); match stmt { Stmt::While { cond, body } => { assert!(matches!(cond.kind, ExprKind::Ident { .. })); @@ -5263,7 +5295,7 @@ mod tests { #[test] fn test_do_while_stmt() { - let stmt = parse_stmt("do x++; while (x < 10);").unwrap(); + let (stmt, _strings) = parse_stmt("do x++; while (x < 10);").unwrap(); match stmt { Stmt::DoWhile { body, cond } => { assert!(matches!(*body, Stmt::Expr(_))); @@ -5281,7 +5313,7 @@ mod tests { #[test] fn test_for_stmt_basic() { - let stmt = parse_stmt("for (i = 0; i < 10; i++) x++;").unwrap(); + let (stmt, _strings) = parse_stmt("for (i = 0; i < 10; i++) x++;").unwrap(); match stmt { Stmt::For { init, @@ -5300,7 +5332,7 @@ mod tests { #[test] fn test_for_stmt_with_decl() { - let stmt = parse_stmt("for (int i = 0; i < 10; i++) x++;").unwrap(); + let (stmt, _strings) = parse_stmt("for (int i = 0; i < 10; i++) x++;").unwrap(); match stmt { Stmt::For { init, .. } => { assert!(matches!(init, Some(ForInit::Declaration(_)))); @@ -5311,7 +5343,7 @@ mod tests { #[test] fn test_for_stmt_empty() { - let stmt = parse_stmt("for (;;) ;").unwrap(); + let (stmt, _strings) = parse_stmt("for (;;) ;").unwrap(); match stmt { Stmt::For { init, @@ -5330,7 +5362,7 @@ mod tests { #[test] fn test_return_void() { - let stmt = parse_stmt("return;").unwrap(); + let (stmt, _strings) = parse_stmt("return;").unwrap(); match stmt { Stmt::Return(None) => {} _ => panic!("Expected Return(None)"), @@ -5339,7 +5371,7 @@ mod tests { #[test] fn test_return_value() { - let stmt = parse_stmt("return 42;").unwrap(); + let (stmt, _strings) = parse_stmt("return 42;").unwrap(); match stmt { Stmt::Return(Some(ref e)) => { assert!(matches!(e.kind, ExprKind::IntLit(42))); @@ -5350,31 +5382,31 @@ mod tests { #[test] fn test_break_stmt() { - let stmt = parse_stmt("break;").unwrap(); + let (stmt, _strings) = parse_stmt("break;").unwrap(); assert!(matches!(stmt, Stmt::Break)); } #[test] fn test_continue_stmt() { - let stmt = parse_stmt("continue;").unwrap(); + let (stmt, _strings) = parse_stmt("continue;").unwrap(); assert!(matches!(stmt, Stmt::Continue)); } #[test] fn test_goto_stmt() { - let stmt = parse_stmt("goto label;").unwrap(); + let (stmt, strings) = parse_stmt("goto label;").unwrap(); match stmt { - Stmt::Goto(name) => assert_eq!(name, "label"), + Stmt::Goto(name) => check_name(&strings, name, "label"), _ => panic!("Expected Goto"), } } #[test] fn test_labeled_stmt() { - let stmt = parse_stmt("label: x = 1;").unwrap(); + let (stmt, strings) = parse_stmt("label: x = 1;").unwrap(); match stmt { Stmt::Label { name, stmt } => { - assert_eq!(name, "label"); + check_name(&strings, name, "label"); assert!(matches!(*stmt, Stmt::Expr(_))); } _ => panic!("Expected Label"), @@ -5383,7 +5415,7 @@ mod tests { #[test] fn test_block_stmt() { - let stmt = parse_stmt("{ x = 1; y = 2; }").unwrap(); + let (stmt, _strings) = parse_stmt("{ x = 1; y = 2; }").unwrap(); match stmt { Stmt::Block(items) => { assert_eq!(items.len(), 2); @@ -5396,7 +5428,7 @@ mod tests { #[test] fn test_block_with_decl() { - let stmt = parse_stmt("{ int x = 1; x++; }").unwrap(); + let (stmt, _strings) = parse_stmt("{ int x = 1; x++; }").unwrap(); match stmt { Stmt::Block(items) => { assert_eq!(items.len(), 2); @@ -5411,58 +5443,59 @@ mod tests { // Declaration tests // ======================================================================== - fn parse_decl(input: &str) -> ParseResult<(Declaration, TypeTable)> { - let mut tokenizer = Tokenizer::new(input.as_bytes(), 0); + fn parse_decl(input: &str) -> ParseResult<(Declaration, TypeTable, StringTable)> { + let mut strings = StringTable::new(); + let mut tokenizer = Tokenizer::new(input.as_bytes(), 0, &mut strings); let tokens = tokenizer.tokenize(); - let idents = tokenizer.ident_table(); + drop(tokenizer); let mut symbols = SymbolTable::new(); let mut types = TypeTable::new(); - let mut parser = Parser::new(&tokens, idents, &mut symbols, &mut types); + let mut parser = Parser::new(&tokens, &strings, &mut symbols, &mut types); parser.skip_stream_tokens(); let decl = parser.parse_declaration()?; - Ok((decl, types)) + Ok((decl, types, strings)) } #[test] fn test_simple_decl() { - let (decl, types) = parse_decl("int x;").unwrap(); + let (decl, types, strings) = parse_decl("int x;").unwrap(); assert_eq!(decl.declarators.len(), 1); - assert_eq!(decl.declarators[0].name, "x"); + check_name(&strings, decl.declarators[0].name, "x"); assert_eq!(types.kind(decl.declarators[0].typ), TypeKind::Int); } #[test] fn test_decl_with_init() { - let (decl, _types) = parse_decl("int x = 5;").unwrap(); + let (decl, _types, _strings) = parse_decl("int x = 5;").unwrap(); assert_eq!(decl.declarators.len(), 1); assert!(decl.declarators[0].init.is_some()); } #[test] fn test_multiple_declarators() { - let (decl, _types) = parse_decl("int x, y, z;").unwrap(); + let (decl, _types, strings) = parse_decl("int x, y, z;").unwrap(); assert_eq!(decl.declarators.len(), 3); - assert_eq!(decl.declarators[0].name, "x"); - assert_eq!(decl.declarators[1].name, "y"); - assert_eq!(decl.declarators[2].name, "z"); + check_name(&strings, decl.declarators[0].name, "x"); + check_name(&strings, decl.declarators[1].name, "y"); + check_name(&strings, decl.declarators[2].name, "z"); } #[test] fn test_pointer_decl() { - let (decl, types) = parse_decl("int *p;").unwrap(); + let (decl, types, _strings) = parse_decl("int *p;").unwrap(); assert_eq!(types.kind(decl.declarators[0].typ), TypeKind::Pointer); } #[test] fn test_array_decl() { - let (decl, types) = parse_decl("int arr[10];").unwrap(); + let (decl, types, _strings) = parse_decl("int arr[10];").unwrap(); assert_eq!(types.kind(decl.declarators[0].typ), TypeKind::Array); assert_eq!(types.get(decl.declarators[0].typ).array_size, Some(10)); } #[test] fn test_const_decl() { - let (decl, types) = parse_decl("const int x = 5;").unwrap(); + let (decl, types, _strings) = parse_decl("const int x = 5;").unwrap(); assert!(types .get(decl.declarators[0].typ) .modifiers @@ -5471,7 +5504,7 @@ mod tests { #[test] fn test_unsigned_decl() { - let (decl, types) = parse_decl("unsigned int x;").unwrap(); + let (decl, types, _strings) = parse_decl("unsigned int x;").unwrap(); assert!(types .get(decl.declarators[0].typ) .modifiers @@ -5480,7 +5513,7 @@ mod tests { #[test] fn test_long_long_decl() { - let (decl, types) = parse_decl("long long x;").unwrap(); + let (decl, types, _strings) = parse_decl("long long x;").unwrap(); assert_eq!(types.kind(decl.declarators[0].typ), TypeKind::LongLong); } @@ -5488,36 +5521,38 @@ mod tests { // Function parsing tests // ======================================================================== - fn parse_func(input: &str) -> ParseResult<(FunctionDef, TypeTable)> { - let mut tokenizer = Tokenizer::new(input.as_bytes(), 0); + fn parse_func(input: &str) -> ParseResult<(FunctionDef, TypeTable, StringTable)> { + let mut strings = StringTable::new(); + let mut tokenizer = Tokenizer::new(input.as_bytes(), 0, &mut strings); let tokens = tokenizer.tokenize(); - let idents = tokenizer.ident_table(); + drop(tokenizer); let mut symbols = SymbolTable::new(); let mut types = TypeTable::new(); - let mut parser = Parser::new(&tokens, idents, &mut symbols, &mut types); + let mut parser = Parser::new(&tokens, &strings, &mut symbols, &mut types); parser.skip_stream_tokens(); let func = parser.parse_function_def()?; - Ok((func, types)) + Ok((func, types, strings)) } #[test] fn test_simple_function() { - let (func, types) = parse_func("int main() { return 0; }").unwrap(); - assert_eq!(func.name, "main"); + let (func, types, strings) = parse_func("int main() { return 0; }").unwrap(); + check_name(&strings, func.name, "main"); assert_eq!(types.kind(func.return_type), TypeKind::Int); assert!(func.params.is_empty()); } #[test] fn test_function_with_params() { - let (func, _types) = parse_func("int add(int a, int b) { return a + b; }").unwrap(); - assert_eq!(func.name, "add"); + let (func, _types, strings) = + parse_func("int add(int a, int b) { return a + b; }").unwrap(); + check_name(&strings, func.name, "add"); assert_eq!(func.params.len(), 2); } #[test] fn test_void_function() { - let (func, types) = parse_func("void foo(void) { }").unwrap(); + let (func, types, _strings) = parse_func("void foo(void) { }").unwrap(); assert_eq!(types.kind(func.return_type), TypeKind::Void); assert!(func.params.is_empty()); } @@ -5525,13 +5560,14 @@ mod tests { #[test] fn test_variadic_function() { // Variadic functions are parsed but variadic info is not tracked in FunctionDef - let (func, _types) = parse_func("int printf(char *fmt, ...) { return 0; }").unwrap(); - assert_eq!(func.name, "printf"); + let (func, _types, strings) = + parse_func("int printf(char *fmt, ...) { return 0; }").unwrap(); + check_name(&strings, func.name, "printf"); } #[test] fn test_pointer_return() { - let (func, types) = parse_func("int *getptr() { return 0; }").unwrap(); + let (func, types, _strings) = parse_func("int *getptr() { return 0; }").unwrap(); assert_eq!(types.kind(func.return_type), TypeKind::Pointer); } @@ -5539,34 +5575,35 @@ mod tests { // Translation unit tests // ======================================================================== - fn parse_tu(input: &str) -> ParseResult<(TranslationUnit, TypeTable)> { - let mut tokenizer = Tokenizer::new(input.as_bytes(), 0); + fn parse_tu(input: &str) -> ParseResult<(TranslationUnit, TypeTable, StringTable)> { + let mut strings = StringTable::new(); + let mut tokenizer = Tokenizer::new(input.as_bytes(), 0, &mut strings); let tokens = tokenizer.tokenize(); - let idents = tokenizer.ident_table(); + drop(tokenizer); let mut symbols = SymbolTable::new(); let mut types = TypeTable::new(); - let mut parser = Parser::new(&tokens, idents, &mut symbols, &mut types); + let mut parser = Parser::new(&tokens, &strings, &mut symbols, &mut types); let tu = parser.parse_translation_unit()?; - Ok((tu, types)) + Ok((tu, types, strings)) } #[test] fn test_simple_program() { - let (tu, _types) = parse_tu("int main() { return 0; }").unwrap(); + let (tu, _types, _strings) = parse_tu("int main() { return 0; }").unwrap(); assert_eq!(tu.items.len(), 1); assert!(matches!(tu.items[0], ExternalDecl::FunctionDef(_))); } #[test] fn test_global_var() { - let (tu, _types) = parse_tu("int x = 5;").unwrap(); + let (tu, _types, _strings) = parse_tu("int x = 5;").unwrap(); assert_eq!(tu.items.len(), 1); assert!(matches!(tu.items[0], ExternalDecl::Declaration(_))); } #[test] fn test_multiple_items() { - let (tu, _types) = parse_tu("int x; int main() { return x; }").unwrap(); + let (tu, _types, _strings) = parse_tu("int x; int main() { return x; }").unwrap(); assert_eq!(tu.items.len(), 2); assert!(matches!(tu.items[0], ExternalDecl::Declaration(_))); assert!(matches!(tu.items[1], ExternalDecl::FunctionDef(_))); @@ -5574,11 +5611,11 @@ mod tests { #[test] fn test_function_declaration() { - let (tu, types) = parse_tu("int foo(int x);").unwrap(); + let (tu, types, strings) = parse_tu("int foo(int x);").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { ExternalDecl::Declaration(decl) => { - assert_eq!(decl.declarators[0].name, "foo"); + check_name(&strings, decl.declarators[0].name, "foo"); assert_eq!(types.kind(decl.declarators[0].typ), TypeKind::Function); } _ => panic!("Expected Declaration"), @@ -5588,7 +5625,7 @@ mod tests { #[test] fn test_struct_only_declaration() { // Struct definition without a variable declarator - let (tu, _types) = parse_tu("struct point { int x; int y; };").unwrap(); + let (tu, _types, _strings) = parse_tu("struct point { int x; int y; };").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { ExternalDecl::Declaration(decl) => { @@ -5602,12 +5639,12 @@ mod tests { #[test] fn test_struct_with_variable_declaration() { // Struct definition with a variable declarator - let (tu, types) = parse_tu("struct point { int x; int y; } p;").unwrap(); + let (tu, types, strings) = parse_tu("struct point { int x; int y; } p;").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { ExternalDecl::Declaration(decl) => { assert_eq!(decl.declarators.len(), 1); - assert_eq!(decl.declarators[0].name, "p"); + check_name(&strings, decl.declarators[0].name, "p"); assert_eq!(types.kind(decl.declarators[0].typ), TypeKind::Struct); } _ => panic!("Expected Declaration"), @@ -5621,12 +5658,12 @@ mod tests { #[test] fn test_typedef_basic() { // Basic typedef declaration - let (tu, types) = parse_tu("typedef int myint;").unwrap(); + let (tu, types, strings) = parse_tu("typedef int myint;").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { ExternalDecl::Declaration(decl) => { assert_eq!(decl.declarators.len(), 1); - assert_eq!(decl.declarators[0].name, "myint"); + check_name(&strings, decl.declarators[0].name, "myint"); // The type includes the TYPEDEF modifier assert!(types .get(decl.declarators[0].typ) @@ -5640,13 +5677,13 @@ mod tests { #[test] fn test_typedef_usage() { // Typedef declaration followed by usage - let (tu, types) = parse_tu("typedef int myint; myint x;").unwrap(); + let (tu, types, strings) = parse_tu("typedef int myint; myint x;").unwrap(); assert_eq!(tu.items.len(), 2); // First item: typedef declaration match &tu.items[0] { ExternalDecl::Declaration(decl) => { - assert_eq!(decl.declarators[0].name, "myint"); + check_name(&strings, decl.declarators[0].name, "myint"); } _ => panic!("Expected typedef Declaration"), } @@ -5654,7 +5691,7 @@ mod tests { // Second item: variable using typedef match &tu.items[1] { ExternalDecl::Declaration(decl) => { - assert_eq!(decl.declarators[0].name, "x"); + check_name(&strings, decl.declarators[0].name, "x"); // The variable should have int type (resolved from typedef) assert_eq!(types.kind(decl.declarators[0].typ), TypeKind::Int); } @@ -5665,13 +5702,13 @@ mod tests { #[test] fn test_typedef_pointer() { // Typedef for pointer type - let (tu, types) = parse_tu("typedef int *intptr; intptr p;").unwrap(); + let (tu, types, strings) = parse_tu("typedef int *intptr; intptr p;").unwrap(); assert_eq!(tu.items.len(), 2); // Variable should have pointer type match &tu.items[1] { ExternalDecl::Declaration(decl) => { - assert_eq!(decl.declarators[0].name, "p"); + check_name(&strings, decl.declarators[0].name, "p"); assert_eq!(types.kind(decl.declarators[0].typ), TypeKind::Pointer); } _ => panic!("Expected variable Declaration"), @@ -5681,13 +5718,14 @@ mod tests { #[test] fn test_typedef_struct() { // Typedef for anonymous struct - let (tu, types) = parse_tu("typedef struct { int x; int y; } Point; Point p;").unwrap(); + let (tu, types, strings) = + parse_tu("typedef struct { int x; int y; } Point; Point p;").unwrap(); assert_eq!(tu.items.len(), 2); // Variable should have struct type match &tu.items[1] { ExternalDecl::Declaration(decl) => { - assert_eq!(decl.declarators[0].name, "p"); + check_name(&strings, decl.declarators[0].name, "p"); assert_eq!(types.kind(decl.declarators[0].typ), TypeKind::Struct); } _ => panic!("Expected variable Declaration"), @@ -5697,13 +5735,14 @@ mod tests { #[test] fn test_typedef_chained() { // Chained typedef: typedef of typedef - let (tu, types) = parse_tu("typedef int myint; typedef myint myint2; myint2 x;").unwrap(); + let (tu, types, strings) = + parse_tu("typedef int myint; typedef myint myint2; myint2 x;").unwrap(); assert_eq!(tu.items.len(), 3); // Final variable should resolve to int match &tu.items[2] { ExternalDecl::Declaration(decl) => { - assert_eq!(decl.declarators[0].name, "x"); + check_name(&strings, decl.declarators[0].name, "x"); assert_eq!(types.kind(decl.declarators[0].typ), TypeKind::Int); } _ => panic!("Expected variable Declaration"), @@ -5713,14 +5752,14 @@ mod tests { #[test] fn test_typedef_multiple() { // Multiple typedefs in one declaration - let (tu, types) = parse_tu("typedef int INT, *INTPTR;").unwrap(); + let (tu, types, strings) = parse_tu("typedef int INT, *INTPTR;").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { ExternalDecl::Declaration(decl) => { assert_eq!(decl.declarators.len(), 2); - assert_eq!(decl.declarators[0].name, "INT"); - assert_eq!(decl.declarators[1].name, "INTPTR"); + check_name(&strings, decl.declarators[0].name, "INT"); + check_name(&strings, decl.declarators[1].name, "INTPTR"); // INTPTR should be a pointer type assert_eq!(types.kind(decl.declarators[1].typ), TypeKind::Pointer); } @@ -5731,13 +5770,13 @@ mod tests { #[test] fn test_typedef_in_function() { // Typedef used in function parameter and return type - let (tu, types) = + let (tu, types, strings) = parse_tu("typedef int myint; myint add(myint a, myint b) { return a + b; }").unwrap(); assert_eq!(tu.items.len(), 2); match &tu.items[1] { ExternalDecl::FunctionDef(func) => { - assert_eq!(func.name, "add"); + check_name(&strings, func.name, "add"); // Return type should resolve to int assert_eq!(types.kind(func.return_type), TypeKind::Int); // Parameters should also resolve to int @@ -5752,13 +5791,13 @@ mod tests { #[test] fn test_typedef_local_variable() { // Typedef used as local variable type inside function body - let (tu, _types) = + let (tu, _types, strings) = parse_tu("typedef int myint; int main(void) { myint x; x = 42; return 0; }").unwrap(); assert_eq!(tu.items.len(), 2); match &tu.items[1] { ExternalDecl::FunctionDef(func) => { - assert_eq!(func.name, "main"); + check_name(&strings, func.name, "main"); // Check that the body parsed correctly match &func.body { Stmt::Block(items) => { @@ -5778,7 +5817,8 @@ mod tests { #[test] fn test_restrict_pointer_decl() { // Local variable with restrict qualifier - let (tu, _types) = parse_tu("int main(void) { int * restrict p; return 0; }").unwrap(); + let (tu, _types, _strings) = + parse_tu("int main(void) { int * restrict p; return 0; }").unwrap(); assert_eq!(tu.items.len(), 1); // Just verify it parses without error } @@ -5786,14 +5826,14 @@ mod tests { #[test] fn test_restrict_function_param() { // Function with restrict-qualified pointer parameters - let (tu, types) = + let (tu, types, strings) = parse_tu("void copy(int * restrict dest, int * restrict src) { *dest = *src; }") .unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { ExternalDecl::FunctionDef(func) => { - assert_eq!(func.name, "copy"); + check_name(&strings, func.name, "copy"); assert_eq!(func.params.len(), 2); // Both params should be restrict-qualified pointers assert_eq!(types.kind(func.params[0].typ), TypeKind::Pointer); @@ -5814,7 +5854,7 @@ mod tests { #[test] fn test_restrict_with_const() { // Pointer with both const and restrict qualifiers - let (tu, _types) = + let (tu, _types, _strings) = parse_tu("int main(void) { int * const restrict p = 0; return 0; }").unwrap(); assert_eq!(tu.items.len(), 1); // Just verify it parses without error - both qualifiers should be accepted @@ -5823,12 +5863,12 @@ mod tests { #[test] fn test_restrict_global_pointer() { // Global pointer with restrict qualifier - let (tu, types) = parse_tu("int * restrict global_ptr;").unwrap(); + let (tu, types, strings) = parse_tu("int * restrict global_ptr;").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { ExternalDecl::Declaration(decl) => { - assert_eq!(decl.declarators[0].name, "global_ptr"); + check_name(&strings, decl.declarators[0].name, "global_ptr"); assert_eq!(types.kind(decl.declarators[0].typ), TypeKind::Pointer); assert!(types .get(decl.declarators[0].typ) @@ -5846,12 +5886,12 @@ mod tests { #[test] fn test_volatile_basic() { // Basic volatile variable - let (tu, types) = parse_tu("volatile int x;").unwrap(); + let (tu, types, strings) = parse_tu("volatile int x;").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { ExternalDecl::Declaration(decl) => { - assert_eq!(decl.declarators[0].name, "x"); + check_name(&strings, decl.declarators[0].name, "x"); assert!(types .get(decl.declarators[0].typ) .modifiers @@ -5864,12 +5904,12 @@ mod tests { #[test] fn test_volatile_pointer() { // Pointer to volatile int - let (tu, types) = parse_tu("volatile int *p;").unwrap(); + let (tu, types, strings) = parse_tu("volatile int *p;").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { ExternalDecl::Declaration(decl) => { - assert_eq!(decl.declarators[0].name, "p"); + check_name(&strings, decl.declarators[0].name, "p"); assert_eq!(types.kind(decl.declarators[0].typ), TypeKind::Pointer); // The base type should be volatile let base_id = types.base_type(decl.declarators[0].typ).unwrap(); @@ -5885,12 +5925,12 @@ mod tests { #[test] fn test_volatile_pointer_itself() { // Volatile pointer to int (pointer itself is volatile) - let (tu, types) = parse_tu("int * volatile p;").unwrap(); + let (tu, types, strings) = parse_tu("int * volatile p;").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { ExternalDecl::Declaration(decl) => { - assert_eq!(decl.declarators[0].name, "p"); + check_name(&strings, decl.declarators[0].name, "p"); assert_eq!(types.kind(decl.declarators[0].typ), TypeKind::Pointer); // The pointer type itself should be volatile assert!(types @@ -5905,12 +5945,12 @@ mod tests { #[test] fn test_volatile_const_combined() { // Both const and volatile - let (tu, types) = parse_tu("const volatile int x;").unwrap(); + let (tu, types, strings) = parse_tu("const volatile int x;").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { ExternalDecl::Declaration(decl) => { - assert_eq!(decl.declarators[0].name, "x"); + check_name(&strings, decl.declarators[0].name, "x"); assert!(types .get(decl.declarators[0].typ) .modifiers @@ -5927,12 +5967,12 @@ mod tests { #[test] fn test_volatile_function_param() { // Function with volatile pointer parameter - let (tu, types) = parse_tu("void foo(volatile int *p) { *p = 1; }").unwrap(); + let (tu, types, strings) = parse_tu("void foo(volatile int *p) { *p = 1; }").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { ExternalDecl::FunctionDef(func) => { - assert_eq!(func.name, "foo"); + check_name(&strings, func.name, "foo"); assert_eq!(func.params.len(), 1); // Parameter is pointer to volatile int assert_eq!(types.kind(func.params[0].typ), TypeKind::Pointer); @@ -5953,13 +5993,13 @@ mod tests { #[test] fn test_attribute_on_function_declaration() { // Attribute on function declaration - let (tu, _types) = parse_tu("void foo(void) __attribute__((noreturn));").unwrap(); + let (tu, _types, strings) = parse_tu("void foo(void) __attribute__((noreturn));").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { ExternalDecl::Declaration(decl) => { assert_eq!(decl.declarators.len(), 1); - assert_eq!(decl.declarators[0].name, "foo"); + check_name(&strings, decl.declarators[0].name, "foo"); } _ => panic!("Expected Declaration"), } @@ -5968,13 +6008,14 @@ mod tests { #[test] fn test_attribute_on_struct() { // Attribute between struct keyword and name (with variable) - let (tu, types) = parse_tu("struct __attribute__((packed)) foo { int x; } s;").unwrap(); + let (tu, types, strings) = + parse_tu("struct __attribute__((packed)) foo { int x; } s;").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { ExternalDecl::Declaration(decl) => { assert_eq!(decl.declarators.len(), 1); - assert_eq!(decl.declarators[0].name, "s"); + check_name(&strings, decl.declarators[0].name, "s"); assert_eq!(types.kind(decl.declarators[0].typ), TypeKind::Struct); } _ => panic!("Expected Declaration"), @@ -5984,14 +6025,14 @@ mod tests { #[test] fn test_attribute_after_struct() { // Attribute after struct closing brace (with variable) - let (tu, types) = + let (tu, types, strings) = parse_tu("struct foo { int x; } __attribute__((aligned(16))) s;").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { ExternalDecl::Declaration(decl) => { assert_eq!(decl.declarators.len(), 1); - assert_eq!(decl.declarators[0].name, "s"); + check_name(&strings, decl.declarators[0].name, "s"); assert_eq!(types.kind(decl.declarators[0].typ), TypeKind::Struct); } _ => panic!("Expected Declaration"), @@ -6001,7 +6042,8 @@ mod tests { #[test] fn test_attribute_on_struct_only() { // Attribute on struct-only definition (no variable) - let (tu, _types) = parse_tu("struct __attribute__((packed)) foo { int x; };").unwrap(); + let (tu, _types, _strings) = + parse_tu("struct __attribute__((packed)) foo { int x; };").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { @@ -6016,13 +6058,13 @@ mod tests { #[test] fn test_attribute_on_variable() { // Attribute on variable declaration - let (tu, _types) = parse_tu("int x __attribute__((aligned(8)));").unwrap(); + let (tu, _types, strings) = parse_tu("int x __attribute__((aligned(8)));").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { ExternalDecl::Declaration(decl) => { assert_eq!(decl.declarators.len(), 1); - assert_eq!(decl.declarators[0].name, "x"); + check_name(&strings, decl.declarators[0].name, "x"); } _ => panic!("Expected Declaration"), } @@ -6031,12 +6073,13 @@ mod tests { #[test] fn test_attribute_multiple() { // Multiple attributes in one list - let (tu, _types) = parse_tu("void foo(void) __attribute__((noreturn, cold));").unwrap(); + let (tu, _types, strings) = + parse_tu("void foo(void) __attribute__((noreturn, cold));").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { ExternalDecl::Declaration(decl) => { - assert_eq!(decl.declarators[0].name, "foo"); + check_name(&strings, decl.declarators[0].name, "foo"); } _ => panic!("Expected Declaration"), } @@ -6045,7 +6088,7 @@ mod tests { #[test] fn test_attribute_with_args() { // Attribute with multiple arguments - let (tu, _types) = parse_tu( + let (tu, _types, strings) = parse_tu( "void foo(const char *fmt, ...) __attribute__((__format__(__printf__, 1, 2)));", ) .unwrap(); @@ -6053,7 +6096,7 @@ mod tests { match &tu.items[0] { ExternalDecl::Declaration(decl) => { - assert_eq!(decl.declarators[0].name, "foo"); + check_name(&strings, decl.declarators[0].name, "foo"); } _ => panic!("Expected Declaration"), } @@ -6062,13 +6105,13 @@ mod tests { #[test] fn test_attribute_before_declaration() { // Attribute before declaration - let (tu, _types) = + let (tu, _types, strings) = parse_tu("__attribute__((visibility(\"default\"))) int exported_var;").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { ExternalDecl::Declaration(decl) => { - assert_eq!(decl.declarators[0].name, "exported_var"); + check_name(&strings, decl.declarators[0].name, "exported_var"); } _ => panic!("Expected Declaration"), } @@ -6077,12 +6120,12 @@ mod tests { #[test] fn test_attribute_underscore_variant() { // __attribute variant (single underscore pair) - let (tu, _types) = parse_tu("void foo(void) __attribute((noreturn));").unwrap(); + let (tu, _types, strings) = parse_tu("void foo(void) __attribute((noreturn));").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { ExternalDecl::Declaration(decl) => { - assert_eq!(decl.declarators[0].name, "foo"); + check_name(&strings, decl.declarators[0].name, "foo"); } _ => panic!("Expected Declaration"), } @@ -6099,7 +6142,7 @@ mod tests { #[test] fn test_const_assignment_parses() { // Assignment to const variable should still parse (errors are reported but parsing continues) - let (tu, _types) = + let (tu, _types, _strings) = parse_tu("int main(void) { const int x = 42; x = 10; return 0; }").unwrap(); assert_eq!(tu.items.len(), 1); // Verify we got a function definition @@ -6109,7 +6152,7 @@ mod tests { #[test] fn test_const_pointer_deref_parses() { // Assignment through pointer to const should still parse - let (tu, _types) = + let (tu, _types, _strings) = parse_tu("int main(void) { int v = 1; const int *p = &v; *p = 2; return 0; }").unwrap(); assert_eq!(tu.items.len(), 1); assert!(matches!(tu.items[0], ExternalDecl::FunctionDef(_))); @@ -6118,7 +6161,7 @@ mod tests { #[test] fn test_const_usage_valid() { // Valid const usage - reading const values - let (tu, _types) = + let (tu, _types, _strings) = parse_tu("int main(void) { const int x = 42; int y = x + 1; return y; }").unwrap(); assert_eq!(tu.items.len(), 1); assert!(matches!(tu.items[0], ExternalDecl::FunctionDef(_))); @@ -6127,7 +6170,7 @@ mod tests { #[test] fn test_const_pointer_types() { // Different const pointer combinations - let (tu, _types) = parse_tu( + let (tu, _types, _strings) = parse_tu( "int main(void) { int v = 1; const int *a = &v; int * const b = &v; const int * const c = &v; return 0; }", ) .unwrap(); @@ -6141,12 +6184,12 @@ mod tests { #[test] fn test_function_decl_no_params() { - let (tu, types) = parse_tu("int foo(void);").unwrap(); + let (tu, types, strings) = parse_tu("int foo(void);").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { ExternalDecl::Declaration(decl) => { assert_eq!(decl.declarators.len(), 1); - assert_eq!(decl.declarators[0].name, "foo"); + check_name(&strings, decl.declarators[0].name, "foo"); assert_eq!(types.kind(decl.declarators[0].typ), TypeKind::Function); assert!(!types.is_variadic(decl.declarators[0].typ)); // Check return type @@ -6164,11 +6207,11 @@ mod tests { #[test] fn test_function_decl_one_param() { - let (tu, types) = parse_tu("int square(int x);").unwrap(); + let (tu, types, strings) = parse_tu("int square(int x);").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { ExternalDecl::Declaration(decl) => { - assert_eq!(decl.declarators[0].name, "square"); + check_name(&strings, decl.declarators[0].name, "square"); assert_eq!(types.kind(decl.declarators[0].typ), TypeKind::Function); assert!(!types.is_variadic(decl.declarators[0].typ)); if let Some(params) = types.params(decl.declarators[0].typ) { @@ -6182,11 +6225,11 @@ mod tests { #[test] fn test_function_decl_multiple_params() { - let (tu, types) = parse_tu("int add(int a, int b, int c);").unwrap(); + let (tu, types, strings) = parse_tu("int add(int a, int b, int c);").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { ExternalDecl::Declaration(decl) => { - assert_eq!(decl.declarators[0].name, "add"); + check_name(&strings, decl.declarators[0].name, "add"); assert_eq!(types.kind(decl.declarators[0].typ), TypeKind::Function); assert!(!types.is_variadic(decl.declarators[0].typ)); if let Some(params) = types.params(decl.declarators[0].typ) { @@ -6202,11 +6245,11 @@ mod tests { #[test] fn test_function_decl_void_return() { - let (tu, types) = parse_tu("void do_something(int x);").unwrap(); + let (tu, types, strings) = parse_tu("void do_something(int x);").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { ExternalDecl::Declaration(decl) => { - assert_eq!(decl.declarators[0].name, "do_something"); + check_name(&strings, decl.declarators[0].name, "do_something"); assert_eq!(types.kind(decl.declarators[0].typ), TypeKind::Function); if let Some(base_id) = types.base_type(decl.declarators[0].typ) { assert_eq!(types.kind(base_id), TypeKind::Void); @@ -6218,11 +6261,11 @@ mod tests { #[test] fn test_function_decl_pointer_return() { - let (tu, types) = parse_tu("char *get_string(void);").unwrap(); + let (tu, types, strings) = parse_tu("char *get_string(void);").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { ExternalDecl::Declaration(decl) => { - assert_eq!(decl.declarators[0].name, "get_string"); + check_name(&strings, decl.declarators[0].name, "get_string"); assert_eq!(types.kind(decl.declarators[0].typ), TypeKind::Function); if let Some(base_id) = types.base_type(decl.declarators[0].typ) { assert_eq!(types.kind(base_id), TypeKind::Pointer); @@ -6234,11 +6277,11 @@ mod tests { #[test] fn test_function_decl_pointer_param() { - let (tu, types) = parse_tu("void process(int *data, int count);").unwrap(); + let (tu, types, strings) = parse_tu("void process(int *data, int count);").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { ExternalDecl::Declaration(decl) => { - assert_eq!(decl.declarators[0].name, "process"); + check_name(&strings, decl.declarators[0].name, "process"); if let Some(params) = types.params(decl.declarators[0].typ) { assert_eq!(params.len(), 2); assert_eq!(types.kind(params[0]), TypeKind::Pointer); @@ -6256,11 +6299,11 @@ mod tests { #[test] fn test_function_decl_variadic_printf() { // Classic printf prototype - let (tu, types) = parse_tu("int printf(const char *fmt, ...);").unwrap(); + let (tu, types, strings) = parse_tu("int printf(const char *fmt, ...);").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { ExternalDecl::Declaration(decl) => { - assert_eq!(decl.declarators[0].name, "printf"); + check_name(&strings, decl.declarators[0].name, "printf"); assert_eq!(types.kind(decl.declarators[0].typ), TypeKind::Function); // Should be marked as variadic assert!( @@ -6279,11 +6322,12 @@ mod tests { #[test] fn test_function_decl_variadic_sprintf() { - let (tu, types) = parse_tu("int sprintf(char *buf, const char *fmt, ...);").unwrap(); + let (tu, types, strings) = + parse_tu("int sprintf(char *buf, const char *fmt, ...);").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { ExternalDecl::Declaration(decl) => { - assert_eq!(decl.declarators[0].name, "sprintf"); + check_name(&strings, decl.declarators[0].name, "sprintf"); assert!( types.is_variadic(decl.declarators[0].typ), "sprintf should be marked as variadic" @@ -6300,11 +6344,11 @@ mod tests { #[test] fn test_function_decl_variadic_custom() { // Custom variadic function with int first param - let (tu, types) = parse_tu("int sum_ints(int count, ...);").unwrap(); + let (tu, types, strings) = parse_tu("int sum_ints(int count, ...);").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { ExternalDecl::Declaration(decl) => { - assert_eq!(decl.declarators[0].name, "sum_ints"); + check_name(&strings, decl.declarators[0].name, "sum_ints"); assert!( types.is_variadic(decl.declarators[0].typ), "sum_ints should be marked as variadic" @@ -6321,11 +6365,12 @@ mod tests { #[test] fn test_function_decl_variadic_multiple_fixed() { // Variadic function with multiple fixed parameters - let (tu, types) = parse_tu("int variadic_func(int a, double b, char *c, ...);").unwrap(); + let (tu, types, strings) = + parse_tu("int variadic_func(int a, double b, char *c, ...);").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { ExternalDecl::Declaration(decl) => { - assert_eq!(decl.declarators[0].name, "variadic_func"); + check_name(&strings, decl.declarators[0].name, "variadic_func"); assert!( types.is_variadic(decl.declarators[0].typ), "variadic_func should be marked as variadic" @@ -6344,11 +6389,11 @@ mod tests { #[test] fn test_function_decl_variadic_void_return() { // Variadic function with void return type - let (tu, types) = parse_tu("void log_message(const char *fmt, ...);").unwrap(); + let (tu, types, strings) = parse_tu("void log_message(const char *fmt, ...);").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { ExternalDecl::Declaration(decl) => { - assert_eq!(decl.declarators[0].name, "log_message"); + check_name(&strings, decl.declarators[0].name, "log_message"); assert!( types.is_variadic(decl.declarators[0].typ), "log_message should be marked as variadic" @@ -6364,11 +6409,11 @@ mod tests { #[test] fn test_function_decl_not_variadic() { // Make sure non-variadic functions are NOT marked as variadic - let (tu, types) = parse_tu("int regular_func(int a, int b);").unwrap(); + let (tu, types, strings) = parse_tu("int regular_func(int a, int b);").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { ExternalDecl::Declaration(decl) => { - assert_eq!(decl.declarators[0].name, "regular_func"); + check_name(&strings, decl.declarators[0].name, "regular_func"); assert!( !types.is_variadic(decl.declarators[0].typ), "regular_func should NOT be marked as variadic" @@ -6381,8 +6426,9 @@ mod tests { #[test] fn test_variadic_function_definition() { // Variadic function definition (not just declaration) - let (func, _types) = parse_func("int my_printf(char *fmt, ...) { return 0; }").unwrap(); - assert_eq!(func.name, "my_printf"); + let (func, _types, strings) = + parse_func("int my_printf(char *fmt, ...) { return 0; }").unwrap(); + check_name(&strings, func.name, "my_printf"); // Note: FunctionDef doesn't directly expose variadic, but the function // body can use va_start etc. This test just ensures parsing succeeds. assert_eq!(func.params.len(), 1); @@ -6391,7 +6437,7 @@ mod tests { #[test] fn test_multiple_function_decls_mixed() { // Mix of variadic and non-variadic declarations - let (tu, types) = parse_tu( + let (tu, types, strings) = parse_tu( "int printf(const char *fmt, ...); int puts(const char *s); int sprintf(char *buf, const char *fmt, ...);", ) .unwrap(); @@ -6400,7 +6446,7 @@ mod tests { // printf - variadic match &tu.items[0] { ExternalDecl::Declaration(decl) => { - assert_eq!(decl.declarators[0].name, "printf"); + check_name(&strings, decl.declarators[0].name, "printf"); assert!(types.is_variadic(decl.declarators[0].typ)); } _ => panic!("Expected Declaration"), @@ -6409,7 +6455,7 @@ mod tests { // puts - not variadic match &tu.items[1] { ExternalDecl::Declaration(decl) => { - assert_eq!(decl.declarators[0].name, "puts"); + check_name(&strings, decl.declarators[0].name, "puts"); assert!(!types.is_variadic(decl.declarators[0].typ)); } _ => panic!("Expected Declaration"), @@ -6418,7 +6464,7 @@ mod tests { // sprintf - variadic match &tu.items[2] { ExternalDecl::Declaration(decl) => { - assert_eq!(decl.declarators[0].name, "sprintf"); + check_name(&strings, decl.declarators[0].name, "sprintf"); assert!(types.is_variadic(decl.declarators[0].typ)); } _ => panic!("Expected Declaration"), @@ -6428,12 +6474,12 @@ mod tests { #[test] fn test_function_decl_with_struct_param() { // Function declaration with struct parameter - let (tu, types) = + let (tu, types, strings) = parse_tu("struct point { int x; int y; }; void move_point(struct point p);").unwrap(); assert_eq!(tu.items.len(), 2); match &tu.items[1] { ExternalDecl::Declaration(decl) => { - assert_eq!(decl.declarators[0].name, "move_point"); + check_name(&strings, decl.declarators[0].name, "move_point"); assert_eq!(types.kind(decl.declarators[0].typ), TypeKind::Function); assert!(!types.is_variadic(decl.declarators[0].typ)); } @@ -6444,11 +6490,11 @@ mod tests { #[test] fn test_function_decl_array_decay() { // Array parameters decay to pointers in function declarations - let (tu, types) = parse_tu("void process_array(int arr[]);").unwrap(); + let (tu, types, strings) = parse_tu("void process_array(int arr[]);").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { ExternalDecl::Declaration(decl) => { - assert_eq!(decl.declarators[0].name, "process_array"); + check_name(&strings, decl.declarators[0].name, "process_array"); assert_eq!(types.kind(decl.declarators[0].typ), TypeKind::Function); // The array parameter should decay to pointer if let Some(params) = types.params(decl.declarators[0].typ) { @@ -6469,12 +6515,12 @@ mod tests { #[test] fn test_function_pointer_declaration() { // Basic function pointer: void (*fp)(int) - let (tu, types) = parse_tu("void (*fp)(int);").unwrap(); + let (tu, types, strings) = parse_tu("void (*fp)(int);").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { ExternalDecl::Declaration(decl) => { assert_eq!(decl.declarators.len(), 1); - assert_eq!(decl.declarators[0].name, "fp"); + check_name(&strings, decl.declarators[0].name, "fp"); // fp should be a pointer to a function assert_eq!(types.kind(decl.declarators[0].typ), TypeKind::Pointer); // The base type of the pointer should be a function @@ -6500,11 +6546,11 @@ mod tests { #[test] fn test_function_pointer_no_params() { // Function pointer with no parameters: int (*fp)(void) - let (tu, types) = parse_tu("int (*fp)(void);").unwrap(); + let (tu, types, strings) = parse_tu("int (*fp)(void);").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { ExternalDecl::Declaration(decl) => { - assert_eq!(decl.declarators[0].name, "fp"); + check_name(&strings, decl.declarators[0].name, "fp"); assert_eq!(types.kind(decl.declarators[0].typ), TypeKind::Pointer); if let Some(base_id) = types.base_type(decl.declarators[0].typ) { assert_eq!(types.kind(base_id), TypeKind::Function); @@ -6521,11 +6567,11 @@ mod tests { #[test] fn test_function_pointer_multiple_params() { // Function pointer with multiple parameters: int (*fp)(int, char, double) - let (tu, types) = parse_tu("int (*fp)(int, char, double);").unwrap(); + let (tu, types, strings) = parse_tu("int (*fp)(int, char, double);").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { ExternalDecl::Declaration(decl) => { - assert_eq!(decl.declarators[0].name, "fp"); + check_name(&strings, decl.declarators[0].name, "fp"); assert_eq!(types.kind(decl.declarators[0].typ), TypeKind::Pointer); if let Some(base_id) = types.base_type(decl.declarators[0].typ) { assert_eq!(types.kind(base_id), TypeKind::Function); @@ -6544,11 +6590,11 @@ mod tests { #[test] fn test_function_pointer_returning_pointer() { // Function pointer returning a pointer: char *(*fp)(int) - let (tu, types) = parse_tu("char *(*fp)(int);").unwrap(); + let (tu, types, strings) = parse_tu("char *(*fp)(int);").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { ExternalDecl::Declaration(decl) => { - assert_eq!(decl.declarators[0].name, "fp"); + check_name(&strings, decl.declarators[0].name, "fp"); assert_eq!(types.kind(decl.declarators[0].typ), TypeKind::Pointer); if let Some(base_id) = types.base_type(decl.declarators[0].typ) { assert_eq!(types.kind(base_id), TypeKind::Function); @@ -6568,11 +6614,11 @@ mod tests { #[test] fn test_function_pointer_variadic() { // Variadic function pointer: int (*fp)(const char *, ...) - let (tu, types) = parse_tu("int (*fp)(const char *, ...);").unwrap(); + let (tu, types, strings) = parse_tu("int (*fp)(const char *, ...);").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { ExternalDecl::Declaration(decl) => { - assert_eq!(decl.declarators[0].name, "fp"); + check_name(&strings, decl.declarators[0].name, "fp"); assert_eq!(types.kind(decl.declarators[0].typ), TypeKind::Pointer); if let Some(base_id) = types.base_type(decl.declarators[0].typ) { assert_eq!(types.kind(base_id), TypeKind::Function); @@ -6594,22 +6640,22 @@ mod tests { #[test] fn test_bitfield_basic() { // Basic bitfield parsing - include a variable declarator - let (tu, types) = + let (tu, types, strings) = parse_tu("struct flags { unsigned int a : 4; unsigned int b : 4; } f;").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { ExternalDecl::Declaration(decl) => { assert_eq!(decl.declarators.len(), 1); - assert_eq!(decl.declarators[0].name, "f"); + check_name(&strings, decl.declarators[0].name, "f"); assert_eq!(types.kind(decl.declarators[0].typ), TypeKind::Struct); if let Some(composite) = types.composite(decl.declarators[0].typ) { assert_eq!(composite.members.len(), 2); // First bitfield - assert_eq!(composite.members[0].name, "a"); + check_name(&strings, composite.members[0].name, "a"); assert_eq!(composite.members[0].bit_width, Some(4)); assert_eq!(composite.members[0].bit_offset, Some(0)); // Second bitfield - assert_eq!(composite.members[1].name, "b"); + check_name(&strings, composite.members[1].name, "b"); assert_eq!(composite.members[1].bit_width, Some(4)); assert_eq!(composite.members[1].bit_offset, Some(4)); } @@ -6621,7 +6667,7 @@ mod tests { #[test] fn test_bitfield_unnamed() { // Unnamed bitfield for padding - let (tu, types) = parse_tu( + let (tu, types, strings) = parse_tu( "struct padded { unsigned int a : 4; unsigned int : 4; unsigned int b : 8; } p;", ) .unwrap(); @@ -6632,13 +6678,13 @@ mod tests { if let Some(composite) = types.composite(decl.declarators[0].typ) { assert_eq!(composite.members.len(), 3); // First named bitfield - assert_eq!(composite.members[0].name, "a"); + check_name(&strings, composite.members[0].name, "a"); assert_eq!(composite.members[0].bit_width, Some(4)); // Unnamed padding bitfield - assert_eq!(composite.members[1].name, ""); + check_name(&strings, composite.members[1].name, ""); assert_eq!(composite.members[1].bit_width, Some(4)); // Second named bitfield - assert_eq!(composite.members[2].name, "b"); + check_name(&strings, composite.members[2].name, "b"); assert_eq!(composite.members[2].bit_width, Some(8)); } } @@ -6649,7 +6695,7 @@ mod tests { #[test] fn test_bitfield_zero_width() { // Zero-width bitfield forces alignment - let (tu, types) = parse_tu( + let (tu, types, strings) = parse_tu( "struct aligned { unsigned int a : 4; unsigned int : 0; unsigned int b : 4; } x;", ) .unwrap(); @@ -6660,7 +6706,7 @@ mod tests { if let Some(composite) = types.composite(decl.declarators[0].typ) { assert_eq!(composite.members.len(), 3); // After zero-width bitfield, b should start at new storage unit - assert_eq!(composite.members[2].name, "b"); + check_name(&strings, composite.members[2].name, "b"); assert_eq!(composite.members[2].bit_width, Some(4)); // b should be at offset 0 within its storage unit assert_eq!(composite.members[2].bit_offset, Some(0)); @@ -6675,7 +6721,7 @@ mod tests { #[test] fn test_bitfield_mixed_with_regular() { // Bitfield mixed with regular member - let (tu, types) = + let (tu, types, strings) = parse_tu("struct mixed { int x; unsigned int bits : 8; int y; } m;").unwrap(); assert_eq!(tu.items.len(), 1); match &tu.items[0] { @@ -6684,13 +6730,13 @@ mod tests { if let Some(composite) = types.composite(decl.declarators[0].typ) { assert_eq!(composite.members.len(), 3); // x is regular member - assert_eq!(composite.members[0].name, "x"); + check_name(&strings, composite.members[0].name, "x"); assert!(composite.members[0].bit_width.is_none()); // bits is a bitfield - assert_eq!(composite.members[1].name, "bits"); + check_name(&strings, composite.members[1].name, "bits"); assert_eq!(composite.members[1].bit_width, Some(8)); // y is regular member - assert_eq!(composite.members[2].name, "y"); + check_name(&strings, composite.members[2].name, "y"); assert!(composite.members[2].bit_width.is_none()); } } @@ -6701,7 +6747,7 @@ mod tests { #[test] fn test_bitfield_struct_size() { // Verify struct size calculation with bitfields - let (tu, types) = parse_tu( + let (tu, types, _strings) = parse_tu( "struct small { unsigned int a : 1; unsigned int b : 1; unsigned int c : 1; } s;", ) .unwrap(); diff --git a/cc/strings.rs b/cc/strings.rs new file mode 100644 index 00000000..d0dbecb1 --- /dev/null +++ b/cc/strings.rs @@ -0,0 +1,204 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// String interning for pcc C99 compiler +// +// This module provides a global StringTable for interning strings across all +// compiler passes. Each unique string is stored once and referenced by a +// compact StringId (u32), providing: +// - Memory efficiency: each unique string stored once +// - O(1) equality comparison by ID +// - Reduced allocations throughout the compiler +// +// Design follows the pattern from: +// - TypeTable in types.rs (interned type references) +// - IdentTable in token/lexer.rs (lexer-local identifier interning) +// - https://matklad.github.io/2020/03/22/fast-simple-rust-interner.html +// + +use std::collections::HashMap; +use std::fmt; + +// ============================================================================ +// StringId - Unique identifier for interned strings +// ============================================================================ + +/// A unique identifier for an interned string +/// Similar to TypeId for types, this provides O(1) equality comparisons +/// and compact storage (4 bytes vs 16+ bytes for String on 64-bit) +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)] +pub struct StringId(pub u32); + +impl StringId { + /// Empty string ID (always ID 0, pre-interned) + pub const EMPTY: StringId = StringId(0); +} + +impl fmt::Display for StringId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "S{}", self.0) + } +} + +// ============================================================================ +// StringTable - Interned string storage +// ============================================================================ + +/// Default capacity for string table allocations (reduces reallocation overhead) +const DEFAULT_STRING_TABLE_CAPACITY: usize = 2048; + +/// String interner - stores all strings and provides ID-based lookup +/// +/// Pattern follows TypeTable and IdentTable: +/// - HashMap for deduplication (string -> id) +/// - Vec for reverse lookup (id -> string) +/// +/// All strings in the compiler should be interned through this table: +/// - Identifier names (variables, functions, types) +/// - Struct/union/enum member names +/// - Macro names +/// - Label names +/// - etc. +pub struct StringTable { + /// Lookup map for deduplication: string content -> StringId + map: HashMap, + /// All interned strings indexed by StringId + strings: Vec, +} + +impl StringTable { + /// Create a new string table with default capacity and empty string pre-interned + pub fn new() -> Self { + Self::with_capacity(DEFAULT_STRING_TABLE_CAPACITY) + } + + /// Create a new string table with specified capacity and empty string pre-interned + pub fn with_capacity(capacity: usize) -> Self { + let mut table = Self { + map: HashMap::with_capacity(capacity), + strings: Vec::with_capacity(capacity), + }; + // Pre-intern empty string as ID 0 + let empty_id = table.intern_internal(""); + debug_assert_eq!(empty_id, StringId::EMPTY); + table + } + + /// Internal interning without pre-interned string check + fn intern_internal(&mut self, s: &str) -> StringId { + let id = StringId(self.strings.len() as u32); + self.strings.push(s.to_string()); + self.map.insert(s.to_string(), id); + id + } + + /// Intern a string, returning its unique ID + /// + /// If the string has been interned before, returns the existing ID. + /// Otherwise, stores the string and returns a new ID. + /// + /// # Example + /// ```ignore + /// let mut strings = StringTable::new(); + /// let id1 = strings.intern("foo"); + /// let id2 = strings.intern("foo"); + /// assert_eq!(id1, id2); // Same string -> same ID + /// ``` + pub fn intern(&mut self, s: &str) -> StringId { + if let Some(&id) = self.map.get(s) { + return id; + } + self.intern_internal(s) + } + + /// Get the string for an ID + /// + /// # Panics + /// Panics if the ID is invalid + pub fn get(&self, id: StringId) -> &str { + &self.strings[id.0 as usize] + } + + /// Get the string for an ID, returning None for invalid IDs + pub fn get_opt(&self, id: StringId) -> Option<&str> { + self.strings.get(id.0 as usize).map(|s| s.as_str()) + } +} + +impl Default for StringTable { + fn default() -> Self { + Self::new() + } +} + +impl fmt::Debug for StringTable { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("StringTable") + .field("len", &self.strings.len()) + .finish() + } +} + +// ============================================================================ +// Tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_empty_string_is_zero() { + let table = StringTable::new(); + assert_eq!(table.get(StringId::EMPTY), ""); + } + + #[test] + fn test_intern_returns_same_id() { + let mut table = StringTable::new(); + let id1 = table.intern("foo"); + let id2 = table.intern("foo"); + assert_eq!(id1, id2); + } + + #[test] + fn test_different_strings_different_ids() { + let mut table = StringTable::new(); + let id1 = table.intern("foo"); + let id2 = table.intern("bar"); + assert_ne!(id1, id2); + } + + #[test] + fn test_get_returns_string() { + let mut table = StringTable::new(); + let id = table.intern("hello"); + assert_eq!(table.get(id), "hello"); + } + + #[test] + fn test_string_id_display() { + let id = StringId(42); + assert_eq!(format!("{}", id), "S42"); + } + + #[test] + fn test_string_id_empty_constant() { + assert_eq!(StringId::EMPTY.0, 0); + } + + #[test] + fn test_many_strings() { + let mut table = StringTable::new(); + for i in 0..1000 { + let s = format!("string_{}", i); + let id = table.intern(&s); + assert_eq!(table.get(id), s); + } + } +} diff --git a/cc/symbol.rs b/cc/symbol.rs index c8ad969d..9890a7b4 100644 --- a/cc/symbol.rs +++ b/cc/symbol.rs @@ -10,6 +10,7 @@ // Based on sparse's scope-aware symbol management // +use crate::strings::StringId; use crate::types::TypeId; use std::collections::HashMap; @@ -73,8 +74,8 @@ pub enum StorageClass { /// A symbol in the symbol table #[derive(Debug, Clone)] pub struct Symbol { - /// The symbol's name - pub name: String, + /// The symbol's name (interned StringId) + pub name: StringId, /// What kind of symbol this is pub kind: SymbolKind, @@ -97,7 +98,7 @@ pub struct Symbol { impl Symbol { /// Create a new variable symbol - pub fn variable(name: String, typ: TypeId, scope_depth: u32) -> Self { + pub fn variable(name: StringId, typ: TypeId, scope_depth: u32) -> Self { Self { name, kind: SymbolKind::Variable, @@ -110,7 +111,7 @@ impl Symbol { } /// Create a new function symbol - pub fn function(name: String, typ: TypeId, scope_depth: u32) -> Self { + pub fn function(name: StringId, typ: TypeId, scope_depth: u32) -> Self { Self { name, kind: SymbolKind::Function, @@ -123,7 +124,7 @@ impl Symbol { } /// Create a new parameter symbol - pub fn parameter(name: String, typ: TypeId, scope_depth: u32) -> Self { + pub fn parameter(name: StringId, typ: TypeId, scope_depth: u32) -> Self { Self { name, kind: SymbolKind::Parameter, @@ -136,7 +137,7 @@ impl Symbol { } /// Create a new enum constant symbol (requires int_id from TypeTable) - pub fn enum_constant(name: String, value: i64, int_id: TypeId, scope_depth: u32) -> Self { + pub fn enum_constant(name: StringId, value: i64, int_id: TypeId, scope_depth: u32) -> Self { Self { name, kind: SymbolKind::EnumConstant, @@ -149,7 +150,7 @@ impl Symbol { } /// Create a new tag symbol (struct/union/enum tag) - pub fn tag(name: String, typ: TypeId, scope_depth: u32) -> Self { + pub fn tag(name: StringId, typ: TypeId, scope_depth: u32) -> Self { Self { name, kind: SymbolKind::Tag, @@ -162,7 +163,7 @@ impl Symbol { } /// Create a new typedef symbol - pub fn typedef(name: String, typ: TypeId, scope_depth: u32) -> Self { + pub fn typedef(name: StringId, typ: TypeId, scope_depth: u32) -> Self { Self { name, kind: SymbolKind::Typedef, @@ -233,7 +234,7 @@ pub struct SymbolTable { /// Fast lookup: name -> list of symbol IDs with that name /// (most recent first, for shadowing) - name_map: HashMap<(String, Namespace), Vec>, + name_map: HashMap<(StringId, Namespace), Vec>, } impl SymbolTable { @@ -266,7 +267,7 @@ impl SymbolTable { let scope = &self.scopes[self.current_scope as usize]; for &sym_id in &scope.symbols { let sym = &self.symbols[sym_id.0 as usize]; - let key = (sym.name.clone(), sym.namespace); + let key = (sym.name, sym.namespace); if let Some(ids) = self.name_map.get_mut(&key) { // Remove this symbol from the list ids.retain(|&id| id != sym_id); @@ -293,14 +294,14 @@ impl SymbolTable { sym.scope_depth = self.scope_depth; // Check for redefinition in the same scope - let key = (sym.name.clone(), sym.namespace); + let key = (sym.name, sym.namespace); if let Some(ids) = self.name_map.get(&key) { for &id in ids { let existing = &self.symbols[id.0 as usize]; if existing.scope_depth == self.scope_depth { // Same scope - check for redefinition if existing.defined && sym.defined { - return Err(SymbolError::Redefinition(sym.name.clone())); + return Err(SymbolError::Redefinition(sym.name)); } } } @@ -322,21 +323,21 @@ impl SymbolTable { /// Look up a symbol by name in the given namespace /// /// Searches from innermost scope outward - pub fn lookup(&self, name: &str, ns: Namespace) -> Option<&Symbol> { - let key = (name.to_string(), ns); + pub fn lookup(&self, name: StringId, ns: Namespace) -> Option<&Symbol> { + let key = (name, ns); self.name_map .get(&key) .and_then(|ids| ids.first().map(|id| &self.symbols[id.0 as usize])) } /// Look up a tag (struct/union/enum) by name - pub fn lookup_tag(&self, name: &str) -> Option<&Symbol> { + pub fn lookup_tag(&self, name: StringId) -> Option<&Symbol> { self.lookup(name, Namespace::Tag) } /// Look up a typedef by name /// Returns the aliased TypeId if found - pub fn lookup_typedef(&self, name: &str) -> Option { + pub fn lookup_typedef(&self, name: StringId) -> Option { self.lookup(name, Namespace::Ordinary).and_then(|s| { if s.is_typedef() { Some(s.typ) // TypeId is Copy @@ -347,7 +348,7 @@ impl SymbolTable { } /// Get the value of an enum constant - pub fn get_enum_value(&self, name: &str) -> Option { + pub fn get_enum_value(&self, name: StringId) -> Option { self.lookup(name, Namespace::Ordinary).and_then(|s| { if s.is_enum_constant() { s.enum_value @@ -369,10 +370,10 @@ impl Default for SymbolTable { // ============================================================================ /// Symbol table errors -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum SymbolError { /// Attempted to redefine an existing symbol - Redefinition(String), + Redefinition(StringId), } impl std::fmt::Display for SymbolError { @@ -392,90 +393,104 @@ impl std::error::Error for SymbolError {} #[cfg(test)] mod tests { use super::*; + use crate::strings::StringTable; use crate::types::{Type, TypeKind, TypeTable}; #[test] fn test_declare_and_lookup() { + let mut strings = StringTable::new(); let types = TypeTable::new(); let mut table = SymbolTable::new(); + let x_id = strings.intern("x"); + // Declare a variable - let sym = Symbol::variable("x".to_string(), types.int_id, 0); + let sym = Symbol::variable(x_id, types.int_id, 0); let _id = table.declare(sym).unwrap(); // Look it up - let found = table.lookup("x", Namespace::Ordinary).unwrap(); - assert_eq!(found.name, "x"); + let found = table.lookup(x_id, Namespace::Ordinary).unwrap(); + assert_eq!(found.name, x_id); assert_eq!(found.kind, SymbolKind::Variable); } #[test] fn test_scopes() { + let mut strings = StringTable::new(); let types = TypeTable::new(); let mut table = SymbolTable::new(); + let x_id = strings.intern("x"); + let y_id = strings.intern("y"); + // Declare x in global scope - let sym1 = Symbol::variable("x".to_string(), types.int_id, 0); + let sym1 = Symbol::variable(x_id, types.int_id, 0); table.declare(sym1).unwrap(); // Enter a new scope table.enter_scope(); // Declare y in inner scope - let sym2 = Symbol::variable("y".to_string(), types.char_id, 0); + let sym2 = Symbol::variable(y_id, types.char_id, 0); table.declare(sym2).unwrap(); // Both should be visible - assert!(table.lookup("x", Namespace::Ordinary).is_some()); - assert!(table.lookup("y", Namespace::Ordinary).is_some()); + assert!(table.lookup(x_id, Namespace::Ordinary).is_some()); + assert!(table.lookup(y_id, Namespace::Ordinary).is_some()); // Leave scope table.leave_scope(); // x should still be visible, y should not - assert!(table.lookup("x", Namespace::Ordinary).is_some()); - assert!(table.lookup("y", Namespace::Ordinary).is_none()); + assert!(table.lookup(x_id, Namespace::Ordinary).is_some()); + assert!(table.lookup(y_id, Namespace::Ordinary).is_none()); } #[test] fn test_shadowing() { + let mut strings = StringTable::new(); let types = TypeTable::new(); let mut table = SymbolTable::new(); + let x_id = strings.intern("x"); + // Declare x as int in global scope - let sym1 = Symbol::variable("x".to_string(), types.int_id, 0); + let sym1 = Symbol::variable(x_id, types.int_id, 0); table.declare(sym1).unwrap(); // Enter a new scope table.enter_scope(); // Shadow x with char - let sym2 = Symbol::variable("x".to_string(), types.char_id, 0); + let sym2 = Symbol::variable(x_id, types.char_id, 0); table.declare(sym2).unwrap(); // Should find the inner x (char) - let found = table.lookup("x", Namespace::Ordinary).unwrap(); + let found = table.lookup(x_id, Namespace::Ordinary).unwrap(); assert_eq!(types.kind(found.typ), TypeKind::Char); // Leave scope table.leave_scope(); // Should find the outer x (int) - let found = table.lookup("x", Namespace::Ordinary).unwrap(); + let found = table.lookup(x_id, Namespace::Ordinary).unwrap(); assert_eq!(types.kind(found.typ), TypeKind::Int); } #[test] fn test_redefinition_error() { + let mut strings = StringTable::new(); let types = TypeTable::new(); let mut table = SymbolTable::new(); + let x_id = strings.intern("x"); + // Declare x - let sym1 = Symbol::variable("x".to_string(), types.int_id, 0); + let sym1 = Symbol::variable(x_id, types.int_id, 0); table.declare(sym1).unwrap(); // Try to redeclare x in the same scope - let sym2 = Symbol::variable("x".to_string(), types.char_id, 0); + let sym2 = Symbol::variable(x_id, types.char_id, 0); let result = table.declare(sym2); assert!(matches!(result, Err(SymbolError::Redefinition(_)))); @@ -502,15 +517,18 @@ mod tests { #[test] fn test_function_symbol() { + let mut strings = StringTable::new(); let mut types = TypeTable::new(); let mut table = SymbolTable::new(); + let foo_id = strings.intern("foo"); + // Declare a function let func_type = types.intern(Type::function(types.int_id, vec![types.int_id], false)); - let func = Symbol::function("foo".to_string(), func_type, 0); + let func = Symbol::function(foo_id, func_type, 0); table.declare(func).unwrap(); - let found = table.lookup("foo", Namespace::Ordinary).unwrap(); + let found = table.lookup(foo_id, Namespace::Ordinary).unwrap(); assert_eq!(found.kind, SymbolKind::Function); assert!(!found.defined); // Not yet defined } diff --git a/cc/token/lexer.rs b/cc/token/lexer.rs index e0cbd210..1aceaf19 100644 --- a/cc/token/lexer.rs +++ b/cc/token/lexer.rs @@ -11,7 +11,7 @@ // use crate::diag; -use std::collections::HashMap; +use crate::strings::{StringId, StringTable}; // Re-export Position for use by other modules pub use crate::diag::Position; @@ -80,42 +80,9 @@ impl SpecialToken { // Identifier Interning // ============================================================================ -/// Identifier intern table (string interning for identifiers) -pub struct IdentTable { - map: HashMap, - idents: Vec, -} - -impl IdentTable { - pub fn new() -> Self { - Self { - map: HashMap::new(), - idents: Vec::new(), - } - } - - /// Intern an identifier, returning its unique ID - pub fn intern(&mut self, name: &str) -> u32 { - if let Some(&id) = self.map.get(name) { - return id; - } - let id = self.idents.len() as u32; - self.idents.push(name.to_string()); - self.map.insert(name.to_string(), id); - id - } - - /// Get identifier name by ID - pub fn get(&self, id: u32) -> Option<&str> { - self.idents.get(id as usize).map(|s| s.as_str()) - } -} - -impl Default for IdentTable { - fn default() -> Self { - Self::new() - } -} +/// Identifier intern table - now a re-export of StringTable +/// Kept for backward compatibility during transition +pub type IdentTable = StringTable; // ============================================================================ // Token Value @@ -126,7 +93,7 @@ impl Default for IdentTable { pub enum TokenValue { None, Number(String), // Numeric literal as string (pp-number) - Ident(u32), // Identifier (interned ID) + Ident(StringId), // Identifier (interned StringId) Special(u32), // Operator/punctuator String(String), // String literal content Char(String), // Character literal content @@ -237,7 +204,7 @@ impl Default for StreamTable { const EOF: i32 = -1; /// C Tokenizer following sparse's design -pub struct Tokenizer<'a> { +pub struct Tokenizer<'a, 'b> { // Input buffer: &'a [u8], offset: usize, @@ -249,12 +216,12 @@ pub struct Tokenizer<'a> { newline: bool, whitespace: bool, - // Interning - idents: IdentTable, + // Interning - shared string table + strings: &'b mut StringTable, } -impl<'a> Tokenizer<'a> { - pub fn new(buffer: &'a [u8], stream_id: u16) -> Self { +impl<'a, 'b> Tokenizer<'a, 'b> { + pub fn new(buffer: &'a [u8], stream_id: u16, strings: &'b mut StringTable) -> Self { Self { buffer, offset: 0, @@ -263,7 +230,7 @@ impl<'a> Tokenizer<'a> { col: 0, newline: true, whitespace: false, - idents: IdentTable::new(), + strings, } } @@ -454,7 +421,7 @@ impl<'a> Tokenizer<'a> { } } - let id = self.idents.intern(&name); + let id = self.strings.intern(&name); Token::with_value(TokenType::Ident, pos, TokenValue::Ident(id)) } @@ -708,22 +675,6 @@ impl<'a> Tokenizer<'a> { tokens } - - /// Get the identifier table (for looking up identifier names) - pub fn ident_table(&self) -> &IdentTable { - &self.idents - } - - /// Get mutable access to the identifier table (for preprocessing) - pub fn ident_table_mut(&mut self) -> &mut IdentTable { - &mut self.idents - } - - /// Take ownership of the identifier table (used by preprocessor tests) - #[cfg(test)] - pub fn into_ident_table(self) -> IdentTable { - self.idents - } } // ============================================================================ @@ -767,7 +718,7 @@ pub fn show_special(value: u32) -> String { } /// Format a token for display -pub fn show_token(token: &Token, idents: &IdentTable) -> String { +pub fn show_token(token: &Token, strings: &StringTable) -> String { match token.typ { TokenType::Eof => "".to_string(), TokenType::Error => "".to_string(), @@ -775,7 +726,7 @@ pub fn show_token(token: &Token, idents: &IdentTable) -> String { TokenType::StreamEnd => "".to_string(), TokenType::Ident => { if let TokenValue::Ident(id) = &token.value { - idents.get(*id).unwrap_or("").to_string() + strings.get(*id).to_string() } else { "".to_string() } @@ -850,11 +801,12 @@ pub fn token_type_name(typ: TokenType) -> &'static str { mod tests { use super::*; - fn tokenize_str(input: &str) -> (Vec, IdentTable) { - let mut tokenizer = Tokenizer::new(input.as_bytes(), 0); + fn tokenize_str(input: &str) -> (Vec, StringTable) { + let mut strings = StringTable::new(); + let mut tokenizer = Tokenizer::new(input.as_bytes(), 0, &mut strings); let tokens = tokenizer.tokenize(); - let idents = std::mem::take(&mut tokenizer.idents); - (tokens, idents) + drop(tokenizer); + (tokens, strings) } #[test] diff --git a/cc/token/preprocess.rs b/cc/token/preprocess.rs index dcaba673..d99f87dd 100644 --- a/cc/token/preprocess.rs +++ b/cc/token/preprocess.rs @@ -487,7 +487,7 @@ impl<'a> Preprocessor<'a> { } // Check for macro expansion if let TokenValue::Ident(id) = &token.value { - if let Some(name) = idents.get(*id) { + if let Some(name) = idents.get_opt(*id) { let name = name.to_string(); if let Some(expanded) = self.try_expand_macro(&name, &token.pos, &mut iter, idents) @@ -543,7 +543,7 @@ impl<'a> Preprocessor<'a> { let directive_name = match &directive_token.typ { TokenType::Ident => { if let TokenValue::Ident(id) = &directive_token.value { - idents.get(*id).map(|s| s.to_string()) + idents.get_opt(*id).map(|s| s.to_string()) } else { None } @@ -635,7 +635,7 @@ impl<'a> Preprocessor<'a> { let macro_name = match &name_token.typ { TokenType::Ident => { if let TokenValue::Ident(id) = &name_token.value { - idents.get(*id).map(|s| s.to_string()) + idents.get_opt(*id).map(|s| s.to_string()) } else { None } @@ -689,7 +689,7 @@ impl<'a> Preprocessor<'a> { break; } TokenValue::Ident(id) => { - if let Some(param_name) = idents.get(*id) { + if let Some(param_name) = idents.get_opt(*id) { params.push(MacroParam { name: param_name.to_string(), index: param_index, @@ -756,7 +756,7 @@ impl<'a> Preprocessor<'a> { // # stringify - look for following parameter if i + 1 < tokens.len() { if let TokenValue::Ident(id) = &tokens[i + 1].value { - if let Some(name) = idents.get(*id) { + if let Some(name) = idents.get_opt(*id) { // Check if it's a parameter for param in params { if param.name == name { @@ -798,7 +798,7 @@ impl<'a> Preprocessor<'a> { // Check for parameter reference or __VA_ARGS__ if let TokenValue::Ident(id) = &token.value { - if let Some(name) = idents.get(*id) { + if let Some(name) = idents.get_opt(*id) { // Check if it's __VA_ARGS__ if name == "__VA_ARGS__" { body.push(MacroToken { @@ -844,7 +844,7 @@ impl<'a> Preprocessor<'a> { let value = match &token.value { TokenValue::Number(n) => MacroTokenValue::Number(n.clone()), TokenValue::Ident(id) => { - let name = idents.get(*id).unwrap_or("").to_string(); + let name = idents.get_opt(*id).unwrap_or("").to_string(); MacroTokenValue::Ident(name) } TokenValue::String(s) => MacroTokenValue::String(s.clone()), @@ -875,7 +875,7 @@ impl<'a> Preprocessor<'a> { if let Some(token) = iter.next() { if let TokenType::Ident = &token.typ { if let TokenValue::Ident(id) = &token.value { - if let Some(name) = idents.get(*id) { + if let Some(name) = idents.get_opt(*id) { self.undef_macro(name); } } @@ -893,7 +893,7 @@ impl<'a> Preprocessor<'a> { let defined = if let Some(token) = iter.next() { if let TokenType::Ident = &token.typ { if let TokenValue::Ident(id) = &token.value { - if let Some(name) = idents.get(*id) { + if let Some(name) = idents.get_opt(*id) { self.is_defined(name) } else { false @@ -920,7 +920,7 @@ impl<'a> Preprocessor<'a> { let defined = if let Some(token) = iter.next() { if let TokenType::Ident = &token.typ { if let TokenValue::Ident(id) = &token.value { - if let Some(name) = idents.get(*id) { + if let Some(name) = idents.get_opt(*id) { self.is_defined(name) } else { false @@ -1136,7 +1136,7 @@ impl<'a> Preprocessor<'a> { /// Convert token to string fn token_to_string(&self, token: &Token, idents: &IdentTable) -> String { match &token.value { - TokenValue::Ident(id) => idents.get(*id).unwrap_or("").to_string(), + TokenValue::Ident(id) => idents.get_opt(*id).unwrap_or("").to_string(), TokenValue::Number(n) => n.clone(), TokenValue::String(s) => s.clone(), TokenValue::Special(code) => { @@ -1263,37 +1263,16 @@ impl<'a> Preprocessor<'a> { // Create a new stream for this file let stream_id = diag::init_stream(&self.current_file); - // Tokenize the included file - let mut tokenizer = Tokenizer::new(&content, stream_id); - let tokens = tokenizer.tokenize(); - - // Build mapping from included file's ident IDs to main ident table IDs - let included_idents = tokenizer.ident_table(); - let mut id_map: HashMap = HashMap::new(); - for i in 0.. { - if let Some(name) = included_idents.get(i) { - let new_id = idents.intern(name); - id_map.insert(i, new_id); - } else { - break; - } - } - - // Remap token identifiers - let remapped_tokens: Vec = tokens - .into_iter() - .map(|mut token| { - if let TokenValue::Ident(old_id) = &token.value { - if let Some(&new_id) = id_map.get(old_id) { - token.value = TokenValue::Ident(new_id); - } - } - token - }) - .collect(); + // Tokenize the included file using the same shared string table + // Since we use the same StringTable, all StringIds are consistent + // and no ID remapping is needed. + let tokens = { + let mut tokenizer = Tokenizer::new(&content, stream_id, idents); + tokenizer.tokenize() + }; // Preprocess the included tokens - let preprocessed = self.preprocess(remapped_tokens, idents); + let preprocessed = self.preprocess(tokens, idents); // Filter out stream markers from included content for token in preprocessed { @@ -1362,7 +1341,7 @@ impl<'a> Preprocessor<'a> { // Check for #pragma once if let Some(token) = iter.peek() { if let TokenValue::Ident(id) = &token.value { - if let Some(name) = idents.get(*id) { + if let Some(name) = idents.get_opt(*id) { if name == "once" { if let Ok(canonical) = Path::new(&self.current_file).canonicalize() { self.once_files.insert(canonical); @@ -1398,7 +1377,7 @@ impl<'a> Preprocessor<'a> { } match &token.value { TokenValue::Ident(id) => { - if let Some(name) = idents.get(*id) { + if let Some(name) = idents.get_opt(*id) { result.push_str(name); } } @@ -1642,34 +1621,20 @@ impl<'a> Preprocessor<'a> { let right_str = self.token_to_string(&right[0], idents); let combined = format!("{}{}", left_str, right_str); - // Re-tokenize the combined string + // Re-tokenize the combined string using the same shared string table + // Since we use the same StringTable, all StringIds are consistent + // and no ID remapping is needed. let stream_id = diag::init_stream(""); - let mut tokenizer = Tokenizer::new(combined.as_bytes(), stream_id); - let tokens = tokenizer.tokenize(); - - // Build mapping from paste_idents IDs to main idents IDs - let paste_idents = tokenizer.ident_table(); - let mut id_map: HashMap = HashMap::new(); - for i in 0.. { - if let Some(name) = paste_idents.get(i) { - let new_id = idents.intern(name); - id_map.insert(i, new_id); - } else { - break; - } - } + let tokens = { + let mut tokenizer = Tokenizer::new(combined.as_bytes(), stream_id, idents); + tokenizer.tokenize() + }; let mut result: Vec<_> = tokens .into_iter() .filter(|t| !matches!(t.typ, TokenType::StreamBegin | TokenType::StreamEnd)) .map(|mut t| { t.pos = *pos; - // Remap identifier IDs - if let TokenValue::Ident(old_id) = &t.value { - if let Some(&new_id) = id_map.get(old_id) { - t.value = TokenValue::Ident(new_id); - } - } t }) .collect(); @@ -2012,7 +1977,7 @@ impl<'a, 'b> ExprEvaluator<'a, 'b> { fn is_ident(&self, expected: &str) -> bool { if let Some(tok) = self.current() { if let TokenValue::Ident(id) = &tok.value { - if let Some(name) = self.idents.get(*id) { + if let Some(name) = self.idents.get_opt(*id) { return name == expected; } } @@ -2023,7 +1988,7 @@ impl<'a, 'b> ExprEvaluator<'a, 'b> { fn get_ident(&self) -> Option { if let Some(tok) = self.current() { if let TokenValue::Ident(id) = &tok.value { - return self.idents.get(*id).map(|s| s.to_string()); + return self.idents.get_opt(*id).map(|s| s.to_string()); } } None @@ -2266,7 +2231,7 @@ impl<'a, 'b> ExprEvaluator<'a, 'b> { let ident_id = *id; self.advance(); // Check if it's a defined macro with a value - if let Some(name) = self.idents.get(ident_id) { + if let Some(name) = self.idents.get_opt(ident_id) { if let Some(mac) = self.pp.get_macro(name) { if let Some(mt) = mac.body.first() { if let MacroTokenValue::Number(n) = &mt.value { @@ -2352,13 +2317,12 @@ mod tests { fn preprocess_str(input: &str) -> (Vec, IdentTable) { let target = Target::host(); - let mut tokenizer = Tokenizer::new(input.as_bytes(), 0); + let mut strings = IdentTable::new(); + let mut tokenizer = Tokenizer::new(input.as_bytes(), 0, &mut strings); let tokens = tokenizer.tokenize(); - let result = { - let idents = tokenizer.ident_table_mut(); - preprocess(tokens, &target, idents, "") - }; - (result, tokenizer.into_ident_table()) + drop(tokenizer); + let result = preprocess(tokens, &target, &mut strings, ""); + (result, strings) } fn get_token_strings(tokens: &[Token], idents: &IdentTable) -> Vec { @@ -2367,7 +2331,7 @@ mod tests { .filter_map(|t| match &t.typ { TokenType::Ident => { if let TokenValue::Ident(id) = &t.value { - idents.get(*id).map(|s| s.to_string()) + idents.get_opt(*id).map(|s| s.to_string()) } else { None } diff --git a/cc/types.rs b/cc/types.rs index 4a9aca88..9254c3a9 100644 --- a/cc/types.rs +++ b/cc/types.rs @@ -10,6 +10,7 @@ // Based on sparse's compositional type model // +use crate::strings::StringId; use std::collections::HashMap; use std::fmt; @@ -39,8 +40,8 @@ impl TypeId { /// A struct/union member #[derive(Debug, Clone, PartialEq)] pub struct StructMember { - /// Member name (empty string for unnamed bitfields) - pub name: String, + /// Member name (StringId::EMPTY for unnamed bitfields) + pub name: StringId, /// Member type (interned TypeId) pub typ: TypeId, /// Byte offset within struct (0 for unions, offset of storage unit for bitfields) @@ -71,8 +72,8 @@ pub struct MemberInfo { /// An enum constant #[derive(Debug, Clone, PartialEq)] pub struct EnumConstant { - /// Constant name - pub name: String, + /// Constant name (interned StringId) + pub name: StringId, /// Constant value pub value: i64, } @@ -80,8 +81,8 @@ pub struct EnumConstant { /// Composite type definition (struct, union, or enum) #[derive(Debug, Clone, PartialEq)] pub struct CompositeType { - /// Tag name (e.g., "point" in "struct point") - pub tag: Option, + /// Tag name (e.g., "point" in "struct point") - None for anonymous + pub tag: Option, /// Members for struct/union pub members: Vec, /// Constants for enum @@ -96,7 +97,7 @@ pub struct CompositeType { impl CompositeType { /// Create a new empty composite type (forward declaration) - pub fn incomplete(tag: Option) -> Self { + pub fn incomplete(tag: Option) -> Self { Self { tag, members: Vec::new(), @@ -353,23 +354,23 @@ impl Type { } /// Create an incomplete (forward-declared) struct type - pub fn incomplete_struct(tag: String) -> Self { + pub fn incomplete_struct(tag: StringId) -> Self { Self::struct_type(CompositeType::incomplete(Some(tag))) } /// Create an incomplete (forward-declared) union type - pub fn incomplete_union(tag: String) -> Self { + pub fn incomplete_union(tag: StringId) -> Self { Self::union_type(CompositeType::incomplete(Some(tag))) } /// Create an incomplete (forward-declared) enum type - pub fn incomplete_enum(tag: String) -> Self { + pub fn incomplete_enum(tag: StringId) -> Self { Self::enum_type(CompositeType::incomplete(Some(tag))) } /// Find a member in a struct/union type /// Returns MemberInfo with full bitfield details if found - pub fn find_member(&self, name: &str) -> Option { + pub fn find_member(&self, name: StringId) -> Option { if let Some(ref composite) = self.composite { for member in &composite.members { if member.name == name { @@ -970,7 +971,7 @@ impl TypeTable { } /// Find a member in a struct/union type - pub fn find_member(&self, id: TypeId, name: &str) -> Option { + pub fn find_member(&self, id: TypeId, name: StringId) -> Option { self.get(id).find_member(name) }