Skip to content

Commit 52c61eb

Browse files
authored
Merge pull request #434 from rustcoreutils/cc
[cc] string interning
2 parents 7cfdd27 + 9de1435 commit 52c61eb

File tree

9 files changed

+1023
-742
lines changed

9 files changed

+1023
-742
lines changed

cc/linearize.rs

Lines changed: 199 additions & 116 deletions
Large diffs are not rendered by default.

cc/main.rs

Lines changed: 16 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@ mod lower;
1818
mod os;
1919
mod parse;
2020
mod ssa;
21+
mod strings;
2122
mod symbol;
2223
mod target;
2324
mod token;
@@ -31,6 +32,7 @@ use std::path::Path;
3132
use std::process::Command;
3233

3334
use parse::Parser as CParser;
35+
use strings::StringTable;
3436
use symbol::SymbolTable;
3537
use target::Target;
3638
use token::{preprocess, show_token, token_type_name, StreamTable, Tokenizer};
@@ -144,24 +146,28 @@ fn process_file(
144146
// Create stream
145147
let stream_id = streams.add(display_path.to_string());
146148

149+
// Create shared string table for identifier interning
150+
let mut strings = StringTable::new();
151+
147152
// Tokenize
148-
let mut tokenizer = Tokenizer::new(&buffer, stream_id);
149-
let tokens = tokenizer.tokenize();
153+
let tokens = {
154+
let mut tokenizer = Tokenizer::new(&buffer, stream_id, &mut strings);
155+
tokenizer.tokenize()
156+
};
150157

151158
// Dump raw tokens if requested
152159
if args.dump_tokens && !args.preprocess_only {
153-
let idents = tokenizer.ident_table();
154160
for token in &tokens {
155161
if args.verbose {
156162
println!(
157163
"{:>4}:{:<3} {:12} {}",
158164
token.pos.line,
159165
token.pos.col,
160166
token_type_name(token.typ),
161-
show_token(token, idents)
167+
show_token(token, &strings)
162168
);
163169
} else {
164-
let text = show_token(token, idents);
170+
let text = show_token(token, &strings);
165171
if !text.starts_with('<') {
166172
print!("{} ", text);
167173
}
@@ -174,8 +180,7 @@ fn process_file(
174180
}
175181

176182
// Preprocess (may add new identifiers from included files)
177-
let preprocessed = preprocess(tokens, target, tokenizer.ident_table_mut(), path);
178-
let idents = tokenizer.ident_table();
183+
let preprocessed = preprocess(tokens, target, &mut strings, path);
179184

180185
if args.preprocess_only {
181186
// Output preprocessed tokens
@@ -186,10 +191,10 @@ fn process_file(
186191
token.pos.line,
187192
token.pos.col,
188193
token_type_name(token.typ),
189-
show_token(token, idents)
194+
show_token(token, &strings)
190195
);
191196
} else {
192-
let text = show_token(token, idents);
197+
let text = show_token(token, &strings);
193198
if !text.starts_with('<') {
194199
print!("{} ", text);
195200
}
@@ -207,7 +212,7 @@ fn process_file(
207212
let mut types = types::TypeTable::new();
208213

209214
// Parse (this also binds symbols to the symbol table)
210-
let mut parser = CParser::new(&preprocessed, idents, &mut symbols, &mut types);
215+
let mut parser = CParser::new(&preprocessed, &strings, &mut symbols, &mut types);
211216
let ast = parser
212217
.parse_translation_unit()
213218
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, format!("parse error: {}", e)))?;
@@ -222,6 +227,7 @@ fn process_file(
222227
&ast,
223228
&symbols,
224229
&types,
230+
&strings,
225231
target,
226232
args.debug,
227233
Some(display_path),

cc/parse/ast.rs

Lines changed: 51 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
//
1212

1313
use crate::diag::Position;
14+
use crate::strings::StringId;
1415
use crate::types::TypeId;
1516

1617
// ============================================================================
@@ -179,7 +180,7 @@ pub enum ExprKind {
179180

180181
/// Identifier (variable reference)
181182
Ident {
182-
name: String,
183+
name: StringId,
183184
},
184185

185186
/// Unary operation
@@ -222,13 +223,13 @@ pub enum ExprKind {
222223
/// Member access: expr.member
223224
Member {
224225
expr: Box<Expr>,
225-
member: String,
226+
member: StringId,
226227
},
227228

228229
/// Pointer member access: expr->member
229230
Arrow {
230231
expr: Box<Expr>,
231-
member: String,
232+
member: StringId,
232233
},
233234

234235
/// Array subscript: array[index]
@@ -266,7 +267,7 @@ pub enum ExprKind {
266267
/// The va_list to initialize (lvalue)
267268
ap: Box<Expr>,
268269
/// Name of the last named parameter before ...
269-
last_param: String,
270+
last_param: StringId,
270271
},
271272

272273
/// __builtin_va_arg(ap, type)
@@ -334,7 +335,7 @@ pub enum ExprKind {
334335
#[derive(Debug, Clone)]
335336
pub enum Designator {
336337
/// Field designator: .field_name
337-
Field(String),
338+
Field(StringId),
338339
/// Index designator: [constant_expr] - evaluated at parse time
339340
Index(i64),
340341
}
@@ -361,20 +362,13 @@ impl Expr {
361362
}
362363

363364
/// Create a variable reference (untyped - needs type evaluation) - no position
364-
pub fn var(name: &str) -> Self {
365-
Expr::new_unpositioned(ExprKind::Ident {
366-
name: name.to_string(),
367-
})
365+
pub fn var(name: StringId) -> Self {
366+
Expr::new_unpositioned(ExprKind::Ident { name })
368367
}
369368

370369
/// Create a variable reference with a known type - no position
371-
pub fn var_typed(name: &str, typ: TypeId) -> Self {
372-
Expr::typed_unpositioned(
373-
ExprKind::Ident {
374-
name: name.to_string(),
375-
},
376-
typ,
377-
)
370+
pub fn var_typed(name: StringId, typ: TypeId) -> Self {
371+
Expr::typed_unpositioned(ExprKind::Ident { name }, typ)
378372
}
379373

380374
/// Create a binary expression (using TypeTable for type inference)
@@ -496,10 +490,10 @@ pub enum Stmt {
496490
Continue,
497491

498492
/// Goto statement: goto label;
499-
Goto(String),
493+
Goto(StringId),
500494

501495
/// Labeled statement: label: stmt
502-
Label { name: String, stmt: Box<Stmt> },
496+
Label { name: StringId, stmt: Box<Stmt> },
503497

504498
/// Switch statement: switch (expr) { cases }
505499
Switch { expr: Expr, body: Box<Stmt> },
@@ -542,7 +536,7 @@ pub struct Declaration {
542536
#[derive(Debug, Clone)]
543537
pub struct InitDeclarator {
544538
/// The name being declared
545-
pub name: String,
539+
pub name: StringId,
546540
/// The complete type (after applying declarator modifiers) - interned TypeId
547541
pub typ: TypeId,
548542
/// Optional initializer
@@ -552,13 +546,9 @@ pub struct InitDeclarator {
552546
#[cfg(test)]
553547
impl Declaration {
554548
/// Create a simple declaration with one variable
555-
pub fn simple(name: &str, typ: TypeId, init: Option<Expr>) -> Self {
549+
pub fn simple(name: StringId, typ: TypeId, init: Option<Expr>) -> Self {
556550
Declaration {
557-
declarators: vec![InitDeclarator {
558-
name: name.to_string(),
559-
typ,
560-
init,
561-
}],
551+
declarators: vec![InitDeclarator { name, typ, init }],
562552
}
563553
}
564554
}
@@ -570,7 +560,7 @@ impl Declaration {
570560
/// A function parameter
571561
#[derive(Debug, Clone)]
572562
pub struct Parameter {
573-
pub name: Option<String>,
563+
pub name: Option<StringId>,
574564
/// Parameter type (interned TypeId)
575565
pub typ: TypeId,
576566
}
@@ -581,7 +571,7 @@ pub struct FunctionDef {
581571
/// Return type (interned TypeId)
582572
pub return_type: TypeId,
583573
/// Function name
584-
pub name: String,
574+
pub name: StringId,
585575
/// Parameters
586576
pub params: Vec<Parameter>,
587577
/// Function body
@@ -632,6 +622,7 @@ impl Default for TranslationUnit {
632622
#[cfg(test)]
633623
mod tests {
634624
use super::*;
625+
use crate::strings::StringTable;
635626
use crate::types::TypeKind;
636627

637628
#[test]
@@ -701,15 +692,17 @@ mod tests {
701692

702693
#[test]
703694
fn test_unary_expr() {
695+
let mut strings = StringTable::new();
704696
let types = TypeTable::new();
697+
let x_id = strings.intern("x");
705698
// -x
706-
let expr = Expr::unary(UnaryOp::Neg, Expr::var("x"), &types);
699+
let expr = Expr::unary(UnaryOp::Neg, Expr::var(x_id), &types);
707700

708701
match expr.kind {
709702
ExprKind::Unary { op, operand } => {
710703
assert_eq!(op, UnaryOp::Neg);
711704
match operand.kind {
712-
ExprKind::Ident { name, .. } => assert_eq!(name, "x"),
705+
ExprKind::Ident { name } => assert_eq!(name, x_id),
713706
_ => panic!("Expected Ident"),
714707
}
715708
}
@@ -719,15 +712,17 @@ mod tests {
719712

720713
#[test]
721714
fn test_assignment() {
715+
let mut strings = StringTable::new();
722716
let types = TypeTable::new();
717+
let x_id = strings.intern("x");
723718
// x = 5
724-
let expr = Expr::assign(Expr::var("x"), Expr::int(5, &types), &types);
719+
let expr = Expr::assign(Expr::var(x_id), Expr::int(5, &types), &types);
725720

726721
match expr.kind {
727722
ExprKind::Assign { op, target, value } => {
728723
assert_eq!(op, AssignOp::Assign);
729724
match target.kind {
730-
ExprKind::Ident { name, .. } => assert_eq!(name, "x"),
725+
ExprKind::Ident { name } => assert_eq!(name, x_id),
731726
_ => panic!("Expected Ident"),
732727
}
733728
match value.kind {
@@ -741,18 +736,20 @@ mod tests {
741736

742737
#[test]
743738
fn test_function_call() {
739+
let mut strings = StringTable::new();
744740
let types = TypeTable::new();
741+
let foo_id = strings.intern("foo");
745742
// foo(1, 2)
746743
let expr = Expr::call(
747-
Expr::var("foo"),
744+
Expr::var(foo_id),
748745
vec![Expr::int(1, &types), Expr::int(2, &types)],
749746
&types,
750747
);
751748

752749
match expr.kind {
753750
ExprKind::Call { func, args } => {
754751
match func.kind {
755-
ExprKind::Ident { name, .. } => assert_eq!(name, "foo"),
752+
ExprKind::Ident { name } => assert_eq!(name, foo_id),
756753
_ => panic!("Expected Ident"),
757754
}
758755
assert_eq!(args.len(), 2);
@@ -763,10 +760,12 @@ mod tests {
763760

764761
#[test]
765762
fn test_if_stmt() {
763+
let mut strings = StringTable::new();
766764
let types = TypeTable::new();
765+
let x_id = strings.intern("x");
767766
// if (x) return 1;
768767
let stmt = Stmt::If {
769-
cond: Expr::var("x"),
768+
cond: Expr::var(x_id),
770769
then_stmt: Box::new(Stmt::Return(Some(Expr::int(1, &types)))),
771770
else_stmt: None,
772771
};
@@ -778,7 +777,7 @@ mod tests {
778777
else_stmt,
779778
} => {
780779
match cond.kind {
781-
ExprKind::Ident { name, .. } => assert_eq!(name, "x"),
780+
ExprKind::Ident { name } => assert_eq!(name, x_id),
782781
_ => panic!("Expected Ident"),
783782
}
784783
match *then_stmt {
@@ -796,18 +795,20 @@ mod tests {
796795

797796
#[test]
798797
fn test_while_stmt() {
798+
let mut strings = StringTable::new();
799+
let x_id = strings.intern("x");
799800
// while (x) x--;
800801
let stmt = Stmt::While {
801-
cond: Expr::var("x"),
802+
cond: Expr::var(x_id),
802803
body: Box::new(Stmt::Expr(Expr::new_unpositioned(ExprKind::PostDec(
803-
Box::new(Expr::var("x")),
804+
Box::new(Expr::var(x_id)),
804805
)))),
805806
};
806807

807808
match stmt {
808809
Stmt::While { cond, body } => {
809810
match cond.kind {
810-
ExprKind::Ident { name, .. } => assert_eq!(name, "x"),
811+
ExprKind::Ident { name } => assert_eq!(name, x_id),
811812
_ => panic!("Expected Ident"),
812813
}
813814
match *body {
@@ -824,39 +825,45 @@ mod tests {
824825

825826
#[test]
826827
fn test_declaration() {
828+
let mut strings = StringTable::new();
827829
let types = TypeTable::new();
830+
let x_id = strings.intern("x");
828831
// int x = 5;
829-
let decl = Declaration::simple("x", types.int_id, Some(Expr::int(5, &types)));
832+
let decl = Declaration::simple(x_id, types.int_id, Some(Expr::int(5, &types)));
830833

831834
assert_eq!(decl.declarators.len(), 1);
832-
assert_eq!(decl.declarators[0].name, "x");
835+
assert_eq!(decl.declarators[0].name, x_id);
833836
assert_eq!(types.kind(decl.declarators[0].typ), TypeKind::Int);
834837
assert!(decl.declarators[0].init.is_some());
835838
}
836839

837840
#[test]
838841
fn test_translation_unit() {
842+
let mut strings = StringTable::new();
839843
let types = TypeTable::new();
844+
let x_id = strings.intern("x");
840845
let mut tu = TranslationUnit::new();
841846

842847
// Add a declaration
843-
let decl = Declaration::simple("x", types.int_id, None);
848+
let decl = Declaration::simple(x_id, types.int_id, None);
844849
tu.add(ExternalDecl::Declaration(decl));
845850

846851
assert_eq!(tu.items.len(), 1);
847852
}
848853

849854
#[test]
850855
fn test_for_loop() {
856+
let mut strings = StringTable::new();
851857
let types = TypeTable::new();
858+
let i_id = strings.intern("i");
852859
// for (int i = 0; i < 10; i++) {}
853860
let init = ForInit::Declaration(Declaration::simple(
854-
"i",
861+
i_id,
855862
types.int_id,
856863
Some(Expr::int(0, &types)),
857864
));
858-
let cond = Expr::binary(BinaryOp::Lt, Expr::var("i"), Expr::int(10, &types), &types);
859-
let post = Expr::new_unpositioned(ExprKind::PostInc(Box::new(Expr::var("i"))));
865+
let cond = Expr::binary(BinaryOp::Lt, Expr::var(i_id), Expr::int(10, &types), &types);
866+
let post = Expr::new_unpositioned(ExprKind::PostInc(Box::new(Expr::var(i_id))));
860867

861868
let stmt = Stmt::For {
862869
init: Some(init),

0 commit comments

Comments
 (0)