diff --git a/cc/README.md b/cc/README.md index 3b3277c6..91086642 100644 --- a/cc/README.md +++ b/cc/README.md @@ -52,20 +52,20 @@ cargo test --release -p posixutils-cc The compiler supports C input via `-` for stdin, and can output intermediate representations: -- `--dump-asm` - Output assembly to stdout (instead of invoking assembler/linker) +- `-S -o -` - Output assembly to stdout (standard clang/gcc option) - `--dump-ir` - Output IR before code generation Examples: ```bash # Compile from stdin, view generated assembly -echo 'int main() { return 42; }' | ./target/release/pcc - --dump-asm +echo 'int main() { return 42; }' | ./target/release/pcc - -S -o - # View IR for a source file ./target/release/pcc myfile.c --dump-ir # Using heredoc for multi-line test cases -./target/release/pcc - --dump-asm <<'EOF' +./target/release/pcc - -S -o - <<'EOF' int add(int a, int b) { return a + b; } @@ -94,14 +94,12 @@ Supported: - Bitfields (named, unnamed, zero-width for alignment) Not yet implemented: -- goto, longjmp, setjmp +- longjmp, setjmp - `inline` and inlining support - multi-register returns (for structs larger than 8 bytes) - -fverbose-asm - Complex initializers - VLAs (variable-length arrays) -- _Complex and _Atomic types -- Thread-local storage, alignas, etc. 
- top builtins to implement: __builtin_expect __builtin_clz / clzl / clzll @@ -109,9 +107,12 @@ Not yet implemented: __sync_synchronize __sync_fetch_and_add (and maybe a couple of its siblings) __builtin_unreachable (helps optimizations + silences some warnings) -- string interning - DCE and other opt passes - assembly peephole optimizations +- _Complex +- C11 Alignment Specifiers (_Alignas, _Alignof) +- C11 Thread-Local Storage (_Thread_local) and atomics (_Atomic) +- Other C11 features: _Static_assert, _Generic, _Noreturn, anonymous structs ## Known Issues diff --git a/cc/TODO.md b/cc/TODO.md index c6ae64ce..a3908d01 100644 --- a/cc/TODO.md +++ b/cc/TODO.md @@ -157,3 +157,344 @@ ARM64: - [C/C++11 mappings to processors](https://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html) - [LOCK prefix - x86 reference](https://www.felixcloutier.com/x86/lock) - [AArch64 atomic access - Microsoft](https://devblogs.microsoft.com/oldnewthing/20220811-00/?p=106963) + +--- + +### C99 Complex Numbers (`_Complex`) + +#### Overview + +Add C99 complex number support. The `<complex.h>` header and library functions (`cabs`, `csqrt`, etc.) are provided by the system - the compiler only needs to handle the type and arithmetic. + +#### Types + +| Type | Size | Layout | +|------|------|--------| +| `float _Complex` | 8 bytes | `{float real, float imag}` | +| `double _Complex` | 16 bytes | `{double real, double imag}` | +| `long double _Complex` | 32 bytes | `{long double real, long double imag}` | + +#### Syntax + +```c +#include <complex.h> + +double _Complex z1 = 1.0 + 2.0*I; // _Complex keyword +float _Complex z2 = 3.0f - 4.0f*I; // float complex +``` + +**Constraints:** +- `_Complex` must combine with `float`, `double`, or `long double` +- Cannot combine with integer types (`int _Complex` is invalid) +- `complex` is a macro from `<complex.h>` expanding to `_Complex` + +#### Implementation Phases + +**Phase 1: Type System (`cc/types.rs`)** + +1. Add `COMPLEX` modifier to `TypeModifiers` +2. 
Add three complex type variants or track via modifier +3. Size: 2× the base float type; Alignment: same as base type + +**Phase 2: Parser (`cc/parse/parser.rs`)** + +1. Add `_Complex` keyword to lexer +2. Parse as type specifier (like `long`, `unsigned`) +3. Validate: only with `float`/`double`/`long double` + +**Phase 3: Linearizer - Arithmetic Expansion (`cc/linearize.rs`)** + +Complex arithmetic must be expanded to real operations: + +```c +// Addition: c = a + b +c.real = a.real + b.real; +c.imag = a.imag + b.imag; + +// Subtraction: c = a - b +c.real = a.real - b.real; +c.imag = a.imag - b.imag; + +// Multiplication: c = a * b +c.real = a.real*b.real - a.imag*b.imag; +c.imag = a.real*b.imag + a.imag*b.real; + +// Division: c = a / b (simplified, production needs overflow handling) +denom = b.real*b.real + b.imag*b.imag; +c.real = (a.real*b.real + a.imag*b.imag) / denom; +c.imag = (a.imag*b.real - a.real*b.imag) / denom; +``` + +**Phase 4: Code Generation - ABI** + +x86-64 SysV: +- `float _Complex`: returned in xmm0 (both parts packed) +- `double _Complex`: returned in xmm0 (real) + xmm1 (imag) +- Arguments: same pattern, use two SSE registers + +ARM64: +- Treated as struct of two floats/doubles +- Passed/returned in two FP registers (d0+d1 or s0+s1) + +**Phase 5: Member Access (Optional GCC Extensions)** + +```c +double _Complex z = 1.0 + 2.0*I; +double r = __real__ z; // 1.0 +double i = __imag__ z; // 2.0 +__real__ z = 3.0; // Can assign to parts +``` + +Alternative: users can call `creal(z)` and `cimag(z)` which are library functions. 
+ +#### Implementation Order + +| Order | Component | Complexity | +|-------|-----------|------------| +| 1 | Type system (`COMPLEX` modifier) | Low | +| 2 | Parser (`_Complex` keyword) | Low | +| 3 | Basic load/store of complex values | Medium | +| 4 | Addition/subtraction expansion | Medium | +| 5 | Multiplication expansion | Medium | +| 6 | Division expansion | Medium-High | +| 7 | ABI for function calls | Medium | +| 8 | `__real__`/`__imag__` extensions | Low (optional) | + +#### What the Compiler Does NOT Need + +- Library functions (`cabs`, `csqrt`, `cexp`, etc.) - just normal calls +- The `I` macro - defined in `<complex.h>` as `_Complex_I` +- The `complex` macro - defined in `<complex.h>` as `_Complex` + +#### References + +- [C99 Complex arithmetic - cppreference](https://en.cppreference.com/w/c/numeric/complex) +- [complex.h - cppreference](https://en.cppreference.com/w/c/header/complex) + +--- + +### C11 Alignment Specifiers (`_Alignas`, `_Alignof`) + +#### Overview + +Add C11 alignment control to specify/query memory alignment of objects. + +#### Syntax + +```c +_Alignas(16) float sse_data[4]; // Align to 16-byte boundary +_Alignas(int) char c; // Align char like int +size_t align = _Alignof(double); // Query alignment (compile-time constant) +``` + +**Two forms:** +1. `_Alignas(constant-expression)` - explicit byte alignment (must be power of 2) +2. `_Alignas(type-name)` - align like another type + +#### Restrictions + +- Cannot apply to: bit-fields, `register` variables, function parameters, typedefs +- Cannot reduce alignment below natural alignment (only increase) +- When multiple `_Alignas` specifiers appear, the strictest (largest) wins +- `_Alignof` cannot be applied to function types or incomplete types + +#### Implementation Phases + +**Phase 1: Type System (`cc/types.rs`)** + +1. Add `alignment: Option` field to relevant type structures +2. Add helpers: `alignment_of(type)`, `set_explicit_alignment()` +3. 
Track whether alignment is natural vs explicitly specified + +**Phase 2: Parser (`cc/parse/parser.rs`)** + +1. Add `_Alignas` and `_Alignof` keywords to lexer +2. Parse `_Alignas(expr)` and `_Alignas(type-name)` as declaration specifiers +3. Parse `_Alignof(type-name)` as unary expression (returns `size_t`) +4. Semantic checks: power of 2, not less than natural alignment + +**Phase 3: IR Extensions (`cc/ir.rs`)** + +1. Add alignment field to `Alloca` instruction for local variables +2. Add alignment attribute to global variable definitions + +**Phase 4: Code Generation** + +For globals: +```asm + .balign 16 +symbol: + .zero 64 +``` + +For locals: +- Adjust stack pointer with additional padding +- Use aligned stack slots + +**Phase 5: Struct Layout** + +- When computing struct layout, member `_Alignas` affects padding +- Struct alignment = max of all member alignments + +#### Implementation Order + +| Order | Component | Complexity | +|-------|-----------|------------| +| 1 | `_Alignof` operator | Easy - compile-time type query | +| 2 | `_Alignas` for globals | Easy - `.balign` directives | +| 3 | `_Alignas` for struct members | Moderate - layout changes | +| 4 | `_Alignas` for locals | Moderate - stack frame adjustments | + +#### References + +- [_Alignas - cppreference.com](https://en.cppreference.com/w/c/language/_Alignas.html) +- [Alignment (C11) - Microsoft Learn](https://learn.microsoft.com/en-us/cpp/c-language/alignment-c) + +--- + +### C11 Thread-Local Storage (`_Thread_local`) + +#### Overview + +Add thread-local storage class specifier for per-thread variable instances. 
+ +#### Syntax + +```c +_Thread_local int errno; // Each thread gets its own copy +static _Thread_local int counter; // Thread-local + file scope +extern _Thread_local int shared_tls; // Declaration of TLS from another TU +``` + +**Constraints:** +- Can combine with `static` or `extern`, but not `auto` or `register` +- Cannot be used on function parameters or local block-scope variables (unless `static`) +- GCC extension `__thread` is equivalent + +#### TLS Memory Models + +| Model | Use Case | Mechanism | +|-------|----------|-----------| +| Local-Exec | Non-preemptible in executables | TP offset is link-time constant | +| Initial-Exec | Preemptible at program start | GOT entry holds fixed TP offset | +| Local-Dynamic | Non-preemptible in shared libs | Module ID lookup, local offsets | +| General-Dynamic | Preemptible in shared libs | Full runtime lookup via `__tls_get_addr` | + +For a simple compiler targeting executables, **Local-Exec** is sufficient and simplest. + +#### Implementation Phases + +**Phase 1: Lexer/Parser** + +1. Add `_Thread_local` keyword (and `__thread` as extension) +2. Parse as storage class specifier +3. Validate: not with `auto`/`register`, not on block-scope non-static + +**Phase 2: Symbol Table (`cc/symbol.rs`)** + +1. Add `is_thread_local: bool` to `Symbol` +2. Track TLS storage class during declaration + +**Phase 3: IR (`cc/ir.rs`)** + +1. Mark global variables as TLS in IR representation +2. 
Add `is_tls` flag to global definitions + +**Phase 4: Code Generation - x86-64** + +Local-Exec model (simplest, for executables): +```asm +# Read TLS variable +movl %fs:symbol@TPOFF, %eax + +# Write TLS variable +movl $42, %fs:symbol@TPOFF + +# Get address of TLS variable +movq %fs:0, %rax +leaq symbol@TPOFF(%rax), %rax +``` + +Relocations needed: +- `R_X86_64_TPOFF32` - 32-bit signed TP offset + +Section placement: +- Initialized: `.tdata` section +- Zero-initialized: `.tbss` section + +**Phase 5: Code Generation - ARM64** + +Local-Exec model: +```asm +# Read TLS variable (default -mtls-size=24) +mrs x0, tpidr_el0 +add x0, x0, #:tprel_hi12:symbol +add x0, x0, #:tprel_lo12_nc:symbol +ldr w0, [x0] +``` + +Relocations needed: +- `R_AARCH64_TLSLE_ADD_TPREL_HI12` +- `R_AARCH64_TLSLE_ADD_TPREL_LO12_NC` + +#### Implementation Order + +| Order | Component | Complexity | +|-------|-----------|------------| +| 1 | Parser + symbol tracking | Low | +| 2 | IR representation | Low | +| 3 | x86-64 Local-Exec codegen | Medium | +| 4 | ARM64 Local-Exec codegen | Medium | +| 5 | Initial-Exec model (optional) | High | +| 6 | General-Dynamic (for DSOs) | High | + +#### Complexity Assessment + +TLS is **significantly more complex** than alignment: +- Requires special relocations the assembler/linker must handle +- Platform-specific thread pointer access (`%fs` on x86-64, `tpidr_el0` on ARM64) +- Multiple models depending on PIC/PIE/shared library context +- Runtime support for dynamic models (`__tls_get_addr`) + +**Recommendation:** Start with Local-Exec model only, which works for simple executables and avoids runtime dependencies. 
+ +#### References + +- [Thread-local storage - Wikipedia](https://en.wikipedia.org/wiki/Thread-local_storage) +- [All about thread-local storage - MaskRay](https://maskray.me/blog/2021-02-14-all-about-thread-local-storage) +- [GCC Thread-Local Documentation](https://gcc.gnu.org/onlinedocs/gcc/Thread-Local.html) +- [thread_local - cppreference.com](https://en.cppreference.com/w/c/thread/thread_local) + +--- + +### Other C11 Features + +| Feature | Description | Complexity | +|---------|-------------|------------| +| `_Static_assert(expr, msg)` | Compile-time assertion | Easy | +| `_Generic(expr, type: val, ...)` | Type-generic selection | Moderate | +| `_Noreturn` | Function attribute (never returns) | Easy | +| Anonymous structs/unions | `struct { struct { int x; }; };` | Moderate | +| `<stdalign.h>` | `alignas`/`alignof` macros | Preprocessor only | + +#### `_Static_assert` Implementation + +```c +_Static_assert(sizeof(int) == 4, "int must be 32 bits"); +``` + +1. Parser: recognize keyword, parse `(constant-expr, string-literal)` +2. Semantic: evaluate expression at compile time +3. If false: emit error with the string message +4. No codegen needed - purely compile-time + +#### `_Noreturn` Implementation + +```c +_Noreturn void exit(int status); +``` + +1. Parser: recognize as function specifier +2. Type system: mark function type as noreturn +3. Semantic: warn if function can return +4. 
Codegen: can omit function epilogue, enable optimizations diff --git a/cc/arch/aarch64/codegen.rs b/cc/arch/aarch64/codegen.rs index 9210a62c..a9a8354f 100644 --- a/cc/arch/aarch64/codegen.rs +++ b/cc/arch/aarch64/codegen.rs @@ -1518,20 +1518,12 @@ impl Aarch64CodeGen { frame_info: &(i32, Vec), types: &TypeTable, ) { - // Emit block label using LIR (include function name for uniqueness) - if let Some(label) = &block.label { - // LIR: named block label (using Raw since format differs from standard) - self.push_lir(Aarch64Inst::Directive(Directive::Raw(format!( - ".L_{}_{}:", - self.current_fn, label - )))); - } else { - // LIR: numbered block label - self.push_lir(Aarch64Inst::Directive(Directive::BlockLabel(Label::new( - &self.current_fn, - block.id.0, - )))); - } + // Always emit block ID label for consistency with jumps + // (jumps reference blocks by ID, not by C label name) + self.push_lir(Aarch64Inst::Directive(Directive::BlockLabel(Label::new( + &self.current_fn, + block.id.0, + )))); // Emit instructions for insn in &block.insns { diff --git a/cc/arch/lir.rs b/cc/arch/lir.rs index b1d8e6a6..8d0acf8d 100644 --- a/cc/arch/lir.rs +++ b/cc/arch/lir.rs @@ -340,9 +340,6 @@ pub enum Directive { /// Blank line for readability Blank, - - /// Raw assembly line (escape hatch for special cases) - Raw(String), } impl Directive { @@ -548,9 +545,6 @@ impl EmitAsm for Directive { Directive::Blank => { let _ = writeln!(out); } - Directive::Raw(text) => { - let _ = writeln!(out, "{}", text); - } } } } diff --git a/cc/arch/mod.rs b/cc/arch/mod.rs index 09bafb26..58992add 100644 --- a/cc/arch/mod.rs +++ b/cc/arch/mod.rs @@ -21,40 +21,30 @@ use crate::target::{Arch, Target}; /// Get architecture-specific predefined macros as (name, value) pairs pub fn get_arch_macros(target: &Target) -> Vec<(&'static str, Option<&'static str>)> { - let mut macros = Vec::new(); + // long is 64-bit on LP64 (Unix), 32-bit on LLP64 (Windows) + let long_size = if target.long_width == 64 { "8" } else 
{ "4" }; - // Common architecture macros based on type sizes - macros.push(("__CHAR_BIT__", Some("8"))); + // Signedness: __CHAR_UNSIGNED__ is defined only if char is unsigned + let char_unsigned: Option<&'static str> = if target.char_signed { None } else { Some("1") }; - // Pointer size - if target.pointer_width == 64 { - macros.push(("__LP64__", Some("1"))); - macros.push(("_LP64", Some("1"))); - macros.push(("__SIZEOF_POINTER__", Some("8"))); - } else { - macros.push(("__ILP32__", Some("1"))); - macros.push(("__SIZEOF_POINTER__", Some("4"))); - } - - // Type sizes - macros.push(("__SIZEOF_SHORT__", Some("2"))); - macros.push(("__SIZEOF_INT__", Some("4"))); + let mut macros = vec![ + // Common architecture macros based on type sizes + ("__CHAR_BIT__", Some("8")), + ("__SIZEOF_POINTER__", Some("8")), + // Type sizes + ("__SIZEOF_SHORT__", Some("2")), + ("__SIZEOF_INT__", Some("4")), + ("__SIZEOF_LONG__", Some(long_size)), + ("__SIZEOF_LONG_LONG__", Some("8")), + ("__SIZEOF_FLOAT__", Some("4")), + ("__SIZEOF_DOUBLE__", Some("8")), + ("__CHAR_UNSIGNED__", char_unsigned), + ]; + // LP64 macros only on LP64 targets (Unix), not on LLP64 (Windows) if target.long_width == 64 { - macros.push(("__SIZEOF_LONG__", Some("8"))); - } else { - macros.push(("__SIZEOF_LONG__", Some("4"))); - } - - macros.push(("__SIZEOF_LONG_LONG__", Some("8"))); - macros.push(("__SIZEOF_FLOAT__", Some("4"))); - macros.push(("__SIZEOF_DOUBLE__", Some("8"))); - - // Signedness - if target.char_signed { - macros.push(("__CHAR_UNSIGNED__", None)); // Not defined - } else { - macros.push(("__CHAR_UNSIGNED__", Some("1"))); + macros.push(("__LP64__", Some("1"))); + macros.push(("_LP64", Some("1"))); } // Architecture-specific @@ -72,52 +62,295 @@ pub fn get_arch_macros(target: &Target) -> Vec<(&'static str, Option<&'static st /// Get type limit macros (for compatibility) pub fn get_limit_macros(target: &Target) -> Vec<(&'static str, &'static str)> { - let mut macros = Vec::new(); + // long is 64-bit on 
LP64 (Unix), 32-bit on LLP64 (Windows) + let (long_max, long_width) = if target.long_width == 64 { + ("9223372036854775807L", "64") + } else { + ("2147483647L", "32") + }; - // Character limits - macros.push(("__SCHAR_MAX__", "127")); - macros.push(("__SHRT_MAX__", "32767")); - macros.push(("__INT_MAX__", "2147483647")); + vec![ + // Character limits + ("__SCHAR_MAX__", "127"), + ("__SHRT_MAX__", "32767"), + ("__INT_MAX__", "2147483647"), + ("__LONG_MAX__", long_max), + ("__LONG_LONG_MAX__", "9223372036854775807LL"), + // Width macros + ("__SCHAR_WIDTH__", "8"), + ("__SHRT_WIDTH__", "16"), + ("__INT_WIDTH__", "32"), + ("__LONG_WIDTH__", long_width), + ("__LLONG_WIDTH__", "64"), + // Size type (always 64-bit on our targets) + ("__SIZE_MAX__", "18446744073709551615UL"), + ("__SIZE_WIDTH__", "64"), + ("__PTRDIFF_MAX__", "9223372036854775807L"), + ("__PTRDIFF_WIDTH__", "64"), + ("__INTPTR_MAX__", "9223372036854775807L"), + ("__INTPTR_WIDTH__", "64"), + ("__UINTPTR_MAX__", "18446744073709551615UL"), + ] +} - if target.long_width == 64 { - macros.push(("__LONG_MAX__", "9223372036854775807L")); - } else { - macros.push(("__LONG_MAX__", "2147483647L")); - } +/// Get type definition macros (for and compatibility) +/// These define the underlying C type for various abstract types +pub fn get_type_macros(_target: &Target) -> Vec<(&'static str, &'static str)> { + vec![ + // Fixed-width integer types + ("__INT8_TYPE__", "signed char"), + ("__INT16_TYPE__", "short"), + ("__INT32_TYPE__", "int"), + ("__INT64_TYPE__", "long long int"), + ("__UINT8_TYPE__", "unsigned char"), + ("__UINT16_TYPE__", "unsigned short"), + ("__UINT32_TYPE__", "unsigned int"), + ("__UINT64_TYPE__", "long long unsigned int"), + // Least-width integer types (same as fixed-width for common targets) + ("__INT_LEAST8_TYPE__", "signed char"), + ("__INT_LEAST16_TYPE__", "short"), + ("__INT_LEAST32_TYPE__", "int"), + ("__INT_LEAST64_TYPE__", "long long int"), + ("__UINT_LEAST8_TYPE__", "unsigned char"), + 
("__UINT_LEAST16_TYPE__", "unsigned short"), + ("__UINT_LEAST32_TYPE__", "unsigned int"), + ("__UINT_LEAST64_TYPE__", "long long unsigned int"), + // Fast integer types (same as fixed-width for common targets) + ("__INT_FAST8_TYPE__", "signed char"), + ("__INT_FAST16_TYPE__", "short"), + ("__INT_FAST32_TYPE__", "int"), + ("__INT_FAST64_TYPE__", "long long int"), + ("__UINT_FAST8_TYPE__", "unsigned char"), + ("__UINT_FAST16_TYPE__", "unsigned short"), + ("__UINT_FAST32_TYPE__", "unsigned int"), + ("__UINT_FAST64_TYPE__", "long long unsigned int"), + // Pointer-width types (always 64-bit on our targets) + ("__SIZE_TYPE__", "long unsigned int"), + ("__PTRDIFF_TYPE__", "long int"), + ("__INTPTR_TYPE__", "long int"), + ("__UINTPTR_TYPE__", "long unsigned int"), + ("__INTMAX_TYPE__", "long int"), + ("__UINTMAX_TYPE__", "long unsigned int"), + // Character types + ("__WCHAR_TYPE__", "int"), + ("__WINT_TYPE__", "int"), + ("__CHAR16_TYPE__", "unsigned short"), + ("__CHAR32_TYPE__", "unsigned int"), + // sig_atomic_t + ("__SIG_ATOMIC_TYPE__", "int"), + ] +} - macros.push(("__LONG_LONG_MAX__", "9223372036854775807LL")); +/// Get fixed-width integer limit macros (for compatibility) +pub fn get_stdint_limit_macros(_target: &Target) -> Vec<(&'static str, &'static str)> { + vec![ + // Signed fixed-width limits + ("__INT8_MAX__", "127"), + ("__INT16_MAX__", "32767"), + ("__INT32_MAX__", "2147483647"), + ("__INT64_MAX__", "9223372036854775807LL"), + // Unsigned fixed-width limits + ("__UINT8_MAX__", "255"), + ("__UINT16_MAX__", "65535"), + ("__UINT32_MAX__", "4294967295U"), + ("__UINT64_MAX__", "18446744073709551615ULL"), + // Least-width limits (same as fixed-width) + ("__INT_LEAST8_MAX__", "127"), + ("__INT_LEAST16_MAX__", "32767"), + ("__INT_LEAST32_MAX__", "2147483647"), + ("__INT_LEAST64_MAX__", "9223372036854775807LL"), + ("__UINT_LEAST8_MAX__", "255"), + ("__UINT_LEAST16_MAX__", "65535"), + ("__UINT_LEAST32_MAX__", "4294967295U"), + ("__UINT_LEAST64_MAX__", 
"18446744073709551615ULL"), + // Fast limits (same as fixed-width) + ("__INT_FAST8_MAX__", "127"), + ("__INT_FAST16_MAX__", "32767"), + ("__INT_FAST32_MAX__", "2147483647"), + ("__INT_FAST64_MAX__", "9223372036854775807LL"), + ("__UINT_FAST8_MAX__", "255"), + ("__UINT_FAST16_MAX__", "65535"), + ("__UINT_FAST32_MAX__", "4294967295U"), + ("__UINT_FAST64_MAX__", "18446744073709551615ULL"), + // Width macros for fixed-width types + ("__INT8_WIDTH__", "8"), + ("__INT16_WIDTH__", "16"), + ("__INT32_WIDTH__", "32"), + ("__INT64_WIDTH__", "64"), + ("__INT_LEAST8_WIDTH__", "8"), + ("__INT_LEAST16_WIDTH__", "16"), + ("__INT_LEAST32_WIDTH__", "32"), + ("__INT_LEAST64_WIDTH__", "64"), + ("__INT_FAST8_WIDTH__", "8"), + ("__INT_FAST16_WIDTH__", "16"), + ("__INT_FAST32_WIDTH__", "32"), + ("__INT_FAST64_WIDTH__", "64"), + // intmax_t limits (64-bit on all our targets) + ("__INTMAX_MAX__", "9223372036854775807L"), + ("__UINTMAX_MAX__", "18446744073709551615UL"), + ("__INTMAX_WIDTH__", "64"), + ("__UINTMAX_WIDTH__", "64"), + // wchar_t and wint_t limits + ("__WCHAR_MAX__", "2147483647"), + ("__WCHAR_WIDTH__", "32"), + ("__WINT_MAX__", "2147483647"), + ("__WINT_WIDTH__", "32"), + // sig_atomic_t limits + ("__SIG_ATOMIC_MAX__", "2147483647"), + ("__SIG_ATOMIC_WIDTH__", "32"), + ] +} - // Width macros - macros.push(("__SCHAR_WIDTH__", "8")); - macros.push(("__SHRT_WIDTH__", "16")); - macros.push(("__INT_WIDTH__", "32")); +/// Get integer constant suffix macros (for compatibility) +pub fn get_suffix_macros(_target: &Target) -> Vec<(&'static str, &'static str)> { + vec![ + // Fixed-width suffixes + ("__INT8_C_SUFFIX__", ""), + ("__INT16_C_SUFFIX__", ""), + ("__INT32_C_SUFFIX__", ""), + ("__INT64_C_SUFFIX__", "LL"), + ("__UINT8_C_SUFFIX__", ""), + ("__UINT16_C_SUFFIX__", ""), + ("__UINT32_C_SUFFIX__", "U"), + ("__UINT64_C_SUFFIX__", "ULL"), + // intmax_t suffixes (64-bit uses L suffix on all our targets) + ("__INTMAX_C_SUFFIX__", "L"), + ("__UINTMAX_C_SUFFIX__", "UL"), + ] +} - if 
target.long_width == 64 { - macros.push(("__LONG_WIDTH__", "64")); - } else { - macros.push(("__LONG_WIDTH__", "32")); - } +/// Get format specifier macros (for compatibility) +pub fn get_format_macros(_target: &Target) -> Vec<(&'static str, &'static str)> { + vec![ + // Signed format specifiers + ("__INT8_FMTd__", "\"hhd\""), + ("__INT8_FMTi__", "\"hhi\""), + ("__INT16_FMTd__", "\"hd\""), + ("__INT16_FMTi__", "\"hi\""), + ("__INT32_FMTd__", "\"d\""), + ("__INT32_FMTi__", "\"i\""), + ("__INT64_FMTd__", "\"lld\""), + ("__INT64_FMTi__", "\"lli\""), + // Unsigned format specifiers + ("__UINT8_FMTo__", "\"hho\""), + ("__UINT8_FMTu__", "\"hhu\""), + ("__UINT8_FMTx__", "\"hhx\""), + ("__UINT8_FMTX__", "\"hhX\""), + ("__UINT16_FMTo__", "\"ho\""), + ("__UINT16_FMTu__", "\"hu\""), + ("__UINT16_FMTx__", "\"hx\""), + ("__UINT16_FMTX__", "\"hX\""), + ("__UINT32_FMTo__", "\"o\""), + ("__UINT32_FMTu__", "\"u\""), + ("__UINT32_FMTx__", "\"x\""), + ("__UINT32_FMTX__", "\"X\""), + ("__UINT64_FMTo__", "\"llo\""), + ("__UINT64_FMTu__", "\"llu\""), + ("__UINT64_FMTx__", "\"llx\""), + ("__UINT64_FMTX__", "\"llX\""), + // Least-width format specifiers (same as fixed-width) + ("__INT_LEAST8_FMTd__", "\"hhd\""), + ("__INT_LEAST8_FMTi__", "\"hhi\""), + ("__INT_LEAST16_FMTd__", "\"hd\""), + ("__INT_LEAST16_FMTi__", "\"hi\""), + ("__INT_LEAST32_FMTd__", "\"d\""), + ("__INT_LEAST32_FMTi__", "\"i\""), + ("__INT_LEAST64_FMTd__", "\"lld\""), + ("__INT_LEAST64_FMTi__", "\"lli\""), + ("__UINT_LEAST8_FMTo__", "\"hho\""), + ("__UINT_LEAST8_FMTu__", "\"hhu\""), + ("__UINT_LEAST8_FMTx__", "\"hhx\""), + ("__UINT_LEAST8_FMTX__", "\"hhX\""), + ("__UINT_LEAST16_FMTo__", "\"ho\""), + ("__UINT_LEAST16_FMTu__", "\"hu\""), + ("__UINT_LEAST16_FMTx__", "\"hx\""), + ("__UINT_LEAST16_FMTX__", "\"hX\""), + ("__UINT_LEAST32_FMTo__", "\"o\""), + ("__UINT_LEAST32_FMTu__", "\"u\""), + ("__UINT_LEAST32_FMTx__", "\"x\""), + ("__UINT_LEAST32_FMTX__", "\"X\""), + ("__UINT_LEAST64_FMTo__", "\"llo\""), + 
("__UINT_LEAST64_FMTu__", "\"llu\""), + ("__UINT_LEAST64_FMTx__", "\"llx\""), + ("__UINT_LEAST64_FMTX__", "\"llX\""), + // Fast format specifiers (same as fixed-width) + ("__INT_FAST8_FMTd__", "\"hhd\""), + ("__INT_FAST8_FMTi__", "\"hhi\""), + ("__INT_FAST16_FMTd__", "\"hd\""), + ("__INT_FAST16_FMTi__", "\"hi\""), + ("__INT_FAST32_FMTd__", "\"d\""), + ("__INT_FAST32_FMTi__", "\"i\""), + ("__INT_FAST64_FMTd__", "\"lld\""), + ("__INT_FAST64_FMTi__", "\"lli\""), + ("__UINT_FAST8_FMTo__", "\"hho\""), + ("__UINT_FAST8_FMTu__", "\"hhu\""), + ("__UINT_FAST8_FMTx__", "\"hhx\""), + ("__UINT_FAST8_FMTX__", "\"hhX\""), + ("__UINT_FAST16_FMTo__", "\"ho\""), + ("__UINT_FAST16_FMTu__", "\"hu\""), + ("__UINT_FAST16_FMTx__", "\"hx\""), + ("__UINT_FAST16_FMTX__", "\"hX\""), + ("__UINT_FAST32_FMTo__", "\"o\""), + ("__UINT_FAST32_FMTu__", "\"u\""), + ("__UINT_FAST32_FMTx__", "\"x\""), + ("__UINT_FAST32_FMTX__", "\"X\""), + ("__UINT_FAST64_FMTo__", "\"llo\""), + ("__UINT_FAST64_FMTu__", "\"llu\""), + ("__UINT_FAST64_FMTx__", "\"llx\""), + ("__UINT_FAST64_FMTX__", "\"llX\""), + // intmax_t format specifiers (64-bit uses l format on all our targets) + ("__INTMAX_FMTd__", "\"ld\""), + ("__INTMAX_FMTi__", "\"li\""), + ("__UINTMAX_FMTo__", "\"lo\""), + ("__UINTMAX_FMTu__", "\"lu\""), + ("__UINTMAX_FMTx__", "\"lx\""), + ("__UINTMAX_FMTX__", "\"lX\""), + // intptr_t format specifiers (64-bit uses l format) + ("__INTPTR_FMTd__", "\"ld\""), + ("__INTPTR_FMTi__", "\"li\""), + ("__UINTPTR_FMTo__", "\"lo\""), + ("__UINTPTR_FMTu__", "\"lu\""), + ("__UINTPTR_FMTx__", "\"lx\""), + ("__UINTPTR_FMTX__", "\"lX\""), + // ptrdiff_t format specifiers + ("__PTRDIFF_FMTd__", "\"ld\""), + ("__PTRDIFF_FMTi__", "\"li\""), + // size_t format specifiers + ("__SIZE_FMTo__", "\"lo\""), + ("__SIZE_FMTu__", "\"lu\""), + ("__SIZE_FMTx__", "\"lx\""), + ("__SIZE_FMTX__", "\"lX\""), + ] +} - macros.push(("__LLONG_WIDTH__", "64")); - - // Size type - if target.pointer_width == 64 { - macros.push(("__SIZE_MAX__", 
"18446744073709551615UL")); - macros.push(("__SIZE_WIDTH__", "64")); - macros.push(("__PTRDIFF_MAX__", "9223372036854775807L")); - macros.push(("__PTRDIFF_WIDTH__", "64")); - macros.push(("__INTPTR_MAX__", "9223372036854775807L")); - macros.push(("__INTPTR_WIDTH__", "64")); - macros.push(("__UINTPTR_MAX__", "18446744073709551615UL")); - } else { - macros.push(("__SIZE_MAX__", "4294967295U")); - macros.push(("__SIZE_WIDTH__", "32")); - macros.push(("__PTRDIFF_MAX__", "2147483647")); - macros.push(("__PTRDIFF_WIDTH__", "32")); - macros.push(("__INTPTR_MAX__", "2147483647")); - macros.push(("__INTPTR_WIDTH__", "32")); - macros.push(("__UINTPTR_MAX__", "4294967295U")); - } +/// Get additional sizeof macros +pub fn get_additional_sizeof_macros(_target: &Target) -> Vec<(&'static str, &'static str)> { + vec![ + // size_t and ptrdiff_t sizes (always 8 bytes on 64-bit) + ("__SIZEOF_SIZE_T__", "8"), + ("__SIZEOF_PTRDIFF_T__", "8"), + // wchar_t and wint_t sizes + ("__SIZEOF_WCHAR_T__", "4"), + ("__SIZEOF_WINT_T__", "4"), + ] +} - macros +/// Get miscellaneous macros +pub fn get_misc_macros(_target: &Target) -> Vec<(&'static str, &'static str)> { + vec![ + // Pointer width in bits (always 64 on our targets) + ("__POINTER_WIDTH__", "64"), + // Alignment + ("__BIGGEST_ALIGNMENT__", "16"), + ("__BOOL_WIDTH__", "8"), + // Byte order (all our supported architectures are little-endian) + ("__ORDER_LITTLE_ENDIAN__", "1234"), + ("__ORDER_BIG_ENDIAN__", "4321"), + ("__ORDER_PDP_ENDIAN__", "3412"), + ("__BYTE_ORDER__", "__ORDER_LITTLE_ENDIAN__"), + ("__LITTLE_ENDIAN__", "1"), + // Floating point + ("__FLT_RADIX__", "2"), + ("__FINITE_MATH_ONLY__", "0"), + ] } diff --git a/cc/arch/x86_64/codegen.rs b/cc/arch/x86_64/codegen.rs index 844c7408..e5a35bcc 100644 --- a/cc/arch/x86_64/codegen.rs +++ b/cc/arch/x86_64/codegen.rs @@ -1251,20 +1251,12 @@ impl X86_64CodeGen { } fn emit_block(&mut self, block: &crate::ir::BasicBlock, types: &TypeTable) { - // Emit block label (include function name 
for uniqueness) - if let Some(label) = &block.label { - // LIR: named block label (using Raw since format differs from standard) - self.push_lir(X86Inst::Directive(Directive::Raw(format!( - ".L_{}_{}:", - self.current_fn, label - )))); - } else { - // LIR: numbered block label - self.push_lir(X86Inst::Directive(Directive::BlockLabel(Label::new( - &self.current_fn, - block.id.0, - )))); - } + // Always emit block ID label for consistency with jumps + // (jumps reference blocks by ID, not by C label name) + self.push_lir(X86Inst::Directive(Directive::BlockLabel(Label::new( + &self.current_fn, + block.id.0, + )))); // Emit instructions for insn in &block.insns { diff --git a/cc/linearize.rs b/cc/linearize.rs index 61fece3a..3c06337b 100644 --- a/cc/linearize.rs +++ b/cc/linearize.rs @@ -784,7 +784,14 @@ impl<'a> Linearizer<'a> { Stmt::Goto(label) => { let label_str = self.str(*label).to_string(); let target = self.get_or_create_label(&label_str); - self.emit(Instruction::br(target)); + if let Some(current) = self.current_bb { + self.emit(Instruction::br(target)); + self.link_bb(current, target); + } + + // Set current_bb to None - any subsequent code until a label is dead + // emit() will safely skip when current_bb is None + self.current_bb = None; } Stmt::Label { name, stmt } => { diff --git a/cc/main.rs b/cc/main.rs index ec96c11d..2c6f4203 100644 --- a/cc/main.rs +++ b/cc/main.rs @@ -53,11 +53,7 @@ struct Args { print_targets: bool, /// Dump tokens (for debugging tokenizer) - #[arg( - short = 'T', - long = "dump-tokens", - help = gettext("Dump tokens to stdout") - )] + #[arg(long = "dump-tokens", help = gettext("Dump tokens to stdout"))] dump_tokens: bool, /// Run preprocessor and dump result @@ -65,11 +61,7 @@ struct Args { preprocess_only: bool, /// Dump AST (for debugging parser) - #[arg( - short = 'A', - long = "dump-ast", - help = gettext("Parse and dump AST to stdout") - )] + #[arg(long = "dump-ast", help = gettext("Parse and dump AST to stdout"))] dump_ast: 
bool, /// Dump IR (for debugging linearizer) @@ -108,10 +100,6 @@ struct Args { #[arg(short = 'o', value_name = "file", help = gettext("Place output in file"))] output: Option, - /// Dump generated assembly (for debugging codegen) - #[arg(long = "dump-asm", help = gettext("Dump generated assembly to stdout"))] - dump_asm: bool, - /// Generate debug information (DWARF) #[arg(short = 'g', help = gettext("Generate debug information"))] debug: bool, @@ -247,11 +235,6 @@ fn process_file( arch::codegen::create_codegen_with_options(target.clone(), emit_unwind_tables); let asm = codegen.generate(&module, &types); - if args.dump_asm { - print!("{}", asm); - return Ok(()); - } - // Determine output file names // For stdin ("-"), use "stdin" as the default stem let stem = if path == "-" { @@ -268,10 +251,15 @@ fn process_file( if args.asm_only { // Output assembly let asm_file = args.output.clone().unwrap_or_else(|| format!("{}.s", stem)); - let mut file = File::create(&asm_file)?; - file.write_all(asm.as_bytes())?; - if args.verbose { - eprintln!("Wrote assembly to {}", asm_file); + if asm_file == "-" { + // Write to stdout + print!("{}", asm); + } else { + let mut file = File::create(&asm_file)?; + file.write_all(asm.as_bytes())?; + if args.verbose { + eprintln!("Wrote assembly to {}", asm_file); + } } return Ok(()); } diff --git a/cc/tests/features/debug.rs b/cc/tests/features/debug.rs index 77fe92a6..1fc11d93 100644 --- a/cc/tests/features/debug.rs +++ b/cc/tests/features/debug.rs @@ -101,11 +101,13 @@ int main() { ); let c_path = c_file.path().to_path_buf(); - // Compile with --dump-asm to see generated assembly + // Compile with -S -o - to output assembly to stdout let output = run_test_base( "pcc", &vec![ - "--dump-asm".to_string(), + "-S".to_string(), + "-o".to_string(), + "-".to_string(), c_path.to_string_lossy().to_string(), ], &[], @@ -113,7 +115,7 @@ int main() { assert!( output.status.success(), - "pcc --dump-asm failed: {}", + "pcc -S -o - failed: {}", 
String::from_utf8_lossy(&output.stderr) ); @@ -148,11 +150,13 @@ int main() { ); let c_path = c_file.path().to_path_buf(); - // Compile with --dump-asm --fno-unwind-tables + // Compile with -S -o - --fno-unwind-tables let output = run_test_base( "pcc", &vec![ - "--dump-asm".to_string(), + "-S".to_string(), + "-o".to_string(), + "-".to_string(), "--fno-unwind-tables".to_string(), c_path.to_string_lossy().to_string(), ], @@ -161,7 +165,7 @@ int main() { assert!( output.status.success(), - "pcc --dump-asm --fno-unwind-tables failed: {}", + "pcc -S -o - --fno-unwind-tables failed: {}", String::from_utf8_lossy(&output.stderr) ); @@ -191,12 +195,14 @@ int main() { ); let c_path = c_file.path().to_path_buf(); - // Compile with -g --dump-asm to see generated assembly + // Compile with -g -S -o - to see generated assembly let output = run_test_base( "pcc", &vec![ "-g".to_string(), - "--dump-asm".to_string(), + "-S".to_string(), + "-o".to_string(), + "-".to_string(), c_path.to_string_lossy().to_string(), ], &[], @@ -204,7 +210,7 @@ int main() { assert!( output.status.success(), - "pcc -g --dump-asm failed: {}", + "pcc -g -S -o - failed: {}", String::from_utf8_lossy(&output.stderr) ); @@ -242,11 +248,13 @@ int main() { ); let c_path = c_file.path().to_path_buf(); - // Compile without -g + // Compile without -g (using -S -o - for stdout) let output = run_test_base( "pcc", &vec![ - "--dump-asm".to_string(), + "-S".to_string(), + "-o".to_string(), + "-".to_string(), c_path.to_string_lossy().to_string(), ], &[], @@ -254,7 +262,7 @@ int main() { assert!( output.status.success(), - "pcc --dump-asm failed: {}", + "pcc -S -o - failed: {}", String::from_utf8_lossy(&output.stderr) ); diff --git a/cc/tests/features/has_feature.rs b/cc/tests/features/has_feature.rs new file mode 100644 index 00000000..a930123a --- /dev/null +++ b/cc/tests/features/has_feature.rs @@ -0,0 +1,169 @@ +// +// Copyright (c) 2024 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under 
+// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// Tests for __has_builtin, __has_attribute, __has_feature, __has_extension +// + +use crate::common::compile_and_run; + +/// Test __has_builtin for supported builtins +#[test] +fn has_builtin_supported() { + let code = r#" +int main(void) { + // Test supported builtins return 1 + #if !__has_builtin(__builtin_constant_p) + return 1; + #endif + + #if !__has_builtin(__builtin_types_compatible_p) + return 2; + #endif + + #if !__has_builtin(__builtin_bswap16) + return 3; + #endif + + #if !__has_builtin(__builtin_bswap32) + return 4; + #endif + + #if !__has_builtin(__builtin_bswap64) + return 5; + #endif + + #if !__has_builtin(__builtin_alloca) + return 6; + #endif + + return 0; +} +"#; + assert_eq!(compile_and_run("has_builtin_supported", code), 0); +} + +/// Test __has_builtin for unsupported builtins +#[test] +fn has_builtin_unsupported() { + let code = r#" +int main(void) { + // Test unsupported/unknown builtins return 0 + #if __has_builtin(__builtin_nonexistent_thing) + return 1; + #endif + + #if __has_builtin(__builtin_xyz_unknown) + return 2; + #endif + + return 0; +} +"#; + assert_eq!(compile_and_run("has_builtin_unsupported", code), 0); +} + +/// Test __has_attribute always returns 0 (not yet implemented) +#[test] +fn has_attribute_returns_zero() { + let code = r#" +int main(void) { + // __has_attribute currently returns 0 for all attributes + #if __has_attribute(unused) + return 1; + #endif + + #if __has_attribute(noreturn) + return 2; + #endif + + #if __has_attribute(packed) + return 3; + #endif + + return 0; +} +"#; + assert_eq!(compile_and_run("has_attribute_zero", code), 0); +} + +/// Test __has_feature always returns 0 (not yet implemented) +#[test] +fn has_feature_returns_zero() { + let code = r#" +int main(void) { + // __has_feature currently returns 0 for all features + #if __has_feature(c_alignas) 
+ return 1; + #endif + + #if __has_feature(c_static_assert) + return 2; + #endif + + #if __has_feature(c_generic_selections) + return 3; + #endif + + return 0; +} +"#; + assert_eq!(compile_and_run("has_feature_zero", code), 0); +} + +/// Test __has_extension always returns 0 (not yet implemented) +#[test] +fn has_extension_returns_zero() { + let code = r#" +int main(void) { + // __has_extension currently returns 0 for all extensions + #if __has_extension(c_alignas) + return 1; + #endif + + #if __has_extension(attribute_deprecated_with_message) + return 2; + #endif + + return 0; +} +"#; + assert_eq!(compile_and_run("has_extension_zero", code), 0); +} + +/// Test __has_builtin in complex preprocessor expressions +#[test] +fn has_builtin_complex_expressions() { + let code = r#" +int main(void) { + // Test in logical AND + #if __has_builtin(__builtin_constant_p) && __has_builtin(__builtin_bswap32) + int ok1 = 1; + #else + return 1; + #endif + + // Test in logical OR + #if __has_builtin(__builtin_nonexistent) || __has_builtin(__builtin_alloca) + int ok2 = 1; + #else + return 2; + #endif + + // Test negation + #if !__has_builtin(__builtin_nonexistent) + int ok3 = 1; + #else + return 3; + #endif + + // Use the variables to avoid warnings + return ok1 + ok2 + ok3 - 3; +} +"#; + assert_eq!(compile_and_run("has_builtin_complex", code), 0); +} diff --git a/cc/tests/features/mod.rs b/cc/tests/features/mod.rs index bb6604e4..99faefbb 100644 --- a/cc/tests/features/mod.rs +++ b/cc/tests/features/mod.rs @@ -13,6 +13,7 @@ mod alloca; mod bswap; mod constant_p; mod debug; +mod has_feature; mod storage; mod types_compatible; mod varargs; diff --git a/cc/tests/pcc/mod.rs b/cc/tests/pcc/mod.rs index 1bd4b3b5..5e78b866 100644 --- a/cc/tests/pcc/mod.rs +++ b/cc/tests/pcc/mod.rs @@ -850,3 +850,90 @@ int main(void) { assert_eq!(exit_code, 0, "enum_switch: switch on enum should work"); cleanup_exe(&exe); } + +// ============================================================================ 
+// Goto Statement Tests +// ============================================================================ + +/// Test: Basic forward goto +#[test] +fn test_goto_forward() { + let c_file = create_c_file( + "goto_forward", + r#" +int main(void) { + int x = 1; + goto skip; + x = 99; // Should be skipped +skip: + return x; // Should return 1 +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!(exe.is_some(), "goto_forward: compilation should succeed"); + let exit_code = run(exe.as_ref().unwrap()); + assert_eq!( + exit_code, 1, + "goto_forward: should skip assignment and return 1" + ); + cleanup_exe(&exe); +} + +/// Test: Backward goto (loop-like pattern) +#[test] +fn test_goto_backward() { + let c_file = create_c_file( + "goto_backward", + r#" +int main(void) { + int sum = 0; + int i = 1; +loop: + sum = sum + i; + i = i + 1; + if (i <= 5) + goto loop; + return sum; // Should return 1+2+3+4+5 = 15 +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!(exe.is_some(), "goto_backward: compilation should succeed"); + let exit_code = run(exe.as_ref().unwrap()); + assert_eq!(exit_code, 15, "goto_backward: loop via goto should sum 1-5"); + cleanup_exe(&exe); +} + +/// Test: Multiple labels and gotos +#[test] +fn test_goto_multiple_labels() { + let c_file = create_c_file( + "goto_multi", + r#" +int main(void) { + int x = 0; + goto first; +second: + x = x + 10; + goto done; +first: + x = x + 1; + goto second; +done: + return x; // Should return 11 +} +"#, + ); + + let exe = compile(&c_file.path().to_path_buf()); + assert!(exe.is_some(), "goto_multi: compilation should succeed"); + let exit_code = run(exe.as_ref().unwrap()); + assert_eq!( + exit_code, 11, + "goto_multi: should follow first->second->done path" + ); + cleanup_exe(&exe); +} diff --git a/cc/token/preprocess.rs b/cc/token/preprocess.rs index d99f87dd..f4df7c15 100644 --- a/cc/token/preprocess.rs +++ b/cc/token/preprocess.rs @@ -379,6 +379,36 @@ impl<'a> Preprocessor<'a> { 
self.define_macro(Macro::predefined(name, Some(value))); } + // Type definition macros (for <stddef.h> and <stdint.h>) + for (name, value) in arch::get_type_macros(self.target) { + self.define_macro(Macro::predefined(name, Some(value))); + } + + // Fixed-width integer limit macros (for <stdint.h>) + for (name, value) in arch::get_stdint_limit_macros(self.target) { + self.define_macro(Macro::predefined(name, Some(value))); + } + + // Integer constant suffix macros + for (name, value) in arch::get_suffix_macros(self.target) { + self.define_macro(Macro::predefined(name, Some(value))); + } + + // Format specifier macros (for <inttypes.h>) + for (name, value) in arch::get_format_macros(self.target) { + self.define_macro(Macro::predefined(name, Some(value))); + } + + // Additional sizeof macros + for (name, value) in arch::get_additional_sizeof_macros(self.target) { + self.define_macro(Macro::predefined(name, Some(value))); + } + + // Miscellaneous macros + for (name, value) in arch::get_misc_macros(self.target) { + self.define_macro(Macro::predefined(name, Some(value))); + } + // OS macros for (name, value) in os::get_os_macros(self.target) { if let Some(v) = value { @@ -1842,34 +1872,12 @@ impl<'a> Preprocessor<'a> { match builtin { BuiltinMacro::HasAttribute => { - // Return true for common attributes we support - matches!( - name.as_str(), - "noreturn" - | "const" - | "pure" - | "unused" - | "used" - | "deprecated" - | "weak" - | "aligned" - | "packed" - | "format" - | "nonnull" - | "returns_nonnull" - | "warn_unused_result" - | "visibility" - | "always_inline" - | "noinline" - | "cold" - | "hot" - | "malloc" - | "constructor" - | "destructor" - ) + // We don't currently implement any __attribute__ semantics + let _ = name; + false } BuiltinMacro::HasBuiltin => { - // Return true for builtins we support + // Return true only for builtins actually implemented in the compiler matches!( name.as_str(), "__builtin_va_list" @@ -1880,39 +1888,20 @@ impl<'a> Preprocessor<'a> { | "__builtin_bswap16" | 
"__builtin_bswap32" | "__builtin_bswap64" - | "__builtin_expect" - | "__builtin_unreachable" - | "__builtin_trap" + | "__builtin_alloca" | "__builtin_constant_p" | "__builtin_types_compatible_p" - | "__builtin_offsetof" - | "__builtin_alloca" ) } BuiltinMacro::HasFeature => { - // C11/C99 features - matches!( - name.as_str(), - "c_alignas" - | "c_alignof" - | "c_static_assert" - | "c_generic_selections" - | "c_thread_local" - ) + // We don't currently implement any C11/C23 features + let _ = name; + false } BuiltinMacro::HasExtension => { - // Extensions we support (basically same as features for now) - matches!( - name.as_str(), - "c_alignas" - | "c_alignof" - | "c_static_assert" - | "c_generic_selections" - | "attribute_deprecated_with_message" - | "attribute_unavailable_with_message" - | "enumerator_attributes" - | "cxx_attributes" - ) + // We don't currently implement any extensions + let _ = name; + false } _ => false, } @@ -2194,6 +2183,24 @@ impl<'a, 'b> ExprEvaluator<'a, 'b> { return self.eval_defined(); } + // Handle __has_attribute(X) + if self.is_ident("__has_attribute") { + self.advance(); + return self.eval_has_attribute(); + } + + // Handle __has_builtin(X) + if self.is_ident("__has_builtin") { + self.advance(); + return self.eval_has_builtin_expr(); + } + + // Handle __has_feature(X) and __has_extension(X) + if self.is_ident("__has_feature") || self.is_ident("__has_extension") { + self.advance(); + return self.eval_has_feature(); + } + // Handle parenthesized expression if self.is_special(b'(' as u32) { self.advance(); @@ -2273,6 +2280,79 @@ impl<'a, 'b> ExprEvaluator<'a, 'b> { result } + /// Get the identifier argument from a __has_* expression: __has_*(ident) + fn get_has_arg(&mut self) -> Option { + if !self.is_special(b'(' as u32) { + return None; + } + self.advance(); // consume '(' + + let name = self.get_ident(); + if name.is_some() { + self.advance(); // consume identifier + } + + if self.is_special(b')' as u32) { + self.advance(); // consume 
')' + } + + name + } + + /// Evaluate __has_attribute(X) + fn eval_has_attribute(&mut self) -> i64 { + let _name = match self.get_has_arg() { + Some(n) => n, + None => return 0, + }; + + // We don't currently implement any __attribute__ semantics + // Return 0 for all attribute queries + 0 + } + + /// Evaluate __has_builtin(X) + fn eval_has_builtin_expr(&mut self) -> i64 { + let name = match self.get_has_arg() { + Some(n) => n, + None => return 0, + }; + + // Return 1 for builtins actually implemented in the compiler + let supported = matches!( + name.as_str(), + "__builtin_va_list" + | "__builtin_va_start" + | "__builtin_va_end" + | "__builtin_va_arg" + | "__builtin_va_copy" + | "__builtin_bswap16" + | "__builtin_bswap32" + | "__builtin_bswap64" + | "__builtin_alloca" + | "__builtin_constant_p" + | "__builtin_types_compatible_p" + ); + + if supported { + 1 + } else { + 0 + } + } + + /// Evaluate __has_feature(X) and __has_extension(X) + fn eval_has_feature(&mut self) -> i64 { + let _name = match self.get_has_arg() { + Some(n) => n, + None => return 0, + }; + + // We don't currently implement any C11/C23 features + // Return 0 for all feature queries + 0 + } + fn parse_number(&self, s: &str) -> i64 { let s = s.trim_end_matches(['u', 'U', 'l', 'L']);