From 857b308912c5bcb44c4dca3188ccfe34cb3bdfb9 Mon Sep 17 00:00:00 2001 From: Haydn Trigg Date: Fri, 16 Jan 2026 21:31:49 +1030 Subject: [PATCH 1/3] Better COFF and Big Endian Strings --- objdiff-core/src/arch/mod.rs | 15 +++++++++++---- objdiff-core/src/arch/ppc/mod.rs | 2 +- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/objdiff-core/src/arch/mod.rs b/objdiff-core/src/arch/mod.rs index 592ed20..44344f6 100644 --- a/objdiff-core/src/arch/mod.rs +++ b/objdiff-core/src/arch/mod.rs @@ -175,19 +175,26 @@ impl DataType { } DataType::String => { if let Some(nul_idx) = bytes.iter().position(|&c| c == b'\0') { - let str_bytes = &bytes[..nul_idx]; + let ascii_str_bytes = &bytes[..nul_idx]; // Special case to display (ASCII) as the label for ASCII-only strings. - let (cow, _, had_errors) = encoding_rs::UTF_8.decode(str_bytes); + let (cow, _, had_errors) = encoding_rs::UTF_8.decode(ascii_str_bytes); if !had_errors && cow.is_ascii() { let string = format!("{cow}"); let copy_string = escape_special_ascii_characters(string.clone()); strs.push((string, Some("ASCII".into()), Some(copy_string))); } + for (encoding, encoding_name) in SUPPORTED_ENCODINGS { - let (cow, _, had_errors) = encoding.decode(str_bytes); + let (cow, _, had_errors) = encoding.decode(&bytes); // Avoid showing ASCII-only strings more than once if the encoding is ASCII-compatible. if !had_errors && (!encoding.is_ascii_compatible() || !cow.is_ascii()) { - let string = format!("{cow}"); + let mut string = format!("{cow}"); + + // Inline loop to strip all trailing "\0" + while let Some(stripped) = string.strip_suffix('\0') { + string = stripped.to_string(); + } + let copy_string = escape_special_ascii_characters(string.clone()); strs.push((string, Some(encoding_name.into()), Some(copy_string))); } diff --git a/objdiff-core/src/arch/ppc/mod.rs b/objdiff-core/src/arch/ppc/mod.rs index c61ea36..b99c972 100644 --- a/objdiff-core/src/arch/ppc/mod.rs +++ b/objdiff-core/src/arch/ppc/mod.rs @@ -355,7 +355,7 @@ impl Arch for ArchPpc { } fn guess_data_type(&self, resolved: ResolvedInstructionRef, bytes: &[u8]) -> Option { - if resolved.relocation.is_some_and(|r| r.symbol.name.starts_with("@stringBase")) { + if resolved.relocation.is_some_and(|r| r.symbol.name.starts_with("@stringBase") || r.symbol.name.starts_with("$SG")) { // Pooled string. return Some(DataType::String); } From 5588f27ac82600f29ef11c69ca2ed2bcd1979e86 Mon Sep 17 00:00:00 2001 From: Haydn Trigg Date: Fri, 16 Jan 2026 21:42:25 +1030 Subject: [PATCH 2/3] Cargo Check and Format Fixes --- objdiff-core/src/arch/mod.rs | 2 +- objdiff-core/src/arch/ppc/mod.rs | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/objdiff-core/src/arch/mod.rs b/objdiff-core/src/arch/mod.rs index 44344f6..184512a 100644 --- a/objdiff-core/src/arch/mod.rs +++ b/objdiff-core/src/arch/mod.rs @@ -185,7 +185,7 @@ impl DataType { } for (encoding, encoding_name) in SUPPORTED_ENCODINGS { - let (cow, _, had_errors) = encoding.decode(&bytes); + let (cow, _, had_errors) = encoding.decode(bytes); // Avoid showing ASCII-only strings more than once if the encoding is ASCII-compatible. if !had_errors && (!encoding.is_ascii_compatible() || !cow.is_ascii()) { let mut string = format!("{cow}"); diff --git a/objdiff-core/src/arch/ppc/mod.rs b/objdiff-core/src/arch/ppc/mod.rs index b99c972..ccce28a 100644 --- a/objdiff-core/src/arch/ppc/mod.rs +++ b/objdiff-core/src/arch/ppc/mod.rs @@ -355,7 +355,9 @@ impl Arch for ArchPpc { } fn guess_data_type(&self, resolved: ResolvedInstructionRef, bytes: &[u8]) -> Option { - if resolved.relocation.is_some_and(|r| r.symbol.name.starts_with("@stringBase") || r.symbol.name.starts_with("$SG")) { + if resolved.relocation.is_some_and(|r| { + r.symbol.name.starts_with("@stringBase") || r.symbol.name.starts_with("$SG") + }) { // Pooled string. return Some(DataType::String); } From 066ca43d9cf14ed7520f27deeda3b4591467a9d3 Mon Sep 17 00:00:00 2001 From: Luke Street Date: Sat, 24 Jan 2026 20:18:16 -0700 Subject: [PATCH 3/3] Rework DataType::String in display_literals --- objdiff-core/src/arch/mod.rs | 52 ++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/objdiff-core/src/arch/mod.rs b/objdiff-core/src/arch/mod.rs index 184512a..30179fd 100644 --- a/objdiff-core/src/arch/mod.rs +++ b/objdiff-core/src/arch/mod.rs @@ -174,30 +174,36 @@ impl DataType { strs.push((format!("{bytes:#?}"), None, None)); } DataType::String => { - if let Some(nul_idx) = bytes.iter().position(|&c| c == b'\0') { - let ascii_str_bytes = &bytes[..nul_idx]; - // Special case to display (ASCII) as the label for ASCII-only strings. - let (cow, _, had_errors) = encoding_rs::UTF_8.decode(ascii_str_bytes); - if !had_errors && cow.is_ascii() { - let string = format!("{cow}"); - let copy_string = escape_special_ascii_characters(string.clone()); - strs.push((string, Some("ASCII".into()), Some(copy_string))); + // Special case to display (ASCII) as the label for ASCII-only strings. + let mut is_ascii = false; + if bytes.is_ascii() + && let Ok(str) = str::from_utf8(bytes) + { + let trimmed = str.trim_end_matches('\0'); + if !trimmed.is_empty() { + let copy_string = escape_special_ascii_characters(trimmed); + strs.push((trimmed.to_string(), Some("ASCII".into()), Some(copy_string))); + is_ascii = true; } + } - for (encoding, encoding_name) in SUPPORTED_ENCODINGS { - let (cow, _, had_errors) = encoding.decode(bytes); - // Avoid showing ASCII-only strings more than once if the encoding is ASCII-compatible. - if !had_errors && (!encoding.is_ascii_compatible() || !cow.is_ascii()) { - let mut string = format!("{cow}"); - - // Inline loop to strip all trailing "\0" - while let Some(stripped) = string.strip_suffix('\0') { - string = stripped.to_string(); - } - - let copy_string = escape_special_ascii_characters(string.clone()); - strs.push((string, Some(encoding_name.into()), Some(copy_string))); - } + for (encoding, encoding_name) in SUPPORTED_ENCODINGS { + // Avoid showing ASCII-only strings more than once if the encoding is ASCII-compatible. + if is_ascii && encoding.is_ascii_compatible() { + continue; + } + let (cow, _, had_errors) = encoding.decode(bytes); + if had_errors { + continue; + } + let trimmed = cow.trim_end_matches('\0'); + if !trimmed.is_empty() { + let copy_string = escape_special_ascii_characters(trimmed); + strs.push(( + trimmed.to_string(), + Some(encoding_name.into()), + Some(copy_string), + )); } } } @@ -515,7 +521,7 @@ pub struct RelocationOverride { /// Escape ASCII characters such as \n or \t, but not Unicode characters such as \u{3000}. /// Suitable for copying to clipboard. -fn escape_special_ascii_characters(value: String) -> String { +fn escape_special_ascii_characters(value: &str) -> String { let mut escaped = String::new(); escaped.push('"'); for c in value.chars() {