Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 6 additions & 24 deletions objdiff-core/src/diff/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,31 +35,13 @@ pub fn diff_bss_symbol(
))
}

pub fn symbol_name_matches(left_name: &str, right_name: &str) -> bool {
if let Some((left_prefix, left_suffix)) = left_name.split_once("@class$")
&& let Some((right_prefix, right_suffix)) = right_name.split_once("@class$")
pub fn symbol_name_matches(left: &Symbol, right: &Symbol) -> bool {
if let Some(left_name) = &left.normalized_name
&& let Some(right_name) = &right.normalized_name
{
// Match Metrowerks anonymous class symbol names, ignoring the unique ID.
// e.g. __dt__Q29dCamera_c23@class$3665d_camera_cppFv
if left_prefix == right_prefix
&& let Some(left_idx) = left_suffix.chars().position(|c| !c.is_numeric())
&& let Some(right_idx) = right_suffix.chars().position(|c| !c.is_numeric())
{
// e.g. d_camera_cppFv (after the unique ID)
left_suffix[left_idx..] == right_suffix[right_idx..]
} else {
false
}
} else if let Some((prefix, suffix)) = left_name.split_once(['$', '.'])
&& suffix.chars().all(char::is_numeric)
{
// Match Metrowerks symbol$1234 against symbol$2345
// and GCC symbol.1234 against symbol.2345
right_name
.split_once(['$', '.'])
.is_some_and(|(p, s)| p == prefix && s.chars().all(char::is_numeric))
} else {
left_name == right_name
} else {
left.name == right.name
}
}

Expand All @@ -73,7 +55,7 @@ fn reloc_eq(
return false;
}

let symbol_name_addend_matches = symbol_name_matches(&left.symbol.name, &right.symbol.name)
let symbol_name_addend_matches = symbol_name_matches(left.symbol, right.symbol)
&& left.relocation.addend == right.relocation.addend;
match (left.symbol.section, right.symbol.section) {
(Some(sl), Some(sr)) => {
Expand Down
22 changes: 9 additions & 13 deletions objdiff-core/src/diff/display.rs
Original file line number Diff line number Diff line change
Expand Up @@ -812,19 +812,15 @@ pub fn display_sections(
}
let section_diff = &diff.sections[section_idx];
let reverse_fn_order = section.kind == SectionKind::Code && reverse_fn_order;
symbols.sort_by(|a, b| {
let a = &obj.symbols[a.symbol];
let b = &obj.symbols[b.symbol];
section_symbol_sort(a, b)
.then_with(|| {
if reverse_fn_order {
b.address.cmp(&a.address)
} else {
a.address.cmp(&b.address)
}
})
.then_with(|| a.size.cmp(&b.size))
});
if reverse_fn_order {
symbols.sort_by(|a, b| {
let a = &obj.symbols[a.symbol];
let b = &obj.symbols[b.symbol];
section_symbol_sort(a, b)
.then_with(|| b.address.cmp(&a.address))
.then_with(|| a.size.cmp(&b.size))
});
}
sections.push(SectionDisplay {
id: section.id.clone(),
name: if section.flags.contains(SectionFlag::Combined) {
Expand Down
31 changes: 7 additions & 24 deletions objdiff-core/src/diff/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ use alloc::{
use core::{num::NonZeroU32, ops::Range};

use anyhow::Result;
use itertools::Itertools;

use crate::{
diff::{
Expand Down Expand Up @@ -687,18 +686,6 @@ fn symbol_section_kind(obj: &Object, symbol: &Symbol) -> SectionKind {
}
}

/// Report whether a symbol's name is compiler-generated, like @1234 or _$E1234.
///
/// A name qualifies when it begins with `@` or `_$E` and every character after
/// that marker is numeric.
fn is_symbol_compiler_generated(symbol: &Symbol) -> bool {
    let name = symbol.name.as_str();
    // Peel off whichever marker is present; the two markers cannot both match
    // because they begin with different characters.
    let unique_id = name.strip_prefix('@').or_else(|| name.strip_prefix("_$E"));
    // Requiring an all-numeric remainder excludes @stringBase0, @GUARD@, etc.
    unique_id.is_some_and(|id| id.chars().all(char::is_numeric))
}

fn find_symbol(
obj: Option<&Object>,
in_obj: &Object,
Expand All @@ -712,7 +699,7 @@ fn find_symbol(

// Match compiler-generated symbols against each other (e.g. @251 -> @60)
// If they are in the same section and have the same value
if is_symbol_compiler_generated(in_symbol)
if in_symbol.flags.contains(SymbolFlag::CompilerGenerated)
&& matches!(section_kind, SectionKind::Code | SectionKind::Data | SectionKind::Bss)
{
let mut closest_match_symbol_idx = None;
Expand All @@ -724,7 +711,7 @@ fn find_symbol(
if obj.sections[section_index].name != section_name {
continue;
}
if !is_symbol_compiler_generated(symbol) {
if !symbol.flags.contains(SymbolFlag::CompilerGenerated) {
continue;
}
match section_kind {
Expand Down Expand Up @@ -761,15 +748,11 @@ fn find_symbol(
}

// Try to find a symbol with a matching name
if let Some((symbol_idx, _)) = unmatched_symbols(obj, used)
.filter(|&(_, symbol)| {
symbol_name_matches(&in_symbol.name, &symbol.name)
&& symbol_section_kind(obj, symbol) == section_kind
&& symbol_section(obj, symbol).is_some_and(|(name, _)| name == section_name)
})
.sorted_unstable_by_key(|&(_, symbol)| (symbol.section, symbol.address))
.next()
{
if let Some((symbol_idx, _)) = unmatched_symbols(obj, used).find(|&(_, symbol)| {
symbol_name_matches(in_symbol, symbol)
&& symbol_section_kind(obj, symbol) == section_kind
&& symbol_section(obj, symbol).is_some_and(|(name, _)| name == section_name)
}) {
return Some(symbol_idx);
}

Expand Down
6 changes: 5 additions & 1 deletion objdiff-core/src/obj/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ pub enum SectionKind {

flags! {
#[derive(Hash)]
pub enum SymbolFlag: u8 {
pub enum SymbolFlag: u16 {
Global,
Local,
Weak,
Expand All @@ -50,6 +50,8 @@ flags! {
SizeInferred,
/// Symbol should be ignored by any diffing
Ignored,
/// Symbol name is compiler-generated; compare by value instead of name
CompilerGenerated,
}
}

Expand Down Expand Up @@ -264,6 +266,7 @@ pub trait FlowAnalysisResult: core::fmt::Debug + Send {
pub struct Symbol {
pub name: String,
pub demangled_name: Option<String>,
pub normalized_name: Option<String>,
pub address: u64,
pub size: u64,
pub kind: SymbolKind,
Expand Down Expand Up @@ -403,6 +406,7 @@ pub struct ResolvedInstructionRef<'obj> {
static DUMMY_SYMBOL: Symbol = Symbol {
name: String::new(),
demangled_name: None,
normalized_name: None,
address: 0,
size: 0,
kind: SymbolKind::Unknown,
Expand Down
136 changes: 92 additions & 44 deletions objdiff-core/src/obj/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use alloc::{
collections::BTreeMap,
format,
string::{String, ToString},
vec,
vec::Vec,
};
use core::{cmp::Ordering, num::NonZeroU64};
Expand Down Expand Up @@ -35,6 +36,46 @@ fn map_section_kind(section: &object::Section) -> SectionKind {
}
}

/// Check if a symbol's name is partially compiler-generated, and if so normalize it for pairing.
/// e.g. symbol$1234 and symbol$2345 will both be replaced with symbol$0000 internally.
///
/// Three shapes are recognized, tried in this order:
/// - `<prefix>@class$<id><rest>`: Metrowerks anonymous class names, where `<id>` is a
///   non-empty run of numeric characters followed by at least one non-numeric character.
/// - `<prefix>$<id>`: Metrowerks names, split at the first `$`.
/// - `<prefix>.<id>`: GCC names, split at the first `.`.
///
/// For the last two shapes everything after the separator must be numeric; an empty
/// remainder also satisfies that check.
///
/// Returns the name with `<id>` replaced by a fixed dummy ID, or `None` when the name
/// matches none of the shapes above.
fn get_normalized_symbol_name(name: &str) -> Option<String> {
    const DUMMY_UNIQUE_ID: &str = "0000";

    // Match Metrowerks anonymous class symbol names, ignoring the unique ID.
    // e.g. __dt__Q29dCamera_c23@class$3665d_camera_cppFv
    // and: __dt__Q29dCamera_c23@class$1727d_camera_cppFv
    let anonymous_class = name.split_once("@class$").and_then(|(prefix, suffix)| {
        // `str::find` yields a *byte* offset, which is what slicing requires. Counting
        // chars instead would mis-slice (or panic) on multi-byte numeric characters,
        // since `char::is_numeric` is not limited to ASCII digits.
        let id_len = suffix.find(|c: char| !c.is_numeric()).filter(|&len| len > 0)?;
        let suffix = &suffix[id_len..];
        Some(format!("{prefix}@class${DUMMY_UNIQUE_ID}{suffix}"))
    });

    // A name containing "@class$" that did not qualify above still gets a chance
    // to match the plain `$` / `.` shapes below.
    anonymous_class.or_else(|| {
        // Match Metrowerks symbol$1234 against symbol$2345
        // and GCC symbol.1234 against symbol.2345
        ['$', '.'].iter().find_map(|&separator| {
            let (prefix, suffix) = name.split_once(separator)?;
            suffix
                .chars()
                .all(char::is_numeric)
                .then(|| format!("{prefix}{separator}{DUMMY_UNIQUE_ID}"))
        })
    })
}

/// Report whether a symbol name is entirely compiler-generated, such as @1234 or _$E1234.
/// Callers can use this to pair such symbols up by their value instead of their name.
///
/// A name qualifies when it starts with one of the known markers (`@` or `_$E`) and
/// every character after the marker is numeric.
fn is_symbol_name_compiler_generated(name: &str) -> bool {
    const MARKERS: [&str; 2] = ["@", "_$E"];
    MARKERS.iter().any(|&marker| {
        // The all-numeric requirement excludes @stringBase0, @GUARD@, etc.
        name.strip_prefix(marker).is_some_and(|id| id.chars().all(char::is_numeric))
    })
}

fn map_symbol(
arch: &dyn Arch,
file: &object::File,
Expand Down Expand Up @@ -97,10 +138,15 @@ fn map_symbol(
.and_then(|m| m.virtual_addresses.as_ref())
.and_then(|v| v.get(symbol.index().0).cloned());
let section = symbol.section_index().and_then(|i| section_indices.get(i.0).copied());
let normalized_name = get_normalized_symbol_name(&name);
if is_symbol_name_compiler_generated(&name) {
flags |= SymbolFlag::CompilerGenerated;
}

Ok(Symbol {
name,
demangled_name,
normalized_name,
address,
size,
kind,
Expand All @@ -119,13 +165,38 @@ fn map_symbols(
split_meta: Option<&SplitMeta>,
config: &DiffObjConfig,
) -> Result<(Vec<Symbol>, Vec<usize>)> {
let symbol_count = obj_file.symbols().count();
let mut symbols = Vec::<Symbol>::with_capacity(symbol_count + obj_file.sections().count());
let mut symbol_indices = Vec::<usize>::with_capacity(symbol_count + 1);
for obj_symbol in obj_file.symbols() {
if symbol_indices.len() <= obj_symbol.index().0 {
symbol_indices.resize(obj_symbol.index().0 + 1, usize::MAX);
}
// symbols() is not guaranteed to be sorted by address.
// We sort it here to fix pairing bugs with diff algorithms that assume the symbols are ordered.
// Sorting everything here once is less expensive than sorting subsets later in expensive loops.
let mut max_index = 0;
let mut obj_symbols = obj_file
.symbols()
.filter(|s| s.kind() != object::SymbolKind::File)
.inspect(|sym| max_index = max_index.max(sym.index().0))
.collect::<Vec<_>>();
obj_symbols.sort_by(|a, b| {
// Sort symbols by section index, placing absolute symbols last
a.section_index()
.map_or(usize::MAX, |s| s.0)
.cmp(&b.section_index().map_or(usize::MAX, |s| s.0))
.then_with(|| {
// Sort section symbols first in a section
if a.kind() == object::SymbolKind::Section {
Ordering::Less
} else if b.kind() == object::SymbolKind::Section {
Ordering::Greater
} else {
Ordering::Equal
}
})
// Sort by address within section
.then_with(|| a.address().cmp(&b.address()))
// If there are multiple symbols with the same address, smaller symbol first
.then_with(|| a.size().cmp(&b.size()))
});
let mut symbols = Vec::<Symbol>::with_capacity(obj_symbols.len() + obj_file.sections().count());
let mut symbol_indices = vec![usize::MAX; max_index + 1];
for obj_symbol in obj_symbols {
let symbol = map_symbol(arch, obj_file, &obj_symbol, section_indices, split_meta, config)?;
symbol_indices[obj_symbol.index().0] = symbols.len();
symbols.push(symbol);
Expand Down Expand Up @@ -172,6 +243,7 @@ fn add_section_symbols(sections: &[Section], symbols: &mut Vec<Symbol>) {
symbols.push(Symbol {
name,
demangled_name: None,
normalized_name: None,
address: 0,
size,
kind: SymbolKind::Section,
Expand All @@ -193,40 +265,18 @@ fn is_local_label(symbol: &Symbol) -> bool {
}

fn infer_symbol_sizes(arch: &dyn Arch, symbols: &mut [Symbol], sections: &[Section]) -> Result<()> {
// Create a sorted list of symbol indices by section
let mut symbols_with_section = Vec::<usize>::with_capacity(symbols.len());
for (i, symbol) in symbols.iter().enumerate() {
if symbol.section.is_some() {
symbols_with_section.push(i);
}
}
symbols_with_section.sort_by(|a, b| {
let a = &symbols[*a];
let b = &symbols[*b];
a.section
.unwrap_or(usize::MAX)
.cmp(&b.section.unwrap_or(usize::MAX))
.then_with(|| {
// Sort section symbols first
if a.kind == SymbolKind::Section {
Ordering::Less
} else if b.kind == SymbolKind::Section {
Ordering::Greater
} else {
Ordering::Equal
}
})
.then_with(|| a.address.cmp(&b.address))
.then_with(|| a.size.cmp(&b.size))
});
// Above, we've sorted the symbols by section and then by address.

// Set symbol sizes based on the next symbol's address
let mut iter_idx = 0;
let mut last_end = (0, 0);
while iter_idx < symbols_with_section.len() {
let symbol_idx = symbols_with_section[iter_idx];
while iter_idx < symbols.len() {
let symbol_idx = iter_idx;
let symbol = &symbols[symbol_idx];
let section_idx = symbol.section.unwrap();
let Some(section_idx) = symbol.section else {
// Start of absolute symbols
break;
};
iter_idx += 1;
if symbol.size != 0 {
if symbol.kind != SymbolKind::Section {
Expand All @@ -239,10 +289,9 @@ fn infer_symbol_sizes(arch: &dyn Arch, symbols: &mut [Symbol], sections: &[Secti
continue;
}
let next_symbol = loop {
if iter_idx >= symbols_with_section.len() {
let Some(next_symbol) = symbols.get(iter_idx) else {
break None;
}
let next_symbol = &symbols[symbols_with_section[iter_idx]];
};
if next_symbol.section != Some(section_idx) {
break None;
}
Expand Down Expand Up @@ -298,9 +347,11 @@ fn map_sections(
split_meta: Option<&SplitMeta>,
) -> Result<(Vec<Section>, Vec<usize>)> {
let mut section_names = BTreeMap::<String, usize>::new();
let section_count = obj_file.sections().count();
let mut max_index = 0;
let section_count =
obj_file.sections().inspect(|s| max_index = max_index.max(s.index().0)).count();
let mut result = Vec::<Section>::with_capacity(section_count);
let mut section_indices = Vec::<usize>::with_capacity(section_count + 1);
let mut section_indices = vec![usize::MAX; max_index + 1];
for section in obj_file.sections() {
let name = section.name().context("Failed to process section name")?;
let kind = map_section_kind(&section);
Expand All @@ -325,9 +376,6 @@ fn map_sections(
let id = format!("{name}-{unique_id}");
*unique_id += 1;

if section_indices.len() <= section.index().0 {
section_indices.resize(section.index().0 + 1, usize::MAX);
}
section_indices[section.index().0] = result.len();
result.push(Section {
id,
Expand Down
Loading
Loading