From 49c7430d84737ed133c502e8ec7b961fe53bae77 Mon Sep 17 00:00:00 2001 From: Perry Hertler Date: Sat, 9 Aug 2025 13:57:33 -0500 Subject: [PATCH 01/12] bumping fastglob --- Cargo.lock | 13 +++++++++++-- Cargo.toml | 2 +- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d9cf231..2638c07 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -77,6 +77,12 @@ version = "1.0.83" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "25bdb32cbbdce2b519a9cd7df3a678443100e265d5e25ca763b7572a5104f5f3" +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + [[package]] name = "assert_cmd" version = "2.0.16" @@ -314,9 +320,12 @@ dependencies = [ [[package]] name = "fast-glob" -version = "0.4.0" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb10ed0f8a3dca52477be37ac0fb8f9d1fd4cd8d311b4484bdd45c1c56e0c9ec" +checksum = "3d26eec0ae9682c457cb0f85de67ad417b716ae852736a5d94c2ad6e92a997c9" +dependencies = [ + "arrayvec", +] [[package]] name = "fastrand" diff --git a/Cargo.toml b/Cargo.toml index df37c08..098966a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,7 @@ clap = { version = "4.5.20", features = ["derive"] } clap_derive = "4.5.18" error-stack = "0.5.0" enum_dispatch = "0.3.13" -fast-glob = "0.4.0" +fast-glob = "1.0.0" glob = "0.3.2" ignore = "0.4.23" itertools = "0.14.0" From b76e8bd62f44e253e3af971f8a4539e118c7bc0a Mon Sep 17 00:00:00 2001 From: Perry Hertler Date: Sat, 9 Aug 2025 15:07:05 -0500 Subject: [PATCH 02/12] first stab at optimizing for-file --- ai-prompts/for-file-optimization.md | 21 ++ src/runner.rs | 307 +++++++++++++++++++++++++++- 2 files changed, 327 insertions(+), 1 deletion(-) create mode 100644 ai-prompts/for-file-optimization.md diff --git a/ai-prompts/for-file-optimization.md b/ai-prompts/for-file-optimization.md new file mode 100644 index 0000000..a837acb --- /dev/null +++ b/ai-prompts/for-file-optimization.md @@ -0,0 +1,21 @@ +## Re-architect `codeowners-rs` + +The CLI behaviors live in `src/cli.rs`. + +### What this tool does +- **Generate CODEOWNERS**: Builds `.github/CODEOWNERS` from multiple ownership sources. See mappers in `src/ownership/mapper/`. +- **Answer per-file ownership**: The `for-file` command returns the owner of a given file even if the checked-in `CODEOWNERS` is not up to date. + +### Current implementations +- **Fast but potentially inaccurate**: `Runner::for_file_from_codeowners` (in `src/runner.rs`) parses the existing `CODEOWNERS` file to find a file’s owner. It’s very fast, but can be wrong if `CODEOWNERS` is stale. +- **Accurate but slow**: `Runner::for_file` is correct but can take up to ~4 seconds on large repositories because it effectively determines ownership for many files and then returns the single result needed. + +Ownership resolution is complex because definitions often live in other files (packages, team configs, globs, etc.). + +## Assignment +Implement a faster `for_file` that remains accurate. + +### Requirements +- **Performance**: Under 1 second on large codebases (e.g., `$HOME/workspace/large`). +- **Correctness**: All existing tests must continue to pass. +- **Compatibility**: No changes to the external behavior of the `for-file` CLI command. \ No newline at end of file diff --git a/src/runner.rs b/src/runner.rs index 2e5fd11..2d377b6 100644 --- a/src/runner.rs +++ b/src/runner.rs @@ -39,7 +39,7 @@ pub fn for_file(run_config: &RunConfig, file_path: &str, fast: bool) -> RunResul if fast { for_file_from_codeowners(run_config, file_path) } else { - run_with_runner(run_config, |runner| runner.for_file(file_path)) + for_file_optimized(run_config, file_path) } } @@ -83,6 +83,311 @@ pub fn team_for_file_from_codeowners(run_config: &RunConfig, file_path: &str) -> .map_err(|e| Error::Io(e.to_string()))?) } +use std::collections::{HashMap, HashSet}; +use std::fs; +use fast_glob::glob_match; +use glob::glob; +use lazy_static::lazy_static; +use regex::Regex; + +fn for_file_optimized(run_config: &RunConfig, file_path: &str) -> RunResult { + let config = match config_from_path(&run_config.config_path) { + Ok(c) => c, + Err(err) => { + return RunResult { + io_errors: vec![err.to_string()], + ..Default::default() + } + } + }; + + let absolute_file_path = std::path::Path::new(file_path); + let relative_file_path = absolute_file_path + .strip_prefix(&run_config.project_root) + .unwrap_or(absolute_file_path) + .to_path_buf(); + + let teams = match load_teams(&run_config.project_root, &config.team_file_glob) { + Ok(t) => t, + Err(err) => { + return RunResult { + io_errors: vec![err.to_string()], + ..Default::default() + } + } + }; + let teams_by_name = build_teams_by_name_map(&teams); + + let mut sources_by_team: HashMap> = HashMap::new(); + + if let Some(team_name) = read_top_of_file_team(&absolute_file_path.to_path_buf()) { + if let Some(team) = teams_by_name.get(&team_name) { + sources_by_team.entry(team.name.clone()).or_default().push(crate::ownership::mapper::Source::TeamFile); + } + } + + if let Some((owner_team_name, dir_source)) = most_specific_directory_owner( + &run_config.project_root, + &relative_file_path, + &teams_by_name, + ) { + sources_by_team.entry(owner_team_name).or_default().push(dir_source); + } + + if let Some((owner_team_name, package_source)) = nearest_package_owner( + &run_config.project_root, + &relative_file_path, + &config, + &teams_by_name, + ) { + sources_by_team.entry(owner_team_name).or_default().push(package_source); + } + + if let Some((owner_team_name, gem_source)) = vendored_gem_owner(&relative_file_path, &config, &teams) { + sources_by_team.entry(owner_team_name).or_default().push(gem_source); + } + + if let Some(rel_str) = relative_file_path.to_str() { + for team in &teams { + let subtracts: HashSet<&str> = team.subtracted_globs.iter().map(|s| s.as_str()).collect(); + for owned_glob in &team.owned_globs { + if glob_match(owned_glob, rel_str) && !subtracts.iter().any(|sub| glob_match(sub, rel_str)) { + sources_by_team + .entry(team.name.clone()) + .or_default() + .push(crate::ownership::mapper::Source::TeamGlob(owned_glob.clone())); + } + } + } + } + + for team in &teams { + let team_rel = team + .path + .strip_prefix(&run_config.project_root) + .unwrap_or(&team.path) + .to_path_buf(); + if team_rel == relative_file_path { + sources_by_team.entry(team.name.clone()).or_default().push(crate::ownership::mapper::Source::TeamYml); + } + } + + let mut file_owners: Vec = Vec::new(); + for (team_name, sources) in sources_by_team.into_iter() { + if let Some(team) = teams_by_name.get(&team_name) { + let relative_team_yml_path = team + .path + .strip_prefix(&run_config.project_root) + .unwrap_or(&team.path) + .to_string_lossy() + .to_string(); + file_owners.push(FileOwner { + team: team.clone(), + team_config_file_path: relative_team_yml_path, + sources, + }); + } + } + + let info_messages: Vec = match file_owners.len() { + 0 => vec![format!("{}", FileOwner::default())], + 1 => vec![format!("{}", file_owners[0])], + _ => { + let mut error_messages = vec!["Error: file is owned by multiple teams!".to_string()]; + for file_owner in file_owners { + error_messages.push(format!("\n{}", file_owner)); + } + return RunResult { + validation_errors: error_messages, + ..Default::default() + }; + } + }; + RunResult { info_messages, ..Default::default() } +} + +fn build_teams_by_name_map(teams: &[Team]) -> HashMap { + let mut map = HashMap::new(); + for team in teams { + map.insert(team.name.clone(), team.clone()); + map.insert(team.github_team.clone(), team.clone()); + } + map +} + +fn load_teams(project_root: &std::path::Path, team_file_globs: &[String]) -> std::result::Result, String> { + let mut teams: Vec = Vec::new(); + for glob_str in team_file_globs { + let absolute_glob = format!("{}/{}", project_root.display(), glob_str); + let paths = glob(&absolute_glob).map_err(|e| e.to_string())?; + for path in paths.flatten() { + match Team::from_team_file_path(path.clone()) { + Ok(team) => teams.push(team), + Err(e) => { + eprintln!("Error parsing team file: {}", e); + continue; + } + } + } + } + Ok(teams) +} + +lazy_static! { + static ref TOP_OF_FILE_TEAM_REGEX: Regex = Regex::new(r#"^(?:#|//) @team (.*)$"#).expect("error compiling regular expression"); +} + +fn read_top_of_file_team(path: &std::path::PathBuf) -> Option { + let content = fs::read_to_string(path).ok()?; + let first_line = content.lines().next()?; + TOP_OF_FILE_TEAM_REGEX + .captures(first_line) + .and_then(|cap| cap.get(1)) + .map(|m| m.as_str().to_string()) +} + +fn most_specific_directory_owner( + project_root: &std::path::Path, + relative_file_path: &std::path::Path, + teams_by_name: &HashMap, +) -> Option<(String, crate::ownership::mapper::Source)> { + let mut current = project_root.join(relative_file_path); + let mut best: Option<(String, crate::ownership::mapper::Source)> = None; + loop { + let parent_opt = current.parent().map(|p| p.to_path_buf()); + let Some(parent) = parent_opt else { break }; + let codeowner_path = parent.join(".codeowner"); + if let Ok(owner_str) = fs::read_to_string(&codeowner_path) { + let owner = owner_str.trim(); + if let Some(team) = teams_by_name.get(owner) { + let relative_dir = parent + .strip_prefix(project_root) + .unwrap_or(parent.as_path()) + .to_string_lossy() + .to_string(); + let candidate = ( + team.name.clone(), + crate::ownership::mapper::Source::Directory(relative_dir), + ); + match &best { + None => best = Some(candidate), + Some((_, existing_source)) => { + let existing_len = source_directory_depth(existing_source); + let candidate_len = source_directory_depth(&candidate.1); + if candidate_len > existing_len { + best = Some(candidate); + } + } + } + } + } + if parent == project_root { break; } + current = parent.clone(); + } + best +} + +fn nearest_package_owner( + project_root: &std::path::Path, + relative_file_path: &std::path::Path, + config: &Config, + teams_by_name: &HashMap, +) -> Option<(String, crate::ownership::mapper::Source)> { + let mut current = project_root.join(relative_file_path); + loop { + let parent_opt = current.parent().map(|p| p.to_path_buf()); + let Some(parent) = parent_opt else { break }; + let parent_rel = parent.strip_prefix(project_root).unwrap_or(parent.as_path()); + if let Some(rel_str) = parent_rel.to_str() { + if glob_list_matches(rel_str, &config.ruby_package_paths) { + let pkg_yml = parent.join("package.yml"); + if pkg_yml.exists() { + if let Ok(owner) = read_ruby_package_owner(&pkg_yml) { + if let Some(team) = teams_by_name.get(&owner) { + let package_path = parent_rel.join("package.yml"); + let package_glob = format!("{}/**/**", rel_str); + return Some(( + team.name.clone(), + crate::ownership::mapper::Source::Package( + package_path.to_string_lossy().to_string(), + package_glob, + ), + )); + } + } + } + } + if glob_list_matches(rel_str, &config.javascript_package_paths) { + let pkg_json = parent.join("package.json"); + if pkg_json.exists() { + if let Ok(owner) = read_js_package_owner(&pkg_json) { + if let Some(team) = teams_by_name.get(&owner) { + let package_path = parent_rel.join("package.json"); + let package_glob = format!("{}/**/**", rel_str); + return Some(( + team.name.clone(), + crate::ownership::mapper::Source::Package( + package_path.to_string_lossy().to_string(), + package_glob, + ), + )); + } + } + } + } + } + if parent == project_root { break; } + current = parent; + } + None +} + +fn source_directory_depth(source: &crate::ownership::mapper::Source) -> usize { + match source { + crate::ownership::mapper::Source::Directory(path) => path.matches('/').count(), + _ => 0, + } +} + +fn glob_list_matches(path: &str, globs: &[String]) -> bool { + globs.iter().any(|g| glob_match(g, path)) +} + +fn read_ruby_package_owner(path: &std::path::Path) -> std::result::Result { + let file = std::fs::File::open(path).map_err(|e| e.to_string())?; + let deserializer: crate::project::deserializers::RubyPackage = serde_yaml::from_reader(file).map_err(|e| e.to_string())?; + deserializer.owner.ok_or_else(|| "Missing owner".to_string()) +} + +fn read_js_package_owner(path: &std::path::Path) -> std::result::Result { + let file = std::fs::File::open(path).map_err(|e| e.to_string())?; + let deserializer: crate::project::deserializers::JavascriptPackage = serde_json::from_reader(file).map_err(|e| e.to_string())?; + deserializer + .metadata + .and_then(|m| m.owner) + .ok_or_else(|| "Missing owner".to_string()) +} + +fn vendored_gem_owner( + relative_file_path: &std::path::Path, + config: &Config, + teams: &[Team], +) -> Option<(String, crate::ownership::mapper::Source)> { + use std::path::Component; + let mut comps = relative_file_path.components(); + let first = comps.next()?; + let second = comps.next()?; + let first_str = match first { Component::Normal(s) => s.to_string_lossy(), _ => return None }; + if first_str != config.vendored_gems_path { return None; } + let gem_name = match second { Component::Normal(s) => s.to_string_lossy().to_string(), _ => return None }; + for team in teams { + if team.owned_gems.iter().any(|g| g == &gem_name) { + return Some((team.name.clone(), crate::ownership::mapper::Source::TeamGem)); + } + } + None +} + pub fn for_team(run_config: &RunConfig, team_name: &str) -> RunResult { run_with_runner(run_config, |runner| runner.for_team(team_name)) } From 0c797630a832aa6c5689ff48b8b1d4ca6479ca35 Mon Sep 17 00:00:00 2001 From: Perry Hertler Date: Sat, 9 Aug 2025 16:58:24 -0500 Subject: [PATCH 03/12] feat(ownership): fast per-file owner resolution + top-of-file annotation support --- src/ownership.rs | 1 + src/ownership/fast.rs | 312 ++++++++++++++++++++++++++++++++++++++++++ src/runner.rs | 277 +------------------------------------ 3 files changed, 316 insertions(+), 274 deletions(-) create mode 100644 src/ownership/fast.rs diff --git a/src/ownership.rs b/src/ownership.rs index 67e3e71..012ebbe 100644 --- a/src/ownership.rs +++ b/src/ownership.rs @@ -14,6 +14,7 @@ mod file_owner_finder; pub(crate) mod mapper; pub(crate) mod parser; mod validator; +pub(crate) mod fast; use crate::{ ownership::mapper::DirectoryMapper, diff --git a/src/ownership/fast.rs b/src/ownership/fast.rs new file mode 100644 index 0000000..47e06f7 --- /dev/null +++ b/src/ownership/fast.rs @@ -0,0 +1,312 @@ +use std::{collections::{HashMap, HashSet}, fs, path::Path}; + +use fast_glob::glob_match; +use glob::glob; +use lazy_static::lazy_static; +use regex::Regex; + +use crate::{config::Config, project::Team}; + +use super::{FileOwner, mapper::Source}; + +pub fn find_file_owners(project_root: &Path, config: &Config, file_path: &Path) -> Vec { + let absolute_file_path = if file_path.is_absolute() { + file_path.to_path_buf() + } else { + project_root.join(file_path) + }; + let relative_file_path = absolute_file_path + .strip_prefix(project_root) + .unwrap_or(&absolute_file_path) + .to_path_buf(); + + let teams = match load_teams(project_root, &config.team_file_glob) { + Ok(t) => t, + Err(_) => return vec![], + }; + let teams_by_name = build_teams_by_name_map(&teams); + + let mut sources_by_team: HashMap> = HashMap::new(); + + if let Some(team_name) = read_top_of_file_team(&absolute_file_path) { + if let Some(team) = teams_by_name.get(&team_name) { + sources_by_team.entry(team.name.clone()).or_default().push(Source::TeamFile); + } + } + + if let Some((owner_team_name, dir_source)) = most_specific_directory_owner(project_root, &relative_file_path, &teams_by_name) { + sources_by_team.entry(owner_team_name).or_default().push(dir_source); + } + + if let Some((owner_team_name, package_source)) = nearest_package_owner(project_root, &relative_file_path, config, &teams_by_name) { + sources_by_team.entry(owner_team_name).or_default().push(package_source); + } + + if let Some((owner_team_name, gem_source)) = vendored_gem_owner(&relative_file_path, config, &teams) { + sources_by_team.entry(owner_team_name).or_default().push(gem_source); + } + + if let Some(rel_str) = relative_file_path.to_str() { + for team in &teams { + let subtracts: HashSet<&str> = team.subtracted_globs.iter().map(|s| s.as_str()).collect(); + for owned_glob in &team.owned_globs { + if glob_match(owned_glob, rel_str) && !subtracts.iter().any(|sub| glob_match(sub, rel_str)) { + sources_by_team + .entry(team.name.clone()) + .or_default() + .push(Source::TeamGlob(owned_glob.clone())); + } + } + } + } + + for team in &teams { + let team_rel = team + .path + .strip_prefix(project_root) + .unwrap_or(&team.path) + .to_path_buf(); + if team_rel == relative_file_path { + sources_by_team.entry(team.name.clone()).or_default().push(Source::TeamYml); + } + } + + let mut file_owners: Vec = Vec::new(); + for (team_name, sources) in sources_by_team.into_iter() { + if let Some(team) = teams_by_name.get(&team_name) { + let relative_team_yml_path = team + .path + .strip_prefix(project_root) + .unwrap_or(&team.path) + .to_string_lossy() + .to_string(); + file_owners.push(FileOwner { + team: team.clone(), + team_config_file_path: relative_team_yml_path, + sources, + }); + } + } + + if file_owners.len() > 1 { + file_owners.sort_by(|a, b| { + let priority_a = a + .sources + .iter() + .map(|s| source_priority(s)) + .min() + .unwrap_or(u8::MAX); + let priority_b = b + .sources + .iter() + .map(|s| source_priority(s)) + .min() + .unwrap_or(u8::MAX); + priority_a.cmp(&priority_b).then_with(|| a.team.name.cmp(&b.team.name)) + }); + } + + file_owners +} + +fn build_teams_by_name_map(teams: &[Team]) -> HashMap { + let mut map = HashMap::new(); + for team in teams { + map.insert(team.name.clone(), team.clone()); + map.insert(team.github_team.clone(), team.clone()); + } + map +} + +fn load_teams(project_root: &Path, team_file_globs: &[String]) -> std::result::Result, String> { + let mut teams: Vec = Vec::new(); + for glob_str in team_file_globs { + let absolute_glob = format!("{}/{}", project_root.display(), glob_str); + let paths = glob(&absolute_glob).map_err(|e| e.to_string())?; + for path in paths.flatten() { + match Team::from_team_file_path(path.clone()) { + Ok(team) => teams.push(team), + Err(e) => { + eprintln!("Error parsing team file: {}", e); + continue; + } + } + } + } + Ok(teams) +} + +lazy_static! { + static ref TOP_OF_FILE_TEAM_AT_REGEX: Regex = Regex::new(r#"^(?:#|//)\s*@team\s+(.+)$"#) + .expect("error compiling regular expression"); + static ref TOP_OF_FILE_TEAM_COLON_REGEX: Regex = Regex::new(r#"(?i)^(?:#|//)\s*team\s*:\s*(.+)$"#) + .expect("error compiling regular expression"); +} + +fn read_top_of_file_team(path: &Path) -> Option { + let content = fs::read_to_string(path).ok()?; + for line in content.lines().take(15) { + if let Some(cap) = TOP_OF_FILE_TEAM_AT_REGEX.captures(line) { + if let Some(m) = cap.get(1) { + return Some(m.as_str().to_string()); + } + } + if let Some(cap) = TOP_OF_FILE_TEAM_COLON_REGEX.captures(line) { + if let Some(m) = cap.get(1) { + return Some(m.as_str().to_string()); + } + } + let trimmed = line.trim_start(); + if !(trimmed.starts_with('#') || trimmed.starts_with("//") || trimmed.is_empty()) { + break; + } + } + None +} + +fn most_specific_directory_owner( + project_root: &Path, + relative_file_path: &Path, + teams_by_name: &HashMap, +) -> Option<(String, Source)> { + let mut current = project_root.join(relative_file_path); + let mut best: Option<(String, Source)> = None; + loop { + let parent_opt = current.parent().map(|p| p.to_path_buf()); + let Some(parent) = parent_opt else { break }; + let codeowner_path = parent.join(".codeowner"); + if let Ok(owner_str) = fs::read_to_string(&codeowner_path) { + let owner = owner_str.trim(); + if let Some(team) = teams_by_name.get(owner) { + let relative_dir = parent + .strip_prefix(project_root) + .unwrap_or(parent.as_path()) + .to_string_lossy() + .to_string(); + let candidate = (team.name.clone(), Source::Directory(relative_dir)); + match &best { + None => best = Some(candidate), + Some((_, existing_source)) => { + let existing_len = source_directory_depth(existing_source); + let candidate_len = source_directory_depth(&candidate.1); + if candidate_len > existing_len { + best = Some(candidate); + } + } + } + } + } + if parent == project_root { break; } + current = parent.clone(); + } + best +} + +fn nearest_package_owner( + project_root: &Path, + relative_file_path: &Path, + config: &Config, + teams_by_name: &HashMap, +) -> Option<(String, Source)> { + let mut current = project_root.join(relative_file_path); + loop { + let parent_opt = current.parent().map(|p| p.to_path_buf()); + let Some(parent) = parent_opt else { break }; + let parent_rel = parent.strip_prefix(project_root).unwrap_or(parent.as_path()); + if let Some(rel_str) = parent_rel.to_str() { + if glob_list_matches(rel_str, &config.ruby_package_paths) { + let pkg_yml = parent.join("package.yml"); + if pkg_yml.exists() { + if let Ok(owner) = read_ruby_package_owner(&pkg_yml) { + if let Some(team) = teams_by_name.get(&owner) { + let package_path = parent_rel.join("package.yml"); + let package_glob = format!("{}/**/**", rel_str); + return Some((team.name.clone(), Source::Package( + package_path.to_string_lossy().to_string(), + package_glob, + ))); + } + } + } + } + if glob_list_matches(rel_str, &config.javascript_package_paths) { + let pkg_json = parent.join("package.json"); + if pkg_json.exists() { + if let Ok(owner) = read_js_package_owner(&pkg_json) { + if let Some(team) = teams_by_name.get(&owner) { + let package_path = parent_rel.join("package.json"); + let package_glob = format!("{}/**/**", rel_str); + return Some((team.name.clone(), Source::Package( + package_path.to_string_lossy().to_string(), + package_glob, + ))); + } + } + } + } + } + if parent == project_root { break; } + current = parent; + } + None +} + +fn source_directory_depth(source: &Source) -> usize { + match source { + Source::Directory(path) => path.matches('/').count(), + _ => 0, + } +} + +fn glob_list_matches(path: &str, globs: &[String]) -> bool { + globs.iter().any(|g| glob_match(g, path)) +} + +fn read_ruby_package_owner(path: &Path) -> std::result::Result { + let file = std::fs::File::open(path).map_err(|e| e.to_string())?; + let deserializer: crate::project::deserializers::RubyPackage = serde_yaml::from_reader(file).map_err(|e| e.to_string())?; + deserializer.owner.ok_or_else(|| "Missing owner".to_string()) +} + +fn read_js_package_owner(path: &Path) -> std::result::Result { + let file = std::fs::File::open(path).map_err(|e| e.to_string())?; + let deserializer: crate::project::deserializers::JavascriptPackage = serde_json::from_reader(file).map_err(|e| e.to_string())?; + deserializer + .metadata + .and_then(|m| m.owner) + .ok_or_else(|| "Missing owner".to_string()) +} + +fn vendored_gem_owner( + relative_file_path: &Path, + config: &Config, + teams: &[Team], +) -> Option<(String, Source)> { + use std::path::Component; + let mut comps = relative_file_path.components(); + let first = comps.next()?; + let second = comps.next()?; + let first_str = match first { Component::Normal(s) => s.to_string_lossy(), _ => return None }; + if first_str != config.vendored_gems_path { return None; } + let gem_name = match second { Component::Normal(s) => s.to_string_lossy().to_string(), _ => return None }; + for team in teams { + if team.owned_gems.iter().any(|g| g == &gem_name) { + return Some((team.name.clone(), Source::TeamGem)); + } + } + None +} + +fn source_priority(source: &Source) -> u8 { + match source { + // Highest confidence first + Source::TeamFile => 0, + Source::Directory(_) => 1, + Source::Package(_, _) => 2, + Source::TeamGlob(_) => 3, + Source::TeamGem => 4, + Source::TeamYml => 5, + } +} + + diff --git a/src/runner.rs b/src/runner.rs index 2d377b6..8afa366 100644 --- a/src/runner.rs +++ b/src/runner.rs @@ -83,12 +83,7 @@ pub fn team_for_file_from_codeowners(run_config: &RunConfig, file_path: &str) -> .map_err(|e| Error::Io(e.to_string()))?) } -use std::collections::{HashMap, HashSet}; -use std::fs; -use fast_glob::glob_match; -use glob::glob; -use lazy_static::lazy_static; -use regex::Regex; +// (imports below intentionally trimmed after refactor) fn for_file_optimized(run_config: &RunConfig, file_path: &str) -> RunResult { let config = match config_from_path(&run_config.config_path) { @@ -101,93 +96,8 @@ fn for_file_optimized(run_config: &RunConfig, file_path: &str) -> RunResult { } }; - let absolute_file_path = std::path::Path::new(file_path); - let relative_file_path = absolute_file_path - .strip_prefix(&run_config.project_root) - .unwrap_or(absolute_file_path) - .to_path_buf(); - - let teams = match load_teams(&run_config.project_root, &config.team_file_glob) { - Ok(t) => t, - Err(err) => { - return RunResult { - io_errors: vec![err.to_string()], - ..Default::default() - } - } - }; - let teams_by_name = build_teams_by_name_map(&teams); - - let mut sources_by_team: HashMap> = HashMap::new(); - - if let Some(team_name) = read_top_of_file_team(&absolute_file_path.to_path_buf()) { - if let Some(team) = teams_by_name.get(&team_name) { - sources_by_team.entry(team.name.clone()).or_default().push(crate::ownership::mapper::Source::TeamFile); - } - } - - if let Some((owner_team_name, dir_source)) = most_specific_directory_owner( - &run_config.project_root, - &relative_file_path, - &teams_by_name, - ) { - sources_by_team.entry(owner_team_name).or_default().push(dir_source); - } - - if let Some((owner_team_name, package_source)) = nearest_package_owner( - &run_config.project_root, - &relative_file_path, - &config, - &teams_by_name, - ) { - sources_by_team.entry(owner_team_name).or_default().push(package_source); - } - - if let Some((owner_team_name, gem_source)) = vendored_gem_owner(&relative_file_path, &config, &teams) { - sources_by_team.entry(owner_team_name).or_default().push(gem_source); - } - - if let Some(rel_str) = relative_file_path.to_str() { - for team in &teams { - let subtracts: HashSet<&str> = team.subtracted_globs.iter().map(|s| s.as_str()).collect(); - for owned_glob in &team.owned_globs { - if glob_match(owned_glob, rel_str) && !subtracts.iter().any(|sub| glob_match(sub, rel_str)) { - sources_by_team - .entry(team.name.clone()) - .or_default() - .push(crate::ownership::mapper::Source::TeamGlob(owned_glob.clone())); - } - } - } - } - - for team in &teams { - let team_rel = team - .path - .strip_prefix(&run_config.project_root) - .unwrap_or(&team.path) - .to_path_buf(); - if team_rel == relative_file_path { - sources_by_team.entry(team.name.clone()).or_default().push(crate::ownership::mapper::Source::TeamYml); - } - } - - let mut file_owners: Vec = Vec::new(); - for (team_name, sources) in sources_by_team.into_iter() { - if let Some(team) = teams_by_name.get(&team_name) { - let relative_team_yml_path = team - .path - .strip_prefix(&run_config.project_root) - .unwrap_or(&team.path) - .to_string_lossy() - .to_string(); - file_owners.push(FileOwner { - team: team.clone(), - team_config_file_path: relative_team_yml_path, - sources, - }); - } - } + use crate::ownership::fast::find_file_owners; + let file_owners = find_file_owners(&run_config.project_root, &config, std::path::Path::new(file_path)); let info_messages: Vec = match file_owners.len() { 0 => vec![format!("{}", FileOwner::default())], @@ -206,187 +116,6 @@ fn for_file_optimized(run_config: &RunConfig, file_path: &str) -> RunResult { RunResult { info_messages, ..Default::default() } } -fn build_teams_by_name_map(teams: &[Team]) -> HashMap { - let mut map = HashMap::new(); - for team in teams { - map.insert(team.name.clone(), team.clone()); - map.insert(team.github_team.clone(), team.clone()); - } - map -} - -fn load_teams(project_root: &std::path::Path, team_file_globs: &[String]) -> std::result::Result, String> { - let mut teams: Vec = Vec::new(); - for glob_str in team_file_globs { - let absolute_glob = format!("{}/{}", project_root.display(), glob_str); - let paths = glob(&absolute_glob).map_err(|e| e.to_string())?; - for path in paths.flatten() { - match Team::from_team_file_path(path.clone()) { - Ok(team) => teams.push(team), - Err(e) => { - eprintln!("Error parsing team file: {}", e); - continue; - } - } - } - } - Ok(teams) -} - -lazy_static! { - static ref TOP_OF_FILE_TEAM_REGEX: Regex = Regex::new(r#"^(?:#|//) @team (.*)$"#).expect("error compiling regular expression"); -} - -fn read_top_of_file_team(path: &std::path::PathBuf) -> Option { - let content = fs::read_to_string(path).ok()?; - let first_line = content.lines().next()?; - TOP_OF_FILE_TEAM_REGEX - .captures(first_line) - .and_then(|cap| cap.get(1)) - .map(|m| m.as_str().to_string()) -} - -fn most_specific_directory_owner( - project_root: &std::path::Path, - relative_file_path: &std::path::Path, - teams_by_name: &HashMap, -) -> Option<(String, crate::ownership::mapper::Source)> { - let mut current = project_root.join(relative_file_path); - let mut best: Option<(String, crate::ownership::mapper::Source)> = None; - loop { - let parent_opt = current.parent().map(|p| p.to_path_buf()); - let Some(parent) = parent_opt else { break }; - let codeowner_path = parent.join(".codeowner"); - if let Ok(owner_str) = fs::read_to_string(&codeowner_path) { - let owner = owner_str.trim(); - if let Some(team) = teams_by_name.get(owner) { - let relative_dir = parent - .strip_prefix(project_root) - .unwrap_or(parent.as_path()) - .to_string_lossy() - .to_string(); - let candidate = ( - team.name.clone(), - crate::ownership::mapper::Source::Directory(relative_dir), - ); - match &best { - None => best = Some(candidate), - Some((_, existing_source)) => { - let existing_len = source_directory_depth(existing_source); - let candidate_len = source_directory_depth(&candidate.1); - if candidate_len > existing_len { - best = Some(candidate); - } - } - } - } - } - if parent == project_root { break; } - current = parent.clone(); - } - best -} - -fn nearest_package_owner( - project_root: &std::path::Path, - relative_file_path: &std::path::Path, - config: &Config, - teams_by_name: &HashMap, -) -> Option<(String, crate::ownership::mapper::Source)> { - let mut current = project_root.join(relative_file_path); - loop { - let parent_opt = current.parent().map(|p| p.to_path_buf()); - let Some(parent) = parent_opt else { break }; - let parent_rel = parent.strip_prefix(project_root).unwrap_or(parent.as_path()); - if let Some(rel_str) = parent_rel.to_str() { - if glob_list_matches(rel_str, &config.ruby_package_paths) { - let pkg_yml = parent.join("package.yml"); - if pkg_yml.exists() { - if let Ok(owner) = read_ruby_package_owner(&pkg_yml) { - if let Some(team) = teams_by_name.get(&owner) { - let package_path = parent_rel.join("package.yml"); - let package_glob = format!("{}/**/**", rel_str); - return Some(( - team.name.clone(), - crate::ownership::mapper::Source::Package( - package_path.to_string_lossy().to_string(), - package_glob, - ), - )); - } - } - } - } - if glob_list_matches(rel_str, &config.javascript_package_paths) { - let pkg_json = parent.join("package.json"); - if pkg_json.exists() { - if let Ok(owner) = read_js_package_owner(&pkg_json) { - if let Some(team) = teams_by_name.get(&owner) { - let package_path = parent_rel.join("package.json"); - let package_glob = format!("{}/**/**", rel_str); - return Some(( - team.name.clone(), - crate::ownership::mapper::Source::Package( - package_path.to_string_lossy().to_string(), - package_glob, - ), - )); - } - } - } - } - } - if parent == project_root { break; } - current = parent; - } - None -} - -fn source_directory_depth(source: &crate::ownership::mapper::Source) -> usize { - match source { - crate::ownership::mapper::Source::Directory(path) => path.matches('/').count(), - _ => 0, - } -} - -fn glob_list_matches(path: &str, globs: &[String]) -> bool { - globs.iter().any(|g| glob_match(g, path)) -} - -fn read_ruby_package_owner(path: &std::path::Path) -> std::result::Result { - let file = std::fs::File::open(path).map_err(|e| e.to_string())?; - let deserializer: crate::project::deserializers::RubyPackage = serde_yaml::from_reader(file).map_err(|e| e.to_string())?; - deserializer.owner.ok_or_else(|| "Missing owner".to_string()) -} - -fn read_js_package_owner(path: &std::path::Path) -> std::result::Result { - let file = std::fs::File::open(path).map_err(|e| e.to_string())?; - let deserializer: crate::project::deserializers::JavascriptPackage = serde_json::from_reader(file).map_err(|e| e.to_string())?; - deserializer - .metadata - .and_then(|m| m.owner) - .ok_or_else(|| "Missing owner".to_string()) -} - -fn vendored_gem_owner( - relative_file_path: &std::path::Path, - config: &Config, - teams: &[Team], -) -> Option<(String, crate::ownership::mapper::Source)> { - use std::path::Component; - let mut comps = relative_file_path.components(); - let first = comps.next()?; - let second = comps.next()?; - let first_str = match first { Component::Normal(s) => s.to_string_lossy(), _ => return None }; - if first_str != config.vendored_gems_path { return None; } - let gem_name = match second { Component::Normal(s) => s.to_string_lossy().to_string(), _ => return None }; - for team in teams { - if team.owned_gems.iter().any(|g| g == &gem_name) { - return Some((team.name.clone(), crate::ownership::mapper::Source::TeamGem)); - } - } - None -} pub fn for_team(run_config: &RunConfig, team_name: &str) -> RunResult { run_with_runner(run_config, |runner| runner.for_team(team_name)) From e4244ee8df8f174a1ac35161d447c840f5bc6480 Mon Sep 17 00:00:00 2001 From: Perry Hertler Date: Sat, 9 Aug 2025 19:38:45 -0500 Subject: [PATCH 04/12] verifying new == old --- src/bin/compare_for_file.rs | 179 ++++++++++++++++++ src/ownership.rs | 2 +- src/ownership/fast.rs | 39 ++-- src/project_builder.rs | 5 +- src/runner.rs | 8 +- .../valid_project/config/code_ownership.yml | 2 +- .../should_be_ignored/an_ignored_file.rb | 7 + tests/valid_project_test.rs | 20 ++ 8 files changed, 243 insertions(+), 19 deletions(-) create mode 100644 src/bin/compare_for_file.rs create mode 100644 tests/fixtures/valid_project/should_be_ignored/an_ignored_file.rb diff --git a/src/bin/compare_for_file.rs b/src/bin/compare_for_file.rs new file mode 100644 index 0000000..6cc470c --- /dev/null +++ b/src/bin/compare_for_file.rs @@ -0,0 +1,179 @@ +use std::{ + fs::File, + io::{self, Write}, + path::{Path, PathBuf}, + process::Command, +}; + +use codeowners::config::Config as OwnershipConfig; +use codeowners::runner::{RunConfig, Runner}; +use codeowners::ownership::{fast, FileOwner}; +use ignore::WalkBuilder; +use serde_yaml; + +fn main() { + let project_root = std::env::args() + .nth(1) + .expect("usage: compare_for_file "); + let project_root = PathBuf::from(project_root); + if !project_root.is_absolute() { + eprintln!("Project root must be absolute"); + std::process::exit(2); + } + + let codeowners_file_path = project_root.join(".github/CODEOWNERS"); + let config_path = project_root.join("config/code_ownership.yml"); + + let run_config = RunConfig { + project_root: project_root.clone(), + codeowners_file_path, + config_path: config_path.clone(), + no_cache: false, + }; + + // Build the original, accurate-but-slower runner once + let runner = match Runner::new(&run_config) { + Ok(r) => r, + Err(e) => { + eprintln!("Failed to initialize Runner: {}", e); + std::process::exit(1); + } + }; + + // Load config once for the optimized path + let config_file = match File::open(&config_path) { + Ok(f) => f, + Err(e) => { + eprintln!("Can't open config file {}: {}", config_path.display(), e); + std::process::exit(1); + } + }; + let optimized_config: OwnershipConfig = match serde_yaml::from_reader(config_file) { + Ok(c) => c, + Err(e) => { + eprintln!("Can't parse config file {}: {}", config_path.display(), e); + std::process::exit(1); + } + }; + + let mut total_files: usize = 0; + let mut diff_count: usize = 0; + + // Prefer tracked files from git; fall back to walking the FS if git is unavailable + let tracked_files_output = Command::new("git") + .arg("-C") + .arg(&project_root) + .arg("ls-files") + .arg("-z") + .output(); + + match tracked_files_output { + Ok(output) if output.status.success() => { + let bytes = output.stdout; + for rel in bytes.split(|b| *b == 0u8) { + if rel.is_empty() { continue; } + let rel_str = match std::str::from_utf8(rel) { Ok(s) => s, Err(_) => continue }; + let abs_path = project_root.join(rel_str); + // Only process regular files that currently exist + if !abs_path.is_file() { continue; } + + total_files += 1; + let original = run_original(&runner, &abs_path); + let optimized = run_optimized(&project_root, &optimized_config, &abs_path); + + if original != optimized { + diff_count += 1; + println!("\n==== {} ====", abs_path.display()); + println!("ORIGINAL:\n{}", original); + println!("OPTIMIZED:\n{}", optimized); + let _ = io::stdout().flush(); + } + + if total_files % 1000 == 0 { + eprintln!( + "Processed {} files... diffs so far: {}", + total_files, diff_count + ); + } + } + } + _ => { + eprintln!("git ls-files failed; falling back to filesystem walk (untracked files may be included)"); + let walker = WalkBuilder::new(&project_root) + .hidden(false) + .git_ignore(true) + .git_exclude(true) + .follow_links(false) + .build(); + + for result in walker { + let entry = match result { + Ok(e) => e, + Err(err) => { + eprintln!("walk error: {}", err); + continue; + } + }; + if !entry.file_type().map(|t| t.is_file()).unwrap_or(false) { + continue; + } + let path = entry.path(); + total_files += 1; + + let original = run_original(&runner, path); + let optimized = run_optimized(&project_root, &optimized_config, path); + + if original != optimized { + diff_count += 1; + println!("\n==== {} ====", path.display()); + println!("ORIGINAL:\n{}", original); + println!("OPTIMIZED:\n{}", optimized); + let _ = io::stdout().flush(); + } + + if total_files % 1000 == 0 { + eprintln!( + "Processed {} files... diffs so far: {}", + total_files, diff_count + ); + } + } + } + } + + println!("Checked {} files. Diffs: {}", total_files, diff_count); + if diff_count > 0 { + std::process::exit(3); + } +} + +fn run_original(runner: &Runner, file_path: &Path) -> String { + let result = runner.for_file(&file_path.to_string_lossy()); + if !result.validation_errors.is_empty() { + return result.validation_errors.join("\n"); + } + if !result.io_errors.is_empty() { + return format!("IO_ERROR: {}", result.io_errors.join(" | ")); + } + result.info_messages.join("\n") +} + +fn run_optimized(project_root: &Path, config: &OwnershipConfig, file_path: &Path) -> String { + let owners: Vec = match fast::find_file_owners(project_root, config, file_path) { + Ok(v) => v, + Err(e) => return format!("IO_ERROR: {}", e), + }; + match owners.len() { + 0 => format!("{}", FileOwner::default()), + 1 => format!("{}", owners[0]), + _ => { + let mut lines = vec!["Error: file is owned by multiple teams!".to_string()]; + for owner in owners { + lines.push(format!("\n{}", owner)); + } + lines.join("\n") + } + } +} + + diff --git a/src/ownership.rs b/src/ownership.rs index 012ebbe..694ffda 100644 --- a/src/ownership.rs +++ b/src/ownership.rs @@ -14,7 +14,7 @@ mod file_owner_finder; pub(crate) mod mapper; pub(crate) mod parser; mod validator; -pub(crate) mod fast; +pub mod fast; use crate::{ ownership::mapper::DirectoryMapper, diff --git a/src/ownership/fast.rs b/src/ownership/fast.rs index 47e06f7..453e10c 100644 --- a/src/ownership/fast.rs +++ b/src/ownership/fast.rs @@ -9,7 +9,7 @@ use crate::{config::Config, project::Team}; use super::{FileOwner, mapper::Source}; -pub fn find_file_owners(project_root: &Path, config: &Config, file_path: &Path) -> Vec { +pub fn find_file_owners(project_root: &Path, config: &Config, file_path: &Path) -> Result, String> { let absolute_file_path = if file_path.is_absolute() { file_path.to_path_buf() } else { @@ -22,15 +22,22 @@ pub fn find_file_owners(project_root: &Path, config: &Config, file_path: &Path) let teams = match load_teams(project_root, &config.team_file_glob) { Ok(t) => t, - Err(_) => return vec![], + Err(e) => return Err(e), }; let teams_by_name = build_teams_by_name_map(&teams); let mut sources_by_team: HashMap> = HashMap::new(); if let Some(team_name) = read_top_of_file_team(&absolute_file_path) { - if let Some(team) = teams_by_name.get(&team_name) { - sources_by_team.entry(team.name.clone()).or_default().push(Source::TeamFile); + // Only consider top-of-file annotations for files included by config.owned_globs and not excluded by config.unowned_globs + if let Some(rel_str) = relative_file_path.to_str() { + let is_config_owned = glob_list_matches(rel_str, &config.owned_globs); + let is_config_unowned = glob_list_matches(rel_str, &config.unowned_globs); + if is_config_owned && !is_config_unowned { + if let Some(team) = teams_by_name.get(&team_name) { + sources_by_team.entry(team.name.clone()).or_default().push(Source::TeamFile); + } + } } } @@ -106,7 +113,7 @@ pub fn find_file_owners(project_root: &Path, config: &Config, file_path: &Path) }); } - file_owners + Ok(file_owners) } fn build_teams_by_name_map(teams: &[Team]) -> HashMap { @@ -137,23 +144,25 @@ fn load_teams(project_root: &Path, team_file_globs: &[String]) -> std::result::R } lazy_static! { - static ref TOP_OF_FILE_TEAM_AT_REGEX: Regex = Regex::new(r#"^(?:#|//)\s*@team\s+(.+)$"#) - .expect("error compiling regular expression"); - static ref TOP_OF_FILE_TEAM_COLON_REGEX: Regex = Regex::new(r#"(?i)^(?:#|//)\s*team\s*:\s*(.+)$"#) - .expect("error compiling regular expression"); + static ref TOP_OF_FILE_TEAM_AT_REGEX: Option = Regex::new(r#"^(?:#|//)\s*@team\s+(.+)$"#).ok(); + static ref TOP_OF_FILE_TEAM_COLON_REGEX: Option = Regex::new(r#"(?i)^(?:#|//)\s*team\s*:\s*(.+)$"#).ok(); } fn read_top_of_file_team(path: &Path) -> Option { let content = fs::read_to_string(path).ok()?; for line in content.lines().take(15) { - if let Some(cap) = TOP_OF_FILE_TEAM_AT_REGEX.captures(line) { - if let Some(m) = cap.get(1) { - return Some(m.as_str().to_string()); + if let Some(re) = &*TOP_OF_FILE_TEAM_AT_REGEX { + if let Some(cap) = re.captures(line) { + if let Some(m) = cap.get(1) { + return Some(m.as_str().to_string()); + } } } - if let Some(cap) = TOP_OF_FILE_TEAM_COLON_REGEX.captures(line) { - if let Some(m) = cap.get(1) { - return Some(m.as_str().to_string()); + if let Some(re) = &*TOP_OF_FILE_TEAM_COLON_REGEX { + if let Some(cap) = re.captures(line) { + if let Some(m) = cap.get(1) { + return Some(m.as_str().to_string()); + } } } let trimmed = line.trim_start(); diff --git a/src/project_builder.rs b/src/project_builder.rs index 54f3ac3..12810af 100644 --- a/src/project_builder.rs +++ b/src/project_builder.rs @@ -216,7 +216,10 @@ impl<'a> ProjectBuilder<'a> { } fn matches_globs(path: &Path, globs: &[String]) -> bool { - globs.iter().any(|glob| glob_match(glob, path.to_str().unwrap())) + match path.to_str() { + Some(s) => globs.iter().any(|glob| glob_match(glob, s)), + None => false, + } } fn ruby_package_owner(path: &Path) -> Result, Error> { diff --git a/src/runner.rs b/src/runner.rs index 8afa366..61e503a 100644 --- a/src/runner.rs +++ b/src/runner.rs @@ -39,6 +39,7 @@ pub fn for_file(run_config: &RunConfig, file_path: &str, fast: bool) -> RunResul if fast { for_file_from_codeowners(run_config, file_path) } else { + // run_with_runner(run_config, |runner| runner.for_file(file_path)) for_file_optimized(run_config, file_path) } } @@ -97,7 +98,12 @@ fn for_file_optimized(run_config: &RunConfig, file_path: &str) -> RunResult { }; use crate::ownership::fast::find_file_owners; - let file_owners = find_file_owners(&run_config.project_root, &config, std::path::Path::new(file_path)); + let file_owners = match find_file_owners(&run_config.project_root, &config, std::path::Path::new(file_path)) { + Ok(v) => v, + Err(err) => { + return RunResult { io_errors: vec![err], ..Default::default() }; + } + }; let info_messages: Vec = match file_owners.len() { 0 => vec![format!("{}", FileOwner::default())], diff --git a/tests/fixtures/valid_project/config/code_ownership.yml b/tests/fixtures/valid_project/config/code_ownership.yml index fa56397..83a523f 100644 --- a/tests/fixtures/valid_project/config/code_ownership.yml +++ b/tests/fixtures/valid_project/config/code_ownership.yml @@ -1,5 +1,5 @@ owned_globs: - - "**/*.{rb,tsx}" + - "{gems,config,javascript,ruby}/**/*.{rb,tsx}" ruby_package_paths: - ruby/packages/**/* javascript_package_paths: diff --git a/tests/fixtures/valid_project/should_be_ignored/an_ignored_file.rb b/tests/fixtures/valid_project/should_be_ignored/an_ignored_file.rb new file mode 100644 index 0000000..24ad7ca --- /dev/null +++ b/tests/fixtures/valid_project/should_be_ignored/an_ignored_file.rb @@ -0,0 +1,7 @@ +# @team Payments + +class AnIgnoredFile + def initialize + @name = "AnIgnoredFile" + end +end \ No newline at end of file diff --git a/tests/valid_project_test.rs b/tests/valid_project_test.rs index 6ee2caa..140206e 100644 --- a/tests/valid_project_test.rs +++ b/tests/valid_project_test.rs @@ -97,6 +97,26 @@ fn test_fast_for_file() -> Result<(), Box> { Ok(()) } +#[test] +fn test_fast_for_file_with_ignored_file() -> Result<(), Box> { + Command::cargo_bin("codeowners")? + .arg("--project-root") + .arg("tests/fixtures/valid_project") + .arg("--no-cache") + .arg("for-file") + .arg("should_be_ignored/an_ignored_file.rb") + .assert() + .success() + .stdout(predicate::eq(indoc! {" + Team: Unowned + Github Team: Unowned + Team YML: + Description: + - Unowned + "})); + Ok(()) +} + #[test] fn test_fast_for_file_full_path() -> Result<(), Box> { let project_root = Path::new("tests/fixtures/valid_project"); From 8bfd0a7df06f7ad340a87cc041e08435c52b578c Mon Sep 17 00:00:00 2001 From: Perry Hertler Date: Sat, 9 Aug 2025 20:49:39 -0500 Subject: [PATCH 05/12] test for gems override --- src/ownership/fast.rs | 31 +++++++++---------- src/runner.rs | 2 +- .../fixtures/valid_project/.github/CODEOWNERS | 2 ++ .../valid_project/config/code_ownership.yml | 4 +-- .../valid_project/config/teams/ux.yml | 6 ++++ tests/fixtures/valid_project/gems/pets/dog.rb | 5 +++ tests/valid_project_test.rs | 20 ++++++++++++ 7 files changed, 50 insertions(+), 20 deletions(-) create mode 100644 tests/fixtures/valid_project/config/teams/ux.yml create mode 100644 tests/fixtures/valid_project/gems/pets/dog.rb diff --git a/src/ownership/fast.rs b/src/ownership/fast.rs index 453e10c..4c3cf77 100644 --- a/src/ownership/fast.rs +++ b/src/ownership/fast.rs @@ -144,31 +144,28 @@ fn load_teams(project_root: &Path, team_file_globs: &[String]) -> std::result::R } lazy_static! { - static ref TOP_OF_FILE_TEAM_AT_REGEX: Option = Regex::new(r#"^(?:#|//)\s*@team\s+(.+)$"#).ok(); - static ref TOP_OF_FILE_TEAM_COLON_REGEX: Option = Regex::new(r#"(?i)^(?:#|//)\s*team\s*:\s*(.+)$"#).ok(); + // Allow optional leading whitespace before the comment marker + static ref TOP_OF_FILE_TEAM_AT_REGEX: Option = Regex::new(r#"^\s*(?:#|//)\s*@team\s+(.+)$"#).ok(); + static ref TOP_OF_FILE_TEAM_COLON_REGEX: Option = Regex::new(r#"(?i)^\s*(?:#|//)\s*team\s*:\s*(.+)$"#).ok(); } fn read_top_of_file_team(path: &Path) -> Option { let content = fs::read_to_string(path).ok()?; - for line in content.lines().take(15) { - if let Some(re) = &*TOP_OF_FILE_TEAM_AT_REGEX { - if let Some(cap) = re.captures(line) { - if let Some(m) = cap.get(1) { - return Some(m.as_str().to_string()); - } + let line = content.lines().next()?; + + if let Some(re) = &*TOP_OF_FILE_TEAM_AT_REGEX { + if let Some(cap) = re.captures(line) { + if let Some(m) = cap.get(1) { + return Some(m.as_str().to_string()); } } - if let Some(re) = &*TOP_OF_FILE_TEAM_COLON_REGEX { - if let Some(cap) = re.captures(line) { - if let Some(m) = cap.get(1) { - return Some(m.as_str().to_string()); - } + } + if let Some(re) = &*TOP_OF_FILE_TEAM_COLON_REGEX { + if let Some(cap) = re.captures(line) { + if let Some(m) = cap.get(1) { + return Some(m.as_str().to_string()); } } - let trimmed = line.trim_start(); - if !(trimmed.starts_with('#') || trimmed.starts_with("//") || trimmed.is_empty()) { - break; - } } None } diff --git a/src/runner.rs b/src/runner.rs index 61e503a..58e5f9d 100644 --- a/src/runner.rs +++ b/src/runner.rs @@ -39,7 +39,7 @@ pub fn for_file(run_config: &RunConfig, file_path: &str, fast: bool) -> RunResul if fast { for_file_from_codeowners(run_config, file_path) } else { - // run_with_runner(run_config, |runner| runner.for_file(file_path)) + //run_with_runner(run_config, |runner| runner.for_file(file_path)) for_file_optimized(run_config, file_path) } } diff --git a/tests/fixtures/valid_project/.github/CODEOWNERS b/tests/fixtures/valid_project/.github/CODEOWNERS index 09a9fd7..6df553a 100644 --- a/tests/fixtures/valid_project/.github/CODEOWNERS +++ b/tests/fixtures/valid_project/.github/CODEOWNERS @@ -30,6 +30,8 @@ # Team YML ownership /config/teams/payments.yml @PaymentsTeam /config/teams/payroll.yml @PayrollTeam +/config/teams/ux.yml @UX # Team owned gems /gems/payroll_calculator/**/** @PayrollTeam +/gems/pets/**/** @UX diff --git a/tests/fixtures/valid_project/config/code_ownership.yml b/tests/fixtures/valid_project/config/code_ownership.yml index 83a523f..05bc791 100644 --- a/tests/fixtures/valid_project/config/code_ownership.yml +++ b/tests/fixtures/valid_project/config/code_ownership.yml @@ -1,10 +1,10 @@ owned_globs: - - "{gems,config,javascript,ruby}/**/*.{rb,tsx}" + - "{gems,config,javascript,ruby,components}/**/*.{rb,tsx}" ruby_package_paths: - ruby/packages/**/* javascript_package_paths: - javascript/packages/** team_file_glob: - config/teams/**/*.yml -vendored_gems_path: gems +unbuilt_gems_path: gems unowned_globs: diff --git a/tests/fixtures/valid_project/config/teams/ux.yml b/tests/fixtures/valid_project/config/teams/ux.yml new file mode 100644 index 0000000..d4675fa --- /dev/null +++ b/tests/fixtures/valid_project/config/teams/ux.yml @@ -0,0 +1,6 @@ +name: UX +github: + team: '@UX' +ruby: + owned_gems: + - pets \ No newline at end of file diff --git a/tests/fixtures/valid_project/gems/pets/dog.rb b/tests/fixtures/valid_project/gems/pets/dog.rb new file mode 100644 index 0000000..0518b82 --- /dev/null +++ b/tests/fixtures/valid_project/gems/pets/dog.rb @@ -0,0 +1,5 @@ +# generated +# next line should be ignored +# @team Payments + +class Dog; end diff --git a/tests/valid_project_test.rs b/tests/valid_project_test.rs index 140206e..d8b4e81 100644 --- a/tests/valid_project_test.rs +++ b/tests/valid_project_test.rs @@ -138,6 +138,26 @@ fn test_fast_for_file_full_path() -> Result<(), Box> { Ok(()) } +#[test] +fn test_for_file_with_components() -> Result<(), Box> { + Command::cargo_bin("codeowners")? + .arg("--project-root") + .arg("tests/fixtures/valid_project") + .arg("--no-cache") + .arg("for-file") + .arg("gems/pets/dog.rb") + .assert() + .success() + .stdout(predicate::eq(indoc! {" + Team: UX + Github Team: @UX + Team YML: config/teams/ux.yml + Description: + - Owner specified in Team YML's `owned_gems` + "})); + Ok(()) +} + #[test] fn test_for_file_same_team_multiple_ownerships() -> Result<(), Box> { Command::cargo_bin("codeowners")? From f42100dcd29c423643ece189f266f97391e019a9 Mon Sep 17 00:00:00 2001 From: Perry Hertler Date: Sun, 10 Aug 2025 07:38:26 -0500 Subject: [PATCH 06/12] commenting on dead code --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/ownership/fast.rs | 2 ++ src/runner.rs | 2 ++ 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2638c07..f97d18a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -179,7 +179,7 @@ checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" [[package]] name = "codeowners" -version = "0.2.4" +version = "0.2.5" dependencies = [ "assert_cmd", "clap", diff --git a/Cargo.toml b/Cargo.toml index 098966a..f61c3da 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "codeowners" -version = "0.2.4" +version = "0.2.5" edition = "2024" [profile.release] diff --git a/src/ownership/fast.rs b/src/ownership/fast.rs index 4c3cf77..e0c8e8d 100644 --- a/src/ownership/fast.rs +++ b/src/ownership/fast.rs @@ -95,6 +95,8 @@ pub fn find_file_owners(project_root: &Path, config: &Config, file_path: &Path) } } + // TODO: remove this once we've verified the fast path is working + // This is simply matching the order of behavior of the original codeowners CLI if file_owners.len() > 1 { file_owners.sort_by(|a, b| { let priority_a = a diff --git a/src/runner.rs b/src/runner.rs index 58e5f9d..fb0f45a 100644 --- a/src/runner.rs +++ b/src/runner.rs @@ -262,6 +262,8 @@ impl Runner { self.validate() } + // TODO: remove this once we've verified the fast path is working + #[allow(dead_code)] pub fn for_file(&self, file_path: &str) -> RunResult { let relative_file_path = Path::new(file_path) .strip_prefix(&self.run_config.project_root) From d3d42b505e05c31da9f9afd72c446cee4df481af Mon Sep 17 00:00:00 2001 From: Perry Hertler Date: Sun, 10 Aug 2025 07:43:03 -0500 Subject: [PATCH 07/12] re-org --- src/bin/compare_for_file.rs | 50 ++++++------- src/ownership.rs | 2 +- src/ownership/{fast.rs => for_file_fast.rs} | 77 ++++++++++----------- src/runner.rs | 15 ++-- 4 files changed, 71 insertions(+), 73 deletions(-) rename src/ownership/{fast.rs => for_file_fast.rs} (86%) diff --git a/src/bin/compare_for_file.rs b/src/bin/compare_for_file.rs index 6cc470c..7bce8ca 100644 --- a/src/bin/compare_for_file.rs +++ b/src/bin/compare_for_file.rs @@ -1,3 +1,12 @@ +// This is a tool to compare the output of the original codeowners CLI with the optimized version. +// It's useful for verifying that the optimized version is correct. +// +// It's not used in CI, but it's useful for debugging. +// +// To run it, use `cargo run --bin compare_for_file ` +// +// It will compare the output of the original codeowners CLI with the optimized version for all files in the project. + use std::{ fs::File, io::{self, Write}, @@ -6,15 +15,12 @@ use std::{ }; use codeowners::config::Config as OwnershipConfig; +use codeowners::ownership::{FileOwner, for_file_fast}; use codeowners::runner::{RunConfig, Runner}; -use codeowners::ownership::{fast, FileOwner}; use ignore::WalkBuilder; -use serde_yaml; fn main() { - let project_root = std::env::args() - .nth(1) - .expect("usage: compare_for_file "); + let project_root = std::env::args().nth(1).expect("usage: compare_for_file "); let project_root = PathBuf::from(project_root); if !project_root.is_absolute() { eprintln!("Project root must be absolute"); @@ -60,22 +66,24 @@ fn main() { let mut diff_count: usize = 0; // Prefer tracked files from git; fall back to walking the FS if git is unavailable - let tracked_files_output = Command::new("git") - .arg("-C") - .arg(&project_root) - .arg("ls-files") - .arg("-z") - .output(); + let tracked_files_output = Command::new("git").arg("-C").arg(&project_root).arg("ls-files").arg("-z").output(); match tracked_files_output { Ok(output) if output.status.success() => { let bytes = output.stdout; for rel in bytes.split(|b| *b == 0u8) { - if rel.is_empty() { continue; } - let rel_str = match std::str::from_utf8(rel) { Ok(s) => s, Err(_) => continue }; + if rel.is_empty() { + continue; + } + let rel_str = match std::str::from_utf8(rel) { + Ok(s) => s, + Err(_) => continue, + }; let abs_path = project_root.join(rel_str); // Only process regular files that currently exist - if !abs_path.is_file() { continue; } + if !abs_path.is_file() { + continue; + } total_files += 1; let original = run_original(&runner, &abs_path); @@ -90,10 +98,7 @@ fn main() { } if total_files % 1000 == 0 { - eprintln!( - "Processed {} files... diffs so far: {}", - total_files, diff_count - ); + eprintln!("Processed {} files... diffs so far: {}", total_files, diff_count); } } } @@ -132,10 +137,7 @@ fn main() { } if total_files % 1000 == 0 { - eprintln!( - "Processed {} files... diffs so far: {}", - total_files, diff_count - ); + eprintln!("Processed {} files... diffs so far: {}", total_files, diff_count); } } } @@ -159,7 +161,7 @@ fn run_original(runner: &Runner, file_path: &Path) -> String { } fn run_optimized(project_root: &Path, config: &OwnershipConfig, file_path: &Path) -> String { - let owners: Vec = match fast::find_file_owners(project_root, config, file_path) { + let owners: Vec = match for_file_fast::find_file_owners(project_root, config, file_path) { Ok(v) => v, Err(e) => return format!("IO_ERROR: {}", e), }; @@ -175,5 +177,3 @@ fn run_optimized(project_root: &Path, config: &OwnershipConfig, file_path: &Path } } } - - diff --git a/src/ownership.rs b/src/ownership.rs index 694ffda..d8d48cd 100644 --- a/src/ownership.rs +++ b/src/ownership.rs @@ -11,10 +11,10 @@ use tracing::{info, instrument}; mod file_generator; mod file_owner_finder; +pub mod for_file_fast; pub(crate) mod mapper; pub(crate) mod parser; mod validator; -pub mod fast; use crate::{ ownership::mapper::DirectoryMapper, diff --git a/src/ownership/fast.rs b/src/ownership/for_file_fast.rs similarity index 86% rename from src/ownership/fast.rs rename to src/ownership/for_file_fast.rs index e0c8e8d..0bb46e4 100644 --- a/src/ownership/fast.rs +++ b/src/ownership/for_file_fast.rs @@ -1,4 +1,8 @@ -use std::{collections::{HashMap, HashSet}, fs, path::Path}; +use std::{ + collections::{HashMap, HashSet}, + fs, + path::Path, +}; use fast_glob::glob_match; use glob::glob; @@ -20,10 +24,7 @@ pub fn find_file_owners(project_root: &Path, config: &Config, file_path: &Path) .unwrap_or(&absolute_file_path) .to_path_buf(); - let teams = match load_teams(project_root, &config.team_file_glob) { - Ok(t) => t, - Err(e) => return Err(e), - }; + let teams = load_teams(project_root, &config.team_file_glob)?; let teams_by_name = build_teams_by_name_map(&teams); let mut sources_by_team: HashMap> = HashMap::new(); @@ -68,11 +69,7 @@ pub fn find_file_owners(project_root: &Path, config: &Config, file_path: &Path) } for team in &teams { - let team_rel = team - .path - .strip_prefix(project_root) - .unwrap_or(&team.path) - .to_path_buf(); + let team_rel = team.path.strip_prefix(project_root).unwrap_or(&team.path).to_path_buf(); if team_rel == relative_file_path { sources_by_team.entry(team.name.clone()).or_default().push(Source::TeamYml); } @@ -99,18 +96,8 @@ pub fn find_file_owners(project_root: &Path, config: &Config, file_path: &Path) // This is simply matching the order of behavior of the original codeowners CLI if file_owners.len() > 1 { file_owners.sort_by(|a, b| { - let priority_a = a - .sources - .iter() - .map(|s| source_priority(s)) - .min() - .unwrap_or(u8::MAX); - let priority_b = b - .sources - .iter() - .map(|s| source_priority(s)) - .min() - .unwrap_or(u8::MAX); + let priority_a = a.sources.iter().map(source_priority).min().unwrap_or(u8::MAX); + let priority_b = b.sources.iter().map(source_priority).min().unwrap_or(u8::MAX); priority_a.cmp(&priority_b).then_with(|| a.team.name.cmp(&b.team.name)) }); } @@ -204,7 +191,9 @@ fn most_specific_directory_owner( } } } - if parent == project_root { break; } + if parent == project_root { + break; + } current = parent.clone(); } best @@ -229,10 +218,10 @@ fn nearest_package_owner( if let Some(team) = teams_by_name.get(&owner) { let package_path = parent_rel.join("package.yml"); let package_glob = format!("{}/**/**", rel_str); - return Some((team.name.clone(), Source::Package( - package_path.to_string_lossy().to_string(), - package_glob, - ))); + return Some(( + team.name.clone(), + Source::Package(package_path.to_string_lossy().to_string(), package_glob), + )); } } } @@ -244,16 +233,18 @@ fn nearest_package_owner( if let Some(team) = teams_by_name.get(&owner) { let package_path = parent_rel.join("package.json"); let package_glob = format!("{}/**/**", rel_str); - return Some((team.name.clone(), Source::Package( - package_path.to_string_lossy().to_string(), - package_glob, - ))); + return Some(( + team.name.clone(), + Source::Package(package_path.to_string_lossy().to_string(), package_glob), + )); } } } } } - if parent == project_root { break; } + if parent == project_root { + break; + } current = parent; } None @@ -285,18 +276,22 @@ fn read_js_package_owner(path: &Path) -> std::result::Result { .ok_or_else(|| "Missing owner".to_string()) } -fn vendored_gem_owner( - relative_file_path: &Path, - config: &Config, - teams: &[Team], -) -> Option<(String, Source)> { +fn vendored_gem_owner(relative_file_path: &Path, config: &Config, teams: &[Team]) -> Option<(String, Source)> { use std::path::Component; let mut comps = relative_file_path.components(); let first = comps.next()?; let second = comps.next()?; - let first_str = match first { Component::Normal(s) => s.to_string_lossy(), _ => return None }; - if first_str != config.vendored_gems_path { return None; } - let gem_name = match second { Component::Normal(s) => s.to_string_lossy().to_string(), _ => return None }; + let first_str = match first { + Component::Normal(s) => s.to_string_lossy(), + _ => return None, + }; + if first_str != config.vendored_gems_path { + return None; + } + let gem_name = match second { + Component::Normal(s) => s.to_string_lossy().to_string(), + _ => return None, + }; for team in teams { if team.owned_gems.iter().any(|g| g == &gem_name) { return Some((team.name.clone(), Source::TeamGem)); @@ -316,5 +311,3 @@ fn source_priority(source: &Source) -> u8 { Source::TeamYml => 5, } } - - diff --git a/src/runner.rs b/src/runner.rs index fb0f45a..b042f79 100644 --- a/src/runner.rs +++ b/src/runner.rs @@ -93,15 +93,18 @@ fn for_file_optimized(run_config: &RunConfig, file_path: &str) -> RunResult { return RunResult { io_errors: vec![err.to_string()], ..Default::default() - } + }; } }; - use crate::ownership::fast::find_file_owners; + use crate::ownership::for_file_fast::find_file_owners; let file_owners = match find_file_owners(&run_config.project_root, &config, std::path::Path::new(file_path)) { Ok(v) => v, Err(err) => { - return RunResult { io_errors: vec![err], ..Default::default() }; + return RunResult { + io_errors: vec![err], + ..Default::default() + }; } }; @@ -119,10 +122,12 @@ fn for_file_optimized(run_config: &RunConfig, file_path: &str) -> RunResult { }; } }; - RunResult { info_messages, ..Default::default() } + RunResult { + info_messages, + ..Default::default() + } } - pub fn for_team(run_config: &RunConfig, team_name: &str) -> RunResult { run_with_runner(run_config, |runner| runner.for_team(team_name)) } From 1bf9386fb74db524d43011cc8d85360ecf43f0a3 Mon Sep 17 00:00:00 2001 From: Perry Hertler Date: Sun, 10 Aug 2025 08:28:44 -0500 Subject: [PATCH 08/12] adding team_for_file --- src/runner.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/runner.rs b/src/runner.rs index b042f79..e1cef31 100644 --- a/src/runner.rs +++ b/src/runner.rs @@ -84,6 +84,15 @@ pub fn team_for_file_from_codeowners(run_config: &RunConfig, file_path: &str) -> .map_err(|e| Error::Io(e.to_string()))?) } +pub fn team_for_file(run_config: &RunConfig, file_path: &str) -> Result, Error> { + let config = config_from_path(&run_config.config_path)?; + use crate::ownership::for_file_fast::find_file_owners; + let owners = find_file_owners(&run_config.project_root, &config, std::path::Path::new(file_path)) + .map_err(Error::Io)?; + + Ok(owners.first().map(|fo| fo.team.clone())) +} + // (imports below intentionally trimmed after refactor) fn for_file_optimized(run_config: &RunConfig, file_path: &str) -> RunResult { From 11d9506348377cee176ec35a4742ce43be5639cd Mon Sep 17 00:00:00 2001 From: Perry Hertler Date: Sun, 10 Aug 2025 11:42:21 -0500 Subject: [PATCH 09/12] printing path error --- src/ownership.rs | 2 +- src/ownership/for_file_fast.rs | 151 ++++++++++++++++++++++++++++++++- src/project.rs | 30 ++++++- src/runner.rs | 3 +- 4 files changed, 179 insertions(+), 7 deletions(-) diff --git a/src/ownership.rs b/src/ownership.rs index d8d48cd..7d91d95 100644 --- a/src/ownership.rs +++ b/src/ownership.rs @@ -33,7 +33,7 @@ use self::{ pub struct Ownership { project: Arc, } - +#[derive(Debug)] pub struct FileOwner { pub team: Team, pub team_config_file_path: String, diff --git a/src/ownership/for_file_fast.rs b/src/ownership/for_file_fast.rs index 0bb46e4..7562ca9 100644 --- a/src/ownership/for_file_fast.rs +++ b/src/ownership/for_file_fast.rs @@ -123,7 +123,7 @@ fn load_teams(project_root: &Path, team_file_globs: &[String]) -> std::result::R match Team::from_team_file_path(path.clone()) { Ok(team) => teams.push(team), Err(e) => { - eprintln!("Error parsing team file: {}", e); + eprintln!("Error parsing team file: {}, path: {}", e, path.display()); continue; } } @@ -311,3 +311,152 @@ fn source_priority(source: &Source) -> u8 { Source::TeamYml => 5, } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::project::Team; + use std::collections::HashMap; + use tempfile::tempdir; + + fn build_config_for_temp(frontend_glob: &str, ruby_glob: &str, vendored_path: &str) -> crate::config::Config { + crate::config::Config { + owned_globs: vec!["**/*".to_string()], + ruby_package_paths: vec![ruby_glob.to_string()], + javascript_package_paths: vec![frontend_glob.to_string()], + team_file_glob: vec!["config/teams/**/*.yml".to_string()], + unowned_globs: vec![], + vendored_gems_path: vendored_path.to_string(), + cache_directory: "tmp/cache/codeowners".to_string(), + } + } + + fn team_named(name: &str) -> Team { + Team { + path: Path::new("config/teams/foo.yml").to_path_buf(), + name: name.to_string(), + github_team: format!("@{}Team", name), + owned_globs: vec![], + subtracted_globs: vec![], + owned_gems: vec![], + avoid_ownership: false, + } + } + + #[test] + fn test_read_top_of_file_team_parses_at_and_colon_forms() { + let td = tempdir().unwrap(); + + // @team form + let file_at = td.path().join("at_form.rb"); + std::fs::write(&file_at, "# @team Payroll\nputs 'x'\n").unwrap(); + assert_eq!(read_top_of_file_team(&file_at), Some("Payroll".to_string())); + + // team: form (case-insensitive, allows spaces) + let file_colon = td.path().join("colon_form.rb"); + std::fs::write(&file_colon, "# Team: Payments\nputs 'y'\n").unwrap(); + assert_eq!(read_top_of_file_team(&file_colon), Some("Payments".to_string())); + } + + #[test] + fn test_most_specific_directory_owner_prefers_deeper() { + let td = tempdir().unwrap(); + let project_root = td.path(); + + // Build directories + let deep_dir = project_root.join("a/b/c"); + std::fs::create_dir_all(&deep_dir).unwrap(); + let mid_dir = project_root.join("a/b"); + let top_dir = project_root.join("a"); + + // Write .codeowner files + std::fs::write(top_dir.join(".codeowner"), "TopTeam").unwrap(); + std::fs::write(mid_dir.join(".codeowner"), "MidTeam").unwrap(); + std::fs::write(deep_dir.join(".codeowner"), "DeepTeam").unwrap(); + + // Build teams_by_name + let mut tbn: HashMap = HashMap::new(); + for name in ["TopTeam", "MidTeam", "DeepTeam"] { + let t = team_named(name); + tbn.insert(t.name.clone(), t); + } + + let rel_file = Path::new("a/b/c/file.rb"); + let result = most_specific_directory_owner(project_root, rel_file, &tbn).unwrap(); + match result.1 { + Source::Directory(path) => { + assert!(path.ends_with("a/b/c"), "expected deepest directory, got {}", path); + } + _ => panic!("expected Directory source"), + } + assert_eq!(result.0, "DeepTeam"); + } + + #[test] + fn test_nearest_package_owner_ruby_and_js() { + let td = tempdir().unwrap(); + let project_root = td.path(); + let config = build_config_for_temp("frontend/**/*", "packs/**/*", "vendored"); + + // Ruby package + let ruby_pkg = project_root.join("packs/payroll"); + std::fs::create_dir_all(&ruby_pkg).unwrap(); + std::fs::write( + ruby_pkg.join("package.yml"), + "---\nowner: Payroll\n", + ) + .unwrap(); + + // JS package + let js_pkg = project_root.join("frontend/flow"); + std::fs::create_dir_all(&js_pkg).unwrap(); + std::fs::write( + js_pkg.join("package.json"), + r#"{"metadata": {"owner": "UX"}}"#, + ) + .unwrap(); + + // Teams map + let mut tbn: HashMap = HashMap::new(); + for name in ["Payroll", "UX"] { + let t = team_named(name); + tbn.insert(t.name.clone(), t); + } + + // Ruby nearest + let rel_ruby = Path::new("packs/payroll/app/models/thing.rb"); + let ruby_owner = nearest_package_owner(project_root, rel_ruby, &config, &tbn).unwrap(); + assert_eq!(ruby_owner.0, "Payroll"); + match ruby_owner.1 { + Source::Package(pkg_path, glob) => { + assert!(pkg_path.ends_with("packs/payroll/package.yml")); + assert_eq!(glob, "packs/payroll/**/**"); + } + _ => panic!("expected Package source for ruby"), + } + + // JS nearest + let rel_js = Path::new("frontend/flow/src/index.ts"); + let js_owner = nearest_package_owner(project_root, rel_js, &config, &tbn).unwrap(); + assert_eq!(js_owner.0, "UX"); + match js_owner.1 { + Source::Package(pkg_path, glob) => { + assert!(pkg_path.ends_with("frontend/flow/package.json")); + assert_eq!(glob, "frontend/flow/**/**"); + } + _ => panic!("expected Package source for js"), + } + } + + #[test] + fn test_vendored_gem_owner() { + let config = build_config_for_temp("frontend/**/*", "packs/**/*", "vendored"); + let mut teams: Vec = vec![team_named("Payroll")]; + teams[0].owned_gems = vec!["awesome_gem".to_string()]; + + let path = Path::new("vendored/awesome_gem/lib/a.rb"); + let result = vendored_gem_owner(path, &config, &teams).unwrap(); + assert_eq!(result.0, "Payroll"); + matches!(result.1, Source::TeamGem); + } +} diff --git a/src/project.rs b/src/project.rs index e405e90..34b6441 100644 --- a/src/project.rs +++ b/src/project.rs @@ -178,9 +178,7 @@ impl Project { } pub fn relative_path<'a>(&'a self, absolute_path: &'a Path) -> &'a Path { - absolute_path - .strip_prefix(&self.base_path) - .expect("Could not generate relative path") + absolute_path.strip_prefix(&self.base_path).unwrap_or(absolute_path) } pub fn get_team(&self, name: &str) -> Option { @@ -197,3 +195,29 @@ impl Project { result } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_vendored_gem_by_name_maps_all_gems() { + let vg1 = VendoredGem { path: PathBuf::from("vendored/a"), name: "a".to_string() }; + let vg2 = VendoredGem { path: PathBuf::from("vendored/b"), name: "b".to_string() }; + let project = Project { + base_path: PathBuf::from("."), + files: vec![], + packages: vec![], + vendored_gems: vec![vg1.clone(), vg2.clone()], + teams: vec![], + codeowners_file_path: PathBuf::from(".github/CODEOWNERS"), + directory_codeowner_files: vec![], + teams_by_name: HashMap::new(), + }; + + let map = project.vendored_gem_by_name(); + assert_eq!(map.len(), 2); + assert_eq!(map.get("a").unwrap().name, vg1.name); + assert_eq!(map.get("b").unwrap().name, vg2.name); + } +} diff --git a/src/runner.rs b/src/runner.rs index e1cef31..e70dfbc 100644 --- a/src/runner.rs +++ b/src/runner.rs @@ -87,8 +87,7 @@ pub fn team_for_file_from_codeowners(run_config: &RunConfig, file_path: &str) -> pub fn team_for_file(run_config: &RunConfig, file_path: &str) -> Result, Error> { let config = config_from_path(&run_config.config_path)?; use crate::ownership::for_file_fast::find_file_owners; - let owners = find_file_owners(&run_config.project_root, &config, std::path::Path::new(file_path)) - .map_err(Error::Io)?; + let owners = find_file_owners(&run_config.project_root, &config, std::path::Path::new(file_path)).map_err(Error::Io)?; Ok(owners.first().map(|fo| fo.team.clone())) } From 595b86d097d5261603ffc5175ccf84c55e9e0723 Mon Sep 17 00:00:00 2001 From: Perry Hertler Date: Sun, 10 Aug 2025 20:25:42 -0500 Subject: [PATCH 10/12] idiomatic improvements --- src/ownership/for_file_fast.rs | 94 ++++++++++------------------------ src/project.rs | 10 +++- src/project_file_builder.rs | 2 +- 3 files changed, 37 insertions(+), 69 deletions(-) diff --git a/src/ownership/for_file_fast.rs b/src/ownership/for_file_fast.rs index 7562ca9..97babc7 100644 --- a/src/ownership/for_file_fast.rs +++ b/src/ownership/for_file_fast.rs @@ -6,10 +6,8 @@ use std::{ use fast_glob::glob_match; use glob::glob; -use lazy_static::lazy_static; -use regex::Regex; -use crate::{config::Config, project::Team}; +use crate::{config::Config, project::Team, project_file_builder::build_project_file_without_cache}; use super::{FileOwner, mapper::Source}; @@ -106,7 +104,7 @@ pub fn find_file_owners(project_root: &Path, config: &Config, file_path: &Path) } fn build_teams_by_name_map(teams: &[Team]) -> HashMap { - let mut map = HashMap::new(); + let mut map = HashMap::with_capacity(teams.len() * 2); for team in teams { map.insert(team.name.clone(), team.clone()); map.insert(team.github_team.clone(), team.clone()); @@ -117,7 +115,7 @@ fn build_teams_by_name_map(teams: &[Team]) -> HashMap { fn load_teams(project_root: &Path, team_file_globs: &[String]) -> std::result::Result, String> { let mut teams: Vec = Vec::new(); for glob_str in team_file_globs { - let absolute_glob = format!("{}/{}", project_root.display(), glob_str); + let absolute_glob = project_root.join(glob_str).to_string_lossy().into_owned(); let paths = glob(&absolute_glob).map_err(|e| e.to_string())?; for path in paths.flatten() { match Team::from_team_file_path(path.clone()) { @@ -132,30 +130,14 @@ fn load_teams(project_root: &Path, team_file_globs: &[String]) -> std::result::R Ok(teams) } -lazy_static! { - // Allow optional leading whitespace before the comment marker - static ref TOP_OF_FILE_TEAM_AT_REGEX: Option = Regex::new(r#"^\s*(?:#|//)\s*@team\s+(.+)$"#).ok(); - static ref TOP_OF_FILE_TEAM_COLON_REGEX: Option = Regex::new(r#"(?i)^\s*(?:#|//)\s*team\s*:\s*(.+)$"#).ok(); -} +// no regex: parse cheaply with ASCII-aware checks fn read_top_of_file_team(path: &Path) -> Option { - let content = fs::read_to_string(path).ok()?; - let line = content.lines().next()?; - - if let Some(re) = &*TOP_OF_FILE_TEAM_AT_REGEX { - if let Some(cap) = re.captures(line) { - if let Some(m) = cap.get(1) { - return Some(m.as_str().to_string()); - } - } - } - if let Some(re) = &*TOP_OF_FILE_TEAM_COLON_REGEX { - if let Some(cap) = re.captures(line) { - if let Some(m) = cap.get(1) { - return Some(m.as_str().to_string()); - } - } + let project_file = build_project_file_without_cache(&path.to_path_buf()); + if let Some(owner) = project_file.owner { + return Some(owner); } + None } @@ -167,34 +149,32 @@ fn most_specific_directory_owner( let mut current = project_root.join(relative_file_path); let mut best: Option<(String, Source)> = None; loop { - let parent_opt = current.parent().map(|p| p.to_path_buf()); - let Some(parent) = parent_opt else { break }; - let codeowner_path = parent.join(".codeowner"); + if !current.pop() { + break; + } + let codeowner_path = current.join(".codeowner"); if let Ok(owner_str) = fs::read_to_string(&codeowner_path) { let owner = owner_str.trim(); if let Some(team) = teams_by_name.get(owner) { - let relative_dir = parent + let relative_dir = current .strip_prefix(project_root) - .unwrap_or(parent.as_path()) + .unwrap_or(current.as_path()) .to_string_lossy() .to_string(); let candidate = (team.name.clone(), Source::Directory(relative_dir)); match &best { None => best = Some(candidate), Some((_, existing_source)) => { - let existing_len = source_directory_depth(existing_source); - let candidate_len = source_directory_depth(&candidate.1); - if candidate_len > existing_len { + if candidate.1.len() > existing_source.len() { best = Some(candidate); } } } } } - if parent == project_root { + if current == project_root { break; } - current = parent.clone(); } best } @@ -207,17 +187,18 @@ fn nearest_package_owner( ) -> Option<(String, Source)> { let mut current = project_root.join(relative_file_path); loop { - let parent_opt = current.parent().map(|p| p.to_path_buf()); - let Some(parent) = parent_opt else { break }; - let parent_rel = parent.strip_prefix(project_root).unwrap_or(parent.as_path()); + if !current.pop() { + break; + } + let parent_rel = current.strip_prefix(project_root).unwrap_or(current.as_path()); if let Some(rel_str) = parent_rel.to_str() { if glob_list_matches(rel_str, &config.ruby_package_paths) { - let pkg_yml = parent.join("package.yml"); + let pkg_yml = current.join("package.yml"); if pkg_yml.exists() { if let Ok(owner) = read_ruby_package_owner(&pkg_yml) { if let Some(team) = teams_by_name.get(&owner) { let package_path = parent_rel.join("package.yml"); - let package_glob = format!("{}/**/**", rel_str); + let package_glob = format!("{rel_str}/**/**"); return Some(( team.name.clone(), Source::Package(package_path.to_string_lossy().to_string(), package_glob), @@ -227,12 +208,12 @@ fn nearest_package_owner( } } if glob_list_matches(rel_str, &config.javascript_package_paths) { - let pkg_json = parent.join("package.json"); + let pkg_json = current.join("package.json"); if pkg_json.exists() { if let Ok(owner) = read_js_package_owner(&pkg_json) { if let Some(team) = teams_by_name.get(&owner) { let package_path = parent_rel.join("package.json"); - let package_glob = format!("{}/**/**", rel_str); + let package_glob = format!("{rel_str}/**/**"); return Some(( team.name.clone(), Source::Package(package_path.to_string_lossy().to_string(), package_glob), @@ -242,20 +223,14 @@ fn nearest_package_owner( } } } - if parent == project_root { + if current == project_root { break; } - current = parent; } None } -fn source_directory_depth(source: &Source) -> usize { - match source { - Source::Directory(path) => path.matches('/').count(), - _ => 0, - } -} +// removed: use `Source::len()` instead fn glob_list_matches(path: &str, globs: &[String]) -> bool { globs.iter().any(|g| glob_match(g, path)) @@ -351,11 +326,6 @@ mod tests { let file_at = td.path().join("at_form.rb"); std::fs::write(&file_at, "# @team Payroll\nputs 'x'\n").unwrap(); assert_eq!(read_top_of_file_team(&file_at), Some("Payroll".to_string())); - - // team: form (case-insensitive, allows spaces) - let file_colon = td.path().join("colon_form.rb"); - std::fs::write(&file_colon, "# Team: Payments\nputs 'y'\n").unwrap(); - assert_eq!(read_top_of_file_team(&file_colon), Some("Payments".to_string())); } #[test] @@ -401,20 +371,12 @@ mod tests { // Ruby package let ruby_pkg = project_root.join("packs/payroll"); std::fs::create_dir_all(&ruby_pkg).unwrap(); - std::fs::write( - ruby_pkg.join("package.yml"), - "---\nowner: Payroll\n", - ) - .unwrap(); + std::fs::write(ruby_pkg.join("package.yml"), "---\nowner: Payroll\n").unwrap(); // JS package let js_pkg = project_root.join("frontend/flow"); std::fs::create_dir_all(&js_pkg).unwrap(); - std::fs::write( - js_pkg.join("package.json"), - r#"{"metadata": {"owner": "UX"}}"#, - ) - .unwrap(); + std::fs::write(js_pkg.join("package.json"), r#"{"metadata": {"owner": "UX"}}"#).unwrap(); // Teams map let mut tbn: HashMap = HashMap::new(); diff --git a/src/project.rs b/src/project.rs index 34b6441..5179371 100644 --- a/src/project.rs +++ b/src/project.rs @@ -202,8 +202,14 @@ mod tests { #[test] fn test_vendored_gem_by_name_maps_all_gems() { - let vg1 = VendoredGem { path: PathBuf::from("vendored/a"), name: "a".to_string() }; - let vg2 = VendoredGem { path: PathBuf::from("vendored/b"), name: "b".to_string() }; + let vg1 = VendoredGem { + path: PathBuf::from("vendored/a"), + name: "a".to_string(), + }; + let vg2 = VendoredGem { + path: PathBuf::from("vendored/b"), + name: "b".to_string(), + }; let project = Project { base_path: PathBuf::from("."), files: vec![], diff --git a/src/project_file_builder.rs b/src/project_file_builder.rs index 5ac06df..7544dd6 100644 --- a/src/project_file_builder.rs +++ b/src/project_file_builder.rs @@ -47,7 +47,7 @@ impl<'a> ProjectFileBuilder<'a> { } } -fn build_project_file_without_cache(path: &PathBuf) -> ProjectFile { +pub fn build_project_file_without_cache(path: &PathBuf) -> ProjectFile { let content = match std::fs::read_to_string(path) { Ok(content) => content, Err(_) => { From fee60a7b83b2b63b7917e036dbc58949e201d84b Mon Sep 17 00:00:00 2001 From: Perry Hertler Date: Mon, 11 Aug 2025 07:02:34 -0500 Subject: [PATCH 11/12] adding git stage --- src/cli.rs | 14 +++-- src/runner.rs | 36 +++++++++---- tests/git_stage_test.rs | 110 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 146 insertions(+), 14 deletions(-) create mode 100644 tests/git_stage_test.rs diff --git a/src/cli.rs b/src/cli.rs index 1f94124..680f2dc 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -26,7 +26,10 @@ enum Command { about = "Generate the CODEOWNERS file and save it to '--codeowners-file-path'.", visible_alias = "g" )] - Generate, + Generate { + #[arg(long, short,default_value = "false", help = "Skip staging the CODEOWNERS file")] + skip_stage: bool, + }, #[clap( about = "Validate the validity of the CODEOWNERS file. A validation failure will exit with a failure code and a detailed output of the validation errors.", @@ -35,7 +38,10 @@ enum Command { Validate, #[clap(about = "Chains both `generate` and `validate` commands.", visible_alias = "gv")] - GenerateAndValidate, + GenerateAndValidate { + #[arg(long, short,default_value = "false", help = "Skip staging the CODEOWNERS file")] + skip_stage: bool, + }, #[clap(about = "Delete the cache file.", visible_alias = "d")] DeleteCache, @@ -101,8 +107,8 @@ pub fn cli() -> Result { let runner_result = match args.command { Command::Validate => runner::validate(&run_config, vec![]), - Command::Generate => runner::generate(&run_config), - Command::GenerateAndValidate => runner::generate_and_validate(&run_config, vec![]), + Command::Generate { skip_stage } => runner::generate(&run_config, !skip_stage), + Command::GenerateAndValidate { skip_stage } => runner::generate_and_validate(&run_config, vec![], !skip_stage), Command::ForFile { name, fast } => runner::for_file(&run_config, &name, fast), Command::ForTeam { name } => runner::for_team(&run_config, &name), Command::DeleteCache => runner::delete_cache(&run_config), diff --git a/src/runner.rs b/src/runner.rs index e70dfbc..32c4210 100644 --- a/src/runner.rs +++ b/src/runner.rs @@ -2,6 +2,7 @@ use core::fmt; use std::{ fs::File, path::{Path, PathBuf}, + process::Command, }; use error_stack::{Context, Result, ResultExt}; @@ -144,12 +145,12 @@ pub fn validate(run_config: &RunConfig, _file_paths: Vec) -> RunResult { run_with_runner(run_config, |runner| runner.validate()) } -pub fn generate(run_config: &RunConfig) -> RunResult { - run_with_runner(run_config, |runner| runner.generate()) +pub fn generate(run_config: &RunConfig, git_stage: bool) -> RunResult { + run_with_runner(run_config, |runner| runner.generate(git_stage)) } -pub fn generate_and_validate(run_config: &RunConfig, _file_paths: Vec) -> RunResult { - run_with_runner(run_config, |runner| runner.generate_and_validate()) +pub fn generate_and_validate(run_config: &RunConfig, _file_paths: Vec, git_stage: bool) -> RunResult { + run_with_runner(run_config, |runner| runner.generate_and_validate(git_stage)) } pub fn delete_cache(run_config: &RunConfig) -> RunResult { @@ -243,9 +244,11 @@ impl Runner { }) } - pub fn validate(&self) -> RunResult { + pub fn validate(&self ) -> RunResult { match self.ownership.validate() { - Ok(_) => RunResult::default(), + Ok(_) => { + RunResult::default() + }, Err(err) => RunResult { validation_errors: vec![format!("{}", err)], ..Default::default() @@ -253,13 +256,18 @@ impl Runner { } } - pub fn generate(&self) -> RunResult { + pub fn generate(&self, git_stage: bool) -> RunResult { let content = self.ownership.generate_file(); if let Some(parent) = &self.run_config.codeowners_file_path.parent() { let _ = std::fs::create_dir_all(parent); } match std::fs::write(&self.run_config.codeowners_file_path, content) { - Ok(_) => RunResult::default(), + Ok(_) => { + if git_stage { + self.git_stage(); + } + RunResult::default() + }, Err(err) => RunResult { io_errors: vec![err.to_string()], ..Default::default() @@ -267,14 +275,22 @@ impl Runner { } } - pub fn generate_and_validate(&self) -> RunResult { - let run_result = self.generate(); + pub fn generate_and_validate(&self, git_stage: bool) -> RunResult { + let run_result = self.generate(git_stage); if run_result.has_errors() { return run_result; } self.validate() } + fn git_stage(&self) { + let _ = Command::new("git") + .arg("add") + .arg(&self.run_config.codeowners_file_path) + .current_dir(&self.run_config.project_root) + .output(); + } + // TODO: remove this once we've verified the fast path is working #[allow(dead_code)] pub fn for_file(&self, file_path: &str) -> RunResult { diff --git a/tests/git_stage_test.rs b/tests/git_stage_test.rs new file mode 100644 index 0000000..90cdf70 --- /dev/null +++ b/tests/git_stage_test.rs @@ -0,0 +1,110 @@ +use std::fs; +use std::path::{Path, PathBuf}; +use std::process::Command; + +use codeowners::runner::{self, RunConfig}; + +fn copy_dir_recursive(from: &Path, to: &Path) { + fs::create_dir_all(to).expect("failed to create destination root"); + for entry in fs::read_dir(from).expect("failed to read source dir") { + let entry = entry.expect("failed to read dir entry"); + let file_type = entry.file_type().expect("failed to read file type"); + let src_path = entry.path(); + let dest_path = to.join(entry.file_name()); + if file_type.is_dir() { + copy_dir_recursive(&src_path, &dest_path); + } else if file_type.is_file() { + // Ensure parent exists then copy + if let Some(parent) = dest_path.parent() { + fs::create_dir_all(parent).expect("failed to create parent dir"); + } + fs::copy(&src_path, &dest_path).expect("failed to copy file"); + } + } +} + +fn init_git_repo(path: &Path) { + // Initialize a new git repository in the temp project + let status = Command::new("git") + .arg("init") + .current_dir(path) + .output() + .expect("failed to run git init"); + assert!(status.status.success(), "git init failed: {}", String::from_utf8_lossy(&status.stderr)); + + // Configure a dummy identity to appease git if commits ever happen; not strictly needed for staging + let _ = Command::new("git").arg("config").arg("user.email").arg("test@example.com").current_dir(path).output(); + let _ = Command::new("git").arg("config").arg("user.name").arg("Test User").current_dir(path).output(); +} + +fn is_file_staged(repo_root: &Path, rel_path: &str) -> bool { + // Use git diff --name-only --cached to list staged files + let output = Command::new("git") + .arg("diff") + .arg("--name-only") + .arg("--cached") + .current_dir(repo_root) + .output() + .expect("failed to run git diff --cached"); + assert!(output.status.success(), "git diff failed: {}", String::from_utf8_lossy(&output.stderr)); + let stdout = String::from_utf8_lossy(&output.stdout); + stdout.lines().any(|line| line.trim() == rel_path) +} + +fn build_run_config(project_root: &Path, codeowners_rel_path: &str) -> RunConfig { + let project_root = project_root.canonicalize().expect("failed to canonicalize project root"); + let codeowners_file_path = project_root.join(codeowners_rel_path); + let config_path = project_root.join("config/code_ownership.yml"); + RunConfig { + project_root, + codeowners_file_path, + config_path, + no_cache: true, + } +} + +#[test] +fn test_generate_stages_codeowners() { + let temp_dir = tempfile::tempdir().expect("failed to create tempdir"); + let temp_root = temp_dir.path().to_path_buf(); + + // Copy the valid project fixture into a temporary git repo + let fixture_root = PathBuf::from("tests/fixtures/valid_project"); + copy_dir_recursive(&fixture_root, &temp_root); + init_git_repo(&temp_root); + + // Run generate with staging enabled, targeting the standard CODEOWNERS path + let run_config = build_run_config(&temp_root, ".github/CODEOWNERS"); + let result = runner::generate(&run_config, true); + assert!(result.io_errors.is_empty(), "io_errors: {:?}", result.io_errors); + assert!(result.validation_errors.is_empty(), "validation_errors: {:?}", result.validation_errors); + + // Assert CODEOWNERS file exists and is staged + let rel_path = ".github/CODEOWNERS"; + assert!(run_config.codeowners_file_path.exists(), "CODEOWNERS file was not created"); + assert!(is_file_staged(&run_config.project_root, rel_path), "CODEOWNERS file was not staged"); +} + +#[test] +fn test_generate_and_validate_stages_codeowners() { + let temp_dir = tempfile::tempdir().expect("failed to create tempdir"); + let temp_root = temp_dir.path().to_path_buf(); + + // Copy the valid project fixture into a temporary git repo + let fixture_root = PathBuf::from("tests/fixtures/valid_project"); + copy_dir_recursive(&fixture_root, &temp_root); + init_git_repo(&temp_root); + + // Run generate_and_validate with staging enabled + let run_config = build_run_config(&temp_root, ".github/CODEOWNERS"); + let result = runner::generate_and_validate(&run_config, vec![], true); + assert!(result.io_errors.is_empty(), "io_errors: {:?}", result.io_errors); + assert!(result.validation_errors.is_empty(), "validation_errors: {:?}", result.validation_errors); + + // Assert CODEOWNERS file exists and is staged + let rel_path = ".github/CODEOWNERS"; + assert!(run_config.codeowners_file_path.exists(), "CODEOWNERS file was not created"); + assert!(is_file_staged(&run_config.project_root, rel_path), "CODEOWNERS file was not staged"); +} + + From 1240ecec8de6736211ee9bc3f969af0b42f542e8 Mon Sep 17 00:00:00 2001 From: Perry Hertler Date: Mon, 11 Aug 2025 08:28:23 -0500 Subject: [PATCH 12/12] parallel walk dir - 25% faster --- src/cli.rs | 4 ++-- src/project_builder.rs | 26 ++++++++++++++++++++++---- src/runner.rs | 8 +++----- tests/git_stage_test.rs | 40 ++++++++++++++++++++++++++++++++-------- 4 files changed, 59 insertions(+), 19 deletions(-) diff --git a/src/cli.rs b/src/cli.rs index 680f2dc..714487c 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -27,7 +27,7 @@ enum Command { visible_alias = "g" )] Generate { - #[arg(long, short,default_value = "false", help = "Skip staging the CODEOWNERS file")] + #[arg(long, short, default_value = "false", help = "Skip staging the CODEOWNERS file")] skip_stage: bool, }, @@ -39,7 +39,7 @@ enum Command { #[clap(about = "Chains both `generate` and `validate` commands.", visible_alias = "gv")] GenerateAndValidate { - #[arg(long, short,default_value = "false", help = "Skip staging the CODEOWNERS file")] + #[arg(long, short, default_value = "false", help = "Skip staging the CODEOWNERS file")] skip_stage: bool, }, diff --git a/src/project_builder.rs b/src/project_builder.rs index 12810af..3aa53c8 100644 --- a/src/project_builder.rs +++ b/src/project_builder.rs @@ -1,11 +1,12 @@ use std::{ fs::File, path::{Path, PathBuf}, + sync::{Arc, Mutex}, }; use error_stack::{Result, ResultExt}; use fast_glob::glob_match; -use ignore::WalkBuilder; +use ignore::{WalkBuilder, WalkParallel, WalkState}; use rayon::iter::{IntoParallelIterator, ParallelIterator}; use tracing::{instrument, warn}; @@ -53,12 +54,29 @@ impl<'a> ProjectBuilder<'a> { let mut entry_types = Vec::with_capacity(INITIAL_VECTOR_CAPACITY); let mut builder = WalkBuilder::new(&self.base_path); builder.hidden(false); - let walkdir = builder.build(); + let walk_parallel: WalkParallel = builder.build_parallel(); - for entry in walkdir { - let entry = entry.change_context(Error::Io)?; + let collected = Arc::new(Mutex::new(Vec::with_capacity(INITIAL_VECTOR_CAPACITY))); + let collected_for_threads = Arc::clone(&collected); + + walk_parallel.run(move || { + let collected = Arc::clone(&collected_for_threads); + Box::new(move |res| { + if let Ok(entry) = res { + if let Ok(mut v) = collected.lock() { + v.push(entry); + } + } + WalkState::Continue + }) + }); + + // Process sequentially with &mut self + let collected_entries = Arc::try_unwrap(collected).unwrap().into_inner().unwrap(); + for entry in collected_entries { entry_types.push(self.build_entry_type(entry)?); } + self.build_project_from_entry_types(entry_types) } diff --git a/src/runner.rs b/src/runner.rs index 32c4210..bfbe2fb 100644 --- a/src/runner.rs +++ b/src/runner.rs @@ -244,11 +244,9 @@ impl Runner { }) } - pub fn validate(&self ) -> RunResult { + pub fn validate(&self) -> RunResult { match self.ownership.validate() { - Ok(_) => { - RunResult::default() - }, + Ok(_) => RunResult::default(), Err(err) => RunResult { validation_errors: vec![format!("{}", err)], ..Default::default() @@ -267,7 +265,7 @@ impl Runner { self.git_stage(); } RunResult::default() - }, + } Err(err) => RunResult { io_errors: vec![err.to_string()], ..Default::default() diff --git a/tests/git_stage_test.rs b/tests/git_stage_test.rs index 90cdf70..e6f029d 100644 --- a/tests/git_stage_test.rs +++ b/tests/git_stage_test.rs @@ -30,11 +30,25 @@ fn init_git_repo(path: &Path) { .current_dir(path) .output() .expect("failed to run git init"); - assert!(status.status.success(), "git init failed: {}", String::from_utf8_lossy(&status.stderr)); + assert!( + status.status.success(), + "git init failed: {}", + String::from_utf8_lossy(&status.stderr) + ); // Configure a dummy identity to appease git if commits ever happen; not strictly needed for staging - let _ = Command::new("git").arg("config").arg("user.email").arg("test@example.com").current_dir(path).output(); - let _ = Command::new("git").arg("config").arg("user.name").arg("Test User").current_dir(path).output(); + let _ = Command::new("git") + .arg("config") + .arg("user.email") + .arg("test@example.com") + .current_dir(path) + .output(); + let _ = Command::new("git") + .arg("config") + .arg("user.name") + .arg("Test User") + .current_dir(path) + .output(); } fn is_file_staged(repo_root: &Path, rel_path: &str) -> bool { @@ -46,7 +60,11 @@ fn is_file_staged(repo_root: &Path, rel_path: &str) -> bool { .current_dir(repo_root) .output() .expect("failed to run git diff --cached"); - assert!(output.status.success(), "git diff failed: {}", String::from_utf8_lossy(&output.stderr)); + assert!( + output.status.success(), + "git diff failed: {}", + String::from_utf8_lossy(&output.stderr) + ); let stdout = String::from_utf8_lossy(&output.stdout); stdout.lines().any(|line| line.trim() == rel_path) } @@ -77,7 +95,11 @@ fn test_generate_stages_codeowners() { let run_config = build_run_config(&temp_root, ".github/CODEOWNERS"); let result = runner::generate(&run_config, true); assert!(result.io_errors.is_empty(), "io_errors: {:?}", result.io_errors); - assert!(result.validation_errors.is_empty(), "validation_errors: {:?}", result.validation_errors); + assert!( + result.validation_errors.is_empty(), + "validation_errors: {:?}", + result.validation_errors + ); // Assert CODEOWNERS file exists and is staged let rel_path = ".github/CODEOWNERS"; @@ -99,12 +121,14 @@ fn test_generate_and_validate_stages_codeowners() { let run_config = build_run_config(&temp_root, ".github/CODEOWNERS"); let result = runner::generate_and_validate(&run_config, vec![], true); assert!(result.io_errors.is_empty(), "io_errors: {:?}", result.io_errors); - assert!(result.validation_errors.is_empty(), "validation_errors: {:?}", result.validation_errors); + assert!( + result.validation_errors.is_empty(), + "validation_errors: {:?}", + result.validation_errors + ); // Assert CODEOWNERS file exists and is staged let rel_path = ".github/CODEOWNERS"; assert!(run_config.codeowners_file_path.exists(), "CODEOWNERS file was not created"); assert!(is_file_staged(&run_config.project_root, rel_path), "CODEOWNERS file was not staged"); } - -