diff --git a/Cargo.lock b/Cargo.lock index d9cf231..f97d18a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -77,6 +77,12 @@ version = "1.0.83" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "25bdb32cbbdce2b519a9cd7df3a678443100e265d5e25ca763b7572a5104f5f3" +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + [[package]] name = "assert_cmd" version = "2.0.16" @@ -173,7 +179,7 @@ checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" [[package]] name = "codeowners" -version = "0.2.4" +version = "0.2.5" dependencies = [ "assert_cmd", "clap", @@ -314,9 +320,12 @@ dependencies = [ [[package]] name = "fast-glob" -version = "0.4.0" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb10ed0f8a3dca52477be37ac0fb8f9d1fd4cd8d311b4484bdd45c1c56e0c9ec" +checksum = "3d26eec0ae9682c457cb0f85de67ad417b716ae852736a5d94c2ad6e92a997c9" +dependencies = [ + "arrayvec", +] [[package]] name = "fastrand" diff --git a/Cargo.toml b/Cargo.toml index df37c08..f61c3da 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "codeowners" -version = "0.2.4" +version = "0.2.5" edition = "2024" [profile.release] @@ -14,7 +14,7 @@ clap = { version = "4.5.20", features = ["derive"] } clap_derive = "4.5.18" error-stack = "0.5.0" enum_dispatch = "0.3.13" -fast-glob = "0.4.0" +fast-glob = "1.0.0" glob = "0.3.2" ignore = "0.4.23" itertools = "0.14.0" diff --git a/ai-prompts/for-file-optimization.md b/ai-prompts/for-file-optimization.md new file mode 100644 index 0000000..a837acb --- /dev/null +++ b/ai-prompts/for-file-optimization.md @@ -0,0 +1,21 @@ +## Re-architect `codeowners-rs` + +The CLI behaviors live in `src/cli.rs`. + +### What this tool does +- **Generate CODEOWNERS**: Builds `.github/CODEOWNERS` from multiple ownership sources. See mappers in `src/ownership/mapper/`. +- **Answer per-file ownership**: The `for-file` command returns the owner of a given file even if the checked-in `CODEOWNERS` is not up to date. + +### Current implementations +- **Fast but potentially inaccurate**: `Runner::for_file_from_codeowners` (in `src/runner.rs`) parses the existing `CODEOWNERS` file to find a file’s owner. It’s very fast, but can be wrong if `CODEOWNERS` is stale. +- **Accurate but slow**: `Runner::for_file` is correct but can take up to ~4 seconds on large repositories because it effectively determines ownership for many files and then returns the single result needed. + +Ownership resolution is complex because definitions often live in other files (packages, team configs, globs, etc.). + +## Assignment +Implement a faster `for_file` that remains accurate. + +### Requirements +- **Performance**: Under 1 second on large codebases (e.g., `$HOME/workspace/large`). +- **Correctness**: All existing tests must continue to pass. +- **Compatibility**: No changes to the external behavior of the `for-file` CLI command. \ No newline at end of file diff --git a/src/bin/compare_for_file.rs b/src/bin/compare_for_file.rs new file mode 100644 index 0000000..7bce8ca --- /dev/null +++ b/src/bin/compare_for_file.rs @@ -0,0 +1,179 @@ +// This is a tool to compare the output of the original codeowners CLI with the optimized version. +// It's useful for verifying that the optimized version is correct. +// +// It's not used in CI, but it's useful for debugging. +// +// To run it, use `cargo run --bin compare_for_file ` +// +// It will compare the output of the original codeowners CLI with the optimized version for all files in the project. + +use std::{ + fs::File, + io::{self, Write}, + path::{Path, PathBuf}, + process::Command, +}; + +use codeowners::config::Config as OwnershipConfig; +use codeowners::ownership::{FileOwner, for_file_fast}; +use codeowners::runner::{RunConfig, Runner}; +use ignore::WalkBuilder; + +fn main() { + let project_root = std::env::args().nth(1).expect("usage: compare_for_file "); + let project_root = PathBuf::from(project_root); + if !project_root.is_absolute() { + eprintln!("Project root must be absolute"); + std::process::exit(2); + } + + let codeowners_file_path = project_root.join(".github/CODEOWNERS"); + let config_path = project_root.join("config/code_ownership.yml"); + + let run_config = RunConfig { + project_root: project_root.clone(), + codeowners_file_path, + config_path: config_path.clone(), + no_cache: false, + }; + + // Build the original, accurate-but-slower runner once + let runner = match Runner::new(&run_config) { + Ok(r) => r, + Err(e) => { + eprintln!("Failed to initialize Runner: {}", e); + std::process::exit(1); + } + }; + + // Load config once for the optimized path + let config_file = match File::open(&config_path) { + Ok(f) => f, + Err(e) => { + eprintln!("Can't open config file {}: {}", config_path.display(), e); + std::process::exit(1); + } + }; + let optimized_config: OwnershipConfig = match serde_yaml::from_reader(config_file) { + Ok(c) => c, + Err(e) => { + eprintln!("Can't parse config file {}: {}", config_path.display(), e); + std::process::exit(1); + } + }; + + let mut total_files: usize = 0; + let mut diff_count: usize = 0; + + // Prefer tracked files from git; fall back to walking the FS if git is unavailable + let tracked_files_output = Command::new("git").arg("-C").arg(&project_root).arg("ls-files").arg("-z").output(); + + match tracked_files_output { + Ok(output) if output.status.success() => { + let bytes = output.stdout; + for rel in bytes.split(|b| *b == 0u8) { + if rel.is_empty() { + continue; + } + let rel_str = match std::str::from_utf8(rel) { + Ok(s) => s, + Err(_) => continue, + }; + let abs_path = project_root.join(rel_str); + // Only process regular files that currently exist + if !abs_path.is_file() { + continue; + } + + total_files += 1; + let original = run_original(&runner, &abs_path); + let optimized = run_optimized(&project_root, &optimized_config, &abs_path); + + if original != optimized { + diff_count += 1; + println!("\n==== {} ====", abs_path.display()); + println!("ORIGINAL:\n{}", original); + println!("OPTIMIZED:\n{}", optimized); + let _ = io::stdout().flush(); + } + + if total_files % 1000 == 0 { + eprintln!("Processed {} files... diffs so far: {}", total_files, diff_count); + } + } + } + _ => { + eprintln!("git ls-files failed; falling back to filesystem walk (untracked files may be included)"); + let walker = WalkBuilder::new(&project_root) + .hidden(false) + .git_ignore(true) + .git_exclude(true) + .follow_links(false) + .build(); + + for result in walker { + let entry = match result { + Ok(e) => e, + Err(err) => { + eprintln!("walk error: {}", err); + continue; + } + }; + if !entry.file_type().map(|t| t.is_file()).unwrap_or(false) { + continue; + } + let path = entry.path(); + total_files += 1; + + let original = run_original(&runner, path); + let optimized = run_optimized(&project_root, &optimized_config, path); + + if original != optimized { + diff_count += 1; + println!("\n==== {} ====", path.display()); + println!("ORIGINAL:\n{}", original); + println!("OPTIMIZED:\n{}", optimized); + let _ = io::stdout().flush(); + } + + if total_files % 1000 == 0 { + eprintln!("Processed {} files... diffs so far: {}", total_files, diff_count); + } + } + } + } + + println!("Checked {} files. Diffs: {}", total_files, diff_count); + if diff_count > 0 { + std::process::exit(3); + } +} + +fn run_original(runner: &Runner, file_path: &Path) -> String { + let result = runner.for_file(&file_path.to_string_lossy()); + if !result.validation_errors.is_empty() { + return result.validation_errors.join("\n"); + } + if !result.io_errors.is_empty() { + return format!("IO_ERROR: {}", result.io_errors.join(" | ")); + } + result.info_messages.join("\n") +} + +fn run_optimized(project_root: &Path, config: &OwnershipConfig, file_path: &Path) -> String { + let owners: Vec = match for_file_fast::find_file_owners(project_root, config, file_path) { + Ok(v) => v, + Err(e) => return format!("IO_ERROR: {}", e), + }; + match owners.len() { + 0 => format!("{}", FileOwner::default()), + 1 => format!("{}", owners[0]), + _ => { + let mut lines = vec!["Error: file is owned by multiple teams!".to_string()]; + for owner in owners { + lines.push(format!("\n{}", owner)); + } + lines.join("\n") + } + } +} diff --git a/src/cli.rs b/src/cli.rs index 1f94124..714487c 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -26,7 +26,10 @@ enum Command { about = "Generate the CODEOWNERS file and save it to '--codeowners-file-path'.", visible_alias = "g" )] - Generate, + Generate { + #[arg(long, short, default_value = "false", help = "Skip staging the CODEOWNERS file")] + skip_stage: bool, + }, #[clap( about = "Validate the validity of the CODEOWNERS file. A validation failure will exit with a failure code and a detailed output of the validation errors.", @@ -35,7 +38,10 @@ enum Command { Validate, #[clap(about = "Chains both `generate` and `validate` commands.", visible_alias = "gv")] - GenerateAndValidate, + GenerateAndValidate { + #[arg(long, short, default_value = "false", help = "Skip staging the CODEOWNERS file")] + skip_stage: bool, + }, #[clap(about = "Delete the cache file.", visible_alias = "d")] DeleteCache, @@ -101,8 +107,8 @@ pub fn cli() -> Result { let runner_result = match args.command { Command::Validate => runner::validate(&run_config, vec![]), - Command::Generate => runner::generate(&run_config), - Command::GenerateAndValidate => runner::generate_and_validate(&run_config, vec![]), + Command::Generate { skip_stage } => runner::generate(&run_config, !skip_stage), + Command::GenerateAndValidate { skip_stage } => runner::generate_and_validate(&run_config, vec![], !skip_stage), Command::ForFile { name, fast } => runner::for_file(&run_config, &name, fast), Command::ForTeam { name } => runner::for_team(&run_config, &name), Command::DeleteCache => runner::delete_cache(&run_config), diff --git a/src/ownership.rs b/src/ownership.rs index 67e3e71..7d91d95 100644 --- a/src/ownership.rs +++ b/src/ownership.rs @@ -11,6 +11,7 @@ use tracing::{info, instrument}; mod file_generator; mod file_owner_finder; +pub mod for_file_fast; pub(crate) mod mapper; pub(crate) mod parser; mod validator; @@ -32,7 +33,7 @@ use self::{ pub struct Ownership { project: Arc, } - +#[derive(Debug)] pub struct FileOwner { pub team: Team, pub team_config_file_path: String, diff --git a/src/ownership/for_file_fast.rs b/src/ownership/for_file_fast.rs new file mode 100644 index 0000000..97babc7 --- /dev/null +++ b/src/ownership/for_file_fast.rs @@ -0,0 +1,424 @@ +use std::{ + collections::{HashMap, HashSet}, + fs, + path::Path, +}; + +use fast_glob::glob_match; +use glob::glob; + +use crate::{config::Config, project::Team, project_file_builder::build_project_file_without_cache}; + +use super::{FileOwner, mapper::Source}; + +pub fn find_file_owners(project_root: &Path, config: &Config, file_path: &Path) -> Result, String> { + let absolute_file_path = if file_path.is_absolute() { + file_path.to_path_buf() + } else { + project_root.join(file_path) + }; + let relative_file_path = absolute_file_path + .strip_prefix(project_root) + .unwrap_or(&absolute_file_path) + .to_path_buf(); + + let teams = load_teams(project_root, &config.team_file_glob)?; + let teams_by_name = build_teams_by_name_map(&teams); + + let mut sources_by_team: HashMap> = HashMap::new(); + + if let Some(team_name) = read_top_of_file_team(&absolute_file_path) { + // Only consider top-of-file annotations for files included by config.owned_globs and not excluded by config.unowned_globs + if let Some(rel_str) = relative_file_path.to_str() { + let is_config_owned = glob_list_matches(rel_str, &config.owned_globs); + let is_config_unowned = glob_list_matches(rel_str, &config.unowned_globs); + if is_config_owned && !is_config_unowned { + if let Some(team) = teams_by_name.get(&team_name) { + sources_by_team.entry(team.name.clone()).or_default().push(Source::TeamFile); + } + } + } + } + + if let Some((owner_team_name, dir_source)) = most_specific_directory_owner(project_root, &relative_file_path, &teams_by_name) { + sources_by_team.entry(owner_team_name).or_default().push(dir_source); + } + + if let Some((owner_team_name, package_source)) = nearest_package_owner(project_root, &relative_file_path, config, &teams_by_name) { + sources_by_team.entry(owner_team_name).or_default().push(package_source); + } + + if let Some((owner_team_name, gem_source)) = vendored_gem_owner(&relative_file_path, config, &teams) { + sources_by_team.entry(owner_team_name).or_default().push(gem_source); + } + + if let Some(rel_str) = relative_file_path.to_str() { + for team in &teams { + let subtracts: HashSet<&str> = team.subtracted_globs.iter().map(|s| s.as_str()).collect(); + for owned_glob in &team.owned_globs { + if glob_match(owned_glob, rel_str) && !subtracts.iter().any(|sub| glob_match(sub, rel_str)) { + sources_by_team + .entry(team.name.clone()) + .or_default() + .push(Source::TeamGlob(owned_glob.clone())); + } + } + } + } + + for team in &teams { + let team_rel = team.path.strip_prefix(project_root).unwrap_or(&team.path).to_path_buf(); + if team_rel == relative_file_path { + sources_by_team.entry(team.name.clone()).or_default().push(Source::TeamYml); + } + } + + let mut file_owners: Vec = Vec::new(); + for (team_name, sources) in sources_by_team.into_iter() { + if let Some(team) = teams_by_name.get(&team_name) { + let relative_team_yml_path = team + .path + .strip_prefix(project_root) + .unwrap_or(&team.path) + .to_string_lossy() + .to_string(); + file_owners.push(FileOwner { + team: team.clone(), + team_config_file_path: relative_team_yml_path, + sources, + }); + } + } + + // TODO: remove this once we've verified the fast path is working + // This is simply matching the order of behavior of the original codeowners CLI + if file_owners.len() > 1 { + file_owners.sort_by(|a, b| { + let priority_a = a.sources.iter().map(source_priority).min().unwrap_or(u8::MAX); + let priority_b = b.sources.iter().map(source_priority).min().unwrap_or(u8::MAX); + priority_a.cmp(&priority_b).then_with(|| a.team.name.cmp(&b.team.name)) + }); + } + + Ok(file_owners) +} + +fn build_teams_by_name_map(teams: &[Team]) -> HashMap { + let mut map = HashMap::with_capacity(teams.len() * 2); + for team in teams { + map.insert(team.name.clone(), team.clone()); + map.insert(team.github_team.clone(), team.clone()); + } + map +} + +fn load_teams(project_root: &Path, team_file_globs: &[String]) -> std::result::Result, String> { + let mut teams: Vec = Vec::new(); + for glob_str in team_file_globs { + let absolute_glob = project_root.join(glob_str).to_string_lossy().into_owned(); + let paths = glob(&absolute_glob).map_err(|e| e.to_string())?; + for path in paths.flatten() { + match Team::from_team_file_path(path.clone()) { + Ok(team) => teams.push(team), + Err(e) => { + eprintln!("Error parsing team file: {}, path: {}", e, path.display()); + continue; + } + } + } + } + Ok(teams) +} + +// no regex: parse cheaply with ASCII-aware checks + +fn read_top_of_file_team(path: &Path) -> Option { + let project_file = build_project_file_without_cache(&path.to_path_buf()); + if let Some(owner) = project_file.owner { + return Some(owner); + } + + None +} + +fn most_specific_directory_owner( + project_root: &Path, + relative_file_path: &Path, + teams_by_name: &HashMap, +) -> Option<(String, Source)> { + let mut current = project_root.join(relative_file_path); + let mut best: Option<(String, Source)> = None; + loop { + if !current.pop() { + break; + } + let codeowner_path = current.join(".codeowner"); + if let Ok(owner_str) = fs::read_to_string(&codeowner_path) { + let owner = owner_str.trim(); + if let Some(team) = teams_by_name.get(owner) { + let relative_dir = current + .strip_prefix(project_root) + .unwrap_or(current.as_path()) + .to_string_lossy() + .to_string(); + let candidate = (team.name.clone(), Source::Directory(relative_dir)); + match &best { + None => best = Some(candidate), + Some((_, existing_source)) => { + if candidate.1.len() > existing_source.len() { + best = Some(candidate); + } + } + } + } + } + if current == project_root { + break; + } + } + best +} + +fn nearest_package_owner( + project_root: &Path, + relative_file_path: &Path, + config: &Config, + teams_by_name: &HashMap, +) -> Option<(String, Source)> { + let mut current = project_root.join(relative_file_path); + loop { + if !current.pop() { + break; + } + let parent_rel = current.strip_prefix(project_root).unwrap_or(current.as_path()); + if let Some(rel_str) = parent_rel.to_str() { + if glob_list_matches(rel_str, &config.ruby_package_paths) { + let pkg_yml = current.join("package.yml"); + if pkg_yml.exists() { + if let Ok(owner) = read_ruby_package_owner(&pkg_yml) { + if let Some(team) = teams_by_name.get(&owner) { + let package_path = parent_rel.join("package.yml"); + let package_glob = format!("{rel_str}/**/**"); + return Some(( + team.name.clone(), + Source::Package(package_path.to_string_lossy().to_string(), package_glob), + )); + } + } + } + } + if glob_list_matches(rel_str, &config.javascript_package_paths) { + let pkg_json = current.join("package.json"); + if pkg_json.exists() { + if let Ok(owner) = read_js_package_owner(&pkg_json) { + if let Some(team) = teams_by_name.get(&owner) { + let package_path = parent_rel.join("package.json"); + let package_glob = format!("{rel_str}/**/**"); + return Some(( + team.name.clone(), + Source::Package(package_path.to_string_lossy().to_string(), package_glob), + )); + } + } + } + } + } + if current == project_root { + break; + } + } + None +} + +// removed: use `Source::len()` instead + +fn glob_list_matches(path: &str, globs: &[String]) -> bool { + globs.iter().any(|g| glob_match(g, path)) +} + +fn read_ruby_package_owner(path: &Path) -> std::result::Result { + let file = std::fs::File::open(path).map_err(|e| e.to_string())?; + let deserializer: crate::project::deserializers::RubyPackage = serde_yaml::from_reader(file).map_err(|e| e.to_string())?; + deserializer.owner.ok_or_else(|| "Missing owner".to_string()) +} + +fn read_js_package_owner(path: &Path) -> std::result::Result { + let file = std::fs::File::open(path).map_err(|e| e.to_string())?; + let deserializer: crate::project::deserializers::JavascriptPackage = serde_json::from_reader(file).map_err(|e| e.to_string())?; + deserializer + .metadata + .and_then(|m| m.owner) + .ok_or_else(|| "Missing owner".to_string()) +} + +fn vendored_gem_owner(relative_file_path: &Path, config: &Config, teams: &[Team]) -> Option<(String, Source)> { + use std::path::Component; + let mut comps = relative_file_path.components(); + let first = comps.next()?; + let second = comps.next()?; + let first_str = match first { + Component::Normal(s) => s.to_string_lossy(), + _ => return None, + }; + if first_str != config.vendored_gems_path { + return None; + } + let gem_name = match second { + Component::Normal(s) => s.to_string_lossy().to_string(), + _ => return None, + }; + for team in teams { + if team.owned_gems.iter().any(|g| g == &gem_name) { + return Some((team.name.clone(), Source::TeamGem)); + } + } + None +} + +fn source_priority(source: &Source) -> u8 { + match source { + // Highest confidence first + Source::TeamFile => 0, + Source::Directory(_) => 1, + Source::Package(_, _) => 2, + Source::TeamGlob(_) => 3, + Source::TeamGem => 4, + Source::TeamYml => 5, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::project::Team; + use std::collections::HashMap; + use tempfile::tempdir; + + fn build_config_for_temp(frontend_glob: &str, ruby_glob: &str, vendored_path: &str) -> crate::config::Config { + crate::config::Config { + owned_globs: vec!["**/*".to_string()], + ruby_package_paths: vec![ruby_glob.to_string()], + javascript_package_paths: vec![frontend_glob.to_string()], + team_file_glob: vec!["config/teams/**/*.yml".to_string()], + unowned_globs: vec![], + vendored_gems_path: vendored_path.to_string(), + cache_directory: "tmp/cache/codeowners".to_string(), + } + } + + fn team_named(name: &str) -> Team { + Team { + path: Path::new("config/teams/foo.yml").to_path_buf(), + name: name.to_string(), + github_team: format!("@{}Team", name), + owned_globs: vec![], + subtracted_globs: vec![], + owned_gems: vec![], + avoid_ownership: false, + } + } + + #[test] + fn test_read_top_of_file_team_parses_at_and_colon_forms() { + let td = tempdir().unwrap(); + + // @team form + let file_at = td.path().join("at_form.rb"); + std::fs::write(&file_at, "# @team Payroll\nputs 'x'\n").unwrap(); + assert_eq!(read_top_of_file_team(&file_at), Some("Payroll".to_string())); + } + + #[test] + fn test_most_specific_directory_owner_prefers_deeper() { + let td = tempdir().unwrap(); + let project_root = td.path(); + + // Build directories + let deep_dir = project_root.join("a/b/c"); + std::fs::create_dir_all(&deep_dir).unwrap(); + let mid_dir = project_root.join("a/b"); + let top_dir = project_root.join("a"); + + // Write .codeowner files + std::fs::write(top_dir.join(".codeowner"), "TopTeam").unwrap(); + std::fs::write(mid_dir.join(".codeowner"), "MidTeam").unwrap(); + std::fs::write(deep_dir.join(".codeowner"), "DeepTeam").unwrap(); + + // Build teams_by_name + let mut tbn: HashMap = HashMap::new(); + for name in ["TopTeam", "MidTeam", "DeepTeam"] { + let t = team_named(name); + tbn.insert(t.name.clone(), t); + } + + let rel_file = Path::new("a/b/c/file.rb"); + let result = most_specific_directory_owner(project_root, rel_file, &tbn).unwrap(); + match result.1 { + Source::Directory(path) => { + assert!(path.ends_with("a/b/c"), "expected deepest directory, got {}", path); + } + _ => panic!("expected Directory source"), + } + assert_eq!(result.0, "DeepTeam"); + } + + #[test] + fn test_nearest_package_owner_ruby_and_js() { + let td = tempdir().unwrap(); + let project_root = td.path(); + let config = build_config_for_temp("frontend/**/*", "packs/**/*", "vendored"); + + // Ruby package + let ruby_pkg = project_root.join("packs/payroll"); + std::fs::create_dir_all(&ruby_pkg).unwrap(); + std::fs::write(ruby_pkg.join("package.yml"), "---\nowner: Payroll\n").unwrap(); + + // JS package + let js_pkg = project_root.join("frontend/flow"); + std::fs::create_dir_all(&js_pkg).unwrap(); + std::fs::write(js_pkg.join("package.json"), r#"{"metadata": {"owner": "UX"}}"#).unwrap(); + + // Teams map + let mut tbn: HashMap = HashMap::new(); + for name in ["Payroll", "UX"] { + let t = team_named(name); + tbn.insert(t.name.clone(), t); + } + + // Ruby nearest + let rel_ruby = Path::new("packs/payroll/app/models/thing.rb"); + let ruby_owner = nearest_package_owner(project_root, rel_ruby, &config, &tbn).unwrap(); + assert_eq!(ruby_owner.0, "Payroll"); + match ruby_owner.1 { + Source::Package(pkg_path, glob) => { + assert!(pkg_path.ends_with("packs/payroll/package.yml")); + assert_eq!(glob, "packs/payroll/**/**"); + } + _ => panic!("expected Package source for ruby"), + } + + // JS nearest + let rel_js = Path::new("frontend/flow/src/index.ts"); + let js_owner = nearest_package_owner(project_root, rel_js, &config, &tbn).unwrap(); + assert_eq!(js_owner.0, "UX"); + match js_owner.1 { + Source::Package(pkg_path, glob) => { + assert!(pkg_path.ends_with("frontend/flow/package.json")); + assert_eq!(glob, "frontend/flow/**/**"); + } + _ => panic!("expected Package source for js"), + } + } + + #[test] + fn test_vendored_gem_owner() { + let config = build_config_for_temp("frontend/**/*", "packs/**/*", "vendored"); + let mut teams: Vec = vec![team_named("Payroll")]; + teams[0].owned_gems = vec!["awesome_gem".to_string()]; + + let path = Path::new("vendored/awesome_gem/lib/a.rb"); + let result = vendored_gem_owner(path, &config, &teams).unwrap(); + assert_eq!(result.0, "Payroll"); + matches!(result.1, Source::TeamGem); + } +} diff --git a/src/project.rs b/src/project.rs index e405e90..5179371 100644 --- a/src/project.rs +++ b/src/project.rs @@ -178,9 +178,7 @@ impl Project { } pub fn relative_path<'a>(&'a self, absolute_path: &'a Path) -> &'a Path { - absolute_path - .strip_prefix(&self.base_path) - .expect("Could not generate relative path") + absolute_path.strip_prefix(&self.base_path).unwrap_or(absolute_path) } pub fn get_team(&self, name: &str) -> Option { @@ -197,3 +195,35 @@ impl Project { result } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_vendored_gem_by_name_maps_all_gems() { + let vg1 = VendoredGem { + path: PathBuf::from("vendored/a"), + name: "a".to_string(), + }; + let vg2 = VendoredGem { + path: PathBuf::from("vendored/b"), + name: "b".to_string(), + }; + let project = Project { + base_path: PathBuf::from("."), + files: vec![], + packages: vec![], + vendored_gems: vec![vg1.clone(), vg2.clone()], + teams: vec![], + codeowners_file_path: PathBuf::from(".github/CODEOWNERS"), + directory_codeowner_files: vec![], + teams_by_name: HashMap::new(), + }; + + let map = project.vendored_gem_by_name(); + assert_eq!(map.len(), 2); + assert_eq!(map.get("a").unwrap().name, vg1.name); + assert_eq!(map.get("b").unwrap().name, vg2.name); + } +} diff --git a/src/project_builder.rs b/src/project_builder.rs index 54f3ac3..3aa53c8 100644 --- a/src/project_builder.rs +++ b/src/project_builder.rs @@ -1,11 +1,12 @@ use std::{ fs::File, path::{Path, PathBuf}, + sync::{Arc, Mutex}, }; use error_stack::{Result, ResultExt}; use fast_glob::glob_match; -use ignore::WalkBuilder; +use ignore::{WalkBuilder, WalkParallel, WalkState}; use rayon::iter::{IntoParallelIterator, ParallelIterator}; use tracing::{instrument, warn}; @@ -53,12 +54,29 @@ impl<'a> ProjectBuilder<'a> { let mut entry_types = Vec::with_capacity(INITIAL_VECTOR_CAPACITY); let mut builder = WalkBuilder::new(&self.base_path); builder.hidden(false); - let walkdir = builder.build(); + let walk_parallel: WalkParallel = builder.build_parallel(); - for entry in walkdir { - let entry = entry.change_context(Error::Io)?; + let collected = Arc::new(Mutex::new(Vec::with_capacity(INITIAL_VECTOR_CAPACITY))); + let collected_for_threads = Arc::clone(&collected); + + walk_parallel.run(move || { + let collected = Arc::clone(&collected_for_threads); + Box::new(move |res| { + if let Ok(entry) = res { + if let Ok(mut v) = collected.lock() { + v.push(entry); + } + } + WalkState::Continue + }) + }); + + // Process sequentially with &mut self + let collected_entries = Arc::try_unwrap(collected).unwrap().into_inner().unwrap(); + for entry in collected_entries { entry_types.push(self.build_entry_type(entry)?); } + self.build_project_from_entry_types(entry_types) } @@ -216,7 +234,10 @@ impl<'a> ProjectBuilder<'a> { } fn matches_globs(path: &Path, globs: &[String]) -> bool { - globs.iter().any(|glob| glob_match(glob, path.to_str().unwrap())) + match path.to_str() { + Some(s) => globs.iter().any(|glob| glob_match(glob, s)), + None => false, + } } fn ruby_package_owner(path: &Path) -> Result, Error> { diff --git a/src/project_file_builder.rs b/src/project_file_builder.rs index 5ac06df..7544dd6 100644 --- a/src/project_file_builder.rs +++ b/src/project_file_builder.rs @@ -47,7 +47,7 @@ impl<'a> ProjectFileBuilder<'a> { } } -fn build_project_file_without_cache(path: &PathBuf) -> ProjectFile { +pub fn build_project_file_without_cache(path: &PathBuf) -> ProjectFile { let content = match std::fs::read_to_string(path) { Ok(content) => content, Err(_) => { diff --git a/src/runner.rs b/src/runner.rs index 2e5fd11..bfbe2fb 100644 --- a/src/runner.rs +++ b/src/runner.rs @@ -2,6 +2,7 @@ use core::fmt; use std::{ fs::File, path::{Path, PathBuf}, + process::Command, }; use error_stack::{Context, Result, ResultExt}; @@ -39,7 +40,8 @@ pub fn for_file(run_config: &RunConfig, file_path: &str, fast: bool) -> RunResul if fast { for_file_from_codeowners(run_config, file_path) } else { - run_with_runner(run_config, |runner| runner.for_file(file_path)) + //run_with_runner(run_config, |runner| runner.for_file(file_path)) + for_file_optimized(run_config, file_path) } } @@ -83,6 +85,58 @@ pub fn team_for_file_from_codeowners(run_config: &RunConfig, file_path: &str) -> .map_err(|e| Error::Io(e.to_string()))?) } +pub fn team_for_file(run_config: &RunConfig, file_path: &str) -> Result, Error> { + let config = config_from_path(&run_config.config_path)?; + use crate::ownership::for_file_fast::find_file_owners; + let owners = find_file_owners(&run_config.project_root, &config, std::path::Path::new(file_path)).map_err(Error::Io)?; + + Ok(owners.first().map(|fo| fo.team.clone())) +} + +// (imports below intentionally trimmed after refactor) + +fn for_file_optimized(run_config: &RunConfig, file_path: &str) -> RunResult { + let config = match config_from_path(&run_config.config_path) { + Ok(c) => c, + Err(err) => { + return RunResult { + io_errors: vec![err.to_string()], + ..Default::default() + }; + } + }; + + use crate::ownership::for_file_fast::find_file_owners; + let file_owners = match find_file_owners(&run_config.project_root, &config, std::path::Path::new(file_path)) { + Ok(v) => v, + Err(err) => { + return RunResult { + io_errors: vec![err], + ..Default::default() + }; + } + }; + + let info_messages: Vec = match file_owners.len() { + 0 => vec![format!("{}", FileOwner::default())], + 1 => vec![format!("{}", file_owners[0])], + _ => { + let mut error_messages = vec!["Error: file is owned by multiple teams!".to_string()]; + for file_owner in file_owners { + error_messages.push(format!("\n{}", file_owner)); + } + return RunResult { + validation_errors: error_messages, + ..Default::default() + }; + } + }; + RunResult { + info_messages, + ..Default::default() + } +} + pub fn for_team(run_config: &RunConfig, team_name: &str) -> RunResult { run_with_runner(run_config, |runner| runner.for_team(team_name)) } @@ -91,12 +145,12 @@ pub fn validate(run_config: &RunConfig, _file_paths: Vec) -> RunResult { run_with_runner(run_config, |runner| runner.validate()) } -pub fn generate(run_config: &RunConfig) -> RunResult { - run_with_runner(run_config, |runner| runner.generate()) +pub fn generate(run_config: &RunConfig, git_stage: bool) -> RunResult { + run_with_runner(run_config, |runner| runner.generate(git_stage)) } -pub fn generate_and_validate(run_config: &RunConfig, _file_paths: Vec) -> RunResult { - run_with_runner(run_config, |runner| runner.generate_and_validate()) +pub fn generate_and_validate(run_config: &RunConfig, _file_paths: Vec, git_stage: bool) -> RunResult { + run_with_runner(run_config, |runner| runner.generate_and_validate(git_stage)) } pub fn delete_cache(run_config: &RunConfig) -> RunResult { @@ -200,13 +254,18 @@ impl Runner { } } - pub fn generate(&self) -> RunResult { + pub fn generate(&self, git_stage: bool) -> RunResult { let content = self.ownership.generate_file(); if let Some(parent) = &self.run_config.codeowners_file_path.parent() { let _ = std::fs::create_dir_all(parent); } match std::fs::write(&self.run_config.codeowners_file_path, content) { - Ok(_) => RunResult::default(), + Ok(_) => { + if git_stage { + self.git_stage(); + } + RunResult::default() + } Err(err) => RunResult { io_errors: vec![err.to_string()], ..Default::default() @@ -214,14 +273,24 @@ impl Runner { } } - pub fn generate_and_validate(&self) -> RunResult { - let run_result = self.generate(); + pub fn generate_and_validate(&self, git_stage: bool) -> RunResult { + let run_result = self.generate(git_stage); if run_result.has_errors() { return run_result; } self.validate() } + fn git_stage(&self) { + let _ = Command::new("git") + .arg("add") + .arg(&self.run_config.codeowners_file_path) + .current_dir(&self.run_config.project_root) + .output(); + } + + // TODO: remove this once we've verified the fast path is working + #[allow(dead_code)] pub fn for_file(&self, file_path: &str) -> RunResult { let relative_file_path = Path::new(file_path) .strip_prefix(&self.run_config.project_root) diff --git a/tests/fixtures/valid_project/.github/CODEOWNERS b/tests/fixtures/valid_project/.github/CODEOWNERS index 09a9fd7..6df553a 100644 --- a/tests/fixtures/valid_project/.github/CODEOWNERS +++ b/tests/fixtures/valid_project/.github/CODEOWNERS @@ -30,6 +30,8 @@ # Team YML ownership /config/teams/payments.yml @PaymentsTeam /config/teams/payroll.yml @PayrollTeam +/config/teams/ux.yml @UX # Team owned gems /gems/payroll_calculator/**/** @PayrollTeam +/gems/pets/**/** @UX diff --git a/tests/fixtures/valid_project/config/code_ownership.yml b/tests/fixtures/valid_project/config/code_ownership.yml index fa56397..05bc791 100644 --- a/tests/fixtures/valid_project/config/code_ownership.yml +++ b/tests/fixtures/valid_project/config/code_ownership.yml @@ -1,10 +1,10 @@ owned_globs: - - "**/*.{rb,tsx}" + - "{gems,config,javascript,ruby,components}/**/*.{rb,tsx}" ruby_package_paths: - ruby/packages/**/* javascript_package_paths: - javascript/packages/** team_file_glob: - config/teams/**/*.yml -vendored_gems_path: gems +unbuilt_gems_path: gems unowned_globs: diff --git a/tests/fixtures/valid_project/config/teams/ux.yml b/tests/fixtures/valid_project/config/teams/ux.yml new file mode 100644 index 0000000..d4675fa --- /dev/null +++ b/tests/fixtures/valid_project/config/teams/ux.yml @@ -0,0 +1,6 @@ +name: UX +github: + team: '@UX' +ruby: + owned_gems: + - pets \ No newline at end of file diff --git a/tests/fixtures/valid_project/gems/pets/dog.rb b/tests/fixtures/valid_project/gems/pets/dog.rb new file mode 100644 index 0000000..0518b82 --- /dev/null +++ b/tests/fixtures/valid_project/gems/pets/dog.rb @@ -0,0 +1,5 @@ +# generated +# next line should be ignored +# @team Payments + +class Dog; end diff --git a/tests/fixtures/valid_project/should_be_ignored/an_ignored_file.rb b/tests/fixtures/valid_project/should_be_ignored/an_ignored_file.rb new file mode 100644 index 0000000..24ad7ca --- /dev/null +++ b/tests/fixtures/valid_project/should_be_ignored/an_ignored_file.rb @@ -0,0 +1,7 @@ +# @team Payments + +class AnIgnoredFile + def initialize + @name = "AnIgnoredFile" + end +end \ No newline at end of file diff --git a/tests/git_stage_test.rs b/tests/git_stage_test.rs new file mode 100644 index 0000000..e6f029d --- /dev/null +++ b/tests/git_stage_test.rs @@ -0,0 +1,134 @@ +use std::fs; +use std::path::{Path, PathBuf}; +use std::process::Command; + +use codeowners::runner::{self, RunConfig}; + +fn copy_dir_recursive(from: &Path, to: &Path) { + fs::create_dir_all(to).expect("failed to create destination root"); + for entry in fs::read_dir(from).expect("failed to read source dir") { + let entry = entry.expect("failed to read dir entry"); + let file_type = entry.file_type().expect("failed to read file type"); + let src_path = entry.path(); + let dest_path = to.join(entry.file_name()); + if file_type.is_dir() { + copy_dir_recursive(&src_path, &dest_path); + } else if file_type.is_file() { + // Ensure parent exists then copy + if let Some(parent) = dest_path.parent() { + fs::create_dir_all(parent).expect("failed to create parent dir"); + } + fs::copy(&src_path, &dest_path).expect("failed to copy file"); + } + } +} + +fn init_git_repo(path: &Path) { + // Initialize a new git repository in the temp project + let status = Command::new("git") + .arg("init") + .current_dir(path) + .output() + .expect("failed to run git init"); + assert!( + status.status.success(), + "git init failed: {}", + String::from_utf8_lossy(&status.stderr) + ); + + // Configure a dummy identity to appease git if commits ever happen; not strictly needed for staging + let _ = Command::new("git") + .arg("config") + .arg("user.email") + .arg("test@example.com") + .current_dir(path) + .output(); + let _ = Command::new("git") + .arg("config") + .arg("user.name") + .arg("Test User") + .current_dir(path) + .output(); +} + +fn is_file_staged(repo_root: &Path, rel_path: &str) -> bool { + // Use git diff --name-only --cached to list staged files + let output = Command::new("git") + .arg("diff") + .arg("--name-only") + .arg("--cached") + .current_dir(repo_root) + .output() + .expect("failed to run git diff --cached"); + assert!( + output.status.success(), + "git diff failed: {}", + String::from_utf8_lossy(&output.stderr) + ); + let stdout = String::from_utf8_lossy(&output.stdout); + stdout.lines().any(|line| line.trim() == rel_path) +} + +fn build_run_config(project_root: &Path, codeowners_rel_path: &str) -> RunConfig { + let project_root = project_root.canonicalize().expect("failed to canonicalize project root"); + let codeowners_file_path = project_root.join(codeowners_rel_path); + let config_path = project_root.join("config/code_ownership.yml"); + RunConfig { + project_root, + codeowners_file_path, + config_path, + no_cache: true, + } +} + +#[test] +fn test_generate_stages_codeowners() { + let temp_dir = tempfile::tempdir().expect("failed to create tempdir"); + let temp_root = temp_dir.path().to_path_buf(); + + // Copy the valid project fixture into a temporary git repo + let fixture_root = PathBuf::from("tests/fixtures/valid_project"); + copy_dir_recursive(&fixture_root, &temp_root); + init_git_repo(&temp_root); + + // Run generate with staging enabled, targeting the standard CODEOWNERS path + let run_config = build_run_config(&temp_root, ".github/CODEOWNERS"); + let result = runner::generate(&run_config, true); + assert!(result.io_errors.is_empty(), "io_errors: {:?}", result.io_errors); + assert!( + result.validation_errors.is_empty(), + "validation_errors: {:?}", + result.validation_errors + ); + + // Assert CODEOWNERS file exists and is staged + let rel_path = ".github/CODEOWNERS"; + assert!(run_config.codeowners_file_path.exists(), "CODEOWNERS file was not created"); + assert!(is_file_staged(&run_config.project_root, rel_path), "CODEOWNERS file was not staged"); +} + +#[test] +fn test_generate_and_validate_stages_codeowners() { + let temp_dir = tempfile::tempdir().expect("failed to create tempdir"); + let temp_root = temp_dir.path().to_path_buf(); + + // Copy the valid project fixture into a temporary git repo + let fixture_root = PathBuf::from("tests/fixtures/valid_project"); + copy_dir_recursive(&fixture_root, &temp_root); + init_git_repo(&temp_root); + + // Run generate_and_validate with staging enabled + let run_config = build_run_config(&temp_root, ".github/CODEOWNERS"); + let result = runner::generate_and_validate(&run_config, vec![], true); + assert!(result.io_errors.is_empty(), "io_errors: {:?}", result.io_errors); + assert!( + result.validation_errors.is_empty(), + "validation_errors: {:?}", + result.validation_errors + ); + + // Assert CODEOWNERS file exists and is staged + let rel_path = ".github/CODEOWNERS"; + assert!(run_config.codeowners_file_path.exists(), "CODEOWNERS file was not created"); + assert!(is_file_staged(&run_config.project_root, rel_path), "CODEOWNERS file was not staged"); +} diff --git a/tests/valid_project_test.rs b/tests/valid_project_test.rs index 6ee2caa..d8b4e81 100644 --- a/tests/valid_project_test.rs +++ b/tests/valid_project_test.rs @@ -97,6 +97,26 @@ fn test_fast_for_file() -> Result<(), Box> { Ok(()) } +#[test] +fn test_fast_for_file_with_ignored_file() -> Result<(), Box> { + Command::cargo_bin("codeowners")? + .arg("--project-root") + .arg("tests/fixtures/valid_project") + .arg("--no-cache") + .arg("for-file") + .arg("should_be_ignored/an_ignored_file.rb") + .assert() + .success() + .stdout(predicate::eq(indoc! {" + Team: Unowned + Github Team: Unowned + Team YML: + Description: + - Unowned + "})); + Ok(()) +} + #[test] fn test_fast_for_file_full_path() -> Result<(), Box> { let project_root = Path::new("tests/fixtures/valid_project"); @@ -118,6 +138,26 @@ fn test_fast_for_file_full_path() -> Result<(), Box> { Ok(()) } +#[test] +fn test_for_file_with_components() -> Result<(), Box> { + Command::cargo_bin("codeowners")? + .arg("--project-root") + .arg("tests/fixtures/valid_project") + .arg("--no-cache") + .arg("for-file") + .arg("gems/pets/dog.rb") + .assert() + .success() + .stdout(predicate::eq(indoc! {" + Team: UX + Github Team: @UX + Team YML: config/teams/ux.yml + Description: + - Owner specified in Team YML's `owned_gems` + "})); + Ok(()) +} + #[test] fn test_for_file_same_team_multiple_ownerships() -> Result<(), Box> { Command::cargo_bin("codeowners")?