Skip to content

Commit 52638c9

Browse files
committed
make performance checks performant
1 parent abc5952 commit 52638c9

File tree

2 files changed

+83
-112
lines changed

2 files changed

+83
-112
lines changed

tests/performance.rs

Lines changed: 63 additions & 112 deletions
Original file line numberDiff line numberDiff line change
@@ -39,13 +39,18 @@ mod tests {
3939
#[case("chakracore")]
4040
#[ignore]
4141
fn test_human_only_edits_then_commit(#[case] repo_name: &str) {
42+
use std::time::Instant;
43+
4244
let repos = get_performance_repos();
4345
let test_repo = repos
4446
.get(repo_name)
4547
.expect(&format!("{} repo should be available", repo_name));
4648
// Find random files for testing
49+
println!("Finding random files for {}", repo_name);
50+
let start = Instant::now();
4751
let random_files = find_random_files(test_repo).expect("Should find random files");
48-
52+
let duration = start.elapsed();
53+
println!("Time taken to find random files: {:?}", duration);
4954
// Select 3 random files (not large ones)
5055
let files_to_edit: Vec<String> =
5156
random_files.random_files.iter().take(3).cloned().collect();
@@ -375,135 +380,80 @@ pub struct RandomFiles {
375380
///
376381
/// Returns:
377382
/// - 10 random files from the repository
378-
/// - 2 random large files that are between 5k-10k lines
383+
/// - up to 2 of the largest files that are at least 100KB (byte size is used as a proxy for line count)
379384
///
380-
/// This helper is useful for performance testing various operations on different file sizes
385+
/// This helper uses filesystem operations directly instead of git commands
386+
/// for much faster performance on large repositories.
381387
pub fn find_random_files(test_repo: &TestRepo) -> Result<RandomFiles, String> {
382-
use git_ai::git::repository::find_repository_in_path;
383-
384-
// Get the underlying Repository from the TestRepo path
385-
let repo = find_repository_in_path(test_repo.path().to_str().unwrap())
386-
.map_err(|e| format!("Failed to find repository: {:?}", e))?;
387-
388-
// Get HEAD commit
389-
let head = repo
390-
.head()
391-
.map_err(|e| format!("Failed to get HEAD: {:?}", e))?;
392-
let head_commit = head
393-
.target()
394-
.map_err(|e| format!("Failed to get HEAD target: {:?}", e))?;
395-
396-
// Use git ls-tree to get all files in the repository at HEAD
397-
let mut args = repo.global_args_for_exec();
398-
args.push("ls-tree".to_string());
399-
args.push("-r".to_string()); // Recursive
400-
args.push("--name-only".to_string());
401-
args.push(head_commit.clone());
402-
403-
let output = Command::new(git_ai::config::Config::get().git_cmd())
404-
.args(&args)
405-
.output()
406-
.map_err(|e| format!("Failed to run git ls-tree: {}", e))?;
407-
408-
if !output.status.success() {
409-
return Err(format!(
410-
"git ls-tree failed: {}",
411-
String::from_utf8_lossy(&output.stderr)
412-
));
413-
}
414-
415-
let all_files: Vec<String> = String::from_utf8_lossy(&output.stdout)
416-
.lines()
417-
.filter(|line| !line.is_empty())
418-
.map(|s| s.to_string())
419-
.collect();
420-
421-
if all_files.is_empty() {
422-
return Err("No files found in repository".to_string());
423-
}
424-
425-
// Select 10 random files
426-
let mut rng = thread_rng();
427-
let mut random_files: Vec<String> = all_files
428-
.choose_multiple(&mut rng, 10.min(all_files.len()))
429-
.cloned()
430-
.collect();
431-
432-
// Find large files (5k-10k lines)
433-
let mut large_files: Vec<String> = Vec::new();
434-
435-
// Shuffle to randomize the search order
436-
let mut shuffled_files = all_files.clone();
437-
shuffled_files.shuffle(&mut rng);
438-
439-
for file_path in shuffled_files {
440-
if large_files.len() >= 2 {
441-
break;
442-
}
443-
444-
// Read file content from HEAD
445-
let file_content = match repo.get_file_content(&file_path, &head_commit) {
446-
Ok(content) => content,
447-
Err(_) => continue, // Skip files that can't be read (binaries, etc.)
448-
};
388+
use std::fs;
449389

450-
// Count lines
451-
let line_count = file_content.iter().filter(|&&b| b == b'\n').count();
390+
let repo_path = test_repo.path();
452391

453-
if line_count >= 5000 && line_count <= 10000 {
454-
large_files.push(file_path);
455-
}
456-
}
392+
// Collect all files recursively, skipping .git directory
393+
let mut all_files: Vec<String> = Vec::new();
394+
let mut dirs_to_visit: Vec<std::path::PathBuf> = vec![repo_path.to_path_buf()];
457395

458-
// If we couldn't find 2 large files, fall back to the largest files we can find
459-
if large_files.len() < 2 {
460-
let mut file_sizes: Vec<(String, usize)> = Vec::new();
396+
while let Some(dir) = dirs_to_visit.pop() {
397+
let entries = fs::read_dir(&dir).map_err(|e| format!("Failed to read dir: {}", e))?;
461398

462-
// Sample a subset of files to check (to avoid checking all files in huge repos)
463-
let sample_size = 1000.min(all_files.len());
464-
let sample: Vec<String> = all_files
465-
.choose_multiple(&mut rng, sample_size)
466-
.cloned()
467-
.collect();
399+
for entry in entries.flatten() {
400+
let path = entry.path();
401+
let file_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
468402

469-
for file_path in sample {
470-
if large_files.contains(&file_path) {
403+
// Skip .git directory
404+
if file_name == ".git" {
471405
continue;
472406
}
473407

474-
if let Ok(content) = repo.get_file_content(&file_path, &head_commit) {
475-
let line_count = content.iter().filter(|&&b| b == b'\n').count();
476-
if line_count >= 1000 {
477-
// Only consider reasonably large files
478-
file_sizes.push((file_path, line_count));
408+
if path.is_dir() {
409+
dirs_to_visit.push(path);
410+
} else if path.is_file() {
411+
// Get relative path from repo root
412+
if let Ok(relative) = path.strip_prefix(repo_path) {
413+
if let Some(rel_str) = relative.to_str() {
414+
all_files.push(rel_str.to_string());
415+
}
479416
}
480417
}
481418
}
419+
}
482420

483-
// Sort by line count descending
484-
file_sizes.sort_by(|a, b| b.1.cmp(&a.1));
485-
486-
// Take additional large files to reach 2 total
487-
for (file_path, _line_count) in file_sizes.iter().take(2 - large_files.len()) {
488-
large_files.push(file_path.clone());
489-
}
421+
if all_files.is_empty() {
422+
return Err("No files found in repository".to_string());
490423
}
491424

492-
// Make sure random_files doesn't overlap with large_files
493-
random_files.retain(|f| !large_files.contains(f));
425+
let mut rng = thread_rng();
494426

495-
// If we removed some, add more random files
496-
while random_files.len() < 10 && random_files.len() < all_files.len() {
497-
if let Some(file) = all_files
498-
.choose(&mut rng)
499-
.filter(|f| !random_files.contains(f) && !large_files.contains(f))
500-
{
501-
random_files.push(file.clone());
502-
} else {
503-
break;
427+
// Find large files using file size as a proxy (>= 100KB considered large)
428+
// This is much faster than reading files to count lines
429+
const LARGE_FILE_THRESHOLD: u64 = 100 * 1024; // 100KB
430+
431+
let mut file_sizes: Vec<(String, u64)> = Vec::new();
432+
for file_path in &all_files {
433+
let full_path = repo_path.join(file_path);
434+
if let Ok(metadata) = fs::metadata(&full_path) {
435+
let size = metadata.len();
436+
if size >= LARGE_FILE_THRESHOLD {
437+
file_sizes.push((file_path.clone(), size));
438+
}
504439
}
505440
}
506441

442+
// Sort by size descending and take top 2
443+
file_sizes.sort_by(|a, b| b.1.cmp(&a.1));
444+
let large_files: Vec<String> = file_sizes.into_iter().take(2).map(|(p, _)| p).collect();
445+
446+
// Select 10 random files, excluding large files
447+
let candidates: Vec<&String> = all_files
448+
.iter()
449+
.filter(|f| !large_files.contains(f))
450+
.collect();
451+
452+
let random_files: Vec<String> = candidates
453+
.choose_multiple(&mut rng, 10.min(candidates.len()))
454+
.map(|s| (*s).clone())
455+
.collect();
456+
507457
Ok(RandomFiles {
508458
random_files,
509459
large_files,
@@ -678,10 +628,11 @@ impl Sampler {
678628
// Default setup: Reset to clean state before each run (not timed)
679629

680630
// 1. Clean any untracked files and directories
681-
repo.git(&["clean", "-fd"]).expect("Clean should succeed");
631+
repo.git_og(&["clean", "-fd"])
632+
.expect("Clean should succeed");
682633

683634
// 2. Reset --hard to clean any changes
684-
repo.git(&["reset", "--hard"])
635+
repo.git_og(&["reset", "--hard"])
685636
.expect("Reset --hard should succeed");
686637

687638
// 3. Get the default branch from the remote

tests/repos/test_repo.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,26 @@ impl TestRepo {
118118
return self.git_with_env(args, &[]);
119119
}
120120

121+
pub fn git_og(&self, args: &[&str]) -> Result<String, String> {
122+
let mut full_args: Vec<String> =
123+
vec!["-C".to_string(), self.path.to_str().unwrap().to_string()];
124+
full_args.extend(args.iter().map(|s| s.to_string()));
125+
126+
GitAiRepository::exec_git(&full_args)
127+
.map(|output| {
128+
let stdout = String::from_utf8_lossy(&output.stdout).to_string();
129+
let stderr = String::from_utf8_lossy(&output.stderr).to_string();
130+
if stdout.is_empty() {
131+
stderr
132+
} else if stderr.is_empty() {
133+
stdout
134+
} else {
135+
format!("{}{}", stdout, stderr)
136+
}
137+
})
138+
.map_err(|e| e.to_string())
139+
}
140+
121141
pub fn benchmark_git(&self, args: &[&str]) -> Result<BenchmarkResult, String> {
122142
let output = self.git_with_env(args, &[("GIT_AI_DEBUG_PERFORMANCE", "2")])?;
123143

0 commit comments

Comments
 (0)