@@ -39,13 +39,18 @@ mod tests {
3939 #[ case( "chakracore" ) ]
4040 #[ ignore]
4141 fn test_human_only_edits_then_commit ( #[ case] repo_name : & str ) {
42+ use std:: time:: Instant ;
43+
4244 let repos = get_performance_repos ( ) ;
4345 let test_repo = repos
4446 . get ( repo_name)
4547 . expect ( & format ! ( "{} repo should be available" , repo_name) ) ;
4648 // Find random files for testing
49+ println ! ( "Finding random files for {}" , repo_name) ;
50+ let start = Instant :: now ( ) ;
4751 let random_files = find_random_files ( test_repo) . expect ( "Should find random files" ) ;
48-
52+ let duration = start. elapsed ( ) ;
53+ println ! ( "Time taken to find random files: {:?}" , duration) ;
4954 // Select 3 random files (not large ones)
5055 let files_to_edit: Vec < String > =
5156 random_files. random_files . iter ( ) . take ( 3 ) . cloned ( ) . collect ( ) ;
@@ -375,135 +380,80 @@ pub struct RandomFiles {
375380///
376381/// Returns:
377382/// - 10 random files from the repository
378- /// - 2 random large files that are between 5k-10k lines
383+ /// - up to 2 of the largest files of at least 100KB (byte size is used as a proxy for line count)
379384///
380- /// This helper is useful for performance testing various operations on different file sizes
385+ /// This helper uses filesystem operations directly instead of git commands
386+ /// for much faster performance on large repositories.
381387pub fn find_random_files ( test_repo : & TestRepo ) -> Result < RandomFiles , String > {
382- use git_ai:: git:: repository:: find_repository_in_path;
383-
384- // Get the underlying Repository from the TestRepo path
385- let repo = find_repository_in_path ( test_repo. path ( ) . to_str ( ) . unwrap ( ) )
386- . map_err ( |e| format ! ( "Failed to find repository: {:?}" , e) ) ?;
387-
388- // Get HEAD commit
389- let head = repo
390- . head ( )
391- . map_err ( |e| format ! ( "Failed to get HEAD: {:?}" , e) ) ?;
392- let head_commit = head
393- . target ( )
394- . map_err ( |e| format ! ( "Failed to get HEAD target: {:?}" , e) ) ?;
395-
396- // Use git ls-tree to get all files in the repository at HEAD
397- let mut args = repo. global_args_for_exec ( ) ;
398- args. push ( "ls-tree" . to_string ( ) ) ;
399- args. push ( "-r" . to_string ( ) ) ; // Recursive
400- args. push ( "--name-only" . to_string ( ) ) ;
401- args. push ( head_commit. clone ( ) ) ;
402-
403- let output = Command :: new ( git_ai:: config:: Config :: get ( ) . git_cmd ( ) )
404- . args ( & args)
405- . output ( )
406- . map_err ( |e| format ! ( "Failed to run git ls-tree: {}" , e) ) ?;
407-
408- if !output. status . success ( ) {
409- return Err ( format ! (
410- "git ls-tree failed: {}" ,
411- String :: from_utf8_lossy( & output. stderr)
412- ) ) ;
413- }
414-
415- let all_files: Vec < String > = String :: from_utf8_lossy ( & output. stdout )
416- . lines ( )
417- . filter ( |line| !line. is_empty ( ) )
418- . map ( |s| s. to_string ( ) )
419- . collect ( ) ;
420-
421- if all_files. is_empty ( ) {
422- return Err ( "No files found in repository" . to_string ( ) ) ;
423- }
424-
425- // Select 10 random files
426- let mut rng = thread_rng ( ) ;
427- let mut random_files: Vec < String > = all_files
428- . choose_multiple ( & mut rng, 10 . min ( all_files. len ( ) ) )
429- . cloned ( )
430- . collect ( ) ;
431-
432- // Find large files (5k-10k lines)
433- let mut large_files: Vec < String > = Vec :: new ( ) ;
434-
435- // Shuffle to randomize the search order
436- let mut shuffled_files = all_files. clone ( ) ;
437- shuffled_files. shuffle ( & mut rng) ;
438-
439- for file_path in shuffled_files {
440- if large_files. len ( ) >= 2 {
441- break ;
442- }
443-
444- // Read file content from HEAD
445- let file_content = match repo. get_file_content ( & file_path, & head_commit) {
446- Ok ( content) => content,
447- Err ( _) => continue , // Skip files that can't be read (binaries, etc.)
448- } ;
388+ use std:: fs;
449389
450- // Count lines
451- let line_count = file_content. iter ( ) . filter ( |& & b| b == b'\n' ) . count ( ) ;
390+ let repo_path = test_repo. path ( ) ;
452391
453- if line_count >= 5000 && line_count <= 10000 {
454- large_files. push ( file_path) ;
455- }
456- }
392+ // Collect all files recursively, skipping .git directory
393+ let mut all_files: Vec < String > = Vec :: new ( ) ;
394+ let mut dirs_to_visit: Vec < std:: path:: PathBuf > = vec ! [ repo_path. to_path_buf( ) ] ;
457395
458- // If we couldn't find 2 large files, fall back to the largest files we can find
459- if large_files. len ( ) < 2 {
460- let mut file_sizes: Vec < ( String , usize ) > = Vec :: new ( ) ;
396+ while let Some ( dir) = dirs_to_visit. pop ( ) {
397+ let entries = fs:: read_dir ( & dir) . map_err ( |e| format ! ( "Failed to read dir: {}" , e) ) ?;
461398
462- // Sample a subset of files to check (to avoid checking all files in huge repos)
463- let sample_size = 1000 . min ( all_files. len ( ) ) ;
464- let sample: Vec < String > = all_files
465- . choose_multiple ( & mut rng, sample_size)
466- . cloned ( )
467- . collect ( ) ;
399+ for entry in entries. flatten ( ) {
400+ let path = entry. path ( ) ;
401+ let file_name = path. file_name ( ) . and_then ( |n| n. to_str ( ) ) . unwrap_or ( "" ) ;
468402
469- for file_path in sample {
470- if large_files . contains ( & file_path ) {
403+ // Skip .git directory
404+ if file_name == ".git" {
471405 continue ;
472406 }
473407
474- if let Ok ( content) = repo. get_file_content ( & file_path, & head_commit) {
475- let line_count = content. iter ( ) . filter ( |& & b| b == b'\n' ) . count ( ) ;
476- if line_count >= 1000 {
477- // Only consider reasonably large files
478- file_sizes. push ( ( file_path, line_count) ) ;
408+ if path. is_dir ( ) {
409+ dirs_to_visit. push ( path) ;
410+ } else if path. is_file ( ) {
411+ // Get relative path from repo root
412+ if let Ok ( relative) = path. strip_prefix ( repo_path) {
413+ if let Some ( rel_str) = relative. to_str ( ) {
414+ all_files. push ( rel_str. to_string ( ) ) ;
415+ }
479416 }
480417 }
481418 }
419+ }
482420
483- // Sort by line count descending
484- file_sizes. sort_by ( |a, b| b. 1 . cmp ( & a. 1 ) ) ;
485-
486- // Take additional large files to reach 2 total
487- for ( file_path, _line_count) in file_sizes. iter ( ) . take ( 2 - large_files. len ( ) ) {
488- large_files. push ( file_path. clone ( ) ) ;
489- }
421+ if all_files. is_empty ( ) {
422+ return Err ( "No files found in repository" . to_string ( ) ) ;
490423 }
491424
492- // Make sure random_files doesn't overlap with large_files
493- random_files. retain ( |f| !large_files. contains ( f) ) ;
425+ let mut rng = thread_rng ( ) ;
494426
495- // If we removed some, add more random files
496- while random_files. len ( ) < 10 && random_files. len ( ) < all_files. len ( ) {
497- if let Some ( file) = all_files
498- . choose ( & mut rng)
499- . filter ( |f| !random_files. contains ( f) && !large_files. contains ( f) )
500- {
501- random_files. push ( file. clone ( ) ) ;
502- } else {
503- break ;
427+ // Find large files using file size as a proxy (>= 100KB considered large)
428+ // This is much faster than reading files to count lines
429+ const LARGE_FILE_THRESHOLD : u64 = 100 * 1024 ; // 100KB
430+
431+ let mut file_sizes: Vec < ( String , u64 ) > = Vec :: new ( ) ;
432+ for file_path in & all_files {
433+ let full_path = repo_path. join ( file_path) ;
434+ if let Ok ( metadata) = fs:: metadata ( & full_path) {
435+ let size = metadata. len ( ) ;
436+ if size >= LARGE_FILE_THRESHOLD {
437+ file_sizes. push ( ( file_path. clone ( ) , size) ) ;
438+ }
504439 }
505440 }
506441
442+ // Sort by size descending and take top 2
443+ file_sizes. sort_by ( |a, b| b. 1 . cmp ( & a. 1 ) ) ;
444+ let large_files: Vec < String > = file_sizes. into_iter ( ) . take ( 2 ) . map ( |( p, _) | p) . collect ( ) ;
445+
446+ // Select 10 random files, excluding large files
447+ let candidates: Vec < & String > = all_files
448+ . iter ( )
449+ . filter ( |f| !large_files. contains ( f) )
450+ . collect ( ) ;
451+
452+ let random_files: Vec < String > = candidates
453+ . choose_multiple ( & mut rng, 10 . min ( candidates. len ( ) ) )
454+ . map ( |s| ( * s) . clone ( ) )
455+ . collect ( ) ;
456+
507457 Ok ( RandomFiles {
508458 random_files,
509459 large_files,
@@ -678,10 +628,11 @@ impl Sampler {
678628 // Default setup: Reset to clean state before each run (not timed)
679629
680630 // 1. Clean any untracked files and directories
681- repo. git ( & [ "clean" , "-fd" ] ) . expect ( "Clean should succeed" ) ;
631+ repo. git_og ( & [ "clean" , "-fd" ] )
632+ . expect ( "Clean should succeed" ) ;
682633
683634 // 2. Reset --hard to clean any changes
684- repo. git ( & [ "reset" , "--hard" ] )
635+ repo. git_og ( & [ "reset" , "--hard" ] )
685636 . expect ( "Reset --hard should succeed" ) ;
686637
687638 // 3. Get the default branch from the remote
0 commit comments