@@ -30,6 +30,7 @@ fn setup() {
3030mod tests {
3131 use super :: * ;
3232 use git_ai:: observability:: wrapper_performance_targets:: PERFORMANCE_FLOOR_MS ;
33+ use rand:: seq:: SliceRandom ;
3334 use rstest:: rstest;
3435
3536 #[ rstest]
@@ -309,6 +310,130 @@ mod tests {
309310 "Average overhead should be less than 20%"
310311 ) ;
311312 }
313+
314+ #[ rstest]
315+ #[ case( "chromium" ) ]
316+ #[ case( "react" ) ]
317+ #[ case( "node" ) ]
318+ #[ case( "chakracore" ) ]
319+ #[ ignore]
320+ fn test_large_checkpoints ( #[ case] repo_name : & str ) {
321+ use std:: time:: Instant ;
322+
323+ let repos = get_performance_repos ( ) ;
324+ let test_repo = repos
325+ . get ( repo_name)
326+ . expect ( & format ! ( "{} repo should be available" , repo_name) ) ;
327+
328+ // Find 1000 random files for testing
329+ println ! ( "Finding 1000 random files for {}" , repo_name) ;
330+ let start = Instant :: now ( ) ;
331+ let random_files = find_random_files_with_options (
332+ test_repo,
333+ FindRandomFilesOptions {
334+ random_file_count : 1000 ,
335+ large_file_count : 0 ,
336+ } ,
337+ )
338+ . expect ( "Should find random files" ) ;
339+ let duration = start. elapsed ( ) ;
340+ println ! ( "Time taken to find random files: {:?}" , duration) ;
341+
342+ let all_files: Vec < String > = random_files. random_files ;
343+ println ! ( "Found {} files to edit" , all_files. len( ) ) ;
344+
345+ assert ! (
346+ all_files. len( ) == 1000 ,
347+ "Should have at least 100 files to edit, found {}" ,
348+ all_files. len( )
349+ ) ;
350+
351+ // Create a sampler that runs 5 times (fewer due to the large number of files)
352+ let sampler = Sampler :: new ( 5 ) ;
353+
354+ // Sample the performance of large checkpoint operations
355+ let result = sampler. sample ( test_repo, |repo| {
356+ // Step 1: Edit all 1000 files (simulating AI edits)
357+ println ! ( "Editing {} files..." , all_files. len( ) ) ;
358+ for file_path in & all_files {
359+ let full_path = repo. path ( ) . join ( file_path) ;
360+
361+ let mut file = OpenOptions :: new ( )
362+ . append ( true )
363+ . open ( & full_path)
364+ . expect ( & format ! ( "Should be able to open file: {}" , file_path) ) ;
365+
366+ file. write_all ( b"\n # AI Generated Line\n " )
367+ . expect ( & format ! ( "Should be able to write to file: {}" , file_path) ) ;
368+ }
369+
370+ // Step 2: Run git-ai checkpoint mock_ai -- <all pathspecs>
371+ println ! ( "Running checkpoint mock_ai on {} files..." , all_files. len( ) ) ;
372+ let mut checkpoint_args: Vec < & str > = vec ! [ "checkpoint" , "mock_ai" , "--" ] ;
373+ let all_files_refs: Vec < & str > = all_files. iter ( ) . map ( |s| s. as_str ( ) ) . collect ( ) ;
374+ checkpoint_args. extend ( all_files_refs. iter ( ) ) ;
375+
376+ repo. git_ai ( & checkpoint_args)
377+ . expect ( "Checkpoint mock_ai should succeed" ) ;
378+
379+ // Step 3: Select 100 random files from the 1000 and edit them (simulating human edits)
380+ let mut rng = thread_rng ( ) ;
381+ let files_to_re_edit: Vec < String > = all_files
382+ . choose_multiple ( & mut rng, 100 . min ( all_files. len ( ) ) )
383+ . cloned ( )
384+ . collect ( ) ;
385+
386+ println ! (
387+ "Re-editing {} files (human edits)..." ,
388+ files_to_re_edit. len( )
389+ ) ;
390+ for file_path in & files_to_re_edit {
391+ let full_path = repo. path ( ) . join ( file_path) ;
392+
393+ let mut file = OpenOptions :: new ( )
394+ . append ( true )
395+ . open ( & full_path)
396+ . expect ( & format ! ( "Should be able to open file: {}" , file_path) ) ;
397+
398+ file. write_all ( b"\n # Human Line\n " )
399+ . expect ( & format ! ( "Should be able to write to file: {}" , file_path) ) ;
400+ }
401+
402+ // Step 4: Benchmark the checkpoint on the 100 human-edited files
403+ println ! (
404+ "Benchmarking checkpoint on {} files..." ,
405+ files_to_re_edit. len( )
406+ ) ;
407+ let mut final_checkpoint_args: Vec < & str > = vec ! [ "checkpoint" , "--" ] ;
408+ let files_to_re_edit_refs: Vec < & str > =
409+ files_to_re_edit. iter ( ) . map ( |s| s. as_str ( ) ) . collect ( ) ;
410+ final_checkpoint_args. extend ( files_to_re_edit_refs. iter ( ) ) ;
411+
412+ repo. benchmark_git_ai ( & final_checkpoint_args)
413+ . expect ( "Checkpoint should succeed" )
414+ } ) ;
415+
416+ // Print the results
417+ result. print_summary ( & format ! ( "Large checkpoints ({})" , repo_name) ) ;
418+
419+ // For checkpoint operations, we measure time per file
420+ // The benchmark is on 100 files, so we calculate ms per file
421+ let files_benchmarked = 100 ;
422+ let avg_total_ms = result. average . total_duration . as_millis ( ) as f64 ;
423+ let ms_per_file = avg_total_ms / files_benchmarked as f64 ;
424+
425+ println ! (
426+ "Average total time: {:.2}ms, Files: {}, Time per file: {:.2}ms" ,
427+ avg_total_ms, files_benchmarked, ms_per_file
428+ ) ;
429+
430+ // Assert that checkpoint takes less than 50ms per file on average
431+ assert ! (
432+ ms_per_file < 50.0 ,
433+ "Checkpoint should take less than 50ms per file, got {:.2}ms per file" ,
434+ ms_per_file
435+ ) ;
436+ }
312437}
313438
314439const PERFORMANCE_REPOS : & [ ( & str , & str ) ] = & [
@@ -370,12 +495,29 @@ pub fn get_performance_repos() -> &'static HashMap<String, TestRepo> {
/// Result of finding random files in a repository
#[derive(Debug)]
pub struct RandomFiles {
    /// Randomly selected files from the repository, excluding any file that
    /// was picked as a large file (default 10)
    pub random_files: Vec<String>,
    /// The largest files found, ranked by byte size as a proxy for line count
    /// (default 2)
    pub large_files: Vec<String>,
}
378503
/// Options for finding random files
///
/// Use [`Default::default`] for the standard selection (10 random files and
/// 2 large files), or struct-update syntax to override a single field.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct FindRandomFilesOptions {
    /// Number of random files to find (default 10)
    pub random_file_count: usize,
    /// Number of large files to find (default 2)
    pub large_file_count: usize,
}

impl Default for FindRandomFilesOptions {
    /// Returns the standard selection: 10 random files and 2 large files.
    fn default() -> Self {
        Self {
            random_file_count: 10,
            large_file_count: 2,
        }
    }
}
520+
379521/// Find random files in a repository for performance testing
380522///
381523/// Returns:
@@ -385,6 +527,21 @@ pub struct RandomFiles {
385527/// This helper uses filesystem operations directly instead of git commands
386528/// for much faster performance on large repositories.
387529pub fn find_random_files ( test_repo : & TestRepo ) -> Result < RandomFiles , String > {
530+ find_random_files_with_options ( test_repo, FindRandomFilesOptions :: default ( ) )
531+ }
532+
533+ /// Find random files in a repository with custom options
534+ ///
535+ /// Returns:
536+ /// - `random_file_count` random files from the repository
537+ /// - `large_file_count` random large files (by byte size, as a proxy for line count)
538+ ///
539+ /// This helper uses filesystem operations directly instead of git commands
540+ /// for much faster performance on large repositories.
541+ pub fn find_random_files_with_options (
542+ test_repo : & TestRepo ,
543+ options : FindRandomFilesOptions ,
544+ ) -> Result < RandomFiles , String > {
388545 use std:: fs;
389546
390547 let repo_path = test_repo. path ( ) ;
@@ -439,18 +596,22 @@ pub fn find_random_files(test_repo: &TestRepo) -> Result<RandomFiles, String> {
439596 }
440597 }
441598
442- // Sort by size descending and take top 2
599+ // Sort by size descending and take top N
443600 file_sizes. sort_by ( |a, b| b. 1 . cmp ( & a. 1 ) ) ;
444- let large_files: Vec < String > = file_sizes. into_iter ( ) . take ( 2 ) . map ( |( p, _) | p) . collect ( ) ;
601+ let large_files: Vec < String > = file_sizes
602+ . into_iter ( )
603+ . take ( options. large_file_count )
604+ . map ( |( p, _) | p)
605+ . collect ( ) ;
445606
446- // Select 10 random files, excluding large files
607+ // Select N random files, excluding large files
447608 let candidates: Vec < & String > = all_files
448609 . iter ( )
449610 . filter ( |f| !large_files. contains ( f) )
450611 . collect ( ) ;
451612
452613 let random_files: Vec < String > = candidates
453- . choose_multiple ( & mut rng, 10 . min ( candidates. len ( ) ) )
614+ . choose_multiple ( & mut rng, options . random_file_count . min ( candidates. len ( ) ) )
454615 . map ( |s| ( * s) . clone ( ) )
455616 . collect ( ) ;
456617
0 commit comments