diff --git a/.config/nextest.toml b/.config/nextest.toml index bc5838d..bdc6d13 100644 --- a/.config/nextest.toml +++ b/.config/nextest.toml @@ -3,7 +3,7 @@ nextest-version = "0.9.77" [profile.default] # A profile to run most tests, except tests that run longer than 10 seconds -default-filter = "not test(#*rate_limit_secondary) - test(#git::test::with_*)" +default-filter = "not test(#*rate_limit_secondary) - test(#git::test::with_*) - test(#repo_get_sha)" # This will flag any test that runs longer than 10 seconds. Useful when writing new tests. slow-timeout = "10s" diff --git a/cpp-linter/src/cli/mod.rs b/cpp-linter/src/cli/mod.rs index 9b815ff..70efebb 100644 --- a/cpp-linter/src/cli/mod.rs +++ b/cpp-linter/src/cli/mod.rs @@ -195,6 +195,27 @@ pub struct SourceOptions { verbatim_doc_comment )] pub ignore: Vec, + + /// The git reference to use as the base for diffing changed files. + /// + /// This can be any valid git ref, such as a branch name, tag name, or commit SHA. + /// If it is an integer, then it is treated as the number of parent commits from HEAD. + /// + /// This option only applies to non-CI contexts (eg. local CLI use). + #[arg( + short = 'b', + long, + value_name = "REF", + help_heading = "Source options", + verbatim_doc_comment + )] + pub diff_base: Option, + + /// Assert this switch to ignore any staged changes when + /// generating a diff of changed files. + /// Useful when used with [`--diff-base`](#-b-diff-base). + #[arg(default_value_t = false, long, help_heading = "Source options")] + pub ignore_index: bool, } #[derive(Debug, Clone, Args)] diff --git a/cpp-linter/src/git.rs b/cpp-linter/src/git.rs index 3ffb5d7..71cbea2 100644 --- a/cpp-linter/src/git.rs +++ b/cpp-linter/src/git.rs @@ -8,7 +8,7 @@ //! (str or bytes) only happens in CI or when libgit2 cannot be used to initialize a //! repository. -use std::{ops::RangeInclusive, path::PathBuf}; +use std::{fmt::Display, ops::RangeInclusive, path::PathBuf}; use anyhow::{Context, Result}; // non-std crates @@ -33,9 +33,26 @@ pub fn open_repo(path: &str) -> Result { /// /// The optionally specified `depth` can be used to traverse the tree a number of times /// since the current `"HEAD"`. -fn get_sha(repo: &Repository, depth: Option) -> Result, Error> { +fn get_sha<'d, T: Display>( + repo: &'d Repository, + depth: &Option, +) -> Result, Error> { match depth { - Some(int) => repo.revparse_single(format!("HEAD~{}", int).as_str()), + Some(base) => { + let base = base.to_string(); + // First treat base as an explicit refs/SHAs. If that fails, then + // fall back to `HEAD~` if `base` is purely numeric. + match repo.revparse_single(base.as_str()) { + Ok(obj) => Ok(obj), + Err(err) => { + if base.chars().all(|c| c.is_ascii_digit()) { + repo.revparse_single(format!("HEAD~{}", base).as_str()) + } else { + Err(err) + } + } + } + } None => repo.revparse_single("HEAD"), } } @@ -43,40 +60,76 @@ fn get_sha(repo: &Repository, depth: Option) -> Result, Er /// Fetch the [`git2::Diff`] about a given [`git2::Repository`]. /// /// This is actually not used in CI for file permissions and ownership reasons. -/// Rather this is only (supposed to be) used when executed on a local developer +/// Rather, this is only (supposed to be) used when executed on a local developer /// machine. /// -/// If there are files staged for a commit, then the resulting [`Diff`] will describe -/// the staged changes. However, if there are no staged changes, then the last commit's -/// [`Diff`] is returned. -pub fn get_diff(repo: &'_ Repository) -> Result> { - let head = get_sha(repo, None).unwrap().peel_to_tree().unwrap(); - let mut has_staged_files = false; - for entry in repo.statuses(None).unwrap().iter() { - if entry.status().bits() - & (git2::Status::INDEX_NEW.bits() - | git2::Status::INDEX_MODIFIED.bits() - | git2::Status::INDEX_RENAMED.bits()) - > 0 - { - has_staged_files = true; - break; - } - } +/// ## Using `diff_base` and `ignore_index` +/// +/// The `diff_base` is a commit or ref to use as the base of the diff. +/// Use `ignore_index` to exclude any staged changes in the local index. +/// +/// | `diff_base` value | Git index state | Scope of diff | +/// |-------------------|-----------------|---------------| +/// | `None` | No staged changes | `HEAD~1..HEAD` | +/// | `None` | Has staged changes | `HEAD..index` | +/// | `Some(2)` or `Some("HEAD~2")` | No staged changes | `HEAD~2..HEAD` | +/// | `Some(2)` or `Some("HEAD~2")` | Has staged changes | `HEAD~2..index` | +pub fn get_diff<'d, T: Display>( + repo: &'d Repository, + diff_base: &Option, + ignore_index: bool, +) -> Result> { + let use_staged_files = if ignore_index { + false + } else { + // check if there are staged file changes + repo.statuses(None) + .with_context(|| "Could not get repo statuses")? + .iter() + .any(|entry| { + entry.status().bits() + & (git2::Status::INDEX_NEW.bits() + | git2::Status::INDEX_MODIFIED.bits() + | git2::Status::INDEX_RENAMED.bits()) + > 0 + }) + }; + let base = if diff_base.is_some() { + // diff base is specified (regardless of staged changes) + get_sha(repo, diff_base) + } else if !use_staged_files { + // diff base is unspecified, when the repo has + // no staged changes (and they are not ignored), + // then focus on just the last commit + get_sha(repo, &Some(1)) + } else { + // diff base is unspecified and there are staged changes, so + // let base be set to HEAD. + get_sha(repo, &None::) + }? + .peel_to_tree()?; // RARE BUG when `head` is the first commit in the repo! Affects local-only runs. - // > panicked at cpp-linter\src\git.rs:73:43: - // > called `Result::unwrap()` on an `Err` value: // > Error { code: -3, class: 3, message: "parent 0 does not exist" } - if has_staged_files { - // get diff for staged files only - repo.diff_tree_to_index(Some(&head), None, None) - .with_context(|| "Could not get diff for current changes in local repo index") + if use_staged_files { + // get diff including staged files + repo.diff_tree_to_index(Some(&base), None, None) + .with_context(|| { + format!( + "Could not get diff for {}..index", + &base.id().to_string()[..7] + ) + }) } else { - // get diff for last commit only - let base = get_sha(repo, Some(1)).unwrap().peel_to_tree().unwrap(); + // get diff for range of commits between base..HEAD + let head = get_sha(repo, &None::)?.peel_to_tree()?; repo.diff_tree_to_tree(Some(&base), Some(&head), None) - .with_context(|| "Could not get diff for last commit") + .with_context(|| { + format!( + "Could not get diff for {}..HEAD", + &base.id().to_string()[..7] + ) + }) } } @@ -105,8 +158,10 @@ fn parse_patch(patch: &Patch) -> (Vec, Vec>) { /// Parses a given [`git2::Diff`] and returns a list of [`FileObj`]s. /// -/// The specified list of `extensions`, `ignored` and `not_ignored` files are used as -/// filters to expedite the process and only focus on the data cpp_linter can use. +/// The `lines_changed_only` parameter is used to expedite the process and only +/// focus on files that have relevant changes. The `file_filter` parameter applies +/// a filter to only include source files (or ignored files) based on the +/// extensions and ignore patterns specified. pub fn parse_diff( diff: &git2::Diff, file_filter: &FileFilter, @@ -393,19 +448,30 @@ mod test { fs::read, }; - use git2::build::CheckoutBuilder; - use git2::{ApplyLocation, Diff, IndexAddOption, Repository}; + use git2::{ApplyLocation, Diff, IndexAddOption, Repository, build::CheckoutBuilder}; + use tempfile::{TempDir, tempdir}; + + use super::get_sha; + use crate::{ + cli::LinesChangedOnly, + common_fs::FileFilter, + rest_api::{RestApiClient, github::GithubApiClient}, + }; + + const TEST_REPO_URL: &str = "https://github.com/cpp-linter/cpp-linter"; // used to setup a testing stage - fn clone_repo(url: &str, sha: &str, path: &str, patch_path: Option<&str>) { - let repo = Repository::clone(url, path).unwrap(); - let commit = repo.revparse_single(sha).unwrap(); - repo.checkout_tree( - &commit, - Some(CheckoutBuilder::new().force().recreate_missing(true)), - ) - .unwrap(); - repo.set_head_detached(commit.id()).unwrap(); + fn clone_repo(sha: Option<&str>, path: &str, patch_path: Option<&str>) -> Repository { + let repo = Repository::clone(TEST_REPO_URL, path).unwrap(); + if let Some(sha) = sha { + let commit = repo.revparse_single(sha).unwrap(); + repo.checkout_tree( + &commit, + Some(CheckoutBuilder::new().force().recreate_missing(true)), + ) + .unwrap(); + repo.set_head_detached(commit.id()).unwrap(); + } if let Some(patch) = patch_path { let diff = Diff::from_buffer(&read(patch).unwrap()).unwrap(); repo.apply(&diff, ApplyLocation::Both, None).unwrap(); @@ -415,16 +481,9 @@ mod test { .unwrap(); index.write().unwrap(); } + repo } - use tempfile::{TempDir, tempdir}; - - use crate::{ - cli::LinesChangedOnly, - common_fs::FileFilter, - rest_api::{RestApiClient, github::GithubApiClient}, - }; - fn get_temp_dir() -> TempDir { let tmp = tempdir().unwrap(); println!("Using temp folder at {:?}", tmp.path()); @@ -436,11 +495,10 @@ mod test { extensions: &[String], tmp: &TempDir, patch_path: Option<&str>, + ignore_staged: bool, ) -> Vec { - let url = "https://github.com/cpp-linter/cpp-linter"; clone_repo( - url, - sha, + Some(sha), tmp.path().as_os_str().to_str().unwrap(), patch_path, ); @@ -453,7 +511,12 @@ mod test { } rest_api_client .unwrap() - .get_list_of_changed_files(&file_filter, &LinesChangedOnly::Off) + .get_list_of_changed_files( + &file_filter, + &LinesChangedOnly::Off, + if ignore_staged { &Some(0) } else { &None:: }, + ignore_staged, + ) .await .unwrap() } @@ -465,7 +528,7 @@ mod test { let cur_dir = current_dir().unwrap(); let tmp = get_temp_dir(); let extensions = vec!["cpp".to_string(), "hpp".to_string()]; - let files = checkout_cpp_linter_py_repo(sha, &extensions, &tmp, None).await; + let files = checkout_cpp_linter_py_repo(sha, &extensions, &tmp, None, false).await; println!("files = {:?}", files); assert!(files.is_empty()); set_current_dir(cur_dir).unwrap(); // prep to delete temp_folder @@ -479,7 +542,7 @@ mod test { let cur_dir = current_dir().unwrap(); let tmp = get_temp_dir(); let extensions = vec!["cpp".to_string(), "hpp".to_string()]; - let files = checkout_cpp_linter_py_repo(sha, &extensions.clone(), &tmp, None).await; + let files = checkout_cpp_linter_py_repo(sha, &extensions.clone(), &tmp, None, false).await; println!("files = {:?}", files); assert!(files.len() >= 2); for file in files { @@ -503,6 +566,7 @@ mod test { &extensions.clone(), &tmp, Some("tests/git_status_test_assets/cpp-linter/cpp-linter/test_git_lib.patch"), + false, ) .await; println!("files = {:?}", files); @@ -515,4 +579,39 @@ mod test { set_current_dir(cur_dir).unwrap(); // prep to delete temp_folder drop(tmp); // delete temp_folder } + + #[tokio::test] + async fn with_ignored_staged_changes() { + // commit with no modified C/C++ sources + let sha = "0c236809891000b16952576dc34de082d7a40bf3"; + let cur_dir = current_dir().unwrap(); + let tmp = get_temp_dir(); + let extensions = vec!["cpp".to_string(), "hpp".to_string()]; + let files = checkout_cpp_linter_py_repo( + sha, + &extensions.clone(), + &tmp, + Some("tests/git_status_test_assets/cpp-linter/cpp-linter/test_git_lib.patch"), + true, + ) + .await; + println!("files = {:?}", files); + assert!(files.is_empty()); + set_current_dir(cur_dir).unwrap(); // prep to delete temp_folder + drop(tmp); // delete temp_folder + } + + #[test] + fn repo_get_sha() { + let tmp_dir = get_temp_dir(); + let repo = clone_repo(None, tmp_dir.path().to_str().unwrap(), None); + for (ours, theirs) in [(None::, "HEAD"), (Some(2), "HEAD~2")] { + let our_obj = get_sha(&repo, &ours).unwrap(); + let their_obj = get_sha(&repo, &Some(theirs)).unwrap(); + assert_eq!(our_obj.id(), their_obj.id()); + } + // test an invalid ref for coverage measurement + assert!(get_sha(&repo, &Some("1.0")).is_err()); + drop(tmp_dir); // delete temp_folder + } } diff --git a/cpp-linter/src/rest_api/github/mod.rs b/cpp-linter/src/rest_api/github/mod.rs index 3e7a4fa..0124b94 100644 --- a/cpp-linter/src/rest_api/github/mod.rs +++ b/cpp-linter/src/rest_api/github/mod.rs @@ -4,6 +4,7 @@ //! In other (private) submodules we implement behavior specific to Github's REST API. use std::env; +use std::fmt::Display; use std::fs::OpenOptions; use std::io::Write; use std::sync::{Arc, Mutex}; @@ -119,10 +120,12 @@ impl RestApiClient for GithubApiClient { Ok(headers) } - async fn get_list_of_changed_files( + async fn get_list_of_changed_files( &self, file_filter: &FileFilter, lines_changed_only: &LinesChangedOnly, + diff_base: &Option, + ignore_index: bool, ) -> Result> { if env::var("CI").is_ok_and(|val| val.as_str() == "true") && let Some(repo) = self.repo.as_ref() @@ -171,7 +174,11 @@ impl RestApiClient for GithubApiClient { let repo = open_repo(".").with_context( || "Please ensure the repository is checked out before running cpp-linter.", )?; - let list = parse_diff(&get_diff(&repo)?, file_filter, lines_changed_only); + let list = parse_diff( + &get_diff(&repo, diff_base, ignore_index)?, + file_filter, + lines_changed_only, + ); Ok(list) } } @@ -430,7 +437,12 @@ mod test { env::set_current_dir(tmp_dir.path()).unwrap(); let rest_client = GithubApiClient::new().unwrap(); let files = rest_client - .get_list_of_changed_files(&FileFilter::new(&[], vec![]), &LinesChangedOnly::Off) + .get_list_of_changed_files( + &FileFilter::new(&[], vec![]), + &LinesChangedOnly::Off, + &None::, + false, + ) .await; assert!(files.is_err()) } diff --git a/cpp-linter/src/rest_api/mod.rs b/cpp-linter/src/rest_api/mod.rs index 4eb5b4e..7d620d2 100644 --- a/cpp-linter/src/rest_api/mod.rs +++ b/cpp-linter/src/rest_api/mod.rs @@ -3,7 +3,7 @@ //! //! Currently, only Github is supported. -use std::fmt::Debug; +use std::fmt::{Debug, Display}; use std::future::Future; use std::path::PathBuf; use std::sync::{Arc, Mutex}; @@ -103,10 +103,16 @@ pub trait RestApiClient { /// /// The context of the file changes are subject to the type of event in which /// cpp_linter package is used. - fn get_list_of_changed_files( + /// + /// See [`get_diff()`](crate::git::get_diff()) for explanation of + /// `diff_base` and `ignore_index` parameters, which only applies to a + /// local (non-CI) environment. + fn get_list_of_changed_files( &self, file_filter: &FileFilter, lines_changed_only: &LinesChangedOnly, + diff_base: &Option, + ignore_index: bool, ) -> impl Future>>; /// Makes a comment in MarkDown syntax based on the concerns in `format_advice` and @@ -383,6 +389,7 @@ fn make_tidy_comment( /// from `try_next_page()` and `send_api_request()` functions. #[cfg(test)] mod test { + use std::fmt::Display; use std::sync::{Arc, Mutex}; use anyhow::{Result, anyhow}; @@ -422,10 +429,12 @@ mod test { Err(anyhow!("Not implemented")) } - async fn get_list_of_changed_files( + async fn get_list_of_changed_files( &self, _file_filter: &FileFilter, _lines_changed_only: &LinesChangedOnly, + _diff_base: &Option, + _ignore_index: bool, ) -> Result> { Err(anyhow!("Not implemented")) } @@ -456,7 +465,12 @@ mod test { assert_eq!(dummy.set_exit_code(1, None, None), 0); assert!( dummy - .get_list_of_changed_files(&FileFilter::new(&[], vec![]), &LinesChangedOnly::Off) + .get_list_of_changed_files( + &FileFilter::new(&[], vec![]), + &LinesChangedOnly::Off, + &None::, + false + ) .await .is_err() ); diff --git a/cpp-linter/src/run.rs b/cpp-linter/src/run.rs index 1fdb747..f21276c 100644 --- a/cpp-linter/src/run.rs +++ b/cpp-linter/src/run.rs @@ -104,14 +104,24 @@ pub async fn run_main(args: Vec) -> Result<()> { { // parse_diff(github_rest_api_payload) rest_api_client - .get_list_of_changed_files(&file_filter, &cli.source_options.lines_changed_only) + .get_list_of_changed_files( + &file_filter, + &cli.source_options.lines_changed_only, + &cli.source_options.diff_base, + cli.source_options.ignore_index, + ) .await? } else { // walk the folder and look for files with specified extensions according to ignore values. let mut all_files = file_filter.list_source_files(".")?; if is_pr && (cli.feedback_options.tidy_review || cli.feedback_options.format_review) { let changed_files = rest_api_client - .get_list_of_changed_files(&file_filter, &LinesChangedOnly::Off) + .get_list_of_changed_files( + &file_filter, + &LinesChangedOnly::Off, + &cli.source_options.diff_base, + cli.source_options.ignore_index, + ) .await?; for changed_file in changed_files { for file in &mut all_files { diff --git a/cpp-linter/tests/paginated_changed_files.rs b/cpp-linter/tests/paginated_changed_files.rs index a0e15b9..1501548 100644 --- a/cpp-linter/tests/paginated_changed_files.rs +++ b/cpp-linter/tests/paginated_changed_files.rs @@ -144,7 +144,7 @@ async fn get_paginated_changes(lib_root: &Path, test_params: &TestParams) { let file_filter = FileFilter::new(&[], vec!["cpp".to_string(), "hpp".to_string()]); let files = client - .get_list_of_changed_files(&file_filter, &LinesChangedOnly::Off) + .get_list_of_changed_files(&file_filter, &LinesChangedOnly::Off, &None::, false) .await; match files { Err(e) => { diff --git a/docs/cli.yml b/docs/cli.yml index 4932ef7..7f9bb3b 100644 --- a/docs/cli.yml +++ b/docs/cli.yml @@ -49,6 +49,10 @@ inputs: required-permission: 'pull-requests: write #pull-request-reviews' jobs: minimum-version: '1.8.1' + diff-base: + minimum-version: '1.12.0' + ignore-index: + minimum-version: '1.12.0' outputs: checks-failed: minimum-version: '1.4.6' diff --git a/docs/src/lib.rs b/docs/src/lib.rs index 304c22d..2c8d4be 100644 --- a/docs/src/lib.rs +++ b/docs/src/lib.rs @@ -84,17 +84,11 @@ fn generate_cli_doc(metadata: HashMap>>) -> Py "Failed to get long name of argument with id {}", arg_id.as_str() )))?; - out.push_str( - format!( - "\n### `-{}, --{}`\n\n", - arg.get_short().ok_or(PyValueError::new_err(format!( - "Failed to get short name for argument with id {}", - arg_id.as_str() - )))?, - long_name - ) - .as_str(), - ); + let short_name = arg + .get_short() + .map(|c| format!("-{}, ", c)) + .unwrap_or_default(); + out.push_str(format!("\n### `{}--{}`\n\n", short_name, long_name).as_str()); if let Some(map) = metadata.get(long_name) { if let Some(val) = map.get("minimum-version") { out.push_str(format!("\n", val).as_str());