1+ #! /bin/bash
2+
3+ # -----------------------------------------------------------------------------
4+ # Script Name: git-repo-commit-analyzer.sh
5+ # Description: This script analyzes the size of all commits in a Git repository.
6+ # It generates detailed logs, CSV reports, and exception logs for
7+ # commits that meet or exceed a specified size threshold.
8+ #
9+ # Usage: ./git-repo-commit-analyzer.sh <size-threshold-in-bytes>
10+ #
11+ # Features:
12+ # - Analyzes all commits in the current Git repository.
13+ # - Calculates the total size of each commit and the number of files it modifies.
14+ # - Generates the following output files:
15+ # 1. A log file with detailed analysis of all commits.
16+ # 2. A CSV file summarizing commit hash, size, and file count.
#      3. An exceptions log file for commits meeting or exceeding the size
#         threshold, including detailed file sizes for each commit.
19+ # - Identifies and logs the largest commit in the repository.
20+ #
21+ # Requirements:
22+ # - Must be run from within a valid Git repository.
23+ # - Requires a size threshold (in bytes) to be passed as an argument.
24+ #
25+ # Output Files:
26+ # - <repo-name>-analyzer-<timestamp>.log
27+ # - <repo-name>-commits-size-<timestamp>.csv
28+ # - <repo-name>-commit-size-exceptions-<timestamp>.log
29+ #
30+ # Example:
31+ # ./git-repo-commit-analyzer.sh 100000
32+ # This will analyze all commits in the repository and log details for commits
33+ # with a total size of 100,000 bytes or more.
34+ #
35+ # Author: Mickey Gousset (@mickeygousset)
36+ # Date: 2025-04-05
37+ # -----------------------------------------------------------------------------
38+
# Abort on the first unhandled command failure.
set -e

# Verify this is a valid git repository. rev-parse prints "true" only when the
# current directory is inside a work tree; stderr is discarded because only the
# answer matters here.
if [ "$(git rev-parse --is-inside-work-tree 2> /dev/null)" != "true" ]; then
  echo "Error: Not in a git repository" >&2
  exit 1
fi

# Get the repository name (last path component of the work tree root)
REPO_NAME=$(basename "$(git rev-parse --show-toplevel)")

# Check if a size threshold is provided
if [ $# -ne 1 ]; then
  echo "Usage: $0 <size-threshold-in-bytes>" >&2
  exit 1
fi

SIZE_THRESHOLD=$1

# The threshold is later compared numerically against commit sizes, so reject
# anything that is not a plain non-negative integer up front.
case "$SIZE_THRESHOLD" in
  '' | *[!0-9]*)
    echo "Error: size threshold must be a non-negative integer (bytes)" >&2
    echo "Usage: $0 <size-threshold-in-bytes>" >&2
    exit 1
    ;;
esac

# Generate timestamp for log and CSV filenames
TIMESTAMP=$(date +"%Y%m%d-%H%M%S")
LOG_FILE="${REPO_NAME}-analyzer-${TIMESTAMP}.log"
CSV_FILE="${REPO_NAME}-commits-size-${TIMESTAMP}.csv"
EXCEPTIONS_FILE="${REPO_NAME}-commit-size-exceptions-${TIMESTAMP}.log"

# Initialize variables to track the largest commit
LARGEST_COMMIT=""
LARGEST_SIZE=0

# Create the CSV file and add the header
printf '%s\n' "Commit Hash,Commit Size (bytes),Number of Files" > "$CSV_FILE"

# Create the exceptions file
printf '%s\n' "Commits meeting or exceeding the size threshold ($SIZE_THRESHOLD bytes) in repository '$REPO_NAME':" > "$EXCEPTIONS_FILE"
73+
#######################################
# Print a message to the screen and append it to the analysis log.
# Globals:   LOG_FILE (read) - path of the log file to append to
# Arguments: $1 - message text
# Outputs:   the message, followed by a newline, on stdout and in LOG_FILE
#######################################
log() {
  # printf instead of echo so messages such as "-n" or "-e" are printed
  # verbatim rather than being taken as echo options.
  printf '%s\n' "$1" | tee -a "$LOG_FILE"
}
78+
#######################################
# Print a message to the screen and append it to the exceptions log.
# Globals:   EXCEPTIONS_FILE (read) - path of the exceptions file to append to
# Arguments: $1 - message text
# Outputs:   the message, followed by a newline, on stdout and in
#            EXCEPTIONS_FILE
#######################################
log_exception() {
  # printf instead of echo so file names or messages that look like echo
  # options (e.g. "-n") are printed verbatim.
  printf '%s\n' "$1" | tee -a "$EXCEPTIONS_FILE"
}
83+
# Commits whose total size meets or exceeds SIZE_THRESHOLD, stored as
# "<hash> (<size> bytes)" for the final summary.
declare -a LARGE_COMMITS=()

# Loop through all commits reachable from any ref. Commit hashes contain no
# whitespace or glob characters, so word-splitting the rev-list output is safe.
for COMMIT in $(git rev-list --all); do
  log "Analyzing commit: $COMMIT in repository '$REPO_NAME'"
  log "------------------------"

  # A commit without a first parent is a root commit; note it in the log.
  if ! git rev-parse --verify --quiet "${COMMIT}^" > /dev/null 2>&1; then
    log "This is the initial commit. Comparing with empty tree."
  fi

  # Collect the paths changed by this commit.
  #   --root  reports a root commit as additions against the empty tree
  #           (without it diff-tree prints nothing for a root commit)
  #   -z      NUL-terminates paths and disables path quoting, so names with
  #           spaces, quotes or non-ASCII bytes are read back unmodified
  # As with a plain single-commit diff-tree, merge commits list no paths.
  CHANGED_FILES=()
  while IFS= read -r -d '' file; do
    CHANGED_FILES+=("$file")
  done < <(git diff-tree --no-commit-id --name-only -r --root -z "$COMMIT" 2> /dev/null)

  if [ "${#CHANGED_FILES[@]}" -eq 0 ]; then
    log "No files changed in this commit."
    continue
  fi

  # Look up each changed file's blob size once, accumulating the totals and
  # remembering the per-file lines in case the commit crosses the threshold.
  TOTAL_SIZE=0
  FILE_COUNT=0
  FILE_REPORT=()
  for file in "${CHANGED_FILES[@]}"; do
    # "<commit>:<path>" addresses the object at that path relative to the
    # repository root, independent of the current directory. The lookup fails
    # for paths deleted by this commit and for submodule entries; those are
    # skipped.
    SIZE=$(git cat-file -s "$COMMIT:$file" 2> /dev/null) || continue

    # Only non-empty files contribute to the total and the file count.
    if [ "$SIZE" -gt 0 ]; then
      TOTAL_SIZE=$((TOTAL_SIZE + SIZE))
      FILE_COUNT=$((FILE_COUNT + 1))
    fi

    FILE_REPORT+=("$file: $SIZE bytes")
  done

  # Log total for this commit
  log "Total files: $FILE_COUNT"
  log "Total size: $TOTAL_SIZE bytes"
  log "------------------------"

  # Append commit details to the CSV file
  printf '%s\n' "$COMMIT,$TOTAL_SIZE,$FILE_COUNT" >> "$CSV_FILE"

  # Check if this is the largest commit
  if [ "$TOTAL_SIZE" -gt "$LARGEST_SIZE" ]; then
    LARGEST_SIZE=$TOTAL_SIZE
    LARGEST_COMMIT=$COMMIT
  fi

  # Check if the commit meets the size threshold
  if [ "$TOTAL_SIZE" -ge "$SIZE_THRESHOLD" ]; then
    LARGE_COMMITS+=("$COMMIT ($TOTAL_SIZE bytes)")

    # Log details to the exceptions file
    log_exception "Commit: $COMMIT"
    log_exception "Total Size: $TOTAL_SIZE bytes"
    log_exception "Files:"

    # Log each file and its size, reusing the sizes gathered above
    for ENTRY in "${FILE_REPORT[@]}"; do
      log_exception "$ENTRY"
    done

    log_exception "------------------------"
  fi
done
178+
# Output the largest commit found during the scan. LARGEST_COMMIT stays empty
# and LARGEST_SIZE stays 0 if no commit had a positive total size.
log "Largest commit: $LARGEST_COMMIT"
log "Largest size: $LARGEST_SIZE bytes"

# Output commits that meet the size threshold
if [ "${#LARGE_COMMITS[@]}" -gt 0 ]; then
  log "Commits meeting or exceeding the size threshold ($SIZE_THRESHOLD bytes):"
  for COMMIT_INFO in "${LARGE_COMMITS[@]}"; do
    log "$COMMIT_INFO"
  done
else
  log "No commits meet or exceed the size threshold ($SIZE_THRESHOLD bytes)."
fi

# Tell the user where the generated reports were written
log "Log file created: $LOG_FILE"
log "CSV file created: $CSV_FILE"
log "Exceptions file created: $EXCEPTIONS_FILE"
0 commit comments