diff --git a/.github/workflows/lint-with-vale.yml b/.github/workflows/lint-with-vale.yml
new file mode 100644
index 000000000..1fcacca01
--- /dev/null
+++ b/.github/workflows/lint-with-vale.yml
@@ -0,0 +1,132 @@
+---
+name: Lint with Vale on pull requests
+on:
+  pull_request:
+    paths:
+      - "**.adoc"
+      - "**.md"
+      - content/learn/**
+      - content/patterns/**
+      - content/contribute/**
+      - modules/**
+jobs:
+  vale-lint:
+    name: Linting with Vale
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: write
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v5
+        with:
+          fetch-depth: 3
+      - name: Install dependencies
+        run: >
+          sudo DEBIAN_FRONTEND=noninteractive apt-get update
+
+          sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends --no-upgrade asciidoctor jq
+      - name: Install Vale
+        run: >
+          wget -O vale.tar.gz
+          https://github.com/errata-ai/vale/releases/download/v3.12.0/vale_3.12.0_Linux_64-bit.tar.gz
+
+          tar -xzf vale.tar.gz
+
+          sudo mv vale /usr/local/bin/
+
+          vale --version
+      - name: Run Vale linting script
+        env:
+          GITHUB_REPOSITORY: ${{ github.repository }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+        run: >
+          set +e
+
+          ./.github/workflows/scripts/lintwithvale.sh \
+            "${{ github.event.pull_request.base.sha }}" \
+            "${{ github.event.pull_request.head.sha }}" || {
+            echo "⚠️ Vale linting script encountered an error, but workflow will continue."
+          }
+
+          exit 0
+      - name: Prepare and post/update PR comment
+        if: ${{ github.event.pull_request.head.repo.full_name == github.repository }}
+        env:
+          GITHUB_TOKEN: ${{ secrets.VALE_GITHUB_TOKEN }}
+          REPO: ${{ github.repository }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          BOT_USERNAME: ocpdocs-previewbot
+        run: |
+          set -e
+          echo "Reading summary JSON..."
+          if [ ! -f vale_summary.json ]; then
+            echo "No vale_summary.json produced; skipping comment."
+            exit 0
+          fi
+          cat vale_summary.json
+          HAS_ERRORS=$(jq -r '.has_errors' vale_summary.json)
+          ERROR_COUNT=$(jq -r '.error_count' vale_summary.json)
+
+          # Find existing comment id (authored by bot and heading match)
+          EXISTING_ID=$(curl -s -H "Authorization: token $GITHUB_TOKEN" -H "Accept: application/vnd.github.v3+json" \
+            "https://api.github.com/repos/$REPO/issues/$PR_NUMBER/comments?per_page=100" | \
+            jq -r --arg bot "$BOT_USERNAME" '[.[] | select(.user.login==$bot) | select(.body | startswith("### 📝 Vale Linting Results"))][0].id')
+          echo "Existing comment id: ${EXISTING_ID:-none}"
+
+          if [ "$HAS_ERRORS" = "true" ]; then
+            if [ -n "$EXISTING_ID" ] && [ "$EXISTING_ID" != "null" ]; then
+              COMMENT_FILE=vale_comment_errors_updated.md
+            else
+              COMMENT_FILE=vale_comment_errors_new.md
+            fi
+            if [ ! -f "$COMMENT_FILE" ]; then
+              echo "Expected $COMMENT_FILE not found; skipping."; exit 0
+            fi
+            BODY=$(jq -Rs . < "$COMMENT_FILE")
+            if [ -n "$EXISTING_ID" ] && [ "$EXISTING_ID" != "null" ]; then
+              echo "Updating existing Vale comment with $ERROR_COUNT errors"
+              curl -s -X PATCH \
+                -H "Authorization: token $GITHUB_TOKEN" \
+                -H "Accept: application/vnd.github.v3+json" \
+                -H "Content-Type: application/json" \
+                -d "{\"body\": $BODY}" \
+                "https://api.github.com/repos/$REPO/issues/comments/$EXISTING_ID" > /dev/null || true
+            else
+              echo "Creating new Vale comment with $ERROR_COUNT errors"
+              curl -s -X POST \
+                -H "Authorization: token $GITHUB_TOKEN" \
+                -H "Accept: application/vnd.github.v3+json" \
+                -H "Content-Type: application/json" \
+                -d "{\"body\": $BODY}" \
+                "https://api.github.com/repos/$REPO/issues/$PR_NUMBER/comments" > /dev/null || true
+            fi
+          else
+            # No errors: only update an existing comment if present
+            if [ -n "$EXISTING_ID" ] && [ "$EXISTING_ID" != "null" ]; then
+              if [ -f vale_comment_clean_updated.md ]; then
+                BODY=$(jq -Rs . < vale_comment_clean_updated.md)
+                echo "Updating existing Vale comment to clean state"
+                curl -s -X PATCH \
+                  -H "Authorization: token $GITHUB_TOKEN" \
+                  -H "Accept: application/vnd.github.v3+json" \
+                  -H "Content-Type: application/json" \
+                  -d "{\"body\": $BODY}" \
+                  "https://api.github.com/repos/$REPO/issues/comments/$EXISTING_ID" > /dev/null || true
+              else
+                echo "Clean updated comment file missing; skipping update."
+              fi
+            else
+              echo "No existing comment and no errors => skipping comment creation (per requirements)."
+            fi
+          fi
+      - name: Workflow summary
+        if: always()
+        run: >
+          echo "✅ Vale linting workflow completed"
+
+          echo "📋 This workflow is configured to always pass, regardless of linting results"
+
+          echo "🔍 Check the previous step logs for Vale linting details"
+
+          echo "💬 For PR comment see earlier steps."
diff --git a/.github/workflows/scripts/lintwithvale.sh b/.github/workflows/scripts/lintwithvale.sh
new file mode 100755
index 000000000..95cd142a0
--- /dev/null
+++ b/.github/workflows/scripts/lintwithvale.sh
@@ -0,0 +1,307 @@
+#!/bin/bash
+
+# Vale Linting Script for GitHub Actions
+# This script runs Vale on changed files in a PR and posts results as comments
+
+set -e  # Exit on error for better debugging
+
+# Function to log messages with timestamps
+log() {
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1"
+}
+
+# Function to handle errors gracefully
+handle_error() {
+    local exit_code=$?
+    log "ERROR: $1 (exit code: $exit_code)"
+    # Don't exit - we want the workflow to always pass
+    return 0
+}
+
+# Trap errors to handle them gracefully
+trap 'handle_error "Script encountered an error"' ERR
+
+log "Syncing vale rules..."
+vale sync || log "WARNING: Vale sync failed, continuing with existing rules"
+
+log "Starting Vale linting process..."
+
+if [ -z "$GITHUB_REPOSITORY" ]; then
+    log "WARNING: GITHUB_REPOSITORY not set (continuing; affects only logging)"
+fi
+if [ -z "$PR_NUMBER" ]; then
+    log "WARNING: PR_NUMBER not set (continuing; affects only logging)"
+fi
+
+# Get PR information
+BASE_SHA="$1"
+HEAD_SHA="$2"
+
+if [ -z "$BASE_SHA" ] || [ -z "$HEAD_SHA" ]; then
+    log "ERROR: BASE_SHA and HEAD_SHA must be provided as arguments"
+    log "Usage: $0 <base_sha> <head_sha>"
+    exit 0
+fi
+
+log "Processing PR #$PR_NUMBER in $GITHUB_REPOSITORY"
+log "Comparing $BASE_SHA...$HEAD_SHA"
+
+# Step 1: Get changed files
+log "Getting changed files..."
+CHANGED_FILES=$(git diff --name-only --diff-filter=AM $BASE_SHA...$HEAD_SHA | grep -E '\.(adoc|md)$|^(content/learn/|content/patterns/|content/contribute/|modules/)' || true)
+
+if [ -z "$CHANGED_FILES" ]; then
+    log "No relevant files changed in this PR"
+    exit 0
+fi
+
+log "Changed files:"
+echo "$CHANGED_FILES"
+
+# Convert to JSON array for easier processing
+FILES_JSON=$(echo "$CHANGED_FILES" | jq -R -s -c 'split("\n") | map(select(length > 0))')
+log "Files JSON: $FILES_JSON"
+
+# Step 2: Get changed lines for each file
+log "Getting changed lines for each file..."
+echo '{}' > changed_lines.json
+
+echo "$FILES_JSON" | jq -r '.[]' | while read -r file; do
+    if [ -f "$file" ]; then
+        log "Processing changed lines for: $file"
+
+        # Get changed line ranges for this file (handles both single lines and ranges)
+        git diff -U0 $BASE_SHA...$HEAD_SHA -- "$file" | grep '^@@' | while read -r hunk; do
+            # Extract line info from hunk header like @@ -1,4 +1,6 @@
+            NEW_LINES=$(echo "$hunk" | sed -n 's/.*+\([0-9]*\),*\([0-9]*\).*/\1 \2/p')
+            START_LINE=$(echo "$NEW_LINES" | cut -d' ' -f1)
+            LINE_COUNT=$(echo "$NEW_LINES" | cut -d' ' -f2)
+
+            # If no line count specified, it's a single line
+            if [ -z "$LINE_COUNT" ] || [ "$LINE_COUNT" = "$START_LINE" ]; then
+                LINE_COUNT=1
+            fi
+
+            # Generate all line numbers in the range
+            for i in $(seq $START_LINE $((START_LINE + LINE_COUNT - 1))); do
+                echo "$i"
+            done
+        done > "lines_${file//\//_}.txt"
+
+        # Convert line numbers to comma-separated string
+        if [ -s "lines_${file//\//_}.txt" ]; then
+            CHANGED_LINES=$(cat "lines_${file//\//_}.txt" | sort -n | uniq | tr '\n' ',' | sed 's/,$//')
+            if [ -n "$CHANGED_LINES" ]; then
+                # Update the JSON with this file's changed lines
+                jq --arg file "$file" --arg lines "$CHANGED_LINES" '. + {($file): $lines}' changed_lines.json > tmp.json && mv tmp.json changed_lines.json
+                log "Changed lines for $file: $CHANGED_LINES"
+            fi
+        fi
+    else
+        log "WARNING: File $file not found (may have been deleted)"
+    fi
+done
+
+log "Final changed lines mapping:"
+cat changed_lines.json
+
+# Step 3: Run Vale on changed files
+log "Running Vale on changed files..."
+
+# Initialize error collection
+echo "[]" > vale_errors.json
+touch all_vale_errors.jsonl
+
+# Check if Vale config exists
+if [ ! -f ".vale.ini" ] && [ ! -f "_vale.ini" ] && [ ! -f "vale.ini" ]; then
+    log "WARNING: No Vale configuration file found. Vale may not work properly."
+fi
+
+# Run Vale on each changed file
+# (process substitution keeps FILES_PROCESSED updates in the current shell)
+FILES_PROCESSED=0
+while read -r file; do
+    if [ -f "$file" ]; then
+        log "Running Vale on: $file"
+        FILES_PROCESSED=$((FILES_PROCESSED + 1))
+
+        # Create safe filename for output
+        SAFE_FILENAME=$(echo "$file" | sed 's/[^a-zA-Z0-9._-]/_/g')
+
+        # Run Vale and capture JSON output with better error handling
+        if vale --output=JSON --no-exit --minAlertLevel=error "$file" > "vale_output_${SAFE_FILENAME}.json" 2>vale_stderr.log; then
+            log "Vale completed successfully for $file"
+        else
+            log "Vale encountered issues with $file, but continuing..."
+            cat vale_stderr.log || true
+        fi
+
+        # Check if Vale found any errors and the output is valid JSON
+        if [ -s "vale_output_${SAFE_FILENAME}.json" ]; then
+            # Validate JSON before processing
+            if jq empty "vale_output_${SAFE_FILENAME}.json" 2>/dev/null; then
+                # Add file path to each error and append to collection
+                jq --arg filepath "$file" '
+                    if type == "object" then
+                        to_entries | map(.value[] | . + {"File": $filepath})
+                    else
+                        []
+                    end
+                ' "vale_output_${SAFE_FILENAME}.json" >> all_vale_errors.jsonl 2>/dev/null || true
+            else
+                log "WARNING: Invalid JSON output from Vale for $file"
+                log "Content: $(head -n 5 "vale_output_${SAFE_FILENAME}.json")"
+            fi
+        else
+            log "No errors found in $file"
+        fi
+    else
+        log "WARNING: File $file not found (may have been deleted)"
+    fi
+done < <(echo "$FILES_JSON" | jq -r '.[]')
+
+log "Processed $FILES_PROCESSED files"
+
+# Combine all errors into a single JSON array with error handling
+if [ -s "all_vale_errors.jsonl" ]; then
+    if jq -s 'add // []' all_vale_errors.jsonl > vale_errors.json 2>/dev/null; then
+        log "Successfully combined Vale errors"
+    else
+        log "Error combining Vale errors, using empty array"
+        echo "[]" > vale_errors.json
+    fi
+else
+    log "No Vale errors found"
+    echo "[]" > vale_errors.json
+fi
+
+# Filter errors to only include those on changed lines
+if [ -f "changed_lines.json" ] && [ -s "vale_errors.json" ]; then
+    log "Filtering errors to changed lines only..."
+    if jq -s '
+        .[1] as $changed_lines |
+        .[0] | map(
+            select(
+                .File as $file |
+                .Line as $line |
+                ($changed_lines[$file] // "") | split(",") | map(tonumber) | any(. == $line)
+            )
+        )
+    ' vale_errors.json changed_lines.json > filtered_vale_errors.json 2>/dev/null; then
+        mv filtered_vale_errors.json vale_errors.json
+        log "Successfully filtered errors"
+    else
+        log "Error filtering errors, keeping all errors"
+    fi
+fi
+
+# Count errors safely
+ERROR_COUNT=$(jq 'length // 0' vale_errors.json 2>/dev/null || echo "0")
+log "Found $ERROR_COUNT Vale errors in changed lines"
+
+# Debug: Show sample errors
+if [ "$ERROR_COUNT" -gt 0 ]; then
+    log "Sample errors:"
+    jq -r '.[0:3] | .[] | "- Line \(.Line): \(.Message)"' vale_errors.json 2>/dev/null || true
+fi
+
+# Step 4: Prepare comment artifacts (no direct API calls here)
+log "Preparing comment body files (GitHub API handled in workflow)..."
+
+BASE_HEADING="### 📝 Vale Linting Results"
+UPDATED_HEADING="### 📝 Vale Linting Results (Updated)"
+
+if [ "$ERROR_COUNT" -gt 0 ]; then
+    # New errors comment (initial)
+    {
+        echo "$BASE_HEADING"; echo
+        echo "Vale found **$ERROR_COUNT** issue(s) in the modified content of this PR."; echo
+        jq -r '
+            group_by(.File) |
+            map(
+                "### 📄 `" + .[0].File + "`\n" +
+                (map(
+                    "- **Line " + (.Line | tostring) + "**: " + .Message +
+                    (if .Check then " `[" + .Check + "]`" else "" end) +
+                    (if .Severity then " (*" + .Severity + "*)" else "" end)
+                ) | join("\n"))
+            ) | join("\n\n")
+        ' vale_errors.json 2>/dev/null || {
+            echo "Error formatting Vale results. Raw errors:"
+            jq -r '.[] | "- Line \(.Line): \(.Message)"' vale_errors.json 2>/dev/null || echo "Unable to format errors properly"
+        }
+        cat <<'EOT'
+
+---
+
+💡 **Tips:**
+- Fix these issues to improve the documentation quality.
+- Some matches may be false positives — review each suggestion before applying changes.
+
+*This comment was automatically generated by Vale linting on the modified content.*
+EOT
+    } > vale_comment_errors_new.md
+
+    # Updated errors comment (heading includes Updated)
+    sed "1s|$BASE_HEADING|$UPDATED_HEADING|" vale_comment_errors_new.md > vale_comment_errors_updated.md || cp vale_comment_errors_new.md vale_comment_errors_updated.md
+else
+    # Clean updated comment only (created only if an existing comment is present)
+    # (process substitution keeps NO_ERROR_SECTION updates in the current shell)
+    NO_ERROR_SECTION=""
+    while read -r changed_file; do
+        [ -z "$changed_file" ] && continue
+        if jq -e --arg f "$changed_file" '.[] | select(.File==$f)' vale_errors.json >/dev/null 2>&1; then
+            continue
+        fi
+        BASENAME=$(basename "$changed_file")
+        NO_ERROR_SECTION+=$'\n''📄 '$BASENAME$'\n''No issues found in the modified lines. ✅'$'\n'
+    done < <(echo "$FILES_JSON" | jq -r '.[]')
+    {
+        echo "$UPDATED_HEADING"; echo
+        if [ -n "$NO_ERROR_SECTION" ]; then
+            echo "All currently modified lines are clean."; echo
+            echo "$NO_ERROR_SECTION" | sed '/^$/d'
+        else
+            echo "No issues found (no changed lines or all filtered out)."; echo
+        fi
+        cat <<'EOT'
+
+---
+*This comment was automatically generated by Vale linting on the modified content.*
+EOT
+    } > vale_comment_clean_updated.md
+fi
+
+# Summary JSON for workflow consumption
+log "Writing summary JSON (vale_summary.json)"
+ALL_CHANGED_FILES=$(echo "$FILES_JSON" | jq -c '.')
+FILES_WITH_ERRORS_JSON=$(jq -c 'group_by(.File) | map({file: .[0].File, errors: map({line: .Line, message: .Message, check: .Check, severity: .Severity})})' vale_errors.json 2>/dev/null || echo '[]')
+FILES_CLEAN_JSON=$(jq -n --argjson changed "$FILES_JSON" --argjson errs "$(jq -c 'map(.File)' vale_errors.json 2>/dev/null || echo '[]')" '
+    ($changed // []) - ([$errs[]] | unique)')
+cat > vale_summary.json <