Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
321 changes: 235 additions & 86 deletions .github/workflows/docs-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,17 @@ on:
branches: [main, develop]
paths:
- 'docs/**'
- 'README.md'
- 'CHANGELOG.md'
- 'AGENTS.md'
- '.github/workflows/docs-ci.yml'
pull_request:
branches: [main]
paths:
- 'docs/**'
- 'README.md'
- 'CHANGELOG.md'
- 'AGENTS.md'

jobs:
validate-documentation:
Expand All @@ -20,132 +26,275 @@ jobs:
- name: Checkout repository
uses: actions/checkout@v4

- name: Set up dependencies
- name: Validate internal links
id: links
run: |
sudo apt-get update
sudo apt-get install -y bc jq python3 python3-yaml
echo "## Internal Link Validation" > /tmp/report.md
echo "" >> /tmp/report.md

- name: Make scripts executable
run: chmod +x docs/scripts/*.sh
broken=0
checked=0

- name: Run link validation
id: links
run: |
echo "Running link validation..."
docs/scripts/validate-links.sh --json > /tmp/links.json || true
cat /tmp/links.json
echo "links_result=$(cat /tmp/links.json | jq -r '.success_rate')" >> $GITHUB_OUTPUT
while IFS= read -r file; do
# Extract markdown links to local files: [text](path) but not http/https/mailto/#
while IFS= read -r link; do
[ -z "$link" ] && continue
checked=$((checked + 1))

- name: Run frontmatter validation
id: frontmatter
run: |
echo "Running frontmatter validation..."
docs/scripts/validate-frontmatter.sh --json > /tmp/frontmatter.json || true
cat /tmp/frontmatter.json
echo "frontmatter_result=$(cat /tmp/frontmatter.json | jq -r '.success_rate')" >> $GITHUB_OUTPUT
# Strip anchor fragments
target="${link%%#*}"
[ -z "$target" ] && continue

# Resolve relative to the file's directory
dir="$(dirname "$file")"
resolved="$dir/$target"

if [ ! -e "$resolved" ]; then
echo "- \`$file\` -> \`$link\` (not found)" >> /tmp/report.md
broken=$((broken + 1))
fi
done < <(grep -oP '\[(?:[^\]]*)\]\(\K(?!https?://|mailto:|#)[^)]+' "$file" 2>/dev/null || true)
done < <(find docs -name '*.md' -type f)

valid=$((checked - broken))
if [ "$checked" -gt 0 ]; then
rate=$(( (valid * 100) / checked ))
else
rate=100
fi

echo "" >> /tmp/report.md
echo "**Result**: $valid/$checked links valid ($rate%)" >> /tmp/report.md
echo "" >> /tmp/report.md

echo "links_checked=$checked" >> $GITHUB_OUTPUT
echo "links_broken=$broken" >> $GITHUB_OUTPUT
echo "links_rate=$rate" >> $GITHUB_OUTPUT

if [ "$broken" -gt 0 ]; then
echo "::warning::Found $broken broken internal links out of $checked checked"
fi

- name: Run Mermaid diagram validation
- name: Validate Mermaid diagrams
id: mermaid
run: |
echo "Running Mermaid validation..."
docs/scripts/validate-mermaid.sh --json > /tmp/mermaid.json || true
cat /tmp/mermaid.json
echo "mermaid_result=$(cat /tmp/mermaid.json | jq -r '.success_rate')" >> $GITHUB_OUTPUT
echo "## Mermaid Diagram Validation" >> /tmp/report.md
echo "" >> /tmp/report.md

- name: Detect ASCII diagrams
id: ascii
run: |
echo "Detecting ASCII diagrams..."
docs/scripts/detect-ascii.sh --json > /tmp/ascii.json || true
cat /tmp/ascii.json
echo "ascii_count=$(cat /tmp/ascii.json | jq -r '.ascii_diagrams_found')" >> $GITHUB_OUTPUT
total=0
invalid=0
VALID_STARTS="^(graph|flowchart|sequenceDiagram|classDiagram|stateDiagram|erDiagram|gantt|pie|gitgraph|mindmap|timeline|journey|quadrantChart|sankey|xychart|block|packet|kanban|architecture|C4Context|C4Container|C4Component|C4Deployment|C4Dynamic|%%)"

while IFS= read -r file; do
# Extract mermaid blocks and check first non-empty line
in_block=false
first_line=""
while IFS= read -r line; do
if echo "$line" | grep -qP '^\s*```mermaid'; then
in_block=true
first_line=""
continue
fi
if [ "$in_block" = true ]; then
if echo "$line" | grep -qP '^\s*```\s*$'; then
in_block=false
total=$((total + 1))
if [ -z "$first_line" ]; then
echo "- \`$file\`: empty mermaid block" >> /tmp/report.md
invalid=$((invalid + 1))
elif ! echo "$first_line" | grep -qP "$VALID_STARTS"; then
echo "- \`$file\`: invalid start \`$first_line\`" >> /tmp/report.md
invalid=$((invalid + 1))
fi
elif [ -z "$first_line" ]; then
# Capture first non-empty line of block
trimmed="$(echo "$line" | sed 's/^[[:space:]]*//')"
[ -n "$trimmed" ] && first_line="$trimmed"
fi
fi
done < "$file"
done < <(find docs -name '*.md' -type f)

valid=$((total - invalid))
if [ "$total" -gt 0 ]; then
rate=$(( (valid * 100) / total ))
else
rate=100
fi

echo "**Result**: $valid/$total diagrams valid ($rate%)" >> /tmp/report.md
echo "" >> /tmp/report.md

echo "mermaid_total=$total" >> $GITHUB_OUTPUT
echo "mermaid_invalid=$invalid" >> $GITHUB_OUTPUT
echo "mermaid_rate=$rate" >> $GITHUB_OUTPUT

- name: Validate UK English spelling
id: spelling
if [ "$invalid" -gt 0 ]; then
echo "::warning::Found $invalid invalid Mermaid diagrams out of $total"
fi

- name: Check for stale references
id: stale
run: |
echo "Validating UK English spelling..."
docs/scripts/validate-spelling.sh --json > /tmp/spelling.json || true
cat /tmp/spelling.json
echo "spelling_errors=$(cat /tmp/spelling.json | jq -r '.spelling_errors')" >> $GITHUB_OUTPUT
echo "## Stale Reference Check" >> /tmp/report.md
echo "" >> /tmp/report.md

stale=0

- name: Validate structure
# Check for references to removed database (unified.db as active, not historical)
while IFS= read -r file; do
# Skip migration docs where unified.db references are expected
case "$file" in
*migration*|*CHANGELOG*|*schemas*) continue ;;
esac
count=$(grep -c 'unified\.db' "$file" 2>/dev/null || true)
if [ "$count" -gt 0 ]; then
echo "- \`$file\`: $count reference(s) to \`unified.db\` (migrated to Neo4j)" >> /tmp/report.md
stale=$((stale + count))
fi
done < <(find docs -name '*.md' -type f)

# Check for references to removed SQLite repositories
while IFS= read -r file; do
case "$file" in
*migration*|*CHANGELOG*|*schemas*) continue ;;
esac
count=$(grep -cE 'Sqlite(KnowledgeGraph|Ontology)Repository' "$file" 2>/dev/null || true)
if [ "$count" -gt 0 ]; then
echo "- \`$file\`: $count reference(s) to removed SQLite repositories" >> /tmp/report.md
stale=$((stale + count))
fi
done < <(find docs -name '*.md' -type f)

echo "" >> /tmp/report.md
if [ "$stale" -eq 0 ]; then
echo "**Result**: No stale references found" >> /tmp/report.md
else
echo "**Result**: $stale stale reference(s) found" >> /tmp/report.md
fi
echo "" >> /tmp/report.md

echo "stale_refs=$stale" >> $GITHUB_OUTPUT

if [ "$stale" -gt 0 ]; then
echo "::warning::Found $stale stale references to removed components"
fi

- name: Validate directory structure
id: structure
run: |
echo "Validating structure..."
docs/scripts/validate-structure.sh --json > /tmp/structure.json || true
cat /tmp/structure.json
echo "structure_errors=$(cat /tmp/structure.json | jq -r '.structure_errors')" >> $GITHUB_OUTPUT
echo "## Directory Structure Validation" >> /tmp/report.md
echo "" >> /tmp/report.md

- name: Calculate overall quality score
id: quality
run: |
links_score=${{ steps.links.outputs.links_result }}
frontmatter_score=${{ steps.frontmatter.outputs.frontmatter_result }}
mermaid_score=${{ steps.mermaid.outputs.mermaid_result }}
ascii_count=${{ steps.ascii.outputs.ascii_count }}
spelling_errors=${{ steps.spelling.outputs.spelling_errors }}
structure_errors=${{ steps.structure.outputs.structure_errors }}

# Calculate weighted score
overall_score=$(echo "scale=2; ($links_score + $frontmatter_score + $mermaid_score) / 3 - ($ascii_count * 2) - ($spelling_errors * 0.5) - ($structure_errors * 0.5)" | bc)

# Ensure 0-100 range
if (( $(echo "$overall_score < 0" | bc -l) )); then
overall_score=0
elif (( $(echo "$overall_score > 100" | bc -l) )); then
overall_score=100
fi
errors=0

# Diataxis directories that must exist
for dir in docs/tutorials docs/how-to docs/explanation docs/reference; do
if [ ! -d "$dir" ]; then
echo "- Missing required directory: \`$dir\`" >> /tmp/report.md
errors=$((errors + 1))
fi
done

echo "overall_score=$overall_score" >> $GITHUB_OUTPUT
echo "### Documentation Quality Score: ${overall_score}%" >> $GITHUB_STEP_SUMMARY
# Must have a docs index
if [ ! -f "docs/README.md" ]; then
echo "- Missing \`docs/README.md\` index" >> /tmp/report.md
errors=$((errors + 1))
fi

if (( $(echo "$overall_score >= 90" | bc -l) )); then
echo "✅ Documentation quality is excellent!" >> $GITHUB_STEP_SUMMARY
if [ "$errors" -eq 0 ]; then
echo "**Result**: Directory structure valid" >> /tmp/report.md
else
echo "⚠️ Documentation quality needs improvement" >> $GITHUB_STEP_SUMMARY
echo "**Result**: $errors structure issue(s)" >> /tmp/report.md
fi
echo "" >> /tmp/report.md

- name: Generate report
if: always()
echo "structure_errors=$errors" >> $GITHUB_OUTPUT

- name: Calculate quality score
id: quality
run: |
docs/scripts/generate-reports.sh
links_rate=${{ steps.links.outputs.links_rate }}
mermaid_rate=${{ steps.mermaid.outputs.mermaid_rate }}
stale=${{ steps.stale.outputs.stale_refs }}
struct_errors=${{ steps.structure.outputs.structure_errors }}

- name: Upload validation reports
# Weighted score: links 50%, mermaid 30%, penalties for stale refs and structure
base=$(( (links_rate * 50 + mermaid_rate * 30) / 80 ))
penalty=$(( stale * 2 + struct_errors * 5 ))
score=$((base - penalty))

# Clamp 0-100
[ "$score" -lt 0 ] && score=0
[ "$score" -gt 100 ] && score=100

echo "overall_score=$score" >> $GITHUB_OUTPUT

echo "---" >> /tmp/report.md
echo "## Quality Score: ${score}%" >> /tmp/report.md
echo "" >> /tmp/report.md
echo "| Check | Result |" >> /tmp/report.md
echo "|-------|--------|" >> /tmp/report.md
echo "| Internal links | ${{ steps.links.outputs.links_rate }}% (${{ steps.links.outputs.links_broken }} broken / ${{ steps.links.outputs.links_checked }} checked) |" >> /tmp/report.md
echo "| Mermaid diagrams | ${{ steps.mermaid.outputs.mermaid_rate }}% (${{ steps.mermaid.outputs.mermaid_invalid }} invalid / ${{ steps.mermaid.outputs.mermaid_total }} total) |" >> /tmp/report.md
echo "| Stale references | ${{ steps.stale.outputs.stale_refs }} found |" >> /tmp/report.md
echo "| Directory structure | ${{ steps.structure.outputs.structure_errors }} issues |" >> /tmp/report.md

# Step summary
cat /tmp/report.md >> $GITHUB_STEP_SUMMARY

- name: Upload report
if: always()
uses: actions/upload-artifact@v4
with:
name: validation-reports
path: docs/reports/
name: docs-quality-report
path: /tmp/report.md
retention-days: 30

- name: Comment PR with results
if: github.event_name == 'pull_request' && always()
- name: Comment on PR
if: github.event_name == 'pull_request'
uses: actions/github-script@v7
with:
script: |
const fs = require('fs');
const reportPath = 'docs/reports/';
const files = fs.readdirSync(reportPath).filter(f => f.endsWith('.md'));
const report = fs.readFileSync('/tmp/report.md', 'utf8');

if (files.length > 0) {
const latestReport = files.sort().reverse()[0];
const report = fs.readFileSync(reportPath + latestReport, 'utf8');
const body = `# Documentation Quality Report\n\n${report}\n\n---\n*Generated by docs-ci*`;

github.rest.issues.createComment({
issue_number: context.issue.number,
// Find and update existing comment, or create new one
const { data: comments } = await github.rest.issues.listComments({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: context.issue.number,
});

const existing = comments.find(c =>
c.user.type === 'Bot' && c.body.includes('Documentation Quality Report')
);

if (existing) {
await github.rest.issues.updateComment({
owner: context.repo.owner,
repo: context.repo.repo,
comment_id: existing.id,
body,
});
} else {
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
body: report
issue_number: context.issue.number,
body,
});
}

- name: Check quality threshold
run: |
overall_score=${{ steps.quality.outputs.overall_score }}
score=${{ steps.quality.outputs.overall_score }}

if (( $(echo "$overall_score < 90" | bc -l) )); then
echo "Documentation quality score ($overall_score%) is below the required threshold of 90%"
if [ "$score" -lt 80 ]; then
echo "::error::Documentation quality score ($score%) is below the required threshold of 80%"
exit 1
else
echo "Documentation quality score ($overall_score%) meets the threshold"
echo "Documentation quality score ($score%) meets the threshold"
fi
Loading
Loading