Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 63 additions & 41 deletions .github/workflows/data-processing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -324,16 +324,15 @@ jobs:

# Store generated files in temp location
mkdir -p /tmp/generated-resources
cp -r content/curated_resources /tmp/generated-resources/
cp content/contributors/tenzing.md /tmp/generated-resources/

# Only copy glossary if it was regenerated
if [ "${{ github.event.inputs.regenerate_glossary }}" = "true" ]; then
echo "✓ Glossary regeneration enabled, including glossary files"
cp -r content/glossary /tmp/generated-resources/
else
echo "ℹ️ Glossary regeneration skipped (use workflow_dispatch with regenerate_glossary=true to update)"
fi
# Copy all files that are part of the artifact
# NOTE(review): every copy suppresses errors (2>/dev/null || true) so that
# optional/missing sources don't fail the job — but a typo'd path would be
# silently ignored too; confirm each source here is genuinely optional.
cp -r content/curated_resources /tmp/generated-resources/ 2>/dev/null || true
cp content/contributors/tenzing.md /tmp/generated-resources/ 2>/dev/null || true
cp -r content/glossary /tmp/generated-resources/ 2>/dev/null || true
cp -r data /tmp/generated-resources/ 2>/dev/null || true
cp -r content/contributor-analysis /tmp/generated-resources/ 2>/dev/null || true
# The citation chart keeps its nested content/publications/ path so the
# later restore step can mirror the same layout when copying back.
mkdir -p /tmp/generated-resources/content/publications 2>/dev/null || true
cp content/publications/citation_chart.webp /tmp/generated-resources/content/publications/ 2>/dev/null || true

# Fetch build-resources branch (create if doesn't exist)
git fetch origin build-resources || echo "build-resources branch doesn't exist yet"
Expand All @@ -347,55 +346,78 @@ jobs:
git checkout -b build-resources
fi

# Remove old generated resource files (but keep _index.md)
# Remove old generated resource files (but keep _index.md and other important files)
find content/curated_resources -type f ! -name '_index.md' -delete 2>/dev/null || true
find content/glossary -type f ! -name '_index.md' ! -name '_create_glossaries.py' -delete 2>/dev/null || true

# Copy newly generated files from temp location
# Curated resources
if [ -d /tmp/generated-resources/curated_resources ]; then
cp -r /tmp/generated-resources/curated_resources/* content/curated_resources/ 2>/dev/null || true
fi

# Tenzing contributor data
if [ -f /tmp/generated-resources/tenzing.md ]; then
cp /tmp/generated-resources/tenzing.md content/contributors/
fi

# Glossary files (always copy if they exist in temp)
if [ -d /tmp/generated-resources/glossary ]; then
rsync -av --exclude='_index.md' --exclude='_create_glossaries.py' /tmp/generated-resources/glossary/ content/glossary/ 2>/dev/null || true
fi

# Copy newly generated files
cp -r /tmp/generated-resources/curated_resources/* content/curated_resources/
cp /tmp/generated-resources/tenzing.md content/contributors/
# Data directory (GA data, etc.)
if [ -d /tmp/generated-resources/data ]; then
mkdir -p data
cp -r /tmp/generated-resources/data/* data/ 2>/dev/null || true
fi

# Contributor analysis
if [ -d /tmp/generated-resources/contributor-analysis ]; then
mkdir -p content/contributor-analysis
cp -r /tmp/generated-resources/contributor-analysis/* content/contributor-analysis/ 2>/dev/null || true
fi

# Copy glossary files only if regenerated (preserving directory structure)
if [ "${{ github.event.inputs.regenerate_glossary }}" = "true" ]; then
echo "✓ Updating glossary files in build-resources"
# Remove old glossary files (but keep _index.md files)
find content/glossary -type f ! -name '_index.md' ! -name '_create_glossaries.py' -delete 2>/dev/null || true
rsync -av --exclude='_index.md' --exclude='_create_glossaries.py' /tmp/generated-resources/glossary/ content/glossary/
# Citation chart
if [ -f /tmp/generated-resources/content/publications/citation_chart.webp ]; then
mkdir -p content/publications
cp /tmp/generated-resources/content/publications/citation_chart.webp content/publications/
fi

# Add all changes (including untracked files)
# NOTE(review): failures are swallowed (2>/dev/null || true) so a path that
# doesn't exist yet doesn't abort the step — but a real git error would be
# hidden as well; confirm this trade-off is intended.
git add -A content/curated_resources/ 2>/dev/null || true
git add -A content/contributors/tenzing.md 2>/dev/null || true
git add -A content/glossary/ 2>/dev/null || true
git add -A data/ 2>/dev/null || true
git add -A content/contributor-analysis/ 2>/dev/null || true
git add -A content/publications/citation_chart.webp 2>/dev/null || true

# Check if there are any changes to commit
if git diff --quiet && git diff --cached --quiet; then
echo "ℹ️ No changes to commit"
else
echo "✓ Changes detected, committing..."

# Add files based on what was regenerated
git add content/curated_resources/ content/contributors/tenzing.md
# Show what will be committed
echo "Files to be committed:"
git diff --cached --name-only

# Commit with appropriate message
if [ "${{ github.event.inputs.regenerate_glossary }}" = "true" ]; then
git add content/glossary/
git commit -m "Update generated resources and glossary - $(date -u +'%Y-%m-%d %H:%M:%S UTC')" || echo "Nothing to commit"
else
git commit -m "Update generated resources - $(date -u +'%Y-%m-%d %H:%M:%S UTC')" || echo "Nothing to commit"
fi

# Push to build-resources branch with retry logic
MAX_RETRIES=3
RETRY_COUNT=0
while [ $RETRY_COUNT -lt $MAX_RETRIES ]; do
if git push origin build-resources --force-with-lease; then
echo "✅ Successfully pushed to build-resources branch"
break
else
RETRY_COUNT=$((RETRY_COUNT + 1))
if [ $RETRY_COUNT -lt $MAX_RETRIES ]; then
echo "⚠️ Push failed, retrying ($RETRY_COUNT/$MAX_RETRIES)..."
sleep 2
git pull origin build-resources --rebase
else
echo "❌ Push failed after $MAX_RETRIES attempts"
exit 1
fi
fi
done
# Force push to build-resources branch
# Using --force instead of --force-with-lease because we want to ensure
# all artifact files are pushed regardless of remote state
if git push origin build-resources --force; then
echo "✅ Successfully force-pushed to build-resources branch"
else
echo "❌ Failed to push to build-resources branch"
exit 1
fi
fi

# Switch back to original branch
Expand Down
77 changes: 59 additions & 18 deletions .github/workflows/deploy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,30 +59,71 @@ jobs:
# =======================
# Fallback Data Processing
# =======================
- name: Run data processing if needed
- name: Trigger data processing if artifact missing
if: steps.download-artifact.outcome == 'failure'
env:
PYTHON_VERSION: "3.11"
GA_API_CREDENTIALS: ${{ secrets.GA_API_CREDENTIALS }}
GA_PROPERTY_ID: ${{ secrets.GA_PROPERTY_ID }}
run: |
# Install Python dependencies
python3 -m pip install -r ./requirements.txt

# Generate data files
python3 scripts/forrt_contribs/tenzing.py
python3 content/resources/resource.py
mv scripts/forrt_contribs/tenzing.md content/contributors/tenzing.md
echo "⚠️ Data artifact not found, triggering data-processing workflow..."
gh workflow run data-processing.yml --ref master
echo "✅ Data processing workflow triggered"
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

# Download GA data if possible
if [ "${{ github.event_name }}" != 'pull_request' ]; then
python scripts/download_ga_data.py
- name: Wait for data processing to complete
if: steps.download-artifact.outcome == 'failure'
run: |
echo "⏳ Waiting for data processing workflow to complete..."

# Small delay to ensure the workflow appears in the list
sleep 10

# Wait up to 15 minutes for the workflow to complete
MAX_WAIT_TIME=900 # 15 minutes in seconds
POLL_INTERVAL=30 # Check every 30 seconds
ELAPSED=0

while [ $ELAPSED -lt $MAX_WAIT_TIME ]; do
# Get the most recent data-processing workflow run status
# We check the most recent run - if it's queued or in_progress, we wait
# If it's completed, we're done
RUN_INFO=$(gh run list --workflow=data-processing.yml --limit=1 --json status,conclusion)
RUN_STATUS=$(echo "$RUN_INFO" | jq -r '.[0].status')

# Quick validation of GA data structure
if [ -f "data/ga_data.json" ]; then
python3 -c "import json; data = json.load(open('data/ga_data.json')); print('✅ GA data:', len(data.get('regions', [])), 'countries,', len(data.get('top_pages', [])), 'pages')"
if [ "$RUN_STATUS" = "completed" ]; then
RUN_CONCLUSION=$(echo "$RUN_INFO" | jq -r '.[0].conclusion')
echo "✅ Data processing workflow completed with conclusion: $RUN_CONCLUSION"
break
fi

echo "⏳ Still processing (status: $RUN_STATUS)... (${ELAPSED}s elapsed)"
sleep $POLL_INTERVAL
ELAPSED=$((ELAPSED + POLL_INTERVAL))
done

if [ $ELAPSED -ge $MAX_WAIT_TIME ]; then
echo "⚠️ Data processing workflow did not complete within timeout"
fi
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

- name: Retry downloading data artifact
if: steps.download-artifact.outcome == 'failure'
id: retry-download-artifact
uses: dawidd6/action-download-artifact@07ab29fd4a977ae4d2b275087cf67563dfdf0295
with:
workflow: data-processing.yml
name: data-artifact
path: .
github_token: ${{ secrets.GITHUB_TOKEN }}

- name: Verify artifact availability
# Runs only on the fallback path (initial artifact download failed);
# fails the job if even the retry download — performed after triggering
# the data-processing workflow — produced no artifact.
# NOTE(review): unless the retry-download step sets continue-on-error,
# a failing retry step would abort the job before this check ever runs —
# confirm the retry step's failure behavior.
if: steps.download-artifact.outcome == 'failure'
run: |
if [ "${{ steps.retry-download-artifact.outcome }}" = "failure" ]; then
echo "❌ Failed to download data artifact even after triggering data processing"
echo "This indicates a critical issue with the data-processing workflow"
exit 1
fi
echo "✅ Data artifact successfully downloaded after retry"


# =======================
Expand Down
75 changes: 59 additions & 16 deletions .github/workflows/staging-aggregate.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -202,28 +202,71 @@ jobs:
path: .
github_token: ${{ secrets.GITHUB_TOKEN }}

- name: Run data processing if needed
- name: Trigger data processing if artifact missing
if: steps.download-artifact.outcome == 'failure'
run: |
echo "⚠️ Data artifact not found, triggering data-processing workflow..."
gh workflow run data-processing.yml --ref master
echo "✅ Data processing workflow triggered"
env:
PYTHON_VERSION: "3.11"
GA_API_CREDENTIALS: ${{ secrets.GA_API_CREDENTIALS }}
GA_PROPERTY_ID: ${{ secrets.GA_PROPERTY_ID }}
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

- name: Wait for data processing to complete
if: steps.download-artifact.outcome == 'failure'
run: |
# Install Python dependencies
python3 -m pip install -r ./requirements.txt
echo "⏳ Waiting for data processing workflow to complete..."

# Small delay to ensure the workflow appears in the list
sleep 10

# Wait up to 15 minutes for the workflow to complete
MAX_WAIT_TIME=900 # 15 minutes in seconds
POLL_INTERVAL=30 # Check every 30 seconds
ELAPSED=0

while [ $ELAPSED -lt $MAX_WAIT_TIME ]; do
# Get the most recent data-processing workflow run status
# We check the most recent run - if it's queued or in_progress, we wait
# If it's completed, we're done
# NOTE(review): `gh run list --limit=1` returns the newest run of the
# workflow, which is not necessarily the run triggered by the previous
# step if another trigger raced in — confirm, or capture and poll the
# specific run id instead.
RUN_INFO=$(gh run list --workflow=data-processing.yml --limit=1 --json status,conclusion)
RUN_STATUS=$(echo "$RUN_INFO" | jq -r '.[0].status')

# NOTE(review): RUN_CONCLUSION is logged but never checked — a run that
# completed with conclusion "failure" still proceeds to the retry
# download; confirm that is the intended behavior.
if [ "$RUN_STATUS" = "completed" ]; then
RUN_CONCLUSION=$(echo "$RUN_INFO" | jq -r '.[0].conclusion')
echo "✅ Data processing workflow completed with conclusion: $RUN_CONCLUSION"
break
fi

echo "⏳ Still processing (status: $RUN_STATUS)... (${ELAPSED}s elapsed)"
sleep $POLL_INTERVAL
ELAPSED=$((ELAPSED + POLL_INTERVAL))
done

# Hitting the timeout only logs a warning; the subsequent retry-download
# and verification steps decide whether the missing artifact is fatal.
if [ $ELAPSED -ge $MAX_WAIT_TIME ]; then
echo "⚠️ Data processing workflow did not complete within timeout"
fi
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

# Generate data files
python3 scripts/forrt_contribs/tenzing.py
python3 content/resources/resource.py
mv scripts/forrt_contribs/tenzing.md content/contributors/tenzing.md
- name: Retry downloading data artifact
if: steps.download-artifact.outcome == 'failure'
id: retry-download-artifact
uses: dawidd6/action-download-artifact@07ab29fd4a977ae4d2b275087cf67563dfdf0295
with:
workflow: data-processing.yml
name: data-artifact
path: .
github_token: ${{ secrets.GITHUB_TOKEN }}

# Download GA data if possible
python scripts/download_ga_data.py

# Quick validation of GA data structure
if [ -f "data/ga_data.json" ]; then
python3 -c "import json; data = json.load(open('data/ga_data.json')); print('✅ GA data:', len(data.get('regions', [])), 'countries,', len(data.get('top_pages', [])), 'pages')"
- name: Verify artifact availability
# Runs only on the fallback path (initial artifact download failed);
# fails the job if the retry download — performed after triggering the
# data-processing workflow — still produced no artifact.
# NOTE(review): unless the retry-download step sets continue-on-error,
# a failing retry step would abort the job before this check ever runs —
# confirm the retry step's failure behavior.
if: steps.download-artifact.outcome == 'failure'
run: |
if [ "${{ steps.retry-download-artifact.outcome }}" = "failure" ]; then
echo "❌ Failed to download data artifact even after triggering data processing"
echo "This indicates a critical issue with the data-processing workflow"
exit 1
fi
echo "✅ Data artifact successfully downloaded after retry"


- name: Setup Hugo
Expand Down
Loading