Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 53 additions & 46 deletions .github/workflows/data-processing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,86 +92,92 @@ jobs:
python-version: ${{ env.PYTHON_VERSION }}
cache: 'pip'

#=======================
# Tenzing Data Processing
#=======================
#========================================
# Install Python packages for data processing scripts
#========================================
- name: Install Python dependencies
run: python3 -m pip install -r ./requirements.txt


#========================================
# Process contributor data using Tenzing script
# Must run before Contributor Analysis, which reads contributors_cache.csv
#========================================
- name: Run Tenzing script
id: tenzing-script
continue-on-error: true # Continue even if this step fails
run: python3 scripts/forrt_contribs/tenzing.py
env:
GSHEET_CREDENTIALS: ${{ secrets.GSHEET_CREDENTIALS }}

#========================================
# Check for Tenzing failures and create issue if needed
#========================================
- name: Check Tenzing failures and create issue
if: always() # Run even if previous step failed
continue-on-error: true # Don't fail the workflow if issue creation fails
run: python3 scripts/forrt_contribs/create_failure_issue.py
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

#==============================
# Contributor Analysis (Monthly)
#==============================
#========================================
# Setup r2u for fast R package installation
#========================================
- name: Setup r2u
if: steps.monthly-run.outputs.is_monthly == 'true'
uses: eddelbuettel/github-actions/r2u-setup@master

#========================================
# Install Pandoc for rendering R Markdown documents
#========================================
- uses: r-lib/actions/setup-pandoc@v2
- name: Setup Pandoc
if: steps.monthly-run.outputs.is_monthly == 'true'
uses: r-lib/actions/setup-pandoc@v2

#========================================
# Install R packages for contributor analysis and visualization
#========================================
- name: Install tenzing R dependencies
run: Rscript -e 'install.packages(c("rmarkdown","ggplot2", "readxl", "dplyr", "googlesheets4", "stringr", "gridExtra", "glue", "tidygraph", "ggraph", "igraph", "visNetwork"))'
- name: Install R dependencies
if: steps.monthly-run.outputs.is_monthly == 'true'
run: Rscript -e 'install.packages(c("rmarkdown", "ggplot2", "dplyr", "tidyr", "readr", "googlesheets4", "stringr", "here", "sysfonts", "showtext", "treemapify", "igraph", "visNetwork"))'

#==============================
# Contributor Analysis (Monthly)
#==============================
#========================================
# Generate contributor analysis reports and network visualizations
# Reads from contributors_cache.csv generated by Tenzing script above
#========================================
- name: Run Contributor Analysis
id: contributor-analysis
if: steps.monthly-run.outputs.is_monthly == 'true'
continue-on-error: true # Continue even if this step fails
run: |
echo "🚀 Running Contributor Analysis..."

# Clean old files from content/contributor-analysis and partials
rm -rf content/contributor-analysis/*.png content/contributor-analysis/*.html content/contributor-analysis/htmlwidgets_libs
rm -f layouts/partials/network-graph.html
rm -f static/partials/network-graph.html

# Run index.Rmd to generate contributor analysis content and plots
echo "📊 Rendering contributor analysis..."
Rscript -e "rmarkdown::render('content/contributor-analysis/index.Rmd')"

# Run network-graph.Rmd to generate interactive network visualization
echo "🕸️ Rendering network visualization..."
Rscript -e "rmarkdown::render('content/contributor-analysis/network-graph.Rmd')"

# Move generated HTML file to layouts/partials
echo "📁 Moving network graph to partials..."
mv content/contributor-analysis/network-graph.html layouts/partials/

# Clean up HTML artifacts from index.md if any
sed -i.bak -e '/^```{=html}$/d' -e '/^```$/d' content/contributor-analysis/index.md && rm content/contributor-analysis/index.md.bak

echo "✅ Contributor analysis complete"

#=======================
# Tenzing Data Processing
#=======================
#========================================
# Install Python packages for data processing scripts
#========================================
- name: Install Python dependencies
run: python3 -m pip install -r ./requirements.txt
# Move generated HTML file to static/partials (served via iframe)
echo "📁 Moving network graph to static/partials..."
mv content/contributor-analysis/network-graph.html static/partials/


#========================================
# Process contributor data using Tenzing script
#========================================
- name: Run Tenzing script
id: tenzing-script
continue-on-error: true # Continue even if this step fails
run: python3 scripts/forrt_contribs/tenzing.py
env:
GSHEET_CREDENTIALS: ${{ secrets.GSHEET_CREDENTIALS }}
# Clean up HTML artifacts from index.md if any
sed -i.bak -e '/^```{=html}$/d' -e '/^```$/d' content/contributor-analysis/index.md && rm content/contributor-analysis/index.md.bak

#========================================
# Check for Tenzing failures and create issue if needed
#========================================
- name: Check Tenzing failures and create issue
if: always() # Run even if previous step failed
continue-on-error: true # Don't fail the workflow if issue creation fails
run: python3 scripts/forrt_contribs/create_failure_issue.py
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
echo "✅ Contributor analysis complete"

#========================================
# Process and organize curated resources data
Expand Down Expand Up @@ -359,6 +365,7 @@ jobs:
content/glossary/
data/
static/data/
static/partials/
content/contributor-analysis/
content/publications/citation_chart.webp
retention-days: 7
Expand Down
Loading
Loading