# Data Processing #161
# NOTE: GitHub reports that this file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters in GitHub's documentation.
# Data Processing workflow.
#
# Runs once a day at midnight (GitHub Actions evaluates cron schedules in UTC)
# and can also be started manually through workflow_dispatch.
# It processes data from various scripts and uploads the processed data as an
# artifact. The data is used to update the website's content.
name: Data Processing

on:
  schedule:
    # Daily at midnight
    - cron: "0 0 * * *"
  workflow_dispatch:
jobs:
  # Single job: run every data-generation script, then upload the results as
  # one artifact that is kept for one day.
  process-data:
    name: Process Data
    runs-on: ubuntu-22.04
    permissions:
      contents: read  # the job token is limited to reading repository contents
    env:
      # Quoted so YAML keeps the version a string instead of parsing a float.
      PYTHON_VERSION: "3.11"
    steps:
      # Steps marked `continue-on-error: true` do not fail the job when they
      # break, so the remaining steps (including the artifact upload at the
      # end) still run.

      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}
          cache: 'pip'

      # NOTE(review): `@master` is a mutable branch ref; consider pinning this
      # third-party action to a release tag or commit SHA.
      - name: Setup r2u
        uses: eddelbuettel/github-actions/r2u-setup@master

      - name: Set up pandoc
        uses: r-lib/actions/setup-pandoc@v2

      # Folded scalar (`>-`): the lines below are joined with single spaces
      # into one shell command.
      - name: Install tenzing R dependencies
        run: >-
          Rscript -e 'install.packages(c("rmarkdown", "ggplot2", "readxl",
          "dplyr", "googlesheets4", "stringr", "gridExtra", "glue",
          "tidygraph", "ggraph", "igraph", "visNetwork"))'

      - name: Run Tenzing analysis
        continue-on-error: true
        run: |
          Rscript -e "rmarkdown::render('scripts/contributor-analysis/contributor_analysis.rmd')"

      - name: Move Tenzing analysis
        continue-on-error: true
        run: |
          mv scripts/contributor-analysis/contributor_analysis.md content/contributor-analysis/index.md
          mv scripts/contributor-analysis/*.png content/contributor-analysis/
          # Drop any previous copy so the following mv does not nest the new
          # directory inside the old one.
          rm -rf content/contributor-analysis/htmlwidgets_libs
          mv scripts/contributor-analysis/htmlwidgets_libs content/contributor-analysis/
          # Delete every line that is exactly ```{=html} or a bare ``` fence.
          sed -i.bak -e '/^```{=html}$/d' -e '/^```$/d' content/contributor-analysis/index.md && rm content/contributor-analysis/index.md.bak

      - name: Install Python dependencies
        run: python3 -m pip install -r ./requirements.txt

      - name: Run Tenzing script
        continue-on-error: true
        run: python3 scripts/forrt_contribs/tenzing.py

      - name: Run Curated Resources script
        continue-on-error: true
        run: python3 content/resources/resource.py

      - name: Move and validate Tenzing output
        continue-on-error: true
        run: |
          # Check the source before moving it: the default `bash -e` shell
          # aborts on a failed mv, so a check placed after the move could
          # never print its message.
          if [ ! -f scripts/forrt_contribs/tenzing.md ]; then
            echo "tenzing.md not found"
            exit 1
          fi
          mv scripts/forrt_contribs/tenzing.md content/contributors/tenzing.md

      - name: Validate curated resources
        continue-on-error: true
        run: |
          # `-f` tests for a regular file; it does not look at the extension.
          # If the directory is empty the glob stays unexpanded and the test
          # fails on the literal pattern, which also ends the step with 1.
          for file in content/curated_resources/*; do
            if [ ! -f "$file" ]; then
              echo "Not a regular file (or directory is empty): $file"
              exit 1
            fi
          done

      - name: Download GA Data
        continue-on-error: true
        env:
          GA_API_CREDENTIALS: ${{ secrets.GA_API_CREDENTIALS }}
          GA_PROPERTY_ID: ${{ secrets.GA_PROPERTY_ID }}
        run: python3 scripts/download_ga_data.py

      - name: Run Google Scholar script
        continue-on-error: true
        env:
          SERPAPI: ${{ secrets.SERPAPI }}
        run: python3 scripts/gs-cite/google_scholar.py

      - name: Upload data artifact
        uses: actions/upload-artifact@v4
        with:
          name: data-artifact
          # Keep comments out of the block scalar below: inside `|` a trailing
          # `# ...` is part of the path pattern, not a YAML comment.
          # `data/` holds the GA data.
          path: |
            content/contributors/tenzing.md
            content/curated_resources/
            data/
            content/contributor-analysis/
            content/publications/citation_chart.webp
          retention-days: 1