diff --git a/.github/workflows/data-processing.yml b/.github/workflows/data-processing.yml
index 2077216b907..e199377e158 100644
--- a/.github/workflows/data-processing.yml
+++ b/.github/workflows/data-processing.yml
@@ -92,28 +92,64 @@ jobs:
python-version: ${{ env.PYTHON_VERSION }}
cache: 'pip'
+ #=======================
+ # Tenzing Data Processing
+ #=======================
+ #========================================
+ # Install Python packages for data processing scripts
+ #========================================
+ - name: Install Python dependencies
+ run: python3 -m pip install -r ./requirements.txt
+
+
+ #========================================
+ # Process contributor data using Tenzing script
+ # Must run before Contributor Analysis, which reads contributors_cache.csv
+ #========================================
+ - name: Run Tenzing script
+ id: tenzing-script
+ continue-on-error: true # Continue even if this step fails
+ run: python3 scripts/forrt_contribs/tenzing.py
+ env:
+ GSHEET_CREDENTIALS: ${{ secrets.GSHEET_CREDENTIALS }}
+
+ #========================================
+ # Check for Tenzing failures and create issue if needed
+ #========================================
+ - name: Check Tenzing failures and create issue
+ if: always() # Run even if previous step failed
+ continue-on-error: true # Don't fail the workflow if issue creation fails
+ run: python3 scripts/forrt_contribs/create_failure_issue.py
+ env:
+ GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+ #==============================
+ # Contributor Analysis (Monthly)
+ #==============================
#========================================
# Setup r2u for fast R package installation
#========================================
- name: Setup r2u
+ if: steps.monthly-run.outputs.is_monthly == 'true'
uses: eddelbuettel/github-actions/r2u-setup@master
#========================================
# Install Pandoc for rendering R Markdown documents
#========================================
- - uses: r-lib/actions/setup-pandoc@v2
+ - name: Setup Pandoc
+ if: steps.monthly-run.outputs.is_monthly == 'true'
+ uses: r-lib/actions/setup-pandoc@v2
#========================================
# Install R packages for contributor analysis and visualization
#========================================
- - name: Install tenzing R dependencies
- run: Rscript -e 'install.packages(c("rmarkdown","ggplot2", "readxl", "dplyr", "googlesheets4", "stringr", "gridExtra", "glue", "tidygraph", "ggraph", "igraph", "visNetwork"))'
+ - name: Install R dependencies
+ if: steps.monthly-run.outputs.is_monthly == 'true'
+ run: Rscript -e 'install.packages(c("rmarkdown", "ggplot2", "dplyr", "tidyr", "readr", "googlesheets4", "stringr", "here", "sysfonts", "showtext", "treemapify", "igraph", "visNetwork"))'
- #==============================
- # Contributor Analysis (Monthly)
- #==============================
#========================================
# Generate contributor analysis reports and network visualizations
+ # Reads from contributors_cache.csv generated by Tenzing script above
#========================================
- name: Run Contributor Analysis
id: contributor-analysis
@@ -121,57 +157,27 @@ jobs:
continue-on-error: true # Continue even if this step fails
run: |
echo "🚀 Running Contributor Analysis..."
-
+
# Clean old files from content/contributor-analysis and partials
rm -rf content/contributor-analysis/*.png content/contributor-analysis/*.html content/contributor-analysis/htmlwidgets_libs
- rm -f layouts/partials/network-graph.html
-
+ rm -f static/partials/network-graph.html
+
# Run index.Rmd to generate contributor analysis content and plots
echo "📊 Rendering contributor analysis..."
Rscript -e "rmarkdown::render('content/contributor-analysis/index.Rmd')"
-
+
# Run network-graph.Rmd to generate interactive network visualization
echo "🕸️ Rendering network visualization..."
Rscript -e "rmarkdown::render('content/contributor-analysis/network-graph.Rmd')"
-
- # Move generated HTML file to layouts/partials
- echo "📁 Moving network graph to partials..."
- mv content/contributor-analysis/network-graph.html layouts/partials/
-
- # Clean up HTML artifacts from index.md if any
- sed -i.bak -e '/^```{=html}$/d' -e '/^```$/d' content/contributor-analysis/index.md && rm content/contributor-analysis/index.md.bak
-
- echo "✅ Contributor analysis complete"
- #=======================
- # Tenzing Data Processing
- #=======================
- #========================================
- # Install Python packages for data processing scripts
- #========================================
- - name: Install Python dependencies
- run: python3 -m pip install -r ./requirements.txt
+ # Move generated HTML file to static/partials (served via iframe)
+ echo "📁 Moving network graph to static/partials..."
+ # Create the target directory first so mv does not fail if the checkout lacks it
+ mkdir -p static/partials
+ mv content/contributor-analysis/network-graph.html static/partials/
-
- #========================================
- # Process contributor data using Tenzing script
- #========================================
- - name: Run Tenzing script
- id: tenzing-script
- continue-on-error: true # Continue even if this step fails
- run: python3 scripts/forrt_contribs/tenzing.py
- env:
- GSHEET_CREDENTIALS: ${{ secrets.GSHEET_CREDENTIALS }}
+ # Clean up HTML artifacts from index.md if any
+ sed -i.bak -e '/^```{=html}$/d' -e '/^```$/d' content/contributor-analysis/index.md && rm content/contributor-analysis/index.md.bak
- #========================================
- # Check for Tenzing failures and create issue if needed
- #========================================
- - name: Check Tenzing failures and create issue
- if: always() # Run even if previous step failed
- continue-on-error: true # Don't fail the workflow if issue creation fails
- run: python3 scripts/forrt_contribs/create_failure_issue.py
- env:
- GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ echo "✅ Contributor analysis complete"
#========================================
# Process and organize curated resources data
@@ -359,6 +365,7 @@ jobs:
content/glossary/
data/
static/data/
+ static/partials/
content/contributor-analysis/
content/publications/citation_chart.webp
retention-days: 7
diff --git a/content/contributor-analysis/index.Rmd b/content/contributor-analysis/index.Rmd
index ab87820d3cf..5c17d2a0e66 100644
--- a/content/contributor-analysis/index.Rmd
+++ b/content/contributor-analysis/index.Rmd
@@ -23,242 +23,218 @@ library(ggplot2) # For visualization
library(tidyr) # For reshaping data
library(googlesheets4) # For reading Google Sheets
library(stringr) # For removing unwanted characters around strings
-library(gridExtra) # For arranging figures
-library(glue) # For printing text
-library(tidygraph) #
-library(ggraph)
-library(igraph)
-library(visNetwork)
-library(Matrix)
+library(here)
+library(readr)
+library(sysfonts)
+library(showtext)
+library(treemapify)
+
+font_add_google("Domine", "Domine")
+showtext_auto()
```
-```{r Read sheets from automation source}
+```{r Read contributor data from cache}
-# Define Google Sheet URL for Automation Source
-google_sheet_url_automation <- "https://docs.google.com/spreadsheets/d/1MUD54FQUhfcBKrvr5gCYoh2wgbJ6Lf7oAJRAqsQ-Nag/edit"
+# Read role column mappings (same source as tenzing.py)
+fields_url <- "https://docs.google.com/spreadsheets/d/e/2PACX-1vT_IaXiYtB3iAmtDZ_XiQKrToRkxOlkXNAeNU2SIT_J9PxvsQyptga6Gg9c8mSvDZpwY6d8skswIQYh/pub?output=csv&gid=277271370"
+role_cols <- read_csv(fields_url, show_col_types = FALSE) |> pull(Fields)
+
+# Read contributor data from cache (generated by tenzing.py)
+cache_path <- here("scripts", "forrt_contribs", "contributors_cache.csv")
+credit_roles <- read_csv(cache_path, show_col_types = FALSE)
-# Get sheet names
-gs4_deauth()
-sheets <- sheet_names(google_sheet_url_automation)
-
-# Exclude the first four Automation Source sheets
-relevant_sheets <- sheets[-c(1,2,3,4)]
-
-# Exclude 'full' sheets
-relevant_sheets <- relevant_sheets[!grepl(" - full$", relevant_sheets)]
-
-# Read all relevant sheets into a list and remove rows without last names
-internal_links <- read_sheet(google_sheet_url_automation, sheet = "INTERNAL LINKS") %>%
- select(`Project Name`) # Extract project names
-
-data_list <- lapply(relevant_sheets, function(sheet) {
- # Read the content of the current sheet
- df <- read_sheet(google_sheet_url_automation, sheet = sheet)
- # Remove rows where the `Surname` is missing or empty
- df <- df %>% filter(!is.na(`Surname`) & `Surname` != "")
- # Match the `Project Name` using the sheet name (assuming sheet name is the identifier)
- project_name <- internal_links$`Project Name`[internal_links$`Project Name` == sheet]
- # If a match is found, assign the `Project Name`, else set to NA
- if (length(project_name) > 0) {
- df <- df %>% mutate(`Project Name` = project_name)
- } else {
- df <- df %>% mutate(`Project Name` = NA)
- }
- df
-})
-
-# Combine all sheets into one dataframe
-dt <- bind_rows(data_list, .id = "Source_Sheet")
-
-# Move project name column to first position
-dt <- dt %>%
- select(`Project Name`, everything())
-
-# Remove PM column as it is unnecessary and might cause problems with binding
-dt <- dt %>%
- select(-`Project Managers`)
```
```{r Read sheets from leads tenzing}
-# Define Google Sheet URL for Leads Tenzing
+# Define Google Sheet URL for Leads Tenzing
google_sheet_url_leads <- "https://docs.google.com/spreadsheets/d/1roy-sZTxyXENA5c5IIV7IIemYvzbzs7zojUN2yIpi58/edit?gid=0#gid=0"
-# Get sheet names
-leads_sheets <- sheet_names(google_sheet_url_leads)
-
-# Exclude the second Leads Tenzing sheet
-relevant_leads_sheets <- leads_sheets[-c(2)]
-
-# Read the Leads Tenzing sheet
-leads_df <- read_sheet(google_sheet_url_leads)
+gs4_deauth()
-# Rename columns and select relevant ones
-leads_df <- leads_df %>%
+lead_roles <- read_sheet(google_sheet_url_leads) |>
rename(
`Project Name` = `FORRT project(s)`,
`ORCID iD` = `ORCID`
- ) %>%
- select(`First name`, `Middle name`, `Surname`, `Project Name`, `Role`, `ORCID iD`)
-
-# Create a column to mark the presence of a role and group by individual
-leads_df <- leads_df %>%
- mutate(has_role = TRUE) %>% # Mark with TRUE for individuals having a role
- distinct(`First name`, `Middle name`, `Surname`, `Project Name`, `ORCID iD`, `Role`, .keep_all = TRUE) # Remove duplicates
-
-# Pivot the data to create a column for each leadership role type, removing director
-leads_df <- leads_df %>%
- pivot_wider(names_from = `Role`, values_from = `has_role`, values_fill = list(has_role = FALSE)) %>%
- select(-Director, -`Operations Coordinator`) # Drop the 'Director' and Operations Coordinator columns
+ ) |>
+ select(`First name`, `Middle name`, `Surname`, `Project Name`, `Role`, `ORCID iD`) |>
+ mutate(has_role = TRUE) |>
+ distinct(`First name`, `Middle name`, `Surname`, `Project Name`, `ORCID iD`, `Role`, .keep_all = TRUE) |>
+ pivot_wider(names_from = `Role`, values_from = `has_role`, values_fill = list(has_role = FALSE)) |>
+ # Remove FORRT-wide organizational roles (not project-level contributions)
+ select(-Director, -`Operations Coordinator`)
```
```{r Combine roles from leads tenzing with automation source}
+# Get leadership role columns (everything except ID columns)
+id_cols <- c("First name", "Middle name", "Surname", "Project Name", "ORCID iD")
+leads_role_cols <- setdiff(names(lead_roles), id_cols)
+all_role_cols <- union(role_cols, leads_role_cols)
+
# Combine leads tenzing rows with automation source
-dt <- bind_rows(dt, leads_df)
+contributions <- bind_rows(credit_roles, lead_roles)
```
```{r Trim Values For Consistency}
# Trim names and ORCIDs to ensure inconsistent Tenzing entries are not counted separately
-dt <- dt %>%
+contributions <- contributions |>
mutate(
`First name` = str_trim(str_replace_all(`First name`, "\\*", "")), # Remove * and trim spaces
- `Middle name` = str_trim(str_replace_all(`Middle name`, "\\*", "")) %>% str_sub(1, 1), # Remove *, trim spaces, and keep first letter
+ `Middle name` = str_trim(str_replace_all(`Middle name`, "\\*", "")) |> str_sub(1, 1), # Remove *, trim spaces, and keep first letter
`Surname` = str_trim(str_replace_all(`Surname`, "\\*", "")), # Remove * and trim spaces
`ORCID iD` = str_trim(str_remove(`ORCID iD`, "https://orcid.org/")) # Remove ORCID URL prefix and trim spaces
)
```
-```{r ensure Conceptualization is logical}
-dt$Conceptualization <- as.logical(dt$Conceptualization)
-```
-
```{r Metrics}
# Count unique contributors (only use surname and first name at the moment, middle name causes problems)
-unique_contributors <- dt %>%
- distinct(`Surname`, `First name`) %>%
+unique_contributors <- contributions |>
+ distinct(Surname, `First name`) |>
nrow()
# Contributions per person
-contributions_per_person <- dt %>%
- group_by(`Surname`, `First name`,) %>%
- summarise(Contributions = n(), .groups = 'drop')
+contributions_per_person <- contributions |>
+ summarise(Contributions = n(), .by = c(Surname, `First name`))
# Mean contributions per person
mean_contributions_per_person <- mean(contributions_per_person$Contributions)
```
```{r Project Engagement}
-count_projects <- dt %>%
- summarise(n_distinct(`Project Name`))
-
-count_projects <- as.integer(count_projects)
+count_projects <- n_distinct(contributions$`Project Name`)
# Contributors per project
-project_contributors <- dt %>%
- distinct(`Project Name`, `Surname`, `First name`, `Middle name`) %>% # Remove duplicate contributor entries
- group_by(`Project Name`) %>%
- summarise(Unique_Contributors = n(), .groups = 'drop') %>% # Count distinct names
- arrange(desc(Unique_Contributors))
+project_contributors <- contributions |>
+ distinct(`Project Name`, Surname, `First name`, `Middle name`) |>
+ summarise(unique_contributors = n(), .by = `Project Name`) |>
+ arrange(desc(unique_contributors))
# Reorder the 'Project Name' based on the number of unique contributors (descending)
project_contributors$`Project Name` <- factor(project_contributors$`Project Name`,
levels = project_contributors$`Project Name`)
# Roles and contributions distribution
-dt_long <- dt %>%
+contributions_long <- contributions |>
pivot_longer(
- cols = "Conceptualization" | starts_with("Writing") |
- "Data curation" | "Formal analysis" |
- "Funding acquisition" | "Investigation" |
- "Methodology" | "Project administration" |
- "Resources" | "Software" | "Supervision" | "Validation" |
- "Visualization" | "Project manager" |
- "Project Coordinators" | "Project lead" | "Project co-lead",
+ cols = any_of(all_role_cols),
names_to = "Role",
values_to = "Contribution"
- ) %>%
+ ) |>
filter(Contribution == TRUE)
-role_distribution <- dt_long %>%
+role_distribution <- contributions_long |>
count(Role, sort = TRUE)
-mean_project_contributors <- mean(project_contributors$Unique_Contributors)
+mean_project_contributors <- mean(project_contributors$unique_contributors)
```
-```{r Summary for Website}
-output <- glue("
-As of {format(Sys.Date(), '%d %B %Y')}, FORRT has a total of {sprintf('%d', count_projects)} completed or ongoing projects and support teams, with a total of {format(unique_contributors, big.mark = ',')} contributors. There is an average (mean) of {sprintf('%.2f', mean_contributions_per_person)} contributions per person across all FORRT projects, and the average number of contributors per project is {sprintf('%.2f', mean_project_contributors)}. You can see the full list of FORRT contributors and their individual contributions [here](https://forrt.org/contributors/).")
-```
-
-`r output`
+As of `r format(Sys.Date(), '%d %B %Y')`, FORRT has a total of `r count_projects` completed or ongoing projects and support teams, with a total of `r format(unique_contributors, big.mark = ',')` contributors. There is an average (mean) of `r sprintf('%.2f', mean_contributions_per_person)` contributions per person across all FORRT projects, and the average number of contributors per project is `r sprintf('%.2f', mean_project_contributors)`. You can see the full list of FORRT contributors and their individual contributions [here](https://forrt.org/contributors/).
```{r Basic visualizations}
# Contributors per project
-# Keep only the top 10 projects based on Unique_Contributors
-project_contributors <- project_contributors %>%
- arrange(desc(Unique_Contributors)) %>%
+# Keep only the top 10 projects based on unique_contributors
+project_contributors <- project_contributors |>
+ arrange(desc(unique_contributors)) |>
slice_head(n = 10)
-# Get the first three project names from the sorted order
-top_projects <- project_contributors$`Project Name`[1:5]
-
-# Add labels only for these projects
-project_contributors <- project_contributors %>%
- mutate(label = ifelse(`Project Name` %in% top_projects, as.character(`Project Name`), NA))
-
# Plot
-projects_plot <- ggplot(project_contributors, aes(x = `Project Name`, y = Unique_Contributors)) +
- geom_bar(stat = "identity", fill = "#A52828", color = "#323232", alpha = 0.8) +
- geom_text(aes(label = label), hjust = 0, nudge_x = -0.3, vjust = 1.0, nudge_y = 9, size = 3, na.rm = TRUE) + # Adjusted vjust and nudge_y
- labs(title = "Contributors for FORRT's 10 Biggest Projects",
- x = "Projects",
- y = "Number of Contributors") +
- theme_minimal(base_size = 18) +
- theme(plot.title = element_text(hjust = 0.5, face = "bold", size = 16),
- axis.text.x = element_blank(),
- axis.ticks.x = element_blank(),
- axis.text.y = element_text(size = 12),
- axis.title = element_text(size = 14),
- panel.background = element_rect(fill = "#fefdf6", color = NA), # No panel border
- plot.background = element_rect(fill = "#fefdf6", color = NA),
- panel.border = element_blank(),
- panel.spacing = unit(0, "lines"))
+projects_plot <- ggplot(project_contributors, aes(y = reorder(`Project Name`, unique_contributors), x = unique_contributors)) +
+ geom_segment(aes(x = 0, xend = unique_contributors, yend = reorder(`Project Name`, unique_contributors)),
+ color = "#A52828", linewidth = 1) +
+ geom_point(color = "#A52828", size = 4) +
+ geom_text(aes(label = unique_contributors), hjust = -0.8, size = 5, family = "Domine", color = "#333333") +
+ scale_x_continuous(limits = c(0, max(project_contributors$unique_contributors) * 1.15), expand = c(0, 0)) +
+ labs(title = "Number of Contributors for FORRT's 10 Biggest Projects") +
+ theme_minimal(base_size = 14, base_family = "Domine") +
+ theme(plot.title = element_text(hjust = 0.5, face = "bold", size = 20, margin = margin(b = 20)),
+ plot.title.position = "plot",
+ axis.text.y = element_text(size = 14, color = "#333333"),
+ axis.text.x = element_blank(),
+ axis.title = element_blank(),
+ panel.grid = element_blank(),
+ panel.background = element_rect(fill = "#fefdf6", color = NA),
+ plot.background = element_rect(fill = "#fefdf6", color = NA),
+ panel.border = element_blank(),
+ panel.spacing = unit(0, "lines"),
+ plot.margin = margin(t = 10, r = 10, b = 60, l = 10))
# Roles in projects
-roles_plot <- ggplot(role_distribution, aes(x = reorder(Role, n), y = n)) +
- geom_bar(stat = "identity", fill = "#A52828", color = "#323232", alpha = 0.8) +
- coord_flip() +
- labs(title = "Distribution of Contributions Across Roles",
- x = "Role",
+
+roles_plot <- ggplot(role_distribution, aes(y = reorder(Role, n), x = n)) +
+ geom_col(fill = "#A52828", alpha = 0.8) +
+ geom_text(aes(label = n), hjust = -0.3, size = 5, family = "Domine", color = "#333333") +
+ scale_x_continuous(limits = c(0, max(role_distribution$n) * 1.1), expand = c(0, 0)) +
+ labs(title = "Number of Contributions Across Roles",
y = "Number of People") +
- theme_minimal(base_size = 14) +
- theme(plot.title = element_text(hjust = 0.5, face = "bold", size = 16), # Set title size to 16
- axis.text = element_text(size = 12),
- axis.title = element_text(size = 14),
- panel.background = element_rect(fill = "#fefdf6", color = NA), # No panel border
- plot.background = element_rect(fill = "#fefdf6", color = NA), # No plot border
- panel.border = element_blank(), # Explicitly blank
- panel.spacing = unit(0, "lines"))
+ theme_minimal(base_size = 14, base_family = "Domine") +
+ theme(plot.title = element_text(hjust = 0.5, face = "bold", size = 20, margin = margin(b = 20)),
+ plot.title.position = "plot",
+ axis.text.y = element_text(size = 16, color = "#333333"),
+ axis.text.x = element_blank(),
+ axis.title = element_blank(),
+ panel.grid = element_blank(),
+ panel.background = element_rect(fill = "#fefdf6", color = NA),
+ plot.background = element_rect(fill = "#fefdf6", color = NA),
+ panel.border = element_blank(),
+ panel.spacing = unit(0, "lines"),
+ plot.margin = margin(t = 10, r = 10, b = 10, l = 10))
+
+# Treemap: number of distinct projects per contributor.
+# Person/project pairs are de-duplicated before counting (as project_contributors
+# does): `contributions` stacks CRediT rows and leadership rows, so a person may
+# have more than one row for the same project and a plain row count would
+# overstate the number of projects.
+projects_per_person <- contributions |>
+  distinct(Surname, `First name`, `Project Name`) |>
+  count(Surname, `First name`, name = "n_projects") |>
+  mutate(
+    # Bucket 5+ together; the bucket number doubles as the fill and sort key
+    sort_order = pmin(n_projects, 5L),
+    label = case_when(
+      sort_order == 1L ~ "1 project",
+      sort_order == 5L ~ "5 or more projects",
+      TRUE ~ str_c(sort_order, " projects")
+    )
+  ) |>
+  # One row per bucket: n = number of contributors in that bucket
+  count(sort_order, label) |>
+  mutate(percent = round(n / sum(n) * 100, 1)) |>
+  arrange(sort_order)
+
+treemap_plot <- ggplot(projects_per_person, aes(area = n, fill = sort_order)) +
+ geom_treemap() +
+ geom_treemap_text(aes(label = label), colour = "white", place = "centre", size = 16, family = "Domine") +
+ geom_treemap_text(aes(label = str_c("\n\n\n(", percent, "% of contributors)")), colour = "white", place = "centre", size = 11, family = "Domine") +
+ scale_fill_gradient(low = "#e08a8a", high = "#A52828", guide = "none") +
+ labs(title = "Share of Contributors by Number of Projects they Contributed To") +
+ theme_minimal(base_size = 14, base_family = "Domine") +
+ theme(plot.title = element_text(hjust = 0.5, face = "bold", size = 20, margin = margin(b = 20)),
+ plot.title.position = "plot",
+ panel.background = element_rect(fill = "#fefdf6", color = NA),
+ plot.background = element_rect(fill = "#fefdf6", color = NA),
+ plot.margin = margin(t = 10, r = 10, b = 60, l = 10))
```
-```{r projects-plot, fig.alt = "Bar chart of contributors per project"}
+```{r projects-plot, fig.alt = "Bar chart of contributors per project", fig.width = 10, fig.height = 7}
projects_plot
```
-```{r roles-plot, fig.alt = "Bar chart of contributions by Tenzing role"}
-roles_plot
+```{r treemap-plot, fig.alt = "Treemap of number of projects per contributor", fig.width = 10, fig.height = 7}
+treemap_plot
```
+```{r roles-plot, fig.alt = "Bar chart of contributions by Tenzing role", fig.width = 10, fig.height = 7}
+roles_plot
+```
-```{r dt long}
-dt_long <- dt_long %>%
- rowwise() %>%
- mutate(Contributor = paste(na.omit(c(`Surname`, `First name`)), collapse = " ")) %>%
+```{r credit-roles-long}
+contributions_long <- contributions_long |>
+ unite(Contributor, Surname, `First name`, sep = " ", remove = FALSE, na.rm = TRUE) |>
mutate(Lead = if_else(Role %in% c("lead", "co-lead"), Role, "other"))
```
diff --git a/content/contributor-analysis/index.md b/content/contributor-analysis/index.md
index 5716bcb8a3e..08984289f38 100644
--- a/content/contributor-analysis/index.md
+++ b/content/contributor-analysis/index.md
@@ -3,19 +3,21 @@ title: "FORRT Contributor Analyses"
always_allow_html: true
output:
md_document:
- variant: markdown_github
+ variant: gfm
preserve_yaml: true
toc: false
type: contributors_analysis
---
-As of 14 September 2025, FORRT has a total of 91 completed or ongoing
-projects and support teams, with a total of 587 contributors. There is
-an average (mean) of 2.45 contributions per person across all FORRT
-projects, and the average number of contributors per project is 15.54.
-You can see the full list of FORRT contributors and their individual
+As of 12 February 2026, FORRT has a total of 98 completed or ongoing
+projects and support teams, with a total of 627 contributors. There is
+an average (mean) of 2.4 contributions per person across all FORRT
+projects, and the average number of contributors per project is 15. You
+can see the full list of FORRT contributors and their individual
contributions [here](https://forrt.org/contributors/).
+
+
diff --git a/content/contributor-analysis/network-graph.Rmd b/content/contributor-analysis/network-graph.Rmd
index 1ab83d430d8..1aabee399a9 100644
--- a/content/contributor-analysis/network-graph.Rmd
+++ b/content/contributor-analysis/network-graph.Rmd
@@ -6,9 +6,26 @@ output:
keep_md: FALSE
---
-The interactive network visualization below shows the connections between FORRT contributors based on their collaborative work across different projects. Each node represents a contributor, and the connections (edges) represent shared project participation.
+The interactive network visualization below shows the connections between FORRT contributors based on their collaborative work across different projects.
+
+**How to read the graph:**
+
+- **Nodes** represent contributors. Each node is connected to the other people involved in the same project(s).
+- **Edges** (lines) represent connections between contributors who worked on the same project.
+
+Use the dropdown menus below to search for a specific contributor by name or to filter the network by project. Hovering over a node highlights its connections and shows which projects that person contributed to. Clicking on a node highlights it in red, and clicking on an edge highlights the connection between two contributors. You can also zoom in and out using the scroll wheel or the navigation buttons.
+
+
```{r setup, include=FALSE}
@@ -40,168 +58,133 @@ library(tidyr)
library(igraph)
library(googlesheets4)
library(stringr)
+library(here)
+library(readr)
```
```{r data-processing}
-# Google Sheet URLs
-google_sheet_url_automation <- "https://docs.google.com/spreadsheets/d/1MUD54FQUhfcBKrvr5gCYoh2wgbJ6Lf7oAJRAqsQ-Nag/edit"
-google_sheet_url_leads <- "https://docs.google.com/spreadsheets/d/1roy-sZTxyXENA5c5IIV7IIemYvzbzs7zojUN2yIpi58/edit?gid=0#gid=0"
-# Get sheet names
-gs4_deauth()
-sheets <- sheet_names(google_sheet_url_automation)
-
-# Exclude the first four Automation Source sheets
-relevant_sheets <- sheets[-c(1,2,3,4)]
-
-# Exclude 'full' sheets
-relevant_sheets <- relevant_sheets[!grepl(" - full$", relevant_sheets)]
-
-# Read all relevant sheets into a list and remove rows without last names
-internal_links <- read_sheet(google_sheet_url_automation, sheet = "INTERNAL LINKS") %>%
- select(`Project Name`) # Extract project names
-
-data_list <- lapply(relevant_sheets, function(sheet) {
- # Read the content of the current sheet
- df <- read_sheet(google_sheet_url_automation, sheet = sheet)
- # Remove rows where the `Surname` is missing or empty
- df <- df %>% filter(!is.na(`Surname`) & `Surname` != "")
- # Match the `Project Name` using the sheet name (assuming sheet name is the identifier)
- project_name <- internal_links$`Project Name`[internal_links$`Project Name` == sheet]
- # If a match is found, assign the `Project Name`, else set to NA
- if (length(project_name) > 0) {
- df <- df %>% mutate(`Project Name` = project_name)
- } else {
- df <- df %>% mutate(`Project Name` = NA)
- }
- df
-})
-
-# Combine all sheets into one dataframe
-dt <- bind_rows(data_list, .id = "Source_Sheet")
-
-# Move project name column to first position
-dt <- dt %>%
- select(`Project Name`, everything())
-
-# Remove PM column as it is unnecessary and might cause problems with binding
-dt <- dt %>%
- select(-`Project Managers`)
+# Read role column mappings (same source as tenzing.py)
+fields_url <- "https://docs.google.com/spreadsheets/d/e/2PACX-1vT_IaXiYtB3iAmtDZ_XiQKrToRkxOlkXNAeNU2SIT_J9PxvsQyptga6Gg9c8mSvDZpwY6d8skswIQYh/pub?output=csv&gid=277271370"
+role_cols <- read_csv(fields_url, show_col_types = FALSE) |> pull(Fields)
+
+# Read contributor data from cache (generated by tenzing.py)
+cache_path <- here("scripts", "forrt_contribs", "contributors_cache.csv")
+credit_roles <- read_csv(cache_path, show_col_types = FALSE)
+
```
```{r leads-data}
# Read Leads Tenzing data
-leads_sheets <- sheet_names(google_sheet_url_leads)
-relevant_leads_sheets <- leads_sheets[-c(2)]
-leads_df <- read_sheet(google_sheet_url_leads)
+google_sheet_url_leads <- "https://docs.google.com/spreadsheets/d/1roy-sZTxyXENA5c5IIV7IIemYvzbzs7zojUN2yIpi58/edit?gid=0#gid=0"
+gs4_deauth()
-# Rename columns and select relevant ones
-leads_df <- leads_df %>%
+lead_roles <- read_sheet(google_sheet_url_leads) |>
rename(
`Project Name` = `FORRT project(s)`,
`ORCID iD` = `ORCID`
- ) %>%
- select(`First name`, `Middle name`, `Surname`, `Project Name`, `Role`, `ORCID iD`)
+ ) |>
+ select(`First name`, `Middle name`, `Surname`, `Project Name`, `Role`, `ORCID iD`) |>
+ mutate(has_role = TRUE) |>
+ distinct(`First name`, `Middle name`, `Surname`, `Project Name`, `ORCID iD`, `Role`, .keep_all = TRUE) |>
+ pivot_wider(names_from = `Role`, values_from = `has_role`, values_fill = list(has_role = FALSE)) |>
+ # Remove FORRT-wide organizational roles (not project-level contributions)
+ select(-Director, -`Operations Coordinator`)
-# Create a column to mark the presence of a role and group by individual
-leads_df <- leads_df %>%
- mutate(has_role = TRUE) %>%
- distinct(`First name`, `Middle name`, `Surname`, `Project Name`, `ORCID iD`, `Role`, .keep_all = TRUE)
+# Get leadership role columns (everything except ID columns)
+id_cols <- c("First name", "Middle name", "Surname", "Project Name", "ORCID iD")
+leads_role_cols <- setdiff(names(lead_roles), id_cols)
-# Pivot the data to create a column for each leadership role type, removing director
-leads_df <- leads_df %>%
- pivot_wider(names_from = `Role`, values_from = `has_role`, values_fill = list(has_role = FALSE)) %>%
- select(-Director, -`Operations Coordinator`)
+# TODO: Including leadership roles (all_role_cols) in the network pivot changes the
+# graph topology significantly, which breaks the visIgraphLayout centering and makes
+# the network unreadable. For now, only CRediT roles (role_cols) are used for the
+# network. A future fix should integrate leadership roles without breaking the layout.
+# all_role_cols <- union(role_cols, leads_role_cols)
-# Combine leads tenzing rows with automation source
-dt <- bind_rows(dt, leads_df)
+# Append leads tenzing rows to the cached contributor data; the network pivot below uses only role_cols (CRediT roles)
+credit_roles <- bind_rows(credit_roles, lead_roles)
```
```{r data-cleaning}
-# Trim names and ORCIDs to ensure inconsistent Tenzing entries are not counted separately
-dt <- dt %>%
+# Trim names and ORCIDs to ensure inconsistent Tenzing entries are not counted separately
+credit_roles <- credit_roles |>
mutate(
`First name` = str_trim(str_replace_all(`First name`, "\\*", "")),
- `Middle name` = str_trim(str_replace_all(`Middle name`, "\\*", "")) %>% str_sub(1, 1),
- `Surname` = str_trim(str_replace_all(`Surname`, "\\*", "")),
+ `Middle name` = str_trim(str_replace_all(`Middle name`, "\\*", "")) |> str_sub(1, 1),
+ Surname = str_trim(str_replace_all(Surname, "\\*", "")),
`ORCID iD` = str_trim(str_remove(`ORCID iD`, "https://orcid.org/"))
)
-
-# Ensure Conceptualization is logical
-dt$Conceptualization <- as.logical(dt$Conceptualization)
```
```{r network-data-preparation}
# Process data for network
-dt_long <- dt %>%
- pivot_longer(cols = c("Conceptualization", "Data curation", "Formal analysis", "Funding acquisition", "Investigation", "Methodology", "Project administration", "Resources", "Software", "Supervision", "Validation", "Visualization", "Writing - original draft", "Writing - review & editing"),
- names_to = "Role",
- values_to = "has_role") %>%
- filter(has_role == TRUE) %>%
- rowwise() %>%
- mutate(Contributor = paste(na.omit(c(`Surname`, `First name`)), collapse = " ")) %>%
- mutate(Lead = if_else(Role %in% c("lead", "co-lead"), Role, "other"))
-
-# Get co-occurences
-Contributor_Project <- xtabs(~ `Project Name` + Contributor,
- data=dt_long %>%
- select(`Project Name`, Contributor) %>%
- unique(),
- sparse = TRUE)
-
-Contributor_occur <- crossprod(Contributor_Project, Contributor_Project)
+credit_roles_long <- credit_roles |>
+ pivot_longer(
+ cols = any_of(role_cols),
+ names_to = "Role",
+ values_to = "has_role"
+ ) |>
+ filter(has_role == TRUE) |>
+ unite(Contributor, Surname, `First name`, sep = " ", remove = FALSE, na.rm = TRUE)
+ # TODO: uncomment to style lead/co-lead nodes differently
+ # mutate(Lead = if_else(Role %in% c("lead", "co-lead"), Role, "other"))
+
+# Get co-occurrences
+contributor_project_matrix <- credit_roles_long |>
+ distinct(`Project Name`, Contributor) |>
+ xtabs(~ `Project Name` + Contributor, data = _, sparse = TRUE)
+
+cooccurrence_matrix <- crossprod(contributor_project_matrix, contributor_project_matrix)
```
```{r create-nodes}
# Create nodes
-Contributor_nodes_n <- dt_long %>%
- group_by(Contributor) %>%
- count(Contributor)
-
-colnames(Contributor_nodes_n) <- c("id", "value")
-
-ContributorGroups_nodes <- dt_long %>%
- select(Contributor, `Project Name`) %>%
- unique() %>%
- group_by(Contributor) %>%
- mutate(Projects = paste0(`Project Name`, collapse = ",")) %>%
- select(Contributor, Projects) %>%
- unique()
+node_counts <- credit_roles_long |>
+ count(Contributor) |>
+ rename(id = Contributor, value = n)
-colnames(ContributorGroups_nodes) <- c("id", "projects")
+node_projects <- credit_roles_long |>
+ distinct(Contributor, `Project Name`) |>
+ summarise(projects = str_c(`Project Name`, collapse = ","), .by = Contributor) |>
+ rename(id = Contributor)
-Contributor_nodes <- left_join(Contributor_nodes_n, ContributorGroups_nodes)
-Contributor_nodes$title <- paste0(Contributor_nodes$id,"
", gsub(pattern = ",", replacement = "
", x = Contributor_nodes$projects))
+contributor_nodes <- left_join(node_counts, node_projects, join_by(id)) |>
+  # Tooltip text is HTML: contributor name in bold, then one project per line
+  mutate(title = str_c("<b>", id, "</b><br>", str_replace_all(projects, ",", "<br>")))
```
```{r create-edges}
# Create edges
-Contributor_edges <- which(upper.tri(Contributor_occur), arr.ind = TRUE) %>%
- as.data.frame() %>%
+contributor_edges <- which(upper.tri(cooccurrence_matrix), arr.ind = TRUE) |>
+ as.data.frame() |>
mutate(
- from = rownames((Contributor_occur))[row],
- to = colnames((Contributor_occur))[col],
- width_n = (Contributor_occur)[cbind(row, col)]
- ) %>%
- filter(width_n > 0) %>%
- mutate(width = (width_n/10)^(3)) %>%
- select(from, to, width)
+ from = rownames(cooccurrence_matrix)[row],
+ to = colnames(cooccurrence_matrix)[col],
+ width_n = cooccurrence_matrix[cbind(row, col)]
+ ) |>
+ filter(width_n > 0) |>
+ mutate(width = (width_n / 10)^3) |>
+ select(from, to, width)
# Add one hidden edge that will move the Chinese translator team closer to the center
-additional_hidden_edge <- data.frame(from = c("Fang Cathy", "Chen Liangjie", "Jin Shuxian", "Yang Jinbiao", "Liu Ruoting", "Wang Xinyu", "Xu Yu", "Ji Xuejun", "Wang Zixi"),
- to = c("Azevedo Flavio"),
- width = 0.0,
- hidden = TRUE)
+additional_hidden_edge <- data.frame(
+ from = c("Fang Cathy", "Chen Liangjie", "Jin Shuxian", "Yang Jinbiao",
+ "Liu Ruoting", "Wang Xinyu", "Xu Yu", "Ji Xuejun", "Wang Zixi"),
+ to = "Azevedo Flavio",
+ width = 0.0,
+ hidden = TRUE
+)
-Contributor_edges_plot <- rbind(Contributor_edges %>%
- mutate(hidden = F), additional_hidden_edge)
+plot_edges <- bind_rows(
+ contributor_edges |> mutate(hidden = FALSE),
+ additional_hidden_edge
+)
```
```{r network-visualization, echo=FALSE}
# Create the network visualization
-visNetwork(Contributor_nodes, Contributor_edges_plot, width = "100%", height = "800px") %>%
- visLayout(randomSeed = 1001) %>%
- visIgraphLayout(layout = "layout_with_fr", randomSeed = 1001) %>%
+visNetwork(contributor_nodes, plot_edges, width = "100%", height = "800px") |>
+ visLayout(randomSeed = 1001) |>
+ visIgraphLayout(layout = "layout_with_fr", randomSeed = 1001, start.temp = 16) |>
visPhysics(
enabled = TRUE,
stabilization = list(
@@ -211,48 +194,49 @@ visNetwork(Contributor_nodes, Contributor_edges_plot, width = "100%", height = "
),
solver = "forceAtlas2Based",
forceAtlas2Based = list(
- gravitationalConstant = -80, # Light repulsion to preserve central clusters
- centralGravity = 0.4, # Moderate central pull for distant nodes
- springLength = 150, # Medium springs - shorter for distant nodes
- springConstant = 0.003, # Moderate spring strength
- damping = 0.4, # Balanced damping
- avoidOverlap = 0.2 # Light overlap avoidance to preserve density
+ gravitationalConstant = -80,
+ centralGravity = 0.4,
+ springLength = 150,
+ springConstant = 0.003,
+ damping = 0.4,
+ avoidOverlap = 0.2
)
- ) %>%
- visEdges(color = list(color = "#87CEEB", opacity = 0.3)) %>%
+ ) |>
+ visEdges(color = list(color = "#87CEEB", opacity = 0.3, hover = "#A52828", highlight = "#A52828")) |>
visNodes(
font = list(size = 4, strokeWidth = 1, strokeColor = "white"),
size = 7,
borderWidth = 0,
color = list(background = "#000000", highlight = list(background = "#A52828", border = "darkred")),
scaling = list(min = 3, max = 15)
- ) %>%
+ ) |>
visInteraction(
dragNodes = TRUE,
dragView = TRUE,
hover = TRUE,
hoverConnectedEdges = TRUE,
selectable = TRUE,
+ selectConnectedEdges = TRUE,
multiselect = TRUE,
navigationButtons = TRUE
- ) %>%
+ ) |>
visOptions(
highlightNearest = list(enabled = TRUE, degree = 1, hover = TRUE, algorithm = "hierarchical"),
- nodesIdSelection = list(enabled = TRUE, style = "width: 200px;"),
- selectedBy = list(variable = "projects", multiple = TRUE, style = "width: 200px; background: #f8f8f8;")
- ) %>%
+ nodesIdSelection = list(enabled = TRUE, main = "Select by contributor", style = "width: 240px; background: #f8f8f8;"),
+ selectedBy = list(variable = "projects", multiple = TRUE, style = "width: 240px; background: #f8f8f8; margin-bottom: 30px;")
+ ) |>
htmlwidgets::onRender("
function(el, x) {
var network = this;
-
+
// Set initial zoom level immediately
network.on('initRedraw', function() {
network.moveTo({
- scale: 0.012,
+ scale: 0.042,
animation: false
});
});
-
+
// Maintain zoom after stabilization
network.on('stabilized', function() {
network.moveTo({
@@ -260,7 +244,7 @@ visNetwork(Contributor_nodes, Contributor_edges_plot, width = "100%", height = "
animation: false
});
});
-
+
network.on('click', function(params) {
if (params.nodes.length === 0) {
network.body.data.nodes.update(
@@ -285,3 +269,7 @@ visNetwork(Contributor_nodes, Contributor_edges_plot, width = "100%", height = "
}
")
```
+
+
diff --git a/content/contributor-analysis/projects-plot-1.png b/content/contributor-analysis/projects-plot-1.png
index 3a90d63dfc0..acf7313106e 100644
Binary files a/content/contributor-analysis/projects-plot-1.png and b/content/contributor-analysis/projects-plot-1.png differ
diff --git a/content/contributor-analysis/roles-plot-1.png b/content/contributor-analysis/roles-plot-1.png
index 16a71c8500e..43cb92a12fd 100644
Binary files a/content/contributor-analysis/roles-plot-1.png and b/content/contributor-analysis/roles-plot-1.png differ
diff --git a/content/contributor-analysis/treemap-plot-1.png b/content/contributor-analysis/treemap-plot-1.png
new file mode 100644
index 00000000000..28195823863
Binary files /dev/null and b/content/contributor-analysis/treemap-plot-1.png differ
diff --git a/layouts/contributors_analysis/single.html b/layouts/contributors_analysis/single.html
index 3a3e3d56e89..ecaa5ed8aae 100644
--- a/layouts/contributors_analysis/single.html
+++ b/layouts/contributors_analysis/single.html
@@ -1,19 +1,71 @@
{{- define "main" -}}
-
+
+
{{ partial "page_header" . }}
-
+
{{ .Content }}
-
+
-{{ partial "network-graph.html" . }}
+
+
+
+
+
diff --git a/layouts/partials/network-graph.html b/layouts/partials/network-graph.html
index b98b466f2a6..a18ea2a83c5 100644
--- a/layouts/partials/network-graph.html
+++ b/layouts/partials/network-graph.html
@@ -5661,6 +5661,9 @@ Network of Contributors
different projects. Each node represents a contributor, and the
connections (edges) represent shared project participation.
-
-
+
+
+
diff --git a/scripts/forrt_contribs/tenzing.py b/scripts/forrt_contribs/tenzing.py
index ecc642bda9d..06703d476f0 100644
--- a/scripts/forrt_contribs/tenzing.py
+++ b/scripts/forrt_contribs/tenzing.py
@@ -213,6 +213,14 @@ def fetch_all_contributor_data(df_index):
if col in cache_data.columns:
cache_data[col] = cache_data[col].astype(str).str.replace(r'[\r\n\t]+', '', regex=True).str.strip()
cache_data[col] = cache_data[col].replace('nan', '')
+ # Standardize names in cache by ORCID: when the same ORCID appears with different name variants
+ # (e.g. swapped first/surname), the first occurrence wins. Cache only, not tenzing.md; skipped if columns are missing.
+ name_cols = [c for c in ('First name', 'Middle name', 'Surname') if c in cache_data.columns]
+ if name_cols and 'ORCID iD' in cache_data.columns:
+     orcid_mask = cache_data['ORCID iD'] != ''
+     canonical = cache_data.loc[orcid_mask].groupby('ORCID iD')[name_cols].first()
+     for col in name_cols:
+         cache_data.loc[orcid_mask, col] = cache_data.loc[orcid_mask, 'ORCID iD'].map(canonical[col]).values
cache_data.to_csv(CACHE_FILE, index=False)
print(f"💾 Cache saved to {CACHE_FILE} ({len(cache_data)} rows, {len(cache_columns_present)} columns)")
diff --git a/scripts/forrt_contribs/tenzing_template.md b/scripts/forrt_contribs/tenzing_template.md
index 429520c8b71..14c0a85fd88 100644
--- a/scripts/forrt_contribs/tenzing_template.md
+++ b/scripts/forrt_contribs/tenzing_template.md
@@ -192,7 +192,7 @@ if (window.location.hash) {
-FORRT is driven by a **large and diverse community of contributors** that shape one or more of our projects. Below you can see everyone's scientific contributions in detail. Note that many also contribute to maintaining our community - we are equally grateful for their efforts. You can find out more about the scale of contributions at FORRT, including an interactive network graph, on our contributor analysis page.
+FORRT is driven by a **large and diverse community of contributors** that shape one or more of our projects. Below you can see everyone's scientific contributions in detail. Note that many also contribute to maintaining our community - we are equally grateful for their efforts. You can find out more about the scale of contributions at FORRT, including an interactive network graph, on our contributor analysis page.
diff --git a/static/partials/network-graph.html b/static/partials/network-graph.html
new file mode 100644
index 00000000000..13be8ca0735
--- /dev/null
+++ b/static/partials/network-graph.html
@@ -0,0 +1,5763 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+Network of Contributors
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
The interactive network visualization below shows the connections
+between FORRT contributors based on their collaborative work across
+different projects.
+
How to read the graph:
+
+- Nodes represent contributors, shown by name. Each node is
+connected to the other contributors involved in the same project(s).
+- Edges (lines) represent connections between
+contributors who worked on the same project.
+
+
Use the dropdown menus below to search for a specific contributor by
+name or to filter the network by project. Hovering over a node
+highlights its connections and shows which projects that person
+contributed to. Clicking on a node highlights it in red, and clicking on
+an edge highlights the connection between two contributors. You can also
+zoom in and out using the scroll wheel or the navigation buttons.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+