28 changes: 26 additions & 2 deletions scripts/2-process/gcs_process.py
@@ -27,6 +27,17 @@

 # Constants
 QUARTER = os.path.basename(PATHS["data_quarter"])
+FILE_PATHS = [
+    shared.path_join(PATHS["data_phase"], "gcs_product_totals.csv"),
+    shared.path_join(PATHS["data_phase"], "gcs_status_combined_totals.csv"),
+    shared.path_join(PATHS["data_phase"], "gcs_status_lastest_totals.csv"),
+    shared.path_join(PATHS["data_phase"], "gcs_status_prior_totals.csv"),
+    shared.path_join(PATHS["data_phase"], "gcs_status_retired_totals.csv"),
+    shared.path_join(PATHS["data_phase"], "gcs_totals_by_country.csv"),
+    shared.path_join(PATHS["data_phase"], "gcs_totals_by_free_cultural.csv"),
+    shared.path_join(PATHS["data_phase"], "gcs_totals_by_language.csv"),
+    shared.path_join(PATHS["data_phase"], "gcs_totals_by_restrictions.csv"),
+]
 
 
 def parse_arguments():
@@ -48,8 +59,12 @@ def parse_arguments():
     parser.add_argument(
         "--enable-git",
         action="store_true",
-        help="Enable git actions such as fetch, merge, add, commit, and push"
-        " (default: False)",
+        help="Enable git actions such as fetch, merge, add, commit, and push",
     )
+    parser.add_argument(
+        "--force",
+        action="store_true",
+        help="Regenerate data even if processed files already exist",
+    )
     args = parser.parse_args()
     if not args.enable_save and args.enable_git:
@@ -62,6 +77,14 @@ def parse_arguments():
     return args
 
 
+def check_for_data_files(args, file_paths):
+    for path in file_paths:
+        if os.path.exists(path) and not args.force:
+            raise shared.QuantifyingException(
+                f"Processed data already exists for {QUARTER}", 0
+            )
 
 
 def data_to_csv(args, data, file_path):
     if not args.enable_save:
         return
@@ -308,6 +331,7 @@ def main():
     args = parse_arguments()
     shared.paths_log(LOGGER, PATHS)
     shared.git_fetch_and_merge(args, PATHS["repo"])
+    check_for_data_files(args, FILE_PATHS)
 
     # Count data
     file1_count = shared.path_join(PATHS["data_1-fetch"], "gcs_1_count.csv")
25 changes: 17 additions & 8 deletions scripts/2-process/github_process.py
@@ -24,6 +24,10 @@

 # Constants
 QUARTER = os.path.basename(PATHS["data_quarter"])
+FILE_PATHS = [
+    shared.path_join(PATHS["data_phase"], "github_totals_by_license.csv"),
+    shared.path_join(PATHS["data_phase"], "github_totals_by_restriction.csv"),
+]
 
 
 def parse_arguments():
@@ -48,6 +52,12 @@ def parse_arguments():
help="Enable git actions such as fetch, merge, add, commit, and push"
" (default: False)",
)
parser.add_argument(
"--force",
action="store_true",
help="Regenerate data even if processed files already exist",
)

args = parser.parse_args()
if not args.enable_save and args.enable_git:
parser.error("--enable-git requires --enable-save")
@@ -59,11 +69,12 @@ def parse_arguments():
     return args
 
 
-def check_for_data_file(file_path):
-    if os.path.exists(file_path):
-        raise shared.QuantifyingException(
-            f"Processed data already exists for {QUARTER}", 0
-        )
+def check_for_data_files(args, file_paths):
+    for path in file_paths:
+        if os.path.exists(path) and not args.force:
+            raise shared.QuantifyingException(
+                f"Processed data already exists for {QUARTER}", 0
+            )
 
 
 def data_to_csv(args, data, file_path):
@@ -98,7 +109,6 @@ def process_totals_by_license(args, count_data):
     file_path = shared.path_join(
         PATHS["data_phase"], "github_totals_by_license.csv"
     )
-    check_for_data_file(file_path)
     data_to_csv(args, data, file_path)


Expand Down Expand Up @@ -133,15 +143,14 @@ def process_totals_by_restriction(args, count_data):
     file_path = shared.path_join(
         PATHS["data_phase"], "github_totals_by_restriction.csv"
     )
-    check_for_data_file(file_path)
     data_to_csv(args, data, file_path)
 
 
 def main():
     args = parse_arguments()
     shared.paths_log(LOGGER, PATHS)
     shared.git_fetch_and_merge(args, PATHS["repo"])
-
+    check_for_data_files(args, FILE_PATHS)
     file_count = shared.path_join(PATHS["data_1-fetch"], "github_1_count.csv")
     count_data = shared.open_data_file(
         LOGGER, file_count, usecols=["TOOL_IDENTIFIER", "COUNT"]
32 changes: 24 additions & 8 deletions scripts/2-process/wikipedia_process.py
@@ -28,6 +28,17 @@

 # Constants
 QUARTER = os.path.basename(PATHS["data_quarter"])
+FILE_PATHS = [
+    shared.path_join(
+        PATHS["data_phase"], "wikipedia_highest_language_usage.csv"
+    ),
+    shared.path_join(
+        PATHS["data_phase"], "wikipedia_least_language_usage.csv"
+    ),
+    shared.path_join(
+        PATHS["data_phase"], "wikipedia_language_representation.csv"
+    ),
+]
 
 
 def parse_arguments():
@@ -52,6 +63,12 @@ def parse_arguments():
help="Enable git actions such as fetch, merge, add, commit, and push"
" (default: False)",
)
parser.add_argument(
"--force",
action="store_true",
help="Regenerate data even if processed files already exist",
)

args = parser.parse_args()
if not args.enable_save and args.enable_git:
parser.error("--enable-git requires --enable-save")
@@ -63,11 +80,12 @@ def parse_arguments():
     return args
 
 
-def check_for_data_file(file_path):
-    if os.path.exists(file_path):
-        raise shared.QuantifyingException(
-            f"Processed data already exists for {QUARTER}", 0
-        )
+def check_for_data_files(args, file_paths):
+    for path in file_paths:
+        if os.path.exists(path) and not args.force:
+            raise shared.QuantifyingException(
+                f"Processed data already exists for {QUARTER}", 0
+            )

Reviewer comment (Member), on check_for_data_files():

I would exit the function early if args.force (avoids doing filesystem lookups):

    if args.force:
        return

We already do a similar thing in shared.py:

    def git_push_changes(args, repo_path):
        if not args.enable_git:
            return

Reviewer comment (Member):

Also, maybe it's worth moving this function to shared.py, since it's the same in multiple files.
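Taken together, the two suggestions might look something like this minimal sketch of a consolidated helper in shared.py (the quarter parameter and exact placement are assumptions, not code from this PR):

    def check_for_data_files(args, file_paths, quarter):
        # Exit early when --force is set: regeneration is wanted,
        # so there is no need to touch the filesystem at all
        if args.force:
            return
        for path in file_paths:
            if os.path.exists(path):
                raise QuantifyingException(
                    f"Processed data already exists for {quarter}", 0
                )

Each process script could then call it with its own FILE_PATHS and QUARTER constants.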


 def data_to_csv(args, data, file_path):
@@ -98,7 +116,6 @@ def process_highest_language_usage(args, count_data):
     file_path = shared.path_join(
         PATHS["data_phase"], "wikipedia_highest_language_usage.csv"
     )
-    check_for_data_file(file_path)
     data_to_csv(args, top_10, file_path)


@@ -122,7 +139,6 @@ def process_least_language_usage(args, count_data):
     file_path = shared.path_join(
         PATHS["data_phase"], "wikipedia_least_language_usage.csv"
     )
-    check_for_data_file(file_path)
     data_to_csv(args, bottom_10, file_path)


@@ -149,14 +165,14 @@ def process_language_representation(args, count_data):
     file_path = shared.path_join(
         PATHS["data_phase"], "wikipedia_language_representation.csv"
     )
-    check_for_data_file(file_path)
     data_to_csv(args, language_counts, file_path)
 
 
 def main():
     args = parse_arguments()
     shared.paths_log(LOGGER, PATHS)
     shared.git_fetch_and_merge(args, PATHS["repo"])
+    check_for_data_files(args, FILE_PATHS)
     file_count = shared.path_join(
         PATHS["data_1-fetch"], "wikipedia_count_by_languages.csv"
     )
3 changes: 2 additions & 1 deletion scripts/3-report/gcs_report.py
100755 → 100644
@@ -9,6 +9,7 @@
 import sys
 import textwrap
 import traceback
+from pathlib import Path
 
 # Third-party
 from pygments import highlight
@@ -27,7 +28,7 @@

 # Constants
 QUARTER = os.path.basename(PATHS["data_quarter"])
-SECTION = "Google Custom Search (GCS)"
+SECTION = Path(__file__).name

Reviewer comment (Member):

Oops, I didn't think about the displayed section title.

I think we need both SECTION_FILE and SECTION_TITLE or something similar: one for the README comment markers and one for the heading/display.
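A minimal sketch of that split (the constant names come from this comment; the title string mirrors the one removed by this PR):

    # Hypothetical pair of constants suggested in review, not in this PR
    SECTION_FILE = Path(__file__).name  # stable key for section markers/ordering
    SECTION_TITLE = "Google Custom Search (GCS)"  # human-readable heading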



 def parse_arguments():
3 changes: 2 additions & 1 deletion scripts/3-report/github_report.py
100755 → 100644
@@ -9,6 +9,7 @@
 import sys
 import textwrap
 import traceback
+from pathlib import Path
 
 # Third-party
 from pygments import highlight
@@ -25,7 +26,7 @@
 # Setup
 LOGGER, PATHS = shared.setup(__file__)
 QUARTER = os.path.basename(PATHS["data_quarter"])
-SECTION = "GitHub data"
+SECTION = Path(__file__).name


 def parse_arguments():
3 changes: 2 additions & 1 deletion scripts/3-report/wikipedia_report.py
100755 → 100644
@@ -9,6 +9,7 @@
 import sys
 import textwrap
 import traceback
+from pathlib import Path
 
 # Third-party
 from pygments import highlight
@@ -25,7 +26,7 @@
 # Setup
 LOGGER, PATHS = shared.setup(__file__)
 QUARTER = os.path.basename(PATHS["data_quarter"])
-SECTION = "Wikipedia data"
+SECTION = Path(__file__).name


 def parse_arguments():
3 changes: 2 additions & 1 deletion scripts/3-report/notes.py → scripts/3-report/zzz-notes.py
100755 → 100644
@@ -8,6 +8,7 @@
 import sys
 import textwrap
 import traceback
+from pathlib import Path
 
 # Third-party
 from pygments import highlight
@@ -25,7 +26,7 @@

 # Constants
 QUARTER = os.path.basename(PATHS["data_quarter"])
-SECTION = "Notes"
+SECTION = Path(__file__).name


 def parse_arguments():
68 changes: 45 additions & 23 deletions scripts/shared.py
@@ -269,6 +269,12 @@ def setup(current_file):
     return logger, paths
 
 
+def section_order():
+    report_dir = os.path.join(os.path.dirname(__file__), "3-report")
+    report_files = os.listdir(report_dir)
+    return report_files

Reviewer comment (Member), on lines +272 to +275:

This function doesn't establish section order; please update to use .sort() or sorted().
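A minimal version of that fix, per the reviewer's suggestion (not code from this PR):

    def section_order():
        report_dir = os.path.join(os.path.dirname(__file__), "3-report")
        # sorted() makes the order deterministic and alphabetical, which is
        # also why notes.py is renamed zzz-notes.py elsewhere in this PR
        return sorted(os.listdir(report_dir))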



 def update_readme(
     args,
     section_title,
@@ -280,6 +286,12 @@ def update_readme(
"""
Update the README.md file with the generated images and descriptions.
"""
logger = args.logger
paths = args.paths
ordered_sections = section_order()
logger.info("ordered_sections:", ordered_sections)
logger.info("section_title:", repr(section_title))

Reviewer comment (Member), on lines +292 to +293:

I suspect you converted these from print() statements. As they are now, I get a stack trace. Please convert to f-strings. For example:

    logger.info(f"ordered_sections: {ordered_sections}")

Reviewer comment (Member):

For more information on the arguments accepted by logger.info(), see Logger.debug(): https://docs.python.org/3.11/library/logging.html#logging.Logger.debug
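Background on the stack trace: logger.info() treats extra positional arguments as %-style format values for the message string, so a message without % placeholders fails during record formatting ("not all arguments converted during string formatting"). Besides f-strings, logging's lazy %-style would also work here:

    # The string is only interpolated if the record is actually emitted
    logger.info("ordered_sections: %s", ordered_sections)
    logger.info("section_title: %r", section_title)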


     if not args.enable_save:
         return
     if image_path and not image_caption:
@@ -293,18 +305,15 @@
" caption is provided"
)

logger = args.logger
paths = args.paths

readme_path = path_join(paths["data"], args.quarter, "README.md")

# Define section markers for each data source
section_start_line = f"<!-- {section_title} Start -->\n"
section_end_line = f"<!-- {section_title} End -->\n"
section_start_line = f"<!-- section start {section_title} -->\n"
section_end_line = f"<!-- section end {section_title} -->\n"

# Define entry markers for each plot (optional) and description
entry_start_line = f"<!-- {entry_title} Start -->\n"
entry_end_line = f"<!-- {entry_title} End -->\n"
entry_start_line = f"<!-- entry start {entry_title} -->\n"
entry_end_line = f"<!-- entry end {entry_title} -->\n"

if os.path.exists(readme_path):
with open(readme_path, "r", encoding="utf-8") as f:
@@ -318,26 +327,39 @@
     lines.insert(0, title_line)
     lines.insert(1, "\n")
 
-    # We only need to know the position of the end to append new entries
+    # Locate the data source section if it is already present
     if section_start_line in lines:
-        # Locate the data source section if it is already present
         section_end_index = lines.index(section_end_line)
     else:
-        # Add the data source section if it is absent
-        lines.extend(
-            [
-                f"{section_start_line}",
-                "\n",
-                "\n",
-                f"## {section_title}\n",
-                "\n",
-                "\n",
-                f"{section_end_line}",
-                "\n",
-            ]
-        )
-        section_end_index = lines.index(section_end_line)
+        insert_index = None
+        # If not present, find the position at which to insert the section
+        current_position = ordered_sections.index(section_title)
+        # Sections that should come before this section
+        sections_before = ordered_sections[:current_position]
+        # Find the last existing section that comes before this section
+        for prev_section_title in reversed(sections_before):
+            prev_end_line = f"<!-- section end {prev_section_title} -->\n"
+            if prev_end_line in lines:
+                insert_index = lines.index(prev_end_line) + 1
+                break
+
+        # If none exist, insert at the top (after the README title)
+        if insert_index is None:
+            insert_index = 2 if len(lines) >= 2 else len(lines)
+        # Build the new data source section
+        new_section_line = [
+            f"{section_start_line}",
+            "\n",
+            "\n",
+            f"## {section_title}\n",
+            "\n",
+            "\n",
+            f"{section_end_line}",
+            "\n",
+        ]
+        # Insert the section at the correct position
+        lines = lines[:insert_index] + new_section_line + lines[insert_index:]
+        section_end_index = lines.index(section_end_line)
     # Locate the entry if it is already present
     if entry_start_line in lines:
         entry_start_index = lines.index(entry_start_line)
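Derived from new_section_line above, the README fragment the else branch inserts would look like this (assuming a section title of gcs_report.py, per the SECTION change elsewhere in this PR):

    <!-- section start gcs_report.py -->


    ## gcs_report.py


    <!-- section end gcs_report.py -->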