forrtproject · richarddushime · Jul 25, 2025 · Jul 25, 2025
@@ -12,11 +12,16 @@
 # Assuming 'df' contains the index data with Tenzing Links
 all_data_frames = []
 
+print("--- Reading Contributor Data ---")
 # Loop over both the Project Names and the Tenzing Links
 for project_name, url, project_url in zip(df['Project Name'], df['CSV Link'], df['Project URL']):
     # Make sure each URL is transformed into a CSV export URL as shown above
     data_frame = pd.read_csv(url)
 
+    # Progress output: report how many rows were read from this project's CSV
+    # (each row is counted as one contributor in the message below).
+    print(f"Read {len(data_frame)} contributors from '{project_name}'.")
+
     # Add a new column with the project name
     data_frame['Project Name'] = project_name
     data_frame['Project URL'] = project_url
@@ -163,8 +168,8 @@ def extract_orcid_id(value):
 
 # Perform the groupby operation without sorting
 summary = (merged_data.groupby(merged_data['ORCID iD'].fillna(merged_data['Name']), sort=False)
-                    .apply(concatenate_contributions)
-                    .reset_index())
+                       .apply(concatenate_contributions)
+                       .reset_index())
 
 # Separate the tuple into two columns
 summary[['original_order', 'Contributions']] = pd.DataFrame(summary[0].tolist(), index=summary.index)
@@ -179,8 +184,17 @@ def extract_orcid_id(value):
 summary = summary.reset_index(drop=True)
 summary_string = '\n\n'.join(summary['Contributions'])
 
+# Summary output: report the number of rows left in `summary`, which the
+# script presents as the deduplicated contributor count.
+print("\n--- Processing Complete ---")
+print(f"Total number of unique contributors after deduplication: {len(summary)}")
+
 # Get the directory of the current script
-script_dir = os.path.dirname(os.path.abspath(__file__))
+# Using a try-except block in case __file__ is not defined (e.g., in a notebook)
+try:
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+except NameError:
+    script_dir = '.' # Default to the current directory
 
 # Construct the paths for the template and output files
 template_path = os.path.join(script_dir, 'tenzing_template.md')
@@ -196,3 +210,5 @@ def extract_orcid_id(value):
 # Save the combined content to 'tenzing.md'
 with open(output_path, 'w') as file:
     file.write(combined_content)
+
+print(f"\nSuccessfully generated the file at: {output_path}")