Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 19 additions & 3 deletions scripts/forrt_contribs/tenzing.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,16 @@
# 'df' is assumed to hold the index data, including the Tenzing CSV links.
all_data_frames = []

print("--- Reading Contributor Data ---")
# Walk the project names, CSV links and project URLs in lockstep.
for project_name, url, project_url in zip(df['Project Name'], df['CSV Link'], df['Project URL']):
    # NOTE: each URL is assumed to already be a CSV export URL; the
    # conversion is described elsewhere in the file -- verify there.
    data_frame = pd.read_csv(url)

    # Report the number of rows read for this project (the log message
    # treats each row as one contributor).
    print(f"Read {len(data_frame)} contributors from '{project_name}'.")

    # Tag every row with the project it came from.
    data_frame['Project Name'] = project_name
    data_frame['Project URL'] = project_url
Expand Down Expand Up @@ -163,8 +168,8 @@ def extract_orcid_id(value):

# Group rows per contributor without sorting: the ORCID iD is the key,
# falling back to the contributor's name where the ORCID iD is missing.
contributor_key = merged_data['ORCID iD'].fillna(merged_data['Name'])
summary = (
    merged_data.groupby(contributor_key, sort=False)
    .apply(concatenate_contributions)
    .reset_index()
)

# The per-group result lands in column 0 and is expected to be a 2-item
# tuple; split it into the 'original_order' and 'Contributions' columns.
summary[['original_order', 'Contributions']] = pd.DataFrame(summary[0].tolist(), index=summary.index)
Expand All @@ -179,8 +184,17 @@ def extract_orcid_id(value):
summary = summary.reset_index(drop=True)
# Join every contributor entry into one string, separated by blank lines.
summary_string = '\n\n'.join(summary['Contributions'])

# Log the row count of the deduplicated summary.
print("\n--- Processing Complete ---")
print(f"Total number of unique contributors after deduplication: {len(summary)}")

# Resolve the directory that holds this script. __file__ is not defined in
# some interactive contexts (e.g. a notebook), so the lookup must happen
# only inside the try block; otherwise the fallback can never be reached.
try:
    script_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    script_dir = '.'  # Default to the current directory

# Build the template path relative to the script directory.
template_path = os.path.join(script_dir, 'tenzing_template.md')
Expand All @@ -196,3 +210,5 @@ def extract_orcid_id(value):
# Write the combined content to the output path, replacing any existing file.
with open(output_path, 'w') as output_file:
    output_file.write(combined_content)

print(f"\nSuccessfully generated the file at: {output_path}")