-
-
Notifications
You must be signed in to change notification settings - Fork 72
Improve report sections and avoid regenerating existing files #261
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
7ff5a69
a7645a5
297dc81
9f8ffd8
16cb9c9
5ae1584
80b6475
7eba804
241634a
8f4f079
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -28,6 +28,17 @@ | |||||||
|
|
||||||||
| # Constants | ||||||||
| QUARTER = os.path.basename(PATHS["data_quarter"]) | ||||||||
| FILE_PATHS = [ | ||||||||
| shared.path_join( | ||||||||
| PATHS["data_phase"], "wikipedia_highest_language_usage.csv" | ||||||||
| ), | ||||||||
| shared.path_join( | ||||||||
| PATHS["data_phase"], "wikipedia_least_language_usage.csv" | ||||||||
| ), | ||||||||
| shared.path_join( | ||||||||
| PATHS["data_phase"], "wikipedia_language_representation.csv" | ||||||||
| ), | ||||||||
| ] | ||||||||
|
|
||||||||
|
|
||||||||
| def parse_arguments(): | ||||||||
|
|
@@ -52,6 +63,12 @@ def parse_arguments(): | |||||||
| help="Enable git actions such as fetch, merge, add, commit, and push" | ||||||||
| " (default: False)", | ||||||||
| ) | ||||||||
| parser.add_argument( | ||||||||
| "--force", | ||||||||
| action="store_true", | ||||||||
| help="Regenerate data even if processed files already exist", | ||||||||
| ) | ||||||||
|
|
||||||||
| args = parser.parse_args() | ||||||||
| if not args.enable_save and args.enable_git: | ||||||||
| parser.error("--enable-git requires --enable-save") | ||||||||
|
|
@@ -63,11 +80,12 @@ def parse_arguments(): | |||||||
| return args | ||||||||
|
|
||||||||
|
|
||||||||
| def check_for_data_file(file_path): | ||||||||
| if os.path.exists(file_path): | ||||||||
| raise shared.QuantifyingException( | ||||||||
| f"Processed data already exists for {QUARTER}", 0 | ||||||||
| ) | ||||||||
| def check_for_data_files(args, file_paths): | ||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would exit the function early: `if args.force: return`.
We already do a similar thing in Lines 153 to 155 in 925e721
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Also, maybe it's worth moving this function to |
||||||||
| for path in file_paths: | ||||||||
| if os.path.exists(path) and not args.force: | ||||||||
| raise shared.QuantifyingException( | ||||||||
| f"Processed data already exists for {QUARTER}", 0 | ||||||||
| ) | ||||||||
|
|
||||||||
|
|
||||||||
| def data_to_csv(args, data, file_path): | ||||||||
|
|
@@ -98,7 +116,6 @@ def process_highest_language_usage(args, count_data): | |||||||
| file_path = shared.path_join( | ||||||||
| PATHS["data_phase"], "wikipedia_highest_language_usage.csv" | ||||||||
| ) | ||||||||
| check_for_data_file(file_path) | ||||||||
| data_to_csv(args, top_10, file_path) | ||||||||
|
|
||||||||
|
|
||||||||
|
|
@@ -122,7 +139,6 @@ def process_least_language_usage(args, count_data): | |||||||
| file_path = shared.path_join( | ||||||||
| PATHS["data_phase"], "wikipedia_least_language_usage.csv" | ||||||||
| ) | ||||||||
| check_for_data_file(file_path) | ||||||||
| data_to_csv(args, bottom_10, file_path) | ||||||||
|
|
||||||||
|
|
||||||||
|
|
@@ -149,14 +165,14 @@ def process_language_representation(args, count_data): | |||||||
| file_path = shared.path_join( | ||||||||
| PATHS["data_phase"], "wikipedia_language_representation.csv" | ||||||||
| ) | ||||||||
| check_for_data_file(file_path) | ||||||||
| data_to_csv(args, language_counts, file_path) | ||||||||
|
|
||||||||
|
|
||||||||
| def main(): | ||||||||
| args = parse_arguments() | ||||||||
| shared.paths_log(LOGGER, PATHS) | ||||||||
| shared.git_fetch_and_merge(args, PATHS["repo"]) | ||||||||
| check_for_data_files(args, FILE_PATHS) | ||||||||
| file_count = shared.path_join( | ||||||||
| PATHS["data_1-fetch"], "wikipedia_count_by_languages.csv" | ||||||||
| ) | ||||||||
|
|
||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,6 +9,7 @@ | |
| import sys | ||
| import textwrap | ||
| import traceback | ||
| from pathlib import Path | ||
|
|
||
| # Third-party | ||
| from pygments import highlight | ||
|
|
@@ -27,7 +28,7 @@ | |
|
|
||
| # Constants | ||
| QUARTER = os.path.basename(PATHS["data_quarter"]) | ||
| SECTION = "Google Custom Search (GCS)" | ||
| SECTION = Path(__file__).name | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Oops, I didn't think about the displayed section title. I think we need both |
||
|
|
||
|
|
||
| def parse_arguments(): | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -269,6 +269,12 @@ def setup(current_file): | |
| return logger, paths | ||
|
|
||
|
|
||
| def section_order(): | ||
| report_dir = os.path.join(os.path.dirname(__file__), "3-report") | ||
| report_files = os.listdir(report_dir) | ||
| return report_files | ||
|
Comment on lines
+272
to
+275
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This function doesn't establish section order. Please update to use |
||
|
|
||
|
|
||
| def update_readme( | ||
| args, | ||
| section_title, | ||
|
|
@@ -280,6 +286,12 @@ def update_readme( | |
| """ | ||
| Update the README.md file with the generated images and descriptions. | ||
| """ | ||
| logger = args.logger | ||
| paths = args.paths | ||
| ordered_sections = section_order() | ||
| logger.info("ordered_sections:", ordered_sections) | ||
| logger.info("section_title:", repr(section_title)) | ||
|
Comment on lines
+292
to
+293
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I suspect you converted these from f-strings; the original was probably: logger.info(f"ordered_sections: {ordered_sections}")
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. For more information on the arguments accepted by |
||
|
|
||
| if not args.enable_save: | ||
| return | ||
| if image_path and not image_caption: | ||
|
|
@@ -293,18 +305,15 @@ def update_readme( | |
| " caption is provided" | ||
| ) | ||
|
|
||
| logger = args.logger | ||
| paths = args.paths | ||
|
|
||
| readme_path = path_join(paths["data"], args.quarter, "README.md") | ||
|
|
||
| # Define section markers for each data source | ||
| section_start_line = f"<!-- {section_title} Start -->\n" | ||
| section_end_line = f"<!-- {section_title} End -->\n" | ||
| section_start_line = f"<!-- section start {section_title} -->\n" | ||
| section_end_line = f"<!-- section end {section_title} -->\n" | ||
|
|
||
| # Define entry markers for each plot (optional) and description | ||
| entry_start_line = f"<!-- {entry_title} Start -->\n" | ||
| entry_end_line = f"<!-- {entry_title} End -->\n" | ||
| entry_start_line = f"<!-- entry start {entry_title} -->\n" | ||
| entry_end_line = f"<!-- entry end {entry_title} -->\n" | ||
|
|
||
| if os.path.exists(readme_path): | ||
| with open(readme_path, "r", encoding="utf-8") as f: | ||
|
|
@@ -318,26 +327,39 @@ def update_readme( | |
| lines.insert(0, title_line) | ||
| lines.insert(1, "\n") | ||
|
|
||
| # We only need to know the position of the end to append new entries | ||
| # Locate the data source section if it is already present | ||
| if section_start_line in lines: | ||
| # Locate the data source section if it is already present | ||
| section_end_index = lines.index(section_end_line) | ||
| else: | ||
| # Add the data source section if it is absent | ||
| lines.extend( | ||
| [ | ||
| f"{section_start_line}", | ||
| "\n", | ||
| "\n", | ||
| f"## {section_title}\n", | ||
| "\n", | ||
| "\n", | ||
| f"{section_end_line}", | ||
| "\n", | ||
| ] | ||
| ) | ||
| section_end_index = lines.index(section_end_line) | ||
| insert_index = None | ||
| # If not present, we find the position to insert the section | ||
| current_position = ordered_sections.index(section_title) | ||
| # Sections that should come before this section | ||
| sections_before = ordered_sections[:current_position] | ||
| # We find the last existing section that comes before this section | ||
| for prev_section_title in reversed(sections_before): | ||
| prev_end_line = f"<!-- section end {prev_section_title} -->\n" | ||
| if prev_end_line in lines: | ||
| insert_index = lines.index(prev_end_line) + 1 | ||
| break | ||
|
|
||
| # If none exist, insert at the top (after README title) | ||
| if insert_index is None: | ||
| insert_index = 2 if len(lines) >= 2 else len(lines) | ||
| # Insert the new data source section at correct position | ||
| new_section_line = [ | ||
| f"{section_start_line}", | ||
| "\n", | ||
| "\n", | ||
| f"## {section_title}\n", | ||
| "\n", | ||
| "\n", | ||
| f"{section_end_line}", | ||
| "\n", | ||
| ] | ||
| # Insert the section at the correct position | ||
| lines = lines[:insert_index] + new_section_line + lines[insert_index:] | ||
| section_end_index = lines.index(section_end_line) | ||
| # Locate the entry if it is already present | ||
| if entry_start_line in lines: | ||
| entry_start_index = lines.index(entry_start_line) | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.