diff --git a/.jenkins/build.sh b/.jenkins/build.sh index f94623f0ef..9c25ed3c3a 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -46,21 +46,9 @@ awsv2 configure set default.s3.multipart_threshold 5120MB # Decide whether to parallelize tutorial builds, based on $JOB_BASE_NAME if [[ "${JOB_TYPE}" == "worker" ]]; then - # Step 1: Remove runnable code from tutorials that are not supposed to be run - python $DIR/remove_runnable_code.py beginner_source/aws_distributed_training_tutorial.py beginner_source/aws_distributed_training_tutorial.py || true - # Temp remove for mnist download issue. (Re-enabled for 1.8.1) - # python $DIR/remove_runnable_code.py beginner_source/fgsm_tutorial.py beginner_source/fgsm_tutorial.py || true - # python $DIR/remove_runnable_code.py intermediate_source/spatial_transformer_tutorial.py intermediate_source/spatial_transformer_tutorial.py || true - # Temp remove for 1.10 release. - # python $DIR/remove_runnable_code.py advanced_source/neural_style_tutorial.py advanced_source/neural_style_tutorial.py || true - - # TODO: Fix bugs in these tutorials to make them runnable again - # python $DIR/remove_runnable_code.py beginner_source/audio_classifier_tutorial.py beginner_source/audio_classifier_tutorial.py || true - - # Remove runnable code from tensorboard_profiler_tutorial.py as it frequently crashes, see https://github.com/pytorch/pytorch/issues/74139 - # python $DIR/remove_runnable_code.py intermediate_source/tensorboard_profiler_tutorial.py intermediate_source/tensorboard_profiler_tutorial.py || true - - # Step 2: Keep certain tutorials based on file count, and remove runnable code in all other tutorials + # Step 1: Determine which tutorials this worker should execute. + # FILES_TO_RUN is read by conf.py to set sphinx_gallery's filename_pattern, + # so only the assigned tutorials have their code executed. # IMPORTANT NOTE: We assume that each tutorial has a UNIQUE filename. FILES_TO_RUN=$(python .jenkins/get_files_to_run.py) echo "FILES_TO_RUN: " ${FILES_TO_RUN} @@ -146,7 +134,7 @@ elif [[ "${JOB_TYPE}" == "manager" ]]; then done # Step 4: Copy all generated files into docs - rsync -av docs_with_plot/docs/ docs --exclude='**aws_distributed_training_tutorial*' + rsync -av docs_with_plot/docs/ docs # Step 5: Remove INVISIBLE_CODE_BLOCK from .html/.rst.txt/.ipynb/.py files bash $DIR/remove_invisible_code_block_batch.sh docs diff --git a/.jenkins/delete_html_file_with_runnable_code_removed.py b/.jenkins/delete_html_file_with_runnable_code_removed.py deleted file mode 100644 index b84a0ecd92..0000000000 --- a/.jenkins/delete_html_file_with_runnable_code_removed.py +++ /dev/null @@ -1,11 +0,0 @@ -import sys -import os - -html_file_path = sys.argv[1] - -with open(html_file_path, 'r', encoding='utf-8') as html_file: - html = html_file.read() - -if "%%%%%%RUNNABLE_CODE_REMOVED%%%%%%" in html: - print("Removing " + html_file_path) - os.remove(html_file_path) diff --git a/.jenkins/get_files_to_run.py b/.jenkins/get_files_to_run.py index bdf4562a82..5f834a23ca 100644 --- a/.jenkins/get_files_to_run.py +++ b/.jenkins/get_files_to_run.py @@ -1,8 +1,7 @@ -from typing import Any, Dict, List, Optional, Tuple import json import os from pathlib import Path -from remove_runnable_code import remove_runnable_code +from typing import Any, Dict, List, Optional, Tuple # Calculate repo base dir @@ -10,7 +9,11 @@ def get_all_files() -> List[str]: - sources = [x.relative_to(REPO_BASE_DIR) for x in REPO_BASE_DIR.glob("*_source/**/*.py") if 'data' not in x.parts] + sources = [ + x.relative_to(REPO_BASE_DIR) + for x in REPO_BASE_DIR.glob("*_source/**/*.py") + if "data" not in x.parts + ] return sorted([str(x) for x in sources]) @@ -20,7 +23,9 @@ def read_metadata() -> Dict[str, Any]: def calculate_shards(all_files: List[str], num_shards: int = 20) -> List[List[str]]: - sharded_files: List[Tuple[float, List[str]]] = [(0.0, []) for _ in range(num_shards)] + sharded_files: List[Tuple[float, List[str]]] = [ + (0.0, []) for _ in range(num_shards) + ] metadata = read_metadata() def get_duration(file: str) -> int: @@ -41,10 +46,16 @@ def add_to_shard(i, filename): all_other_files = all_files.copy() needs_multigpu = list( - filter(lambda x: get_needs_machine(x) == "linux.16xlarge.nvidia.gpu", all_files,) + filter( + lambda x: get_needs_machine(x) == "linux.16xlarge.nvidia.gpu", + all_files, + ) ) needs_a10g = list( - filter(lambda x: get_needs_machine(x) == "linux.g5.4xlarge.nvidia.gpu", all_files,) + filter( + lambda x: get_needs_machine(x) == "linux.g5.4xlarge.nvidia.gpu", + all_files, + ) ) for filename in needs_multigpu: # currently, the only job that has multigpu is the 0th worker, @@ -56,38 +67,31 @@ def add_to_shard(i, filename): # so we'll add all the jobs that need this machine to the 1st worker add_to_shard(1, filename) all_other_files.remove(filename) - sorted_files = sorted(all_other_files, key=get_duration, reverse=True,) + sorted_files = sorted( + all_other_files, + key=get_duration, + reverse=True, + ) for filename in sorted_files: - min_shard_index = sorted(range(1, num_shards), key=lambda i: sharded_files[i][0])[ - 0 - ] + min_shard_index = sorted( + range(1, num_shards), key=lambda i: sharded_files[i][0] + )[0] add_to_shard(min_shard_index, filename) return [x[1] for x in sharded_files] -def compute_files_to_keep(files_to_run: List[str]) -> List[str]: - metadata = read_metadata() - files_to_keep = list(files_to_run) - for file in files_to_run: - extra_files = metadata.get(file, {}).get("extra_files", []) - files_to_keep.extend(extra_files) - return files_to_keep - - -def remove_other_files(all_files, files_to_keep) -> None: - - for file in all_files: - if file not in files_to_keep: - remove_runnable_code(file, file) - - -def parse_args() -> Any: +def parse_args(): from argparse import ArgumentParser + parser = ArgumentParser("Select files to run") parser.add_argument("--dry-run", action="store_true") - parser.add_argument("--num-shards", type=int, default=int(os.environ.get("NUM_WORKERS", "20"))) - parser.add_argument("--shard-num", type=int, default=int(os.environ.get("WORKER_ID", "1"))) + parser.add_argument( + "--num-shards", type=int, default=int(os.environ.get("NUM_WORKERS", "20")) + ) + parser.add_argument( + "--shard-num", type=int, default=int(os.environ.get("WORKER_ID", "1")) + ) return parser.parse_args() @@ -95,9 +99,9 @@ def main() -> None: args = parse_args() all_files = get_all_files() - files_to_run = calculate_shards(all_files, num_shards=args.num_shards)[args.shard_num - 1] - if not args.dry_run: - remove_other_files(all_files, compute_files_to_keep(files_to_run)) + files_to_run = calculate_shards(all_files, num_shards=args.num_shards)[ + args.shard_num - 1 + ] stripped_file_names = [Path(x).stem for x in files_to_run] print(" ".join(stripped_file_names)) diff --git a/.jenkins/get_sphinx_filenames.py b/.jenkins/get_sphinx_filenames.py deleted file mode 100644 index b84267b48a..0000000000 --- a/.jenkins/get_sphinx_filenames.py +++ /dev/null @@ -1,13 +0,0 @@ -from pathlib import Path -from typing import List - -from get_files_to_run import get_all_files -from validate_tutorials_built import NOT_RUN - - -def get_files_for_sphinx() -> List[str]: - all_py_files = get_all_files() - return [x for x in all_py_files if all(y not in x for y in NOT_RUN)] - - -SPHINX_SHOULD_RUN = "|".join(get_files_for_sphinx()) diff --git a/.jenkins/remove_runnable_code.py b/.jenkins/remove_runnable_code.py deleted file mode 100644 index 037017d8d7..0000000000 --- a/.jenkins/remove_runnable_code.py +++ /dev/null @@ -1,58 +0,0 @@ -import sys - -STATE_IN_MULTILINE_COMMENT_BLOCK_DOUBLE_QUOTE = "STATE_IN_MULTILINE_COMMENT_BLOCK_DOUBLE_QUOTE" -STATE_IN_MULTILINE_COMMENT_BLOCK_SINGLE_QUOTE = "STATE_IN_MULTILINE_COMMENT_BLOCK_SINGLE_QUOTE" -STATE_NORMAL = "STATE_NORMAL" - - -def remove_runnable_code(python_file_path, output_file_path): - with open(python_file_path, 'r', encoding='utf-8') as file: - lines = file.readlines() - ret_lines = [] - state = STATE_NORMAL - for line in lines: - if state == STATE_NORMAL: - if line.startswith('#'): - ret_lines.append(line) - state = STATE_NORMAL - elif ((line.startswith('"""') or line.startswith('r"""')) and - line.endswith('"""')): - ret_lines.append(line) - state = STATE_NORMAL - elif line.startswith('"""') or line.startswith('r"""'): - ret_lines.append(line) - state = STATE_IN_MULTILINE_COMMENT_BLOCK_DOUBLE_QUOTE - elif ((line.startswith("'''") or line.startswith("r'''")) and - line.endswith("'''")): - ret_lines.append(line) - state = STATE_NORMAL - elif line.startswith("'''") or line.startswith("r'''"): - ret_lines.append(line) - state = STATE_IN_MULTILINE_COMMENT_BLOCK_SINGLE_QUOTE - else: - ret_lines.append("\n") - state = STATE_NORMAL - elif state == STATE_IN_MULTILINE_COMMENT_BLOCK_DOUBLE_QUOTE: - if line.startswith('"""'): - ret_lines.append(line) - state = STATE_NORMAL - else: - ret_lines.append(line) - state = STATE_IN_MULTILINE_COMMENT_BLOCK_DOUBLE_QUOTE - elif state == STATE_IN_MULTILINE_COMMENT_BLOCK_SINGLE_QUOTE: - if line.startswith("'''"): - ret_lines.append(line) - state = STATE_NORMAL - else: - ret_lines.append(line) - state = STATE_IN_MULTILINE_COMMENT_BLOCK_SINGLE_QUOTE - - ret_lines.append("\n# %%%%%%RUNNABLE_CODE_REMOVED%%%%%%") - - with open(output_file_path, 'w', encoding='utf-8') as file: - for line in ret_lines: - file.write(line) - - -if __name__ == "__main__": - remove_runnable_code(sys.argv[1], sys.argv[2]) diff --git a/conf.py b/conf.py index 67227c0784..e94652a8fc 100644 --- a/conf.py +++ b/conf.py @@ -47,7 +47,6 @@ import plotly.io as pio import pypandoc import torch -from get_sphinx_filenames import SPHINX_SHOULD_RUN pio.renderers.default = "sphinx_gallery" import multiprocessing @@ -159,7 +158,6 @@ def wrapper(*args, **kwargs): } - # -- Sphinx-gallery configuration -------------------------------------------- sphinx_gallery_conf = { @@ -171,7 +169,7 @@ def wrapper(*args, **kwargs): "unstable_source", ], "gallery_dirs": ["beginner", "intermediate", "advanced", "recipes", "unstable"], - "filename_pattern": re.compile(SPHINX_SHOULD_RUN), + "filename_pattern": "/", "promote_jupyter_magic": True, "backreferences_dir": None, "write_computation_times": True, @@ -251,7 +249,7 @@ def wrapper(*args, **kwargs): "github_version": "main", "doc_path": ".", "library_links": theme_variables.get("library_links", []), - #"pytorch_project": "tutorials", + # "pytorch_project": "tutorials", } @@ -268,6 +266,16 @@ def wrapper(*args, **kwargs): r"^(?!.*" + os.getenv("GALLERY_PATTERN") + r")" ) +if os.getenv("FILES_TO_RUN"): + # FILES_TO_RUN is set by CI workers to control which tutorials are + # executed during sharded builds. Only matching tutorials will have + # their code executed; all others will generate static pages without + # running any code. This replaces the old approach of mutating source + # files with remove_runnable_code.py. + files_to_run = os.getenv("FILES_TO_RUN").split() + pattern = "|".join(re.escape(f) for f in files_to_run) + sphinx_gallery_conf["filename_pattern"] = re.compile(pattern) + for i in range(len(sphinx_gallery_conf["examples_dirs"])): gallery_dir = Path(sphinx_gallery_conf["gallery_dirs"][i]) source_dir = Path(sphinx_gallery_conf["examples_dirs"][i])