20 changes: 4 additions & 16 deletions .jenkins/build.sh
@@ -46,21 +46,9 @@ awsv2 configure set default.s3.multipart_threshold 5120MB

# Decide whether to parallelize tutorial builds, based on $JOB_BASE_NAME
if [[ "${JOB_TYPE}" == "worker" ]]; then
# Step 1: Remove runnable code from tutorials that are not supposed to be run
python $DIR/remove_runnable_code.py beginner_source/aws_distributed_training_tutorial.py beginner_source/aws_distributed_training_tutorial.py || true
# Temp remove for mnist download issue. (Re-enabled for 1.8.1)
# python $DIR/remove_runnable_code.py beginner_source/fgsm_tutorial.py beginner_source/fgsm_tutorial.py || true
# python $DIR/remove_runnable_code.py intermediate_source/spatial_transformer_tutorial.py intermediate_source/spatial_transformer_tutorial.py || true
# Temp remove for 1.10 release.
# python $DIR/remove_runnable_code.py advanced_source/neural_style_tutorial.py advanced_source/neural_style_tutorial.py || true

# TODO: Fix bugs in these tutorials to make them runnable again
# python $DIR/remove_runnable_code.py beginner_source/audio_classifier_tutorial.py beginner_source/audio_classifier_tutorial.py || true

# Remove runnable code from tensorboard_profiler_tutorial.py as it frequently crashes, see https://github.com/pytorch/pytorch/issues/74139
# python $DIR/remove_runnable_code.py intermediate_source/tensorboard_profiler_tutorial.py intermediate_source/tensorboard_profiler_tutorial.py || true

# Step 2: Keep certain tutorials based on file count, and remove runnable code in all other tutorials
# Step 1: Determine which tutorials this worker should execute.
# FILES_TO_RUN is read by conf.py to set sphinx_gallery's filename_pattern,
# so only the assigned tutorials have their code executed.
# IMPORTANT NOTE: We assume that each tutorial has a UNIQUE filename.
FILES_TO_RUN=$(python .jenkins/get_files_to_run.py)
echo "FILES_TO_RUN: " ${FILES_TO_RUN}
@@ -146,7 +134,7 @@ elif [[ "${JOB_TYPE}" == "manager" ]]; then
done

# Step 4: Copy all generated files into docs
rsync -av docs_with_plot/docs/ docs --exclude='**aws_distributed_training_tutorial*'
rsync -av docs_with_plot/docs/ docs

# Step 5: Remove INVISIBLE_CODE_BLOCK from .html/.rst.txt/.ipynb/.py files
bash $DIR/remove_invisible_code_block_batch.sh docs
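Reviewer note: a minimal, hypothetical sketch of the hand-off the new Step 1 comment describes. The script path and the FILES_TO_RUN variable name come from the diff above; run_worker_build and the "make docs" command are placeholders, not the actual build.sh logic.

    # Illustrative only: a worker asks get_files_to_run.py for its shard and
    # exposes the result to conf.py through the FILES_TO_RUN environment variable.
    import os
    import subprocess

    def run_worker_build() -> None:
        # Which tutorials does this worker own?
        files_to_run = subprocess.check_output(
            ["python", ".jenkins/get_files_to_run.py"], text=True
        ).strip()
        # conf.py reads FILES_TO_RUN and narrows sphinx_gallery's
        # filename_pattern so only these tutorials execute.
        env = dict(os.environ, FILES_TO_RUN=files_to_run)
        subprocess.run(["make", "docs"], env=env, check=True)  # placeholder command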
11 changes: 0 additions & 11 deletions .jenkins/delete_html_file_with_runnable_code_removed.py

This file was deleted.

68 changes: 36 additions & 32 deletions .jenkins/get_files_to_run.py
@@ -1,16 +1,19 @@
from typing import Any, Dict, List, Optional, Tuple
import json
import os
from pathlib import Path
from remove_runnable_code import remove_runnable_code
from typing import Any, Dict, List, Optional, Tuple


# Calculate repo base dir
REPO_BASE_DIR = Path(__file__).absolute().parent.parent


def get_all_files() -> List[str]:
sources = [x.relative_to(REPO_BASE_DIR) for x in REPO_BASE_DIR.glob("*_source/**/*.py") if 'data' not in x.parts]
sources = [
x.relative_to(REPO_BASE_DIR)
for x in REPO_BASE_DIR.glob("*_source/**/*.py")
if "data" not in x.parts
]
return sorted([str(x) for x in sources])


@@ -20,7 +23,9 @@ def read_metadata() -> Dict[str, Any]:


def calculate_shards(all_files: List[str], num_shards: int = 20) -> List[List[str]]:
sharded_files: List[Tuple[float, List[str]]] = [(0.0, []) for _ in range(num_shards)]
sharded_files: List[Tuple[float, List[str]]] = [
(0.0, []) for _ in range(num_shards)
]
metadata = read_metadata()

def get_duration(file: str) -> int:
@@ -41,10 +46,16 @@ def add_to_shard(i, filename):

all_other_files = all_files.copy()
needs_multigpu = list(
filter(lambda x: get_needs_machine(x) == "linux.16xlarge.nvidia.gpu", all_files,)
filter(
lambda x: get_needs_machine(x) == "linux.16xlarge.nvidia.gpu",
all_files,
)
)
needs_a10g = list(
filter(lambda x: get_needs_machine(x) == "linux.g5.4xlarge.nvidia.gpu", all_files,)
filter(
lambda x: get_needs_machine(x) == "linux.g5.4xlarge.nvidia.gpu",
all_files,
)
)
for filename in needs_multigpu:
# currently, the only job that has multigpu is the 0th worker,
@@ -56,48 +67,41 @@ def add_to_shard(i, filename):
# so we'll add all the jobs that need this machine to the 1st worker
add_to_shard(1, filename)
all_other_files.remove(filename)
sorted_files = sorted(all_other_files, key=get_duration, reverse=True,)
sorted_files = sorted(
all_other_files,
key=get_duration,
reverse=True,
)

for filename in sorted_files:
min_shard_index = sorted(range(1, num_shards), key=lambda i: sharded_files[i][0])[
0
]
min_shard_index = sorted(
range(1, num_shards), key=lambda i: sharded_files[i][0]
)[0]
add_to_shard(min_shard_index, filename)
return [x[1] for x in sharded_files]


def compute_files_to_keep(files_to_run: List[str]) -> List[str]:
metadata = read_metadata()
files_to_keep = list(files_to_run)
for file in files_to_run:
extra_files = metadata.get(file, {}).get("extra_files", [])
files_to_keep.extend(extra_files)
return files_to_keep


def remove_other_files(all_files, files_to_keep) -> None:

for file in all_files:
if file not in files_to_keep:
remove_runnable_code(file, file)


def parse_args() -> Any:
def parse_args():
from argparse import ArgumentParser

parser = ArgumentParser("Select files to run")
parser.add_argument("--dry-run", action="store_true")
parser.add_argument("--num-shards", type=int, default=int(os.environ.get("NUM_WORKERS", "20")))
parser.add_argument("--shard-num", type=int, default=int(os.environ.get("WORKER_ID", "1")))
parser.add_argument(
"--num-shards", type=int, default=int(os.environ.get("NUM_WORKERS", "20"))
)
parser.add_argument(
"--shard-num", type=int, default=int(os.environ.get("WORKER_ID", "1"))
)
return parser.parse_args()


def main() -> None:
args = parse_args()

all_files = get_all_files()
files_to_run = calculate_shards(all_files, num_shards=args.num_shards)[args.shard_num - 1]
if not args.dry_run:
remove_other_files(all_files, compute_files_to_keep(files_to_run))
files_to_run = calculate_shards(all_files, num_shards=args.num_shards)[
args.shard_num - 1
]
stripped_file_names = [Path(x).stem for x in files_to_run]
print(" ".join(stripped_file_names))

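Reviewer note: calculate_shards above is a greedy longest-duration-first balancer. A self-contained sketch of that balancing idea follows; greedy_shards and the durations are made up for illustration, and it ignores the multi-GPU / A10G worker pinning the real script does before balancing.

    from typing import Dict, List, Tuple

    def greedy_shards(durations: Dict[str, float], num_shards: int) -> List[List[str]]:
        # Each shard tracks (total estimated duration, assigned files).
        shards: List[Tuple[float, List[str]]] = [(0.0, []) for _ in range(num_shards)]
        # Longest tutorials first, each onto the currently lightest shard,
        # which keeps worker runtimes roughly balanced.
        for name in sorted(durations, key=durations.get, reverse=True):
            i = min(range(num_shards), key=lambda j: shards[j][0])
            shards[i] = (shards[i][0] + durations[name], shards[i][1] + [name])
        return [files for _, files in shards]

    # greedy_shards({"a": 300, "b": 120, "c": 90, "d": 60}, 2)
    # -> [["a"], ["b", "c", "d"]]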
13 changes: 0 additions & 13 deletions .jenkins/get_sphinx_filenames.py

This file was deleted.

58 changes: 0 additions & 58 deletions .jenkins/remove_runnable_code.py

This file was deleted.

16 changes: 12 additions & 4 deletions conf.py
@@ -47,7 +46,6 @@
import plotly.io as pio
import pypandoc
import torch
from get_sphinx_filenames import SPHINX_SHOULD_RUN

pio.renderers.default = "sphinx_gallery"
import multiprocessing
@@ -159,7 +158,6 @@ def wrapper(*args, **kwargs):
}



# -- Sphinx-gallery configuration --------------------------------------------

sphinx_gallery_conf = {
@@ -171,7 +169,7 @@
"unstable_source",
],
"gallery_dirs": ["beginner", "intermediate", "advanced", "recipes", "unstable"],
"filename_pattern": re.compile(SPHINX_SHOULD_RUN),
"filename_pattern": "/",
"promote_jupyter_magic": True,
"backreferences_dir": None,
"write_computation_times": True,
@@ -251,7 +249,7 @@ def wrapper(*args, **kwargs):
"github_version": "main",
"doc_path": ".",
"library_links": theme_variables.get("library_links", []),
#"pytorch_project": "tutorials",
# "pytorch_project": "tutorials",
}


@@ -268,6 +266,16 @@ def wrapper(*args, **kwargs):
r"^(?!.*" + os.getenv("GALLERY_PATTERN") + r")"
)

if os.getenv("FILES_TO_RUN"):
# FILES_TO_RUN is set by CI workers to control which tutorials are
# executed during sharded builds. Only matching tutorials will have
# their code executed; all others will generate static pages without
# running any code. This replaces the old approach of mutating source
# files with remove_runnable_code.py.
files_to_run = os.getenv("FILES_TO_RUN").split()
pattern = "|".join(re.escape(f) for f in files_to_run)
sphinx_gallery_conf["filename_pattern"] = re.compile(pattern)

for i in range(len(sphinx_gallery_conf["examples_dirs"])):
gallery_dir = Path(sphinx_gallery_conf["gallery_dirs"][i])
source_dir = Path(sphinx_gallery_conf["examples_dirs"][i])
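Reviewer note: for intuition, the filename_pattern built from FILES_TO_RUN above behaves roughly as shown below. The tutorial names are made up; get_files_to_run.py prints space-separated file stems, and sphinx-gallery only executes scripts whose paths match the pattern.

    import re

    # Hypothetical FILES_TO_RUN value printed by a worker's get_files_to_run.py.
    files_to_run = "fgsm_tutorial neural_style_tutorial".split()
    pattern = re.compile("|".join(re.escape(name) for name in files_to_run))

    # Scripts that match are executed; the rest are still rendered as static
    # gallery pages, just without running their code.
    for path in [
        "beginner_source/fgsm_tutorial.py",
        "advanced_source/neural_style_tutorial.py",
        "intermediate_source/tensorboard_profiler_tutorial.py",
    ]:
        print(path, bool(pattern.search(path)))
    # The first two match; the profiler tutorial does not.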