diff --git a/Ironwood/Ironwood_Microbenchmarks_readme.md b/Ironwood/Ironwood_Microbenchmarks_readme.md index 60e6bbf1..7cbe065f 100644 --- a/Ironwood/Ironwood_Microbenchmarks_readme.md +++ b/Ironwood/Ironwood_Microbenchmarks_readme.md @@ -71,7 +71,7 @@ kubectl get nodes ### Deploying a single host job -Create a job manifest to run `2x2x1` microbenchmarks (`tpu7x-2x2x1-micobenchmarks.yaml`): +Create a job manifest to run `2x2x1` microbenchmarks (`tpu7x-2x2x1-microbenchmarks.yaml`): ```yaml apiVersion: v1 @@ -112,7 +112,7 @@ spec: Deploy the 2x2x1 microbenchmarks: ```bash -kubectl apply -f tpu7x-2x2x1-micobenchmarks.yaml +kubectl apply -f tpu7x-2x2x1-microbenchmarks.yaml ``` Monitor the results: @@ -129,7 +129,7 @@ kubectl delete pod tpu7x-single-host-microbenchmark ### Deploying a multi host job -Create a job manifest to run `4x4x4` microbenchmarks (`tpu7x-4x4x4-micobenchmarks.yaml`): +Create a job manifest to run `4x4x4` microbenchmarks (`tpu7x-4x4x4-microbenchmarks.yaml`): ```yaml apiVersion: v1 @@ -193,7 +193,7 @@ spec: Deploy the 4x4x4 microbenchmarks: ```bash -kubectl apply -f tpu7x-4x4x4-micobenchmarks.yaml +kubectl apply -f tpu7x-4x4x4-microbenchmarks.yaml ``` List all the jobs in the pod to get the name of a job: @@ -210,7 +210,7 @@ kubectl logs tpu7x-multi-host-microbenchmark-0-XXXXX Cleanup the job: ```bash -kubectl delete -f tpu7x-4x4x4-micobenchmarks.yaml +kubectl delete -f tpu7x-4x4x4-microbenchmarks.yaml ``` ## Microbenchmark scripts @@ -267,4 +267,4 @@ Examples can be found in the YAML files under config/ directory. If you wish to generate the xprof profile, set this parameter in the YAML file: * `trace_dir`: Dumps the xprof profile to either a local location or GCS bucket. -Examples can be found in the YAML files under config/ directory. \ No newline at end of file +Examples can be found in the YAML files under config/ directory. diff --git a/Ironwood/src/benchmark_compute.py b/Ironwood/src/benchmark_compute.py index 813a076c..bff3d12e 100644 --- a/Ironwood/src/benchmark_compute.py +++ b/Ironwood/src/benchmark_compute.py @@ -127,7 +127,7 @@ def quantization( OUT:FP8, SF:FP32 = Quantize(N:BF16) SF[i] = FP8_MAX / amax(IN[i]) OUT[i] = cast_fp8(IN[i] / SF[i]) - Dymaic scaling with absmax calibration method + Dynamic scaling with absmax calibration method """ def f(x): diff --git a/Ironwood/src/benchmark_utils.py b/Ironwood/src/benchmark_utils.py index ccd4f4c1..92964b22 100644 --- a/Ironwood/src/benchmark_utils.py +++ b/Ironwood/src/benchmark_utils.py @@ -588,7 +588,7 @@ def find_sparsecore_usage_from_xplane(log_dir: str) -> xplane_pb2.XSpace: def get_metrics_from_trace(trace: dict[str, Any], task: str) -> list[float]: - # Check if the given task name is a collective with corresponding TPU opertion. + # Check if the given task name is a collective with corresponding TPU operation. # This is a workaround and should be reverted or refactored in future. if task in TARGET_TASK_NAME_COLLECTIVES_MAP: try: diff --git a/src/benchmark_collectives.py b/src/benchmark_collectives.py index 30d1296b..fbfe88ef 100644 --- a/src/benchmark_collectives.py +++ b/src/benchmark_collectives.py @@ -199,7 +199,7 @@ def psum_benchmark_calculate_metrics( matrix_size_gbyte = matrix_dim * matrix_dim * dtype.dtype.itemsize / 1e9 # Calculate metrics for DCN benchmark if dcn_size > 1 and dcn_time_ms_list is not None: - # bandwidth is claculated as psum can be done via reduce_scatter + + # bandwidth is calculated as psum can be done via reduce_scatter + # all_gather so bandwidth is the sum of the two (formulas below) dcn_bandwidth_gbyte_s_list = [ matrix_size_gbyte