Skip to content
3 changes: 1 addition & 2 deletions Ironwood/configs/collectives/all_gather_1d.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
benchmarks:
- benchmark_name: all_gather
benchmark_sweep_params:
- {matrix_dim_range: {start: 2, end: 8192, multiplier: 2}, dtype: "float32", mesh_shape: "16x4x2", ici_size_range: 128, sharding_strategy: "1x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica
- {matrix_dim_range: {start: 2, end: 8192, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x8", ici_size_range: 128, sharding_strategy: "1x1x4", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
- {matrix_dim_range: {start: 2, end: 8192, multiplier: 2}, dtype: "float32", mesh_shape: "16x4x2", ici_size_range: 128, sharding_strategy: "1x4x1", op_dimension: 1, num_runs: 5}
trace_dir: "../microbenchmarks/all_gather_1d"
csv_path: "../microbenchmarks/all_gather_1d"
xlml_metrics_dir: "../microbenchmarks/all_gather_1d"
Expand Down
3 changes: 1 addition & 2 deletions Ironwood/configs/collectives/all_gather_2d.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
benchmarks:
- benchmark_name: all_gather
benchmark_sweep_params:
- {matrix_dim_range: {start: 2, end: 8192, multiplier: 2}, dtype: "float32", mesh_shape: "4x16x2", ici_size_range: 128, sharding_strategy: "1x16x1", op_dimension: 2, num_runs: 5} # Parallel Replica Groups
- {matrix_dim_range: {start: 2, end: 8192, multiplier: 2}, dtype: "float32", mesh_shape: "4x32", ici_size_range: 128, sharding_strategy: "1x32", op_dimension: 2, num_runs: 5} # Non Parallel Replica Groups
- {matrix_dim_range: {start: 2, end: 8192, multiplier: 2}, dtype: "float32", mesh_shape: "4x16x2", ici_size_range: 128, sharding_strategy: "1x16x1", op_dimension: 2, num_runs: 5}
trace_dir: "../microbenchmarks/all_gather_2d"
csv_path: "../microbenchmarks/all_gather_2d"
xlml_metrics_dir: "../microbenchmarks/all_gather_2d"
Expand Down
3 changes: 1 addition & 2 deletions Ironwood/configs/collectives/all_gather_3d.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
benchmarks:
- benchmark_name: all_gather
benchmark_sweep_params:
- {matrix_dim_range: {start: 2, end: 8192, multiplier: 2}, dtype: "float32", mesh_shape: "64x2", ici_size_range: 128, sharding_strategy: "64x1", op_dimension: 3, num_runs: 5} # Parallel Replica Groups
- {matrix_dim_range: {start: 2, end: 8192, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x8", ici_size_range: 128, sharding_strategy: "4x4x8", op_dimension: 3, num_runs: 5} # Non Parallel Replica Groups
- {matrix_dim_range: {start: 2, end: 8192, multiplier: 2}, dtype: "float32", mesh_shape: "64x2", ici_size_range: 128, sharding_strategy: "64x1", op_dimension: 3, num_runs: 5}
trace_dir: "../microbenchmarks/all_gather_3d"
csv_path: "../microbenchmarks/all_gather_3d"
xlml_metrics_dir: "../microbenchmarks/all_gather_3d"
Expand Down
10 changes: 3 additions & 7 deletions Ironwood/configs/collectives/all_gather_demo.yaml
Original file line number Diff line number Diff line change
@@ -1,13 +1,9 @@
benchmarks:
- benchmark_name: all_gather
benchmark_sweep_params:
- {matrix_dim_range: {start: 2, end: 512, multiplier: 2}, dtype: "float32", mesh_shape: "16x4x2", ici_size_range: 128, sharding_strategy: "1x4x1", op_dimension: 1} # Parallel Replica
- {matrix_dim_range: {start: 2, end: 512, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x8", ici_size_range: 128, sharding_strategy: "1x1x4", op_dimension: 1} # Non-Parallel Replica
- {matrix_dim_range: {start: 2, end: 512, multiplier: 2}, dtype: "float32", mesh_shape: "4x32", ici_size_range: 128, sharding_strategy: "1x32", op_dimension: 2} # Non Parallel Replica Groups
- {matrix_dim_range: {start: 2, end: 512, multiplier: 2}, dtype: "float32", mesh_shape: "4x16x2", ici_size_range: 128, sharding_strategy: "1x16x1", op_dimension: 2} # Parallel Replica Groups
- {matrix_dim_range: {start: 2, end: 512, multiplier: 2}, dtype: "float32", mesh_shape: "64x2", ici_size_range: 128, sharding_strategy: "64x1", op_dimension: 3} # Parallel Replica Groups
- {matrix_dim_range: {start: 2, end: 512, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x8", ici_size_range: 128, sharding_strategy: "4x4x8", op_dimension: 3} # Non Parallel Replica Groups

- {matrix_dim_range: {start: 2, end: 512, multiplier: 2}, dtype: "float32", mesh_shape: "16x4x2", ici_size_range: 128, sharding_strategy: "1x4x1", op_dimension: 1}
- {matrix_dim_range: {start: 2, end: 512, multiplier: 2}, dtype: "float32", mesh_shape: "4x16x2", ici_size_range: 128, sharding_strategy: "1x16x1", op_dimension: 2}
- {matrix_dim_range: {start: 2, end: 512, multiplier: 2}, dtype: "float32", mesh_shape: "64x2", ici_size_range: 128, sharding_strategy: "64x1", op_dimension: 3}

warmup_tries: 10
trace_dir: "../microbenchmarks/all_gather_demo"
Expand Down
3 changes: 1 addition & 2 deletions Ironwood/configs/collectives/all_gather_tpu7x_2x2x1.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
benchmarks:
- benchmark_name: all_gather
benchmark_sweep_params:
- {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "2x2x2", ici_size_range: 8, sharding_strategy: "2x2x1", op_dimension: 1, num_runs: 5} # Parallel Replica
- {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "2x2x2", ici_size_range: 8, sharding_strategy: "2x2x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
- {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "2x2x2", ici_size_range: 8, sharding_strategy: "2x2x1", op_dimension: 1, num_runs: 5}
trace_dir: "../microbenchmarks/all_gather_tpu7x_2x2x1"
csv_path: "../microbenchmarks/all_gather_tpu7x_2x2x1"
xlml_metrics_dir: "../microbenchmarks/all_gather_tpu7x_2x2x1"
Expand Down
3 changes: 1 addition & 2 deletions Ironwood/configs/collectives/all_gather_tpu7x_2x2x2.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
benchmarks:
- benchmark_name: all_gather
benchmark_sweep_params:
- {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x2x2", ici_size_range: 16, sharding_strategy: "4x2x1", op_dimension: 1, num_runs: 5} # Parallel Replica
- {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x2x2", ici_size_range: 16, sharding_strategy: "4x2x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
- {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x2x2", ici_size_range: 16, sharding_strategy: "4x2x1", op_dimension: 1, num_runs: 5}
trace_dir: "../microbenchmarks/all_gather_tpu7x_2x2x2"
csv_path: "../microbenchmarks/all_gather_tpu7x_2x2x2"
xlml_metrics_dir: "../microbenchmarks/all_gather_tpu7x_2x2x2"
Expand Down
3 changes: 1 addition & 2 deletions Ironwood/configs/collectives/all_gather_tpu7x_2x2x4.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
benchmarks:
- benchmark_name: all_gather
benchmark_sweep_params:
- {matrix_dim_range: {start: 16, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x2", ici_size_range: 32, sharding_strategy: "4x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica
- {matrix_dim_range: {start: 16, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x2", ici_size_range: 32, sharding_strategy: "4x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
- {matrix_dim_range: {start: 16, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x2", ici_size_range: 32, sharding_strategy: "4x4x1", op_dimension: 1, num_runs: 5}
trace_dir: "../microbenchmarks/all_gather_tpu7x_2x2x4"
csv_path: "../microbenchmarks/all_gather_tpu7x_2x2x4"
xlml_metrics_dir: "../microbenchmarks/all_gather_tpu7x_2x2x4"
Expand Down
3 changes: 1 addition & 2 deletions Ironwood/configs/collectives/all_gather_tpu7x_2x4x4.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
benchmarks:
- benchmark_name: all_gather
benchmark_sweep_params:
- {matrix_dim_range: {start: 32, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "8x4x2", ici_size_range: 64, sharding_strategy: "8x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica
- {matrix_dim_range: {start: 32, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "8x4x2", ici_size_range: 64, sharding_strategy: "8x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
- {matrix_dim_range: {start: 32, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "8x4x2", ici_size_range: 64, sharding_strategy: "8x4x1", op_dimension: 1, num_runs: 5}
trace_dir: "../microbenchmarks/all_gather_tpu7x_2x4x4"
csv_path: "../microbenchmarks/all_gather_tpu7x_2x4x4"
xlml_metrics_dir: "../microbenchmarks/all_gather_tpu7x_2x4x4"
Expand Down
5 changes: 2 additions & 3 deletions Ironwood/configs/collectives/all_gather_tpu7x_4x4x4.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
benchmarks:
- benchmark_name: all_gather
benchmark_sweep_params:
- {matrix_dim_range: {start: 64, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "16x4x2", ici_size_range: 128, sharding_strategy: "16x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica
- {matrix_dim_range: {start: 64, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "16x4x2", ici_size_range: 128, sharding_strategy: "16x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
- {matrix_dim_range: {start: 64, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "16x4x2", ici_size_range: 128, sharding_strategy: "16x4x1", op_dimension: 1, num_runs: 10}
trace_dir: "../microbenchmarks/all_gather_tpu7x_4x4x4"
csv_path: "../microbenchmarks/all_gather_tpu7x_4x4x4"
xlml_metrics_dir: "../microbenchmarks/all_gather_tpu7x_4x4x4"
xla_dump_dir: "../microbenchmarks/all_gather_tpu7x_4x4x4/hlo_graphs"
xla_dump_dir: "../microbenchmarks/all_gather_tpu7x_4x4x4/hlo_graphs"
3 changes: 1 addition & 2 deletions Ironwood/configs/collectives/all_gather_tpu7x_4x4x8.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
benchmarks:
- benchmark_name: all_gather
benchmark_sweep_params:
- {matrix_dim_range: {start: 128, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "32x4x2", ici_size_range: 256, sharding_strategy: "32x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica
- {matrix_dim_range: {start: 128, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "32x4x2", ici_size_range: 256, sharding_strategy: "32x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
- {matrix_dim_range: {start: 128, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "32x4x2", ici_size_range: 256, sharding_strategy: "32x4x1", op_dimension: 1, num_runs: 5}
trace_dir: "../microbenchmarks/all_gather_tpu7x_4x4x8"
csv_path: "../microbenchmarks/all_gather_tpu7x_4x4x8"
xlml_metrics_dir: "../microbenchmarks/all_gather_tpu7x_4x4x8"
Expand Down
3 changes: 1 addition & 2 deletions Ironwood/configs/collectives/all_reduce_1d.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
benchmarks:
- benchmark_name: psum
benchmark_sweep_params:
- {matrix_dim_range: {start: 2, end: 8192, multiplier: 2}, dtype: "float32", mesh_shape: "16x4x2", ici_size_range: 128, sharding_strategy: "1x4x1" , op_dimension: 1, num_runs: 5} # Parallel Replica
- {matrix_dim_range: {start: 2, end: 8192, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x8", ici_size_range: 128, sharding_strategy: "1x1x4", op_dimension: 1, num_runs: 5} # Non Parallel Replica
- {matrix_dim_range: {start: 2, end: 8192, multiplier: 2}, dtype: "float32", mesh_shape: "16x4x2", ici_size_range: 128, sharding_strategy: "1x4x1" , op_dimension: 1, num_runs: 5}
trace_dir: "../microbenchmarks/all_reduce_1d"
csv_path: "../microbenchmarks/all_reduce_1d"
xlml_metrics_dir: "../microbenchmarks/all_reduce_1d"
Expand Down
3 changes: 1 addition & 2 deletions Ironwood/configs/collectives/all_reduce_2d.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
benchmarks:
- benchmark_name: psum
benchmark_sweep_params:
- {matrix_dim_range: {start: 2, end: 8192, multiplier: 2}, dtype: "float32", mesh_shape: "4x16x2", ici_size_range: 128, sharding_strategy: "1x16x1", op_dimension: 2, num_runs: 5} # Parallel Replica Groups
- {matrix_dim_range: {start: 2, end: 8192, multiplier: 2}, dtype: "float32", mesh_shape: "4x32", ici_size_range: 128, sharding_strategy: "1x32", op_dimension: 2, num_runs: 5} # Non Parallel Replica Groups
- {matrix_dim_range: {start: 2, end: 8192, multiplier: 2}, dtype: "float32", mesh_shape: "4x16x2", ici_size_range: 128, sharding_strategy: "1x16x1", op_dimension: 2, num_runs: 5}
trace_dir: "../microbenchmarks/all_reduce_2d"
csv_path: "../microbenchmarks/all_reduce_2d"
xlml_metrics_dir: "../microbenchmarks/all_reduce_2d"
Expand Down
3 changes: 1 addition & 2 deletions Ironwood/configs/collectives/all_reduce_3d.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
benchmarks:
- benchmark_name: psum
benchmark_sweep_params:
- {matrix_dim_range: {start: 2, end: 8192, multiplier: 2}, dtype: "float32", mesh_shape: "64x2", ici_size_range: 128, sharding_strategy: "64x1", op_dimension: 3, num_runs: 5} # Parallel Replica Groups
- {matrix_dim_range: {start: 2, end: 8192, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x8", ici_size_range: 128, sharding_strategy: "4x4x8", op_dimension: 3, num_runs: 5} # Non Parallel Replica Groups
- {matrix_dim_range: {start: 2, end: 8192, multiplier: 2}, dtype: "float32", mesh_shape: "64x2", ici_size_range: 128, sharding_strategy: "64x1", op_dimension: 3, num_runs: 5}
trace_dir: "../microbenchmarks/all_reduce_3d"
csv_path: "../microbenchmarks/all_reduce_3d"
xlml_metrics_dir: "../microbenchmarks/all_reduce_3d"
Expand Down
3 changes: 1 addition & 2 deletions Ironwood/configs/collectives/all_reduce_tpu7x_2x2x1.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
benchmarks:
- benchmark_name: psum
benchmark_sweep_params:
- {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "2x2x2", ici_size_range: 8, sharding_strategy: "2x2x1", op_dimension: 1, num_runs: 5} # Parallel Replica
- {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "2x2x2", ici_size_range: 8, sharding_strategy: "2x2x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
- {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "2x2x2", ici_size_range: 8, sharding_strategy: "2x2x1", op_dimension: 1, num_runs: 5}
trace_dir: "../microbenchmarks/psum_tpu7x_2x2x1"
csv_path: "../microbenchmarks/psum_tpu7x_2x2x1"
xlml_metrics_dir: "../microbenchmarks/psum_tpu7x_2x2x1"
Expand Down
3 changes: 1 addition & 2 deletions Ironwood/configs/collectives/all_reduce_tpu7x_2x2x2.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
benchmarks:
- benchmark_name: psum
benchmark_sweep_params:
- {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x2x2", ici_size_range: 16, sharding_strategy: "4x2x1", op_dimension: 1, num_runs: 5} # Parallel Replica
- {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x2x2", ici_size_range: 16, sharding_strategy: "4x2x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
- {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x2x2", ici_size_range: 16, sharding_strategy: "4x2x1", op_dimension: 1, num_runs: 5}
trace_dir: "../microbenchmarks/psum_tpu7x_2x2x2"
csv_path: "../microbenchmarks/psum_tpu7x_2x2x2"
xlml_metrics_dir: "../microbenchmarks/psum_tpu7x_2x2x2"
Expand Down
3 changes: 1 addition & 2 deletions Ironwood/configs/collectives/all_reduce_tpu7x_2x2x4.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
benchmarks:
- benchmark_name: psum
benchmark_sweep_params:
- {matrix_dim_range: {start: 16, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x2", ici_size_range: 32, sharding_strategy: "4x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica
- {matrix_dim_range: {start: 16, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x2", ici_size_range: 32, sharding_strategy: "4x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
- {matrix_dim_range: {start: 16, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x2", ici_size_range: 32, sharding_strategy: "4x4x1", op_dimension: 1, num_runs: 5}
trace_dir: "../microbenchmarks/psum_tpu7x_2x2x4"
csv_path: "../microbenchmarks/psum_tpu7x_2x2x4"
xlml_metrics_dir: "../microbenchmarks/psum_tpu7x_2x2x4"
Expand Down
3 changes: 1 addition & 2 deletions Ironwood/configs/collectives/all_reduce_tpu7x_2x4x4.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
benchmarks:
- benchmark_name: psum
benchmark_sweep_params:
- {matrix_dim_range: {start: 32, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "8x4x2", ici_size_range: 64, sharding_strategy: "8x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica
- {matrix_dim_range: {start: 32, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "8x4x2", ici_size_range: 64, sharding_strategy: "8x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
- {matrix_dim_range: {start: 32, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "8x4x2", ici_size_range: 64, sharding_strategy: "8x4x1", op_dimension: 1, num_runs: 5}
trace_dir: "../microbenchmarks/psum_tpu7x_2x4x4"
csv_path: "../microbenchmarks/psum_tpu7x_2x4x4"
xlml_metrics_dir: "../microbenchmarks/psum_tpu7x_2x4x4"
Expand Down
3 changes: 1 addition & 2 deletions Ironwood/configs/collectives/all_reduce_tpu7x_4x4x4.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
benchmarks:
- benchmark_name: psum
benchmark_sweep_params:
- {matrix_dim_range: {start: 64, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "16x4x2", ici_size_range: 128, sharding_strategy: "16x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica
- {matrix_dim_range: {start: 64, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "16x4x2", ici_size_range: 128, sharding_strategy: "16x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
- {matrix_dim_range: {start: 64, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "16x4x2", ici_size_range: 128, sharding_strategy: "16x4x1", op_dimension: 1, num_runs: 5}
trace_dir: "../microbenchmarks/psum_tpu7x_4x4x4"
csv_path: "../microbenchmarks/psum_tpu7x_4x4x4"
xlml_metrics_dir: "../microbenchmarks/psum_tpu7x_4x4x4"
Expand Down
3 changes: 1 addition & 2 deletions Ironwood/configs/collectives/all_reduce_tpu7x_4x4x8.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
benchmarks:
- benchmark_name: psum
benchmark_sweep_params:
- {matrix_dim_range: {start: 128, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "32x4x2", ici_size_range: 256, sharding_strategy: "32x4x1", op_dimension: 1, num_runs: 5} # Parallel Replica
- {matrix_dim_range: {start: 128, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "32x4x2", ici_size_range: 256, sharding_strategy: "32x4x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
- {matrix_dim_range: {start: 128, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "32x4x2", ici_size_range: 256, sharding_strategy: "32x4x1", op_dimension: 1, num_runs: 5}
trace_dir: "../microbenchmarks/psum_tpu7x_4x4x8"
csv_path: "../microbenchmarks/psum_tpu7x_4x4x8"
xlml_metrics_dir: "../microbenchmarks/psum_tpu7x_4x4x8"
Expand Down
2 changes: 1 addition & 1 deletion Ironwood/configs/collectives/all_to_all_1d.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
benchmarks:
- benchmark_name: all_to_all
benchmark_sweep_params:
- {matrix_dim_range: {start: 2, end: 8192, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x8", ici_size_range: 128, sharding_strategy: "1x1x4", op_dimension: 1, num_runs: 5} # Non Parallel Replica
- {matrix_dim_range: {start: 2, end: 8192, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x8", ici_size_range: 128, sharding_strategy: "1x1x4", op_dimension: 1, num_runs: 5}
trace_dir: "../microbenchmarks/all_to_all_1d"
csv_path: "../microbenchmarks/all_to_all_1d"
xlml_metrics_dir: "../microbenchmarks/all_to_all_1d"
Expand Down
2 changes: 1 addition & 1 deletion Ironwood/configs/collectives/all_to_all_2d.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
benchmarks:
- benchmark_name: all_to_all
benchmark_sweep_params:
- {matrix_dim_range: {start: 2, end: 8192, multiplier: 2}, dtype: "float32", mesh_shape: "4x32", ici_size_range: 128, sharding_strategy: "1x32", op_dimension: 2, num_runs: 5} # Non Parallel Replica
- {matrix_dim_range: {start: 2, end: 8192, multiplier: 2}, dtype: "float32", mesh_shape: "4x32", ici_size_range: 128, sharding_strategy: "1x32", op_dimension: 2, num_runs: 5}
trace_dir: "../microbenchmarks/all_to_all_2d"
csv_path: "../microbenchmarks/all_to_all_2d"
xlml_metrics_dir: "../microbenchmarks/all_to_all_2d"
Expand Down
2 changes: 1 addition & 1 deletion Ironwood/configs/collectives/all_to_all_3d.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
benchmarks:
- benchmark_name: all_to_all
benchmark_sweep_params:
- {matrix_dim_range: {start: 2, end: 8192, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x8", ici_size_range: 128, sharding_strategy: "4x4x8", op_dimension: 3, num_runs: 5} # Non Parallel Replica Groups
- {matrix_dim_range: {start: 2, end: 8192, multiplier: 2}, dtype: "float32", mesh_shape: "4x4x8", ici_size_range: 128, sharding_strategy: "4x4x8", op_dimension: 3, num_runs: 5}
trace_dir: "../microbenchmarks/all_to_all_3d"
csv_path: "../microbenchmarks/all_to_all_3d"
xlml_metrics_dir: "../microbenchmarks/all_to_all_3d"
Expand Down
3 changes: 1 addition & 2 deletions Ironwood/configs/collectives/all_to_all_tpu7x_2x2x1.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
benchmarks:
- benchmark_name: all_to_all
benchmark_sweep_params:
- {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "2x2x2", ici_size_range: 8, sharding_strategy: "2x2x1", op_dimension: 1, num_runs: 5} # Parallel Replica
- {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "2x2x2", ici_size_range: 8, sharding_strategy: "2x2x2", op_dimension: 1, num_runs: 5} # Non-Parallel Replica
- {matrix_dim_range: {start: 8, end: 16384, multiplier: 2}, dtype: "float32", mesh_shape: "2x2x2", ici_size_range: 8, sharding_strategy: "2x2x1", op_dimension: 1, num_runs: 5}
trace_dir: "../microbenchmarks/all_to_all_tpu7x_2x2x1"
csv_path: "../microbenchmarks/all_to_all_tpu7x_2x2x1"
xlml_metrics_dir: "../microbenchmarks/all_to_all_tpu7x_2x2x1"
Expand Down
Loading