|
36 | 36 | echo "" |
37 | 37 | echo "The diagnostic shows if time scales linearly with iterations," |
38 | 38 | echo "which indicates constant per-iteration CPU-GPU sync overhead." |
- name: Nsight Systems Profile (if available)
  # Optional diagnostic: when the `nsys` CLI is on PATH, trace
  # scripts/profile_lax_scan.py (1000 iterations) and write the report under
  # nsight_profiles/. When `nsys` is absent the step is a no-op.
  shell: bash -l {0}
  # Best-effort profiling: a failure here is reported on the step but must
  # not fail the job.
  continue-on-error: true
  run: |
    echo "=== NVIDIA Nsight Systems Profiling ==="

    # Guard clause: nothing to do on runners without Nsight Systems.
    if ! command -v nsys &> /dev/null; then
      echo "nsys not found, skipping Nsight profiling"
      echo "Install NVIDIA Nsight Systems to enable this profiling"
      exit 0
    fi

    echo "nsys found, running profile with 1000 iterations..."
    mkdir -p nsight_profiles

    # The custom `bash -l {0}` shell template does not pass -e, so the
    # profiler's exit status has to be tested explicitly; otherwise a failed
    # run would still announce "Profile saved" and the step would exit 0.
    if nsys profile -o nsight_profiles/lax_scan_trace \
        --trace=cuda,nvtx,osrt \
        --cuda-memory-usage=true \
        --stats=true \
        python scripts/profile_lax_scan.py --nsys -n 1000; then
      echo ""
      echo "Profile saved to nsight_profiles/lax_scan_trace.nsys-rep"
      echo "Download artifact and open in Nsight Systems UI to see CPU-GPU sync pattern"
    else
      status=$?
      echo "nsys profile exited with status ${status}; the trace may be missing or incomplete" >&2
      # Propagate the failure so it is visible on the step; continue-on-error
      # keeps the rest of the job running.
      exit "${status}"
    fi
- name: Upload Nsight Profile
  # Publish the contents of nsight_profiles/ as the "nsight-profile" artifact.
  # Runs whether earlier steps succeeded or failed.
  if: success() || failure()
  # An upload problem is not allowed to fail the job.
  continue-on-error: true
  uses: actions/upload-artifact@v5
  with:
    name: nsight-profile
    path: nsight_profiles/
    # The directory may not exist or may be empty; do not warn or error then.
    if-no-files-found: ignore
39 | 67 | # === Benchmark Tests (Bare Metal, Jupyter, Jupyter-Book) === |
40 | 68 | - name: Run Hardware Benchmarks (Bare Metal) |
41 | 69 | shell: bash -l {0} |
|
0 commit comments