diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml
index 9a07cd281a2..1c48a31c7df 100644
--- a/.github/workflows/fuzz.yml
+++ b/.github/workflows/fuzz.yml
@@ -21,6 +21,8 @@ jobs:
     uses: ./.github/workflows/run-fuzzer.yml
     with:
       fuzz_target: file_io
+      family: "m8g.large"
+      image: "ubuntu24-full-arm64"
     secrets:
       R2_FUZZ_ACCESS_KEY_ID: ${{ secrets.R2_FUZZ_ACCESS_KEY_ID }}
       R2_FUZZ_SECRET_ACCESS_KEY: ${{ secrets.R2_FUZZ_SECRET_ACCESS_KEY }}
@@ -69,6 +71,8 @@ jobs:
     uses: ./.github/workflows/run-fuzzer.yml
     with:
       fuzz_target: array_ops
+      family: "m8g.large"
+      image: "ubuntu24-full-arm64"
     secrets:
       R2_FUZZ_ACCESS_KEY_ID: ${{ secrets.R2_FUZZ_ACCESS_KEY_ID }}
       R2_FUZZ_SECRET_ACCESS_KEY: ${{ secrets.R2_FUZZ_SECRET_ACCESS_KEY }}
@@ -103,6 +107,45 @@ jobs:
     uses: ./.github/workflows/run-fuzzer.yml
     with:
       fuzz_target: compress_roundtrip
+      family: "m8g.large"
+      image: "ubuntu24-full-arm64"
     secrets:
       R2_FUZZ_ACCESS_KEY_ID: ${{ secrets.R2_FUZZ_ACCESS_KEY_ID }}
       R2_FUZZ_SECRET_ACCESS_KEY: ${{ secrets.R2_FUZZ_SECRET_ACCESS_KEY }}
+
+  # ============================================================================
+  # GPU Compress Fuzzer (CUDA)
+  # ============================================================================
+  gpu_compress_fuzz:
+    name: "GPU Compress Fuzz"
+    uses: ./.github/workflows/run-fuzzer.yml
+    with:
+      fuzz_target: compress_gpu
+      family: "g4dn"
+      image: "ubuntu24-gpu-x64"
+      extra_features: "cuda"
+    secrets:
+      R2_FUZZ_ACCESS_KEY_ID: ${{ secrets.R2_FUZZ_ACCESS_KEY_ID }}
+      R2_FUZZ_SECRET_ACCESS_KEY: ${{ secrets.R2_FUZZ_SECRET_ACCESS_KEY }}
+
+#  report-gpu-compress-fuzz-failures:
+#    name: "Report GPU Compress Fuzz Failures"
+#    needs: gpu_compress_fuzz
+#    if: always() && needs.gpu_compress_fuzz.outputs.crashes_found == 'true'
+#    permissions:
+#      issues: write
+#      contents: read
+#      id-token: write
+#      pull-requests: read
+#    uses: ./.github/workflows/report-fuzz-crash.yml
+#    with:
+#      fuzz_target: compress_gpu
+#      crash_file: ${{ needs.gpu_compress_fuzz.outputs.first_crash_name }}
+#      artifact_url: ${{ needs.gpu_compress_fuzz.outputs.artifact_url }}
+#      artifact_name: compress_gpu-crash-artifacts
+#      logs_artifact_name: compress_gpu-logs
+#      branch: ${{ github.ref_name }}
+#      commit: ${{ github.sha }}
+#    secrets:
+#      claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
+#      gh_token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/run-fuzzer.yml b/.github/workflows/run-fuzzer.yml
index 302555d7b54..c649b951256 100644
--- a/.github/workflows/run-fuzzer.yml
+++ b/.github/workflows/run-fuzzer.yml
@@ -12,6 +12,21 @@ on:
         required: false
         type: number
         default: 7200
+      family:
+        description: "Runner family (e.g., m8g.large for CPU, g5+g4dn+g6 for GPU)"
+        required: false
+        type: string
+        default: "m8g.large"
+      image:
+        description: "Runner image (e.g., ubuntu24-full-arm64, ubuntu24-gpu-x64)"
+        required: false
+        type: string
+        default: "ubuntu24-full-arm64"
+      extra_features:
+        description: "Extra cargo features to enable (e.g., cuda)"
+        required: false
+        type: string
+        default: ""
     outputs:
       crashes_found:
         description: "Whether crashes were found"
@@ -34,8 +49,8 @@ jobs:
     timeout-minutes: 230 # almost 4 hours
     runs-on:
       - runs-on=${{ github.run_id }}
-      - family=m8g.large
-      - image=ubuntu24-full-arm64
+      - family=${{ inputs.family }}
+      - image=${{ inputs.image }}
       - disk=large
       - extras=s3-cache
       - tag=${{ inputs.fuzz_target }}-fuzz
@@ -43,11 +58,6 @@ jobs:
       crashes_found: ${{ steps.check.outputs.crashes_found }}
       first_crash_name: ${{ steps.check.outputs.first_crash_name }}
       artifact_url: ${{ steps.upload_artifacts.outputs.artifact-url }}
-    env:
-      AWS_ACCESS_KEY_ID: ${{ secrets.R2_FUZZ_ACCESS_KEY_ID }}
-      AWS_SECRET_ACCESS_KEY: ${{ secrets.R2_FUZZ_SECRET_ACCESS_KEY }}
-      AWS_REGION: "us-east-1"
-      AWS_ENDPOINT_URL: "https://01e9655179bbec953276890b183039bc.r2.cloudflarestorage.com"
     steps:
       - uses: runs-on/action@v2
         with:
@@ -70,6 +80,11 @@ jobs:
 
       - name: Restore corpus
         shell: bash
+        env:
+          AWS_ACCESS_KEY_ID: ${{ secrets.R2_FUZZ_ACCESS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.R2_FUZZ_SECRET_ACCESS_KEY }}
+          AWS_REGION: "us-east-1"
+          AWS_ENDPOINT_URL: "https://01e9655179bbec953276890b183039bc.r2.cloudflarestorage.com"
         run: |
           CORPUS_KEY="${{ inputs.fuzz_target }}_corpus.tar.zst"
           CORPUS_DIR="fuzz/corpus/${{ inputs.fuzz_target }}"
@@ -99,8 +114,16 @@ jobs:
       - name: Run fuzzing target
         id: fuzz
+        env:
+          # Pass the input through the environment rather than interpolating it into the script.
+          EXTRA_FEATURES: ${{ inputs.extra_features }}
         run: |
+          FEATURES_FLAG=""
+          if [ -n "$EXTRA_FEATURES" ]; then
+            FEATURES_FLAG="--features $EXTRA_FEATURES"
+          fi
           RUSTFLAGS="--cfg vortex_nightly" RUST_BACKTRACE=1 \
             cargo +nightly fuzz run --release --debug-assertions \
+              $FEATURES_FLAG \
               ${{ inputs.fuzz_target }} -- \
               -max_total_time=${{ inputs.max_time }} -rss_limit_mb=0 \
               2>&1 | tee fuzz_output.log
@@ -149,6 +172,11 @@ jobs:
 
       - name: Persist corpus
         shell: bash
+        env:
+          AWS_ACCESS_KEY_ID: ${{ secrets.R2_FUZZ_ACCESS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.R2_FUZZ_SECRET_ACCESS_KEY }}
+          AWS_REGION: "us-east-1"
+          AWS_ENDPOINT_URL: "https://01e9655179bbec953276890b183039bc.r2.cloudflarestorage.com"
         run: |
           CORPUS_KEY="${{ inputs.fuzz_target }}_corpus.tar.zst"
           CORPUS_DIR="fuzz/corpus/${{ inputs.fuzz_target }}"
diff --git a/Cargo.lock b/Cargo.lock
index 55984f4d702..43b31e21b6e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -10639,10 +10639,12 @@ dependencies = [
  "itertools 0.14.0",
  "libfuzzer-sys",
  "strum 0.27.2",
+ "tokio",
  "vortex",
  "vortex-array",
  "vortex-btrblocks",
  "vortex-buffer",
+ "vortex-cuda",
  "vortex-dtype",
  "vortex-error",
  "vortex-file",
diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml
index a9e2119bdb4..52281611140 100644
--- a/fuzz/Cargo.toml
+++ b/fuzz/Cargo.toml
@@ -22,6 +22,7 @@ default = ["native"]
 native = ["libfuzzer-sys", "zstd", "vortex-file", "vortex/files"]
 wasmfuzz = []
 zstd = ["vortex/zstd"]
+cuda = ["vortex-cuda", "tokio"]
 
 [dependencies]
 # Always needed - arbitrary is used for input generation
@@ -48,6 +49,10 @@ vortex-utils = { workspace = true }
 libfuzzer-sys = { workspace = true, optional = true }
 vortex-file = { workspace = true, optional = true }
 
+# GPU support dependencies (optional, only for CUDA fuzzing)
+vortex-cuda = { path = "../vortex-cuda", optional = true }
+tokio = { workspace = true, features = ["rt", "macros"], optional = true }
+
 [lints]
 workspace = true
 
@@ -82,3 +87,11 @@ name = "compress_roundtrip"
 path = "fuzz_targets/compress_roundtrip.rs"
 test = false
 required-features = ["native"]
+
+[[bin]]
+bench = false
+doc = false
+name = "compress_gpu"
+path = "fuzz_targets/compress_gpu.rs"
+test = false
+required-features = ["native", "cuda"]
diff --git a/fuzz/build.rs b/fuzz/build.rs
new file mode 100644
index 00000000000..4d2804cdb2e
--- /dev/null
+++ b/fuzz/build.rs
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+//! Build script that sets the `cuda_available` cfg when `nvcc` can be run for a Linux target.
+
+use std::env;
+use std::process::Command;
+
+fn main() {
+    // Declare the cfg so rustc doesn't warn about unexpected cfg.
+    println!("cargo::rustc-check-cfg=cfg(cuda_available)");
+
+    // Only enable CUDA on Linux (matching vortex-cuda's behavior).
+    //
+    // `cfg!(target_os = ...)` inside a build script describes the machine that runs the build
+    // script, so read the compilation target from the environment Cargo provides instead.
+    let targets_linux = env::var("CARGO_CFG_TARGET_OS").is_ok_and(|os| os == "linux");
+    if !targets_linux {
+        return;
+    }
+
+    // Check if nvcc is available
+    if has_nvcc() {
+        println!("cargo::rustc-cfg=cuda_available");
+    }
+}
+
+/// Returns `true` when `nvcc --version` can be spawned and exits successfully.
+fn has_nvcc() -> bool {
+    Command::new("nvcc")
+        .arg("--version")
+        .output()
+        .is_ok_and(|o| o.status.success())
+}
diff --git a/fuzz/fuzz_targets/compress_gpu.rs b/fuzz/fuzz_targets/compress_gpu.rs
new file mode 100644
index 00000000000..eea7ff6c5e3
--- /dev/null
+++ b/fuzz/fuzz_targets/compress_gpu.rs
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+#![no_main]
+#![allow(clippy::unwrap_used, clippy::result_large_err)]
+
+use libfuzzer_sys::Corpus;
+use libfuzzer_sys::fuzz_target;
+use vortex_error::vortex_panic;
+use vortex_fuzz::FuzzCompressGpu;
+use vortex_fuzz::run_compress_gpu;
+
+fuzz_target!(|fuzz: FuzzCompressGpu| -> Corpus {
+    // Use tokio runtime to run async GPU fuzzer
+    let rt = tokio::runtime::Builder::new_current_thread()
+        .enable_all()
+        .build()
+        .unwrap();
+
+    match rt.block_on(run_compress_gpu(fuzz)) {
+        Ok(true) => Corpus::Keep,
+        Ok(false) => Corpus::Reject,
+        Err(e) => {
+            vortex_panic!("{e}");
+        }
+    }
+});
diff --git a/fuzz/src/gpu/mod.rs b/fuzz/src/gpu/mod.rs
new file mode 100644
index 00000000000..fac00bbe1a3
--- /dev/null
+++ b/fuzz/src/gpu/mod.rs
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+//! GPU fuzzer module for testing CUDA decompression.
+//!
+//! This module generates arbitrary instances of GPU-supported compressed encodings,
+//! then verifies that GPU decompression produces the same results as CPU decompression.
+
+use arbitrary::Arbitrary;
+use arbitrary::Result;
+use arbitrary::Unstructured;
+use vortex_array::ArrayRef;
+use vortex_array::IntoArray;
+use vortex_array::arrays::ArbitraryDictArray;
+use vortex_dtype::Nullability;
+use vortex_dtype::PType;
+
+use crate::error::VortexFuzzResult;
+
+/// Which GPU-supported encoding to generate.
+#[derive(Debug, Clone, Copy)]
+pub enum GpuEncodingKind {
+    /// Dictionary encoding with GPU take support.
+    Dict,
+}
+
+impl<'a> Arbitrary<'a> for GpuEncodingKind {
+    fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
+        // Currently only Dict is supported
+        match u.int_in_range(0..=0)? {
+            0 => Ok(GpuEncodingKind::Dict),
+            _ => unreachable!(),
+        }
+    }
+}
+
+/// Input for the GPU decompression fuzzer.
+#[derive(Debug)]
+pub struct FuzzCompressGpu {
+    pub array: ArrayRef,
+}
+
+impl<'a> Arbitrary<'a> for FuzzCompressGpu {
+    fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
+        let kind: GpuEncodingKind = u.arbitrary()?;
+
+        let array = match kind {
+            GpuEncodingKind::Dict => {
+                // Dict already has Arbitrary support, use primitive values for GPU compatibility
+                let dtype = arbitrary_gpu_primitive_dtype(u)?;
+                ArbitraryDictArray::with_dtype(u, &dtype, None)?
+                    .0
+                    .into_array()
+            }
+        };
+
+        Ok(FuzzCompressGpu { array })
+    }
+}
+
+/// Generate a random primitive DType suitable for GPU operations.
+fn arbitrary_gpu_primitive_dtype(u: &mut Unstructured) -> Result<vortex_dtype::DType> {
+    let nullability: Nullability = u.arbitrary()?;
+    let ptype = match u.int_in_range(0..=9)? {
+        0 => PType::U8,
+        1 => PType::U16,
+        2 => PType::U32,
+        3 => PType::U64,
+        4 => PType::I8,
+        5 => PType::I16,
+        6 => PType::I32,
+        7 => PType::I64,
+        8 => PType::F32,
+        9 => PType::F64,
+        _ => unreachable!(),
+    };
+    Ok(vortex_dtype::DType::Primitive(ptype, nullability))
+}
+
+/// Run the GPU decompression fuzzer.
+///
+/// This function:
+/// 1. Decompresses the array on CPU (reference)
+/// 2. Creates a CUDA execution context
+/// 3. Decompresses the array on GPU
+/// 4. Copies GPU result back to host using `CanonicalCudaExt::to_host`
+/// 5. Compares the dtype, the length and every element of the two results
+///
+/// Returns:
+/// - `Ok(true)` - test passed, keep in corpus
+/// - `Ok(false)` - test skipped (e.g., no CUDA), reject from corpus
+/// - `Err(_)` - a bug was found
+#[cfg(cuda_available)]
+#[allow(clippy::result_large_err)]
+pub async fn run_compress_gpu(fuzz: FuzzCompressGpu) -> VortexFuzzResult<bool> {
+    use vortex_array::Array;
+    use vortex_cuda::CanonicalCudaExt;
+    use vortex_cuda::executor::CudaArrayExt;
+    use vortex_cuda::initialize_cuda;
+    // NOTE(review): vortex-cuda's lib.rs declares `mod session;` without `pub` in the part of
+    // the file visible in this patch - confirm this path is reachable from outside the crate.
+    use vortex_cuda::session::CudaSessionExt;
+    use vortex_error::VortexExpect;
+    use vortex_session::VortexSession;
+
+    use crate::error::Backtrace;
+    use crate::error::VortexFuzzError;
+
+    let FuzzCompressGpu { array } = fuzz;
+
+    // Pairs a Vortex error with a backtrace captured where the failure is observed.
+    let fuzz_err = |e| VortexFuzzError::VortexError(e, Backtrace::capture());
+
+    // Store original properties for error reporting
+    let original_len = array.len();
+
+    // 1. CPU decompression (reference)
+    let cpu_canonical = array.to_canonical().map_err(fuzz_err)?;
+
+    // 2. Create CUDA execution context
+    let session = VortexSession::empty();
+    initialize_cuda(session.cuda_session().as_ref());
+
+    let mut cuda_ctx = session
+        .create_execution_ctx()
+        .vortex_expect("cannot create session");
+
+    // 3. GPU decompression
+    let gpu_canonical = array
+        .clone()
+        .execute_cuda(&mut cuda_ctx)
+        .await
+        .map_err(fuzz_err)?;
+
+    // 4. Copy GPU result back to host using CanonicalCudaExt
+    let gpu_host_canonical = gpu_canonical.to_host().await.map_err(fuzz_err)?;
+
+    // 5. Compare canonicals
+    let cpu_array = cpu_canonical.into_array();
+    let gpu_array = gpu_host_canonical.into_array();
+
+    // Verify dtype is preserved
+    if cpu_array.dtype() != gpu_array.dtype() {
+        return Err(VortexFuzzError::DTypeMismatch(
+            cpu_array,
+            gpu_array,
+            0,
+            Backtrace::capture(),
+        ));
+    }
+
+    // Verify length is preserved
+    if original_len != gpu_array.len() {
+        return Err(VortexFuzzError::LengthMismatch(
+            original_len,
+            gpu_array.len(),
+            array,
+            gpu_array,
+            0,
+            Backtrace::capture(),
+        ));
+    }
+
+    // Compare element by element
+    for i in 0..original_len {
+        let cpu_scalar = cpu_array.scalar_at(i).map_err(fuzz_err)?;
+        let gpu_scalar = gpu_array.scalar_at(i).map_err(fuzz_err)?;
+
+        if cpu_scalar != gpu_scalar {
+            return Err(VortexFuzzError::ArrayNotEqual(
+                cpu_scalar,
+                gpu_scalar,
+                i,
+                cpu_array,
+                gpu_array,
+                0,
+                Backtrace::capture(),
+            ));
+        }
+    }
+
+    Ok(true)
+}
+
+/// No-op fallback when CUDA is not available.
+#[cfg(not(cuda_available))]
+#[allow(clippy::result_large_err)]
+pub async fn run_compress_gpu(_fuzz: FuzzCompressGpu) -> VortexFuzzResult<bool> {
+    // Reject from corpus when CUDA is not available
+    Ok(false)
+}
diff --git a/fuzz/src/lib.rs b/fuzz/src/lib.rs
index dd0b3022642..91c75d4018d 100644
--- a/fuzz/src/lib.rs
+++ b/fuzz/src/lib.rs
@@ -10,6 +10,10 @@ pub mod error;
 // File module only available for native builds (requires vortex-file which uses tokio)
 #[cfg(not(target_arch = "wasm32"))]
 pub mod file;
+
+// GPU fuzzer module (needs the `cuda` feature; it is a no-op when nvcc was not found at build time)
+#[cfg(feature = "cuda")]
+pub mod gpu;
 pub use array::Action;
 pub use array::CompressorStrategy;
 pub use array::ExpectedValue;
@@ -20,6 +24,10 @@ pub use compress::FuzzCompressRoundtrip;
 pub use compress::run_compress_roundtrip;
 #[cfg(not(target_arch = "wasm32"))]
 pub use file::FuzzFileAction;
+#[cfg(feature = "cuda")]
+pub use gpu::FuzzCompressGpu;
+#[cfg(feature = "cuda")]
+pub use gpu::run_compress_gpu;
 
 // Runtime initialization - platform-specific
 #[cfg(not(target_arch = "wasm32"))]
diff --git a/vortex-array/src/arrays/primitive/compute/take/avx2.rs b/vortex-array/src/arrays/primitive/compute/take/avx2.rs
index c330a9226a3..5f46d626718 100644
--- a/vortex-array/src/arrays/primitive/compute/take/avx2.rs
+++ b/vortex-array/src/arrays/primitive/compute/take/avx2.rs
@@ -48,6 +48,7 @@ impl TakeImpl for TakeKernelAVX2 {
 ///
 /// The caller must ensure that if the validity has a length, it is the same length as the indices,
 /// and that the `avx2` feature is enabled.
+#[allow(unused)]
 #[target_feature(enable = "avx2")]
 unsafe fn take_primitive_avx2(
     values: &[V],
diff --git a/vortex-cuda/src/canonical.rs b/vortex-cuda/src/canonical.rs
new file mode 100644
index 00000000000..7306c80aec1
--- /dev/null
+++ b/vortex-cuda/src/canonical.rs
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+use async_trait::async_trait;
+use vortex_array::Canonical;
+use vortex_array::arrays::BoolArray;
+use vortex_array::arrays::BoolArrayParts;
+use vortex_array::arrays::DecimalArray;
+use vortex_array::arrays::DecimalArrayParts;
+use vortex_array::arrays::PrimitiveArray;
+use vortex_array::arrays::PrimitiveArrayParts;
+use vortex_array::buffer::BufferHandle;
+use vortex_error::VortexResult;
+use vortex_error::vortex_bail;
+
+/// Extension trait for bringing the data of a [`Canonical`] array from the device to the host.
+#[async_trait]
+pub trait CanonicalCudaExt {
+    /// Consumes the array and rebuilds it from host-resident value buffers.
+    ///
+    /// Primitive and decimal value buffers are moved to the host. Null arrays are returned
+    /// unchanged, bool arrays are rebuilt from their existing parts without a copy, and
+    /// validity is passed through as-is.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if moving a buffer to the host fails, or if the canonical variant is
+    /// not supported yet.
+    async fn to_host(self) -> VortexResult<Self>
+    where
+        Self: Sized;
+}
+
+#[async_trait]
+impl CanonicalCudaExt for Canonical {
+    async fn to_host(self) -> VortexResult<Self> {
+        match self {
+            n @ Canonical::Null(_) => Ok(n),
+            Canonical::Bool(bool_array) => {
+                // NOTE: update to copy to host when adding buffer handle.
+                // Also update other method to copy validity to host.
+                let BoolArrayParts { bits, validity, .. } = bool_array.into_parts();
+                Ok(Canonical::Bool(BoolArray::from_bit_buffer(bits, validity)))
+            }
+            Canonical::Primitive(prim) => {
+                let PrimitiveArrayParts {
+                    ptype,
+                    buffer,
+                    validity,
+                    ..
+                } = prim.into_parts();
+                Ok(Canonical::Primitive(PrimitiveArray::from_byte_buffer(
+                    buffer.try_into_host()?.await?,
+                    ptype,
+                    validity,
+                )))
+            }
+            Canonical::Decimal(decimal) => {
+                let DecimalArrayParts {
+                    decimal_dtype,
+                    values,
+                    values_type,
+                    validity,
+                    ..
+                } = decimal.into_parts();
+                let host_values = BufferHandle::new_host(values.try_into_host()?.await?);
+                // SAFETY: every argument was just taken apart from an existing `DecimalArray`,
+                // and moving the values to the host is not expected to alter them.
+                Ok(Canonical::Decimal(unsafe {
+                    DecimalArray::new_unchecked_handle(
+                        host_values,
+                        values_type,
+                        decimal_dtype,
+                        validity,
+                    )
+                }))
+            }
+            // Report unsupported variants as an error instead of panicking via `todo!()`.
+            _ => vortex_bail!(
+                "CanonicalCudaExt::to_host is not implemented for this canonical variant"
+            ),
+        }
+    }
+}
diff --git a/vortex-cuda/src/lib.rs b/vortex-cuda/src/lib.rs
index d77be685b20..c331dc80103 100644
--- a/vortex-cuda/src/lib.rs
+++ b/vortex-cuda/src/lib.rs
@@ -3,12 +3,14 @@
 
 //! CUDA support for Vortex arrays.
 
+mod canonical;
 mod device_buffer;
 pub mod executor;
 mod kernel;
 mod session;
 mod stream;
 
+pub use canonical::CanonicalCudaExt;
 pub use device_buffer::CudaBufferExt;
 pub use device_buffer::CudaDeviceBuffer;
 pub use executor::CudaExecutionCtx;