From b4c36ea8db60ff09bda1dd9b14f1e3937932ab53 Mon Sep 17 00:00:00 2001
From: Dorin Geman
Date: Thu, 5 Feb 2026 15:14:40 +0200
Subject: [PATCH] fix: build vLLM from source for ARM64 CUDA 13 (NVIDIA DGX)

The prebuilt vLLM ARM64 wheels are ABI-incompatible with the PyTorch
CUDA 13 nightly builds.

For ARM64 with CUDA 13 (e.g., NVIDIA DGX GB300 Blackwell, DGX GB200):
- Install CUDA toolkit 13.0 for compilation
- Use PyTorch nightly with cu130 support
- Build vLLM from source to ensure ABI compatibility

Add a VLLM_ARM64_BUILD_FROM_SOURCE build arg (default: true) to allow
opting out of source builds for faster build times on non-CUDA 13
systems.

Also:
- Update the AMD64 wheel path to manylinux_2_35 (required for cu130)
- Bump vLLM to 0.15.1

Signed-off-by: Dorin Geman
---
 Dockerfile | 34 +++++++++++++++++++++++++++++++---
 Makefile   |  5 +++--
 2 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index b20d796e..c6b2d94c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -85,26 +85,54 @@ ENTRYPOINT ["/app/model-runner"]
 
 # --- vLLM variant ---
 FROM llamacpp AS vllm
-ARG VLLM_VERSION=0.12.0
+ARG VLLM_VERSION=0.15.1
 ARG VLLM_CUDA_VERSION=cu130
 ARG VLLM_PYTHON_TAG=cp38-abi3
 ARG TARGETARCH
+# Build vLLM from source on ARM64 for CUDA 13 compatibility (e.g., NVIDIA DGX).
+# Set to "false" to use prebuilt wheels instead (faster build, but may not work on CUDA 13).
+ARG VLLM_ARM64_BUILD_FROM_SOURCE=true
 
 USER root
 
-RUN apt update && apt install -y python3 python3-venv python3-dev curl ca-certificates build-essential && rm -rf /var/lib/apt/lists/*
+# Install build dependencies, including the CUDA toolkit for compiling vLLM from source on ARM64.
+# Note: the base image already has the CUDA repo configured, so cuda-toolkit can be installed directly.
+RUN apt update && apt install -y \
+    python3 python3-venv python3-dev \
+    curl ca-certificates build-essential \
+    git cmake ninja-build \
+    && if [ "$(uname -m)" = "aarch64" ] && [ "$VLLM_ARM64_BUILD_FROM_SOURCE" = "true" ]; then \
+        apt install -y cuda-toolkit-13-0; \
+    fi \
+    && rm -rf /var/lib/apt/lists/*
+
+# Set CUDA paths for ARM64 builds
+ENV PATH=/usr/local/cuda-13.0/bin:$PATH
+ENV LD_LIBRARY_PATH=/usr/local/cuda-13.0/lib64:$LD_LIBRARY_PATH
 
 RUN mkdir -p /opt/vllm-env && chown -R modelrunner:modelrunner /opt/vllm-env
 
 USER modelrunner
 
 # Install uv and vLLM as modelrunner user
+# For AMD64: use prebuilt CUDA 13 wheels (PyTorch pulled in as a dependency)
+# For ARM64 with VLLM_ARM64_BUILD_FROM_SOURCE=true: build from source against PyTorch nightly
+# For ARM64 with VLLM_ARM64_BUILD_FROM_SOURCE=false: use the prebuilt wheel (old behavior)
 RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
     && ~/.local/bin/uv venv --python /usr/bin/python3 /opt/vllm-env \
     && if [ "$TARGETARCH" = "amd64" ]; then \
-        WHEEL_ARCH="manylinux_2_31_x86_64"; \
+        WHEEL_ARCH="manylinux_2_35_x86_64"; \
         WHEEL_URL="https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}%2B${VLLM_CUDA_VERSION}-${VLLM_PYTHON_TAG}-${WHEEL_ARCH}.whl"; \
         ~/.local/bin/uv pip install --python /opt/vllm-env/bin/python "$WHEEL_URL"; \
+    elif [ "$VLLM_ARM64_BUILD_FROM_SOURCE" = "true" ]; then \
+        ~/.local/bin/uv pip install --python /opt/vllm-env/bin/python \
+            torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu130 \
+        && git clone --depth 1 --branch v${VLLM_VERSION} https://github.com/vllm-project/vllm.git /tmp/vllm \
+        && cd /tmp/vllm \
+        && /opt/vllm-env/bin/python use_existing_torch.py \
+        && ~/.local/bin/uv pip install --python /opt/vllm-env/bin/python -r requirements/build.txt \
+        && VLLM_TARGET_DEVICE=cuda ~/.local/bin/uv pip install --python /opt/vllm-env/bin/python . --no-build-isolation \
+        && rm -rf /tmp/vllm; \
     else \
         ~/.local/bin/uv pip install --python /opt/vllm-env/bin/python "vllm==${VLLM_VERSION}"; \
     fi
diff --git a/Makefile b/Makefile
index d39f2437..79251441 100644
--- a/Makefile
+++ b/Makefile
@@ -13,6 +13,7 @@ DOCKER_TARGET ?= final-llamacpp
 PORT := 8080
 MODELS_PATH := $(shell pwd)/models-store
 LLAMA_ARGS ?=
+EXTRA_DOCKER_BUILD_ARGS ?=
 DOCKER_BUILD_ARGS := \
 	--load \
 	--platform linux/$(shell docker version --format '{{.Server.Arch}}') \
@@ -84,11 +85,11 @@ lint:
 
 # Build Docker image
 docker-build:
-	docker buildx build $(DOCKER_BUILD_ARGS) .
+	docker buildx build $(DOCKER_BUILD_ARGS) $(EXTRA_DOCKER_BUILD_ARGS) .
 
 # Build multi-platform Docker image
 docker-build-multiplatform:
-	docker buildx build --platform linux/amd64,linux/arm64 $(DOCKER_BUILD_ARGS) .
+	docker buildx build --platform linux/amd64,linux/arm64 $(DOCKER_BUILD_ARGS) $(EXTRA_DOCKER_BUILD_ARGS) .
 
 # Run in Docker container with TCP port access and mounted model storage
 docker-run: docker-build
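
Usage sketch, assuming the Makefile defaults shown above (the DOCKER_TARGET for the vLLM variant is outside this hunk and is left at its default here):

    # Opt out of the ARM64 source build and fall back to the prebuilt wheel
    # (faster build, but may hit the CUDA 13 ABI incompatibility described above)
    make docker-build EXTRA_DOCKER_BUILD_ARGS="--build-arg VLLM_ARM64_BUILD_FROM_SOURCE=false"

    # Illustrative direct buildx invocation against the vllm stage from the Dockerfile
    docker buildx build --target vllm --build-arg VLLM_ARM64_BUILD_FROM_SOURCE=false .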