From 4553ee02c7434506f8226cf0b24de18e5ede0fd1 Mon Sep 17 00:00:00 2001 From: eureka928 Date: Fri, 30 Jan 2026 10:09:35 +0100 Subject: [PATCH 01/12] feat(proto): add speaker field to TranscriptSegment for diarization Add speaker field to the gRPC TranscriptSegment message and map it through the Go schema, enabling backends to return speaker labels. Signed-off-by: eureka928 --- backend/backend.proto | 1 + core/backend/transcript.go | 11 ++++++----- core/schema/transcription.go | 11 ++++++----- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/backend/backend.proto b/backend/backend.proto index 50b239a779db..2ba04c883394 100644 --- a/backend/backend.proto +++ b/backend/backend.proto @@ -299,6 +299,7 @@ message TranscriptSegment { int64 end = 3; string text = 4; repeated int32 tokens = 5; + string speaker = 6; } message GenerateImageRequest { diff --git a/core/backend/transcript.go b/core/backend/transcript.go index 62b04874ce13..4c721e986455 100644 --- a/core/backend/transcript.go +++ b/core/backend/transcript.go @@ -49,11 +49,12 @@ func ModelTranscription(audio, language string, translate, diarize bool, prompt } tr.Segments = append(tr.Segments, schema.TranscriptionSegment{ - Text: s.Text, - Id: int(s.Id), - Start: time.Duration(s.Start), - End: time.Duration(s.End), - Tokens: tks, + Text: s.Text, + Id: int(s.Id), + Start: time.Duration(s.Start), + End: time.Duration(s.End), + Tokens: tks, + Speaker: s.Speaker, }) } return tr, err diff --git a/core/schema/transcription.go b/core/schema/transcription.go index d843a9d98fcd..dc22abe85ef3 100644 --- a/core/schema/transcription.go +++ b/core/schema/transcription.go @@ -3,11 +3,12 @@ package schema import "time" type TranscriptionSegment struct { - Id int `json:"id"` - Start time.Duration `json:"start"` - End time.Duration `json:"end"` - Text string `json:"text"` - Tokens []int `json:"tokens"` + Id int `json:"id"` + Start time.Duration `json:"start"` + End time.Duration `json:"end"` + Text string `json:"text"` + Tokens []int `json:"tokens"` + Speaker string `json:"speaker,omitempty"` } type TranscriptionResult struct { From c8245d069dcdb9a30b13ed4339786170dd0e9bde Mon Sep 17 00:00:00 2001 From: eureka928 Date: Fri, 30 Jan 2026 10:10:05 +0100 Subject: [PATCH 02/12] feat(whisperx): add whisperx backend for transcription with diarization Add Python gRPC backend using WhisperX for speech-to-text with word-level timestamps, forced alignment, and speaker diarization via pyannote-audio when HF_TOKEN is provided. 
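For reference, diarization can be exercised locally by exporting
HF_TOKEN before starting the backend (a sketch; the token value is a
placeholder and the address matches the default used by test.py):

    HF_TOKEN=<hf-token> python3 backend.py --addr localhost:50051

Without HF_TOKEN the backend still transcribes and aligns; only the
speaker labels are omitted.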
Signed-off-by: eureka928 --- backend/python/whisperx/Makefile | 16 ++ backend/python/whisperx/backend.py | 169 ++++++++++++++++++ backend/python/whisperx/install.sh | 11 ++ backend/python/whisperx/protogen.sh | 11 ++ backend/python/whisperx/requirements-cpu.txt | 2 + .../python/whisperx/requirements-cublas12.txt | 2 + .../python/whisperx/requirements-cublas13.txt | 3 + .../python/whisperx/requirements-hipblas.txt | 3 + backend/python/whisperx/requirements.txt | 3 + backend/python/whisperx/run.sh | 9 + backend/python/whisperx/test.py | 124 +++++++++++++ backend/python/whisperx/test.sh | 11 ++ 12 files changed, 364 insertions(+) create mode 100644 backend/python/whisperx/Makefile create mode 100644 backend/python/whisperx/backend.py create mode 100755 backend/python/whisperx/install.sh create mode 100755 backend/python/whisperx/protogen.sh create mode 100644 backend/python/whisperx/requirements-cpu.txt create mode 100644 backend/python/whisperx/requirements-cublas12.txt create mode 100644 backend/python/whisperx/requirements-cublas13.txt create mode 100644 backend/python/whisperx/requirements-hipblas.txt create mode 100644 backend/python/whisperx/requirements.txt create mode 100755 backend/python/whisperx/run.sh create mode 100644 backend/python/whisperx/test.py create mode 100755 backend/python/whisperx/test.sh diff --git a/backend/python/whisperx/Makefile b/backend/python/whisperx/Makefile new file mode 100644 index 000000000000..8ad2368abb72 --- /dev/null +++ b/backend/python/whisperx/Makefile @@ -0,0 +1,16 @@ +.DEFAULT_GOAL := install + +.PHONY: install +install: + bash install.sh + +.PHONY: protogen-clean +protogen-clean: + $(RM) backend_pb2_grpc.py backend_pb2.py + +.PHONY: clean +clean: protogen-clean + rm -rf venv __pycache__ + +test: install + bash test.sh diff --git a/backend/python/whisperx/backend.py b/backend/python/whisperx/backend.py new file mode 100644 index 000000000000..7fd5cfb42b49 --- /dev/null +++ b/backend/python/whisperx/backend.py @@ -0,0 +1,169 @@ +#!/usr/bin/env python3 +""" +This is an extra gRPC server of LocalAI for WhisperX transcription +with speaker diarization, word-level timestamps, and forced alignment. 
+""" +from concurrent import futures +import time +import argparse +import signal +import sys +import os +import backend_pb2 +import backend_pb2_grpc + +import grpc + + +_ONE_DAY_IN_SECONDS = 60 * 60 * 24 + +# If MAX_WORKERS are specified in the environment use it, otherwise default to 1 +MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1')) + +# Implement the BackendServicer class with the service methods +class BackendServicer(backend_pb2_grpc.BackendServicer): + """ + BackendServicer is the class that implements the gRPC service + """ + def Health(self, request, context): + return backend_pb2.Reply(message=bytes("OK", 'utf-8')) + + def LoadModel(self, request, context): + import whisperx + import torch + + device = "cpu" + if request.CUDA: + device = "cuda" + mps_available = hasattr(torch.backends, "mps") and torch.backends.mps.is_available() + if mps_available: + device = "mps" + + try: + print("Preparing WhisperX model, please wait", file=sys.stderr) + compute_type = "float16" if device != "cpu" else "int8" + self.model = whisperx.load_model( + request.Model, + device, + compute_type=compute_type, + ) + self.device = device + self.model_name = request.Model + + # Store HF token for diarization if available + self.hf_token = os.environ.get("HF_TOKEN", None) + self.diarize_pipeline = None + + # Cache for alignment models keyed by language code + self.align_cache = {} + + print(f"WhisperX model loaded: {request.Model} on {device}", file=sys.stderr) + except Exception as err: + return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") + return backend_pb2.Result(message="Model loaded successfully", success=True) + + def _get_align_model(self, language_code): + """Load or return cached alignment model for a given language.""" + import whisperx + if language_code not in self.align_cache: + model_a, metadata = whisperx.load_align_model( + language_code=language_code, + device=self.device, + ) + self.align_cache[language_code] = (model_a, metadata) + return self.align_cache[language_code] + + def AudioTranscription(self, request, context): + import whisperx + + resultSegments = [] + text = "" + try: + audio = whisperx.load_audio(request.dst) + + # Transcribe + transcript = self.model.transcribe( + audio, + batch_size=16, + language=request.language if request.language else None, + ) + + # Align for word-level timestamps + model_a, metadata = self._get_align_model(transcript["language"]) + transcript = whisperx.align( + transcript["segments"], + model_a, + metadata, + audio, + self.device, + return_char_alignments=False, + ) + + # Diarize if requested and HF token is available + if request.diarize and self.hf_token: + if self.diarize_pipeline is None: + self.diarize_pipeline = whisperx.DiarizationPipeline( + use_auth_token=self.hf_token, + device=self.device, + ) + diarize_segments = self.diarize_pipeline(audio) + transcript = whisperx.assign_word_speakers(diarize_segments, transcript) + + # Build result segments + for idx, seg in enumerate(transcript["segments"]): + seg_text = seg.get("text", "") + start = int(seg.get("start", 0)) + end = int(seg.get("end", 0)) + speaker = seg.get("speaker", "") + + resultSegments.append(backend_pb2.TranscriptSegment( + id=idx, + start=start, + end=end, + text=seg_text, + speaker=speaker, + )) + text += seg_text + + except Exception as err: + print(f"Unexpected {err=}, {type(err)=}", file=sys.stderr) + return backend_pb2.TranscriptResult(segments=[], text="") + + return 
backend_pb2.TranscriptResult(segments=resultSegments, text=text) + +def serve(address): + server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS), + options=[ + ('grpc.max_message_length', 50 * 1024 * 1024), # 50MB + ('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB + ('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB + ]) + backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server) + server.add_insecure_port(address) + server.start() + print("Server started. Listening on: " + address, file=sys.stderr) + + # Define the signal handler function + def signal_handler(sig, frame): + print("Received termination signal. Shutting down...") + server.stop(0) + sys.exit(0) + + # Set the signal handlers for SIGINT and SIGTERM + signal.signal(signal.SIGINT, signal_handler) + signal.signal(signal.SIGTERM, signal_handler) + + try: + while True: + time.sleep(_ONE_DAY_IN_SECONDS) + except KeyboardInterrupt: + server.stop(0) + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Run the gRPC server.") + parser.add_argument( + "--addr", default="localhost:50051", help="The address to bind the server to." + ) + args = parser.parse_args() + + serve(args.addr) diff --git a/backend/python/whisperx/install.sh b/backend/python/whisperx/install.sh new file mode 100755 index 000000000000..4136d8765589 --- /dev/null +++ b/backend/python/whisperx/install.sh @@ -0,0 +1,11 @@ +#!/bin/bash +set -e + +backend_dir=$(dirname $0) +if [ -d $backend_dir/common ]; then + source $backend_dir/common/libbackend.sh +else + source $backend_dir/../common/libbackend.sh +fi + +installRequirements diff --git a/backend/python/whisperx/protogen.sh b/backend/python/whisperx/protogen.sh new file mode 100755 index 000000000000..1ad37dee164b --- /dev/null +++ b/backend/python/whisperx/protogen.sh @@ -0,0 +1,11 @@ +#!/bin/bash +set -e + +backend_dir=$(dirname $0) +if [ -d $backend_dir/common ]; then + source $backend_dir/common/libbackend.sh +else + source $backend_dir/../common/libbackend.sh +fi + +python3 -m grpc_tools.protoc -I../.. -I./ --python_out=. --grpc_python_out=. 
backend.proto diff --git a/backend/python/whisperx/requirements-cpu.txt b/backend/python/whisperx/requirements-cpu.txt new file mode 100644 index 000000000000..9e9dd9f7daa8 --- /dev/null +++ b/backend/python/whisperx/requirements-cpu.txt @@ -0,0 +1,2 @@ +torch==2.4.1 +whisperx @ git+https://github.com/m-bain/whisperX.git diff --git a/backend/python/whisperx/requirements-cublas12.txt b/backend/python/whisperx/requirements-cublas12.txt new file mode 100644 index 000000000000..9e9dd9f7daa8 --- /dev/null +++ b/backend/python/whisperx/requirements-cublas12.txt @@ -0,0 +1,2 @@ +torch==2.4.1 +whisperx @ git+https://github.com/m-bain/whisperX.git diff --git a/backend/python/whisperx/requirements-cublas13.txt b/backend/python/whisperx/requirements-cublas13.txt new file mode 100644 index 000000000000..8a8507199571 --- /dev/null +++ b/backend/python/whisperx/requirements-cublas13.txt @@ -0,0 +1,3 @@ +--extra-index-url https://download.pytorch.org/whl/cu130 +torch==2.9.1 +whisperx @ git+https://github.com/m-bain/whisperX.git diff --git a/backend/python/whisperx/requirements-hipblas.txt b/backend/python/whisperx/requirements-hipblas.txt new file mode 100644 index 000000000000..9f7a1778d771 --- /dev/null +++ b/backend/python/whisperx/requirements-hipblas.txt @@ -0,0 +1,3 @@ +--extra-index-url https://download.pytorch.org/whl/rocm6.4 +torch +whisperx @ git+https://github.com/m-bain/whisperX.git diff --git a/backend/python/whisperx/requirements.txt b/backend/python/whisperx/requirements.txt new file mode 100644 index 000000000000..44b40efd0b1b --- /dev/null +++ b/backend/python/whisperx/requirements.txt @@ -0,0 +1,3 @@ +grpcio==1.71.0 +protobuf +grpcio-tools diff --git a/backend/python/whisperx/run.sh b/backend/python/whisperx/run.sh new file mode 100755 index 000000000000..eae121f37b0b --- /dev/null +++ b/backend/python/whisperx/run.sh @@ -0,0 +1,9 @@ +#!/bin/bash +backend_dir=$(dirname $0) +if [ -d $backend_dir/common ]; then + source $backend_dir/common/libbackend.sh +else + source $backend_dir/../common/libbackend.sh +fi + +startBackend $@ diff --git a/backend/python/whisperx/test.py b/backend/python/whisperx/test.py new file mode 100644 index 000000000000..c2b4db8b5b5e --- /dev/null +++ b/backend/python/whisperx/test.py @@ -0,0 +1,124 @@ +""" +A test script to test the gRPC service for WhisperX transcription +""" +import unittest +import subprocess +import time +import os +import tempfile +import shutil +import backend_pb2 +import backend_pb2_grpc + +import grpc + + +class TestBackendServicer(unittest.TestCase): + """ + TestBackendServicer is the class that tests the gRPC service + """ + def setUp(self): + """ + This method sets up the gRPC service by starting the server + """ + self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"]) + time.sleep(10) + + def tearDown(self) -> None: + """ + This method tears down the gRPC service by terminating the server + """ + self.service.terminate() + self.service.wait() + + def test_server_startup(self): + """ + This method tests if the server starts up successfully + """ + try: + self.setUp() + with grpc.insecure_channel("localhost:50051") as channel: + stub = backend_pb2_grpc.BackendStub(channel) + response = stub.Health(backend_pb2.HealthMessage()) + self.assertEqual(response.message, b'OK') + except Exception as err: + print(err) + self.fail("Server failed to start") + finally: + self.tearDown() + + def test_load_model(self): + """ + This method tests if the model is loaded successfully + """ + try: + self.setUp() + with 
grpc.insecure_channel("localhost:50051") as channel: + stub = backend_pb2_grpc.BackendStub(channel) + response = stub.LoadModel(backend_pb2.ModelOptions(Model="tiny")) + self.assertTrue(response.success) + self.assertEqual(response.message, "Model loaded successfully") + except Exception as err: + print(err) + self.fail("LoadModel service failed") + finally: + self.tearDown() + + def test_audio_transcription(self): + """ + This method tests if audio transcription works successfully + """ + # Create a temporary directory for the audio file + temp_dir = tempfile.mkdtemp() + audio_file = os.path.join(temp_dir, 'audio.wav') + + try: + # Download the audio file to the temporary directory + print(f"Downloading audio file to {audio_file}...") + url = "https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav" + result = subprocess.run( + ["wget", "-q", url, "-O", audio_file], + capture_output=True, + text=True + ) + if result.returncode != 0: + self.fail(f"Failed to download audio file: {result.stderr}") + + # Verify the file was downloaded + if not os.path.exists(audio_file): + self.fail(f"Audio file was not downloaded to {audio_file}") + + self.setUp() + with grpc.insecure_channel("localhost:50051") as channel: + stub = backend_pb2_grpc.BackendStub(channel) + # Load the model first + load_response = stub.LoadModel(backend_pb2.ModelOptions(Model="tiny")) + self.assertTrue(load_response.success) + + # Perform transcription without diarization + transcript_request = backend_pb2.TranscriptRequest(dst=audio_file) + transcript_response = stub.AudioTranscription(transcript_request) + + # Print the transcribed text for debugging + print(f"Transcribed text: {transcript_response.text}") + print(f"Number of segments: {len(transcript_response.segments)}") + + # Verify response structure + self.assertIsNotNone(transcript_response) + self.assertIsNotNone(transcript_response.text) + self.assertGreater(len(transcript_response.text), 0) + self.assertGreater(len(transcript_response.segments), 0) + + # Verify segments have timing info + segment = transcript_response.segments[0] + self.assertIsNotNone(segment.text) + self.assertIsInstance(segment.id, int) + + except Exception as err: + print(err) + self.fail("AudioTranscription service failed") + finally: + self.tearDown() + # Clean up the temporary directory + if os.path.exists(temp_dir): + shutil.rmtree(temp_dir) diff --git a/backend/python/whisperx/test.sh b/backend/python/whisperx/test.sh new file mode 100755 index 000000000000..eb59f2aaf3f3 --- /dev/null +++ b/backend/python/whisperx/test.sh @@ -0,0 +1,11 @@ +#!/bin/bash +set -e + +backend_dir=$(dirname $0) +if [ -d $backend_dir/common ]; then + source $backend_dir/common/libbackend.sh +else + source $backend_dir/../common/libbackend.sh +fi + +runUnittests From a87c507030c8c922d22be724407c5ac9b55e33d4 Mon Sep 17 00:00:00 2001 From: eureka928 Date: Fri, 30 Jan 2026 10:10:10 +0100 Subject: [PATCH 03/12] feat(whisperx): register whisperx backend in Makefile Signed-off-by: eureka928 --- Makefile | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 33f75c939c73..2fc43331b79e 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ # Disable parallel execution for backend builds -.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox 
backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/moonshine backends/pocket-tts backends/qwen-tts backends/qwen-asr backends/voxcpm +.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/moonshine backends/pocket-tts backends/qwen-tts backends/qwen-asr backends/voxcpm backends/whisperx GOCMD=go GOTEST=$(GOCMD) test @@ -318,6 +318,7 @@ prepare-test-extra: protogen-python $(MAKE) -C backend/python/qwen-tts $(MAKE) -C backend/python/qwen-asr $(MAKE) -C backend/python/voxcpm + $(MAKE) -C backend/python/whisperx test-extra: prepare-test-extra $(MAKE) -C backend/python/transformers test @@ -331,6 +332,7 @@ test-extra: prepare-test-extra $(MAKE) -C backend/python/qwen-tts test $(MAKE) -C backend/python/qwen-asr test $(MAKE) -C backend/python/voxcpm test + $(MAKE) -C backend/python/whisperx test DOCKER_IMAGE?=local-ai DOCKER_AIO_IMAGE?=local-ai-aio @@ -465,6 +467,7 @@ BACKEND_POCKET_TTS = pocket-tts|python|.|false|true BACKEND_QWEN_TTS = qwen-tts|python|.|false|true BACKEND_QWEN_ASR = qwen-asr|python|.|false|true BACKEND_VOXCPM = voxcpm|python|.|false|true +BACKEND_WHISPERX = whisperx|python|.|false|true # Helper function to build docker image for a backend # Usage: $(call docker-build-backend,BACKEND_NAME,DOCKERFILE_TYPE,BUILD_CONTEXT,PROGRESS_FLAG,NEEDS_BACKEND_ARG) @@ -512,12 +515,13 @@ $(eval $(call generate-docker-build-target,$(BACKEND_POCKET_TTS))) $(eval $(call generate-docker-build-target,$(BACKEND_QWEN_TTS))) $(eval $(call generate-docker-build-target,$(BACKEND_QWEN_ASR))) $(eval $(call generate-docker-build-target,$(BACKEND_VOXCPM))) +$(eval $(call generate-docker-build-target,$(BACKEND_WHISPERX))) # Pattern rule for docker-save targets docker-save-%: backend-images docker save local-ai-backend:$* -o backend-images/$*.tar -docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-transformers docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-qwen-asr docker-build-voxcpm +docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-transformers docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-qwen-asr docker-build-voxcpm docker-build-whisperx ######################################################## ### Mock Backend for E2E Tests From 8e51db3cab43404ec0727c28b117aa983f1eddc5 Mon Sep 17 00:00:00 2001 From: eureka928 Date: Fri, 30 Jan 2026 10:10:14 +0100 Subject: [PATCH 04/12] feat(whisperx): add whisperx meta and image entries to index.yaml Signed-off-by: eureka928 --- 
backend/index.yaml | 68 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/backend/index.yaml b/backend/index.yaml index e666cb14ade9..c0d31457701c 100644 --- a/backend/index.yaml +++ b/backend/index.yaml @@ -302,6 +302,25 @@ default: "cpu-moonshine" nvidia-cuda-13: "cuda13-moonshine" nvidia-cuda-12: "cuda12-moonshine" +- &whisperx + description: | + WhisperX provides fast automatic speech recognition with word-level timestamps, speaker diarization, + and forced alignment. Built on faster-whisper and pyannote-audio for high-accuracy transcription + with speaker identification. + urls: + - https://github.com/m-bain/whisperX + tags: + - speech-to-text + - diarization + - whisperx + license: BSD-4-Clause + name: "whisperx" + capabilities: + nvidia: "cuda12-whisperx" + amd: "rocm-whisperx" + default: "cpu-whisperx" + nvidia-cuda-13: "cuda13-whisperx" + nvidia-cuda-12: "cuda12-whisperx" - &kokoro icon: https://avatars.githubusercontent.com/u/166769057?v=4 description: | @@ -1417,6 +1436,55 @@ uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-moonshine" mirrors: - localai/localai-backends:master-gpu-nvidia-cuda-13-moonshine +## whisperx +- !!merge <<: *whisperx + name: "whisperx-development" + capabilities: + nvidia: "cuda12-whisperx-development" + amd: "rocm-whisperx-development" + default: "cpu-whisperx-development" + nvidia-cuda-13: "cuda13-whisperx-development" + nvidia-cuda-12: "cuda12-whisperx-development" +- !!merge <<: *whisperx + name: "cpu-whisperx" + uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-whisperx" + mirrors: + - localai/localai-backends:latest-cpu-whisperx +- !!merge <<: *whisperx + name: "cpu-whisperx-development" + uri: "quay.io/go-skynet/local-ai-backends:master-cpu-whisperx" + mirrors: + - localai/localai-backends:master-cpu-whisperx +- !!merge <<: *whisperx + name: "cuda12-whisperx" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-whisperx" + mirrors: + - localai/localai-backends:latest-gpu-nvidia-cuda-12-whisperx +- !!merge <<: *whisperx + name: "cuda12-whisperx-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-whisperx" + mirrors: + - localai/localai-backends:master-gpu-nvidia-cuda-12-whisperx +- !!merge <<: *whisperx + name: "rocm-whisperx" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-whisperx" + mirrors: + - localai/localai-backends:latest-gpu-rocm-hipblas-whisperx +- !!merge <<: *whisperx + name: "rocm-whisperx-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-whisperx" + mirrors: + - localai/localai-backends:master-gpu-rocm-hipblas-whisperx +- !!merge <<: *whisperx + name: "cuda13-whisperx" + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-whisperx" + mirrors: + - localai/localai-backends:latest-gpu-nvidia-cuda-13-whisperx +- !!merge <<: *whisperx + name: "cuda13-whisperx-development" + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-whisperx" + mirrors: + - localai/localai-backends:master-gpu-nvidia-cuda-13-whisperx ## coqui - !!merge <<: *coqui From 70a64fe9b8d350e3354c909c31be346a18678c6a Mon Sep 17 00:00:00 2001 From: eureka928 Date: Fri, 30 Jan 2026 10:10:18 +0100 Subject: [PATCH 05/12] ci(whisperx): add build matrix entries for CPU, CUDA 12/13, and ROCm Signed-off-by: eureka928 --- .github/workflows/backend.yml | 52 +++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/.github/workflows/backend.yml 
b/.github/workflows/backend.yml index e0d330d1d9f8..22449a3b3752 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -91,6 +91,19 @@ jobs: dockerfile: "./backend/Dockerfile.python" context: "./" ubuntu-version: '2404' + - build-type: '' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-cpu-whisperx' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" + skip-drivers: 'true' + backend: "whisperx" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' # CUDA 12 builds - build-type: 'cublas' cuda-major-version: "12" @@ -266,6 +279,19 @@ jobs: cuda-minor-version: "8" platforms: 'linux/amd64' tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-12-whisperx' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "whisperx" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "9" + platforms: 'linux/amd64' + tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-coqui' runs-on: 'ubuntu-latest' base-image: "ubuntu:24.04" @@ -574,6 +600,19 @@ jobs: dockerfile: "./backend/Dockerfile.python" context: "./" ubuntu-version: '2404' + - build-type: 'cublas' + cuda-major-version: "13" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-13-whisperx' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "whisperx" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -836,6 +875,19 @@ jobs: dockerfile: "./backend/Dockerfile.python" context: "./" ubuntu-version: '2404' + - build-type: 'hipblas' + cuda-major-version: "" + cuda-minor-version: "" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-rocm-hipblas-whisperx' + runs-on: 'bigger-runner' + base-image: "rocm/dev-ubuntu-24.04:6.4.4" + skip-drivers: 'false' + backend: "whisperx" + dockerfile: "./backend/Dockerfile.python" + context: "./" + ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" From 2d42c30f87e41f272493ed989f5ac260ff13c9d5 Mon Sep 17 00:00:00 2001 From: eureka928 Date: Fri, 30 Jan 2026 21:35:08 +0100 Subject: [PATCH 06/12] fix(whisperx): unpin torch versions and use CPU index for cpu requirements Address review feedback: - Use --extra-index-url for CPU torch wheels to reduce size - Remove torch version pins, let uv resolve compatible versions Signed-off-by: eureka928 --- backend/python/whisperx/requirements-cpu.txt | 3 ++- backend/python/whisperx/requirements-cublas12.txt | 2 +- backend/python/whisperx/requirements-cublas13.txt | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/backend/python/whisperx/requirements-cpu.txt b/backend/python/whisperx/requirements-cpu.txt index 9e9dd9f7daa8..e6c9e71c38a8 100644 --- a/backend/python/whisperx/requirements-cpu.txt +++ b/backend/python/whisperx/requirements-cpu.txt @@ -1,2 +1,3 @@ -torch==2.4.1 +--extra-index-url https://download.pytorch.org/whl/cpu +torch whisperx @ git+https://github.com/m-bain/whisperX.git diff --git a/backend/python/whisperx/requirements-cublas12.txt b/backend/python/whisperx/requirements-cublas12.txt index 9e9dd9f7daa8..5b6f2d82f869 100644 --- a/backend/python/whisperx/requirements-cublas12.txt +++ b/backend/python/whisperx/requirements-cublas12.txt @@ 
-1,2 +1,2 @@ -torch==2.4.1 +torch whisperx @ git+https://github.com/m-bain/whisperX.git diff --git a/backend/python/whisperx/requirements-cublas13.txt b/backend/python/whisperx/requirements-cublas13.txt index 8a8507199571..c0e918b87034 100644 --- a/backend/python/whisperx/requirements-cublas13.txt +++ b/backend/python/whisperx/requirements-cublas13.txt @@ -1,3 +1,3 @@ --extra-index-url https://download.pytorch.org/whl/cu130 -torch==2.9.1 +torch whisperx @ git+https://github.com/m-bain/whisperX.git From 53c18e17cd794ea67b6b5bcfe975614a449230dc Mon Sep 17 00:00:00 2001 From: eureka928 Date: Sun, 1 Feb 2026 03:37:49 +0100 Subject: [PATCH 07/12] fix(whisperx): pin torch ROCm variant to fix CI build failure Signed-off-by: eureka928 --- backend/python/whisperx/requirements-hipblas.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/whisperx/requirements-hipblas.txt b/backend/python/whisperx/requirements-hipblas.txt index 9f7a1778d771..2de8a9ce5f39 100644 --- a/backend/python/whisperx/requirements-hipblas.txt +++ b/backend/python/whisperx/requirements-hipblas.txt @@ -1,3 +1,3 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.4 -torch +torch==2.8.0+rocm6.4 whisperx @ git+https://github.com/m-bain/whisperX.git From e91d83b171930e89e24070cd62b473db707ba5dc Mon Sep 17 00:00:00 2001 From: eureka928 Date: Mon, 2 Feb 2026 04:05:40 +0100 Subject: [PATCH 08/12] fix(whisperx): pin torch CPU variant to fix uv resolution failure Pin torch==2.8.0+cpu so uv resolves the CPU wheel from the extra index instead of picking torch==2.8.0+cu128 from PyPI, which pulls unresolvable CUDA dependencies. Signed-off-by: eureka928 --- backend/python/whisperx/requirements-cpu.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/whisperx/requirements-cpu.txt b/backend/python/whisperx/requirements-cpu.txt index e6c9e71c38a8..b11993a0b588 100644 --- a/backend/python/whisperx/requirements-cpu.txt +++ b/backend/python/whisperx/requirements-cpu.txt @@ -1,3 +1,3 @@ --extra-index-url https://download.pytorch.org/whl/cpu -torch +torch==2.8.0+cpu whisperx @ git+https://github.com/m-bain/whisperX.git From ce82ba35c407fadb1075875071433b474c0d0797 Mon Sep 17 00:00:00 2001 From: eureka928 Date: Mon, 2 Feb 2026 09:05:14 +0100 Subject: [PATCH 09/12] fix(whisperx): use unsafe-best-match index strategy to fix uv resolution failure uv's default first-match strategy finds torch on PyPI before checking the extra index, causing it to pick torch==2.8.0+cu128 instead of the CPU variant. This makes whisperx's transitive torch dependency unresolvable. Using unsafe-best-match lets uv consider all indexes. 
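The failure is reproducible outside CI with the same requirements file
(a sketch; the flag usage mirrors the libbackend.sh change below):

    # default first-match strategy resolves torch==2.8.0+cu128 from PyPI
    uv pip install -r backend/python/whisperx/requirements-cpu.txt
    # unsafe-best-match also considers the pytorch.org CPU extra index
    uv pip install --index-strategy unsafe-best-match \
        -r backend/python/whisperx/requirements-cpu.txt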
Signed-off-by: eureka928 --- backend/python/common/libbackend.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/common/libbackend.sh b/backend/python/common/libbackend.sh index c923c12cf62f..423e167e14b6 100644 --- a/backend/python/common/libbackend.sh +++ b/backend/python/common/libbackend.sh @@ -436,7 +436,7 @@ function installRequirements() { if [ "x${USE_PIP}" == "xtrue" ]; then pip install ${EXTRA_PIP_INSTALL_FLAGS:-} --requirement "${reqFile}" else - uv pip install ${EXTRA_PIP_INSTALL_FLAGS:-} --requirement "${reqFile}" + uv pip install --index-strategy unsafe-best-match ${EXTRA_PIP_INSTALL_FLAGS:-} --requirement "${reqFile}" fi echo "finished requirements install for ${reqFile}" fi From 7f5d72e8c469685514262d62296b851421c47223 Mon Sep 17 00:00:00 2001 From: eureka928 Date: Mon, 2 Feb 2026 09:51:16 +0100 Subject: [PATCH 10/12] fix(whisperx): drop +cpu local version suffix to fix uv resolution failure PEP 440 ==2.8.0 matches 2.8.0+cpu from the extra index, avoiding the issue where uv cannot locate an explicit +cpu local version specifier. This aligns with the pattern used by all other CPU backends. Signed-off-by: eureka928 --- backend/python/whisperx/requirements-cpu.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/whisperx/requirements-cpu.txt b/backend/python/whisperx/requirements-cpu.txt index b11993a0b588..fd6861dc16d6 100644 --- a/backend/python/whisperx/requirements-cpu.txt +++ b/backend/python/whisperx/requirements-cpu.txt @@ -1,3 +1,3 @@ --extra-index-url https://download.pytorch.org/whl/cpu -torch==2.8.0+cpu +torch==2.8.0 whisperx @ git+https://github.com/m-bain/whisperX.git From 6366f4f24f3e820a80cfe04c6bfed0e9f4c3f806 Mon Sep 17 00:00:00 2001 From: eureka928 Date: Mon, 2 Feb 2026 15:07:24 +0100 Subject: [PATCH 11/12] fix(backends): drop +rocm local version suffixes from hipblas requirements to fix uv resolution uv cannot resolve PEP 440 local version specifiers (e.g. +rocm6.4, +rocm6.3) in pinned requirements. The --extra-index-url already points to the correct ROCm wheel index and --index-strategy unsafe-best-match (set in libbackend.sh) ensures the ROCm variant is preferred. Applies the same fix as 7f5d72e8 (which resolved this for +cpu) across all 14 hipblas requirements files. 
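For example, the unpinned-suffix form still selects the ROCm wheel,
since a PEP 440 specifier without a local version label ignores the
local label of candidate versions (a sketch; the requirements file is
the one changed in this patch):

    # torch==2.8.0 matches 2.8.0+rocm6.4 from the rocm6.4 extra index
    uv pip install --index-strategy unsafe-best-match \
        -r backend/python/whisperx/requirements-hipblas.txt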
Signed-off-by: eureka928 Co-Authored-By: Claude Opus 4.5 Signed-off-by: eureka928 --- backend/python/chatterbox/requirements-hipblas.txt | 4 ++-- backend/python/coqui/requirements-hipblas.txt | 4 ++-- backend/python/diffusers/requirements-hipblas.txt | 4 ++-- backend/python/kokoro/requirements-hipblas.txt | 4 ++-- backend/python/neutts/requirements-hipblas.txt | 2 +- backend/python/pocket-tts/requirements-hipblas.txt | 2 +- backend/python/qwen-asr/requirements-hipblas.txt | 2 +- backend/python/qwen-tts/requirements-hipblas.txt | 4 ++-- backend/python/rerankers/requirements-hipblas.txt | 2 +- backend/python/rfdetr/requirements-hipblas.txt | 4 ++-- backend/python/transformers/requirements-hipblas.txt | 2 +- backend/python/vibevoice/requirements-hipblas.txt | 4 ++-- backend/python/voxcpm/requirements-hipblas.txt | 2 +- backend/python/whisperx/requirements-hipblas.txt | 2 +- 14 files changed, 21 insertions(+), 21 deletions(-) diff --git a/backend/python/chatterbox/requirements-hipblas.txt b/backend/python/chatterbox/requirements-hipblas.txt index ed30fb824107..9bf7792d85a9 100644 --- a/backend/python/chatterbox/requirements-hipblas.txt +++ b/backend/python/chatterbox/requirements-hipblas.txt @@ -1,6 +1,6 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.4 -torch==2.9.1+rocm6.4 -torchaudio==2.9.1+rocm6.4 +torch==2.9.1 +torchaudio==2.9.1 transformers numpy>=1.24.0,<1.26.0 # https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289 diff --git a/backend/python/coqui/requirements-hipblas.txt b/backend/python/coqui/requirements-hipblas.txt index 8e7d034591e3..2544e7f3ed46 100644 --- a/backend/python/coqui/requirements-hipblas.txt +++ b/backend/python/coqui/requirements-hipblas.txt @@ -1,6 +1,6 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.4 -torch==2.8.0+rocm6.4 -torchaudio==2.8.0+rocm6.4 +torch==2.8.0 +torchaudio==2.8.0 transformers==4.48.3 accelerate coqui-tts \ No newline at end of file diff --git a/backend/python/diffusers/requirements-hipblas.txt b/backend/python/diffusers/requirements-hipblas.txt index b1f8b3e048c5..39e6ebb52832 100644 --- a/backend/python/diffusers/requirements-hipblas.txt +++ b/backend/python/diffusers/requirements-hipblas.txt @@ -1,6 +1,6 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.4 -torch==2.8.0+rocm6.4 -torchvision==0.23.0+rocm6.4 +torch==2.8.0 +torchvision==0.23.0 git+https://github.com/huggingface/diffusers opencv-python transformers diff --git a/backend/python/kokoro/requirements-hipblas.txt b/backend/python/kokoro/requirements-hipblas.txt index 74262df5c3ce..9392e411e1e3 100644 --- a/backend/python/kokoro/requirements-hipblas.txt +++ b/backend/python/kokoro/requirements-hipblas.txt @@ -1,6 +1,6 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.4 -torch==2.8.0+rocm6.4 -torchaudio==2.8.0+rocm6.4 +torch==2.8.0 +torchaudio==2.8.0 transformers accelerate kokoro diff --git a/backend/python/neutts/requirements-hipblas.txt b/backend/python/neutts/requirements-hipblas.txt index 72d11e059817..2d214f8efb65 100644 --- a/backend/python/neutts/requirements-hipblas.txt +++ b/backend/python/neutts/requirements-hipblas.txt @@ -1,5 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.4 -torch==2.8.0+rocm6.4 +torch==2.8.0 transformers==4.56.1 accelerate librosa==0.11.0 diff --git a/backend/python/pocket-tts/requirements-hipblas.txt b/backend/python/pocket-tts/requirements-hipblas.txt index b6f9d2fb6a0a..cccb133ca7e8 100644 --- a/backend/python/pocket-tts/requirements-hipblas.txt +++ 
b/backend/python/pocket-tts/requirements-hipblas.txt @@ -1,4 +1,4 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.3 pocket-tts scipy -torch==2.7.1+rocm6.3 +torch==2.7.1 diff --git a/backend/python/qwen-asr/requirements-hipblas.txt b/backend/python/qwen-asr/requirements-hipblas.txt index 6871f93f585c..e07c3be46f36 100644 --- a/backend/python/qwen-asr/requirements-hipblas.txt +++ b/backend/python/qwen-asr/requirements-hipblas.txt @@ -1,3 +1,3 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.3 -torch==2.7.1+rocm6.3 +torch==2.7.1 qwen-asr diff --git a/backend/python/qwen-tts/requirements-hipblas.txt b/backend/python/qwen-tts/requirements-hipblas.txt index d8a3e3616d40..f39b532af243 100644 --- a/backend/python/qwen-tts/requirements-hipblas.txt +++ b/backend/python/qwen-tts/requirements-hipblas.txt @@ -1,5 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.3 -torch==2.7.1+rocm6.3 -torchaudio==2.7.1+rocm6.3 +torch==2.7.1 +torchaudio==2.7.1 qwen-tts sox \ No newline at end of file diff --git a/backend/python/rerankers/requirements-hipblas.txt b/backend/python/rerankers/requirements-hipblas.txt index 7a72b3d0650f..1701ffe29ed2 100644 --- a/backend/python/rerankers/requirements-hipblas.txt +++ b/backend/python/rerankers/requirements-hipblas.txt @@ -1,5 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.4 transformers accelerate -torch==2.8.0+rocm6.4 +torch==2.8.0 rerankers[transformers] \ No newline at end of file diff --git a/backend/python/rfdetr/requirements-hipblas.txt b/backend/python/rfdetr/requirements-hipblas.txt index 884cfba7be46..2f10779cf58a 100644 --- a/backend/python/rfdetr/requirements-hipblas.txt +++ b/backend/python/rfdetr/requirements-hipblas.txt @@ -1,6 +1,6 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.4 -torch==2.8.0+rocm6.4 -torchvision==0.23.0+rocm6.4 +torch==2.8.0 +torchvision==0.23.0 rfdetr opencv-python accelerate diff --git a/backend/python/transformers/requirements-hipblas.txt b/backend/python/transformers/requirements-hipblas.txt index 0576c6acf108..f4c37af0bedf 100644 --- a/backend/python/transformers/requirements-hipblas.txt +++ b/backend/python/transformers/requirements-hipblas.txt @@ -1,5 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.4 -torch==2.8.0+rocm6.4 +torch==2.8.0 accelerate transformers llvmlite==0.43.0 diff --git a/backend/python/vibevoice/requirements-hipblas.txt b/backend/python/vibevoice/requirements-hipblas.txt index 931dd1e0a274..23c85d4eb069 100644 --- a/backend/python/vibevoice/requirements-hipblas.txt +++ b/backend/python/vibevoice/requirements-hipblas.txt @@ -1,6 +1,6 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.3 -torch==2.7.1+rocm6.3 -torchvision==0.22.1+rocm6.3 +torch==2.7.1 +torchvision==0.22.1 git+https://github.com/huggingface/diffusers opencv-python transformers>=4.51.3,<5.0.0 diff --git a/backend/python/voxcpm/requirements-hipblas.txt b/backend/python/voxcpm/requirements-hipblas.txt index 7541c8149db8..2ead77e1ebd6 100644 --- a/backend/python/voxcpm/requirements-hipblas.txt +++ b/backend/python/voxcpm/requirements-hipblas.txt @@ -1,5 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.3 -torch==2.7.1+rocm6.3 +torch==2.7.1 soundfile numpy voxcpm diff --git a/backend/python/whisperx/requirements-hipblas.txt b/backend/python/whisperx/requirements-hipblas.txt index 2de8a9ce5f39..1600400264e6 100644 --- a/backend/python/whisperx/requirements-hipblas.txt +++ b/backend/python/whisperx/requirements-hipblas.txt @@ -1,3 +1,3 @@ 
--extra-index-url https://download.pytorch.org/whl/rocm6.4 -torch==2.8.0+rocm6.4 +torch==2.8.0 whisperx @ git+https://github.com/m-bain/whisperX.git From d6c7cf71e731d123a47983aad59ef1e2ab97c20d Mon Sep 17 00:00:00 2001 From: eureka928 Date: Mon, 2 Feb 2026 15:34:08 +0100 Subject: [PATCH 12/12] revert: scope hipblas suffix fix to whisperx only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reverts changes to non-whisperx hipblas requirements files per maintainer review — other backends are building fine with the +rocm local version suffix. Signed-off-by: eureka928 Co-Authored-By: Claude Opus 4.5 Signed-off-by: eureka928 --- backend/python/chatterbox/requirements-hipblas.txt | 4 ++-- backend/python/coqui/requirements-hipblas.txt | 4 ++-- backend/python/diffusers/requirements-hipblas.txt | 4 ++-- backend/python/kokoro/requirements-hipblas.txt | 4 ++-- backend/python/neutts/requirements-hipblas.txt | 2 +- backend/python/pocket-tts/requirements-hipblas.txt | 2 +- backend/python/qwen-asr/requirements-hipblas.txt | 2 +- backend/python/qwen-tts/requirements-hipblas.txt | 4 ++-- backend/python/rerankers/requirements-hipblas.txt | 2 +- backend/python/rfdetr/requirements-hipblas.txt | 4 ++-- backend/python/transformers/requirements-hipblas.txt | 2 +- backend/python/vibevoice/requirements-hipblas.txt | 4 ++-- backend/python/voxcpm/requirements-hipblas.txt | 2 +- 13 files changed, 20 insertions(+), 20 deletions(-) diff --git a/backend/python/chatterbox/requirements-hipblas.txt b/backend/python/chatterbox/requirements-hipblas.txt index 9bf7792d85a9..ed30fb824107 100644 --- a/backend/python/chatterbox/requirements-hipblas.txt +++ b/backend/python/chatterbox/requirements-hipblas.txt @@ -1,6 +1,6 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.4 -torch==2.9.1 -torchaudio==2.9.1 +torch==2.9.1+rocm6.4 +torchaudio==2.9.1+rocm6.4 transformers numpy>=1.24.0,<1.26.0 # https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289 diff --git a/backend/python/coqui/requirements-hipblas.txt b/backend/python/coqui/requirements-hipblas.txt index 2544e7f3ed46..8e7d034591e3 100644 --- a/backend/python/coqui/requirements-hipblas.txt +++ b/backend/python/coqui/requirements-hipblas.txt @@ -1,6 +1,6 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.4 -torch==2.8.0 -torchaudio==2.8.0 +torch==2.8.0+rocm6.4 +torchaudio==2.8.0+rocm6.4 transformers==4.48.3 accelerate coqui-tts \ No newline at end of file diff --git a/backend/python/diffusers/requirements-hipblas.txt b/backend/python/diffusers/requirements-hipblas.txt index 39e6ebb52832..b1f8b3e048c5 100644 --- a/backend/python/diffusers/requirements-hipblas.txt +++ b/backend/python/diffusers/requirements-hipblas.txt @@ -1,6 +1,6 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.4 -torch==2.8.0 -torchvision==0.23.0 +torch==2.8.0+rocm6.4 +torchvision==0.23.0+rocm6.4 git+https://github.com/huggingface/diffusers opencv-python transformers diff --git a/backend/python/kokoro/requirements-hipblas.txt b/backend/python/kokoro/requirements-hipblas.txt index 9392e411e1e3..74262df5c3ce 100644 --- a/backend/python/kokoro/requirements-hipblas.txt +++ b/backend/python/kokoro/requirements-hipblas.txt @@ -1,6 +1,6 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.4 -torch==2.8.0 -torchaudio==2.8.0 +torch==2.8.0+rocm6.4 +torchaudio==2.8.0+rocm6.4 transformers accelerate kokoro diff --git a/backend/python/neutts/requirements-hipblas.txt b/backend/python/neutts/requirements-hipblas.txt index 
2d214f8efb65..72d11e059817 100644 --- a/backend/python/neutts/requirements-hipblas.txt +++ b/backend/python/neutts/requirements-hipblas.txt @@ -1,5 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.4 -torch==2.8.0 +torch==2.8.0+rocm6.4 transformers==4.56.1 accelerate librosa==0.11.0 diff --git a/backend/python/pocket-tts/requirements-hipblas.txt b/backend/python/pocket-tts/requirements-hipblas.txt index cccb133ca7e8..b6f9d2fb6a0a 100644 --- a/backend/python/pocket-tts/requirements-hipblas.txt +++ b/backend/python/pocket-tts/requirements-hipblas.txt @@ -1,4 +1,4 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.3 pocket-tts scipy -torch==2.7.1 +torch==2.7.1+rocm6.3 diff --git a/backend/python/qwen-asr/requirements-hipblas.txt b/backend/python/qwen-asr/requirements-hipblas.txt index e07c3be46f36..6871f93f585c 100644 --- a/backend/python/qwen-asr/requirements-hipblas.txt +++ b/backend/python/qwen-asr/requirements-hipblas.txt @@ -1,3 +1,3 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.3 -torch==2.7.1 +torch==2.7.1+rocm6.3 qwen-asr diff --git a/backend/python/qwen-tts/requirements-hipblas.txt b/backend/python/qwen-tts/requirements-hipblas.txt index f39b532af243..d8a3e3616d40 100644 --- a/backend/python/qwen-tts/requirements-hipblas.txt +++ b/backend/python/qwen-tts/requirements-hipblas.txt @@ -1,5 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.3 -torch==2.7.1 -torchaudio==2.7.1 +torch==2.7.1+rocm6.3 +torchaudio==2.7.1+rocm6.3 qwen-tts sox \ No newline at end of file diff --git a/backend/python/rerankers/requirements-hipblas.txt b/backend/python/rerankers/requirements-hipblas.txt index 1701ffe29ed2..7a72b3d0650f 100644 --- a/backend/python/rerankers/requirements-hipblas.txt +++ b/backend/python/rerankers/requirements-hipblas.txt @@ -1,5 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.4 transformers accelerate -torch==2.8.0 +torch==2.8.0+rocm6.4 rerankers[transformers] \ No newline at end of file diff --git a/backend/python/rfdetr/requirements-hipblas.txt b/backend/python/rfdetr/requirements-hipblas.txt index 2f10779cf58a..884cfba7be46 100644 --- a/backend/python/rfdetr/requirements-hipblas.txt +++ b/backend/python/rfdetr/requirements-hipblas.txt @@ -1,6 +1,6 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.4 -torch==2.8.0 -torchvision==0.23.0 +torch==2.8.0+rocm6.4 +torchvision==0.23.0+rocm6.4 rfdetr opencv-python accelerate diff --git a/backend/python/transformers/requirements-hipblas.txt b/backend/python/transformers/requirements-hipblas.txt index f4c37af0bedf..0576c6acf108 100644 --- a/backend/python/transformers/requirements-hipblas.txt +++ b/backend/python/transformers/requirements-hipblas.txt @@ -1,5 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.4 -torch==2.8.0 +torch==2.8.0+rocm6.4 accelerate transformers llvmlite==0.43.0 diff --git a/backend/python/vibevoice/requirements-hipblas.txt b/backend/python/vibevoice/requirements-hipblas.txt index 23c85d4eb069..931dd1e0a274 100644 --- a/backend/python/vibevoice/requirements-hipblas.txt +++ b/backend/python/vibevoice/requirements-hipblas.txt @@ -1,6 +1,6 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.3 -torch==2.7.1 -torchvision==0.22.1 +torch==2.7.1+rocm6.3 +torchvision==0.22.1+rocm6.3 git+https://github.com/huggingface/diffusers opencv-python transformers>=4.51.3,<5.0.0 diff --git a/backend/python/voxcpm/requirements-hipblas.txt b/backend/python/voxcpm/requirements-hipblas.txt index 2ead77e1ebd6..7541c8149db8 100644 --- 
a/backend/python/voxcpm/requirements-hipblas.txt +++ b/backend/python/voxcpm/requirements-hipblas.txt @@ -1,5 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.3 -torch==2.7.1 +torch==2.7.1+rocm6.3 soundfile numpy voxcpm