
Commit c48b13a

feat(scripts): add MLX-LM server script for fast Apple Silicon inference
- Add run-mlx-lm.sh script that starts an OpenAI-compatible MLX LLM server
- Default model: mlx-community/Qwen3-4B-4bit (configurable via MLX_MODEL env var)
- Runs on port 10500 (configurable via MLX_PORT env var)
- Only works on Apple Silicon (M1/M2/M3/M4)
- Add MLX-LLM pane to start-all-services.sh Zellij layout
- Update README with MLX-LM documentation
1 parent b980e68 commit c48b13a

File tree

3 files changed: +64 -5 lines

- README.md
- scripts/run-mlx-lm.sh
- scripts/start-all-services.sh


README.md

Lines changed: 4 additions & 0 deletions
@@ -298,6 +298,7 @@ Our installation scripts automatically handle all dependencies:
 | Service | Purpose | Auto-installed? |
 |---------|---------|-----------------|
 | **[Ollama](https://ollama.ai/)** | Local LLM for text processing | ✅ Yes, with default model |
+| **[MLX-LM](https://github.com/ml-explore/mlx-lm)** | Fast LLM on Apple Silicon | ⚙️ Optional, via `uvx` |
 | **[Wyoming Faster Whisper](https://github.com/rhasspy/wyoming-faster-whisper)** | Speech-to-text | ✅ Yes, via `uvx` |
 | **[Wyoming Piper](https://github.com/rhasspy/wyoming-piper)** | Text-to-speech | ✅ Yes, via `uvx` |
 | **[Kokoro-FastAPI](https://github.com/remsky/Kokoro-FastAPI)** | Premium TTS (optional) | ⚙️ Can be added later |
@@ -318,10 +319,13 @@ You can also use other OpenAI-compatible local servers:
 
 | Server | Purpose | Setup Required |
 |---------|---------|----------------|
+| **[MLX-LM](https://github.com/ml-explore/mlx-lm)** | Fast LLM inference on Apple Silicon | `./scripts/run-mlx-lm.sh` or use `--openai-base-url http://localhost:10500/v1` |
 | **llama.cpp** | Local LLM inference | Use `--openai-base-url http://localhost:8080/v1` |
 | **vLLM** | High-performance LLM serving | Use `--openai-base-url` with server endpoint |
 | **Ollama** | Default local LLM | Already configured as default |
 
+> **Apple Silicon Users**: MLX-LM provides significantly faster inference than Ollama on M1/M2/M3/M4 Macs. Start it with `./scripts/run-mlx-lm.sh` and use `--llm-provider openai --openai-base-url http://localhost:10500/v1` to connect.
+
 ## Usage
 
 This package provides multiple command-line tools, each designed for a specific purpose.
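For Apple Silicon users following that note, here is a minimal sketch of both the one-off and the persistent setup. The flag names, config path, and key names are the ones run-mlx-lm.sh itself prints (see the script below); the append-to-config step assumes `[defaults]` is not already defined in the file.

```bash
# One-off: point a single agent-cli command at the local MLX-LM server.
agent-cli autocorrect --llm-provider openai \
  --openai-base-url http://localhost:10500/v1 \
  --llm-openai-model mlx-community/Qwen3-4B-4bit

# Persistent: append the settings the script suggests to the agent-cli config.
# Assumes ~/.config/agent-cli/config.toml has no existing [defaults] section.
mkdir -p ~/.config/agent-cli
cat >> ~/.config/agent-cli/config.toml <<'EOF'
[defaults]
llm_provider = "openai"
openai_base_url = "http://localhost:10500/v1"
llm_openai_model = "mlx-community/Qwen3-4B-4bit"
EOF
```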

scripts/run-mlx-lm.sh

Lines changed: 44 additions & 0 deletions
@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+echo "🧠 Starting MLX LLM Server..."
+
+# Check if running on macOS with Apple Silicon
+if [[ "$(uname)" != "Darwin" ]]; then
+  echo "❌ MLX only works on macOS with Apple Silicon."
+  exit 1
+fi
+
+if [[ "$(uname -m)" != "arm64" ]]; then
+  echo "❌ MLX requires Apple Silicon (M1/M2/M3/M4). Intel Macs are not supported."
+  exit 1
+fi
+
+# Default model - can be overridden with MLX_MODEL environment variable
+# Popular options:
+#   - mlx-community/Qwen3-4B-4bit (fast, high quality, default)
+#   - mlx-community/Qwen3-8B-4bit (larger, even better quality)
+#   - mlx-community/gpt-oss-20b-MXFP4-Q8 (20B parameters, high quality)
+MODEL="${MLX_MODEL:-mlx-community/Qwen3-4B-4bit}"
+PORT="${MLX_PORT:-10500}"
+
+echo "📦 Model: $MODEL"
+echo "🔌 Port: $PORT"
+echo ""
+echo "Usage with agent-cli:"
+echo "  agent-cli transcribe --llm --llm-provider openai --openai-base-url http://localhost:$PORT/v1 --llm-openai-model $MODEL"
+echo "  agent-cli autocorrect --llm-provider openai --openai-base-url http://localhost:$PORT/v1 --llm-openai-model $MODEL"
+echo ""
+echo "To make MLX the default, add to ~/.config/agent-cli/config.toml:"
+echo "  [defaults]"
+echo "  llm_provider = \"openai\""
+echo "  openai_base_url = \"http://localhost:$PORT/v1\""
+echo "  llm_openai_model = \"$MODEL\""
+echo ""
+
+# Run mlx-lm server using uvx
+# --host 0.0.0.0 allows connections from other machines/tools
+uvx --python 3.12 \
+  --from "mlx-lm" \
+  mlx_lm.server \
+  --model "$MODEL" \
+  --host 0.0.0.0 \
+  --port "$PORT"
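Because MODEL and PORT fall back to their defaults only when the corresponding environment variables are unset, the script can be redirected without editing it. A hedged sketch follows; the curl call assumes mlx_lm.server exposes the usual OpenAI-style `/v1/chat/completions` route.

```bash
# Run with a larger model on a non-default port (both overrides are optional).
MLX_MODEL="mlx-community/Qwen3-8B-4bit" MLX_PORT=11000 ./scripts/run-mlx-lm.sh

# Quick smoke test against the default port once the server is up
# (assumes an OpenAI-compatible /v1/chat/completions endpoint).
curl -s http://localhost:10500/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "mlx-community/Qwen3-4B-4bit", "messages": [{"role": "user", "content": "Say hi"}]}'
```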

scripts/start-all-services.sh

Lines changed: 16 additions & 5 deletions
@@ -10,6 +10,21 @@ fi
 # Get the current directory
 SCRIPTS_DIR="$(cd "$(dirname "$0")" && pwd)"
 
+# Determine LLM pane based on platform
+# Use MLX-LLM on macOS ARM (Apple Silicon), Ollama otherwise
+if [[ "$(uname)" == "Darwin" && "$(uname -m)" == "arm64" ]]; then
+  LLM_PANE='            pane {
+                name "MLX-LLM"
+                cwd "'"$SCRIPTS_DIR"'"
+                command "./run-mlx-lm.sh"
+            }'
+else
+  LLM_PANE='            pane {
+                name "Ollama"
+                command "ollama"
+                args "serve"
+            }'
+fi
 
 # Create .runtime directory and Zellij layout file
 mkdir -p "$SCRIPTS_DIR/.runtime"
@@ -19,11 +34,7 @@ session_name "agent-cli"
 layout {
     pane split_direction="vertical" {
         pane split_direction="horizontal" {
-            pane {
-                name "Ollama"
-                command "ollama"
-                args "serve"
-            }
+$LLM_PANE
             pane {
                 name "Help"
                 command "sh"
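The platform check runs at layout-generation time, so the choice can be previewed without launching Zellij. A small sketch mirroring the same test and simply reporting which pane the generated layout would contain:

```bash
# Mirror of the platform check in start-all-services.sh: prints which LLM pane
# the generated Zellij layout would contain on this machine.
if [[ "$(uname)" == "Darwin" && "$(uname -m)" == "arm64" ]]; then
  echo "MLX-LLM pane -> ./run-mlx-lm.sh"
else
  echo "Ollama pane -> ollama serve"
fi
```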
