From f51adf90c84d2dd48ca3c128bb3c0d7e6662aad8 Mon Sep 17 00:00:00 2001
From: tfwang
Date: Thu, 15 Jan 2026 17:24:14 +0800
Subject: [PATCH 1/6] add Llama Stack quickstart guide and notebook demo

---
 ...w_to_Create_an_AI_Agent_with_LlamaStack.md |  44 ++
 .../llama-stack/llama_stack_config.yaml       |  60 +++
 .../llama-stack/llama_stack_quickstart.ipynb  | 483 ++++++++++++++++++
 3 files changed, 587 insertions(+)
 create mode 100644 docs/en/solutions/How_to_Create_an_AI_Agent_with_LlamaStack.md
 create mode 100644 docs/public/llama-stack/llama_stack_config.yaml
 create mode 100644 docs/public/llama-stack/llama_stack_quickstart.ipynb

diff --git a/docs/en/solutions/How_to_Create_an_AI_Agent_with_LlamaStack.md b/docs/en/solutions/How_to_Create_an_AI_Agent_with_LlamaStack.md
new file mode 100644
index 0000000..ac5d674
--- /dev/null
+++ b/docs/en/solutions/How_to_Create_an_AI_Agent_with_LlamaStack.md
@@ -0,0 +1,44 @@
+---
+products:
+  - Alauda AI
+kind:
+  - Solution
+ProductsVersion:
+  - 4.x
+---
+
+# How to Create an AI Agent with Llama Stack
+
+## Overview
+
+Llama Stack is a framework for building and running AI agents with tools. It provides a server-based architecture that enables developers to create agents that can interact with users, access external tools, and perform complex reasoning tasks. This guide provides a quickstart example for creating an AI Agent using Llama Stack.
+
+## Prerequisites
+
+- Llama Stack Server installed and running (see notebook for installation and startup instructions)
+  - For deploying Llama Stack Server on Kubernetes, refer to the [Kubernetes Deployment Guide](https://llamastack.github.io/docs/deploying/kubernetes_deployment)
+- Access to a Notebook environment (e.g., Jupyter Notebook, JupyterLab, or similar)
+- Python environment with `llama-stack-client` and required dependencies installed
+- API key for the LLM provider (e.g., DeepSeek API key)
+
+## Quickstart
+
+A simple example of creating an AI Agent with Llama Stack is available here: [llama_stack_quickstart.ipynb](/llama-stack/llama_stack_quickstart.ipynb). The configuration file [llama_stack_config.yaml](/llama-stack/llama_stack_config.yaml) is also required. Download both files and upload them to a Notebook environment to run.
+
+The notebook demonstrates:
+- Llama Stack Server installation and configuration
+- Server startup and connection setup
+- Tool definition using the `@client_tool` decorator (weather query tool example)
+- Client connection to Llama Stack Server
+- Model selection and Agent creation with tools and instructions
+- Agent execution with session management and streaming responses
+- Result handling and display
+
+## Additional Resources
+
+For more resources on developing AI Agents with Llama Stack, see:
+
+- [Llama Stack Documentation](https://llamastack.github.io/docs) - The official Llama Stack documentation covering all usage-related topics, API providers, and core concepts.
+- [Llama Stack Core Concepts](https://llamastack.github.io/docs/concepts) - Deep dive into Llama Stack architecture, API stability, and resource management.
+- [Llama Stack GitHub Repository](https://github.com/llamastack/llama-stack) - Source code, example applications, distribution configurations, and how to add new API providers.
+- [Llama Stack Example Apps](https://github.com/llamastack/llama-stack-apps/) - Official examples demonstrating how to use Llama Stack in various scenarios.
\ No newline at end of file
diff --git a/docs/public/llama-stack/llama_stack_config.yaml b/docs/public/llama-stack/llama_stack_config.yaml
new file mode 100644
index 0000000..c21d0fc
--- /dev/null
+++ b/docs/public/llama-stack/llama_stack_config.yaml
@@ -0,0 +1,60 @@
+version: "2"
+image_name: llama-stack-demo
+apis:
+  - inference
+  - agents
+  - safety
+  - tool_runtime
+  - vector_io
+  - files
+
+providers:
+  inference:
+    - provider_id: openai
+      provider_type: remote::openai
+      config:
+        api_key: ${env.API_KEY}
+        base_url: https://api.deepseek.com/v1
+  agents:
+    - provider_id: meta-reference
+      provider_type: inline::meta-reference
+      config:
+        persistence:
+          agent_state:
+            backend: kv_default
+            namespace: agents
+          responses:
+            backend: sql_default
+            table_name: responses
+  safety:
+    - provider_id: llama-guard
+      provider_type: inline::llama-guard
+      config:
+        excluded_categories: []
+  tool_runtime: []
+  vector_io:
+    - provider_id: sqlite-vec
+      provider_type: inline::sqlite-vec
+      config:
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama-stack-demo}/sqlite_vec.db
+        persistence:
+          backend: kv_default
+          namespace: vector_io::sqlite_vec
+  files:
+    - provider_id: localfs
+      provider_type: inline::localfs
+      config:
+        storage_dir: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama-stack-demo}/files
+        metadata_store:
+          backend: sql_default
+          table_name: files_metadata
+
+metadata_store:
+  type: sqlite
+
+models:
+  - metadata: {}
+    model_id: deepseek/deepseek-chat
+    provider_id: openai
+    provider_model_id: deepseek-chat
+    model_type: llm
\ No newline at end of file
diff --git a/docs/public/llama-stack/llama_stack_quickstart.ipynb b/docs/public/llama-stack/llama_stack_quickstart.ipynb
new file mode 100644
index 0000000..04dde08
--- /dev/null
+++ b/docs/public/llama-stack/llama_stack_quickstart.ipynb
@@ -0,0 +1,483 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Llama Stack Quick Start Demo\n",
+    "\n",
+    "This notebook demonstrates how to use Llama Stack to run an agent with tools."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "937f1aab",
+   "metadata": {},
+   "source": [
+    "## 1. Prepare Llama Stack Server (Prerequisites)\n",
+    "\n",
+    "Before running this notebook, you need to deploy and start the Llama Stack Server."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "07f59470",
+   "metadata": {},
+   "source": [
+    "### Install Llama Stack and Dependencies\n",
+    "\n",
+    "If you haven't installed the Llama Stack yet, install it along with the required provider packages:\n",
+    "\n",
+    "```bash\n",
+    "pip install llama-stack sqlite_vec\n",
+    "```\n",
+    "\n",
+    "The `llama-stack` package will automatically install its core dependencies. Since the configuration file uses the `sqlite-vec` provider for vector storage, you also need to install the `sqlite_vec` package."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6dc839f0",
+   "metadata": {},
+   "source": [
+    "### Start the Server\n",
+    "\n",
+    "Set the required environment variable for the API key (used by the DeepSeek provider in the config):\n",
+    "\n",
+    "```bash\n",
+    "export API_KEY=your-deepseek-api-key\n",
+    "```\n",
+    "\n",
+    "**Note:** Replace `your-deepseek-api-key` with your actual DeepSeek API key.\n",
+    "\n",
+    "Run the following command in your terminal to start the server:\n",
+    "\n",
+    "```bash\n",
+    "llama stack run llama_stack_config.yaml --port 8321\n",
+    "```\n",
+    "\n",
+    "**Note:** The server must be running before you can connect to it from this notebook."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2. Install Dependencies"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install \"llama-stack-client\" \"requests\" \"fastapi\" \"uvicorn\" --target ~/packages"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3. Import Libraries"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "from pathlib import Path\n",
+    "\n",
+    "user_site_packages = Path.home() / \"packages\"\n",
+    "if str(user_site_packages) not in sys.path:\n",
+    "    sys.path.insert(0, str(user_site_packages))\n",
+    "\n",
+    "import os\n",
+    "import requests\n",
+    "from typing import Dict, Any\n",
+    "from llama_stack_client import LlamaStackClient, Agent\n",
+    "from llama_stack_client.lib.agents.client_tool import client_tool\n",
+    "from llama_stack_client.lib.agents.event_logger import AgentEventLogger\n",
+    "\n",
+    "print('Libraries imported successfully')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 4. Define a Tool\n",
+    "\n",
+    "Use the @client_tool decorator to define a weather query tool."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "@client_tool\n",
+    "def get_weather(city: str) -> Dict[str, Any]:\n",
+    "    \"\"\"Get current weather information for a specified city.\n",
+    "\n",
+    "    Uses the wttr.in free weather API to fetch weather data.\n",
+    "\n",
+    "    :param city: City name, e.g., Beijing, Tokyo, New York, Paris\n",
+    "    :returns: Dictionary containing weather information including city, temperature, description, humidity, and wind speed\n",
+    "    \"\"\"\n",
+    "    try:\n",
+    "        url = f'https://wttr.in/{city}?format=j1'\n",
+    "        response = requests.get(url, timeout=10)\n",
+    "        response.raise_for_status()\n",
+    "        data = response.json()\n",
+    "\n",
+    "        current = data['current_condition'][0]\n",
+    "        return {\n",
+    "            'city': city,\n",
+    "            'temperature': f\"{current['temp_C']}°C\",\n",
+    "            'humidity': f\"{current['humidity']}%\",\n",
+    "        }\n",
+    "    except Exception as e:\n",
+    "        return {'error': f'Failed to get weather information: {str(e)}'}\n",
+    "\n",
+    "print('Weather tool defined successfully')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 5. Connect to Server and Create Agent\n",
+    "\n",
+    "Use LlamaStackClient to connect to the running server, create an Agent, and execute tool calls."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "base_url = os.getenv('LLAMA_STACK_URL', 'http://localhost:8321')\n",
+    "print(f'Connecting to Server: {base_url}')\n",
+    "\n",
+    "client = LlamaStackClient(base_url=base_url)\n",
+    "\n",
+    "# Get available models\n",
+    "print('Getting available models...')\n",
+    "try:\n",
+    "    models = client.models.list()\n",
+    "    if not models:\n",
+    "        raise Exception('No models found')\n",
+    "\n",
+    "    print(f'Found {len(models)} available models:')\n",
+    "    for model in models[:5]: # Show only first 5\n",
+    "        model_type = model.custom_metadata.get('model_type', 'unknown') if model.custom_metadata else 'unknown'\n",
+    "        print(f' - {model.id} ({model_type})')\n",
+    "\n",
+    "    # Select first LLM model\n",
+    "    llm_model = next(\n",
+    "        (m for m in models\n",
+    "         if m.custom_metadata and m.custom_metadata.get('model_type') == 'llm'),\n",
+    "        None\n",
+    "    )\n",
+    "    if not llm_model:\n",
+    "        raise Exception('No LLM model found')\n",
+    "\n",
+    "    model_id = llm_model.id\n",
+    "    print(f'Using model: {model_id}\\n')\n",
+    "\n",
+    "except Exception as e:\n",
+    "    print(f'Failed to get model list: {e}')\n",
+    "    print('Make sure the server is running')\n",
+    "\n",
+    "\n",
+    "# Create Agent\n",
+    "print('Creating Agent...')\n",
+    "agent = Agent(\n",
+    "    client,\n",
+    "    model=model_id,\n",
+    "    instructions='You are a helpful weather assistant. When users ask about weather, use the get_weather tool to query weather information, then answer based on the query results.',\n",
+    "    tools=[get_weather],\n",
+    ")\n",
+    "\n",
+    "print('Agent created successfully')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "90c28b81",
+   "metadata": {},
+   "source": [
+    "## 6. Run the Agent"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "70e8d661",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create session\n",
+    "session_id = agent.create_session('weather-agent-session')\n",
+    "print(f'✓ Session created: {session_id}\\n')\n",
+    "\n",
+    "# First query\n",
+    "print('=' * 60)\n",
+    "print('User> What is the weather like in Beijing today?')\n",
+    "print('-' * 60)\n",
+    "\n",
+    "response_stream = agent.create_turn(\n",
+    "    messages=[{'role': 'user', 'content': 'What is the weather like in Beijing today?'}],\n",
+    "    session_id=session_id,\n",
+    "    stream=True,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ca2f26f2",
+   "metadata": {},
+   "source": [
+    "### Display the Result"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4728a638",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "logger = AgentEventLogger()\n",
+    "for printable in logger.log(response_stream):\n",
+    "    print(printable, end='', flush=True)\n",
+    "print('\\n')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "728530b0",
+   "metadata": {},
+   "source": [
+    "### Try Different Queries"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ed8cc5a0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Second query\n",
+    "print('=' * 60)\n",
+    "print('User> What is the weather in Shanghai?')\n",
+    "print('-' * 60)\n",
+    "\n",
+    "response_stream = agent.create_turn(\n",
+    "    messages=[{'role': 'user', 'content': 'What is the weather in Shanghai?'}],\n",
+    "    session_id=session_id,\n",
+    "    stream=True,\n",
+    ")\n",
+    "\n",
+    "logger = AgentEventLogger()\n",
+    "for printable in logger.log(response_stream):\n",
+    "    print(printable, end='', flush=True)\n",
+    "print('\\n')"
+   ]
+  },
+  {
"cell_type": "markdown", + "id": "6f8d31d0", + "metadata": {}, + "source": [ + "## 7. FastAPI Service Example\n", + "\n", + "You can also run the agent as a FastAPI web service for production use. This allows you to expose the agent functionality via HTTP API endpoints." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a5d732e4", + "metadata": {}, + "outputs": [], + "source": [ + "# Import FastAPI components\n", + "from fastapi import FastAPI\n", + "from pydantic import BaseModel\n", + "from threading import Thread\n", + "import time\n", + "\n", + "# Create a simple FastAPI app\n", + "api_app = FastAPI(title=\"Llama Stack Agent API\")\n", + "\n", + "class ChatRequest(BaseModel):\n", + " message: str\n", + "\n", + "\n", + "@api_app.post(\"/chat\")\n", + "async def chat(request: ChatRequest):\n", + " \"\"\"Chat endpoint that uses the Llama Stack Agent\"\"\"\n", + " session_id = agent.create_session('fastapi-weather-session')\n", + "\n", + " # Create turn and collect response\n", + " response_stream = agent.create_turn(\n", + " messages=[{'role': 'user', 'content': request.message}],\n", + " session_id=session_id,\n", + " stream=True,\n", + " )\n", + "\n", + " # Collect the full response\n", + " full_response = \"\"\n", + " logger = AgentEventLogger()\n", + " for printable in logger.log(response_stream):\n", + " full_response += printable\n", + "\n", + " return {\"response\": full_response}\n", + "\n", + "print(\"FastAPI app created. Use the next cell to start the server.\")" + ] + }, + { + "cell_type": "markdown", + "id": "475997ba", + "metadata": {}, + "source": [ + "### Start the FastAPI Server\n", + "\n", + "**Note**: In a notebook, you can start the server in a background thread. For production, run it as a separate process using `uvicorn`." 
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6f5db723",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Start server in background thread (for notebook demonstration)\n",
+    "from uvicorn import Config, Server\n",
+    "\n",
+    "# Create a server instance that can be controlled\n",
+    "config = Config(api_app, host=\"127.0.0.1\", port=8000, log_level=\"info\")\n",
+    "server = Server(config)\n",
+    "\n",
+    "def run_server():\n",
+    "    server.run()\n",
+    "\n",
+    "# Use daemon=True so the thread stops automatically when the kernel restarts\n",
+    "# This is safe for notebook demonstrations\n",
+    "# For production, use process managers instead of threads\n",
+    "server_thread = Thread(target=run_server, daemon=True)\n",
+    "server_thread.start()\n",
+    "\n",
+    "# Wait a moment for the server to start\n",
+    "time.sleep(2)\n",
+    "print(\"✓ FastAPI server started at http://127.0.0.1:8000\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "715b2d47",
+   "metadata": {},
+   "source": [
+    "### Test the API\n",
+    "\n",
+    "Now you can call the API using HTTP requests:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "407b82af",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Test the API endpoint\n",
+    "response = requests.post(\n",
+    "    \"http://127.0.0.1:8000/chat\",\n",
+    "    json={\"message\": \"What's the weather in Shanghai?\"},\n",
+    "    timeout=60\n",
+    ")\n",
+    "\n",
+    "print(f\"Status Code: {response.status_code}\")\n",
+    "print(\"Response:\")\n",
+    "print(response.json().get('response'))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "945a776f",
+   "metadata": {},
+   "source": [
+    "### Stop the Server\n",
+    "\n",
+    "You can stop the server by calling its shutdown method:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c7795bba",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Stop the server\n",
+    "if 'server' in globals() and server.started:\n",
+    "    server.should_exit = True\n",
+    "    print(\"✓ Server shutdown requested. It will stop after handling current requests.\")\n",
+    "    print(\" Note: The server will also stop automatically when you restart the kernel.\")\n",
+    "else:\n",
+    "    print(\"Server is not running or has already stopped.\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 8. More Resources\n",
+    "\n",
+    "For more resources on developing AI Agents with Llama Stack, see:\n",
+    "\n",
+    "### Official Documentation\n",
+    "- [Llama Stack Documentation](https://llamastack.github.io/docs) - The official Llama Stack documentation covering all usage-related topics, API providers, and core concepts.\n",
+    "- [Llama Stack Core Concepts](https://llamastack.github.io/docs/concepts) - Deep dive into Llama Stack architecture, API stability, and resource management.\n",
+    "\n",
+    "### Code Examples and Projects\n",
+    "- [Llama Stack GitHub Repository](https://github.com/llamastack/llama-stack) - Source code, example applications, distribution configurations, and how to add new API providers.\n",
+    "- [Llama Stack Example Apps](https://github.com/llamastack/llama-stack-apps/) - Official examples demonstrating how to use Llama Stack in various scenarios.\n",
+    "\n",
+    "### Community and Support\n",
+    "- [Llama Stack GitHub Issues](https://github.com/llamastack/llama-stack/issues) - Report bugs, ask questions, and contribute to the project.\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python (langchain-demo)",
+   "language": "python",
+   "name": "langchain-demo"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

From 7f4289647c547f5e5f59266dc18b91fe932759ed Mon Sep 17 00:00:00 2001
From: tfwang
Date: Thu, 15 Jan 2026 17:45:47 +0800
Subject: [PATCH 2/6] update

---
 docs/public/llama-stack/llama_stack_quickstart.ipynb | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/docs/public/llama-stack/llama_stack_quickstart.ipynb b/docs/public/llama-stack/llama_stack_quickstart.ipynb
index 04dde08..4a33f7c 100644
--- a/docs/public/llama-stack/llama_stack_quickstart.ipynb
+++ b/docs/public/llama-stack/llama_stack_quickstart.ipynb
@@ -127,7 +127,7 @@
     "    Uses the wttr.in free weather API to fetch weather data.\n",
     "\n",
     "    :param city: City name, e.g., Beijing, Tokyo, New York, Paris\n",
-    "    :returns: Dictionary containing weather information including city, temperature, description, humidity, and wind speed\n",
+    "    :returns: Dictionary containing weather information including city, temperature, description and humidity\n",
     "    \"\"\"\n",
     "    try:\n",
     "        url = f'https://wttr.in/{city}?format=j1'\n",
@@ -194,6 +194,7 @@
     "except Exception as e:\n",
     "    print(f'Failed to get model list: {e}')\n",
     "    print('Make sure the server is running')\n",
+    "    raise e\n",
     "\n",
     "\n",
     "# Create Agent\n",
@@ -461,9 +462,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python (langchain-demo)",
+   "display_name": "Python (llama-stack-demo)",
    "language": "python",
-   "name": "langchain-demo"
+   "name": "llama-stack-demo"
   },
   "language_info": {
    "codemirror_mode": {

From a9b6173cb685db58e09655ab7316cb7b4306a1d2 Mon Sep 17 00:00:00 2001
From: tfwang
Date: Mon, 26 Jan 2026 17:59:44 +0800
Subject: [PATCH 3/6] update

---
 ...w_to_Create_an_AI_Agent_with_LlamaStack.md | 94 ++++++++++++++++++-
 .../llama-stack/llama_stack_quickstart.ipynb  | 13 ++-
 2 files changed, 102 insertions(+), 5 deletions(-)

diff --git a/docs/en/solutions/How_to_Create_an_AI_Agent_with_LlamaStack.md b/docs/en/solutions/How_to_Create_an_AI_Agent_with_LlamaStack.md
index ac5d674..a7ea1b0 100644
--- a/docs/en/solutions/How_to_Create_an_AI_Agent_with_LlamaStack.md
+++ b/docs/en/solutions/How_to_Create_an_AI_Agent_with_LlamaStack.md
@@ -15,12 +15,78 @@ Llama Stack is a framework for building and running AI agents with tools. It pro
 
 ## Prerequisites
 
+- Python 3.12 or later environment (if this is not available, refer to the FAQ section for Python installation instructions)
 - Llama Stack Server installed and running (see notebook for installation and startup instructions)
-  - For deploying Llama Stack Server on Kubernetes, refer to the [Kubernetes Deployment Guide](https://llamastack.github.io/docs/deploying/kubernetes_deployment)
+  - For deploying Llama Stack Server on Kubernetes, refer to the [Deploy Llama Stack Server via Operator](#deploy-llama-stack-server-via-operator) section
 - Access to a Notebook environment (e.g., Jupyter Notebook, JupyterLab, or similar)
 - Python environment with `llama-stack-client` and required dependencies installed
 - API key for the LLM provider (e.g., DeepSeek API key)
 
+## Deploy Llama Stack Server via Operator
+
+This section describes how to deploy Llama Stack Server on Kubernetes using the Llama Stack Operator.
+
+### Upload Operator
+
+Download the Llama Stack Operator installation file (e.g., `llama-stack-operator.alpha.ALL.v0.6.0.tgz`).
+
+Use the violet command to publish to the platform repository:
+
+```bash
+violet push --platform-address=platform-access-address --platform-username=platform-admin --platform-password=platform-admin-password llama-stack-operator.alpha.ALL.v0.6.0.tgz
+```
+
+### Install Operator
+
+1. Go to the `Administrator` view in the Alauda Container Platform.
+
+2. In the left navigation, select `Marketplace` / `Operator Hub`.
+
+3. In the right panel, find `Llama Stack Operator` and click `Install`.
+
+4. Keep all parameters as default and complete the installation.
+
+### Deploy Llama Stack Server
+
+After the operator is installed, deploy Llama Stack Server by creating a `LlamaStackDistribution` custom resource:
+
+```yaml
+apiVersion: llamastack.io/v1alpha1
+kind: LlamaStackDistribution
+metadata:
+  annotations:
+    cpaas.io/display-name: ""
+  name: demo
+  namespace: default
+spec:
+  network:
+    exposeRoute: false # Whether to expose the route externally
+  replicas: 1 # Number of server replicas
+  server:
+    containerSpec:
+      env:
+        - name: VLLM_URL
+          value: " https://api.deepseek.com/v1" # URL of the LLM API provider
+        - name: VLLM_MAX_TOKENS
+          value: "8192" # Maximum output tokens (DeepSeek Chat supports up to 8K)
+        - name: VLLM_API_TOKEN
+          value: XXX # API authentication token
+      name: llama-stack
+      port: 8321
+    distribution:
+      name: starter # Distribution name (options: starter, postgres-demo, meta-reference-gpu)
+    storage:
+      mountPath: /home/lls/.lls
+      size: 20Gi
+```
+
+After deployment, the Llama Stack Server will be available within the cluster. The access URL is displayed in `status.serviceURL`, for example:
+
+```yaml
+status:
+  serviceURL: http://demo-service.default.svc.cluster.local:8321
+```
+
 ## Quickstart
 
 A simple example of creating an AI Agent with Llama Stack is available here: [llama_stack_quickstart.ipynb](/llama-stack/llama_stack_quickstart.ipynb). The configuration file [llama_stack_config.yaml](/llama-stack/llama_stack_config.yaml) is also required. Download both files and upload them to a Notebook environment to run.
@@ -34,6 +100,32 @@ The notebook demonstrates:
 - Agent execution with session management and streaming responses
 - Result handling and display
 
+## FAQ
+
+### How to prepare Python 3.12 in Notebook
+
+1. Download the pre-compiled python installation package:
+
+```bash
+wget -O /tmp/python312.tar.gz https://github.com/astral-sh/python-build-standalone/releases/download/20260114/cpython-3.12.12+20260114-x86_64-unknown-linux-gnu-install_only.tar.gz
+```
+
+2. Extract with:
+
+```bash
+tar -xzf /tmp/python312.tar.gz -C ~/python312 --strip-components=1
+```
+
+3. Register ipykernel:
+
+```bash
+~/python312/bin/python -m ipykernel install --user --name python312 --display-name "Python 3.12"
+```
+
+4. Switch the kernel in the notebook page
+
+**Note**: When you run python and pip commands directly in the notebook page, the default python is still used. Specify the full path (for example, `~/python312/bin/python`) to use the Python 3.12 installation.
+
 ## Additional Resources
 
 For more resources on developing AI Agents with Llama Stack, see:
diff --git a/docs/public/llama-stack/llama_stack_quickstart.ipynb b/docs/public/llama-stack/llama_stack_quickstart.ipynb
index 4a33f7c..370d159 100644
--- a/docs/public/llama-stack/llama_stack_quickstart.ipynb
+++ b/docs/public/llama-stack/llama_stack_quickstart.ipynb
@@ -26,6 +26,8 @@
    "source": [
     "### Install Llama Stack and Dependencies\n",
     "\n",
+    "This section demonstrates how to start a local Llama Stack Server. Alternatively, you can deploy a server in a Kubernetes cluster by following the instructions in the [Deploy Llama Stack Server via Operator](../en/solutions/How_to_Create_an_AI_Agent_with_LlamaStack.md#deploy-llama-stack-server-via-operator) section.\n",
+    "\n",
     "If you haven't installed the Llama Stack yet, install it along with the required provider packages:\n",
     "\n",
     "```bash\n",
@@ -40,7 +42,7 @@
    "id": "6dc839f0",
    "metadata": {},
    "source": [
-    "### Start the Server\n",
+    "### Start the Local Server\n",
     "\n",
     "Set the required environment variable for the API key (used by the DeepSeek provider in the config):\n",
     "\n",
@@ -61,9 +63,12 @@
   },
   {
    "cell_type": "markdown",
+   "id": "e5d1fc8c",
    "metadata": {},
    "source": [
-    "## 2. Install Dependencies"
+    "## 2. Install Dependencies\n",
+    "\n",
+    "**Note:** `llama-stack-client` requires Python 3.12 or higher. If your Python version does not meet this requirement, refer to the FAQ section in the documentation: **How to prepare Python 3.12 in Notebook**."
    ]
   },
   {
@@ -72,7 +77,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!pip install \"llama-stack-client\" \"requests\" \"fastapi\" \"uvicorn\" --target ~/packages"
+    "!pip install \"llama-stack-client>=0.4\" \"requests\" \"fastapi\" \"uvicorn\" --target ~/packages"
    ]
   },
   {
@@ -162,7 +167,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "base_url = os.getenv('LLAMA_STACK_URL', 'http://localhost:8321')\n",
+    "base_url = os.getenv('LLAMA_STACK_URL', 'http://localhost:8321') # or change it to your server's URL\n",
     "print(f'Connecting to Server: {base_url}')\n",
     "\n",
     "client = LlamaStackClient(base_url=base_url)\n",

From 6ed781b61b5550d6a7273fc39c2b01c1f33206c8 Mon Sep 17 00:00:00 2001
From: tfwang
Date: Mon, 26 Jan 2026 23:10:39 +0800
Subject: [PATCH 4/6] update

---
 .../en/solutions/How_to_Create_an_AI_Agent_with_LlamaStack.md | 2 +-
 docs/public/llama-stack/llama_stack_quickstart.ipynb          | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/en/solutions/How_to_Create_an_AI_Agent_with_LlamaStack.md b/docs/en/solutions/How_to_Create_an_AI_Agent_with_LlamaStack.md
index a7ea1b0..2ac50ab 100644
--- a/docs/en/solutions/How_to_Create_an_AI_Agent_with_LlamaStack.md
+++ b/docs/en/solutions/How_to_Create_an_AI_Agent_with_LlamaStack.md
@@ -66,7 +66,7 @@ spec:
     containerSpec:
       env:
         - name: VLLM_URL
-          value: " https://api.deepseek.com/v1" # URL of the LLM API provider
+          value: "https://api.deepseek.com/v1" # URL of the LLM API provider
         - name: VLLM_MAX_TOKENS
           value: "8192" # Maximum output tokens (DeepSeek Chat supports up to 8K)
         - name: VLLM_API_TOKEN
           value: XXX # API authentication token
diff --git a/docs/public/llama-stack/llama_stack_quickstart.ipynb b/docs/public/llama-stack/llama_stack_quickstart.ipynb
index 370d159..297a668 100644
--- a/docs/public/llama-stack/llama_stack_quickstart.ipynb
+++ b/docs/public/llama-stack/llama_stack_quickstart.ipynb
@@ -132,7 +132,7 @@
     "    Uses the wttr.in free weather API to fetch weather data.\n",
     "\n",
     "    :param city: City name, e.g., Beijing, Tokyo, New York, Paris\n",
-    "    :returns: Dictionary containing weather information including city, temperature, description and humidity\n",
+    "    :returns: Dictionary containing weather information including city, temperature and humidity\n",
     "    \"\"\"\n",
     "    try:\n",
     "        url = f'https://wttr.in/{city}?format=j1'\n",
@@ -199,7 +199,7 @@
     "except Exception as e:\n",
     "    print(f'Failed to get model list: {e}')\n",
     "    print('Make sure the server is running')\n",
-    "    raise e\n",
+    "    raise\n",
     "\n",
     "\n",
     "# Create Agent\n",

From dabdba1609aa6f42dc23f9a5c529697181336453 Mon Sep 17 00:00:00 2001
From: tfwang
Date: Mon, 26 Jan 2026 23:40:31 +0800
Subject: [PATCH 5/6] update

---
 .../How_to_Create_an_AI_Agent_with_LlamaStack.md     | 9 ++++++---
 docs/public/llama-stack/llama_stack_quickstart.ipynb | 2 +-
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/docs/en/solutions/How_to_Create_an_AI_Agent_with_LlamaStack.md b/docs/en/solutions/How_to_Create_an_AI_Agent_with_LlamaStack.md
index 2ac50ab..3d3a102 100644
--- a/docs/en/solutions/How_to_Create_an_AI_Agent_with_LlamaStack.md
+++ b/docs/en/solutions/How_to_Create_an_AI_Agent_with_LlamaStack.md
@@ -68,9 +68,12 @@ spec:
         - name: VLLM_URL
           value: "https://api.deepseek.com/v1" # URL of the LLM API provider
         - name: VLLM_MAX_TOKENS
-          value: "8192" # Maximum output tokens (DeepSeek Chat supports up to 8K)
-        - name: VLLM_API_TOKEN
-          value: XXX # API authentication token
+          value: "8192" # Maximum output tokens
+        - name: VLLM_API_TOKEN # Load LLM API token from secret
+          valueFrom:
+            secretKeyRef:
+              key: token
+              name: deepseek-api
       name: llama-stack
       port: 8321
     distribution:
diff --git a/docs/public/llama-stack/llama_stack_quickstart.ipynb b/docs/public/llama-stack/llama_stack_quickstart.ipynb
index 297a668..96db7fc 100644
--- a/docs/public/llama-stack/llama_stack_quickstart.ipynb
+++ b/docs/public/llama-stack/llama_stack_quickstart.ipynb
@@ -131,7 +131,7 @@
     "\n",
     "    Uses the wttr.in free weather API to fetch weather data.\n",
     "\n",
-    "    :param city: City name, e.g., Beijing, Tokyo, New York, Paris\n",
+    "    :param city: City name, e.g., Beijing, Shanghai, Paris\n",
     "    :returns: Dictionary containing weather information including city, temperature and humidity\n",
     "    \"\"\"\n",
     "    try:\n",

From e8c9f10c0aea55d17c5967ad0d5fd86a0d0fa6f0 Mon Sep 17 00:00:00 2001
From: tfwang
Date: Mon, 26 Jan 2026 23:58:24 +0800
Subject: [PATCH 6/6] update

---
 .../How_to_Create_an_AI_Agent_with_LlamaStack.md     | 1 +
 docs/public/llama-stack/llama_stack_quickstart.ipynb | 9 +++++++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/docs/en/solutions/How_to_Create_an_AI_Agent_with_LlamaStack.md b/docs/en/solutions/How_to_Create_an_AI_Agent_with_LlamaStack.md
index 3d3a102..041ea64 100644
--- a/docs/en/solutions/How_to_Create_an_AI_Agent_with_LlamaStack.md
+++ b/docs/en/solutions/How_to_Create_an_AI_Agent_with_LlamaStack.md
@@ -116,6 +116,7 @@ wget -O /tmp/python312.tar.gz https://github.com/astral-sh/python-build-standalo
 
 2. Extract with:
 
 ```bash
+mkdir -p ~/python312
 tar -xzf /tmp/python312.tar.gz -C ~/python312 --strip-components=1
 ```
diff --git a/docs/public/llama-stack/llama_stack_quickstart.ipynb b/docs/public/llama-stack/llama_stack_quickstart.ipynb
index 96db7fc..8c5dc29 100644
--- a/docs/public/llama-stack/llama_stack_quickstart.ipynb
+++ b/docs/public/llama-stack/llama_stack_quickstart.ipynb
@@ -90,6 +90,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "cfd65276",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -103,6 +104,7 @@
     "import os\n",
     "import requests\n",
     "from typing import Dict, Any\n",
+    "from urllib.parse import quote\n",
     "from llama_stack_client import LlamaStackClient, Agent\n",
     "from llama_stack_client.lib.agents.client_tool import client_tool\n",
     "from llama_stack_client.lib.agents.event_logger import AgentEventLogger\n",
@@ -122,6 +124,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "c57f95e5",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -135,7 +138,9 @@
     "    :returns: Dictionary containing weather information including city, temperature and humidity\n",
     "    \"\"\"\n",
     "    try:\n",
-    "        url = f'https://wttr.in/{city}?format=j1'\n",
+    "        # URL encode the city name to handle spaces and special characters\n",
+    "        encoded_city = quote(city)\n",
+    "        url = f'https://wttr.in/{encoded_city}?format=j1'\n",
     "        response = requests.get(url, timeout=10)\n",
     "        response.raise_for_status()\n",
     "        data = response.json()\n",
@@ -329,7 +334,7 @@
     "\n",
     "\n",
     "@api_app.post(\"/chat\")\n",
-    "async def chat(request: ChatRequest):\n",
+    "def chat(request: ChatRequest):\n",
     "    \"\"\"Chat endpoint that uses the Llama Stack Agent\"\"\"\n",
     "    session_id = agent.create_session('fastapi-weather-session')\n",
     "\n",