61 changes: 55 additions & 6 deletions .github/workflows/main.yml
@@ -7,8 +7,48 @@ permissions:
contents: write
id-token: write

env:
GGUF_MODEL_DIR: tests/models/unsloth/gemma-3-270m-it-GGUF
GGUF_MODEL_NAME: gemma-3-270m-it-UD-IQ2_M.gguf
GGUF_MODEL_URL: https://huggingface.co/unsloth/gemma-3-270m-it-GGUF/resolve/main/gemma-3-270m-it-UD-IQ2_M.gguf

jobs:
download-model:
outputs:
cache-key: gguf-${{ steps.meta.outputs.hash }}
model-path: ${{ env.GGUF_MODEL_DIR }}/${{ env.GGUF_MODEL_NAME }}
name: Download GGUF model
runs-on: ubuntu-22.04
steps:
- name: Compute model URL hash
id: meta
run: |
if command -v sha256sum >/dev/null 2>&1; then
hash=$(echo -n "${{ env.GGUF_MODEL_URL }}" | sha256sum | cut -d' ' -f1)
else
hash=$(echo -n "${{ env.GGUF_MODEL_URL }}" | shasum -a 256 | cut -d' ' -f1)
fi
echo "hash=$hash" >> "$GITHUB_OUTPUT"

- name: Prepare model directory
run: mkdir -p "${{ env.GGUF_MODEL_DIR }}"

- name: Restore GGUF cache
id: cache
uses: actions/cache@v4
with:
path: ${{ env.GGUF_MODEL_DIR }}/${{ env.GGUF_MODEL_NAME }}
key: gguf-${{ steps.meta.outputs.hash }}

- name: Download GGUF model
if: steps.cache.outputs.cache-hit != 'true'
run: |
curl -L --fail --retry 3 "${{ env.GGUF_MODEL_URL }}" -o "${{ env.GGUF_MODEL_DIR }}/${{ env.GGUF_MODEL_NAME }}"
- name: Verify GGUF model
run: test -f "${{ env.GGUF_MODEL_DIR }}/${{ env.GGUF_MODEL_NAME }}"

build:
needs: download-model
runs-on: ${{ matrix.os }}
container: ${{ matrix.container && matrix.container || '' }}
name: ${{ matrix.name }}${{ matrix.arch && format('-{0}', matrix.arch) || '' }} build${{ matrix.arch != 'arm64-v8a' && matrix.name != 'ios-sim' && matrix.name != 'ios' && matrix.name != 'apple-xcframework' && matrix.name != 'android-aar' && ( matrix.name != 'macos' || matrix.arch != 'x86_64' ) && ' + test' || ''}}
@@ -48,12 +88,12 @@ jobs:
arch: x86_64
name: linux-musl-cpu
container: alpine:latest
make: LLAMA="-DGGML_NATIVE=OFF -DGGML_CPU=ON -DGGML_AVX2=ON"
make: SKIP_UNITTEST=1 LLAMA="-DGGML_NATIVE=OFF -DGGML_CPU=ON -DGGML_AVX2=ON"
- os: ubuntu-22.04
arch: x86_64
name: linux-musl-gpu
container: alpine:latest
make: LLAMA="-DGGML_NATIVE=OFF -DGGML_CPU=ON -DGGML_OPENCL=ON"
make: SKIP_UNITTEST=1 LLAMA="-DGGML_NATIVE=OFF -DGGML_CPU=ON -DGGML_OPENCL=ON"
- os: ubuntu-22.04-arm
arch: arm64
name: linux-musl-cpu
@@ -65,20 +105,20 @@
- os: windows-2022
arch: x86_64
name: windows-cpu
make: LLAMA="-DGGML_NATIVE=OFF -DGGML_CPU=ON -DGGML_AVX2=ON"
make: SKIP_UNITTEST=1 LLAMA="-DGGML_NATIVE=OFF -DGGML_CPU=ON -DGGML_AVX2=ON"
- os: windows-2022
arch: x86_64
name: windows-gpu
make: LLAMA="-DGGML_NATIVE=OFF -DGGML_CPU=ON -DGGML_VULKAN=ON -DGGML_OPENCL=ON"
make: SKIP_UNITTEST=1 LLAMA="-DGGML_NATIVE=OFF -DGGML_CPU=ON -DGGML_VULKAN=ON -DGGML_OPENCL=ON"
- os: ubuntu-22.04
arch: x86_64
name: android
make: PLATFORM=android ARCH=x86_64
make: SKIP_UNITTEST=1 PLATFORM=android ARCH=x86_64
sqlite-amalgamation-zip: https://sqlite.org/2025/sqlite-amalgamation-3490100.zip
- os: ubuntu-22.04
arch: arm64-v8a
name: android
make: PLATFORM=android ARCH=arm64-v8a
make: SKIP_UNITTEST=1 PLATFORM=android ARCH=arm64-v8a
- os: macos-15
name: ios
make: PLATFORM=ios LLAMA="-DGGML_NATIVE=OFF -DGGML_METAL=ON -DGGML_ACCELERATE=ON -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=Apple" WHISPER="-DWHISPER_COREML=ON -DWHISPER_COREML_ALLOW_FALLBACK=ON"
@@ -106,6 +146,15 @@ jobs:
with:
submodules: true

- name: Prepare GGUF model directory
run: mkdir -p "${{ env.GGUF_MODEL_DIR }}"

- name: Restore GGUF cache
uses: actions/cache@v4
with:
path: ${{ needs.download-model.outputs.model-path }}
key: ${{ needs.download-model.outputs.cache-key }}

- name: android setup java
if: matrix.name == 'android-aar'
uses: actions/setup-java@v4
45 changes: 45 additions & 0 deletions API.md
@@ -245,6 +245,35 @@ Frees the current inference context.
SELECT llm_context_free();
```

---

## `llm_context_size()`

**Returns:** `INTEGER`

**Description:**
Returns the total token capacity (context window) of the current llama context. Use this after `llm_context_create` to confirm the configured `context_size`. Raises an error if no context is active.

**Example:**

```sql
SELECT llm_context_size();
-- 4096
```

---

## `llm_context_used()`

**Returns:** `INTEGER`

**Description:**
Returns how many tokens of the current llama context have already been consumed. Combine this with `llm_context_size()` to monitor usage. Raises an error if no context is active.

**Example:**

```sql
SELECT llm_context_used();
-- 1024
```
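
Since both functions return plain integers, they compose directly in SQL. A minimal sketch of a remaining-capacity query, reusing the example values above (a 4096-token context with 1024 tokens used):

```sql
-- tokens still available in the active context
SELECT llm_context_size() - llm_context_used() AS tokens_remaining;
-- 3072
```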

---

## `llm_sampler_create()`
@@ -546,6 +575,22 @@ SELECT llm_sampler_init_penalties(64, 1.2, 0.5, 0.8);

---

## `llm_token_count(text TEXT)`

**Returns:** `INTEGER`

**Description:**
Returns the number of tokens the current model needs to encode the supplied `text`, using the active context’s vocabulary. Requires a context created via `llm_context_create`.

**Example:**

```sql
SELECT llm_token_count('Hello world!');
-- 5
```
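
Because the count uses the same vocabulary as the active context, it pairs naturally with `llm_context_size()` and `llm_context_used()` as a pre-flight check before submitting a prompt. A minimal sketch (the prompt string below is a placeholder):

```sql
-- 1 if the prompt still fits in the active context, 0 otherwise
SELECT llm_token_count('Summarize the release notes.')
       <= llm_context_size() - llm_context_used() AS prompt_fits;
```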

---

## `llm_embed_generate(text TEXT, options TEXT)`

**Returns:** `BLOB` or `TEXT`
40 changes: 38 additions & 2 deletions Makefile
@@ -37,6 +37,15 @@ BUILD_LLAMA = $(BUILD_DIR)/llama.cpp
BUILD_WHISPER = $(BUILD_DIR)/whisper.cpp
BUILD_MINIAUDIO = $(BUILD_DIR)/miniaudio

# Test
# gemma-3-270m-it-UD-IQ2_M.gguf is a lightweight model used only for testing
CTEST_BIN = $(BUILD_DIR)/tests/sqlite_ai_tests
GGUF_MODEL_DIR ?= tests/models/unsloth/gemma-3-270m-it-GGUF
GGUF_MODEL_NAME ?= gemma-3-270m-it-UD-IQ2_M.gguf
GGUF_MODEL_URL ?= https://huggingface.co/unsloth/gemma-3-270m-it-GGUF/resolve/main/gemma-3-270m-it-UD-IQ2_M.gguf
GGUF_MODEL_PATH := $(GGUF_MODEL_DIR)/$(GGUF_MODEL_NAME)
SKIP_UNITTEST ?= 0

# Compiler and flags
CC = gcc
CXX = g++
@@ -55,6 +64,14 @@ LLAMA_LDFLAGS = -L./$(BUILD_LLAMA)/common -L./$(BUILD_GGML)/lib -L./$(BUILD_LLAM
WHISPER_LDFLAGS = -L./$(BUILD_WHISPER)/src -lwhisper
MINIAUDIO_LDFLAGS = -L./$(BUILD_MINIAUDIO) -lminiaudio -lminiaudio_channel_combiner_node -lminiaudio_channel_separator_node -lminiaudio_ltrim_node -lminiaudio_reverb_node -lminiaudio_vocoder_node
LDFLAGS = $(LLAMA_LDFLAGS) $(WHISPER_LDFLAGS) $(MINIAUDIO_LDFLAGS)
SQLITE_TEST_LIBS =
ifneq ($(PLATFORM),windows)
SQLITE_TEST_LIBS += -lpthread -lm
ifneq ($(PLATFORM),macos)
SQLITE_TEST_LIBS += -ldl
endif
endif
SQLITE_TEST_SRC = tests/c/sqlite3.c

# Files
SRC_FILES = $(wildcard $(SRC_DIR)/*.c)
@@ -210,8 +227,27 @@ endif
$(BUILD_DIR)/%.o: %.c $(BUILD_DIR)/llama.cpp.stamp
$(CC) $(CFLAGS) -O3 -fPIC -c $< -o $@

test: $(TARGET)
$(SQLITE3) ":memory:" -cmd ".bail on" ".load ./dist/ai" "SELECT ai_version();"
$(CTEST_BIN): tests/c/unittest.c $(SQLITE_TEST_SRC)
@mkdir -p $(dir $@)
$(CC) -std=c11 -Wall -Wextra -DSQLITE_ENABLE_LOAD_EXTENSION -I$(SRC_DIR) tests/c/unittest.c $(SQLITE_TEST_SRC) -o $@ $(SQLITE_TEST_LIBS)

$(GGUF_MODEL_PATH):
@mkdir -p $(GGUF_MODEL_DIR)
curl -L --fail --retry 3 -o $@ $(GGUF_MODEL_URL)

TEST_DEPS := $(TARGET)
ifeq ($(SKIP_UNITTEST),0)
TEST_DEPS += $(CTEST_BIN) $(GGUF_MODEL_PATH)
endif

test: $(TEST_DEPS)
@echo "Running sqlite3 CLI smoke test (ensures .load works)..."
$(SQLITE3) ":memory:" -cmd ".bail on" ".load ./dist/ai" "SELECT ai_version();"
ifeq ($(SKIP_UNITTEST),0)
$(CTEST_BIN) --extension "$(TARGET)" --model "$(GGUF_MODEL_PATH)"
else
@echo "Skipping C unit tests (SKIP_UNITTEST=$(SKIP_UNITTEST))."
endif

# Build submodules
ifeq ($(PLATFORM),windows)
40 changes: 35 additions & 5 deletions src/sqlite-ai.c
@@ -784,7 +784,7 @@ static bool llm_check_context (sqlite3_context *context) {

// MARK: - Chat Messages -

bool llm_messages_append (ai_messages *list, const char *role, const char *content, bool duplicate_content) {
bool llm_messages_append (ai_messages *list, const char *role, const char *content) {
if (list->count >= list->capacity) {
size_t new_cap = list->capacity ? list->capacity * 2 : MIN_ALLOC_MESSAGES;
llama_chat_message *new_items = sqlite3_realloc64(list->items, new_cap * sizeof(llama_chat_message));
@@ -796,7 +796,7 @@ bool llm_messages_append (ai_messages *list, const char *role, const char *conte

bool duplicate_role = ((role != ROLE_USER) && (role != ROLE_ASSISTANT));
list->items[list->count].role = (duplicate_role) ? sqlite_strdup(role) : role;
list->items[list->count].content = (duplicate_content) ? sqlite_strdup(content) : content;
list->items[list->count].content = sqlite_strdup(content);
list->count += 1;
return true;
}
@@ -1490,6 +1490,9 @@ static bool llm_chat_check_context (ai_context *ai) {
llama_sampler_chain_add(ai->sampler, llama_sampler_init_dist((uint32_t)LLAMA_DEFAULT_SEED));
}

// nothing to do if the chat struct has already been created
if (ai->chat.uuid[0] != '\0') return true;

// create history structs
ai_uuid_v7_string(ai->chat.uuid, true);

@@ -1509,7 +1512,7 @@ static bool llm_chat_save_response (ai_context *ai, ai_messages *messages, const
char *response = ai->chat.response.data;
if (!response) return false;

if (!llm_messages_append(messages, ROLE_ASSISTANT, response, false)) {
if (!llm_messages_append(messages, ROLE_ASSISTANT, response)) {
sqlite_common_set_error (ai->context, ai->vtab, SQLITE_ERROR, "Failed to append response");
return false;
}
@@ -1640,7 +1643,7 @@ static bool llm_chat_run (ai_context *ai, ai_cursor *c, const char *user_prompt)
buffer_t *formatted = &ai->chat.formatted;

// save prompt input in history
if (!llm_messages_append(messages, ROLE_USER, user_prompt, true)) {
if (!llm_messages_append(messages, ROLE_USER, user_prompt)) {
sqlite_common_set_error (ai->context, ai->vtab, SQLITE_ERROR, "Failed to append message");
return false;
}
@@ -1976,7 +1979,7 @@ static void llm_chat_restore (sqlite3_context *context, int argc, sqlite3_value
const char *role = (const char *)sqlite3_column_text(vm, 0);
const char *content = (const char *)sqlite3_column_text(vm, 1);

if (!llm_messages_append(messages, role, content, true)) {
if (!llm_messages_append(messages, role, content)) {
sqlite_common_set_error (ai->context, ai->vtab, SQLITE_ERROR, "Failed to append response");
rc = SQLITE_OK;
goto abort_restore;
@@ -2369,6 +2372,27 @@ static void llm_context_create_textgen (sqlite3_context *context, int argc, sqli
llm_context_create_with_options(context, ai, options, options2);
}

static void llm_context_size (sqlite3_context *context, int argc, sqlite3_value **argv) {
ai_context *ai = (ai_context *)sqlite3_user_data(context);
if (!ai->ctx) {
sqlite_context_result_error(context, SQLITE_MISUSE, "No context found. Please call llm_context_create() before using this function.");
return;
}
uint32_t n_ctx = llama_n_ctx(ai->ctx);
sqlite3_result_int(context, n_ctx);
}

static void llm_context_used (sqlite3_context *context, int argc, sqlite3_value **argv) {
ai_context *ai = (ai_context *)sqlite3_user_data(context);
if (!ai->ctx) {
sqlite_context_result_error(context, SQLITE_MISUSE, "No context found. Please call llm_context_create() before using this function.");
return;
}
int32_t n_ctx_used = llama_memory_seq_pos_max(llama_get_memory(ai->ctx), 0) + 1;
if (n_ctx_used < 0) n_ctx_used = 0;
sqlite3_result_int(context, n_ctx_used);
}

static void llm_model_free (sqlite3_context *context, int argc, sqlite3_value **argv) {
ai_context *ai = (ai_context *)sqlite3_user_data(context);
ai_cleanup((void *)ai, true, false);
@@ -2707,6 +2731,12 @@ SQLITE_AI_API int sqlite3_ai_init (sqlite3 *db, char **pzErrMsg, const sqlite3_a
rc = sqlite3_create_function(db, "llm_context_create", 1, SQLITE_UTF8, ctx, llm_context_create, NULL, NULL);
if (rc != SQLITE_OK) goto cleanup;

rc = sqlite3_create_function(db, "llm_context_size", 0, SQLITE_UTF8, ctx, llm_context_size, NULL, NULL);
if (rc != SQLITE_OK) goto cleanup;

rc = sqlite3_create_function(db, "llm_context_used", 0, SQLITE_UTF8, ctx, llm_context_used, NULL, NULL);
if (rc != SQLITE_OK) goto cleanup;

rc = sqlite3_create_function(db, "llm_context_create_embedding", 0, SQLITE_UTF8, ctx, llm_context_create_embedding, NULL, NULL);
if (rc != SQLITE_OK) goto cleanup;

2 changes: 1 addition & 1 deletion src/sqlite-ai.h
@@ -24,7 +24,7 @@
extern "C" {
#endif

#define SQLITE_AI_VERSION "0.7.57"
#define SQLITE_AI_VERSION "0.7.58"

SQLITE_AI_API int sqlite3_ai_init (sqlite3 *db, char **pzErrMsg, const sqlite3_api_routines *pApi);
