61 changes: 55 additions & 6 deletions .github/workflows/main.yml
@@ -7,8 +7,48 @@ permissions:
contents: write
id-token: write

env:
GGUF_MODEL_DIR: tests/models/unsloth/gemma-3-270m-it-GGUF
GGUF_MODEL_NAME: gemma-3-270m-it-UD-IQ2_M.gguf
GGUF_MODEL_URL: https://huggingface.co/unsloth/gemma-3-270m-it-GGUF/resolve/main/gemma-3-270m-it-UD-IQ2_M.gguf

jobs:
download-model:
outputs:
cache-key: gguf-${{ steps.meta.outputs.hash }}
model-path: ${{ env.GGUF_MODEL_DIR }}/${{ env.GGUF_MODEL_NAME }}
name: Download GGUF model
runs-on: ubuntu-22.04
steps:
- name: Compute model URL hash
id: meta
run: |
if command -v sha256sum >/dev/null 2>&1; then
hash=$(echo -n "${{ env.GGUF_MODEL_URL }}" | sha256sum | cut -d' ' -f1)
else
hash=$(echo -n "${{ env.GGUF_MODEL_URL }}" | shasum -a 256 | cut -d' ' -f1)
fi
echo "hash=$hash" >> "$GITHUB_OUTPUT"

- name: Prepare model directory
run: mkdir -p "${{ env.GGUF_MODEL_DIR }}"

- name: Restore GGUF cache
id: cache
uses: actions/cache@v4
with:
path: ${{ env.GGUF_MODEL_DIR }}/${{ env.GGUF_MODEL_NAME }}
key: gguf-${{ steps.meta.outputs.hash }}

- name: Download GGUF model
if: steps.cache.outputs.cache-hit != 'true'
run: |
curl -L --fail --retry 3 "${{ env.GGUF_MODEL_URL }}" -o "${{ env.GGUF_MODEL_DIR }}/${{ env.GGUF_MODEL_NAME }}"
- name: Verify GGUF model
run: test -f "${{ env.GGUF_MODEL_DIR }}/${{ env.GGUF_MODEL_NAME }}"

build:
needs: download-model
runs-on: ${{ matrix.os }}
container: ${{ matrix.container && matrix.container || '' }}
name: ${{ matrix.name }}${{ matrix.arch && format('-{0}', matrix.arch) || '' }} build${{ matrix.arch != 'arm64-v8a' && matrix.name != 'ios-sim' && matrix.name != 'ios' && matrix.name != 'apple-xcframework' && matrix.name != 'android-aar' && ( matrix.name != 'macos' || matrix.arch != 'x86_64' ) && ' + test' || ''}}
@@ -48,12 +88,12 @@ jobs:
arch: x86_64
name: linux-musl-cpu
container: alpine:latest
make: LLAMA="-DGGML_NATIVE=OFF -DGGML_CPU=ON -DGGML_AVX2=ON"
make: SKIP_UNITTEST=1 LLAMA="-DGGML_NATIVE=OFF -DGGML_CPU=ON -DGGML_AVX2=ON"
- os: ubuntu-22.04
arch: x86_64
name: linux-musl-gpu
container: alpine:latest
make: LLAMA="-DGGML_NATIVE=OFF -DGGML_CPU=ON -DGGML_OPENCL=ON"
make: SKIP_UNITTEST=1 LLAMA="-DGGML_NATIVE=OFF -DGGML_CPU=ON -DGGML_OPENCL=ON"
- os: ubuntu-22.04-arm
arch: arm64
name: linux-musl-cpu
@@ -65,20 +105,20 @@
- os: windows-2022
arch: x86_64
name: windows-cpu
make: LLAMA="-DGGML_NATIVE=OFF -DGGML_CPU=ON -DGGML_AVX2=ON"
make: SKIP_UNITTEST=1 LLAMA="-DGGML_NATIVE=OFF -DGGML_CPU=ON -DGGML_AVX2=ON"
- os: windows-2022
arch: x86_64
name: windows-gpu
make: LLAMA="-DGGML_NATIVE=OFF -DGGML_CPU=ON -DGGML_VULKAN=ON -DGGML_OPENCL=ON"
make: SKIP_UNITTEST=1 LLAMA="-DGGML_NATIVE=OFF -DGGML_CPU=ON -DGGML_VULKAN=ON -DGGML_OPENCL=ON"
- os: ubuntu-22.04
arch: x86_64
name: android
make: PLATFORM=android ARCH=x86_64
make: SKIP_UNITTEST=1 PLATFORM=android ARCH=x86_64
sqlite-amalgamation-zip: https://sqlite.org/2025/sqlite-amalgamation-3490100.zip
- os: ubuntu-22.04
arch: arm64-v8a
name: android
make: PLATFORM=android ARCH=arm64-v8a
make: SKIP_UNITTEST=1 PLATFORM=android ARCH=arm64-v8a
- os: macos-15
name: ios
make: PLATFORM=ios LLAMA="-DGGML_NATIVE=OFF -DGGML_METAL=ON -DGGML_ACCELERATE=ON -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=Apple" WHISPER="-DWHISPER_COREML=ON -DWHISPER_COREML_ALLOW_FALLBACK=ON"
@@ -106,6 +146,15 @@ jobs:
with:
submodules: true

- name: Prepare GGUF model directory
run: mkdir -p "${{ env.GGUF_MODEL_DIR }}"

- name: Restore GGUF cache
uses: actions/cache@v4
with:
path: ${{ needs.download-model.outputs.model-path }}
key: ${{ needs.download-model.outputs.cache-key }}

- name: android setup java
if: matrix.name == 'android-aar'
uses: actions/setup-java@v4
45 changes: 45 additions & 0 deletions API.md
@@ -245,6 +245,35 @@ Frees the current inference context.
SELECT llm_context_free();
```

---

## `llm_context_size()`

**Returns:** `INTEGER`

**Description:**
Returns the total token capacity (context window) of the current llama context. Use this after `llm_context_create` to confirm the configured `context_size`. Raises an error if no context is active.

**Example:**

```sql
SELECT llm_context_size();
-- 4096
```

---

## `llm_context_used()`

**Returns:** `INTEGER`

**Description:**
Returns how many tokens of the current llama context have already been consumed. Combine this with `llm_context_size()` to monitor usage. Raises an error if no context is active.

**Example:**

```sql
SELECT llm_context_used();
-- 1024
```
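
Since both functions return plain integers, they compose directly in SQL. A minimal sketch of a remaining-capacity query, reusing the example values above (a 4096-token context with 1024 tokens used):

```sql
-- tokens still available in the active context
SELECT llm_context_size() - llm_context_used() AS tokens_remaining;
-- 3072
```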

---

## `llm_sampler_create()`
@@ -546,6 +575,22 @@ SELECT llm_sampler_init_penalties(64, 1.2, 0.5, 0.8);

---

## `llm_token_count(text TEXT)`

**Returns:** `INTEGER`

**Description:**
Returns the number of tokens the current model needs to encode the supplied `text`, using the active context’s vocabulary. Requires a context created via `llm_context_create`.

**Example:**

```sql
SELECT llm_token_count('Hello world!');
-- 5
```
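
Because the count uses the same vocabulary as the active context, it pairs naturally with `llm_context_size()` and `llm_context_used()` as a pre-flight check before submitting a prompt. A minimal sketch (the prompt string below is a placeholder):

```sql
-- 1 if the prompt still fits in the active context, 0 otherwise
SELECT llm_token_count('Summarize the release notes.')
       <= llm_context_size() - llm_context_used() AS prompt_fits;
```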

---

## `llm_embed_generate(text TEXT, options TEXT)`

**Returns:** `BLOB` or `TEXT`
40 changes: 38 additions & 2 deletions Makefile
@@ -37,6 +37,15 @@ BUILD_LLAMA = $(BUILD_DIR)/llama.cpp
BUILD_WHISPER = $(BUILD_DIR)/whisper.cpp
BUILD_MINIAUDIO = $(BUILD_DIR)/miniaudio

# Test
# gemma-3-270m-it-UD-IQ2_M.gguf is a lightweight model used only for testing
CTEST_BIN = $(BUILD_DIR)/tests/sqlite_ai_tests
GGUF_MODEL_DIR ?= tests/models/unsloth/gemma-3-270m-it-GGUF
GGUF_MODEL_NAME ?= gemma-3-270m-it-UD-IQ2_M.gguf
GGUF_MODEL_URL ?= https://huggingface.co/unsloth/gemma-3-270m-it-GGUF/resolve/main/gemma-3-270m-it-UD-IQ2_M.gguf
GGUF_MODEL_PATH := $(GGUF_MODEL_DIR)/$(GGUF_MODEL_NAME)
SKIP_UNITTEST ?= 0

# Compiler and flags
CC = gcc
CXX = g++
@@ -55,6 +64,14 @@ LLAMA_LDFLAGS = -L./$(BUILD_LLAMA)/common -L./$(BUILD_GGML)/lib -L./$(BUILD_LLAM
WHISPER_LDFLAGS = -L./$(BUILD_WHISPER)/src -lwhisper
MINIAUDIO_LDFLAGS = -L./$(BUILD_MINIAUDIO) -lminiaudio -lminiaudio_channel_combiner_node -lminiaudio_channel_separator_node -lminiaudio_ltrim_node -lminiaudio_reverb_node -lminiaudio_vocoder_node
LDFLAGS = $(LLAMA_LDFLAGS) $(WHISPER_LDFLAGS) $(MINIAUDIO_LDFLAGS)
SQLITE_TEST_LIBS =
ifneq ($(PLATFORM),windows)
SQLITE_TEST_LIBS += -lpthread -lm
ifneq ($(PLATFORM),macos)
SQLITE_TEST_LIBS += -ldl
endif
endif
SQLITE_TEST_SRC = tests/c/sqlite3.c

# Files
SRC_FILES = $(wildcard $(SRC_DIR)/*.c)
@@ -210,8 +227,27 @@ endif
$(BUILD_DIR)/%.o: %.c $(BUILD_DIR)/llama.cpp.stamp
$(CC) $(CFLAGS) -O3 -fPIC -c $< -o $@

test: $(TARGET)
$(SQLITE3) ":memory:" -cmd ".bail on" ".load ./dist/ai" "SELECT ai_version();"
$(CTEST_BIN): tests/c/unittest.c $(SQLITE_TEST_SRC)
@mkdir -p $(dir $@)
$(CC) -std=c11 -Wall -Wextra -DSQLITE_ENABLE_LOAD_EXTENSION -I$(SRC_DIR) tests/c/unittest.c $(SQLITE_TEST_SRC) -o $@ $(SQLITE_TEST_LIBS)

$(GGUF_MODEL_PATH):
@mkdir -p $(GGUF_MODEL_DIR)
curl -L --fail --retry 3 -o $@ $(GGUF_MODEL_URL)

TEST_DEPS := $(TARGET)
ifeq ($(SKIP_UNITTEST),0)
TEST_DEPS += $(CTEST_BIN) $(GGUF_MODEL_PATH)
endif

test: $(TEST_DEPS)
@echo "Running sqlite3 CLI smoke test (ensures .load works)..."
$(SQLITE3) ":memory:" -cmd ".bail on" ".load ./dist/ai" "SELECT ai_version();"
ifeq ($(SKIP_UNITTEST),0)
$(CTEST_BIN) --extension "$(TARGET)" --model "$(GGUF_MODEL_PATH)"
else
@echo "Skipping C unit tests (SKIP_UNITTEST=$(SKIP_UNITTEST))."
endif

# Build submodules
ifeq ($(PLATFORM),windows)
40 changes: 35 additions & 5 deletions src/sqlite-ai.c
@@ -784,7 +784,7 @@ static bool llm_check_context (sqlite3_context *context) {

// MARK: - Chat Messages -

bool llm_messages_append (ai_messages *list, const char *role, const char *content, bool duplicate_content) {
bool llm_messages_append (ai_messages *list, const char *role, const char *content) {
if (list->count >= list->capacity) {
size_t new_cap = list->capacity ? list->capacity * 2 : MIN_ALLOC_MESSAGES;
llama_chat_message *new_items = sqlite3_realloc64(list->items, new_cap * sizeof(llama_chat_message));
@@ -796,7 +796,7 @@ bool llm_messages_append (ai_messages *list, const char *role, const char *conte

bool duplicate_role = ((role != ROLE_USER) && (role != ROLE_ASSISTANT));
list->items[list->count].role = (duplicate_role) ? sqlite_strdup(role) : role;
list->items[list->count].content = (duplicate_content) ? sqlite_strdup(content) : content;
list->items[list->count].content = sqlite_strdup(content);
list->count += 1;
return true;
}
@@ -1490,6 +1490,9 @@ static bool llm_chat_check_context (ai_context *ai) {
llama_sampler_chain_add(ai->sampler, llama_sampler_init_dist((uint32_t)LLAMA_DEFAULT_SEED));
}

// nothing to do if the chat struct has already been created
if (ai->chat.uuid[0] != '\0') return true;

// create history structs
ai_uuid_v7_string(ai->chat.uuid, true);

@@ -1509,7 +1512,7 @@ static bool llm_chat_save_response (ai_context *ai, ai_messages *messages, const
char *response = ai->chat.response.data;
if (!response) return false;

if (!llm_messages_append(messages, ROLE_ASSISTANT, response, false)) {
if (!llm_messages_append(messages, ROLE_ASSISTANT, response)) {
sqlite_common_set_error (ai->context, ai->vtab, SQLITE_ERROR, "Failed to append response");
return false;
}
@@ -1640,7 +1643,7 @@ static bool llm_chat_run (ai_context *ai, ai_cursor *c, const char *user_prompt)
buffer_t *formatted = &ai->chat.formatted;

// save prompt input in history
if (!llm_messages_append(messages, ROLE_USER, user_prompt, true)) {
if (!llm_messages_append(messages, ROLE_USER, user_prompt)) {
sqlite_common_set_error (ai->context, ai->vtab, SQLITE_ERROR, "Failed to append message");
return false;
}
@@ -1976,7 +1979,7 @@ static void llm_chat_restore (sqlite3_context *context, int argc, sqlite3_value
const char *role = (const char *)sqlite3_column_text(vm, 0);
const char *content = (const char *)sqlite3_column_text(vm, 1);

if (!llm_messages_append(messages, role, content, true)) {
if (!llm_messages_append(messages, role, content)) {
sqlite_common_set_error (ai->context, ai->vtab, SQLITE_ERROR, "Failed to append response");
rc = SQLITE_OK;
goto abort_restore;
@@ -2369,6 +2372,27 @@ static void llm_context_create_textgen (sqlite3_context *context, int argc, sqli
llm_context_create_with_options(context, ai, options, options2);
}

static void llm_context_size (sqlite3_context *context, int argc, sqlite3_value **argv) {
ai_context *ai = (ai_context *)sqlite3_user_data(context);
if (!ai->ctx) {
sqlite_context_result_error(context, SQLITE_MISUSE, "No context found. Please call llm_context_create() before using this function.");
return;
}
uint32_t n_ctx = llama_n_ctx(ai->ctx);
sqlite3_result_int(context, n_ctx);
}

static void llm_context_used (sqlite3_context *context, int argc, sqlite3_value **argv) {
ai_context *ai = (ai_context *)sqlite3_user_data(context);
if (!ai->ctx) {
sqlite_context_result_error(context, SQLITE_MISUSE, "No context found. Please call llm_context_create() before using this function.");
return;
}
int32_t n_ctx_used = llama_memory_seq_pos_max(llama_get_memory(ai->ctx), 0) + 1;
if (n_ctx_used < 0) n_ctx_used = 0;
sqlite3_result_int(context, n_ctx_used);
}

static void llm_model_free (sqlite3_context *context, int argc, sqlite3_value **argv) {
ai_context *ai = (ai_context *)sqlite3_user_data(context);
ai_cleanup((void *)ai, true, false);
@@ -2707,6 +2731,12 @@ SQLITE_AI_API int sqlite3_ai_init (sqlite3 *db, char **pzErrMsg, const sqlite3_a
rc = sqlite3_create_function(db, "llm_context_create", 1, SQLITE_UTF8, ctx, llm_context_create, NULL, NULL);
if (rc != SQLITE_OK) goto cleanup;

rc = sqlite3_create_function(db, "llm_context_size", 0, SQLITE_UTF8, ctx, llm_context_size, NULL, NULL);
if (rc != SQLITE_OK) goto cleanup;

rc = sqlite3_create_function(db, "llm_context_used", 0, SQLITE_UTF8, ctx, llm_context_used, NULL, NULL);
if (rc != SQLITE_OK) goto cleanup;

rc = sqlite3_create_function(db, "llm_context_create_embedding", 0, SQLITE_UTF8, ctx, llm_context_create_embedding, NULL, NULL);
if (rc != SQLITE_OK) goto cleanup;

2 changes: 1 addition & 1 deletion src/sqlite-ai.h
@@ -24,7 +24,7 @@
extern "C" {
#endif

#define SQLITE_AI_VERSION "0.7.57"
#define SQLITE_AI_VERSION "0.7.58"

SQLITE_AI_API int sqlite3_ai_init (sqlite3 *db, char **pzErrMsg, const sqlite3_api_routines *pApi);
