Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 0 additions & 20 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -1,18 +1,6 @@
{
"name": "Python 3.10",
"image": "mcr.microsoft.com/devcontainers/python:3.10",
"runArgs": [
"--runtime",
"nvidia",
"--gpus",
"all",
// optional but make sure CUDA workloads are available
"--env",
"NVIDIA_VISIBLE_DEVICES=all",
// optional but make sure CUDA workloads are available
"--env",
"NVIDIA_DRIVER_CAPABILITIES=compute,utility"
],
"customizations": {
"vscode": {
"extensions": [
Expand All @@ -26,13 +14,5 @@
"hbenl.vscode-test-explorer"
]
}
},
"hostRequirements": {
"gpu": "optional"
},
"remoteEnv": {
// optional but make sure CUDA workloads are available
"NVIDIA_VISIBLE_DEVICES": "all",
"NVIDIA_DRIVER_CAPABILITIES": "compute,utility"
}
}
38 changes: 38 additions & 0 deletions .devcontainer/py3.11/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
{
"name": "Python 3.11",
"image": "mcr.microsoft.com/devcontainers/python:3.11",
"runArgs": [
"--runtime",
"nvidia",
"--gpus",
"all",
// optional, but ensures every GPU is visible to CUDA workloads
"--env",
"NVIDIA_VISIBLE_DEVICES=all",
// optional, but ensures the compute and utility driver capabilities are exposed to CUDA workloads
"--env",
"NVIDIA_DRIVER_CAPABILITIES=compute,utility"
],
"customizations": {
"vscode": {
"extensions": [
"ms-python.black-formatter",
"ms-python.flake8",
"ms-python.isort",
"ms-python.vscode-pylance",
"ms-python.python",
"ms-python.debugpy",
"ms-python.vscode-python-envs",
"hbenl.vscode-test-explorer"
]
}
},
"hostRequirements": {
"gpu": "optional"
},
"remoteEnv": {
// optional, but ensures GPUs and the driver capabilities CUDA workloads need are available
"NVIDIA_VISIBLE_DEVICES": "all",
"NVIDIA_DRIVER_CAPABILITIES": "compute,utility"
}
}
4 changes: 4 additions & 0 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ jobs:

- name: Test
# Using default directory for models
# COVERAGE_CORE=pytrace: Workaround for Python 3.11 segfault with SQLite extensions + C tracer
# See: https://github.com/nedbat/coveragepy/issues/1665
env:
COVERAGE_CORE: ${{ matrix.python-version == '3.11' && 'pytrace' || '' }}
run: |
pytest --cov --cov-branch --cov-report=xml -v -m "not slow" ./tests

Expand Down
4 changes: 2 additions & 2 deletions src/sqlite_rag/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,12 +439,12 @@ def reset(
def search(
ctx: typer.Context,
query: str,
limit: int = typer.Option(10, help="Number of results to return"),
limit: int = typer.Option(5, help="Number of results to return"),
debug: bool = typer.Option(
False,
"-d",
"--debug",
help="Print extra debug information with modern formatting",
help="Print extra debug information with sentence-level details",
),
peek: bool = typer.Option(
False, "--peek", help="Print debug information using compact table format"
Expand Down
34 changes: 28 additions & 6 deletions src/sqlite_rag/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,28 +76,50 @@ def _create_schema(conn: sqlite3.Connection, settings: Settings):
)

# TODO: this table is not ready for sqlite-sync because its id column uses AUTOINCREMENT
cursor.execute(
cursor.executescript(
"""
CREATE TABLE IF NOT EXISTS chunks (
id INTEGER PRIMARY KEY AUTOINCREMENT,
document_id TEXT,
content TEXT,
embedding BLOB,
FOREIGN KEY (document_id) REFERENCES documents (id) ON DELETE CASCADE
embedding BLOB
);
CREATE INDEX IF NOT EXISTS idx_chunks_document_id ON chunks (document_id);
"""
)

cursor.executescript(
"""
CREATE TABLE IF NOT EXISTS sentences (
id TEXT PRIMARY KEY,
chunk_id INTEGER,
content TEXT,
embedding BLOB,
start_offset INTEGER,
end_offset INTEGER
);
CREATE INDEX IF NOT EXISTS idx_sentences_chunk_id ON sentences (chunk_id);
"""
)

cursor.execute(
"""
CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(content, content='chunks', content_rowid='id');
"""
)

cursor.execute(
f"""
SELECT vector_init('chunks', 'embedding', 'type={settings.vector_type},dimension={settings.embedding_dim},{settings.other_vector_options}');
"""
"""
SELECT vector_init('chunks', 'embedding', ?);
""",
(settings.get_vector_init_options(),),
)
# TODO: sentences currently reuse the same vector options as chunks; decide whether they need different ones
cursor.execute(
"""
SELECT vector_init('sentences', 'embedding', ?);
""",
(settings.get_vector_init_options(),),
)

conn.commit()
Loading