Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion mindsdb_sql_parser/__about__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
__title__ = 'mindsdb_sql_parser'
__package_name__ = 'mindsdb_sql_parser'
__version__ = '0.11.4'
__version__ = '0.12.0'
__description__ = "Mindsdb SQL parser"
__email__ = "jorge@mindsdb.com"
__author__ = 'MindsDB Inc'
Expand Down
7 changes: 5 additions & 2 deletions mindsdb_sql_parser/ast/mindsdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,11 @@
from .drop_job import DropJob
from .chatbot import CreateChatBot, UpdateChatBot, DropChatBot
from .trigger import CreateTrigger, DropTrigger
from .knowledge_base import CreateKnowledgeBase, DropKnowledgeBase, CreateKnowledgeBaseIndex, DropKnowledgeBaseIndex \
, EvaluateKnowledgeBase
from .knowledge_base import (
CreateKnowledgeBase, DropKnowledgeBase, CreateKnowledgeBaseIndex, DropKnowledgeBaseIndex, AlterKnowledgeBase,
EvaluateKnowledgeBase
)

from .skills import CreateSkill, DropSkill, UpdateSkill

# remove it in next release
Expand Down
68 changes: 58 additions & 10 deletions mindsdb_sql_parser/ast/mindsdb/knowledge_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,15 @@ def __init__(

def to_tree(self, *args, level=0, **kwargs):
ind = indent(level)
storage_str = f"{ind} storage={self.storage.to_string()},\n" if self.storage else ""
model_str = f"{ind} model={self.model.to_string()},\n" if self.model else ""
out_str = f"""
{ind}CreateKnowledgeBase(
{ind} if_not_exists={self.if_not_exists},
{ind} name={self.name.to_string()},
{ind} from_query={self.from_query.to_tree(level=level + 1) if self.from_query else None},
{model_str}{storage_str}{ind} params={self.params}
{ind})
"""
storage_str = f"{ind} storage={self.storage.to_string()},\n" if self.storage else ""
model_str = f"{ind} model={self.model.to_string()},\n" if self.model else ""
out_str = f"{ind}CreateKnowledgeBase(\n" \
f"{ind} if_not_exists={self.if_not_exists},\n" \
f"{ind} name={self.name.to_string()},\n" \
f"{ind} from_query={self.from_query.to_tree(level=level + 1) if self.from_query else None},\n" \
f"{ind}{model_str}{storage_str}{ind} params={self.params}\n" \
f"{ind})"

return out_str

def get_string(self, *args, **kwargs):
Expand Down Expand Up @@ -80,6 +79,55 @@ def __repr__(self) -> str:
return self.to_tree()


class AlterKnowledgeBase(ASTNode):
    """
    AST node for an ``ALTER KNOWLEDGE_BASE <name> USING ...`` statement.
    """

    def __init__(
        self,
        name,
        params=None,
        *args,
        **kwargs,
    ):
        """
        Args:
            name: Identifier -- name of the knowledge base
            params: dict -- additional parameters to pass to the knowledge base. E.g., chunking strategy, etc.
                May be None; it is then rendered as if no parameters were given.
        """
        super().__init__(*args, **kwargs)
        self.name = name
        self.params = params

    def to_tree(self, *args, level=0, **kwargs):
        """
        Build a multi-line debug representation of the node.

        Args:
            level: nesting level, passed to ``indent`` to obtain the line prefix

        Returns:
            str -- the node name followed by its ``name`` and ``params`` fields
        """
        ind = indent(level)
        out_str = f"{ind}AlterKnowledgeBase(\n" \
                  f"{ind} name={self.name.to_string()},\n" \
                  f"{ind} params={self.params}\n" \
                  f"{ind})"
        return out_str

    def get_string(self, *args, **kwargs):
        """
        Render the node back into SQL text.

        Returns:
            str -- ``ALTER KNOWLEDGE_BASE <name> USING k=v, ...``; the USING
            clause is omitted when there are no parameters
        """
        # `params` defaults to None in __init__; the previous `self.params.copy()`
        # raised AttributeError in that case, so treat None like an empty dict.
        params = self.params or {}

        using_ar = [f"{k}={repr(v)}" for k, v in params.items()]
        using_str = "USING " + ", ".join(using_ar) if using_ar else ""

        # The space after the name is emitted even without a USING clause so
        # the output stays identical to what callers received before.
        return f"ALTER KNOWLEDGE_BASE {self.name.to_string()} {using_str}"

    def __repr__(self) -> str:
        return self.to_tree()


class DropKnowledgeBase(ASTNode):
"""
Delete a knowledge base
Expand Down
19 changes: 15 additions & 4 deletions mindsdb_sql_parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from mindsdb_sql_parser.ast.mindsdb.latest import Latest
from mindsdb_sql_parser.ast.mindsdb.evaluate import Evaluate
from mindsdb_sql_parser.ast.mindsdb.knowledge_base import CreateKnowledgeBase, DropKnowledgeBase, \
CreateKnowledgeBaseIndex, DropKnowledgeBaseIndex, EvaluateKnowledgeBase
AlterKnowledgeBase, CreateKnowledgeBaseIndex, DropKnowledgeBaseIndex, EvaluateKnowledgeBase
from mindsdb_sql_parser.ast.mindsdb.skills import CreateSkill, DropSkill, UpdateSkill
from mindsdb_sql_parser.exceptions import ParsingException
from mindsdb_sql_parser.ast.mindsdb.retrain_predictor import RetrainPredictor
Expand Down Expand Up @@ -95,6 +95,7 @@ class MindsDBParser(Parser):
'create_trigger',
'drop_trigger',
'create_kb',
'alter_kb',
'drop_kb',
'evaluate_kb',
'create_skill',
Expand All @@ -113,9 +114,6 @@ def query(self, p):
@_(
'CREATE KNOWLEDGE_BASE if_not_exists_or_empty identifier USING kw_parameter_list',
'CREATE KNOWLEDGE_BASE if_not_exists_or_empty identifier',
# from select
'CREATE KNOWLEDGE_BASE if_not_exists_or_empty identifier FROM LPAREN select RPAREN USING kw_parameter_list',
'CREATE KNOWLEDGE_BASE if_not_exists_or_empty identifier FROM LPAREN select RPAREN',
)
def create_kb(self, p):
params = getattr(p, 'kw_parameter_list', {})
Expand Down Expand Up @@ -145,6 +143,19 @@ def create_kb(self, p):
if_not_exists=if_not_exists
)

@_(
    'ALTER KNOWLEDGE_BASE identifier USING kw_parameter_list',
)
def alter_kb(self, p):
    """
    Grammar rule for ``ALTER KNOWLEDGE_BASE <identifier> USING <params>``.

    Returns an AlterKnowledgeBase node whose parameter keys are lower-cased,
    which makes the parameter names case insensitive.
    """
    raw_params = getattr(p, 'kw_parameter_list', {})
    lowered = {key.lower(): value for key, value in raw_params.items()}
    return AlterKnowledgeBase(name=p.identifier, params=lowered)

@_('CREATE INDEX ON KNOWLEDGE_BASE identifier')
def create_index(self, p):
return CreateKnowledgeBaseIndex(name=p.identifier)
Expand Down
94 changes: 58 additions & 36 deletions tests/test_mindsdb/test_knowledgebase.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from mindsdb_sql_parser.ast.mindsdb.knowledge_base import (
CreateKnowledgeBase,
DropKnowledgeBase,
AlterKnowledgeBase,
)
from mindsdb_sql_parser.ast import (
Select,
Expand All @@ -18,6 +19,7 @@
)
from mindsdb_sql_parser.utils import to_single_line


class TestKB:

def test_create_knowledge_base(self):
Expand Down Expand Up @@ -60,42 +62,6 @@ def test_create_knowledge_base(self):
ast = parse_sql(sql)
assert ast == expected_ast

# create from a query
sql = """
CREATE KNOWLEDGE_BASE my_knowledge_base
FROM (
SELECT id, content, embeddings, metadata
FROM my_table
JOIN my_embedding_model
)
USING
MODEL = mindsdb.my_embedding_model,
STORAGE = my_vector_database.some_table
"""
ast = parse_sql(sql)
expected_ast = CreateKnowledgeBase(
name=Identifier("my_knowledge_base"),
if_not_exists=False,
model=Identifier(parts=["mindsdb", "my_embedding_model"]),
storage=Identifier(parts=["my_vector_database", "some_table"]),
from_select=Select(
targets=[
Identifier("id"),
Identifier("content"),
Identifier("embeddings"),
Identifier("metadata"),
],
from_table=Join(
left=Identifier("my_table"),
right=Identifier("my_embedding_model"),
join_type="JOIN",
),
),
params={},
)

assert ast == expected_ast

# create without MODEL
sql = """
CREATE KNOWLEDGE_BASE my_knowledge_base
Expand Down Expand Up @@ -189,6 +155,62 @@ def test_create_knowledge_base(self):
)
assert ast == expected_ast

def disabled_test_create_from_select(self):
    """
    CREATE KNOWLEDGE_BASE populated from a sub-select.

    NOTE(review): the name lacks the ``test_`` prefix, so default pytest
    discovery presumably does not collect it -- confirm against the
    project's pytest configuration.
    """
    query = """
CREATE KNOWLEDGE_BASE my_knowledge_base
FROM (
SELECT id, content, embeddings, metadata
FROM my_table
JOIN my_embedding_model
)
USING
MODEL = mindsdb.my_embedding_model,
STORAGE = my_vector_database.some_table
"""
    parsed = parse_sql(query)

    # The inner SELECT that is expected to feed the knowledge base.
    source_select = Select(
        targets=[
            Identifier(column)
            for column in ("id", "content", "embeddings", "metadata")
        ],
        from_table=Join(
            left=Identifier("my_table"),
            right=Identifier("my_embedding_model"),
            join_type="JOIN",
        ),
    )
    expected = CreateKnowledgeBase(
        name=Identifier("my_knowledge_base"),
        if_not_exists=False,
        model=Identifier(parts=["mindsdb", "my_embedding_model"]),
        storage=Identifier(parts=["my_vector_database", "some_table"]),
        from_select=source_select,
        params={},
    )

    assert parsed == expected

def test_update_knowledge_base(self):
    """ALTER KNOWLEDGE_BASE with dict-valued parameters in the USING clause."""
    query = """
ALTER KNOWLEDGE_BASE my_kb
USING
reranking_model={'provider': 'openai'},
embedding_model={'api_key': '123'}
"""
    parsed = parse_sql(query)

    expected_params = {
        'reranking_model': {'provider': 'openai'},
        'embedding_model': {'api_key': '123'},
    }
    expected = AlterKnowledgeBase(name=Identifier("my_kb"), params=expected_params)

    # Compare the rendered form first, then the nodes themselves.
    assert to_single_line(str(parsed)) == to_single_line(str(expected))
    assert parsed == expected

def test_drop_knowledge_base(self):
# drop if exists
sql = """
Expand Down