From f5f13e887ac0786be349163f1efa00a500f5bbaf Mon Sep 17 00:00:00 2001 From: andrew Date: Tue, 14 Oct 2025 10:40:00 +0300 Subject: [PATCH 1/3] alter kb --- mindsdb_sql_parser/ast/mindsdb/__init__.py | 7 +- .../ast/mindsdb/knowledge_base.py | 51 ++++++++++ mindsdb_sql_parser/parser.py | 19 +++- tests/test_mindsdb/test_knowledgebase.py | 94 ++++++++++++------- 4 files changed, 129 insertions(+), 42 deletions(-) diff --git a/mindsdb_sql_parser/ast/mindsdb/__init__.py b/mindsdb_sql_parser/ast/mindsdb/__init__.py index 5b193fe..d6034af 100644 --- a/mindsdb_sql_parser/ast/mindsdb/__init__.py +++ b/mindsdb_sql_parser/ast/mindsdb/__init__.py @@ -17,8 +17,11 @@ from .drop_job import DropJob from .chatbot import CreateChatBot, UpdateChatBot, DropChatBot from .trigger import CreateTrigger, DropTrigger -from .knowledge_base import CreateKnowledgeBase, DropKnowledgeBase, CreateKnowledgeBaseIndex, DropKnowledgeBaseIndex \ - , EvaluateKnowledgeBase +from .knowledge_base import ( + CreateKnowledgeBase, DropKnowledgeBase, CreateKnowledgeBaseIndex, DropKnowledgeBaseIndex, AlterKnowledgeBase, + EvaluateKnowledgeBase +) + from .skills import CreateSkill, DropSkill, UpdateSkill # remove it in next release diff --git a/mindsdb_sql_parser/ast/mindsdb/knowledge_base.py b/mindsdb_sql_parser/ast/mindsdb/knowledge_base.py index 1ee82ee..e0fc02f 100644 --- a/mindsdb_sql_parser/ast/mindsdb/knowledge_base.py +++ b/mindsdb_sql_parser/ast/mindsdb/knowledge_base.py @@ -80,6 +80,57 @@ def __repr__(self) -> str: return self.to_tree() +class AlterKnowledgeBase(ASTNode): + """ + Update the knowledge base + """ + def __init__( + self, + name, + params=None, + *args, + **kwargs, + ): + """ + Args: + name: Identifier -- name of the knowledge base + params: dict -- additional parameters to pass to the knowledge base. E.g., chunking strategy, etc. + """ + super().__init__(*args, **kwargs) + self.name = name + self.params = params + def to_tree(self, *args, level=0, **kwargs): + ind = indent(level) + out_str = f""" + {ind}AlterKnowledgeBase( + {ind} name={self.name.to_string()}, + {ind} params={self.params} + {ind}) + """ + return out_str + + def get_string(self, *args, **kwargs): + + using_ar = [] + params = self.params.copy() + if params: + using_ar += [f"{k}={repr(v)}" for k, v in params.items()] + if using_ar: + using_str = "USING " + ", ".join(using_ar) + else: + using_str = "" + + out_str = ( + f"ALTER KNOWLEDGE_BASE {self.name.to_string()} " + f"{using_str}" + ) + + return out_str + + def __repr__(self) -> str: + return self.to_tree() + + class DropKnowledgeBase(ASTNode): """ Delete a knowledge base diff --git a/mindsdb_sql_parser/parser.py b/mindsdb_sql_parser/parser.py index 4f1394f..fd8d099 100644 --- a/mindsdb_sql_parser/parser.py +++ b/mindsdb_sql_parser/parser.py @@ -21,7 +21,7 @@ from mindsdb_sql_parser.ast.mindsdb.latest import Latest from mindsdb_sql_parser.ast.mindsdb.evaluate import Evaluate from mindsdb_sql_parser.ast.mindsdb.knowledge_base import CreateKnowledgeBase, DropKnowledgeBase, \ - CreateKnowledgeBaseIndex, DropKnowledgeBaseIndex, EvaluateKnowledgeBase + AlterKnowledgeBase, CreateKnowledgeBaseIndex, DropKnowledgeBaseIndex, EvaluateKnowledgeBase from mindsdb_sql_parser.ast.mindsdb.skills import CreateSkill, DropSkill, UpdateSkill from mindsdb_sql_parser.exceptions import ParsingException from mindsdb_sql_parser.ast.mindsdb.retrain_predictor import RetrainPredictor @@ -95,6 +95,7 @@ class MindsDBParser(Parser): 'create_trigger', 'drop_trigger', 'create_kb', + 'alter_kb', 'drop_kb', 'evaluate_kb', 'create_skill', @@ -113,9 +114,6 @@ def query(self, p): @_( 'CREATE KNOWLEDGE_BASE if_not_exists_or_empty identifier USING kw_parameter_list', 'CREATE KNOWLEDGE_BASE if_not_exists_or_empty identifier', - # from select - 'CREATE KNOWLEDGE_BASE if_not_exists_or_empty identifier FROM LPAREN select RPAREN USING kw_parameter_list', - 'CREATE KNOWLEDGE_BASE if_not_exists_or_empty identifier FROM LPAREN select RPAREN', ) def create_kb(self, p): params = getattr(p, 'kw_parameter_list', {}) @@ -145,6 +143,19 @@ def create_kb(self, p): if_not_exists=if_not_exists ) + @_( + 'ALTER KNOWLEDGE_BASE identifier USING kw_parameter_list', + ) + def alter_kb(self, p): + params = getattr(p, 'kw_parameter_list', {}) + name = p.identifier + params = {k.lower(): v for k, v in params.items()} # case insensitive + + return AlterKnowledgeBase( + name=name, + params=params + ) + @_('CREATE INDEX ON KNOWLEDGE_BASE identifier') def create_index(self, p): return CreateKnowledgeBaseIndex(name=p.identifier) diff --git a/tests/test_mindsdb/test_knowledgebase.py b/tests/test_mindsdb/test_knowledgebase.py index 7a4a8cb..171f4a2 100644 --- a/tests/test_mindsdb/test_knowledgebase.py +++ b/tests/test_mindsdb/test_knowledgebase.py @@ -3,6 +3,7 @@ from mindsdb_sql_parser.ast.mindsdb.knowledge_base import ( CreateKnowledgeBase, DropKnowledgeBase, + AlterKnowledgeBase, ) from mindsdb_sql_parser.ast import ( Select, @@ -18,6 +19,7 @@ ) from mindsdb_sql_parser.utils import to_single_line + class TestKB: def test_create_knowledge_base(self): @@ -60,42 +62,6 @@ def test_create_knowledge_base(self): ast = parse_sql(sql) assert ast == expected_ast - # create from a query - sql = """ - CREATE KNOWLEDGE_BASE my_knowledge_base - FROM ( - SELECT id, content, embeddings, metadata - FROM my_table - JOIN my_embedding_model - ) - USING - MODEL = mindsdb.my_embedding_model, - STORAGE = my_vector_database.some_table - """ - ast = parse_sql(sql) - expected_ast = CreateKnowledgeBase( - name=Identifier("my_knowledge_base"), - if_not_exists=False, - model=Identifier(parts=["mindsdb", "my_embedding_model"]), - storage=Identifier(parts=["my_vector_database", "some_table"]), - from_select=Select( - targets=[ - Identifier("id"), - Identifier("content"), - Identifier("embeddings"), - Identifier("metadata"), - ], - from_table=Join( - left=Identifier("my_table"), - right=Identifier("my_embedding_model"), - join_type="JOIN", - ), - ), - params={}, - ) - - assert ast == expected_ast - # create without MODEL sql = """ CREATE KNOWLEDGE_BASE my_knowledge_base @@ -189,6 +155,62 @@ def test_create_knowledge_base(self): ) assert ast == expected_ast + def disabled_test_create_from_select(self): + # create from a query + sql = """ + CREATE KNOWLEDGE_BASE my_knowledge_base + FROM ( + SELECT id, content, embeddings, metadata + FROM my_table + JOIN my_embedding_model + ) + USING + MODEL = mindsdb.my_embedding_model, + STORAGE = my_vector_database.some_table + """ + ast = parse_sql(sql) + expected_ast = CreateKnowledgeBase( + name=Identifier("my_knowledge_base"), + if_not_exists=False, + model=Identifier(parts=["mindsdb", "my_embedding_model"]), + storage=Identifier(parts=["my_vector_database", "some_table"]), + from_select=Select( + targets=[ + Identifier("id"), + Identifier("content"), + Identifier("embeddings"), + Identifier("metadata"), + ], + from_table=Join( + left=Identifier("my_table"), + right=Identifier("my_embedding_model"), + join_type="JOIN", + ), + ), + params={}, + ) + + assert ast == expected_ast + + def test_update_knowledge_base(self): + # create without select + sql = """ + ALTER KNOWLEDGE_BASE my_kb + USING + reranking_model={'provider': 'openai'}, + embedding_model={'api_key': '123'} + """ + ast = parse_sql(sql) + expected_ast = AlterKnowledgeBase( + name=Identifier("my_kb"), + params={ + 'reranking_model': {'provider': 'openai'}, + 'embedding_model': {'api_key': '123'}, + }, + ) + assert to_single_line(str(ast)) == to_single_line(str(expected_ast)) + assert ast == expected_ast + def test_drop_knowledge_base(self): # drop if exists sql = """ From b06775c2c408356ca06407d3e8afff2e140bd585 Mon Sep 17 00:00:00 2001 From: andrew Date: Tue, 14 Oct 2025 12:42:53 +0300 Subject: [PATCH 2/3] bump version --- mindsdb_sql_parser/__about__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mindsdb_sql_parser/__about__.py b/mindsdb_sql_parser/__about__.py index eecfdeb..e85b2b1 100644 --- a/mindsdb_sql_parser/__about__.py +++ b/mindsdb_sql_parser/__about__.py @@ -1,6 +1,6 @@ __title__ = 'mindsdb_sql_parser' __package_name__ = 'mindsdb_sql_parser' -__version__ = '0.11.4' +__version__ = '0.12.0' __description__ = "Mindsdb SQL parser" __email__ = "jorge@mindsdb.com" __author__ = 'MindsDB Inc' From 391825e2f7517569a6ff2aaa85487e301f0fa742 Mon Sep 17 00:00:00 2001 From: andrew Date: Tue, 14 Oct 2025 15:26:15 +0300 Subject: [PATCH 3/3] update to_tree --- .../ast/mindsdb/knowledge_base.py | 29 +++++++++---------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/mindsdb_sql_parser/ast/mindsdb/knowledge_base.py b/mindsdb_sql_parser/ast/mindsdb/knowledge_base.py index e0fc02f..d4ddf62 100644 --- a/mindsdb_sql_parser/ast/mindsdb/knowledge_base.py +++ b/mindsdb_sql_parser/ast/mindsdb/knowledge_base.py @@ -37,16 +37,15 @@ def __init__( def to_tree(self, *args, level=0, **kwargs): ind = indent(level) - storage_str = f"{ind} storage={self.storage.to_string()},\n" if self.storage else "" - model_str = f"{ind} model={self.model.to_string()},\n" if self.model else "" - out_str = f""" - {ind}CreateKnowledgeBase( - {ind} if_not_exists={self.if_not_exists}, - {ind} name={self.name.to_string()}, - {ind} from_query={self.from_query.to_tree(level=level + 1) if self.from_query else None}, - {model_str}{storage_str}{ind} params={self.params} - {ind}) - """ + storage_str = f"{ind} storage={self.storage.to_string()},\n" if self.storage else "" + model_str = f"{ind} model={self.model.to_string()},\n" if self.model else "" + out_str = f"{ind}CreateKnowledgeBase(\n" \ + f"{ind} if_not_exists={self.if_not_exists},\n" \ + f"{ind} name={self.name.to_string()},\n" \ + f"{ind} from_query={self.from_query.to_tree(level=level + 1) if self.from_query else None},\n" \ + f"{ind}{model_str}{storage_str}{ind} params={self.params}\n" \ + f"{ind})" + return out_str def get_string(self, *args, **kwargs): @@ -101,12 +100,10 @@ def __init__( self.params = params def to_tree(self, *args, level=0, **kwargs): ind = indent(level) - out_str = f""" - {ind}AlterKnowledgeBase( - {ind} name={self.name.to_string()}, - {ind} params={self.params} - {ind}) - """ + out_str = f"{ind}AlterKnowledgeBase(\n" \ + f"{ind} name={self.name.to_string()},\n" \ + f"{ind} params={self.params}\n" \ + f"{ind})" return out_str def get_string(self, *args, **kwargs):