From e4f402985ad23ff99975b662717c24f18873f64a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=B0=AD=E5=8D=9A=E5=8D=88?= Date: Tue, 12 Aug 2025 16:26:03 +0800 Subject: [PATCH 1/3] [fel] update llama splitter tool meta --- .../callable_registers.py | 29 - .../llama_splitter_tool.py | 47 +- .../fel_llama_splitter_tools/tools.json | 550 +++++++----------- .../types/document.py | 25 +- .../fel_llama_splitter_tools/types/media.py | 24 +- .../types/semantic_splitter_options.py | 21 + .../types/serializable.py | 25 - 7 files changed, 257 insertions(+), 464 deletions(-) delete mode 100644 framework/fel/python/plugins/fel_llama_splitter_tools/callable_registers.py create mode 100644 framework/fel/python/plugins/fel_llama_splitter_tools/types/semantic_splitter_options.py delete mode 100644 framework/fel/python/plugins/fel_llama_splitter_tools/types/serializable.py diff --git a/framework/fel/python/plugins/fel_llama_splitter_tools/callable_registers.py b/framework/fel/python/plugins/fel_llama_splitter_tools/callable_registers.py deleted file mode 100644 index 0cde3122..00000000 --- a/framework/fel/python/plugins/fel_llama_splitter_tools/callable_registers.py +++ /dev/null @@ -1,29 +0,0 @@ -# -- encoding: utf-8 -- -# Copyright (c) 2024 Huawei Technologies Co., Ltd. All Rights Reserved. -# This file is a part of the ModelEngine Project. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ====================================================================================================================== -import functools -from inspect import signature -from typing import Callable, Any, Tuple, List - -from fitframework import fit_logger -from fitframework.core.repo.fitable_register import register_fitable - - -def __invoke_tool(input_args: dict, tool_func: Callable[..., Any], **kwargs) -> Any: - return tool_func(**input_args, **kwargs) - - -def register_callable_tool(tool: Tuple[Callable[..., Any], List[str], str], module: str, generic_id: str): - func = tool[0] - fitable_id = f"{func.__name__}" - - tool_invoke = functools.partial(__invoke_tool, tool_func=func) - tool_invoke.__module__ = module - tool_invoke.__annotations__ = { - 'input_args': dict, - 'return': signature(func).return_annotation - } - register_fitable(generic_id, fitable_id, False, [], tool_invoke) - fit_logger.info("register: generic_id = %s, fitable_id = %s", generic_id, fitable_id, stacklevel=2) diff --git a/framework/fel/python/plugins/fel_llama_splitter_tools/llama_splitter_tool.py b/framework/fel/python/plugins/fel_llama_splitter_tools/llama_splitter_tool.py index 9c8fb421..9c3b21df 100644 --- a/framework/fel/python/plugins/fel_llama_splitter_tools/llama_splitter_tool.py +++ b/framework/fel/python/plugins/fel_llama_splitter_tools/llama_splitter_tool.py @@ -6,7 +6,7 @@ import traceback from typing import Tuple, List, Any, Callable -from fitframework import fit_logger +from fitframework import fit_logger, fitable from llama_index.core.node_parser import ( SentenceSplitter, TokenTextSplitter, @@ -17,11 +17,11 @@ from llama_index.core.schema import Document as LDocument from llama_index.embeddings.openai import OpenAIEmbedding -from .callable_registers import register_callable_tool from .node_utils import to_llama_index_document +from .types.semantic_splitter_options import SemanticSplitterOptions - -def sentence_splitter(text: str, separator: str, chunk_size: int, chunk_overlap: int, **kwargs) -> List[str]: +@fitable("llama.tools.sentence_splitter", "default") +def sentence_splitter(text: str, separator: str, chunk_size: int, chunk_overlap: int) -> List[str]: """Parse text with a preference for complete sentences.""" if len(text) == 0: return [] @@ -38,7 +38,8 @@ def sentence_splitter(text: str, separator: str, chunk_size: int, chunk_overlap: return [] -def token_text_splitter(text: str, separator: str, chunk_size: int, chunk_overlap: int, **kwargs) -> List[str]: +@fitable("llama.tools.token_text_splitter", "default") +def token_text_splitter(text: str, separator: str, chunk_size: int, chunk_overlap: int) -> List[str]: """Splitting text that looks at word tokens.""" if len(text) == 0: return [] @@ -55,14 +56,15 @@ def token_text_splitter(text: str, separator: str, chunk_size: int, chunk_overla return [] -def semantic_splitter(buffer_size: int, breakpoint_percentile_threshold: int, docs: List[LDocument], **kwargs) \ +# @fitable("llama.tools.semantic_splitter", "default") +def semantic_splitter(buffer_size: int, breakpoint_percentile_threshold: int, docs: List[LDocument], options: SemanticSplitterOptions) \ -> List[BaseNode]: """Splitting text that looks at word tokens.""" if len(docs) == 0: return [] - api_key = kwargs.get("api_key") - model_name = kwargs.get("model_name") - api_base = kwargs.get("api_base") + api_key = options.api_key + model_name = options.model_name + api_base = options.api_base embed_model = OpenAIEmbedding(model_name=model_name, api_base=api_base, api_key=api_key, max_tokens=4096) @@ -80,8 +82,9 @@ def semantic_splitter(buffer_size: int, breakpoint_percentile_threshold: int, do return [] +# @fitable("llama.tools.sentence_window_node_parser", "default") def sentence_window_node_parser(window_size: int, window_metadata_key: str, original_text_metadata_key: str, - docs: List[LDocument], **kwargs) -> List[BaseNode]: + docs: List[LDocument]) -> List[BaseNode]: """Splitting text that looks at word tokens.""" if len(docs) == 0: return [] @@ -96,26 +99,4 @@ def sentence_window_node_parser(window_size: int, window_metadata_key: str, orig except BaseException: fit_logger.error("Invoke semantic splitter failed.") traceback.print_exc() - return [] - - -# Tuple 结构: (tool_func, config_args, return_description) -splitter_basic_toolkit: List[Tuple[Callable[..., Any], List[str], str]] = [ - (sentence_splitter, ["text", "separator", "chunk_size", "chunk_overlap"], "Split sentences by sentence."), - (token_text_splitter, ["text", "separator", "chunk_size", "chunk_overlap"], "Split sentences by token."), - (semantic_splitter, - ["docs", "buffer_size", "breakpoint_percentile_threshold", "chunk_overlap", "model_name", "api_key", "api_base"], - "Split sentences by semantic."), - (sentence_window_node_parser, ["docs", "window_size", "window_metadata_key", "original_text_metadata_key"], - "Splits all documents into individual sentences") -] - -for tool in splitter_basic_toolkit: - register_callable_tool(tool, sentence_splitter.__module__, "llama_index.rag.toolkit") - -if __name__ == '__main__': - import time - from .llama_schema_helper import dump_llama_schema - - current_timestamp = time.strftime('%Y%m%d%H%M%S') - dump_llama_schema(splitter_basic_toolkit, f"./llama_tool_schema-{str(current_timestamp)}.json") + return [] \ No newline at end of file diff --git a/framework/fel/python/plugins/fel_llama_splitter_tools/tools.json b/framework/fel/python/plugins/fel_llama_splitter_tools/tools.json index ad8fad58..17896807 100644 --- a/framework/fel/python/plugins/fel_llama_splitter_tools/tools.json +++ b/framework/fel/python/plugins/fel_llama_splitter_tools/tools.json @@ -1,368 +1,208 @@ { - "tools": [ - { - "tags": [ - "LlamaIndex" - ], - "runnables": { - "LlamaIndex": { - "genericableId": "llama_index.rag.toolkit", - "fitableId": "sentence_splitter" - } + "version" : "1.0.0", + "definitionGroups" : [ { + "name" : "llama_splitter_tools", + "summary" : "Llama文本分割工具定义组", + "description" : "提供多种文本分割策略的工具定义,包括句子分割和词令牌分割,用于将长文本分割成适合处理的较小块", + "extensions" : { }, + "definitions" : [ { + "schema" : { + "name" : "SentenceSplitterTool", + "description" : "按句子分割文本,优先保持完整句子。该工具会将输入文本按照指定的分隔符和块大小进行分割,同时保持句子的完整性,避免在句子中间断开", + "parameters" : { + "type" : "object", + "properties" : { + "text" : { + "defaultValue" : "", + "description" : "待分割的文本内容", + "name" : "text", + "type" : "string", + "example" : "这是一段需要分割的文本。它包含多个句子。每个句子都应该保持完整。" }, - "schema": { - "name": "sentence_splitter", - "description": "Parse text with a preference for complete sentences.", - "parameters": { - "type": "object", - "properties": { - "text": { - "type": "string", - "description": "text" - }, - "separator": { - "type": "string", - "description": "separator" - }, - "chunk_size": { - "type": "string", - "description": "chunk_size" - }, - "chunk_overlap": { - "type": "string", - "description": "chunk_overlap" - } - }, - "required": [ - "text", - "separator", - "chunk_size", - "chunk_overlap" - ] - }, - "return": { - "title": "Split Sentences By Sentence.", - "type": "array", - "items": { - "type": "string" - } - }, - "parameterExtensions": { - "config": [ - "text", - "separator", - "chunk_size", - "chunk_overlap" - ] - } + "separator" : { + "defaultValue" : "", + "description" : "用于分割文本的分隔符,通常是句号、问号、感叹号等标点符号", + "name" : "separator", + "type" : "string", + "example" : "。" + }, + "chunk_size" : { + "defaultValue" : "", + "description" : "每个文本块的目标大小(字符数),用于控制分割后的文本块长度", + "name" : "chunk_size", + "type" : "integer", + "example" : "100" + }, + "chunk_overlap_size" : { + "defaultValue" : "", + "description" : "相邻文本块之间的重叠大小(字符数),用于保持上下文连贯性", + "name" : "chunk_overlap_size", + "type" : "integer", + "example" : "20" } + }, + "required" : [ "text", "separator", "chunk_size", "chunk_overlap_size" ] }, - { - "tags": [ - "LlamaIndex" - ], - "runnables": { - "LlamaIndex": { - "genericableId": "llama_index.rag.toolkit", - "fitableId": "token_text_splitter" - } + "order" : [ "text", "separator", "chunk_size", "chunk_overlap_size" ], + "return" : { + "type" : "array", + "items" : { + "type" : "string" + }, + "convertor" : "" + } + } + }, { + "schema" : { + "name" : "TokenTextSplitterTool", + "description" : "基于词令牌分割文本。该工具使用词令牌(token)作为分割单位,确保分割后的文本块在语义上更加合理,适合用于语言模型处理", + "parameters" : { + "type" : "object", + "properties" : { + "text" : { + "defaultValue" : "", + "description" : "待分割的文本内容", + "name" : "text", + "type" : "string", + "example" : "这是一段需要按词令牌分割的文本内容。" + }, + "separator" : { + "defaultValue" : "", + "description" : "用于分割文本的分隔符,可以是空格、标点符号等", + "name" : "separator", + "type" : "string", + "example" : " " }, - "schema": { - "name": "token_text_splitter", - "description": "Splitting text that looks at word tokens.", - "parameters": { - "type": "object", - "properties": { - "text": { - "type": "string", - "description": "text" - }, - "separator": { - "type": "string", - "description": "separator" - }, - "chunk_size": { - "type": "string", - "description": "chunk_size" - }, - "chunk_overlap": { - "type": "string", - "description": "chunk_overlap" - } - }, - "required": [ - "text", - "separator", - "chunk_size", - "chunk_overlap" - ] - }, - "return": { - "title": "Split Sentences By Token.", - "type": "array", - "items": { - "type": "string" - } - }, - "parameterExtensions": { - "config": [ - "text", - "separator", - "chunk_size", - "chunk_overlap" - ] - } + "chunk_size" : { + "defaultValue" : "", + "description" : "每个文本块的目标大小(词令牌数量),用于控制分割后的文本块长度", + "name" : "chunk_size", + "type" : "integer", + "example" : "50" + }, + "chunk_overlap_size" : { + "defaultValue" : "", + "description" : "相邻文本块之间的重叠大小(词令牌数量),用于保持上下文连贯性", + "name" : "chunk_overlap_size", + "type" : "integer", + "example" : "10" } + }, + "required" : [ "text", "separator", "chunk_size", "chunk_overlap_size" ] }, - { - "tags": [ - "LlamaIndex" - ], - "runnables": { - "LlamaIndex": { - "genericableId": "llama_index.rag.toolkit", - "fitableId": "semantic_splitter" - } + "order" : [ "text", "separator", "chunk_size", "chunk_overlap_size" ], + "return" : { + "type" : "array", + "items" : { + "type" : "string" + }, + "convertor" : "" + } + } + } ] + } ], + "toolGroups" : [ { + "name" : "llama_splitter_tools_impl", + "summary" : "Llama文本分割工具实现组", + "description" : "提供多种文本分割策略的具体实现,包括句子分割和词令牌分割,为Llama模型提供高效的文本预处理能力", + "extensions" : { }, + "definitionGroupName" : "llama_splitter_tools", + "tools" : [ { + "namespace" : "sentence_splitter", + "schema" : { + "name" : "SentenceSplitterTool", + "description" : "按句子分割文本,优先保持完整句子。该工具会将输入文本按照指定的分隔符和块大小进行分割,同时保持句子的完整性,避免在句子中间断开,确保分割后的文本块在语义上保持连贯", + "parameters" : { + "type" : "object", + "properties" : { + "text" : { + "name" : "text", + "type" : "string" + }, + "separator" : { + "name" : "separator", + "type" : "string" }, - "schema": { - "name": "semantic_splitter", - "description": "Splitting text that looks at word tokens.", - "parameters": { - "type": "object", - "properties": { - "buffer_size": { - "type": "string", - "description": "buffer_size" - }, - "breakpoint_percentile_threshold": { - "type": "string", - "description": "breakpoint_percentile_threshold" - }, - "docs": { - "type": "string", - "description": "docs" - }, - "chunk_overlap": { - "type": "string", - "description": "chunk_overlap" - }, - "model_name": { - "type": "string", - "description": "model_name" - }, - "api_key": { - "type": "string", - "description": "api_key" - }, - "api_base": { - "type": "string", - "description": "api_base" - } - }, - "required": [ - "buffer_size", - "breakpoint_percentile_threshold", - "docs" - ] - }, - "return": { - "title": "Split Sentences By Semantic.", - "type": "array", - "items": { - "title": "BaseNode", - "description": "Base node Object.\n\nGeneric abstract interface for retrievable nodes", - "type": "object", - "properties": { - "id_": { - "title": "Id ", - "description": "Unique ID of the node.", - "type": "string" - }, - "embedding": { - "title": "Embedding", - "description": "Embedding of the node.", - "type": "array", - "items": { - "type": "number" - } - }, - "extra_info": { - "title": "Extra Info", - "description": "A flat dictionary of metadata fields", - "type": "object" - }, - "excluded_embed_metadata_keys": { - "title": "Excluded Embed Metadata Keys", - "description": "Metadata keys that are excluded from text for the embed model.", - "type": "array", - "items": { - "type": "string" - } - }, - "excluded_llm_metadata_keys": { - "title": "Excluded Llm Metadata Keys", - "description": "Metadata keys that are excluded from text for the LLM.", - "type": "array", - "items": { - "type": "string" - } - }, - "relationships": { - "title": "Relationships", - "description": "A mapping of relationships to other node information.", - "type": "object", - "additionalProperties": { - "anyOf": [ - { - "$ref": "#/definitions/RelatedNodeInfo" - }, - { - "type": "array", - "items": { - "$ref": "#/definitions/RelatedNodeInfo" - } - } - ] - } - }, - "class_name": { - "title": "Class Name", - "type": "string", - "default": "base_component" - } - } - } - }, - "parameterExtensions": { - "config": [ - "docs", - "buffer_size", - "breakpoint_percentile_threshold", - "chunk_overlap", - "model_name", - "api_key", - "api_base" - ] - } + "chunk_size" : { + "name" : "chunk_size", + "type" : "integer" + }, + "chunk_overlap_size" : { + "name" : "chunk_overlap_size", + "type" : "integer" } + }, + "required" : [ ] }, - { - "tags": [ - "LlamaIndex" - ], - "runnables": { - "LlamaIndex": { - "genericableId": "llama_index.rag.toolkit", - "fitableId": "sentence_window_node_parser" - } + "order" : [ "text", "separator", "chunk_size", "chunk_overlap_size" ], + "return" : { + "name" : "", + "description" : "返回按句子分割的文本列表,优先保持完整句子。每个文本块都尽可能保持句子的完整性,避免在句子中间断开", + "type" : "array", + "items" : { + "type" : "string" + }, + "convertor" : "" + } + }, + "runnables" : { + "FIT" : { + "genericableId" : "llama.tools.sentence_splitter", + "fitableId" : "default" + } + }, + "extensions" : { + "tags" : [ "Llama" ] + }, + "definitionName" : "SentenceSplitterTool" + }, { + "namespace" : "token_text_splitter", + "schema" : { + "name" : "TokenTextSplitterTool", + "description" : "基于词令牌分割文本。该工具使用词令牌(token)作为分割单位,确保分割后的文本块在语义上更加合理,适合用于语言模型处理,能够更好地保持文本的语义完整性", + "parameters" : { + "type" : "object", + "properties" : { + "text" : { + "name" : "text", + "type" : "string" + }, + "separator" : { + "name" : "separator", + "type" : "string" }, - "schema": { - "name": "sentence_window_node_parser", - "description": "Splitting text that looks at word tokens.", - "parameters": { - "type": "object", - "properties": { - "window_size": { - "type": "string", - "description": "window_size" - }, - "window_metadata_key": { - "type": "string", - "description": "window_metadata_key" - }, - "original_text_metadata_key": { - "type": "string", - "description": "original_text_metadata_key" - }, - "docs": { - "type": "string", - "description": "docs" - } - }, - "required": [ - "window_size", - "window_metadata_key", - "original_text_metadata_key", - "docs" - ] - }, - "return": { - "title": "Splits All Documents Into Individual Sentences", - "type": "array", - "items": { - "title": "BaseNode", - "description": "Base node Object.\n\nGeneric abstract interface for retrievable nodes", - "type": "object", - "properties": { - "id_": { - "title": "Id ", - "description": "Unique ID of the node.", - "type": "string" - }, - "embedding": { - "title": "Embedding", - "description": "Embedding of the node.", - "type": "array", - "items": { - "type": "number" - } - }, - "extra_info": { - "title": "Extra Info", - "description": "A flat dictionary of metadata fields", - "type": "object" - }, - "excluded_embed_metadata_keys": { - "title": "Excluded Embed Metadata Keys", - "description": "Metadata keys that are excluded from text for the embed model.", - "type": "array", - "items": { - "type": "string" - } - }, - "excluded_llm_metadata_keys": { - "title": "Excluded Llm Metadata Keys", - "description": "Metadata keys that are excluded from text for the LLM.", - "type": "array", - "items": { - "type": "string" - } - }, - "relationships": { - "title": "Relationships", - "description": "A mapping of relationships to other node information.", - "type": "object", - "additionalProperties": { - "anyOf": [ - { - "$ref": "#/definitions/RelatedNodeInfo" - }, - { - "type": "array", - "items": { - "$ref": "#/definitions/RelatedNodeInfo" - } - } - ] - } - }, - "class_name": { - "title": "Class Name", - "type": "string", - "default": "base_component" - } - } - } - }, - "parameterExtensions": { - "config": [ - "docs", - "window_size", - "window_metadata_key", - "original_text_metadata_key" - ] - } + "chunk_size" : { + "name" : "chunk_size", + "type" : "integer" + }, + "chunk_overlap_size" : { + "name" : "chunk_overlap_size", + "type" : "integer" } + }, + "required" : [ ] + }, + "order" : [ "text", "separator", "chunk_size", "chunk_overlap_size" ], + "return" : { + "name" : "", + "description" : "返回基于词令牌分割的文本列表。每个文本块都基于词令牌进行分割,确保在语义上更加合理,适合语言模型处理", + "type" : "array", + "items" : { + "type" : "string" + }, + "convertor" : "" + } + }, + "runnables" : { + "FIT" : { + "genericableId" : "llama.tools.token_text_splitter", + "fitableId" : "default" } - ] -} \ No newline at end of file + }, + "extensions" : { + "tags" : [ "Llama" ] + }, + "definitionName" : "TokenTextSplitterTool" + } ] + } ] +} diff --git a/framework/fel/python/plugins/fel_llama_splitter_tools/types/document.py b/framework/fel/python/plugins/fel_llama_splitter_tools/types/document.py index 4989999f..665a3a47 100644 --- a/framework/fel/python/plugins/fel_llama_splitter_tools/types/document.py +++ b/framework/fel/python/plugins/fel_llama_splitter_tools/types/document.py @@ -5,18 +5,21 @@ # ====================================================================================================================== import typing -from .serializable import Serializable from .media import Media +class Document(object): + def __init__(self, content: str, media: Media , metadata: typing.Dict[str, object] ): + self.content = content + self.media = media + self.metadata = metadata + + def __eq__(self, other): + if not isinstance(other, self.__class__): + return False + return self.__dict__ == other.__dict__ -class Document(Serializable): - """ - Document. - """ - content: str - media: Media = None - metadata: typing.Dict[str, object] + def __hash__(self): + return hash(tuple(self.__dict__.values())) - class Config: - frozen = True - smart_union = True + def __repr__(self): + return str((self.__dict__.values())) \ No newline at end of file diff --git a/framework/fel/python/plugins/fel_llama_splitter_tools/types/media.py b/framework/fel/python/plugins/fel_llama_splitter_tools/types/media.py index b1bdb54a..41c9a760 100644 --- a/framework/fel/python/plugins/fel_llama_splitter_tools/types/media.py +++ b/framework/fel/python/plugins/fel_llama_splitter_tools/types/media.py @@ -3,16 +3,18 @@ # This file is a part of the ModelEngine Project. # Licensed under the MIT License. See License.txt in the project root for license information. # ====================================================================================================================== -from .serializable import Serializable +class Media(object): + def __init__(self, mime: str, data: str): + self.mime = mime + self.data = data + + def __eq__(self, other): + if not isinstance(other, self.__class__): + return False + return self.__dict__ == other.__dict__ + def __hash__(self): + return hash(tuple(self.__dict__.values())) -class Media(Serializable): - """ - Media. - """ - mime: str - data: str - - class Config: - frozen = True - smart_union = True + def __repr__(self): + return str((self.__dict__.values())) \ No newline at end of file diff --git a/framework/fel/python/plugins/fel_llama_splitter_tools/types/semantic_splitter_options.py b/framework/fel/python/plugins/fel_llama_splitter_tools/types/semantic_splitter_options.py new file mode 100644 index 00000000..5aae3eff --- /dev/null +++ b/framework/fel/python/plugins/fel_llama_splitter_tools/types/semantic_splitter_options.py @@ -0,0 +1,21 @@ +# -- encoding: utf-8 -- +# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved. +# This file is a part of the ModelEngine Project. +# Licensed under the MIT License. See License.txt in the project root for license information. +# ====================================================================================================================== +class SemanticSplitterOptions(object): + def __init__(self , model_name : str , api_base : str , api_key : str): + self.model_name = model_name + self.api_key = api_key + self.api_base = api_base + + def __eq__(self, other): + if not isinstance(other, self.__class__): + return False + return self.__dict__ == other.__dict__ + + def __hash__(self): + return hash(tuple(self.__dict__.values())) + + def __repr__(self): + return str((self.__dict__.values())) \ No newline at end of file diff --git a/framework/fel/python/plugins/fel_llama_splitter_tools/types/serializable.py b/framework/fel/python/plugins/fel_llama_splitter_tools/types/serializable.py deleted file mode 100644 index 4522897f..00000000 --- a/framework/fel/python/plugins/fel_llama_splitter_tools/types/serializable.py +++ /dev/null @@ -1,25 +0,0 @@ -# -- encoding: utf-8 -- -# Copyright (c) 2024 Huawei Technologies Co., Ltd. All Rights Reserved. -# This file is a part of the ModelEngine Project. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ====================================================================================================================== -import typing - -try: - import pydantic - - if pydantic.__version__.startswith("1."): - raise ImportError - import pydantic.v1 as pydantic -except ImportError: - import pydantic - - -class Serializable(pydantic.BaseModel): - def json(self, **kwargs: typing.Any) -> str: - kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs} - return super().json(**kwargs_with_defaults) - - def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]: - kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs} - return super().dict(**kwargs_with_defaults) \ No newline at end of file From aeff11196a725adc45f6b13be1dcf0f7bb9aea5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=B0=AD=E5=8D=9A=E5=8D=88?= Date: Wed, 13 Aug 2025 09:06:19 +0800 Subject: [PATCH 2/3] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E6=A3=80=E8=A7=86?= =?UTF-8?q?=E6=84=8F=E8=A7=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../python/plugins/fel_llama_splitter_tools/types/document.py | 1 + .../fel/python/plugins/fel_llama_splitter_tools/types/media.py | 1 + .../fel_llama_splitter_tools/types/semantic_splitter_options.py | 1 + 3 files changed, 3 insertions(+) diff --git a/framework/fel/python/plugins/fel_llama_splitter_tools/types/document.py b/framework/fel/python/plugins/fel_llama_splitter_tools/types/document.py index 665a3a47..47755baf 100644 --- a/framework/fel/python/plugins/fel_llama_splitter_tools/types/document.py +++ b/framework/fel/python/plugins/fel_llama_splitter_tools/types/document.py @@ -3,6 +3,7 @@ # This file is a part of the ModelEngine Project. # Licensed under the MIT License. See License.txt in the project root for license information. # ====================================================================================================================== + import typing from .media import Media diff --git a/framework/fel/python/plugins/fel_llama_splitter_tools/types/media.py b/framework/fel/python/plugins/fel_llama_splitter_tools/types/media.py index 41c9a760..30b77060 100644 --- a/framework/fel/python/plugins/fel_llama_splitter_tools/types/media.py +++ b/framework/fel/python/plugins/fel_llama_splitter_tools/types/media.py @@ -3,6 +3,7 @@ # This file is a part of the ModelEngine Project. # Licensed under the MIT License. See License.txt in the project root for license information. # ====================================================================================================================== + class Media(object): def __init__(self, mime: str, data: str): self.mime = mime diff --git a/framework/fel/python/plugins/fel_llama_splitter_tools/types/semantic_splitter_options.py b/framework/fel/python/plugins/fel_llama_splitter_tools/types/semantic_splitter_options.py index 5aae3eff..ab3f3020 100644 --- a/framework/fel/python/plugins/fel_llama_splitter_tools/types/semantic_splitter_options.py +++ b/framework/fel/python/plugins/fel_llama_splitter_tools/types/semantic_splitter_options.py @@ -3,6 +3,7 @@ # This file is a part of the ModelEngine Project. # Licensed under the MIT License. See License.txt in the project root for license information. # ====================================================================================================================== + class SemanticSplitterOptions(object): def __init__(self , model_name : str , api_base : str , api_key : str): self.model_name = model_name From 59ab0a5b41fa7856812236519d1d3a30120ad692 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=B0=AD=E5=8D=9A=E5=8D=88?= Date: Wed, 13 Aug 2025 10:20:51 +0800 Subject: [PATCH 3/3] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E6=A3=80=E8=A7=86?= =?UTF-8?q?=E6=84=8F=E8=A7=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../plugins/fel_llama_splitter_tools/llama_splitter_tool.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/framework/fel/python/plugins/fel_llama_splitter_tools/llama_splitter_tool.py b/framework/fel/python/plugins/fel_llama_splitter_tools/llama_splitter_tool.py index 9c3b21df..3a2decfd 100644 --- a/framework/fel/python/plugins/fel_llama_splitter_tools/llama_splitter_tool.py +++ b/framework/fel/python/plugins/fel_llama_splitter_tools/llama_splitter_tool.py @@ -6,7 +6,8 @@ import traceback from typing import Tuple, List, Any, Callable -from fitframework import fit_logger, fitable +from fitframework import fit_logger +from fitframework.api.decorators import fitable from llama_index.core.node_parser import ( SentenceSplitter, TokenTextSplitter,