From 3d293b4cf70f78f9af7639ea8333210e3fd9aa62 Mon Sep 17 00:00:00 2001
From: amanjaiswal73892
Date: Wed, 30 Apr 2025 22:35:37 +0000
Subject: [PATCH 1/3] rename langchain BaseMessage

---
 src/agentlab/llm/llm_utils.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/agentlab/llm/llm_utils.py b/src/agentlab/llm/llm_utils.py
index 2536200e..ce65a752 100644
--- a/src/agentlab/llm/llm_utils.py
+++ b/src/agentlab/llm/llm_utils.py
@@ -14,7 +14,8 @@
 import numpy as np
 import tiktoken
 import yaml
-from langchain.schema import BaseMessage
+# from langchain.schema import BaseMessage
+from langchain.schema import BaseMessage as LangchainBaseMessage
 from langchain_community.adapters.openai import convert_message_to_dict
 from PIL import Image
 from transformers import AutoModel, AutoTokenizer
@@ -23,14 +24,14 @@
 from agentlab.llm.chat_api import ChatModel
 
 
-def messages_to_dict(messages: list[dict] | list[BaseMessage]) -> dict:
+def messages_to_dict(messages: list[dict] | list[LangchainBaseMessage]) -> dict:
     new_messages = Discussion()
     for m in messages:
         if isinstance(m, dict):
             new_messages.add_message(m)
         elif isinstance(m, str):
             new_messages.add_message({"role": "", "content": m})
-        elif isinstance(m, BaseMessage):
+        elif isinstance(m, LangchainBaseMessage):
             new_messages.add_message(convert_message_to_dict(m))
         else:
             raise ValueError(f"Unknown message type: {type(m)}")

From 61106a2701771603b0b3dd12b57378817eef607e Mon Sep 17 00:00:00 2001
From: amanjaiswal73892
Date: Wed, 30 Apr 2025 23:34:17 +0000
Subject: [PATCH 2/3] fix: formatting

---
 src/agentlab/llm/llm_utils.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/src/agentlab/llm/llm_utils.py b/src/agentlab/llm/llm_utils.py
index ce65a752..a6b1bf92 100644
--- a/src/agentlab/llm/llm_utils.py
+++ b/src/agentlab/llm/llm_utils.py
@@ -14,7 +14,6 @@
 import numpy as np
 import tiktoken
 import yaml
-# from langchain.schema import BaseMessage
 from langchain.schema import BaseMessage as LangchainBaseMessage
 from langchain_community.adapters.openai import convert_message_to_dict
 from PIL import Image
 from transformers import AutoModel, AutoTokenizer
@@ -186,11 +185,15 @@ def get_tokenizer(model_name="gpt-4"):
     try:
         return tiktoken.encoding_for_model(model_name)
     except KeyError:
-        logging.info(f"Could not find a tokenizer for model {model_name}. Trying HuggingFace.")
+        logging.info(
+            f"Could not find a tokenizer for model {model_name}. Trying HuggingFace."
+        )
         try:
             return AutoTokenizer.from_pretrained(model_name)
         except OSError:
-            logging.info(f"Could not find a tokenizer for model {model_name}. Defaulting to gpt-4.")
+            logging.info(
+                f"Could not find a tokenizer for model {model_name}. Defaulting to gpt-4."
+            )
             return tiktoken.encoding_for_model("gpt-4")
@@ -402,7 +405,9 @@ def __str__(self, warn_if_image=False) -> str:
         else:
             logging.info(msg)
 
-        return "\n".join([elem["text"] for elem in self["content"] if elem["type"] == "text"])
+        return "\n".join(
+            [elem["text"] for elem in self["content"] if elem["type"] == "text"]
+        )
 
     def add_content(self, type: str, content: Any):
         if isinstance(self["content"], str):
@@ -540,11 +545,12 @@ def __getitem__(self, key):
 
     def to_markdown(self):
         self.merge()
-        return "\n".join([f"Message {i}\n{m.to_markdown()}\n" for i, m in enumerate(self.messages)])
+        return "\n".join(
+            [f"Message {i}\n{m.to_markdown()}\n" for i, m in enumerate(self.messages)]
+        )
 
 
 if __name__ == "__main__":
-    # model_to_download = "THUDM/agentlm-70b"
     model_to_download = "databricks/dbrx-instruct"
     save_dir = "/mnt/ui_copilot/data_rw/base_models/"

From 8c73b2327810b831251f5d9f9b0f6f0208abb3a9 Mon Sep 17 00:00:00 2001
From: amanjaiswal73892
Date: Wed, 30 Apr 2025 23:47:59 +0000
Subject: [PATCH 3/3] fix: formatting ruff -> black

---
 src/agentlab/llm/llm_utils.py | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/src/agentlab/llm/llm_utils.py b/src/agentlab/llm/llm_utils.py
index a6b1bf92..4a0a5f5b 100644
--- a/src/agentlab/llm/llm_utils.py
+++ b/src/agentlab/llm/llm_utils.py
@@ -185,15 +185,11 @@ def get_tokenizer(model_name="gpt-4"):
     try:
         return tiktoken.encoding_for_model(model_name)
     except KeyError:
-        logging.info(
-            f"Could not find a tokenizer for model {model_name}. Trying HuggingFace."
-        )
+        logging.info(f"Could not find a tokenizer for model {model_name}. Trying HuggingFace.")
         try:
             return AutoTokenizer.from_pretrained(model_name)
         except OSError:
-            logging.info(
-                f"Could not find a tokenizer for model {model_name}. Defaulting to gpt-4."
-            )
+            logging.info(f"Could not find a tokenizer for model {model_name}. Defaulting to gpt-4.")
             return tiktoken.encoding_for_model("gpt-4")
@@ -405,9 +401,7 @@ def __str__(self, warn_if_image=False) -> str:
         else:
             logging.info(msg)
 
-        return "\n".join(
-            [elem["text"] for elem in self["content"] if elem["type"] == "text"]
-        )
+        return "\n".join([elem["text"] for elem in self["content"] if elem["type"] == "text"])
 
     def add_content(self, type: str, content: Any):
         if isinstance(self["content"], str):
@@ -545,9 +539,7 @@ def __getitem__(self, key):
 
     def to_markdown(self):
         self.merge()
-        return "\n".join(
-            [f"Message {i}\n{m.to_markdown()}\n" for i, m in enumerate(self.messages)]
-        )
+        return "\n".join([f"Message {i}\n{m.to_markdown()}\n" for i, m in enumerate(self.messages)])
 
 
 if __name__ == "__main__":
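
A note on PATCH 1/3: the import alias (from langchain.schema import BaseMessage
as LangchainBaseMessage) presumably keeps langchain's class from colliding with
a message type of the same name defined in llm_utils.py itself, so the
isinstance dispatch in messages_to_dict checks the intended class. Below is a
minimal, self-contained sketch of the pattern, not AgentLab's actual code: the
local BaseMessage class and the normalize() helper are hypothetical stand-ins;
only the langchain imports are real.

    # Sketch of the aliasing pattern from PATCH 1/3. The local BaseMessage
    # and normalize() are hypothetical; the langchain names are real.
    from langchain.schema import BaseMessage as LangchainBaseMessage
    from langchain.schema import HumanMessage


    class BaseMessage(dict):
        """Project-local message type; the alias keeps this name free."""


    def normalize(m) -> dict:
        # With distinct names, dispatch on the third-party type is unambiguous.
        if isinstance(m, LangchainBaseMessage):
            return {"role": m.type, "content": m.content}
        if isinstance(m, (BaseMessage, dict)):
            return dict(m)
        if isinstance(m, str):
            return {"role": "", "content": m}
        raise ValueError(f"Unknown message type: {type(m)}")


    print(normalize(HumanMessage(content="hi")))  # {'role': 'human', 'content': 'hi'}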
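
A note on PATCH 2/3 and 3/3: the two commits are formatting churn only; the
calls wrapped by the first formatter pass are restored to one-liners by black.
The one-liners the final commit keeps run close to 100 characters, which
suggests a line-length setting above black's default of 88; that value is an
inference, nothing in the diff states it. A self-contained, runtime-equivalent
before/after:

    # Both forms are equivalent at runtime; only the formatter style differs.
    import logging

    logging.basicConfig(level=logging.INFO)
    model_name = "gpt-4"

    # PATCH 2/3 style: wrapped for a narrower line length.
    logging.info(
        f"Could not find a tokenizer for model {model_name}. Trying HuggingFace."
    )

    # PATCH 3/3 style: black at the project's (assumed) wider line length.
    logging.info(f"Could not find a tokenizer for model {model_name}. Trying HuggingFace.")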