@staticmethod
def _get_langchain_prompt_string(content: str) -> str:
    """Convert a Langfuse prompt string into a LangChain f-string template.

    Literal JSON braces are doubled first so that LangChain treats them as
    plain text; afterwards every ``{{ name }}`` placeholder is rewritten to
    the single-brace ``{name}`` form LangChain expects.
    """
    json_escaped_content = BasePromptClient._escape_json_for_langchain(content)

    return re.sub(r"{{\s*(\w+)\s*}}", r"{\g<1>}", json_escaped_content)


@staticmethod
def _escape_json_for_langchain(text: str) -> str:
    """Escape every curly brace that is part of a JSON object by doubling it.

    A single opening brace is considered JSON-related when, after skipping
    any whitespace, the next character is a single quote, a double quote,
    or a closing brace (an empty object such as ``{}``).

    Braces that are already doubled (``{{variable}}`` placeholders or
    pre-escaped JSON) are left untouched. Every opener, including ``{{``,
    is recorded on a stack so that a run like ``}}`` is interpreted by
    what it actually closes: two adjacent JSON closers in compact JSON
    (``{"a": {"b": 1}}``) are each doubled instead of being mistaken for
    one pre-doubled pair.

    Braces that appear inside JSON string values are not distinguished
    from structural braces.

    Parameters
    ----------
    text : str
        The input string that may contain JSON snippets.

    Returns
    -------
    str
        The string with JSON-related braces doubled.
    """
    doubled, json_brace, plain_brace = "doubled", "json", "plain"

    out = []  # collected output fragments
    stack = []  # kind of every opener that has not been closed yet
    i, n = 0, len(text)

    while i < n:
        ch = text[i]

        # ---------- opening brace ----------
        if ch == "{":
            # An existing "{{" is kept verbatim, but remembered so that its
            # matching "}}" can be told apart from two JSON closers.
            if text.startswith("{{", i):
                out.append("{{")
                stack.append(doubled)
                i += 2
                continue

            # Look ahead to the next non-whitespace character.
            j = i + 1
            while j < n and text[j].isspace():
                j += 1

            is_json = j < n and text[j] in ("'", '"', "}")
            out.append("{{" if is_json else "{")
            stack.append(json_brace if is_json else plain_brace)
            i += 1
            continue

        # ---------- closing brace ----------
        if ch == "}":
            kind = stack[-1] if stack else None

            if kind == json_brace:
                # Closes a JSON opener that was doubled above.
                stack.pop()
                out.append("}}")
                i += 1
            elif kind == plain_brace:
                # Closes a single-brace (LangChain-style) placeholder.
                stack.pop()
                out.append("}")
                i += 1
            elif text.startswith("}}", i):
                # Closes a "{{" opener, or is an unmatched pre-doubled
                # pair: keep it exactly as written.
                if kind == doubled:
                    stack.pop()
                out.append("}}")
                i += 2
            else:
                # Stray single closer with nothing suitable to match.
                out.append("}")
                i += 1
            continue

        # ---------- any other character ----------
        out.append(ch)
        i += 1

    return "".join(out)
JSON.""" + prompt_string = """This is a prompt with {{animal}} and {{location}}. + +{{ + "metadata": {{ + "context": "test", + "nested": {{ + "animal": {{animal}}, + "properties": {{ + "location": "{{location}}", + "count": 42 + }} + }} + }}, + "data": [ + {{ + "type": "primary", + "value": {{animal}} + }} + ] +}}""" + + prompt = TextPromptClient( + Prompt_Text( + type="text", + name="nested_json_test", + version=1, + config={}, + tags=[], + labels=[], + prompt=prompt_string, + ) + ) + + langchain_prompt_string = prompt.get_langchain_prompt() + langchain_prompt = PromptTemplate.from_template(langchain_prompt_string) + formatted_prompt = langchain_prompt.format(animal="cat", location="Paris") + + expected = """This is a prompt with cat and Paris. + +{ + "metadata": { + "context": "test", + "nested": { + "animal": cat, + "properties": { + "location": "Paris", + "count": 42 + } + } + }, + "data": [ + { + "type": "primary", + "value": cat + } + ] +}""" + + assert formatted_prompt == expected + + def test_mixed_variables_with_nested_json(self): + """Test normal variables (double braces) and Langchain variables (single braces) with nested JSON.""" + prompt_string = """Normal variable: {{user_name}} +Langchain variable: {user_age} + +{{ + "user": {{ + "name": {{user_name}}, + "age": {user_age}, + "profile": {{ + "settings": {{ + "theme": "dark", + "notifications": true + }} + }} + }}, + "system": {{ + "version": "1.0", + "active": true + }} +}}""" + + prompt = TextPromptClient( + Prompt_Text( + type="text", + name="mixed_variables_test", + version=1, + config={}, + tags=[], + labels=[], + prompt=prompt_string, + ) + ) + + langchain_prompt_string = prompt.get_langchain_prompt() + langchain_prompt = PromptTemplate.from_template(langchain_prompt_string) + formatted_prompt = langchain_prompt.format(user_name="Alice", user_age=25) + + expected = """Normal variable: Alice +Langchain variable: 25 + +{ + "user": { + "name": Alice, + "age": 25, + "profile": { + "settings": { + 
"theme": "dark", + "notifications": true + } + } + }, + "system": { + "version": "1.0", + "active": true + } +}""" + + assert formatted_prompt == expected + + def test_variables_inside_and_alongside_json(self): + """Test variables both alongside AND INSIDE complex nested JSON.""" + prompt_string = """System message: {{system_msg}} +User input: {user_input} + +{{ + "request": {{ + "system": {{system_msg}}, + "user": {user_input}, + "config": {{ + "model": "gpt-4", + "temperature": 0.7, + "metadata": {{ + "session": {{session_id}}, + "timestamp": {timestamp}, + "nested_data": {{ + "level1": {{ + "level2": {{ + "user_var": {{user_name}}, + "system_var": {system_status} + }} + }} + }} + }} + }} + }}, + "context": {{context_data}} +}} + +Final note: {{system_msg}} and {user_input}""" + + prompt = TextPromptClient( + Prompt_Text( + type="text", + name="variables_inside_json_test", + version=1, + config={}, + tags=[], + labels=[], + prompt=prompt_string, + ) + ) + + langchain_prompt_string = prompt.get_langchain_prompt() + langchain_prompt = PromptTemplate.from_template(langchain_prompt_string) + formatted_prompt = langchain_prompt.format( + system_msg="Hello", + user_input="Test input", + session_id="sess123", + timestamp=1234567890, + user_name="Bob", + system_status="active", + context_data="context_info", + ) + + expected = """System message: Hello +User input: Test input + +{ + "request": { + "system": Hello, + "user": Test input, + "config": { + "model": "gpt-4", + "temperature": 0.7, + "metadata": { + "session": sess123, + "timestamp": 1234567890, + "nested_data": { + "level1": { + "level2": { + "user_var": Bob, + "system_var": active + } + } + } + } + } + }, + "context": context_info +} + +Final note: Hello and Test input""" + + assert formatted_prompt == expected + + def test_edge_case_empty_json_objects(self): + """Test edge case with empty JSON objects and arrays.""" + prompt_string = """Variable: {{test_var}} + +{{ + "empty_object": {{}}, + "empty_array": [], 
+ "mixed": {{ + "data": {{test_var}}, + "empty": {{}}, + "nested_empty": {{ + "inner": {{}} + }} + }} +}}""" + + prompt = TextPromptClient( + Prompt_Text( + type="text", + name="empty_json_test", + version=1, + config={}, + tags=[], + labels=[], + prompt=prompt_string, + ) + ) + + langchain_prompt_string = prompt.get_langchain_prompt() + langchain_prompt = PromptTemplate.from_template(langchain_prompt_string) + formatted_prompt = langchain_prompt.format(test_var="value") + + expected = """Variable: value + +{ + "empty_object": {}, + "empty_array": [], + "mixed": { + "data": value, + "empty": {}, + "nested_empty": { + "inner": {} + } + } +}""" + + assert formatted_prompt == expected + + def test_edge_case_nested_quotes_in_json(self): + """Test edge case with nested quotes and escaped characters in JSON.""" + prompt_string = """Message: {{message}} + +{{ + "text": "This is a \\"quoted\\" string", + "user_message": {{message}}, + "escaped": "Line 1\\\\nLine 2", + "complex": {{ + "description": "Contains 'single' and \\"double\\" quotes", + "dynamic": {{message}} + }} +}}""" + + prompt = TextPromptClient( + Prompt_Text( + type="text", + name="nested_quotes_test", + version=1, + config={}, + tags=[], + labels=[], + prompt=prompt_string, + ) + ) + + langchain_prompt_string = prompt.get_langchain_prompt() + langchain_prompt = PromptTemplate.from_template(langchain_prompt_string) + formatted_prompt = langchain_prompt.format(message="Hello world") + + expected = """Message: Hello world + +{ + "text": "This is a \\"quoted\\" string", + "user_message": Hello world, + "escaped": "Line 1\\\\nLine 2", + "complex": { + "description": "Contains 'single' and \\"double\\" quotes", + "dynamic": Hello world + } +}""" + + assert formatted_prompt == expected + + def test_edge_case_json_with_variables_in_strings(self): + """Test that double braces inside JSON strings are treated as normal variables.""" + prompt_string = """Variable: {{test_var}} + +{{ + "text_with_braces": "This has 
{{connector}} characters", + "also_braces": "Format: {{key}} = {{value}}", + "user_data": {{test_var}} +}}""" + + prompt = TextPromptClient( + Prompt_Text( + type="text", + name="variables_in_strings_test", + version=1, + config={}, + tags=[], + labels=[], + prompt=prompt_string, + ) + ) + + langchain_prompt_string = prompt.get_langchain_prompt() + langchain_prompt = PromptTemplate.from_template(langchain_prompt_string) + formatted_prompt = langchain_prompt.format( + test_var="test_value", key="name", value="John", connector="special" + ) + + expected = """Variable: test_value + +{ + "text_with_braces": "This has special characters", + "also_braces": "Format: name = John", + "user_data": test_value +}""" + + assert formatted_prompt == expected + + def test_complex_real_world_scenario(self): + """Test a complex real-world scenario combining all features.""" + prompt_string = """System: {{system_prompt}} +User query: {user_query} +Context: {{context}} + +{{ + "request": {{ + "system_instruction": {{system_prompt}}, + "user_input": {user_query}, + "context": {{context}}, + "settings": {{ + "model": "gpt-4", + "temperature": 0.7, + "max_tokens": 1000, + "functions": [ + {{ + "name": "search", + "description": "Search for information", + "parameters": {{ + "query": {user_query}, + "context": {{context}} + }} + }} + ] + }}, + "metadata": {{ + "session_id": {{session_id}}, + "timestamp": {timestamp}, + "user_info": {{ + "id": {user_id}, + "preferences": {{ + "language": "en", + "format": "json" + }} + }} + }} + }}, + "response_format": {{ + "type": "structured", + "schema": {{ + "answer": "string", + "confidence": "number", + "sources": "array" + }} + }} +}} + +Instructions: Use {{system_prompt}} to process {user_query} with context {{context}}.""" + + prompt = TextPromptClient( + Prompt_Text( + type="text", + name="complex_scenario_test", + version=1, + config={}, + tags=[], + labels=[], + prompt=prompt_string, + ) + ) + + langchain_prompt_string = 
prompt.get_langchain_prompt() + langchain_prompt = PromptTemplate.from_template(langchain_prompt_string) + formatted_prompt = langchain_prompt.format( + system_prompt="You are a helpful assistant", + user_query="What is the weather?", + context="Weather inquiry", + session_id="sess_123", + timestamp=1234567890, + user_id="user_456", + ) + + expected = """System: You are a helpful assistant +User query: What is the weather? +Context: Weather inquiry + +{ + "request": { + "system_instruction": You are a helpful assistant, + "user_input": What is the weather?, + "context": Weather inquiry, + "settings": { + "model": "gpt-4", + "temperature": 0.7, + "max_tokens": 1000, + "functions": [ + { + "name": "search", + "description": "Search for information", + "parameters": { + "query": What is the weather?, + "context": Weather inquiry + } + } + ] + }, + "metadata": { + "session_id": sess_123, + "timestamp": 1234567890, + "user_info": { + "id": user_456, + "preferences": { + "language": "en", + "format": "json" + } + } + } + }, + "response_format": { + "type": "structured", + "schema": { + "answer": "string", + "confidence": "number", + "sources": "array" + } + } +} + +Instructions: Use You are a helpful assistant to process What is the weather? with context Weather inquiry.""" + + assert formatted_prompt == expected + + def test_chat_prompt_with_json_variables(self): + """Test that chat prompts work correctly with JSON handling and variables.""" + chat_messages = [ + ChatMessage( + role="system", + content="""You are {{assistant_type}} assistant. + +Configuration: +{{ + "settings": {{ + "model": "{{model_name}}", + "temperature": {temperature}, + "capabilities": [ + {{ + "name": "search", + "enabled": {{search_enabled}}, + "params": {{ + "provider": "{{search_provider}}" + }} + }} + ] + }} +}}""", + ), + ChatMessage( + role="user", + content="Hello {{user_name}}! 
I need help with: {{user_request}}", + ), + ] + + prompt = ChatPromptClient( + Prompt_Chat( + type="chat", + name="chat_json_test", + version=1, + config={}, + tags=[], + labels=[], + prompt=chat_messages, + ) + ) + + langchain_messages = prompt.get_langchain_prompt() + langchain_prompt = ChatPromptTemplate.from_messages(langchain_messages) + formatted_messages = langchain_prompt.format_messages( + assistant_type="helpful", + model_name="gpt-4", + temperature=0.7, + search_enabled="true", + search_provider="google", + user_name="Alice", + user_request="data analysis", + ) + + expected_system = """You are helpful assistant. + +Configuration: +{ + "settings": { + "model": "gpt-4", + "temperature": 0.7, + "capabilities": [ + { + "name": "search", + "enabled": true, + "params": { + "provider": "google" + } + } + ] + } +}""" + + expected_user = "Hello Alice! I need help with: data analysis" + + assert len(formatted_messages) == 2 + assert formatted_messages[0].content == expected_system + assert formatted_messages[1].content == expected_user