Add integration tests for tool calling with Meta and Cohere models

fede-kamel · fede-kamel · commit d590f209463a · 2025-10-21T10:06:21.000-04:00
Tests verify that the fix prevents infinite tool calling loops for:
- Meta Llama 4 Scout
- Meta Llama 3.3 70B
- Cohere Command A
- Cohere Command R Plus

Each test confirms that after receiving tool results, the model
generates a final response without making additional tool calls.

Signed-off-by: Federico Kamelhar <federico.kamelhar@oracle.com>
diff --git a/libs/oci/tests/integration_tests/chat_models/test_tool_calling.py b/libs/oci/tests/integration_tests/chat_models/test_tool_calling.py
@@ -0,0 +1,190 @@
+# Copyright (c) 2025 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+"""Integration tests for tool calling with OCI Generative AI chat models.
+
+These tests verify that tool calling works correctly without infinite loops
+for both Meta and Cohere models after receiving tool results.
+"""
+
+import os
+import pytest
+from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage
+from langchain_oci.chat_models import ChatOCIGenAI
+from langgraph.graph import StateGraph, START, END, MessagesState
+from langgraph.prebuilt import ToolNode
+from langchain.tools import StructuredTool
+
+
+def get_weather(city: str) -> str:
+    """Look up a canned weather report for ``city``, ignoring letter case."""
+    normalized_city = city.lower()
+    canned_reports = {"chicago": "Sunny, 65°F", "new york": "Cloudy, 60°F", "san francisco": "Foggy, 58°F"}
+    if normalized_city in canned_reports:
+        return canned_reports[normalized_city]
+    # Unknown cities get an explanatory message rather than an exception.
+    return f"Weather data not available for {city}"
+
+
+@pytest.fixture
+def weather_tool():
+    """Provide the ``get_weather`` function wrapped as a ``StructuredTool``."""
+    tool_name = "get_weather"
+    tool_description = "Get the current weather for a given city name."
+    return StructuredTool.from_function(
+        func=get_weather, name=tool_name, description=tool_description
+    )
+
+
+def create_agent(model_id: str, weather_tool: StructuredTool):
+    """Build and compile a LangGraph model/tool loop around ``model_id``.
+
+    Args:
+        model_id: Model identifier handed to ``ChatOCIGenAI``.
+        weather_tool: Tool bound to the model and given to the ``ToolNode``.
+
+    Returns:
+        The compiled graph returned by ``StateGraph.compile()``.
+    """
+    region = os.getenv("OCI_REGION", "us-chicago-1")
+    llm = ChatOCIGenAI(
+        model_id=model_id,
+        service_endpoint=f"https://inference.generativeai.{region}.oci.oraclecloud.com",
+        compartment_id=os.getenv("OCI_COMP"),
+        model_kwargs={"temperature": 0.3, "max_tokens": 512, "top_p": 0.9},
+        auth_type="SECURITY_TOKEN",
+        auth_profile="DEFAULT",
+        auth_file_location=os.path.expanduser("~/.oci/config"),
+        disable_streaming="tool_calling",
+    )
+    llm_with_tools = llm.bind_tools([weather_tool])
+
+    def call_model(state: MessagesState):
+        """Run the tool-bound model over the accumulated conversation."""
+        return {"messages": [llm_with_tools.invoke(state["messages"])]}
+
+    def should_continue(state: MessagesState):
+        """Route to the tool node while the newest message requests tools."""
+        pending_calls = getattr(state["messages"][-1], "tool_calls", None)
+        return "tools" if pending_calls else END
+
+    graph = StateGraph(MessagesState)
+    graph.add_node("call_model", call_model)
+    graph.add_node("tools", ToolNode(tools=[weather_tool]))
+    graph.add_edge(START, "call_model")
+    graph.add_conditional_edges("call_model", should_continue, ["tools", END])
+    graph.add_edge("tools", "call_model")
+    return graph.compile()
+
+
+@pytest.mark.requires("oci")
+@pytest.mark.parametrize(
+    "model_id",
+    [
+        "meta.llama-4-scout-17b-16e-instruct",
+        "meta.llama-3.3-70b-instruct",
+        "cohere.command-a-03-2025",
+        "cohere.command-r-plus-08-2024",
+    ],
+)
+def test_tool_calling_no_infinite_loop(model_id: str, weather_tool: StructuredTool):
+    """Test that tool calling works without infinite loops.
+
+    This test verifies that after a tool is called and results are returned,
+    the model generates a final response without making additional tool calls,
+    preventing infinite loops.
+
+    The fix sets tool_choice='none' when ToolMessages are present in the
+    conversation history, which tells the model to stop calling tools.
+    """
+    agent = create_agent(model_id, weather_tool)
+
+    # Invoke the agent
+    result = agent.invoke(
+        {
+            "messages": [
+                SystemMessage(
+                    content="You are a helpful assistant. Use the available tools when needed to answer questions accurately."
+                ),
+                HumanMessage(content="What's the weather in Chicago?"),
+            ]
+        }
+    )
+
+    messages = result["messages"]
+
+    # A complete round trip is System, Human, AI (tool call), Tool, AI (final),
+    # so anything shorter than five messages means the tool was never used.
+    assert len(messages) >= 5, "Should have at least: System, Human, AI (tool call), Tool, AI (final)"
+
+    # Find tool messages
+    tool_messages = [msg for msg in messages if isinstance(msg, ToolMessage)]
+    assert tool_messages, "Should have at least one tool result"
+
+    # Find AI messages with tool calls
+    ai_tool_calls = [
+        msg for msg in messages if isinstance(msg, AIMessage) and msg.tool_calls
+    ]
+    # The model should call the tool, but after receiving results, should not call again
+    # Allow flexibility - some models might make 1 call, others might need 2, but should stop
+    assert len(ai_tool_calls) <= 2, f"Model made too many tool calls ({len(ai_tool_calls)}), possible infinite loop"
+
+    # Verify final message is an AI response without tool calls
+    final_message = messages[-1]
+    assert isinstance(final_message, AIMessage), "Final message should be AIMessage"
+    assert final_message.content, "Final message should have content"
+    assert not final_message.tool_calls, \
+        "Final message should not have tool_calls (infinite loop prevention)"
+
+    # Verify the answer mentions the weather
+    assert "65" in final_message.content or "sunny" in final_message.content.lower(), \
+        "Final response should mention the weather data"
+
+
+@pytest.mark.requires("oci")
+def test_meta_llama_tool_calling(weather_tool: StructuredTool):
+    """Specific test for Meta Llama models to ensure fix works."""
+    model_id = "meta.llama-4-scout-17b-16e-instruct"
+    agent = create_agent(model_id, weather_tool)
+
+    result = agent.invoke(
+        {
+            "messages": [
+                SystemMessage(content="You are a helpful assistant."),
+                HumanMessage(content="Check the weather in San Francisco."),
+            ]
+        }
+    )
+
+    final_message = result["messages"][-1]
+
+    # Meta Llama was specifically affected by infinite loops
+    # Verify it stops after receiving tool results
+    assert isinstance(final_message, AIMessage), "Final message should be AIMessage"
+    assert not final_message.tool_calls, "Final message should not have tool_calls"
+    assert "foggy" in final_message.content.lower() or "58" in final_message.content, \
+        "Final response should mention the weather data"
+
+
+@pytest.mark.requires("oci")
+def test_cohere_tool_calling(weather_tool: StructuredTool):
+    """Specific test for Cohere models to ensure they work correctly."""
+    model_id = "cohere.command-a-03-2025"
+    agent = create_agent(model_id, weather_tool)
+
+    result = agent.invoke(
+        {
+            "messages": [
+                SystemMessage(content="You are a helpful assistant."),
+                HumanMessage(content="What's the weather like in New York?"),
+            ]
+        }
+    )
+
+    final_message = result["messages"][-1]
+
+    # Cohere models should handle tool calling naturally
+    assert isinstance(final_message, AIMessage), "Final message should be AIMessage"
+    assert not final_message.tool_calls, "Final message should not have tool_calls"
+    assert "60" in final_message.content or "cloudy" in final_message.content.lower(), \
+        "Final response should mention the weather data"