From b226846766b50794e2d553c7834c68e8ca138dad Mon Sep 17 00:00:00 2001 From: Jakub Zika Date: Thu, 15 Jan 2026 16:24:16 +0100 Subject: [PATCH 1/3] add configurable reasoning preservation for openai completions Standard behavior: discard reasoning before last user message (based on OpenAI SDK). Model-level config enables preservation (e.g. GLM-4.7 with preserved thinking). Changes: - prune-history: add keep-history-reasoning parameter - Tests: cover both pruning and preservation - Docs: add keepHistoryReasoning to model schema --- CHANGELOG.md | 2 + docs/configuration.md | 3 +- docs/models.md | 53 +++++++++++++++------ src/eca/llm_api.clj | 2 + src/eca/llm_providers/openai_chat.clj | 37 +++++++------- test/eca/llm_providers/openai_chat_test.clj | 31 +++++++++--- 6 files changed, 90 insertions(+), 38 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 68c2a200..71764395 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ ## Unreleased +- (OpenAI Chat) - Configurable reasoning history via `reasoningHistory` (model-level, default: all) + ## 0.94.2 - Fix autocompact not cleaning tokens in memory and thinking it should auto compact again. 
diff --git a/docs/configuration.md b/docs/configuration.md index c84eb018..e2d42177 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -693,7 +693,8 @@ To configure, add your OTLP collector config via `:otlp` map following [otlp aut thinkTagEnd?: string; models: {[key: string]: { modelName?: string; - extraPayload?: {[key: string]: any} + extraPayload?: {[key: string]: any}; + keepHistoryReasoning?: boolean; }}; }}; defaultModel?: string; diff --git a/docs/models.md b/docs/models.md index cc8da224..242fab44 100644 --- a/docs/models.md +++ b/docs/models.md @@ -61,19 +61,20 @@ You just need to add your provider to `providers` and make sure add the required Schema: -| Option | Type | Description | Required | -|-------------------------------|--------|--------------------------------------------------------------------------------------------------------------|----------| -| `api` | string | The API schema to use (`"openai-responses"`, `"openai-chat"`, or `"anthropic"`) | Yes | -| `url` | string | API URL (with support for env like `${env:MY_URL}`) | No* | -| `key` | string | API key (with support for `${env:MY_KEY}` or `{netrc:api.my-provider.com}` | No* | -| `completionUrlRelativePath` | string | Optional override for the completion endpoint path (see defaults below and examples like Azure) | No | -| `thinkTagStart` | string | Optional override the think start tag tag for openai-chat (Default: "") api | No | -| `thinkTagEnd` | string | Optional override the think end tag for openai-chat (Default: "") api | No | -| `httpClient` | map | Allow customize the http-client for this provider requests, like changing http version | No | -| `models` | map | Key: model name, value: its config | Yes | -| `models extraPayload` | map | Extra payload sent in body to LLM | No | -| `models modelName` | string | Override model name, useful to have multiple models with different configs and names that use same LLM model | No | -| `fetchModels` | boolean | Enable automatic 
model discovery from `/models` endpoint (OpenAI-compatible providers) | No | +| Option | Type | Description | Required | +|---------------------------------------|---------|--------------------------------------------------------------------------------------------------------------|----------| +| `api` | string | The API schema to use (`"openai-responses"`, `"openai-chat"`, or `"anthropic"`) | Yes | +| `url` | string | API URL (with support for env like `${env:MY_URL}`) | No* | +| `key` | string | API key (with support for `${env:MY_KEY}` or `{netrc:api.my-provider.com}` | No* | +| `completionUrlRelativePath` | string | Optional override for the completion endpoint path (see defaults below and examples like Azure) | No | +| `thinkTagStart` | string | Optional override the think start tag tag for openai-chat (Default: "") api | No | +| `thinkTagEnd` | string | Optional override the think end tag for openai-chat (Default: "") api | No | +| `httpClient` | map | Allow customize the http-client for this provider requests, like changing http version | No | +| `models` | map | Key: model name, value: its config | Yes | +| `models extraPayload` | map | Extra payload sent in body to LLM | No | +| `models modelName` | string | Override model name, useful to have multiple models with different configs and names that use same LLM model | No | +| `models keepHistoryReasoning` | boolean | Keep `reason` messages in conversation history. Default: `false` | No | +| `fetchModels` | boolean | Enable automatic model discovery from `/models` endpoint (OpenAI-compatible providers) | No | _* url and key will be searched as envs `_API_URL` and `_API_KEY`, they require the env to be found or config to work._ @@ -120,6 +121,30 @@ Examples: This way both will use gpt-5 model but one will override the reasoning to be high instead of the default. +=== "History reasoning" + + `keepHistoryReasoning` preserves reasoning in conversation history. 
Set for specific models: + + ```javascript title="~/.config/eca/config.json" + { + "providers": { + "z-ai": { + "api": "openai-chat", + "url": "https://api.z.ai/api/paas/v4/", + "key": "your-api-key", + "models": { + "GLM-4.7": { + "keepHistoryReasoning": true, // Preserves reasoning + "extraPayload": {"clear_thinking": false} // Preserved thinking (see https://docs.z.ai/guides/capabilities/thinking-mode#preserved-thinking) + } + } + } + } + } + ``` + + Default: `false`. + === "Dynamic model discovery" For OpenAI-compatible providers, set `fetchModels: true` to automatically discover available models: @@ -211,7 +236,7 @@ Notes: 3. Type the chosen method 4. Authenticate in your browser, copy the code. 5. Paste and send the code and done! - + === "Codex / Openai" 1. Login to Openai via the chat command `/login`. diff --git a/src/eca/llm_api.clj b/src/eca/llm_api.clj index 96536a8d..571b4e33 100644 --- a/src/eca/llm_api.clj +++ b/src/eca/llm_api.clj @@ -206,6 +206,7 @@ (let [url-relative-path (:completionUrlRelativePath provider-config) think-tag-start (:thinkTagStart provider-config) think-tag-end (:thinkTagEnd provider-config) + keep-history-reasoning (:keepHistoryReasoning model-config) http-client (:httpClient provider-config)] (handler {:model real-model @@ -221,6 +222,7 @@ :url-relative-path url-relative-path :think-tag-start think-tag-start :think-tag-end think-tag-end + :keep-history-reasoning keep-history-reasoning :http-client http-client :api-url api-url :api-key api-key} diff --git a/src/eca/llm_providers/openai_chat.clj b/src/eca/llm_providers/openai_chat.clj index a6cfb164..18b1ce82 100644 --- a/src/eca/llm_providers/openai_chat.clj +++ b/src/eca/llm_providers/openai_chat.clj @@ -384,19 +384,24 @@ (reset! reasoning-state* {:id nil :type nil :content "" :buffer ""}))) (defn ^:private prune-history - "Ensure DeepSeek-style reasoning_content is discarded from history but kept for the active turn. - Only drops 'reason' messages WITH :delta-reasoning? 
before the last user message. - Think-tag based reasoning (without :delta-reasoning?) is preserved and transformed to assistant messages." - [messages] - (if-let [last-user-idx (llm-util/find-last-user-msg-idx messages)] - (->> messages - (keep-indexed (fn [i m] - (when-not (and (= "reason" (:role m)) - (get-in m [:content :delta-reasoning?]) - (< i last-user-idx)) - m))) - vec) - messages)) + "Discard reasoning messages from history. + Reasoning with :delta-reasoning? is preserved in the same turn (as required by Deepseek). + This corresponds to the implementation standard. However, it can be change it at the model level configuration. + Parameters: + - messages: the conversation history + - keep-history-reasoning: if true, preserve all reasoning in history" + [messages keep-history-reasoning] + (if keep-history-reasoning + messages + (if-let [last-user-idx (llm-util/find-last-user-msg-idx messages)] + (->> messages + (keep-indexed (fn [i m] + (when-not (and (= "reason" (:role m)) + (or (< i last-user-idx) + (not (get-in m [:content :delta-reasoning?])))) + m))) + vec) + messages))) (defn chat-completion! "Primary entry point for OpenAI chat completions with streaming support. @@ -406,14 +411,14 @@ Compatible with OpenRouter and other OpenAI-compatible providers." [{:keys [model user-messages instructions temperature api-key api-url url-relative-path past-messages tools extra-payload extra-headers supports-image? - think-tag-start think-tag-end http-client]} + think-tag-start think-tag-end keep-history-reasoning http-client]} {:keys [on-message-received on-error on-prepare-tool-call on-tools-called on-reason on-usage-updated] :as callbacks}] (let [think-tag-start (or think-tag-start "") think-tag-end (or think-tag-end "") stream? 
(boolean callbacks) system-messages (when instructions [{:role "system" :content instructions}]) ;; Pipeline: prune history -> normalize -> merge adjacent assistants -> filter - all-messages (prune-history (vec (concat past-messages user-messages))) + all-messages (prune-history (vec (concat past-messages user-messages)) keep-history-reasoning) messages (vec (concat system-messages (normalize-messages all-messages supports-image? think-tag-start think-tag-end))) @@ -473,7 +478,7 @@ tool-calls)) on-tools-called-wrapper (fn on-tools-called-wrapper [tools-to-call on-tools-called handle-response] (when-let [{:keys [new-messages]} (on-tools-called tools-to-call)] - (let [pruned-messages (prune-history new-messages) + (let [pruned-messages (prune-history new-messages keep-history-reasoning) new-messages-list (vec (concat system-messages (normalize-messages pruned-messages supports-image? think-tag-start think-tag-end))) diff --git a/test/eca/llm_providers/openai_chat_test.clj b/test/eca/llm_providers/openai_chat_test.clj index 67c9edf8..eb1b642f 100644 --- a/test/eca/llm_providers/openai_chat_test.clj +++ b/test/eca/llm_providers/openai_chat_test.clj @@ -259,7 +259,7 @@ {:role "assistant" :reasoning_content "Thinking..."}]))))) (deftest prune-history-test - (testing "Drops reason messages WITH :delta-reasoning? before the last user message (DeepSeek)" + (testing "Drops all reason messages before the last user message by default" (is (match? [{:role "user" :content "Q1"} {:role "assistant" :content "A1"} @@ -272,15 +272,14 @@ {:role "assistant" :content "A1"} {:role "user" :content "Q2"} {:role "reason" :content {:text "r2" :delta-reasoning? true}} - {:role "assistant" :content "A2"}])))) + {:role "assistant" :content "A2"}] + false)))) - (testing "Preserves reason messages WITHOUT :delta-reasoning? (think-tag based)" + (testing "Preserves reason messages (without :delta-reasoning?) before last user message" (is (match? 
[{:role "user" :content "Q1"} - {:role "reason" :content {:text "thinking..."}} {:role "assistant" :content "A1"} {:role "user" :content "Q2"} - {:role "reason" :content {:text "more thinking..."}} {:role "assistant" :content "A2"}] (#'llm-providers.openai-chat/prune-history [{:role "user" :content "Q1"} @@ -288,12 +287,30 @@ {:role "assistant" :content "A1"} {:role "user" :content "Q2"} {:role "reason" :content {:text "more thinking..."}} - {:role "assistant" :content "A2"}])))) + {:role "assistant" :content "A2"}] + false)))) + + (testing "Preserves all reasoning when keep-history-reasoning is true (Bedrock)" + (is (match? + [{:role "user" :content "Q1"} + {:role "reason" :content {:text "r1"}} + {:role "assistant" :content "A1"} + {:role "user" :content "Q2"} + {:role "reason" :content {:text "r2"}} + {:role "assistant" :content "A2"}] + (#'llm-providers.openai-chat/prune-history + [{:role "user" :content "Q1"} + {:role "reason" :content {:text "r1"}} + {:role "assistant" :content "A1"} + {:role "user" :content "Q2"} + {:role "reason" :content {:text "r2"}} + {:role "assistant" :content "A2"}] + true)))) (testing "No user message leaves list unchanged" (let [msgs [{:role "assistant" :content "A"} {:role "reason" :content {:text "r"}}]] - (is (= msgs (#'llm-providers.openai-chat/prune-history msgs)))))) + (is (= msgs (#'llm-providers.openai-chat/prune-history msgs false)))))) (deftest valid-message-test (testing "Tool messages are always kept" From c32e4ad9245412a3a60d2c2d6a2513ca3a6c8769 Mon Sep 17 00:00:00 2001 From: Jakub Zika Date: Sat, 17 Jan 2026 08:03:58 +0100 Subject: [PATCH 2/3] fix tests and improve docs --- docs/models.md | 5 ++++- integration-test/integration/chat/github_copilot_test.clj | 2 +- integration-test/integration/chat/google_test.clj | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/docs/models.md b/docs/models.md index 242fab44..de142768 100644 --- a/docs/models.md +++ b/docs/models.md @@ -122,8 +122,11 @@ Examples: This 
way both will use gpt-5 model but one will override the reasoning to be high instead of the default. === "History reasoning" + `keepHistoryReasoning` - Determines whether the model's internal reasoning chain is persisted in the conversation history for subsequent turns. - `keepHistoryReasoning` preserves reasoning in conversation history. Set for specific models: + - **Standard Behavior**: Most models expect reasoning blocks (e.g., `` tags or `reasoning_content`) to be removed in subsequent requests to save tokens and avoid bias. + - **Usage**: Enable this for models that explicitly support "preserved thinking," or if you want to experiment with letting the model see its previous thought process (with XML-based reasoning). + - **Example**: See [GLM-4.7 with Preserved thinking](https://docs.z.ai/guides/capabilities/thinking-mode#preserved-thinking). ```javascript title="~/.config/eca/config.json" { diff --git a/integration-test/integration/chat/github_copilot_test.clj b/integration-test/integration/chat/github_copilot_test.clj index f8ba132c..a85d26f5 100644 --- a/integration-test/integration/chat/github_copilot_test.clj +++ b/integration-test/integration/chat/github_copilot_test.clj @@ -168,7 +168,7 @@ (match-content chat-id "system" {:type "progress" :state "finished"}) (is (match? 
{:input [{:role "user" :content [{:type "input_text" :text "hello!"}]} - {:role "assistant" :content [{:type "output_text" :text "I should say hello\nhello there!"}]} + {:role "assistant" :content [{:type "output_text" :text "hello there!"}]} {:role "user" :content [{:type "input_text" :text "how are you?"}]}] :instructions (m/pred string?)} (llm.mocks/get-req-body :reasoning-1))))))) diff --git a/integration-test/integration/chat/google_test.clj b/integration-test/integration/chat/google_test.clj index 5f34d341..30222616 100644 --- a/integration-test/integration/chat/google_test.clj +++ b/integration-test/integration/chat/google_test.clj @@ -167,7 +167,7 @@ (match-content chat-id "system" {:type "progress" :state "finished"}) (is (match? {:input [{:role "user" :content [{:type "input_text" :text "hello!"}]} - {:role "assistant" :content [{:type "output_text" :text "I should say hello\nhello there!"}]} + {:role "assistant" :content [{:type "output_text" :text "hello there!"}]} {:role "user" :content [{:type "input_text" :text "how are you?"}]}] :instructions (m/pred string?)} (llm.mocks/get-req-body :reasoning-1))))))) From 4a4d99085800be8cc20e6ba922ffd46874e07fcc Mon Sep 17 00:00:00 2001 From: Jakub Zika Date: Sat, 24 Jan 2026 17:19:33 +0100 Subject: [PATCH 3/3] Replace keepHistoryReasoning boolean with reasoningHistory Introduce a more granular control for reasoning retention in requests: "all" (default, send everything) "turn" (current turn only) "off" (discard all) Both delta-reasoning (reasoning_content) and think-tag reasoning are handled uniformly. DB storage is unaffected. Reasoning is always persisted for UI display. This setting only controls what gets sent back to the model. 
--- docs/configuration.md | 2 +- docs/models.md | 35 +++++----------- .../integration/chat/github_copilot_test.clj | 2 +- .../integration/chat/google_test.clj | 2 +- src/eca/config.clj | 3 +- src/eca/llm_api.clj | 7 +++- src/eca/llm_providers/openai_chat.clj | 41 ++++++++++--------- test/eca/llm_providers/openai_chat_test.clj | 40 ++++++++++++++---- 8 files changed, 75 insertions(+), 57 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index e2d42177..c9ad8708 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -694,7 +694,7 @@ To configure, add your OTLP collector config via `:otlp` map following [otlp aut models: {[key: string]: { modelName?: string; extraPayload?: {[key: string]: any}; - keepHistoryReasoning?: boolean; + reasoningHistory?: "all" | "turn" | "off"; }}; }}; defaultModel?: string; diff --git a/docs/models.md b/docs/models.md index de142768..e3114211 100644 --- a/docs/models.md +++ b/docs/models.md @@ -73,7 +73,7 @@ Schema: | `models` | map | Key: model name, value: its config | Yes | | `models extraPayload` | map | Extra payload sent in body to LLM | No | | `models modelName` | string | Override model name, useful to have multiple models with different configs and names that use same LLM model | No | -| `models keepHistoryReasoning` | boolean | Keep `reason` messages in conversation history. Default: `false` | No | +| `models reasoningHistory` | string | Controls reasoning in conversation history: `"all"` (default), `"turn"`, or `"off"` | No | | `fetchModels` | boolean | Enable automatic model discovery from `/models` endpoint (OpenAI-compatible providers) | No | _* url and key will be searched as envs `_API_URL` and `_API_KEY`, they require the env to be found or config to work._ @@ -121,32 +121,19 @@ Examples: This way both will use gpt-5 model but one will override the reasoning to be high instead of the default. 
-=== "History reasoning" - `keepHistoryReasoning` - Determines whether the model's internal reasoning chain is persisted in the conversation history for subsequent turns. +=== "Reasoning in conversation history" + `reasoningHistory` - Controls whether and how the model's reasoning (thinking blocks, reasoning_content) is included in conversation history sent to the model. + This **only applies** to the `openai-chat` API and it controls both tag-based thinking and the preservation of `reasoning_content`. - - **Standard Behavior**: Most models expect reasoning blocks (e.g., `` tags or `reasoning_content`) to be removed in subsequent requests to save tokens and avoid bias. - - **Usage**: Enable this for models that explicitly support "preserved thinking," or if you want to experiment with letting the model see its previous thought process (with XML-based reasoning). - - **Example**: See [GLM-4.7 with Preserved thinking](https://docs.z.ai/guides/capabilities/thinking-mode#preserved-thinking). + **Available modes:** - ```javascript title="~/.config/eca/config.json" - { - "providers": { - "z-ai": { - "api": "openai-chat", - "url": "https://api.z.ai/api/paas/v4/", - "key": "your-api-key", - "models": { - "GLM-4.7": { - "keepHistoryReasoning": true, // Preserves reasoning - "extraPayload": {"clear_thinking": false} // Preserved thinking (see https://docs.z.ai/guides/capabilities/thinking-mode#preserved-thinking) - } - } - } - } - } - ``` + - **`"all"`** (default, safe choice) - Send all reasoning blocks back to the model. The model can see its full chain of thought from previous turns. This is the safest option. + - **`"turn"`** - Send only reasoning from the current conversation turn (after the last user message). Previous reasoning is discarded before sending to the API. + - **`"off"`** - Never send reasoning blocks to the model. All reasoning is discarded before API calls. 
+ + **Note:** Reasoning is always shown to you in the UI and stored in chat history—this setting only controls what gets sent to the model in API requests. - Default: `false`. + Default: `"all"`. === "Dynamic model discovery" diff --git a/integration-test/integration/chat/github_copilot_test.clj b/integration-test/integration/chat/github_copilot_test.clj index a85d26f5..f8ba132c 100644 --- a/integration-test/integration/chat/github_copilot_test.clj +++ b/integration-test/integration/chat/github_copilot_test.clj @@ -168,7 +168,7 @@ (match-content chat-id "system" {:type "progress" :state "finished"}) (is (match? {:input [{:role "user" :content [{:type "input_text" :text "hello!"}]} - {:role "assistant" :content [{:type "output_text" :text "hello there!"}]} + {:role "assistant" :content [{:type "output_text" :text "I should say hello\nhello there!"}]} {:role "user" :content [{:type "input_text" :text "how are you?"}]}] :instructions (m/pred string?)} (llm.mocks/get-req-body :reasoning-1))))))) diff --git a/integration-test/integration/chat/google_test.clj b/integration-test/integration/chat/google_test.clj index 30222616..5f34d341 100644 --- a/integration-test/integration/chat/google_test.clj +++ b/integration-test/integration/chat/google_test.clj @@ -167,7 +167,7 @@ (match-content chat-id "system" {:type "progress" :state "finished"}) (is (match? 
{:input [{:role "user" :content [{:type "input_text" :text "hello!"}]} - {:role "assistant" :content [{:type "output_text" :text "hello there!"}]} + {:role "assistant" :content [{:type "output_text" :text "I should say hello\nhello there!"}]} {:role "user" :content [{:type "input_text" :text "how are you?"}]}] :instructions (m/pred string?)} (llm.mocks/get-req-body :reasoning-1))))))) diff --git a/src/eca/config.clj b/src/eca/config.clj index 6b0b494c..5fdb7d9f 100644 --- a/src/eca/config.clj +++ b/src/eca/config.clj @@ -336,7 +336,8 @@ {:kebab-case-key [[:providers]] :keywordize-val - [[:providers :ANY :httpClient]] + [[:providers :ANY :httpClient] + [:providers :ANY :models :ANY :reasoningHistory]] :stringfy-key [[:behavior] [:providers] diff --git a/src/eca/llm_api.clj b/src/eca/llm_api.clj index 571b4e33..7e865ce2 100644 --- a/src/eca/llm_api.clj +++ b/src/eca/llm_api.clj @@ -97,6 +97,7 @@ provider-config (get-in config [:providers provider]) model-config (get-in provider-config [:models model]) extra-payload (:extraPayload model-config) + reasoning-history (or (:reasoningHistory model-config) :all) [auth-type api-key] (llm-util/provider-api-key provider provider-auth config) api-url (llm-util/provider-api-url provider config) {:keys [handler]} (provider->api-handler provider config) @@ -123,6 +124,7 @@ :web-search web-search :extra-payload (merge {:parallel_tool_calls true} extra-payload) + :reasoning-history reasoning-history :api-url api-url :api-key api-key :auth-type auth-type} @@ -157,6 +159,7 @@ :tools tools :extra-payload (merge {:parallel_tool_calls true} extra-payload) + :reasoning-history reasoning-history :api-url api-url :api-key api-key :extra-headers {"openai-intent" "conversation-panel" @@ -179,6 +182,7 @@ :tools tools :think-tag-start "" :think-tag-end "" + :reasoning-history reasoning-history :extra-payload (merge {:parallel_tool_calls false} (when reason? 
{:extra_body {:google {:thinking_config {:include_thoughts true}}}}) @@ -206,7 +210,6 @@ (let [url-relative-path (:completionUrlRelativePath provider-config) think-tag-start (:thinkTagStart provider-config) think-tag-end (:thinkTagEnd provider-config) - keep-history-reasoning (:keepHistoryReasoning model-config) http-client (:httpClient provider-config)] (handler {:model real-model @@ -222,7 +225,7 @@ :url-relative-path url-relative-path :think-tag-start think-tag-start :think-tag-end think-tag-end - :keep-history-reasoning keep-history-reasoning + :reasoning-history reasoning-history :http-client http-client :api-url api-url :api-key api-key} diff --git a/src/eca/llm_providers/openai_chat.clj b/src/eca/llm_providers/openai_chat.clj index 18b1ce82..4fa1af56 100644 --- a/src/eca/llm_providers/openai_chat.clj +++ b/src/eca/llm_providers/openai_chat.clj @@ -384,24 +384,27 @@ (reset! reasoning-state* {:id nil :type nil :content "" :buffer ""}))) (defn ^:private prune-history - "Discard reasoning messages from history. - Reasoning with :delta-reasoning? is preserved in the same turn (as required by Deepseek). - This corresponds to the implementation standard. However, it can be change it at the model level configuration. + "Discard reasoning messages from history based on reasoning-history mode. 
+ Parameters: - messages: the conversation history - - keep-history-reasoning: if true, preserve all reasoning in history" - [messages keep-history-reasoning] - (if keep-history-reasoning - messages - (if-let [last-user-idx (llm-util/find-last-user-msg-idx messages)] - (->> messages - (keep-indexed (fn [i m] - (when-not (and (= "reason" (:role m)) - (or (< i last-user-idx) - (not (get-in m [:content :delta-reasoning?])))) - m))) - vec) - messages))) + - reasoning-history: controls reasoning retention + - :all - preserve all reasoning in history (safe default) + - :turn - preserve reasoning only in the current turn (after last user message) + - :off - discard all reasoning messages" + [messages reasoning-history] + (case reasoning-history + :all messages + :off (filterv #(not= "reason" (:role %)) messages) + :turn (if-let [last-user-idx (llm-util/find-last-user-msg-idx messages)] + (->> messages + (keep-indexed (fn [i m] + (when-not (and (= "reason" (:role m)) + (< i last-user-idx)) + m))) + vec) + messages) + messages)) (defn chat-completion! "Primary entry point for OpenAI chat completions with streaming support. @@ -411,14 +414,14 @@ Compatible with OpenRouter and other OpenAI-compatible providers." [{:keys [model user-messages instructions temperature api-key api-url url-relative-path past-messages tools extra-payload extra-headers supports-image? - think-tag-start think-tag-end keep-history-reasoning http-client]} + think-tag-start think-tag-end reasoning-history http-client]} {:keys [on-message-received on-error on-prepare-tool-call on-tools-called on-reason on-usage-updated] :as callbacks}] (let [think-tag-start (or think-tag-start "") think-tag-end (or think-tag-end "") stream? 
(boolean callbacks) system-messages (when instructions [{:role "system" :content instructions}]) ;; Pipeline: prune history -> normalize -> merge adjacent assistants -> filter - all-messages (prune-history (vec (concat past-messages user-messages)) keep-history-reasoning) + all-messages (prune-history (vec (concat past-messages user-messages)) reasoning-history) messages (vec (concat system-messages (normalize-messages all-messages supports-image? think-tag-start think-tag-end))) @@ -478,7 +481,7 @@ tool-calls)) on-tools-called-wrapper (fn on-tools-called-wrapper [tools-to-call on-tools-called handle-response] (when-let [{:keys [new-messages]} (on-tools-called tools-to-call)] - (let [pruned-messages (prune-history new-messages keep-history-reasoning) + (let [pruned-messages (prune-history new-messages reasoning-history) new-messages-list (vec (concat system-messages (normalize-messages pruned-messages supports-image? think-tag-start think-tag-end))) diff --git a/test/eca/llm_providers/openai_chat_test.clj b/test/eca/llm_providers/openai_chat_test.clj index eb1b642f..1051b6db 100644 --- a/test/eca/llm_providers/openai_chat_test.clj +++ b/test/eca/llm_providers/openai_chat_test.clj @@ -259,7 +259,7 @@ {:role "assistant" :reasoning_content "Thinking..."}]))))) (deftest prune-history-test - (testing "Drops all reason messages before the last user message by default" + (testing "reasoningHistory \"turn\" drops all reason messages before the last user message" (is (match? [{:role "user" :content "Q1"} {:role "assistant" :content "A1"} @@ -273,13 +273,14 @@ {:role "user" :content "Q2"} {:role "reason" :content {:text "r2" :delta-reasoning? true}} {:role "assistant" :content "A2"}] - false)))) + :turn)))) - (testing "Preserves reason messages (without :delta-reasoning?) before last user message" + (testing "reasoningHistory \"turn\" also drops think-tag reasoning before last user message" (is (match? 
[{:role "user" :content "Q1"} {:role "assistant" :content "A1"} {:role "user" :content "Q2"} + {:role "reason" :content {:text "more thinking..."}} {:role "assistant" :content "A2"}] (#'llm-providers.openai-chat/prune-history [{:role "user" :content "Q1"} @@ -288,9 +289,9 @@ {:role "user" :content "Q2"} {:role "reason" :content {:text "more thinking..."}} {:role "assistant" :content "A2"}] - false)))) + :turn)))) - (testing "Preserves all reasoning when keep-history-reasoning is true (Bedrock)" + (testing "reasoningHistory \"all\" preserves all reasoning" (is (match? [{:role "user" :content "Q1"} {:role "reason" :content {:text "r1"}} @@ -305,12 +306,35 @@ {:role "user" :content "Q2"} {:role "reason" :content {:text "r2"}} {:role "assistant" :content "A2"}] - true)))) + :all)))) - (testing "No user message leaves list unchanged" + (testing "reasoningHistory \"off\" removes all reasoning messages" + (is (match? + [{:role "user" :content "Q1"} + {:role "assistant" :content "A1"} + {:role "user" :content "Q2"} + {:role "assistant" :content "A2"}] + (#'llm-providers.openai-chat/prune-history + [{:role "user" :content "Q1"} + {:role "reason" :content {:text "r1" :delta-reasoning? true}} + {:role "assistant" :content "A1"} + {:role "user" :content "Q2"} + {:role "reason" :content {:text "r2"}} + {:role "assistant" :content "A2"}] + :off)))) + + (testing "No user message - reasoningHistory \"turn\" leaves list unchanged" (let [msgs [{:role "assistant" :content "A"} {:role "reason" :content {:text "r"}}]] - (is (= msgs (#'llm-providers.openai-chat/prune-history msgs false)))))) + (is (= msgs (#'llm-providers.openai-chat/prune-history msgs :turn))))) + + (testing "No user message - reasoningHistory \"off\" removes reason" + (is (match? + [{:role "assistant" :content "A"}] + (#'llm-providers.openai-chat/prune-history + [{:role "assistant" :content "A"} + {:role "reason" :content {:text "r"}}] + :off))))) (deftest valid-message-test (testing "Tool messages are always kept"