From b226846766b50794e2d553c7834c68e8ca138dad Mon Sep 17 00:00:00 2001 From: Jakub Zika Date: Thu, 15 Jan 2026 16:24:16 +0100 Subject: [PATCH 1/3] add configurable reasoning preservation for openai completions Standard behavior: discard reasoning before last user message (based on OpenAI SDK). Model-level config enables preservation (e.g. GLM-4.7 with preserved thinking). Changes: - prune-history: add keep-history-reasoning parameter - Tests: cover both pruning and preservation - Docs: add keepHistoryReasoning to model schema --- CHANGELOG.md | 2 + docs/configuration.md | 3 +- docs/models.md | 53 +++++++++++++++------ src/eca/llm_api.clj | 2 + src/eca/llm_providers/openai_chat.clj | 37 +++++++------- test/eca/llm_providers/openai_chat_test.clj | 31 +++++++++--- 6 files changed, 90 insertions(+), 38 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 68c2a200..71764395 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ ## Unreleased +- (OpenAI Chat) - Configurable reasoning history via `reasoningHistory` (model-level, default: all) + ## 0.94.2 - Fix autocompact not cleaning tokens in memory and thinking it should auto compact again. 
diff --git a/docs/configuration.md b/docs/configuration.md index c84eb018..e2d42177 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -693,7 +693,8 @@ To configure, add your OTLP collector config via `:otlp` map following [otlp aut thinkTagEnd?: string; models: {[key: string]: { modelName?: string; - extraPayload?: {[key: string]: any} + extraPayload?: {[key: string]: any}; + keepHistoryReasoning?: boolean; }}; }}; defaultModel?: string; diff --git a/docs/models.md b/docs/models.md index cc8da224..242fab44 100644 --- a/docs/models.md +++ b/docs/models.md @@ -61,19 +61,20 @@ You just need to add your provider to `providers` and make sure add the required Schema: -| Option | Type | Description | Required | -|-------------------------------|--------|--------------------------------------------------------------------------------------------------------------|----------| -| `api` | string | The API schema to use (`"openai-responses"`, `"openai-chat"`, or `"anthropic"`) | Yes | -| `url` | string | API URL (with support for env like `${env:MY_URL}`) | No* | -| `key` | string | API key (with support for `${env:MY_KEY}` or `{netrc:api.my-provider.com}` | No* | -| `completionUrlRelativePath` | string | Optional override for the completion endpoint path (see defaults below and examples like Azure) | No | -| `thinkTagStart` | string | Optional override the think start tag tag for openai-chat (Default: "") api | No | -| `thinkTagEnd` | string | Optional override the think end tag for openai-chat (Default: "") api | No | -| `httpClient` | map | Allow customize the http-client for this provider requests, like changing http version | No | -| `models` | map | Key: model name, value: its config | Yes | -| `models extraPayload` | map | Extra payload sent in body to LLM | No | -| `models modelName` | string | Override model name, useful to have multiple models with different configs and names that use same LLM model | No | -| `fetchModels` | boolean | Enable automatic 
model discovery from `/models` endpoint (OpenAI-compatible providers) | No | +| Option | Type | Description | Required | +|---------------------------------------|---------|--------------------------------------------------------------------------------------------------------------|----------| +| `api` | string | The API schema to use (`"openai-responses"`, `"openai-chat"`, or `"anthropic"`) | Yes | +| `url` | string | API URL (with support for env like `${env:MY_URL}`) | No* | +| `key` | string | API key (with support for `${env:MY_KEY}` or `{netrc:api.my-provider.com}` | No* | +| `completionUrlRelativePath` | string | Optional override for the completion endpoint path (see defaults below and examples like Azure) | No | +| `thinkTagStart` | string | Optional override the think start tag tag for openai-chat (Default: "") api | No | +| `thinkTagEnd` | string | Optional override the think end tag for openai-chat (Default: "") api | No | +| `httpClient` | map | Allow customize the http-client for this provider requests, like changing http version | No | +| `models` | map | Key: model name, value: its config | Yes | +| `models extraPayload` | map | Extra payload sent in body to LLM | No | +| `models modelName` | string | Override model name, useful to have multiple models with different configs and names that use same LLM model | No | +| `models keepHistoryReasoning` | boolean | Keep `reason` messages in conversation history. Default: `false` | No | +| `fetchModels` | boolean | Enable automatic model discovery from `/models` endpoint (OpenAI-compatible providers) | No | _* url and key will be searched as envs `_API_URL` and `_API_KEY`, they require the env to be found or config to work._ @@ -120,6 +121,30 @@ Examples: This way both will use gpt-5 model but one will override the reasoning to be high instead of the default. +=== "History reasoning" + + `keepHistoryReasoning` preserves reasoning in conversation history. 
Set for specific models: + + ```javascript title="~/.config/eca/config.json" + { + "providers": { + "z-ai": { + "api": "openai-chat", + "url": "https://api.z.ai/api/paas/v4/", + "key": "your-api-key", + "models": { + "GLM-4.7": { + "keepHistoryReasoning": true, // Preserves reasoning + "extraPayload": {"clear_thinking": false} // Preserved thinking (see https://docs.z.ai/guides/capabilities/thinking-mode#preserved-thinking) + } + } + } + } + } + ``` + + Default: `false`. + === "Dynamic model discovery" For OpenAI-compatible providers, set `fetchModels: true` to automatically discover available models: @@ -211,7 +236,7 @@ Notes: 3. Type the chosen method 4. Authenticate in your browser, copy the code. 5. Paste and send the code and done! - + === "Codex / Openai" 1. Login to Openai via the chat command `/login`. diff --git a/src/eca/llm_api.clj b/src/eca/llm_api.clj index 96536a8d..571b4e33 100644 --- a/src/eca/llm_api.clj +++ b/src/eca/llm_api.clj @@ -206,6 +206,7 @@ (let [url-relative-path (:completionUrlRelativePath provider-config) think-tag-start (:thinkTagStart provider-config) think-tag-end (:thinkTagEnd provider-config) + keep-history-reasoning (:keepHistoryReasoning model-config) http-client (:httpClient provider-config)] (handler {:model real-model @@ -221,6 +222,7 @@ :url-relative-path url-relative-path :think-tag-start think-tag-start :think-tag-end think-tag-end + :keep-history-reasoning keep-history-reasoning :http-client http-client :api-url api-url :api-key api-key} diff --git a/src/eca/llm_providers/openai_chat.clj b/src/eca/llm_providers/openai_chat.clj index a6cfb164..18b1ce82 100644 --- a/src/eca/llm_providers/openai_chat.clj +++ b/src/eca/llm_providers/openai_chat.clj @@ -384,19 +384,24 @@ (reset! reasoning-state* {:id nil :type nil :content "" :buffer ""}))) (defn ^:private prune-history - "Ensure DeepSeek-style reasoning_content is discarded from history but kept for the active turn. - Only drops 'reason' messages WITH :delta-reasoning? 
before the last user message. - Think-tag based reasoning (without :delta-reasoning?) is preserved and transformed to assistant messages." - [messages] - (if-let [last-user-idx (llm-util/find-last-user-msg-idx messages)] - (->> messages - (keep-indexed (fn [i m] - (when-not (and (= "reason" (:role m)) - (get-in m [:content :delta-reasoning?]) - (< i last-user-idx)) - m))) - vec) - messages)) + "Discard reasoning messages from history. + Reasoning with :delta-reasoning? is preserved in the same turn (as required by Deepseek). + This corresponds to the implementation standard. However, it can be change it at the model level configuration. + Parameters: + - messages: the conversation history + - keep-history-reasoning: if true, preserve all reasoning in history" + [messages keep-history-reasoning] + (if keep-history-reasoning + messages + (if-let [last-user-idx (llm-util/find-last-user-msg-idx messages)] + (->> messages + (keep-indexed (fn [i m] + (when-not (and (= "reason" (:role m)) + (or (< i last-user-idx) + (not (get-in m [:content :delta-reasoning?])))) + m))) + vec) + messages))) (defn chat-completion! "Primary entry point for OpenAI chat completions with streaming support. @@ -406,14 +411,14 @@ Compatible with OpenRouter and other OpenAI-compatible providers." [{:keys [model user-messages instructions temperature api-key api-url url-relative-path past-messages tools extra-payload extra-headers supports-image? - think-tag-start think-tag-end http-client]} + think-tag-start think-tag-end keep-history-reasoning http-client]} {:keys [on-message-received on-error on-prepare-tool-call on-tools-called on-reason on-usage-updated] :as callbacks}] (let [think-tag-start (or think-tag-start "") think-tag-end (or think-tag-end "") stream? 
(boolean callbacks) system-messages (when instructions [{:role "system" :content instructions}]) ;; Pipeline: prune history -> normalize -> merge adjacent assistants -> filter - all-messages (prune-history (vec (concat past-messages user-messages))) + all-messages (prune-history (vec (concat past-messages user-messages)) keep-history-reasoning) messages (vec (concat system-messages (normalize-messages all-messages supports-image? think-tag-start think-tag-end))) @@ -473,7 +478,7 @@ tool-calls)) on-tools-called-wrapper (fn on-tools-called-wrapper [tools-to-call on-tools-called handle-response] (when-let [{:keys [new-messages]} (on-tools-called tools-to-call)] - (let [pruned-messages (prune-history new-messages) + (let [pruned-messages (prune-history new-messages keep-history-reasoning) new-messages-list (vec (concat system-messages (normalize-messages pruned-messages supports-image? think-tag-start think-tag-end))) diff --git a/test/eca/llm_providers/openai_chat_test.clj b/test/eca/llm_providers/openai_chat_test.clj index 67c9edf8..eb1b642f 100644 --- a/test/eca/llm_providers/openai_chat_test.clj +++ b/test/eca/llm_providers/openai_chat_test.clj @@ -259,7 +259,7 @@ {:role "assistant" :reasoning_content "Thinking..."}]))))) (deftest prune-history-test - (testing "Drops reason messages WITH :delta-reasoning? before the last user message (DeepSeek)" + (testing "Drops all reason messages before the last user message by default" (is (match? [{:role "user" :content "Q1"} {:role "assistant" :content "A1"} @@ -272,15 +272,14 @@ {:role "assistant" :content "A1"} {:role "user" :content "Q2"} {:role "reason" :content {:text "r2" :delta-reasoning? true}} - {:role "assistant" :content "A2"}])))) + {:role "assistant" :content "A2"}] + false)))) - (testing "Preserves reason messages WITHOUT :delta-reasoning? (think-tag based)" + (testing "Preserves reason messages (without :delta-reasoning?) before last user message" (is (match? 
[{:role "user" :content "Q1"} - {:role "reason" :content {:text "thinking..."}} {:role "assistant" :content "A1"} {:role "user" :content "Q2"} - {:role "reason" :content {:text "more thinking..."}} {:role "assistant" :content "A2"}] (#'llm-providers.openai-chat/prune-history [{:role "user" :content "Q1"} @@ -288,12 +287,30 @@ {:role "assistant" :content "A1"} {:role "user" :content "Q2"} {:role "reason" :content {:text "more thinking..."}} - {:role "assistant" :content "A2"}])))) + {:role "assistant" :content "A2"}] + false)))) + + (testing "Preserves all reasoning when keep-history-reasoning is true (Bedrock)" + (is (match? + [{:role "user" :content "Q1"} + {:role "reason" :content {:text "r1"}} + {:role "assistant" :content "A1"} + {:role "user" :content "Q2"} + {:role "reason" :content {:text "r2"}} + {:role "assistant" :content "A2"}] + (#'llm-providers.openai-chat/prune-history + [{:role "user" :content "Q1"} + {:role "reason" :content {:text "r1"}} + {:role "assistant" :content "A1"} + {:role "user" :content "Q2"} + {:role "reason" :content {:text "r2"}} + {:role "assistant" :content "A2"}] + true)))) (testing "No user message leaves list unchanged" (let [msgs [{:role "assistant" :content "A"} {:role "reason" :content {:text "r"}}]] - (is (= msgs (#'llm-providers.openai-chat/prune-history msgs)))))) + (is (= msgs (#'llm-providers.openai-chat/prune-history msgs false)))))) (deftest valid-message-test (testing "Tool messages are always kept" From c32e4ad9245412a3a60d2c2d6a2513ca3a6c8769 Mon Sep 17 00:00:00 2001 From: Jakub Zika Date: Sat, 17 Jan 2026 08:03:58 +0100 Subject: [PATCH 2/3] fix tests and improve docs --- docs/models.md | 5 ++++- integration-test/integration/chat/github_copilot_test.clj | 2 +- integration-test/integration/chat/google_test.clj | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/docs/models.md b/docs/models.md index 242fab44..de142768 100644 --- a/docs/models.md +++ b/docs/models.md @@ -122,8 +122,11 @@ Examples: This 
way both will use gpt-5 model but one will override the reasoning to be high instead of the default. === "History reasoning" + `keepHistoryReasoning` - Determines whether the model's internal reasoning chain is persisted in the conversation history for subsequent turns. - `keepHistoryReasoning` preserves reasoning in conversation history. Set for specific models: + - **Standard Behavior**: Most models expect reasoning blocks (e.g., `` tags or `reasoning_content`) to be removed in subsequent requests to save tokens and avoid bias. + - **Usage**: Enable this for models that explicitly support "preserved thinking," or if you want to experiment with letting the model see its previous thought process (with XML-based reasoning). + - **Example**: See [GLM-4.7 with Preserved thinking](https://docs.z.ai/guides/capabilities/thinking-mode#preserved-thinking). ```javascript title="~/.config/eca/config.json" { diff --git a/integration-test/integration/chat/github_copilot_test.clj b/integration-test/integration/chat/github_copilot_test.clj index f8ba132c..a85d26f5 100644 --- a/integration-test/integration/chat/github_copilot_test.clj +++ b/integration-test/integration/chat/github_copilot_test.clj @@ -168,7 +168,7 @@ (match-content chat-id "system" {:type "progress" :state "finished"}) (is (match? 
{:input [{:role "user" :content [{:type "input_text" :text "hello!"}]} - {:role "assistant" :content [{:type "output_text" :text "I should say hello\nhello there!"}]} + {:role "assistant" :content [{:type "output_text" :text "hello there!"}]} {:role "user" :content [{:type "input_text" :text "how are you?"}]}] :instructions (m/pred string?)} (llm.mocks/get-req-body :reasoning-1))))))) diff --git a/integration-test/integration/chat/google_test.clj b/integration-test/integration/chat/google_test.clj index 5f34d341..30222616 100644 --- a/integration-test/integration/chat/google_test.clj +++ b/integration-test/integration/chat/google_test.clj @@ -167,7 +167,7 @@ (match-content chat-id "system" {:type "progress" :state "finished"}) (is (match? {:input [{:role "user" :content [{:type "input_text" :text "hello!"}]} - {:role "assistant" :content [{:type "output_text" :text "I should say hello\nhello there!"}]} + {:role "assistant" :content [{:type "output_text" :text "hello there!"}]} {:role "user" :content [{:type "input_text" :text "how are you?"}]}] :instructions (m/pred string?)} (llm.mocks/get-req-body :reasoning-1))))))) From 4a4d99085800be8cc20e6ba922ffd46874e07fcc Mon Sep 17 00:00:00 2001 From: Jakub Zika Date: Sat, 24 Jan 2026 17:19:33 +0100 Subject: [PATCH 3/3] Replace keepHistoryReasoning boolean with reasoningHistory Introduce a more granular control for reasoning retention in requests: "all" (default, send everything) "turn" (current turn only) "off" (discard all) Both delta-reasoning (reasoning_content) and think-tag reasoning are handled uniformly. DB storage is unaffected. Reasoning is always persisted for UI display. This setting only controls what gets sent back to the model. 
--- docs/configuration.md | 2 +- docs/models.md | 35 +++++----------- .../integration/chat/github_copilot_test.clj | 2 +- .../integration/chat/google_test.clj | 2 +- src/eca/config.clj | 3 +- src/eca/llm_api.clj | 7 +++- src/eca/llm_providers/openai_chat.clj | 41 ++++++++++--------- test/eca/llm_providers/openai_chat_test.clj | 40 ++++++++++++++---- 8 files changed, 75 insertions(+), 57 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index e2d42177..c9ad8708 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -694,7 +694,7 @@ To configure, add your OTLP collector config via `:otlp` map following [otlp aut models: {[key: string]: { modelName?: string; extraPayload?: {[key: string]: any}; - keepHistoryReasoning?: boolean; + reasoningHistory?: "all" | "turn" | "off"; }}; }}; defaultModel?: string; diff --git a/docs/models.md b/docs/models.md index de142768..e3114211 100644 --- a/docs/models.md +++ b/docs/models.md @@ -73,7 +73,7 @@ Schema: | `models` | map | Key: model name, value: its config | Yes | | `models extraPayload` | map | Extra payload sent in body to LLM | No | | `models modelName` | string | Override model name, useful to have multiple models with different configs and names that use same LLM model | No | -| `models keepHistoryReasoning` | boolean | Keep `reason` messages in conversation history. Default: `false` | No | +| `models reasoningHistory` | string | Controls reasoning in conversation history: `"all"` (default), `"turn"`, or `"off"` | No | | `fetchModels` | boolean | Enable automatic model discovery from `/models` endpoint (OpenAI-compatible providers) | No | _* url and key will be searched as envs `_API_URL` and `_API_KEY`, they require the env to be found or config to work._ @@ -121,32 +121,19 @@ Examples: This way both will use gpt-5 model but one will override the reasoning to be high instead of the default. 
-=== "History reasoning" - `keepHistoryReasoning` - Determines whether the model's internal reasoning chain is persisted in the conversation history for subsequent turns. +=== "Reasoning in conversation history" + `reasoningHistory` - Controls whether and how the model's reasoning (thinking blocks, reasoning_content) is included in conversation history sent to the model. + This **only applies** to the `openai-chat` API and it controls both tag-based thinking and the preservation of `reasoning_content`. - - **Standard Behavior**: Most models expect reasoning blocks (e.g., `` tags or `reasoning_content`) to be removed in subsequent requests to save tokens and avoid bias. - - **Usage**: Enable this for models that explicitly support "preserved thinking," or if you want to experiment with letting the model see its previous thought process (with XML-based reasoning). - - **Example**: See [GLM-4.7 with Preserved thinking](https://docs.z.ai/guides/capabilities/thinking-mode#preserved-thinking). + **Available modes:** - ```javascript title="~/.config/eca/config.json" - { - "providers": { - "z-ai": { - "api": "openai-chat", - "url": "https://api.z.ai/api/paas/v4/", - "key": "your-api-key", - "models": { - "GLM-4.7": { - "keepHistoryReasoning": true, // Preserves reasoning - "extraPayload": {"clear_thinking": false} // Preserved thinking (see https://docs.z.ai/guides/capabilities/thinking-mode#preserved-thinking) - } - } - } - } - } - ``` + - **`"all"`** (default, safe choice) - Send all reasoning blocks back to the model. The model can see its full chain of thought from previous turns. This is the safest option. + - **`"turn"`** - Send only reasoning from the current conversation turn (after the last user message). Previous reasoning is discarded before sending to the API. + - **`"off"`** - Never send reasoning blocks to the model. All reasoning is discarded before API calls. 
+ + **Note:** Reasoning is always shown to you in the UI and stored in chat history—this setting only controls what gets sent to the model in API requests. - Default: `false`. + Default: `"all"`. === "Dynamic model discovery" diff --git a/integration-test/integration/chat/github_copilot_test.clj b/integration-test/integration/chat/github_copilot_test.clj index a85d26f5..f8ba132c 100644 --- a/integration-test/integration/chat/github_copilot_test.clj +++ b/integration-test/integration/chat/github_copilot_test.clj @@ -168,7 +168,7 @@ (match-content chat-id "system" {:type "progress" :state "finished"}) (is (match? {:input [{:role "user" :content [{:type "input_text" :text "hello!"}]} - {:role "assistant" :content [{:type "output_text" :text "hello there!"}]} + {:role "assistant" :content [{:type "output_text" :text "I should say hello\nhello there!"}]} {:role "user" :content [{:type "input_text" :text "how are you?"}]}] :instructions (m/pred string?)} (llm.mocks/get-req-body :reasoning-1))))))) diff --git a/integration-test/integration/chat/google_test.clj b/integration-test/integration/chat/google_test.clj index 30222616..5f34d341 100644 --- a/integration-test/integration/chat/google_test.clj +++ b/integration-test/integration/chat/google_test.clj @@ -167,7 +167,7 @@ (match-content chat-id "system" {:type "progress" :state "finished"}) (is (match? 
{:input [{:role "user" :content [{:type "input_text" :text "hello!"}]} - {:role "assistant" :content [{:type "output_text" :text "hello there!"}]} + {:role "assistant" :content [{:type "output_text" :text "I should say hello\nhello there!"}]} {:role "user" :content [{:type "input_text" :text "how are you?"}]}] :instructions (m/pred string?)} (llm.mocks/get-req-body :reasoning-1))))))) diff --git a/src/eca/config.clj b/src/eca/config.clj index 6b0b494c..5fdb7d9f 100644 --- a/src/eca/config.clj +++ b/src/eca/config.clj @@ -336,7 +336,8 @@ {:kebab-case-key [[:providers]] :keywordize-val - [[:providers :ANY :httpClient]] + [[:providers :ANY :httpClient] + [:providers :ANY :models :ANY :reasoningHistory]] :stringfy-key [[:behavior] [:providers] diff --git a/src/eca/llm_api.clj b/src/eca/llm_api.clj index 571b4e33..7e865ce2 100644 --- a/src/eca/llm_api.clj +++ b/src/eca/llm_api.clj @@ -97,6 +97,7 @@ provider-config (get-in config [:providers provider]) model-config (get-in provider-config [:models model]) extra-payload (:extraPayload model-config) + reasoning-history (or (:reasoningHistory model-config) :all) [auth-type api-key] (llm-util/provider-api-key provider provider-auth config) api-url (llm-util/provider-api-url provider config) {:keys [handler]} (provider->api-handler provider config) @@ -123,6 +124,7 @@ :web-search web-search :extra-payload (merge {:parallel_tool_calls true} extra-payload) + :reasoning-history reasoning-history :api-url api-url :api-key api-key :auth-type auth-type} @@ -157,6 +159,7 @@ :tools tools :extra-payload (merge {:parallel_tool_calls true} extra-payload) + :reasoning-history reasoning-history :api-url api-url :api-key api-key :extra-headers {"openai-intent" "conversation-panel" @@ -179,6 +182,7 @@ :tools tools :think-tag-start "" :think-tag-end "" + :reasoning-history reasoning-history :extra-payload (merge {:parallel_tool_calls false} (when reason? 
{:extra_body {:google {:thinking_config {:include_thoughts true}}}}) @@ -206,7 +210,6 @@ (let [url-relative-path (:completionUrlRelativePath provider-config) think-tag-start (:thinkTagStart provider-config) think-tag-end (:thinkTagEnd provider-config) - keep-history-reasoning (:keepHistoryReasoning model-config) http-client (:httpClient provider-config)] (handler {:model real-model @@ -222,7 +225,7 @@ :url-relative-path url-relative-path :think-tag-start think-tag-start :think-tag-end think-tag-end - :keep-history-reasoning keep-history-reasoning + :reasoning-history reasoning-history :http-client http-client :api-url api-url :api-key api-key} diff --git a/src/eca/llm_providers/openai_chat.clj b/src/eca/llm_providers/openai_chat.clj index 18b1ce82..4fa1af56 100644 --- a/src/eca/llm_providers/openai_chat.clj +++ b/src/eca/llm_providers/openai_chat.clj @@ -384,24 +384,27 @@ (reset! reasoning-state* {:id nil :type nil :content "" :buffer ""}))) (defn ^:private prune-history - "Discard reasoning messages from history. - Reasoning with :delta-reasoning? is preserved in the same turn (as required by Deepseek). - This corresponds to the implementation standard. However, it can be change it at the model level configuration. + "Discard reasoning messages from history based on reasoning-history mode. 
+ Parameters: - messages: the conversation history - - keep-history-reasoning: if true, preserve all reasoning in history" - [messages keep-history-reasoning] - (if keep-history-reasoning - messages - (if-let [last-user-idx (llm-util/find-last-user-msg-idx messages)] - (->> messages - (keep-indexed (fn [i m] - (when-not (and (= "reason" (:role m)) - (or (< i last-user-idx) - (not (get-in m [:content :delta-reasoning?])))) - m))) - vec) - messages))) + - reasoning-history: controls reasoning retention + - :all - preserve all reasoning in history (safe default) + - :turn - preserve reasoning only in the current turn (after last user message) + - :off - discard all reasoning messages" + [messages reasoning-history] + (case reasoning-history + :all messages + :off (filterv #(not= "reason" (:role %)) messages) + :turn (if-let [last-user-idx (llm-util/find-last-user-msg-idx messages)] + (->> messages + (keep-indexed (fn [i m] + (when-not (and (= "reason" (:role m)) + (< i last-user-idx)) + m))) + vec) + messages) + messages)) (defn chat-completion! "Primary entry point for OpenAI chat completions with streaming support. @@ -411,14 +414,14 @@ Compatible with OpenRouter and other OpenAI-compatible providers." [{:keys [model user-messages instructions temperature api-key api-url url-relative-path past-messages tools extra-payload extra-headers supports-image? - think-tag-start think-tag-end keep-history-reasoning http-client]} + think-tag-start think-tag-end reasoning-history http-client]} {:keys [on-message-received on-error on-prepare-tool-call on-tools-called on-reason on-usage-updated] :as callbacks}] (let [think-tag-start (or think-tag-start "") think-tag-end (or think-tag-end "") stream? 
(boolean callbacks) system-messages (when instructions [{:role "system" :content instructions}]) ;; Pipeline: prune history -> normalize -> merge adjacent assistants -> filter - all-messages (prune-history (vec (concat past-messages user-messages)) keep-history-reasoning) + all-messages (prune-history (vec (concat past-messages user-messages)) reasoning-history) messages (vec (concat system-messages (normalize-messages all-messages supports-image? think-tag-start think-tag-end))) @@ -478,7 +481,7 @@ tool-calls)) on-tools-called-wrapper (fn on-tools-called-wrapper [tools-to-call on-tools-called handle-response] (when-let [{:keys [new-messages]} (on-tools-called tools-to-call)] - (let [pruned-messages (prune-history new-messages keep-history-reasoning) + (let [pruned-messages (prune-history new-messages reasoning-history) new-messages-list (vec (concat system-messages (normalize-messages pruned-messages supports-image? think-tag-start think-tag-end))) diff --git a/test/eca/llm_providers/openai_chat_test.clj b/test/eca/llm_providers/openai_chat_test.clj index eb1b642f..1051b6db 100644 --- a/test/eca/llm_providers/openai_chat_test.clj +++ b/test/eca/llm_providers/openai_chat_test.clj @@ -259,7 +259,7 @@ {:role "assistant" :reasoning_content "Thinking..."}]))))) (deftest prune-history-test - (testing "Drops all reason messages before the last user message by default" + (testing "reasoningHistory \"turn\" drops all reason messages before the last user message" (is (match? [{:role "user" :content "Q1"} {:role "assistant" :content "A1"} @@ -273,13 +273,14 @@ {:role "user" :content "Q2"} {:role "reason" :content {:text "r2" :delta-reasoning? true}} {:role "assistant" :content "A2"}] - false)))) + :turn)))) - (testing "Preserves reason messages (without :delta-reasoning?) before last user message" + (testing "reasoningHistory \"turn\" also drops think-tag reasoning before last user message" (is (match? 
[{:role "user" :content "Q1"} {:role "assistant" :content "A1"} {:role "user" :content "Q2"} + {:role "reason" :content {:text "more thinking..."}} {:role "assistant" :content "A2"}] (#'llm-providers.openai-chat/prune-history [{:role "user" :content "Q1"} @@ -288,9 +289,9 @@ {:role "user" :content "Q2"} {:role "reason" :content {:text "more thinking..."}} {:role "assistant" :content "A2"}] - false)))) + :turn)))) - (testing "Preserves all reasoning when keep-history-reasoning is true (Bedrock)" + (testing "reasoningHistory \"all\" preserves all reasoning" (is (match? [{:role "user" :content "Q1"} {:role "reason" :content {:text "r1"}} @@ -305,12 +306,35 @@ {:role "user" :content "Q2"} {:role "reason" :content {:text "r2"}} {:role "assistant" :content "A2"}] - true)))) + :all)))) - (testing "No user message leaves list unchanged" + (testing "reasoningHistory \"off\" removes all reasoning messages" + (is (match? + [{:role "user" :content "Q1"} + {:role "assistant" :content "A1"} + {:role "user" :content "Q2"} + {:role "assistant" :content "A2"}] + (#'llm-providers.openai-chat/prune-history + [{:role "user" :content "Q1"} + {:role "reason" :content {:text "r1" :delta-reasoning? true}} + {:role "assistant" :content "A1"} + {:role "user" :content "Q2"} + {:role "reason" :content {:text "r2"}} + {:role "assistant" :content "A2"}] + :off)))) + + (testing "No user message - reasoningHistory \"turn\" leaves list unchanged" (let [msgs [{:role "assistant" :content "A"} {:role "reason" :content {:text "r"}}]] - (is (= msgs (#'llm-providers.openai-chat/prune-history msgs false)))))) + (is (= msgs (#'llm-providers.openai-chat/prune-history msgs :turn))))) + + (testing "No user message - reasoningHistory \"off\" removes reason" + (is (match? + [{:role "assistant" :content "A"}] + (#'llm-providers.openai-chat/prune-history + [{:role "assistant" :content "A"} + {:role "reason" :content {:text "r"}}] + :off))))) (deftest valid-message-test (testing "Tool messages are always kept"