diff --git a/prepare_llm_models.sh b/prepare_llm_models.sh index 022c9a1cc0..6bfb13ee52 100755 --- a/prepare_llm_models.sh +++ b/prepare_llm_models.sh @@ -35,6 +35,7 @@ HERMES3_MODEL="NousResearch/Hermes-3-Llama-3.1-8B" PHI4_MODEL="microsoft/Phi-4-mini-instruct" MISTRAL_MODEL="mistralai/Mistral-7B-Instruct-v0.3" GPT_OSS="openai/gpt-oss-20b" +DEVSTRAL_MODEL="unsloth/Devstral-Small-2507" if [ "$(python3 -c 'import sys; print(sys.version_info[1])')" -le "8" ]; then echo "Prepare models with python > 3.8."; exit 1 ; fi @@ -182,3 +183,14 @@ if [ ! -f "$1/$GPT_OSS/$TOKENIZER_FILE" ]; then echo "[ERROR] Models file $1/$GPT_OSS/$TOKENIZER_FILE does not exist." exit 1 fi + +if [ -f "$1/$DEVSTRAL_MODEL/$TOKENIZER_FILE" ]; then + echo "Models file $1/$DEVSTRAL_MODEL/$TOKENIZER_FILE exists. Skipping downloading models." +else + mkdir -p $1/$DEVSTRAL_MODEL + convert_tokenizer $DEVSTRAL_MODEL --with_detokenizer -o $1/$DEVSTRAL_MODEL +fi +if [ ! -f "$1/$DEVSTRAL_MODEL/$TOKENIZER_FILE" ]; then + echo "[ERROR] Models file $1/$DEVSTRAL_MODEL/$TOKENIZER_FILE does not exist." + exit 1 +fi diff --git a/src/llm/BUILD b/src/llm/BUILD index bfe45b3036..ae37d936ca 100644 --- a/src/llm/BUILD +++ b/src/llm/BUILD @@ -137,6 +137,7 @@ ovms_cc_library( # TODO split further so we don't have to recompile everything w "io_processing/hermes3/tool_parser.hpp", "io_processing/llama3/tool_parser.hpp", "io_processing/phi4/tool_parser.hpp", + "io_processing/devstral/tool_parser.hpp", "io_processing/mistral/tool_parser.hpp", "io_processing/qwen3/reasoning_parser.hpp", "io_processing/gptoss/reasoning_parser.hpp", @@ -148,6 +149,7 @@ ovms_cc_library( # TODO split further so we don't have to recompile everything w "io_processing/hermes3/tool_parser.cpp", "io_processing/llama3/tool_parser.cpp", "io_processing/phi4/tool_parser.cpp", + "io_processing/devstral/tool_parser.cpp", "io_processing/mistral/tool_parser.cpp", "io_processing/qwen3/reasoning_parser.cpp", "io_processing/gptoss/reasoning_parser.cpp", @@ -176,11 +178,13 @@ ovms_cc_library( "io_processing/phi4/generation_config_builder.hpp", "io_processing/llama3/generation_config_builder.hpp", "io_processing/hermes3/generation_config_builder.hpp", + "io_processing/devstral/generation_config_builder.hpp", "io_processing/generation_config_builder.hpp"], srcs = ["io_processing/base_generation_config_builder.cpp", "io_processing/phi4/generation_config_builder.cpp", "io_processing/llama3/generation_config_builder.cpp", - "io_processing/hermes3/generation_config_builder.cpp"], + "io_processing/hermes3/generation_config_builder.cpp", + "io_processing/devstral/generation_config_builder.cpp"], deps = [ ":openai_request", "//src:libovmslogging", diff --git a/src/llm/io_processing/devstral/generation_config_builder.cpp b/src/llm/io_processing/devstral/generation_config_builder.cpp new file mode 100644 index 0000000000..f6dced3673 --- /dev/null +++ b/src/llm/io_processing/devstral/generation_config_builder.cpp @@ -0,0 +1,57 @@ +//***************************************************************************** +// Copyright 2025 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "generation_config_builder.hpp"
+
+namespace ovms {
+
+void DevstralGenerationConfigBuilder::parseConfigFromRequest(const OpenAIChatCompletionsRequest& request) {
+    // Call the base class method to fill in common configuration
+    BaseGenerationConfigBuilder::parseConfigFromRequest(request);
+
+    // For now the only model-specific part is related to tools, so if no tools are provided in the request
+    // we can exit early
+    if (request.toolNameSchemaMap.empty()) {
+        return;
+    }
+
+    if (enableToolGuidedGeneration || request.toolChoice == "required") {
+        // Set tool guided generation config specific to the Devstral model
+        auto triggeredTags = std::make_shared<ov::genai::StructuredOutputConfig::TriggeredTags>();
+        triggeredTags->triggers.push_back("[TOOL_CALLS]");
+
+        for (const auto& [toolName, toolSchemaWrapper] : request.toolNameSchemaMap) {
+            const auto& toolSchema = toolSchemaWrapper.stringRepr;
+            ov::genai::StructuredOutputConfig::Tag tagItem;
+            tagItem.begin = "[TOOL_CALLS]" + toolName + "[ARGS]";
+            tagItem.end = "</s>";
+            tagItem.content = ov::genai::StructuredOutputConfig::JSONSchema(toolSchema);
+            triggeredTags->tags.push_back(tagItem);
+        }
+        if (request.toolChoice == "required") {
+            triggeredTags->at_least_one = true;
+        }
+        ov::genai::StructuredOutputConfig::StructuralTag structuralTag = triggeredTags;
+        setStructuralTagsConfig(structuralTag);
+    }
+}
+
+}  // namespace ovms
diff --git a/src/llm/io_processing/devstral/generation_config_builder.hpp b/src/llm/io_processing/devstral/generation_config_builder.hpp
new file mode 100644
index 0000000000..97666f17d9
--- /dev/null
+++ b/src/llm/io_processing/devstral/generation_config_builder.hpp
@@ -0,0 +1,33 @@
+//*****************************************************************************
+// Copyright 2025 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+#pragma once
+#include "../base_generation_config_builder.hpp"
+
+namespace ovms {
+
+/*
+ * DevstralGenerationConfigBuilder extends BaseGenerationConfigBuilder to provide configuration specific to the Devstral model.
+ * It overrides the parseConfigFromRequest method to set up tool-guided generation.
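+ * For illustration, assuming a request that provides a single tool named get_weather (a hypothetical
+ * tool name used only as an example), the structural tag built above constrains a triggered tool call
+ * to the form:
+ *   [TOOL_CALLS]get_weather[ARGS]{...JSON matching the tool's schema...}</s>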
+ */ +class DevstralGenerationConfigBuilder : public BaseGenerationConfigBuilder { +public: + DevstralGenerationConfigBuilder() = delete; + explicit DevstralGenerationConfigBuilder(const ov::genai::GenerationConfig& baseConfig, bool enableToolGuidedGeneration, DecodingMethod decodingMethod) : + BaseGenerationConfigBuilder(baseConfig, enableToolGuidedGeneration, decodingMethod) {} + + void parseConfigFromRequest(const OpenAIChatCompletionsRequest& request) override; +}; +} // namespace ovms diff --git a/src/llm/io_processing/devstral/tool_parser.cpp b/src/llm/io_processing/devstral/tool_parser.cpp new file mode 100644 index 0000000000..2274d2e2b0 --- /dev/null +++ b/src/llm/io_processing/devstral/tool_parser.cpp @@ -0,0 +1,230 @@ +//***************************************************************************** +// Copyright 2025 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** + +#include +#include +#include +#include + +#include "src/port/rapidjson_document.hpp" +#include "src/logging.hpp" +#include "src/llm/io_processing/utils.hpp" +#include "src/stringutils.hpp" +#include "tool_parser.hpp" + +namespace ovms { + +void DevstralToolParser::parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) { + // expected format: [TOOL_CALLS]tool_name[ARGS]{"arg1": "value1", ...} + if (parsedOutput.content.empty() || generatedTokens.size() <= 0) { + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "No content to parse for tool calls"); + return; + } + size_t firstToolTokenIndex; + auto it = std::find(generatedTokens.begin(), generatedTokens.end(), this->botTokenId); + if (it != generatedTokens.end()) { + firstToolTokenIndex = std::distance(generatedTokens.begin(), it); + } else { + return; + } + + size_t firstArgsTokenIndex; + auto itArgs = std::find(generatedTokens.begin() + firstToolTokenIndex, generatedTokens.end(), this->argsTokenId); + if (itArgs != generatedTokens.end()) { + firstArgsTokenIndex = std::distance(generatedTokens.begin(), itArgs); + } else { + return; + } + if (firstToolTokenIndex > firstArgsTokenIndex) { + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "First tool token index is greater than first args token index."); + return; + } + std::vector toolNameTokens(generatedTokens.begin() + (firstToolTokenIndex + 1), generatedTokens.begin() + (firstArgsTokenIndex)); + std::vector argumentsTokens(generatedTokens.begin() + (firstArgsTokenIndex + 1), generatedTokens.end()); + + ToolCall toolCall; + std::string toolName = tokenizer.decode(toolNameTokens, ov::AnyMap{ov::genai::skip_special_tokens(true)}); + std::string arguments = tokenizer.decode(argumentsTokens, ov::AnyMap{ov::genai::skip_special_tokens(true)}); + ovms::trim(toolName); // trim in case of extra spaces/newlines + toolCall.name = toolName; + if (arguments.empty()) { + arguments = "{}"; // set empty arguments to {} + } + toolCall.arguments = arguments; + toolCall.id = generateRandomId(); // Generate a random ID for the 
+    parsedOutput.toolCalls.push_back(toolCall);
+
+    // get the subset of generatedTokens from begin() up to firstToolTokenIndex
+    std::vector<int64_t> contentTokens;
+    if (firstToolTokenIndex > 0) {
+        contentTokens = std::vector<int64_t>(generatedTokens.begin(), generatedTokens.begin() + firstToolTokenIndex);
+        parsedOutput.content = tokenizer.decode(contentTokens, ov::AnyMap{ov::genai::skip_special_tokens(true)});  // Return only the content up to the tool call
+    } else {
+        parsedOutput.content = tokenizer.decode(contentTokens, ov::AnyMap{ov::genai::skip_special_tokens(true)});
+    }
+    return;
+}
+
+std::optional<rapidjson::Document> DevstralToolParser::sendFullDelta(ToolCall& toolCall) {
+    rapidjson::Document argsDelta;
+    argsDelta.Parse(toolCall.arguments.c_str());
+    rapidjson::Document argumentsWrapper;
+    argumentsWrapper.SetObject();
+    rapidjson::Document::AllocatorType& allocator = argumentsWrapper.GetAllocator();
+    // add the raw toolCall.arguments string to argumentsWrapper under the "arguments" key
+    rapidjson::Value toolCallsString(rapidjson::kStringType);
+    toolCallsString.SetString(toolCall.arguments.c_str(), allocator);
+    argumentsWrapper.AddMember("arguments", toolCallsString, allocator);
+    auto currentDelta = wrapDelta(argumentsWrapper, this->toolCallIndex);
+    return currentDelta;
+}
+
+rapidjson::Document DevstralToolParser::wrapCombinedDelta(ToolCall& toolCall) {
+    rapidjson::Document wrappedDelta;
+    wrappedDelta.SetObject();
+    rapidjson::Value toolCalls(rapidjson::kArrayType);
+    rapidjson::Value toolCallObj(rapidjson::kObjectType);
+    rapidjson::Value idValue(generateRandomId().c_str(), wrappedDelta.GetAllocator());
+    rapidjson::Value toolCallsString(rapidjson::kStringType);
+
+    toolCallObj.AddMember("id", idValue, wrappedDelta.GetAllocator());
+    toolCallObj.AddMember("type", "function", wrappedDelta.GetAllocator());
+    toolCallObj.AddMember("index", toolCallIndex, wrappedDelta.GetAllocator());
+    rapidjson::Value functionObj(rapidjson::kObjectType);
+    rapidjson::Value nameValue(toolCall.name.c_str(), wrappedDelta.GetAllocator());
+    functionObj.AddMember("name", nameValue, wrappedDelta.GetAllocator());
+    // add the raw toolCall.arguments string to the function object under the "arguments" key
+
+    toolCallsString.SetString(toolCall.arguments.c_str(), wrappedDelta.GetAllocator());
+    functionObj.AddMember("arguments", toolCallsString, wrappedDelta.GetAllocator());
+    toolCallObj.AddMember("function", functionObj, wrappedDelta.GetAllocator());
+    toolCalls.PushBack(toolCallObj, wrappedDelta.GetAllocator());
+    rapidjson::Value deltaWrapper(rapidjson::kObjectType);
+    deltaWrapper.AddMember("tool_calls", toolCalls, wrappedDelta.GetAllocator());
+    wrappedDelta.AddMember("delta", deltaWrapper, wrappedDelta.GetAllocator());
+    return wrappedDelta;
+}
+
+rapidjson::Document DevstralToolParser::parseContentChunk() {
+    rapidjson::StringBuffer buffer;
+    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+    writer.StartObject();
+    writer.String("delta");
+    writer.StartObject();
+    writer.String("content");
+    writer.String(streamContent.c_str());
+    writer.EndObject();
+    writer.EndObject();
+    rapidjson::Document doc;
+    doc.Parse(buffer.GetString());
+    streamContent.clear();
+    return doc;
+}
+
+std::optional<rapidjson::Document> DevstralToolParser::parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) {
+    /*
+    Devstral tool call format: [TOOL_CALLS]tool_name[ARGS]arguments</s>
+    The model does not support parallel tool calls, so tool calls always appear in sequence.
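+
+    Example (hypothetical chunk stream, matching the streaming unit tests):
+      "Reasoning" -> content delta; "[TOOL_CALLS]" -> state change, nothing emitted; "get_", "weather" -> buffered;
+      "[ARGS]" -> first tool call delta carrying the tool name; later chunks -> argument deltas until </s>.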
+
+    We have three processing states:
+    AWAITING_START_TAG,
+    AWAITING_ARGS_TAG,
+    PROCESSING_ARGS
+
+    We store the chunk history in the streamContent string. When a state change is detected, streamContent is cleared so that only the unprocessed part is kept.
+    */
+
+    this->streamContent += chunk;
+    SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Chunk content: '{}', StreamContent: '{}', State: {}", chunk, this->streamContent, std::to_string(this->internalState));
+    if (this->internalState == AWAITING_START_TAG) {
+        // if chunk ends with </s> we need to remove it and return parsed content immediately
+        if (chunk.size() >= this->parsingEndTag.size() &&
+            chunk.substr(chunk.size() - this->parsingEndTag.size()) == this->parsingEndTag) {
+            // remove </s> from streamContent
+            this->streamContent = this->streamContent.substr(0, this->streamContent.size() - this->parsingEndTag.size());
+            SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Found end tag in chunk while awaiting start tag. Returning content chunk.");
+            return parseContentChunk();
+        }
+        size_t pos = chunk.find(this->parsingToolCallsStartTag);
+        if (pos != std::string::npos) {
+            this->internalState = AWAITING_ARGS_TAG;
+            this->toolCallIndex++;
+            if (pos == 0) {
+                this->streamContent.clear();
+                return std::nullopt;
+            } else {
+                this->streamContent = this->streamContent.substr(pos + this->parsingToolCallsStartTag.length());  // skip past the "[TOOL_CALLS]" tag
+                return parseContentChunk();
+            }
+        } else {
+            return parseContentChunk();
+        }
+    }
+    if (this->internalState == AWAITING_ARGS_TAG) {
+        size_t pos = this->streamContent.find(this->parsingArgsStartTag);
+        if (pos != std::string::npos) {
+            this->internalState = PROCESSING_ARGS;
+            this->toolName = this->streamContent.substr(0, pos);
+            ovms::trim(this->toolName);  // trim in case of extra spaces/newlines
+            this->streamContent = this->streamContent.substr(pos + this->parsingArgsStartTag.length());
+            // check if streamContent ends with </s>; if so, we need to return the full tool call delta
+            if (this->streamContent.size() >= this->parsingEndTag.size() &&
+                this->streamContent.substr(this->streamContent.size() - this->parsingEndTag.size()) == this->parsingEndTag) {
+                // remove </s> from streamContent
+                ToolCall toolCall;
+                toolCall.name = this->toolName;
+                this->streamContent = this->streamContent.substr(0, this->streamContent.size() - this->parsingEndTag.size());
+                if (!this->streamContent.empty()) {
+                    toolCall.arguments = this->streamContent;
+                } else {
+                    toolCall.arguments = "{}";
+                }
+                this->streamContent = "";
+                return wrapCombinedDelta(toolCall);
+            } else {
+                return wrapFirstDelta(this->toolName, this->toolCallIndex);
+            }
+        } else {
+            return std::nullopt;
+        }
+    }
+    if (this->internalState == PROCESSING_ARGS) {
+        size_t endPos = this->streamContent.find(this->parsingEndTag);
+        std::string arguments;
+        if (endPos != std::string::npos) {
+            arguments = this->streamContent.substr(0, endPos);
+        } else {
+            arguments = this->streamContent;
+        }
+
+        ToolCall toolCall;
+        if (!arguments.empty())
+            toolCall.arguments = arguments;
+        else
+            toolCall.arguments = "{}";
+        toolCall.name = this->toolName;
+        this->streamContent = "";
+        return sendFullDelta(toolCall);
+    }
+    return std::nullopt;
+}
+// Static member definitions
+const std::string DevstralToolParser::parsingArgsStartTag = "[ARGS]";
+const std::string DevstralToolParser::parsingToolCallsStartTag = "[TOOL_CALLS]";
+const std::string DevstralToolParser::parsingEndTag = "</s>";
+const int64_t DevstralToolParser::argsTokenId = 32;  // [ARGS]
+const int64_t DevstralToolParser::botTokenId = 9;    // [TOOL_CALLS]
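+// NOTE: the hardcoded token ids above are assumed to match the special tokens of the Devstral
+// (Mistral Tekken) tokenizer shipped with unsloth/Devstral-Small-2507; other checkpoints may
+// assign different ids to [ARGS] and [TOOL_CALLS].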
+}  // namespace ovms
diff --git a/src/llm/io_processing/devstral/tool_parser.hpp b/src/llm/io_processing/devstral/tool_parser.hpp
new file mode 100644
index 0000000000..c07b38b34e
--- /dev/null
+++ b/src/llm/io_processing/devstral/tool_parser.hpp
@@ -0,0 +1,80 @@
+//*****************************************************************************
+// Copyright 2025 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+#pragma once
+
+#include <cstdint>
+#include <optional>
+#include <string>
+#include <vector>
+
+#include "src/port/rapidjson_document.hpp"
+#include "src/llm/io_processing/base_output_parser.hpp"
+#include "src/llm/io_processing/partial_json_builder.hpp"
+#include "src/llm/apis/tool_schema_wrapper.hpp"
+
+namespace ovms {
+class DevstralToolParser : public BaseOutputParser {
+    static const int64_t argsTokenId;  // [ARGS]
+    static const int64_t botTokenId;   // [TOOL_CALLS]
+
+    // in streaming mode we can rely only on tags in string form as tokens are not available
+    static const std::string parsingArgsStartTag;
+    static const std::string parsingToolCallsStartTag;
+    static const std::string parsingEndTag;
+
+    enum InternalState {
+        AWAITING_START_TAG,
+        AWAITING_ARGS_TAG,
+        PROCESSING_ARGS
+    };
+
+    InternalState internalState = AWAITING_START_TAG;
+    const ToolsSchemas_t& toolSchemas;
+    // Index to track the current tool call being processed (-1 means no tool call has been started yet)
+    int toolCallIndex = -1;
+    std::string streamContent = "";  // content accumulated from stream chunks
+    std::string toolName = "";
+    std::optional<rapidjson::Document> sendFullDelta(ToolCall& toolCall);
+
+public:
+    DevstralToolParser() = delete;
+    DevstralToolParser(ov::genai::Tokenizer& tokenizer, const ToolsSchemas_t& toolSchemas) :
+        BaseOutputParser(tokenizer),
+        toolSchemas(toolSchemas) {}
+
+    void parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens) override;
+    std::optional<rapidjson::Document> parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) override;
+    rapidjson::Document parseContentChunk();
+    rapidjson::Document wrapCombinedDelta(ToolCall& toolCall);
+    const std::vector<std::string>& getParsingStartTags() const override {
+        static const std::vector<std::string> toolCallStartTags{parsingToolCallsStartTag};
+        return toolCallStartTags;
+    }
+    const std::vector<std::string>& getSpecialParsingStartTags() const override {
+        static const std::vector<std::string> specialParsingStartTags{};
+        return specialParsingStartTags;
+    }
+    // Tool calls are expected to be the last part of the output; the end tag is the model's end-of-sequence token.
+    const std::string& getParsingEndTag() const override {
+        return this->parsingEndTag;
+    }
+
+    // [TOOL_CALLS] and [ARGS] are special tokens, so the parser needs them present in the streamed text
+    bool requiresStreamingWithSpecialTokens() const override {
+        return true;
+    }
+};
+
+}  // namespace ovms
diff --git a/src/llm/io_processing/generation_config_builder.hpp b/src/llm/io_processing/generation_config_builder.hpp
index 663d4a9b1a..2423cd074d 100644
--- a/src/llm/io_processing/generation_config_builder.hpp
+++ b/src/llm/io_processing/generation_config_builder.hpp
@@ -24,6 +24,7 @@
 #include "phi4/generation_config_builder.hpp"
 #include "llama3/generation_config_builder.hpp"
 #include "hermes3/generation_config_builder.hpp"
+#include "devstral/generation_config_builder.hpp"
 #include "../apis/openai_request.hpp"
 #include "../../logging.hpp"
 
@@ -44,6 +45,8 @@ class GenerationConfigBuilder {
             builder_impl = std::make_unique(baseConfig, enableToolGuidedGeneration, decodingMethod);
         } else if (toolParserName == "phi4") {
             builder_impl = std::make_unique<Phi4GenerationConfigBuilder>(baseConfig, enableToolGuidedGeneration, decodingMethod);
+        } else if (toolParserName == "devstral") {
+            builder_impl = std::make_unique<DevstralGenerationConfigBuilder>(baseConfig, enableToolGuidedGeneration, decodingMethod);
         } else {
             if (enableToolGuidedGeneration) {
                 SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Option enable_tool_guided_generation is set, but will not be effective since no valid tool parser has been provided.");
diff --git a/src/llm/io_processing/output_parser.cpp b/src/llm/io_processing/output_parser.cpp
index cf0a805f59..1c060375df 100644
--- a/src/llm/io_processing/output_parser.cpp
+++ b/src/llm/io_processing/output_parser.cpp
@@ -27,6 +27,7 @@
 #include "gptoss/tool_parser.hpp"
 #include "qwen3/reasoning_parser.hpp"
 #include "qwen3coder/qwen3coder_tool_parser.hpp"
+#include "devstral/tool_parser.hpp"
 #include "gptoss/reasoning_parser.hpp"
 
 namespace ovms {
@@ -168,6 +169,8 @@ OutputParser::OutputParser(ov::genai::Tokenizer& tokenizer, const std::string toolParserName
         toolParser = std::make_unique(tokenizer);
     } else if (toolParserName == "qwen3coder") {
         toolParser = std::make_unique<Qwen3CoderToolParser>(tokenizer, toolNameSchemaMap);
+    } else if (toolParserName == "devstral") {
+        toolParser = std::make_unique<DevstralToolParser>(tokenizer, toolNameSchemaMap);
     } else if (!toolParserName.empty()) {
         throw std::runtime_error("Unsupported tool parser: " + toolParserName);
     }
diff --git a/src/llm/io_processing/output_parser.hpp b/src/llm/io_processing/output_parser.hpp
index 613e0a993e..4b5d1c0420 100644
--- a/src/llm/io_processing/output_parser.hpp
+++ b/src/llm/io_processing/output_parser.hpp
@@ -87,8 +87,13 @@ class OutputParser {
     std::optional<rapidjson::Document> parseChunk(const std::string& chunkResponse, const bool toolsAvailable, ov::genai::GenerationFinishReason finishReason);
 
     bool requiresStreamingWithSpecialTokens() const {
-        return (reasoningParser && reasoningParser->requiresStreamingWithSpecialTokens()) &&
-               (toolParser && toolParser->requiresStreamingWithSpecialTokens());
+        if (!reasoningParser) {
+            return toolParser && toolParser->requiresStreamingWithSpecialTokens();
+        } else if (!toolParser) {
+            return reasoningParser->requiresStreamingWithSpecialTokens();
+        } else {
+            return reasoningParser->requiresStreamingWithSpecialTokens() && toolParser->requiresStreamingWithSpecialTokens();
+        }
     }
 };
 }  // namespace ovms
diff --git a/src/llm/servable.cpp b/src/llm/servable.cpp
index 345d1c362b..428d28762e 100644
--- a/src/llm/servable.cpp
+++ b/src/llm/servable.cpp
@@ -103,12 +103,17 @@ absl::Status GenAiServable::processTokenizeRequest(std::shared_ptr<GenAiServableExecutionContext>& executionContext) {
-    executionContext->apiHandler = std::make_shared<OpenAIChatCompletionsHandler>(*executionContext->payload.parsedJson,
-        executionContext->endpoint,
-        std::chrono::system_clock::now(),
-        getProperties()->tokenizer,
-        getProperties()->toolParserName,
-        getProperties()->reasoningParserName);
+    try {
+        executionContext->apiHandler = std::make_shared<OpenAIChatCompletionsHandler>(*executionContext->payload.parsedJson,
+            executionContext->endpoint,
+            std::chrono::system_clock::now(),
+            getProperties()->tokenizer,
+            getProperties()->toolParserName,
+            getProperties()->reasoningParserName);
+    } catch (const std::exception& e) {
+        SPDLOG_LOGGER_ERROR(llm_calculator_logger, "Failed to create API handler: {}", e.what());
+        return absl::InvalidArgumentError(std::string("Failed to create API handler: ") + e.what());
+    }
     auto& config = ovms::Config::instance();
     auto status = executionContext->apiHandler->parseRequest(getProperties()->maxTokensLimit, getProperties()->bestOfLimit, getProperties()->maxModelLength, config.getServerSettings().allowedLocalMediaPath);
diff --git a/src/test/llm/output_parsers/devstral_output_parser_test.cpp b/src/test/llm/output_parsers/devstral_output_parser_test.cpp
new file mode 100644
index 0000000000..ca61b5c2cc
--- /dev/null
+++ b/src/test/llm/output_parsers/devstral_output_parser_test.cpp
@@ -0,0 +1,372 @@
+//*****************************************************************************
+// Copyright 2025 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+#include <gtest/gtest.h>
+#include <memory>
+#include <optional>
+#include <string>
+
+#include "src/llm/io_processing/base_output_parser.hpp"
+#include "src/llm/io_processing/output_parser.hpp"
+#include "test/platform_utils.hpp"
+
+using namespace ovms;
+
+#ifdef _WIN32
+const std::string tokenizerPath = getWindowsRepoRootPath() + "\\src\\test\\llm_testing\\unsloth\\Devstral-Small-2507";
+#else
+// Hardcoded for usage in docker container
+const std::string tokenizerPath = "/ovms/src/test/llm_testing/unsloth/Devstral-Small-2507/";
+#endif
+
+static ovms::ToolsSchemas_t EMPTY_TOOLS_SCHEMA = {};  // not used for devstral
+static std::unique_ptr<ov::genai::Tokenizer> devstralTokenizer;
+
+class DevstralOutputParserTest : public ::testing::Test {
+protected:
+    std::unique_ptr<OutputParser> outputParserWithRegularToolParsing;
+
+    static void SetUpTestSuite() {
+        try {
+            devstralTokenizer = std::make_unique<ov::genai::Tokenizer>(tokenizerPath);
+        } catch (const std::exception& e) {
+            FAIL() << "Failed to initialize devstral tokenizer: " << e.what();
+        } catch (...) {
+            FAIL() << "Failed to initialize devstral tokenizer due to unknown error.";
+        }
+    }
+
+    static void TearDownTestSuite() {
+        devstralTokenizer.reset();
+    }
+
+    void SetUp() override {
+        // declare tools_schema
+        static std::map<std::string, std::string> toolSchemasInput = {
+            {"example_tool", R"({"properties": {"arg1": {"type": "string", "description": "A string argument."}}, "required": ["arg1"]})"},
+        };
+
+        static std::vector<std::unique_ptr<rapidjson::Document>> schemaDocsStorage;
+
+        auto convertStringToolSchemasStringToToolsSchemas = [](
+                                                                const std::map<std::string, std::string>& input) -> ToolsSchemas_t {
+            ToolsSchemas_t result;
+            schemaDocsStorage.clear();
+            for (const auto& [name, schemaStr] : input) {
+                auto schemaDoc = std::make_unique<rapidjson::Document>();
+                if (schemaDoc->Parse(schemaStr.c_str()).HasParseError()) {
+                    throw std::runtime_error("Failed to parse schema for tool: " + name);
+                }
+                result[name] = {schemaDoc.get(), schemaStr};
+                schemaDocsStorage.push_back(std::move(schemaDoc));
+            }
+            return result;
+        };
+
+        static ovms::ToolsSchemas_t toolsSchemas = convertStringToolSchemasStringToToolsSchemas(toolSchemasInput);
+        outputParserWithRegularToolParsing = std::make_unique<OutputParser>(*devstralTokenizer, "devstral", "", toolsSchemas);
+    }
+};
+
+TEST_F(DevstralOutputParserTest, ParseToolCallOutputWithSingleToolCall) {
+    std::string input = "[TOOL_CALLS]example_tool[ARGS]{\"arg1\":\"value1 with new line \\n and \"quote\" and slash \\ \",\"arg2\":42}";
+    std::string testInput = input;
+    auto generatedTensor = devstralTokenizer->encode(testInput, ov::genai::add_special_tokens(false)).input_ids;
+    std::vector<int64_t> generatedTokens(generatedTensor.data<int64_t>(), generatedTensor.data<int64_t>() + generatedTensor.get_size());
+    ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true);
+    EXPECT_EQ(parsedOutput.content, "");
+    EXPECT_EQ(parsedOutput.reasoning, "");
+    ASSERT_EQ(parsedOutput.toolCalls.size(), 1);
+    EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool");
+    EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\":\"value1 with new line \\n and \"quote\" and slash \\ \",\"arg2\":42}");
+    EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false);
+}
+
+TEST_F(DevstralOutputParserTest, ParseToolCallOutputWithSingleToolCall_MissingEndTag) {
+    std::string testInput = "Reasoning before tool call [TOOL_CALLS] example_tool [ARGS]{\"arg1\":\"value1\",\"arg2\":42}";
+    auto generatedTensor = devstralTokenizer->encode(testInput, ov::genai::add_special_tokens(false)).input_ids;
+    std::vector<int64_t> generatedTokens(generatedTensor.data<int64_t>(), generatedTensor.data<int64_t>() + generatedTensor.get_size());
+    ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true);
+    EXPECT_EQ(parsedOutput.content, "Reasoning before tool call ");
+    EXPECT_EQ(parsedOutput.reasoning, "");
+    ASSERT_EQ(parsedOutput.toolCalls.size(), 1);
+    EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool");
+    EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\":\"value1\",\"arg2\":42}");
+    EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false);
+}
+
+TEST_F(DevstralOutputParserTest, ParseToolCallOutputWithSingleToolCall_EmptyArguments) {
+    std::string testInput = "Reasoning before tool call [TOOL_CALLS]example_tool[ARGS]";
+    auto generatedTensor = devstralTokenizer->encode(testInput, ov::genai::add_special_tokens(false)).input_ids;
+    std::vector<int64_t> generatedTokens(generatedTensor.data<int64_t>(), generatedTensor.data<int64_t>() + generatedTensor.get_size());
+    ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true);
+    EXPECT_EQ(parsedOutput.content, "Reasoning before tool call ");
"); + EXPECT_EQ(parsedOutput.reasoning, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{}"); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); +} + +TEST_F(DevstralOutputParserTest, ParseToolCallOutputWithContentAndNoToolCalls) { + std::string input = "This is a regular model response without tool calls."; + auto generatedTensor = devstralTokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, "This is a regular model response without tool calls."); + ASSERT_EQ(parsedOutput.toolCalls.size(), 0); + EXPECT_EQ(parsedOutput.reasoning, ""); +} + +TEST_F(DevstralOutputParserTest, ParseToolCallOutputWithContentAndSingleToolCall) { + std::string testInput = "Reasoning before tool call [TOOL_CALLS]example_tool[ARGS]{\"arg1\":\"value1\",\"arg2\":42}"; + auto generatedTensor = devstralTokenizer->encode(testInput, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, "Reasoning before tool call "); + EXPECT_EQ(parsedOutput.reasoning, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\":\"value1\",\"arg2\":42}"); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); +} + +TEST_F(DevstralOutputParserTest, ParseToolCallOutputWithInvalidOrder) { + std::string testInput = "Reasoning before tool call [ARGS]example_tool[TOOL_CALLS]{\"arg1\":\"value1\",\"arg2\":42}"; + auto generatedTensor = devstralTokenizer->encode(testInput, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, "Reasoning before tool call example_tool{\"arg1\":\"value1\",\"arg2\":42}"); + EXPECT_EQ(parsedOutput.reasoning, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 0); +} + +TEST_F(DevstralOutputParserTest, ParseToolCallOutputWithMissingArgsTag) { + std::string input = "Some content [TOOL_CALLS]example_tool{\"arg1\":\"value1\",\"arg2\":42}"; + std::string testInput = input; + auto generatedTensor = devstralTokenizer->encode(testInput, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + // Same expected content as tokenizer does not add special tokens + EXPECT_EQ(parsedOutput.content, "Some content example_tool{\"arg1\":\"value1\",\"arg2\":42}"); + EXPECT_EQ(parsedOutput.reasoning, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 0); +} + +TEST_F(DevstralOutputParserTest, ParseToolCallOutputWithArrayArguments) { + std::string input = 
"[TOOL_CALLS]example_tool[ARGS]{\"filepath\":\"/var/log/db.log\",\"status\":[\"completed\",\"failed\"],\"encoding\":\"utf-8\",\"processFunction\":\"processFunction\"}"; + std::string testInput = input; + auto generatedTensor = devstralTokenizer->encode(testInput, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + EXPECT_EQ(parsedOutput.reasoning, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"filepath\":\"/var/log/db.log\",\"status\":[\"completed\",\"failed\"],\"encoding\":\"utf-8\",\"processFunction\":\"processFunction\"}"); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); +} + +TEST_F(DevstralOutputParserTest, ParseToolCallOutputWithInvalidArguments) { + std::string input = "[TOOL_CALLS]example_tool[ARGS]{ \"filepath\": \"/var/log/db.log\", \"status\": "; + std::string testInput = input; + auto generatedTensor = devstralTokenizer->encode(testInput, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + EXPECT_EQ(parsedOutput.reasoning, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{ \"filepath\": \"/var/log/db.log\", \"status\": "); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); +} + +TEST_F(DevstralOutputParserTest, HolisticStreaming) { + std::vector>> chunkToDeltaVec{ + // Tool call phase + // Starting first tool. Collecting chunk until full name is received. Don't return until then. 
+        {"Reasoning", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"content":"Reasoning"}})"},
+        {"example", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"content":"example"}})"},
+        {"[TOOL_CALLS]", ov::genai::GenerationFinishReason::NONE, std::nullopt},
+        {" get", ov::genai::GenerationFinishReason::NONE, std::nullopt},
+        {"_", ov::genai::GenerationFinishReason::NONE, std::nullopt},
+        {"weather", ov::genai::GenerationFinishReason::NONE, std::nullopt},
+        {" [ARGS]", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":0,"function":{"name":"get_weather"}}]}})"},
+        {"{\"", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]}})"},
+        {"city\":", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"city\":"}}]}})"},
+        {" \"Paris", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":" \"Paris"}}]}})"},
+        {" \"capital of ", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":" \"capital of "}}]}})"},
+        {"art\\vine \\n", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"art\\vine \\n"}}]}})"},
+        // Last chunk is added in the for loop below
+    };
+    ToolsSchemas_t tools_schemas = {
+        {"get_weather", ToolSchemaWrapper{}}};
+    for (auto lastFinishReason : {ov::genai::GenerationFinishReason::STOP, ov::genai::GenerationFinishReason::LENGTH}) {
+        // Need a new output parser per case to simulate separate request processing
+        outputParserWithRegularToolParsing = std::make_unique<OutputParser>(*devstralTokenizer, "devstral", "", tools_schemas);
+        auto chunkToDeltaVecCopy = chunkToDeltaVec;
+        if (lastFinishReason == ov::genai::GenerationFinishReason::STOP) {
+            chunkToDeltaVecCopy.push_back({"\"}", ov::genai::GenerationFinishReason::STOP, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]}})"});
+        } else {
+            chunkToDeltaVecCopy.push_back({"\"}", ov::genai::GenerationFinishReason::LENGTH, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]}})"});
+        }
+        int64_t chunkIteration = -1;
+        for (const auto& [chunk, finishReason, expectedDelta] : chunkToDeltaVecCopy) {
+            chunkIteration++;
+            std::optional<rapidjson::Document> doc = outputParserWithRegularToolParsing->parseChunk(chunk, true, finishReason);
+            if (!expectedDelta.has_value() && !doc.has_value()) {
+                continue;  // Both are nullopt, OK
+            }
+            if (expectedDelta.has_value() && doc.has_value()) {
+                rapidjson::StringBuffer buffer;
+                rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+                doc->Accept(writer);
+                std::string docStr = buffer.GetString();
+                // If both strings contain "id":"...", compare id values by length and alphanumeric, else compare whole strings
+                std::string expected = expectedDelta.value();
+                std::string idKey = "\"id\":\"";
+                auto docIdPos = docStr.find(idKey);
+                auto expectedIdPos = expected.find(idKey);
+                if (docIdPos != std::string::npos && expectedIdPos != std::string::npos) {
+                    auto docIdStart = docIdPos + idKey.size();
+                    auto docIdEnd = docStr.find("\"", docIdStart);
+                    auto expectedIdStart = expectedIdPos + idKey.size();
+                    auto expectedIdEnd = expected.find("\"", expectedIdStart);
+                    ASSERT_NE(docIdEnd, std::string::npos);
+                    ASSERT_NE(expectedIdEnd, std::string::npos);
+                    std::string docId = docStr.substr(docIdStart, docIdEnd - docIdStart);
+                    std::string expectedId = expected.substr(expectedIdStart, expectedIdEnd - expectedIdStart);
+                    EXPECT_EQ(docId.size(), expectedId.size()) << "ID length mismatch for chunk: " << chunk;
+                    EXPECT_TRUE(std::all_of(docId.begin(), docId.end(), ::isalnum)) << "ID not alphanumeric for chunk: " << chunk;
+                    // Compare everything except the id value
+                    std::string docStrNoId = docStr;
+                    std::string expectedNoId = expected;
+                    docStrNoId.replace(docIdStart, docId.size(), std::string(docId.size(), '*'));
+                    expectedNoId.replace(expectedIdStart, expectedId.size(), std::string(expectedId.size(), '*'));
+                    EXPECT_EQ(docStrNoId, expectedNoId) << "Mismatch for chunk (ignoring id value): " << chunk;
+                } else {
+                    EXPECT_EQ(docStr, expected) << "Mismatch for chunk: [" << chunk << "] got [" << docStr << "] but expected [" << expected << "]" << chunkIteration;
+                }
+            } else if (expectedDelta.has_value()) {
+                FAIL() << "Mismatch for chunk: [" << chunk << "] got nothing but expected [" << expectedDelta.value() << "]" << chunkIteration;
+            } else if (doc.has_value()) {
+                rapidjson::StringBuffer buffer;
+                rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+                doc->Accept(writer);
+                std::string docStr = buffer.GetString();
+                FAIL() << "Mismatch for chunk: [" << chunk << "] expected nothing but got [" << docStr << "]" << chunkIteration;
+            } else {
+                FAIL() << "Mismatch for chunk: [" << chunk << "] " << chunkIteration;
+            }
+        }
+    }
+}
+
+TEST_F(DevstralOutputParserTest, EmptyArgumentsStreaming) {
+    std::vector<std::tuple<std::string, ov::genai::GenerationFinishReason, std::optional<std::string>>> chunkToDeltaVec{
+        // Tool call phase
+        // Starting the first tool. Collect chunks until the full name is received; don't return anything until then.
+        {"[TOOL_CALLS]", ov::genai::GenerationFinishReason::NONE, std::nullopt},
+        {"list", ov::genai::GenerationFinishReason::NONE, std::nullopt},
+        {"_", ov::genai::GenerationFinishReason::NONE, std::nullopt},
+        {"tools", ov::genai::GenerationFinishReason::NONE, std::nullopt},
+        {"[ARGS]", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":0,"function":{"name":"list_tools"}}]}})"},
+        {"</s>", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{}"}}]}})"},
+    };
+    ToolsSchemas_t tools_schemas = {
+        {"list_tools", ToolSchemaWrapper{}}};
+
+    int64_t chunkIteration = 0;
+    for (const auto& [chunk, finishReason, expectedDelta] : chunkToDeltaVec) {
+        chunkIteration++;
+        std::optional<rapidjson::Document> doc = outputParserWithRegularToolParsing->parseChunk(chunk, true, finishReason);
+        if (!expectedDelta.has_value() && !doc.has_value()) {
+            continue;  // Both are nullopt, OK
+        }
+        if (expectedDelta.has_value() && doc.has_value()) {
+            rapidjson::StringBuffer buffer;
+            rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+            doc->Accept(writer);
+            std::string docStr = buffer.GetString();
+            // If both strings contain "id":"...", compare id values by length and alphanumeric, else compare whole strings
+            std::string expected = expectedDelta.value();
+            std::string idKey = "\"id\":\"";
+            auto docIdPos = docStr.find(idKey);
+            auto expectedIdPos = expected.find(idKey);
+            if (docIdPos != std::string::npos && expectedIdPos != std::string::npos) {
+                auto docIdStart = docIdPos + idKey.size();
+                auto docIdEnd = docStr.find("\"", docIdStart);
+                auto expectedIdStart = expectedIdPos + idKey.size();
+                auto expectedIdEnd = expected.find("\"", expectedIdStart);
+                ASSERT_NE(docIdEnd, std::string::npos);
+                ASSERT_NE(expectedIdEnd, std::string::npos);
+                std::string docId = docStr.substr(docIdStart, docIdEnd - docIdStart);
+                std::string expectedId = expected.substr(expectedIdStart, expectedIdEnd - expectedIdStart);
+                EXPECT_EQ(docId.size(), expectedId.size()) << "ID length mismatch for chunk: " << chunk;
+                EXPECT_TRUE(std::all_of(docId.begin(), docId.end(), ::isalnum)) << "ID not alphanumeric for chunk: " << chunk;
+                // Compare everything except the id value
+                std::string docStrNoId = docStr;
+                std::string expectedNoId = expected;
+                docStrNoId.replace(docIdStart, docId.size(), std::string(docId.size(), '*'));
+                expectedNoId.replace(expectedIdStart, expectedId.size(), std::string(expectedId.size(), '*'));
+                EXPECT_EQ(docStrNoId, expectedNoId) << "Mismatch for chunk (ignoring id value): " << chunk;
+            } else {
+                EXPECT_EQ(docStr, expected) << "Mismatch for chunk: [" << chunk << "] got [" << docStr << "] but expected [" << expected << "]" << chunkIteration;
+            }
+        } else if (expectedDelta.has_value()) {
+            FAIL() << "Mismatch for chunk: [" << chunk << "] got nothing but expected [" << expectedDelta.value() << "]" << chunkIteration;
+        } else if (doc.has_value()) {
+            rapidjson::StringBuffer buffer;
+            rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+            doc->Accept(writer);
+            std::string docStr = buffer.GetString();
+            FAIL() << "Mismatch for chunk: [" << chunk << "] expected nothing but got [" << docStr << "]" << chunkIteration;
+        } else {
+            FAIL() << "Mismatch for chunk: [" << chunk << "] " << chunkIteration;
+        }
+    }
+}
+
+TEST_F(DevstralOutputParserTest, ToolCallsWithoutToolsInTheRequestStreaming) {
+    std::vector<std::pair<std::string, std::optional<std::string>>> chunkToDeltaVec{
+        // Tool parser is available, but tools are not in the request so every chunk is just regular content
+        {"[TOOL_CALLS]", "{\"delta\":{\"content\":\"[TOOL_CALLS]\"}}"},
+        {"get_", "{\"delta\":{\"content\":\"get_\"}}"},
+        {"weather", "{\"delta\":{\"content\":\"weather\"}}"},
+        {"[ARGS]", "{\"delta\":{\"content\":\"[ARGS]\"}}"},
+        {"{\"", "{\"delta\":{\"content\":\"{\\\"\"}}"},
+        {"city\":", "{\"delta\":{\"content\":\"city\\\":\"}}"},
+        {"\"Paris\"", "{\"delta\":{\"content\":\"\\\"Paris\\\"\"}}"},
+        {"}", "{\"delta\":{\"content\":\"}\"}}"},
+    };
+
+    for (const auto& [chunk, expectedDelta] : chunkToDeltaVec) {
+        // Second argument is false as we simulate the case where tools have not been provided in the request
+        std::optional<rapidjson::Document> doc = outputParserWithRegularToolParsing->parseChunk(chunk, false, ov::genai::GenerationFinishReason::NONE);
+        if (!expectedDelta.has_value() && !doc.has_value()) {
+            continue;  // Both are nullopt, OK
+        }
+        if (expectedDelta.has_value() && doc.has_value()) {
+            rapidjson::StringBuffer buffer;
+            rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+            doc->Accept(writer);
+            std::string docStr = buffer.GetString();
+            std::string expected = expectedDelta.value();
+            EXPECT_EQ(docStr, expected) << "Mismatch for chunk: " << chunk;
+        } else {
+            FAIL() << "Mismatch between expectedDelta and doc for chunk: " << chunk;
+        }
+    }
+}
diff --git a/windows_prepare_llm_models.bat b/windows_prepare_llm_models.bat
index 88c6d04a17..de2512fe1a 100644
--- a/windows_prepare_llm_models.bat
+++ b/windows_prepare_llm_models.bat
@@ -33,8 +33,6 @@ set "RERANK_MODEL=BAAI/bge-reranker-base"
 set "TEXT_GENERATION_MODEL=HuggingFaceTB/SmolLM2-360M-Instruct"
 set "FACEBOOK_MODEL=facebook/opt-125m"
 set "VLM_MODEL=OpenGVLab/InternVL2-1B"
-set "TOKENIZER_FILE=openvino_tokenizer.bin"
-set "LEGACY_MODEL_FILE=1\model.bin"
 
 :: Models for tools testing. Only tokenizers are downloaded.
set "QWEN3_MODEL=Qwen/Qwen3-8B" @@ -43,6 +41,7 @@ set "HERMES3_MODEL=NousResearch/Hermes-3-Llama-3.1-8B" set "PHI4_MODEL=microsoft/Phi-4-mini-instruct" set "MISTRAL_MODEL=mistralai/Mistral-7B-Instruct-v0.3" set "GPTOSS_MODEL=openai/gpt-oss-20b" +set "DEVSTRAL_MODEL=unsloth/Devstral-Small-2507" echo Downloading LLM testing models to directory %~1 set "PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cpu https://storage.openvinotoolkit.org/simple/wheels/nightly" @@ -51,7 +50,6 @@ C:\opt\Python312\python.exe -m venv .venv if !errorlevel! neq 0 exit /b !errorlevel! call .\.venv\Scripts\Activate.bat if !errorlevel! neq 0 exit /b !errorlevel! -set python -m pip install --upgrade pip if !errorlevel! neq 0 exit /b !errorlevel! pip install -U -r demos\common\export_models\requirements.txt @@ -59,160 +57,63 @@ if !errorlevel! neq 0 exit /b !errorlevel! if not exist "%~1" mkdir "%~1" -if exist "%~1\%TEXT_GENERATION_MODEL%\%TOKENIZER_FILE%" ( - echo Models file %~1\%TEXT_GENERATION_MODEL%\%TOKENIZER_FILE% exists. Skipping downloading models. -) else ( - echo Downloading text generation model to %~1\%TEXT_GENERATION_MODEL% directory. - python demos\common\export_models\export_model.py text_generation --source_model "%TEXT_GENERATION_MODEL%" --weight-format int8 --model_repository_path %~1 - if !errorlevel! neq 0 exit /b !errorlevel! -) -if not exist "%~1\%TEXT_GENERATION_MODEL%\%TOKENIZER_FILE%" ( - echo Models file %~1\%TEXT_GENERATION_MODEL%\%TOKENIZER_FILE% does not exists. - exit /b 1 -) -if exist "%~1\%FACEBOOK_MODEL%\%TOKENIZER_FILE%" ( - echo Models file %~1\%FACEBOOK_MODEL%\%TOKENIZER_FILE% exists. Skipping downloading models. -) else ( - echo Downloading text generation model to %~1\%FACEBOOK_MODEL% directory. - python demos\common\export_models\export_model.py text_generation --source_model "%FACEBOOK_MODEL%" --weight-format int8 --model_repository_path %~1 - if !errorlevel! neq 0 exit /b !errorlevel! -) -if not exist "%~1\%FACEBOOK_MODEL%\%TOKENIZER_FILE%" ( - echo Models file %~1\%FACEBOOK_MODEL%\%TOKENIZER_FILE% does not exist. - exit /b 1 -) +:: Export models +call :download_export_model "%VLM_MODEL%" "text_generation" "--weight-format int4" "%~1" +call :download_export_model "%TEXT_GENERATION_MODEL%" "text_generation" "--weight-format int8" "%~1" +call :download_export_model "%FACEBOOK_MODEL%" "text_generation" "--weight-format int8" "%~1" +call :download_export_model "%RERANK_MODEL%" "rerank_ov" "--weight-format int8 --model_name %RERANK_MODEL%\ov" "%~1" +call :download_export_model "%EMBEDDING_MODEL%" "embeddings_ov" "--weight-format int8 --model_name %EMBEDDING_MODEL%\ov" "%~1" -if not exist "%~1\%TEXT_GENERATION_MODEL%\chat_template.jinja" ( - echo Copying dummy chat template to %TEXT_GENERATION_MODEL% model directory. - copy /Y "src\test\llm\dummy_facebook_template.jinja" "%~1\%TEXT_GENERATION_MODEL%\chat_template.jinja" +if not exist "%~1\%FACEBOOK_MODEL%\chat_template.jinja" ( + echo Copying dummy chat template to %FACEBOOK_MODEL% model directory. + copy /Y "src\test\llm\dummy_facebook_template.jinja" "%~1\%FACEBOOK_MODEL%\chat_template.jinja" if !errorlevel! neq 0 exit /b !errorlevel! ) -if exist "%~1\%EMBEDDING_MODEL%\ov\%TOKENIZER_FILE%" ( - echo Models file %~1\%EMBEDDING_MODEL%\ov\%TOKENIZER_FILE% exists. Skipping downloading models. -) else ( - echo Downloading embeddings model to %~1\%EMBEDDING_MODEL%\ov directory. 
- python demos\common\export_models\export_model.py embeddings_ov --source_model "%EMBEDDING_MODEL%" --weight-format int8 --model_repository_path %~1 --model_name "%EMBEDDING_MODEL%\ov" - if !errorlevel! neq 0 exit /b !errorlevel! -) -if not exist "%~1\%EMBEDDING_MODEL%\ov\%TOKENIZER_FILE%" ( - echo Models file %~1\%EMBEDDING_MODEL%\ov\%TOKENIZER_FILE% does not exists. - exit /b 1 -) - -if exist "%~1\%RERANK_MODEL%\rerank\%LEGACY_MODEL_FILE%" ( - echo Models file %~1\%RERANK_MODEL%\rerank\%LEGACY_MODEL_FILE% exists. Skipping downloading models. -) else ( - echo Downloading rerank model to %~1\%RERANK_MODEL% directory. - python demos\common\export_models\export_model.py rerank --source_model "%RERANK_MODEL%" --weight-format int8 --model_repository_path %~1 - if !errorlevel! neq 0 exit /b !errorlevel! -) -if not exist "%~1\%RERANK_MODEL%\rerank\%LEGACY_MODEL_FILE%" ( - echo Models file %~1\%RERANK_MODEL%\rerank\%LEGACY_MODEL_FILE% does not exists. - exit /b 1 -) - -if exist "%~1\%RERANK_MODEL%\ov\%TOKENIZER_FILE%" ( - echo Models file %~1\%RERANK_MODEL%\ov\%TOKENIZER_FILE% exists. Skipping downloading models. -) else ( - echo Downloading rerank model to %~1\%RERANK_MODEL%\ov directory. - python demos\common\export_models\export_model.py rerank_ov --source_model "%RERANK_MODEL%" --weight-format int8 --model_repository_path %~1 --model_name "%RERANK_MODEL%\ov" - if !errorlevel! neq 0 exit /b !errorlevel! -) -if not exist "%~1\%RERANK_MODEL%\ov\%TOKENIZER_FILE%" ( - echo Models file %~1\%RERANK_MODEL%\ov\%TOKENIZER_FILE% does not exists. - exit /b 1 -) - -if exist "%~1\%VLM_MODEL%\%TOKENIZER_FILE%" ( - echo Models file %~1\%VLM_MODEL%\%TOKENIZER_FILE% exists. Skipping downloading models. -) else ( - echo Downloading visual language model to %~1\%VLM_MODEL% directory. - python demos\common\export_models\export_model.py text_generation --source_model "%VLM_MODEL%" --weight-format int4 --kv_cache_precision u8 --model_repository_path %~1 - if !errorlevel! neq 0 exit /b !errorlevel! -) -if not exist "%~1\%VLM_MODEL%\%TOKENIZER_FILE%" ( - echo Models file %~1\%VLM_MODEL%\%TOKENIZER_FILE% does not exists. - exit /b 1 -) - -if exist "%~1\%QWEN3_MODEL%\%TOKENIZER_FILE%" ( - echo Models file %~1\%QWEN3_MODEL%\%TOKENIZER_FILE% exists. Skipping downloading models. -) else ( - echo Downloading tokenizer and detokenizer for Qwen3 model to %~1\%QWEN3_MODEL% directory. - mkdir "%~1\%QWEN3_MODEL%" - convert_tokenizer "%QWEN3_MODEL%" --with_detokenizer -o "%~1\%QWEN3_MODEL%" - if !errorlevel! neq 0 exit /b !errorlevel! -) -if not exist "%~1\%QWEN3_MODEL%\%TOKENIZER_FILE%" ( - echo Models file %~1\%QWEN3_MODEL%\%TOKENIZER_FILE% does not exists. - exit /b 1 -) +:: Download tokenizers for tools testing +call :download_tokenizer "%QWEN3_MODEL%" "%~1\%QWEN3_MODEL%" +call :download_tokenizer "%LLAMA3_MODEL%" "%~1\%LLAMA3_MODEL%" +call :download_tokenizer "%HERMES3_MODEL%" "%~1\%HERMES3_MODEL%" +call :download_tokenizer "%PHI4_MODEL%" "%~1\%PHI4_MODEL%" +call :download_tokenizer "%MISTRAL_MODEL%" "%~1\%MISTRAL_MODEL%" +call :download_tokenizer "%GPTOSS_MODEL%" "%~1\%GPTOSS_MODEL%" +call :download_tokenizer "%DEVSTRAL_MODEL%" "%~1\%DEVSTRAL_MODEL%" -if exist "%~1\%LLAMA3_MODEL%\%TOKENIZER_FILE%" ( - echo Models file %~1\%LLAMA3_MODEL%\%TOKENIZER_FILE% exists. Skipping downloading models. -) else ( - echo Downloading tokenizer and detokenizer for Llama3.1 model to %~1\%LLAMA3_MODEL% directory. - mkdir "%~1\%LLAMA3_MODEL%" - convert_tokenizer "%LLAMA3_MODEL%" --with_detokenizer -o "%~1\%LLAMA3_MODEL%" - if !errorlevel! 
neq 0 exit /b !errorlevel!
-)
-if not exist "%~1\%LLAMA3_MODEL%\%TOKENIZER_FILE%" (
-    echo Models file %~1\%LLAMA3_MODEL%\%TOKENIZER_FILE% does not exists.
-    exit /b 1
-)
+exit /b 0
 
-if exist "%~1\%HERMES3_MODEL%\%TOKENIZER_FILE%" (
-    echo Models file %~1\%HERMES3_MODEL%\%TOKENIZER_FILE% exists. Skipping downloading models.
-) else (
-    echo Downloading tokenizer and detokenizer for Hermes3 model to %~1\%HERMES3_MODEL% directory.
-    mkdir "%~1\%HERMES3_MODEL%"
-    convert_tokenizer "%HERMES3_MODEL%" --with_detokenizer -o "%~1\%HERMES3_MODEL%"
-    if !errorlevel! neq 0 exit /b !errorlevel!
-)
+:: Helper subroutine to download export models
+:download_export_model
+set "model=%~1"
+set "model_type=%~2"
+set "export_args=%~3"
+set "repository=%~4"
-if not exist "%~1\%HERMES3_MODEL%\%TOKENIZER_FILE%" (
-    echo Models file %~1\%HERMES3_MODEL%\%TOKENIZER_FILE% does not exists.
-    exit /b 1
-)
 
-if exist "%~1\%PHI4_MODEL%\%TOKENIZER_FILE%" (
-    echo Models file %~1\%PHI4_MODEL%\%TOKENIZER_FILE% exists. Skipping downloading models.
+if not exist "%repository%\%model%\openvino_tokenizer.bin" (
+    echo Downloading %model_type% model to %repository%\%model% directory.
+    python demos\common\export_models\export_model.py %model_type% --source_model "%model%" %export_args% --model_repository_path %repository%
+    if !errorlevel! neq 0 exit /b !errorlevel!
 ) else (
-    echo Downloading tokenizer and detokenizer for Phi-4 model to %~1\%PHI4_MODEL% directory.
-    mkdir "%~1\%PHI4_MODEL%"
-    convert_tokenizer "%PHI4_MODEL%" --with_detokenizer -o "%~1\%PHI4_MODEL%"
-    if !errorlevel! neq 0 exit /b !errorlevel!
+    echo Models file %repository%\%model%\openvino_tokenizer.bin exists. Skipping downloading models.
 )
-if not exist "%~1\%PHI4_MODEL%\%TOKENIZER_FILE%" (
-    echo Models file %~1\%PHI4_MODEL%\%TOKENIZER_FILE% does not exists.
-    exit /b 1
-)
+exit /b 0
 
-if exist "%~1\%MISTRAL_MODEL%\%TOKENIZER_FILE%" (
-    echo Models file %~1\%MISTRAL_MODEL%\%TOKENIZER_FILE% exists. Skipping downloading models.
-) else (
-    echo Downloading tokenizer and detokenizer for Mistral model to %~1\%MISTRAL_MODEL% directory.
-    mkdir "%~1\%MISTRAL_MODEL%"
-    convert_tokenizer "%MISTRAL_MODEL%" --with_detokenizer -o "%~1\%MISTRAL_MODEL%"
-    if !errorlevel! neq 0 exit /b !errorlevel!
-)
-if not exist "%~1\%MISTRAL_MODEL%\%TOKENIZER_FILE%" (
-    echo Models file %~1\%MISTRAL_MODEL%\%TOKENIZER_FILE% does not exists.
-    exit /b 1
-)
+:: Helper subroutine to download tokenizers
+:download_tokenizer
+set "model=%~1"
+set "check_path=%~2"
 
-if exist "%~1\%GPTOSS_MODEL%\%TOKENIZER_FILE%" (
-    echo Models file %~1\%GPTOSS_MODEL%\%TOKENIZER_FILE% exists. Skipping downloading models.
+if exist "%check_path%\openvino_tokenizer.bin" (
+    echo Models file %check_path%\openvino_tokenizer.bin exists. Skipping downloading models.
 ) else (
-    echo Downloading tokenizer and detokenizer for GPT-OSS model to %~1\%GPTOSS_MODEL% directory.
-    mkdir "%~1\%GPTOSS_MODEL%"
-    convert_tokenizer "%GPTOSS_MODEL%" --with_detokenizer -o "%~1\%GPTOSS_MODEL%"
+    echo Downloading tokenizer and detokenizer for %model% model to %check_path% directory.
+    mkdir "%check_path%"
+    convert_tokenizer "%model%" --with_detokenizer -o "%check_path%"
     if !errorlevel! neq 0 exit /b !errorlevel!
 )
-if not exist "%~1\%GPTOSS_MODEL%\%TOKENIZER_FILE%" (
-    echo Models file %~1\%GPTOSS_MODEL%\%TOKENIZER_FILE% does not exists.
+if not exist "%check_path%\openvino_tokenizer.bin" (
+    echo Models file %check_path%\openvino_tokenizer.bin does not exist.
     exit /b 1
 )
+exit /b 0
 
 endlocal