From 92b7d96f551c706b3d5e1cf4ccf0f36cfe9f8d37 Mon Sep 17 00:00:00 2001 From: David Sarno Date: Thu, 19 Feb 2026 16:06:29 -0800 Subject: [PATCH 1/5] Fix StdioBridgeHost zombie state after domain reload (#785) After domain reload, the bridge could accept TCP connections but never process commands. Three defensive fixes address the plausible failure vectors: 1. Stale client cleanup: when a new client connects, close all other active clients (in stdio there is only one server). Forces hung ReadFrameAsUtf8Async to throw and exit cleanly. 2. ProcessCommands self-healing: track consecutive TCS timeouts. After 2+ consecutive, force re-register ProcessCommands on EditorApplication.update and reset the reentrancy guard. 3. Stale command eviction: commands stuck with IsExecuting=true for more than 60s are evicted from the queue with an error result. Also adds always-on diagnostic logging at client connect/disconnect with active client counts. Includes E2E reconnection tests that verify both abrupt disconnect recovery and stale client cleanup. Co-Authored-By: Claude Opus 4.6 --- .../Transport/Transports/StdioBridgeHost.cs | 77 +++++- .../Services/StdioBridgeReconnectTests.cs | 231 ++++++++++++++++++ .../StdioBridgeReconnectTests.cs.meta | 11 + 3 files changed, 313 insertions(+), 6 deletions(-) create mode 100644 TestProjects/UnityMCPTests/Assets/Tests/EditMode/Services/StdioBridgeReconnectTests.cs create mode 100644 TestProjects/UnityMCPTests/Assets/Tests/EditMode/Services/StdioBridgeReconnectTests.cs.meta diff --git a/MCPForUnity/Editor/Services/Transport/Transports/StdioBridgeHost.cs b/MCPForUnity/Editor/Services/Transport/Transports/StdioBridgeHost.cs index 8a9037a7e..74b3f8926 100644 --- a/MCPForUnity/Editor/Services/Transport/Transports/StdioBridgeHost.cs +++ b/MCPForUnity/Editor/Services/Transport/Transports/StdioBridgeHost.cs @@ -1,5 +1,6 @@ using System; using System.Collections.Generic; +using System.Diagnostics; using System.IO; using System.Linq; using System.Net; @@ -25,6 +26,7 @@ class QueuedCommand public string CommandJson; public TaskCompletionSource Tcs; public bool IsExecuting; + public long EnqueuedAtMs; } [InitializeOnLoad] @@ -51,6 +53,8 @@ public static class StdioBridgeHost private static bool isAutoConnectMode = false; private const ulong MaxFrameBytes = 64UL * 1024 * 1024; private const int FrameIOTimeoutMs = 30000; + private static readonly Stopwatch _uptime = Stopwatch.StartNew(); + private static volatile int _consecutiveTimeouts = 0; private static void IoInfo(string s) { McpLog.Info(s, always: false); } @@ -488,15 +492,14 @@ private static async Task HandleClientAsync(TcpClient client, CancellationToken using (NetworkStream stream = client.GetStream()) { lock (clientsLock) { activeClients.Add(client); } + int clientCount; + lock (clientsLock) { clientCount = activeClients.Count; } try { try { - if (IsDebugEnabled()) - { - var ep = client.Client?.RemoteEndPoint?.ToString() ?? "unknown"; - McpLog.Info($"Client connected {ep}"); - } + var ep = client.Client?.RemoteEndPoint?.ToString() ?? "unknown"; + McpLog.Info($"Client connected {ep} (active clients: {clientCount})"); } catch { } try @@ -522,6 +525,23 @@ private static async Task HandleClientAsync(TcpClient client, CancellationToken return; } + // In stdio transport there is only ever one active Python server. + // A new connection means the old one is dead — close stale clients so + // their hung ReadFrameAsUtf8Async calls throw and exit cleanly. + TcpClient[] staleClients; + lock (clientsLock) + { + staleClients = activeClients.Where(c => c != client).ToArray(); + } + if (staleClients.Length > 0) + { + McpLog.Info($"Closing {staleClients.Length} stale client(s) after new connection"); + foreach (var stale in staleClients) + { + try { stale.Close(); } catch { } + } + } + while (isRunning && !token.IsCancellationRequested) { try @@ -555,7 +575,8 @@ private static async Task HandleClientAsync(TcpClient client, CancellationToken { CommandJson = commandText, Tcs = tcs, - IsExecuting = false + IsExecuting = false, + EnqueuedAtMs = _uptime.ElapsedMilliseconds }; } @@ -568,9 +589,26 @@ private static async Task HandleClientAsync(TcpClient client, CancellationToken { respCts.Cancel(); response = tcs.Task.Result; + Interlocked.Exchange(ref _consecutiveTimeouts, 0); } else { + int timeouts = Interlocked.Increment(ref _consecutiveTimeouts); + McpLog.Warn($"Command TCS timed out ({timeouts} consecutive)"); + if (timeouts >= 2) + { + // Self-healing: force re-register ProcessCommands in case + // the EditorApplication.update callback was lost (e.g. after + // domain reload) and reset the reentrancy guard. + EditorApplication.delayCall += () => + { + if (!isRunning) return; + EditorApplication.update -= ProcessCommands; + EditorApplication.update += ProcessCommands; + Interlocked.Exchange(ref processingCommands, 0); + McpLog.Info("ProcessCommands re-registered (self-healing after consecutive timeouts)"); + }; + } var timeoutResponse = new { status = "error", @@ -636,6 +674,9 @@ private static async Task HandleClientAsync(TcpClient client, CancellationToken finally { lock (clientsLock) { activeClients.Remove(client); } + int remaining; + lock (clientsLock) { remaining = activeClients.Count; } + McpLog.Info($"Client handler exited (remaining clients: {remaining})"); } } } @@ -783,6 +824,30 @@ private static void ProcessCommands() return; } + // Evict commands stuck with IsExecuting=true for too long (e.g. from pre-reload state). + long nowMs = _uptime.ElapsedMilliseconds; + const long staleThresholdMs = 2L * FrameIOTimeoutMs; // 60s + List staleIds = null; + foreach (var kvp in commandQueue) + { + if (kvp.Value.IsExecuting && (nowMs - kvp.Value.EnqueuedAtMs) > staleThresholdMs) + { + staleIds ??= new List(); + staleIds.Add(kvp.Key); + } + } + if (staleIds != null) + { + foreach (var sid in staleIds) + { + var staleCmd = commandQueue[sid]; + commandQueue.Remove(sid); + var err = new { status = "error", error = "Command evicted: stuck too long in queue" }; + try { staleCmd.Tcs.TrySetResult(JsonConvert.SerializeObject(err)); } catch { } + } + McpLog.Info($"Evicted {staleIds.Count} stale command(s) from queue"); + } + work = new List<(string, QueuedCommand)>(commandQueue.Count); foreach (var kvp in commandQueue) { diff --git a/TestProjects/UnityMCPTests/Assets/Tests/EditMode/Services/StdioBridgeReconnectTests.cs b/TestProjects/UnityMCPTests/Assets/Tests/EditMode/Services/StdioBridgeReconnectTests.cs new file mode 100644 index 000000000..a7dfe5a05 --- /dev/null +++ b/TestProjects/UnityMCPTests/Assets/Tests/EditMode/Services/StdioBridgeReconnectTests.cs @@ -0,0 +1,231 @@ +using System; +using System.Collections; +using System.IO; +using System.Net.Sockets; +using System.Text; +using System.Threading; +using NUnit.Framework; +using UnityEngine; +using UnityEngine.TestTools; +using MCPForUnity.Editor.Services.Transport.Transports; + +namespace MCPForUnityTests.Editor.Services +{ + /// + /// Tests that StdioBridgeHost correctly handles client reconnection scenarios. + /// After an abrupt client disconnect, a new client must be able to connect and + /// have its commands processed — this was broken by the zombie state bug (#785). + /// + [TestFixture] + public class StdioBridgeReconnectTests + { + private const int ConnectTimeoutMs = 5000; + private const int ReadTimeoutMs = 10000; + + [UnityTest] + public IEnumerator NewClient_AfterAbruptDisconnect_CanSendAndReceiveCommands() + { + if (!StdioBridgeHost.IsRunning) + { + Assert.Ignore("StdioBridgeHost is not running; skipping reconnect test."); + yield break; + } + + int port = StdioBridgeHost.GetCurrentPort(); + + // --- First client: connect, verify ping/pong, then abruptly close --- + using (var client1 = new TcpClient()) + { + client1.Connect("127.0.0.1", port); + client1.ReceiveTimeout = ReadTimeoutMs; + var stream1 = client1.GetStream(); + + string handshake1 = ReadLine(stream1, ReadTimeoutMs); + Assert.That(handshake1, Does.Contain("FRAMING=1"), "First client should receive handshake"); + + // Send a framed ping + SendFrame(stream1, Encoding.UTF8.GetBytes("ping")); + byte[] pongBytes = ReadFrame(stream1, ReadTimeoutMs); + string pong1 = Encoding.UTF8.GetString(pongBytes); + Assert.That(pong1, Does.Contain("pong"), "First client should get pong response"); + + // Abrupt close — simulates server crash / domain reload disconnect + client1.Client.LingerState = new LingerOption(true, 0); + client1.Close(); + } + + // Wait a few frames for cleanup + for (int i = 0; i < 10; i++) + yield return null; + + // --- Second client: connect and verify commands still work --- + using (var client2 = new TcpClient()) + { + client2.Connect("127.0.0.1", port); + client2.ReceiveTimeout = ReadTimeoutMs; + var stream2 = client2.GetStream(); + + string handshake2 = ReadLine(stream2, ReadTimeoutMs); + Assert.That(handshake2, Does.Contain("FRAMING=1"), "Second client should receive handshake"); + + // Send a framed ping — this is the critical check that would fail + // if the bridge is in zombie state. + SendFrame(stream2, Encoding.UTF8.GetBytes("ping")); + byte[] pongBytes2 = ReadFrame(stream2, ReadTimeoutMs); + string pong2 = Encoding.UTF8.GetString(pongBytes2); + Assert.That(pong2, Does.Contain("pong"), "Second client should get pong response after reconnect"); + + client2.Close(); + } + } + + [UnityTest] + public IEnumerator NewClient_WhileOldClientStillConnected_ClosesStaleClient() + { + if (!StdioBridgeHost.IsRunning) + { + Assert.Ignore("StdioBridgeHost is not running; skipping reconnect test."); + yield break; + } + + int port = StdioBridgeHost.GetCurrentPort(); + + // --- First client: connect and verify handshake (but don't close) --- + var client1 = new TcpClient(); + try + { + client1.Connect("127.0.0.1", port); + client1.ReceiveTimeout = ReadTimeoutMs; + var stream1 = client1.GetStream(); + + string handshake1 = ReadLine(stream1, ReadTimeoutMs); + Assert.That(handshake1, Does.Contain("FRAMING=1"), "First client should receive handshake"); + + // Verify ping works on first client + SendFrame(stream1, Encoding.UTF8.GetBytes("ping")); + byte[] pong1Bytes = ReadFrame(stream1, ReadTimeoutMs); + Assert.That(Encoding.UTF8.GetString(pong1Bytes), Does.Contain("pong")); + + // --- Second client: connect while first is still open --- + using (var client2 = new TcpClient()) + { + client2.Connect("127.0.0.1", port); + client2.ReceiveTimeout = ReadTimeoutMs; + var stream2 = client2.GetStream(); + + string handshake2 = ReadLine(stream2, ReadTimeoutMs); + Assert.That(handshake2, Does.Contain("FRAMING=1"), "Second client should receive handshake"); + + // Wait a few frames for stale client cleanup + for (int i = 0; i < 5; i++) + yield return null; + + // Second client should work — stale first client was closed + SendFrame(stream2, Encoding.UTF8.GetBytes("ping")); + byte[] pong2Bytes = ReadFrame(stream2, ReadTimeoutMs); + Assert.That(Encoding.UTF8.GetString(pong2Bytes), Does.Contain("pong"), + "Second client should get pong after stale client cleanup"); + + client2.Close(); + } + + // First client should now be disconnected by the bridge. + // A read attempt should throw or return 0 bytes. + yield return null; + bool firstClientDisconnected = false; + try + { + SendFrame(stream1, Encoding.UTF8.GetBytes("ping")); + ReadFrame(stream1, 2000); + } + catch + { + firstClientDisconnected = true; + } + + Assert.IsTrue(firstClientDisconnected, "First client should be disconnected after second client connects"); + } + finally + { + try { client1.Close(); } catch { } + } + } + + #region Frame protocol helpers + + private static string ReadLine(NetworkStream stream, int timeoutMs) + { + var sb = new StringBuilder(); + var deadline = DateTime.UtcNow.AddMilliseconds(timeoutMs); + stream.ReadTimeout = timeoutMs; + + while (DateTime.UtcNow < deadline) + { + int b = stream.ReadByte(); + if (b < 0) + throw new IOException("Connection closed while reading line"); + if (b == '\n') + return sb.ToString(); + sb.Append((char)b); + } + throw new TimeoutException("Timed out reading line from stream"); + } + + private static void SendFrame(NetworkStream stream, byte[] payload) + { + byte[] header = new byte[8]; + ulong len = (ulong)payload.LongLength; + header[0] = (byte)(len >> 56); + header[1] = (byte)(len >> 48); + header[2] = (byte)(len >> 40); + header[3] = (byte)(len >> 32); + header[4] = (byte)(len >> 24); + header[5] = (byte)(len >> 16); + header[6] = (byte)(len >> 8); + header[7] = (byte)(len); + stream.Write(header, 0, 8); + stream.Write(payload, 0, payload.Length); + stream.Flush(); + } + + private static byte[] ReadFrame(NetworkStream stream, int timeoutMs) + { + stream.ReadTimeout = timeoutMs; + + byte[] header = ReadExact(stream, 8, timeoutMs); + ulong payloadLen = + ((ulong)header[0] << 56) | ((ulong)header[1] << 48) | + ((ulong)header[2] << 40) | ((ulong)header[3] << 32) | + ((ulong)header[4] << 24) | ((ulong)header[5] << 16) | + ((ulong)header[6] << 8) | header[7]; + + if (payloadLen == 0 || payloadLen > 16 * 1024 * 1024) + throw new IOException($"Invalid frame length: {payloadLen}"); + + return ReadExact(stream, (int)payloadLen, timeoutMs); + } + + private static byte[] ReadExact(NetworkStream stream, int count, int timeoutMs) + { + byte[] buffer = new byte[count]; + int offset = 0; + var deadline = DateTime.UtcNow.AddMilliseconds(timeoutMs); + + while (offset < count) + { + if (DateTime.UtcNow > deadline) + throw new TimeoutException($"Timed out reading {count} bytes (got {offset})"); + + int remaining = count - offset; + int read = stream.Read(buffer, offset, remaining); + if (read == 0) + throw new IOException("Connection closed before reading expected bytes"); + offset += read; + } + + return buffer; + } + + #endregion + } +} diff --git a/TestProjects/UnityMCPTests/Assets/Tests/EditMode/Services/StdioBridgeReconnectTests.cs.meta b/TestProjects/UnityMCPTests/Assets/Tests/EditMode/Services/StdioBridgeReconnectTests.cs.meta new file mode 100644 index 000000000..d0e1c6de5 --- /dev/null +++ b/TestProjects/UnityMCPTests/Assets/Tests/EditMode/Services/StdioBridgeReconnectTests.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: 299b59741ba9d4a4dafc70c3317d2e0e +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: From 39ec81a0a19adec826d3661e192cc498a75af9b3 Mon Sep 17 00:00:00 2001 From: David Sarno Date: Thu, 19 Feb 2026 18:28:44 -0800 Subject: [PATCH 2/5] Fix stdio bridge stalls when Unity is backgrounded during domain reload - Add QueuePlayerLoopUpdate after command enqueue so ProcessCommands fires even when Unity is backgrounded (mirrors HTTP RequestMainThreadPump) - Keep ProcessCommands permanently registered on EditorApplication.update to eliminate the registration gap between Stop and Start during reload - Replace blocking retry loop in Start() (Thread.Sleep x 10) with single attempt + port-switch fallback; async retries handled by reload handler - Replace fire-and-forget TryStartBridgeImmediate with async retry loop (6 attempts with backoff: 0s, 1s, 3s, 5s, 10s, 30s) matching HTTP - Fix macOS port conflict: use ExclusiveAddressUse in both the real listener (CreateConfiguredListener) and the port probe (IsPortAvailable) to prevent AssetImportWorkers from binding the same port via SO_REUSEADDR - Add PortManager unit tests (5 tests including macOS SO_REUSEADDR case) Fixes #785 Co-Authored-By: Claude Opus 4.6 --- MCPForUnity/Editor/Helpers/PortManager.cs | 29 +---- .../Services/StdioBridgeReloadHandler.cs | 101 ++++++++++++--- .../Transport/Transports/StdioBridgeHost.cs | 117 ++++++------------ .../EditMode/Services/PortManagerTests.cs | 108 ++++++++++++++++ .../Services/PortManagerTests.cs.meta | 11 ++ 5 files changed, 246 insertions(+), 120 deletions(-) create mode 100644 TestProjects/UnityMCPTests/Assets/Tests/EditMode/Services/PortManagerTests.cs create mode 100644 TestProjects/UnityMCPTests/Assets/Tests/EditMode/Services/PortManagerTests.cs.meta diff --git a/MCPForUnity/Editor/Helpers/PortManager.cs b/MCPForUnity/Editor/Helpers/PortManager.cs index de46fd8f2..11a8d7eef 100644 --- a/MCPForUnity/Editor/Helpers/PortManager.cs +++ b/MCPForUnity/Editor/Helpers/PortManager.cs @@ -119,10 +119,15 @@ private static int FindAvailablePort() /// True if port is available public static bool IsPortAvailable(int port) { - // Start with quick loopback check try { var testListener = new TcpListener(IPAddress.Loopback, port); +#if UNITY_EDITOR_OSX + // On macOS, SO_REUSEADDR (the default) lets multiple processes bind the same + // port — including AssetImportWorkers. ExclusiveAddressUse prevents this so + // the test bind fails when another process already holds the port. + try { testListener.Server.ExclusiveAddressUse = true; } catch { } +#endif testListener.Start(); testListener.Stop(); } @@ -131,28 +136,6 @@ public static bool IsPortAvailable(int port) return false; } -#if UNITY_EDITOR_OSX - // On macOS, the OS might report the port as available (SO_REUSEADDR) even if another process - // is using it, unless we also check active connections or try a stricter bind. - // Double check by trying to Connect to it. If we CAN connect, it's NOT available. - try - { - using var client = new TcpClient(); - var connectTask = client.ConnectAsync(IPAddress.Loopback, port); - // If we connect successfully, someone is listening -> Not available - if (connectTask.Wait(50) && client.Connected) - { - if (IsDebugEnabled()) McpLog.Info($"[PortManager] Port {port} bind succeeded but connection also succeeded -> Not available (Conflict)."); - return false; - } - } - catch - { - // Connection failed -> likely available (or firewall blocked, but we assume available) - if (IsDebugEnabled()) McpLog.Info($"[PortManager] Port {port} connection failed -> likely available."); - } -#endif - return true; } diff --git a/MCPForUnity/Editor/Services/StdioBridgeReloadHandler.cs b/MCPForUnity/Editor/Services/StdioBridgeReloadHandler.cs index 278620b16..2395c4b78 100644 --- a/MCPForUnity/Editor/Services/StdioBridgeReloadHandler.cs +++ b/MCPForUnity/Editor/Services/StdioBridgeReloadHandler.cs @@ -1,8 +1,10 @@ using System; +using System.Threading.Tasks; using MCPForUnity.Editor.Constants; using MCPForUnity.Editor.Helpers; using MCPForUnity.Editor.Services.Transport; using MCPForUnity.Editor.Services.Transport.Transports; +using MCPForUnity.Editor.Windows; using UnityEditor; namespace MCPForUnity.Editor.Services @@ -13,6 +15,16 @@ namespace MCPForUnity.Editor.Services [InitializeOnLoad] internal static class StdioBridgeReloadHandler { + private static readonly TimeSpan[] ResumeRetrySchedule = + { + TimeSpan.Zero, + TimeSpan.FromSeconds(1), + TimeSpan.FromSeconds(3), + TimeSpan.FromSeconds(5), + TimeSpan.FromSeconds(10), + TimeSpan.FromSeconds(30) + }; + static StdioBridgeReloadHandler() { AssemblyReloadEvents.beforeAssemblyReload += OnBeforeAssemblyReload; @@ -89,36 +101,85 @@ private static void OnAfterAssemblyReload() return; } - // Restart via TransportManager so state stays in sync; if it fails (port busy), rely on UI to retry. - TryStartBridgeImmediate(); + // If the editor is not compiling, attempt an immediate restart without relying on editor focus. + bool isCompiling = EditorApplication.isCompiling; + try + { + var pipeline = Type.GetType("UnityEditor.Compilation.CompilationPipeline, UnityEditor"); + var prop = pipeline?.GetProperty("isCompiling", System.Reflection.BindingFlags.Public | System.Reflection.BindingFlags.Static); + if (prop != null) isCompiling |= (bool)prop.GetValue(null); + } + catch { } + + if (!isCompiling) + { + _ = ResumeStdioWithRetriesAsync(); + return; + } + + // Fallback when compiling: schedule on the editor loop + EditorApplication.delayCall += () => + { + _ = ResumeStdioWithRetriesAsync(); + }; } - private static void TryStartBridgeImmediate() + private static async Task ResumeStdioWithRetriesAsync() { - var startTask = MCPServiceLocator.TransportManager.StartAsync(TransportMode.Stdio); - startTask.ContinueWith(t => + Exception lastException = null; + + for (int i = 0; i < ResumeRetrySchedule.Length; i++) { - // Clear the flag after attempting to start (success or failure). - // This prevents getting stuck in "Resuming..." state. - // We do this synchronously on the continuation thread - it's safe because - // EditorPrefs operations are thread-safe and any new reload will set the flag - // fresh in OnBeforeAssemblyReload before we get here. - try { EditorPrefs.DeleteKey(EditorPrefKeys.ResumeStdioAfterReload); } catch { } - - if (t.IsFaulted) + int attempt = i + 1; + McpLog.Debug($"[Stdio Reload] Resume attempt {attempt}/{ResumeRetrySchedule.Length}"); + + TimeSpan delay = ResumeRetrySchedule[i]; + if (delay > TimeSpan.Zero) { - var baseEx = t.Exception?.GetBaseException(); - McpLog.Warn($"Failed to resume stdio bridge after reload: {baseEx?.Message}"); - return; + McpLog.Debug($"[Stdio Reload] Waiting {delay.TotalSeconds:0.#}s before resume attempt {attempt}"); + try { await Task.Delay(delay); } + catch { return; } } - if (!t.Result) + + // Abort retries if the user switched transports while we were waiting. + if (EditorConfigurationCache.Instance.UseHttpTransport) { - McpLog.Warn("Failed to resume stdio bridge after domain reload"); + try { EditorPrefs.DeleteKey(EditorPrefKeys.ResumeStdioAfterReload); } catch { } return; } - MCPForUnity.Editor.Windows.MCPForUnityEditorWindow.RequestHealthVerification(); - }, System.Threading.Tasks.TaskScheduler.Default); + try + { + bool started = await MCPServiceLocator.TransportManager.StartAsync(TransportMode.Stdio); + if (started) + { + McpLog.Debug($"[Stdio Reload] Resume succeeded on attempt {attempt}"); + try { EditorPrefs.DeleteKey(EditorPrefKeys.ResumeStdioAfterReload); } catch { } + MCPForUnityEditorWindow.RequestHealthVerification(); + return; + } + + var state = MCPServiceLocator.TransportManager.GetState(TransportMode.Stdio); + string reason = string.IsNullOrWhiteSpace(state?.Error) ? "no error detail" : state.Error; + McpLog.Debug($"[Stdio Reload] Resume attempt {attempt} failed: {reason}"); + } + catch (Exception ex) + { + lastException = ex; + McpLog.Debug($"[Stdio Reload] Resume attempt {attempt} threw: {ex.Message}"); + } + } + + try { EditorPrefs.DeleteKey(EditorPrefKeys.ResumeStdioAfterReload); } catch { } + + if (lastException != null) + { + McpLog.Warn($"Failed to resume stdio bridge after domain reload: {lastException.Message}"); + } + else + { + McpLog.Warn("Failed to resume stdio bridge after domain reload"); + } } } } diff --git a/MCPForUnity/Editor/Services/Transport/Transports/StdioBridgeHost.cs b/MCPForUnity/Editor/Services/Transport/Transports/StdioBridgeHost.cs index 74b3f8926..4d78689ef 100644 --- a/MCPForUnity/Editor/Services/Transport/Transports/StdioBridgeHost.cs +++ b/MCPForUnity/Editor/Services/Transport/Transports/StdioBridgeHost.cs @@ -55,6 +55,7 @@ public static class StdioBridgeHost private const int FrameIOTimeoutMs = 30000; private static readonly Stopwatch _uptime = Stopwatch.StartNew(); private static volatile int _consecutiveTimeouts = 0; + private static bool _processCommandsHooked = false; private static void IoInfo(string s) { McpLog.Info(s, always: false); } @@ -270,68 +271,31 @@ public static void Start() LogBreadcrumb("Start"); - const int maxImmediateRetries = 10; - const int retrySleepMs = 200; - int attempt = 0; - for (; ; ) + try + { + listener = CreateConfiguredListener(currentUnityPort); + listener.Start(); + } + catch (SocketException se) when (se.SocketErrorCode == SocketError.AddressAlreadyInUse) { + // Port is busy. Try switching to a new port once; if that also fails, + // let the reload handler retry with async backoff instead of blocking here. + int oldPort = currentUnityPort; + currentUnityPort = PortManager.DiscoverNewPort(); + try { - listener = CreateConfiguredListener(currentUnityPort); - listener.Start(); - break; + EditorPrefs.SetInt(EditorPrefKeys.UnitySocketPort, currentUnityPort); } - catch (SocketException se) when (se.SocketErrorCode == SocketError.AddressAlreadyInUse && attempt < maxImmediateRetries) + catch { } + + if (IsDebugEnabled()) { - attempt++; - Thread.Sleep(retrySleepMs); - continue; + McpLog.Info($"Port {oldPort} occupied, switching to port {currentUnityPort}"); } - catch (SocketException se) when (se.SocketErrorCode == SocketError.AddressAlreadyInUse && attempt >= maxImmediateRetries) - { - int oldPort = currentUnityPort; - // Before switching ports, give the old one a brief chance to release if it looks like ours - try - { - if (PortManager.IsPortUsedByMCPForUnity(oldPort)) - { - const int waitStepMs = 100; - int waited = 0; - while (waited < 300 && !PortManager.IsPortAvailable(oldPort)) - { - Thread.Sleep(waitStepMs); - waited += waitStepMs; - } - } - } - catch { } - - currentUnityPort = PortManager.DiscoverNewPort(); - - // Persist the new port so next time we start on this port - try - { - EditorPrefs.SetInt(EditorPrefKeys.UnitySocketPort, currentUnityPort); - } - catch { } - - if (IsDebugEnabled()) - { - if (currentUnityPort == oldPort) - { - McpLog.Info($"Port {oldPort} became available, proceeding"); - } - else - { - McpLog.Info($"Port {oldPort} occupied, switching to port {currentUnityPort}"); - } - } - - listener = CreateConfiguredListener(currentUnityPort); - listener.Start(); - break; - } + listener = CreateConfiguredListener(currentUnityPort); + listener.Start(); } isRunning = true; @@ -342,7 +306,11 @@ public static void Start() cts = new CancellationTokenSource(); listenerTask = Task.Run(() => ListenerLoopAsync(cts.Token)); CommandRegistry.Initialize(); - EditorApplication.update += ProcessCommands; + if (!_processCommandsHooked) + { + _processCommandsHooked = true; + EditorApplication.update += ProcessCommands; + } try { EditorApplication.quitting -= Stop; } catch { } try { EditorApplication.quitting += Stop; } catch { } heartbeatSeq++; @@ -360,11 +328,12 @@ private static TcpListener CreateConfiguredListener(int port) { var newListener = new TcpListener(IPAddress.Loopback, port); #if UNITY_EDITOR_OSX - newListener.Server.SetSocketOption( - SocketOptionLevel.Socket, - SocketOptionName.ReuseAddress, - true - ); + // SO_REUSEADDR is intentionally NOT set. On macOS it allows multiple + // processes (including AssetImportWorkers) to bind the same port, + // causing connections to land on a worker that can't process commands. + // The ExclusiveAddressUse flag prevents this; port-busy conflicts are + // handled by the retry/fallback logic in Start() and the reload handler. + try { newListener.Server.ExclusiveAddressUse = true; } catch { } #endif try { @@ -420,10 +389,13 @@ public static void Stop() if (toWait != null) { - try { toWait.Wait(2000); } catch { } + // CTS is already cancelled; give the listener task a brief moment to exit. + try { toWait.Wait(500); } catch { } } - try { EditorApplication.update -= ProcessCommands; } catch { } + // ProcessCommands stays permanently hooked (guarded by _processCommandsHooked) + // to eliminate the registration gap between Stop and Start during domain reload. + // ProcessCommands already exits early when !isRunning. try { EditorApplication.quitting -= Stop; } catch { } try @@ -580,6 +552,11 @@ private static async Task HandleClientAsync(TcpClient client, CancellationToken }; } + // Force Unity's main loop to iterate even when backgrounded, + // so ProcessCommands fires and picks up the queued command. + // This mirrors what HTTP does via TransportCommandDispatcher.RequestMainThreadPump(). + try { EditorApplication.QueuePlayerLoopUpdate(); } catch { } + string response; try { @@ -595,20 +572,6 @@ private static async Task HandleClientAsync(TcpClient client, CancellationToken { int timeouts = Interlocked.Increment(ref _consecutiveTimeouts); McpLog.Warn($"Command TCS timed out ({timeouts} consecutive)"); - if (timeouts >= 2) - { - // Self-healing: force re-register ProcessCommands in case - // the EditorApplication.update callback was lost (e.g. after - // domain reload) and reset the reentrancy guard. - EditorApplication.delayCall += () => - { - if (!isRunning) return; - EditorApplication.update -= ProcessCommands; - EditorApplication.update += ProcessCommands; - Interlocked.Exchange(ref processingCommands, 0); - McpLog.Info("ProcessCommands re-registered (self-healing after consecutive timeouts)"); - }; - } var timeoutResponse = new { status = "error", diff --git a/TestProjects/UnityMCPTests/Assets/Tests/EditMode/Services/PortManagerTests.cs b/TestProjects/UnityMCPTests/Assets/Tests/EditMode/Services/PortManagerTests.cs new file mode 100644 index 000000000..fa7a0060e --- /dev/null +++ b/TestProjects/UnityMCPTests/Assets/Tests/EditMode/Services/PortManagerTests.cs @@ -0,0 +1,108 @@ +using System.Net; +using System.Net.Sockets; +using NUnit.Framework; +using MCPForUnity.Editor.Helpers; + +namespace MCPForUnityTests.Editor.Services +{ + [TestFixture] + public class PortManagerTests + { + [Test] + public void IsPortAvailable_ReturnsFalse_WhenPortIsOccupied() + { + // Bind a port with ExclusiveAddressUse to simulate the real listener + var listener = new TcpListener(IPAddress.Loopback, 0); + listener.Start(); + int port = ((IPEndPoint)listener.LocalEndpoint).Port; + + try + { + Assert.IsFalse(PortManager.IsPortAvailable(port), + "IsPortAvailable should return false for a port that is already bound"); + } + finally + { + listener.Stop(); + } + } + + [Test] + public void IsPortAvailable_ReturnsTrue_WhenPortIsFree() + { + // Bind and immediately release to get a port that was recently free + var listener = new TcpListener(IPAddress.Loopback, 0); + listener.Start(); + int port = ((IPEndPoint)listener.LocalEndpoint).Port; + listener.Stop(); + + Assert.IsTrue(PortManager.IsPortAvailable(port), + "IsPortAvailable should return true for a port that is not bound"); + } + +#if UNITY_EDITOR_OSX + [Test] + public void IsPortAvailable_ReturnsFalse_WhenPortHeldWithReuseAddr() + { + // Simulate what AssetImportWorkers do: bind with SO_REUSEADDR. + // IsPortAvailable must still detect this as occupied. + var holder = new TcpListener(IPAddress.Loopback, 0); + holder.Server.SetSocketOption(SocketOptionLevel.Socket, SocketOptionName.ReuseAddress, true); + holder.Start(); + int port = ((IPEndPoint)holder.LocalEndpoint).Port; + + try + { + Assert.IsFalse(PortManager.IsPortAvailable(port), + "IsPortAvailable should detect ports held with SO_REUSEADDR on macOS"); + } + finally + { + holder.Stop(); + } + } +#endif + + [Test] + public void DiscoverNewPort_ReturnsAvailablePort() + { + int port = PortManager.DiscoverNewPort(); + Assert.Greater(port, 0, "DiscoverNewPort should return a positive port number"); + Assert.IsTrue(PortManager.IsPortAvailable(port), + "The port returned by DiscoverNewPort should be available"); + } + + [Test] + public void DiscoverNewPort_SkipsOccupiedDefaultPort() + { + // Hold the default port (6400) so DiscoverNewPort must find an alternative + TcpListener holder = null; + try + { + holder = new TcpListener(IPAddress.Loopback, 6400); +#if UNITY_EDITOR_OSX + try { holder.Server.ExclusiveAddressUse = true; } catch { } +#endif + holder.Start(); + } + catch (SocketException) + { + // Port 6400 already occupied (e.g., by the running bridge) — that's fine, + // the test still validates that DiscoverNewPort picks a different port. + holder = null; + } + + try + { + int port = PortManager.DiscoverNewPort(); + Assert.AreNotEqual(6400, port, + "DiscoverNewPort should not return the default port when it is occupied"); + Assert.Greater(port, 0); + } + finally + { + holder?.Stop(); + } + } + } +} diff --git a/TestProjects/UnityMCPTests/Assets/Tests/EditMode/Services/PortManagerTests.cs.meta b/TestProjects/UnityMCPTests/Assets/Tests/EditMode/Services/PortManagerTests.cs.meta new file mode 100644 index 000000000..d5fd13313 --- /dev/null +++ b/TestProjects/UnityMCPTests/Assets/Tests/EditMode/Services/PortManagerTests.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: a04ddec79871644e991cf2ab91dc9c3a +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: From 55a0bc75f9cc2baa96b4be3c65e3bee9815e1329 Mon Sep 17 00:00:00 2001 From: David Sarno Date: Thu, 19 Feb 2026 18:34:17 -0800 Subject: [PATCH 3/5] Add resource URI reference table to unity-mcp-skill Adds a prominent 'Do NOT Guess' section with a URI lookup table near the top of the skill, so LLMs use exact URIs instead of fabricating similar-looking ones. Co-Authored-By: Claude Opus 4.6 --- unity-mcp-skill/SKILL.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/unity-mcp-skill/SKILL.md b/unity-mcp-skill/SKILL.md index 986a40fe5..5e42a4d73 100644 --- a/unity-mcp-skill/SKILL.md +++ b/unity-mcp-skill/SKILL.md @@ -15,6 +15,20 @@ Before applying a template: - Validate targets/components first via resources and `find_gameobjects`. - Treat names, enum values, and property payloads as placeholders to adapt. +## Resource URIs: Do NOT Guess + +Resource URIs use a specific scheme — do NOT guess or fabricate them. If you are unsure of a URI, call `ListMcpResourcesTool(server="UnityMCP")` first to get the exact list. Common URIs: + +| Resource | URI | +|----------|-----| +| Editor state | `mcpforunity://editor/state` | +| Project info | `mcpforunity://project/info` | +| Scene gameobject API | `mcpforunity://scene/gameobject-api` | +| Tags | `mcpforunity://project/tags` | +| Layers | `mcpforunity://project/layers` | +| Instances | `mcpforunity://instances` | +| Custom tools | `mcpforunity://custom-tools` | + ## Quick Start: Resource-First Workflow **Always read relevant resources before using tools.** This prevents errors and provides the necessary context. From 96ba61c183ea7aced02c869012f66f3bbb254f9b Mon Sep 17 00:00:00 2001 From: David Sarno Date: Thu, 19 Feb 2026 18:55:56 -0800 Subject: [PATCH 4/5] Address PR review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Collapse double lock in HandleClientAsync (add + count in one lock) - Add CancellationToken to ResumeStdioWithRetriesAsync so retries abort on editor quit or subsequent domain reloads - Fix 'gameobject' → 'GameObject' capitalization in skill URI table Co-Authored-By: Claude Opus 4.6 --- .../Services/StdioBridgeReloadHandler.cs | 23 +++++++++++++++++-- .../Transport/Transports/StdioBridgeHost.cs | 7 ++++-- unity-mcp-skill/SKILL.md | 2 +- 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/MCPForUnity/Editor/Services/StdioBridgeReloadHandler.cs b/MCPForUnity/Editor/Services/StdioBridgeReloadHandler.cs index 2395c4b78..b96fc2cbc 100644 --- a/MCPForUnity/Editor/Services/StdioBridgeReloadHandler.cs +++ b/MCPForUnity/Editor/Services/StdioBridgeReloadHandler.cs @@ -1,4 +1,5 @@ using System; +using System.Threading; using System.Threading.Tasks; using MCPForUnity.Editor.Constants; using MCPForUnity.Editor.Helpers; @@ -25,14 +26,25 @@ internal static class StdioBridgeReloadHandler TimeSpan.FromSeconds(30) }; + private static CancellationTokenSource _retryCts; + static StdioBridgeReloadHandler() { AssemblyReloadEvents.beforeAssemblyReload += OnBeforeAssemblyReload; AssemblyReloadEvents.afterAssemblyReload += OnAfterAssemblyReload; + EditorApplication.quitting += CancelRetries; + } + + private static void CancelRetries() + { + try { _retryCts?.Cancel(); } catch { } } private static void OnBeforeAssemblyReload() { + // Cancel any in-flight retry loop before the next reload. + CancelRetries(); + try { // Only persist resume intent when stdio is the active transport and the bridge is running. @@ -126,10 +138,17 @@ private static void OnAfterAssemblyReload() private static async Task ResumeStdioWithRetriesAsync() { + // Cancel any previous retry loop and create a fresh token. + CancelRetries(); + var cts = _retryCts = new CancellationTokenSource(); + var token = cts.Token; + Exception lastException = null; for (int i = 0; i < ResumeRetrySchedule.Length; i++) { + if (token.IsCancellationRequested) return; + int attempt = i + 1; McpLog.Debug($"[Stdio Reload] Resume attempt {attempt}/{ResumeRetrySchedule.Length}"); @@ -137,8 +156,8 @@ private static async Task ResumeStdioWithRetriesAsync() if (delay > TimeSpan.Zero) { McpLog.Debug($"[Stdio Reload] Waiting {delay.TotalSeconds:0.#}s before resume attempt {attempt}"); - try { await Task.Delay(delay); } - catch { return; } + try { await Task.Delay(delay, token); } + catch (OperationCanceledException) { return; } } // Abort retries if the user switched transports while we were waiting. diff --git a/MCPForUnity/Editor/Services/Transport/Transports/StdioBridgeHost.cs b/MCPForUnity/Editor/Services/Transport/Transports/StdioBridgeHost.cs index 4d78689ef..0c6c1bb2d 100644 --- a/MCPForUnity/Editor/Services/Transport/Transports/StdioBridgeHost.cs +++ b/MCPForUnity/Editor/Services/Transport/Transports/StdioBridgeHost.cs @@ -463,9 +463,12 @@ private static async Task HandleClientAsync(TcpClient client, CancellationToken using (client) using (NetworkStream stream = client.GetStream()) { - lock (clientsLock) { activeClients.Add(client); } int clientCount; - lock (clientsLock) { clientCount = activeClients.Count; } + lock (clientsLock) + { + activeClients.Add(client); + clientCount = activeClients.Count; + } try { try diff --git a/unity-mcp-skill/SKILL.md b/unity-mcp-skill/SKILL.md index 5e42a4d73..b0bd08e61 100644 --- a/unity-mcp-skill/SKILL.md +++ b/unity-mcp-skill/SKILL.md @@ -23,7 +23,7 @@ Resource URIs use a specific scheme — do NOT guess or fabricate them. If you a |----------|-----| | Editor state | `mcpforunity://editor/state` | | Project info | `mcpforunity://project/info` | -| Scene gameobject API | `mcpforunity://scene/gameobject-api` | +| Scene GameObject API | `mcpforunity://scene/gameobject-api` | | Tags | `mcpforunity://project/tags` | | Layers | `mcpforunity://project/layers` | | Instances | `mcpforunity://instances` | From 19855cdd8dd0bb676cb357efe73ca99514107796 Mon Sep 17 00:00:00 2001 From: David Sarno Date: Thu, 19 Feb 2026 19:46:57 -0800 Subject: [PATCH 5/5] Address CodeRabbit review feedback - PortManagerTests: save/restore on-disk port files in SetUp/TearDown to prevent DiscoverNewPort from mutating persistent state - StdioBridgeReconnectTests: replace blocking Connect with ConnectAsync + Wait(ConnectTimeoutMs) so CI tests cannot hang indefinitely Co-Authored-By: Claude Opus 4.6 --- .../EditMode/Services/PortManagerTests.cs | 50 ++++++++++++++++++- .../Services/StdioBridgeReconnectTests.cs | 12 +++-- 2 files changed, 56 insertions(+), 6 deletions(-) diff --git a/TestProjects/UnityMCPTests/Assets/Tests/EditMode/Services/PortManagerTests.cs b/TestProjects/UnityMCPTests/Assets/Tests/EditMode/Services/PortManagerTests.cs index fa7a0060e..54ae0502e 100644 --- a/TestProjects/UnityMCPTests/Assets/Tests/EditMode/Services/PortManagerTests.cs +++ b/TestProjects/UnityMCPTests/Assets/Tests/EditMode/Services/PortManagerTests.cs @@ -1,3 +1,4 @@ +using System.IO; using System.Net; using System.Net.Sockets; using NUnit.Framework; @@ -8,10 +9,56 @@ namespace MCPForUnityTests.Editor.Services [TestFixture] public class PortManagerTests { + private string _savedPortFileContent; + private string _savedLegacyFileContent; + private string _portFilePath; + private string _legacyFilePath; + + [SetUp] + public void SetUp() + { + // Snapshot the on-disk port config so DiscoverNewPort tests don't + // permanently alter the running bridge's persisted port. + string dir = Path.Combine( + System.Environment.GetFolderPath(System.Environment.SpecialFolder.UserProfile), + ".unity-mcp"); + _legacyFilePath = Path.Combine(dir, "unity-mcp-port.json"); + + // The hashed file uses a private helper; approximate the same hash. + // We snapshot every json file in the directory to be safe. + _portFilePath = null; + _savedPortFileContent = null; + _savedLegacyFileContent = null; + + if (File.Exists(_legacyFilePath)) + _savedLegacyFileContent = File.ReadAllText(_legacyFilePath); + + // Find the hashed port file for this project + if (Directory.Exists(dir)) + { + foreach (var f in Directory.GetFiles(dir, "unity-mcp-port-*.json")) + { + _portFilePath = f; + _savedPortFileContent = File.ReadAllText(f); + break; // one project at a time + } + } + } + + [TearDown] + public void TearDown() + { + // Restore the original port files + if (_savedLegacyFileContent != null && _legacyFilePath != null) + File.WriteAllText(_legacyFilePath, _savedLegacyFileContent); + + if (_savedPortFileContent != null && _portFilePath != null) + File.WriteAllText(_portFilePath, _savedPortFileContent); + } + [Test] public void IsPortAvailable_ReturnsFalse_WhenPortIsOccupied() { - // Bind a port with ExclusiveAddressUse to simulate the real listener var listener = new TcpListener(IPAddress.Loopback, 0); listener.Start(); int port = ((IPEndPoint)listener.LocalEndpoint).Port; @@ -30,7 +77,6 @@ public void IsPortAvailable_ReturnsFalse_WhenPortIsOccupied() [Test] public void IsPortAvailable_ReturnsTrue_WhenPortIsFree() { - // Bind and immediately release to get a port that was recently free var listener = new TcpListener(IPAddress.Loopback, 0); listener.Start(); int port = ((IPEndPoint)listener.LocalEndpoint).Port; diff --git a/TestProjects/UnityMCPTests/Assets/Tests/EditMode/Services/StdioBridgeReconnectTests.cs b/TestProjects/UnityMCPTests/Assets/Tests/EditMode/Services/StdioBridgeReconnectTests.cs index a7dfe5a05..95736f9f1 100644 --- a/TestProjects/UnityMCPTests/Assets/Tests/EditMode/Services/StdioBridgeReconnectTests.cs +++ b/TestProjects/UnityMCPTests/Assets/Tests/EditMode/Services/StdioBridgeReconnectTests.cs @@ -36,7 +36,8 @@ public IEnumerator NewClient_AfterAbruptDisconnect_CanSendAndReceiveCommands() // --- First client: connect, verify ping/pong, then abruptly close --- using (var client1 = new TcpClient()) { - client1.Connect("127.0.0.1", port); + Assert.IsTrue(client1.ConnectAsync("127.0.0.1", port).Wait(ConnectTimeoutMs), + "First client connect timed out"); client1.ReceiveTimeout = ReadTimeoutMs; var stream1 = client1.GetStream(); @@ -61,7 +62,8 @@ public IEnumerator NewClient_AfterAbruptDisconnect_CanSendAndReceiveCommands() // --- Second client: connect and verify commands still work --- using (var client2 = new TcpClient()) { - client2.Connect("127.0.0.1", port); + Assert.IsTrue(client2.ConnectAsync("127.0.0.1", port).Wait(ConnectTimeoutMs), + "Second client connect timed out"); client2.ReceiveTimeout = ReadTimeoutMs; var stream2 = client2.GetStream(); @@ -94,7 +96,8 @@ public IEnumerator NewClient_WhileOldClientStillConnected_ClosesStaleClient() var client1 = new TcpClient(); try { - client1.Connect("127.0.0.1", port); + Assert.IsTrue(client1.ConnectAsync("127.0.0.1", port).Wait(ConnectTimeoutMs), + "First client connect timed out"); client1.ReceiveTimeout = ReadTimeoutMs; var stream1 = client1.GetStream(); @@ -109,7 +112,8 @@ public IEnumerator NewClient_WhileOldClientStillConnected_ClosesStaleClient() // --- Second client: connect while first is still open --- using (var client2 = new TcpClient()) { - client2.Connect("127.0.0.1", port); + Assert.IsTrue(client2.ConnectAsync("127.0.0.1", port).Wait(ConnectTimeoutMs), + "Second client connect timed out"); client2.ReceiveTimeout = ReadTimeoutMs; var stream2 = client2.GetStream();