From c3bbb70a0fb73d00f3ab7adb98da798b44e3a366 Mon Sep 17 00:00:00 2001
From: JamesWrigley
Date: Sun, 17 Nov 2024 13:29:16 +0100
Subject: [PATCH] Add precompile statements to improve TTFW (Time-To-First-Worker)

Before:
```julia-repl
julia> @time addprocs(1)
  4.592567 seconds (6.56 M allocations: 328.356 MiB, 1.24% gc time, 89.93% compilation time)
1-element Vector{Int64}:
 2

julia> @time addprocs(1)
  3.466125 seconds (25.68 k allocations: 1.455 MiB, 1.12% compilation time)
1-element Vector{Int64}:
 3
```

After:
```julia-repl
julia> @time addprocs(1)
  3.204588 seconds (3.09 M allocations: 154.957 MiB, 1.04% gc time, 71.17% compilation time)
1-element Vector{Int64}:
 2

julia> @time addprocs(1)
  1.932320 seconds (25.67 k allocations: 1.454 MiB, 2.07% compilation time)
1-element Vector{Int64}:
 3
```

Could probably be improved in the future. A tricky part is running
SnoopCompile inside the worker to get inference results.
---
Reviewer notes (text between the `---` separator and the diff is not part of
the commit message):

* The single hunk adds 13 lines to src/precompile.jl: 11 precompile
  statements plus 2 blank separator lines. The 3 leading and 3 trailing
  context lines are unchanged.
* New signatures cover DistributedNext.setup_launched_worker
  (LocalManager, WorkerConfig, Vector{Int}),
  DistributedNext.process_tcp_streams (TCPSocket, TCPSocket, Bool), and
  DistributedNext.handle_msg for nine message types (IdentifySocketMsg,
  CallWaitMsg, CallMsg{:call}, CallMsg{:call_fetch}, ResultMsg,
  IdentifySocketAckMsg, RemoteDoMsg, JoinPGRPMsg, JoinCompleteMsg), each
  with (MsgHeader, TCPSocket, TCPSocket, VersionNumber).
* NOTE(review): the added lines refer to `Sockets.TCPSocket`; presumably
  `Sockets` is already in scope where precompile.jl is included - confirm.
* NOTE(review): the From: header above carries no e-mail address; confirm
  against the original commit before applying with `git am`.

 src/precompile.jl | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/src/precompile.jl b/src/precompile.jl
index 816905c..6026c6a 100644
--- a/src/precompile.jl
+++ b/src/precompile.jl
@@ -1,6 +1,19 @@
 precompile(Tuple{typeof(DistributedNext.remotecall),Function,Int,Module,Vararg{Any, 100}})
 precompile(Tuple{typeof(DistributedNext.procs)})
 precompile(Tuple{typeof(DistributedNext.finalize_ref), DistributedNext.Future})
+precompile(Tuple{typeof(DistributedNext.setup_launched_worker), DistributedNext.LocalManager, DistributedNext.WorkerConfig, Vector{Int}})
+precompile(Tuple{typeof(DistributedNext.process_tcp_streams), Sockets.TCPSocket, Sockets.TCPSocket, Bool})
+
+precompile(Tuple{typeof(DistributedNext.handle_msg), DistributedNext.IdentifySocketMsg, DistributedNext.MsgHeader, Sockets.TCPSocket, Sockets.TCPSocket, VersionNumber})
+precompile(Tuple{typeof(DistributedNext.handle_msg), DistributedNext.CallWaitMsg, DistributedNext.MsgHeader, Sockets.TCPSocket, Sockets.TCPSocket, VersionNumber})
+precompile(Tuple{typeof(DistributedNext.handle_msg), DistributedNext.CallMsg{:call}, DistributedNext.MsgHeader, Sockets.TCPSocket, Sockets.TCPSocket, VersionNumber})
+precompile(Tuple{typeof(DistributedNext.handle_msg), DistributedNext.CallMsg{:call_fetch}, DistributedNext.MsgHeader, Sockets.TCPSocket, Sockets.TCPSocket, VersionNumber})
+precompile(Tuple{typeof(DistributedNext.handle_msg), DistributedNext.ResultMsg, DistributedNext.MsgHeader, Sockets.TCPSocket, Sockets.TCPSocket, VersionNumber})
+precompile(Tuple{typeof(DistributedNext.handle_msg), DistributedNext.IdentifySocketAckMsg, DistributedNext.MsgHeader, Sockets.TCPSocket, Sockets.TCPSocket, VersionNumber})
+precompile(Tuple{typeof(DistributedNext.handle_msg), DistributedNext.RemoteDoMsg, DistributedNext.MsgHeader, Sockets.TCPSocket, Sockets.TCPSocket, VersionNumber})
+precompile(Tuple{typeof(DistributedNext.handle_msg), DistributedNext.JoinPGRPMsg, DistributedNext.MsgHeader, Sockets.TCPSocket, Sockets.TCPSocket, VersionNumber})
+precompile(Tuple{typeof(DistributedNext.handle_msg), DistributedNext.JoinCompleteMsg, DistributedNext.MsgHeader, Sockets.TCPSocket, Sockets.TCPSocket, VersionNumber})
+
 # This is disabled because it doesn't give much benefit
 # and the code in Distributed is poorly typed causing many invalidations
 # TODO: Maybe reenable now that Distributed is not in sysimage.