diff --git a/src/driver.jl b/src/driver.jl index 950ea272..b1d55d33 100644 --- a/src/driver.jl +++ b/src/driver.jl @@ -197,7 +197,7 @@ const __llvm_initialized = Ref(false) end @tracepoint "IR generation" begin - ir, compiled = irgen(job) + ir, compiled, gv_to_value = irgen(job) if job.config.entry_abi === :specfunc entry_fn = compiled[job.source].specfunc else @@ -256,6 +256,9 @@ const __llvm_initialized = Ref(false) dyn_ir, dyn_meta = codegen(:llvm, CompilerJob(dyn_job; config)) dyn_entry_fn = LLVM.name(dyn_meta.entry) merge!(compiled, dyn_meta.compiled) + if haskey(dyn_meta, :gv_to_value) + merge!(gv_to_value, dyn_meta.gv_to_value) + end @assert context(dyn_ir) == context(ir) link!(ir, dyn_ir) changed = true @@ -422,7 +425,7 @@ const __llvm_initialized = Ref(false) @tracepoint "verification" verify(ir) end - return ir, (; entry, compiled) + return ir, (; entry, compiled, gv_to_value) end @locked function emit_asm(@nospecialize(job::CompilerJob), ir::LLVM.Module, diff --git a/src/irgen.jl b/src/irgen.jl index a7c36a60..5149e9f0 100644 --- a/src/irgen.jl +++ b/src/irgen.jl @@ -1,7 +1,7 @@ # LLVM IR generation function irgen(@nospecialize(job::CompilerJob)) - mod, compiled = @tracepoint "emission" compile_method_instance(job) + mod, compiled, gv_to_value = @tracepoint "emission" compile_method_instance(job) if job.config.entry_abi === :specfunc entry_fn = compiled[job.source].specfunc else @@ -55,6 +55,11 @@ function irgen(@nospecialize(job::CompilerJob)) new_name = safe_name(old_name) if old_name != new_name LLVM.name!(val, new_name) + val = get(gv_to_value, old_name, nothing) + if val !== nothing + delete!(gv_to_value, old_name) + gv_to_value[new_name] = val + end end end @@ -120,7 +125,7 @@ function irgen(@nospecialize(job::CompilerJob)) can_throw(job) || lower_throw!(mod) end - return mod, compiled + return mod, compiled, gv_to_value end diff --git a/src/jlgen.jl b/src/jlgen.jl index 0d380cbf..0e2f6d3a 100644 --- a/src/jlgen.jl +++ b/src/jlgen.jl @@ -766,23 
+766,76 @@ function compile_method_instance(@nospecialize(job::CompilerJob)) cache_gbl = nothing end - if VERSION >= v"1.13.0-DEV.623" - # Since Julia 1.13, the caller is responsible for initializing global variables that - # point to global values or bindings with their address in memory. + # Since Julia 1.13, the caller is responsible for initializing global variables that + # point to global values or bindings with their address in memory. + # Similarly on previous versions when imaging=true, it is also the caller's responsibility + # (see https://github.com/JuliaGPU/GPUCompiler.jl/issues/753), but we can support this on versions + # that have HAS_LLVM_GVS_GLOBALS. + gvs = nothing + inits = nothing + @static if VERSION >= v"1.13.0-DEV.623" num_gvars = Ref{Csize_t}(0) @ccall jl_get_llvm_gvs(native_code::Ptr{Cvoid}, num_gvars::Ptr{Csize_t}, - C_NULL::Ptr{Cvoid})::Nothing + C_NULL::Ptr{Cvoid} + )::Nothing gvs = Vector{Ptr{LLVM.API.LLVMOpaqueValue}}(undef, num_gvars[]) @ccall jl_get_llvm_gvs(native_code::Ptr{Cvoid}, num_gvars::Ptr{Csize_t}, - gvs::Ptr{LLVM.API.LLVMOpaqueValue})::Nothing + gvs::Ptr{LLVM.API.LLVMOpaqueValue} + )::Nothing + inits = Vector{Ptr{Cvoid}}(undef, num_gvars[]) @ccall jl_get_llvm_gv_inits(native_code::Ptr{Cvoid}, num_gvars::Ptr{Csize_t}, inits::Ptr{Cvoid})::Nothing + elseif HAS_LLVM_GVS_GLOBALS + if VERSION >= v"1.12.0-DEV.1703" + num_gvars = Ref{Csize_t}(0) + @ccall jl_get_llvm_gvs(native_code::Ptr{Cvoid}, num_gvars::Ptr{Csize_t}, + C_NULL::Ptr{Cvoid} + )::Nothing + gvs = Vector{Ptr{LLVM.API.LLVMOpaqueValue}}(undef, num_gvars[]) + @ccall jl_get_llvm_gvs_globals(native_code::Ptr{Cvoid}, num_gvars::Ptr{Csize_t}, + gvs::Ptr{LLVM.API.LLVMOpaqueValue} + )::Nothing + inits = Vector{Ptr{Cvoid}}(undef, num_gvars[]) + @ccall jl_get_llvm_gvs(native_code::Ptr{Cvoid}, num_gvars::Ptr{Csize_t}, + inits::Ptr{Cvoid} + )::Nothing + else + gvs = get_llvm_global_vars(native_code) + inits = get_llvm_global_inits(native_code) + end + end + # Maintain a map from 
global variables to their initialized Julia values. + # The objects pointed to are perma-rooted, during codegen. + # It is legal to call `Base.unsafe_pointer_to_objref` on `values(gv_to_value)`, + # but x->pointer_from_objref(Base.unsafe_pointer_to_objref(x)) is not idempotent, + # thus we store raw pointers here. + # Currently GVs are privatized, so users may have to handle embedded pointers, + # but this dictionary provides a clear indication that the embedded pointer is + # indeed a valid Julia object. + gv_to_value = Dict{String, Ptr{Cvoid}}() + + # On certain versions of Julia we have no reliable way to match the `gvs` to their initializers `inits`. + if gvs === nothing + # Without that mapping we cannot initialize the global variables here properly. + for gv in globals(llvm_mod) + if !haskey(metadata(gv), "julia.constgv") + continue + end + gv_to_value[LLVM.name(gv)] = C_NULL + end + else + @assert inits !== nothing for (gv_ref, init) in zip(gvs, inits) gv = GlobalVariable(gv_ref) - val = const_inttoptr(ConstantInt(Int64(init)), LLVM.PointerType()) - initializer!(gv, val) + gv_to_value[LLVM.name(gv)] = init + # set the initializer + # TODO(vc): To enable full relocation we should actually strip out the initializers here.
+ if LLVM.isnull(initializer(gv)) + val = const_inttoptr(ConstantInt(Int64(init)), LLVM.PointerType()) + initializer!(gv, val) + end end end @@ -854,7 +907,7 @@ function compile_method_instance(@nospecialize(job::CompilerJob)) # ensure that the requested method instance was compiled @assert haskey(compiled, job.source) - return llvm_mod, compiled + return llvm_mod, compiled, gv_to_value end # partially revert JuliaLangjulia#49391 diff --git a/src/utils.jl b/src/utils.jl index 095f22dc..674d8f9b 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -182,3 +182,59 @@ function kernels(mod::LLVM.Module) end return vals end + +@static if VERSION < v"1.13.0-DEV.623" + import Libdl + + const HAS_LLVM_GVS_GLOBALS = Libdl.dlsym( + unsafe_load(cglobal(:jl_libjulia_handle, Ptr{Cvoid})), :jl_get_llvm_gvs_globals, throw_error=false) !== nothing + + const AL_N_INLINE = 29 + + # Mirrors arraylist_t + mutable struct ArrayList + len::Csize_t + max::Csize_t + items::Ptr{Ptr{Cvoid}} + _space::NTuple{AL_N_INLINE, Ptr{Cvoid}} + + function ArrayList() + list = new(0, AL_N_INLINE, Ptr{Ptr{Cvoid}}(C_NULL), ntuple(_ -> Ptr{Cvoid}(C_NULL), AL_N_INLINE)) + list.items = Base.pointer_from_objref(list) + fieldoffset(typeof(list), 4) + + finalizer(list) do list + if list.items != Base.pointer_from_objref(list) + fieldoffset(typeof(list), 4) + Libc.free(list.items) + end + end + return list + end + end + + function get_llvm_global_vars(native_code::Ptr{Cvoid}) + gvs_list = ArrayList() + GC.@preserve gvs_list begin + p_gvs = Base.pointer_from_objref(gvs_list) + @ccall jl_get_llvm_gvs_globals(native_code::Ptr{Cvoid}, p_gvs::Ptr{Cvoid})::Nothing + gvs = Vector{Ptr{LLVM.API.LLVMOpaqueValue}}(undef, gvs_list.len) + items = Base.unsafe_convert(Ptr{Ptr{LLVM.API.LLVMOpaqueValue}}, gvs_list.items) + for i in 1:gvs_list.len + gvs[i] = unsafe_load(items, i) + end + end + return gvs + end + + function get_llvm_global_inits(native_code::Ptr{Cvoid}) + inits_list = ArrayList() + GC.@preserve inits_list begin + 
p_inits = Base.pointer_from_objref(inits_list) + @ccall jl_get_llvm_gvs(native_code::Ptr{Cvoid}, p_inits::Ptr{Cvoid})::Nothing + inits = Vector{Ptr{Cvoid}}(undef, inits_list.len) + for i in 1:inits_list.len + inits[i] = unsafe_load(inits_list.items, i) + end + end + return inits + end +end diff --git a/test/helpers/native.jl b/test/helpers/native.jl index d53ff172..656028f4 100644 --- a/test/helpers/native.jl +++ b/test/helpers/native.jl @@ -14,8 +14,10 @@ struct CompilerParams <: AbstractCompilerParams new(entry_safepoint, method_table) end +module Runtime end + NativeCompilerJob = CompilerJob{NativeCompilerTarget,CompilerParams} -GPUCompiler.runtime_module(::NativeCompilerJob) = TestRuntime +GPUCompiler.runtime_module(::NativeCompilerJob) = Runtime GPUCompiler.method_table(@nospecialize(job::NativeCompilerJob)) = job.config.params.method_table GPUCompiler.can_safepoint(@nospecialize(job::NativeCompilerJob)) = job.config.params.entry_safepoint @@ -24,7 +26,7 @@ function create_job(@nospecialize(func), @nospecialize(types); entry_safepoint::Bool=false, method_table=test_method_table, kwargs...) config_kwargs, kwargs = split_kwargs(kwargs, GPUCompiler.CONFIG_KWARGS) source = methodinstance(typeof(func), Base.to_tuple_type(types), Base.get_world_counter()) - target = NativeCompilerTarget() + target = NativeCompilerTarget(;jlruntime=true) params = CompilerParams(entry_safepoint, method_table) config = CompilerConfig(target, params; kernel=false, config_kwargs...) 
CompilerJob(source, config), kwargs diff --git a/test/native.jl b/test/native.jl index da08764f..83339ea9 100644 --- a/test/native.jl +++ b/test/native.jl @@ -36,16 +36,19 @@ end @testset "compilation database" begin mod = @eval module $(gensym()) @noinline inner(x) = x+1 - function outer(x) - return inner(x) + function outer(x, sym) + if sym == :a + return inner(x) + end + return x end end - job, _ = Native.create_job(mod.outer, (Int,)) + job, _ = Native.create_job(mod.outer, (Int, Symbol)) JuliaContext() do ctx - ir, meta = GPUCompiler.compile(:llvm, job) + ir, meta = GPUCompiler.compile(:llvm, job; validate=false) - meth = only(methods(mod.outer, (Int,))) + meth = only(methods(mod.outer, (Int, Symbol))) mis = filter(mi->mi.def == meth, keys(meta.compiled)) @test length(mis) == 1 @@ -53,6 +56,16 @@ end other_mis = filter(mi->mi.def != meth, keys(meta.compiled)) @test length(other_mis) == 1 @test only(other_mis).def in methods(mod.inner) + + if VERSION >= v"1.12" + @test length(meta.gv_to_value) == 1 + end + # TODO: Global values get privatized, so we can't find them by name anymore. 
+ # %.not = icmp eq ptr %"sym::Symbol", inttoptr (i64 140096668482288 to ptr), !dbg !38 + # for (name, v) in meta.gv_to_value + # gv = globals(ir)[name] + # @test LLVM.initializer(gv) === v + # end end end diff --git a/test/native/precompile.jl b/test/native/precompile.jl index 6fe981a5..d4c0a7ac 100644 --- a/test/native/precompile.jl +++ b/test/native/precompile.jl @@ -13,12 +13,34 @@ precompile_test_harness("Inference caching") do load_path A[1] = x return end + + function kernel_w_global(A, x, sym) + if sym == :A + A[1] = x + end + return + end + + function square(x) + return x*x + end let job, _ = NativeCompiler.Native.create_job(kernel, (Vector{Int}, Int)) precompile(job) end + let + job, _ = NativeCompiler.Native.create_job(kernel_w_global, (Vector{Int}, Int, Symbol)) + precompile(job) + end + + let + # Emit the func abi to box the return + job, _ = NativeCompiler.Native.create_job(square, (Float64,), entry_abi=:func) + precompile(job) + end + # identity is foreign @setup_workload begin job, _ = NativeCompiler.Native.create_job(identity, (Int,)) @@ -28,7 +50,7 @@ precompile_test_harness("Inference caching") do load_path end end) |> string) - Base.compilecache(Base.PkgId("NativeBackend")) + Base.compilecache(Base.PkgId("NativeBackend"), stderr, stdout) @eval let import NativeCompiler @@ -47,6 +69,12 @@ precompile_test_harness("Inference caching") do load_path kernel_mi = GPUCompiler.methodinstance(typeof(NativeBackend.kernel), Tuple{Vector{Int}, Int}) @test check_presence(kernel_mi, token) + kernel_w_global_mi = GPUCompiler.methodinstance(typeof(NativeBackend.kernel_w_global), Tuple{Vector{Int}, Int, Symbol}) + @test check_presence(kernel_w_global_mi, token) + + square_mi = GPUCompiler.methodinstance(typeof(NativeBackend.square), Tuple{Float64}) + @test check_presence(square_mi, token) + # check that identity survived @test check_presence(identity_mi, token) broken=VERSION>=v"1.12.0-DEV.1268" diff --git a/test/ptx/precompile.jl b/test/ptx/precompile.jl index 
b5f980c9..e0739df0 100644 --- a/test/ptx/precompile.jl +++ b/test/ptx/precompile.jl @@ -25,7 +25,7 @@ precompile_test_harness("Inference caching") do load_path end end) |> string) - Base.compilecache(Base.PkgId("PTXBackend")) + Base.compilecache(Base.PkgId("PTXBackend"), stderr, stdout) @eval let import PTXCompiler