Skip to content
7 changes: 5 additions & 2 deletions src/driver.jl
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ const __llvm_initialized = Ref(false)
end

@tracepoint "IR generation" begin
ir, compiled = irgen(job)
ir, compiled, gv_to_value = irgen(job)
if job.config.entry_abi === :specfunc
entry_fn = compiled[job.source].specfunc
else
Expand Down Expand Up @@ -256,6 +256,9 @@ const __llvm_initialized = Ref(false)
dyn_ir, dyn_meta = codegen(:llvm, CompilerJob(dyn_job; config))
dyn_entry_fn = LLVM.name(dyn_meta.entry)
merge!(compiled, dyn_meta.compiled)
if haskey(dyn_meta, :gv_to_value)
merge!(gv_to_value, dyn_meta.gv_to_value)
end
Comment on lines +259 to +261
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We always include gv_to_value, so why the conditional merge?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Downstream CI broke on Enzyme.jl
https://buildkite.com/julialang/gpucompiler-dot-jl/builds/2129/steps/canvas

and I would like to make this not a breaking release

@assert context(dyn_ir) == context(ir)
link!(ir, dyn_ir)
changed = true
Expand Down Expand Up @@ -422,7 +425,7 @@ const __llvm_initialized = Ref(false)
@tracepoint "verification" verify(ir)
end

return ir, (; entry, compiled)
return ir, (; entry, compiled, gv_to_value)
end

@locked function emit_asm(@nospecialize(job::CompilerJob), ir::LLVM.Module,
Expand Down
9 changes: 7 additions & 2 deletions src/irgen.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# LLVM IR generation

function irgen(@nospecialize(job::CompilerJob))
mod, compiled = @tracepoint "emission" compile_method_instance(job)
mod, compiled, gv_to_value = @tracepoint "emission" compile_method_instance(job)
if job.config.entry_abi === :specfunc
entry_fn = compiled[job.source].specfunc
else
Expand Down Expand Up @@ -55,6 +55,11 @@ function irgen(@nospecialize(job::CompilerJob))
new_name = safe_name(old_name)
if old_name != new_name
LLVM.name!(val, new_name)
val = get(gv_to_value, old_name, nothing)
if val !== nothing
delete!(gv_to_value, old_name)
gv_to_value[new_name] = val
end
end
end

Expand Down Expand Up @@ -120,7 +125,7 @@ function irgen(@nospecialize(job::CompilerJob))
can_throw(job) || lower_throw!(mod)
end

return mod, compiled
return mod, compiled, gv_to_value
end


Expand Down
69 changes: 61 additions & 8 deletions src/jlgen.jl
Original file line number Diff line number Diff line change
Expand Up @@ -766,23 +766,76 @@ function compile_method_instance(@nospecialize(job::CompilerJob))
cache_gbl = nothing
end

if VERSION >= v"1.13.0-DEV.623"
# Since Julia 1.13, the caller is responsible for initializing global variables that
# point to global values or bindings with their address in memory.
# Since Julia 1.13, the caller is responsible for initializing global variables that
# point to global values or bindings with their address in memory.
# Similarly on previous versions when imaging=true, it is also the caller's responsibility
# (see https://github.com/JuliaGPU/GPUCompiler.jl/issues/753), but we can support this on versions
# that have HAS_LLVM_GVS_GLOBALS.
gvs = nothing
inits = nothing
@static if VERSION >= v"1.13.0-DEV.623"
num_gvars = Ref{Csize_t}(0)
@ccall jl_get_llvm_gvs(native_code::Ptr{Cvoid}, num_gvars::Ptr{Csize_t},
C_NULL::Ptr{Cvoid})::Nothing
C_NULL::Ptr{Cvoid}
)::Nothing
gvs = Vector{Ptr{LLVM.API.LLVMOpaqueValue}}(undef, num_gvars[])
@ccall jl_get_llvm_gvs(native_code::Ptr{Cvoid}, num_gvars::Ptr{Csize_t},
gvs::Ptr{LLVM.API.LLVMOpaqueValue})::Nothing
gvs::Ptr{LLVM.API.LLVMOpaqueValue}
)::Nothing

inits = Vector{Ptr{Cvoid}}(undef, num_gvars[])
@ccall jl_get_llvm_gv_inits(native_code::Ptr{Cvoid}, num_gvars::Ptr{Csize_t},
inits::Ptr{Cvoid})::Nothing
elseif HAS_LLVM_GVS_GLOBALS
if VERSION >= v"1.12.0-DEV.1703"
num_gvars = Ref{Csize_t}(0)
@ccall jl_get_llvm_gvs(native_code::Ptr{Cvoid}, num_gvars::Ptr{Csize_t},
C_NULL::Ptr{Cvoid}
)::Nothing
gvs = Vector{Ptr{LLVM.API.LLVMOpaqueValue}}(undef, num_gvars[])
@ccall jl_get_llvm_gvs_globals(native_code::Ptr{Cvoid}, num_gvars::Ptr{Csize_t},
gvs::Ptr{LLVM.API.LLVMOpaqueValue}
)::Nothing
inits = Vector{Ptr{Cvoid}}(undef, num_gvars[])
@ccall jl_get_llvm_gvs(native_code::Ptr{Cvoid}, num_gvars::Ptr{Csize_t},
inits::Ptr{Cvoid}
)::Nothing
else
gvs = get_llvm_global_vars(native_code)
inits = get_llvm_global_inits(native_code)
end
end

# Maintain a map from global variables to their initialized Julia values.
# The objects pointed to are perma-rooted during codegen.
# It is legal to call `Base.unsafe_pointer_to_objref` on `values(gv_to_value)`,
# but `x -> pointer_from_objref(Base.unsafe_pointer_to_objref(x))` is not idempotent,
# thus we store raw pointers here.
# Currently GVs are privatized, so users may have to handle embedded pointers,
# but this dictionary provides a clear indication that the embedded pointer is
# indeed a valid Julia object.
gv_to_value = Dict{String, Ptr{Cvoid}}()

# On certain versions of Julia we have no reliable way to match the `gvs` to their initializers `inits`.
if gvs === nothing
# We cannot initialize the global variables here properly, so record each `julia.constgv` global with a null pointer instead.
for gv in globals(llvm_mod)
if !haskey(metadata(gv), "julia.constgv")
continue
end
gv_to_value[LLVM.name(gv)] = C_NULL
end
else
@assert inits !== nothing
for (gv_ref, init) in zip(gvs, inits)
gv = GlobalVariable(gv_ref)
val = const_inttoptr(ConstantInt(Int64(init)), LLVM.PointerType())
initializer!(gv, val)
gv_to_value[LLVM.name(gv)] = init
# set the initializer
# TODO(vc): To enable full relocation we should actually strip out the initializers here.
if LLVM.isnull(initializer(gv))
val = const_inttoptr(ConstantInt(Int64(init)), LLVM.PointerType())
initializer!(gv, val)
end
end
end

Expand Down Expand Up @@ -854,7 +907,7 @@ function compile_method_instance(@nospecialize(job::CompilerJob))
# ensure that the requested method instance was compiled
@assert haskey(compiled, job.source)

return llvm_mod, compiled
return llvm_mod, compiled, gv_to_value
end

# partially revert JuliaLangjulia#49391
Expand Down
56 changes: 56 additions & 0 deletions src/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -182,3 +182,59 @@ function kernels(mod::LLVM.Module)
end
return vals
end

@static if VERSION < v"1.13.0-DEV.623"
import Libdl

# Whether the running libjulia exports `jl_get_llvm_gvs_globals`.
# The symbol is looked up in the library handle stored in `jl_libjulia_handle`;
# with `throw_error = false` a failed lookup yields `nothing` instead of throwing,
# in which case this constant is `false`.
const HAS_LLVM_GVS_GLOBALS = let libjulia = unsafe_load(cglobal(:jl_libjulia_handle, Ptr{Cvoid}))
    sym = Libdl.dlsym(libjulia, :jl_get_llvm_gvs_globals; throw_error = false)
    sym !== nothing
end

# Number of pointer-sized slots stored inline in an `ArrayList`.
const AL_N_INLINE = 29

# Mirrors arraylist_t
#
# Fields:
#   len    - number of entries currently stored
#   max    - capacity; starts out as `AL_N_INLINE`
#   items  - pointer to the entry storage; starts out pointing at `_space`
#   _space - inline storage for `AL_N_INLINE` pointers
mutable struct ArrayList
    len::Csize_t
    max::Csize_t
    items::Ptr{Ptr{Cvoid}}
    _space::NTuple{AL_N_INLINE, Ptr{Cvoid}}

    function ArrayList()
        # Address of the inline `_space` buffer (field 4) inside `obj`.
        inline_buffer(obj) = Base.pointer_from_objref(obj) + fieldoffset(ArrayList, 4)

        al = new(0, AL_N_INLINE, Ptr{Ptr{Cvoid}}(C_NULL), ntuple(_ -> C_NULL, AL_N_INLINE))
        al.items = inline_buffer(al)

        # Only release `items` when it no longer points at the inline buffer
        # (presumably because the C side moved the storage elsewhere — confirm
        # against the runtime's arraylist implementation).
        finalizer(al) do obj
            obj.items == inline_buffer(obj) || Libc.free(obj.items)
        end
        return al
    end
end

# Collect the entries that `jl_get_llvm_gvs_globals` reports for `native_code`.
#
# A fresh `ArrayList` is handed to the runtime by address; once the call returns,
# its first `len` entries are copied into a newly allocated
# `Vector{Ptr{LLVM.API.LLVMOpaqueValue}}`, which is returned.
function get_llvm_global_vars(native_code::Ptr{Cvoid})
    list = ArrayList()
    # Keep `list` rooted while its raw address and `items` pointer are in use.
    GC.@preserve list begin
        list_ptr = Base.pointer_from_objref(list)
        @ccall jl_get_llvm_gvs_globals(native_code::Ptr{Cvoid}, list_ptr::Ptr{Cvoid})::Nothing

        gvs = Vector{Ptr{LLVM.API.LLVMOpaqueValue}}(undef, list.len)
        # View the untyped entries as LLVM value references before copying them out.
        src = Base.unsafe_convert(Ptr{Ptr{LLVM.API.LLVMOpaqueValue}}, list.items)
        for idx in eachindex(gvs)
            gvs[idx] = unsafe_load(src, idx)
        end
    end
    return gvs
end

# Collect the entries that `jl_get_llvm_gvs` reports for `native_code`.
#
# A fresh `ArrayList` is handed to the runtime by address; once the call returns,
# its first `len` entries are copied into a newly allocated `Vector{Ptr{Cvoid}}`,
# which is returned.
function get_llvm_global_inits(native_code::Ptr{Cvoid})
    list = ArrayList()
    # Keep `list` rooted while its raw address and `items` pointer are in use.
    GC.@preserve list begin
        list_ptr = Base.pointer_from_objref(list)
        @ccall jl_get_llvm_gvs(native_code::Ptr{Cvoid}, list_ptr::Ptr{Cvoid})::Nothing

        inits = Vector{Ptr{Cvoid}}(undef, list.len)
        for idx in eachindex(inits)
            inits[idx] = unsafe_load(list.items, idx)
        end
    end
    return inits
end
end
6 changes: 4 additions & 2 deletions test/helpers/native.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@ struct CompilerParams <: AbstractCompilerParams
new(entry_safepoint, method_table)
end

module Runtime end

NativeCompilerJob = CompilerJob{NativeCompilerTarget,CompilerParams}
GPUCompiler.runtime_module(::NativeCompilerJob) = TestRuntime
GPUCompiler.runtime_module(::NativeCompilerJob) = Runtime

GPUCompiler.method_table(@nospecialize(job::NativeCompilerJob)) = job.config.params.method_table
GPUCompiler.can_safepoint(@nospecialize(job::NativeCompilerJob)) = job.config.params.entry_safepoint
Expand All @@ -24,7 +26,7 @@ function create_job(@nospecialize(func), @nospecialize(types);
entry_safepoint::Bool=false, method_table=test_method_table, kwargs...)
config_kwargs, kwargs = split_kwargs(kwargs, GPUCompiler.CONFIG_KWARGS)
source = methodinstance(typeof(func), Base.to_tuple_type(types), Base.get_world_counter())
target = NativeCompilerTarget()
target = NativeCompilerTarget(;jlruntime=true)
params = CompilerParams(entry_safepoint, method_table)
config = CompilerConfig(target, params; kernel=false, config_kwargs...)
CompilerJob(source, config), kwargs
Expand Down
23 changes: 18 additions & 5 deletions test/native.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,23 +36,36 @@ end
@testset "compilation database" begin
mod = @eval module $(gensym())
@noinline inner(x) = x+1
function outer(x)
return inner(x)
function outer(x, sym)
if sym == :a
return inner(x)
end
return x
end
end

job, _ = Native.create_job(mod.outer, (Int,))
job, _ = Native.create_job(mod.outer, (Int, Symbol))
JuliaContext() do ctx
ir, meta = GPUCompiler.compile(:llvm, job)
ir, meta = GPUCompiler.compile(:llvm, job; validate=false)

meth = only(methods(mod.outer, (Int,)))
meth = only(methods(mod.outer, (Int, Symbol)))

mis = filter(mi->mi.def == meth, keys(meta.compiled))
@test length(mis) == 1

other_mis = filter(mi->mi.def != meth, keys(meta.compiled))
@test length(other_mis) == 1
@test only(other_mis).def in methods(mod.inner)

if VERSION >= v"1.12"
@test length(meta.gv_to_value) == 1
end
# TODO: Global values get privatized, so we can't find them by name anymore.
# %.not = icmp eq ptr %"sym::Symbol", inttoptr (i64 140096668482288 to ptr), !dbg !38
# for (name, v) in meta.gv_to_value
# gv = globals(ir)[name]
# @test LLVM.initializer(gv) === v
# end
end
end

Expand Down
30 changes: 29 additions & 1 deletion test/native/precompile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,34 @@ precompile_test_harness("Inference caching") do load_path
A[1] = x
return
end

function kernel_w_global(A, x, sym)
if sym == :A
A[1] = x
end
return
end

function square(x)
return x*x
end

let
job, _ = NativeCompiler.Native.create_job(kernel, (Vector{Int}, Int))
precompile(job)
end

let
job, _ = NativeCompiler.Native.create_job(kernel_w_global, (Vector{Int}, Int, Symbol))
precompile(job)
end

let
# Emit the func abi to box the return
job, _ = NativeCompiler.Native.create_job(square, (Float64,), entry_abi=:func)
precompile(job)
end

# identity is foreign
@setup_workload begin
job, _ = NativeCompiler.Native.create_job(identity, (Int,))
Expand All @@ -28,7 +50,7 @@ precompile_test_harness("Inference caching") do load_path
end
end) |> string)

Base.compilecache(Base.PkgId("NativeBackend"))
Base.compilecache(Base.PkgId("NativeBackend"), stderr, stdout)
@eval let
import NativeCompiler

Expand All @@ -47,6 +69,12 @@ precompile_test_harness("Inference caching") do load_path
kernel_mi = GPUCompiler.methodinstance(typeof(NativeBackend.kernel), Tuple{Vector{Int}, Int})
@test check_presence(kernel_mi, token)

kernel_w_global_mi = GPUCompiler.methodinstance(typeof(NativeBackend.kernel_w_global), Tuple{Vector{Int}, Int, Symbol})
@test check_presence(kernel_w_global_mi, token)

square_mi = GPUCompiler.methodinstance(typeof(NativeBackend.square), Tuple{Float64})
@test check_presence(square_mi, token)

# check that identity survived
@test check_presence(identity_mi, token) broken=VERSION>=v"1.12.0-DEV.1268"

Expand Down
2 changes: 1 addition & 1 deletion test/ptx/precompile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ precompile_test_harness("Inference caching") do load_path
end
end) |> string)

Base.compilecache(Base.PkgId("PTXBackend"))
Base.compilecache(Base.PkgId("PTXBackend"), stderr, stdout)
@eval let
import PTXCompiler

Expand Down
Loading