Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name = "FixedEffectModels"
uuid = "9d5cd8c9-2029-5cab-9928-427838db53e3"
version = "1.12.2"
version = "1.13.0"

[deps]
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Expand All @@ -19,7 +19,7 @@ Vcov = "ec2bfdc2-55df-4fc9-b9ae-4958c2cf2486"

[compat]
DataFrames = "0.21, 0.22, 1"
FixedEffects = "2.3"
FixedEffects = "2.6"
PrecompileTools = "1"
Reexport = "0.1, 0.2, 1"
Statistics = "1"
Expand Down
3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,7 @@ Finally, you can use [RegressionTables.jl](https://github.com/jmboehm/Regression

## Performances

### MultiThreads
`FixedEffectModels` is multi-threaded. Use the option `nthreads` to select the number of threads to use in the estimation (defaults to `Threads.nthreads()`).
`FixedEffectModels` is multi-threaded by default. Launch Julia with multiple threads to benefit from the speedup. You can verify how many threads are available in your session with `Threads.nthreads()`.

### GPUs
The package has an experimental support for GPUs. This can make the package 2x-5x faster for complicated problems.
Expand Down
28 changes: 13 additions & 15 deletions benchmark/benchmark.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
using DataFrames, Random, CategoricalArrays
@time using FixedEffectModels
# 13s precompiling
# 0.418712 seconds (742.49 k allocations: 45.500 MiB, 4.07% gc time, 1.07% compilation time)
# Very simple setup
N = 10000000
K = 100
Expand All @@ -12,21 +12,21 @@ y= 3 .* x1 .+ 5 .* x2 .+ cos.(id1) .+ cos.(id2).^2 .+ randn(N)
df = DataFrame(id1 = id1, id2 = id2, x1 = x1, x2 = x2, y = y)
# first time
@time reg(df, @formula(y ~ x1 + x2))
# 1.823s
# 1.810739 seconds (14.05 M allocations: 1.583 GiB, 3.76% gc time, 84.28% compilation time: 91% of which was recompilation)
@time reg(df, @formula(y ~ x1 + x2))
# 0.353469 seconds (441 allocations: 691.439 MiB, 3.65% gc time)
# 0.288344 seconds (712 allocations: 920.405 MiB, 18.71% gc time)
@time reg(df, @formula(y ~ x1 + x2), Vcov.cluster(:id2))
# 0.763999 seconds (2.96 M allocations: 967.418 MiB, 2.29% gc time, 54.39% compilation time: 5% of which was recompilation)
# 0.528590 seconds (1.33 M allocations: 1.039 GiB, 8.76% gc time, 55.33% compilation time)
@time reg(df, @formula(y ~ x1 + x2), Vcov.cluster(:id2))
# 0.401544 seconds (622 allocations: 768.943 MiB, 3.52% gc time)
# 0.268619 seconds (879 allocations: 997.916 MiB, 15.08% gc time)
@time reg(df, @formula(y ~ x1 + x2 + fe(id1)))
# 0.893835 seconds (1.03 k allocations: 929.130 MiB, 54.19% gc time)
# 0.824536 seconds (3.09 M allocations: 1.426 GiB, 5.37% gc time, 61.39% compilation time: 3% of which was recompilation)
@time reg(df, @formula(y ~ x1 + x2 + fe(id1)))
# 0.474160 seconds (1.13 k allocations: 933.340 MiB, 1.74% gc time)
# 0.356793 seconds (1.41 k allocations: 1.276 GiB, 19.31% gc time)
@time reg(df, @formula(y ~ x1 + x2 + fe(id1)), Vcov.cluster(:id1))
# 0.598816 seconds (261.08 k allocations: 1.007 GiB, 8.29% gc time, 9.21% compilation time)
# 0.435500 seconds (495.96 k allocations: 1.381 GiB, 15.97% gc time, 10.72% compilation time)
@time reg(df, @formula(y ~ x1 + x2 + fe(id1) + fe(id2)))
# 1.584573 seconds (489.64 k allocations: 1.094 GiB, 2.10% gc time, 8.53% compilation time)
# 1.367264 seconds (1.91 M allocations: 1.592 GiB, 5.74% gc time, 23.85% compilation time: 20% of which was recompilation)

# More complicated setup
N = 800000 # number of observations
Expand All @@ -39,9 +39,7 @@ x2 = cos.(id1) + sin.(id2) + randn(N)
y= 3 .* x1 .+ 5 .* x2 .+ cos.(id1) .+ cos.(id2).^2 .+ randn(N)
df = DataFrame(id1 = id1, id2 = id2, x1 = x1, x2 = x2, y = y)
@time reg(df, @formula(y ~ x1 + x2 + fe(id1) + fe(id2)))
# 2.504294 seconds (75.83 k allocations: 95.525 MiB, 0.23% gc time)
# for some reason in 1.10 I now get worse time (iter 200)
# 4.709078 seconds (108.98 k allocations: 101.417 MiB)
# 1.546023 seconds (19.89 k allocations: 119.673 MiB, 1.70% gc time)



Expand All @@ -57,8 +55,8 @@ X1 = rand(n)
ln_y = 3 .* X1 .+ rand(n)
df = DataFrame(X1 = X1, ln_y = ln_y, id1 = id1, id2 = id2, id3 = id3)
@time reg(df, @formula(ln_y ~ X1 + fe(id1)), Vcov.cluster(:id1))
# 0.543996 seconds (873 allocations: 815.677 MiB, 34.15% gc time)
# 0.311420 seconds (1.35 k allocations: 1.052 GiB, 22.30% gc time)
@time reg(df, @formula(ln_y ~ X1 + fe(id1) + fe(id2)), Vcov.cluster(:id1))
# 1.301908 seconds (3.03 k allocations: 968.729 MiB, 25.84% gc time)
# 0.808992 seconds (3.52 k allocations: 1.272 GiB, 8.68% gc time)
@time reg(df, @formula(ln_y ~ X1 + fe(id1) + fe(id2) + fe(id3)), Vcov.cluster(:id1))
# 1.658832 seconds (4.17 k allocations: 1.095 GiB, 29.78% gc time)
# 0.950808 seconds (4.75 k allocations: 1.496 GiB, 7.48% gc time)
2 changes: 1 addition & 1 deletion benchmark/benchmark.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Code to reproduce this graph:
FixedEffectModels.jl v1.9.0 (Julia 1.9)
```julia
using DataFrames, CategoricalArrays, FixedEffectModels
N = 10000000
N = 10_000_000
K = 100
id1 = rand(1:(N/K), N)
id2 = rand(1:K, N)
Expand Down
20 changes: 20 additions & 0 deletions benchmark/benchmark_Metal.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
using DataFrames, Random, Metal, FixedEffectModels
Random.seed!(1234)
# More complicated setup
N = 8_000_000 # number of observations
M = 400_000 # number of workers
O = 50_000 # number of firms
id1 = rand(1:M, N)
id2 = [rand(max(1, div(x, 8)-10):min(O, div(x, 8)+10)) for x in id1]
x1 = 5 * cos.(id1) + 5 * sin.(id2) + randn(N)
x2 = cos.(id1) + sin.(id2) + randn(N)
x3 = cos.(id1) + sin.(id2) + randn(N)
y= 3 .* x1 .+ 5 .* x2 .+ cos.(id1) .+ cos.(id2).^2 .+ randn(N)
df = DataFrame(id1 = id1, id2 = id2, x1 = x1, x2 = x2, x3 = x3, y = y)
@time reg(df, @formula(y ~ x1 + x2 + x3 + fe(id1) + fe(id2)), maxiter = 200, double_precision = false)
# 30.002852 seconds (66.42 M allocations: 4.917 GiB, 0.83% gc time, 16.25% compilation time: 6% of which was recompilation)
@time reg(df, @formula(y ~ x1 + x2 + x3 + fe(id1) + fe(id2)), method = :Metal, maxiter = 200)
# 16.634684 seconds (3.50 M allocations: 1.407 GiB, 0.69% gc time, 1.49% compilation time: <1% of which was recompilation)



2 changes: 1 addition & 1 deletion benchmark/result.jl
Original file line number Diff line number Diff line change
@@ -1 +1 @@
using DataFrames, CSV, Gadflydf = CSV.read("/Users/matthieugomez/Dropbox/Github/FixedEffectModels.jl/benchmark/benchmark.csv", DataFrame)df."fixest (R)" = df."fixest (R)" ./ df."FixedEffectModels.jl (Julia)"df."lfe (R)" = df."lfe (R)" ./ df."FixedEffectModels.jl (Julia)"df."reghdfe (Stata)" = df."reghdfe (Stata)" ./ df."FixedEffectModels.jl (Julia)"df."FixedEffectModels.jl (Julia)" = df."FixedEffectModels.jl (Julia)" ./ df."FixedEffectModels.jl (Julia)"mdf = stack(df, Not([:Command, :Order]))mdf = rename(mdf, :variable => :Language)p = plot(mdf, x = "Command", y = "value", color = "Language", Guide.ylabel("Time (Ratio to Julia)"), Guide.xlabel("Command"), Scale.y_log10)draw(PNG("/Users/matthieugomez/Dropbox/Github/FixedEffectModels.jl/benchmark/fixedeffectmodels_benchmark.png", 8inch, 5inch, dpi=300), p)
using DataFrames, CSV, StatsPlotsdf = CSV.read("/Users/matthieugomez/Dropbox/Github/FixedEffectModels.jl/benchmark/benchmark2.csv", DataFrame)df."fixest (R)" = df."fixest (R)" ./ df."FixedEffectModels.jl (Julia)"df."lfe (R)" = df."lfe (R)" ./ df."FixedEffectModels.jl (Julia)"df."reg / reghdfe (Stata)" = df."reg / reghdfe (Stata)" ./ df."FixedEffectModels.jl (Julia)"df."FixedEffectModels.jl (Julia)" = df."FixedEffectModels.jl (Julia)" ./ df."FixedEffectModels.jl (Julia)"mdf = stack(df, Not([:Command, :Order]))mdf = rename(mdf, :variable => :Language)p = @df mdf plot( :Command, :value, group = :Language, yaxis = :log10, xlabel = "Command", ylabel = "Time (Ratio to Julia)", legend = :top, seriestype = :scatter, # or :line / :path depending on what you want palette = :tol_light, dpi = 200, size=(8 * 100 * 2 /3, 5 * 100 * 2 /3))savefig("/Users/matthieugomez/Dropbox/Github/FixedEffectModels.jl/benchmark/fixedeffectmodels_benchmark.png")
Expand Down
22 changes: 9 additions & 13 deletions src/fit.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ Estimate a linear model with high dimensional categorical variables / instrument
* `weights::Union{Nothing, Symbol}` A symbol to refer to a columns for weights
* `save::Symbol`: Should residuals and eventual estimated fixed effects saved in a dataframe? Default to `:none` Use `save = :residuals` to only save residuals, `save = :fe` to only save fixed effects, `save = :all` for both. Once saved, they can then be accessed using `residuals(m)` or `fe(m)` where `m` is the object returned by the estimation. The returned DataFrame is automatically aligned with the original DataFrame.
* `method::Symbol`: A symbol for the method. Default is :cpu. Alternatively, use :CUDA or :Metal (in this case, you need to import the respective package before importing FixedEffectModels)
* `nthreads::Integer` Number of threads to use in the estimation. If `method = :cpu`, defaults to `Threads.nthreads()`. Otherwise, defaults to 256.
* `double_precision::Bool`: Should the demeaning operation use Float64 rather than Float32? Default to true if `method =:cpu' and false if `method = :CUDA` or `method = :Metal`.
* `tol::Real` Tolerance. Default to 1e-6.
* `maxiter::Integer = 10000`: Maximum number of iterations
Expand Down Expand Up @@ -53,15 +52,15 @@ function reg(df,
weights::Union{Symbol, Nothing} = nothing,
save::Union{Bool, Symbol} = :none,
method::Symbol = :cpu,
nthreads::Integer = method == :cpu ? Threads.nthreads() : 256,
nthreads::Union{Integer, Nothing} = nothing,
double_precision::Bool = method == :cpu,
tol::Real = 1e-6,
maxiter::Integer = 10000,
drop_singletons::Bool = true,
progress_bar::Bool = true,
subset::Union{Nothing, AbstractVector} = nothing,
first_stage::Bool = true)
StatsAPI.fit(FixedEffectModel, formula, df, vcov; contrasts = contrasts, weights = weights, save = save, method = method, nthreads = nthreads, double_precision = double_precision, tol = tol, maxiter = maxiter, drop_singletons = drop_singletons, progress_bar = progress_bar, subset = subset, first_stage = first_stage)
StatsAPI.fit(FixedEffectModel, formula, df, vcov; contrasts = contrasts, weights = weights, save = save, method = method, double_precision = double_precision, tol = tol, maxiter = maxiter, drop_singletons = drop_singletons, progress_bar = progress_bar, subset = subset, first_stage = first_stage)
end

function StatsAPI.fit(::Type{FixedEffectModel},
Expand All @@ -72,8 +71,8 @@ function StatsAPI.fit(::Type{FixedEffectModel},
@nospecialize(weights::Union{Symbol, Nothing} = nothing),
@nospecialize(save::Union{Bool, Symbol} = :none),
@nospecialize(method::Symbol = :cpu),
@nospecialize(nthreads::Integer = method == :cpu ? Threads.nthreads() : 256),
@nospecialize(double_precision::Bool = true),
@nospecialize(nthreads::Union{Integer, Nothing} = nothing),
@nospecialize(double_precision::Bool = method == :cpu),
@nospecialize(tol::Real = 1e-6),
@nospecialize(maxiter::Integer = 10000),
@nospecialize(drop_singletons::Bool = true),
Expand All @@ -94,7 +93,9 @@ function StatsAPI.fit(::Type{FixedEffectModel},
info("method = :gpu is deprecated. Use method = :CUDA or method = :Metal")
method = :CUDA
end

if nthreads !== nothing
info("The keyword argument nthreads is deprecated. Multiple threads are now used by default.")
end
if save == true
save = :all
elseif save == false
Expand All @@ -105,11 +106,6 @@ function StatsAPI.fit(::Type{FixedEffectModel},
end
save_residuals = (save == :residuals) | (save == :all)

if method == :cpu && nthreads > Threads.nthreads()
@warn "Keyword argument nthreads = $(nthreads) is ignored (Julia was started with only $(Threads.nthreads()) threads)."
nthreads = Threads.nthreads()
end

##############################################################################
##
## Parse formula
Expand Down Expand Up @@ -166,7 +162,7 @@ function StatsAPI.fit(::Type{FixedEffectModel},

n_singletons = 0
if drop_singletons
n_singletons = drop_singletons!(esample, fes, nthreads)
n_singletons = drop_singletons!(esample, fes)
end

nobs = sum(esample)
Expand Down Expand Up @@ -240,7 +236,7 @@ function StatsAPI.fit(::Type{FixedEffectModel},
sumsquares_pre = [sum(abs2, x) for x in cols]

# partial out fixed effects
feM = AbstractFixedEffectSolver{double_precision ? Float64 : Float32}(subfes, weights, Val{method}, nthreads)
feM = AbstractFixedEffectSolver{double_precision ? Float64 : Float32}(subfes, weights, Val{method})

# partial out fixed effects
_, iterations, convergeds = solve_residuals!(cols, feM; maxiter = maxiter, tol = tol, progress_bar = progress_bar)
Expand Down
2 changes: 1 addition & 1 deletion src/utils/fixedeffects.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
##
##############################################################################

function drop_singletons!(esample, fes::Vector{<:FixedEffect}, nthreads)
function drop_singletons!(esample, fes::Vector{<:FixedEffect})
nsingletons = 0
ncleanpasses = 0
caches = [Vector{UInt8}(undef, fes[i].n) for i in eachindex(fes)]
Expand Down