From d1709e805bc02e79a6ffeff0b0ef21169b6bf038 Mon Sep 17 00:00:00 2001 From: Matthieu Gomez Date: Wed, 28 Jan 2026 19:11:49 -0500 Subject: [PATCH 1/8] Refactor nthreads parameter handling in fit.jl Removed default value for nthreads and adjusted related logic. --- src/fit.jl | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/fit.jl b/src/fit.jl index b4d6dad..6205070 100644 --- a/src/fit.jl +++ b/src/fit.jl @@ -53,7 +53,7 @@ function reg(df, weights::Union{Symbol, Nothing} = nothing, save::Union{Bool, Symbol} = :none, method::Symbol = :cpu, - nthreads::Integer = method == :cpu ? Threads.nthreads() : 256, + nthreads::Union{Integer, Nothing} = nothing, double_precision::Bool = method == :cpu, tol::Real = 1e-6, maxiter::Integer = 10000, @@ -72,7 +72,7 @@ function StatsAPI.fit(::Type{FixedEffectModel}, @nospecialize(weights::Union{Symbol, Nothing} = nothing), @nospecialize(save::Union{Bool, Symbol} = :none), @nospecialize(method::Symbol = :cpu), - @nospecialize(nthreads::Integer = method == :cpu ? Threads.nthreads() : 256), + @nospecialize(nthreads::Union{Integer, Nothing} = nothing, @nospecialize(double_precision::Bool = true), @nospecialize(tol::Real = 1e-6), @nospecialize(maxiter::Integer = 10000), @@ -105,11 +105,6 @@ function StatsAPI.fit(::Type{FixedEffectModel}, end save_residuals = (save == :residuals) | (save == :all) - if method == :cpu && nthreads > Threads.nthreads() - @warn "Keyword argument nthreads = $(nthreads) is ignored (Julia was started with only $(Threads.nthreads()) threads)." - nthreads = Threads.nthreads() - end - ############################################################################## ## ## Parse formula @@ -166,7 +161,7 @@ function StatsAPI.fit(::Type{FixedEffectModel}, n_singletons = 0 if drop_singletons - n_singletons = drop_singletons!(esample, fes, nthreads) + n_singletons = drop_singletons!(esample, fes) end nobs = sum(esample) From e51c296267f0f2e7aeb8f1334e5ce9fd86fb859a Mon Sep 17 00:00:00 2001 From: Matthieu Gomez Date: Wed, 28 Jan 2026 19:13:07 -0500 Subject: [PATCH 2/8] Remove nthreads parameter from drop_singletons! function --- src/utils/fixedeffects.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/fixedeffects.jl b/src/utils/fixedeffects.jl index 0e2a77e..ae2a8a4 100644 --- a/src/utils/fixedeffects.jl +++ b/src/utils/fixedeffects.jl @@ -4,7 +4,7 @@ ## ############################################################################## -function drop_singletons!(esample, fes::Vector{<:FixedEffect}, nthreads) +function drop_singletons!(esample, fes::Vector{<:FixedEffect}) nsingletons = 0 ncleanpasses = 0 caches = [Vector{UInt8}(undef, fes[i].n) for i in eachindex(fes)] From 1506494b0a9ace684eb808521fbaff3bb8130f47 Mon Sep 17 00:00:00 2001 From: Matthieu Gomez Date: Wed, 28 Jan 2026 19:18:59 -0500 Subject: [PATCH 3/8] Update default double_precision based on method Change default value of double_precision based on method. --- src/fit.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/fit.jl b/src/fit.jl index 6205070..e5dd428 100644 --- a/src/fit.jl +++ b/src/fit.jl @@ -11,7 +11,6 @@ Estimate a linear model with high dimensional categorical variables / instrument * `weights::Union{Nothing, Symbol}` A symbol to refer to a columns for weights * `save::Symbol`: Should residuals and eventual estimated fixed effects saved in a dataframe? Default to `:none` Use `save = :residuals` to only save residuals, `save = :fe` to only save fixed effects, `save = :all` for both. Once saved, they can then be accessed using `residuals(m)` or `fe(m)` where `m` is the object returned by the estimation. The returned DataFrame is automatically aligned with the original DataFrame. * `method::Symbol`: A symbol for the method. Default is :cpu. Alternatively, use :CUDA or :Metal (in this case, you need to import the respective package before importing FixedEffectModels) -* `nthreads::Integer` Number of threads to use in the estimation. If `method = :cpu`, defaults to `Threads.nthreads()`. Otherwise, defaults to 256. * `double_precision::Bool`: Should the demeaning operation use Float64 rather than Float32? Default to true if `method =:cpu' and false if `method = :CUDA` or `method = :Metal`. * `tol::Real` Tolerance. Default to 1e-6. * `maxiter::Integer = 10000`: Maximum number of iterations @@ -73,7 +72,7 @@ function StatsAPI.fit(::Type{FixedEffectModel}, @nospecialize(save::Union{Bool, Symbol} = :none), @nospecialize(method::Symbol = :cpu), @nospecialize(nthreads::Union{Integer, Nothing} = nothing, - @nospecialize(double_precision::Bool = true), + @nospecialize(double_precision::Bool = method == :cpu), @nospecialize(tol::Real = 1e-6), @nospecialize(maxiter::Integer = 10000), @nospecialize(drop_singletons::Bool = true), From bfd473489a823e37d65b0c5874f0ae220b24442f Mon Sep 17 00:00:00 2001 From: Matthieu Gomez Date: Thu, 29 Jan 2026 21:15:55 -0500 Subject: [PATCH 4/8] Bump version to 1.13.0 and update FixedEffects compatibility Updated version and dependency compatibility for FixedEffects. --- Project.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index 067a8c7..4f92e50 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "FixedEffectModels" uuid = "9d5cd8c9-2029-5cab-9928-427838db53e3" -version = "1.12.2" +version = "1.13.0" [deps] DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" @@ -19,7 +19,7 @@ Vcov = "ec2bfdc2-55df-4fc9-b9ae-4958c2cf2486" [compat] DataFrames = "0.21, 0.22, 1" -FixedEffects = "2.3" +FixedEffects = "2.6" PrecompileTools = "1" Reexport = "0.1, 0.2, 1" Statistics = "1" From b5fdc1fc3d11ea3f3a8e8b0b70a70b7c696faa35 Mon Sep 17 00:00:00 2001 From: Matthieu Gomez Date: Thu, 29 Jan 2026 21:20:29 -0500 Subject: [PATCH 5/8] Remove deprecated nthreads argument and update solver Updated fit.jl to remove deprecated nthreads argument and adjust AbstractFixedEffectSolver instantiation. --- src/fit.jl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/fit.jl b/src/fit.jl index e5dd428..ea18e21 100644 --- a/src/fit.jl +++ b/src/fit.jl @@ -93,7 +93,9 @@ function StatsAPI.fit(::Type{FixedEffectModel}, info("method = :gpu is deprecated. Use method = :CUDA or method = :Metal") method = :CUDA end - + if nthreads !== nothing + info("The keyword argument nthreads is deprecated. Multiple threads are now used by default.") + end if save == true save = :all elseif save == false @@ -234,7 +236,7 @@ function StatsAPI.fit(::Type{FixedEffectModel}, sumsquares_pre = [sum(abs2, x) for x in cols] # partial out fixed effects - feM = AbstractFixedEffectSolver{double_precision ? Float64 : Float32}(subfes, weights, Val{method}, nthreads) + feM = AbstractFixedEffectSolver{double_precision ? Float64 : Float32}(subfes, weights, Val{method}) # partial out fixed effects _, iterations, convergeds = solve_residuals!(cols, feM; maxiter = maxiter, tol = tol, progress_bar = progress_bar) From 7710989f805764d00e473344c1f8344ff9e715e9 Mon Sep 17 00:00:00 2001 From: Matthieu Gomez Date: Thu, 29 Jan 2026 21:24:19 -0500 Subject: [PATCH 6/8] Update multi-threading explanation in README Clarified the usage of multi-threading in FixedEffectModels. --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 835cf8f..6808562 100755 --- a/README.md +++ b/README.md @@ -119,8 +119,7 @@ Finally, you can use [RegressionTables.jl](https://github.com/jmboehm/Regression ## Performances -### MultiThreads -`FixedEffectModels` is multi-threaded. Use the option `nthreads` to select the number of threads to use in the estimation (defaults to `Threads.nthreads()`). +`FixedEffectModels` is multi-threaded by default. Launch Julia with multiple threads to benefit from the speedup. You can verify how many threads are available in your session with `Threads.nthreads()`. ### GPUs The package has an experimental support for GPUs. This can make the package 2x-5x faster for complicated problems. From fbc9e309c0e08770dcf3b594ee329c0586391147 Mon Sep 17 00:00:00 2001 From: Matthieu Gomez Date: Thu, 29 Jan 2026 21:35:40 -0500 Subject: [PATCH 7/8] Refactor fit function to remove nthreads parameter Removed 'nthreads' from the fit function call parameters. --- src/fit.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fit.jl b/src/fit.jl index ea18e21..8df0e16 100644 --- a/src/fit.jl +++ b/src/fit.jl @@ -60,7 +60,7 @@ function reg(df, progress_bar::Bool = true, subset::Union{Nothing, AbstractVector} = nothing, first_stage::Bool = true) - StatsAPI.fit(FixedEffectModel, formula, df, vcov; contrasts = contrasts, weights = weights, save = save, method = method, nthreads = nthreads, double_precision = double_precision, tol = tol, maxiter = maxiter, drop_singletons = drop_singletons, progress_bar = progress_bar, subset = subset, first_stage = first_stage) + StatsAPI.fit(FixedEffectModel, formula, df, vcov; contrasts = contrasts, weights = weights, save = save, method = method, double_precision = double_precision, tol = tol, maxiter = maxiter, drop_singletons = drop_singletons, progress_bar = progress_bar, subset = subset, first_stage = first_stage) end function StatsAPI.fit(::Type{FixedEffectModel}, @@ -71,7 +71,7 @@ function StatsAPI.fit(::Type{FixedEffectModel}, @nospecialize(weights::Union{Symbol, Nothing} = nothing), @nospecialize(save::Union{Bool, Symbol} = :none), @nospecialize(method::Symbol = :cpu), - @nospecialize(nthreads::Union{Integer, Nothing} = nothing, + @nospecialize(nthreads::Union{Integer, Nothing} = nothing), @nospecialize(double_precision::Bool = method == :cpu), @nospecialize(tol::Real = 1e-6), @nospecialize(maxiter::Integer = 10000), From 0fbc200e31bff4243a5522333a4fcd3a9af122e4 Mon Sep 17 00:00:00 2001 From: Matthieu Gomez Date: Thu, 29 Jan 2026 22:04:01 -0500 Subject: [PATCH 8/8] update benchmarks --- benchmark/benchmark.jl | 28 +++++++++++++--------------- benchmark/benchmark.md | 2 +- benchmark/benchmark_Metal.jl | 20 ++++++++++++++++++++ benchmark/result.jl | 2 +- 4 files changed, 35 insertions(+), 17 deletions(-) create mode 100755 benchmark/benchmark_Metal.jl diff --git a/benchmark/benchmark.jl b/benchmark/benchmark.jl index b852052..f517866 100755 --- a/benchmark/benchmark.jl +++ b/benchmark/benchmark.jl @@ -1,6 +1,6 @@ using DataFrames, Random, CategoricalArrays @time using FixedEffectModels -# 13s precompiling +# 0.418712 seconds (742.49 k allocations: 45.500 MiB, 4.07% gc time, 1.07% compilation time) # Very simple setup N = 10000000 K = 100 @@ -12,21 +12,21 @@ y= 3 .* x1 .+ 5 .* x2 .+ cos.(id1) .+ cos.(id2).^2 .+ randn(N) df = DataFrame(id1 = id1, id2 = id2, x1 = x1, x2 = x2, y = y) # first time @time reg(df, @formula(y ~ x1 + x2)) -# 1.823s +# 1.810739 seconds (14.05 M allocations: 1.583 GiB, 3.76% gc time, 84.28% compilation time: 91% of which was recompilation) @time reg(df, @formula(y ~ x1 + x2)) -# 0.353469 seconds (441 allocations: 691.439 MiB, 3.65% gc time) +# 0.288344 seconds (712 allocations: 920.405 MiB, 18.71% gc time) @time reg(df, @formula(y ~ x1 + x2), Vcov.cluster(:id2)) -# 0.763999 seconds (2.96 M allocations: 967.418 MiB, 2.29% gc time, 54.39% compilation time: 5% of which was recompilation) +# 0.528590 seconds (1.33 M allocations: 1.039 GiB, 8.76% gc time, 55.33% compilation time) @time reg(df, @formula(y ~ x1 + x2), Vcov.cluster(:id2)) -# 0.401544 seconds (622 allocations: 768.943 MiB, 3.52% gc time) +# 0.268619 seconds (879 allocations: 997.916 MiB, 15.08% gc time) @time reg(df, @formula(y ~ x1 + x2 + fe(id1))) -# 0.893835 seconds (1.03 k allocations: 929.130 MiB, 54.19% gc time) +# 0.824536 seconds (3.09 M allocations: 1.426 GiB, 5.37% gc time, 61.39% compilation time: 3% of which was recompilation) @time reg(df, @formula(y ~ x1 + x2 + fe(id1))) -# 0.474160 seconds (1.13 k allocations: 933.340 MiB, 1.74% gc time) +# 0.356793 seconds (1.41 k allocations: 1.276 GiB, 19.31% gc time) @time reg(df, @formula(y ~ x1 + x2 + fe(id1)), Vcov.cluster(:id1)) -# 0.598816 seconds (261.08 k allocations: 1.007 GiB, 8.29% gc time, 9.21% compilation time) +# 0.435500 seconds (495.96 k allocations: 1.381 GiB, 15.97% gc time, 10.72% compilation time) @time reg(df, @formula(y ~ x1 + x2 + fe(id1) + fe(id2))) -# 1.584573 seconds (489.64 k allocations: 1.094 GiB, 2.10% gc time, 8.53% compilation time) +# 1.367264 seconds (1.91 M allocations: 1.592 GiB, 5.74% gc time, 23.85% compilation time: 20% of which was recompilation) # More complicated setup N = 800000 # number of observations @@ -39,9 +39,7 @@ x2 = cos.(id1) + sin.(id2) + randn(N) y= 3 .* x1 .+ 5 .* x2 .+ cos.(id1) .+ cos.(id2).^2 .+ randn(N) df = DataFrame(id1 = id1, id2 = id2, x1 = x1, x2 = x2, y = y) @time reg(df, @formula(y ~ x1 + x2 + fe(id1) + fe(id2))) -# 2.504294 seconds (75.83 k allocations: 95.525 MiB, 0.23% gc time) -# for some reason in 1.10 I now get worse time (iter 200) -# 4.709078 seconds (108.98 k allocations: 101.417 MiB) +# 1.546023 seconds (19.89 k allocations: 119.673 MiB, 1.70% gc time) @@ -57,8 +55,8 @@ X1 = rand(n) ln_y = 3 .* X1 .+ rand(n) df = DataFrame(X1 = X1, ln_y = ln_y, id1 = id1, id2 = id2, id3 = id3) @time reg(df, @formula(ln_y ~ X1 + fe(id1)), Vcov.cluster(:id1)) -# 0.543996 seconds (873 allocations: 815.677 MiB, 34.15% gc time) +# 0.311420 seconds (1.35 k allocations: 1.052 GiB, 22.30% gc time) @time reg(df, @formula(ln_y ~ X1 + fe(id1) + fe(id2)), Vcov.cluster(:id1)) -# 1.301908 seconds (3.03 k allocations: 968.729 MiB, 25.84% gc time) +# 0.808992 seconds (3.52 k allocations: 1.272 GiB, 8.68% gc time) @time reg(df, @formula(ln_y ~ X1 + fe(id1) + fe(id2) + fe(id3)), Vcov.cluster(:id1)) -# 1.658832 seconds (4.17 k allocations: 1.095 GiB, 29.78% gc time) +# 0.950808 seconds (4.75 k allocations: 1.496 GiB, 7.48% gc time) diff --git a/benchmark/benchmark.md b/benchmark/benchmark.md index 3217393..3b9fb49 100755 --- a/benchmark/benchmark.md +++ b/benchmark/benchmark.md @@ -6,7 +6,7 @@ Code to reproduce this graph: FixedEffectModels.jl v1.9.0 (Julia 1.9) ```julia using DataFrames, CategoricalArrays, FixedEffectModels - N = 10000000 + N = 10_000_000 K = 100 id1 = rand(1:(N/K), N) id2 = rand(1:K, N) diff --git a/benchmark/benchmark_Metal.jl b/benchmark/benchmark_Metal.jl new file mode 100755 index 0000000..89dffad --- /dev/null +++ b/benchmark/benchmark_Metal.jl @@ -0,0 +1,20 @@ +using DataFrames, Random, Metal, FixedEffectModels +Random.seed!(1234) +# More complicated setup +N = 8_000_000 # number of observations +M = 400_000 # number of workers +O = 50_000 # number of firms +id1 = rand(1:M, N) +id2 = [rand(max(1, div(x, 8)-10):min(O, div(x, 8)+10)) for x in id1] +x1 = 5 * cos.(id1) + 5 * sin.(id2) + randn(N) +x2 = cos.(id1) + sin.(id2) + randn(N) +x3 = cos.(id1) + sin.(id2) + randn(N) +y= 3 .* x1 .+ 5 .* x2 .+ cos.(id1) .+ cos.(id2).^2 .+ randn(N) +df = DataFrame(id1 = id1, id2 = id2, x1 = x1, x2 = x2, x3 = x3, y = y) +@time reg(df, @formula(y ~ x1 + x2 + x3 + fe(id1) + fe(id2)), maxiter = 200, double_precision = false) +# 30.002852 seconds (66.42 M allocations: 4.917 GiB, 0.83% gc time, 16.25% compilation time: 6% of which was recompilation) +@time reg(df, @formula(y ~ x1 + x2 + x3 + fe(id1) + fe(id2)), method = :Metal, maxiter = 200) +# 16.634684 seconds (3.50 M allocations: 1.407 GiB, 0.69% gc time, 1.49% compilation time: <1% of which was recompilation) + + + diff --git a/benchmark/result.jl b/benchmark/result.jl index 07ad850..bebb8a4 100644 --- a/benchmark/result.jl +++ b/benchmark/result.jl @@ -1 +1 @@ -using DataFrames, CSV, Gadfly df = CSV.read("/Users/matthieugomez/Dropbox/Github/FixedEffectModels.jl/benchmark/benchmark.csv", DataFrame) df."fixest (R)" = df."fixest (R)" ./ df."FixedEffectModels.jl (Julia)" df."lfe (R)" = df."lfe (R)" ./ df."FixedEffectModels.jl (Julia)" df."reghdfe (Stata)" = df."reghdfe (Stata)" ./ df."FixedEffectModels.jl (Julia)" df."FixedEffectModels.jl (Julia)" = df."FixedEffectModels.jl (Julia)" ./ df."FixedEffectModels.jl (Julia)" mdf = stack(df, Not([:Command, :Order])) mdf = rename(mdf, :variable => :Language) p = plot(mdf, x = "Command", y = "value", color = "Language", Guide.ylabel("Time (Ratio to Julia)"), Guide.xlabel("Command"), Scale.y_log10) draw(PNG("/Users/matthieugomez/Dropbox/Github/FixedEffectModels.jl/benchmark/fixedeffectmodels_benchmark.png", 8inch, 5inch, dpi=300), p) \ No newline at end of file +using DataFrames, CSV, StatsPlots df = CSV.read("/Users/matthieugomez/Dropbox/Github/FixedEffectModels.jl/benchmark/benchmark2.csv", DataFrame) df."fixest (R)" = df."fixest (R)" ./ df."FixedEffectModels.jl (Julia)" df."lfe (R)" = df."lfe (R)" ./ df."FixedEffectModels.jl (Julia)" df."reg / reghdfe (Stata)" = df."reg / reghdfe (Stata)" ./ df."FixedEffectModels.jl (Julia)" df."FixedEffectModels.jl (Julia)" = df."FixedEffectModels.jl (Julia)" ./ df."FixedEffectModels.jl (Julia)" mdf = stack(df, Not([:Command, :Order])) mdf = rename(mdf, :variable => :Language) p = @df mdf plot( :Command, :value, group = :Language, yaxis = :log10, xlabel = "Command", ylabel = "Time (Ratio to Julia)", legend = :top, seriestype = :scatter, # or :line / :path depending on what you want palette = :tol_light, dpi = 200, size=(8 * 100 * 2 /3, 5 * 100 * 2 /3) ) savefig("/Users/matthieugomez/Dropbox/Github/FixedEffectModels.jl/benchmark/fixedeffectmodels_benchmark.png") \ No newline at end of file