Skip to content

Commit 48db336

Browse files
committed
reduce allocations in freqresp
Due to https://github.com/JuliaLang/julia/issues/44290 we incured a lot of allocations in `freqresp`
1 parent 383e0b6 commit 48db336

File tree

2 files changed

+65
-8
lines changed

2 files changed

+65
-8
lines changed

src/freqresp.jl

Lines changed: 64 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ end
6767
@inbounds for i in eachindex(w_vec)
6868
R[:,:,i] .= sys.D
6969
end
70-
return PDT(R)
70+
return PDT(R)::PDT
7171
end
7272
local F, Q
7373
try
@@ -79,22 +79,77 @@ end
7979
rethrow()
8080
end
8181
A = F.H
82-
C = sys.C*Q
82+
C = complex.(sys.C*Q) # We make C complex in order to not incur allocations in mul! below
8383
B = Q\sys.B
8484
D = sys.D
8585

8686
te = sys.timeevol
8787
Bc = similar(B, T) # for storage
88+
w = -_freq(w_vec[1], te)
89+
u = Vector{typeof(zero(eltype(A.data))+w)}(undef, sys.nx)
90+
cs = Vector{Tuple{real(eltype(u)),eltype(u)}}(undef, length(u)) # store Givens rotations
8891
@inbounds for i in eachindex(w_vec)
89-
Ri = @views R[:,:,i]
92+
Ri = @view(R[:, :, i])
9093
copyto!(Ri,D) # start with the D-matrix
9194
isinf(w_vec[i]) && continue
9295
copyto!(Bc,B) # initialize storage to B
9396
w = -_freq(w_vec[i], te)
94-
ldiv!(A, Bc, shift = w) # B += (A - w*I)\B # solve (A-wI)X = B, storing result in B
97+
ldiv2!(u, cs, A, Bc, shift = w) # B += (A - w*I)\B # solve (A-wI)X = B, storing result in B
9598
mul!(Ri, C, Bc, -1, 1) # use of 5-arg mul to subtract from D already in Ri. - rather than + since (A - w*I) instead of (w*I - A)
9699
end
97-
PDT(R) # PermutedDimsArray doesn't allocate to perform the permutation
100+
PDT(R)::PDT # PermutedDimsArray doesn't allocate to perform the permutation
101+
end
102+
103+
#=
104+
Custom implementation of hessenberg ldiv to allow reuse of u and cs
105+
With this method, the following benchmark goes from
106+
(100017 allocations: 11.48 MiB) # before
107+
to
108+
(17 allocations: 35.45 KiB) # after
109+
110+
w = exp10.(LinRange(-2, 2, 50000))
111+
G = ssrand(2,2,3)
112+
R = freqresp(G, w).parent;
113+
@btime ControlSystems.freqresp!($R, $G, $w);
114+
=#
115+
function ldiv2!(u, cs, F::UpperHessenberg, B::AbstractVecOrMat; shift::Number=false)
116+
LinearAlgebra.checksquare(F)
117+
m = size(F,1)
118+
m != size(B,1) && throw(DimensionMismatch("wrong right-hand-side # rows != $m"))
119+
LinearAlgebra.require_one_based_indexing(B)
120+
n = size(B,2)
121+
H = F.data
122+
μ = shift
123+
copyto!(u, 1, H, m*(m-1)+1, m) # u .= H[:,m]
124+
u[m] += μ
125+
X = B # not a copy, just rename to match paper
126+
@inbounds for k = m:-1:2
127+
c, s, ρ = LinearAlgebra.givensAlgorithm(u[k], H[k,k-1])
128+
cs[k] = (c, s)
129+
for i = 1:n
130+
X[k,i] /= ρ
131+
t₁ = s * X[k,i]; t₂ = c * X[k,i]
132+
@simd for j = 1:k-2
133+
X[j,i] -= u[j]*t₂ + H[j,k-1]*t₁
134+
end
135+
X[k-1,i] -= u[k-1]*t₂ + (H[k-1,k-1] + μ) * t₁
136+
end
137+
@simd for j = 1:k-2
138+
u[j] = H[j,k-1]*c - u[j]*s'
139+
end
140+
u[k-1] = (H[k-1,k-1] + μ) * c - u[k-1]*s'
141+
end
142+
for i = 1:n
143+
τ₁ = X[1,i] / u[1]
144+
@inbounds for j = 2:m
145+
τ₂ = X[j,i]
146+
c, s = cs[j]
147+
X[j-1,i] = c*τ₁ + s*τ₂
148+
τ₁ = c*τ₂ - s'τ₁
149+
end
150+
X[m,i] = τ₁
151+
end
152+
return X
98153
end
99154

100155
function freqresp_nohess(sys::AbstractStateSpace, w_vec::AbstractVector{W}) where W <: Real
@@ -120,9 +175,10 @@ freqresp_nohess
120175
@inbounds for i in eachindex(w_vec)
121176
R[:,:,i] .= sys.D
122177
end
123-
return PDT(R)
178+
return PDT(R)::PDT
124179
end
125-
A,B,C,D = ssdata(sys)
180+
A,B,C0,D = ssdata(sys)
181+
C = complex.(C0) # We make C complex in order to not incur allocations in mul! below
126182
te = sys.timeevol
127183
Ac = (A+one(T)*I) # for storage
128184
Adiag = diagind(A)
@@ -136,7 +192,7 @@ freqresp_nohess
136192
Bc = Ac \ B # Bc = (A - w*I)\B # avoid inplace to handle sparse matrices etc.
137193
mul!(Ri, C, Bc, -1, 1) # use of 5-arg mul to subtract from D already in Ri. - rather than + since (A - w*I) instead of (w*I - A)
138194
end
139-
PDT(R) # PermutedDimsArray doesn't allocate to perform the permutation
195+
PDT(R)::PDT # PermutedDimsArray doesn't allocate to perform the permutation
140196
end
141197

142198

test/test_freqresp.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ resp2 = reshape((im*w .+ 2)./(im*w .+ 1), length(w), 1, 1)
6666
@inferred freqresp(sys2s, w)
6767
@inferred freqresp(G2, w)
6868
@inferred freqresp(H2, w)
69+
@test @allocated freqresp(sys2, w) < 1.2*305551488 # allow 20% increase due to compiler variations
6970

7071

7172
## Complex-coefficient system

0 commit comments

Comments
 (0)