Skip to content

Commit ee8a1bb

Browse files
authored
Merge pull request #649 from JuliaControl/freqresp_allocs
reduce allocations in freqresp
2 parents 383e0b6 + 2f57fc9 commit ee8a1bb

File tree

2 files changed

+67
-9
lines changed

2 files changed

+67
-9
lines changed

src/freqresp.jl

Lines changed: 66 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -67,34 +67,90 @@ end
6767
@inbounds for i in eachindex(w_vec)
6868
R[:,:,i] .= sys.D
6969
end
70-
return PDT(R)
70+
return PDT(R)::PDT
7171
end
7272
local F, Q
7373
try
7474
F = hessenberg(sys.A)
7575
Q = Matrix(F.Q)
7676
catch e
7777
# For matrix types that do not have a hessenberg implementation, we call the standard version of freqresp.
78-
e isa MethodError && return freqresp_nohess!(R, sys, w_vec)
78+
e isa Union{MethodError, ErrorException} && return freqresp_nohess!(R, sys, w_vec)
79+
# ErrorException appears if we try to access Q on a type which does not have Q as a field or property, notably HessenbergFactorization from GenericLinearAlgebra
7980
rethrow()
8081
end
8182
A = F.H
82-
C = sys.C*Q
83+
C = complex.(sys.C*Q) # We make C complex in order to not incur allocations in mul! below
8384
B = Q\sys.B
8485
D = sys.D
8586

8687
te = sys.timeevol
8788
Bc = similar(B, T) # for storage
89+
w = -_freq(w_vec[1], te)
90+
u = Vector{typeof(zero(eltype(A.data))+w)}(undef, sys.nx)
91+
cs = Vector{Tuple{real(eltype(u)),eltype(u)}}(undef, length(u)) # store Givens rotations
8892
@inbounds for i in eachindex(w_vec)
89-
Ri = @views R[:,:,i]
93+
Ri = @view(R[:, :, i])
9094
copyto!(Ri,D) # start with the D-matrix
9195
isinf(w_vec[i]) && continue
9296
copyto!(Bc,B) # initialize storage to B
9397
w = -_freq(w_vec[i], te)
94-
ldiv!(A, Bc, shift = w) # B += (A - w*I)\B # solve (A-wI)X = B, storing result in B
98+
ldiv2!(u, cs, A, Bc, shift = w) # B += (A - w*I)\B # solve (A-wI)X = B, storing result in B
9599
mul!(Ri, C, Bc, -1, 1) # use of 5-arg mul to subtract from D already in Ri. - rather than + since (A - w*I) instead of (w*I - A)
96100
end
97-
PDT(R) # PermutedDimsArray doesn't allocate to perform the permutation
101+
PDT(R)::PDT # PermutedDimsArray doesn't allocate to perform the permutation
102+
end
103+
104+
#=
105+
Custom implementation of hessenberg ldiv to allow reuse of u and cs
106+
With this method, the following benchmark goes from
107+
(100017 allocations: 11.48 MiB) # before
108+
to
109+
(17 allocations: 35.45 KiB) # after
110+
111+
w = exp10.(LinRange(-2, 2, 50000))
112+
G = ssrand(2,2,3)
113+
R = freqresp(G, w).parent;
114+
@btime ControlSystems.freqresp!($R, $G, $w);
115+
=#
116+
function ldiv2!(u, cs, F::UpperHessenberg, B::AbstractVecOrMat; shift::Number=false)
117+
LinearAlgebra.checksquare(F)
118+
m = size(F,1)
119+
m != size(B,1) && throw(DimensionMismatch("wrong right-hand-side # rows != $m"))
120+
LinearAlgebra.require_one_based_indexing(B)
121+
n = size(B,2)
122+
H = F.data
123+
μ = shift
124+
copyto!(u, 1, H, m*(m-1)+1, m) # u .= H[:,m]
125+
u[m] += μ
126+
X = B # not a copy, just rename to match paper
127+
@inbounds for k = m:-1:2
128+
c, s, ρ = LinearAlgebra.givensAlgorithm(u[k], H[k,k-1])
129+
cs[k] = (c, s)
130+
for i = 1:n
131+
X[k,i] /= ρ
132+
t₁ = s * X[k,i]; t₂ = c * X[k,i]
133+
@simd for j = 1:k-2
134+
X[j,i] -= u[j]*t₂ + H[j,k-1]*t₁
135+
end
136+
X[k-1,i] -= u[k-1]*t₂ + (H[k-1,k-1] + μ) * t₁
137+
end
138+
@simd for j = 1:k-2
139+
u[j] = H[j,k-1]*c - u[j]*s'
140+
end
141+
u[k-1] = (H[k-1,k-1] + μ) * c - u[k-1]*s'
142+
end
143+
for i = 1:n
144+
τ₁ = X[1,i] / u[1]
145+
@inbounds for j = 2:m
146+
τ₂ = X[j,i]
147+
c, s = cs[j]
148+
X[j-1,i] = c*τ₁ + s*τ₂
149+
τ₁ = c*τ₂ - s'τ₁
150+
end
151+
X[m,i] = τ₁
152+
end
153+
return X
98154
end
99155

100156
function freqresp_nohess(sys::AbstractStateSpace, w_vec::AbstractVector{W}) where W <: Real
@@ -120,9 +176,10 @@ freqresp_nohess
120176
@inbounds for i in eachindex(w_vec)
121177
R[:,:,i] .= sys.D
122178
end
123-
return PDT(R)
179+
return PDT(R)::PDT
124180
end
125-
A,B,C,D = ssdata(sys)
181+
A,B,C0,D = ssdata(sys)
182+
C = complex.(C0) # We make C complex in order to not incur allocations in mul! below
126183
te = sys.timeevol
127184
Ac = (A+one(T)*I) # for storage
128185
Adiag = diagind(A)
@@ -136,7 +193,7 @@ freqresp_nohess
136193
Bc = Ac \ B # Bc = (A - w*I)\B # avoid inplace to handle sparse matrices etc.
137194
mul!(Ri, C, Bc, -1, 1) # use of 5-arg mul to subtract from D already in Ri. - rather than + since (A - w*I) instead of (w*I - A)
138195
end
139-
PDT(R) # PermutedDimsArray doesn't allocate to perform the permutation
196+
PDT(R)::PDT # PermutedDimsArray doesn't allocate to perform the permutation
140197
end
141198

142199

test/test_freqresp.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ resp2 = reshape((im*w .+ 2)./(im*w .+ 1), length(w), 1, 1)
6666
@inferred freqresp(sys2s, w)
6767
@inferred freqresp(G2, w)
6868
@inferred freqresp(H2, w)
69+
@test (@allocated freqresp(sys2, w)) < 1.2*56672 # allow 20% increase due to compiler variations
6970

7071

7172
## Complex-coefficient system

0 commit comments

Comments
 (0)