diff --git a/stdlib/LinearAlgebra/src/dense.jl b/stdlib/LinearAlgebra/src/dense.jl index 0a5f97889196c..2711bba5cd3ac 100644 --- a/stdlib/LinearAlgebra/src/dense.jl +++ b/stdlib/LinearAlgebra/src/dense.jl @@ -707,25 +707,32 @@ function exp!(A::StridedMatrix{T}) where T<:BlasFloat # Compute U and V: Even/odd terms in Padé numerator & denom # Expansion of k=1 in for loop P = A2 - U = mul!(C[4]*P, true, C[2]*I, true, true) #U = C[2]*I + C[4]*P - V = mul!(C[3]*P, true, C[1]*I, true, true) #V = C[1]*I + C[3]*P + U = similar(P) + V = similar(P) + for ind in CartesianIndices(P) + U[ind] = C[4]*P[ind] + C[2]*I[ind] + V[ind] = C[3]*P[ind] + C[1]*I[ind] + end for k in 2:(div(length(C), 2) - 1) P *= A2 - for ind in eachindex(P) + for ind in eachindex(P, U, V) U[ind] += C[2k + 2] * P[ind] V[ind] += C[2k + 1] * P[ind] end end - U = A * U + # U = A * U, but we overwrite P to avoid an allocation + mul!(P, A, U) + # P may be seen as an alias for U in the following code # Padé approximant: (V-U)\(V+U) - tmp1, tmp2 = A, A2 # Reuse already allocated arrays - for ind in eachindex(tmp1) - tmp1[ind] = V[ind] - U[ind] - tmp2[ind] = V[ind] + U[ind] + VminU, VplusU = V, U # Reuse already allocated arrays + for ind in eachindex(V, U) + vi, ui = V[ind], P[ind] + VminU[ind] = vi - ui + VplusU[ind] = vi + ui end - X = LAPACK.gesv!(tmp1, tmp2)[1] + X = LAPACK.gesv!(VminU, VplusU)[1] else s = log2(nA/5.4) # power of 2 later reversed by squaring if s > 0 @@ -793,10 +800,14 @@ function exp!(A::StridedMatrix{T}) where T<:BlasFloat end if ilo > 1 # apply lower permutations in reverse order - for j in (ilo-1):-1:1; rcswap!(j, Int(scale[j]), X) end + for j in (ilo-1):-1:1 + rcswap!(j, Int(scale[j]), X) + end end if ihi < n # apply upper permutations in forward order - for j in (ihi+1):n; rcswap!(j, Int(scale[j]), X) end + for j in (ihi+1):n + rcswap!(j, Int(scale[j]), X) + end end X end