Skip to content

Commit

Permalink
Beginning to add support for SubArray functionality with BLAS/LAPACK.…
Browse files Browse the repository at this point in the history
… Addresses #93.

 - pointer(::SubArray) implemented
 - functionality for chol(), lu() (with economy), and qr() added
  • Loading branch information
GeorgeXing committed Aug 15, 2011
1 parent 605a07c commit 3137dc3
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 51 deletions.
7 changes: 6 additions & 1 deletion j/abstractarray.j
Original file line number Diff line number Diff line change
Expand Up @@ -1171,4 +1171,9 @@ function stride(s::SubArray, i::Int)
return isa(s.indexes[i],Range1{Index}) ? a :
s.indexes[i].step > 0 ? s.indexes[i].step * a :
error("stride: must have ranges with positive step")
end
end

pointer{T,N}(x::SubArray{T,N}) = pointer(x.parent) + sub2ind(size(x.parent),
ntuple(N,i->x.indexes[i].start)...)*sizeof(T)
iscomplex(::SubArray{Complex128}) = true
iscomplex(::SubArray{Complex64}) = true
104 changes: 63 additions & 41 deletions j/linalg_lapack.j
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,13 @@ macro jl_lapack_potrf_macro(potrf, eltype)
# INTEGER INFO, LDA, N
# * .. Array Arguments ..
# DOUBLE PRECISION A( LDA, * )
function jl_lapack_potrf(uplo, n, A::Matrix{$eltype}, lda)
function jl_lapack_potrf(uplo, n, A::AbstractMatrix{$eltype}, lda)
info = Array(Int32, 1)
a = pointer(A)
ccall(dlsym(libLAPACK, $potrf),
Void,
(Ptr{Uint8}, Ptr{Int32}, Ptr{$eltype}, Ptr{Int32}, Ptr{Int32}),
uplo, int32(n), A, int32(lda), info)
uplo, int32(n), a, int32(lda), info)
return info[1]
end

Expand All @@ -26,11 +27,17 @@ end
@jl_lapack_potrf_macro :zpotrf_ Complex128
@jl_lapack_potrf_macro :cpotrf_ Complex64

function chol(A::Matrix)
#does not check that input matrix is symmetric/hermitian
#(uses upper triangular half)
#Possible TODO: "economy mode"
function chol{T<:Union(Float32,Float64,Complex64,Complex128)}(A::Union(Matrix,SubArray{T,2}))
n = int32(size(A, 1))
R = triu(A)

info = jl_lapack_potrf("U", n, R, n)
if isa(A, Matrix)
R = triu(A)
else
R = triu(A[:,:])
end
info = jl_lapack_potrf("U", n, R, stride(A,2))

if info == 0; return R; end
if info > 0; error("Matrix not Positive Definite"); end
Expand All @@ -46,13 +53,14 @@ macro jl_lapack_getrf_macro(getrf, eltype)
# * .. Array Arguments ..
# INTEGER IPIV( * )
# DOUBLE PRECISION A( LDA, * )
function jl_lapack_getrf(m, n, A::Matrix{$eltype}, lda, ipiv)
function jl_lapack_getrf(m, n, A::AbstractMatrix{$eltype}, lda, ipiv)
info = Array(Int32, 1)
a = pointer(A)
ccall(dlsym(libLAPACK, $getrf),
Void,
(Ptr{Int32}, Ptr{Int32}, Ptr{$eltype},
Ptr{Int32}, Ptr{Int32}, Ptr{Int32}),
int32(m), int32(n), A, int32(lda), ipiv, info)
int32(m), int32(n), a, int32(lda), ipiv, info)
return info[1]
end

Expand All @@ -65,16 +73,21 @@ end
@jl_lapack_getrf_macro :cgetrf_ Complex64

lu(A::Matrix) = lu(A, false)

function lu(A::Matrix, economy::Bool)
lu{T}(A::SubArray{T,2}) = lu(A,false)
function lu{T<:Union(Float32,Float64,Complex64,Complex128)}(A::Union(Matrix,SubArray{T,2}),
economy::Bool)
m, n = size(A)
LU = A
if !economy
LU = copy(A)
if isa(A, Matrix)
LU = copy(A)
else
LU = A[:,:]
end
end
ipiv = Array(Int32, min(m,n))

info = jl_lapack_getrf(m, n, LU, m, ipiv)
info = jl_lapack_getrf(m, n, LU, stride(LU,2), ipiv)

if info > 0; error("Matrix is singular"); end
P = linspace(1, m)
Expand Down Expand Up @@ -104,11 +117,12 @@ macro jl_lapack_qr_macro(real_geqp3, complex_geqp3, orgqr, ungqr, eltype, celtyp
# DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * )
function jl_lapack_geqp3(m, n, A::Matrix{$eltype}, lda, jpvt, tau, work, lwork)
info = Array(Int32, 1)
a = pointer(A)
ccall(dlsym(libLAPACK, $real_geqp3),
Void,
(Ptr{Int32}, Ptr{Int32}, Ptr{$eltype}, Ptr{Int32},
Ptr{Int32}, Ptr{$eltype}, Ptr{$eltype}, Ptr{Int32}, Ptr{Int32}),
int32(m), int32(n), A, int32(lda), jpvt, tau, work, int32(lwork), info)
int32(m), int32(n), a, int32(lda), jpvt, tau, work, int32(lwork), info)
return info[1]
end

Expand All @@ -119,13 +133,14 @@ macro jl_lapack_qr_macro(real_geqp3, complex_geqp3, orgqr, ungqr, eltype, celtyp
# INTEGER JPVT( * )
# DOUBLE PRECISION RWORK( * )
# COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * )
function jl_lapack_geqp3(m, n, A::Matrix{$celtype}, lda, jpvt, tau, work, lwork, rwork)
function jl_lapack_geqp3(m, n, A::AbstractMatrix{$celtype}, lda, jpvt, tau, work, lwork, rwork)
info = Array(Int32, 1)
a = pointer(A)
ccall(dlsym(libLAPACK, $complex_geqp3),
Void,
(Ptr{Int32}, Ptr{Int32}, Ptr{$celtype}, Ptr{Int32},
Ptr{Int32}, Ptr{$celtype}, Ptr{$celtype}, Ptr{Int32}, Ptr{$eltype}, Ptr{Int32}),
int32(m), int32(n), A, int32(lda), jpvt, tau, work, int32(lwork), rwork, info)
int32(m), int32(n), a, int32(lda), jpvt, tau, work, int32(lwork), rwork, info)
return info[1]
end

Expand All @@ -134,13 +149,14 @@ macro jl_lapack_qr_macro(real_geqp3, complex_geqp3, orgqr, ungqr, eltype, celtyp
# INTEGER INFO, K, LDA, LWORK, M, N
# * .. Array Arguments ..
# DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * )
function jl_lapack_orgqr(m, n, k, A::Matrix{$eltype}, lda, tau, work, lwork)
function jl_lapack_orgqr(m, n, k, A::AbstractMatrix{$eltype}, lda, tau, work, lwork)
info = Array(Int32, 1)
a = pointer(A)
ccall(dlsym(libLAPACK, $orgqr),
Void,
(Ptr{Int32}, Ptr{Int32}, Ptr{Int32}, Ptr{$eltype},
Ptr{Int32}, Ptr{$eltype}, Ptr{$eltype}, Ptr{Int32}, Ptr{Int32}),
int32(m), int32(n), int32(k), A, int32(lda), tau, work, int32(lwork), info)
int32(m), int32(n), int32(k), a, int32(lda), tau, work, int32(lwork), info)
return info[1]
end

Expand All @@ -149,13 +165,14 @@ macro jl_lapack_qr_macro(real_geqp3, complex_geqp3, orgqr, ungqr, eltype, celtyp
# INTEGER INFO, K, LDA, LWORK, M, N
#* .. Array Arguments ..
# COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * )
function jl_lapack_ungqr(m, n, k, A::Matrix{$celtype}, lda, tau, work, lwork)
function jl_lapack_ungqr(m, n, k, A::AbstractMatrix{$celtype}, lda, tau, work, lwork)
info = Array(Int32, 1)
a = pointer(A)
ccall(dlsym(libLAPACK, $ungqr),
Void,
(Ptr{Int32}, Ptr{Int32}, Ptr{Int32}, Ptr{$celtype},
Ptr{Int32}, Ptr{$celtype}, Ptr{$celtype}, Ptr{Int32}, Ptr{Int32}),
int32(m), int32(n), int32(k), A, int32(lda), tau, work, int32(lwork), info)
int32(m), int32(n), int32(k), a, int32(lda), tau, work, int32(lwork), info)
return info[1]
end

Expand All @@ -165,9 +182,14 @@ end
@jl_lapack_qr_macro :dgeqp3_ :zgeqp3_ :dorgqr_ :zungqr_ Float64 Complex128
@jl_lapack_qr_macro :sgeqp3_ :cgeqp3_ :sorgqr_ :cungqr_ Float32 Complex64

function qr{T}(A::Matrix{T})
#possible TODO: economy mode?
function qr{T<:Union(Float32,Float64,Complex64,Complex128)}(A::Union(Matrix{T},SubArray{T,2}))
m, n = size(A)
QR = copy(A)
if isa(A, Matrix)
QR = copy(A)
else
QR = A[:,:]
end
jpvt = zeros(Int32, n)
k = min(m,n)
tau = Array(T, k)
Expand All @@ -179,19 +201,19 @@ function qr{T}(A::Matrix{T})
work = zeros(T,1)
lwork = int32(-1)
if iscomplex(A)
info = jl_lapack_geqp3(m, n, QR, m, jpvt, tau, work, lwork, rwork)
info = jl_lapack_geqp3(m, n, QR, stride(QR,2), jpvt, tau, work, lwork, rwork)
else
info = jl_lapack_geqp3(m, n, QR, m, jpvt, tau, work, lwork)
info = jl_lapack_geqp3(m, n, QR, stride(QR,2), jpvt, tau, work, lwork)
end

if info == 0; lwork = real(work[1]); work = Array(T, long(lwork))
else error("Error in LAPACK geqp3"); end

# Compute QR factorization
if iscomplex(A)
info = jl_lapack_geqp3(m, n, QR, m, jpvt, tau, work, lwork, rwork)
info = jl_lapack_geqp3(m, n, QR, stride(QR,2), jpvt, tau, work, lwork, rwork)
else
info = jl_lapack_geqp3(m, n, QR, m, jpvt, tau, work, lwork)
info = jl_lapack_geqp3(m, n, QR, stride(QR,2), jpvt, tau, work, lwork)
end

if info > 0; error("Matrix is singular"); end
Expand All @@ -201,19 +223,19 @@ function qr{T}(A::Matrix{T})
# Workspace query to form Q
lwork2 = int32(-1)
if iscomplex(A)
info = jl_lapack_ungqr(m, k, k, QR, m, tau, work, lwork2)
info = jl_lapack_ungqr(m, k, k, QR, stride(QR,2), tau, work, lwork2)
else
info = jl_lapack_orgqr(m, k, k, QR, m, tau, work, lwork2)
info = jl_lapack_orgqr(m, k, k, QR, stride(QR,2), tau, work, lwork2)
end

if info == 0; lwork2 = real(work[1]); work = Array(T, long(lwork2))
else error("Error in LAPACK orgqr/ungqr"); end

# Compute Q
if iscomplex(A)
info = jl_lapack_ungqr(m, k, k, QR, m, tau, work, lwork2)
info = jl_lapack_ungqr(m, k, k, QR, stride(QR,2), tau, work, lwork2)
else
info = jl_lapack_orgqr(m, k, k, QR, m, tau, work, lwork2)
info = jl_lapack_orgqr(m, k, k, QR, stride(QR,2), tau, work, lwork2)
end

if info == 0; return (QR[:, 1:k], R[1:k, :], jpvt); end
Expand Down Expand Up @@ -324,19 +346,19 @@ function eig{T}(A::Matrix{T})
lwork = -1

if iscomplex(A)
info = jl_lapack_heev(jobz, uplo, n, EV, n, W, work, lwork, rwork)
info = jl_lapack_heev(jobz, uplo, n, EV, stride(A,2), W, work, lwork, rwork)
else
info = jl_lapack_syev(jobz, uplo, n, EV, n, W, work, lwork)
info = jl_lapack_syev(jobz, uplo, n, EV, stride(A,2), W, work, lwork)
end

if info == 0; lwork = real(work[1]); work = Array(T, long(lwork));
else error("Error in LAPACK syev/heev"); end

# Compute eigenvalues, eigenvectors
if iscomplex(A)
info = jl_lapack_heev(jobz, uplo, n, EV, n, W, work, lwork, rwork)
info = jl_lapack_heev(jobz, uplo, n, EV, stride(A,2), W, work, lwork, rwork)
else
info = jl_lapack_syev(jobz, uplo, n, EV, n, W, work, lwork)
info = jl_lapack_syev(jobz, uplo, n, EV, stride(A,2), W, work, lwork)
end

if info == 0; return (diagm(W), EV); end
Expand All @@ -362,19 +384,19 @@ function eig{T}(A::Matrix{T})
lwork = -1

if iscomplex(A)
info = jl_lapack_geev(jobvl, jobvr, n, Acopy, n, W, VL, n, VR, n, work, lwork, rwork)
info = jl_lapack_geev(jobvl, jobvr, n, Acopy, stride(A,2), W, VL, n, VR, n, work, lwork, rwork)
else
info = jl_lapack_geev(jobvl, jobvr, n, Acopy, n, WR, WI, VL, n, VR, n, work, lwork)
info = jl_lapack_geev(jobvl, jobvr, n, Acopy, stride(A,2), WR, WI, VL, n, VR, n, work, lwork)
end

if info == 0; lwork = real(work[1]); work = Array(T, long(lwork));
else error("Error in LAPACK geev"); end

# Compute eigenvalues, eigenvectors
if iscomplex(A)
info = jl_lapack_geev(jobvl, jobvr, n, Acopy, n, W, VL, n, VR, n, work, lwork, rwork)
info = jl_lapack_geev(jobvl, jobvr, n, Acopy, stride(A,2), W, VL, n, VR, n, work, lwork, rwork)
else
info = jl_lapack_geev(jobvl, jobvr, n, Acopy, n, WR, WI, VL, n, VR, n, work, lwork)
info = jl_lapack_geev(jobvl, jobvr, n, Acopy, stride(A,2), WR, WI, VL, n, VR, n, work, lwork)
end

if info != 0; error("Error in LAPACK geev"); end
Expand Down Expand Up @@ -467,19 +489,19 @@ function svd{T}(A::Matrix{T})
work = zeros(T, 1)
lwork = -1
if iscomplex(A)
info = jl_lapack_gesvd(jobu, jobvt, m, n, X, m, S, U, m, VT, n, work, lwork, rwork)
info = jl_lapack_gesvd(jobu, jobvt, m, n, X, stride(A,2), S, U, m, VT, n, work, lwork, rwork)
else
info = jl_lapack_gesvd(jobu, jobvt, m, n, X, m, S, U, m, VT, n, work, lwork)
info = jl_lapack_gesvd(jobu, jobvt, m, n, X, stride(A,2), S, U, m, VT, n, work, lwork)
end

if info == 0; lwork = real(work[1]); work = Array(T, long(lwork));
else error("Error in LAPACK gesvd"); end

# Compute SVD
if iscomplex(A)
info = jl_lapack_gesvd(jobu, jobvt, m, n, X, m, S, U, m, VT, n, work, lwork, rwork)
info = jl_lapack_gesvd(jobu, jobvt, m, n, X, stride(A,2), S, U, m, VT, n, work, lwork, rwork)
else
info = jl_lapack_gesvd(jobu, jobvt, m, n, X, m, S, U, m, VT, n, work, lwork)
info = jl_lapack_gesvd(jobu, jobvt, m, n, X, stride(A,2), S, U, m, VT, n, work, lwork)
end

SIGMA = zeros(T, m, n)
Expand Down
9 changes: 0 additions & 9 deletions test/bench/hpl_par.j
Original file line number Diff line number Diff line change
Expand Up @@ -169,12 +169,10 @@ function hpl_par2(A::Matrix, b::Vector)
##Trailing updates
(i == nB) ? (I = (C.dist[i]):n) :
(I = (C.dist[i]):(C.dist[i+1]-1))
#C_II = convert(Array, C[I,I])
C_II = C[I,I]
L_II = tril(C_II, -1) + eye(length(I))
K = (I[length(I)]+1):n
if length(K) > 0
#C_KI = convert(Array, C[K,I])
C_KI = C[K,I]
else
C_KI = zeros(0)
Expand Down Expand Up @@ -205,7 +203,6 @@ function panel_factor2(C, i, n)
(C.dist[i+1] == n+2) ? (I = (C.dist[i]):n) :
(I = (C.dist[i]):(C.dist[i+1]-1))
K = I[1]:n
#C_KI = convert(Array, C[K,I])
C_KI = C[K,I]
#(C_KI, panel_p) = lu(C_KI, true) #economy mode
panel_p = lu(C_KI, true)[2]
Expand All @@ -218,15 +215,13 @@ function permute(C, i, j, panel_p, n, flag)
if flag
K = (C.dist[i]):n
J = (n+1):(n+1)
#C_KJ = convert(Array, C[K,J])
C_KJ = C[K,J]

C_KJ = C_KJ[panel_p,:]
C[K,J] = C_KJ
else
K = (C.dist[i]):n
J = (C.dist[j]):(C.dist[j+1]-1)
#C_KJ = convert(Array, C[K,J])
C_KJ = C[K,J]

C_KJ = C_KJ[panel_p,:]
Expand All @@ -242,10 +237,8 @@ function trailing_update2(C, L_II, C_KI, i, j, n, flag, dep)
I = C.dist[i]:n
J = (n+1):(n+1)
K = (I[length(I)]+1):n
#C_IJ = convert(Array,C[I,J])
C_IJ = C[I,J]
if length(K) > 0
#C_KJ = convert(Array,C[K,J])
C_KJ = C[K,J]
else
C_KJ = zeros(0)
Expand All @@ -261,9 +254,7 @@ function trailing_update2(C, L_II, C_KI, i, j, n, flag, dep)
J = (C.dist[j]):(C.dist[j+1]-1)
K = (I[length(I)]+1):n
C_IJ = C[I,J]
#C_IJ = convert(Array,C[I,J])
if length(K) > 0
#C_KJ = convert(Array,C[K,J])
C_KJ = C[K,J]
else
C_KJ = zeros(0)
Expand Down

0 comments on commit 3137dc3

Please sign in to comment.