Skip to content

Commit

Permalink
Merge pull request #1080 from jprhyne/master
Browse files Browse the repository at this point in the history
Adding a recursive xLARFT
  • Loading branch information
langou authored Dec 3, 2024
2 parents 7b4c3a3 + e9b05ef commit 6ec7f2b
Show file tree
Hide file tree
Showing 10 changed files with 3,105 additions and 578 deletions.
10 changes: 8 additions & 2 deletions SRC/VARIANTS/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,11 @@ LUREC = lu/REC/cgetrf.o lu/REC/dgetrf.o lu/REC/sgetrf.o lu/REC/zgetrf.o

QRLL = qr/LL/cgeqrf.o qr/LL/dgeqrf.o qr/LL/sgeqrf.o qr/LL/zgeqrf.o

LARFTL2 = larft/LL-LVL2/clarft.o larft/LL-LVL2/dlarft.o larft/LL-LVL2/slarft.o larft/LL-LVL2/zlarft.o


.PHONY: all
all: cholrl.a choltop.a lucr.a lull.a lurec.a qrll.a
all: cholrl.a choltop.a lucr.a lull.a lurec.a qrll.a larftl2.a

cholrl.a: $(CHOLRL)
$(AR) $(ARFLAGS) $@ $^
Expand All @@ -58,9 +60,13 @@ qrll.a: $(QRLL)
$(AR) $(ARFLAGS) $@ $^
$(RANLIB) $@

larftl2.a: $(LARFTL2)
$(AR) $(ARFLAGS) $@ $^
$(RANLIB) $@

.PHONY: clean cleanobj cleanlib
clean: cleanobj cleanlib
cleanobj:
rm -f $(CHOLRL) $(CHOLTOP) $(LUCR) $(LULL) $(LUREC) $(QRLL)
rm -f $(CHOLRL) $(CHOLTOP) $(LUCR) $(LULL) $(LUREC) $(QRLL) $(LARFTL2)
cleanlib:
rm -f *.a
2 changes: 2 additions & 0 deletions SRC/VARIANTS/README
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ This directory contains several variants of LAPACK routines in single/double/com
- [sdcz]geqrf with QR Left Looking Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/qr/LL
- [sdcz]potrf with Cholesky Right Looking Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/cholesky/RL
- [sdcz]potrf with Cholesky Top Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/cholesky/TOP
- [sdcz]larft using a Left Looking Level 2 BLAS version algorithm - Directory: SRC/VARIANTS/larft/LL-LVL2

References:For a more detailed description please refer to
- [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997),
Expand All @@ -44,6 +45,7 @@ Corresponding libraries created in SRC/VARIANTS:
- QR Left Looking : qrll.a
- Cholesky Right Looking : cholrl.a
- Cholesky Top : choltop.a
- LARFT Level 2: larftl2.a


===========
Expand Down
328 changes: 328 additions & 0 deletions SRC/VARIANTS/larft/LL-LVL2/clarft.f
Original file line number Diff line number Diff line change
@@ -0,0 +1,328 @@
*> \brief \b CLARFT VARIANT: left-looking Level 2 BLAS version of the algorithm
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download CLARFT + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/clarft.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/clarft.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/clarft.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
*
* .. Scalar Arguments ..
* CHARACTER DIRECT, STOREV
* INTEGER K, LDT, LDV, N
* ..
* .. Array Arguments ..
* COMPLEX T( LDT, * ), TAU( * ), V( LDV, * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> CLARFT forms the triangular factor T of a complex block reflector H
*> of order n, which is defined as a product of k elementary reflectors.
*>
*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
*>
*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
*>
*> If STOREV = 'C', the vector which defines the elementary reflector
*> H(i) is stored in the i-th column of the array V, and
*>
*> H = I - V * T * V**H
*>
*> If STOREV = 'R', the vector which defines the elementary reflector
*> H(i) is stored in the i-th row of the array V, and
*>
*> H = I - V**H * T * V
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] DIRECT
*> \verbatim
*> DIRECT is CHARACTER*1
*> Specifies the order in which the elementary reflectors are
*> multiplied to form the block reflector:
*> = 'F': H = H(1) H(2) . . . H(k) (Forward)
*> = 'B': H = H(k) . . . H(2) H(1) (Backward)
*> \endverbatim
*>
*> \param[in] STOREV
*> \verbatim
*> STOREV is CHARACTER*1
*> Specifies how the vectors which define the elementary
*> reflectors are stored (see also Further Details):
*> = 'C': columnwise
*> = 'R': rowwise
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The order of the block reflector H. N >= 0.
*> \endverbatim
*>
*> \param[in] K
*> \verbatim
*> K is INTEGER
*> The order of the triangular factor T (= the number of
*> elementary reflectors). K >= 1.
*> \endverbatim
*>
*> \param[in] V
*> \verbatim
*> V is COMPLEX array, dimension
*> (LDV,K) if STOREV = 'C'
*> (LDV,N) if STOREV = 'R'
*> The matrix V. See further details.
*> \endverbatim
*>
*> \param[in] LDV
*> \verbatim
*> LDV is INTEGER
*> The leading dimension of the array V.
*> If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K.
*> \endverbatim
*>
*> \param[in] TAU
*> \verbatim
*> TAU is COMPLEX array, dimension (K)
*> TAU(i) must contain the scalar factor of the elementary
*> reflector H(i).
*> \endverbatim
*>
*> \param[out] T
*> \verbatim
*> T is COMPLEX array, dimension (LDT,K)
*> The k by k triangular factor T of the block reflector.
*> If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is
*> lower triangular. The rest of the array is not used.
*> \endverbatim
*>
*> \param[in] LDT
*> \verbatim
*> LDT is INTEGER
*> The leading dimension of the array T. LDT >= K.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup larft
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> The shape of the matrix V and the storage of the vectors which define
*> the H(i) is best illustrated by the following example with n = 5 and
*> k = 3. The elements equal to 1 are not stored.
*>
*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R':
*>
*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 )
*> ( v1 1 ) ( 1 v2 v2 v2 )
*> ( v1 v2 1 ) ( 1 v3 v3 )
*> ( v1 v2 v3 )
*> ( v1 v2 v3 )
*>
*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R':
*>
*> V = ( v1 v2 v3 ) V = ( v1 v1 1 )
*> ( v1 v2 v3 ) ( v2 v2 v2 1 )
*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 )
*> ( 1 v3 )
*> ( 1 )
*> \endverbatim
*>
* =====================================================================
SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
*
* -- LAPACK auxiliary routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
CHARACTER DIRECT, STOREV
INTEGER K, LDT, LDV, N
* ..
* .. Array Arguments ..
COMPLEX T( LDT, * ), TAU( * ), V( LDV, * )
* ..
*
* =====================================================================
*
* .. Parameters ..
COMPLEX ONE, ZERO
PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ),
$ ZERO = ( 0.0E+0, 0.0E+0 ) )
* ..
* .. Local Scalars ..
INTEGER I, J, PREVLASTV, LASTV
* ..
* .. External Subroutines ..
EXTERNAL CGEMM, CGEMV, CTRMV
* ..
* .. External Functions ..
LOGICAL LSAME
EXTERNAL LSAME
* ..
* .. Executable Statements ..
*
* Quick return if possible
*
IF( N.EQ.0 )
$ RETURN
*
IF( LSAME( DIRECT, 'F' ) ) THEN
PREVLASTV = N
DO I = 1, K
PREVLASTV = MAX( PREVLASTV, I )
IF( TAU( I ).EQ.ZERO ) THEN
*
* H(i) = I
*
DO J = 1, I
T( J, I ) = ZERO
END DO
ELSE
*
* general case
*
IF( LSAME( STOREV, 'C' ) ) THEN
* Skip any trailing zeros.
DO LASTV = N, I+1, -1
IF( V( LASTV, I ).NE.ZERO ) EXIT
END DO
DO J = 1, I-1
T( J, I ) = -TAU( I ) * CONJG( V( I , J ) )
END DO
J = MIN( LASTV, PREVLASTV )
*
* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i)
*
CALL CGEMV( 'Conjugate transpose', J-I, I-1,
$ -TAU( I ), V( I+1, 1 ), LDV,
$ V( I+1, I ), 1,
$ ONE, T( 1, I ), 1 )
ELSE
* Skip any trailing zeros.
DO LASTV = N, I+1, -1
IF( V( I, LASTV ).NE.ZERO ) EXIT
END DO
DO J = 1, I-1
T( J, I ) = -TAU( I ) * V( J , I )
END DO
J = MIN( LASTV, PREVLASTV )
*
* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H
*
CALL CGEMM( 'N', 'C', I-1, 1, J-I, -TAU( I ),
$ V( 1, I+1 ), LDV, V( I, I+1 ), LDV,
$ ONE, T( 1, I ), LDT )
END IF
*
* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i)
*
CALL CTRMV( 'Upper', 'No transpose', 'Non-unit', I-1,
$ T,
$ LDT, T( 1, I ), 1 )
T( I, I ) = TAU( I )
IF( I.GT.1 ) THEN
PREVLASTV = MAX( PREVLASTV, LASTV )
ELSE
PREVLASTV = LASTV
END IF
END IF
END DO
ELSE
PREVLASTV = 1
DO I = K, 1, -1
IF( TAU( I ).EQ.ZERO ) THEN
*
* H(i) = I
*
DO J = I, K
T( J, I ) = ZERO
END DO
ELSE
*
* general case
*
IF( I.LT.K ) THEN
IF( LSAME( STOREV, 'C' ) ) THEN
* Skip any leading zeros.
DO LASTV = 1, I-1
IF( V( LASTV, I ).NE.ZERO ) EXIT
END DO
DO J = I+1, K
T( J, I ) = -TAU( I ) * CONJG( V( N-K+I , J ) )
END DO
J = MAX( LASTV, PREVLASTV )
*
* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i)
*
CALL CGEMV( 'Conjugate transpose', N-K+I-J, K-I,
$ -TAU( I ), V( J, I+1 ), LDV, V( J, I ),
$ 1, ONE, T( I+1, I ), 1 )
ELSE
* Skip any leading zeros.
DO LASTV = 1, I-1
IF( V( I, LASTV ).NE.ZERO ) EXIT
END DO
DO J = I+1, K
T( J, I ) = -TAU( I ) * V( J, N-K+I )
END DO
J = MAX( LASTV, PREVLASTV )
*
* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H
*
CALL CGEMM( 'N', 'C', K-I, 1, N-K+I-J,
$ -TAU( I ),
$ V( I+1, J ), LDV, V( I, J ), LDV,
$ ONE, T( I+1, I ), LDT )
END IF
*
* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i)
*
CALL CTRMV( 'Lower', 'No transpose', 'Non-unit',
$ K-I,
$ T( I+1, I+1 ), LDT, T( I+1, I ), 1 )
IF( I.GT.1 ) THEN
PREVLASTV = MIN( PREVLASTV, LASTV )
ELSE
PREVLASTV = LASTV
END IF
END IF
T( I, I ) = TAU( I )
END IF
END DO
END IF
RETURN
*
* End of CLARFT
*
END
Loading

0 comments on commit 6ec7f2b

Please sign in to comment.