Merge pull request #1080 from jprhyne/master

Adding a recursive xLARFT
Reference-LAPACK · Dec 3, 2024 · 6ec7f2b · 6ec7f2b
2 parents 7b4c3a3 + e9b05ef
commit 6ec7f2b
Show file tree

Hide file tree

Showing 10 changed files with 3,105 additions and 578 deletions.
diff --git a/SRC/VARIANTS/Makefile b/SRC/VARIANTS/Makefile
@@ -30,9 +30,11 @@ LUREC = lu/REC/cgetrf.o lu/REC/dgetrf.o lu/REC/sgetrf.o lu/REC/zgetrf.o
 
 QRLL = qr/LL/cgeqrf.o qr/LL/dgeqrf.o qr/LL/sgeqrf.o qr/LL/zgeqrf.o
 
+LARFTL2 = larft/LL-LVL2/clarft.o larft/LL-LVL2/dlarft.o larft/LL-LVL2/slarft.o larft/LL-LVL2/zlarft.o
+
 
 .PHONY: all
-all: cholrl.a choltop.a lucr.a lull.a lurec.a qrll.a
+all: cholrl.a choltop.a lucr.a lull.a lurec.a qrll.a larftl2.a
 
 cholrl.a: $(CHOLRL)
 	$(AR) $(ARFLAGS) $@ $^
@@ -58,9 +60,13 @@ qrll.a: $(QRLL)
 	$(AR) $(ARFLAGS) $@ $^
 	$(RANLIB) $@
 
+larftl2.a: $(LARFTL2)
+	$(AR) $(ARFLAGS) $@ $^
+	$(RANLIB) $@
+
 .PHONY: clean cleanobj cleanlib
 clean: cleanobj cleanlib
 cleanobj:
-	rm -f $(CHOLRL) $(CHOLTOP) $(LUCR) $(LULL) $(LUREC) $(QRLL)
+	rm -f $(CHOLRL) $(CHOLTOP) $(LUCR) $(LULL) $(LUREC) $(QRLL) $(LARFTL2)
 cleanlib:
 	rm -f *.a
diff --git a/SRC/VARIANTS/README b/SRC/VARIANTS/README
@@ -23,6 +23,7 @@ This directory contains several variants of LAPACK routines in single/double/com
 	- [sdcz]geqrf with QR Left Looking Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/qr/LL
 	- [sdcz]potrf with Cholesky Right Looking Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/cholesky/RL
 	- [sdcz]potrf with Cholesky Top Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/cholesky/TOP
+	- [sdcz]larft using a Left Looking Level 2 BLAS version algorithm - Directory: SRC/VARIANTS/larft/LL-LVL2
 
 References:For a more detailed description please refer to
 	- [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997),
@@ -44,6 +45,7 @@ Corresponding libraries created in SRC/VARIANTS:
 	- QR Left Looking : qrll.a
 	- Cholesky Right Looking : cholrl.a
 	- Cholesky Top : choltop.a
+	- LARFT Level 2: larftl2.a
 
 
 ===========

diff --git a/SRC/VARIANTS/larft/LL-LVL2/clarft.f b/SRC/VARIANTS/larft/LL-LVL2/clarft.f
@@ -0,0 +1,328 @@
+*> \brief \b CLARFT VARIANT: left-looking Level 2 BLAS version of the algorithm
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download CLARFT + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/clarft.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/clarft.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/clarft.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
+*
+*       .. Scalar Arguments ..
+*       CHARACTER          DIRECT, STOREV
+*       INTEGER            K, LDT, LDV, N
+*       ..
+*       .. Array Arguments ..
+*       COMPLEX            T( LDT, * ), TAU( * ), V( LDV, * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> CLARFT forms the triangular factor T of a complex block reflector H
+*> of order n, which is defined as a product of k elementary reflectors.
+*>
+*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
+*>
+*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
+*>
+*> If STOREV = 'C', the vector which defines the elementary reflector
+*> H(i) is stored in the i-th column of the array V, and
+*>
+*>    H  =  I - V * T * V**H
+*>
+*> If STOREV = 'R', the vector which defines the elementary reflector
+*> H(i) is stored in the i-th row of the array V, and
+*>
+*>    H  =  I - V**H * T * V
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] DIRECT
+*> \verbatim
+*>          DIRECT is CHARACTER*1
+*>          Specifies the order in which the elementary reflectors are
+*>          multiplied to form the block reflector:
+*>          = 'F': H = H(1) H(2) . . . H(k) (Forward)
+*>          = 'B': H = H(k) . . . H(2) H(1) (Backward)
+*> \endverbatim
+*>
+*> \param[in] STOREV
+*> \verbatim
+*>          STOREV is CHARACTER*1
+*>          Specifies how the vectors which define the elementary
+*>          reflectors are stored (see also Further Details):
+*>          = 'C': columnwise
+*>          = 'R': rowwise
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>          The order of the block reflector H. N >= 0.
+*> \endverbatim
+*>
+*> \param[in] K
+*> \verbatim
+*>          K is INTEGER
+*>          The order of the triangular factor T (= the number of
+*>          elementary reflectors). K >= 1.
+*> \endverbatim
+*>
+*> \param[in] V
+*> \verbatim
+*>          V is COMPLEX array, dimension
+*>                               (LDV,K) if STOREV = 'C'
+*>                               (LDV,N) if STOREV = 'R'
+*>          The matrix V. See further details.
+*> \endverbatim
+*>
+*> \param[in] LDV
+*> \verbatim
+*>          LDV is INTEGER
+*>          The leading dimension of the array V.
+*>          If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K.
+*> \endverbatim
+*>
+*> \param[in] TAU
+*> \verbatim
+*>          TAU is COMPLEX array, dimension (K)
+*>          TAU(i) must contain the scalar factor of the elementary
+*>          reflector H(i).
+*> \endverbatim
+*>
+*> \param[out] T
+*> \verbatim
+*>          T is COMPLEX array, dimension (LDT,K)
+*>          The k by k triangular factor T of the block reflector.
+*>          If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is
+*>          lower triangular. The rest of the array is not used.
+*> \endverbatim
+*>
+*> \param[in] LDT
+*> \verbatim
+*>          LDT is INTEGER
+*>          The leading dimension of the array T. LDT >= K.
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \ingroup larft
+*
+*> \par Further Details:
+*  =====================
+*>
+*> \verbatim
+*>
+*>  The shape of the matrix V and the storage of the vectors which define
+*>  the H(i) is best illustrated by the following example with n = 5 and
+*>  k = 3. The elements equal to 1 are not stored.
+*>
+*>  DIRECT = 'F' and STOREV = 'C':         DIRECT = 'F' and STOREV = 'R':
+*>
+*>               V = (  1       )                 V = (  1 v1 v1 v1 v1 )
+*>                   ( v1  1    )                     (     1 v2 v2 v2 )
+*>                   ( v1 v2  1 )                     (        1 v3 v3 )
+*>                   ( v1 v2 v3 )
+*>                   ( v1 v2 v3 )
+*>
+*>  DIRECT = 'B' and STOREV = 'C':         DIRECT = 'B' and STOREV = 'R':
+*>
+*>               V = ( v1 v2 v3 )                 V = ( v1 v1  1       )
+*>                   ( v1 v2 v3 )                     ( v2 v2 v2  1    )
+*>                   (  1 v2 v3 )                     ( v3 v3 v3 v3  1 )
+*>                   (     1 v3 )
+*>                   (        1 )
+*> \endverbatim
+*>
+*  =====================================================================
+      SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
+*
+*  -- LAPACK auxiliary routine --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIRECT, STOREV
+      INTEGER            K, LDT, LDV, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            T( LDT, * ), TAU( * ), V( LDV, * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE, ZERO
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ),
+     $                   ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, PREVLASTV, LASTV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEMM, CGEMV, CTRMV
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( LSAME( DIRECT, 'F' ) ) THEN
+         PREVLASTV = N
+         DO I = 1, K
+            PREVLASTV = MAX( PREVLASTV, I )
+            IF( TAU( I ).EQ.ZERO ) THEN
+*
+*              H(i)  =  I
+*
+               DO J = 1, I
+                  T( J, I ) = ZERO
+               END DO
+            ELSE
+*
+*              general case
+*
+               IF( LSAME( STOREV, 'C' ) ) THEN
+*                 Skip any trailing zeros.
+                  DO LASTV = N, I+1, -1
+                     IF( V( LASTV, I ).NE.ZERO ) EXIT
+                  END DO
+                  DO J = 1, I-1
+                     T( J, I ) = -TAU( I ) * CONJG( V( I , J ) )
+                  END DO
+                  J = MIN( LASTV, PREVLASTV )
+*
+*                 T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i)
+*
+                  CALL CGEMV( 'Conjugate transpose', J-I, I-1,
+     $                        -TAU( I ), V( I+1, 1 ), LDV,
+     $                        V( I+1, I ), 1,
+     $                        ONE, T( 1, I ), 1 )
+               ELSE
+*                 Skip any trailing zeros.
+                  DO LASTV = N, I+1, -1
+                     IF( V( I, LASTV ).NE.ZERO ) EXIT
+                  END DO
+                  DO J = 1, I-1
+                     T( J, I ) = -TAU( I ) * V( J , I )
+                  END DO
+                  J = MIN( LASTV, PREVLASTV )
+*
+*                 T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H
+*
+                  CALL CGEMM( 'N', 'C', I-1, 1, J-I, -TAU( I ),
+     $                        V( 1, I+1 ), LDV, V( I, I+1 ), LDV,
+     $                        ONE, T( 1, I ), LDT )
+               END IF
+*
+*              T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i)
+*
+               CALL CTRMV( 'Upper', 'No transpose', 'Non-unit', I-1,
+     $                     T,
+     $                     LDT, T( 1, I ), 1 )
+               T( I, I ) = TAU( I )
+               IF( I.GT.1 ) THEN
+                  PREVLASTV = MAX( PREVLASTV, LASTV )
+               ELSE
+                  PREVLASTV = LASTV
+               END IF
+            END IF
+         END DO
+      ELSE
+         PREVLASTV = 1
+         DO I = K, 1, -1
+            IF( TAU( I ).EQ.ZERO ) THEN
+*
+*              H(i)  =  I
+*
+               DO J = I, K
+                  T( J, I ) = ZERO
+               END DO
+            ELSE
+*
+*              general case
+*
+               IF( I.LT.K ) THEN
+                  IF( LSAME( STOREV, 'C' ) ) THEN
+*                    Skip any leading zeros.
+                     DO LASTV = 1, I-1
+                        IF( V( LASTV, I ).NE.ZERO ) EXIT
+                     END DO
+                     DO J = I+1, K
+                        T( J, I ) = -TAU( I ) * CONJG( V( N-K+I , J ) )
+                     END DO
+                     J = MAX( LASTV, PREVLASTV )
+*
+*                    T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i)
+*
+                     CALL CGEMV( 'Conjugate transpose', N-K+I-J, K-I,
+     $                           -TAU( I ), V( J, I+1 ), LDV, V( J, I ),
+     $                           1, ONE, T( I+1, I ), 1 )
+                  ELSE
+*                    Skip any leading zeros.
+                     DO LASTV = 1, I-1
+                        IF( V( I, LASTV ).NE.ZERO ) EXIT
+                     END DO
+                     DO J = I+1, K
+                        T( J, I ) = -TAU( I ) * V( J, N-K+I )
+                     END DO
+                     J = MAX( LASTV, PREVLASTV )
+*
+*                    T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H
+*
+                     CALL CGEMM( 'N', 'C', K-I, 1, N-K+I-J,
+     $                           -TAU( I ),
+     $                           V( I+1, J ), LDV, V( I, J ), LDV,
+     $                           ONE, T( I+1, I ), LDT )
+                  END IF
+*
+*                 T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i)
+*
+                  CALL CTRMV( 'Lower', 'No transpose', 'Non-unit',
+     $                        K-I,
+     $                        T( I+1, I+1 ), LDT, T( I+1, I ), 1 )
+                  IF( I.GT.1 ) THEN
+                     PREVLASTV = MIN( PREVLASTV, LASTV )
+                  ELSE
+                     PREVLASTV = LASTV
+                  END IF
+               END IF
+               T( I, I ) = TAU( I )
+            END IF
+         END DO
+      END IF
+      RETURN
+*
+*     End of CLARFT
+*
+      END