LCOV - code coverage report
Current view: top level - /builds/ug4-project/ugcore/ug4-new/plugins/SuperLU6/external/superlu/SRC - dsp_blas2.c (source / functions) Coverage Total Hit
Test: coverage.info Lines: 0.0 % 151 0
Test Date: 2026-06-01 23:54:59 Functions: 0.0 % 2 0

            Line data    Source code
       1              : /*! \file
       2              : Copyright (c) 2003, The Regents of the University of California, through
       3              : Lawrence Berkeley National Laboratory (subject to receipt of any required 
       4              : approvals from U.S. Dept. of Energy) 
       5              : 
       6              : All rights reserved. 
       7              : 
       8              : The source code is distributed under BSD license, see the file License.txt
       9              : at the top-level directory.
      10              : */
      11              : 
      12              : /*! @file dsp_blas2.c
      13              :  * \brief Sparse BLAS 2, using some dense BLAS 2 operations
      14              :  *
      15              :  * <pre>
      16              :  * -- SuperLU routine (version 5.1) --
      17              :  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
      18              :  * and Lawrence Berkeley National Lab.
      19              :  * October 15, 2003
      20              :  *
      21              :  * Last update: December 3, 2015
      22              :  * </pre>
      23              :  */
      24              : /*
      25              :  * File name:           dsp_blas2.c
      26              :  * Purpose:             Sparse BLAS 2, using some dense BLAS 2 operations.
      27              :  */
      28              : 
      29              : #include "slu_ddefs.h"
      30              : 
      31              : /*! \brief Solves one of the systems of equations A*x = b,   or   A'*x = b
      32              :  * 
      33              :  * <pre>
      34              :  *   Purpose
      35              :  *   =======
      36              :  *
      37              :  *   sp_dtrsv() solves one of the systems of equations   
      38              :  *       A*x = b,   or   A'*x = b,
      39              :  *   where b and x are n element vectors and A is a sparse unit , or   
      40              :  *   non-unit, upper or lower triangular matrix.   
      41              :  *   No test for singularity or near-singularity is included in this   
      42              :  *   routine. Such tests must be performed before calling this routine.   
      43              :  *
      44              :  *   Parameters   
      45              :  *   ==========   
      46              :  *
      47              :  *   uplo   - (input) char*
      48              :  *            On entry, uplo specifies whether the matrix is an upper or   
      49              :  *             lower triangular matrix as follows:   
      50              :  *                uplo = 'U' or 'u'   A is an upper triangular matrix.   
      51              :  *                uplo = 'L' or 'l'   A is a lower triangular matrix.   
      52              :  *
      53              :  *   trans  - (input) char*
      54              :  *             On entry, trans specifies the equations to be solved as   
      55              :  *             follows:   
      56              :  *                trans = 'N' or 'n'   A*x = b.   
      57              :  *                trans = 'T' or 't'   A'*x = b.
      58              :  *                trans = 'C' or 'c'   A'*x = b.   
      59              :  *
      60              :  *   diag   - (input) char*
      61              :  *             On entry, diag specifies whether or not A is unit   
      62              :  *             triangular as follows:   
      63              :  *                diag = 'U' or 'u'   A is assumed to be unit triangular.   
      64              :  *                diag = 'N' or 'n'   A is not assumed to be unit   
      65              :  *                                    triangular.   
      66              :  *           
      67              :  *   L       - (input) SuperMatrix*
      68              :  *             The factor L from the factorization Pr*A*Pc=L*U. Use
      69              :  *             compressed row subscripts storage for supernodes,
      70              :  *             i.e., L has types: Stype = SC, Dtype = SLU_D, Mtype = TRLU.
      71              :  *
      72              :  *   U       - (input) SuperMatrix*
      73              :  *              The factor U from the factorization Pr*A*Pc=L*U.
      74              :  *              U has types: Stype = NC, Dtype = SLU_D, Mtype = TRU.
      75              :  *    
      76              :  *   x       - (input/output) double*
      77              :  *             Before entry, the incremented array X must contain the n   
      78              :  *             element right-hand side vector b. On exit, X is overwritten 
      79              :  *             with the solution vector x.
      80              :  *
      81              :  *   info    - (output) int*
      82              :  *             If *info = -i, the i-th argument had an illegal value.
      83              :  * </pre>
      84              :  */
      85              : int
      86            0 : sp_dtrsv(char *uplo, char *trans, char *diag, SuperMatrix *L, 
      87              :          SuperMatrix *U, double *x, SuperLUStat_t *stat, int *info)
      88              : {
      89              : #ifdef _CRAY
      90              :     _fcd ftcs1 = _cptofcd("L", strlen("L")),
      91              :          ftcs2 = _cptofcd("N", strlen("N")),
      92              :          ftcs3 = _cptofcd("U", strlen("U"));
      93              : #endif
      94              :     SCformat *Lstore;
      95              :     NCformat *Ustore;
      96              :     double   *Lval, *Uval;
      97            0 :     int incx = 1, incy = 1;
      98              :     double alpha = 1.0, beta = 1.0;
      99              :     int nrow, irow, jcol;
     100              :     int fsupc, nsupr, nsupc;
     101              :     int_t luptr, istart, i, k, iptr;
     102              :     double *work;
     103              :     flops_t solve_ops;
     104              : 
     105              :     /* Test the input parameters */
     106            0 :     *info = 0;
     107            0 :     if ( strncmp(uplo,"L", 1)!=0 && strncmp(uplo, "U", 1)!=0 ) *info = -1;
     108            0 :     else if ( strncmp(trans, "N", 1)!=0 && strncmp(trans, "T", 1)!=0 && 
     109            0 :               strncmp(trans, "C", 1)!=0) *info = -2;
     110            0 :     else if ( strncmp(diag, "U", 1)!=0 && strncmp(diag, "N", 1)!=0 )
     111            0 :          *info = -3;
     112            0 :     else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4;
     113            0 :     else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5;
     114            0 :     if ( *info ) {
     115            0 :         int ii = -(*info);
     116            0 :         input_error("sp_dtrsv", &ii);
     117              :         return 0;
     118              :     }
     119              : 
     120            0 :     Lstore = L->Store;
     121            0 :     Lval = Lstore->nzval;
     122            0 :     Ustore = U->Store;
     123            0 :     Uval = Ustore->nzval;
     124              :     solve_ops = 0;
     125              : 
     126            0 :     if ( !(work = doubleCalloc(L->nrow)) )
     127            0 :         ABORT("Malloc fails for work in sp_dtrsv().");
     128              :     
     129            0 :     if ( strncmp(trans, "N", 1)==0 ) {        /* Form x := inv(A)*x. */
     130              :         
     131            0 :         if ( strncmp(uplo, "L", 1)==0 ) {
     132              :             /* Form x := inv(L)*x */
     133            0 :             if ( L->nrow == 0 ) return 0; /* Quick return */
     134              :             
     135            0 :             for (k = 0; k <= Lstore->nsuper; k++) {
     136            0 :                 fsupc = L_FST_SUPC(k);
     137            0 :                 istart = L_SUB_START(fsupc);
     138            0 :                 nsupr = L_SUB_START(fsupc+1) - istart;
     139            0 :                 nsupc = L_FST_SUPC(k+1) - fsupc;
     140            0 :                 luptr = L_NZ_START(fsupc);
     141            0 :                 nrow = nsupr - nsupc;
     142              : 
     143            0 :                 solve_ops += nsupc * (nsupc - 1);
     144            0 :                 solve_ops += 2 * nrow * nsupc;
     145              : 
     146            0 :                 if ( nsupc == 1 ) {
     147            0 :                     for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) {
     148            0 :                         irow = L_SUB(iptr);
     149            0 :                         ++luptr;
     150            0 :                         x[irow] -= x[fsupc] * Lval[luptr];
     151              :                     }
     152              :                 } else {
     153              : #ifdef USE_VENDOR_BLAS
     154              : #ifdef _CRAY
     155              :                     STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
     156              :                         &x[fsupc], &incx);
     157              :                 
     158              :                     SGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
     159              :                         &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
     160              : #else
     161              :                     dtrsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr,
     162              :                         &x[fsupc], &incx);
     163              :                 
     164              :                     dgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
     165              :                         &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
     166              : #endif
     167              : #else
     168            0 :                     dlsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]);
     169              :                 
     170            0 :                     dmatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],
     171              :                              &x[fsupc], &work[0] );
     172              : #endif          
     173              :                 
     174            0 :                     iptr = istart + nsupc;
     175            0 :                     for (i = 0; i < nrow; ++i, ++iptr) {
     176            0 :                         irow = L_SUB(iptr);
     177            0 :                         x[irow] -= work[i];     /* Scatter */
     178            0 :                         work[i] = 0.0;
     179              : 
     180              :                     }
     181              :                 }
     182              :             } /* for k ... */
     183              :             
     184              :         } else {
     185              :             /* Form x := inv(U)*x */
     186              :             
     187            0 :             if ( U->nrow == 0 ) return 0; /* Quick return */
     188              :             
     189            0 :             for (k = Lstore->nsuper; k >= 0; k--) {
     190            0 :                 fsupc = L_FST_SUPC(k);
     191            0 :                 nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
     192            0 :                 nsupc = L_FST_SUPC(k+1) - fsupc;
     193            0 :                 luptr = L_NZ_START(fsupc);
     194              :                 
     195            0 :                 solve_ops += nsupc * (nsupc + 1);
     196              : 
     197            0 :                 if ( nsupc == 1 ) {
     198            0 :                     x[fsupc] /= Lval[luptr];
     199            0 :                     for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) {
     200            0 :                         irow = U_SUB(i);
     201            0 :                         x[irow] -= x[fsupc] * Uval[i];
     202              :                     }
     203              :                 } else {
     204              : #ifdef USE_VENDOR_BLAS
     205              : #ifdef _CRAY
     206              :                     STRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr,
     207              :                        &x[fsupc], &incx);
     208              : #else
     209              :                     dtrsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr,
     210              :                            &x[fsupc], &incx);
     211              : #endif
     212              : #else           
     213            0 :                     dusolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );
     214              : #endif          
     215              : 
     216            0 :                     for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
     217            0 :                         solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
     218            0 :                         for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); 
     219            0 :                                 i++) {
     220            0 :                             irow = U_SUB(i);
     221            0 :                             x[irow] -= x[jcol] * Uval[i];
     222              :                         }
     223              :                     }
     224              :                 }
     225              :             } /* for k ... */
     226              :             
     227              :         }
     228              :     } else { /* Form x := inv(A')*x */
     229              :         
     230            0 :         if ( strncmp(uplo, "L", 1)==0 ) {
     231              :             /* Form x := inv(L')*x */
     232            0 :             if ( L->nrow == 0 ) return 0; /* Quick return */
     233              :             
     234            0 :             for (k = Lstore->nsuper; k >= 0; --k) {
     235            0 :                 fsupc = L_FST_SUPC(k);
     236            0 :                 istart = L_SUB_START(fsupc);
     237            0 :                 nsupr = L_SUB_START(fsupc+1) - istart;
     238            0 :                 nsupc = L_FST_SUPC(k+1) - fsupc;
     239            0 :                 luptr = L_NZ_START(fsupc);
     240              : 
     241            0 :                 solve_ops += 2 * (nsupr - nsupc) * nsupc;
     242              : 
     243            0 :                 for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
     244            0 :                     iptr = istart + nsupc;
     245            0 :                     for (i = L_NZ_START(jcol) + nsupc; 
     246            0 :                                 i < L_NZ_START(jcol+1); i++) {
     247            0 :                         irow = L_SUB(iptr);
     248            0 :                         x[jcol] -= x[irow] * Lval[i];
     249            0 :                         iptr++;
     250              :                     }
     251              :                 }
     252              :                 
     253            0 :                 if ( nsupc > 1 ) {
     254            0 :                     solve_ops += nsupc * (nsupc - 1);
     255              : #ifdef _CRAY
     256              :                     ftcs1 = _cptofcd("L", strlen("L"));
     257              :                     ftcs2 = _cptofcd("T", strlen("T"));
     258              :                     ftcs3 = _cptofcd("U", strlen("U"));
     259              :                     STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
     260              :                         &x[fsupc], &incx);
     261              : #else
     262            0 :                     dtrsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
     263            0 :                         &x[fsupc], &incx);
     264              : #endif
     265              :                 }
     266              :             }
     267              :         } else {
     268              :             /* Form x := inv(U')*x */
     269            0 :             if ( U->nrow == 0 ) return 0; /* Quick return */
     270              :             
     271            0 :             for (k = 0; k <= Lstore->nsuper; k++) {
     272            0 :                 fsupc = L_FST_SUPC(k);
     273            0 :                 nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
     274            0 :                 nsupc = L_FST_SUPC(k+1) - fsupc;
     275            0 :                 luptr = L_NZ_START(fsupc);
     276              : 
     277            0 :                 for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
     278            0 :                     solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
     279            0 :                     for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
     280            0 :                         irow = U_SUB(i);
     281            0 :                         x[jcol] -= x[irow] * Uval[i];
     282              :                     }
     283              :                 }
     284              : 
     285            0 :                 solve_ops += nsupc * (nsupc + 1);
     286              : 
     287            0 :                 if ( nsupc == 1 ) {
     288            0 :                     x[fsupc] /= Lval[luptr];
     289              :                 } else {
     290              : #ifdef _CRAY
     291              :                     ftcs1 = _cptofcd("U", strlen("U"));
     292              :                     ftcs2 = _cptofcd("T", strlen("T"));
     293              :                     ftcs3 = _cptofcd("N", strlen("N"));
     294              :                     STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
     295              :                             &x[fsupc], &incx);
     296              : #else
     297            0 :                     dtrsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
     298            0 :                             &x[fsupc], &incx);
     299              : #endif
     300              :                 }
     301              :             } /* for k ... */
     302              :         }
     303              :     }
     304              : 
     305            0 :     stat->ops[SOLVE] += solve_ops;
     306            0 :     SUPERLU_FREE(work);
     307            0 :     return 0;
     308              : }
     309              : 
     310              : 
     311              : 
     312              : /*! \brief Performs one of the matrix-vector operations y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   
     313              :  *
     314              :  * <pre>
     315              :  *   Purpose   
     316              :  *   =======   
     317              :  *
     318              :  *   sp_dgemv()  performs one of the matrix-vector operations   
     319              :  *      y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   
     320              :  *   where alpha and beta are scalars, x and y are vectors and A is a
     321              :  *   sparse A->nrow by A->ncol matrix.   
     322              :  *
     323              :  *   Parameters   
     324              :  *   ==========   
     325              :  *
     326              :  *   TRANS  - (input) char*
     327              :  *            On entry, TRANS specifies the operation to be performed as   
     328              :  *            follows:   
     329              :  *               TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.   
     330              :  *               TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.   
     331              :  *               TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.   
     332              :  *
     333              :  *   ALPHA  - (input) double
     334              :  *            On entry, ALPHA specifies the scalar alpha.   
     335              :  *
     336              :  *   A      - (input) SuperMatrix*
     337              :  *            Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
     338              :  *            Currently, the type of A can be:
     339              :  *                Stype = NC or NCP; Dtype = SLU_D; Mtype = GE. 
     340              :  *            In the future, more general A can be handled.
     341              :  *
     342              :  *   X      - (input) double*, array of DIMENSION at least   
     343              :  *            ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'   
     344              :  *            and at least   
     345              :  *            ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.   
     346              :  *            Before entry, the incremented array X must contain the   
     347              :  *            vector x.   
     348              :  *
     349              :  *   INCX   - (input) int
     350              :  *            On entry, INCX specifies the increment for the elements of   
     351              :  *            X. INCX must not be zero.   
     352              :  *
     353              :  *   BETA   - (input) double
     354              :  *            On entry, BETA specifies the scalar beta. When BETA is   
     355              :  *            supplied as zero then Y need not be set on input.   
     356              :  *
     357              :  *   Y      - (output) double*,  array of DIMENSION at least   
     358              :  *            ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'   
     359              :  *            and at least   
     360              :  *            ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.   
     361              :  *            Before entry with BETA non-zero, the incremented array Y   
     362              :  *            must contain the vector y. On exit, Y is overwritten by the 
     363              :  *            updated vector y.
     364              :  *           
     365              :  *   INCY   - (input) int
     366              :  *            On entry, INCY specifies the increment for the elements of   
     367              :  *            Y. INCY must not be zero.   
     368              :  *
     369              :  *   ==== Sparse Level 2 Blas routine.   
     370              :  * </pre>
     371              :  */
     372              : 
     373              : int
     374            0 : sp_dgemv(char *trans, double alpha, SuperMatrix *A, double *x, 
     375              :          int incx, double beta, double *y, int incy)
     376              : {
     377              :     /* Local variables */
     378              :     NCformat *Astore;
     379              :     double   *Aval;
     380              :     int info;
     381              :     double temp;
     382              :     int lenx, leny;
     383              :     int iy, jx, jy, kx, ky, irow;
     384              :     int_t i, j;
     385              :     int notran;
     386              : 
     387            0 :     notran = ( strncmp(trans, "N", 1)==0 || strncmp(trans, "n", 1)==0 );
     388            0 :     Astore = A->Store;
     389            0 :     Aval = Astore->nzval;
     390              :     
     391              :     /* Test the input parameters */
     392            0 :     info = 0;
     393            0 :     if ( !notran && strncmp(trans, "T", 1)!=0 && strncmp(trans, "C", 1)!=0 )
     394            0 :         info = 1;
     395            0 :     else if ( A->nrow < 0 || A->ncol < 0 ) info = 3;
     396            0 :     else if (incx == 0) info = 5;
     397            0 :     else if (incy == 0) info = 8;
     398            0 :     if (info != 0) {
     399            0 :         input_error("sp_dgemv ", &info);
     400            0 :         return 0;
     401              :     }
     402              : 
     403              :     /* Quick return if possible. */
     404            0 :     if (A->nrow == 0 || A->ncol == 0 || (alpha == 0. && beta == 1.))
     405              :         return 0;
     406              : 
     407              :     /* Set  LENX  and  LENY, the lengths of the vectors x and y, and set 
     408              :        up the start points in  X  and  Y. */
     409            0 :     if (strncmp(trans, "N", 1)==0) {
     410              :         lenx = A->ncol;
     411              :         leny = A->nrow;
     412              :     } else {
     413              :         lenx = A->nrow;
     414              :         leny = A->ncol;
     415              :     }
     416            0 :     if (incx > 0) kx = 0;
     417            0 :     else kx =  - (lenx - 1) * incx;
     418            0 :     if (incy > 0) ky = 0;
     419            0 :     else ky =  - (leny - 1) * incy;
     420              : 
     421              :     /* Start the operations. In this version the elements of A are   
     422              :        accessed sequentially with one pass through A. */
     423              :     /* First form  y := beta*y. */
     424            0 :     if (beta != 1.) {
     425            0 :         if (incy == 1) {
     426            0 :             if (beta == 0.)
     427            0 :                 for (i = 0; i < leny; ++i) y[i] = 0.;
     428              :             else
     429            0 :                 for (i = 0; i < leny; ++i) y[i] = beta * y[i];
     430              :         } else {
     431              :             iy = ky;
     432            0 :             if (beta == 0.)
     433            0 :                 for (i = 0; i < leny; ++i) {
     434            0 :                     y[iy] = 0.;
     435            0 :                     iy += incy;
     436              :                 }
     437              :             else
     438            0 :                 for (i = 0; i < leny; ++i) {
     439            0 :                     y[iy] = beta * y[iy];
     440            0 :                     iy += incy;
     441              :                 }
     442              :         }
     443              :     }
     444              :     
     445            0 :     if (alpha == 0.) return 0;
     446              : 
     447            0 :     if ( notran ) {
     448              :         /* Form  y := alpha*A*x + y. */
     449              :         jx = kx;
     450            0 :         if (incy == 1) {
     451            0 :             for (j = 0; j < A->ncol; ++j) {
     452            0 :                 if (x[jx] != 0.) {
     453            0 :                     temp = alpha * x[jx];
     454            0 :                     for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
     455            0 :                         irow = Astore->rowind[i];
     456            0 :                         y[irow] += temp * Aval[i];
     457              :                     }
     458              :                 }
     459            0 :                 jx += incx;
     460              :             }
     461              :         } else {
     462            0 :             ABORT("Not implemented.");
     463              :         }
     464              :     } else {
     465              :         /* Form  y := alpha*A'*x + y. */
     466              :         jy = ky;
     467            0 :         if (incx == 1) {
     468            0 :             for (j = 0; j < A->ncol; ++j) {
     469              :                 temp = 0.;
     470            0 :                 for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
     471            0 :                     irow = Astore->rowind[i];
     472            0 :                     temp += Aval[i] * x[irow];
     473              :                 }
     474            0 :                 y[jy] += alpha * temp;
     475            0 :                 jy += incy;
     476              :             }
     477              :         } else {
     478            0 :             ABORT("Not implemented.");
     479              :         }
     480              :     }
     481              : 
     482              :     return 0;
     483              : } /* sp_dgemv */
     484              : 
        

Generated by: LCOV version 2.0-1