LCOV - code coverage report
Current view: top level - /builds/ug4-project/ugcore/ug4-new/plugins/SuperLU6/external/superlu/SRC - dcolumn_bmod.c (source / functions) Coverage Total Hit
Test: coverage.info Lines: 0.0 % 118 0
Test Date: 2026-06-01 23:54:59 Functions: 0.0 % 1 0

            Line data    Source code
       1              : /*! \file
       2              : Copyright (c) 2003, The Regents of the University of California, through
       3              : Lawrence Berkeley National Laboratory (subject to receipt of any required 
       4              : approvals from U.S. Dept. of Energy) 
       5              : 
       6              : All rights reserved. 
       7              : 
       8              : The source code is distributed under BSD license, see the file License.txt
       9              : at the top-level directory.
      10              : */
      11              : 
      12              : /*! @file dcolumn_bmod.c
      13              :  *  \brief performs numeric block updates
      14              :  *
      15              :  * <pre>
      16              :  * -- SuperLU routine (version 3.0) --
      17              :  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
      18              :  * and Lawrence Berkeley National Lab.
      19              :  * October 15, 2003
      20              :  *
      21              :  * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
      22              :  *
      23              :  * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
      24              :  * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
      25              :  * 
      26              :  *  Permission is hereby granted to use or copy this program for any
      27              :  *  purpose, provided the above notices are retained on all copies.
      28              :  *  Permission to modify the code and to distribute modified code is
      29              :  *  granted, provided the above notices are retained, and a notice that
      30              :  *  the code was modified is included with the above copyright notice.
      31              :  * </pre>
      32              : */
      33              : 
      34              : #include <stdio.h>
      35              : #include <stdlib.h>
      36              : #include "slu_ddefs.h"
      37              : 
      38              : 
      39              : /*! \brief 
      40              :  *
      41              :  * <pre>
      42              :  * Purpose:
      43              :  * ========
      44              :  * Performs numeric block updates (sup-col) in topological order.
      45              :  * It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
      46              :  * Special processing on the supernodal portion of L\\U[*,j]
      47              :  * Return value:   0 - successful return
      48              :  *               > 0 - number of bytes allocated when run out of space
      49              :  * </pre>
      50              :  */
      51              : int
      52            0 : dcolumn_bmod (
      53              :              const int  jcol,     /* in */
      54              :              const int  nseg,     /* in */
      55              :              double     *dense,   /* in */
      56              :              double     *tempv,   /* working array */
      57              :              int        *segrep,  /* in */
      58              :              int        *repfnz,  /* in */
      59              :              int        fpanelc,  /* in -- first column in the current panel */
      60              :              GlobalLU_t *Glu,     /* modified */
      61              :              SuperLUStat_t *stat  /* output */
      62              :              )
      63              : {
      64              : 
      65              : #ifdef _CRAY
      66              :     _fcd ftcs1 = _cptofcd("L", strlen("L")),
      67              :          ftcs2 = _cptofcd("N", strlen("N")),
      68              :          ftcs3 = _cptofcd("U", strlen("U"));
      69              : #endif
      70              :     int         incx = 1, incy = 1;
      71              :     double      alpha, beta;
      72              :     
      73              :     /* krep = representative of current k-th supernode
      74              :      * fsupc = first supernodal column
      75              :      * nsupc = no of columns in supernode
      76              :      * nsupr = no of rows in supernode (used as leading dimension)
      77              :      * luptr = location of supernodal LU-block in storage
      78              :      * kfnz = first nonz in the k-th supernodal segment
      79              :      * no_zeros = no of leading zeros in a supernodal U-segment
      80              :      */
      81              :     double      ukj, ukj1, ukj2;
      82              :     int_t        luptr, luptr1, luptr2;
      83              :     int          fsupc, nsupc, nsupr, segsze;
      84              :     int          nrow;    /* No of rows in the matrix of matrix-vector */
      85              :     int          jcolp1, jsupno, k, ksub, krep, krep_ind, ksupno;
      86              :     int_t        lptr, kfnz, isub, irow, i;
      87              :     int_t        no_zeros, new_next, ufirst, nextlu;
      88              :     int          fst_col; /* First column within small LU update */
      89              :     int          d_fsupc; /* Distance between the first column of the current
      90              :                              panel and the first column of the current snode. */
      91              :     int          *xsup, *supno;
      92              :     int_t        *lsub, *xlsub;
      93              :     double       *lusup;
      94              :     int_t        *xlusup;
      95              :     int_t        nzlumax;
      96              :     double       *tempv1;
      97              :     double      zero = 0.0;
      98              :     double      one = 1.0;
      99              :     double      none = -1.0;
     100              :     int_t        mem_error;
     101            0 :     flops_t      *ops = stat->ops;
     102              : 
     103            0 :     xsup    = Glu->xsup;
     104            0 :     supno   = Glu->supno;
     105            0 :     lsub    = Glu->lsub;
     106            0 :     xlsub   = Glu->xlsub;
     107            0 :     lusup   = (double *) Glu->lusup;
     108            0 :     xlusup  = Glu->xlusup;
     109            0 :     nzlumax = Glu->nzlumax;
     110            0 :     jcolp1 = jcol + 1;
     111            0 :     jsupno = supno[jcol];
     112              :     
     113              :     /* 
     114              :      * For each nonz supernode segment of U[*,j] in topological order 
     115              :      */
     116            0 :     k = nseg - 1;
     117            0 :     for (ksub = 0; ksub < nseg; ksub++) {
     118              : 
     119            0 :         krep = segrep[k];
     120            0 :         k--;
     121            0 :         ksupno = supno[krep];
     122            0 :         if ( jsupno != ksupno ) { /* Outside the rectangular supernode */
     123              : 
     124            0 :             fsupc = xsup[ksupno];
     125            0 :             fst_col = SUPERLU_MAX ( fsupc, fpanelc );
     126              : 
     127              :             /* Distance from the current supernode to the current panel; 
     128              :                d_fsupc=0 if fsupc > fpanelc. */
     129            0 :             d_fsupc = fst_col - fsupc; 
     130              : 
     131            0 :             luptr = xlusup[fst_col] + d_fsupc;
     132            0 :             lptr = xlsub[fsupc] + d_fsupc;
     133              : 
     134            0 :             kfnz = repfnz[krep];
     135            0 :             kfnz = SUPERLU_MAX ( kfnz, fpanelc );
     136              : 
     137            0 :             segsze = krep - kfnz + 1;
     138            0 :             nsupc = krep - fst_col + 1;
     139            0 :             nsupr = xlsub[fsupc+1] - xlsub[fsupc];      /* Leading dimension */
     140            0 :             nrow = nsupr - d_fsupc - nsupc;
     141            0 :             krep_ind = lptr + nsupc - 1;
     142              : 
     143            0 :             ops[TRSV] += segsze * (segsze - 1);
     144            0 :             ops[GEMV] += 2 * nrow * segsze;
     145              : 
     146              : 
     147              :             /* 
     148              :              * Case 1: Update U-segment of size 1 -- col-col update 
     149              :              */
     150            0 :             if ( segsze == 1 ) {
     151            0 :                 ukj = dense[lsub[krep_ind]];
     152            0 :                 luptr += nsupr*(nsupc-1) + nsupc;
     153              : 
     154            0 :                 for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
     155            0 :                     irow = lsub[i];
     156            0 :                     dense[irow] -=  ukj*lusup[luptr];
     157            0 :                     luptr++;
     158              :                 }
     159              : 
     160            0 :             } else if ( segsze <= 3 ) {
     161            0 :                 ukj = dense[lsub[krep_ind]];
     162            0 :                 luptr += nsupr*(nsupc-1) + nsupc-1;
     163            0 :                 ukj1 = dense[lsub[krep_ind - 1]];
     164            0 :                 luptr1 = luptr - nsupr;
     165              : 
     166            0 :                 if ( segsze == 2 ) { /* Case 2: 2cols-col update */
     167            0 :                     ukj -= ukj1 * lusup[luptr1];
     168            0 :                     dense[lsub[krep_ind]] = ukj;
     169            0 :                     for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
     170            0 :                         irow = lsub[i];
     171            0 :                         luptr++;
     172            0 :                         luptr1++;
     173            0 :                         dense[irow] -= ( ukj*lusup[luptr]
     174            0 :                                         + ukj1*lusup[luptr1] );
     175              :                     }
     176              :                 } else { /* Case 3: 3cols-col update */
     177            0 :                     ukj2 = dense[lsub[krep_ind - 2]];
     178            0 :                     luptr2 = luptr1 - nsupr;
     179            0 :                     ukj1 -= ukj2 * lusup[luptr2-1];
     180            0 :                     ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2];
     181            0 :                     dense[lsub[krep_ind]] = ukj;
     182            0 :                     dense[lsub[krep_ind-1]] = ukj1;
     183            0 :                     for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
     184            0 :                         irow = lsub[i];
     185            0 :                         luptr++;
     186            0 :                         luptr1++;
     187            0 :                         luptr2++;
     188            0 :                         dense[irow] -= ( ukj*lusup[luptr]
     189            0 :                              + ukj1*lusup[luptr1] + ukj2*lusup[luptr2] );
     190              :                     }
     191              :                 }
     192              : 
     193              : 
     194              : 
     195              :             } else {
     196              :                 /*
     197              :                  * Case: sup-col update
     198              :                  * Perform a triangular solve and block update,
     199              :                  * then scatter the result of sup-col update to dense
     200              :                  */
     201              : 
     202            0 :                 no_zeros = kfnz - fst_col;
     203              : 
     204              :                 /* Copy U[*,j] segment from dense[*] to tempv[*] */
     205            0 :                 isub = lptr + no_zeros;
     206            0 :                 for (i = 0; i < segsze; i++) {
     207            0 :                     irow = lsub[isub];
     208            0 :                     tempv[i] = dense[irow];
     209            0 :                     ++isub; 
     210              :                 }
     211              : 
     212              :                 /* Dense triangular solve -- start effective triangle */
     213            0 :                 luptr += nsupr * no_zeros + no_zeros; 
     214              :                 
     215              : #ifdef USE_VENDOR_BLAS
     216              : #ifdef _CRAY
     217              :                 STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
     218              :                        &nsupr, tempv, &incx );
     219              : #else           
     220              :                 dtrsv_( "L", "N", "U", &segsze, &lusup[luptr], 
     221              :                        &nsupr, tempv, &incx );
     222              : #endif          
     223              :                 luptr += segsze;  /* Dense matrix-vector */
     224              :                 tempv1 = &tempv[segsze];
     225              :                 alpha = one;
     226              :                 beta = zero;
     227              : #ifdef _CRAY
     228              :                 SGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr], 
     229              :                        &nsupr, tempv, &incx, &beta, tempv1, &incy );
     230              : #else
     231              :                 dgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr], 
     232              :                        &nsupr, tempv, &incx, &beta, tempv1, &incy );
     233              : #endif
     234              : #else
     235            0 :                 dlsolve ( nsupr, segsze, &lusup[luptr], tempv );
     236              : 
     237            0 :                 luptr += segsze;  /* Dense matrix-vector */
     238            0 :                 tempv1 = &tempv[segsze];
     239            0 :                 dmatvec (nsupr, nrow , segsze, &lusup[luptr], tempv, tempv1);
     240              : #endif
     241              :                 
     242              :                 
     243              :                 /* Scatter tempv[] into SPA dense[] as a temporary storage */
     244              :                 isub = lptr + no_zeros;
     245            0 :                 for (i = 0; i < segsze; i++) {
     246            0 :                     irow = lsub[isub];
     247            0 :                     dense[irow] = tempv[i];
     248            0 :                     tempv[i] = zero;
     249            0 :                     ++isub;
     250              :                 }
     251              : 
     252              :                 /* Scatter tempv1[] into SPA dense[] */
     253            0 :                 for (i = 0; i < nrow; i++) {
     254            0 :                     irow = lsub[isub];
     255            0 :                     dense[irow] -= tempv1[i];
     256            0 :                     tempv1[i] = zero;
     257            0 :                     ++isub;
     258              :                 }
     259              :             }
     260              :             
     261              :         } /* if jsupno ... */
     262              : 
     263              :     } /* for each segment... */
     264              : 
     265              :     /*
     266              :      *  Process the supernodal portion of L\U[*,j]
     267              :      */
     268            0 :     nextlu = xlusup[jcol];
     269            0 :     fsupc = xsup[jsupno];
     270              : 
     271              :     /* Copy the SPA dense into L\U[*,j] */
     272            0 :     new_next = nextlu + xlsub[fsupc+1] - xlsub[fsupc];
     273            0 :     while ( new_next > nzlumax ) {
     274            0 :         mem_error = dLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, Glu);
     275            0 :         if (mem_error) return (mem_error);
     276            0 :         lusup = (double *) Glu->lusup;
     277            0 :         lsub = Glu->lsub;
     278              :     }
     279              : 
     280            0 :     for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
     281            0 :         irow = lsub[isub];
     282            0 :         lusup[nextlu] = dense[irow];
     283            0 :         dense[irow] = zero;
     284            0 :         ++nextlu;
     285              :     }
     286              : 
     287            0 :     xlusup[jcolp1] = nextlu;    /* Close L\U[*,jcol] */
     288              : 
     289              :     /* For more updates within the panel (also within the current supernode), 
     290              :      * should start from the first column of the panel, or the first column 
     291              :      * of the supernode, whichever is bigger. There are 2 cases:
     292              :      *    1) fsupc < fpanelc, then fst_col := fpanelc
     293              :      *    2) fsupc >= fpanelc, then fst_col := fsupc
     294              :      */
     295            0 :     fst_col = SUPERLU_MAX ( fsupc, fpanelc );
     296              : 
     297            0 :     if ( fst_col < jcol ) {
     298              : 
     299              :         /* Distance between the current supernode and the current panel.
     300              :            d_fsupc=0 if fsupc >= fpanelc. */
     301            0 :         d_fsupc = fst_col - fsupc;
     302              : 
     303            0 :         lptr = xlsub[fsupc] + d_fsupc;
     304            0 :         luptr = xlusup[fst_col] + d_fsupc;
     305            0 :         nsupr = xlsub[fsupc+1] - xlsub[fsupc];  /* Leading dimension */
     306            0 :         nsupc = jcol - fst_col; /* Excluding jcol */
     307            0 :         nrow = nsupr - d_fsupc - nsupc;
     308              : 
     309              :         /* Points to the beginning of jcol in snode L\U(jsupno) */
     310            0 :         ufirst = xlusup[jcol] + d_fsupc;        
     311              : 
     312            0 :         ops[TRSV] += nsupc * (nsupc - 1);
     313            0 :         ops[GEMV] += 2 * nrow * nsupc;
     314              :         
     315              : #ifdef USE_VENDOR_BLAS
     316              : #ifdef _CRAY
     317              :         STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], 
     318              :                &nsupr, &lusup[ufirst], &incx );
     319              : #else
     320              :         dtrsv_( "L", "N", "U", &nsupc, &lusup[luptr], 
     321              :                &nsupr, &lusup[ufirst], &incx );
     322              : #endif
     323              :         
     324              :         alpha = none; beta = one; /* y := beta*y + alpha*A*x */
     325              : 
     326              : #ifdef _CRAY
     327              :         SGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
     328              :                &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
     329              : #else
     330              :         dgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
     331              :                &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
     332              : #endif
     333              : #else
     334            0 :         dlsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
     335              : 
     336            0 :         dmatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc],
     337              :                 &lusup[ufirst], tempv );
     338              :         
     339              :         /* Copy updates from tempv[*] into lusup[*] */
     340            0 :         isub = ufirst + nsupc;
     341            0 :         for (i = 0; i < nrow; i++) {
     342            0 :             lusup[isub] -= tempv[i];
     343            0 :             tempv[i] = 0.0;
     344            0 :             ++isub;
     345              :         }
     346              : 
     347              : #endif
     348              :         
     349              :         
     350              :     } /* if fst_col < jcol ... */ 
     351              : 
     352              :     return 0;
     353              : }
        

Generated by: LCOV version 2.0-1