LCOV - code coverage report
Current view: top level - /builds/ug4-project/ugcore/ug4-new/plugins/SuperLU6/external/superlu/SRC - dpanel_bmod.c (source / functions) Coverage Total Hit
Test: coverage.info Lines: 0.0 % 162 0
Test Date: 2026-06-01 23:54:59 Functions: 0.0 % 1 0

            Line data    Source code
       1              : /*! \file
       2              : Copyright (c) 2003, The Regents of the University of California, through
       3              : Lawrence Berkeley National Laboratory (subject to receipt of any required 
       4              : approvals from U.S. Dept. of Energy) 
       5              : 
       6              : All rights reserved. 
       7              : 
       8              : The source code is distributed under BSD license, see the file License.txt
       9              : at the top-level directory.
      10              : */
      11              : 
      12              : /*! @file dpanel_bmod.c
      13              :  * \brief Performs numeric block updates
      14              :  *
      15              :  * <pre>
      16              :  * -- SuperLU routine (version 3.0) --
      17              :  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
      18              :  * and Lawrence Berkeley National Lab.
      19              :  * October 15, 2003
      20              :  *
      21              :  * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
      22              :  *
      23              :  * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
      24              :  * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
      25              :  * 
      26              :  * Permission is hereby granted to use or copy this program for any
      27              :  * purpose, provided the above notices are retained on all copies.
      28              :  * Permission to modify the code and to distribute modified code is
      29              :  * granted, provided the above notices are retained, and a notice that
      30              :  * the code was modified is included with the above copyright notice.
      31              :  * </pre>
      32              :  */
      33              : /*
      34              : 
      35              : */
      36              : 
      37              : #include <stdio.h>
      38              : #include <stdlib.h>
      39              : #include "slu_ddefs.h"
      40              : 
      41              : /*! \brief
      42              :  *
      43              :  * <pre>
      44              :  * Purpose
      45              :  * =======
      46              :  *
      47              :  *    Performs numeric block updates (sup-panel) in topological order.
      48              :  *    It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
      49              :  *    Special processing on the supernodal portion of L\U[*,j]
      50              :  *
      51              :  *    Before entering this routine, the original nonzeros in the panel 
      52              :  *    were already copied into the spa[m,w].
      53              :  *
      54              :  *    Updated/Output parameters-
      55              :  *    dense[0:m-1,w]: L[*,j:j+w-1] and U[*,j:j+w-1] are returned 
      56              :  *    collectively in the m-by-w vector dense[*]. 
      57              :  * </pre>
      58              :  */
      59              : 
      60              : void
      61            0 : dpanel_bmod (
      62              :             const int  m,          /* in - number of rows in the matrix */
      63              :             const int  w,          /* in */
      64              :             const int  jcol,       /* in */
      65              :             const int  nseg,       /* in */
      66              :             double     *dense,     /* out, of size n by w */
      67              :             double     *tempv,     /* working array */
      68              :             int        *segrep,    /* in */
      69              :             int        *repfnz,    /* in, of size n by w */
      70              :             GlobalLU_t *Glu,       /* modified */
      71              :             SuperLUStat_t *stat    /* output */
      72              :             )
      73              : {
      74              : 
      75              : 
      76              : #ifdef USE_VENDOR_BLAS
      77              : #ifdef _CRAY
      78              :     _fcd ftcs1 = _cptofcd("L", strlen("L")),
      79              :          ftcs2 = _cptofcd("N", strlen("N")),
      80              :          ftcs3 = _cptofcd("U", strlen("U"));
      81              : #endif
      82              :     int          incx = 1, incy = 1;
      83              :     double       alpha, beta;
      84              : #endif
      85              : 
      86              :     register int k, ksub;
      87              :     int          fsupc, nsupc, nsupr, nrow;
      88              :     int          krep, krep_ind;
      89              :     double       ukj, ukj1, ukj2;
      90              :     int_t        luptr, luptr1, luptr2;
      91              :     int          segsze;
      92              :     int          block_nrow;  /* no of rows in a block row */
      93              :     int_t        lptr;        /* Points to the row subscripts of a supernode */
      94              :     int          kfnz, irow, no_zeros; 
      95              :     register int isub, isub1, i;
      96              :     register int jj;          /* Index through each column in the panel */
      97              :     int          *xsup, *supno;
      98              :     int_t        *lsub, *xlsub;
      99              :     double       *lusup;
     100              :     int_t        *xlusup;
     101              :     int          *repfnz_col; /* repfnz[] for a column in the panel */
     102              :     double       *dense_col;  /* dense[] for a column in the panel */
     103              :     double       *tempv1;             /* Used in 1-D update */
     104              :     double       *TriTmp, *MatvecTmp; /* used in 2-D update */
     105              :     double      zero = 0.0;
     106              :     double      one = 1.0;
     107              :     register int ldaTmp;
     108              :     register int r_ind, r_hi;
     109              :     int  maxsuper, rowblk, colblk;
     110            0 :     flops_t  *ops = stat->ops;
     111              :     
     112            0 :     xsup    = Glu->xsup;
     113            0 :     supno   = Glu->supno;
     114            0 :     lsub    = Glu->lsub;
     115            0 :     xlsub   = Glu->xlsub;
     116            0 :     lusup   = (double *) Glu->lusup;
     117            0 :     xlusup  = Glu->xlusup;
     118              :     
     119            0 :     maxsuper = SUPERLU_MAX( sp_ienv(3), sp_ienv(7) );
     120            0 :     rowblk   = sp_ienv(4);
     121            0 :     colblk   = sp_ienv(5);
     122            0 :     ldaTmp   = maxsuper + rowblk;
     123              : 
     124              :     /* 
     125              :      * For each nonz supernode segment of U[*,j] in topological order 
     126              :      */
     127            0 :     k = nseg - 1;
     128            0 :     for (ksub = 0; ksub < nseg; ksub++) { /* for each updating supernode */
     129              : 
     130              :         /* krep = representative of current k-th supernode
     131              :          * fsupc = first supernodal column
     132              :          * nsupc = no of columns in a supernode
     133              :          * nsupr = no of rows in a supernode
     134              :          */
     135            0 :         krep = segrep[k--];
     136            0 :         fsupc = xsup[supno[krep]];
     137            0 :         nsupc = krep - fsupc + 1;
     138            0 :         nsupr = xlsub[fsupc+1] - xlsub[fsupc];
     139            0 :         nrow = nsupr - nsupc;
     140              :         lptr = xlsub[fsupc];
     141            0 :         krep_ind = lptr + nsupc - 1;
     142              : 
     143              :         repfnz_col = repfnz;
     144              :         dense_col = dense;
     145              :         
     146            0 :         if ( nsupc >= colblk && nrow > rowblk ) { /* 2-D block update */
     147              : 
     148              :             TriTmp = tempv;
     149              :         
     150              :             /* Sequence through each column in panel -- triangular solves */
     151            0 :             for (jj = jcol; jj < jcol + w; jj++,
     152            0 :                  repfnz_col += m, dense_col += m, TriTmp += ldaTmp ) {
     153              : 
     154            0 :                 kfnz = repfnz_col[krep];
     155            0 :                 if ( kfnz == SLU_EMPTY ) continue;      /* Skip any zero segment */
     156              :             
     157            0 :                 segsze = krep - kfnz + 1;
     158            0 :                 luptr = xlusup[fsupc];
     159              : 
     160            0 :                 ops[TRSV] += segsze * (segsze - 1);
     161            0 :                 ops[GEMV] += 2 * nrow * segsze;
     162              :         
     163              :                 /* Case 1: Update U-segment of size 1 -- col-col update */
     164            0 :                 if ( segsze == 1 ) {
     165            0 :                     ukj = dense_col[lsub[krep_ind]];
     166            0 :                     luptr += nsupr*(nsupc-1) + nsupc;
     167              : 
     168            0 :                     for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) {
     169            0 :                         irow = lsub[i];
     170            0 :                         dense_col[irow] -= ukj * lusup[luptr];
     171            0 :                         ++luptr;
     172              :                     }
     173              : 
     174            0 :                 } else if ( segsze <= 3 ) {
     175            0 :                     ukj = dense_col[lsub[krep_ind]];
     176            0 :                     ukj1 = dense_col[lsub[krep_ind - 1]];
     177            0 :                     luptr += nsupr*(nsupc-1) + nsupc-1;
     178            0 :                     luptr1 = luptr - nsupr;
     179              : 
     180            0 :                     if ( segsze == 2 ) {
     181            0 :                         ukj -= ukj1 * lusup[luptr1];
     182            0 :                         dense_col[lsub[krep_ind]] = ukj;
     183            0 :                         for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
     184            0 :                             irow = lsub[i];
     185            0 :                             luptr++; luptr1++;
     186            0 :                             dense_col[irow] -= (ukj*lusup[luptr]
     187            0 :                                                 + ukj1*lusup[luptr1]);
     188              :                         }
     189              :                     } else {
     190            0 :                         ukj2 = dense_col[lsub[krep_ind - 2]];
     191            0 :                         luptr2 = luptr1 - nsupr;
     192            0 :                         ukj1 -= ukj2 * lusup[luptr2-1];
     193            0 :                         ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2];
     194            0 :                         dense_col[lsub[krep_ind]] = ukj;
     195            0 :                         dense_col[lsub[krep_ind-1]] = ukj1;
     196            0 :                         for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
     197            0 :                             irow = lsub[i];
     198            0 :                             luptr++; luptr1++; luptr2++;
     199            0 :                             dense_col[irow] -= ( ukj*lusup[luptr]
     200            0 :                              + ukj1*lusup[luptr1] + ukj2*lusup[luptr2] );
     201              :                         }
     202              :                     }
     203              : 
     204              :                 } else  {       /* segsze >= 4 */
     205              :                     
     206              :                     /* Copy U[*,j] segment from dense[*] to TriTmp[*], which
     207              :                        holds the result of triangular solves.    */
     208            0 :                     no_zeros = kfnz - fsupc;
     209            0 :                     isub = lptr + no_zeros;
     210            0 :                     for (i = 0; i < segsze; ++i) {
     211            0 :                         irow = lsub[isub];
     212            0 :                         TriTmp[i] = dense_col[irow]; /* Gather */
     213            0 :                         ++isub;
     214              :                     }
     215              :                     
     216              :                     /* start effective triangle */
     217            0 :                     luptr += nsupr * no_zeros + no_zeros;
     218              : 
     219              : #ifdef USE_VENDOR_BLAS
     220              : #ifdef _CRAY
     221              :                     STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
     222              :                            &nsupr, TriTmp, &incx );
     223              : #else
     224              :                     dtrsv_( "L", "N", "U", &segsze, &lusup[luptr], 
     225              :                            &nsupr, TriTmp, &incx );
     226              : #endif
     227              : #else           
     228            0 :                     dlsolve ( nsupr, segsze, &lusup[luptr], TriTmp );
     229              : #endif
     230              :                     
     231              : 
     232              :                 } /* else ... */
     233              :             
     234              :             }  /* for jj ... end tri-solves */
     235              : 
     236              :             /* Block row updates; push all the way into dense[*] block */
     237            0 :             for ( r_ind = 0; r_ind < nrow; r_ind += rowblk ) {
     238              :                 
     239            0 :                 r_hi = SUPERLU_MIN(nrow, r_ind + rowblk);
     240            0 :                 block_nrow = SUPERLU_MIN(rowblk, r_hi - r_ind);
     241            0 :                 luptr = xlusup[fsupc] + nsupc + r_ind;
     242            0 :                 isub1 = lptr + nsupc + r_ind;
     243              :                 
     244              :                 repfnz_col = repfnz;
     245              :                 TriTmp = tempv;
     246              :                 dense_col = dense;
     247              :                 
     248              :                 /* Sequence through each column in panel -- matrix-vector */
     249            0 :                 for (jj = jcol; jj < jcol + w; jj++,
     250            0 :                      repfnz_col += m, dense_col += m, TriTmp += ldaTmp) {
     251              :                     
     252            0 :                     kfnz = repfnz_col[krep];
     253            0 :                     if ( kfnz == SLU_EMPTY ) continue; /* Skip any zero segment */
     254              :                     
     255            0 :                     segsze = krep - kfnz + 1;
     256            0 :                     if ( segsze <= 3 ) continue;   /* skip unrolled cases */
     257              :                     
     258              :                     /* Perform a block update, and scatter the result of
     259              :                        matrix-vector to dense[].                 */
     260            0 :                     no_zeros = kfnz - fsupc;
     261            0 :                     luptr1 = luptr + nsupr * no_zeros;
     262            0 :                     MatvecTmp = &TriTmp[maxsuper];
     263              :                     
     264              : #ifdef USE_VENDOR_BLAS
     265              :                     alpha = one; 
     266              :                     beta = zero;
     267              : #ifdef _CRAY
     268              :                     SGEMV(ftcs2, &block_nrow, &segsze, &alpha, &lusup[luptr1], 
     269              :                            &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy);
     270              : #else
     271              :                     dgemv_("N", &block_nrow, &segsze, &alpha, &lusup[luptr1], 
     272              :                            &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy);
     273              : #endif
     274              : #else
     275            0 :                     dmatvec(nsupr, block_nrow, segsze, &lusup[luptr1],
     276              :                            TriTmp, MatvecTmp);
     277              : #endif
     278              :                     
     279              :                     /* Scatter MatvecTmp[*] into SPA dense[*] temporarily
     280              :                      * such that MatvecTmp[*] can be re-used for the
     281              :                      * the next blok row update. dense[] will be copied into 
     282              :                      * global store after the whole panel has been finished.
     283              :                      */
     284              :                     isub = isub1;
     285            0 :                     for (i = 0; i < block_nrow; i++) {
     286            0 :                         irow = lsub[isub];
     287            0 :                         dense_col[irow] -= MatvecTmp[i];
     288            0 :                         MatvecTmp[i] = zero;
     289            0 :                         ++isub;
     290              :                     }
     291              :                     
     292              :                 } /* for jj ... */
     293              :                 
     294              :             } /* for each block row ... */
     295              :             
     296              :             /* Scatter the triangular solves into SPA dense[*] */
     297              :             repfnz_col = repfnz;
     298              :             TriTmp = tempv;
     299              :             dense_col = dense;
     300              :             
     301            0 :             for (jj = jcol; jj < jcol + w; jj++,
     302            0 :                  repfnz_col += m, dense_col += m, TriTmp += ldaTmp) {
     303            0 :                 kfnz = repfnz_col[krep];
     304            0 :                 if ( kfnz == SLU_EMPTY ) continue; /* Skip any zero segment */
     305              :                 
     306            0 :                 segsze = krep - kfnz + 1;
     307            0 :                 if ( segsze <= 3 ) continue; /* skip unrolled cases */
     308              :                 
     309            0 :                 no_zeros = kfnz - fsupc;                
     310            0 :                 isub = lptr + no_zeros;
     311            0 :                 for (i = 0; i < segsze; i++) {
     312            0 :                     irow = lsub[isub];
     313            0 :                     dense_col[irow] = TriTmp[i];
     314            0 :                     TriTmp[i] = zero;
     315            0 :                     ++isub;
     316              :                 }
     317              :                 
     318              :             } /* for jj ... */
     319              :             
     320              :         } else { /* 1-D block modification */
     321              :             
     322              :             
     323              :             /* Sequence through each column in the panel */
     324            0 :             for (jj = jcol; jj < jcol + w; jj++,
     325            0 :                  repfnz_col += m, dense_col += m) {
     326              :                 
     327            0 :                 kfnz = repfnz_col[krep];
     328            0 :                 if ( kfnz == SLU_EMPTY ) continue;      /* Skip any zero segment */
     329              :                 
     330            0 :                 segsze = krep - kfnz + 1;
     331            0 :                 luptr = xlusup[fsupc];
     332              : 
     333            0 :                 ops[TRSV] += segsze * (segsze - 1);
     334            0 :                 ops[GEMV] += 2 * nrow * segsze;
     335              :                 
     336              :                 /* Case 1: Update U-segment of size 1 -- col-col update */
     337            0 :                 if ( segsze == 1 ) {
     338            0 :                     ukj = dense_col[lsub[krep_ind]];
     339            0 :                     luptr += nsupr*(nsupc-1) + nsupc;
     340              : 
     341            0 :                     for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) {
     342            0 :                         irow = lsub[i];
     343            0 :                         dense_col[irow] -= ukj * lusup[luptr];
     344            0 :                         ++luptr;
     345              :                     }
     346              : 
     347            0 :                 } else if ( segsze <= 3 ) {
     348            0 :                     ukj = dense_col[lsub[krep_ind]];
     349            0 :                     luptr += nsupr*(nsupc-1) + nsupc-1;
     350            0 :                     ukj1 = dense_col[lsub[krep_ind - 1]];
     351            0 :                     luptr1 = luptr - nsupr;
     352              : 
     353            0 :                     if ( segsze == 2 ) {
     354            0 :                         ukj -= ukj1 * lusup[luptr1];
     355            0 :                         dense_col[lsub[krep_ind]] = ukj;
     356            0 :                         for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
     357            0 :                             irow = lsub[i];
     358            0 :                             ++luptr;  ++luptr1;
     359            0 :                             dense_col[irow] -= (ukj*lusup[luptr]
     360            0 :                                                 + ukj1*lusup[luptr1]);
     361              :                         }
     362              :                     } else {
     363            0 :                         ukj2 = dense_col[lsub[krep_ind - 2]];
     364            0 :                         luptr2 = luptr1 - nsupr;
     365            0 :                         ukj1 -= ukj2 * lusup[luptr2-1];
     366            0 :                         ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2];
     367            0 :                         dense_col[lsub[krep_ind]] = ukj;
     368            0 :                         dense_col[lsub[krep_ind-1]] = ukj1;
     369            0 :                         for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
     370            0 :                             irow = lsub[i];
     371            0 :                             ++luptr; ++luptr1; ++luptr2;
     372            0 :                             dense_col[irow] -= ( ukj*lusup[luptr]
     373            0 :                              + ukj1*lusup[luptr1] + ukj2*lusup[luptr2] );
     374              :                         }
     375              :                     }
     376              : 
     377              :                 } else  { /* segsze >= 4 */
     378              :                     /* 
     379              :                      * Perform a triangular solve and block update,
     380              :                      * then scatter the result of sup-col update to dense[].
     381              :                      */
     382            0 :                     no_zeros = kfnz - fsupc;
     383              :                     
     384              :                     /* Copy U[*,j] segment from dense[*] to tempv[*]: 
     385              :                      *    The result of triangular solve is in tempv[*];
     386              :                      *    The result of matrix vector update is in dense_col[*]
     387              :                      */
     388            0 :                     isub = lptr + no_zeros;
     389            0 :                     for (i = 0; i < segsze; ++i) {
     390            0 :                         irow = lsub[isub];
     391            0 :                         tempv[i] = dense_col[irow]; /* Gather */
     392            0 :                         ++isub;
     393              :                     }
     394              :                     
     395              :                     /* start effective triangle */
     396            0 :                     luptr += nsupr * no_zeros + no_zeros;
     397              :                     
     398              : #ifdef USE_VENDOR_BLAS
     399              : #ifdef _CRAY
     400              :                     STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
     401              :                            &nsupr, tempv, &incx );
     402              : #else
     403              :                     dtrsv_( "L", "N", "U", &segsze, &lusup[luptr], 
     404              :                            &nsupr, tempv, &incx );
     405              : #endif
     406              :                     
     407              :                     luptr += segsze;    /* Dense matrix-vector */
     408              :                     tempv1 = &tempv[segsze];
     409              :                     alpha = one;
     410              :                     beta = zero;
     411              : #ifdef _CRAY
     412              :                     SGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr], 
     413              :                            &nsupr, tempv, &incx, &beta, tempv1, &incy );
     414              : #else
     415              :                     dgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr], 
     416              :                            &nsupr, tempv, &incx, &beta, tempv1, &incy );
     417              : #endif
     418              : #else
     419            0 :                     dlsolve ( nsupr, segsze, &lusup[luptr], tempv );
     420              :                     
     421            0 :                     luptr += segsze;        /* Dense matrix-vector */
     422            0 :                     tempv1 = &tempv[segsze];
     423            0 :                     dmatvec (nsupr, nrow, segsze, &lusup[luptr], tempv, tempv1);
     424              : #endif
     425              :                     
     426              :                     /* Scatter tempv[*] into SPA dense[*] temporarily, such
     427              :                      * that tempv[*] can be used for the triangular solve of
     428              :                      * the next column of the panel. They will be copied into 
     429              :                      * ucol[*] after the whole panel has been finished.
     430              :                      */
     431              :                     isub = lptr + no_zeros;
     432            0 :                     for (i = 0; i < segsze; i++) {
     433            0 :                         irow = lsub[isub];
     434            0 :                         dense_col[irow] = tempv[i];
     435            0 :                         tempv[i] = zero;
     436            0 :                         isub++;
     437              :                     }
     438              :                     
     439              :                     /* Scatter the update from tempv1[*] into SPA dense[*] */
     440              :                     /* Start dense rectangular L */
     441            0 :                     for (i = 0; i < nrow; i++) {
     442            0 :                         irow = lsub[isub];
     443            0 :                         dense_col[irow] -= tempv1[i];
     444            0 :                         tempv1[i] = zero;
     445            0 :                         ++isub; 
     446              :                     }
     447              :                     
     448              :                 } /* else segsze>=4 ... */
     449              :                 
     450              :             } /* for each column in the panel... */
     451              :             
     452              :         } /* else 1-D update ... */
     453              : 
     454              :     } /* for each updating supernode ... */
     455              : 
     456            0 : }
     457              : 
        

Generated by: LCOV version 2.0-1