Line data Source code
1 : /*! \file
2 : Copyright (c) 2003, The Regents of the University of California, through
3 : Lawrence Berkeley National Laboratory (subject to receipt of any required
4 : approvals from U.S. Dept. of Energy)
5 :
6 : All rights reserved.
7 :
8 : The source code is distributed under BSD license, see the file License.txt
9 : at the top-level directory.
10 : */
11 :
12 : /*! @file dcolumn_bmod.c
13 : * \brief performs numeric block updates
14 : *
15 : * <pre>
16 : * -- SuperLU routine (version 3.0) --
17 : * Univ. of California Berkeley, Xerox Palo Alto Research Center,
18 : * and Lawrence Berkeley National Lab.
19 : * October 15, 2003
20 : *
21 : * Copyright (c) 1994 by Xerox Corporation. All rights reserved.
22 : *
23 : * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
24 : * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
25 : *
26 : * Permission is hereby granted to use or copy this program for any
27 : * purpose, provided the above notices are retained on all copies.
28 : * Permission to modify the code and to distribute modified code is
29 : * granted, provided the above notices are retained, and a notice that
30 : * the code was modified is included with the above copyright notice.
31 : * </pre>
32 : */
33 :
34 : #include <stdio.h>
35 : #include <stdlib.h>
36 : #include "slu_ddefs.h"
37 :
38 :
39 : /*! \brief
40 : *
41 : * <pre>
42 : * Purpose:
43 : * ========
44 : * Performs numeric block updates (sup-col) in topological order.
45 : * It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
46 : * Special processing on the supernodal portion of L\\U[*,j]
47 : * Return value: 0 - successful return
48 : * > 0 - number of bytes allocated when run out of space
49 : * </pre>
50 : */
51 : int
52 0 : dcolumn_bmod (
53 : const int jcol, /* in */
54 : const int nseg, /* in */
55 : double *dense, /* in */
56 : double *tempv, /* working array */
57 : int *segrep, /* in */
58 : int *repfnz, /* in */
59 : int fpanelc, /* in -- first column in the current panel */
60 : GlobalLU_t *Glu, /* modified */
61 : SuperLUStat_t *stat /* output */
62 : )
63 : {
64 :
65 : #ifdef _CRAY
66 : _fcd ftcs1 = _cptofcd("L", strlen("L")),
67 : ftcs2 = _cptofcd("N", strlen("N")),
68 : ftcs3 = _cptofcd("U", strlen("U"));
69 : #endif
70 : int incx = 1, incy = 1;
71 : double alpha, beta;
72 :
73 : /* krep = representative of current k-th supernode
74 : * fsupc = first supernodal column
75 : * nsupc = no of columns in supernode
76 : * nsupr = no of rows in supernode (used as leading dimension)
77 : * luptr = location of supernodal LU-block in storage
78 : * kfnz = first nonz in the k-th supernodal segment
79 : * no_zeros = no of leading zeros in a supernodal U-segment
80 : */
81 : double ukj, ukj1, ukj2;
82 : int_t luptr, luptr1, luptr2;
83 : int fsupc, nsupc, nsupr, segsze;
84 : int nrow; /* No of rows in the matrix of matrix-vector */
85 : int jcolp1, jsupno, k, ksub, krep, krep_ind, ksupno;
86 : int_t lptr, kfnz, isub, irow, i;
87 : int_t no_zeros, new_next, ufirst, nextlu;
88 : int fst_col; /* First column within small LU update */
89 : int d_fsupc; /* Distance between the first column of the current
90 : panel and the first column of the current snode. */
91 : int *xsup, *supno;
92 : int_t *lsub, *xlsub;
93 : double *lusup;
94 : int_t *xlusup;
95 : int_t nzlumax;
96 : double *tempv1;
97 : double zero = 0.0;
98 : double one = 1.0;
99 : double none = -1.0;
100 : int_t mem_error;
101 0 : flops_t *ops = stat->ops;
102 :
103 0 : xsup = Glu->xsup;
104 0 : supno = Glu->supno;
105 0 : lsub = Glu->lsub;
106 0 : xlsub = Glu->xlsub;
107 0 : lusup = (double *) Glu->lusup;
108 0 : xlusup = Glu->xlusup;
109 0 : nzlumax = Glu->nzlumax;
110 0 : jcolp1 = jcol + 1;
111 0 : jsupno = supno[jcol];
112 :
113 : /*
114 : * For each nonz supernode segment of U[*,j] in topological order
115 : */
116 0 : k = nseg - 1;
117 0 : for (ksub = 0; ksub < nseg; ksub++) {
118 :
119 0 : krep = segrep[k];
120 0 : k--;
121 0 : ksupno = supno[krep];
122 0 : if ( jsupno != ksupno ) { /* Outside the rectangular supernode */
123 :
124 0 : fsupc = xsup[ksupno];
125 0 : fst_col = SUPERLU_MAX ( fsupc, fpanelc );
126 :
127 : /* Distance from the current supernode to the current panel;
128 : d_fsupc=0 if fsupc > fpanelc. */
129 0 : d_fsupc = fst_col - fsupc;
130 :
131 0 : luptr = xlusup[fst_col] + d_fsupc;
132 0 : lptr = xlsub[fsupc] + d_fsupc;
133 :
134 0 : kfnz = repfnz[krep];
135 0 : kfnz = SUPERLU_MAX ( kfnz, fpanelc );
136 :
137 0 : segsze = krep - kfnz + 1;
138 0 : nsupc = krep - fst_col + 1;
139 0 : nsupr = xlsub[fsupc+1] - xlsub[fsupc]; /* Leading dimension */
140 0 : nrow = nsupr - d_fsupc - nsupc;
141 0 : krep_ind = lptr + nsupc - 1;
142 :
143 0 : ops[TRSV] += segsze * (segsze - 1);
144 0 : ops[GEMV] += 2 * nrow * segsze;
145 :
146 :
147 : /*
148 : * Case 1: Update U-segment of size 1 -- col-col update
149 : */
150 0 : if ( segsze == 1 ) {
151 0 : ukj = dense[lsub[krep_ind]];
152 0 : luptr += nsupr*(nsupc-1) + nsupc;
153 :
154 0 : for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
155 0 : irow = lsub[i];
156 0 : dense[irow] -= ukj*lusup[luptr];
157 0 : luptr++;
158 : }
159 :
160 0 : } else if ( segsze <= 3 ) {
161 0 : ukj = dense[lsub[krep_ind]];
162 0 : luptr += nsupr*(nsupc-1) + nsupc-1;
163 0 : ukj1 = dense[lsub[krep_ind - 1]];
164 0 : luptr1 = luptr - nsupr;
165 :
166 0 : if ( segsze == 2 ) { /* Case 2: 2cols-col update */
167 0 : ukj -= ukj1 * lusup[luptr1];
168 0 : dense[lsub[krep_ind]] = ukj;
169 0 : for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
170 0 : irow = lsub[i];
171 0 : luptr++;
172 0 : luptr1++;
173 0 : dense[irow] -= ( ukj*lusup[luptr]
174 0 : + ukj1*lusup[luptr1] );
175 : }
176 : } else { /* Case 3: 3cols-col update */
177 0 : ukj2 = dense[lsub[krep_ind - 2]];
178 0 : luptr2 = luptr1 - nsupr;
179 0 : ukj1 -= ukj2 * lusup[luptr2-1];
180 0 : ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2];
181 0 : dense[lsub[krep_ind]] = ukj;
182 0 : dense[lsub[krep_ind-1]] = ukj1;
183 0 : for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
184 0 : irow = lsub[i];
185 0 : luptr++;
186 0 : luptr1++;
187 0 : luptr2++;
188 0 : dense[irow] -= ( ukj*lusup[luptr]
189 0 : + ukj1*lusup[luptr1] + ukj2*lusup[luptr2] );
190 : }
191 : }
192 :
193 :
194 :
195 : } else {
196 : /*
197 : * Case: sup-col update
198 : * Perform a triangular solve and block update,
199 : * then scatter the result of sup-col update to dense
200 : */
201 :
202 0 : no_zeros = kfnz - fst_col;
203 :
204 : /* Copy U[*,j] segment from dense[*] to tempv[*] */
205 0 : isub = lptr + no_zeros;
206 0 : for (i = 0; i < segsze; i++) {
207 0 : irow = lsub[isub];
208 0 : tempv[i] = dense[irow];
209 0 : ++isub;
210 : }
211 :
212 : /* Dense triangular solve -- start effective triangle */
213 0 : luptr += nsupr * no_zeros + no_zeros;
214 :
215 : #ifdef USE_VENDOR_BLAS
216 : #ifdef _CRAY
217 : STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr],
218 : &nsupr, tempv, &incx );
219 : #else
220 : dtrsv_( "L", "N", "U", &segsze, &lusup[luptr],
221 : &nsupr, tempv, &incx );
222 : #endif
223 : luptr += segsze; /* Dense matrix-vector */
224 : tempv1 = &tempv[segsze];
225 : alpha = one;
226 : beta = zero;
227 : #ifdef _CRAY
228 : SGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr],
229 : &nsupr, tempv, &incx, &beta, tempv1, &incy );
230 : #else
231 : dgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr],
232 : &nsupr, tempv, &incx, &beta, tempv1, &incy );
233 : #endif
234 : #else
235 0 : dlsolve ( nsupr, segsze, &lusup[luptr], tempv );
236 :
237 0 : luptr += segsze; /* Dense matrix-vector */
238 0 : tempv1 = &tempv[segsze];
239 0 : dmatvec (nsupr, nrow , segsze, &lusup[luptr], tempv, tempv1);
240 : #endif
241 :
242 :
243 : /* Scatter tempv[] into SPA dense[] as a temporary storage */
244 : isub = lptr + no_zeros;
245 0 : for (i = 0; i < segsze; i++) {
246 0 : irow = lsub[isub];
247 0 : dense[irow] = tempv[i];
248 0 : tempv[i] = zero;
249 0 : ++isub;
250 : }
251 :
252 : /* Scatter tempv1[] into SPA dense[] */
253 0 : for (i = 0; i < nrow; i++) {
254 0 : irow = lsub[isub];
255 0 : dense[irow] -= tempv1[i];
256 0 : tempv1[i] = zero;
257 0 : ++isub;
258 : }
259 : }
260 :
261 : } /* if jsupno ... */
262 :
263 : } /* for each segment... */
264 :
265 : /*
266 : * Process the supernodal portion of L\U[*,j]
267 : */
268 0 : nextlu = xlusup[jcol];
269 0 : fsupc = xsup[jsupno];
270 :
271 : /* Copy the SPA dense into L\U[*,j] */
272 0 : new_next = nextlu + xlsub[fsupc+1] - xlsub[fsupc];
273 0 : while ( new_next > nzlumax ) {
274 0 : mem_error = dLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, Glu);
275 0 : if (mem_error) return (mem_error);
276 0 : lusup = (double *) Glu->lusup;
277 0 : lsub = Glu->lsub;
278 : }
279 :
280 0 : for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
281 0 : irow = lsub[isub];
282 0 : lusup[nextlu] = dense[irow];
283 0 : dense[irow] = zero;
284 0 : ++nextlu;
285 : }
286 :
287 0 : xlusup[jcolp1] = nextlu; /* Close L\U[*,jcol] */
288 :
289 : /* For more updates within the panel (also within the current supernode),
290 : * should start from the first column of the panel, or the first column
291 : * of the supernode, whichever is bigger. There are 2 cases:
292 : * 1) fsupc < fpanelc, then fst_col := fpanelc
293 : * 2) fsupc >= fpanelc, then fst_col := fsupc
294 : */
295 0 : fst_col = SUPERLU_MAX ( fsupc, fpanelc );
296 :
297 0 : if ( fst_col < jcol ) {
298 :
299 : /* Distance between the current supernode and the current panel.
300 : d_fsupc=0 if fsupc >= fpanelc. */
301 0 : d_fsupc = fst_col - fsupc;
302 :
303 0 : lptr = xlsub[fsupc] + d_fsupc;
304 0 : luptr = xlusup[fst_col] + d_fsupc;
305 0 : nsupr = xlsub[fsupc+1] - xlsub[fsupc]; /* Leading dimension */
306 0 : nsupc = jcol - fst_col; /* Excluding jcol */
307 0 : nrow = nsupr - d_fsupc - nsupc;
308 :
309 : /* Points to the beginning of jcol in snode L\U(jsupno) */
310 0 : ufirst = xlusup[jcol] + d_fsupc;
311 :
312 0 : ops[TRSV] += nsupc * (nsupc - 1);
313 0 : ops[GEMV] += 2 * nrow * nsupc;
314 :
315 : #ifdef USE_VENDOR_BLAS
316 : #ifdef _CRAY
317 : STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr],
318 : &nsupr, &lusup[ufirst], &incx );
319 : #else
320 : dtrsv_( "L", "N", "U", &nsupc, &lusup[luptr],
321 : &nsupr, &lusup[ufirst], &incx );
322 : #endif
323 :
324 : alpha = none; beta = one; /* y := beta*y + alpha*A*x */
325 :
326 : #ifdef _CRAY
327 : SGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
328 : &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
329 : #else
330 : dgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
331 : &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
332 : #endif
333 : #else
334 0 : dlsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
335 :
336 0 : dmatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc],
337 : &lusup[ufirst], tempv );
338 :
339 : /* Copy updates from tempv[*] into lusup[*] */
340 0 : isub = ufirst + nsupc;
341 0 : for (i = 0; i < nrow; i++) {
342 0 : lusup[isub] -= tempv[i];
343 0 : tempv[i] = 0.0;
344 0 : ++isub;
345 : }
346 :
347 : #endif
348 :
349 :
350 : } /* if fst_col < jcol ... */
351 :
352 : return 0;
353 : }
|