Line data Source code
1 : /*! \file
2 : Copyright (c) 2003, The Regents of the University of California, through
3 : Lawrence Berkeley National Laboratory (subject to receipt of any required
4 : approvals from U.S. Dept. of Energy)
5 :
6 : All rights reserved.
7 :
8 : The source code is distributed under BSD license, see the file License.txt
9 : at the top-level directory.
10 : */
11 :
12 : /*! @file dsp_blas2.c
13 : * \brief Sparse BLAS 2, using some dense BLAS 2 operations
14 : *
15 : * <pre>
16 : * -- SuperLU routine (version 5.1) --
17 : * Univ. of California Berkeley, Xerox Palo Alto Research Center,
18 : * and Lawrence Berkeley National Lab.
19 : * October 15, 2003
20 : *
21 : * Last update: December 3, 2015
22 : * </pre>
23 : */
24 : /*
25 : * File name: dsp_blas2.c
26 : * Purpose: Sparse BLAS 2, using some dense BLAS 2 operations.
27 : */
28 :
29 : #include "slu_ddefs.h"
30 :
31 : /*! \brief Solves one of the systems of equations A*x = b, or A'*x = b
32 : *
33 : * <pre>
34 : * Purpose
35 : * =======
36 : *
37 : * sp_dtrsv() solves one of the systems of equations
38 : * A*x = b, or A'*x = b,
39 : * where b and x are n element vectors and A is a sparse unit , or
40 : * non-unit, upper or lower triangular matrix.
41 : * No test for singularity or near-singularity is included in this
42 : * routine. Such tests must be performed before calling this routine.
43 : *
44 : * Parameters
45 : * ==========
46 : *
47 : * uplo - (input) char*
48 : * On entry, uplo specifies whether the matrix is an upper or
49 : * lower triangular matrix as follows:
50 : * uplo = 'U' or 'u' A is an upper triangular matrix.
51 : * uplo = 'L' or 'l' A is a lower triangular matrix.
52 : *
53 : * trans - (input) char*
54 : * On entry, trans specifies the equations to be solved as
55 : * follows:
56 : * trans = 'N' or 'n' A*x = b.
57 : * trans = 'T' or 't' A'*x = b.
58 : * trans = 'C' or 'c' A'*x = b.
59 : *
60 : * diag - (input) char*
61 : * On entry, diag specifies whether or not A is unit
62 : * triangular as follows:
63 : * diag = 'U' or 'u' A is assumed to be unit triangular.
64 : * diag = 'N' or 'n' A is not assumed to be unit
65 : * triangular.
66 : *
67 : * L - (input) SuperMatrix*
68 : * The factor L from the factorization Pr*A*Pc=L*U. Use
69 : * compressed row subscripts storage for supernodes,
70 : * i.e., L has types: Stype = SC, Dtype = SLU_D, Mtype = TRLU.
71 : *
72 : * U - (input) SuperMatrix*
73 : * The factor U from the factorization Pr*A*Pc=L*U.
74 : * U has types: Stype = NC, Dtype = SLU_D, Mtype = TRU.
75 : *
76 : * x - (input/output) double*
77 : * Before entry, the incremented array X must contain the n
78 : * element right-hand side vector b. On exit, X is overwritten
79 : * with the solution vector x.
80 : *
81 : * info - (output) int*
82 : * If *info = -i, the i-th argument had an illegal value.
83 : * </pre>
84 : */
85 : int
86 0 : sp_dtrsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
87 : SuperMatrix *U, double *x, SuperLUStat_t *stat, int *info)
88 : {
89 : #ifdef _CRAY
90 : _fcd ftcs1 = _cptofcd("L", strlen("L")),
91 : ftcs2 = _cptofcd("N", strlen("N")),
92 : ftcs3 = _cptofcd("U", strlen("U"));
93 : #endif
94 : SCformat *Lstore;
95 : NCformat *Ustore;
96 : double *Lval, *Uval;
97 0 : int incx = 1, incy = 1;
98 : double alpha = 1.0, beta = 1.0;
99 : int nrow, irow, jcol;
100 : int fsupc, nsupr, nsupc;
101 : int_t luptr, istart, i, k, iptr;
102 : double *work;
103 : flops_t solve_ops;
104 :
105 : /* Test the input parameters */
106 0 : *info = 0;
107 0 : if ( strncmp(uplo,"L", 1)!=0 && strncmp(uplo, "U", 1)!=0 ) *info = -1;
108 0 : else if ( strncmp(trans, "N", 1)!=0 && strncmp(trans, "T", 1)!=0 &&
109 0 : strncmp(trans, "C", 1)!=0) *info = -2;
110 0 : else if ( strncmp(diag, "U", 1)!=0 && strncmp(diag, "N", 1)!=0 )
111 0 : *info = -3;
112 0 : else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4;
113 0 : else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5;
114 0 : if ( *info ) {
115 0 : int ii = -(*info);
116 0 : input_error("sp_dtrsv", &ii);
117 : return 0;
118 : }
119 :
120 0 : Lstore = L->Store;
121 0 : Lval = Lstore->nzval;
122 0 : Ustore = U->Store;
123 0 : Uval = Ustore->nzval;
124 : solve_ops = 0;
125 :
126 0 : if ( !(work = doubleCalloc(L->nrow)) )
127 0 : ABORT("Malloc fails for work in sp_dtrsv().");
128 :
129 0 : if ( strncmp(trans, "N", 1)==0 ) { /* Form x := inv(A)*x. */
130 :
131 0 : if ( strncmp(uplo, "L", 1)==0 ) {
132 : /* Form x := inv(L)*x */
133 0 : if ( L->nrow == 0 ) return 0; /* Quick return */
134 :
135 0 : for (k = 0; k <= Lstore->nsuper; k++) {
136 0 : fsupc = L_FST_SUPC(k);
137 0 : istart = L_SUB_START(fsupc);
138 0 : nsupr = L_SUB_START(fsupc+1) - istart;
139 0 : nsupc = L_FST_SUPC(k+1) - fsupc;
140 0 : luptr = L_NZ_START(fsupc);
141 0 : nrow = nsupr - nsupc;
142 :
143 0 : solve_ops += nsupc * (nsupc - 1);
144 0 : solve_ops += 2 * nrow * nsupc;
145 :
146 0 : if ( nsupc == 1 ) {
147 0 : for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) {
148 0 : irow = L_SUB(iptr);
149 0 : ++luptr;
150 0 : x[irow] -= x[fsupc] * Lval[luptr];
151 : }
152 : } else {
153 : #ifdef USE_VENDOR_BLAS
154 : #ifdef _CRAY
155 : STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
156 : &x[fsupc], &incx);
157 :
158 : SGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc],
159 : &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
160 : #else
161 : dtrsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr,
162 : &x[fsupc], &incx);
163 :
164 : dgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc],
165 : &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
166 : #endif
167 : #else
168 0 : dlsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]);
169 :
170 0 : dmatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],
171 : &x[fsupc], &work[0] );
172 : #endif
173 :
174 0 : iptr = istart + nsupc;
175 0 : for (i = 0; i < nrow; ++i, ++iptr) {
176 0 : irow = L_SUB(iptr);
177 0 : x[irow] -= work[i]; /* Scatter */
178 0 : work[i] = 0.0;
179 :
180 : }
181 : }
182 : } /* for k ... */
183 :
184 : } else {
185 : /* Form x := inv(U)*x */
186 :
187 0 : if ( U->nrow == 0 ) return 0; /* Quick return */
188 :
189 0 : for (k = Lstore->nsuper; k >= 0; k--) {
190 0 : fsupc = L_FST_SUPC(k);
191 0 : nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
192 0 : nsupc = L_FST_SUPC(k+1) - fsupc;
193 0 : luptr = L_NZ_START(fsupc);
194 :
195 0 : solve_ops += nsupc * (nsupc + 1);
196 :
197 0 : if ( nsupc == 1 ) {
198 0 : x[fsupc] /= Lval[luptr];
199 0 : for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) {
200 0 : irow = U_SUB(i);
201 0 : x[irow] -= x[fsupc] * Uval[i];
202 : }
203 : } else {
204 : #ifdef USE_VENDOR_BLAS
205 : #ifdef _CRAY
206 : STRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr,
207 : &x[fsupc], &incx);
208 : #else
209 : dtrsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr,
210 : &x[fsupc], &incx);
211 : #endif
212 : #else
213 0 : dusolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );
214 : #endif
215 :
216 0 : for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
217 0 : solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
218 0 : for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1);
219 0 : i++) {
220 0 : irow = U_SUB(i);
221 0 : x[irow] -= x[jcol] * Uval[i];
222 : }
223 : }
224 : }
225 : } /* for k ... */
226 :
227 : }
228 : } else { /* Form x := inv(A')*x */
229 :
230 0 : if ( strncmp(uplo, "L", 1)==0 ) {
231 : /* Form x := inv(L')*x */
232 0 : if ( L->nrow == 0 ) return 0; /* Quick return */
233 :
234 0 : for (k = Lstore->nsuper; k >= 0; --k) {
235 0 : fsupc = L_FST_SUPC(k);
236 0 : istart = L_SUB_START(fsupc);
237 0 : nsupr = L_SUB_START(fsupc+1) - istart;
238 0 : nsupc = L_FST_SUPC(k+1) - fsupc;
239 0 : luptr = L_NZ_START(fsupc);
240 :
241 0 : solve_ops += 2 * (nsupr - nsupc) * nsupc;
242 :
243 0 : for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
244 0 : iptr = istart + nsupc;
245 0 : for (i = L_NZ_START(jcol) + nsupc;
246 0 : i < L_NZ_START(jcol+1); i++) {
247 0 : irow = L_SUB(iptr);
248 0 : x[jcol] -= x[irow] * Lval[i];
249 0 : iptr++;
250 : }
251 : }
252 :
253 0 : if ( nsupc > 1 ) {
254 0 : solve_ops += nsupc * (nsupc - 1);
255 : #ifdef _CRAY
256 : ftcs1 = _cptofcd("L", strlen("L"));
257 : ftcs2 = _cptofcd("T", strlen("T"));
258 : ftcs3 = _cptofcd("U", strlen("U"));
259 : STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
260 : &x[fsupc], &incx);
261 : #else
262 0 : dtrsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
263 0 : &x[fsupc], &incx);
264 : #endif
265 : }
266 : }
267 : } else {
268 : /* Form x := inv(U')*x */
269 0 : if ( U->nrow == 0 ) return 0; /* Quick return */
270 :
271 0 : for (k = 0; k <= Lstore->nsuper; k++) {
272 0 : fsupc = L_FST_SUPC(k);
273 0 : nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
274 0 : nsupc = L_FST_SUPC(k+1) - fsupc;
275 0 : luptr = L_NZ_START(fsupc);
276 :
277 0 : for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
278 0 : solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
279 0 : for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
280 0 : irow = U_SUB(i);
281 0 : x[jcol] -= x[irow] * Uval[i];
282 : }
283 : }
284 :
285 0 : solve_ops += nsupc * (nsupc + 1);
286 :
287 0 : if ( nsupc == 1 ) {
288 0 : x[fsupc] /= Lval[luptr];
289 : } else {
290 : #ifdef _CRAY
291 : ftcs1 = _cptofcd("U", strlen("U"));
292 : ftcs2 = _cptofcd("T", strlen("T"));
293 : ftcs3 = _cptofcd("N", strlen("N"));
294 : STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
295 : &x[fsupc], &incx);
296 : #else
297 0 : dtrsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
298 0 : &x[fsupc], &incx);
299 : #endif
300 : }
301 : } /* for k ... */
302 : }
303 : }
304 :
305 0 : stat->ops[SOLVE] += solve_ops;
306 0 : SUPERLU_FREE(work);
307 0 : return 0;
308 : }
309 :
310 :
311 :
312 : /*! \brief Performs one of the matrix-vector operations y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y,
313 : *
314 : * <pre>
315 : * Purpose
316 : * =======
317 : *
318 : * sp_dgemv() performs one of the matrix-vector operations
319 : * y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y,
320 : * where alpha and beta are scalars, x and y are vectors and A is a
321 : * sparse A->nrow by A->ncol matrix.
322 : *
323 : * Parameters
324 : * ==========
325 : *
326 : * TRANS - (input) char*
327 : * On entry, TRANS specifies the operation to be performed as
328 : * follows:
329 : * TRANS = 'N' or 'n' y := alpha*A*x + beta*y.
330 : * TRANS = 'T' or 't' y := alpha*A'*x + beta*y.
331 : * TRANS = 'C' or 'c' y := alpha*A'*x + beta*y.
332 : *
333 : * ALPHA - (input) double
334 : * On entry, ALPHA specifies the scalar alpha.
335 : *
336 : * A - (input) SuperMatrix*
337 : * Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
338 : * Currently, the type of A can be:
339 : * Stype = NC or NCP; Dtype = SLU_D; Mtype = GE.
340 : * In the future, more general A can be handled.
341 : *
342 : * X - (input) double*, array of DIMENSION at least
343 : * ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
344 : * and at least
345 : * ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
346 : * Before entry, the incremented array X must contain the
347 : * vector x.
348 : *
349 : * INCX - (input) int
350 : * On entry, INCX specifies the increment for the elements of
351 : * X. INCX must not be zero.
352 : *
353 : * BETA - (input) double
354 : * On entry, BETA specifies the scalar beta. When BETA is
355 : * supplied as zero then Y need not be set on input.
356 : *
357 : * Y - (output) double*, array of DIMENSION at least
358 : * ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
359 : * and at least
360 : * ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
361 : * Before entry with BETA non-zero, the incremented array Y
362 : * must contain the vector y. On exit, Y is overwritten by the
363 : * updated vector y.
364 : *
365 : * INCY - (input) int
366 : * On entry, INCY specifies the increment for the elements of
367 : * Y. INCY must not be zero.
368 : *
369 : * ==== Sparse Level 2 Blas routine.
370 : * </pre>
371 : */
372 :
373 : int
374 0 : sp_dgemv(char *trans, double alpha, SuperMatrix *A, double *x,
375 : int incx, double beta, double *y, int incy)
376 : {
377 : /* Local variables */
378 : NCformat *Astore;
379 : double *Aval;
380 : int info;
381 : double temp;
382 : int lenx, leny;
383 : int iy, jx, jy, kx, ky, irow;
384 : int_t i, j;
385 : int notran;
386 :
387 0 : notran = ( strncmp(trans, "N", 1)==0 || strncmp(trans, "n", 1)==0 );
388 0 : Astore = A->Store;
389 0 : Aval = Astore->nzval;
390 :
391 : /* Test the input parameters */
392 0 : info = 0;
393 0 : if ( !notran && strncmp(trans, "T", 1)!=0 && strncmp(trans, "C", 1)!=0 )
394 0 : info = 1;
395 0 : else if ( A->nrow < 0 || A->ncol < 0 ) info = 3;
396 0 : else if (incx == 0) info = 5;
397 0 : else if (incy == 0) info = 8;
398 0 : if (info != 0) {
399 0 : input_error("sp_dgemv ", &info);
400 0 : return 0;
401 : }
402 :
403 : /* Quick return if possible. */
404 0 : if (A->nrow == 0 || A->ncol == 0 || (alpha == 0. && beta == 1.))
405 : return 0;
406 :
407 : /* Set LENX and LENY, the lengths of the vectors x and y, and set
408 : up the start points in X and Y. */
409 0 : if (strncmp(trans, "N", 1)==0) {
410 : lenx = A->ncol;
411 : leny = A->nrow;
412 : } else {
413 : lenx = A->nrow;
414 : leny = A->ncol;
415 : }
416 0 : if (incx > 0) kx = 0;
417 0 : else kx = - (lenx - 1) * incx;
418 0 : if (incy > 0) ky = 0;
419 0 : else ky = - (leny - 1) * incy;
420 :
421 : /* Start the operations. In this version the elements of A are
422 : accessed sequentially with one pass through A. */
423 : /* First form y := beta*y. */
424 0 : if (beta != 1.) {
425 0 : if (incy == 1) {
426 0 : if (beta == 0.)
427 0 : for (i = 0; i < leny; ++i) y[i] = 0.;
428 : else
429 0 : for (i = 0; i < leny; ++i) y[i] = beta * y[i];
430 : } else {
431 : iy = ky;
432 0 : if (beta == 0.)
433 0 : for (i = 0; i < leny; ++i) {
434 0 : y[iy] = 0.;
435 0 : iy += incy;
436 : }
437 : else
438 0 : for (i = 0; i < leny; ++i) {
439 0 : y[iy] = beta * y[iy];
440 0 : iy += incy;
441 : }
442 : }
443 : }
444 :
445 0 : if (alpha == 0.) return 0;
446 :
447 0 : if ( notran ) {
448 : /* Form y := alpha*A*x + y. */
449 : jx = kx;
450 0 : if (incy == 1) {
451 0 : for (j = 0; j < A->ncol; ++j) {
452 0 : if (x[jx] != 0.) {
453 0 : temp = alpha * x[jx];
454 0 : for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
455 0 : irow = Astore->rowind[i];
456 0 : y[irow] += temp * Aval[i];
457 : }
458 : }
459 0 : jx += incx;
460 : }
461 : } else {
462 0 : ABORT("Not implemented.");
463 : }
464 : } else {
465 : /* Form y := alpha*A'*x + y. */
466 : jy = ky;
467 0 : if (incx == 1) {
468 0 : for (j = 0; j < A->ncol; ++j) {
469 : temp = 0.;
470 0 : for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
471 0 : irow = Astore->rowind[i];
472 0 : temp += Aval[i] * x[irow];
473 : }
474 0 : y[jy] += alpha * temp;
475 0 : jy += incy;
476 : }
477 : } else {
478 0 : ABORT("Not implemented.");
479 : }
480 : }
481 :
482 : return 0;
483 : } /* sp_dgemv */
484 :
|