Line data Source code
1 : /*! \file
2 : Copyright (c) 2003, The Regents of the University of California, through
3 : Lawrence Berkeley National Laboratory (subject to receipt of any required
4 : approvals from U.S. Dept. of Energy)
5 :
6 : All rights reserved.
7 :
8 : The source code is distributed under BSD license, see the file License.txt
9 : at the top-level directory.
10 : */
11 :
12 : /*! @file dgstrf.c
13 : * \brief Computes an LU factorization of a general sparse matrix
14 : *
15 : * <pre>
16 : * -- SuperLU routine (version 7.0.0) --
17 : * Univ. of California Berkeley, Xerox Palo Alto Research Center,
18 : * and Lawrence Berkeley National Lab.
19 : * October 15, 2003
20 : * August 2024
21 : *
22 : * Copyright (c) 1994 by Xerox Corporation. All rights reserved.
23 : *
24 : * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
25 : * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
26 : *
27 : * Permission is hereby granted to use or copy this program for any
28 : * purpose, provided the above notices are retained on all copies.
29 : * Permission to modify the code and to distribute modified code is
30 : * granted, provided the above notices are retained, and a notice that
31 : * the code was modified is included with the above copyright notice.
32 : * </pre>
33 : */
34 :
35 :
36 : #include "slu_ddefs.h"
37 :
38 : /*! \brief
39 : *
40 : * <pre>
41 : * Purpose
42 : * =======
43 : *
44 : * DGSTRF computes an LU factorization of a general sparse m-by-n
45 : * matrix A using partial pivoting with row interchanges.
46 : * The factorization has the form
47 : * Pr * A = L * U
48 : * where Pr is a row permutation matrix, L is lower triangular with unit
49 : * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper
50 : * triangular (upper trapezoidal if A->nrow < A->ncol).
51 : *
52 : * See supermatrix.h for the definition of 'SuperMatrix' structure.
53 : *
54 : * Arguments
55 : * =========
56 : *
57 : * options (input) superlu_options_t*
58 : * The structure defines the input parameters to control
59 : * how the LU decomposition will be performed.
60 : *
61 : * A (input) SuperMatrix*
62 : * Original matrix A, permuted by columns, of dimension
63 : * (A->nrow, A->ncol). The type of A can be:
64 : * Stype = SLU_NCP; Dtype = SLU_D; Mtype = SLU_GE.
65 : *
66 : * relax (input) int
67 : * To control degree of relaxing supernodes. If the number
68 : * of nodes (columns) in a subtree of the elimination tree is less
69 : * than relax, this subtree is considered as one supernode,
70 : * regardless of the row structures of those columns.
71 : *
72 : * panel_size (input) int
73 : * A panel consists of at most panel_size consecutive columns.
74 : *
75 : * etree (input) int*, dimension (A->ncol)
76 : * Elimination tree of A'*A.
77 : * Note: etree is a vector of parent pointers for a forest whose
78 : * vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
79 : * On input, the columns of A should be permuted so that the
80 : * etree is in a certain postorder.
81 : *
82 : * work (input/output) void*, size (lwork) (in bytes)
83 : * User-supplied work space and space for the output data structures.
84 : * Not referenced if lwork = 0;
85 : *
86 : * lwork (input) int
87 : * Specifies the size of work array in bytes.
88 : * = 0: allocate space internally by system malloc;
89 : * > 0: use user-supplied work array of length lwork in bytes,
90 : * returns error if space runs out.
91 : * = -1: the routine guesses the amount of space needed without
92 : * performing the factorization, and returns it in
93 : * *info; no other side effects.
94 : *
95 : * perm_c (input) int*, dimension (A->ncol)
96 : * Column permutation vector, which defines the
97 : * permutation matrix Pc; perm_c[i] = j means column i of A is
98 : * in position j in A*Pc.
99 : * When searching for diagonal, perm_c[*] is applied to the
100 : * row subscripts of A, so that diagonal threshold pivoting
101 : * can find the diagonal of A, rather than that of A*Pc.
102 : *
103 : * perm_r (input/output) int*, dimension (A->nrow)
104 : * Row permutation vector which defines the permutation matrix Pr,
105 : * perm_r[i] = j means row i of A is in position j in Pr*A.
106 : * If options->Fact == SamePattern_SameRowPerm, the pivoting routine
107 : * will try to use the input perm_r, unless a certain threshold
108 : * criterion is violated. In that case, perm_r is overwritten by
109 : * a new permutation determined by partial pivoting or diagonal
110 : * threshold pivoting.
111 : * Otherwise, perm_r is output argument;
112 : *
113 : * L (output) SuperMatrix*
114 : * The factor L from the factorization Pr*A=L*U; use compressed row
115 : * subscripts storage for supernodes, i.e., L has type:
116 : * Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU.
117 : *
118 : * U (output) SuperMatrix*
119 : * The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
120 : * storage scheme, i.e., U has types: Stype = SLU_NC,
121 : * Dtype = SLU_D, Mtype = SLU_TRU.
122 : *
123 : * Glu (input/output) GlobalLU_t *
124 : * If options->Fact == SamePattern_SameRowPerm, it is an input;
125 : * The matrix A will be factorized assuming that a
126 : * factorization of a matrix with the same sparsity pattern
127 : * and similar numerical values was performed prior to this one.
128 : * Therefore, this factorization will reuse both row and column
129 : * scaling factors R and C, both row and column permutation
130 : * vectors perm_r and perm_c, and the L & U data structures
131 : * set up from the previous factorization.
132 : * Otherwise, it is an output.
133 : *
134 : * stat (output) SuperLUStat_t*
135 : * Record the statistics on runtime and floating-point operation count.
136 : * See slu_util.h for the definition of 'SuperLUStat_t'.
137 : *
138 : * info (output) int*
139 : * = 0: successful exit
140 : * < 0: if info = -i, the i-th argument had an illegal value
141 : * > 0: if info = i, and i is
142 : * <= A->ncol: U(i,i) is exactly zero. The factorization has
143 : * been completed, but the factor U is exactly singular,
144 : * and division by zero will occur if it is used to solve a
145 : * system of equations.
146 : * > A->ncol: number of bytes allocated when memory allocation
147 : * failure occurred, plus A->ncol. If lwork = -1, it is
148 : * the estimated amount of space needed, plus A->ncol.
149 : *
150 : * ======================================================================
151 : *
152 : * Local Working Arrays:
153 : * ======================
154 : * m = number of rows in the matrix
155 : * n = number of columns in the matrix
156 : *
157 : * xprune[0:n-1]: xprune[*] points to locations in subscript
158 : * vector lsub[*]. For column i, xprune[i] denotes the point where
159 : * structural pruning begins. I.e. only xlsub[i],..,xprune[i]-1 need
160 : * to be traversed for symbolic factorization.
161 : *
162 : * marker[0:3*m-1]: marker[i] = j means that node i has been
163 : * reached when working on column j.
164 : * Storage: relative to original row subscripts
165 : * NOTE: There are 3 of them: marker/marker1 are used for panel dfs,
166 : * see dpanel_dfs.c; marker2 is used for inner-factorization,
167 : * see dcolumn_dfs.c.
168 : *
169 : * parent[0:m-1]: parent vector used during dfs
170 : * Storage: relative to new row subscripts
171 : *
172 : * xplore[0:m-1]: xplore[i] gives the location of the next (dfs)
173 : * unexplored neighbor of i in lsub[*]
174 : *
175 : * segrep[0:nseg-1]: contains the list of supernodal representatives
176 : * in topological order of the dfs. A supernode representative is the
177 : * last column of a supernode.
178 : * The maximum size of segrep[] is n.
179 : *
180 : * repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a
181 : * supernodal representative r, repfnz[r] is the location of the first
182 : * nonzero in this segment. It is also used during the dfs: repfnz[r]>0
183 : * indicates the supernode r has been explored.
184 : * NOTE: There are W of them, each used for one column of a panel.
185 : *
186 : * panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below
187 : * the panel diagonal. These are filled in during dpanel_dfs(), and are
188 : * used later in the inner LU factorization within the panel.
189 : * panel_lsub[]/dense[] pair forms the SPA data structure.
190 : * NOTE: There are W of them.
191 : *
192 : * dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values;
193 : * NOTE: there are W of them.
194 : *
195 : * tempv[0:*]: real temporary used for dense numeric kernels;
196 : * The size of this array is defined by NUM_TEMPV() in slu_ddefs.h.
197 : * </pre>
198 : */
199 :
200 : void
201 0 : dgstrf (superlu_options_t *options, SuperMatrix *A,
202 : int relax, int panel_size, int *etree, void *work, int_t lwork,
203 : int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U,
204 : GlobalLU_t *Glu, /* persistent to facilitate multiple factorizations */
205 : SuperLUStat_t *stat, int_t *info)
206 : {
207 : /* Local working arrays */
208 : NCPformat *Astore;
209 : int *iperm_r = NULL; /* inverse of perm_r; used when
210 : options->Fact == SamePattern_SameRowPerm */
211 : int *iperm_c; /* inverse of perm_c */
212 : int *iwork;
213 : double *dwork;
214 : int *segrep, *repfnz, *parent;
215 : int *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */
216 : int_t *xprune, *xplore;
217 : int *marker;
218 : double *dense, *tempv;
219 : int *relax_end;
220 : double *a;
221 : int_t *asub, *xa_begin, *xa_end;
222 : int_t *xlsub, *xlusup, *xusub;
223 : int *xsup, *supno;
224 : int_t nzlumax;
225 0 : double fill_ratio = sp_ienv(6); /* estimated fill ratio */
226 :
227 : /* Local scalars */
228 0 : fact_t fact = options->Fact;
229 0 : double diag_pivot_thresh = options->DiagPivotThresh;
230 : int pivrow; /* pivotal row number in the original matrix A */
231 : int nseg1; /* no of segments in U-column above panel row jcol */
232 : int nseg; /* no of segments in each U-column */
233 : register int jcol, jj;
234 : register int kcol; /* end column of a relaxed snode */
235 : register int icol;
236 : int_t i, k, iinfo, new_next, nextlu, nextu;
237 : int m, n, min_mn, jsupno, fsupc;
238 : int w_def; /* upper bound on panel width */
239 : int usepr, iperm_r_allocated = 0;
240 : int_t nnzL, nnzU;
241 0 : int *panel_histo = stat->panel_histo;
242 0 : flops_t *ops = stat->ops;
243 :
244 : iinfo = 0;
245 0 : m = A->nrow;
246 0 : n = A->ncol;
247 0 : min_mn = SUPERLU_MIN(m, n);
248 0 : Astore = A->Store;
249 0 : a = Astore->nzval;
250 0 : asub = Astore->rowind;
251 0 : xa_begin = Astore->colbeg;
252 0 : xa_end = Astore->colend;
253 :
254 : /* Allocate storage common to the factor routines */
255 0 : *info = dLUMemInit(fact, work, lwork, m, n, Astore->nnz,
256 : panel_size, fill_ratio, L, U, Glu, &iwork, &dwork);
257 0 : if ( *info ) return;
258 :
259 0 : xsup = Glu->xsup;
260 0 : supno = Glu->supno;
261 0 : xlsub = Glu->xlsub;
262 0 : xlusup = Glu->xlusup;
263 0 : xusub = Glu->xusub;
264 :
265 0 : SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore,
266 : &repfnz, &panel_lsub, &xprune, &marker);
267 0 : dSetRWork(m, panel_size, dwork, &dense, &tempv);
268 :
269 0 : usepr = (fact == SamePattern_SameRowPerm);
270 0 : if ( usepr ) {
271 : /* Compute the inverse of perm_r */
272 0 : iperm_r = (int *) int32Malloc(m);
273 0 : for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k;
274 : iperm_r_allocated = 1;
275 : }
276 0 : iperm_c = (int *) int32Malloc(n);
277 0 : for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k;
278 :
279 : /* Identify relaxed snodes */
280 0 : relax_end = (int *) intMalloc(n);
281 0 : if ( options->SymmetricMode == YES ) {
282 0 : heap_relax_snode(n, etree, relax, marker, relax_end);
283 : } else {
284 0 : relax_snode(n, etree, relax, marker, relax_end);
285 : }
286 :
287 0 : ifill (perm_r, m, SLU_EMPTY);
288 0 : ifill (marker, m * NO_MARKER, SLU_EMPTY);
289 0 : supno[0] = -1;
290 0 : xsup[0] = xlsub[0] = xusub[0] = xlusup[0] = 0;
291 : w_def = panel_size;
292 :
293 : /*
294 : * Work on one "panel" at a time. A panel is one of the following:
295 : * (a) a relaxed supernode at the bottom of the etree, or
296 : * (b) panel_size contiguous columns, defined by the user
297 : */
298 0 : for (jcol = 0; jcol < min_mn; ) {
299 :
300 0 : if ( relax_end[jcol] != SLU_EMPTY ) { /* start of a relaxed snode */
301 : kcol = relax_end[jcol]; /* end of the relaxed snode */
302 0 : panel_histo[kcol-jcol+1]++;
303 :
304 : /* --------------------------------------
305 : * Factorize the relaxed supernode(jcol:kcol)
306 : * -------------------------------------- */
307 : /* Determine the union of the row structure of the snode */
308 0 : if ( (*info = dsnode_dfs(jcol, kcol, asub, xa_begin, xa_end,
309 : xprune, marker, Glu)) != 0 )
310 : return;
311 :
312 0 : nextu = xusub[jcol];
313 0 : nextlu = xlusup[jcol];
314 0 : jsupno = supno[jcol];
315 0 : fsupc = xsup[jsupno];
316 0 : new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1);
317 0 : nzlumax = Glu->nzlumax;
318 0 : while ( new_next > nzlumax ) {
319 0 : if ( (*info = dLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, Glu)) )
320 : return;
321 : }
322 :
323 0 : for (icol = jcol; icol<= kcol; icol++) {
324 0 : xusub[icol+1] = nextu;
325 :
326 : /* Scatter into SPA dense[*] */
327 0 : for (k = xa_begin[icol]; k < xa_end[icol]; k++)
328 0 : dense[asub[k]] = a[k];
329 :
330 : /* Numeric update within the snode */
331 0 : dsnode_bmod(icol, jsupno, fsupc, dense, tempv, Glu, stat);
332 :
333 0 : if ( (*info = dpivotL(icol, diag_pivot_thresh, &usepr, perm_r,
334 : iperm_r, iperm_c, &pivrow, Glu, stat)) )
335 0 : if ( iinfo == 0 ) iinfo = *info;
336 :
337 : #if ( DEBUGlevel>=2 )
338 : dprint_lu_col("[1]: ", icol, pivrow, xprune, Glu);
339 : #endif
340 :
341 : }
342 :
343 : jcol = icol;
344 :
345 : } else { /* Work on one panel of panel_size columns */
346 :
347 : /* Adjust panel_size so that a panel won't overlap with the next
348 : * relaxed snode.
349 : */
350 : panel_size = w_def;
351 0 : for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++)
352 0 : if ( relax_end[k] != SLU_EMPTY ) {
353 0 : panel_size = k - jcol;
354 0 : break;
355 : }
356 0 : if ( k == min_mn ) panel_size = min_mn - jcol;
357 0 : panel_histo[panel_size]++;
358 :
359 : /* symbolic factor on a panel of columns */
360 0 : dpanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1,
361 : dense, panel_lsub, segrep, repfnz, xprune,
362 : marker, parent, xplore, Glu);
363 :
364 : /* numeric sup-panel updates in topological order */
365 0 : dpanel_bmod(m, panel_size, jcol, nseg1, dense,
366 : tempv, segrep, repfnz, Glu, stat);
367 :
368 : /* Sparse LU within the panel, and below panel diagonal */
369 0 : for ( jj = jcol; jj < jcol + panel_size; jj++) {
370 0 : k = (jj - jcol) * m; /* column index for w-wide arrays */
371 :
372 0 : nseg = nseg1; /* Begin after all the panel segments */
373 :
374 0 : if ((*info = dcolumn_dfs(m, jj, perm_r, &nseg, &panel_lsub[k],
375 0 : segrep, &repfnz[k], xprune, marker,
376 : parent, xplore, Glu)) != 0) return;
377 :
378 : /* Numeric updates */
379 0 : if ((*info = dcolumn_bmod(jj, (nseg - nseg1), &dense[k],
380 0 : tempv, &segrep[nseg1], &repfnz[k],
381 : jcol, Glu, stat)) != 0) return;
382 :
383 : /* Copy the U-segments to ucol[*] */
384 0 : if ((*info = dcopy_to_ucol(jj, nseg, segrep, &repfnz[k],
385 : perm_r, &dense[k], Glu)) != 0)
386 : return;
387 :
388 0 : if ( (*info = dpivotL(jj, diag_pivot_thresh, &usepr, perm_r,
389 : iperm_r, iperm_c, &pivrow, Glu, stat)) )
390 0 : if ( iinfo == 0 ) iinfo = *info;
391 :
392 : /* Prune columns (0:jj-1) using column jj */
393 0 : dpruneL(jj, perm_r, pivrow, nseg, segrep,
394 0 : &repfnz[k], xprune, Glu);
395 :
396 : /* Reset repfnz[] for this column */
397 0 : resetrep_col (nseg, segrep, &repfnz[k]);
398 :
399 : #if ( DEBUGlevel>=2 )
400 : dprint_lu_col("[2]: ", jj, pivrow, xprune, Glu);
401 : #endif
402 :
403 : }
404 :
405 : jcol += panel_size; /* Move to the next panel */
406 :
407 : } /* else */
408 :
409 : } /* for */
410 :
411 0 : *info = iinfo;
412 :
413 : /* Complete perm_r[] for rank-deficient or tall-skinny matrices */
414 : /* k is the rank of U
415 : pivots have been completed for rows < k
416 : Now fill in the pivots for rows k to m */
417 0 : k = iinfo == 0 ? n : (int)iinfo - 1;
418 0 : if (m > k) {
419 : /* if k == m, then all the row permutations are complete and
420 : we can short circuit looking through the rest of the vector */
421 0 : for (i = 0; i < m && k < m; ++i) {
422 0 : if (perm_r[i] == SLU_EMPTY) {
423 0 : perm_r[i] = k;
424 0 : ++k;
425 : }
426 :
427 : }
428 : }
429 :
430 0 : countnz(min_mn, xprune, &nnzL, &nnzU, Glu);
431 0 : fixupL(min_mn, perm_r, Glu);
432 :
433 0 : dLUWorkFree(iwork, dwork, Glu); /* Free work space and compress storage */
434 0 : SUPERLU_FREE(xplore);
435 0 : SUPERLU_FREE(xprune);
436 :
437 0 : if ( fact == SamePattern_SameRowPerm ) {
438 : /* L and U structures may have changed due to possibly different
439 : pivoting, even though the storage is available.
440 : There could also be memory expansions, so the array locations
441 : may have changed, */
442 0 : ((SCformat *)L->Store)->nnz = nnzL;
443 0 : ((SCformat *)L->Store)->nsuper = Glu->supno[n];
444 0 : ((SCformat *)L->Store)->nzval = (double *) Glu->lusup;
445 0 : ((SCformat *)L->Store)->nzval_colptr = Glu->xlusup;
446 0 : ((SCformat *)L->Store)->rowind = Glu->lsub;
447 0 : ((SCformat *)L->Store)->rowind_colptr = Glu->xlsub;
448 0 : ((NCformat *)U->Store)->nnz = nnzU;
449 0 : ((NCformat *)U->Store)->nzval = (double *) Glu->ucol;
450 0 : ((NCformat *)U->Store)->rowind = Glu->usub;
451 0 : ((NCformat *)U->Store)->colptr = Glu->xusub;
452 : } else {
453 0 : dCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL,
454 0 : (double *) Glu->lusup, Glu->xlusup,
455 : Glu->lsub, Glu->xlsub, Glu->supno, Glu->xsup,
456 : SLU_SC, SLU_D, SLU_TRLU);
457 0 : dCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU,
458 0 : (double *) Glu->ucol, Glu->usub, Glu->xusub,
459 : SLU_NC, SLU_D, SLU_TRU);
460 : }
461 :
462 0 : ops[FACT] += ops[TRSV] + ops[GEMV];
463 0 : stat->expansions = --(Glu->num_expansions);
464 :
465 0 : if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
466 0 : SUPERLU_FREE (iperm_c);
467 0 : SUPERLU_FREE (relax_end);
468 :
469 : }
|