Line data Source code
1 : /*! \file
2 : Copyright (c) 2003, The Regents of the University of California, through
3 : Lawrence Berkeley National Laboratory (subject to receipt of any required
4 : approvals from U.S. Dept. of Energy)
5 :
6 : All rights reserved.
7 :
8 : The source code is distributed under BSD license, see the file License.txt
9 : at the top-level directory.
10 : */
11 :
12 : /*! @file dmemory.c
13 : * \brief Memory details
14 : *
15 : * <pre>
16 : * -- SuperLU routine (version 7.0.0) --
17 : * Lawrence Berkeley National Laboratory.
18 : * June 30, 2009
19 : * August 2024
20 : * </pre>
21 : */
22 : #include "slu_ddefs.h"
23 :
24 :
25 : /* Internal prototypes */
26 : void *dexpand (int_t *, MemType, int_t, int, GlobalLU_t *);
27 : int dLUWorkInit (int, int, int, int **, double **, GlobalLU_t *);
28 : void copy_mem_double (int_t, void *, void *);
29 : void dStackCompress (GlobalLU_t *);
30 : void dSetupSpace (void *, int_t, GlobalLU_t *);
31 : void *duser_malloc (int, int, GlobalLU_t *);
32 : void duser_free (int, int, GlobalLU_t *);
33 :
34 : /* External prototypes (in memory.c - prec-independent) */
35 : extern void copy_mem_int (int, void *, void *);
36 : extern void user_bcopy (char *, char *, int);
37 :
38 :
39 : /* Macros to manipulate stack */
40 : #define StackFull(x) ( x + Glu->stack.used >= Glu->stack.size )
41 : #define NotDoubleAlign(addr) ( (intptr_t)addr & 7 )
42 : #define DoubleAlign(addr) ( ((intptr_t)addr + 7) & ~7L )
43 : #define TempSpace(m, w) ( (2*w + 4 + NO_MARKER) * m * sizeof(int) + \
44 : (w + 1) * m * sizeof(double) )
45 : #define Reduce(alpha) ((alpha + 1) / 2) /* i.e. (alpha-1)/2 + 1 */
46 :
47 :
48 :
49 :
50 : /*! \brief Setup the memory model to be used for factorization.
51 : *
52 : * lwork = 0: use system malloc;
53 : * lwork > 0: use user-supplied work[] space.
54 : */
55 0 : void dSetupSpace(void *work, int_t lwork, GlobalLU_t *Glu)
56 : {
57 0 : if ( lwork == 0 ) {
58 0 : Glu->MemModel = SYSTEM; /* malloc/free */
59 0 : } else if ( lwork > 0 ) {
60 0 : Glu->MemModel = USER; /* user provided space */
61 0 : Glu->stack.used = 0;
62 0 : Glu->stack.top1 = 0;
63 0 : Glu->stack.top2 = (lwork/4)*4; /* must be word addressable */
64 0 : Glu->stack.size = Glu->stack.top2;
65 0 : Glu->stack.array = (void *) work;
66 : }
67 0 : }
68 :
69 :
70 :
71 0 : void *duser_malloc(int bytes, int which_end, GlobalLU_t *Glu)
72 : {
73 : void *buf;
74 :
75 0 : if ( StackFull(bytes) ) return (NULL);
76 :
77 0 : if ( which_end == HEAD ) {
78 0 : buf = (char*) Glu->stack.array + Glu->stack.top1;
79 0 : Glu->stack.top1 += bytes;
80 : } else {
81 0 : Glu->stack.top2 -= bytes;
82 0 : buf = (char*) Glu->stack.array + Glu->stack.top2;
83 : }
84 :
85 0 : Glu->stack.used += bytes;
86 0 : return buf;
87 : }
88 :
89 :
90 0 : void duser_free(int bytes, int which_end, GlobalLU_t *Glu)
91 : {
92 0 : if ( which_end == HEAD ) {
93 0 : Glu->stack.top1 -= bytes;
94 : } else {
95 0 : Glu->stack.top2 += bytes;
96 : }
97 0 : Glu->stack.used -= bytes;
98 0 : }
99 :
100 :
101 :
102 : /*!
103 : * Calculate memory usage
104 : *
105 : * \param mem_usage consists of the following fields:
106 : * - <tt>for_lu (float)</tt>
107 : * The amount of space used in bytes for the L\\U data structures.
108 : * - <tt>total_needed (float)</tt>
109 : * The amount of space needed in bytes to perform factorization.
110 : */
111 0 : int dQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage)
112 : {
113 : SCformat *Lstore;
114 : NCformat *Ustore;
115 0 : register int n, iword, dword, panel_size = sp_ienv(1);
116 :
117 0 : Lstore = L->Store;
118 0 : Ustore = U->Store;
119 0 : n = L->ncol;
120 : iword = sizeof(int);
121 : dword = sizeof(double);
122 :
123 : /* For LU factors */
124 0 : mem_usage->for_lu = (float)( (4.0*n + 3.0) * iword +
125 0 : Lstore->nzval_colptr[n] * dword +
126 0 : Lstore->rowind_colptr[n] * iword );
127 0 : mem_usage->for_lu += (float)( (n + 1.0) * iword +
128 0 : Ustore->colptr[n] * (dword + iword) );
129 :
130 : /* Working storage to support factorization */
131 0 : mem_usage->total_needed = mem_usage->for_lu +
132 0 : (float)( (2.0 * panel_size + 4.0 + NO_MARKER) * n * iword +
133 0 : (panel_size + 1.0) * n * dword );
134 :
135 0 : return 0;
136 : } /* dQuerySpace */
137 :
138 :
139 : /*!
140 : * Calculate memory usage
141 : *
142 : * \param mem_usage consists of the following fields:
143 : * - <tt>for_lu (float)</tt>
144 : * The amount of space used in bytes for the L\\U data structures.
145 : * - <tt>total_needed (float)</tt>
146 : * The amount of space needed in bytes to perform factorization.
147 : *
148 : */
149 0 : int ilu_dQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage)
150 : {
151 : SCformat *Lstore;
152 : NCformat *Ustore;
153 0 : register int n, panel_size = sp_ienv(1);
154 : register float iword, dword;
155 :
156 0 : Lstore = L->Store;
157 0 : Ustore = U->Store;
158 0 : n = L->ncol;
159 : iword = sizeof(int);
160 : dword = sizeof(double);
161 :
162 : /* For LU factors */
163 0 : mem_usage->for_lu = (float)( (4.0f * n + 3.0f) * iword +
164 0 : Lstore->nzval_colptr[n] * dword +
165 0 : Lstore->rowind_colptr[n] * iword );
166 0 : mem_usage->for_lu += (float)( (n + 1.0f) * iword +
167 0 : Ustore->colptr[n] * (dword + iword) );
168 :
169 : /* Working storage to support factorization.
170 : ILU needs 5*n more integers than LU */
171 0 : mem_usage->total_needed = mem_usage->for_lu +
172 0 : (float)( (2.0f * panel_size + 9.0f + NO_MARKER) * n * iword +
173 0 : (panel_size + 1.0f) * n * dword );
174 :
175 0 : return 0;
176 : } /* ilu_dQuerySpace */
177 :
178 :
179 : /*! \brief Allocate storage for the data structures common to all factor routines.
180 : *
181 : * <pre>
182 : * For those unpredictable size, estimate as fill_ratio * nnz(A).
183 : * Return value:
184 : * If lwork = -1, return the estimated amount of space required, plus n;
185 : * otherwise, return the amount of space actually allocated when
186 : * memory allocation failure occurred.
187 : * </pre>
188 : */
189 : int_t
190 0 : dLUMemInit(fact_t fact, void *work, int_t lwork, int m, int n, int_t annz,
191 : int panel_size, double fill_ratio, SuperMatrix *L, SuperMatrix *U,
192 : GlobalLU_t *Glu, int **iwork, double **dwork)
193 : {
194 : int info, iword, dword;
195 : SCformat *Lstore;
196 : NCformat *Ustore;
197 : int *xsup, *supno;
198 : int_t *lsub, *xlsub;
199 : double *lusup;
200 : int_t *xlusup;
201 : double *ucol;
202 : int_t *usub, *xusub;
203 : int_t nzlmax, nzumax, nzlumax;
204 :
205 : iword = sizeof(int);
206 : dword = sizeof(double);
207 0 : Glu->n = n;
208 0 : Glu->num_expansions = 0;
209 :
210 0 : Glu->expanders = (ExpHeader *) SUPERLU_MALLOC( NO_MEMTYPE *
211 : sizeof(ExpHeader) );
212 0 : if ( !Glu->expanders ) ABORT("SUPERLU_MALLOC fails for expanders");
213 :
214 0 : if ( fact != SamePattern_SameRowPerm ) {
215 : /* Guess for L\U factors */
216 0 : nzumax = nzlumax = nzlmax = fill_ratio * annz;
217 : //nzlmax = SUPERLU_MAX(1, fill_ratio/4.) * annz;
218 :
219 0 : if ( lwork == -1 ) {
220 0 : return ( GluIntArray(n) * iword + TempSpace(m, panel_size)
221 0 : + (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n );
222 : } else {
223 0 : dSetupSpace(work, lwork, Glu);
224 : }
225 :
226 : #if ( PRNTlevel >= 1 )
227 : printf("dLUMemInit() called: fill_ratio %.0f, nzlmax %lld, nzumax %lld\n",
228 : fill_ratio, (long long) nzlmax, (long long) nzumax);
229 : fflush(stdout);
230 : #endif
231 :
232 : /* Integer pointers for L\U factors */
233 0 : if ( Glu->MemModel == SYSTEM ) {
234 0 : xsup = int32Malloc(n+1);
235 0 : supno = int32Malloc(n+1);
236 0 : xlsub = intMalloc(n+1);
237 0 : xlusup = intMalloc(n+1);
238 0 : xusub = intMalloc(n+1);
239 : } else {
240 0 : xsup = (int *)duser_malloc((n+1) * iword, HEAD, Glu);
241 0 : supno = (int *)duser_malloc((n+1) * iword, HEAD, Glu);
242 0 : xlsub = duser_malloc((n+1) * iword, HEAD, Glu);
243 0 : xlusup = duser_malloc((n+1) * iword, HEAD, Glu);
244 0 : xusub = duser_malloc((n+1) * iword, HEAD, Glu);
245 : }
246 :
247 0 : lusup = (double *) dexpand( &nzlumax, LUSUP, 0, 0, Glu );
248 0 : ucol = (double *) dexpand( &nzumax, UCOL, 0, 0, Glu );
249 0 : lsub = (int_t *) dexpand( &nzlmax, LSUB, 0, 0, Glu );
250 0 : usub = (int_t *) dexpand( &nzumax, USUB, 0, 1, Glu );
251 :
252 0 : while ( !lusup || !ucol || !lsub || !usub ) {
253 0 : if ( Glu->MemModel == SYSTEM ) {
254 0 : SUPERLU_FREE(lusup);
255 0 : SUPERLU_FREE(ucol);
256 0 : SUPERLU_FREE(lsub);
257 0 : SUPERLU_FREE(usub);
258 : } else {
259 0 : duser_free((nzlumax+nzumax)*dword+(nzlmax+nzumax)*iword,
260 : HEAD, Glu);
261 : }
262 0 : nzlumax /= 2;
263 0 : nzumax /= 2;
264 0 : nzlmax /= 2;
265 0 : if ( nzlumax < annz ) {
266 : printf("Not enough memory to perform factorization.\n");
267 0 : return (dmemory_usage(nzlmax, nzumax, nzlumax, n) + n);
268 : }
269 : #if ( PRNTlevel >= 1)
270 : printf("dLUMemInit() reduce size: nzlmax %ld, nzumax %ld\n",
271 : (long) nzlmax, (long) nzumax);
272 : fflush(stdout);
273 : #endif
274 0 : lusup = (double *) dexpand( &nzlumax, LUSUP, 0, 0, Glu );
275 0 : ucol = (double *) dexpand( &nzumax, UCOL, 0, 0, Glu );
276 0 : lsub = (int_t *) dexpand( &nzlmax, LSUB, 0, 0, Glu );
277 0 : usub = (int_t *) dexpand( &nzumax, USUB, 0, 1, Glu );
278 : }
279 :
280 : } else {
281 : /* fact == SamePattern_SameRowPerm */
282 0 : Lstore = L->Store;
283 0 : Ustore = U->Store;
284 0 : xsup = Lstore->sup_to_col;
285 0 : supno = Lstore->col_to_sup;
286 0 : xlsub = Lstore->rowind_colptr;
287 0 : xlusup = Lstore->nzval_colptr;
288 0 : xusub = Ustore->colptr;
289 0 : nzlmax = Glu->nzlmax; /* max from previous factorization */
290 0 : nzumax = Glu->nzumax;
291 0 : nzlumax = Glu->nzlumax;
292 :
293 0 : if ( lwork == -1 ) {
294 0 : return ( GluIntArray(n) * iword + TempSpace(m, panel_size)
295 0 : + (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n );
296 0 : } else if ( lwork == 0 ) {
297 0 : Glu->MemModel = SYSTEM;
298 : } else {
299 0 : Glu->MemModel = USER;
300 0 : Glu->stack.top2 = (lwork/4)*4; /* must be word-addressable */
301 0 : Glu->stack.size = Glu->stack.top2;
302 : }
303 :
304 0 : lsub = Glu->expanders[LSUB].mem = Lstore->rowind;
305 0 : lusup = Glu->expanders[LUSUP].mem = Lstore->nzval;
306 0 : usub = Glu->expanders[USUB].mem = Ustore->rowind;
307 0 : ucol = Glu->expanders[UCOL].mem = Ustore->nzval;;
308 0 : Glu->expanders[LSUB].size = nzlmax;
309 0 : Glu->expanders[LUSUP].size = nzlumax;
310 0 : Glu->expanders[USUB].size = nzumax;
311 0 : Glu->expanders[UCOL].size = nzumax;
312 : }
313 :
314 0 : Glu->xsup = xsup;
315 0 : Glu->supno = supno;
316 0 : Glu->lsub = lsub;
317 0 : Glu->xlsub = xlsub;
318 0 : Glu->lusup = (void *) lusup;
319 0 : Glu->xlusup = xlusup;
320 0 : Glu->ucol = (void *) ucol;
321 0 : Glu->usub = usub;
322 0 : Glu->xusub = xusub;
323 0 : Glu->nzlmax = nzlmax;
324 0 : Glu->nzumax = nzumax;
325 0 : Glu->nzlumax = nzlumax;
326 :
327 0 : info = dLUWorkInit(m, n, panel_size, iwork, dwork, Glu);
328 0 : if ( info )
329 0 : return ( info + dmemory_usage(nzlmax, nzumax, nzlumax, n) + n);
330 :
331 0 : ++Glu->num_expansions;
332 0 : return 0;
333 :
334 : } /* dLUMemInit */
335 :
336 : /*! \brief Allocate known working storage.
337 : * Returns 0 if success, otherwise
338 : * returns the number of bytes allocated so far when failure occurred.
339 : */
340 : int
341 0 : dLUWorkInit(int m, int n, int panel_size, int **iworkptr,
342 : double **dworkptr, GlobalLU_t *Glu)
343 : {
344 : int isize, dsize, extra;
345 : double *old_ptr;
346 0 : int maxsuper = SUPERLU_MAX( sp_ienv(3), sp_ienv(7) ),
347 0 : rowblk = sp_ienv(4);
348 :
349 : /* xplore[m] and xprune[n] can be 64-bit; they are allocated separately */
350 : //isize = ( (2 * panel_size + 3 + NO_MARKER ) * m + n ) * sizeof(int);
351 0 : isize = ( (2 * panel_size + 2 + NO_MARKER ) * m ) * sizeof(int);
352 0 : dsize = (m * panel_size +
353 0 : NUM_TEMPV(m,panel_size,maxsuper,rowblk)) * sizeof(double);
354 :
355 0 : if ( Glu->MemModel == SYSTEM )
356 0 : *iworkptr = (int *) int32Calloc(isize/sizeof(int));
357 : else
358 0 : *iworkptr = (int *) duser_malloc(isize, TAIL, Glu);
359 0 : if ( ! *iworkptr ) {
360 0 : fprintf(stderr, "dLUWorkInit: malloc fails for local iworkptr[]\n");
361 0 : return (isize + n);
362 : }
363 :
364 0 : if ( Glu->MemModel == SYSTEM )
365 0 : *dworkptr = (double *) SUPERLU_MALLOC(dsize);
366 : else {
367 0 : *dworkptr = (double *) duser_malloc(dsize, TAIL, Glu);
368 0 : if ( NotDoubleAlign(*dworkptr) ) {
369 : old_ptr = *dworkptr;
370 0 : *dworkptr = (double*) DoubleAlign(*dworkptr);
371 0 : *dworkptr = (double*) ((double*)*dworkptr - 1);
372 0 : extra = (char*)old_ptr - (char*)*dworkptr;
373 : #if ( DEBUGlevel>=1 )
374 : printf("dLUWorkInit: not aligned, extra %d\n", extra); fflush(stdout);
375 : #endif
376 0 : Glu->stack.top2 -= extra;
377 0 : Glu->stack.used += extra;
378 : }
379 : }
380 0 : if ( ! *dworkptr ) {
381 0 : fprintf(stderr, "malloc fails for local dworkptr[].");
382 0 : return (isize + dsize + n);
383 : }
384 :
385 : return 0;
386 : } /* end dLUWorkInit */
387 :
388 :
389 : /*! \brief Set up pointers for real working arrays.
390 : */
391 : void
392 0 : dSetRWork(int m, int panel_size, double *dworkptr,
393 : double **dense, double **tempv)
394 : {
395 : double zero = 0.0;
396 :
397 0 : int maxsuper = SUPERLU_MAX( sp_ienv(3), sp_ienv(7) ),
398 0 : rowblk = sp_ienv(4);
399 0 : *dense = dworkptr;
400 0 : *tempv = *dense + panel_size*m;
401 0 : dfill (*dense, m * panel_size, zero);
402 0 : dfill (*tempv, NUM_TEMPV(m,panel_size,maxsuper,rowblk), zero);
403 0 : }
404 :
405 : /*! \brief Free the working storage used by factor routines.
406 : */
407 0 : void dLUWorkFree(int *iwork, double *dwork, GlobalLU_t *Glu)
408 : {
409 0 : if ( Glu->MemModel == SYSTEM ) {
410 0 : SUPERLU_FREE (iwork);
411 0 : SUPERLU_FREE (dwork);
412 : } else {
413 0 : Glu->stack.used -= (Glu->stack.size - Glu->stack.top2);
414 0 : Glu->stack.top2 = Glu->stack.size;
415 : /* dStackCompress(Glu); */
416 : }
417 :
418 0 : SUPERLU_FREE (Glu->expanders);
419 0 : Glu->expanders = NULL;
420 0 : }
421 :
422 : /*! \brief Expand the data structures for L and U during the factorization.
423 : *
424 : * <pre>
425 : * Return value: 0 - successful return
426 : * > 0 - number of bytes allocated when run out of space
427 : * </pre>
428 : */
429 : int_t
430 0 : dLUMemXpand(int jcol,
431 : int_t next, /* number of elements currently in the factors */
432 : MemType mem_type, /* which type of memory to expand */
433 : int_t *maxlen, /* modified - maximum length of a data structure */
434 : GlobalLU_t *Glu /* modified - global LU data structures */
435 : )
436 : {
437 : void *new_mem;
438 :
439 : #if ( DEBUGlevel>=1 )
440 : printf("dLUMemXpand[1]: jcol %d, next %lld, maxlen %lld, MemType %d\n",
441 : jcol, (long long) next, (long long) *maxlen, mem_type);
442 : #endif
443 :
444 0 : if (mem_type == USUB)
445 0 : new_mem = dexpand(maxlen, mem_type, next, 1, Glu);
446 : else
447 0 : new_mem = dexpand(maxlen, mem_type, next, 0, Glu);
448 :
449 0 : if ( !new_mem ) {
450 0 : int_t nzlmax = Glu->nzlmax;
451 0 : int_t nzumax = Glu->nzumax;
452 0 : int_t nzlumax = Glu->nzlumax;
453 0 : fprintf(stderr, "Can't expand MemType %d: jcol %d\n", mem_type, jcol);
454 0 : return (dmemory_usage(nzlmax, nzumax, nzlumax, Glu->n) + Glu->n);
455 : }
456 :
457 0 : switch ( mem_type ) {
458 0 : case LUSUP:
459 0 : Glu->lusup = (void *) new_mem;
460 0 : Glu->nzlumax = *maxlen;
461 0 : break;
462 0 : case UCOL:
463 0 : Glu->ucol = (void *) new_mem;
464 0 : Glu->nzumax = *maxlen;
465 0 : break;
466 0 : case LSUB:
467 0 : Glu->lsub = (int_t *) new_mem;
468 0 : Glu->nzlmax = *maxlen;
469 0 : break;
470 0 : case USUB:
471 0 : Glu->usub = (int_t *) new_mem;
472 0 : Glu->nzumax = *maxlen;
473 0 : break;
474 : default: break;
475 : }
476 :
477 : return 0;
478 :
479 : }
480 :
481 :
482 : void
483 0 : copy_mem_double(int_t howmany, void *old, void *new)
484 : {
485 : register int_t i;
486 : double *dold = old;
487 : double *dnew = new;
488 0 : for (i = 0; i < howmany; i++) dnew[i] = dold[i];
489 0 : }
490 :
491 : /*! \brief Expand the existing storage to accommodate more fill-ins.
492 : */
493 : void
494 0 : *dexpand (
495 : int_t *prev_len, /* length used from previous call */
496 : MemType type, /* which part of the memory to expand */
497 : int_t len_to_copy, /* size of the memory to be copied to new store */
498 : int keep_prev, /* = 1: use prev_len;
499 : = 0: compute new_len to expand */
500 : GlobalLU_t *Glu /* modified - global LU data structures */
501 : )
502 : {
503 : float EXPAND = 1.5;
504 : float alpha;
505 : void *new_mem, *old_mem;
506 : int_t new_len, bytes_to_copy;
507 : int tries, lword, extra;
508 0 : ExpHeader *expanders = Glu->expanders; /* Array of 4 types of memory */
509 :
510 : alpha = EXPAND;
511 :
512 0 : if ( Glu->num_expansions == 0 || keep_prev ) {
513 : /* First time allocate requested */
514 0 : new_len = *prev_len;
515 : } else {
516 0 : new_len = alpha * *prev_len;
517 : }
518 :
519 0 : if ( type == LSUB || type == USUB ) lword = sizeof(int_t);
520 : else lword = sizeof(double);
521 :
522 0 : if ( Glu->MemModel == SYSTEM ) {
523 0 : new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
524 0 : if ( Glu->num_expansions != 0 ) {
525 : tries = 0;
526 0 : if ( keep_prev ) {
527 0 : if ( !new_mem ) return (NULL);
528 : } else {
529 0 : while ( !new_mem ) {
530 0 : if ( ++tries > 10 ) return (NULL);
531 0 : alpha = Reduce(alpha);
532 0 : new_len = alpha * *prev_len;
533 0 : new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
534 : }
535 : }
536 0 : if ( type == LSUB || type == USUB ) {
537 0 : copy_mem_int(len_to_copy, expanders[type].mem, new_mem);
538 : } else {
539 0 : copy_mem_double(len_to_copy, expanders[type].mem, new_mem);
540 : }
541 0 : SUPERLU_FREE (expanders[type].mem);
542 : }
543 0 : expanders[type].mem = (void *) new_mem;
544 :
545 : } else { /* MemModel == USER */
546 :
547 0 : if ( Glu->num_expansions == 0 ) { /* First time initialization */
548 :
549 0 : new_mem = duser_malloc(new_len * lword, HEAD, Glu);
550 0 : if ( NotDoubleAlign(new_mem) &&
551 : (type == LUSUP || type == UCOL) ) {
552 : old_mem = new_mem;
553 0 : new_mem = (void *)DoubleAlign(new_mem);
554 0 : extra = (char*)new_mem - (char*)old_mem;
555 : #if ( DEBUGlevel>=1 )
556 : printf("expand(): not aligned, extra %d\n", extra);
557 : #endif
558 0 : Glu->stack.top1 += extra;
559 0 : Glu->stack.used += extra;
560 : }
561 :
562 0 : expanders[type].mem = (void *) new_mem;
563 :
564 : } else { /* CASE: num_expansions != 0 */
565 :
566 : tries = 0;
567 0 : extra = (new_len - *prev_len) * lword;
568 0 : if ( keep_prev ) {
569 0 : if ( StackFull(extra) ) return (NULL);
570 : } else {
571 0 : while ( StackFull(extra) ) {
572 0 : if ( ++tries > 10 ) return (NULL);
573 0 : alpha = Reduce(alpha);
574 0 : new_len = alpha * *prev_len;
575 0 : extra = (new_len - *prev_len) * lword;
576 : }
577 : }
578 :
579 : /* Need to expand the memory: moving the content after the current MemType
580 : to make extra room for the current MemType.
581 : Memory layout: [ LUSUP || UCOL || LSUB || USUB ]
582 : */
583 0 : if ( type != USUB ) {
584 0 : new_mem = (void*)((char*)expanders[type + 1].mem + extra);
585 0 : bytes_to_copy = (char*)Glu->stack.array + Glu->stack.top1
586 0 : - (char*)expanders[type + 1].mem;
587 0 : user_bcopy(expanders[type+1].mem, new_mem, bytes_to_copy);
588 :
589 0 : if ( type < USUB ) {
590 0 : Glu->usub = expanders[USUB].mem =
591 0 : (void*)((char*)expanders[USUB].mem + extra);
592 : }
593 0 : if ( type < LSUB ) {
594 0 : Glu->lsub = expanders[LSUB].mem =
595 0 : (void*)((char*)expanders[LSUB].mem + extra);
596 : }
597 0 : if ( type < UCOL ) {
598 0 : Glu->ucol = expanders[UCOL].mem =
599 0 : (void*)((char*)expanders[UCOL].mem + extra);
600 : }
601 0 : Glu->stack.top1 += extra;
602 0 : Glu->stack.used += extra;
603 0 : if ( type == UCOL ) {
604 0 : Glu->stack.top1 += extra; /* Add same amount for USUB */
605 0 : Glu->stack.used += extra;
606 : }
607 :
608 : } /* end expansion */
609 :
610 : } /* else ... */
611 : }
612 :
613 0 : expanders[type].size = new_len;
614 0 : *prev_len = new_len;
615 0 : if ( Glu->num_expansions ) ++Glu->num_expansions;
616 :
617 0 : return (void *) expanders[type].mem;
618 :
619 : } /* dexpand */
620 :
621 :
622 : /*! \brief Compress the work[] array to remove fragmentation.
623 : */
624 : void
625 0 : dStackCompress(GlobalLU_t *Glu)
626 : {
627 : register int iword, dword, ndim;
628 : char *last, *fragment;
629 : int_t *ifrom, *ito;
630 : double *dfrom, *dto;
631 : int_t *xlsub, *lsub, *xusub, *usub, *xlusup;
632 : double *ucol, *lusup;
633 :
634 : iword = sizeof(int);
635 : dword = sizeof(double);
636 0 : ndim = Glu->n;
637 :
638 0 : xlsub = Glu->xlsub;
639 0 : lsub = Glu->lsub;
640 0 : xusub = Glu->xusub;
641 0 : usub = Glu->usub;
642 0 : xlusup = Glu->xlusup;
643 0 : ucol = Glu->ucol;
644 0 : lusup = Glu->lusup;
645 :
646 : dfrom = ucol;
647 0 : dto = (double *)((char*)lusup + xlusup[ndim] * dword);
648 0 : copy_mem_double(xusub[ndim], dfrom, dto);
649 : ucol = dto;
650 :
651 : ifrom = lsub;
652 0 : ito = (int_t *) ((char*)ucol + xusub[ndim] * iword);
653 0 : copy_mem_int(xlsub[ndim], ifrom, ito);
654 : lsub = ito;
655 :
656 : ifrom = usub;
657 0 : ito = (int_t *) ((char*)lsub + xlsub[ndim] * iword);
658 0 : copy_mem_int(xusub[ndim], ifrom, ito);
659 : usub = ito;
660 :
661 0 : last = (char*)usub + xusub[ndim] * iword;
662 0 : fragment = (char*) (((char*)Glu->stack.array + Glu->stack.top1) - last);
663 0 : Glu->stack.used -= (long int) fragment;
664 0 : Glu->stack.top1 -= (long int) fragment;
665 :
666 0 : Glu->ucol = ucol;
667 0 : Glu->lsub = lsub;
668 0 : Glu->usub = usub;
669 :
670 : #if ( DEBUGlevel>=1 )
671 : printf("dStackCompress: fragment %lld\n", (long long) fragment);
672 : /* for (last = 0; last < ndim; ++last)
673 : print_lu_col("After compress:", last, 0);*/
674 : #endif
675 :
676 0 : }
677 :
678 : /*! \brief Allocate storage for original matrix A
679 : */
680 : void
681 0 : dallocateA(int n, int_t nnz, double **a, int_t **asub, int_t **xa)
682 : {
683 0 : *a = (double *) doubleMalloc(nnz);
684 0 : *asub = (int_t *) intMalloc(nnz);
685 0 : *xa = (int_t *) intMalloc(n+1);
686 0 : }
687 :
688 :
689 0 : double *doubleMalloc(size_t n)
690 : {
691 : double *buf;
692 0 : buf = (double *) SUPERLU_MALLOC(n * (size_t) sizeof(double));
693 0 : if ( !buf ) {
694 0 : ABORT("SUPERLU_MALLOC failed for buf in doubleMalloc()\n");
695 : }
696 0 : return (buf);
697 : }
698 :
699 0 : double *doubleCalloc(size_t n)
700 : {
701 : double *buf;
702 : register size_t i;
703 : double zero = 0.0;
704 0 : buf = (double *) SUPERLU_MALLOC(n * (size_t) sizeof(double));
705 0 : if ( !buf ) {
706 0 : ABORT("SUPERLU_MALLOC failed for buf in doubleCalloc()\n");
707 : }
708 0 : for (i = 0; i < n; ++i) buf[i] = zero;
709 0 : return (buf);
710 : }
711 :
712 :
713 0 : int_t dmemory_usage(const int_t nzlmax, const int_t nzumax,
714 : const int_t nzlumax, const int n)
715 : {
716 : register int iword, liword, dword;
717 :
718 : iword = sizeof(int);
719 : liword = sizeof(int_t);
720 : dword = sizeof(double);
721 :
722 0 : return (10 * n * iword +
723 0 : nzlmax * liword + nzumax * (liword + dword) + nzlumax * dword);
724 :
725 : }
|