Actual source code: spbas_cholesky.h


  2: /*
  3:    spbas_cholesky_row_alloc:
  4:       in the data arrays, find a place where another row may be stored.
  5:       Return PETSC_ERR_MEM if there is insufficient space to store the
  6:       row, so garbage collection and/or re-allocation may be done.
  7: */
  8: PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used)
  9: {
 11:   retval.icols[k]  = &retval.alloc_icol[*n_alloc_used];
 12:   retval.values[k] = &retval.alloc_val[*n_alloc_used];
 13:   *n_alloc_used   += r_nnz;
 14:   if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_ERR_MEM);
 15:   return(0);
 16: }

 18: /*
 19:   spbas_cholesky_garbage_collect:
 20:      move the rows which have been calculated so far, as well as
 21:      those currently under construction, to the front of the
 22:      array, while putting them in the proper order.
 23:      When it seems necessary, enlarge the current arrays.

 25:      NB: re-allocation is being simulated using
 26:          PetscMalloc, memcpy, PetscFree, because
 27:          PetscRealloc does not seem to exist.

 29: */
 30: PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result,         /* I/O: the Cholesky factor matrix being constructed.
 31:                                                                                     Only the storage, not the contents of this matrix is changed in this function */
 32:                                               PetscInt     i_row,           /* I  : Number of rows for which the final contents are known */
 33:                                               PetscInt     *n_row_alloc_ok, /* I/O: Number of rows which are already in their final
 34:                                                                                     places in the arrays: they need not be moved any more */
 35:                                               PetscInt     *n_alloc_used,   /* I/O:  */
 36:                                               PetscInt     *max_row_nnz)    /* I  : Over-estimate of the number of nonzeros needed to store each row */
 37: {
 38: /* PSEUDO-CODE:
 39:   1. Choose the appropriate size for the arrays
 40:   2. Rescue the arrays which would be lost during garbage collection
 41:   3. Reallocate and correct administration
 42:   4. Move all arrays so that they are in proper order */

 44:   PetscInt        i,j;
 45:   PetscInt        nrows         = result->nrows;
 46:   PetscInt        n_alloc_ok    =0;
 47:   PetscInt        n_alloc_ok_max=0;
 48:   PetscErrorCode  ierr;
 49:   PetscInt        need_already  = 0;
 50:   PetscInt        n_rows_ahead  =0;
 51:   PetscInt        max_need_extra= 0;
 52:   PetscInt        n_alloc_max, n_alloc_est, n_alloc;
 53:   PetscInt        n_alloc_now     = result->n_alloc_icol;
 54:   PetscInt        *alloc_icol_old = result->alloc_icol;
 55:   PetscScalar     *alloc_val_old  = result->alloc_val;
 56:   PetscInt        *icol_rescue;
 57:   PetscScalar     *val_rescue;
 58:   PetscInt        n_rescue;
 59:   PetscInt        n_row_rescue;
 60:   PetscInt        i_here, i_last, n_copy;
 61:   const PetscReal xtra_perc = 20;

 64:   /*********************************************************
 65:   1. Choose appropriate array size
 66:   Count number of rows and memory usage which is already final */
 67:   for (i=0; i<i_row; i++)  {
 68:     n_alloc_ok     += result->row_nnz[i];
 69:     n_alloc_ok_max += max_row_nnz[i];
 70:   }

 72:   /* Count currently needed memory usage and future memory requirements
 73:     (max, predicted)*/
 74:   for (i=i_row; i<nrows; i++) {
 75:     if (!result->row_nnz[i]) {
 76:       max_need_extra += max_row_nnz[i];
 77:     } else {
 78:       need_already += max_row_nnz[i];
 79:       n_rows_ahead++;
 80:     }
 81:   }

 83:   /* Make maximal and realistic memory requirement estimates */
 84:   n_alloc_max = n_alloc_ok + need_already + max_need_extra;
 85:   n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) *  ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max));

 87:   /* Choose array sizes */
 88:   if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max;
 89:   else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now;
 90:   else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) n_alloc = n_alloc_max;
 91:   else n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0));

 93:   /* If new estimate is less than what we already have,
 94:     don't reallocate, just garbage-collect */
 95:   if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) {
 96:     n_alloc = result->n_alloc_icol;
 97:   }

 99:   /* Motivate dimension choice */
100:   PetscInfo1(NULL,"   Allocating %d nonzeros: ",n_alloc);
101:   if (n_alloc_max == n_alloc_est) {
102:     PetscInfo(NULL,"this is the correct size\n");
103:   } else if (n_alloc_now >= n_alloc_est) {
104:     PetscInfo(NULL,"the current size, which seems enough\n");
105:   } else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) {
106:     PetscInfo(NULL,"the maximum estimate\n");
107:   } else {
108:     PetscInfo1(NULL,"%6.2f %% more than the estimate\n",xtra_perc);
109:   }

111:   /**********************************************************
112:   2. Rescue arrays which would be lost
113:   Count how many rows and nonzeros will have to be rescued
114:   when moving all arrays in place */
115:   n_row_rescue = 0; n_rescue = 0;
116:   if (*n_row_alloc_ok==0) *n_alloc_used = 0;
117:   else {
118:     i = *n_row_alloc_ok - 1;

120:     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
121:   }

123:   for (i=*n_row_alloc_ok; i<nrows; i++) {
124:     i_here = result->icols[i]-result->alloc_icol;
125:     i_last = i_here + result->row_nnz[i];
126:     if (result->row_nnz[i]>0) {
127:       if (*n_alloc_used > i_here || i_last > n_alloc) {
128:         n_rescue += result->row_nnz[i];
129:         n_row_rescue++;
130:       }

132:       if (i<i_row) *n_alloc_used += result->row_nnz[i];
133:       else         *n_alloc_used += max_row_nnz[i];
134:     }
135:   }

137:   /* Allocate rescue arrays */
138:   PetscMalloc1(n_rescue, &icol_rescue);
139:   PetscMalloc1(n_rescue, &val_rescue);

141:   /* Rescue the arrays which need rescuing */
142:   n_row_rescue = 0; n_rescue = 0;
143:   if (*n_row_alloc_ok==0) *n_alloc_used = 0;
144:   else {
145:     i             = *n_row_alloc_ok - 1;
146:     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
147:   }

149:   for (i=*n_row_alloc_ok; i<nrows; i++) {
150:     i_here = result->icols[i]-result->alloc_icol;
151:     i_last = i_here + result->row_nnz[i];
152:     if (result->row_nnz[i]>0) {
153:       if (*n_alloc_used > i_here || i_last > n_alloc) {
154:         PetscArraycpy(&icol_rescue[n_rescue], result->icols[i], result->row_nnz[i]);
155:         PetscArraycpy(&val_rescue[n_rescue], result->values[i], result->row_nnz[i]);
156:         n_rescue += result->row_nnz[i];
157:         n_row_rescue++;
158:       }

160:       if (i<i_row) *n_alloc_used += result->row_nnz[i];
161:       else         *n_alloc_used += max_row_nnz[i];
162:     }
163:   }

165:   /**********************************************************
166:   3. Reallocate and correct administration */

168:   if (n_alloc != result->n_alloc_icol) {
169:     n_copy = PetscMin(n_alloc,result->n_alloc_icol);

171:     /* PETSC knows no REALLOC, so we'll REALLOC ourselves.

173:         Allocate new icol-data, copy old contents */
174:     PetscMalloc1(n_alloc, &result->alloc_icol);
175:     PetscArraycpy(result->alloc_icol, alloc_icol_old, n_copy);

177:     /* Update administration, Reset pointers to new arrays  */
178:     result->n_alloc_icol = n_alloc;
179:     for (i=0; i<nrows; i++) {
180:       result->icols[i]  =  result->alloc_icol + (result->icols[i]  - alloc_icol_old);
181:       result->values[i] =  result->alloc_val  + (result->icols[i]  - result->alloc_icol);
182:     }

184:     /* Delete old array */
185:     PetscFree(alloc_icol_old);

187:     /* Allocate new value-data, copy old contents */
188:     PetscMalloc1(n_alloc, &result->alloc_val);
189:     PetscArraycpy(result->alloc_val, alloc_val_old, n_copy);

191:     /* Update administration, Reset pointers to new arrays  */
192:     result->n_alloc_val = n_alloc;
193:     for (i=0; i<nrows; i++) {
194:       result->values[i] =  result->alloc_val + (result->icols[i]  - result->alloc_icol);
195:     }

197:     /* Delete old array */
198:     PetscFree(alloc_val_old);
199:   }

201:   /*********************************************************
202:   4. Copy all the arrays to their proper places */
203:   n_row_rescue = 0; n_rescue = 0;
204:   if (*n_row_alloc_ok==0) *n_alloc_used = 0;
205:   else {
206:     i = *n_row_alloc_ok - 1;

208:     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
209:   }

211:   for (i=*n_row_alloc_ok; i<nrows; i++) {
212:     i_here = result->icols[i]-result->alloc_icol;
213:     i_last = i_here + result->row_nnz[i];

215:     result->icols[i] = result->alloc_icol + *n_alloc_used;
216:     result->values[i]= result->alloc_val  + *n_alloc_used;

218:     if (result->row_nnz[i]>0) {
219:       if (*n_alloc_used > i_here || i_last > n_alloc) {
220:         PetscArraycpy(result->icols[i], &icol_rescue[n_rescue], result->row_nnz[i]);
221:         PetscArraycpy(result->values[i],&val_rescue[n_rescue],result->row_nnz[i]);

223:         n_rescue += result->row_nnz[i];
224:         n_row_rescue++;
225:       } else {
226:         for (j=0; j<result->row_nnz[i]; j++) {
227:           result->icols[i][j]  = result->alloc_icol[i_here+j];
228:           result->values[i][j] = result->alloc_val[i_here+j];
229:         }
230:       }
231:       if (i<i_row) *n_alloc_used += result->row_nnz[i];
232:       else         *n_alloc_used += max_row_nnz[i];
233:     }
234:   }

236:   /* Delete the rescue arrays */
237:   PetscFree(icol_rescue);
238:   PetscFree(val_rescue);

240:   *n_row_alloc_ok = i_row;
241:   return(0);
242: }

244: /*
245:   spbas_incomplete_cholesky:
246:      incomplete Cholesky decomposition of a square, symmetric,
247:      positive definite matrix.

249:      In case negative diagonals are encountered, function returns
250:      NEGATIVE_DIAGONAL. When this happens, call this function again
251:      with a larger epsdiag_in, a less sparse pattern, and/or a smaller
252:      droptol
253: */
254: PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L)
255: {
256:   PetscInt        jL;
257:   Mat_SeqAIJ      *a =(Mat_SeqAIJ*)A->data;
258:   PetscInt        *ai=a->i,*aj=a->j;
259:   MatScalar       *aa=a->a;
260:   PetscInt        nrows, ncols;
261:   PetscInt        *max_row_nnz;
262:   PetscErrorCode  ierr;
263:   spbas_matrix    retval;
264:   PetscScalar     *diag;
265:   PetscScalar     *val;
266:   PetscScalar     *lvec;
267:   PetscScalar     epsdiag;
268:   PetscInt        i,j,k;
269:   const PetscBool do_values = PETSC_TRUE;
270:   PetscInt        *r1_icol;
271:   PetscScalar     *r1_val;
272:   PetscInt        *r_icol;
273:   PetscInt        r_nnz;
274:   PetscScalar     *r_val;
275:   PetscInt        *A_icol;
276:   PetscInt        A_nnz;
277:   PetscScalar     *A_val;
278:   PetscInt        *p_icol;
279:   PetscInt        p_nnz;
280:   PetscInt        n_row_alloc_ok = 0;   /* number of rows which have been stored   correctly in the matrix */
281:   PetscInt        n_alloc_used   = 0;   /* part of result->icols and result->values   which is currently being used */

284:   /* Convert the Manteuffel shift from 'fraction of average diagonal' to   dimensioned value */
285:   MatGetSize(A, &nrows, &ncols);
286:   MatGetTrace(A, &epsdiag);

288:   epsdiag *= epsdiag_in / nrows;

290:   PetscInfo2(NULL,"   Dimensioned Manteuffel shift %g Drop tolerance %g\n", (double)PetscRealPart(epsdiag),(double)droptol);

292:   if (droptol<1e-10) droptol=1e-10;

294:   if ((nrows != pattern.nrows) || (ncols != pattern.ncols) || (ncols != nrows)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP,"Dimension error in spbas_incomplete_cholesky\n");

296:   retval.nrows        = nrows;
297:   retval.ncols        = nrows;
298:   retval.nnz          = pattern.nnz/10;
299:   retval.col_idx_type = SPBAS_COLUMN_NUMBERS;
300:   retval.block_data   = PETSC_TRUE;

302:   spbas_allocate_pattern(&retval, do_values);
303:   PetscArrayzero(retval.row_nnz, nrows);
304:   spbas_allocate_data(&retval);
305:   retval.nnz = 0;

307:   PetscMalloc1(nrows, &diag);
308:   PetscCalloc1(nrows, &val);
309:   PetscCalloc1(nrows, &lvec);
310:   PetscCalloc1(nrows, &max_row_nnz);

312:   /* Check correct format of sparseness pattern */
313:   if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n");

315:   /* Count the nonzeros on transpose of pattern */
316:   for (i = 0; i<nrows; i++)  {
317:     p_nnz  = pattern.row_nnz[i];
318:     p_icol = pattern.icols[i];
319:     for (j=0; j<p_nnz; j++)  {
320:       max_row_nnz[i+p_icol[j]]++;
321:     }
322:   }

324:   /* Calculate rows of L */
325:   for (i = 0; i<nrows; i++)  {
326:     p_nnz  = pattern.row_nnz[i];
327:     p_icol = pattern.icols[i];

329:     r_nnz  = retval.row_nnz[i];
330:     r_icol = retval.icols[i];
331:     r_val  = retval.values[i];
332:     A_nnz  = ai[rip[i]+1] - ai[rip[i]];
333:     A_icol = &aj[ai[rip[i]]];
334:     A_val  = &aa[ai[rip[i]]];

336:     /* Calculate  val += A(i,i:n)'; */
337:     for (j=0; j<A_nnz; j++) {
338:       k = riip[A_icol[j]];
339:       if (k>=i) val[k] = A_val[j];
340:     }

342:     /*  Add regularization */
343:     val[i] += epsdiag;

345:     /* Calculate lvec   = diag(D(0:i-1)) * L(0:i-1,i);
346:         val(i) = A(i,i) - L(0:i-1,i)' * lvec */
347:     for (j=0; j<r_nnz; j++)  {
348:       k       = r_icol[j];
349:       lvec[k] = diag[k] * r_val[j];
350:       val[i] -= r_val[j] * lvec[k];
351:     }

353:     /* Calculate the new diagonal */
354:     diag[i] = val[i];
355:     if (PetscRealPart(diag[i])<droptol) {
356:       PetscInfo(NULL,"Error in spbas_incomplete_cholesky:\n");
357:       PetscInfo1(NULL,"Negative diagonal in row %d\n",i+1);

359:       /* Delete the whole matrix at once. */
360:       spbas_delete(retval);
361:       return NEGATIVE_DIAGONAL;
362:     }

364:     /* If necessary, allocate arrays */
365:     if (r_nnz==0) {
366:       spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);
367:       if (ierr == PETSC_ERR_MEM) {
368:         spbas_cholesky_garbage_collect(&retval,  i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);
369:         spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);
370:       } else 
371:       r_icol = retval.icols[i];
372:       r_val  = retval.values[i];
373:     }

375:     /* Now, fill in */
376:     r_icol[r_nnz] = i;
377:     r_val [r_nnz] = 1.0;
378:     r_nnz++;
379:     retval.row_nnz[i]++;

381:     retval.nnz += r_nnz;

383:     /* Calculate
384:         val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */
385:     for (j=1; j<p_nnz; j++)  {
386:       k       = i+p_icol[j];
387:       r1_icol = retval.icols[k];
388:       r1_val  = retval.values[k];
389:       for (jL=0; jL<retval.row_nnz[k]; jL++) {
390:         val[k] -= r1_val[jL] * lvec[r1_icol[jL]];
391:       }
392:       val[k] /= diag[i];

394:       if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) {
395:         /* If necessary, allocate arrays */
396:         if (retval.row_nnz[k]==0) {
397:           spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
398:           if (ierr == PETSC_ERR_MEM) {
399:             spbas_cholesky_garbage_collect(&retval,  i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);
400:             spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
401:             r_icol = retval.icols[i];
402:           } else 
403:         }

405:         retval.icols[k][retval.row_nnz[k]]  = i;
406:         retval.values[k][retval.row_nnz[k]] = val[k];
407:         retval.row_nnz[k]++;
408:       }
409:       val[k] = 0;
410:     }

412:     /* Erase the values used in the work arrays */
413:     for (j=0; j<r_nnz; j++) lvec[r_icol[j]] = 0;
414:   }

416:   PetscFree(lvec);
417:   PetscFree(val);

419:   spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);
420:   PetscFree(max_row_nnz);

422:   /* Place the inverse of the diagonals in the matrix */
423:   for (i=0; i<nrows; i++) {
424:     r_nnz = retval.row_nnz[i];

426:     retval.values[i][r_nnz-1] = 1.0 / diag[i];
427:     for (j=0; j<r_nnz-1; j++) {
428:       retval.values[i][j] *= -1;
429:     }
430:   }
431:   PetscFree(diag);
432:   *matrix_L = retval;
433:   return(0);
434: }