Actual source code: bddcprivate.c

  1: #include <../src/mat/impls/aij/seq/aij.h>
  2: #include <../src/ksp/pc/impls/bddc/bddc.h>
  3: #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
  4: #include <../src/mat/impls/dense/seq/dense.h>
  5: #include <petscdmplex.h>
  6: #include <petscblaslapack.h>
  7: #include <petsc/private/sfimpl.h>
  8: #include <petsc/private/dmpleximpl.h>
  9: #include <petscdmda.h>

 11: static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); /* forward declaration of a file-local (static) helper; its definition is outside the visible part of this file */

 13: /* if range is true,  it returns B s.t. span{B} = range(A)
 14:    if range is false, it returns B s.t. range(B) _|_ range(A) */
 15: PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B)
 16: {
 17:   PetscScalar    *uwork,*data,*U, ds = 0.;
 18:   PetscReal      *sing;
 19:   PetscBLASInt   bM,bN,lwork,lierr,di = 1;
 20:   PetscInt       ulw,i,nr,nc,n;
 21: #if defined(PETSC_USE_COMPLEX)
 22:   PetscReal      *rwork2;
 23: #endif

 25:   MatGetSize(A,&nr,&nc);
 26:   if (!nr || !nc) return 0;

 28:   /* workspace */
 29:   if (!work) {
 30:     ulw  = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc));
 31:     PetscMalloc1(ulw,&uwork);
 32:   } else {
 33:     ulw   = lw;
 34:     uwork = work;
 35:   }
 36:   n = PetscMin(nr,nc);
 37:   if (!rwork) {
 38:     PetscMalloc1(n,&sing);
 39:   } else {
 40:     sing = rwork;
 41:   }

 43:   /* SVD */
 44:   PetscMalloc1(nr*nr,&U);
 45:   PetscBLASIntCast(nr,&bM);
 46:   PetscBLASIntCast(nc,&bN);
 47:   PetscBLASIntCast(ulw,&lwork);
 48:   MatDenseGetArray(A,&data);
 49:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
 50: #if !defined(PETSC_USE_COMPLEX)
 51:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr));
 52: #else
 53:   PetscMalloc1(5*n,&rwork2);
 54:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,rwork2,&lierr));
 55:   PetscFree(rwork2);
 56: #endif
 57:   PetscFPTrapPop();
 59:   MatDenseRestoreArray(A,&data);
 60:   for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break;
 61:   if (!rwork) {
 62:     PetscFree(sing);
 63:   }
 64:   if (!work) {
 65:     PetscFree(uwork);
 66:   }
 67:   /* create B */
 68:   if (!range) {
 69:     MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);
 70:     MatDenseGetArray(*B,&data);
 71:     PetscArraycpy(data,U+nr*i,(nr-i)*nr);
 72:   } else {
 73:     MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);
 74:     MatDenseGetArray(*B,&data);
 75:     PetscArraycpy(data,U,i*nr);
 76:   }
 77:   MatDenseRestoreArray(*B,&data);
 78:   PetscFree(U);
 79:   return 0;
 80: }

 82: /* TODO REMOVE: debug-only state, compiled only when PRINT_GDET is defined */
 83: #if defined(PRINT_GDET)
 84: static int inc = 0; /* running index used in (and incremented by) the construction of the Gdet_* dump file names */
 85: static int lev = 0; /* value printed in the "l%d" part of the dump file name; presumably the multilevel depth -- TODO confirm */
 86: #endif

 88: PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork)
 89: {
 90:   Mat            GE,GEd;
 91:   PetscInt       rsize,csize,esize;
 92:   PetscScalar    *ptr;

 94:   ISGetSize(edge,&esize);
 95:   if (!esize) return 0;
 96:   ISGetSize(extrow,&rsize);
 97:   ISGetSize(extcol,&csize);

 99:   /* gradients */
100:   ptr  = work + 5*esize;
101:   MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);
102:   MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);
103:   MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);
104:   MatDestroy(&GE);

106:   /* constants */
107:   ptr += rsize*csize;
108:   MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);
109:   MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);
110:   MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);
111:   MatDestroy(&GE);
112:   MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);
113:   MatDestroy(&GEd);

115:   if (corners) {
116:     Mat               GEc;
117:     const PetscScalar *vals;
118:     PetscScalar       v;

120:     MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);
121:     MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);
122:     MatDenseGetArrayRead(GEd,&vals);
123:     /* v    = PetscAbsScalar(vals[0]) */;
124:     v    = 1.;
125:     cvals[0] = vals[0]/v;
126:     cvals[1] = vals[1]/v;
127:     MatDenseRestoreArrayRead(GEd,&vals);
128:     MatScale(*GKins,1./v);
129: #if defined(PRINT_GDET)
130:     {
131:       PetscViewer viewer;
132:       char filename[256];
133:       sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++);
134:       PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
135:       PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
136:       PetscObjectSetName((PetscObject)GEc,"GEc");
137:       MatView(GEc,viewer);
138:       PetscObjectSetName((PetscObject)(*GKins),"GK");
139:       MatView(*GKins,viewer);
140:       PetscObjectSetName((PetscObject)GEd,"Gproj");
141:       MatView(GEd,viewer);
142:       PetscViewerDestroy(&viewer);
143:     }
144: #endif
145:     MatDestroy(&GEd);
146:     MatDestroy(&GEc);
147:   }

149:   return 0;
150: }

152: PetscErrorCode PCBDDCNedelecSupport(PC pc)
153: {
154:   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
155:   Mat_IS                 *matis = (Mat_IS*)pc->pmat->data;
156:   Mat                    G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit;
157:   Vec                    tvec;
158:   PetscSF                sfv;
159:   ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g;
160:   MPI_Comm               comm;
161:   IS                     lned,primals,allprimals,nedfieldlocal;
162:   IS                     *eedges,*extrows,*extcols,*alleedges;
163:   PetscBT                btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter;
164:   PetscScalar            *vals,*work;
165:   PetscReal              *rwork;
166:   const PetscInt         *idxs,*ii,*jj,*iit,*jjt;
167:   PetscInt               ne,nv,Lv,order,n,field;
168:   PetscInt               n_neigh,*neigh,*n_shared,**shared;
169:   PetscInt               i,j,extmem,cum,maxsize,nee;
170:   PetscInt               *extrow,*extrowcum,*marks,*vmarks,*gidxs;
171:   PetscInt               *sfvleaves,*sfvroots;
172:   PetscInt               *corners,*cedges;
173:   PetscInt               *ecount,**eneighs,*vcount,**vneighs;
174:   PetscInt               *emarks;
175:   PetscBool              print,eerr,done,lrc[2],conforming,global,singular,setprimal;
176:   PetscErrorCode         ierr;

178:   /* If the discrete gradient is defined for a subset of dofs and global is true,
179:      it assumes G is given in global ordering for all the dofs.
180:      Otherwise, the ordering is global for the Nedelec field */
181:   order      = pcbddc->nedorder;
182:   conforming = pcbddc->conforming;
183:   field      = pcbddc->nedfield;
184:   global     = pcbddc->nedglobal;
185:   setprimal  = PETSC_FALSE;
186:   print      = PETSC_FALSE;
187:   singular   = PETSC_FALSE;

189:   /* Command line customization */
190:   PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");
191:   PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);
192:   PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);
193:   PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);
194:   /* print debug info TODO: to be removed */
195:   PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);
196:   PetscOptionsEnd();

198:   /* Return if there are no edges in the decomposition and the problem is not singular */
199:   MatISGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);
200:   ISLocalToGlobalMappingGetSize(al2g,&n);
201:   PetscObjectGetComm((PetscObject)pc,&comm);
202:   if (!singular) {
203:     VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);
204:     lrc[0] = PETSC_FALSE;
205:     for (i=0;i<n;i++) {
206:       if (PetscRealPart(vals[i]) > 2.) {
207:         lrc[0] = PETSC_TRUE;
208:         break;
209:       }
210:     }
211:     VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);
212:     MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);
213:     if (!lrc[1]) return 0;
214:   }

216:   /* Get Nedelec field */
218:   if (pcbddc->n_ISForDofsLocal && field >= 0) {
219:     PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);
220:     nedfieldlocal = pcbddc->ISForDofsLocal[field];
221:     ISGetLocalSize(nedfieldlocal,&ne);
222:   } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) {
223:     ne            = n;
224:     nedfieldlocal = NULL;
225:     global        = PETSC_TRUE;
226:   } else if (field == PETSC_DECIDE) {
227:     PetscInt rst,ren,*idx;

229:     PetscArrayzero(matis->sf_leafdata,n);
230:     PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);
231:     MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);
232:     for (i=rst;i<ren;i++) {
233:       PetscInt nc;

235:       MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);
236:       if (nc > 1) matis->sf_rootdata[i-rst] = 1;
237:       MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);
238:     }
239:     PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);
240:     PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);
241:     PetscMalloc1(n,&idx);
242:     for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i;
243:     ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);
244:   } else {
245:     SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified");
246:   }

248:   /* Sanity checks */

253:   /* Just set primal dofs and return */
254:   if (setprimal) {
255:     IS       enedfieldlocal;
256:     PetscInt *eidxs;

258:     PetscMalloc1(ne,&eidxs);
259:     VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);
260:     if (nedfieldlocal) {
261:       ISGetIndices(nedfieldlocal,&idxs);
262:       for (i=0,cum=0;i<ne;i++) {
263:         if (PetscRealPart(vals[idxs[i]]) > 2.) {
264:           eidxs[cum++] = idxs[i];
265:         }
266:       }
267:       ISRestoreIndices(nedfieldlocal,&idxs);
268:     } else {
269:       for (i=0,cum=0;i<ne;i++) {
270:         if (PetscRealPart(vals[i]) > 2.) {
271:           eidxs[cum++] = i;
272:         }
273:       }
274:     }
275:     VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);
276:     ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);
277:     PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);
278:     PetscFree(eidxs);
279:     ISDestroy(&nedfieldlocal);
280:     ISDestroy(&enedfieldlocal);
281:     return 0;
282:   }

284:   /* Compute some l2g maps */
285:   if (nedfieldlocal) {
286:     IS is;

288:     /* need to map from the local Nedelec field to local numbering */
289:     ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);
290:     /* need to map from the local Nedelec field to global numbering for the whole dofs*/
291:     ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);
292:     ISLocalToGlobalMappingCreateIS(is,&al2g);
293:     /* need to map from the local Nedelec field to global numbering (for Nedelec only) */
294:     if (global) {
295:       PetscObjectReference((PetscObject)al2g);
296:       el2g = al2g;
297:     } else {
298:       IS gis;

300:       ISRenumber(is,NULL,NULL,&gis);
301:       ISLocalToGlobalMappingCreateIS(gis,&el2g);
302:       ISDestroy(&gis);
303:     }
304:     ISDestroy(&is);
305:   } else {
306:     /* restore default */
307:     pcbddc->nedfield = -1;
308:     /* one ref for the destruction of al2g, one for el2g */
309:     PetscObjectReference((PetscObject)al2g);
310:     PetscObjectReference((PetscObject)al2g);
311:     el2g = al2g;
312:     fl2g = NULL;
313:   }

315:   /* Start communication to drop connections for interior edges (for cc analysis only) */
316:   PetscArrayzero(matis->sf_leafdata,n);
317:   PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);
318:   if (nedfieldlocal) {
319:     ISGetIndices(nedfieldlocal,&idxs);
320:     for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1;
321:     ISRestoreIndices(nedfieldlocal,&idxs);
322:   } else {
323:     for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1;
324:   }
325:   PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);
326:   PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);

328:   if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */
329:     MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);
330:     MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
331:     if (global) {
332:       PetscInt rst;

334:       MatGetOwnershipRange(G,&rst,NULL);
335:       for (i=0,cum=0;i<pc->pmat->rmap->n;i++) {
336:         if (matis->sf_rootdata[i] < 2) {
337:           matis->sf_rootdata[cum++] = i + rst;
338:         }
339:       }
340:       MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);
341:       MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);
342:     } else {
343:       PetscInt *tbz;

345:       PetscMalloc1(ne,&tbz);
346:       PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);
347:       PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);
348:       ISGetIndices(nedfieldlocal,&idxs);
349:       for (i=0,cum=0;i<ne;i++)
350:         if (matis->sf_leafdata[idxs[i]] == 1)
351:           tbz[cum++] = i;
352:       ISRestoreIndices(nedfieldlocal,&idxs);
353:       ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);
354:       MatZeroRows(G,cum,tbz,0.,NULL,NULL);
355:       PetscFree(tbz);
356:     }
357:   } else { /* we need the entire G to infer the nullspace */
358:     PetscObjectReference((PetscObject)pcbddc->discretegradient);
359:     G    = pcbddc->discretegradient;
360:   }

362:   /* Extract subdomain relevant rows of G */
363:   ISLocalToGlobalMappingGetIndices(el2g,&idxs);
364:   ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);
365:   MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);
366:   ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);
367:   ISDestroy(&lned);
368:   MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);
369:   MatDestroy(&lGall);
370:   MatISGetLocalMat(lGis,&lG);

372:   /* SF for nodal dofs communications */
373:   MatGetLocalSize(G,NULL,&Lv);
374:   MatISGetLocalToGlobalMapping(lGis,NULL,&vl2g);
375:   PetscObjectReference((PetscObject)vl2g);
376:   ISLocalToGlobalMappingGetSize(vl2g,&nv);
377:   PetscSFCreate(comm,&sfv);
378:   ISLocalToGlobalMappingGetIndices(vl2g,&idxs);
379:   PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);
380:   ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);
381:   i    = singular ? 2 : 1;
382:   PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);

384:   /* Destroy temporary G created in MATIS format and modified G */
385:   PetscObjectReference((PetscObject)lG);
386:   MatDestroy(&lGis);
387:   MatDestroy(&G);

389:   if (print) {
390:     PetscObjectSetName((PetscObject)lG,"initial_lG");
391:     MatView(lG,NULL);
392:   }

394:   /* Save lG for values insertion in change of basis */
395:   MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);

397:   /* Analyze the edge-nodes connections (duplicate lG) */
398:   MatDuplicate(lG,MAT_COPY_VALUES,&lGe);
399:   MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
400:   PetscBTCreate(nv,&btv);
401:   PetscBTCreate(ne,&bte);
402:   PetscBTCreate(ne,&btb);
403:   PetscBTCreate(ne,&btbd);
404:   PetscBTCreate(nv,&btvcand);
405:   /* need to import the boundary specification to ensure the
406:      proper detection of coarse edges' endpoints */
407:   if (pcbddc->DirichletBoundariesLocal) {
408:     IS is;

410:     if (fl2g) {
411:       ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);
412:     } else {
413:       is = pcbddc->DirichletBoundariesLocal;
414:     }
415:     ISGetLocalSize(is,&cum);
416:     ISGetIndices(is,&idxs);
417:     for (i=0;i<cum;i++) {
418:       if (idxs[i] >= 0) {
419:         PetscBTSet(btb,idxs[i]);
420:         PetscBTSet(btbd,idxs[i]);
421:       }
422:     }
423:     ISRestoreIndices(is,&idxs);
424:     if (fl2g) {
425:       ISDestroy(&is);
426:     }
427:   }
428:   if (pcbddc->NeumannBoundariesLocal) {
429:     IS is;

431:     if (fl2g) {
432:       ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);
433:     } else {
434:       is = pcbddc->NeumannBoundariesLocal;
435:     }
436:     ISGetLocalSize(is,&cum);
437:     ISGetIndices(is,&idxs);
438:     for (i=0;i<cum;i++) {
439:       if (idxs[i] >= 0) {
440:         PetscBTSet(btb,idxs[i]);
441:       }
442:     }
443:     ISRestoreIndices(is,&idxs);
444:     if (fl2g) {
445:       ISDestroy(&is);
446:     }
447:   }

449:   /* Count neighs per dof */
450:   ISLocalToGlobalMappingGetNodeInfo(el2g,NULL,&ecount,&eneighs);
451:   ISLocalToGlobalMappingGetNodeInfo(vl2g,NULL,&vcount,&vneighs);

453:   /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs
454:      for proper detection of coarse edges' endpoints */
455:   PetscBTCreate(ne,&btee);
456:   for (i=0;i<ne;i++) {
457:     if ((ecount[i] > 2 && !PetscBTLookup(btbd,i)) || (ecount[i] == 2 && PetscBTLookup(btb,i))) {
458:       PetscBTSet(btee,i);
459:     }
460:   }
461:   PetscMalloc1(ne,&marks);
462:   if (!conforming) {
463:     MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);
464:     MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
465:   }
466:   MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
467:   MatSeqAIJGetArray(lGe,&vals);
468:   cum  = 0;
469:   for (i=0;i<ne;i++) {
470:     /* eliminate rows corresponding to edge dofs belonging to coarse faces */
471:     if (!PetscBTLookup(btee,i)) {
472:       marks[cum++] = i;
473:       continue;
474:     }
475:     /* set badly connected edge dofs as primal */
476:     if (!conforming) {
477:       if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */
478:         marks[cum++] = i;
479:         PetscBTSet(bte,i);
480:         for (j=ii[i];j<ii[i+1];j++) {
481:           PetscBTSet(btv,jj[j]);
482:         }
483:       } else {
484:         /* every edge dofs should be connected trough a certain number of nodal dofs
485:            to other edge dofs belonging to coarse edges
486:            - at most 2 endpoints
487:            - order-1 interior nodal dofs
488:            - no undefined nodal dofs (nconn < order)
489:         */
490:         PetscInt ends = 0,ints = 0, undef = 0;
491:         for (j=ii[i];j<ii[i+1];j++) {
492:           PetscInt v = jj[j],k;
493:           PetscInt nconn = iit[v+1]-iit[v];
494:           for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--;
495:           if (nconn > order) ends++;
496:           else if (nconn == order) ints++;
497:           else undef++;
498:         }
499:         if (undef || ends > 2 || ints != order -1) {
500:           marks[cum++] = i;
501:           PetscBTSet(bte,i);
502:           for (j=ii[i];j<ii[i+1];j++) {
503:             PetscBTSet(btv,jj[j]);
504:           }
505:         }
506:       }
507:     }
508:     /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */
509:     if (!order && ii[i+1] != ii[i]) {
510:       PetscScalar val = 1./(ii[i+1]-ii[i]-1);
511:       for (j=ii[i];j<ii[i+1];j++) vals[j] = val;
512:     }
513:   }
514:   PetscBTDestroy(&btee);
515:   MatSeqAIJRestoreArray(lGe,&vals);
516:   MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
517:   if (!conforming) {
518:     MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
519:     MatDestroy(&lGt);
520:   }
521:   MatZeroRows(lGe,cum,marks,0.,NULL,NULL);

523:   /* identify splitpoints and corner candidates */
524:   MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);
525:   if (print) {
526:     PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");
527:     MatView(lGe,NULL);
528:     PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");
529:     MatView(lGt,NULL);
530:   }
531:   MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
532:   MatSeqAIJGetArray(lGt,&vals);
533:   for (i=0;i<nv;i++) {
534:     PetscInt  ord = order, test = ii[i+1]-ii[i], vc = vcount[i];
535:     PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE;
536:     if (!order) { /* variable order */
537:       PetscReal vorder = 0.;

539:       for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]);
540:       test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON);
542:       ord  = 1;
543:     }
544:     PetscAssert(test%ord == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %" PetscInt_FMT " connected with nodal dof %" PetscInt_FMT " with order %" PetscInt_FMT,test,i,ord);
545:     for (j=ii[i];j<ii[i+1] && sneighs;j++) {
546:       if (PetscBTLookup(btbd,jj[j])) {
547:         bdir = PETSC_TRUE;
548:         break;
549:       }
550:       if (vc != ecount[jj[j]]) {
551:         sneighs = PETSC_FALSE;
552:       } else {
553:         PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]];
554:         for (k=0;k<vc;k++) {
555:           if (vn[k] != en[k]) {
556:             sneighs = PETSC_FALSE;
557:             break;
558:           }
559:         }
560:       }
561:     }
562:     if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */
563:       if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %D (%D %D %D)\n",i,!sneighs,test >= 3*ord,bdir);
564:       PetscBTSet(btv,i);
565:     } else if (test == ord) {
566:       if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) {
567:         if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %D\n",i);
568:         PetscBTSet(btv,i);
569:       } else {
570:         if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %D\n",i);
571:         PetscBTSet(btvcand,i);
572:       }
573:     }
574:   }
575:   ISLocalToGlobalMappingRestoreNodeInfo(el2g,NULL,&ecount,&eneighs);
576:   ISLocalToGlobalMappingRestoreNodeInfo(vl2g,NULL,&vcount,&vneighs);
577:   PetscBTDestroy(&btbd);

579:   /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */
580:   if (order != 1) {
581:     if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n");
582:     MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
583:     for (i=0;i<nv;i++) {
584:       if (PetscBTLookup(btvcand,i)) {
585:         PetscBool found = PETSC_FALSE;
586:         for (j=ii[i];j<ii[i+1] && !found;j++) {
587:           PetscInt k,e = jj[j];
588:           if (PetscBTLookup(bte,e)) continue;
589:           for (k=iit[e];k<iit[e+1];k++) {
590:             PetscInt v = jjt[k];
591:             if (v != i && PetscBTLookup(btvcand,v)) {
592:               found = PETSC_TRUE;
593:               break;
594:             }
595:           }
596:         }
597:         if (!found) {
598:           if (print) PetscPrintf(PETSC_COMM_SELF,"  CANDIDATE %D CLEARED\n",i);
599:           PetscBTClear(btvcand,i);
600:         } else {
601:           if (print) PetscPrintf(PETSC_COMM_SELF,"  CANDIDATE %D ACCEPTED\n",i);
602:         }
603:       }
604:     }
605:     MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
606:   }
607:   MatSeqAIJRestoreArray(lGt,&vals);
608:   MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
609:   MatDestroy(&lGe);

611:   /* Get the local G^T explicitly */
612:   MatDestroy(&lGt);
613:   MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);
614:   MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);

616:   /* Mark interior nodal dofs */
617:   ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);
618:   PetscBTCreate(nv,&btvi);
619:   for (i=1;i<n_neigh;i++) {
620:     for (j=0;j<n_shared[i];j++) {
621:       PetscBTSet(btvi,shared[i][j]);
622:     }
623:   }
624:   ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);

626:   /* communicate corners and splitpoints */
627:   PetscMalloc1(nv,&vmarks);
628:   PetscArrayzero(sfvleaves,nv);
629:   PetscArrayzero(sfvroots,Lv);
630:   for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1;

632:   if (print) {
633:     IS tbz;

635:     cum = 0;
636:     for (i=0;i<nv;i++)
637:       if (sfvleaves[i])
638:         vmarks[cum++] = i;

640:     ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);
641:     PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");
642:     ISView(tbz,NULL);
643:     ISDestroy(&tbz);
644:   }

646:   PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);
647:   PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);
648:   PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves,MPI_REPLACE);
649:   PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves,MPI_REPLACE);

651:   /* Zero rows of lGt corresponding to identified corners
652:      and interior nodal dofs */
653:   cum = 0;
654:   for (i=0;i<nv;i++) {
655:     if (sfvleaves[i]) {
656:       vmarks[cum++] = i;
657:       PetscBTSet(btv,i);
658:     }
659:     if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i;
660:   }
661:   PetscBTDestroy(&btvi);
662:   if (print) {
663:     IS tbz;

665:     ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);
666:     PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");
667:     ISView(tbz,NULL);
668:     ISDestroy(&tbz);
669:   }
670:   MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);
671:   PetscFree(vmarks);
672:   PetscSFDestroy(&sfv);
673:   PetscFree2(sfvleaves,sfvroots);

675:   /* Recompute G */
676:   MatDestroy(&lG);
677:   MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);
678:   if (print) {
679:     PetscObjectSetName((PetscObject)lG,"used_lG");
680:     MatView(lG,NULL);
681:     PetscObjectSetName((PetscObject)lGt,"used_lGt");
682:     MatView(lGt,NULL);
683:   }

685:   /* Get primal dofs (if any) */
686:   cum = 0;
687:   for (i=0;i<ne;i++) {
688:     if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i;
689:   }
690:   if (fl2g) {
691:     ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);
692:   }
693:   ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);
694:   if (print) {
695:     PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");
696:     ISView(primals,NULL);
697:   }
698:   PetscBTDestroy(&bte);
699:   /* TODO: what if the user passed in some of them ?  */
700:   PCBDDCSetPrimalVerticesLocalIS(pc,primals);
701:   ISDestroy(&primals);

703:   /* Compute edge connectivity */
704:   PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");

706:   /* Symbolic conn = lG*lGt */
707:   MatProductCreate(lG,lGt,NULL,&conn);
708:   MatProductSetType(conn,MATPRODUCT_AB);
709:   MatProductSetAlgorithm(conn,"default");
710:   MatProductSetFill(conn,PETSC_DEFAULT);
711:   PetscObjectSetOptionsPrefix((PetscObject)conn,"econn_");
712:   MatProductSetFromOptions(conn);
713:   MatProductSymbolic(conn);

715:   MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
716:   if (fl2g) {
717:     PetscBT   btf;
718:     PetscInt  *iia,*jja,*iiu,*jju;
719:     PetscBool rest = PETSC_FALSE,free = PETSC_FALSE;

721:     /* create CSR for all local dofs */
722:     PetscMalloc1(n+1,&iia);
723:     if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */
725:       iiu = pcbddc->mat_graph->xadj;
726:       jju = pcbddc->mat_graph->adjncy;
727:     } else if (pcbddc->use_local_adj) {
728:       rest = PETSC_TRUE;
729:       MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);
730:     } else {
731:       free   = PETSC_TRUE;
732:       PetscMalloc2(n+1,&iiu,n,&jju);
733:       iiu[0] = 0;
734:       for (i=0;i<n;i++) {
735:         iiu[i+1] = i+1;
736:         jju[i]   = -1;
737:       }
738:     }

740:     /* import sizes of CSR */
741:     iia[0] = 0;
742:     for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i];

744:     /* overwrite entries corresponding to the Nedelec field */
745:     PetscBTCreate(n,&btf);
746:     ISGetIndices(nedfieldlocal,&idxs);
747:     for (i=0;i<ne;i++) {
748:       PetscBTSet(btf,idxs[i]);
749:       iia[idxs[i]+1] = ii[i+1]-ii[i];
750:     }

752:     /* iia in CSR */
753:     for (i=0;i<n;i++) iia[i+1] += iia[i];

755:     /* jja in CSR */
756:     PetscMalloc1(iia[n],&jja);
757:     for (i=0;i<n;i++)
758:       if (!PetscBTLookup(btf,i))
759:         for (j=0;j<iiu[i+1]-iiu[i];j++)
760:           jja[iia[i]+j] = jju[iiu[i]+j];

762:     /* map edge dofs connectivity */
763:     if (jj) {
764:       ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);
765:       for (i=0;i<ne;i++) {
766:         PetscInt e = idxs[i];
767:         for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j];
768:       }
769:     }
770:     ISRestoreIndices(nedfieldlocal,&idxs);
771:     PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);
772:     if (rest) {
773:       MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);
774:     }
775:     if (free) {
776:       PetscFree2(iiu,jju);
777:     }
778:     PetscBTDestroy(&btf);
779:   } else {
780:     PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);
781:   }

783:   /* Analyze interface for edge dofs */
784:   PCBDDCAnalyzeInterface(pc);
785:   pcbddc->mat_graph->twodim = PETSC_FALSE;

787:   /* Get coarse edges in the edge space */
788:   PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
789:   MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);

791:   if (fl2g) {
792:     ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);
793:     PetscMalloc1(nee,&eedges);
794:     for (i=0;i<nee;i++) {
795:       ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);
796:     }
797:   } else {
798:     eedges  = alleedges;
799:     primals = allprimals;
800:   }

802:   /* Mark fine edge dofs with their coarse edge id */
803:   PetscArrayzero(marks,ne);
804:   ISGetLocalSize(primals,&cum);
805:   ISGetIndices(primals,&idxs);
806:   for (i=0;i<cum;i++) marks[idxs[i]] = nee+1;
807:   ISRestoreIndices(primals,&idxs);
808:   if (print) {
809:     PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");
810:     ISView(primals,NULL);
811:   }

813:   maxsize = 0;
814:   for (i=0;i<nee;i++) {
815:     PetscInt size,mark = i+1;

817:     ISGetLocalSize(eedges[i],&size);
818:     ISGetIndices(eedges[i],&idxs);
819:     for (j=0;j<size;j++) marks[idxs[j]] = mark;
820:     ISRestoreIndices(eedges[i],&idxs);
821:     maxsize = PetscMax(maxsize,size);
822:   }

824:   /* Find coarse edge endpoints */
825:   MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
826:   MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
827:   for (i=0;i<nee;i++) {
828:     PetscInt mark = i+1,size;

830:     ISGetLocalSize(eedges[i],&size);
831:     if (!size && nedfieldlocal) continue;
833:     ISGetIndices(eedges[i],&idxs);
834:     if (print) {
835:       PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %D\n",i);
836:       ISView(eedges[i],NULL);
837:     }
838:     for (j=0;j<size;j++) {
839:       PetscInt k, ee = idxs[j];
840:       if (print) PetscPrintf(PETSC_COMM_SELF,"  idx %D\n",ee);
841:       for (k=ii[ee];k<ii[ee+1];k++) {
842:         if (print) PetscPrintf(PETSC_COMM_SELF,"    inspect %D\n",jj[k]);
843:         if (PetscBTLookup(btv,jj[k])) {
844:           if (print) PetscPrintf(PETSC_COMM_SELF,"      corner found (already set) %D\n",jj[k]);
845:         } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */
846:           PetscInt  k2;
847:           PetscBool corner = PETSC_FALSE;
848:           for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) {
849:             if (print) PetscPrintf(PETSC_COMM_SELF,"        INSPECTING %D: mark %D (ref mark %D), boundary %D\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2]));
850:             /* it's a corner if either is connected with an edge dof belonging to a different cc or
851:                if the edge dof lie on the natural part of the boundary */
852:             if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) {
853:               corner = PETSC_TRUE;
854:               break;
855:             }
856:           }
857:           if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */
858:             if (print) PetscPrintf(PETSC_COMM_SELF,"        corner found %D\n",jj[k]);
859:             PetscBTSet(btv,jj[k]);
860:           } else {
861:             if (print) PetscPrintf(PETSC_COMM_SELF,"        no corners found\n");
862:           }
863:         }
864:       }
865:     }
866:     ISRestoreIndices(eedges[i],&idxs);
867:   }
868:   MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
869:   MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
870:   PetscBTDestroy(&btb);

872:   /* Reset marked primal dofs */
873:   ISGetLocalSize(primals,&cum);
874:   ISGetIndices(primals,&idxs);
875:   for (i=0;i<cum;i++) marks[idxs[i]] = 0;
876:   ISRestoreIndices(primals,&idxs);

878:   /* Now use the initial lG */
879:   MatDestroy(&lG);
880:   MatDestroy(&lGt);
881:   lG   = lGinit;
882:   MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);

884:   /* Compute extended cols indices */
885:   PetscBTCreate(nv,&btvc);
886:   PetscBTCreate(nee,&bter);
887:   MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
888:   MatSeqAIJGetMaxRowNonzeros(lG,&i);
889:   i   *= maxsize;
890:   PetscCalloc1(nee,&extcols);
891:   PetscMalloc2(i,&extrow,i,&gidxs);
892:   eerr = PETSC_FALSE;
893:   for (i=0;i<nee;i++) {
894:     PetscInt size,found = 0;

896:     cum  = 0;
897:     ISGetLocalSize(eedges[i],&size);
898:     if (!size && nedfieldlocal) continue;
900:     ISGetIndices(eedges[i],&idxs);
901:     PetscBTMemzero(nv,btvc);
902:     for (j=0;j<size;j++) {
903:       PetscInt k,ee = idxs[j];
904:       for (k=ii[ee];k<ii[ee+1];k++) {
905:         PetscInt vv = jj[k];
906:         if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv;
907:         else if (!PetscBTLookupSet(btvc,vv)) found++;
908:       }
909:     }
910:     ISRestoreIndices(eedges[i],&idxs);
911:     PetscSortRemoveDupsInt(&cum,extrow);
912:     ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);
913:     PetscSortIntWithArray(cum,gidxs,extrow);
914:     ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);
915:     /* it may happen that endpoints are not defined at this point
916:        if it is the case, mark this edge for a second pass */
917:     if (cum != size -1 || found != 2) {
918:       PetscBTSet(bter,i);
919:       if (print) {
920:         PetscObjectSetName((PetscObject)eedges[i],"error_edge");
921:         ISView(eedges[i],NULL);
922:         PetscObjectSetName((PetscObject)extcols[i],"error_extcol");
923:         ISView(extcols[i],NULL);
924:       }
925:       eerr = PETSC_TRUE;
926:     }
927:   }
929:   MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);
930:   if (done) {
931:     PetscInt *newprimals;

933:     PetscMalloc1(ne,&newprimals);
934:     ISGetLocalSize(primals,&cum);
935:     ISGetIndices(primals,&idxs);
936:     PetscArraycpy(newprimals,idxs,cum);
937:     ISRestoreIndices(primals,&idxs);
938:     MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
939:     if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %D)\n",eerr);
940:     for (i=0;i<nee;i++) {
941:       PetscBool has_candidates = PETSC_FALSE;
942:       if (PetscBTLookup(bter,i)) {
943:         PetscInt size,mark = i+1;

945:         ISGetLocalSize(eedges[i],&size);
946:         ISGetIndices(eedges[i],&idxs);
947:         /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */
948:         for (j=0;j<size;j++) {
949:           PetscInt k,ee = idxs[j];
950:           if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %D [%D %D)\n",ee,ii[ee],ii[ee+1]);
951:           for (k=ii[ee];k<ii[ee+1];k++) {
952:             /* set all candidates located on the edge as corners */
953:             if (PetscBTLookup(btvcand,jj[k])) {
954:               PetscInt k2,vv = jj[k];
955:               has_candidates = PETSC_TRUE;
956:               if (print) PetscPrintf(PETSC_COMM_SELF,"  Candidate set to vertex %D\n",vv);
957:               PetscBTSet(btv,vv);
958:               /* set all edge dofs connected to candidate as primals */
959:               for (k2=iit[vv];k2<iit[vv+1];k2++) {
960:                 if (marks[jjt[k2]] == mark) {
961:                   PetscInt k3,ee2 = jjt[k2];
962:                   if (print) PetscPrintf(PETSC_COMM_SELF,"    Connected edge dof set to primal %D\n",ee2);
963:                   newprimals[cum++] = ee2;
964:                   /* finally set the new corners */
965:                   for (k3=ii[ee2];k3<ii[ee2+1];k3++) {
966:                     if (print) PetscPrintf(PETSC_COMM_SELF,"      Connected nodal dof set to vertex %D\n",jj[k3]);
967:                     PetscBTSet(btv,jj[k3]);
968:                   }
969:                 }
970:               }
971:             } else {
972:               if (print) PetscPrintf(PETSC_COMM_SELF,"  Not a candidate vertex %D\n",jj[k]);
973:             }
974:           }
975:         }
976:         if (!has_candidates) { /* circular edge */
977:           PetscInt k, ee = idxs[0],*tmarks;

979:           PetscCalloc1(ne,&tmarks);
980:           if (print) PetscPrintf(PETSC_COMM_SELF,"  Circular edge %D\n",i);
981:           for (k=ii[ee];k<ii[ee+1];k++) {
982:             PetscInt k2;
983:             if (print) PetscPrintf(PETSC_COMM_SELF,"    Set to corner %D\n",jj[k]);
984:             PetscBTSet(btv,jj[k]);
985:             for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++;
986:           }
987:           for (j=0;j<size;j++) {
988:             if (tmarks[idxs[j]] > 1) {
989:               if (print) PetscPrintf(PETSC_COMM_SELF,"  Edge dof set to primal %D\n",idxs[j]);
990:               newprimals[cum++] = idxs[j];
991:             }
992:           }
993:           PetscFree(tmarks);
994:         }
995:         ISRestoreIndices(eedges[i],&idxs);
996:       }
997:       ISDestroy(&extcols[i]);
998:     }
999:     PetscFree(extcols);
1000:     MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
1001:     PetscSortRemoveDupsInt(&cum,newprimals);
1002:     if (fl2g) {
1003:       ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);
1004:       ISDestroy(&primals);
1005:       for (i=0;i<nee;i++) {
1006:         ISDestroy(&eedges[i]);
1007:       }
1008:       PetscFree(eedges);
1009:     }
1010:     PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1011:     ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);
1012:     PetscFree(newprimals);
1013:     PCBDDCSetPrimalVerticesLocalIS(pc,primals);
1014:     ISDestroy(&primals);
1015:     PCBDDCAnalyzeInterface(pc);
1016:     pcbddc->mat_graph->twodim = PETSC_FALSE;
1017:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1018:     if (fl2g) {
1019:       ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);
1020:       PetscMalloc1(nee,&eedges);
1021:       for (i=0;i<nee;i++) {
1022:         ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);
1023:       }
1024:     } else {
1025:       eedges  = alleedges;
1026:       primals = allprimals;
1027:     }
1028:     PetscCalloc1(nee,&extcols);

1030:     /* Mark again */
1031:     PetscArrayzero(marks,ne);
1032:     for (i=0;i<nee;i++) {
1033:       PetscInt size,mark = i+1;

1035:       ISGetLocalSize(eedges[i],&size);
1036:       ISGetIndices(eedges[i],&idxs);
1037:       for (j=0;j<size;j++) marks[idxs[j]] = mark;
1038:       ISRestoreIndices(eedges[i],&idxs);
1039:     }
1040:     if (print) {
1041:       PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");
1042:       ISView(primals,NULL);
1043:     }

1045:     /* Recompute extended cols */
1046:     eerr = PETSC_FALSE;
1047:     for (i=0;i<nee;i++) {
1048:       PetscInt size;

1050:       cum  = 0;
1051:       ISGetLocalSize(eedges[i],&size);
1052:       if (!size && nedfieldlocal) continue;
1054:       ISGetIndices(eedges[i],&idxs);
1055:       for (j=0;j<size;j++) {
1056:         PetscInt k,ee = idxs[j];
1057:         for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k];
1058:       }
1059:       ISRestoreIndices(eedges[i],&idxs);
1060:       PetscSortRemoveDupsInt(&cum,extrow);
1061:       ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);
1062:       PetscSortIntWithArray(cum,gidxs,extrow);
1063:       ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);
1064:       if (cum != size -1) {
1065:         if (print) {
1066:           PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");
1067:           ISView(eedges[i],NULL);
1068:           PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");
1069:           ISView(extcols[i],NULL);
1070:         }
1071:         eerr = PETSC_TRUE;
1072:       }
1073:     }
1074:   }
1075:   MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1076:   PetscFree2(extrow,gidxs);
1077:   PetscBTDestroy(&bter);
1078:   if (print) PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);
1079:   /* an error should not occur at this point */

1082:   /* Check the number of endpoints */
1083:   MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1084:   PetscMalloc1(2*nee,&corners);
1085:   PetscMalloc1(nee,&cedges);
1086:   for (i=0;i<nee;i++) {
1087:     PetscInt size, found = 0, gc[2];

1089:     /* init with defaults */
1090:     cedges[i] = corners[i*2] = corners[i*2+1] = -1;
1091:     ISGetLocalSize(eedges[i],&size);
1092:     if (!size && nedfieldlocal) continue;
1094:     ISGetIndices(eedges[i],&idxs);
1095:     PetscBTMemzero(nv,btvc);
1096:     for (j=0;j<size;j++) {
1097:       PetscInt k,ee = idxs[j];
1098:       for (k=ii[ee];k<ii[ee+1];k++) {
1099:         PetscInt vv = jj[k];
1100:         if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) {
1102:           corners[i*2+found++] = vv;
1103:         }
1104:       }
1105:     }
1106:     if (found != 2) {
1107:       PetscInt e;
1108:       if (fl2g) {
1109:         ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);
1110:       } else {
1111:         e = idxs[0];
1112:       }
1113:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %D corners for edge %D (astart %D, estart %D)",found,i,e,idxs[0]);
1114:     }

1116:     /* get primal dof index on this coarse edge */
1117:     ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);
1118:     if (gc[0] > gc[1]) {
1119:       PetscInt swap  = corners[2*i];
1120:       corners[2*i]   = corners[2*i+1];
1121:       corners[2*i+1] = swap;
1122:     }
1123:     cedges[i] = idxs[size-1];
1124:     ISRestoreIndices(eedges[i],&idxs);
1125:     if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %D: ce %D, corners (%D,%D)\n",i,cedges[i],corners[2*i],corners[2*i+1]);
1126:   }
1127:   MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1128:   PetscBTDestroy(&btvc);

1130:   if (PetscDefined(USE_DEBUG)) {
1131:     /* Inspects columns of lG (rows of lGt) and make sure the change of basis will
1132:      not interfere with neighbouring coarse edges */
1133:     PetscMalloc1(nee+1,&emarks);
1134:     MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1135:     for (i=0;i<nv;i++) {
1136:       PetscInt emax = 0,eemax = 0;

1138:       if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue;
1139:       PetscArrayzero(emarks,nee+1);
1140:       for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++;
1141:       for (j=1;j<nee+1;j++) {
1142:         if (emax < emarks[j]) {
1143:           emax = emarks[j];
1144:           eemax = j;
1145:         }
1146:       }
1147:       /* not relevant for edges */
1148:       if (!eemax) continue;

1150:       for (j=ii[i];j<ii[i+1];j++) {
1151:         if (marks[jj[j]] && marks[jj[j]] != eemax) {
1152:           SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %D and %D) connected through the %D nodal dof at edge dof %D",marks[jj[j]]-1,eemax,i,jj[j]);
1153:         }
1154:       }
1155:     }
1156:     PetscFree(emarks);
1157:     MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1158:   }

1160:   /* Compute extended rows indices for edge blocks of the change of basis */
1161:   MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1162:   MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);
1163:   extmem *= maxsize;
1164:   PetscMalloc1(extmem*nee,&extrow);
1165:   PetscMalloc1(nee,&extrows);
1166:   PetscCalloc1(nee,&extrowcum);
1167:   for (i=0;i<nv;i++) {
1168:     PetscInt mark = 0,size,start;

1170:     if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue;
1171:     for (j=ii[i];j<ii[i+1];j++)
1172:       if (marks[jj[j]] && !mark)
1173:         mark = marks[jj[j]];

1175:     /* not relevant */
1176:     if (!mark) continue;

1178:     /* import extended row */
1179:     mark--;
1180:     start = mark*extmem+extrowcum[mark];
1181:     size = ii[i+1]-ii[i];
1183:     PetscArraycpy(extrow+start,jj+ii[i],size);
1184:     extrowcum[mark] += size;
1185:   }
1186:   MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1187:   MatDestroy(&lGt);
1188:   PetscFree(marks);

1190:   /* Compress extrows */
1191:   cum  = 0;
1192:   for (i=0;i<nee;i++) {
1193:     PetscInt size = extrowcum[i],*start = extrow + i*extmem;
1194:     PetscSortRemoveDupsInt(&size,start);
1195:     ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);
1196:     cum  = PetscMax(cum,size);
1197:   }
1198:   PetscFree(extrowcum);
1199:   PetscBTDestroy(&btv);
1200:   PetscBTDestroy(&btvcand);

1202:   /* Workspace for lapack inner calls and VecSetValues */
1203:   PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);

1205:   /* Create change of basis matrix (preallocation can be improved) */
1206:   MatCreate(comm,&T);
1207:   MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n,
1208:                        pc->pmat->rmap->N,pc->pmat->rmap->N);
1209:   MatSetType(T,MATAIJ);
1210:   MatSeqAIJSetPreallocation(T,10,NULL);
1211:   MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);
1212:   MatSetLocalToGlobalMapping(T,al2g,al2g);
1213:   MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
1214:   MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);
1215:   ISLocalToGlobalMappingDestroy(&al2g);

1217:   /* Defaults to identity */
1218:   MatCreateVecs(pc->pmat,&tvec,NULL);
1219:   VecSet(tvec,1.0);
1220:   MatDiagonalSet(T,tvec,INSERT_VALUES);
1221:   VecDestroy(&tvec);

1223:   /* Create discrete gradient for the coarser level if needed */
1224:   MatDestroy(&pcbddc->nedcG);
1225:   ISDestroy(&pcbddc->nedclocal);
1226:   if (pcbddc->current_level < pcbddc->max_levels) {
1227:     ISLocalToGlobalMapping cel2g,cvl2g;
1228:     IS                     wis,gwis;
1229:     PetscInt               cnv,cne;

1231:     ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);
1232:     if (fl2g) {
1233:       ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);
1234:     } else {
1235:       PetscObjectReference((PetscObject)wis);
1236:       pcbddc->nedclocal = wis;
1237:     }
1238:     ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);
1239:     ISDestroy(&wis);
1240:     ISRenumber(gwis,NULL,&cne,&wis);
1241:     ISLocalToGlobalMappingCreateIS(wis,&cel2g);
1242:     ISDestroy(&wis);
1243:     ISDestroy(&gwis);

1245:     ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);
1246:     ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);
1247:     ISDestroy(&wis);
1248:     ISRenumber(gwis,NULL,&cnv,&wis);
1249:     ISLocalToGlobalMappingCreateIS(wis,&cvl2g);
1250:     ISDestroy(&wis);
1251:     ISDestroy(&gwis);

1253:     MatCreate(comm,&pcbddc->nedcG);
1254:     MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);
1255:     MatSetType(pcbddc->nedcG,MATAIJ);
1256:     MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);
1257:     MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);
1258:     MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);
1259:     ISLocalToGlobalMappingDestroy(&cel2g);
1260:     ISLocalToGlobalMappingDestroy(&cvl2g);
1261:   }
1262:   ISLocalToGlobalMappingDestroy(&vl2g);

1264: #if defined(PRINT_GDET)
1265:   inc = 0;
1266:   lev = pcbddc->current_level;
1267: #endif

1269:   /* Insert values in the change of basis matrix */
1270:   for (i=0;i<nee;i++) {
1271:     Mat         Gins = NULL, GKins = NULL;
1272:     IS          cornersis = NULL;
1273:     PetscScalar cvals[2];

1275:     if (pcbddc->nedcG) {
1276:       ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);
1277:     }
1278:     PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);
1279:     if (Gins && GKins) {
1280:       const PetscScalar *data;
1281:       const PetscInt    *rows,*cols;
1282:       PetscInt          nrh,nch,nrc,ncc;

1284:       ISGetIndices(eedges[i],&cols);
1285:       /* H1 */
1286:       ISGetIndices(extrows[i],&rows);
1287:       MatGetSize(Gins,&nrh,&nch);
1288:       MatDenseGetArrayRead(Gins,&data);
1289:       MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);
1290:       MatDenseRestoreArrayRead(Gins,&data);
1291:       ISRestoreIndices(extrows[i],&rows);
1292:       /* complement */
1293:       MatGetSize(GKins,&nrc,&ncc);
1297:       MatDenseGetArrayRead(GKins,&data);
1298:       MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);
1299:       MatDenseRestoreArrayRead(GKins,&data);

1301:       /* coarse discrete gradient */
1302:       if (pcbddc->nedcG) {
1303:         PetscInt cols[2];

1305:         cols[0] = 2*i;
1306:         cols[1] = 2*i+1;
1307:         MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);
1308:       }
1309:       ISRestoreIndices(eedges[i],&cols);
1310:     }
1311:     ISDestroy(&extrows[i]);
1312:     ISDestroy(&extcols[i]);
1313:     ISDestroy(&cornersis);
1314:     MatDestroy(&Gins);
1315:     MatDestroy(&GKins);
1316:   }
1317:   ISLocalToGlobalMappingDestroy(&el2g);

1319:   /* Start assembling */
1320:   MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);
1321:   if (pcbddc->nedcG) {
1322:     MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);
1323:   }

1325:   /* Free */
1326:   if (fl2g) {
1327:     ISDestroy(&primals);
1328:     for (i=0;i<nee;i++) {
1329:       ISDestroy(&eedges[i]);
1330:     }
1331:     PetscFree(eedges);
1332:   }

1334:   /* hack mat_graph with primal dofs on the coarse edges */
1335:   {
1336:     PCBDDCGraph graph   = pcbddc->mat_graph;
1337:     PetscInt    *oqueue = graph->queue;
1338:     PetscInt    *ocptr  = graph->cptr;
1339:     PetscInt    ncc,*idxs;

1341:     /* find first primal edge */
1342:     if (pcbddc->nedclocal) {
1343:       ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);
1344:     } else {
1345:       if (fl2g) {
1346:         ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);
1347:       }
1348:       idxs = cedges;
1349:     }
1350:     cum = 0;
1351:     while (cum < nee && cedges[cum] < 0) cum++;

1353:     /* adapt connected components */
1354:     PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);
1355:     graph->cptr[0] = 0;
1356:     for (i=0,ncc=0;i<graph->ncc;i++) {
1357:       PetscInt lc = ocptr[i+1]-ocptr[i];
1358:       if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */
1359:         graph->cptr[ncc+1] = graph->cptr[ncc]+1;
1360:         graph->queue[graph->cptr[ncc]] = cedges[cum];
1361:         ncc++;
1362:         lc--;
1363:         cum++;
1364:         while (cum < nee && cedges[cum] < 0) cum++;
1365:       }
1366:       graph->cptr[ncc+1] = graph->cptr[ncc] + lc;
1367:       for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j];
1368:       ncc++;
1369:     }
1370:     graph->ncc = ncc;
1371:     if (pcbddc->nedclocal) {
1372:       ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);
1373:     }
1374:     PetscFree2(ocptr,oqueue);
1375:   }
1376:   ISLocalToGlobalMappingDestroy(&fl2g);
1377:   PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1378:   PCBDDCGraphResetCSR(pcbddc->mat_graph);
1379:   MatDestroy(&conn);

1381:   ISDestroy(&nedfieldlocal);
1382:   PetscFree(extrow);
1383:   PetscFree2(work,rwork);
1384:   PetscFree(corners);
1385:   PetscFree(cedges);
1386:   PetscFree(extrows);
1387:   PetscFree(extcols);
1388:   MatDestroy(&lG);

1390:   /* Complete assembling */
1391:   MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);
1392:   if (pcbddc->nedcG) {
1393:     MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);
1394: #if 0
1395:     PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");
1396:     MatView(pcbddc->nedcG,NULL);
1397: #endif
1398:   }

1400:   /* set change of basis */
1401:   PCBDDCSetChangeOfBasisMat(pc,T,singular);
1402:   MatDestroy(&T);

1404:   return 0;
1405: }

1407: /* the near-null space of BDDC carries information on quadrature weights,
1408:    and these can be collinear -> so cheat with MatNullSpaceCreate
1409:    and create a suitable set of basis vectors first */
1410: PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp)
1411: {
1412:   PetscInt       i;

1414:   for (i=0;i<nvecs;i++) {
1415:     PetscInt first,last;

1417:     VecGetOwnershipRange(quad_vecs[i],&first,&last);
1419:     if (i>=first && i < last) {
1420:       PetscScalar *data;
1421:       VecGetArray(quad_vecs[i],&data);
1422:       if (!has_const) {
1423:         data[i-first] = 1.;
1424:       } else {
1425:         data[2*i-first] = 1./PetscSqrtReal(2.);
1426:         data[2*i-first+1] = -1./PetscSqrtReal(2.);
1427:       }
1428:       VecRestoreArray(quad_vecs[i],&data);
1429:     }
1430:     PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
1431:   }
1432:   MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);
1433:   for (i=0;i<nvecs;i++) { /* reset vectors */
1434:     PetscInt first,last;
1435:     VecLockReadPop(quad_vecs[i]);
1436:     VecGetOwnershipRange(quad_vecs[i],&first,&last);
1437:     if (i>=first && i < last) {
1438:       PetscScalar *data;
1439:       VecGetArray(quad_vecs[i],&data);
1440:       if (!has_const) {
1441:         data[i-first] = 0.;
1442:       } else {
1443:         data[2*i-first] = 0.;
1444:         data[2*i-first+1] = 0.;
1445:       }
1446:       VecRestoreArray(quad_vecs[i],&data);
1447:     }
1448:     PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
1449:     VecLockReadPush(quad_vecs[i]);
1450:   }
1451:   return 0;
1452: }

1454: PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp)
1455: {
1456:   Mat                    loc_divudotp;
1457:   Vec                    p,v,vins,quad_vec,*quad_vecs;
1458:   ISLocalToGlobalMapping map;
1459:   PetscScalar            *vals;
1460:   const PetscScalar      *array;
1461:   PetscInt               i,maxneighs = 0,maxsize,*gidxs;
1462:   PetscInt               n_neigh,*neigh,*n_shared,**shared;
1463:   PetscMPIInt            rank;

1465:   ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1466:   for (i=0;i<n_neigh;i++) maxneighs = PetscMax(graph->count[shared[i][0]]+1,maxneighs);
1467:   MPIU_Allreduce(MPI_IN_PLACE,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));
1468:   if (!maxneighs) {
1469:     ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1470:     *nnsp = NULL;
1471:     return 0;
1472:   }
1473:   maxsize = 0;
1474:   for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize);
1475:   PetscMalloc2(maxsize,&gidxs,maxsize,&vals);
1476:   /* create vectors to hold quadrature weights */
1477:   MatCreateVecs(A,&quad_vec,NULL);
1478:   if (!transpose) {
1479:     MatISGetLocalToGlobalMapping(A,&map,NULL);
1480:   } else {
1481:     MatISGetLocalToGlobalMapping(A,NULL,&map);
1482:   }
1483:   VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);
1484:   VecDestroy(&quad_vec);
1485:   PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);
1486:   for (i=0;i<maxneighs;i++) {
1487:     VecLockReadPop(quad_vecs[i]);
1488:   }

1490:   /* compute local quad vec */
1491:   MatISGetLocalMat(divudotp,&loc_divudotp);
1492:   if (!transpose) {
1493:     MatCreateVecs(loc_divudotp,&v,&p);
1494:   } else {
1495:     MatCreateVecs(loc_divudotp,&p,&v);
1496:   }
1497:   VecSet(p,1.);
1498:   if (!transpose) {
1499:     MatMultTranspose(loc_divudotp,p,v);
1500:   } else {
1501:     MatMult(loc_divudotp,p,v);
1502:   }
1503:   if (vl2l) {
1504:     Mat        lA;
1505:     VecScatter sc;

1507:     MatISGetLocalMat(A,&lA);
1508:     MatCreateVecs(lA,&vins,NULL);
1509:     VecScatterCreate(v,NULL,vins,vl2l,&sc);
1510:     VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);
1511:     VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);
1512:     VecScatterDestroy(&sc);
1513:   } else {
1514:     vins = v;
1515:   }
1516:   VecGetArrayRead(vins,&array);
1517:   VecDestroy(&p);

1519:   /* insert in global quadrature vecs */
1520:   MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);
1521:   for (i=1;i<n_neigh;i++) {
1522:     const PetscInt    *idxs;
1523:     PetscInt          idx,nn,j;

1525:     idxs = shared[i];
1526:     nn   = n_shared[i];
1527:     for (j=0;j<nn;j++) vals[j] = array[idxs[j]];
1528:     PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);
1529:     idx  = -(idx+1);
1531:     ISLocalToGlobalMappingApply(map,nn,idxs,gidxs);
1532:     VecSetValues(quad_vecs[idx],nn,gidxs,vals,INSERT_VALUES);
1533:   }
1534:   ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1535:   VecRestoreArrayRead(vins,&array);
1536:   if (vl2l) {
1537:     VecDestroy(&vins);
1538:   }
1539:   VecDestroy(&v);
1540:   PetscFree2(gidxs,vals);

1542:   /* assemble near null space */
1543:   for (i=0;i<maxneighs;i++) {
1544:     VecAssemblyBegin(quad_vecs[i]);
1545:   }
1546:   for (i=0;i<maxneighs;i++) {
1547:     VecAssemblyEnd(quad_vecs[i]);
1548:     VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view");
1549:     VecLockReadPush(quad_vecs[i]);
1550:   }
1551:   VecDestroyVecs(maxneighs,&quad_vecs);
1552:   return 0;
1553: }

1555: PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv)
1556: {
1557:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

1559:   if (primalv) {
1560:     if (pcbddc->user_primal_vertices_local) {
1561:       IS list[2], newp;

1563:       list[0] = primalv;
1564:       list[1] = pcbddc->user_primal_vertices_local;
1565:       ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);
1566:       ISSortRemoveDups(newp);
1567:       ISDestroy(&list[1]);
1568:       pcbddc->user_primal_vertices_local = newp;
1569:     } else {
1570:       PCBDDCSetPrimalVerticesLocalIS(pc,primalv);
1571:     }
1572:   }
1573:   return 0;
1574: }

1576: static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx)
1577: {
1578:   PetscInt f, *comp  = (PetscInt *)ctx;

1580:   for (f=0;f<Nf;f++) out[f] = X[*comp];
1581:   return 0;
1582: }

1584: PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc)
1585: {
1587:   Vec            local,global;
1588:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1589:   Mat_IS         *matis = (Mat_IS*)pc->pmat->data;
1590:   PetscBool      monolithic = PETSC_FALSE;

1592:   PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");
1593:   PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);
1594:   PetscOptionsEnd();
1595:   /* need to convert from global to local topology information and remove references to information in global ordering */
1596:   MatCreateVecs(pc->pmat,&global,NULL);
1597:   MatCreateVecs(matis->A,&local,NULL);
1598:   VecBindToCPU(global,PETSC_TRUE);
1599:   VecBindToCPU(local,PETSC_TRUE);
1600:   if (monolithic) { /* just get block size to properly compute vertices */
1601:     if (pcbddc->vertex_size == 1) {
1602:       MatGetBlockSize(pc->pmat,&pcbddc->vertex_size);
1603:     }
1604:     goto boundary;
1605:   }

1607:   if (pcbddc->user_provided_isfordofs) {
1608:     if (pcbddc->n_ISForDofs) {
1609:       PetscInt i;

1611:       PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);
1612:       for (i=0;i<pcbddc->n_ISForDofs;i++) {
1613:         PetscInt bs;

1615:         PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);
1616:         ISGetBlockSize(pcbddc->ISForDofs[i],&bs);
1617:         ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);
1618:         ISDestroy(&pcbddc->ISForDofs[i]);
1619:       }
1620:       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
1621:       pcbddc->n_ISForDofs = 0;
1622:       PetscFree(pcbddc->ISForDofs);
1623:     }
1624:   } else {
1625:     if (!pcbddc->n_ISForDofsLocal) { /* field split not present */
1626:       DM dm;

1628:       MatGetDM(pc->pmat, &dm);
1629:       if (!dm) {
1630:         PCGetDM(pc, &dm);
1631:       }
1632:       if (dm) {
1633:         IS      *fields;
1634:         PetscInt nf,i;

1636:         DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);
1637:         PetscMalloc1(nf,&pcbddc->ISForDofsLocal);
1638:         for (i=0;i<nf;i++) {
1639:           PetscInt bs;

1641:           PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);
1642:           ISGetBlockSize(fields[i],&bs);
1643:           ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);
1644:           ISDestroy(&fields[i]);
1645:         }
1646:         PetscFree(fields);
1647:         pcbddc->n_ISForDofsLocal = nf;
1648:       } else { /* See if MATIS has fields attached by the conversion from MatNest */
1649:         PetscContainer   c;

1651:         PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);
1652:         if (c) {
1653:           MatISLocalFields lf;
1654:           PetscContainerGetPointer(c,(void**)&lf);
1655:           PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);
1656:         } else { /* fallback, create the default fields if bs > 1 */
1657:           PetscInt i, n = matis->A->rmap->n;
1658:           MatGetBlockSize(pc->pmat,&i);
1659:           if (i > 1) {
1660:             pcbddc->n_ISForDofsLocal = i;
1661:             PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);
1662:             for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
1663:               ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);
1664:             }
1665:           }
1666:         }
1667:       }
1668:     } else {
1669:       PetscInt i;
1670:       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
1671:         PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);
1672:       }
1673:     }
1674:   }

1676: boundary:
1677:   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) {
1678:     PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);
1679:   } else if (pcbddc->DirichletBoundariesLocal) {
1680:     PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);
1681:   }
1682:   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) {
1683:     PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);
1684:   } else if (pcbddc->NeumannBoundariesLocal) {
1685:     PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);
1686:   }
1687:   if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) {
1688:     PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);
1689:   }
1690:   VecDestroy(&global);
1691:   VecDestroy(&local);
1692:   /* detect local disconnected subdomains if requested (use matis->A) */
1693:   if (pcbddc->detect_disconnected) {
1694:     IS        primalv = NULL;
1695:     PetscInt  i;
1696:     PetscBool filter = pcbddc->detect_disconnected_filter;

1698:     for (i=0;i<pcbddc->n_local_subs;i++) {
1699:       ISDestroy(&pcbddc->local_subs[i]);
1700:     }
1701:     PetscFree(pcbddc->local_subs);
1702:     PCBDDCDetectDisconnectedComponents(pc,filter,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);
1703:     PCBDDCAddPrimalVerticesLocalIS(pc,primalv);
1704:     ISDestroy(&primalv);
1705:   }
1706:   /* early stage corner detection */
1707:   {
1708:     DM dm;

1710:     MatGetDM(pc->pmat,&dm);
1711:     if (!dm) {
1712:       PCGetDM(pc,&dm);
1713:     }
1714:     if (dm) {
1715:       PetscBool isda;

1717:       PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);
1718:       if (isda) {
1719:         ISLocalToGlobalMapping l2l;
1720:         IS                     corners;
1721:         Mat                    lA;
1722:         PetscBool              gl,lo;

1724:         {
1725:           Vec               cvec;
1726:           const PetscScalar *coords;
1727:           PetscInt          dof,n,cdim;
1728:           PetscBool         memc = PETSC_TRUE;

1730:           DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);
1731:           DMGetCoordinates(dm,&cvec);
1732:           VecGetLocalSize(cvec,&n);
1733:           VecGetBlockSize(cvec,&cdim);
1734:           n   /= cdim;
1735:           PetscFree(pcbddc->mat_graph->coords);
1736:           PetscMalloc1(dof*n*cdim,&pcbddc->mat_graph->coords);
1737:           VecGetArrayRead(cvec,&coords);
1738: #if defined(PETSC_USE_COMPLEX)
1739:           memc = PETSC_FALSE;
1740: #endif
1741:           if (dof != 1) memc = PETSC_FALSE;
1742:           if (memc) {
1743:             PetscArraycpy(pcbddc->mat_graph->coords,coords,cdim*n*dof);
1744:           } else { /* BDDC graph does not use any blocked information, we need to replicate the data */
1745:             PetscReal *bcoords = pcbddc->mat_graph->coords;
1746:             PetscInt  i, b, d;

1748:             for (i=0;i<n;i++) {
1749:               for (b=0;b<dof;b++) {
1750:                 for (d=0;d<cdim;d++) {
1751:                   bcoords[i*dof*cdim + b*cdim + d] = PetscRealPart(coords[i*cdim+d]);
1752:                 }
1753:               }
1754:             }
1755:           }
1756:           VecRestoreArrayRead(cvec,&coords);
1757:           pcbddc->mat_graph->cdim  = cdim;
1758:           pcbddc->mat_graph->cnloc = dof*n;
1759:           pcbddc->mat_graph->cloc  = PETSC_FALSE;
1760:         }
1761:         DMDAGetSubdomainCornersIS(dm,&corners);
1762:         MatISGetLocalMat(pc->pmat,&lA);
1763:         MatGetLocalToGlobalMapping(lA,&l2l,NULL);
1764:         MatISRestoreLocalMat(pc->pmat,&lA);
1765:         lo   = (PetscBool)(l2l && corners);
1766:         MPIU_Allreduce(&lo,&gl,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
1767:         if (gl) { /* From PETSc's DMDA */
1768:           const PetscInt    *idx;
1769:           PetscInt          dof,bs,*idxout,n;

1771:           DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);
1772:           ISLocalToGlobalMappingGetBlockSize(l2l,&bs);
1773:           ISGetLocalSize(corners,&n);
1774:           ISGetIndices(corners,&idx);
1775:           if (bs == dof) {
1776:             PetscMalloc1(n,&idxout);
1777:             ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);
1778:           } else { /* the original DMDA local-to-local map have been modified */
1779:             PetscInt i,d;

1781:             PetscMalloc1(dof*n,&idxout);
1782:             for (i=0;i<n;i++) for (d=0;d<dof;d++) idxout[dof*i+d] = dof*idx[i]+d;
1783:             ISLocalToGlobalMappingApply(l2l,dof*n,idxout,idxout);

1785:             bs = 1;
1786:             n *= dof;
1787:           }
1788:           ISRestoreIndices(corners,&idx);
1789:           DMDARestoreSubdomainCornersIS(dm,&corners);
1790:           ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);
1791:           PCBDDCAddPrimalVerticesLocalIS(pc,corners);
1792:           ISDestroy(&corners);
1793:           pcbddc->corner_selected  = PETSC_TRUE;
1794:           pcbddc->corner_selection = PETSC_TRUE;
1795:         }
1796:         if (corners) {
1797:           DMDARestoreSubdomainCornersIS(dm,&corners);
1798:         }
1799:       }
1800:     }
1801:   }
1802:   if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) {
1803:     DM dm;

1805:     MatGetDM(pc->pmat,&dm);
1806:     if (!dm) {
1807:       PCGetDM(pc,&dm);
1808:     }
1809:     if (dm) { /* this can get very expensive, I need to find a faster alternative */
1810:       Vec            vcoords;
1811:       PetscSection   section;
1812:       PetscReal      *coords;
1813:       PetscInt       d,cdim,nl,nf,**ctxs;
1814:       PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *);
1815:       /* debug coordinates */
1816:       PetscViewer       viewer;
1817:       PetscBool         flg;
1818:       PetscViewerFormat format;
1819:       const char        *prefix;

1821:       DMGetCoordinateDim(dm,&cdim);
1822:       DMGetLocalSection(dm,&section);
1823:       PetscSectionGetNumFields(section,&nf);
1824:       DMCreateGlobalVector(dm,&vcoords);
1825:       VecGetLocalSize(vcoords,&nl);
1826:       PetscMalloc1(nl*cdim,&coords);
1827:       PetscMalloc2(nf,&funcs,nf,&ctxs);
1828:       PetscMalloc1(nf,&ctxs[0]);
1829:       for (d=0;d<nf;d++) funcs[d] = func_coords_private;
1830:       for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1;

1832:       /* debug coordinates */
1833:       PCGetOptionsPrefix(pc,&prefix);
1834:       PetscOptionsGetViewer(PetscObjectComm((PetscObject)vcoords),((PetscObject)vcoords)->options,prefix,"-pc_bddc_coords_vec_view",&viewer,&format,&flg);
1835:       if (flg) PetscViewerPushFormat(viewer,format);
1836:       for (d=0;d<cdim;d++) {
1837:         PetscInt          i;
1838:         const PetscScalar *v;
1839:         char              name[16];

1841:         for (i=0;i<nf;i++) ctxs[i][0] = d;
1842:         PetscSNPrintf(name,sizeof(name),"bddc_coords_%d",(int)d);
1843:         PetscObjectSetName((PetscObject)vcoords,name);
1844:         DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords);
1845:         if (flg) VecView(vcoords,viewer);
1846:         VecGetArrayRead(vcoords,&v);
1847:         for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]);
1848:         VecRestoreArrayRead(vcoords,&v);
1849:       }
1850:       VecDestroy(&vcoords);
1851:       PCSetCoordinates(pc,cdim,nl,coords);
1852:       PetscFree(coords);
1853:       PetscFree(ctxs[0]);
1854:       PetscFree2(funcs,ctxs);
1855:       if (flg) {
1856:         PetscViewerPopFormat(viewer);
1857:         PetscViewerDestroy(&viewer);
1858:       }
1859:     }
1860:   }
1861:   return 0;
1862: }

1864: PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is)
1865: {
1866:   Mat_IS          *matis = (Mat_IS*)(pc->pmat->data);
1867:   IS              nis;
1868:   const PetscInt  *idxs;
1869:   PetscInt        i,nd,n = matis->A->rmap->n,*nidxs,nnd;

1872:   if (mop == MPI_LAND) {
1873:     /* init rootdata with true */
1874:     for (i=0;i<pc->pmat->rmap->n;i++) matis->sf_rootdata[i] = 1;
1875:   } else {
1876:     PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);
1877:   }
1878:   PetscArrayzero(matis->sf_leafdata,n);
1879:   ISGetLocalSize(*is,&nd);
1880:   ISGetIndices(*is,&idxs);
1881:   for (i=0;i<nd;i++)
1882:     if (-1 < idxs[i] && idxs[i] < n)
1883:       matis->sf_leafdata[idxs[i]] = 1;
1884:   ISRestoreIndices(*is,&idxs);
1885:   PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,mop);
1886:   PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,mop);
1887:   PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);
1888:   PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);
1889:   if (mop == MPI_LAND) {
1890:     PetscMalloc1(nd,&nidxs);
1891:   } else {
1892:     PetscMalloc1(n,&nidxs);
1893:   }
1894:   for (i=0,nnd=0;i<n;i++)
1895:     if (matis->sf_leafdata[i])
1896:       nidxs[nnd++] = i;
1897:   ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);
1898:   ISDestroy(is);
1899:   *is  = nis;
1900:   return 0;
1901: }

1903: PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z)
1904: {
1905:   PC_IS             *pcis = (PC_IS*)(pc->data);
1906:   PC_BDDC           *pcbddc = (PC_BDDC*)(pc->data);

1908:   if (!pcbddc->benign_have_null) {
1909:     return 0;
1910:   }
1911:   if (pcbddc->ChangeOfBasisMatrix) {
1912:     Vec swap;

1914:     MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);
1915:     swap = pcbddc->work_change;
1916:     pcbddc->work_change = r;
1917:     r = swap;
1918:   }
1919:   VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1920:   VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1921:   PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0],pc,0,0,0);
1922:   KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);
1923:   PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0],pc,0,0,0);
1924:   KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D);
1925:   VecSet(z,0.);
1926:   VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
1927:   VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
1928:   if (pcbddc->ChangeOfBasisMatrix) {
1929:     pcbddc->work_change = r;
1930:     VecCopy(z,pcbddc->work_change);
1931:     MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);
1932:   }
1933:   return 0;
1934: }

1936: PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose)
1937: {
1938:   PCBDDCBenignMatMult_ctx ctx;
1939:   PetscBool               apply_right,apply_left,reset_x;

1941:   MatShellGetContext(A,&ctx);
1942:   if (transpose) {
1943:     apply_right = ctx->apply_left;
1944:     apply_left = ctx->apply_right;
1945:   } else {
1946:     apply_right = ctx->apply_right;
1947:     apply_left = ctx->apply_left;
1948:   }
1949:   reset_x = PETSC_FALSE;
1950:   if (apply_right) {
1951:     const PetscScalar *ax;
1952:     PetscInt          nl,i;

1954:     VecGetLocalSize(x,&nl);
1955:     VecGetArrayRead(x,&ax);
1956:     PetscArraycpy(ctx->work,ax,nl);
1957:     VecRestoreArrayRead(x,&ax);
1958:     for (i=0;i<ctx->benign_n;i++) {
1959:       PetscScalar    sum,val;
1960:       const PetscInt *idxs;
1961:       PetscInt       nz,j;
1962:       ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);
1963:       ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);
1964:       sum = 0.;
1965:       if (ctx->apply_p0) {
1966:         val = ctx->work[idxs[nz-1]];
1967:         for (j=0;j<nz-1;j++) {
1968:           sum += ctx->work[idxs[j]];
1969:           ctx->work[idxs[j]] += val;
1970:         }
1971:       } else {
1972:         for (j=0;j<nz-1;j++) {
1973:           sum += ctx->work[idxs[j]];
1974:         }
1975:       }
1976:       ctx->work[idxs[nz-1]] -= sum;
1977:       ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);
1978:     }
1979:     VecPlaceArray(x,ctx->work);
1980:     reset_x = PETSC_TRUE;
1981:   }
1982:   if (transpose) {
1983:     MatMultTranspose(ctx->A,x,y);
1984:   } else {
1985:     MatMult(ctx->A,x,y);
1986:   }
1987:   if (reset_x) {
1988:     VecResetArray(x);
1989:   }
1990:   if (apply_left) {
1991:     PetscScalar *ay;
1992:     PetscInt    i;

1994:     VecGetArray(y,&ay);
1995:     for (i=0;i<ctx->benign_n;i++) {
1996:       PetscScalar    sum,val;
1997:       const PetscInt *idxs;
1998:       PetscInt       nz,j;
1999:       ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);
2000:       ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);
2001:       val = -ay[idxs[nz-1]];
2002:       if (ctx->apply_p0) {
2003:         sum = 0.;
2004:         for (j=0;j<nz-1;j++) {
2005:           sum += ay[idxs[j]];
2006:           ay[idxs[j]] += val;
2007:         }
2008:         ay[idxs[nz-1]] += sum;
2009:       } else {
2010:         for (j=0;j<nz-1;j++) {
2011:           ay[idxs[j]] += val;
2012:         }
2013:         ay[idxs[nz-1]] = 0.;
2014:       }
2015:       ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);
2016:     }
2017:     VecRestoreArray(y,&ay);
2018:   }
2019:   return 0;
2020: }

2022: PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y)
2023: {
2024:   PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);
2025:   return 0;
2026: }

2028: PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y)
2029: {
2030:   PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);
2031:   return 0;
2032: }

2034: PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore)
2035: {
2036:   PC_IS                   *pcis = (PC_IS*)pc->data;
2037:   PC_BDDC                 *pcbddc = (PC_BDDC*)pc->data;
2038:   PCBDDCBenignMatMult_ctx ctx;

2040:   if (!restore) {
2041:     Mat                A_IB,A_BI;
2042:     PetscScalar        *work;
2043:     PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL;

2046:     if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) return 0;
2047:     PetscMalloc1(pcis->n,&work);
2048:     MatCreate(PETSC_COMM_SELF,&A_IB);
2049:     MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);
2050:     MatSetType(A_IB,MATSHELL);
2051:     MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);
2052:     MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);
2053:     PetscNew(&ctx);
2054:     MatShellSetContext(A_IB,ctx);
2055:     ctx->apply_left = PETSC_TRUE;
2056:     ctx->apply_right = PETSC_FALSE;
2057:     ctx->apply_p0 = PETSC_FALSE;
2058:     ctx->benign_n = pcbddc->benign_n;
2059:     if (reuse) {
2060:       ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs;
2061:       ctx->free = PETSC_FALSE;
2062:     } else { /* TODO: could be optimized for successive solves */
2063:       ISLocalToGlobalMapping N_to_D;
2064:       PetscInt               i;

2066:       ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);
2067:       PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);
2068:       for (i=0;i<pcbddc->benign_n;i++) {
2069:         ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);
2070:       }
2071:       ISLocalToGlobalMappingDestroy(&N_to_D);
2072:       ctx->free = PETSC_TRUE;
2073:     }
2074:     ctx->A = pcis->A_IB;
2075:     ctx->work = work;
2076:     MatSetUp(A_IB);
2077:     MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);
2078:     MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);
2079:     pcis->A_IB = A_IB;

2081:     /* A_BI as A_IB^T */
2082:     MatCreateTranspose(A_IB,&A_BI);
2083:     pcbddc->benign_original_mat = pcis->A_BI;
2084:     pcis->A_BI = A_BI;
2085:   } else {
2086:     if (!pcbddc->benign_original_mat) {
2087:       return 0;
2088:     }
2089:     MatShellGetContext(pcis->A_IB,&ctx);
2090:     MatDestroy(&pcis->A_IB);
2091:     pcis->A_IB = ctx->A;
2092:     ctx->A = NULL;
2093:     MatDestroy(&pcis->A_BI);
2094:     pcis->A_BI = pcbddc->benign_original_mat;
2095:     pcbddc->benign_original_mat = NULL;
2096:     if (ctx->free) {
2097:       PetscInt i;
2098:       for (i=0;i<ctx->benign_n;i++) {
2099:         ISDestroy(&ctx->benign_zerodiag_subs[i]);
2100:       }
2101:       PetscFree(ctx->benign_zerodiag_subs);
2102:     }
2103:     PetscFree(ctx->work);
2104:     PetscFree(ctx);
2105:   }
2106:   return 0;
2107: }

2109: /* used just in bddc debug mode */
2110: PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B)
2111: {
2112:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
2113:   Mat_IS         *matis = (Mat_IS*)pc->pmat->data;
2114:   Mat            An;

2116:   MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);
2117:   MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);
2118:   if (is1) {
2119:     MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);
2120:     MatDestroy(&An);
2121:   } else {
2122:     *B = An;
2123:   }
2124:   return 0;
2125: }

2127: /* TODO: add reuse flag */
2128: PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
2129: {
2130:   Mat            Bt;
2131:   PetscScalar    *a,*bdata;
2132:   const PetscInt *ii,*ij;
2133:   PetscInt       m,n,i,nnz,*bii,*bij;
2134:   PetscBool      flg_row;

2136:   MatGetSize(A,&n,&m);
2137:   MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);
2138:   MatSeqAIJGetArray(A,&a);
2139:   nnz = n;
2140:   for (i=0;i<ii[n];i++) {
2141:     if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
2142:   }
2143:   PetscMalloc1(n+1,&bii);
2144:   PetscMalloc1(nnz,&bij);
2145:   PetscMalloc1(nnz,&bdata);
2146:   nnz = 0;
2147:   bii[0] = 0;
2148:   for (i=0;i<n;i++) {
2149:     PetscInt j;
2150:     for (j=ii[i];j<ii[i+1];j++) {
2151:       PetscScalar entry = a[j];
2152:       if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) {
2153:         bij[nnz] = ij[j];
2154:         bdata[nnz] = entry;
2155:         nnz++;
2156:       }
2157:     }
2158:     bii[i+1] = nnz;
2159:   }
2160:   MatSeqAIJRestoreArray(A,&a);
2161:   MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);
2162:   MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);
2163:   {
2164:     Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data);
2165:     b->free_a = PETSC_TRUE;
2166:     b->free_ij = PETSC_TRUE;
2167:   }
2168:   if (*B == A) {
2169:     MatDestroy(&A);
2170:   }
2171:   *B = Bt;
2172:   return 0;
2173: }

2175: PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS* cc[], IS* primalv)
2176: {
2177:   Mat                    B = NULL;
2178:   DM                     dm;
2179:   IS                     is_dummy,*cc_n;
2180:   ISLocalToGlobalMapping l2gmap_dummy;
2181:   PCBDDCGraph            graph;
2182:   PetscInt               *xadj_filtered = NULL,*adjncy_filtered = NULL;
2183:   PetscInt               i,n;
2184:   PetscInt               *xadj,*adjncy;
2185:   PetscBool              isplex = PETSC_FALSE;

2187:   if (ncc) *ncc = 0;
2188:   if (cc) *cc = NULL;
2189:   if (primalv) *primalv = NULL;
2190:   PCBDDCGraphCreate(&graph);
2191:   MatGetDM(pc->pmat,&dm);
2192:   if (!dm) {
2193:     PCGetDM(pc,&dm);
2194:   }
2195:   if (dm) {
2196:     PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);
2197:   }
2198:   if (filter) isplex = PETSC_FALSE;

2200:   if (isplex) { /* this code has been modified from plexpartition.c */
2201:     PetscInt       p, pStart, pEnd, a, adjSize, idx, size, nroots;
2202:     PetscInt      *adj = NULL;
2203:     IS             cellNumbering;
2204:     const PetscInt *cellNum;
2205:     PetscBool      useCone, useClosure;
2206:     PetscSection   section;
2207:     PetscSegBuffer adjBuffer;
2208:     PetscSF        sfPoint;

2210:     DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);
2211:     DMGetPointSF(dm, &sfPoint);
2212:     PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);
2213:     /* Build adjacency graph via a section/segbuffer */
2214:     PetscSectionCreate(PetscObjectComm((PetscObject) dm), &section);
2215:     PetscSectionSetChart(section, pStart, pEnd);
2216:     PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);
2217:     /* Always use FVM adjacency to create partitioner graph */
2218:     DMGetBasicAdjacency(dm, &useCone, &useClosure);
2219:     DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE);
2220:     DMPlexGetCellNumbering(dm, &cellNumbering);
2221:     ISGetIndices(cellNumbering, &cellNum);
2222:     for (n = 0, p = pStart; p < pEnd; p++) {
2223:       /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
2224:       if (nroots > 0) {if (cellNum[p] < 0) continue;}
2225:       adjSize = PETSC_DETERMINE;
2226:       DMPlexGetAdjacency(dm, p, &adjSize, &adj);
2227:       for (a = 0; a < adjSize; ++a) {
2228:         const PetscInt point = adj[a];
2229:         if (pStart <= point && point < pEnd) {
2230:           PetscInt *PETSC_RESTRICT pBuf;
2231:           PetscSectionAddDof(section, p, 1);
2232:           PetscSegBufferGetInts(adjBuffer, 1, &pBuf);
2233:           *pBuf = point;
2234:         }
2235:       }
2236:       n++;
2237:     }
2238:     DMSetBasicAdjacency(dm, useCone, useClosure);
2239:     /* Derive CSR graph from section/segbuffer */
2240:     PetscSectionSetUp(section);
2241:     PetscSectionGetStorageSize(section, &size);
2242:     PetscMalloc1(n+1, &xadj);
2243:     for (idx = 0, p = pStart; p < pEnd; p++) {
2244:       if (nroots > 0) {if (cellNum[p] < 0) continue;}
2245:       PetscSectionGetOffset(section, p, &(xadj[idx++]));
2246:     }
2247:     xadj[n] = size;
2248:     PetscSegBufferExtractAlloc(adjBuffer, &adjncy);
2249:     /* Clean up */
2250:     PetscSegBufferDestroy(&adjBuffer);
2251:     PetscSectionDestroy(&section);
2252:     PetscFree(adj);
2253:     graph->xadj = xadj;
2254:     graph->adjncy = adjncy;
2255:   } else {
2256:     Mat       A;
2257:     PetscBool isseqaij, flg_row;

2259:     MatISGetLocalMat(pc->pmat,&A);
2260:     if (!A->rmap->N || !A->cmap->N) {
2261:       PCBDDCGraphDestroy(&graph);
2262:       return 0;
2263:     }
2264:     PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);
2265:     if (!isseqaij && filter) {
2266:       PetscBool isseqdense;

2268:       PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);
2269:       if (!isseqdense) {
2270:         MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
2271:       } else { /* TODO: rectangular case and LDA */
2272:         PetscScalar *array;
2273:         PetscReal   chop=1.e-6;

2275:         MatDuplicate(A,MAT_COPY_VALUES,&B);
2276:         MatDenseGetArray(B,&array);
2277:         MatGetSize(B,&n,NULL);
2278:         for (i=0;i<n;i++) {
2279:           PetscInt j;
2280:           for (j=i+1;j<n;j++) {
2281:             PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)]));
2282:             if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.;
2283:             if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.;
2284:           }
2285:         }
2286:         MatDenseRestoreArray(B,&array);
2287:         MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);
2288:       }
2289:     } else {
2290:       PetscObjectReference((PetscObject)A);
2291:       B = A;
2292:     }
2293:     MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);

2295:     /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
2296:     if (filter) {
2297:       PetscScalar *data;
2298:       PetscInt    j,cum;

2300:       PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);
2301:       MatSeqAIJGetArray(B,&data);
2302:       cum = 0;
2303:       for (i=0;i<n;i++) {
2304:         PetscInt t;

2306:         for (j=xadj[i];j<xadj[i+1];j++) {
2307:           if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) {
2308:             continue;
2309:           }
2310:           adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j];
2311:         }
2312:         t = xadj_filtered[i];
2313:         xadj_filtered[i] = cum;
2314:         cum += t;
2315:       }
2316:       MatSeqAIJRestoreArray(B,&data);
2317:       graph->xadj = xadj_filtered;
2318:       graph->adjncy = adjncy_filtered;
2319:     } else {
2320:       graph->xadj = xadj;
2321:       graph->adjncy = adjncy;
2322:     }
2323:   }
2324:   /* compute local connected components using PCBDDCGraph */
2325:   ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);
2326:   ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);
2327:   ISDestroy(&is_dummy);
2328:   PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);
2329:   ISLocalToGlobalMappingDestroy(&l2gmap_dummy);
2330:   PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);
2331:   PCBDDCGraphComputeConnectedComponents(graph);

2333:   /* partial clean up */
2334:   PetscFree2(xadj_filtered,adjncy_filtered);
2335:   if (B) {
2336:     PetscBool flg_row;
2337:     MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
2338:     MatDestroy(&B);
2339:   }
2340:   if (isplex) {
2341:     PetscFree(xadj);
2342:     PetscFree(adjncy);
2343:   }

2345:   /* get back data */
2346:   if (isplex) {
2347:     if (ncc) *ncc = graph->ncc;
2348:     if (cc || primalv) {
2349:       Mat          A;
2350:       PetscBT      btv,btvt;
2351:       PetscSection subSection;
2352:       PetscInt     *ids,cum,cump,*cids,*pids;

2354:       DMPlexGetSubdomainSection(dm,&subSection);
2355:       MatISGetLocalMat(pc->pmat,&A);
2356:       PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);
2357:       PetscBTCreate(A->rmap->n,&btv);
2358:       PetscBTCreate(A->rmap->n,&btvt);

2360:       cids[0] = 0;
2361:       for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) {
2362:         PetscInt j;

2364:         PetscBTMemzero(A->rmap->n,btvt);
2365:         for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) {
2366:           PetscInt k, size, *closure = NULL, cell = graph->queue[j];

2368:           DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);
2369:           for (k = 0; k < 2*size; k += 2) {
2370:             PetscInt s, pp, p = closure[k], off, dof, cdof;

2372:             PetscSectionGetConstraintDof(subSection,p,&cdof);
2373:             PetscSectionGetOffset(subSection,p,&off);
2374:             PetscSectionGetDof(subSection,p,&dof);
2375:             for (s = 0; s < dof-cdof; s++) {
2376:               if (PetscBTLookupSet(btvt,off+s)) continue;
2377:               if (!PetscBTLookup(btv,off+s)) ids[cum++] = off+s;
2378:               else pids[cump++] = off+s; /* cross-vertex */
2379:             }
2380:             DMPlexGetTreeParent(dm,p,&pp,NULL);
2381:             if (pp != p) {
2382:               PetscSectionGetConstraintDof(subSection,pp,&cdof);
2383:               PetscSectionGetOffset(subSection,pp,&off);
2384:               PetscSectionGetDof(subSection,pp,&dof);
2385:               for (s = 0; s < dof-cdof; s++) {
2386:                 if (PetscBTLookupSet(btvt,off+s)) continue;
2387:                 if (!PetscBTLookup(btv,off+s)) ids[cum++] = off+s;
2388:                 else pids[cump++] = off+s; /* cross-vertex */
2389:               }
2390:             }
2391:           }
2392:           DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);
2393:         }
2394:         cids[i+1] = cum;
2395:         /* mark dofs as already assigned */
2396:         for (j = cids[i]; j < cids[i+1]; j++) {
2397:           PetscBTSet(btv,ids[j]);
2398:         }
2399:       }
2400:       if (cc) {
2401:         PetscMalloc1(graph->ncc,&cc_n);
2402:         for (i = 0; i < graph->ncc; i++) {
2403:           ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);
2404:         }
2405:         *cc = cc_n;
2406:       }
2407:       if (primalv) {
2408:         ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);
2409:       }
2410:       PetscFree3(ids,cids,pids);
2411:       PetscBTDestroy(&btv);
2412:       PetscBTDestroy(&btvt);
2413:     }
2414:   } else {
2415:     if (ncc) *ncc = graph->ncc;
2416:     if (cc) {
2417:       PetscMalloc1(graph->ncc,&cc_n);
2418:       for (i=0;i<graph->ncc;i++) {
2419:         ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);
2420:       }
2421:       *cc = cc_n;
2422:     }
2423:   }
2424:   /* clean up graph */
2425:   graph->xadj = NULL;
2426:   graph->adjncy = NULL;
2427:   PCBDDCGraphDestroy(&graph);
2428:   return 0;
2429: }

2431: PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
2432: {
2433:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
2434:   PC_IS*         pcis = (PC_IS*)(pc->data);
2435:   IS             dirIS = NULL;
2436:   PetscInt       i;

2438:   PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);
2439:   if (zerodiag) {
2440:     Mat            A;
2441:     Vec            vec3_N;
2442:     PetscScalar    *vals;
2443:     const PetscInt *idxs;
2444:     PetscInt       nz,*count;

2446:     /* p0 */
2447:     VecSet(pcis->vec1_N,0.);
2448:     PetscMalloc1(pcis->n,&vals);
2449:     ISGetLocalSize(zerodiag,&nz);
2450:     ISGetIndices(zerodiag,&idxs);
2451:     for (i=0;i<nz;i++) vals[i] = 1.;
2452:     VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);
2453:     VecAssemblyBegin(pcis->vec1_N);
2454:     VecAssemblyEnd(pcis->vec1_N);
2455:     /* v_I */
2456:     VecSetRandom(pcis->vec2_N,NULL);
2457:     for (i=0;i<nz;i++) vals[i] = 0.;
2458:     VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);
2459:     ISRestoreIndices(zerodiag,&idxs);
2460:     ISGetIndices(pcis->is_B_local,&idxs);
2461:     for (i=0;i<pcis->n_B;i++) vals[i] = 0.;
2462:     VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);
2463:     ISRestoreIndices(pcis->is_B_local,&idxs);
2464:     if (dirIS) {
2465:       PetscInt n;

2467:       ISGetLocalSize(dirIS,&n);
2468:       ISGetIndices(dirIS,&idxs);
2469:       for (i=0;i<n;i++) vals[i] = 0.;
2470:       VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);
2471:       ISRestoreIndices(dirIS,&idxs);
2472:     }
2473:     VecAssemblyBegin(pcis->vec2_N);
2474:     VecAssemblyEnd(pcis->vec2_N);
2475:     VecDuplicate(pcis->vec1_N,&vec3_N);
2476:     VecSet(vec3_N,0.);
2477:     MatISGetLocalMat(pc->pmat,&A);
2478:     MatMult(A,pcis->vec1_N,vec3_N);
2479:     VecDot(vec3_N,pcis->vec2_N,&vals[0]);
2481:     PetscFree(vals);
2482:     VecDestroy(&vec3_N);

2484:     /* there should not be any pressure dofs lying on the interface */
2485:     PetscCalloc1(pcis->n,&count);
2486:     ISGetIndices(pcis->is_B_local,&idxs);
2487:     for (i=0;i<pcis->n_B;i++) count[idxs[i]]++;
2488:     ISRestoreIndices(pcis->is_B_local,&idxs);
2489:     ISGetIndices(zerodiag,&idxs);
2491:     ISRestoreIndices(zerodiag,&idxs);
2492:     PetscFree(count);
2493:   }
2494:   ISDestroy(&dirIS);

2496:   /* check PCBDDCBenignGetOrSetP0 */
2497:   VecSetRandom(pcis->vec1_global,NULL);
2498:   for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i;
2499:   PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);
2500:   for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1;
2501:   PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);
2502:   for (i=0;i<pcbddc->benign_n;i++) {
2503:     PetscInt val = PetscRealPart(pcbddc->benign_p0[i]);
2505:   }
2506:   return 0;
2507: }

2509: PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal)
2510: {
2511:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
2512:   Mat_IS*        matis = (Mat_IS*)(pc->pmat->data);
2513:   IS             pressures = NULL,zerodiag = NULL,*bzerodiag = NULL,zerodiag_save,*zerodiag_subs;
2514:   PetscInt       nz,n,benign_n,bsp = 1;
2515:   PetscInt       *interior_dofs,n_interior_dofs,nneu;
2516:   PetscBool      sorted,have_null,has_null_pressures,recompute_zerodiag,checkb;

2519:   if (reuse) goto project_b0;
2520:   PetscSFDestroy(&pcbddc->benign_sf);
2521:   MatDestroy(&pcbddc->benign_B0);
2522:   for (n=0;n<pcbddc->benign_n;n++) {
2523:     ISDestroy(&pcbddc->benign_zerodiag_subs[n]);
2524:   }
2525:   PetscFree(pcbddc->benign_zerodiag_subs);
2526:   has_null_pressures = PETSC_TRUE;
2527:   have_null = PETSC_TRUE;
2528:   /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided)
2529:      Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field)
2530:      Checks if all the pressure dofs in each subdomain have a zero diagonal
2531:      If not, a change of basis on pressures is not needed
2532:      since the local Schur complements are already SPD
2533:   */
2534:   if (pcbddc->n_ISForDofsLocal) {
2535:     IS        iP = NULL;
2536:     PetscInt  p,*pp;
2537:     PetscBool flg;

2539:     PetscMalloc1(pcbddc->n_ISForDofsLocal,&pp);
2540:     n    = pcbddc->n_ISForDofsLocal;
2541:     PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");
2542:     PetscOptionsIntArray("-pc_bddc_pressure_field","Field id for pressures",NULL,pp,&n,&flg);
2543:     PetscOptionsEnd();
2544:     if (!flg) {
2545:       n = 1;
2546:       pp[0] = pcbddc->n_ISForDofsLocal-1;
2547:     }

2549:     bsp = 0;
2550:     for (p=0;p<n;p++) {
2551:       PetscInt bs;

2554:       ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);
2555:       bsp += bs;
2556:     }
2557:     PetscMalloc1(bsp,&bzerodiag);
2558:     bsp  = 0;
2559:     for (p=0;p<n;p++) {
2560:       const PetscInt *idxs;
2561:       PetscInt       b,bs,npl,*bidxs;

2563:       ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);
2564:       ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]],&npl);
2565:       ISGetIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);
2566:       PetscMalloc1(npl/bs,&bidxs);
2567:       for (b=0;b<bs;b++) {
2568:         PetscInt i;

2570:         for (i=0;i<npl/bs;i++) bidxs[i] = idxs[bs*i+b];
2571:         ISCreateGeneral(PETSC_COMM_SELF,npl/bs,bidxs,PETSC_COPY_VALUES,&bzerodiag[bsp]);
2572:         bsp++;
2573:       }
2574:       PetscFree(bidxs);
2575:       ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);
2576:     }
2577:     ISConcatenate(PETSC_COMM_SELF,bsp,bzerodiag,&pressures);

2579:     /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */
2580:     PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);
2581:     if (iP) {
2582:       IS newpressures;

2584:       ISDifference(pressures,iP,&newpressures);
2585:       ISDestroy(&pressures);
2586:       pressures = newpressures;
2587:     }
2588:     ISSorted(pressures,&sorted);
2589:     if (!sorted) {
2590:       ISSort(pressures);
2591:     }
2592:     PetscFree(pp);
2593:   }

2595:   /* pcis has not been setup yet, so get the local size from the subdomain matrix */
2596:   MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2597:   if (!n) pcbddc->benign_change_explicit = PETSC_TRUE;
2598:   MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);
2599:   ISSorted(zerodiag,&sorted);
2600:   if (!sorted) {
2601:     ISSort(zerodiag);
2602:   }
2603:   PetscObjectReference((PetscObject)zerodiag);
2604:   zerodiag_save = zerodiag;
2605:   ISGetLocalSize(zerodiag,&nz);
2606:   if (!nz) {
2607:     if (n) have_null = PETSC_FALSE;
2608:     has_null_pressures = PETSC_FALSE;
2609:     ISDestroy(&zerodiag);
2610:   }
2611:   recompute_zerodiag = PETSC_FALSE;

2613:   /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
2614:   zerodiag_subs    = NULL;
2615:   benign_n         = 0;
2616:   n_interior_dofs  = 0;
2617:   interior_dofs    = NULL;
2618:   nneu             = 0;
2619:   if (pcbddc->NeumannBoundariesLocal) {
2620:     ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);
2621:   }
2622:   checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level);
2623:   if (checkb) { /* need to compute interior nodes */
2624:     PetscInt n,i,j;
2625:     PetscInt n_neigh,*neigh,*n_shared,**shared;
2626:     PetscInt *iwork;

2628:     ISLocalToGlobalMappingGetSize(matis->rmapping,&n);
2629:     ISLocalToGlobalMappingGetInfo(matis->rmapping,&n_neigh,&neigh,&n_shared,&shared);
2630:     PetscCalloc1(n,&iwork);
2631:     PetscMalloc1(n,&interior_dofs);
2632:     for (i=1;i<n_neigh;i++)
2633:       for (j=0;j<n_shared[i];j++)
2634:           iwork[shared[i][j]] += 1;
2635:     for (i=0;i<n;i++)
2636:       if (!iwork[i])
2637:         interior_dofs[n_interior_dofs++] = i;
2638:     PetscFree(iwork);
2639:     ISLocalToGlobalMappingRestoreInfo(matis->rmapping,&n_neigh,&neigh,&n_shared,&shared);
2640:   }
2641:   if (has_null_pressures) {
2642:     IS             *subs;
2643:     PetscInt       nsubs,i,j,nl;
2644:     const PetscInt *idxs;
2645:     PetscScalar    *array;
2646:     Vec            *work;

2648:     subs  = pcbddc->local_subs;
2649:     nsubs = pcbddc->n_local_subs;
2650:     /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */
2651:     if (checkb) {
2652:       VecDuplicateVecs(matis->y,2,&work);
2653:       ISGetLocalSize(zerodiag,&nl);
2654:       ISGetIndices(zerodiag,&idxs);
2655:       /* work[0] = 1_p */
2656:       VecSet(work[0],0.);
2657:       VecGetArray(work[0],&array);
2658:       for (j=0;j<nl;j++) array[idxs[j]] = 1.;
2659:       VecRestoreArray(work[0],&array);
2660:       /* work[0] = 1_v */
2661:       VecSet(work[1],1.);
2662:       VecGetArray(work[1],&array);
2663:       for (j=0;j<nl;j++) array[idxs[j]] = 0.;
2664:       VecRestoreArray(work[1],&array);
2665:       ISRestoreIndices(zerodiag,&idxs);
2666:     }

2668:     if (nsubs > 1 || bsp > 1) {
2669:       IS       *is;
2670:       PetscInt b,totb;

2672:       totb  = bsp;
2673:       is    = bsp > 1 ? bzerodiag : &zerodiag;
2674:       nsubs = PetscMax(nsubs,1);
2675:       PetscCalloc1(nsubs*totb,&zerodiag_subs);
2676:       for (b=0;b<totb;b++) {
2677:         for (i=0;i<nsubs;i++) {
2678:           ISLocalToGlobalMapping l2g;
2679:           IS                     t_zerodiag_subs;
2680:           PetscInt               nl;

2682:           if (subs) {
2683:             ISLocalToGlobalMappingCreateIS(subs[i],&l2g);
2684:           } else {
2685:             IS tis;

2687:             MatGetLocalSize(pcbddc->local_mat,&nl,NULL);
2688:             ISCreateStride(PETSC_COMM_SELF,nl,0,1,&tis);
2689:             ISLocalToGlobalMappingCreateIS(tis,&l2g);
2690:             ISDestroy(&tis);
2691:           }
2692:           ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,is[b],&t_zerodiag_subs);
2693:           ISGetLocalSize(t_zerodiag_subs,&nl);
2694:           if (nl) {
2695:             PetscBool valid = PETSC_TRUE;

2697:             if (checkb) {
2698:               VecSet(matis->x,0);
2699:               ISGetLocalSize(subs[i],&nl);
2700:               ISGetIndices(subs[i],&idxs);
2701:               VecGetArray(matis->x,&array);
2702:               for (j=0;j<nl;j++) array[idxs[j]] = 1.;
2703:               VecRestoreArray(matis->x,&array);
2704:               ISRestoreIndices(subs[i],&idxs);
2705:               VecPointwiseMult(matis->x,work[0],matis->x);
2706:               MatMult(matis->A,matis->x,matis->y);
2707:               VecPointwiseMult(matis->y,work[1],matis->y);
2708:               VecGetArray(matis->y,&array);
2709:               for (j=0;j<n_interior_dofs;j++) {
2710:                 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2711:                   valid = PETSC_FALSE;
2712:                   break;
2713:                 }
2714:               }
2715:               VecRestoreArray(matis->y,&array);
2716:             }
2717:             if (valid && nneu) {
2718:               const PetscInt *idxs;
2719:               PetscInt       nzb;

2721:               ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
2722:               ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);
2723:               ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
2724:               if (nzb) valid = PETSC_FALSE;
2725:             }
2726:             if (valid && pressures) {
2727:               IS       t_pressure_subs,tmp;
2728:               PetscInt i1,i2;

2730:               ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);
2731:               ISEmbed(t_zerodiag_subs,t_pressure_subs,PETSC_TRUE,&tmp);
2732:               ISGetLocalSize(tmp,&i1);
2733:               ISGetLocalSize(t_zerodiag_subs,&i2);
2734:               if (i2 != i1) valid = PETSC_FALSE;
2735:               ISDestroy(&t_pressure_subs);
2736:               ISDestroy(&tmp);
2737:             }
2738:             if (valid) {
2739:               ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[benign_n]);
2740:               benign_n++;
2741:             } else recompute_zerodiag = PETSC_TRUE;
2742:           }
2743:           ISDestroy(&t_zerodiag_subs);
2744:           ISLocalToGlobalMappingDestroy(&l2g);
2745:         }
2746:       }
2747:     } else { /* there's just one subdomain (or zero if they have not been detected */
2748:       PetscBool valid = PETSC_TRUE;

2750:       if (nneu) valid = PETSC_FALSE;
2751:       if (valid && pressures) {
2752:         ISEqual(pressures,zerodiag,&valid);
2753:       }
2754:       if (valid && checkb) {
2755:         MatMult(matis->A,work[0],matis->x);
2756:         VecPointwiseMult(matis->x,work[1],matis->x);
2757:         VecGetArray(matis->x,&array);
2758:         for (j=0;j<n_interior_dofs;j++) {
2759:           if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2760:             valid = PETSC_FALSE;
2761:             break;
2762:           }
2763:         }
2764:         VecRestoreArray(matis->x,&array);
2765:       }
2766:       if (valid) {
2767:         benign_n = 1;
2768:         PetscMalloc1(benign_n,&zerodiag_subs);
2769:         PetscObjectReference((PetscObject)zerodiag);
2770:         zerodiag_subs[0] = zerodiag;
2771:       }
2772:     }
2773:     if (checkb) {
2774:       VecDestroyVecs(2,&work);
2775:     }
2776:   }
2777:   PetscFree(interior_dofs);

2779:   if (!benign_n) {
2780:     PetscInt n;

2782:     ISDestroy(&zerodiag);
2783:     recompute_zerodiag = PETSC_FALSE;
2784:     MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2785:     if (n) have_null = PETSC_FALSE;
2786:   }

2788:   /* final check for null pressures */
2789:   if (zerodiag && pressures) {
2790:     ISEqual(pressures,zerodiag,&have_null);
2791:   }

2793:   if (recompute_zerodiag) {
2794:     ISDestroy(&zerodiag);
2795:     if (benign_n == 1) {
2796:       PetscObjectReference((PetscObject)zerodiag_subs[0]);
2797:       zerodiag = zerodiag_subs[0];
2798:     } else {
2799:       PetscInt i,nzn,*new_idxs;

2801:       nzn = 0;
2802:       for (i=0;i<benign_n;i++) {
2803:         PetscInt ns;
2804:         ISGetLocalSize(zerodiag_subs[i],&ns);
2805:         nzn += ns;
2806:       }
2807:       PetscMalloc1(nzn,&new_idxs);
2808:       nzn = 0;
2809:       for (i=0;i<benign_n;i++) {
2810:         PetscInt ns,*idxs;
2811:         ISGetLocalSize(zerodiag_subs[i],&ns);
2812:         ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);
2813:         PetscArraycpy(new_idxs+nzn,idxs,ns);
2814:         ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);
2815:         nzn += ns;
2816:       }
2817:       PetscSortInt(nzn,new_idxs);
2818:       ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);
2819:     }
2820:     have_null = PETSC_FALSE;
2821:   }

2823:   /* determines if the coarse solver will be singular or not */
2824:   MPIU_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));

2826:   /* Prepare matrix to compute no-net-flux */
2827:   if (pcbddc->compute_nonetflux && !pcbddc->divudotp) {
2828:     Mat                    A,loc_divudotp;
2829:     ISLocalToGlobalMapping rl2g,cl2g,l2gmap;
2830:     IS                     row,col,isused = NULL;
2831:     PetscInt               M,N,n,st,n_isused;

2833:     if (pressures) {
2834:       isused = pressures;
2835:     } else {
2836:       isused = zerodiag_save;
2837:     }
2838:     MatISGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);
2839:     MatISGetLocalMat(pc->pmat,&A);
2840:     MatGetLocalSize(A,&n,NULL);
2842:     n_isused = 0;
2843:     if (isused) {
2844:       ISGetLocalSize(isused,&n_isused);
2845:     }
2846:     MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
2847:     st = st-n_isused;
2848:     if (n) {
2849:       const PetscInt *gidxs;

2851:       MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);
2852:       ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);
2853:       /* TODO: extend ISCreateStride with st = PETSC_DECIDE */
2854:       ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);
2855:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);
2856:       ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);
2857:     } else {
2858:       MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);
2859:       ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);
2860:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);
2861:     }
2862:     MatGetSize(pc->pmat,NULL,&N);
2863:     ISGetSize(row,&M);
2864:     ISLocalToGlobalMappingCreateIS(row,&rl2g);
2865:     ISLocalToGlobalMappingCreateIS(col,&cl2g);
2866:     ISDestroy(&row);
2867:     ISDestroy(&col);
2868:     MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);
2869:     MatSetType(pcbddc->divudotp,MATIS);
2870:     MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);
2871:     MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);
2872:     ISLocalToGlobalMappingDestroy(&rl2g);
2873:     ISLocalToGlobalMappingDestroy(&cl2g);
2874:     MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);
2875:     MatDestroy(&loc_divudotp);
2876:     MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);
2877:     MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);
2878:   }
2879:   ISDestroy(&zerodiag_save);
2880:   ISDestroy(&pressures);
2881:   if (bzerodiag) {
2882:     PetscInt i;

2884:     for (i=0;i<bsp;i++) {
2885:       ISDestroy(&bzerodiag[i]);
2886:     }
2887:     PetscFree(bzerodiag);
2888:   }
2889:   pcbddc->benign_n = benign_n;
2890:   pcbddc->benign_zerodiag_subs = zerodiag_subs;

2892:   /* determines if the problem has subdomains with 0 pressure block */
2893:   have_null = (PetscBool)(!!pcbddc->benign_n);
2894:   MPIU_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));

2896: project_b0:
2897:   MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2898:   /* change of basis and p0 dofs */
2899:   if (pcbddc->benign_n) {
2900:     PetscInt i,s,*nnz;

2902:     /* local change of basis for pressures */
2903:     MatDestroy(&pcbddc->benign_change);
2904:     MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);
2905:     MatSetType(pcbddc->benign_change,MATAIJ);
2906:     MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);
2907:     PetscMalloc1(n,&nnz);
2908:     for (i=0;i<n;i++) nnz[i] = 1; /* defaults to identity */
2909:     for (i=0;i<pcbddc->benign_n;i++) {
2910:       const PetscInt *idxs;
2911:       PetscInt       nzs,j;

2913:       ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nzs);
2914:       ISGetIndices(pcbddc->benign_zerodiag_subs[i],&idxs);
2915:       for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */
2916:       nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */
2917:       ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],&idxs);
2918:     }
2919:     MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);
2920:     MatSetOption(pcbddc->benign_change,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
2921:     PetscFree(nnz);
2922:     /* set identity by default */
2923:     for (i=0;i<n;i++) {
2924:       MatSetValue(pcbddc->benign_change,i,i,1.,INSERT_VALUES);
2925:     }
2926:     PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);
2927:     PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);
2928:     /* set change on pressures */
2929:     for (s=0;s<pcbddc->benign_n;s++) {
2930:       PetscScalar    *array;
2931:       const PetscInt *idxs;
2932:       PetscInt       nzs;

2934:       ISGetLocalSize(pcbddc->benign_zerodiag_subs[s],&nzs);
2935:       ISGetIndices(pcbddc->benign_zerodiag_subs[s],&idxs);
2936:       for (i=0;i<nzs-1;i++) {
2937:         PetscScalar vals[2];
2938:         PetscInt    cols[2];

2940:         cols[0] = idxs[i];
2941:         cols[1] = idxs[nzs-1];
2942:         vals[0] = 1.;
2943:         vals[1] = 1.;
2944:         MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);
2945:       }
2946:       PetscMalloc1(nzs,&array);
2947:       for (i=0;i<nzs-1;i++) array[i] = -1.;
2948:       array[nzs-1] = 1.;
2949:       MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);
2950:       /* store local idxs for p0 */
2951:       pcbddc->benign_p0_lidx[s] = idxs[nzs-1];
2952:       ISRestoreIndices(pcbddc->benign_zerodiag_subs[s],&idxs);
2953:       PetscFree(array);
2954:     }
2955:     MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);
2956:     MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);

2958:     /* project if needed */
2959:     if (pcbddc->benign_change_explicit) {
2960:       Mat M;

2962:       MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);
2963:       MatDestroy(&pcbddc->local_mat);
2964:       MatSeqAIJCompress(M,&pcbddc->local_mat);
2965:       MatDestroy(&M);
2966:     }
2967:     /* store global idxs for p0 */
2968:     ISLocalToGlobalMappingApply(matis->rmapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);
2969:   }
2970:   *zerodiaglocal = zerodiag;
2971:   return 0;
2972: }

2974: PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
2975: {
2976:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
2977:   PetscScalar    *array;

2979:   if (!pcbddc->benign_sf) {
2980:     PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);
2981:     PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);
2982:   }
2983:   if (get) {
2984:     VecGetArrayRead(v,(const PetscScalar**)&array);
2985:     PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0,MPI_REPLACE);
2986:     PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0,MPI_REPLACE);
2987:     VecRestoreArrayRead(v,(const PetscScalar**)&array);
2988:   } else {
2989:     VecGetArray(v,&array);
2990:     PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPI_REPLACE);
2991:     PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPI_REPLACE);
2992:     VecRestoreArray(v,&array);
2993:   }
2994:   return 0;
2995: }

2997: PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
2998: {
2999:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;

3001:   /* TODO: add error checking
3002:     - avoid nested pop (or push) calls.
3003:     - cannot push before pop.
3004:     - cannot call this if pcbddc->local_mat is NULL
3005:   */
3006:   if (!pcbddc->benign_n) {
3007:     return 0;
3008:   }
3009:   if (pop) {
3010:     if (pcbddc->benign_change_explicit) {
3011:       IS       is_p0;
3012:       MatReuse reuse;

3014:       /* extract B_0 */
3015:       reuse = MAT_INITIAL_MATRIX;
3016:       if (pcbddc->benign_B0) {
3017:         reuse = MAT_REUSE_MATRIX;
3018:       }
3019:       ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);
3020:       MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);
3021:       /* remove rows and cols from local problem */
3022:       MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);
3023:       MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
3024:       MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);
3025:       ISDestroy(&is_p0);
3026:     } else {
3027:       Mat_IS      *matis = (Mat_IS*)pc->pmat->data;
3028:       PetscScalar *vals;
3029:       PetscInt    i,n,*idxs_ins;

3031:       VecGetLocalSize(matis->y,&n);
3032:       PetscMalloc2(n,&idxs_ins,n,&vals);
3033:       if (!pcbddc->benign_B0) {
3034:         PetscInt *nnz;
3035:         MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);
3036:         MatSetType(pcbddc->benign_B0,MATAIJ);
3037:         MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);
3038:         PetscMalloc1(pcbddc->benign_n,&nnz);
3039:         for (i=0;i<pcbddc->benign_n;i++) {
3040:           ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);
3041:           nnz[i] = n - nnz[i];
3042:         }
3043:         MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);
3044:         MatSetOption(pcbddc->benign_B0,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
3045:         PetscFree(nnz);
3046:       }

3048:       for (i=0;i<pcbddc->benign_n;i++) {
3049:         PetscScalar *array;
3050:         PetscInt    *idxs,j,nz,cum;

3052:         VecSet(matis->x,0.);
3053:         ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);
3054:         ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);
3055:         for (j=0;j<nz;j++) vals[j] = 1.;
3056:         VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);
3057:         VecAssemblyBegin(matis->x);
3058:         VecAssemblyEnd(matis->x);
3059:         VecSet(matis->y,0.);
3060:         MatMult(matis->A,matis->x,matis->y);
3061:         VecGetArray(matis->y,&array);
3062:         cum = 0;
3063:         for (j=0;j<n;j++) {
3064:           if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) {
3065:             vals[cum] = array[j];
3066:             idxs_ins[cum] = j;
3067:             cum++;
3068:           }
3069:         }
3070:         MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);
3071:         VecRestoreArray(matis->y,&array);
3072:         ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);
3073:       }
3074:       MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);
3075:       MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);
3076:       PetscFree2(idxs_ins,vals);
3077:     }
3078:   } else { /* push */
3079:     if (pcbddc->benign_change_explicit) {
3080:       PetscInt i;

3082:       for (i=0;i<pcbddc->benign_n;i++) {
3083:         PetscScalar *B0_vals;
3084:         PetscInt    *B0_cols,B0_ncol;

3086:         MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);
3087:         MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);
3088:         MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);
3089:         MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);
3090:         MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);
3091:       }
3092:       MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);
3093:       MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);
3094:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!");
3095:   }
3096:   return 0;
3097: }

3099: PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
3100: {
3101:   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
3102:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3103:   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
3104:   PetscBLASInt    *B_iwork,*B_ifail;
3105:   PetscScalar     *work,lwork;
3106:   PetscScalar     *St,*S,*eigv;
3107:   PetscScalar     *Sarray,*Starray;
3108:   PetscReal       *eigs,thresh,lthresh,uthresh;
3109:   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
3110:   PetscBool       allocated_S_St;
3111: #if defined(PETSC_USE_COMPLEX)
3112:   PetscReal       *rwork;
3113: #endif
3114:   PetscErrorCode  ierr;

3119:   PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);

3121:   if (pcbddc->dbg_flag) {
3122:     PetscViewerFlush(pcbddc->dbg_viewer);
3123:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
3124:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");
3125:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
3126:   }

3128:   if (pcbddc->dbg_flag) {
3129:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %D (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
3130:   }

3132:   /* max size of subsets */
3133:   mss = 0;
3134:   for (i=0;i<sub_schurs->n_subs;i++) {
3135:     PetscInt subset_size;

3137:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3138:     mss = PetscMax(mss,subset_size);
3139:   }

3141:   /* min/max and threshold */
3142:   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
3143:   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
3144:   nmax = PetscMax(nmin,nmax);
3145:   allocated_S_St = PETSC_FALSE;
3146:   if (nmin || !sub_schurs->is_posdef) { /* XXX */
3147:     allocated_S_St = PETSC_TRUE;
3148:   }

3150:   /* allocate lapack workspace */
3151:   cum = cum2 = 0;
3152:   maxneigs = 0;
3153:   for (i=0;i<sub_schurs->n_subs;i++) {
3154:     PetscInt n,subset_size;

3156:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3157:     n = PetscMin(subset_size,nmax);
3158:     cum += subset_size;
3159:     cum2 += subset_size*n;
3160:     maxneigs = PetscMax(maxneigs,n);
3161:   }
3162:   lwork = 0;
3163:   if (mss) {
3164:     if (sub_schurs->is_symmetric) {
3165:       PetscScalar  sdummy = 0.;
3166:       PetscBLASInt B_itype = 1;
3167:       PetscBLASInt B_N = mss, idummy = 0;
3168:       PetscReal    rdummy = 0.,zero = 0.0;
3169:       PetscReal    eps = 0.0; /* dlamch? */

3171:       B_lwork = -1;
3172:       /* some implementations may complain about NULL pointers, even if we are querying */
3173:       S = &sdummy;
3174:       St = &sdummy;
3175:       eigs = &rdummy;
3176:       eigv = &sdummy;
3177:       B_iwork = &idummy;
3178:       B_ifail = &idummy;
3179: #if defined(PETSC_USE_COMPLEX)
3180:       rwork = &rdummy;
3181: #endif
3182:       thresh = 1.0;
3183:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3184: #if defined(PETSC_USE_COMPLEX)
3185:       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3186: #else
3187:       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
3188: #endif
3190:       PetscFPTrapPop();
3191:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
3192:   }

3194:   nv = 0;
3195:   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
3196:     ISGetLocalSize(sub_schurs->is_vertices,&nv);
3197:   }
3198:   PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);
3199:   if (allocated_S_St) {
3200:     PetscMalloc2(mss*mss,&S,mss*mss,&St);
3201:   }
3202:   PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);
3203: #if defined(PETSC_USE_COMPLEX)
3204:   PetscMalloc1(7*mss,&rwork);
3205: #endif
3206:   PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
3207:                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
3208:                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
3209:                       nv+cum,&pcbddc->adaptive_constraints_idxs,
3210:                       nv+cum2,&pcbddc->adaptive_constraints_data);
3211:   PetscArrayzero(pcbddc->adaptive_constraints_n,nv+sub_schurs->n_subs);

3213:   maxneigs = 0;
3214:   cum = cumarray = 0;
3215:   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
3216:   pcbddc->adaptive_constraints_data_ptr[0] = 0;
3217:   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3218:     const PetscInt *idxs;

3220:     ISGetIndices(sub_schurs->is_vertices,&idxs);
3221:     for (cum=0;cum<nv;cum++) {
3222:       pcbddc->adaptive_constraints_n[cum] = 1;
3223:       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
3224:       pcbddc->adaptive_constraints_data[cum] = 1.0;
3225:       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
3226:       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
3227:     }
3228:     ISRestoreIndices(sub_schurs->is_vertices,&idxs);
3229:   }

3231:   if (mss) { /* multilevel */
3232:     MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
3233:     MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
3234:   }

3236:   lthresh = pcbddc->adaptive_threshold[0];
3237:   uthresh = pcbddc->adaptive_threshold[1];
3238:   for (i=0;i<sub_schurs->n_subs;i++) {
3239:     const PetscInt *idxs;
3240:     PetscReal      upper,lower;
3241:     PetscInt       j,subset_size,eigs_start = 0;
3242:     PetscBLASInt   B_N;
3243:     PetscBool      same_data = PETSC_FALSE;
3244:     PetscBool      scal = PETSC_FALSE;

3246:     if (pcbddc->use_deluxe_scaling) {
3247:       upper = PETSC_MAX_REAL;
3248:       lower = uthresh;
3249:     } else {
3251:       upper = 1./uthresh;
3252:       lower = 0.;
3253:     }
3254:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3255:     ISGetIndices(sub_schurs->is_subs[i],&idxs);
3256:     PetscBLASIntCast(subset_size,&B_N);
3257:     /* this is experimental: we assume the dofs have been properly grouped to have
3258:        the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */
3259:     if (!sub_schurs->is_posdef) {
3260:       Mat T;

3262:       for (j=0;j<subset_size;j++) {
3263:         if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) {
3264:           MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T);
3265:           MatScale(T,-1.0);
3266:           MatDestroy(&T);
3267:           MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T);
3268:           MatScale(T,-1.0);
3269:           MatDestroy(&T);
3270:           if (sub_schurs->change_primal_sub) {
3271:             PetscInt       nz,k;
3272:             const PetscInt *idxs;

3274:             ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz);
3275:             ISGetIndices(sub_schurs->change_primal_sub[i],&idxs);
3276:             for (k=0;k<nz;k++) {
3277:               *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0;
3278:               *(Starray + cumarray + idxs[k]*(subset_size+1))  = 0.0;
3279:             }
3280:             ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs);
3281:           }
3282:           scal = PETSC_TRUE;
3283:           break;
3284:         }
3285:       }
3286:     }

3288:     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
3289:       if (sub_schurs->is_symmetric) {
3290:         PetscInt j,k;
3291:         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */
3292:           PetscArrayzero(S,subset_size*subset_size);
3293:           PetscArrayzero(St,subset_size*subset_size);
3294:         }
3295:         for (j=0;j<subset_size;j++) {
3296:           for (k=j;k<subset_size;k++) {
3297:             S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
3298:             St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
3299:           }
3300:         }
3301:       } else {
3302:         PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3303:         PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3304:       }
3305:     } else {
3306:       S = Sarray + cumarray;
3307:       St = Starray + cumarray;
3308:     }
3309:     /* see if we can save some work */
3310:     if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) {
3311:       PetscArraycmp(S,St,subset_size*subset_size,&same_data);
3312:     }

3314:     if (same_data && !sub_schurs->change) { /* there's no need of constraints here */
3315:       B_neigs = 0;
3316:     } else {
3317:       if (sub_schurs->is_symmetric) {
3318:         PetscBLASInt B_itype = 1;
3319:         PetscBLASInt B_IL, B_IU;
3320:         PetscReal    eps = -1.0; /* dlamch? */
3321:         PetscInt     nmin_s;
3322:         PetscBool    compute_range;

3324:         B_neigs = 0;
3325:         compute_range = (PetscBool)!same_data;
3326:         if (nmin >= subset_size) compute_range = PETSC_FALSE;

3328:         if (pcbddc->dbg_flag) {
3329:           PetscInt nc = 0;

3331:           if (sub_schurs->change_primal_sub) {
3332:             ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc);
3333:           }
3334:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %D/%D size %D count %D fid %D (range %d) (change %D).\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc);
3335:         }

3337:         PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3338:         if (compute_range) {

3340:           /* ask for eigenvalues larger than thresh */
3341:           if (sub_schurs->is_posdef) {
3342: #if defined(PETSC_USE_COMPLEX)
3343:             PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3344: #else
3345:             PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3346: #endif
3347:             PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3348:           } else { /* no theory so far, but it works nicely */
3349:             PetscInt  recipe = 0,recipe_m = 1;
3350:             PetscReal bb[2];

3352:             PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL);
3353:             switch (recipe) {
3354:             case 0:
3355:               if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; }
3356:               else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; }
3357: #if defined(PETSC_USE_COMPLEX)
3358:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3359: #else
3360:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3361: #endif
3362:               PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3363:               break;
3364:             case 1:
3365:               bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh;
3366: #if defined(PETSC_USE_COMPLEX)
3367:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3368: #else
3369:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3370: #endif
3371:               PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3372:               if (!scal) {
3373:                 PetscBLASInt B_neigs2 = 0;

3375:                 bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL;
3376:                 PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3377:                 PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3378: #if defined(PETSC_USE_COMPLEX)
3379:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3380: #else
3381:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3382: #endif
3383:                 PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3384:                 B_neigs += B_neigs2;
3385:               }
3386:               break;
3387:             case 2:
3388:               if (scal) {
3389:                 bb[0] = PETSC_MIN_REAL;
3390:                 bb[1] = 0;
3391: #if defined(PETSC_USE_COMPLEX)
3392:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3393: #else
3394:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3395: #endif
3396:                 PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3397:               } else {
3398:                 PetscBLASInt B_neigs2 = 0;
3399:                 PetscBool    import = PETSC_FALSE;

3401:                 lthresh = PetscMax(lthresh,0.0);
3402:                 if (lthresh > 0.0) {
3403:                   bb[0] = PETSC_MIN_REAL;
3404:                   bb[1] = lthresh*lthresh;

3406:                   import = PETSC_TRUE;
3407: #if defined(PETSC_USE_COMPLEX)
3408:                   PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3409: #else
3410:                   PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3411: #endif
3412:                   PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3413:                 }
3414:                 bb[0] = PetscMax(lthresh*lthresh,uthresh);
3415:                 bb[1] = PETSC_MAX_REAL;
3416:                 if (import) {
3417:                   PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3418:                   PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3419:                 }
3420: #if defined(PETSC_USE_COMPLEX)
3421:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3422: #else
3423:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3424: #endif
3425:                 PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3426:                 B_neigs += B_neigs2;
3427:               }
3428:               break;
3429:             case 3:
3430:               if (scal) {
3431:                 PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL);
3432:               } else {
3433:                 PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL);
3434:               }
3435:               if (!scal) {
3436:                 bb[0] = uthresh;
3437:                 bb[1] = PETSC_MAX_REAL;
3438: #if defined(PETSC_USE_COMPLEX)
3439:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3440: #else
3441:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3442: #endif
3443:                 PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3444:               }
3445:               if (recipe_m > 0 && B_N - B_neigs > 0) {
3446:                 PetscBLASInt B_neigs2 = 0;

3448:                 B_IL = 1;
3449:                 PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU);
3450:                 PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3451:                 PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3452: #if defined(PETSC_USE_COMPLEX)
3453:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3454: #else
3455:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3456: #endif
3457:                 PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3458:                 B_neigs += B_neigs2;
3459:               }
3460:               break;
3461:             case 4:
3462:               bb[0] = PETSC_MIN_REAL; bb[1] = lthresh;
3463: #if defined(PETSC_USE_COMPLEX)
3464:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3465: #else
3466:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3467: #endif
3468:               PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3469:               {
3470:                 PetscBLASInt B_neigs2 = 0;

3472:                 bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL;
3473:                 PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3474:                 PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3475: #if defined(PETSC_USE_COMPLEX)
3476:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3477: #else
3478:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3479: #endif
3480:                 PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3481:                 B_neigs += B_neigs2;
3482:               }
3483:               break;
3484:             case 5: /* same as before: first compute all eigenvalues, then filter */
3485: #if defined(PETSC_USE_COMPLEX)
3486:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3487: #else
3488:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3489: #endif
3490:               PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3491:               {
3492:                 PetscInt e,k,ne;
3493:                 for (e=0,ne=0;e<B_neigs;e++) {
3494:                   if (eigs[e] < lthresh || eigs[e] > uthresh) {
3495:                     for (k=0;k<B_N;k++) S[ne*B_N+k] = eigv[e*B_N+k];
3496:                     eigs[ne] = eigs[e];
3497:                     ne++;
3498:                   }
3499:                 }
3500:                 PetscArraycpy(eigv,S,B_N*ne);
3501:                 B_neigs = ne;
3502:               }
3503:               break;
3504:             default:
3505:               SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %D",recipe);
3506:             }
3507:           }
3508:         } else if (!same_data) { /* this is just to see all the eigenvalues */
3509:           B_IU = PetscMax(1,PetscMin(B_N,nmax));
3510:           B_IL = 1;
3511: #if defined(PETSC_USE_COMPLEX)
3512:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3513: #else
3514:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3515: #endif
3516:           PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3517:         } else { /* same_data is true, so just get the adaptive functional requested by the user */
3518:           PetscInt k;
3520:           ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);
3521:           PetscBLASIntCast(nmax,&B_neigs);
3522:           nmin = nmax;
3523:           PetscArrayzero(eigv,subset_size*nmax);
3524:           for (k=0;k<nmax;k++) {
3525:             eigs[k] = 1./PETSC_SMALL;
3526:             eigv[k*(subset_size+1)] = 1.0;
3527:           }
3528:         }
3529:         PetscFPTrapPop();
3530:         if (B_ierr) {
3533:           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
3534:         }

3536:         if (B_neigs > nmax) {
3537:           if (pcbddc->dbg_flag) {
3538:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %D.\n",B_neigs,nmax);
3539:           }
3540:           if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax;
3541:           B_neigs = nmax;
3542:         }

3544:         nmin_s = PetscMin(nmin,B_N);
3545:         if (B_neigs < nmin_s) {
3546:           PetscBLASInt B_neigs2 = 0;

3548:           if (pcbddc->use_deluxe_scaling) {
3549:             if (scal) {
3550:               B_IU = nmin_s;
3551:               B_IL = B_neigs + 1;
3552:             } else {
3553:               B_IL = B_N - nmin_s + 1;
3554:               B_IU = B_N - B_neigs;
3555:             }
3556:           } else {
3557:             B_IL = B_neigs + 1;
3558:             B_IU = nmin_s;
3559:           }
3560:           if (pcbddc->dbg_flag) {
3561:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %D. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
3562:           }
3563:           if (sub_schurs->is_symmetric) {
3564:             PetscInt j,k;
3565:             for (j=0;j<subset_size;j++) {
3566:               for (k=j;k<subset_size;k++) {
3567:                 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
3568:                 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
3569:               }
3570:             }
3571:           } else {
3572:             PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3573:             PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3574:           }
3575:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3576: #if defined(PETSC_USE_COMPLEX)
3577:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3578: #else
3579:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3580: #endif
3581:           PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3582:           PetscFPTrapPop();
3583:           B_neigs += B_neigs2;
3584:         }
3585:         if (B_ierr) {
3588:           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
3589:         }
3590:         if (pcbddc->dbg_flag) {
3591:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);
3592:           for (j=0;j<B_neigs;j++) {
3593:             if (eigs[j] == 0.0) {
3594:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");
3595:             } else {
3596:               if (pcbddc->use_deluxe_scaling) {
3597:                 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);
3598:               } else {
3599:                 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",1./eigs[j+eigs_start]);
3600:               }
3601:             }
3602:           }
3603:         }
3604:       } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
3605:     }
3606:     /* change the basis back to the original one */
3607:     if (sub_schurs->change) {
3608:       Mat change,phi,phit;

3610:       if (pcbddc->dbg_flag > 2) {
3611:         PetscInt ii;
3612:         for (ii=0;ii<B_neigs;ii++) {
3613:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);
3614:           for (j=0;j<B_N;j++) {
3615: #if defined(PETSC_USE_COMPLEX)
3616:             PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]);
3617:             PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]);
3618:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);
3619: #else
3620:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);
3621: #endif
3622:           }
3623:         }
3624:       }
3625:       KSPGetOperators(sub_schurs->change[i],&change,NULL);
3626:       MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);
3627:       MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);
3628:       MatCopy(phi,phit,SAME_NONZERO_PATTERN);
3629:       MatDestroy(&phit);
3630:       MatDestroy(&phi);
3631:     }
3632:     maxneigs = PetscMax(B_neigs,maxneigs);
3633:     pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
3634:     if (B_neigs) {
3635:       PetscArraycpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size);

3637:       if (pcbddc->dbg_flag > 1) {
3638:         PetscInt ii;
3639:         for (ii=0;ii<B_neigs;ii++) {
3640:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
3641:           for (j=0;j<B_N;j++) {
3642: #if defined(PETSC_USE_COMPLEX)
3643:             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3644:             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3645:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);
3646: #else
3647:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3648: #endif
3649:           }
3650:         }
3651:       }
3652:       PetscArraycpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size);
3653:       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
3654:       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
3655:       cum++;
3656:     }
3657:     ISRestoreIndices(sub_schurs->is_subs[i],&idxs);
3658:     /* shift for next computation */
3659:     cumarray += subset_size*subset_size;
3660:   }
3661:   if (pcbddc->dbg_flag) {
3662:     PetscViewerFlush(pcbddc->dbg_viewer);
3663:   }

3665:   if (mss) {
3666:     MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
3667:     MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
3668:     /* destroy matrices (junk) */
3669:     MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
3670:     MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);
3671:   }
3672:   if (allocated_S_St) {
3673:     PetscFree2(S,St);
3674:   }
3675:   PetscFree5(eigv,eigs,work,B_iwork,B_ifail);
3676: #if defined(PETSC_USE_COMPLEX)
3677:   PetscFree(rwork);
3678: #endif
3679:   if (pcbddc->dbg_flag) {
3680:     PetscInt maxneigs_r;
3681:     MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));
3682:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %D\n",maxneigs_r);
3683:   }
3684:   PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);
3685:   return 0;
3686: }

3688: PetscErrorCode PCBDDCSetUpSolvers(PC pc)
3689: {
3690:   PetscScalar    *coarse_submat_vals;

3692:   /* Setup local scatters R_to_B and (optionally) R_to_D */
3693:   /* PCBDDCSetUpLocalWorkVectors should be called first! */
3694:   PCBDDCSetUpLocalScatters(pc);

3696:   /* Setup local neumann solver ksp_R */
3697:   /* PCBDDCSetUpLocalScatters should be called first! */
3698:   PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);

3700:   /*
3701:      Setup local correction and local part of coarse basis.
3702:      Gives back the dense local part of the coarse matrix in column major ordering
3703:   */
3704:   PCBDDCSetUpCorrection(pc,&coarse_submat_vals);

3706:   /* Compute total number of coarse nodes and setup coarse solver */
3707:   PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);

3709:   /* free */
3710:   PetscFree(coarse_submat_vals);
3711:   return 0;
3712: }

3714: PetscErrorCode PCBDDCResetCustomization(PC pc)
3715: {
3716:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

3718:   ISDestroy(&pcbddc->user_primal_vertices);
3719:   ISDestroy(&pcbddc->user_primal_vertices_local);
3720:   ISDestroy(&pcbddc->NeumannBoundaries);
3721:   ISDestroy(&pcbddc->NeumannBoundariesLocal);
3722:   ISDestroy(&pcbddc->DirichletBoundaries);
3723:   MatNullSpaceDestroy(&pcbddc->onearnullspace);
3724:   PetscFree(pcbddc->onearnullvecs_state);
3725:   ISDestroy(&pcbddc->DirichletBoundariesLocal);
3726:   PCBDDCSetDofsSplitting(pc,0,NULL);
3727:   PCBDDCSetDofsSplittingLocal(pc,0,NULL);
3728:   return 0;
3729: }

3731: PetscErrorCode PCBDDCResetTopography(PC pc)
3732: {
3733:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
3734:   PetscInt       i;

3736:   MatDestroy(&pcbddc->nedcG);
3737:   ISDestroy(&pcbddc->nedclocal);
3738:   MatDestroy(&pcbddc->discretegradient);
3739:   MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);
3740:   MatDestroy(&pcbddc->ChangeOfBasisMatrix);
3741:   MatDestroy(&pcbddc->switch_static_change);
3742:   VecDestroy(&pcbddc->work_change);
3743:   MatDestroy(&pcbddc->ConstraintMatrix);
3744:   MatDestroy(&pcbddc->divudotp);
3745:   ISDestroy(&pcbddc->divudotp_vl2l);
3746:   PCBDDCGraphDestroy(&pcbddc->mat_graph);
3747:   for (i=0;i<pcbddc->n_local_subs;i++) {
3748:     ISDestroy(&pcbddc->local_subs[i]);
3749:   }
3750:   pcbddc->n_local_subs = 0;
3751:   PetscFree(pcbddc->local_subs);
3752:   PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);
3753:   pcbddc->graphanalyzed        = PETSC_FALSE;
3754:   pcbddc->recompute_topography = PETSC_TRUE;
3755:   pcbddc->corner_selected      = PETSC_FALSE;
3756:   return 0;
3757: }

3759: PetscErrorCode PCBDDCResetSolvers(PC pc)
3760: {
3761:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

3763:   VecDestroy(&pcbddc->coarse_vec);
3764:   if (pcbddc->coarse_phi_B) {
3765:     PetscScalar *array;
3766:     MatDenseGetArray(pcbddc->coarse_phi_B,&array);
3767:     PetscFree(array);
3768:   }
3769:   MatDestroy(&pcbddc->coarse_phi_B);
3770:   MatDestroy(&pcbddc->coarse_phi_D);
3771:   MatDestroy(&pcbddc->coarse_psi_B);
3772:   MatDestroy(&pcbddc->coarse_psi_D);
3773:   VecDestroy(&pcbddc->vec1_P);
3774:   VecDestroy(&pcbddc->vec1_C);
3775:   MatDestroy(&pcbddc->local_auxmat2);
3776:   MatDestroy(&pcbddc->local_auxmat1);
3777:   VecDestroy(&pcbddc->vec1_R);
3778:   VecDestroy(&pcbddc->vec2_R);
3779:   ISDestroy(&pcbddc->is_R_local);
3780:   VecScatterDestroy(&pcbddc->R_to_B);
3781:   VecScatterDestroy(&pcbddc->R_to_D);
3782:   VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
3783:   KSPReset(pcbddc->ksp_D);
3784:   KSPReset(pcbddc->ksp_R);
3785:   KSPReset(pcbddc->coarse_ksp);
3786:   MatDestroy(&pcbddc->local_mat);
3787:   PetscFree(pcbddc->primal_indices_local_idxs);
3788:   PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
3789:   PetscFree(pcbddc->global_primal_indices);
3790:   ISDestroy(&pcbddc->coarse_subassembling);
3791:   MatDestroy(&pcbddc->benign_change);
3792:   VecDestroy(&pcbddc->benign_vec);
3793:   PCBDDCBenignShellMat(pc,PETSC_TRUE);
3794:   MatDestroy(&pcbddc->benign_B0);
3795:   PetscSFDestroy(&pcbddc->benign_sf);
3796:   if (pcbddc->benign_zerodiag_subs) {
3797:     PetscInt i;
3798:     for (i=0;i<pcbddc->benign_n;i++) {
3799:       ISDestroy(&pcbddc->benign_zerodiag_subs[i]);
3800:     }
3801:     PetscFree(pcbddc->benign_zerodiag_subs);
3802:   }
3803:   PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);
3804:   return 0;
3805: }

3807: PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
3808: {
3809:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
3810:   PC_IS          *pcis = (PC_IS*)pc->data;
3811:   VecType        impVecType;
3812:   PetscInt       n_constraints,n_R,old_size;

3814:   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
3815:   n_R = pcis->n - pcbddc->n_vertices;
3816:   VecGetType(pcis->vec1_N,&impVecType);
3817:   /* local work vectors (try to avoid unneeded work)*/
3818:   /* R nodes */
3819:   old_size = -1;
3820:   if (pcbddc->vec1_R) {
3821:     VecGetSize(pcbddc->vec1_R,&old_size);
3822:   }
3823:   if (n_R != old_size) {
3824:     VecDestroy(&pcbddc->vec1_R);
3825:     VecDestroy(&pcbddc->vec2_R);
3826:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);
3827:     VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);
3828:     VecSetType(pcbddc->vec1_R,impVecType);
3829:     VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);
3830:   }
3831:   /* local primal dofs */
3832:   old_size = -1;
3833:   if (pcbddc->vec1_P) {
3834:     VecGetSize(pcbddc->vec1_P,&old_size);
3835:   }
3836:   if (pcbddc->local_primal_size != old_size) {
3837:     VecDestroy(&pcbddc->vec1_P);
3838:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);
3839:     VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);
3840:     VecSetType(pcbddc->vec1_P,impVecType);
3841:   }
3842:   /* local explicit constraints */
3843:   old_size = -1;
3844:   if (pcbddc->vec1_C) {
3845:     VecGetSize(pcbddc->vec1_C,&old_size);
3846:   }
3847:   if (n_constraints && n_constraints != old_size) {
3848:     VecDestroy(&pcbddc->vec1_C);
3849:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);
3850:     VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);
3851:     VecSetType(pcbddc->vec1_C,impVecType);
3852:   }
3853:   return 0;
3854: }

3856: PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
3857: {
3858:   /* pointers to pcis and pcbddc */
3859:   PC_IS*          pcis = (PC_IS*)pc->data;
3860:   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
3861:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3862:   /* submatrices of local problem */
3863:   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
3864:   /* submatrices of local coarse problem */
3865:   Mat             S_VV,S_CV,S_VC,S_CC;
3866:   /* working matrices */
3867:   Mat             C_CR;
3868:   /* additional working stuff */
3869:   PC              pc_R;
3870:   Mat             F,Brhs = NULL;
3871:   Vec             dummy_vec;
3872:   PetscBool       isLU,isCHOL,need_benign_correction,sparserhs;
3873:   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
3874:   PetscScalar     *work;
3875:   PetscInt        *idx_V_B;
3876:   PetscInt        lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I;
3877:   PetscInt        i,n_R,n_D,n_B;
3878:   PetscScalar     one=1.0,m_one=-1.0;

3881:   PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);

3883:   /* Set Non-overlapping dimensions */
3884:   n_vertices = pcbddc->n_vertices;
3885:   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
3886:   n_B = pcis->n_B;
3887:   n_D = pcis->n - n_B;
3888:   n_R = pcis->n - n_vertices;

3890:   /* vertices in boundary numbering */
3891:   PetscMalloc1(n_vertices,&idx_V_B);
3892:   ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);

3895:   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
3896:   PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);
3897:   MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);
3898:   MatDenseSetLDA(S_VV,pcbddc->local_primal_size);
3899:   MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);
3900:   MatDenseSetLDA(S_CV,pcbddc->local_primal_size);
3901:   MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);
3902:   MatDenseSetLDA(S_VC,pcbddc->local_primal_size);
3903:   MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);
3904:   MatDenseSetLDA(S_CC,pcbddc->local_primal_size);

3906:   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
3907:   KSPGetPC(pcbddc->ksp_R,&pc_R);
3908:   PCSetUp(pc_R);
3909:   PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);
3910:   PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);
3911:   lda_rhs = n_R;
3912:   need_benign_correction = PETSC_FALSE;
3913:   if (isLU || isCHOL) {
3914:     PCFactorGetMatrix(pc_R,&F);
3915:   } else if (sub_schurs && sub_schurs->reuse_solver) {
3916:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3917:     MatFactorType      type;

3919:     F = reuse_solver->F;
3920:     MatGetFactorType(F,&type);
3921:     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
3922:     if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE;
3923:     MatGetSize(F,&lda_rhs,NULL);
3924:     need_benign_correction = (PetscBool)(!!reuse_solver->benign_n);
3925:   } else F = NULL;

3927:   /* determine if we can use a sparse right-hand side */
3928:   sparserhs = PETSC_FALSE;
3929:   if (F) {
3930:     MatSolverType solver;

3932:     MatFactorGetSolverType(F,&solver);
3933:     PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);
3934:   }

3936:   /* allocate workspace */
3937:   n = 0;
3938:   if (n_constraints) {
3939:     n += lda_rhs*n_constraints;
3940:   }
3941:   if (n_vertices) {
3942:     n = PetscMax(2*lda_rhs*n_vertices,n);
3943:     n = PetscMax((lda_rhs+n_B)*n_vertices,n);
3944:   }
3945:   if (!pcbddc->symmetric_primal) {
3946:     n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n);
3947:   }
3948:   PetscMalloc1(n,&work);

3950:   /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */
3951:   dummy_vec = NULL;
3952:   if (need_benign_correction && lda_rhs != n_R && F) {
3953:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&dummy_vec);
3954:     VecSetSizes(dummy_vec,lda_rhs,PETSC_DECIDE);
3955:     VecSetType(dummy_vec,((PetscObject)pcis->vec1_N)->type_name);
3956:   }

3958:   MatDestroy(&pcbddc->local_auxmat1);
3959:   MatDestroy(&pcbddc->local_auxmat2);

3961:   /* Precompute data needed for preprocessing and application of BDDC */
3962:   if (n_constraints) {
3963:     Mat         M3,C_B;
3964:     IS          is_aux;

3966:     /* Extract constraints on R nodes: C_{CR}  */
3967:     ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);
3968:     MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);
3969:     MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);

3971:     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
3972:     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
3973:     if (!sparserhs) {
3974:       PetscArrayzero(work,lda_rhs*n_constraints);
3975:       for (i=0;i<n_constraints;i++) {
3976:         const PetscScalar *row_cmat_values;
3977:         const PetscInt    *row_cmat_indices;
3978:         PetscInt          size_of_constraint,j;

3980:         MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
3981:         for (j=0;j<size_of_constraint;j++) {
3982:           work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j];
3983:         }
3984:         MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
3985:       }
3986:       MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);
3987:     } else {
3988:       Mat tC_CR;

3990:       MatScale(C_CR,-1.0);
3991:       if (lda_rhs != n_R) {
3992:         PetscScalar *aa;
3993:         PetscInt    r,*ii,*jj;
3994:         PetscBool   done;

3996:         MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
3998:         MatSeqAIJGetArray(C_CR,&aa);
3999:         MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);
4000:         MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4002:       } else {
4003:         PetscObjectReference((PetscObject)C_CR);
4004:         tC_CR = C_CR;
4005:       }
4006:       MatCreateTranspose(tC_CR,&Brhs);
4007:       MatDestroy(&tC_CR);
4008:     }
4009:     MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);
4010:     if (F) {
4011:       if (need_benign_correction) {
4012:         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

4014:         /* rhs is already zero on interior dofs, no need to change the rhs */
4015:         PetscArrayzero(reuse_solver->benign_save_vals,pcbddc->benign_n);
4016:       }
4017:       MatMatSolve(F,Brhs,local_auxmat2_R);
4018:       if (need_benign_correction) {
4019:         PetscScalar        *marr;
4020:         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

4022:         MatDenseGetArray(local_auxmat2_R,&marr);
4023:         if (lda_rhs != n_R) {
4024:           for (i=0;i<n_constraints;i++) {
4025:             VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4026:             PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);
4027:             VecResetArray(dummy_vec);
4028:           }
4029:         } else {
4030:           for (i=0;i<n_constraints;i++) {
4031:             VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4032:             PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);
4033:             VecResetArray(pcbddc->vec1_R);
4034:           }
4035:         }
4036:         MatDenseRestoreArray(local_auxmat2_R,&marr);
4037:       }
4038:     } else {
4039:       PetscScalar *marr;

4041:       MatDenseGetArray(local_auxmat2_R,&marr);
4042:       for (i=0;i<n_constraints;i++) {
4043:         VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);
4044:         VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);
4045:         KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4046:         KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4047:         VecResetArray(pcbddc->vec1_R);
4048:         VecResetArray(pcbddc->vec2_R);
4049:       }
4050:       MatDenseRestoreArray(local_auxmat2_R,&marr);
4051:     }
4052:     if (sparserhs) {
4053:       MatScale(C_CR,-1.0);
4054:     }
4055:     MatDestroy(&Brhs);
4056:     if (!pcbddc->switch_static) {
4057:       MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);
4058:       for (i=0;i<n_constraints;i++) {
4059:         Vec r, b;
4060:         MatDenseGetColumnVecRead(local_auxmat2_R,i,&r);
4061:         MatDenseGetColumnVec(pcbddc->local_auxmat2,i,&b);
4062:         VecScatterBegin(pcbddc->R_to_B,r,b,INSERT_VALUES,SCATTER_FORWARD);
4063:         VecScatterEnd(pcbddc->R_to_B,r,b,INSERT_VALUES,SCATTER_FORWARD);
4064:         MatDenseRestoreColumnVec(pcbddc->local_auxmat2,i,&b);
4065:         MatDenseRestoreColumnVecRead(local_auxmat2_R,i,&r);
4066:       }
4067:       MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
4068:     } else {
4069:       if (lda_rhs != n_R) {
4070:         IS dummy;

4072:         ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);
4073:         MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);
4074:         ISDestroy(&dummy);
4075:       } else {
4076:         PetscObjectReference((PetscObject)local_auxmat2_R);
4077:         pcbddc->local_auxmat2 = local_auxmat2_R;
4078:       }
4079:       MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
4080:     }
4081:     ISDestroy(&is_aux);
4082:     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1}  */
4083:     MatScale(M3,m_one);
4084:     if (isCHOL) {
4085:       MatCholeskyFactor(M3,NULL,NULL);
4086:     } else {
4087:       MatLUFactor(M3,NULL,NULL,NULL);
4088:     }
4089:     MatSeqDenseInvertFactors_Private(M3);
4090:     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
4091:     MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);
4092:     MatDestroy(&C_B);
4093:     MatCopy(M3,S_CC,SAME_NONZERO_PATTERN); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
4094:     MatDestroy(&M3);
4095:   }

4097:   /* Get submatrices from subdomain matrix */
4098:   if (n_vertices) {
4099: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4100:     PetscBool oldpin;
4101: #endif
4102:     PetscBool isaij;
4103:     IS        is_aux;

4105:     if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */
4106:       IS tis;

4108:       ISDuplicate(pcbddc->is_R_local,&tis);
4109:       ISSort(tis);
4110:       ISComplement(tis,0,pcis->n,&is_aux);
4111:       ISDestroy(&tis);
4112:     } else {
4113:       ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);
4114:     }
4115: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4116:     oldpin = pcbddc->local_mat->boundtocpu;
4117: #endif
4118:     MatBindToCPU(pcbddc->local_mat,PETSC_TRUE);
4119:     MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);
4120:     MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);
4121:     PetscObjectBaseTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isaij);
4122:     if (!isaij) { /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */
4123:       MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);
4124:     }
4125:     MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);
4126: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4127:     MatBindToCPU(pcbddc->local_mat,oldpin);
4128: #endif
4129:     ISDestroy(&is_aux);
4130:   }

4132:   /* Matrix of coarse basis functions (local) */
4133:   if (pcbddc->coarse_phi_B) {
4134:     PetscInt on_B,on_primal,on_D=n_D;
4135:     if (pcbddc->coarse_phi_D) {
4136:       MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);
4137:     }
4138:     MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);
4139:     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
4140:       PetscScalar *marray;

4142:       MatDenseGetArray(pcbddc->coarse_phi_B,&marray);
4143:       PetscFree(marray);
4144:       MatDestroy(&pcbddc->coarse_phi_B);
4145:       MatDestroy(&pcbddc->coarse_psi_B);
4146:       MatDestroy(&pcbddc->coarse_phi_D);
4147:       MatDestroy(&pcbddc->coarse_psi_D);
4148:     }
4149:   }

4151:   if (!pcbddc->coarse_phi_B) {
4152:     PetscScalar *marr;

4154:     /* memory size */
4155:     n = n_B*pcbddc->local_primal_size;
4156:     if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size;
4157:     if (!pcbddc->symmetric_primal) n *= 2;
4158:     PetscCalloc1(n,&marr);
4159:     MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);
4160:     marr += n_B*pcbddc->local_primal_size;
4161:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
4162:       MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);
4163:       marr += n_D*pcbddc->local_primal_size;
4164:     }
4165:     if (!pcbddc->symmetric_primal) {
4166:       MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);
4167:       marr += n_B*pcbddc->local_primal_size;
4168:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4169:         MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);
4170:       }
4171:     } else {
4172:       PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);
4173:       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
4174:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4175:         PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);
4176:         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
4177:       }
4178:     }
4179:   }

4181:   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
4182:   p0_lidx_I = NULL;
4183:   if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
4184:     const PetscInt *idxs;

4186:     ISGetIndices(pcis->is_I_local,&idxs);
4187:     PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);
4188:     for (i=0;i<pcbddc->benign_n;i++) {
4189:       PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);
4190:     }
4191:     ISRestoreIndices(pcis->is_I_local,&idxs);
4192:   }

4194:   /* vertices */
4195:   if (n_vertices) {
4196:     PetscBool restoreavr = PETSC_FALSE;

4198:     MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);

4200:     if (n_R) {
4201:       Mat               A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */
4202:       PetscBLASInt      B_N,B_one = 1;
4203:       const PetscScalar *x;
4204:       PetscScalar       *y;

4206:       MatScale(A_RV,m_one);
4207:       if (need_benign_correction) {
4208:         ISLocalToGlobalMapping RtoN;
4209:         IS                     is_p0;
4210:         PetscInt               *idxs_p0,n;

4212:         PetscMalloc1(pcbddc->benign_n,&idxs_p0);
4213:         ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);
4214:         ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);
4216:         ISLocalToGlobalMappingDestroy(&RtoN);
4217:         ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);
4218:         MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);
4219:         ISDestroy(&is_p0);
4220:       }

4222:       MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);
4223:       if (!sparserhs || need_benign_correction) {
4224:         if (lda_rhs == n_R) {
4225:           MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);
4226:         } else {
4227:           PetscScalar    *av,*array;
4228:           const PetscInt *xadj,*adjncy;
4229:           PetscInt       n;
4230:           PetscBool      flg_row;

4232:           array = work+lda_rhs*n_vertices;
4233:           PetscArrayzero(array,lda_rhs*n_vertices);
4234:           MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);
4235:           MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4236:           MatSeqAIJGetArray(A_RV,&av);
4237:           for (i=0;i<n;i++) {
4238:             PetscInt j;
4239:             for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j];
4240:           }
4241:           MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4242:           MatDestroy(&A_RV);
4243:           MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);
4244:         }
4245:         if (need_benign_correction) {
4246:           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4247:           PetscScalar        *marr;

4249:           MatDenseGetArray(A_RV,&marr);
4250:           /* need \Phi^T A_RV = (I+L)A_RV, L given by

4252:                  | 0 0  0 | (V)
4253:              L = | 0 0 -1 | (P-p0)
4254:                  | 0 0 -1 | (p0)

4256:           */
4257:           for (i=0;i<reuse_solver->benign_n;i++) {
4258:             const PetscScalar *vals;
4259:             const PetscInt    *idxs,*idxs_zero;
4260:             PetscInt          n,j,nz;

4262:             ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);
4263:             ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4264:             MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);
4265:             for (j=0;j<n;j++) {
4266:               PetscScalar val = vals[j];
4267:               PetscInt    k,col = idxs[j];
4268:               for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val;
4269:             }
4270:             MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);
4271:             ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4272:           }
4273:           MatDenseRestoreArray(A_RV,&marr);
4274:         }
4275:         PetscObjectReference((PetscObject)A_RV);
4276:         Brhs = A_RV;
4277:       } else {
4278:         Mat tA_RVT,A_RVT;

4280:         if (!pcbddc->symmetric_primal) {
4281:           /* A_RV already scaled by -1 */
4282:           MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);
4283:         } else {
4284:           restoreavr = PETSC_TRUE;
4285:           MatScale(A_VR,-1.0);
4286:           PetscObjectReference((PetscObject)A_VR);
4287:           A_RVT = A_VR;
4288:         }
4289:         if (lda_rhs != n_R) {
4290:           PetscScalar *aa;
4291:           PetscInt    r,*ii,*jj;
4292:           PetscBool   done;

4294:           MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4296:           MatSeqAIJGetArray(A_RVT,&aa);
4297:           MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);
4298:           MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4300:         } else {
4301:           PetscObjectReference((PetscObject)A_RVT);
4302:           tA_RVT = A_RVT;
4303:         }
4304:         MatCreateTranspose(tA_RVT,&Brhs);
4305:         MatDestroy(&tA_RVT);
4306:         MatDestroy(&A_RVT);
4307:       }
4308:       if (F) {
4309:         /* need to correct the rhs */
4310:         if (need_benign_correction) {
4311:           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4312:           PetscScalar        *marr;

4314:           MatDenseGetArray(Brhs,&marr);
4315:           if (lda_rhs != n_R) {
4316:             for (i=0;i<n_vertices;i++) {
4317:               VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4318:               PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);
4319:               VecResetArray(dummy_vec);
4320:             }
4321:           } else {
4322:             for (i=0;i<n_vertices;i++) {
4323:               VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4324:               PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);
4325:               VecResetArray(pcbddc->vec1_R);
4326:             }
4327:           }
4328:           MatDenseRestoreArray(Brhs,&marr);
4329:         }
4330:         MatMatSolve(F,Brhs,A_RRmA_RV);
4331:         if (restoreavr) {
4332:           MatScale(A_VR,-1.0);
4333:         }
4334:         /* need to correct the solution */
4335:         if (need_benign_correction) {
4336:           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4337:           PetscScalar        *marr;

4339:           MatDenseGetArray(A_RRmA_RV,&marr);
4340:           if (lda_rhs != n_R) {
4341:             for (i=0;i<n_vertices;i++) {
4342:               VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4343:               PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);
4344:               VecResetArray(dummy_vec);
4345:             }
4346:           } else {
4347:             for (i=0;i<n_vertices;i++) {
4348:               VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4349:               PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);
4350:               VecResetArray(pcbddc->vec1_R);
4351:             }
4352:           }
4353:           MatDenseRestoreArray(A_RRmA_RV,&marr);
4354:         }
4355:       } else {
4356:         MatDenseGetArray(Brhs,&y);
4357:         for (i=0;i<n_vertices;i++) {
4358:           VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);
4359:           VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);
4360:           KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4361:           KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4362:           VecResetArray(pcbddc->vec1_R);
4363:           VecResetArray(pcbddc->vec2_R);
4364:         }
4365:         MatDenseRestoreArray(Brhs,&y);
4366:       }
4367:       MatDestroy(&A_RV);
4368:       MatDestroy(&Brhs);
4369:       /* S_VV and S_CV */
4370:       if (n_constraints) {
4371:         Mat B;

4373:         PetscArrayzero(work+lda_rhs*n_vertices,n_B*n_vertices);
4374:         for (i=0;i<n_vertices;i++) {
4375:           VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);
4376:           VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);
4377:           VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4378:           VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4379:           VecResetArray(pcis->vec1_B);
4380:           VecResetArray(pcbddc->vec1_R);
4381:         }
4382:         MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);
4383:         /* Reuse dense S_CV = pcbddc->local_auxmat1 * B */
4384:         MatProductCreateWithMat(pcbddc->local_auxmat1,B,NULL,S_CV);
4385:         MatProductSetType(S_CV,MATPRODUCT_AB);
4386:         MatProductSetFromOptions(S_CV);
4387:         MatProductSymbolic(S_CV);
4388:         MatProductNumeric(S_CV);
4389:         MatProductClear(S_CV);

4391:         MatDestroy(&B);
4392:         MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);
4393:         /* Reuse B = local_auxmat2_R * S_CV */
4394:         MatProductCreateWithMat(local_auxmat2_R,S_CV,NULL,B);
4395:         MatProductSetType(B,MATPRODUCT_AB);
4396:         MatProductSetFromOptions(B);
4397:         MatProductSymbolic(B);
4398:         MatProductNumeric(B);

4400:         MatScale(S_CV,m_one);
4401:         PetscBLASIntCast(lda_rhs*n_vertices,&B_N);
4402:         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one));
4403:         MatDestroy(&B);
4404:       }
4405:       if (lda_rhs != n_R) {
4406:         MatDestroy(&A_RRmA_RV);
4407:         MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);
4408:         MatDenseSetLDA(A_RRmA_RV,lda_rhs);
4409:       }
4410:       MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);
4411:       /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */
4412:       if (need_benign_correction) {
4413:         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4414:         PetscScalar        *marr,*sums;

4416:         PetscMalloc1(n_vertices,&sums);
4417:         MatDenseGetArray(S_VVt,&marr);
4418:         for (i=0;i<reuse_solver->benign_n;i++) {
4419:           const PetscScalar *vals;
4420:           const PetscInt    *idxs,*idxs_zero;
4421:           PetscInt          n,j,nz;

4423:           ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);
4424:           ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4425:           for (j=0;j<n_vertices;j++) {
4426:             PetscInt k;
4427:             sums[j] = 0.;
4428:             for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs];
4429:           }
4430:           MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);
4431:           for (j=0;j<n;j++) {
4432:             PetscScalar val = vals[j];
4433:             PetscInt k;
4434:             for (k=0;k<n_vertices;k++) {
4435:               marr[idxs[j]+k*n_vertices] += val*sums[k];
4436:             }
4437:           }
4438:           MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);
4439:           ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4440:         }
4441:         PetscFree(sums);
4442:         MatDenseRestoreArray(S_VVt,&marr);
4443:         MatDestroy(&A_RV_bcorr);
4444:       }
4445:       MatDestroy(&A_RRmA_RV);
4446:       PetscBLASIntCast(n_vertices*n_vertices,&B_N);
4447:       MatDenseGetArrayRead(A_VV,&x);
4448:       MatDenseGetArray(S_VVt,&y);
4449:       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
4450:       MatDenseRestoreArrayRead(A_VV,&x);
4451:       MatDenseRestoreArray(S_VVt,&y);
4452:       MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);
4453:       MatDestroy(&S_VVt);
4454:     } else {
4455:       MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);
4456:     }
4457:     MatDestroy(&A_VV);

4459:     /* coarse basis functions */
4460:     for (i=0;i<n_vertices;i++) {
4461:       Vec         v;
4462:       PetscScalar one = 1.0,zero = 0.0;

4464:       VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);
4465:       MatDenseGetColumnVec(pcbddc->coarse_phi_B,i,&v);
4466:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD);
4467:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD);
4468:       if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */
4469:         PetscMPIInt rank;
4470:         MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_B),&rank);
4472:       }
4473:       VecSetValues(v,1,&idx_V_B[i],&one,INSERT_VALUES);
4474:       VecAssemblyBegin(v); /* If v is on device, hope VecSetValues() eventually implemented by a host to device memcopy */
4475:       VecAssemblyEnd(v);
4476:       MatDenseRestoreColumnVec(pcbddc->coarse_phi_B,i,&v);

4478:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4479:         PetscInt j;

4481:         MatDenseGetColumnVec(pcbddc->coarse_phi_D,i,&v);
4482:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD);
4483:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD);
4484:         if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */
4485:           PetscMPIInt rank;
4486:           MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_D),&rank);
4488:         }
4489:         for (j=0;j<pcbddc->benign_n;j++) VecSetValues(v,1,&p0_lidx_I[j],&zero,INSERT_VALUES);
4490:         VecAssemblyBegin(v);
4491:         VecAssemblyEnd(v);
4492:         MatDenseRestoreColumnVec(pcbddc->coarse_phi_D,i,&v);
4493:       }
4494:       VecResetArray(pcbddc->vec1_R);
4495:     }
4496:     /* if n_R == 0 the object is not destroyed */
4497:     MatDestroy(&A_RV);
4498:   }
4499:   VecDestroy(&dummy_vec);

4501:   if (n_constraints) {
4502:     Mat B;

4504:     MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);
4505:     MatScale(S_CC,m_one);
4506:     MatProductCreateWithMat(local_auxmat2_R,S_CC,NULL,B);
4507:     MatProductSetType(B,MATPRODUCT_AB);
4508:     MatProductSetFromOptions(B);
4509:     MatProductSymbolic(B);
4510:     MatProductNumeric(B);

4512:     MatScale(S_CC,m_one);
4513:     if (n_vertices) {
4514:       if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
4515:         MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);
4516:       } else {
4517:         Mat S_VCt;

4519:         if (lda_rhs != n_R) {
4520:           MatDestroy(&B);
4521:           MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);
4522:           MatDenseSetLDA(B,lda_rhs);
4523:         }
4524:         MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);
4525:         MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);
4526:         MatDestroy(&S_VCt);
4527:       }
4528:     }
4529:     MatDestroy(&B);
4530:     /* coarse basis functions */
4531:     for (i=0;i<n_constraints;i++) {
4532:       Vec v;

4534:       VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);
4535:       MatDenseGetColumnVec(pcbddc->coarse_phi_B,i+n_vertices,&v);
4536:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD);
4537:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD);
4538:       MatDenseRestoreColumnVec(pcbddc->coarse_phi_B,i+n_vertices,&v);
4539:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4540:         PetscInt    j;
4541:         PetscScalar zero = 0.0;
4542:         MatDenseGetColumnVec(pcbddc->coarse_phi_D,i+n_vertices,&v);
4543:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD);
4544:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD);
4545:         for (j=0;j<pcbddc->benign_n;j++) VecSetValues(v,1,&p0_lidx_I[j],&zero,INSERT_VALUES);
4546:         VecAssemblyBegin(v);
4547:         VecAssemblyEnd(v);
4548:         MatDenseRestoreColumnVec(pcbddc->coarse_phi_D,i+n_vertices,&v);
4549:       }
4550:       VecResetArray(pcbddc->vec1_R);
4551:     }
4552:   }
4553:   if (n_constraints) {
4554:     MatDestroy(&local_auxmat2_R);
4555:   }
4556:   PetscFree(p0_lidx_I);

4558:   /* coarse matrix entries relative to B_0 */
4559:   if (pcbddc->benign_n) {
4560:     Mat               B0_B,B0_BPHI;
4561:     IS                is_dummy;
4562:     const PetscScalar *data;
4563:     PetscInt          j;

4565:     ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
4566:     MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
4567:     ISDestroy(&is_dummy);
4568:     MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);
4569:     MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);
4570:     MatDenseGetArrayRead(B0_BPHI,&data);
4571:     for (j=0;j<pcbddc->benign_n;j++) {
4572:       PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4573:       for (i=0;i<pcbddc->local_primal_size;i++) {
4574:         coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j];
4575:         coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j];
4576:       }
4577:     }
4578:     MatDenseRestoreArrayRead(B0_BPHI,&data);
4579:     MatDestroy(&B0_B);
4580:     MatDestroy(&B0_BPHI);
4581:   }

4583:   /* compute other basis functions for non-symmetric problems */
4584:   if (!pcbddc->symmetric_primal) {
4585:     Mat         B_V=NULL,B_C=NULL;
4586:     PetscScalar *marray;

4588:     if (n_constraints) {
4589:       Mat S_CCT,C_CRT;

4591:       MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);
4592:       MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);
4593:       MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);
4594:       MatDestroy(&S_CCT);
4595:       if (n_vertices) {
4596:         Mat S_VCT;

4598:         MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);
4599:         MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);
4600:         MatDestroy(&S_VCT);
4601:       }
4602:       MatDestroy(&C_CRT);
4603:     } else {
4604:       MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);
4605:     }
4606:     if (n_vertices && n_R) {
4607:       PetscScalar    *av,*marray;
4608:       const PetscInt *xadj,*adjncy;
4609:       PetscInt       n;
4610:       PetscBool      flg_row;

4612:       /* B_V = B_V - A_VR^T */
4613:       MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);
4614:       MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4615:       MatSeqAIJGetArray(A_VR,&av);
4616:       MatDenseGetArray(B_V,&marray);
4617:       for (i=0;i<n;i++) {
4618:         PetscInt j;
4619:         for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j];
4620:       }
4621:       MatDenseRestoreArray(B_V,&marray);
4622:       MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4623:       MatDestroy(&A_VR);
4624:     }

4626:     /* currently there's no support for MatTransposeMatSolve(F,B,X) */
4627:     if (n_vertices) {
4628:       MatDenseGetArray(B_V,&marray);
4629:       for (i=0;i<n_vertices;i++) {
4630:         VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);
4631:         VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
4632:         KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4633:         KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4634:         VecResetArray(pcbddc->vec1_R);
4635:         VecResetArray(pcbddc->vec2_R);
4636:       }
4637:       MatDenseRestoreArray(B_V,&marray);
4638:     }
4639:     if (B_C) {
4640:       MatDenseGetArray(B_C,&marray);
4641:       for (i=n_vertices;i<n_constraints+n_vertices;i++) {
4642:         VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);
4643:         VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
4644:         KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4645:         KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4646:         VecResetArray(pcbddc->vec1_R);
4647:         VecResetArray(pcbddc->vec2_R);
4648:       }
4649:       MatDenseRestoreArray(B_C,&marray);
4650:     }
4651:     /* coarse basis functions */
4652:     for (i=0;i<pcbddc->local_primal_size;i++) {
4653:       Vec  v;

4655:       VecPlaceArray(pcbddc->vec1_R,work+i*n_R);
4656:       MatDenseGetColumnVec(pcbddc->coarse_psi_B,i,&v);
4657:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD);
4658:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD);
4659:       if (i<n_vertices) {
4660:         PetscScalar one = 1.0;
4661:         VecSetValues(v,1,&idx_V_B[i],&one,INSERT_VALUES);
4662:         VecAssemblyBegin(v);
4663:         VecAssemblyEnd(v);
4664:       }
4665:       MatDenseRestoreColumnVec(pcbddc->coarse_psi_B,i,&v);

4667:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4668:         MatDenseGetColumnVec(pcbddc->coarse_psi_D,i,&v);
4669:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD);
4670:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,v,INSERT_VALUES,SCATTER_FORWARD);
4671:         MatDenseRestoreColumnVec(pcbddc->coarse_psi_D,i,&v);
4672:       }
4673:       VecResetArray(pcbddc->vec1_R);
4674:     }
4675:     MatDestroy(&B_V);
4676:     MatDestroy(&B_C);
4677:   }

4679:   /* free memory */
4680:   PetscFree(idx_V_B);
4681:   MatDestroy(&S_VV);
4682:   MatDestroy(&S_CV);
4683:   MatDestroy(&S_VC);
4684:   MatDestroy(&S_CC);
4685:   PetscFree(work);
4686:   if (n_vertices) {
4687:     MatDestroy(&A_VR);
4688:   }
4689:   if (n_constraints) {
4690:     MatDestroy(&C_CR);
4691:   }
4692:   PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);

4694:   /* Checking coarse_sub_mat and coarse basis functions */
4695:   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4696:   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4697:   if (pcbddc->dbg_flag) {
4698:     Mat         coarse_sub_mat;
4699:     Mat         AUXMAT,TM1,TM2,TM3,TM4;
4700:     Mat         coarse_phi_D,coarse_phi_B;
4701:     Mat         coarse_psi_D,coarse_psi_B;
4702:     Mat         A_II,A_BB,A_IB,A_BI;
4703:     Mat         C_B,CPHI;
4704:     IS          is_dummy;
4705:     Vec         mones;
4706:     MatType     checkmattype=MATSEQAIJ;
4707:     PetscReal   real_value;

4709:     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
4710:       Mat A;
4711:       PCBDDCBenignProject(pc,NULL,NULL,&A);
4712:       MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);
4713:       MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);
4714:       MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);
4715:       MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);
4716:       MatDestroy(&A);
4717:     } else {
4718:       MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);
4719:       MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);
4720:       MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);
4721:       MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);
4722:     }
4723:     MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);
4724:     MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);
4725:     if (!pcbddc->symmetric_primal) {
4726:       MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);
4727:       MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);
4728:     }
4729:     MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);

4731:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4732:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);
4733:     PetscViewerFlush(pcbddc->dbg_viewer);
4734:     if (!pcbddc->symmetric_primal) {
4735:       MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4736:       MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);
4737:       MatDestroy(&AUXMAT);
4738:       MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4739:       MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);
4740:       MatDestroy(&AUXMAT);
4741:       MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4742:       MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
4743:       MatDestroy(&AUXMAT);
4744:       MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4745:       MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
4746:       MatDestroy(&AUXMAT);
4747:     } else {
4748:       MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);
4749:       MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);
4750:       MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4751:       MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
4752:       MatDestroy(&AUXMAT);
4753:       MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4754:       MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
4755:       MatDestroy(&AUXMAT);
4756:     }
4757:     MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);
4758:     MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);
4759:     MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);
4760:     MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);
4761:     if (pcbddc->benign_n) {
4762:       Mat               B0_B,B0_BPHI;
4763:       const PetscScalar *data2;
4764:       PetscScalar       *data;
4765:       PetscInt          j;

4767:       ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
4768:       MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
4769:       MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);
4770:       MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);
4771:       MatDenseGetArray(TM1,&data);
4772:       MatDenseGetArrayRead(B0_BPHI,&data2);
4773:       for (j=0;j<pcbddc->benign_n;j++) {
4774:         PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4775:         for (i=0;i<pcbddc->local_primal_size;i++) {
4776:           data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j];
4777:           data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j];
4778:         }
4779:       }
4780:       MatDenseRestoreArray(TM1,&data);
4781:       MatDenseRestoreArrayRead(B0_BPHI,&data2);
4782:       MatDestroy(&B0_B);
4783:       ISDestroy(&is_dummy);
4784:       MatDestroy(&B0_BPHI);
4785:     }
4786: #if 0
4787:   {
4788:     PetscViewer viewer;
4789:     char filename[256];
4790:     sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level);
4791:     PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
4792:     PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
4793:     PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");
4794:     MatView(coarse_sub_mat,viewer);
4795:     PetscObjectSetName((PetscObject)TM1,"projected");
4796:     MatView(TM1,viewer);
4797:     if (pcbddc->coarse_phi_B) {
4798:       PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");
4799:       MatView(pcbddc->coarse_phi_B,viewer);
4800:     }
4801:     if (pcbddc->coarse_phi_D) {
4802:       PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");
4803:       MatView(pcbddc->coarse_phi_D,viewer);
4804:     }
4805:     if (pcbddc->coarse_psi_B) {
4806:       PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");
4807:       MatView(pcbddc->coarse_psi_B,viewer);
4808:     }
4809:     if (pcbddc->coarse_psi_D) {
4810:       PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");
4811:       MatView(pcbddc->coarse_psi_D,viewer);
4812:     }
4813:     PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");
4814:     MatView(pcbddc->local_mat,viewer);
4815:     PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");
4816:     MatView(pcbddc->ConstraintMatrix,viewer);
4817:     PetscObjectSetName((PetscObject)pcis->is_I_local,"I");
4818:     ISView(pcis->is_I_local,viewer);
4819:     PetscObjectSetName((PetscObject)pcis->is_B_local,"B");
4820:     ISView(pcis->is_B_local,viewer);
4821:     PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");
4822:     ISView(pcbddc->is_R_local,viewer);
4823:     PetscViewerDestroy(&viewer);
4824:   }
4825: #endif
4826:     MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);
4827:     MatNorm(TM1,NORM_FROBENIUS,&real_value);
4828:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
4829:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d          matrix error % 1.14e\n",PetscGlobalRank,real_value);

4831:     /* check constraints */
4832:     ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);
4833:     MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
4834:     if (!pcbddc->benign_n) { /* TODO: add benign case */
4835:       MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);
4836:     } else {
4837:       PetscScalar *data;
4838:       Mat         tmat;
4839:       MatDenseGetArray(pcbddc->coarse_phi_B,&data);
4840:       MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);
4841:       MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);
4842:       MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);
4843:       MatDestroy(&tmat);
4844:     }
4845:     MatCreateVecs(CPHI,&mones,NULL);
4846:     VecSet(mones,-1.0);
4847:     MatDiagonalSet(CPHI,mones,ADD_VALUES);
4848:     MatNorm(CPHI,NORM_FROBENIUS,&real_value);
4849:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);
4850:     if (!pcbddc->symmetric_primal) {
4851:       MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);
4852:       VecSet(mones,-1.0);
4853:       MatDiagonalSet(CPHI,mones,ADD_VALUES);
4854:       MatNorm(CPHI,NORM_FROBENIUS,&real_value);
4855:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);
4856:     }
4857:     MatDestroy(&C_B);
4858:     MatDestroy(&CPHI);
4859:     ISDestroy(&is_dummy);
4860:     VecDestroy(&mones);
4861:     PetscViewerFlush(pcbddc->dbg_viewer);
4862:     MatDestroy(&A_II);
4863:     MatDestroy(&A_BB);
4864:     MatDestroy(&A_IB);
4865:     MatDestroy(&A_BI);
4866:     MatDestroy(&TM1);
4867:     MatDestroy(&TM2);
4868:     MatDestroy(&TM3);
4869:     MatDestroy(&TM4);
4870:     MatDestroy(&coarse_phi_D);
4871:     MatDestroy(&coarse_phi_B);
4872:     if (!pcbddc->symmetric_primal) {
4873:       MatDestroy(&coarse_psi_D);
4874:       MatDestroy(&coarse_psi_B);
4875:     }
4876:     MatDestroy(&coarse_sub_mat);
4877:   }
4878:   /* FINAL CUDA support (we cannot currently mix viennacl and cuda vectors) */
4879:   {
4880:     PetscBool gpu;

4882:     PetscObjectTypeCompare((PetscObject)pcis->vec1_N,VECSEQCUDA,&gpu);
4883:     if (gpu) {
4884:       if (pcbddc->local_auxmat1) {
4885:         MatConvert(pcbddc->local_auxmat1,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat1);
4886:       }
4887:       if (pcbddc->local_auxmat2) {
4888:         MatConvert(pcbddc->local_auxmat2,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat2);
4889:       }
4890:       if (pcbddc->coarse_phi_B) {
4891:         MatConvert(pcbddc->coarse_phi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_B);
4892:       }
4893:       if (pcbddc->coarse_phi_D) {
4894:         MatConvert(pcbddc->coarse_phi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_D);
4895:       }
4896:       if (pcbddc->coarse_psi_B) {
4897:         MatConvert(pcbddc->coarse_psi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_B);
4898:       }
4899:       if (pcbddc->coarse_psi_D) {
4900:         MatConvert(pcbddc->coarse_psi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_D);
4901:       }
4902:     }
4903:   }
4904:   /* get back data */
4905:   *coarse_submat_vals_n = coarse_submat_vals;
4906:   return 0;
4907: }

4909: PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
4910: {
4911:   Mat            *work_mat;
4912:   IS             isrow_s,iscol_s;
4913:   PetscBool      rsorted,csorted;
4914:   PetscInt       rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL;

4916:   ISSorted(isrow,&rsorted);
4917:   ISSorted(iscol,&csorted);
4918:   ISGetLocalSize(isrow,&rsize);
4919:   ISGetLocalSize(iscol,&csize);

4921:   if (!rsorted) {
4922:     const PetscInt *idxs;
4923:     PetscInt *idxs_sorted,i;

4925:     PetscMalloc1(rsize,&idxs_perm_r);
4926:     PetscMalloc1(rsize,&idxs_sorted);
4927:     for (i=0;i<rsize;i++) {
4928:       idxs_perm_r[i] = i;
4929:     }
4930:     ISGetIndices(isrow,&idxs);
4931:     PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);
4932:     for (i=0;i<rsize;i++) {
4933:       idxs_sorted[i] = idxs[idxs_perm_r[i]];
4934:     }
4935:     ISRestoreIndices(isrow,&idxs);
4936:     ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);
4937:   } else {
4938:     PetscObjectReference((PetscObject)isrow);
4939:     isrow_s = isrow;
4940:   }

4942:   if (!csorted) {
4943:     if (isrow == iscol) {
4944:       PetscObjectReference((PetscObject)isrow_s);
4945:       iscol_s = isrow_s;
4946:     } else {
4947:       const PetscInt *idxs;
4948:       PetscInt       *idxs_sorted,i;

4950:       PetscMalloc1(csize,&idxs_perm_c);
4951:       PetscMalloc1(csize,&idxs_sorted);
4952:       for (i=0;i<csize;i++) {
4953:         idxs_perm_c[i] = i;
4954:       }
4955:       ISGetIndices(iscol,&idxs);
4956:       PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);
4957:       for (i=0;i<csize;i++) {
4958:         idxs_sorted[i] = idxs[idxs_perm_c[i]];
4959:       }
4960:       ISRestoreIndices(iscol,&idxs);
4961:       ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);
4962:     }
4963:   } else {
4964:     PetscObjectReference((PetscObject)iscol);
4965:     iscol_s = iscol;
4966:   }

4968:   MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);

4970:   if (!rsorted || !csorted) {
4971:     Mat      new_mat;
4972:     IS       is_perm_r,is_perm_c;

4974:     if (!rsorted) {
4975:       PetscInt *idxs_r,i;
4976:       PetscMalloc1(rsize,&idxs_r);
4977:       for (i=0;i<rsize;i++) {
4978:         idxs_r[idxs_perm_r[i]] = i;
4979:       }
4980:       PetscFree(idxs_perm_r);
4981:       ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);
4982:     } else {
4983:       ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);
4984:     }
4985:     ISSetPermutation(is_perm_r);

4987:     if (!csorted) {
4988:       if (isrow_s == iscol_s) {
4989:         PetscObjectReference((PetscObject)is_perm_r);
4990:         is_perm_c = is_perm_r;
4991:       } else {
4992:         PetscInt *idxs_c,i;
4994:         PetscMalloc1(csize,&idxs_c);
4995:         for (i=0;i<csize;i++) {
4996:           idxs_c[idxs_perm_c[i]] = i;
4997:         }
4998:         PetscFree(idxs_perm_c);
4999:         ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);
5000:       }
5001:     } else {
5002:       ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);
5003:     }
5004:     ISSetPermutation(is_perm_c);

5006:     MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);
5007:     MatDestroy(&work_mat[0]);
5008:     work_mat[0] = new_mat;
5009:     ISDestroy(&is_perm_r);
5010:     ISDestroy(&is_perm_c);
5011:   }

5013:   PetscObjectReference((PetscObject)work_mat[0]);
5014:   *B = work_mat[0];
5015:   MatDestroyMatrices(1,&work_mat);
5016:   ISDestroy(&isrow_s);
5017:   ISDestroy(&iscol_s);
5018:   return 0;
5019: }

5021: PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
5022: {
5023:   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
5024:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
5025:   Mat            new_mat,lA;
5026:   IS             is_local,is_global;
5027:   PetscInt       local_size;
5028:   PetscBool      isseqaij;

5030:   MatDestroy(&pcbddc->local_mat);
5031:   MatGetSize(matis->A,&local_size,NULL);
5032:   ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);
5033:   ISLocalToGlobalMappingApplyIS(matis->rmapping,is_local,&is_global);
5034:   ISDestroy(&is_local);
5035:   MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);
5036:   ISDestroy(&is_global);

5038:   if (pcbddc->dbg_flag) {
5039:     Vec       x,x_change;
5040:     PetscReal error;

5042:     MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);
5043:     VecSetRandom(x,NULL);
5044:     MatMult(ChangeOfBasisMatrix,x,x_change);
5045:     VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
5046:     VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
5047:     MatMult(new_mat,matis->x,matis->y);
5048:     if (!pcbddc->change_interior) {
5049:       const PetscScalar *x,*y,*v;
5050:       PetscReal         lerror = 0.;
5051:       PetscInt          i;

5053:       VecGetArrayRead(matis->x,&x);
5054:       VecGetArrayRead(matis->y,&y);
5055:       VecGetArrayRead(matis->counter,&v);
5056:       for (i=0;i<local_size;i++)
5057:         if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror)
5058:           lerror = PetscAbsScalar(x[i]-y[i]);
5059:       VecRestoreArrayRead(matis->x,&x);
5060:       VecRestoreArrayRead(matis->y,&y);
5061:       VecRestoreArrayRead(matis->counter,&v);
5062:       MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)pc));
5063:       if (error > PETSC_SMALL) {
5064:         if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5065:           SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e",error);
5066:         } else {
5067:           SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e",error);
5068:         }
5069:       }
5070:     }
5071:     VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
5072:     VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
5073:     VecAXPY(x,-1.0,x_change);
5074:     VecNorm(x,NORM_INFINITY,&error);
5075:     if (error > PETSC_SMALL) {
5076:       if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5077:         SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error);
5078:       } else {
5079:         SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e",error);
5080:       }
5081:     }
5082:     VecDestroy(&x);
5083:     VecDestroy(&x_change);
5084:   }

5086:   /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */
5087:   PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);

5089:   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
5090:   PetscObjectBaseTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);
5091:   if (isseqaij) {
5092:     MatDestroy(&pcbddc->local_mat);
5093:     MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
5094:     if (lA) {
5095:       Mat work;
5096:       MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);
5097:       PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);
5098:       MatDestroy(&work);
5099:     }
5100:   } else {
5101:     Mat work_mat;

5103:     MatDestroy(&pcbddc->local_mat);
5104:     MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
5105:     MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
5106:     MatDestroy(&work_mat);
5107:     if (lA) {
5108:       Mat work;
5109:       MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
5110:       MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);
5111:       PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);
5112:       MatDestroy(&work);
5113:     }
5114:   }
5115:   if (matis->A->symmetric_set) {
5116:     MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);
5117: #if !defined(PETSC_USE_COMPLEX)
5118:     MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);
5119: #endif
5120:   }
5121:   MatDestroy(&new_mat);
5122:   return 0;
5123: }

/* (Re)creates the index set of the local "R" dofs (pcbddc->is_R_local) and the scatters from
   the R space to the interface (pcbddc->R_to_B) and, when pcbddc->switch_static or
   pcbddc->dbg_flag is set, to the interior (pcbddc->R_to_D).

   pc - the preconditioner; pc->data is accessed both as PC_IS and as PC_BDDC.

   Two code paths exist: when sub_schurs->reuse_solver is available, the R ordering stored in
   reuse_solver->is_R is used; otherwise R is computed as the complement of the vertices listed
   in pcbddc->local_primal_ref_node. */
5125: PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
5126: {
5127:   PC_IS*          pcis = (PC_IS*)(pc->data);
5128:   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
5129:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5130:   PetscInt        *idx_R_local=NULL;
5131:   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
5132:   PetscInt        vbs,bs;
5133:   PetscBT         bitmask=NULL;

5135:   /*
5136:     No need to setup local scatters if
5137:       - primal space is unchanged
5138:         AND
5139:       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
5140:         AND
5141:       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
5142:   */
5143:   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
5144:     return 0;
5145:   }
5146:   /* destroy old objects */
5147:   ISDestroy(&pcbddc->is_R_local);
5148:   VecScatterDestroy(&pcbddc->R_to_B);
5149:   VecScatterDestroy(&pcbddc->R_to_D);
5150:   /* Set Non-overlapping dimensions */
5151:   n_B = pcis->n_B;
5152:   n_D = pcis->n - n_B;
5153:   n_vertices = pcbddc->n_vertices;

5155:   /* Dohrmann's notation: dofs split in R (Remaining: all dofs but the vertices) and V (Vertices) */

5157:   /* create auxiliary bitmask and allocate workspace */
5158:   if (!sub_schurs || !sub_schurs->reuse_solver) {
5159:     PetscMalloc1(pcis->n-n_vertices,&idx_R_local);
5160:     PetscBTCreate(pcis->n,&bitmask);
        /* mark the vertices: at this stage a set bit means "vertex dof" */
5161:     for (i=0;i<n_vertices;i++) {
5162:       PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);
5163:     }

        /* R = all local dofs whose bit is not set, in increasing local numbering */
5165:     for (i=0, n_R=0; i<pcis->n; i++) {
5166:       if (!PetscBTLookup(bitmask,i)) {
5167:         idx_R_local[n_R++] = i;
5168:       }
5169:     }
5170:   } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */
5171:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

        /* NOTE(review): the const qualifier is cast away here and idx_R_local may be overwritten
           in place by the block compression below; the array is handed back with ISRestoreIndices
           and reuse_solver->is_R is destroyed right after ISCreateBlock */
5173:     ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);
5174:     ISGetLocalSize(reuse_solver->is_R,&n_R);
5175:   }

5177:   /* Block code */
        /* vbs is the block size actually used for is_R_local: it stays 1 unless the R set is
           found to be made of full blocks of size bs (the block size of pcbddc->local_mat) */
5178:   vbs = 1;
5179:   MatGetBlockSize(pcbddc->local_mat,&bs);
5180:   if (bs>1 && !(n_vertices%bs)) {
5181:     PetscBool is_blocked = PETSC_TRUE;
5182:     PetscInt  *vary;
5183:     if (!sub_schurs || !sub_schurs->reuse_solver) {
5184:       PetscMalloc1(pcis->n/bs,&vary);
5185:       PetscArrayzero(vary,pcis->n/bs);
5186:       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
5187:       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
          /* vary[b] counts the vertices falling in block b: each block must hold either 0 or bs of them */
5188:       for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
5189:       for (i=0; i<pcis->n/bs; i++) {
5190:         if (vary[i]!=0 && vary[i]!=bs) {
5191:           is_blocked = PETSC_FALSE;
5192:           break;
5193:         }
5194:       }
5195:       PetscFree(vary);
5196:     } else {
5197:       /* Verify directly the R set */
          /* each group of bs consecutive entries must be node,node+1,...,node+bs-1;
             the break below only leaves the inner loop, the outer scan continues */
5198:       for (i=0; i<n_R/bs; i++) {
5199:         PetscInt j,node=idx_R_local[bs*i];
5200:         for (j=1; j<bs; j++) {
5201:           if (node != idx_R_local[bs*i+j]-j) {
5202:             is_blocked = PETSC_FALSE;
5203:             break;
5204:           }
5205:         }
5206:       }
5207:     }
5208:     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
5209:       vbs = bs;
          /* in-place compression: entry i becomes the block index of the i-th block of R */
5210:       for (i=0;i<n_R/vbs;i++) {
5211:         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
5212:       }
5213:     }
5214:   }
        /* PETSC_COPY_VALUES: the IS keeps its own copy, idx_R_local is released/restored below */
5215:   ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);
5216:   if (sub_schurs && sub_schurs->reuse_solver) {
5217:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

        /* replace reuse_solver->is_R with the newly created IS (shared, hence the extra reference) */
5219:     ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);
5220:     ISDestroy(&reuse_solver->is_R);
5221:     PetscObjectReference((PetscObject)pcbddc->is_R_local);
5222:     reuse_solver->is_R = pcbddc->is_R_local;
5223:   } else {
5224:     PetscFree(idx_R_local);
5225:   }

5227:   /* print some info if requested */
5228:   if (pcbddc->dbg_flag) {
5229:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
5230:     PetscViewerFlush(pcbddc->dbg_viewer);
5231:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5232:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);
5233:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %D, dirichlet_size = %D, boundary_size = %D\n",pcis->n,n_D,n_B);
5234:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %D, v_size = %D, constraints = %D, local_primal_size = %D\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);
5235:     PetscViewerFlush(pcbddc->dbg_viewer);
5236:   }

5238:   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
5239:   if (!sub_schurs || !sub_schurs->reuse_solver) {
5240:     IS       is_aux1,is_aux2;
        /* NOTE: this idx_R_local shadows the function-scope one (already freed above) */
5241:     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;

        /* NOTE(review): presumably ISGetIndices returns point (non-block) indices even when
           is_R_local was created with vbs > 1, since n_R entries are read below - confirm */
5243:     ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
5244:     PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);
5245:     PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);
        /* additionally mark the interior dofs: from now on an unset bit means
           "neither vertex nor interior", i.e. a non-vertex interface dof */
5246:     ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
5247:     for (i=0; i<n_D; i++) {
5248:       PetscBTSet(bitmask,is_indices[i]);
5249:     }
5250:     ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
        /* positions, within the R ordering, of the non-vertex interface dofs */
5251:     for (i=0, j=0; i<n_R; i++) {
5252:       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
5253:         aux_array1[j++] = i;
5254:       }
5255:     }
5256:     ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
        /* positions, within the B ordering, of the same dofs (vertices are skipped) */
5257:     ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
5258:     for (i=0, j=0; i<n_B; i++) {
5259:       if (!PetscBTLookup(bitmask,is_indices[i])) {
5260:         aux_array2[j++] = i;
5261:       }
5262:     }
5263:     ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
5264:     ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);
5265:     VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);
5266:     ISDestroy(&is_aux1);
5267:     ISDestroy(&is_aux2);

5269:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
          /* R contains no vertices, so a set bit on an R dof identifies an interior dof */
5270:       PetscMalloc1(n_D,&aux_array1);
5271:       for (i=0, j=0; i<n_R; i++) {
5272:         if (PetscBTLookup(bitmask,idx_R_local[i])) {
5273:           aux_array1[j++] = i;
5274:         }
5275:       }
5276:       ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
5277:       VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
5278:       ISDestroy(&is_aux1);
5279:     }
5280:     PetscBTDestroy(&bitmask);
5281:     ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
5282:   } else {
5283:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5284:     IS                 tis;
5285:     PetscInt           schur_size;

        /* the strides below take entries [0,n_D) of vec1_R for the interior and entries
           [n_D,n_D+schur_size) for the interface dofs listed in reuse_solver->is_B */
5287:     ISGetLocalSize(reuse_solver->is_B,&schur_size);
5288:     ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);
5289:     VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);
5290:     ISDestroy(&tis);
5291:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
5292:       ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);
5293:       VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
5294:       ISDestroy(&tis);
5295:     }
5296:   }
5297:   return 0;
5298: }

5300: static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B)
5301: {
5302:   MatNullSpace   NullSpace;
5303:   Mat            dmat;
5304:   const Vec      *nullvecs;
5305:   Vec            v,v2,*nullvecs2;
5306:   VecScatter     sct = NULL;
5307:   PetscContainer c;
5308:   PetscScalar    *ddata;
5309:   PetscInt       k,nnsp_size,bsiz,bsiz2,n,N,bs;
5310:   PetscBool      nnsp_has_cnst;

5312:   if (!is && !B) { /* MATIS */
5313:     Mat_IS* matis = (Mat_IS*)A->data;

5315:     if (!B) {
5316:       MatISGetLocalMat(A,&B);
5317:     }
5318:     sct  = matis->cctx;
5319:     PetscObjectReference((PetscObject)sct);
5320:   } else {
5321:     MatGetNullSpace(B,&NullSpace);
5322:     if (!NullSpace) {
5323:       MatGetNearNullSpace(B,&NullSpace);
5324:     }
5325:     if (NullSpace) return 0;
5326:   }
5327:   MatGetNullSpace(A,&NullSpace);
5328:   if (!NullSpace) {
5329:     MatGetNearNullSpace(A,&NullSpace);
5330:   }
5331:   if (!NullSpace) return 0;

5333:   MatCreateVecs(A,&v,NULL);
5334:   MatCreateVecs(B,&v2,NULL);
5335:   if (!sct) {
5336:     VecScatterCreate(v,is,v2,NULL,&sct);
5337:   }
5338:   MatNullSpaceGetVecs(NullSpace,&nnsp_has_cnst,&nnsp_size,(const Vec**)&nullvecs);
5339:   bsiz = bsiz2 = nnsp_size+!!nnsp_has_cnst;
5340:   PetscMalloc1(bsiz,&nullvecs2);
5341:   VecGetBlockSize(v2,&bs);
5342:   VecGetSize(v2,&N);
5343:   VecGetLocalSize(v2,&n);
5344:   PetscMalloc1(n*bsiz,&ddata);
5345:   for (k=0;k<nnsp_size;k++) {
5346:     VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*k,&nullvecs2[k]);
5347:     VecScatterBegin(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD);
5348:     VecScatterEnd(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD);
5349:   }
5350:   if (nnsp_has_cnst) {
5351:     VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*nnsp_size,&nullvecs2[nnsp_size]);
5352:     VecSet(nullvecs2[nnsp_size],1.0);
5353:   }
5354:   PCBDDCOrthonormalizeVecs(&bsiz2,nullvecs2);
5355:   MatNullSpaceCreate(PetscObjectComm((PetscObject)B),PETSC_FALSE,bsiz2,nullvecs2,&NullSpace);

5357:   MatCreateDense(PetscObjectComm((PetscObject)B),n,PETSC_DECIDE,N,bsiz2,ddata,&dmat);
5358:   PetscContainerCreate(PetscObjectComm((PetscObject)B),&c);
5359:   PetscContainerSetPointer(c,ddata);
5360:   PetscContainerSetUserDestroy(c,PetscContainerUserDestroyDefault);
5361:   PetscObjectCompose((PetscObject)dmat,"_PBDDC_Null_dmat_arr",(PetscObject)c);
5362:   PetscContainerDestroy(&c);
5363:   PetscObjectCompose((PetscObject)NullSpace,"_PBDDC_Null_dmat",(PetscObject)dmat);
5364:   MatDestroy(&dmat);

5366:   for (k=0;k<bsiz;k++) {
5367:     VecDestroy(&nullvecs2[k]);
5368:   }
5369:   PetscFree(nullvecs2);
5370:   MatSetNearNullSpace(B,NullSpace);
5371:   MatNullSpaceDestroy(&NullSpace);
5372:   VecDestroy(&v);
5373:   VecDestroy(&v2);
5374:   VecScatterDestroy(&sct);
5375:   return 0;
5376: }

/*
   PCBDDCSetUpLocalSolvers - Creates (when missing) and sets up the two local KSP
   solvers stored in the PC_BDDC data: pcbddc->ksp_D, whose operators are
   pcis->A_II / pcis->pA_II, and pcbddc->ksp_R, whose operator is the square
   block of pcbddc->local_mat selected by pcbddc->is_R_local.

   Input Parameters:
+  pc        - the preconditioner; pc->data is read both as PC_BDDC and as PC_IS
.  dirichlet - when true, (re)build the Dirichlet solver pcbddc->ksp_D
-  neumann   - when true, (re)build the Neumann solver pcbddc->ksp_R

   Notes:
   Each KSP is created on PETSC_COMM_SELF the first time it is needed, defaulting
   to KSPPREONLY with PCCHOLESKY for MATSEQSBAIJ matrices and PCLU otherwise;
   KSPSetFromOptions() is called only on that first creation.

   Option prefixes end in "pc_bddc_dirichlet_" / "pc_bddc_neumann_", followed by
   "l<level>_" when pcbddc->current_level is nonzero.

   When pcbddc->dbg_flag is set, each requested solver is exercised on a random
   vector and the infinity norm of the error is printed on pcbddc->dbg_viewer.

   Return codes of the individual calls are not inspected in this body; the only
   explicit return statement yields 0.
*/
5378: PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
5379: {
5380:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
5381:   PC_IS          *pcis = (PC_IS*)pc->data; /* same pointer viewed as PC_IS; NOTE(review): presumably PC_BDDC starts with a PC_IS member - confirm in bddc.h */
5382:   PC             pc_temp;
5383:   Mat            A_RR;
5384:   MatNullSpace   nnsp;
5385:   MatReuse       reuse;
5386:   PetscScalar    m_one = -1.0;
5387:   PetscReal      value;
5388:   PetscInt       n_D,n_R;
5389:   PetscBool      issbaij,opts;
5390:   void           (*f)(void) = NULL; /* receives the result of the "PCSetCoordinates_C" queries below */
5391:   char           dir_prefix[256],neu_prefix[256],str_level[16];
5392:   size_t         len;

5394:   PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);
5395:   /* approximate solver, propagate NearNullSpace if needed */
      /* Only on the first setup, and only when a nullspace correction is requested for
         the Dirichlet ([0]) or Neumann ([2]) solver */
5396:   if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) {
5397:     MatNullSpace gnnsp1,gnnsp2;
5398:     PetscBool    lhas,ghas;

5400:     MatGetNearNullSpace(pcbddc->local_mat,&nnsp);
5401:     MatGetNearNullSpace(pc->pmat,&gnnsp1);
5402:     MatGetNullSpace(pc->pmat,&gnnsp2);
5403:     lhas = nnsp ? PETSC_TRUE : PETSC_FALSE;
          /* ghas is true if at least one process has a near null space on its local matrix */
5404:     MPIU_Allreduce(&lhas,&ghas,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
          /* nobody has local information, but the global matrix carries a (near) null space */
5405:     if (!ghas && (gnnsp1 || gnnsp2)) {
5406:       MatNullSpacePropagateAny_Private(pc->pmat,NULL,NULL);
5407:     }
5408:   }

5410:   /* compute prefixes */
5411:   PetscStrcpy(dir_prefix,"");
5412:   PetscStrcpy(neu_prefix,"");
5413:   if (!pcbddc->current_level) {
          /* finest level: <pc prefix> + "pc_bddc_dirichlet_" / "pc_bddc_neumann_" */
5414:     PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,sizeof(dir_prefix));
5415:     PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,sizeof(neu_prefix));
5416:     PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));
5417:     PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));
5418:   } else {
          /* coarser levels: strip the trailing coarse-solver part from the pc prefix, then
             append the solver name and "l<level>_" */
5419:     PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));
5420:     PetscStrlen(((PetscObject)pc)->prefix,&len);
          /* NOTE(review): len is an unsigned size_t; the subtractions below assume the prefix is
             non-NULL and really ends with the removed text - confirm against the code that
             sets the coarse prefix */
5421:     len -= 15; /* remove "pc_bddc_coarse_" */
5422:     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
5423:     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
5424:     /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */
5425:     PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);
5426:     PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);
5427:     PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));
5428:     PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));
5429:     PetscStrlcat(dir_prefix,str_level,sizeof(dir_prefix));
5430:     PetscStrlcat(neu_prefix,str_level,sizeof(neu_prefix));
5431:   }

5433:   /* DIRICHLET PROBLEM */
5434:   if (dirichlet) {
5435:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5436:     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
            /* in debug mode replace pcis->A_II with the matrix returned by PCBDDCBenignProject()
               on the interior dofs; it is the matrix used by the solver check at the end */
5438:       if (pcbddc->dbg_flag) {
5439:         Mat    A_IIn;

5441:         PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);
5442:         MatDestroy(&pcis->A_II);
5443:         pcis->A_II = A_IIn;
5444:       }
5445:     }
          /* forward the symmetry flag of the local matrix, when it has been set */
5446:     if (pcbddc->local_mat->symmetric_set) {
5447:       MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);
5448:     }
5449:     /* Matrix for Dirichlet problem is pcis->A_II */
5450:     n_D  = pcis->n - pcis->n_B;
5451:     opts = PETSC_FALSE; /* becomes true only when the KSP is created in this call */
5452:     if (!pcbddc->ksp_D) { /* create object if not yet build */
5453:       opts = PETSC_TRUE;
5454:       KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);
5455:       PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);
5456:       /* default */
5457:       KSPSetType(pcbddc->ksp_D,KSPPREONLY);
5458:       KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);
5459:       PetscObjectTypeCompare((PetscObject)pcis->pA_II,MATSEQSBAIJ,&issbaij);
5460:       KSPGetPC(pcbddc->ksp_D,&pc_temp);
5461:       if (issbaij) {
5462:         PCSetType(pc_temp,PCCHOLESKY);
5463:       } else {
5464:         PCSetType(pc_temp,PCLU);
5465:       }
5466:       KSPSetErrorIfNotConverged(pcbddc->ksp_D,pc->erroriffailure);
5467:     }
5468:     MatSetOptionsPrefix(pcis->pA_II,((PetscObject)pcbddc->ksp_D)->prefix);
5469:     KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->pA_II);
5470:     /* Allow user's customization */
5471:     if (opts) {
5472:       KSPSetFromOptions(pcbddc->ksp_D);
5473:     }
5474:     MatGetNearNullSpace(pcis->pA_II,&nnsp);
5475:     if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */
5476:       MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcis->is_I_local,pcis->pA_II);
5477:     }
          /* query again: the propagation above may have attached a near null space */
5478:     MatGetNearNullSpace(pcis->pA_II,&nnsp);
5479:     KSPGetPC(pcbddc->ksp_D,&pc_temp);
5480:     PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);
          /* the PC accepts coordinates, the graph has them and no near null space is available:
             pass the coordinates of the interior dofs */
5481:     if (f && pcbddc->mat_graph->cloc && !nnsp) {
5482:       PetscReal      *coords = pcbddc->mat_graph->coords,*scoords;
5483:       const PetscInt *idxs;
5484:       PetscInt       cdim = pcbddc->mat_graph->cdim,nl,i,d;

5486:       ISGetLocalSize(pcis->is_I_local,&nl);
5487:       ISGetIndices(pcis->is_I_local,&idxs);
5488:       PetscMalloc1(nl*cdim,&scoords);
            /* gather cdim coordinates per selected index, stored interleaved */
5489:       for (i=0;i<nl;i++) {
5490:         for (d=0;d<cdim;d++) {
5491:           scoords[i*cdim+d] = coords[idxs[i]*cdim+d];
5492:         }
5493:       }
5494:       ISRestoreIndices(pcis->is_I_local,&idxs);
5495:       PCSetCoordinates(pc_temp,cdim,nl,scoords);
5496:       PetscFree(scoords);
5497:     }
          /* when the sub-Schur machinery provides a reusable solver, use its interior solver as PC */
5498:     if (sub_schurs && sub_schurs->reuse_solver) {
5499:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5501:       KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);
5502:     }

5504:     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5505:     if (!n_D) {
5506:       KSPGetPC(pcbddc->ksp_D,&pc_temp);
5507:       PCSetType(pc_temp,PCNONE);
5508:     }
5509:     KSPSetUp(pcbddc->ksp_D);
5510:     /* set ksp_D into pcis data */
          /* take an extra reference before dropping whatever pcis->ksp_D held, so both
             structures can point at the same KSP */
5511:     PetscObjectReference((PetscObject)pcbddc->ksp_D);
5512:     KSPDestroy(&pcis->ksp_D);
5513:     pcis->ksp_D = pcbddc->ksp_D;
5514:   }

5516:   /* NEUMANN PROBLEM */
5517:   A_RR = NULL; /* stays NULL when neumann is false; destroyed unconditionally at the end */
5518:   if (neumann) {
5519:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5520:     PetscInt        ibs,mbs;
5521:     PetscBool       issbaij, reuse_neumann_solver; /* this issbaij shadows the function-scope variable of the same name */
5522:     Mat_IS*         matis = (Mat_IS*)pc->pmat->data; /* NOTE(review): assumes pc->pmat is of type MATIS - confirm */

5524:     reuse_neumann_solver = PETSC_FALSE;
5525:     if (sub_schurs && sub_schurs->reuse_solver) {
5526:       IS iP;

5528:       reuse_neumann_solver = PETSC_TRUE;
            /* do not reuse the solver when an object named "__KSPFETIDP_iP" is composed with sub_schurs->A */
5529:       PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);
5530:       if (iP) reuse_neumann_solver = PETSC_FALSE;
5531:     }
5532:     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
5533:     ISGetSize(pcbddc->is_R_local,&n_R);
5534:     if (pcbddc->ksp_R) { /* already created ksp */
5535:       PetscInt nn_R;
5536:       KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);
            /* hold our own reference: every path below either destroys A_RR or hands it
               to the final MatDestroy() */
5537:       PetscObjectReference((PetscObject)A_RR);
5538:       MatGetSize(A_RR,&nn_R,NULL);
5539:       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
5540:         KSPReset(pcbddc->ksp_R);
5541:         MatDestroy(&A_RR);
5542:         reuse = MAT_INITIAL_MATRIX;
5543:       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
5544:         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
5545:           MatDestroy(&A_RR);
5546:           reuse = MAT_INITIAL_MATRIX;
5547:         } else { /* safe to reuse the matrix */
5548:           reuse = MAT_REUSE_MATRIX;
5549:         }
5550:       }
5551:       /* last check */
5552:       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
5553:         MatDestroy(&A_RR);
5554:         reuse = MAT_INITIAL_MATRIX;
5555:       }
5556:     } else { /* first time, so we need to create the matrix */
5557:       reuse = MAT_INITIAL_MATRIX;
5558:     }
5559:     /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection
5560:        TODO: Get Rid of these conversions */
5561:     MatGetBlockSize(pcbddc->local_mat,&mbs);
5562:     ISGetBlockSize(pcbddc->is_R_local,&ibs);
5563:     PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);
          /* in both branches below: if local_mat is the very same object as matis->A it is
             not converted in place; the local handle is destroyed and a converted copy is
             created instead */
5564:     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
5565:       if (matis->A == pcbddc->local_mat) {
5566:         MatDestroy(&pcbddc->local_mat);
5567:         MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
5568:       } else {
5569:         MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
5570:       }
5571:     } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
5572:       if (matis->A == pcbddc->local_mat) {
5573:         MatDestroy(&pcbddc->local_mat);
5574:         MatConvert(matis->A,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
5575:       } else {
5576:         MatConvert(pcbddc->local_mat,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
5577:       }
5578:     }
5579:     /* extract A_RR */
5580:     if (reuse_neumann_solver) {
5581:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5583:       if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */
5584:         MatDestroy(&A_RR);
5585:         if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */
5586:           PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);
5587:         } else {
5588:           MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);
5589:         }
5590:       } else {
              /* no debugging: borrow the operator of the reusable correction solver and
                 reference it so that the final MatDestroy() is balanced */
5591:         MatDestroy(&A_RR);
5592:         PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);
5593:         PetscObjectReference((PetscObject)A_RR);
5594:       }
5595:     } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */
5596:       MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);
5597:     }
5598:     if (pcbddc->local_mat->symmetric_set) {
5599:       MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);
5600:     }
5601:     opts = PETSC_FALSE;
5602:     if (!pcbddc->ksp_R) { /* create object if not present */
5603:       opts = PETSC_TRUE;
5604:       KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);
5605:       PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);
5606:       /* default */
5607:       KSPSetType(pcbddc->ksp_R,KSPPREONLY);
5608:       KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);
5609:       KSPGetPC(pcbddc->ksp_R,&pc_temp);
5610:       PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);
5611:       if (issbaij) {
5612:         PCSetType(pc_temp,PCCHOLESKY);
5613:       } else {
5614:         PCSetType(pc_temp,PCLU);
5615:       }
5616:       KSPSetErrorIfNotConverged(pcbddc->ksp_R,pc->erroriffailure);
5617:     }
5618:     KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);
5619:     MatSetOptionsPrefix(A_RR,((PetscObject)pcbddc->ksp_R)->prefix);
5620:     if (opts) { /* Allow user's customization once */
5621:       KSPSetFromOptions(pcbddc->ksp_R);
5622:     }
5623:     MatGetNearNullSpace(A_RR,&nnsp);
5624:     if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */
5625:       MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcbddc->is_R_local,A_RR);
5626:     }
          /* query again: the propagation above may have attached a near null space */
5627:     MatGetNearNullSpace(A_RR,&nnsp);
5628:     KSPGetPC(pcbddc->ksp_R,&pc_temp);
5629:     PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);
          /* same coordinate hand-off as for the Dirichlet solver, restricted to is_R_local */
5630:     if (f && pcbddc->mat_graph->cloc && !nnsp) {
5631:       PetscReal      *coords = pcbddc->mat_graph->coords,*scoords;
5632:       const PetscInt *idxs;
5633:       PetscInt       cdim = pcbddc->mat_graph->cdim,nl,i,d;

5635:       ISGetLocalSize(pcbddc->is_R_local,&nl);
5636:       ISGetIndices(pcbddc->is_R_local,&idxs);
5637:       PetscMalloc1(nl*cdim,&scoords);
5638:       for (i=0;i<nl;i++) {
5639:         for (d=0;d<cdim;d++) {
5640:           scoords[i*cdim+d] = coords[idxs[i]*cdim+d];
5641:         }
5642:       }
5643:       ISRestoreIndices(pcbddc->is_R_local,&idxs);
5644:       PCSetCoordinates(pc_temp,cdim,nl,scoords);
5645:       PetscFree(scoords);
5646:     }

5648:     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5649:     if (!n_R) {
5650:       KSPGetPC(pcbddc->ksp_R,&pc_temp);
5651:       PCSetType(pc_temp,PCNONE);
5652:     }
5653:     /* Reuse solver if it is present */
5654:     if (reuse_neumann_solver) {
5655:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5657:       KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);
5658:     }
5659:     KSPSetUp(pcbddc->ksp_R);
5660:   }

5662:   if (pcbddc->dbg_flag) {
5663:     PetscViewerFlush(pcbddc->dbg_viewer);
          /* NOTE(review): no matching PetscViewerASCIIPopSynchronized() appears in this function;
             presumably it is issued by a later routine - confirm */
5664:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5665:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
5666:   }
5667:   PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);

5669:   /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */
5670:   if (pcbddc->NullSpace_corr[0]) {
5671:     PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);
5672:   }
5673:   if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) {
5674:     PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);
5675:   }
5676:   if (neumann && pcbddc->NullSpace_corr[2]) {
5677:     PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);
5678:   }
5679:   /* check Dirichlet and Neumann solvers */
        /* each test builds b = A*x for a random x, solves A*y = b in place and reports the
           infinity norm of x - y */
5680:   if (pcbddc->dbg_flag) {
5681:     if (dirichlet) { /* Dirichlet */
5682:       VecSetRandom(pcis->vec1_D,NULL);
5683:       MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);
5684:       KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);
5685:       KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D);
5686:       VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);
5687:       VecNorm(pcis->vec1_D,NORM_INFINITY,&value);
5688:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);
5689:       PetscViewerFlush(pcbddc->dbg_viewer);
5690:     }
5691:     if (neumann) { /* Neumann */
5692:       VecSetRandom(pcbddc->vec1_R,NULL);
5693:       MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);
5694:       KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);
5695:       KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
5696:       VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);
5697:       VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);
5698:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);
5699:       PetscViewerFlush(pcbddc->dbg_viewer);
5700:     }
5701:   }
5702:   /* free Neumann problem's matrix */
        /* drops only the reference held by this function; NOTE(review): pcbddc->ksp_R presumably
           keeps its own reference through KSPSetOperators() - confirm */
5703:   MatDestroy(&A_RR);
5704:   return 0;
5705: }

5707: static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
5708: {
5709:   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
5710:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5711:   PetscBool       reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE;

5713:   if (!reuse_solver) {
5714:     VecSet(pcbddc->vec1_R,0.);
5715:   }
5716:   if (!pcbddc->switch_static) {
5717:     if (applytranspose && pcbddc->local_auxmat1) {
5718:       MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);
5719:       MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
5720:     }
5721:     if (!reuse_solver) {
5722:       VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5723:       VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5724:     } else {
5725:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5727:       VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
5728:       VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
5729:     }
5730:   } else {
5731:     VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5732:     VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5733:     VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5734:     VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5735:     if (applytranspose && pcbddc->local_auxmat1) {
5736:       MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);
5737:       MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
5738:       VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5739:       VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5740:     }
5741:   }
5742:   PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1],pc,0,0,0);
5743:   if (!reuse_solver || pcbddc->switch_static) {
5744:     if (applytranspose) {
5745:       KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
5746:     } else {
5747:       KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
5748:     }
5749:     KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec1_R);
5750:   } else {
5751:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5753:     if (applytranspose) {
5754:       MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);
5755:     } else {
5756:       MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);
5757:     }
5758:   }
5759:   PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1],pc,0,0,0);
5760:   VecSet(inout_B,0.);
5761:   if (!pcbddc->switch_static) {
5762:     if (!reuse_solver) {
5763:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5764:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5765:     } else {
5766:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5768:       VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
5769:       VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
5770:     }
5771:     if (!applytranspose && pcbddc->local_auxmat1) {
5772:       MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
5773:       MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);
5774:     }
5775:   } else {
5776:     VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5777:     VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5778:     VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5779:     VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5780:     if (!applytranspose && pcbddc->local_auxmat1) {
5781:       MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
5782:       MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);
5783:     }
5784:     VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5785:     VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5786:     VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5787:     VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5788:   }
5789:   return 0;
5790: }

/*
   PCBDDCApplyInterfacePreconditioner - Applies the coarse and local corrections to
   the data stored in pcis->vec1_B (and pcis->vec1_D when pcbddc->switch_static is set);
   the result overwrites the same vectors.

   Input Parameters:
+  pc             - the preconditioner; pc->data is read both as PC_BDDC and as PC_IS
-  applytranspose - when true the roles of the coarse_phi_* and coarse_psi_* matrices are
                    exchanged and the coarse problem is solved with KSPSolveTranspose()

   Notes:
   Steps, in order: restrict to the primal space (vec1_P), scatter-add into the coarse
   right-hand side, solve the coarse problem on the processes owning pcbddc->coarse_ksp,
   run PCBDDCSolveSubstructureCorrection() locally, scatter the coarse solution back and
   add its prolongation to the local result.

   When pcbddc->benign_apply_coarse_only is set, the restriction and the local solve are
   skipped and vec1_B is overwritten by the prolongated coarse solution alone.
*/
5792: /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
5793: PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
5794: {
5795:   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
5796:   PC_IS*            pcis = (PC_IS*)  (pc->data);
5797:   const PetscScalar zero = 0.0;

5799:   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
5800:   if (!pcbddc->benign_apply_coarse_only) {
5801:     if (applytranspose) {
5802:       MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);
5803:       if (pcbddc->switch_static) MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);
5804:     } else {
5805:       MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);
5806:       if (pcbddc->switch_static) MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);
5807:     }
5808:   } else {
          /* coarse-only application: start from a zero primal vector */
5809:     VecSet(pcbddc->vec1_P,zero);
5810:   }

5812:   /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
5813:   if (pcbddc->benign_n) {
5814:     PetscScalar *array;
5815:     PetscInt    j;

5817:     VecGetArray(pcbddc->vec1_P,&array);
          /* the benign_n entries are the trailing ones of the local primal vector */
5818:     for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j];
5819:     VecRestoreArray(pcbddc->vec1_P,&array);
5820:   }

5822:   /* start communications from local primal nodes to rhs of coarse solver */
5823:   VecSet(pcbddc->coarse_vec,zero);
5824:   PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);
5825:   PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);

5827:   /* Coarse solution -> rhs and sol updated inside PCBDDCScatterCoarseDataBegin/End */
5828:   if (pcbddc->coarse_ksp) {
5829:     Mat          coarse_mat;
5830:     Vec          rhs,sol;
5831:     MatNullSpace nullsp;
5832:     PetscBool    isbddc = PETSC_FALSE; /* only evaluated when benign_have_null is set */

5834:     if (pcbddc->benign_have_null) {
5835:       PC        coarse_pc;

5837:       KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5838:       PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);
5839:       /* we need to propagate to coarser levels the need for a possible benign correction */
5840:       if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) {
5841:         PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data);
5842:         coarsepcbddc->benign_skip_correction = PETSC_FALSE;
5843:         coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE;
5844:       }
5845:     }
5846:     KSPGetRhs(pcbddc->coarse_ksp,&rhs);
5847:     KSPGetSolution(pcbddc->coarse_ksp,&sol);
5848:     KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
5849:     if (applytranspose) {
            /* NOTE(review): this branch has no special handling for benign_apply_coarse_only;
               confirm that the combination with applytranspose cannot occur */
5851:       PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0);
5852:       KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);
5853:       PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0);
5854:       KSPCheckSolve(pcbddc->coarse_ksp,pc,sol);
            /* the transposed solve uses the transpose null space for the clean-up of sol */
5855:       MatGetTransposeNullSpace(coarse_mat,&nullsp);
5856:       if (nullsp) {
5857:         MatNullSpaceRemove(nullsp,sol);
5858:       }
5859:     } else {
5860:       MatGetNullSpace(coarse_mat,&nullsp);
5861:       if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */
5862:         PC        coarse_pc;

              /* here the null space is removed from the right-hand side, no KSP solve is performed */
5864:         if (nullsp) {
5865:           MatNullSpaceRemove(nullsp,rhs);
5866:         }
5867:         KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5868:         PCPreSolve(coarse_pc,pcbddc->coarse_ksp);
5869:         PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);
5870:         PCPostSolve(coarse_pc,pcbddc->coarse_ksp);
5871:       } else {
5872:         PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0);
5873:         KSPSolve(pcbddc->coarse_ksp,rhs,sol);
5874:         PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0);
5875:         KSPCheckSolve(pcbddc->coarse_ksp,pc,sol);
5876:         if (nullsp) {
5877:           MatNullSpaceRemove(nullsp,sol);
5878:         }
5879:       }
5880:     }
5881:     /* we don't need the benign correction at coarser levels anymore */
5882:     if (pcbddc->benign_have_null && isbddc) {
5883:       PC        coarse_pc;
5884:       PC_BDDC*  coarsepcbddc;

5886:       KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5887:       coarsepcbddc = (PC_BDDC*)(coarse_pc->data);
5888:       coarsepcbddc->benign_skip_correction = PETSC_TRUE;
5889:       coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE;
5890:     }
5891:   }

5893:   /* Local solution on R nodes */
5894:   if (pcis->n && !pcbddc->benign_apply_coarse_only) {
5895:     PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);
5896:   }
5897:   /* communications from coarse sol to local primal nodes */
5898:   PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);
5899:   PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);

5901:   /* Sum contributions from the two levels */
5902:   if (!pcbddc->benign_apply_coarse_only) {
          /* prolongation uses the basis not used for the restriction at the top:
             psi for the transposed application, phi otherwise */
5903:     if (applytranspose) {
5904:       MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
5905:       if (pcbddc->switch_static) MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);
5906:     } else {
5907:       MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
5908:       if (pcbddc->switch_static) MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);
5909:     }
5910:     /* store p0 */
5911:     if (pcbddc->benign_n) {
5912:       PetscScalar *array;
5913:       PetscInt    j;

5915:       VecGetArray(pcbddc->vec1_P,&array);
            /* read back the trailing benign_n primal values into pcbddc->benign_p0 */
5916:       for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j];
5917:       VecRestoreArray(pcbddc->vec1_P,&array);
5918:     }
5919:   } else { /* expand the coarse solution */
          /* vec1_B is overwritten (MatMult, not MatMultAdd); vec1_D is left untouched */
5920:     if (applytranspose) {
5921:       MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);
5922:     } else {
5923:       MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);
5924:     }
5925:   }
5926:   return 0;
5927: }

5929: PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
5930: {
5931:   PC_BDDC*          pcbddc = (PC_BDDC*)(pc->data);
5932:   Vec               from,to;
5933:   const PetscScalar *array;

5935:   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5936:     from = pcbddc->coarse_vec;
5937:     to = pcbddc->vec1_P;
5938:     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5939:       Vec tvec;

5941:       KSPGetRhs(pcbddc->coarse_ksp,&tvec);
5942:       VecResetArray(tvec);
5943:       KSPGetSolution(pcbddc->coarse_ksp,&tvec);
5944:       VecGetArrayRead(tvec,&array);
5945:       VecPlaceArray(from,array);
5946:       VecRestoreArrayRead(tvec,&array);
5947:     }
5948:   } else { /* from local to global -> put data in coarse right hand side */
5949:     from = pcbddc->vec1_P;
5950:     to = pcbddc->coarse_vec;
5951:   }
5952:   VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
5953:   return 0;
5954: }

5956: PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
5957: {
5958:   PC_BDDC*          pcbddc = (PC_BDDC*)(pc->data);
5959:   Vec               from,to;
5960:   const PetscScalar *array;

5962:   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5963:     from = pcbddc->coarse_vec;
5964:     to = pcbddc->vec1_P;
5965:   } else { /* from local to global -> put data in coarse right hand side */
5966:     from = pcbddc->vec1_P;
5967:     to = pcbddc->coarse_vec;
5968:   }
5969:   VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
5970:   if (smode == SCATTER_FORWARD) {
5971:     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5972:       Vec tvec;

5974:       KSPGetRhs(pcbddc->coarse_ksp,&tvec);
5975:       VecGetArrayRead(to,&array);
5976:       VecPlaceArray(tvec,array);
5977:       VecRestoreArrayRead(to,&array);
5978:     }
5979:   } else {
5980:     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
5981:      VecResetArray(from);
5982:     }
5983:   }
5984:   return 0;
5985: }

5987: PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
5988: {
5989:   PetscErrorCode    ierr;
5990:   PC_IS*            pcis = (PC_IS*)(pc->data);
5991:   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
5992:   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
5993:   /* one and zero */
5994:   PetscScalar       one=1.0,zero=0.0;
5995:   /* space to store constraints and their local indices */
5996:   PetscScalar       *constraints_data;
5997:   PetscInt          *constraints_idxs,*constraints_idxs_B;
5998:   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
5999:   PetscInt          *constraints_n;
6000:   /* iterators */
6001:   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
6002:   /* BLAS integers */
6003:   PetscBLASInt      lwork,lierr;
6004:   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
6005:   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
6006:   /* reuse */
6007:   PetscInt          olocal_primal_size,olocal_primal_size_cc;
6008:   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
6009:   /* change of basis */
6010:   PetscBool         qr_needed;
6011:   PetscBT           change_basis,qr_needed_idx;
6012:   /* auxiliary stuff */
6013:   PetscInt          *nnz,*is_indices;
6014:   PetscInt          ncc;
6015:   /* some quantities */
6016:   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
6017:   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
6018:   PetscReal         tol; /* tolerance for retaining eigenmodes */

6020:   tol  = PetscSqrtReal(PETSC_SMALL);
6021:   /* Destroy Mat objects computed previously */
6022:   MatDestroy(&pcbddc->ChangeOfBasisMatrix);
6023:   MatDestroy(&pcbddc->ConstraintMatrix);
6024:   MatDestroy(&pcbddc->switch_static_change);
6025:   /* save info on constraints from previous setup (if any) */
6026:   olocal_primal_size = pcbddc->local_primal_size;
6027:   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
6028:   PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);
6029:   PetscArraycpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc);
6030:   PetscArraycpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc);
6031:   PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
6032:   PetscFree(pcbddc->primal_indices_local_idxs);

6034:   if (!pcbddc->adaptive_selection) {
6035:     IS           ISForVertices,*ISForFaces,*ISForEdges;
6036:     MatNullSpace nearnullsp;
6037:     const Vec    *nearnullvecs;
6038:     Vec          *localnearnullsp;
6039:     PetscScalar  *array;
6040:     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
6041:     PetscBool    nnsp_has_cnst;
6042:     /* LAPACK working arrays for SVD or POD */
6043:     PetscBool    skip_lapack,boolforchange;
6044:     PetscScalar  *work;
6045:     PetscReal    *singular_vals;
6046: #if defined(PETSC_USE_COMPLEX)
6047:     PetscReal    *rwork;
6048: #endif
6049:     PetscScalar  *temp_basis = NULL,*correlation_mat = NULL;
6050:     PetscBLASInt dummy_int=1;
6051:     PetscScalar  dummy_scalar=1.;
6052:     PetscBool    use_pod = PETSC_FALSE;

6054:     /* MKL SVD with same input gives different results on different processes! */
6055: #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS)
6056:     use_pod = PETSC_TRUE;
6057: #endif
6058:     /* Get index sets for faces, edges and vertices from graph */
6059:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
6060:     /* print some info */
6061:     if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) {
6062:       PetscInt nv;

6064:       PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
6065:       ISGetSize(ISForVertices,&nv);
6066:       PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
6067:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
6068:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,nv,pcbddc->use_vertices);
6069:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%D)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);
6070:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%D)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);
6071:       PetscViewerFlush(pcbddc->dbg_viewer);
6072:       PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
6073:     }

6075:     /* free unneeded index sets */
6076:     if (!pcbddc->use_vertices) {
6077:       ISDestroy(&ISForVertices);
6078:     }
6079:     if (!pcbddc->use_edges) {
6080:       for (i=0;i<n_ISForEdges;i++) {
6081:         ISDestroy(&ISForEdges[i]);
6082:       }
6083:       PetscFree(ISForEdges);
6084:       n_ISForEdges = 0;
6085:     }
6086:     if (!pcbddc->use_faces) {
6087:       for (i=0;i<n_ISForFaces;i++) {
6088:         ISDestroy(&ISForFaces[i]);
6089:       }
6090:       PetscFree(ISForFaces);
6091:       n_ISForFaces = 0;
6092:     }

6094:     /* check if near null space is attached to global mat */
6095:     if (pcbddc->use_nnsp) {
6096:       MatGetNearNullSpace(pc->pmat,&nearnullsp);
6097:     } else nearnullsp = NULL;

6099:     if (nearnullsp) {
6100:       MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);
6101:       /* remove any stored info */
6102:       MatNullSpaceDestroy(&pcbddc->onearnullspace);
6103:       PetscFree(pcbddc->onearnullvecs_state);
6104:       /* store information for BDDC solver reuse */
6105:       PetscObjectReference((PetscObject)nearnullsp);
6106:       pcbddc->onearnullspace = nearnullsp;
6107:       PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);
6108:       for (i=0;i<nnsp_size;i++) {
6109:         PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);
6110:       }
6111:     } else { /* if near null space is not provided BDDC uses constants by default */
6112:       nnsp_size = 0;
6113:       nnsp_has_cnst = PETSC_TRUE;
6114:     }
6115:     /* get max number of constraints on a single cc */
6116:     max_constraints = nnsp_size;
6117:     if (nnsp_has_cnst) max_constraints++;

6119:     /*
6120:          Evaluate maximum storage size needed by the procedure
6121:          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
6122:          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
6123:          There can be multiple constraints per connected component
6124:                                                                                                                                                            */
6125:     n_vertices = 0;
6126:     if (ISForVertices) {
6127:       ISGetSize(ISForVertices,&n_vertices);
6128:     }
6129:     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
6130:     PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);

6132:     total_counts = n_ISForFaces+n_ISForEdges;
6133:     total_counts *= max_constraints;
6134:     total_counts += n_vertices;
6135:     PetscBTCreate(total_counts,&change_basis);

6137:     total_counts = 0;
6138:     max_size_of_constraint = 0;
6139:     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
6140:       IS used_is;
6141:       if (i<n_ISForEdges) {
6142:         used_is = ISForEdges[i];
6143:       } else {
6144:         used_is = ISForFaces[i-n_ISForEdges];
6145:       }
6146:       ISGetSize(used_is,&j);
6147:       total_counts += j;
6148:       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
6149:     }
6150:     PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);

6152:     /* get local part of global near null space vectors */
6153:     PetscMalloc1(nnsp_size,&localnearnullsp);
6154:     for (k=0;k<nnsp_size;k++) {
6155:       VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);
6156:       VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
6157:       VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
6158:     }

6160:     /* whether or not to skip lapack calls */
6161:     skip_lapack = PETSC_TRUE;
6162:     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;

6164:     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or for LAPACKsyev when POD is used, i.e. when GESVD is missing or MKL is in use) */
6165:     if (!skip_lapack) {
6166:       PetscScalar temp_work;

6168:       if (use_pod) {
6169:         /* Proper Orthogonal Decomposition (POD) using the snapshot method */
6170:         PetscMalloc1(max_constraints*max_constraints,&correlation_mat);
6171:         PetscMalloc1(max_constraints,&singular_vals);
6172:         PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);
6173: #if defined(PETSC_USE_COMPLEX)
6174:         PetscMalloc1(3*max_constraints,&rwork);
6175: #endif
6176:         /* now we evaluate the optimal workspace using query with lwork=-1 */
6177:         PetscBLASIntCast(max_constraints,&Blas_N);
6178:         PetscBLASIntCast(max_constraints,&Blas_LDA);
6179:         lwork = -1;
6180:         PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6181: #if !defined(PETSC_USE_COMPLEX)
6182:         PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
6183: #else
6184:         PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
6185: #endif
6186:         PetscFPTrapPop();
6188:       } else {
6189: #if !defined(PETSC_MISSING_LAPACK_GESVD)
6190:         /* SVD */
6191:         PetscInt max_n,min_n;
6192:         max_n = max_size_of_constraint;
6193:         min_n = max_constraints;
6194:         if (max_size_of_constraint < max_constraints) {
6195:           min_n = max_size_of_constraint;
6196:           max_n = max_constraints;
6197:         }
6198:         PetscMalloc1(min_n,&singular_vals);
6199: #if defined(PETSC_USE_COMPLEX)
6200:         PetscMalloc1(5*min_n,&rwork);
6201: #endif
6202:         /* now we evaluate the optimal workspace using query with lwork=-1 */
6203:         lwork = -1;
6204:         PetscBLASIntCast(max_n,&Blas_M);
6205:         PetscBLASIntCast(min_n,&Blas_N);
6206:         PetscBLASIntCast(max_n,&Blas_LDA);
6207:         PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6208: #if !defined(PETSC_USE_COMPLEX)
6209:         PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
6210: #else
6211:         PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
6212: #endif
6213:         PetscFPTrapPop();
6215: #else
6216:         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen");
6217: #endif /* on missing GESVD */
6218:       }
6219:       /* Allocate optimal workspace */
6220:       PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);
6221:       PetscMalloc1(lwork,&work);
6222:     }
6223:     /* Now we can loop on constraining sets */
6224:     total_counts = 0;
6225:     constraints_idxs_ptr[0] = 0;
6226:     constraints_data_ptr[0] = 0;
6227:     /* vertices */
6228:     if (n_vertices) {
6229:       ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);
6230:       PetscArraycpy(constraints_idxs,is_indices,n_vertices);
6231:       for (i=0;i<n_vertices;i++) {
6232:         constraints_n[total_counts] = 1;
6233:         constraints_data[total_counts] = 1.0;
6234:         constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
6235:         constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
6236:         total_counts++;
6237:       }
6238:       ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);
6239:       n_vertices = total_counts;
6240:     }

6242:     /* edges and faces */
6243:     total_counts_cc = total_counts;
6244:     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
6245:       IS        used_is;
6246:       PetscBool idxs_copied = PETSC_FALSE;

6248:       if (ncc<n_ISForEdges) {
6249:         used_is = ISForEdges[ncc];
6250:         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
6251:       } else {
6252:         used_is = ISForFaces[ncc-n_ISForEdges];
6253:         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
6254:       }
6255:       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */

6257:       ISGetSize(used_is,&size_of_constraint);
6258:       ISGetIndices(used_is,(const PetscInt**)&is_indices);
6259:       /* change of basis should not be performed on local periodic nodes */
6260:       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
6261:       if (nnsp_has_cnst) {
6262:         PetscScalar quad_value;

6264:         PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint);
6265:         idxs_copied = PETSC_TRUE;

6267:         if (!pcbddc->use_nnsp_true) {
6268:           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
6269:         } else {
6270:           quad_value = 1.0;
6271:         }
6272:         for (j=0;j<size_of_constraint;j++) {
6273:           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
6274:         }
6275:         temp_constraints++;
6276:         total_counts++;
6277:       }
6278:       for (k=0;k<nnsp_size;k++) {
6279:         PetscReal real_value;
6280:         PetscScalar *ptr_to_data;

6282:         VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
6283:         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
6284:         for (j=0;j<size_of_constraint;j++) {
6285:           ptr_to_data[j] = array[is_indices[j]];
6286:         }
6287:         VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
6288:         /* check if array is null on the connected component */
6289:         PetscBLASIntCast(size_of_constraint,&Blas_N);
6290:         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
6291:         if (real_value > tol*size_of_constraint) { /* keep indices and values */
6292:           temp_constraints++;
6293:           total_counts++;
6294:           if (!idxs_copied) {
6295:             PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint);
6296:             idxs_copied = PETSC_TRUE;
6297:           }
6298:         }
6299:       }
6300:       ISRestoreIndices(used_is,(const PetscInt**)&is_indices);
6301:       valid_constraints = temp_constraints;
6302:       if (!pcbddc->use_nnsp_true && temp_constraints) {
6303:         if (temp_constraints == 1) { /* just normalize the constraint */
6304:           PetscScalar norm,*ptr_to_data;

6306:           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6307:           PetscBLASIntCast(size_of_constraint,&Blas_N);
6308:           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
6309:           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
6310:           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
6311:         } else { /* perform SVD */
6312:           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];

6314:           if (use_pod) {
6315:             /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
6316:                POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
6317:                -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
6318:                   the constraints basis will differ (by a complex factor with absolute value equal to 1)
6319:                   from that computed using LAPACKgesvd
6320:                -> This is due to a different computation of eigenvectors in LAPACKheev
6321:                -> The quality of the POD-computed basis will be the same */
6322:             PetscArrayzero(correlation_mat,temp_constraints*temp_constraints);
6323:             /* Store upper triangular part of correlation matrix */
6324:             PetscBLASIntCast(size_of_constraint,&Blas_N);
6325:             PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6326:             for (j=0;j<temp_constraints;j++) {
6327:               for (k=0;k<j+1;k++) {
6328:                 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
6329:               }
6330:             }
6331:             /* compute eigenvalues and eigenvectors of correlation matrix */
6332:             PetscBLASIntCast(temp_constraints,&Blas_N);
6333:             PetscBLASIntCast(temp_constraints,&Blas_LDA);
6334: #if !defined(PETSC_USE_COMPLEX)
6335:             PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
6336: #else
6337:             PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
6338: #endif
6339:             PetscFPTrapPop();
6341:             /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
6342:             j = 0;
6343:             while (j < temp_constraints && singular_vals[j]/singular_vals[temp_constraints-1] < tol) j++;
6344:             total_counts = total_counts-j;
6345:             valid_constraints = temp_constraints-j;
6346:             /* scale and copy POD basis into used quadrature memory */
6347:             PetscBLASIntCast(size_of_constraint,&Blas_M);
6348:             PetscBLASIntCast(temp_constraints,&Blas_N);
6349:             PetscBLASIntCast(temp_constraints,&Blas_K);
6350:             PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6351:             PetscBLASIntCast(temp_constraints,&Blas_LDB);
6352:             PetscBLASIntCast(size_of_constraint,&Blas_LDC);
6353:             if (j<temp_constraints) {
6354:               PetscInt ii;
6355:               for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
6356:               PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6357:               PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
6358:               PetscFPTrapPop();
6359:               for (k=0;k<temp_constraints-j;k++) {
6360:                 for (ii=0;ii<size_of_constraint;ii++) {
6361:                   ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
6362:                 }
6363:               }
6364:             }
6365:           } else {
6366: #if !defined(PETSC_MISSING_LAPACK_GESVD)
6367:             PetscBLASIntCast(size_of_constraint,&Blas_M);
6368:             PetscBLASIntCast(temp_constraints,&Blas_N);
6369:             PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6370:             PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6371: #if !defined(PETSC_USE_COMPLEX)
6372:             PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
6373: #else
6374:             PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
6375: #endif
6377:             PetscFPTrapPop();
6378:             /* retain singular values greater than tol (relative to the largest one): note that LAPACKgesvd returns singular values in descending order */
6379:             k = temp_constraints;
6380:             if (k > size_of_constraint) k = size_of_constraint;
6381:             j = 0;
6382:             while (j < k && singular_vals[k-j-1]/singular_vals[0] < tol) j++;
6383:             valid_constraints = k-j;
6384:             total_counts = total_counts-temp_constraints+valid_constraints;
6385: #else
6386:             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen");
6387: #endif /* on missing GESVD */
6388:           }
6389:         }
6390:       }
6391:       /* update pointers information */
6392:       if (valid_constraints) {
6393:         constraints_n[total_counts_cc] = valid_constraints;
6394:         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
6395:         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
6396:         /* set change_of_basis flag */
6397:         if (boolforchange) {
6398:           PetscBTSet(change_basis,total_counts_cc);
6399:         }
6400:         total_counts_cc++;
6401:       }
6402:     }
6403:     /* free workspace */
6404:     if (!skip_lapack) {
6405:       PetscFree(work);
6406: #if defined(PETSC_USE_COMPLEX)
6407:       PetscFree(rwork);
6408: #endif
6409:       PetscFree(singular_vals);
6410:       PetscFree(correlation_mat);
6411:       PetscFree(temp_basis);
6412:     }
6413:     for (k=0;k<nnsp_size;k++) {
6414:       VecDestroy(&localnearnullsp[k]);
6415:     }
6416:     PetscFree(localnearnullsp);
6417:     /* free index sets of faces, edges and vertices */
6418:     for (i=0;i<n_ISForFaces;i++) {
6419:       ISDestroy(&ISForFaces[i]);
6420:     }
6421:     if (n_ISForFaces) {
6422:       PetscFree(ISForFaces);
6423:     }
6424:     for (i=0;i<n_ISForEdges;i++) {
6425:       ISDestroy(&ISForEdges[i]);
6426:     }
6427:     if (n_ISForEdges) {
6428:       PetscFree(ISForEdges);
6429:     }
6430:     ISDestroy(&ISForVertices);
6431:   } else {
6432:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;

6434:     total_counts = 0;
6435:     n_vertices = 0;
6436:     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
6437:       ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
6438:     }
6439:     max_constraints = 0;
6440:     total_counts_cc = 0;
6441:     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
6442:       total_counts += pcbddc->adaptive_constraints_n[i];
6443:       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
6444:       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
6445:     }
6446:     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
6447:     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
6448:     constraints_idxs = pcbddc->adaptive_constraints_idxs;
6449:     constraints_data = pcbddc->adaptive_constraints_data;
6450:     /* constraints_n differs from pcbddc->adaptive_constraints_n */
6451:     PetscMalloc1(total_counts_cc,&constraints_n);
6452:     total_counts_cc = 0;
6453:     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
6454:       if (pcbddc->adaptive_constraints_n[i]) {
6455:         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
6456:       }
6457:     }

6459:     max_size_of_constraint = 0;
6460:     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
6461:     PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);
6462:     /* Change of basis */
6463:     PetscBTCreate(total_counts_cc,&change_basis);
6464:     if (pcbddc->use_change_of_basis) {
6465:       for (i=0;i<sub_schurs->n_subs;i++) {
6466:         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
6467:           PetscBTSet(change_basis,i+n_vertices);
6468:         }
6469:       }
6470:     }
6471:   }
6472:   pcbddc->local_primal_size = total_counts;
6473:   PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);

6475:   /* map constraints_idxs in boundary numbering */
6476:   ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);

6479:   /* Create constraint matrix */
6480:   MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);
6481:   MatSetType(pcbddc->ConstraintMatrix,MATAIJ);
6482:   MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);

6484:   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
6485:   /* determine if a QR strategy is needed for change of basis */
6486:   qr_needed = pcbddc->use_qr_single;
6487:   PetscBTCreate(total_counts_cc,&qr_needed_idx);
6488:   total_primal_vertices=0;
6489:   pcbddc->local_primal_size_cc = 0;
6490:   for (i=0;i<total_counts_cc;i++) {
6491:     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6492:     if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) {
6493:       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
6494:       pcbddc->local_primal_size_cc += 1;
6495:     } else if (PetscBTLookup(change_basis,i)) {
6496:       for (k=0;k<constraints_n[i];k++) {
6497:         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
6498:       }
6499:       pcbddc->local_primal_size_cc += constraints_n[i];
6500:       if (constraints_n[i] > 1 || pcbddc->use_qr_single) {
6501:         PetscBTSet(qr_needed_idx,i);
6502:         qr_needed = PETSC_TRUE;
6503:       }
6504:     } else {
6505:       pcbddc->local_primal_size_cc += 1;
6506:     }
6507:   }
6508:   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
6509:   pcbddc->n_vertices = total_primal_vertices;
6510:   /* permute indices in order to have a sorted set of vertices */
6511:   PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);
6512:   PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);
6513:   PetscArraycpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices);
6514:   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;

6516:   /* nonzero structure of constraint matrix */
6517:   /* and get reference dof for local constraints */
6518:   PetscMalloc1(pcbddc->local_primal_size,&nnz);
6519:   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;

6521:   j = total_primal_vertices;
6522:   total_counts = total_primal_vertices;
6523:   cum = total_primal_vertices;
6524:   for (i=n_vertices;i<total_counts_cc;i++) {
6525:     if (!PetscBTLookup(change_basis,i)) {
6526:       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
6527:       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
6528:       cum++;
6529:       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6530:       for (k=0;k<constraints_n[i];k++) {
6531:         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
6532:         nnz[j+k] = size_of_constraint;
6533:       }
6534:       j += constraints_n[i];
6535:     }
6536:   }
6537:   MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);
6538:   MatSetOption(pcbddc->ConstraintMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
6539:   PetscFree(nnz);

6541:   /* set values in constraint matrix */
6542:   for (i=0;i<total_primal_vertices;i++) {
6543:     MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);
6544:   }
6545:   total_counts = total_primal_vertices;
6546:   for (i=n_vertices;i<total_counts_cc;i++) {
6547:     if (!PetscBTLookup(change_basis,i)) {
6548:       PetscInt *cols;

6550:       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6551:       cols = constraints_idxs+constraints_idxs_ptr[i];
6552:       for (k=0;k<constraints_n[i];k++) {
6553:         PetscInt    row = total_counts+k;
6554:         PetscScalar *vals;

6556:         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
6557:         MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);
6558:       }
6559:       total_counts += constraints_n[i];
6560:     }
6561:   }
6562:   /* assembling */
6563:   MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
6564:   MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
6565:   MatViewFromOptions(pcbddc->ConstraintMatrix,(PetscObject)pc,"-pc_bddc_constraint_mat_view");

6567:   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
6568:   if (pcbddc->use_change_of_basis) {
6569:     /* dual and primal dofs on a single cc */
6570:     PetscInt     dual_dofs,primal_dofs;
6571:     /* working stuff for GEQRF */
6572:     PetscScalar  *qr_basis = NULL,*qr_tau = NULL,*qr_work = NULL,lqr_work_t;
6573:     PetscBLASInt lqr_work;
6574:     /* working stuff for UNGQR */
6575:     PetscScalar  *gqr_work = NULL,lgqr_work_t=0.0;
6576:     PetscBLASInt lgqr_work;
6577:     /* working stuff for TRTRS */
6578:     PetscScalar  *trs_rhs = NULL;
6579:     PetscBLASInt Blas_NRHS;
6580:     /* pointers for values insertion into change of basis matrix */
6581:     PetscInt     *start_rows,*start_cols;
6582:     PetscScalar  *start_vals;
6583:     /* working stuff for values insertion */
6584:     PetscBT      is_primal;
6585:     PetscInt     *aux_primal_numbering_B;
6586:     /* matrix sizes */
6587:     PetscInt     global_size,local_size;
6588:     /* temporary change of basis */
6589:     Mat          localChangeOfBasisMatrix;
6590:     /* extra space for debugging */
6591:     PetscScalar  *dbg_work = NULL;

6593:     /* local temporary change of basis acts on local interfaces, but it is stored as a pcis->n x pcis->n matrix in local numbering */
6594:     MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);
6595:     MatSetType(localChangeOfBasisMatrix,MATAIJ);
6596:     MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);
6597:     /* nonzeros for local mat */
6598:     PetscMalloc1(pcis->n,&nnz);
6599:     if (!pcbddc->benign_change || pcbddc->fake_change) {
6600:       for (i=0;i<pcis->n;i++) nnz[i]=1;
6601:     } else {
6602:       const PetscInt *ii;
6603:       PetscInt       n;
6604:       PetscBool      flg_row;
6605:       MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);
6606:       for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i];
6607:       MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);
6608:     }
6609:     for (i=n_vertices;i<total_counts_cc;i++) {
6610:       if (PetscBTLookup(change_basis,i)) {
6611:         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6612:         if (PetscBTLookup(qr_needed_idx,i)) {
6613:           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
6614:         } else {
6615:           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
6616:           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
6617:         }
6618:       }
6619:     }
6620:     MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);
6621:     MatSetOption(localChangeOfBasisMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
6622:     PetscFree(nnz);
6623:     /* Set interior change in the matrix */
6624:     if (!pcbddc->benign_change || pcbddc->fake_change) {
6625:       for (i=0;i<pcis->n;i++) {
6626:         MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);
6627:       }
6628:     } else {
6629:       const PetscInt *ii,*jj;
6630:       PetscScalar    *aa;
6631:       PetscInt       n;
6632:       PetscBool      flg_row;
6633:       MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);
6634:       MatSeqAIJGetArray(pcbddc->benign_change,&aa);
6635:       for (i=0;i<n;i++) {
6636:         MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);
6637:       }
6638:       MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);
6639:       MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);
6640:     }

6642:     if (pcbddc->dbg_flag) {
6643:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
6644:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);
6645:     }

6647:     /* Now we loop on the constraints which need a change of basis */
6648:     /*
6649:        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
6650:        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)

6652:        Basic blocks of change of basis matrix T computed by

6654:           - Using the following block transformation if there is only one primal dof on the cc (and -pc_bddc_use_qr_single is not specified)

6656:             | 1        0   ...        0         s_1/S |
6657:             | 0        1   ...        0         s_2/S |
6658:             |              ...                        |
6659:             | 0        ...            1     s_{n-1}/S |
6660:             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |

6662:             with S = \sum_{i=1}^n s_i^2
6663:             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
6664:                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering

6666:           - QR decomposition of constraints otherwise
6667:     */
6668:     if (qr_needed && max_size_of_constraint) {
6669:       /* space to store Q */
6670:       PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);
6671:       /* array to store scaling factors for reflectors */
6672:       PetscMalloc1(max_constraints,&qr_tau);
6673:       /* first we issue queries for optimal work */
6674:       PetscBLASIntCast(max_size_of_constraint,&Blas_M);
6675:       PetscBLASIntCast(max_constraints,&Blas_N);
6676:       PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
6677:       lqr_work = -1;
6678:       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
6680:       PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);
6681:       PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);
6682:       lgqr_work = -1;
6683:       PetscBLASIntCast(max_size_of_constraint,&Blas_M);
6684:       PetscBLASIntCast(max_size_of_constraint,&Blas_N);
6685:       PetscBLASIntCast(max_constraints,&Blas_K);
6686:       PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
6687:       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
6688:       PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
6690:       PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);
6691:       PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);
6692:       /* array to store rhs and solution of triangular solver */
6693:       PetscMalloc1(max_constraints*max_constraints,&trs_rhs);
6694:       /* allocating workspace for check */
6695:       if (pcbddc->dbg_flag) {
6696:         PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);
6697:       }
6698:     }
6699:     /* array to store whether a node is primal or not */
6700:     PetscBTCreate(pcis->n_B,&is_primal);
6701:     PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);
6702:     ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);
6704:     for (i=0;i<total_primal_vertices;i++) {
6705:       PetscBTSet(is_primal,aux_primal_numbering_B[i]);
6706:     }
6707:     PetscFree(aux_primal_numbering_B);

6709:     /* loop on constraints and see whether or not they need a change of basis and compute it */
6710:     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
6711:       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
6712:       if (PetscBTLookup(change_basis,total_counts)) {
6713:         /* get constraint info */
6714:         primal_dofs = constraints_n[total_counts];
6715:         dual_dofs = size_of_constraint-primal_dofs;

6717:         if (pcbddc->dbg_flag) {
6718:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %D: %D need a change of basis (size %D)\n",total_counts,primal_dofs,size_of_constraint);
6719:         }

6721:         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */

6723:           /* copy quadrature constraints for change of basis check */
6724:           if (pcbddc->dbg_flag) {
6725:             PetscArraycpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);
6726:           }
6727:           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
6728:           PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);

6730:           /* compute QR decomposition of constraints */
6731:           PetscBLASIntCast(size_of_constraint,&Blas_M);
6732:           PetscBLASIntCast(primal_dofs,&Blas_N);
6733:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6734:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6735:           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
6737:           PetscFPTrapPop();

6739:           /* explicitly compute R^-T */
6740:           PetscArrayzero(trs_rhs,primal_dofs*primal_dofs);
6741:           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
6742:           PetscBLASIntCast(primal_dofs,&Blas_N);
6743:           PetscBLASIntCast(primal_dofs,&Blas_NRHS);
6744:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6745:           PetscBLASIntCast(primal_dofs,&Blas_LDB);
6746:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6747:           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
6749:           PetscFPTrapPop();

6751:           /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */
6752:           PetscBLASIntCast(size_of_constraint,&Blas_M);
6753:           PetscBLASIntCast(size_of_constraint,&Blas_N);
6754:           PetscBLASIntCast(primal_dofs,&Blas_K);
6755:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6756:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6757:           PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
6759:           PetscFPTrapPop();

6761:           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
6762:              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
6763:              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
6764:           PetscBLASIntCast(size_of_constraint,&Blas_M);
6765:           PetscBLASIntCast(primal_dofs,&Blas_N);
6766:           PetscBLASIntCast(primal_dofs,&Blas_K);
6767:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6768:           PetscBLASIntCast(primal_dofs,&Blas_LDB);
6769:           PetscBLASIntCast(size_of_constraint,&Blas_LDC);
6770:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6771:           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
6772:           PetscFPTrapPop();
6773:           PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);

6775:           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
6776:           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
6777:           /* insert cols for primal dofs */
6778:           for (j=0;j<primal_dofs;j++) {
6779:             start_vals = &qr_basis[j*size_of_constraint];
6780:             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
6781:             MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
6782:           }
6783:           /* insert cols for dual dofs */
6784:           for (j=0,k=0;j<dual_dofs;k++) {
6785:             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
6786:               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
6787:               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
6788:               MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
6789:               j++;
6790:             }
6791:           }

6793:           /* check change of basis */
6794:           if (pcbddc->dbg_flag) {
6795:             PetscInt   ii,jj;
6796:             PetscBool valid_qr=PETSC_TRUE;
6797:             PetscBLASIntCast(primal_dofs,&Blas_M);
6798:             PetscBLASIntCast(size_of_constraint,&Blas_N);
6799:             PetscBLASIntCast(size_of_constraint,&Blas_K);
6800:             PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6801:             PetscBLASIntCast(size_of_constraint,&Blas_LDB);
6802:             PetscBLASIntCast(primal_dofs,&Blas_LDC);
6803:             PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6804:             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
6805:             PetscFPTrapPop();
6806:             for (jj=0;jj<size_of_constraint;jj++) {
6807:               for (ii=0;ii<primal_dofs;ii++) {
6808:                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
6809:                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE;
6810:               }
6811:             }
6812:             if (!valid_qr) {
6813:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");
6814:               for (jj=0;jj<size_of_constraint;jj++) {
6815:                 for (ii=0;ii<primal_dofs;ii++) {
6816:                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
6817:                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not orthogonal to constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
6818:                   }
6819:                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) {
6820:                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not unitary w.r.t constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
6821:                   }
6822:                 }
6823:               }
6824:             } else {
6825:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");
6826:             }
6827:           }
6828:         } else { /* simple transformation block */
6829:           PetscInt    row,col;
6830:           PetscScalar val,norm;

6832:           PetscBLASIntCast(size_of_constraint,&Blas_N);
6833:           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
6834:           for (j=0;j<size_of_constraint;j++) {
6835:             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
6836:             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
6837:             if (!PetscBTLookup(is_primal,row_B)) {
6838:               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
6839:               MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);
6840:               MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);
6841:             } else {
6842:               for (k=0;k<size_of_constraint;k++) {
6843:                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
6844:                 if (row != col) {
6845:                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
6846:                 } else {
6847:                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
6848:                 }
6849:                 MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);
6850:               }
6851:             }
6852:           }
6853:           if (pcbddc->dbg_flag) {
6854:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");
6855:           }
6856:         }
6857:       } else {
6858:         if (pcbddc->dbg_flag) {
6859:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %D does not need a change of basis (size %D)\n",total_counts,size_of_constraint);
6860:         }
6861:       }
6862:     }

6864:     /* free workspace */
6865:     if (qr_needed) {
6866:       if (pcbddc->dbg_flag) {
6867:         PetscFree(dbg_work);
6868:       }
6869:       PetscFree(trs_rhs);
6870:       PetscFree(qr_tau);
6871:       PetscFree(qr_work);
6872:       PetscFree(gqr_work);
6873:       PetscFree(qr_basis);
6874:     }
6875:     PetscBTDestroy(&is_primal);
6876:     MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
6877:     MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);

6879:     /* assembling of global change of variable */
6880:     if (!pcbddc->fake_change) {
6881:       Mat      tmat;
6882:       PetscInt bs;

6884:       VecGetSize(pcis->vec1_global,&global_size);
6885:       VecGetLocalSize(pcis->vec1_global,&local_size);
6886:       MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);
6887:       MatISSetLocalMat(tmat,localChangeOfBasisMatrix);
6888:       MatAssemblyBegin(tmat,MAT_FINAL_ASSEMBLY);
6889:       MatAssemblyEnd(tmat,MAT_FINAL_ASSEMBLY);
6890:       MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);
6891:       MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);
6892:       MatGetBlockSize(pc->pmat,&bs);
6893:       MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);
6894:       MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);
6895:       MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);
6896:       MatConvert(tmat,MATAIJ,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);
6897:       MatDestroy(&tmat);
6898:       VecSet(pcis->vec1_global,0.0);
6899:       VecSet(pcis->vec1_N,1.0);
6900:       VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6901:       VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6902:       VecReciprocal(pcis->vec1_global);
6903:       MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);

6905:       /* check */
6906:       if (pcbddc->dbg_flag) {
6907:         PetscReal error;
6908:         Vec       x,x_change;

6910:         VecDuplicate(pcis->vec1_global,&x);
6911:         VecDuplicate(pcis->vec1_global,&x_change);
6912:         VecSetRandom(x,NULL);
6913:         VecCopy(x,pcis->vec1_global);
6914:         VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6915:         VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6916:         MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);
6917:         VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
6918:         VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
6919:         MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);
6920:         VecAXPY(x,-1.0,x_change);
6921:         VecNorm(x,NORM_INFINITY,&error);
6922:         if (error > PETSC_SMALL) {
6923:           SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error);
6924:         }
6925:         VecDestroy(&x);
6926:         VecDestroy(&x_change);
6927:       }
6928:       /* adapt sub_schurs computed (if any) */
6929:       if (pcbddc->use_deluxe_scaling) {
6930:         PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;

6933:         if (sub_schurs && sub_schurs->S_Ej_all) {
6934:           Mat                    S_new,tmat;
6935:           IS                     is_all_N,is_V_Sall = NULL;

6937:           ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);
6938:           MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);
6939:           if (pcbddc->deluxe_zerorows) {
6940:             ISLocalToGlobalMapping NtoSall;
6941:             IS                     is_V;
6942:             ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);
6943:             ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);
6944:             ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);
6945:             ISLocalToGlobalMappingDestroy(&NtoSall);
6946:             ISDestroy(&is_V);
6947:           }
6948:           ISDestroy(&is_all_N);
6949:           MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
6950:           MatDestroy(&sub_schurs->S_Ej_all);
6951:           PetscObjectReference((PetscObject)S_new);
6952:           if (pcbddc->deluxe_zerorows) {
6953:             const PetscScalar *array;
6954:             const PetscInt    *idxs_V,*idxs_all;
6955:             PetscInt          i,n_V;

6957:             MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);
6958:             ISGetLocalSize(is_V_Sall,&n_V);
6959:             ISGetIndices(is_V_Sall,&idxs_V);
6960:             ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);
6961:             VecGetArrayRead(pcis->D,&array);
6962:             for (i=0;i<n_V;i++) {
6963:               PetscScalar val;
6964:               PetscInt    idx;

6966:               idx = idxs_V[i];
6967:               val = array[idxs_all[idxs_V[i]]];
6968:               MatSetValue(S_new,idx,idx,val,INSERT_VALUES);
6969:             }
6970:             MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);
6971:             MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);
6972:             VecRestoreArrayRead(pcis->D,&array);
6973:             ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);
6974:             ISRestoreIndices(is_V_Sall,&idxs_V);
6975:           }
6976:           sub_schurs->S_Ej_all = S_new;
6977:           MatDestroy(&S_new);
6978:           if (sub_schurs->sum_S_Ej_all) {
6979:             MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
6980:             MatDestroy(&sub_schurs->sum_S_Ej_all);
6981:             PetscObjectReference((PetscObject)S_new);
6982:             if (pcbddc->deluxe_zerorows) {
6983:               MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);
6984:             }
6985:             sub_schurs->sum_S_Ej_all = S_new;
6986:             MatDestroy(&S_new);
6987:           }
6988:           ISDestroy(&is_V_Sall);
6989:           MatDestroy(&tmat);
6990:         }
6991:         /* destroy any change of basis context in sub_schurs */
6992:         if (sub_schurs && sub_schurs->change) {
6993:           PetscInt i;

6995:           for (i=0;i<sub_schurs->n_subs;i++) {
6996:             KSPDestroy(&sub_schurs->change[i]);
6997:           }
6998:           PetscFree(sub_schurs->change);
6999:         }
7000:       }
7001:       if (pcbddc->switch_static) { /* need to save the local change */
7002:         pcbddc->switch_static_change = localChangeOfBasisMatrix;
7003:       } else {
7004:         MatDestroy(&localChangeOfBasisMatrix);
7005:       }
7006:       /* determine if any process has changed the pressures locally */
7007:       pcbddc->change_interior = pcbddc->benign_have_null;
7008:     } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */
7009:       MatDestroy(&pcbddc->ConstraintMatrix);
7010:       pcbddc->ConstraintMatrix = localChangeOfBasisMatrix;
7011:       pcbddc->use_qr_single = qr_needed;
7012:     }
7013:   } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) {
7014:     if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) {
7015:       PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);
7016:       pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
7017:     } else {
7018:       Mat benign_global = NULL;
7019:       if (pcbddc->benign_have_null) {
7020:         Mat M;

7022:         pcbddc->change_interior = PETSC_TRUE;
7023:         VecCopy(matis->counter,pcis->vec1_N);
7024:         VecReciprocal(pcis->vec1_N);
7025:         MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&benign_global);
7026:         if (pcbddc->benign_change) {
7027:           MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);
7028:           MatDiagonalScale(M,pcis->vec1_N,NULL);
7029:         } else {
7030:           MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&M);
7031:           MatDiagonalSet(M,pcis->vec1_N,INSERT_VALUES);
7032:         }
7033:         MatISSetLocalMat(benign_global,M);
7034:         MatDestroy(&M);
7035:         MatAssemblyBegin(benign_global,MAT_FINAL_ASSEMBLY);
7036:         MatAssemblyEnd(benign_global,MAT_FINAL_ASSEMBLY);
7037:       }
7038:       if (pcbddc->user_ChangeOfBasisMatrix) {
7039:         MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);
7040:         MatDestroy(&benign_global);
7041:       } else if (pcbddc->benign_have_null) {
7042:         pcbddc->ChangeOfBasisMatrix = benign_global;
7043:       }
7044:     }
7045:     if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */
7046:       IS             is_global;
7047:       const PetscInt *gidxs;

7049:       ISLocalToGlobalMappingGetIndices(matis->rmapping,&gidxs);
7050:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);
7051:       ISLocalToGlobalMappingRestoreIndices(matis->rmapping,&gidxs);
7052:       MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);
7053:       ISDestroy(&is_global);
7054:     }
7055:   }
7056:   if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) {
7057:     VecDuplicate(pcis->vec1_global,&pcbddc->work_change);
7058:   }

7060:   if (!pcbddc->fake_change) {
7061:     /* add pressure dofs to set of primal nodes for numbering purposes */
7062:     for (i=0;i<pcbddc->benign_n;i++) {
7063:       pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i];
7064:       pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
7065:       pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1;
7066:       pcbddc->local_primal_size_cc++;
7067:       pcbddc->local_primal_size++;
7068:     }

7070:     /* check if a new primal space has been introduced (also take into account benign trick) */
7071:     pcbddc->new_primal_space_local = PETSC_TRUE;
7072:     if (olocal_primal_size == pcbddc->local_primal_size) {
7073:       PetscArraycmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc,&pcbddc->new_primal_space_local);
7074:       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7075:       if (!pcbddc->new_primal_space_local) {
7076:         PetscArraycmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc,&pcbddc->new_primal_space_local);
7077:         pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7078:       }
7079:     }
7080:     /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
7081:     MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
7082:   }
7083:   PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);

7085:   /* flush dbg viewer */
7086:   if (pcbddc->dbg_flag) {
7087:     PetscViewerFlush(pcbddc->dbg_viewer);
7088:   }

7090:   /* free workspace */
7091:   PetscBTDestroy(&qr_needed_idx);
7092:   PetscBTDestroy(&change_basis);
7093:   if (!pcbddc->adaptive_selection) {
7094:     PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);
7095:     PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);
7096:   } else {
7097:     PetscFree5(pcbddc->adaptive_constraints_n,
7098:                       pcbddc->adaptive_constraints_idxs_ptr,
7099:                       pcbddc->adaptive_constraints_data_ptr,
7100:                       pcbddc->adaptive_constraints_idxs,
7101:                       pcbddc->adaptive_constraints_data);
7102:     PetscFree(constraints_n);
7103:     PetscFree(constraints_idxs_B);
7104:   }
7105:   return 0;
7106: }

7108: PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
7109: {
7110:   ISLocalToGlobalMapping map;
7111:   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
7112:   Mat_IS                 *matis  = (Mat_IS*)pc->pmat->data;
7113:   PetscInt               i,N;
7114:   PetscBool              rcsr = PETSC_FALSE;

7116:   if (pcbddc->recompute_topography) {
7117:     pcbddc->graphanalyzed = PETSC_FALSE;
7118:     /* Reset previously computed graph */
7119:     PCBDDCGraphReset(pcbddc->mat_graph);
7120:     /* Init local Graph struct */
7121:     MatGetSize(pc->pmat,&N,NULL);
7122:     MatISGetLocalToGlobalMapping(pc->pmat,&map,NULL);
7123:     PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);

7125:     if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) {
7126:       PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);
7127:     }
7128:     /* Check validity of the csr graph passed in by the user */

7131:     /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
7132:     if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) {
7133:       PetscInt  *xadj,*adjncy;
7134:       PetscInt  nvtxs;
7135:       PetscBool flg_row=PETSC_FALSE;

7137:       MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
7138:       if (flg_row) {
7139:         PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);
7140:         pcbddc->computed_rowadj = PETSC_TRUE;
7141:       }
7142:       MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
7143:       rcsr = PETSC_TRUE;
7144:     }
7145:     if (pcbddc->dbg_flag) {
7146:       PetscViewerFlush(pcbddc->dbg_viewer);
7147:     }

7149:     if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) {
7150:       PetscReal    *lcoords;
7151:       PetscInt     n;
7152:       MPI_Datatype dimrealtype;

7154:       /* TODO: support for blocked */
7156:       MatGetLocalSize(matis->A,&n,NULL);
7157:       PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords);
7158:       MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype);
7159:       MPI_Type_commit(&dimrealtype);
7160:       PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords,MPI_REPLACE);
7161:       PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords,MPI_REPLACE);
7162:       MPI_Type_free(&dimrealtype);
7163:       PetscFree(pcbddc->mat_graph->coords);

7165:       pcbddc->mat_graph->coords = lcoords;
7166:       pcbddc->mat_graph->cloc   = PETSC_TRUE;
7167:       pcbddc->mat_graph->cnloc  = n;
7168:     }
7170:     pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected);

7172:     /* Setup of Graph */
7173:     pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */
7174:     PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);

7176:     /* attach info on disconnected subdomains if present */
7177:     if (pcbddc->n_local_subs) {
7178:       PetscInt *local_subs,n,totn;

7180:       MatGetLocalSize(matis->A,&n,NULL);
7181:       PetscMalloc1(n,&local_subs);
7182:       for (i=0;i<n;i++) local_subs[i] = pcbddc->n_local_subs;
7183:       for (i=0;i<pcbddc->n_local_subs;i++) {
7184:         const PetscInt *idxs;
7185:         PetscInt       nl,j;

7187:         ISGetLocalSize(pcbddc->local_subs[i],&nl);
7188:         ISGetIndices(pcbddc->local_subs[i],&idxs);
7189:         for (j=0;j<nl;j++) local_subs[idxs[j]] = i;
7190:         ISRestoreIndices(pcbddc->local_subs[i],&idxs);
7191:       }
7192:       for (i=0,totn=0;i<n;i++) totn = PetscMax(totn,local_subs[i]);
7193:       pcbddc->mat_graph->n_local_subs = totn + 1;
7194:       pcbddc->mat_graph->local_subs = local_subs;
7195:     }
7196:   }

7198:   if (!pcbddc->graphanalyzed) {
7199:     /* Graph's connected components analysis */
7200:     PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);
7201:     pcbddc->graphanalyzed = PETSC_TRUE;
7202:     pcbddc->corner_selected = pcbddc->corner_selection;
7203:   }
7204:   if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0;
7205:   return 0;
7206: }

7208: PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[])
7209: {
7210:   PetscInt       i,j,n;
7211:   PetscScalar    *alphas;
7212:   PetscReal      norm,*onorms;

7214:   n = *nio;
7215:   if (!n) return 0;
7216:   PetscMalloc2(n,&alphas,n,&onorms);
7217:   VecNormalize(vecs[0],&norm);
7218:   if (norm < PETSC_SMALL) {
7219:     onorms[0] = 0.0;
7220:     VecSet(vecs[0],0.0);
7221:   } else {
7222:     onorms[0] = norm;
7223:   }

7225:   for (i=1;i<n;i++) {
7226:     VecMDot(vecs[i],i,vecs,alphas);
7227:     for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]);
7228:     VecMAXPY(vecs[i],i,alphas,vecs);
7229:     VecNormalize(vecs[i],&norm);
7230:     if (norm < PETSC_SMALL) {
7231:       onorms[i] = 0.0;
7232:       VecSet(vecs[i],0.0);
7233:     } else {
7234:       onorms[i] = norm;
7235:     }
7236:   }
7237:   /* push nonzero vectors at the beginning */
7238:   for (i=0;i<n;i++) {
7239:     if (onorms[i] == 0.0) {
7240:       for (j=i+1;j<n;j++) {
7241:         if (onorms[j] != 0.0) {
7242:           VecCopy(vecs[j],vecs[i]);
7243:           onorms[j] = 0.0;
7244:         }
7245:       }
7246:     }
7247:   }
7248:   for (i=0,*nio=0;i<n;i++) *nio += onorms[i] != 0.0 ? 1 : 0;
7249:   PetscFree2(alphas,onorms);
7250:   return 0;
7251: }

7253: PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void)
7254: {
7255:   ISLocalToGlobalMapping mapping;
7256:   Mat                    A;
7257:   PetscInt               n_neighs,*neighs,*n_shared,**shared;
7258:   PetscMPIInt            size,rank,color;
7259:   PetscInt               *xadj,*adjncy;
7260:   PetscInt               *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
7261:   PetscInt               im_active,active_procs,N,n,i,j,threshold = 2;
7262:   PetscInt               void_procs,*procs_candidates = NULL;
7263:   PetscInt               xadj_count,*count;
7264:   PetscBool              ismatis,use_vwgt=PETSC_FALSE;
7265:   PetscSubcomm           psubcomm;
7266:   MPI_Comm               subcomm;

7269:   PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);

7275:   if (have_void) *have_void = PETSC_FALSE;
7276:   MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);
7277:   MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
7278:   MatISGetLocalMat(mat,&A);
7279:   MatGetLocalSize(A,&n,NULL);
7280:   im_active = !!n;
7281:   MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));
7282:   void_procs = size - active_procs;
7283:   /* get ranks of non-active processes in mat communicator */
7284:   if (void_procs) {
7285:     PetscInt ncand;

7287:     if (have_void) *have_void = PETSC_TRUE;
7288:     PetscMalloc1(size,&procs_candidates);
7289:     MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));
7290:     for (i=0,ncand=0;i<size;i++) {
7291:       if (!procs_candidates[i]) {
7292:         procs_candidates[ncand++] = i;
7293:       }
7294:     }
7295:     /* force n_subdomains to be not greater than the number of non-active processes */
7296:     *n_subdomains = PetscMin(void_procs,*n_subdomains);
7297:   }

7299:   /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix
7300:      number of subdomains requested 1 -> send to rank-0 or first candidate in voids  */
7301:   MatGetSize(mat,&N,NULL);
7302:   if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) {
7303:     PetscInt issize,isidx,dest;
7304:     if (*n_subdomains == 1) dest = 0;
7305:     else dest = rank;
7306:     if (im_active) {
7307:       issize = 1;
7308:       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7309:         isidx = procs_candidates[dest];
7310:       } else {
7311:         isidx = dest;
7312:       }
7313:     } else {
7314:       issize = 0;
7315:       isidx = -1;
7316:     }
7317:     if (*n_subdomains != 1) *n_subdomains = active_procs;
7318:     ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);
7319:     PetscFree(procs_candidates);
7320:     return 0;
7321:   }
7322:   PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);
7323:   PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);
7324:   threshold = PetscMax(threshold,2);

7326:   /* Get info on mapping */
7327:   MatISGetLocalToGlobalMapping(mat,&mapping,NULL);
7328:   ISLocalToGlobalMappingGetInfo(mapping,&n_neighs,&neighs,&n_shared,&shared);

7330:   /* build local CSR graph of subdomains' connectivity */
7331:   PetscMalloc1(2,&xadj);
7332:   xadj[0] = 0;
7333:   xadj[1] = PetscMax(n_neighs-1,0);
7334:   PetscMalloc1(xadj[1],&adjncy);
7335:   PetscMalloc1(xadj[1],&adjncy_wgt);
7336:   PetscCalloc1(n,&count);
7337:   for (i=1;i<n_neighs;i++)
7338:     for (j=0;j<n_shared[i];j++)
7339:       count[shared[i][j]] += 1;

7341:   xadj_count = 0;
7342:   for (i=1;i<n_neighs;i++) {
7343:     for (j=0;j<n_shared[i];j++) {
7344:       if (count[shared[i][j]] < threshold) {
7345:         adjncy[xadj_count] = neighs[i];
7346:         adjncy_wgt[xadj_count] = n_shared[i];
7347:         xadj_count++;
7348:         break;
7349:       }
7350:     }
7351:   }
7352:   xadj[1] = xadj_count;
7353:   PetscFree(count);
7354:   ISLocalToGlobalMappingRestoreInfo(mapping,&n_neighs,&neighs,&n_shared,&shared);
7355:   PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);

7357:   PetscMalloc1(1,&ranks_send_to_idx);

7359:   /* Restrict work on active processes only */
7360:   PetscMPIIntCast(im_active,&color);
7361:   if (void_procs) {
7362:     PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);
7363:     PetscSubcommSetNumber(psubcomm,2); /* 2 groups, active process and not active processes */
7364:     PetscSubcommSetTypeGeneral(psubcomm,color,rank);
7365:     subcomm = PetscSubcommChild(psubcomm);
7366:   } else {
7367:     psubcomm = NULL;
7368:     subcomm = PetscObjectComm((PetscObject)mat);
7369:   }

7371:   v_wgt = NULL;
7372:   if (!color) {
7373:     PetscFree(xadj);
7374:     PetscFree(adjncy);
7375:     PetscFree(adjncy_wgt);
7376:   } else {
7377:     Mat             subdomain_adj;
7378:     IS              new_ranks,new_ranks_contig;
7379:     MatPartitioning partitioner;
7380:     PetscInt        rstart=0,rend=0;
7381:     PetscInt        *is_indices,*oldranks;
7382:     PetscMPIInt     size;
7383:     PetscBool       aggregate;

7385:     MPI_Comm_size(subcomm,&size);
7386:     if (void_procs) {
7387:       PetscInt prank = rank;
7388:       PetscMalloc1(size,&oldranks);
7389:       MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);
7390:       for (i=0;i<xadj[1];i++) {
7391:         PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);
7392:       }
7393:       PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
7394:     } else {
7395:       oldranks = NULL;
7396:     }
7397:     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
7398:     if (aggregate) { /* TODO: all this part could be made more efficient */
7399:       PetscInt    lrows,row,ncols,*cols;
7400:       PetscMPIInt nrank;
7401:       PetscScalar *vals;

7403:       MPI_Comm_rank(subcomm,&nrank);
7404:       lrows = 0;
7405:       if (nrank<redprocs) {
7406:         lrows = size/redprocs;
7407:         if (nrank<size%redprocs) lrows++;
7408:       }
7409:       MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);
7410:       MatGetOwnershipRange(subdomain_adj,&rstart,&rend);
7411:       MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
7412:       MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
7413:       row = nrank;
7414:       ncols = xadj[1]-xadj[0];
7415:       cols = adjncy;
7416:       PetscMalloc1(ncols,&vals);
7417:       for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
7418:       MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);
7419:       MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);
7420:       MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);
7421:       PetscFree(xadj);
7422:       PetscFree(adjncy);
7423:       PetscFree(adjncy_wgt);
7424:       PetscFree(vals);
7425:       if (use_vwgt) {
7426:         Vec               v;
7427:         const PetscScalar *array;
7428:         PetscInt          nl;

7430:         MatCreateVecs(subdomain_adj,&v,NULL);
7431:         VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);
7432:         VecAssemblyBegin(v);
7433:         VecAssemblyEnd(v);
7434:         VecGetLocalSize(v,&nl);
7435:         VecGetArrayRead(v,&array);
7436:         PetscMalloc1(nl,&v_wgt);
7437:         for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]);
7438:         VecRestoreArrayRead(v,&array);
7439:         VecDestroy(&v);
7440:       }
7441:     } else {
7442:       MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);
7443:       if (use_vwgt) {
7444:         PetscMalloc1(1,&v_wgt);
7445:         v_wgt[0] = n;
7446:       }
7447:     }
7448:     /* MatView(subdomain_adj,0); */

7450:     /* Partition */
7451:     MatPartitioningCreate(subcomm,&partitioner);
7452: #if defined(PETSC_HAVE_PTSCOTCH)
7453:     MatPartitioningSetType(partitioner,MATPARTITIONINGPTSCOTCH);
7454: #elif defined(PETSC_HAVE_PARMETIS)
7455:     MatPartitioningSetType(partitioner,MATPARTITIONINGPARMETIS);
7456: #else
7457:     MatPartitioningSetType(partitioner,MATPARTITIONINGAVERAGE);
7458: #endif
7459:     MatPartitioningSetAdjacency(partitioner,subdomain_adj);
7460:     if (v_wgt) {
7461:       MatPartitioningSetVertexWeights(partitioner,v_wgt);
7462:     }
7463:     *n_subdomains = PetscMin((PetscInt)size,*n_subdomains);
7464:     MatPartitioningSetNParts(partitioner,*n_subdomains);
7465:     MatPartitioningSetFromOptions(partitioner);
7466:     MatPartitioningApply(partitioner,&new_ranks);
7467:     /* MatPartitioningView(partitioner,0); */

7469:     /* renumber new_ranks to avoid "holes" in new set of processors */
7470:     ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);
7471:     ISDestroy(&new_ranks);
7472:     ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);
7473:     if (!aggregate) {
7474:       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7475:         PetscAssert(oldranks,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
7476:         ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]];
7477:       } else if (oldranks) {
7478:         ranks_send_to_idx[0] = oldranks[is_indices[0]];
7479:       } else {
7480:         ranks_send_to_idx[0] = is_indices[0];
7481:       }
7482:     } else {
7483:       PetscInt    idx = 0;
7484:       PetscMPIInt tag;
7485:       MPI_Request *reqs;

7487:       PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);
7488:       PetscMalloc1(rend-rstart,&reqs);
7489:       for (i=rstart;i<rend;i++) {
7490:         MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);
7491:       }
7492:       MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);
7493:       MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);
7494:       PetscFree(reqs);
7495:       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7496:         PetscAssert(oldranks,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
7497:         ranks_send_to_idx[0] = procs_candidates[oldranks[idx]];
7498:       } else if (oldranks) {
7499:         ranks_send_to_idx[0] = oldranks[idx];
7500:       } else {
7501:         ranks_send_to_idx[0] = idx;
7502:       }
7503:     }
7504:     ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);
7505:     /* clean up */
7506:     PetscFree(oldranks);
7507:     ISDestroy(&new_ranks_contig);
7508:     MatDestroy(&subdomain_adj);
7509:     MatPartitioningDestroy(&partitioner);
7510:   }
7511:   PetscSubcommDestroy(&psubcomm);
7512:   PetscFree(procs_candidates);

7514:   /* assemble parallel IS for sends */
7515:   i = 1;
7516:   if (!color) i=0;
7517:   ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);
7518:   return 0;
7519: }

/* Private tag identifying the storage format of a local matrix.
   PCBDDCMatISSubassemble() writes it as the first entry of the index buffer it sends,
   and reads it back on the receiving side to pick the type of the new local matrix. */
typedef enum {
  MATDENSE_PRIVATE = 0,
  MATAIJ_PRIVATE   = 1,
  MATBAIJ_PRIVATE  = 2,
  MATSBAIJ_PRIVATE = 3
} MatTypePrivate;

/*
   PCBDDCMatISSubassemble - Sends the local matrix of each process to the rank(s) listed in is_sends
   and, on the receiving ranks, adds the received local matrices into the local matrix of a MATIS
   matrix returned in *mat_n.

   Parameters:
     mat            - matrix whose local matrix (MatISGetLocalMat) is sent
     is_sends       - destination rank(s) for the data of this process; if NULL the pattern is obtained
                      from PCBDDCMatISGetSubassemblingPattern() using n_subdomains
     n_subdomains   - only used when is_sends is NULL
     restrict_comm  - if true, *mat_n is built on a sub-communicator; processes left out of it
                      get *mat_n destroyed and set to NULL before returning
     restrict_full  - with restrict_comm: if true every process receiving nothing is left out,
                      otherwise only processes that send but do not receive are left out
     reuse          - if true and *mat_n is not NULL, *mat_n receives the new local-to-global map
                      instead of being created from scratch
     mat_n          - the resulting matrix
     nis, isarray   - additional index sets; each isarray[j] is destroyed and replaced by an IS on the
                      new communicator holding the sorted, duplicate-free indices received for it
     nvecs,nnsp_vec - if nvecs is nonzero, nnsp_vec[0] (and only that entry) is communicated; it is
                      destroyed and recreated on the new communicator with the summed received values

   Notes:
     Only payloads tagged MATDENSE_PRIVATE are handled when preallocating and inserting values;
     the other branches are marked TODO in the code.
     NOTE(review): the line numbers embedded in this listing skip values in several places
     (e.g. 7552 -> 7560, 7656 -> 7658), so some statements of the original file are not visible
     here - presumably argument/error checks; confirm against the complete source.
*/
7523: PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[])
7524: {
7525:   Mat                    local_mat;
7526:   IS                     is_sends_internal;
7527:   PetscInt               rows,cols,new_local_rows;
7528:   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs;
7529:   PetscBool              ismatis,isdense,newisdense,destroy_mat;
7530:   ISLocalToGlobalMapping l2gmap;
7531:   PetscInt*              l2gmap_indices;
7532:   const PetscInt*        is_indices;
7533:   MatType                new_local_type;
7534:   /* buffers */
7535:   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
7536:   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
7537:   PetscInt               *recv_buffer_idxs_local;
7538:   PetscScalar            *ptr_vals,*recv_buffer_vals;
7539:   const PetscScalar      *send_buffer_vals;
7540:   PetscScalar            *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs;
7541:   /* MPI */
7542:   MPI_Comm               comm,comm_n;
7543:   PetscSubcomm           subcomm;
7544:   PetscMPIInt            n_sends,n_recvs,size;
7545:   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
7546:   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
7547:   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest;
7548:   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs;
7549:   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs;

7552:   PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
7560:   if (nvecs) {
7563:   }
7564:   /* further checks */
7565:   MatISGetLocalMat(mat,&local_mat);
7566:   PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);
7568:   MatGetSize(local_mat,&rows,&cols);
7570:   if (reuse && *mat_n) {
7571:     PetscInt mrows,mcols,mnrows,mncols;
7573:     PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);
7575:     MatGetSize(mat,&mrows,&mcols);
7576:     MatGetSize(*mat_n,&mnrows,&mncols);
7579:   }
7580:   MatGetBlockSize(local_mat,&bs);

7583:   /* prepare IS for sending if not provided */
7584:   if (!is_sends) {
7586:     PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);
7587:   } else {
        /* take a reference so that the ISDestroy() on is_sends_internal below is valid in both branches */
7588:     PetscObjectReference((PetscObject)is_sends);
7589:     is_sends_internal = is_sends;
7590:   }

7592:   /* get comm */
7593:   PetscObjectGetComm((PetscObject)mat,&comm);

7595:   /* compute number of sends */
7596:   ISGetLocalSize(is_sends_internal,&i);
7597:   PetscMPIIntCast(i,&n_sends);

7599:   /* compute number of receives */
7600:   MPI_Comm_size(comm,&size);
7601:   PetscMalloc1(size,&iflags);
7602:   PetscArrayzero(iflags,size);
7603:   ISGetIndices(is_sends_internal,&is_indices);
      /* flag every destination rank; n_recvs is then obtained from these flags */
7604:   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
7605:   PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);
7606:   PetscFree(iflags);

7608:   /* restrict comm if requested */
7609:   subcomm = NULL;
7610:   destroy_mat = PETSC_FALSE;
7611:   if (restrict_comm) {
7612:     PetscMPIInt color,subcommsize;

        /* color 1 marks the processes that are left out of the new communicator */
7614:     color = 0;
7615:     if (restrict_full) {
7616:       if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */
7617:     } else {
7618:       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */
7619:     }
        /* the sum of the colors counts the excluded processes, so size minus that sum is the size of the new comm */
7620:     MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);
7621:     subcommsize = size - subcommsize;
7622:     /* check if reuse has been requested */
7623:     if (reuse) {
7624:       if (*mat_n) {
7625:         PetscMPIInt subcommsize2;
7626:         MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);
7628:         comm_n = PetscObjectComm((PetscObject)*mat_n);
7629:       } else {
7630:         comm_n = PETSC_COMM_SELF;
7631:       }
7632:     } else { /* MAT_INITIAL_MATRIX */
7633:       PetscMPIInt rank;

7635:       MPI_Comm_rank(comm,&rank);
7636:       PetscSubcommCreate(comm,&subcomm);
7637:       PetscSubcommSetNumber(subcomm,2);
7638:       PetscSubcommSetTypeGeneral(subcomm,color,rank);
7639:       comm_n = PetscSubcommChild(subcomm);
7640:     }
7641:     /* flag to destroy *mat_n if not significant */
7642:     if (color) destroy_mat = PETSC_TRUE;
7643:   } else {
7644:     comm_n = comm;
7645:   }

7647:   /* prepare send/receive buffers */
7648:   PetscMalloc1(size,&ilengths_idxs);
7649:   PetscArrayzero(ilengths_idxs,size);
7650:   PetscMalloc1(size,&ilengths_vals);
7651:   PetscArrayzero(ilengths_vals,size);
7652:   if (nis) {
7653:     PetscCalloc1(size,&ilengths_idxs_is);
7654:   }

7656:   /* Get data from local matrices */
7658:     /* TODO: See below some guidelines on how to prepare the local buffers */
7659:     /*
7660:        send_buffer_vals should contain the raw values of the local matrix
7661:        send_buffer_idxs should contain:
7662:        - MatType_PRIVATE type
7663:        - PetscInt        size_of_l2gmap
7664:        - PetscInt        global_row_indices[size_of_l2gmap]
7665:        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
7666:     */
7667:   {
7668:     ISLocalToGlobalMapping mapping;

7670:     MatISGetLocalToGlobalMapping(mat,&mapping,NULL);
        /* the dense array is sent in place; it is restored only after the value sends have completed */
7671:     MatDenseGetArrayRead(local_mat,&send_buffer_vals);
7672:     ISLocalToGlobalMappingGetSize(mapping,&i);
        /* two header entries (type tag, map size) followed by the global indices */
7673:     PetscMalloc1(i+2,&send_buffer_idxs);
7674:     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
7675:     send_buffer_idxs[1] = i;
7676:     ISLocalToGlobalMappingGetIndices(mapping,(const PetscInt**)&ptr_idxs);
7677:     PetscArraycpy(&send_buffer_idxs[2],ptr_idxs,i);
7678:     ISLocalToGlobalMappingRestoreIndices(mapping,(const PetscInt**)&ptr_idxs);
7679:     PetscMPIIntCast(i,&len);
        /* message lengths per destination: len*len scalars and len+2 indices */
7680:     for (i=0;i<n_sends;i++) {
7681:       ilengths_vals[is_indices[i]] = len*len;
7682:       ilengths_idxs[is_indices[i]] = len+2;
7683:     }
7684:   }
7685:   PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);
7686:   /* additional is (if any) */
7687:   if (nis) {
7688:     PetscMPIInt psum;
7689:     PetscInt j;
        /* first pass: total buffer length, one length entry plus the indices for each IS */
7690:     for (j=0,psum=0;j<nis;j++) {
7691:       PetscInt plen;
7692:       ISGetLocalSize(isarray[j],&plen);
7693:       PetscMPIIntCast(plen,&len);
7694:       psum += len+1; /* indices + length */
7695:     }
7696:     PetscMalloc1(psum,&send_buffer_idxs_is);
        /* second pass: pack [length, indices...] for each IS, one after the other */
7697:     for (j=0,psum=0;j<nis;j++) {
7698:       PetscInt plen;
7699:       const PetscInt *is_array_idxs;
7700:       ISGetLocalSize(isarray[j],&plen);
7701:       send_buffer_idxs_is[psum] = plen;
7702:       ISGetIndices(isarray[j],&is_array_idxs);
7703:       PetscArraycpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen);
7704:       ISRestoreIndices(isarray[j],&is_array_idxs);
7705:       psum += plen+1; /* indices + length */
7706:     }
7707:     for (i=0;i<n_sends;i++) {
7708:       ilengths_idxs_is[is_indices[i]] = psum;
7709:     }
7710:     PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);
7711:   }
7712:   MatISRestoreLocalMat(mat,&local_mat);

      /* size the receive buffers by summing the lengths of the incoming messages */
7714:   buf_size_idxs = 0;
7715:   buf_size_vals = 0;
7716:   buf_size_idxs_is = 0;
7717:   buf_size_vecs = 0;
7718:   for (i=0;i<n_recvs;i++) {
7719:     buf_size_idxs += (PetscInt)olengths_idxs[i];
7720:     buf_size_vals += (PetscInt)olengths_vals[i];
7721:     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
        /* sized with the index message length, which is 2 more than the olengths_idxs[i]-2 scalars received below */
7722:     if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i];
7723:   }
7724:   PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);
7725:   PetscMalloc1(buf_size_vals,&recv_buffer_vals);
7726:   PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);
7727:   PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);

7729:   /* get new tags for clean communications */
7730:   PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);
7731:   PetscObjectGetNewTag((PetscObject)mat,&tag_vals);
7732:   PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);
7733:   PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);

7735:   /* allocate for requests */
7736:   PetscMalloc1(n_sends,&send_req_idxs);
7737:   PetscMalloc1(n_sends,&send_req_vals);
7738:   PetscMalloc1(n_sends,&send_req_idxs_is);
7739:   PetscMalloc1(n_sends,&send_req_vecs);
7740:   PetscMalloc1(n_recvs,&recv_req_idxs);
7741:   PetscMalloc1(n_recvs,&recv_req_vals);
7742:   PetscMalloc1(n_recvs,&recv_req_idxs_is);
7743:   PetscMalloc1(n_recvs,&recv_req_vecs);

7745:   /* communications */
      /* the ptr_* cursors walk the receive buffers so that the messages are stored back to back */
7746:   ptr_idxs = recv_buffer_idxs;
7747:   ptr_vals = recv_buffer_vals;
7748:   ptr_idxs_is = recv_buffer_idxs_is;
7749:   ptr_vecs = recv_buffer_vecs;
7750:   for (i=0;i<n_recvs;i++) {
7751:     source_dest = onodes[i];
7752:     MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);
7753:     MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);
7754:     ptr_idxs += olengths_idxs[i];
7755:     ptr_vals += olengths_vals[i];
7756:     if (nis) {
7757:       source_dest = onodes_is[i];
7758:       MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);
7759:       ptr_idxs_is += olengths_idxs_is[i];
7760:     }
7761:     if (nvecs) {
7762:       source_dest = onodes[i];
          /* the vector message carries one scalar per index, i.e. the index message length minus its 2 header entries */
7763:       MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);
7764:       ptr_vecs += olengths_idxs[i]-2;
7765:     }
7766:   }
7767:   for (i=0;i<n_sends;i++) {
7768:     PetscMPIIntCast(is_indices[i],&source_dest);
7769:     MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);
7770:     MPI_Isend((PetscScalar*)send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);
7771:     if (nis) {
7772:       MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);
7773:     }
7774:     if (nvecs) {
          /* NOTE(review): VecGetArray() is called once per send, while the matching VecRestoreArray() further
             down is called exactly once whenever nvecs is nonzero; the two only pair up when n_sends is 1 - confirm */
7775:       VecGetArray(nnsp_vec[0],&send_buffer_vecs);
7776:       MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);
7777:     }
7778:   }
7779:   ISRestoreIndices(is_sends_internal,&is_indices);
7780:   ISDestroy(&is_sends_internal);

7782:   /* assemble new l2g map */
7783:   MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);
      /* first pass: count the received indices (duplicates included) to size the scratch array */
7784:   ptr_idxs = recv_buffer_idxs;
7785:   new_local_rows = 0;
7786:   for (i=0;i<n_recvs;i++) {
7787:     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
7788:     ptr_idxs += olengths_idxs[i];
7789:   }
7790:   PetscMalloc1(new_local_rows,&l2gmap_indices);
      /* second pass: concatenate the received global indices, skipping the 2 header entries of each message */
7791:   ptr_idxs = recv_buffer_idxs;
7792:   new_local_rows = 0;
7793:   for (i=0;i<n_recvs;i++) {
7794:     PetscArraycpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,*(ptr_idxs+1));
7795:     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
7796:     ptr_idxs += olengths_idxs[i];
7797:   }
      /* after this call new_local_rows is the number of distinct indices */
7798:   PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);
7799:   ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);
7800:   PetscFree(l2gmap_indices);

7802:   /* infer new local matrix type from received local matrices type */
7803:   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
7804:   /* it also assumes that if the block size is set, then it is the same among all local matrices (see checks at the beginning of the function) */
7805:   if (n_recvs) {
        /* start from the tag of the local send buffer and fall back to AIJ as soon as a received tag differs */
7806:     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
7807:     ptr_idxs = recv_buffer_idxs;
7808:     for (i=0;i<n_recvs;i++) {
7809:       if ((PetscInt)new_local_type_private != *ptr_idxs) {
7810:         new_local_type_private = MATAIJ_PRIVATE;
7811:         break;
7812:       }
7813:       ptr_idxs += olengths_idxs[i];
7814:     }
7815:     switch (new_local_type_private) {
7816:       case MATDENSE_PRIVATE:
            /* dense local matrices are assembled into a SEQAIJ local matrix */
7817:         new_local_type = MATSEQAIJ;
7818:         bs = 1;
7819:         break;
7820:       case MATAIJ_PRIVATE:
7821:         new_local_type = MATSEQAIJ;
7822:         bs = 1;
7823:         break;
7824:       case MATBAIJ_PRIVATE:
7825:         new_local_type = MATSEQBAIJ;
7826:         break;
7827:       case MATSBAIJ_PRIVATE:
7828:         new_local_type = MATSEQSBAIJ;
7829:         break;
7830:       default:
7831:         SETERRQ(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME);
7832:     }
7833:   } else { /* by default, new_local_type is seqaij */
7834:     new_local_type = MATSEQAIJ;
7835:     bs = 1;
7836:   }

7838:   /* create MATIS object if needed */
7839:   if (!reuse) {
7840:     MatGetSize(mat,&rows,&cols);
7841:     MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,l2gmap,mat_n);
7842:   } else {
7843:     /* it also destroys the local matrices */
7844:     if (*mat_n) {
7845:       MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);
7846:     } else { /* this is a fake object */
          /* rows and cols still hold the sizes of the local matrix of mat queried at the top of the function */
7847:       MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,l2gmap,mat_n);
7848:     }
7849:   }
7850:   MatISGetLocalMat(*mat_n,&local_mat);
7851:   MatSetType(local_mat,new_local_type);

7853:   MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);

7855:   /* Global to local map of received indices */
7856:   PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local); /* needed for values insertion */
      /* the whole receive buffer is mapped, header entries included; the headers are copied back verbatim below */
7857:   ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);
7858:   ISLocalToGlobalMappingDestroy(&l2gmap);

7860:   /* restore attributes -> type of incoming data and its size */
7861:   buf_size_idxs = 0;
7862:   for (i=0;i<n_recvs;i++) {
7863:     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
7864:     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
7865:     buf_size_idxs += (PetscInt)olengths_idxs[i];
7866:   }
7867:   PetscFree(recv_buffer_idxs);

7869:   /* set preallocation */
7870:   PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);
7871:   if (!newisdense) {
7872:     PetscInt *new_local_nnz=NULL;

7874:     ptr_idxs = recv_buffer_idxs_local;
7875:     if (n_recvs) {
7876:       PetscCalloc1(new_local_rows,&new_local_nnz);
7877:     }
7878:     for (i=0;i<n_recvs;i++) {
7879:       PetscInt j;
7880:       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
            /* every local row touched by a received dense block gets the block size added to its count */
7881:         for (j=0;j<*(ptr_idxs+1);j++) {
7882:           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
7883:         }
7884:       } else {
7885:         /* TODO */
7886:       }
7887:       ptr_idxs += olengths_idxs[i];
7888:     }
7889:     if (new_local_nnz) {
          /* counts from overlapping blocks can exceed the row length, so clamp them to new_local_rows */
7890:       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
          /* the three preallocation routines are called in turn, rescaling the counts in between;
             presumably only the one matching the type of local_mat takes effect - confirm */
7891:       MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);
7892:       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
7893:       MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
7894:       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
7895:       MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
7896:     } else {
7897:       MatSetUp(local_mat);
7898:     }
7899:     PetscFree(new_local_nnz);
7900:   } else {
7901:     MatSetUp(local_mat);
7902:   }

7904:   /* set values */
7905:   ptr_vals = recv_buffer_vals;
7906:   ptr_idxs = recv_buffer_idxs_local;
7907:   for (i=0;i<n_recvs;i++) {
7908:     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
          /* row orientation is switched off while adding the received dense array and switched back on afterwards */
7909:       MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);
7910:       MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);
7911:       MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);
7912:       MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);
7913:       MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);
7914:     } else {
7915:       /* TODO */
7916:     }
7917:     ptr_idxs += olengths_idxs[i];
7918:     ptr_vals += olengths_vals[i];
7919:   }
7920:   MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);
7921:   MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);
7922:   MatISRestoreLocalMat(*mat_n,&local_mat);
7923:   MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);
7924:   MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);
7925:   PetscFree(recv_buffer_vals);

7927: #if 0
7928:   if (!restrict_comm) { /* check */
7929:     Vec       lvec,rvec;
7930:     PetscReal infty_error;

7932:     MatCreateVecs(mat,&rvec,&lvec);
7933:     VecSetRandom(rvec,NULL);
7934:     MatMult(mat,rvec,lvec);
7935:     VecScale(lvec,-1.0);
7936:     MatMultAdd(*mat_n,rvec,lvec,lvec);
7937:     VecNorm(lvec,NORM_INFINITY,&infty_error);
7938:     PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
7939:     VecDestroy(&rvec);
7940:     VecDestroy(&lvec);
7941:   }
7942: #endif

7944:   /* assemble new additional is (if any) */
7945:   if (nis) {
7946:     PetscInt **temp_idxs,*count_is,j,psum;

7948:     MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);
7949:     PetscCalloc1(nis,&count_is);
        /* first pass: count, for each IS, how many indices were received over all messages */
7950:     ptr_idxs = recv_buffer_idxs_is;
7951:     psum = 0;
7952:     for (i=0;i<n_recvs;i++) {
7953:       for (j=0;j<nis;j++) {
7954:         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7955:         count_is[j] += plen; /* increment counting of buffer for j-th IS */
7956:         psum += plen;
7957:         ptr_idxs += plen+1; /* shift pointer to received data */
7958:       }
7959:     }
        /* a single allocation holds all the indices; temp_idxs[j] points to the segment of the j-th IS */
7960:     PetscMalloc1(nis,&temp_idxs);
7961:     PetscMalloc1(psum,&temp_idxs[0]);
7962:     for (i=1;i<nis;i++) {
7963:       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
7964:     }
        /* second pass: count_is is reset and reused as the write offset inside each segment */
7965:     PetscArrayzero(count_is,nis);
7966:     ptr_idxs = recv_buffer_idxs_is;
7967:     for (i=0;i<n_recvs;i++) {
7968:       for (j=0;j<nis;j++) {
7969:         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7970:         PetscArraycpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen);
7971:         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
7972:         ptr_idxs += plen+1; /* shift pointer to received data */
7973:       }
7974:     }
        /* replace each input IS with the sorted, duplicate-free received indices on the new communicator */
7975:     for (i=0;i<nis;i++) {
7976:       ISDestroy(&isarray[i]);
7977:       PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);
7978:       ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);
7979:     }
7980:     PetscFree(count_is);
7981:     PetscFree(temp_idxs[0]);
7982:     PetscFree(temp_idxs);
7983:   }
7984:   /* free workspace */
      /* each send buffer is released only after the corresponding sends have completed */
7985:   PetscFree(recv_buffer_idxs_is);
7986:   MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);
7987:   PetscFree(send_buffer_idxs);
7988:   MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);
7989:   if (isdense) {
7990:     MatISGetLocalMat(mat,&local_mat);
7991:     MatDenseRestoreArrayRead(local_mat,&send_buffer_vals);
7992:     MatISRestoreLocalMat(mat,&local_mat);
7993:   } else {
7994:     /* PetscFree(send_buffer_vals); */
7995:   }
7996:   if (nis) {
7997:     MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);
7998:     PetscFree(send_buffer_idxs_is);
7999:   }

8001:   if (nvecs) {
8002:     MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);
8003:     MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);
8004:     VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);
        /* the input vector is replaced by a new one on comm_n with local size new_local_rows */
8005:     VecDestroy(&nnsp_vec[0]);
8006:     VecCreate(comm_n,&nnsp_vec[0]);
8007:     VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);
8008:     VecSetType(nnsp_vec[0],VECSTANDARD);
8009:     /* set values */
8010:     ptr_vals = recv_buffer_vecs;
8011:     ptr_idxs = recv_buffer_idxs_local;
        /* send_buffer_vecs is reused here as the array of the new vector; received entries are summed into it */
8012:     VecGetArray(nnsp_vec[0],&send_buffer_vecs);
8013:     for (i=0;i<n_recvs;i++) {
8014:       PetscInt j;
8015:       for (j=0;j<*(ptr_idxs+1);j++) {
8016:         send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j);
8017:       }
8018:       ptr_idxs += olengths_idxs[i];
8019:       ptr_vals += olengths_idxs[i]-2;
8020:     }
8021:     VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);
8022:     VecAssemblyBegin(nnsp_vec[0]);
8023:     VecAssemblyEnd(nnsp_vec[0]);
8024:   }

8026:   PetscFree(recv_buffer_vecs);
8027:   PetscFree(recv_buffer_idxs_local);
8028:   PetscFree(recv_req_idxs);
8029:   PetscFree(recv_req_vals);
8030:   PetscFree(recv_req_vecs);
8031:   PetscFree(recv_req_idxs_is);
8032:   PetscFree(send_req_idxs);
8033:   PetscFree(send_req_vals);
8034:   PetscFree(send_req_vecs);
8035:   PetscFree(send_req_idxs_is);
8036:   PetscFree(ilengths_vals);
8037:   PetscFree(ilengths_idxs);
8038:   PetscFree(olengths_vals);
8039:   PetscFree(olengths_idxs);
8040:   PetscFree(onodes);
8041:   if (nis) {
8042:     PetscFree(ilengths_idxs_is);
8043:     PetscFree(olengths_idxs_is);
8044:     PetscFree(onodes_is);
8045:   }
8046:   PetscSubcommDestroy(&subcomm);
8047:   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */
8048:     MatDestroy(mat_n);
8049:     for (i=0;i<nis;i++) {
8050:       ISDestroy(&isarray[i]);
8051:     }
8052:     if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */
8053:       VecDestroy(&nnsp_vec[0]);
8054:     }
8055:     *mat_n = NULL;
8056:   }
8057:   return 0;
8058: }

8060: /* temporary hack into ksp private data structure */
8061: #include <petsc/private/kspimpl.h>

8063: PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
8064: {
8065:   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
8066:   PC_IS                  *pcis = (PC_IS*)pc->data;
8067:   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
8068:   Mat                    coarsedivudotp = NULL;
8069:   Mat                    coarseG,t_coarse_mat_is;
8070:   MatNullSpace           CoarseNullSpace = NULL;
8071:   ISLocalToGlobalMapping coarse_islg;
8072:   IS                     coarse_is,*isarray,corners;
8073:   PetscInt               i,im_active=-1,active_procs=-1;
8074:   PetscInt               nis,nisdofs,nisneu,nisvert;
8075:   PetscInt               coarse_eqs_per_proc;
8076:   PC                     pc_temp;
8077:   PCType                 coarse_pc_type;
8078:   KSPType                coarse_ksp_type;
8079:   PetscBool              multilevel_requested,multilevel_allowed;
8080:   PetscBool              coarse_reuse;
8081:   PetscInt               ncoarse,nedcfield;
8082:   PetscBool              compute_vecs = PETSC_FALSE;
8083:   PetscScalar            *array;
8084:   MatReuse               coarse_mat_reuse;
8085:   PetscBool              restr, full_restr, have_void;
8086:   PetscMPIInt            size;
8087:   PetscErrorCode         ierr;

8089:   PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);
8090:   /* Assign global numbering to coarse dofs */
8091:   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
8092:     PetscInt ocoarse_size;
8093:     compute_vecs = PETSC_TRUE;

8095:     pcbddc->new_primal_space = PETSC_TRUE;
8096:     ocoarse_size = pcbddc->coarse_size;
8097:     PetscFree(pcbddc->global_primal_indices);
8098:     PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);
8099:     /* see if we can avoid some work */
8100:     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
8101:       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
8102:       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
8103:         KSPReset(pcbddc->coarse_ksp);
8104:         coarse_reuse = PETSC_FALSE;
8105:       } else { /* we can safely reuse already computed coarse matrix */
8106:         coarse_reuse = PETSC_TRUE;
8107:       }
8108:     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
8109:       coarse_reuse = PETSC_FALSE;
8110:     }
8111:     /* reset any subassembling information */
8112:     if (!coarse_reuse || pcbddc->recompute_topography) {
8113:       ISDestroy(&pcbddc->coarse_subassembling);
8114:     }
8115:   } else { /* primal space is unchanged, so we can reuse coarse matrix */
8116:     coarse_reuse = PETSC_TRUE;
8117:   }
8118:   if (coarse_reuse && pcbddc->coarse_ksp) {
8119:     KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
8120:     PetscObjectReference((PetscObject)coarse_mat);
8121:     coarse_mat_reuse = MAT_REUSE_MATRIX;
8122:   } else {
8123:     coarse_mat = NULL;
8124:     coarse_mat_reuse = MAT_INITIAL_MATRIX;
8125:   }

8127:   /* creates temporary l2gmap and IS for coarse indexes */
8128:   ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);
8129:   ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);

8131:   /* creates temporary MATIS object for coarse matrix */
8132:   MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);
8133:   MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,coarse_islg,&t_coarse_mat_is);
8134:   MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);
8135:   MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
8136:   MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
8137:   MatDestroy(&coarse_submat_dense);

8139:   /* count "active" (i.e. with positive local size) and "void" processes */
8140:   im_active = !!(pcis->n);
8141:   MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));

8143:   /* determine number of processes participating in the coarse solver and compute subassembling pattern */
8144:   /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */
8145:   /* full_restr : just use the receivers from the subassembling pattern */
8146:   MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
8147:   coarse_mat_is        = NULL;
8148:   multilevel_allowed   = PETSC_FALSE;
8149:   multilevel_requested = PETSC_FALSE;
8150:   coarse_eqs_per_proc  = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc);
8151:   if (coarse_eqs_per_proc < 0) coarse_eqs_per_proc = pcbddc->coarse_size;
8152:   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
8153:   if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE;
8154:   if (multilevel_requested) {
8155:     ncoarse    = active_procs/pcbddc->coarsening_ratio;
8156:     restr      = PETSC_FALSE;
8157:     full_restr = PETSC_FALSE;
8158:   } else {
8159:     ncoarse    = pcbddc->coarse_size/coarse_eqs_per_proc + !!(pcbddc->coarse_size%coarse_eqs_per_proc);
8160:     restr      = PETSC_TRUE;
8161:     full_restr = PETSC_TRUE;
8162:   }
8163:   if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE;
8164:   ncoarse = PetscMax(1,ncoarse);
8165:   if (!pcbddc->coarse_subassembling) {
8166:     if (pcbddc->coarsening_ratio > 1) {
8167:       if (multilevel_requested) {
8168:         PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);
8169:       } else {
8170:         PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);
8171:       }
8172:     } else {
8173:       PetscMPIInt rank;

8175:       MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
8176:       have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE;
8177:       ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);
8178:     }
8179:   } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */
8180:     PetscInt    psum;
8181:     if (pcbddc->coarse_ksp) psum = 1;
8182:     else psum = 0;
8183:     MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
8184:     have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE;
8185:   }
8186:   /* determine if we can go multilevel */
8187:   if (multilevel_requested) {
8188:     if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */
8189:     else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */
8190:   }
8191:   if (multilevel_allowed && have_void) restr = PETSC_TRUE;

8193:   /* dump subassembling pattern */
8194:   if (pcbddc->dbg_flag && multilevel_allowed) {
8195:     ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);
8196:   }
8197:   /* compute dofs splitting and neumann boundaries for coarse dofs */
8198:   nedcfield = -1;
8199:   corners = NULL;
8200:   if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */
8201:     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
8202:     const PetscInt         *idxs;
8203:     ISLocalToGlobalMapping tmap;

8205:     /* create map between primal indices (in local representative ordering) and local primal numbering */
8206:     ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);
8207:     /* allocate space for temporary storage */
8208:     PetscMalloc1(pcbddc->local_primal_size,&tidxs);
8209:     PetscMalloc1(pcbddc->local_primal_size,&tidxs2);
8210:     /* allocate for IS array */
8211:     nisdofs = pcbddc->n_ISForDofsLocal;
8212:     if (pcbddc->nedclocal) {
8213:       if (pcbddc->nedfield > -1) {
8214:         nedcfield = pcbddc->nedfield;
8215:       } else {
8216:         nedcfield = 0;
8218:         nisdofs = 1;
8219:       }
8220:     }
8221:     nisneu = !!pcbddc->NeumannBoundariesLocal;
8222:     nisvert = 0; /* nisvert is not used */
8223:     nis = nisdofs + nisneu + nisvert;
8224:     PetscMalloc1(nis,&isarray);
8225:     /* dofs splitting */
8226:     for (i=0;i<nisdofs;i++) {
8227:       /* ISView(pcbddc->ISForDofsLocal[i],0); */
8228:       if (nedcfield != i) {
8229:         ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);
8230:         ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);
8231:         ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8232:         ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);
8233:       } else {
8234:         ISGetLocalSize(pcbddc->nedclocal,&tsize);
8235:         ISGetIndices(pcbddc->nedclocal,&idxs);
8236:         ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8238:         ISRestoreIndices(pcbddc->nedclocal,&idxs);
8239:       }
8240:       ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
8241:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);
8242:       /* ISView(isarray[i],0); */
8243:     }
8244:     /* neumann boundaries */
8245:     if (pcbddc->NeumannBoundariesLocal) {
8246:       /* ISView(pcbddc->NeumannBoundariesLocal,0); */
8247:       ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);
8248:       ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
8249:       ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8250:       ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
8251:       ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
8252:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);
8253:       /* ISView(isarray[nisdofs],0); */
8254:     }
8255:     /* coordinates */
8256:     if (pcbddc->corner_selected) {
8257:       PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners);
8258:       ISGetLocalSize(corners,&tsize);
8259:       ISGetIndices(corners,&idxs);
8260:       ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8262:       ISRestoreIndices(corners,&idxs);
8263:       PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners);
8264:       ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
8265:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&corners);
8266:     }
8267:     PetscFree(tidxs);
8268:     PetscFree(tidxs2);
8269:     ISLocalToGlobalMappingDestroy(&tmap);
8270:   } else {
8271:     nis = 0;
8272:     nisdofs = 0;
8273:     nisneu = 0;
8274:     nisvert = 0;
8275:     isarray = NULL;
8276:   }
8277:   /* destroy no longer needed map */
8278:   ISLocalToGlobalMappingDestroy(&coarse_islg);

8280:   /* subassemble */
8281:   if (multilevel_allowed) {
8282:     Vec       vp[1];
8283:     PetscInt  nvecs = 0;
8284:     PetscBool reuse,reuser;

8286:     if (coarse_mat) reuse = PETSC_TRUE;
8287:     else reuse = PETSC_FALSE;
8288:     MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
8289:     vp[0] = NULL;
8290:     if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */
8291:       VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);
8292:       VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);
8293:       VecSetType(vp[0],VECSTANDARD);
8294:       nvecs = 1;

8296:       if (pcbddc->divudotp) {
8297:         Mat      B,loc_divudotp;
8298:         Vec      v,p;
8299:         IS       dummy;
8300:         PetscInt np;

8302:         MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);
8303:         MatGetSize(loc_divudotp,&np,NULL);
8304:         ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);
8305:         MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);
8306:         MatCreateVecs(B,&v,&p);
8307:         VecSet(p,1.);
8308:         MatMultTranspose(B,p,v);
8309:         VecDestroy(&p);
8310:         MatDestroy(&B);
8311:         VecGetArray(vp[0],&array);
8312:         VecPlaceArray(pcbddc->vec1_P,array);
8313:         MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);
8314:         VecResetArray(pcbddc->vec1_P);
8315:         VecRestoreArray(vp[0],&array);
8316:         ISDestroy(&dummy);
8317:         VecDestroy(&v);
8318:       }
8319:     }
8320:     if (reuser) {
8321:       PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);
8322:     } else {
8323:       PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);
8324:     }
8325:     if (vp[0]) { /* vp[0] could have been placed on a different set of processes */
8326:       PetscScalar       *arraym;
8327:       const PetscScalar *arrayv;
8328:       PetscInt          nl;
8329:       VecGetLocalSize(vp[0],&nl);
8330:       MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);
8331:       MatDenseGetArray(coarsedivudotp,&arraym);
8332:       VecGetArrayRead(vp[0],&arrayv);
8333:       PetscArraycpy(arraym,arrayv,nl);
8334:       VecRestoreArrayRead(vp[0],&arrayv);
8335:       MatDenseRestoreArray(coarsedivudotp,&arraym);
8336:       VecDestroy(&vp[0]);
8337:     } else {
8338:       MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);
8339:     }
8340:   } else {
8341:     PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);
8342:   }
8343:   if (coarse_mat_is || coarse_mat) {
8344:     if (!multilevel_allowed) {
8345:       MatConvert(coarse_mat_is,MATAIJ,coarse_mat_reuse,&coarse_mat);
8346:     } else {
8347:       /* if this matrix is present, it means we are not reusing the coarse matrix */
8348:       if (coarse_mat_is) {
8350:         PetscObjectReference((PetscObject)coarse_mat_is);
8351:         coarse_mat = coarse_mat_is;
8352:       }
8353:     }
8354:   }
8355:   MatDestroy(&t_coarse_mat_is);
8356:   MatDestroy(&coarse_mat_is);

8358:   /* create local to global scatters for coarse problem */
8359:   if (compute_vecs) {
8360:     PetscInt lrows;
8361:     VecDestroy(&pcbddc->coarse_vec);
8362:     if (coarse_mat) {
8363:       MatGetLocalSize(coarse_mat,&lrows,NULL);
8364:     } else {
8365:       lrows = 0;
8366:     }
8367:     VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);
8368:     VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);
8369:     VecSetType(pcbddc->coarse_vec,coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD);
8370:     VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
8371:     VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);
8372:   }
8373:   ISDestroy(&coarse_is);

8375:   /* set defaults for coarse KSP and PC */
8376:   if (multilevel_allowed) {
8377:     coarse_ksp_type = KSPRICHARDSON;
8378:     coarse_pc_type  = PCBDDC;
8379:   } else {
8380:     coarse_ksp_type = KSPPREONLY;
8381:     coarse_pc_type  = PCREDUNDANT;
8382:   }

8384:   /* print some info if requested */
8385:   if (pcbddc->dbg_flag) {
8386:     if (!multilevel_allowed) {
8387:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
8388:       if (multilevel_requested) {
8389:         PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %D (active processes %D, coarsening ratio %D)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);
8390:       } else if (pcbddc->max_levels) {
8391:         PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%D)\n",pcbddc->max_levels);
8392:       }
8393:       PetscViewerFlush(pcbddc->dbg_viewer);
8394:     }
8395:   }

8397:   /* communicate coarse discrete gradient */
8398:   coarseG = NULL;
8399:   if (pcbddc->nedcG && multilevel_allowed) {
8400:     MPI_Comm ccomm;
8401:     if (coarse_mat) {
8402:       ccomm = PetscObjectComm((PetscObject)coarse_mat);
8403:     } else {
8404:       ccomm = MPI_COMM_NULL;
8405:     }
8406:     MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);
8407:   }

8409:   /* create the coarse KSP object only once with defaults */
8410:   if (coarse_mat) {
8411:     PetscBool   isredundant,isbddc,force,valid;
8412:     PetscViewer dbg_viewer = NULL;

8414:     if (pcbddc->dbg_flag) {
8415:       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat));
8416:       PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);
8417:     }
8418:     if (!pcbddc->coarse_ksp) {
8419:       char   prefix[256],str_level[16];
8420:       size_t len;

8422:       KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);
8423:       KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);
8424:       PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);
8425:       KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);
8426:       KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
8427:       KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);
8428:       KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);
8429:       KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8430:       /* TODO is this logic correct? should check for coarse_mat type */
8431:       PCSetType(pc_temp,coarse_pc_type);
8432:       /* prefix */
8433:       PetscStrcpy(prefix,"");
8434:       PetscStrcpy(str_level,"");
8435:       if (!pcbddc->current_level) {
8436:         PetscStrncpy(prefix,((PetscObject)pc)->prefix,sizeof(prefix));
8437:         PetscStrlcat(prefix,"pc_bddc_coarse_",sizeof(prefix));
8438:       } else {
8439:         PetscStrlen(((PetscObject)pc)->prefix,&len);
8440:         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
8441:         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
8442:         /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */
8443:         PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);
8444:         PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));
8445:         PetscStrlcat(prefix,str_level,sizeof(prefix));
8446:       }
8447:       KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);
8448:       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8449:       PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);
8450:       PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
8451:       PCBDDCSetLevels(pc_temp,pcbddc->max_levels);
8452:       /* allow user customization */
8453:       KSPSetFromOptions(pcbddc->coarse_ksp);
8454:       /* get some info after set from options */
8455:       KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8456:       /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8457:       force = PETSC_FALSE;
8458:       PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL);
8459:       PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"");
8460:       PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
8461:       if (multilevel_allowed && !force && !valid) {
8462:         isbddc = PETSC_TRUE;
8463:         PCSetType(pc_temp,PCBDDC);
8464:         PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);
8465:         PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
8466:         PCBDDCSetLevels(pc_temp,pcbddc->max_levels);
8467:         if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */
8468:           PetscObjectOptionsBegin((PetscObject)pc_temp);
8469:           (*pc_temp->ops->setfromoptions)(PetscOptionsObject,pc_temp);
8470:           PetscObjectProcessOptionsHandlers(PetscOptionsObject,(PetscObject)pc_temp);
8471:           PetscOptionsEnd();
8472:           pc_temp->setfromoptionscalled++;
8473:         }
8474:       }
8475:     }
8476:     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8477:     KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8478:     if (nisdofs) {
8479:       PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);
8480:       for (i=0;i<nisdofs;i++) {
8481:         ISDestroy(&isarray[i]);
8482:       }
8483:     }
8484:     if (nisneu) {
8485:       PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);
8486:       ISDestroy(&isarray[nisdofs]);
8487:     }
8488:     if (nisvert) {
8489:       PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);
8490:       ISDestroy(&isarray[nis-1]);
8491:     }
8492:     if (coarseG) {
8493:       PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);
8494:     }

8496:     /* get some info after set from options */
8497:     PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);

8499:     /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */
8500:     if (isbddc && !multilevel_allowed) {
8501:       PCSetType(pc_temp,coarse_pc_type);
8502:     }
8503:     /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8504:     force = PETSC_FALSE;
8505:     PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL);
8506:     PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"");
8507:     if (multilevel_requested && multilevel_allowed && !valid && !force) {
8508:       PCSetType(pc_temp,PCBDDC);
8509:     }
8510:     PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);
8511:     if (isredundant) {
8512:       KSP inner_ksp;
8513:       PC  inner_pc;

8515:       PCRedundantGetKSP(pc_temp,&inner_ksp);
8516:       KSPGetPC(inner_ksp,&inner_pc);
8517:     }

8519:     /* parameters which miss an API */
8520:     PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
8521:     if (isbddc) {
8522:       PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data;

8524:       pcbddc_coarse->detect_disconnected = PETSC_TRUE;
8525:       pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc;
8526:       pcbddc_coarse->coarse_eqs_limit    = pcbddc->coarse_eqs_limit;
8527:       pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null;
8528:       if (pcbddc_coarse->benign_saddle_point) {
8529:         Mat                    coarsedivudotp_is;
8530:         ISLocalToGlobalMapping l2gmap,rl2g,cl2g;
8531:         IS                     row,col;
8532:         const PetscInt         *gidxs;
8533:         PetscInt               n,st,M,N;

8535:         MatGetSize(coarsedivudotp,&n,NULL);
8536:         MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));
8537:         st   = st-n;
8538:         ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);
8539:         MatISGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);
8540:         ISLocalToGlobalMappingGetSize(l2gmap,&n);
8541:         ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);
8542:         ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);
8543:         ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);
8544:         ISLocalToGlobalMappingCreateIS(row,&rl2g);
8545:         ISLocalToGlobalMappingCreateIS(col,&cl2g);
8546:         ISGetSize(row,&M);
8547:         MatGetSize(coarse_mat,&N,NULL);
8548:         ISDestroy(&row);
8549:         ISDestroy(&col);
8550:         MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);
8551:         MatSetType(coarsedivudotp_is,MATIS);
8552:         MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);
8553:         MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);
8554:         ISLocalToGlobalMappingDestroy(&rl2g);
8555:         ISLocalToGlobalMappingDestroy(&cl2g);
8556:         MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);
8557:         MatDestroy(&coarsedivudotp);
8558:         PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);
8559:         MatDestroy(&coarsedivudotp_is);
8560:         pcbddc_coarse->adaptive_userdefined = PETSC_TRUE;
8561:         if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE;
8562:       }
8563:     }

8565:     /* propagate symmetry info of coarse matrix */
8566:     MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);
8567:     if (pc->pmat->symmetric_set) {
8568:       MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);
8569:     }
8570:     if (pc->pmat->hermitian_set) {
8571:       MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);
8572:     }
8573:     if (pc->pmat->spd_set) {
8574:       MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);
8575:     }
8576:     if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) {
8577:       MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);
8578:     }
8579:     /* set operators */
8580:     MatViewFromOptions(coarse_mat,(PetscObject)pc,"-pc_bddc_coarse_mat_view");
8581:     MatSetOptionsPrefix(coarse_mat,((PetscObject)pcbddc->coarse_ksp)->prefix);
8582:     KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
8583:     if (pcbddc->dbg_flag) {
8584:       PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);
8585:     }
8586:   }
8587:   MatDestroy(&coarseG);
8588:   PetscFree(isarray);
8589: #if 0
8590:   {
8591:     PetscViewer viewer;
8592:     char filename[256];
8593:     sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level);
8594:     PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);
8595:     PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
8596:     MatView(coarse_mat,viewer);
8597:     PetscViewerPopFormat(viewer);
8598:     PetscViewerDestroy(&viewer);
8599:   }
8600: #endif

8602:   if (corners) {
8603:     Vec            gv;
8604:     IS             is;
8605:     const PetscInt *idxs;
8606:     PetscInt       i,d,N,n,cdim = pcbddc->mat_graph->cdim;
8607:     PetscScalar    *coords;

8610:     VecGetSize(pcbddc->coarse_vec,&N);
8611:     VecGetLocalSize(pcbddc->coarse_vec,&n);
8612:     VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec),&gv);
8613:     VecSetBlockSize(gv,cdim);
8614:     VecSetSizes(gv,n*cdim,N*cdim);
8615:     VecSetType(gv,VECSTANDARD);
8616:     VecSetFromOptions(gv);
8617:     VecSet(gv,PETSC_MAX_REAL); /* we only propagate coordinates from vertices constraints */

8619:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is);
8620:     ISGetLocalSize(is,&n);
8621:     ISGetIndices(is,&idxs);
8622:     PetscMalloc1(n*cdim,&coords);
8623:     for (i=0;i<n;i++) {
8624:       for (d=0;d<cdim;d++) {
8625:         coords[cdim*i+d] = pcbddc->mat_graph->coords[cdim*idxs[i]+d];
8626:       }
8627:     }
8628:     ISRestoreIndices(is,&idxs);
8629:     PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is);

8631:     ISGetLocalSize(corners,&n);
8632:     ISGetIndices(corners,&idxs);
8633:     VecSetValuesBlocked(gv,n,idxs,coords,INSERT_VALUES);
8634:     ISRestoreIndices(corners,&idxs);
8635:     PetscFree(coords);
8636:     VecAssemblyBegin(gv);
8637:     VecAssemblyEnd(gv);
8638:     VecGetArray(gv,&coords);
8639:     if (pcbddc->coarse_ksp) {
8640:       PC        coarse_pc;
8641:       PetscBool isbddc;

8643:       KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
8644:       PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);
8645:       if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */
8646:         PetscReal *realcoords;

8648:         VecGetLocalSize(gv,&n);
8649: #if defined(PETSC_USE_COMPLEX)
8650:         PetscMalloc1(n,&realcoords);
8651:         for (i=0;i<n;i++) realcoords[i] = PetscRealPart(coords[i]);
8652: #else
8653:         realcoords = coords;
8654: #endif
8655:         PCSetCoordinates(coarse_pc,cdim,n/cdim,realcoords);
8656: #if defined(PETSC_USE_COMPLEX)
8657:         PetscFree(realcoords);
8658: #endif
8659:       }
8660:     }
8661:     VecRestoreArray(gv,&coords);
8662:     VecDestroy(&gv);
8663:   }
8664:   ISDestroy(&corners);

8666:   if (pcbddc->coarse_ksp) {
8667:     Vec crhs,csol;

8669:     KSPGetSolution(pcbddc->coarse_ksp,&csol);
8670:     KSPGetRhs(pcbddc->coarse_ksp,&crhs);
8671:     if (!csol) {
8672:       MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);
8673:     }
8674:     if (!crhs) {
8675:       MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));
8676:     }
8677:   }
8678:   MatDestroy(&coarsedivudotp);

8680:   /* compute null space for coarse solver if the benign trick has been requested */
8681:   if (pcbddc->benign_null) {

8683:     VecSet(pcbddc->vec1_P,0.);
8684:     for (i=0;i<pcbddc->benign_n;i++) {
8685:       VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);
8686:     }
8687:     VecAssemblyBegin(pcbddc->vec1_P);
8688:     VecAssemblyEnd(pcbddc->vec1_P);
8689:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
8690:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
8691:     if (coarse_mat) {
8692:       Vec         nullv;
8693:       PetscScalar *array,*array2;
8694:       PetscInt    nl;

8696:       MatCreateVecs(coarse_mat,&nullv,NULL);
8697:       VecGetLocalSize(nullv,&nl);
8698:       VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);
8699:       VecGetArray(nullv,&array2);
8700:       PetscArraycpy(array2,array,nl);
8701:       VecRestoreArray(nullv,&array2);
8702:       VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);
8703:       VecNormalize(nullv,NULL);
8704:       MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);
8705:       VecDestroy(&nullv);
8706:     }
8707:   }
8708:   PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);

8710:   PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);
8711:   if (pcbddc->coarse_ksp) {
8712:     PetscBool ispreonly;

8714:     if (CoarseNullSpace) {
8715:       PetscBool isnull;
8716:       MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);
8717:       if (isnull) {
8718:         MatSetNullSpace(coarse_mat,CoarseNullSpace);
8719:       }
8720:       /* TODO: add local nullspaces (if any) */
8721:     }
8722:     /* setup coarse ksp */
8723:     KSPSetUp(pcbddc->coarse_ksp);
8724:     /* Check coarse problem if in debug mode or if solving with an iterative method */
8725:     PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);
8726:     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) {
8727:       KSP       check_ksp;
8728:       KSPType   check_ksp_type;
8729:       PC        check_pc;
8730:       Vec       check_vec,coarse_vec;
8731:       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
8732:       PetscInt  its;
8733:       PetscBool compute_eigs;
8734:       PetscReal *eigs_r,*eigs_c;
8735:       PetscInt  neigs;
8736:       const char *prefix;

8738:       /* Create ksp object suitable for estimation of extreme eigenvalues */
8739:       KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);
8740:       PetscObjectIncrementTabLevel((PetscObject)check_ksp,(PetscObject)pcbddc->coarse_ksp,0);
8741:       KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,PETSC_FALSE);
8742:       KSPSetOperators(check_ksp,coarse_mat,coarse_mat);
8743:       KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);
8744:       /* avoid setting up unneeded objects */
8745:       KSPGetPC(check_ksp,&check_pc);
8746:       PCSetType(check_pc,PCNONE);
8747:       if (ispreonly) {
8748:         check_ksp_type = KSPPREONLY;
8749:         compute_eigs = PETSC_FALSE;
8750:       } else {
8751:         check_ksp_type = KSPGMRES;
8752:         compute_eigs = PETSC_TRUE;
8753:       }
8754:       KSPSetType(check_ksp,check_ksp_type);
8755:       KSPSetComputeSingularValues(check_ksp,compute_eigs);
8756:       KSPSetComputeEigenvalues(check_ksp,compute_eigs);
8757:       KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);
8758:       KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);
8759:       KSPSetOptionsPrefix(check_ksp,prefix);
8760:       KSPAppendOptionsPrefix(check_ksp,"check_");
8761:       KSPSetFromOptions(check_ksp);
8762:       KSPSetUp(check_ksp);
8763:       KSPGetPC(pcbddc->coarse_ksp,&check_pc);
8764:       KSPSetPC(check_ksp,check_pc);
8765:       /* create random vec */
8766:       MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);
8767:       VecSetRandom(check_vec,NULL);
8768:       MatMult(coarse_mat,check_vec,coarse_vec);
8769:       /* solve coarse problem */
8770:       KSPSolve(check_ksp,coarse_vec,coarse_vec);
8771:       KSPCheckSolve(check_ksp,pc,coarse_vec);
8772:       /* set eigenvalue estimation if preonly has not been requested */
8773:       if (compute_eigs) {
8774:         PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);
8775:         PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);
8776:         KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);
8777:         if (neigs) {
8778:           lambda_max = eigs_r[neigs-1];
8779:           lambda_min = eigs_r[0];
8780:           if (pcbddc->use_coarse_estimates) {
8781:             if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */
8782:               KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);
8783:               KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));
8784:             }
8785:           }
8786:         }
8787:       }

8789:       /* check coarse problem residual error */
8790:       if (pcbddc->dbg_flag) {
8791:         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
8792:         PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));
8793:         VecAXPY(check_vec,-1.0,coarse_vec);
8794:         VecNorm(check_vec,NORM_INFINITY,&infty_error);
8795:         MatMult(coarse_mat,check_vec,coarse_vec);
8796:         VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);
8797:         PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);
8798:         PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);
8799:         PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);
8800:         PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);
8801:         PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);
8802:         if (CoarseNullSpace) {
8803:           PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");
8804:         }
8805:         if (compute_eigs) {
8806:           PetscReal          lambda_max_s,lambda_min_s;
8807:           KSPConvergedReason reason;
8808:           KSPGetType(check_ksp,&check_ksp_type);
8809:           KSPGetIterationNumber(check_ksp,&its);
8810:           KSPGetConvergedReason(check_ksp,&reason);
8811:           KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);
8812:           PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);
8813:           for (i=0;i<neigs;i++) {
8814:             PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);
8815:           }
8816:         }
8817:         PetscViewerFlush(dbg_viewer);
8818:         PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));
8819:       }
8820:       VecDestroy(&check_vec);
8821:       VecDestroy(&coarse_vec);
8822:       KSPDestroy(&check_ksp);
8823:       if (compute_eigs) {
8824:         PetscFree(eigs_r);
8825:         PetscFree(eigs_c);
8826:       }
8827:     }
8828:   }
8829:   MatNullSpaceDestroy(&CoarseNullSpace);
8830:   /* print additional info */
8831:   if (pcbddc->dbg_flag) {
8832:     /* waits until all processes reaches this point */
8833:     PetscBarrier((PetscObject)pc);
8834:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %D\n",pcbddc->current_level);
8835:     PetscViewerFlush(pcbddc->dbg_viewer);
8836:   }

8838:   /* free memory */
8839:   MatDestroy(&coarse_mat);
8840:   PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);
8841:   return 0;
8842: }

/*
   PCBDDCComputePrimalNumbering - Computes a global (coarse) numbering for the local primal dofs.

   Input Parameter:
.  pc - the preconditioner; pc->data is read both as PC_BDDC and as PC_IS, pc->pmat->data as Mat_IS

   Output Parameters:
+  coarse_size_n          - the size computed by ISRenumber() (reported below as the size of the coarse problem)
-  local_primal_indices_n - array allocated here with PetscMalloc1() that holds the renumbered
                            indices of this process; it is not freed here, so the caller owns it

   Notes:
   The local primal reference nodes are mapped to global indices with pcis->mapping and then
   renumbered with ISRenumber(), using pcbddc->local_primal_ref_mult as multiplicities.

   When pcbddc->dbg_flag is set, a consistency check is run and reported on pcbddc->dbg_viewer:
   for every local node marked as primal by at least one subdomain, the number of subdomains
   marking it must match the number of subdomains sharing it.
*/
8844: PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
8845: {
8846:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
8847:   PC_IS*         pcis = (PC_IS*)pc->data;
8848:   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
8849:   IS             subset,subset_mult,subset_n;
8850:   PetscInt       local_size,coarse_size=0;
8851:   PetscInt       *local_primal_indices=NULL;
8852:   const PetscInt *t_local_primal_indices;

8854:   /* Compute global number of coarse dofs */
        /* subset_n is first used for the local reference nodes, later reused for the renumbered result */
8856:   ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);
8857:   ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);
8858:   ISDestroy(&subset_n);
8859:   ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);
8860:   ISRenumber(subset,subset_mult,&coarse_size,&subset_n);
8861:   ISDestroy(&subset);
8862:   ISDestroy(&subset_mult);
8863:   ISGetLocalSize(subset_n,&local_size);
        /* copy the renumbered indices into an array that outlives subset_n */
8865:   PetscMalloc1(local_size,&local_primal_indices);
8866:   ISGetIndices(subset_n,&t_local_primal_indices);
8867:   PetscArraycpy(local_primal_indices,t_local_primal_indices,local_size);
8868:   ISRestoreIndices(subset_n,&t_local_primal_indices);
8869:   ISDestroy(&subset_n);

8871:   /* check numbering */
8872:   if (pcbddc->dbg_flag) {
8873:     PetscScalar coarsesum,*array,*array2;
8874:     PetscInt    i;
8875:     PetscBool   set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;

8877:     PetscViewerFlush(pcbddc->dbg_viewer);
8878:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
8879:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");
8880:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8881:     /* counter */
          /* vec2_N: for each local node, how many subdomains contributed a 1 to it (sum of ones, scattered back) */
8882:     VecSet(pcis->vec1_global,0.0);
8883:     VecSet(pcis->vec1_N,1.0);
8884:     VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8885:     VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8886:     VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
8887:     VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
          /* vec1_N: same count, but only subdomains that list the node among their primal indices contribute */
8888:     VecSet(pcis->vec1_N,0.0);
8889:     for (i=0;i<pcbddc->local_primal_size;i++) {
8890:       VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);
8891:     }
8892:     VecAssemblyBegin(pcis->vec1_N);
8893:     VecAssemblyEnd(pcis->vec1_N);
8894:     VecSet(pcis->vec1_global,0.0);
8895:     VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8896:     VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8897:     VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
8898:     VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
8899:     VecGetArray(pcis->vec1_N,&array);
8900:     VecGetArray(pcis->vec2_N,&array2);
          /* a node marked primal somewhere (array != 0) must be marked by every subdomain counted in array2 */
8901:     for (i=0;i<pcis->n;i++) {
8902:       if (array[i] != 0.0 && array[i] != array2[i]) {
8903:         PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi;
8904:         PetscInt neigh = (PetscInt)PetscRealPart(array2[i]);
8905:         set_error = PETSC_TRUE;
8906:         ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);
8907:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %D (gid %D) owned by %D processes instead of %D!\n",PetscGlobalRank,i,gi,owned,neigh);
8908:       }
8909:     }
8910:     VecRestoreArray(pcis->vec2_N,&array2);
          /* the error flag is local; reduce it so that every process takes the same branch below */
8911:     MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
8912:     PetscViewerFlush(pcbddc->dbg_viewer);
          /* replace each positive count by its reciprocal: summing the reciprocals over all subdomains
             then yields the value printed next to coarse_size for comparison */
8913:     for (i=0;i<pcis->n;i++) {
8914:       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
8915:     }
8916:     VecRestoreArray(pcis->vec1_N,&array);
8917:     VecSet(pcis->vec1_global,0.0);
8918:     VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8919:     VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8920:     VecSum(pcis->vec1_global,&coarsesum);
8921:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %D (%lf)\n",coarse_size,PetscRealPart(coarsesum));
          /* detailed listing of the primal indices: on request (dbg_flag > 1) or whenever an inconsistency was found */
8922:     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
8923:       PetscInt *gidxs;

8925:       PetscMalloc1(pcbddc->local_primal_size,&gidxs);
8926:       ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);
8927:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");
8928:       PetscViewerFlush(pcbddc->dbg_viewer);
8929:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);
8930:       for (i=0;i<pcbddc->local_primal_size;i++) {
8931:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%D]=%D (%D,%D)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);
8932:       }
8933:       PetscViewerFlush(pcbddc->dbg_viewer);
8934:       PetscFree(gidxs);
8935:     }
8936:     PetscViewerFlush(pcbddc->dbg_viewer);
          /* leave synchronized mode: balances the PetscViewerASCIIPushSynchronized() issued at the top of this debug section */
8937:     PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
8939:   }

8941:   /* get back data */
8942:   *coarse_size_n = coarse_size;
8943:   *local_primal_indices_n = local_primal_indices;
8944:   return 0;
8945: }

/*
   PCBDDCGlobalToLocal - Builds an index set in the ordering of lwork from an index set given in the ordering of gwork.

   Input Parameters:
+  g2l_ctx  - scatter applied in forward mode from gwork to lwork
.  gwork    - work vector; overwritten (zeroed, then set to 1 at the entries listed in globalis)
.  lwork    - work vector; overwritten with the scattered values
-  globalis - indices into gwork; negative entries are ignored (VEC_IGNORE_NEGATIVE_INDICES is set)

   Output Parameter:
.  localis - new IS, created on the communicator of gwork, listing in increasing order the
             positions i of lwork whose scattered value has real part > 0.5

   Notes:
   The index array handed to the new IS is passed with PETSC_OWN_POINTER, so it is not freed here.
*/
8947: PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
8948: {
8949:   IS             localis_t;
8950:   PetscInt       i,lsize,*idxs,n;
8951:   PetscScalar    *vals;

8953:   /* get indices in local ordering exploiting local to global map */
8954:   ISGetLocalSize(globalis,&lsize);
        /* vals is a temporary array of ones, one per entry of globalis */
8955:   PetscMalloc1(lsize,&vals);
8956:   for (i=0;i<lsize;i++) vals[i] = 1.0;
8957:   ISGetIndices(globalis,(const PetscInt**)&idxs);
8958:   VecSet(gwork,0.0);
8959:   VecSet(lwork,0.0);
8960:   if (idxs) { /* multilevel guard */
8961:     VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);
8962:     VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);
8963:   }
        /* assembly is started and ended outside the guard, i.e. also when no values were set */
8964:   VecAssemblyBegin(gwork);
8965:   ISRestoreIndices(globalis,(const PetscInt**)&idxs);
8966:   PetscFree(vals);
8967:   VecAssemblyEnd(gwork);
8968:   /* now compute set in local ordering */
8969:   VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
8970:   VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
        /* vals and idxs are reused from here on: vals is the read-only array of lwork, idxs the output indices */
8971:   VecGetArrayRead(lwork,(const PetscScalar**)&vals);
        /* NOTE(review): VecGetSize() is used as loop bound on the local array; presumably lwork is a
           sequential vector, so size and local length coincide -- confirm */
8972:   VecGetSize(lwork,&n);
        /* first pass: count the marked entries */
8973:   for (i=0,lsize=0;i<n;i++) {
8974:     if (PetscRealPart(vals[i]) > 0.5) {
8975:       lsize++;
8976:     }
8977:   }
        /* second pass: record their positions */
8978:   PetscMalloc1(lsize,&idxs);
8979:   for (i=0,lsize=0;i<n;i++) {
8980:     if (PetscRealPart(vals[i]) > 0.5) {
8981:       idxs[lsize++] = i;
8982:     }
8983:   }
8984:   VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);
8985:   ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);
8986:   *localis = localis_t;
8987:   return 0;
8988: }

/*
   PCBDDCSetUpSubSchurs - Sets up pcbddc->sub_schurs by calling PCBDDCSubSchursSetUp().

   Input Parameter:
.  pc - the preconditioner; pc->data is read both as PC_IS and as PC_BDDC

   Notes:
   The routine proceeds in three stages:
   1. it selects the adjacency (used_xadj, used_adjncy) handed to PCBDDCSubSchursSetUp();
   2. it creates the Schur complement S_j from the pcis blocks (A_II, pA_II, A_IB, A_BI, A_BB);
   3. it calls PCBDDCSubSchursSetUp(), either directly (sub_schurs->schur_explicit false) or after
      collecting the optional change of basis, scaling and discrete harmonic data (explicit case).

   Side effect: pcbddc->sub_schurs_layers is reset to -1 when the row structure of
   pcbddc->local_mat cannot be obtained.
   The work is bracketed by the PC_BDDC_Schurs log event of the current level.
*/
8990: PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
8991: {
8992:   PC_IS               *pcis=(PC_IS*)pc->data;
8993:   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
8994:   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
8995:   Mat                 S_j;
8996:   PetscInt            *used_xadj,*used_adjncy;
8997:   PetscBool           free_used_adj;
8998:   PetscErrorCode      ierr;

9000:   PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);
9001:   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
9002:   free_used_adj = PETSC_FALSE;
9003:   if (pcbddc->sub_schurs_layers == -1) {
          /* no adjacency is passed when the number of layers is -1 */
9004:     used_xadj = NULL;
9005:     used_adjncy = NULL;
9006:   } else {
          /* the first two branches borrow the arrays stored in mat_graph (nothing to free afterwards) */
9007:     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
9008:       used_xadj = pcbddc->mat_graph->xadj;
9009:       used_adjncy = pcbddc->mat_graph->adjncy;
9010:     } else if (pcbddc->computed_rowadj) {
9011:       used_xadj = pcbddc->mat_graph->xadj;
9012:       used_adjncy = pcbddc->mat_graph->adjncy;
9013:     } else {
9014:       PetscBool      flg_row=PETSC_FALSE;
9015:       const PetscInt *xadj,*adjncy;
9016:       PetscInt       nvtxs;

9018:       MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
9019:       if (flg_row) {
              /* take a private copy: xadj/adjncy are handed back by MatRestoreRowIJ() just below */
9020:         PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);
9021:         PetscArraycpy(used_xadj,xadj,nvtxs+1);
9022:         PetscArraycpy(used_adjncy,adjncy,xadj[nvtxs]);
9023:         free_used_adj = PETSC_TRUE;
9024:       } else {
              /* row structure not available: fall back to the "no adjacency" setting */
9025:         pcbddc->sub_schurs_layers = -1;
9026:         used_xadj = NULL;
9027:         used_adjncy = NULL;
9028:       }
9029:       MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
9030:     }
9031:   }

9033:   /* setup sub_schurs data */
9034:   MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
9035:   if (!sub_schurs->schur_explicit) {
9036:     /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */
9037:     MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);
9038:     PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);
9039:   } else {
9040:     Mat       change = NULL;
9041:     Vec       scaling = NULL;
9042:     IS        change_primal = NULL, iP;
9043:     PetscInt  benign_n;
9044:     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
9045:     PetscBool need_change = PETSC_FALSE;
9046:     PetscBool discrete_harmonic = PETSC_FALSE;

          /* when vertices are not used, solvers are reused only if there are no local vertices in sub_schurs */
9048:     if (!pcbddc->use_vertices && reuse_solvers) {
9049:       PetscInt n_vertices;

9051:       ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
9052:       reuse_solvers = (PetscBool)!n_vertices;
9053:     }
          /* the benign count is forwarded only when the benign change is not applied explicitly */
9054:     if (!pcbddc->benign_change_explicit) {
9055:       benign_n = pcbddc->benign_n;
9056:     } else {
9057:       benign_n = 0;
9058:     }
9059:     /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc.
9060:        We need a global reduction to avoid possible deadlocks.
9061:        We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */
9062:     if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) {
9063:       PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change);
9064:       MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
            /* after the logical OR and the negation, need_change is true only if no process holds a change yet */
9065:       need_change = (PetscBool)(!need_change);
9066:     }
9067:     /* If the user defines additional constraints, we import them here.
9068:        We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */
9069:     if (need_change) {
9070:       PC_IS   *pcisf;
9071:       PC_BDDC *pcbddcf;
9072:       PC      pcf;

            /* temporary BDDC preconditioner on the same operators, used only to run PCBDDCConstraintsSetUp() */
9075:       PCCreate(PetscObjectComm((PetscObject)pc),&pcf);
9076:       PCSetOperators(pcf,pc->mat,pc->pmat);
9077:       PCSetType(pcf,PCBDDC);

9079:       /* hacks */
            /* the temporary PC borrows (does not own) the objects of pc assigned below */
9080:       pcisf                        = (PC_IS*)pcf->data;
9081:       pcisf->is_B_local            = pcis->is_B_local;
9082:       pcisf->vec1_N                = pcis->vec1_N;
9083:       pcisf->BtoNmap               = pcis->BtoNmap;
9084:       pcisf->n                     = pcis->n;
9085:       pcisf->n_B                   = pcis->n_B;
9086:       pcbddcf                      = (PC_BDDC*)pcf->data;
            /* drop the graph struct held by pcf and point to the one of pc instead */
9087:       PetscFree(pcbddcf->mat_graph);
9088:       pcbddcf->mat_graph           = pcbddc->mat_graph;
9089:       pcbddcf->use_faces           = PETSC_TRUE;
9090:       pcbddcf->use_change_of_basis = PETSC_TRUE;
9091:       pcbddcf->use_change_on_faces = PETSC_TRUE;
9092:       pcbddcf->use_qr_single       = PETSC_TRUE;
9093:       pcbddcf->fake_change         = PETSC_TRUE;

9095:       /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */
9096:       PCBDDCConstraintsSetUp(pcf);
9097:       sub_schurs->change_with_qr = pcbddcf->use_qr_single;
9098:       ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);
            /* take over the constraint matrix: the pointer in pcbddcf is cleared, `change` is destroyed at the end of this branch */
9099:       change = pcbddcf->ConstraintMatrix;
9100:       pcbddcf->ConstraintMatrix = NULL;

9102:       /* free unneeded memory allocated in PCBDDCConstraintsSetUp */
9103:       PetscFree(pcbddcf->sub_schurs);
9104:       MatNullSpaceDestroy(&pcbddcf->onearnullspace);
9105:       PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);
9106:       PetscFree(pcbddcf->primal_indices_local_idxs);
9107:       PetscFree(pcbddcf->onearnullvecs_state);
9108:       PetscFree(pcf->data);
            /* data was released by hand above; the destroy/reset callbacks are cleared before PCDestroy(),
               presumably so that they do not touch the freed data or the borrowed objects -- confirm */
9109:       pcf->ops->destroy = NULL;
9110:       pcf->ops->reset   = NULL;
9111:       PCDestroy(&pcf);
9112:     }
          /* pcis->D is passed as scaling only when deluxe scaling is not in use */
9113:     if (!pcbddc->use_deluxe_scaling) scaling = pcis->D;

          /* the discrete harmonic option is read only when an object named "__KSPFETIDP_iP" is attached to pc */
9115:     PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);
9116:     if (iP) {
9117:       PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");
9118:       PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);
9119:       PetscOptionsEnd();
9120:     }
9121:     if (discrete_harmonic) {
            /* work on a copy of the local matrix with the rows and columns listed in iP zeroed
               (1.0 on the diagonal), and attach iP to that copy */
9122:       Mat A;
9123:       MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);
9124:       MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);
9125:       PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);
9126:       PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);
9127:       MatDestroy(&A);
9128:     } else {
9129:       PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);
9130:     }
9131:     MatDestroy(&change);
9132:     ISDestroy(&change_primal);
9133:   }
9134:   MatDestroy(&S_j);

9136:   /* free adjacency */
        /* only the private copy made from MatGetRowIJ() is freed; arrays borrowed from mat_graph are left alone */
9137:   if (free_used_adj) {
9138:     PetscFree2(used_xadj,used_adjncy);
9139:   }
9140:   PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);
9141:   return 0;
9142: }

/*
   PCBDDCInitSubSchurs - Creates pcbddc->sub_schurs if needed and initializes it with PCBDDCSubSchursInit().

   Input Parameter:
.  pc - the preconditioner; pc->data is read both as PC_IS and as PC_BDDC

   Notes:
   The graph passed to PCBDDCSubSchursInit() is pcbddc->mat_graph, unless pcbddc->sub_schurs_rebuild
   is set: in that case a temporary graph is created, set up without adjacency arrays and with the
   candidate vertices of pcbddc->mat_graph, and destroyed before returning.
   When pcbddc->dbg_flag is set (and no rebuild is requested) the graph and the number of local
   candidate vertices, edges and faces are printed on pcbddc->dbg_viewer.
*/
9144: PetscErrorCode PCBDDCInitSubSchurs(PC pc)
9145: {
9146:   PC_IS               *pcis=(PC_IS*)pc->data;
9147:   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
9148:   PCBDDCGraph         graph;

9150:   /* attach interface graph for determining subsets */
9151:   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
9152:     IS       verticesIS,verticescomm;
9153:     PetscInt vsize,*idxs;

          /* copy the candidate vertices into an IS living on the communicator of pc */
9155:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
9156:     ISGetSize(verticesIS,&vsize);
9157:     ISGetIndices(verticesIS,(const PetscInt**)&idxs);
9158:     ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);
9159:     ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);
9160:     PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
          /* temporary graph: same local-to-global map, global size and max count as mat_graph */
9161:     PCBDDCGraphCreate(&graph);
9162:     PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);
9163:     PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);
9164:     ISDestroy(&verticescomm);
9165:     PCBDDCGraphComputeConnectedComponents(graph);
9166:   } else {
9167:     graph = pcbddc->mat_graph;
9168:   }
9169:   /* print some info */
9170:   if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) {
9171:     IS       vertices;
9172:     PetscInt nv,nedges,nfaces;
9173:     PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
9174:     PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);
9175:     ISGetSize(vertices,&nv);
9176:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
9177:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
          /* the value in parentheses is the corresponding use_vertices/use_edges/use_faces flag */
9178:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,(int)nv,pcbddc->use_vertices);
9179:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%D)\n",PetscGlobalRank,(int)nedges,pcbddc->use_edges);
9180:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%D)\n",PetscGlobalRank,(int)nfaces,pcbddc->use_faces);
9181:     PetscViewerFlush(pcbddc->dbg_viewer);
9182:     PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
9183:     PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);
9184:   }

9186:   /* sub_schurs init */
        /* the sub_schurs object is created only once and kept in pcbddc */
9187:   if (!pcbddc->sub_schurs) {
9188:     PCBDDCSubSchursCreate(&pcbddc->sub_schurs);
9189:   }
9190:   PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);

9192:   /* free graph struct */
        /* only the temporary graph is destroyed; pcbddc->mat_graph is never destroyed here */
9193:   if (pcbddc->sub_schurs_rebuild) {
9194:     PCBDDCGraphDestroy(&graph);
9195:   }
9196:   return 0;
9197: }

/*
   PCBDDCCheckOperator - Diagnostic routine: applies the local Schur complement and then
   PCBDDCApplyInterfacePreconditioner() to a random interface vector, and prints on each process
   the infinity norm of the difference with the starting vector.

   Input Parameter:
.  pc - the preconditioner; pc->data is read both as PC_IS and as PC_BDDC

   Notes:
   The check runs only when pcbddc->n_vertices equals pcbddc->local_primal_size; otherwise the
   routine returns without doing anything.
   zerodiag is initialized to NULL and never assigned in this routine, so the branches guarded
   by `if (zerodiag)` (those using B0_B and dummy_vec) are not executed as the code stands.
   Several work vectors of pcis and pcbddc (vec1_N, vec1_B, vec2_B, vec1_D, vec1_P, coarse_vec)
   are overwritten.
   Output goes through PetscPrintf() on PETSC_COMM_SELF, i.e. one line per process.
*/
9199: PetscErrorCode PCBDDCCheckOperator(PC pc)
9200: {
9201:   PC_IS               *pcis=(PC_IS*)pc->data;
9202:   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;

9204:   if (pcbddc->n_vertices == pcbddc->local_primal_size) {
9205:     IS             zerodiag = NULL;
9206:     Mat            S_j,B0_B=NULL;
9207:     Vec            dummy_vec=NULL,vec_check_B,vec_scale_P;
9208:     PetscScalar    *p0_check,*array,*array2;
9209:     PetscReal      norm;
9210:     PetscInt       i;

9212:     /* B0 and B0_B */
9213:     if (zerodiag) {
9214:       IS       dummy;

9216:       ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);
9217:       MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
9218:       MatCreateVecs(B0_B,NULL,&dummy_vec);
9219:       ISDestroy(&dummy);
9220:     }
9221:     /* I need a primal vector to scale primal nodes since BDDC sums contributions */
          /* vec_scale_P = 1 / (sum of ones scattered to the coarse vector and back) */
9222:     VecDuplicate(pcbddc->vec1_P,&vec_scale_P);
9223:     VecSet(pcbddc->vec1_P,1.0);
          /* NOTE(review): coarse_vec is not zeroed before this ADD_VALUES scatter (it is before the
             one further below); presumably it is expected to be zero on entry -- confirm */
9224:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9225:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9226:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);
9227:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);
9228:     VecReciprocal(vec_scale_P);
9229:     /* S_j */
9230:     MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
9231:     MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);

9233:     /* mimic vector in \widetilde{W}_\Gamma */
9234:     VecSetRandom(pcis->vec1_N,NULL);
9235:     /* continuous in primal space */
          /* random coarse values are scattered to vec1_P and written into vec1_N at the primal reference nodes */
9236:     VecSetRandom(pcbddc->coarse_vec,NULL);
9237:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
9238:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
9239:     VecGetArray(pcbddc->vec1_P,&array);
          /* keep a copy of the last benign_n entries of vec1_P for the p0 comparison at the end */
9240:     PetscCalloc1(pcbddc->benign_n,&p0_check);
9241:     for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i];
9242:     VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);
9243:     VecRestoreArray(pcbddc->vec1_P,&array);
9244:     VecAssemblyBegin(pcis->vec1_N);
9245:     VecAssemblyEnd(pcis->vec1_N);
9246:     VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
9247:     VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
          /* vec_check_B keeps the starting interface vector, used as reference for the final error */
9248:     VecDuplicate(pcis->vec2_B,&vec_check_B);
9249:     VecCopy(pcis->vec2_B,vec_check_B);

9251:     /* assemble rhs for coarse problem */
9252:     /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */
9253:     /* local with Schur */
9254:     MatMult(S_j,pcis->vec2_B,pcis->vec1_B);
9255:     if (zerodiag) {
9256:       VecGetArray(dummy_vec,&array);
9257:       for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i];
9258:       VecRestoreArray(dummy_vec,&array);
9259:       MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);
9260:     }
9261:     /* sum on primal nodes the local contributions */
9262:     VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
9263:     VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
9264:     VecGetArray(pcis->vec1_N,&array);
9265:     VecGetArray(pcbddc->vec1_P,&array2);
9266:     for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]];
9267:     VecRestoreArray(pcbddc->vec1_P,&array2);
9268:     VecRestoreArray(pcis->vec1_N,&array);
9269:     VecSet(pcbddc->coarse_vec,0.);
9270:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9271:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9272:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
9273:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
          /* NOTE(review): the array of vec1_P is obtained before VecPointwiseMult() modifies the same
             vector and is restored only afterwards; this relies on `array` aliasing the live storage -- confirm */
9274:     VecGetArray(pcbddc->vec1_P,&array);
9275:     /* scale primal nodes (BDDC sums contributions) */
9276:     VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);
9277:     VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);
9278:     VecRestoreArray(pcbddc->vec1_P,&array);
9279:     VecAssemblyBegin(pcis->vec1_N);
9280:     VecAssemblyEnd(pcis->vec1_N);
9281:     VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
9282:     VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
9283:     /* global: \widetilde{B0}_B w_\Gamma */
9284:     if (zerodiag) {
9285:       MatMult(B0_B,pcis->vec2_B,dummy_vec);
9286:       VecGetArray(dummy_vec,&array);
9287:       for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i];
9288:       VecRestoreArray(dummy_vec,&array);
9289:     }
9290:     /* BDDC */
          /* presumably PCBDDCApplyInterfacePreconditioner() takes its input from and leaves its result
             in pcis->vec1_B (with vec1_D as interior part) -- confirm against its definition */
9291:     VecSet(pcis->vec1_D,0.);
9292:     PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);

          /* error = result - starting vector; vec2_B keeps a copy of the result */
9294:     VecCopy(pcis->vec1_B,pcis->vec2_B);
9295:     VecAXPY(pcis->vec1_B,-1.0,vec_check_B);
9296:     VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);
9297:     PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);
9298:     for (i=0;i<pcbddc->benign_n;i++) {
9299:       PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%D] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]));
9300:     }
          /* release everything created in this routine (dummy_vec and B0_B may still be NULL) */
9301:     PetscFree(p0_check);
9302:     VecDestroy(&vec_scale_P);
9303:     VecDestroy(&vec_check_B);
9304:     VecDestroy(&dummy_vec);
9305:     MatDestroy(&S_j);
9306:     MatDestroy(&B0_B);
9307:   }
9308:   return 0;
9309: }

9311: #include <../src/mat/impls/aij/mpi/mpiaij.h>
/*
   MatMPIAIJRestrict - Builds, on the communicator ccomm, a matrix B that reuses the local blocks
   of a row-redistributed copy of A.

   Input Parameters:
+  A     - the matrix to restrict; its data is accessed as Mat_MPIAIJ after MatCreateSubMatrix()
-  ccomm - target communicator, or MPI_COMM_NULL on processes that do not take part in B

   Output Parameter:
.  B - the new matrix; it is created only where ccomm != MPI_COMM_NULL and is left untouched elsewhere

   Notes:
   A row layout with A->rmap->N rows is created on ccomm; every process of A then extracts the
   rows [rst,ren) of that layout (an empty range where ccomm is MPI_COMM_NULL) into the
   temporary matrix At. B is assembled by hand: it takes references to the two local blocks and
   to lvec of At, copies colmap and garray, adopts the new row layout and gets its own scatter.
   At is destroyed before returning.
*/
9312: PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B)
9313: {
9314:   Mat            At;
9315:   IS             rows;
9316:   PetscInt       rst,ren;
9317:   PetscLayout    rmap;

        /* processes outside ccomm keep the empty row range [0,0) */
9319:   rst = ren = 0;
9320:   if (ccomm != MPI_COMM_NULL) {
9321:     PetscLayoutCreate(ccomm,&rmap);
9322:     PetscLayoutSetSize(rmap,A->rmap->N);
9323:     PetscLayoutSetBlockSize(rmap,1);
9324:     PetscLayoutSetUp(rmap);
9325:     PetscLayoutGetRange(rmap,&rst,&ren);
9326:   }
        /* executed by every process of A, also by those with an empty range */
9327:   ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);
9328:   MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);
9329:   ISDestroy(&rows);

9331:   if (ccomm != MPI_COMM_NULL) {
9332:     Mat_MPIAIJ *a,*b;
9333:     IS         from,to;
9334:     Vec        gvec;
9335:     PetscInt   lsize;

9337:     MatCreate(ccomm,B);
9338:     MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);
9339:     MatSetType(*B,MATAIJ);
          /* the row layout created by MatCreate() is dropped here; rmap computed above is installed further below */
9340:     PetscLayoutDestroy(&((*B)->rmap));
9341:     PetscLayoutSetUp((*B)->cmap);
9342:     a    = (Mat_MPIAIJ*)At->data;
9343:     b    = (Mat_MPIAIJ*)(*B)->data;
9344:     MPI_Comm_size(ccomm,&b->size);
9345:     MPI_Comm_rank(ccomm,&b->rank);
          /* share the two local blocks of At with B: references are taken, so they outlive At */
9346:     PetscObjectReference((PetscObject)a->A);
9347:     PetscObjectReference((PetscObject)a->B);
9348:     b->A = a->A;
9349:     b->B = a->B;

9351:     b->donotstash      = a->donotstash;
9352:     b->roworiented     = a->roworiented;
9353:     b->rowindices      = NULL;
9354:     b->rowvalues       = NULL;
9355:     b->getrowactive    = PETSC_FALSE;

          /* B is flagged as assembled and preallocated by hand, since its blocks come from At */
9357:     (*B)->rmap         = rmap;
9358:     (*B)->factortype   = A->factortype;
9359:     (*B)->assembled    = PETSC_TRUE;
9360:     (*B)->insertmode   = NOT_SET_VALUES;
9361:     (*B)->preallocated = PETSC_TRUE;

          /* colmap is copied (table or plain array, depending on PETSC_USE_CTABLE) rather than shared */
9363:     if (a->colmap) {
9364: #if defined(PETSC_USE_CTABLE)
9365:       PetscTableCreateCopy(a->colmap,&b->colmap);
9366: #else
9367:       PetscMalloc1(At->cmap->N,&b->colmap);
9368:       PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));
9369:       PetscArraycpy(b->colmap,a->colmap,At->cmap->N);
9370: #endif
9371:     } else b->colmap = NULL;
9372:     if (a->garray) {
9373:       PetscInt len;
9374:       len  = a->B->cmap->n;
            /* one entry more than needed is allocated, presumably so the array is non-empty when len is 0 */
9375:       PetscMalloc1(len+1,&b->garray);
9376:       PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));
9377:       if (len) PetscArraycpy(b->garray,a->garray,len);
9378:     } else b->garray = NULL;

9380:     PetscObjectReference((PetscObject)a->lvec);
9381:     b->lvec = a->lvec;
9382:     PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);

9384:     /* cannot use VecScatterCopy */
          /* rebuild the scatter on ccomm: entries b->garray[0..lsize) of a vector of B go to 0..lsize-1 of lvec */
9385:     VecGetLocalSize(b->lvec,&lsize);
9386:     ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);
9387:     ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);
9388:     MatCreateVecs(*B,&gvec,NULL);
9389:     VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);
9390:     PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);
9391:     ISDestroy(&from);
9392:     ISDestroy(&to);
9393:     VecDestroy(&gvec);
9394:   }
9395:   MatDestroy(&At);
9396:   return 0;
9397: }
9397: }