/*@C MatH2OpusOrthogonalize - Orthogonalize the basis tree of a hierarchical matrix.

   Input Parameter:
.  A - the matrix
@*/
PetscErrorCode MatH2OpusOrthogonalize(Mat A)
{
  PetscErrorCode ierr;
  PetscBool      ish2opus;
  Mat_H2OPUS     *a = (Mat_H2OPUS*)A->data;
  PetscMPIInt    size;
  PetscBool      boundtocpu = PETSC_TRUE; /* overridden below only when GPU support is compiled in */

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidType(A,1);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATH2OPUS,&ish2opus);CHKERRQ(ierr);
  if (!ish2opus) PetscFunctionReturn(0);
  if (a->orthogonal) PetscFunctionReturn(0);
  HLibProfile::clear();
  ierr = PetscLogEventBegin(MAT_H2Opus_Orthog,A,0,0,0);CHKERRQ(ierr);
#if defined(PETSC_H2OPUS_USE_GPU)
  boundtocpu = A->boundtocpu;
#endif
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  if (size > 1) { /* distributed hierarchical matrix */
    if (boundtocpu) {
      if (!a->dist_hmatrix) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing CPU matrix");
#if defined(H2OPUS_USE_MPI)
      distributed_horthog(*a->dist_hmatrix, a->handle);
#endif
#if defined(PETSC_H2OPUS_USE_GPU)
      A->offloadmask = PETSC_OFFLOAD_CPU;
    } else {
      if (!a->dist_hmatrix_gpu) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing GPU matrix");
      ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
#if defined(H2OPUS_USE_MPI)
      distributed_horthog(*a->dist_hmatrix_gpu, a->handle);
#endif
      ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
#endif
    }
  } else { /* sequential hierarchical matrix */
#if defined(H2OPUS_USE_MPI)
    h2opusHandle_t handle = a->handle->handle;
#else
    h2opusHandle_t handle = a->handle;
#endif
    if (boundtocpu) {
      if (!a->hmatrix) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing CPU matrix");
      horthog(*a->hmatrix, handle);
#if defined(PETSC_H2OPUS_USE_GPU)
      A->offloadmask = PETSC_OFFLOAD_CPU;
    } else {
      if (!a->hmatrix_gpu) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing GPU matrix");
      ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
      horthog(*a->hmatrix_gpu, handle);
      ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
#endif
    }
  }
  a->orthogonal = PETSC_TRUE;
  { /* log flops */
    double gops,time,perf,dev;
    HLibProfile::getHorthogPerf(gops,time,perf,dev);
#if defined(PETSC_H2OPUS_USE_GPU)
    if (boundtocpu) {
      ierr = PetscLogFlops(1e9*gops);CHKERRQ(ierr);
    } else {
      ierr = PetscLogGpuFlops(1e9*gops);CHKERRQ(ierr);
    }
#else
    ierr = PetscLogFlops(1e9*gops);CHKERRQ(ierr);
#endif
  }
  ierr = PetscLogEventEnd(MAT_H2Opus_Orthog,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
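/*
   Illustrative usage sketch (not part of the library sources): callers rarely need to
   invoke MatH2OpusOrthogonalize() directly, since MatH2OpusCompress() below calls it
   before truncating; the orthogonal state is cached in a->orthogonal, so repeated calls
   are no-ops. The matrix A is assumed to already be assembled as a MATH2OPUS, and the
   tolerance value is a placeholder.

     ierr = MatH2OpusOrthogonalize(A);CHKERRQ(ierr);  // basis tree is now orthogonal
     ierr = MatH2OpusCompress(A,1.e-6);CHKERRQ(ierr); // reuses the cached orthogonal basis
*/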
/*@C MatH2OpusCompress - Compress a hierarchical matrix.

   Input Parameters:
+  A - the matrix
-  tol - the absolute truncation threshold
@*/
PetscErrorCode MatH2OpusCompress(Mat A, PetscReal tol)
{
  PetscErrorCode ierr;
  PetscBool      ish2opus;
  Mat_H2OPUS     *a = (Mat_H2OPUS*)A->data;
  PetscMPIInt    size;
  PetscBool      boundtocpu = PETSC_TRUE; /* overridden below only when GPU support is compiled in */

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidType(A,1);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATH2OPUS,&ish2opus);CHKERRQ(ierr);
  if (!ish2opus) PetscFunctionReturn(0);
  ierr = MatH2OpusOrthogonalize(A);CHKERRQ(ierr);
  HLibProfile::clear();
  ierr = PetscLogEventBegin(MAT_H2Opus_Compress,A,0,0,0);CHKERRQ(ierr);
#if defined(PETSC_H2OPUS_USE_GPU)
  boundtocpu = A->boundtocpu;
#endif
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  if (size > 1) { /* distributed hierarchical matrix */
    if (boundtocpu) {
      if (!a->dist_hmatrix) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing CPU matrix");
#if defined(H2OPUS_USE_MPI)
      distributed_hcompress(*a->dist_hmatrix, tol, a->handle);
#endif
#if defined(PETSC_H2OPUS_USE_GPU)
      A->offloadmask = PETSC_OFFLOAD_CPU;
    } else {
      if (!a->dist_hmatrix_gpu) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing GPU matrix");
      ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
#if defined(H2OPUS_USE_MPI)
      distributed_hcompress(*a->dist_hmatrix_gpu, tol, a->handle);
#endif
      ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
#endif
    }
  } else { /* sequential hierarchical matrix */
#if defined(H2OPUS_USE_MPI)
    h2opusHandle_t handle = a->handle->handle;
#else
    h2opusHandle_t handle = a->handle;
#endif
    if (boundtocpu) {
      if (!a->hmatrix) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing CPU matrix");
      hcompress(*a->hmatrix, tol, handle);
#if defined(PETSC_H2OPUS_USE_GPU)
      A->offloadmask = PETSC_OFFLOAD_CPU;
    } else {
      if (!a->hmatrix_gpu) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing GPU matrix");
      ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
      hcompress(*a->hmatrix_gpu, tol, handle);
      ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
#endif
    }
  }
  { /* log flops */
    double gops,time,perf,dev;
    HLibProfile::getHcompressPerf(gops,time,perf,dev);
#if defined(PETSC_H2OPUS_USE_GPU)
    if (boundtocpu) {
      ierr = PetscLogFlops(1e9*gops);CHKERRQ(ierr);
    } else {
      ierr = PetscLogGpuFlops(1e9*gops);CHKERRQ(ierr);
    }
#else
    ierr = PetscLogFlops(1e9*gops);CHKERRQ(ierr);
#endif
  }
  ierr = PetscLogEventEnd(MAT_H2Opus_Compress,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
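/*
   Illustrative usage sketch (not part of the library sources): compress an assembled
   MATH2OPUS with an absolute truncation threshold; the threshold value below is a
   placeholder, not a recommendation.

     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatH2OpusCompress(A,1.e-6);CHKERRQ(ierr); // orthogonalizes first, then truncates
*/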
/*@C MatH2OpusSetSamplingMat - Set the matrix to be sampled (via matrix-vector products) to construct a hierarchical matrix.

   Input Parameters:
+  A - the hierarchical matrix
.  B - the matrix to be sampled
.  bs - maximum number of samples to be taken concurrently
-  tol - relative tolerance for construction

   Notes: Need to call MatAssemblyBegin()/MatAssemblyEnd() to update the hierarchical matrix.
@*/
PetscErrorCode MatH2OpusSetSamplingMat(Mat A, Mat B, PetscInt bs, PetscReal tol)
{
  PetscErrorCode ierr;
  PetscBool      ish2opus;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidType(A,1);
  if (B) PetscValidHeaderSpecific(B,MAT_CLASSID,2);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATH2OPUS,&ish2opus);CHKERRQ(ierr);
  if (ish2opus) {
    Mat_H2OPUS *a = (Mat_H2OPUS*)A->data;

    if (!a->sampler) a->sampler = new PetscMatrixSampler();
    a->sampler->SetSamplingMat(B);
    if (bs > 0) a->bs = bs;
    if (tol > 0.) a->rtol = tol;
    delete a->kernel; /* a previously set kernel is discarded */
  }
  PetscFunctionReturn(0);
}
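/*
   Illustrative usage sketch (not part of the library sources): let a MATH2OPUS matrix A
   approximate another matrix B by sampling matrix-vector products with it. The block
   size 32 and relative tolerance 1.e-4 are placeholder values; as the Notes above state,
   the hierarchical matrix is only (re)constructed during MatAssemblyBegin()/MatAssemblyEnd().

     ierr = MatH2OpusSetSamplingMat(A,B,32,1.e-4);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/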
/*@C MatCreateH2OpusFromKernel - Creates a MATH2OPUS from a user-supplied kernel.

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  spacedim - dimension of the space coordinates
.  coords - coordinates of the points
.  cdist - whether or not coordinates are distributed
.  kernel - computational kernel (or NULL)
.  kernelctx - kernel context
.  eta - admissibility condition tolerance
.  leafsize - leaf size in cluster tree
-  basisord - approximation order for Chebychev interpolation of low-rank blocks

   Output Parameter:
.  nA - matrix
   Options Database Keys:
+  -mat_h2opus_leafsize <PetscInt> - leaf size in cluster tree
.  -mat_h2opus_eta <PetscReal> - admissibility condition tolerance
.  -mat_h2opus_order <PetscInt> - Chebychev approximation order
-  -mat_h2opus_normsamples <PetscInt> - maximum number of samples to be taken when estimating norms
@*/
PetscErrorCode MatCreateH2OpusFromKernel(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt spacedim, const PetscReal coords[], PetscBool cdist, MatH2OpusKernel kernel, void *kernelctx, PetscReal eta, PetscInt leafsize, PetscInt basisord, Mat *nA)
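/*
   Illustrative usage sketch (not part of the library sources): a simple exponential
   kernel and one possible construction sequence. The callback below follows the
   MatH2OpusKernel signature (space dimension, the two points, user context); the kernel
   itself, the variables n, sdim and coords, and the values chosen for eta, leafsize and
   basisord are placeholders, not recommendations.

     static PetscScalar ExpKernel(PetscInt sdim, PetscReal x[], PetscReal y[], void *ctx)
     {
       PetscReal d2 = 0.0;
       PetscInt  k;

       for (k = 0; k < sdim; k++) d2 += (x[k] - y[k]) * (x[k] - y[k]);
       return PetscExpReal(-PetscSqrtReal(d2));
     }

     // n local points, local coordinates interleaved as x0 y0 z0 x1 y1 z1 ... (cdist = PETSC_TRUE)
     ierr = MatCreateH2OpusFromKernel(PETSC_COMM_WORLD,n,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                      sdim,coords,PETSC_TRUE,ExpKernel,NULL,
                                      0.9,32,4,&A);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/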