Actual source code: veckok.kokkos.cxx
1: /*
2: Implements the sequential Kokkos vectors.
3: */
4: #include <petscvec_kokkos.hpp>
6: #include <petsc/private/sfimpl.h>
7: #include <petsc/private/petscimpl.h>
8: #include <petscmath.h>
9: #include <petscviewer.h>
10: #include <KokkosBlas.hpp>
12: #include <petscerror.h>
13: #include <../src/vec/vec/impls/dvecimpl.h>
14: #include <../src/vec/vec/impls/seq/kokkos/veckokkosimpl.hpp>
16: #if defined(PETSC_USE_DEBUG)
17: #define VecErrorIfNotKokkos(v) \
18: do { \
19: PetscBool isKokkos = PETSC_FALSE; \
20: PetscObjectTypeCompareAny((PetscObject)(v),&isKokkos,VECSEQKOKKOS,VECMPIKOKKOS,VECKOKKOS,""); \
21: if (!isKokkos) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Calling VECKOKKOS methods on a non-VECKOKKOS object"); /* restored check: this line was dropped from the listing; the exact message may differ */ \
22: } while (0)
23: #else
24: #define VecErrorIfNotKokkos(v) do {(void)(v);} while (0)
25: #endif
27: template<class MemorySpace>
28: PetscErrorCode VecGetKokkosView_Private(Vec v,PetscScalarKokkosViewType<MemorySpace>* kv,PetscBool overwrite)
29: {
30: Vec_Kokkos *veckok = static_cast<Vec_Kokkos*>(v->spptr);
32: VecErrorIfNotKokkos(v);
33: if (!overwrite) veckok->v_dual.sync<MemorySpace>(); /* If overwrite=true, no need to sync the space, since caller will overwrite the data */
34: *kv = veckok->v_dual.view<MemorySpace>();
35: return 0;
36: }
38: template<class MemorySpace>
39: PetscErrorCode VecRestoreKokkosView_Private(Vec v,PetscScalarKokkosViewType<MemorySpace>* kv,PetscBool overwrite)
40: {
41: Vec_Kokkos *veckok = static_cast<Vec_Kokkos*>(v->spptr);
43: VecErrorIfNotKokkos(v);
44: if (overwrite) veckok->v_dual.clear_sync_state(); /* If overwrite=true, clear the old sync state since user forced an overwrite */
45: veckok->v_dual.modify<MemorySpace>();
46: PetscObjectStateIncrease((PetscObject)v);
47: return 0;
48: }
50: template<class MemorySpace>
51: PetscErrorCode VecGetKokkosView(Vec v,ConstPetscScalarKokkosViewType<MemorySpace>* kv)
52: {
53: Vec_Kokkos *veckok = static_cast<Vec_Kokkos*>(v->spptr);
54: VecErrorIfNotKokkos(v);
55: veckok->v_dual.sync<MemorySpace>(); /* Sync the space for caller to read */
56: *kv = veckok->v_dual.view<MemorySpace>();
57: return 0;
58: }
60: /* Function template explicit instantiation */
61: template PETSC_VISIBILITY_PUBLIC PetscErrorCode VecGetKokkosView (Vec,ConstPetscScalarKokkosView*);
62: template<> PETSC_VISIBILITY_PUBLIC PetscErrorCode VecGetKokkosView (Vec v,PetscScalarKokkosView* kv) {return VecGetKokkosView_Private(v,kv,PETSC_FALSE);}
63: template<> PETSC_VISIBILITY_PUBLIC PetscErrorCode VecRestoreKokkosView (Vec v,PetscScalarKokkosView* kv) {return VecRestoreKokkosView_Private(v,kv,PETSC_FALSE);}
64: template<> PETSC_VISIBILITY_PUBLIC PetscErrorCode VecGetKokkosViewWrite (Vec v,PetscScalarKokkosView* kv) {return VecGetKokkosView_Private(v,kv,PETSC_TRUE);}
65: template<> PETSC_VISIBILITY_PUBLIC PetscErrorCode VecRestoreKokkosViewWrite(Vec v,PetscScalarKokkosView* kv) {return VecRestoreKokkosView_Private(v,kv,PETSC_TRUE);}
67: #if !defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_HOST) /* Get host views if the default memory space is not host space */
68: template PETSC_VISIBILITY_PUBLIC PetscErrorCode VecGetKokkosView (Vec,ConstPetscScalarKokkosViewHost*);
69: template<> PETSC_VISIBILITY_PUBLIC PetscErrorCode VecGetKokkosView (Vec v,PetscScalarKokkosViewHost* kv) {return VecGetKokkosView_Private(v,kv,PETSC_FALSE);}
70: template<> PETSC_VISIBILITY_PUBLIC PetscErrorCode VecRestoreKokkosView (Vec v,PetscScalarKokkosViewHost* kv) {return VecRestoreKokkosView_Private(v,kv,PETSC_FALSE);}
71: template<> PETSC_VISIBILITY_PUBLIC PetscErrorCode VecGetKokkosViewWrite (Vec v,PetscScalarKokkosViewHost* kv) {return VecGetKokkosView_Private(v,kv,PETSC_TRUE);}
72: template<> PETSC_VISIBILITY_PUBLIC PetscErrorCode VecRestoreKokkosViewWrite(Vec v,PetscScalarKokkosViewHost* kv) {return VecRestoreKokkosView_Private(v,kv,PETSC_TRUE);}
73: #endif
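/* Editor's sketch (not part of the original source): typical application-side use of the accessors
   instantiated above, assuming a Kokkos-enabled PETSc build; the function name Scale2 and the vector
   x are illustrative. Error checking is omitted to match the style of this file.

     #include <petscvec_kokkos.hpp>

     PetscErrorCode Scale2(Vec x)
     {
       PetscScalarKokkosView xv;
       PetscInt              n;

       VecGetLocalSize(x,&n);
       VecGetKokkosView(x,&xv);                                           // sync and get the device view
       Kokkos::parallel_for(n,KOKKOS_LAMBDA(PetscInt i) {xv(i) *= 2.0;});
       VecRestoreKokkosView(x,&xv);                                       // mark the device side modified
       return 0;
     }
*/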
75: PetscErrorCode VecSetRandom_SeqKokkos(Vec xin,PetscRandom r)
76: {
77: const PetscInt n = xin->map->n;
78: PetscScalar *xx;
80: VecGetArrayWrite(xin,&xx); /* TODO: generate randoms directly on device */
81: for (PetscInt i=0; i<n; i++) PetscRandomGetValue(r,&xx[i]);
82: VecRestoreArrayWrite(xin,&xx);
83: return 0;
84: }
86: /* x = |x| */
87: PetscErrorCode VecAbs_SeqKokkos(Vec xin)
88: {
89: PetscScalarKokkosView xv;
91: PetscLogGpuTimeBegin();
92: VecGetKokkosView(xin,&xv);
93: KokkosBlas::abs(xv,xv);
94: VecRestoreKokkosView(xin,&xv);
95: PetscLogGpuTimeEnd();
96: return 0;
97: }
99: /* x = 1/x */
100: PetscErrorCode VecReciprocal_SeqKokkos(Vec xin)
101: {
102: PetscScalarKokkosView xv;
104: PetscLogGpuTimeBegin();
105: VecGetKokkosView(xin,&xv);
106: Kokkos::parallel_for(xv.extent(0),KOKKOS_LAMBDA(const int64_t i) {if (xv(i) != (PetscScalar)0.0) xv(i) = (PetscScalar)1.0/xv(i);});
107: VecRestoreKokkosView(xin,&xv);
108: PetscLogGpuTimeEnd();
109: return 0;
110: }
112: PetscErrorCode VecMin_SeqKokkos(Vec xin,PetscInt *p,PetscReal *val)
113: {
114: typedef Kokkos::MinLoc<PetscReal,PetscInt>::value_type MinLocValue_t;
115: ConstPetscScalarKokkosView xv;
116: MinLocValue_t minloc;
118: PetscLogGpuTimeBegin();
119: VecGetKokkosView(xin,&xv);
120: Kokkos::parallel_reduce("VecMin",xin->map->n,KOKKOS_LAMBDA(PetscInt i,MinLocValue_t& lminloc) {
121: if (PetscRealPart(xv(i)) < lminloc.val) {
122: lminloc.val = PetscRealPart(xv(i));
123: lminloc.loc = i;
124: }
125: },Kokkos::MinLoc<PetscReal,PetscInt>(minloc)); /* Kokkos will set minloc properly even if xin is zero-length */
126: if (p) *p = minloc.loc;
127: *val = minloc.val;
128: VecRestoreKokkosView(xin,&xv);
129: PetscLogGpuTimeEnd();
130: return 0;
131: }
133: PetscErrorCode VecMax_SeqKokkos(Vec xin,PetscInt *p,PetscReal *val)
134: {
135: typedef Kokkos::MaxLoc<PetscReal,PetscInt>::value_type MaxLocValue_t;
136: ConstPetscScalarKokkosView xv;
137: MaxLocValue_t maxloc;
139: PetscLogGpuTimeBegin();
140: VecGetKokkosView(xin,&xv);
141: Kokkos::parallel_reduce("VecMax",xin->map->n,KOKKOS_LAMBDA(PetscInt i,MaxLocValue_t& lmaxloc) {
142: if (PetscRealPart(xv(i)) > lmaxloc.val) {
143: lmaxloc.val = PetscRealPart(xv(i));
144: lmaxloc.loc = i;
145: }
146: },Kokkos::MaxLoc<PetscReal,PetscInt>(maxloc));
147: if (p) *p = maxloc.loc;
148: *val = maxloc.val;
149: VecRestoreKokkosView(xin,&xv);
150: PetscLogGpuTimeEnd();
151: return 0;
152: }
154: PetscErrorCode VecSum_SeqKokkos(Vec xin,PetscScalar* sum)
155: {
156: ConstPetscScalarKokkosView xv;
158: PetscLogGpuTimeBegin();
159: VecGetKokkosView(xin,&xv);
160: *sum = KokkosBlas::sum(xv);
161: VecRestoreKokkosView(xin,&xv);
162: PetscLogGpuTimeEnd();
163: return 0;
164: }
166: PetscErrorCode VecShift_SeqKokkos(Vec xin,PetscScalar shift)
167: {
168: PetscScalarKokkosView xv;
170: PetscLogGpuTimeBegin();
171: VecGetKokkosView(xin,&xv);
172: Kokkos::parallel_for("VecShift",xin->map->n,KOKKOS_LAMBDA(PetscInt i) {xv(i) += shift;});
173: VecRestoreKokkosView(xin,&xv);
174: PetscLogGpuTimeEnd();
175: return 0;
176: }
178: /* y = alpha x + y */
179: PetscErrorCode VecAXPY_SeqKokkos(Vec yin,PetscScalar alpha,Vec xin)
180: {
181: PetscBool xiskok,yiskok;
182: PetscScalarKokkosView yv;
183: ConstPetscScalarKokkosView xv;
185: if (alpha == (PetscScalar)0.0) return 0;
186: if (yin == xin) {
187: VecScale_SeqKokkos(yin,alpha+1);
188: return 0;
189: }
190: PetscObjectTypeCompareAny((PetscObject)xin,&xiskok,VECSEQKOKKOS,VECMPIKOKKOS,"");
191: PetscObjectTypeCompareAny((PetscObject)yin,&yiskok,VECSEQKOKKOS,VECMPIKOKKOS,"");
192: if (xiskok && yiskok) {
193: PetscLogGpuTimeBegin();
194: VecGetKokkosView(xin,&xv);
195: VecGetKokkosView(yin,&yv);
196: KokkosBlas::axpy(alpha,xv,yv);
197: VecRestoreKokkosView(xin,&xv);
198: VecRestoreKokkosView(yin,&yv);
199: PetscLogGpuTimeEnd();
200: PetscLogGpuFlops(2.0*yin->map->n);
201: } else {
202: VecAXPY_Seq(yin,alpha,xin);
203: }
204: return 0;
205: }
207: /* y = x + beta y */
208: PetscErrorCode VecAYPX_SeqKokkos(Vec yin,PetscScalar beta,Vec xin)
209: {
210: /* One needs to define KOKKOSBLAS_OPTIMIZATION_LEVEL_AXPBY > 2 to get optimized code paths for the special cases alpha,beta = 0, +1, -1 */
211: VecAXPBY_SeqKokkos(yin,1.0,beta,xin);
212: return 0;
213: }
215: /* z = y^T x */
216: PetscErrorCode VecTDot_SeqKokkos(Vec xin,Vec yin,PetscScalar *z)
217: {
218: ConstPetscScalarKokkosView xv,yv;
220: PetscLogGpuTimeBegin();
221: VecGetKokkosView(xin,&xv);
222: VecGetKokkosView(yin,&yv);
223: Kokkos::parallel_reduce("VecTDot",xin->map->n,KOKKOS_LAMBDA(int64_t i, PetscScalar& update) {
224: update += yv(i)*xv(i);
225: },*z); /* Kokkos always overwrites z, so no need to init it */
226: VecRestoreKokkosView(yin,&yv);
227: VecRestoreKokkosView(xin,&xv);
228: PetscLogGpuTimeEnd();
229: if (xin->map->n > 0) PetscLogGpuFlops(2.0*xin->map->n);
230: return 0;
231: }
233: struct TransposeDotTag {};
234: struct ConjugateDotTag {};
236: struct MDotFunctor {
237: /* Note the C++ notation for an array typedef */
238: typedef PetscScalar value_type[];
239: typedef ConstPetscScalarKokkosView::size_type size_type;
241: /* Tell Kokkos the result array's number of entries. This must be a public value in the functor */
242: const size_type value_count;
243: ConstPetscScalarKokkosView xv,yv[8];
245: MDotFunctor(ConstPetscScalarKokkosView& xv,
246: const PetscInt ny, /* Number of valid entries in yv[8]. 1 <= ny <= 8 */
247: ConstPetscScalarKokkosView& yv0, ConstPetscScalarKokkosView& yv1,
248: ConstPetscScalarKokkosView& yv2, ConstPetscScalarKokkosView& yv3,
249: ConstPetscScalarKokkosView& yv4, ConstPetscScalarKokkosView& yv5,
250: ConstPetscScalarKokkosView& yv6, ConstPetscScalarKokkosView& yv7)
251: : value_count(ny),xv(xv)
252: {
253: yv[0] = yv0; yv[1] = yv1;
254: yv[2] = yv2; yv[3] = yv3;
255: yv[4] = yv4; yv[5] = yv5;
256: yv[6] = yv6; yv[7] = yv7;
257: }
259: KOKKOS_INLINE_FUNCTION void operator() (TransposeDotTag,const size_type i,value_type sum) const
260: {
261: PetscScalar xval = xv(i);
262: for (size_type j = 0; j<value_count; ++j) sum[j] += yv[j](i)*xval;
263: }
265: KOKKOS_INLINE_FUNCTION void operator() (ConjugateDotTag,const size_type i,value_type sum) const
266: {
267: PetscScalar xval = xv(i);
268: for (size_type j = 0; j<value_count; ++j) sum[j] += PetscConj(yv[j](i))*xval;
269: }
271: KOKKOS_INLINE_FUNCTION void join (volatile value_type dst,const volatile value_type src) const
272: {
273: for (size_type j = 0; j<value_count; j++) dst[j] += src[j];
274: }
276: KOKKOS_INLINE_FUNCTION void init (value_type sum) const
277: {
278: for (size_type j = 0; j<value_count; j++) sum[j] = 0.0;
279: }
280: };
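/* Editor's note (not part of the original source): MDotFunctor uses Kokkos' array-valued reduction
   interface: value_type is an array type, the public member value_count gives its runtime length, and
   init()/join() supply the identity and the combine step. A minimal standalone sketch of the same
   pattern (names are illustrative), reducing two quantities in one pass:

     struct PairSum {
       typedef PetscScalar value_type[];
       const unsigned value_count = 2;
       ConstPetscScalarKokkosView v;
       PairSum(ConstPetscScalarKokkosView v_) : v(v_) {}
       KOKKOS_INLINE_FUNCTION void operator() (const PetscInt i,value_type s) const
       {
         s[0] += v(i);          // running sum
         s[1] += v(i)*v(i);     // running sum of squares
       }
       KOKKOS_INLINE_FUNCTION void join (volatile value_type dst,const volatile value_type src) const
       {
         for (unsigned j=0; j<value_count; j++) dst[j] += src[j];
       }
       KOKKOS_INLINE_FUNCTION void init (value_type s) const
       {
         for (unsigned j=0; j<value_count; j++) s[j] = 0.0;
       }
     };
     // PetscScalar result[2];
     // Kokkos::parallel_reduce(n,PairSum(xv),result);   // result[0] = sum, result[1] = sum of squares
*/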
282: template<class WorkTag>
283: PetscErrorCode VecMultiDot_Private(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
284: {
285: PetscInt i,j,cur=0,ngroup=nv/8,rem=nv%8,N=xin->map->n;
286: ConstPetscScalarKokkosView xv,yv[8];
287: PetscScalarKokkosViewHost zv(z,nv);
289: VecGetKokkosView(xin,&xv);
290: for (i=0; i<ngroup; i++) { /* 8 y's per group */
291: for (j=0; j<8; j++) VecGetKokkosView(yin[cur+j],&yv[j]);
292: MDotFunctor mdot(xv,8,yv[0],yv[1],yv[2],yv[3],yv[4],yv[5],yv[6],yv[7]); /* Hopefully Kokkos launches the reduction asynchronously */
293: Kokkos::parallel_reduce(Kokkos::RangePolicy<WorkTag>(0,N),mdot,Kokkos::subview(zv,Kokkos::pair<PetscInt,PetscInt>(cur,cur+8)));
294: for (j=0; j<8; j++) VecRestoreKokkosView(yin[cur+j],&yv[j]);
295: cur += 8;
296: }
298: if (rem) { /* The remaining */
299: for (j=0; j<rem; j++) VecGetKokkosView(yin[cur+j],&yv[j]);
300: MDotFunctor mdot(xv,rem,yv[0],yv[1],yv[2],yv[3],yv[4],yv[5],yv[6],yv[7]);
301: Kokkos::parallel_reduce(Kokkos::RangePolicy<WorkTag>(0,N),mdot,Kokkos::subview(zv,Kokkos::pair<PetscInt,PetscInt>(cur,cur+rem)));
302: for (j=0; j<rem; j++) VecRestoreKokkosView(yin[cur+j],&yv[j]);
303: }
304: VecRestoreKokkosView(xin,&xv);
305: Kokkos::fence(); /* If reduce is async, then we need this fence to make sure z is ready for use on host */
306: return 0;
307: }
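/* Editor's sketch (not part of the original source): the batching above means a call such as the one
   below (names illustrative) computes all dot products with a single functor launch when nv <= 8, and
   with ceil(nv/8) launches otherwise:

     Vec         y[3];
     PetscScalar z[3];
     // ... create x and y[0..2] as VECSEQKOKKOS vectors of equal length ...
     VecMDot(x,3,y,z);    // z[i] = y[i]^H x, dispatched to VecMDot_SeqKokkos() below
*/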
309: /* z[i] = (x,y_i) = y_i^H x */
310: PetscErrorCode VecMDot_SeqKokkos(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
311: {
312: VecMultiDot_Private<ConjugateDotTag>(xin,nv,yin,z);
313: return 0;
314: }
316: /* z[i] = (x,y_i) = y_i^T x */
317: PetscErrorCode VecMTDot_SeqKokkos(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
318: {
319: VecMultiDot_Private<TransposeDotTag>(xin,nv,yin,z);
320: return 0;
321: }
323: /* x[:] = alpha */
324: PetscErrorCode VecSet_SeqKokkos(Vec xin,PetscScalar alpha)
325: {
326: PetscScalarKokkosView xv;
328: PetscLogGpuTimeBegin();
329: VecGetKokkosViewWrite(xin,&xv);
330: KokkosBlas::fill(xv,alpha);
331: VecRestoreKokkosViewWrite(xin,&xv);
332: PetscLogGpuTimeEnd();
333: return 0;
334: }
336: /* x = alpha x */
337: PetscErrorCode VecScale_SeqKokkos(Vec xin,PetscScalar alpha)
338: {
339: PetscScalarKokkosView xv;
341: if (alpha == (PetscScalar)0.0) {
342: VecSet_SeqKokkos(xin,alpha);
343: } else if (alpha != (PetscScalar)1.0) {
344: PetscLogGpuTimeBegin();
345: VecGetKokkosView(xin,&xv);
346: KokkosBlas::scal(xv,alpha,xv);
347: VecRestoreKokkosView(xin,&xv);
348: PetscLogGpuTimeEnd();
349: PetscLogGpuFlops(xin->map->n);
350: }
351: return 0;
352: }
354: /* z = y^H x */
355: PetscErrorCode VecDot_SeqKokkos(Vec xin,Vec yin,PetscScalar *z)
356: {
357: ConstPetscScalarKokkosView xv,yv;
359: PetscLogGpuTimeBegin();
360: VecGetKokkosView(xin,&xv);
361: VecGetKokkosView(yin,&yv);
362: *z = KokkosBlas::dot(yv,xv); /* KokkosBlas::dot(a,b) takes conjugate of a */
363: VecRestoreKokkosView(xin,&xv);
364: VecRestoreKokkosView(yin,&yv);
365: PetscLogGpuTimeEnd();
366: if (xin->map->n > 0) PetscLogGpuFlops(2.0*xin->map->n-1);
367: return 0;
368: }
370: /* y = x, where x is VECKOKKOS but y is not necessarily */
371: PetscErrorCode VecCopy_SeqKokkos(Vec xin,Vec yin)
372: {
373: PetscLogGpuTimeBegin();
374: if (xin != yin) {
375: Vec_Kokkos *xkok = static_cast<Vec_Kokkos*>(xin->spptr);
376: if (yin->offloadmask == PETSC_OFFLOAD_KOKKOS) {
377: /* y is also a VecKokkos */
378: Vec_Kokkos *ykok = static_cast<Vec_Kokkos*>(yin->spptr);
379: /* Kokkos deep_copy() rule for DualViews: if x's host has newer data, it copies to y's host view; otherwise to y's device view.
380: If x's host is newer while y's device is newer, deep_copy() errors out (though arguably it should not need to), so we
381: simply clear y's sync state first.
382: */
383: ykok->v_dual.clear_sync_state();
384: Kokkos::deep_copy(ykok->v_dual,xkok->v_dual);
385: } else {
386: PetscScalar *yarray;
387: VecGetArrayWrite(yin,&yarray);
388: PetscScalarKokkosViewHost yv(yarray,yin->map->n);
389: if (xkok->v_dual.need_sync_host()) { /* x's device has newer data */
390: Kokkos::deep_copy(yv,xkok->v_dual.view_device());
391: } else {
392: Kokkos::deep_copy(yv,xkok->v_dual.view_host());
393: }
394: VecRestoreArrayWrite(yin,&yarray);
395: }
396: }
397: PetscLogGpuTimeEnd();
398: return 0;
399: }
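/* Editor's note (not part of the original source): a minimal sketch of the Kokkos::DualView
   bookkeeping that VecCopy_SeqKokkos() relies on, assuming a non-host default device:

     Kokkos::DualView<PetscScalar*> d("d",n);
     d.modify_host();                 // declare that the host side now holds the newest data
     d.sync_device();                 // copy host -> device and clear the modified flag
     if (d.need_sync_host()) {}       // true when the device side is newer than the host side
     d.clear_sync_state();            // forget which side is newer (done above before overwriting y)
*/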
401: /* y[i] <--> x[i] */
402: PetscErrorCode VecSwap_SeqKokkos(Vec xin,Vec yin)
403: {
404: PetscScalarKokkosView xv,yv;
406: if (xin != yin) {
407: PetscLogGpuTimeBegin();
408: VecGetKokkosView(xin,&xv);
409: VecGetKokkosView(yin,&yv);
410: Kokkos::parallel_for(xin->map->n,KOKKOS_LAMBDA(const int64_t i) {
411: PetscScalar tmp = xv(i);
412: xv(i) = yv(i);
413: yv(i) = tmp;
414: });
415: VecRestoreKokkosView(xin,&xv);
416: VecRestoreKokkosView(yin,&yv);
417: PetscLogGpuTimeEnd();
418: }
419: return 0;
420: }
422: /* w = alpha x + y */
423: PetscErrorCode VecWAXPY_SeqKokkos(Vec win,PetscScalar alpha,Vec xin, Vec yin)
424: {
425: ConstPetscScalarKokkosView xv,yv;
426: PetscScalarKokkosView wv;
428: if (alpha == (PetscScalar)0.0) {
429: VecCopy_SeqKokkos(yin,win);
430: } else {
431: PetscLogGpuTimeBegin();
432: VecGetKokkosViewWrite(win,&wv);
433: VecGetKokkosView(xin,&xv);
434: VecGetKokkosView(yin,&yv);
435: Kokkos::parallel_for(win->map->n,KOKKOS_LAMBDA(const int64_t i) {wv(i) = alpha*xv(i) + yv(i);});
436: VecRestoreKokkosView(xin,&xv);
437: VecRestoreKokkosView(yin,&yv);
438: VecRestoreKokkosViewWrite(win,&wv);
439: PetscLogGpuTimeEnd();
440: PetscLogGpuFlops(2*win->map->n);
441: }
442: return 0;
443: }
445: struct MAXPYFunctor {
446: typedef ConstPetscScalarKokkosView::size_type size_type;
448: PetscScalarKokkosView yv;
449: PetscInt nx; /* Number of significant entries in a[8] and xv[8]. 1 <= nx <= 8 */
450: PetscScalar a[8];
451: ConstPetscScalarKokkosView xv[8];
453: MAXPYFunctor(PetscScalarKokkosView yv,
454: PetscInt nx,
455: PetscScalar a0,PetscScalar a1,PetscScalar a2,PetscScalar a3,
456: PetscScalar a4,PetscScalar a5,PetscScalar a6,PetscScalar a7,
457: ConstPetscScalarKokkosView xv0, ConstPetscScalarKokkosView xv1,
458: ConstPetscScalarKokkosView xv2, ConstPetscScalarKokkosView xv3,
459: ConstPetscScalarKokkosView xv4, ConstPetscScalarKokkosView xv5,
460: ConstPetscScalarKokkosView xv6, ConstPetscScalarKokkosView xv7)
461: : yv(yv),nx(nx)
462: {
463: a[0] = a0; a[1] = a1;
464: a[2] = a2; a[3] = a3;
465: a[4] = a4; a[5] = a5;
466: a[6] = a6; a[7] = a7;
467: xv[0] = xv0; xv[1] = xv1;
468: xv[2] = xv2; xv[3] = xv3;
469: xv[4] = xv4; xv[5] = xv5;
470: xv[6] = xv6; xv[7] = xv7;
471: }
473: KOKKOS_INLINE_FUNCTION void operator() (const size_type i) const
474: {
475: for (PetscInt j = 0; j<nx; ++j) yv(i) += a[j]*xv[j](i);
476: }
477: };
479: /* y = y + sum alpha[i] x[i] */
480: PetscErrorCode VecMAXPY_SeqKokkos(Vec yin, PetscInt nv,const PetscScalar *alpha,Vec *xin)
481: {
482: PetscInt i,j,cur=0,ngroup=nv/8,rem=nv%8;
483: PetscScalarKokkosView yv;
484: PetscScalar a[8];
485: ConstPetscScalarKokkosView xv[8];
487: VecGetKokkosView(yin,&yv);
488: for (i=0; i<ngroup; i++) { /* 8 x's per group */
489: for (j=0; j<8; j++) { /* Fill the parameters */
490: a[j] = alpha[cur+j];
491: VecGetKokkosView(xin[cur+j],&xv[j]);
492: }
493: MAXPYFunctor maxpy(yv,8,a[0],a[1],a[2],a[3],a[4],a[5],a[6],a[7],xv[0],xv[1],xv[2],xv[3],xv[4],xv[5],xv[6],xv[7]);
494: Kokkos::parallel_for(yin->map->n,maxpy);
495: for (j=0; j<8; j++) VecRestoreKokkosView(xin[cur+j],&xv[j]);
496: cur += 8;
497: }
499: if (rem) { /* The remaining */
500: for (j=0; j<rem; j++) {
501: a[j] = alpha[cur+j];
502: VecGetKokkosView(xin[cur+j],&xv[j]);
503: }
504: MAXPYFunctor maxpy(yv,rem,a[0],a[1],a[2],a[3],a[4],a[5],a[6],a[7],xv[0],xv[1],xv[2],xv[3],xv[4],xv[5],xv[6],xv[7]);
505: Kokkos::parallel_for(yin->map->n,maxpy);
506: for (j=0; j<rem; j++) VecRestoreKokkosView(xin[cur+j],&xv[j]);
507: }
508: VecRestoreKokkosView(yin,&yv);
509: PetscLogGpuFlops(nv*2.0*yin->map->n);
510: return 0;
511: }
513: /* y = alpha x + beta y */
514: PetscErrorCode VecAXPBY_SeqKokkos(Vec yin,PetscScalar alpha,PetscScalar beta,Vec xin)
515: {
516: ConstPetscScalarKokkosView xv;
517: PetscScalarKokkosView yv;
518: PetscBool xiskok,yiskok;
520: PetscObjectTypeCompareAny((PetscObject)xin,&xiskok,VECSEQKOKKOS,VECMPIKOKKOS,"");
521: PetscObjectTypeCompareAny((PetscObject)yin,&yiskok,VECSEQKOKKOS,VECMPIKOKKOS,"");
522: if (xiskok && yiskok) {
523: PetscLogGpuTimeBegin();
524: VecGetKokkosView(xin,&xv);
525: VecGetKokkosView(yin,&yv);
526: KokkosBlas::axpby(alpha,xv,beta,yv);
527: VecRestoreKokkosView(xin,&xv);
528: VecRestoreKokkosView(yin,&yv);
529: PetscLogGpuTimeEnd();
530: if (alpha == (PetscScalar)0.0 || beta == (PetscScalar)0.0) {
531: PetscLogGpuFlops(xin->map->n);
532: } else if (beta == (PetscScalar)1.0 || alpha == (PetscScalar)1.0) {
533: PetscLogGpuFlops(2.0*xin->map->n);
534: } else {
535: PetscLogGpuFlops(3.0*xin->map->n);
536: }
537: } else {
538: VecAXPBY_Seq(yin,alpha,beta,xin);
539: }
540: return 0;
541: }
543: /* z = alpha x + beta y + gamma z */
544: PetscErrorCode VecAXPBYPCZ_SeqKokkos(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)
545: {
546: ConstPetscScalarKokkosView xv,yv;
547: PetscScalarKokkosView zv;
549: PetscLogGpuTimeBegin();
550: VecGetKokkosView(zin,&zv);
551: VecGetKokkosView(xin,&xv);
552: VecGetKokkosView(yin,&yv);
553: KokkosBlas::update(alpha,xv,beta,yv,gamma,zv);
554: VecRestoreKokkosView(xin,&xv);
555: VecRestoreKokkosView(yin,&yv);
556: VecRestoreKokkosView(zin,&zv);
557: PetscLogGpuTimeEnd();
558: PetscLogGpuFlops(zin->map->n*5);
559: return 0;
560: }
562: /* w = x*y. Any two (or all three) of x, y, and w may be the same vector.
564: w is of type VECKOKKOS, but x and y are not necessarily.
565: */
566: PetscErrorCode VecPointwiseMult_SeqKokkos(Vec win,Vec xin,Vec yin)
567: {
568: PetscInt n;
570: PetscLogGpuTimeBegin();
571: VecGetLocalSize(win,&n);
572: if (xin->offloadmask != PETSC_OFFLOAD_KOKKOS || yin->offloadmask != PETSC_OFFLOAD_KOKKOS) {
573: PetscScalarKokkosViewHost wv;
574: const PetscScalar *xp,*yp;
575: VecGetArrayRead(xin,&xp);
576: VecGetArrayRead(yin,&yp);
577: VecGetKokkosViewWrite(win,&wv);
579: ConstPetscScalarKokkosViewHost xv(xp,n),yv(yp,n);
580: Kokkos::parallel_for(Kokkos::RangePolicy<Kokkos::DefaultHostExecutionSpace>(0,n),KOKKOS_LAMBDA(const PetscInt i) {wv(i) = xv(i)*yv(i);});
582: VecRestoreArrayRead(xin,&xp);
583: VecRestoreArrayRead(yin,&yp);
584: VecRestoreKokkosViewWrite(win,&wv);
585: } else {
586: ConstPetscScalarKokkosView xv,yv;
587: PetscScalarKokkosView wv;
589: VecGetKokkosViewWrite(win,&wv);
590: VecGetKokkosView(xin,&xv);
591: VecGetKokkosView(yin,&yv);
592: Kokkos::parallel_for(n,KOKKOS_LAMBDA(const PetscInt i) {wv(i) = xv(i)*yv(i);});
593: VecRestoreKokkosView(yin,&yv);
594: VecRestoreKokkosView(xin,&xv);
595: VecRestoreKokkosViewWrite(win,&wv);
596: }
597: PetscLogGpuTimeEnd();
598: PetscLogGpuFlops(n);
599: return 0;
600: }
602: /* w = x/y */
603: PetscErrorCode VecPointwiseDivide_SeqKokkos(Vec win,Vec xin,Vec yin)
604: {
605: PetscInt n;
607: PetscLogGpuTimeBegin();
608: VecGetLocalSize(win,&n);
609: if (xin->offloadmask != PETSC_OFFLOAD_KOKKOS || yin->offloadmask != PETSC_OFFLOAD_KOKKOS) {
610: PetscScalarKokkosViewHost wv;
611: const PetscScalar *xp,*yp;
612: VecGetArrayRead(xin,&xp);
613: VecGetArrayRead(yin,&yp);
614: VecGetKokkosViewWrite(win,&wv);
616: ConstPetscScalarKokkosViewHost xv(xp,n),yv(yp,n);
617: Kokkos::parallel_for(Kokkos::RangePolicy<Kokkos::DefaultHostExecutionSpace>(0,n),KOKKOS_LAMBDA(const PetscInt i) {
618: if (yv(i) != 0.0) wv(i) = xv(i)/yv(i);
619: else wv(i) = 0.0;
620: });
622: VecRestoreArrayRead(xin,&xp);
623: VecRestoreArrayRead(yin,&yp);
624: VecRestoreKokkosViewWrite(win,&wv);
625: } else {
626: ConstPetscScalarKokkosView xv,yv;
627: PetscScalarKokkosView wv;
629: VecGetKokkosViewWrite(win,&wv);
630: VecGetKokkosView(xin,&xv);
631: VecGetKokkosView(yin,&yv);
632: Kokkos::parallel_for(n,KOKKOS_LAMBDA(const PetscInt i) {
633: if (yv(i) != 0.0) wv(i) = xv(i)/yv(i);
634: else wv(i) = 0.0;
635: });
636: VecRestoreKokkosView(yin,&yv);
637: VecRestoreKokkosView(xin,&xv);
638: VecRestoreKokkosViewWrite(win,&wv);
639: }
640: PetscLogGpuTimeEnd();
641: PetscLogGpuFlops(win->map->n);
642: return 0;
643: }
645: PetscErrorCode VecNorm_SeqKokkos(Vec xin,NormType type,PetscReal *z)
646: {
647: const PetscInt n = xin->map->n;
648: ConstPetscScalarKokkosView xv;
650: if (type == NORM_1_AND_2) {
651: VecNorm_SeqKokkos(xin,NORM_1,z);
652: VecNorm_SeqKokkos(xin,NORM_2,z+1);
653: } else {
654: PetscLogGpuTimeBegin();
655: VecGetKokkosView(xin,&xv);
656: if (type == NORM_2 || type == NORM_FROBENIUS) {
657: *z = KokkosBlas::nrm2(xv);
658: PetscLogGpuFlops(PetscMax(2.0*n-1,0.0));
659: } else if (type == NORM_1) {
660: *z = KokkosBlas::nrm1(xv);
661: PetscLogGpuFlops(PetscMax(n-1.0,0.0));
662: } else if (type == NORM_INFINITY) {
663: *z = KokkosBlas::nrminf(xv);
664: }
665: VecRestoreKokkosView(xin,&xv);
666: PetscLogGpuTimeEnd();
667: }
668: return 0;
669: }
671: /* A functor for DotNorm2 so that we can compute dp and nm in one kernel */
672: struct DotNorm2 {
673: typedef PetscScalar value_type[];
674: typedef ConstPetscScalarKokkosView::size_type size_type;
676: size_type value_count;
677: ConstPetscScalarKokkosView xv_, yv_; /* first and second vectors in VecDotNorm2. The order matters. */
679: DotNorm2(ConstPetscScalarKokkosView& xv,ConstPetscScalarKokkosView& yv) :
680: value_count(2), xv_(xv), yv_(yv) {}
682: KOKKOS_INLINE_FUNCTION void operator() (const size_type i, value_type result) const
683: {
684: result[0] += PetscConj(yv_(i))*xv_(i);
685: result[1] += PetscConj(yv_(i))*yv_(i);
686: }
688: KOKKOS_INLINE_FUNCTION void join (volatile value_type dst, const volatile value_type src) const
689: {
690: dst[0] += src[0];
691: dst[1] += src[1];
692: }
694: KOKKOS_INLINE_FUNCTION void init (value_type result) const
695: {
696: result[0] = 0.0;
697: result[1] = 0.0;
698: }
699: };
701: /* dp = y^H x, nm = y^H y */
702: PetscErrorCode VecDotNorm2_SeqKokkos(Vec xin, Vec yin, PetscScalar *dp, PetscScalar *nm)
703: {
704: ConstPetscScalarKokkosView xv,yv;
705: PetscScalar result[2];
707: PetscLogGpuTimeBegin();
708: VecGetKokkosView(xin,&xv);
709: VecGetKokkosView(yin,&yv);
710: DotNorm2 dn(xv,yv);
711: Kokkos::parallel_reduce(xin->map->n,dn,result);
712: *dp = result[0];
713: *nm = result[1];
714: VecRestoreKokkosView(yin,&yv);
715: VecRestoreKokkosView(xin,&xv);
716: PetscLogGpuTimeEnd();
717: PetscLogGpuFlops(4.0*xin->map->n);
718: return 0;
719: }
721: PetscErrorCode VecConjugate_SeqKokkos(Vec xin)
722: {
723: #if defined(PETSC_USE_COMPLEX)
724: PetscScalarKokkosView xv;
726: PetscLogGpuTimeBegin();
727: VecGetKokkosView(xin,&xv);
728: Kokkos::parallel_for(xin->map->n,KOKKOS_LAMBDA(int64_t i) {xv(i) = PetscConj(xv(i));});
729: VecRestoreKokkosView(xin,&xv);
730: PetscLogGpuTimeEnd();
731: #else
732: #endif
733: return 0;
734: }
736: /* Temporarily replace the array in vin with a[]. Return to the original array with a call to VecResetArray() */
737: PetscErrorCode VecPlaceArray_SeqKokkos(Vec vin,const PetscScalar *a)
738: {
739: Vec_Seq *vecseq = (Vec_Seq*)vin->data;
740: Vec_Kokkos *veckok = static_cast<Vec_Kokkos*>(vin->spptr);
742: VecPlaceArray_Seq(vin,a);
743: veckok->UpdateArray<Kokkos::HostSpace>(vecseq->array);
744: return 0;
745: }
747: PetscErrorCode VecResetArray_SeqKokkos(Vec vin)
748: {
749: Vec_Seq *vecseq = (Vec_Seq*)vin->data;
750: Vec_Kokkos *veckok = static_cast<Vec_Kokkos*>(vin->spptr);
752: veckok->v_dual.sync_host(); /* The user wants to unhook the provided host array; sync it so that they get the latest values */
753: VecResetArray_Seq(vin); /* Swap back the old host array, assuming it has the latest values */
754: veckok->UpdateArray<Kokkos::HostSpace>(vecseq->array);
755: return 0;
756: }
758: /* Replace the array in vin with a[], which must have been allocated by PetscMalloc(); vin owns a[] afterwards. */
759: PetscErrorCode VecReplaceArray_SeqKokkos(Vec vin,const PetscScalar *a)
760: {
761: Vec_Seq *vecseq = (Vec_Seq*)vin->data;
762: Vec_Kokkos *veckok = static_cast<Vec_Kokkos*>(vin->spptr);
764: /* Make sure the user's array has the latest values */
765: if (vecseq->array != vecseq->array_allocated) veckok->v_dual.sync_host();
766: VecReplaceArray_Seq(vin,a);
767: veckok->UpdateArray<Kokkos::HostSpace>(vecseq->array);
768: return 0;
769: }
771: /* Maps the local portion of vector v into vector w */
772: PetscErrorCode VecGetLocalVector_SeqKokkos(Vec v,Vec w)
773: {
774: Vec_Seq *vecseq = static_cast<Vec_Seq*>(w->data);
775: Vec_Kokkos *veckok = static_cast<Vec_Kokkos*>(w->spptr);
778: /* Destroy w->data, w->spptr */
779: if (vecseq) {
780: PetscFree(vecseq->array_allocated);
781: PetscFree(w->data);
782: }
783: delete veckok;
785: /* Replace with v's */
786: w->data = v->data;
787: w->spptr = v->spptr;
788: PetscObjectStateIncrease((PetscObject)w);
789: return 0;
790: }
792: PetscErrorCode VecRestoreLocalVector_SeqKokkos(Vec v,Vec w)
793: {
795: v->data = w->data;
796: v->spptr = w->spptr;
797: PetscObjectStateIncrease((PetscObject)v);
798: /* TODO: need to think if setting w->data/spptr to NULL is safe */
799: w->data = NULL;
800: w->spptr = NULL;
801: return 0;
802: }
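/* Editor's sketch (not part of the original source): typical use of the two routines above through the
   public interface, assuming xmpi is a parallel (e.g. VECMPIKOKKOS) vector; names are illustrative.

     Vec xloc;
     VecCreateLocalVector(xmpi,&xloc);      // a compatible sequential vector
     VecGetLocalVector(xmpi,xloc);          // xloc now shares xmpi's local data and Kokkos handles
     // ... apply any sequential Vec operation to xloc ...
     VecRestoreLocalVector(xmpi,xloc);      // hand the data back to xmpi
     VecDestroy(&xloc);
*/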
804: PetscErrorCode VecGetArray_SeqKokkos(Vec v,PetscScalar **a)
805: {
806: Vec_Kokkos *veckok = static_cast<Vec_Kokkos*>(v->spptr);
808: veckok->v_dual.sync_host();
809: *a = *((PetscScalar**)v->data);
810: return 0;
811: }
813: PetscErrorCode VecRestoreArray_SeqKokkos(Vec v,PetscScalar **a)
814: {
815: Vec_Kokkos *veckok = static_cast<Vec_Kokkos*>(v->spptr);
817: veckok->v_dual.modify_host();
818: return 0;
819: }
821: /* Get the array on the host to overwrite, so there is no need to sync the host. In VecRestoreArrayWrite() we will mark the host as modified. */
822: PetscErrorCode VecGetArrayWrite_SeqKokkos(Vec v,PetscScalar **a)
823: {
824: Vec_Kokkos *veckok = static_cast<Vec_Kokkos*>(v->spptr);
826: veckok->v_dual.clear_sync_state();
827: *a = veckok->v_dual.view_host().data();
828: return 0;
829: }
831: PetscErrorCode VecGetArrayAndMemType_SeqKokkos(Vec v,PetscScalar** a,PetscMemType *mtype)
832: {
833: Vec_Kokkos *veckok = static_cast<Vec_Kokkos*>(v->spptr);
835: if (std::is_same<DefaultMemorySpace,Kokkos::HostSpace>::value) {
836: *a = veckok->v_dual.view_host().data();
837: if (mtype) *mtype = PETSC_MEMTYPE_HOST;
838: } else {
839: /* When a device is present, we always return up-to-date device data */
840: veckok->v_dual.sync_device();
841: *a = veckok->v_dual.view_device().data();
842: if (mtype) *mtype = PETSC_MEMTYPE_KOKKOS;
843: }
844: return 0;
845: }
847: PetscErrorCode VecRestoreArrayAndMemType_SeqKokkos(Vec v,PetscScalar** a)
848: {
849: Vec_Kokkos *veckok = static_cast<Vec_Kokkos*>(v->spptr);
851: if (std::is_same<DefaultMemorySpace,Kokkos::HostSpace>::value) {
852: veckok->v_dual.modify_host();
853: } else {
854: veckok->v_dual.modify_device();
855: }
856: return 0;
857: }
859: PetscErrorCode VecGetArrayWriteAndMemType_SeqKokkos(Vec v,PetscScalar** a,PetscMemType *mtype)
860: {
861: Vec_Kokkos *veckok = static_cast<Vec_Kokkos*>(v->spptr);
863: if (std::is_same<DefaultMemorySpace,Kokkos::HostSpace>::value) {
864: *a = veckok->v_dual.view_host().data();
865: if (mtype) *mtype = PETSC_MEMTYPE_HOST;
866: } else {
867: /* When a device is present, we always return device data (but there is no need to sync the device) */
868: veckok->v_dual.clear_sync_state(); /* So that in restore, we can safely modify_device() */
869: *a = veckok->v_dual.view_device().data();
870: if (mtype) *mtype = PETSC_MEMTYPE_KOKKOS;
871: }
872: return 0;
873: }
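/* Editor's sketch (not part of the original source): how a caller typically consumes the
   memtype-aware accessors above; the helper name AddOne is illustrative.

     PetscErrorCode AddOne(Vec x)
     {
       PetscScalar  *a;
       PetscMemType  mtype;
       PetscInt      n;

       VecGetLocalSize(x,&n);
       VecGetArrayAndMemType(x,&a,&mtype);
       if (mtype == PETSC_MEMTYPE_HOST) {
         for (PetscInt i=0; i<n; i++) a[i] += 1.0;                          // plain host pointer
       } else {
         PetscScalarKokkosView av(a,n);                                     // wrap the device pointer in an unmanaged view
         Kokkos::parallel_for(n,KOKKOS_LAMBDA(PetscInt i) {av(i) += 1.0;});
       }
       VecRestoreArrayAndMemType(x,&a);
       return 0;
     }
*/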
875: /* Copy xin's sync state to y */
876: static PetscErrorCode VecCopySyncState_Kokkos_Private(Vec xin,Vec yout)
877: {
878: Vec_Kokkos *xkok = static_cast<Vec_Kokkos*>(xin->spptr);
879: Vec_Kokkos *ykok = static_cast<Vec_Kokkos*>(yout->spptr);
881: ykok->v_dual.clear_sync_state();
882: if (xkok->v_dual.need_sync_host()) {
883: ykok->v_dual.modify_device();
884: } else if (xkok->v_dual.need_sync_device()) {
885: ykok->v_dual.modify_host();
886: }
887: return 0;
888: }
890: /* Internal routine shared by VecGetSubVector_{SeqKokkos,MPIKokkos} */
891: PetscErrorCode VecGetSubVector_Kokkos_Private(Vec x,PetscBool xIsMPI,IS is,Vec *y)
892: {
893: PetscBool contig;
894: PetscInt n,N,start,bs;
895: MPI_Comm comm;
896: Vec z;
898: PetscObjectGetComm((PetscObject)x,&comm);
899: ISGetLocalSize(is,&n);
900: ISGetSize(is,&N);
901: VecGetSubVectorContiguityAndBS_Private(x,is,&contig,&start,&bs);
903: if (contig) { /* We can do a no-copy (in-place) implementation with y sharing x's arrays */
904: Vec_Kokkos *xkok = static_cast<Vec_Kokkos*>(x->spptr);
905: const PetscScalar *array_h = xkok->v_dual.view_host().data() + start;
906: const PetscScalar *array_d = xkok->v_dual.view_device().data() + start;
908: /* These calls assume the input arrays are synced */
909: if (xIsMPI) VecCreateMPIKokkosWithArrays_Private(comm,bs,n,N,array_h,array_d,&z); /* x could be MPI even when x's comm size = 1 */
910: else VecCreateSeqKokkosWithArrays_Private(comm,bs,n,array_h,array_d,&z);
912: VecCopySyncState_Kokkos_Private(x,z); /* Copy x's sync state to z */
914: /* This is relevant only in debug mode */
915: PetscInt state = 0;
916: VecLockGet(x,&state);
917: if (state) { /* x is either in read or read/write mode; therefore z, which overlaps x, can only be in read mode */
918: VecLockReadPush(z);
919: }
921: z->ops->placearray = NULL; /* z's arrays can't be replaced, because z does not own them */
922: z->ops->replacearray = NULL;
924: } else { /* Have to create a VecScatter and a stand-alone vector */
925: VecGetSubVectorThroughVecScatter_Private(x,is,bs,&z);
926: }
927: *y = z;
928: return 0;
929: }
931: PetscErrorCode VecGetSubVector_SeqKokkos(Vec x,IS is,Vec *y)
932: {
933: VecGetSubVector_Kokkos_Private(x,PETSC_FALSE,is,y);
934: return 0;
935: }
937: /* Restore subvector y to x */
938: PetscErrorCode VecRestoreSubVector_SeqKokkos(Vec x,IS is,Vec *y)
939: {
940: VecScatter vscat;
941: PETSC_UNUSED PetscObjectState dummystate = 0;
942: PetscBool unchanged;
944: PetscObjectComposedDataGetInt((PetscObject)*y,VecGetSubVectorSavedStateId,dummystate,unchanged);
945: if (unchanged) return 0; /* If y's state has not changed since VecGetSubVector(), we only need to destroy it */
947: PetscObjectQuery((PetscObject)*y,"VecGetSubVector_Scatter",(PetscObject*)&vscat);
948: if (vscat) {
949: VecScatterBegin(vscat,*y,x,INSERT_VALUES,SCATTER_REVERSE);
950: VecScatterEnd(vscat,*y,x,INSERT_VALUES,SCATTER_REVERSE);
951: } else { /* y and x's (host and device) arrays overlap */
952: Vec_Kokkos *xkok = static_cast<Vec_Kokkos*>(x->spptr);
953: Vec_Kokkos *ykok = static_cast<Vec_Kokkos*>((*y)->spptr);
954: PetscInt state;
956: VecLockGet(x,&state);
957: if (state) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Vec x is locked for read-only or read/write access"); /* restored check: this line was dropped from the listing; the exact message may differ */
959: /* The tricky part: one has to carefully sync the arrays */
960: if (xkok->v_dual.need_sync_device()) { /* x's host has newer data */
961: ykok->v_dual.sync_host(); /* Move y's latest values to host (since y is just a subset of x) */
962: } else if (xkok->v_dual.need_sync_host()) { /* x's device has newer data */
963: ykok->v_dual.sync_device(); /* Move y's latest data to device */
964: } else { /* x's host and device data is already sync'ed; Copy y's sync state to x */
965: VecCopySyncState_Kokkos_Private(*y,x);
966: }
967: PetscObjectStateIncrease((PetscObject)x); /* Since x is updated */
968: }
969: return 0;
970: }
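/* Editor's sketch (not part of the original source): using the subvector routines above through the
   public interface. With a contiguous, stride-1 index set the no-copy path of
   VecGetSubVector_Kokkos_Private() is taken; the names m and first are illustrative.

     IS  is;
     Vec sub;
     ISCreateStride(PETSC_COMM_SELF,m,first,1,&is);   // indices first, first+1, ..., first+m-1
     VecGetSubVector(x,is,&sub);                      // sub shares x's host and device arrays
     // ... use sub ...
     VecRestoreSubVector(x,is,&sub);
     ISDestroy(&is);
*/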
972: static PetscErrorCode VecSetOps_SeqKokkos(Vec v)
973: {
974: v->ops->abs = VecAbs_SeqKokkos;
975: v->ops->reciprocal = VecReciprocal_SeqKokkos;
976: v->ops->pointwisemult = VecPointwiseMult_SeqKokkos;
977: v->ops->min = VecMin_SeqKokkos;
978: v->ops->max = VecMax_SeqKokkos;
979: v->ops->sum = VecSum_SeqKokkos;
980: v->ops->shift = VecShift_SeqKokkos;
981: v->ops->norm = VecNorm_SeqKokkos;
982: v->ops->scale = VecScale_SeqKokkos;
983: v->ops->copy = VecCopy_SeqKokkos;
984: v->ops->set = VecSet_SeqKokkos;
985: v->ops->swap = VecSwap_SeqKokkos;
986: v->ops->axpy = VecAXPY_SeqKokkos;
987: v->ops->axpby = VecAXPBY_SeqKokkos;
988: v->ops->axpbypcz = VecAXPBYPCZ_SeqKokkos;
989: v->ops->pointwisedivide = VecPointwiseDivide_SeqKokkos;
990: v->ops->setrandom = VecSetRandom_SeqKokkos;
992: v->ops->dot = VecDot_SeqKokkos;
993: v->ops->tdot = VecTDot_SeqKokkos;
994: v->ops->mdot = VecMDot_SeqKokkos;
995: v->ops->mtdot = VecMTDot_SeqKokkos;
997: v->ops->dot_local = VecDot_SeqKokkos;
998: v->ops->tdot_local = VecTDot_SeqKokkos;
999: v->ops->mdot_local = VecMDot_SeqKokkos;
1000: v->ops->mtdot_local = VecMTDot_SeqKokkos;
1002: v->ops->norm_local = VecNorm_SeqKokkos;
1003: v->ops->maxpy = VecMAXPY_SeqKokkos;
1004: v->ops->aypx = VecAYPX_SeqKokkos;
1005: v->ops->waxpy = VecWAXPY_SeqKokkos;
1006: v->ops->dotnorm2 = VecDotNorm2_SeqKokkos;
1007: v->ops->placearray = VecPlaceArray_SeqKokkos;
1008: v->ops->replacearray = VecReplaceArray_SeqKokkos;
1009: v->ops->resetarray = VecResetArray_SeqKokkos;
1010: v->ops->destroy = VecDestroy_SeqKokkos;
1011: v->ops->duplicate = VecDuplicate_SeqKokkos;
1012: v->ops->conjugate = VecConjugate_SeqKokkos;
1013: v->ops->getlocalvector = VecGetLocalVector_SeqKokkos;
1014: v->ops->restorelocalvector = VecRestoreLocalVector_SeqKokkos;
1015: v->ops->getlocalvectorread = VecGetLocalVector_SeqKokkos;
1016: v->ops->restorelocalvectorread = VecRestoreLocalVector_SeqKokkos;
1017: v->ops->getarraywrite = VecGetArrayWrite_SeqKokkos;
1018: v->ops->getarray = VecGetArray_SeqKokkos;
1019: v->ops->restorearray = VecRestoreArray_SeqKokkos;
1021: v->ops->getarrayandmemtype = VecGetArrayAndMemType_SeqKokkos;
1022: v->ops->restorearrayandmemtype = VecRestoreArrayAndMemType_SeqKokkos;
1023: v->ops->getarraywriteandmemtype= VecGetArrayWriteAndMemType_SeqKokkos;
1024: v->ops->getsubvector = VecGetSubVector_SeqKokkos;
1025: v->ops->restoresubvector = VecRestoreSubVector_SeqKokkos;
1026: return 0;
1027: }
1029: /*MC
1030: VECSEQKOKKOS - VECSEQKOKKOS = "seqkokkos" - The basic sequential vector, modified to use Kokkos
1032: Options Database Keys:
1033: . -vec_type seqkokkos - sets the vector type to VECSEQKOKKOS during a call to VecSetFromOptions()
1035: Level: beginner
1037: .seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateMPIWithArray(), VECMPI, VecType, VecCreateMPI()
1038: M*/
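/* Editor's sketch (not part of the original source): creating a VECSEQKOKKOS explicitly, or through the
   options database via -vec_type seqkokkos. Error checking is omitted; n is illustrative.

     Vec x;
     VecCreate(PETSC_COMM_SELF,&x);
     VecSetSizes(x,n,n);
     VecSetType(x,VECSEQKOKKOS);   // or VecSetFromOptions(x) combined with -vec_type seqkokkos
     VecSet(x,1.0);
     VecDestroy(&x);
*/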
1039: PetscErrorCode VecCreate_SeqKokkos(Vec v)
1040: {
1041: Vec_Seq *vecseq;
1042: Vec_Kokkos *veckok;
1044: PetscKokkosInitializeCheck();
1045: PetscLayoutSetUp(v->map);
1046: VecCreate_Seq(v); /* Build a sequential vector, allocate array */
1047: PetscObjectChangeTypeName((PetscObject)v,VECSEQKOKKOS);
1048: VecSetOps_SeqKokkos(v);
1051: vecseq = static_cast<Vec_Seq*>(v->data);
1052: veckok = new Vec_Kokkos(v->map->n,vecseq->array,NULL); /* Let host claim it has the latest data (zero) */
1053: v->spptr = static_cast<void*>(veckok);
1054: v->offloadmask = PETSC_OFFLOAD_KOKKOS;
1055: return 0;
1056: }
1058: /*@C
1059: VecCreateSeqKokkosWithArray - Creates a Kokkos sequential array-style vector,
1060: where the user provides the array space to store the vector values. The array
1061: provided must be a device array.
1063: Collective
1065: Input Parameters:
1066: + comm - the communicator, should be PETSC_COMM_SELF
1067: . bs - the block size
1068: . n - the vector length
1069: - array - device memory where the vector elements are to be stored.
1071: Output Parameter:
1072: . v - the vector
1074: Notes:
1075: Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
1076: same type as an existing vector.
1078: PETSc does NOT free the array when the vector is destroyed via VecDestroy().
1079: The user should not free the array until the vector is destroyed.
1081: Level: intermediate
1083: .seealso: VecCreateMPICUDAWithArray(), VecCreate(), VecDuplicate(), VecDuplicateVecs(),
1084: VecCreateGhost(), VecCreateSeq(), VecCreateSeqWithArray(),
1085: VecCreateMPIWithArray()
1086: @*/
1087: PetscErrorCode VecCreateSeqKokkosWithArray(MPI_Comm comm,PetscInt bs,PetscInt n,const PetscScalar darray[],Vec *v)
1088: {
1089: PetscMPIInt size;
1090: Vec w;
1091: Vec_Kokkos *veckok = NULL;
1092: PetscScalar *harray;
1094: MPI_Comm_size(comm,&size);
1095: if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQKOKKOS on more than one process"); /* restored check: this line was dropped from the listing; the exact message may differ */
1097: PetscKokkosInitializeCheck();
1098: VecCreate(comm,&w);
1099: VecSetSizes(w,n,n);
1100: VecSetBlockSize(w,bs);
1101: if (!darray) { /* Allocate memory ourselves if the user provided NULL */
1102: VecSetType(w,VECSEQKOKKOS);
1103: } else {
1104: /* Build a VECSEQ, get its harray, and then build Vec_Kokkos along with darray */
1105: if (std::is_same<DefaultMemorySpace,Kokkos::HostSpace>::value) {
1106: harray = const_cast<PetscScalar*>(darray);
1107: VecCreate_Seq_Private(w,harray); /* Build a sequential vector with harray */
1108: } else {
1109: VecSetType(w,VECSEQ);
1110: harray = static_cast<Vec_Seq*>(w->data)->array;
1111: }
1112: PetscObjectChangeTypeName((PetscObject)w,VECSEQKOKKOS); /* Change it to Kokkos */
1113: VecSetOps_SeqKokkos(w);
1114: veckok = new Vec_Kokkos(n,harray,const_cast<PetscScalar*>(darray));
1115: veckok->v_dual.modify_device(); /* Mark the device as modified */
1116: w->offloadmask = PETSC_OFFLOAD_KOKKOS;
1117: w->spptr = static_cast<void*>(veckok);
1118: }
1119: *v = w;
1120: return 0;
1121: }
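/* Editor's sketch (not part of the original source): calling VecCreateSeqKokkosWithArray() with a
   device allocation owned by the caller, here a Kokkos view in the default memory space; the view
   must outlive the vector since VecDestroy() will not free it.

     PetscScalarKokkosView darray("darray",n);
     Vec v;
     VecCreateSeqKokkosWithArray(PETSC_COMM_SELF,1,n,darray.data(),&v);
     // ... use v ...
     VecDestroy(&v);
*/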
1123: /*
1124: VecCreateSeqKokkosWithArrays_Private - Creates a Kokkos sequential array-style vector
1125: with user-provided arrays on host and device.
1127: Collective
1129: Input Parameters:
1130: + comm - the communicator, should be PETSC_COMM_SELF
1131: . bs - the block size
1132: . n - the vector length
1133: . harray - host memory where the vector elements are to be stored.
1134: - darray - device memory where the vector elements are to be stored.
1136: Output Parameter:
1137: . v - the vector
1139: Notes:
1140: Unlike VecCreate{Seq,MPI}CUDAWithArrays(), this routine is private since we do not expect users to use it directly.
1142: If there is no device, then harray and darray must be the same.
1143: If n is not zero, then harray and darray must be allocated.
1144: After the call, the created vector is assumed to be in a synchronized state, i.e.,
1145: harray and darray are assumed to contain the same data.
1147: PETSc does NOT free the arrays when the vector is destroyed via VecDestroy().
1148: The caller should not free the arrays until the vector is destroyed.
1149: */
1150: PetscErrorCode VecCreateSeqKokkosWithArrays_Private(MPI_Comm comm,PetscInt bs,PetscInt n,const PetscScalar harray[],const PetscScalar darray[],Vec *v)
1151: {
1152: PetscMPIInt size;
1153: Vec w;
1155: PetscKokkosInitializeCheck();
1156: MPI_Comm_size(comm,&size);
1157: if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQKOKKOS on more than one process"); /* restored check: this line was dropped from the listing; the exact message may differ */
1158: if (n) {
1159: /* restored checks (the listing dropped the body of this block; the exact original checks may differ): harray and darray must be allocated when n is nonzero, see the Notes above */
1160: if (!harray || !darray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"harray and darray must be provided for a vector of nonzero length");
1161: }
1164: VecCreateSeqWithArray(comm,bs,n,harray,&w);
1165: PetscObjectChangeTypeName((PetscObject)w,VECSEQKOKKOS); /* Change it to Kokkos */
1166: VecSetOps_SeqKokkos(w);
1167: w->spptr = new Vec_Kokkos(n,const_cast<PetscScalar*>(harray),const_cast<PetscScalar*>(darray));
1168: w->offloadmask = PETSC_OFFLOAD_KOKKOS;
1169: *v = w;
1170: return 0;
1171: }
1173: /* TODO: ftn-auto generates veckok.kokkosf.c */
1174: /*@C
1175: VecCreateSeqKokkos - Creates a standard, sequential array-style vector.
1177: Collective
1179: Input Parameters:
1180: + comm - the communicator, should be PETSC_COMM_SELF
1181: - n - the vector length
1183: Output Parameter:
1184: . v - the vector
1186: Notes:
1187: Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
1188: same type as an existing vector.
1190: Level: intermediate
1192: .seealso: VecCreateMPI(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost()
1193: @*/
1194: PetscErrorCode VecCreateSeqKokkos(MPI_Comm comm,PetscInt n,Vec *v)
1195: {
1196: PetscKokkosInitializeCheck();
1197: VecCreate(comm,v);
1198: VecSetSizes(*v,n,n);
1199: VecSetType(*v,VECSEQKOKKOS); /* Calls VecCreate_SeqKokkos */
1200: return 0;
1201: }
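/* Editor's sketch (not part of the original source): VecCreateSeqKokkos() is the Kokkos analogue of
   VecCreateSeq(); n is illustrative and error checking is omitted.

     Vec x;
     VecCreateSeqKokkos(PETSC_COMM_SELF,n,&x);
     VecSet(x,0.0);
     // ... use x on host or device ...
     VecDestroy(&x);
*/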
1203: /* Duplicate layout etc but not the values in the input vector */
1204: PetscErrorCode VecDuplicate_SeqKokkos(Vec win,Vec *v)
1205: {
1206: VecDuplicate_Seq(win,v); /* It also dups ops of win */
1207: return 0;
1208: }
1210: PetscErrorCode VecDestroy_SeqKokkos(Vec v)
1211: {
1212: Vec_Kokkos *veckok = static_cast<Vec_Kokkos*>(v->spptr);
1213: Vec_Seq *vecseq = static_cast<Vec_Seq*>(v->data);
1215: delete veckok;
1216: v->spptr = NULL;
1217: if (vecseq) VecDestroy_Seq(v);
1218: return 0;
1219: }