HIP: Heterogenous-computing Interface for Portability
Loading...
Searching...
No Matches
amd_hip_vector_types.h
1/*
2Copyright (c) 2015 - 2022 Advanced Micro Devices, Inc. All rights reserved.
3
4Permission is hereby granted, free of charge, to any person obtaining a copy
5of this software and associated documentation files (the "Software"), to deal
6in the Software without restriction, including without limitation the rights
7to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8copies of the Software, and to permit persons to whom the Software is
9furnished to do so, subject to the following conditions:
10
11The above copyright notice and this permission notice shall be included in
12all copies or substantial portions of the Software.
13
14THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20THE SOFTWARE.
21*/
22
28#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_VECTOR_TYPES_H
29#define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_VECTOR_TYPES_H
30
32
33#if defined(__HIPCC_RTC__)
34 #define __HOST_DEVICE__ __device__
35#else
36 #define __HOST_DEVICE__ __host__ __device__
37#endif
38
39#if defined(__has_attribute)
40 #if __has_attribute(ext_vector_type)
41 #define __NATIVE_VECTOR__(n, T) T __attribute__((ext_vector_type(n)))
42 #else
43 #define __NATIVE_VECTOR__(n, T) T[n]
44 #endif
45
46#if defined(__cplusplus)
47#if !defined(__HIPCC_RTC__)
48 #include <array>
49 #include <iosfwd>
50 #include <type_traits>
51#else
52namespace std {
53using ::size_t;
54
55template <class _Tp, _Tp __v> struct integral_constant {
56 static constexpr const _Tp value = __v;
57 typedef _Tp value_type;
58 typedef integral_constant type;
59 constexpr operator value_type() const { return value; }
60 constexpr value_type operator()() const { return value; }
61};
62template <class _Tp, _Tp __v> constexpr const _Tp integral_constant<_Tp, __v>::value;
63
64typedef integral_constant<bool, true> true_type;
65typedef integral_constant<bool, false> false_type;
66
67template <bool B> using bool_constant = integral_constant<bool, B>;
68typedef bool_constant<true> true_type;
69typedef bool_constant<false> false_type;
70
71template <bool __B, class __T = void> struct enable_if {};
72template <class __T> struct enable_if<true, __T> { typedef __T type; };
73
74template<bool _B> struct true_or_false_type : public false_type {};
75template<> struct true_or_false_type<true> : public true_type {};
76
77template <class _Tp> struct is_integral : public false_type {};
78template <> struct is_integral<bool> : public true_type {};
79template <> struct is_integral<char> : public true_type {};
80template <> struct is_integral<signed char> : public true_type {};
81template <> struct is_integral<unsigned char> : public true_type {};
82template <> struct is_integral<wchar_t> : public true_type {};
83template <> struct is_integral<short> : public true_type {};
84template <> struct is_integral<unsigned short> : public true_type {};
85template <> struct is_integral<int> : public true_type {};
86template <> struct is_integral<unsigned int> : public true_type {};
87template <> struct is_integral<long> : public true_type {};
88template <> struct is_integral<unsigned long> : public true_type {};
89template <> struct is_integral<long long> : public true_type {};
90template <> struct is_integral<unsigned long long> : public true_type {};
91
92template <class _Tp> struct is_arithmetic : public false_type {};
93template <> struct is_arithmetic<bool> : public true_type {};
94template <> struct is_arithmetic<char> : public true_type {};
95template <> struct is_arithmetic<signed char> : public true_type {};
96template <> struct is_arithmetic<unsigned char> : public true_type {};
97template <> struct is_arithmetic<wchar_t> : public true_type {};
98template <> struct is_arithmetic<short> : public true_type {};
99template <> struct is_arithmetic<unsigned short> : public true_type {};
100template <> struct is_arithmetic<int> : public true_type {};
101template <> struct is_arithmetic<unsigned int> : public true_type {};
102template <> struct is_arithmetic<long> : public true_type {};
103template <> struct is_arithmetic<unsigned long> : public true_type {};
104template <> struct is_arithmetic<long long> : public true_type {};
105template <> struct is_arithmetic<unsigned long long> : public true_type {};
106template <> struct is_arithmetic<float> : public true_type {};
107template <> struct is_arithmetic<double> : public true_type {};
108
109template<typename _Tp> struct is_floating_point : public false_type {};
110template<> struct is_floating_point<float> : public true_type {};
111template<> struct is_floating_point<double> : public true_type {};
112template<> struct is_floating_point<long double> : public true_type {};
113
114template <typename __T, typename __U> struct is_same : public false_type {};
115template <typename __T> struct is_same<__T, __T> : public true_type {};
116
117template<typename _Tp, bool = is_arithmetic<_Tp>::value>
118 struct is_signed : public false_type {};
119template<typename _Tp>
120 struct is_signed<_Tp, true> : public true_or_false_type<_Tp(-1) < _Tp(0)> {};
121
122template <class _T1, class _T2> struct is_convertible
123 : public true_or_false_type<__is_convertible_to(_T1, _T2)> {};
124
125template<typename _CharT> struct char_traits;
126template<typename _CharT, typename _Traits = char_traits<_CharT>> class basic_istream;
127template<typename _CharT, typename _Traits = char_traits<_CharT>> class basic_ostream;
128typedef basic_istream<char> istream;
129typedef basic_ostream<char> ostream;
130
131template <typename __T> struct is_scalar : public integral_constant<bool, __is_scalar(__T)> {};
132} // Namespace std.
133#endif // defined(__HIPCC_RTC__)
134
135 namespace hip_impl {
136 inline
137 constexpr
138 unsigned int next_pot(unsigned int x) {
139 // Precondition: x > 1.
140 return 1u << (32u - __builtin_clz(x - 1u));
141 }
142 } // Namespace hip_impl.
143
144 template<typename T, unsigned int n> struct HIP_vector_base;
145
146 template<typename T>
147 struct HIP_vector_base<T, 1> {
148 using Native_vec_ = __NATIVE_VECTOR__(1, T);
149
150 union {
151 Native_vec_ data;
152 struct {
153 T x;
154 };
155 };
156
157 using value_type = T;
158
159 __HOST_DEVICE__
160 HIP_vector_base() = default;
161 __HOST_DEVICE__
162 explicit
163 constexpr
164 HIP_vector_base(T x_) noexcept : data{x_} {}
165 __HOST_DEVICE__
166 constexpr
167 HIP_vector_base(const HIP_vector_base&) = default;
168 __HOST_DEVICE__
169 constexpr
170 HIP_vector_base(HIP_vector_base&&) = default;
171 __HOST_DEVICE__
172 ~HIP_vector_base() = default;
173 __HOST_DEVICE__
174 HIP_vector_base& operator=(const HIP_vector_base&) = default;
175 };
176
177 template<typename T>
178 struct HIP_vector_base<T, 2> {
179 using Native_vec_ = __NATIVE_VECTOR__(2, T);
180
181 union
182 #if !__has_attribute(ext_vector_type)
183 alignas(hip_impl::next_pot(2 * sizeof(T)))
184 #endif
185 {
186 Native_vec_ data;
187 struct {
188 T x;
189 T y;
190 };
191 };
192
193 using value_type = T;
194
195 __HOST_DEVICE__
196 HIP_vector_base() = default;
197 __HOST_DEVICE__
198 explicit
199 constexpr
200 HIP_vector_base(T x_) noexcept : data{x_, x_} {}
201 __HOST_DEVICE__
202 constexpr
203 HIP_vector_base(T x_, T y_) noexcept : data{x_, y_} {}
204 __HOST_DEVICE__
205 constexpr
206 HIP_vector_base(const HIP_vector_base&) = default;
207 __HOST_DEVICE__
208 constexpr
209 HIP_vector_base(HIP_vector_base&&) = default;
210 __HOST_DEVICE__
211 ~HIP_vector_base() = default;
212 __HOST_DEVICE__
213 HIP_vector_base& operator=(const HIP_vector_base&) = default;
214 };
215
216 template<typename T>
217 struct HIP_vector_base<T, 3> {
218 struct Native_vec_ {
219 T d[3];
220
221 __HOST_DEVICE__
222 Native_vec_() = default;
223
224 __HOST_DEVICE__
225 explicit
226 constexpr
227 Native_vec_(T x_) noexcept : d{x_, x_, x_} {}
228 __HOST_DEVICE__
229 constexpr
230 Native_vec_(T x_, T y_, T z_) noexcept : d{x_, y_, z_} {}
231 __HOST_DEVICE__
232 constexpr
233 Native_vec_(const Native_vec_&) = default;
234 __HOST_DEVICE__
235 constexpr
236 Native_vec_(Native_vec_&&) = default;
237 __HOST_DEVICE__
238 ~Native_vec_() = default;
239
240 __HOST_DEVICE__
241 Native_vec_& operator=(const Native_vec_&) = default;
242 __HOST_DEVICE__
243 Native_vec_& operator=(Native_vec_&&) = default;
244
245 __HOST_DEVICE__
246 T& operator[](unsigned int idx) noexcept { return d[idx]; }
247 __HOST_DEVICE__
248 T operator[](unsigned int idx) const noexcept { return d[idx]; }
249
250 __HOST_DEVICE__
251 Native_vec_& operator+=(const Native_vec_& x_) noexcept
252 {
253 for (auto i = 0u; i != 3u; ++i) d[i] += x_.d[i];
254 return *this;
255 }
256 __HOST_DEVICE__
257 Native_vec_& operator-=(const Native_vec_& x_) noexcept
258 {
259 for (auto i = 0u; i != 3u; ++i) d[i] -= x_.d[i];
260 return *this;
261 }
262
263 __HOST_DEVICE__
264 Native_vec_& operator*=(const Native_vec_& x_) noexcept
265 {
266 for (auto i = 0u; i != 3u; ++i) d[i] *= x_.d[i];
267 return *this;
268 }
269 __HOST_DEVICE__
270 Native_vec_& operator/=(const Native_vec_& x_) noexcept
271 {
272 for (auto i = 0u; i != 3u; ++i) d[i] /= x_.d[i];
273 return *this;
274 }
275
276 template<
277 typename U = T,
278 typename std::enable_if<std::is_signed<U>{}>::type* = nullptr>
279 __HOST_DEVICE__
280 Native_vec_ operator-() const noexcept
281 {
282 auto r{*this};
283 for (auto&& x : r.d) x = -x;
284 return r;
285 }
286
287 template<
288 typename U = T,
289 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
290 __HOST_DEVICE__
291 Native_vec_ operator~() const noexcept
292 {
293 auto r{*this};
294 for (auto&& x : r.d) x = ~x;
295 return r;
296 }
297 template<
298 typename U = T,
299 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
300 __HOST_DEVICE__
301 Native_vec_& operator%=(const Native_vec_& x_) noexcept
302 {
303 for (auto i = 0u; i != 3u; ++i) d[i] %= x_.d[i];
304 return *this;
305 }
306 template<
307 typename U = T,
308 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
309 __HOST_DEVICE__
310 Native_vec_& operator^=(const Native_vec_& x_) noexcept
311 {
312 for (auto i = 0u; i != 3u; ++i) d[i] ^= x_.d[i];
313 return *this;
314 }
315 template<
316 typename U = T,
317 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
318 __HOST_DEVICE__
319 Native_vec_& operator|=(const Native_vec_& x_) noexcept
320 {
321 for (auto i = 0u; i != 3u; ++i) d[i] |= x_.d[i];
322 return *this;
323 }
324 template<
325 typename U = T,
326 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
327 __HOST_DEVICE__
328 Native_vec_& operator&=(const Native_vec_& x_) noexcept
329 {
330 for (auto i = 0u; i != 3u; ++i) d[i] &= x_.d[i];
331 return *this;
332 }
333 template<
334 typename U = T,
335 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
336 __HOST_DEVICE__
337 Native_vec_& operator>>=(const Native_vec_& x_) noexcept
338 {
339 for (auto i = 0u; i != 3u; ++i) d[i] >>= x_.d[i];
340 return *this;
341 }
342 template<
343 typename U = T,
344 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
345 __HOST_DEVICE__
346 Native_vec_& operator<<=(const Native_vec_& x_) noexcept
347 {
348 for (auto i = 0u; i != 3u; ++i) d[i] <<= x_.d[i];
349 return *this;
350 }
351#if defined (__INTEL_COMPILER)
352 typedef struct {
353 int values[4];
354 } _Vec3_cmp;
355 using Vec3_cmp = _Vec3_cmp;
356#else
357 using Vec3_cmp = int __attribute__((vector_size(4 * sizeof(int))));
358#endif //INTEL
359 __HOST_DEVICE__
360 Vec3_cmp operator==(const Native_vec_& x_) const noexcept
361 {
362 return Vec3_cmp{d[0] == x_.d[0], d[1] == x_.d[1], d[2] == x_.d[2]};
363 }
364 };
365
366 union {
367 Native_vec_ data;
368 struct {
369 T x;
370 T y;
371 T z;
372 };
373 };
374
375 using value_type = T;
376
377 __HOST_DEVICE__
378 HIP_vector_base() = default;
379 __HOST_DEVICE__
380 explicit
381 constexpr
382 HIP_vector_base(T x_) noexcept : data{x_, x_, x_} {}
383 __HOST_DEVICE__
384 constexpr
385 HIP_vector_base(T x_, T y_, T z_) noexcept : data{x_, y_, z_} {}
386 __HOST_DEVICE__
387 constexpr
388 HIP_vector_base(const HIP_vector_base&) = default;
389 __HOST_DEVICE__
390 constexpr
391 HIP_vector_base(HIP_vector_base&&) = default;
392 __HOST_DEVICE__
393 ~HIP_vector_base() = default;
394
395 __HOST_DEVICE__
396 HIP_vector_base& operator=(const HIP_vector_base&) = default;
397 __HOST_DEVICE__
398 HIP_vector_base& operator=(HIP_vector_base&&) = default;
399 };
400
401 template<typename T>
402 struct HIP_vector_base<T, 4> {
403 using Native_vec_ = __NATIVE_VECTOR__(4, T);
404
405 union
406 #if !__has_attribute(ext_vector_type)
407 alignas(hip_impl::next_pot(4 * sizeof(T)))
408 #endif
409 {
410 Native_vec_ data;
411 struct {
412 T x;
413 T y;
414 T z;
415 T w;
416 };
417 };
418
419 using value_type = T;
420
421 __HOST_DEVICE__
422 HIP_vector_base() = default;
423 __HOST_DEVICE__
424 explicit
425 constexpr
426 HIP_vector_base(T x_) noexcept : data{x_, x_, x_, x_} {}
427 __HOST_DEVICE__
428 constexpr
429 HIP_vector_base(T x_, T y_, T z_, T w_) noexcept : data{x_, y_, z_, w_} {}
430 __HOST_DEVICE__
431 constexpr
432 HIP_vector_base(const HIP_vector_base&) = default;
433 __HOST_DEVICE__
434 constexpr
435 HIP_vector_base(HIP_vector_base&&) = default;
436 __HOST_DEVICE__
437 ~HIP_vector_base() = default;
438 __HOST_DEVICE__
439 HIP_vector_base& operator=(const HIP_vector_base&) = default;
440 };
441
442 template<typename T, unsigned int rank>
443 struct HIP_vector_type : public HIP_vector_base<T, rank> {
444 using HIP_vector_base<T, rank>::data;
445 using typename HIP_vector_base<T, rank>::Native_vec_;
446
447 __HOST_DEVICE__
448 HIP_vector_type() = default;
449 template<
450 typename U,
451 typename std::enable_if<
452 std::is_convertible<U, T>::value>::type* = nullptr>
453 __HOST_DEVICE__
454 explicit
455 constexpr
456 HIP_vector_type(U x_) noexcept
457 : HIP_vector_base<T, rank>{static_cast<T>(x_)}
458 {}
459 template< // TODO: constrain based on type as well.
460 typename... Us,
461 typename std::enable_if<
462 (rank > 1) && sizeof...(Us) == rank>::type* = nullptr>
463 __HOST_DEVICE__
464 constexpr
465 HIP_vector_type(Us... xs) noexcept
466 : HIP_vector_base<T, rank>{static_cast<T>(xs)...}
467 {}
468 __HOST_DEVICE__
469 constexpr
470 HIP_vector_type(const HIP_vector_type&) = default;
471 __HOST_DEVICE__
472 constexpr
473 HIP_vector_type(HIP_vector_type&&) = default;
474 __HOST_DEVICE__
475 ~HIP_vector_type() = default;
476
477 __HOST_DEVICE__
478 HIP_vector_type& operator=(const HIP_vector_type&) = default;
479 __HOST_DEVICE__
480 HIP_vector_type& operator=(HIP_vector_type&&) = default;
481
482 // Operators
483 __HOST_DEVICE__
484 HIP_vector_type& operator++() noexcept
485 {
486 return *this += HIP_vector_type{1};
487 }
488 __HOST_DEVICE__
489 HIP_vector_type operator++(int) noexcept
490 {
491 auto tmp(*this);
492 ++*this;
493 return tmp;
494 }
495
496 __HOST_DEVICE__
497 HIP_vector_type& operator--() noexcept
498 {
499 return *this -= HIP_vector_type{1};
500 }
501 __HOST_DEVICE__
502 HIP_vector_type operator--(int) noexcept
503 {
504 auto tmp(*this);
505 --*this;
506 return tmp;
507 }
508
509 __HOST_DEVICE__
510 HIP_vector_type& operator+=(const HIP_vector_type& x) noexcept
511 {
512 data += x.data;
513 return *this;
514 }
515 template<
516 typename U,
517 typename std::enable_if<
518 std::is_convertible<U, T>{}>::type* = nullptr>
519 __HOST_DEVICE__
520 HIP_vector_type& operator+=(U x) noexcept
521 {
522 return *this += HIP_vector_type{x};
523 }
524
525 __HOST_DEVICE__
526 HIP_vector_type& operator-=(const HIP_vector_type& x) noexcept
527 {
528 data -= x.data;
529 return *this;
530 }
531 template<
532 typename U,
533 typename std::enable_if<
534 std::is_convertible<U, T>{}>::type* = nullptr>
535 __HOST_DEVICE__
536 HIP_vector_type& operator-=(U x) noexcept
537 {
538 return *this -= HIP_vector_type{x};
539 }
540
541 __HOST_DEVICE__
542 HIP_vector_type& operator*=(const HIP_vector_type& x) noexcept
543 {
544 data *= x.data;
545 return *this;
546 }
547
548 friend __HOST_DEVICE__ inline constexpr HIP_vector_type operator*(
549 HIP_vector_type x, const HIP_vector_type& y) noexcept
550 {
551 return HIP_vector_type{ x } *= y;
552 }
553
554 template<
555 typename U,
556 typename std::enable_if<
557 std::is_convertible<U, T>{}>::type* = nullptr>
558 __HOST_DEVICE__
559 HIP_vector_type& operator*=(U x) noexcept
560 {
561 return *this *= HIP_vector_type{x};
562 }
563
564 friend __HOST_DEVICE__ inline constexpr HIP_vector_type operator/(
565 HIP_vector_type x, const HIP_vector_type& y) noexcept
566 {
567 return HIP_vector_type{ x } /= y;
568 }
569
570 __HOST_DEVICE__
571 HIP_vector_type& operator/=(const HIP_vector_type& x) noexcept
572 {
573 data /= x.data;
574 return *this;
575 }
576 template<
577 typename U,
578 typename std::enable_if<
579 std::is_convertible<U, T>{}>::type* = nullptr>
580 __HOST_DEVICE__
581 HIP_vector_type& operator/=(U x) noexcept
582 {
583 return *this /= HIP_vector_type{x};
584 }
585
586 template<
587 typename U = T,
588 typename std::enable_if<std::is_signed<U>{}>::type* = nullptr>
589 __HOST_DEVICE__
590 HIP_vector_type operator-() const noexcept
591 {
592 auto tmp(*this);
593 tmp.data = -tmp.data;
594 return tmp;
595 }
596
597 template<
598 typename U = T,
599 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
600 __HOST_DEVICE__
601 HIP_vector_type operator~() const noexcept
602 {
603 HIP_vector_type r{*this};
604 r.data = ~r.data;
605 return r;
606 }
607
608 template<
609 typename U = T,
610 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
611 __HOST_DEVICE__
612 HIP_vector_type& operator%=(const HIP_vector_type& x) noexcept
613 {
614 data %= x.data;
615 return *this;
616 }
617
618 template<
619 typename U = T,
620 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
621 __HOST_DEVICE__
622 HIP_vector_type& operator^=(const HIP_vector_type& x) noexcept
623 {
624 data ^= x.data;
625 return *this;
626 }
627
628 template<
629 typename U = T,
630 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
631 __HOST_DEVICE__
632 HIP_vector_type& operator|=(const HIP_vector_type& x) noexcept
633 {
634 data |= x.data;
635 return *this;
636 }
637
638 template<
639 typename U = T,
640 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
641 __HOST_DEVICE__
642 HIP_vector_type& operator&=(const HIP_vector_type& x) noexcept
643 {
644 data &= x.data;
645 return *this;
646 }
647
648 template<
649 typename U = T,
650 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
651 __HOST_DEVICE__
652 HIP_vector_type& operator>>=(const HIP_vector_type& x) noexcept
653 {
654 data >>= x.data;
655 return *this;
656 }
657
658 template<
659 typename U = T,
660 typename std::enable_if<std::is_integral<U>{}>::type* = nullptr>
661 __HOST_DEVICE__
662 HIP_vector_type& operator<<=(const HIP_vector_type& x) noexcept
663 {
664 data <<= x.data;
665 return *this;
666 }
667 };
668
669 template<typename T, unsigned int n>
670 __HOST_DEVICE__
671 inline
672 constexpr
673 HIP_vector_type<T, n> operator+(
674 const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
675 {
676 return HIP_vector_type<T, n>{x} += y;
677 }
678 template<typename T, unsigned int n, typename U>
679 __HOST_DEVICE__
680 inline
681 constexpr
682 HIP_vector_type<T, n> operator+(
683 const HIP_vector_type<T, n>& x, U y) noexcept
684 {
685 return HIP_vector_type<T, n>{x} += HIP_vector_type<T, n>{y};
686 }
687 template<typename T, unsigned int n, typename U>
688 __HOST_DEVICE__
689 inline
690 constexpr
691 HIP_vector_type<T, n> operator+(
692 U x, const HIP_vector_type<T, n>& y) noexcept
693 {
694 return HIP_vector_type<T, n>{x} += y;
695 }
696
697 template<typename T, unsigned int n>
698 __HOST_DEVICE__
699 inline
700 constexpr
701 HIP_vector_type<T, n> operator-(
702 const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
703 {
704 return HIP_vector_type<T, n>{x} -= y;
705 }
706 template<typename T, unsigned int n, typename U>
707 __HOST_DEVICE__
708 inline
709 constexpr
710 HIP_vector_type<T, n> operator-(
711 const HIP_vector_type<T, n>& x, U y) noexcept
712 {
713 return HIP_vector_type<T, n>{x} -= HIP_vector_type<T, n>{y};
714 }
715 template<typename T, unsigned int n, typename U>
716 __HOST_DEVICE__
717 inline
718 constexpr
719 HIP_vector_type<T, n> operator-(
720 U x, const HIP_vector_type<T, n>& y) noexcept
721 {
722 return HIP_vector_type<T, n>{x} -= y;
723 }
724
725 template<typename T, unsigned int n, typename U>
726 __HOST_DEVICE__
727 inline
728 constexpr
729 HIP_vector_type<T, n> operator*(
730 const HIP_vector_type<T, n>& x, U y) noexcept
731 {
732 return HIP_vector_type<T, n>{x} *= HIP_vector_type<T, n>{y};
733 }
734 template<typename T, unsigned int n, typename U>
735 __HOST_DEVICE__
736 inline
737 constexpr
738 HIP_vector_type<T, n> operator*(
739 U x, const HIP_vector_type<T, n>& y) noexcept
740 {
741 return HIP_vector_type<T, n>{x} *= y;
742 }
743
744 template<typename T, unsigned int n, typename U>
745 __HOST_DEVICE__
746 inline
747 constexpr
748 HIP_vector_type<T, n> operator/(
749 const HIP_vector_type<T, n>& x, U y) noexcept
750 {
751 return HIP_vector_type<T, n>{x} /= HIP_vector_type<T, n>{y};
752 }
753 template<typename T, unsigned int n, typename U>
754 __HOST_DEVICE__
755 inline
756 constexpr
757 HIP_vector_type<T, n> operator/(
758 U x, const HIP_vector_type<T, n>& y) noexcept
759 {
760 return HIP_vector_type<T, n>{x} /= y;
761 }
762
763 template<typename V>
764 __HOST_DEVICE__
765 inline
766 constexpr
767 bool _hip_any_zero(const V& x, int n) noexcept
768 {
769 return
770 (n == -1) ? true : ((x[n] == 0) ? false : _hip_any_zero(x, n - 1));
771 }
772
773 template<typename T, unsigned int n>
774 __HOST_DEVICE__
775 inline
776 constexpr
777 bool operator==(
778 const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
779 {
780 return _hip_any_zero(x.data == y.data, n - 1);
781 }
782 template<typename T, unsigned int n, typename U>
783 __HOST_DEVICE__
784 inline
785 constexpr
786 bool operator==(const HIP_vector_type<T, n>& x, U y) noexcept
787 {
788 return x == HIP_vector_type<T, n>{y};
789 }
790 template<typename T, unsigned int n, typename U>
791 __HOST_DEVICE__
792 inline
793 constexpr
794 bool operator==(U x, const HIP_vector_type<T, n>& y) noexcept
795 {
796 return HIP_vector_type<T, n>{x} == y;
797 }
798
799 template<typename T, unsigned int n>
800 __HOST_DEVICE__
801 inline
802 constexpr
803 bool operator!=(
804 const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
805 {
806 return !(x == y);
807 }
808 template<typename T, unsigned int n, typename U>
809 __HOST_DEVICE__
810 inline
811 constexpr
812 bool operator!=(const HIP_vector_type<T, n>& x, U y) noexcept
813 {
814 return !(x == y);
815 }
816 template<typename T, unsigned int n, typename U>
817 __HOST_DEVICE__
818 inline
819 constexpr
820 bool operator!=(U x, const HIP_vector_type<T, n>& y) noexcept
821 {
822 return !(x == y);
823 }
824
825 template<
826 typename T,
827 unsigned int n,
828 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
829 __HOST_DEVICE__
830 inline
831 constexpr
832 HIP_vector_type<T, n> operator%(
833 const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
834 {
835 return HIP_vector_type<T, n>{x} %= y;
836 }
837 template<
838 typename T,
839 unsigned int n,
840 typename U,
841 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
842 __HOST_DEVICE__
843 inline
844 constexpr
845 HIP_vector_type<T, n> operator%(
846 const HIP_vector_type<T, n>& x, U y) noexcept
847 {
848 return HIP_vector_type<T, n>{x} %= HIP_vector_type<T, n>{y};
849 }
850 template<
851 typename T,
852 unsigned int n,
853 typename U,
854 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
855 __HOST_DEVICE__
856 inline
857 constexpr
858 HIP_vector_type<T, n> operator%(
859 U x, const HIP_vector_type<T, n>& y) noexcept
860 {
861 return HIP_vector_type<T, n>{x} %= y;
862 }
863
864 template<
865 typename T,
866 unsigned int n,
867 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
868 __HOST_DEVICE__
869 inline
870 constexpr
871 HIP_vector_type<T, n> operator^(
872 const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
873 {
874 return HIP_vector_type<T, n>{x} ^= y;
875 }
876 template<
877 typename T,
878 unsigned int n,
879 typename U,
880 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
881 __HOST_DEVICE__
882 inline
883 constexpr
884 HIP_vector_type<T, n> operator^(
885 const HIP_vector_type<T, n>& x, U y) noexcept
886 {
887 return HIP_vector_type<T, n>{x} ^= HIP_vector_type<T, n>{y};
888 }
889 template<
890 typename T,
891 unsigned int n,
892 typename U,
893 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
894 __HOST_DEVICE__
895 inline
896 constexpr
897 HIP_vector_type<T, n> operator^(
898 U x, const HIP_vector_type<T, n>& y) noexcept
899 {
900 return HIP_vector_type<T, n>{x} ^= y;
901 }
902
903 template<
904 typename T,
905 unsigned int n,
906 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
907 __HOST_DEVICE__
908 inline
909 constexpr
910 HIP_vector_type<T, n> operator|(
911 const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
912 {
913 return HIP_vector_type<T, n>{x} |= y;
914 }
915 template<
916 typename T,
917 unsigned int n,
918 typename U,
919 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
920 __HOST_DEVICE__
921 inline
922 constexpr
923 HIP_vector_type<T, n> operator|(
924 const HIP_vector_type<T, n>& x, U y) noexcept
925 {
926 return HIP_vector_type<T, n>{x} |= HIP_vector_type<T, n>{y};
927 }
928 template<
929 typename T,
930 unsigned int n,
931 typename U,
932 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
933 __HOST_DEVICE__
934 inline
935 constexpr
936 HIP_vector_type<T, n> operator|(
937 U x, const HIP_vector_type<T, n>& y) noexcept
938 {
939 return HIP_vector_type<T, n>{x} |= y;
940 }
941
942 template<
943 typename T,
944 unsigned int n,
945 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
946 __HOST_DEVICE__
947 inline
948 constexpr
949 HIP_vector_type<T, n> operator&(
950 const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
951 {
952 return HIP_vector_type<T, n>{x} &= y;
953 }
954 template<
955 typename T,
956 unsigned int n,
957 typename U,
958 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
959 __HOST_DEVICE__
960 inline
961 constexpr
962 HIP_vector_type<T, n> operator&(
963 const HIP_vector_type<T, n>& x, U y) noexcept
964 {
965 return HIP_vector_type<T, n>{x} &= HIP_vector_type<T, n>{y};
966 }
967 template<
968 typename T,
969 unsigned int n,
970 typename U,
971 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
972 __HOST_DEVICE__
973 inline
974 constexpr
975 HIP_vector_type<T, n> operator&(
976 U x, const HIP_vector_type<T, n>& y) noexcept
977 {
978 return HIP_vector_type<T, n>{x} &= y;
979 }
980
981 template<
982 typename T,
983 unsigned int n,
984 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
985 __HOST_DEVICE__
986 inline
987 constexpr
988 HIP_vector_type<T, n> operator>>(
989 const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
990 {
991 return HIP_vector_type<T, n>{x} >>= y;
992 }
993 template<
994 typename T,
995 unsigned int n,
996 typename U,
997 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
998 __HOST_DEVICE__
999 inline
1000 constexpr
1001 HIP_vector_type<T, n> operator>>(
1002 const HIP_vector_type<T, n>& x, U y) noexcept
1003 {
1004 return HIP_vector_type<T, n>{x} >>= HIP_vector_type<T, n>{y};
1005 }
1006 template<
1007 typename T,
1008 unsigned int n,
1009 typename U,
1010 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1011 __HOST_DEVICE__
1012 inline
1013 constexpr
1014 HIP_vector_type<T, n> operator>>(
1015 U x, const HIP_vector_type<T, n>& y) noexcept
1016 {
1017 return HIP_vector_type<T, n>{x} >>= y;
1018 }
1019
1020 template<
1021 typename T,
1022 unsigned int n,
1023 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1024 __HOST_DEVICE__
1025 inline
1026 constexpr
1027 HIP_vector_type<T, n> operator<<(
1028 const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
1029 {
1030 return HIP_vector_type<T, n>{x} <<= y;
1031 }
1032 template<
1033 typename T,
1034 unsigned int n,
1035 typename U,
1036 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1037 __HOST_DEVICE__
1038 inline
1039 constexpr
1040 HIP_vector_type<T, n> operator<<(
1041 const HIP_vector_type<T, n>& x, U y) noexcept
1042 {
1043 return HIP_vector_type<T, n>{x} <<= HIP_vector_type<T, n>{y};
1044 }
1045 template<
1046 typename T,
1047 unsigned int n,
1048 typename U,
1049 typename std::enable_if<std::is_arithmetic<U>::value>::type,
1050 typename std::enable_if<std::is_integral<T>{}>* = nullptr>
1051 __HOST_DEVICE__
1052 inline
1053 constexpr
1054 HIP_vector_type<T, n> operator<<(
1055 U x, const HIP_vector_type<T, n>& y) noexcept
1056 {
1057 return HIP_vector_type<T, n>{x} <<= y;
1058 }
1059
1060 /*
1061 * Map HIP_vector_type<U, rankU> to HIP_vector_type<T, rankT>
1062 */
1063 template <typename T, unsigned int rankT, typename U, unsigned int rankU>
1064 __forceinline__ __HOST_DEVICE__ typename std::enable_if<(rankT == 1 && rankU >= 1),
1065 const HIP_vector_type<T, rankT>>::type
1066 __hipMapVector(const HIP_vector_type<U, rankU>& u) {
1067 return HIP_vector_type<T, rankT>(static_cast<T>(u.x));
1068 };
1069
1070 template <typename T, unsigned int rankT, typename U, unsigned int rankU>
1071 __forceinline__ __HOST_DEVICE__ typename std::enable_if<(rankT == 2 && rankU == 1),
1072 const HIP_vector_type<T, rankT>>::type
1073 __hipMapVector(const HIP_vector_type<U, rankU>& u) {
1074 return HIP_vector_type<T, rankT> (static_cast<T>(u.x), static_cast<T>(0));
1075 };
1076
1077 template <typename T, unsigned int rankT, typename U, unsigned int rankU>
1078 __forceinline__ __HOST_DEVICE__ typename std::enable_if<(rankT == 2 && rankU >= 2),
1079 const HIP_vector_type<T, rankT>>::type
1080 __hipMapVector(const HIP_vector_type<U, rankU>& u) {
1081 return HIP_vector_type<T, rankT> (static_cast<T>(u.x), static_cast<T>(u.y));
1082 };
1083
1084 template <typename T, unsigned int rankT, typename U, unsigned int rankU>
1085 __forceinline__ __HOST_DEVICE__ typename std::enable_if<(rankT == 4 && rankU == 1),
1086 const HIP_vector_type<T, rankT>>::type
1087 __hipMapVector(const HIP_vector_type<U, rankU>& u) {
1088 return HIP_vector_type<T, rankT> (static_cast<T>(u.x), static_cast<T>(0),
1089 static_cast<T>(0), static_cast<T>(0));
1090 };
1091
1092 template <typename T, unsigned int rankT, typename U, unsigned int rankU>
1093 __forceinline__ __HOST_DEVICE__ typename std::enable_if<(rankT == 4 && rankU == 2),
1094 const HIP_vector_type<T, rankT>>::type
1095 __hipMapVector(const HIP_vector_type<U, rankU>& u) {
1096 return HIP_vector_type<T, rankT>(static_cast<T>(u.x), static_cast<T>(u.y),
1097 static_cast<T>(0), static_cast<T>(0));
1098 };
1099
1100 template <typename T, unsigned int rankT, typename U, unsigned int rankU>
1101 __forceinline__ __HOST_DEVICE__ typename std::enable_if<(rankT == 4 && rankU == 4),
1102 const HIP_vector_type<T, rankT>>::type
1103 __hipMapVector(const HIP_vector_type<U, rankU>& u) {
1104 return HIP_vector_type<T, rankT> (static_cast<T>(u.x), static_cast<T>(u.y),
1105 static_cast<T>(u.z), static_cast<T>(u.w));
1106 };
1107
1108 #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \
1109 using CUDA_name##1 = HIP_vector_type<T, 1>;\
1110 using CUDA_name##2 = HIP_vector_type<T, 2>;\
1111 using CUDA_name##3 = HIP_vector_type<T, 3>;\
1112 using CUDA_name##4 = HIP_vector_type<T, 4>;
1113#else
1114 #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \
1115 typedef struct {\
1116 T x;\
1117 } CUDA_name##1;\
1118 typedef struct {\
1119 T x;\
1120 T y;\
1121 } CUDA_name##2;\
1122 typedef struct {\
1123 T x;\
1124 T y;\
1125 T z;\
1126 } CUDA_name##3;\
1127 typedef struct {\
1128 T x;\
1129 T y;\
1130 T z;\
1131 T w;\
1132 } CUDA_name##4;
1133#endif
1134
1135__MAKE_VECTOR_TYPE__(uchar, unsigned char);
1136__MAKE_VECTOR_TYPE__(char, char);
1137__MAKE_VECTOR_TYPE__(ushort, unsigned short);
1138__MAKE_VECTOR_TYPE__(short, short);
1139__MAKE_VECTOR_TYPE__(uint, unsigned int);
1140__MAKE_VECTOR_TYPE__(int, int);
1141__MAKE_VECTOR_TYPE__(ulong, unsigned long);
1142__MAKE_VECTOR_TYPE__(long, long);
1143__MAKE_VECTOR_TYPE__(ulonglong, unsigned long long);
1144__MAKE_VECTOR_TYPE__(longlong, long long);
1145__MAKE_VECTOR_TYPE__(float, float);
1146__MAKE_VECTOR_TYPE__(double, double);
1147
1148#else // !defined(__has_attribute)
1149
1150#if defined(_MSC_VER)
1151#include <mmintrin.h>
1152#include <xmmintrin.h>
1153#include <emmintrin.h>
1154#include <immintrin.h>
1155
1156/*
1157this is for compatibility with CUDA as CUDA allows accessing vector components
1158in C++ program with MSVC
1159*/
1160typedef union {
1161 struct {
1162 char x;
1163 };
1164 char data;
1165} char1;
1166typedef union {
1167 struct {
1168 char x;
1169 char y;
1170 };
1171 char data[2];
1172} char2;
1173typedef union {
1174 struct {
1175 char x;
1176 char y;
1177 char z;
1178 char w;
1179 };
1180 char data[4];
1181} char4;
1182typedef union {
1183 struct {
1184 char x;
1185 char y;
1186 char z;
1187 };
1188 char data[3];
1189} char3;
1190typedef union {
1191 __m64 data;
1192} char8;
1193typedef union {
1194 __m128i data;
1195} char16;
1196
1197typedef union {
1198 struct {
1199 unsigned char x;
1200 };
1201 unsigned char data;
1202} uchar1;
1203typedef union {
1204 struct {
1205 unsigned char x;
1206 unsigned char y;
1207 };
1208 unsigned char data[2];
1209} uchar2;
1210typedef union {
1211 struct {
1212 unsigned char x;
1213 unsigned char y;
1214 unsigned char z;
1215 unsigned char w;
1216 };
1217 unsigned char data[4];
1218} uchar4;
1219typedef union {
1220 struct {
1221 unsigned char x;
1222 unsigned char y;
1223 unsigned char z;
1224 };
1225 unsigned char data[3];
1226} uchar3;
1227typedef union {
1228 __m64 data;
1229} uchar8;
1230typedef union {
1231 __m128i data;
1232} uchar16;
1233
1234typedef union {
1235 struct {
1236 short x;
1237 };
1238 short data;
1239} short1;
1240typedef union {
1241 struct {
1242 short x;
1243 short y;
1244 };
1245 short data[2];
1246} short2;
1247typedef union {
1248 struct {
1249 short x;
1250 short y;
1251 short z;
1252 short w;
1253 };
1254 __m64 data;
1255} short4;
1256typedef union {
1257 struct {
1258 short x;
1259 short y;
1260 short z;
1261 };
1262 short data[3];
1263} short3;
1264typedef union {
1265 __m128i data;
1266} short8;
1267typedef union {
1268 __m128i data[2];
1269} short16;
1270
1271typedef union {
1272 struct {
1273 unsigned short x;
1274 };
1275 unsigned short data;
1276} ushort1;
1277typedef union {
1278 struct {
1279 unsigned short x;
1280 unsigned short y;
1281 };
1282 unsigned short data[2];
1283} ushort2;
1284typedef union {
1285 struct {
1286 unsigned short x;
1287 unsigned short y;
1288 unsigned short z;
1289 unsigned short w;
1290 };
1291 __m64 data;
1292} ushort4;
1293typedef union {
1294 struct {
1295 unsigned short x;
1296 unsigned short y;
1297 unsigned short z;
1298 };
1299 unsigned short data[3];
1300} ushort3;
1301typedef union {
1302 __m128i data;
1303} ushort8;
1304typedef union {
1305 __m128i data[2];
1306} ushort16;
1307
1308typedef union {
1309 struct {
1310 int x;
1311 };
1312 int data;
1313} int1;
1314typedef union {
1315 struct {
1316 int x;
1317 int y;
1318 };
1319 __m64 data;
1320} int2;
1321typedef union {
1322 struct {
1323 int x;
1324 int y;
1325 int z;
1326 int w;
1327 };
1328 __m128i data;
1329} int4;
1330typedef union {
1331 struct {
1332 int x;
1333 int y;
1334 int z;
1335 };
1336 int data[3];
1337} int3;
1338typedef union {
1339 __m128i data[2];
1340} int8;
1341typedef union {
1342 __m128i data[4];
1343} int16;
1344
1345typedef union {
1346 struct {
1347 unsigned int x;
1348 };
1349 unsigned int data;
1350} uint1;
1351typedef union {
1352 struct {
1353 unsigned int x;
1354 unsigned int y;
1355 };
1356 __m64 data;
1357} uint2;
1358typedef union {
1359 struct {
1360 unsigned int x;
1361 unsigned int y;
1362 unsigned int z;
1363 unsigned int w;
1364 };
1365 __m128i data;
1366} uint4;
1367typedef union {
1368 struct {
1369 unsigned int x;
1370 unsigned int y;
1371 unsigned int z;
1372 };
1373 unsigned int data[3];
1374} uint3;
1375typedef union {
1376 __m128i data[2];
1377} uint8;
1378typedef union {
1379 __m128i data[4];
1380} uint16;
1381
1382typedef union {
1383 struct {
1384 int x;
1385 };
1386 int data;
1387} long1;
1388typedef union {
1389 struct {
1390 int x;
1391 int y;
1392 };
1393 __m64 data;
1394} long2;
1395typedef union {
1396 struct {
1397 int x;
1398 int y;
1399 int z;
1400 int w;
1401 };
1402 __m128i data;
1403} long4;
1404typedef union {
1405 struct {
1406 int x;
1407 int y;
1408 int z;
1409 };
1410 int data[3];
1411} long3;
1412typedef union {
1413 __m128i data[2];
1414} long8;
1415typedef union {
1416 __m128i data[4];
1417} long16;
1418
1419typedef union {
1420 struct {
1421 unsigned int x;
1422 };
1423 unsigned int data;
1424} ulong1;
1425typedef union {
1426 struct {
1427 unsigned int x;
1428 unsigned int y;
1429 };
1430 __m64 data;
1431} ulong2;
1432typedef union {
1433 struct {
1434 unsigned int x;
1435 unsigned int y;
1436 unsigned int z;
1437 unsigned int w;
1438 };
1439 __m128i data;
1440} ulong4;
1441typedef union {
1442 struct {
1443 unsigned int x;
1444 unsigned int y;
1445 unsigned int z;
1446 };
1447 unsigned int data[3];
1448} ulong3;
1449typedef union {
1450 __m128i data[2];
1451} ulong8;
1452typedef union {
1453 __m128i data[4];
1454} ulong16;
1455
1456typedef union {
1457 struct {
1458 long long x;
1459 };
1460 __m64 data;
1461} longlong1;
1462typedef union {
1463 struct {
1464 long long x;
1465 long long y;
1466 };
1467 __m128i data;
1468} longlong2;
1469typedef union {
1470 struct {
1471 long long x;
1472 long long y;
1473 long long z;
1474 long long w;
1475 };
1476 __m128i data[2];
1477} longlong4;
1478typedef union {
1479 struct {
1480 long long x;
1481 long long y;
1482 long long z;
1483 };
1484 __m64 data[3];
1485} longlong3;
1486typedef union {
1487 __m128i data[4];
1488} longlong8;
1489typedef union {
1490 __m128i data[8];
1491} longlong16;
1492
1493typedef union {
1494 struct {
1495 __m64 x;
1496 };
1497 __m64 data;
1498} ulonglong1;
1499typedef union {
1500 struct {
1501 __m64 x;
1502 __m64 y;
1503 };
1504 __m128i data;
1505} ulonglong2;
1506typedef union {
1507 struct {
1508 __m64 x;
1509 __m64 y;
1510 __m64 z;
1511 __m64 w;
1512 };
1513 __m128i data[2];
1514} ulonglong4;
1515typedef union {
1516 struct {
1517 __m64 x;
1518 __m64 y;
1519 __m64 z;
1520 };
1521 __m64 data[3];
1522} ulonglong3;
1523typedef union {
1524 __m128i data[4];
1525} ulonglong8;
1526typedef union {
1527 __m128i data[8];
1528} ulonglong16;
1529
1530typedef union {
1531 struct {
1532 float x;
1533 };
1534 float data;
1535} float1;
1536typedef union {
1537 struct {
1538 float x;
1539 float y;
1540 };
1541 __m64 data;
1542} float2;
1543typedef union {
1544 struct {
1545 float x;
1546 float y;
1547 float z;
1548 float w;
1549 };
1550 __m128 data;
1551} float4;
1552typedef union {
1553 struct {
1554 float x;
1555 float y;
1556 float z;
1557 };
1558 float data[3];
1559} float3;
1560typedef union {
1561 __m256 data;
1562} float8;
1563typedef union {
1564 __m256 data[2];
1565} float16;
1566
1567typedef union {
1568 struct {
1569 double x;
1570 };
1571 double data;
1572} double1;
1573typedef union {
1574 struct {
1575 double x;
1576 double y;
1577 };
1578 __m128d data;
1579} double2;
1580typedef union {
1581 struct {
1582 double x;
1583 double y;
1584 double z;
1585 double w;
1586 };
1587 __m256d data;
1588} double4;
1589typedef union {
1590 struct {
1591 double x;
1592 double y;
1593 double z;
1594 };
1595 double data[3];
1596} double3;
1597typedef union {
1598 __m256d data[2];
1599} double8;
1600typedef union {
1601 __m256d data[4];
1602} double16;
1603
1604#else // !defined(_MSC_VER)
1605
1606/*
1607this is for compatibility with CUDA as CUDA allows accessing vector components
1608in C++ program with MSVC
1609*/
1610typedef union {
1611 struct {
1612 char x;
1613 };
1614 char data;
1615} char1;
1616typedef union {
1617 struct {
1618 char x;
1619 char y;
1620 };
1621 char data[2];
1622} char2;
1623typedef union {
1624 struct {
1625 char x;
1626 char y;
1627 char z;
1628 char w;
1629 };
1630 char data[4];
1631} char4;
1632typedef union {
1633 char data[8];
1634} char8;
1635typedef union {
1636 char data[16];
1637} char16;
1638typedef union {
1639 struct {
1640 char x;
1641 char y;
1642 char z;
1643 };
1644 char data[3];
1645} char3;
1646
1647typedef union {
1648 struct {
1649 unsigned char x;
1650 };
1651 unsigned char data;
1652} uchar1;
1653typedef union {
1654 struct {
1655 unsigned char x;
1656 unsigned char y;
1657 };
1658 unsigned char data[2];
1659} uchar2;
1660typedef union {
1661 struct {
1662 unsigned char x;
1663 unsigned char y;
1664 unsigned char z;
1665 unsigned char w;
1666 };
1667 unsigned char data[4];
1668} uchar4;
1669typedef union {
1670 unsigned char data[8];
1671} uchar8;
1672typedef union {
1673 unsigned char data[16];
1674} uchar16;
1675typedef union {
1676 struct {
1677 unsigned char x;
1678 unsigned char y;
1679 unsigned char z;
1680 };
1681 unsigned char data[3];
1682} uchar3;
1683
1684typedef union {
1685 struct {
1686 short x;
1687 };
1688 short data;
1689} short1;
1690typedef union {
1691 struct {
1692 short x;
1693 short y;
1694 };
1695 short data[2];
1696} short2;
1697typedef union {
1698 struct {
1699 short x;
1700 short y;
1701 short z;
1702 short w;
1703 };
1704 short data[4];
1705} short4;
1706typedef union {
1707 short data[8];
1708} short8;
1709typedef union {
1710 short data[16];
1711} short16;
1712typedef union {
1713 struct {
1714 short x;
1715 short y;
1716 short z;
1717 };
1718 short data[3];
1719} short3;
1720
1721typedef union {
1722 struct {
1723 unsigned short x;
1724 };
1725 unsigned short data;
1726} ushort1;
1727typedef union {
1728 struct {
1729 unsigned short x;
1730 unsigned short y;
1731 };
1732 unsigned short data[2];
1733} ushort2;
1734typedef union {
1735 struct {
1736 unsigned short x;
1737 unsigned short y;
1738 unsigned short z;
1739 unsigned short w;
1740 };
1741 unsigned short data[4];
1742} ushort4;
1743typedef union {
1744 unsigned short data[8];
1745} ushort8;
1746typedef union {
1747 unsigned short data[16];
1748} ushort16;
1749typedef union {
1750 struct {
1751 unsigned short x;
1752 unsigned short y;
1753 unsigned short z;
1754 };
1755 unsigned short data[3];
1756} ushort3;
1757
1758typedef union {
1759 struct {
1760 int x;
1761 };
1762 int data;
1763} int1;
1764typedef union {
1765 struct {
1766 int x;
1767 int y;
1768 };
1769 int data[2];
1770} int2;
1771typedef union {
1772 struct {
1773 int x;
1774 int y;
1775 int z;
1776 int w;
1777 };
1778 int data[4];
1779} int4;
1780typedef union {
1781 int data[8];
1782} int8;
1783typedef union {
1784 int data[16];
1785} int16;
1786typedef union {
1787 struct {
1788 int x;
1789 int y;
1790 int z;
1791 };
1792 int data[3];
1793} int3;
1794
1795typedef union {
1796 struct {
1797 unsigned int x;
1798 };
1799 unsigned int data;
1800} uint1;
1801typedef union {
1802 struct {
1803 unsigned int x;
1804 unsigned int y;
1805 };
1806 unsigned int data[2];
1807} uint2;
1808typedef union {
1809 struct {
1810 unsigned int x;
1811 unsigned int y;
1812 unsigned int z;
1813 unsigned int w;
1814 };
1815 unsigned int data[4];
1816} uint4;
1817typedef union {
1818 unsigned int data[8];
1819} uint8;
1820typedef union {
1821 unsigned int data[16];
1822} uint16;
1823typedef union {
1824 struct {
1825 unsigned int x;
1826 unsigned int y;
1827 unsigned int z;
1828 };
1829 unsigned int data[3];
1830} uint3;
1831
1832typedef union {
1833 struct {
1834 long x;
1835 };
1836 long data;
1837} long1;
1838typedef union {
1839 struct {
1840 long x;
1841 long y;
1842 };
1843 long data[2];
1844} long2;
1845typedef union {
1846 struct {
1847 long x;
1848 long y;
1849 long z;
1850 long w;
1851 };
1852 long data[4];
1853} long4;
1854typedef union {
1855 long data[8];
1856} long8;
1857typedef union {
1858 long data[16];
1859} long16;
1860typedef union {
1861 struct {
1862 long x;
1863 long y;
1864 long z;
1865 };
1866 long data[3];
1867} long3;
1868
1869typedef union {
1870 struct {
1871 unsigned long x;
1872 };
1873 unsigned long data;
1874} ulong1;
1875typedef union {
1876 struct {
1877 unsigned long x;
1878 unsigned long y;
1879 };
1880 unsigned long data[2];
1881} ulong2;
1882typedef union {
1883 struct {
1884 unsigned long x;
1885 unsigned long y;
1886 unsigned long z;
1887 unsigned long w;
1888 };
1889 unsigned long data[4];
1890} ulong4;
1891typedef union {
1892 unsigned long data[8];
1893} ulong8;
1894typedef union {
1895 unsigned long data[16];
1896} ulong16;
1897typedef union {
1898 struct {
1899 unsigned long x;
1900 unsigned long y;
1901 unsigned long z;
1902 };
1903 unsigned long data[3];
1904} ulong3;
1905
1906typedef union {
1907 struct {
1908 long long x;
1909 };
1910 long long data;
1911} longlong1;
1912typedef union {
1913 struct {
1914 long long x;
1915 long long y;
1916 };
1917 long long data[2];
1918} longlong2;
1919typedef union {
1920 struct {
1921 long long x;
1922 long long y;
1923 long long z;
1924 long long w;
1925 };
1926 long long data[4];
1927} longlong4;
1928typedef union {
1929 long long data[8];
1930} longlong8;
1931typedef union {
1932 long long data[16];
1933} longlong16;
1934typedef union {
1935 struct {
1936 long long x;
1937 long long y;
1938 long long z;
1939 };
1940 long long data[3];
1941} longlong3;
1942
1943typedef union {
1944 struct {
1945 unsigned long long x;
1946 };
1947 unsigned long long data;
1948} ulonglong1;
1949typedef union {
1950 struct {
1951 unsigned long long x;
1952 unsigned long long y;
1953 };
1954 unsigned long long data[2];
1955} ulonglong2;
1956typedef union {
1957 struct {
1958 unsigned long long x;
1959 unsigned long long y;
1960 unsigned long long z;
1961 unsigned long long w;
1962 };
1963 unsigned long long data[4];
1964} ulonglong4;
1965typedef union {
1966 unsigned long long data[8];
1967} ulonglong8;
1968typedef union {
1969 unsigned long long data[16];
1970} ulonglong16;
1971typedef union {
1972 struct {
1973 unsigned long long x;
1974 unsigned long long y;
1975 unsigned long long z;
1976 };
1977 unsigned long long data[3];
1978} ulonglong3;
1979
1980typedef union {
1981 struct {
1982 float x;
1983 };
1984 float data;
1985} float1;
1986typedef union {
1987 struct {
1988 float x;
1989 float y;
1990 };
1991 float data[2];
1992} float2;
1993typedef union {
1994 struct {
1995 float x;
1996 float y;
1997 float z;
1998 float w;
1999 };
2000 float data[4];
2001} float4;
2002typedef union {
2003 float data[8];
2004} float8;
2005typedef union {
2006 float data[16];
2007} float16;
2008typedef union {
2009 struct {
2010 float x;
2011 float y;
2012 float z;
2013 };
2014 float data[3];
2015} float3;
2016
2017typedef union {
2018 struct {
2019 double x;
2020 };
2021 double data;
2022} double1;
2023typedef union {
2024 struct {
2025 double x;
2026 double y;
2027 };
2028 double data[2];
2029} double2;
2030typedef union {
2031 struct {
2032 double x;
2033 double y;
2034 double z;
2035 double w;
2036 };
2037 double data[4];
2038} double4;
2039typedef union {
2040 double data[8];
2041} double8;
2042typedef union {
2043 double data[16];
2044} double16;
2045typedef union {
2046 struct {
2047 double x;
2048 double y;
2049 double z;
2050 };
2051 double data[3];
2052} double3;
2053
2054#endif // defined(_MSC_VER)
2055#endif // defined(__has_attribute)
2056
2057#ifdef __cplusplus
2058#define DECLOP_MAKE_ONE_COMPONENT(comp, type) \
2059 static inline __HOST_DEVICE__ type make_##type(comp x) { \
2060 type r{x}; \
2061 return r; \
2062 }
2063
2064#define DECLOP_MAKE_TWO_COMPONENT(comp, type) \
2065 static inline __HOST_DEVICE__ type make_##type(comp x, comp y) { \
2066 type r{x, y}; \
2067 return r; \
2068 }
2069
2070#define DECLOP_MAKE_THREE_COMPONENT(comp, type) \
2071 static inline __HOST_DEVICE__ type make_##type(comp x, comp y, comp z) { \
2072 type r{x, y, z}; \
2073 return r; \
2074 }
2075
2076#define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \
2077 static inline __HOST_DEVICE__ type make_##type(comp x, comp y, comp z, comp w) { \
2078 type r{x, y, z, w}; \
2079 return r; \
2080 }
2081#else
2082#define DECLOP_MAKE_ONE_COMPONENT(comp, type) \
2083 static inline __HOST_DEVICE__ type make_##type(comp x) { \
2084 type r; \
2085 r.x = x; \
2086 return r; \
2087 }
2088
2089#define DECLOP_MAKE_TWO_COMPONENT(comp, type) \
2090 static inline __HOST_DEVICE__ type make_##type(comp x, comp y) { \
2091 type r; \
2092 r.x = x; \
2093 r.y = y; \
2094 return r; \
2095 }
2096
2097#define DECLOP_MAKE_THREE_COMPONENT(comp, type) \
2098 static inline __HOST_DEVICE__ type make_##type(comp x, comp y, comp z) { \
2099 type r; \
2100 r.x = x; \
2101 r.y = y; \
2102 r.z = z; \
2103 return r; \
2104 }
2105
2106#define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \
2107 static inline __HOST_DEVICE__ type make_##type(comp x, comp y, comp z, comp w) { \
2108 type r; \
2109 r.x = x; \
2110 r.y = y; \
2111 r.z = z; \
2112 r.w = w; \
2113 return r; \
2114 }
2115#endif
2116
2117DECLOP_MAKE_ONE_COMPONENT(unsigned char, uchar1);
2118DECLOP_MAKE_TWO_COMPONENT(unsigned char, uchar2);
2119DECLOP_MAKE_THREE_COMPONENT(unsigned char, uchar3);
2120DECLOP_MAKE_FOUR_COMPONENT(unsigned char, uchar4);
2121
2122DECLOP_MAKE_ONE_COMPONENT(signed char, char1);
2123DECLOP_MAKE_TWO_COMPONENT(signed char, char2);
2124DECLOP_MAKE_THREE_COMPONENT(signed char, char3);
2125DECLOP_MAKE_FOUR_COMPONENT(signed char, char4);
2126
2127DECLOP_MAKE_ONE_COMPONENT(unsigned short, ushort1);
2128DECLOP_MAKE_TWO_COMPONENT(unsigned short, ushort2);
2129DECLOP_MAKE_THREE_COMPONENT(unsigned short, ushort3);
2130DECLOP_MAKE_FOUR_COMPONENT(unsigned short, ushort4);
2131
2132DECLOP_MAKE_ONE_COMPONENT(signed short, short1);
2133DECLOP_MAKE_TWO_COMPONENT(signed short, short2);
2134DECLOP_MAKE_THREE_COMPONENT(signed short, short3);
2135DECLOP_MAKE_FOUR_COMPONENT(signed short, short4);
2136
2137DECLOP_MAKE_ONE_COMPONENT(unsigned int, uint1);
2138DECLOP_MAKE_TWO_COMPONENT(unsigned int, uint2);
2139DECLOP_MAKE_THREE_COMPONENT(unsigned int, uint3);
2140DECLOP_MAKE_FOUR_COMPONENT(unsigned int, uint4);
2141
2142DECLOP_MAKE_ONE_COMPONENT(signed int, int1);
2143DECLOP_MAKE_TWO_COMPONENT(signed int, int2);
2144DECLOP_MAKE_THREE_COMPONENT(signed int, int3);
2145DECLOP_MAKE_FOUR_COMPONENT(signed int, int4);
2146
2147DECLOP_MAKE_ONE_COMPONENT(float, float1);
2148DECLOP_MAKE_TWO_COMPONENT(float, float2);
2149DECLOP_MAKE_THREE_COMPONENT(float, float3);
2150DECLOP_MAKE_FOUR_COMPONENT(float, float4);
2151
2152DECLOP_MAKE_ONE_COMPONENT(double, double1);
2153DECLOP_MAKE_TWO_COMPONENT(double, double2);
2154DECLOP_MAKE_THREE_COMPONENT(double, double3);
2155DECLOP_MAKE_FOUR_COMPONENT(double, double4);
2156
2157DECLOP_MAKE_ONE_COMPONENT(unsigned long, ulong1);
2158DECLOP_MAKE_TWO_COMPONENT(unsigned long, ulong2);
2159DECLOP_MAKE_THREE_COMPONENT(unsigned long, ulong3);
2160DECLOP_MAKE_FOUR_COMPONENT(unsigned long, ulong4);
2161
2162DECLOP_MAKE_ONE_COMPONENT(signed long, long1);
2163DECLOP_MAKE_TWO_COMPONENT(signed long, long2);
2164DECLOP_MAKE_THREE_COMPONENT(signed long, long3);
2165DECLOP_MAKE_FOUR_COMPONENT(signed long, long4);
2166
2167DECLOP_MAKE_ONE_COMPONENT(unsigned long long, ulonglong1);
2168DECLOP_MAKE_TWO_COMPONENT(unsigned long long, ulonglong2);
2169DECLOP_MAKE_THREE_COMPONENT(unsigned long long, ulonglong3);
2170DECLOP_MAKE_FOUR_COMPONENT(unsigned long long, ulonglong4);
2171
2172DECLOP_MAKE_ONE_COMPONENT(signed long long, longlong1);
2173DECLOP_MAKE_TWO_COMPONENT(signed long long, longlong2);
2174DECLOP_MAKE_THREE_COMPONENT(signed long long, longlong3);
2175DECLOP_MAKE_FOUR_COMPONENT(signed long long, longlong4);
2176
2177#endif
Definition amd_hip_vector_types.h:1610
Definition amd_hip_vector_types.h:1616
Definition amd_hip_vector_types.h:1623
Definition amd_hip_vector_types.h:1632
Definition amd_hip_vector_types.h:1635
Definition amd_hip_vector_types.h:1638
Definition amd_hip_vector_types.h:1647
Definition amd_hip_vector_types.h:1653
Definition amd_hip_vector_types.h:1660
Definition amd_hip_vector_types.h:1669
Definition amd_hip_vector_types.h:1672
Definition amd_hip_vector_types.h:1675
Definition amd_hip_vector_types.h:1684
Definition amd_hip_vector_types.h:1690
Definition amd_hip_vector_types.h:1697
Definition amd_hip_vector_types.h:1706
Definition amd_hip_vector_types.h:1709
Definition amd_hip_vector_types.h:1712
Definition amd_hip_vector_types.h:1721
Definition amd_hip_vector_types.h:1727
Definition amd_hip_vector_types.h:1734
Definition amd_hip_vector_types.h:1743
Definition amd_hip_vector_types.h:1746
Definition amd_hip_vector_types.h:1749
Definition amd_hip_vector_types.h:1758
Definition amd_hip_vector_types.h:1764
Definition amd_hip_vector_types.h:1771
Definition amd_hip_vector_types.h:1780
Definition amd_hip_vector_types.h:1783
Definition amd_hip_vector_types.h:1786
Definition amd_hip_vector_types.h:1795
Definition amd_hip_vector_types.h:1801
Definition amd_hip_vector_types.h:1808
Definition amd_hip_vector_types.h:1817
Definition amd_hip_vector_types.h:1820
Definition amd_hip_vector_types.h:1823
Definition amd_hip_vector_types.h:1832
Definition amd_hip_vector_types.h:1838
Definition amd_hip_vector_types.h:1845
Definition amd_hip_vector_types.h:1854
Definition amd_hip_vector_types.h:1857
Definition amd_hip_vector_types.h:1860
Definition amd_hip_vector_types.h:1869
Definition amd_hip_vector_types.h:1875
Definition amd_hip_vector_types.h:1882
Definition amd_hip_vector_types.h:1891
Definition amd_hip_vector_types.h:1894
Definition amd_hip_vector_types.h:1897
Definition amd_hip_vector_types.h:1906
Definition amd_hip_vector_types.h:1912
Definition amd_hip_vector_types.h:1919
Definition amd_hip_vector_types.h:1928
Definition amd_hip_vector_types.h:1931
Definition amd_hip_vector_types.h:1934
Definition amd_hip_vector_types.h:1943
Definition amd_hip_vector_types.h:1949
Definition amd_hip_vector_types.h:1956
Definition amd_hip_vector_types.h:1965
Definition amd_hip_vector_types.h:1968
Definition amd_hip_vector_types.h:1971
Definition amd_hip_vector_types.h:1980
Definition amd_hip_vector_types.h:1986
Definition amd_hip_vector_types.h:1993
Definition amd_hip_vector_types.h:2002
Definition amd_hip_vector_types.h:2005
Definition amd_hip_vector_types.h:2008
Definition amd_hip_vector_types.h:2017
Definition amd_hip_vector_types.h:2023
Definition amd_hip_vector_types.h:2030
Definition amd_hip_vector_types.h:2039
Definition amd_hip_vector_types.h:2042
Definition amd_hip_vector_types.h:2045