// NOTE(review): the leading integers on each statement (20, 21, 50, ...) look
// like listing line numbers carried over from an extracted source view, and
// the gaps between them suggest that braces, conditionals and some statements
// were dropped. Confirm against the real file before relying on this text.
//
// The next line carries, in order: the x86 intrinsic headers (included only
// when CRYPTOPP_SHANI_AVAILABLE is set), the ARM NEON / ACLE headers (under
// their own availability macros), the M128_CAST / CONST_M128_CAST helpers
// that convert a pointer to (const) __m128i* through an intermediate void*,
// and the opening of SHACAL2_Enc_ProcessAndXorBlock_SHANI.
//
// SHACAL2_Enc_ProcessAndXorBlock_SHANI parameters, as used below:
//   subKeys  - round-key words; read as 8 groups of 32 bytes with
//              _mm_load_si128 (the aligned-load intrinsic)
//   inBlock  - 32 input bytes, read with unaligned loads
//   xorBlock - 32 bytes XORed into the result on the XOR store path
//   outBlock - receives 32 output bytes through unaligned stores
20 #if (CRYPTOPP_SHANI_AVAILABLE) 21 # include <nmmintrin.h> 22 # include <immintrin.h> 26 #if (CRYPTOPP_ARM_SHA_AVAILABLE) 27 # include <arm_neon.h> 32 #if defined(CRYPTOPP_ARM_ACLE_AVAILABLE) 34 # include <arm_acle.h> 38 #define M128_CAST(x) ((__m128i *)(void *)(x)) 39 #define CONST_M128_CAST(x) ((const __m128i *)(const void *)(x)) 43 #if CRYPTOPP_SHANI_AVAILABLE 44 void SHACAL2_Enc_ProcessAndXorBlock_SHANI(
const word32* subKeys,
const byte *inBlock,
const byte *xorBlock, byte *outBlock)
// Byte-permutation masks consumed by _mm_shuffle_epi8 below.
50 const __m128i MASK1 = _mm_set_epi8(8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7);
51 const __m128i MASK2 = _mm_set_epi8(0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15);
// Load the two 16-byte halves of the input (unaligned) and permute their
// bytes: the first half with MASK1, the second half with MASK2.
53 __m128i B0 = _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(inBlock + 0)), MASK1);
54 __m128i B1 = _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(inBlock + 16)), MASK2);
// Recombine the halves. TMP: low 8 bytes are the high 8 bytes of B1, high 8
// bytes are the low 8 bytes of B0. B1: lower four 16-bit lanes kept from B1,
// upper four taken from B0 (blend mask 0xF0).
// NOTE(review): TMP is never read in the visible lines; an assignment such as
// `B0 = TMP` was presumably lost in extraction - TODO confirm.
56 __m128i TMP = _mm_alignr_epi8(B0, B1, 8);
57 B1 = _mm_blend_epi16(B1, B0, 0xF0);
// NOTE(review): MASK1/MASK2/B0/B1/TMP are declared a second time here. This is
// presumably an alternative load path disabled by a preprocessor guard that is
// not visible in this listing - TODO confirm. This variant loads both halves
// unshuffled, interleaves their 64-bit halves with unpacklo/unpackhi, and
// applies MASK2 to each result.
62 const __m128i MASK1 = _mm_set_epi8(8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7);
63 const __m128i MASK2 = _mm_set_epi8(0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15);
65 __m128i B0 = _mm_loadu_si128(CONST_M128_CAST(inBlock + 0));
66 __m128i B1 = _mm_loadu_si128(CONST_M128_CAST(inBlock + 16));
68 __m128i TMP = _mm_shuffle_epi8(_mm_unpacklo_epi64(B0, B1), MASK2);
69 B1 = _mm_shuffle_epi8(_mm_unpackhi_epi64(B0, B1), MASK2);
// View the round keys as bytes so the offsets in the loop are byte offsets.
73 const byte* keys =
reinterpret_cast<const byte*
>(subKeys);
// Eight iterations; each one consumes 32 bytes of key material and issues
// four _mm_sha256rnds2_epu32 calls.
74 for (
size_t i = 0; i != 8; ++i)
// RK0 / RK2: two consecutive 16-byte key vectors (aligned loads).
76 const __m128i RK0 = _mm_load_si128(CONST_M128_CAST(keys + 32*i));
77 const __m128i RK2 = _mm_load_si128(CONST_M128_CAST(keys + 32*i+16));
// RK1 / RK3: the upper 8 bytes of RK0 / RK2 moved down into the low 8 bytes
// (whole-register byte shift right by 8, zero filled).
78 const __m128i RK1 = _mm_srli_si128(RK0, 8);
79 const __m128i RK3 = _mm_srli_si128(RK2, 8);
// The destination alternates between B1 and B0, so every call takes the
// previous call's result as its second operand.
81 B1 = _mm_sha256rnds2_epu32(B1, B0, RK0);
82 B0 = _mm_sha256rnds2_epu32(B0, B1, RK1);
83 B1 = _mm_sha256rnds2_epu32(B1, B0, RK2);
84 B0 = _mm_sha256rnds2_epu32(B0, B1, RK3);
// Rearrange the state for output: interleave 64-bit halves of B0/B1 and
// permute the bytes with MASK1.
// NOTE(review): as above, TMP is not consumed in the visible lines; a
// following `B0 = TMP` is presumably missing - TODO confirm.
87 TMP = _mm_shuffle_epi8(_mm_unpackhi_epi64(B0, B1), MASK1);
88 B1 = _mm_shuffle_epi8(_mm_unpacklo_epi64(B0, B1), MASK1);
// XOR store path: write (result XOR xorBlock), 16 bytes at a time, using
// unaligned loads and stores.
// NOTE(review): the condition that selects between this path and the plain
// store below is not visible; presumably a null check on xorBlock - TODO
// confirm.
93 _mm_storeu_si128(M128_CAST(outBlock + 0),
94 _mm_xor_si128(B0, _mm_loadu_si128(CONST_M128_CAST(xorBlock + 0))));
96 _mm_storeu_si128(M128_CAST(outBlock + 16),
97 _mm_xor_si128(B1, _mm_loadu_si128(CONST_M128_CAST(xorBlock + 16))));
// Plain store path: write the result without XORing anything in.
101 _mm_storeu_si128(M128_CAST(outBlock + 0), B0);
102 _mm_storeu_si128(M128_CAST(outBlock + 16), B1);
Utility functions for the Crypto++ library.
Library configuration file.
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Classes for SHA-1 and SHA-2 family of message digests.
Crypto++ library namespace.