Crypto++ 8.6
Free C++ class library of cryptographic schemes
lsh512_avx.cpp
// lsh512_avx.cpp - written and placed in the public domain by Jeffrey Walton
// Based on the specification and source code provided by the
// Korea Internet & Security Agency (KISA) website. Also
// see https://seed.kisa.or.kr/kisa/algorithm/EgovLSHInfo.do
// and https://seed.kisa.or.kr/kisa/Board/22/detailView.do.
6
7// We are hitting some sort of GCC bug in the LSH AVX2 code path.
8// Clang is OK on the AVX2 code path. We believe it is GCC Issue
9// 82735, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735. It
10// makes using zeroupper a little tricky.
11
12#include "pch.h"
13#include "config.h"
14
15#include "lsh.h"
16#include "misc.h"
17
18#if defined(CRYPTOPP_AVX2_AVAILABLE) && defined(CRYPTOPP_ENABLE_64BIT_SSE)
19
20#if defined(CRYPTOPP_AVX2_AVAILABLE)
21# include <emmintrin.h>
22# include <immintrin.h>
23#endif
24
25// GCC at 4.5. Clang is unknown. Also see https://stackoverflow.com/a/42493893.
26#if (CRYPTOPP_GCC_VERSION >= 40500)
27# include <x86intrin.h>
28#endif
29
30ANONYMOUS_NAMESPACE_BEGIN
31
32/* LSH Constants */
33
34const unsigned int LSH512_MSG_BLK_BYTE_LEN = 256;
35// const unsigned int LSH512_MSG_BLK_BIT_LEN = 2048;
36// const unsigned int LSH512_CV_BYTE_LEN = 128;
37const unsigned int LSH512_HASH_VAL_MAX_BYTE_LEN = 64;
38
39// const unsigned int MSG_BLK_WORD_LEN = 32;
40const unsigned int CV_WORD_LEN = 16;
41const unsigned int CONST_WORD_LEN = 8;
42// const unsigned int HASH_VAL_MAX_WORD_LEN = 8;
43const unsigned int NUM_STEPS = 28;
44
45const unsigned int ROT_EVEN_ALPHA = 23;
46const unsigned int ROT_EVEN_BETA = 59;
47const unsigned int ROT_ODD_ALPHA = 7;
48const unsigned int ROT_ODD_BETA = 3;
49
50const unsigned int LSH_TYPE_512_512 = 0x0010040;
51const unsigned int LSH_TYPE_512_384 = 0x0010030;
52const unsigned int LSH_TYPE_512_256 = 0x0010020;
53const unsigned int LSH_TYPE_512_224 = 0x001001C;
54
55// const unsigned int LSH_TYPE_384 = LSH_TYPE_512_384;
56// const unsigned int LSH_TYPE_512 = LSH_TYPE_512_512;
57
58/* Error Code */
59
60const unsigned int LSH_SUCCESS = 0x0;
61// const unsigned int LSH_ERR_NULL_PTR = 0x2401;
62// const unsigned int LSH_ERR_INVALID_ALGTYPE = 0x2402;
63const unsigned int LSH_ERR_INVALID_DATABITLEN = 0x2403;
64const unsigned int LSH_ERR_INVALID_STATE = 0x2404;
65
66/* Index into our state array */
67
68const unsigned int AlgorithmType = 80;
69const unsigned int RemainingBits = 81;
70
71NAMESPACE_END
72
73NAMESPACE_BEGIN(CryptoPP)
74NAMESPACE_BEGIN(LSH)
75
76// lsh512.cpp
77extern const word64 LSH512_IV224[CV_WORD_LEN];
78extern const word64 LSH512_IV256[CV_WORD_LEN];
79extern const word64 LSH512_IV384[CV_WORD_LEN];
80extern const word64 LSH512_IV512[CV_WORD_LEN];
81extern const word64 LSH512_StepConstants[CONST_WORD_LEN * NUM_STEPS];
82
83NAMESPACE_END // LSH
84NAMESPACE_END // Crypto++
85
86ANONYMOUS_NAMESPACE_BEGIN
87
88using CryptoPP::byte;
89using CryptoPP::word32;
90using CryptoPP::word64;
91using CryptoPP::rotlFixed;
92using CryptoPP::rotlConstant;
93
94using CryptoPP::GetBlock;
95using CryptoPP::LittleEndian;
96using CryptoPP::ConditionalByteReverse;
97using CryptoPP::LITTLE_ENDIAN_ORDER;
98
99using CryptoPP::LSH::LSH512_IV224;
100using CryptoPP::LSH::LSH512_IV256;
101using CryptoPP::LSH::LSH512_IV384;
102using CryptoPP::LSH::LSH512_IV512;
103using CryptoPP::LSH::LSH512_StepConstants;
104
105typedef byte lsh_u8;
106typedef word32 lsh_u32;
107typedef word64 lsh_u64;
108typedef word32 lsh_uint;
109typedef word32 lsh_err;
110typedef word32 lsh_type;
111
112struct LSH512_AVX2_Context
113{
114 LSH512_AVX2_Context(word64* state, word64 algType, word64& remainingBitLength) :
115 cv_l(state+0), cv_r(state+8), sub_msgs(state+16),
116 last_block(reinterpret_cast<byte*>(state+48)),
117 remain_databitlen(remainingBitLength),
118 alg_type(static_cast<lsh_type>(algType)) {}
119
120 lsh_u64* cv_l; // start of our state block
121 lsh_u64* cv_r;
122 lsh_u64* sub_msgs;
123 lsh_u8* last_block;
124 lsh_u64& remain_databitlen;
125 lsh_type alg_type;
126};
127
128struct LSH512_AVX2_Internal
129{
130 LSH512_AVX2_Internal(word64* state) :
131 submsg_e_l(state+16), submsg_e_r(state+24),
132 submsg_o_l(state+32), submsg_o_r(state+40) { }
133
134 lsh_u64* submsg_e_l; /* even left sub-message */
135 lsh_u64* submsg_e_r; /* even right sub-message */
136 lsh_u64* submsg_o_l; /* odd left sub-message */
137 lsh_u64* submsg_o_r; /* odd right sub-message */
138};
139
// Scope guard that issues _mm256_zeroupper() when it is destroyed, which
// zeroes the upper 128 bits of all YMM registers on exit from the
// enclosing scope. Doing so avoids AVX state transition penalties when
// saving state. GCC Issue 82735,
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735,
// makes using zeroupper a little tricky.

struct AVX_Cleanup
{
    ~AVX_Cleanup()
    {
        _mm256_zeroupper();
    }
};
151
152// const lsh_u32 g_gamma512[8] = { 0, 16, 32, 48, 8, 24, 40, 56 };
153
154/* LSH AlgType Macro */
155
156inline bool LSH_IS_LSH512(lsh_uint val) {
157 return (val & 0xf0000) == 0x10000;
158}
159
160inline lsh_uint LSH_GET_SMALL_HASHBIT(lsh_uint val) {
161 return val >> 24;
162}
163
164inline lsh_uint LSH_GET_HASHBYTE(lsh_uint val) {
165 return val & 0xffff;
166}
167
168inline lsh_uint LSH_GET_HASHBIT(lsh_uint val) {
169 return (LSH_GET_HASHBYTE(val) << 3) - LSH_GET_SMALL_HASHBIT(val);
170}
171
172inline lsh_u64 loadLE64(lsh_u64 v) {
174}
175
176lsh_u64 ROTL64(lsh_u64 x, lsh_u32 r) {
177 return rotlFixed(x, r);
178}
179
180// Original code relied upon unaligned lsh_u64 buffer
181inline void load_msg_blk(LSH512_AVX2_Internal* i_state, const lsh_u8 msgblk[LSH512_MSG_BLK_BYTE_LEN])
182{
183 lsh_u64* submsg_e_l = i_state->submsg_e_l;
184 lsh_u64* submsg_e_r = i_state->submsg_e_r;
185 lsh_u64* submsg_o_l = i_state->submsg_o_l;
186 lsh_u64* submsg_o_r = i_state->submsg_o_r;
187
188 _mm256_storeu_si256(M256_CAST(submsg_e_l+0),
189 _mm256_loadu_si256(CONST_M256_CAST(msgblk+0)));
190 _mm256_storeu_si256(M256_CAST(submsg_e_l+4),
191 _mm256_loadu_si256(CONST_M256_CAST(msgblk+32)));
192
193 _mm256_storeu_si256(M256_CAST(submsg_e_r+0),
194 _mm256_loadu_si256(CONST_M256_CAST(msgblk+64)));
195 _mm256_storeu_si256(M256_CAST(submsg_e_r+4),
196 _mm256_loadu_si256(CONST_M256_CAST(msgblk+96)));
197
198 _mm256_storeu_si256(M256_CAST(submsg_o_l+0),
199 _mm256_loadu_si256(CONST_M256_CAST(msgblk+128)));
200 _mm256_storeu_si256(M256_CAST(submsg_o_l+4),
201 _mm256_loadu_si256(CONST_M256_CAST(msgblk+160)));
202
203 _mm256_storeu_si256(M256_CAST(submsg_o_r+0),
204 _mm256_loadu_si256(CONST_M256_CAST(msgblk+192)));
205 _mm256_storeu_si256(M256_CAST(submsg_o_r+4),
206 _mm256_loadu_si256(CONST_M256_CAST(msgblk+224)));
207}
208
209inline void msg_exp_even(LSH512_AVX2_Internal* i_state)
210{
211 CRYPTOPP_ASSERT(i_state != NULLPTR);
212
213 lsh_u64* submsg_e_l = i_state->submsg_e_l;
214 lsh_u64* submsg_e_r = i_state->submsg_e_r;
215 lsh_u64* submsg_o_l = i_state->submsg_o_l;
216 lsh_u64* submsg_o_r = i_state->submsg_o_r;
217
218 _mm256_storeu_si256(M256_CAST(submsg_e_l+0), _mm256_add_epi64(
219 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_l+0)),
220 _mm256_permute4x64_epi64(
221 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_l+0)),
222 _MM_SHUFFLE(1,0,2,3))));
223 _mm256_storeu_si256(M256_CAST(submsg_e_l+4), _mm256_add_epi64(
224 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_l+4)),
225 _mm256_permute4x64_epi64(
226 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_l+4)),
227 _MM_SHUFFLE(2,1,0,3))));
228
229 _mm256_storeu_si256(M256_CAST(submsg_e_r+0), _mm256_add_epi64(
230 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_r+0)),
231 _mm256_permute4x64_epi64(
232 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_r+0)),
233 _MM_SHUFFLE(1,0,2,3))));
234 _mm256_storeu_si256(M256_CAST(submsg_e_r+4), _mm256_add_epi64(
235 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_r+4)),
236 _mm256_permute4x64_epi64(
237 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_r+4)),
238 _MM_SHUFFLE(2,1,0,3))));
239}
240
241inline void msg_exp_odd(LSH512_AVX2_Internal* i_state)
242{
243 CRYPTOPP_ASSERT(i_state != NULLPTR);
244
245 lsh_u64* submsg_e_l = i_state->submsg_e_l;
246 lsh_u64* submsg_e_r = i_state->submsg_e_r;
247 lsh_u64* submsg_o_l = i_state->submsg_o_l;
248 lsh_u64* submsg_o_r = i_state->submsg_o_r;
249
250 _mm256_storeu_si256(M256_CAST(submsg_o_l+0),
251 _mm256_add_epi64(
252 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_l+0)),
253 _mm256_permute4x64_epi64(
254 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_l+0)),
255 _MM_SHUFFLE(1,0,2,3))));
256 _mm256_storeu_si256(M256_CAST(submsg_o_l+4),
257 _mm256_add_epi64(
258 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_l+4)),
259 _mm256_permute4x64_epi64(
260 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_l+4)),
261 _MM_SHUFFLE(2,1,0,3))));
262
263 _mm256_storeu_si256(M256_CAST(submsg_o_r+0),
264 _mm256_add_epi64(
265 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_r+0)),
266 _mm256_permute4x64_epi64(
267 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_r+0)),
268 _MM_SHUFFLE(1,0,2,3))));
269 _mm256_storeu_si256(M256_CAST(submsg_o_r+4),
270 _mm256_add_epi64(
271 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_r+4)),
272 _mm256_permute4x64_epi64(
273 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_r+4)),
274 _MM_SHUFFLE(2,1,0,3))));
275}
276
277inline void load_sc(const lsh_u64** p_const_v, size_t i)
278{
279 *p_const_v = &LSH512_StepConstants[i];
280}
281
282inline void msg_add_even(lsh_u64 cv_l[8], lsh_u64 cv_r[8], LSH512_AVX2_Internal* i_state)
283{
284 CRYPTOPP_ASSERT(i_state != NULLPTR);
285
286 lsh_u64* submsg_e_l = i_state->submsg_e_l;
287 lsh_u64* submsg_e_r = i_state->submsg_e_r;
288
289 _mm256_storeu_si256(M256_CAST(cv_l), _mm256_xor_si256(
290 _mm256_loadu_si256(CONST_M256_CAST(cv_l)),
291 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_l))));
292 _mm256_storeu_si256(M256_CAST(cv_r), _mm256_xor_si256(
293 _mm256_loadu_si256(CONST_M256_CAST(cv_r)),
294 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_r))));
295
296 _mm256_storeu_si256(M256_CAST(cv_l+4), _mm256_xor_si256(
297 _mm256_loadu_si256(CONST_M256_CAST(cv_l+4)),
298 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_l+4))));
299 _mm256_storeu_si256(M256_CAST(cv_r+4), _mm256_xor_si256(
300 _mm256_loadu_si256(CONST_M256_CAST(cv_r+4)),
301 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_r+4))));
302}
303
304inline void msg_add_odd(lsh_u64 cv_l[8], lsh_u64 cv_r[8], LSH512_AVX2_Internal* i_state)
305{
306 CRYPTOPP_ASSERT(i_state != NULLPTR);
307
308 lsh_u64* submsg_o_l = i_state->submsg_o_l;
309 lsh_u64* submsg_o_r = i_state->submsg_o_r;
310
311 _mm256_storeu_si256(M256_CAST(cv_l), _mm256_xor_si256(
312 _mm256_loadu_si256(CONST_M256_CAST(cv_l)),
313 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_l))));
314 _mm256_storeu_si256(M256_CAST(cv_r), _mm256_xor_si256(
315 _mm256_loadu_si256(CONST_M256_CAST(cv_r)),
316 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_r))));
317
318 _mm256_storeu_si256(M256_CAST(cv_l+4), _mm256_xor_si256(
319 _mm256_loadu_si256(CONST_M256_CAST(cv_l+4)),
320 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_l+4))));
321 _mm256_storeu_si256(M256_CAST(cv_r+4), _mm256_xor_si256(
322 _mm256_loadu_si256(CONST_M256_CAST(cv_r+4)),
323 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_r+4))));
324}
325
326inline void add_blk(lsh_u64 cv_l[8], lsh_u64 cv_r[8])
327{
328 _mm256_storeu_si256(M256_CAST(cv_l), _mm256_add_epi64(
329 _mm256_loadu_si256(CONST_M256_CAST(cv_l)),
330 _mm256_loadu_si256(CONST_M256_CAST(cv_r))));
331 _mm256_storeu_si256(M256_CAST(cv_l+4), _mm256_add_epi64(
332 _mm256_loadu_si256(CONST_M256_CAST(cv_l+4)),
333 _mm256_loadu_si256(CONST_M256_CAST(cv_r+4))));
334}
335
336template <unsigned int R>
337inline void rotate_blk(lsh_u64 cv[8])
338{
339 _mm256_storeu_si256(M256_CAST(cv), _mm256_or_si256(
340 _mm256_slli_epi64(_mm256_loadu_si256(CONST_M256_CAST(cv)), R),
341 _mm256_srli_epi64(_mm256_loadu_si256(CONST_M256_CAST(cv)), 64-R)));
342 _mm256_storeu_si256(M256_CAST(cv+4), _mm256_or_si256(
343 _mm256_slli_epi64(_mm256_loadu_si256(CONST_M256_CAST(cv+4)), R),
344 _mm256_srli_epi64(_mm256_loadu_si256(CONST_M256_CAST(cv+4)), 64-R)));
345}
346
347inline void xor_with_const(lsh_u64 cv_l[8], const lsh_u64 const_v[8])
348{
349 _mm256_storeu_si256(M256_CAST(cv_l), _mm256_xor_si256(
350 _mm256_loadu_si256(CONST_M256_CAST(cv_l)),
351 _mm256_loadu_si256(CONST_M256_CAST(const_v))));
352 _mm256_storeu_si256(M256_CAST(cv_l+4), _mm256_xor_si256(
353 _mm256_loadu_si256(CONST_M256_CAST(cv_l+4)),
354 _mm256_loadu_si256(CONST_M256_CAST(const_v+4))));
355}
356
357inline void rotate_msg_gamma(lsh_u64 cv_r[8])
358{
359 // g_gamma512[8] = { 0, 16, 32, 48, 8, 24, 40, 56 };
360 _mm256_storeu_si256(M256_CAST(cv_r+0),
361 _mm256_shuffle_epi8(
362 _mm256_loadu_si256(CONST_M256_CAST(cv_r+0)),
363 _mm256_set_epi8(
364 /* hi lane */ 9,8,15,14, 13,12,11,10, 3,2,1,0, 7,6,5,4,
365 /* lo lane */ 13,12,11,10, 9,8,15,14, 7,6,5,4, 3,2,1,0)));
366 _mm256_storeu_si256(M256_CAST(cv_r+4),
367 _mm256_shuffle_epi8(
368 _mm256_loadu_si256(CONST_M256_CAST(cv_r+4)),
369 _mm256_set_epi8(
370 /* hi lane */ 8,15,14,13, 12,11,10,9, 2,1,0,7, 6,5,4,3,
371 /* lo lane */ 12,11,10,9, 8,15,14,13, 6,5,4,3, 2,1,0,7)));
372}
373
374inline void word_perm(lsh_u64 cv_l[8], lsh_u64 cv_r[8])
375{
376 __m256i temp[2];
377 _mm256_storeu_si256(M256_CAST(cv_l+0), _mm256_permute4x64_epi64(
378 _mm256_loadu_si256(CONST_M256_CAST(cv_l+0)), _MM_SHUFFLE(3,1,0,2)));
379 _mm256_storeu_si256(M256_CAST(cv_l+4), _mm256_permute4x64_epi64(
380 _mm256_loadu_si256(CONST_M256_CAST(cv_l+4)), _MM_SHUFFLE(3,1,0,2)));
381 _mm256_storeu_si256(M256_CAST(cv_r+0), _mm256_permute4x64_epi64(
382 _mm256_loadu_si256(CONST_M256_CAST(cv_r+0)), _MM_SHUFFLE(1,2,3,0)));
383 _mm256_storeu_si256(M256_CAST(cv_r+4), _mm256_permute4x64_epi64(
384 _mm256_loadu_si256(CONST_M256_CAST(cv_r+4)), _MM_SHUFFLE(1,2,3,0)));
385
386 temp[0] = _mm256_loadu_si256(CONST_M256_CAST(cv_l+0));
387 temp[1] = _mm256_loadu_si256(CONST_M256_CAST(cv_r+0));
388
389 _mm256_storeu_si256(M256_CAST(cv_l+0),
390 _mm256_loadu_si256(CONST_M256_CAST(cv_l+4)));
391 _mm256_storeu_si256(M256_CAST(cv_l+4),
392 _mm256_loadu_si256(CONST_M256_CAST(cv_r+4)));
393
394 _mm256_storeu_si256(M256_CAST(cv_r+0), temp[0]);
395 _mm256_storeu_si256(M256_CAST(cv_r+4), temp[1]);
396};
397
398/* -------------------------------------------------------- *
399* step function
400* -------------------------------------------------------- */
401
402template <unsigned int Alpha, unsigned int Beta>
403inline void mix(lsh_u64 cv_l[8], lsh_u64 cv_r[8], const lsh_u64 const_v[8])
404{
405 add_blk(cv_l, cv_r);
406 rotate_blk<Alpha>(cv_l);
407 xor_with_const(cv_l, const_v);
408 add_blk(cv_r, cv_l);
409 rotate_blk<Beta>(cv_r);
410 add_blk(cv_l, cv_r);
411 rotate_msg_gamma(cv_r);
412}
413
414/* -------------------------------------------------------- *
415* compression function
416* -------------------------------------------------------- */
417
418inline void compress(LSH512_AVX2_Context* ctx, const lsh_u8 pdMsgBlk[LSH512_MSG_BLK_BYTE_LEN])
419{
420 CRYPTOPP_ASSERT(ctx != NULLPTR);
421
422 LSH512_AVX2_Internal s_state(ctx->cv_l);
423 LSH512_AVX2_Internal* i_state = &s_state;
424
425 const lsh_u64* const_v = NULL;
426 lsh_u64 *cv_l = ctx->cv_l;
427 lsh_u64 *cv_r = ctx->cv_r;
428
429 load_msg_blk(i_state, pdMsgBlk);
430
431 msg_add_even(cv_l, cv_r, i_state);
432 load_sc(&const_v, 0);
433 mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
434 word_perm(cv_l, cv_r);
435
436 msg_add_odd(cv_l, cv_r, i_state);
437 load_sc(&const_v, 8);
438 mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
439 word_perm(cv_l, cv_r);
440
441 for (size_t i = 1; i < NUM_STEPS / 2; i++)
442 {
443 msg_exp_even(i_state);
444 msg_add_even(cv_l, cv_r, i_state);
445 load_sc(&const_v, 16 * i);
446 mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
447 word_perm(cv_l, cv_r);
448
449 msg_exp_odd(i_state);
450 msg_add_odd(cv_l, cv_r, i_state);
451 load_sc(&const_v, 16 * i + 8);
452 mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
453 word_perm(cv_l, cv_r);
454 }
455
456 msg_exp_even(i_state);
457 msg_add_even(cv_l, cv_r, i_state);
458}
459
460/* -------------------------------------------------------- */
461
462inline void load_iv(word64 cv_l[8], word64 cv_r[8], const word64 iv[16])
463{
464 // The IV's are 32-byte aligned so we can use aligned loads.
465 _mm256_storeu_si256(M256_CAST(cv_l+0),
466 _mm256_load_si256(CONST_M256_CAST(iv+0)));
467 _mm256_storeu_si256(M256_CAST(cv_l+4),
468 _mm256_load_si256(CONST_M256_CAST(iv+4)));
469
470 _mm256_storeu_si256(M256_CAST(cv_r+0),
471 _mm256_load_si256(CONST_M256_CAST(iv+8)));
472 _mm256_storeu_si256(M256_CAST(cv_r+4),
473 _mm256_load_si256(CONST_M256_CAST(iv+12)));
474}
475
476inline void zero_iv(lsh_u64 cv_l[8], lsh_u64 cv_r[8])
477{
478 _mm256_storeu_si256(M256_CAST(cv_l+0), _mm256_setzero_si256());
479 _mm256_storeu_si256(M256_CAST(cv_l+4), _mm256_setzero_si256());
480 _mm256_storeu_si256(M256_CAST(cv_r+0), _mm256_setzero_si256());
481 _mm256_storeu_si256(M256_CAST(cv_r+4), _mm256_setzero_si256());
482}
483
484inline void zero_submsgs(LSH512_AVX2_Context* ctx)
485{
486 lsh_u64* sub_msgs = ctx->sub_msgs;
487
488 _mm256_storeu_si256(M256_CAST(sub_msgs+ 0),
489 _mm256_setzero_si256());
490 _mm256_storeu_si256(M256_CAST(sub_msgs+ 4),
491 _mm256_setzero_si256());
492
493 _mm256_storeu_si256(M256_CAST(sub_msgs+ 8),
494 _mm256_setzero_si256());
495 _mm256_storeu_si256(M256_CAST(sub_msgs+12),
496 _mm256_setzero_si256());
497}
498
499inline void init224(LSH512_AVX2_Context* ctx)
500{
501 CRYPTOPP_ASSERT(ctx != NULLPTR);
502
503 zero_submsgs(ctx);
504 load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV224);
505}
506
507inline void init256(LSH512_AVX2_Context* ctx)
508{
509 CRYPTOPP_ASSERT(ctx != NULLPTR);
510
511 zero_submsgs(ctx);
512 load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV256);
513}
514
515inline void init384(LSH512_AVX2_Context* ctx)
516{
517 CRYPTOPP_ASSERT(ctx != NULLPTR);
518
519 zero_submsgs(ctx);
520 load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV384);
521}
522
523inline void init512(LSH512_AVX2_Context* ctx)
524{
525 CRYPTOPP_ASSERT(ctx != NULLPTR);
526
527 zero_submsgs(ctx);
528 load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV512);
529}
530
531/* -------------------------------------------------------- */
532
533inline void fin(LSH512_AVX2_Context* ctx)
534{
535 CRYPTOPP_ASSERT(ctx != NULLPTR);
536
537 _mm256_storeu_si256(M256_CAST(ctx->cv_l+0), _mm256_xor_si256(
538 _mm256_loadu_si256(CONST_M256_CAST(ctx->cv_l+0)),
539 _mm256_loadu_si256(CONST_M256_CAST(ctx->cv_r+0))));
540
541 _mm256_storeu_si256(M256_CAST(ctx->cv_l+4), _mm256_xor_si256(
542 _mm256_loadu_si256(CONST_M256_CAST(ctx->cv_l+4)),
543 _mm256_loadu_si256(CONST_M256_CAST(ctx->cv_r+4))));
544}
545
546/* -------------------------------------------------------- */
547
548inline void get_hash(LSH512_AVX2_Context* ctx, lsh_u8* pbHashVal)
549{
550 CRYPTOPP_ASSERT(ctx != NULLPTR);
551 CRYPTOPP_ASSERT(ctx->alg_type != 0);
552 CRYPTOPP_ASSERT(pbHashVal != NULLPTR);
553
554 lsh_uint alg_type = ctx->alg_type;
555 lsh_uint hash_val_byte_len = LSH_GET_HASHBYTE(alg_type);
556 lsh_uint hash_val_bit_len = LSH_GET_SMALL_HASHBIT(alg_type);
557
558 // Multiplying by sizeof(lsh_u8) looks odd...
559 memcpy(pbHashVal, ctx->cv_l, hash_val_byte_len);
560 if (hash_val_bit_len){
561 pbHashVal[hash_val_byte_len-1] &= (((lsh_u8)0xff) << hash_val_bit_len);
562 }
563}
564
565/* -------------------------------------------------------- */
566
567lsh_err lsh512_init_avx2(LSH512_AVX2_Context* ctx)
568{
569 CRYPTOPP_ASSERT(ctx != NULLPTR);
570 CRYPTOPP_ASSERT(ctx->alg_type != 0);
571
572 lsh_u32 alg_type = ctx->alg_type;
573 const lsh_u64* const_v = NULL;
574 ctx->remain_databitlen = 0;
575
576 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735.
577 AVX_Cleanup cleanup;
578
579 switch (alg_type){
580 case LSH_TYPE_512_512:
581 init512(ctx);
582 return LSH_SUCCESS;
583 case LSH_TYPE_512_384:
584 init384(ctx);
585 return LSH_SUCCESS;
586 case LSH_TYPE_512_256:
587 init256(ctx);
588 return LSH_SUCCESS;
589 case LSH_TYPE_512_224:
590 init224(ctx);
591 return LSH_SUCCESS;
592 default:
593 break;
594 }
595
596 lsh_u64* cv_l = ctx->cv_l;
597 lsh_u64* cv_r = ctx->cv_r;
598
599 zero_iv(cv_l, cv_r);
600 cv_l[0] = LSH512_HASH_VAL_MAX_BYTE_LEN;
601 cv_l[1] = LSH_GET_HASHBIT(alg_type);
602
603 for (size_t i = 0; i < NUM_STEPS / 2; i++)
604 {
605 //Mix
606 load_sc(&const_v, i * 16);
607 mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
608 word_perm(cv_l, cv_r);
609
610 load_sc(&const_v, i * 16 + 8);
611 mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
612 word_perm(cv_l, cv_r);
613 }
614
615 return LSH_SUCCESS;
616}
617
618lsh_err lsh512_update_avx2(LSH512_AVX2_Context* ctx, const lsh_u8* data, size_t databitlen)
619{
620 CRYPTOPP_ASSERT(ctx != NULLPTR);
621 CRYPTOPP_ASSERT(data != NULLPTR);
622 CRYPTOPP_ASSERT(databitlen % 8 == 0);
623 CRYPTOPP_ASSERT(ctx->alg_type != 0);
624
625 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735.
626 AVX_Cleanup cleanup;
627
628 if (databitlen == 0){
629 return LSH_SUCCESS;
630 }
631
632 // We are byte oriented. tail bits will always be 0.
633 size_t databytelen = databitlen >> 3;
634 // lsh_uint pos2 = databitlen & 0x7;
635 const size_t pos2 = 0;
636
637 size_t remain_msg_byte = static_cast<size_t>(ctx->remain_databitlen >> 3);
638 // lsh_uint remain_msg_bit = ctx->remain_databitlen & 7;
639 const size_t remain_msg_bit = 0;
640
641 if (remain_msg_byte >= LSH512_MSG_BLK_BYTE_LEN){
642 return LSH_ERR_INVALID_STATE;
643 }
644 if (remain_msg_bit > 0){
645 return LSH_ERR_INVALID_DATABITLEN;
646 }
647
648 if (databytelen + remain_msg_byte < LSH512_MSG_BLK_BYTE_LEN){
649 memcpy(ctx->last_block + remain_msg_byte, data, databytelen);
650 ctx->remain_databitlen += (lsh_uint)databitlen;
651 remain_msg_byte += (lsh_uint)databytelen;
652 if (pos2){
653 ctx->last_block[remain_msg_byte] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
654 }
655 return LSH_SUCCESS;
656 }
657
658 if (remain_msg_byte > 0){
659 size_t more_byte = LSH512_MSG_BLK_BYTE_LEN - remain_msg_byte;
660 memcpy(ctx->last_block + remain_msg_byte, data, more_byte);
661 compress(ctx, ctx->last_block);
662 data += more_byte;
663 databytelen -= more_byte;
664 remain_msg_byte = 0;
665 ctx->remain_databitlen = 0;
666 }
667
668 while (databytelen >= LSH512_MSG_BLK_BYTE_LEN)
669 {
670 // This call to compress caused some trouble.
671 // The data pointer can become unaligned in the
672 // previous block.
673 compress(ctx, data);
674 data += LSH512_MSG_BLK_BYTE_LEN;
675 databytelen -= LSH512_MSG_BLK_BYTE_LEN;
676 }
677
678 if (databytelen > 0){
679 memcpy(ctx->last_block, data, databytelen);
680 ctx->remain_databitlen = (lsh_uint)(databytelen << 3);
681 }
682
683 if (pos2){
684 ctx->last_block[databytelen] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
685 ctx->remain_databitlen += pos2;
686 }
687 return LSH_SUCCESS;
688}
689
690lsh_err lsh512_final_avx2(LSH512_AVX2_Context* ctx, lsh_u8* hashval)
691{
692 CRYPTOPP_ASSERT(ctx != NULLPTR);
693 CRYPTOPP_ASSERT(hashval != NULLPTR);
694
695 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735.
696 AVX_Cleanup cleanup;
697
698 // We are byte oriented. tail bits will always be 0.
699 size_t remain_msg_byte = static_cast<size_t>(ctx->remain_databitlen >> 3);
700 // lsh_uint remain_msg_bit = ctx->remain_databitlen & 7;
701 const size_t remain_msg_bit = 0;
702
703 if (remain_msg_byte >= LSH512_MSG_BLK_BYTE_LEN){
704 return LSH_ERR_INVALID_STATE;
705 }
706
707 if (remain_msg_bit){
708 ctx->last_block[remain_msg_byte] |= (0x1 << (7 - remain_msg_bit));
709 }
710 else{
711 ctx->last_block[remain_msg_byte] = 0x80;
712 }
713 memset(ctx->last_block + remain_msg_byte + 1, 0, LSH512_MSG_BLK_BYTE_LEN - remain_msg_byte - 1);
714
715 compress(ctx, ctx->last_block);
716
717 fin(ctx);
718 get_hash(ctx, hashval);
719
720 return LSH_SUCCESS;
721}
722
723ANONYMOUS_NAMESPACE_END
724
725NAMESPACE_BEGIN(CryptoPP)
726
727extern
728void LSH512_Base_Restart_AVX2(word64* state)
729{
730 state[RemainingBits] = 0;
731 LSH512_AVX2_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
732 lsh_err err = lsh512_init_avx2(&ctx);
733
734 if (err != LSH_SUCCESS)
735 throw Exception(Exception::OTHER_ERROR, "LSH512_Base: lsh512_init_avx2 failed");
736}
737
738extern
739void LSH512_Base_Update_AVX2(word64* state, const byte *input, size_t size)
740{
741 LSH512_AVX2_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
742 lsh_err err = lsh512_update_avx2(&ctx, input, 8*size);
743
744 if (err != LSH_SUCCESS)
745 throw Exception(Exception::OTHER_ERROR, "LSH512_Base: lsh512_update_avx2 failed");
746}
747
748extern
749void LSH512_Base_TruncatedFinal_AVX2(word64* state, byte *hash, size_t)
750{
751 LSH512_AVX2_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
752 lsh_err err = lsh512_final_avx2(&ctx, hash);
753
754 if (err != LSH_SUCCESS)
755 throw Exception(Exception::OTHER_ERROR, "LSH512_Base: lsh512_final_avx2 failed");
756}
757
758NAMESPACE_END
759
760#endif // CRYPTOPP_AVX2_AVAILABLE
Base class for all exceptions thrown by the library.
Definition: cryptlib.h:159
@ OTHER_ERROR
Some other error occurred not belonging to other categories.
Definition: cryptlib.h:177
Library configuration file.
unsigned char byte
8-bit unsigned datatype
Definition: config_int.h:56
unsigned int word32
32-bit unsigned datatype
Definition: config_int.h:62
unsigned long long word64
64-bit unsigned datatype
Definition: config_int.h:91
@ LITTLE_ENDIAN_ORDER
byte order is little-endian
Definition: cryptlib.h:145
Classes for the LSH hash functions.
Utility functions for the Crypto++ library.
T ConditionalByteReverse(ByteOrder order, T value)
Reverses bytes in a value depending upon endianness.
Definition: misc.h:2187
T rotlFixed(T x, unsigned int y)
Performs a left rotate.
Definition: misc.h:1598
Crypto++ library namespace.
Precompiled header file.
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Definition: trap.h:68