Crypto++ 8.6
Free C++ class library of cryptographic schemes
lsh512_sse.cpp
// lsh.cpp - written and placed in the public domain by Jeffrey Walton
// Based on the specification and source code provided by
// Korea Internet & Security Agency (KISA) website. Also
// see https://seed.kisa.or.kr/kisa/algorithm/EgovLSHInfo.do
// and https://seed.kisa.or.kr/kisa/Board/22/detailView.do.

// We are hitting some sort of GCC bug in the LSH AVX2 code path.
// Clang is OK on the AVX2 code path. We believe it is GCC Issue
// 82735, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735. It
// makes using zeroupper a little tricky.
#include "pch.h"
#include "config.h"

#include "lsh.h"
#include "misc.h"

// This translation unit is only compiled in when SSSE3 and 64-bit SSE
// support are both enabled; otherwise the portable lsh512.cpp is used.
#if defined(CRYPTOPP_SSSE3_AVAILABLE) && defined(CRYPTOPP_ENABLE_64BIT_SSE)

#if defined(CRYPTOPP_SSSE3_AVAILABLE)
# include <emmintrin.h>
# include <tmmintrin.h>
#endif

#if defined(CRYPTOPP_XOP_AVAILABLE)
# include <ammintrin.h>
#endif

// GCC at 4.5. Clang is unknown. Also see https://stackoverflow.com/a/42493893.
#if (CRYPTOPP_GCC_VERSION >= 40500)
# include <x86intrin.h>
#endif
33
ANONYMOUS_NAMESPACE_BEGIN

/* LSH Constants */

const unsigned int LSH512_MSG_BLK_BYTE_LEN = 256;
// const unsigned int LSH512_MSG_BLK_BIT_LEN = 2048;
// const unsigned int LSH512_CV_BYTE_LEN = 128;
const unsigned int LSH512_HASH_VAL_MAX_BYTE_LEN = 64;

// const unsigned int MSG_BLK_WORD_LEN = 32;
const unsigned int CV_WORD_LEN = 16;
const unsigned int CONST_WORD_LEN = 8;
// const unsigned int HASH_VAL_MAX_WORD_LEN = 8;
const unsigned int NUM_STEPS = 28;

// Word rotation amounts used by the even and odd step mix functions
const unsigned int ROT_EVEN_ALPHA = 23;
const unsigned int ROT_EVEN_BETA = 59;
const unsigned int ROT_ODD_ALPHA = 7;
const unsigned int ROT_ODD_BETA = 3;

// Algorithm type codes; the low 16 bits hold the digest byte length
const unsigned int LSH_TYPE_512_512 = 0x0010040;
const unsigned int LSH_TYPE_512_384 = 0x0010030;
const unsigned int LSH_TYPE_512_256 = 0x0010020;
const unsigned int LSH_TYPE_512_224 = 0x001001C;

// const unsigned int LSH_TYPE_384 = LSH_TYPE_512_384;
// const unsigned int LSH_TYPE_512 = LSH_TYPE_512_512;

/* Error Code */

const unsigned int LSH_SUCCESS = 0x0;
// const unsigned int LSH_ERR_NULL_PTR = 0x2401;
// const unsigned int LSH_ERR_INVALID_ALGTYPE = 0x2402;
const unsigned int LSH_ERR_INVALID_DATABITLEN = 0x2403;
const unsigned int LSH_ERR_INVALID_STATE = 0x2404;

/* Index into our state array */

const unsigned int AlgorithmType = 80;
const unsigned int RemainingBits = 81;

NAMESPACE_END
76
NAMESPACE_BEGIN(CryptoPP)
NAMESPACE_BEGIN(LSH)

// IVs and step constants are defined once in lsh512.cpp and shared
// by every LSH-512 implementation (portable, SSSE3, AVX2).
extern const word64 LSH512_IV224[CV_WORD_LEN];
extern const word64 LSH512_IV256[CV_WORD_LEN];
extern const word64 LSH512_IV384[CV_WORD_LEN];
extern const word64 LSH512_IV512[CV_WORD_LEN];
extern const word64 LSH512_StepConstants[CONST_WORD_LEN * NUM_STEPS];

NAMESPACE_END  // LSH
NAMESPACE_END  // Crypto++
89
ANONYMOUS_NAMESPACE_BEGIN

using CryptoPP::byte;
using CryptoPP::word32;
using CryptoPP::word64;
using CryptoPP::rotlFixed;
using CryptoPP::rotlConstant;

using CryptoPP::GetBlock;
using CryptoPP::LittleEndian;
using CryptoPP::ConditionalByteReverse;
using CryptoPP::LITTLE_ENDIAN_ORDER;

using CryptoPP::LSH::LSH512_IV224;
using CryptoPP::LSH::LSH512_IV256;
using CryptoPP::LSH::LSH512_IV384;
using CryptoPP::LSH::LSH512_IV512;
using CryptoPP::LSH::LSH512_StepConstants;

// Aliases matching the names used in the KISA reference implementation
typedef byte lsh_u8;      // octet
typedef word32 lsh_u32;   // 32-bit word
typedef word64 lsh_u64;   // 64-bit word (the LSH-512 word size)
typedef word32 lsh_uint;
typedef word32 lsh_err;   // error/status code
typedef word32 lsh_type;  // algorithm type code
115
// Non-owning view of the caller's word64 state array. Layout implied by
// the constructor: cv_l at words [0,8), cv_r at [8,16), sub-messages at
// [16,48), the buffered partial input block (as bytes) at words [48,80),
// with the AlgorithmType/RemainingBits bookkeeping words above that.
struct LSH512_SSSE3_Context
{
    LSH512_SSSE3_Context(word64* state, word64 algType, word64& remainingBitLength) :
        cv_l(state+0), cv_r(state+8), sub_msgs(state+16),
        last_block(reinterpret_cast<byte*>(state+48)),
        remain_databitlen(remainingBitLength),
        alg_type(static_cast<lsh_type>(algType)) {}

    lsh_u64* cv_l;  // start of our state block (left chaining value)
    lsh_u64* cv_r;  // right chaining value
    lsh_u64* sub_msgs;  // expanded sub-message area
    lsh_u8* last_block;  // partial input block awaiting compression
    lsh_u64& remain_databitlen;  // bits buffered in last_block
    lsh_type alg_type;
};
131
// Carves the sub-message area (state words [16,48)) into the four
// 8-word buffers used by message loading and expansion.
struct LSH512_SSSE3_Internal
{
    LSH512_SSSE3_Internal(word64* state) :
        submsg_e_l(state+16), submsg_e_r(state+24),
        submsg_o_l(state+32), submsg_o_r(state+40) { }

    lsh_u64* submsg_e_l; /* even left sub-message */
    lsh_u64* submsg_e_r; /* even right sub-message */
    lsh_u64* submsg_o_l; /* odd left sub-message */
    lsh_u64* submsg_o_r; /* odd right sub-message */
};
143
144// const lsh_u32 g_gamma512[8] = { 0, 16, 32, 48, 8, 24, 40, 56 };
145
146/* LSH AlgType Macro */
147
148inline bool LSH_IS_LSH512(lsh_uint val) {
149 return (val & 0xf0000) == 0x10000;
150}
151
152inline lsh_uint LSH_GET_SMALL_HASHBIT(lsh_uint val) {
153 return val >> 24;
154}
155
156inline lsh_uint LSH_GET_HASHBYTE(lsh_uint val) {
157 return val & 0xffff;
158}
159
160inline lsh_uint LSH_GET_HASHBIT(lsh_uint val) {
161 return (LSH_GET_HASHBYTE(val) << 3) - LSH_GET_SMALL_HASHBIT(val);
162}
163
164inline lsh_u64 loadLE64(lsh_u64 v) {
166}
167
// Rotate x left by r bits (r is a runtime value, so rotlFixed is used
// rather than the compile-time rotlConstant).
lsh_u64 ROTL64(lsh_u64 x, lsh_u32 r) {
    return rotlFixed(x, r);
}
171
172// Original code relied upon unaligned lsh_u64 buffer
173inline void load_msg_blk(LSH512_SSSE3_Internal* i_state, const lsh_u8 msgblk[LSH512_MSG_BLK_BYTE_LEN])
174{
175 lsh_u64* submsg_e_l = i_state->submsg_e_l;
176 lsh_u64* submsg_e_r = i_state->submsg_e_r;
177 lsh_u64* submsg_o_l = i_state->submsg_o_l;
178 lsh_u64* submsg_o_r = i_state->submsg_o_r;
179
180 _mm_storeu_si128(M128_CAST(submsg_e_l+0),
181 _mm_loadu_si128(CONST_M128_CAST(msgblk+0)));
182 _mm_storeu_si128(M128_CAST(submsg_e_l+2),
183 _mm_loadu_si128(CONST_M128_CAST(msgblk+16)));
184 _mm_storeu_si128(M128_CAST(submsg_e_l+4),
185 _mm_loadu_si128(CONST_M128_CAST(msgblk+32)));
186 _mm_storeu_si128(M128_CAST(submsg_e_l+6),
187 _mm_loadu_si128(CONST_M128_CAST(msgblk+48)));
188
189 _mm_storeu_si128(M128_CAST(submsg_e_r+0),
190 _mm_loadu_si128(CONST_M128_CAST(msgblk+64)));
191 _mm_storeu_si128(M128_CAST(submsg_e_r+2),
192 _mm_loadu_si128(CONST_M128_CAST(msgblk+80)));
193 _mm_storeu_si128(M128_CAST(submsg_e_r+4),
194 _mm_loadu_si128(CONST_M128_CAST(msgblk+96)));
195 _mm_storeu_si128(M128_CAST(submsg_e_r+6),
196 _mm_loadu_si128(CONST_M128_CAST(msgblk+112)));
197
198 _mm_storeu_si128(M128_CAST(submsg_o_l+0),
199 _mm_loadu_si128(CONST_M128_CAST(msgblk+128)));
200 _mm_storeu_si128(M128_CAST(submsg_o_l+2),
201 _mm_loadu_si128(CONST_M128_CAST(msgblk+144)));
202 _mm_storeu_si128(M128_CAST(submsg_o_l+4),
203 _mm_loadu_si128(CONST_M128_CAST(msgblk+160)));
204 _mm_storeu_si128(M128_CAST(submsg_o_l+6),
205 _mm_loadu_si128(CONST_M128_CAST(msgblk+176)));
206
207 _mm_storeu_si128(M128_CAST(submsg_o_r+0),
208 _mm_loadu_si128(CONST_M128_CAST(msgblk+192)));
209 _mm_storeu_si128(M128_CAST(submsg_o_r+2),
210 _mm_loadu_si128(CONST_M128_CAST(msgblk+208)));
211 _mm_storeu_si128(M128_CAST(submsg_o_r+4),
212 _mm_loadu_si128(CONST_M128_CAST(msgblk+224)));
213 _mm_storeu_si128(M128_CAST(submsg_o_r+6),
214 _mm_loadu_si128(CONST_M128_CAST(msgblk+240)));
215}
216
// Message expansion for an even step: permute the eight 64-bit words of
// each even sub-message half (presumably the spec's word permutation --
// TODO confirm against the LSH specification), then add the odd
// sub-message word-wise into the result. Statement order matters: the
// temp register preserves a lane that is overwritten mid-sequence.
inline void msg_exp_even(LSH512_SSSE3_Internal* i_state)
{
    CRYPTOPP_ASSERT(i_state != NULLPTR);

    lsh_u64* submsg_e_l = i_state->submsg_e_l;
    lsh_u64* submsg_e_r = i_state->submsg_e_r;
    lsh_u64* submsg_o_l = i_state->submsg_o_l;
    lsh_u64* submsg_o_r = i_state->submsg_o_r;

    __m128i temp;
    // Left half: swap the two words of lane [2,3], then exchange
    // lanes [0,1] and [2,3], then cross-mix lanes [4,5] and [6,7].
    _mm_storeu_si128(M128_CAST(submsg_e_l+2), _mm_shuffle_epi32(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+2)), _MM_SHUFFLE(1,0,3,2)));

    temp = _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+0));
    _mm_storeu_si128(M128_CAST(submsg_e_l+0),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+2)));
    _mm_storeu_si128(M128_CAST(submsg_e_l+2), temp);
    _mm_storeu_si128(M128_CAST(submsg_e_l+6), _mm_shuffle_epi32(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6)), _MM_SHUFFLE(1,0,3,2)));

    temp = _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4));
    _mm_storeu_si128(M128_CAST(submsg_e_l+4), _mm_unpacklo_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4))));
    _mm_storeu_si128(M128_CAST(submsg_e_l+6), _mm_unpackhi_epi64(
        temp, _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6))));
    // Right half: same permutation pattern.
    _mm_storeu_si128(M128_CAST(submsg_e_r+2), _mm_shuffle_epi32(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+2)), _MM_SHUFFLE(1,0,3,2)));

    temp = _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+0));
    _mm_storeu_si128(M128_CAST(submsg_e_r+0),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+2)));
    _mm_storeu_si128(M128_CAST(submsg_e_r+2), temp);
    _mm_storeu_si128(M128_CAST(submsg_e_r+6), _mm_shuffle_epi32(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6)), _MM_SHUFFLE(1,0,3,2)));

    temp = _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4));
    _mm_storeu_si128(M128_CAST(submsg_e_r+4), _mm_unpacklo_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4))));
    _mm_storeu_si128(M128_CAST(submsg_e_r+6), _mm_unpackhi_epi64(
        temp, _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6))));

    // Add the odd sub-message into the permuted even sub-message.
    _mm_storeu_si128(M128_CAST(submsg_e_l+0), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+0)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+0))));
    _mm_storeu_si128(M128_CAST(submsg_e_l+2), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+2)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+2))));
    _mm_storeu_si128(M128_CAST(submsg_e_l+4), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4))));
    _mm_storeu_si128(M128_CAST(submsg_e_l+6), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6))));

    _mm_storeu_si128(M128_CAST(submsg_e_r+0), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+0)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+0))));
    _mm_storeu_si128(M128_CAST(submsg_e_r+2), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+2)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+2))));
    _mm_storeu_si128(M128_CAST(submsg_e_r+4), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4))));
    _mm_storeu_si128(M128_CAST(submsg_e_r+6), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6))));
}
286
// Message expansion for an odd step: the mirror of msg_exp_even --
// permute the odd sub-message halves, then add the even sub-message
// word-wise. Statement order matters (temp preserves an overwritten lane).
inline void msg_exp_odd(LSH512_SSSE3_Internal* i_state)
{
    CRYPTOPP_ASSERT(i_state != NULLPTR);

    lsh_u64* submsg_e_l = i_state->submsg_e_l;
    lsh_u64* submsg_e_r = i_state->submsg_e_r;
    lsh_u64* submsg_o_l = i_state->submsg_o_l;
    lsh_u64* submsg_o_r = i_state->submsg_o_r;

    __m128i temp;
    // Left half permutation.
    _mm_storeu_si128(M128_CAST(submsg_o_l+2), _mm_shuffle_epi32(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+2)), _MM_SHUFFLE(1,0,3,2)));

    temp = _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+0));
    _mm_storeu_si128(M128_CAST(submsg_o_l+0),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+2)));
    _mm_storeu_si128(M128_CAST(submsg_o_l+2), temp);
    _mm_storeu_si128(M128_CAST(submsg_o_l+6), _mm_shuffle_epi32(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6)), _MM_SHUFFLE(1,0,3,2)));

    temp = _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4));
    _mm_storeu_si128(M128_CAST(submsg_o_l+4), _mm_unpacklo_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4))));
    _mm_storeu_si128(M128_CAST(submsg_o_l+6), _mm_unpackhi_epi64(
        temp, _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6))));
    // Right half permutation.
    _mm_storeu_si128(M128_CAST(submsg_o_r+2), _mm_shuffle_epi32(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+2)), _MM_SHUFFLE(1,0,3,2)));

    temp = _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+0));
    _mm_storeu_si128(M128_CAST(submsg_o_r+0),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+2)));
    _mm_storeu_si128(M128_CAST(submsg_o_r+2), temp);
    _mm_storeu_si128(M128_CAST(submsg_o_r+6), _mm_shuffle_epi32(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6)), _MM_SHUFFLE(1,0,3,2)));

    temp = _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4));
    _mm_storeu_si128(M128_CAST(submsg_o_r+4), _mm_unpacklo_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4))));
    _mm_storeu_si128(M128_CAST(submsg_o_r+6), _mm_unpackhi_epi64(
        temp, _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6))));

    // Add the even sub-message into the permuted odd sub-message.
    _mm_storeu_si128(M128_CAST(submsg_o_l+0), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+0)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+0))));
    _mm_storeu_si128(M128_CAST(submsg_o_l+2), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+2)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+2))));
    _mm_storeu_si128(M128_CAST(submsg_o_l+4), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4))));
    _mm_storeu_si128(M128_CAST(submsg_o_l+6), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6))));

    _mm_storeu_si128(M128_CAST(submsg_o_r+0), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+0)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+0))));
    _mm_storeu_si128(M128_CAST(submsg_o_r+2), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+2)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+2))));
    _mm_storeu_si128(M128_CAST(submsg_o_r+4), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4))));
    _mm_storeu_si128(M128_CAST(submsg_o_r+6), _mm_add_epi64(
        _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6)),
        _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6))));
}
356
357inline void load_sc(const lsh_u64** p_const_v, size_t i)
358{
359 *p_const_v = &LSH512_StepConstants[i];
360}
361
362inline void msg_add_even(lsh_u64 cv_l[8], lsh_u64 cv_r[8], LSH512_SSSE3_Internal* i_state)
363{
364 CRYPTOPP_ASSERT(i_state != NULLPTR);
365
366 lsh_u64* submsg_e_l = i_state->submsg_e_l;
367 lsh_u64* submsg_e_r = i_state->submsg_e_r;
368
369 _mm_storeu_si128(M128_CAST(cv_l), _mm_xor_si128(
370 _mm_loadu_si128(CONST_M128_CAST(cv_l)),
371 _mm_loadu_si128(CONST_M128_CAST(submsg_e_l))));
372 _mm_storeu_si128(M128_CAST(cv_r), _mm_xor_si128(
373 _mm_loadu_si128(CONST_M128_CAST(cv_r)),
374 _mm_loadu_si128(CONST_M128_CAST(submsg_e_r))));
375 _mm_storeu_si128(M128_CAST(cv_l+2), _mm_xor_si128(
376 _mm_loadu_si128(CONST_M128_CAST(cv_l+2)),
377 _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+2))));
378 _mm_storeu_si128(M128_CAST(cv_r+2), _mm_xor_si128(
379 _mm_loadu_si128(CONST_M128_CAST(cv_r+2)),
380 _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+2))));
381 _mm_storeu_si128(M128_CAST(cv_l+4), _mm_xor_si128(
382 _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
383 _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4))));
384 _mm_storeu_si128(M128_CAST(cv_r+4), _mm_xor_si128(
385 _mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
386 _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4))));
387 _mm_storeu_si128(M128_CAST(cv_l+6), _mm_xor_si128(
388 _mm_loadu_si128(CONST_M128_CAST(cv_l+6)),
389 _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+6))));
390 _mm_storeu_si128(M128_CAST(cv_r+6), _mm_xor_si128(
391 _mm_loadu_si128(CONST_M128_CAST(cv_r+6)),
392 _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+6))));
393}
394
395inline void msg_add_odd(lsh_u64 cv_l[8], lsh_u64 cv_r[8], LSH512_SSSE3_Internal* i_state)
396{
397 CRYPTOPP_ASSERT(i_state != NULLPTR);
398
399 lsh_u64* submsg_o_l = i_state->submsg_o_l;
400 lsh_u64* submsg_o_r = i_state->submsg_o_r;
401
402 _mm_storeu_si128(M128_CAST(cv_l), _mm_xor_si128(
403 _mm_loadu_si128(CONST_M128_CAST(cv_l)),
404 _mm_loadu_si128(CONST_M128_CAST(submsg_o_l))));
405 _mm_storeu_si128(M128_CAST(cv_r), _mm_xor_si128(
406 _mm_loadu_si128(CONST_M128_CAST(cv_r)),
407 _mm_loadu_si128(CONST_M128_CAST(submsg_o_r))));
408 _mm_storeu_si128(M128_CAST(cv_l+2), _mm_xor_si128(
409 _mm_loadu_si128(CONST_M128_CAST(cv_l+2)),
410 _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+2))));
411 _mm_storeu_si128(M128_CAST(cv_r+2), _mm_xor_si128(
412 _mm_loadu_si128(CONST_M128_CAST(cv_r+2)),
413 _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+2))));
414 _mm_storeu_si128(M128_CAST(cv_l+4), _mm_xor_si128(
415 _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
416 _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4))));
417 _mm_storeu_si128(M128_CAST(cv_r+4), _mm_xor_si128(
418 _mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
419 _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4))));
420 _mm_storeu_si128(M128_CAST(cv_l+6), _mm_xor_si128(
421 _mm_loadu_si128(CONST_M128_CAST(cv_l+6)),
422 _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+6))));
423 _mm_storeu_si128(M128_CAST(cv_r+6), _mm_xor_si128(
424 _mm_loadu_si128(CONST_M128_CAST(cv_r+6)),
425 _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+6))));
426}
427
428inline void add_blk(lsh_u64 cv_l[8], lsh_u64 cv_r[8])
429{
430 _mm_storeu_si128(M128_CAST(cv_l), _mm_add_epi64(
431 _mm_loadu_si128(CONST_M128_CAST(cv_l)),
432 _mm_loadu_si128(CONST_M128_CAST(cv_r))));
433 _mm_storeu_si128(M128_CAST(cv_l+2), _mm_add_epi64(
434 _mm_loadu_si128(CONST_M128_CAST(cv_l+2)),
435 _mm_loadu_si128(CONST_M128_CAST(cv_r+2))));
436 _mm_storeu_si128(M128_CAST(cv_l+4), _mm_add_epi64(
437 _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
438 _mm_loadu_si128(CONST_M128_CAST(cv_r+4))));
439 _mm_storeu_si128(M128_CAST(cv_l+6), _mm_add_epi64(
440 _mm_loadu_si128(CONST_M128_CAST(cv_l+6)),
441 _mm_loadu_si128(CONST_M128_CAST(cv_r+6))));
442}
443
444template <unsigned int R>
445inline void rotate_blk(lsh_u64 cv[8])
446{
447#if defined(CRYPTOPP_XOP_AVAILABLE)
448 _mm_storeu_si128(M128_CAST(cv),
449 _mm_roti_epi64(_mm_loadu_si128(CONST_M128_CAST(cv)), R));
450 _mm_storeu_si128(M128_CAST(cv+2),
451 _mm_roti_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+2)), R));
452 _mm_storeu_si128(M128_CAST(cv+4),
453 _mm_roti_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+4)), R));
454 _mm_storeu_si128(M128_CAST(cv+6),
455 _mm_roti_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+6)), R));
456
457#else
458 _mm_storeu_si128(M128_CAST(cv), _mm_or_si128(
459 _mm_slli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv)), R),
460 _mm_srli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv)), 64-R)));
461 _mm_storeu_si128(M128_CAST(cv+2), _mm_or_si128(
462 _mm_slli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+2)), R),
463 _mm_srli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+2)), 64-R)));
464 _mm_storeu_si128(M128_CAST(cv+4), _mm_or_si128(
465 _mm_slli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+4)), R),
466 _mm_srli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+4)), 64-R)));
467 _mm_storeu_si128(M128_CAST(cv+6), _mm_or_si128(
468 _mm_slli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+6)), R),
469 _mm_srli_epi64(_mm_loadu_si128(CONST_M128_CAST(cv+6)), 64-R)));
470#endif
471}
472
473inline void xor_with_const(lsh_u64 cv_l[8], const lsh_u64 const_v[8])
474{
475 _mm_storeu_si128(M128_CAST(cv_l), _mm_xor_si128(
476 _mm_loadu_si128(CONST_M128_CAST(cv_l)),
477 _mm_loadu_si128(CONST_M128_CAST(const_v))));
478 _mm_storeu_si128(M128_CAST(cv_l+2), _mm_xor_si128(
479 _mm_loadu_si128(CONST_M128_CAST(cv_l+2)),
480 _mm_loadu_si128(CONST_M128_CAST(const_v+2))));
481 _mm_storeu_si128(M128_CAST(cv_l+4), _mm_xor_si128(
482 _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
483 _mm_loadu_si128(CONST_M128_CAST(const_v+4))));
484 _mm_storeu_si128(M128_CAST(cv_l+6), _mm_xor_si128(
485 _mm_loadu_si128(CONST_M128_CAST(cv_l+6)),
486 _mm_loadu_si128(CONST_M128_CAST(const_v+6))));
487}
488
// Rotate each 64-bit word of cv_r left by its gamma amount. All gamma
// values are multiples of 8 bits, so each rotation is a byte permutation
// and the whole step is done with SSSE3 pshufb. The two words packed in
// each _mm_set_epi8 mask get (low word, high word) rotations of:
// lane 0: (0,16), lane 1: (32,48), lane 2: (8,24), lane 3: (40,56) bits.
inline void rotate_msg_gamma(lsh_u64 cv_r[8])
{
    // g_gamma512[8] = { 0, 16, 32, 48, 8, 24, 40, 56 };
    _mm_storeu_si128(M128_CAST(cv_r+0),
        _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(cv_r+0)),
            _mm_set_epi8(13,12,11,10, 9,8,15,14, 7,6,5,4, 3,2,1,0)));
    _mm_storeu_si128(M128_CAST(cv_r+2),
        _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(cv_r+2)),
            _mm_set_epi8(9,8,15,14, 13,12,11,10, 3,2,1,0, 7,6,5,4)));

    _mm_storeu_si128(M128_CAST(cv_r+4),
        _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
            _mm_set_epi8(12,11,10,9, 8,15,14,13, 6,5,4,3, 2,1,0,7)));
    _mm_storeu_si128(M128_CAST(cv_r+6),
        _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(cv_r+6)),
            _mm_set_epi8(8,15,14,13, 12,11,10,9, 2,1,0,7, 6,5,4,3)));
}
506
507inline void word_perm(lsh_u64 cv_l[8], lsh_u64 cv_r[8])
508{
509 __m128i temp[2];
510 temp[0] = _mm_loadu_si128(CONST_M128_CAST(cv_l+0));
511 _mm_storeu_si128(M128_CAST(cv_l+0), _mm_unpacklo_epi64(
512 _mm_loadu_si128(CONST_M128_CAST(cv_l+2)),
513 _mm_loadu_si128(CONST_M128_CAST(cv_l+0))));
514 _mm_storeu_si128(M128_CAST(cv_l+2), _mm_unpackhi_epi64(
515 temp[0], _mm_loadu_si128(CONST_M128_CAST(cv_l+2))));
516
517 temp[0] = _mm_loadu_si128(CONST_M128_CAST(cv_l+4));
518 _mm_storeu_si128(M128_CAST(cv_l+4), _mm_unpacklo_epi64(
519 _mm_loadu_si128(CONST_M128_CAST(cv_l+6)),
520 _mm_loadu_si128(CONST_M128_CAST(cv_l+4))));
521 _mm_storeu_si128(M128_CAST(cv_l+6), _mm_unpackhi_epi64(
522 temp[0], _mm_loadu_si128(CONST_M128_CAST(cv_l+6))));
523 _mm_storeu_si128(M128_CAST(cv_r+2), _mm_shuffle_epi32(
524 _mm_loadu_si128(CONST_M128_CAST(cv_r+2)), _MM_SHUFFLE(1,0,3,2)));
525
526 temp[0] = _mm_loadu_si128(CONST_M128_CAST(cv_r+0));
527 _mm_storeu_si128(M128_CAST(cv_r+0), _mm_unpacklo_epi64(
528 _mm_loadu_si128(CONST_M128_CAST(cv_r+0)),
529 _mm_loadu_si128(CONST_M128_CAST(cv_r+2))));
530 _mm_storeu_si128(M128_CAST(cv_r+2), _mm_unpackhi_epi64(
531 _mm_loadu_si128(CONST_M128_CAST(cv_r+2)), temp[0]));
532 _mm_storeu_si128(M128_CAST(cv_r+6), _mm_shuffle_epi32(
533 _mm_loadu_si128(CONST_M128_CAST(cv_r+6)), _MM_SHUFFLE(1,0,3,2)));
534
535 temp[0] = _mm_loadu_si128(CONST_M128_CAST(cv_r+4));
536 _mm_storeu_si128(M128_CAST(cv_r+4), _mm_unpacklo_epi64(
537 _mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
538 _mm_loadu_si128(CONST_M128_CAST(cv_r+6))));
539 _mm_storeu_si128(M128_CAST(cv_r+6), _mm_unpackhi_epi64(
540 _mm_loadu_si128(CONST_M128_CAST(cv_r+6)), temp[0]));
541
542 temp[0] = _mm_loadu_si128(CONST_M128_CAST(cv_l+0));
543 temp[1] = _mm_loadu_si128(CONST_M128_CAST(cv_l+2));
544
545 _mm_storeu_si128(M128_CAST(cv_l+0),
546 _mm_loadu_si128(CONST_M128_CAST(cv_l+4)));
547 _mm_storeu_si128(M128_CAST(cv_l+2),
548 _mm_loadu_si128(CONST_M128_CAST(cv_l+6)));
549 _mm_storeu_si128(M128_CAST(cv_l+4),
550 _mm_loadu_si128(CONST_M128_CAST(cv_r+4)));
551 _mm_storeu_si128(M128_CAST(cv_l+6),
552 _mm_loadu_si128(CONST_M128_CAST(cv_r+6)));
553 _mm_storeu_si128(M128_CAST(cv_r+4),
554 _mm_loadu_si128(CONST_M128_CAST(cv_r+0)));
555 _mm_storeu_si128(M128_CAST(cv_r+6),
556 _mm_loadu_si128(CONST_M128_CAST(cv_r+2)));
557
558 _mm_storeu_si128(M128_CAST(cv_r+0), temp[0]);
559 _mm_storeu_si128(M128_CAST(cv_r+2), temp[1]);
560};
561
/* -------------------------------------------------------- *
* step function
* -------------------------------------------------------- */

// One LSH-512 mix step: word-wise addition, rotation by Alpha, step
// constant XOR, addition back into the right half, rotation by Beta,
// a final addition, and the gamma byte-rotation of the right half.
// Alpha/Beta select the even- or odd-step rotation amounts.
template <unsigned int Alpha, unsigned int Beta>
inline void mix(lsh_u64 cv_l[8], lsh_u64 cv_r[8], const lsh_u64 const_v[8])
{
    add_blk(cv_l, cv_r);
    rotate_blk<Alpha>(cv_l);
    xor_with_const(cv_l, const_v);
    add_blk(cv_r, cv_l);
    rotate_blk<Beta>(cv_r);
    add_blk(cv_l, cv_r);
    rotate_msg_gamma(cv_r);
}
577
/* -------------------------------------------------------- *
* compression function
* -------------------------------------------------------- */

// Absorb one 256-byte message block into the chaining values: load the
// block into the sub-message buffers, then run NUM_STEPS (28) mix steps
// alternating even/odd message addition and expansion, finishing with a
// final even message addition.
inline void compress(LSH512_SSSE3_Context* ctx, const lsh_u8 pdMsgBlk[LSH512_MSG_BLK_BYTE_LEN])
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);

    // ctx->cv_l is the start of the state block, so the Internal view's
    // fixed offsets (16/24/32/40) land on ctx->sub_msgs.
    LSH512_SSSE3_Internal s_state(ctx->cv_l);
    LSH512_SSSE3_Internal* i_state = &s_state;

    const lsh_u64* const_v = NULL;
    lsh_u64 *cv_l = ctx->cv_l;
    lsh_u64 *cv_r = ctx->cv_r;

    load_msg_blk(i_state, pdMsgBlk);

    // Steps 0 and 1 use the freshly loaded sub-messages.
    msg_add_even(cv_l, cv_r, i_state);
    load_sc(&const_v, 0);
    mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
    word_perm(cv_l, cv_r);

    msg_add_odd(cv_l, cv_r, i_state);
    load_sc(&const_v, 8);
    mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
    word_perm(cv_l, cv_r);

    // Remaining step pairs re-expand the message before each addition.
    for (size_t i = 1; i < NUM_STEPS / 2; i++)
    {
        msg_exp_even(i_state);
        msg_add_even(cv_l, cv_r, i_state);
        load_sc(&const_v, 16 * i);
        mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
        word_perm(cv_l, cv_r);

        msg_exp_odd(i_state);
        msg_add_odd(cv_l, cv_r, i_state);
        load_sc(&const_v, 16 * i + 8);
        mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
        word_perm(cv_l, cv_r);
    }

    msg_exp_even(i_state);
    msg_add_even(cv_l, cv_r, i_state);
}
623
624/* -------------------------------------------------------- */
625
626inline void load_iv(word64 cv_l[8], word64 cv_r[8], const word64 iv[16])
627{
628 // The IV's are 32-byte aligned so we can use aligned loads.
629 _mm_storeu_si128(M128_CAST(cv_l+0),
630 _mm_load_si128(CONST_M128_CAST(iv+0)));
631 _mm_storeu_si128(M128_CAST(cv_l+2),
632 _mm_load_si128(CONST_M128_CAST(iv+2)));
633 _mm_storeu_si128(M128_CAST(cv_l+4),
634 _mm_load_si128(CONST_M128_CAST(iv+4)));
635 _mm_storeu_si128(M128_CAST(cv_l+6),
636 _mm_load_si128(CONST_M128_CAST(iv+6)));
637 _mm_storeu_si128(M128_CAST(cv_r+0),
638 _mm_load_si128(CONST_M128_CAST(iv+8)));
639 _mm_storeu_si128(M128_CAST(cv_r+2),
640 _mm_load_si128(CONST_M128_CAST(iv+10)));
641 _mm_storeu_si128(M128_CAST(cv_r+4),
642 _mm_load_si128(CONST_M128_CAST(iv+12)));
643 _mm_storeu_si128(M128_CAST(cv_r+6),
644 _mm_load_si128(CONST_M128_CAST(iv+14)));
645}
646
647inline void zero_iv(lsh_u64 cv_l[8], lsh_u64 cv_r[8])
648{
649 _mm_storeu_si128(M128_CAST(cv_l+0), _mm_setzero_si128());
650 _mm_storeu_si128(M128_CAST(cv_l+2), _mm_setzero_si128());
651 _mm_storeu_si128(M128_CAST(cv_l+4), _mm_setzero_si128());
652 _mm_storeu_si128(M128_CAST(cv_l+6), _mm_setzero_si128());
653 _mm_storeu_si128(M128_CAST(cv_r+0), _mm_setzero_si128());
654 _mm_storeu_si128(M128_CAST(cv_r+2), _mm_setzero_si128());
655 _mm_storeu_si128(M128_CAST(cv_r+4), _mm_setzero_si128());
656 _mm_storeu_si128(M128_CAST(cv_r+6), _mm_setzero_si128());
657}
658
659inline void zero_submsgs(LSH512_SSSE3_Context* ctx)
660{
661 lsh_u64* sub_msgs = ctx->sub_msgs;
662
663 _mm_storeu_si128(M128_CAST(sub_msgs+ 0),
664 _mm_setzero_si128());
665 _mm_storeu_si128(M128_CAST(sub_msgs+ 2),
666 _mm_setzero_si128());
667 _mm_storeu_si128(M128_CAST(sub_msgs+ 4),
668 _mm_setzero_si128());
669 _mm_storeu_si128(M128_CAST(sub_msgs+ 6),
670 _mm_setzero_si128());
671 _mm_storeu_si128(M128_CAST(sub_msgs+ 8),
672 _mm_setzero_si128());
673 _mm_storeu_si128(M128_CAST(sub_msgs+10),
674 _mm_setzero_si128());
675 _mm_storeu_si128(M128_CAST(sub_msgs+12),
676 _mm_setzero_si128());
677 _mm_storeu_si128(M128_CAST(sub_msgs+14),
678 _mm_setzero_si128());
679}
680
// Initialize the context for LSH-512-224: clear the sub-message area
// and load the precomputed 224-bit-output IV.
inline void init224(LSH512_SSSE3_Context* ctx)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);

    zero_submsgs(ctx);
    load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV224);
}
688
// Initialize the context for LSH-512-256: clear the sub-message area
// and load the precomputed 256-bit-output IV.
inline void init256(LSH512_SSSE3_Context* ctx)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);

    zero_submsgs(ctx);
    load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV256);
}
696
// Initialize the context for LSH-512-384: clear the sub-message area
// and load the precomputed 384-bit-output IV.
inline void init384(LSH512_SSSE3_Context* ctx)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);

    zero_submsgs(ctx);
    load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV384);
}
704
// Initialize the context for LSH-512-512: clear the sub-message area
// and load the precomputed 512-bit-output IV.
inline void init512(LSH512_SSSE3_Context* ctx)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);

    zero_submsgs(ctx);
    load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV512);
}
712
713/* -------------------------------------------------------- */
714
715inline void fin(LSH512_SSSE3_Context* ctx)
716{
717 CRYPTOPP_ASSERT(ctx != NULLPTR);
718
719 _mm_storeu_si128(M128_CAST(ctx->cv_l+0), _mm_xor_si128(
720 _mm_loadu_si128(CONST_M128_CAST(ctx->cv_l+0)),
721 _mm_loadu_si128(CONST_M128_CAST(ctx->cv_r+0))));
722 _mm_storeu_si128(M128_CAST(ctx->cv_l+2), _mm_xor_si128(
723 _mm_loadu_si128(CONST_M128_CAST(ctx->cv_l+2)),
724 _mm_loadu_si128(CONST_M128_CAST(ctx->cv_r+2))));
725 _mm_storeu_si128(M128_CAST(ctx->cv_l+4), _mm_xor_si128(
726 _mm_loadu_si128(CONST_M128_CAST(ctx->cv_l+4)),
727 _mm_loadu_si128(CONST_M128_CAST(ctx->cv_r+4))));
728 _mm_storeu_si128(M128_CAST(ctx->cv_l+6), _mm_xor_si128(
729 _mm_loadu_si128(CONST_M128_CAST(ctx->cv_l+6)),
730 _mm_loadu_si128(CONST_M128_CAST(ctx->cv_r+6))));
731}
732
/* -------------------------------------------------------- */

// Copy the digest out of cv_l. For digest sizes that are not a whole
// number of bytes the type code carries the leftover bit count, and the
// unused low-order bits of the final byte are masked off.
inline void get_hash(LSH512_SSSE3_Context* ctx, lsh_u8* pbHashVal)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);
    CRYPTOPP_ASSERT(ctx->alg_type != 0);
    CRYPTOPP_ASSERT(pbHashVal != NULLPTR);

    lsh_uint alg_type = ctx->alg_type;
    lsh_uint hash_val_byte_len = LSH_GET_HASHBYTE(alg_type);
    lsh_uint hash_val_bit_len = LSH_GET_SMALL_HASHBIT(alg_type);

    memcpy(pbHashVal, ctx->cv_l, hash_val_byte_len);
    if (hash_val_bit_len){
        // Keep only the high (8 - hash_val_bit_len) bits of the last byte.
        pbHashVal[hash_val_byte_len-1] &= (((lsh_u8)0xff) << hash_val_bit_len);
    }
}
751
/* -------------------------------------------------------- */

// Initialize the context from its algorithm type. The four standard
// digest sizes use precomputed IVs; any other LSH-512 type derives its
// IV by running the step function over a seeded zero state.
lsh_err lsh512_init_ssse3(LSH512_SSSE3_Context* ctx)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);
    CRYPTOPP_ASSERT(ctx->alg_type != 0);

    lsh_u32 alg_type = ctx->alg_type;
    const lsh_u64* const_v = NULL;
    ctx->remain_databitlen = 0;

    // Fast path: the standard parameter sets have precomputed IVs.
    switch (alg_type){
    case LSH_TYPE_512_512:
        init512(ctx);
        return LSH_SUCCESS;
    case LSH_TYPE_512_384:
        init384(ctx);
        return LSH_SUCCESS;
    case LSH_TYPE_512_256:
        init256(ctx);
        return LSH_SUCCESS;
    case LSH_TYPE_512_224:
        init224(ctx);
        return LSH_SUCCESS;
    default:
        break;
    }

    // Non-standard digest size: seed the state with the block byte
    // length and the digest bit length, then iterate the step function.
    lsh_u64* cv_l = ctx->cv_l;
    lsh_u64* cv_r = ctx->cv_r;

    zero_iv(cv_l, cv_r);
    cv_l[0] = LSH512_HASH_VAL_MAX_BYTE_LEN;
    cv_l[1] = LSH_GET_HASHBIT(alg_type);

    for (size_t i = 0; i < NUM_STEPS / 2; i++)
    {
        //Mix
        load_sc(&const_v, i * 16);
        mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
        word_perm(cv_l, cv_r);

        load_sc(&const_v, i * 16 + 8);
        mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
        word_perm(cv_l, cv_r);
    }

    return LSH_SUCCESS;
}
801
// Absorb input. Buffers partial blocks in ctx->last_block and compresses
// each full 256-byte block. Crypto++ feeds whole bytes only, so the
// sub-byte (pos2 / remain_msg_bit) paths are compiled out as constants.
lsh_err lsh512_update_ssse3(LSH512_SSSE3_Context* ctx, const lsh_u8* data, size_t databitlen)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);
    CRYPTOPP_ASSERT(data != NULLPTR);
    CRYPTOPP_ASSERT(databitlen % 8 == 0);
    CRYPTOPP_ASSERT(ctx->alg_type != 0);

    if (databitlen == 0){
        return LSH_SUCCESS;
    }

    // We are byte oriented. tail bits will always be 0.
    size_t databytelen = databitlen >> 3;
    // lsh_uint pos2 = databitlen & 0x7;
    const size_t pos2 = 0;

    size_t remain_msg_byte = static_cast<size_t>(ctx->remain_databitlen >> 3);
    // lsh_uint remain_msg_bit = ctx->remain_databitlen & 7;
    const size_t remain_msg_bit = 0;

    if (remain_msg_byte >= LSH512_MSG_BLK_BYTE_LEN){
        return LSH_ERR_INVALID_STATE;
    }
    if (remain_msg_bit > 0){
        return LSH_ERR_INVALID_DATABITLEN;
    }

    // Not enough for a full block: buffer the input and return.
    if (databytelen + remain_msg_byte < LSH512_MSG_BLK_BYTE_LEN){
        memcpy(ctx->last_block + remain_msg_byte, data, databytelen);
        ctx->remain_databitlen += (lsh_uint)databitlen;
        remain_msg_byte += (lsh_uint)databytelen;
        if (pos2){
            ctx->last_block[remain_msg_byte] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
        }
        return LSH_SUCCESS;
    }

    // Top off and compress the partial block left from a previous call.
    if (remain_msg_byte > 0){
        size_t more_byte = LSH512_MSG_BLK_BYTE_LEN - remain_msg_byte;
        memcpy(ctx->last_block + remain_msg_byte, data, more_byte);
        compress(ctx, ctx->last_block);
        data += more_byte;
        databytelen -= more_byte;
        remain_msg_byte = 0;
        ctx->remain_databitlen = 0;
    }

    while (databytelen >= LSH512_MSG_BLK_BYTE_LEN)
    {
        // This call to compress caused some trouble.
        // The data pointer can become unaligned in the
        // previous block.
        compress(ctx, data);
        data += LSH512_MSG_BLK_BYTE_LEN;
        databytelen -= LSH512_MSG_BLK_BYTE_LEN;
    }

    // Buffer whatever is left for the next call (always < one block).
    if (databytelen > 0){
        memcpy(ctx->last_block, data, databytelen);
        ctx->remain_databitlen = (lsh_uint)(databytelen << 3);
    }

    if (pos2){
        ctx->last_block[databytelen] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
        ctx->remain_databitlen += pos2;
    }
    return LSH_SUCCESS;
}
870
// Pad and compress the final block, fold cv_r into cv_l, and copy the
// digest out. Padding is the usual 0x80 marker followed by zeros.
lsh_err lsh512_final_ssse3(LSH512_SSSE3_Context* ctx, lsh_u8* hashval)
{
    CRYPTOPP_ASSERT(ctx != NULLPTR);
    CRYPTOPP_ASSERT(hashval != NULLPTR);

    // We are byte oriented. tail bits will always be 0.
    size_t remain_msg_byte = static_cast<size_t>(ctx->remain_databitlen >> 3);
    // lsh_uint remain_msg_bit = ctx->remain_databitlen & 7;
    const size_t remain_msg_bit = 0;

    if (remain_msg_byte >= LSH512_MSG_BLK_BYTE_LEN){
        return LSH_ERR_INVALID_STATE;
    }

    if (remain_msg_bit){
        // Sub-byte input: set the single padding bit (compiled out here).
        ctx->last_block[remain_msg_byte] |= (0x1 << (7 - remain_msg_bit));
    }
    else{
        ctx->last_block[remain_msg_byte] = 0x80;
    }
    // Zero-fill the rest of the block after the padding marker.
    memset(ctx->last_block + remain_msg_byte + 1, 0, LSH512_MSG_BLK_BYTE_LEN - remain_msg_byte - 1);

    compress(ctx, ctx->last_block);

    fin(ctx);
    get_hash(ctx, hashval);

    return LSH_SUCCESS;
}
900
901ANONYMOUS_NAMESPACE_END
902
903NAMESPACE_BEGIN(CryptoPP)
904
905extern
906void LSH512_Base_Restart_SSSE3(word64* state)
907{
908 state[RemainingBits] = 0;
909 LSH512_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
910 lsh_err err = lsh512_init_ssse3(&ctx);
911
912 if (err != LSH_SUCCESS)
913 throw Exception(Exception::OTHER_ERROR, "LSH512_Base: lsh512_init_ssse3 failed");
914}
915
916extern
917void LSH512_Base_Update_SSSE3(word64* state, const byte *input, size_t size)
918{
919 LSH512_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
920 lsh_err err = lsh512_update_ssse3(&ctx, input, 8*size);
921
922 if (err != LSH_SUCCESS)
923 throw Exception(Exception::OTHER_ERROR, "LSH512_Base: lsh512_update_ssse3 failed");
924}
925
926extern
927void LSH512_Base_TruncatedFinal_SSSE3(word64* state, byte *hash, size_t)
928{
929 LSH512_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
930 lsh_err err = lsh512_final_ssse3(&ctx, hash);
931
932 if (err != LSH_SUCCESS)
933 throw Exception(Exception::OTHER_ERROR, "LSH512_Base: lsh512_final_ssse3 failed");
934}
935
936NAMESPACE_END
937
938#endif // CRYPTOPP_SSSE3_AVAILABLE
#define M128_CAST(x)
Clang workaround.
Definition: adv_simd.h:609
#define CONST_M128_CAST(x)
Clang workaround.
Definition: adv_simd.h:614
Base class for all exceptions thrown by the library.
Definition: cryptlib.h:159
@ OTHER_ERROR
Some other error occurred not belonging to other categories.
Definition: cryptlib.h:177
Library configuration file.
unsigned char byte
8-bit unsigned datatype
Definition: config_int.h:56
unsigned int word32
32-bit unsigned datatype
Definition: config_int.h:62
unsigned long long word64
64-bit unsigned datatype
Definition: config_int.h:91
@ LITTLE_ENDIAN_ORDER
byte order is little-endian
Definition: cryptlib.h:145
Classes for the LSH hash functions.
Utility functions for the Crypto++ library.
T ConditionalByteReverse(ByteOrder order, T value)
Reverses bytes in a value depending upon endianness.
Definition: misc.h:2187
T rotlFixed(T x, unsigned int y)
Performs a left rotate.
Definition: misc.h:1598
Crypto++ library namespace.
Precompiled header file.
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Definition: trap.h:68