11#ifdef ENABLE_ARM_SHANI
/**
 * The 64 SHA-256 round constants.
 *
 * The table is aligned like a uint32x4_t because it is always read four
 * words at a time with vld1q_u32(&K[4 * i]), one load per group of four rounds.
 */
alignas(uint32x4_t) static constexpr std::array<uint32_t, 64> K = {
    0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5,
    0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5,
    0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3,
    0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174,
    0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC,
    0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA,
    0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7,
    0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967,
    0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13,
    0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85,
    0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3,
    0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070,
    0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5,
    0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3,
    0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208,
    0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2,
};
/**
 * Run the SHA-256 compression function over consecutive 64-byte blocks using
 * the ARMv8 SHA-2 intrinsics.
 *
 * @param s      Eight 32-bit state words. They are loaded on entry and
 *               overwritten with the updated state on return.
 * @param chunk  Input bytes; 64 bytes are consumed per block.
 * @param blocks Number of 64-byte blocks to process. With 0 the state is
 *               stored back unmodified.
 */
void Transform(uint32_t* s, const unsigned char* chunk, size_t blocks)
{
    uint32x4_t STATE0, STATE1, ABEF_SAVE, CDGH_SAVE;
    uint32x4_t MSG0, MSG1, MSG2, MSG3;
    uint32x4_t TMP0, TMP2;

    // Load state
    STATE0 = vld1q_u32(&s[0]);
    STATE1 = vld1q_u32(&s[4]);

    while (blocks--)
    {
        // Save state; it is added back in after the 64 rounds.
        ABEF_SAVE = STATE0;
        CDGH_SAVE = STATE1;

        // Load the block and byte-swap every 32-bit word.
        MSG0 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(chunk + 0)));
        MSG1 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(chunk + 16)));
        MSG2 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(chunk + 32)));
        MSG3 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(chunk + 48)));
        chunk += 64;

        // In every group below, TMP2 must hold the value STATE0 had *before*
        // vsha256hq_u32 overwrites it, because vsha256h2q_u32 needs that
        // earlier value as its second operand.

        // Rounds 1-4
        TMP0 = vaddq_u32(MSG0, vld1q_u32(&K[0]));
        TMP2 = STATE0;
        MSG0 = vsha256su0q_u32(MSG0, MSG1);
        STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
        STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
        MSG0 = vsha256su1q_u32(MSG0, MSG2, MSG3);

        // Rounds 5-8
        TMP0 = vaddq_u32(MSG1, vld1q_u32(&K[4]));
        TMP2 = STATE0;
        MSG1 = vsha256su0q_u32(MSG1, MSG2);
        STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
        STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
        MSG1 = vsha256su1q_u32(MSG1, MSG3, MSG0);

        // Rounds 9-12
        TMP0 = vaddq_u32(MSG2, vld1q_u32(&K[8]));
        TMP2 = STATE0;
        MSG2 = vsha256su0q_u32(MSG2, MSG3);
        STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
        STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
        MSG2 = vsha256su1q_u32(MSG2, MSG0, MSG1);

        // Rounds 13-16
        TMP0 = vaddq_u32(MSG3, vld1q_u32(&K[12]));
        TMP2 = STATE0;
        MSG3 = vsha256su0q_u32(MSG3, MSG0);
        STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
        STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
        MSG3 = vsha256su1q_u32(MSG3, MSG1, MSG2);

        // Rounds 17-20
        TMP0 = vaddq_u32(MSG0, vld1q_u32(&K[16]));
        TMP2 = STATE0;
        MSG0 = vsha256su0q_u32(MSG0, MSG1);
        STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
        STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
        MSG0 = vsha256su1q_u32(MSG0, MSG2, MSG3);

        // Rounds 21-24
        TMP0 = vaddq_u32(MSG1, vld1q_u32(&K[20]));
        TMP2 = STATE0;
        MSG1 = vsha256su0q_u32(MSG1, MSG2);
        STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
        STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
        MSG1 = vsha256su1q_u32(MSG1, MSG3, MSG0);

        // Rounds 25-28
        TMP0 = vaddq_u32(MSG2, vld1q_u32(&K[24]));
        TMP2 = STATE0;
        MSG2 = vsha256su0q_u32(MSG2, MSG3);
        STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
        STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
        MSG2 = vsha256su1q_u32(MSG2, MSG0, MSG1);

        // Rounds 29-32
        TMP0 = vaddq_u32(MSG3, vld1q_u32(&K[28]));
        TMP2 = STATE0;
        MSG3 = vsha256su0q_u32(MSG3, MSG0);
        STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
        STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
        MSG3 = vsha256su1q_u32(MSG3, MSG1, MSG2);

        // Rounds 33-36
        TMP0 = vaddq_u32(MSG0, vld1q_u32(&K[32]));
        TMP2 = STATE0;
        MSG0 = vsha256su0q_u32(MSG0, MSG1);
        STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
        STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
        MSG0 = vsha256su1q_u32(MSG0, MSG2, MSG3);

        // Rounds 37-40
        TMP0 = vaddq_u32(MSG1, vld1q_u32(&K[36]));
        TMP2 = STATE0;
        MSG1 = vsha256su0q_u32(MSG1, MSG2);
        STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
        STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
        MSG1 = vsha256su1q_u32(MSG1, MSG3, MSG0);

        // Rounds 41-44
        TMP0 = vaddq_u32(MSG2, vld1q_u32(&K[40]));
        TMP2 = STATE0;
        MSG2 = vsha256su0q_u32(MSG2, MSG3);
        STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
        STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
        MSG2 = vsha256su1q_u32(MSG2, MSG0, MSG1);

        // Rounds 45-48
        TMP0 = vaddq_u32(MSG3, vld1q_u32(&K[44]));
        TMP2 = STATE0;
        MSG3 = vsha256su0q_u32(MSG3, MSG0);
        STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
        STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
        MSG3 = vsha256su1q_u32(MSG3, MSG1, MSG2);

        // The last four groups only consume the schedule; nothing further
        // needs to be derived from MSG0..MSG3.

        // Rounds 49-52
        TMP0 = vaddq_u32(MSG0, vld1q_u32(&K[48]));
        TMP2 = STATE0;
        STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
        STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);

        // Rounds 53-56
        TMP0 = vaddq_u32(MSG1, vld1q_u32(&K[52]));
        TMP2 = STATE0;
        STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
        STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);

        // Rounds 57-60
        TMP0 = vaddq_u32(MSG2, vld1q_u32(&K[56]));
        TMP2 = STATE0;
        STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
        STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);

        // Rounds 61-64
        TMP0 = vaddq_u32(MSG3, vld1q_u32(&K[60]));
        TMP2 = STATE0;
        STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
        STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);

        // Update state: add the value saved at the top of this block.
        STATE0 = vaddq_u32(STATE0, ABEF_SAVE);
        STATE1 = vaddq_u32(STATE1, CDGH_SAVE);
    }

    // Save final state
    vst1q_u32(&s[0], STATE0);
    vst1q_u32(&s[4], STATE1);
}
/**
 * Process two independent 64-byte inputs side by side (lanes A and B) and
 * write one 32-byte result per lane.
 *
 * Each lane goes through three SHA-256 compression steps:
 *   1. the 64 input bytes, starting from INIT;
 *   2. a fixed block whose per-round inputs are fully precomputed in MIDS
 *      (MIDS[0] is K[0] + 0x80000000 and MIDS[15] is K[15] + 0x200, i.e. the
 *      padding block of a 512-bit message);
 *   3. the 32-byte result of step 2 followed by the FINAL padding words,
 *      again starting from INIT.
 *
 * @param output Receives 64 bytes: lane A's result at output[0..31] and
 *               lane B's at output[32..63], each 32-bit word byte-swapped on
 *               store.
 * @param input  128 bytes: lane A reads input[0..63], lane B input[64..127].
 */
void Transform_2way(unsigned char* output, const unsigned char* input)
{
    /* Initial state. */
    alignas(uint32x4_t) static constexpr std::array<uint32_t, 8> INIT = {
        0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
        0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
    };

    /* Precomputed per-round inputs (schedule word + round constant) for the 2nd transform. */
    alignas(uint32x4_t) static constexpr std::array<uint32_t, 64> MIDS = {
        0xc28a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
        0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
        0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
        0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf374,
        0x649b69c1, 0xf0fe4786, 0x0fe1edc6, 0x240cf254,
        0x4fe9346f, 0x6cc984be, 0x61b9411e, 0x16f988fa,
        0xf2c65152, 0xa88e5a6d, 0xb019fc65, 0xb9d99ec7,
        0x9a1231c3, 0xe70eeaa0, 0xfdb1232b, 0xc7353eb0,
        0x3069bad5, 0xcb976d5f, 0x5a0f118f, 0xdc1eeefd,
        0x0a35b689, 0xde0b7a04, 0x58f4ca9d, 0xe15d5b16,
        0x007f3e86, 0x37088980, 0xa507ea32, 0x6fab9537,
        0x17406110, 0x0d8cd6f1, 0xcdaa3b6d, 0xc0bbbe37,
        0x83613bda, 0xdb48a363, 0x0b02e931, 0x6fd15ca7,
        0x521afaca, 0x31338431, 0x6ed41a95, 0x6d437890,
        0xc39c91f2, 0x9eccabbd, 0xb5c9a0e6, 0x532fb63c,
        0xd2c741c6, 0x07237ea3, 0xa4954b68, 0x4c191d76
    };

    /* A few precomputed values for rounds 9-16 of the 3rd transform:
     * [0..3]  round inputs for rounds 9-12  (K[8..11]  + FINAL[0..3]),
     * [4..7]  value loaded into MSG2 in place of a vsha256su0q_u32 step,
     * [8..11] round inputs for rounds 13-16 (K[12..15] + FINAL[4..7]). */
    alignas(uint32x4_t) static constexpr std::array<uint32_t, 12> FINS = {
        0x5807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
        0x80000000, 0x00000000, 0x00000000, 0x00000000,
        0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf274
    };

    /* Padding words appended to the 32-byte intermediate result in the 3rd transform. */
    alignas(uint32x4_t) static constexpr std::array<uint32_t, 8> FINAL = {0x80000000, 0, 0, 0, 0, 0, 0, 0x100};

    uint32x4_t STATE0A, STATE0B, STATE1A, STATE1B, ABEF_SAVEA, ABEF_SAVEB, CDGH_SAVEA, CDGH_SAVEB;
    uint32x4_t MSG0A, MSG0B, MSG1A, MSG1B, MSG2A, MSG2B, MSG3A, MSG3B;
    uint32x4_t TMP0A, TMP0B, TMP2A, TMP2B, TMP;

    // In every group below, TMP2A/TMP2B must hold the value STATE0A/STATE0B
    // had *before* vsha256hq_u32 overwrites it, because vsha256h2q_u32 needs
    // that earlier value as its second operand.

    // Transform 1: Load state (both lanes start from INIT)
    STATE0A = vld1q_u32(&INIT[0]);
    STATE0B = STATE0A;
    STATE1A = vld1q_u32(&INIT[4]);
    STATE1B = STATE1A;

    // Transform 1: Load both inputs and byte-swap every 32-bit word
    MSG0A = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(input + 0)));
    MSG1A = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(input + 16)));
    MSG2A = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(input + 32)));
    MSG3A = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(input + 48)));
    MSG0B = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(input + 64)));
    MSG1B = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(input + 80)));
    MSG2B = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(input + 96)));
    MSG3B = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(input + 112)));

    // Transform 1: Rounds 1-4
    TMP = vld1q_u32(&K[0]);
    TMP0A = vaddq_u32(MSG0A, TMP);
    TMP0B = vaddq_u32(MSG0B, TMP);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    MSG0A = vsha256su0q_u32(MSG0A, MSG1A);
    MSG0B = vsha256su0q_u32(MSG0B, MSG1B);
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP0A);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP0B);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP0A);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP0B);
    MSG0A = vsha256su1q_u32(MSG0A, MSG2A, MSG3A);
    MSG0B = vsha256su1q_u32(MSG0B, MSG2B, MSG3B);

    // Transform 1: Rounds 5-8
    TMP = vld1q_u32(&K[4]);
    TMP0A = vaddq_u32(MSG1A, TMP);
    TMP0B = vaddq_u32(MSG1B, TMP);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    MSG1A = vsha256su0q_u32(MSG1A, MSG2A);
    MSG1B = vsha256su0q_u32(MSG1B, MSG2B);
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP0A);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP0B);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP0A);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP0B);
    MSG1A = vsha256su1q_u32(MSG1A, MSG3A, MSG0A);
    MSG1B = vsha256su1q_u32(MSG1B, MSG3B, MSG0B);

    // Transform 1: Rounds 9-12
    TMP = vld1q_u32(&K[8]);
    TMP0A = vaddq_u32(MSG2A, TMP);
    TMP0B = vaddq_u32(MSG2B, TMP);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    MSG2A = vsha256su0q_u32(MSG2A, MSG3A);
    MSG2B = vsha256su0q_u32(MSG2B, MSG3B);
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP0A);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP0B);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP0A);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP0B);
    MSG2A = vsha256su1q_u32(MSG2A, MSG0A, MSG1A);
    MSG2B = vsha256su1q_u32(MSG2B, MSG0B, MSG1B);

    // Transform 1: Rounds 13-16
    TMP = vld1q_u32(&K[12]);
    TMP0A = vaddq_u32(MSG3A, TMP);
    TMP0B = vaddq_u32(MSG3B, TMP);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    MSG3A = vsha256su0q_u32(MSG3A, MSG0A);
    MSG3B = vsha256su0q_u32(MSG3B, MSG0B);
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP0A);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP0B);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP0A);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP0B);
    MSG3A = vsha256su1q_u32(MSG3A, MSG1A, MSG2A);
    MSG3B = vsha256su1q_u32(MSG3B, MSG1B, MSG2B);

    // Transform 1: Rounds 17-20
    TMP = vld1q_u32(&K[16]);
    TMP0A = vaddq_u32(MSG0A, TMP);
    TMP0B = vaddq_u32(MSG0B, TMP);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    MSG0A = vsha256su0q_u32(MSG0A, MSG1A);
    MSG0B = vsha256su0q_u32(MSG0B, MSG1B);
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP0A);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP0B);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP0A);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP0B);
    MSG0A = vsha256su1q_u32(MSG0A, MSG2A, MSG3A);
    MSG0B = vsha256su1q_u32(MSG0B, MSG2B, MSG3B);

    // Transform 1: Rounds 21-24
    TMP = vld1q_u32(&K[20]);
    TMP0A = vaddq_u32(MSG1A, TMP);
    TMP0B = vaddq_u32(MSG1B, TMP);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    MSG1A = vsha256su0q_u32(MSG1A, MSG2A);
    MSG1B = vsha256su0q_u32(MSG1B, MSG2B);
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP0A);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP0B);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP0A);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP0B);
    MSG1A = vsha256su1q_u32(MSG1A, MSG3A, MSG0A);
    MSG1B = vsha256su1q_u32(MSG1B, MSG3B, MSG0B);

    // Transform 1: Rounds 25-28
    TMP = vld1q_u32(&K[24]);
    TMP0A = vaddq_u32(MSG2A, TMP);
    TMP0B = vaddq_u32(MSG2B, TMP);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    MSG2A = vsha256su0q_u32(MSG2A, MSG3A);
    MSG2B = vsha256su0q_u32(MSG2B, MSG3B);
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP0A);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP0B);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP0A);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP0B);
    MSG2A = vsha256su1q_u32(MSG2A, MSG0A, MSG1A);
    MSG2B = vsha256su1q_u32(MSG2B, MSG0B, MSG1B);

    // Transform 1: Rounds 29-32
    TMP = vld1q_u32(&K[28]);
    TMP0A = vaddq_u32(MSG3A, TMP);
    TMP0B = vaddq_u32(MSG3B, TMP);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    MSG3A = vsha256su0q_u32(MSG3A, MSG0A);
    MSG3B = vsha256su0q_u32(MSG3B, MSG0B);
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP0A);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP0B);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP0A);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP0B);
    MSG3A = vsha256su1q_u32(MSG3A, MSG1A, MSG2A);
    MSG3B = vsha256su1q_u32(MSG3B, MSG1B, MSG2B);

    // Transform 1: Rounds 33-36
    TMP = vld1q_u32(&K[32]);
    TMP0A = vaddq_u32(MSG0A, TMP);
    TMP0B = vaddq_u32(MSG0B, TMP);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    MSG0A = vsha256su0q_u32(MSG0A, MSG1A);
    MSG0B = vsha256su0q_u32(MSG0B, MSG1B);
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP0A);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP0B);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP0A);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP0B);
    MSG0A = vsha256su1q_u32(MSG0A, MSG2A, MSG3A);
    MSG0B = vsha256su1q_u32(MSG0B, MSG2B, MSG3B);

    // Transform 1: Rounds 37-40
    TMP = vld1q_u32(&K[36]);
    TMP0A = vaddq_u32(MSG1A, TMP);
    TMP0B = vaddq_u32(MSG1B, TMP);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    MSG1A = vsha256su0q_u32(MSG1A, MSG2A);
    MSG1B = vsha256su0q_u32(MSG1B, MSG2B);
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP0A);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP0B);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP0A);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP0B);
    MSG1A = vsha256su1q_u32(MSG1A, MSG3A, MSG0A);
    MSG1B = vsha256su1q_u32(MSG1B, MSG3B, MSG0B);

    // Transform 1: Rounds 41-44
    TMP = vld1q_u32(&K[40]);
    TMP0A = vaddq_u32(MSG2A, TMP);
    TMP0B = vaddq_u32(MSG2B, TMP);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    MSG2A = vsha256su0q_u32(MSG2A, MSG3A);
    MSG2B = vsha256su0q_u32(MSG2B, MSG3B);
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP0A);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP0B);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP0A);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP0B);
    MSG2A = vsha256su1q_u32(MSG2A, MSG0A, MSG1A);
    MSG2B = vsha256su1q_u32(MSG2B, MSG0B, MSG1B);

    // Transform 1: Rounds 45-48
    TMP = vld1q_u32(&K[44]);
    TMP0A = vaddq_u32(MSG3A, TMP);
    TMP0B = vaddq_u32(MSG3B, TMP);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    MSG3A = vsha256su0q_u32(MSG3A, MSG0A);
    MSG3B = vsha256su0q_u32(MSG3B, MSG0B);
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP0A);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP0B);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP0A);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP0B);
    MSG3A = vsha256su1q_u32(MSG3A, MSG1A, MSG2A);
    MSG3B = vsha256su1q_u32(MSG3B, MSG1B, MSG2B);

    // Transform 1: Rounds 49-52
    TMP = vld1q_u32(&K[48]);
    TMP0A = vaddq_u32(MSG0A, TMP);
    TMP0B = vaddq_u32(MSG0B, TMP);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP0A);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP0B);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP0A);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP0B);

    // Transform 1: Rounds 53-56
    TMP = vld1q_u32(&K[52]);
    TMP0A = vaddq_u32(MSG1A, TMP);
    TMP0B = vaddq_u32(MSG1B, TMP);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP0A);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP0B);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP0A);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP0B);

    // Transform 1: Rounds 57-60
    TMP = vld1q_u32(&K[56]);
    TMP0A = vaddq_u32(MSG2A, TMP);
    TMP0B = vaddq_u32(MSG2B, TMP);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP0A);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP0B);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP0A);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP0B);

    // Transform 1: Rounds 61-64
    TMP = vld1q_u32(&K[60]);
    TMP0A = vaddq_u32(MSG3A, TMP);
    TMP0B = vaddq_u32(MSG3B, TMP);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP0A);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP0B);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP0A);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP0B);

    // Transform 1: Update state (the starting state was INIT for both lanes)
    TMP = vld1q_u32(&INIT[0]);
    STATE0A = vaddq_u32(STATE0A, TMP);
    STATE0B = vaddq_u32(STATE0B, TMP);
    TMP = vld1q_u32(&INIT[4]);
    STATE1A = vaddq_u32(STATE1A, TMP);
    STATE1B = vaddq_u32(STATE1B, TMP);

    // Transform 2: Save state
    ABEF_SAVEA = STATE0A;
    ABEF_SAVEB = STATE0B;
    CDGH_SAVEA = STATE1A;
    CDGH_SAVEB = STATE1B;

    // Transform 2: Rounds 1-4
    TMP = vld1q_u32(&MIDS[0]);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP);

    // Transform 2: Rounds 5-8
    TMP = vld1q_u32(&MIDS[4]);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP);

    // Transform 2: Rounds 9-12
    TMP = vld1q_u32(&MIDS[8]);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP);

    // Transform 2: Rounds 13-16
    TMP = vld1q_u32(&MIDS[12]);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP);

    // Transform 2: Rounds 17-20
    TMP = vld1q_u32(&MIDS[16]);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP);

    // Transform 2: Rounds 21-24
    TMP = vld1q_u32(&MIDS[20]);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP);

    // Transform 2: Rounds 25-28
    TMP = vld1q_u32(&MIDS[24]);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP);

    // Transform 2: Rounds 29-32
    TMP = vld1q_u32(&MIDS[28]);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP);

    // Transform 2: Rounds 33-36
    TMP = vld1q_u32(&MIDS[32]);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP);

    // Transform 2: Rounds 37-40
    TMP = vld1q_u32(&MIDS[36]);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP);

    // Transform 2: Rounds 41-44
    TMP = vld1q_u32(&MIDS[40]);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP);

    // Transform 2: Rounds 45-48
    TMP = vld1q_u32(&MIDS[44]);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP);

    // Transform 2: Rounds 49-52
    TMP = vld1q_u32(&MIDS[48]);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP);

    // Transform 2: Rounds 53-56
    TMP = vld1q_u32(&MIDS[52]);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP);

    // Transform 2: Rounds 57-60
    TMP = vld1q_u32(&MIDS[56]);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP);

    // Transform 2: Rounds 61-64
    TMP = vld1q_u32(&MIDS[60]);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP);

    // Transform 2: Update state
    STATE0A = vaddq_u32(STATE0A, ABEF_SAVEA);
    STATE0B = vaddq_u32(STATE0B, ABEF_SAVEB);
    STATE1A = vaddq_u32(STATE1A, CDGH_SAVEA);
    STATE1B = vaddq_u32(STATE1B, CDGH_SAVEB);

    // Transform 3: The message is the transform-2 result followed by padding
    MSG0A = STATE0A;
    MSG0B = STATE0B;
    MSG1A = STATE1A;
    MSG1B = STATE1B;
    MSG2A = vld1q_u32(&FINAL[0]);
    MSG2B = MSG2A;
    MSG3A = vld1q_u32(&FINAL[4]);
    MSG3B = MSG3A;

    // Transform 3: Load state
    STATE0A = vld1q_u32(&INIT[0]);
    STATE0B = STATE0A;
    STATE1A = vld1q_u32(&INIT[4]);
    STATE1B = STATE1A;

    // Transform 3: Rounds 1-4
    TMP = vld1q_u32(&K[0]);
    TMP0A = vaddq_u32(MSG0A, TMP);
    TMP0B = vaddq_u32(MSG0B, TMP);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    MSG0A = vsha256su0q_u32(MSG0A, MSG1A);
    MSG0B = vsha256su0q_u32(MSG0B, MSG1B);
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP0A);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP0B);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP0A);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP0B);
    MSG0A = vsha256su1q_u32(MSG0A, MSG2A, MSG3A);
    MSG0B = vsha256su1q_u32(MSG0B, MSG2B, MSG3B);

    // Transform 3: Rounds 5-8
    TMP = vld1q_u32(&K[4]);
    TMP0A = vaddq_u32(MSG1A, TMP);
    TMP0B = vaddq_u32(MSG1B, TMP);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    MSG1A = vsha256su0q_u32(MSG1A, MSG2A);
    MSG1B = vsha256su0q_u32(MSG1B, MSG2B);
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP0A);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP0B);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP0A);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP0B);
    MSG1A = vsha256su1q_u32(MSG1A, MSG3A, MSG0A);
    MSG1B = vsha256su1q_u32(MSG1B, MSG3B, MSG0B);

    // Transform 3: Rounds 9-12 (round inputs and the MSG2 seed come from FINS)
    TMP = vld1q_u32(&FINS[0]);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    MSG2A = vld1q_u32(&FINS[4]);
    MSG2B = MSG2A;
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP);
    MSG2A = vsha256su1q_u32(MSG2A, MSG0A, MSG1A);
    MSG2B = vsha256su1q_u32(MSG2B, MSG0B, MSG1B);

    // Transform 3: Rounds 13-16 (round inputs come from FINS)
    TMP = vld1q_u32(&FINS[8]);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    MSG3A = vsha256su0q_u32(MSG3A, MSG0A);
    MSG3B = vsha256su0q_u32(MSG3B, MSG0B);
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP);
    MSG3A = vsha256su1q_u32(MSG3A, MSG1A, MSG2A);
    MSG3B = vsha256su1q_u32(MSG3B, MSG1B, MSG2B);

    // Transform 3: Rounds 17-20
    TMP = vld1q_u32(&K[16]);
    TMP0A = vaddq_u32(MSG0A, TMP);
    TMP0B = vaddq_u32(MSG0B, TMP);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    MSG0A = vsha256su0q_u32(MSG0A, MSG1A);
    MSG0B = vsha256su0q_u32(MSG0B, MSG1B);
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP0A);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP0B);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP0A);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP0B);
    MSG0A = vsha256su1q_u32(MSG0A, MSG2A, MSG3A);
    MSG0B = vsha256su1q_u32(MSG0B, MSG2B, MSG3B);

    // Transform 3: Rounds 21-24
    TMP = vld1q_u32(&K[20]);
    TMP0A = vaddq_u32(MSG1A, TMP);
    TMP0B = vaddq_u32(MSG1B, TMP);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    MSG1A = vsha256su0q_u32(MSG1A, MSG2A);
    MSG1B = vsha256su0q_u32(MSG1B, MSG2B);
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP0A);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP0B);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP0A);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP0B);
    MSG1A = vsha256su1q_u32(MSG1A, MSG3A, MSG0A);
    MSG1B = vsha256su1q_u32(MSG1B, MSG3B, MSG0B);

    // Transform 3: Rounds 25-28
    TMP = vld1q_u32(&K[24]);
    TMP0A = vaddq_u32(MSG2A, TMP);
    TMP0B = vaddq_u32(MSG2B, TMP);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    MSG2A = vsha256su0q_u32(MSG2A, MSG3A);
    MSG2B = vsha256su0q_u32(MSG2B, MSG3B);
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP0A);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP0B);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP0A);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP0B);
    MSG2A = vsha256su1q_u32(MSG2A, MSG0A, MSG1A);
    MSG2B = vsha256su1q_u32(MSG2B, MSG0B, MSG1B);

    // Transform 3: Rounds 29-32
    TMP = vld1q_u32(&K[28]);
    TMP0A = vaddq_u32(MSG3A, TMP);
    TMP0B = vaddq_u32(MSG3B, TMP);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    MSG3A = vsha256su0q_u32(MSG3A, MSG0A);
    MSG3B = vsha256su0q_u32(MSG3B, MSG0B);
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP0A);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP0B);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP0A);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP0B);
    MSG3A = vsha256su1q_u32(MSG3A, MSG1A, MSG2A);
    MSG3B = vsha256su1q_u32(MSG3B, MSG1B, MSG2B);

    // Transform 3: Rounds 33-36
    TMP = vld1q_u32(&K[32]);
    TMP0A = vaddq_u32(MSG0A, TMP);
    TMP0B = vaddq_u32(MSG0B, TMP);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    MSG0A = vsha256su0q_u32(MSG0A, MSG1A);
    MSG0B = vsha256su0q_u32(MSG0B, MSG1B);
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP0A);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP0B);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP0A);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP0B);
    MSG0A = vsha256su1q_u32(MSG0A, MSG2A, MSG3A);
    MSG0B = vsha256su1q_u32(MSG0B, MSG2B, MSG3B);

    // Transform 3: Rounds 37-40
    TMP = vld1q_u32(&K[36]);
    TMP0A = vaddq_u32(MSG1A, TMP);
    TMP0B = vaddq_u32(MSG1B, TMP);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    MSG1A = vsha256su0q_u32(MSG1A, MSG2A);
    MSG1B = vsha256su0q_u32(MSG1B, MSG2B);
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP0A);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP0B);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP0A);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP0B);
    MSG1A = vsha256su1q_u32(MSG1A, MSG3A, MSG0A);
    MSG1B = vsha256su1q_u32(MSG1B, MSG3B, MSG0B);

    // Transform 3: Rounds 41-44
    TMP = vld1q_u32(&K[40]);
    TMP0A = vaddq_u32(MSG2A, TMP);
    TMP0B = vaddq_u32(MSG2B, TMP);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    MSG2A = vsha256su0q_u32(MSG2A, MSG3A);
    MSG2B = vsha256su0q_u32(MSG2B, MSG3B);
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP0A);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP0B);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP0A);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP0B);
    MSG2A = vsha256su1q_u32(MSG2A, MSG0A, MSG1A);
    MSG2B = vsha256su1q_u32(MSG2B, MSG0B, MSG1B);

    // Transform 3: Rounds 45-48
    TMP = vld1q_u32(&K[44]);
    TMP0A = vaddq_u32(MSG3A, TMP);
    TMP0B = vaddq_u32(MSG3B, TMP);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    MSG3A = vsha256su0q_u32(MSG3A, MSG0A);
    MSG3B = vsha256su0q_u32(MSG3B, MSG0B);
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP0A);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP0B);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP0A);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP0B);
    MSG3A = vsha256su1q_u32(MSG3A, MSG1A, MSG2A);
    MSG3B = vsha256su1q_u32(MSG3B, MSG1B, MSG2B);

    // Transform 3: Rounds 49-52
    TMP = vld1q_u32(&K[48]);
    TMP0A = vaddq_u32(MSG0A, TMP);
    TMP0B = vaddq_u32(MSG0B, TMP);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP0A);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP0B);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP0A);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP0B);

    // Transform 3: Rounds 53-56
    TMP = vld1q_u32(&K[52]);
    TMP0A = vaddq_u32(MSG1A, TMP);
    TMP0B = vaddq_u32(MSG1B, TMP);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP0A);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP0B);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP0A);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP0B);

    // Transform 3: Rounds 57-60
    TMP = vld1q_u32(&K[56]);
    TMP0A = vaddq_u32(MSG2A, TMP);
    TMP0B = vaddq_u32(MSG2B, TMP);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP0A);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP0B);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP0A);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP0B);

    // Transform 3: Rounds 61-64
    TMP = vld1q_u32(&K[60]);
    TMP0A = vaddq_u32(MSG3A, TMP);
    TMP0B = vaddq_u32(MSG3B, TMP);
    TMP2A = STATE0A;
    TMP2B = STATE0B;
    STATE0A = vsha256hq_u32(STATE0A, STATE1A, TMP0A);
    STATE0B = vsha256hq_u32(STATE0B, STATE1B, TMP0B);
    STATE1A = vsha256h2q_u32(STATE1A, TMP2A, TMP0A);
    STATE1B = vsha256h2q_u32(STATE1B, TMP2B, TMP0B);

    // Transform 3: Update state (the starting state was INIT for both lanes)
    TMP = vld1q_u32(&INIT[0]);
    STATE0A = vaddq_u32(STATE0A, TMP);
    STATE0B = vaddq_u32(STATE0B, TMP);
    TMP = vld1q_u32(&INIT[4]);
    STATE1A = vaddq_u32(STATE1A, TMP);
    STATE1B = vaddq_u32(STATE1B, TMP);

    // Store both results, byte-swapping every 32-bit word
    vst1q_u8(output, vrev32q_u8(vreinterpretq_u8_u32(STATE0A)));
    vst1q_u8(output + 16, vrev32q_u8(vreinterpretq_u8_u32(STATE1A)));
    vst1q_u8(output + 32, vrev32q_u8(vreinterpretq_u8_u32(STATE0B)));
    vst1q_u8(output + 48, vrev32q_u8(vreinterpretq_u8_u32(STATE1B)));
}
// Prototypes of the two transform entry points. Each declaration needs its
// terminating semicolon to be valid C++.
void Transform(uint32_t* s, const unsigned char* chunk, size_t blocks);
void Transform_2way(unsigned char* out, const unsigned char* in);