// Transform -- x86-64-only routine (see the architecture guard below). Its
// visible body consists of SSE instruction strings followed by an operand
// list in GCC/Clang extended-asm syntax (outputs : inputs : clobbers).
//
// Parameters, as bound in the operand list further down:
//   s      - pointer to 32-bit words; read/write register operand %0.
//   chunk  - pointer to input bytes; read/write register operand %1. The
//            visible code loads 16-byte groups at offsets 0x10, 0x20 and
//            0x30 from it.
//   blocks - count; read/write register operand %2 (its use is not visible
//            in this listing).
//
// The K256 table below matches the SHA-256 round constants, so this is
// presumably the SHA-256 block transform -- TODO confirm against the caller.
//
// NOTE(review): the numeric prefixes carried on these lines jump (e.g.
// 57 -> 79, 712 -> 826, 826 -> 941), so parts of the body are not visible
// here. The comments below describe only the lines that are shown.
8#if defined(__x86_64__) || defined(__amd64__)
// Exclude the function from AddressSanitizer instrumentation: directly when
// __SANITIZE_ADDRESS__ is defined, otherwise on clang when
// __has_feature(address_sanitizer) reports true.
24#if defined(__SANITIZE_ADDRESS__)
25 __attribute__((no_sanitize(
"address")))
26#elif defined(__clang__)
27#if __has_feature(address_sanitizer)
28 __attribute__((no_sanitize(
"address")))
31void Transform(uint32_t*
s,
const unsigned char* chunk,
size_t blocks)
// 64 32-bit constants, 16-byte aligned so they can be read with aligned
// 128-bit loads. The values match the SHA-256 round constants (FIPS 180-4).
33 static const uint32_t K256
alignas(16) [] = {
34 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
35 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
36 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
37 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
38 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
39 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
40 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
41 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
42 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
43 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
44 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
45 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
46 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
47 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
48 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
49 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
// Byte-shuffle control: used as a pshufb mask it reverses the four bytes of
// every 32-bit lane. Presumably the value held in xmm12 below (the load is
// not visible here) -- TODO confirm.
51 static const uint32_t FLIP_MASK
alignas(16) [] = {0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f};
// Byte-shuffle control: used as a pshufb mask it moves source dwords 0 and 2
// into result dwords 0 and 1 and zeroes dwords 2 and 3 (0xff selects zero).
// Presumably the value held in xmm10 below -- TODO confirm.
52 static const uint32_t SHUF_00BA
alignas(16) [] = {0x03020100, 0x0b0a0908, 0xffffffff, 0xffffffff};
// Byte-shuffle control: used as a pshufb mask it zeroes result dwords 0 and 1
// and moves source dwords 0 and 2 into result dwords 2 and 3.
// Presumably the value held in xmm11 below -- TODO confirm.
53 static const uint32_t SHUF_DC00
alignas(16) [] = {0xffffffff, 0xffffffff, 0x03020100, 0x0b0a0908};
// 32-bit scratch values; each is bound as a write-only register operand
// ("=r", %3..%12) so the compiler picks the registers.
54 uint32_t a, b, c, d, f,
g, h, y0, y1, y2;
// 64-bit values bound as read/write memory operands ("+m", %14 and %15).
56 uint64_t inp_end, inp;
// 16-byte-aligned buffer of four 32-bit words, bound as read/write memory
// operand %16.
57 uint32_t xfer
alignas(16) [4];
// Load the input words: unaligned 16-byte loads from chunk (%1) at offsets
// 0x10/0x20/0x30 into xmm5..xmm7, each followed by a byte shuffle with the
// mask in xmm12. The matching load for xmm4 is not visible in this listing.
79 "pshufb %%xmm12,%%xmm4;"
80 "movdqu 0x10(%1),%%xmm5;"
81 "pshufb %%xmm12,%%xmm5;"
82 "movdqu 0x20(%1),%%xmm6;"
83 "pshufb %%xmm12,%%xmm6;"
84 "movdqu 0x30(%1),%%xmm7;"
85 "pshufb %%xmm12,%%xmm7;"
// ---- Group 1: produces a new xmm4 from xmm4..xmm7 ----
// xmm9 = 16 bytes at offset 0x0 from operand %13 (tbl) + xmm4, lane-wise.
// How xmm9 is consumed is not visible here; tbl presumably points into K256.
90 "movdqa 0x0(%13),%%xmm9;"
91 "paddd %%xmm4,%%xmm9;"
// xmm0 = { xmm6[1], xmm6[2], xmm6[3], xmm7[0] } via palignr by 4 bytes,
// then xmm0 += xmm4.
93 "movdqa %%xmm7,%%xmm0;"
97 "palignr $0x4,%%xmm6,%%xmm0;"
// xmm1 = { xmm4[1], xmm4[2], xmm4[3], xmm5[0] }, copied to xmm2/xmm3 as
// inputs to the shift/xor combination that follows.
102 "movdqa %%xmm5,%%xmm1;"
105 "paddd %%xmm4,%%xmm0;"
109 "palignr $0x4,%%xmm4,%%xmm1;"
113 "movdqa %%xmm1,%%xmm2;"
117 "movdqa %%xmm1,%%xmm3;"
// Visible parts of a shift/xor mix: left shift by 25 (0x19), right shift by
// 18 (0x12), results XORed into xmm1. Intermediate instructions are missing
// from this listing. NOTE(review): looks like the SHA-256 message-schedule
// sigma0 term -- confirm against the full source.
121 "pslld $0x19,%%xmm1;"
131 "movdqa %%xmm3,%%xmm2;"
134 "movdqa %%xmm3,%%xmm8;"
143 "psrld $0x12,%%xmm2;"
148 "pxor %%xmm3,%%xmm1;"
155 "pxor %%xmm2,%%xmm1;"
159 "pxor %%xmm8,%%xmm1;"
// xmm2 = { xmm7[2], xmm7[2], xmm7[3], xmm7[3] } (pshufd 0xfa); the mixed
// value in xmm1 is accumulated into xmm0.
163 "pshufd $0xfa,%%xmm7,%%xmm2;"
166 "paddd %%xmm1,%%xmm0;"
169 "movdqa %%xmm2,%%xmm3;"
173 "movdqa %%xmm2,%%xmm8;"
// 64-bit right shifts by 17 (0x11) and 19 (0x13) on the duplicated dwords,
// XORed together into xmm8, then compacted with the xmm10 mask and added to
// xmm0.
179 "psrlq $0x11,%%xmm2;"
181 "psrlq $0x13,%%xmm3;"
189 "pxor %%xmm3,%%xmm2;"
193 "pxor %%xmm2,%%xmm8;"
197 "pshufb %%xmm10,%%xmm8;"
201 "paddd %%xmm8,%%xmm0;"
// Same shift/xor step applied to { xmm0[0], xmm0[0], xmm0[1], xmm0[1] }
// (pshufd 0x50); the result is placed with the xmm11 mask and summed with
// xmm0 to give the new xmm4.
204 "pshufd $0x50,%%xmm0,%%xmm2;"
207 "movdqa %%xmm2,%%xmm3;"
211 "movdqa %%xmm2,%%xmm4;"
216 "psrlq $0x11,%%xmm2;"
219 "psrlq $0x13,%%xmm3;"
227 "pxor %%xmm3,%%xmm2;"
231 "pxor %%xmm2,%%xmm4;"
235 "pshufb %%xmm11,%%xmm4;"
239 "paddd %%xmm0,%%xmm4;"
// ---- Group 2: same instruction pattern with register roles rotated
// (xmm5, xmm6, xmm7, xmm4 in place of xmm4, xmm5, xmm6, xmm7), table offset
// 0x10, result written to xmm5 ----
244 "movdqa 0x10(%13),%%xmm9;"
245 "paddd %%xmm5,%%xmm9;"
247 "movdqa %%xmm4,%%xmm0;"
251 "palignr $0x4,%%xmm7,%%xmm0;"
256 "movdqa %%xmm6,%%xmm1;"
259 "paddd %%xmm5,%%xmm0;"
263 "palignr $0x4,%%xmm5,%%xmm1;"
267 "movdqa %%xmm1,%%xmm2;"
271 "movdqa %%xmm1,%%xmm3;"
275 "pslld $0x19,%%xmm1;"
285 "movdqa %%xmm3,%%xmm2;"
288 "movdqa %%xmm3,%%xmm8;"
297 "psrld $0x12,%%xmm2;"
302 "pxor %%xmm3,%%xmm1;"
309 "pxor %%xmm2,%%xmm1;"
313 "pxor %%xmm8,%%xmm1;"
317 "pshufd $0xfa,%%xmm4,%%xmm2;"
320 "paddd %%xmm1,%%xmm0;"
323 "movdqa %%xmm2,%%xmm3;"
327 "movdqa %%xmm2,%%xmm8;"
333 "psrlq $0x11,%%xmm2;"
335 "psrlq $0x13,%%xmm3;"
343 "pxor %%xmm3,%%xmm2;"
347 "pxor %%xmm2,%%xmm8;"
351 "pshufb %%xmm10,%%xmm8;"
355 "paddd %%xmm8,%%xmm0;"
358 "pshufd $0x50,%%xmm0,%%xmm2;"
361 "movdqa %%xmm2,%%xmm3;"
365 "movdqa %%xmm2,%%xmm5;"
370 "psrlq $0x11,%%xmm2;"
373 "psrlq $0x13,%%xmm3;"
381 "pxor %%xmm3,%%xmm2;"
385 "pxor %%xmm2,%%xmm5;"
389 "pshufb %%xmm11,%%xmm5;"
393 "paddd %%xmm0,%%xmm5;"
// ---- Group 3: roles rotated again (xmm6, xmm7, xmm4, xmm5), table offset
// 0x20, result written to xmm6 ----
398 "movdqa 0x20(%13),%%xmm9;"
399 "paddd %%xmm6,%%xmm9;"
401 "movdqa %%xmm5,%%xmm0;"
405 "palignr $0x4,%%xmm4,%%xmm0;"
410 "movdqa %%xmm7,%%xmm1;"
413 "paddd %%xmm6,%%xmm0;"
417 "palignr $0x4,%%xmm6,%%xmm1;"
421 "movdqa %%xmm1,%%xmm2;"
425 "movdqa %%xmm1,%%xmm3;"
429 "pslld $0x19,%%xmm1;"
439 "movdqa %%xmm3,%%xmm2;"
442 "movdqa %%xmm3,%%xmm8;"
451 "psrld $0x12,%%xmm2;"
456 "pxor %%xmm3,%%xmm1;"
463 "pxor %%xmm2,%%xmm1;"
467 "pxor %%xmm8,%%xmm1;"
471 "pshufd $0xfa,%%xmm5,%%xmm2;"
474 "paddd %%xmm1,%%xmm0;"
477 "movdqa %%xmm2,%%xmm3;"
481 "movdqa %%xmm2,%%xmm8;"
487 "psrlq $0x11,%%xmm2;"
489 "psrlq $0x13,%%xmm3;"
497 "pxor %%xmm3,%%xmm2;"
501 "pxor %%xmm2,%%xmm8;"
505 "pshufb %%xmm10,%%xmm8;"
509 "paddd %%xmm8,%%xmm0;"
512 "pshufd $0x50,%%xmm0,%%xmm2;"
515 "movdqa %%xmm2,%%xmm3;"
519 "movdqa %%xmm2,%%xmm6;"
524 "psrlq $0x11,%%xmm2;"
527 "psrlq $0x13,%%xmm3;"
535 "pxor %%xmm3,%%xmm2;"
539 "pxor %%xmm2,%%xmm6;"
543 "pshufb %%xmm11,%%xmm6;"
547 "paddd %%xmm0,%%xmm6;"
// ---- Group 4: roles rotated again (xmm7, xmm4, xmm5, xmm6), table offset
// 0x30, result written to xmm7 ----
552 "movdqa 0x30(%13),%%xmm9;"
553 "paddd %%xmm7,%%xmm9;"
556 "movdqa %%xmm6,%%xmm0;"
560 "palignr $0x4,%%xmm5,%%xmm0;"
565 "movdqa %%xmm4,%%xmm1;"
568 "paddd %%xmm7,%%xmm0;"
572 "palignr $0x4,%%xmm7,%%xmm1;"
576 "movdqa %%xmm1,%%xmm2;"
580 "movdqa %%xmm1,%%xmm3;"
584 "pslld $0x19,%%xmm1;"
594 "movdqa %%xmm3,%%xmm2;"
597 "movdqa %%xmm3,%%xmm8;"
606 "psrld $0x12,%%xmm2;"
611 "pxor %%xmm3,%%xmm1;"
618 "pxor %%xmm2,%%xmm1;"
622 "pxor %%xmm8,%%xmm1;"
626 "pshufd $0xfa,%%xmm6,%%xmm2;"
629 "paddd %%xmm1,%%xmm0;"
632 "movdqa %%xmm2,%%xmm3;"
636 "movdqa %%xmm2,%%xmm8;"
642 "psrlq $0x11,%%xmm2;"
644 "psrlq $0x13,%%xmm3;"
652 "pxor %%xmm3,%%xmm2;"
656 "pxor %%xmm2,%%xmm8;"
660 "pshufb %%xmm10,%%xmm8;"
664 "paddd %%xmm8,%%xmm0;"
667 "pshufd $0x50,%%xmm0,%%xmm2;"
670 "movdqa %%xmm2,%%xmm3;"
674 "movdqa %%xmm2,%%xmm7;"
679 "psrlq $0x11,%%xmm2;"
682 "psrlq $0x13,%%xmm3;"
690 "pxor %%xmm3,%%xmm2;"
694 "pxor %%xmm2,%%xmm7;"
698 "pshufb %%xmm11,%%xmm7;"
702 "paddd %%xmm0,%%xmm7;"
// Add the table words at offsets 0x0 and 0x10 from tbl (%13) directly into
// xmm4 and xmm5. The surrounding instructions are not visible here.
712 "paddd 0x0(%13),%%xmm4;"
826 "paddd 0x10(%13),%%xmm5;"
// Move xmm6/xmm7 down into xmm4/xmm5.
941 "movdqa %%xmm6,%%xmm4;"
942 "movdqa %%xmm7,%%xmm5;"
// Output operands, numbered from %0 in the order listed:
//   %0 s, %1 chunk, %2 blocks        - read/write registers
//   %3..%12 a,b,c,d,f,g,h,y0,y1,y2   - write-only scratch registers
//   %13 tbl                          - write-only register (its declaration
//                                      is not visible in this listing)
//   %14 inp_end, %15 inp, %16 xfer   - read/write memory
968 :
"+r"(
s),
"+r"(chunk),
"+r"(blocks),
"=r"(a),
"=r"(b),
"=r"(c),
"=r"(d),
"=r"(f),
"=r"(
g),
"=r"(h),
"=r"(y0),
"=r"(y1),
"=r"(y2),
"=r"(tbl),
"+m"(inp_end),
"+m"(inp),
"+m"(xfer)
// Input operands (%17..%20): the constant tables, passed as memory operands.
969 :
"m"(K256),
"m"(FLIP_MASK),
"m"(SHUF_00BA),
"m"(SHUF_DC00)
// Clobbers: condition codes, memory, and xmm0..xmm12 (every xmm register
// named in the visible instructions).
970 :
"cc",
"memory",
"xmm0",
"xmm1",
"xmm2",
"xmm3",
"xmm4",
"xmm5",
"xmm6",
"xmm7",
"xmm8",
"xmm9",
"xmm10",
"xmm11",
"xmm12"
// NOTE(review): the line below repeats the Transform signature with no body;
// it looks like leftover from how this listing was produced -- confirm
// before building.
void Transform(uint32_t *s, const unsigned char *chunk, size_t blocks)