/* _sha512-sse2.c 17 KB */
  /**
   * Author......: Jens Steube <jens.steube@gmail.com>
   * License.....: MIT
   */
  /*
   * Bit operations on 64-bit words that are stored as two 32-bit halves.
   *
   * Each 64-bit word is held in a pair of __m128i vectors: `h` carries the
   * high 32 bits and `l` the low 32 bits, with one independent word per
   * 32-bit lane.  `n` is the shift / rotate distance in bits.
   *
   *   Sh_32 / Sl_32 : high / low half of rotate-right by n, for n below 32
   *   Sh_64 / Sl_64 : high / low half of rotate-right by n, for n above 32
   *   Rh_32 / Rl_32 : high / low half of logical shift-right by n, n below 32
   *   Rh_64 / Rl_64 : high / low half of logical shift-right by n, n >= 32
   *
   * Rh_64 is always zero (the high half is shifted out completely); it is
   * produced as a vector so it can be combined with the other helpers.
   */
  #define Sh_32(h,l,n) (_mm_or_si128 (_mm_srli_epi32 ((h), (n)), _mm_slli_epi32 ((l), 32 - (n))))
  #define Sh_64(h,l,n) (_mm_or_si128 (_mm_slli_epi32 ((h), 64 - (n)), _mm_srli_epi32 ((l), (n) - 32)))
  #define Rh_32(h,l,n) (_mm_srli_epi32 ((h), (n)))
  #define Rh_64(h,l,n) (_mm_setzero_si128 ())
  #define Rl_32(h,l,n) (_mm_or_si128 (_mm_slli_epi32 ((h), 32 - (n)), _mm_srli_epi32 ((l), (n))))
  #define Rl_64(h,l,n) (_mm_srli_epi32 ((h), (n) - 32))
  #define Sl_32(h,l,n) (_mm_or_si128 (_mm_slli_epi32 ((h), 32 - (n)), _mm_srli_epi32 ((l), (n))))
  #define Sl_64(h,l,n) (_mm_or_si128 (_mm_srli_epi32 ((h), (n) - 32), _mm_slli_epi32 ((l), 64 - (n))))

  /*
   * Bitwise selection functions, applied to one 32-bit half at a time.
   *
   *   SHA512_F1 : majority of x, y, z      -> (x & y) | (z & (x | y))
   *   SHA512_F0 : choose y or z by x       -> z ^ (x & (y ^ z))
   */
  #define SHA512_F1(x,y,z) (_mm_or_si128 (_mm_and_si128 ((x), (y)), _mm_and_si128 ((z), _mm_or_si128 ((x), (y)))))
  #define SHA512_F0(x,y,z) (_mm_xor_si128 ((z), _mm_and_si128 ((x), _mm_xor_si128 ((y), (z)))))

  /*
   * Sigma functions; the `h` variant yields the high half of the result and
   * the `l` variant the low half.
   *
   *   SHA512_S3 : rotr 19 ^ rotr 61 ^ shr 6   (used by SHA512_EXPAND)
   *   SHA512_S2 : rotr  1 ^ rotr  8 ^ shr 7   (used by SHA512_EXPAND)
   *   SHA512_S1 : rotr 14 ^ rotr 18 ^ rotr 41 (used by SHA512_ROUND)
   *   SHA512_S0 : rotr 28 ^ rotr 34 ^ rotr 39 (used by SHA512_ROUND)
   */
  #define SHA512_S3h(h,l) (_mm_xor_si128 (_mm_xor_si128 (Sh_32 ((h), (l), 19), Sh_64 ((h), (l), 61)), Rh_32 ((h), (l), 6)))
  #define SHA512_S3l(h,l) (_mm_xor_si128 (_mm_xor_si128 (Sl_32 ((h), (l), 19), Sl_64 ((h), (l), 61)), Rl_32 ((h), (l), 6)))
  #define SHA512_S2h(h,l) (_mm_xor_si128 (_mm_xor_si128 (Sh_32 ((h), (l), 1), Sh_32 ((h), (l), 8)), Rh_32 ((h), (l), 7)))
  #define SHA512_S2l(h,l) (_mm_xor_si128 (_mm_xor_si128 (Sl_32 ((h), (l), 1), Sl_32 ((h), (l), 8)), Rl_32 ((h), (l), 7)))
  #define SHA512_S1h(h,l) (_mm_xor_si128 (_mm_xor_si128 (Sh_32 ((h), (l), 14), Sh_32 ((h), (l), 18)), Sh_64 ((h), (l), 41)))
  #define SHA512_S1l(h,l) (_mm_xor_si128 (_mm_xor_si128 (Sl_32 ((h), (l), 14), Sl_32 ((h), (l), 18)), Sl_64 ((h), (l), 41)))
  #define SHA512_S0h(h,l) (_mm_xor_si128 (_mm_xor_si128 (Sh_32 ((h), (l), 28), Sh_64 ((h), (l), 34)), Sh_64 ((h), (l), 39)))
  #define SHA512_S0l(h,l) (_mm_xor_si128 (_mm_xor_si128 (Sl_32 ((h), (l), 28), Sl_64 ((h), (l), 34)), Sl_64 ((h), (l), 39)))
  /*
   * ADDC (xh, xl, yh, yl): lane-wise 64-bit addition x += y on split words.
   *
   * xh:xl and yh:yl are (high, low) __m128i pairs holding one 64-bit value
   * per 32-bit lane.  xh and xl must be assignable; they receive the sum.
   * yh and yl are only read.
   *
   * SSE2 has no carry flag for packed adds, so the carry out of the low
   * half is rebuilt from the operand and result sign bits:
   *
   *   carry = msb ((xl & yl) | ((xl | yl) & ~(xl + yl)))
   *
   * Must be used as a statement, followed by a semicolon.
   */
  #define ADDC(xh,xl,yh,yl) \
  do \
  { \
    const __m128i addc_both_ = _mm_and_si128 ((xl), (yl)); \
    const __m128i addc_any_ = _mm_or_si128 ((xl), (yl)); \
    __m128i addc_carry_; \
    (xl) = _mm_add_epi32 ((xl), (yl)); \
    addc_carry_ = _mm_or_si128 (addc_both_, _mm_andnot_si128 ((xl), addc_any_)); \
    addc_carry_ = _mm_srli_epi32 (addc_carry_, 31); \
    (xh) = _mm_add_epi32 (_mm_add_epi32 ((xh), addc_carry_), (yh)); \
  } while (0)
  /*
   * SHA512_EXPAND (t): compute the next message-schedule word in place.
   *
   * `W` must name an array of __m128i used as a 32-entry ring (indices are
   * reduced with `& 31`).  Each 64-bit schedule word occupies two
   * consecutive entries: high half first, low half second.  `t` is the ring
   * index of the high half of the word being produced; the caller in this
   * file passes twice the round number.
   *
   * In units of 64-bit words (entry offsets divided by two) this evaluates
   *
   *   W[t] = S3 (W[t - 2]) + W[t - 7] + S2 (W[t - 15]) + W[t - 16]
   *
   * The slot written is the one that held W[t - 16], which is read before
   * it is overwritten.
   *
   * Must be used as a statement, followed by a semicolon.
   */
  #define SHA512_EXPAND(t) \
  do \
  { \
    __m128i oh = SHA512_S3h (W[((t) - 4) & 31], W[((t) - 3) & 31]); \
    __m128i ol = SHA512_S3l (W[((t) - 4) & 31], W[((t) - 3) & 31]); \
    __m128i th = W[((t) - 14) & 31]; \
    __m128i tl = W[((t) - 13) & 31]; \
    ADDC (oh, ol, th, tl); \
    th = SHA512_S2h (W[((t) - 30) & 31], W[((t) - 29) & 31]); \
    tl = SHA512_S2l (W[((t) - 30) & 31], W[((t) - 29) & 31]); \
    ADDC (oh, ol, th, tl); \
    th = W[((t) - 32) & 31]; \
    tl = W[((t) - 31) & 31]; \
    ADDC (oh, ol, th, tl); \
    W[((t) + 0) & 31] = oh; \
    W[((t) + 1) & 31] = ol; \
  } while (0)
  /*
   * SHA512_ROUND (t, a0, a1, ..., h0, h1): one compression round on split
   * 64-bit working variables.
   *
   * Each working variable is passed as a (high, low) pair of __m128i
   * lvalues: a0/a1 .. h0/h1.  `t` indexes the high half of the round
   * constant in `sha512_const` and (reduced with `& 31`) of the schedule
   * word in `W`; the low halves are read from the following entry.
   *
   *   T1 = K[t] + W[t] + h + S1 (e) + F0 (e, f, g)
   *   d += T1
   *   h  = T1 + S0 (a) + F1 (a, b, c)
   *
   * Only d and h are modified.  The variables are not rotated here; the
   * caller rotates the argument order from one round to the next.
   *
   * Must be used as a statement, followed by a semicolon.
   */
  #define SHA512_ROUND(t, a0, a1, b0, b1, c0, c1, d0, d1, e0, e1, f0, f1, g0, g1, h0, h1) \
  do \
  { \
    __m128i oh = sha512_const[(t) + 0]; \
    __m128i ol = sha512_const[(t) + 1]; \
    __m128i th = W[((t) + 0) & 31]; \
    __m128i tl = W[((t) + 1) & 31]; \
    ADDC (oh, ol, th, tl); \
    ADDC (oh, ol, h0, h1); \
    th = SHA512_S1h (e0, e1); \
    tl = SHA512_S1l (e0, e1); \
    ADDC (oh, ol, th, tl); \
    th = SHA512_F0 (e0, f0, g0); \
    tl = SHA512_F0 (e1, f1, g1); \
    ADDC (oh, ol, th, tl); \
    ADDC (d0, d1, oh, ol); \
    th = SHA512_S0h (a0, a1); \
    tl = SHA512_S0l (a0, a1); \
    ADDC (oh, ol, th, tl); \
    th = SHA512_F1 (a0, b0, c0); \
    tl = SHA512_F1 (a1, b1, c1); \
    ADDC (oh, ol, th, tl); \
    h0 = oh; \
    h1 = ol; \
  } while (0)
  /*
   * SHA-512 initial hash value (FIPS 180-4, H0..H7), stored as sixteen
   * 32-bit words: for each 64-bit word the high half comes first, followed
   * by the low half.
   *
   * NOTE(review): nothing in the visible part of this file reads this
   * table - confirm where it is consumed.
   */
  static const uint32_t SHA512_MAGIC[16] =
  {
    0x6a09e667, 0xf3bcc908,
    0xbb67ae85, 0x84caa73b,
    0x3c6ef372, 0xfe94f82b,
    0xa54ff53a, 0x5f1d36f1,
    0x510e527f, 0xade682d1,
    0x9b05688c, 0x2b3e6c1f,
    0x1f83d9ab, 0xfb41bd6b,
    0x5be0cd19, 0x137e2179,
  };
  /*
   * SHA-512 round constants (FIPS 180-4, K0..K79), laid out for 4-lane
   * 32-bit SIMD use.
   *
   * Every 64-bit constant contributes two rows: row 2*i holds its high
   * 32 bits and row 2*i + 1 its low 32 bits, each replicated across all
   * four lanes.  The table is 16-byte aligned so that a row has the size
   * and alignment of one __m128i.
   *
   * NOTE(review): the round macro reads `sha512_const`, not this table;
   * the code that copies one into the other is not visible here.
   */
  #define SHA512_K64(hi, lo) { hi, hi, hi, hi }, { lo, lo, lo, lo }

  static const uint32_t SHA512_CONST[160][4] __attribute__ ((aligned (16))) =
  {
    SHA512_K64 (0x428a2f98, 0xd728ae22),
    SHA512_K64 (0x71374491, 0x23ef65cd),
    SHA512_K64 (0xb5c0fbcf, 0xec4d3b2f),
    SHA512_K64 (0xe9b5dba5, 0x8189dbbc),
    SHA512_K64 (0x3956c25b, 0xf348b538),
    SHA512_K64 (0x59f111f1, 0xb605d019),
    SHA512_K64 (0x923f82a4, 0xaf194f9b),
    SHA512_K64 (0xab1c5ed5, 0xda6d8118),
    SHA512_K64 (0xd807aa98, 0xa3030242),
    SHA512_K64 (0x12835b01, 0x45706fbe),
    SHA512_K64 (0x243185be, 0x4ee4b28c),
    SHA512_K64 (0x550c7dc3, 0xd5ffb4e2),
    SHA512_K64 (0x72be5d74, 0xf27b896f),
    SHA512_K64 (0x80deb1fe, 0x3b1696b1),
    SHA512_K64 (0x9bdc06a7, 0x25c71235),
    SHA512_K64 (0xc19bf174, 0xcf692694),
    SHA512_K64 (0xe49b69c1, 0x9ef14ad2),
    SHA512_K64 (0xefbe4786, 0x384f25e3),
    SHA512_K64 (0x0fc19dc6, 0x8b8cd5b5),
    SHA512_K64 (0x240ca1cc, 0x77ac9c65),
    SHA512_K64 (0x2de92c6f, 0x592b0275),
    SHA512_K64 (0x4a7484aa, 0x6ea6e483),
    SHA512_K64 (0x5cb0a9dc, 0xbd41fbd4),
    SHA512_K64 (0x76f988da, 0x831153b5),
    SHA512_K64 (0x983e5152, 0xee66dfab),
    SHA512_K64 (0xa831c66d, 0x2db43210),
    SHA512_K64 (0xb00327c8, 0x98fb213f),
    SHA512_K64 (0xbf597fc7, 0xbeef0ee4),
    SHA512_K64 (0xc6e00bf3, 0x3da88fc2),
    SHA512_K64 (0xd5a79147, 0x930aa725),
    SHA512_K64 (0x06ca6351, 0xe003826f),
    SHA512_K64 (0x14292967, 0x0a0e6e70),
    SHA512_K64 (0x27b70a85, 0x46d22ffc),
    SHA512_K64 (0x2e1b2138, 0x5c26c926),
    SHA512_K64 (0x4d2c6dfc, 0x5ac42aed),
    SHA512_K64 (0x53380d13, 0x9d95b3df),
    SHA512_K64 (0x650a7354, 0x8baf63de),
    SHA512_K64 (0x766a0abb, 0x3c77b2a8),
    SHA512_K64 (0x81c2c92e, 0x47edaee6),
    SHA512_K64 (0x92722c85, 0x1482353b),
    SHA512_K64 (0xa2bfe8a1, 0x4cf10364),
    SHA512_K64 (0xa81a664b, 0xbc423001),
    SHA512_K64 (0xc24b8b70, 0xd0f89791),
    SHA512_K64 (0xc76c51a3, 0x0654be30),
    SHA512_K64 (0xd192e819, 0xd6ef5218),
    SHA512_K64 (0xd6990624, 0x5565a910),
    SHA512_K64 (0xf40e3585, 0x5771202a),
    SHA512_K64 (0x106aa070, 0x32bbd1b8),
    SHA512_K64 (0x19a4c116, 0xb8d2d0c8),
    SHA512_K64 (0x1e376c08, 0x5141ab53),
    SHA512_K64 (0x2748774c, 0xdf8eeb99),
    SHA512_K64 (0x34b0bcb5, 0xe19b48a8),
    SHA512_K64 (0x391c0cb3, 0xc5c95a63),
    SHA512_K64 (0x4ed8aa4a, 0xe3418acb),
    SHA512_K64 (0x5b9cca4f, 0x7763e373),
    SHA512_K64 (0x682e6ff3, 0xd6b2b8a3),
    SHA512_K64 (0x748f82ee, 0x5defb2fc),
    SHA512_K64 (0x78a5636f, 0x43172f60),
    SHA512_K64 (0x84c87814, 0xa1f0ab72),
    SHA512_K64 (0x8cc70208, 0x1a6439ec),
    SHA512_K64 (0x90befffa, 0x23631e28),
    SHA512_K64 (0xa4506ceb, 0xde82bde9),
    SHA512_K64 (0xbef9a3f7, 0xb2c67915),
    SHA512_K64 (0xc67178f2, 0xe372532b),
    SHA512_K64 (0xca273ece, 0xea26619c),
    SHA512_K64 (0xd186b8c7, 0x21c0c207),
    SHA512_K64 (0xeada7dd6, 0xcde0eb1e),
    SHA512_K64 (0xf57d4f7f, 0xee6ed178),
    SHA512_K64 (0x06f067aa, 0x72176fba),
    SHA512_K64 (0x0a637dc5, 0xa2c898a6),
    SHA512_K64 (0x113f9804, 0xbef90dae),
    SHA512_K64 (0x1b710b35, 0x131c471b),
    SHA512_K64 (0x28db77f5, 0x23047d84),
    SHA512_K64 (0x32caab7b, 0x40c72493),
    SHA512_K64 (0x3c9ebe0a, 0x15c9bebc),
    SHA512_K64 (0x431d67c4, 0x9c100d4c),
    SHA512_K64 (0x4cc5d4be, 0xcb3e42b6),
    SHA512_K64 (0x597f299c, 0xfc657e2a),
    SHA512_K64 (0x5fcb6fab, 0x3ad6faec),
    SHA512_K64 (0x6c44198c, 0x4a475817)
  };

  #undef SHA512_K64
  /*
   * Round constants in vector form, indexed by SHA512_ROUND as
   * sha512_const[t] (high half) and sha512_const[t + 1] (low half).
   *
   * NOTE(review): this object has external linkage and is not initialised
   * in the visible part of the file; presumably it is filled from
   * SHA512_CONST before the first hash is computed - confirm.
   */
  __m128i sha512_const[160];
  /**
   * Run the 80-round SHA-512 compression function on four independent
   * lanes at once, using 32-bit SSE2 arithmetic.
   *
   * @param digests  Chaining state, updated in place.  Row 2*i holds the
   *                 high 32 bits and row 2*i + 1 the low 32 bits of state
   *                 word i (i = 0..7, i.e. A..H); the four columns are the
   *                 four lanes.  On return each word has had the final
   *                 working variable added to it.
   * @param blocks   Message schedule.  Rows 0..31 are read as sixteen
   *                 64-bit words in the same high/low row layout and are
   *                 OVERWRITTEN as the schedule is expanded; rows above 31
   *                 are never accessed by this function.
   *
   * Both arrays are accessed through __m128i pointers, so they should be
   * 16-byte aligned.  Round constants are read from `sha512_const`.
   */
  void hashcat_sha512_128 (uint32_t digests[16][4], uint32_t blocks[160][4])
  {
    __m128i *sha512_digests = (__m128i *) digests;
    __m128i *sha512_blocks = (__m128i *) blocks;

    /* SHA512_EXPAND and SHA512_ROUND address the message schedule as W. */
    #define W sha512_blocks

    /* Working variables a..h, each split into a high and a low half. */
    __m128i Ah = sha512_digests[ 0];
    __m128i Al = sha512_digests[ 1];
    __m128i Bh = sha512_digests[ 2];
    __m128i Bl = sha512_digests[ 3];
    __m128i Ch = sha512_digests[ 4];
    __m128i Cl = sha512_digests[ 5];
    __m128i Dh = sha512_digests[ 6];
    __m128i Dl = sha512_digests[ 7];
    __m128i Eh = sha512_digests[ 8];
    __m128i El = sha512_digests[ 9];
    __m128i Fh = sha512_digests[10];
    __m128i Fl = sha512_digests[11];
    __m128i Gh = sha512_digests[12];
    __m128i Gl = sha512_digests[13];
    __m128i Hh = sha512_digests[14];
    __m128i Hl = sha512_digests[15];

    int t;   /* round number                                   */
    int tN;  /* 2 * round: index of the high half in W / const */

    /*
     * Rounds 0..15 take the schedule words straight from the input block.
     * Instead of moving data between the working variables, the argument
     * list is rotated by one variable per round; after eight rounds the
     * names are back in their starting positions.
     */
    for (t = 0; t < 16; t += 8)
    {
      tN = ((t + 0) * 2); SHA512_ROUND (tN, Ah, Al, Bh, Bl, Ch, Cl, Dh, Dl, Eh, El, Fh, Fl, Gh, Gl, Hh, Hl);
      tN = ((t + 1) * 2); SHA512_ROUND (tN, Hh, Hl, Ah, Al, Bh, Bl, Ch, Cl, Dh, Dl, Eh, El, Fh, Fl, Gh, Gl);
      tN = ((t + 2) * 2); SHA512_ROUND (tN, Gh, Gl, Hh, Hl, Ah, Al, Bh, Bl, Ch, Cl, Dh, Dl, Eh, El, Fh, Fl);
      tN = ((t + 3) * 2); SHA512_ROUND (tN, Fh, Fl, Gh, Gl, Hh, Hl, Ah, Al, Bh, Bl, Ch, Cl, Dh, Dl, Eh, El);
      tN = ((t + 4) * 2); SHA512_ROUND (tN, Eh, El, Fh, Fl, Gh, Gl, Hh, Hl, Ah, Al, Bh, Bl, Ch, Cl, Dh, Dl);
      tN = ((t + 5) * 2); SHA512_ROUND (tN, Dh, Dl, Eh, El, Fh, Fl, Gh, Gl, Hh, Hl, Ah, Al, Bh, Bl, Ch, Cl);
      tN = ((t + 6) * 2); SHA512_ROUND (tN, Ch, Cl, Dh, Dl, Eh, El, Fh, Fl, Gh, Gl, Hh, Hl, Ah, Al, Bh, Bl);
      tN = ((t + 7) * 2); SHA512_ROUND (tN, Bh, Bl, Ch, Cl, Dh, Dl, Eh, El, Fh, Fl, Gh, Gl, Hh, Hl, Ah, Al);
    }

    /* Rounds 16..79 first expand the next schedule word into the W ring. */
    for (t = 16; t < 80; t += 8)
    {
      tN = ((t + 0) * 2); SHA512_EXPAND (tN); SHA512_ROUND (tN, Ah, Al, Bh, Bl, Ch, Cl, Dh, Dl, Eh, El, Fh, Fl, Gh, Gl, Hh, Hl);
      tN = ((t + 1) * 2); SHA512_EXPAND (tN); SHA512_ROUND (tN, Hh, Hl, Ah, Al, Bh, Bl, Ch, Cl, Dh, Dl, Eh, El, Fh, Fl, Gh, Gl);
      tN = ((t + 2) * 2); SHA512_EXPAND (tN); SHA512_ROUND (tN, Gh, Gl, Hh, Hl, Ah, Al, Bh, Bl, Ch, Cl, Dh, Dl, Eh, El, Fh, Fl);
      tN = ((t + 3) * 2); SHA512_EXPAND (tN); SHA512_ROUND (tN, Fh, Fl, Gh, Gl, Hh, Hl, Ah, Al, Bh, Bl, Ch, Cl, Dh, Dl, Eh, El);
      tN = ((t + 4) * 2); SHA512_EXPAND (tN); SHA512_ROUND (tN, Eh, El, Fh, Fl, Gh, Gl, Hh, Hl, Ah, Al, Bh, Bl, Ch, Cl, Dh, Dl);
      tN = ((t + 5) * 2); SHA512_EXPAND (tN); SHA512_ROUND (tN, Dh, Dl, Eh, El, Fh, Fl, Gh, Gl, Hh, Hl, Ah, Al, Bh, Bl, Ch, Cl);
      tN = ((t + 6) * 2); SHA512_EXPAND (tN); SHA512_ROUND (tN, Ch, Cl, Dh, Dl, Eh, El, Fh, Fl, Gh, Gl, Hh, Hl, Ah, Al, Bh, Bl);
      tN = ((t + 7) * 2); SHA512_EXPAND (tN); SHA512_ROUND (tN, Bh, Bl, Ch, Cl, Dh, Dl, Eh, El, Fh, Fl, Gh, Gl, Hh, Hl, Ah, Al);
    }

    /* Feed-forward: add the working variables back into the state. */
    ADDC (sha512_digests[ 0], sha512_digests[ 1], Ah, Al);
    ADDC (sha512_digests[ 2], sha512_digests[ 3], Bh, Bl);
    ADDC (sha512_digests[ 4], sha512_digests[ 5], Ch, Cl);
    ADDC (sha512_digests[ 6], sha512_digests[ 7], Dh, Dl);
    ADDC (sha512_digests[ 8], sha512_digests[ 9], Eh, El);
    ADDC (sha512_digests[10], sha512_digests[11], Fh, Fl);
    ADDC (sha512_digests[12], sha512_digests[13], Gh, Gl);
    ADDC (sha512_digests[14], sha512_digests[15], Hh, Hl);
  }