From 3485f5635ff285c79fee2ed027273f426649dda7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Kn=C3=A1pek?= Date: Fri, 17 Apr 2026 22:30:00 +0200 Subject: [PATCH 01/10] SHA-512 accelerated by x86 --- libtomcrypt_VS2008.vcproj | 32 ++ makefile.mingw | 32 +- makefile.msvc | 32 +- makefile.unix | 32 +- makefile_include.mk | 32 +- sources.cmake | 8 + src/hashes/sha2/sha224_desc.c | 3 + src/hashes/sha2/sha256_desc.c | 3 + src/hashes/sha2/sha256_x86.c | 12 +- src/hashes/sha2/sha384.c | 57 +-- src/hashes/sha2/sha384_desc.c | 191 +++++++++ src/hashes/sha2/sha384_x86.c | 87 ++++ src/hashes/sha2/sha512.c | 64 +-- src/hashes/sha2/sha512_224.c | 57 +-- src/hashes/sha2/sha512_224_desc.c | 187 +++++++++ src/hashes/sha2/sha512_224_x86.c | 87 ++++ src/hashes/sha2/sha512_256.c | 57 +-- src/hashes/sha2/sha512_256_desc.c | 187 +++++++++ src/hashes/sha2/sha512_256_x86.c | 88 ++++ src/hashes/sha2/sha512_desc.c | 212 ++++++++++ src/hashes/sha2/sha512_x86.c | 467 +++++++++++++++++++++ src/headers/tomcrypt_cfg.h | 14 + src/headers/tomcrypt_hash.h | 63 ++- src/headers/tomcrypt_private.h | 12 + src/misc/crypt/crypt.c | 12 + src/misc/crypt/crypt_register_all_hashes.c | 12 + tests/test.c | 19 + 27 files changed, 1802 insertions(+), 257 deletions(-) create mode 100644 src/hashes/sha2/sha384_desc.c create mode 100644 src/hashes/sha2/sha384_x86.c create mode 100644 src/hashes/sha2/sha512_224_desc.c create mode 100644 src/hashes/sha2/sha512_224_x86.c create mode 100644 src/hashes/sha2/sha512_256_desc.c create mode 100644 src/hashes/sha2/sha512_256_x86.c create mode 100644 src/hashes/sha2/sha512_desc.c create mode 100644 src/hashes/sha2/sha512_x86.c diff --git a/libtomcrypt_VS2008.vcproj b/libtomcrypt_VS2008.vcproj index 987f3a33e..d174a1a88 100644 --- a/libtomcrypt_VS2008.vcproj +++ b/libtomcrypt_VS2008.vcproj @@ -966,6 +966,14 @@ RelativePath="src\hashes\sha2\sha384.c" > + + + + @@ -974,10 +982,34 @@ RelativePath="src\hashes\sha2\sha512_224.c" > + + + + + + + + + + + + +#endif static LTC_INLINE void s_x86_cpuid(int* regs, int leaf) { #if defined _MSC_VER diff --git a/src/hashes/sha2/sha256_desc.c b/src/hashes/sha2/sha256_desc.c index 993b0a168..29410bb39 100644 --- a/src/hashes/sha2/sha256_desc.c +++ b/src/hashes/sha2/sha256_desc.c @@ -6,6 +6,9 @@ #if !defined (LTC_S_X86_CPUID) #define LTC_S_X86_CPUID +#if defined _MSC_VER +#include +#endif static LTC_INLINE void s_x86_cpuid(int* regs, int leaf) { #if defined _MSC_VER diff --git a/src/hashes/sha2/sha256_x86.c b/src/hashes/sha2/sha256_x86.c index 43a23c913..e2da51a59 100644 --- a/src/hashes/sha2/sha256_x86.c +++ b/src/hashes/sha2/sha256_x86.c @@ -18,7 +18,7 @@ #elif defined(_MSC_VER) #include #endif -#include /* SSE2 _mm_load_si128 _mm_loadu_si128 _mm_store_si128 _mm_set_epi64x _mm_add_epi32 _mm_shuffle_epi32 */ +#include /* SSE2 _mm_load_si128 _mm_store_si128 _mm_set_epi64x _mm_add_epi32 _mm_shuffle_epi32 */ #include /* SSSE3 _mm_alignr_epi8 _mm_shuffle_epi8 */ #include /* SSE4.1 _mm_blend_epi16 */ #include /* SHA _mm_sha256msg1_epu32 _mm_sha256msg2_epu32 _mm_sha256rnds2_epu32 */ @@ -89,6 +89,8 @@ static int LTC_SHA_TARGET s_sha256_x86_compress(hash_state * md, const unsigned __m128i msg_2; __m128i msg_3; + LTC_ARGCHK(((ltc_uintptr)(buf)) % 16 == 0); + reverse = _mm_set_epi64x(0x0c0d0e0f08090a0bull, 0x0405060700010203ull); state_0 = _mm_load_si128(((__m128i const*)(&md->sha256.state[0]))); state_1 = _mm_load_si128(((__m128i const*)(&md->sha256.state[4]))); @@ -99,28 +101,28 @@ static int LTC_SHA_TARGET s_sha256_x86_compress(hash_state * md, const unsigned old_0 = state_0; old_1 = state_1; - msg_0 = _mm_loadu_si128(((__m128i const*)(&buf[0 * 16]))); + msg_0 = _mm_load_si128(((__m128i const*)(&buf[0 * 16]))); msg_0 = _mm_shuffle_epi8(msg_0, reverse); tmp = _mm_load_si128(((__m128i const*)(&K[0 * 4]))); msg = _mm_add_epi32(msg_0, tmp); state_1 = _mm_sha256rnds2_epu32(state_1, state_0, msg); msg = _mm_shuffle_epi32(msg, k_shuffle_epi32(k_any, k_any, 0x3, 0x2)); state_0 = _mm_sha256rnds2_epu32(state_0, state_1, msg); - msg_1 = _mm_loadu_si128(((__m128i const*)(&buf[1 * 16]))); + msg_1 = _mm_load_si128(((__m128i const*)(&buf[1 * 16]))); msg_1 = _mm_shuffle_epi8(msg_1, reverse); tmp = _mm_load_si128(((__m128i const*)(&K[1 * 4]))); msg = _mm_add_epi32(msg_1, tmp); state_1 = _mm_sha256rnds2_epu32(state_1, state_0, msg); msg = _mm_shuffle_epi32(msg, k_shuffle_epi32(k_any, k_any, 0x3, 0x2)); state_0 = _mm_sha256rnds2_epu32(state_0, state_1, msg); - msg_2 = _mm_loadu_si128(((__m128i const*)(&buf[2 * 16]))); + msg_2 = _mm_load_si128(((__m128i const*)(&buf[2 * 16]))); msg_2 = _mm_shuffle_epi8(msg_2, reverse); tmp = _mm_load_si128(((__m128i const*)(&K[2 * 4]))); msg = _mm_add_epi32(msg_2, tmp); state_1 = _mm_sha256rnds2_epu32(state_1, state_0, msg); msg = _mm_shuffle_epi32(msg, k_shuffle_epi32(k_any, k_any, 0x3, 0x2)); state_0 = _mm_sha256rnds2_epu32(state_0, state_1, msg); - msg_3 = _mm_loadu_si128(((__m128i const*)(&buf[3 * 16]))); + msg_3 = _mm_load_si128(((__m128i const*)(&buf[3 * 16]))); msg_3 = _mm_shuffle_epi8(msg_3, reverse); tmp = _mm_load_si128(((__m128i const*)(&K[3 * 4]))); msg = _mm_add_epi32(msg_3, tmp); diff --git a/src/hashes/sha2/sha384.c b/src/hashes/sha2/sha384.c index 86841cef2..e9c22a22e 100644 --- a/src/hashes/sha2/sha384.c +++ b/src/hashes/sha2/sha384.c @@ -9,7 +9,7 @@ #if defined(LTC_SHA384) && defined(LTC_SHA512) -const struct ltc_hash_descriptor sha384_desc = +const struct ltc_hash_descriptor sha384_portable_desc = { "sha384", 4, @@ -20,9 +20,9 @@ const struct ltc_hash_descriptor sha384_desc = { 2, 16, 840, 1, 101, 3, 4, 2, 2, }, 9, - &sha384_init, - &sha512_process, - &sha384_done, + &sha384_c_init, + &sha512_c_process, + &sha384_c_done, &sha384_test, NULL }; @@ -32,10 +32,11 @@ const struct ltc_hash_descriptor sha384_desc = @param md The hash state you wish to initialize @return CRYPT_OK if successful */ -int sha384_init(hash_state * md) +int sha384_c_init(hash_state * md) { LTC_ARGCHK(md != NULL); + md->sha512.state = LTC_ALIGN_BUF(md->sha512.state_buf, 32); md->sha512.curlen = 0; md->sha512.length = 0; md->sha512.state[0] = CONST64(0xcbbb9d5dc1059ed8); @@ -55,7 +56,7 @@ int sha384_init(hash_state * md) @param out [out] The destination of the hash (48 bytes) @return CRYPT_OK if successful */ -int sha384_done(hash_state * md, unsigned char *out) +int sha384_c_done(hash_state * md, unsigned char *out) { unsigned char buf[64]; @@ -66,7 +67,7 @@ int sha384_done(hash_state * md, unsigned char *out) return CRYPT_INVALID_ARG; } - sha512_done(md, buf); + sha512_c_done(md, buf); XMEMCPY(out, buf, 48); #ifdef LTC_CLEAN_STACK zeromem(buf, sizeof(buf)); @@ -78,47 +79,9 @@ int sha384_done(hash_state * md, unsigned char *out) Self-test the hash @return CRYPT_OK if successful, CRYPT_NOP if self-tests have been disabled */ -int sha384_test(void) +int sha384_c_test(void) { - #ifndef LTC_TEST - return CRYPT_NOP; - #else - static const struct { - const char *msg; - unsigned char hash[48]; - } tests[] = { - { "abc", - { 0xcb, 0x00, 0x75, 0x3f, 0x45, 0xa3, 0x5e, 0x8b, - 0xb5, 0xa0, 0x3d, 0x69, 0x9a, 0xc6, 0x50, 0x07, - 0x27, 0x2c, 0x32, 0xab, 0x0e, 0xde, 0xd1, 0x63, - 0x1a, 0x8b, 0x60, 0x5a, 0x43, 0xff, 0x5b, 0xed, - 0x80, 0x86, 0x07, 0x2b, 0xa1, 0xe7, 0xcc, 0x23, - 0x58, 0xba, 0xec, 0xa1, 0x34, 0xc8, 0x25, 0xa7 } - }, - { "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", - { 0x09, 0x33, 0x0c, 0x33, 0xf7, 0x11, 0x47, 0xe8, - 0x3d, 0x19, 0x2f, 0xc7, 0x82, 0xcd, 0x1b, 0x47, - 0x53, 0x11, 0x1b, 0x17, 0x3b, 0x3b, 0x05, 0xd2, - 0x2f, 0xa0, 0x80, 0x86, 0xe3, 0xb0, 0xf7, 0x12, - 0xfc, 0xc7, 0xc7, 0x1a, 0x55, 0x7e, 0x2d, 0xb9, - 0x66, 0xc3, 0xe9, 0xfa, 0x91, 0x74, 0x60, 0x39 } - }, - }; - - int i; - unsigned char tmp[48]; - hash_state md; - - for (i = 0; i < (int)(sizeof(tests) / sizeof(tests[0])); i++) { - sha384_init(&md); - sha384_process(&md, (unsigned char*)tests[i].msg, (unsigned long)XSTRLEN(tests[i].msg)); - sha384_done(&md, tmp); - if (ltc_compare_testvector(tmp, sizeof(tmp), tests[i].hash, sizeof(tests[i].hash), "SHA384", i)) { - return CRYPT_FAIL_TESTVECTOR; - } - } - return CRYPT_OK; - #endif + return sha384_test_desc(&sha384_portable_desc, "SHA384 portable"); } #endif /* defined(LTC_SHA384) && defined(LTC_SHA512) */ diff --git a/src/hashes/sha2/sha384_desc.c b/src/hashes/sha2/sha384_desc.c new file mode 100644 index 000000000..207b5cfbe --- /dev/null +++ b/src/hashes/sha2/sha384_desc.c @@ -0,0 +1,191 @@ +/* LibTomCrypt, modular cryptographic library -- Tom St Denis */ +/* SPDX-License-Identifier: Unlicense */ +/** + @param sha384.c + LTC_SHA384 hash included in sha512.c, Tom St Denis +*/ + +#include "tomcrypt_private.h" + +#if defined(LTC_SHA384) && defined(LTC_SHA512) + +const struct ltc_hash_descriptor sha384_desc = +{ + "sha384", + 4, + 48, + 128, + + /* OID */ + { 2, 16, 840, 1, 101, 3, 4, 2, 2, }, + 9, + + &sha384_init, + &sha512_process, + &sha384_done, + &sha384_test, + NULL +}; + +#if defined LTC_SHA384_X86 + +#if !defined (LTC_S_X86_CPUID) +#define LTC_S_X86_CPUID +#if defined _MSC_VER +#include +#endif +static LTC_INLINE void s_x86_cpuid(int* regs, int leaf) +{ +#if defined _MSC_VER + __cpuid(regs, leaf); +#else + int a, b, c, d; + + a = leaf; + b = c = d = 0; + asm volatile ("cpuid" + :"=a"(a), "=b"(b), "=c"(c), "=d"(d) + :"a"(a), "c"(c) + ); + regs[0] = a; + regs[1] = b; + regs[2] = c; + regs[3] = d; +#endif +} +static LTC_INLINE void s_x86_cpuidex(int* regs, int eax, int ecx) +{ +#if defined _MSC_VER + __cpuidex(regs, eax, ecx); +#else + int a, b, c, d; + + a = eax; + c = ecx; + b = d = 0; + asm volatile ("cpuid" + :"=a"(a), "=b"(b), "=c"(c), "=d"(d) + :"0"(a), "2"(c) + ); + regs[0] = a; + regs[1] = b; + regs[2] = c; + regs[3] = d; +#endif +} +#endif /* LTC_S_X86_CPUID */ + +static LTC_INLINE int s_sha384_x86_is_supported(void) +{ + static int initialized = 0; + static int is_supported = 0; + + if (initialized == 0) { + int regs[4]; + int sse2, avx, avx2, sha512; + /* Leaf 0, Reg 0 contains the number of leafs available */ + s_x86_cpuid(regs, 0); + if(regs[0] >= 7) { + s_x86_cpuid(regs, 1); + sse2 = ((((unsigned int)(regs[3])) >> 26) & 1u) != 0; /* SSE2, leaf 1, edx, bit 26 */ + avx = ((((unsigned int)(regs[2])) >> 28) & 1u) != 0; /* AVX, leaf 1, ecx, bit 28 */ + s_x86_cpuid(regs, 7); + avx2 = ((((unsigned int)(regs[1])) >> 5) & 1u) != 0; /* AVX2, leaf 7, ebx, bit 5 */ + /* Leaf 7, Reg 0 contains the number of sub leafs available */ + if(regs[0] >= 1) + { + s_x86_cpuidex(regs, 7, 1); + sha512 = ((((unsigned int)(regs[0])) >> 0) & 1u) != 0; /* SHA-512, leaf 7, sub leaf 1, eax, bit 0 */ + is_supported = sse2 && avx && avx2 && sha512; + } + } + initialized = 1; + } + return is_supported; +} +#endif /* LTC_SHA384_X86 */ + +/** + Initialize the hash state + @param md The hash state you wish to initialize + @return CRYPT_OK if successful +*/ +int sha384_init(hash_state * md) +{ +#if defined LTC_SHA384_X86 + if(s_sha384_x86_is_supported()) { + return sha384_x86_init(md); + } +#endif + return sha384_c_init(md); +} + +/** + Terminate the hash to get the digest + @param md The hash state + @param out [out] The destination of the hash (48 bytes) + @return CRYPT_OK if successful +*/ +int sha384_done(hash_state * md, unsigned char *out) +{ +#if defined LTC_SHA384_X86 + if(s_sha384_x86_is_supported()) { + return sha384_x86_done(md, out); + } +#endif + return sha384_c_done(md, out); +} + +/** + Self-test the hash + @return CRYPT_OK if successful, CRYPT_NOP if self-tests have been disabled +*/ +int sha384_test(void) +{ + return sha384_test_desc(&sha384_desc, "SHA384"); +} + +int sha384_test_desc(const struct ltc_hash_descriptor *desc, const char *name) +{ + #ifndef LTC_TEST + return CRYPT_NOP; + #else + static const struct { + const char *msg; + unsigned char hash[48]; + } tests[] = { + { "abc", + { 0xcb, 0x00, 0x75, 0x3f, 0x45, 0xa3, 0x5e, 0x8b, + 0xb5, 0xa0, 0x3d, 0x69, 0x9a, 0xc6, 0x50, 0x07, + 0x27, 0x2c, 0x32, 0xab, 0x0e, 0xde, 0xd1, 0x63, + 0x1a, 0x8b, 0x60, 0x5a, 0x43, 0xff, 0x5b, 0xed, + 0x80, 0x86, 0x07, 0x2b, 0xa1, 0xe7, 0xcc, 0x23, + 0x58, 0xba, 0xec, 0xa1, 0x34, 0xc8, 0x25, 0xa7 } + }, + { "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", + { 0x09, 0x33, 0x0c, 0x33, 0xf7, 0x11, 0x47, 0xe8, + 0x3d, 0x19, 0x2f, 0xc7, 0x82, 0xcd, 0x1b, 0x47, + 0x53, 0x11, 0x1b, 0x17, 0x3b, 0x3b, 0x05, 0xd2, + 0x2f, 0xa0, 0x80, 0x86, 0xe3, 0xb0, 0xf7, 0x12, + 0xfc, 0xc7, 0xc7, 0x1a, 0x55, 0x7e, 0x2d, 0xb9, + 0x66, 0xc3, 0xe9, 0xfa, 0x91, 0x74, 0x60, 0x39 } + }, + }; + + int i; + unsigned char tmp[48]; + hash_state md; + + for (i = 0; i < (int)(sizeof(tests) / sizeof(tests[0])); i++) { + desc->init(&md); + desc->process(&md, (unsigned char*)tests[i].msg, (unsigned long)XSTRLEN(tests[i].msg)); + desc->done(&md, tmp); + if (ltc_compare_testvector(tmp, sizeof(tmp), tests[i].hash, sizeof(tests[i].hash), name, i)) { + return CRYPT_FAIL_TESTVECTOR; + } + } + return CRYPT_OK; + #endif +} + +#endif /* defined(LTC_SHA384) && defined(LTC_SHA512) */ diff --git a/src/hashes/sha2/sha384_x86.c b/src/hashes/sha2/sha384_x86.c new file mode 100644 index 000000000..a1a509e8a --- /dev/null +++ b/src/hashes/sha2/sha384_x86.c @@ -0,0 +1,87 @@ +/* LibTomCrypt, modular cryptographic library -- Tom St Denis */ +/* SPDX-License-Identifier: Unlicense */ +/** + @param sha384_x86.c + LTC_SHA384 hash included in sha512_x86.c, Marek Knapek +*/ + +#include "tomcrypt_private.h" + +#if defined(LTC_SHA384) && defined(LTC_SHA512) && defined(LTC_SHA384_X86) + +const struct ltc_hash_descriptor sha384_x86_desc = +{ + "sha384", + 4, + 48, + 128, + + /* OID */ + { 2, 16, 840, 1, 101, 3, 4, 2, 2, }, + 9, + + &sha384_x86_init, + &sha512_x86_process, + &sha384_x86_done, + &sha384_x86_test, + NULL +}; + +/** + Initialize the hash state + @param md The hash state you wish to initialize + @return CRYPT_OK if successful +*/ +int sha384_x86_init(hash_state * md) +{ + LTC_ARGCHK(md != NULL); + + md->sha512.state = LTC_ALIGN_BUF(md->sha512.state_buf, 32); + md->sha512.curlen = 0; + md->sha512.length = 0; + md->sha512.state[0] = CONST64(0xcbbb9d5dc1059ed8); + md->sha512.state[1] = CONST64(0x629a292a367cd507); + md->sha512.state[2] = CONST64(0x9159015a3070dd17); + md->sha512.state[3] = CONST64(0x152fecd8f70e5939); + md->sha512.state[4] = CONST64(0x67332667ffc00b31); + md->sha512.state[5] = CONST64(0x8eb44a8768581511); + md->sha512.state[6] = CONST64(0xdb0c2e0d64f98fa7); + md->sha512.state[7] = CONST64(0x47b5481dbefa4fa4); + return CRYPT_OK; +} + +/** + Terminate the hash to get the digest + @param md The hash state + @param out [out] The destination of the hash (48 bytes) + @return CRYPT_OK if successful +*/ +int sha384_x86_done(hash_state * md, unsigned char *out) +{ + unsigned char buf[64]; + + LTC_ARGCHK(md != NULL); + LTC_ARGCHK(out != NULL); + + if (md->sha512.curlen >= sizeof(md->sha512.buf)) { + return CRYPT_INVALID_ARG; + } + + sha512_x86_done(md, buf); + XMEMCPY(out, buf, 48); +#ifdef LTC_CLEAN_STACK + zeromem(buf, sizeof(buf)); +#endif + return CRYPT_OK; +} + +/** + Self-test the hash + @return CRYPT_OK if successful, CRYPT_NOP if self-tests have been disabled +*/ +int sha384_x86_test(void) +{ + return sha384_test_desc(&sha384_x86_desc, "SHA384 x86"); +} + +#endif /* defined(LTC_SHA384) && defined(LTC_SHA512) && defined(LTC_SHA384_X86) */ diff --git a/src/hashes/sha2/sha512.c b/src/hashes/sha2/sha512.c index 5086a823e..7fac48a15 100644 --- a/src/hashes/sha2/sha512.c +++ b/src/hashes/sha2/sha512.c @@ -9,7 +9,7 @@ #ifdef LTC_SHA512 -const struct ltc_hash_descriptor sha512_desc = +const struct ltc_hash_descriptor sha512_portable_desc = { "sha512", 5, @@ -20,10 +20,10 @@ const struct ltc_hash_descriptor sha512_desc = { 2, 16, 840, 1, 101, 3, 4, 2, 3, }, 9, - &sha512_init, - &sha512_process, - &sha512_done, - &sha512_test, + &sha512_c_init, + &sha512_c_process, + &sha512_c_done, + &sha512_c_test, NULL }; @@ -205,9 +205,11 @@ static int s_sha512_compress(hash_state * md, const unsigned char *buf) @param md The hash state you wish to initialize @return CRYPT_OK if successful */ -int sha512_init(hash_state * md) +int sha512_c_init(hash_state * md) { LTC_ARGCHK(md != NULL); + + md->sha512.state = LTC_ALIGN_BUF(md->sha512.state_buf, 32); md->sha512.curlen = 0; md->sha512.length = 0; md->sha512.state[0] = CONST64(0x6a09e667f3bcc908); @@ -228,7 +230,7 @@ int sha512_init(hash_state * md) @param inlen The length of the data (octets) @return CRYPT_OK if successful */ -HASH_PROCESS(sha512_process, s_sha512_compress, sha512, 128) +HASH_PROCESS(sha512_c_process, s_sha512_compress, sha512, 128) /** Terminate the hash to get the digest @@ -236,7 +238,7 @@ HASH_PROCESS(sha512_process, s_sha512_compress, sha512, 128) @param out [out] The destination of the hash (64 bytes) @return CRYPT_OK if successful */ -int sha512_done(hash_state * md, unsigned char *out) +int sha512_c_done(hash_state * md, unsigned char *out) { int i; @@ -291,51 +293,9 @@ int sha512_done(hash_state * md, unsigned char *out) Self-test the hash @return CRYPT_OK if successful, CRYPT_NOP if self-tests have been disabled */ -int sha512_test(void) +int sha512_c_test(void) { - #ifndef LTC_TEST - return CRYPT_NOP; - #else - static const struct { - const char *msg; - unsigned char hash[64]; - } tests[] = { - { "abc", - { 0xdd, 0xaf, 0x35, 0xa1, 0x93, 0x61, 0x7a, 0xba, - 0xcc, 0x41, 0x73, 0x49, 0xae, 0x20, 0x41, 0x31, - 0x12, 0xe6, 0xfa, 0x4e, 0x89, 0xa9, 0x7e, 0xa2, - 0x0a, 0x9e, 0xee, 0xe6, 0x4b, 0x55, 0xd3, 0x9a, - 0x21, 0x92, 0x99, 0x2a, 0x27, 0x4f, 0xc1, 0xa8, - 0x36, 0xba, 0x3c, 0x23, 0xa3, 0xfe, 0xeb, 0xbd, - 0x45, 0x4d, 0x44, 0x23, 0x64, 0x3c, 0xe8, 0x0e, - 0x2a, 0x9a, 0xc9, 0x4f, 0xa5, 0x4c, 0xa4, 0x9f } - }, - { "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", - { 0x8e, 0x95, 0x9b, 0x75, 0xda, 0xe3, 0x13, 0xda, - 0x8c, 0xf4, 0xf7, 0x28, 0x14, 0xfc, 0x14, 0x3f, - 0x8f, 0x77, 0x79, 0xc6, 0xeb, 0x9f, 0x7f, 0xa1, - 0x72, 0x99, 0xae, 0xad, 0xb6, 0x88, 0x90, 0x18, - 0x50, 0x1d, 0x28, 0x9e, 0x49, 0x00, 0xf7, 0xe4, - 0x33, 0x1b, 0x99, 0xde, 0xc4, 0xb5, 0x43, 0x3a, - 0xc7, 0xd3, 0x29, 0xee, 0xb6, 0xdd, 0x26, 0x54, - 0x5e, 0x96, 0xe5, 0x5b, 0x87, 0x4b, 0xe9, 0x09 } - }, - }; - - int i; - unsigned char tmp[64]; - hash_state md; - - for (i = 0; i < (int)(sizeof(tests) / sizeof(tests[0])); i++) { - sha512_init(&md); - sha512_process(&md, (unsigned char *)tests[i].msg, (unsigned long)XSTRLEN(tests[i].msg)); - sha512_done(&md, tmp); - if (ltc_compare_testvector(tmp, sizeof(tmp), tests[i].hash, sizeof(tests[i].hash), "SHA512", i)) { - return CRYPT_FAIL_TESTVECTOR; - } - } - return CRYPT_OK; - #endif + return sha512_test_desc(&sha512_portable_desc, "SHA512 portable"); } #undef Ch diff --git a/src/hashes/sha2/sha512_224.c b/src/hashes/sha2/sha512_224.c index 7acc724ac..5f55c2539 100644 --- a/src/hashes/sha2/sha512_224.c +++ b/src/hashes/sha2/sha512_224.c @@ -9,7 +9,7 @@ #if defined(LTC_SHA512_224) && defined(LTC_SHA512) -const struct ltc_hash_descriptor sha512_224_desc = +const struct ltc_hash_descriptor sha512_224_portable_desc = { "sha512-224", 15, @@ -20,10 +20,10 @@ const struct ltc_hash_descriptor sha512_224_desc = { 2, 16, 840, 1, 101, 3, 4, 2, 5, }, 9, - &sha512_224_init, - &sha512_process, - &sha512_224_done, - &sha512_224_test, + &sha512_224_c_init, + &sha512_c_process, + &sha512_224_c_done, + &sha512_224_c_test, NULL }; @@ -32,10 +32,11 @@ const struct ltc_hash_descriptor sha512_224_desc = @param md The hash state you wish to initialize @return CRYPT_OK if successful */ -int sha512_224_init(hash_state * md) +int sha512_224_c_init(hash_state * md) { LTC_ARGCHK(md != NULL); + md->sha512.state = LTC_ALIGN_BUF(md->sha512.state_buf, 32); md->sha512.curlen = 0; md->sha512.length = 0; md->sha512.state[0] = CONST64(0x8C3D37C819544DA2); @@ -55,7 +56,7 @@ int sha512_224_init(hash_state * md) @param out [out] The destination of the hash (48 bytes) @return CRYPT_OK if successful */ -int sha512_224_done(hash_state * md, unsigned char *out) +int sha512_224_c_done(hash_state * md, unsigned char *out) { unsigned char buf[64]; @@ -66,7 +67,7 @@ int sha512_224_done(hash_state * md, unsigned char *out) return CRYPT_INVALID_ARG; } - sha512_done(md, buf); + sha512_c_done(md, buf); XMEMCPY(out, buf, 28); #ifdef LTC_CLEAN_STACK zeromem(buf, sizeof(buf)); @@ -78,43 +79,9 @@ int sha512_224_done(hash_state * md, unsigned char *out) Self-test the hash @return CRYPT_OK if successful, CRYPT_NOP if self-tests have been disabled */ -int sha512_224_test(void) +int sha512_224_c_test(void) { - #ifndef LTC_TEST - return CRYPT_NOP; - #else - static const struct { - const char *msg; - unsigned char hash[28]; - } tests[] = { - { "abc", - { 0x46, 0x34, 0x27, 0x0F, 0x70, 0x7B, 0x6A, 0x54, - 0xDA, 0xAE, 0x75, 0x30, 0x46, 0x08, 0x42, 0xE2, - 0x0E, 0x37, 0xED, 0x26, 0x5C, 0xEE, 0xE9, 0xA4, - 0x3E, 0x89, 0x24, 0xAA } - }, - { "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", - { 0x23, 0xFE, 0xC5, 0xBB, 0x94, 0xD6, 0x0B, 0x23, - 0x30, 0x81, 0x92, 0x64, 0x0B, 0x0C, 0x45, 0x33, - 0x35, 0xD6, 0x64, 0x73, 0x4F, 0xE4, 0x0E, 0x72, - 0x68, 0x67, 0x4A, 0xF9 } - }, - }; - - int i; - unsigned char tmp[28]; - hash_state md; - - for (i = 0; i < (int)(sizeof(tests) / sizeof(tests[0])); i++) { - sha512_224_init(&md); - sha512_224_process(&md, (unsigned char*)tests[i].msg, (unsigned long)XSTRLEN(tests[i].msg)); - sha512_224_done(&md, tmp); - if (ltc_compare_testvector(tmp, sizeof(tmp), tests[i].hash, sizeof(tests[i].hash), "SHA512-224", i)) { - return CRYPT_FAIL_TESTVECTOR; - } - } - return CRYPT_OK; - #endif + return sha512_224_test_desc(&sha512_224_portable_desc, "SHA512-224 portable"); } -#endif /* defined(LTC_SHA384) && defined(LTC_SHA512) */ +#endif /* defined(LTC_SHA512_224) && defined(LTC_SHA512) */ diff --git a/src/hashes/sha2/sha512_224_desc.c b/src/hashes/sha2/sha512_224_desc.c new file mode 100644 index 000000000..1ae7aecd1 --- /dev/null +++ b/src/hashes/sha2/sha512_224_desc.c @@ -0,0 +1,187 @@ +/* LibTomCrypt, modular cryptographic library -- Tom St Denis */ +/* SPDX-License-Identifier: Unlicense */ +/** + @param sha512_224.c + SHA512/224 hash included in sha512.c +*/ + +#include "tomcrypt_private.h" + +#if defined(LTC_SHA512_224) && defined(LTC_SHA512) + +const struct ltc_hash_descriptor sha512_224_desc = +{ + "sha512-224", + 15, + 28, + 128, + + /* OID */ + { 2, 16, 840, 1, 101, 3, 4, 2, 5, }, + 9, + + &sha512_224_init, + &sha512_process, + &sha512_224_done, + &sha512_224_test, + NULL +}; + +#if defined LTC_SHA224_X86 + +#if !defined (LTC_S_X86_CPUID) +#define LTC_S_X86_CPUID +#if defined _MSC_VER +#include +#endif +static LTC_INLINE void s_x86_cpuid(int* regs, int leaf) +{ +#if defined _MSC_VER + __cpuid(regs, leaf); +#else + int a, b, c, d; + + a = leaf; + b = c = d = 0; + asm volatile ("cpuid" + :"=a"(a), "=b"(b), "=c"(c), "=d"(d) + :"a"(a), "c"(c) + ); + regs[0] = a; + regs[1] = b; + regs[2] = c; + regs[3] = d; +#endif +} +static LTC_INLINE void s_x86_cpuidex(int* regs, int eax, int ecx) +{ +#if defined _MSC_VER + __cpuidex(regs, eax, ecx); +#else + int a, b, c, d; + + a = eax; + c = ecx; + b = d = 0; + asm volatile ("cpuid" + :"=a"(a), "=b"(b), "=c"(c), "=d"(d) + :"0"(a), "2"(c) + ); + regs[0] = a; + regs[1] = b; + regs[2] = c; + regs[3] = d; +#endif +} +#endif /* LTC_S_X86_CPUID */ + +static LTC_INLINE int s_sha512_224_x86_is_supported(void) +{ + static int initialized = 0; + static int is_supported = 0; + + if (initialized == 0) { + int regs[4]; + int sse2, avx, avx2, sha512; + /* Leaf 0, Reg 0 contains the number of leafs available */ + s_x86_cpuid(regs, 0); + if(regs[0] >= 7) { + s_x86_cpuid(regs, 1); + sse2 = ((((unsigned int)(regs[3])) >> 26) & 1u) != 0; /* SSE2, leaf 1, edx, bit 26 */ + avx = ((((unsigned int)(regs[2])) >> 28) & 1u) != 0; /* AVX, leaf 1, ecx, bit 28 */ + s_x86_cpuid(regs, 7); + avx2 = ((((unsigned int)(regs[1])) >> 5) & 1u) != 0; /* AVX2, leaf 7, ebx, bit 5 */ + /* Leaf 7, Reg 0 contains the number of sub leafs available */ + if(regs[0] >= 1) + { + s_x86_cpuidex(regs, 7, 1); + sha512 = ((((unsigned int)(regs[0])) >> 0) & 1u) != 0; /* SHA-512, leaf 7, sub leaf 1, eax, bit 0 */ + is_supported = sse2 && avx && avx2 && sha512; + } + } + initialized = 1; + } + return is_supported; +} +#endif /* LTC_SHA224_X86 */ + +/** + Initialize the hash state + @param md The hash state you wish to initialize + @return CRYPT_OK if successful +*/ +int sha512_224_init(hash_state * md) +{ +#if defined LTC_SHA512_224_X86 + if(s_sha512_224_x86_is_supported()) { + return sha512_224_x86_init(md); + } +#endif + return sha512_224_c_init(md); +} + +/** + Terminate the hash to get the digest + @param md The hash state + @param out [out] The destination of the hash (48 bytes) + @return CRYPT_OK if successful +*/ +int sha512_224_done(hash_state * md, unsigned char *out) +{ +#if defined LTC_SHA512_224_X86 + if(s_sha512_224_x86_is_supported()) { + return sha512_224_x86_done(md, out); + } +#endif + return sha512_224_c_done(md, out); +} + +/** + Self-test the hash + @return CRYPT_OK if successful, CRYPT_NOP if self-tests have been disabled +*/ +int sha512_224_test(void) +{ + return sha512_224_test_desc(&sha512_224_desc, "SHA512-224"); +} + +int sha512_224_test_desc(const struct ltc_hash_descriptor *desc, const char *name) +{ + #ifndef LTC_TEST + return CRYPT_NOP; + #else + static const struct { + const char *msg; + unsigned char hash[28]; + } tests[] = { + { "abc", + { 0x46, 0x34, 0x27, 0x0F, 0x70, 0x7B, 0x6A, 0x54, + 0xDA, 0xAE, 0x75, 0x30, 0x46, 0x08, 0x42, 0xE2, + 0x0E, 0x37, 0xED, 0x26, 0x5C, 0xEE, 0xE9, 0xA4, + 0x3E, 0x89, 0x24, 0xAA } + }, + { "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", + { 0x23, 0xFE, 0xC5, 0xBB, 0x94, 0xD6, 0x0B, 0x23, + 0x30, 0x81, 0x92, 0x64, 0x0B, 0x0C, 0x45, 0x33, + 0x35, 0xD6, 0x64, 0x73, 0x4F, 0xE4, 0x0E, 0x72, + 0x68, 0x67, 0x4A, 0xF9 } + }, + }; + + int i; + unsigned char tmp[28]; + hash_state md; + + for (i = 0; i < (int)(sizeof(tests) / sizeof(tests[0])); i++) { + desc->init(&md); + desc->process(&md, (unsigned char*)tests[i].msg, (unsigned long)XSTRLEN(tests[i].msg)); + desc->done(&md, tmp); + if (ltc_compare_testvector(tmp, sizeof(tmp), tests[i].hash, sizeof(tests[i].hash), name, i)) { + return CRYPT_FAIL_TESTVECTOR; + } + } + return CRYPT_OK; + #endif +} + +#endif /* defined(LTC_SHA512_224) && defined(LTC_SHA512) */ diff --git a/src/hashes/sha2/sha512_224_x86.c b/src/hashes/sha2/sha512_224_x86.c new file mode 100644 index 000000000..0c05bc276 --- /dev/null +++ b/src/hashes/sha2/sha512_224_x86.c @@ -0,0 +1,87 @@ +/* LibTomCrypt, modular cryptographic library -- Tom St Denis */ +/* SPDX-License-Identifier: Unlicense */ +/** + @param sha512_224_x86.c + SHA512/224 hash included in sha512_x86.c +*/ + +#include "tomcrypt_private.h" + +#if defined(LTC_SHA512_224) && defined(LTC_SHA512) && defined(LTC_SHA512_224_X86) + +const struct ltc_hash_descriptor sha512_224_x86_desc = +{ + "sha512-224", + 15, + 28, + 128, + + /* OID */ + { 2, 16, 840, 1, 101, 3, 4, 2, 5, }, + 9, + + &sha512_224_x86_init, + &sha512_x86_process, + &sha512_224_x86_done, + &sha512_224_x86_test, + NULL +}; + +/** + Initialize the hash state + @param md The hash state you wish to initialize + @return CRYPT_OK if successful +*/ +int sha512_224_x86_init(hash_state * md) +{ + LTC_ARGCHK(md != NULL); + + md->sha512.state = LTC_ALIGN_BUF(md->sha512.state_buf, 32); + md->sha512.curlen = 0; + md->sha512.length = 0; + md->sha512.state[0] = CONST64(0x8C3D37C819544DA2); + md->sha512.state[1] = CONST64(0x73E1996689DCD4D6); + md->sha512.state[2] = CONST64(0x1DFAB7AE32FF9C82); + md->sha512.state[3] = CONST64(0x679DD514582F9FCF); + md->sha512.state[4] = CONST64(0x0F6D2B697BD44DA8); + md->sha512.state[5] = CONST64(0x77E36F7304C48942); + md->sha512.state[6] = CONST64(0x3F9D85A86A1D36C8); + md->sha512.state[7] = CONST64(0x1112E6AD91D692A1); + return CRYPT_OK; +} + +/** + Terminate the hash to get the digest + @param md The hash state + @param out [out] The destination of the hash (48 bytes) + @return CRYPT_OK if successful +*/ +int sha512_224_x86_done(hash_state * md, unsigned char *out) +{ + unsigned char buf[64]; + + LTC_ARGCHK(md != NULL); + LTC_ARGCHK(out != NULL); + + if (md->sha512.curlen >= sizeof(md->sha512.buf)) { + return CRYPT_INVALID_ARG; + } + + sha512_x86_done(md, buf); + XMEMCPY(out, buf, 28); +#ifdef LTC_CLEAN_STACK + zeromem(buf, sizeof(buf)); +#endif + return CRYPT_OK; +} + +/** + Self-test the hash + @return CRYPT_OK if successful, CRYPT_NOP if self-tests have been disabled +*/ +int sha512_224_x86_test(void) +{ + return sha512_224_test_desc(&sha512_224_x86_desc, "SHA512-224 x86"); +} + +#endif /* defined(LTC_SHA512_224) && defined(LTC_SHA512) && defined(LTC_SHA512_224_X86) */ diff --git a/src/hashes/sha2/sha512_256.c b/src/hashes/sha2/sha512_256.c index f71c53197..673e6bf01 100644 --- a/src/hashes/sha2/sha512_256.c +++ b/src/hashes/sha2/sha512_256.c @@ -9,7 +9,7 @@ #if defined(LTC_SHA512_256) && defined(LTC_SHA512) -const struct ltc_hash_descriptor sha512_256_desc = +const struct ltc_hash_descriptor sha512_256_portable_desc = { "sha512-256", 16, @@ -20,10 +20,10 @@ const struct ltc_hash_descriptor sha512_256_desc = { 2, 16, 840, 1, 101, 3, 4, 2, 6, }, 9, - &sha512_256_init, - &sha512_process, - &sha512_256_done, - &sha512_256_test, + &sha512_256_c_init, + &sha512_c_process, + &sha512_256_c_done, + &sha512_256_c_test, NULL }; @@ -32,10 +32,11 @@ const struct ltc_hash_descriptor sha512_256_desc = @param md The hash state you wish to initialize @return CRYPT_OK if successful */ -int sha512_256_init(hash_state * md) +int sha512_256_c_init(hash_state * md) { LTC_ARGCHK(md != NULL); + md->sha512.state = LTC_ALIGN_BUF(md->sha512.state_buf, 32); md->sha512.curlen = 0; md->sha512.length = 0; md->sha512.state[0] = CONST64(0x22312194FC2BF72C); @@ -55,7 +56,7 @@ int sha512_256_init(hash_state * md) @param out [out] The destination of the hash (48 bytes) @return CRYPT_OK if successful */ -int sha512_256_done(hash_state * md, unsigned char *out) +int sha512_256_c_done(hash_state * md, unsigned char *out) { unsigned char buf[64]; @@ -66,7 +67,7 @@ int sha512_256_done(hash_state * md, unsigned char *out) return CRYPT_INVALID_ARG; } - sha512_done(md, buf); + sha512_c_done(md, buf); XMEMCPY(out, buf, 32); #ifdef LTC_CLEAN_STACK zeromem(buf, sizeof(buf)); @@ -78,43 +79,9 @@ int sha512_256_done(hash_state * md, unsigned char *out) Self-test the hash @return CRYPT_OK if successful, CRYPT_NOP if self-tests have been disabled */ -int sha512_256_test(void) +int sha512_256_c_test(void) { - #ifndef LTC_TEST - return CRYPT_NOP; - #else - static const struct { - const char *msg; - unsigned char hash[32]; - } tests[] = { - { "abc", - { 0x53, 0x04, 0x8E, 0x26, 0x81, 0x94, 0x1E, 0xF9, - 0x9B, 0x2E, 0x29, 0xB7, 0x6B, 0x4C, 0x7D, 0xAB, - 0xE4, 0xC2, 0xD0, 0xC6, 0x34, 0xFC, 0x6D, 0x46, - 0xE0, 0xE2, 0xF1, 0x31, 0x07, 0xE7, 0xAF, 0x23 } - }, - { "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", - { 0x39, 0x28, 0xE1, 0x84, 0xFB, 0x86, 0x90, 0xF8, - 0x40, 0xDA, 0x39, 0x88, 0x12, 0x1D, 0x31, 0xBE, - 0x65, 0xCB, 0x9D, 0x3E, 0xF8, 0x3E, 0xE6, 0x14, - 0x6F, 0xEA, 0xC8, 0x61, 0xE1, 0x9B, 0x56, 0x3A } - }, - }; - - int i; - unsigned char tmp[32]; - hash_state md; - - for (i = 0; i < (int)(sizeof(tests) / sizeof(tests[0])); i++) { - sha512_256_init(&md); - sha512_256_process(&md, (unsigned char*)tests[i].msg, (unsigned long)XSTRLEN(tests[i].msg)); - sha512_256_done(&md, tmp); - if (ltc_compare_testvector(tmp, sizeof(tmp), tests[i].hash, sizeof(tests[i].hash), "SHA512-265", i)) { - return CRYPT_FAIL_TESTVECTOR; - } - } - return CRYPT_OK; - #endif + return sha512_256_test_desc(&sha512_256_portable_desc, "SHA512-256 portable"); } -#endif /* defined(LTC_SHA384) && defined(LTC_SHA512) */ +#endif /* defined(LTC_SHA512_256) && defined(LTC_SHA512) */ diff --git a/src/hashes/sha2/sha512_256_desc.c b/src/hashes/sha2/sha512_256_desc.c new file mode 100644 index 000000000..31cc136c5 --- /dev/null +++ b/src/hashes/sha2/sha512_256_desc.c @@ -0,0 +1,187 @@ +/* LibTomCrypt, modular cryptographic library -- Tom St Denis */ +/* SPDX-License-Identifier: Unlicense */ +/** + @param sha512_256.c + SHA512/256 hash included in sha512.c +*/ + +#include "tomcrypt_private.h" + +#if defined(LTC_SHA512_256) && defined(LTC_SHA512) + +const struct ltc_hash_descriptor sha512_256_desc = +{ + "sha512-256", + 16, + 32, + 128, + + /* OID */ + { 2, 16, 840, 1, 101, 3, 4, 2, 6, }, + 9, + + &sha512_256_init, + &sha512_process, + &sha512_256_done, + &sha512_256_test, + NULL +}; + +#if defined LTC_SHA256_X86 + +#if !defined (LTC_S_X86_CPUID) +#define LTC_S_X86_CPUID +#if defined _MSC_VER +#include +#endif +static LTC_INLINE void s_x86_cpuid(int* regs, int leaf) +{ +#if defined _MSC_VER + __cpuid(regs, leaf); +#else + int a, b, c, d; + + a = leaf; + b = c = d = 0; + asm volatile ("cpuid" + :"=a"(a), "=b"(b), "=c"(c), "=d"(d) + :"a"(a), "c"(c) + ); + regs[0] = a; + regs[1] = b; + regs[2] = c; + regs[3] = d; +#endif +} +static LTC_INLINE void s_x86_cpuidex(int* regs, int eax, int ecx) +{ +#if defined _MSC_VER + __cpuidex(regs, eax, ecx); +#else + int a, b, c, d; + + a = eax; + c = ecx; + b = d = 0; + asm volatile ("cpuid" + :"=a"(a), "=b"(b), "=c"(c), "=d"(d) + :"0"(a), "2"(c) + ); + regs[0] = a; + regs[1] = b; + regs[2] = c; + regs[3] = d; +#endif +} +#endif /* LTC_S_X86_CPUID */ + +static LTC_INLINE int s_sha512_256_x86_is_supported(void) +{ + static int initialized = 0; + static int is_supported = 0; + + if (initialized == 0) { + int regs[4]; + int sse2, avx, avx2, sha512; + /* Leaf 0, Reg 0 contains the number of leafs available */ + s_x86_cpuid(regs, 0); + if(regs[0] >= 7) { + s_x86_cpuid(regs, 1); + sse2 = ((((unsigned int)(regs[3])) >> 26) & 1u) != 0; /* SSE2, leaf 1, edx, bit 26 */ + avx = ((((unsigned int)(regs[2])) >> 28) & 1u) != 0; /* AVX, leaf 1, ecx, bit 28 */ + s_x86_cpuid(regs, 7); + avx2 = ((((unsigned int)(regs[1])) >> 5) & 1u) != 0; /* AVX2, leaf 7, ebx, bit 5 */ + /* Leaf 7, Reg 0 contains the number of sub leafs available */ + if(regs[0] >= 1) + { + s_x86_cpuidex(regs, 7, 1); + sha512 = ((((unsigned int)(regs[0])) >> 0) & 1u) != 0; /* SHA-512, leaf 7, sub leaf 1, eax, bit 0 */ + is_supported = sse2 && avx && avx2 && sha512; + } + } + initialized = 1; + } + return is_supported; +} +#endif /* LTC_SHA256_X86 */ + +/** + Initialize the hash state + @param md The hash state you wish to initialize + @return CRYPT_OK if successful +*/ +int sha512_256_init(hash_state * md) +{ +#if defined LTC_SHA512_256_X86 + if(s_sha512_256_x86_is_supported()) { + return sha512_256_x86_init(md); + } +#endif + return sha512_256_c_init(md); +} + +/** + Terminate the hash to get the digest + @param md The hash state + @param out [out] The destination of the hash (48 bytes) + @return CRYPT_OK if successful +*/ +int sha512_256_done(hash_state * md, unsigned char *out) +{ +#if defined LTC_SHA512_256_X86 + if(s_sha512_256_x86_is_supported()) { + return sha512_256_x86_done(md, out); + } +#endif + return sha512_256_c_done(md, out); +} + +/** + Self-test the hash + @return CRYPT_OK if successful, CRYPT_NOP if self-tests have been disabled +*/ +int sha512_256_test(void) +{ + return sha512_256_test_desc(&sha512_256_desc, "SHA512-256"); +} + +int sha512_256_test_desc(const struct ltc_hash_descriptor *desc, const char *name) +{ + #ifndef LTC_TEST + return CRYPT_NOP; + #else + static const struct { + const char *msg; + unsigned char hash[32]; + } tests[] = { + { "abc", + { 0x53, 0x04, 0x8E, 0x26, 0x81, 0x94, 0x1E, 0xF9, + 0x9B, 0x2E, 0x29, 0xB7, 0x6B, 0x4C, 0x7D, 0xAB, + 0xE4, 0xC2, 0xD0, 0xC6, 0x34, 0xFC, 0x6D, 0x46, + 0xE0, 0xE2, 0xF1, 0x31, 0x07, 0xE7, 0xAF, 0x23 } + }, + { "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", + { 0x39, 0x28, 0xE1, 0x84, 0xFB, 0x86, 0x90, 0xF8, + 0x40, 0xDA, 0x39, 0x88, 0x12, 0x1D, 0x31, 0xBE, + 0x65, 0xCB, 0x9D, 0x3E, 0xF8, 0x3E, 0xE6, 0x14, + 0x6F, 0xEA, 0xC8, 0x61, 0xE1, 0x9B, 0x56, 0x3A } + }, + }; + + int i; + unsigned char tmp[32]; + hash_state md; + + for (i = 0; i < (int)(sizeof(tests) / sizeof(tests[0])); i++) { + desc->init(&md); + desc->process(&md, (unsigned char*)tests[i].msg, (unsigned long)XSTRLEN(tests[i].msg)); + desc->done(&md, tmp); + if (ltc_compare_testvector(tmp, sizeof(tmp), tests[i].hash, sizeof(tests[i].hash), name, i)) { + return CRYPT_FAIL_TESTVECTOR; + } + } + return CRYPT_OK; + #endif +} + +#endif /* defined(LTC_SHA512_256) && defined(LTC_SHA512) */ diff --git a/src/hashes/sha2/sha512_256_x86.c b/src/hashes/sha2/sha512_256_x86.c new file mode 100644 index 000000000..5a9936a8d --- /dev/null +++ b/src/hashes/sha2/sha512_256_x86.c @@ -0,0 +1,88 @@ +/* LibTomCrypt, modular cryptographic library -- Tom St Denis */ +/* SPDX-License-Identifier: Unlicense */ +/** + @param sha512_256_x86.c + SHA512/256 hash included in sha512_x86.c +*/ + +#include "tomcrypt_private.h" + +#if defined(LTC_SHA512_256) && defined(LTC_SHA512) && defined(LTC_SHA512_256_X86) + +const struct ltc_hash_descriptor sha512_256_x86_desc = +{ + "sha512-256", + 16, + 32, + 128, + + /* OID */ + { 2, 16, 840, 1, 101, 3, 4, 2, 6, }, + 9, + + &sha512_256_x86_init, + &sha512_x86_process, + &sha512_256_x86_done, + &sha512_256_x86_test, + NULL +}; + +/** + Initialize the hash state + @param md The hash state you wish to initialize + @return CRYPT_OK if successful +*/ +int sha512_256_x86_init(hash_state * md) +{ + LTC_ARGCHK(md != NULL); + + md->sha512.state = LTC_ALIGN_BUF(md->sha512.state_buf, 32); + md->sha512.curlen = 0; + md->sha512.length = 0; + md->sha512.state[0] = CONST64(0x22312194FC2BF72C); + md->sha512.state[1] = CONST64(0x9F555FA3C84C64C2); + md->sha512.state[2] = CONST64(0x2393B86B6F53B151); + md->sha512.state[3] = CONST64(0x963877195940EABD); + md->sha512.state[4] = CONST64(0x96283EE2A88EFFE3); + md->sha512.state[5] = CONST64(0xBE5E1E2553863992); + md->sha512.state[6] = CONST64(0x2B0199FC2C85B8AA); + md->sha512.state[7] = CONST64(0x0EB72DDC81C52CA2); + return CRYPT_OK; +} + +/** + Terminate the hash to get the digest + @param md The hash state + @param out [out] The destination of the hash (48 bytes) + @return CRYPT_OK if successful +*/ +int sha512_256_x86_done(hash_state * md, unsigned char *out) +{ + unsigned char buf[64]; + + LTC_ARGCHK(md != NULL); + LTC_ARGCHK(out != NULL); + + if (md->sha512.curlen >= sizeof(md->sha512.buf)) { + return CRYPT_INVALID_ARG; + } + + sha512_x86_done(md, buf); + XMEMCPY(out, buf, 32); +#ifdef LTC_CLEAN_STACK + zeromem(buf, sizeof(buf)); +#endif + return CRYPT_OK; +} + +/** + Self-test the hash + @return CRYPT_OK if successful, CRYPT_NOP if self-tests have been disabled +*/ +int sha512_256_x86_test(void) +{ + return sha512_256_test_desc(&sha512_256_x86_desc, "SHA512-256 x86"); +} + +#endif /* defined(LTC_SHA512_256) && defined(LTC_SHA512) && defined(LTC_SHA512_256_X86) */ + diff --git a/src/hashes/sha2/sha512_desc.c b/src/hashes/sha2/sha512_desc.c new file mode 100644 index 000000000..920fe5abd --- /dev/null +++ b/src/hashes/sha2/sha512_desc.c @@ -0,0 +1,212 @@ +/* LibTomCrypt, modular cryptographic library -- Tom St Denis */ +/* SPDX-License-Identifier: Unlicense */ +#include "tomcrypt_private.h" + +/** + @param sha512.c + SHA512 by Tom St Denis +*/ + +#ifdef LTC_SHA512 + +const struct ltc_hash_descriptor sha512_desc = +{ + "sha512", + 5, + 64, + 128, + + /* OID */ + { 2, 16, 840, 1, 101, 3, 4, 2, 3, }, + 9, + + &sha512_init, + &sha512_process, + &sha512_done, + &sha512_test, + NULL +}; + +#if defined LTC_SHA512_X86 + +#if !defined (LTC_S_X86_CPUID) +#define LTC_S_X86_CPUID +#if defined _MSC_VER +#include +#endif +static LTC_INLINE void s_x86_cpuid(int* regs, int leaf) +{ +#if defined _MSC_VER + __cpuid(regs, leaf); +#else + int a, b, c, d; + + a = leaf; + b = c = d = 0; + asm volatile ("cpuid" + :"=a"(a), "=b"(b), "=c"(c), "=d"(d) + :"a"(a), "c"(c) + ); + regs[0] = a; + regs[1] = b; + regs[2] = c; + regs[3] = d; +#endif +} +static LTC_INLINE void s_x86_cpuidex(int* regs, int eax, int ecx) +{ +#if defined _MSC_VER + __cpuidex(regs, eax, ecx); +#else + int a, b, c, d; + + a = eax; + c = ecx; + b = d = 0; + asm volatile ("cpuid" + :"=a"(a), "=b"(b), "=c"(c), "=d"(d) + :"0"(a), "2"(c) + ); + regs[0] = a; + regs[1] = b; + regs[2] = c; + regs[3] = d; +#endif +} +#endif /* LTC_S_X86_CPUID */ + +static LTC_INLINE int s_sha512_x86_is_supported(void) +{ + static int initialized = 0; + static int is_supported = 0; + + if (initialized == 0) { + int regs[4]; + int sse2, avx, avx2, sha512; + /* Leaf 0, Reg 0 contains the number of leafs available */ + s_x86_cpuid(regs, 0); + if(regs[0] >= 7) { + s_x86_cpuid(regs, 1); + sse2 = ((((unsigned int)(regs[3])) >> 26) & 1u) != 0; /* SSE2, leaf 1, edx, bit 26 */ + avx = ((((unsigned int)(regs[2])) >> 28) & 1u) != 0; /* AVX, leaf 1, ecx, bit 28 */ + s_x86_cpuid(regs, 7); + avx2 = ((((unsigned int)(regs[1])) >> 5) & 1u) != 0; /* AVX2, leaf 7, ebx, bit 5 */ + /* Leaf 7, Reg 0 contains the number of sub leafs available */ + if(regs[0] >= 1) + { + s_x86_cpuidex(regs, 7, 1); + sha512 = ((((unsigned int)(regs[0])) >> 0) & 1u) != 0; /* SHA-512, leaf 7, sub leaf 1, eax, bit 0 */ + is_supported = sse2 && avx && avx2 && sha512; + } + } + initialized = 1; + } + return is_supported; +} +#endif /* LTC_SHA512_X86 */ + +/** + Initialize the hash state + @param md The hash state you wish to initialize + @return CRYPT_OK if successful +*/ +int sha512_init(hash_state * md) +{ +#if defined LTC_SHA512_X86 + if(s_sha512_x86_is_supported()) { + return sha512_x86_init(md); + } +#endif + return sha512_c_init(md); +} + +/** + Process a block of memory though the hash + @param md The hash state + @param in The data to hash + @param inlen The length of the data (octets) + @return CRYPT_OK if successful +*/ +int sha512_process(hash_state * md, const unsigned char *in, unsigned long inlen) +{ +#if defined LTC_SHA512_X86 + if(s_sha512_x86_is_supported()) { + return sha512_x86_process(md, in, inlen); + } +#endif + return sha512_c_process(md, in, inlen); +} + +/** + Terminate the hash to get the digest + @param md The hash state + @param out [out] The destination of the hash (64 bytes) + @return CRYPT_OK if successful +*/ +int sha512_done(hash_state * md, unsigned char *out) +{ +#if defined LTC_SHA512_X86 + if(s_sha512_x86_is_supported()) { + return sha512_x86_done(md, out); + } +#endif + return sha512_c_done(md, out); +} + +/** + Self-test the hash + @return CRYPT_OK if successful, CRYPT_NOP if self-tests have been disabled +*/ +int sha512_test(void) +{ + return sha512_test_desc(&sha512_desc, "SHA512"); +} + +int sha512_test_desc(const struct ltc_hash_descriptor *desc, const char *name) +{ + #ifndef LTC_TEST + return CRYPT_NOP; + #else + static const struct { + const char *msg; + unsigned char hash[64]; + } tests[] = { + { "abc", + { 0xdd, 0xaf, 0x35, 0xa1, 0x93, 0x61, 0x7a, 0xba, + 0xcc, 0x41, 0x73, 0x49, 0xae, 0x20, 0x41, 0x31, + 0x12, 0xe6, 0xfa, 0x4e, 0x89, 0xa9, 0x7e, 0xa2, + 0x0a, 0x9e, 0xee, 0xe6, 0x4b, 0x55, 0xd3, 0x9a, + 0x21, 0x92, 0x99, 0x2a, 0x27, 0x4f, 0xc1, 0xa8, + 0x36, 0xba, 0x3c, 0x23, 0xa3, 0xfe, 0xeb, 0xbd, + 0x45, 0x4d, 0x44, 0x23, 0x64, 0x3c, 0xe8, 0x0e, + 0x2a, 0x9a, 0xc9, 0x4f, 0xa5, 0x4c, 0xa4, 0x9f } + }, + { "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", + { 0x8e, 0x95, 0x9b, 0x75, 0xda, 0xe3, 0x13, 0xda, + 0x8c, 0xf4, 0xf7, 0x28, 0x14, 0xfc, 0x14, 0x3f, + 0x8f, 0x77, 0x79, 0xc6, 0xeb, 0x9f, 0x7f, 0xa1, + 0x72, 0x99, 0xae, 0xad, 0xb6, 0x88, 0x90, 0x18, + 0x50, 0x1d, 0x28, 0x9e, 0x49, 0x00, 0xf7, 0xe4, + 0x33, 0x1b, 0x99, 0xde, 0xc4, 0xb5, 0x43, 0x3a, + 0xc7, 0xd3, 0x29, 0xee, 0xb6, 0xdd, 0x26, 0x54, + 0x5e, 0x96, 0xe5, 0x5b, 0x87, 0x4b, 0xe9, 0x09 } + }, + }; + + int i; + unsigned char tmp[64]; + hash_state md; + + for (i = 0; i < (int)(sizeof(tests) / sizeof(tests[0])); i++) { + desc->init(&md); + desc->process(&md, (unsigned char*)tests[i].msg, (unsigned long)XSTRLEN(tests[i].msg)); + desc->done(&md, tmp); + if (ltc_compare_testvector(tmp, sizeof(tmp), tests[i].hash, sizeof(tests[i].hash), name, i)) { + return CRYPT_FAIL_TESTVECTOR; + } + } + return CRYPT_OK; + #endif +} + +#endif diff --git a/src/hashes/sha2/sha512_x86.c b/src/hashes/sha2/sha512_x86.c new file mode 100644 index 000000000..d778ca8db --- /dev/null +++ b/src/hashes/sha2/sha512_x86.c @@ -0,0 +1,467 @@ +/* LibTomCrypt, modular cryptographic library -- Tom St Denis */ +/* SPDX-License-Identifier: Unlicense */ +#include "tomcrypt_private.h" + +/** + @file sha512_x86.c + SHA512 by Marek Knapek +*/ + +#if defined(LTC_SHA512) && defined(LTC_SHA512_X86) + +#if defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeclaration-after-statement" +#pragma GCC diagnostic ignored "-Wuninitialized" +#pragma GCC diagnostic ignored "-Wunused-function" +#elif defined(_MSC_VER) +#include +#endif +#include /* SSE2 _mm_set_epi64x */ +#include /* AVX _mm256_castsi128_si256 _mm256_castsi256_si128 _mm256_load_si256 _mm256_store_si256 */ +#include /* AVX2 _mm256_add_epi64 _mm256_blend_epi32 _mm256_permute4x64_epi64 _mm256_shuffle_epi8 */ +#include /* SHA512 _mm256_sha512msg1_epi64 _mm256_sha512msg2_epi64 _mm256_sha512rnds2_epi64 */ +#if defined(__GNUC__) +#pragma GCC diagnostic pop +#endif + +const struct ltc_hash_descriptor sha512_x86_desc = +{ + "sha512", + 5, + 64, + 128, + + /* OID */ + { 2, 16, 840, 1, 101, 3, 4, 2, 3, }, + 9, + + &sha512_x86_init, + &sha512_x86_process, + &sha512_x86_done, + &sha512_x86_test, + NULL +}; + +/* the K array */ +#define K sha512_K +static const ulong64 K[80] LTC_ALIGN(32) = { +CONST64(0x428a2f98d728ae22), CONST64(0x7137449123ef65cd), +CONST64(0xb5c0fbcfec4d3b2f), CONST64(0xe9b5dba58189dbbc), +CONST64(0x3956c25bf348b538), CONST64(0x59f111f1b605d019), +CONST64(0x923f82a4af194f9b), CONST64(0xab1c5ed5da6d8118), +CONST64(0xd807aa98a3030242), CONST64(0x12835b0145706fbe), +CONST64(0x243185be4ee4b28c), CONST64(0x550c7dc3d5ffb4e2), +CONST64(0x72be5d74f27b896f), CONST64(0x80deb1fe3b1696b1), +CONST64(0x9bdc06a725c71235), CONST64(0xc19bf174cf692694), +CONST64(0xe49b69c19ef14ad2), CONST64(0xefbe4786384f25e3), +CONST64(0x0fc19dc68b8cd5b5), CONST64(0x240ca1cc77ac9c65), +CONST64(0x2de92c6f592b0275), CONST64(0x4a7484aa6ea6e483), +CONST64(0x5cb0a9dcbd41fbd4), CONST64(0x76f988da831153b5), +CONST64(0x983e5152ee66dfab), CONST64(0xa831c66d2db43210), +CONST64(0xb00327c898fb213f), CONST64(0xbf597fc7beef0ee4), +CONST64(0xc6e00bf33da88fc2), CONST64(0xd5a79147930aa725), +CONST64(0x06ca6351e003826f), CONST64(0x142929670a0e6e70), +CONST64(0x27b70a8546d22ffc), CONST64(0x2e1b21385c26c926), +CONST64(0x4d2c6dfc5ac42aed), CONST64(0x53380d139d95b3df), +CONST64(0x650a73548baf63de), CONST64(0x766a0abb3c77b2a8), +CONST64(0x81c2c92e47edaee6), CONST64(0x92722c851482353b), +CONST64(0xa2bfe8a14cf10364), CONST64(0xa81a664bbc423001), +CONST64(0xc24b8b70d0f89791), CONST64(0xc76c51a30654be30), +CONST64(0xd192e819d6ef5218), CONST64(0xd69906245565a910), +CONST64(0xf40e35855771202a), CONST64(0x106aa07032bbd1b8), +CONST64(0x19a4c116b8d2d0c8), CONST64(0x1e376c085141ab53), +CONST64(0x2748774cdf8eeb99), CONST64(0x34b0bcb5e19b48a8), +CONST64(0x391c0cb3c5c95a63), CONST64(0x4ed8aa4ae3418acb), +CONST64(0x5b9cca4f7763e373), CONST64(0x682e6ff3d6b2b8a3), +CONST64(0x748f82ee5defb2fc), CONST64(0x78a5636f43172f60), +CONST64(0x84c87814a1f0ab72), CONST64(0x8cc702081a6439ec), +CONST64(0x90befffa23631e28), CONST64(0xa4506cebde82bde9), +CONST64(0xbef9a3f7b2c67915), CONST64(0xc67178f2e372532b), +CONST64(0xca273eceea26619c), CONST64(0xd186b8c721c0c207), +CONST64(0xeada7dd6cde0eb1e), CONST64(0xf57d4f7fee6ed178), +CONST64(0x06f067aa72176fba), CONST64(0x0a637dc5a2c898a6), +CONST64(0x113f9804bef90dae), CONST64(0x1b710b35131c471b), +CONST64(0x28db77f523047d84), CONST64(0x32caab7b40c72493), +CONST64(0x3c9ebe0a15c9bebc), CONST64(0x431d67c49c100d4c), +CONST64(0x4cc5d4becb3e42b6), CONST64(0x597f299cfc657e2a), +CONST64(0x5fcb6fab3ad6faec), CONST64(0x6c44198c4a475817) +}; + +/* compress 1024-bits */ +#ifdef LTC_CLEAN_STACK +static int LTC_SHA512_TARGET ss_sha512_x86_compress(hash_state * md, const unsigned char *buf) +#else +static int LTC_SHA512_TARGET s_sha512_x86_compress(hash_state * md, const unsigned char *buf) +#endif +{ +#define ltc_permute_epi64_k(a, b, c, d) ((((a) & 0x3) << (3 * 2)) | (((b) & 0x3) << (2 * 2)) | (((c) & 0x3) << (1 * 2)) | (((d) & 0x3) << (0 * 2))) +#define ltc_blend_epi32_k(a, b, c, d, e, f, g, h) ((((a) & 0x1) << 7) | (((b) & 0x1) << 6) | (((c) & 0x1) << 5) | (((d) & 0x1) << 4) | (((e) & 0x1) << 3) | (((f) & 0x1) << 2) | (((g) & 0x1) << 1) | (((h) & 0x1) << 0)) +#define ltc_blend_epi64_k(a, b, c, d) ((((a) & 0x1) << 3) | (((b) & 0x1) << 2) | (((c) & 0x1) << 1) | (((d) & 0x1) << 0)) +#define ltc_mm256_blend_epi64(a, b, c) _mm256_blend_epi32((a), (b), ltc_blend_epi32_k((((c) >> 3) & 0x1), (((c) >> 3) & 0x1), (((c) >> 2) & 0x1), (((c) >> 2) & 0x1), (((c) >> 1) & 0x1), (((c) >> 1) & 0x1), (((c) >> 0) & 0x1), (((c) >> 0) & 0x1))) +#define any 0 + + __m256i reverse; + __m256i state_a; + __m256i state_b; + __m256i tmp_a; + __m256i tmp_b; + __m256i tmp_c; + __m256i tmp_d; + __m256i old_a; + __m256i old_b; + __m256i msg_a; + __m256i msg_b; + __m256i msg_c; + __m256i msg_d; + + LTC_ARGCHK(((ltc_uintptr)(buf)) % 32 == 0); + + reverse = _mm256_permute4x64_epi64(_mm256_castsi128_si256(_mm_set_epi64x(0x08090a0b0c0d0e0full, 0x0001020304050607ull)), ltc_permute_epi64_k(0x1, 0x0, 0x1, 0x0)); + state_a = _mm256_load_si256(((__m256i const*)(&md->sha512.state[0]))); + state_b = _mm256_load_si256(((__m256i const*)(&md->sha512.state[4]))); + tmp_a = _mm256_permute4x64_epi64(state_b, ltc_permute_epi64_k(any, any, 0x2, 0x3)); + tmp_b = _mm256_permute4x64_epi64(state_a, ltc_permute_epi64_k(0x2, 0x3, any, any)); + tmp_c = _mm256_permute4x64_epi64(state_b, ltc_permute_epi64_k(any, any, 0x0, 0x1)); + tmp_d = _mm256_permute4x64_epi64(state_a, ltc_permute_epi64_k(0x0, 0x1, any, any)); + state_a = ltc_mm256_blend_epi64(tmp_a, tmp_b, ltc_blend_epi64_k(0x1, 0x1, 0x0, 0x0)); + state_b = ltc_mm256_blend_epi64(tmp_c, tmp_d, ltc_blend_epi64_k(0x1, 0x1, 0x0, 0x0)); + + old_a = state_a; + old_b = state_b; + tmp_a = _mm256_load_si256(((__m256i const*)(&K[0 * (256 / (sizeof(ulong64) * CHAR_BIT))]))); + msg_a = _mm256_load_si256(((__m256i const*)(&buf[0 * (256 / CHAR_BIT)]))); + msg_a = _mm256_shuffle_epi8(msg_a, reverse); + tmp_a = _mm256_add_epi64(tmp_a, msg_a); + state_a = _mm256_sha512rnds2_epi64(state_a, state_b, _mm256_castsi256_si128(tmp_a)); + tmp_a = _mm256_permute4x64_epi64(tmp_a, ltc_permute_epi64_k(any, any, 0x3, 0x2)); + state_b = _mm256_sha512rnds2_epi64(state_b, state_a, _mm256_castsi256_si128(tmp_a)); + tmp_a = _mm256_load_si256(((__m256i const*)(&K[1 * (256 / (sizeof(ulong64) * CHAR_BIT))]))); + msg_b = _mm256_load_si256(((__m256i const*)(&buf[1 * (256 / CHAR_BIT)]))); + msg_b = _mm256_shuffle_epi8(msg_b, reverse); + tmp_a = _mm256_add_epi64(tmp_a, msg_b); + state_a = _mm256_sha512rnds2_epi64(state_a, state_b, _mm256_castsi256_si128(tmp_a)); + tmp_a = _mm256_permute4x64_epi64(tmp_a, ltc_permute_epi64_k(any, any, 0x3, 0x2)); + state_b = _mm256_sha512rnds2_epi64(state_b, state_a, _mm256_castsi256_si128(tmp_a)); + tmp_a = _mm256_load_si256(((__m256i const*)(&K[2 * (256 / (sizeof(ulong64) * CHAR_BIT))]))); + msg_c = _mm256_load_si256(((__m256i const*)(&buf[2 * (256 / CHAR_BIT)]))); + msg_c = _mm256_shuffle_epi8(msg_c, reverse); + tmp_a = _mm256_add_epi64(tmp_a, msg_c); + state_a = _mm256_sha512rnds2_epi64(state_a, state_b, _mm256_castsi256_si128(tmp_a)); + tmp_a = _mm256_permute4x64_epi64(tmp_a, ltc_permute_epi64_k(any, any, 0x3, 0x2)); + state_b = _mm256_sha512rnds2_epi64(state_b, state_a, _mm256_castsi256_si128(tmp_a)); + tmp_a = _mm256_load_si256(((__m256i const*)(&K[3 * (256 / (sizeof(ulong64) * CHAR_BIT))]))); + msg_d = _mm256_load_si256(((__m256i const*)(&buf[3 * (256 / CHAR_BIT)]))); + msg_d = _mm256_shuffle_epi8(msg_d, reverse); + tmp_a = _mm256_add_epi64(tmp_a, msg_d); + state_a = _mm256_sha512rnds2_epi64(state_a, state_b, _mm256_castsi256_si128(tmp_a)); + tmp_a = _mm256_permute4x64_epi64(tmp_a, ltc_permute_epi64_k(any, any, 0x3, 0x2)); + state_b = _mm256_sha512rnds2_epi64(state_b, state_a, _mm256_castsi256_si128(tmp_a)); + msg_a = _mm256_sha512msg1_epi64(msg_a, _mm256_castsi256_si128(msg_b)); + tmp_a = _mm256_permute4x64_epi64(msg_c, ltc_permute_epi64_k(any, 0x3, 0x2, 0x1)); + tmp_b = _mm256_permute4x64_epi64(msg_d, ltc_permute_epi64_k(0x0, any, any, any)); + tmp_a = ltc_mm256_blend_epi64(tmp_a, tmp_b, ltc_blend_epi64_k(0x1, 0x0, 0x0, 0x0)); + tmp_a = _mm256_add_epi64(tmp_a, msg_a); + msg_a = _mm256_sha512msg2_epi64(tmp_a, msg_d); + tmp_a = _mm256_load_si256(((__m256i const*)(&K[4 * (256 / (sizeof(ulong64) * CHAR_BIT))]))); + tmp_a = _mm256_add_epi64(tmp_a, msg_a); + state_a = _mm256_sha512rnds2_epi64(state_a, state_b, _mm256_castsi256_si128(tmp_a)); + tmp_a = _mm256_permute4x64_epi64(tmp_a, ltc_permute_epi64_k(any, any, 0x3, 0x2)); + state_b = _mm256_sha512rnds2_epi64(state_b, state_a, _mm256_castsi256_si128(tmp_a)); + msg_b = _mm256_sha512msg1_epi64(msg_b, _mm256_castsi256_si128(msg_c)); + tmp_a = _mm256_permute4x64_epi64(msg_d, ltc_permute_epi64_k(any, 0x3, 0x2, 0x1)); + tmp_b = _mm256_permute4x64_epi64(msg_a, ltc_permute_epi64_k(0x0, any, any, any)); + tmp_a = ltc_mm256_blend_epi64(tmp_a, tmp_b, ltc_blend_epi64_k(0x1, 0x0, 0x0, 0x0)); + tmp_a = _mm256_add_epi64(tmp_a, msg_b); + msg_b = _mm256_sha512msg2_epi64(tmp_a, msg_a); + tmp_a = _mm256_load_si256(((__m256i const*)(&K[5 * (256 / (sizeof(ulong64) * CHAR_BIT))]))); + tmp_a = _mm256_add_epi64(tmp_a, msg_b); + state_a = _mm256_sha512rnds2_epi64(state_a, state_b, _mm256_castsi256_si128(tmp_a)); + tmp_a = _mm256_permute4x64_epi64(tmp_a, ltc_permute_epi64_k(any, any, 0x3, 0x2)); + state_b = _mm256_sha512rnds2_epi64(state_b, state_a, _mm256_castsi256_si128(tmp_a)); + msg_c = _mm256_sha512msg1_epi64(msg_c, _mm256_castsi256_si128(msg_d)); + tmp_a = _mm256_permute4x64_epi64(msg_a, ltc_permute_epi64_k(any, 0x3, 0x2, 0x1)); + tmp_b = _mm256_permute4x64_epi64(msg_b, ltc_permute_epi64_k(0x0, any, any, any)); + tmp_a = ltc_mm256_blend_epi64(tmp_a, tmp_b, ltc_blend_epi64_k(0x1, 0x0, 0x0, 0x0)); + tmp_a = _mm256_add_epi64(tmp_a, msg_c); + msg_c = _mm256_sha512msg2_epi64(tmp_a, msg_b); + tmp_a = _mm256_load_si256(((__m256i const*)(&K[6 * (256 / (sizeof(ulong64) * CHAR_BIT))]))); + tmp_a = _mm256_add_epi64(tmp_a, msg_c); + state_a = _mm256_sha512rnds2_epi64(state_a, state_b, _mm256_castsi256_si128(tmp_a)); + tmp_a = _mm256_permute4x64_epi64(tmp_a, ltc_permute_epi64_k(any, any, 0x3, 0x2)); + state_b = _mm256_sha512rnds2_epi64(state_b, state_a, _mm256_castsi256_si128(tmp_a)); + msg_d = _mm256_sha512msg1_epi64(msg_d, _mm256_castsi256_si128(msg_a)); + tmp_a = _mm256_permute4x64_epi64(msg_b, ltc_permute_epi64_k(any, 0x3, 0x2, 0x1)); + tmp_b = _mm256_permute4x64_epi64(msg_c, ltc_permute_epi64_k(0x0, any, any, any)); + tmp_a = ltc_mm256_blend_epi64(tmp_a, tmp_b, ltc_blend_epi64_k(0x1, 0x0, 0x0, 0x0)); + tmp_a = _mm256_add_epi64(tmp_a, msg_d); + msg_d = _mm256_sha512msg2_epi64(tmp_a, msg_c); + tmp_a = _mm256_load_si256(((__m256i const*)(&K[7 * (256 / (sizeof(ulong64) * CHAR_BIT))]))); + tmp_a = _mm256_add_epi64(tmp_a, msg_d); + state_a = _mm256_sha512rnds2_epi64(state_a, state_b, _mm256_castsi256_si128(tmp_a)); + tmp_a = _mm256_permute4x64_epi64(tmp_a, ltc_permute_epi64_k(any, any, 0x3, 0x2)); + state_b = _mm256_sha512rnds2_epi64(state_b, state_a, _mm256_castsi256_si128(tmp_a)); + msg_a = _mm256_sha512msg1_epi64(msg_a, _mm256_castsi256_si128(msg_b)); + tmp_a = _mm256_permute4x64_epi64(msg_c, ltc_permute_epi64_k(any, 0x3, 0x2, 0x1)); + tmp_b = _mm256_permute4x64_epi64(msg_d, ltc_permute_epi64_k(0x0, any, any, any)); + tmp_a = ltc_mm256_blend_epi64(tmp_a, tmp_b, ltc_blend_epi64_k(0x1, 0x0, 0x0, 0x0)); + tmp_a = _mm256_add_epi64(tmp_a, msg_a); + msg_a = _mm256_sha512msg2_epi64(tmp_a, msg_d); + tmp_a = _mm256_load_si256(((__m256i const*)(&K[8 * (256 / (sizeof(ulong64) * CHAR_BIT))]))); + tmp_a = _mm256_add_epi64(tmp_a, msg_a); + state_a = _mm256_sha512rnds2_epi64(state_a, state_b, _mm256_castsi256_si128(tmp_a)); + tmp_a = _mm256_permute4x64_epi64(tmp_a, ltc_permute_epi64_k(any, any, 0x3, 0x2)); + state_b = _mm256_sha512rnds2_epi64(state_b, state_a, _mm256_castsi256_si128(tmp_a)); + msg_b = _mm256_sha512msg1_epi64(msg_b, _mm256_castsi256_si128(msg_c)); + tmp_a = _mm256_permute4x64_epi64(msg_d, ltc_permute_epi64_k(any, 0x3, 0x2, 0x1)); + tmp_b = _mm256_permute4x64_epi64(msg_a, ltc_permute_epi64_k(0x0, any, any, any)); + tmp_a = ltc_mm256_blend_epi64(tmp_a, tmp_b, ltc_blend_epi64_k(0x1, 0x0, 0x0, 0x0)); + tmp_a = _mm256_add_epi64(tmp_a, msg_b); + msg_b = _mm256_sha512msg2_epi64(tmp_a, msg_a); + tmp_a = _mm256_load_si256(((__m256i const*)(&K[9 * (256 / (sizeof(ulong64) * CHAR_BIT))]))); + tmp_a = _mm256_add_epi64(tmp_a, msg_b); + state_a = _mm256_sha512rnds2_epi64(state_a, state_b, _mm256_castsi256_si128(tmp_a)); + tmp_a = _mm256_permute4x64_epi64(tmp_a, ltc_permute_epi64_k(any, any, 0x3, 0x2)); + state_b = _mm256_sha512rnds2_epi64(state_b, state_a, _mm256_castsi256_si128(tmp_a)); + msg_c = _mm256_sha512msg1_epi64(msg_c, _mm256_castsi256_si128(msg_d)); + tmp_a = _mm256_permute4x64_epi64(msg_a, ltc_permute_epi64_k(any, 0x3, 0x2, 0x1)); + tmp_b = _mm256_permute4x64_epi64(msg_b, ltc_permute_epi64_k(0x0, any, any, any)); + tmp_a = ltc_mm256_blend_epi64(tmp_a, tmp_b, ltc_blend_epi64_k(0x1, 0x0, 0x0, 0x0)); + tmp_a = _mm256_add_epi64(tmp_a, msg_c); + msg_c = _mm256_sha512msg2_epi64(tmp_a, msg_b); + tmp_a = _mm256_load_si256(((__m256i const*)(&K[10 * (256 / (sizeof(ulong64) * CHAR_BIT))]))); + tmp_a = _mm256_add_epi64(tmp_a, msg_c); + state_a = _mm256_sha512rnds2_epi64(state_a, state_b, _mm256_castsi256_si128(tmp_a)); + tmp_a = _mm256_permute4x64_epi64(tmp_a, ltc_permute_epi64_k(any, any, 0x3, 0x2)); + state_b = _mm256_sha512rnds2_epi64(state_b, state_a, _mm256_castsi256_si128(tmp_a)); + msg_d = _mm256_sha512msg1_epi64(msg_d, _mm256_castsi256_si128(msg_a)); + tmp_a = _mm256_permute4x64_epi64(msg_b, ltc_permute_epi64_k(any, 0x3, 0x2, 0x1)); + tmp_b = _mm256_permute4x64_epi64(msg_c, ltc_permute_epi64_k(0x0, any, any, any)); + tmp_a = ltc_mm256_blend_epi64(tmp_a, tmp_b, ltc_blend_epi64_k(0x1, 0x0, 0x0, 0x0)); + tmp_a = _mm256_add_epi64(tmp_a, msg_d); + msg_d = _mm256_sha512msg2_epi64(tmp_a, msg_c); + tmp_a = _mm256_load_si256(((__m256i const*)(&K[11 * (256 / (sizeof(ulong64) * CHAR_BIT))]))); + tmp_a = _mm256_add_epi64(tmp_a, msg_d); + state_a = _mm256_sha512rnds2_epi64(state_a, state_b, _mm256_castsi256_si128(tmp_a)); + tmp_a = _mm256_permute4x64_epi64(tmp_a, ltc_permute_epi64_k(any, any, 0x3, 0x2)); + state_b = _mm256_sha512rnds2_epi64(state_b, state_a, _mm256_castsi256_si128(tmp_a)); + msg_a = _mm256_sha512msg1_epi64(msg_a, _mm256_castsi256_si128(msg_b)); + tmp_a = _mm256_permute4x64_epi64(msg_c, ltc_permute_epi64_k(any, 0x3, 0x2, 0x1)); + tmp_b = _mm256_permute4x64_epi64(msg_d, ltc_permute_epi64_k(0x0, any, any, any)); + tmp_a = ltc_mm256_blend_epi64(tmp_a, tmp_b, ltc_blend_epi64_k(0x1, 0x0, 0x0, 0x0)); + tmp_a = _mm256_add_epi64(tmp_a, msg_a); + msg_a = _mm256_sha512msg2_epi64(tmp_a, msg_d); + tmp_a = _mm256_load_si256(((__m256i const*)(&K[12 * (256 / (sizeof(ulong64) * CHAR_BIT))]))); + tmp_a = _mm256_add_epi64(tmp_a, msg_a); + state_a = _mm256_sha512rnds2_epi64(state_a, state_b, _mm256_castsi256_si128(tmp_a)); + tmp_a = _mm256_permute4x64_epi64(tmp_a, ltc_permute_epi64_k(any, any, 0x3, 0x2)); + state_b = _mm256_sha512rnds2_epi64(state_b, state_a, _mm256_castsi256_si128(tmp_a)); + msg_b = _mm256_sha512msg1_epi64(msg_b, _mm256_castsi256_si128(msg_c)); + tmp_a = _mm256_permute4x64_epi64(msg_d, ltc_permute_epi64_k(any, 0x3, 0x2, 0x1)); + tmp_b = _mm256_permute4x64_epi64(msg_a, ltc_permute_epi64_k(0x0, any, any, any)); + tmp_a = ltc_mm256_blend_epi64(tmp_a, tmp_b, ltc_blend_epi64_k(0x1, 0x0, 0x0, 0x0)); + tmp_a = _mm256_add_epi64(tmp_a, msg_b); + msg_b = _mm256_sha512msg2_epi64(tmp_a, msg_a); + tmp_a = _mm256_load_si256(((__m256i const*)(&K[13 * (256 / (sizeof(ulong64) * CHAR_BIT))]))); + tmp_a = _mm256_add_epi64(tmp_a, msg_b); + state_a = _mm256_sha512rnds2_epi64(state_a, state_b, _mm256_castsi256_si128(tmp_a)); + tmp_a = _mm256_permute4x64_epi64(tmp_a, ltc_permute_epi64_k(any, any, 0x3, 0x2)); + state_b = _mm256_sha512rnds2_epi64(state_b, state_a, _mm256_castsi256_si128(tmp_a)); + msg_c = _mm256_sha512msg1_epi64(msg_c, _mm256_castsi256_si128(msg_d)); + tmp_a = _mm256_permute4x64_epi64(msg_a, ltc_permute_epi64_k(any, 0x3, 0x2, 0x1)); + tmp_b = _mm256_permute4x64_epi64(msg_b, ltc_permute_epi64_k(0x0, any, any, any)); + tmp_a = ltc_mm256_blend_epi64(tmp_a, tmp_b, ltc_blend_epi64_k(0x1, 0x0, 0x0, 0x0)); + tmp_a = _mm256_add_epi64(tmp_a, msg_c); + msg_c = _mm256_sha512msg2_epi64(tmp_a, msg_b); + tmp_a = _mm256_load_si256(((__m256i const*)(&K[14 * (256 / (sizeof(ulong64) * CHAR_BIT))]))); + tmp_a = _mm256_add_epi64(tmp_a, msg_c); + state_a = _mm256_sha512rnds2_epi64(state_a, state_b, _mm256_castsi256_si128(tmp_a)); + tmp_a = _mm256_permute4x64_epi64(tmp_a, ltc_permute_epi64_k(any, any, 0x3, 0x2)); + state_b = _mm256_sha512rnds2_epi64(state_b, state_a, _mm256_castsi256_si128(tmp_a)); + msg_d = _mm256_sha512msg1_epi64(msg_d, _mm256_castsi256_si128(msg_a)); + tmp_a = _mm256_permute4x64_epi64(msg_b, ltc_permute_epi64_k(any, 0x3, 0x2, 0x1)); + tmp_b = _mm256_permute4x64_epi64(msg_c, ltc_permute_epi64_k(0x0, any, any, any)); + tmp_a = ltc_mm256_blend_epi64(tmp_a, tmp_b, ltc_blend_epi64_k(0x1, 0x0, 0x0, 0x0)); + tmp_a = _mm256_add_epi64(tmp_a, msg_d); + msg_d = _mm256_sha512msg2_epi64(tmp_a, msg_c); + tmp_a = _mm256_load_si256(((__m256i const*)(&K[15 * (256 / (sizeof(ulong64) * CHAR_BIT))]))); + tmp_a = _mm256_add_epi64(tmp_a, msg_d); + state_a = _mm256_sha512rnds2_epi64(state_a, state_b, _mm256_castsi256_si128(tmp_a)); + tmp_a = _mm256_permute4x64_epi64(tmp_a, ltc_permute_epi64_k(any, any, 0x3, 0x2)); + state_b = _mm256_sha512rnds2_epi64(state_b, state_a, _mm256_castsi256_si128(tmp_a)); + msg_a = _mm256_sha512msg1_epi64(msg_a, _mm256_castsi256_si128(msg_b)); + tmp_a = _mm256_permute4x64_epi64(msg_c, ltc_permute_epi64_k(any, 0x3, 0x2, 0x1)); + tmp_b = _mm256_permute4x64_epi64(msg_d, ltc_permute_epi64_k(0x0, any, any, any)); + tmp_a = ltc_mm256_blend_epi64(tmp_a, tmp_b, ltc_blend_epi64_k(0x1, 0x0, 0x0, 0x0)); + tmp_a = _mm256_add_epi64(tmp_a, msg_a); + msg_a = _mm256_sha512msg2_epi64(tmp_a, msg_d); + tmp_a = _mm256_load_si256(((__m256i const*)(&K[16 * (256 / (sizeof(ulong64) * CHAR_BIT))]))); + tmp_a = _mm256_add_epi64(tmp_a, msg_a); + state_a = _mm256_sha512rnds2_epi64(state_a, state_b, _mm256_castsi256_si128(tmp_a)); + tmp_a = _mm256_permute4x64_epi64(tmp_a, ltc_permute_epi64_k(any, any, 0x3, 0x2)); + state_b = _mm256_sha512rnds2_epi64(state_b, state_a, _mm256_castsi256_si128(tmp_a)); + msg_b = _mm256_sha512msg1_epi64(msg_b, _mm256_castsi256_si128(msg_c)); + tmp_a = _mm256_permute4x64_epi64(msg_d, ltc_permute_epi64_k(any, 0x3, 0x2, 0x1)); + tmp_b = _mm256_permute4x64_epi64(msg_a, ltc_permute_epi64_k(0x0, any, any, any)); + tmp_a = ltc_mm256_blend_epi64(tmp_a, tmp_b, ltc_blend_epi64_k(0x1, 0x0, 0x0, 0x0)); + tmp_a = _mm256_add_epi64(tmp_a, msg_b); + msg_b = _mm256_sha512msg2_epi64(tmp_a, msg_a); + tmp_a = _mm256_load_si256(((__m256i const*)(&K[17 * (256 / (sizeof(ulong64) * CHAR_BIT))]))); + tmp_a = _mm256_add_epi64(tmp_a, msg_b); + state_a = _mm256_sha512rnds2_epi64(state_a, state_b, _mm256_castsi256_si128(tmp_a)); + tmp_a = _mm256_permute4x64_epi64(tmp_a, ltc_permute_epi64_k(any, any, 0x3, 0x2)); + state_b = _mm256_sha512rnds2_epi64(state_b, state_a, _mm256_castsi256_si128(tmp_a)); + msg_c = _mm256_sha512msg1_epi64(msg_c, _mm256_castsi256_si128(msg_d)); + tmp_a = _mm256_permute4x64_epi64(msg_a, ltc_permute_epi64_k(any, 0x3, 0x2, 0x1)); + tmp_b = _mm256_permute4x64_epi64(msg_b, ltc_permute_epi64_k(0x0, any, any, any)); + tmp_a = ltc_mm256_blend_epi64(tmp_a, tmp_b, ltc_blend_epi64_k(0x1, 0x0, 0x0, 0x0)); + tmp_a = _mm256_add_epi64(tmp_a, msg_c); + msg_c = _mm256_sha512msg2_epi64(tmp_a, msg_b); + tmp_a = _mm256_load_si256(((__m256i const*)(&K[18 * (256 / (sizeof(ulong64) * CHAR_BIT))]))); + tmp_a = _mm256_add_epi64(tmp_a, msg_c); + state_a = _mm256_sha512rnds2_epi64(state_a, state_b, _mm256_castsi256_si128(tmp_a)); + tmp_a = _mm256_permute4x64_epi64(tmp_a, ltc_permute_epi64_k(any, any, 0x3, 0x2)); + state_b = _mm256_sha512rnds2_epi64(state_b, state_a, _mm256_castsi256_si128(tmp_a)); + msg_d = _mm256_sha512msg1_epi64(msg_d, _mm256_castsi256_si128(msg_a)); + tmp_a = _mm256_permute4x64_epi64(msg_b, ltc_permute_epi64_k(any, 0x3, 0x2, 0x1)); + tmp_b = _mm256_permute4x64_epi64(msg_c, ltc_permute_epi64_k(0x0, any, any, any)); + tmp_a = ltc_mm256_blend_epi64(tmp_a, tmp_b, ltc_blend_epi64_k(0x1, 0x0, 0x0, 0x0)); + tmp_a = _mm256_add_epi64(tmp_a, msg_d); + msg_d = _mm256_sha512msg2_epi64(tmp_a, msg_c); + tmp_a = _mm256_load_si256(((__m256i const*)(&K[19 * (256 / (sizeof(ulong64) * CHAR_BIT))]))); + tmp_a = _mm256_add_epi64(tmp_a, msg_d); + state_a = _mm256_sha512rnds2_epi64(state_a, state_b, _mm256_castsi256_si128(tmp_a)); + tmp_a = _mm256_permute4x64_epi64(tmp_a, ltc_permute_epi64_k(any, any, 0x3, 0x2)); + state_b = _mm256_sha512rnds2_epi64(state_b, state_a, _mm256_castsi256_si128(tmp_a)); + state_a = _mm256_add_epi64(state_a, old_a); + state_b = _mm256_add_epi64(state_b, old_b); + + tmp_a = _mm256_permute4x64_epi64(state_b, ltc_permute_epi64_k(any, any, 0x2, 0x3)); + tmp_b = _mm256_permute4x64_epi64(state_a, ltc_permute_epi64_k(0x2, 0x3, any, any)); + tmp_c = _mm256_permute4x64_epi64(state_b, ltc_permute_epi64_k(any, any, 0x0, 0x1)); + tmp_d = _mm256_permute4x64_epi64(state_a, ltc_permute_epi64_k(0x0, 0x1, any, any)); + state_a = ltc_mm256_blend_epi64(tmp_a, tmp_b, ltc_blend_epi64_k(0x1, 0x1, 0x0, 0x0)); + state_b = ltc_mm256_blend_epi64(tmp_c, tmp_d, ltc_blend_epi64_k(0x1, 0x1, 0x0, 0x0)); + _mm256_store_si256(((__m256i*)(&md->sha512.state[0])), state_a); + _mm256_store_si256(((__m256i*)(&md->sha512.state[4])), state_b); + return CRYPT_OK; + +#undef ltc_permute_epi64_k +#undef ltc_blend_epi32_k +#undef ltc_blend_epi64_k +#undef ltc_mm256_blend_epi64 +#undef any +} +#undef K + +/* compress 1024-bits */ +#ifdef LTC_CLEAN_STACK +static int s_sha512_compress(hash_state * md, const unsigned char *buf) +{ + int err; + err = ss_sha512_compress(md, buf); + burn_stack(sizeof(ulong64) * 90 + sizeof(int)); + return err; +} +#endif + +/** + Initialize the hash state + @param md The hash state you wish to initialize + @return CRYPT_OK if successful +*/ +int sha512_x86_init(hash_state * md) +{ + LTC_ARGCHK(md != NULL); + + md->sha512.state = LTC_ALIGN_BUF(md->sha512.state_buf, 32); + md->sha512.curlen = 0; + md->sha512.length = 0; + md->sha512.state[0] = CONST64(0x6a09e667f3bcc908); + md->sha512.state[1] = CONST64(0xbb67ae8584caa73b); + md->sha512.state[2] = CONST64(0x3c6ef372fe94f82b); + md->sha512.state[3] = CONST64(0xa54ff53a5f1d36f1); + md->sha512.state[4] = CONST64(0x510e527fade682d1); + md->sha512.state[5] = CONST64(0x9b05688c2b3e6c1f); + md->sha512.state[6] = CONST64(0x1f83d9abfb41bd6b); + md->sha512.state[7] = CONST64(0x5be0cd19137e2179); + return CRYPT_OK; +} + +/** + Process a block of memory though the hash + @param md The hash state + @param in The data to hash + @param inlen The length of the data (octets) + @return CRYPT_OK if successful +*/ +HASH_PROCESS(sha512_x86_process,s_sha512_x86_compress, sha512, 128) + +/** + Terminate the hash to get the digest + @param md The hash state + @param out [out] The destination of the hash (64 bytes) + @return CRYPT_OK if successful +*/ +int sha512_x86_done(hash_state * md, unsigned char *out) +{ + int i; + + LTC_ARGCHK(md != NULL); + LTC_ARGCHK(out != NULL); + + if (md->sha512.curlen >= sizeof(md->sha512.buf)) { + return CRYPT_INVALID_ARG; + } + + + /* increase the length of the message */ + md->sha512.length += md->sha512.curlen * CONST64(8); + + /* append the '1' bit */ + md->sha512.buf[md->sha512.curlen++] = (unsigned char)0x80; + + /* if the length is currently above 112 bytes we append zeros + * then compress. Then we can fall back to padding zeros and length + * encoding like normal. + */ + if (md->sha512.curlen > 112) { + while (md->sha512.curlen < 128) { + md->sha512.buf[md->sha512.curlen++] = (unsigned char)0; + } + s_sha512_x86_compress(md, md->sha512.buf); + md->sha512.curlen = 0; + } + + /* pad upto 120 bytes of zeroes + * note: that from 112 to 120 is the 64 MSB of the length. We assume that you won't hash + * > 2^64 bits of data... :-) + */ + while (md->sha512.curlen < 120) { + md->sha512.buf[md->sha512.curlen++] = (unsigned char)0; + } + + /* store length */ + STORE64H(md->sha512.length, md->sha512.buf+120); + s_sha512_x86_compress(md, md->sha512.buf); + + /* copy output */ + for (i = 0; i < 8; i++) { + STORE64H(md->sha512.state[i], out+(8*i)); + } +#ifdef LTC_CLEAN_STACK + zeromem(md, sizeof(hash_state)); +#endif + return CRYPT_OK; +} + +/** + Self-test the hash + @return CRYPT_OK if successful, CRYPT_NOP if self-tests have been disabled +*/ +int sha512_x86_test(void) +{ + return sha512_test_desc(&sha512_x86_desc, "SHA512 x86"); +} + +#endif /* defined(LTC_SHA512) && defined(LTC_SHA512_X86) */ diff --git a/src/headers/tomcrypt_cfg.h b/src/headers/tomcrypt_cfg.h index f0f5051e2..604733a8e 100644 --- a/src/headers/tomcrypt_cfg.h +++ b/src/headers/tomcrypt_cfg.h @@ -333,6 +333,18 @@ typedef unsigned long ltc_mp_digit; #if !defined(LTC_NO_SHA256_X86) #define LTC_SHA256_X86 #endif + #if !defined(LTC_NO_SHA512_X86) + #define LTC_SHA512_X86 + #endif + #if !defined(LTC_NO_SHA384_X86) + #define LTC_SHA384_X86 + #endif + #if !defined(LTC_NO_SHA512_256_X86) + #define LTC_SHA512_256_X86 + #endif + #if !defined(LTC_NO_SHA512_224_X86) + #define LTC_SHA512_224_X86 + #endif #endif #if defined(__GNUC__) @@ -396,6 +408,8 @@ typedef unsigned long ltc_mp_digit; #else # define LTC_ATTRIBUTE(x) #endif +#define LTC_SHA512_TARGET __attribute__((__target__("sse2,avx,avx2,sha512"))) +#define LTC_SHA512_TARGET #if !defined(LTC_NO_GCM_PMULL) && (defined(__aarch64__) || defined(_M_ARM64)) #define LTC_GCM_PMULL diff --git a/src/headers/tomcrypt_hash.h b/src/headers/tomcrypt_hash.h index c80ad9590..fb2d89dfe 100644 --- a/src/headers/tomcrypt_hash.h +++ b/src/headers/tomcrypt_hash.h @@ -28,9 +28,10 @@ struct kangaroo_twelve_state { #ifdef LTC_SHA512 struct sha512_state { - ulong64 length, state[8]; + ulong64 length, *state; unsigned long curlen; unsigned char buf[128]; + unsigned char state_buf[LTC_ALIGNED_BUF_SIZE(ulong64, 8, 32)]; }; #endif @@ -340,7 +341,21 @@ int sha512_process(hash_state * md, const unsigned char *in, unsigned long inlen int sha512_done(hash_state * md, unsigned char *out); int sha512_test(void); extern const struct ltc_hash_descriptor sha512_desc; -#endif + +int sha512_c_init(hash_state * md); +int sha512_c_process(hash_state * md, const unsigned char *in, unsigned long inlen); +int sha512_c_done(hash_state * md, unsigned char *out); +int sha512_c_test(void); +extern const struct ltc_hash_descriptor sha512_portable_desc; + +#ifdef LTC_SHA512_X86 +int sha512_x86_init(hash_state * md); +int sha512_x86_process(hash_state * md, const unsigned char *in, unsigned long inlen); +int sha512_x86_done(hash_state * md, unsigned char *out); +int sha512_x86_test(void); +extern const struct ltc_hash_descriptor sha512_x86_desc; +#endif /* LTC_SHA512_X86 */ +#endif /* LTC_SHA512 */ #ifdef LTC_SHA384 #ifndef LTC_SHA512 @@ -351,7 +366,21 @@ int sha384_init(hash_state * md); int sha384_done(hash_state * md, unsigned char *out); int sha384_test(void); extern const struct ltc_hash_descriptor sha384_desc; -#endif + +int sha384_c_init(hash_state * md); +#define sha384_c_process sha512_c_process +int sha384_c_done(hash_state * md, unsigned char *out); +int sha384_c_test(void); +extern const struct ltc_hash_descriptor sha384_portable_desc; + +#ifdef LTC_SHA384_X86 +int sha384_x86_init(hash_state * md); +#define sha384_x86_process sha512_x86_process +int sha384_x86_done(hash_state * md, unsigned char *out); +int sha384_x86_test(void); +extern const struct ltc_hash_descriptor sha384_x86_desc; +#endif /* LTC_SHA384_X86 */ +#endif /* LTC_SHA384 */ #ifdef LTC_SHA512_256 #ifndef LTC_SHA512 @@ -362,6 +391,20 @@ int sha512_256_init(hash_state * md); int sha512_256_done(hash_state * md, unsigned char *out); int sha512_256_test(void); extern const struct ltc_hash_descriptor sha512_256_desc; + +int sha512_256_c_init(hash_state * md); +#define sha512_256_c_process sha512_c_process +int sha512_256_c_done(hash_state * md, unsigned char *out); +int sha512_256_c_test(void); +extern const struct ltc_hash_descriptor sha512_256_portable_desc; + +#ifdef LTC_SHA512_256_X86 +int sha512_256_x86_init(hash_state * md); +#define sha512_256_x86_process sha512_x86_process +int sha512_256_x86_done(hash_state * md, unsigned char *out); +int sha512_256_x86_test(void); +extern const struct ltc_hash_descriptor sha512_256_x86_desc; +#endif /* LTC_SHA512_256_X86 */ #endif /* LTC_SHA512_256 */ #ifdef LTC_SHA512_224 @@ -373,6 +416,20 @@ int sha512_224_init(hash_state * md); int sha512_224_done(hash_state * md, unsigned char *out); int sha512_224_test(void); extern const struct ltc_hash_descriptor sha512_224_desc; + +int sha512_224_c_init(hash_state * md); +#define sha512_224_c_process sha512_c_process +int sha512_224_c_done(hash_state * md, unsigned char *out); +int sha512_224_c_test(void); +extern const struct ltc_hash_descriptor sha512_224_portable_desc; + +#ifdef LTC_SHA512_224_X86 +int sha512_224_x86_init(hash_state * md); +#define sha512_224_x86_process sha512_x86_process +int sha512_224_x86_done(hash_state * md, unsigned char *out); +int sha512_224_x86_test(void); +extern const struct ltc_hash_descriptor sha512_224_x86_desc; +#endif /* LTC_SHA512_224_X86 */ #endif /* LTC_SHA512_224 */ int shani_is_supported(void); diff --git a/src/headers/tomcrypt_private.h b/src/headers/tomcrypt_private.h index 39c56454d..1344b423f 100644 --- a/src/headers/tomcrypt_private.h +++ b/src/headers/tomcrypt_private.h @@ -194,6 +194,18 @@ int sha224_test_desc(const struct ltc_hash_descriptor *desc, const char *name); #ifdef LTC_SHA256 int sha256_test_desc(const struct ltc_hash_descriptor *desc, const char *name); #endif +#ifdef LTC_SHA512 +int sha512_test_desc(const struct ltc_hash_descriptor *desc, const char *name); +#endif +#ifdef LTC_SHA384 +int sha384_test_desc(const struct ltc_hash_descriptor *desc, const char *name); +#endif +#ifdef LTC_SHA512_224 +int sha512_224_test_desc(const struct ltc_hash_descriptor *desc, const char *name); +#endif +#ifdef LTC_SHA512_256 +int sha512_256_test_desc(const struct ltc_hash_descriptor *desc, const char *name); +#endif /* tomcrypt_mac.h */ diff --git a/src/misc/crypt/crypt.c b/src/misc/crypt/crypt.c index 26fe4fbb3..a4e6b88a4 100644 --- a/src/misc/crypt/crypt.c +++ b/src/misc/crypt/crypt.c @@ -514,6 +514,18 @@ const char *crypt_build_settings = #if defined(LTC_SHA256) && defined(LTC_SHA256_X86) " SHA256-NI " #endif +#if defined(LTC_SHA512_X86) + " SHA512-NI " +#endif +#if defined(LTC_SHA384_X86) + " SHA384-NI " +#endif +#if defined(LTC_SHA512_224_X86) + " SHA512_224-NI " +#endif +#if defined(LTC_SHA512_256_X86) + " SHA512_256-NI " +#endif #if defined(LTC_DEVRANDOM) " LTC_DEVRANDOM " #endif diff --git a/src/misc/crypt/crypt_register_all_hashes.c b/src/misc/crypt/crypt_register_all_hashes.c index 91f1cadb1..51588a732 100644 --- a/src/misc/crypt/crypt_register_all_hashes.c +++ b/src/misc/crypt/crypt_register_all_hashes.c @@ -16,6 +16,9 @@ int register_all_hashes(void) { #ifdef LTC_SHA512 + /* `sha512_desc` does the multiplexing into `sha512_x86_desc` resp. `sha512_portable_desc` + * depending on the capabilities of the CPU. + */ REGISTER_HASH(&sha512_desc); #endif #ifdef LTC_SHA256 @@ -31,9 +34,15 @@ int register_all_hashes(void) REGISTER_HASH(&sha3_224_desc); #endif #ifdef LTC_SHA512_256 + /* `sha512_256_desc` does the multiplexing into `sha512_256_x86_desc` resp. `sha512_256_portable_desc` + * depending on the capabilities of the CPU. + */ REGISTER_HASH(&sha512_256_desc); #endif #ifdef LTC_SHA512_224 + /* `sha512_224_desc` does the multiplexing into `sha512_224_x86_desc` resp. `sha512_224_portable_desc` + * depending on the capabilities of the CPU. + */ REGISTER_HASH(&sha512_224_desc); #endif #ifdef LTC_SHA224 @@ -43,6 +52,9 @@ int register_all_hashes(void) REGISTER_HASH(&sha224_desc); #endif #ifdef LTC_SHA384 + /* `sha384_desc` does the multiplexing into `sha384_x86_desc` resp. `sha384_portable_desc` + * depending on the capabilities of the CPU. + */ REGISTER_HASH(&sha384_desc); #endif #ifdef LTC_SHA1 diff --git a/tests/test.c b/tests/test.c index f2f5ed0c5..9c5768539 100644 --- a/tests/test.c +++ b/tests/test.c @@ -228,12 +228,31 @@ static void s_unregister_all(void) unregister_hash(&sha384_desc); #endif #ifdef LTC_SHA512 + /* `register_all_hashes()` does not register + * - `sha512_portable_desc` + * - `sha512_x86_desc` + * so we don't have to unregister them */ + unregister_hash(&sha512_desc); +#endif +#ifdef LTC_SHA384 + /* `register_all_hashes()` does not register + * - `sha384_portable_desc` + * - `sha384_x86_desc` + * so we don't have to unregister them */ unregister_hash(&sha512_desc); #endif #ifdef LTC_SHA512_224 + /* `register_all_hashes()` does not register + * - `sha512_224_portable_desc` + * - `sha512_224_x86_desc` + * so we don't have to unregister them */ unregister_hash(&sha512_224_desc); #endif #ifdef LTC_SHA512_256 + /* `register_all_hashes()` does not register + * - `sha512_256_portable_desc` + * - `sha512_256_x86_desc` + * so we don't have to unregister them */ unregister_hash(&sha512_256_desc); #endif #ifdef LTC_SHA3 From 9cfd41ae92a4c7d69beef382df4157fedf017dd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Kn=C3=A1pek?= Date: Fri, 17 Apr 2026 23:00:00 +0200 Subject: [PATCH 02/10] GCC >= 14 clang >= 17 MSVS >= 2022 --- src/headers/tomcrypt_cfg.h | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/src/headers/tomcrypt_cfg.h b/src/headers/tomcrypt_cfg.h index 604733a8e..e771e4127 100644 --- a/src/headers/tomcrypt_cfg.h +++ b/src/headers/tomcrypt_cfg.h @@ -333,17 +333,22 @@ typedef unsigned long ltc_mp_digit; #if !defined(LTC_NO_SHA256_X86) #define LTC_SHA256_X86 #endif - #if !defined(LTC_NO_SHA512_X86) - #define LTC_SHA512_X86 - #endif - #if !defined(LTC_NO_SHA384_X86) - #define LTC_SHA384_X86 - #endif - #if !defined(LTC_NO_SHA512_256_X86) - #define LTC_SHA512_256_X86 - #endif - #if !defined(LTC_NO_SHA512_224_X86) - #define LTC_SHA512_224_X86 + #if \ + (defined __GNUC__ && defined __GNUC_MINOR__ && (__GNUC__) >= 14) || \ + (defined __clang__ && defined __clang_major__ && defined __clang_minor__ & (__clang_major__) >= 17) || \ + (defined _MSC_VER && defined _MSC_FULL_VER && (_MSC_VER) >= 1930) + #if !defined(LTC_NO_SHA512_X86) + #define LTC_SHA512_X86 + #endif + #if !defined(LTC_NO_SHA384_X86) + #define LTC_SHA384_X86 + #endif + #if !defined(LTC_NO_SHA512_256_X86) + #define LTC_SHA512_256_X86 + #endif + #if !defined(LTC_NO_SHA512_224_X86) + #define LTC_SHA512_224_X86 + #endif #endif #endif From e1057cbcece3e71486dff3ff3065adb73606f6c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Kn=C3=A1pek?= Date: Fri, 17 Apr 2026 23:10:00 +0200 Subject: [PATCH 03/10] LTC_S_X86_CPUIDEX --- src/hashes/sha2/sha384_desc.c | 8 +++++++- src/hashes/sha2/sha512_224_desc.c | 8 +++++++- src/hashes/sha2/sha512_256_desc.c | 8 +++++++- src/hashes/sha2/sha512_desc.c | 8 +++++++- 4 files changed, 28 insertions(+), 4 deletions(-) diff --git a/src/hashes/sha2/sha384_desc.c b/src/hashes/sha2/sha384_desc.c index 207b5cfbe..8add43ce9 100644 --- a/src/hashes/sha2/sha384_desc.c +++ b/src/hashes/sha2/sha384_desc.c @@ -53,6 +53,12 @@ static LTC_INLINE void s_x86_cpuid(int* regs, int leaf) regs[3] = d; #endif } +#endif /* LTC_S_X86_CPUID */ +#if !defined (LTC_S_X86_CPUIDEX) +#define LTC_S_X86_CPUIDEX +#if defined _MSC_VER +#include +#endif static LTC_INLINE void s_x86_cpuidex(int* regs, int eax, int ecx) { #if defined _MSC_VER @@ -73,7 +79,7 @@ static LTC_INLINE void s_x86_cpuidex(int* regs, int eax, int ecx) regs[3] = d; #endif } -#endif /* LTC_S_X86_CPUID */ +#endif /* LTC_S_X86_CPUIDEX */ static LTC_INLINE int s_sha384_x86_is_supported(void) { diff --git a/src/hashes/sha2/sha512_224_desc.c b/src/hashes/sha2/sha512_224_desc.c index 1ae7aecd1..e18e51a4e 100644 --- a/src/hashes/sha2/sha512_224_desc.c +++ b/src/hashes/sha2/sha512_224_desc.c @@ -53,6 +53,12 @@ static LTC_INLINE void s_x86_cpuid(int* regs, int leaf) regs[3] = d; #endif } +#endif /* LTC_S_X86_CPUID */ +#if !defined (LTC_S_X86_CPUIDEX) +#define LTC_S_X86_CPUIDEX +#if defined _MSC_VER +#include +#endif static LTC_INLINE void s_x86_cpuidex(int* regs, int eax, int ecx) { #if defined _MSC_VER @@ -73,7 +79,7 @@ static LTC_INLINE void s_x86_cpuidex(int* regs, int eax, int ecx) regs[3] = d; #endif } -#endif /* LTC_S_X86_CPUID */ +#endif /* LTC_S_X86_CPUIDEX */ static LTC_INLINE int s_sha512_224_x86_is_supported(void) { diff --git a/src/hashes/sha2/sha512_256_desc.c b/src/hashes/sha2/sha512_256_desc.c index 31cc136c5..0e38f537b 100644 --- a/src/hashes/sha2/sha512_256_desc.c +++ b/src/hashes/sha2/sha512_256_desc.c @@ -53,6 +53,12 @@ static LTC_INLINE void s_x86_cpuid(int* regs, int leaf) regs[3] = d; #endif } +#endif /* LTC_S_X86_CPUID */ +#if !defined (LTC_S_X86_CPUIDEX) +#define LTC_S_X86_CPUIDEX +#if defined _MSC_VER +#include +#endif static LTC_INLINE void s_x86_cpuidex(int* regs, int eax, int ecx) { #if defined _MSC_VER @@ -73,7 +79,7 @@ static LTC_INLINE void s_x86_cpuidex(int* regs, int eax, int ecx) regs[3] = d; #endif } -#endif /* LTC_S_X86_CPUID */ +#endif /* LTC_S_X86_CPUIDEX */ static LTC_INLINE int s_sha512_256_x86_is_supported(void) { diff --git a/src/hashes/sha2/sha512_desc.c b/src/hashes/sha2/sha512_desc.c index 920fe5abd..a30c55c8d 100644 --- a/src/hashes/sha2/sha512_desc.c +++ b/src/hashes/sha2/sha512_desc.c @@ -53,6 +53,12 @@ static LTC_INLINE void s_x86_cpuid(int* regs, int leaf) regs[3] = d; #endif } +#endif /* LTC_S_X86_CPUID */ +#if !defined (LTC_S_X86_CPUIDEX) +#define LTC_S_X86_CPUIDEX +#if defined _MSC_VER +#include +#endif static LTC_INLINE void s_x86_cpuidex(int* regs, int eax, int ecx) { #if defined _MSC_VER @@ -73,7 +79,7 @@ static LTC_INLINE void s_x86_cpuidex(int* regs, int eax, int ecx) regs[3] = d; #endif } -#endif /* LTC_S_X86_CPUID */ +#endif /* LTC_S_X86_CPUIDEX */ static LTC_INLINE int s_sha512_x86_is_supported(void) { From 9a0c6f82fe40b64ab1b6d85a89e5ec483f5bd299 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Kn=C3=A1pek?= Date: Fri, 17 Apr 2026 23:15:00 +0200 Subject: [PATCH 04/10] SHA-512 constants table. --- src/hashes/sha2/sha512.c | 4 ++-- src/hashes/sha2/sha512_x86.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/hashes/sha2/sha512.c b/src/hashes/sha2/sha512.c index 7fac48a15..8d3d46326 100644 --- a/src/hashes/sha2/sha512.c +++ b/src/hashes/sha2/sha512.c @@ -28,8 +28,8 @@ const struct ltc_hash_descriptor sha512_portable_desc = }; /* the K array */ -#define K sha512_K -static const ulong64 K[80] = { +#define K sha512_k +static const ulong64 sha512_k[80] LTC_ALIGN(32) = { CONST64(0x428a2f98d728ae22), CONST64(0x7137449123ef65cd), CONST64(0xb5c0fbcfec4d3b2f), CONST64(0xe9b5dba58189dbbc), CONST64(0x3956c25bf348b538), CONST64(0x59f111f1b605d019), diff --git a/src/hashes/sha2/sha512_x86.c b/src/hashes/sha2/sha512_x86.c index d778ca8db..d2c27d179 100644 --- a/src/hashes/sha2/sha512_x86.c +++ b/src/hashes/sha2/sha512_x86.c @@ -44,8 +44,8 @@ const struct ltc_hash_descriptor sha512_x86_desc = }; /* the K array */ -#define K sha512_K -static const ulong64 K[80] LTC_ALIGN(32) = { +#define K sha512_x86_k +static const ulong64 sha512_x86_k[80] LTC_ALIGN(32) = { CONST64(0x428a2f98d728ae22), CONST64(0x7137449123ef65cd), CONST64(0xb5c0fbcfec4d3b2f), CONST64(0xe9b5dba58189dbbc), CONST64(0x3956c25bf348b538), CONST64(0x59f111f1b605d019), From 8a896f5316ea2126117f38ad78428f93786aa568 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Kn=C3=A1pek?= Date: Sat, 18 Apr 2026 00:25:00 +0200 Subject: [PATCH 05/10] Bring back unaligned reads. We cannot force clients to align their buffers. --- src/hashes/sha2/sha256_x86.c | 12 +++++------- src/hashes/sha2/sha512_x86.c | 12 +++++------- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/src/hashes/sha2/sha256_x86.c b/src/hashes/sha2/sha256_x86.c index e2da51a59..43a23c913 100644 --- a/src/hashes/sha2/sha256_x86.c +++ b/src/hashes/sha2/sha256_x86.c @@ -18,7 +18,7 @@ #elif defined(_MSC_VER) #include #endif -#include /* SSE2 _mm_load_si128 _mm_store_si128 _mm_set_epi64x _mm_add_epi32 _mm_shuffle_epi32 */ +#include /* SSE2 _mm_load_si128 _mm_loadu_si128 _mm_store_si128 _mm_set_epi64x _mm_add_epi32 _mm_shuffle_epi32 */ #include /* SSSE3 _mm_alignr_epi8 _mm_shuffle_epi8 */ #include /* SSE4.1 _mm_blend_epi16 */ #include /* SHA _mm_sha256msg1_epu32 _mm_sha256msg2_epu32 _mm_sha256rnds2_epu32 */ @@ -89,8 +89,6 @@ static int LTC_SHA_TARGET s_sha256_x86_compress(hash_state * md, const unsigned __m128i msg_2; __m128i msg_3; - LTC_ARGCHK(((ltc_uintptr)(buf)) % 16 == 0); - reverse = _mm_set_epi64x(0x0c0d0e0f08090a0bull, 0x0405060700010203ull); state_0 = _mm_load_si128(((__m128i const*)(&md->sha256.state[0]))); state_1 = _mm_load_si128(((__m128i const*)(&md->sha256.state[4]))); @@ -101,28 +99,28 @@ static int LTC_SHA_TARGET s_sha256_x86_compress(hash_state * md, const unsigned old_0 = state_0; old_1 = state_1; - msg_0 = _mm_load_si128(((__m128i const*)(&buf[0 * 16]))); + msg_0 = _mm_loadu_si128(((__m128i const*)(&buf[0 * 16]))); msg_0 = _mm_shuffle_epi8(msg_0, reverse); tmp = _mm_load_si128(((__m128i const*)(&K[0 * 4]))); msg = _mm_add_epi32(msg_0, tmp); state_1 = _mm_sha256rnds2_epu32(state_1, state_0, msg); msg = _mm_shuffle_epi32(msg, k_shuffle_epi32(k_any, k_any, 0x3, 0x2)); state_0 = _mm_sha256rnds2_epu32(state_0, state_1, msg); - msg_1 = _mm_load_si128(((__m128i const*)(&buf[1 * 16]))); + msg_1 = _mm_loadu_si128(((__m128i const*)(&buf[1 * 16]))); msg_1 = _mm_shuffle_epi8(msg_1, reverse); tmp = _mm_load_si128(((__m128i const*)(&K[1 * 4]))); msg = _mm_add_epi32(msg_1, tmp); state_1 = _mm_sha256rnds2_epu32(state_1, state_0, msg); msg = _mm_shuffle_epi32(msg, k_shuffle_epi32(k_any, k_any, 0x3, 0x2)); state_0 = _mm_sha256rnds2_epu32(state_0, state_1, msg); - msg_2 = _mm_load_si128(((__m128i const*)(&buf[2 * 16]))); + msg_2 = _mm_loadu_si128(((__m128i const*)(&buf[2 * 16]))); msg_2 = _mm_shuffle_epi8(msg_2, reverse); tmp = _mm_load_si128(((__m128i const*)(&K[2 * 4]))); msg = _mm_add_epi32(msg_2, tmp); state_1 = _mm_sha256rnds2_epu32(state_1, state_0, msg); msg = _mm_shuffle_epi32(msg, k_shuffle_epi32(k_any, k_any, 0x3, 0x2)); state_0 = _mm_sha256rnds2_epu32(state_0, state_1, msg); - msg_3 = _mm_load_si128(((__m128i const*)(&buf[3 * 16]))); + msg_3 = _mm_loadu_si128(((__m128i const*)(&buf[3 * 16]))); msg_3 = _mm_shuffle_epi8(msg_3, reverse); tmp = _mm_load_si128(((__m128i const*)(&K[3 * 4]))); msg = _mm_add_epi32(msg_3, tmp); diff --git a/src/hashes/sha2/sha512_x86.c b/src/hashes/sha2/sha512_x86.c index d2c27d179..f315c5991 100644 --- a/src/hashes/sha2/sha512_x86.c +++ b/src/hashes/sha2/sha512_x86.c @@ -18,7 +18,7 @@ #include #endif #include /* SSE2 _mm_set_epi64x */ -#include /* AVX _mm256_castsi128_si256 _mm256_castsi256_si128 _mm256_load_si256 _mm256_store_si256 */ +#include /* AVX _mm256_castsi128_si256 _mm256_castsi256_si128 _mm256_load_si256 _mm256_loadu_si256 _mm256_store_si256 */ #include /* AVX2 _mm256_add_epi64 _mm256_blend_epi32 _mm256_permute4x64_epi64 _mm256_shuffle_epi8 */ #include /* SHA512 _mm256_sha512msg1_epi64 _mm256_sha512msg2_epi64 _mm256_sha512rnds2_epi64 */ #if defined(__GNUC__) @@ -115,8 +115,6 @@ static int LTC_SHA512_TARGET s_sha512_x86_compress(hash_state * md, const unsign __m256i msg_c; __m256i msg_d; - LTC_ARGCHK(((ltc_uintptr)(buf)) % 32 == 0); - reverse = _mm256_permute4x64_epi64(_mm256_castsi128_si256(_mm_set_epi64x(0x08090a0b0c0d0e0full, 0x0001020304050607ull)), ltc_permute_epi64_k(0x1, 0x0, 0x1, 0x0)); state_a = _mm256_load_si256(((__m256i const*)(&md->sha512.state[0]))); state_b = _mm256_load_si256(((__m256i const*)(&md->sha512.state[4]))); @@ -130,28 +128,28 @@ static int LTC_SHA512_TARGET s_sha512_x86_compress(hash_state * md, const unsign old_a = state_a; old_b = state_b; tmp_a = _mm256_load_si256(((__m256i const*)(&K[0 * (256 / (sizeof(ulong64) * CHAR_BIT))]))); - msg_a = _mm256_load_si256(((__m256i const*)(&buf[0 * (256 / CHAR_BIT)]))); + msg_a = _mm256_loadu_si256(((__m256i const*)(&buf[0 * (256 / CHAR_BIT)]))); msg_a = _mm256_shuffle_epi8(msg_a, reverse); tmp_a = _mm256_add_epi64(tmp_a, msg_a); state_a = _mm256_sha512rnds2_epi64(state_a, state_b, _mm256_castsi256_si128(tmp_a)); tmp_a = _mm256_permute4x64_epi64(tmp_a, ltc_permute_epi64_k(any, any, 0x3, 0x2)); state_b = _mm256_sha512rnds2_epi64(state_b, state_a, _mm256_castsi256_si128(tmp_a)); tmp_a = _mm256_load_si256(((__m256i const*)(&K[1 * (256 / (sizeof(ulong64) * CHAR_BIT))]))); - msg_b = _mm256_load_si256(((__m256i const*)(&buf[1 * (256 / CHAR_BIT)]))); + msg_b = _mm256_loadu_si256(((__m256i const*)(&buf[1 * (256 / CHAR_BIT)]))); msg_b = _mm256_shuffle_epi8(msg_b, reverse); tmp_a = _mm256_add_epi64(tmp_a, msg_b); state_a = _mm256_sha512rnds2_epi64(state_a, state_b, _mm256_castsi256_si128(tmp_a)); tmp_a = _mm256_permute4x64_epi64(tmp_a, ltc_permute_epi64_k(any, any, 0x3, 0x2)); state_b = _mm256_sha512rnds2_epi64(state_b, state_a, _mm256_castsi256_si128(tmp_a)); tmp_a = _mm256_load_si256(((__m256i const*)(&K[2 * (256 / (sizeof(ulong64) * CHAR_BIT))]))); - msg_c = _mm256_load_si256(((__m256i const*)(&buf[2 * (256 / CHAR_BIT)]))); + msg_c = _mm256_loadu_si256(((__m256i const*)(&buf[2 * (256 / CHAR_BIT)]))); msg_c = _mm256_shuffle_epi8(msg_c, reverse); tmp_a = _mm256_add_epi64(tmp_a, msg_c); state_a = _mm256_sha512rnds2_epi64(state_a, state_b, _mm256_castsi256_si128(tmp_a)); tmp_a = _mm256_permute4x64_epi64(tmp_a, ltc_permute_epi64_k(any, any, 0x3, 0x2)); state_b = _mm256_sha512rnds2_epi64(state_b, state_a, _mm256_castsi256_si128(tmp_a)); tmp_a = _mm256_load_si256(((__m256i const*)(&K[3 * (256 / (sizeof(ulong64) * CHAR_BIT))]))); - msg_d = _mm256_load_si256(((__m256i const*)(&buf[3 * (256 / CHAR_BIT)]))); + msg_d = _mm256_loadu_si256(((__m256i const*)(&buf[3 * (256 / CHAR_BIT)]))); msg_d = _mm256_shuffle_epi8(msg_d, reverse); tmp_a = _mm256_add_epi64(tmp_a, msg_d); state_a = _mm256_sha512rnds2_epi64(state_a, state_b, _mm256_castsi256_si128(tmp_a)); From 67e88113d3bdb0b48870d6869c4179a8580af0b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Kn=C3=A1pek?= Date: Thu, 23 Apr 2026 08:10:00 +0200 Subject: [PATCH 06/10] LTC_UNUSED_PARAM --- src/hashes/sha2/sha224_desc.c | 4 ++-- src/hashes/sha2/sha256_desc.c | 4 ++-- src/hashes/sha2/sha384_desc.c | 2 ++ src/hashes/sha2/sha512_224_desc.c | 2 ++ src/hashes/sha2/sha512_256_desc.c | 2 ++ src/hashes/sha2/sha512_desc.c | 2 ++ 6 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/hashes/sha2/sha224_desc.c b/src/hashes/sha2/sha224_desc.c index ccd1c23e1..4907d33f0 100644 --- a/src/hashes/sha2/sha224_desc.c +++ b/src/hashes/sha2/sha224_desc.c @@ -124,8 +124,8 @@ int sha224_test(void) int sha224_test_desc(const struct ltc_hash_descriptor *desc, const char *name) { #ifndef LTC_TEST - (void)desc; - (void)name; + LTC_UNUSED_PARAM(desc); + LTC_UNUSED_PARAM(name); return CRYPT_NOP; #else static const struct { diff --git a/src/hashes/sha2/sha256_desc.c b/src/hashes/sha2/sha256_desc.c index 29410bb39..d08f67232 100644 --- a/src/hashes/sha2/sha256_desc.c +++ b/src/hashes/sha2/sha256_desc.c @@ -144,8 +144,8 @@ int sha256_test(void) int sha256_test_desc(const struct ltc_hash_descriptor *desc, const char *name) { #ifndef LTC_TEST - (void)desc; - (void)name; + LTC_UNUSED_PARAM(desc); + LTC_UNUSED_PARAM(name); return CRYPT_NOP; #else static const struct { diff --git a/src/hashes/sha2/sha384_desc.c b/src/hashes/sha2/sha384_desc.c index 8add43ce9..5c9a2e2eb 100644 --- a/src/hashes/sha2/sha384_desc.c +++ b/src/hashes/sha2/sha384_desc.c @@ -154,6 +154,8 @@ int sha384_test(void) int sha384_test_desc(const struct ltc_hash_descriptor *desc, const char *name) { #ifndef LTC_TEST + LTC_UNUSED_PARAM(desc); + LTC_UNUSED_PARAM(name); return CRYPT_NOP; #else static const struct { diff --git a/src/hashes/sha2/sha512_224_desc.c b/src/hashes/sha2/sha512_224_desc.c index e18e51a4e..072c9cad0 100644 --- a/src/hashes/sha2/sha512_224_desc.c +++ b/src/hashes/sha2/sha512_224_desc.c @@ -154,6 +154,8 @@ int sha512_224_test(void) int sha512_224_test_desc(const struct ltc_hash_descriptor *desc, const char *name) { #ifndef LTC_TEST + LTC_UNUSED_PARAM(desc); + LTC_UNUSED_PARAM(name); return CRYPT_NOP; #else static const struct { diff --git a/src/hashes/sha2/sha512_256_desc.c b/src/hashes/sha2/sha512_256_desc.c index 0e38f537b..6ff4748b9 100644 --- a/src/hashes/sha2/sha512_256_desc.c +++ b/src/hashes/sha2/sha512_256_desc.c @@ -154,6 +154,8 @@ int sha512_256_test(void) int sha512_256_test_desc(const struct ltc_hash_descriptor *desc, const char *name) { #ifndef LTC_TEST + LTC_UNUSED_PARAM(desc); + LTC_UNUSED_PARAM(name); return CRYPT_NOP; #else static const struct { diff --git a/src/hashes/sha2/sha512_desc.c b/src/hashes/sha2/sha512_desc.c index a30c55c8d..c5cb47f60 100644 --- a/src/hashes/sha2/sha512_desc.c +++ b/src/hashes/sha2/sha512_desc.c @@ -171,6 +171,8 @@ int sha512_test(void) int sha512_test_desc(const struct ltc_hash_descriptor *desc, const char *name) { #ifndef LTC_TEST + LTC_UNUSED_PARAM(desc); + LTC_UNUSED_PARAM(name); return CRYPT_NOP; #else static const struct { From 772d9168bb36f0d8e18583d16bc7634bb8dfe884 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Kn=C3=A1pek?= Date: Thu, 23 Apr 2026 09:01:00 +0200 Subject: [PATCH 07/10] Fix from code review. --- src/hashes/sha2/sha512_224_desc.c | 4 ++-- src/hashes/sha2/sha512_256_desc.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/hashes/sha2/sha512_224_desc.c b/src/hashes/sha2/sha512_224_desc.c index 072c9cad0..d91bbd288 100644 --- a/src/hashes/sha2/sha512_224_desc.c +++ b/src/hashes/sha2/sha512_224_desc.c @@ -27,7 +27,7 @@ const struct ltc_hash_descriptor sha512_224_desc = NULL }; -#if defined LTC_SHA224_X86 +#if defined LTC_SHA512_224_X86 #if !defined (LTC_S_X86_CPUID) #define LTC_S_X86_CPUID @@ -109,7 +109,7 @@ static LTC_INLINE int s_sha512_224_x86_is_supported(void) } return is_supported; } -#endif /* LTC_SHA224_X86 */ +#endif /* LTC_SHA512_224_X86 */ /** Initialize the hash state diff --git a/src/hashes/sha2/sha512_256_desc.c b/src/hashes/sha2/sha512_256_desc.c index 6ff4748b9..34ee40439 100644 --- a/src/hashes/sha2/sha512_256_desc.c +++ b/src/hashes/sha2/sha512_256_desc.c @@ -27,7 +27,7 @@ const struct ltc_hash_descriptor sha512_256_desc = NULL }; -#if defined LTC_SHA256_X86 +#if defined LTC_SHA512_256_X86 #if !defined (LTC_S_X86_CPUID) #define LTC_S_X86_CPUID @@ -109,7 +109,7 @@ static LTC_INLINE int s_sha512_256_x86_is_supported(void) } return is_supported; } -#endif /* LTC_SHA256_X86 */ +#endif /* LTC_SHA512_256_X86 */ /** Initialize the hash state From 3542e52fd0bed2ca57d2dbe9695af836bff04443 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Kn=C3=A1pek?= Date: Thu, 23 Apr 2026 13:00:00 +0200 Subject: [PATCH 08/10] Code review, alignment. --- src/hashes/sha2/sha512.c | 3 ++- src/hashes/sha2/sha512_x86.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/hashes/sha2/sha512.c b/src/hashes/sha2/sha512.c index 8d3d46326..3f54a1cbe 100644 --- a/src/hashes/sha2/sha512.c +++ b/src/hashes/sha2/sha512.c @@ -29,7 +29,8 @@ const struct ltc_hash_descriptor sha512_portable_desc = /* the K array */ #define K sha512_k -static const ulong64 sha512_k[80] LTC_ALIGN(32) = { +LTC_ALIGN_MSVC(32) +static const ulong64 K[80] LTC_ALIGN(32) = { CONST64(0x428a2f98d728ae22), CONST64(0x7137449123ef65cd), CONST64(0xb5c0fbcfec4d3b2f), CONST64(0xe9b5dba58189dbbc), CONST64(0x3956c25bf348b538), CONST64(0x59f111f1b605d019), diff --git a/src/hashes/sha2/sha512_x86.c b/src/hashes/sha2/sha512_x86.c index f315c5991..ceb36c2f5 100644 --- a/src/hashes/sha2/sha512_x86.c +++ b/src/hashes/sha2/sha512_x86.c @@ -45,7 +45,8 @@ const struct ltc_hash_descriptor sha512_x86_desc = /* the K array */ #define K sha512_x86_k -static const ulong64 sha512_x86_k[80] LTC_ALIGN(32) = { +LTC_ALIGN_MSVC(32) +static const ulong64 K[80] LTC_ALIGN(32) = { CONST64(0x428a2f98d728ae22), CONST64(0x7137449123ef65cd), CONST64(0xb5c0fbcfec4d3b2f), CONST64(0xe9b5dba58189dbbc), CONST64(0x3956c25bf348b538), CONST64(0x59f111f1b605d019), From c7bf5041ec951ac4013f87a2c6e655af0125db00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Kn=C3=A1pek?= Date: Thu, 23 Apr 2026 13:15:00 +0200 Subject: [PATCH 09/10] post-rebase-fix --- src/headers/tomcrypt_cfg.h | 2 -- src/headers/tomcrypt_private.h | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/headers/tomcrypt_cfg.h b/src/headers/tomcrypt_cfg.h index e771e4127..b1052e584 100644 --- a/src/headers/tomcrypt_cfg.h +++ b/src/headers/tomcrypt_cfg.h @@ -413,8 +413,6 @@ typedef unsigned long ltc_mp_digit; #else # define LTC_ATTRIBUTE(x) #endif -#define LTC_SHA512_TARGET __attribute__((__target__("sse2,avx,avx2,sha512"))) -#define LTC_SHA512_TARGET #if !defined(LTC_NO_GCM_PMULL) && (defined(__aarch64__) || defined(_M_ARM64)) #define LTC_GCM_PMULL diff --git a/src/headers/tomcrypt_private.h b/src/headers/tomcrypt_private.h index 1344b423f..d8068a5e2 100644 --- a/src/headers/tomcrypt_private.h +++ b/src/headers/tomcrypt_private.h @@ -184,6 +184,7 @@ int func_name (hash_state * md, const unsigned char *in, unsigned long inlen) #define LTC_SHA_TARGET LTC_ATTRIBUTE((__target__("sse2,ssse3,sse4.1,sha"))) +#define LTC_SHA512_TARGET LTC_ATTRIBUTE((__target__("sse2,avx,avx2,sha512"))) #ifdef LTC_SHA1 int sha1_test_desc(const struct ltc_hash_descriptor *desc, const char *name); From 425f5160f5f5fdd518093fa2efa0d3165ef5f096 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Kn=C3=A1pek?= Date: Thu, 23 Apr 2026 16:25:00 +0200 Subject: [PATCH 10/10] CPUID unification --- src/ciphers/aes/aes_desc.c | 23 -------------- src/encauth/gcm/gcm_gf_mult.c | 23 -------------- src/hashes/sha1_desc.c | 23 -------------- src/hashes/sha2/sha224_desc.c | 26 ---------------- src/hashes/sha2/sha256_desc.c | 26 ---------------- src/hashes/sha2/sha384_desc.c | 52 ------------------------------- src/hashes/sha2/sha512_224_desc.c | 52 ------------------------------- src/hashes/sha2/sha512_256_desc.c | 52 ------------------------------- src/hashes/sha2/sha512_desc.c | 52 ------------------------------- src/headers/tomcrypt_private.h | 47 ++++++++++++++++++++++++++++ 10 files changed, 47 insertions(+), 329 deletions(-) diff --git a/src/ciphers/aes/aes_desc.c b/src/ciphers/aes/aes_desc.c index c03e1bb36..b803ca2b5 100644 --- a/src/ciphers/aes/aes_desc.c +++ b/src/ciphers/aes/aes_desc.c @@ -11,29 +11,6 @@ #if defined(LTC_ARCH_X86) && (defined(LTC_AES_NI) || !defined(ENCRYPT_ONLY)) -#if !defined (LTC_S_X86_CPUID) -#define LTC_S_X86_CPUID -static LTC_INLINE void s_x86_cpuid(int* regs, int leaf) -{ -#if defined _MSC_VER - __cpuid(regs, leaf); -#else - int a, b, c, d; - - a = leaf; - b = c = d = 0; - asm volatile ("cpuid" - :"=a"(a), "=b"(b), "=c"(c), "=d"(d) - :"a"(a), "c"(c) - ); - regs[0] = a; - regs[1] = b; - regs[2] = c; - regs[3] = d; -#endif -} -#endif /* LTC_S_X86_CPUID */ - static LTC_INLINE int s_aesni_is_supported(void) { static int initialized = 0, is_supported = 0; diff --git a/src/encauth/gcm/gcm_gf_mult.c b/src/encauth/gcm/gcm_gf_mult.c index c9f968ecb..873cc5ea3 100644 --- a/src/encauth/gcm/gcm_gf_mult.c +++ b/src/encauth/gcm/gcm_gf_mult.c @@ -29,29 +29,6 @@ #pragma GCC diagnostic pop #endif -#if !defined (LTC_S_X86_CPUID) -#define LTC_S_X86_CPUID -static LTC_INLINE void s_x86_cpuid(int* regs, int leaf) -{ -#if defined _MSC_VER - __cpuid(regs, leaf); -#else - int a, b, c, d; - - a = leaf; - b = c = d = 0; - asm volatile ("cpuid" - :"=a"(a), "=b"(b), "=c"(c), "=d"(d) - :"a"(a), "c"(c) - ); - regs[0] = a; - regs[1] = b; - regs[2] = c; - regs[3] = d; -#endif -} -#endif /* LTC_S_X86_CPUID */ - static LTC_INLINE int s_pclmul_is_supported(void) { static int initialized = 0, is_supported = 0; diff --git a/src/hashes/sha1_desc.c b/src/hashes/sha1_desc.c index bc6a7de34..11cb09c26 100644 --- a/src/hashes/sha1_desc.c +++ b/src/hashes/sha1_desc.c @@ -24,29 +24,6 @@ const struct ltc_hash_descriptor sha1_desc = #if defined LTC_SHA1_X86 -#if !defined (LTC_S_X86_CPUID) -#define LTC_S_X86_CPUID -static LTC_INLINE void s_x86_cpuid(int* regs, int leaf) -{ -#if defined _MSC_VER - __cpuid(regs, leaf); -#else - int a, b, c, d; - - a = leaf; - b = c = d = 0; - asm volatile ("cpuid" - :"=a"(a), "=b"(b), "=c"(c), "=d"(d) - :"a"(a), "c"(c) - ); - regs[0] = a; - regs[1] = b; - regs[2] = c; - regs[3] = d; -#endif -} -#endif /* LTC_S_X86_CPUID */ - static LTC_INLINE int s_sha1_x86_is_supported(void) { static int initialized = 0; diff --git a/src/hashes/sha2/sha224_desc.c b/src/hashes/sha2/sha224_desc.c index 4907d33f0..98012d7e1 100644 --- a/src/hashes/sha2/sha224_desc.c +++ b/src/hashes/sha2/sha224_desc.c @@ -29,32 +29,6 @@ const struct ltc_hash_descriptor sha224_desc = #if defined LTC_SHA224_X86 -#if !defined (LTC_S_X86_CPUID) -#define LTC_S_X86_CPUID -#if defined _MSC_VER -#include -#endif -static LTC_INLINE void s_x86_cpuid(int* regs, int leaf) -{ -#if defined _MSC_VER - __cpuid(regs, leaf); -#else - int a, b, c, d; - - a = leaf; - b = c = d = 0; - asm volatile ("cpuid" - :"=a"(a), "=b"(b), "=c"(c), "=d"(d) - :"a"(a), "c"(c) - ); - regs[0] = a; - regs[1] = b; - regs[2] = c; - regs[3] = d; -#endif -} -#endif /* LTC_S_X86_CPUID */ - static LTC_INLINE int s_sha224_x86_is_supported(void) { static int initialized = 0; diff --git a/src/hashes/sha2/sha256_desc.c b/src/hashes/sha2/sha256_desc.c index d08f67232..89712edca 100644 --- a/src/hashes/sha2/sha256_desc.c +++ b/src/hashes/sha2/sha256_desc.c @@ -4,32 +4,6 @@ #if defined LTC_ARCH_X86 -#if !defined (LTC_S_X86_CPUID) -#define LTC_S_X86_CPUID -#if defined _MSC_VER -#include -#endif -static LTC_INLINE void s_x86_cpuid(int* regs, int leaf) -{ -#if defined _MSC_VER - __cpuid(regs, leaf); -#else - int a, b, c, d; - - a = leaf; - b = c = d = 0; - asm volatile ("cpuid" - :"=a"(a), "=b"(b), "=c"(c), "=d"(d) - :"a"(a), "c"(c) - ); - regs[0] = a; - regs[1] = b; - regs[2] = c; - regs[3] = d; -#endif -} -#endif /* LTC_S_X86_CPUID */ - static LTC_INLINE int s_sha256_x86_is_supported(void) { static int initialized = 0; diff --git a/src/hashes/sha2/sha384_desc.c b/src/hashes/sha2/sha384_desc.c index 5c9a2e2eb..51ef203bf 100644 --- a/src/hashes/sha2/sha384_desc.c +++ b/src/hashes/sha2/sha384_desc.c @@ -29,58 +29,6 @@ const struct ltc_hash_descriptor sha384_desc = #if defined LTC_SHA384_X86 -#if !defined (LTC_S_X86_CPUID) -#define LTC_S_X86_CPUID -#if defined _MSC_VER -#include -#endif -static LTC_INLINE void s_x86_cpuid(int* regs, int leaf) -{ -#if defined _MSC_VER - __cpuid(regs, leaf); -#else - int a, b, c, d; - - a = leaf; - b = c = d = 0; - asm volatile ("cpuid" - :"=a"(a), "=b"(b), "=c"(c), "=d"(d) - :"a"(a), "c"(c) - ); - regs[0] = a; - regs[1] = b; - regs[2] = c; - regs[3] = d; -#endif -} -#endif /* LTC_S_X86_CPUID */ -#if !defined (LTC_S_X86_CPUIDEX) -#define LTC_S_X86_CPUIDEX -#if defined _MSC_VER -#include -#endif -static LTC_INLINE void s_x86_cpuidex(int* regs, int eax, int ecx) -{ -#if defined _MSC_VER - __cpuidex(regs, eax, ecx); -#else - int a, b, c, d; - - a = eax; - c = ecx; - b = d = 0; - asm volatile ("cpuid" - :"=a"(a), "=b"(b), "=c"(c), "=d"(d) - :"0"(a), "2"(c) - ); - regs[0] = a; - regs[1] = b; - regs[2] = c; - regs[3] = d; -#endif -} -#endif /* LTC_S_X86_CPUIDEX */ - static LTC_INLINE int s_sha384_x86_is_supported(void) { static int initialized = 0; diff --git a/src/hashes/sha2/sha512_224_desc.c b/src/hashes/sha2/sha512_224_desc.c index d91bbd288..1ce52aced 100644 --- a/src/hashes/sha2/sha512_224_desc.c +++ b/src/hashes/sha2/sha512_224_desc.c @@ -29,58 +29,6 @@ const struct ltc_hash_descriptor sha512_224_desc = #if defined LTC_SHA512_224_X86 -#if !defined (LTC_S_X86_CPUID) -#define LTC_S_X86_CPUID -#if defined _MSC_VER -#include -#endif -static LTC_INLINE void s_x86_cpuid(int* regs, int leaf) -{ -#if defined _MSC_VER - __cpuid(regs, leaf); -#else - int a, b, c, d; - - a = leaf; - b = c = d = 0; - asm volatile ("cpuid" - :"=a"(a), "=b"(b), "=c"(c), "=d"(d) - :"a"(a), "c"(c) - ); - regs[0] = a; - regs[1] = b; - regs[2] = c; - regs[3] = d; -#endif -} -#endif /* LTC_S_X86_CPUID */ -#if !defined (LTC_S_X86_CPUIDEX) -#define LTC_S_X86_CPUIDEX -#if defined _MSC_VER -#include -#endif -static LTC_INLINE void s_x86_cpuidex(int* regs, int eax, int ecx) -{ -#if defined _MSC_VER - __cpuidex(regs, eax, ecx); -#else - int a, b, c, d; - - a = eax; - c = ecx; - b = d = 0; - asm volatile ("cpuid" - :"=a"(a), "=b"(b), "=c"(c), "=d"(d) - :"0"(a), "2"(c) - ); - regs[0] = a; - regs[1] = b; - regs[2] = c; - regs[3] = d; -#endif -} -#endif /* LTC_S_X86_CPUIDEX */ - static LTC_INLINE int s_sha512_224_x86_is_supported(void) { static int initialized = 0; diff --git a/src/hashes/sha2/sha512_256_desc.c b/src/hashes/sha2/sha512_256_desc.c index 34ee40439..db62dc9b9 100644 --- a/src/hashes/sha2/sha512_256_desc.c +++ b/src/hashes/sha2/sha512_256_desc.c @@ -29,58 +29,6 @@ const struct ltc_hash_descriptor sha512_256_desc = #if defined LTC_SHA512_256_X86 -#if !defined (LTC_S_X86_CPUID) -#define LTC_S_X86_CPUID -#if defined _MSC_VER -#include -#endif -static LTC_INLINE void s_x86_cpuid(int* regs, int leaf) -{ -#if defined _MSC_VER - __cpuid(regs, leaf); -#else - int a, b, c, d; - - a = leaf; - b = c = d = 0; - asm volatile ("cpuid" - :"=a"(a), "=b"(b), "=c"(c), "=d"(d) - :"a"(a), "c"(c) - ); - regs[0] = a; - regs[1] = b; - regs[2] = c; - regs[3] = d; -#endif -} -#endif /* LTC_S_X86_CPUID */ -#if !defined (LTC_S_X86_CPUIDEX) -#define LTC_S_X86_CPUIDEX -#if defined _MSC_VER -#include -#endif -static LTC_INLINE void s_x86_cpuidex(int* regs, int eax, int ecx) -{ -#if defined _MSC_VER - __cpuidex(regs, eax, ecx); -#else - int a, b, c, d; - - a = eax; - c = ecx; - b = d = 0; - asm volatile ("cpuid" - :"=a"(a), "=b"(b), "=c"(c), "=d"(d) - :"0"(a), "2"(c) - ); - regs[0] = a; - regs[1] = b; - regs[2] = c; - regs[3] = d; -#endif -} -#endif /* LTC_S_X86_CPUIDEX */ - static LTC_INLINE int s_sha512_256_x86_is_supported(void) { static int initialized = 0; diff --git a/src/hashes/sha2/sha512_desc.c b/src/hashes/sha2/sha512_desc.c index c5cb47f60..b98f71010 100644 --- a/src/hashes/sha2/sha512_desc.c +++ b/src/hashes/sha2/sha512_desc.c @@ -29,58 +29,6 @@ const struct ltc_hash_descriptor sha512_desc = #if defined LTC_SHA512_X86 -#if !defined (LTC_S_X86_CPUID) -#define LTC_S_X86_CPUID -#if defined _MSC_VER -#include -#endif -static LTC_INLINE void s_x86_cpuid(int* regs, int leaf) -{ -#if defined _MSC_VER - __cpuid(regs, leaf); -#else - int a, b, c, d; - - a = leaf; - b = c = d = 0; - asm volatile ("cpuid" - :"=a"(a), "=b"(b), "=c"(c), "=d"(d) - :"a"(a), "c"(c) - ); - regs[0] = a; - regs[1] = b; - regs[2] = c; - regs[3] = d; -#endif -} -#endif /* LTC_S_X86_CPUID */ -#if !defined (LTC_S_X86_CPUIDEX) -#define LTC_S_X86_CPUIDEX -#if defined _MSC_VER -#include -#endif -static LTC_INLINE void s_x86_cpuidex(int* regs, int eax, int ecx) -{ -#if defined _MSC_VER - __cpuidex(regs, eax, ecx); -#else - int a, b, c, d; - - a = eax; - c = ecx; - b = d = 0; - asm volatile ("cpuid" - :"=a"(a), "=b"(b), "=c"(c), "=d"(d) - :"0"(a), "2"(c) - ); - regs[0] = a; - regs[1] = b; - regs[2] = c; - regs[3] = d; -#endif -} -#endif /* LTC_S_X86_CPUIDEX */ - static LTC_INLINE int s_sha512_x86_is_supported(void) { static int initialized = 0; diff --git a/src/headers/tomcrypt_private.h b/src/headers/tomcrypt_private.h index d8068a5e2..f79db376f 100644 --- a/src/headers/tomcrypt_private.h +++ b/src/headers/tomcrypt_private.h @@ -958,4 +958,51 @@ int which ## _export(unsigned char *out, unsigned long *outlen, prng_state *prng #endif #endif +#if defined LTC_ARCH_X86 +#if defined _MSC_VER +#include +#pragma intrinsic(__cpuid) +#pragma intrinsic(__cpuidex) +#endif +static LTC_INLINE void s_x86_cpuid(int* regs, int leaf) +{ +#if defined _MSC_VER + __cpuid(regs, leaf); +#else + int a, b, c, d; + + a = leaf; + b = c = d = 0; + asm volatile ("cpuid" + :"=a"(a), "=b"(b), "=c"(c), "=d"(d) + :"a"(a), "c"(c) + ); + regs[0] = a; + regs[1] = b; + regs[2] = c; + regs[3] = d; +#endif +} +static LTC_INLINE void s_x86_cpuidex(int* regs, int eax, int ecx) +{ +#if defined _MSC_VER + __cpuidex(regs, eax, ecx); +#else + int a, b, c, d; + + a = eax; + c = ecx; + b = d = 0; + asm volatile ("cpuid" + :"=a"(a), "=b"(b), "=c"(c), "=d"(d) + :"a"(a), "c"(c) + ); + regs[0] = a; + regs[1] = b; + regs[2] = c; + regs[3] = d; +#endif +} +#endif /* LTC_ARCH_X86 */ + #endif /* TOMCRYPT_PRIVATE_H_ */