summaryrefslogtreecommitdiffstats
path: root/cryptopp562/panama.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'cryptopp562/panama.cpp')
-rw-r--r--cryptopp562/panama.cpp507
1 files changed, 0 insertions, 507 deletions
diff --git a/cryptopp562/panama.cpp b/cryptopp562/panama.cpp
deleted file mode 100644
index 5fc4f94..0000000
--- a/cryptopp562/panama.cpp
+++ /dev/null
@@ -1,507 +0,0 @@
-// panama.cpp - written and placed in the public domain by Wei Dai
-
-// use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM panama.cpp" to generate MASM code
-
-#include "pch.h"
-
-#ifndef CRYPTOPP_GENERATE_X64_MASM
-
-#include "panama.h"
-#include "misc.h"
-#include "cpu.h"
-
-NAMESPACE_BEGIN(CryptoPP)
-
-template <class B>
-void Panama<B>::Reset()
-{
- memset(m_state, 0, m_state.SizeInBytes());
-#if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
- m_state[17] = HasSSSE3();
-#endif
-}
-
-#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM
-
-#ifdef CRYPTOPP_X64_MASM_AVAILABLE
-extern "C" {
-void Panama_SSE2_Pull(size_t count, word32 *state, word32 *z, const word32 *y);
-}
-#elif CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
-
-#ifdef CRYPTOPP_GENERATE_X64_MASM
- Panama_SSE2_Pull PROC FRAME
- rex_push_reg rdi
- alloc_stack(2*16)
- save_xmm128 xmm6, 0h
- save_xmm128 xmm7, 10h
- .endprolog
-#else
-#pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
-void CRYPTOPP_NOINLINE Panama_SSE2_Pull(size_t count, word32 *state, word32 *z, const word32 *y)
-{
-#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
- asm __volatile__
- (
- ".intel_syntax noprefix;"
- AS_PUSH_IF86( bx)
-#else
- AS2( mov AS_REG_1, count)
- AS2( mov AS_REG_2, state)
- AS2( mov AS_REG_3, z)
- AS2( mov AS_REG_4, y)
-#endif
-#endif // #ifdef CRYPTOPP_GENERATE_X64_MASM
-
-#if CRYPTOPP_BOOL_X86
- #define REG_loopEnd [esp]
-#elif defined(CRYPTOPP_GENERATE_X64_MASM)
- #define REG_loopEnd rdi
-#else
- #define REG_loopEnd r8
-#endif
-
- AS2( shl AS_REG_1, 5)
- ASJ( jz, 5, f)
- AS2( mov AS_REG_6d, [AS_REG_2+4*17])
- AS2( add AS_REG_1, AS_REG_6)
-
- #if CRYPTOPP_BOOL_X64
- AS2( mov REG_loopEnd, AS_REG_1)
- #else
- AS1( push ebp)
- AS1( push AS_REG_1)
- #endif
-
- AS2( movdqa xmm0, XMMWORD_PTR [AS_REG_2+0*16])
- AS2( movdqa xmm1, XMMWORD_PTR [AS_REG_2+1*16])
- AS2( movdqa xmm2, XMMWORD_PTR [AS_REG_2+2*16])
- AS2( movdqa xmm3, XMMWORD_PTR [AS_REG_2+3*16])
- AS2( mov eax, dword ptr [AS_REG_2+4*16])
-
- ASL(4)
- // gamma and pi
-#if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
- AS2( test AS_REG_6, 1)
- ASJ( jnz, 6, f)
-#endif
- AS2( movdqa xmm6, xmm2)
- AS2( movss xmm6, xmm3)
- ASS( pshufd xmm5, xmm6, 0, 3, 2, 1)
- AS2( movd xmm6, eax)
- AS2( movdqa xmm7, xmm3)
- AS2( movss xmm7, xmm6)
- ASS( pshufd xmm6, xmm7, 0, 3, 2, 1)
-#if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
- ASJ( jmp, 7, f)
- ASL(6)
- AS2( movdqa xmm5, xmm3)
- AS3( palignr xmm5, xmm2, 4)
- AS2( movd xmm6, eax)
- AS3( palignr xmm6, xmm3, 4)
- ASL(7)
-#endif
-
- AS2( movd AS_REG_1d, xmm2)
- AS1( not AS_REG_1d)
- AS2( movd AS_REG_7d, xmm3)
- AS2( or AS_REG_1d, AS_REG_7d)
- AS2( xor eax, AS_REG_1d)
-
-#define SSE2_Index(i) ASM_MOD(((i)*13+16), 17)
-
-#define pi(i) \
- AS2( movd AS_REG_1d, xmm7)\
- AS2( rol AS_REG_1d, ASM_MOD((ASM_MOD(5*i,17)*(ASM_MOD(5*i,17)+1)/2), 32))\
- AS2( mov [AS_REG_2+SSE2_Index(ASM_MOD(5*(i), 17))*4], AS_REG_1d)
-
-#define pi4(x, y, z, a, b, c, d) \
- AS2( pcmpeqb xmm7, xmm7)\
- AS2( pxor xmm7, x)\
- AS2( por xmm7, y)\
- AS2( pxor xmm7, z)\
- pi(a)\
- ASS( pshuflw xmm7, xmm7, 1, 0, 3, 2)\
- pi(b)\
- AS2( punpckhqdq xmm7, xmm7)\
- pi(c)\
- ASS( pshuflw xmm7, xmm7, 1, 0, 3, 2)\
- pi(d)
-
- pi4(xmm1, xmm2, xmm3, 1, 5, 9, 13)
- pi4(xmm0, xmm1, xmm2, 2, 6, 10, 14)
- pi4(xmm6, xmm0, xmm1, 3, 7, 11, 15)
- pi4(xmm5, xmm6, xmm0, 4, 8, 12, 16)
-
- // output keystream and update buffer here to hide partial memory stalls between pi and theta
- AS2( movdqa xmm4, xmm3)
- AS2( punpcklqdq xmm3, xmm2) // 1 5 2 6
- AS2( punpckhdq xmm4, xmm2) // 9 10 13 14
- AS2( movdqa xmm2, xmm1)
- AS2( punpcklqdq xmm1, xmm0) // 3 7 4 8
- AS2( punpckhdq xmm2, xmm0) // 11 12 15 16
-
- // keystream
- AS2( test AS_REG_3, AS_REG_3)
- ASJ( jz, 0, f)
- AS2( movdqa xmm6, xmm4)
- AS2( punpcklqdq xmm4, xmm2)
- AS2( punpckhqdq xmm6, xmm2)
- AS2( test AS_REG_4, 15)
- ASJ( jnz, 2, f)
- AS2( test AS_REG_4, AS_REG_4)
- ASJ( jz, 1, f)
- AS2( pxor xmm4, [AS_REG_4])
- AS2( pxor xmm6, [AS_REG_4+16])
- AS2( add AS_REG_4, 32)
- ASJ( jmp, 1, f)
- ASL(2)
- AS2( movdqu xmm0, [AS_REG_4])
- AS2( movdqu xmm2, [AS_REG_4+16])
- AS2( pxor xmm4, xmm0)
- AS2( pxor xmm6, xmm2)
- AS2( add AS_REG_4, 32)
- ASL(1)
- AS2( test AS_REG_3, 15)
- ASJ( jnz, 3, f)
- AS2( movdqa XMMWORD_PTR [AS_REG_3], xmm4)
- AS2( movdqa XMMWORD_PTR [AS_REG_3+16], xmm6)
- AS2( add AS_REG_3, 32)
- ASJ( jmp, 0, f)
- ASL(3)
- AS2( movdqu XMMWORD_PTR [AS_REG_3], xmm4)
- AS2( movdqu XMMWORD_PTR [AS_REG_3+16], xmm6)
- AS2( add AS_REG_3, 32)
- ASL(0)
-
- // buffer update
- AS2( lea AS_REG_1, [AS_REG_6 + 32])
- AS2( and AS_REG_1, 31*32)
- AS2( lea AS_REG_7, [AS_REG_6 + (32-24)*32])
- AS2( and AS_REG_7, 31*32)
-
- AS2( movdqa xmm0, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+0*8])
- AS2( pxor xmm3, xmm0)
- ASS( pshufd xmm0, xmm0, 2, 3, 0, 1)
- AS2( movdqa XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+0*8], xmm3)
- AS2( pxor xmm0, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+2*8])
- AS2( movdqa XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+2*8], xmm0)
-
- AS2( movdqa xmm4, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+2*8])
- AS2( pxor xmm1, xmm4)
- AS2( movdqa XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+2*8], xmm1)
- AS2( pxor xmm4, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+0*8])
- AS2( movdqa XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+0*8], xmm4)
-
- // theta
- AS2( movdqa xmm3, XMMWORD_PTR [AS_REG_2+3*16])
- AS2( movdqa xmm2, XMMWORD_PTR [AS_REG_2+2*16])
- AS2( movdqa xmm1, XMMWORD_PTR [AS_REG_2+1*16])
- AS2( movdqa xmm0, XMMWORD_PTR [AS_REG_2+0*16])
-
-#if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
- AS2( test AS_REG_6, 1)
- ASJ( jnz, 8, f)
-#endif
- AS2( movd xmm6, eax)
- AS2( movdqa xmm7, xmm3)
- AS2( movss xmm7, xmm6)
- AS2( movdqa xmm6, xmm2)
- AS2( movss xmm6, xmm3)
- AS2( movdqa xmm5, xmm1)
- AS2( movss xmm5, xmm2)
- AS2( movdqa xmm4, xmm0)
- AS2( movss xmm4, xmm1)
- ASS( pshufd xmm7, xmm7, 0, 3, 2, 1)
- ASS( pshufd xmm6, xmm6, 0, 3, 2, 1)
- ASS( pshufd xmm5, xmm5, 0, 3, 2, 1)
- ASS( pshufd xmm4, xmm4, 0, 3, 2, 1)
-#if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
- ASJ( jmp, 9, f)
- ASL(8)
- AS2( movd xmm7, eax)
- AS3( palignr xmm7, xmm3, 4)
- AS2( movq xmm6, xmm3)
- AS3( palignr xmm6, xmm2, 4)
- AS2( movq xmm5, xmm2)
- AS3( palignr xmm5, xmm1, 4)
- AS2( movq xmm4, xmm1)
- AS3( palignr xmm4, xmm0, 4)
- ASL(9)
-#endif
-
- AS2( xor eax, 1)
- AS2( movd AS_REG_1d, xmm0)
- AS2( xor eax, AS_REG_1d)
- AS2( movd AS_REG_1d, xmm3)
- AS2( xor eax, AS_REG_1d)
-
- AS2( pxor xmm3, xmm2)
- AS2( pxor xmm2, xmm1)
- AS2( pxor xmm1, xmm0)
- AS2( pxor xmm0, xmm7)
- AS2( pxor xmm3, xmm7)
- AS2( pxor xmm2, xmm6)
- AS2( pxor xmm1, xmm5)
- AS2( pxor xmm0, xmm4)
-
- // sigma
- AS2( lea AS_REG_1, [AS_REG_6 + (32-4)*32])
- AS2( and AS_REG_1, 31*32)
- AS2( lea AS_REG_7, [AS_REG_6 + 16*32])
- AS2( and AS_REG_7, 31*32)
-
- AS2( movdqa xmm4, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+0*16])
- AS2( movdqa xmm5, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+0*16])
- AS2( movdqa xmm6, xmm4)
- AS2( punpcklqdq xmm4, xmm5)
- AS2( punpckhqdq xmm6, xmm5)
- AS2( pxor xmm3, xmm4)
- AS2( pxor xmm2, xmm6)
-
- AS2( movdqa xmm4, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+1*16])
- AS2( movdqa xmm5, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+1*16])
- AS2( movdqa xmm6, xmm4)
- AS2( punpcklqdq xmm4, xmm5)
- AS2( punpckhqdq xmm6, xmm5)
- AS2( pxor xmm1, xmm4)
- AS2( pxor xmm0, xmm6)
-
- // loop
- AS2( add AS_REG_6, 32)
- AS2( cmp AS_REG_6, REG_loopEnd)
- ASJ( jne, 4, b)
-
- // save state
- AS2( mov [AS_REG_2+4*16], eax)
- AS2( movdqa XMMWORD_PTR [AS_REG_2+3*16], xmm3)
- AS2( movdqa XMMWORD_PTR [AS_REG_2+2*16], xmm2)
- AS2( movdqa XMMWORD_PTR [AS_REG_2+1*16], xmm1)
- AS2( movdqa XMMWORD_PTR [AS_REG_2+0*16], xmm0)
-
- #if CRYPTOPP_BOOL_X86
- AS2( add esp, 4)
- AS1( pop ebp)
- #endif
- ASL(5)
-
-#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
- AS_POP_IF86( bx)
- ".att_syntax prefix;"
- :
- #if CRYPTOPP_BOOL_X64
- : "D" (count), "S" (state), "d" (z), "c" (y)
- : "%r8", "%r9", "r10", "%eax", "memory", "cc", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
- #else
- : "c" (count), "d" (state), "S" (z), "D" (y)
- : "%eax", "memory", "cc"
- #endif
- );
-#endif
-#ifdef CRYPTOPP_GENERATE_X64_MASM
- movdqa xmm6, [rsp + 0h]
- movdqa xmm7, [rsp + 10h]
- add rsp, 2*16
- pop rdi
- ret
- Panama_SSE2_Pull ENDP
-#else
-}
-#endif
-#endif // #ifdef CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
-
-#ifndef CRYPTOPP_GENERATE_X64_MASM
-
-template <class B>
-void Panama<B>::Iterate(size_t count, const word32 *p, byte *output, const byte *input, KeystreamOperation operation)
-{
- word32 bstart = m_state[17];
- word32 *const aPtr = m_state;
- word32 cPtr[17];
-
-#define bPtr ((byte *)(aPtr+20))
-
-// reorder the state for SSE2
-// a and c: 4 8 12 16 | 3 7 11 15 | 2 6 10 14 | 1 5 9 13 | 0
-// xmm0 xmm1 xmm2 xmm3 eax
-#define a(i) aPtr[((i)*13+16) % 17] // 13 is inverse of 4 mod 17
-#define c(i) cPtr[((i)*13+16) % 17]
-// b: 0 4 | 1 5 | 2 6 | 3 7
-#define b(i, j) b##i[(j)*2%8 + (j)/4]
-
-// buffer update
-#define US(i) {word32 t=b(0,i); b(0,i)=ConditionalByteReverse(B::ToEnum(), p[i])^t; b(25,(i+6)%8)^=t;}
-#define UL(i) {word32 t=b(0,i); b(0,i)=a(i+1)^t; b(25,(i+6)%8)^=t;}
-// gamma and pi
-#define GP(i) c(5*i%17) = rotlFixed(a(i) ^ (a((i+1)%17) | ~a((i+2)%17)), ((5*i%17)*((5*i%17)+1)/2)%32)
-// theta and sigma
-#define T(i,x) a(i) = c(i) ^ c((i+1)%17) ^ c((i+4)%17) ^ x
-#define TS1S(i) T(i+1, ConditionalByteReverse(B::ToEnum(), p[i]))
-#define TS1L(i) T(i+1, b(4,i))
-#define TS2(i) T(i+9, b(16,i))
-
- while (count--)
- {
- if (output)
- {
-#define PANAMA_OUTPUT(x) \
- CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 0, a(0+9));\
- CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 1, a(1+9));\
- CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 2, a(2+9));\
- CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 3, a(3+9));\
- CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 4, a(4+9));\
- CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 5, a(5+9));\
- CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 6, a(6+9));\
- CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 7, a(7+9));
-
- typedef word32 WordType;
- CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(PANAMA_OUTPUT, 4*8);
- }
-
- word32 *const b16 = (word32 *)(bPtr+((bstart+16*32) & 31*32));
- word32 *const b4 = (word32 *)(bPtr+((bstart+(32-4)*32) & 31*32));
- bstart += 32;
- word32 *const b0 = (word32 *)(bPtr+((bstart) & 31*32));
- word32 *const b25 = (word32 *)(bPtr+((bstart+(32-25)*32) & 31*32));
-
- if (p)
- {
- US(0); US(1); US(2); US(3); US(4); US(5); US(6); US(7);
- }
- else
- {
- UL(0); UL(1); UL(2); UL(3); UL(4); UL(5); UL(6); UL(7);
- }
-
- GP(0);
- GP(1);
- GP(2);
- GP(3);
- GP(4);
- GP(5);
- GP(6);
- GP(7);
- GP(8);
- GP(9);
- GP(10);
- GP(11);
- GP(12);
- GP(13);
- GP(14);
- GP(15);
- GP(16);
-
- T(0,1);
-
- if (p)
- {
- TS1S(0); TS1S(1); TS1S(2); TS1S(3); TS1S(4); TS1S(5); TS1S(6); TS1S(7);
- p += 8;
- }
- else
- {
- TS1L(0); TS1L(1); TS1L(2); TS1L(3); TS1L(4); TS1L(5); TS1L(6); TS1L(7);
- }
-
- TS2(0); TS2(1); TS2(2); TS2(3); TS2(4); TS2(5); TS2(6); TS2(7);
- }
- m_state[17] = bstart;
-}
-
-namespace Weak {
-template <class B>
-size_t PanamaHash<B>::HashMultipleBlocks(const word32 *input, size_t length)
-{
- this->Iterate(length / this->BLOCKSIZE, input);
- return length % this->BLOCKSIZE;
-}
-
-template <class B>
-void PanamaHash<B>::TruncatedFinal(byte *hash, size_t size)
-{
- this->ThrowIfInvalidTruncatedSize(size);
-
- this->PadLastBlock(this->BLOCKSIZE, 0x01);
-
- HashEndianCorrectedBlock(this->m_data);
-
- this->Iterate(32); // pull
-
- FixedSizeSecBlock<word32, 8> buf;
- this->Iterate(1, NULL, buf.BytePtr(), NULL);
-
- memcpy(hash, buf, size);
-
- this->Restart(); // reinit for next use
-}
-}
-
-template <class B>
-void PanamaCipherPolicy<B>::CipherSetKey(const NameValuePairs &params, const byte *key, size_t length)
-{
- assert(length==32);
- memcpy(m_key, key, 32);
-}
-
-template <class B>
-void PanamaCipherPolicy<B>::CipherResynchronize(byte *keystreamBuffer, const byte *iv, size_t length)
-{
- assert(length==32);
- this->Reset();
- this->Iterate(1, m_key);
- if (iv && IsAligned<word32>(iv))
- this->Iterate(1, (const word32 *)iv);
- else
- {
- FixedSizeSecBlock<word32, 8> buf;
- if (iv)
- memcpy(buf, iv, 32);
- else
- memset(buf, 0, 32);
- this->Iterate(1, buf);
- }
-
-#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
- if (B::ToEnum() == LITTLE_ENDIAN_ORDER && HasSSE2() && !IsP4()) // SSE2 code is slower on P4 Prescott
- Panama_SSE2_Pull(32, this->m_state, NULL, NULL);
- else
-#endif
- this->Iterate(32);
-}
-
-#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X64
-template <class B>
-unsigned int PanamaCipherPolicy<B>::GetAlignment() const
-{
-#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
- if (B::ToEnum() == LITTLE_ENDIAN_ORDER && HasSSE2())
- return 16;
- else
-#endif
- return 1;
-}
-#endif
-
-template <class B>
-void PanamaCipherPolicy<B>::OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount)
-{
-#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
- if (B::ToEnum() == LITTLE_ENDIAN_ORDER && HasSSE2())
- Panama_SSE2_Pull(iterationCount, this->m_state, (word32 *)output, (const word32 *)input);
- else
-#endif
- this->Iterate(iterationCount, NULL, output, input, operation);
-}
-
-template class Panama<BigEndian>;
-template class Panama<LittleEndian>;
-
-template class Weak::PanamaHash<BigEndian>;
-template class Weak::PanamaHash<LittleEndian>;
-
-template class PanamaCipherPolicy<BigEndian>;
-template class PanamaCipherPolicy<LittleEndian>;
-
-NAMESPACE_END
-
-#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM