cipher/rijndael.c
Context not available.
                            size_t nblocks, int encrypt);
#endif /*USE_ARM_ASM*/
#ifdef USE_PPC_ASM
/* POWER 8 AES extensions */
extern void aes_p8_encrypt (const unsigned char *in,       | #include <altivec.h>
                            unsigned char *out,            |
                            const RIJNDAEL_context *ctx);  | typedef vector unsigned char block;

vector unsigned char backwards = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0};

#ifdef __LITTLE_ENDIAN__
#define swap_if_le(a) \
  vec_perm(a, a, backwards)
#elif __BIG_ENDIAN__
#define swap_if_le(a) (a)
#else
#error "What endianness?"
#endif
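The vcipher/vncipher instructions expect the AES state in big-endian element order, so on a little-endian target swap_if_le reverses the sixteen bytes of a block with vec_perm. As a quick illustration, here is a minimal stand-alone check of that permute; it is only a sketch (the test values and the printf harness are not part of the patch) and assumes a POWER target built with AltiVec support:

#include <altivec.h>
#include <stdio.h>

int main (void)
{
  vector unsigned char backwards =
    {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0};
  vector unsigned char in =
    { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15};
  /* result element i = in[backwards[i]] = in[15 - i], i.e. the block
     with its byte order reversed */
  vector unsigned char out = vec_perm (in, in, backwards);
  int i;
  for (i = 0; i < 16; i++)
    printf ("%d ", ((unsigned char *)&out)[i]);   /* prints 15 14 ... 1 0 */
  printf ("\n");
  return 0;
}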
/* State is passed in AltiVec registers (big-endian element order).
 * Sadly, compilers don't know how to unroll outer loops into
 * inner loops with more registers when static functions are involved,
 * so the bulk paths below have to be hand-unrolled to be properly
 * optimized for OOO multi-issue.
 */
static block _gcry_aes_ppc8_encrypt_altivec (const RIJNDAEL_context *ctx,
                                             block a) {
  int r;
  uintptr_t zero = 0;
  int rounds = ctx->rounds;
  block *rk = (block*)ctx->keyschenc;

  //hexDump("sa", &a, sizeof(a));
  a = rk[0] ^ a;
  //hexDump("sa", &a, sizeof(a));
  for (r = 1; r < rounds; r++) {
    __asm__ volatile ("vcipher %0, %0, %1\n\t"
                      :"+v" (a)
                      :"v" (rk[r]));
    //hexDump("sa", &a, sizeof(a));
  }
  __asm__ volatile ("vcipherlast %0, %0, %1\n\t"
                    :"+v" (a)
                    :"v" (rk[r]));
  //hexDump("end", &a, sizeof(a));
  return a;
}

static block _gcry_aes_ppc8_decrypt_altivec (const RIJNDAEL_context *ctx,
                                             block a) {
  int r;
  uintptr_t zero = 0;
  int rounds = ctx->rounds;
  block *rk = (block*)ctx->keyschdec;

  //hexDump("sa", &a, sizeof(a));
  a = rk[0] ^ a;
  //hexDump("sa", &a, sizeof(a));
  for (r = 1; r < rounds; r++) {
    __asm__ volatile ("vncipher %0, %0, %1\n\t"
                      :"+v" (a)
                      :"v" (rk[r]));
    //hexDump("sa", &a, sizeof(a));
  }
  __asm__ volatile ("vncipherlast %0, %0, %1\n\t"
                    :"+v" (a)
                    :"v" (rk[r]));
  //hexDump("end", &a, sizeof(a));
  return a;
}
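For orientation, the key-schedule walk in the two helpers above can be written out in plain C with a generic round callback. This is only a hedged sketch (xor16, aes_rounds_sketch and the callback names are hypothetical, not part of libgcrypt); it merely makes explicit that rk holds rounds + 1 sixteen-byte round keys, that each vcipher/vncipher step consumes one of them, and that the *last variants finish with the final round key:

typedef unsigned char byte16[16];

static void xor16 (byte16 dst, const byte16 src)
{
  int i;
  for (i = 0; i < 16; i++)
    dst[i] ^= src[i];
}

/* state is updated in place; rk has rounds + 1 entries of 16 bytes.
   round() stands for what one vcipher/vncipher does with a round key,
   last_round() for vcipherlast/vncipherlast. */
static void aes_rounds_sketch (byte16 state, const byte16 *rk, int rounds,
                               void (*round) (byte16, const byte16),
                               void (*last_round) (byte16, const byte16))
{
  int r;
  xor16 (state, rk[0]);            /* initial whitening, as in a = rk[0] ^ a */
  for (r = 1; r < rounds; r++)
    round (state, rk[r]);          /* one round per remaining key ...        */
  last_round (state, rk[r]);       /* ... with the final round key last      */
}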
static unsigned int _gcry_aes_ppc8_encrypt (const RIJNDAEL_context *ctx,
                                            unsigned char *out,           |                                             unsigned char *b,
                                            const unsigned char *in)      |                                             const unsigned char *a) {
{                                                                         |   uintptr_t zero = 0;
  /* When I tried to switch these registers in the assembly it broke. */  |   block sa;
  aes_p8_encrypt (in, out, ctx);                                          |   //hexDump("key", rk_c, 16 * 15);
  if ((uintptr_t)a % 16 == 0) {
    sa = vec_ld(0, a);
  } else {
    block unalignedprev, unalignedcur;
    unalignedprev = vec_ld(0, a);
    unalignedcur = vec_ld(16, a);
    sa = vec_perm(unalignedprev, unalignedcur, vec_lvsl(0, a));
  }
  sa = swap_if_le(sa);
  sa = _gcry_aes_ppc8_encrypt_altivec(ctx, sa);
  __asm__ volatile ("stxvb16x %x0, %1, %2\n\t"
                    :
                    : "wa" (sa), "r" (zero), "r" ((uintptr_t)b));
  return 0; /* does not use stack */
}

/* this is the decryption key part of context */
extern void aes_p8_decrypt (const unsigned char *in,
                            unsigned char *out,
                            const void *sboxes);

static unsigned int _gcry_aes_ppc8_decrypt (const RIJNDAEL_context *ctx,
                                            unsigned char *out,           |                                             unsigned char *b,
                                            const unsigned char *in)      |                                             const unsigned char *a)
{
  aes_p8_decrypt (in, out, &ctx->u2);                                     |   int r;
  uintptr_t zero = 0;
  int rounds = ctx->rounds;
  block sa, unalignedprev, unalignedcur;
  block *rk = (block*)ctx->keyschdec;
  //hexDump("key", rk, 16 * 15);
  if ((uintptr_t)a % 16 == 0) {
    sa = vec_ld(0, a);
  } else {
    unalignedprev = vec_ld(0, a);
    unalignedcur = vec_ld(16, a);
    sa = vec_perm(unalignedprev, unalignedcur, vec_lvsl(0, a));
  }
  sa = swap_if_le(sa);
  sa = _gcry_aes_ppc8_decrypt_altivec(ctx, sa);
  //hexDump("sa", &sa, sizeof(sa));
  if ((uintptr_t)b % 16 == 0)
    vec_vsx_st(swap_if_le(sa), 0, b);
  else {
    __asm__ volatile ("stxvb16x %x0, %1, %2\n\t"
                      :
                      : "wa" (sa), "r" (zero), "r" ((uintptr_t)b));
  }
  return 0; /* does not use stack */
}
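Both wrappers load a possibly misaligned input block with the classic AltiVec sequence: two vec_ld loads (vec_ld ignores the low address bits), then vec_perm with a vec_lvsl-generated control to pick out the sixteen bytes that start at the unaligned address. Below is a hedged sketch of just that idiom, with a made-up helper name and reusing the block typedef from above; like the code in this patch it assumes the extra bytes touched by the second aligned load are readable:

static block
load_block_sketch (const unsigned char *p)
{
  if (((uintptr_t)p % 16) == 0)
    return vec_ld (0, p);        /* already aligned: a single load suffices */
  else
    {
      /* The two aligned loads together cover the 16 bytes at p;
         vec_lvsl(0, p) supplies the permute control that selects them
         from the concatenated pair, exactly as done above.  */
      block lo = vec_ld (0, p);
      block hi = vec_ld (16, p);
      return vec_perm (lo, hi, vec_lvsl (0, p));
    }
}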
size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, | |||||
const void *inbuf_arg, size_t nblocks, | |||||
int encrypt) { | |||||
RIJNDAEL_context *ctx = (void *)&c->context.c; | |||||
unsigned char *outbuf = outbuf_arg; | |||||
const unsigned char *inbuf = inbuf_arg; | |||||
block *in = (block*)inbuf; | |||||
block *out = (block*)outbuf; | |||||
uintptr_t zero = 0; | |||||
int r; | |||||
int rounds = ctx->rounds; | |||||
int burn_depth = 0; | |||||
if (encrypt) | |||||
{ | |||||
const int unroll = 8; | |||||
block unalignedprev, ctr, iv; | |||||
if (((uintptr_t)inbuf % 16) != 0) { | |||||
unalignedprev = vec_ld(0, in++); | |||||
} | |||||
iv = vec_ld(0, (block*)&c->u_iv.iv); | |||||
ctr = vec_ld(0, (block*)&c->u_ctr.ctr); | |||||
//hexDump("ctr", &ctr, 16); | |||||
//hexDump("key", &ctx->u1, sizeof(ctx->u1)); | |||||
for ( ;nblocks >= unroll; nblocks -= unroll) | |||||
{ | |||||
u64 i = c->u_mode.ocb.data_nblocks + 1; | |||||
block l0, l1, l2, l3, l4, l5, l6, l7; | |||||
block b0, b1, b2, b3, b4, b5, b6, b7; | |||||
block iv0, iv1, iv2, iv3, iv4, iv5, iv6, iv7; | |||||
const block *rk = (block*)&ctx->keyschenc; | |||||
int j; | |||||
c->u_mode.ocb.data_nblocks += unroll; | |||||
//hexDump("iv", &iv, 16); | |||||
iv0 = iv; | |||||
if ((uintptr_t)inbuf % 16 == 0) | |||||
{ | |||||
b0 = vec_ld(0, in++); | |||||
//hexDump("start", &b0, 16); | |||||
b1 = vec_ld(0, in++); | |||||
b2 = vec_ld(0, in++); | |||||
b3 = vec_ld(0, in++); | |||||
b4 = vec_ld(0, in++); | |||||
b5 = vec_ld(0, in++); | |||||
b6 = vec_ld(0, in++); | |||||
b7 = vec_ld(0, in++); | |||||
} | |||||
else | |||||
{ | |||||
block unaligned0, unaligned1, unaligned2, | |||||
unaligned3, unaligned4, unaligned5, unaligned6; | |||||
unaligned0 = vec_ld(0, in++); | |||||
unaligned1 = vec_ld(0, in++); | |||||
unaligned2 = vec_ld(0, in++); | |||||
unaligned3 = vec_ld(0, in++); | |||||
unaligned4 = vec_ld(0, in++); | |||||
unaligned5 = vec_ld(0, in++); | |||||
unaligned6 = vec_ld(0, in++); | |||||
b0 = vec_perm(unalignedprev, unaligned0, vec_lvsl(0, inbuf)); | |||||
//hexDump("start", &b0, 16); | |||||
unalignedprev = vec_ld(0, in++); | |||||
b1 = vec_perm(unaligned0, unaligned1, vec_lvsl(0, inbuf)); | |||||
b2 = vec_perm(unaligned1, unaligned2, vec_lvsl(0, inbuf)); | |||||
b3 = vec_perm(unaligned2, unaligned3, vec_lvsl(0, inbuf)); | |||||
b4 = vec_perm(unaligned3, unaligned4, vec_lvsl(0, inbuf)); | |||||
b5 = vec_perm(unaligned4, unaligned5, vec_lvsl(0, inbuf)); | |||||
b6 = vec_perm(unaligned5, unaligned6, vec_lvsl(0, inbuf)); | |||||
b7 = vec_perm(unaligned6, unalignedprev, vec_lvsl(0, inbuf)); | |||||
} | |||||
//hexDump("i", &i, sizeof(i)); | |||||
l0 = *(block*)ocb_get_l(c, i++); | |||||
//hexDump("l", &l0, 16); | |||||
l1 = *(block*)ocb_get_l(c, i++); | |||||
l2 = *(block*)ocb_get_l(c, i++); | |||||
l3 = *(block*)ocb_get_l(c, i++); | |||||
l4 = *(block*)ocb_get_l(c, i++); | |||||
l5 = *(block*)ocb_get_l(c, i++); | |||||
l6 = *(block*)ocb_get_l(c, i++); | |||||
l7 = *(block*)ocb_get_l(c, i++); | |||||
ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7; | |||||
iv0 ^= l0; | |||||
//hexDump("xorl", &iv0, 16); | |||||
b0 ^= iv0; | |||||
//hexDump("xor", &b0, 16); | |||||
iv1 = iv0 ^ l1; | |||||
b1 ^= iv1; | |||||
iv2 = iv1 ^ l2; | |||||
b2 ^= iv2; | |||||
iv3 = iv2 ^ l3; | |||||
b3 ^= iv3; | |||||
iv4 = iv3 ^ l4; | |||||
b4 ^= iv4; | |||||
iv5 = iv4 ^ l5; | |||||
b5 ^= iv5; | |||||
iv6 = iv5 ^ l6; | |||||
b6 ^= iv6; | |||||
iv7 = iv6 ^ l7; | |||||
b7 ^= iv7; | |||||
b0 = swap_if_le(b0); | |||||
//hexDump("swap", &b0, 16); | |||||
b1 = swap_if_le(b1); | |||||
b2 = swap_if_le(b2); | |||||
b3 = swap_if_le(b3); | |||||
b4 = swap_if_le(b4); | |||||
b5 = swap_if_le(b5); | |||||
b6 = swap_if_le(b6); | |||||
b7 = swap_if_le(b7); | |||||
b0 ^= rk[0]; | |||||
//hexDump("xor ??", &b0, 16); | |||||
b1 ^= rk[0]; | |||||
b2 ^= rk[0]; | |||||
b3 ^= rk[0]; | |||||
b4 ^= rk[0]; | |||||
b5 ^= rk[0]; | |||||
b6 ^= rk[0]; | |||||
b7 ^= rk[0]; | |||||
for (r = 1;r < rounds;r++) | |||||
{ | |||||
__asm__ volatile ("vcipher %0, %0, %1\n\t" | |||||
:"+v" (b0) | |||||
:"v" (rk[r]) | |||||
); | |||||
//hexDump("round", &b0, 16); | |||||
__asm__ volatile ("vcipher %0, %0, %1\n\t" | |||||
:"+v" (b1) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vcipher %0, %0, %1\n\t" | |||||
:"+v" (b2) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vcipher %0, %0, %1\n\t" | |||||
:"+v" (b3) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vcipher %0, %0, %1\n\t" | |||||
:"+v" (b4) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vcipher %0, %0, %1\n\t" | |||||
:"+v" (b5) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vcipher %0, %0, %1\n\t" | |||||
:"+v" (b6) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vcipher %0, %0, %1\n\t" | |||||
:"+v" (b7) | |||||
:"v" (rk[r]) | |||||
); | |||||
} | |||||
__asm__ volatile ("vcipherlast %0, %0, %1\n\t" | |||||
:"+v" (b0) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vcipherlast %0, %0, %1\n\t" | |||||
:"+v" (b1) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vcipherlast %0, %0, %1\n\t" | |||||
:"+v" (b2) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vcipherlast %0, %0, %1\n\t" | |||||
:"+v" (b3) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vcipherlast %0, %0, %1\n\t" | |||||
:"+v" (b4) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vcipherlast %0, %0, %1\n\t" | |||||
:"+v" (b5) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vcipherlast %0, %0, %1\n\t" | |||||
:"+v" (b6) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vcipherlast %0, %0, %1\n\t" | |||||
:"+v" (b7) | |||||
:"v" (rk[r]) | |||||
); | |||||
//hexDump("end", &b0, 16); | |||||
iv = iv7; | |||||
//hexDump("end-iv5", &b0, 16); | |||||
// The unaligned store stxvb16x writes big-endian, | |||||
// so in the unaligned case we swap the iv instead of the bytes | |||||
if ((uintptr_t)outbuf % 16 == 0) | |||||
{ | |||||
vec_vsx_st(swap_if_le(b0) ^ iv0, 0, out++); | |||||
//hexDump("out", out - 1, 16); | |||||
vec_vsx_st(swap_if_le(b1) ^ iv1, 0, out++); | |||||
vec_vsx_st(swap_if_le(b2) ^ iv2, 0, out++); | |||||
vec_vsx_st(swap_if_le(b3) ^ iv3, 0, out++); | |||||
vec_vsx_st(swap_if_le(b4) ^ iv4, 0, out++); | |||||
vec_vsx_st(swap_if_le(b5) ^ iv5, 0, out++); | |||||
vec_vsx_st(swap_if_le(b6) ^ iv6, 0, out++); | |||||
vec_vsx_st(swap_if_le(b7) ^ iv7, 0, out++); | |||||
} | |||||
else | |||||
{ | |||||
b0 ^= swap_if_le(iv0); | |||||
b1 ^= swap_if_le(iv1); | |||||
b2 ^= swap_if_le(iv2); | |||||
b3 ^= swap_if_le(iv3); | |||||
b4 ^= swap_if_le(iv4); | |||||
b5 ^= swap_if_le(iv5); | |||||
b6 ^= swap_if_le(iv6); | |||||
b7 ^= swap_if_le(iv7); | |||||
__asm__ volatile ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (b0), "r" (zero), "r" ((uintptr_t)(out++))); | |||||
//hexDump("out-un", out - 1, 16); | |||||
__asm__ volatile ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (b1), "r" (zero), "r" ((uintptr_t)(out++))); | |||||
__asm__ volatile ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (b2), "r" (zero), "r" ((uintptr_t)(out++))); | |||||
__asm__ volatile ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (b3), "r" (zero), "r" ((uintptr_t)(out++))); | |||||
__asm__ volatile ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (b4), "r" (zero), "r" ((uintptr_t)(out++))); | |||||
__asm__ volatile ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (b5), "r" (zero), "r" ((uintptr_t)(out++))); | |||||
__asm__ volatile ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (b6), "r" (zero), "r" ((uintptr_t)(out++))); | |||||
__asm__ volatile ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (b7), "r" (zero), "r" ((uintptr_t)(out++))); | |||||
} | |||||
} | |||||
for ( ;nblocks; nblocks-- ) | |||||
{ | |||||
block b; | |||||
u64 i = ++c->u_mode.ocb.data_nblocks; | |||||
const block l = *(block*)ocb_get_l(c, i); | |||||
/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ | |||||
iv ^= l; | |||||
if ((uintptr_t)in % 16 == 0) { | |||||
b = vec_ld(0, in++); | |||||
} else { | |||||
block unalignedprevprev; | |||||
unalignedprevprev = unalignedprev; | |||||
unalignedprev = vec_ld(0, in++); | |||||
b = vec_perm(unalignedprevprev, unalignedprev, vec_lvsl(0, inbuf)); | |||||
} | |||||
//hexDump("start", &b, 16); | |||||
/* Checksum_i = Checksum_{i-1} xor P_i */ | |||||
ctr ^= b; | |||||
//hexDump("ctr", &ctr, 16); | |||||
/* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ | |||||
b ^= iv; | |||||
//hexDump("xoriv", &b, 16); | |||||
b = swap_if_le(b); | |||||
b = _gcry_aes_ppc8_encrypt_altivec (ctx, b); | |||||
//hexDump("crypt", &b, 16); | |||||
if ((uintptr_t)out % 16 == 0) | |||||
vec_vsx_st(swap_if_le(b) ^ iv, 0, out++); | |||||
else { | |||||
b ^= swap_if_le(iv); | |||||
__asm__ volatile ("stxvb16x %x0, %1, %2\n\t" | |||||
: | |||||
: "wa" (b), "r" (zero), "r" ((uintptr_t)out++)); | |||||
} | |||||
//hexDump("out", out - 1, 16); | |||||
} | |||||
// We want to store iv and ctr big-endian and the unaligned | |||||
// store stxvb16x stores them little endian, so we have to swap them. | |||||
iv = swap_if_le(iv); | |||||
__asm__ volatile ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (iv), "r" (zero), "r" ((uintptr_t)&c->u_iv.iv)); | |||||
ctr = swap_if_le(ctr); | |||||
__asm__ volatile ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (ctr), "r" (zero), "r" ((uintptr_t)&c->u_ctr.ctr)); | |||||
} | |||||
else | |||||
{ | |||||
const int unroll = 8; | |||||
block unalignedprev, ctr, iv; | |||||
if (((uintptr_t)inbuf % 16) != 0) { | |||||
unalignedprev = vec_ld(0, in++); | |||||
} | |||||
iv = vec_ld(0, (block*)&c->u_iv.iv); | |||||
ctr = vec_ld(0, (block*)&c->u_ctr.ctr); | |||||
//hexDump("ctr", &ctr, 16); | |||||
//hexDump("key", &ctx->u1, sizeof(ctx->u1)); | |||||
for ( ;nblocks >= unroll; nblocks -= unroll) | |||||
{ | |||||
u64 i = c->u_mode.ocb.data_nblocks + 1; | |||||
block l0, l1, l2, l3, l4, l5, l6, l7; | |||||
block b0, b1, b2, b3, b4, b5, b6, b7; | |||||
block iv0, iv1, iv2, iv3, iv4, iv5, iv6, iv7; | |||||
const block *rk = (block*)&ctx->keyschdec; | |||||
int j; | |||||
c->u_mode.ocb.data_nblocks += unroll; | |||||
//hexDump("iv", &iv, 16); | |||||
iv0 = iv; | |||||
if ((uintptr_t)inbuf % 16 == 0) | |||||
{ | |||||
b0 = vec_ld(0, in++); | |||||
//hexDump("start", &b0, 16); | |||||
b1 = vec_ld(0, in++); | |||||
b2 = vec_ld(0, in++); | |||||
b3 = vec_ld(0, in++); | |||||
b4 = vec_ld(0, in++); | |||||
b5 = vec_ld(0, in++); | |||||
b6 = vec_ld(0, in++); | |||||
b7 = vec_ld(0, in++); | |||||
} | |||||
else | |||||
{ | |||||
block unaligned0, unaligned1, unaligned2, | |||||
unaligned3, unaligned4, unaligned5, unaligned6; | |||||
unaligned0 = vec_ld(0, in++); | |||||
unaligned1 = vec_ld(0, in++); | |||||
unaligned2 = vec_ld(0, in++); | |||||
unaligned3 = vec_ld(0, in++); | |||||
unaligned4 = vec_ld(0, in++); | |||||
unaligned5 = vec_ld(0, in++); | |||||
unaligned6 = vec_ld(0, in++); | |||||
b0 = vec_perm(unalignedprev, unaligned0, vec_lvsl(0, inbuf)); | |||||
//hexDump("start", &b0, 16); | |||||
unalignedprev = vec_ld(0, in++); | |||||
b1 = vec_perm(unaligned0, unaligned1, vec_lvsl(0, inbuf)); | |||||
b2 = vec_perm(unaligned1, unaligned2, vec_lvsl(0, inbuf)); | |||||
b3 = vec_perm(unaligned2, unaligned3, vec_lvsl(0, inbuf)); | |||||
b4 = vec_perm(unaligned3, unaligned4, vec_lvsl(0, inbuf)); | |||||
b5 = vec_perm(unaligned4, unaligned5, vec_lvsl(0, inbuf)); | |||||
b6 = vec_perm(unaligned5, unaligned6, vec_lvsl(0, inbuf)); | |||||
b7 = vec_perm(unaligned6, unalignedprev, vec_lvsl(0, inbuf)); | |||||
} | |||||
//hexDump("i", &i, sizeof(i)); | |||||
l0 = *(block*)ocb_get_l(c, i++); | |||||
//hexDump("l", &l0, 16); | |||||
l1 = *(block*)ocb_get_l(c, i++); | |||||
l2 = *(block*)ocb_get_l(c, i++); | |||||
l3 = *(block*)ocb_get_l(c, i++); | |||||
l4 = *(block*)ocb_get_l(c, i++); | |||||
l5 = *(block*)ocb_get_l(c, i++); | |||||
l6 = *(block*)ocb_get_l(c, i++); | |||||
l7 = *(block*)ocb_get_l(c, i++); | |||||
iv0 ^= l0; | |||||
//hexDump("xorl", &iv0, 16); | |||||
b0 ^= iv0; | |||||
//hexDump("xor", &b0, 16); | |||||
iv1 = iv0 ^ l1; | |||||
b1 ^= iv1; | |||||
iv2 = iv1 ^ l2; | |||||
b2 ^= iv2; | |||||
iv3 = iv2 ^ l3; | |||||
b3 ^= iv3; | |||||
iv4 = iv3 ^ l4; | |||||
b4 ^= iv4; | |||||
iv5 = iv4 ^ l5; | |||||
b5 ^= iv5; | |||||
iv6 = iv5 ^ l6; | |||||
b6 ^= iv6; | |||||
iv7 = iv6 ^ l7; | |||||
b7 ^= iv7; | |||||
b0 = swap_if_le(b0); | |||||
//hexDump("swap", &b0, 16); | |||||
b1 = swap_if_le(b1); | |||||
b2 = swap_if_le(b2); | |||||
b3 = swap_if_le(b3); | |||||
b4 = swap_if_le(b4); | |||||
b5 = swap_if_le(b5); | |||||
b6 = swap_if_le(b6); | |||||
b7 = swap_if_le(b7); | |||||
b0 ^= rk[0]; | |||||
//hexDump("xor ??", &b0, 16); | |||||
b1 ^= rk[0]; | |||||
b2 ^= rk[0]; | |||||
b3 ^= rk[0]; | |||||
b4 ^= rk[0]; | |||||
b5 ^= rk[0]; | |||||
b6 ^= rk[0]; | |||||
b7 ^= rk[0]; | |||||
for (r = 1;r < rounds;r++) | |||||
{ | |||||
__asm__ volatile ("vncipher %0, %0, %1\n\t" | |||||
:"+v" (b0) | |||||
:"v" (rk[r]) | |||||
); | |||||
//hexDump("round", &b0, 16); | |||||
__asm__ volatile ("vncipher %0, %0, %1\n\t" | |||||
:"+v" (b1) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vncipher %0, %0, %1\n\t" | |||||
:"+v" (b2) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vncipher %0, %0, %1\n\t" | |||||
:"+v" (b3) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vncipher %0, %0, %1\n\t" | |||||
:"+v" (b4) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vncipher %0, %0, %1\n\t" | |||||
:"+v" (b5) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vncipher %0, %0, %1\n\t" | |||||
:"+v" (b6) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vncipher %0, %0, %1\n\t" | |||||
:"+v" (b7) | |||||
:"v" (rk[r]) | |||||
); | |||||
} | |||||
__asm__ volatile ("vncipherlast %0, %0, %1\n\t" | |||||
:"+v" (b0) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vncipherlast %0, %0, %1\n\t" | |||||
:"+v" (b1) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vncipherlast %0, %0, %1\n\t" | |||||
:"+v" (b2) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vncipherlast %0, %0, %1\n\t" | |||||
:"+v" (b3) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vncipherlast %0, %0, %1\n\t" | |||||
:"+v" (b4) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vncipherlast %0, %0, %1\n\t" | |||||
:"+v" (b5) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vncipherlast %0, %0, %1\n\t" | |||||
:"+v" (b6) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vncipherlast %0, %0, %1\n\t" | |||||
:"+v" (b7) | |||||
:"v" (rk[r]) | |||||
); | |||||
//hexDump("end", &b0, 16); | |||||
iv = iv7; | |||||
//hexDump("end-iv5", &b0, 16); | |||||
b0 = swap_if_le(b0) ^ iv0; | |||||
b1 = swap_if_le(b1) ^ iv1; | |||||
b2 = swap_if_le(b2) ^ iv2; | |||||
b3 = swap_if_le(b3) ^ iv3; | |||||
b4 = swap_if_le(b4) ^ iv4; | |||||
b5 = swap_if_le(b5) ^ iv5; | |||||
b6 = swap_if_le(b6) ^ iv6; | |||||
b7 = swap_if_le(b7) ^ iv7; | |||||
ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7; | |||||
// The unaligned store stxvb16x writes big-endian | |||||
if ((uintptr_t)outbuf % 16 == 0) | |||||
{ | |||||
vec_vsx_st(b0, 0, out++); | |||||
vec_vsx_st(b1, 0, out++); | |||||
vec_vsx_st(b2, 0, out++); | |||||
vec_vsx_st(b3, 0, out++); | |||||
vec_vsx_st(b4, 0, out++); | |||||
vec_vsx_st(b5, 0, out++); | |||||
vec_vsx_st(b6, 0, out++); | |||||
vec_vsx_st(b7, 0, out++); | |||||
} | |||||
else | |||||
{ | |||||
b0 = swap_if_le(b0); | |||||
b1 = swap_if_le(b1); | |||||
b2 = swap_if_le(b2); | |||||
b3 = swap_if_le(b3); | |||||
b4 = swap_if_le(b4); | |||||
b5 = swap_if_le(b5); | |||||
b6 = swap_if_le(b6); | |||||
b7 = swap_if_le(b7); | |||||
__asm__ ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (b0), "r" (zero), "r" ((uintptr_t)(out++))); | |||||
//hexDump("out-un", out - 1, 16); | |||||
__asm__ ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (b1), "r" (zero), "r" ((uintptr_t)(out++))); | |||||
__asm__ ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (b2), "r" (zero), "r" ((uintptr_t)(out++))); | |||||
__asm__ ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (b3), "r" (zero), "r" ((uintptr_t)(out++))); | |||||
__asm__ ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (b4), "r" (zero), "r" ((uintptr_t)(out++))); | |||||
__asm__ ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (b5), "r" (zero), "r" ((uintptr_t)(out++))); | |||||
__asm__ ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (b6), "r" (zero), "r" ((uintptr_t)(out++))); | |||||
__asm__ ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (b7), "r" (zero), "r" ((uintptr_t)(out++))); | |||||
} | |||||
} | |||||
for ( ;nblocks; nblocks-- ) | |||||
{ | |||||
block b; | |||||
u64 i = ++c->u_mode.ocb.data_nblocks; | |||||
const block l = *(block*)ocb_get_l(c, i); | |||||
/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ | |||||
iv ^= l; | |||||
if ((uintptr_t)in % 16 == 0) { | |||||
b = vec_ld(0, in++); | |||||
} else { | |||||
block unalignedprevprev; | |||||
unalignedprevprev = unalignedprev; | |||||
unalignedprev = vec_ld(0, in++); | |||||
b = vec_perm(unalignedprevprev, unalignedprev, vec_lvsl(0, inbuf)); | |||||
} | |||||
//hexDump("start", &b, 16); | |||||
/* Checksum_i = Checksum_{i-1} xor P_i */ | |||||
//hexDump("ctr", &ctr, 16); | |||||
/* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ | |||||
b ^= iv; | |||||
//hexDump("xoriv", &b, 16); | |||||
b = swap_if_le(b); | |||||
b = _gcry_aes_ppc8_decrypt_altivec (ctx, b); | |||||
//hexDump("crypt", &b, 16); | |||||
b = swap_if_le(b) ^ iv; | |||||
ctr ^= b; | |||||
if ((uintptr_t)out % 16 == 0) | |||||
vec_vsx_st(b, 0, out++); | |||||
else { | |||||
b = swap_if_le(b); | |||||
__asm__ volatile ("stxvb16x %x0, %1, %2\n\t" | |||||
: | |||||
: "wa" (b), "r" (zero), "r" ((uintptr_t)out++)); | |||||
} | |||||
//hexDump("out", out - 1, 16); | |||||
} | |||||
// We want to store iv and ctr big-endian and the unaligned | |||||
// store stxvb16x stores them little endian, so we have to swap them. | |||||
iv = swap_if_le(iv); | |||||
__asm__ volatile ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (iv), "r" (zero), "r" ((uintptr_t)&c->u_iv.iv)); | |||||
ctr = swap_if_le(ctr); | |||||
__asm__ volatile ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (ctr), "r" (zero), "r" ((uintptr_t)&c->u_ctr.ctr)); | |||||
} | |||||
return 0; | |||||
} | |||||
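The unrolled loops above implement the OCB recurrences quoted in the comments: Offset_i = Offset_{i-1} xor L_{ntz(i)}, Checksum_i = Checksum_{i-1} xor P_i, and C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i). For comparison, here is a hedged scalar reference for one block in the encryption direction; the helper names and the enc callback are hypothetical, and it is only meant to make the data flow easy to check against the vector code:

#include <string.h>

typedef unsigned char ocb_block[16];

static void xor_block (ocb_block dst, const ocb_block src)
{
  int i;
  for (i = 0; i < 16; i++)
    dst[i] ^= src[i];
}

/* offset and checksum are updated in place; l_i is L_{ntz(i)} as returned
   by ocb_get_l; enc() encrypts one 16-byte block in place with the key. */
static void ocb_encrypt_block_sketch (ocb_block offset, ocb_block checksum,
                                      const ocb_block l_i,
                                      const ocb_block plain, ocb_block cipher,
                                      void (*enc) (ocb_block))
{
  xor_block (offset, l_i);        /* Offset_i   = Offset_{i-1} xor L_{ntz(i)} */
  xor_block (checksum, plain);    /* Checksum_i = Checksum_{i-1} xor P_i      */
  memcpy (cipher, plain, 16);
  xor_block (cipher, offset);     /* P_i xor Offset_i                         */
  enc (cipher);                   /* ENCIPHER(K, P_i xor Offset_i)            */
  xor_block (cipher, offset);     /* C_i = Offset_i xor ENCIPHER(...)         */
}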
extern int aes_p8_set_encrypt_key (const unsigned char *userKey, const int bits,
                                   RIJNDAEL_context *key);
extern int aes_p8_set_decrypt_key (const unsigned char *userKey, const int bits,
                                   /* this is the decryption key part of context */
                                   const unsigned (*)[15][4]);
Context not available.
  const unsigned char *inbuf = inbuf_arg;
  unsigned char *outbuf = outbuf_arg;
  const RIJNDAEL_context *ctx = context;
  const uint64_t two32 = 1ULL << 32;
  int overflow;
  u64 s[2], e[2];                        |   u64 s[2];
  s[0] = buf_get_be64(ctr + 8);
  overflow = two32 - (s[0] % two32) < nblocks;
#ifdef __builtin_expect
  __builtin_expect(overflow, 0);
#endif
Context not available.
  if (hd) {
    hd->bulk.cbc_dec = _gcry_aes_ppc8_cbc_dec;
    hd->bulk.cbc_enc = _gcry_aes_ppc8_cbc_enc;
    hd->bulk.xts_crypt = _gcry_aes_ppc8_xts_crypt;
    hd->bulk.ctr_enc = _gcry_aes_ppc8_ctr_enc;
    hd->bulk.ocb_crypt = _gcry_aes_ppc8_ocb_crypt;
  }
}
#endif
else
  {
Context not available.
  else if (ctx->use_arm_ce)
    {
      return _gcry_aes_armv8_ce_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt);
    }
#endif /*USE_ARM_CE*/
#ifdef USE_PPC_ASM
  else if (ctx->use_ppc_asm)
    {
      return _gcry_aes_ppc8_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt);
    }
#endif /*USE_PPC_ASM*/
  else if (encrypt)
    {
      union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
      rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;
Context not available.