Changeset View
Changeset View
Standalone View
Standalone View
cipher/rijndael.c
| Context not available. | |||||
| size_t nblocks, int encrypt); | size_t nblocks, int encrypt); | ||||
| #endif /*USE_ARM_ASM*/ | #endif /*USE_ARM_ASM*/ | ||||
| #ifdef USE_PPC_ASM | #ifdef USE_PPC_ASM | ||||
| /* POWER 8 AES extensions */ | /* POWER 8 AES extensions */ | ||||
| extern void aes_p8_encrypt (const unsigned char *in, | #include <altivec.h> | ||||
| unsigned char *out, | |||||
| const RIJNDAEL_context *ctx); | typedef vector unsigned char block; | ||||
| vector unsigned char backwards = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; | |||||
| #ifdef __LITTLE_ENDIAN__ | |||||
| #define swap_if_le(a) \ | |||||
| vec_perm(a, a, backwards) | |||||
| #elif __BIG_ENDIAN__ | |||||
| #define swap_if_le(a) (a) | |||||
| #else | |||||
| #error "What endianness?" | |||||
| #endif | |||||
| /* Passes in AltiVec registers (big-endian) | |||||
| * sadly compilers don't know how to unroll outer loops into | |||||
| * inner loops with more registers on static functions, | |||||
| * so that this can be properly optimized for OOO multi-issue | |||||
| * without having to hand-unroll. | |||||
| */ | |||||
| static block _gcry_aes_ppc8_encrypt_altivec (const RIJNDAEL_context *ctx, | |||||
| block a) { | |||||
| int r; | |||||
| uintptr_t zero = 0; | |||||
| int rounds = ctx->rounds; | |||||
| block *rk = (block*)ctx->keyschenc; | |||||
| //hexDump("sa", &a, sizeof(a)); | |||||
| a = rk[0] ^ a; | |||||
| //hexDump("sa", &a, sizeof(a)); | |||||
| for (r = 1;r < rounds;r++) { | |||||
| __asm__ volatile ("vcipher %0, %0, %1\n\t" | |||||
| :"+v" (a) | |||||
| :"v" (rk[r]) | |||||
| ); | |||||
| //hexDump("sa", &a, sizeof(a)); | |||||
| } | |||||
| __asm__ volatile ("vcipherlast %0, %0, %1\n\t" | |||||
| :"+v" (a) | |||||
| :"v" (rk[r]) | |||||
| ); | |||||
| //hexDump("end", &a, sizeof(a)); | |||||
| return a; | |||||
| } | |||||
| static block _gcry_aes_ppc8_decrypt_altivec (const RIJNDAEL_context *ctx, | |||||
| block a) { | |||||
| int r; | |||||
| uintptr_t zero = 0; | |||||
| int rounds = ctx->rounds; | |||||
| block *rk = (block*)ctx->keyschdec; | |||||
| //hexDump("sa", &a, sizeof(a)); | |||||
| a = rk[0] ^ a; | |||||
| //hexDump("sa", &a, sizeof(a)); | |||||
| for (r = 1;r < rounds;r++) { | |||||
| __asm__ volatile ("vncipher %0, %0, %1\n\t" | |||||
| :"+v" (a) | |||||
| :"v" (rk[r]) | |||||
| ); | |||||
| //hexDump("sa", &a, sizeof(a)); | |||||
| } | |||||
| __asm__ volatile ("vncipherlast %0, %0, %1\n\t" | |||||
| :"+v" (a) | |||||
| :"v" (rk[r]) | |||||
| ); | |||||
| //hexDump("end", &a, sizeof(a)); | |||||
| return a; | |||||
| } | |||||
| static unsigned int _gcry_aes_ppc8_encrypt (const RIJNDAEL_context *ctx, | static unsigned int _gcry_aes_ppc8_encrypt (const RIJNDAEL_context *ctx, | ||||
| unsigned char *out, | unsigned char *b, | ||||
| const unsigned char *in) | const unsigned char *a) { | ||||
| { | uintptr_t zero = 0; | ||||
| /* When I tried to switch these registers in the assembly it broke. */ | block sa; | ||||
| aes_p8_encrypt (in, out, ctx); | //hexDump("key", rk_c, 16 * 15); | ||||
| if ((uintptr_t)a % 16 == 0) { | |||||
| sa = vec_ld(0, a); | |||||
| } else { | |||||
| block unalignedprev, unalignedcur; | |||||
| unalignedprev = vec_ld(0, a); | |||||
| unalignedcur = vec_ld(16, a); | |||||
| sa = vec_perm(unalignedprev, unalignedcur, vec_lvsl(0, a)); | |||||
| } | |||||
| sa = swap_if_le(sa); | |||||
| sa = _gcry_aes_ppc8_encrypt_altivec(ctx, sa); | |||||
| __asm__ volatile ("stxvb16x %x0, %1, %2\n\t" | |||||
| : | |||||
| : "wa" (sa), "r" (zero), "r" ((uintptr_t)b)); | |||||
| return 0; /* does not use stack */ | return 0; /* does not use stack */ | ||||
| } | } | ||||
| /* this is the decryption key part of context */ | |||||
| extern void aes_p8_decrypt (const unsigned char *in, | |||||
| unsigned char *out, | |||||
| const void *sboxes); | |||||
| static unsigned int _gcry_aes_ppc8_decrypt (const RIJNDAEL_context *ctx, | static unsigned int _gcry_aes_ppc8_decrypt (const RIJNDAEL_context *ctx, | ||||
| unsigned char *out, | unsigned char *b, | ||||
| const unsigned char *in) | const unsigned char *a) | ||||
| { | { | ||||
| aes_p8_decrypt (in, out, &ctx->u2); | int r; | ||||
| uintptr_t zero = 0; | |||||
| int rounds = ctx->rounds; | |||||
| block sa, unalignedprev, unalignedcur; | |||||
| block *rk = (block*)ctx->keyschdec; | |||||
| //hexDump("key", rk, 16 * 15); | |||||
| if ((uintptr_t)a % 16 == 0) { | |||||
| sa = vec_ld(0, a); | |||||
| } else { | |||||
| unalignedprev = vec_ld(0, a); | |||||
| unalignedcur = vec_ld(16, a); | |||||
| sa = vec_perm(unalignedprev, unalignedcur, vec_lvsl(0, a)); | |||||
| } | |||||
| sa = swap_if_le(sa); | |||||
| sa = _gcry_aes_ppc8_decrypt_altivec(ctx, sa); | |||||
| //hexDump("sa", &sa, sizeof(sa)); | |||||
| if ((uintptr_t)b % 16 == 0) | |||||
| vec_vsx_st(swap_if_le(sa), 0, b); | |||||
| else { | |||||
| __asm__ volatile ("stxvb16x %x0, %1, %2\n\t" | |||||
| : | |||||
| : "wa" (sa), "r" (zero), "r" ((uintptr_t)b)); | |||||
| } | |||||
| return 0; /* does not use stack */ | return 0; /* does not use stack */ | ||||
| } | } | ||||
/* Bulk OCB en/decryption for POWER8 using hand-unrolled vcipher /
 * vncipher inline assembly.
 *
 * c        cipher handle; c->u_iv.iv is the running OCB offset,
 *          c->u_ctr.ctr the running checksum, and
 *          c->u_mode.ocb.data_nblocks the block counter fed to
 *          ocb_get_l().
 * outbuf_arg / inbuf_arg   destination / source; may be unaligned.
 * nblocks  number of whole 16-byte blocks to process.
 * encrypt  nonzero to encrypt, zero to decrypt.
 *
 * Blocks are processed eight at a time so the in-order-issue-limited
 * vcipher latency can be hidden; a scalar tail loop handles the rest.
 * Returns 0 (no stack burn needed; state is kept in vector
 * registers).  */
size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
                                 const void *inbuf_arg, size_t nblocks,
                                 int encrypt) {
  RIJNDAEL_context *ctx = (void *)&c->context.c;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  block *in = (block*)inbuf;
  block *out = (block*)outbuf;
  uintptr_t zero = 0;
  int r;
  int rounds = ctx->rounds;
  /* NOTE(review): burn_depth is set but never used; the function
   * unconditionally returns 0 below — confirm that is intended.  */
  int burn_depth = 0;
  if (encrypt)
    {
      const int unroll = 8;
      block unalignedprev, ctr, iv;
      /* For unaligned input, prime the rolling "previous quadword" so
       * each block can be assembled from two adjacent aligned loads.  */
      if (((uintptr_t)inbuf % 16) != 0) {
	unalignedprev = vec_ld(0, in++);
      }
      iv = vec_ld(0, (block*)&c->u_iv.iv);
      ctr = vec_ld(0, (block*)&c->u_ctr.ctr);
      /* Main loop: eight blocks per iteration.  */
      for ( ;nblocks >= unroll; nblocks -= unroll)
	{
	  u64 i = c->u_mode.ocb.data_nblocks + 1;
	  block l0, l1, l2, l3, l4, l5, l6, l7;
	  block b0, b1, b2, b3, b4, b5, b6, b7;
	  block iv0, iv1, iv2, iv3, iv4, iv5, iv6, iv7;
	  const block *rk = (block*)&ctx->keyschenc;
	  /* NOTE(review): j is unused.  */
	  int j;

	  c->u_mode.ocb.data_nblocks += unroll;

	  iv0 = iv;
	  if ((uintptr_t)inbuf % 16 == 0)
	    {
	      b0 = vec_ld(0, in++);
	      b1 = vec_ld(0, in++);
	      b2 = vec_ld(0, in++);
	      b3 = vec_ld(0, in++);
	      b4 = vec_ld(0, in++);
	      b5 = vec_ld(0, in++);
	      b6 = vec_ld(0, in++);
	      b7 = vec_ld(0, in++);
	    }
	  else
	    {
	      /* Merge each pair of adjacent aligned quadwords into one
	       * unaligned block via vec_perm with the lvsl shift.  */
	      block unaligned0, unaligned1, unaligned2,
		unaligned3, unaligned4, unaligned5, unaligned6;
	      unaligned0 = vec_ld(0, in++);
	      unaligned1 = vec_ld(0, in++);
	      unaligned2 = vec_ld(0, in++);
	      unaligned3 = vec_ld(0, in++);
	      unaligned4 = vec_ld(0, in++);
	      unaligned5 = vec_ld(0, in++);
	      unaligned6 = vec_ld(0, in++);
	      b0 = vec_perm(unalignedprev, unaligned0, vec_lvsl(0, inbuf));
	      unalignedprev = vec_ld(0, in++);
	      b1 = vec_perm(unaligned0, unaligned1, vec_lvsl(0, inbuf));
	      b2 = vec_perm(unaligned1, unaligned2, vec_lvsl(0, inbuf));
	      b3 = vec_perm(unaligned2, unaligned3, vec_lvsl(0, inbuf));
	      b4 = vec_perm(unaligned3, unaligned4, vec_lvsl(0, inbuf));
	      b5 = vec_perm(unaligned4, unaligned5, vec_lvsl(0, inbuf));
	      b6 = vec_perm(unaligned5, unaligned6, vec_lvsl(0, inbuf));
	      b7 = vec_perm(unaligned6, unalignedprev, vec_lvsl(0, inbuf));
	    }

	  l0 = *(block*)ocb_get_l(c, i++);
	  l1 = *(block*)ocb_get_l(c, i++);
	  l2 = *(block*)ocb_get_l(c, i++);
	  l3 = *(block*)ocb_get_l(c, i++);
	  l4 = *(block*)ocb_get_l(c, i++);
	  l5 = *(block*)ocb_get_l(c, i++);
	  l6 = *(block*)ocb_get_l(c, i++);
	  l7 = *(block*)ocb_get_l(c, i++);

	  /* Checksum_i = Checksum_{i-1} xor P_i (over the plaintext).  */
	  ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7;

	  /* Offset_i = Offset_{i-1} xor L_{ntz(i)}; whiten each block
	   * with its offset before encryption.  */
	  iv0 ^= l0;
	  b0 ^= iv0;
	  iv1 = iv0 ^ l1;
	  b1 ^= iv1;
	  iv2 = iv1 ^ l2;
	  b2 ^= iv2;
	  iv3 = iv2 ^ l3;
	  b3 ^= iv3;
	  iv4 = iv3 ^ l4;
	  b4 ^= iv4;
	  iv5 = iv4 ^ l5;
	  b5 ^= iv5;
	  iv6 = iv5 ^ l6;
	  b6 ^= iv6;
	  iv7 = iv6 ^ l7;
	  b7 ^= iv7;

	  /* vcipher works on big-endian byte order.  */
	  b0 = swap_if_le(b0);
	  b1 = swap_if_le(b1);
	  b2 = swap_if_le(b2);
	  b3 = swap_if_le(b3);
	  b4 = swap_if_le(b4);
	  b5 = swap_if_le(b5);
	  b6 = swap_if_le(b6);
	  b7 = swap_if_le(b7);

	  /* Initial AddRoundKey.  */
	  b0 ^= rk[0];
	  b1 ^= rk[0];
	  b2 ^= rk[0];
	  b3 ^= rk[0];
	  b4 ^= rk[0];
	  b5 ^= rk[0];
	  b6 ^= rk[0];
	  b7 ^= rk[0];

	  /* Eight parallel AES pipelines, hand-unrolled for multi-issue.  */
	  for (r = 1;r < rounds;r++)
	    {
	      __asm__ volatile ("vcipher %0, %0, %1\n\t"
		:"+v" (b0)
		:"v" (rk[r])
	      );
	      __asm__ volatile ("vcipher %0, %0, %1\n\t"
		:"+v" (b1)
		:"v" (rk[r])
	      );
	      __asm__ volatile ("vcipher %0, %0, %1\n\t"
		:"+v" (b2)
		:"v" (rk[r])
	      );
	      __asm__ volatile ("vcipher %0, %0, %1\n\t"
		:"+v" (b3)
		:"v" (rk[r])
	      );
	      __asm__ volatile ("vcipher %0, %0, %1\n\t"
		:"+v" (b4)
		:"v" (rk[r])
	      );
	      __asm__ volatile ("vcipher %0, %0, %1\n\t"
		:"+v" (b5)
		:"v" (rk[r])
	      );
	      __asm__ volatile ("vcipher %0, %0, %1\n\t"
		:"+v" (b6)
		:"v" (rk[r])
	      );
	      __asm__ volatile ("vcipher %0, %0, %1\n\t"
		:"+v" (b7)
		:"v" (rk[r])
	      );
	    }
	  __asm__ volatile ("vcipherlast %0, %0, %1\n\t"
	    :"+v" (b0)
	    :"v" (rk[r])
	  );
	  __asm__ volatile ("vcipherlast %0, %0, %1\n\t"
	    :"+v" (b1)
	    :"v" (rk[r])
	  );
	  __asm__ volatile ("vcipherlast %0, %0, %1\n\t"
	    :"+v" (b2)
	    :"v" (rk[r])
	  );
	  __asm__ volatile ("vcipherlast %0, %0, %1\n\t"
	    :"+v" (b3)
	    :"v" (rk[r])
	  );
	  __asm__ volatile ("vcipherlast %0, %0, %1\n\t"
	    :"+v" (b4)
	    :"v" (rk[r])
	  );
	  __asm__ volatile ("vcipherlast %0, %0, %1\n\t"
	    :"+v" (b5)
	    :"v" (rk[r])
	  );
	  __asm__ volatile ("vcipherlast %0, %0, %1\n\t"
	    :"+v" (b6)
	    :"v" (rk[r])
	  );
	  __asm__ volatile ("vcipherlast %0, %0, %1\n\t"
	    :"+v" (b7)
	    :"v" (rk[r])
	  );

	  iv = iv7;

	  /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i).
	   * The unaligned store stxvb16x writes big-endian, so in the
	   * unaligned case we swap the iv instead of the bytes.  */
	  if ((uintptr_t)outbuf % 16 == 0)
	    {
	      vec_vsx_st(swap_if_le(b0) ^ iv0, 0, out++);
	      vec_vsx_st(swap_if_le(b1) ^ iv1, 0, out++);
	      vec_vsx_st(swap_if_le(b2) ^ iv2, 0, out++);
	      vec_vsx_st(swap_if_le(b3) ^ iv3, 0, out++);
	      vec_vsx_st(swap_if_le(b4) ^ iv4, 0, out++);
	      vec_vsx_st(swap_if_le(b5) ^ iv5, 0, out++);
	      vec_vsx_st(swap_if_le(b6) ^ iv6, 0, out++);
	      vec_vsx_st(swap_if_le(b7) ^ iv7, 0, out++);
	    }
	  else
	    {
	      b0 ^= swap_if_le(iv0);
	      b1 ^= swap_if_le(iv1);
	      b2 ^= swap_if_le(iv2);
	      b3 ^= swap_if_le(iv3);
	      b4 ^= swap_if_le(iv4);
	      b5 ^= swap_if_le(iv5);
	      b6 ^= swap_if_le(iv6);
	      b7 ^= swap_if_le(iv7);
	      __asm__ volatile ("stxvb16x %x0, %1, %2\n\t"
		:: "wa" (b0), "r" (zero), "r" ((uintptr_t)(out++)));
	      __asm__ volatile ("stxvb16x %x0, %1, %2\n\t"
		:: "wa" (b1), "r" (zero), "r" ((uintptr_t)(out++)));
	      __asm__ volatile ("stxvb16x %x0, %1, %2\n\t"
		:: "wa" (b2), "r" (zero), "r" ((uintptr_t)(out++)));
	      __asm__ volatile ("stxvb16x %x0, %1, %2\n\t"
		:: "wa" (b3), "r" (zero), "r" ((uintptr_t)(out++)));
	      __asm__ volatile ("stxvb16x %x0, %1, %2\n\t"
		:: "wa" (b4), "r" (zero), "r" ((uintptr_t)(out++)));
	      __asm__ volatile ("stxvb16x %x0, %1, %2\n\t"
		:: "wa" (b5), "r" (zero), "r" ((uintptr_t)(out++)));
	      __asm__ volatile ("stxvb16x %x0, %1, %2\n\t"
		:: "wa" (b6), "r" (zero), "r" ((uintptr_t)(out++)));
	      __asm__ volatile ("stxvb16x %x0, %1, %2\n\t"
		:: "wa" (b7), "r" (zero), "r" ((uintptr_t)(out++)));
	    }
	}

      /* Scalar tail: fewer than eight blocks remain.  */
      for ( ;nblocks; nblocks-- )
	{
	  block b;
	  u64 i = ++c->u_mode.ocb.data_nblocks;
	  const block l = *(block*)ocb_get_l(c, i);

	  /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
	  iv ^= l;
	  if ((uintptr_t)in % 16 == 0) {
	    b = vec_ld(0, in++);
	  } else {
	    block unalignedprevprev;
	    unalignedprevprev = unalignedprev;
	    unalignedprev = vec_ld(0, in++);
	    b = vec_perm(unalignedprevprev, unalignedprev, vec_lvsl(0, inbuf));
	  }

	  /* Checksum_i = Checksum_{i-1} xor P_i */
	  ctr ^= b;
	  /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
	  b ^= iv;
	  b = swap_if_le(b);
	  b = _gcry_aes_ppc8_encrypt_altivec (ctx, b);
	  if ((uintptr_t)out % 16 == 0)
	    vec_vsx_st(swap_if_le(b) ^ iv, 0, out++);
	  else {
	    b ^= swap_if_le(iv);
	    __asm__ volatile ("stxvb16x %x0, %1, %2\n\t"
	      :
	      : "wa" (b), "r" (zero), "r" ((uintptr_t)out++));
	  }
	}

      /* We want to store iv and ctr big-endian and the unaligned
       * store stxvb16x stores them little endian, so we have to swap
       * them.  */
      iv = swap_if_le(iv);
      __asm__ volatile ("stxvb16x %x0, %1, %2\n\t"
	:: "wa" (iv), "r" (zero), "r" ((uintptr_t)&c->u_iv.iv));
      ctr = swap_if_le(ctr);
      __asm__ volatile ("stxvb16x %x0, %1, %2\n\t"
	:: "wa" (ctr), "r" (zero), "r" ((uintptr_t)&c->u_ctr.ctr));
    }
  else
    {
      /* Decryption mirror: vncipher with the decryption key schedule;
       * the checksum is accumulated over the recovered plaintext, so
       * it is XORed in AFTER deciphering.  */
      const int unroll = 8;
      block unalignedprev, ctr, iv;
      if (((uintptr_t)inbuf % 16) != 0) {
	unalignedprev = vec_ld(0, in++);
      }
      iv = vec_ld(0, (block*)&c->u_iv.iv);
      ctr = vec_ld(0, (block*)&c->u_ctr.ctr);
      for ( ;nblocks >= unroll; nblocks -= unroll)
	{
	  u64 i = c->u_mode.ocb.data_nblocks + 1;
	  block l0, l1, l2, l3, l4, l5, l6, l7;
	  block b0, b1, b2, b3, b4, b5, b6, b7;
	  block iv0, iv1, iv2, iv3, iv4, iv5, iv6, iv7;
	  const block *rk = (block*)&ctx->keyschdec;
	  /* NOTE(review): j is unused.  */
	  int j;

	  c->u_mode.ocb.data_nblocks += unroll;

	  iv0 = iv;
	  if ((uintptr_t)inbuf % 16 == 0)
	    {
	      b0 = vec_ld(0, in++);
	      b1 = vec_ld(0, in++);
	      b2 = vec_ld(0, in++);
	      b3 = vec_ld(0, in++);
	      b4 = vec_ld(0, in++);
	      b5 = vec_ld(0, in++);
	      b6 = vec_ld(0, in++);
	      b7 = vec_ld(0, in++);
	    }
	  else
	    {
	      block unaligned0, unaligned1, unaligned2,
		unaligned3, unaligned4, unaligned5, unaligned6;
	      unaligned0 = vec_ld(0, in++);
	      unaligned1 = vec_ld(0, in++);
	      unaligned2 = vec_ld(0, in++);
	      unaligned3 = vec_ld(0, in++);
	      unaligned4 = vec_ld(0, in++);
	      unaligned5 = vec_ld(0, in++);
	      unaligned6 = vec_ld(0, in++);
	      b0 = vec_perm(unalignedprev, unaligned0, vec_lvsl(0, inbuf));
	      unalignedprev = vec_ld(0, in++);
	      b1 = vec_perm(unaligned0, unaligned1, vec_lvsl(0, inbuf));
	      b2 = vec_perm(unaligned1, unaligned2, vec_lvsl(0, inbuf));
	      b3 = vec_perm(unaligned2, unaligned3, vec_lvsl(0, inbuf));
	      b4 = vec_perm(unaligned3, unaligned4, vec_lvsl(0, inbuf));
	      b5 = vec_perm(unaligned4, unaligned5, vec_lvsl(0, inbuf));
	      b6 = vec_perm(unaligned5, unaligned6, vec_lvsl(0, inbuf));
	      b7 = vec_perm(unaligned6, unalignedprev, vec_lvsl(0, inbuf));
	    }

	  l0 = *(block*)ocb_get_l(c, i++);
	  l1 = *(block*)ocb_get_l(c, i++);
	  l2 = *(block*)ocb_get_l(c, i++);
	  l3 = *(block*)ocb_get_l(c, i++);
	  l4 = *(block*)ocb_get_l(c, i++);
	  l5 = *(block*)ocb_get_l(c, i++);
	  l6 = *(block*)ocb_get_l(c, i++);
	  l7 = *(block*)ocb_get_l(c, i++);

	  /* Offset chain and ciphertext whitening.  */
	  iv0 ^= l0;
	  b0 ^= iv0;
	  iv1 = iv0 ^ l1;
	  b1 ^= iv1;
	  iv2 = iv1 ^ l2;
	  b2 ^= iv2;
	  iv3 = iv2 ^ l3;
	  b3 ^= iv3;
	  iv4 = iv3 ^ l4;
	  b4 ^= iv4;
	  iv5 = iv4 ^ l5;
	  b5 ^= iv5;
	  iv6 = iv5 ^ l6;
	  b6 ^= iv6;
	  iv7 = iv6 ^ l7;
	  b7 ^= iv7;

	  /* vncipher works on big-endian byte order.  */
	  b0 = swap_if_le(b0);
	  b1 = swap_if_le(b1);
	  b2 = swap_if_le(b2);
	  b3 = swap_if_le(b3);
	  b4 = swap_if_le(b4);
	  b5 = swap_if_le(b5);
	  b6 = swap_if_le(b6);
	  b7 = swap_if_le(b7);

	  b0 ^= rk[0];
	  b1 ^= rk[0];
	  b2 ^= rk[0];
	  b3 ^= rk[0];
	  b4 ^= rk[0];
	  b5 ^= rk[0];
	  b6 ^= rk[0];
	  b7 ^= rk[0];

	  for (r = 1;r < rounds;r++)
	    {
	      __asm__ volatile ("vncipher %0, %0, %1\n\t"
		:"+v" (b0)
		:"v" (rk[r])
	      );
	      __asm__ volatile ("vncipher %0, %0, %1\n\t"
		:"+v" (b1)
		:"v" (rk[r])
	      );
	      __asm__ volatile ("vncipher %0, %0, %1\n\t"
		:"+v" (b2)
		:"v" (rk[r])
	      );
	      __asm__ volatile ("vncipher %0, %0, %1\n\t"
		:"+v" (b3)
		:"v" (rk[r])
	      );
	      __asm__ volatile ("vncipher %0, %0, %1\n\t"
		:"+v" (b4)
		:"v" (rk[r])
	      );
	      __asm__ volatile ("vncipher %0, %0, %1\n\t"
		:"+v" (b5)
		:"v" (rk[r])
	      );
	      __asm__ volatile ("vncipher %0, %0, %1\n\t"
		:"+v" (b6)
		:"v" (rk[r])
	      );
	      __asm__ volatile ("vncipher %0, %0, %1\n\t"
		:"+v" (b7)
		:"v" (rk[r])
	      );
	    }
	  __asm__ volatile ("vncipherlast %0, %0, %1\n\t"
	    :"+v" (b0)
	    :"v" (rk[r])
	  );
	  __asm__ volatile ("vncipherlast %0, %0, %1\n\t"
	    :"+v" (b1)
	    :"v" (rk[r])
	  );
	  __asm__ volatile ("vncipherlast %0, %0, %1\n\t"
	    :"+v" (b2)
	    :"v" (rk[r])
	  );
	  __asm__ volatile ("vncipherlast %0, %0, %1\n\t"
	    :"+v" (b3)
	    :"v" (rk[r])
	  );
	  __asm__ volatile ("vncipherlast %0, %0, %1\n\t"
	    :"+v" (b4)
	    :"v" (rk[r])
	  );
	  __asm__ volatile ("vncipherlast %0, %0, %1\n\t"
	    :"+v" (b5)
	    :"v" (rk[r])
	  );
	  __asm__ volatile ("vncipherlast %0, %0, %1\n\t"
	    :"+v" (b6)
	    :"v" (rk[r])
	  );
	  __asm__ volatile ("vncipherlast %0, %0, %1\n\t"
	    :"+v" (b7)
	    :"v" (rk[r])
	  );

	  iv = iv7;

	  /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i);
	   * checksum accumulates the recovered plaintext.  */
	  b0 = swap_if_le(b0) ^ iv0;
	  b1 = swap_if_le(b1) ^ iv1;
	  b2 = swap_if_le(b2) ^ iv2;
	  b3 = swap_if_le(b3) ^ iv3;
	  b4 = swap_if_le(b4) ^ iv4;
	  b5 = swap_if_le(b5) ^ iv5;
	  b6 = swap_if_le(b6) ^ iv6;
	  b7 = swap_if_le(b7) ^ iv7;
	  ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7;

	  /* The unaligned store stxvb16x writes big-endian.  */
	  if ((uintptr_t)outbuf % 16 == 0)
	    {
	      vec_vsx_st(b0, 0, out++);
	      vec_vsx_st(b1, 0, out++);
	      vec_vsx_st(b2, 0, out++);
	      vec_vsx_st(b3, 0, out++);
	      vec_vsx_st(b4, 0, out++);
	      vec_vsx_st(b5, 0, out++);
	      vec_vsx_st(b6, 0, out++);
	      vec_vsx_st(b7, 0, out++);
	    }
	  else
	    {
	      b0 = swap_if_le(b0);
	      b1 = swap_if_le(b1);
	      b2 = swap_if_le(b2);
	      b3 = swap_if_le(b3);
	      b4 = swap_if_le(b4);
	      b5 = swap_if_le(b5);
	      b6 = swap_if_le(b6);
	      b7 = swap_if_le(b7);
	      /* NOTE(review): these asm statements lack `volatile',
	       * unlike the encrypt path — confirm the asymmetry is
	       * deliberate (outputs are memory stores either way).  */
	      __asm__ ("stxvb16x %x0, %1, %2\n\t"
		:: "wa" (b0), "r" (zero), "r" ((uintptr_t)(out++)));
	      __asm__ ("stxvb16x %x0, %1, %2\n\t"
		:: "wa" (b1), "r" (zero), "r" ((uintptr_t)(out++)));
	      __asm__ ("stxvb16x %x0, %1, %2\n\t"
		:: "wa" (b2), "r" (zero), "r" ((uintptr_t)(out++)));
	      __asm__ ("stxvb16x %x0, %1, %2\n\t"
		:: "wa" (b3), "r" (zero), "r" ((uintptr_t)(out++)));
	      __asm__ ("stxvb16x %x0, %1, %2\n\t"
		:: "wa" (b4), "r" (zero), "r" ((uintptr_t)(out++)));
	      __asm__ ("stxvb16x %x0, %1, %2\n\t"
		:: "wa" (b5), "r" (zero), "r" ((uintptr_t)(out++)));
	      __asm__ ("stxvb16x %x0, %1, %2\n\t"
		:: "wa" (b6), "r" (zero), "r" ((uintptr_t)(out++)));
	      __asm__ ("stxvb16x %x0, %1, %2\n\t"
		:: "wa" (b7), "r" (zero), "r" ((uintptr_t)(out++)));
	    }
	}

      /* Scalar tail: fewer than eight blocks remain.  */
      for ( ;nblocks; nblocks-- )
	{
	  block b;
	  u64 i = ++c->u_mode.ocb.data_nblocks;
	  const block l = *(block*)ocb_get_l(c, i);

	  /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
	  iv ^= l;
	  if ((uintptr_t)in % 16 == 0) {
	    b = vec_ld(0, in++);
	  } else {
	    block unalignedprevprev;
	    unalignedprevprev = unalignedprev;
	    unalignedprev = vec_ld(0, in++);
	    b = vec_perm(unalignedprevprev, unalignedprev, vec_lvsl(0, inbuf));
	  }

	  /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i);
	   * Checksum_i = Checksum_{i-1} xor P_i.  */
	  b ^= iv;
	  b = swap_if_le(b);
	  b = _gcry_aes_ppc8_decrypt_altivec (ctx, b);
	  b = swap_if_le(b) ^ iv;
	  ctr ^= b;
	  if ((uintptr_t)out % 16 == 0)
	    vec_vsx_st(b, 0, out++);
	  else {
	    b = swap_if_le(b);
	    __asm__ volatile ("stxvb16x %x0, %1, %2\n\t"
	      :
	      : "wa" (b), "r" (zero), "r" ((uintptr_t)out++));
	  }
	}

      /* We want to store iv and ctr big-endian and the unaligned
       * store stxvb16x stores them little endian, so we have to swap
       * them.  */
      iv = swap_if_le(iv);
      __asm__ volatile ("stxvb16x %x0, %1, %2\n\t"
	:: "wa" (iv), "r" (zero), "r" ((uintptr_t)&c->u_iv.iv));
      ctr = swap_if_le(ctr);
      __asm__ volatile ("stxvb16x %x0, %1, %2\n\t"
	:: "wa" (ctr), "r" (zero), "r" ((uintptr_t)&c->u_ctr.ctr));
    }
  return 0;
}
/* Key-schedule setup entry points implemented in external PPC64
 * assembly (presumably the OpenSSL-derived aesp8-ppc code — confirm
 * against the build rules).  */
extern int aes_p8_set_encrypt_key (const unsigned char *userKey, const int bits,
                                   RIJNDAEL_context *key);
extern int aes_p8_set_decrypt_key (const unsigned char *userKey, const int bits,
                                   /* this is the decryption key part of context */
                                   const unsigned (*)[15][4]);
| Context not available. | |||||
| const unsigned char *inbuf = inbuf_arg; | const unsigned char *inbuf = inbuf_arg; | ||||
| unsigned char *outbuf = outbuf_arg; | unsigned char *outbuf = outbuf_arg; | ||||
| const RIJNDAEL_context *ctx = context; | const RIJNDAEL_context *ctx = context; | ||||
| const uint64_t two32 = 1ULL << 32; | const uint64_t two32 = 1ULL << 32; | ||||
| int overflow; | int overflow; | ||||
| u64 s[2], e[2]; | u64 s[2]; | ||||
| s[0] = buf_get_be64(ctr + 8); | s[0] = buf_get_be64(ctr + 8); | ||||
| overflow = two32 - (s[0] % two32) < nblocks; | overflow = two32 - (s[0] % two32) < nblocks; | ||||
| #ifdef __builtin_expect | #ifdef __builtin_expect | ||||
| __builtin_expect(overflow, 0); | __builtin_expect(overflow, 0); | ||||
| #endif | #endif | ||||
| Context not available. | |||||
| if (hd) { | if (hd) { | ||||
| hd->bulk.cbc_dec = _gcry_aes_ppc8_cbc_dec; | hd->bulk.cbc_dec = _gcry_aes_ppc8_cbc_dec; | ||||
| hd->bulk.cbc_enc = _gcry_aes_ppc8_cbc_enc; | hd->bulk.cbc_enc = _gcry_aes_ppc8_cbc_enc; | ||||
| hd->bulk.xts_crypt = _gcry_aes_ppc8_xts_crypt; | hd->bulk.xts_crypt = _gcry_aes_ppc8_xts_crypt; | ||||
| hd->bulk.ctr_enc = _gcry_aes_ppc8_ctr_enc; | hd->bulk.ctr_enc = _gcry_aes_ppc8_ctr_enc; | ||||
| hd->bulk.ocb_crypt = _gcry_aes_ppc8_ocb_crypt; | |||||
| } | } | ||||
| } | } | ||||
| #endif | #endif | ||||
| else | else | ||||
| { | { | ||||
| Context not available. | |||||
| else if (ctx->use_arm_ce) | else if (ctx->use_arm_ce) | ||||
| { | { | ||||
| return _gcry_aes_armv8_ce_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt); | return _gcry_aes_armv8_ce_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt); | ||||
| } | } | ||||
| #endif /*USE_ARM_CE*/ | #endif /*USE_ARM_CE*/ | ||||
| #ifdef USE_PPC_ASM | |||||
| else if (ctx->use_ppc_asm) | |||||
| { | |||||
| return _gcry_aes_ppc8_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt); | |||||
| } | |||||
| #endif /*USE_PPC_ASM*/ | |||||
| else if (encrypt) | else if (encrypt) | ||||
| { | { | ||||
| union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp; | union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp; | ||||
| rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; | rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; | ||||
| Context not available. | |||||