diff --git a/cipher/rijndael-ppc-common.h b/cipher/rijndael-ppc-common.h
index 165dd9f7..bbbeaac0 100644
--- a/cipher/rijndael-ppc-common.h
+++ b/cipher/rijndael-ppc-common.h
@@ -1,326 +1,342 @@
 /* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation
  * Copyright (C) 2019 Shawn Landden
  * Copyright (C) 2019-2020 Jussi Kivilinna
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this program; if not, see <http://www.gnu.org/licenses/>.
  *
  * Alternatively, this code may be used in OpenSSL from The OpenSSL Project,
  * and Cryptogams by Andy Polyakov, and if made part of a release of either
  * or both projects, is thereafter dual-licensed under the license said project
  * is released under.
  */

 #ifndef G10_RIJNDAEL_PPC_COMMON_H
 #define G10_RIJNDAEL_PPC_COMMON_H

 #include <altivec.h>


 typedef vector unsigned char block;

 typedef union
 {
   u32 data32[4];
 } __attribute__((packed, aligned(1), may_alias)) u128_t;

 #define ALWAYS_INLINE inline __attribute__((always_inline))
 #define NO_INLINE __attribute__((noinline))
 #define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))

 #define ASM_FUNC_ATTR          NO_INSTRUMENT_FUNCTION
 #define ASM_FUNC_ATTR_INLINE   ASM_FUNC_ATTR ALWAYS_INLINE
 #define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE

 #define ALIGNED_LOAD(in_ptr, offs) \
   (asm_aligned_ld ((offs) * 16, (const void *)(in_ptr)))

 #define ALIGNED_STORE(out_ptr, offs, vec) \
   (asm_aligned_st ((vec), (offs) * 16, (void *)(out_ptr)))

 #define VEC_BE_SWAP(vec, bige_const) (asm_be_swap ((vec), (bige_const)))

 #define VEC_LOAD_BE(in_ptr, offs, bige_const) \
   (asm_be_swap (asm_load_be_noswap ((offs) * 16, (const void *)(in_ptr)), \
                 bige_const))

 #define VEC_LOAD_BE_NOSWAP(in_ptr, offs) \
   (asm_load_be_noswap ((offs) * 16, (const unsigned char *)(in_ptr)))

 #define VEC_STORE_BE(out_ptr, offs, vec, bige_const) \
   (asm_store_be_noswap (asm_be_swap ((vec), (bige_const)), (offs) * 16, \
                         (void *)(out_ptr)))

 #define VEC_STORE_BE_NOSWAP(out_ptr, offs, vec) \
   (asm_store_be_noswap ((vec), (offs) * 16, (void *)(out_ptr)))

 #define ROUND_KEY_VARIABLES \
   block rkey0, rkeylast

 #define PRELOAD_ROUND_KEYS(nrounds) \
   do { \
     rkey0 = ALIGNED_LOAD (rk, 0); \
     rkeylast = ALIGNED_LOAD (rk, nrounds); \
   } while (0)

 #define AES_ENCRYPT(blk, nrounds) \
   do { \
     blk ^= rkey0; \
     blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 1)); \
     blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 2)); \
     blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 3)); \
     blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 4)); \
     blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 5)); \
     blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 6)); \
     blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 7)); \
     blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 8)); \
     blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 9)); \
     if (nrounds >= 12) \
       { \
         blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 10)); \
         blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 11)); \
         if (nrounds > 12) \
           { \
             blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 12)); \
             blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 13)); \
           } \
       } \
     blk = asm_cipherlast_be (blk, rkeylast); \
   } while (0)

 #define AES_DECRYPT(blk, nrounds) \
   do { \
     blk ^= rkey0; \
     blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 1)); \
     blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 2)); \
     blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 3)); \
     blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 4)); \
     blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 5)); \
     blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 6)); \
     blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 7)); \
     blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 8)); \
     blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 9)); \
     if (nrounds >= 12) \
       { \
         blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 10)); \
         blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 11)); \
         if (nrounds > 12) \
           { \
             blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 12)); \
             blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 13)); \
           } \
       } \
     blk = asm_ncipherlast_be (blk, rkeylast); \
   } while (0)

 #define ROUND_KEY_VARIABLES_ALL \
   block rkey0, rkey1, rkey2, rkey3, rkey4, rkey5, rkey6, rkey7, rkey8, \
         rkey9, rkey10, rkey11, rkey12, rkey13, rkeylast

 #define PRELOAD_ROUND_KEYS_ALL(nrounds) \
   do { \
     rkey0 = ALIGNED_LOAD (rk, 0); \
     rkey1 = ALIGNED_LOAD (rk, 1); \
     rkey2 = ALIGNED_LOAD (rk, 2); \
     rkey3 = ALIGNED_LOAD (rk, 3); \
     rkey4 = ALIGNED_LOAD (rk, 4); \
     rkey5 = ALIGNED_LOAD (rk, 5); \
     rkey6 = ALIGNED_LOAD (rk, 6); \
     rkey7 = ALIGNED_LOAD (rk, 7); \
     rkey8 = ALIGNED_LOAD (rk, 8); \
     rkey9 = ALIGNED_LOAD (rk, 9); \
     if (nrounds >= 12) \
       { \
         rkey10 = ALIGNED_LOAD (rk, 10); \
         rkey11 = ALIGNED_LOAD (rk, 11); \
         if (nrounds > 12) \
           { \
             rkey12 = ALIGNED_LOAD (rk, 12); \
             rkey13 = ALIGNED_LOAD (rk, 13); \
           } \
       } \
     rkeylast = ALIGNED_LOAD (rk, nrounds); \
   } while (0)

 #define AES_ENCRYPT_ALL(blk, nrounds) \
   do { \
     blk ^= rkey0; \
     blk = asm_cipher_be (blk, rkey1); \
     blk = asm_cipher_be (blk, rkey2); \
     blk = asm_cipher_be (blk, rkey3); \
     blk = asm_cipher_be (blk, rkey4); \
     blk = asm_cipher_be (blk, rkey5); \
     blk = asm_cipher_be (blk, rkey6); \
     blk = asm_cipher_be (blk, rkey7); \
     blk = asm_cipher_be (blk, rkey8); \
     blk = asm_cipher_be (blk, rkey9); \
     if (nrounds >= 12) \
       { \
         blk = asm_cipher_be (blk, rkey10); \
         blk = asm_cipher_be (blk, rkey11); \
         if (nrounds > 12) \
           { \
             blk = asm_cipher_be (blk, rkey12); \
             blk = asm_cipher_be (blk, rkey13); \
           } \
       } \
     blk = asm_cipherlast_be (blk, rkeylast); \
   } while (0)


 static ASM_FUNC_ATTR_INLINE block
 asm_aligned_ld(unsigned long offset, const void *ptr)
 {
   block vec;
-  __asm__ volatile ("lvx %0,%1,%2\n\t"
-                    : "=v" (vec)
-                    : "r" (offset), "r" ((uintptr_t)ptr)
-                    : "memory", "r0");
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ volatile ("lvx %0,0,%1\n\t"
+                      : "=v" (vec)
+                      : "r" ((uintptr_t)ptr)
+                      : "memory");
+  else
+#endif
+    __asm__ volatile ("lvx %0,%1,%2\n\t"
+                      : "=v" (vec)
+                      : "r" (offset), "r" ((uintptr_t)ptr)
+                      : "memory", "r0");
   return vec;
 }

 static ASM_FUNC_ATTR_INLINE void
 asm_aligned_st(block vec, unsigned long offset, void *ptr)
 {
-  __asm__ volatile ("stvx %0,%1,%2\n\t"
-                    :
-                    : "v" (vec), "r" (offset), "r" ((uintptr_t)ptr)
-                    : "memory", "r0");
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ volatile ("stvx %0,0,%1\n\t"
+                      :
+                      : "v" (vec), "r" ((uintptr_t)ptr)
+                      : "memory");
+  else
+#endif
+    __asm__ volatile ("stvx %0,%1,%2\n\t"
+                      :
+                      : "v" (vec), "r" (offset), "r" ((uintptr_t)ptr)
+                      : "memory", "r0");
 }

 static ASM_FUNC_ATTR_INLINE block
 asm_vperm1(block vec, block mask)
 {
   block o;
   __asm__ volatile ("vperm %0,%1,%1,%2\n\t"
                     : "=v" (o)
                     : "v" (vec), "v" (mask));
   return o;
 }

 static ASM_FUNC_ATTR_INLINE block
 asm_add_uint128(block a, block b)
 {
   block res;
   __asm__ volatile ("vadduqm %0,%1,%2\n\t"
                     : "=v" (res)
                     : "v" (a), "v" (b));
   return res;
 }

 static ASM_FUNC_ATTR_INLINE block
 asm_add_uint64(block a, block b)
 {
   block res;
   __asm__ volatile ("vaddudm %0,%1,%2\n\t"
                     : "=v" (res)
                     : "v" (a), "v" (b));
   return res;
 }

 static ASM_FUNC_ATTR_INLINE block
 asm_sra_int64(block a, block b)
 {
   block res;
   __asm__ volatile ("vsrad %0,%1,%2\n\t"
                     : "=v" (res)
                     : "v" (a), "v" (b));
   return res;
 }

 static ASM_FUNC_ATTR_INLINE block
 asm_swap_uint64_halfs(block a)
 {
   block res;
   __asm__ volatile ("xxswapd %x0, %x1"
                     : "=wa" (res)
                     : "wa" (a));
   return res;
 }

 static ASM_FUNC_ATTR_INLINE block
 asm_xor(block a, block b)
 {
   block res;
   __asm__ volatile ("vxor %0,%1,%2\n\t"
                     : "=v" (res)
                     : "v" (a), "v" (b));
   return res;
 }

 static ASM_FUNC_ATTR_INLINE block
 asm_cipher_be(block b, block rk)
 {
   block o;
   __asm__ volatile ("vcipher %0, %1, %2\n\t"
                     : "=v" (o)
                     : "v" (b), "v" (rk));
   return o;
 }

 static ASM_FUNC_ATTR_INLINE block
 asm_cipherlast_be(block b, block rk)
 {
   block o;
   __asm__ volatile ("vcipherlast %0, %1, %2\n\t"
                     : "=v" (o)
                     : "v" (b), "v" (rk));
   return o;
 }

 static ASM_FUNC_ATTR_INLINE block
 asm_ncipher_be(block b, block rk)
 {
   block o;
   __asm__ volatile ("vncipher %0, %1, %2\n\t"
                     : "=v" (o)
                     : "v" (b), "v" (rk));
   return o;
 }

 static ASM_FUNC_ATTR_INLINE block
 asm_ncipherlast_be(block b, block rk)
 {
   block o;
   __asm__ volatile ("vncipherlast %0, %1, %2\n\t"
                     : "=v" (o)
                     : "v" (b), "v" (rk));
   return o;
 }


 /* Make a decryption key from an encryption key. */
 static ASM_FUNC_ATTR_INLINE void
 internal_aes_ppc_prepare_decryption (RIJNDAEL_context *ctx)
 {
   u128_t *ekey = (u128_t *)(void *)ctx->keyschenc;
   u128_t *dkey = (u128_t *)(void *)ctx->keyschdec;
   int rounds = ctx->rounds;
   int rr;
   int r;

   for (r = 0, rr = rounds; r <= rounds; r++, rr--)
     {
       ALIGNED_STORE (dkey, r, ALIGNED_LOAD (ekey, rr));
     }
 }

 #endif /* G10_RIJNDAEL_PPC_COMMON_H */
diff --git a/cipher/rijndael-ppc.c b/cipher/rijndael-ppc.c
index 3e727628..f5c32361 100644
--- a/cipher/rijndael-ppc.c
+++ b/cipher/rijndael-ppc.c
@@ -1,243 +1,259 @@
 /* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation
  * Copyright (C) 2019 Shawn Landden
  * Copyright (C) 2019-2020 Jussi Kivilinna
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this program; if not, see <http://www.gnu.org/licenses/>.
  *
  * Alternatively, this code may be used in OpenSSL from The OpenSSL Project,
  * and Cryptogams by Andy Polyakov, and if made part of a release of either
  * or both projects, is thereafter dual-licensed under the license said project
  * is released under.
  */

 #include <config.h>

 #include "rijndael-internal.h"
 #include "cipher-internal.h"
 #include "bufhelp.h"

 #ifdef USE_PPC_CRYPTO

 #include "rijndael-ppc-common.h"


 #ifdef WORDS_BIGENDIAN
 static const block vec_bswap32_const =
   { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 };
 #else
 static const block vec_bswap32_const_neg =
   { ~3, ~2, ~1, ~0, ~7, ~6, ~5, ~4, ~11, ~10, ~9, ~8, ~15, ~14, ~13, ~12 };
 #endif


 static ASM_FUNC_ATTR_INLINE block
 asm_load_be_const(void)
 {
 #ifndef WORDS_BIGENDIAN
   return ALIGNED_LOAD (&vec_bswap32_const_neg, 0);
 #else
   static const block vec_dummy = { 0 };
   return vec_dummy;
 #endif
 }

 static ASM_FUNC_ATTR_INLINE block
 asm_be_swap(block vec, block be_bswap_const)
 {
   (void)be_bswap_const;
 #ifndef WORDS_BIGENDIAN
   return asm_vperm1 (vec, be_bswap_const);
 #else
   return vec;
 #endif
 }

 static ASM_FUNC_ATTR_INLINE block
 asm_load_be_noswap(unsigned long offset, const void *ptr)
 {
   block vec;
-  __asm__ volatile ("lxvw4x %x0,%1,%2\n\t"
-                    : "=wa" (vec)
-                    : "r" (offset), "r" ((uintptr_t)ptr)
-                    : "memory", "r0");
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ volatile ("lxvw4x %x0,0,%1\n\t"
+                      : "=wa" (vec)
+                      : "r" ((uintptr_t)ptr)
+                      : "memory");
+  else
+#endif
+    __asm__ volatile ("lxvw4x %x0,%1,%2\n\t"
+                      : "=wa" (vec)
+                      : "r" (offset), "r" ((uintptr_t)ptr)
+                      : "memory", "r0");
   /* NOTE: vec needs to be be-swapped using 'asm_be_swap' by caller */
   return vec;
 }

 static ASM_FUNC_ATTR_INLINE void
 asm_store_be_noswap(block vec, unsigned long offset, void *ptr)
 {
   /* NOTE: vec be-swapped using 'asm_be_swap' by caller */
-  __asm__ volatile ("stxvw4x %x0,%1,%2\n\t"
-                    :
-                    : "wa" (vec), "r" (offset), "r" ((uintptr_t)ptr)
-                    : "memory", "r0");
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ volatile ("stxvw4x %x0,0,%1\n\t"
+                      :
+                      : "wa" (vec), "r" ((uintptr_t)ptr)
+                      : "memory");
+  else
+#endif
+    __asm__ volatile ("stxvw4x %x0,%1,%2\n\t"
+                      :
+                      : "wa" (vec), "r" (offset), "r" ((uintptr_t)ptr)
+                      : "memory", "r0");
 }


 static ASM_FUNC_ATTR_INLINE u32
 _gcry_aes_sbox4_ppc8(u32 fourbytes)
 {
   union
     {
       PROPERLY_ALIGNED_TYPE dummy;
       block data_vec;
       u32 data32[4];
     } u;

   u.data32[0] = fourbytes;
   u.data_vec = vec_sbox_be(u.data_vec);
   return u.data32[0];
 }

 void
 _gcry_aes_ppc8_setkey (RIJNDAEL_context *ctx, const byte *key)
 {
   const block bige_const = asm_load_be_const();
   union
     {
       PROPERLY_ALIGNED_TYPE dummy;
       byte data[MAXKC][4];
       u32 data32[MAXKC];
     } tkk[2];
   unsigned int rounds = ctx->rounds;
   int KC = rounds - 6;
   unsigned int keylen = KC * 4;
   u128_t *ekey = (u128_t *)(void *)ctx->keyschenc;
   unsigned int i, r, t;
   byte rcon = 1;
   int j;
 #define k      tkk[0].data
 #define k_u32  tkk[0].data32
 #define tk     tkk[1].data
 #define tk_u32 tkk[1].data32
 #define W      (ctx->keyschenc)
 #define W_u32  (ctx->keyschenc32)

   for (i = 0; i < keylen; i++)
     {
       k[i >> 2][i & 3] = key[i];
     }

   for (j = KC-1; j >= 0; j--)
     {
       tk_u32[j] = k_u32[j];
     }
   r = 0;
   t = 0;
   /* Copy values into round key array.  */
   for (j = 0; (j < KC) && (r < rounds + 1); )
     {
       for (; (j < KC) && (t < 4); j++, t++)
         {
           W_u32[r][t] = le_bswap32(tk_u32[j]);
         }
       if (t == 4)
         {
           r++;
           t = 0;
         }
     }

   while (r < rounds + 1)
     {
       tk_u32[0] ^=
         le_bswap32(
           _gcry_aes_sbox4_ppc8(rol(le_bswap32(tk_u32[KC - 1]), 24)) ^ rcon);

       if (KC != 8)
         {
           for (j = 1; j < KC; j++)
             {
               tk_u32[j] ^= tk_u32[j-1];
             }
         }
       else
         {
           for (j = 1; j < KC/2; j++)
             {
               tk_u32[j] ^= tk_u32[j-1];
             }

           tk_u32[KC/2] ^=
             le_bswap32(_gcry_aes_sbox4_ppc8(le_bswap32(tk_u32[KC/2 - 1])));

           for (j = KC/2 + 1; j < KC; j++)
             {
               tk_u32[j] ^= tk_u32[j-1];
             }
         }

       /* Copy values into round key array.  */
       for (j = 0; (j < KC) && (r < rounds + 1); )
         {
           for (; (j < KC) && (t < 4); j++, t++)
             {
               W_u32[r][t] = le_bswap32(tk_u32[j]);
             }
           if (t == 4)
             {
               r++;
               t = 0;
             }
         }

       rcon = (rcon << 1) ^ (-(rcon >> 7) & 0x1b);
     }

   /* Store in big-endian order. */
   for (r = 0; r <= rounds; r++)
     {
 #ifndef WORDS_BIGENDIAN
       VEC_STORE_BE(ekey, r, ALIGNED_LOAD (ekey, r), bige_const);
 #else
       block rvec = ALIGNED_LOAD (ekey, r);
       ALIGNED_STORE (ekey, r,
                      vec_perm(rvec, rvec, vec_bswap32_const));
       (void)bige_const;
 #endif
     }

 #undef W
 #undef tk
 #undef k
 #undef W_u32
 #undef tk_u32
 #undef k_u32

   wipememory(&tkk, sizeof(tkk));
 }

 void
 _gcry_aes_ppc8_prepare_decryption (RIJNDAEL_context *ctx)
 {
   internal_aes_ppc_prepare_decryption (ctx);
 }


 #define GCRY_AES_PPC8 1
 #define ENCRYPT_BLOCK_FUNC  _gcry_aes_ppc8_encrypt
 #define DECRYPT_BLOCK_FUNC  _gcry_aes_ppc8_decrypt
 #define CFB_ENC_FUNC        _gcry_aes_ppc8_cfb_enc
 #define CFB_DEC_FUNC        _gcry_aes_ppc8_cfb_dec
 #define CBC_ENC_FUNC        _gcry_aes_ppc8_cbc_enc
 #define CBC_DEC_FUNC        _gcry_aes_ppc8_cbc_dec
 #define CTR_ENC_FUNC        _gcry_aes_ppc8_ctr_enc
 #define OCB_CRYPT_FUNC      _gcry_aes_ppc8_ocb_crypt
 #define OCB_AUTH_FUNC       _gcry_aes_ppc8_ocb_auth
 #define XTS_CRYPT_FUNC      _gcry_aes_ppc8_xts_crypt

 #include <rijndael-ppc-functions.h>

 #endif /* USE_PPC_CRYPTO */
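
All four hunks above make the same transformation, so it is worth spelling out
once.  lvx, stvx, lxvw4x and stxvw4x compute their effective address as
(RA|0) + RB: when the RA field encodes register 0, the hardware substitutes the
literal value 0, not the contents of r0.  The old code always passed the offset
in a GPR and therefore had to clobber r0 so the register allocator could never
pick r0 for the offset operand (where it would silently read as zero).  When
the offset is a compile-time constant 0, the new code instead encodes a literal
0 in the RA slot, which needs one GPR input instead of two and no r0 clobber.
Below is a minimal annotated sketch of the dispatch under the same assumptions
as the patch (GCC-compatible compiler, AltiVec enabled via -maltivec);
example_aligned_ld and load_two_blocks are illustrative names, not part of
libgcrypt:

#include <altivec.h>
#include <stdint.h>

typedef vector unsigned char block;

static inline block
example_aligned_ld (unsigned long offset, const void *ptr)
{
  block vec;
#if __GNUC__ >= 4
  if (__builtin_constant_p (offset) && offset == 0)
    /* RA field encoded as literal 0: one GPR input, no r0 clobber. */
    __asm__ volatile ("lvx %0,0,%1\n\t"
                      : "=v" (vec)
                      : "r" ((uintptr_t)ptr)
                      : "memory");
  else
#endif
    /* General form: EA = (RA|0) + RB.  If the allocator placed the
       offset in r0 the hardware would read it as 0, so r0 is listed
       as clobbered to keep it out of the constraint's register pool. */
    __asm__ volatile ("lvx %0,%1,%2\n\t"
                      : "=v" (vec)
                      : "r" (offset), "r" ((uintptr_t)ptr)
                      : "memory", "r0");
  return vec;
}

/* Usage: both calls inline to a single lvx, but the first one does not
   burn a GPR on the zero offset.  __builtin_constant_p folds at compile
   time, so no runtime branch survives at -O2. */
static inline block
load_two_blocks (const void *p, block *second)
{
  *second = example_aligned_ld (16, p);  /* indexed form */
  return example_aligned_ld (0, p);      /* literal-0 form */
}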