diff --git a/cipher/Makefile.am b/cipher/Makefile.am index 16066bfc..1e67771e 100644 --- a/cipher/Makefile.am +++ b/cipher/Makefile.am @@ -1,148 +1,148 @@ # Makefile for cipher modules # Copyright (C) 1998, 1999, 2000, 2001, 2002, # 2003, 2009 Free Software Foundation, Inc. # # This file is part of Libgcrypt. # # Libgcrypt is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as # published by the Free Software Foundation; either version 2.1 of # the License, or (at your option) any later version. # # Libgcrypt is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this program; if not, see . # Process this file with automake to produce Makefile.in # Need to include ../src in addition to top_srcdir because gcrypt.h is # a built header. 
AM_CPPFLAGS = -I../src -I$(top_srcdir)/src -I../mpi -I$(top_srcdir)/mpi AM_CFLAGS = $(GPG_ERROR_CFLAGS) AM_CCASFLAGS = $(NOEXECSTACK_FLAGS) EXTRA_DIST = gost-s-box.c CLEANFILES = gost-s-box DISTCLEANFILES = gost-sb.h noinst_LTLIBRARIES = libcipher.la GCRYPT_MODULES = @GCRYPT_CIPHERS@ @GCRYPT_PUBKEY_CIPHERS@ \ @GCRYPT_DIGESTS@ @GCRYPT_KDFS@ libcipher_la_DEPENDENCIES = $(GCRYPT_MODULES) libcipher_la_LIBADD = $(GCRYPT_MODULES) libcipher_la_SOURCES = \ cipher.c cipher-internal.h \ cipher-cbc.c \ cipher-cfb.c \ cipher-ofb.c \ cipher-ctr.c \ cipher-aeswrap.c \ cipher-ccm.c \ cipher-cmac.c \ - cipher-gcm.c cipher-gcm-intel-pclmul.c \ + cipher-gcm.c cipher-gcm-intel-pclmul.c cipher-gcm-armv7-neon.S \ cipher-gcm-armv8-aarch32-ce.S cipher-gcm-armv8-aarch64-ce.S \ cipher-poly1305.c \ cipher-ocb.c \ cipher-xts.c \ cipher-eax.c \ cipher-selftest.c cipher-selftest.h \ pubkey.c pubkey-internal.h pubkey-util.c \ md.c \ mac.c mac-internal.h \ mac-hmac.c mac-cmac.c mac-gmac.c mac-poly1305.c \ poly1305.c poly1305-internal.h \ kdf.c kdf-internal.h \ hmac-tests.c \ bithelp.h \ bufhelp.h \ primegen.c \ hash-common.c hash-common.h \ dsa-common.c rsa-common.c \ sha1.h EXTRA_libcipher_la_SOURCES = \ asm-common-amd64.h \ asm-common-aarch64.h \ asm-poly1305-amd64.h \ arcfour.c arcfour-amd64.S \ blowfish.c blowfish-amd64.S blowfish-arm.S \ cast5.c cast5-amd64.S cast5-arm.S \ chacha20.c chacha20-amd64-ssse3.S chacha20-amd64-avx2.S \ chacha20-armv7-neon.S chacha20-aarch64.S \ crc.c crc-intel-pclmul.c \ des.c des-amd64.S \ dsa.c \ elgamal.c \ ecc.c ecc-curves.c ecc-misc.c ecc-common.h \ ecc-ecdsa.c ecc-eddsa.c ecc-gost.c \ idea.c \ gost28147.c gost.h \ gostr3411-94.c \ md4.c \ md5.c \ rijndael.c rijndael-internal.h rijndael-tables.h \ rijndael-aesni.c rijndael-padlock.c \ rijndael-amd64.S rijndael-arm.S \ rijndael-ssse3-amd64.c rijndael-ssse3-amd64-asm.S \ rijndael-armv8-ce.c rijndael-armv8-aarch32-ce.S \ rijndael-armv8-aarch64-ce.S rijndael-aarch64.S \ rmd160.c \ rsa.c \ salsa20.c 
salsa20-amd64.S salsa20-armv7-neon.S \ scrypt.c \ seed.c \ serpent.c serpent-sse2-amd64.S \ serpent-avx2-amd64.S serpent-armv7-neon.S \ sha1.c sha1-ssse3-amd64.S sha1-avx-amd64.S sha1-avx-bmi2-amd64.S \ sha1-armv7-neon.S sha1-armv8-aarch32-ce.S sha1-armv8-aarch64-ce.S \ sha1-intel-shaext.c \ sha256.c sha256-ssse3-amd64.S sha256-avx-amd64.S \ sha256-avx2-bmi2-amd64.S \ sha256-armv8-aarch32-ce.S sha256-armv8-aarch64-ce.S \ sha256-intel-shaext.c \ sha512.c sha512-ssse3-amd64.S sha512-avx-amd64.S \ sha512-avx2-bmi2-amd64.S \ sha512-armv7-neon.S sha512-arm.S \ sm3.c \ keccak.c keccak_permute_32.h keccak_permute_64.h keccak-armv7-neon.S \ stribog.c \ tiger.c \ whirlpool.c whirlpool-sse2-amd64.S \ twofish.c twofish-amd64.S twofish-arm.S twofish-aarch64.S \ twofish-avx2-amd64.S \ rfc2268.c \ camellia.c camellia.h camellia-glue.c camellia-aesni-avx-amd64.S \ camellia-aesni-avx2-amd64.S camellia-arm.S camellia-aarch64.S \ blake2.c \ blake2b-amd64-avx2.S blake2s-amd64-avx.S gost28147.lo: gost-sb.h gost-sb.h: gost-s-box ./gost-s-box $@ gost-s-box: gost-s-box.c $(CC_FOR_BUILD) -o $@ $(srcdir)/gost-s-box.c if ENABLE_O_FLAG_MUNGING o_flag_munging = sed -e 's/-O\([2-9s][2-9s]*\)/-O1/' -e 's/-Ofast/-O1/g' else o_flag_munging = cat endif # We need to lower the optimization for this module. tiger.o: $(srcdir)/tiger.c `echo $(COMPILE) -c $(srcdir)/tiger.c | $(o_flag_munging) ` tiger.lo: $(srcdir)/tiger.c `echo $(LTCOMPILE) -c $(srcdir)/tiger.c | $(o_flag_munging) ` diff --git a/cipher/cipher-gcm-armv7-neon.S b/cipher/cipher-gcm-armv7-neon.S new file mode 100644 index 00000000..a801a5e5 --- /dev/null +++ b/cipher/cipher-gcm-armv7-neon.S @@ -0,0 +1,341 @@ +/* cipher-gcm-armv7-neon.S - ARM/NEON accelerated GHASH + * Copyright (C) 2019 Jussi Kivilinna + * + * This file is part of Libgcrypt. 
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \
+    defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_NEON)
+
+.syntax unified
+.fpu neon
+.arm
+
+.text
+
+#ifdef __PIC__
+#  define GET_DATA_POINTER(reg, name, rtmp) \
+		ldr reg, 1f; \
+		ldr rtmp, 2f; \
+		b 3f; \
+	1:	.word _GLOBAL_OFFSET_TABLE_-(3f+8); \
+	2:	.word name(GOT); \
+	3:	add reg, pc, reg; \
+		ldr reg, [reg, rtmp];
+#else
+#  define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name
+#endif
+
+
+/* Constants */
+
+.align 4
+gcry_gcm_reduction_constant:
+.Lrconst64:
+  .quad 0xc200000000000000
+
+/* Register macros */
+
+#define rhash q0
+#define rhash_l d0
+#define rhash_h d1
+
+#define rh1 q1
+#define rh1_l d2
+#define rh1_h d3
+
+#define rbuf q2
+#define rbuf_l d4
+#define rbuf_h d5
+
+#define rbuf1 q3
+#define rbuf1_l d6
+#define rbuf1_h d7
+
+#define t0q q4
+#define t0l d8
+#define t0h d9
+
+#define t1q q5
+#define t1l d10
+#define t1h d11
+
+#define t2q q6
+#define t2l d12
+#define t2h d13
+
+#define t3q q7
+#define t3l d14
+#define t3h d15
+
+/* q8 */
+#define k16 d16
+#define k32 d17
+
+/* q9 */
+#define k48 d18
+
+#define k0 q10
+
+#define rr0 q11
+#define rr0_l d22
+#define rr0_h d23
+
+#define rr1 q12
+#define rr1_l d24
+#define rr1_h d25
+
+#define rt0 q13
+#define rt0_l d26
+#define rt0_h d27
+
+#define rt1
q14 +#define rt1_l d28 +#define rt1_h d29 + +#define rrconst q15 +#define rrconst_l d30 +#define rrconst_h d31 + +/* Macro for 64x64=>128 carry-less multiplication using vmull.p8 instruction. + * + * From "Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R. Fast Software + * Polynomial Multiplication on ARM Processors using the NEON Engine. The + * Second International Workshop on Modern Cryptography and Security + * Engineering — MoCrySEn, 2013". */ + +#define vmull_p64(rq, rl, rh, ad, bd) \ + vext.8 t0l, ad, ad, $1; \ + vmull.p8 t0q, t0l, bd; \ + vext.8 rl, bd, bd, $1; \ + vmull.p8 rq, ad, rl; \ + vext.8 t1l, ad, ad, $2; \ + vmull.p8 t1q, t1l, bd; \ + vext.8 t3l, bd, bd, $2; \ + vmull.p8 t3q, ad, t3l; \ + vext.8 t2l, ad, ad, $3; \ + vmull.p8 t2q, t2l, bd; \ + veor t0q, t0q, rq; \ + vext.8 rl, bd, bd, $3; \ + vmull.p8 rq, ad, rl; \ + veor t1q, t1q, t3q; \ + vext.8 t3l, bd, bd, $4; \ + vmull.p8 t3q, ad, t3l; \ + veor t0l, t0l, t0h; \ + vand t0h, t0h, k48; \ + veor t1l, t1l, t1h; \ + vand t1h, t1h, k32; \ + veor t2q, t2q, rq; \ + veor t0l, t0l, t0h; \ + veor t1l, t1l, t1h; \ + veor t2l, t2l, t2h; \ + vand t2h, t2h, k16; \ + veor t3l, t3l, t3h; \ + vmov.i64 t3h, $0; \ + vext.8 t0q, t0q, t0q, $15; \ + veor t2l, t2l, t2h; \ + vext.8 t1q, t1q, t1q, $14; \ + vmull.p8 rq, ad, bd; \ + vext.8 t2q, t2q, t2q, $13; \ + vext.8 t3q, t3q, t3q, $12; \ + veor t0q, t0q, t1q; \ + veor t2q, t2q, t3q; \ + veor rq, rq, t0q; \ + veor rq, rq, t2q; + +/* GHASH macros. + * + * See "Gouvêa, C. P. L. & López, J. Implementing GCM on ARMv8. Topics in + * Cryptology — CT-RSA 2015" for details. + */ + +/* Input: 'a' and 'b', Output: 'r0:r1' (low 128-bits in r0, high in r1) + * Note: 'r1' may be 'a' or 'b', 'r0' must not be either 'a' or 'b'. 
+ */
+#define PMUL_128x128(r0, r1, a, b, t1, t2, interleave_op) \
+        veor t1##_h, b##_l, b##_h; \
+        veor t1##_l, a##_l, a##_h; \
+        vmull_p64( r0, r0##_l, r0##_h, a##_l, b##_l ); \
+        vmull_p64( r1, r1##_l, r1##_h, a##_h, b##_h ); \
+        vmull_p64( t2, t2##_h, t2##_l, t1##_h, t1##_l ); \
+        interleave_op; \
+        veor t2, r0; \
+        veor t2, r1; \
+        veor r0##_h, t2##_l; \
+        veor r1##_l, t2##_h;
+
+/* Reduction using Xor and Shift.
+ * Input: 'r0:r1', Output: 'a'
+ *
+ * See "Shay Gueron, Michael E. Kounavis. Intel Carry-Less Multiplication
+ * Instruction and its Usage for Computing the GCM Mode" for details.
+ */
+#define REDUCTION(a, r0, r1, t, interleave_op) \
+        vshl.u32 t0q, r0, #31; \
+        vshl.u32 t1q, r0, #30; \
+        vshl.u32 t2q, r0, #25; \
+        veor t0q, t0q, t1q; \
+        veor t0q, t0q, t2q; \
+        vext.8 t, t0q, k0, #4; \
+        vext.8 t0q, k0, t0q, #(16-12); \
+        veor r0, r0, t0q; \
+        interleave_op; \
+        vshr.u32 t0q, r0, #1; \
+        vshr.u32 t1q, r0, #2; \
+        vshr.u32 t2q, r0, #7; \
+        veor t0q, t0q, t1q; \
+        veor t0q, t0q, t2q; \
+        veor t0q, t0q, t; \
+        veor r0, r0, t0q; \
+        veor a, r0, r1;
+
+#define _(...) __VA_ARGS__
+#define __ _()
+
+/* Other functional macros: CLEAR_REG zeroes a register by xoring it with itself */
+
+#define CLEAR_REG(reg) veor reg, reg;
+
+
+/*
+ * unsigned int _gcry_ghash_armv7_neon (void *gcm_key, byte *result,
+ *                                      const byte *buf, size_t nblocks);
+ */
+.align 3
+.globl _gcry_ghash_armv7_neon
+.type  _gcry_ghash_armv7_neon,%function;
+_gcry_ghash_armv7_neon:
+  /* input:
+   *    r0: gcm_key
+   *    r1: result/hash
+   *    r2: buf
+   *    r3: nblocks (0 is accepted: nothing is read or written, 0 is returned)
+   */
+  push {r4-r6, lr}
+
+  cmp r3, #0
+  beq .Ldo_nothing
+
+  vpush {q4-q7} /* q4-q7 are used below as the t0q-t3q temporaries */
+
+  vld1.64 {rhash}, [r1] /* running hash value */
+  vld1.64 {rh1}, [r0] /* 16 bytes at gcm_key, as stored by the setup routine */
+
+  vrev64.8 rhash, rhash /* byte-swap */
+
+  vmov.i64 k0, #0x0
+  vmov.i64 k16, #0xffff
+  vmov.i64 k32, #0xffffffff
+  vmov.i64 k48, #0xffffffffffff
+
+  vext.8 rhash, rhash, rhash, #8
+
+  /* Xor the first block into the hash; .Loop below handles any further blocks. */
+
+  vld1.64 {rbuf}, [r2]!
+  subs r3, r3, #1
+
+  vrev64.8 rbuf, rbuf /* byte-swap */
+  vext.8 rbuf, rbuf, rbuf, #8
+
+  veor rhash, rhash, rbuf
+
+  beq .Lend
+
+.Loop:
+  vld1.64 {rbuf}, [r2]!
+  PMUL_128x128(rr0, rr1, rhash, rh1, rt0, rt1, _(vrev64.8 rbuf, rbuf))
+  REDUCTION(rhash, rr0, rr1, rt0, _(vext.8 rbuf, rbuf, rbuf, #8))
+  subs r3, r3, #1
+  veor rhash, rhash, rbuf
+
+  bne .Loop
+
+.Lend:
+  PMUL_128x128(rr0, rr1, rhash, rh1, rt0, rt1, _(CLEAR_REG(rbuf)))
+  REDUCTION(rhash, rr0, rr1, rt0, _(CLEAR_REG(rh1)))
+
+.Ldone:
+  CLEAR_REG(rr1)
+  vrev64.8 rhash, rhash /* byte-swap */
+  CLEAR_REG(rt0)
+  CLEAR_REG(rr0)
+  vext.8 rhash, rhash, rhash, #8
+  CLEAR_REG(rt1)
+  CLEAR_REG(t0q)
+  CLEAR_REG(t1q)
+  CLEAR_REG(t2q)
+  CLEAR_REG(t3q)
+  vst1.64 {rhash}, [r1] /* write the updated hash back to 'result' */
+  CLEAR_REG(rhash)
+
+  vpop {q4-q7}
+
+.Ldo_nothing:
+  mov r0, #0 /* return value is always 0 */
+  pop {r4-r6, pc}
+.size _gcry_ghash_armv7_neon,.-_gcry_ghash_armv7_neon;
+
+
+/*
+ * void _gcry_ghash_setup_armv7_neon (void *gcm_key);
+ */
+.align 3
+.globl _gcry_ghash_setup_armv7_neon
+.type  _gcry_ghash_setup_armv7_neon,%function;
+_gcry_ghash_setup_armv7_neon:
+  /* input:
+   *    r0: gcm_key (16 bytes, rewritten in place by GCM_LSH_1 below)
+   */
+
+  vpush {q4-q7}
+
+  GET_DATA_POINTER(r2, .Lrconst64, r3)
+
+  vld1.64 {rrconst_h}, [r2]
+
+#define GCM_LSH_1(r_out, ia, ib, const_d, oa, ob, ma) \
+        /* H <<< 1 */ \
+        vshr.s64 ma, ib, #63; \
+        vshr.u64 oa, ib, #63; \
+        vshr.u64 ob, ia, #63; \
+        vand ma, const_d; \
+        vshl.u64 ib, ib, #1; \
+        vshl.u64 ia, ia, #1; \
+        vorr ob, ib; \
+        vorr oa, ia; \
+        veor ob, ma; \
+        vst1.64 {oa, ob}, [r_out]
+
+  vld1.64 {rhash}, [r0]
+  vrev64.8 rhash, rhash /* byte-swap */
+  vext.8 rhash, rhash, rhash, #8
+
+  vmov rbuf1, rhash
+  GCM_LSH_1(r0, rhash_l, rhash_h, rrconst_h, rh1_l, rh1_h, rt1_l) /* H<<<1 */
+
+  CLEAR_REG(rh1)
+  CLEAR_REG(rhash)
+  CLEAR_REG(rbuf1)
+  CLEAR_REG(rrconst)
+  vpop {q4-q7}
+  bx lr
+.size _gcry_ghash_setup_armv7_neon,.-_gcry_ghash_setup_armv7_neon;
+
+#endif
diff --git a/cipher/cipher-gcm.c b/cipher/cipher-gcm.c
index 4fdd6120..cbda87be 100644
--- a/cipher/cipher-gcm.c
+++
b/cipher/cipher-gcm.c
@@ -1,1019 +1,1046 @@
 /* cipher-gcm.c  - Generic Galois Counter Mode implementation
  * Copyright (C) 2013 Dmitry Eremin-Solenikov
  * Copyright (C) 2013, 2018 Jussi Kivilinna
  *
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser general Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * Libgcrypt is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <config.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <errno.h>
 
 #include "g10lib.h"
 #include "cipher.h"
 #include "bufhelp.h"
 #include "./cipher-internal.h"
 
 
 #ifdef GCM_USE_INTEL_PCLMUL
 extern void _gcry_ghash_setup_intel_pclmul (gcry_cipher_hd_t c);
 
 extern unsigned int _gcry_ghash_intel_pclmul (gcry_cipher_hd_t c, byte *result,
                                               const byte *buf, size_t nblocks);
 #endif
 
 #ifdef GCM_USE_ARM_PMULL
 extern void _gcry_ghash_setup_armv8_ce_pmull (void *gcm_key, void *gcm_table);
 
 extern unsigned int _gcry_ghash_armv8_ce_pmull (void *gcm_key, byte *result,
                                                 const byte *buf, size_t nblocks,
                                                 void *gcm_table);
 
 static void
 ghash_setup_armv8_ce_pmull (gcry_cipher_hd_t c)
 {
   _gcry_ghash_setup_armv8_ce_pmull(c->u_mode.gcm.u_ghash_key.key,
                                    c->u_mode.gcm.gcm_table);
 }
 
 static unsigned int
 ghash_armv8_ce_pmull (gcry_cipher_hd_t c, byte *result, const byte *buf,
                       size_t nblocks)
 {
   return _gcry_ghash_armv8_ce_pmull(c->u_mode.gcm.u_ghash_key.key, result, buf,
                                     nblocks, c->u_mode.gcm.gcm_table);
 }
+#endif /* GCM_USE_ARM_PMULL */
 
-#endif
+#ifdef GCM_USE_ARM_NEON
+extern void _gcry_ghash_setup_armv7_neon (void *gcm_key);
+
+extern unsigned int _gcry_ghash_armv7_neon (void
*gcm_key, byte *result, + const byte *buf, size_t nblocks); + +static void +ghash_setup_armv7_neon (gcry_cipher_hd_t c) +{ + _gcry_ghash_setup_armv7_neon(c->u_mode.gcm.u_ghash_key.key); +} + +static unsigned int +ghash_armv7_neon (gcry_cipher_hd_t c, byte *result, const byte *buf, + size_t nblocks) +{ + return _gcry_ghash_armv7_neon(c->u_mode.gcm.u_ghash_key.key, result, buf, + nblocks); +} +#endif /* GCM_USE_ARM_NEON */ #ifdef GCM_USE_TABLES static const u16 gcmR[256] = { 0x0000, 0x01c2, 0x0384, 0x0246, 0x0708, 0x06ca, 0x048c, 0x054e, 0x0e10, 0x0fd2, 0x0d94, 0x0c56, 0x0918, 0x08da, 0x0a9c, 0x0b5e, 0x1c20, 0x1de2, 0x1fa4, 0x1e66, 0x1b28, 0x1aea, 0x18ac, 0x196e, 0x1230, 0x13f2, 0x11b4, 0x1076, 0x1538, 0x14fa, 0x16bc, 0x177e, 0x3840, 0x3982, 0x3bc4, 0x3a06, 0x3f48, 0x3e8a, 0x3ccc, 0x3d0e, 0x3650, 0x3792, 0x35d4, 0x3416, 0x3158, 0x309a, 0x32dc, 0x331e, 0x2460, 0x25a2, 0x27e4, 0x2626, 0x2368, 0x22aa, 0x20ec, 0x212e, 0x2a70, 0x2bb2, 0x29f4, 0x2836, 0x2d78, 0x2cba, 0x2efc, 0x2f3e, 0x7080, 0x7142, 0x7304, 0x72c6, 0x7788, 0x764a, 0x740c, 0x75ce, 0x7e90, 0x7f52, 0x7d14, 0x7cd6, 0x7998, 0x785a, 0x7a1c, 0x7bde, 0x6ca0, 0x6d62, 0x6f24, 0x6ee6, 0x6ba8, 0x6a6a, 0x682c, 0x69ee, 0x62b0, 0x6372, 0x6134, 0x60f6, 0x65b8, 0x647a, 0x663c, 0x67fe, 0x48c0, 0x4902, 0x4b44, 0x4a86, 0x4fc8, 0x4e0a, 0x4c4c, 0x4d8e, 0x46d0, 0x4712, 0x4554, 0x4496, 0x41d8, 0x401a, 0x425c, 0x439e, 0x54e0, 0x5522, 0x5764, 0x56a6, 0x53e8, 0x522a, 0x506c, 0x51ae, 0x5af0, 0x5b32, 0x5974, 0x58b6, 0x5df8, 0x5c3a, 0x5e7c, 0x5fbe, 0xe100, 0xe0c2, 0xe284, 0xe346, 0xe608, 0xe7ca, 0xe58c, 0xe44e, 0xef10, 0xeed2, 0xec94, 0xed56, 0xe818, 0xe9da, 0xeb9c, 0xea5e, 0xfd20, 0xfce2, 0xfea4, 0xff66, 0xfa28, 0xfbea, 0xf9ac, 0xf86e, 0xf330, 0xf2f2, 0xf0b4, 0xf176, 0xf438, 0xf5fa, 0xf7bc, 0xf67e, 0xd940, 0xd882, 0xdac4, 0xdb06, 0xde48, 0xdf8a, 0xddcc, 0xdc0e, 0xd750, 0xd692, 0xd4d4, 0xd516, 0xd058, 0xd19a, 0xd3dc, 0xd21e, 0xc560, 0xc4a2, 0xc6e4, 0xc726, 0xc268, 0xc3aa, 0xc1ec, 0xc02e, 0xcb70, 0xcab2, 0xc8f4, 0xc936, 0xcc78, 0xcdba, 
0xcffc, 0xce3e, 0x9180, 0x9042, 0x9204, 0x93c6, 0x9688, 0x974a, 0x950c, 0x94ce, 0x9f90, 0x9e52, 0x9c14, 0x9dd6, 0x9898, 0x995a, 0x9b1c, 0x9ade, 0x8da0, 0x8c62, 0x8e24, 0x8fe6, 0x8aa8, 0x8b6a, 0x892c, 0x88ee, 0x83b0, 0x8272, 0x8034, 0x81f6, 0x84b8, 0x857a, 0x873c, 0x86fe, 0xa9c0, 0xa802, 0xaa44, 0xab86, 0xaec8, 0xaf0a, 0xad4c, 0xac8e, 0xa7d0, 0xa612, 0xa454, 0xa596, 0xa0d8, 0xa11a, 0xa35c, 0xa29e, 0xb5e0, 0xb422, 0xb664, 0xb7a6, 0xb2e8, 0xb32a, 0xb16c, 0xb0ae, 0xbbf0, 0xba32, 0xb874, 0xb9b6, 0xbcf8, 0xbd3a, 0xbf7c, 0xbebe, }; #ifdef GCM_TABLES_USE_U64 static void bshift (u64 * b0, u64 * b1) { u64 t[2], mask; t[0] = *b0; t[1] = *b1; mask = t[1] & 1 ? 0xe1 : 0; mask <<= 56; *b1 = (t[1] >> 1) ^ (t[0] << 63); *b0 = (t[0] >> 1) ^ mask; } static void do_fillM (unsigned char *h, u64 *M) { int i, j; M[0 + 0] = 0; M[0 + 16] = 0; M[8 + 0] = buf_get_be64 (h + 0); M[8 + 16] = buf_get_be64 (h + 8); for (i = 4; i > 0; i /= 2) { M[i + 0] = M[2 * i + 0]; M[i + 16] = M[2 * i + 16]; bshift (&M[i], &M[i + 16]); } for (i = 2; i < 16; i *= 2) for (j = 1; j < i; j++) { M[(i + j) + 0] = M[i + 0] ^ M[j + 0]; M[(i + j) + 16] = M[i + 16] ^ M[j + 16]; } } static inline unsigned int do_ghash (unsigned char *result, const unsigned char *buf, const u64 *gcmM) { u64 V[2]; u64 tmp[2]; const u64 *M; u64 T; u32 A; int i; cipher_block_xor (V, result, buf, 16); V[0] = be_bswap64 (V[0]); V[1] = be_bswap64 (V[1]); /* First round can be manually tweaked based on fact that 'tmp' is zero. 
*/ i = 15; M = &gcmM[(V[1] & 0xf)]; V[1] >>= 4; tmp[0] = (M[0] >> 4) ^ ((u64) gcmR[(M[16] & 0xf) << 4] << 48); tmp[1] = (M[16] >> 4) ^ (M[0] << 60); tmp[0] ^= gcmM[(V[1] & 0xf) + 0]; tmp[1] ^= gcmM[(V[1] & 0xf) + 16]; V[1] >>= 4; --i; while (1) { M = &gcmM[(V[1] & 0xf)]; V[1] >>= 4; A = tmp[1] & 0xff; T = tmp[0]; tmp[0] = (T >> 8) ^ ((u64) gcmR[A] << 48) ^ gcmM[(V[1] & 0xf) + 0]; tmp[1] = (T << 56) ^ (tmp[1] >> 8) ^ gcmM[(V[1] & 0xf) + 16]; tmp[0] ^= (M[0] >> 4) ^ ((u64) gcmR[(M[16] & 0xf) << 4] << 48); tmp[1] ^= (M[16] >> 4) ^ (M[0] << 60); if (i == 0) break; else if (i == 8) V[1] = V[0]; else V[1] >>= 4; --i; } buf_put_be64 (result + 0, tmp[0]); buf_put_be64 (result + 8, tmp[1]); return (sizeof(V) + sizeof(T) + sizeof(tmp) + sizeof(int)*2 + sizeof(void*)*5); } #else /*!GCM_TABLES_USE_U64*/ static void bshift (u32 * M, int i) { u32 t[4], mask; t[0] = M[i * 4 + 0]; t[1] = M[i * 4 + 1]; t[2] = M[i * 4 + 2]; t[3] = M[i * 4 + 3]; mask = t[3] & 1 ? 0xe1 : 0; M[i * 4 + 3] = (t[3] >> 1) ^ (t[2] << 31); M[i * 4 + 2] = (t[2] >> 1) ^ (t[1] << 31); M[i * 4 + 1] = (t[1] >> 1) ^ (t[0] << 31); M[i * 4 + 0] = (t[0] >> 1) ^ (mask << 24); } static void do_fillM (unsigned char *h, u32 *M) { int i, j; M[0 * 4 + 0] = 0; M[0 * 4 + 1] = 0; M[0 * 4 + 2] = 0; M[0 * 4 + 3] = 0; M[8 * 4 + 0] = buf_get_be32 (h + 0); M[8 * 4 + 1] = buf_get_be32 (h + 4); M[8 * 4 + 2] = buf_get_be32 (h + 8); M[8 * 4 + 3] = buf_get_be32 (h + 12); for (i = 4; i > 0; i /= 2) { M[i * 4 + 0] = M[2 * i * 4 + 0]; M[i * 4 + 1] = M[2 * i * 4 + 1]; M[i * 4 + 2] = M[2 * i * 4 + 2]; M[i * 4 + 3] = M[2 * i * 4 + 3]; bshift (M, i); } for (i = 2; i < 16; i *= 2) for (j = 1; j < i; j++) { M[(i + j) * 4 + 0] = M[i * 4 + 0] ^ M[j * 4 + 0]; M[(i + j) * 4 + 1] = M[i * 4 + 1] ^ M[j * 4 + 1]; M[(i + j) * 4 + 2] = M[i * 4 + 2] ^ M[j * 4 + 2]; M[(i + j) * 4 + 3] = M[i * 4 + 3] ^ M[j * 4 + 3]; } } static inline unsigned int do_ghash (unsigned char *result, const unsigned char *buf, const u32 *gcmM) { byte V[16]; u32 tmp[4]; u32 v; 
const u32 *M, *m; u32 T[3]; int i; cipher_block_xor (V, result, buf, 16); /* V is big-endian */ /* First round can be manually tweaked based on fact that 'tmp' is zero. */ i = 15; v = V[i]; M = &gcmM[(v & 0xf) * 4]; v = (v & 0xf0) >> 4; m = &gcmM[v * 4]; v = V[--i]; tmp[0] = (M[0] >> 4) ^ ((u64) gcmR[(M[3] << 4) & 0xf0] << 16) ^ m[0]; tmp[1] = (M[1] >> 4) ^ (M[0] << 28) ^ m[1]; tmp[2] = (M[2] >> 4) ^ (M[1] << 28) ^ m[2]; tmp[3] = (M[3] >> 4) ^ (M[2] << 28) ^ m[3]; while (1) { M = &gcmM[(v & 0xf) * 4]; v = (v & 0xf0) >> 4; m = &gcmM[v * 4]; T[0] = tmp[0]; T[1] = tmp[1]; T[2] = tmp[2]; tmp[0] = (T[0] >> 8) ^ ((u32) gcmR[tmp[3] & 0xff] << 16) ^ m[0]; tmp[1] = (T[0] << 24) ^ (tmp[1] >> 8) ^ m[1]; tmp[2] = (T[1] << 24) ^ (tmp[2] >> 8) ^ m[2]; tmp[3] = (T[2] << 24) ^ (tmp[3] >> 8) ^ m[3]; tmp[0] ^= (M[0] >> 4) ^ ((u64) gcmR[(M[3] << 4) & 0xf0] << 16); tmp[1] ^= (M[1] >> 4) ^ (M[0] << 28); tmp[2] ^= (M[2] >> 4) ^ (M[1] << 28); tmp[3] ^= (M[3] >> 4) ^ (M[2] << 28); if (i == 0) break; v = V[--i]; } buf_put_be32 (result + 0, tmp[0]); buf_put_be32 (result + 4, tmp[1]); buf_put_be32 (result + 8, tmp[2]); buf_put_be32 (result + 12, tmp[3]); return (sizeof(V) + sizeof(T) + sizeof(tmp) + sizeof(int)*2 + sizeof(void*)*6); } #endif /*!GCM_TABLES_USE_U64*/ #define fillM(c) \ do_fillM (c->u_mode.gcm.u_ghash_key.key, c->u_mode.gcm.gcm_table) #define GHASH(c, result, buf) do_ghash (result, buf, c->u_mode.gcm.gcm_table) #else static unsigned long bshift (unsigned long *b) { unsigned long c; int i; c = b[3] & 1; for (i = 3; i > 0; i--) { b[i] = (b[i] >> 1) | (b[i - 1] << 31); } b[i] >>= 1; return c; } static unsigned int do_ghash (unsigned char *hsub, unsigned char *result, const unsigned char *buf) { unsigned long V[4]; int i, j; byte *p; #ifdef WORDS_BIGENDIAN p = result; #else unsigned long T[4]; cipher_block_xor (V, result, buf, 16); for (i = 0; i < 4; i++) { V[i] = (V[i] & 0x00ff00ff) << 8 | (V[i] & 0xff00ff00) >> 8; V[i] = (V[i] & 0x0000ffff) << 16 | (V[i] & 0xffff0000) >> 16; } p 
= (byte *) T;
 #endif
 
   memset (p, 0, 16);
 
   for (i = 0; i < 16; i++)
     {
       for (j = 0x80; j; j >>= 1)
         {
           if (hsub[i] & j)
             cipher_block_xor (p, p, V, 16);
           if (bshift (V))
             V[0] ^= 0xe1000000;
         }
     }
 
 #ifndef WORDS_BIGENDIAN
   for (i = 0, p = (byte *) T; i < 16; i += 4, p += 4)
     {
       result[i + 0] = p[3];
       result[i + 1] = p[2];
       result[i + 2] = p[1];
       result[i + 3] = p[0];
     }
 #endif
 
   return (sizeof(V) + sizeof(T) + sizeof(int)*2 + sizeof(void*)*5);
 }
 
 #define fillM(c) do { } while (0)
 #define GHASH(c, result, buf) do_ghash (c->u_mode.gcm.u_ghash_key.key, result, buf)
 
 #endif /* !GCM_USE_TABLES */
 
 
 static unsigned int
 ghash_internal (gcry_cipher_hd_t c, byte *result, const byte *buf,
                 size_t nblocks)
 {
   const unsigned int blocksize = GCRY_GCM_BLOCK_LEN;
   unsigned int burn = 0;
 
   while (nblocks)
     {
       burn = GHASH (c, result, buf);
       buf += blocksize;
       nblocks--;
     }
 
   return burn + (burn ? 5*sizeof(void*) : 0);
 }
 
 
 static void
 setupM (gcry_cipher_hd_t c)
 {
-#if defined(GCM_USE_INTEL_PCLMUL) || defined(GCM_USE_ARM_PMULL)
+#if defined(GCM_USE_INTEL_PCLMUL) || defined(GCM_USE_ARM_PMULL) || defined(GCM_USE_ARM_NEON)
   unsigned int features = _gcry_get_hw_features ();
 #endif
 
   if (0)
     ;
 #ifdef GCM_USE_INTEL_PCLMUL
   else if (features & HWF_INTEL_PCLMUL)
     {
       c->u_mode.gcm.ghash_fn = _gcry_ghash_intel_pclmul;
       _gcry_ghash_setup_intel_pclmul (c);
     }
 #endif
 #ifdef GCM_USE_ARM_PMULL
   else if (features & HWF_ARM_PMULL)
     {
       c->u_mode.gcm.ghash_fn = ghash_armv8_ce_pmull;
       ghash_setup_armv8_ce_pmull (c);
     }
+#endif
+#ifdef GCM_USE_ARM_NEON
+  else if (features & HWF_ARM_NEON) /* reads 'features', so the #if above must cover NEON */
+    {
+      c->u_mode.gcm.ghash_fn = ghash_armv7_neon;
+      ghash_setup_armv7_neon (c);
+    }
 #endif
   else
     {
       c->u_mode.gcm.ghash_fn = ghash_internal;
       fillM (c);
     }
 }
 
 
 static inline void
 gcm_bytecounter_add (u32 ctr[2], size_t add)
 {
   if (sizeof(add) > sizeof(u32))
     {
       u32 high_add = ((add >> 31) >> 1) & 0xffffffff;
       ctr[1] += high_add;
     }
 
   ctr[0] += add;
   if (ctr[0] >= add)
     return;
   ++ctr[1];
 }
 
 
 static inline u32
 gcm_add32_be128 (byte *ctr, unsigned int add)
 {
   /* 'ctr' must be aligned to four bytes.
*/ const unsigned int blocksize = GCRY_GCM_BLOCK_LEN; u32 *pval = (u32 *)(void *)(ctr + blocksize - sizeof(u32)); u32 val; val = be_bswap32(*pval) + add; *pval = be_bswap32(val); return val; /* return result as host-endian value */ } static inline int gcm_check_datalen (u32 ctr[2]) { /* len(plaintext) <= 2^39-256 bits == 2^36-32 bytes == 2^32-2 blocks */ if (ctr[1] > 0xfU) return 0; if (ctr[1] < 0xfU) return 1; if (ctr[0] <= 0xffffffe0U) return 1; return 0; } static inline int gcm_check_aadlen_or_ivlen (u32 ctr[2]) { /* len(aad/iv) <= 2^64-1 bits ~= 2^61-1 bytes */ if (ctr[1] > 0x1fffffffU) return 0; if (ctr[1] < 0x1fffffffU) return 1; if (ctr[0] <= 0xffffffffU) return 1; return 0; } static void do_ghash_buf(gcry_cipher_hd_t c, byte *hash, const byte *buf, size_t buflen, int do_padding) { unsigned int blocksize = GCRY_GCM_BLOCK_LEN; unsigned int unused = c->u_mode.gcm.mac_unused; ghash_fn_t ghash_fn = c->u_mode.gcm.ghash_fn; size_t nblocks, n; unsigned int burn = 0; if (buflen == 0 && (unused == 0 || !do_padding)) return; do { if (buflen > 0 && (buflen + unused < blocksize || unused > 0)) { n = blocksize - unused; n = n < buflen ? n : buflen; buf_cpy (&c->u_mode.gcm.macbuf[unused], buf, n); unused += n; buf += n; buflen -= n; } if (!buflen) { if (!do_padding) break; n = blocksize - unused; if (n > 0) { memset (&c->u_mode.gcm.macbuf[unused], 0, n); unused = blocksize; } } if (unused > 0) { gcry_assert (unused == blocksize); /* Process one block from macbuf. 
*/ burn = ghash_fn (c, hash, c->u_mode.gcm.macbuf, 1); unused = 0; } nblocks = buflen / blocksize; if (nblocks) { burn = ghash_fn (c, hash, buf, nblocks); buf += blocksize * nblocks; buflen -= blocksize * nblocks; } } while (buflen > 0); c->u_mode.gcm.mac_unused = unused; if (burn) _gcry_burn_stack (burn); } static gcry_err_code_t gcm_ctr_encrypt (gcry_cipher_hd_t c, byte *outbuf, size_t outbuflen, const byte *inbuf, size_t inbuflen) { gcry_err_code_t err = 0; while (inbuflen) { u32 nblocks_to_overflow; u32 num_ctr_increments; u32 curr_ctr_low; size_t currlen = inbuflen; byte ctr_copy[GCRY_GCM_BLOCK_LEN]; int fix_ctr = 0; /* GCM CTR increments only least significant 32-bits, without carry * to upper 96-bits of counter. Using generic CTR implementation * directly would carry 32-bit overflow to upper 96-bit. Detect * if input length is long enough to cause overflow, and limit * input length so that CTR overflow happen but updated CTR value is * not used to encrypt further input. After overflow, upper 96 bits * of CTR are restored to cancel out modification done by generic CTR * encryption. */ if (inbuflen > c->unused) { curr_ctr_low = gcm_add32_be128 (c->u_ctr.ctr, 0); /* Number of CTR increments this inbuflen would cause. */ num_ctr_increments = (inbuflen - c->unused) / GCRY_GCM_BLOCK_LEN + !!((inbuflen - c->unused) % GCRY_GCM_BLOCK_LEN); if ((u32)(num_ctr_increments + curr_ctr_low) < curr_ctr_low) { nblocks_to_overflow = 0xffffffffU - curr_ctr_low + 1; currlen = nblocks_to_overflow * GCRY_GCM_BLOCK_LEN + c->unused; if (currlen > inbuflen) { currlen = inbuflen; } fix_ctr = 1; cipher_block_cpy(ctr_copy, c->u_ctr.ctr, GCRY_GCM_BLOCK_LEN); } } err = _gcry_cipher_ctr_encrypt(c, outbuf, outbuflen, inbuf, currlen); if (err != 0) return err; if (fix_ctr) { /* Lower 32-bits of CTR should now be zero. */ gcry_assert(gcm_add32_be128 (c->u_ctr.ctr, 0) == 0); /* Restore upper part of CTR. 
*/ buf_cpy(c->u_ctr.ctr, ctr_copy, GCRY_GCM_BLOCK_LEN - sizeof(u32)); wipememory(ctr_copy, sizeof(ctr_copy)); } inbuflen -= currlen; inbuf += currlen; outbuflen -= currlen; outbuf += currlen; } return err; } gcry_err_code_t _gcry_cipher_gcm_encrypt (gcry_cipher_hd_t c, byte *outbuf, size_t outbuflen, const byte *inbuf, size_t inbuflen) { static const unsigned char zerobuf[MAX_BLOCKSIZE]; gcry_err_code_t err; if (c->spec->blocksize != GCRY_GCM_BLOCK_LEN) return GPG_ERR_CIPHER_ALGO; if (outbuflen < inbuflen) return GPG_ERR_BUFFER_TOO_SHORT; if (c->u_mode.gcm.datalen_over_limits) return GPG_ERR_INV_LENGTH; if (c->marks.tag || c->u_mode.gcm.ghash_data_finalized || !c->u_mode.gcm.ghash_fn) return GPG_ERR_INV_STATE; if (!c->marks.iv) _gcry_cipher_gcm_setiv (c, zerobuf, GCRY_GCM_BLOCK_LEN); if (c->u_mode.gcm.disallow_encryption_because_of_setiv_in_fips_mode) return GPG_ERR_INV_STATE; if (!c->u_mode.gcm.ghash_aad_finalized) { /* Start of encryption marks end of AAD stream. */ do_ghash_buf(c, c->u_mode.gcm.u_tag.tag, NULL, 0, 1); c->u_mode.gcm.ghash_aad_finalized = 1; } gcm_bytecounter_add(c->u_mode.gcm.datalen, inbuflen); if (!gcm_check_datalen(c->u_mode.gcm.datalen)) { c->u_mode.gcm.datalen_over_limits = 1; return GPG_ERR_INV_LENGTH; } while (inbuflen) { size_t currlen = inbuflen; /* Since checksumming is done after encryption, process input in 24KiB * chunks to keep data loaded in L1 cache for checksumming. 
*/ if (currlen > 24 * 1024) currlen = 24 * 1024; err = gcm_ctr_encrypt(c, outbuf, outbuflen, inbuf, currlen); if (err != 0) return err; do_ghash_buf(c, c->u_mode.gcm.u_tag.tag, outbuf, currlen, 0); outbuf += currlen; inbuf += currlen; outbuflen -= currlen; inbuflen -= currlen; } return 0; } gcry_err_code_t _gcry_cipher_gcm_decrypt (gcry_cipher_hd_t c, byte *outbuf, size_t outbuflen, const byte *inbuf, size_t inbuflen) { static const unsigned char zerobuf[MAX_BLOCKSIZE]; gcry_err_code_t err; if (c->spec->blocksize != GCRY_GCM_BLOCK_LEN) return GPG_ERR_CIPHER_ALGO; if (outbuflen < inbuflen) return GPG_ERR_BUFFER_TOO_SHORT; if (c->u_mode.gcm.datalen_over_limits) return GPG_ERR_INV_LENGTH; if (c->marks.tag || c->u_mode.gcm.ghash_data_finalized || !c->u_mode.gcm.ghash_fn) return GPG_ERR_INV_STATE; if (!c->marks.iv) _gcry_cipher_gcm_setiv (c, zerobuf, GCRY_GCM_BLOCK_LEN); if (!c->u_mode.gcm.ghash_aad_finalized) { /* Start of decryption marks end of AAD stream. */ do_ghash_buf(c, c->u_mode.gcm.u_tag.tag, NULL, 0, 1); c->u_mode.gcm.ghash_aad_finalized = 1; } gcm_bytecounter_add(c->u_mode.gcm.datalen, inbuflen); if (!gcm_check_datalen(c->u_mode.gcm.datalen)) { c->u_mode.gcm.datalen_over_limits = 1; return GPG_ERR_INV_LENGTH; } while (inbuflen) { size_t currlen = inbuflen; /* Since checksumming is done before decryption, process input in * 24KiB chunks to keep data loaded in L1 cache for decryption. 
*/
      if (currlen > 24 * 1024)
        currlen = 24 * 1024;

      do_ghash_buf(c, c->u_mode.gcm.u_tag.tag, inbuf, currlen, 0);

      err = gcm_ctr_encrypt(c, outbuf, outbuflen, inbuf, currlen);
      if (err)
        return err;

      outbuf += currlen;
      inbuf += currlen;
      outbuflen -= currlen;
      inbuflen -= currlen;
    }

  return 0;
}


/* Feed AADBUFLEN bytes of additional authenticated data from AADBUF
 * into the running GHASH tag of handle C.
 *
 * Returns:
 *   GPG_ERR_CIPHER_ALGO  if the cipher block size is not
 *                        GCRY_GCM_BLOCK_LEN,
 *   GPG_ERR_INV_LENGTH   if a length limit was exceeded earlier or is
 *                        exceeded by this call (the over-limits flag is
 *                        then latched),
 *   GPG_ERR_INV_STATE    if the tag is already computed, either GHASH
 *                        stream is finalized, or no GHASH function is
 *                        installed,
 *   0                    on success.
 *
 * If no IV has been set yet, an all-zero IV of GCRY_GCM_BLOCK_LEN bytes
 * is installed first through _gcry_cipher_gcm_setiv.
 * NOTE(review): the return value of that setiv call is ignored.  */
gcry_err_code_t
_gcry_cipher_gcm_authenticate (gcry_cipher_hd_t c,
                               const byte * aadbuf, size_t aadbuflen)
{
  /* Static storage, hence zero-initialized: serves as the default IV. */
  static const unsigned char zerobuf[MAX_BLOCKSIZE];

  if (c->spec->blocksize != GCRY_GCM_BLOCK_LEN)
    return GPG_ERR_CIPHER_ALGO;
  if (c->u_mode.gcm.datalen_over_limits)
    return GPG_ERR_INV_LENGTH;
  if (c->marks.tag
      || c->u_mode.gcm.ghash_aad_finalized
      || c->u_mode.gcm.ghash_data_finalized
      || !c->u_mode.gcm.ghash_fn)
    return GPG_ERR_INV_STATE;

  if (!c->marks.iv)
    _gcry_cipher_gcm_setiv (c, zerobuf, GCRY_GCM_BLOCK_LEN);

  /* Account for the new AAD bytes before hashing them so that an
     over-long input is rejected without touching the tag. */
  gcm_bytecounter_add(c->u_mode.gcm.aadlen, aadbuflen);
  if (!gcm_check_aadlen_or_ivlen(c->u_mode.gcm.aadlen))
    {
      c->u_mode.gcm.datalen_over_limits = 1;
      return GPG_ERR_INV_LENGTH;
    }

  /* Last argument 0: not a finalizing call (the finalizing calls in
     this file pass 1; do_ghash_buf itself is defined elsewhere). */
  do_ghash_buf(c, c->u_mode.gcm.u_tag.tag, aadbuf, aadbuflen, 0);

  return 0;
}


/* Derive the GHASH key for handle C: an all-zero block is encrypted in
 * place with the cipher context, then setupM() is called on the handle.
 * NOTE(review): setupM is defined elsewhere; presumably it selects the
 * GHASH implementation and fills its tables -- confirm.  */
void
_gcry_cipher_gcm_setkey (gcry_cipher_hd_t c)
{
  memset (c->u_mode.gcm.u_ghash_key.key, 0, GCRY_GCM_BLOCK_LEN);

  c->spec->encrypt (&c->context.c, c->u_mode.gcm.u_ghash_key.key,
                    c->u_mode.gcm.u_ghash_key.key);
  setupM (c);
}


/* Reset the per-message GCM state of C and derive the initial counter
 * block from the IVLEN byte IV.
 *
 * An IV of GCRY_GCM_BLOCK_LEN - 4 bytes is copied into the counter and
 * followed by the 32-bit value 1.  Any other non-zero length is run
 * through GHASH together with a block holding 64 zero bits and the
 * 64-bit IV bit length; the result becomes the counter.  The counter
 * block is then encrypted into TAGIV and the counter is advanced by
 * gcm_add32_be128 (.., 1).
 *
 * Returns GPG_ERR_INV_LENGTH for an empty or over-long IV,
 * GPG_ERR_INV_STATE if GHASH is needed but no GHASH function is
 * installed, and 0 on success.  */
static gcry_err_code_t
_gcry_cipher_gcm_initiv (gcry_cipher_hd_t c, const byte *iv, size_t ivlen)
{
  memset (c->u_mode.gcm.aadlen, 0, sizeof(c->u_mode.gcm.aadlen));
  memset (c->u_mode.gcm.datalen, 0, sizeof(c->u_mode.gcm.datalen));
  memset (c->u_mode.gcm.u_tag.tag, 0, GCRY_GCM_BLOCK_LEN);
  c->u_mode.gcm.datalen_over_limits = 0;
  c->u_mode.gcm.ghash_data_finalized = 0;
  c->u_mode.gcm.ghash_aad_finalized = 0;

  if (ivlen == 0)
    return GPG_ERR_INV_LENGTH;

  if (ivlen != GCRY_GCM_BLOCK_LEN - 4)
    {
      /* Byte count of the IV as two 32-bit words, low word first. */
      u32 iv_bytes[2] = {0, 0};
      u32 bitlengths[2][2];

      if (!c->u_mode.gcm.ghash_fn)
        return GPG_ERR_INV_STATE;

      memset(c->u_ctr.ctr, 0, GCRY_GCM_BLOCK_LEN);

      gcm_bytecounter_add(iv_bytes, ivlen);
      if (!gcm_check_aadlen_or_ivlen(iv_bytes))
        {
          c->u_mode.gcm.datalen_over_limits = 1;
          return GPG_ERR_INV_LENGTH;
        }

      do_ghash_buf(c, c->u_ctr.ctr, iv, ivlen, 1);

      /* iv length, 64-bit */
      /* (bytes << 3 converts to bits; the three bits shifted out of the
         low word are carried into the high word.) */
      bitlengths[1][1] = be_bswap32(iv_bytes[0] << 3);
      bitlengths[1][0] = be_bswap32((iv_bytes[0] >> 29) |
                                    (iv_bytes[1] << 3));
      /* zeros, 64-bit */
      bitlengths[0][1] = 0;
      bitlengths[0][0] = 0;

      do_ghash_buf(c, c->u_ctr.ctr, (byte*)bitlengths, GCRY_GCM_BLOCK_LEN, 1);

      wipememory (iv_bytes, sizeof iv_bytes);
      wipememory (bitlengths, sizeof bitlengths);
    }
  else
    {
      /* 96-bit IV is handled differently. */
      memcpy (c->u_ctr.ctr, iv, ivlen);
      c->u_ctr.ctr[12] = c->u_ctr.ctr[13] = c->u_ctr.ctr[14] = 0;
      c->u_ctr.ctr[15] = 1;
    }

  /* Encrypt the initial counter block into TAGIV; it is XORed into the
     GHASH result when the tag is produced. */
  c->spec->encrypt (&c->context.c, c->u_mode.gcm.tagiv, c->u_ctr.ctr);

  gcm_add32_be128 (c->u_ctr.ctr, 1);

  c->unused = 0;
  c->marks.iv = 1;
  c->marks.tag = 0;

  return 0;
}


/* Set the IV of IVLEN bytes for handle C.  Clears the IV and tag marks
 * and, when fips_mode () is true, sets the flag that later makes
 * _gcry_cipher_gcm_get_tag fail with GPG_ERR_INV_STATE.  The result of
 * _gcry_cipher_gcm_initiv is returned.  */
gcry_err_code_t
_gcry_cipher_gcm_setiv (gcry_cipher_hd_t c, const byte *iv, size_t ivlen)
{
  c->marks.iv = 0;
  c->marks.tag = 0;
  c->u_mode.gcm.disallow_encryption_because_of_setiv_in_fips_mode = 0;

  if (fips_mode ())
    {
      /* Direct invocation of GCM setiv in FIPS mode disables encryption. */
      c->u_mode.gcm.disallow_encryption_because_of_setiv_in_fips_mode = 1;
    }

  return _gcry_cipher_gcm_initiv (c, iv, ivlen);
}


/* Disabled draft of an IV generator; never compiled.
 * NOTE(review): declared void although the body returns error codes,
 * and IVLEN, is_nonce_ok_len and gcm_generate_iv are not visible in
 * this part of the file.  */
#if 0 && TODO
void
_gcry_cipher_gcm_geniv (gcry_cipher_hd_t c,
                        byte *ivout, size_t ivoutlen, const byte *nonce,
                        size_t noncelen)
{
  /* nonce:    user provided part (might be null) */
  /* noncelen: check if proper length (if nonce not null) */
  /* ivout:    iv used to initialize gcm, output to user */
  /* ivoutlen: check correct size */
  byte iv[IVLEN];

  if (!ivout)
    return GPG_ERR_INV_ARG;
  if (ivoutlen != IVLEN)
    return GPG_ERR_INV_LENGTH;
  if (nonce != NULL && !is_nonce_ok_len(noncelen))
    return GPG_ERR_INV_ARG;

  gcm_generate_iv(iv, nonce, noncelen);

  c->marks.iv = 0;
  c->marks.tag = 0;
  c->u_mode.gcm.disallow_encryption_because_of_setiv_in_fips_mode = 0;

  _gcry_cipher_gcm_initiv (c, iv, IVLEN);

  buf_cpy(ivout, iv, IVLEN);
  wipememory(iv, sizeof(iv));
}
#endif


/* Return 1 if TAGLEN (in bytes) is one of 16, 15, 14, 13, 12, 8 or 4,
 * and 0 otherwise.  */
static int
is_tag_length_valid(size_t taglen)
{
  switch (taglen)
    {
    /* Allowed tag lengths from NIST SP 800-38D.  */
    case 128 / 8: /* GCRY_GCM_BLOCK_LEN */
    case 120 / 8:
    case 112 / 8:
    case 104 / 8:
    case 96 / 8:
    case 64 / 8:
    case 32 / 8:
      return 1;

    default:
      return 0;
    }
}

/* Finalize the tag of handle C on first use and then either copy it to
 * OUTBUF (CHECK == 0) or compare it with the OUTBUFLEN bytes in OUTBUF
 * (CHECK != 0).
 *
 * OUTBUFLEN must be an allowed tag length or at least
 * GCRY_GCM_BLOCK_LEN, otherwise GPG_ERR_INV_LENGTH is returned; the
 * same code is returned once a length limit has been exceeded.
 * GPG_ERR_INV_STATE is returned if the tag still has to be computed but
 * no GHASH function is installed.  In check mode a length that is not
 * an allowed tag length, or a mismatch, yields GPG_ERR_CHECKSUM.
 * When copying, at most GCRY_GCM_BLOCK_LEN bytes are written.  */
static gcry_err_code_t
_gcry_cipher_gcm_tag (gcry_cipher_hd_t c,
                      byte * outbuf, size_t outbuflen, int check)
{
  if (!(is_tag_length_valid (outbuflen) || outbuflen >= GCRY_GCM_BLOCK_LEN))
    return GPG_ERR_INV_LENGTH;
  if (c->u_mode.gcm.datalen_over_limits)
    return GPG_ERR_INV_LENGTH;

  if (!c->marks.tag)
    {
      u32 bitlengths[2][2];

      if (!c->u_mode.gcm.ghash_fn)
        return GPG_ERR_INV_STATE;

      /* aad length */
      bitlengths[0][1] = be_bswap32(c->u_mode.gcm.aadlen[0] << 3);
      bitlengths[0][0] = be_bswap32((c->u_mode.gcm.aadlen[0] >> 29) |
                                    (c->u_mode.gcm.aadlen[1] << 3));
      /* data length */
      bitlengths[1][1] = be_bswap32(c->u_mode.gcm.datalen[0] << 3);
      bitlengths[1][0] = be_bswap32((c->u_mode.gcm.datalen[0] >> 29) |
                                    (c->u_mode.gcm.datalen[1] << 3));

      /* Finalize data-stream. */
      do_ghash_buf(c, c->u_mode.gcm.u_tag.tag, NULL, 0, 1);
      c->u_mode.gcm.ghash_aad_finalized = 1;
      c->u_mode.gcm.ghash_data_finalized = 1;

      /* Add bitlengths to tag. */
      do_ghash_buf(c, c->u_mode.gcm.u_tag.tag, (byte*)bitlengths,
                   GCRY_GCM_BLOCK_LEN, 1);
      /* XOR in the encrypted initial counter block saved by initiv. */
      cipher_block_xor (c->u_mode.gcm.u_tag.tag, c->u_mode.gcm.tagiv,
                        c->u_mode.gcm.u_tag.tag, GCRY_GCM_BLOCK_LEN);
      c->marks.tag = 1;

      /* The intermediate values are no longer needed once the tag is
         marked as computed. */
      wipememory (bitlengths, sizeof (bitlengths));
      wipememory (c->u_mode.gcm.macbuf, GCRY_GCM_BLOCK_LEN);
      wipememory (c->u_mode.gcm.tagiv, GCRY_GCM_BLOCK_LEN);
      wipememory (c->u_mode.gcm.aadlen, sizeof (c->u_mode.gcm.aadlen));
      wipememory (c->u_mode.gcm.datalen, sizeof (c->u_mode.gcm.datalen));
    }

  if (!check)
    {
      if (outbuflen > GCRY_GCM_BLOCK_LEN)
        outbuflen = GCRY_GCM_BLOCK_LEN;

      /* NB: We already checked that OUTBUF is large enough to hold
       * the result or has valid truncated length.  */
      memcpy (outbuf, c->u_mode.gcm.u_tag.tag, outbuflen);
    }
  else
    {
      /* OUTBUFLEN gives the length of the user supplied tag in OUTBUF
       * and thus we need to compare its length first.  */
      if (!is_tag_length_valid (outbuflen)
          || !buf_eq_const (outbuf, c->u_mode.gcm.u_tag.tag, outbuflen))
        return GPG_ERR_CHECKSUM;
    }

  return 0;
}


/* Write the tag of handle C to OUTTAG (TAGLEN bytes available).
 * Returns GPG_ERR_INV_STATE when the FIPS "setiv was called directly"
 * flag is set; otherwise the result of _gcry_cipher_gcm_tag.  */
gcry_err_code_t
_gcry_cipher_gcm_get_tag (gcry_cipher_hd_t c, unsigned char *outtag,
                          size_t taglen)
{
  /* Outputting authentication tag is part of encryption. */
  if (c->u_mode.gcm.disallow_encryption_because_of_setiv_in_fips_mode)
    return GPG_ERR_INV_STATE;

  return _gcry_cipher_gcm_tag (c, outtag, taglen, 0);
}

/* Compare the TAGLEN byte tag INTAG with the tag of handle C.  INTAG is
 * only read: _gcry_cipher_gcm_tag does not write to its buffer in check
 * mode, so casting away const is safe here.  */
gcry_err_code_t
_gcry_cipher_gcm_check_tag (gcry_cipher_hd_t c, const unsigned char *intag,
                            size_t taglen)
{
  return _gcry_cipher_gcm_tag (c, (unsigned char *) intag, taglen, 1);
}
diff --git a/cipher/cipher-internal.h b/cipher/cipher-internal.h
index 5ece774e..2283bf31 100644
--- a/cipher/cipher-internal.h
+++ b/cipher/cipher-internal.h
@@ -1,763 +1,772 @@
/* cipher-internal.h  - Internal defs for cipher.c
 * Copyright (C) 2011 Free Software Foundation, Inc.
 *
 * This file is part of Libgcrypt.
*
 * Libgcrypt is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser general Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * Libgcrypt is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, see .
 */

#ifndef G10_CIPHER_INTERNAL_H
#define G10_CIPHER_INTERNAL_H

#include "./poly1305-internal.h"


/* The maximum supported size of a block in bytes.  */
#define MAX_BLOCKSIZE 16

/* The length for an OCB block.  Although OCB supports any block
   length it does not make sense to use a 64 bit blocklen (and cipher)
   because this reduces the security margin to an unacceptable state.
   Thus we require a cipher with 128 bit blocklength.  */
#define OCB_BLOCK_LEN  (128/8)

/* The size of the pre-computed L table for OCB.  This takes the same
   size as the table used for GCM and thus we don't save anything by
   not using such a table.  */
#define OCB_L_TABLE_SIZE 16


/* Check the above constants.  */
/* NOTE(review): the #error text spells the macro OCB_BLOCKLEN while the
   macro being tested is OCB_BLOCK_LEN.  */
#if OCB_BLOCK_LEN > MAX_BLOCKSIZE
# error OCB_BLOCKLEN > MAX_BLOCKSIZE
#endif



/* Magic values for the context structure.  */
#define CTX_MAGIC_NORMAL 0x24091964
#define CTX_MAGIC_SECURE 0x46919042

/* Try to use 16 byte aligned cipher context for better performance.
   We use the aligned attribute, thus it is only possible to implement
   this with gcc.  */
#undef NEED_16BYTE_ALIGNED_CONTEXT
#ifdef HAVE_GCC_ATTRIBUTE_ALIGNED
# define NEED_16BYTE_ALIGNED_CONTEXT 1
#endif

/* Undef this symbol to trade GCM speed for 256 bytes of memory per context */
#define GCM_USE_TABLES 1


/* GCM_USE_INTEL_PCLMUL indicates whether to compile GCM with Intel PCLMUL
   code.  */
#undef GCM_USE_INTEL_PCLMUL
#if defined(ENABLE_PCLMUL_SUPPORT) && defined(GCM_USE_TABLES)
# if ((defined(__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__))
#  if __GNUC__ >= 4
#   define GCM_USE_INTEL_PCLMUL 1
#  endif
# endif
#endif /* GCM_USE_INTEL_PCLMUL */

/* GCM_USE_ARM_PMULL indicates whether to compile GCM with ARMv8 PMULL code. */
#undef GCM_USE_ARM_PMULL
#if defined(ENABLE_ARM_CRYPTO_SUPPORT) && defined(GCM_USE_TABLES)
# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
     && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
     && defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO)
#  define GCM_USE_ARM_PMULL 1
# elif defined(__AARCH64EL__) && \
    defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \
    defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO)
#  define GCM_USE_ARM_PMULL 1
# endif
#endif /* GCM_USE_ARM_PMULL */

/* NOTE(review): unlike the PCLMUL and PMULL blocks above, the NEON
   block below is gated only on GCM_USE_TABLES and toolchain/platform
   checks, not on an ENABLE_*_SUPPORT configure switch -- confirm that
   this is intended.  */
+/* GCM_USE_ARM_NEON indicates whether to compile GCM with ARMv7 NEON code. */
+#undef GCM_USE_ARM_NEON
+#if defined(GCM_USE_TABLES)
+#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \
+    defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_NEON)
+# define GCM_USE_ARM_NEON 1
+#endif
+#endif /* GCM_USE_ARM_NEON */

/* Signature of a GHASH implementation: it receives the cipher handle C,
   the RESULT block, the input BUF and the number of blocks NBLOCKS in
   BUF, and returns an unsigned int.
   NOTE(review): the meaning of the return value is not visible here;
   presumably a stack burn depth -- confirm against the implementations. */
typedef unsigned int (*ghash_fn_t) (gcry_cipher_hd_t c, byte *result,
                                    const byte *buf, size_t nblocks);


/* A VIA processor with the Padlock engine as well as the Intel AES_NI
   instructions require an alignment of most data on a 16 byte
   boundary.  Because we trick out the compiler while allocating the
   context, the align attribute as used in rijndael.c does not work on
   its own.  Thus we need to make sure that the entire context
   structure is aligned on that boundary.  We achieve this by
   defining a new type and use that instead of our usual alignment
   type.  */
typedef union
{
  /* Forces the alignment of PROPERLY_ALIGNED_TYPE.  */
  PROPERLY_ALIGNED_TYPE foo;
#ifdef NEED_16BYTE_ALIGNED_CONTEXT
  /* Forces 16 byte alignment where the compiler supports it.  */
  char bar[16] __attribute__ ((aligned (16)));
#endif
  /* Byte view; its address is what gets passed as the cipher context.  */
  char c[1];
} cipher_context_alignment_t;


/* Storage structure for CMAC, for CMAC and EAX modes.
*/
typedef struct {
  /* The initialization vector. Also contains tag after finalization. */
  union {
    cipher_context_alignment_t iv_align;
    unsigned char iv[MAX_BLOCKSIZE];
  } u_iv;

  /* Subkeys for tag creation, not cleared by gcry_cipher_reset. */
  unsigned char subkeys[2][MAX_BLOCKSIZE];

  /* Space to save partial input lengths for MAC. */
  unsigned char macbuf[MAX_BLOCKSIZE];

  int mac_unused;  /* Number of unprocessed bytes in MACBUF. */
  unsigned int tag:1; /* Set to 1 if tag has been finalized.  */
} gcry_cmac_context_t;


/* The handle structure.  */
struct gcry_cipher_handle
{
  int magic;
  size_t actual_handle_size;     /* Allocated size of this handle. */
  size_t handle_offset;          /* Offset to the malloced block.  */
  gcry_cipher_spec_t *spec;

  /* The algorithm id.  This is a hack required because the module
     interface does not easily allow to retrieve this value. */
  int algo;

  /* A structure with function pointers for mode operations. */
  struct {
    gcry_err_code_t (*encrypt)(gcry_cipher_hd_t c,
                               unsigned char *outbuf, size_t outbuflen,
                               const unsigned char *inbuf, size_t inbuflen);
    gcry_err_code_t (*decrypt)(gcry_cipher_hd_t c,
                               unsigned char *outbuf, size_t outbuflen,
                               const unsigned char *inbuf, size_t inbuflen);
    gcry_err_code_t (*setiv)(gcry_cipher_hd_t c, const unsigned char *iv,
                             size_t ivlen);

    gcry_err_code_t (*authenticate)(gcry_cipher_hd_t c,
                                    const unsigned char *abuf, size_t abuflen);
    gcry_err_code_t (*get_tag)(gcry_cipher_hd_t c, unsigned char *outtag,
                               size_t taglen);
    gcry_err_code_t (*check_tag)(gcry_cipher_hd_t c, const unsigned char *intag,
                                 size_t taglen);
  } mode_ops;

  /* A structure with function pointers for bulk operations.  Due to
     limitations of the module system (we don't want to change the
     API) we need to keep these function pointers here.  The cipher
     open function initializes them and the actual encryption routines
     use them if they are not NULL.  */
  struct {
    void (*cfb_enc)(void *context, unsigned char *iv,
                    void *outbuf_arg, const void *inbuf_arg,
                    size_t nblocks);
    void (*cfb_dec)(void *context, unsigned char *iv,
                    void *outbuf_arg, const void *inbuf_arg,
                    size_t nblocks);
    void (*cbc_enc)(void *context, unsigned char *iv,
                    void *outbuf_arg, const void *inbuf_arg,
                    size_t nblocks, int cbc_mac);
    void (*cbc_dec)(void *context, unsigned char *iv,
                    void *outbuf_arg, const void *inbuf_arg,
                    size_t nblocks);
    void (*ctr_enc)(void *context, unsigned char *iv,
                    void *outbuf_arg, const void *inbuf_arg,
                    size_t nblocks);
    size_t (*ocb_crypt)(gcry_cipher_hd_t c, void *outbuf_arg,
                        const void *inbuf_arg, size_t nblocks, int encrypt);
    size_t (*ocb_auth)(gcry_cipher_hd_t c, const void *abuf_arg,
                       size_t nblocks);
    void (*xts_crypt)(void *context, unsigned char *tweak,
                      void *outbuf_arg, const void *inbuf_arg,
                      size_t nblocks, int encrypt);
  } bulk;


  int mode;
  unsigned int flags;

  struct {
    unsigned int key:1; /* Set to 1 if a key has been set.  */
    unsigned int iv:1;  /* Set to 1 if an IV has been set.  */
    unsigned int tag:1; /* Set to 1 if a tag is finalized. */
    unsigned int finalize:1; /* Next encrypt/decrypt has the final data.  */
  } marks;

  /* The initialization vector.  For best performance we make sure
     that it is properly aligned.  In particular some implementations
     of bulk operations expect a 16 byte aligned IV.  IV is also used
     to store CBC-MAC in CCM mode; counter IV is stored in U_CTR.  For
     OCB mode it is used for the offset value.  */
  union {
    cipher_context_alignment_t iv_align;
    unsigned char iv[MAX_BLOCKSIZE];
  } u_iv;

  /* The counter for CTR mode.  This field is also used by AESWRAP and
     thus we can't use the U_IV union.  For OCB mode it is used for
     the checksum.  */
  union {
    cipher_context_alignment_t iv_align;
    unsigned char ctr[MAX_BLOCKSIZE];
  } u_ctr;

  /* Space to save an IV or CTR for chaining operations.  */
  unsigned char lastiv[MAX_BLOCKSIZE];
  int unused;  /* Number of unused bytes in LASTIV. */

  union {
    /* Mode specific storage for CCM mode. */
    struct {
      u64 encryptlen;
      u64 aadlen;
      unsigned int authlen;

      /* Space to save partial input lengths for MAC. */
      unsigned char macbuf[GCRY_CCM_BLOCK_LEN];
      int mac_unused;  /* Number of unprocessed bytes in MACBUF. */

      unsigned char s0[GCRY_CCM_BLOCK_LEN];

      unsigned int nonce:1; /* Set to 1 if nonce has been set.  */
      unsigned int lengths:1; /* Set to 1 if CCM length parameters have been
                                 processed.  */
    } ccm;

    /* Mode specific storage for Poly1305 mode. */
    struct {
      /* byte counter for AAD. */
      u32 aadcount[2];

      /* byte counter for data. */
      u32 datacount[2];

      unsigned int aad_finalized:1;
      unsigned int bytecount_over_limits:1;

      poly1305_context_t ctx;
    } poly1305;

    /* Mode specific storage for CMAC mode. */
    gcry_cmac_context_t cmac;

    /* Mode specific storage for EAX mode. */
    struct {
      /* CMAC for header (AAD). */
      gcry_cmac_context_t cmac_header;

      /* CMAC for ciphertext. */
      gcry_cmac_context_t cmac_ciphertext;
    } eax;

    /* Mode specific storage for GCM mode. */
    struct {
      /* The interim tag for GCM mode.  */
      union {
        cipher_context_alignment_t iv_align;
        unsigned char tag[MAX_BLOCKSIZE];
      } u_tag;

      /* Space to save partial input lengths for MAC. */
      /* NOTE(review): sized with GCRY_CCM_BLOCK_LEN although this is
         GCM state and the GCM code wipes GCRY_GCM_BLOCK_LEN bytes of
         it -- confirm that both constants are equal.  */
      unsigned char macbuf[GCRY_CCM_BLOCK_LEN];
      int mac_unused;  /* Number of unprocessed bytes in MACBUF. */

      /* byte counters for GCM */
      /* (each counter is a pair of 32-bit words, low word first) */
      u32 aadlen[2];
      u32 datalen[2];

      /* encrypted tag counter */
      unsigned char tagiv[MAX_BLOCKSIZE];

      unsigned int ghash_data_finalized:1;
      unsigned int ghash_aad_finalized:1;

      unsigned int datalen_over_limits:1;
      unsigned int disallow_encryption_because_of_setiv_in_fips_mode:1;

      /* --- Following members are not cleared in gcry_cipher_reset --- */

      /* GHASH multiplier from key.  */
      union {
        cipher_context_alignment_t iv_align;
        unsigned char key[MAX_BLOCKSIZE];
      } u_ghash_key;

      /* GHASH implementation in use. */
      ghash_fn_t ghash_fn;

      /* Pre-calculated table for GCM. */
      /* Both layouts below occupy 256 bytes.  */
#ifdef GCM_USE_TABLES
 #if (SIZEOF_UNSIGNED_LONG == 8 || defined(__x86_64__))
      #define GCM_TABLES_USE_U64 1
      u64 gcm_table[2 * 16];
 #else
      #undef GCM_TABLES_USE_U64
      u32 gcm_table[4 * 16];
 #endif
#endif
    } gcm;

    /* Mode specific storage for OCB mode. */
    struct {
      /* --- Following members are not cleared in gcry_cipher_reset --- */

      /* Helper variables and pre-computed table of L values.  */
      unsigned char L_star[OCB_BLOCK_LEN];
      unsigned char L_dollar[OCB_BLOCK_LEN];
      unsigned char L0L1[OCB_BLOCK_LEN];
      unsigned char L[OCB_L_TABLE_SIZE][OCB_BLOCK_LEN];

      /* --- Following members are cleared in gcry_cipher_reset --- */

      /* The tag is valid if marks.tag has been set.  */
      unsigned char tag[OCB_BLOCK_LEN];

      /* A buffer to hold the offset for the AAD processing.  */
      unsigned char aad_offset[OCB_BLOCK_LEN];

      /* A buffer to hold the current sum of AAD processing.  We can't
         use tag here because tag may already hold the preprocessed
         checksum of the data.  */
      unsigned char aad_sum[OCB_BLOCK_LEN];

      /* A buffer to store AAD data not yet processed.  */
      unsigned char aad_leftover[OCB_BLOCK_LEN];

      /* Number of data/aad blocks processed so far.  */
      u64 data_nblocks;
      u64 aad_nblocks;

      /* Number of valid bytes in AAD_LEFTOVER.  */
      unsigned char aad_nleftover;

      /* Length of the tag.  Fixed for now but may eventually be
         specified using a set of gcry_cipher_flags.  */
      unsigned char taglen;

      /* Flags indicating that the final data/aad block has been
         processed.  */
      unsigned int data_finalized:1;
      unsigned int aad_finalized:1;
    } ocb;

    /* Mode specific storage for XTS mode. */
    struct {
      /* Pointer to tweak cipher context, allocated after actual
       * cipher context. */
      char *tweak_context;
    } xts;
  } u_mode;

  /* What follows are two contexts of the cipher in use.  The first
     one needs to be aligned well enough for the cipher operation
     whereas the second one is a copy created by cipher_setkey and
     used by cipher_reset.  That second copy has no need for proper
     alignment because it is only accessed by memcpy.  */
  cipher_context_alignment_t context;
};


/* The mode entry points declared below share one shape: they take the
   handle C, an output buffer with its capacity and an input buffer
   with its length, and return a gcry_err_code_t.  */

/*-- cipher-cbc.c --*/
gcry_err_code_t _gcry_cipher_cbc_encrypt
/*           */ (gcry_cipher_hd_t c,
                 unsigned char *outbuf, size_t outbuflen,
                 const unsigned char *inbuf, size_t inbuflen);
gcry_err_code_t _gcry_cipher_cbc_decrypt
/*           */ (gcry_cipher_hd_t c,
                 unsigned char *outbuf, size_t outbuflen,
                 const unsigned char *inbuf, size_t inbuflen);
gcry_err_code_t _gcry_cipher_cbc_cts_encrypt
/*           */ (gcry_cipher_hd_t c,
                 unsigned char *outbuf, size_t outbuflen,
                 const unsigned char *inbuf, size_t inbuflen);
gcry_err_code_t _gcry_cipher_cbc_cts_decrypt
/*           */ (gcry_cipher_hd_t c,
                 unsigned char *outbuf, size_t outbuflen,
                 const unsigned char *inbuf, size_t inbuflen);

/*-- cipher-cfb.c --*/
gcry_err_code_t _gcry_cipher_cfb_encrypt
/*           */ (gcry_cipher_hd_t c,
                 unsigned char *outbuf, size_t outbuflen,
                 const unsigned char *inbuf, size_t inbuflen);
gcry_err_code_t _gcry_cipher_cfb_decrypt
/*           */ (gcry_cipher_hd_t c,
                 unsigned char *outbuf, size_t outbuflen,
                 const unsigned char *inbuf, size_t inbuflen);
gcry_err_code_t _gcry_cipher_cfb8_encrypt
/*           */ (gcry_cipher_hd_t c,
                 unsigned char *outbuf, size_t outbuflen,
                 const unsigned char *inbuf, size_t inbuflen);
gcry_err_code_t _gcry_cipher_cfb8_decrypt
/*           */ (gcry_cipher_hd_t c,
                 unsigned char *outbuf, size_t outbuflen,
                 const unsigned char *inbuf, size_t inbuflen);


/*-- cipher-ofb.c --*/
gcry_err_code_t _gcry_cipher_ofb_encrypt
/*           */ (gcry_cipher_hd_t c,
                 unsigned char *outbuf, size_t outbuflen,
                 const unsigned char *inbuf, size_t inbuflen);

/*-- cipher-ctr.c --*/
gcry_err_code_t _gcry_cipher_ctr_encrypt
/*           */ (gcry_cipher_hd_t c,
                 unsigned char *outbuf, size_t outbuflen,
                 const unsigned char *inbuf, size_t inbuflen);


/*-- cipher-aeswrap.c --*/
gcry_err_code_t _gcry_cipher_aeswrap_encrypt
/*           */   (gcry_cipher_hd_t c,
                   byte *outbuf, size_t outbuflen,
                   const byte *inbuf, size_t inbuflen);
gcry_err_code_t _gcry_cipher_aeswrap_decrypt
/*           */   (gcry_cipher_hd_t c,
                   byte *outbuf, size_t outbuflen,
                   const byte *inbuf, size_t
inbuflen);


/*-- cipher-ccm.c --*/
gcry_err_code_t _gcry_cipher_ccm_encrypt
/*           */ (gcry_cipher_hd_t c,
                 unsigned char *outbuf, size_t outbuflen,
                 const unsigned char *inbuf, size_t inbuflen);
gcry_err_code_t _gcry_cipher_ccm_decrypt
/*           */ (gcry_cipher_hd_t c,
                 unsigned char *outbuf, size_t outbuflen,
                 const unsigned char *inbuf, size_t inbuflen);
gcry_err_code_t _gcry_cipher_ccm_set_nonce
/*           */ (gcry_cipher_hd_t c, const unsigned char *nonce,
                 size_t noncelen);
gcry_err_code_t _gcry_cipher_ccm_authenticate
/*           */ (gcry_cipher_hd_t c, const unsigned char *abuf, size_t abuflen);
gcry_err_code_t _gcry_cipher_ccm_set_lengths
/*           */ (gcry_cipher_hd_t c, u64 encryptedlen, u64 aadlen, u64 taglen);
gcry_err_code_t _gcry_cipher_ccm_get_tag
/*           */ (gcry_cipher_hd_t c,
                 unsigned char *outtag, size_t taglen);
gcry_err_code_t _gcry_cipher_ccm_check_tag
/*           */ (gcry_cipher_hd_t c,
                 const unsigned char *intag, size_t taglen);


/*-- cipher-cmac.c --*/
/* These operate on a gcry_cmac_context_t passed alongside the handle.  */
gcry_err_code_t _gcry_cmac_generate_subkeys
/*           */ (gcry_cipher_hd_t c, gcry_cmac_context_t *ctx);
gcry_err_code_t _gcry_cmac_write
/*           */ (gcry_cipher_hd_t c, gcry_cmac_context_t *ctx,
                 const byte * inbuf, size_t inlen);
gcry_err_code_t _gcry_cmac_final
/*           */ (gcry_cipher_hd_t c, gcry_cmac_context_t *ctx);
void _gcry_cmac_reset (gcry_cmac_context_t *ctx);


/*-- cipher-eax.c --*/
gcry_err_code_t _gcry_cipher_eax_encrypt
/*           */   (gcry_cipher_hd_t c,
                   unsigned char *outbuf, size_t outbuflen,
                   const unsigned char *inbuf, size_t inbuflen);
gcry_err_code_t _gcry_cipher_eax_decrypt
/*           */   (gcry_cipher_hd_t c,
                   unsigned char *outbuf, size_t outbuflen,
                   const unsigned char *inbuf, size_t inbuflen);
gcry_err_code_t _gcry_cipher_eax_set_nonce
/*           */   (gcry_cipher_hd_t c,
                   const unsigned char *nonce, size_t noncelen);
gcry_err_code_t _gcry_cipher_eax_authenticate
/*           */   (gcry_cipher_hd_t c,
                   const unsigned char *aadbuf, size_t aadbuflen);
gcry_err_code_t _gcry_cipher_eax_get_tag
/*           */   (gcry_cipher_hd_t c,
                   unsigned char *outtag, size_t taglen);
gcry_err_code_t _gcry_cipher_eax_check_tag
/*           */   (gcry_cipher_hd_t c,
                   const unsigned char *intag, size_t taglen);
gcry_err_code_t _gcry_cipher_eax_setkey
/*           */   (gcry_cipher_hd_t c);


/*-- cipher-gcm.c --*/
gcry_err_code_t _gcry_cipher_gcm_encrypt
/*           */   (gcry_cipher_hd_t c,
                   unsigned char *outbuf, size_t outbuflen,
                   const unsigned char *inbuf, size_t inbuflen);
gcry_err_code_t _gcry_cipher_gcm_decrypt
/*           */   (gcry_cipher_hd_t c,
                   unsigned char *outbuf, size_t outbuflen,
                   const unsigned char *inbuf, size_t inbuflen);
gcry_err_code_t _gcry_cipher_gcm_setiv
/*           */   (gcry_cipher_hd_t c,
                   const unsigned char *iv, size_t ivlen);
gcry_err_code_t _gcry_cipher_gcm_authenticate
/*           */   (gcry_cipher_hd_t c,
                   const unsigned char *aadbuf, size_t aadbuflen);
gcry_err_code_t _gcry_cipher_gcm_get_tag
/*           */   (gcry_cipher_hd_t c,
                   unsigned char *outtag, size_t taglen);
gcry_err_code_t _gcry_cipher_gcm_check_tag
/*           */   (gcry_cipher_hd_t c,
                   const unsigned char *intag, size_t taglen);
void _gcry_cipher_gcm_setkey
/*           */   (gcry_cipher_hd_t c);


/*-- cipher-poly1305.c --*/
gcry_err_code_t _gcry_cipher_poly1305_encrypt
/*           */   (gcry_cipher_hd_t c,
                   unsigned char *outbuf, size_t outbuflen,
                   const unsigned char *inbuf, size_t inbuflen);
gcry_err_code_t _gcry_cipher_poly1305_decrypt
/*           */   (gcry_cipher_hd_t c,
                   unsigned char *outbuf, size_t outbuflen,
                   const unsigned char *inbuf, size_t inbuflen);
gcry_err_code_t _gcry_cipher_poly1305_setiv
/*           */   (gcry_cipher_hd_t c,
                   const unsigned char *iv, size_t ivlen);
gcry_err_code_t _gcry_cipher_poly1305_authenticate
/*           */   (gcry_cipher_hd_t c,
                   const unsigned char *aadbuf, size_t aadbuflen);
gcry_err_code_t _gcry_cipher_poly1305_get_tag
/*           */   (gcry_cipher_hd_t c,
                   unsigned char *outtag, size_t taglen);
gcry_err_code_t _gcry_cipher_poly1305_check_tag
/*           */   (gcry_cipher_hd_t c,
                   const unsigned char *intag, size_t taglen);
void _gcry_cipher_poly1305_setkey
/*           */   (gcry_cipher_hd_t c);


/*-- chacha20.c --*/
/* Unlike the mode entry points, these take a single LENGTH that covers
   both OUTBUF and INBUF.  */
gcry_err_code_t _gcry_chacha20_poly1305_encrypt
/*           */   (gcry_cipher_hd_t c, byte *outbuf, const byte *inbuf,
		   size_t length);
gcry_err_code_t _gcry_chacha20_poly1305_decrypt
/*           */   (gcry_cipher_hd_t c, byte *outbuf, const byte *inbuf,
		   size_t length);


/*-- cipher-ocb.c --*/
gcry_err_code_t _gcry_cipher_ocb_encrypt
/*           */ (gcry_cipher_hd_t c,
                 unsigned char *outbuf, size_t outbuflen,
                 const unsigned char *inbuf, size_t inbuflen);
gcry_err_code_t _gcry_cipher_ocb_decrypt
/*           */ (gcry_cipher_hd_t c,
                 unsigned char *outbuf, size_t outbuflen,
                 const unsigned char *inbuf, size_t inbuflen);
gcry_err_code_t _gcry_cipher_ocb_set_nonce
/*           */ (gcry_cipher_hd_t c, const unsigned char *nonce,
                 size_t noncelen);
gcry_err_code_t _gcry_cipher_ocb_authenticate
/*           */ (gcry_cipher_hd_t c, const unsigned char *abuf, size_t abuflen);
gcry_err_code_t _gcry_cipher_ocb_get_tag
/*           */ (gcry_cipher_hd_t c,
                 unsigned char *outtag, size_t taglen);
gcry_err_code_t _gcry_cipher_ocb_check_tag
/*           */ (gcry_cipher_hd_t c,
                 const unsigned char *intag, size_t taglen);
void _gcry_cipher_ocb_setkey
/*           */ (gcry_cipher_hd_t c);


/*-- cipher-xts.c --*/
gcry_err_code_t _gcry_cipher_xts_encrypt
/*           */ (gcry_cipher_hd_t c, unsigned char *outbuf, size_t outbuflen,
		 const unsigned char *inbuf, size_t inbuflen);
gcry_err_code_t _gcry_cipher_xts_decrypt
/*           */ (gcry_cipher_hd_t c, unsigned char *outbuf, size_t outbuflen,
		 const unsigned char *inbuf, size_t inbuflen);


/* Return the L-value for block N.  Note: 'cipher_ocb.c' ensures that N
 * will never be multiple of 65536 (1 << OCB_L_TABLE_SIZE), thus N can
 * be directly passed to _gcry_ctz() function and resulting index will
 * never overflow the table.
 *
 * The returned pointer refers to the entry of the pre-computed table
 * c->u_mode.ocb.L selected by the number of trailing zero bits of N.  */
static inline const unsigned char *
ocb_get_l (gcry_cipher_hd_t c, u64 n)
{
  unsigned long ntz;

#if ((defined(__i386__) || defined(__x86_64__)) && __GNUC__ >= 4)
  /* Assumes that N != 0. */
  /* The %k operand modifier selects the 32-bit register form, so only
     the low 32 bits of N are examined here.  */
  asm ("rep;bsfl %k[low], %k[ntz]\n\t"
        : [ntz] "=r" (ntz)
        : [low] "r" ((unsigned long)n)
        : "cc");
#else
  ntz = _gcry_ctz (n);
#endif

  return c->u_mode.ocb.L[ntz];
}


/* Return bit-shift of blocksize.
*/ static inline unsigned int _gcry_blocksize_shift(gcry_cipher_hd_t c) { /* Only blocksizes 8 and 16 are used. Return value in such way * that compiler can optimize calling functions based on this. */ return c->spec->blocksize == 8 ? 3 : 4; } /* Optimized function for cipher block copying */ static inline void cipher_block_cpy(void *_dst, const void *_src, size_t blocksize) { byte *dst = _dst; const byte *src = _src; u64 s[2]; if (blocksize == 8) { buf_put_he64(dst + 0, buf_get_he64(src + 0)); } else /* blocksize == 16 */ { s[0] = buf_get_he64(src + 0); s[1] = buf_get_he64(src + 8); buf_put_he64(dst + 0, s[0]); buf_put_he64(dst + 8, s[1]); } } /* Optimized function for cipher block xoring */ static inline void cipher_block_xor(void *_dst, const void *_src1, const void *_src2, size_t blocksize) { byte *dst = _dst; const byte *src1 = _src1; const byte *src2 = _src2; u64 s1[2]; u64 s2[2]; if (blocksize == 8) { buf_put_he64(dst + 0, buf_get_he64(src1 + 0) ^ buf_get_he64(src2 + 0)); } else /* blocksize == 16 */ { s1[0] = buf_get_he64(src1 + 0); s1[1] = buf_get_he64(src1 + 8); s2[0] = buf_get_he64(src2 + 0); s2[1] = buf_get_he64(src2 + 8); buf_put_he64(dst + 0, s1[0] ^ s2[0]); buf_put_he64(dst + 8, s1[1] ^ s2[1]); } } /* Optimized function for in-place cipher block xoring */ static inline void cipher_block_xor_1(void *_dst, const void *_src, size_t blocksize) { cipher_block_xor (_dst, _dst, _src, blocksize); } /* Optimized function for cipher block xoring with two destination cipher blocks. Used mainly by CFB mode encryption. 
*/ static inline void cipher_block_xor_2dst(void *_dst1, void *_dst2, const void *_src, size_t blocksize) { byte *dst1 = _dst1; byte *dst2 = _dst2; const byte *src = _src; u64 d2[2]; u64 s[2]; if (blocksize == 8) { d2[0] = buf_get_he64(dst2 + 0) ^ buf_get_he64(src + 0); buf_put_he64(dst2 + 0, d2[0]); buf_put_he64(dst1 + 0, d2[0]); } else /* blocksize == 16 */ { s[0] = buf_get_he64(src + 0); s[1] = buf_get_he64(src + 8); d2[0] = buf_get_he64(dst2 + 0); d2[1] = buf_get_he64(dst2 + 8); d2[0] = d2[0] ^ s[0]; d2[1] = d2[1] ^ s[1]; buf_put_he64(dst2 + 0, d2[0]); buf_put_he64(dst2 + 8, d2[1]); buf_put_he64(dst1 + 0, d2[0]); buf_put_he64(dst1 + 8, d2[1]); } } /* Optimized function for combined cipher block xoring and copying. Used by mainly CBC mode decryption. */ static inline void cipher_block_xor_n_copy_2(void *_dst_xor, const void *_src_xor, void *_srcdst_cpy, const void *_src_cpy, size_t blocksize) { byte *dst_xor = _dst_xor; byte *srcdst_cpy = _srcdst_cpy; const byte *src_xor = _src_xor; const byte *src_cpy = _src_cpy; u64 sc[2]; u64 sx[2]; u64 sdc[2]; if (blocksize == 8) { sc[0] = buf_get_he64(src_cpy + 0); buf_put_he64(dst_xor + 0, buf_get_he64(srcdst_cpy + 0) ^ buf_get_he64(src_xor + 0)); buf_put_he64(srcdst_cpy + 0, sc[0]); } else /* blocksize == 16 */ { sc[0] = buf_get_he64(src_cpy + 0); sc[1] = buf_get_he64(src_cpy + 8); sx[0] = buf_get_he64(src_xor + 0); sx[1] = buf_get_he64(src_xor + 8); sdc[0] = buf_get_he64(srcdst_cpy + 0); sdc[1] = buf_get_he64(srcdst_cpy + 8); sx[0] ^= sdc[0]; sx[1] ^= sdc[1]; buf_put_he64(dst_xor + 0, sx[0]); buf_put_he64(dst_xor + 8, sx[1]); buf_put_he64(srcdst_cpy + 0, sc[0]); buf_put_he64(srcdst_cpy + 8, sc[1]); } } /* Optimized function for combined cipher block xoring and copying. Used by mainly CFB mode decryption. 
*/
/* Forwards to cipher_block_xor_n_copy_2 with _SRC used both as the xor
   input and as the block to copy: _DST_XOR receives _SRCDST_CPY xor
   _SRC, then _SRCDST_CPY is overwritten with _SRC.  BLOCKSIZE is passed
   through unchanged.  */
static inline void
cipher_block_xor_n_copy(void *_dst_xor, void *_srcdst_cpy, const void *_src,
                        size_t blocksize)
{
  cipher_block_xor_n_copy_2(_dst_xor, _src, _srcdst_cpy, _src, blocksize);
}


#endif /*G10_CIPHER_INTERNAL_H*/