diff --git a/cipher/poly1305.c b/cipher/poly1305.c index cded7cb2..69805085 100644 --- a/cipher/poly1305.c +++ b/cipher/poly1305.c @@ -1,687 +1,700 @@ /* poly1305.c - Poly1305 internals and generic implementation * Copyright (C) 2014,2017,2018 Jussi Kivilinna * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see <http://www.gnu.org/licenses/>. */ /* NOTE(review): the four bare #include directives that follow have lost their header names in this copy; restore them from the upstream file. */ #include #include #include #include #include "types.h" #include "g10lib.h" #include "cipher.h" #include "bufhelp.h" #include "poly1305-internal.h" #include "mpi-internal.h" #include "longlong.h" static const char *selftest (void); /* Select the implementation below: the 64-bit one when MPI limbs are 8 bytes and a u64 type exists, the 32-bit one when limbs are 4 bytes; anything else is a build error. */ #undef USE_MPI_64BIT #undef USE_MPI_32BIT #if BYTES_PER_MPI_LIMB == 8 && defined(HAVE_U64_TYPEDEF) # define USE_MPI_64BIT 1 #elif BYTES_PER_MPI_LIMB == 4 # define USE_MPI_32BIT 1 #else # error please implement for this limb size. 
#endif /* Reset CTX for a new MAC: clear the buffered-byte count and the accumulator h[0..4], load key bytes 0..15 into r[0..3] as little-endian 32-bit words with the masks below applied, and load key bytes 16..31 into k[0..3] unmasked. */ static void poly1305_init (poly1305_context_t *ctx, const byte key[POLY1305_KEYLEN]) { POLY1305_STATE *st = &ctx->state; ctx->leftover = 0; st->h[0] = 0; st->h[1] = 0; st->h[2] = 0; st->h[3] = 0; st->h[4] = 0; /* clamp r: the top four bits of every word and the low two bits of words 1..3 are cleared */ st->r[0] = buf_get_le32(key + 0) & 0x0fffffff; st->r[1] = buf_get_le32(key + 4) & 0x0ffffffc; st->r[2] = buf_get_le32(key + 8) & 0x0ffffffc; st->r[3] = buf_get_le32(key + 12) & 0x0ffffffc; st->k[0] = buf_get_le32(key + 16); st->k[1] = buf_get_le32(key + 20); st->k[2] = buf_get_le32(key + 24); st->k[3] = buf_get_le32(key + 28); } /* 64-bit limb implementation: h is handled as three 64-bit words and r as two. Each platform section below may define ADD_1305_64 with inline asm; the generic add_ssaaaa version is used only when none of them did. */ #ifdef USE_MPI_64BIT #if defined (__aarch64__) && __GNUC__ >= 4 /* A += B (armv8/aarch64) */ #define ADD_1305_64(A2, A1, A0, B2, B1, B0) \ __asm__ ("adds %0, %3, %0\n" \ "adcs %1, %4, %1\n" \ "adc %2, %5, %2\n" \ : "+r" (A0), "+r" (A1), "+r" (A2) \ : "r" (B0), "r" (B1), "r" (B2) \ : "cc" ) #endif /* __aarch64__ */ #if defined (__x86_64__) && __GNUC__ >= 4 /* A += B (x86-64) */ #define ADD_1305_64(A2, A1, A0, B2, B1, B0) \ __asm__ ("addq %3, %0\n" \ "adcq %4, %1\n" \ "adcq %5, %2\n" \ : "+r" (A0), "+r" (A1), "+r" (A2) \ : "g" (B0), "g" (B1), "g" (B2) \ : "cc" ) #endif /* __x86_64__ */ +#if defined (__powerpc__) && __GNUC__ >= 4 + +/* A += B (ppc64) */ +#define ADD_1305_64(A2, A1, A0, B2, B1, B0) \ + __asm__ ("addc %0, %3, %0\n" \ + "adde %1, %4, %1\n" \ + "adde %2, %5, %2\n" \ + : "+r" (A0), "+r" (A1), "+r" (A2) \ + : "r" (B0), "r" (B1), "r" (B2) \ + : "cc" ) + +#endif /* __powerpc__ */ + #ifndef ADD_1305_64 /* A += B (generic, mpi) */ # define ADD_1305_64(A2, A1, A0, B2, B1, B0) do { \ u64 carry; \ add_ssaaaa(carry, A0, 0, A0, 0, B0); \ add_ssaaaa(A2, A1, A2, A1, B2, B1); \ add_ssaaaa(A2, A1, A2, A1, 0, carry); \ } while (0) #endif /* H = H * R mod 2¹³⁰-5 */ #define MUL_MOD_1305_64(H2, H1, H0, R1, R0, R1_MULT5) do { \ u64 x0_lo, x0_hi, x1_lo, x1_hi; \ u64 t0_lo, t0_hi, t1_lo, t1_hi; \ \ /* x = a * r (partial mod 2^130-5) */ \ umul_ppmm(x0_hi, x0_lo, H0, R0); /* h0 * r0 */ \ umul_ppmm(x1_hi, x1_lo, H0, R1); /* h0 * r1 */ \ \ 
umul_ppmm(t0_hi, t0_lo, H1, R1_MULT5); /* h1 * r1 mod 2^130-5 */ \ add_ssaaaa(x0_hi, x0_lo, x0_hi, x0_lo, t0_hi, t0_lo); \ umul_ppmm(t1_hi, t1_lo, H1, R0); /* h1 * r0 */ \ add_ssaaaa(x1_hi, x1_lo, x1_hi, x1_lo, t1_hi, t1_lo); \ \ t1_lo = H2 * R1_MULT5; /* h2 * r1 mod 2^130-5 */ \ t1_hi = H2 * R0; /* h2 * r0 */ \ add_ssaaaa(H0, H1, x1_hi, x1_lo, t1_hi, t1_lo); \ \ /* carry propagation */ \ H2 = H0 & 3; \ H0 = (H0 >> 2) * 5; /* msb mod 2^130-5 */ \ ADD_1305_64(H2, H1, H0, (u64)0, x0_hi, x0_lo); \ } while (0) /* Absorb LEN bytes from BUF into the accumulator held in ctx->state, one POLY1305_BLOCKSIZE block at a time (two little-endian 64-bit loads per block). HIGH_PAD is used as the third, top word of every message block. The first block is loaded before the loop condition is tested, so LEN must be at least POLY1305_BLOCKSIZE on entry. Returns a byte count that the callers in this file pass on to _gcry_burn_stack. */ static unsigned int poly1305_blocks (poly1305_context_t *ctx, const byte *buf, size_t len, byte high_pad) { POLY1305_STATE *st = &ctx->state; u64 r0, r1, r1_mult5; u64 h0, h1, h2; u64 m0, m1, m2; m2 = high_pad; /* pack the 32-bit state words into 64-bit words */ h0 = st->h[0] + ((u64)st->h[1] << 32); h1 = st->h[2] + ((u64)st->h[3] << 32); h2 = st->h[4]; r0 = st->r[0] + ((u64)st->r[1] << 32); r1 = st->r[2] + ((u64)st->r[3] << 32); /* r1 * 5 / 4; exact because poly1305_init masked off the low two bits of r[2] */ r1_mult5 = (r1 >> 2) + r1; m0 = buf_get_le64(buf + 0); m1 = buf_get_le64(buf + 8); buf += POLY1305_BLOCKSIZE; len -= POLY1305_BLOCKSIZE; while (len >= POLY1305_BLOCKSIZE) { /* a = h + m */ ADD_1305_64(h2, h1, h0, m2, m1, m0); /* the next block is loaded here, before the multiply of the current one */ m0 = buf_get_le64(buf + 0); m1 = buf_get_le64(buf + 8); /* h = a * r (partial mod 2^130-5) */ MUL_MOD_1305_64(h2, h1, h0, r1, r0, r1_mult5); buf += POLY1305_BLOCKSIZE; len -= POLY1305_BLOCKSIZE; } /* a = h + m */ ADD_1305_64(h2, h1, h0, m2, m1, m0); /* h = a * r (partial mod 2^130-5) */ MUL_MOD_1305_64(h2, h1, h0, r1, r0, r1_mult5); /* unpack the accumulator back into five 32-bit state words */ st->h[0] = h0; st->h[1] = h0 >> 32; st->h[2] = h1; st->h[3] = h1 >> 32; st->h[4] = h2; return 6 * sizeof (void *) + 18 * sizeof (u64); } /* Finish the MAC (64-bit variant). A pending partial block gets a byte of value 1 appended, is zero-filled up to POLY1305_BLOCKSIZE and absorbed with high_pad 0. If h + 5 carries into bit 130, 5 is added to the low 128 bits of h; then the key words k[0..3] are added and the 16-byte result is stored little-endian in MAC. Returns a stack-burn byte count including that of poly1305_blocks. */ static unsigned int poly1305_final (poly1305_context_t *ctx, byte mac[POLY1305_TAGLEN]) { POLY1305_STATE *st = &ctx->state; unsigned int burn = 0; u64 u, carry; u64 k0, k1; u64 h0, h1; u64 h2; /* process the remaining block */ if (ctx->leftover) { ctx->buffer[ctx->leftover++] = 1; if (ctx->leftover < POLY1305_BLOCKSIZE) { memset (&ctx->buffer[ctx->leftover], 0, POLY1305_BLOCKSIZE - ctx->leftover); 
ctx->leftover = POLY1305_BLOCKSIZE; } /* the padded final block is absorbed with high_pad 0 */ burn = poly1305_blocks (ctx, ctx->buffer, POLY1305_BLOCKSIZE, 0); } h0 = st->h[0] + ((u64)st->h[1] << 32); h1 = st->h[2] + ((u64)st->h[3] << 32); h2 = st->h[4]; k0 = st->k[0] + ((u64)st->k[1] << 32); k1 = st->k[2] + ((u64)st->k[3] << 32); /* check if h is more than 2^130-5, by adding 5. */ add_ssaaaa(carry, u, 0, h0, 0, 5); add_ssaaaa(carry, u, 0, carry, 0, h1); u = (carry + h2) >> 2; /* u == 0 or 1 */ /* minus 2^130-5 ... (+5) */ u = (-u) & 5; add_ssaaaa(h1, h0, h1, h0, 0, u); /* add high part of key + h */ add_ssaaaa(h1, h0, h1, h0, k1, k0); /* the carry out of h1 is discarded, so the tag is the sum modulo 2^128 */ buf_put_le64(mac + 0, h0); buf_put_le64(mac + 8, h1); /* burn_stack */ return 4 * sizeof (void *) + 7 * sizeof (u64) + burn; } #endif /* USE_MPI_64BIT */ /* 32-bit limb implementation: h is handled as five 32-bit words and r as four. UMUL_ADD_32 and ADD_1305_32 fall back to the generic add_ssaaaa and umul_ppmm versions when no platform section defined them. */ #ifdef USE_MPI_32BIT #ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS /* HI:LO += A * B (arm) */ #define UMUL_ADD_32(HI, LO, A, B) \ __asm__ ("umlal %1, %0, %4, %5" \ : "=r" (HI), "=r" (LO) \ : "0" (HI), "1" (LO), "r" (A), "r" (B) ) /* A += B (arm) */ #define ADD_1305_32(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0) \ __asm__ ("adds %0, %0, %5\n" \ "adcs %1, %1, %6\n" \ "adcs %2, %2, %7\n" \ "adcs %3, %3, %8\n" \ "adc %4, %4, %9\n" \ : "+r" (A0), "+r" (A1), "+r" (A2), "+r" (A3), "+r" (A4) \ : "r" (B0), "r" (B1), "r" (B2), "r" (B3), "r" (B4) \ : "cc" ) #endif /* HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS */ #if defined (__i386__) && __GNUC__ >= 4 /* A += B (i386) */ #define ADD_1305_32(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0) \ __asm__ ("addl %5, %0\n" \ "adcl %6, %1\n" \ "adcl %7, %2\n" \ "adcl %8, %3\n" \ "adcl %9, %4\n" \ : "+r" (A0), "+r" (A1), "+r" (A2), "+r" (A3), "+r" (A4) \ : "g" (B0), "g" (B1), "g" (B2), "g" (B3), "g" (B4) \ : "cc" ) #endif /* __i386__ */ #ifndef UMUL_ADD_32 /* HI:LO += A * B (generic, mpi) */ # define UMUL_ADD_32(HI, LO, A, B) do { \ u32 t_lo, t_hi; \ umul_ppmm(t_hi, t_lo, A, B); \ add_ssaaaa(HI, LO, HI, LO, t_hi, t_lo); \ } while (0) #endif #ifndef ADD_1305_32 /* A += B (generic, mpi) */ # define ADD_1305_32(A4, A3, A2, A1, A0, 
B4, B3, B2, B1, B0) do { \ u32 carry0, carry1, carry2; \ add_ssaaaa(carry0, A0, 0, A0, 0, B0); \ add_ssaaaa(carry1, A1, 0, A1, 0, B1); \ add_ssaaaa(carry1, A1, carry1, A1, 0, carry0); \ add_ssaaaa(carry2, A2, 0, A2, 0, B2); \ add_ssaaaa(carry2, A2, carry2, A2, 0, carry1); \ add_ssaaaa(A4, A3, A4, A3, B4, B3); \ add_ssaaaa(A4, A3, A4, A3, 0, carry2); \ } while (0) #endif /* H = H * R mod 2¹³⁰-5 */ #define MUL_MOD_1305_32(H4, H3, H2, H1, H0, R3, R2, R1, R0, \ R3_MULT5, R2_MULT5, R1_MULT5) do { \ u32 x0_lo, x0_hi, x1_lo, x1_hi, x2_lo, x2_hi, x3_lo, x3_hi; \ u32 t0_lo, t0_hi; \ \ /* x = a * r (partial mod 2^130-5) */ \ umul_ppmm(x0_hi, x0_lo, H0, R0); /* h0 * r0 */ \ umul_ppmm(x1_hi, x1_lo, H0, R1); /* h0 * r1 */ \ umul_ppmm(x2_hi, x2_lo, H0, R2); /* h0 * r2 */ \ umul_ppmm(x3_hi, x3_lo, H0, R3); /* h0 * r3 */ \ \ UMUL_ADD_32(x0_hi, x0_lo, H1, R3_MULT5); /* h1 * r3 mod 2^130-5 */ \ UMUL_ADD_32(x1_hi, x1_lo, H1, R0); /* h1 * r0 */ \ UMUL_ADD_32(x2_hi, x2_lo, H1, R1); /* h1 * r1 */ \ UMUL_ADD_32(x3_hi, x3_lo, H1, R2); /* h1 * r2 */ \ \ UMUL_ADD_32(x0_hi, x0_lo, H2, R2_MULT5); /* h2 * r2 mod 2^130-5 */ \ UMUL_ADD_32(x1_hi, x1_lo, H2, R3_MULT5); /* h2 * r3 mod 2^130-5 */ \ UMUL_ADD_32(x2_hi, x2_lo, H2, R0); /* h2 * r0 */ \ UMUL_ADD_32(x3_hi, x3_lo, H2, R1); /* h2 * r1 */ \ \ UMUL_ADD_32(x0_hi, x0_lo, H3, R1_MULT5); /* h3 * r1 mod 2^130-5 */ \ H1 = x0_hi; \ UMUL_ADD_32(x1_hi, x1_lo, H3, R2_MULT5); /* h3 * r2 mod 2^130-5 */ \ UMUL_ADD_32(x2_hi, x2_lo, H3, R3_MULT5); /* h3 * r3 mod 2^130-5 */ \ UMUL_ADD_32(x3_hi, x3_lo, H3, R0); /* h3 * r0 */ \ \ t0_lo = H4 * R1_MULT5; /* h4 * r1 mod 2^130-5 */ \ t0_hi = H4 * R2_MULT5; /* h4 * r2 mod 2^130-5 */ \ add_ssaaaa(H2, x1_lo, x1_hi, x1_lo, 0, t0_lo); \ add_ssaaaa(H3, x2_lo, x2_hi, x2_lo, 0, t0_hi); \ t0_lo = H4 * R3_MULT5; /* h4 * r3 mod 2^130-5 */ \ t0_hi = H4 * R0; /* h4 * r0 */ \ add_ssaaaa(H4, x3_lo, x3_hi, x3_lo, t0_hi, t0_lo); \ \ /* carry propagation */ \ H0 = (H4 >> 2) * 5; /* msb mod 2^130-5 */ \ H4 = H4 & 3; \ 
ADD_1305_32(H4, H3, H2, H1, H0, 0, x3_lo, x2_lo, x1_lo, x0_lo); \ } while (0) /* Absorb every whole POLY1305_BLOCKSIZE block of the LEN bytes at BUF into the accumulator held in ctx->state, reading four little-endian 32-bit words per block with HIGH_PAD as the fifth, top word. Unlike the 64-bit variant, LEN is tested before the first load. Returns a byte count that the callers in this file pass on to _gcry_burn_stack. */ static unsigned int poly1305_blocks (poly1305_context_t *ctx, const byte *buf, size_t len, byte high_pad) { POLY1305_STATE *st = &ctx->state; u32 r1_mult5, r2_mult5, r3_mult5; u32 h0, h1, h2, h3, h4; u32 m0, m1, m2, m3, m4; m4 = high_pad; h0 = st->h[0]; h1 = st->h[1]; h2 = st->h[2]; h3 = st->h[3]; h4 = st->h[4]; /* r[i] * 5 / 4; exact because poly1305_init masked off the low two bits of r[1..3] */ r1_mult5 = (st->r[1] >> 2) + st->r[1]; r2_mult5 = (st->r[2] >> 2) + st->r[2]; r3_mult5 = (st->r[3] >> 2) + st->r[3]; while (len >= POLY1305_BLOCKSIZE) { m0 = buf_get_le32(buf + 0); m1 = buf_get_le32(buf + 4); m2 = buf_get_le32(buf + 8); m3 = buf_get_le32(buf + 12); /* a = h + m */ ADD_1305_32(h4, h3, h2, h1, h0, m4, m3, m2, m1, m0); /* h = a * r (partial mod 2^130-5) */ MUL_MOD_1305_32(h4, h3, h2, h1, h0, st->r[3], st->r[2], st->r[1], st->r[0], r3_mult5, r2_mult5, r1_mult5); buf += POLY1305_BLOCKSIZE; len -= POLY1305_BLOCKSIZE; } st->h[0] = h0; st->h[1] = h1; st->h[2] = h2; st->h[3] = h3; st->h[4] = h4; return 6 * sizeof (void *) + 28 * sizeof (u32); } /* Finish the MAC (32-bit variant). A pending partial block gets a byte of value 1 appended, is zero-filled up to POLY1305_BLOCKSIZE and absorbed with high_pad 0. If h + 5 carries into bit 130, 5 is added to the low 128 bits of h; then the key words k[0..3] are added and the 16-byte result is stored little-endian in MAC. Returns a stack-burn byte count including that of poly1305_blocks. */ static unsigned int poly1305_final (poly1305_context_t *ctx, byte mac[POLY1305_TAGLEN]) { POLY1305_STATE *st = &ctx->state; unsigned int burn = 0; u32 carry, tmp0, tmp1, tmp2, u; u32 h4, h3, h2, h1, h0; /* process the remaining block */ if (ctx->leftover) { ctx->buffer[ctx->leftover++] = 1; if (ctx->leftover < POLY1305_BLOCKSIZE) { memset (&ctx->buffer[ctx->leftover], 0, POLY1305_BLOCKSIZE - ctx->leftover); ctx->leftover = POLY1305_BLOCKSIZE; } burn = poly1305_blocks (ctx, ctx->buffer, POLY1305_BLOCKSIZE, 0); } h0 = st->h[0]; h1 = st->h[1]; h2 = st->h[2]; h3 = st->h[3]; h4 = st->h[4]; /* check if h is more than 2^130-5, by adding 5. */ add_ssaaaa(carry, tmp0, 0, h0, 0, 5); add_ssaaaa(carry, tmp0, 0, carry, 0, h1); add_ssaaaa(carry, tmp0, 0, carry, 0, h2); add_ssaaaa(carry, tmp0, 0, carry, 0, h3); u = (carry + h4) >> 2; /* u == 0 or 1 */ /* minus 2^130-5 ... 
(+5) */ u = (-u) & 5; add_ssaaaa(carry, h0, 0, h0, 0, u); add_ssaaaa(carry, h1, 0, h1, 0, carry); add_ssaaaa(carry, h2, 0, h2, 0, carry); add_ssaaaa(carry, h3, 0, h3, 0, carry); /* add high part of key + h */ add_ssaaaa(tmp0, h0, 0, h0, 0, st->k[0]); add_ssaaaa(tmp1, h1, 0, h1, 0, st->k[1]); add_ssaaaa(tmp1, h1, tmp1, h1, 0, tmp0); add_ssaaaa(tmp2, h2, 0, h2, 0, st->k[2]); add_ssaaaa(tmp2, h2, tmp2, h2, 0, tmp1); add_ssaaaa(carry, h3, 0, h3, 0, st->k[3]); h3 += tmp2; /* the carry out of the top word is not used, so the tag is the sum modulo 2^128 */ buf_put_le32(mac + 0, h0); buf_put_le32(mac + 4, h1); buf_put_le32(mac + 8, h2); buf_put_le32(mac + 12, h3); /* burn_stack */ return 4 * sizeof (void *) + 10 * sizeof (u32) + burn; } #endif /* USE_MPI_32BIT */ /* Feed BYTES bytes at M into the MAC. Input first tops up a partially filled ctx->buffer, then whole blocks are absorbed directly from M, and any remainder is copied into ctx->buffer for a later call. Full blocks are absorbed with high_pad 1. Returns the stack-burn count reported by poly1305_blocks, or 0 when no block was processed. */ unsigned int _gcry_poly1305_update_burn (poly1305_context_t *ctx, const byte *m, size_t bytes) { unsigned int burn = 0; /* handle leftover */ if (ctx->leftover) { size_t want = (POLY1305_BLOCKSIZE - ctx->leftover); if (want > bytes) want = bytes; buf_cpy (ctx->buffer + ctx->leftover, m, want); bytes -= want; m += want; ctx->leftover += want; /* still no complete block buffered: nothing to process yet */ if (ctx->leftover < POLY1305_BLOCKSIZE) return 0; burn = poly1305_blocks (ctx, ctx->buffer, POLY1305_BLOCKSIZE, 1); ctx->leftover = 0; } /* process full blocks */ if (bytes >= POLY1305_BLOCKSIZE) { size_t nblks = bytes / POLY1305_BLOCKSIZE; burn = poly1305_blocks (ctx, m, nblks * POLY1305_BLOCKSIZE, 1); m += nblks * POLY1305_BLOCKSIZE; bytes -= nblks * POLY1305_BLOCKSIZE; } /* store leftover */ if (bytes) { buf_cpy (ctx->buffer + ctx->leftover, m, bytes); ctx->leftover += bytes; } return burn; } /* Same as _gcry_poly1305_update_burn, except that a non-zero burn count is passed to _gcry_burn_stack here instead of being returned. */ void _gcry_poly1305_update (poly1305_context_t *ctx, const byte *m, size_t bytes) { unsigned int burn; burn = _gcry_poly1305_update_burn (ctx, m, bytes); if (burn) _gcry_burn_stack (burn); } /* Write the final tag to MAC via poly1305_final and pass its burn count to _gcry_burn_stack. */ void _gcry_poly1305_finish (poly1305_context_t *ctx, byte mac[POLY1305_TAGLEN]) { unsigned int burn; burn = poly1305_final (ctx, mac); _gcry_burn_stack (burn); } /* Check KEYLEN and initialise CTX from KEY. The first call also runs selftest() and remembers its result. Returns GPG_ERR_INV_KEYLEN for a wrong key length, GPG_ERR_SELFTEST_FAILED when the remembered self-test result is a failure, otherwise 0. NOTE(review): the initialized flag is a plain static int with no locking visible in this file; confirm that callers serialise the first call. */ gcry_err_code_t _gcry_poly1305_init (poly1305_context_t * ctx, const byte * key, size_t keylen) { static int 
initialized; static const char *selftest_failed; if (!initialized) { /* set before running the test: selftest() calls back into this function through poly1305_auth */ initialized = 1; selftest_failed = selftest (); if (selftest_failed) log_error ("Poly1305 selftest failed (%s)\n", selftest_failed); } if (keylen != POLY1305_KEYLEN) return GPG_ERR_INV_KEYLEN; if (selftest_failed) return GPG_ERR_SELFTEST_FAILED; poly1305_init (ctx, key); return 0; } /* One-shot helper used by the self-test: compute the tag of the BYTES-byte message M under KEY into MAC using a temporary context that is wiped afterwards. NOTE(review): the return value of _gcry_poly1305_init is ignored here. */ static void poly1305_auth (byte mac[POLY1305_TAGLEN], const byte * m, size_t bytes, const byte * key) { poly1305_context_t ctx; memset (&ctx, 0, sizeof (ctx)); _gcry_poly1305_init (&ctx, key, POLY1305_KEYLEN); _gcry_poly1305_update (&ctx, m, bytes); _gcry_poly1305_finish (&ctx, mac); wipememory (&ctx, sizeof (ctx)); } /* Run four known-answer checks. Returns NULL when all pass, otherwise a string literal naming the first failing test. */ static const char * selftest (void) { /* example from nacl */ static const byte nacl_key[POLY1305_KEYLEN] = { 0xee, 0xa6, 0xa7, 0x25, 0x1c, 0x1e, 0x72, 0x91, 0x6d, 0x11, 0xc2, 0xcb, 0x21, 0x4d, 0x3c, 0x25, 0x25, 0x39, 0x12, 0x1d, 0x8e, 0x23, 0x4e, 0x65, 0x2d, 0x65, 0x1f, 0xa4, 0xc8, 0xcf, 0xf8, 0x80, }; static const byte nacl_msg[131] = { 0x8e, 0x99, 0x3b, 0x9f, 0x48, 0x68, 0x12, 0x73, 0xc2, 0x96, 0x50, 0xba, 0x32, 0xfc, 0x76, 0xce, 0x48, 0x33, 0x2e, 0xa7, 0x16, 0x4d, 0x96, 0xa4, 0x47, 0x6f, 0xb8, 0xc5, 0x31, 0xa1, 0x18, 0x6a, 0xc0, 0xdf, 0xc1, 0x7c, 0x98, 0xdc, 0xe8, 0x7b, 0x4d, 0xa7, 0xf0, 0x11, 0xec, 0x48, 0xc9, 0x72, 0x71, 0xd2, 0xc2, 0x0f, 0x9b, 0x92, 0x8f, 0xe2, 0x27, 0x0d, 0x6f, 0xb8, 0x63, 0xd5, 0x17, 0x38, 0xb4, 0x8e, 0xee, 0xe3, 0x14, 0xa7, 0xcc, 0x8a, 0xb9, 0x32, 0x16, 0x45, 0x48, 0xe5, 0x26, 0xae, 0x90, 0x22, 0x43, 0x68, 0x51, 0x7a, 0xcf, 0xea, 0xbd, 0x6b, 0xb3, 0x73, 0x2b, 0xc0, 0xe9, 0xda, 0x99, 0x83, 0x2b, 0x61, 0xca, 0x01, 0xb6, 0xde, 0x56, 0x24, 0x4a, 0x9e, 0x88, 0xd5, 0xf9, 0xb3, 0x79, 0x73, 0xf6, 0x22, 0xa4, 0x3d, 0x14, 0xa6, 0x59, 0x9b, 0x1f, 0x65, 0x4c, 0xb4, 0x5a, 0x74, 0xe3, 0x55, 0xa5 }; static const byte nacl_mac[16] = { 0xf3, 0xff, 0xc7, 0x70, 0x3f, 0x94, 0x00, 0xe5, 0x2a, 0x7d, 0xfb, 0x4b, 0x3d, 0x33, 0x05, 0xd9 }; /* generates a final value of (2^130 - 2) == 3 */ static const 
byte wrap_key[POLY1305_KEYLEN] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }; static const byte wrap_msg[16] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; static const byte wrap_mac[16] = { 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }; /* mac of the macs of messages of length 0 to 256, where the key and messages * have all their values set to the length */ static const byte total_key[POLY1305_KEYLEN] = { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; static const byte total_mac[16] = { 0x64, 0xaf, 0xe2, 0xe8, 0xd6, 0xad, 0x7b, 0xbd, 0xd2, 0x87, 0xf9, 0x7c, 0x44, 0x62, 0x3d, 0x39 }; poly1305_context_t ctx; poly1305_context_t total_ctx; byte all_key[POLY1305_KEYLEN]; byte all_msg[256]; byte mac[16]; size_t i, j; memset (&ctx, 0, sizeof (ctx)); memset (&total_ctx, 0, sizeof (total_ctx)); memset (mac, 0, sizeof (mac)); /* test 1: one-shot MAC of the nacl vector */ poly1305_auth (mac, nacl_msg, sizeof (nacl_msg), nacl_key); if (memcmp (nacl_mac, mac, sizeof (nacl_mac)) != 0) return "Poly1305 test 1 failed."; /* SSE2/AVX have a 32 byte block size, but also support 64 byte blocks, so * make sure everything still works varying between them */ /* test 2: the same vector fed in pieces of 32, 64, 16, 8, 4, 2 and then single bytes */ memset (mac, 0, sizeof (mac)); _gcry_poly1305_init (&ctx, nacl_key, POLY1305_KEYLEN); _gcry_poly1305_update (&ctx, nacl_msg + 0, 32); _gcry_poly1305_update (&ctx, nacl_msg + 32, 64); _gcry_poly1305_update (&ctx, nacl_msg + 96, 16); _gcry_poly1305_update (&ctx, nacl_msg + 112, 8); _gcry_poly1305_update (&ctx, nacl_msg + 120, 4); _gcry_poly1305_update (&ctx, nacl_msg + 124, 2); _gcry_poly1305_update (&ctx, nacl_msg + 126, 1); _gcry_poly1305_update (&ctx, nacl_msg + 127, 1); _gcry_poly1305_update (&ctx, 
nacl_msg + 128, 1); _gcry_poly1305_update (&ctx, nacl_msg + 129, 1); _gcry_poly1305_update (&ctx, nacl_msg + 130, 1); _gcry_poly1305_finish (&ctx, mac); if (memcmp (nacl_mac, mac, sizeof (nacl_mac)) != 0) return "Poly1305 test 2 failed."; /* test 3: the wrap-around vector */ memset (mac, 0, sizeof (mac)); poly1305_auth (mac, wrap_msg, sizeof (wrap_msg), wrap_key); /* NOTE(review): the length below is sizeof (nacl_mac) although wrap_mac is compared; both arrays are declared with 16 bytes, so the result is unaffected. */ if (memcmp (wrap_mac, mac, sizeof (nacl_mac)) != 0) return "Poly1305 test 3 failed."; /* test 4: MAC, under total_key, of the concatenated MACs of 256 generated messages of length 0..255 */ _gcry_poly1305_init (&total_ctx, total_key, POLY1305_KEYLEN); for (i = 0; i < 256; i++) { /* set key and message to 'i,i,i..' */ for (j = 0; j < sizeof (all_key); j++) all_key[j] = i; for (j = 0; j < i; j++) all_msg[j] = i; poly1305_auth (mac, all_msg, i, all_key); _gcry_poly1305_update (&total_ctx, mac, 16); } _gcry_poly1305_finish (&total_ctx, mac); if (memcmp (total_mac, mac, sizeof (total_mac)) != 0) return "Poly1305 test 4 failed."; return NULL; }