diff --git a/cipher/keccak-armv7-neon.S b/cipher/keccak-armv7-neon.S index 0bec8d50..28a284a1 100644 --- a/cipher/keccak-armv7-neon.S +++ b/cipher/keccak-armv7-neon.S @@ -1,945 +1,945 @@ /* keccak-armv7-neon.S - ARMv7/NEON implementation of Keccak * * Copyright (C) 2015 Jussi Kivilinna * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . */ #include #if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \ defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \ defined(HAVE_GCC_INLINE_ASM_NEON) /* Based on public-domain/CC0 implementation from SUPERCOP package * (keccakc1024/inplace-armv7a-neon/keccak2.s) * * Original copyright header follows: */ @ The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, @ Michaƫl Peeters and Gilles Van Assche. For more information, feedback or @ questions, please refer to our website: http://keccak.noekeon.org/ @ @ Implementation by Ronny Van Keer, hereby denoted as "the implementer". @ @ To the extent possible under law, the implementer has waived all copyright @ and related or neighboring rights to the source code in this file. @ http://creativecommons.org/publicdomain/zero/1.0/ .text .syntax unified .fpu neon .arm .extern _gcry_keccak_round_consts_64bit; #ifdef __PIC__ # define GET_DATA_POINTER(reg, name, rtmp) \ ldr reg, 1f; \ ldr rtmp, 2f; \ b 3f; \ 1: .word _GLOBAL_OFFSET_TABLE_-(3f+8); \ 2: .word name(GOT); \ 3: add reg, pc, reg; \ ldr reg, [reg, rtmp]; #else # define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name #endif @// --- offsets in state .equ Aba, 0*8 .equ Aga, 1*8 .equ Aka, 2*8 .equ Ama, 3*8 .equ Asa, 4*8 @// --- macros .macro KeccakThetaRhoPiChiIota argA1, argA2, argA3, argA4, argA5 @Prepare Theta @Ca = Aba^Aga^Aka^Ama^Asa@ @Ce = Abe^Age^Ake^Ame^Ase@ @Ci = Abi^Agi^Aki^Ami^Asi@ @Co = Abo^Ago^Ako^Amo^Aso@ @Cu = Abu^Agu^Aku^Amu^Asu@ @De = Ca^ROL64(Ci, 1)@ @Di = Ce^ROL64(Co, 1)@ @Do = Ci^ROL64(Cu, 1)@ @Du = Co^ROL64(Ca, 1)@ @Da = Cu^ROL64(Ce, 1)@ veor.64 q4, q6, q7 veor.64 q5, q9, q10 veor.64 d8, d8, d9 veor.64 d10, d10, d11 veor.64 d1, d8, d16 veor.64 d2, d10, d17 veor.64 q4, q11, q12 veor.64 q5, q14, q15 veor.64 d8, d8, d9 veor.64 d10, d10, d11 veor.64 d3, d8, d26 vadd.u64 q4, q1, q1 veor.64 d4, d10, d27 vmov.64 d0, d5 vsri.64 q4, q1, #63 vadd.u64 q5, q2, q2 veor.64 q4, q4, q0 vsri.64 q5, q2, #63 vadd.u64 d7, d1, d1 veor.64 \argA2, \argA2, d8 veor.64 q5, q5, q1 vsri.64 d7, d1, #63 vshl.u64 d1, \argA2, #44 veor.64 \argA3, \argA3, d9 veor.64 d7, d7, d4 @Ba = argA1^Da@ @Be = ROL64((argA2^De), 44)@ @Bi = ROL64((argA3^Di), 43)@ @Bo = ROL64((argA4^Do), 21)@ @Bu = ROL64((argA5^Du), 14)@ @argA2 = Be ^((~Bi)& Bo )@ @argA3 = Bi ^((~Bo)& Bu )@ @argA4 = Bo ^((~Bu)& Ba )@ @argA5 = Bu ^((~Ba)& Be )@ @argA1 = Ba ^((~Be)& Bi )@ argA1 ^= KeccakF1600RoundConstants[i+round]@ vsri.64 d1, \argA2, #64-44 vshl.u64 d2, \argA3, #43 vldr.64 d0, [sp, #\argA1] veor.64 \argA4, \argA4, d10 vsri.64 d2, \argA3, #64-43 vshl.u64 d3, \argA4, #21 veor.64 \argA5, \argA5, d11 
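@ Note on the rotate idiom used throughout these macros: each vshl.u64 #n /
@ vsri.64 #(64-n) pair implements ROL64(x, n), the 64-bit rotate left named
@ in the comments above (vshl shifts a copy left by n, vsri shifts right by
@ 64-n and inserts the high bits back on the right).  In C terms, with rol64
@ as a hypothetical helper name and u64 a 64-bit unsigned type:
@
@   static inline u64 rol64 (u64 x, unsigned int n)
@   {
@     return (x << n) | (x >> (64 - n));  /* valid for 0 < n < 64 */
@   }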
veor.64 d0, d0, d7 vsri.64 d3, \argA4, #64-21 vbic.64 d5, d2, d1 vshl.u64 d4, \argA5, #14 vbic.64 \argA2, d3, d2 vld1.64 d6, [ip]! veor.64 d5, d0 vsri.64 d4, \argA5, #64-14 veor.64 d5, d6 vbic.64 \argA5, d1, d0 vbic.64 \argA3, d4, d3 vbic.64 \argA4, d0, d4 veor.64 \argA2, d1 vstr.64 d5, [sp, #\argA1] veor.64 \argA3, d2 veor.64 \argA4, d3 veor.64 \argA5, d4 .endm .macro KeccakThetaRhoPiChi1 argA1, argA2, argA3, argA4, argA5 @d2 = ROL64((argA1^Da), 3)@ @d3 = ROL64((argA2^De), 45)@ @d4 = ROL64((argA3^Di), 61)@ @d0 = ROL64((argA4^Do), 28)@ @d1 = ROL64((argA5^Du), 20)@ @argA1 = Ba ^((~Be)& Bi )@ Ca ^= argA1@ @argA2 = Be ^((~Bi)& Bo )@ @argA3 = Bi ^((~Bo)& Bu )@ @argA4 = Bo ^((~Bu)& Ba )@ @argA5 = Bu ^((~Ba)& Be )@ veor.64 \argA2, \argA2, d8 veor.64 \argA3, \argA3, d9 vshl.u64 d3, \argA2, #45 vldr.64 d6, [sp, #\argA1] vshl.u64 d4, \argA3, #61 veor.64 \argA4, \argA4, d10 vsri.64 d3, \argA2, #64-45 veor.64 \argA5, \argA5, d11 vsri.64 d4, \argA3, #64-61 vshl.u64 d0, \argA4, #28 veor.64 d6, d6, d7 vshl.u64 d1, \argA5, #20 vbic.64 \argA3, d4, d3 vsri.64 d0, \argA4, #64-28 vbic.64 \argA4, d0, d4 vshl.u64 d2, d6, #3 vsri.64 d1, \argA5, #64-20 veor.64 \argA4, d3 vsri.64 d2, d6, #64-3 vbic.64 \argA5, d1, d0 vbic.64 d6, d2, d1 vbic.64 \argA2, d3, d2 veor.64 d6, d0 veor.64 \argA2, d1 vstr.64 d6, [sp, #\argA1] veor.64 \argA3, d2 veor.64 d5, d6 veor.64 \argA5, d4 .endm .macro KeccakThetaRhoPiChi2 argA1, argA2, argA3, argA4, argA5 @d4 = ROL64((argA1^Da), 18)@ @d0 = ROL64((argA2^De), 1)@ @d1 = ROL64((argA3^Di), 6)@ @d2 = ROL64((argA4^Do), 25)@ @d3 = ROL64((argA5^Du), 8)@ @argA1 = Ba ^((~Be)& Bi )@ Ca ^= argA1@ @argA2 = Be ^((~Bi)& Bo )@ @argA3 = Bi ^((~Bo)& Bu )@ @argA4 = Bo ^((~Bu)& Ba )@ @argA5 = Bu ^((~Ba)& Be )@ veor.64 \argA3, \argA3, d9 veor.64 \argA4, \argA4, d10 vshl.u64 d1, \argA3, #6 vldr.64 d6, [sp, #\argA1] vshl.u64 d2, \argA4, #25 veor.64 \argA5, \argA5, d11 vsri.64 d1, \argA3, #64-6 veor.64 \argA2, \argA2, d8 vsri.64 d2, \argA4, #64-25 vext.8 d3, \argA5, \argA5, #7 veor.64 d6, d6, d7 vbic.64 \argA3, d2, d1 vadd.u64 d0, \argA2, \argA2 vbic.64 \argA4, d3, d2 vsri.64 d0, \argA2, #64-1 vshl.u64 d4, d6, #18 veor.64 \argA2, d1, \argA4 veor.64 \argA3, d0 vsri.64 d4, d6, #64-18 vstr.64 \argA3, [sp, #\argA1] veor.64 d5, \argA3 vbic.64 \argA5, d1, d0 vbic.64 \argA3, d4, d3 vbic.64 \argA4, d0, d4 veor.64 \argA3, d2 veor.64 \argA4, d3 veor.64 \argA5, d4 .endm .macro KeccakThetaRhoPiChi3 argA1, argA2, argA3, argA4, argA5 @d1 = ROL64((argA1^Da), 36)@ @d2 = ROL64((argA2^De), 10)@ @d3 = ROL64((argA3^Di), 15)@ @d4 = ROL64((argA4^Do), 56)@ @d0 = ROL64((argA5^Du), 27)@ @argA1 = Ba ^((~Be)& Bi )@ Ca ^= argA1@ @argA2 = Be ^((~Bi)& Bo )@ @argA3 = Bi ^((~Bo)& Bu )@ @argA4 = Bo ^((~Bu)& Ba )@ @argA5 = Bu ^((~Ba)& Be )@ veor.64 \argA2, \argA2, d8 veor.64 \argA3, \argA3, d9 vshl.u64 d2, \argA2, #10 vldr.64 d6, [sp, #\argA1] vshl.u64 d3, \argA3, #15 veor.64 \argA4, \argA4, d10 vsri.64 d2, \argA2, #64-10 vsri.64 d3, \argA3, #64-15 veor.64 \argA5, \argA5, d11 vext.8 d4, \argA4, \argA4, #1 vbic.64 \argA2, d3, d2 vshl.u64 d0, \argA5, #27 veor.64 d6, d6, d7 vbic.64 \argA3, d4, d3 vsri.64 d0, \argA5, #64-27 vshl.u64 d1, d6, #36 veor.64 \argA3, d2 vbic.64 \argA4, d0, d4 vsri.64 d1, d6, #64-36 veor.64 \argA4, d3 vbic.64 d6, d2, d1 vbic.64 \argA5, d1, d0 veor.64 d6, d0 veor.64 \argA2, d1 vstr.64 d6, [sp, #\argA1] veor.64 d5, d6 veor.64 \argA5, d4 .endm .macro KeccakThetaRhoPiChi4 argA1, argA2, argA3, argA4, argA5 @d3 = ROL64((argA1^Da), 41)@ @d4 = ROL64((argA2^De), 2)@ @d0 = ROL64((argA3^Di), 62)@ @d1 = ROL64((argA4^Do), 55)@ @d2 = 
ROL64((argA5^Du), 39)@ @argA1 = Ba ^((~Be)& Bi )@ Ca ^= argA1@ @argA2 = Be ^((~Bi)& Bo )@ @argA3 = Bi ^((~Bo)& Bu )@ @argA4 = Bo ^((~Bu)& Ba )@ @argA5 = Bu ^((~Ba)& Be )@ veor.64 \argA2, \argA2, d8 veor.64 \argA3, \argA3, d9 vshl.u64 d4, \argA2, #2 veor.64 \argA5, \argA5, d11 vshl.u64 d0, \argA3, #62 vldr.64 d6, [sp, #\argA1] vsri.64 d4, \argA2, #64-2 veor.64 \argA4, \argA4, d10 vsri.64 d0, \argA3, #64-62 vshl.u64 d1, \argA4, #55 veor.64 d6, d6, d7 vshl.u64 d2, \argA5, #39 vsri.64 d1, \argA4, #64-55 vbic.64 \argA4, d0, d4 vsri.64 d2, \argA5, #64-39 vbic.64 \argA2, d1, d0 vshl.u64 d3, d6, #41 veor.64 \argA5, d4, \argA2 vbic.64 \argA2, d2, d1 vsri.64 d3, d6, #64-41 veor.64 d6, d0, \argA2 vbic.64 \argA2, d3, d2 vbic.64 \argA3, d4, d3 veor.64 \argA2, d1 vstr.64 d6, [sp, #\argA1] veor.64 d5, d6 veor.64 \argA3, d2 veor.64 \argA4, d3 .endm @// --- code @not callable from C! .p2align 3 .type KeccakF_armv7a_neon_asm,%function; KeccakF_armv7a_neon_asm: @ .LroundLoop: KeccakThetaRhoPiChiIota Aba, d13, d19, d25, d31 KeccakThetaRhoPiChi1 Aka, d15, d21, d22, d28 KeccakThetaRhoPiChi2 Asa, d12, d18, d24, d30 KeccakThetaRhoPiChi3 Aga, d14, d20, d26, d27 KeccakThetaRhoPiChi4 Ama, d16, d17, d23, d29 KeccakThetaRhoPiChiIota Aba, d15, d18, d26, d29 KeccakThetaRhoPiChi1 Asa, d14, d17, d25, d28 KeccakThetaRhoPiChi2 Ama, d13, d21, d24, d27 KeccakThetaRhoPiChi3 Aka, d12, d20, d23, d31 KeccakThetaRhoPiChi4 Aga, d16, d19, d22, d30 KeccakThetaRhoPiChiIota Aba, d14, d21, d23, d30 KeccakThetaRhoPiChi1 Ama, d12, d19, d26, d28 KeccakThetaRhoPiChi2 Aga, d15, d17, d24, d31 KeccakThetaRhoPiChi3 Asa, d13, d20, d22, d29 KeccakThetaRhoPiChi4 Aka, d16, d18, d25, d27 KeccakThetaRhoPiChiIota Aba, d12, d17, d22, d27 KeccakThetaRhoPiChi1 Aga, d13, d18, d23, d28 KeccakThetaRhoPiChi2 Aka, d14, d19, d24, d29 ldr r0, [ip] KeccakThetaRhoPiChi3 Ama, d15, d20, d25, d30 cmp r0, #0xFFFFFFFF KeccakThetaRhoPiChi4 Asa, d16, d21, d26, d31 bne .LroundLoop sub ip, #(8*24) bx lr .p2align 2 .ltorg .size KeccakF_armv7a_neon_asm,.-KeccakF_armv7a_neon_asm; @//unsigned _gcry_keccak_permute_armv7_neon(u64 *state) callable from C .p2align 3 .global _gcry_keccak_permute_armv7_neon .type _gcry_keccak_permute_armv7_neon,%function; _gcry_keccak_permute_armv7_neon: push {ip, lr} vpush {q4-q7} sub sp,sp, #5*8 vldr.64 d0, [r0, #0*8] vldr.64 d12, [r0, #1*8] vldr.64 d17, [r0, #2*8] vldr.64 d22, [r0, #3*8] vldr.64 d27, [r0, #4*8] GET_DATA_POINTER(ip, _gcry_keccak_round_consts_64bit, lr); vldr.64 d1, [r0, #5*8] vldr.64 d13, [r0, #6*8] vldr.64 d18, [r0, #7*8] vldr.64 d23, [r0, #8*8] vldr.64 d28, [r0, #9*8] vldr.64 d2, [r0, #10*8] vldr.64 d14, [r0, #11*8] vldr.64 d19, [r0, #12*8] vldr.64 d24, [r0, #13*8] vldr.64 d29, [r0, #14*8] vldr.64 d3, [r0, #15*8] vldr.64 d15, [r0, #16*8] vldr.64 d20, [r0, #17*8] vldr.64 d25, [r0, #18*8] vldr.64 d30, [r0, #19*8] vldr.64 d4, [r0, #20*8] vldr.64 d16, [r0, #21*8] vldr.64 d21, [r0, #22*8] vldr.64 d26, [r0, #23*8] vldr.64 d31, [r0, #24*8] vstr.64 d0, [sp, #Aba] vstr.64 d1, [sp, #Aga] veor.64 q0, q0, q1 vstr.64 d2, [sp, #Aka] veor.64 d5, d0, d1 vstr.64 d3, [sp, #Ama] mov r1, r0 vstr.64 d4, [sp, #Asa] veor.64 d5, d5, d4 bl KeccakF_armv7a_neon_asm vpop.64 { d0- d4 } vstr.64 d0, [r1, #0*8] vstr.64 d12, [r1, #1*8] vstr.64 d17, [r1, #2*8] vstr.64 d22, [r1, #3*8] vstr.64 d27, [r1, #4*8] vstr.64 d1, [r1, #5*8] vstr.64 d13, [r1, #6*8] vstr.64 d18, [r1, #7*8] vstr.64 d23, [r1, #8*8] vstr.64 d28, [r1, #9*8] vstr.64 d2, [r1, #10*8] vstr.64 d14, [r1, #11*8] vstr.64 d19, [r1, #12*8] vstr.64 d24, [r1, #13*8] vstr.64 d29, [r1, #14*8] vstr.64 d3, 
[r1, #15*8] vstr.64 d15, [r1, #16*8] vstr.64 d20, [r1, #17*8] vstr.64 d25, [r1, #18*8] vstr.64 d30, [r1, #19*8] vstr.64 d4, [r1, #20*8] vstr.64 d16, [r1, #21*8] vstr.64 d21, [r1, #22*8] vstr.64 d26, [r1, #23*8] vstr.64 d31, [r1, #24*8] mov r0, #112 vpop {q4-q7} pop {ip, pc} .p2align 2 .ltorg .size _gcry_keccak_permute_armv7_neon,.-_gcry_keccak_permute_armv7_neon; -@//unsigned _gcry_keccak_permute_armv7_neon(u64 *state, @r4 -@ int pos, @r1 -@ const byte *lanes, @r2 -@ unsigned int nlanes, @r3 -@ int blocklanes) @ r5 callable from C +@//unsigned _gcry_keccak_absorb_lanes64_armv7_neon(u64 *state, @r4 +@ int pos, @r1 +@ const byte *lanes, @r2 +@ size_t nlanes, @r3 +@ int blocklanes) @ r5 callable from C .p2align 3 .global _gcry_keccak_absorb_lanes64_armv7_neon .type _gcry_keccak_absorb_lanes64_armv7_neon,%function; _gcry_keccak_absorb_lanes64_armv7_neon: cmp r3, #0 @ nlanes == 0 itt eq moveq r0, #0 bxeq lr push {r4-r5, ip, lr} beq .Lout mov r4, r0 ldr r5, [sp, #(4*4)] vpush {q4-q7} @ load state vldr.64 d0, [r4, #0*8] vldr.64 d12, [r4, #1*8] vldr.64 d17, [r4, #2*8] vldr.64 d22, [r4, #3*8] vldr.64 d27, [r4, #4*8] GET_DATA_POINTER(ip, _gcry_keccak_round_consts_64bit, lr); vldr.64 d1, [r4, #5*8] vldr.64 d13, [r4, #6*8] vldr.64 d18, [r4, #7*8] vldr.64 d23, [r4, #8*8] vldr.64 d28, [r4, #9*8] vldr.64 d2, [r4, #10*8] vldr.64 d14, [r4, #11*8] vldr.64 d19, [r4, #12*8] vldr.64 d24, [r4, #13*8] vldr.64 d29, [r4, #14*8] vldr.64 d3, [r4, #15*8] vldr.64 d15, [r4, #16*8] vldr.64 d20, [r4, #17*8] vldr.64 d25, [r4, #18*8] vldr.64 d30, [r4, #19*8] vldr.64 d4, [r4, #20*8] vldr.64 d16, [r4, #21*8] vldr.64 d21, [r4, #22*8] vldr.64 d26, [r4, #23*8] vldr.64 d31, [r4, #24*8] .Lmain_loop: @ detect absorb mode (full blocks vs lanes) cmp r1, #0 @ pos != 0 bne .Llanes_loop .Lmain_loop_pos0: @ full blocks mode @ switch (blocksize) cmp r5, #21 beq .Lfull_block_21 cmp r5, #18 beq .Lfull_block_18 cmp r5, #17 beq .Lfull_block_17 cmp r5, #13 beq .Lfull_block_13 cmp r5, #9 beq .Lfull_block_9 @ unknown blocksize b .Llanes_loop .Lfull_block_21: @ SHAKE128 cmp r3, #21 @ nlanes < blocklanes blo .Llanes_loop sub sp,sp, #5*8 vld1.64 {d5-d8}, [r2]! veor d0, d5 vld1.64 {d9-d11}, [r2]! veor d12, d6 veor d17, d7 veor d22, d8 vld1.64 {d5-d8}, [r2]! veor d27, d9 veor d1, d10 veor d13, d11 vld1.64 {d9-d11}, [r2]! veor d18, d5 veor d23, d6 veor d28, d7 veor d2, d8 vld1.64 {d5-d8}, [r2]! veor d14, d9 veor d19, d10 veor d24, d11 vld1.64 {d9-d11}, [r2]! veor d29, d5 veor d3, d6 veor d15, d7 veor d20, d8 veor d25, d9 veor d30, d10 veor d4, d11 vstr.64 d0, [sp, #Aba] vstr.64 d1, [sp, #Aga] veor.64 q0, q0, q1 vstr.64 d2, [sp, #Aka] veor.64 d5, d0, d1 vstr.64 d3, [sp, #Ama] vstr.64 d4, [sp, #Asa] veor.64 d5, d5, d4 bl KeccakF_armv7a_neon_asm subs r3, #21 @ nlanes -= 21 vpop.64 { d0-d4 } beq .Ldone b .Lfull_block_21 .Lfull_block_18: @ SHA3-224 cmp r3, #18 @ nlanes < blocklanes blo .Llanes_loop sub sp,sp, #5*8 vld1.64 {d5-d8}, [r2]! veor d0, d5 vld1.64 {d9-d11}, [r2]! veor d12, d6 veor d17, d7 veor d22, d8 vld1.64 {d5-d8}, [r2]! veor d27, d9 veor d1, d10 veor d13, d11 vld1.64 {d9-d11}, [r2]! veor d18, d5 veor d23, d6 veor d28, d7 veor d2, d8 vld1.64 {d5-d8}, [r2]! 
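@ Each .Lfull_block_N path is the NEON unrolling of a full-block absorb:
@ XOR N little-endian 64-bit input lanes into the state, then run the
@ permutation (KeccakF_armv7a_neon_asm).  Roughly the equivalent of the
@ generic C code, using buf_get_le64 as in bufhelp.h:
@
@   for (i = 0; i < blocklanes; i++)
@     state[i] ^= buf_get_le64 (lanes + 8 * i);
@   /* ...then permute the state... */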
veor d14, d9 veor d19, d10 veor d24, d11 veor d29, d5 veor d3, d6 veor d15, d7 veor d20, d8 vstr.64 d0, [sp, #Aba] vstr.64 d1, [sp, #Aga] veor.64 q0, q0, q1 vstr.64 d2, [sp, #Aka] veor.64 d5, d0, d1 vstr.64 d3, [sp, #Ama] vstr.64 d4, [sp, #Asa] veor.64 d5, d5, d4 bl KeccakF_armv7a_neon_asm subs r3, #18 @ nlanes -= 18 vpop.64 { d0-d4 } beq .Ldone b .Lfull_block_18 .Lfull_block_17: @ SHA3-256 & SHAKE256 cmp r3, #17 @ nlanes < blocklanes blo .Llanes_loop sub sp,sp, #5*8 vld1.64 {d5-d8}, [r2]! veor d0, d5 vld1.64 {d9-d11}, [r2]! veor d12, d6 veor d17, d7 veor d22, d8 vld1.64 {d5-d8}, [r2]! veor d27, d9 veor d1, d10 veor d13, d11 vld1.64 {d9-d11}, [r2]! veor d18, d5 veor d23, d6 veor d28, d7 veor d2, d8 vld1.64 {d5-d7}, [r2]! veor d14, d9 veor d19, d10 veor d24, d11 veor d29, d5 veor d3, d6 veor d15, d7 vstr.64 d0, [sp, #Aba] vstr.64 d1, [sp, #Aga] veor.64 q0, q0, q1 vstr.64 d2, [sp, #Aka] veor.64 d5, d0, d1 vstr.64 d3, [sp, #Ama] vstr.64 d4, [sp, #Asa] veor.64 d5, d5, d4 bl KeccakF_armv7a_neon_asm subs r3, #17 @ nlanes -= 17 vpop.64 { d0-d4 } beq .Ldone b .Lfull_block_17 .Lfull_block_13: @ SHA3-384 cmp r3, #13 @ nlanes < blocklanes blo .Llanes_loop sub sp,sp, #5*8 vld1.64 {d5-d8}, [r2]! veor d0, d5 vld1.64 {d9-d11}, [r2]! veor d12, d6 veor d17, d7 veor d22, d8 vld1.64 {d5-d8}, [r2]! veor d27, d9 veor d1, d10 veor d13, d11 vld1.64 {d9-d10}, [r2]! veor d18, d5 veor d23, d6 veor d28, d7 veor d2, d8 veor d14, d9 veor d19, d10 vstr.64 d0, [sp, #Aba] vstr.64 d1, [sp, #Aga] veor.64 q0, q0, q1 vstr.64 d2, [sp, #Aka] veor.64 d5, d0, d1 vstr.64 d3, [sp, #Ama] vstr.64 d4, [sp, #Asa] veor.64 d5, d5, d4 bl KeccakF_armv7a_neon_asm subs r3, #13 @ nlanes -= 13 vpop.64 { d0-d4 } beq .Ldone b .Lfull_block_13 .Lfull_block_9: @ SHA3-512 cmp r3, #9 @ nlanes < blocklanes blo .Llanes_loop sub sp,sp, #5*8 vld1.64 {d5-d8}, [r2]! veor d0, d5 vld1.64 {d9-d11}, [r2]! veor d12, d6 veor d17, d7 veor d22, d8 vld1.64 {d5-d6}, [r2]! 
veor d27, d9 veor d1, d10 veor d13, d11 veor d18, d5 veor d23, d6 vstr.64 d0, [sp, #Aba] vstr.64 d1, [sp, #Aga] veor.64 q0, q0, q1 vstr.64 d2, [sp, #Aka] veor.64 d5, d0, d1 vstr.64 d3, [sp, #Ama] vstr.64 d4, [sp, #Asa] veor.64 d5, d5, d4 bl KeccakF_armv7a_neon_asm subs r3, #9 @ nlanes -= 9 vpop.64 { d0-d4 } beq .Ldone b .Lfull_block_9 .Llanes_loop: @ per-lane mode @ switch (pos) ldrb r0, [pc, r1] add pc, pc, r0, lsl #2 .Lswitch_table: .byte (.Llane0-.Lswitch_table-4)/4 .byte (.Llane1-.Lswitch_table-4)/4 .byte (.Llane2-.Lswitch_table-4)/4 .byte (.Llane3-.Lswitch_table-4)/4 .byte (.Llane4-.Lswitch_table-4)/4 .byte (.Llane5-.Lswitch_table-4)/4 .byte (.Llane6-.Lswitch_table-4)/4 .byte (.Llane7-.Lswitch_table-4)/4 .byte (.Llane8-.Lswitch_table-4)/4 .byte (.Llane9-.Lswitch_table-4)/4 .byte (.Llane10-.Lswitch_table-4)/4 .byte (.Llane11-.Lswitch_table-4)/4 .byte (.Llane12-.Lswitch_table-4)/4 .byte (.Llane13-.Lswitch_table-4)/4 .byte (.Llane14-.Lswitch_table-4)/4 .byte (.Llane15-.Lswitch_table-4)/4 .byte (.Llane16-.Lswitch_table-4)/4 .byte (.Llane17-.Lswitch_table-4)/4 .byte (.Llane18-.Lswitch_table-4)/4 .byte (.Llane19-.Lswitch_table-4)/4 .byte (.Llane20-.Lswitch_table-4)/4 .byte (.Llane21-.Lswitch_table-4)/4 .byte (.Llane22-.Lswitch_table-4)/4 .byte (.Llane23-.Lswitch_table-4)/4 .byte (.Llane24-.Lswitch_table-4)/4 .p2align 2 #define ABSORB_LANE(label, vreg) \ label: \ add r1, #1; \ vld1.64 d5, [r2]!; \ cmp r1, r5; /* pos == blocklanes */ \ veor vreg, vreg, d5; \ beq .Llanes_permute; \ subs r3, #1; \ beq .Ldone; ABSORB_LANE(.Llane0, d0) ABSORB_LANE(.Llane1, d12) ABSORB_LANE(.Llane2, d17) ABSORB_LANE(.Llane3, d22) ABSORB_LANE(.Llane4, d27) ABSORB_LANE(.Llane5, d1) ABSORB_LANE(.Llane6, d13) ABSORB_LANE(.Llane7, d18) ABSORB_LANE(.Llane8, d23) ABSORB_LANE(.Llane9, d28) ABSORB_LANE(.Llane10, d2) ABSORB_LANE(.Llane11, d14) ABSORB_LANE(.Llane12, d19) ABSORB_LANE(.Llane13, d24) ABSORB_LANE(.Llane14, d29) ABSORB_LANE(.Llane15, d3) ABSORB_LANE(.Llane16, d15) ABSORB_LANE(.Llane17, d20) ABSORB_LANE(.Llane18, d25) ABSORB_LANE(.Llane19, d30) ABSORB_LANE(.Llane20, d4) ABSORB_LANE(.Llane21, d16) ABSORB_LANE(.Llane22, d21) ABSORB_LANE(.Llane23, d26) ABSORB_LANE(.Llane24, d31) b .Llanes_loop .Llanes_permute: sub sp,sp, #5*8 vstr.64 d0, [sp, #Aba] vstr.64 d1, [sp, #Aga] veor.64 q0, q0, q1 vstr.64 d2, [sp, #Aka] veor.64 d5, d0, d1 vstr.64 d3, [sp, #Ama] vstr.64 d4, [sp, #Asa] veor.64 d5, d5, d4 bl KeccakF_armv7a_neon_asm mov r1, #0 @ pos <= 0 subs r3, #1 vpop.64 { d0-d4 } beq .Ldone b .Lmain_loop_pos0 .Ldone: @ save state vstr.64 d0, [r4, #0*8] vstr.64 d12, [r4, #1*8] vstr.64 d17, [r4, #2*8] vstr.64 d22, [r4, #3*8] vstr.64 d27, [r4, #4*8] vstr.64 d1, [r4, #5*8] vstr.64 d13, [r4, #6*8] vstr.64 d18, [r4, #7*8] vstr.64 d23, [r4, #8*8] vstr.64 d28, [r4, #9*8] vstr.64 d2, [r4, #10*8] vstr.64 d14, [r4, #11*8] vstr.64 d19, [r4, #12*8] vstr.64 d24, [r4, #13*8] vstr.64 d29, [r4, #14*8] vstr.64 d3, [r4, #15*8] vstr.64 d15, [r4, #16*8] vstr.64 d20, [r4, #17*8] vstr.64 d25, [r4, #18*8] vstr.64 d30, [r4, #19*8] vstr.64 d4, [r4, #20*8] vstr.64 d16, [r4, #21*8] vstr.64 d21, [r4, #22*8] vstr.64 d26, [r4, #23*8] vstr.64 d31, [r4, #24*8] mov r0, #120 vpop {q4-q7} .Lout: pop {r4-r5, ip, pc} .p2align 2 .ltorg .size _gcry_keccak_absorb_lanes64_armv7_neon,.-_gcry_keccak_absorb_lanes64_armv7_neon; #endif diff --git a/cipher/keccak.c b/cipher/keccak.c index e7e42473..6c385f71 100644 --- a/cipher/keccak.c +++ b/cipher/keccak.c @@ -1,1644 +1,1644 @@ /* keccak.c - SHA3 hash functions * Copyright (C) 2015 g10 Code GmbH * * This file is part of 
Libgcrypt.
 *
 * Libgcrypt is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser general Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * Libgcrypt is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#include <config.h>
#include <string.h>
#include "g10lib.h"
#include "bithelp.h"
#include "bufhelp.h"
#include "cipher.h"
#include "hash-common.h"

/* USE_64BIT indicates whether to use 64-bit generic implementation.
 * USE_32BIT indicates whether to use 32-bit generic implementation. */
#undef USE_64BIT
#if defined(__x86_64__) || SIZEOF_UNSIGNED_LONG == 8
# define USE_64BIT 1
#else
# define USE_32BIT 1
#endif

/* USE_64BIT_BMI2 indicates whether to compile with 64-bit Intel BMI2 code. */
#undef USE_64BIT_BMI2
#if defined(USE_64BIT) && defined(HAVE_GCC_INLINE_ASM_BMI2) && \
    defined(HAVE_CPU_ARCH_X86)
# define USE_64BIT_BMI2 1
#endif

/* USE_64BIT_SHLD indicates whether to compile with 64-bit Intel SHLD code. */
#undef USE_64BIT_SHLD
#if defined(USE_64BIT) && defined (__GNUC__) && defined(__x86_64__) && \
    defined(HAVE_CPU_ARCH_X86)
# define USE_64BIT_SHLD 1
#endif

/* USE_32BIT_BMI2 indicates whether to compile with 32-bit Intel BMI2 code. */
#undef USE_32BIT_BMI2
#if defined(USE_32BIT) && defined(HAVE_GCC_INLINE_ASM_BMI2) && \
    defined(HAVE_CPU_ARCH_X86)
# define USE_32BIT_BMI2 1
#endif

/* USE_64BIT_AVX512 indicates whether to compile with Intel AVX512 code. */
#undef USE_64BIT_AVX512
#if defined(USE_64BIT) && defined(__x86_64__) && \
    defined(HAVE_GCC_INLINE_ASM_AVX512) && \
    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_64BIT_AVX512 1
#endif

/* USE_64BIT_ARM_NEON indicates whether to enable 64-bit ARM/NEON assembly
 * code. */
#undef USE_64BIT_ARM_NEON
#ifdef ENABLE_NEON_SUPPORT
# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
     && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
     && defined(HAVE_GCC_INLINE_ASM_NEON)
# define USE_64BIT_ARM_NEON 1
# endif
#endif /*ENABLE_NEON_SUPPORT*/

/* USE_S390X_CRYPTO indicates whether to enable zSeries code. */
#undef USE_S390X_CRYPTO
#if defined(HAVE_GCC_INLINE_ASM_S390X)
# define USE_S390X_CRYPTO 1
#endif /* USE_S390X_CRYPTO */

/* x86-64 vector register assembly implementations use SystemV ABI, ABI
 * conversion needed on Win64 through function attribute.
*/ #undef ASM_FUNC_ABI #if defined(USE_64BIT_AVX512) && defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS) # define ASM_FUNC_ABI __attribute__((sysv_abi)) #else # define ASM_FUNC_ABI #endif #if defined(USE_64BIT) || defined(USE_64BIT_ARM_NEON) # define NEED_COMMON64 1 #endif #ifdef USE_32BIT # define NEED_COMMON32BI 1 #endif #define SHA3_DELIMITED_SUFFIX 0x06 #define SHAKE_DELIMITED_SUFFIX 0x1F typedef struct { union { #ifdef NEED_COMMON64 u64 state64[25]; #endif #ifdef NEED_COMMON32BI u32 state32bi[50]; #endif } u; } KECCAK_STATE; typedef struct { unsigned int (*permute)(KECCAK_STATE *hd); unsigned int (*absorb)(KECCAK_STATE *hd, int pos, const byte *lanes, - unsigned int nlanes, int blocklanes); + size_t nlanes, int blocklanes); unsigned int (*extract) (KECCAK_STATE *hd, unsigned int pos, byte *outbuf, unsigned int outlen); } keccak_ops_t; typedef struct KECCAK_CONTEXT_S { KECCAK_STATE state; unsigned int outlen; unsigned int blocksize; unsigned int count; unsigned int suffix; const keccak_ops_t *ops; #ifdef USE_S390X_CRYPTO unsigned int kimd_func; unsigned int buf_pos; byte buf[1344 / 8]; /* SHAKE128 requires biggest buffer, 1344 bits. */ #endif } KECCAK_CONTEXT; #ifdef NEED_COMMON64 const u64 _gcry_keccak_round_consts_64bit[24 + 1] = { U64_C(0x0000000000000001), U64_C(0x0000000000008082), U64_C(0x800000000000808A), U64_C(0x8000000080008000), U64_C(0x000000000000808B), U64_C(0x0000000080000001), U64_C(0x8000000080008081), U64_C(0x8000000000008009), U64_C(0x000000000000008A), U64_C(0x0000000000000088), U64_C(0x0000000080008009), U64_C(0x000000008000000A), U64_C(0x000000008000808B), U64_C(0x800000000000008B), U64_C(0x8000000000008089), U64_C(0x8000000000008003), U64_C(0x8000000000008002), U64_C(0x8000000000000080), U64_C(0x000000000000800A), U64_C(0x800000008000000A), U64_C(0x8000000080008081), U64_C(0x8000000000008080), U64_C(0x0000000080000001), U64_C(0x8000000080008008), U64_C(0xFFFFFFFFFFFFFFFF) }; static unsigned int keccak_extract64(KECCAK_STATE *hd, unsigned int pos, byte *outbuf, unsigned int outlen) { unsigned int i; /* NOTE: when pos == 0, hd and outbuf may point to same memory (SHA-3). */ for (i = pos; i < pos + outlen / 8 + !!(outlen % 8); i++) { u64 tmp = hd->u.state64[i]; buf_put_le64(outbuf, tmp); outbuf += 8; } return 0; } #endif /* NEED_COMMON64 */ #ifdef NEED_COMMON32BI static const u32 round_consts_32bit[2 * 24] = { 0x00000001UL, 0x00000000UL, 0x00000000UL, 0x00000089UL, 0x00000000UL, 0x8000008bUL, 0x00000000UL, 0x80008080UL, 0x00000001UL, 0x0000008bUL, 0x00000001UL, 0x00008000UL, 0x00000001UL, 0x80008088UL, 0x00000001UL, 0x80000082UL, 0x00000000UL, 0x0000000bUL, 0x00000000UL, 0x0000000aUL, 0x00000001UL, 0x00008082UL, 0x00000000UL, 0x00008003UL, 0x00000001UL, 0x0000808bUL, 0x00000001UL, 0x8000000bUL, 0x00000001UL, 0x8000008aUL, 0x00000001UL, 0x80000081UL, 0x00000000UL, 0x80000081UL, 0x00000000UL, 0x80000008UL, 0x00000000UL, 0x00000083UL, 0x00000000UL, 0x80008003UL, 0x00000001UL, 0x80008088UL, 0x00000000UL, 0x80000088UL, 0x00000001UL, 0x00008000UL, 0x00000000UL, 0x80008082UL }; static unsigned int keccak_extract32bi(KECCAK_STATE *hd, unsigned int pos, byte *outbuf, unsigned int outlen) { unsigned int i; u32 x0; u32 x1; u32 t; /* NOTE: when pos == 0, hd and outbuf may point to same memory (SHA-3). 
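 *
 * The 32-bit code keeps every 64-bit lane bit-interleaved: one 32-bit word
 * holds the even-numbered bits and the other the odd-numbered bits, so a
 * 64-bit rotation can be carried out as two 32-bit rotations.  The swap
 * sequence below undoes that interleaving and re-packs each lane into its
 * canonical little-endian byte order before it is written to outbuf.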
*/ for (i = pos; i < pos + outlen / 8 + !!(outlen % 8); i++) { x0 = hd->u.state32bi[i * 2 + 0]; x1 = hd->u.state32bi[i * 2 + 1]; t = (x0 & 0x0000FFFFUL) + (x1 << 16); x1 = (x0 >> 16) + (x1 & 0xFFFF0000UL); x0 = t; t = (x0 ^ (x0 >> 8)) & 0x0000FF00UL; x0 = x0 ^ t ^ (t << 8); t = (x0 ^ (x0 >> 4)) & 0x00F000F0UL; x0 = x0 ^ t ^ (t << 4); t = (x0 ^ (x0 >> 2)) & 0x0C0C0C0CUL; x0 = x0 ^ t ^ (t << 2); t = (x0 ^ (x0 >> 1)) & 0x22222222UL; x0 = x0 ^ t ^ (t << 1); t = (x1 ^ (x1 >> 8)) & 0x0000FF00UL; x1 = x1 ^ t ^ (t << 8); t = (x1 ^ (x1 >> 4)) & 0x00F000F0UL; x1 = x1 ^ t ^ (t << 4); t = (x1 ^ (x1 >> 2)) & 0x0C0C0C0CUL; x1 = x1 ^ t ^ (t << 2); t = (x1 ^ (x1 >> 1)) & 0x22222222UL; x1 = x1 ^ t ^ (t << 1); buf_put_le32(&outbuf[0], x0); buf_put_le32(&outbuf[4], x1); outbuf += 8; } return 0; } static inline void keccak_absorb_lane32bi(u32 *lane, u32 x0, u32 x1) { u32 t; t = (x0 ^ (x0 >> 1)) & 0x22222222UL; x0 = x0 ^ t ^ (t << 1); t = (x0 ^ (x0 >> 2)) & 0x0C0C0C0CUL; x0 = x0 ^ t ^ (t << 2); t = (x0 ^ (x0 >> 4)) & 0x00F000F0UL; x0 = x0 ^ t ^ (t << 4); t = (x0 ^ (x0 >> 8)) & 0x0000FF00UL; x0 = x0 ^ t ^ (t << 8); t = (x1 ^ (x1 >> 1)) & 0x22222222UL; x1 = x1 ^ t ^ (t << 1); t = (x1 ^ (x1 >> 2)) & 0x0C0C0C0CUL; x1 = x1 ^ t ^ (t << 2); t = (x1 ^ (x1 >> 4)) & 0x00F000F0UL; x1 = x1 ^ t ^ (t << 4); t = (x1 ^ (x1 >> 8)) & 0x0000FF00UL; x1 = x1 ^ t ^ (t << 8); lane[0] ^= (x0 & 0x0000FFFFUL) + (x1 << 16); lane[1] ^= (x0 >> 16) + (x1 & 0xFFFF0000UL); } #endif /* NEED_COMMON32BI */ /* Construct generic 64-bit implementation. */ #ifdef USE_64BIT #if __GNUC__ >= 4 && defined(__x86_64__) static inline void absorb_lanes64_8(u64 *dst, const byte *in) { asm ("movdqu 0*16(%[dst]), %%xmm0\n\t" "movdqu 0*16(%[in]), %%xmm4\n\t" "movdqu 1*16(%[dst]), %%xmm1\n\t" "movdqu 1*16(%[in]), %%xmm5\n\t" "movdqu 2*16(%[dst]), %%xmm2\n\t" "movdqu 3*16(%[dst]), %%xmm3\n\t" "pxor %%xmm4, %%xmm0\n\t" "pxor %%xmm5, %%xmm1\n\t" "movdqu 2*16(%[in]), %%xmm4\n\t" "movdqu 3*16(%[in]), %%xmm5\n\t" "movdqu %%xmm0, 0*16(%[dst])\n\t" "pxor %%xmm4, %%xmm2\n\t" "movdqu %%xmm1, 1*16(%[dst])\n\t" "pxor %%xmm5, %%xmm3\n\t" "movdqu %%xmm2, 2*16(%[dst])\n\t" "movdqu %%xmm3, 3*16(%[dst])\n\t" : : [dst] "r" (dst), [in] "r" (in) : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "memory"); } static inline void absorb_lanes64_4(u64 *dst, const byte *in) { asm ("movdqu 0*16(%[dst]), %%xmm0\n\t" "movdqu 0*16(%[in]), %%xmm4\n\t" "movdqu 1*16(%[dst]), %%xmm1\n\t" "movdqu 1*16(%[in]), %%xmm5\n\t" "pxor %%xmm4, %%xmm0\n\t" "pxor %%xmm5, %%xmm1\n\t" "movdqu %%xmm0, 0*16(%[dst])\n\t" "movdqu %%xmm1, 1*16(%[dst])\n\t" : : [dst] "r" (dst), [in] "r" (in) : "xmm0", "xmm1", "xmm4", "xmm5", "memory"); } static inline void absorb_lanes64_2(u64 *dst, const byte *in) { asm ("movdqu 0*16(%[dst]), %%xmm0\n\t" "movdqu 0*16(%[in]), %%xmm4\n\t" "pxor %%xmm4, %%xmm0\n\t" "movdqu %%xmm0, 0*16(%[dst])\n\t" : : [dst] "r" (dst), [in] "r" (in) : "xmm0", "xmm4", "memory"); } #else /* __x86_64__ */ static inline void absorb_lanes64_8(u64 *dst, const byte *in) { dst[0] ^= buf_get_le64(in + 8 * 0); dst[1] ^= buf_get_le64(in + 8 * 1); dst[2] ^= buf_get_le64(in + 8 * 2); dst[3] ^= buf_get_le64(in + 8 * 3); dst[4] ^= buf_get_le64(in + 8 * 4); dst[5] ^= buf_get_le64(in + 8 * 5); dst[6] ^= buf_get_le64(in + 8 * 6); dst[7] ^= buf_get_le64(in + 8 * 7); } static inline void absorb_lanes64_4(u64 *dst, const byte *in) { dst[0] ^= buf_get_le64(in + 8 * 0); dst[1] ^= buf_get_le64(in + 8 * 1); dst[2] ^= buf_get_le64(in + 8 * 2); dst[3] ^= buf_get_le64(in + 8 * 3); } static inline void absorb_lanes64_2(u64 
*dst, const byte *in) { dst[0] ^= buf_get_le64(in + 8 * 0); dst[1] ^= buf_get_le64(in + 8 * 1); } #endif /* !__x86_64__ */ static inline void absorb_lanes64_1(u64 *dst, const byte *in) { dst[0] ^= buf_get_le64(in + 8 * 0); } # define ANDN64(x, y) (~(x) & (y)) # define ROL64(x, n) (((x) << ((unsigned int)n & 63)) | \ ((x) >> ((64 - (unsigned int)(n)) & 63))) # define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute64 # define KECCAK_F1600_ABSORB_FUNC_NAME keccak_absorb_lanes64 # include "keccak_permute_64.h" # undef ANDN64 # undef ROL64 # undef KECCAK_F1600_PERMUTE_FUNC_NAME # undef KECCAK_F1600_ABSORB_FUNC_NAME static const keccak_ops_t keccak_generic64_ops = { .permute = keccak_f1600_state_permute64, .absorb = keccak_absorb_lanes64, .extract = keccak_extract64, }; #endif /* USE_64BIT */ /* Construct 64-bit Intel SHLD implementation. */ #ifdef USE_64BIT_SHLD # define ANDN64(x, y) (~(x) & (y)) # define ROL64(x, n) ({ \ u64 tmp = (x); \ asm ("shldq %1, %0, %0" \ : "+r" (tmp) \ : "J" ((n) & 63) \ : "cc"); \ tmp; }) # define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute64_shld # define KECCAK_F1600_ABSORB_FUNC_NAME keccak_absorb_lanes64_shld # include "keccak_permute_64.h" # undef ANDN64 # undef ROL64 # undef KECCAK_F1600_PERMUTE_FUNC_NAME # undef KECCAK_F1600_ABSORB_FUNC_NAME static const keccak_ops_t keccak_shld_64_ops = { .permute = keccak_f1600_state_permute64_shld, .absorb = keccak_absorb_lanes64_shld, .extract = keccak_extract64, }; #endif /* USE_64BIT_SHLD */ /* Construct 64-bit Intel BMI2 implementation. */ #ifdef USE_64BIT_BMI2 # define ANDN64(x, y) ({ \ u64 tmp; \ asm ("andnq %2, %1, %0" \ : "=r" (tmp) \ : "r0" (x), "rm" (y)); \ tmp; }) # define ROL64(x, n) ({ \ u64 tmp; \ asm ("rorxq %2, %1, %0" \ : "=r" (tmp) \ : "rm0" (x), "J" (64 - ((n) & 63))); \ tmp; }) # define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute64_bmi2 # define KECCAK_F1600_ABSORB_FUNC_NAME keccak_absorb_lanes64_bmi2 # include "keccak_permute_64.h" # undef ANDN64 # undef ROL64 # undef KECCAK_F1600_PERMUTE_FUNC_NAME # undef KECCAK_F1600_ABSORB_FUNC_NAME static const keccak_ops_t keccak_bmi2_64_ops = { .permute = keccak_f1600_state_permute64_bmi2, .absorb = keccak_absorb_lanes64_bmi2, .extract = keccak_extract64, }; #endif /* USE_64BIT_BMI2 */ /* 64-bit Intel AVX512 implementation. 
*/ #ifdef USE_64BIT_AVX512 extern ASM_FUNC_ABI unsigned int _gcry_keccak_f1600_state_permute64_avx512(u64 *state, const u64 *rconst); extern ASM_FUNC_ABI unsigned int _gcry_keccak_absorb_blocks_avx512(u64 *state, const u64 *rconst, const byte *lanes, size_t nlanes, size_t blocklanes, const byte **new_lanes); static unsigned int keccak_f1600_state_permute64_avx512(KECCAK_STATE *hd) { return _gcry_keccak_f1600_state_permute64_avx512 ( hd->u.state64, _gcry_keccak_round_consts_64bit); } static unsigned int keccak_absorb_lanes64_avx512(KECCAK_STATE *hd, int pos, const byte *lanes, - unsigned int nlanes, int blocklanes) + size_t nlanes, int blocklanes) { while (nlanes) { - if (pos == 0 && blocklanes > 0 && nlanes >= (unsigned int)blocklanes) + if (pos == 0 && blocklanes > 0 && nlanes >= (size_t)blocklanes) { nlanes = _gcry_keccak_absorb_blocks_avx512 ( hd->u.state64, _gcry_keccak_round_consts_64bit, lanes, nlanes, blocklanes, &lanes); } while (nlanes) { hd->u.state64[pos] ^= buf_get_le64 (lanes); lanes += 8; nlanes--; if (++pos == blocklanes) { keccak_f1600_state_permute64_avx512 (hd); pos = 0; break; } } } return 0; } static const keccak_ops_t keccak_avx512_64_ops = { .permute = keccak_f1600_state_permute64_avx512, .absorb = keccak_absorb_lanes64_avx512, .extract = keccak_extract64, }; #endif /* USE_64BIT_AVX512 */ /* 64-bit ARMv7/NEON implementation. */ #ifdef USE_64BIT_ARM_NEON unsigned int _gcry_keccak_permute_armv7_neon(u64 *state); unsigned int _gcry_keccak_absorb_lanes64_armv7_neon(u64 *state, int pos, const byte *lanes, - unsigned int nlanes, + size_t nlanes, int blocklanes); static unsigned int keccak_permute64_armv7_neon(KECCAK_STATE *hd) { return _gcry_keccak_permute_armv7_neon(hd->u.state64); } static unsigned int keccak_absorb_lanes64_armv7_neon(KECCAK_STATE *hd, int pos, const byte *lanes, - unsigned int nlanes, int blocklanes) + size_t nlanes, int blocklanes) { if (blocklanes < 0) { /* blocklanes == -1, permutationless absorb from keccak_final. */ while (nlanes) { hd->u.state64[pos] ^= buf_get_le64(lanes); lanes += 8; nlanes--; } return 0; } else { return _gcry_keccak_absorb_lanes64_armv7_neon(hd->u.state64, pos, lanes, nlanes, blocklanes); } } static const keccak_ops_t keccak_armv7_neon_64_ops = { .permute = keccak_permute64_armv7_neon, .absorb = keccak_absorb_lanes64_armv7_neon, .extract = keccak_extract64, }; #endif /* USE_64BIT_ARM_NEON */ /* Construct generic 32-bit implementation. */ #ifdef USE_32BIT # define ANDN32(x, y) (~(x) & (y)) # define ROL32(x, n) (((x) << ((unsigned int)n & 31)) | \ ((x) >> ((32 - (unsigned int)(n)) & 31))) # define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute32bi # include "keccak_permute_32.h" # undef ANDN32 # undef ROL32 # undef KECCAK_F1600_PERMUTE_FUNC_NAME static unsigned int keccak_absorb_lanes32bi(KECCAK_STATE *hd, int pos, const byte *lanes, - unsigned int nlanes, int blocklanes) + size_t nlanes, int blocklanes) { unsigned int burn = 0; while (nlanes) { keccak_absorb_lane32bi(&hd->u.state32bi[pos * 2], buf_get_le32(lanes + 0), buf_get_le32(lanes + 4)); lanes += 8; nlanes--; if (++pos == blocklanes) { burn = keccak_f1600_state_permute32bi(hd); pos = 0; } } return burn; } static const keccak_ops_t keccak_generic32bi_ops = { .permute = keccak_f1600_state_permute32bi, .absorb = keccak_absorb_lanes32bi, .extract = keccak_extract32bi, }; #endif /* USE_32BIT */ /* Construct 32-bit Intel BMI2 implementation. 
*/ #ifdef USE_32BIT_BMI2 # define ANDN32(x, y) ({ \ u32 tmp; \ asm ("andnl %2, %1, %0" \ : "=r" (tmp) \ : "r0" (x), "rm" (y)); \ tmp; }) # define ROL32(x, n) ({ \ u32 tmp; \ asm ("rorxl %2, %1, %0" \ : "=r" (tmp) \ : "rm0" (x), "J" (32 - ((n) & 31))); \ tmp; }) # define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute32bi_bmi2 # include "keccak_permute_32.h" # undef ANDN32 # undef ROL32 # undef KECCAK_F1600_PERMUTE_FUNC_NAME static inline u32 pext(u32 x, u32 mask) { u32 tmp; asm ("pextl %2, %1, %0" : "=r" (tmp) : "r0" (x), "rm" (mask)); return tmp; } static inline u32 pdep(u32 x, u32 mask) { u32 tmp; asm ("pdepl %2, %1, %0" : "=r" (tmp) : "r0" (x), "rm" (mask)); return tmp; } static inline void keccak_absorb_lane32bi_bmi2(u32 *lane, u32 x0, u32 x1) { x0 = pdep(pext(x0, 0x55555555), 0x0000ffff) | (pext(x0, 0xaaaaaaaa) << 16); x1 = pdep(pext(x1, 0x55555555), 0x0000ffff) | (pext(x1, 0xaaaaaaaa) << 16); lane[0] ^= (x0 & 0x0000FFFFUL) + (x1 << 16); lane[1] ^= (x0 >> 16) + (x1 & 0xFFFF0000UL); } static unsigned int keccak_absorb_lanes32bi_bmi2(KECCAK_STATE *hd, int pos, const byte *lanes, - unsigned int nlanes, int blocklanes) + size_t nlanes, int blocklanes) { unsigned int burn = 0; while (nlanes) { keccak_absorb_lane32bi_bmi2(&hd->u.state32bi[pos * 2], buf_get_le32(lanes + 0), buf_get_le32(lanes + 4)); lanes += 8; nlanes--; if (++pos == blocklanes) { burn = keccak_f1600_state_permute32bi_bmi2(hd); pos = 0; } } return burn; } static unsigned int keccak_extract32bi_bmi2(KECCAK_STATE *hd, unsigned int pos, byte *outbuf, unsigned int outlen) { unsigned int i; u32 x0; u32 x1; u32 t; /* NOTE: when pos == 0, hd and outbuf may point to same memory (SHA-3). */ for (i = pos; i < pos + outlen / 8 + !!(outlen % 8); i++) { x0 = hd->u.state32bi[i * 2 + 0]; x1 = hd->u.state32bi[i * 2 + 1]; t = (x0 & 0x0000FFFFUL) + (x1 << 16); x1 = (x0 >> 16) + (x1 & 0xFFFF0000UL); x0 = t; x0 = pdep(pext(x0, 0xffff0001), 0xaaaaaaab) | pdep(x0 >> 1, 0x55555554); x1 = pdep(pext(x1, 0xffff0001), 0xaaaaaaab) | pdep(x1 >> 1, 0x55555554); buf_put_le32(&outbuf[0], x0); buf_put_le32(&outbuf[4], x1); outbuf += 8; } return 0; } static const keccak_ops_t keccak_bmi2_32bi_ops = { .permute = keccak_f1600_state_permute32bi_bmi2, .absorb = keccak_absorb_lanes32bi_bmi2, .extract = keccak_extract32bi_bmi2, }; #endif /* USE_32BIT_BMI2 */ #ifdef USE_S390X_CRYPTO #include "asm-inline-s390x.h" static inline void keccak_bwrite_s390x (void *context, const byte *in, size_t inlen) { KECCAK_CONTEXT *ctx = context; /* Write full-blocks. */ kimd_execute (ctx->kimd_func, &ctx->state, in, inlen); return; } static inline void keccak_final_s390x (void *context) { KECCAK_CONTEXT *ctx = context; if (ctx->suffix == SHA3_DELIMITED_SUFFIX) { klmd_execute (ctx->kimd_func, &ctx->state, ctx->buf, ctx->count); } else { klmd_shake_execute (ctx->kimd_func, &ctx->state, NULL, 0, ctx->buf, ctx->count); ctx->count = 0; ctx->buf_pos = 0; } return; } static inline void keccak_bextract_s390x (void *context, byte *out, size_t outlen) { KECCAK_CONTEXT *ctx = context; /* Extract full-blocks. */ klmd_shake_execute (ctx->kimd_func | KLMD_PADDING_STATE, &ctx->state, out, outlen, NULL, 0); return; } static void keccak_write_s390x (void *context, const byte *inbuf, size_t inlen) { KECCAK_CONTEXT *hd = context; const size_t blocksize = hd->blocksize; size_t inblocks; size_t copylen; while (hd->count) { if (hd->count == blocksize) /* Flush the buffer. 
*/ { keccak_bwrite_s390x (hd, hd->buf, blocksize); hd->count = 0; } else { copylen = inlen; if (copylen > blocksize - hd->count) copylen = blocksize - hd->count; if (copylen == 0) break; buf_cpy (&hd->buf[hd->count], inbuf, copylen); hd->count += copylen; inbuf += copylen; inlen -= copylen; } } if (inlen == 0) return; if (inlen >= blocksize) { inblocks = inlen / blocksize; keccak_bwrite_s390x (hd, inbuf, inblocks * blocksize); hd->count = 0; inlen -= inblocks * blocksize; inbuf += inblocks * blocksize; } if (inlen) { buf_cpy (hd->buf, inbuf, inlen); hd->count = inlen; } } static void keccak_extract_s390x (void *context, void *outbuf_arg, size_t outlen) { KECCAK_CONTEXT *hd = context; const size_t blocksize = hd->blocksize; byte *outbuf = outbuf_arg; while (outlen) { gcry_assert(hd->count == 0 || hd->buf_pos < hd->count); if (hd->buf_pos < hd->count && outlen) { size_t copylen = hd->count - hd->buf_pos; if (copylen > outlen) copylen = outlen; buf_cpy (outbuf, &hd->buf[hd->buf_pos], copylen); outbuf += copylen; outlen -= copylen; hd->buf_pos += copylen; } if (hd->buf_pos == hd->count) { hd->buf_pos = 0; hd->count = 0; } if (outlen == 0) return; if (outlen >= blocksize) { size_t outblocks = outlen / blocksize; keccak_bextract_s390x (context, outbuf, outblocks * blocksize); outlen -= outblocks * blocksize; outbuf += outblocks * blocksize; if (outlen == 0) return; } keccak_bextract_s390x (context, hd->buf, blocksize); hd->count = blocksize; } } #endif /* USE_S390X_CRYPTO */ static void keccak_write (void *context, const void *inbuf_arg, size_t inlen) { KECCAK_CONTEXT *ctx = context; const size_t bsize = ctx->blocksize; const size_t blocklanes = bsize / 8; const byte *inbuf = inbuf_arg; unsigned int nburn, burn = 0; unsigned int count, i; - unsigned int pos, nlanes; + unsigned int pos; + size_t nlanes; #ifdef USE_S390X_CRYPTO if (ctx->kimd_func) { keccak_write_s390x (context, inbuf, inlen); return; } #endif count = ctx->count; if (inlen && (count % 8)) { byte lane[8] = { 0, }; /* Complete absorbing partial input lane. */ pos = count / 8; for (i = count % 8; inlen && i < 8; i++) { lane[i] = *inbuf++; inlen--; count++; } if (count == bsize) count = 0; nburn = ctx->ops->absorb(&ctx->state, pos, lane, 1, (count % 8) ? -1 : blocklanes); burn = nburn > burn ? nburn : burn; } /* Absorb full input lanes. */ pos = count / 8; nlanes = inlen / 8; if (nlanes > 0) { nburn = ctx->ops->absorb(&ctx->state, pos, inbuf, nlanes, blocklanes); burn = nburn > burn ? nburn : burn; inlen -= nlanes * 8; inbuf += nlanes * 8; - count += nlanes * 8; - count = count % bsize; + count = ((size_t) count + nlanes * 8) % bsize; } if (inlen) { byte lane[8] = { 0, }; /* Absorb remaining partial input lane. */ pos = count / 8; for (i = count % 8; inlen && i < 8; i++) { lane[i] = *inbuf++; inlen--; count++; } nburn = ctx->ops->absorb(&ctx->state, pos, lane, 1, -1); burn = nburn > burn ? nburn : burn; gcry_assert(count < bsize); } ctx->count = count; if (burn) _gcry_burn_stack (burn); } static void keccak_init (int algo, void *context, unsigned int flags) { KECCAK_CONTEXT *ctx = context; KECCAK_STATE *hd = &ctx->state; unsigned int features = _gcry_get_hw_features (); (void)flags; (void)features; memset (hd, 0, sizeof *hd); ctx->count = 0; /* Select generic implementation. */ #ifdef USE_64BIT ctx->ops = &keccak_generic64_ops; #elif defined USE_32BIT ctx->ops = &keccak_generic32bi_ops; #endif /* Select optimized implementation based in hw features. 
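 * The else-if chain below picks the first match, so the preference order
 * is AVX512, then ARM/NEON, then BMI2, then SHLD; when nothing matches,
 * the generic implementation selected above stays in place.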
*/ if (0) {} #ifdef USE_64BIT_AVX512 else if (features & HWF_INTEL_AVX512) ctx->ops = &keccak_avx512_64_ops; #endif #ifdef USE_64BIT_ARM_NEON else if (features & HWF_ARM_NEON) ctx->ops = &keccak_armv7_neon_64_ops; #endif #ifdef USE_64BIT_BMI2 else if (features & HWF_INTEL_BMI2) ctx->ops = &keccak_bmi2_64_ops; #endif #ifdef USE_32BIT_BMI2 else if (features & HWF_INTEL_BMI2) ctx->ops = &keccak_bmi2_32bi_ops; #endif #ifdef USE_64BIT_SHLD else if (features & HWF_INTEL_FAST_SHLD) ctx->ops = &keccak_shld_64_ops; #endif /* Set input block size, in Keccak terms this is called 'rate'. */ switch (algo) { case GCRY_MD_SHA3_224: ctx->suffix = SHA3_DELIMITED_SUFFIX; ctx->blocksize = 1152 / 8; ctx->outlen = 224 / 8; break; case GCRY_MD_SHA3_256: ctx->suffix = SHA3_DELIMITED_SUFFIX; ctx->blocksize = 1088 / 8; ctx->outlen = 256 / 8; break; case GCRY_MD_SHA3_384: ctx->suffix = SHA3_DELIMITED_SUFFIX; ctx->blocksize = 832 / 8; ctx->outlen = 384 / 8; break; case GCRY_MD_SHA3_512: ctx->suffix = SHA3_DELIMITED_SUFFIX; ctx->blocksize = 576 / 8; ctx->outlen = 512 / 8; break; case GCRY_MD_SHAKE128: ctx->suffix = SHAKE_DELIMITED_SUFFIX; ctx->blocksize = 1344 / 8; ctx->outlen = 0; break; case GCRY_MD_SHAKE256: ctx->suffix = SHAKE_DELIMITED_SUFFIX; ctx->blocksize = 1088 / 8; ctx->outlen = 0; break; default: BUG(); } #ifdef USE_S390X_CRYPTO ctx->kimd_func = 0; if ((features & HWF_S390X_MSA) != 0) { unsigned int kimd_func = 0; switch (algo) { case GCRY_MD_SHA3_224: kimd_func = KMID_FUNCTION_SHA3_224; break; case GCRY_MD_SHA3_256: kimd_func = KMID_FUNCTION_SHA3_256; break; case GCRY_MD_SHA3_384: kimd_func = KMID_FUNCTION_SHA3_384; break; case GCRY_MD_SHA3_512: kimd_func = KMID_FUNCTION_SHA3_512; break; case GCRY_MD_SHAKE128: kimd_func = KMID_FUNCTION_SHAKE128; break; case GCRY_MD_SHAKE256: kimd_func = KMID_FUNCTION_SHAKE256; break; } if ((kimd_query () & km_function_to_mask (kimd_func)) && (klmd_query () & km_function_to_mask (kimd_func))) { ctx->kimd_func = kimd_func; } } #endif } static void sha3_224_init (void *context, unsigned int flags) { keccak_init (GCRY_MD_SHA3_224, context, flags); } static void sha3_256_init (void *context, unsigned int flags) { keccak_init (GCRY_MD_SHA3_256, context, flags); } static void sha3_384_init (void *context, unsigned int flags) { keccak_init (GCRY_MD_SHA3_384, context, flags); } static void sha3_512_init (void *context, unsigned int flags) { keccak_init (GCRY_MD_SHA3_512, context, flags); } static void shake128_init (void *context, unsigned int flags) { keccak_init (GCRY_MD_SHAKE128, context, flags); } static void shake256_init (void *context, unsigned int flags) { keccak_init (GCRY_MD_SHAKE256, context, flags); } /* The routine final terminates the computation and * returns the digest. * The handle is prepared for a new cycle, but adding bytes to the * handle will the destroy the returned buffer. * Returns: 64 bytes representing the digest. When used for sha384, * we take the leftmost 48 of those bytes. 
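 *
 * Padding follows the pad10*1 rule of FIPS 202: the delimited suffix byte
 * (0x06 for SHA3, 0x1F for SHAKE, i.e. the domain-separation bits plus the
 * first padding '1' bit) is XORed into the block byte at offset 'count',
 * and 0x80 (the final padding '1' bit) into the last byte of the block.
 * For example, with SHA3-256 (136-byte block) and 5 leftover input bytes,
 * byte 5 gets 0x06 and byte 135 gets 0x80; both land in the same byte when
 * only one byte of the block remains.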
*/ static void keccak_final (void *context) { KECCAK_CONTEXT *ctx = context; KECCAK_STATE *hd = &ctx->state; const size_t bsize = ctx->blocksize; const byte suffix = ctx->suffix; unsigned int nburn, burn = 0; unsigned int lastbytes; byte lane[8]; #ifdef USE_S390X_CRYPTO if (ctx->kimd_func) { keccak_final_s390x (context); return; } #endif lastbytes = ctx->count; /* Do the padding and switch to the squeezing phase */ /* Absorb the last few bits and add the first bit of padding (which coincides with the delimiter in delimited suffix) */ buf_put_le64(lane, (u64)suffix << ((lastbytes % 8) * 8)); nburn = ctx->ops->absorb(&ctx->state, lastbytes / 8, lane, 1, -1); burn = nburn > burn ? nburn : burn; /* Add the second bit of padding. */ buf_put_le64(lane, (u64)0x80 << (((bsize - 1) % 8) * 8)); nburn = ctx->ops->absorb(&ctx->state, (bsize - 1) / 8, lane, 1, -1); burn = nburn > burn ? nburn : burn; if (suffix == SHA3_DELIMITED_SUFFIX) { /* Switch to the squeezing phase. */ nburn = ctx->ops->permute(hd); burn = nburn > burn ? nburn : burn; /* Squeeze out the SHA3 digest. */ nburn = ctx->ops->extract(hd, 0, (void *)hd, ctx->outlen); burn = nburn > burn ? nburn : burn; } else { /* Output for SHAKE can now be read with md_extract(). */ ctx->count = 0; } wipememory(lane, sizeof(lane)); if (burn) _gcry_burn_stack (burn); } static byte * keccak_read (void *context) { KECCAK_CONTEXT *ctx = (KECCAK_CONTEXT *) context; KECCAK_STATE *hd = &ctx->state; return (byte *)&hd->u; } static void keccak_extract (void *context, void *out, size_t outlen) { KECCAK_CONTEXT *ctx = context; KECCAK_STATE *hd = &ctx->state; const size_t bsize = ctx->blocksize; unsigned int nburn, burn = 0; byte *outbuf = out; unsigned int nlanes; unsigned int nleft; unsigned int count; unsigned int i; byte lane[8]; #ifdef USE_S390X_CRYPTO if (ctx->kimd_func) { keccak_extract_s390x (context, out, outlen); return; } #endif count = ctx->count; while (count && outlen && (outlen < 8 || count % 8)) { /* Extract partial lane. */ nburn = ctx->ops->extract(hd, count / 8, lane, 8); burn = nburn > burn ? nburn : burn; for (i = count % 8; outlen && i < 8; i++) { *outbuf++ = lane[i]; outlen--; count++; } gcry_assert(count <= bsize); if (count == bsize) count = 0; } if (outlen >= 8 && count) { /* Extract tail of partial block. */ nlanes = outlen / 8; nleft = (bsize - count) / 8; nlanes = nlanes < nleft ? nlanes : nleft; nburn = ctx->ops->extract(hd, count / 8, outbuf, nlanes * 8); burn = nburn > burn ? nburn : burn; outlen -= nlanes * 8; outbuf += nlanes * 8; count += nlanes * 8; gcry_assert(count <= bsize); if (count == bsize) count = 0; } while (outlen >= bsize) { gcry_assert(count == 0); /* Squeeze more. */ nburn = ctx->ops->permute(hd); burn = nburn > burn ? nburn : burn; /* Extract full block. */ nburn = ctx->ops->extract(hd, 0, outbuf, bsize); burn = nburn > burn ? nburn : burn; outlen -= bsize; outbuf += bsize; } if (outlen) { gcry_assert(outlen < bsize); if (count == 0) { /* Squeeze more. */ nburn = ctx->ops->permute(hd); burn = nburn > burn ? nburn : burn; } if (outlen >= 8) { /* Extract head of partial block. */ nlanes = outlen / 8; nburn = ctx->ops->extract(hd, count / 8, outbuf, nlanes * 8); burn = nburn > burn ? nburn : burn; outlen -= nlanes * 8; outbuf += nlanes * 8; count += nlanes * 8; gcry_assert(count < bsize); } if (outlen) { /* Extract head of partial lane. */ nburn = ctx->ops->extract(hd, count / 8, lane, 8); burn = nburn > burn ? 
nburn : burn; for (i = count % 8; outlen && i < 8; i++) { *outbuf++ = lane[i]; outlen--; count++; } gcry_assert(count < bsize); } } ctx->count = count; if (burn) _gcry_burn_stack (burn); } /* Variant of the above shortcut function using multiple buffers. */ static void _gcry_sha3_hash_buffers (void *outbuf, size_t nbytes, const gcry_buffer_t *iov, int iovcnt, const gcry_md_spec_t *spec) { KECCAK_CONTEXT hd; spec->init (&hd, 0); for (;iovcnt > 0; iov++, iovcnt--) keccak_write (&hd, (const char*)iov[0].data + iov[0].off, iov[0].len); keccak_final (&hd); if (spec->mdlen > 0) memcpy (outbuf, keccak_read (&hd), spec->mdlen); else keccak_extract (&hd, outbuf, nbytes); } static void _gcry_sha3_224_hash_buffers (void *outbuf, size_t nbytes, const gcry_buffer_t *iov, int iovcnt) { _gcry_sha3_hash_buffers (outbuf, nbytes, iov, iovcnt, &_gcry_digest_spec_sha3_224); } static void _gcry_sha3_256_hash_buffers (void *outbuf, size_t nbytes, const gcry_buffer_t *iov, int iovcnt) { _gcry_sha3_hash_buffers (outbuf, nbytes, iov, iovcnt, &_gcry_digest_spec_sha3_256); } static void _gcry_sha3_384_hash_buffers (void *outbuf, size_t nbytes, const gcry_buffer_t *iov, int iovcnt) { _gcry_sha3_hash_buffers (outbuf, nbytes, iov, iovcnt, &_gcry_digest_spec_sha3_384); } static void _gcry_sha3_512_hash_buffers (void *outbuf, size_t nbytes, const gcry_buffer_t *iov, int iovcnt) { _gcry_sha3_hash_buffers (outbuf, nbytes, iov, iovcnt, &_gcry_digest_spec_sha3_512); } static void _gcry_shake128_hash_buffers (void *outbuf, size_t nbytes, const gcry_buffer_t *iov, int iovcnt) { _gcry_sha3_hash_buffers (outbuf, nbytes, iov, iovcnt, &_gcry_digest_spec_shake128); } static void _gcry_shake256_hash_buffers (void *outbuf, size_t nbytes, const gcry_buffer_t *iov, int iovcnt) { _gcry_sha3_hash_buffers (outbuf, nbytes, iov, iovcnt, &_gcry_digest_spec_shake256); } /* Self-test section. 
*/ static gpg_err_code_t selftests_keccak (int algo, int extended, selftest_report_func_t report) { const char *what; const char *errtxt; const char *short_hash; const char *long_hash; const char *one_million_a_hash; int hash_len; switch (algo) { default: BUG(); case GCRY_MD_SHA3_224: short_hash = "\xe6\x42\x82\x4c\x3f\x8c\xf2\x4a\xd0\x92\x34\xee\x7d\x3c\x76\x6f" "\xc9\xa3\xa5\x16\x8d\x0c\x94\xad\x73\xb4\x6f\xdf"; long_hash = "\x54\x3e\x68\x68\xe1\x66\x6c\x1a\x64\x36\x30\xdf\x77\x36\x7a\xe5" "\xa6\x2a\x85\x07\x0a\x51\xc1\x4c\xbf\x66\x5c\xbc"; one_million_a_hash = "\xd6\x93\x35\xb9\x33\x25\x19\x2e\x51\x6a\x91\x2e\x6d\x19\xa1\x5c" "\xb5\x1c\x6e\xd5\xc1\x52\x43\xe7\xa7\xfd\x65\x3c"; hash_len = 28; break; case GCRY_MD_SHA3_256: short_hash = "\x3a\x98\x5d\xa7\x4f\xe2\x25\xb2\x04\x5c\x17\x2d\x6b\xd3\x90\xbd" "\x85\x5f\x08\x6e\x3e\x9d\x52\x5b\x46\xbf\xe2\x45\x11\x43\x15\x32"; long_hash = "\x91\x6f\x60\x61\xfe\x87\x97\x41\xca\x64\x69\xb4\x39\x71\xdf\xdb" "\x28\xb1\xa3\x2d\xc3\x6c\xb3\x25\x4e\x81\x2b\xe2\x7a\xad\x1d\x18"; one_million_a_hash = "\x5c\x88\x75\xae\x47\x4a\x36\x34\xba\x4f\xd5\x5e\xc8\x5b\xff\xd6" "\x61\xf3\x2a\xca\x75\xc6\xd6\x99\xd0\xcd\xcb\x6c\x11\x58\x91\xc1"; hash_len = 32; break; case GCRY_MD_SHA3_384: short_hash = "\xec\x01\x49\x82\x88\x51\x6f\xc9\x26\x45\x9f\x58\xe2\xc6\xad\x8d" "\xf9\xb4\x73\xcb\x0f\xc0\x8c\x25\x96\xda\x7c\xf0\xe4\x9b\xe4\xb2" "\x98\xd8\x8c\xea\x92\x7a\xc7\xf5\x39\xf1\xed\xf2\x28\x37\x6d\x25"; long_hash = "\x79\x40\x7d\x3b\x59\x16\xb5\x9c\x3e\x30\xb0\x98\x22\x97\x47\x91" "\xc3\x13\xfb\x9e\xcc\x84\x9e\x40\x6f\x23\x59\x2d\x04\xf6\x25\xdc" "\x8c\x70\x9b\x98\xb4\x3b\x38\x52\xb3\x37\x21\x61\x79\xaa\x7f\xc7"; one_million_a_hash = "\xee\xe9\xe2\x4d\x78\xc1\x85\x53\x37\x98\x34\x51\xdf\x97\xc8\xad" "\x9e\xed\xf2\x56\xc6\x33\x4f\x8e\x94\x8d\x25\x2d\x5e\x0e\x76\x84" "\x7a\xa0\x77\x4d\xdb\x90\xa8\x42\x19\x0d\x2c\x55\x8b\x4b\x83\x40"; hash_len = 48; break; case GCRY_MD_SHA3_512: short_hash = "\xb7\x51\x85\x0b\x1a\x57\x16\x8a\x56\x93\xcd\x92\x4b\x6b\x09\x6e" "\x08\xf6\x21\x82\x74\x44\xf7\x0d\x88\x4f\x5d\x02\x40\xd2\x71\x2e" "\x10\xe1\x16\xe9\x19\x2a\xf3\xc9\x1a\x7e\xc5\x76\x47\xe3\x93\x40" "\x57\x34\x0b\x4c\xf4\x08\xd5\xa5\x65\x92\xf8\x27\x4e\xec\x53\xf0"; long_hash = "\xaf\xeb\xb2\xef\x54\x2e\x65\x79\xc5\x0c\xad\x06\xd2\xe5\x78\xf9" "\xf8\xdd\x68\x81\xd7\xdc\x82\x4d\x26\x36\x0f\xee\xbf\x18\xa4\xfa" "\x73\xe3\x26\x11\x22\x94\x8e\xfc\xfd\x49\x2e\x74\xe8\x2e\x21\x89" "\xed\x0f\xb4\x40\xd1\x87\xf3\x82\x27\x0c\xb4\x55\xf2\x1d\xd1\x85"; one_million_a_hash = "\x3c\x3a\x87\x6d\xa1\x40\x34\xab\x60\x62\x7c\x07\x7b\xb9\x8f\x7e" "\x12\x0a\x2a\x53\x70\x21\x2d\xff\xb3\x38\x5a\x18\xd4\xf3\x88\x59" "\xed\x31\x1d\x0a\x9d\x51\x41\xce\x9c\xc5\xc6\x6e\xe6\x89\xb2\x66" "\xa8\xaa\x18\xac\xe8\x28\x2a\x0e\x0d\xb5\x96\xc9\x0b\x0a\x7b\x87"; hash_len = 64; break; case GCRY_MD_SHAKE128: short_hash = "\x58\x81\x09\x2d\xd8\x18\xbf\x5c\xf8\xa3\xdd\xb7\x93\xfb\xcb\xa7" "\x40\x97\xd5\xc5\x26\xa6\xd3\x5f\x97\xb8\x33\x51\x94\x0f\x2c\xc8"; long_hash = "\x7b\x6d\xf6\xff\x18\x11\x73\xb6\xd7\x89\x8d\x7f\xf6\x3f\xb0\x7b" "\x7c\x23\x7d\xaf\x47\x1a\x5a\xe5\x60\x2a\xdb\xcc\xef\x9c\xcf\x4b"; one_million_a_hash = "\x9d\x22\x2c\x79\xc4\xff\x9d\x09\x2c\xf6\xca\x86\x14\x3a\xa4\x11" "\xe3\x69\x97\x38\x08\xef\x97\x09\x32\x55\x82\x6c\x55\x72\xef\x58"; hash_len = 32; break; case GCRY_MD_SHAKE256: short_hash = "\x48\x33\x66\x60\x13\x60\xa8\x77\x1c\x68\x63\x08\x0c\xc4\x11\x4d" "\x8d\xb4\x45\x30\xf8\xf1\xe1\xee\x4f\x94\xea\x37\xe7\x8b\x57\x39"; long_hash = "\x98\xbe\x04\x51\x6c\x04\xcc\x73\x59\x3f\xef\x3e\xd0\x35\x2e\xa9" 
"\xf6\x44\x39\x42\xd6\x95\x0e\x29\xa3\x72\xa6\x81\xc3\xde\xaf\x45"; one_million_a_hash = "\x35\x78\xa7\xa4\xca\x91\x37\x56\x9c\xdf\x76\xed\x61\x7d\x31\xbb" "\x99\x4f\xca\x9c\x1b\xbf\x8b\x18\x40\x13\xde\x82\x34\xdf\xd1\x3a"; hash_len = 32; break; } what = "short string"; errtxt = _gcry_hash_selftest_check_one (algo, 0, "abc", 3, short_hash, hash_len); if (errtxt) goto failed; if (extended) { what = "long string"; errtxt = _gcry_hash_selftest_check_one (algo, 0, "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn" "hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", 112, long_hash, hash_len); if (errtxt) goto failed; what = "one million \"a\""; errtxt = _gcry_hash_selftest_check_one (algo, 1, NULL, 0, one_million_a_hash, hash_len); if (errtxt) goto failed; } return 0; /* Succeeded. */ failed: if (report) report ("digest", algo, what, errtxt); return GPG_ERR_SELFTEST_FAILED; } /* Run a full self-test for ALGO and return 0 on success. */ static gpg_err_code_t run_selftests (int algo, int extended, selftest_report_func_t report) { gpg_err_code_t ec; switch (algo) { case GCRY_MD_SHA3_224: case GCRY_MD_SHA3_256: case GCRY_MD_SHA3_384: case GCRY_MD_SHA3_512: case GCRY_MD_SHAKE128: case GCRY_MD_SHAKE256: ec = selftests_keccak (algo, extended, report); break; default: ec = GPG_ERR_DIGEST_ALGO; break; } return ec; } static const byte sha3_224_asn[] = { 0x30 }; static const gcry_md_oid_spec_t oid_spec_sha3_224[] = { { "2.16.840.1.101.3.4.2.7" }, /* PKCS#1 sha3_224WithRSAEncryption */ { "?" }, { NULL } }; static const byte sha3_256_asn[] = { 0x30 }; static const gcry_md_oid_spec_t oid_spec_sha3_256[] = { { "2.16.840.1.101.3.4.2.8" }, /* PKCS#1 sha3_256WithRSAEncryption */ { "?" }, { NULL } }; static const byte sha3_384_asn[] = { 0x30 }; static const gcry_md_oid_spec_t oid_spec_sha3_384[] = { { "2.16.840.1.101.3.4.2.9" }, /* PKCS#1 sha3_384WithRSAEncryption */ { "?" }, { NULL } }; static const byte sha3_512_asn[] = { 0x30 }; static const gcry_md_oid_spec_t oid_spec_sha3_512[] = { { "2.16.840.1.101.3.4.2.10" }, /* PKCS#1 sha3_512WithRSAEncryption */ { "?" }, { NULL } }; static const byte shake128_asn[] = { 0x30 }; static const gcry_md_oid_spec_t oid_spec_shake128[] = { { "2.16.840.1.101.3.4.2.11" }, /* PKCS#1 shake128WithRSAEncryption */ { "?" }, { NULL } }; static const byte shake256_asn[] = { 0x30 }; static const gcry_md_oid_spec_t oid_spec_shake256[] = { { "2.16.840.1.101.3.4.2.12" }, /* PKCS#1 shake256WithRSAEncryption */ { "?" 
}, { NULL } }; const gcry_md_spec_t _gcry_digest_spec_sha3_224 = { GCRY_MD_SHA3_224, {0, 1}, "SHA3-224", sha3_224_asn, DIM (sha3_224_asn), oid_spec_sha3_224, 28, sha3_224_init, keccak_write, keccak_final, keccak_read, NULL, _gcry_sha3_224_hash_buffers, sizeof (KECCAK_CONTEXT), run_selftests }; const gcry_md_spec_t _gcry_digest_spec_sha3_256 = { GCRY_MD_SHA3_256, {0, 1}, "SHA3-256", sha3_256_asn, DIM (sha3_256_asn), oid_spec_sha3_256, 32, sha3_256_init, keccak_write, keccak_final, keccak_read, NULL, _gcry_sha3_256_hash_buffers, sizeof (KECCAK_CONTEXT), run_selftests }; const gcry_md_spec_t _gcry_digest_spec_sha3_384 = { GCRY_MD_SHA3_384, {0, 1}, "SHA3-384", sha3_384_asn, DIM (sha3_384_asn), oid_spec_sha3_384, 48, sha3_384_init, keccak_write, keccak_final, keccak_read, NULL, _gcry_sha3_384_hash_buffers, sizeof (KECCAK_CONTEXT), run_selftests }; const gcry_md_spec_t _gcry_digest_spec_sha3_512 = { GCRY_MD_SHA3_512, {0, 1}, "SHA3-512", sha3_512_asn, DIM (sha3_512_asn), oid_spec_sha3_512, 64, sha3_512_init, keccak_write, keccak_final, keccak_read, NULL, _gcry_sha3_512_hash_buffers, sizeof (KECCAK_CONTEXT), run_selftests }; const gcry_md_spec_t _gcry_digest_spec_shake128 = { GCRY_MD_SHAKE128, {0, 1}, "SHAKE128", shake128_asn, DIM (shake128_asn), oid_spec_shake128, 0, shake128_init, keccak_write, keccak_final, NULL, keccak_extract, _gcry_shake128_hash_buffers, sizeof (KECCAK_CONTEXT), run_selftests }; const gcry_md_spec_t _gcry_digest_spec_shake256 = { GCRY_MD_SHAKE256, {0, 1}, "SHAKE256", shake256_asn, DIM (shake256_asn), oid_spec_shake256, 0, shake256_init, keccak_write, keccak_final, NULL, keccak_extract, _gcry_shake256_hash_buffers, sizeof (KECCAK_CONTEXT), run_selftests }; diff --git a/cipher/keccak_permute_64.h b/cipher/keccak_permute_64.h index b28c871e..45ef462f 100644 --- a/cipher/keccak_permute_64.h +++ b/cipher/keccak_permute_64.h @@ -1,385 +1,385 @@ /* keccak_permute_64.h - Keccak permute function (simple 64bit) * Copyright (C) 2015 Jussi Kivilinna * * This file is part of Libgcrypt. * * Libgcrypt is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser general Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * Libgcrypt is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . */ /* The code is based on public-domain/CC0 "keccakc1024/simple/Keccak-simple.c" * implementation by Ronny Van Keer from SUPERCOP toolkit package. */ /* Function that computes the Keccak-f[1600] permutation on the given state. 
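 *
 * The loop body below processes two rounds per iteration, ping-ponging
 * between the A* and E* variable sets so no state copy is needed between
 * rounds; twelve iterations consume the 24 round constants.  The returned
 * value is an estimate of the stack depth used, which callers pass to
 * _gcry_burn_stack.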
*/ static unsigned int KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd) { const u64 *round_consts = _gcry_keccak_round_consts_64bit; const u64 *round_consts_end = _gcry_keccak_round_consts_64bit + 24; u64 Aba, Abe, Abi, Abo, Abu; u64 Aga, Age, Agi, Ago, Agu; u64 Aka, Ake, Aki, Ako, Aku; u64 Ama, Ame, Ami, Amo, Amu; u64 Asa, Ase, Asi, Aso, Asu; u64 BCa, BCe, BCi, BCo, BCu; u64 Da, De, Di, Do, Du; u64 Eba, Ebe, Ebi, Ebo, Ebu; u64 Ega, Ege, Egi, Ego, Egu; u64 Eka, Eke, Eki, Eko, Eku; u64 Ema, Eme, Emi, Emo, Emu; u64 Esa, Ese, Esi, Eso, Esu; u64 *state = hd->u.state64; Aba = state[0]; Abe = state[1]; Abi = state[2]; Abo = state[3]; Abu = state[4]; Aga = state[5]; Age = state[6]; Agi = state[7]; Ago = state[8]; Agu = state[9]; Aka = state[10]; Ake = state[11]; Aki = state[12]; Ako = state[13]; Aku = state[14]; Ama = state[15]; Ame = state[16]; Ami = state[17]; Amo = state[18]; Amu = state[19]; Asa = state[20]; Ase = state[21]; Asi = state[22]; Aso = state[23]; Asu = state[24]; do { /* prepareTheta */ BCa = Aba ^ Aga ^ Aka ^ Ama ^ Asa; BCe = Abe ^ Age ^ Ake ^ Ame ^ Ase; BCi = Abi ^ Agi ^ Aki ^ Ami ^ Asi; BCo = Abo ^ Ago ^ Ako ^ Amo ^ Aso; BCu = Abu ^ Agu ^ Aku ^ Amu ^ Asu; /* thetaRhoPiChiIotaPrepareTheta(round , A, E) */ Da = BCu ^ ROL64(BCe, 1); De = BCa ^ ROL64(BCi, 1); Di = BCe ^ ROL64(BCo, 1); Do = BCi ^ ROL64(BCu, 1); Du = BCo ^ ROL64(BCa, 1); Aba ^= Da; BCa = Aba; Age ^= De; BCe = ROL64(Age, 44); Aki ^= Di; BCi = ROL64(Aki, 43); Amo ^= Do; BCo = ROL64(Amo, 21); Asu ^= Du; BCu = ROL64(Asu, 14); Eba = BCa ^ ANDN64(BCe, BCi); Eba ^= *(round_consts++); Ebe = BCe ^ ANDN64(BCi, BCo); Ebi = BCi ^ ANDN64(BCo, BCu); Ebo = BCo ^ ANDN64(BCu, BCa); Ebu = BCu ^ ANDN64(BCa, BCe); Abo ^= Do; BCa = ROL64(Abo, 28); Agu ^= Du; BCe = ROL64(Agu, 20); Aka ^= Da; BCi = ROL64(Aka, 3); Ame ^= De; BCo = ROL64(Ame, 45); Asi ^= Di; BCu = ROL64(Asi, 61); Ega = BCa ^ ANDN64(BCe, BCi); Ege = BCe ^ ANDN64(BCi, BCo); Egi = BCi ^ ANDN64(BCo, BCu); Ego = BCo ^ ANDN64(BCu, BCa); Egu = BCu ^ ANDN64(BCa, BCe); Abe ^= De; BCa = ROL64(Abe, 1); Agi ^= Di; BCe = ROL64(Agi, 6); Ako ^= Do; BCi = ROL64(Ako, 25); Amu ^= Du; BCo = ROL64(Amu, 8); Asa ^= Da; BCu = ROL64(Asa, 18); Eka = BCa ^ ANDN64(BCe, BCi); Eke = BCe ^ ANDN64(BCi, BCo); Eki = BCi ^ ANDN64(BCo, BCu); Eko = BCo ^ ANDN64(BCu, BCa); Eku = BCu ^ ANDN64(BCa, BCe); Abu ^= Du; BCa = ROL64(Abu, 27); Aga ^= Da; BCe = ROL64(Aga, 36); Ake ^= De; BCi = ROL64(Ake, 10); Ami ^= Di; BCo = ROL64(Ami, 15); Aso ^= Do; BCu = ROL64(Aso, 56); Ema = BCa ^ ANDN64(BCe, BCi); Eme = BCe ^ ANDN64(BCi, BCo); Emi = BCi ^ ANDN64(BCo, BCu); Emo = BCo ^ ANDN64(BCu, BCa); Emu = BCu ^ ANDN64(BCa, BCe); Abi ^= Di; BCa = ROL64(Abi, 62); Ago ^= Do; BCe = ROL64(Ago, 55); Aku ^= Du; BCi = ROL64(Aku, 39); Ama ^= Da; BCo = ROL64(Ama, 41); Ase ^= De; BCu = ROL64(Ase, 2); Esa = BCa ^ ANDN64(BCe, BCi); Ese = BCe ^ ANDN64(BCi, BCo); Esi = BCi ^ ANDN64(BCo, BCu); Eso = BCo ^ ANDN64(BCu, BCa); Esu = BCu ^ ANDN64(BCa, BCe); /* prepareTheta */ BCa = Eba ^ Ega ^ Eka ^ Ema ^ Esa; BCe = Ebe ^ Ege ^ Eke ^ Eme ^ Ese; BCi = Ebi ^ Egi ^ Eki ^ Emi ^ Esi; BCo = Ebo ^ Ego ^ Eko ^ Emo ^ Eso; BCu = Ebu ^ Egu ^ Eku ^ Emu ^ Esu; /* thetaRhoPiChiIotaPrepareTheta(round+1, E, A) */ Da = BCu ^ ROL64(BCe, 1); De = BCa ^ ROL64(BCi, 1); Di = BCe ^ ROL64(BCo, 1); Do = BCi ^ ROL64(BCu, 1); Du = BCo ^ ROL64(BCa, 1); Eba ^= Da; BCa = Eba; Ege ^= De; BCe = ROL64(Ege, 44); Eki ^= Di; BCi = ROL64(Eki, 43); Emo ^= Do; BCo = ROL64(Emo, 21); Esu ^= Du; BCu = ROL64(Esu, 14); Aba = BCa ^ ANDN64(BCe, BCi); Aba ^= *(round_consts++); Abe = BCe ^ 
ANDN64(BCi, BCo); Abi = BCi ^ ANDN64(BCo, BCu); Abo = BCo ^ ANDN64(BCu, BCa); Abu = BCu ^ ANDN64(BCa, BCe); Ebo ^= Do; BCa = ROL64(Ebo, 28); Egu ^= Du; BCe = ROL64(Egu, 20); Eka ^= Da; BCi = ROL64(Eka, 3); Eme ^= De; BCo = ROL64(Eme, 45); Esi ^= Di; BCu = ROL64(Esi, 61); Aga = BCa ^ ANDN64(BCe, BCi); Age = BCe ^ ANDN64(BCi, BCo); Agi = BCi ^ ANDN64(BCo, BCu); Ago = BCo ^ ANDN64(BCu, BCa); Agu = BCu ^ ANDN64(BCa, BCe); Ebe ^= De; BCa = ROL64(Ebe, 1); Egi ^= Di; BCe = ROL64(Egi, 6); Eko ^= Do; BCi = ROL64(Eko, 25); Emu ^= Du; BCo = ROL64(Emu, 8); Esa ^= Da; BCu = ROL64(Esa, 18); Aka = BCa ^ ANDN64(BCe, BCi); Ake = BCe ^ ANDN64(BCi, BCo); Aki = BCi ^ ANDN64(BCo, BCu); Ako = BCo ^ ANDN64(BCu, BCa); Aku = BCu ^ ANDN64(BCa, BCe); Ebu ^= Du; BCa = ROL64(Ebu, 27); Ega ^= Da; BCe = ROL64(Ega, 36); Eke ^= De; BCi = ROL64(Eke, 10); Emi ^= Di; BCo = ROL64(Emi, 15); Eso ^= Do; BCu = ROL64(Eso, 56); Ama = BCa ^ ANDN64(BCe, BCi); Ame = BCe ^ ANDN64(BCi, BCo); Ami = BCi ^ ANDN64(BCo, BCu); Amo = BCo ^ ANDN64(BCu, BCa); Amu = BCu ^ ANDN64(BCa, BCe); Ebi ^= Di; BCa = ROL64(Ebi, 62); Ego ^= Do; BCe = ROL64(Ego, 55); Eku ^= Du; BCi = ROL64(Eku, 39); Ema ^= Da; BCo = ROL64(Ema, 41); Ese ^= De; BCu = ROL64(Ese, 2); Asa = BCa ^ ANDN64(BCe, BCi); Ase = BCe ^ ANDN64(BCi, BCo); Asi = BCi ^ ANDN64(BCo, BCu); Aso = BCo ^ ANDN64(BCu, BCa); Asu = BCu ^ ANDN64(BCa, BCe); } while (round_consts < round_consts_end); state[0] = Aba; state[1] = Abe; state[2] = Abi; state[3] = Abo; state[4] = Abu; state[5] = Aga; state[6] = Age; state[7] = Agi; state[8] = Ago; state[9] = Agu; state[10] = Aka; state[11] = Ake; state[12] = Aki; state[13] = Ako; state[14] = Aku; state[15] = Ama; state[16] = Ame; state[17] = Ami; state[18] = Amo; state[19] = Amu; state[20] = Asa; state[21] = Ase; state[22] = Asi; state[23] = Aso; state[24] = Asu; return sizeof(void *) * 4 + sizeof(u64) * 12 * 5; } static unsigned int KECCAK_F1600_ABSORB_FUNC_NAME(KECCAK_STATE *hd, int pos, const byte *lanes, - unsigned int nlanes, int blocklanes) + size_t nlanes, int blocklanes) { unsigned int burn = 0; while (nlanes) { switch (blocklanes) { case 21: /* SHAKE128 */ while (pos == 0 && nlanes >= 21) { nlanes -= 21; absorb_lanes64_8(&hd->u.state64[0], lanes); lanes += 8 * 8; absorb_lanes64_8(&hd->u.state64[8], lanes); lanes += 8 * 8; absorb_lanes64_4(&hd->u.state64[16], lanes); lanes += 8 * 4; absorb_lanes64_1(&hd->u.state64[20], lanes); lanes += 8 * 1; burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd); } break; case 18: /* SHA3-224 */ while (pos == 0 && nlanes >= 18) { nlanes -= 18; absorb_lanes64_8(&hd->u.state64[0], lanes); lanes += 8 * 8; absorb_lanes64_8(&hd->u.state64[8], lanes); lanes += 8 * 8; absorb_lanes64_2(&hd->u.state64[16], lanes); lanes += 8 * 2; burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd); } break; case 17: /* SHA3-256 & SHAKE256 */ while (pos == 0 && nlanes >= 17) { nlanes -= 17; absorb_lanes64_8(&hd->u.state64[0], lanes); lanes += 8 * 8; absorb_lanes64_8(&hd->u.state64[8], lanes); lanes += 8 * 8; absorb_lanes64_1(&hd->u.state64[16], lanes); lanes += 8 * 1; burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd); } break; case 13: /* SHA3-384 */ while (pos == 0 && nlanes >= 13) { nlanes -= 13; absorb_lanes64_8(&hd->u.state64[0], lanes); lanes += 8 * 8; absorb_lanes64_4(&hd->u.state64[8], lanes); lanes += 8 * 4; absorb_lanes64_1(&hd->u.state64[12], lanes); lanes += 8 * 1; burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd); } break; case 9: /* SHA3-512 */ while (pos == 0 && nlanes >= 9) { nlanes -= 9; absorb_lanes64_8(&hd->u.state64[0], lanes); lanes += 8 * 8; 
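/* The SHA3-512 fast path absorbs one full 9-lane block (72 bytes, i.e. the 576-bit SHA3-512 rate) per permutation: eight lanes via absorb_lanes64_8 above and the remaining lane via absorb_lanes64_1 below. */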
absorb_lanes64_1(&hd->u.state64[8], lanes); lanes += 8 * 1; burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd); } break; } while (nlanes) { hd->u.state64[pos] ^= buf_get_le64(lanes); lanes += 8; nlanes--; if (++pos == blocklanes) { burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd); pos = 0; break; } } } return burn; }
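The case labels in the absorb routine above (21, 18, 17, 13, 9) are the block size expressed in 64-bit lanes, i.e. the sponge rate that remains after reserving the capacity. A minimal sketch of the derivation follows; the helper name is made up for illustration and is not part of libgcrypt:

#include <stdio.h>

/* Rate in 64-bit lanes of a 1600-bit Keccak state with the given
   capacity; hypothetical helper, for illustration only. */
static unsigned int keccak_block_lanes (unsigned int capacity_bits)
{
  return (1600 - capacity_bits) / 64;
}

int main (void)
{
  /* SHA3-N reserves a capacity of 2*N bits; SHAKE128 and SHAKE256
     reserve 256 and 512 bits.  The results match the case labels in
     the absorb loop above. */
  printf ("SHAKE128: %u\n", keccak_block_lanes (2 * 128)); /* 21 */
  printf ("SHA3-224: %u\n", keccak_block_lanes (2 * 224)); /* 18 */
  printf ("SHA3-256: %u\n", keccak_block_lanes (2 * 256)); /* 17 */
  printf ("SHAKE256: %u\n", keccak_block_lanes (2 * 256)); /* 17 */
  printf ("SHA3-384: %u\n", keccak_block_lanes (2 * 384)); /* 13 */
  printf ("SHA3-512: %u\n", keccak_block_lanes (2 * 512)); /*  9 */
  return 0;
}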
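For readers who want to check the unrolled permutation above against the specification, here is a compact, loop-based Keccak-f[1600] reference in portable C. It is an independent illustrative sketch, not libgcrypt code: ROL64 here is a plain 64-bit rotate, the (~x & y) expression in the chi step corresponds to what the in-tree ANDN64(x, y) macro computes, and the rotation offsets and lane permutation follow the usual compact reference formulation.

#include <stdint.h>

#define ROL64(x, n)  (((x) << (n)) | ((x) >> (64 - (n))))

static const uint64_t rc[24] = {
  0x0000000000000001ULL, 0x0000000000008082ULL, 0x800000000000808aULL,
  0x8000000080008000ULL, 0x000000000000808bULL, 0x0000000080000001ULL,
  0x8000000080008081ULL, 0x8000000000008009ULL, 0x000000000000008aULL,
  0x0000000000000088ULL, 0x0000000080008009ULL, 0x000000008000000aULL,
  0x000000008000808bULL, 0x800000000000008bULL, 0x8000000000008089ULL,
  0x8000000000008003ULL, 0x8000000000008002ULL, 0x8000000000000080ULL,
  0x000000000000800aULL, 0x800000008000000aULL, 0x8000000080008081ULL,
  0x8000000000008080ULL, 0x0000000080000001ULL, 0x8000000080008008ULL
};

static const unsigned rho[24] = {
   1,  3,  6, 10, 15, 21, 28, 36, 45, 55,  2, 14,
  27, 41, 56,  8, 25, 43, 62, 18, 39, 61, 20, 44
};
static const unsigned pi[24] = {
  10,  7, 11, 17, 18,  3,  5, 16,  8, 21, 24,  4,
  15, 23, 19, 13, 12,  2, 20, 14, 22,  9,  6,  1
};

static void keccak_f1600 (uint64_t a[25])
{
  uint64_t c[5], d, t;
  unsigned i, j, round;

  for (round = 0; round < 24; round++)
    {
      /* Theta: column parities folded back into every lane. */
      for (i = 0; i < 5; i++)
        c[i] = a[i] ^ a[i + 5] ^ a[i + 10] ^ a[i + 15] ^ a[i + 20];
      for (i = 0; i < 5; i++)
        {
          d = c[(i + 4) % 5] ^ ROL64 (c[(i + 1) % 5], 1);
          for (j = 0; j < 25; j += 5)
            a[j + i] ^= d;
        }

      /* Rho and Pi: rotate each lane and move it to its new position. */
      t = a[1];
      for (i = 0; i < 24; i++)
        {
          d = a[pi[i]];
          a[pi[i]] = ROL64 (t, rho[i]);
          t = d;
        }

      /* Chi: non-linear mixing within each row. */
      for (j = 0; j < 25; j += 5)
        {
          for (i = 0; i < 5; i++)
            c[i] = a[j + i];
          for (i = 0; i < 5; i++)
            a[j + i] = c[i] ^ (~c[(i + 1) % 5] & c[(i + 2) % 5]);
        }

      /* Iota: round constant into lane (0,0). */
      a[0] ^= rc[round];
    }
}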
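Finally, a usage sketch of how the specs registered in keccak.c above are reached through the public libgcrypt API (initialization checks and error handling are trimmed for brevity). The SHA3 entries carry a fixed digest length, while the SHAKE entries have a digest length of 0, a NULL read callback and keccak_extract as their extract callback, so their output is pulled with gcry_md_extract:

#include <stdio.h>
#include <gcrypt.h>

int main (void)
{
  unsigned char sha3[32];
  unsigned char xof[64];
  gcry_md_hd_t hd;
  int i;

  gcry_check_version (NULL);   /* library initialization, result ignored here */

  /* Fixed-length SHA3-256 via the one-shot helper; the 32-byte length
     comes from the mdlen field of _gcry_digest_spec_sha3_256. */
  gcry_md_hash_buffer (GCRY_MD_SHA3_256, sha3, "abc", 3);

  /* Variable-length SHAKE128: write the message, then extract as many
     output bytes as needed. */
  gcry_md_open (&hd, GCRY_MD_SHAKE128, 0);
  gcry_md_write (hd, "abc", 3);
  gcry_md_extract (hd, GCRY_MD_SHAKE128, xof, sizeof (xof));
  gcry_md_close (hd);

  for (i = 0; i < 32; i++)
    printf ("%02x", sha3[i]);  /* should match the "abc" selftest vector */
  printf ("\n");
  return 0;
}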