Index: cipher/Makefile.am =================================================================== --- cipher/Makefile.am +++ cipher/Makefile.am @@ -96,6 +96,10 @@ rijndael-ssse3-amd64.c rijndael-ssse3-amd64-asm.S \ rijndael-armv8-ce.c rijndael-armv8-aarch32-ce.S \ rijndael-armv8-aarch64-ce.S rijndael-aarch64.S \ + rijndael-ppc8.pl \ + rijndael-ppc8.S \ + rijndael-ppc8be.S \ + rijndael-ppc832.S \ rmd160.c \ rsa.c \ salsa20.c salsa20-amd64.S salsa20-armv7-neon.S \ Index: cipher/ppc-xlate.pl =================================================================== --- cipher/ppc-xlate.pl +++ /dev/null @@ -1,348 +0,0 @@ -#! /usr/bin/env perl -# SPDX-License-Identifier: BSD-3-Clause - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -my $flavour = shift; -my $output = shift; -open STDOUT,">$output" || die "can't open $output: $!"; - -my %GLOBALS; -my %TYPES; -my $dotinlocallabels=($flavour=~/linux/)?1:0; - -################################################################ -# directives which need special treatment on different platforms -################################################################ -my $type = sub { - my ($dir,$name,$type) = @_; - - $TYPES{$name} = $type; - if ($flavour =~ /linux/) { - $name =~ s|^\.||; - ".type $name,$type"; - } else { - ""; - } -}; -my $globl = sub { - my $junk = shift; - my $name = shift; - my $global = \$GLOBALS{$name}; - my $type = \$TYPES{$name}; - my $ret; - - $name =~ s|^\.||; - - SWITCH: for ($flavour) { - /aix/ && do { if (!$$type) { - $$type = "\@function"; - } - if ($$type =~ /function/) { - $name = ".$name"; - } - last; - }; - /osx/ && do { $name = "_$name"; - last; - }; - /linux.*(32|64le)/ - && do { 
$ret .= ".globl $name"; - if (!$$type) { - $ret .= "\n.type $name,\@function"; - $$type = "\@function"; - } - last; - }; - /linux.*64/ && do { $ret .= ".globl $name"; - if (!$$type) { - $ret .= "\n.type $name,\@function"; - $$type = "\@function"; - } - if ($$type =~ /function/) { - $ret .= "\n.section \".opd\",\"aw\""; - $ret .= "\n.align 3"; - $ret .= "\n$name:"; - $ret .= "\n.quad .$name,.TOC.\@tocbase,0"; - $ret .= "\n.previous"; - $name = ".$name"; - } - last; - }; - } - - $ret = ".globl $name" if (!$ret); - $$global = $name; - $ret; -}; -my $text = sub { - my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text"; - $ret = ".abiversion 2\n".$ret if ($flavour =~ /linux.*64le/); - $ret; -}; -my $machine = sub { - my $junk = shift; - my $arch = shift; - if ($flavour =~ /osx/) - { $arch =~ s/\"//g; - $arch = ($flavour=~/64/) ? "ppc970-64" : "ppc970" if ($arch eq "any"); - } - ".machine $arch"; -}; -my $size = sub { - if ($flavour =~ /linux/) - { shift; - my $name = shift; - my $real = $GLOBALS{$name} ? \$GLOBALS{$name} : \$name; - my $ret = ".size $$real,.-$$real"; - $name =~ s|^\.||; - if ($$real ne $name) { - $ret .= "\n.size $name,.-$$real"; - } - $ret; - } - else - { ""; } -}; -my $asciz = sub { - shift; - my $line = join(",",@_); - if ($line =~ /^"(.*)"$/) - { ".byte " . join(",",unpack("C*",$1),0) . 
"\n.align 2"; } - else - { ""; } -}; -my $quad = sub { - shift; - my @ret; - my ($hi,$lo); - for (@_) { - if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io) - { $hi=$1?"0x$1":"0"; $lo="0x$2"; } - elsif (/^([0-9]+)$/o) - { $hi=$1>>32; $lo=$1&0xffffffff; } # error-prone with 32-bit perl - else - { $hi=undef; $lo=$_; } - - if (defined($hi)) - { push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo"); } - else - { push(@ret,".quad $lo"); } - } - join("\n",@ret); -}; - -################################################################ -# simplified mnemonics not handled by at least one assembler -################################################################ -my $cmplw = sub { - my $f = shift; - my $cr = 0; $cr = shift if ($#_>1); - # Some out-of-date 32-bit GNU assembler just can't handle cmplw... - ($flavour =~ /linux.*32/) ? - " .long ".sprintf "0x%x",31<<26|$cr<<23|$_[0]<<16|$_[1]<<11|64 : - " cmplw ".join(',',$cr,@_); -}; -my $bdnz = sub { - my $f = shift; - my $bo = $f=~/[\+\-]/ ? 16+9 : 16; # optional "to be taken" hint - " bc $bo,0,".shift; -} if ($flavour!~/linux/); -my $bltlr = sub { - my $f = shift; - my $bo = $f=~/\-/ ? 12+2 : 12; # optional "not to be taken" hint - ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints - " .long ".sprintf "0x%x",19<<26|$bo<<21|16<<1 : - " bclr $bo,0"; -}; -my $bnelr = sub { - my $f = shift; - my $bo = $f=~/\-/ ? 4+2 : 4; # optional "not to be taken" hint - ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints - " .long ".sprintf "0x%x",19<<26|$bo<<21|2<<16|16<<1 : - " bclr $bo,2"; -}; -my $beqlr = sub { - my $f = shift; - my $bo = $f=~/-/ ? 12+2 : 12; # optional "not to be taken" hint - ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints - " .long ".sprintf "0x%X",19<<26|$bo<<21|2<<16|16<<1 : - " bclr $bo,2"; -}; -# GNU assembler can't handle extrdi rA,rS,16,48, or when sum of last two -# arguments is 64, with "operand out of range" error. 
-my $extrdi = sub { - my ($f,$ra,$rs,$n,$b) = @_; - $b = ($b+$n)&63; $n = 64-$n; - " rldicl $ra,$rs,$b,$n"; -}; -my $vmr = sub { - my ($f,$vx,$vy) = @_; - " vor $vx,$vy,$vy"; -}; - -# Some ABIs specify vrsave, special-purpose register #256, as reserved -# for system use. -my $no_vrsave = ($flavour =~ /aix|linux64le/); -my $mtspr = sub { - my ($f,$idx,$ra) = @_; - if ($idx == 256 && $no_vrsave) { - " or $ra,$ra,$ra"; - } else { - " mtspr $idx,$ra"; - } -}; -my $mfspr = sub { - my ($f,$rd,$idx) = @_; - if ($idx == 256 && $no_vrsave) { - " li $rd,-1"; - } else { - " mfspr $rd,$idx"; - } -}; - -# PowerISA 2.06 stuff -sub vsxmem_op { - my ($f, $vrt, $ra, $rb, $op) = @_; - " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1); -} -# made-up unaligned memory reference AltiVec/VMX instructions -my $lvx_u = sub { vsxmem_op(@_, 844); }; # lxvd2x -my $stvx_u = sub { vsxmem_op(@_, 972); }; # stxvd2x -my $lvdx_u = sub { vsxmem_op(@_, 588); }; # lxsdx -my $stvdx_u = sub { vsxmem_op(@_, 716); }; # stxsdx -my $lvx_4w = sub { vsxmem_op(@_, 780); }; # lxvw4x -my $stvx_4w = sub { vsxmem_op(@_, 908); }; # stxvw4x -my $lvx_splt = sub { vsxmem_op(@_, 332); }; # lxvdsx -# VSX instruction[s] masqueraded as made-up AltiVec/VMX -my $vpermdi = sub { # xxpermdi - my ($f, $vrt, $vra, $vrb, $dm) = @_; - $dm = oct($dm) if ($dm =~ /^0/); - " .long ".sprintf "0x%X",(60<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|($dm<<8)|(10<<3)|7; -}; - -# PowerISA 2.07 stuff -sub vcrypto_op { - my ($f, $vrt, $vra, $vrb, $op) = @_; - " .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op; -} -sub vfour { - my ($f, $vrt, $vra, $vrb, $vrc, $op) = @_; - " .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|($vrc<<6)|$op; -}; -my $vcipher = sub { vcrypto_op(@_, 1288); }; -my $vcipherlast = sub { vcrypto_op(@_, 1289); }; -my $vncipher = sub { vcrypto_op(@_, 1352); }; -my $vncipherlast= sub { vcrypto_op(@_, 1353); }; -my $vsbox = sub { vcrypto_op(@_, 0, 1480); }; -my 
$vshasigmad = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); }; -my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); }; -my $vpmsumb = sub { vcrypto_op(@_, 1032); }; -my $vpmsumd = sub { vcrypto_op(@_, 1224); }; -my $vpmsubh = sub { vcrypto_op(@_, 1096); }; -my $vpmsumw = sub { vcrypto_op(@_, 1160); }; -# These are not really crypto, but vcrypto_op template works -my $vaddudm = sub { vcrypto_op(@_, 192); }; -my $vadduqm = sub { vcrypto_op(@_, 256); }; -my $vmuleuw = sub { vcrypto_op(@_, 648); }; -my $vmulouw = sub { vcrypto_op(@_, 136); }; -my $vrld = sub { vcrypto_op(@_, 196); }; -my $vsld = sub { vcrypto_op(@_, 1476); }; -my $vsrd = sub { vcrypto_op(@_, 1732); }; -my $vsubudm = sub { vcrypto_op(@_, 1216); }; -my $vaddcuq = sub { vcrypto_op(@_, 320); }; -my $vaddeuqm = sub { vfour(@_,60); }; -my $vaddecuq = sub { vfour(@_,61); }; -my $vmrgew = sub { vfour(@_,0,1932); }; -my $vmrgow = sub { vfour(@_,0,1676); }; - -my $mtsle = sub { - my ($f, $arg) = @_; - " .long ".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2); -}; - -# VSX instructions masqueraded as AltiVec/VMX -my $mtvrd = sub { - my ($f, $vrt, $ra) = @_; - " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|(179<<1)|1; -}; -my $mtvrwz = sub { - my ($f, $vrt, $ra) = @_; - " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|(243<<1)|1; -}; -my $lvwzx_u = sub { vsxmem_op(@_, 12); }; # lxsiwzx -my $stvwx_u = sub { vsxmem_op(@_, 140); }; # stxsiwx - -# PowerISA 3.0 stuff -my $maddhdu = sub { vfour(@_,49); }; -my $maddld = sub { vfour(@_,51); }; -my $darn = sub { - my ($f, $rt, $l) = @_; - " .long ".sprintf "0x%X",(31<<26)|($rt<<21)|($l<<16)|(755<<1); -}; -my $iseleq = sub { - my ($f, $rt, $ra, $rb) = @_; - " .long ".sprintf "0x%X",(31<<26)|($rt<<21)|($ra<<16)|($rb<<11)|(2<<6)|30; -}; -# VSX instruction[s] masqueraded as made-up AltiVec/VMX -my $vspltib = sub { # xxspltib - my ($f, $vrt, $imm8) = @_; - $imm8 = oct($imm8) if ($imm8 =~ /^0/); - $imm8 &= 0xff; 
- " .long ".sprintf "0x%X",(60<<26)|($vrt<<21)|($imm8<<11)|(360<<1)|1; -}; - -# PowerISA 3.0B stuff -my $addex = sub { - my ($f, $rt, $ra, $rb, $cy) = @_; # only cy==0 is specified in 3.0B - " .long ".sprintf "0x%X",(31<<26)|($rt<<21)|($ra<<16)|($rb<<11)|($cy<<9)|(170<<1); -}; -my $vmsumudm = sub { vfour(@_,35); }; - -while($line=<>) { - - $line =~ s|[#!;].*$||; # get rid of asm-style comments... - $line =~ s|/\*.*\*/||; # ... and C-style comments... - $line =~ s|^\s+||; # ... and skip white spaces in beginning... - $line =~ s|\s+$||; # ... and at the end - - { - $line =~ s|\.L(\w+)|L$1|g; # common denominator for Locallabel - $line =~ s|\bL(\w+)|\.L$1|g if ($dotinlocallabels); - } - - { - $line =~ s|(^[\.\w]+)\:\s*||; - my $label = $1; - if ($label) { - my $xlated = ($GLOBALS{$label} or $label); - print "$xlated:"; - if ($flavour =~ /linux.*64le/) { - if ($TYPES{$label} =~ /function/) { - printf "\n.localentry %s,0\n",$xlated; - } - } - } - } - - { - $line =~ s|^\s*(\.?)(\w+)([\.\+\-]?)\s*||; - my $c = $1; $c = "\t" if ($c eq ""); - my $mnemonic = $2; - my $f = $3; - my $opcode = eval("\$$mnemonic"); - $line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." and $flavour !~ /osx/); - if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(/,\s*/,$line)); } - elsif ($mnemonic) { $line = $c.$mnemonic.$f."\t".$line; } - } - - print $line if ($line); - print "\n"; -} - -close STDOUT; Index: cipher/rijndael-internal.h =================================================================== --- cipher/rijndael-internal.h +++ cipher/rijndael-internal.h @@ -102,6 +102,13 @@ # endif #endif /* ENABLE_ARM_CRYPTO_SUPPORT */ +#undef USE_PPC_ASM +#ifdef ENABLE_PPC_CRYPTO_SUPPORT +# if defined(__powerpc64__) || defined(__powerpc__) +# define USE_PPC_ASM 1 +# endif +#endif + struct RIJNDAEL_context_s; typedef unsigned int (*rijndael_cryptfn_t)(const struct RIJNDAEL_context_s *ctx, @@ -152,6 +159,9 @@ #ifdef USE_ARM_CE unsigned int use_arm_ce:1; /* ARMv8 CE shall be used. 
*/ #endif /*USE_ARM_CE*/ +#ifdef USE_PPC_ASM + unsigned int use_ppc_asm:1; /* PowerISA 2.07 crypto shall be used. */ +#endif rijndael_cryptfn_t encrypt_fn; rijndael_cryptfn_t decrypt_fn; rijndael_prefetchfn_t prefetch_enc_fn; Index: cipher/rijndael-ppc8.S =================================================================== --- cipher/rijndael-ppc8.S +++ cipher/rijndael-ppc8.S @@ -273,7 +273,6 @@ stvx 2,0,3 li 6,0 mtspr 256,12 - stw 8,0(5) Lenc_key_abort: mr 3,6 @@ -333,7 +332,7 @@ .globl aes_p8_encrypt .align 5 aes_p8_encrypt: - lwz 6,240(5) + lwz 6,480(5) lis 0,0xfc00 mfspr 12,256 li 7,15 @@ -487,7 +486,7 @@ neg 11,3 lvsr 10,0,6 - lwz 9,240(6) + lwz 9,480(6) lvsr 6,0,11 lvx 5,0,3 @@ -1223,7 +1222,7 @@ neg 11,3 lvsr 10,0,6 - lwz 9,240(6) + lwz 9,480(6) lvsr 6,0,11 lvx 5,0,3 @@ -1861,7 +1860,7 @@ beq Lxts_enc_no_key2 lvsr 7,0,7 - lwz 9,240(7) + lwz 9,480(7) srwi 9,9,1 subi 9,9,1 li 3,16 @@ -1905,7 +1904,7 @@ addi 10,10,16 lvsr 7,0,6 - lwz 9,240(6) + lwz 9,480(6) srwi 9,9,1 subi 9,9,1 li 3,16 Index: cipher/rijndael-ppc8.pl =================================================================== --- cipher/rijndael-ppc8.pl +++ cipher/rijndael-ppc8.pl @@ -1,6 +1,9 @@ #! /usr/bin/env perl # SPDX-License-Identifier: BSD-3-Clause # +# Changes: adjust struct offsets to work with libgcrypt ctx +# rename ppc-xlate.pl +# # ==================================================================== # Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and @@ -60,6 +63,7 @@ $LITTLE_ENDIAN = ($flavour=~/le$/) ? 
$SIZE_T : 0; $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}asm-common-ppc.pl" and -f $xlate ) or ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or die "can't locate ppc-xlate.pl"; @@ -353,7 +357,6 @@ stvx $in1,0,$inp li $ptr,0 mtspr 256,$vrsave - stw $rounds,0($out) Lenc_key_abort: mr r3,$ptr @@ -417,13 +420,14 @@ sub gen_block () { my $dir = shift; my $n = $dir eq "de" ? "n" : ""; +my $rounds_off = $dir eq "de" ? "240" : "480"; my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7)); $code.=<<___; .globl .${prefix}_${dir}crypt .align 5 .${prefix}_${dir}crypt: - lwz $rounds,240($key) + lwz $rounds,$rounds_off($key) lis r0,0xfc00 mfspr $vrsave,256 li $idx,15 # 15 is not typo @@ -522,7 +526,7 @@ neg r11,$inp ?lvsl $keyperm,0,$key # prepare for unaligned key - lwz $rounds,240($key) + lwz $rounds,480($key) lvsr $inpperm,0,r11 # prepare for unaligned load lvx $inptail,0,$inp @@ -1283,7 +1287,7 @@ neg r11,$inp ?lvsl $keyperm,0,$key # prepare for unaligned key - lwz $rounds,240($key) + lwz $rounds,480($key) lvsr $inpperm,0,r11 # prepare for unaligned load lvx $inptail,0,$inp @@ -1958,7 +1962,7 @@ beq Lxts_enc_no_key2 ?lvsl $keyperm,0,$key2 # prepare for unaligned key - lwz $rounds,240($key2) + lwz $rounds,480($key2) srwi $rounds,$rounds,1 subi $rounds,$rounds,1 li $idx,16 @@ -2002,7 +2006,7 @@ addi $inp,$inp,16 ?lvsl $keyperm,0,$key1 # prepare for unaligned key - lwz $rounds,240($key1) + lwz $rounds,480($key1) srwi $rounds,$rounds,1 subi $rounds,$rounds,1 li $idx,16 Index: cipher/rijndael-ppc832.S =================================================================== --- cipher/rijndael-ppc832.S +++ cipher/rijndael-ppc832.S @@ -273,7 +273,6 @@ stvx 2,0,3 li 6,0 mtspr 256,12 - stw 8,0(5) Lenc_key_abort: mr 3,6 @@ -333,7 +332,7 @@ .globl aes_p8_encrypt .align 5 aes_p8_encrypt: - lwz 6,240(5) + lwz 6,480(5) lis 0,0xfc00 mfspr 12,256 li 7,15 @@ -487,7 +486,7 @@ neg 11,3 lvsl 10,0,6 - lwz 9,240(6) + lwz 9,480(6) 
lvsr 6,0,11 lvx 5,0,3 @@ -1223,7 +1222,7 @@ neg 11,3 lvsl 10,0,6 - lwz 9,240(6) + lwz 9,480(6) lvsr 6,0,11 lvx 5,0,3 @@ -1861,7 +1860,7 @@ beq Lxts_enc_no_key2 lvsl 7,0,7 - lwz 9,240(7) + lwz 9,480(7) srwi 9,9,1 subi 9,9,1 li 3,16 @@ -1905,7 +1904,7 @@ addi 10,10,16 lvsl 7,0,6 - lwz 9,240(6) + lwz 9,480(6) srwi 9,9,1 subi 9,9,1 li 3,16 Index: cipher/rijndael-ppc8be.S =================================================================== --- cipher/rijndael-ppc8be.S +++ cipher/rijndael-ppc8be.S @@ -273,7 +273,6 @@ stvx 2,0,3 li 6,0 mtspr 256,12 - stw 8,0(5) Lenc_key_abort: mr 3,6 @@ -333,7 +332,7 @@ .globl aes_p8_encrypt .align 5 aes_p8_encrypt: - lwz 6,240(5) + lwz 6,480(5) lis 0,0xfc00 mfspr 12,256 li 7,15 @@ -487,7 +486,7 @@ neg 11,3 lvsl 10,0,6 - lwz 9,240(6) + lwz 9,480(6) lvsr 6,0,11 lvx 5,0,3 @@ -1223,7 +1222,7 @@ neg 11,3 lvsl 10,0,6 - lwz 9,240(6) + lwz 9,480(6) lvsr 6,0,11 lvx 5,0,3 @@ -1861,7 +1860,7 @@ beq Lxts_enc_no_key2 lvsl 7,0,7 - lwz 9,240(7) + lwz 9,480(7) srwi 9,9,1 subi 9,9,1 li 3,16 @@ -1905,7 +1904,7 @@ addi 10,10,16 lvsl 7,0,6 - lwz 9,240(6) + lwz 9,480(6) srwi 9,9,1 subi 9,9,1 li 3,16 Index: cipher/rijndael.c =================================================================== --- cipher/rijndael.c +++ cipher/rijndael.c @@ -199,6 +199,102 @@ size_t nblocks, int encrypt); #endif /*USE_ARM_ASM*/ +/* forward declaration */ +static int _gcry_aes_generic_cbc_enc (const void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks, + int cbc_mac); +#ifdef USE_PPC_ASM +/* POWER 8 AES extensions */ +extern void aes_p8_encrypt (const unsigned char *in, + unsigned char *out, + const RIJNDAEL_context *ctx); +static unsigned int _gcry_aes_ppc8_encrypt (const RIJNDAEL_context *ctx, + unsigned char *out, + const unsigned char *in) +{ + /* When I tried to switch these registers in the assembly it broke. 
*/ + aes_p8_encrypt (in, out, ctx); + return 0; /* does not use stack */ +} + /* this is the decryption key part of context */ +extern void aes_p8_decrypt (const unsigned char *in, + unsigned char *out, + const void *sboxes); +static unsigned int _gcry_aes_ppc8_decrypt (const RIJNDAEL_context *ctx, + unsigned char *out, + const unsigned char *in) +{ + aes_p8_decrypt (in, out, &ctx->u2); + return 0; /* does not use stack */ +} +extern int aes_p8_set_encrypt_key (const unsigned char *userKey, const int bits, + RIJNDAEL_context *key); +extern int aes_p8_set_decrypt_key (const unsigned char *userKey, const int bits, + /* this is the decryption key part of context */ + const unsigned (*)[15][4]); +/* No performance benefit observed */ +#if 0 + /* or decrypt */ +extern void aes_p8_cbc_encrypt (const unsigned char *in, + unsigned char *out, + size_t length, + const RIJNDAEL_context *key, unsigned char *ivec, int is_enc); +static void _gcry_aes_ppc8_cbc_enc (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks, + int is_cbc_mac) +{ + const RIJNDAEL_context *ctx = context; + +#ifdef __builtin_expect + __builtin_expect (is_cbc_mac, 0); +#endif + if (is_cbc_mac) { + _gcry_aes_generic_cbc_enc (ctx, iv, outbuf_arg, inbuf_arg, nblocks, is_cbc_mac); + return; + } + + aes_p8_cbc_encrypt (inbuf_arg, outbuf_arg, nblocks, ctx, iv, 1); +} +extern void _gcry_aes_ppc8_cbc_dec (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks) +{ + aes_p8_cbc_encrypt (inbuf_arg, outbuf_arg, nblocks, context, iv, 0); +} +extern void aes_p8_ctr32_encrypt_blocks (const unsigned char *in, unsigned char *out, + size_t len, const void *key, + const unsigned char ivec[16]); +void _gcry_aes_ppc8_ctr_enc (void *context, unsigned char *ctr, + void *outbuf, const void *inbuf, + size_t nblocks) +{ + aes_p8_ctr32_encrypt_blocks (inbuf, outbuf, nblocks, context, ctr); +} +extern void aes_p8_xts_encrypt (const unsigned char *in, + 
unsigned char *out, + size_t length, + const void *key1, const void *key2, + const unsigned char iv[16]); +extern void aes_p8_xts_decrypt (const unsigned char *in, + unsigned char *out, + size_t length, + const void *key1, const void *key2, + const unsigned char iv[16]); +static void _gcry_aes_ppc8_xts_crypt_wrap (void *context, unsigned char *tweak, + void *outbuf, const void *inbuf, + size_t nblocks, int encrypt) +{ + const RIJNDAEL_context *ctx = context; + if (encrypt) + aes_p8_xts_encrypt (inbuf, outbuf, nblocks, &ctx->u1, &ctx->u2, tweak); + else + aes_p8_xts_decrypt (inbuf, outbuf, nblocks, &ctx->u1, &ctx->u2, tweak); +} +#endif +#endif /*USE_PPC_ASM*/ + static unsigned int do_encrypt (const RIJNDAEL_context *ctx, unsigned char *bx, const unsigned char *ax); static unsigned int do_decrypt (const RIJNDAEL_context *ctx, unsigned char *bx, @@ -260,7 +356,7 @@ int i,j, r, t, rconpointer = 0; int KC; #if defined(USE_AESNI) || defined(USE_PADLOCK) || defined(USE_SSSE3) \ - || defined(USE_ARM_CE) + || defined(USE_ARM_CE) || defined(USE_PPC_ASM) unsigned int hwfeatures; #endif @@ -304,7 +400,7 @@ ctx->rounds = rounds; #if defined(USE_AESNI) || defined(USE_PADLOCK) || defined(USE_SSSE3) \ - || defined(USE_ARM_CE) + || defined(USE_ARM_CE) || defined(USE_PPC_ASM) hwfeatures = _gcry_get_hw_features (); #endif @@ -321,6 +417,9 @@ #ifdef USE_ARM_CE ctx->use_arm_ce = 0; #endif +#ifdef USE_PPC_ASM + ctx->use_ppc_asm = 0; +#endif if (0) { @@ -400,6 +499,26 @@ hd->bulk.xts_crypt = _gcry_aes_armv8_ce_xts_crypt; } } +#endif +#ifdef USE_PPC_ASM + else if (hwfeatures & HWF_PPC_VCRYPTO) + { + ctx->encrypt_fn = _gcry_aes_ppc8_encrypt; + ctx->decrypt_fn = _gcry_aes_ppc8_decrypt; + ctx->prefetch_enc_fn = NULL; + ctx->prefetch_dec_fn = NULL; + ctx->use_ppc_asm = 1; +/* no performance benefit was observed */ +#if 0 + if (hd) + { + hd->bulk.cbc_enc = _gcry_aes_ppc8_cbc_enc; + hd->bulk.cbc_dec = _gcry_aes_ppc8_cbc_dec; + hd->bulk.ctr_enc = _gcry_aes_ppc8_ctr_enc; + hd->bulk.xts_crypt = 
_gcry_aes_ppc8_xts_crypt_wrap; + } +#endif + } #endif else { @@ -426,6 +545,14 @@ #ifdef USE_ARM_CE else if (ctx->use_arm_ce) _gcry_aes_armv8_ce_setkey (ctx, key); +#endif +#ifdef USE_PPC_ASM + else if (ctx->use_ppc_asm) { + /* These are both done here to avoid having to store the key. + * These S-boxes are generated on-the-fly. */ + aes_p8_set_encrypt_key (key, keylen * 8, ctx); + aes_p8_set_decrypt_key (key, keylen * 8, &ctx->keyschdec32); + } #endif else { @@ -571,6 +698,13 @@ /* Padlock does not need decryption subkeys. */ } #endif /*USE_PADLOCK*/ +#ifdef USE_PPC_ASM + else if (ctx->use_ppc_asm) + { + /* done during encryption key setup, as then we have the actual + * key available */ + } +#endif /*USE_PPC_ASM*/ else { const byte *sbox = ((const byte *)encT) + 1; @@ -622,7 +756,7 @@ } -#if !defined(USE_ARM_ASM) && !defined(USE_AMD64_ASM) +#if !defined(USE_ARM_ASM) && !defined(USE_AMD64_ASM) && !defined(USE_PPC_ASM) /* Encrypt one block. A and B may be the same. */ static unsigned int do_encrypt_fn (const RIJNDAEL_context *ctx, unsigned char *b, @@ -756,7 +890,7 @@ return (56 + 2*sizeof(int)); } -#endif /*!USE_ARM_ASM && !USE_AMD64_ASM*/ +#endif /*!USE_ARM_ASM && !USE_AMD64_ASM && !USE_PPC_ASM*/ static unsigned int @@ -768,6 +902,8 @@ encT); #elif defined(USE_ARM_ASM) return _gcry_aes_arm_encrypt_block(ctx->keyschenc, bx, ax, ctx->rounds, encT); +#elif defined(USE_PPC_ASM) + return _gcry_aes_ppc8_encrypt(ctx, bx, ax); #else return do_encrypt_fn (ctx, bx, ax); #endif /* !USE_ARM_ASM && !USE_AMD64_ASM*/ @@ -845,6 +981,42 @@ _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); } +static int _gcry_aes_generic_cbc_enc (const void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks, + int cbc_mac) +{ + const RIJNDAEL_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; + int burn_depth = 0; + + if (ctx->prefetch_enc_fn) + 
ctx->prefetch_enc_fn(); + + unsigned char *last_iv = iv; + + for ( ;nblocks; nblocks-- ) + { + cipher_block_xor(outbuf, inbuf, last_iv, BLOCKSIZE); + + burn_depth = encrypt_fn (ctx, outbuf, outbuf); + + last_iv = outbuf; + inbuf += BLOCKSIZE; + if (!cbc_mac) + outbuf += BLOCKSIZE; + } + + if (last_iv != iv) + cipher_block_cpy (iv, last_iv, BLOCKSIZE); + + if (burn_depth) + _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); + + return 0; +} /* Bulk encryption of complete blocks in CBC mode. Caller needs to make sure that IV is aligned on an unsigned long boundary. This @@ -858,7 +1030,6 @@ RIJNDAEL_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; - unsigned char *last_iv; unsigned int burn_depth = 0; if (0) @@ -886,27 +1057,8 @@ #endif /*USE_ARM_CE*/ else { - rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; - - if (ctx->prefetch_enc_fn) - ctx->prefetch_enc_fn(); - - last_iv = iv; - - for ( ;nblocks; nblocks-- ) - { - cipher_block_xor(outbuf, inbuf, last_iv, BLOCKSIZE); - - burn_depth = encrypt_fn (ctx, outbuf, outbuf); - - last_iv = outbuf; - inbuf += BLOCKSIZE; - if (!cbc_mac) - outbuf += BLOCKSIZE; - } - - if (last_iv != iv) - cipher_block_cpy (iv, last_iv, BLOCKSIZE); + _gcry_aes_generic_cbc_enc (ctx, iv, outbuf, inbuf, nblocks, cbc_mac); + return; } if (burn_depth) @@ -981,7 +1133,7 @@ -#if !defined(USE_ARM_ASM) && !defined(USE_AMD64_ASM) +#if !defined(USE_ARM_ASM) && !defined(USE_AMD64_ASM) && !defined(USE_PPC_ASM) /* Decrypt one block. A and B may be the same. */ static unsigned int do_decrypt_fn (const RIJNDAEL_context *ctx, unsigned char *b, @@ -1113,7 +1265,7 @@ return (56+2*sizeof(int)); } -#endif /*!USE_ARM_ASM && !USE_AMD64_ASM*/ +#endif /*!USE_ARM_ASM && !USE_AMD64_ASM && !USE_PPC_ASM*/ /* Decrypt one block. AX and BX may be the same. 
*/ @@ -1127,9 +1279,11 @@ #elif defined(USE_ARM_ASM) return _gcry_aes_arm_decrypt_block(ctx->keyschdec, bx, ax, ctx->rounds, &dec_tables); +#elif defined(USE_PPC_ASM) + return _gcry_aes_ppc8_decrypt(ctx, bx, ax); #else return do_decrypt_fn (ctx, bx, ax); -#endif /*!USE_ARM_ASM && !USE_AMD64_ASM*/ +#endif } @@ -1567,14 +1721,21 @@ { 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f - /* 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, */ - /* 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c */ }; static const unsigned char ciphertext_128[16] = { 0x69,0xc4,0xe0,0xd8,0x6a,0x7b,0x04,0x30, 0xd8,0xcd,0xb7,0x80,0x70,0xb4,0xc5,0x5a }; + + static const unsigned char key_test_expansion_128[16] = + { + 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, + 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c + }; + + RIJNDAEL_context exp_ctx; + rijndael_setkey (&exp_ctx, key_test_expansion_128, sizeof (key_128), NULL); #endif /* Because gcc/ld can only align the CTX struct on 8 bytes on the @@ -1590,7 +1751,7 @@ xfree (ctxmem); return "AES-128 test encryption failed."; } - rijndael_decrypt (ctx, scratch, scratch); + rijndael_decrypt (ctx, scratch, ciphertext_128); xfree (ctxmem); if (memcmp (scratch, plaintext_128, sizeof (plaintext_128))) return "AES-128 test decryption failed."; Index: configure.ac =================================================================== --- configure.ac +++ configure.ac @@ -681,6 +681,14 @@ armcryptosupport=$enableval,armcryptosupport=yes) AC_MSG_RESULT($armcryptosupport) +# Implementation of the --disable-ppc-crypto-support switch. +AC_MSG_CHECKING([whether PPC crypto support is requested]) +AC_ARG_ENABLE(ppc-crypto-support, + AC_HELP_STRING([--disable-ppc-crypto-support], + [Disable support for the PPC crypto instructions introduced in POWER 8 (PowerISA 2.07)]), + ppccryptosupport=$enableval,ppccryptosupport=yes) +AC_MSG_RESULT($ppccryptosupport) + # Implementation of the --disable-O-flag-munging switch. 
AC_MSG_CHECKING([whether a -O flag munging is requested]) AC_ARG_ENABLE([O-flag-munging], @@ -1272,14 +1280,14 @@ drngsupport="n/a" fi -if test "$mpi_cpu_arch" != "arm" ; then - if test "$mpi_cpu_arch" != "aarch64" ; then - neonsupport="n/a" - armcryptosupport="n/a" - fi +if test "$mpi_cpu_arch" != "arm" && test "$mpi_cpu_arch" != "aarch64"; then + neonsupport="n/a" + armcryptosupport="n/a" fi - +if test "$mpi_cpu_arch" != "ppc"; then + ppccryptosupport="n/a" +fi ############################################# #### #### #### Platform specific compiler checks. #### @@ -2119,6 +2127,10 @@ AC_DEFINE(ENABLE_ARM_CRYPTO_SUPPORT,1, [Enable support for ARMv8 Crypto Extension instructions.]) fi +if test x"$ppccryptosupport" = xyes ; then + AC_DEFINE(ENABLE_PPC_CRYPTO_SUPPORT,1, + [Enable support for POWER 8 (PowerISA 2.07) crypto extension.]) +fi if test x"$jentsupport" = xyes ; then AC_DEFINE(ENABLE_JENT_SUPPORT, 1, [Enable support for the jitter entropy collector.]) @@ -2226,6 +2238,20 @@ GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-armv8-ce.lo" GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-armv8-aarch64-ce.lo" ;; + powerpc64le-*-*) + # Build with the crypto extension implementation + GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-ppc8.lo" + ;; + powerpc64-*-*) + # Big-Endian. + # Build with the crypto extension implementation + GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-ppc8be.lo" + ;; + powerpc-*-*) + # Big-Endian. 
+ # Build with the crypto extension implementation + GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-ppc832.lo" + ;; esac case "$mpi_cpu_arch" in @@ -2699,6 +2725,7 @@ ;; ppc) AC_DEFINE(HAVE_CPU_ARCH_PPC, 1, [Defined for PPC platforms]) + GCRYPT_HWF_MODULES="libgcrypt_la-hwf-ppc.lo" ;; arm) AC_DEFINE(HAVE_CPU_ARCH_ARM, 1, [Defined for ARM platforms]) @@ -2800,6 +2827,7 @@ GCRY_MSG_SHOW([Try using Intel AVX2: ],[$avx2support]) GCRY_MSG_SHOW([Try using ARM NEON: ],[$neonsupport]) GCRY_MSG_SHOW([Try using ARMv8 crypto: ],[$armcryptosupport]) +GCRY_MSG_SHOW([Try using PPC crypto: ],[$ppccryptosupport]) GCRY_MSG_SHOW([],[]) if test "x${gpg_config_script_warn}" != x; then Index: src/Makefile.am =================================================================== --- src/Makefile.am +++ src/Makefile.am @@ -66,7 +66,7 @@ hmac256.c hmac256.h context.c context.h \ ec-context.h -EXTRA_libgcrypt_la_SOURCES = hwf-x86.c hwf-arm.c +EXTRA_libgcrypt_la_SOURCES = hwf-x86.c hwf-arm.c hwf-ppc.c gcrypt_hwf_modules = @GCRYPT_HWF_MODULES@ Index: src/g10lib.h =================================================================== --- src/g10lib.h +++ src/g10lib.h @@ -236,7 +236,8 @@ #define HWF_ARM_SHA2 (1 << 20) #define HWF_ARM_PMULL (1 << 21) - +/* No problem re-using a slot from a different architecture */ +#define HWF_PPC_VCRYPTO (1 << 0) gpg_err_code_t _gcry_disable_hw_feature (const char *name); void _gcry_detect_hw_features (void); Index: src/hwf-common.h =================================================================== --- src/hwf-common.h +++ src/hwf-common.h @@ -22,6 +22,6 @@ unsigned int _gcry_hwf_detect_x86 (void); unsigned int _gcry_hwf_detect_arm (void); - +unsigned int _gcry_hwf_detect_ppc (void); #endif /*HWF_COMMON_H*/ Index: src/hwfeatures.c =================================================================== --- src/hwfeatures.c +++ src/hwfeatures.c @@ -42,6 +42,7 @@ const char *desc; } hwflist[] = { +#if defined(HAVE_CPU_ARCH_X86) { HWF_PADLOCK_RNG, "padlock-rng" }, { 
HWF_PADLOCK_AES, "padlock-aes" }, { HWF_PADLOCK_SHA, "padlock-sha" }, @@ -59,11 +60,15 @@ { HWF_INTEL_FAST_VPGATHER, "intel-fast-vpgather" }, { HWF_INTEL_RDTSC, "intel-rdtsc" }, { HWF_INTEL_SHAEXT, "intel-shaext" }, +#elif defined(HAVE_CPU_ARCH_ARM) { HWF_ARM_NEON, "arm-neon" }, { HWF_ARM_AES, "arm-aes" }, { HWF_ARM_SHA1, "arm-sha1" }, { HWF_ARM_SHA2, "arm-sha2" }, - { HWF_ARM_PMULL, "arm-pmull" } + { HWF_ARM_PMULL, "arm-pmull" }, +#elif defined(HAVE_CPU_ARCH_PPC) + { HWF_PPC_VCRYPTO, "ppc-crypto" }, +#endif }; /* A bit vector with the hardware features which shall not be used. @@ -208,12 +213,14 @@ { hw_features = _gcry_hwf_detect_x86 (); } -#endif /* HAVE_CPU_ARCH_X86 */ -#if defined (HAVE_CPU_ARCH_ARM) +#elif defined (HAVE_CPU_ARCH_ARM) { hw_features = _gcry_hwf_detect_arm (); } -#endif /* HAVE_CPU_ARCH_ARM */ - +#elif defined (HAVE_CPU_ARCH_PPC) + { + hw_features = _gcry_hwf_detect_ppc (); + } +#endif hw_features &= ~disabled_hw_features; }