Index: cipher/Makefile.am =================================================================== --- cipher/Makefile.am +++ cipher/Makefile.am @@ -96,6 +96,10 @@ rijndael-ssse3-amd64.c rijndael-ssse3-amd64-asm.S \ rijndael-armv8-ce.c rijndael-armv8-aarch32-ce.S \ rijndael-armv8-aarch64-ce.S rijndael-aarch64.S \ + rijndael-ppc8.pl \ + rijndael-ppc8.S \ + rijndael-ppc8be.S \ + rijndael-ppc832.S \ rmd160.c \ rsa.c \ salsa20.c salsa20-amd64.S salsa20-armv7-neon.S \ Index: cipher/ppc-xlate.pl =================================================================== --- cipher/ppc-xlate.pl +++ /dev/null @@ -1,348 +0,0 @@ -#! /usr/bin/env perl -# SPDX-License-Identifier: BSD-3-Clause - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -my $flavour = shift; -my $output = shift; -open STDOUT,">$output" || die "can't open $output: $!"; - -my %GLOBALS; -my %TYPES; -my $dotinlocallabels=($flavour=~/linux/)?1:0; - -################################################################ -# directives which need special treatment on different platforms -################################################################ -my $type = sub { - my ($dir,$name,$type) = @_; - - $TYPES{$name} = $type; - if ($flavour =~ /linux/) { - $name =~ s|^\.||; - ".type $name,$type"; - } else { - ""; - } -}; -my $globl = sub { - my $junk = shift; - my $name = shift; - my $global = \$GLOBALS{$name}; - my $type = \$TYPES{$name}; - my $ret; - - $name =~ s|^\.||; - - SWITCH: for ($flavour) { - /aix/ && do { if (!$$type) { - $$type = "\@function"; - } - if ($$type =~ /function/) { - $name = ".$name"; - } - last; - }; - /osx/ && do { $name = "_$name"; - last; - }; - /linux.*(32|64le)/ - && do { $ret .= ".globl $name"; - if (!$$type) { - $ret .= "\n.type $name,\@function"; - $$type = "\@function"; - } - last; - }; - /linux.*64/ && do { $ret .= ".globl $name"; - if (!$$type) { - $ret .= "\n.type $name,\@function"; - $$type = "\@function"; - } - if ($$type =~ /function/) { - $ret .= "\n.section \".opd\",\"aw\""; - $ret .= "\n.align 3"; - $ret .= "\n$name:"; - $ret .= "\n.quad .$name,.TOC.\@tocbase,0"; - $ret .= "\n.previous"; - $name = ".$name"; - } - last; - }; - } - - $ret = ".globl $name" if (!$ret); - $$global = $name; - $ret; -}; -my $text = sub { - my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text"; - $ret = ".abiversion 2\n".$ret if ($flavour =~ /linux.*64le/); - $ret; -}; -my $machine = sub { - my $junk = shift; - my $arch = shift; - if ($flavour =~ /osx/) - { $arch =~ s/\"//g; - $arch = ($flavour=~/64/) ? "ppc970-64" : "ppc970" if ($arch eq "any"); - } - ".machine $arch"; -}; -my $size = sub { - if ($flavour =~ /linux/) - { shift; - my $name = shift; - my $real = $GLOBALS{$name} ? \$GLOBALS{$name} : \$name; - my $ret = ".size $$real,.-$$real"; - $name =~ s|^\.||; - if ($$real ne $name) { - $ret .= "\n.size $name,.-$$real"; - } - $ret; - } - else - { ""; } -}; -my $asciz = sub { - shift; - my $line = join(",",@_); - if ($line =~ /^"(.*)"$/) - { ".byte " . join(",",unpack("C*",$1),0) . 
"\n.align 2"; } - else - { ""; } -}; -my $quad = sub { - shift; - my @ret; - my ($hi,$lo); - for (@_) { - if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io) - { $hi=$1?"0x$1":"0"; $lo="0x$2"; } - elsif (/^([0-9]+)$/o) - { $hi=$1>>32; $lo=$1&0xffffffff; } # error-prone with 32-bit perl - else - { $hi=undef; $lo=$_; } - - if (defined($hi)) - { push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo"); } - else - { push(@ret,".quad $lo"); } - } - join("\n",@ret); -}; - -################################################################ -# simplified mnemonics not handled by at least one assembler -################################################################ -my $cmplw = sub { - my $f = shift; - my $cr = 0; $cr = shift if ($#_>1); - # Some out-of-date 32-bit GNU assembler just can't handle cmplw... - ($flavour =~ /linux.*32/) ? - " .long ".sprintf "0x%x",31<<26|$cr<<23|$_[0]<<16|$_[1]<<11|64 : - " cmplw ".join(',',$cr,@_); -}; -my $bdnz = sub { - my $f = shift; - my $bo = $f=~/[\+\-]/ ? 16+9 : 16; # optional "to be taken" hint - " bc $bo,0,".shift; -} if ($flavour!~/linux/); -my $bltlr = sub { - my $f = shift; - my $bo = $f=~/\-/ ? 12+2 : 12; # optional "not to be taken" hint - ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints - " .long ".sprintf "0x%x",19<<26|$bo<<21|16<<1 : - " bclr $bo,0"; -}; -my $bnelr = sub { - my $f = shift; - my $bo = $f=~/\-/ ? 4+2 : 4; # optional "not to be taken" hint - ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints - " .long ".sprintf "0x%x",19<<26|$bo<<21|2<<16|16<<1 : - " bclr $bo,2"; -}; -my $beqlr = sub { - my $f = shift; - my $bo = $f=~/-/ ? 12+2 : 12; # optional "not to be taken" hint - ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints - " .long ".sprintf "0x%X",19<<26|$bo<<21|2<<16|16<<1 : - " bclr $bo,2"; -}; -# GNU assembler can't handle extrdi rA,rS,16,48, or when sum of last two -# arguments is 64, with "operand out of range" error. -my $extrdi = sub { - my ($f,$ra,$rs,$n,$b) = @_; - $b = ($b+$n)&63; $n = 64-$n; - " rldicl $ra,$rs,$b,$n"; -}; -my $vmr = sub { - my ($f,$vx,$vy) = @_; - " vor $vx,$vy,$vy"; -}; - -# Some ABIs specify vrsave, special-purpose register #256, as reserved -# for system use. 
-my $no_vrsave = ($flavour =~ /aix|linux64le/); -my $mtspr = sub { - my ($f,$idx,$ra) = @_; - if ($idx == 256 && $no_vrsave) { - " or $ra,$ra,$ra"; - } else { - " mtspr $idx,$ra"; - } -}; -my $mfspr = sub { - my ($f,$rd,$idx) = @_; - if ($idx == 256 && $no_vrsave) { - " li $rd,-1"; - } else { - " mfspr $rd,$idx"; - } -}; - -# PowerISA 2.06 stuff -sub vsxmem_op { - my ($f, $vrt, $ra, $rb, $op) = @_; - " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1); -} -# made-up unaligned memory reference AltiVec/VMX instructions -my $lvx_u = sub { vsxmem_op(@_, 844); }; # lxvd2x -my $stvx_u = sub { vsxmem_op(@_, 972); }; # stxvd2x -my $lvdx_u = sub { vsxmem_op(@_, 588); }; # lxsdx -my $stvdx_u = sub { vsxmem_op(@_, 716); }; # stxsdx -my $lvx_4w = sub { vsxmem_op(@_, 780); }; # lxvw4x -my $stvx_4w = sub { vsxmem_op(@_, 908); }; # stxvw4x -my $lvx_splt = sub { vsxmem_op(@_, 332); }; # lxvdsx -# VSX instruction[s] masqueraded as made-up AltiVec/VMX -my $vpermdi = sub { # xxpermdi - my ($f, $vrt, $vra, $vrb, $dm) = @_; - $dm = oct($dm) if ($dm =~ /^0/); - " .long ".sprintf "0x%X",(60<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|($dm<<8)|(10<<3)|7; -}; - -# PowerISA 2.07 stuff -sub vcrypto_op { - my ($f, $vrt, $vra, $vrb, $op) = @_; - " .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op; -} -sub vfour { - my ($f, $vrt, $vra, $vrb, $vrc, $op) = @_; - " .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|($vrc<<6)|$op; -}; -my $vcipher = sub { vcrypto_op(@_, 1288); }; -my $vcipherlast = sub { vcrypto_op(@_, 1289); }; -my $vncipher = sub { vcrypto_op(@_, 1352); }; -my $vncipherlast= sub { vcrypto_op(@_, 1353); }; -my $vsbox = sub { vcrypto_op(@_, 0, 1480); }; -my $vshasigmad = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); }; -my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); }; -my $vpmsumb = sub { vcrypto_op(@_, 1032); }; -my $vpmsumd = sub { vcrypto_op(@_, 1224); }; -my $vpmsubh = sub { vcrypto_op(@_, 1096); }; -my $vpmsumw = sub { vcrypto_op(@_, 1160); }; -# These are not really crypto, but vcrypto_op template works -my $vaddudm = sub { vcrypto_op(@_, 192); }; -my $vadduqm = sub { vcrypto_op(@_, 256); }; -my $vmuleuw = sub { vcrypto_op(@_, 648); }; -my $vmulouw = sub { vcrypto_op(@_, 136); }; -my $vrld = sub { vcrypto_op(@_, 196); }; -my $vsld = sub { vcrypto_op(@_, 1476); }; -my $vsrd = sub { vcrypto_op(@_, 1732); }; -my $vsubudm = sub { vcrypto_op(@_, 1216); }; -my $vaddcuq = sub { vcrypto_op(@_, 320); }; -my $vaddeuqm = sub { vfour(@_,60); }; -my $vaddecuq = sub { vfour(@_,61); }; -my $vmrgew = sub { vfour(@_,0,1932); }; -my $vmrgow = sub { vfour(@_,0,1676); }; - -my $mtsle = sub { - my ($f, $arg) = @_; - " .long ".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2); -}; - -# VSX instructions masqueraded as AltiVec/VMX -my $mtvrd = sub { - my ($f, $vrt, $ra) = @_; - " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|(179<<1)|1; -}; -my $mtvrwz = sub { - my ($f, $vrt, $ra) = @_; - " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|(243<<1)|1; -}; -my $lvwzx_u = sub { vsxmem_op(@_, 12); }; # lxsiwzx -my $stvwx_u = sub { vsxmem_op(@_, 140); }; # stxsiwx - -# PowerISA 3.0 stuff -my $maddhdu = sub { vfour(@_,49); }; -my $maddld = sub { vfour(@_,51); }; -my $darn = sub { - my ($f, $rt, $l) = @_; - " .long ".sprintf "0x%X",(31<<26)|($rt<<21)|($l<<16)|(755<<1); -}; -my $iseleq = sub { - my ($f, $rt, $ra, $rb) = @_; - " .long ".sprintf "0x%X",(31<<26)|($rt<<21)|($ra<<16)|($rb<<11)|(2<<6)|30; -}; -# VSX 
instruction[s] masqueraded as made-up AltiVec/VMX -my $vspltib = sub { # xxspltib - my ($f, $vrt, $imm8) = @_; - $imm8 = oct($imm8) if ($imm8 =~ /^0/); - $imm8 &= 0xff; - " .long ".sprintf "0x%X",(60<<26)|($vrt<<21)|($imm8<<11)|(360<<1)|1; -}; - -# PowerISA 3.0B stuff -my $addex = sub { - my ($f, $rt, $ra, $rb, $cy) = @_; # only cy==0 is specified in 3.0B - " .long ".sprintf "0x%X",(31<<26)|($rt<<21)|($ra<<16)|($rb<<11)|($cy<<9)|(170<<1); -}; -my $vmsumudm = sub { vfour(@_,35); }; - -while($line=<>) { - - $line =~ s|[#!;].*$||; # get rid of asm-style comments... - $line =~ s|/\*.*\*/||; # ... and C-style comments... - $line =~ s|^\s+||; # ... and skip white spaces in beginning... - $line =~ s|\s+$||; # ... and at the end - - { - $line =~ s|\.L(\w+)|L$1|g; # common denominator for Locallabel - $line =~ s|\bL(\w+)|\.L$1|g if ($dotinlocallabels); - } - - { - $line =~ s|(^[\.\w]+)\:\s*||; - my $label = $1; - if ($label) { - my $xlated = ($GLOBALS{$label} or $label); - print "$xlated:"; - if ($flavour =~ /linux.*64le/) { - if ($TYPES{$label} =~ /function/) { - printf "\n.localentry %s,0\n",$xlated; - } - } - } - } - - { - $line =~ s|^\s*(\.?)(\w+)([\.\+\-]?)\s*||; - my $c = $1; $c = "\t" if ($c eq ""); - my $mnemonic = $2; - my $f = $3; - my $opcode = eval("\$$mnemonic"); - $line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." and $flavour !~ /osx/); - if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(/,\s*/,$line)); } - elsif ($mnemonic) { $line = $c.$mnemonic.$f."\t".$line; } - } - - print $line if ($line); - print "\n"; -} - -close STDOUT; Index: cipher/rijndael-internal.h =================================================================== --- cipher/rijndael-internal.h +++ cipher/rijndael-internal.h @@ -104,6 +104,13 @@ # endif #endif /* ENABLE_ARM_CRYPTO_SUPPORT */ +#undef USE_PPC_ASM +#ifdef ENABLE_PPC_CRYPTO_SUPPORT +# if defined(__powerpc64__) || defined(__powerpc__) +# define USE_PPC_ASM 1 +# endif +#endif + struct RIJNDAEL_context_s; typedef unsigned int (*rijndael_cryptfn_t)(const struct RIJNDAEL_context_s *ctx, @@ -154,6 +161,9 @@ #ifdef USE_ARM_CE unsigned int use_arm_ce:1; /* ARMv8 CE shall be used. */ #endif /*USE_ARM_CE*/ +#ifdef USE_PPC_ASM + unsigned int use_ppc_asm:1; /* PowerISA 2.07 crypto shall be used. */ +#endif rijndael_cryptfn_t encrypt_fn; rijndael_cryptfn_t decrypt_fn; rijndael_prefetchfn_t prefetch_enc_fn; Index: cipher/rijndael-ppc8.S =================================================================== --- cipher/rijndael-ppc8.S +++ cipher/rijndael-ppc8.S @@ -273,7 +273,6 @@ stvx 2,0,3 li 6,0 mtspr 256,12 - stw 8,0(5) Lenc_key_abort: mr 3,6 @@ -333,7 +332,7 @@ .globl aes_p8_encrypt .align 5 aes_p8_encrypt: - lwz 6,240(5) + lwz 6,480(5) lis 0,0xfc00 mfspr 12,256 li 7,15 @@ -487,7 +486,7 @@ neg 11,3 lvsr 10,0,6 - lwz 9,240(6) + lwz 9,480(6) lvsr 6,0,11 lvx 5,0,3 @@ -1223,7 +1222,7 @@ neg 11,3 lvsr 10,0,6 - lwz 9,240(6) + lwz 9,480(6) lvsr 6,0,11 lvx 5,0,3 @@ -1861,7 +1860,7 @@ beq Lxts_enc_no_key2 lvsr 7,0,7 - lwz 9,240(7) + lwz 9,480(7) srwi 9,9,1 subi 9,9,1 li 3,16 @@ -1905,7 +1904,7 @@ addi 10,10,16 lvsr 7,0,6 - lwz 9,240(6) + lwz 9,480(6) srwi 9,9,1 subi 9,9,1 li 3,16 Index: cipher/rijndael-ppc8.pl =================================================================== --- cipher/rijndael-ppc8.pl +++ cipher/rijndael-ppc8.pl @@ -1,6 +1,9 @@ #! 
/usr/bin/env perl # SPDX-License-Identifier: BSD-3-Clause # +# Changes: adjust struct offsets to work with libgcrypt ctx +# rename ppc-xlate.pl +# # ==================================================================== # Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and @@ -60,6 +63,7 @@ $LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0; $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}asm-common-ppc.pl" and -f $xlate ) or ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or die "can't locate ppc-xlate.pl"; @@ -353,7 +357,6 @@ stvx $in1,0,$inp li $ptr,0 mtspr 256,$vrsave - stw $rounds,0($out) Lenc_key_abort: mr r3,$ptr @@ -417,13 +420,14 @@ sub gen_block () { my $dir = shift; my $n = $dir eq "de" ? "n" : ""; +my $rounds_off = $dir eq "de" ? "240" : "480"; my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7)); $code.=<<___; .globl .${prefix}_${dir}crypt .align 5 .${prefix}_${dir}crypt: - lwz $rounds,240($key) + lwz $rounds,$rounds_off($key) lis r0,0xfc00 mfspr $vrsave,256 li $idx,15 # 15 is not typo @@ -522,7 +526,7 @@ neg r11,$inp ?lvsl $keyperm,0,$key # prepare for unaligned key - lwz $rounds,240($key) + lwz $rounds,480($key) lvsr $inpperm,0,r11 # prepare for unaligned load lvx $inptail,0,$inp @@ -1283,7 +1287,7 @@ neg r11,$inp ?lvsl $keyperm,0,$key # prepare for unaligned key - lwz $rounds,240($key) + lwz $rounds,480($key) lvsr $inpperm,0,r11 # prepare for unaligned load lvx $inptail,0,$inp @@ -1958,7 +1962,7 @@ beq Lxts_enc_no_key2 ?lvsl $keyperm,0,$key2 # prepare for unaligned key - lwz $rounds,240($key2) + lwz $rounds,480($key2) srwi $rounds,$rounds,1 subi $rounds,$rounds,1 li $idx,16 @@ -2002,7 +2006,7 @@ addi $inp,$inp,16 ?lvsl $keyperm,0,$key1 # prepare for unaligned key - lwz $rounds,240($key1) + lwz $rounds,480($key1) srwi $rounds,$rounds,1 subi $rounds,$rounds,1 li $idx,16 Index: cipher/rijndael-ppc832.S =================================================================== --- cipher/rijndael-ppc832.S +++ cipher/rijndael-ppc832.S @@ -273,7 +273,6 @@ stvx 2,0,3 li 6,0 mtspr 256,12 - stw 8,0(5) Lenc_key_abort: mr 3,6 @@ -333,7 +332,7 @@ .globl aes_p8_encrypt .align 5 aes_p8_encrypt: - lwz 6,240(5) + lwz 6,480(5) lis 0,0xfc00 mfspr 12,256 li 7,15 @@ -487,7 +486,7 @@ neg 11,3 lvsl 10,0,6 - lwz 9,240(6) + lwz 9,480(6) lvsr 6,0,11 lvx 5,0,3 @@ -1223,7 +1222,7 @@ neg 11,3 lvsl 10,0,6 - lwz 9,240(6) + lwz 9,480(6) lvsr 6,0,11 lvx 5,0,3 @@ -1861,7 +1860,7 @@ beq Lxts_enc_no_key2 lvsl 7,0,7 - lwz 9,240(7) + lwz 9,480(7) srwi 9,9,1 subi 9,9,1 li 3,16 @@ -1905,7 +1904,7 @@ addi 10,10,16 lvsl 7,0,6 - lwz 9,240(6) + lwz 9,480(6) srwi 9,9,1 subi 9,9,1 li 3,16 Index: cipher/rijndael-ppc8be.S =================================================================== --- cipher/rijndael-ppc8be.S +++ cipher/rijndael-ppc8be.S @@ -273,7 +273,6 @@ stvx 2,0,3 li 6,0 mtspr 256,12 - stw 8,0(5) Lenc_key_abort: mr 3,6 @@ -333,7 +332,7 @@ .globl aes_p8_encrypt .align 5 aes_p8_encrypt: - lwz 6,240(5) + lwz 6,480(5) lis 0,0xfc00 mfspr 12,256 li 7,15 @@ -487,7 +486,7 @@ neg 11,3 lvsl 10,0,6 - lwz 9,240(6) + lwz 9,480(6) lvsr 6,0,11 lvx 5,0,3 @@ -1223,7 +1222,7 @@ neg 11,3 lvsl 10,0,6 - lwz 9,240(6) + lwz 9,480(6) lvsr 6,0,11 lvx 5,0,3 @@ -1861,7 +1860,7 @@ beq Lxts_enc_no_key2 lvsl 7,0,7 - lwz 9,240(7) + lwz 9,480(7) srwi 9,9,1 subi 9,9,1 li 3,16 @@ -1905,7 +1904,7 @@ addi 10,10,16 lvsl 7,0,6 - lwz 9,240(6) + lwz 9,480(6) srwi 9,9,1 subi 9,9,1 li 3,16 Index: cipher/rijndael.c 
=================================================================== --- cipher/rijndael.c +++ cipher/rijndael.c @@ -199,6 +199,42 @@ size_t nblocks, int encrypt); #endif /*USE_ARM_ASM*/ +/* forward declaration */ +static int _gcry_aes_generic_cbc_enc (const void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks, + int cbc_mac); +#ifdef USE_PPC_ASM +/* POWER 8 AES extensions */ +extern void aes_p8_encrypt (const unsigned char *in, + unsigned char *out, + const RIJNDAEL_context *ctx); +static unsigned int _gcry_aes_ppc8_encrypt (const RIJNDAEL_context *ctx, + unsigned char *out, + const unsigned char *in) +{ + /* When I tried to switch these registers in the assembly it broke. */ + aes_p8_encrypt (in, out, ctx); + return 0; /* does not use stack */ +} + /* this is the decryption key part of context */ +extern void aes_p8_decrypt (const unsigned char *in, + unsigned char *out, + const void *sboxes); +static unsigned int _gcry_aes_ppc8_decrypt (const RIJNDAEL_context *ctx, + unsigned char *out, + const unsigned char *in) +{ + aes_p8_decrypt (in, out, &ctx->u2); + return 0; /* does not use stack */ +} +extern int aes_p8_set_encrypt_key (const unsigned char *userKey, const int bits, + RIJNDAEL_context *key); +extern int aes_p8_set_decrypt_key (const unsigned char *userKey, const int bits, + /* this is the decryption key part of context */ + const unsigned (*)[15][4]); +#endif /*USE_PPC_ASM*/ + static unsigned int do_encrypt (const RIJNDAEL_context *ctx, unsigned char *bx, const unsigned char *ax); static unsigned int do_decrypt (const RIJNDAEL_context *ctx, unsigned char *bx, @@ -280,7 +316,7 @@ int i,j, r, t, rconpointer = 0; int KC; #if defined(USE_AESNI) || defined(USE_PADLOCK) || defined(USE_SSSE3) \ - || defined(USE_ARM_CE) + || defined(USE_ARM_CE) || defined(USE_PPC_ASM) unsigned int hwfeatures; #endif @@ -324,7 +360,7 @@ ctx->rounds = rounds; #if defined(USE_AESNI) || defined(USE_PADLOCK) || defined(USE_SSSE3) \ - || defined(USE_ARM_CE) + || defined(USE_ARM_CE) || defined(USE_PPC_ASM) hwfeatures = _gcry_get_hw_features (); #endif @@ -341,6 +377,9 @@ #ifdef USE_ARM_CE ctx->use_arm_ce = 0; #endif +#ifdef USE_PPC_ASM + ctx->use_ppc_asm = 0; +#endif if (0) { @@ -420,6 +459,16 @@ hd->bulk.xts_crypt = _gcry_aes_armv8_ce_xts_crypt; } } +#endif +#ifdef USE_PPC_ASM + else if (hwfeatures & HWF_PPC_VCRYPTO) + { + ctx->encrypt_fn = _gcry_aes_ppc8_encrypt; + ctx->decrypt_fn = _gcry_aes_ppc8_decrypt; + ctx->prefetch_enc_fn = NULL; + ctx->prefetch_dec_fn = NULL; + ctx->use_ppc_asm = 1; + } #endif else { @@ -446,6 +495,14 @@ #ifdef USE_ARM_CE else if (ctx->use_arm_ce) _gcry_aes_armv8_ce_setkey (ctx, key); +#endif +#ifdef USE_PPC_ASM + else if (ctx->use_ppc_asm) { + /* These are both done here to avoid having to store the key. + * These S-boxes are generated on-the-fly. */ + aes_p8_set_encrypt_key (key, keylen * 8, ctx); + aes_p8_set_decrypt_key (key, keylen * 8, &ctx->keyschdec32); + } #endif else { @@ -591,6 +648,13 @@ /* Padlock does not need decryption subkeys. */ } #endif /*USE_PADLOCK*/ +#ifdef USE_PPC_ASM + else if (ctx->use_ppc_asm) + { + /* done during encryption key setup, as then we have the actual + * key available */ + } +#endif /*USE_PPC_ASM*/ else { const byte *sbox = ((const byte *)encT) + 1; @@ -642,7 +706,7 @@ } -#if !defined(USE_ARM_ASM) && !defined(USE_AMD64_ASM) +#if !defined(USE_ARM_ASM) && !defined(USE_AMD64_ASM) && !defined(USE_PPC_ASM) /* Encrypt one block. A and B may be the same. 
*/ static unsigned int do_encrypt_fn (const RIJNDAEL_context *ctx, unsigned char *b, @@ -776,7 +840,7 @@ return (56 + 2*sizeof(int)); } -#endif /*!USE_ARM_ASM && !USE_AMD64_ASM*/ +#endif /*!USE_ARM_ASM && !USE_AMD64_ASM && !USE_PPC_ASM*/ static unsigned int @@ -789,6 +853,8 @@ #elif defined(USE_ARM_ASM) return _gcry_aes_arm_encrypt_block(ctx->keyschenc, bx, ax, ctx->rounds, enc_tables.T); +#elif defined(USE_PPC_ASM) + return _gcry_aes_ppc8_encrypt(ctx, bx, ax); #else return do_encrypt_fn (ctx, bx, ax); #endif /* !USE_ARM_ASM && !USE_AMD64_ASM*/ @@ -866,6 +932,42 @@ _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); } +static int _gcry_aes_generic_cbc_enc (const void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks, + int cbc_mac) +{ + const RIJNDAEL_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; + int burn_depth = 0; + + unsigned char *last_iv = iv; + + if (ctx->prefetch_enc_fn) + ctx->prefetch_enc_fn(); + + for ( ;nblocks; nblocks-- ) + { + cipher_block_xor(outbuf, inbuf, last_iv, BLOCKSIZE); + + burn_depth = encrypt_fn (ctx, outbuf, outbuf); + + last_iv = outbuf; + inbuf += BLOCKSIZE; + if (!cbc_mac) + outbuf += BLOCKSIZE; + } + + if (last_iv != iv) + cipher_block_cpy (iv, last_iv, BLOCKSIZE); + + if (burn_depth) + _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); + + return 0; +} /* Bulk encryption of complete blocks in CBC mode. Caller needs to make sure that IV is aligned on an unsigned long boundary. This @@ -879,7 +981,6 @@ RIJNDAEL_context *ctx = context; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; - unsigned char *last_iv; unsigned int burn_depth = 0; if (0) @@ -907,27 +1008,8 @@ #endif /*USE_ARM_CE*/ else { - rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; - - if (ctx->prefetch_enc_fn) - ctx->prefetch_enc_fn(); - - last_iv = iv; - - for ( ;nblocks; nblocks-- ) - { - cipher_block_xor(outbuf, inbuf, last_iv, BLOCKSIZE); - - burn_depth = encrypt_fn (ctx, outbuf, outbuf); - - last_iv = outbuf; - inbuf += BLOCKSIZE; - if (!cbc_mac) - outbuf += BLOCKSIZE; - } - - if (last_iv != iv) - cipher_block_cpy (iv, last_iv, BLOCKSIZE); + _gcry_aes_generic_cbc_enc (ctx, iv, outbuf, inbuf, nblocks, cbc_mac); + return; } if (burn_depth) @@ -1002,7 +1084,7 @@ -#if !defined(USE_ARM_ASM) && !defined(USE_AMD64_ASM) +#if !defined(USE_ARM_ASM) && !defined(USE_AMD64_ASM) && !defined(USE_PPC_ASM) /* Decrypt one block. A and B may be the same. */ static unsigned int do_decrypt_fn (const RIJNDAEL_context *ctx, unsigned char *b, @@ -1134,7 +1216,7 @@ return (56+2*sizeof(int)); } -#endif /*!USE_ARM_ASM && !USE_AMD64_ASM*/ +#endif /*!USE_ARM_ASM && !USE_AMD64_ASM && !USE_PPC_ASM*/ /* Decrypt one block. AX and BX may be the same. 
*/ @@ -1148,9 +1230,11 @@ #elif defined(USE_ARM_ASM) return _gcry_aes_arm_decrypt_block(ctx->keyschdec, bx, ax, ctx->rounds, dec_tables.T); +#elif defined(USE_PPC_ASM) + return _gcry_aes_ppc8_decrypt(ctx, bx, ax); #else return do_decrypt_fn (ctx, bx, ax); -#endif /*!USE_ARM_ASM && !USE_AMD64_ASM*/ +#endif } @@ -1588,14 +1672,21 @@ { 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f - /* 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, */ - /* 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c */ }; static const unsigned char ciphertext_128[16] = { 0x69,0xc4,0xe0,0xd8,0x6a,0x7b,0x04,0x30, 0xd8,0xcd,0xb7,0x80,0x70,0xb4,0xc5,0x5a }; + + static const unsigned char key_test_expansion_128[16] = + { + 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, + 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c + }; + + RIJNDAEL_context exp_ctx; + rijndael_setkey (&exp_ctx, key_test_expansion_128, sizeof (key_128), NULL); #endif /* Because gcc/ld can only align the CTX struct on 8 bytes on the @@ -1611,7 +1702,7 @@ xfree (ctxmem); return "AES-128 test encryption failed."; } - rijndael_decrypt (ctx, scratch, scratch); + rijndael_decrypt (ctx, scratch, ciphertext_128); xfree (ctxmem); if (memcmp (scratch, plaintext_128, sizeof (plaintext_128))) return "AES-128 test decryption failed."; Index: configure.ac =================================================================== --- configure.ac +++ configure.ac @@ -681,6 +681,14 @@ armcryptosupport=$enableval,armcryptosupport=yes) AC_MSG_RESULT($armcryptosupport) +# Implementation of the --disable-ppc-crypto-support switch. +AC_MSG_CHECKING([whether PPC crypto support is requested]) +AC_ARG_ENABLE(ppc-crypto-support, + AC_HELP_STRING([--disable-ppc-crypto-support], + [Disable support for the PPC crypto instructions introduced in POWER 8 (PowerISA 2.07)]), + ppccryptosupport=$enableval,ppccryptosupport=yes) +AC_MSG_RESULT($ppccryptosupport) + # Implementation of the --disable-O-flag-munging switch. AC_MSG_CHECKING([whether a -O flag munging is requested]) AC_ARG_ENABLE([O-flag-munging], @@ -1272,14 +1280,14 @@ drngsupport="n/a" fi -if test "$mpi_cpu_arch" != "arm" ; then - if test "$mpi_cpu_arch" != "aarch64" ; then - neonsupport="n/a" - armcryptosupport="n/a" - fi +if test "$mpi_cpu_arch" != "arm" && test "$mpi_cpu_arch" != "aarch64"; then + neonsupport="n/a" + armcryptosupport="n/a" fi - +if test "$mpi_cpu_arch" != "ppc"; then + ppccryptosupport="n/a" +fi ############################################# #### #### #### Platform specific compiler checks. #### @@ -2119,6 +2127,10 @@ AC_DEFINE(ENABLE_ARM_CRYPTO_SUPPORT,1, [Enable support for ARMv8 Crypto Extension instructions.]) fi +if test x"$ppccryptosupport" = xyes ; then + AC_DEFINE(ENABLE_PPC_CRYPTO_SUPPORT,1, + [Enable support for POWER 8 (PowerISA 2.07) crypto extension.]) +fi if test x"$jentsupport" = xyes ; then AC_DEFINE(ENABLE_JENT_SUPPORT, 1, [Enable support for the jitter entropy collector.]) @@ -2226,6 +2238,20 @@ GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-armv8-ce.lo" GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-armv8-aarch64-ce.lo" ;; + powerpc64le-*-*) + # Build with the crypto extension implementation + GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-ppc8.lo" + ;; + powerpc64-*-*) + # Big-Endian. + # Build with the crypto extension implementation + GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-ppc8be.lo" + ;; + powerpc-*-*) + # Big-Endian. 
+ # Build with the crypto extension implementation + GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-ppc832.lo" + ;; esac case "$mpi_cpu_arch" in @@ -2699,6 +2725,7 @@ ;; ppc) AC_DEFINE(HAVE_CPU_ARCH_PPC, 1, [Defined for PPC platforms]) + GCRYPT_HWF_MODULES="libgcrypt_la-hwf-ppc.lo" ;; arm) AC_DEFINE(HAVE_CPU_ARCH_ARM, 1, [Defined for ARM platforms]) @@ -2800,6 +2827,7 @@ GCRY_MSG_SHOW([Try using Intel AVX2: ],[$avx2support]) GCRY_MSG_SHOW([Try using ARM NEON: ],[$neonsupport]) GCRY_MSG_SHOW([Try using ARMv8 crypto: ],[$armcryptosupport]) +GCRY_MSG_SHOW([Try using PPC crypto: ],[$ppccryptosupport]) GCRY_MSG_SHOW([],[]) if test "x${gpg_config_script_warn}" != x; then Index: src/Makefile.am =================================================================== --- src/Makefile.am +++ src/Makefile.am @@ -66,7 +66,7 @@ hmac256.c hmac256.h context.c context.h \ ec-context.h -EXTRA_libgcrypt_la_SOURCES = hwf-x86.c hwf-arm.c +EXTRA_libgcrypt_la_SOURCES = hwf-x86.c hwf-arm.c hwf-ppc.c gcrypt_hwf_modules = @GCRYPT_HWF_MODULES@ Index: src/g10lib.h =================================================================== --- src/g10lib.h +++ src/g10lib.h @@ -236,7 +236,8 @@ #define HWF_ARM_SHA2 (1 << 20) #define HWF_ARM_PMULL (1 << 21) - +/* No problem re-using a slot from a differn't architecture */ +#define HWF_PPC_VCRYPTO (1 << 0) gpg_err_code_t _gcry_disable_hw_feature (const char *name); void _gcry_detect_hw_features (void); Index: src/hwf-common.h =================================================================== --- src/hwf-common.h +++ src/hwf-common.h @@ -22,6 +22,6 @@ unsigned int _gcry_hwf_detect_x86 (void); unsigned int _gcry_hwf_detect_arm (void); - +unsigned int _gcry_hwf_detect_ppc (void); #endif /*HWF_COMMON_H*/ Index: src/hwf-ppc.c =================================================================== --- /dev/null +++ src/hwf-ppc.c @@ -0,0 +1,230 @@ +/* hwf-ppc.c - Detect hardware features - PPC part + * Copyright (C) 2013,2019 Jussi Kivilinna + * Copyright (C) 2019 Shawn Landden + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#if defined(HAVE_SYS_AUXV_H) && (defined(HAVE_GETAUXVAL) || \ + defined(HAVE_ELF_AUX_INFO)) +#include +#endif + +#include "g10lib.h" +#include "hwf-common.h" + +#if !defined (__powerpc__) && !defined (__powerpc64__) +# error Module build for wrong CPU. +#endif + + +#if defined(HAVE_SYS_AUXV_H) && defined(HAVE_ELF_AUX_INFO) && \ + !defined(HAVE_GETAUXVAL) && defined(AT_HWCAP) +#define HAVE_GETAUXVAL +static unsigned long getauxval(unsigned long type) +{ + unsigned long auxval = 0; + int err; + + /* FreeBSD provides 'elf_aux_info' function that does the same as + * 'getauxval' on Linux. 
*/ + + err = elf_aux_info (type, &auxval, sizeof(auxval)); + if (err) + { + errno = err; + auxval = 0; + } + + return auxval; +} +#endif + + +#undef HAS_SYS_AT_HWCAP +#if defined(__linux__) || \ + (defined(HAVE_SYS_AUXV_H) && defined(HAVE_GETAUXVAL)) +#define HAS_SYS_AT_HWCAP 1 + +struct feature_map_s { + unsigned int hwcap_flag; + unsigned int hwcap2_flag; + const char *feature_match; + unsigned int hwf_flag; +}; + +#if defined(__powerpc__) || defined(__powerpc64__) + +/* Note: These macros have same values on Linux and FreeBSD. */ +#ifndef AT_HWCAP +# define AT_HWCAP 16 +#endif +#ifndef AT_HWCAP2 +# define AT_HWCAP2 26 +#endif + +#ifndef PPC_FEATURE2_VEC_CRYPTO +# define PPC_FEATURE2_VEC_CRYPTO 0x02000000 +#endif + +static const struct feature_map_s ppc_features[] = + { +#ifdef ENABLE_PPC_CRYPTO_SUPPORT + { 0, PPC_FEATURE2_VEC_CRYPTO, " crypto", HWF_PPC_VCRYPTO }, +#endif + }; +#endif + +static int +get_hwcap(unsigned int *hwcap, unsigned int *hwcap2) +{ + struct { unsigned long a_type; unsigned long a_val; } auxv; + FILE *f; + int err = -1; + static int hwcap_initialized = 0; + static unsigned int stored_hwcap = 0; + static unsigned int stored_hwcap2 = 0; + + if (hwcap_initialized) + { + *hwcap = stored_hwcap; + *hwcap2 = stored_hwcap2; + return 0; + } + +#if defined(__GLIBC__) && defined(__GNUC__) +#if __GNUC__ >= 6 + /* Returns 0 if glibc support doesn't exist, so we can + * only trust positive results. This function will need updating + * if we ever need more than one cpu feature. + */ + if (sizeof(ppc_features)/sizeof(ppc_features[0]) == 0) { + if (__builtin_cpu_supports("vcrypto")) { + stored_hwcap = 0; + stored_hwcap2 = PPC_FEATURE2_VEC_CRYPTO; + hwcap_initialized = 1; + return 0; + } + } +#endif +#endif + +#if defined(HAVE_SYS_AUXV_H) && defined(HAVE_GETAUXVAL) + errno = 0; + auxv.a_val = getauxval (AT_HWCAP); + if (errno == 0) + { + stored_hwcap |= auxv.a_val; + hwcap_initialized = 1; + } + + if (AT_HWCAP2 >= 0) + { + errno = 0; + auxv.a_val = getauxval (AT_HWCAP2); + if (errno == 0) + { + stored_hwcap2 |= auxv.a_val; + hwcap_initialized = 1; + } + } + + if (hwcap_initialized && (stored_hwcap || stored_hwcap2)) + { + *hwcap = stored_hwcap; + *hwcap2 = stored_hwcap2; + return 0; + } +#endif + + f = fopen("/proc/self/auxv", "r"); + if (!f) + { + *hwcap = stored_hwcap; + *hwcap2 = stored_hwcap2; + return -1; + } + + while (fread(&auxv, sizeof(auxv), 1, f) > 0) + { + if (auxv.a_type == AT_HWCAP) + { + stored_hwcap |= auxv.a_val; + hwcap_initialized = 1; + } + + if (auxv.a_type == AT_HWCAP2) + { + stored_hwcap2 |= auxv.a_val; + hwcap_initialized = 1; + } + } + + if (hwcap_initialized) + err = 0; + + fclose(f); + *hwcap = stored_hwcap; + *hwcap2 = stored_hwcap2; + return err; +} + +static unsigned int +detect_ppc_at_hwcap(void) +{ + unsigned int hwcap; + unsigned int hwcap2; + unsigned int features = 0; + unsigned int i; + + if (get_hwcap(&hwcap, &hwcap2) < 0) + return features; + + for (i = 0; i < DIM(ppc_features); i++) + { + if (hwcap & ppc_features[i].hwcap_flag) + features |= ppc_features[i].hwf_flag; + + if (hwcap2 & ppc_features[i].hwcap2_flag) + features |= ppc_features[i].hwf_flag; + } + + return features; +} + +#endif + +unsigned int +_gcry_hwf_detect_ppc (void) +{ + unsigned int ret = 0; + unsigned int broken_hwfs = 0; + +#if defined (HAS_SYS_AT_HWCAP) + ret |= detect_ppc_at_hwcap (); +#endif + + ret &= ~broken_hwfs; + + return ret; +} Index: src/hwfeatures.c =================================================================== --- src/hwfeatures.c +++ src/hwfeatures.c @@ 
-42,6 +42,7 @@
   const char *desc;
 } hwflist[] =
   {
+#if defined(HAVE_CPU_ARCH_X86)
     { HWF_PADLOCK_RNG, "padlock-rng" },
     { HWF_PADLOCK_AES, "padlock-aes" },
     { HWF_PADLOCK_SHA, "padlock-sha" },
@@ -59,11 +60,15 @@
     { HWF_INTEL_FAST_VPGATHER, "intel-fast-vpgather" },
     { HWF_INTEL_RDTSC, "intel-rdtsc" },
     { HWF_INTEL_SHAEXT, "intel-shaext" },
+#elif defined(HAVE_CPU_ARCH_ARM)
     { HWF_ARM_NEON, "arm-neon" },
     { HWF_ARM_AES, "arm-aes" },
     { HWF_ARM_SHA1, "arm-sha1" },
     { HWF_ARM_SHA2, "arm-sha2" },
-    { HWF_ARM_PMULL, "arm-pmull" }
+    { HWF_ARM_PMULL, "arm-pmull" },
+#elif defined(HAVE_CPU_ARCH_PPC)
+    { HWF_PPC_VCRYPTO, "ppc-crypto" },
+#endif
   };
 
 /* A bit vector with the hardware features which shall not be used.
@@ -208,12 +213,14 @@
   {
     hw_features = _gcry_hwf_detect_x86 ();
   }
-#endif /* HAVE_CPU_ARCH_X86 */
-#if defined (HAVE_CPU_ARCH_ARM)
+#elif defined (HAVE_CPU_ARCH_ARM)
   {
     hw_features = _gcry_hwf_detect_arm ();
   }
-#endif /* HAVE_CPU_ARCH_ARM */
-
+#elif defined (HAVE_CPU_ARCH_PPC)
+  {
+    hw_features = _gcry_hwf_detect_ppc ();
+  }
+#endif
   hw_features &= ~disabled_hw_features;
 }
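
Note on the detection path: apart from the glibc __builtin_cpu_supports shortcut, hwf-ppc.c derives HWF_PPC_VCRYPTO entirely from the PPC_FEATURE2_VEC_CRYPTO bit in AT_HWCAP2, whether that word comes from getauxval, elf_aux_info, or /proc/self/auxv. The standalone sketch below shows the same check in isolation; it assumes a getauxval() as found on Linux/glibc and reuses the fallback constants from the patch, so treat it as illustration rather than as part of the change.

/* Standalone sketch of the AT_HWCAP2 check performed by hwf-ppc.c.
 * Constants mirror the fallback definitions in the patch. */
#include <stdio.h>
#include <sys/auxv.h>

#ifndef AT_HWCAP2
# define AT_HWCAP2 26
#endif
#ifndef PPC_FEATURE2_VEC_CRYPTO
# define PPC_FEATURE2_VEC_CRYPTO 0x02000000
#endif

int
main (void)
{
  unsigned long hwcap2 = getauxval (AT_HWCAP2);

  /* This is the condition under which libgcrypt reports "ppc-crypto"
     and selects the aes_p8_* entry points at key setup time. */
  if (hwcap2 & PPC_FEATURE2_VEC_CRYPTO)
    puts ("PowerISA 2.07 vector crypto available (HWF_PPC_VCRYPTO)");
  else
    puts ("no vector crypto; generic C implementation will be used");
  return 0;
}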
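
The recurring 240 -> 480 offset edits in the .pl and generated .S files follow from the key argument now being a libgcrypt RIJNDAEL_context instead of OpenSSL's AES_KEY: aes_p8_encrypt and aes_p8_set_encrypt_key are handed the whole context (encryption schedule, then decryption schedule, then the rounds field), while aes_p8_decrypt is handed &ctx->u2, so the round count sits 480 or 240 bytes past the pointer respectively. The sketch below only states that layout assumption; the field names are paraphrased, not copied from rijndael-internal.h, and any alignment padding in the real struct would invalidate the hard-coded offsets.

/* Layout assumption behind "lwz rounds,480(key)" (encrypt, key = ctx)
 * and "lwz rounds,240(key)" (decrypt, key = &ctx->u2).  Paraphrased
 * sketch, not the literal libgcrypt definition. */
#include <stdio.h>
#include <stddef.h>

#define MAXROUNDS 14

struct ctx_sketch
{
  unsigned int keyschenc32[MAXROUNDS + 1][4];  /* 15 * 16 = 240 bytes */
  unsigned int keyschdec32[MAXROUNDS + 1][4];  /* another 240 bytes   */
  int rounds;                                  /* what the asm loads  */
};

int
main (void)
{
  /* Both must hold for the hard-coded assembly offsets to be valid. */
  printf ("rounds at %zu bytes from ctx (want 480)\n",
          offsetof (struct ctx_sketch, rounds));
  printf ("rounds at %zu bytes from keyschdec32 (want 240)\n",
          offsetof (struct ctx_sketch, rounds)
          - offsetof (struct ctx_sketch, keyschdec32));
  return 0;
}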
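
For completeness, the perl translator emits the PowerISA 2.07 crypto instructions as raw .long words so that older assemblers without POWER8 support can still build the generated .S files. The sketch below restates the vcrypto_op() formula already present in the translator, here for vcipher (extended opcode 1288); it adds nothing beyond what the patch encodes.

/* Reproduces vcrypto_op() from the perl translator: a VX-form word with
 * major opcode 4.  vcipher v0,v1,v2 should come out as 0x10011508. */
#include <stdio.h>

static unsigned int
vcrypto_op (unsigned int vrt, unsigned int vra, unsigned int vrb,
            unsigned int xo)
{
  return (4u << 26) | (vrt << 21) | (vra << 16) | (vrb << 11) | xo;
}

int
main (void)
{
  printf (".long 0x%08X  /* vcipher v0,v1,v2 */\n",
          vcrypto_op (0, 1, 2, 1288));
  return 0;
}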