Index: cipher/Makefile.am =================================================================== --- cipher/Makefile.am +++ cipher/Makefile.am @@ -96,6 +96,10 @@ rijndael-ssse3-amd64.c rijndael-ssse3-amd64-asm.S \ rijndael-armv8-ce.c rijndael-armv8-aarch32-ce.S \ rijndael-armv8-aarch64-ce.S rijndael-aarch64.S \ + rijndael-ppc8.pl \ + rijndael-ppc8.S \ + rijndael-ppc8be.S \ + rijndael-ppc832.S \ rmd160.c \ rsa.c \ salsa20.c salsa20-amd64.S salsa20-armv7-neon.S \ Index: cipher/ppc-xlate.pl =================================================================== --- cipher/ppc-xlate.pl +++ /dev/null @@ -1,348 +0,0 @@ -#! /usr/bin/env perl -# SPDX-License-Identifier: BSD-3-Clause - -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -my $flavour = shift; -my $output = shift; -open STDOUT,">$output" || die "can't open $output: $!"; - -my %GLOBALS; -my %TYPES; -my $dotinlocallabels=($flavour=~/linux/)?1:0; - -################################################################ -# directives which need special treatment on different platforms -################################################################ -my $type = sub { - my ($dir,$name,$type) = @_; - - $TYPES{$name} = $type; - if ($flavour =~ /linux/) { - $name =~ s|^\.||; - ".type $name,$type"; - } else { - ""; - } -}; -my $globl = sub { - my $junk = shift; - my $name = shift; - my $global = \$GLOBALS{$name}; - my $type = \$TYPES{$name}; - my $ret; - - $name =~ s|^\.||; - - SWITCH: for ($flavour) { - /aix/ && do { if (!$$type) { - $$type = "\@function"; - } - if ($$type =~ /function/) { - $name = ".$name"; - } - last; - }; - /osx/ && do { $name = "_$name"; - last; - }; - /linux.*(32|64le)/ - && do { 
$ret .= ".globl $name"; - if (!$$type) { - $ret .= "\n.type $name,\@function"; - $$type = "\@function"; - } - last; - }; - /linux.*64/ && do { $ret .= ".globl $name"; - if (!$$type) { - $ret .= "\n.type $name,\@function"; - $$type = "\@function"; - } - if ($$type =~ /function/) { - $ret .= "\n.section \".opd\",\"aw\""; - $ret .= "\n.align 3"; - $ret .= "\n$name:"; - $ret .= "\n.quad .$name,.TOC.\@tocbase,0"; - $ret .= "\n.previous"; - $name = ".$name"; - } - last; - }; - } - - $ret = ".globl $name" if (!$ret); - $$global = $name; - $ret; -}; -my $text = sub { - my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text"; - $ret = ".abiversion 2\n".$ret if ($flavour =~ /linux.*64le/); - $ret; -}; -my $machine = sub { - my $junk = shift; - my $arch = shift; - if ($flavour =~ /osx/) - { $arch =~ s/\"//g; - $arch = ($flavour=~/64/) ? "ppc970-64" : "ppc970" if ($arch eq "any"); - } - ".machine $arch"; -}; -my $size = sub { - if ($flavour =~ /linux/) - { shift; - my $name = shift; - my $real = $GLOBALS{$name} ? \$GLOBALS{$name} : \$name; - my $ret = ".size $$real,.-$$real"; - $name =~ s|^\.||; - if ($$real ne $name) { - $ret .= "\n.size $name,.-$$real"; - } - $ret; - } - else - { ""; } -}; -my $asciz = sub { - shift; - my $line = join(",",@_); - if ($line =~ /^"(.*)"$/) - { ".byte " . join(",",unpack("C*",$1),0) . 
"\n.align 2"; } - else - { ""; } -}; -my $quad = sub { - shift; - my @ret; - my ($hi,$lo); - for (@_) { - if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io) - { $hi=$1?"0x$1":"0"; $lo="0x$2"; } - elsif (/^([0-9]+)$/o) - { $hi=$1>>32; $lo=$1&0xffffffff; } # error-prone with 32-bit perl - else - { $hi=undef; $lo=$_; } - - if (defined($hi)) - { push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo"); } - else - { push(@ret,".quad $lo"); } - } - join("\n",@ret); -}; - -################################################################ -# simplified mnemonics not handled by at least one assembler -################################################################ -my $cmplw = sub { - my $f = shift; - my $cr = 0; $cr = shift if ($#_>1); - # Some out-of-date 32-bit GNU assembler just can't handle cmplw... - ($flavour =~ /linux.*32/) ? - " .long ".sprintf "0x%x",31<<26|$cr<<23|$_[0]<<16|$_[1]<<11|64 : - " cmplw ".join(',',$cr,@_); -}; -my $bdnz = sub { - my $f = shift; - my $bo = $f=~/[\+\-]/ ? 16+9 : 16; # optional "to be taken" hint - " bc $bo,0,".shift; -} if ($flavour!~/linux/); -my $bltlr = sub { - my $f = shift; - my $bo = $f=~/\-/ ? 12+2 : 12; # optional "not to be taken" hint - ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints - " .long ".sprintf "0x%x",19<<26|$bo<<21|16<<1 : - " bclr $bo,0"; -}; -my $bnelr = sub { - my $f = shift; - my $bo = $f=~/\-/ ? 4+2 : 4; # optional "not to be taken" hint - ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints - " .long ".sprintf "0x%x",19<<26|$bo<<21|2<<16|16<<1 : - " bclr $bo,2"; -}; -my $beqlr = sub { - my $f = shift; - my $bo = $f=~/-/ ? 12+2 : 12; # optional "not to be taken" hint - ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints - " .long ".sprintf "0x%X",19<<26|$bo<<21|2<<16|16<<1 : - " bclr $bo,2"; -}; -# GNU assembler can't handle extrdi rA,rS,16,48, or when sum of last two -# arguments is 64, with "operand out of range" error. 
-my $extrdi = sub { - my ($f,$ra,$rs,$n,$b) = @_; - $b = ($b+$n)&63; $n = 64-$n; - " rldicl $ra,$rs,$b,$n"; -}; -my $vmr = sub { - my ($f,$vx,$vy) = @_; - " vor $vx,$vy,$vy"; -}; - -# Some ABIs specify vrsave, special-purpose register #256, as reserved -# for system use. -my $no_vrsave = ($flavour =~ /aix|linux64le/); -my $mtspr = sub { - my ($f,$idx,$ra) = @_; - if ($idx == 256 && $no_vrsave) { - " or $ra,$ra,$ra"; - } else { - " mtspr $idx,$ra"; - } -}; -my $mfspr = sub { - my ($f,$rd,$idx) = @_; - if ($idx == 256 && $no_vrsave) { - " li $rd,-1"; - } else { - " mfspr $rd,$idx"; - } -}; - -# PowerISA 2.06 stuff -sub vsxmem_op { - my ($f, $vrt, $ra, $rb, $op) = @_; - " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1); -} -# made-up unaligned memory reference AltiVec/VMX instructions -my $lvx_u = sub { vsxmem_op(@_, 844); }; # lxvd2x -my $stvx_u = sub { vsxmem_op(@_, 972); }; # stxvd2x -my $lvdx_u = sub { vsxmem_op(@_, 588); }; # lxsdx -my $stvdx_u = sub { vsxmem_op(@_, 716); }; # stxsdx -my $lvx_4w = sub { vsxmem_op(@_, 780); }; # lxvw4x -my $stvx_4w = sub { vsxmem_op(@_, 908); }; # stxvw4x -my $lvx_splt = sub { vsxmem_op(@_, 332); }; # lxvdsx -# VSX instruction[s] masqueraded as made-up AltiVec/VMX -my $vpermdi = sub { # xxpermdi - my ($f, $vrt, $vra, $vrb, $dm) = @_; - $dm = oct($dm) if ($dm =~ /^0/); - " .long ".sprintf "0x%X",(60<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|($dm<<8)|(10<<3)|7; -}; - -# PowerISA 2.07 stuff -sub vcrypto_op { - my ($f, $vrt, $vra, $vrb, $op) = @_; - " .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op; -} -sub vfour { - my ($f, $vrt, $vra, $vrb, $vrc, $op) = @_; - " .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|($vrc<<6)|$op; -}; -my $vcipher = sub { vcrypto_op(@_, 1288); }; -my $vcipherlast = sub { vcrypto_op(@_, 1289); }; -my $vncipher = sub { vcrypto_op(@_, 1352); }; -my $vncipherlast= sub { vcrypto_op(@_, 1353); }; -my $vsbox = sub { vcrypto_op(@_, 0, 1480); }; -my 
$vshasigmad = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); }; -my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); }; -my $vpmsumb = sub { vcrypto_op(@_, 1032); }; -my $vpmsumd = sub { vcrypto_op(@_, 1224); }; -my $vpmsubh = sub { vcrypto_op(@_, 1096); }; -my $vpmsumw = sub { vcrypto_op(@_, 1160); }; -# These are not really crypto, but vcrypto_op template works -my $vaddudm = sub { vcrypto_op(@_, 192); }; -my $vadduqm = sub { vcrypto_op(@_, 256); }; -my $vmuleuw = sub { vcrypto_op(@_, 648); }; -my $vmulouw = sub { vcrypto_op(@_, 136); }; -my $vrld = sub { vcrypto_op(@_, 196); }; -my $vsld = sub { vcrypto_op(@_, 1476); }; -my $vsrd = sub { vcrypto_op(@_, 1732); }; -my $vsubudm = sub { vcrypto_op(@_, 1216); }; -my $vaddcuq = sub { vcrypto_op(@_, 320); }; -my $vaddeuqm = sub { vfour(@_,60); }; -my $vaddecuq = sub { vfour(@_,61); }; -my $vmrgew = sub { vfour(@_,0,1932); }; -my $vmrgow = sub { vfour(@_,0,1676); }; - -my $mtsle = sub { - my ($f, $arg) = @_; - " .long ".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2); -}; - -# VSX instructions masqueraded as AltiVec/VMX -my $mtvrd = sub { - my ($f, $vrt, $ra) = @_; - " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|(179<<1)|1; -}; -my $mtvrwz = sub { - my ($f, $vrt, $ra) = @_; - " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|(243<<1)|1; -}; -my $lvwzx_u = sub { vsxmem_op(@_, 12); }; # lxsiwzx -my $stvwx_u = sub { vsxmem_op(@_, 140); }; # stxsiwx - -# PowerISA 3.0 stuff -my $maddhdu = sub { vfour(@_,49); }; -my $maddld = sub { vfour(@_,51); }; -my $darn = sub { - my ($f, $rt, $l) = @_; - " .long ".sprintf "0x%X",(31<<26)|($rt<<21)|($l<<16)|(755<<1); -}; -my $iseleq = sub { - my ($f, $rt, $ra, $rb) = @_; - " .long ".sprintf "0x%X",(31<<26)|($rt<<21)|($ra<<16)|($rb<<11)|(2<<6)|30; -}; -# VSX instruction[s] masqueraded as made-up AltiVec/VMX -my $vspltib = sub { # xxspltib - my ($f, $vrt, $imm8) = @_; - $imm8 = oct($imm8) if ($imm8 =~ /^0/); - $imm8 &= 0xff; 
- " .long ".sprintf "0x%X",(60<<26)|($vrt<<21)|($imm8<<11)|(360<<1)|1; -}; - -# PowerISA 3.0B stuff -my $addex = sub { - my ($f, $rt, $ra, $rb, $cy) = @_; # only cy==0 is specified in 3.0B - " .long ".sprintf "0x%X",(31<<26)|($rt<<21)|($ra<<16)|($rb<<11)|($cy<<9)|(170<<1); -}; -my $vmsumudm = sub { vfour(@_,35); }; - -while($line=<>) { - - $line =~ s|[#!;].*$||; # get rid of asm-style comments... - $line =~ s|/\*.*\*/||; # ... and C-style comments... - $line =~ s|^\s+||; # ... and skip white spaces in beginning... - $line =~ s|\s+$||; # ... and at the end - - { - $line =~ s|\.L(\w+)|L$1|g; # common denominator for Locallabel - $line =~ s|\bL(\w+)|\.L$1|g if ($dotinlocallabels); - } - - { - $line =~ s|(^[\.\w]+)\:\s*||; - my $label = $1; - if ($label) { - my $xlated = ($GLOBALS{$label} or $label); - print "$xlated:"; - if ($flavour =~ /linux.*64le/) { - if ($TYPES{$label} =~ /function/) { - printf "\n.localentry %s,0\n",$xlated; - } - } - } - } - - { - $line =~ s|^\s*(\.?)(\w+)([\.\+\-]?)\s*||; - my $c = $1; $c = "\t" if ($c eq ""); - my $mnemonic = $2; - my $f = $3; - my $opcode = eval("\$$mnemonic"); - $line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." and $flavour !~ /osx/); - if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(/,\s*/,$line)); } - elsif ($mnemonic) { $line = $c.$mnemonic.$f."\t".$line; } - } - - print $line if ($line); - print "\n"; -} - -close STDOUT; Index: cipher/rijndael-internal.h =================================================================== --- cipher/rijndael-internal.h +++ cipher/rijndael-internal.h @@ -104,6 +104,13 @@ # endif #endif /* ENABLE_ARM_CRYPTO_SUPPORT */ +#undef USE_PPC_ASM +#ifdef ENABLE_PPC_CRYPTO_SUPPORT +# if defined(__powerpc64__) || defined(__powerpc__) +# define USE_PPC_ASM 1 +# endif +#endif + struct RIJNDAEL_context_s; typedef unsigned int (*rijndael_cryptfn_t)(const struct RIJNDAEL_context_s *ctx, @@ -154,6 +161,9 @@ #ifdef USE_ARM_CE unsigned int use_arm_ce:1; /* ARMv8 CE shall be used. 
*/ #endif /*USE_ARM_CE*/ +#ifdef USE_PPC_ASM + unsigned int use_ppc_asm:1; /* PowerISA 2.07 crypto shall be used. */ +#endif rijndael_cryptfn_t encrypt_fn; rijndael_cryptfn_t decrypt_fn; rijndael_prefetchfn_t prefetch_enc_fn; Index: cipher/rijndael-ppc8.S =================================================================== --- cipher/rijndael-ppc8.S +++ cipher/rijndael-ppc8.S @@ -277,7 +277,6 @@ stvx 2,0,3 li 6,0 or 12,12,12 - stw 8,0(5) .Lenc_key_abort: mr 3,6 @@ -343,7 +342,7 @@ aes_p8_encrypt: .localentry aes_p8_encrypt,0 - lwz 6,240(5) + lwz 6,480(5) lis 0,0xfc00 li 12,-1 li 7,15 @@ -503,7 +502,7 @@ neg 11,3 lvsr 10,0,6 - lwz 9,240(6) + lwz 9,480(6) lvsr 6,0,11 lvx 5,0,3 @@ -1242,7 +1241,7 @@ neg 11,3 lvsr 10,0,6 - lwz 9,240(6) + lwz 9,480(6) lvsr 6,0,11 lvx 5,0,3 @@ -1883,7 +1882,7 @@ beq .Lxts_enc_no_key2 lvsr 7,0,7 - lwz 9,240(7) + lwz 9,480(7) srwi 9,9,1 subi 9,9,1 li 3,16 @@ -1927,7 +1926,7 @@ addi 10,10,16 lvsr 7,0,6 - lwz 9,240(6) + lwz 9,480(6) srwi 9,9,1 subi 9,9,1 li 3,16 Index: cipher/rijndael-ppc8.pl =================================================================== --- cipher/rijndael-ppc8.pl +++ cipher/rijndael-ppc8.pl @@ -1,6 +1,9 @@ #! /usr/bin/env perl # SPDX-License-Identifier: BSD-3-Clause # +# Changes: adjust struct offsets to work with libgcrypt ctx +# rename ppc-xlate.pl +# # ==================================================================== # Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and @@ -60,6 +63,7 @@ $LITTLE_ENDIAN = ($flavour=~/le$/) ? 
$SIZE_T : 0; $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}asm-common-ppc.pl" and -f $xlate ) or ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or die "can't locate ppc-xlate.pl"; @@ -353,7 +357,6 @@ stvx $in1,0,$inp li $ptr,0 mtspr 256,$vrsave - stw $rounds,0($out) Lenc_key_abort: mr r3,$ptr @@ -417,13 +420,14 @@ sub gen_block () { my $dir = shift; my $n = $dir eq "de" ? "n" : ""; +my $rounds_off = $dir eq "de" ? "240" : "480"; my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7)); $code.=<<___; .globl .${prefix}_${dir}crypt .align 5 .${prefix}_${dir}crypt: - lwz $rounds,240($key) + lwz $rounds,$rounds_off($key) lis r0,0xfc00 mfspr $vrsave,256 li $idx,15 # 15 is not typo @@ -522,7 +526,7 @@ neg r11,$inp ?lvsl $keyperm,0,$key # prepare for unaligned key - lwz $rounds,240($key) + lwz $rounds,480($key) lvsr $inpperm,0,r11 # prepare for unaligned load lvx $inptail,0,$inp @@ -1283,7 +1287,7 @@ neg r11,$inp ?lvsl $keyperm,0,$key # prepare for unaligned key - lwz $rounds,240($key) + lwz $rounds,480($key) lvsr $inpperm,0,r11 # prepare for unaligned load lvx $inptail,0,$inp @@ -1958,7 +1962,7 @@ beq Lxts_enc_no_key2 ?lvsl $keyperm,0,$key2 # prepare for unaligned key - lwz $rounds,240($key2) + lwz $rounds,480($key2) srwi $rounds,$rounds,1 subi $rounds,$rounds,1 li $idx,16 @@ -2002,7 +2006,7 @@ addi $inp,$inp,16 ?lvsl $keyperm,0,$key1 # prepare for unaligned key - lwz $rounds,240($key1) + lwz $rounds,480($key1) srwi $rounds,$rounds,1 subi $rounds,$rounds,1 li $idx,16 Index: cipher/rijndael-ppc832.S =================================================================== --- cipher/rijndael-ppc832.S +++ cipher/rijndael-ppc832.S @@ -274,7 +274,6 @@ stvx 2,0,3 li 6,0 mtspr 256,12 - stw 8,0(5) .Lenc_key_abort: mr 3,6 @@ -336,7 +335,7 @@ .type aes_p8_encrypt,@function .align 5 aes_p8_encrypt: - lwz 6,240(5) + lwz 6,480(5) lis 0,0xfc00 mfspr 12,256 li 7,15 @@ -492,7 +491,7 @@ neg 11,3 lvsl 10,0,6 - lwz 9,240(6) + lwz 
9,480(6) lvsr 6,0,11 lvx 5,0,3 @@ -1229,7 +1228,7 @@ neg 11,3 lvsl 10,0,6 - lwz 9,240(6) + lwz 9,480(6) lvsr 6,0,11 lvx 5,0,3 @@ -1868,7 +1867,7 @@ beq .Lxts_enc_no_key2 lvsl 7,0,7 - lwz 9,240(7) + lwz 9,480(7) srwi 9,9,1 subi 9,9,1 li 3,16 @@ -1912,7 +1911,7 @@ addi 10,10,16 lvsl 7,0,6 - lwz 9,240(6) + lwz 9,480(6) srwi 9,9,1 subi 9,9,1 li 3,16 Index: cipher/rijndael-ppc8be.S =================================================================== --- cipher/rijndael-ppc8be.S +++ cipher/rijndael-ppc8be.S @@ -279,7 +279,6 @@ stvx 2,0,3 li 6,0 mtspr 256,12 - stw 8,0(5) .Lenc_key_abort: mr 3,6 @@ -353,7 +352,7 @@ .previous .align 5 .aes_p8_encrypt: - lwz 6,240(5) + lwz 6,480(5) lis 0,0xfc00 mfspr 12,256 li 7,15 @@ -521,7 +520,7 @@ neg 11,3 lvsl 10,0,6 - lwz 9,240(6) + lwz 9,480(6) lvsr 6,0,11 lvx 5,0,3 @@ -1264,7 +1263,7 @@ neg 11,3 lvsl 10,0,6 - lwz 9,240(6) + lwz 9,480(6) lvsr 6,0,11 lvx 5,0,3 @@ -1909,7 +1908,7 @@ beq .Lxts_enc_no_key2 lvsl 7,0,7 - lwz 9,240(7) + lwz 9,480(7) srwi 9,9,1 subi 9,9,1 li 3,16 @@ -1953,7 +1952,7 @@ addi 10,10,16 lvsl 7,0,6 - lwz 9,240(6) + lwz 9,480(6) srwi 9,9,1 subi 9,9,1 li 3,16 Index: cipher/rijndael.c =================================================================== --- cipher/rijndael.c +++ cipher/rijndael.c @@ -199,6 +199,37 @@ size_t nblocks, int encrypt); #endif /*USE_ARM_ASM*/ +#ifdef USE_PPC_ASM +/* POWER 8 AES extensions */ +extern void aes_p8_encrypt (const unsigned char *in, + unsigned char *out, + const RIJNDAEL_context *ctx); +static unsigned int _gcry_aes_ppc8_encrypt (const RIJNDAEL_context *ctx, + unsigned char *out, + const unsigned char *in) +{ + /* When I tried to switch these registers in the assembly it broke. 
*/ + aes_p8_encrypt (in, out, ctx); + return 0; /* does not use stack */ +} + /* this is the decryption key part of context */ +extern void aes_p8_decrypt (const unsigned char *in, + unsigned char *out, + const void *sboxes); +static unsigned int _gcry_aes_ppc8_decrypt (const RIJNDAEL_context *ctx, + unsigned char *out, + const unsigned char *in) +{ + aes_p8_decrypt (in, out, &ctx->u2); + return 0; /* does not use stack */ +} +extern int aes_p8_set_encrypt_key (const unsigned char *userKey, const int bits, + RIJNDAEL_context *key); +extern int aes_p8_set_decrypt_key (const unsigned char *userKey, const int bits, + /* this is the decryption key part of context */ + const unsigned (*)[15][4]); +#endif /*USE_PPC_ASM*/ + static unsigned int do_encrypt (const RIJNDAEL_context *ctx, unsigned char *bx, const unsigned char *ax); static unsigned int do_decrypt (const RIJNDAEL_context *ctx, unsigned char *bx, @@ -280,7 +311,7 @@ int i,j, r, t, rconpointer = 0; int KC; #if defined(USE_AESNI) || defined(USE_PADLOCK) || defined(USE_SSSE3) \ - || defined(USE_ARM_CE) + || defined(USE_ARM_CE) || defined(USE_PPC_ASM) unsigned int hwfeatures; #endif @@ -324,7 +355,7 @@ ctx->rounds = rounds; #if defined(USE_AESNI) || defined(USE_PADLOCK) || defined(USE_SSSE3) \ - || defined(USE_ARM_CE) + || defined(USE_ARM_CE) || defined(USE_PPC_ASM) hwfeatures = _gcry_get_hw_features (); #endif @@ -341,6 +372,9 @@ #ifdef USE_ARM_CE ctx->use_arm_ce = 0; #endif +#ifdef USE_PPC_ASM + ctx->use_ppc_asm = 0; +#endif if (0) { @@ -420,6 +454,16 @@ hd->bulk.xts_crypt = _gcry_aes_armv8_ce_xts_crypt; } } +#endif +#ifdef USE_PPC_ASM + else if (hwfeatures & HWF_PPC_VCRYPTO) + { + ctx->encrypt_fn = _gcry_aes_ppc8_encrypt; + ctx->decrypt_fn = _gcry_aes_ppc8_decrypt; + ctx->prefetch_enc_fn = NULL; + ctx->prefetch_dec_fn = NULL; + ctx->use_ppc_asm = 1; + } #endif else { @@ -446,6 +490,14 @@ #ifdef USE_ARM_CE else if (ctx->use_arm_ce) _gcry_aes_armv8_ce_setkey (ctx, key); +#endif +#ifdef USE_PPC_ASM + else if 
(ctx->use_ppc_asm) { + /* These are both done here to avoid having to store the key. + * These S-boxes are generated on-the-fly. */ + aes_p8_set_encrypt_key (key, keylen * 8, ctx); + aes_p8_set_decrypt_key (key, keylen * 8, &ctx->keyschdec32); + } #endif else { @@ -591,6 +643,13 @@ /* Padlock does not need decryption subkeys. */ } #endif /*USE_PADLOCK*/ +#ifdef USE_PPC_ASM + else if (ctx->use_ppc_asm) + { + /* done during encryption key setup, as then we have the actual + * key available */ + } +#endif /*USE_PPC_ASM*/ else { const byte *sbox = ((const byte *)encT) + 1; @@ -866,7 +925,6 @@ _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); } - /* Bulk encryption of complete blocks in CBC mode. Caller needs to make sure that IV is aligned on an unsigned long boundary. This function is only intended for the bulk encryption feature of @@ -1150,7 +1208,7 @@ dec_tables.T); #else return do_decrypt_fn (ctx, bx, ax); -#endif /*!USE_ARM_ASM && !USE_AMD64_ASM*/ +#endif } @@ -1588,14 +1646,21 @@ { 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f - /* 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, */ - /* 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c */ }; static const unsigned char ciphertext_128[16] = { 0x69,0xc4,0xe0,0xd8,0x6a,0x7b,0x04,0x30, 0xd8,0xcd,0xb7,0x80,0x70,0xb4,0xc5,0x5a }; + + static const unsigned char key_test_expansion_128[16] = + { + 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, + 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c + }; + + RIJNDAEL_context exp_ctx; + rijndael_setkey (&exp_ctx, key_test_expansion_128, sizeof (key_128), NULL); #endif /* Because gcc/ld can only align the CTX struct on 8 bytes on the @@ -1611,7 +1676,7 @@ xfree (ctxmem); return "AES-128 test encryption failed."; } - rijndael_decrypt (ctx, scratch, scratch); + rijndael_decrypt (ctx, scratch, ciphertext_128); xfree (ctxmem); if (memcmp (scratch, plaintext_128, sizeof (plaintext_128))) return "AES-128 test decryption failed."; Index: configure.ac 
=================================================================== --- configure.ac +++ configure.ac @@ -681,6 +681,14 @@ armcryptosupport=$enableval,armcryptosupport=yes) AC_MSG_RESULT($armcryptosupport) +# Implementation of the --disable-ppc-crypto-support switch. +AC_MSG_CHECKING([whether PPC crypto support is requested]) +AC_ARG_ENABLE(ppc-crypto-support, + AC_HELP_STRING([--disable-ppc-crypto-support], + [Disable support for the PPC crypto instructions introduced in POWER 8 (PowerISA 2.07)]), + ppccryptosupport=$enableval,ppccryptosupport=yes) +AC_MSG_RESULT($ppccryptosupport) + # Implementation of the --disable-O-flag-munging switch. AC_MSG_CHECKING([whether a -O flag munging is requested]) AC_ARG_ENABLE([O-flag-munging], @@ -1272,14 +1280,14 @@ drngsupport="n/a" fi -if test "$mpi_cpu_arch" != "arm" ; then - if test "$mpi_cpu_arch" != "aarch64" ; then - neonsupport="n/a" - armcryptosupport="n/a" - fi +if test "$mpi_cpu_arch" != "arm" && test "$mpi_cpu_arch" != "aarch64"; then + neonsupport="n/a" + armcryptosupport="n/a" fi - +if test "$mpi_cpu_arch" != "ppc"; then + ppccryptosupport="n/a" +fi ############################################# #### #### #### Platform specific compiler checks. #### @@ -2119,6 +2127,10 @@ AC_DEFINE(ENABLE_ARM_CRYPTO_SUPPORT,1, [Enable support for ARMv8 Crypto Extension instructions.]) fi +if test x"$ppccryptosupport" = xyes ; then + AC_DEFINE(ENABLE_PPC_CRYPTO_SUPPORT,1, + [Enable support for POWER 8 (PowerISA 2.07) crypto extension.]) +fi if test x"$jentsupport" = xyes ; then AC_DEFINE(ENABLE_JENT_SUPPORT, 1, [Enable support for the jitter entropy collector.]) @@ -2226,6 +2238,20 @@ GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-armv8-ce.lo" GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-armv8-aarch64-ce.lo" ;; + powerpc64le-*-*) + # Build with the crypto extension implementation + GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-ppc8.lo" + ;; + powerpc64-*-*) + # Big-Endian. 
+ # Build with the crypto extension implementation + GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-ppc8be.lo" + ;; + powerpc-*-*) + # Big-Endian. + # Build with the crypto extension implementation + GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-ppc832.lo" + ;; esac case "$mpi_cpu_arch" in @@ -2699,6 +2725,7 @@ ;; ppc) AC_DEFINE(HAVE_CPU_ARCH_PPC, 1, [Defined for PPC platforms]) + GCRYPT_HWF_MODULES="libgcrypt_la-hwf-ppc.lo" ;; arm) AC_DEFINE(HAVE_CPU_ARCH_ARM, 1, [Defined for ARM platforms]) @@ -2800,6 +2827,7 @@ GCRY_MSG_SHOW([Try using Intel AVX2: ],[$avx2support]) GCRY_MSG_SHOW([Try using ARM NEON: ],[$neonsupport]) GCRY_MSG_SHOW([Try using ARMv8 crypto: ],[$armcryptosupport]) +GCRY_MSG_SHOW([Try using PPC crypto: ],[$ppccryptosupport]) GCRY_MSG_SHOW([],[]) if test "x${gpg_config_script_warn}" != x; then Index: src/Makefile.am =================================================================== --- src/Makefile.am +++ src/Makefile.am @@ -66,7 +66,7 @@ hmac256.c hmac256.h context.c context.h \ ec-context.h -EXTRA_libgcrypt_la_SOURCES = hwf-x86.c hwf-arm.c +EXTRA_libgcrypt_la_SOURCES = hwf-x86.c hwf-arm.c hwf-ppc.c gcrypt_hwf_modules = @GCRYPT_HWF_MODULES@ Index: src/g10lib.h =================================================================== --- src/g10lib.h +++ src/g10lib.h @@ -236,7 +236,8 @@ #define HWF_ARM_SHA2 (1 << 20) #define HWF_ARM_PMULL (1 << 21) - +/* No problem re-using a slot from a different architecture */ +#define HWF_PPC_VCRYPTO (1 << 0) gpg_err_code_t _gcry_disable_hw_feature (const char *name); void _gcry_detect_hw_features (void); Index: src/hwf-common.h =================================================================== --- src/hwf-common.h +++ src/hwf-common.h @@ -22,6 +22,6 @@ unsigned int _gcry_hwf_detect_x86 (void); unsigned int _gcry_hwf_detect_arm (void); - +unsigned int _gcry_hwf_detect_ppc (void); #endif /*HWF_COMMON_H*/ Index: src/hwf-ppc.c =================================================================== --- /dev/null +++ src/hwf-ppc.c @@
-0,0 +1,237 @@ +/* hwf-ppc.c - Detect hardware features - PPC part + * Copyright (C) 2013,2019 Jussi Kivilinna + * Copyright (C) 2019 Shawn Landden + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> +#include <unistd.h> +#include <errno.h> +#if defined(HAVE_SYS_AUXV_H) && (defined(HAVE_GETAUXVAL) || \ + defined(HAVE_ELF_AUX_INFO)) +#include <sys/auxv.h> +#endif + +#include "g10lib.h" +#include "hwf-common.h" + +#if !defined (__powerpc__) && !defined (__powerpc64__) +# error Module build for wrong CPU. +#endif + + +#if defined(HAVE_SYS_AUXV_H) && defined(HAVE_ELF_AUX_INFO) && \ + !defined(HAVE_GETAUXVAL) && defined(AT_HWCAP) +#define HAVE_GETAUXVAL +static unsigned long getauxval(unsigned long type) +{ + unsigned long auxval = 0; + int err; + + /* FreeBSD provides 'elf_aux_info' function that does the same as + * 'getauxval' on Linux.
*/ + + err = elf_aux_info (type, &auxval, sizeof(auxval)); + if (err) + { + errno = err; + auxval = 0; + } + + return auxval; +} +#endif + + +#undef HAS_SYS_AT_HWCAP +#if defined(__linux__) || \ + (defined(HAVE_SYS_AUXV_H) && defined(HAVE_GETAUXVAL)) +#define HAS_SYS_AT_HWCAP 1 + +struct feature_map_s { + unsigned int hwcap_flag; + unsigned int hwcap2_flag; + const char *feature_match; + unsigned int hwf_flag; +}; + +#if defined(__powerpc__) || defined(__powerpc64__) + +/* Note: These macros have same values on Linux and FreeBSD. */ +#ifndef AT_HWCAP +# define AT_HWCAP 16 +#endif +#ifndef AT_HWCAP2 +# define AT_HWCAP2 26 +#endif + +#ifndef PPC_FEATURE2_VEC_CRYPTO +# define PPC_FEATURE2_VEC_CRYPTO 0x02000000 +#endif + +static const struct feature_map_s ppc_features[] = + { +#ifdef ENABLE_PPC_CRYPTO_SUPPORT + { 0, PPC_FEATURE2_VEC_CRYPTO, " crypto", HWF_PPC_VCRYPTO }, +#endif + }; +#endif + +/* Fetch the AT_HWCAP and AT_HWCAP2 auxiliary-vector values into *HWCAP + * and *HWCAP2. Values found are cached in static storage and reused on + * later calls. Returns 0 on success and -1 on failure. */ +static int +get_hwcap(unsigned int *hwcap, unsigned int *hwcap2) +{ + struct { unsigned long a_type; unsigned long a_val; } auxv; + FILE *f; + int err = -1; + static int hwcap_initialized = 0; + static unsigned int stored_hwcap = 0; + static unsigned int stored_hwcap2 = 0; + + if (hwcap_initialized) + { + *hwcap = stored_hwcap; + *hwcap2 = stored_hwcap2; + return 0; + } + +#if defined(__GLIBC__) && defined(__GNUC__) +#if __GNUC__ >= 6 + /* Returns 0 if glibc support doesn't exist, so we can + * only trust positive results. This function will need updating + * if we ever need more than one cpu feature.
+ */ + if (sizeof(ppc_features)/sizeof(ppc_features[0]) == 1) { + if (__builtin_cpu_supports("vcrypto")) { + stored_hwcap = 0; + stored_hwcap2 = PPC_FEATURE2_VEC_CRYPTO; + hwcap_initialized = 1; + /* Fill the output parameters; the caller reads them on success. */ + *hwcap = stored_hwcap; + *hwcap2 = stored_hwcap2; + return 0; + } + } +#endif +#endif + +#if defined(HAVE_SYS_AUXV_H) && defined(HAVE_GETAUXVAL) + errno = 0; + auxv.a_val = getauxval (AT_HWCAP); + if (errno == 0) + { + stored_hwcap |= auxv.a_val; + hwcap_initialized = 1; + } + + if (AT_HWCAP2 >= 0) + { + errno = 0; + auxv.a_val = getauxval (AT_HWCAP2); + if (errno == 0) + { + stored_hwcap2 |= auxv.a_val; + hwcap_initialized = 1; + } + } + + if (hwcap_initialized && (stored_hwcap || stored_hwcap2)) + { + *hwcap = stored_hwcap; + *hwcap2 = stored_hwcap2; + return 0; + } +#endif + + f = fopen("/proc/self/auxv", "r"); + if (!f) + { + *hwcap = stored_hwcap; + *hwcap2 = stored_hwcap2; + return -1; + } + + while (fread(&auxv, sizeof(auxv), 1, f) > 0) + { + if (auxv.a_type == AT_HWCAP) + { + stored_hwcap |= auxv.a_val; + hwcap_initialized = 1; + } + + if (auxv.a_type == AT_HWCAP2) + { + stored_hwcap2 |= auxv.a_val; + hwcap_initialized = 1; + } + } + + if (hwcap_initialized) + err = 0; + + fclose(f); + *hwcap = stored_hwcap; + *hwcap2 = stored_hwcap2; + return err; +} + +/* Translate the HWCAP/HWCAP2 bits listed in ppc_features into HWF_* flags. */ +static unsigned int +detect_ppc_at_hwcap(void) +{ + unsigned int hwcap; + unsigned int hwcap2; + unsigned int features = 0; + unsigned int i; + + if (get_hwcap(&hwcap, &hwcap2) < 0) + return features; + + for (i = 0; i < DIM(ppc_features); i++) + { + if (hwcap & ppc_features[i].hwcap_flag) + features |= ppc_features[i].hwf_flag; + + if (hwcap2 & ppc_features[i].hwcap2_flag) + features |= ppc_features[i].hwf_flag; + } + + return features; +} + +#endif + +unsigned int +_gcry_hwf_detect_ppc (void) +{ + unsigned int ret = 0; + unsigned int broken_hwfs = 0; + +#if defined (HAS_SYS_AT_HWCAP) + ret |= detect_ppc_at_hwcap (); +#endif + + ret &= ~broken_hwfs; + + return ret; +} Index: src/hwfeatures.c =================================================================== ---
src/hwfeatures.c +++ src/hwfeatures.c @@ -42,6 +42,7 @@ const char *desc; } hwflist[] = { +#if defined(HAVE_CPU_ARCH_X86) { HWF_PADLOCK_RNG, "padlock-rng" }, { HWF_PADLOCK_AES, "padlock-aes" }, { HWF_PADLOCK_SHA, "padlock-sha" }, @@ -59,11 +60,15 @@ { HWF_INTEL_FAST_VPGATHER, "intel-fast-vpgather" }, { HWF_INTEL_RDTSC, "intel-rdtsc" }, { HWF_INTEL_SHAEXT, "intel-shaext" }, +#elif defined(HAVE_CPU_ARCH_ARM) { HWF_ARM_NEON, "arm-neon" }, { HWF_ARM_AES, "arm-aes" }, { HWF_ARM_SHA1, "arm-sha1" }, { HWF_ARM_SHA2, "arm-sha2" }, - { HWF_ARM_PMULL, "arm-pmull" } + { HWF_ARM_PMULL, "arm-pmull" }, +#elif defined(HAVE_CPU_ARCH_PPC) + { HWF_PPC_VCRYPTO, "ppc-crypto" }, +#endif }; /* A bit vector with the hardware features which shall not be used. @@ -208,12 +213,14 @@ { hw_features = _gcry_hwf_detect_x86 (); } -#endif /* HAVE_CPU_ARCH_X86 */ -#if defined (HAVE_CPU_ARCH_ARM) +#elif defined (HAVE_CPU_ARCH_ARM) { hw_features = _gcry_hwf_detect_arm (); } -#endif /* HAVE_CPU_ARCH_ARM */ - +#elif defined (HAVE_CPU_ARCH_PPC) + { + hw_features = _gcry_hwf_detect_ppc (); + } +#endif hw_features &= ~disabled_hw_features; }