Index: AUTHORS =================================================================== --- AUTHORS +++ AUTHORS @@ -38,7 +38,7 @@ Copyright (C) 2013-2014 Dmitry Eremin-Solenikov Copyright (C) 2014 Stephan Mueller Copyright (C) 2018 Bundesamt für Sicherheit in der Informationstechnik - + Copyright (C) 2006,2016 Andy Polyakov Authors with a FSF copyright assignment ======================================= Index: cipher/aesp8-ppc.pl =================================================================== --- /dev/null +++ cipher/aesp8-ppc.pl @@ -0,0 +1,3801 @@ +#! /usr/bin/env perl +# SPDX-License-Identifier: BSD-3-Clause +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# This module implements support for AES instructions as per PowerISA +# specification version 2.07, first implemented by POWER8 processor. +# The module is endian-agnostic in sense that it supports both big- +# and little-endian cases. Data alignment in parallelizable modes is +# handled with VSX loads and stores, which implies MSR.VSX flag being +# set. It should also be noted that ISA specification doesn't prohibit +# alignment exceptions for these instructions on page boundaries. +# Initially alignment was handled in pure AltiVec/VMX way [when data +# is aligned programmatically, which in turn guarantees exception- +# free execution], but it turned to hamper performance when vcipher +# instructions are interleaved. It's reckoned that eventual +# misalignment penalties at page boundaries are in average lower +# than additional overhead in pure AltiVec approach. +# +# May 2016 +# +# Add XTS subroutine, 9x on little- and 12x improvement on big-endian +# systems were measured. +# +###################################################################### +# Current large-block performance in cycles per byte processed with +# 128-bit key (less is better). +# +# CBC en-/decrypt CTR XTS +# POWER8[le] 3.96/0.72 0.74 1.1 +# POWER8[be] 3.75/0.65 0.66 1.0 +# POWER9[le] 4.02/0.86 0.84 1.05 +# POWER9[be] 3.99/0.78 0.79 0.97 + +$flavour = shift; + +if ($flavour =~ /64/) { + $SIZE_T =8; + $LRSAVE =2*$SIZE_T; + $STU ="stdu"; + $POP ="ld"; + $PUSH ="std"; + $UCMP ="cmpld"; + $SHL ="sldi"; +} elsif ($flavour =~ /32/) { + $SIZE_T =4; + $LRSAVE =$SIZE_T; + $STU ="stwu"; + $POP ="lwz"; + $PUSH ="stw"; + $UCMP ="cmplw"; + $SHL ="slwi"; +} else { die "nonsense $flavour"; } + +$LITTLE_ENDIAN = ($flavour=~/le$/) ? 
$SIZE_T : 0; + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or +die "can't locate ppc-xlate.pl"; + +open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; + +$FRAME=8*$SIZE_T; +$prefix="aes_p8"; + +$sp="r1"; +$vrsave="r12"; + +######################################################################### +{{{ # Key setup procedures # +my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8)); +my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6)); +my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11)); + +$code.=<<___; +.machine "any" + +.text + +.align 7 +rcon: +.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev +.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev +.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev +.long 0,0,0,0 ?asis +Lconsts: + mflr r0 + bcl 20,31,\$+4 + mflr $ptr #vvvvv "distance between . and rcon + addi $ptr,$ptr,-0x48 + mtlr r0 + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 +.asciz "AES for PowerISA 2.07, CRYPTOGAMS by " + +.globl .${prefix}_set_encrypt_key +.align 5 +.${prefix}_set_encrypt_key: +Lset_encrypt_key: + mflr r11 + $PUSH r11,$LRSAVE($sp) + + li $ptr,-1 + ${UCMP}i $inp,0 + beq- Lenc_key_abort # if ($inp==0) return -1; + ${UCMP}i $out,0 + beq- Lenc_key_abort # if ($out==0) return -1; + li $ptr,-2 + cmpwi $bits,128 + blt- Lenc_key_abort + cmpwi $bits,256 + bgt- Lenc_key_abort + andi. r0,$bits,0x3f + bne- Lenc_key_abort + + lis r0,0xfff0 + mfspr $vrsave,256 + mtspr 256,r0 + + bl Lconsts + mtlr r11 + + neg r9,$inp + lvx $in0,0,$inp + addi $inp,$inp,15 # 15 is not typo + lvsr $key,0,r9 # borrow $key + li r8,0x20 + cmpwi $bits,192 + lvx $in1,0,$inp + le?vspltisb $mask,0x0f # borrow $mask + lvx $rcon,0,$ptr + le?vxor $key,$key,$mask # adjust for byte swap + lvx $mask,r8,$ptr + addi $ptr,$ptr,0x10 + vperm $in0,$in0,$in1,$key # align [and byte swap in LE] + li $cnt,8 + vxor $zero,$zero,$zero + mtctr $cnt + + ?lvsr $outperm,0,$out + vspltisb $outmask,-1 + lvx $outhead,0,$out + ?vperm $outmask,$zero,$outmask,$outperm + + blt Loop128 + addi $inp,$inp,8 + beq L192 + addi $inp,$inp,8 + b L256 + +.align 4 +Loop128: + vperm $key,$in0,$in0,$mask # rotate-n-splat + vsldoi $tmp,$zero,$in0,12 # >>32 + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + vcipherlast $key,$key,$rcon + stvx $stage,0,$out + addi $out,$out,16 + + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vadduwm $rcon,$rcon,$rcon + vxor $in0,$in0,$key + bdnz Loop128 + + lvx $rcon,0,$ptr # last two round keys + + vperm $key,$in0,$in0,$mask # rotate-n-splat + vsldoi $tmp,$zero,$in0,12 # >>32 + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + vcipherlast $key,$key,$rcon + stvx $stage,0,$out + addi $out,$out,16 + + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vadduwm $rcon,$rcon,$rcon + vxor $in0,$in0,$key + + vperm $key,$in0,$in0,$mask # rotate-n-splat + vsldoi $tmp,$zero,$in0,12 # >>32 + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + vcipherlast $key,$key,$rcon + stvx $stage,0,$out + addi $out,$out,16 + + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vsldoi 
$tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vxor $in0,$in0,$key + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + stvx $stage,0,$out + + addi $inp,$out,15 # 15 is not typo + addi $out,$out,0x50 + + li $rounds,10 + b Ldone + +.align 4 +L192: + lvx $tmp,0,$inp + li $cnt,4 + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + stvx $stage,0,$out + addi $out,$out,16 + vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] + vspltisb $key,8 # borrow $key + mtctr $cnt + vsububm $mask,$mask,$key # adjust the mask + +Loop192: + vperm $key,$in1,$in1,$mask # roate-n-splat + vsldoi $tmp,$zero,$in0,12 # >>32 + vcipherlast $key,$key,$rcon + + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + + vsldoi $stage,$zero,$in1,8 + vspltw $tmp,$in0,3 + vxor $tmp,$tmp,$in1 + vsldoi $in1,$zero,$in1,12 # >>32 + vadduwm $rcon,$rcon,$rcon + vxor $in1,$in1,$tmp + vxor $in0,$in0,$key + vxor $in1,$in1,$key + vsldoi $stage,$stage,$in0,8 + + vperm $key,$in1,$in1,$mask # rotate-n-splat + vsldoi $tmp,$zero,$in0,12 # >>32 + vperm $outtail,$stage,$stage,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + vcipherlast $key,$key,$rcon + stvx $stage,0,$out + addi $out,$out,16 + + vsldoi $stage,$in0,$in1,8 + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vperm $outtail,$stage,$stage,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + stvx $stage,0,$out + addi $out,$out,16 + + vspltw $tmp,$in0,3 + vxor $tmp,$tmp,$in1 + vsldoi $in1,$zero,$in1,12 # >>32 + vadduwm $rcon,$rcon,$rcon + vxor $in1,$in1,$tmp + vxor $in0,$in0,$key + vxor $in1,$in1,$key + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + stvx $stage,0,$out + addi $inp,$out,15 # 15 is not typo + addi $out,$out,16 + bdnz Loop192 + + li $rounds,12 + addi $out,$out,0x20 + b Ldone + +.align 4 +L256: + lvx $tmp,0,$inp + li $cnt,7 + li $rounds,14 + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + stvx $stage,0,$out + addi $out,$out,16 + vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] + mtctr $cnt + +Loop256: + vperm $key,$in1,$in1,$mask # rotate-n-splat + vsldoi $tmp,$zero,$in0,12 # >>32 + vperm $outtail,$in1,$in1,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + vcipherlast $key,$key,$rcon + stvx $stage,0,$out + addi $out,$out,16 + + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vadduwm $rcon,$rcon,$rcon + vxor $in0,$in0,$key + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + stvx $stage,0,$out + addi $inp,$out,15 # 15 is not typo + addi $out,$out,16 + bdz Ldone + + vspltw $key,$in0,3 # just splat + vsldoi $tmp,$zero,$in1,12 # >>32 + vsbox $key,$key + + vxor $in1,$in1,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in1,$in1,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in1,$in1,$tmp + + vxor $in1,$in1,$key + b Loop256 + +.align 4 +Ldone: + lvx $in1,0,$inp # redundant in aligned case + vsel $in1,$outhead,$in1,$outmask + stvx $in1,0,$inp + li $ptr,0 + mtspr 256,$vrsave + stw $rounds,0($out) + +Lenc_key_abort: + 
mr r3,$ptr + blr + .long 0 + .byte 0,12,0x14,1,0,0,3,0 + .long 0 +.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key + +.globl .${prefix}_set_decrypt_key +.align 5 +.${prefix}_set_decrypt_key: + $STU $sp,-$FRAME($sp) + mflr r10 + $PUSH r10,$FRAME+$LRSAVE($sp) + bl Lset_encrypt_key + mtlr r10 + + cmpwi r3,0 + bne- Ldec_key_abort + + slwi $cnt,$rounds,4 + subi $inp,$out,240 # first round key + srwi $rounds,$rounds,1 + add $out,$inp,$cnt # last round key + mtctr $rounds + +Ldeckey: + lwz r0, 0($inp) + lwz r6, 4($inp) + lwz r7, 8($inp) + lwz r8, 12($inp) + addi $inp,$inp,16 + lwz r9, 0($out) + lwz r10,4($out) + lwz r11,8($out) + lwz r12,12($out) + stw r0, 0($out) + stw r6, 4($out) + stw r7, 8($out) + stw r8, 12($out) + subi $out,$out,16 + stw r9, -16($inp) + stw r10,-12($inp) + stw r11,-8($inp) + stw r12,-4($inp) + bdnz Ldeckey + + xor r3,r3,r3 # return value +Ldec_key_abort: + addi $sp,$sp,$FRAME + blr + .long 0 + .byte 0,12,4,1,0x80,0,3,0 + .long 0 +.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key +___ +}}} +######################################################################### +{{{ # Single block en- and decrypt procedures # +sub gen_block () { +my $dir = shift; +my $n = $dir eq "de" ? "n" : ""; +my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7)); + +$code.=<<___; +.globl .${prefix}_${dir}crypt +.align 5 +.${prefix}_${dir}crypt: + lwz $rounds,240($key) + lis r0,0xfc00 + mfspr $vrsave,256 + li $idx,15 # 15 is not typo + mtspr 256,r0 + + lvx v0,0,$inp + neg r11,$out + lvx v1,$idx,$inp + lvsl v2,0,$inp # inpperm + le?vspltisb v4,0x0f + ?lvsl v3,0,r11 # outperm + le?vxor v2,v2,v4 + li $idx,16 + vperm v0,v0,v1,v2 # align [and byte swap in LE] + lvx v1,0,$key + ?lvsl v5,0,$key # keyperm + srwi $rounds,$rounds,1 + lvx v2,$idx,$key + addi $idx,$idx,16 + subi $rounds,$rounds,1 + ?vperm v1,v1,v2,v5 # align round key + + vxor v0,v0,v1 + lvx v1,$idx,$key + addi $idx,$idx,16 + mtctr $rounds + +Loop_${dir}c: + ?vperm v2,v2,v1,v5 + v${n}cipher v0,v0,v2 + lvx v2,$idx,$key + addi $idx,$idx,16 + ?vperm v1,v1,v2,v5 + v${n}cipher v0,v0,v1 + lvx v1,$idx,$key + addi $idx,$idx,16 + bdnz Loop_${dir}c + + ?vperm v2,v2,v1,v5 + v${n}cipher v0,v0,v2 + lvx v2,$idx,$key + ?vperm v1,v1,v2,v5 + v${n}cipherlast v0,v0,v1 + + vspltisb v2,-1 + vxor v1,v1,v1 + li $idx,15 # 15 is not typo + ?vperm v2,v1,v2,v3 # outmask + le?vxor v3,v3,v4 + lvx v1,0,$out # outhead + vperm v0,v0,v0,v3 # rotate [and byte swap in LE] + vsel v1,v1,v0,v2 + lvx v4,$idx,$out + stvx v1,0,$out + vsel v0,v0,v4,v2 + stvx v0,$idx,$out + + mtspr 256,$vrsave + blr + .long 0 + .byte 0,12,0x14,0,0,0,3,0 + .long 0 +.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt +___ +} +&gen_block("en"); +&gen_block("de"); +}}} +######################################################################### +{{{ # CBC en- and decrypt procedures # +my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10)); +my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); +my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)= + map("v$_",(4..10)); +$code.=<<___; +.globl .${prefix}_cbc_encrypt +.align 5 +.${prefix}_cbc_encrypt: + ${UCMP}i $len,16 + bltlr- + + cmpwi $enc,0 # test direction + lis r0,0xffe0 + mfspr $vrsave,256 + mtspr 256,r0 + + li $idx,15 + vxor $rndkey0,$rndkey0,$rndkey0 + le?vspltisb $tmp,0x0f + + lvx $ivec,0,$ivp # load [unaligned] iv + lvsl $inpperm,0,$ivp + lvx $inptail,$idx,$ivp + le?vxor $inpperm,$inpperm,$tmp + vperm $ivec,$ivec,$inptail,$inpperm + + neg r11,$inp + ?lvsl $keyperm,0,$key # prepare for unaligned key + lwz 
$rounds,240($key) + + lvsr $inpperm,0,r11 # prepare for unaligned load + lvx $inptail,0,$inp + addi $inp,$inp,15 # 15 is not typo + le?vxor $inpperm,$inpperm,$tmp + + ?lvsr $outperm,0,$out # prepare for unaligned store + vspltisb $outmask,-1 + lvx $outhead,0,$out + ?vperm $outmask,$rndkey0,$outmask,$outperm + le?vxor $outperm,$outperm,$tmp + + srwi $rounds,$rounds,1 + li $idx,16 + subi $rounds,$rounds,1 + beq Lcbc_dec + +Lcbc_enc: + vmr $inout,$inptail + lvx $inptail,0,$inp + addi $inp,$inp,16 + mtctr $rounds + subi $len,$len,16 # len-=16 + + lvx $rndkey0,0,$key + vperm $inout,$inout,$inptail,$inpperm + lvx $rndkey1,$idx,$key + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key + addi $idx,$idx,16 + vxor $inout,$inout,$ivec + +Loop_cbc_enc: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipher $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key + addi $idx,$idx,16 + bdnz Loop_cbc_enc + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key + li $idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipherlast $ivec,$inout,$rndkey0 + ${UCMP}i $len,16 + + vperm $tmp,$ivec,$ivec,$outperm + vsel $inout,$outhead,$tmp,$outmask + vmr $outhead,$tmp + stvx $inout,0,$out + addi $out,$out,16 + bge Lcbc_enc + + b Lcbc_done + +.align 4 +Lcbc_dec: + ${UCMP}i $len,128 + bge _aesp8_cbc_decrypt8x + vmr $tmp,$inptail + lvx $inptail,0,$inp + addi $inp,$inp,16 + mtctr $rounds + subi $len,$len,16 # len-=16 + + lvx $rndkey0,0,$key + vperm $tmp,$tmp,$inptail,$inpperm + lvx $rndkey1,$idx,$key + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$tmp,$rndkey0 + lvx $rndkey0,$idx,$key + addi $idx,$idx,16 + +Loop_cbc_dec: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vncipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vncipher $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key + addi $idx,$idx,16 + bdnz Loop_cbc_dec + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vncipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key + li $idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vncipherlast $inout,$inout,$rndkey0 + ${UCMP}i $len,16 + + vxor $inout,$inout,$ivec + vmr $ivec,$tmp + vperm $tmp,$inout,$inout,$outperm + vsel $inout,$outhead,$tmp,$outmask + vmr $outhead,$tmp + stvx $inout,0,$out + addi $out,$out,16 + bge Lcbc_dec + +Lcbc_done: + addi $out,$out,-1 + lvx $inout,0,$out # redundant in aligned case + vsel $inout,$outhead,$inout,$outmask + stvx $inout,0,$out + + neg $enc,$ivp # write [unaligned] iv + li $idx,15 # 15 is not typo + vxor $rndkey0,$rndkey0,$rndkey0 + vspltisb $outmask,-1 + le?vspltisb $tmp,0x0f + ?lvsl $outperm,0,$enc + ?vperm $outmask,$rndkey0,$outmask,$outperm + le?vxor $outperm,$outperm,$tmp + lvx $outhead,0,$ivp + vperm $ivec,$ivec,$ivec,$outperm + vsel $inout,$outhead,$ivec,$outmask + lvx $inptail,$idx,$ivp + stvx $inout,0,$ivp + vsel $inout,$ivec,$inptail,$outmask + stvx $inout,$idx,$ivp + + mtspr 256,$vrsave + blr + .long 0 + .byte 0,12,0x14,0,0,0,6,0 + .long 0 +___ +######################################################################### +{{ # Optimized CBC decrypt procedure # +my $key_="r11"; +my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); + $x00=0 if ($flavour =~ /osx/); +my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 
)=map("v$_",(0..3,10..13)); +my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21)); +my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys + # v26-v31 last 6 round keys +my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment + +$code.=<<___; +.align 5 +_aesp8_cbc_decrypt8x: + $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) + li r10,`$FRAME+8*16+15` + li r11,`$FRAME+8*16+31` + stvx v20,r10,$sp # ABI says so + addi r10,r10,32 + stvx v21,r11,$sp + addi r11,r11,32 + stvx v22,r10,$sp + addi r10,r10,32 + stvx v23,r11,$sp + addi r11,r11,32 + stvx v24,r10,$sp + addi r10,r10,32 + stvx v25,r11,$sp + addi r11,r11,32 + stvx v26,r10,$sp + addi r10,r10,32 + stvx v27,r11,$sp + addi r11,r11,32 + stvx v28,r10,$sp + addi r10,r10,32 + stvx v29,r11,$sp + addi r11,r11,32 + stvx v30,r10,$sp + stvx v31,r11,$sp + li r0,-1 + stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave + li $x10,0x10 + $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) + li $x20,0x20 + $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) + li $x30,0x30 + $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) + li $x40,0x40 + $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) + li $x50,0x50 + $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) + li $x60,0x60 + $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) + li $x70,0x70 + mtspr 256,r0 + + subi $rounds,$rounds,3 # -4 in total + subi $len,$len,128 # bias + + lvx $rndkey0,$x00,$key # load key schedule + lvx v30,$x10,$key + addi $key,$key,0x20 + lvx v31,$x00,$key + ?vperm $rndkey0,$rndkey0,v30,$keyperm + addi $key_,$sp,$FRAME+15 + mtctr $rounds + +Load_cbc_dec_key: + ?vperm v24,v30,v31,$keyperm + lvx v30,$x10,$key + addi $key,$key,0x20 + stvx v24,$x00,$key_ # off-load round[1] + ?vperm v25,v31,v30,$keyperm + lvx v31,$x00,$key + stvx v25,$x10,$key_ # off-load round[2] + addi $key_,$key_,0x20 + bdnz Load_cbc_dec_key + + lvx v26,$x10,$key + ?vperm v24,v30,v31,$keyperm + lvx v27,$x20,$key + stvx v24,$x00,$key_ # off-load round[3] + ?vperm v25,v31,v26,$keyperm + lvx v28,$x30,$key + stvx v25,$x10,$key_ # off-load round[4] + addi $key_,$sp,$FRAME+15 # rewind $key_ + ?vperm v26,v26,v27,$keyperm + lvx v29,$x40,$key + ?vperm v27,v27,v28,$keyperm + lvx v30,$x50,$key + ?vperm v28,v28,v29,$keyperm + lvx v31,$x60,$key + ?vperm v29,v29,v30,$keyperm + lvx $out0,$x70,$key # borrow $out0 + ?vperm v30,v30,v31,$keyperm + lvx v24,$x00,$key_ # pre-load round[1] + ?vperm v31,v31,$out0,$keyperm + lvx v25,$x10,$key_ # pre-load round[2] + + #lvx $inptail,0,$inp # "caller" already did this + #addi $inp,$inp,15 # 15 is not typo + subi $inp,$inp,15 # undo "caller" + + le?li $idx,8 + lvx_u $in0,$x00,$inp # load first 8 "words" + le?lvsl $inpperm,0,$idx + le?vspltisb $tmp,0x0f + lvx_u $in1,$x10,$inp + le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u + lvx_u $in2,$x20,$inp + le?vperm $in0,$in0,$in0,$inpperm + lvx_u $in3,$x30,$inp + le?vperm $in1,$in1,$in1,$inpperm + lvx_u $in4,$x40,$inp + le?vperm $in2,$in2,$in2,$inpperm + vxor $out0,$in0,$rndkey0 + lvx_u $in5,$x50,$inp + le?vperm $in3,$in3,$in3,$inpperm + vxor $out1,$in1,$rndkey0 + lvx_u $in6,$x60,$inp + le?vperm $in4,$in4,$in4,$inpperm + vxor $out2,$in2,$rndkey0 + lvx_u $in7,$x70,$inp + addi $inp,$inp,0x80 + le?vperm $in5,$in5,$in5,$inpperm + vxor $out3,$in3,$rndkey0 + le?vperm $in6,$in6,$in6,$inpperm + vxor $out4,$in4,$rndkey0 + le?vperm $in7,$in7,$in7,$inpperm + vxor $out5,$in5,$rndkey0 + vxor $out6,$in6,$rndkey0 + vxor $out7,$in7,$rndkey0 + + mtctr $rounds + b Loop_cbc_dec8x +.align 5 +Loop_cbc_dec8x: + vncipher $out0,$out0,v24 + vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + 
vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + vncipher $out5,$out5,v24 + vncipher $out6,$out6,v24 + vncipher $out7,$out7,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vncipher $out0,$out0,v25 + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + vncipher $out5,$out5,v25 + vncipher $out6,$out6,v25 + vncipher $out7,$out7,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_cbc_dec8x + + subic $len,$len,128 # $len-=128 + vncipher $out0,$out0,v24 + vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + vncipher $out5,$out5,v24 + vncipher $out6,$out6,v24 + vncipher $out7,$out7,v24 + + subfe. r0,r0,r0 # borrow?-1:0 + vncipher $out0,$out0,v25 + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + vncipher $out5,$out5,v25 + vncipher $out6,$out6,v25 + vncipher $out7,$out7,v25 + + and r0,r0,$len + vncipher $out0,$out0,v26 + vncipher $out1,$out1,v26 + vncipher $out2,$out2,v26 + vncipher $out3,$out3,v26 + vncipher $out4,$out4,v26 + vncipher $out5,$out5,v26 + vncipher $out6,$out6,v26 + vncipher $out7,$out7,v26 + + add $inp,$inp,r0 # $inp is adjusted in such + # way that at exit from the + # loop inX-in7 are loaded + # with last "words" + vncipher $out0,$out0,v27 + vncipher $out1,$out1,v27 + vncipher $out2,$out2,v27 + vncipher $out3,$out3,v27 + vncipher $out4,$out4,v27 + vncipher $out5,$out5,v27 + vncipher $out6,$out6,v27 + vncipher $out7,$out7,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vncipher $out0,$out0,v28 + vncipher $out1,$out1,v28 + vncipher $out2,$out2,v28 + vncipher $out3,$out3,v28 + vncipher $out4,$out4,v28 + vncipher $out5,$out5,v28 + vncipher $out6,$out6,v28 + vncipher $out7,$out7,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + + vncipher $out0,$out0,v29 + vncipher $out1,$out1,v29 + vncipher $out2,$out2,v29 + vncipher $out3,$out3,v29 + vncipher $out4,$out4,v29 + vncipher $out5,$out5,v29 + vncipher $out6,$out6,v29 + vncipher $out7,$out7,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + + vncipher $out0,$out0,v30 + vxor $ivec,$ivec,v31 # xor with last round key + vncipher $out1,$out1,v30 + vxor $in0,$in0,v31 + vncipher $out2,$out2,v30 + vxor $in1,$in1,v31 + vncipher $out3,$out3,v30 + vxor $in2,$in2,v31 + vncipher $out4,$out4,v30 + vxor $in3,$in3,v31 + vncipher $out5,$out5,v30 + vxor $in4,$in4,v31 + vncipher $out6,$out6,v30 + vxor $in5,$in5,v31 + vncipher $out7,$out7,v30 + vxor $in6,$in6,v31 + + vncipherlast $out0,$out0,$ivec + vncipherlast $out1,$out1,$in0 + lvx_u $in0,$x00,$inp # load next input block + vncipherlast $out2,$out2,$in1 + lvx_u $in1,$x10,$inp + vncipherlast $out3,$out3,$in2 + le?vperm $in0,$in0,$in0,$inpperm + lvx_u $in2,$x20,$inp + vncipherlast $out4,$out4,$in3 + le?vperm $in1,$in1,$in1,$inpperm + lvx_u $in3,$x30,$inp + vncipherlast $out5,$out5,$in4 + le?vperm $in2,$in2,$in2,$inpperm + lvx_u $in4,$x40,$inp + vncipherlast $out6,$out6,$in5 + le?vperm $in3,$in3,$in3,$inpperm + lvx_u $in5,$x50,$inp + vncipherlast $out7,$out7,$in6 + le?vperm $in4,$in4,$in4,$inpperm + lvx_u $in6,$x60,$inp + vmr $ivec,$in7 + le?vperm $in5,$in5,$in5,$inpperm + lvx_u $in7,$x70,$inp + addi $inp,$inp,0x80 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + le?vperm $in6,$in6,$in6,$inpperm + vxor $out0,$in0,$rndkey0 + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x10,$out + le?vperm $in7,$in7,$in7,$inpperm + vxor $out1,$in1,$rndkey0 + le?vperm 
$out3,$out3,$out3,$inpperm + stvx_u $out2,$x20,$out + vxor $out2,$in2,$rndkey0 + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x30,$out + vxor $out3,$in3,$rndkey0 + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x40,$out + vxor $out4,$in4,$rndkey0 + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x50,$out + vxor $out5,$in5,$rndkey0 + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out6,$x60,$out + vxor $out6,$in6,$rndkey0 + stvx_u $out7,$x70,$out + addi $out,$out,0x80 + vxor $out7,$in7,$rndkey0 + + mtctr $rounds + beq Loop_cbc_dec8x # did $len-=128 borrow? + + addic. $len,$len,128 + beq Lcbc_dec8x_done + nop + nop + +Loop_cbc_dec8x_tail: # up to 7 "words" tail... + vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + vncipher $out5,$out5,v24 + vncipher $out6,$out6,v24 + vncipher $out7,$out7,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + vncipher $out5,$out5,v25 + vncipher $out6,$out6,v25 + vncipher $out7,$out7,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_cbc_dec8x_tail + + vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + vncipher $out5,$out5,v24 + vncipher $out6,$out6,v24 + vncipher $out7,$out7,v24 + + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + vncipher $out5,$out5,v25 + vncipher $out6,$out6,v25 + vncipher $out7,$out7,v25 + + vncipher $out1,$out1,v26 + vncipher $out2,$out2,v26 + vncipher $out3,$out3,v26 + vncipher $out4,$out4,v26 + vncipher $out5,$out5,v26 + vncipher $out6,$out6,v26 + vncipher $out7,$out7,v26 + + vncipher $out1,$out1,v27 + vncipher $out2,$out2,v27 + vncipher $out3,$out3,v27 + vncipher $out4,$out4,v27 + vncipher $out5,$out5,v27 + vncipher $out6,$out6,v27 + vncipher $out7,$out7,v27 + + vncipher $out1,$out1,v28 + vncipher $out2,$out2,v28 + vncipher $out3,$out3,v28 + vncipher $out4,$out4,v28 + vncipher $out5,$out5,v28 + vncipher $out6,$out6,v28 + vncipher $out7,$out7,v28 + + vncipher $out1,$out1,v29 + vncipher $out2,$out2,v29 + vncipher $out3,$out3,v29 + vncipher $out4,$out4,v29 + vncipher $out5,$out5,v29 + vncipher $out6,$out6,v29 + vncipher $out7,$out7,v29 + + vncipher $out1,$out1,v30 + vxor $ivec,$ivec,v31 # last round key + vncipher $out2,$out2,v30 + vxor $in1,$in1,v31 + vncipher $out3,$out3,v30 + vxor $in2,$in2,v31 + vncipher $out4,$out4,v30 + vxor $in3,$in3,v31 + vncipher $out5,$out5,v30 + vxor $in4,$in4,v31 + vncipher $out6,$out6,v30 + vxor $in5,$in5,v31 + vncipher $out7,$out7,v30 + vxor $in6,$in6,v31 + + cmplwi $len,32 # switch($len) + blt Lcbc_dec8x_one + nop + beq Lcbc_dec8x_two + cmplwi $len,64 + blt Lcbc_dec8x_three + nop + beq Lcbc_dec8x_four + cmplwi $len,96 + blt Lcbc_dec8x_five + nop + beq Lcbc_dec8x_six + +Lcbc_dec8x_seven: + vncipherlast $out1,$out1,$ivec + vncipherlast $out2,$out2,$in1 + vncipherlast $out3,$out3,$in2 + vncipherlast $out4,$out4,$in3 + vncipherlast $out5,$out5,$in4 + vncipherlast $out6,$out6,$in5 + vncipherlast $out7,$out7,$in6 + vmr $ivec,$in7 + + le?vperm $out1,$out1,$out1,$inpperm + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x00,$out + le?vperm $out3,$out3,$out3,$inpperm + stvx_u $out2,$x10,$out + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x20,$out + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x30,$out + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x40,$out + le?vperm $out7,$out7,$out7,$inpperm 
+ stvx_u $out6,$x50,$out + stvx_u $out7,$x60,$out + addi $out,$out,0x70 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_six: + vncipherlast $out2,$out2,$ivec + vncipherlast $out3,$out3,$in2 + vncipherlast $out4,$out4,$in3 + vncipherlast $out5,$out5,$in4 + vncipherlast $out6,$out6,$in5 + vncipherlast $out7,$out7,$in6 + vmr $ivec,$in7 + + le?vperm $out2,$out2,$out2,$inpperm + le?vperm $out3,$out3,$out3,$inpperm + stvx_u $out2,$x00,$out + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x10,$out + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x20,$out + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x30,$out + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out6,$x40,$out + stvx_u $out7,$x50,$out + addi $out,$out,0x60 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_five: + vncipherlast $out3,$out3,$ivec + vncipherlast $out4,$out4,$in3 + vncipherlast $out5,$out5,$in4 + vncipherlast $out6,$out6,$in5 + vncipherlast $out7,$out7,$in6 + vmr $ivec,$in7 + + le?vperm $out3,$out3,$out3,$inpperm + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x00,$out + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x10,$out + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x20,$out + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out6,$x30,$out + stvx_u $out7,$x40,$out + addi $out,$out,0x50 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_four: + vncipherlast $out4,$out4,$ivec + vncipherlast $out5,$out5,$in4 + vncipherlast $out6,$out6,$in5 + vncipherlast $out7,$out7,$in6 + vmr $ivec,$in7 + + le?vperm $out4,$out4,$out4,$inpperm + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x00,$out + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x10,$out + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out6,$x20,$out + stvx_u $out7,$x30,$out + addi $out,$out,0x40 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_three: + vncipherlast $out5,$out5,$ivec + vncipherlast $out6,$out6,$in5 + vncipherlast $out7,$out7,$in6 + vmr $ivec,$in7 + + le?vperm $out5,$out5,$out5,$inpperm + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x00,$out + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out6,$x10,$out + stvx_u $out7,$x20,$out + addi $out,$out,0x30 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_two: + vncipherlast $out6,$out6,$ivec + vncipherlast $out7,$out7,$in6 + vmr $ivec,$in7 + + le?vperm $out6,$out6,$out6,$inpperm + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out6,$x00,$out + stvx_u $out7,$x10,$out + addi $out,$out,0x20 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_one: + vncipherlast $out7,$out7,$ivec + vmr $ivec,$in7 + + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out7,0,$out + addi $out,$out,0x10 + +Lcbc_dec8x_done: + le?vperm $ivec,$ivec,$ivec,$inpperm + stvx_u $ivec,0,$ivp # write [unaligned] iv + + li r10,`$FRAME+15` + li r11,`$FRAME+31` + stvx $inpperm,r10,$sp # wipe copies of round keys + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + stvx $inpperm,r10,$sp + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + stvx $inpperm,r10,$sp + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + stvx $inpperm,r10,$sp + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + + mtspr 256,$vrsave + lvx v20,r10,$sp # ABI says so + addi r10,r10,32 + lvx v21,r11,$sp + addi r11,r11,32 + lvx v22,r10,$sp + addi r10,r10,32 + lvx v23,r11,$sp + addi r11,r11,32 + lvx v24,r10,$sp + addi r10,r10,32 + lvx v25,r11,$sp + addi r11,r11,32 + lvx v26,r10,$sp + addi r10,r10,32 + lvx v27,r11,$sp + addi r11,r11,32 + lvx v28,r10,$sp + addi r10,r10,32 + lvx v29,r11,$sp + addi r11,r11,32 + lvx v30,r10,$sp + lvx 
v31,r11,$sp + $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) + $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) + $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) + $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) + $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) + $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) + addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` + blr + .long 0 + .byte 0,12,0x04,0,0x80,6,6,0 + .long 0 +.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt +___ +}} }}} + +######################################################################### +{{{ # CTR procedure[s] # +my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10)); +my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); +my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)= + map("v$_",(4..11)); +my $dat=$tmp; + +$code.=<<___; +.globl .${prefix}_ctr32_encrypt_blocks +.align 5 +.${prefix}_ctr32_encrypt_blocks: + ${UCMP}i $len,1 + bltlr- + + lis r0,0xfff0 + mfspr $vrsave,256 + mtspr 256,r0 + + li $idx,15 + vxor $rndkey0,$rndkey0,$rndkey0 + le?vspltisb $tmp,0x0f + + lvx $ivec,0,$ivp # load [unaligned] iv + lvsl $inpperm,0,$ivp + lvx $inptail,$idx,$ivp + vspltisb $one,1 + le?vxor $inpperm,$inpperm,$tmp + vperm $ivec,$ivec,$inptail,$inpperm + vsldoi $one,$rndkey0,$one,1 + + neg r11,$inp + ?lvsl $keyperm,0,$key # prepare for unaligned key + lwz $rounds,240($key) + + lvsr $inpperm,0,r11 # prepare for unaligned load + lvx $inptail,0,$inp + addi $inp,$inp,15 # 15 is not typo + le?vxor $inpperm,$inpperm,$tmp + + srwi $rounds,$rounds,1 + li $idx,16 + subi $rounds,$rounds,1 + + ${UCMP}i $len,8 + bge _aesp8_ctr32_encrypt8x + + ?lvsr $outperm,0,$out # prepare for unaligned store + vspltisb $outmask,-1 + lvx $outhead,0,$out + ?vperm $outmask,$rndkey0,$outmask,$outperm + le?vxor $outperm,$outperm,$tmp + + lvx $rndkey0,0,$key + mtctr $rounds + lvx $rndkey1,$idx,$key + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$ivec,$rndkey0 + lvx $rndkey0,$idx,$key + addi $idx,$idx,16 + b Loop_ctr32_enc + +.align 5 +Loop_ctr32_enc: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipher $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key + addi $idx,$idx,16 + bdnz Loop_ctr32_enc + + vadduwm $ivec,$ivec,$one + vmr $dat,$inptail + lvx $inptail,0,$inp + addi $inp,$inp,16 + subic. 
$len,$len,1 # blocks-- + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key + vperm $dat,$dat,$inptail,$inpperm + li $idx,16 + ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm + lvx $rndkey0,0,$key + vxor $dat,$dat,$rndkey1 # last round key + vcipherlast $inout,$inout,$dat + + lvx $rndkey1,$idx,$key + addi $idx,$idx,16 + vperm $inout,$inout,$inout,$outperm + vsel $dat,$outhead,$inout,$outmask + mtctr $rounds + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vmr $outhead,$inout + vxor $inout,$ivec,$rndkey0 + lvx $rndkey0,$idx,$key + addi $idx,$idx,16 + stvx $dat,0,$out + addi $out,$out,16 + bne Loop_ctr32_enc + + addi $out,$out,-1 + lvx $inout,0,$out # redundant in aligned case + vsel $inout,$outhead,$inout,$outmask + stvx $inout,0,$out + + mtspr 256,$vrsave + blr + .long 0 + .byte 0,12,0x14,0,0,0,6,0 + .long 0 +___ +######################################################################### +{{ # Optimized CTR procedure # +my $key_="r11"; +my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); + $x00=0 if ($flavour =~ /osx/); +my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14)); +my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22)); +my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys + # v26-v31 last 6 round keys +my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment +my ($two,$three,$four)=($outhead,$outperm,$outmask); + +$code.=<<___; +.align 5 +_aesp8_ctr32_encrypt8x: + $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) + li r10,`$FRAME+8*16+15` + li r11,`$FRAME+8*16+31` + stvx v20,r10,$sp # ABI says so + addi r10,r10,32 + stvx v21,r11,$sp + addi r11,r11,32 + stvx v22,r10,$sp + addi r10,r10,32 + stvx v23,r11,$sp + addi r11,r11,32 + stvx v24,r10,$sp + addi r10,r10,32 + stvx v25,r11,$sp + addi r11,r11,32 + stvx v26,r10,$sp + addi r10,r10,32 + stvx v27,r11,$sp + addi r11,r11,32 + stvx v28,r10,$sp + addi r10,r10,32 + stvx v29,r11,$sp + addi r11,r11,32 + stvx v30,r10,$sp + stvx v31,r11,$sp + li r0,-1 + stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave + li $x10,0x10 + $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) + li $x20,0x20 + $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) + li $x30,0x30 + $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) + li $x40,0x40 + $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) + li $x50,0x50 + $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) + li $x60,0x60 + $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) + li $x70,0x70 + mtspr 256,r0 + + subi $rounds,$rounds,3 # -4 in total + + lvx $rndkey0,$x00,$key # load key schedule + lvx v30,$x10,$key + addi $key,$key,0x20 + lvx v31,$x00,$key + ?vperm $rndkey0,$rndkey0,v30,$keyperm + addi $key_,$sp,$FRAME+15 + mtctr $rounds + +Load_ctr32_enc_key: + ?vperm v24,v30,v31,$keyperm + lvx v30,$x10,$key + addi $key,$key,0x20 + stvx v24,$x00,$key_ # off-load round[1] + ?vperm v25,v31,v30,$keyperm + lvx v31,$x00,$key + stvx v25,$x10,$key_ # off-load round[2] + addi $key_,$key_,0x20 + bdnz Load_ctr32_enc_key + + lvx v26,$x10,$key + ?vperm v24,v30,v31,$keyperm + lvx v27,$x20,$key + stvx v24,$x00,$key_ # off-load round[3] + ?vperm v25,v31,v26,$keyperm + lvx v28,$x30,$key + stvx v25,$x10,$key_ # off-load round[4] + addi $key_,$sp,$FRAME+15 # rewind $key_ + ?vperm v26,v26,v27,$keyperm + lvx v29,$x40,$key + ?vperm v27,v27,v28,$keyperm + lvx v30,$x50,$key + ?vperm v28,v28,v29,$keyperm + lvx v31,$x60,$key + ?vperm v29,v29,v30,$keyperm + lvx $out0,$x70,$key # borrow $out0 + ?vperm v30,v30,v31,$keyperm + lvx v24,$x00,$key_ # pre-load round[1] + ?vperm 
v31,v31,$out0,$keyperm + lvx v25,$x10,$key_ # pre-load round[2] + + vadduwm $two,$one,$one + subi $inp,$inp,15 # undo "caller" + $SHL $len,$len,4 + + vadduwm $out1,$ivec,$one # counter values ... + vadduwm $out2,$ivec,$two + vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] + le?li $idx,8 + vadduwm $out3,$out1,$two + vxor $out1,$out1,$rndkey0 + le?lvsl $inpperm,0,$idx + vadduwm $out4,$out2,$two + vxor $out2,$out2,$rndkey0 + le?vspltisb $tmp,0x0f + vadduwm $out5,$out3,$two + vxor $out3,$out3,$rndkey0 + le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u + vadduwm $out6,$out4,$two + vxor $out4,$out4,$rndkey0 + vadduwm $out7,$out5,$two + vxor $out5,$out5,$rndkey0 + vadduwm $ivec,$out6,$two # next counter value + vxor $out6,$out6,$rndkey0 + vxor $out7,$out7,$rndkey0 + + mtctr $rounds + b Loop_ctr32_enc8x +.align 5 +Loop_ctr32_enc8x: + vcipher $out0,$out0,v24 + vcipher $out1,$out1,v24 + vcipher $out2,$out2,v24 + vcipher $out3,$out3,v24 + vcipher $out4,$out4,v24 + vcipher $out5,$out5,v24 + vcipher $out6,$out6,v24 + vcipher $out7,$out7,v24 +Loop_ctr32_enc8x_middle: + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vcipher $out0,$out0,v25 + vcipher $out1,$out1,v25 + vcipher $out2,$out2,v25 + vcipher $out3,$out3,v25 + vcipher $out4,$out4,v25 + vcipher $out5,$out5,v25 + vcipher $out6,$out6,v25 + vcipher $out7,$out7,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_ctr32_enc8x + + subic r11,$len,256 # $len-256, borrow $key_ + vcipher $out0,$out0,v24 + vcipher $out1,$out1,v24 + vcipher $out2,$out2,v24 + vcipher $out3,$out3,v24 + vcipher $out4,$out4,v24 + vcipher $out5,$out5,v24 + vcipher $out6,$out6,v24 + vcipher $out7,$out7,v24 + + subfe r0,r0,r0 # borrow?-1:0 + vcipher $out0,$out0,v25 + vcipher $out1,$out1,v25 + vcipher $out2,$out2,v25 + vcipher $out3,$out3,v25 + vcipher $out4,$out4,v25 + vcipher $out5,$out5,v25 + vcipher $out6,$out6,v25 + vcipher $out7,$out7,v25 + + and r0,r0,r11 + addi $key_,$sp,$FRAME+15 # rewind $key_ + vcipher $out0,$out0,v26 + vcipher $out1,$out1,v26 + vcipher $out2,$out2,v26 + vcipher $out3,$out3,v26 + vcipher $out4,$out4,v26 + vcipher $out5,$out5,v26 + vcipher $out6,$out6,v26 + vcipher $out7,$out7,v26 + lvx v24,$x00,$key_ # re-pre-load round[1] + + subic $len,$len,129 # $len-=129 + vcipher $out0,$out0,v27 + addi $len,$len,1 # $len-=128 really + vcipher $out1,$out1,v27 + vcipher $out2,$out2,v27 + vcipher $out3,$out3,v27 + vcipher $out4,$out4,v27 + vcipher $out5,$out5,v27 + vcipher $out6,$out6,v27 + vcipher $out7,$out7,v27 + lvx v25,$x10,$key_ # re-pre-load round[2] + + vcipher $out0,$out0,v28 + lvx_u $in0,$x00,$inp # load input + vcipher $out1,$out1,v28 + lvx_u $in1,$x10,$inp + vcipher $out2,$out2,v28 + lvx_u $in2,$x20,$inp + vcipher $out3,$out3,v28 + lvx_u $in3,$x30,$inp + vcipher $out4,$out4,v28 + lvx_u $in4,$x40,$inp + vcipher $out5,$out5,v28 + lvx_u $in5,$x50,$inp + vcipher $out6,$out6,v28 + lvx_u $in6,$x60,$inp + vcipher $out7,$out7,v28 + lvx_u $in7,$x70,$inp + addi $inp,$inp,0x80 + + vcipher $out0,$out0,v29 + le?vperm $in0,$in0,$in0,$inpperm + vcipher $out1,$out1,v29 + le?vperm $in1,$in1,$in1,$inpperm + vcipher $out2,$out2,v29 + le?vperm $in2,$in2,$in2,$inpperm + vcipher $out3,$out3,v29 + le?vperm $in3,$in3,$in3,$inpperm + vcipher $out4,$out4,v29 + le?vperm $in4,$in4,$in4,$inpperm + vcipher $out5,$out5,v29 + le?vperm $in5,$in5,$in5,$inpperm + vcipher $out6,$out6,v29 + le?vperm $in6,$in6,$in6,$inpperm + vcipher $out7,$out7,v29 + le?vperm $in7,$in7,$in7,$inpperm + + add $inp,$inp,r0 # $inp is adjusted in such + # way that at exit from the + # loop 
inX-in7 are loaded + # with last "words" + subfe. r0,r0,r0 # borrow?-1:0 + vcipher $out0,$out0,v30 + vxor $in0,$in0,v31 # xor with last round key + vcipher $out1,$out1,v30 + vxor $in1,$in1,v31 + vcipher $out2,$out2,v30 + vxor $in2,$in2,v31 + vcipher $out3,$out3,v30 + vxor $in3,$in3,v31 + vcipher $out4,$out4,v30 + vxor $in4,$in4,v31 + vcipher $out5,$out5,v30 + vxor $in5,$in5,v31 + vcipher $out6,$out6,v30 + vxor $in6,$in6,v31 + vcipher $out7,$out7,v30 + vxor $in7,$in7,v31 + + bne Lctr32_enc8x_break # did $len-129 borrow? + + vcipherlast $in0,$out0,$in0 + vcipherlast $in1,$out1,$in1 + vadduwm $out1,$ivec,$one # counter values ... + vcipherlast $in2,$out2,$in2 + vadduwm $out2,$ivec,$two + vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] + vcipherlast $in3,$out3,$in3 + vadduwm $out3,$out1,$two + vxor $out1,$out1,$rndkey0 + vcipherlast $in4,$out4,$in4 + vadduwm $out4,$out2,$two + vxor $out2,$out2,$rndkey0 + vcipherlast $in5,$out5,$in5 + vadduwm $out5,$out3,$two + vxor $out3,$out3,$rndkey0 + vcipherlast $in6,$out6,$in6 + vadduwm $out6,$out4,$two + vxor $out4,$out4,$rndkey0 + vcipherlast $in7,$out7,$in7 + vadduwm $out7,$out5,$two + vxor $out5,$out5,$rndkey0 + le?vperm $in0,$in0,$in0,$inpperm + vadduwm $ivec,$out6,$two # next counter value + vxor $out6,$out6,$rndkey0 + le?vperm $in1,$in1,$in1,$inpperm + vxor $out7,$out7,$rndkey0 + mtctr $rounds + + vcipher $out0,$out0,v24 + stvx_u $in0,$x00,$out + le?vperm $in2,$in2,$in2,$inpperm + vcipher $out1,$out1,v24 + stvx_u $in1,$x10,$out + le?vperm $in3,$in3,$in3,$inpperm + vcipher $out2,$out2,v24 + stvx_u $in2,$x20,$out + le?vperm $in4,$in4,$in4,$inpperm + vcipher $out3,$out3,v24 + stvx_u $in3,$x30,$out + le?vperm $in5,$in5,$in5,$inpperm + vcipher $out4,$out4,v24 + stvx_u $in4,$x40,$out + le?vperm $in6,$in6,$in6,$inpperm + vcipher $out5,$out5,v24 + stvx_u $in5,$x50,$out + le?vperm $in7,$in7,$in7,$inpperm + vcipher $out6,$out6,v24 + stvx_u $in6,$x60,$out + vcipher $out7,$out7,v24 + stvx_u $in7,$x70,$out + addi $out,$out,0x80 + + b Loop_ctr32_enc8x_middle + +.align 5 +Lctr32_enc8x_break: + cmpwi $len,-0x60 + blt Lctr32_enc8x_one + nop + beq Lctr32_enc8x_two + cmpwi $len,-0x40 + blt Lctr32_enc8x_three + nop + beq Lctr32_enc8x_four + cmpwi $len,-0x20 + blt Lctr32_enc8x_five + nop + beq Lctr32_enc8x_six + cmpwi $len,0x00 + blt Lctr32_enc8x_seven + +Lctr32_enc8x_eight: + vcipherlast $out0,$out0,$in0 + vcipherlast $out1,$out1,$in1 + vcipherlast $out2,$out2,$in2 + vcipherlast $out3,$out3,$in3 + vcipherlast $out4,$out4,$in4 + vcipherlast $out5,$out5,$in5 + vcipherlast $out6,$out6,$in6 + vcipherlast $out7,$out7,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$inpperm + stvx_u $out2,$x20,$out + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x30,$out + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x40,$out + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x50,$out + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out6,$x60,$out + stvx_u $out7,$x70,$out + addi $out,$out,0x80 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_seven: + vcipherlast $out0,$out0,$in1 + vcipherlast $out1,$out1,$in2 + vcipherlast $out2,$out2,$in3 + vcipherlast $out3,$out3,$in4 + vcipherlast $out4,$out4,$in5 + vcipherlast $out5,$out5,$in6 + vcipherlast $out6,$out6,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + le?vperm $out2,$out2,$out2,$inpperm + stvx_u 
$out1,$x10,$out + le?vperm $out3,$out3,$out3,$inpperm + stvx_u $out2,$x20,$out + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x30,$out + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x40,$out + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x50,$out + stvx_u $out6,$x60,$out + addi $out,$out,0x70 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_six: + vcipherlast $out0,$out0,$in2 + vcipherlast $out1,$out1,$in3 + vcipherlast $out2,$out2,$in4 + vcipherlast $out3,$out3,$in5 + vcipherlast $out4,$out4,$in6 + vcipherlast $out5,$out5,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$inpperm + stvx_u $out2,$x20,$out + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x30,$out + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x40,$out + stvx_u $out5,$x50,$out + addi $out,$out,0x60 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_five: + vcipherlast $out0,$out0,$in3 + vcipherlast $out1,$out1,$in4 + vcipherlast $out2,$out2,$in5 + vcipherlast $out3,$out3,$in6 + vcipherlast $out4,$out4,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$inpperm + stvx_u $out2,$x20,$out + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x30,$out + stvx_u $out4,$x40,$out + addi $out,$out,0x50 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_four: + vcipherlast $out0,$out0,$in4 + vcipherlast $out1,$out1,$in5 + vcipherlast $out2,$out2,$in6 + vcipherlast $out3,$out3,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$inpperm + stvx_u $out2,$x20,$out + stvx_u $out3,$x30,$out + addi $out,$out,0x40 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_three: + vcipherlast $out0,$out0,$in5 + vcipherlast $out1,$out1,$in6 + vcipherlast $out2,$out2,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x10,$out + stvx_u $out2,$x20,$out + addi $out,$out,0x30 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_two: + vcipherlast $out0,$out0,$in6 + vcipherlast $out1,$out1,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + stvx_u $out1,$x10,$out + addi $out,$out,0x20 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_one: + vcipherlast $out0,$out0,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + stvx_u $out0,0,$out + addi $out,$out,0x10 + +Lctr32_enc8x_done: + li r10,`$FRAME+15` + li r11,`$FRAME+31` + stvx $inpperm,r10,$sp # wipe copies of round keys + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + stvx $inpperm,r10,$sp + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + stvx $inpperm,r10,$sp + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + stvx $inpperm,r10,$sp + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + + mtspr 256,$vrsave + lvx v20,r10,$sp # ABI says so + addi r10,r10,32 + lvx v21,r11,$sp + addi r11,r11,32 + lvx v22,r10,$sp + addi r10,r10,32 + lvx v23,r11,$sp + addi r11,r11,32 + lvx v24,r10,$sp + addi r10,r10,32 + lvx v25,r11,$sp + addi r11,r11,32 + lvx v26,r10,$sp + addi r10,r10,32 + lvx v27,r11,$sp + addi r11,r11,32 + lvx v28,r10,$sp + addi r10,r10,32 + lvx 
v29,r11,$sp + addi r11,r11,32 + lvx v30,r10,$sp + lvx v31,r11,$sp + $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) + $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) + $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) + $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) + $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) + $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) + addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` + blr + .long 0 + .byte 0,12,0x04,0,0x80,6,6,0 + .long 0 +.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks +___ +}} }}} + +######################################################################### +{{{ # XTS procedures # +# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, # +# const AES_KEY *key1, const AES_KEY *key2, # +# [const] unsigned char iv[16]); # +# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which # +# input tweak value is assumed to be encrypted already, and last tweak # +# value, one suitable for consecutive call on same chunk of data, is # +# written back to original buffer. In addition, in "tweak chaining" # +# mode only complete input blocks are processed. # + +my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10)); +my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2)); +my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7)); +my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12)); +my $taillen = $key2; + + ($inp,$idx) = ($idx,$inp); # reassign + +$code.=<<___; +.globl .${prefix}_xts_encrypt +.align 5 +.${prefix}_xts_encrypt: + mr $inp,r3 # reassign + li r3,-1 + ${UCMP}i $len,16 + bltlr- + + lis r0,0xfff0 + mfspr r12,256 # save vrsave + li r11,0 + mtspr 256,r0 + + vspltisb $seven,0x07 # 0x070707..07 + le?lvsl $leperm,r11,r11 + le?vspltisb $tmp,0x0f + le?vxor $leperm,$leperm,$seven + + li $idx,15 + lvx $tweak,0,$ivp # load [unaligned] iv + lvsl $inpperm,0,$ivp + lvx $inptail,$idx,$ivp + le?vxor $inpperm,$inpperm,$tmp + vperm $tweak,$tweak,$inptail,$inpperm + + neg r11,$inp + lvsr $inpperm,0,r11 # prepare for unaligned load + lvx $inout,0,$inp + addi $inp,$inp,15 # 15 is not typo + le?vxor $inpperm,$inpperm,$tmp + + ${UCMP}i $key2,0 # key2==NULL? 
+ beq Lxts_enc_no_key2 + + ?lvsl $keyperm,0,$key2 # prepare for unaligned key + lwz $rounds,240($key2) + srwi $rounds,$rounds,1 + subi $rounds,$rounds,1 + li $idx,16 + + lvx $rndkey0,0,$key2 + lvx $rndkey1,$idx,$key2 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $tweak,$tweak,$rndkey0 + lvx $rndkey0,$idx,$key2 + addi $idx,$idx,16 + mtctr $rounds + +Ltweak_xts_enc: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $tweak,$tweak,$rndkey1 + lvx $rndkey1,$idx,$key2 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipher $tweak,$tweak,$rndkey0 + lvx $rndkey0,$idx,$key2 + addi $idx,$idx,16 + bdnz Ltweak_xts_enc + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $tweak,$tweak,$rndkey1 + lvx $rndkey1,$idx,$key2 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipherlast $tweak,$tweak,$rndkey0 + + li $ivp,0 # don't chain the tweak + b Lxts_enc + +Lxts_enc_no_key2: + li $idx,-16 + and $len,$len,$idx # in "tweak chaining" + # mode only complete + # blocks are processed +Lxts_enc: + lvx $inptail,0,$inp + addi $inp,$inp,16 + + ?lvsl $keyperm,0,$key1 # prepare for unaligned key + lwz $rounds,240($key1) + srwi $rounds,$rounds,1 + subi $rounds,$rounds,1 + li $idx,16 + + vslb $eighty7,$seven,$seven # 0x808080..80 + vor $eighty7,$eighty7,$seven # 0x878787..87 + vspltisb $tmp,1 # 0x010101..01 + vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 + + ${UCMP}i $len,96 + bge _aesp8_xts_encrypt6x + + andi. $taillen,$len,15 + subic r0,$len,32 + subi $taillen,$taillen,16 + subfe r0,r0,r0 + and r0,r0,$taillen + add $inp,$inp,r0 + + lvx $rndkey0,0,$key1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + vperm $inout,$inout,$inptail,$inpperm + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$inout,$tweak + vxor $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + mtctr $rounds + b Loop_xts_enc + +.align 5 +Loop_xts_enc: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipher $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + bdnz Loop_xts_enc + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + li $idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $rndkey0,$rndkey0,$tweak + vcipherlast $output,$inout,$rndkey0 + + le?vperm $tmp,$output,$output,$leperm + be?nop + le?stvx_u $tmp,0,$out + be?stvx_u $output,0,$out + addi $out,$out,16 + + subic. 
$len,$len,16 + beq Lxts_enc_done + + vmr $inout,$inptail + lvx $inptail,0,$inp + addi $inp,$inp,16 + lvx $rndkey0,0,$key1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + + subic r0,$len,32 + subfe r0,r0,r0 + and r0,r0,$taillen + add $inp,$inp,r0 + + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $tweak,$tweak,$tmp + + vperm $inout,$inout,$inptail,$inpperm + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$inout,$tweak + vxor $output,$output,$rndkey0 # just in case $len<16 + vxor $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + + mtctr $rounds + ${UCMP}i $len,16 + bge Loop_xts_enc + + vxor $output,$output,$tweak + lvsr $inpperm,0,$len # $inpperm is no longer needed + vxor $inptail,$inptail,$inptail # $inptail is no longer needed + vspltisb $tmp,-1 + vperm $inptail,$inptail,$tmp,$inpperm + vsel $inout,$inout,$output,$inptail + + subi r11,$out,17 + subi $out,$out,16 + mtctr $len + li $len,16 +Loop_xts_enc_steal: + lbzu r0,1(r11) + stb r0,16(r11) + bdnz Loop_xts_enc_steal + + mtctr $rounds + b Loop_xts_enc # one more time... + +Lxts_enc_done: + ${UCMP}i $ivp,0 + beq Lxts_enc_ret + + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $tweak,$tweak,$tmp + + le?vperm $tweak,$tweak,$tweak,$leperm + stvx_u $tweak,0,$ivp + +Lxts_enc_ret: + mtspr 256,r12 # restore vrsave + li r3,0 + blr + .long 0 + .byte 0,12,0x04,0,0x80,6,6,0 + .long 0 +.size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt + +.globl .${prefix}_xts_decrypt +.align 5 +.${prefix}_xts_decrypt: + mr $inp,r3 # reassign + li r3,-1 + ${UCMP}i $len,16 + bltlr- + + lis r0,0xfff8 + mfspr r12,256 # save vrsave + li r11,0 + mtspr 256,r0 + + andi. r0,$len,15 + neg r0,r0 + andi. r0,r0,16 + sub $len,$len,r0 + + vspltisb $seven,0x07 # 0x070707..07 + le?lvsl $leperm,r11,r11 + le?vspltisb $tmp,0x0f + le?vxor $leperm,$leperm,$seven + + li $idx,15 + lvx $tweak,0,$ivp # load [unaligned] iv + lvsl $inpperm,0,$ivp + lvx $inptail,$idx,$ivp + le?vxor $inpperm,$inpperm,$tmp + vperm $tweak,$tweak,$inptail,$inpperm + + neg r11,$inp + lvsr $inpperm,0,r11 # prepare for unaligned load + lvx $inout,0,$inp + addi $inp,$inp,15 # 15 is not typo + le?vxor $inpperm,$inpperm,$tmp + + ${UCMP}i $key2,0 # key2==NULL? + beq Lxts_dec_no_key2 + + ?lvsl $keyperm,0,$key2 # prepare for unaligned key + lwz $rounds,240($key2) + srwi $rounds,$rounds,1 + subi $rounds,$rounds,1 + li $idx,16 + + lvx $rndkey0,0,$key2 + lvx $rndkey1,$idx,$key2 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $tweak,$tweak,$rndkey0 + lvx $rndkey0,$idx,$key2 + addi $idx,$idx,16 + mtctr $rounds + +Ltweak_xts_dec: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $tweak,$tweak,$rndkey1 + lvx $rndkey1,$idx,$key2 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipher $tweak,$tweak,$rndkey0 + lvx $rndkey0,$idx,$key2 + addi $idx,$idx,16 + bdnz Ltweak_xts_dec + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $tweak,$tweak,$rndkey1 + lvx $rndkey1,$idx,$key2 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipherlast $tweak,$tweak,$rndkey0 + + li $ivp,0 # don't chain the tweak + b Lxts_dec + +Lxts_dec_no_key2: + neg $idx,$len + andi. 
$idx,$idx,15 + add $len,$len,$idx # in "tweak chaining" + # mode only complete + # blocks are processed +Lxts_dec: + lvx $inptail,0,$inp + addi $inp,$inp,16 + + ?lvsl $keyperm,0,$key1 # prepare for unaligned key + lwz $rounds,240($key1) + srwi $rounds,$rounds,1 + subi $rounds,$rounds,1 + li $idx,16 + + vslb $eighty7,$seven,$seven # 0x808080..80 + vor $eighty7,$eighty7,$seven # 0x878787..87 + vspltisb $tmp,1 # 0x010101..01 + vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 + + ${UCMP}i $len,96 + bge _aesp8_xts_decrypt6x + + lvx $rndkey0,0,$key1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + vperm $inout,$inout,$inptail,$inpperm + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$inout,$tweak + vxor $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + mtctr $rounds + + ${UCMP}i $len,16 + blt Ltail_xts_dec + be?b Loop_xts_dec + +.align 5 +Loop_xts_dec: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vncipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vncipher $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + bdnz Loop_xts_dec + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vncipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + li $idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $rndkey0,$rndkey0,$tweak + vncipherlast $output,$inout,$rndkey0 + + le?vperm $tmp,$output,$output,$leperm + be?nop + le?stvx_u $tmp,0,$out + be?stvx_u $output,0,$out + addi $out,$out,16 + + subic. $len,$len,16 + beq Lxts_dec_done + + vmr $inout,$inptail + lvx $inptail,0,$inp + addi $inp,$inp,16 + lvx $rndkey0,0,$key1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $tweak,$tweak,$tmp + + vperm $inout,$inout,$inptail,$inpperm + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$inout,$tweak + vxor $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + + mtctr $rounds + ${UCMP}i $len,16 + bge Loop_xts_dec + +Ltail_xts_dec: + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak1,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $tweak1,$tweak1,$tmp + + subi $inp,$inp,16 + add $inp,$inp,$len + + vxor $inout,$inout,$tweak # :-( + vxor $inout,$inout,$tweak1 # :-) + +Loop_xts_dec_short: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vncipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vncipher $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + bdnz Loop_xts_dec_short + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vncipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + li $idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $rndkey0,$rndkey0,$tweak1 + vncipherlast $output,$inout,$rndkey0 + + le?vperm $tmp,$output,$output,$leperm + be?nop + le?stvx_u $tmp,0,$out + be?stvx_u $output,0,$out + + vmr $inout,$inptail + lvx $inptail,0,$inp + #addi $inp,$inp,16 + lvx $rndkey0,0,$key1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + vperm $inout,$inout,$inptail,$inpperm + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + + lvsr $inpperm,0,$len # $inpperm is no longer needed + vxor $inptail,$inptail,$inptail # $inptail is no longer needed + vspltisb $tmp,-1 + vperm $inptail,$inptail,$tmp,$inpperm + vsel $inout,$inout,$output,$inptail + + vxor $rndkey0,$rndkey0,$tweak + vxor $inout,$inout,$rndkey0 + lvx 
$rndkey0,$idx,$key1 + addi $idx,$idx,16 + + subi r11,$out,1 + mtctr $len + li $len,16 +Loop_xts_dec_steal: + lbzu r0,1(r11) + stb r0,16(r11) + bdnz Loop_xts_dec_steal + + mtctr $rounds + b Loop_xts_dec # one more time... + +Lxts_dec_done: + ${UCMP}i $ivp,0 + beq Lxts_dec_ret + + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $tweak,$tweak,$tmp + + le?vperm $tweak,$tweak,$tweak,$leperm + stvx_u $tweak,0,$ivp + +Lxts_dec_ret: + mtspr 256,r12 # restore vrsave + li r3,0 + blr + .long 0 + .byte 0,12,0x04,0,0x80,6,6,0 + .long 0 +.size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt +___ +######################################################################### +{{ # Optimized XTS procedures # +my $key_=$key2; +my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31)); + $x00=0 if ($flavour =~ /osx/); +my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5)); +my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16)); +my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22)); +my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys + # v26-v31 last 6 round keys +my ($keyperm)=($out0); # aliases with "caller", redundant assignment +my $taillen=$x70; + +$code.=<<___; +.align 5 +_aesp8_xts_encrypt6x: + $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) + mflr r11 + li r7,`$FRAME+8*16+15` + li r3,`$FRAME+8*16+31` + $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) + stvx v20,r7,$sp # ABI says so + addi r7,r7,32 + stvx v21,r3,$sp + addi r3,r3,32 + stvx v22,r7,$sp + addi r7,r7,32 + stvx v23,r3,$sp + addi r3,r3,32 + stvx v24,r7,$sp + addi r7,r7,32 + stvx v25,r3,$sp + addi r3,r3,32 + stvx v26,r7,$sp + addi r7,r7,32 + stvx v27,r3,$sp + addi r3,r3,32 + stvx v28,r7,$sp + addi r7,r7,32 + stvx v29,r3,$sp + addi r3,r3,32 + stvx v30,r7,$sp + stvx v31,r3,$sp + li r0,-1 + stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave + li $x10,0x10 + $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) + li $x20,0x20 + $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) + li $x30,0x30 + $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) + li $x40,0x40 + $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) + li $x50,0x50 + $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) + li $x60,0x60 + $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) + li $x70,0x70 + mtspr 256,r0 + + subi $rounds,$rounds,3 # -4 in total + + lvx $rndkey0,$x00,$key1 # load key schedule + lvx v30,$x10,$key1 + addi $key1,$key1,0x20 + lvx v31,$x00,$key1 + ?vperm $rndkey0,$rndkey0,v30,$keyperm + addi $key_,$sp,$FRAME+15 + mtctr $rounds + +Load_xts_enc_key: + ?vperm v24,v30,v31,$keyperm + lvx v30,$x10,$key1 + addi $key1,$key1,0x20 + stvx v24,$x00,$key_ # off-load round[1] + ?vperm v25,v31,v30,$keyperm + lvx v31,$x00,$key1 + stvx v25,$x10,$key_ # off-load round[2] + addi $key_,$key_,0x20 + bdnz Load_xts_enc_key + + lvx v26,$x10,$key1 + ?vperm v24,v30,v31,$keyperm + lvx v27,$x20,$key1 + stvx v24,$x00,$key_ # off-load round[3] + ?vperm v25,v31,v26,$keyperm + lvx v28,$x30,$key1 + stvx v25,$x10,$key_ # off-load round[4] + addi $key_,$sp,$FRAME+15 # rewind $key_ + ?vperm v26,v26,v27,$keyperm + lvx v29,$x40,$key1 + ?vperm v27,v27,v28,$keyperm + lvx v30,$x50,$key1 + ?vperm v28,v28,v29,$keyperm + lvx v31,$x60,$key1 + ?vperm v29,v29,v30,$keyperm + lvx $twk5,$x70,$key1 # borrow $twk5 + ?vperm v30,v30,v31,$keyperm + lvx v24,$x00,$key_ # pre-load round[1] + ?vperm v31,v31,$twk5,$keyperm + lvx v25,$x10,$key_ # pre-load round[2] + + vperm $in0,$inout,$inptail,$inpperm + subi $inp,$inp,31 # undo "caller" + vxor 
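+# The Loop_xts_enc_steal/Loop_xts_dec_steal byte-copy loops above implement
+# XTS ciphertext stealing: when the input is not a multiple of 16 bytes,
+# the short final block borrows the trailing bytes of the previous block's
+# output and the reassembled block is run through the cipher "one more
+# time".  Perl sketch of the encrypt-side arrangement (hypothetical $enc
+# callback = one tweaked block encryption; $b = tail length in bytes):
+#
+#   sub xts_enc_steal {
+#       my ($enc, $t_prev, $t_last, $p_prev, $p_tail, $b) = @_;
+#       my $cc     = $enc->($t_prev, $p_prev);    # last full plaintext block
+#       my $c_tail = substr($cc, 0, $b);          # becomes the short final block
+#       my $pp     = $p_tail . substr($cc, $b);   # steal bytes to refill a block
+#       my $c_prev = $enc->($t_last, $pp);        # one more pass, next tweak
+#       return ($c_prev, $c_tail);
+#   }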
$twk0,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $out0,$in0,$twk0 + vxor $tweak,$tweak,$tmp + + lvx_u $in1,$x10,$inp + vxor $twk1,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in1,$in1,$in1,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out1,$in1,$twk1 + vxor $tweak,$tweak,$tmp + + lvx_u $in2,$x20,$inp + andi. $taillen,$len,15 + vxor $twk2,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in2,$in2,$in2,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out2,$in2,$twk2 + vxor $tweak,$tweak,$tmp + + lvx_u $in3,$x30,$inp + sub $len,$len,$taillen + vxor $twk3,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in3,$in3,$in3,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out3,$in3,$twk3 + vxor $tweak,$tweak,$tmp + + lvx_u $in4,$x40,$inp + subi $len,$len,0x60 + vxor $twk4,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in4,$in4,$in4,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out4,$in4,$twk4 + vxor $tweak,$tweak,$tmp + + lvx_u $in5,$x50,$inp + addi $inp,$inp,0x60 + vxor $twk5,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in5,$in5,$in5,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out5,$in5,$twk5 + vxor $tweak,$tweak,$tmp + + vxor v31,v31,$rndkey0 + mtctr $rounds + b Loop_xts_enc6x + +.align 5 +Loop_xts_enc6x: + vcipher $out0,$out0,v24 + vcipher $out1,$out1,v24 + vcipher $out2,$out2,v24 + vcipher $out3,$out3,v24 + vcipher $out4,$out4,v24 + vcipher $out5,$out5,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vcipher $out0,$out0,v25 + vcipher $out1,$out1,v25 + vcipher $out2,$out2,v25 + vcipher $out3,$out3,v25 + vcipher $out4,$out4,v25 + vcipher $out5,$out5,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_xts_enc6x + + subic $len,$len,96 # $len-=96 + vxor $in0,$twk0,v31 # xor with last round key + vcipher $out0,$out0,v24 + vcipher $out1,$out1,v24 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk0,$tweak,$rndkey0 + vaddubm $tweak,$tweak,$tweak + vcipher $out2,$out2,v24 + vcipher $out3,$out3,v24 + vsldoi $tmp,$tmp,$tmp,15 + vcipher $out4,$out4,v24 + vcipher $out5,$out5,v24 + + subfe. 
r0,r0,r0 # borrow?-1:0 + vand $tmp,$tmp,$eighty7 + vcipher $out0,$out0,v25 + vcipher $out1,$out1,v25 + vxor $tweak,$tweak,$tmp + vcipher $out2,$out2,v25 + vcipher $out3,$out3,v25 + vxor $in1,$twk1,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk1,$tweak,$rndkey0 + vcipher $out4,$out4,v25 + vcipher $out5,$out5,v25 + + and r0,r0,$len + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vcipher $out0,$out0,v26 + vcipher $out1,$out1,v26 + vand $tmp,$tmp,$eighty7 + vcipher $out2,$out2,v26 + vcipher $out3,$out3,v26 + vxor $tweak,$tweak,$tmp + vcipher $out4,$out4,v26 + vcipher $out5,$out5,v26 + + add $inp,$inp,r0 # $inp is adjusted in such + # way that at exit from the + # loop inX-in5 are loaded + # with last "words" + vxor $in2,$twk2,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk2,$tweak,$rndkey0 + vaddubm $tweak,$tweak,$tweak + vcipher $out0,$out0,v27 + vcipher $out1,$out1,v27 + vsldoi $tmp,$tmp,$tmp,15 + vcipher $out2,$out2,v27 + vcipher $out3,$out3,v27 + vand $tmp,$tmp,$eighty7 + vcipher $out4,$out4,v27 + vcipher $out5,$out5,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vxor $tweak,$tweak,$tmp + vcipher $out0,$out0,v28 + vcipher $out1,$out1,v28 + vxor $in3,$twk3,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk3,$tweak,$rndkey0 + vcipher $out2,$out2,v28 + vcipher $out3,$out3,v28 + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vcipher $out4,$out4,v28 + vcipher $out5,$out5,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + vand $tmp,$tmp,$eighty7 + + vcipher $out0,$out0,v29 + vcipher $out1,$out1,v29 + vxor $tweak,$tweak,$tmp + vcipher $out2,$out2,v29 + vcipher $out3,$out3,v29 + vxor $in4,$twk4,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk4,$tweak,$rndkey0 + vcipher $out4,$out4,v29 + vcipher $out5,$out5,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + + vcipher $out0,$out0,v30 + vcipher $out1,$out1,v30 + vand $tmp,$tmp,$eighty7 + vcipher $out2,$out2,v30 + vcipher $out3,$out3,v30 + vxor $tweak,$tweak,$tmp + vcipher $out4,$out4,v30 + vcipher $out5,$out5,v30 + vxor $in5,$twk5,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk5,$tweak,$rndkey0 + + vcipherlast $out0,$out0,$in0 + lvx_u $in0,$x00,$inp # load next input block + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vcipherlast $out1,$out1,$in1 + lvx_u $in1,$x10,$inp + vcipherlast $out2,$out2,$in2 + le?vperm $in0,$in0,$in0,$leperm + lvx_u $in2,$x20,$inp + vand $tmp,$tmp,$eighty7 + vcipherlast $out3,$out3,$in3 + le?vperm $in1,$in1,$in1,$leperm + lvx_u $in3,$x30,$inp + vcipherlast $out4,$out4,$in4 + le?vperm $in2,$in2,$in2,$leperm + lvx_u $in4,$x40,$inp + vxor $tweak,$tweak,$tmp + vcipherlast $tmp,$out5,$in5 # last block might be needed + # in stealing mode + le?vperm $in3,$in3,$in3,$leperm + lvx_u $in5,$x50,$inp + addi $inp,$inp,0x60 + le?vperm $in4,$in4,$in4,$leperm + le?vperm $in5,$in5,$in5,$leperm + + le?vperm $out0,$out0,$out0,$leperm + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk0 + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + vxor $out1,$in1,$twk1 + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + vxor $out2,$in2,$twk2 + le?vperm $out4,$out4,$out4,$leperm + stvx_u $out3,$x30,$out + vxor $out3,$in3,$twk3 + le?vperm $out5,$tmp,$tmp,$leperm + stvx_u $out4,$x40,$out + vxor $out4,$in4,$twk4 + le?stvx_u $out5,$x50,$out + be?stvx_u $tmp, $x50,$out + vxor $out5,$in5,$twk5 + addi $out,$out,0x60 + + mtctr 
$rounds + beq Loop_xts_enc6x # did $len-=96 borrow? + + addic. $len,$len,0x60 + beq Lxts_enc6x_zero + cmpwi $len,0x20 + blt Lxts_enc6x_one + nop + beq Lxts_enc6x_two + cmpwi $len,0x40 + blt Lxts_enc6x_three + nop + beq Lxts_enc6x_four + +Lxts_enc6x_five: + vxor $out0,$in1,$twk0 + vxor $out1,$in2,$twk1 + vxor $out2,$in3,$twk2 + vxor $out3,$in4,$twk3 + vxor $out4,$in5,$twk4 + + bl _aesp8_xts_enc5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk5 # unused tweak + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + vxor $tmp,$out4,$twk5 # last block prep for stealing + le?vperm $out4,$out4,$out4,$leperm + stvx_u $out3,$x30,$out + stvx_u $out4,$x40,$out + addi $out,$out,0x50 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_four: + vxor $out0,$in2,$twk0 + vxor $out1,$in3,$twk1 + vxor $out2,$in4,$twk2 + vxor $out3,$in5,$twk3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_enc5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk4 # unused tweak + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + vxor $tmp,$out3,$twk4 # last block prep for stealing + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + stvx_u $out3,$x30,$out + addi $out,$out,0x40 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_three: + vxor $out0,$in3,$twk0 + vxor $out1,$in4,$twk1 + vxor $out2,$in5,$twk2 + vxor $out3,$out3,$out3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_enc5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk3 # unused tweak + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $tmp,$out2,$twk3 # last block prep for stealing + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + stvx_u $out2,$x20,$out + addi $out,$out,0x30 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_two: + vxor $out0,$in4,$twk0 + vxor $out1,$in5,$twk1 + vxor $out2,$out2,$out2 + vxor $out3,$out3,$out3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_enc5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk2 # unused tweak + vxor $tmp,$out1,$twk2 # last block prep for stealing + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + stvx_u $out1,$x10,$out + addi $out,$out,0x20 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_one: + vxor $out0,$in5,$twk0 + nop +Loop_xts_enc1x: + vcipher $out0,$out0,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vcipher $out0,$out0,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_xts_enc1x + + add $inp,$inp,$taillen + cmpwi $taillen,0 + vcipher $out0,$out0,v24 + + subi $inp,$inp,16 + vcipher $out0,$out0,v25 + + lvsr $inpperm,0,$taillen + vcipher $out0,$out0,v26 + + lvx_u $in0,0,$inp + vcipher $out0,$out0,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vcipher $out0,$out0,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + + vcipher $out0,$out0,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vxor $twk0,$twk0,v31 + + le?vperm $in0,$in0,$in0,$leperm + vcipher $out0,$out0,v30 + + vperm $in0,$in0,$in0,$inpperm + vcipherlast $out0,$out0,$twk0 + + vmr $twk0,$twk1 # unused tweak + vxor $tmp,$out0,$twk1 # last block prep for stealing + le?vperm $out0,$out0,$out0,$leperm + stvx_u $out0,$x00,$out # store output + addi $out,$out,0x10 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_zero: + cmpwi 
$taillen,0 + beq Lxts_enc6x_done + + add $inp,$inp,$taillen + subi $inp,$inp,16 + lvx_u $in0,0,$inp + lvsr $inpperm,0,$taillen # $in5 is no more + le?vperm $in0,$in0,$in0,$leperm + vperm $in0,$in0,$in0,$inpperm + vxor $tmp,$tmp,$twk0 +Lxts_enc6x_steal: + vxor $in0,$in0,$twk0 + vxor $out0,$out0,$out0 + vspltisb $out1,-1 + vperm $out0,$out0,$out1,$inpperm + vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember? + + subi r30,$out,17 + subi $out,$out,16 + mtctr $taillen +Loop_xts_enc6x_steal: + lbzu r0,1(r30) + stb r0,16(r30) + bdnz Loop_xts_enc6x_steal + + li $taillen,0 + mtctr $rounds + b Loop_xts_enc1x # one more time... + +.align 4 +Lxts_enc6x_done: + ${UCMP}i $ivp,0 + beq Lxts_enc6x_ret + + vxor $tweak,$twk0,$rndkey0 + le?vperm $tweak,$tweak,$tweak,$leperm + stvx_u $tweak,0,$ivp + +Lxts_enc6x_ret: + mtlr r11 + li r10,`$FRAME+15` + li r11,`$FRAME+31` + stvx $seven,r10,$sp # wipe copies of round keys + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + + mtspr 256,$vrsave + lvx v20,r10,$sp # ABI says so + addi r10,r10,32 + lvx v21,r11,$sp + addi r11,r11,32 + lvx v22,r10,$sp + addi r10,r10,32 + lvx v23,r11,$sp + addi r11,r11,32 + lvx v24,r10,$sp + addi r10,r10,32 + lvx v25,r11,$sp + addi r11,r11,32 + lvx v26,r10,$sp + addi r10,r10,32 + lvx v27,r11,$sp + addi r11,r11,32 + lvx v28,r10,$sp + addi r10,r10,32 + lvx v29,r11,$sp + addi r11,r11,32 + lvx v30,r10,$sp + lvx v31,r11,$sp + $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) + $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) + $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) + $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) + $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) + $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) + addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` + blr + .long 0 + .byte 0,12,0x04,1,0x80,6,6,0 + .long 0 + +.align 5 +_aesp8_xts_enc5x: + vcipher $out0,$out0,v24 + vcipher $out1,$out1,v24 + vcipher $out2,$out2,v24 + vcipher $out3,$out3,v24 + vcipher $out4,$out4,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vcipher $out0,$out0,v25 + vcipher $out1,$out1,v25 + vcipher $out2,$out2,v25 + vcipher $out3,$out3,v25 + vcipher $out4,$out4,v25 + lvx v25,$x10,$key_ # round[4] + bdnz _aesp8_xts_enc5x + + add $inp,$inp,$taillen + cmpwi $taillen,0 + vcipher $out0,$out0,v24 + vcipher $out1,$out1,v24 + vcipher $out2,$out2,v24 + vcipher $out3,$out3,v24 + vcipher $out4,$out4,v24 + + subi $inp,$inp,16 + vcipher $out0,$out0,v25 + vcipher $out1,$out1,v25 + vcipher $out2,$out2,v25 + vcipher $out3,$out3,v25 + vcipher $out4,$out4,v25 + vxor $twk0,$twk0,v31 + + vcipher $out0,$out0,v26 + lvsr $inpperm,0,$taillen # $in5 is no more + vcipher $out1,$out1,v26 + vcipher $out2,$out2,v26 + vcipher $out3,$out3,v26 + vcipher $out4,$out4,v26 + vxor $in1,$twk1,v31 + + vcipher $out0,$out0,v27 + lvx_u $in0,0,$inp + vcipher $out1,$out1,v27 + vcipher $out2,$out2,v27 + vcipher $out3,$out3,v27 + vcipher $out4,$out4,v27 + vxor $in2,$twk2,v31 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vcipher $out0,$out0,v28 + vcipher $out1,$out1,v28 + vcipher $out2,$out2,v28 + vcipher $out3,$out3,v28 + vcipher $out4,$out4,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + vxor $in3,$twk3,v31 + + vcipher $out0,$out0,v29 + le?vperm $in0,$in0,$in0,$leperm + vcipher $out1,$out1,v29 + vcipher $out2,$out2,v29 + vcipher $out3,$out3,v29 + vcipher $out4,$out4,v29 + lvx v25,$x10,$key_ # 
re-pre-load round[2] + vxor $in4,$twk4,v31 + + vcipher $out0,$out0,v30 + vperm $in0,$in0,$in0,$inpperm + vcipher $out1,$out1,v30 + vcipher $out2,$out2,v30 + vcipher $out3,$out3,v30 + vcipher $out4,$out4,v30 + + vcipherlast $out0,$out0,$twk0 + vcipherlast $out1,$out1,$in1 + vcipherlast $out2,$out2,$in2 + vcipherlast $out3,$out3,$in3 + vcipherlast $out4,$out4,$in4 + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 + +.align 5 +_aesp8_xts_decrypt6x: + $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) + mflr r11 + li r7,`$FRAME+8*16+15` + li r3,`$FRAME+8*16+31` + $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) + stvx v20,r7,$sp # ABI says so + addi r7,r7,32 + stvx v21,r3,$sp + addi r3,r3,32 + stvx v22,r7,$sp + addi r7,r7,32 + stvx v23,r3,$sp + addi r3,r3,32 + stvx v24,r7,$sp + addi r7,r7,32 + stvx v25,r3,$sp + addi r3,r3,32 + stvx v26,r7,$sp + addi r7,r7,32 + stvx v27,r3,$sp + addi r3,r3,32 + stvx v28,r7,$sp + addi r7,r7,32 + stvx v29,r3,$sp + addi r3,r3,32 + stvx v30,r7,$sp + stvx v31,r3,$sp + li r0,-1 + stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave + li $x10,0x10 + $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) + li $x20,0x20 + $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) + li $x30,0x30 + $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) + li $x40,0x40 + $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) + li $x50,0x50 + $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) + li $x60,0x60 + $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) + li $x70,0x70 + mtspr 256,r0 + + subi $rounds,$rounds,3 # -4 in total + + lvx $rndkey0,$x00,$key1 # load key schedule + lvx v30,$x10,$key1 + addi $key1,$key1,0x20 + lvx v31,$x00,$key1 + ?vperm $rndkey0,$rndkey0,v30,$keyperm + addi $key_,$sp,$FRAME+15 + mtctr $rounds + +Load_xts_dec_key: + ?vperm v24,v30,v31,$keyperm + lvx v30,$x10,$key1 + addi $key1,$key1,0x20 + stvx v24,$x00,$key_ # off-load round[1] + ?vperm v25,v31,v30,$keyperm + lvx v31,$x00,$key1 + stvx v25,$x10,$key_ # off-load round[2] + addi $key_,$key_,0x20 + bdnz Load_xts_dec_key + + lvx v26,$x10,$key1 + ?vperm v24,v30,v31,$keyperm + lvx v27,$x20,$key1 + stvx v24,$x00,$key_ # off-load round[3] + ?vperm v25,v31,v26,$keyperm + lvx v28,$x30,$key1 + stvx v25,$x10,$key_ # off-load round[4] + addi $key_,$sp,$FRAME+15 # rewind $key_ + ?vperm v26,v26,v27,$keyperm + lvx v29,$x40,$key1 + ?vperm v27,v27,v28,$keyperm + lvx v30,$x50,$key1 + ?vperm v28,v28,v29,$keyperm + lvx v31,$x60,$key1 + ?vperm v29,v29,v30,$keyperm + lvx $twk5,$x70,$key1 # borrow $twk5 + ?vperm v30,v30,v31,$keyperm + lvx v24,$x00,$key_ # pre-load round[1] + ?vperm v31,v31,$twk5,$keyperm + lvx v25,$x10,$key_ # pre-load round[2] + + vperm $in0,$inout,$inptail,$inpperm + subi $inp,$inp,31 # undo "caller" + vxor $twk0,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $out0,$in0,$twk0 + vxor $tweak,$tweak,$tmp + + lvx_u $in1,$x10,$inp + vxor $twk1,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in1,$in1,$in1,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out1,$in1,$twk1 + vxor $tweak,$tweak,$tmp + + lvx_u $in2,$x20,$inp + andi. 
$taillen,$len,15 + vxor $twk2,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in2,$in2,$in2,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out2,$in2,$twk2 + vxor $tweak,$tweak,$tmp + + lvx_u $in3,$x30,$inp + sub $len,$len,$taillen + vxor $twk3,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in3,$in3,$in3,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out3,$in3,$twk3 + vxor $tweak,$tweak,$tmp + + lvx_u $in4,$x40,$inp + subi $len,$len,0x60 + vxor $twk4,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in4,$in4,$in4,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out4,$in4,$twk4 + vxor $tweak,$tweak,$tmp + + lvx_u $in5,$x50,$inp + addi $inp,$inp,0x60 + vxor $twk5,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in5,$in5,$in5,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out5,$in5,$twk5 + vxor $tweak,$tweak,$tmp + + vxor v31,v31,$rndkey0 + mtctr $rounds + b Loop_xts_dec6x + +.align 5 +Loop_xts_dec6x: + vncipher $out0,$out0,v24 + vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + vncipher $out5,$out5,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vncipher $out0,$out0,v25 + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + vncipher $out5,$out5,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_xts_dec6x + + subic $len,$len,96 # $len-=96 + vxor $in0,$twk0,v31 # xor with last round key + vncipher $out0,$out0,v24 + vncipher $out1,$out1,v24 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk0,$tweak,$rndkey0 + vaddubm $tweak,$tweak,$tweak + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vsldoi $tmp,$tmp,$tmp,15 + vncipher $out4,$out4,v24 + vncipher $out5,$out5,v24 + + subfe. 
r0,r0,r0 # borrow?-1:0 + vand $tmp,$tmp,$eighty7 + vncipher $out0,$out0,v25 + vncipher $out1,$out1,v25 + vxor $tweak,$tweak,$tmp + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vxor $in1,$twk1,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk1,$tweak,$rndkey0 + vncipher $out4,$out4,v25 + vncipher $out5,$out5,v25 + + and r0,r0,$len + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vncipher $out0,$out0,v26 + vncipher $out1,$out1,v26 + vand $tmp,$tmp,$eighty7 + vncipher $out2,$out2,v26 + vncipher $out3,$out3,v26 + vxor $tweak,$tweak,$tmp + vncipher $out4,$out4,v26 + vncipher $out5,$out5,v26 + + add $inp,$inp,r0 # $inp is adjusted in such + # way that at exit from the + # loop inX-in5 are loaded + # with last "words" + vxor $in2,$twk2,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk2,$tweak,$rndkey0 + vaddubm $tweak,$tweak,$tweak + vncipher $out0,$out0,v27 + vncipher $out1,$out1,v27 + vsldoi $tmp,$tmp,$tmp,15 + vncipher $out2,$out2,v27 + vncipher $out3,$out3,v27 + vand $tmp,$tmp,$eighty7 + vncipher $out4,$out4,v27 + vncipher $out5,$out5,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vxor $tweak,$tweak,$tmp + vncipher $out0,$out0,v28 + vncipher $out1,$out1,v28 + vxor $in3,$twk3,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk3,$tweak,$rndkey0 + vncipher $out2,$out2,v28 + vncipher $out3,$out3,v28 + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vncipher $out4,$out4,v28 + vncipher $out5,$out5,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + vand $tmp,$tmp,$eighty7 + + vncipher $out0,$out0,v29 + vncipher $out1,$out1,v29 + vxor $tweak,$tweak,$tmp + vncipher $out2,$out2,v29 + vncipher $out3,$out3,v29 + vxor $in4,$twk4,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk4,$tweak,$rndkey0 + vncipher $out4,$out4,v29 + vncipher $out5,$out5,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + + vncipher $out0,$out0,v30 + vncipher $out1,$out1,v30 + vand $tmp,$tmp,$eighty7 + vncipher $out2,$out2,v30 + vncipher $out3,$out3,v30 + vxor $tweak,$tweak,$tmp + vncipher $out4,$out4,v30 + vncipher $out5,$out5,v30 + vxor $in5,$twk5,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk5,$tweak,$rndkey0 + + vncipherlast $out0,$out0,$in0 + lvx_u $in0,$x00,$inp # load next input block + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vncipherlast $out1,$out1,$in1 + lvx_u $in1,$x10,$inp + vncipherlast $out2,$out2,$in2 + le?vperm $in0,$in0,$in0,$leperm + lvx_u $in2,$x20,$inp + vand $tmp,$tmp,$eighty7 + vncipherlast $out3,$out3,$in3 + le?vperm $in1,$in1,$in1,$leperm + lvx_u $in3,$x30,$inp + vncipherlast $out4,$out4,$in4 + le?vperm $in2,$in2,$in2,$leperm + lvx_u $in4,$x40,$inp + vxor $tweak,$tweak,$tmp + vncipherlast $out5,$out5,$in5 + le?vperm $in3,$in3,$in3,$leperm + lvx_u $in5,$x50,$inp + addi $inp,$inp,0x60 + le?vperm $in4,$in4,$in4,$leperm + le?vperm $in5,$in5,$in5,$leperm + + le?vperm $out0,$out0,$out0,$leperm + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk0 + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + vxor $out1,$in1,$twk1 + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + vxor $out2,$in2,$twk2 + le?vperm $out4,$out4,$out4,$leperm + stvx_u $out3,$x30,$out + vxor $out3,$in3,$twk3 + le?vperm $out5,$out5,$out5,$leperm + stvx_u $out4,$x40,$out + vxor $out4,$in4,$twk4 + stvx_u $out5,$x50,$out + vxor $out5,$in5,$twk5 + addi $out,$out,0x60 + + mtctr $rounds + beq Loop_xts_dec6x # did 
$len-=96 borrow? + + addic. $len,$len,0x60 + beq Lxts_dec6x_zero + cmpwi $len,0x20 + blt Lxts_dec6x_one + nop + beq Lxts_dec6x_two + cmpwi $len,0x40 + blt Lxts_dec6x_three + nop + beq Lxts_dec6x_four + +Lxts_dec6x_five: + vxor $out0,$in1,$twk0 + vxor $out1,$in2,$twk1 + vxor $out2,$in3,$twk2 + vxor $out3,$in4,$twk3 + vxor $out4,$in5,$twk4 + + bl _aesp8_xts_dec5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk5 # unused tweak + vxor $twk1,$tweak,$rndkey0 + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk1 + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + le?vperm $out4,$out4,$out4,$leperm + stvx_u $out3,$x30,$out + stvx_u $out4,$x40,$out + addi $out,$out,0x50 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_four: + vxor $out0,$in2,$twk0 + vxor $out1,$in3,$twk1 + vxor $out2,$in4,$twk2 + vxor $out3,$in5,$twk3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_dec5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk4 # unused tweak + vmr $twk1,$twk5 + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk5 + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + stvx_u $out3,$x30,$out + addi $out,$out,0x40 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_three: + vxor $out0,$in3,$twk0 + vxor $out1,$in4,$twk1 + vxor $out2,$in5,$twk2 + vxor $out3,$out3,$out3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_dec5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk3 # unused tweak + vmr $twk1,$twk4 + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk4 + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + stvx_u $out2,$x20,$out + addi $out,$out,0x30 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_two: + vxor $out0,$in4,$twk0 + vxor $out1,$in5,$twk1 + vxor $out2,$out2,$out2 + vxor $out3,$out3,$out3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_dec5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk2 # unused tweak + vmr $twk1,$twk3 + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk3 + stvx_u $out1,$x10,$out + addi $out,$out,0x20 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_one: + vxor $out0,$in5,$twk0 + nop +Loop_xts_dec1x: + vncipher $out0,$out0,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vncipher $out0,$out0,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_xts_dec1x + + subi r0,$taillen,1 + vncipher $out0,$out0,v24 + + andi. 
r0,r0,16 + cmpwi $taillen,0 + vncipher $out0,$out0,v25 + + sub $inp,$inp,r0 + vncipher $out0,$out0,v26 + + lvx_u $in0,0,$inp + vncipher $out0,$out0,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vncipher $out0,$out0,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + + vncipher $out0,$out0,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vxor $twk0,$twk0,v31 + + le?vperm $in0,$in0,$in0,$leperm + vncipher $out0,$out0,v30 + + mtctr $rounds + vncipherlast $out0,$out0,$twk0 + + vmr $twk0,$twk1 # unused tweak + vmr $twk1,$twk2 + le?vperm $out0,$out0,$out0,$leperm + stvx_u $out0,$x00,$out # store output + addi $out,$out,0x10 + vxor $out0,$in0,$twk2 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_zero: + cmpwi $taillen,0 + beq Lxts_dec6x_done + + lvx_u $in0,0,$inp + le?vperm $in0,$in0,$in0,$leperm + vxor $out0,$in0,$twk1 +Lxts_dec6x_steal: + vncipher $out0,$out0,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vncipher $out0,$out0,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Lxts_dec6x_steal + + add $inp,$inp,$taillen + vncipher $out0,$out0,v24 + + cmpwi $taillen,0 + vncipher $out0,$out0,v25 + + lvx_u $in0,0,$inp + vncipher $out0,$out0,v26 + + lvsr $inpperm,0,$taillen # $in5 is no more + vncipher $out0,$out0,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vncipher $out0,$out0,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + + vncipher $out0,$out0,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vxor $twk1,$twk1,v31 + + le?vperm $in0,$in0,$in0,$leperm + vncipher $out0,$out0,v30 + + vperm $in0,$in0,$in0,$inpperm + vncipherlast $tmp,$out0,$twk1 + + le?vperm $out0,$tmp,$tmp,$leperm + le?stvx_u $out0,0,$out + be?stvx_u $tmp,0,$out + + vxor $out0,$out0,$out0 + vspltisb $out1,-1 + vperm $out0,$out0,$out1,$inpperm + vsel $out0,$in0,$tmp,$out0 + vxor $out0,$out0,$twk0 + + subi r30,$out,1 + mtctr $taillen +Loop_xts_dec6x_steal: + lbzu r0,1(r30) + stb r0,16(r30) + bdnz Loop_xts_dec6x_steal + + li $taillen,0 + mtctr $rounds + b Loop_xts_dec1x # one more time... 
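+# On the decrypt side the stealing consumes the tweaks in the opposite
+# order, which is why the decrypt tail handlers above keep an extra saved
+# tweak (vmr $twk0,... / vmr $twk1,...) where the encrypt side needs only
+# one: the last complete ciphertext block is decrypted with the *final*
+# tweak first, and only the reassembled ("stolen") block uses the previous
+# tweak.  Sketch, with a hypothetical $dec callback doing one tweaked
+# block decryption and $b the tail length:
+#
+#   my $pp     = $dec->($t_last, $c_prev);    # full block, final tweak
+#   my $p_tail = substr($pp, 0, $b);
+#   my $p_prev = $dec->($t_prev, $c_tail . substr($pp, $b));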
+ +.align 4 +Lxts_dec6x_done: + ${UCMP}i $ivp,0 + beq Lxts_dec6x_ret + + vxor $tweak,$twk0,$rndkey0 + le?vperm $tweak,$tweak,$tweak,$leperm + stvx_u $tweak,0,$ivp + +Lxts_dec6x_ret: + mtlr r11 + li r10,`$FRAME+15` + li r11,`$FRAME+31` + stvx $seven,r10,$sp # wipe copies of round keys + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + + mtspr 256,$vrsave + lvx v20,r10,$sp # ABI says so + addi r10,r10,32 + lvx v21,r11,$sp + addi r11,r11,32 + lvx v22,r10,$sp + addi r10,r10,32 + lvx v23,r11,$sp + addi r11,r11,32 + lvx v24,r10,$sp + addi r10,r10,32 + lvx v25,r11,$sp + addi r11,r11,32 + lvx v26,r10,$sp + addi r10,r10,32 + lvx v27,r11,$sp + addi r11,r11,32 + lvx v28,r10,$sp + addi r10,r10,32 + lvx v29,r11,$sp + addi r11,r11,32 + lvx v30,r10,$sp + lvx v31,r11,$sp + $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) + $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) + $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) + $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) + $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) + $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) + addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` + blr + .long 0 + .byte 0,12,0x04,1,0x80,6,6,0 + .long 0 + +.align 5 +_aesp8_xts_dec5x: + vncipher $out0,$out0,v24 + vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vncipher $out0,$out0,v25 + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + lvx v25,$x10,$key_ # round[4] + bdnz _aesp8_xts_dec5x + + subi r0,$taillen,1 + vncipher $out0,$out0,v24 + vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + + andi. 
r0,r0,16 + cmpwi $taillen,0 + vncipher $out0,$out0,v25 + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + vxor $twk0,$twk0,v31 + + sub $inp,$inp,r0 + vncipher $out0,$out0,v26 + vncipher $out1,$out1,v26 + vncipher $out2,$out2,v26 + vncipher $out3,$out3,v26 + vncipher $out4,$out4,v26 + vxor $in1,$twk1,v31 + + vncipher $out0,$out0,v27 + lvx_u $in0,0,$inp + vncipher $out1,$out1,v27 + vncipher $out2,$out2,v27 + vncipher $out3,$out3,v27 + vncipher $out4,$out4,v27 + vxor $in2,$twk2,v31 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vncipher $out0,$out0,v28 + vncipher $out1,$out1,v28 + vncipher $out2,$out2,v28 + vncipher $out3,$out3,v28 + vncipher $out4,$out4,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + vxor $in3,$twk3,v31 + + vncipher $out0,$out0,v29 + le?vperm $in0,$in0,$in0,$leperm + vncipher $out1,$out1,v29 + vncipher $out2,$out2,v29 + vncipher $out3,$out3,v29 + vncipher $out4,$out4,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vxor $in4,$twk4,v31 + + vncipher $out0,$out0,v30 + vncipher $out1,$out1,v30 + vncipher $out2,$out2,v30 + vncipher $out3,$out3,v30 + vncipher $out4,$out4,v30 + + vncipherlast $out0,$out0,$twk0 + vncipherlast $out1,$out1,$in1 + vncipherlast $out2,$out2,$in2 + vncipherlast $out3,$out3,$in3 + vncipherlast $out4,$out4,$in4 + mtctr $rounds + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 +___ +}} }}} + +my $consts=1; +foreach(split("\n",$code)) { + s/\`([^\`]*)\`/eval($1)/geo; + + # constants table endian-specific conversion + if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) { + my $conv=$3; + my @bytes=(); + + # convert to endian-agnostic format + if ($1 eq "long") { + foreach (split(/,\s*/,$2)) { + my $l = /^0/?oct:int; + push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff; + } + } else { + @bytes = map(/^0/?oct:int,split(/,\s*/,$2)); + } + + # little-endian conversion + if ($flavour =~ /le$/o) { + SWITCH: for($conv) { + /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; }; + /\?rev/ && do { @bytes=reverse(@bytes); last; }; + } + } + + #emit + print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n"; + next; + } + $consts=0 if (m/Lconsts:/o); # end of table + + # instructions prefixed with '?' are endian-specific and need + # to be adjusted accordingly... + if ($flavour =~ /le$/o) { # little-endian + s/le\?//o or + s/be\?/#be#/o or + s/\?lvsr/lvsl/o or + s/\?lvsl/lvsr/o or + s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or + s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or + s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o; + } else { # big-endian + s/le\?/#le#/o or + s/be\?//o or + s/\?([a-z]+)/$1/o; + } + + print $_,"\n"; +} + +close STDOUT; Index: cipher/build-perlasm.sh =================================================================== --- /dev/null +++ cipher/build-perlasm.sh @@ -0,0 +1,12 @@ +#!/bin/sh +# SPDX-License-Identifier: BSD-3-Clause +# (C) 2019 Shawn Landden +perl sha512-ppc8.pl ppc64le sha512-ppc8.S +perl sha512-ppc8.pl ppc64le sha256-ppc8.S +perl sha512-ppc8.pl ppc64 sha512-ppc8be.S +perl sha512-ppc8.pl ppc64 sha256-ppc8be.S +perl sha512-ppc8.pl ppc32 sha512-ppc832.S +perl sha512-ppc8.pl pp32c sha256-ppc832.S +perl rijndael-ppc8.pl ppc64le > rijndael-ppc8.S +perl rijndael-ppc8.pl ppc64 > rijndael-ppc8be.S +perl rijndael-ppc8.pl ppc32 > rijndael-ppc832.S Index: cipher/ppc-xlate.pl =================================================================== --- /dev/null +++ cipher/ppc-xlate.pl @@ -0,0 +1,348 @@ +#! 
/usr/bin/env perl +# SPDX-License-Identifier: BSD-3-Clause + +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== + +my $flavour = shift; +my $output = shift; +open STDOUT,">$output" || die "can't open $output: $!"; + +my %GLOBALS; +my %TYPES; +my $dotinlocallabels=($flavour=~/linux/)?1:0; + +################################################################ +# directives which need special treatment on different platforms +################################################################ +my $type = sub { + my ($dir,$name,$type) = @_; + + $TYPES{$name} = $type; + if ($flavour =~ /linux/) { + $name =~ s|^\.||; + ".type $name,$type"; + } else { + ""; + } +}; +my $globl = sub { + my $junk = shift; + my $name = shift; + my $global = \$GLOBALS{$name}; + my $type = \$TYPES{$name}; + my $ret; + + $name =~ s|^\.||; + + SWITCH: for ($flavour) { + /aix/ && do { if (!$$type) { + $$type = "\@function"; + } + if ($$type =~ /function/) { + $name = ".$name"; + } + last; + }; + /osx/ && do { $name = "_$name"; + last; + }; + /linux.*(32|64le)/ + && do { $ret .= ".globl $name"; + if (!$$type) { + $ret .= "\n.type $name,\@function"; + $$type = "\@function"; + } + last; + }; + /linux.*64/ && do { $ret .= ".globl $name"; + if (!$$type) { + $ret .= "\n.type $name,\@function"; + $$type = "\@function"; + } + if ($$type =~ /function/) { + $ret .= "\n.section \".opd\",\"aw\""; + $ret .= "\n.align 3"; + $ret .= "\n$name:"; + $ret .= "\n.quad .$name,.TOC.\@tocbase,0"; + $ret .= "\n.previous"; + $name = ".$name"; + } + last; + }; + } + + $ret = ".globl $name" if (!$ret); + $$global = $name; + $ret; +}; +my $text = sub { + my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text"; + $ret = ".abiversion 2\n".$ret if ($flavour =~ /linux.*64le/); + $ret; +}; +my $machine = sub { + my $junk = shift; + my $arch = shift; + if ($flavour =~ /osx/) + { $arch =~ s/\"//g; + $arch = ($flavour=~/64/) ? "ppc970-64" : "ppc970" if ($arch eq "any"); + } + ".machine $arch"; +}; +my $size = sub { + if ($flavour =~ /linux/) + { shift; + my $name = shift; + my $real = $GLOBALS{$name} ? \$GLOBALS{$name} : \$name; + my $ret = ".size $$real,.-$$real"; + $name =~ s|^\.||; + if ($$real ne $name) { + $ret .= "\n.size $name,.-$$real"; + } + $ret; + } + else + { ""; } +}; +my $asciz = sub { + shift; + my $line = join(",",@_); + if ($line =~ /^"(.*)"$/) + { ".byte " . join(",",unpack("C*",$1),0) . "\n.align 2"; } + else + { ""; } +}; +my $quad = sub { + shift; + my @ret; + my ($hi,$lo); + for (@_) { + if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io) + { $hi=$1?"0x$1":"0"; $lo="0x$2"; } + elsif (/^([0-9]+)$/o) + { $hi=$1>>32; $lo=$1&0xffffffff; } # error-prone with 32-bit perl + else + { $hi=undef; $lo=$_; } + + if (defined($hi)) + { push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo"); } + else + { push(@ret,".quad $lo"); } + } + join("\n",@ret); +}; + +################################################################ +# simplified mnemonics not handled by at least one assembler +################################################################ +my $cmplw = sub { + my $f = shift; + my $cr = 0; $cr = shift if ($#_>1); + # Some out-of-date 32-bit GNU assembler just can't handle cmplw... + ($flavour =~ /linux.*32/) ? 
+ " .long ".sprintf "0x%x",31<<26|$cr<<23|$_[0]<<16|$_[1]<<11|64 : + " cmplw ".join(',',$cr,@_); +}; +my $bdnz = sub { + my $f = shift; + my $bo = $f=~/[\+\-]/ ? 16+9 : 16; # optional "to be taken" hint + " bc $bo,0,".shift; +} if ($flavour!~/linux/); +my $bltlr = sub { + my $f = shift; + my $bo = $f=~/\-/ ? 12+2 : 12; # optional "not to be taken" hint + ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints + " .long ".sprintf "0x%x",19<<26|$bo<<21|16<<1 : + " bclr $bo,0"; +}; +my $bnelr = sub { + my $f = shift; + my $bo = $f=~/\-/ ? 4+2 : 4; # optional "not to be taken" hint + ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints + " .long ".sprintf "0x%x",19<<26|$bo<<21|2<<16|16<<1 : + " bclr $bo,2"; +}; +my $beqlr = sub { + my $f = shift; + my $bo = $f=~/-/ ? 12+2 : 12; # optional "not to be taken" hint + ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints + " .long ".sprintf "0x%X",19<<26|$bo<<21|2<<16|16<<1 : + " bclr $bo,2"; +}; +# GNU assembler can't handle extrdi rA,rS,16,48, or when sum of last two +# arguments is 64, with "operand out of range" error. +my $extrdi = sub { + my ($f,$ra,$rs,$n,$b) = @_; + $b = ($b+$n)&63; $n = 64-$n; + " rldicl $ra,$rs,$b,$n"; +}; +my $vmr = sub { + my ($f,$vx,$vy) = @_; + " vor $vx,$vy,$vy"; +}; + +# Some ABIs specify vrsave, special-purpose register #256, as reserved +# for system use. +my $no_vrsave = ($flavour =~ /aix|linux64le/); +my $mtspr = sub { + my ($f,$idx,$ra) = @_; + if ($idx == 256 && $no_vrsave) { + " or $ra,$ra,$ra"; + } else { + " mtspr $idx,$ra"; + } +}; +my $mfspr = sub { + my ($f,$rd,$idx) = @_; + if ($idx == 256 && $no_vrsave) { + " li $rd,-1"; + } else { + " mfspr $rd,$idx"; + } +}; + +# PowerISA 2.06 stuff +sub vsxmem_op { + my ($f, $vrt, $ra, $rb, $op) = @_; + " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1); +} +# made-up unaligned memory reference AltiVec/VMX instructions +my $lvx_u = sub { vsxmem_op(@_, 844); }; # lxvd2x +my $stvx_u = sub { vsxmem_op(@_, 972); }; # stxvd2x +my $lvdx_u = sub { vsxmem_op(@_, 588); }; # lxsdx +my $stvdx_u = sub { vsxmem_op(@_, 716); }; # stxsdx +my $lvx_4w = sub { vsxmem_op(@_, 780); }; # lxvw4x +my $stvx_4w = sub { vsxmem_op(@_, 908); }; # stxvw4x +my $lvx_splt = sub { vsxmem_op(@_, 332); }; # lxvdsx +# VSX instruction[s] masqueraded as made-up AltiVec/VMX +my $vpermdi = sub { # xxpermdi + my ($f, $vrt, $vra, $vrb, $dm) = @_; + $dm = oct($dm) if ($dm =~ /^0/); + " .long ".sprintf "0x%X",(60<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|($dm<<8)|(10<<3)|7; +}; + +# PowerISA 2.07 stuff +sub vcrypto_op { + my ($f, $vrt, $vra, $vrb, $op) = @_; + " .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op; +} +sub vfour { + my ($f, $vrt, $vra, $vrb, $vrc, $op) = @_; + " .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|($vrc<<6)|$op; +}; +my $vcipher = sub { vcrypto_op(@_, 1288); }; +my $vcipherlast = sub { vcrypto_op(@_, 1289); }; +my $vncipher = sub { vcrypto_op(@_, 1352); }; +my $vncipherlast= sub { vcrypto_op(@_, 1353); }; +my $vsbox = sub { vcrypto_op(@_, 0, 1480); }; +my $vshasigmad = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); }; +my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); }; +my $vpmsumb = sub { vcrypto_op(@_, 1032); }; +my $vpmsumd = sub { vcrypto_op(@_, 1224); }; +my $vpmsubh = sub { vcrypto_op(@_, 1096); }; +my $vpmsumw = sub { vcrypto_op(@_, 1160); }; +# These are not really crypto, but vcrypto_op template works +my $vaddudm = sub { 
vcrypto_op(@_, 192); }; +my $vadduqm = sub { vcrypto_op(@_, 256); }; +my $vmuleuw = sub { vcrypto_op(@_, 648); }; +my $vmulouw = sub { vcrypto_op(@_, 136); }; +my $vrld = sub { vcrypto_op(@_, 196); }; +my $vsld = sub { vcrypto_op(@_, 1476); }; +my $vsrd = sub { vcrypto_op(@_, 1732); }; +my $vsubudm = sub { vcrypto_op(@_, 1216); }; +my $vaddcuq = sub { vcrypto_op(@_, 320); }; +my $vaddeuqm = sub { vfour(@_,60); }; +my $vaddecuq = sub { vfour(@_,61); }; +my $vmrgew = sub { vfour(@_,0,1932); }; +my $vmrgow = sub { vfour(@_,0,1676); }; + +my $mtsle = sub { + my ($f, $arg) = @_; + " .long ".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2); +}; + +# VSX instructions masqueraded as AltiVec/VMX +my $mtvrd = sub { + my ($f, $vrt, $ra) = @_; + " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|(179<<1)|1; +}; +my $mtvrwz = sub { + my ($f, $vrt, $ra) = @_; + " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|(243<<1)|1; +}; +my $lvwzx_u = sub { vsxmem_op(@_, 12); }; # lxsiwzx +my $stvwx_u = sub { vsxmem_op(@_, 140); }; # stxsiwx + +# PowerISA 3.0 stuff +my $maddhdu = sub { vfour(@_,49); }; +my $maddld = sub { vfour(@_,51); }; +my $darn = sub { + my ($f, $rt, $l) = @_; + " .long ".sprintf "0x%X",(31<<26)|($rt<<21)|($l<<16)|(755<<1); +}; +my $iseleq = sub { + my ($f, $rt, $ra, $rb) = @_; + " .long ".sprintf "0x%X",(31<<26)|($rt<<21)|($ra<<16)|($rb<<11)|(2<<6)|30; +}; +# VSX instruction[s] masqueraded as made-up AltiVec/VMX +my $vspltib = sub { # xxspltib + my ($f, $vrt, $imm8) = @_; + $imm8 = oct($imm8) if ($imm8 =~ /^0/); + $imm8 &= 0xff; + " .long ".sprintf "0x%X",(60<<26)|($vrt<<21)|($imm8<<11)|(360<<1)|1; +}; + +# PowerISA 3.0B stuff +my $addex = sub { + my ($f, $rt, $ra, $rb, $cy) = @_; # only cy==0 is specified in 3.0B + " .long ".sprintf "0x%X",(31<<26)|($rt<<21)|($ra<<16)|($rb<<11)|($cy<<9)|(170<<1); +}; +my $vmsumudm = sub { vfour(@_,35); }; + +while($line=<>) { + + $line =~ s|[#!;].*$||; # get rid of asm-style comments... + $line =~ s|/\*.*\*/||; # ... and C-style comments... + $line =~ s|^\s+||; # ... and skip white spaces in beginning... + $line =~ s|\s+$||; # ... and at the end + + { + $line =~ s|\.L(\w+)|L$1|g; # common denominator for Locallabel + $line =~ s|\bL(\w+)|\.L$1|g if ($dotinlocallabels); + } + + { + $line =~ s|(^[\.\w]+)\:\s*||; + my $label = $1; + if ($label) { + my $xlated = ($GLOBALS{$label} or $label); + print "$xlated:"; + if ($flavour =~ /linux.*64le/) { + if ($TYPES{$label} =~ /function/) { + printf "\n.localentry %s,0\n",$xlated; + } + } + } + } + + { + $line =~ s|^\s*(\.?)(\w+)([\.\+\-]?)\s*||; + my $c = $1; $c = "\t" if ($c eq ""); + my $mnemonic = $2; + my $f = $3; + my $opcode = eval("\$$mnemonic"); + $line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." 
and $flavour !~ /osx/); + if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(/,\s*/,$line)); } + elsif ($mnemonic) { $line = $c.$mnemonic.$f."\t".$line; } + } + + print $line if ($line); + print "\n"; +} + +close STDOUT; Index: cipher/rijndael-ppc8.S =================================================================== --- /dev/null +++ cipher/rijndael-ppc8.S @@ -0,0 +1,3633 @@ +.machine "any" + +.text + +.align 7 +rcon: +.byte 0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01 +.byte 0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x1b +.byte 0x0c,0x0f,0x0e,0x0d,0x0c,0x0f,0x0e,0x0d,0x0c,0x0f,0x0e,0x0d,0x0c,0x0f,0x0e,0x0d +.byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +Lconsts: + mflr 0 + bcl 20,31,$+4 + mflr 6 + addi 6,6,-0x48 + mtlr 0 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 +.byte 65,69,83,32,102,111,114,32,80,111,119,101,114,73,83,65,32,50,46,48,55,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 + +.globl aes_p8_set_encrypt_key +.align 5 +aes_p8_set_encrypt_key: +Lset_encrypt_key: + mflr 11 + std 11,16(1) + + li 6,-1 + cmpldi 3,0 + beq- Lenc_key_abort + cmpldi 5,0 + beq- Lenc_key_abort + li 6,-2 + cmpwi 4,128 + blt- Lenc_key_abort + cmpwi 4,256 + bgt- Lenc_key_abort + andi. 0,4,0x3f + bne- Lenc_key_abort + + lis 0,0xfff0 + mfspr 12,256 + mtspr 256,0 + + bl Lconsts + mtlr 11 + + neg 9,3 + lvx 1,0,3 + addi 3,3,15 + lvsr 3,0,9 + li 8,0x20 + cmpwi 4,192 + lvx 2,0,3 + vspltisb 5,0x0f + lvx 4,0,6 + vxor 3,3,5 + lvx 5,8,6 + addi 6,6,0x10 + vperm 1,1,2,3 + li 7,8 + vxor 0,0,0 + mtctr 7 + + lvsl 8,0,5 + vspltisb 9,-1 + lvx 10,0,5 + vperm 9,9,0,8 + + blt Loop128 + addi 3,3,8 + beq L192 + addi 3,3,8 + b L256 + +.align 4 +Loop128: + vperm 3,1,1,5 + vsldoi 6,0,1,12 + vperm 11,1,1,8 + vsel 7,10,11,9 + vor 10,11,11 + .long 0x10632509 + stvx 7,0,5 + addi 5,5,16 + + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + vadduwm 4,4,4 + vxor 1,1,3 + bc 16,0,Loop128 + + lvx 4,0,6 + + vperm 3,1,1,5 + vsldoi 6,0,1,12 + vperm 11,1,1,8 + vsel 7,10,11,9 + vor 10,11,11 + .long 0x10632509 + stvx 7,0,5 + addi 5,5,16 + + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + vadduwm 4,4,4 + vxor 1,1,3 + + vperm 3,1,1,5 + vsldoi 6,0,1,12 + vperm 11,1,1,8 + vsel 7,10,11,9 + vor 10,11,11 + .long 0x10632509 + stvx 7,0,5 + addi 5,5,16 + + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + vxor 1,1,3 + vperm 11,1,1,8 + vsel 7,10,11,9 + vor 10,11,11 + stvx 7,0,5 + + addi 3,5,15 + addi 5,5,0x50 + + li 8,10 + b Ldone + +.align 4 +L192: + lvx 6,0,3 + li 7,4 + vperm 11,1,1,8 + vsel 7,10,11,9 + vor 10,11,11 + stvx 7,0,5 + addi 5,5,16 + vperm 2,2,6,3 + vspltisb 3,8 + mtctr 7 + vsububm 5,5,3 + +Loop192: + vperm 3,2,2,5 + vsldoi 6,0,1,12 + .long 0x10632509 + + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + + vsldoi 7,0,2,8 + vspltw 6,1,3 + vxor 6,6,2 + vsldoi 2,0,2,12 + vadduwm 4,4,4 + vxor 2,2,6 + vxor 1,1,3 + vxor 2,2,3 + vsldoi 7,7,1,8 + + vperm 3,2,2,5 + vsldoi 6,0,1,12 + vperm 11,7,7,8 + vsel 7,10,11,9 + vor 10,11,11 + .long 0x10632509 + stvx 7,0,5 + addi 5,5,16 + + vsldoi 7,1,2,8 + vxor 1,1,6 + vsldoi 6,0,6,12 + vperm 11,7,7,8 + vsel 7,10,11,9 + vor 10,11,11 + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + stvx 7,0,5 + addi 5,5,16 + + vspltw 6,1,3 + vxor 6,6,2 + vsldoi 2,0,2,12 + vadduwm 4,4,4 + vxor 2,2,6 + vxor 1,1,3 + vxor 2,2,3 + vperm 11,1,1,8 + vsel 
7,10,11,9 + vor 10,11,11 + stvx 7,0,5 + addi 3,5,15 + addi 5,5,16 + bc 16,0,Loop192 + + li 8,12 + addi 5,5,0x20 + b Ldone + +.align 4 +L256: + lvx 6,0,3 + li 7,7 + li 8,14 + vperm 11,1,1,8 + vsel 7,10,11,9 + vor 10,11,11 + stvx 7,0,5 + addi 5,5,16 + vperm 2,2,6,3 + mtctr 7 + +Loop256: + vperm 3,2,2,5 + vsldoi 6,0,1,12 + vperm 11,2,2,8 + vsel 7,10,11,9 + vor 10,11,11 + .long 0x10632509 + stvx 7,0,5 + addi 5,5,16 + + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + vadduwm 4,4,4 + vxor 1,1,3 + vperm 11,1,1,8 + vsel 7,10,11,9 + vor 10,11,11 + stvx 7,0,5 + addi 3,5,15 + addi 5,5,16 + bdz Ldone + + vspltw 3,1,3 + vsldoi 6,0,2,12 + .long 0x106305C8 + + vxor 2,2,6 + vsldoi 6,0,6,12 + vxor 2,2,6 + vsldoi 6,0,6,12 + vxor 2,2,6 + + vxor 2,2,3 + b Loop256 + +.align 4 +Ldone: + lvx 2,0,3 + vsel 2,10,2,9 + stvx 2,0,3 + li 6,0 + mtspr 256,12 + stw 8,0(5) + +Lenc_key_abort: + mr 3,6 + blr +.long 0 +.byte 0,12,0x14,1,0,0,3,0 +.long 0 + + +.globl aes_p8_set_decrypt_key +.align 5 +aes_p8_set_decrypt_key: + stdu 1,-64(1) + mflr 10 + std 10,64+16(1) + bl Lset_encrypt_key + mtlr 10 + + cmpwi 3,0 + bne- Ldec_key_abort + + slwi 7,8,4 + subi 3,5,240 + srwi 8,8,1 + add 5,3,7 + mtctr 8 + +Ldeckey: + lwz 0, 0(3) + lwz 6, 4(3) + lwz 7, 8(3) + lwz 8, 12(3) + addi 3,3,16 + lwz 9, 0(5) + lwz 10,4(5) + lwz 11,8(5) + lwz 12,12(5) + stw 0, 0(5) + stw 6, 4(5) + stw 7, 8(5) + stw 8, 12(5) + subi 5,5,16 + stw 9, -16(3) + stw 10,-12(3) + stw 11,-8(3) + stw 12,-4(3) + bc 16,0,Ldeckey + + xor 3,3,3 +Ldec_key_abort: + addi 1,1,64 + blr +.long 0 +.byte 0,12,4,1,0x80,0,3,0 +.long 0 + +.globl aes_p8_encrypt +.align 5 +aes_p8_encrypt: + lwz 6,240(5) + lis 0,0xfc00 + mfspr 12,256 + li 7,15 + mtspr 256,0 + + lvx 0,0,3 + neg 11,4 + lvx 1,7,3 + lvsl 2,0,3 + vspltisb 4,0x0f + lvsr 3,0,11 + vxor 2,2,4 + li 7,16 + vperm 0,0,1,2 + lvx 1,0,5 + lvsr 5,0,5 + srwi 6,6,1 + lvx 2,7,5 + addi 7,7,16 + subi 6,6,1 + vperm 1,2,1,5 + + vxor 0,0,1 + lvx 1,7,5 + addi 7,7,16 + mtctr 6 + +Loop_enc: + vperm 2,1,2,5 + .long 0x10001508 + lvx 2,7,5 + addi 7,7,16 + vperm 1,2,1,5 + .long 0x10000D08 + lvx 1,7,5 + addi 7,7,16 + bc 16,0,Loop_enc + + vperm 2,1,2,5 + .long 0x10001508 + lvx 2,7,5 + vperm 1,2,1,5 + .long 0x10000D09 + + vspltisb 2,-1 + vxor 1,1,1 + li 7,15 + vperm 2,2,1,3 + vxor 3,3,4 + lvx 1,0,4 + vperm 0,0,0,3 + vsel 1,1,0,2 + lvx 4,7,4 + stvx 1,0,4 + vsel 0,0,4,2 + stvx 0,7,4 + + mtspr 256,12 + blr +.long 0 +.byte 0,12,0x14,0,0,0,3,0 +.long 0 + +.globl aes_p8_decrypt +.align 5 +aes_p8_decrypt: + lwz 6,240(5) + lis 0,0xfc00 + mfspr 12,256 + li 7,15 + mtspr 256,0 + + lvx 0,0,3 + neg 11,4 + lvx 1,7,3 + lvsl 2,0,3 + vspltisb 4,0x0f + lvsr 3,0,11 + vxor 2,2,4 + li 7,16 + vperm 0,0,1,2 + lvx 1,0,5 + lvsr 5,0,5 + srwi 6,6,1 + lvx 2,7,5 + addi 7,7,16 + subi 6,6,1 + vperm 1,2,1,5 + + vxor 0,0,1 + lvx 1,7,5 + addi 7,7,16 + mtctr 6 + +Loop_dec: + vperm 2,1,2,5 + .long 0x10001548 + lvx 2,7,5 + addi 7,7,16 + vperm 1,2,1,5 + .long 0x10000D48 + lvx 1,7,5 + addi 7,7,16 + bc 16,0,Loop_dec + + vperm 2,1,2,5 + .long 0x10001548 + lvx 2,7,5 + vperm 1,2,1,5 + .long 0x10000D49 + + vspltisb 2,-1 + vxor 1,1,1 + li 7,15 + vperm 2,2,1,3 + vxor 3,3,4 + lvx 1,0,4 + vperm 0,0,0,3 + vsel 1,1,0,2 + lvx 4,7,4 + stvx 1,0,4 + vsel 0,0,4,2 + stvx 0,7,4 + + mtspr 256,12 + blr +.long 0 +.byte 0,12,0x14,0,0,0,3,0 +.long 0 + +.globl aes_p8_cbc_encrypt +.align 5 +aes_p8_cbc_encrypt: + cmpldi 5,16 + bclr 14,0 + + cmpwi 8,0 + lis 0,0xffe0 + mfspr 12,256 + mtspr 256,0 + + li 10,15 + vxor 0,0,0 + vspltisb 3,0x0f + + lvx 4,0,7 + lvsl 6,0,7 + lvx 5,10,7 + vxor 6,6,3 + 
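+# aes_p8_cbc_encrypt handles both directions of plain CBC chaining.
+# Sketch of the data flow (hypothetical $enc/$dec single-block helpers;
+# $iv holds the previous ciphertext block):
+#
+#   encrypt:  $c[$i] = $enc->( $p[$i] ^ $iv );  $iv = $c[$i];   # serial
+#   decrypt:  $p[$i] = $dec->( $c[$i] ) ^ $iv;  $iv = $c[$i];   # blocks independent
+#
+# Decryption only needs ciphertext that is already in hand, which is why
+# large inputs are diverted to the 8-way _aesp8_cbc_decrypt8x path below
+# while encryption proceeds one block at a time.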
vperm 4,4,5,6 + + neg 11,3 + lvsr 10,0,6 + lwz 9,240(6) + + lvsr 6,0,11 + lvx 5,0,3 + addi 3,3,15 + vxor 6,6,3 + + lvsl 8,0,4 + vspltisb 9,-1 + lvx 7,0,4 + vperm 9,9,0,8 + vxor 8,8,3 + + srwi 9,9,1 + li 10,16 + subi 9,9,1 + beq Lcbc_dec + +Lcbc_enc: + vor 2,5,5 + lvx 5,0,3 + addi 3,3,16 + mtctr 9 + subi 5,5,16 + + lvx 0,0,6 + vperm 2,2,5,6 + lvx 1,10,6 + addi 10,10,16 + vperm 0,1,0,10 + vxor 2,2,0 + lvx 0,10,6 + addi 10,10,16 + vxor 2,2,4 + +Loop_cbc_enc: + vperm 1,0,1,10 + .long 0x10420D08 + lvx 1,10,6 + addi 10,10,16 + vperm 0,1,0,10 + .long 0x10420508 + lvx 0,10,6 + addi 10,10,16 + bc 16,0,Loop_cbc_enc + + vperm 1,0,1,10 + .long 0x10420D08 + lvx 1,10,6 + li 10,16 + vperm 0,1,0,10 + .long 0x10820509 + cmpldi 5,16 + + vperm 3,4,4,8 + vsel 2,7,3,9 + vor 7,3,3 + stvx 2,0,4 + addi 4,4,16 + bge Lcbc_enc + + b Lcbc_done + +.align 4 +Lcbc_dec: + cmpldi 5,128 + bge _aesp8_cbc_decrypt8x + vor 3,5,5 + lvx 5,0,3 + addi 3,3,16 + mtctr 9 + subi 5,5,16 + + lvx 0,0,6 + vperm 3,3,5,6 + lvx 1,10,6 + addi 10,10,16 + vperm 0,1,0,10 + vxor 2,3,0 + lvx 0,10,6 + addi 10,10,16 + +Loop_cbc_dec: + vperm 1,0,1,10 + .long 0x10420D48 + lvx 1,10,6 + addi 10,10,16 + vperm 0,1,0,10 + .long 0x10420548 + lvx 0,10,6 + addi 10,10,16 + bc 16,0,Loop_cbc_dec + + vperm 1,0,1,10 + .long 0x10420D48 + lvx 1,10,6 + li 10,16 + vperm 0,1,0,10 + .long 0x10420549 + cmpldi 5,16 + + vxor 2,2,4 + vor 4,3,3 + vperm 3,2,2,8 + vsel 2,7,3,9 + vor 7,3,3 + stvx 2,0,4 + addi 4,4,16 + bge Lcbc_dec + +Lcbc_done: + addi 4,4,-1 + lvx 2,0,4 + vsel 2,7,2,9 + stvx 2,0,4 + + neg 8,7 + li 10,15 + vxor 0,0,0 + vspltisb 9,-1 + vspltisb 3,0x0f + lvsr 8,0,8 + vperm 9,9,0,8 + vxor 8,8,3 + lvx 7,0,7 + vperm 4,4,4,8 + vsel 2,7,4,9 + lvx 5,10,7 + stvx 2,0,7 + vsel 2,4,5,9 + stvx 2,10,7 + + mtspr 256,12 + blr +.long 0 +.byte 0,12,0x14,0,0,0,6,0 +.long 0 +.align 5 +_aesp8_cbc_decrypt8x: + stdu 1,-448(1) + li 10,207 + li 11,223 + stvx 20,10,1 + addi 10,10,32 + stvx 21,11,1 + addi 11,11,32 + stvx 22,10,1 + addi 10,10,32 + stvx 23,11,1 + addi 11,11,32 + stvx 24,10,1 + addi 10,10,32 + stvx 25,11,1 + addi 11,11,32 + stvx 26,10,1 + addi 10,10,32 + stvx 27,11,1 + addi 11,11,32 + stvx 28,10,1 + addi 10,10,32 + stvx 29,11,1 + addi 11,11,32 + stvx 30,10,1 + stvx 31,11,1 + li 0,-1 + stw 12,396(1) + li 8,0x10 + std 26,400(1) + li 26,0x20 + std 27,408(1) + li 27,0x30 + std 28,416(1) + li 28,0x40 + std 29,424(1) + li 29,0x50 + std 30,432(1) + li 30,0x60 + std 31,440(1) + li 31,0x70 + mtspr 256,0 + + subi 9,9,3 + subi 5,5,128 + + lvx 23,0,6 + lvx 30,8,6 + addi 6,6,0x20 + lvx 31,0,6 + vperm 23,30,23,10 + addi 11,1,64+15 + mtctr 9 + +Load_cbc_dec_key: + vperm 24,31,30,10 + lvx 30,8,6 + addi 6,6,0x20 + stvx 24,0,11 + vperm 25,30,31,10 + lvx 31,0,6 + stvx 25,8,11 + addi 11,11,0x20 + bc 16,0,Load_cbc_dec_key + + lvx 26,8,6 + vperm 24,31,30,10 + lvx 27,26,6 + stvx 24,0,11 + vperm 25,26,31,10 + lvx 28,27,6 + stvx 25,8,11 + addi 11,1,64+15 + vperm 26,27,26,10 + lvx 29,28,6 + vperm 27,28,27,10 + lvx 30,29,6 + vperm 28,29,28,10 + lvx 31,30,6 + vperm 29,30,29,10 + lvx 14,31,6 + vperm 30,31,30,10 + lvx 24,0,11 + vperm 31,14,31,10 + lvx 25,8,11 + + + + subi 3,3,15 + + li 10,8 + .long 0x7C001E99 + lvsl 6,0,10 + vspltisb 3,0x0f + .long 0x7C281E99 + vxor 6,6,3 + .long 0x7C5A1E99 + vperm 0,0,0,6 + .long 0x7C7B1E99 + vperm 1,1,1,6 + .long 0x7D5C1E99 + vperm 2,2,2,6 + vxor 14,0,23 + .long 0x7D7D1E99 + vperm 3,3,3,6 + vxor 15,1,23 + .long 0x7D9E1E99 + vperm 10,10,10,6 + vxor 16,2,23 + .long 0x7DBF1E99 + addi 3,3,0x80 + vperm 11,11,11,6 + vxor 17,3,23 + vperm 12,12,12,6 + vxor 18,10,23 + vperm 
13,13,13,6 + vxor 19,11,23 + vxor 20,12,23 + vxor 21,13,23 + + mtctr 9 + b Loop_cbc_dec8x +.align 5 +Loop_cbc_dec8x: + .long 0x11CEC548 + .long 0x11EFC548 + .long 0x1210C548 + .long 0x1231C548 + .long 0x1252C548 + .long 0x1273C548 + .long 0x1294C548 + .long 0x12B5C548 + lvx 24,26,11 + addi 11,11,0x20 + + .long 0x11CECD48 + .long 0x11EFCD48 + .long 0x1210CD48 + .long 0x1231CD48 + .long 0x1252CD48 + .long 0x1273CD48 + .long 0x1294CD48 + .long 0x12B5CD48 + lvx 25,8,11 + bc 16,0,Loop_cbc_dec8x + + subic 5,5,128 + .long 0x11CEC548 + .long 0x11EFC548 + .long 0x1210C548 + .long 0x1231C548 + .long 0x1252C548 + .long 0x1273C548 + .long 0x1294C548 + .long 0x12B5C548 + + subfe. 0,0,0 + .long 0x11CECD48 + .long 0x11EFCD48 + .long 0x1210CD48 + .long 0x1231CD48 + .long 0x1252CD48 + .long 0x1273CD48 + .long 0x1294CD48 + .long 0x12B5CD48 + + and 0,0,5 + .long 0x11CED548 + .long 0x11EFD548 + .long 0x1210D548 + .long 0x1231D548 + .long 0x1252D548 + .long 0x1273D548 + .long 0x1294D548 + .long 0x12B5D548 + + add 3,3,0 + + + + .long 0x11CEDD48 + .long 0x11EFDD48 + .long 0x1210DD48 + .long 0x1231DD48 + .long 0x1252DD48 + .long 0x1273DD48 + .long 0x1294DD48 + .long 0x12B5DD48 + + addi 11,1,64+15 + .long 0x11CEE548 + .long 0x11EFE548 + .long 0x1210E548 + .long 0x1231E548 + .long 0x1252E548 + .long 0x1273E548 + .long 0x1294E548 + .long 0x12B5E548 + lvx 24,0,11 + + .long 0x11CEED48 + .long 0x11EFED48 + .long 0x1210ED48 + .long 0x1231ED48 + .long 0x1252ED48 + .long 0x1273ED48 + .long 0x1294ED48 + .long 0x12B5ED48 + lvx 25,8,11 + + .long 0x11CEF548 + vxor 4,4,31 + .long 0x11EFF548 + vxor 0,0,31 + .long 0x1210F548 + vxor 1,1,31 + .long 0x1231F548 + vxor 2,2,31 + .long 0x1252F548 + vxor 3,3,31 + .long 0x1273F548 + vxor 10,10,31 + .long 0x1294F548 + vxor 11,11,31 + .long 0x12B5F548 + vxor 12,12,31 + + .long 0x11CE2549 + .long 0x11EF0549 + .long 0x7C001E99 + .long 0x12100D49 + .long 0x7C281E99 + .long 0x12311549 + vperm 0,0,0,6 + .long 0x7C5A1E99 + .long 0x12521D49 + vperm 1,1,1,6 + .long 0x7C7B1E99 + .long 0x12735549 + vperm 2,2,2,6 + .long 0x7D5C1E99 + .long 0x12945D49 + vperm 3,3,3,6 + .long 0x7D7D1E99 + .long 0x12B56549 + vperm 10,10,10,6 + .long 0x7D9E1E99 + vor 4,13,13 + vperm 11,11,11,6 + .long 0x7DBF1E99 + addi 3,3,0x80 + + vperm 14,14,14,6 + vperm 15,15,15,6 + .long 0x7DC02799 + vperm 12,12,12,6 + vxor 14,0,23 + vperm 16,16,16,6 + .long 0x7DE82799 + vperm 13,13,13,6 + vxor 15,1,23 + vperm 17,17,17,6 + .long 0x7E1A2799 + vxor 16,2,23 + vperm 18,18,18,6 + .long 0x7E3B2799 + vxor 17,3,23 + vperm 19,19,19,6 + .long 0x7E5C2799 + vxor 18,10,23 + vperm 20,20,20,6 + .long 0x7E7D2799 + vxor 19,11,23 + vperm 21,21,21,6 + .long 0x7E9E2799 + vxor 20,12,23 + .long 0x7EBF2799 + addi 4,4,0x80 + vxor 21,13,23 + + mtctr 9 + beq Loop_cbc_dec8x + + addic. 
5,5,128 + beq Lcbc_dec8x_done + nop + nop + +Loop_cbc_dec8x_tail: + .long 0x11EFC548 + .long 0x1210C548 + .long 0x1231C548 + .long 0x1252C548 + .long 0x1273C548 + .long 0x1294C548 + .long 0x12B5C548 + lvx 24,26,11 + addi 11,11,0x20 + + .long 0x11EFCD48 + .long 0x1210CD48 + .long 0x1231CD48 + .long 0x1252CD48 + .long 0x1273CD48 + .long 0x1294CD48 + .long 0x12B5CD48 + lvx 25,8,11 + bc 16,0,Loop_cbc_dec8x_tail + + .long 0x11EFC548 + .long 0x1210C548 + .long 0x1231C548 + .long 0x1252C548 + .long 0x1273C548 + .long 0x1294C548 + .long 0x12B5C548 + + .long 0x11EFCD48 + .long 0x1210CD48 + .long 0x1231CD48 + .long 0x1252CD48 + .long 0x1273CD48 + .long 0x1294CD48 + .long 0x12B5CD48 + + .long 0x11EFD548 + .long 0x1210D548 + .long 0x1231D548 + .long 0x1252D548 + .long 0x1273D548 + .long 0x1294D548 + .long 0x12B5D548 + + .long 0x11EFDD48 + .long 0x1210DD48 + .long 0x1231DD48 + .long 0x1252DD48 + .long 0x1273DD48 + .long 0x1294DD48 + .long 0x12B5DD48 + + .long 0x11EFE548 + .long 0x1210E548 + .long 0x1231E548 + .long 0x1252E548 + .long 0x1273E548 + .long 0x1294E548 + .long 0x12B5E548 + + .long 0x11EFED48 + .long 0x1210ED48 + .long 0x1231ED48 + .long 0x1252ED48 + .long 0x1273ED48 + .long 0x1294ED48 + .long 0x12B5ED48 + + .long 0x11EFF548 + vxor 4,4,31 + .long 0x1210F548 + vxor 1,1,31 + .long 0x1231F548 + vxor 2,2,31 + .long 0x1252F548 + vxor 3,3,31 + .long 0x1273F548 + vxor 10,10,31 + .long 0x1294F548 + vxor 11,11,31 + .long 0x12B5F548 + vxor 12,12,31 + + cmplwi 5,32 + blt Lcbc_dec8x_one + nop + beq Lcbc_dec8x_two + cmplwi 5,64 + blt Lcbc_dec8x_three + nop + beq Lcbc_dec8x_four + cmplwi 5,96 + blt Lcbc_dec8x_five + nop + beq Lcbc_dec8x_six + +Lcbc_dec8x_seven: + .long 0x11EF2549 + .long 0x12100D49 + .long 0x12311549 + .long 0x12521D49 + .long 0x12735549 + .long 0x12945D49 + .long 0x12B56549 + vor 4,13,13 + + vperm 15,15,15,6 + vperm 16,16,16,6 + .long 0x7DE02799 + vperm 17,17,17,6 + .long 0x7E082799 + vperm 18,18,18,6 + .long 0x7E3A2799 + vperm 19,19,19,6 + .long 0x7E5B2799 + vperm 20,20,20,6 + .long 0x7E7C2799 + vperm 21,21,21,6 + .long 0x7E9D2799 + .long 0x7EBE2799 + addi 4,4,0x70 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_six: + .long 0x12102549 + .long 0x12311549 + .long 0x12521D49 + .long 0x12735549 + .long 0x12945D49 + .long 0x12B56549 + vor 4,13,13 + + vperm 16,16,16,6 + vperm 17,17,17,6 + .long 0x7E002799 + vperm 18,18,18,6 + .long 0x7E282799 + vperm 19,19,19,6 + .long 0x7E5A2799 + vperm 20,20,20,6 + .long 0x7E7B2799 + vperm 21,21,21,6 + .long 0x7E9C2799 + .long 0x7EBD2799 + addi 4,4,0x60 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_five: + .long 0x12312549 + .long 0x12521D49 + .long 0x12735549 + .long 0x12945D49 + .long 0x12B56549 + vor 4,13,13 + + vperm 17,17,17,6 + vperm 18,18,18,6 + .long 0x7E202799 + vperm 19,19,19,6 + .long 0x7E482799 + vperm 20,20,20,6 + .long 0x7E7A2799 + vperm 21,21,21,6 + .long 0x7E9B2799 + .long 0x7EBC2799 + addi 4,4,0x50 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_four: + .long 0x12522549 + .long 0x12735549 + .long 0x12945D49 + .long 0x12B56549 + vor 4,13,13 + + vperm 18,18,18,6 + vperm 19,19,19,6 + .long 0x7E402799 + vperm 20,20,20,6 + .long 0x7E682799 + vperm 21,21,21,6 + .long 0x7E9A2799 + .long 0x7EBB2799 + addi 4,4,0x40 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_three: + .long 0x12732549 + .long 0x12945D49 + .long 0x12B56549 + vor 4,13,13 + + vperm 19,19,19,6 + vperm 20,20,20,6 + .long 0x7E602799 + vperm 21,21,21,6 + .long 0x7E882799 + .long 0x7EBA2799 + addi 4,4,0x30 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_two: + .long 0x12942549 + .long 0x12B56549 + vor 
4,13,13 + + vperm 20,20,20,6 + vperm 21,21,21,6 + .long 0x7E802799 + .long 0x7EA82799 + addi 4,4,0x20 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_one: + .long 0x12B52549 + vor 4,13,13 + + vperm 21,21,21,6 + .long 0x7EA02799 + addi 4,4,0x10 + +Lcbc_dec8x_done: + vperm 4,4,4,6 + .long 0x7C803F99 + + li 10,79 + li 11,95 + stvx 6,10,1 + addi 10,10,32 + stvx 6,11,1 + addi 11,11,32 + stvx 6,10,1 + addi 10,10,32 + stvx 6,11,1 + addi 11,11,32 + stvx 6,10,1 + addi 10,10,32 + stvx 6,11,1 + addi 11,11,32 + stvx 6,10,1 + addi 10,10,32 + stvx 6,11,1 + addi 11,11,32 + + mtspr 256,12 + lvx 20,10,1 + addi 10,10,32 + lvx 21,11,1 + addi 11,11,32 + lvx 22,10,1 + addi 10,10,32 + lvx 23,11,1 + addi 11,11,32 + lvx 24,10,1 + addi 10,10,32 + lvx 25,11,1 + addi 11,11,32 + lvx 26,10,1 + addi 10,10,32 + lvx 27,11,1 + addi 11,11,32 + lvx 28,10,1 + addi 10,10,32 + lvx 29,11,1 + addi 11,11,32 + lvx 30,10,1 + lvx 31,11,1 + ld 26,400(1) + ld 27,408(1) + ld 28,416(1) + ld 29,424(1) + ld 30,432(1) + ld 31,440(1) + addi 1,1,448 + blr +.long 0 +.byte 0,12,0x04,0,0x80,6,6,0 +.long 0 + +.globl aes_p8_ctr32_encrypt_blocks +.align 5 +aes_p8_ctr32_encrypt_blocks: + cmpldi 5,1 + bclr 14,0 + + lis 0,0xfff0 + mfspr 12,256 + mtspr 256,0 + + li 10,15 + vxor 0,0,0 + vspltisb 3,0x0f + + lvx 4,0,7 + lvsl 6,0,7 + lvx 5,10,7 + vspltisb 11,1 + vxor 6,6,3 + vperm 4,4,5,6 + vsldoi 11,0,11,1 + + neg 11,3 + lvsr 10,0,6 + lwz 9,240(6) + + lvsr 6,0,11 + lvx 5,0,3 + addi 3,3,15 + vxor 6,6,3 + + srwi 9,9,1 + li 10,16 + subi 9,9,1 + + cmpldi 5,8 + bge _aesp8_ctr32_encrypt8x + + lvsl 8,0,4 + vspltisb 9,-1 + lvx 7,0,4 + vperm 9,9,0,8 + vxor 8,8,3 + + lvx 0,0,6 + mtctr 9 + lvx 1,10,6 + addi 10,10,16 + vperm 0,1,0,10 + vxor 2,4,0 + lvx 0,10,6 + addi 10,10,16 + b Loop_ctr32_enc + +.align 5 +Loop_ctr32_enc: + vperm 1,0,1,10 + .long 0x10420D08 + lvx 1,10,6 + addi 10,10,16 + vperm 0,1,0,10 + .long 0x10420508 + lvx 0,10,6 + addi 10,10,16 + bc 16,0,Loop_ctr32_enc + + vadduwm 4,4,11 + vor 3,5,5 + lvx 5,0,3 + addi 3,3,16 + subic. 
5,5,1 + + vperm 1,0,1,10 + .long 0x10420D08 + lvx 1,10,6 + vperm 3,3,5,6 + li 10,16 + vperm 1,1,0,10 + lvx 0,0,6 + vxor 3,3,1 + .long 0x10421D09 + + lvx 1,10,6 + addi 10,10,16 + vperm 2,2,2,8 + vsel 3,7,2,9 + mtctr 9 + vperm 0,1,0,10 + vor 7,2,2 + vxor 2,4,0 + lvx 0,10,6 + addi 10,10,16 + stvx 3,0,4 + addi 4,4,16 + bne Loop_ctr32_enc + + addi 4,4,-1 + lvx 2,0,4 + vsel 2,7,2,9 + stvx 2,0,4 + + mtspr 256,12 + blr +.long 0 +.byte 0,12,0x14,0,0,0,6,0 +.long 0 +.align 5 +_aesp8_ctr32_encrypt8x: + stdu 1,-448(1) + li 10,207 + li 11,223 + stvx 20,10,1 + addi 10,10,32 + stvx 21,11,1 + addi 11,11,32 + stvx 22,10,1 + addi 10,10,32 + stvx 23,11,1 + addi 11,11,32 + stvx 24,10,1 + addi 10,10,32 + stvx 25,11,1 + addi 11,11,32 + stvx 26,10,1 + addi 10,10,32 + stvx 27,11,1 + addi 11,11,32 + stvx 28,10,1 + addi 10,10,32 + stvx 29,11,1 + addi 11,11,32 + stvx 30,10,1 + stvx 31,11,1 + li 0,-1 + stw 12,396(1) + li 8,0x10 + std 26,400(1) + li 26,0x20 + std 27,408(1) + li 27,0x30 + std 28,416(1) + li 28,0x40 + std 29,424(1) + li 29,0x50 + std 30,432(1) + li 30,0x60 + std 31,440(1) + li 31,0x70 + mtspr 256,0 + + subi 9,9,3 + + lvx 23,0,6 + lvx 30,8,6 + addi 6,6,0x20 + lvx 31,0,6 + vperm 23,30,23,10 + addi 11,1,64+15 + mtctr 9 + +Load_ctr32_enc_key: + vperm 24,31,30,10 + lvx 30,8,6 + addi 6,6,0x20 + stvx 24,0,11 + vperm 25,30,31,10 + lvx 31,0,6 + stvx 25,8,11 + addi 11,11,0x20 + bc 16,0,Load_ctr32_enc_key + + lvx 26,8,6 + vperm 24,31,30,10 + lvx 27,26,6 + stvx 24,0,11 + vperm 25,26,31,10 + lvx 28,27,6 + stvx 25,8,11 + addi 11,1,64+15 + vperm 26,27,26,10 + lvx 29,28,6 + vperm 27,28,27,10 + lvx 30,29,6 + vperm 28,29,28,10 + lvx 31,30,6 + vperm 29,30,29,10 + lvx 15,31,6 + vperm 30,31,30,10 + lvx 24,0,11 + vperm 31,15,31,10 + lvx 25,8,11 + + vadduwm 7,11,11 + subi 3,3,15 + sldi 5,5,4 + + vadduwm 16,4,11 + vadduwm 17,4,7 + vxor 15,4,23 + li 10,8 + vadduwm 18,16,7 + vxor 16,16,23 + lvsl 6,0,10 + vadduwm 19,17,7 + vxor 17,17,23 + vspltisb 3,0x0f + vadduwm 20,18,7 + vxor 18,18,23 + vxor 6,6,3 + vadduwm 21,19,7 + vxor 19,19,23 + vadduwm 22,20,7 + vxor 20,20,23 + vadduwm 4,21,7 + vxor 21,21,23 + vxor 22,22,23 + + mtctr 9 + b Loop_ctr32_enc8x +.align 5 +Loop_ctr32_enc8x: + .long 0x11EFC508 + .long 0x1210C508 + .long 0x1231C508 + .long 0x1252C508 + .long 0x1273C508 + .long 0x1294C508 + .long 0x12B5C508 + .long 0x12D6C508 +Loop_ctr32_enc8x_middle: + lvx 24,26,11 + addi 11,11,0x20 + + .long 0x11EFCD08 + .long 0x1210CD08 + .long 0x1231CD08 + .long 0x1252CD08 + .long 0x1273CD08 + .long 0x1294CD08 + .long 0x12B5CD08 + .long 0x12D6CD08 + lvx 25,8,11 + bc 16,0,Loop_ctr32_enc8x + + subic 11,5,256 + .long 0x11EFC508 + .long 0x1210C508 + .long 0x1231C508 + .long 0x1252C508 + .long 0x1273C508 + .long 0x1294C508 + .long 0x12B5C508 + .long 0x12D6C508 + + subfe 0,0,0 + .long 0x11EFCD08 + .long 0x1210CD08 + .long 0x1231CD08 + .long 0x1252CD08 + .long 0x1273CD08 + .long 0x1294CD08 + .long 0x12B5CD08 + .long 0x12D6CD08 + + and 0,0,11 + addi 11,1,64+15 + .long 0x11EFD508 + .long 0x1210D508 + .long 0x1231D508 + .long 0x1252D508 + .long 0x1273D508 + .long 0x1294D508 + .long 0x12B5D508 + .long 0x12D6D508 + lvx 24,0,11 + + subic 5,5,129 + .long 0x11EFDD08 + addi 5,5,1 + .long 0x1210DD08 + .long 0x1231DD08 + .long 0x1252DD08 + .long 0x1273DD08 + .long 0x1294DD08 + .long 0x12B5DD08 + .long 0x12D6DD08 + lvx 25,8,11 + + .long 0x11EFE508 + .long 0x7C001E99 + .long 0x1210E508 + .long 0x7C281E99 + .long 0x1231E508 + .long 0x7C5A1E99 + .long 0x1252E508 + .long 0x7C7B1E99 + .long 0x1273E508 + .long 0x7D5C1E99 + .long 0x1294E508 + .long 0x7D9D1E99 + .long 
0x12B5E508 + .long 0x7DBE1E99 + .long 0x12D6E508 + .long 0x7DDF1E99 + addi 3,3,0x80 + + .long 0x11EFED08 + vperm 0,0,0,6 + .long 0x1210ED08 + vperm 1,1,1,6 + .long 0x1231ED08 + vperm 2,2,2,6 + .long 0x1252ED08 + vperm 3,3,3,6 + .long 0x1273ED08 + vperm 10,10,10,6 + .long 0x1294ED08 + vperm 12,12,12,6 + .long 0x12B5ED08 + vperm 13,13,13,6 + .long 0x12D6ED08 + vperm 14,14,14,6 + + add 3,3,0 + + + + subfe. 0,0,0 + .long 0x11EFF508 + vxor 0,0,31 + .long 0x1210F508 + vxor 1,1,31 + .long 0x1231F508 + vxor 2,2,31 + .long 0x1252F508 + vxor 3,3,31 + .long 0x1273F508 + vxor 10,10,31 + .long 0x1294F508 + vxor 12,12,31 + .long 0x12B5F508 + vxor 13,13,31 + .long 0x12D6F508 + vxor 14,14,31 + + bne Lctr32_enc8x_break + + .long 0x100F0509 + .long 0x10300D09 + vadduwm 16,4,11 + .long 0x10511509 + vadduwm 17,4,7 + vxor 15,4,23 + .long 0x10721D09 + vadduwm 18,16,7 + vxor 16,16,23 + .long 0x11535509 + vadduwm 19,17,7 + vxor 17,17,23 + .long 0x11946509 + vadduwm 20,18,7 + vxor 18,18,23 + .long 0x11B56D09 + vadduwm 21,19,7 + vxor 19,19,23 + .long 0x11D67509 + vadduwm 22,20,7 + vxor 20,20,23 + vperm 0,0,0,6 + vadduwm 4,21,7 + vxor 21,21,23 + vperm 1,1,1,6 + vxor 22,22,23 + mtctr 9 + + .long 0x11EFC508 + .long 0x7C002799 + vperm 2,2,2,6 + .long 0x1210C508 + .long 0x7C282799 + vperm 3,3,3,6 + .long 0x1231C508 + .long 0x7C5A2799 + vperm 10,10,10,6 + .long 0x1252C508 + .long 0x7C7B2799 + vperm 12,12,12,6 + .long 0x1273C508 + .long 0x7D5C2799 + vperm 13,13,13,6 + .long 0x1294C508 + .long 0x7D9D2799 + vperm 14,14,14,6 + .long 0x12B5C508 + .long 0x7DBE2799 + .long 0x12D6C508 + .long 0x7DDF2799 + addi 4,4,0x80 + + b Loop_ctr32_enc8x_middle + +.align 5 +Lctr32_enc8x_break: + cmpwi 5,-0x60 + blt Lctr32_enc8x_one + nop + beq Lctr32_enc8x_two + cmpwi 5,-0x40 + blt Lctr32_enc8x_three + nop + beq Lctr32_enc8x_four + cmpwi 5,-0x20 + blt Lctr32_enc8x_five + nop + beq Lctr32_enc8x_six + cmpwi 5,0x00 + blt Lctr32_enc8x_seven + +Lctr32_enc8x_eight: + .long 0x11EF0509 + .long 0x12100D09 + .long 0x12311509 + .long 0x12521D09 + .long 0x12735509 + .long 0x12946509 + .long 0x12B56D09 + .long 0x12D67509 + + vperm 15,15,15,6 + vperm 16,16,16,6 + .long 0x7DE02799 + vperm 17,17,17,6 + .long 0x7E082799 + vperm 18,18,18,6 + .long 0x7E3A2799 + vperm 19,19,19,6 + .long 0x7E5B2799 + vperm 20,20,20,6 + .long 0x7E7C2799 + vperm 21,21,21,6 + .long 0x7E9D2799 + vperm 22,22,22,6 + .long 0x7EBE2799 + .long 0x7EDF2799 + addi 4,4,0x80 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_seven: + .long 0x11EF0D09 + .long 0x12101509 + .long 0x12311D09 + .long 0x12525509 + .long 0x12736509 + .long 0x12946D09 + .long 0x12B57509 + + vperm 15,15,15,6 + vperm 16,16,16,6 + .long 0x7DE02799 + vperm 17,17,17,6 + .long 0x7E082799 + vperm 18,18,18,6 + .long 0x7E3A2799 + vperm 19,19,19,6 + .long 0x7E5B2799 + vperm 20,20,20,6 + .long 0x7E7C2799 + vperm 21,21,21,6 + .long 0x7E9D2799 + .long 0x7EBE2799 + addi 4,4,0x70 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_six: + .long 0x11EF1509 + .long 0x12101D09 + .long 0x12315509 + .long 0x12526509 + .long 0x12736D09 + .long 0x12947509 + + vperm 15,15,15,6 + vperm 16,16,16,6 + .long 0x7DE02799 + vperm 17,17,17,6 + .long 0x7E082799 + vperm 18,18,18,6 + .long 0x7E3A2799 + vperm 19,19,19,6 + .long 0x7E5B2799 + vperm 20,20,20,6 + .long 0x7E7C2799 + .long 0x7E9D2799 + addi 4,4,0x60 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_five: + .long 0x11EF1D09 + .long 0x12105509 + .long 0x12316509 + .long 0x12526D09 + .long 0x12737509 + + vperm 15,15,15,6 + vperm 16,16,16,6 + .long 0x7DE02799 + vperm 17,17,17,6 + .long 0x7E082799 + vperm 
18,18,18,6 + .long 0x7E3A2799 + vperm 19,19,19,6 + .long 0x7E5B2799 + .long 0x7E7C2799 + addi 4,4,0x50 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_four: + .long 0x11EF5509 + .long 0x12106509 + .long 0x12316D09 + .long 0x12527509 + + vperm 15,15,15,6 + vperm 16,16,16,6 + .long 0x7DE02799 + vperm 17,17,17,6 + .long 0x7E082799 + vperm 18,18,18,6 + .long 0x7E3A2799 + .long 0x7E5B2799 + addi 4,4,0x40 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_three: + .long 0x11EF6509 + .long 0x12106D09 + .long 0x12317509 + + vperm 15,15,15,6 + vperm 16,16,16,6 + .long 0x7DE02799 + vperm 17,17,17,6 + .long 0x7E082799 + .long 0x7E3A2799 + addi 4,4,0x30 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_two: + .long 0x11EF6D09 + .long 0x12107509 + + vperm 15,15,15,6 + vperm 16,16,16,6 + .long 0x7DE02799 + .long 0x7E082799 + addi 4,4,0x20 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_one: + .long 0x11EF7509 + + vperm 15,15,15,6 + .long 0x7DE02799 + addi 4,4,0x10 + +Lctr32_enc8x_done: + li 10,79 + li 11,95 + stvx 6,10,1 + addi 10,10,32 + stvx 6,11,1 + addi 11,11,32 + stvx 6,10,1 + addi 10,10,32 + stvx 6,11,1 + addi 11,11,32 + stvx 6,10,1 + addi 10,10,32 + stvx 6,11,1 + addi 11,11,32 + stvx 6,10,1 + addi 10,10,32 + stvx 6,11,1 + addi 11,11,32 + + mtspr 256,12 + lvx 20,10,1 + addi 10,10,32 + lvx 21,11,1 + addi 11,11,32 + lvx 22,10,1 + addi 10,10,32 + lvx 23,11,1 + addi 11,11,32 + lvx 24,10,1 + addi 10,10,32 + lvx 25,11,1 + addi 11,11,32 + lvx 26,10,1 + addi 10,10,32 + lvx 27,11,1 + addi 11,11,32 + lvx 28,10,1 + addi 10,10,32 + lvx 29,11,1 + addi 11,11,32 + lvx 30,10,1 + lvx 31,11,1 + ld 26,400(1) + ld 27,408(1) + ld 28,416(1) + ld 29,424(1) + ld 30,432(1) + ld 31,440(1) + addi 1,1,448 + blr +.long 0 +.byte 0,12,0x04,0,0x80,6,6,0 +.long 0 + +.globl aes_p8_xts_encrypt +.align 5 +aes_p8_xts_encrypt: + mr 10,3 + li 3,-1 + cmpldi 5,16 + bclr 14,0 + + lis 0,0xfff0 + mfspr 12,256 + li 11,0 + mtspr 256,0 + + vspltisb 9,0x07 + lvsl 6,11,11 + vspltisb 11,0x0f + vxor 6,6,9 + + li 3,15 + lvx 8,0,8 + lvsl 5,0,8 + lvx 4,3,8 + vxor 5,5,11 + vperm 8,8,4,5 + + neg 11,10 + lvsr 5,0,11 + lvx 2,0,10 + addi 10,10,15 + vxor 5,5,11 + + cmpldi 7,0 + beq Lxts_enc_no_key2 + + lvsr 7,0,7 + lwz 9,240(7) + srwi 9,9,1 + subi 9,9,1 + li 3,16 + + lvx 0,0,7 + lvx 1,3,7 + addi 3,3,16 + vperm 0,1,0,7 + vxor 8,8,0 + lvx 0,3,7 + addi 3,3,16 + mtctr 9 + +Ltweak_xts_enc: + vperm 1,0,1,7 + .long 0x11080D08 + lvx 1,3,7 + addi 3,3,16 + vperm 0,1,0,7 + .long 0x11080508 + lvx 0,3,7 + addi 3,3,16 + bc 16,0,Ltweak_xts_enc + + vperm 1,0,1,7 + .long 0x11080D08 + lvx 1,3,7 + vperm 0,1,0,7 + .long 0x11080509 + + li 8,0 + b Lxts_enc + +Lxts_enc_no_key2: + li 3,-16 + and 5,5,3 + + +Lxts_enc: + lvx 4,0,10 + addi 10,10,16 + + lvsr 7,0,6 + lwz 9,240(6) + srwi 9,9,1 + subi 9,9,1 + li 3,16 + + vslb 10,9,9 + vor 10,10,9 + vspltisb 11,1 + vsldoi 10,10,11,15 + + cmpldi 5,96 + bge _aesp8_xts_encrypt6x + + andi. 7,5,15 + subic 0,5,32 + subi 7,7,16 + subfe 0,0,0 + and 0,0,7 + add 10,10,0 + + lvx 0,0,6 + lvx 1,3,6 + addi 3,3,16 + vperm 2,2,4,5 + vperm 0,1,0,7 + vxor 2,2,8 + vxor 2,2,0 + lvx 0,3,6 + addi 3,3,16 + mtctr 9 + b Loop_xts_enc + +.align 5 +Loop_xts_enc: + vperm 1,0,1,7 + .long 0x10420D08 + lvx 1,3,6 + addi 3,3,16 + vperm 0,1,0,7 + .long 0x10420508 + lvx 0,3,6 + addi 3,3,16 + bc 16,0,Loop_xts_enc + + vperm 1,0,1,7 + .long 0x10420D08 + lvx 1,3,6 + li 3,16 + vperm 0,1,0,7 + vxor 0,0,8 + .long 0x10620509 + + vperm 11,3,3,6 + + .long 0x7D602799 + + addi 4,4,16 + + subic. 
5,5,16 + beq Lxts_enc_done + + vor 2,4,4 + lvx 4,0,10 + addi 10,10,16 + lvx 0,0,6 + lvx 1,3,6 + addi 3,3,16 + + subic 0,5,32 + subfe 0,0,0 + and 0,0,7 + add 10,10,0 + + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + vand 11,11,10 + vxor 8,8,11 + + vperm 2,2,4,5 + vperm 0,1,0,7 + vxor 2,2,8 + vxor 3,3,0 + vxor 2,2,0 + lvx 0,3,6 + addi 3,3,16 + + mtctr 9 + cmpldi 5,16 + bge Loop_xts_enc + + vxor 3,3,8 + lvsr 5,0,5 + vxor 4,4,4 + vspltisb 11,-1 + vperm 4,4,11,5 + vsel 2,2,3,4 + + subi 11,4,17 + subi 4,4,16 + mtctr 5 + li 5,16 +Loop_xts_enc_steal: + lbzu 0,1(11) + stb 0,16(11) + bc 16,0,Loop_xts_enc_steal + + mtctr 9 + b Loop_xts_enc + +Lxts_enc_done: + cmpldi 8,0 + beq Lxts_enc_ret + + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + vand 11,11,10 + vxor 8,8,11 + + vperm 8,8,8,6 + .long 0x7D004799 + +Lxts_enc_ret: + mtspr 256,12 + li 3,0 + blr +.long 0 +.byte 0,12,0x04,0,0x80,6,6,0 +.long 0 + + +.globl aes_p8_xts_decrypt +.align 5 +aes_p8_xts_decrypt: + mr 10,3 + li 3,-1 + cmpldi 5,16 + bclr 14,0 + + lis 0,0xfff8 + mfspr 12,256 + li 11,0 + mtspr 256,0 + + andi. 0,5,15 + neg 0,0 + andi. 0,0,16 + sub 5,5,0 + + vspltisb 9,0x07 + lvsl 6,11,11 + vspltisb 11,0x0f + vxor 6,6,9 + + li 3,15 + lvx 8,0,8 + lvsl 5,0,8 + lvx 4,3,8 + vxor 5,5,11 + vperm 8,8,4,5 + + neg 11,10 + lvsr 5,0,11 + lvx 2,0,10 + addi 10,10,15 + vxor 5,5,11 + + cmpldi 7,0 + beq Lxts_dec_no_key2 + + lvsr 7,0,7 + lwz 9,240(7) + srwi 9,9,1 + subi 9,9,1 + li 3,16 + + lvx 0,0,7 + lvx 1,3,7 + addi 3,3,16 + vperm 0,1,0,7 + vxor 8,8,0 + lvx 0,3,7 + addi 3,3,16 + mtctr 9 + +Ltweak_xts_dec: + vperm 1,0,1,7 + .long 0x11080D08 + lvx 1,3,7 + addi 3,3,16 + vperm 0,1,0,7 + .long 0x11080508 + lvx 0,3,7 + addi 3,3,16 + bc 16,0,Ltweak_xts_dec + + vperm 1,0,1,7 + .long 0x11080D08 + lvx 1,3,7 + vperm 0,1,0,7 + .long 0x11080509 + + li 8,0 + b Lxts_dec + +Lxts_dec_no_key2: + neg 3,5 + andi. 3,3,15 + add 5,5,3 + + +Lxts_dec: + lvx 4,0,10 + addi 10,10,16 + + lvsr 7,0,6 + lwz 9,240(6) + srwi 9,9,1 + subi 9,9,1 + li 3,16 + + vslb 10,9,9 + vor 10,10,9 + vspltisb 11,1 + vsldoi 10,10,11,15 + + cmpldi 5,96 + bge _aesp8_xts_decrypt6x + + lvx 0,0,6 + lvx 1,3,6 + addi 3,3,16 + vperm 2,2,4,5 + vperm 0,1,0,7 + vxor 2,2,8 + vxor 2,2,0 + lvx 0,3,6 + addi 3,3,16 + mtctr 9 + + cmpldi 5,16 + blt Ltail_xts_dec + + +.align 5 +Loop_xts_dec: + vperm 1,0,1,7 + .long 0x10420D48 + lvx 1,3,6 + addi 3,3,16 + vperm 0,1,0,7 + .long 0x10420548 + lvx 0,3,6 + addi 3,3,16 + bc 16,0,Loop_xts_dec + + vperm 1,0,1,7 + .long 0x10420D48 + lvx 1,3,6 + li 3,16 + vperm 0,1,0,7 + vxor 0,0,8 + .long 0x10620549 + + vperm 11,3,3,6 + + .long 0x7D602799 + + addi 4,4,16 + + subic. 
5,5,16 + beq Lxts_dec_done + + vor 2,4,4 + lvx 4,0,10 + addi 10,10,16 + lvx 0,0,6 + lvx 1,3,6 + addi 3,3,16 + + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + vand 11,11,10 + vxor 8,8,11 + + vperm 2,2,4,5 + vperm 0,1,0,7 + vxor 2,2,8 + vxor 2,2,0 + lvx 0,3,6 + addi 3,3,16 + + mtctr 9 + cmpldi 5,16 + bge Loop_xts_dec + +Ltail_xts_dec: + vsrab 11,8,9 + vaddubm 12,8,8 + vsldoi 11,11,11,15 + vand 11,11,10 + vxor 12,12,11 + + subi 10,10,16 + add 10,10,5 + + vxor 2,2,8 + vxor 2,2,12 + +Loop_xts_dec_short: + vperm 1,0,1,7 + .long 0x10420D48 + lvx 1,3,6 + addi 3,3,16 + vperm 0,1,0,7 + .long 0x10420548 + lvx 0,3,6 + addi 3,3,16 + bc 16,0,Loop_xts_dec_short + + vperm 1,0,1,7 + .long 0x10420D48 + lvx 1,3,6 + li 3,16 + vperm 0,1,0,7 + vxor 0,0,12 + .long 0x10620549 + + vperm 11,3,3,6 + + .long 0x7D602799 + + + vor 2,4,4 + lvx 4,0,10 + + lvx 0,0,6 + lvx 1,3,6 + addi 3,3,16 + vperm 2,2,4,5 + vperm 0,1,0,7 + + lvsr 5,0,5 + vxor 4,4,4 + vspltisb 11,-1 + vperm 4,4,11,5 + vsel 2,2,3,4 + + vxor 0,0,8 + vxor 2,2,0 + lvx 0,3,6 + addi 3,3,16 + + subi 11,4,1 + mtctr 5 + li 5,16 +Loop_xts_dec_steal: + lbzu 0,1(11) + stb 0,16(11) + bc 16,0,Loop_xts_dec_steal + + mtctr 9 + b Loop_xts_dec + +Lxts_dec_done: + cmpldi 8,0 + beq Lxts_dec_ret + + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + vand 11,11,10 + vxor 8,8,11 + + vperm 8,8,8,6 + .long 0x7D004799 + +Lxts_dec_ret: + mtspr 256,12 + li 3,0 + blr +.long 0 +.byte 0,12,0x04,0,0x80,6,6,0 +.long 0 + +.align 5 +_aesp8_xts_encrypt6x: + stdu 1,-448(1) + mflr 11 + li 7,207 + li 3,223 + std 11,464(1) + stvx 20,7,1 + addi 7,7,32 + stvx 21,3,1 + addi 3,3,32 + stvx 22,7,1 + addi 7,7,32 + stvx 23,3,1 + addi 3,3,32 + stvx 24,7,1 + addi 7,7,32 + stvx 25,3,1 + addi 3,3,32 + stvx 26,7,1 + addi 7,7,32 + stvx 27,3,1 + addi 3,3,32 + stvx 28,7,1 + addi 7,7,32 + stvx 29,3,1 + addi 3,3,32 + stvx 30,7,1 + stvx 31,3,1 + li 0,-1 + stw 12,396(1) + li 3,0x10 + std 26,400(1) + li 26,0x20 + std 27,408(1) + li 27,0x30 + std 28,416(1) + li 28,0x40 + std 29,424(1) + li 29,0x50 + std 30,432(1) + li 30,0x60 + std 31,440(1) + li 31,0x70 + mtspr 256,0 + + subi 9,9,3 + + lvx 23,0,6 + lvx 30,3,6 + addi 6,6,0x20 + lvx 31,0,6 + vperm 23,30,23,7 + addi 7,1,64+15 + mtctr 9 + +Load_xts_enc_key: + vperm 24,31,30,7 + lvx 30,3,6 + addi 6,6,0x20 + stvx 24,0,7 + vperm 25,30,31,7 + lvx 31,0,6 + stvx 25,3,7 + addi 7,7,0x20 + bc 16,0,Load_xts_enc_key + + lvx 26,3,6 + vperm 24,31,30,7 + lvx 27,26,6 + stvx 24,0,7 + vperm 25,26,31,7 + lvx 28,27,6 + stvx 25,3,7 + addi 7,1,64+15 + vperm 26,27,26,7 + lvx 29,28,6 + vperm 27,28,27,7 + lvx 30,29,6 + vperm 28,29,28,7 + lvx 31,30,6 + vperm 29,30,29,7 + lvx 22,31,6 + vperm 30,31,30,7 + lvx 24,0,7 + vperm 31,22,31,7 + lvx 25,3,7 + + vperm 0,2,4,5 + subi 10,10,31 + vxor 17,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + vand 11,11,10 + vxor 7,0,17 + vxor 8,8,11 + + .long 0x7C235699 + vxor 18,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + vperm 1,1,1,6 + vand 11,11,10 + vxor 12,1,18 + vxor 8,8,11 + + .long 0x7C5A5699 + andi. 
31,5,15 + vxor 19,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + vperm 2,2,2,6 + vand 11,11,10 + vxor 13,2,19 + vxor 8,8,11 + + .long 0x7C7B5699 + sub 5,5,31 + vxor 20,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + vperm 3,3,3,6 + vand 11,11,10 + vxor 14,3,20 + vxor 8,8,11 + + .long 0x7C9C5699 + subi 5,5,0x60 + vxor 21,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + vperm 4,4,4,6 + vand 11,11,10 + vxor 15,4,21 + vxor 8,8,11 + + .long 0x7CBD5699 + addi 10,10,0x60 + vxor 22,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + vperm 5,5,5,6 + vand 11,11,10 + vxor 16,5,22 + vxor 8,8,11 + + vxor 31,31,23 + mtctr 9 + b Loop_xts_enc6x + +.align 5 +Loop_xts_enc6x: + .long 0x10E7C508 + .long 0x118CC508 + .long 0x11ADC508 + .long 0x11CEC508 + .long 0x11EFC508 + .long 0x1210C508 + lvx 24,26,7 + addi 7,7,0x20 + + .long 0x10E7CD08 + .long 0x118CCD08 + .long 0x11ADCD08 + .long 0x11CECD08 + .long 0x11EFCD08 + .long 0x1210CD08 + lvx 25,3,7 + bc 16,0,Loop_xts_enc6x + + subic 5,5,96 + vxor 0,17,31 + .long 0x10E7C508 + .long 0x118CC508 + vsrab 11,8,9 + vxor 17,8,23 + vaddubm 8,8,8 + .long 0x11ADC508 + .long 0x11CEC508 + vsldoi 11,11,11,15 + .long 0x11EFC508 + .long 0x1210C508 + + subfe. 0,0,0 + vand 11,11,10 + .long 0x10E7CD08 + .long 0x118CCD08 + vxor 8,8,11 + .long 0x11ADCD08 + .long 0x11CECD08 + vxor 1,18,31 + vsrab 11,8,9 + vxor 18,8,23 + .long 0x11EFCD08 + .long 0x1210CD08 + + and 0,0,5 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + .long 0x10E7D508 + .long 0x118CD508 + vand 11,11,10 + .long 0x11ADD508 + .long 0x11CED508 + vxor 8,8,11 + .long 0x11EFD508 + .long 0x1210D508 + + add 10,10,0 + + + + vxor 2,19,31 + vsrab 11,8,9 + vxor 19,8,23 + vaddubm 8,8,8 + .long 0x10E7DD08 + .long 0x118CDD08 + vsldoi 11,11,11,15 + .long 0x11ADDD08 + .long 0x11CEDD08 + vand 11,11,10 + .long 0x11EFDD08 + .long 0x1210DD08 + + addi 7,1,64+15 + vxor 8,8,11 + .long 0x10E7E508 + .long 0x118CE508 + vxor 3,20,31 + vsrab 11,8,9 + vxor 20,8,23 + .long 0x11ADE508 + .long 0x11CEE508 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + .long 0x11EFE508 + .long 0x1210E508 + lvx 24,0,7 + vand 11,11,10 + + .long 0x10E7ED08 + .long 0x118CED08 + vxor 8,8,11 + .long 0x11ADED08 + .long 0x11CEED08 + vxor 4,21,31 + vsrab 11,8,9 + vxor 21,8,23 + .long 0x11EFED08 + .long 0x1210ED08 + lvx 25,3,7 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + + .long 0x10E7F508 + .long 0x118CF508 + vand 11,11,10 + .long 0x11ADF508 + .long 0x11CEF508 + vxor 8,8,11 + .long 0x11EFF508 + .long 0x1210F508 + vxor 5,22,31 + vsrab 11,8,9 + vxor 22,8,23 + + .long 0x10E70509 + .long 0x7C005699 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + .long 0x118C0D09 + .long 0x7C235699 + .long 0x11AD1509 + vperm 0,0,0,6 + .long 0x7C5A5699 + vand 11,11,10 + .long 0x11CE1D09 + vperm 1,1,1,6 + .long 0x7C7B5699 + .long 0x11EF2509 + vperm 2,2,2,6 + .long 0x7C9C5699 + vxor 8,8,11 + .long 0x11702D09 + + vperm 3,3,3,6 + .long 0x7CBD5699 + addi 10,10,0x60 + vperm 4,4,4,6 + vperm 5,5,5,6 + + vperm 7,7,7,6 + vperm 12,12,12,6 + .long 0x7CE02799 + vxor 7,0,17 + vperm 13,13,13,6 + .long 0x7D832799 + vxor 12,1,18 + vperm 14,14,14,6 + .long 0x7DBA2799 + vxor 13,2,19 + vperm 15,15,15,6 + .long 0x7DDB2799 + vxor 14,3,20 + vperm 16,11,11,6 + .long 0x7DFC2799 + vxor 15,4,21 + .long 0x7E1D2799 + + vxor 16,5,22 + addi 4,4,0x60 + + mtctr 9 + beq Loop_xts_enc6x + + addic. 
5,5,0x60 + beq Lxts_enc6x_zero + cmpwi 5,0x20 + blt Lxts_enc6x_one + nop + beq Lxts_enc6x_two + cmpwi 5,0x40 + blt Lxts_enc6x_three + nop + beq Lxts_enc6x_four + +Lxts_enc6x_five: + vxor 7,1,17 + vxor 12,2,18 + vxor 13,3,19 + vxor 14,4,20 + vxor 15,5,21 + + bl _aesp8_xts_enc5x + + vperm 7,7,7,6 + vor 17,22,22 + vperm 12,12,12,6 + .long 0x7CE02799 + vperm 13,13,13,6 + .long 0x7D832799 + vperm 14,14,14,6 + .long 0x7DBA2799 + vxor 11,15,22 + vperm 15,15,15,6 + .long 0x7DDB2799 + .long 0x7DFC2799 + addi 4,4,0x50 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_four: + vxor 7,2,17 + vxor 12,3,18 + vxor 13,4,19 + vxor 14,5,20 + vxor 15,15,15 + + bl _aesp8_xts_enc5x + + vperm 7,7,7,6 + vor 17,21,21 + vperm 12,12,12,6 + .long 0x7CE02799 + vperm 13,13,13,6 + .long 0x7D832799 + vxor 11,14,21 + vperm 14,14,14,6 + .long 0x7DBA2799 + .long 0x7DDB2799 + addi 4,4,0x40 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_three: + vxor 7,3,17 + vxor 12,4,18 + vxor 13,5,19 + vxor 14,14,14 + vxor 15,15,15 + + bl _aesp8_xts_enc5x + + vperm 7,7,7,6 + vor 17,20,20 + vperm 12,12,12,6 + .long 0x7CE02799 + vxor 11,13,20 + vperm 13,13,13,6 + .long 0x7D832799 + .long 0x7DBA2799 + addi 4,4,0x30 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_two: + vxor 7,4,17 + vxor 12,5,18 + vxor 13,13,13 + vxor 14,14,14 + vxor 15,15,15 + + bl _aesp8_xts_enc5x + + vperm 7,7,7,6 + vor 17,19,19 + vxor 11,12,19 + vperm 12,12,12,6 + .long 0x7CE02799 + .long 0x7D832799 + addi 4,4,0x20 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_one: + vxor 7,5,17 + nop +Loop_xts_enc1x: + .long 0x10E7C508 + lvx 24,26,7 + addi 7,7,0x20 + + .long 0x10E7CD08 + lvx 25,3,7 + bc 16,0,Loop_xts_enc1x + + add 10,10,31 + cmpwi 31,0 + .long 0x10E7C508 + + subi 10,10,16 + .long 0x10E7CD08 + + lvsr 5,0,31 + .long 0x10E7D508 + + .long 0x7C005699 + .long 0x10E7DD08 + + addi 7,1,64+15 + .long 0x10E7E508 + lvx 24,0,7 + + .long 0x10E7ED08 + lvx 25,3,7 + vxor 17,17,31 + + vperm 0,0,0,6 + .long 0x10E7F508 + + vperm 0,0,0,5 + .long 0x10E78D09 + + vor 17,18,18 + vxor 11,7,18 + vperm 7,7,7,6 + .long 0x7CE02799 + addi 4,4,0x10 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_zero: + cmpwi 31,0 + beq Lxts_enc6x_done + + add 10,10,31 + subi 10,10,16 + .long 0x7C005699 + lvsr 5,0,31 + vperm 0,0,0,6 + vperm 0,0,0,5 + vxor 11,11,17 +Lxts_enc6x_steal: + vxor 0,0,17 + vxor 7,7,7 + vspltisb 12,-1 + vperm 7,7,12,5 + vsel 7,0,11,7 + + subi 30,4,17 + subi 4,4,16 + mtctr 31 +Loop_xts_enc6x_steal: + lbzu 0,1(30) + stb 0,16(30) + bc 16,0,Loop_xts_enc6x_steal + + li 31,0 + mtctr 9 + b Loop_xts_enc1x + +.align 4 +Lxts_enc6x_done: + cmpldi 8,0 + beq Lxts_enc6x_ret + + vxor 8,17,23 + vperm 8,8,8,6 + .long 0x7D004799 + +Lxts_enc6x_ret: + mtlr 11 + li 10,79 + li 11,95 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + + mtspr 256,12 + lvx 20,10,1 + addi 10,10,32 + lvx 21,11,1 + addi 11,11,32 + lvx 22,10,1 + addi 10,10,32 + lvx 23,11,1 + addi 11,11,32 + lvx 24,10,1 + addi 10,10,32 + lvx 25,11,1 + addi 11,11,32 + lvx 26,10,1 + addi 10,10,32 + lvx 27,11,1 + addi 11,11,32 + lvx 28,10,1 + addi 10,10,32 + lvx 29,11,1 + addi 11,11,32 + lvx 30,10,1 + lvx 31,11,1 + ld 26,400(1) + ld 27,408(1) + ld 28,416(1) + ld 29,424(1) + ld 30,432(1) + ld 31,440(1) + addi 1,1,448 + blr +.long 0 +.byte 0,12,0x04,1,0x80,6,6,0 +.long 0 + +.align 
5 +_aesp8_xts_enc5x: + .long 0x10E7C508 + .long 0x118CC508 + .long 0x11ADC508 + .long 0x11CEC508 + .long 0x11EFC508 + lvx 24,26,7 + addi 7,7,0x20 + + .long 0x10E7CD08 + .long 0x118CCD08 + .long 0x11ADCD08 + .long 0x11CECD08 + .long 0x11EFCD08 + lvx 25,3,7 + bc 16,0,_aesp8_xts_enc5x + + add 10,10,31 + cmpwi 31,0 + .long 0x10E7C508 + .long 0x118CC508 + .long 0x11ADC508 + .long 0x11CEC508 + .long 0x11EFC508 + + subi 10,10,16 + .long 0x10E7CD08 + .long 0x118CCD08 + .long 0x11ADCD08 + .long 0x11CECD08 + .long 0x11EFCD08 + vxor 17,17,31 + + .long 0x10E7D508 + lvsr 5,0,31 + .long 0x118CD508 + .long 0x11ADD508 + .long 0x11CED508 + .long 0x11EFD508 + vxor 1,18,31 + + .long 0x10E7DD08 + .long 0x7C005699 + .long 0x118CDD08 + .long 0x11ADDD08 + .long 0x11CEDD08 + .long 0x11EFDD08 + vxor 2,19,31 + + addi 7,1,64+15 + .long 0x10E7E508 + .long 0x118CE508 + .long 0x11ADE508 + .long 0x11CEE508 + .long 0x11EFE508 + lvx 24,0,7 + vxor 3,20,31 + + .long 0x10E7ED08 + vperm 0,0,0,6 + .long 0x118CED08 + .long 0x11ADED08 + .long 0x11CEED08 + .long 0x11EFED08 + lvx 25,3,7 + vxor 4,21,31 + + .long 0x10E7F508 + vperm 0,0,0,5 + .long 0x118CF508 + .long 0x11ADF508 + .long 0x11CEF508 + .long 0x11EFF508 + + .long 0x10E78D09 + .long 0x118C0D09 + .long 0x11AD1509 + .long 0x11CE1D09 + .long 0x11EF2509 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 + +.align 5 +_aesp8_xts_decrypt6x: + stdu 1,-448(1) + mflr 11 + li 7,207 + li 3,223 + std 11,464(1) + stvx 20,7,1 + addi 7,7,32 + stvx 21,3,1 + addi 3,3,32 + stvx 22,7,1 + addi 7,7,32 + stvx 23,3,1 + addi 3,3,32 + stvx 24,7,1 + addi 7,7,32 + stvx 25,3,1 + addi 3,3,32 + stvx 26,7,1 + addi 7,7,32 + stvx 27,3,1 + addi 3,3,32 + stvx 28,7,1 + addi 7,7,32 + stvx 29,3,1 + addi 3,3,32 + stvx 30,7,1 + stvx 31,3,1 + li 0,-1 + stw 12,396(1) + li 3,0x10 + std 26,400(1) + li 26,0x20 + std 27,408(1) + li 27,0x30 + std 28,416(1) + li 28,0x40 + std 29,424(1) + li 29,0x50 + std 30,432(1) + li 30,0x60 + std 31,440(1) + li 31,0x70 + mtspr 256,0 + + subi 9,9,3 + + lvx 23,0,6 + lvx 30,3,6 + addi 6,6,0x20 + lvx 31,0,6 + vperm 23,30,23,7 + addi 7,1,64+15 + mtctr 9 + +Load_xts_dec_key: + vperm 24,31,30,7 + lvx 30,3,6 + addi 6,6,0x20 + stvx 24,0,7 + vperm 25,30,31,7 + lvx 31,0,6 + stvx 25,3,7 + addi 7,7,0x20 + bc 16,0,Load_xts_dec_key + + lvx 26,3,6 + vperm 24,31,30,7 + lvx 27,26,6 + stvx 24,0,7 + vperm 25,26,31,7 + lvx 28,27,6 + stvx 25,3,7 + addi 7,1,64+15 + vperm 26,27,26,7 + lvx 29,28,6 + vperm 27,28,27,7 + lvx 30,29,6 + vperm 28,29,28,7 + lvx 31,30,6 + vperm 29,30,29,7 + lvx 22,31,6 + vperm 30,31,30,7 + lvx 24,0,7 + vperm 31,22,31,7 + lvx 25,3,7 + + vperm 0,2,4,5 + subi 10,10,31 + vxor 17,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + vand 11,11,10 + vxor 7,0,17 + vxor 8,8,11 + + .long 0x7C235699 + vxor 18,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + vperm 1,1,1,6 + vand 11,11,10 + vxor 12,1,18 + vxor 8,8,11 + + .long 0x7C5A5699 + andi. 
31,5,15 + vxor 19,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + vperm 2,2,2,6 + vand 11,11,10 + vxor 13,2,19 + vxor 8,8,11 + + .long 0x7C7B5699 + sub 5,5,31 + vxor 20,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + vperm 3,3,3,6 + vand 11,11,10 + vxor 14,3,20 + vxor 8,8,11 + + .long 0x7C9C5699 + subi 5,5,0x60 + vxor 21,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + vperm 4,4,4,6 + vand 11,11,10 + vxor 15,4,21 + vxor 8,8,11 + + .long 0x7CBD5699 + addi 10,10,0x60 + vxor 22,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + vperm 5,5,5,6 + vand 11,11,10 + vxor 16,5,22 + vxor 8,8,11 + + vxor 31,31,23 + mtctr 9 + b Loop_xts_dec6x + +.align 5 +Loop_xts_dec6x: + .long 0x10E7C548 + .long 0x118CC548 + .long 0x11ADC548 + .long 0x11CEC548 + .long 0x11EFC548 + .long 0x1210C548 + lvx 24,26,7 + addi 7,7,0x20 + + .long 0x10E7CD48 + .long 0x118CCD48 + .long 0x11ADCD48 + .long 0x11CECD48 + .long 0x11EFCD48 + .long 0x1210CD48 + lvx 25,3,7 + bc 16,0,Loop_xts_dec6x + + subic 5,5,96 + vxor 0,17,31 + .long 0x10E7C548 + .long 0x118CC548 + vsrab 11,8,9 + vxor 17,8,23 + vaddubm 8,8,8 + .long 0x11ADC548 + .long 0x11CEC548 + vsldoi 11,11,11,15 + .long 0x11EFC548 + .long 0x1210C548 + + subfe. 0,0,0 + vand 11,11,10 + .long 0x10E7CD48 + .long 0x118CCD48 + vxor 8,8,11 + .long 0x11ADCD48 + .long 0x11CECD48 + vxor 1,18,31 + vsrab 11,8,9 + vxor 18,8,23 + .long 0x11EFCD48 + .long 0x1210CD48 + + and 0,0,5 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + .long 0x10E7D548 + .long 0x118CD548 + vand 11,11,10 + .long 0x11ADD548 + .long 0x11CED548 + vxor 8,8,11 + .long 0x11EFD548 + .long 0x1210D548 + + add 10,10,0 + + + + vxor 2,19,31 + vsrab 11,8,9 + vxor 19,8,23 + vaddubm 8,8,8 + .long 0x10E7DD48 + .long 0x118CDD48 + vsldoi 11,11,11,15 + .long 0x11ADDD48 + .long 0x11CEDD48 + vand 11,11,10 + .long 0x11EFDD48 + .long 0x1210DD48 + + addi 7,1,64+15 + vxor 8,8,11 + .long 0x10E7E548 + .long 0x118CE548 + vxor 3,20,31 + vsrab 11,8,9 + vxor 20,8,23 + .long 0x11ADE548 + .long 0x11CEE548 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + .long 0x11EFE548 + .long 0x1210E548 + lvx 24,0,7 + vand 11,11,10 + + .long 0x10E7ED48 + .long 0x118CED48 + vxor 8,8,11 + .long 0x11ADED48 + .long 0x11CEED48 + vxor 4,21,31 + vsrab 11,8,9 + vxor 21,8,23 + .long 0x11EFED48 + .long 0x1210ED48 + lvx 25,3,7 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + + .long 0x10E7F548 + .long 0x118CF548 + vand 11,11,10 + .long 0x11ADF548 + .long 0x11CEF548 + vxor 8,8,11 + .long 0x11EFF548 + .long 0x1210F548 + vxor 5,22,31 + vsrab 11,8,9 + vxor 22,8,23 + + .long 0x10E70549 + .long 0x7C005699 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + .long 0x118C0D49 + .long 0x7C235699 + .long 0x11AD1549 + vperm 0,0,0,6 + .long 0x7C5A5699 + vand 11,11,10 + .long 0x11CE1D49 + vperm 1,1,1,6 + .long 0x7C7B5699 + .long 0x11EF2549 + vperm 2,2,2,6 + .long 0x7C9C5699 + vxor 8,8,11 + .long 0x12102D49 + vperm 3,3,3,6 + .long 0x7CBD5699 + addi 10,10,0x60 + vperm 4,4,4,6 + vperm 5,5,5,6 + + vperm 7,7,7,6 + vperm 12,12,12,6 + .long 0x7CE02799 + vxor 7,0,17 + vperm 13,13,13,6 + .long 0x7D832799 + vxor 12,1,18 + vperm 14,14,14,6 + .long 0x7DBA2799 + vxor 13,2,19 + vperm 15,15,15,6 + .long 0x7DDB2799 + vxor 14,3,20 + vperm 16,16,16,6 + .long 0x7DFC2799 + vxor 15,4,21 + .long 0x7E1D2799 + vxor 16,5,22 + addi 4,4,0x60 + + mtctr 9 + beq Loop_xts_dec6x + + addic. 
5,5,0x60 + beq Lxts_dec6x_zero + cmpwi 5,0x20 + blt Lxts_dec6x_one + nop + beq Lxts_dec6x_two + cmpwi 5,0x40 + blt Lxts_dec6x_three + nop + beq Lxts_dec6x_four + +Lxts_dec6x_five: + vxor 7,1,17 + vxor 12,2,18 + vxor 13,3,19 + vxor 14,4,20 + vxor 15,5,21 + + bl _aesp8_xts_dec5x + + vperm 7,7,7,6 + vor 17,22,22 + vxor 18,8,23 + vperm 12,12,12,6 + .long 0x7CE02799 + vxor 7,0,18 + vperm 13,13,13,6 + .long 0x7D832799 + vperm 14,14,14,6 + .long 0x7DBA2799 + vperm 15,15,15,6 + .long 0x7DDB2799 + .long 0x7DFC2799 + addi 4,4,0x50 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_four: + vxor 7,2,17 + vxor 12,3,18 + vxor 13,4,19 + vxor 14,5,20 + vxor 15,15,15 + + bl _aesp8_xts_dec5x + + vperm 7,7,7,6 + vor 17,21,21 + vor 18,22,22 + vperm 12,12,12,6 + .long 0x7CE02799 + vxor 7,0,22 + vperm 13,13,13,6 + .long 0x7D832799 + vperm 14,14,14,6 + .long 0x7DBA2799 + .long 0x7DDB2799 + addi 4,4,0x40 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_three: + vxor 7,3,17 + vxor 12,4,18 + vxor 13,5,19 + vxor 14,14,14 + vxor 15,15,15 + + bl _aesp8_xts_dec5x + + vperm 7,7,7,6 + vor 17,20,20 + vor 18,21,21 + vperm 12,12,12,6 + .long 0x7CE02799 + vxor 7,0,21 + vperm 13,13,13,6 + .long 0x7D832799 + .long 0x7DBA2799 + addi 4,4,0x30 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_two: + vxor 7,4,17 + vxor 12,5,18 + vxor 13,13,13 + vxor 14,14,14 + vxor 15,15,15 + + bl _aesp8_xts_dec5x + + vperm 7,7,7,6 + vor 17,19,19 + vor 18,20,20 + vperm 12,12,12,6 + .long 0x7CE02799 + vxor 7,0,20 + .long 0x7D832799 + addi 4,4,0x20 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_one: + vxor 7,5,17 + nop +Loop_xts_dec1x: + .long 0x10E7C548 + lvx 24,26,7 + addi 7,7,0x20 + + .long 0x10E7CD48 + lvx 25,3,7 + bc 16,0,Loop_xts_dec1x + + subi 0,31,1 + .long 0x10E7C548 + + andi. 
0,0,16 + cmpwi 31,0 + .long 0x10E7CD48 + + sub 10,10,0 + .long 0x10E7D548 + + .long 0x7C005699 + .long 0x10E7DD48 + + addi 7,1,64+15 + .long 0x10E7E548 + lvx 24,0,7 + + .long 0x10E7ED48 + lvx 25,3,7 + vxor 17,17,31 + + vperm 0,0,0,6 + .long 0x10E7F548 + + mtctr 9 + .long 0x10E78D49 + + vor 17,18,18 + vor 18,19,19 + vperm 7,7,7,6 + .long 0x7CE02799 + addi 4,4,0x10 + vxor 7,0,19 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_zero: + cmpwi 31,0 + beq Lxts_dec6x_done + + .long 0x7C005699 + vperm 0,0,0,6 + vxor 7,0,18 +Lxts_dec6x_steal: + .long 0x10E7C548 + lvx 24,26,7 + addi 7,7,0x20 + + .long 0x10E7CD48 + lvx 25,3,7 + bc 16,0,Lxts_dec6x_steal + + add 10,10,31 + .long 0x10E7C548 + + cmpwi 31,0 + .long 0x10E7CD48 + + .long 0x7C005699 + .long 0x10E7D548 + + lvsr 5,0,31 + .long 0x10E7DD48 + + addi 7,1,64+15 + .long 0x10E7E548 + lvx 24,0,7 + + .long 0x10E7ED48 + lvx 25,3,7 + vxor 18,18,31 + + vperm 0,0,0,6 + .long 0x10E7F548 + + vperm 0,0,0,5 + .long 0x11679549 + + vperm 7,11,11,6 + .long 0x7CE02799 + + + vxor 7,7,7 + vspltisb 12,-1 + vperm 7,7,12,5 + vsel 7,0,11,7 + vxor 7,7,17 + + subi 30,4,1 + mtctr 31 +Loop_xts_dec6x_steal: + lbzu 0,1(30) + stb 0,16(30) + bc 16,0,Loop_xts_dec6x_steal + + li 31,0 + mtctr 9 + b Loop_xts_dec1x + +.align 4 +Lxts_dec6x_done: + cmpldi 8,0 + beq Lxts_dec6x_ret + + vxor 8,17,23 + vperm 8,8,8,6 + .long 0x7D004799 + +Lxts_dec6x_ret: + mtlr 11 + li 10,79 + li 11,95 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + + mtspr 256,12 + lvx 20,10,1 + addi 10,10,32 + lvx 21,11,1 + addi 11,11,32 + lvx 22,10,1 + addi 10,10,32 + lvx 23,11,1 + addi 11,11,32 + lvx 24,10,1 + addi 10,10,32 + lvx 25,11,1 + addi 11,11,32 + lvx 26,10,1 + addi 10,10,32 + lvx 27,11,1 + addi 11,11,32 + lvx 28,10,1 + addi 10,10,32 + lvx 29,11,1 + addi 11,11,32 + lvx 30,10,1 + lvx 31,11,1 + ld 26,400(1) + ld 27,408(1) + ld 28,416(1) + ld 29,424(1) + ld 30,432(1) + ld 31,440(1) + addi 1,1,448 + blr +.long 0 +.byte 0,12,0x04,1,0x80,6,6,0 +.long 0 + +.align 5 +_aesp8_xts_dec5x: + .long 0x10E7C548 + .long 0x118CC548 + .long 0x11ADC548 + .long 0x11CEC548 + .long 0x11EFC548 + lvx 24,26,7 + addi 7,7,0x20 + + .long 0x10E7CD48 + .long 0x118CCD48 + .long 0x11ADCD48 + .long 0x11CECD48 + .long 0x11EFCD48 + lvx 25,3,7 + bc 16,0,_aesp8_xts_dec5x + + subi 0,31,1 + .long 0x10E7C548 + .long 0x118CC548 + .long 0x11ADC548 + .long 0x11CEC548 + .long 0x11EFC548 + + andi. 
0,0,16 + cmpwi 31,0 + .long 0x10E7CD48 + .long 0x118CCD48 + .long 0x11ADCD48 + .long 0x11CECD48 + .long 0x11EFCD48 + vxor 17,17,31 + + sub 10,10,0 + .long 0x10E7D548 + .long 0x118CD548 + .long 0x11ADD548 + .long 0x11CED548 + .long 0x11EFD548 + vxor 1,18,31 + + .long 0x10E7DD48 + .long 0x7C005699 + .long 0x118CDD48 + .long 0x11ADDD48 + .long 0x11CEDD48 + .long 0x11EFDD48 + vxor 2,19,31 + + addi 7,1,64+15 + .long 0x10E7E548 + .long 0x118CE548 + .long 0x11ADE548 + .long 0x11CEE548 + .long 0x11EFE548 + lvx 24,0,7 + vxor 3,20,31 + + .long 0x10E7ED48 + vperm 0,0,0,6 + .long 0x118CED48 + .long 0x11ADED48 + .long 0x11CEED48 + .long 0x11EFED48 + lvx 25,3,7 + vxor 4,21,31 + + .long 0x10E7F548 + .long 0x118CF548 + .long 0x11ADF548 + .long 0x11CEF548 + .long 0x11EFF548 + + .long 0x10E78D49 + .long 0x118C0D49 + .long 0x11AD1549 + .long 0x11CE1D49 + .long 0x11EF2549 + mtctr 9 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 Index: cipher/rijndael-ppc832.S =================================================================== --- /dev/null +++ cipher/rijndael-ppc832.S @@ -0,0 +1,3633 @@ +.machine "any" + +.text + +.align 7 +rcon: +.byte 0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00 +.byte 0x1b,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x1b,0x00,0x00,0x00 +.byte 0x0d,0x0e,0x0f,0x0c,0x0d,0x0e,0x0f,0x0c,0x0d,0x0e,0x0f,0x0c,0x0d,0x0e,0x0f,0x0c +.byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +Lconsts: + mflr 0 + bcl 20,31,$+4 + mflr 6 + addi 6,6,-0x48 + mtlr 0 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 +.byte 65,69,83,32,102,111,114,32,80,111,119,101,114,73,83,65,32,50,46,48,55,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 + +.globl aes_p8_set_encrypt_key +.align 5 +aes_p8_set_encrypt_key: +Lset_encrypt_key: + mflr 11 + stw 11,4(1) + + li 6,-1 + cmplwi 3,0 + beq- Lenc_key_abort + cmplwi 5,0 + beq- Lenc_key_abort + li 6,-2 + cmpwi 4,128 + blt- Lenc_key_abort + cmpwi 4,256 + bgt- Lenc_key_abort + andi. 
0,4,0x3f + bne- Lenc_key_abort + + lis 0,0xfff0 + mfspr 12,256 + mtspr 256,0 + + bl Lconsts + mtlr 11 + + neg 9,3 + lvx 1,0,3 + addi 3,3,15 + lvsr 3,0,9 + li 8,0x20 + cmpwi 4,192 + lvx 2,0,3 + + lvx 4,0,6 + + lvx 5,8,6 + addi 6,6,0x10 + vperm 1,1,2,3 + li 7,8 + vxor 0,0,0 + mtctr 7 + + lvsr 8,0,5 + vspltisb 9,-1 + lvx 10,0,5 + vperm 9,0,9,8 + + blt Loop128 + addi 3,3,8 + beq L192 + addi 3,3,8 + b L256 + +.align 4 +Loop128: + vperm 3,1,1,5 + vsldoi 6,0,1,12 + vperm 11,1,1,8 + vsel 7,10,11,9 + vor 10,11,11 + .long 0x10632509 + stvx 7,0,5 + addi 5,5,16 + + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + vadduwm 4,4,4 + vxor 1,1,3 + bc 16,0,Loop128 + + lvx 4,0,6 + + vperm 3,1,1,5 + vsldoi 6,0,1,12 + vperm 11,1,1,8 + vsel 7,10,11,9 + vor 10,11,11 + .long 0x10632509 + stvx 7,0,5 + addi 5,5,16 + + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + vadduwm 4,4,4 + vxor 1,1,3 + + vperm 3,1,1,5 + vsldoi 6,0,1,12 + vperm 11,1,1,8 + vsel 7,10,11,9 + vor 10,11,11 + .long 0x10632509 + stvx 7,0,5 + addi 5,5,16 + + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + vxor 1,1,3 + vperm 11,1,1,8 + vsel 7,10,11,9 + vor 10,11,11 + stvx 7,0,5 + + addi 3,5,15 + addi 5,5,0x50 + + li 8,10 + b Ldone + +.align 4 +L192: + lvx 6,0,3 + li 7,4 + vperm 11,1,1,8 + vsel 7,10,11,9 + vor 10,11,11 + stvx 7,0,5 + addi 5,5,16 + vperm 2,2,6,3 + vspltisb 3,8 + mtctr 7 + vsububm 5,5,3 + +Loop192: + vperm 3,2,2,5 + vsldoi 6,0,1,12 + .long 0x10632509 + + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + + vsldoi 7,0,2,8 + vspltw 6,1,3 + vxor 6,6,2 + vsldoi 2,0,2,12 + vadduwm 4,4,4 + vxor 2,2,6 + vxor 1,1,3 + vxor 2,2,3 + vsldoi 7,7,1,8 + + vperm 3,2,2,5 + vsldoi 6,0,1,12 + vperm 11,7,7,8 + vsel 7,10,11,9 + vor 10,11,11 + .long 0x10632509 + stvx 7,0,5 + addi 5,5,16 + + vsldoi 7,1,2,8 + vxor 1,1,6 + vsldoi 6,0,6,12 + vperm 11,7,7,8 + vsel 7,10,11,9 + vor 10,11,11 + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + stvx 7,0,5 + addi 5,5,16 + + vspltw 6,1,3 + vxor 6,6,2 + vsldoi 2,0,2,12 + vadduwm 4,4,4 + vxor 2,2,6 + vxor 1,1,3 + vxor 2,2,3 + vperm 11,1,1,8 + vsel 7,10,11,9 + vor 10,11,11 + stvx 7,0,5 + addi 3,5,15 + addi 5,5,16 + bc 16,0,Loop192 + + li 8,12 + addi 5,5,0x20 + b Ldone + +.align 4 +L256: + lvx 6,0,3 + li 7,7 + li 8,14 + vperm 11,1,1,8 + vsel 7,10,11,9 + vor 10,11,11 + stvx 7,0,5 + addi 5,5,16 + vperm 2,2,6,3 + mtctr 7 + +Loop256: + vperm 3,2,2,5 + vsldoi 6,0,1,12 + vperm 11,2,2,8 + vsel 7,10,11,9 + vor 10,11,11 + .long 0x10632509 + stvx 7,0,5 + addi 5,5,16 + + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + vadduwm 4,4,4 + vxor 1,1,3 + vperm 11,1,1,8 + vsel 7,10,11,9 + vor 10,11,11 + stvx 7,0,5 + addi 3,5,15 + addi 5,5,16 + bdz Ldone + + vspltw 3,1,3 + vsldoi 6,0,2,12 + .long 0x106305C8 + + vxor 2,2,6 + vsldoi 6,0,6,12 + vxor 2,2,6 + vsldoi 6,0,6,12 + vxor 2,2,6 + + vxor 2,2,3 + b Loop256 + +.align 4 +Ldone: + lvx 2,0,3 + vsel 2,10,2,9 + stvx 2,0,3 + li 6,0 + mtspr 256,12 + stw 8,0(5) + +Lenc_key_abort: + mr 3,6 + blr +.long 0 +.byte 0,12,0x14,1,0,0,3,0 +.long 0 + + +.globl aes_p8_set_decrypt_key +.align 5 +aes_p8_set_decrypt_key: + stwu 1,-32(1) + mflr 10 + stw 10,32+4(1) + bl Lset_encrypt_key + mtlr 10 + + cmpwi 3,0 + bne- Ldec_key_abort + + slwi 7,8,4 + subi 3,5,240 + srwi 8,8,1 + add 5,3,7 + mtctr 8 + +Ldeckey: + lwz 0, 0(3) + lwz 6, 4(3) + lwz 7, 8(3) + lwz 8, 12(3) + addi 3,3,16 + lwz 9, 0(5) + lwz 10,4(5) + lwz 11,8(5) + lwz 12,12(5) + stw 0, 0(5) + stw 6, 4(5) + stw 7, 8(5) + stw 8, 12(5) + 
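# (A sketch of what this loop is doing, read directly from the code above and
# below: Ldeckey reverses the expanded key schedule in place, with r3 walking
# forward and r5 walking backward in 16-byte steps and the two round keys
# exchanged on each pass, so that the decrypt path can read the schedule
# front-to-back.)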
subi 5,5,16 + stw 9, -16(3) + stw 10,-12(3) + stw 11,-8(3) + stw 12,-4(3) + bc 16,0,Ldeckey + + xor 3,3,3 +Ldec_key_abort: + addi 1,1,32 + blr +.long 0 +.byte 0,12,4,1,0x80,0,3,0 +.long 0 + +.globl aes_p8_encrypt +.align 5 +aes_p8_encrypt: + lwz 6,240(5) + lis 0,0xfc00 + mfspr 12,256 + li 7,15 + mtspr 256,0 + + lvx 0,0,3 + neg 11,4 + lvx 1,7,3 + lvsl 2,0,3 + + lvsl 3,0,11 + + li 7,16 + vperm 0,0,1,2 + lvx 1,0,5 + lvsl 5,0,5 + srwi 6,6,1 + lvx 2,7,5 + addi 7,7,16 + subi 6,6,1 + vperm 1,1,2,5 + + vxor 0,0,1 + lvx 1,7,5 + addi 7,7,16 + mtctr 6 + +Loop_enc: + vperm 2,2,1,5 + .long 0x10001508 + lvx 2,7,5 + addi 7,7,16 + vperm 1,1,2,5 + .long 0x10000D08 + lvx 1,7,5 + addi 7,7,16 + bc 16,0,Loop_enc + + vperm 2,2,1,5 + .long 0x10001508 + lvx 2,7,5 + vperm 1,1,2,5 + .long 0x10000D09 + + vspltisb 2,-1 + vxor 1,1,1 + li 7,15 + vperm 2,1,2,3 + + lvx 1,0,4 + vperm 0,0,0,3 + vsel 1,1,0,2 + lvx 4,7,4 + stvx 1,0,4 + vsel 0,0,4,2 + stvx 0,7,4 + + mtspr 256,12 + blr +.long 0 +.byte 0,12,0x14,0,0,0,3,0 +.long 0 + +.globl aes_p8_decrypt +.align 5 +aes_p8_decrypt: + lwz 6,240(5) + lis 0,0xfc00 + mfspr 12,256 + li 7,15 + mtspr 256,0 + + lvx 0,0,3 + neg 11,4 + lvx 1,7,3 + lvsl 2,0,3 + + lvsl 3,0,11 + + li 7,16 + vperm 0,0,1,2 + lvx 1,0,5 + lvsl 5,0,5 + srwi 6,6,1 + lvx 2,7,5 + addi 7,7,16 + subi 6,6,1 + vperm 1,1,2,5 + + vxor 0,0,1 + lvx 1,7,5 + addi 7,7,16 + mtctr 6 + +Loop_dec: + vperm 2,2,1,5 + .long 0x10001548 + lvx 2,7,5 + addi 7,7,16 + vperm 1,1,2,5 + .long 0x10000D48 + lvx 1,7,5 + addi 7,7,16 + bc 16,0,Loop_dec + + vperm 2,2,1,5 + .long 0x10001548 + lvx 2,7,5 + vperm 1,1,2,5 + .long 0x10000D49 + + vspltisb 2,-1 + vxor 1,1,1 + li 7,15 + vperm 2,1,2,3 + + lvx 1,0,4 + vperm 0,0,0,3 + vsel 1,1,0,2 + lvx 4,7,4 + stvx 1,0,4 + vsel 0,0,4,2 + stvx 0,7,4 + + mtspr 256,12 + blr +.long 0 +.byte 0,12,0x14,0,0,0,3,0 +.long 0 + +.globl aes_p8_cbc_encrypt +.align 5 +aes_p8_cbc_encrypt: + cmplwi 5,16 + bclr 14,0 + + cmpwi 8,0 + lis 0,0xffe0 + mfspr 12,256 + mtspr 256,0 + + li 10,15 + vxor 0,0,0 + + + lvx 4,0,7 + lvsl 6,0,7 + lvx 5,10,7 + + vperm 4,4,5,6 + + neg 11,3 + lvsl 10,0,6 + lwz 9,240(6) + + lvsr 6,0,11 + lvx 5,0,3 + addi 3,3,15 + + + lvsr 8,0,4 + vspltisb 9,-1 + lvx 7,0,4 + vperm 9,0,9,8 + + + srwi 9,9,1 + li 10,16 + subi 9,9,1 + beq Lcbc_dec + +Lcbc_enc: + vor 2,5,5 + lvx 5,0,3 + addi 3,3,16 + mtctr 9 + subi 5,5,16 + + lvx 0,0,6 + vperm 2,2,5,6 + lvx 1,10,6 + addi 10,10,16 + vperm 0,0,1,10 + vxor 2,2,0 + lvx 0,10,6 + addi 10,10,16 + vxor 2,2,4 + +Loop_cbc_enc: + vperm 1,1,0,10 + .long 0x10420D08 + lvx 1,10,6 + addi 10,10,16 + vperm 0,0,1,10 + .long 0x10420508 + lvx 0,10,6 + addi 10,10,16 + bc 16,0,Loop_cbc_enc + + vperm 1,1,0,10 + .long 0x10420D08 + lvx 1,10,6 + li 10,16 + vperm 0,0,1,10 + .long 0x10820509 + cmplwi 5,16 + + vperm 3,4,4,8 + vsel 2,7,3,9 + vor 7,3,3 + stvx 2,0,4 + addi 4,4,16 + bge Lcbc_enc + + b Lcbc_done + +.align 4 +Lcbc_dec: + cmplwi 5,128 + bge _aesp8_cbc_decrypt8x + vor 3,5,5 + lvx 5,0,3 + addi 3,3,16 + mtctr 9 + subi 5,5,16 + + lvx 0,0,6 + vperm 3,3,5,6 + lvx 1,10,6 + addi 10,10,16 + vperm 0,0,1,10 + vxor 2,3,0 + lvx 0,10,6 + addi 10,10,16 + +Loop_cbc_dec: + vperm 1,1,0,10 + .long 0x10420D48 + lvx 1,10,6 + addi 10,10,16 + vperm 0,0,1,10 + .long 0x10420548 + lvx 0,10,6 + addi 10,10,16 + bc 16,0,Loop_cbc_dec + + vperm 1,1,0,10 + .long 0x10420D48 + lvx 1,10,6 + li 10,16 + vperm 0,0,1,10 + .long 0x10420549 + cmplwi 5,16 + + vxor 2,2,4 + vor 4,3,3 + vperm 3,2,2,8 + vsel 2,7,3,9 + vor 7,3,3 + stvx 2,0,4 + addi 4,4,16 + bge Lcbc_dec + +Lcbc_done: + addi 4,4,-1 + lvx 2,0,4 + vsel 2,7,2,9 + stvx 
2,0,4 + + neg 8,7 + li 10,15 + vxor 0,0,0 + vspltisb 9,-1 + + lvsl 8,0,8 + vperm 9,0,9,8 + + lvx 7,0,7 + vperm 4,4,4,8 + vsel 2,7,4,9 + lvx 5,10,7 + stvx 2,0,7 + vsel 2,4,5,9 + stvx 2,10,7 + + mtspr 256,12 + blr +.long 0 +.byte 0,12,0x14,0,0,0,6,0 +.long 0 +.align 5 +_aesp8_cbc_decrypt8x: + stwu 1,-392(1) + li 10,175 + li 11,191 + stvx 20,10,1 + addi 10,10,32 + stvx 21,11,1 + addi 11,11,32 + stvx 22,10,1 + addi 10,10,32 + stvx 23,11,1 + addi 11,11,32 + stvx 24,10,1 + addi 10,10,32 + stvx 25,11,1 + addi 11,11,32 + stvx 26,10,1 + addi 10,10,32 + stvx 27,11,1 + addi 11,11,32 + stvx 28,10,1 + addi 10,10,32 + stvx 29,11,1 + addi 11,11,32 + stvx 30,10,1 + stvx 31,11,1 + li 0,-1 + stw 12,364(1) + li 8,0x10 + stw 26,368(1) + li 26,0x20 + stw 27,372(1) + li 27,0x30 + stw 28,376(1) + li 28,0x40 + stw 29,380(1) + li 29,0x50 + stw 30,384(1) + li 30,0x60 + stw 31,388(1) + li 31,0x70 + mtspr 256,0 + + subi 9,9,3 + subi 5,5,128 + + lvx 23,0,6 + lvx 30,8,6 + addi 6,6,0x20 + lvx 31,0,6 + vperm 23,23,30,10 + addi 11,1,32+15 + mtctr 9 + +Load_cbc_dec_key: + vperm 24,30,31,10 + lvx 30,8,6 + addi 6,6,0x20 + stvx 24,0,11 + vperm 25,31,30,10 + lvx 31,0,6 + stvx 25,8,11 + addi 11,11,0x20 + bc 16,0,Load_cbc_dec_key + + lvx 26,8,6 + vperm 24,30,31,10 + lvx 27,26,6 + stvx 24,0,11 + vperm 25,31,26,10 + lvx 28,27,6 + stvx 25,8,11 + addi 11,1,32+15 + vperm 26,26,27,10 + lvx 29,28,6 + vperm 27,27,28,10 + lvx 30,29,6 + vperm 28,28,29,10 + lvx 31,30,6 + vperm 29,29,30,10 + lvx 14,31,6 + vperm 30,30,31,10 + lvx 24,0,11 + vperm 31,31,14,10 + lvx 25,8,11 + + + + subi 3,3,15 + + + .long 0x7C001E99 + + + .long 0x7C281E99 + + .long 0x7C5A1E99 + + .long 0x7C7B1E99 + + .long 0x7D5C1E99 + + vxor 14,0,23 + .long 0x7D7D1E99 + + vxor 15,1,23 + .long 0x7D9E1E99 + + vxor 16,2,23 + .long 0x7DBF1E99 + addi 3,3,0x80 + + vxor 17,3,23 + + vxor 18,10,23 + + vxor 19,11,23 + vxor 20,12,23 + vxor 21,13,23 + + mtctr 9 + b Loop_cbc_dec8x +.align 5 +Loop_cbc_dec8x: + .long 0x11CEC548 + .long 0x11EFC548 + .long 0x1210C548 + .long 0x1231C548 + .long 0x1252C548 + .long 0x1273C548 + .long 0x1294C548 + .long 0x12B5C548 + lvx 24,26,11 + addi 11,11,0x20 + + .long 0x11CECD48 + .long 0x11EFCD48 + .long 0x1210CD48 + .long 0x1231CD48 + .long 0x1252CD48 + .long 0x1273CD48 + .long 0x1294CD48 + .long 0x12B5CD48 + lvx 25,8,11 + bc 16,0,Loop_cbc_dec8x + + subic 5,5,128 + .long 0x11CEC548 + .long 0x11EFC548 + .long 0x1210C548 + .long 0x1231C548 + .long 0x1252C548 + .long 0x1273C548 + .long 0x1294C548 + .long 0x12B5C548 + + subfe. 
0,0,0 + .long 0x11CECD48 + .long 0x11EFCD48 + .long 0x1210CD48 + .long 0x1231CD48 + .long 0x1252CD48 + .long 0x1273CD48 + .long 0x1294CD48 + .long 0x12B5CD48 + + and 0,0,5 + .long 0x11CED548 + .long 0x11EFD548 + .long 0x1210D548 + .long 0x1231D548 + .long 0x1252D548 + .long 0x1273D548 + .long 0x1294D548 + .long 0x12B5D548 + + add 3,3,0 + + + + .long 0x11CEDD48 + .long 0x11EFDD48 + .long 0x1210DD48 + .long 0x1231DD48 + .long 0x1252DD48 + .long 0x1273DD48 + .long 0x1294DD48 + .long 0x12B5DD48 + + addi 11,1,32+15 + .long 0x11CEE548 + .long 0x11EFE548 + .long 0x1210E548 + .long 0x1231E548 + .long 0x1252E548 + .long 0x1273E548 + .long 0x1294E548 + .long 0x12B5E548 + lvx 24,0,11 + + .long 0x11CEED48 + .long 0x11EFED48 + .long 0x1210ED48 + .long 0x1231ED48 + .long 0x1252ED48 + .long 0x1273ED48 + .long 0x1294ED48 + .long 0x12B5ED48 + lvx 25,8,11 + + .long 0x11CEF548 + vxor 4,4,31 + .long 0x11EFF548 + vxor 0,0,31 + .long 0x1210F548 + vxor 1,1,31 + .long 0x1231F548 + vxor 2,2,31 + .long 0x1252F548 + vxor 3,3,31 + .long 0x1273F548 + vxor 10,10,31 + .long 0x1294F548 + vxor 11,11,31 + .long 0x12B5F548 + vxor 12,12,31 + + .long 0x11CE2549 + .long 0x11EF0549 + .long 0x7C001E99 + .long 0x12100D49 + .long 0x7C281E99 + .long 0x12311549 + + .long 0x7C5A1E99 + .long 0x12521D49 + + .long 0x7C7B1E99 + .long 0x12735549 + + .long 0x7D5C1E99 + .long 0x12945D49 + + .long 0x7D7D1E99 + .long 0x12B56549 + + .long 0x7D9E1E99 + vor 4,13,13 + + .long 0x7DBF1E99 + addi 3,3,0x80 + + + + .long 0x7DC02799 + + vxor 14,0,23 + + .long 0x7DE82799 + + vxor 15,1,23 + + .long 0x7E1A2799 + vxor 16,2,23 + + .long 0x7E3B2799 + vxor 17,3,23 + + .long 0x7E5C2799 + vxor 18,10,23 + + .long 0x7E7D2799 + vxor 19,11,23 + + .long 0x7E9E2799 + vxor 20,12,23 + .long 0x7EBF2799 + addi 4,4,0x80 + vxor 21,13,23 + + mtctr 9 + beq Loop_cbc_dec8x + + addic. 
5,5,128 + beq Lcbc_dec8x_done + nop + nop + +Loop_cbc_dec8x_tail: + .long 0x11EFC548 + .long 0x1210C548 + .long 0x1231C548 + .long 0x1252C548 + .long 0x1273C548 + .long 0x1294C548 + .long 0x12B5C548 + lvx 24,26,11 + addi 11,11,0x20 + + .long 0x11EFCD48 + .long 0x1210CD48 + .long 0x1231CD48 + .long 0x1252CD48 + .long 0x1273CD48 + .long 0x1294CD48 + .long 0x12B5CD48 + lvx 25,8,11 + bc 16,0,Loop_cbc_dec8x_tail + + .long 0x11EFC548 + .long 0x1210C548 + .long 0x1231C548 + .long 0x1252C548 + .long 0x1273C548 + .long 0x1294C548 + .long 0x12B5C548 + + .long 0x11EFCD48 + .long 0x1210CD48 + .long 0x1231CD48 + .long 0x1252CD48 + .long 0x1273CD48 + .long 0x1294CD48 + .long 0x12B5CD48 + + .long 0x11EFD548 + .long 0x1210D548 + .long 0x1231D548 + .long 0x1252D548 + .long 0x1273D548 + .long 0x1294D548 + .long 0x12B5D548 + + .long 0x11EFDD48 + .long 0x1210DD48 + .long 0x1231DD48 + .long 0x1252DD48 + .long 0x1273DD48 + .long 0x1294DD48 + .long 0x12B5DD48 + + .long 0x11EFE548 + .long 0x1210E548 + .long 0x1231E548 + .long 0x1252E548 + .long 0x1273E548 + .long 0x1294E548 + .long 0x12B5E548 + + .long 0x11EFED48 + .long 0x1210ED48 + .long 0x1231ED48 + .long 0x1252ED48 + .long 0x1273ED48 + .long 0x1294ED48 + .long 0x12B5ED48 + + .long 0x11EFF548 + vxor 4,4,31 + .long 0x1210F548 + vxor 1,1,31 + .long 0x1231F548 + vxor 2,2,31 + .long 0x1252F548 + vxor 3,3,31 + .long 0x1273F548 + vxor 10,10,31 + .long 0x1294F548 + vxor 11,11,31 + .long 0x12B5F548 + vxor 12,12,31 + + cmplwi 5,32 + blt Lcbc_dec8x_one + nop + beq Lcbc_dec8x_two + cmplwi 5,64 + blt Lcbc_dec8x_three + nop + beq Lcbc_dec8x_four + cmplwi 5,96 + blt Lcbc_dec8x_five + nop + beq Lcbc_dec8x_six + +Lcbc_dec8x_seven: + .long 0x11EF2549 + .long 0x12100D49 + .long 0x12311549 + .long 0x12521D49 + .long 0x12735549 + .long 0x12945D49 + .long 0x12B56549 + vor 4,13,13 + + + + .long 0x7DE02799 + + .long 0x7E082799 + + .long 0x7E3A2799 + + .long 0x7E5B2799 + + .long 0x7E7C2799 + + .long 0x7E9D2799 + .long 0x7EBE2799 + addi 4,4,0x70 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_six: + .long 0x12102549 + .long 0x12311549 + .long 0x12521D49 + .long 0x12735549 + .long 0x12945D49 + .long 0x12B56549 + vor 4,13,13 + + + + .long 0x7E002799 + + .long 0x7E282799 + + .long 0x7E5A2799 + + .long 0x7E7B2799 + + .long 0x7E9C2799 + .long 0x7EBD2799 + addi 4,4,0x60 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_five: + .long 0x12312549 + .long 0x12521D49 + .long 0x12735549 + .long 0x12945D49 + .long 0x12B56549 + vor 4,13,13 + + + + .long 0x7E202799 + + .long 0x7E482799 + + .long 0x7E7A2799 + + .long 0x7E9B2799 + .long 0x7EBC2799 + addi 4,4,0x50 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_four: + .long 0x12522549 + .long 0x12735549 + .long 0x12945D49 + .long 0x12B56549 + vor 4,13,13 + + + + .long 0x7E402799 + + .long 0x7E682799 + + .long 0x7E9A2799 + .long 0x7EBB2799 + addi 4,4,0x40 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_three: + .long 0x12732549 + .long 0x12945D49 + .long 0x12B56549 + vor 4,13,13 + + + + .long 0x7E602799 + + .long 0x7E882799 + .long 0x7EBA2799 + addi 4,4,0x30 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_two: + .long 0x12942549 + .long 0x12B56549 + vor 4,13,13 + + + + .long 0x7E802799 + .long 0x7EA82799 + addi 4,4,0x20 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_one: + .long 0x12B52549 + vor 4,13,13 + + + .long 0x7EA02799 + addi 4,4,0x10 + +Lcbc_dec8x_done: + + .long 0x7C803F99 + + li 10,47 + li 11,63 + stvx 6,10,1 + addi 10,10,32 + stvx 6,11,1 + addi 11,11,32 + stvx 6,10,1 + addi 10,10,32 + stvx 6,11,1 + addi 11,11,32 + stvx 6,10,1 + addi 10,10,32 + stvx 6,11,1 + addi 
11,11,32 + stvx 6,10,1 + addi 10,10,32 + stvx 6,11,1 + addi 11,11,32 + + mtspr 256,12 + lvx 20,10,1 + addi 10,10,32 + lvx 21,11,1 + addi 11,11,32 + lvx 22,10,1 + addi 10,10,32 + lvx 23,11,1 + addi 11,11,32 + lvx 24,10,1 + addi 10,10,32 + lvx 25,11,1 + addi 11,11,32 + lvx 26,10,1 + addi 10,10,32 + lvx 27,11,1 + addi 11,11,32 + lvx 28,10,1 + addi 10,10,32 + lvx 29,11,1 + addi 11,11,32 + lvx 30,10,1 + lvx 31,11,1 + lwz 26,368(1) + lwz 27,372(1) + lwz 28,376(1) + lwz 29,380(1) + lwz 30,384(1) + lwz 31,388(1) + addi 1,1,392 + blr +.long 0 +.byte 0,12,0x04,0,0x80,6,6,0 +.long 0 + +.globl aes_p8_ctr32_encrypt_blocks +.align 5 +aes_p8_ctr32_encrypt_blocks: + cmplwi 5,1 + bclr 14,0 + + lis 0,0xfff0 + mfspr 12,256 + mtspr 256,0 + + li 10,15 + vxor 0,0,0 + + + lvx 4,0,7 + lvsl 6,0,7 + lvx 5,10,7 + vspltisb 11,1 + + vperm 4,4,5,6 + vsldoi 11,0,11,1 + + neg 11,3 + lvsl 10,0,6 + lwz 9,240(6) + + lvsr 6,0,11 + lvx 5,0,3 + addi 3,3,15 + + + srwi 9,9,1 + li 10,16 + subi 9,9,1 + + cmplwi 5,8 + bge _aesp8_ctr32_encrypt8x + + lvsr 8,0,4 + vspltisb 9,-1 + lvx 7,0,4 + vperm 9,0,9,8 + + + lvx 0,0,6 + mtctr 9 + lvx 1,10,6 + addi 10,10,16 + vperm 0,0,1,10 + vxor 2,4,0 + lvx 0,10,6 + addi 10,10,16 + b Loop_ctr32_enc + +.align 5 +Loop_ctr32_enc: + vperm 1,1,0,10 + .long 0x10420D08 + lvx 1,10,6 + addi 10,10,16 + vperm 0,0,1,10 + .long 0x10420508 + lvx 0,10,6 + addi 10,10,16 + bc 16,0,Loop_ctr32_enc + + vadduwm 4,4,11 + vor 3,5,5 + lvx 5,0,3 + addi 3,3,16 + subic. 5,5,1 + + vperm 1,1,0,10 + .long 0x10420D08 + lvx 1,10,6 + vperm 3,3,5,6 + li 10,16 + vperm 1,0,1,10 + lvx 0,0,6 + vxor 3,3,1 + .long 0x10421D09 + + lvx 1,10,6 + addi 10,10,16 + vperm 2,2,2,8 + vsel 3,7,2,9 + mtctr 9 + vperm 0,0,1,10 + vor 7,2,2 + vxor 2,4,0 + lvx 0,10,6 + addi 10,10,16 + stvx 3,0,4 + addi 4,4,16 + bne Loop_ctr32_enc + + addi 4,4,-1 + lvx 2,0,4 + vsel 2,7,2,9 + stvx 2,0,4 + + mtspr 256,12 + blr +.long 0 +.byte 0,12,0x14,0,0,0,6,0 +.long 0 +.align 5 +_aesp8_ctr32_encrypt8x: + stwu 1,-392(1) + li 10,175 + li 11,191 + stvx 20,10,1 + addi 10,10,32 + stvx 21,11,1 + addi 11,11,32 + stvx 22,10,1 + addi 10,10,32 + stvx 23,11,1 + addi 11,11,32 + stvx 24,10,1 + addi 10,10,32 + stvx 25,11,1 + addi 11,11,32 + stvx 26,10,1 + addi 10,10,32 + stvx 27,11,1 + addi 11,11,32 + stvx 28,10,1 + addi 10,10,32 + stvx 29,11,1 + addi 11,11,32 + stvx 30,10,1 + stvx 31,11,1 + li 0,-1 + stw 12,364(1) + li 8,0x10 + stw 26,368(1) + li 26,0x20 + stw 27,372(1) + li 27,0x30 + stw 28,376(1) + li 28,0x40 + stw 29,380(1) + li 29,0x50 + stw 30,384(1) + li 30,0x60 + stw 31,388(1) + li 31,0x70 + mtspr 256,0 + + subi 9,9,3 + + lvx 23,0,6 + lvx 30,8,6 + addi 6,6,0x20 + lvx 31,0,6 + vperm 23,23,30,10 + addi 11,1,32+15 + mtctr 9 + +Load_ctr32_enc_key: + vperm 24,30,31,10 + lvx 30,8,6 + addi 6,6,0x20 + stvx 24,0,11 + vperm 25,31,30,10 + lvx 31,0,6 + stvx 25,8,11 + addi 11,11,0x20 + bc 16,0,Load_ctr32_enc_key + + lvx 26,8,6 + vperm 24,30,31,10 + lvx 27,26,6 + stvx 24,0,11 + vperm 25,31,26,10 + lvx 28,27,6 + stvx 25,8,11 + addi 11,1,32+15 + vperm 26,26,27,10 + lvx 29,28,6 + vperm 27,27,28,10 + lvx 30,29,6 + vperm 28,28,29,10 + lvx 31,30,6 + vperm 29,29,30,10 + lvx 15,31,6 + vperm 30,30,31,10 + lvx 24,0,11 + vperm 31,31,15,10 + lvx 25,8,11 + + vadduwm 7,11,11 + subi 3,3,15 + slwi 5,5,4 + + vadduwm 16,4,11 + vadduwm 17,4,7 + vxor 15,4,23 + + vadduwm 18,16,7 + vxor 16,16,23 + + vadduwm 19,17,7 + vxor 17,17,23 + + vadduwm 20,18,7 + vxor 18,18,23 + + vadduwm 21,19,7 + vxor 19,19,23 + vadduwm 22,20,7 + vxor 20,20,23 + vadduwm 4,21,7 + vxor 21,21,23 + vxor 22,22,23 + + mtctr 9 + b 
Loop_ctr32_enc8x +.align 5 +Loop_ctr32_enc8x: + .long 0x11EFC508 + .long 0x1210C508 + .long 0x1231C508 + .long 0x1252C508 + .long 0x1273C508 + .long 0x1294C508 + .long 0x12B5C508 + .long 0x12D6C508 +Loop_ctr32_enc8x_middle: + lvx 24,26,11 + addi 11,11,0x20 + + .long 0x11EFCD08 + .long 0x1210CD08 + .long 0x1231CD08 + .long 0x1252CD08 + .long 0x1273CD08 + .long 0x1294CD08 + .long 0x12B5CD08 + .long 0x12D6CD08 + lvx 25,8,11 + bc 16,0,Loop_ctr32_enc8x + + subic 11,5,256 + .long 0x11EFC508 + .long 0x1210C508 + .long 0x1231C508 + .long 0x1252C508 + .long 0x1273C508 + .long 0x1294C508 + .long 0x12B5C508 + .long 0x12D6C508 + + subfe 0,0,0 + .long 0x11EFCD08 + .long 0x1210CD08 + .long 0x1231CD08 + .long 0x1252CD08 + .long 0x1273CD08 + .long 0x1294CD08 + .long 0x12B5CD08 + .long 0x12D6CD08 + + and 0,0,11 + addi 11,1,32+15 + .long 0x11EFD508 + .long 0x1210D508 + .long 0x1231D508 + .long 0x1252D508 + .long 0x1273D508 + .long 0x1294D508 + .long 0x12B5D508 + .long 0x12D6D508 + lvx 24,0,11 + + subic 5,5,129 + .long 0x11EFDD08 + addi 5,5,1 + .long 0x1210DD08 + .long 0x1231DD08 + .long 0x1252DD08 + .long 0x1273DD08 + .long 0x1294DD08 + .long 0x12B5DD08 + .long 0x12D6DD08 + lvx 25,8,11 + + .long 0x11EFE508 + .long 0x7C001E99 + .long 0x1210E508 + .long 0x7C281E99 + .long 0x1231E508 + .long 0x7C5A1E99 + .long 0x1252E508 + .long 0x7C7B1E99 + .long 0x1273E508 + .long 0x7D5C1E99 + .long 0x1294E508 + .long 0x7D9D1E99 + .long 0x12B5E508 + .long 0x7DBE1E99 + .long 0x12D6E508 + .long 0x7DDF1E99 + addi 3,3,0x80 + + .long 0x11EFED08 + + .long 0x1210ED08 + + .long 0x1231ED08 + + .long 0x1252ED08 + + .long 0x1273ED08 + + .long 0x1294ED08 + + .long 0x12B5ED08 + + .long 0x12D6ED08 + + + add 3,3,0 + + + + subfe. 0,0,0 + .long 0x11EFF508 + vxor 0,0,31 + .long 0x1210F508 + vxor 1,1,31 + .long 0x1231F508 + vxor 2,2,31 + .long 0x1252F508 + vxor 3,3,31 + .long 0x1273F508 + vxor 10,10,31 + .long 0x1294F508 + vxor 12,12,31 + .long 0x12B5F508 + vxor 13,13,31 + .long 0x12D6F508 + vxor 14,14,31 + + bne Lctr32_enc8x_break + + .long 0x100F0509 + .long 0x10300D09 + vadduwm 16,4,11 + .long 0x10511509 + vadduwm 17,4,7 + vxor 15,4,23 + .long 0x10721D09 + vadduwm 18,16,7 + vxor 16,16,23 + .long 0x11535509 + vadduwm 19,17,7 + vxor 17,17,23 + .long 0x11946509 + vadduwm 20,18,7 + vxor 18,18,23 + .long 0x11B56D09 + vadduwm 21,19,7 + vxor 19,19,23 + .long 0x11D67509 + vadduwm 22,20,7 + vxor 20,20,23 + + vadduwm 4,21,7 + vxor 21,21,23 + + vxor 22,22,23 + mtctr 9 + + .long 0x11EFC508 + .long 0x7C002799 + + .long 0x1210C508 + .long 0x7C282799 + + .long 0x1231C508 + .long 0x7C5A2799 + + .long 0x1252C508 + .long 0x7C7B2799 + + .long 0x1273C508 + .long 0x7D5C2799 + + .long 0x1294C508 + .long 0x7D9D2799 + + .long 0x12B5C508 + .long 0x7DBE2799 + .long 0x12D6C508 + .long 0x7DDF2799 + addi 4,4,0x80 + + b Loop_ctr32_enc8x_middle + +.align 5 +Lctr32_enc8x_break: + cmpwi 5,-0x60 + blt Lctr32_enc8x_one + nop + beq Lctr32_enc8x_two + cmpwi 5,-0x40 + blt Lctr32_enc8x_three + nop + beq Lctr32_enc8x_four + cmpwi 5,-0x20 + blt Lctr32_enc8x_five + nop + beq Lctr32_enc8x_six + cmpwi 5,0x00 + blt Lctr32_enc8x_seven + +Lctr32_enc8x_eight: + .long 0x11EF0509 + .long 0x12100D09 + .long 0x12311509 + .long 0x12521D09 + .long 0x12735509 + .long 0x12946509 + .long 0x12B56D09 + .long 0x12D67509 + + + + .long 0x7DE02799 + + .long 0x7E082799 + + .long 0x7E3A2799 + + .long 0x7E5B2799 + + .long 0x7E7C2799 + + .long 0x7E9D2799 + + .long 0x7EBE2799 + .long 0x7EDF2799 + addi 4,4,0x80 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_seven: + .long 0x11EF0D09 + .long 0x12101509 + 
.long 0x12311D09 + .long 0x12525509 + .long 0x12736509 + .long 0x12946D09 + .long 0x12B57509 + + + + .long 0x7DE02799 + + .long 0x7E082799 + + .long 0x7E3A2799 + + .long 0x7E5B2799 + + .long 0x7E7C2799 + + .long 0x7E9D2799 + .long 0x7EBE2799 + addi 4,4,0x70 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_six: + .long 0x11EF1509 + .long 0x12101D09 + .long 0x12315509 + .long 0x12526509 + .long 0x12736D09 + .long 0x12947509 + + + + .long 0x7DE02799 + + .long 0x7E082799 + + .long 0x7E3A2799 + + .long 0x7E5B2799 + + .long 0x7E7C2799 + .long 0x7E9D2799 + addi 4,4,0x60 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_five: + .long 0x11EF1D09 + .long 0x12105509 + .long 0x12316509 + .long 0x12526D09 + .long 0x12737509 + + + + .long 0x7DE02799 + + .long 0x7E082799 + + .long 0x7E3A2799 + + .long 0x7E5B2799 + .long 0x7E7C2799 + addi 4,4,0x50 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_four: + .long 0x11EF5509 + .long 0x12106509 + .long 0x12316D09 + .long 0x12527509 + + + + .long 0x7DE02799 + + .long 0x7E082799 + + .long 0x7E3A2799 + .long 0x7E5B2799 + addi 4,4,0x40 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_three: + .long 0x11EF6509 + .long 0x12106D09 + .long 0x12317509 + + + + .long 0x7DE02799 + + .long 0x7E082799 + .long 0x7E3A2799 + addi 4,4,0x30 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_two: + .long 0x11EF6D09 + .long 0x12107509 + + + + .long 0x7DE02799 + .long 0x7E082799 + addi 4,4,0x20 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_one: + .long 0x11EF7509 + + + .long 0x7DE02799 + addi 4,4,0x10 + +Lctr32_enc8x_done: + li 10,47 + li 11,63 + stvx 6,10,1 + addi 10,10,32 + stvx 6,11,1 + addi 11,11,32 + stvx 6,10,1 + addi 10,10,32 + stvx 6,11,1 + addi 11,11,32 + stvx 6,10,1 + addi 10,10,32 + stvx 6,11,1 + addi 11,11,32 + stvx 6,10,1 + addi 10,10,32 + stvx 6,11,1 + addi 11,11,32 + + mtspr 256,12 + lvx 20,10,1 + addi 10,10,32 + lvx 21,11,1 + addi 11,11,32 + lvx 22,10,1 + addi 10,10,32 + lvx 23,11,1 + addi 11,11,32 + lvx 24,10,1 + addi 10,10,32 + lvx 25,11,1 + addi 11,11,32 + lvx 26,10,1 + addi 10,10,32 + lvx 27,11,1 + addi 11,11,32 + lvx 28,10,1 + addi 10,10,32 + lvx 29,11,1 + addi 11,11,32 + lvx 30,10,1 + lvx 31,11,1 + lwz 26,368(1) + lwz 27,372(1) + lwz 28,376(1) + lwz 29,380(1) + lwz 30,384(1) + lwz 31,388(1) + addi 1,1,392 + blr +.long 0 +.byte 0,12,0x04,0,0x80,6,6,0 +.long 0 + +.globl aes_p8_xts_encrypt +.align 5 +aes_p8_xts_encrypt: + mr 10,3 + li 3,-1 + cmplwi 5,16 + bclr 14,0 + + lis 0,0xfff0 + mfspr 12,256 + li 11,0 + mtspr 256,0 + + vspltisb 9,0x07 + + + + + li 3,15 + lvx 8,0,8 + lvsl 5,0,8 + lvx 4,3,8 + + vperm 8,8,4,5 + + neg 11,10 + lvsr 5,0,11 + lvx 2,0,10 + addi 10,10,15 + + + cmplwi 7,0 + beq Lxts_enc_no_key2 + + lvsl 7,0,7 + lwz 9,240(7) + srwi 9,9,1 + subi 9,9,1 + li 3,16 + + lvx 0,0,7 + lvx 1,3,7 + addi 3,3,16 + vperm 0,0,1,7 + vxor 8,8,0 + lvx 0,3,7 + addi 3,3,16 + mtctr 9 + +Ltweak_xts_enc: + vperm 1,1,0,7 + .long 0x11080D08 + lvx 1,3,7 + addi 3,3,16 + vperm 0,0,1,7 + .long 0x11080508 + lvx 0,3,7 + addi 3,3,16 + bc 16,0,Ltweak_xts_enc + + vperm 1,1,0,7 + .long 0x11080D08 + lvx 1,3,7 + vperm 0,0,1,7 + .long 0x11080509 + + li 8,0 + b Lxts_enc + +Lxts_enc_no_key2: + li 3,-16 + and 5,5,3 + + +Lxts_enc: + lvx 4,0,10 + addi 10,10,16 + + lvsl 7,0,6 + lwz 9,240(6) + srwi 9,9,1 + subi 9,9,1 + li 3,16 + + vslb 10,9,9 + vor 10,10,9 + vspltisb 11,1 + vsldoi 10,10,11,15 + + cmplwi 5,96 + bge _aesp8_xts_encrypt6x + + andi. 
7,5,15 + subic 0,5,32 + subi 7,7,16 + subfe 0,0,0 + and 0,0,7 + add 10,10,0 + + lvx 0,0,6 + lvx 1,3,6 + addi 3,3,16 + vperm 2,2,4,5 + vperm 0,0,1,7 + vxor 2,2,8 + vxor 2,2,0 + lvx 0,3,6 + addi 3,3,16 + mtctr 9 + b Loop_xts_enc + +.align 5 +Loop_xts_enc: + vperm 1,1,0,7 + .long 0x10420D08 + lvx 1,3,6 + addi 3,3,16 + vperm 0,0,1,7 + .long 0x10420508 + lvx 0,3,6 + addi 3,3,16 + bc 16,0,Loop_xts_enc + + vperm 1,1,0,7 + .long 0x10420D08 + lvx 1,3,6 + li 3,16 + vperm 0,0,1,7 + vxor 0,0,8 + .long 0x10620509 + + + nop + + .long 0x7C602799 + addi 4,4,16 + + subic. 5,5,16 + beq Lxts_enc_done + + vor 2,4,4 + lvx 4,0,10 + addi 10,10,16 + lvx 0,0,6 + lvx 1,3,6 + addi 3,3,16 + + subic 0,5,32 + subfe 0,0,0 + and 0,0,7 + add 10,10,0 + + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + vand 11,11,10 + vxor 8,8,11 + + vperm 2,2,4,5 + vperm 0,0,1,7 + vxor 2,2,8 + vxor 3,3,0 + vxor 2,2,0 + lvx 0,3,6 + addi 3,3,16 + + mtctr 9 + cmplwi 5,16 + bge Loop_xts_enc + + vxor 3,3,8 + lvsr 5,0,5 + vxor 4,4,4 + vspltisb 11,-1 + vperm 4,4,11,5 + vsel 2,2,3,4 + + subi 11,4,17 + subi 4,4,16 + mtctr 5 + li 5,16 +Loop_xts_enc_steal: + lbzu 0,1(11) + stb 0,16(11) + bc 16,0,Loop_xts_enc_steal + + mtctr 9 + b Loop_xts_enc + +Lxts_enc_done: + cmplwi 8,0 + beq Lxts_enc_ret + + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + vand 11,11,10 + vxor 8,8,11 + + + .long 0x7D004799 + +Lxts_enc_ret: + mtspr 256,12 + li 3,0 + blr +.long 0 +.byte 0,12,0x04,0,0x80,6,6,0 +.long 0 + + +.globl aes_p8_xts_decrypt +.align 5 +aes_p8_xts_decrypt: + mr 10,3 + li 3,-1 + cmplwi 5,16 + bclr 14,0 + + lis 0,0xfff8 + mfspr 12,256 + li 11,0 + mtspr 256,0 + + andi. 0,5,15 + neg 0,0 + andi. 0,0,16 + sub 5,5,0 + + vspltisb 9,0x07 + + + + + li 3,15 + lvx 8,0,8 + lvsl 5,0,8 + lvx 4,3,8 + + vperm 8,8,4,5 + + neg 11,10 + lvsr 5,0,11 + lvx 2,0,10 + addi 10,10,15 + + + cmplwi 7,0 + beq Lxts_dec_no_key2 + + lvsl 7,0,7 + lwz 9,240(7) + srwi 9,9,1 + subi 9,9,1 + li 3,16 + + lvx 0,0,7 + lvx 1,3,7 + addi 3,3,16 + vperm 0,0,1,7 + vxor 8,8,0 + lvx 0,3,7 + addi 3,3,16 + mtctr 9 + +Ltweak_xts_dec: + vperm 1,1,0,7 + .long 0x11080D08 + lvx 1,3,7 + addi 3,3,16 + vperm 0,0,1,7 + .long 0x11080508 + lvx 0,3,7 + addi 3,3,16 + bc 16,0,Ltweak_xts_dec + + vperm 1,1,0,7 + .long 0x11080D08 + lvx 1,3,7 + vperm 0,0,1,7 + .long 0x11080509 + + li 8,0 + b Lxts_dec + +Lxts_dec_no_key2: + neg 3,5 + andi. 3,3,15 + add 5,5,3 + + +Lxts_dec: + lvx 4,0,10 + addi 10,10,16 + + lvsl 7,0,6 + lwz 9,240(6) + srwi 9,9,1 + subi 9,9,1 + li 3,16 + + vslb 10,9,9 + vor 10,10,9 + vspltisb 11,1 + vsldoi 10,10,11,15 + + cmplwi 5,96 + bge _aesp8_xts_decrypt6x + + lvx 0,0,6 + lvx 1,3,6 + addi 3,3,16 + vperm 2,2,4,5 + vperm 0,0,1,7 + vxor 2,2,8 + vxor 2,2,0 + lvx 0,3,6 + addi 3,3,16 + mtctr 9 + + cmplwi 5,16 + blt Ltail_xts_dec + b Loop_xts_dec + +.align 5 +Loop_xts_dec: + vperm 1,1,0,7 + .long 0x10420D48 + lvx 1,3,6 + addi 3,3,16 + vperm 0,0,1,7 + .long 0x10420548 + lvx 0,3,6 + addi 3,3,16 + bc 16,0,Loop_xts_dec + + vperm 1,1,0,7 + .long 0x10420D48 + lvx 1,3,6 + li 3,16 + vperm 0,0,1,7 + vxor 0,0,8 + .long 0x10620549 + + + nop + + .long 0x7C602799 + addi 4,4,16 + + subic. 
5,5,16 + beq Lxts_dec_done + + vor 2,4,4 + lvx 4,0,10 + addi 10,10,16 + lvx 0,0,6 + lvx 1,3,6 + addi 3,3,16 + + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + vand 11,11,10 + vxor 8,8,11 + + vperm 2,2,4,5 + vperm 0,0,1,7 + vxor 2,2,8 + vxor 2,2,0 + lvx 0,3,6 + addi 3,3,16 + + mtctr 9 + cmplwi 5,16 + bge Loop_xts_dec + +Ltail_xts_dec: + vsrab 11,8,9 + vaddubm 12,8,8 + vsldoi 11,11,11,15 + vand 11,11,10 + vxor 12,12,11 + + subi 10,10,16 + add 10,10,5 + + vxor 2,2,8 + vxor 2,2,12 + +Loop_xts_dec_short: + vperm 1,1,0,7 + .long 0x10420D48 + lvx 1,3,6 + addi 3,3,16 + vperm 0,0,1,7 + .long 0x10420548 + lvx 0,3,6 + addi 3,3,16 + bc 16,0,Loop_xts_dec_short + + vperm 1,1,0,7 + .long 0x10420D48 + lvx 1,3,6 + li 3,16 + vperm 0,0,1,7 + vxor 0,0,12 + .long 0x10620549 + + + nop + + .long 0x7C602799 + + vor 2,4,4 + lvx 4,0,10 + + lvx 0,0,6 + lvx 1,3,6 + addi 3,3,16 + vperm 2,2,4,5 + vperm 0,0,1,7 + + lvsr 5,0,5 + vxor 4,4,4 + vspltisb 11,-1 + vperm 4,4,11,5 + vsel 2,2,3,4 + + vxor 0,0,8 + vxor 2,2,0 + lvx 0,3,6 + addi 3,3,16 + + subi 11,4,1 + mtctr 5 + li 5,16 +Loop_xts_dec_steal: + lbzu 0,1(11) + stb 0,16(11) + bc 16,0,Loop_xts_dec_steal + + mtctr 9 + b Loop_xts_dec + +Lxts_dec_done: + cmplwi 8,0 + beq Lxts_dec_ret + + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + vand 11,11,10 + vxor 8,8,11 + + + .long 0x7D004799 + +Lxts_dec_ret: + mtspr 256,12 + li 3,0 + blr +.long 0 +.byte 0,12,0x04,0,0x80,6,6,0 +.long 0 + +.align 5 +_aesp8_xts_encrypt6x: + stwu 1,-392(1) + mflr 11 + li 7,175 + li 3,191 + stw 11,396(1) + stvx 20,7,1 + addi 7,7,32 + stvx 21,3,1 + addi 3,3,32 + stvx 22,7,1 + addi 7,7,32 + stvx 23,3,1 + addi 3,3,32 + stvx 24,7,1 + addi 7,7,32 + stvx 25,3,1 + addi 3,3,32 + stvx 26,7,1 + addi 7,7,32 + stvx 27,3,1 + addi 3,3,32 + stvx 28,7,1 + addi 7,7,32 + stvx 29,3,1 + addi 3,3,32 + stvx 30,7,1 + stvx 31,3,1 + li 0,-1 + stw 12,364(1) + li 3,0x10 + stw 26,368(1) + li 26,0x20 + stw 27,372(1) + li 27,0x30 + stw 28,376(1) + li 28,0x40 + stw 29,380(1) + li 29,0x50 + stw 30,384(1) + li 30,0x60 + stw 31,388(1) + li 31,0x70 + mtspr 256,0 + + subi 9,9,3 + + lvx 23,0,6 + lvx 30,3,6 + addi 6,6,0x20 + lvx 31,0,6 + vperm 23,23,30,7 + addi 7,1,32+15 + mtctr 9 + +Load_xts_enc_key: + vperm 24,30,31,7 + lvx 30,3,6 + addi 6,6,0x20 + stvx 24,0,7 + vperm 25,31,30,7 + lvx 31,0,6 + stvx 25,3,7 + addi 7,7,0x20 + bc 16,0,Load_xts_enc_key + + lvx 26,3,6 + vperm 24,30,31,7 + lvx 27,26,6 + stvx 24,0,7 + vperm 25,31,26,7 + lvx 28,27,6 + stvx 25,3,7 + addi 7,1,32+15 + vperm 26,26,27,7 + lvx 29,28,6 + vperm 27,27,28,7 + lvx 30,29,6 + vperm 28,28,29,7 + lvx 31,30,6 + vperm 29,29,30,7 + lvx 22,31,6 + vperm 30,30,31,7 + lvx 24,0,7 + vperm 31,31,22,7 + lvx 25,3,7 + + vperm 0,2,4,5 + subi 10,10,31 + vxor 17,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + vand 11,11,10 + vxor 7,0,17 + vxor 8,8,11 + + .long 0x7C235699 + vxor 18,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + + vand 11,11,10 + vxor 12,1,18 + vxor 8,8,11 + + .long 0x7C5A5699 + andi. 
31,5,15 + vxor 19,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + + vand 11,11,10 + vxor 13,2,19 + vxor 8,8,11 + + .long 0x7C7B5699 + sub 5,5,31 + vxor 20,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + + vand 11,11,10 + vxor 14,3,20 + vxor 8,8,11 + + .long 0x7C9C5699 + subi 5,5,0x60 + vxor 21,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + + vand 11,11,10 + vxor 15,4,21 + vxor 8,8,11 + + .long 0x7CBD5699 + addi 10,10,0x60 + vxor 22,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + + vand 11,11,10 + vxor 16,5,22 + vxor 8,8,11 + + vxor 31,31,23 + mtctr 9 + b Loop_xts_enc6x + +.align 5 +Loop_xts_enc6x: + .long 0x10E7C508 + .long 0x118CC508 + .long 0x11ADC508 + .long 0x11CEC508 + .long 0x11EFC508 + .long 0x1210C508 + lvx 24,26,7 + addi 7,7,0x20 + + .long 0x10E7CD08 + .long 0x118CCD08 + .long 0x11ADCD08 + .long 0x11CECD08 + .long 0x11EFCD08 + .long 0x1210CD08 + lvx 25,3,7 + bc 16,0,Loop_xts_enc6x + + subic 5,5,96 + vxor 0,17,31 + .long 0x10E7C508 + .long 0x118CC508 + vsrab 11,8,9 + vxor 17,8,23 + vaddubm 8,8,8 + .long 0x11ADC508 + .long 0x11CEC508 + vsldoi 11,11,11,15 + .long 0x11EFC508 + .long 0x1210C508 + + subfe. 0,0,0 + vand 11,11,10 + .long 0x10E7CD08 + .long 0x118CCD08 + vxor 8,8,11 + .long 0x11ADCD08 + .long 0x11CECD08 + vxor 1,18,31 + vsrab 11,8,9 + vxor 18,8,23 + .long 0x11EFCD08 + .long 0x1210CD08 + + and 0,0,5 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + .long 0x10E7D508 + .long 0x118CD508 + vand 11,11,10 + .long 0x11ADD508 + .long 0x11CED508 + vxor 8,8,11 + .long 0x11EFD508 + .long 0x1210D508 + + add 10,10,0 + + + + vxor 2,19,31 + vsrab 11,8,9 + vxor 19,8,23 + vaddubm 8,8,8 + .long 0x10E7DD08 + .long 0x118CDD08 + vsldoi 11,11,11,15 + .long 0x11ADDD08 + .long 0x11CEDD08 + vand 11,11,10 + .long 0x11EFDD08 + .long 0x1210DD08 + + addi 7,1,32+15 + vxor 8,8,11 + .long 0x10E7E508 + .long 0x118CE508 + vxor 3,20,31 + vsrab 11,8,9 + vxor 20,8,23 + .long 0x11ADE508 + .long 0x11CEE508 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + .long 0x11EFE508 + .long 0x1210E508 + lvx 24,0,7 + vand 11,11,10 + + .long 0x10E7ED08 + .long 0x118CED08 + vxor 8,8,11 + .long 0x11ADED08 + .long 0x11CEED08 + vxor 4,21,31 + vsrab 11,8,9 + vxor 21,8,23 + .long 0x11EFED08 + .long 0x1210ED08 + lvx 25,3,7 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + + .long 0x10E7F508 + .long 0x118CF508 + vand 11,11,10 + .long 0x11ADF508 + .long 0x11CEF508 + vxor 8,8,11 + .long 0x11EFF508 + .long 0x1210F508 + vxor 5,22,31 + vsrab 11,8,9 + vxor 22,8,23 + + .long 0x10E70509 + .long 0x7C005699 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + .long 0x118C0D09 + .long 0x7C235699 + .long 0x11AD1509 + + .long 0x7C5A5699 + vand 11,11,10 + .long 0x11CE1D09 + + .long 0x7C7B5699 + .long 0x11EF2509 + + .long 0x7C9C5699 + vxor 8,8,11 + .long 0x11702D09 + + + .long 0x7CBD5699 + addi 10,10,0x60 + + + + + + .long 0x7CE02799 + vxor 7,0,17 + + .long 0x7D832799 + vxor 12,1,18 + + .long 0x7DBA2799 + vxor 13,2,19 + + .long 0x7DDB2799 + vxor 14,3,20 + + .long 0x7DFC2799 + vxor 15,4,21 + + .long 0x7D7D2799 + vxor 16,5,22 + addi 4,4,0x60 + + mtctr 9 + beq Loop_xts_enc6x + + addic. 
5,5,0x60 + beq Lxts_enc6x_zero + cmpwi 5,0x20 + blt Lxts_enc6x_one + nop + beq Lxts_enc6x_two + cmpwi 5,0x40 + blt Lxts_enc6x_three + nop + beq Lxts_enc6x_four + +Lxts_enc6x_five: + vxor 7,1,17 + vxor 12,2,18 + vxor 13,3,19 + vxor 14,4,20 + vxor 15,5,21 + + bl _aesp8_xts_enc5x + + + vor 17,22,22 + + .long 0x7CE02799 + + .long 0x7D832799 + + .long 0x7DBA2799 + vxor 11,15,22 + + .long 0x7DDB2799 + .long 0x7DFC2799 + addi 4,4,0x50 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_four: + vxor 7,2,17 + vxor 12,3,18 + vxor 13,4,19 + vxor 14,5,20 + vxor 15,15,15 + + bl _aesp8_xts_enc5x + + + vor 17,21,21 + + .long 0x7CE02799 + + .long 0x7D832799 + vxor 11,14,21 + + .long 0x7DBA2799 + .long 0x7DDB2799 + addi 4,4,0x40 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_three: + vxor 7,3,17 + vxor 12,4,18 + vxor 13,5,19 + vxor 14,14,14 + vxor 15,15,15 + + bl _aesp8_xts_enc5x + + + vor 17,20,20 + + .long 0x7CE02799 + vxor 11,13,20 + + .long 0x7D832799 + .long 0x7DBA2799 + addi 4,4,0x30 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_two: + vxor 7,4,17 + vxor 12,5,18 + vxor 13,13,13 + vxor 14,14,14 + vxor 15,15,15 + + bl _aesp8_xts_enc5x + + + vor 17,19,19 + vxor 11,12,19 + + .long 0x7CE02799 + .long 0x7D832799 + addi 4,4,0x20 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_one: + vxor 7,5,17 + nop +Loop_xts_enc1x: + .long 0x10E7C508 + lvx 24,26,7 + addi 7,7,0x20 + + .long 0x10E7CD08 + lvx 25,3,7 + bc 16,0,Loop_xts_enc1x + + add 10,10,31 + cmpwi 31,0 + .long 0x10E7C508 + + subi 10,10,16 + .long 0x10E7CD08 + + lvsr 5,0,31 + .long 0x10E7D508 + + .long 0x7C005699 + .long 0x10E7DD08 + + addi 7,1,32+15 + .long 0x10E7E508 + lvx 24,0,7 + + .long 0x10E7ED08 + lvx 25,3,7 + vxor 17,17,31 + + + .long 0x10E7F508 + + vperm 0,0,0,5 + .long 0x10E78D09 + + vor 17,18,18 + vxor 11,7,18 + + .long 0x7CE02799 + addi 4,4,0x10 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_zero: + cmpwi 31,0 + beq Lxts_enc6x_done + + add 10,10,31 + subi 10,10,16 + .long 0x7C005699 + lvsr 5,0,31 + + vperm 0,0,0,5 + vxor 11,11,17 +Lxts_enc6x_steal: + vxor 0,0,17 + vxor 7,7,7 + vspltisb 12,-1 + vperm 7,7,12,5 + vsel 7,0,11,7 + + subi 30,4,17 + subi 4,4,16 + mtctr 31 +Loop_xts_enc6x_steal: + lbzu 0,1(30) + stb 0,16(30) + bc 16,0,Loop_xts_enc6x_steal + + li 31,0 + mtctr 9 + b Loop_xts_enc1x + +.align 4 +Lxts_enc6x_done: + cmplwi 8,0 + beq Lxts_enc6x_ret + + vxor 8,17,23 + + .long 0x7D004799 + +Lxts_enc6x_ret: + mtlr 11 + li 10,47 + li 11,63 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + + mtspr 256,12 + lvx 20,10,1 + addi 10,10,32 + lvx 21,11,1 + addi 11,11,32 + lvx 22,10,1 + addi 10,10,32 + lvx 23,11,1 + addi 11,11,32 + lvx 24,10,1 + addi 10,10,32 + lvx 25,11,1 + addi 11,11,32 + lvx 26,10,1 + addi 10,10,32 + lvx 27,11,1 + addi 11,11,32 + lvx 28,10,1 + addi 10,10,32 + lvx 29,11,1 + addi 11,11,32 + lvx 30,10,1 + lvx 31,11,1 + lwz 26,368(1) + lwz 27,372(1) + lwz 28,376(1) + lwz 29,380(1) + lwz 30,384(1) + lwz 31,388(1) + addi 1,1,392 + blr +.long 0 +.byte 0,12,0x04,1,0x80,6,6,0 +.long 0 + +.align 5 +_aesp8_xts_enc5x: + .long 0x10E7C508 + .long 0x118CC508 + .long 0x11ADC508 + .long 0x11CEC508 + .long 0x11EFC508 + lvx 24,26,7 + addi 7,7,0x20 + + .long 0x10E7CD08 + .long 0x118CCD08 + .long 0x11ADCD08 + .long 0x11CECD08 + .long 0x11EFCD08 + lvx 25,3,7 + bc 
16,0,_aesp8_xts_enc5x + + add 10,10,31 + cmpwi 31,0 + .long 0x10E7C508 + .long 0x118CC508 + .long 0x11ADC508 + .long 0x11CEC508 + .long 0x11EFC508 + + subi 10,10,16 + .long 0x10E7CD08 + .long 0x118CCD08 + .long 0x11ADCD08 + .long 0x11CECD08 + .long 0x11EFCD08 + vxor 17,17,31 + + .long 0x10E7D508 + lvsr 5,0,31 + .long 0x118CD508 + .long 0x11ADD508 + .long 0x11CED508 + .long 0x11EFD508 + vxor 1,18,31 + + .long 0x10E7DD08 + .long 0x7C005699 + .long 0x118CDD08 + .long 0x11ADDD08 + .long 0x11CEDD08 + .long 0x11EFDD08 + vxor 2,19,31 + + addi 7,1,32+15 + .long 0x10E7E508 + .long 0x118CE508 + .long 0x11ADE508 + .long 0x11CEE508 + .long 0x11EFE508 + lvx 24,0,7 + vxor 3,20,31 + + .long 0x10E7ED08 + + .long 0x118CED08 + .long 0x11ADED08 + .long 0x11CEED08 + .long 0x11EFED08 + lvx 25,3,7 + vxor 4,21,31 + + .long 0x10E7F508 + vperm 0,0,0,5 + .long 0x118CF508 + .long 0x11ADF508 + .long 0x11CEF508 + .long 0x11EFF508 + + .long 0x10E78D09 + .long 0x118C0D09 + .long 0x11AD1509 + .long 0x11CE1D09 + .long 0x11EF2509 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 + +.align 5 +_aesp8_xts_decrypt6x: + stwu 1,-392(1) + mflr 11 + li 7,175 + li 3,191 + stw 11,396(1) + stvx 20,7,1 + addi 7,7,32 + stvx 21,3,1 + addi 3,3,32 + stvx 22,7,1 + addi 7,7,32 + stvx 23,3,1 + addi 3,3,32 + stvx 24,7,1 + addi 7,7,32 + stvx 25,3,1 + addi 3,3,32 + stvx 26,7,1 + addi 7,7,32 + stvx 27,3,1 + addi 3,3,32 + stvx 28,7,1 + addi 7,7,32 + stvx 29,3,1 + addi 3,3,32 + stvx 30,7,1 + stvx 31,3,1 + li 0,-1 + stw 12,364(1) + li 3,0x10 + stw 26,368(1) + li 26,0x20 + stw 27,372(1) + li 27,0x30 + stw 28,376(1) + li 28,0x40 + stw 29,380(1) + li 29,0x50 + stw 30,384(1) + li 30,0x60 + stw 31,388(1) + li 31,0x70 + mtspr 256,0 + + subi 9,9,3 + + lvx 23,0,6 + lvx 30,3,6 + addi 6,6,0x20 + lvx 31,0,6 + vperm 23,23,30,7 + addi 7,1,32+15 + mtctr 9 + +Load_xts_dec_key: + vperm 24,30,31,7 + lvx 30,3,6 + addi 6,6,0x20 + stvx 24,0,7 + vperm 25,31,30,7 + lvx 31,0,6 + stvx 25,3,7 + addi 7,7,0x20 + bc 16,0,Load_xts_dec_key + + lvx 26,3,6 + vperm 24,30,31,7 + lvx 27,26,6 + stvx 24,0,7 + vperm 25,31,26,7 + lvx 28,27,6 + stvx 25,3,7 + addi 7,1,32+15 + vperm 26,26,27,7 + lvx 29,28,6 + vperm 27,27,28,7 + lvx 30,29,6 + vperm 28,28,29,7 + lvx 31,30,6 + vperm 29,29,30,7 + lvx 22,31,6 + vperm 30,30,31,7 + lvx 24,0,7 + vperm 31,31,22,7 + lvx 25,3,7 + + vperm 0,2,4,5 + subi 10,10,31 + vxor 17,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + vand 11,11,10 + vxor 7,0,17 + vxor 8,8,11 + + .long 0x7C235699 + vxor 18,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + + vand 11,11,10 + vxor 12,1,18 + vxor 8,8,11 + + .long 0x7C5A5699 + andi. 
31,5,15 + vxor 19,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + + vand 11,11,10 + vxor 13,2,19 + vxor 8,8,11 + + .long 0x7C7B5699 + sub 5,5,31 + vxor 20,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + + vand 11,11,10 + vxor 14,3,20 + vxor 8,8,11 + + .long 0x7C9C5699 + subi 5,5,0x60 + vxor 21,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + + vand 11,11,10 + vxor 15,4,21 + vxor 8,8,11 + + .long 0x7CBD5699 + addi 10,10,0x60 + vxor 22,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + + vand 11,11,10 + vxor 16,5,22 + vxor 8,8,11 + + vxor 31,31,23 + mtctr 9 + b Loop_xts_dec6x + +.align 5 +Loop_xts_dec6x: + .long 0x10E7C548 + .long 0x118CC548 + .long 0x11ADC548 + .long 0x11CEC548 + .long 0x11EFC548 + .long 0x1210C548 + lvx 24,26,7 + addi 7,7,0x20 + + .long 0x10E7CD48 + .long 0x118CCD48 + .long 0x11ADCD48 + .long 0x11CECD48 + .long 0x11EFCD48 + .long 0x1210CD48 + lvx 25,3,7 + bc 16,0,Loop_xts_dec6x + + subic 5,5,96 + vxor 0,17,31 + .long 0x10E7C548 + .long 0x118CC548 + vsrab 11,8,9 + vxor 17,8,23 + vaddubm 8,8,8 + .long 0x11ADC548 + .long 0x11CEC548 + vsldoi 11,11,11,15 + .long 0x11EFC548 + .long 0x1210C548 + + subfe. 0,0,0 + vand 11,11,10 + .long 0x10E7CD48 + .long 0x118CCD48 + vxor 8,8,11 + .long 0x11ADCD48 + .long 0x11CECD48 + vxor 1,18,31 + vsrab 11,8,9 + vxor 18,8,23 + .long 0x11EFCD48 + .long 0x1210CD48 + + and 0,0,5 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + .long 0x10E7D548 + .long 0x118CD548 + vand 11,11,10 + .long 0x11ADD548 + .long 0x11CED548 + vxor 8,8,11 + .long 0x11EFD548 + .long 0x1210D548 + + add 10,10,0 + + + + vxor 2,19,31 + vsrab 11,8,9 + vxor 19,8,23 + vaddubm 8,8,8 + .long 0x10E7DD48 + .long 0x118CDD48 + vsldoi 11,11,11,15 + .long 0x11ADDD48 + .long 0x11CEDD48 + vand 11,11,10 + .long 0x11EFDD48 + .long 0x1210DD48 + + addi 7,1,32+15 + vxor 8,8,11 + .long 0x10E7E548 + .long 0x118CE548 + vxor 3,20,31 + vsrab 11,8,9 + vxor 20,8,23 + .long 0x11ADE548 + .long 0x11CEE548 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + .long 0x11EFE548 + .long 0x1210E548 + lvx 24,0,7 + vand 11,11,10 + + .long 0x10E7ED48 + .long 0x118CED48 + vxor 8,8,11 + .long 0x11ADED48 + .long 0x11CEED48 + vxor 4,21,31 + vsrab 11,8,9 + vxor 21,8,23 + .long 0x11EFED48 + .long 0x1210ED48 + lvx 25,3,7 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + + .long 0x10E7F548 + .long 0x118CF548 + vand 11,11,10 + .long 0x11ADF548 + .long 0x11CEF548 + vxor 8,8,11 + .long 0x11EFF548 + .long 0x1210F548 + vxor 5,22,31 + vsrab 11,8,9 + vxor 22,8,23 + + .long 0x10E70549 + .long 0x7C005699 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + .long 0x118C0D49 + .long 0x7C235699 + .long 0x11AD1549 + + .long 0x7C5A5699 + vand 11,11,10 + .long 0x11CE1D49 + + .long 0x7C7B5699 + .long 0x11EF2549 + + .long 0x7C9C5699 + vxor 8,8,11 + .long 0x12102D49 + + .long 0x7CBD5699 + addi 10,10,0x60 + + + + + + .long 0x7CE02799 + vxor 7,0,17 + + .long 0x7D832799 + vxor 12,1,18 + + .long 0x7DBA2799 + vxor 13,2,19 + + .long 0x7DDB2799 + vxor 14,3,20 + + .long 0x7DFC2799 + vxor 15,4,21 + .long 0x7E1D2799 + vxor 16,5,22 + addi 4,4,0x60 + + mtctr 9 + beq Loop_xts_dec6x + + addic. 
5,5,0x60 + beq Lxts_dec6x_zero + cmpwi 5,0x20 + blt Lxts_dec6x_one + nop + beq Lxts_dec6x_two + cmpwi 5,0x40 + blt Lxts_dec6x_three + nop + beq Lxts_dec6x_four + +Lxts_dec6x_five: + vxor 7,1,17 + vxor 12,2,18 + vxor 13,3,19 + vxor 14,4,20 + vxor 15,5,21 + + bl _aesp8_xts_dec5x + + + vor 17,22,22 + vxor 18,8,23 + + .long 0x7CE02799 + vxor 7,0,18 + + .long 0x7D832799 + + .long 0x7DBA2799 + + .long 0x7DDB2799 + .long 0x7DFC2799 + addi 4,4,0x50 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_four: + vxor 7,2,17 + vxor 12,3,18 + vxor 13,4,19 + vxor 14,5,20 + vxor 15,15,15 + + bl _aesp8_xts_dec5x + + + vor 17,21,21 + vor 18,22,22 + + .long 0x7CE02799 + vxor 7,0,22 + + .long 0x7D832799 + + .long 0x7DBA2799 + .long 0x7DDB2799 + addi 4,4,0x40 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_three: + vxor 7,3,17 + vxor 12,4,18 + vxor 13,5,19 + vxor 14,14,14 + vxor 15,15,15 + + bl _aesp8_xts_dec5x + + + vor 17,20,20 + vor 18,21,21 + + .long 0x7CE02799 + vxor 7,0,21 + + .long 0x7D832799 + .long 0x7DBA2799 + addi 4,4,0x30 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_two: + vxor 7,4,17 + vxor 12,5,18 + vxor 13,13,13 + vxor 14,14,14 + vxor 15,15,15 + + bl _aesp8_xts_dec5x + + + vor 17,19,19 + vor 18,20,20 + + .long 0x7CE02799 + vxor 7,0,20 + .long 0x7D832799 + addi 4,4,0x20 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_one: + vxor 7,5,17 + nop +Loop_xts_dec1x: + .long 0x10E7C548 + lvx 24,26,7 + addi 7,7,0x20 + + .long 0x10E7CD48 + lvx 25,3,7 + bc 16,0,Loop_xts_dec1x + + subi 0,31,1 + .long 0x10E7C548 + + andi. 0,0,16 + cmpwi 31,0 + .long 0x10E7CD48 + + sub 10,10,0 + .long 0x10E7D548 + + .long 0x7C005699 + .long 0x10E7DD48 + + addi 7,1,32+15 + .long 0x10E7E548 + lvx 24,0,7 + + .long 0x10E7ED48 + lvx 25,3,7 + vxor 17,17,31 + + + .long 0x10E7F548 + + mtctr 9 + .long 0x10E78D49 + + vor 17,18,18 + vor 18,19,19 + + .long 0x7CE02799 + addi 4,4,0x10 + vxor 7,0,19 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_zero: + cmpwi 31,0 + beq Lxts_dec6x_done + + .long 0x7C005699 + + vxor 7,0,18 +Lxts_dec6x_steal: + .long 0x10E7C548 + lvx 24,26,7 + addi 7,7,0x20 + + .long 0x10E7CD48 + lvx 25,3,7 + bc 16,0,Lxts_dec6x_steal + + add 10,10,31 + .long 0x10E7C548 + + cmpwi 31,0 + .long 0x10E7CD48 + + .long 0x7C005699 + .long 0x10E7D548 + + lvsr 5,0,31 + .long 0x10E7DD48 + + addi 7,1,32+15 + .long 0x10E7E548 + lvx 24,0,7 + + .long 0x10E7ED48 + lvx 25,3,7 + vxor 18,18,31 + + + .long 0x10E7F548 + + vperm 0,0,0,5 + .long 0x11679549 + + + + .long 0x7D602799 + + vxor 7,7,7 + vspltisb 12,-1 + vperm 7,7,12,5 + vsel 7,0,11,7 + vxor 7,7,17 + + subi 30,4,1 + mtctr 31 +Loop_xts_dec6x_steal: + lbzu 0,1(30) + stb 0,16(30) + bc 16,0,Loop_xts_dec6x_steal + + li 31,0 + mtctr 9 + b Loop_xts_dec1x + +.align 4 +Lxts_dec6x_done: + cmplwi 8,0 + beq Lxts_dec6x_ret + + vxor 8,17,23 + + .long 0x7D004799 + +Lxts_dec6x_ret: + mtlr 11 + li 10,47 + li 11,63 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + + mtspr 256,12 + lvx 20,10,1 + addi 10,10,32 + lvx 21,11,1 + addi 11,11,32 + lvx 22,10,1 + addi 10,10,32 + lvx 23,11,1 + addi 11,11,32 + lvx 24,10,1 + addi 10,10,32 + lvx 25,11,1 + addi 11,11,32 + lvx 26,10,1 + addi 10,10,32 + lvx 27,11,1 + addi 11,11,32 + lvx 28,10,1 + addi 10,10,32 + lvx 29,11,1 + addi 11,11,32 + lvx 30,10,1 + lvx 31,11,1 + lwz 26,368(1) + lwz 
27,372(1) + lwz 28,376(1) + lwz 29,380(1) + lwz 30,384(1) + lwz 31,388(1) + addi 1,1,392 + blr +.long 0 +.byte 0,12,0x04,1,0x80,6,6,0 +.long 0 + +.align 5 +_aesp8_xts_dec5x: + .long 0x10E7C548 + .long 0x118CC548 + .long 0x11ADC548 + .long 0x11CEC548 + .long 0x11EFC548 + lvx 24,26,7 + addi 7,7,0x20 + + .long 0x10E7CD48 + .long 0x118CCD48 + .long 0x11ADCD48 + .long 0x11CECD48 + .long 0x11EFCD48 + lvx 25,3,7 + bc 16,0,_aesp8_xts_dec5x + + subi 0,31,1 + .long 0x10E7C548 + .long 0x118CC548 + .long 0x11ADC548 + .long 0x11CEC548 + .long 0x11EFC548 + + andi. 0,0,16 + cmpwi 31,0 + .long 0x10E7CD48 + .long 0x118CCD48 + .long 0x11ADCD48 + .long 0x11CECD48 + .long 0x11EFCD48 + vxor 17,17,31 + + sub 10,10,0 + .long 0x10E7D548 + .long 0x118CD548 + .long 0x11ADD548 + .long 0x11CED548 + .long 0x11EFD548 + vxor 1,18,31 + + .long 0x10E7DD48 + .long 0x7C005699 + .long 0x118CDD48 + .long 0x11ADDD48 + .long 0x11CEDD48 + .long 0x11EFDD48 + vxor 2,19,31 + + addi 7,1,32+15 + .long 0x10E7E548 + .long 0x118CE548 + .long 0x11ADE548 + .long 0x11CEE548 + .long 0x11EFE548 + lvx 24,0,7 + vxor 3,20,31 + + .long 0x10E7ED48 + + .long 0x118CED48 + .long 0x11ADED48 + .long 0x11CEED48 + .long 0x11EFED48 + lvx 25,3,7 + vxor 4,21,31 + + .long 0x10E7F548 + .long 0x118CF548 + .long 0x11ADF548 + .long 0x11CEF548 + .long 0x11EFF548 + + .long 0x10E78D49 + .long 0x118C0D49 + .long 0x11AD1549 + .long 0x11CE1D49 + .long 0x11EF2549 + mtctr 9 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 Index: cipher/rijndael-ppc8be.S =================================================================== --- /dev/null +++ cipher/rijndael-ppc8be.S @@ -0,0 +1,3633 @@ +.machine "any" + +.text + +.align 7 +rcon: +.byte 0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00 +.byte 0x1b,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x1b,0x00,0x00,0x00 +.byte 0x0d,0x0e,0x0f,0x0c,0x0d,0x0e,0x0f,0x0c,0x0d,0x0e,0x0f,0x0c,0x0d,0x0e,0x0f,0x0c +.byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +Lconsts: + mflr 0 + bcl 20,31,$+4 + mflr 6 + addi 6,6,-0x48 + mtlr 0 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 +.byte 65,69,83,32,102,111,114,32,80,111,119,101,114,73,83,65,32,50,46,48,55,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 + +.globl aes_p8_set_encrypt_key +.align 5 +aes_p8_set_encrypt_key: +Lset_encrypt_key: + mflr 11 + std 11,16(1) + + li 6,-1 + cmpldi 3,0 + beq- Lenc_key_abort + cmpldi 5,0 + beq- Lenc_key_abort + li 6,-2 + cmpwi 4,128 + blt- Lenc_key_abort + cmpwi 4,256 + bgt- Lenc_key_abort + andi. 
0,4,0x3f + bne- Lenc_key_abort + + lis 0,0xfff0 + mfspr 12,256 + mtspr 256,0 + + bl Lconsts + mtlr 11 + + neg 9,3 + lvx 1,0,3 + addi 3,3,15 + lvsr 3,0,9 + li 8,0x20 + cmpwi 4,192 + lvx 2,0,3 + + lvx 4,0,6 + + lvx 5,8,6 + addi 6,6,0x10 + vperm 1,1,2,3 + li 7,8 + vxor 0,0,0 + mtctr 7 + + lvsr 8,0,5 + vspltisb 9,-1 + lvx 10,0,5 + vperm 9,0,9,8 + + blt Loop128 + addi 3,3,8 + beq L192 + addi 3,3,8 + b L256 + +.align 4 +Loop128: + vperm 3,1,1,5 + vsldoi 6,0,1,12 + vperm 11,1,1,8 + vsel 7,10,11,9 + vor 10,11,11 + .long 0x10632509 + stvx 7,0,5 + addi 5,5,16 + + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + vadduwm 4,4,4 + vxor 1,1,3 + bc 16,0,Loop128 + + lvx 4,0,6 + + vperm 3,1,1,5 + vsldoi 6,0,1,12 + vperm 11,1,1,8 + vsel 7,10,11,9 + vor 10,11,11 + .long 0x10632509 + stvx 7,0,5 + addi 5,5,16 + + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + vadduwm 4,4,4 + vxor 1,1,3 + + vperm 3,1,1,5 + vsldoi 6,0,1,12 + vperm 11,1,1,8 + vsel 7,10,11,9 + vor 10,11,11 + .long 0x10632509 + stvx 7,0,5 + addi 5,5,16 + + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + vxor 1,1,3 + vperm 11,1,1,8 + vsel 7,10,11,9 + vor 10,11,11 + stvx 7,0,5 + + addi 3,5,15 + addi 5,5,0x50 + + li 8,10 + b Ldone + +.align 4 +L192: + lvx 6,0,3 + li 7,4 + vperm 11,1,1,8 + vsel 7,10,11,9 + vor 10,11,11 + stvx 7,0,5 + addi 5,5,16 + vperm 2,2,6,3 + vspltisb 3,8 + mtctr 7 + vsububm 5,5,3 + +Loop192: + vperm 3,2,2,5 + vsldoi 6,0,1,12 + .long 0x10632509 + + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + + vsldoi 7,0,2,8 + vspltw 6,1,3 + vxor 6,6,2 + vsldoi 2,0,2,12 + vadduwm 4,4,4 + vxor 2,2,6 + vxor 1,1,3 + vxor 2,2,3 + vsldoi 7,7,1,8 + + vperm 3,2,2,5 + vsldoi 6,0,1,12 + vperm 11,7,7,8 + vsel 7,10,11,9 + vor 10,11,11 + .long 0x10632509 + stvx 7,0,5 + addi 5,5,16 + + vsldoi 7,1,2,8 + vxor 1,1,6 + vsldoi 6,0,6,12 + vperm 11,7,7,8 + vsel 7,10,11,9 + vor 10,11,11 + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + stvx 7,0,5 + addi 5,5,16 + + vspltw 6,1,3 + vxor 6,6,2 + vsldoi 2,0,2,12 + vadduwm 4,4,4 + vxor 2,2,6 + vxor 1,1,3 + vxor 2,2,3 + vperm 11,1,1,8 + vsel 7,10,11,9 + vor 10,11,11 + stvx 7,0,5 + addi 3,5,15 + addi 5,5,16 + bc 16,0,Loop192 + + li 8,12 + addi 5,5,0x20 + b Ldone + +.align 4 +L256: + lvx 6,0,3 + li 7,7 + li 8,14 + vperm 11,1,1,8 + vsel 7,10,11,9 + vor 10,11,11 + stvx 7,0,5 + addi 5,5,16 + vperm 2,2,6,3 + mtctr 7 + +Loop256: + vperm 3,2,2,5 + vsldoi 6,0,1,12 + vperm 11,2,2,8 + vsel 7,10,11,9 + vor 10,11,11 + .long 0x10632509 + stvx 7,0,5 + addi 5,5,16 + + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + vadduwm 4,4,4 + vxor 1,1,3 + vperm 11,1,1,8 + vsel 7,10,11,9 + vor 10,11,11 + stvx 7,0,5 + addi 3,5,15 + addi 5,5,16 + bdz Ldone + + vspltw 3,1,3 + vsldoi 6,0,2,12 + .long 0x106305C8 + + vxor 2,2,6 + vsldoi 6,0,6,12 + vxor 2,2,6 + vsldoi 6,0,6,12 + vxor 2,2,6 + + vxor 2,2,3 + b Loop256 + +.align 4 +Ldone: + lvx 2,0,3 + vsel 2,10,2,9 + stvx 2,0,3 + li 6,0 + mtspr 256,12 + stw 8,0(5) + +Lenc_key_abort: + mr 3,6 + blr +.long 0 +.byte 0,12,0x14,1,0,0,3,0 +.long 0 + + +.globl aes_p8_set_decrypt_key +.align 5 +aes_p8_set_decrypt_key: + stdu 1,-64(1) + mflr 10 + std 10,64+16(1) + bl Lset_encrypt_key + mtlr 10 + + cmpwi 3,0 + bne- Ldec_key_abort + + slwi 7,8,4 + subi 3,5,240 + srwi 8,8,1 + add 5,3,7 + mtctr 8 + +Ldeckey: + lwz 0, 0(3) + lwz 6, 4(3) + lwz 7, 8(3) + lwz 8, 12(3) + addi 3,3,16 + lwz 9, 0(5) + lwz 10,4(5) + lwz 11,8(5) + lwz 12,12(5) + stw 0, 0(5) + stw 6, 4(5) + stw 7, 8(5) + stw 8, 12(5) + 
subi 5,5,16 + stw 9, -16(3) + stw 10,-12(3) + stw 11,-8(3) + stw 12,-4(3) + bc 16,0,Ldeckey + + xor 3,3,3 +Ldec_key_abort: + addi 1,1,64 + blr +.long 0 +.byte 0,12,4,1,0x80,0,3,0 +.long 0 + +.globl aes_p8_encrypt +.align 5 +aes_p8_encrypt: + lwz 6,240(5) + lis 0,0xfc00 + mfspr 12,256 + li 7,15 + mtspr 256,0 + + lvx 0,0,3 + neg 11,4 + lvx 1,7,3 + lvsl 2,0,3 + + lvsl 3,0,11 + + li 7,16 + vperm 0,0,1,2 + lvx 1,0,5 + lvsl 5,0,5 + srwi 6,6,1 + lvx 2,7,5 + addi 7,7,16 + subi 6,6,1 + vperm 1,1,2,5 + + vxor 0,0,1 + lvx 1,7,5 + addi 7,7,16 + mtctr 6 + +Loop_enc: + vperm 2,2,1,5 + .long 0x10001508 + lvx 2,7,5 + addi 7,7,16 + vperm 1,1,2,5 + .long 0x10000D08 + lvx 1,7,5 + addi 7,7,16 + bc 16,0,Loop_enc + + vperm 2,2,1,5 + .long 0x10001508 + lvx 2,7,5 + vperm 1,1,2,5 + .long 0x10000D09 + + vspltisb 2,-1 + vxor 1,1,1 + li 7,15 + vperm 2,1,2,3 + + lvx 1,0,4 + vperm 0,0,0,3 + vsel 1,1,0,2 + lvx 4,7,4 + stvx 1,0,4 + vsel 0,0,4,2 + stvx 0,7,4 + + mtspr 256,12 + blr +.long 0 +.byte 0,12,0x14,0,0,0,3,0 +.long 0 + +.globl aes_p8_decrypt +.align 5 +aes_p8_decrypt: + lwz 6,240(5) + lis 0,0xfc00 + mfspr 12,256 + li 7,15 + mtspr 256,0 + + lvx 0,0,3 + neg 11,4 + lvx 1,7,3 + lvsl 2,0,3 + + lvsl 3,0,11 + + li 7,16 + vperm 0,0,1,2 + lvx 1,0,5 + lvsl 5,0,5 + srwi 6,6,1 + lvx 2,7,5 + addi 7,7,16 + subi 6,6,1 + vperm 1,1,2,5 + + vxor 0,0,1 + lvx 1,7,5 + addi 7,7,16 + mtctr 6 + +Loop_dec: + vperm 2,2,1,5 + .long 0x10001548 + lvx 2,7,5 + addi 7,7,16 + vperm 1,1,2,5 + .long 0x10000D48 + lvx 1,7,5 + addi 7,7,16 + bc 16,0,Loop_dec + + vperm 2,2,1,5 + .long 0x10001548 + lvx 2,7,5 + vperm 1,1,2,5 + .long 0x10000D49 + + vspltisb 2,-1 + vxor 1,1,1 + li 7,15 + vperm 2,1,2,3 + + lvx 1,0,4 + vperm 0,0,0,3 + vsel 1,1,0,2 + lvx 4,7,4 + stvx 1,0,4 + vsel 0,0,4,2 + stvx 0,7,4 + + mtspr 256,12 + blr +.long 0 +.byte 0,12,0x14,0,0,0,3,0 +.long 0 + +.globl aes_p8_cbc_encrypt +.align 5 +aes_p8_cbc_encrypt: + cmpldi 5,16 + bclr 14,0 + + cmpwi 8,0 + lis 0,0xffe0 + mfspr 12,256 + mtspr 256,0 + + li 10,15 + vxor 0,0,0 + + + lvx 4,0,7 + lvsl 6,0,7 + lvx 5,10,7 + + vperm 4,4,5,6 + + neg 11,3 + lvsl 10,0,6 + lwz 9,240(6) + + lvsr 6,0,11 + lvx 5,0,3 + addi 3,3,15 + + + lvsr 8,0,4 + vspltisb 9,-1 + lvx 7,0,4 + vperm 9,0,9,8 + + + srwi 9,9,1 + li 10,16 + subi 9,9,1 + beq Lcbc_dec + +Lcbc_enc: + vor 2,5,5 + lvx 5,0,3 + addi 3,3,16 + mtctr 9 + subi 5,5,16 + + lvx 0,0,6 + vperm 2,2,5,6 + lvx 1,10,6 + addi 10,10,16 + vperm 0,0,1,10 + vxor 2,2,0 + lvx 0,10,6 + addi 10,10,16 + vxor 2,2,4 + +Loop_cbc_enc: + vperm 1,1,0,10 + .long 0x10420D08 + lvx 1,10,6 + addi 10,10,16 + vperm 0,0,1,10 + .long 0x10420508 + lvx 0,10,6 + addi 10,10,16 + bc 16,0,Loop_cbc_enc + + vperm 1,1,0,10 + .long 0x10420D08 + lvx 1,10,6 + li 10,16 + vperm 0,0,1,10 + .long 0x10820509 + cmpldi 5,16 + + vperm 3,4,4,8 + vsel 2,7,3,9 + vor 7,3,3 + stvx 2,0,4 + addi 4,4,16 + bge Lcbc_enc + + b Lcbc_done + +.align 4 +Lcbc_dec: + cmpldi 5,128 + bge _aesp8_cbc_decrypt8x + vor 3,5,5 + lvx 5,0,3 + addi 3,3,16 + mtctr 9 + subi 5,5,16 + + lvx 0,0,6 + vperm 3,3,5,6 + lvx 1,10,6 + addi 10,10,16 + vperm 0,0,1,10 + vxor 2,3,0 + lvx 0,10,6 + addi 10,10,16 + +Loop_cbc_dec: + vperm 1,1,0,10 + .long 0x10420D48 + lvx 1,10,6 + addi 10,10,16 + vperm 0,0,1,10 + .long 0x10420548 + lvx 0,10,6 + addi 10,10,16 + bc 16,0,Loop_cbc_dec + + vperm 1,1,0,10 + .long 0x10420D48 + lvx 1,10,6 + li 10,16 + vperm 0,0,1,10 + .long 0x10420549 + cmpldi 5,16 + + vxor 2,2,4 + vor 4,3,3 + vperm 3,2,2,8 + vsel 2,7,3,9 + vor 7,3,3 + stvx 2,0,4 + addi 4,4,16 + bge Lcbc_dec + +Lcbc_done: + addi 4,4,-1 + lvx 2,0,4 + vsel 2,7,2,9 + stvx 
2,0,4 + + neg 8,7 + li 10,15 + vxor 0,0,0 + vspltisb 9,-1 + + lvsl 8,0,8 + vperm 9,0,9,8 + + lvx 7,0,7 + vperm 4,4,4,8 + vsel 2,7,4,9 + lvx 5,10,7 + stvx 2,0,7 + vsel 2,4,5,9 + stvx 2,10,7 + + mtspr 256,12 + blr +.long 0 +.byte 0,12,0x14,0,0,0,6,0 +.long 0 +.align 5 +_aesp8_cbc_decrypt8x: + stdu 1,-448(1) + li 10,207 + li 11,223 + stvx 20,10,1 + addi 10,10,32 + stvx 21,11,1 + addi 11,11,32 + stvx 22,10,1 + addi 10,10,32 + stvx 23,11,1 + addi 11,11,32 + stvx 24,10,1 + addi 10,10,32 + stvx 25,11,1 + addi 11,11,32 + stvx 26,10,1 + addi 10,10,32 + stvx 27,11,1 + addi 11,11,32 + stvx 28,10,1 + addi 10,10,32 + stvx 29,11,1 + addi 11,11,32 + stvx 30,10,1 + stvx 31,11,1 + li 0,-1 + stw 12,396(1) + li 8,0x10 + std 26,400(1) + li 26,0x20 + std 27,408(1) + li 27,0x30 + std 28,416(1) + li 28,0x40 + std 29,424(1) + li 29,0x50 + std 30,432(1) + li 30,0x60 + std 31,440(1) + li 31,0x70 + mtspr 256,0 + + subi 9,9,3 + subi 5,5,128 + + lvx 23,0,6 + lvx 30,8,6 + addi 6,6,0x20 + lvx 31,0,6 + vperm 23,23,30,10 + addi 11,1,64+15 + mtctr 9 + +Load_cbc_dec_key: + vperm 24,30,31,10 + lvx 30,8,6 + addi 6,6,0x20 + stvx 24,0,11 + vperm 25,31,30,10 + lvx 31,0,6 + stvx 25,8,11 + addi 11,11,0x20 + bc 16,0,Load_cbc_dec_key + + lvx 26,8,6 + vperm 24,30,31,10 + lvx 27,26,6 + stvx 24,0,11 + vperm 25,31,26,10 + lvx 28,27,6 + stvx 25,8,11 + addi 11,1,64+15 + vperm 26,26,27,10 + lvx 29,28,6 + vperm 27,27,28,10 + lvx 30,29,6 + vperm 28,28,29,10 + lvx 31,30,6 + vperm 29,29,30,10 + lvx 14,31,6 + vperm 30,30,31,10 + lvx 24,0,11 + vperm 31,31,14,10 + lvx 25,8,11 + + + + subi 3,3,15 + + + .long 0x7C001E99 + + + .long 0x7C281E99 + + .long 0x7C5A1E99 + + .long 0x7C7B1E99 + + .long 0x7D5C1E99 + + vxor 14,0,23 + .long 0x7D7D1E99 + + vxor 15,1,23 + .long 0x7D9E1E99 + + vxor 16,2,23 + .long 0x7DBF1E99 + addi 3,3,0x80 + + vxor 17,3,23 + + vxor 18,10,23 + + vxor 19,11,23 + vxor 20,12,23 + vxor 21,13,23 + + mtctr 9 + b Loop_cbc_dec8x +.align 5 +Loop_cbc_dec8x: + .long 0x11CEC548 + .long 0x11EFC548 + .long 0x1210C548 + .long 0x1231C548 + .long 0x1252C548 + .long 0x1273C548 + .long 0x1294C548 + .long 0x12B5C548 + lvx 24,26,11 + addi 11,11,0x20 + + .long 0x11CECD48 + .long 0x11EFCD48 + .long 0x1210CD48 + .long 0x1231CD48 + .long 0x1252CD48 + .long 0x1273CD48 + .long 0x1294CD48 + .long 0x12B5CD48 + lvx 25,8,11 + bc 16,0,Loop_cbc_dec8x + + subic 5,5,128 + .long 0x11CEC548 + .long 0x11EFC548 + .long 0x1210C548 + .long 0x1231C548 + .long 0x1252C548 + .long 0x1273C548 + .long 0x1294C548 + .long 0x12B5C548 + + subfe. 
0,0,0 + .long 0x11CECD48 + .long 0x11EFCD48 + .long 0x1210CD48 + .long 0x1231CD48 + .long 0x1252CD48 + .long 0x1273CD48 + .long 0x1294CD48 + .long 0x12B5CD48 + + and 0,0,5 + .long 0x11CED548 + .long 0x11EFD548 + .long 0x1210D548 + .long 0x1231D548 + .long 0x1252D548 + .long 0x1273D548 + .long 0x1294D548 + .long 0x12B5D548 + + add 3,3,0 + + + + .long 0x11CEDD48 + .long 0x11EFDD48 + .long 0x1210DD48 + .long 0x1231DD48 + .long 0x1252DD48 + .long 0x1273DD48 + .long 0x1294DD48 + .long 0x12B5DD48 + + addi 11,1,64+15 + .long 0x11CEE548 + .long 0x11EFE548 + .long 0x1210E548 + .long 0x1231E548 + .long 0x1252E548 + .long 0x1273E548 + .long 0x1294E548 + .long 0x12B5E548 + lvx 24,0,11 + + .long 0x11CEED48 + .long 0x11EFED48 + .long 0x1210ED48 + .long 0x1231ED48 + .long 0x1252ED48 + .long 0x1273ED48 + .long 0x1294ED48 + .long 0x12B5ED48 + lvx 25,8,11 + + .long 0x11CEF548 + vxor 4,4,31 + .long 0x11EFF548 + vxor 0,0,31 + .long 0x1210F548 + vxor 1,1,31 + .long 0x1231F548 + vxor 2,2,31 + .long 0x1252F548 + vxor 3,3,31 + .long 0x1273F548 + vxor 10,10,31 + .long 0x1294F548 + vxor 11,11,31 + .long 0x12B5F548 + vxor 12,12,31 + + .long 0x11CE2549 + .long 0x11EF0549 + .long 0x7C001E99 + .long 0x12100D49 + .long 0x7C281E99 + .long 0x12311549 + + .long 0x7C5A1E99 + .long 0x12521D49 + + .long 0x7C7B1E99 + .long 0x12735549 + + .long 0x7D5C1E99 + .long 0x12945D49 + + .long 0x7D7D1E99 + .long 0x12B56549 + + .long 0x7D9E1E99 + vor 4,13,13 + + .long 0x7DBF1E99 + addi 3,3,0x80 + + + + .long 0x7DC02799 + + vxor 14,0,23 + + .long 0x7DE82799 + + vxor 15,1,23 + + .long 0x7E1A2799 + vxor 16,2,23 + + .long 0x7E3B2799 + vxor 17,3,23 + + .long 0x7E5C2799 + vxor 18,10,23 + + .long 0x7E7D2799 + vxor 19,11,23 + + .long 0x7E9E2799 + vxor 20,12,23 + .long 0x7EBF2799 + addi 4,4,0x80 + vxor 21,13,23 + + mtctr 9 + beq Loop_cbc_dec8x + + addic. 
5,5,128 + beq Lcbc_dec8x_done + nop + nop + +Loop_cbc_dec8x_tail: + .long 0x11EFC548 + .long 0x1210C548 + .long 0x1231C548 + .long 0x1252C548 + .long 0x1273C548 + .long 0x1294C548 + .long 0x12B5C548 + lvx 24,26,11 + addi 11,11,0x20 + + .long 0x11EFCD48 + .long 0x1210CD48 + .long 0x1231CD48 + .long 0x1252CD48 + .long 0x1273CD48 + .long 0x1294CD48 + .long 0x12B5CD48 + lvx 25,8,11 + bc 16,0,Loop_cbc_dec8x_tail + + .long 0x11EFC548 + .long 0x1210C548 + .long 0x1231C548 + .long 0x1252C548 + .long 0x1273C548 + .long 0x1294C548 + .long 0x12B5C548 + + .long 0x11EFCD48 + .long 0x1210CD48 + .long 0x1231CD48 + .long 0x1252CD48 + .long 0x1273CD48 + .long 0x1294CD48 + .long 0x12B5CD48 + + .long 0x11EFD548 + .long 0x1210D548 + .long 0x1231D548 + .long 0x1252D548 + .long 0x1273D548 + .long 0x1294D548 + .long 0x12B5D548 + + .long 0x11EFDD48 + .long 0x1210DD48 + .long 0x1231DD48 + .long 0x1252DD48 + .long 0x1273DD48 + .long 0x1294DD48 + .long 0x12B5DD48 + + .long 0x11EFE548 + .long 0x1210E548 + .long 0x1231E548 + .long 0x1252E548 + .long 0x1273E548 + .long 0x1294E548 + .long 0x12B5E548 + + .long 0x11EFED48 + .long 0x1210ED48 + .long 0x1231ED48 + .long 0x1252ED48 + .long 0x1273ED48 + .long 0x1294ED48 + .long 0x12B5ED48 + + .long 0x11EFF548 + vxor 4,4,31 + .long 0x1210F548 + vxor 1,1,31 + .long 0x1231F548 + vxor 2,2,31 + .long 0x1252F548 + vxor 3,3,31 + .long 0x1273F548 + vxor 10,10,31 + .long 0x1294F548 + vxor 11,11,31 + .long 0x12B5F548 + vxor 12,12,31 + + cmplwi 5,32 + blt Lcbc_dec8x_one + nop + beq Lcbc_dec8x_two + cmplwi 5,64 + blt Lcbc_dec8x_three + nop + beq Lcbc_dec8x_four + cmplwi 5,96 + blt Lcbc_dec8x_five + nop + beq Lcbc_dec8x_six + +Lcbc_dec8x_seven: + .long 0x11EF2549 + .long 0x12100D49 + .long 0x12311549 + .long 0x12521D49 + .long 0x12735549 + .long 0x12945D49 + .long 0x12B56549 + vor 4,13,13 + + + + .long 0x7DE02799 + + .long 0x7E082799 + + .long 0x7E3A2799 + + .long 0x7E5B2799 + + .long 0x7E7C2799 + + .long 0x7E9D2799 + .long 0x7EBE2799 + addi 4,4,0x70 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_six: + .long 0x12102549 + .long 0x12311549 + .long 0x12521D49 + .long 0x12735549 + .long 0x12945D49 + .long 0x12B56549 + vor 4,13,13 + + + + .long 0x7E002799 + + .long 0x7E282799 + + .long 0x7E5A2799 + + .long 0x7E7B2799 + + .long 0x7E9C2799 + .long 0x7EBD2799 + addi 4,4,0x60 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_five: + .long 0x12312549 + .long 0x12521D49 + .long 0x12735549 + .long 0x12945D49 + .long 0x12B56549 + vor 4,13,13 + + + + .long 0x7E202799 + + .long 0x7E482799 + + .long 0x7E7A2799 + + .long 0x7E9B2799 + .long 0x7EBC2799 + addi 4,4,0x50 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_four: + .long 0x12522549 + .long 0x12735549 + .long 0x12945D49 + .long 0x12B56549 + vor 4,13,13 + + + + .long 0x7E402799 + + .long 0x7E682799 + + .long 0x7E9A2799 + .long 0x7EBB2799 + addi 4,4,0x40 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_three: + .long 0x12732549 + .long 0x12945D49 + .long 0x12B56549 + vor 4,13,13 + + + + .long 0x7E602799 + + .long 0x7E882799 + .long 0x7EBA2799 + addi 4,4,0x30 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_two: + .long 0x12942549 + .long 0x12B56549 + vor 4,13,13 + + + + .long 0x7E802799 + .long 0x7EA82799 + addi 4,4,0x20 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_one: + .long 0x12B52549 + vor 4,13,13 + + + .long 0x7EA02799 + addi 4,4,0x10 + +Lcbc_dec8x_done: + + .long 0x7C803F99 + + li 10,79 + li 11,95 + stvx 6,10,1 + addi 10,10,32 + stvx 6,11,1 + addi 11,11,32 + stvx 6,10,1 + addi 10,10,32 + stvx 6,11,1 + addi 11,11,32 + stvx 6,10,1 + addi 10,10,32 + stvx 6,11,1 + addi 
11,11,32 + stvx 6,10,1 + addi 10,10,32 + stvx 6,11,1 + addi 11,11,32 + + mtspr 256,12 + lvx 20,10,1 + addi 10,10,32 + lvx 21,11,1 + addi 11,11,32 + lvx 22,10,1 + addi 10,10,32 + lvx 23,11,1 + addi 11,11,32 + lvx 24,10,1 + addi 10,10,32 + lvx 25,11,1 + addi 11,11,32 + lvx 26,10,1 + addi 10,10,32 + lvx 27,11,1 + addi 11,11,32 + lvx 28,10,1 + addi 10,10,32 + lvx 29,11,1 + addi 11,11,32 + lvx 30,10,1 + lvx 31,11,1 + ld 26,400(1) + ld 27,408(1) + ld 28,416(1) + ld 29,424(1) + ld 30,432(1) + ld 31,440(1) + addi 1,1,448 + blr +.long 0 +.byte 0,12,0x04,0,0x80,6,6,0 +.long 0 + +.globl aes_p8_ctr32_encrypt_blocks +.align 5 +aes_p8_ctr32_encrypt_blocks: + cmpldi 5,1 + bclr 14,0 + + lis 0,0xfff0 + mfspr 12,256 + mtspr 256,0 + + li 10,15 + vxor 0,0,0 + + + lvx 4,0,7 + lvsl 6,0,7 + lvx 5,10,7 + vspltisb 11,1 + + vperm 4,4,5,6 + vsldoi 11,0,11,1 + + neg 11,3 + lvsl 10,0,6 + lwz 9,240(6) + + lvsr 6,0,11 + lvx 5,0,3 + addi 3,3,15 + + + srwi 9,9,1 + li 10,16 + subi 9,9,1 + + cmpldi 5,8 + bge _aesp8_ctr32_encrypt8x + + lvsr 8,0,4 + vspltisb 9,-1 + lvx 7,0,4 + vperm 9,0,9,8 + + + lvx 0,0,6 + mtctr 9 + lvx 1,10,6 + addi 10,10,16 + vperm 0,0,1,10 + vxor 2,4,0 + lvx 0,10,6 + addi 10,10,16 + b Loop_ctr32_enc + +.align 5 +Loop_ctr32_enc: + vperm 1,1,0,10 + .long 0x10420D08 + lvx 1,10,6 + addi 10,10,16 + vperm 0,0,1,10 + .long 0x10420508 + lvx 0,10,6 + addi 10,10,16 + bc 16,0,Loop_ctr32_enc + + vadduwm 4,4,11 + vor 3,5,5 + lvx 5,0,3 + addi 3,3,16 + subic. 5,5,1 + + vperm 1,1,0,10 + .long 0x10420D08 + lvx 1,10,6 + vperm 3,3,5,6 + li 10,16 + vperm 1,0,1,10 + lvx 0,0,6 + vxor 3,3,1 + .long 0x10421D09 + + lvx 1,10,6 + addi 10,10,16 + vperm 2,2,2,8 + vsel 3,7,2,9 + mtctr 9 + vperm 0,0,1,10 + vor 7,2,2 + vxor 2,4,0 + lvx 0,10,6 + addi 10,10,16 + stvx 3,0,4 + addi 4,4,16 + bne Loop_ctr32_enc + + addi 4,4,-1 + lvx 2,0,4 + vsel 2,7,2,9 + stvx 2,0,4 + + mtspr 256,12 + blr +.long 0 +.byte 0,12,0x14,0,0,0,6,0 +.long 0 +.align 5 +_aesp8_ctr32_encrypt8x: + stdu 1,-448(1) + li 10,207 + li 11,223 + stvx 20,10,1 + addi 10,10,32 + stvx 21,11,1 + addi 11,11,32 + stvx 22,10,1 + addi 10,10,32 + stvx 23,11,1 + addi 11,11,32 + stvx 24,10,1 + addi 10,10,32 + stvx 25,11,1 + addi 11,11,32 + stvx 26,10,1 + addi 10,10,32 + stvx 27,11,1 + addi 11,11,32 + stvx 28,10,1 + addi 10,10,32 + stvx 29,11,1 + addi 11,11,32 + stvx 30,10,1 + stvx 31,11,1 + li 0,-1 + stw 12,396(1) + li 8,0x10 + std 26,400(1) + li 26,0x20 + std 27,408(1) + li 27,0x30 + std 28,416(1) + li 28,0x40 + std 29,424(1) + li 29,0x50 + std 30,432(1) + li 30,0x60 + std 31,440(1) + li 31,0x70 + mtspr 256,0 + + subi 9,9,3 + + lvx 23,0,6 + lvx 30,8,6 + addi 6,6,0x20 + lvx 31,0,6 + vperm 23,23,30,10 + addi 11,1,64+15 + mtctr 9 + +Load_ctr32_enc_key: + vperm 24,30,31,10 + lvx 30,8,6 + addi 6,6,0x20 + stvx 24,0,11 + vperm 25,31,30,10 + lvx 31,0,6 + stvx 25,8,11 + addi 11,11,0x20 + bc 16,0,Load_ctr32_enc_key + + lvx 26,8,6 + vperm 24,30,31,10 + lvx 27,26,6 + stvx 24,0,11 + vperm 25,31,26,10 + lvx 28,27,6 + stvx 25,8,11 + addi 11,1,64+15 + vperm 26,26,27,10 + lvx 29,28,6 + vperm 27,27,28,10 + lvx 30,29,6 + vperm 28,28,29,10 + lvx 31,30,6 + vperm 29,29,30,10 + lvx 15,31,6 + vperm 30,30,31,10 + lvx 24,0,11 + vperm 31,31,15,10 + lvx 25,8,11 + + vadduwm 7,11,11 + subi 3,3,15 + sldi 5,5,4 + + vadduwm 16,4,11 + vadduwm 17,4,7 + vxor 15,4,23 + + vadduwm 18,16,7 + vxor 16,16,23 + + vadduwm 19,17,7 + vxor 17,17,23 + + vadduwm 20,18,7 + vxor 18,18,23 + + vadduwm 21,19,7 + vxor 19,19,23 + vadduwm 22,20,7 + vxor 20,20,23 + vadduwm 4,21,7 + vxor 21,21,23 + vxor 22,22,23 + + mtctr 9 + b Loop_ctr32_enc8x 
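+# [editorial note, not part of the CRYPTOGAMS-generated output] Main 8x CTR32
+# loop: each pass applies two AES rounds (round keys in v24/v25) to the eight
+# counter blocks held in v15-v22; the code following the loop runs the final
+# rounds, combines the keystream with the input blocks and stores the result.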
+.align 5 +Loop_ctr32_enc8x: + .long 0x11EFC508 + .long 0x1210C508 + .long 0x1231C508 + .long 0x1252C508 + .long 0x1273C508 + .long 0x1294C508 + .long 0x12B5C508 + .long 0x12D6C508 +Loop_ctr32_enc8x_middle: + lvx 24,26,11 + addi 11,11,0x20 + + .long 0x11EFCD08 + .long 0x1210CD08 + .long 0x1231CD08 + .long 0x1252CD08 + .long 0x1273CD08 + .long 0x1294CD08 + .long 0x12B5CD08 + .long 0x12D6CD08 + lvx 25,8,11 + bc 16,0,Loop_ctr32_enc8x + + subic 11,5,256 + .long 0x11EFC508 + .long 0x1210C508 + .long 0x1231C508 + .long 0x1252C508 + .long 0x1273C508 + .long 0x1294C508 + .long 0x12B5C508 + .long 0x12D6C508 + + subfe 0,0,0 + .long 0x11EFCD08 + .long 0x1210CD08 + .long 0x1231CD08 + .long 0x1252CD08 + .long 0x1273CD08 + .long 0x1294CD08 + .long 0x12B5CD08 + .long 0x12D6CD08 + + and 0,0,11 + addi 11,1,64+15 + .long 0x11EFD508 + .long 0x1210D508 + .long 0x1231D508 + .long 0x1252D508 + .long 0x1273D508 + .long 0x1294D508 + .long 0x12B5D508 + .long 0x12D6D508 + lvx 24,0,11 + + subic 5,5,129 + .long 0x11EFDD08 + addi 5,5,1 + .long 0x1210DD08 + .long 0x1231DD08 + .long 0x1252DD08 + .long 0x1273DD08 + .long 0x1294DD08 + .long 0x12B5DD08 + .long 0x12D6DD08 + lvx 25,8,11 + + .long 0x11EFE508 + .long 0x7C001E99 + .long 0x1210E508 + .long 0x7C281E99 + .long 0x1231E508 + .long 0x7C5A1E99 + .long 0x1252E508 + .long 0x7C7B1E99 + .long 0x1273E508 + .long 0x7D5C1E99 + .long 0x1294E508 + .long 0x7D9D1E99 + .long 0x12B5E508 + .long 0x7DBE1E99 + .long 0x12D6E508 + .long 0x7DDF1E99 + addi 3,3,0x80 + + .long 0x11EFED08 + + .long 0x1210ED08 + + .long 0x1231ED08 + + .long 0x1252ED08 + + .long 0x1273ED08 + + .long 0x1294ED08 + + .long 0x12B5ED08 + + .long 0x12D6ED08 + + + add 3,3,0 + + + + subfe. 0,0,0 + .long 0x11EFF508 + vxor 0,0,31 + .long 0x1210F508 + vxor 1,1,31 + .long 0x1231F508 + vxor 2,2,31 + .long 0x1252F508 + vxor 3,3,31 + .long 0x1273F508 + vxor 10,10,31 + .long 0x1294F508 + vxor 12,12,31 + .long 0x12B5F508 + vxor 13,13,31 + .long 0x12D6F508 + vxor 14,14,31 + + bne Lctr32_enc8x_break + + .long 0x100F0509 + .long 0x10300D09 + vadduwm 16,4,11 + .long 0x10511509 + vadduwm 17,4,7 + vxor 15,4,23 + .long 0x10721D09 + vadduwm 18,16,7 + vxor 16,16,23 + .long 0x11535509 + vadduwm 19,17,7 + vxor 17,17,23 + .long 0x11946509 + vadduwm 20,18,7 + vxor 18,18,23 + .long 0x11B56D09 + vadduwm 21,19,7 + vxor 19,19,23 + .long 0x11D67509 + vadduwm 22,20,7 + vxor 20,20,23 + + vadduwm 4,21,7 + vxor 21,21,23 + + vxor 22,22,23 + mtctr 9 + + .long 0x11EFC508 + .long 0x7C002799 + + .long 0x1210C508 + .long 0x7C282799 + + .long 0x1231C508 + .long 0x7C5A2799 + + .long 0x1252C508 + .long 0x7C7B2799 + + .long 0x1273C508 + .long 0x7D5C2799 + + .long 0x1294C508 + .long 0x7D9D2799 + + .long 0x12B5C508 + .long 0x7DBE2799 + .long 0x12D6C508 + .long 0x7DDF2799 + addi 4,4,0x80 + + b Loop_ctr32_enc8x_middle + +.align 5 +Lctr32_enc8x_break: + cmpwi 5,-0x60 + blt Lctr32_enc8x_one + nop + beq Lctr32_enc8x_two + cmpwi 5,-0x40 + blt Lctr32_enc8x_three + nop + beq Lctr32_enc8x_four + cmpwi 5,-0x20 + blt Lctr32_enc8x_five + nop + beq Lctr32_enc8x_six + cmpwi 5,0x00 + blt Lctr32_enc8x_seven + +Lctr32_enc8x_eight: + .long 0x11EF0509 + .long 0x12100D09 + .long 0x12311509 + .long 0x12521D09 + .long 0x12735509 + .long 0x12946509 + .long 0x12B56D09 + .long 0x12D67509 + + + + .long 0x7DE02799 + + .long 0x7E082799 + + .long 0x7E3A2799 + + .long 0x7E5B2799 + + .long 0x7E7C2799 + + .long 0x7E9D2799 + + .long 0x7EBE2799 + .long 0x7EDF2799 + addi 4,4,0x80 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_seven: + .long 0x11EF0D09 + .long 0x12101509 + .long 0x12311D09 + 
.long 0x12525509 + .long 0x12736509 + .long 0x12946D09 + .long 0x12B57509 + + + + .long 0x7DE02799 + + .long 0x7E082799 + + .long 0x7E3A2799 + + .long 0x7E5B2799 + + .long 0x7E7C2799 + + .long 0x7E9D2799 + .long 0x7EBE2799 + addi 4,4,0x70 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_six: + .long 0x11EF1509 + .long 0x12101D09 + .long 0x12315509 + .long 0x12526509 + .long 0x12736D09 + .long 0x12947509 + + + + .long 0x7DE02799 + + .long 0x7E082799 + + .long 0x7E3A2799 + + .long 0x7E5B2799 + + .long 0x7E7C2799 + .long 0x7E9D2799 + addi 4,4,0x60 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_five: + .long 0x11EF1D09 + .long 0x12105509 + .long 0x12316509 + .long 0x12526D09 + .long 0x12737509 + + + + .long 0x7DE02799 + + .long 0x7E082799 + + .long 0x7E3A2799 + + .long 0x7E5B2799 + .long 0x7E7C2799 + addi 4,4,0x50 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_four: + .long 0x11EF5509 + .long 0x12106509 + .long 0x12316D09 + .long 0x12527509 + + + + .long 0x7DE02799 + + .long 0x7E082799 + + .long 0x7E3A2799 + .long 0x7E5B2799 + addi 4,4,0x40 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_three: + .long 0x11EF6509 + .long 0x12106D09 + .long 0x12317509 + + + + .long 0x7DE02799 + + .long 0x7E082799 + .long 0x7E3A2799 + addi 4,4,0x30 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_two: + .long 0x11EF6D09 + .long 0x12107509 + + + + .long 0x7DE02799 + .long 0x7E082799 + addi 4,4,0x20 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_one: + .long 0x11EF7509 + + + .long 0x7DE02799 + addi 4,4,0x10 + +Lctr32_enc8x_done: + li 10,79 + li 11,95 + stvx 6,10,1 + addi 10,10,32 + stvx 6,11,1 + addi 11,11,32 + stvx 6,10,1 + addi 10,10,32 + stvx 6,11,1 + addi 11,11,32 + stvx 6,10,1 + addi 10,10,32 + stvx 6,11,1 + addi 11,11,32 + stvx 6,10,1 + addi 10,10,32 + stvx 6,11,1 + addi 11,11,32 + + mtspr 256,12 + lvx 20,10,1 + addi 10,10,32 + lvx 21,11,1 + addi 11,11,32 + lvx 22,10,1 + addi 10,10,32 + lvx 23,11,1 + addi 11,11,32 + lvx 24,10,1 + addi 10,10,32 + lvx 25,11,1 + addi 11,11,32 + lvx 26,10,1 + addi 10,10,32 + lvx 27,11,1 + addi 11,11,32 + lvx 28,10,1 + addi 10,10,32 + lvx 29,11,1 + addi 11,11,32 + lvx 30,10,1 + lvx 31,11,1 + ld 26,400(1) + ld 27,408(1) + ld 28,416(1) + ld 29,424(1) + ld 30,432(1) + ld 31,440(1) + addi 1,1,448 + blr +.long 0 +.byte 0,12,0x04,0,0x80,6,6,0 +.long 0 + +.globl aes_p8_xts_encrypt +.align 5 +aes_p8_xts_encrypt: + mr 10,3 + li 3,-1 + cmpldi 5,16 + bclr 14,0 + + lis 0,0xfff0 + mfspr 12,256 + li 11,0 + mtspr 256,0 + + vspltisb 9,0x07 + + + + + li 3,15 + lvx 8,0,8 + lvsl 5,0,8 + lvx 4,3,8 + + vperm 8,8,4,5 + + neg 11,10 + lvsr 5,0,11 + lvx 2,0,10 + addi 10,10,15 + + + cmpldi 7,0 + beq Lxts_enc_no_key2 + + lvsl 7,0,7 + lwz 9,240(7) + srwi 9,9,1 + subi 9,9,1 + li 3,16 + + lvx 0,0,7 + lvx 1,3,7 + addi 3,3,16 + vperm 0,0,1,7 + vxor 8,8,0 + lvx 0,3,7 + addi 3,3,16 + mtctr 9 + +Ltweak_xts_enc: + vperm 1,1,0,7 + .long 0x11080D08 + lvx 1,3,7 + addi 3,3,16 + vperm 0,0,1,7 + .long 0x11080508 + lvx 0,3,7 + addi 3,3,16 + bc 16,0,Ltweak_xts_enc + + vperm 1,1,0,7 + .long 0x11080D08 + lvx 1,3,7 + vperm 0,0,1,7 + .long 0x11080509 + + li 8,0 + b Lxts_enc + +Lxts_enc_no_key2: + li 3,-16 + and 5,5,3 + + +Lxts_enc: + lvx 4,0,10 + addi 10,10,16 + + lvsl 7,0,6 + lwz 9,240(6) + srwi 9,9,1 + subi 9,9,1 + li 3,16 + + vslb 10,9,9 + vor 10,10,9 + vspltisb 11,1 + vsldoi 10,10,11,15 + + cmpldi 5,96 + bge _aesp8_xts_encrypt6x + + andi. 
7,5,15 + subic 0,5,32 + subi 7,7,16 + subfe 0,0,0 + and 0,0,7 + add 10,10,0 + + lvx 0,0,6 + lvx 1,3,6 + addi 3,3,16 + vperm 2,2,4,5 + vperm 0,0,1,7 + vxor 2,2,8 + vxor 2,2,0 + lvx 0,3,6 + addi 3,3,16 + mtctr 9 + b Loop_xts_enc + +.align 5 +Loop_xts_enc: + vperm 1,1,0,7 + .long 0x10420D08 + lvx 1,3,6 + addi 3,3,16 + vperm 0,0,1,7 + .long 0x10420508 + lvx 0,3,6 + addi 3,3,16 + bc 16,0,Loop_xts_enc + + vperm 1,1,0,7 + .long 0x10420D08 + lvx 1,3,6 + li 3,16 + vperm 0,0,1,7 + vxor 0,0,8 + .long 0x10620509 + + + nop + + .long 0x7C602799 + addi 4,4,16 + + subic. 5,5,16 + beq Lxts_enc_done + + vor 2,4,4 + lvx 4,0,10 + addi 10,10,16 + lvx 0,0,6 + lvx 1,3,6 + addi 3,3,16 + + subic 0,5,32 + subfe 0,0,0 + and 0,0,7 + add 10,10,0 + + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + vand 11,11,10 + vxor 8,8,11 + + vperm 2,2,4,5 + vperm 0,0,1,7 + vxor 2,2,8 + vxor 3,3,0 + vxor 2,2,0 + lvx 0,3,6 + addi 3,3,16 + + mtctr 9 + cmpldi 5,16 + bge Loop_xts_enc + + vxor 3,3,8 + lvsr 5,0,5 + vxor 4,4,4 + vspltisb 11,-1 + vperm 4,4,11,5 + vsel 2,2,3,4 + + subi 11,4,17 + subi 4,4,16 + mtctr 5 + li 5,16 +Loop_xts_enc_steal: + lbzu 0,1(11) + stb 0,16(11) + bc 16,0,Loop_xts_enc_steal + + mtctr 9 + b Loop_xts_enc + +Lxts_enc_done: + cmpldi 8,0 + beq Lxts_enc_ret + + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + vand 11,11,10 + vxor 8,8,11 + + + .long 0x7D004799 + +Lxts_enc_ret: + mtspr 256,12 + li 3,0 + blr +.long 0 +.byte 0,12,0x04,0,0x80,6,6,0 +.long 0 + + +.globl aes_p8_xts_decrypt +.align 5 +aes_p8_xts_decrypt: + mr 10,3 + li 3,-1 + cmpldi 5,16 + bclr 14,0 + + lis 0,0xfff8 + mfspr 12,256 + li 11,0 + mtspr 256,0 + + andi. 0,5,15 + neg 0,0 + andi. 0,0,16 + sub 5,5,0 + + vspltisb 9,0x07 + + + + + li 3,15 + lvx 8,0,8 + lvsl 5,0,8 + lvx 4,3,8 + + vperm 8,8,4,5 + + neg 11,10 + lvsr 5,0,11 + lvx 2,0,10 + addi 10,10,15 + + + cmpldi 7,0 + beq Lxts_dec_no_key2 + + lvsl 7,0,7 + lwz 9,240(7) + srwi 9,9,1 + subi 9,9,1 + li 3,16 + + lvx 0,0,7 + lvx 1,3,7 + addi 3,3,16 + vperm 0,0,1,7 + vxor 8,8,0 + lvx 0,3,7 + addi 3,3,16 + mtctr 9 + +Ltweak_xts_dec: + vperm 1,1,0,7 + .long 0x11080D08 + lvx 1,3,7 + addi 3,3,16 + vperm 0,0,1,7 + .long 0x11080508 + lvx 0,3,7 + addi 3,3,16 + bc 16,0,Ltweak_xts_dec + + vperm 1,1,0,7 + .long 0x11080D08 + lvx 1,3,7 + vperm 0,0,1,7 + .long 0x11080509 + + li 8,0 + b Lxts_dec + +Lxts_dec_no_key2: + neg 3,5 + andi. 3,3,15 + add 5,5,3 + + +Lxts_dec: + lvx 4,0,10 + addi 10,10,16 + + lvsl 7,0,6 + lwz 9,240(6) + srwi 9,9,1 + subi 9,9,1 + li 3,16 + + vslb 10,9,9 + vor 10,10,9 + vspltisb 11,1 + vsldoi 10,10,11,15 + + cmpldi 5,96 + bge _aesp8_xts_decrypt6x + + lvx 0,0,6 + lvx 1,3,6 + addi 3,3,16 + vperm 2,2,4,5 + vperm 0,0,1,7 + vxor 2,2,8 + vxor 2,2,0 + lvx 0,3,6 + addi 3,3,16 + mtctr 9 + + cmpldi 5,16 + blt Ltail_xts_dec + b Loop_xts_dec + +.align 5 +Loop_xts_dec: + vperm 1,1,0,7 + .long 0x10420D48 + lvx 1,3,6 + addi 3,3,16 + vperm 0,0,1,7 + .long 0x10420548 + lvx 0,3,6 + addi 3,3,16 + bc 16,0,Loop_xts_dec + + vperm 1,1,0,7 + .long 0x10420D48 + lvx 1,3,6 + li 3,16 + vperm 0,0,1,7 + vxor 0,0,8 + .long 0x10620549 + + + nop + + .long 0x7C602799 + addi 4,4,16 + + subic. 
5,5,16 + beq Lxts_dec_done + + vor 2,4,4 + lvx 4,0,10 + addi 10,10,16 + lvx 0,0,6 + lvx 1,3,6 + addi 3,3,16 + + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + vand 11,11,10 + vxor 8,8,11 + + vperm 2,2,4,5 + vperm 0,0,1,7 + vxor 2,2,8 + vxor 2,2,0 + lvx 0,3,6 + addi 3,3,16 + + mtctr 9 + cmpldi 5,16 + bge Loop_xts_dec + +Ltail_xts_dec: + vsrab 11,8,9 + vaddubm 12,8,8 + vsldoi 11,11,11,15 + vand 11,11,10 + vxor 12,12,11 + + subi 10,10,16 + add 10,10,5 + + vxor 2,2,8 + vxor 2,2,12 + +Loop_xts_dec_short: + vperm 1,1,0,7 + .long 0x10420D48 + lvx 1,3,6 + addi 3,3,16 + vperm 0,0,1,7 + .long 0x10420548 + lvx 0,3,6 + addi 3,3,16 + bc 16,0,Loop_xts_dec_short + + vperm 1,1,0,7 + .long 0x10420D48 + lvx 1,3,6 + li 3,16 + vperm 0,0,1,7 + vxor 0,0,12 + .long 0x10620549 + + + nop + + .long 0x7C602799 + + vor 2,4,4 + lvx 4,0,10 + + lvx 0,0,6 + lvx 1,3,6 + addi 3,3,16 + vperm 2,2,4,5 + vperm 0,0,1,7 + + lvsr 5,0,5 + vxor 4,4,4 + vspltisb 11,-1 + vperm 4,4,11,5 + vsel 2,2,3,4 + + vxor 0,0,8 + vxor 2,2,0 + lvx 0,3,6 + addi 3,3,16 + + subi 11,4,1 + mtctr 5 + li 5,16 +Loop_xts_dec_steal: + lbzu 0,1(11) + stb 0,16(11) + bc 16,0,Loop_xts_dec_steal + + mtctr 9 + b Loop_xts_dec + +Lxts_dec_done: + cmpldi 8,0 + beq Lxts_dec_ret + + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + vand 11,11,10 + vxor 8,8,11 + + + .long 0x7D004799 + +Lxts_dec_ret: + mtspr 256,12 + li 3,0 + blr +.long 0 +.byte 0,12,0x04,0,0x80,6,6,0 +.long 0 + +.align 5 +_aesp8_xts_encrypt6x: + stdu 1,-448(1) + mflr 11 + li 7,207 + li 3,223 + std 11,464(1) + stvx 20,7,1 + addi 7,7,32 + stvx 21,3,1 + addi 3,3,32 + stvx 22,7,1 + addi 7,7,32 + stvx 23,3,1 + addi 3,3,32 + stvx 24,7,1 + addi 7,7,32 + stvx 25,3,1 + addi 3,3,32 + stvx 26,7,1 + addi 7,7,32 + stvx 27,3,1 + addi 3,3,32 + stvx 28,7,1 + addi 7,7,32 + stvx 29,3,1 + addi 3,3,32 + stvx 30,7,1 + stvx 31,3,1 + li 0,-1 + stw 12,396(1) + li 3,0x10 + std 26,400(1) + li 26,0x20 + std 27,408(1) + li 27,0x30 + std 28,416(1) + li 28,0x40 + std 29,424(1) + li 29,0x50 + std 30,432(1) + li 30,0x60 + std 31,440(1) + li 31,0x70 + mtspr 256,0 + + subi 9,9,3 + + lvx 23,0,6 + lvx 30,3,6 + addi 6,6,0x20 + lvx 31,0,6 + vperm 23,23,30,7 + addi 7,1,64+15 + mtctr 9 + +Load_xts_enc_key: + vperm 24,30,31,7 + lvx 30,3,6 + addi 6,6,0x20 + stvx 24,0,7 + vperm 25,31,30,7 + lvx 31,0,6 + stvx 25,3,7 + addi 7,7,0x20 + bc 16,0,Load_xts_enc_key + + lvx 26,3,6 + vperm 24,30,31,7 + lvx 27,26,6 + stvx 24,0,7 + vperm 25,31,26,7 + lvx 28,27,6 + stvx 25,3,7 + addi 7,1,64+15 + vperm 26,26,27,7 + lvx 29,28,6 + vperm 27,27,28,7 + lvx 30,29,6 + vperm 28,28,29,7 + lvx 31,30,6 + vperm 29,29,30,7 + lvx 22,31,6 + vperm 30,30,31,7 + lvx 24,0,7 + vperm 31,31,22,7 + lvx 25,3,7 + + vperm 0,2,4,5 + subi 10,10,31 + vxor 17,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + vand 11,11,10 + vxor 7,0,17 + vxor 8,8,11 + + .long 0x7C235699 + vxor 18,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + + vand 11,11,10 + vxor 12,1,18 + vxor 8,8,11 + + .long 0x7C5A5699 + andi. 
31,5,15 + vxor 19,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + + vand 11,11,10 + vxor 13,2,19 + vxor 8,8,11 + + .long 0x7C7B5699 + sub 5,5,31 + vxor 20,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + + vand 11,11,10 + vxor 14,3,20 + vxor 8,8,11 + + .long 0x7C9C5699 + subi 5,5,0x60 + vxor 21,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + + vand 11,11,10 + vxor 15,4,21 + vxor 8,8,11 + + .long 0x7CBD5699 + addi 10,10,0x60 + vxor 22,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + + vand 11,11,10 + vxor 16,5,22 + vxor 8,8,11 + + vxor 31,31,23 + mtctr 9 + b Loop_xts_enc6x + +.align 5 +Loop_xts_enc6x: + .long 0x10E7C508 + .long 0x118CC508 + .long 0x11ADC508 + .long 0x11CEC508 + .long 0x11EFC508 + .long 0x1210C508 + lvx 24,26,7 + addi 7,7,0x20 + + .long 0x10E7CD08 + .long 0x118CCD08 + .long 0x11ADCD08 + .long 0x11CECD08 + .long 0x11EFCD08 + .long 0x1210CD08 + lvx 25,3,7 + bc 16,0,Loop_xts_enc6x + + subic 5,5,96 + vxor 0,17,31 + .long 0x10E7C508 + .long 0x118CC508 + vsrab 11,8,9 + vxor 17,8,23 + vaddubm 8,8,8 + .long 0x11ADC508 + .long 0x11CEC508 + vsldoi 11,11,11,15 + .long 0x11EFC508 + .long 0x1210C508 + + subfe. 0,0,0 + vand 11,11,10 + .long 0x10E7CD08 + .long 0x118CCD08 + vxor 8,8,11 + .long 0x11ADCD08 + .long 0x11CECD08 + vxor 1,18,31 + vsrab 11,8,9 + vxor 18,8,23 + .long 0x11EFCD08 + .long 0x1210CD08 + + and 0,0,5 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + .long 0x10E7D508 + .long 0x118CD508 + vand 11,11,10 + .long 0x11ADD508 + .long 0x11CED508 + vxor 8,8,11 + .long 0x11EFD508 + .long 0x1210D508 + + add 10,10,0 + + + + vxor 2,19,31 + vsrab 11,8,9 + vxor 19,8,23 + vaddubm 8,8,8 + .long 0x10E7DD08 + .long 0x118CDD08 + vsldoi 11,11,11,15 + .long 0x11ADDD08 + .long 0x11CEDD08 + vand 11,11,10 + .long 0x11EFDD08 + .long 0x1210DD08 + + addi 7,1,64+15 + vxor 8,8,11 + .long 0x10E7E508 + .long 0x118CE508 + vxor 3,20,31 + vsrab 11,8,9 + vxor 20,8,23 + .long 0x11ADE508 + .long 0x11CEE508 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + .long 0x11EFE508 + .long 0x1210E508 + lvx 24,0,7 + vand 11,11,10 + + .long 0x10E7ED08 + .long 0x118CED08 + vxor 8,8,11 + .long 0x11ADED08 + .long 0x11CEED08 + vxor 4,21,31 + vsrab 11,8,9 + vxor 21,8,23 + .long 0x11EFED08 + .long 0x1210ED08 + lvx 25,3,7 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + + .long 0x10E7F508 + .long 0x118CF508 + vand 11,11,10 + .long 0x11ADF508 + .long 0x11CEF508 + vxor 8,8,11 + .long 0x11EFF508 + .long 0x1210F508 + vxor 5,22,31 + vsrab 11,8,9 + vxor 22,8,23 + + .long 0x10E70509 + .long 0x7C005699 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + .long 0x118C0D09 + .long 0x7C235699 + .long 0x11AD1509 + + .long 0x7C5A5699 + vand 11,11,10 + .long 0x11CE1D09 + + .long 0x7C7B5699 + .long 0x11EF2509 + + .long 0x7C9C5699 + vxor 8,8,11 + .long 0x11702D09 + + + .long 0x7CBD5699 + addi 10,10,0x60 + + + + + + .long 0x7CE02799 + vxor 7,0,17 + + .long 0x7D832799 + vxor 12,1,18 + + .long 0x7DBA2799 + vxor 13,2,19 + + .long 0x7DDB2799 + vxor 14,3,20 + + .long 0x7DFC2799 + vxor 15,4,21 + + .long 0x7D7D2799 + vxor 16,5,22 + addi 4,4,0x60 + + mtctr 9 + beq Loop_xts_enc6x + + addic. 
5,5,0x60 + beq Lxts_enc6x_zero + cmpwi 5,0x20 + blt Lxts_enc6x_one + nop + beq Lxts_enc6x_two + cmpwi 5,0x40 + blt Lxts_enc6x_three + nop + beq Lxts_enc6x_four + +Lxts_enc6x_five: + vxor 7,1,17 + vxor 12,2,18 + vxor 13,3,19 + vxor 14,4,20 + vxor 15,5,21 + + bl _aesp8_xts_enc5x + + + vor 17,22,22 + + .long 0x7CE02799 + + .long 0x7D832799 + + .long 0x7DBA2799 + vxor 11,15,22 + + .long 0x7DDB2799 + .long 0x7DFC2799 + addi 4,4,0x50 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_four: + vxor 7,2,17 + vxor 12,3,18 + vxor 13,4,19 + vxor 14,5,20 + vxor 15,15,15 + + bl _aesp8_xts_enc5x + + + vor 17,21,21 + + .long 0x7CE02799 + + .long 0x7D832799 + vxor 11,14,21 + + .long 0x7DBA2799 + .long 0x7DDB2799 + addi 4,4,0x40 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_three: + vxor 7,3,17 + vxor 12,4,18 + vxor 13,5,19 + vxor 14,14,14 + vxor 15,15,15 + + bl _aesp8_xts_enc5x + + + vor 17,20,20 + + .long 0x7CE02799 + vxor 11,13,20 + + .long 0x7D832799 + .long 0x7DBA2799 + addi 4,4,0x30 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_two: + vxor 7,4,17 + vxor 12,5,18 + vxor 13,13,13 + vxor 14,14,14 + vxor 15,15,15 + + bl _aesp8_xts_enc5x + + + vor 17,19,19 + vxor 11,12,19 + + .long 0x7CE02799 + .long 0x7D832799 + addi 4,4,0x20 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_one: + vxor 7,5,17 + nop +Loop_xts_enc1x: + .long 0x10E7C508 + lvx 24,26,7 + addi 7,7,0x20 + + .long 0x10E7CD08 + lvx 25,3,7 + bc 16,0,Loop_xts_enc1x + + add 10,10,31 + cmpwi 31,0 + .long 0x10E7C508 + + subi 10,10,16 + .long 0x10E7CD08 + + lvsr 5,0,31 + .long 0x10E7D508 + + .long 0x7C005699 + .long 0x10E7DD08 + + addi 7,1,64+15 + .long 0x10E7E508 + lvx 24,0,7 + + .long 0x10E7ED08 + lvx 25,3,7 + vxor 17,17,31 + + + .long 0x10E7F508 + + vperm 0,0,0,5 + .long 0x10E78D09 + + vor 17,18,18 + vxor 11,7,18 + + .long 0x7CE02799 + addi 4,4,0x10 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_zero: + cmpwi 31,0 + beq Lxts_enc6x_done + + add 10,10,31 + subi 10,10,16 + .long 0x7C005699 + lvsr 5,0,31 + + vperm 0,0,0,5 + vxor 11,11,17 +Lxts_enc6x_steal: + vxor 0,0,17 + vxor 7,7,7 + vspltisb 12,-1 + vperm 7,7,12,5 + vsel 7,0,11,7 + + subi 30,4,17 + subi 4,4,16 + mtctr 31 +Loop_xts_enc6x_steal: + lbzu 0,1(30) + stb 0,16(30) + bc 16,0,Loop_xts_enc6x_steal + + li 31,0 + mtctr 9 + b Loop_xts_enc1x + +.align 4 +Lxts_enc6x_done: + cmpldi 8,0 + beq Lxts_enc6x_ret + + vxor 8,17,23 + + .long 0x7D004799 + +Lxts_enc6x_ret: + mtlr 11 + li 10,79 + li 11,95 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + + mtspr 256,12 + lvx 20,10,1 + addi 10,10,32 + lvx 21,11,1 + addi 11,11,32 + lvx 22,10,1 + addi 10,10,32 + lvx 23,11,1 + addi 11,11,32 + lvx 24,10,1 + addi 10,10,32 + lvx 25,11,1 + addi 11,11,32 + lvx 26,10,1 + addi 10,10,32 + lvx 27,11,1 + addi 11,11,32 + lvx 28,10,1 + addi 10,10,32 + lvx 29,11,1 + addi 11,11,32 + lvx 30,10,1 + lvx 31,11,1 + ld 26,400(1) + ld 27,408(1) + ld 28,416(1) + ld 29,424(1) + ld 30,432(1) + ld 31,440(1) + addi 1,1,448 + blr +.long 0 +.byte 0,12,0x04,1,0x80,6,6,0 +.long 0 + +.align 5 +_aesp8_xts_enc5x: + .long 0x10E7C508 + .long 0x118CC508 + .long 0x11ADC508 + .long 0x11CEC508 + .long 0x11EFC508 + lvx 24,26,7 + addi 7,7,0x20 + + .long 0x10E7CD08 + .long 0x118CCD08 + .long 0x11ADCD08 + .long 0x11CECD08 + .long 0x11EFCD08 + lvx 25,3,7 + bc 
16,0,_aesp8_xts_enc5x + + add 10,10,31 + cmpwi 31,0 + .long 0x10E7C508 + .long 0x118CC508 + .long 0x11ADC508 + .long 0x11CEC508 + .long 0x11EFC508 + + subi 10,10,16 + .long 0x10E7CD08 + .long 0x118CCD08 + .long 0x11ADCD08 + .long 0x11CECD08 + .long 0x11EFCD08 + vxor 17,17,31 + + .long 0x10E7D508 + lvsr 5,0,31 + .long 0x118CD508 + .long 0x11ADD508 + .long 0x11CED508 + .long 0x11EFD508 + vxor 1,18,31 + + .long 0x10E7DD08 + .long 0x7C005699 + .long 0x118CDD08 + .long 0x11ADDD08 + .long 0x11CEDD08 + .long 0x11EFDD08 + vxor 2,19,31 + + addi 7,1,64+15 + .long 0x10E7E508 + .long 0x118CE508 + .long 0x11ADE508 + .long 0x11CEE508 + .long 0x11EFE508 + lvx 24,0,7 + vxor 3,20,31 + + .long 0x10E7ED08 + + .long 0x118CED08 + .long 0x11ADED08 + .long 0x11CEED08 + .long 0x11EFED08 + lvx 25,3,7 + vxor 4,21,31 + + .long 0x10E7F508 + vperm 0,0,0,5 + .long 0x118CF508 + .long 0x11ADF508 + .long 0x11CEF508 + .long 0x11EFF508 + + .long 0x10E78D09 + .long 0x118C0D09 + .long 0x11AD1509 + .long 0x11CE1D09 + .long 0x11EF2509 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 + +.align 5 +_aesp8_xts_decrypt6x: + stdu 1,-448(1) + mflr 11 + li 7,207 + li 3,223 + std 11,464(1) + stvx 20,7,1 + addi 7,7,32 + stvx 21,3,1 + addi 3,3,32 + stvx 22,7,1 + addi 7,7,32 + stvx 23,3,1 + addi 3,3,32 + stvx 24,7,1 + addi 7,7,32 + stvx 25,3,1 + addi 3,3,32 + stvx 26,7,1 + addi 7,7,32 + stvx 27,3,1 + addi 3,3,32 + stvx 28,7,1 + addi 7,7,32 + stvx 29,3,1 + addi 3,3,32 + stvx 30,7,1 + stvx 31,3,1 + li 0,-1 + stw 12,396(1) + li 3,0x10 + std 26,400(1) + li 26,0x20 + std 27,408(1) + li 27,0x30 + std 28,416(1) + li 28,0x40 + std 29,424(1) + li 29,0x50 + std 30,432(1) + li 30,0x60 + std 31,440(1) + li 31,0x70 + mtspr 256,0 + + subi 9,9,3 + + lvx 23,0,6 + lvx 30,3,6 + addi 6,6,0x20 + lvx 31,0,6 + vperm 23,23,30,7 + addi 7,1,64+15 + mtctr 9 + +Load_xts_dec_key: + vperm 24,30,31,7 + lvx 30,3,6 + addi 6,6,0x20 + stvx 24,0,7 + vperm 25,31,30,7 + lvx 31,0,6 + stvx 25,3,7 + addi 7,7,0x20 + bc 16,0,Load_xts_dec_key + + lvx 26,3,6 + vperm 24,30,31,7 + lvx 27,26,6 + stvx 24,0,7 + vperm 25,31,26,7 + lvx 28,27,6 + stvx 25,3,7 + addi 7,1,64+15 + vperm 26,26,27,7 + lvx 29,28,6 + vperm 27,27,28,7 + lvx 30,29,6 + vperm 28,28,29,7 + lvx 31,30,6 + vperm 29,29,30,7 + lvx 22,31,6 + vperm 30,30,31,7 + lvx 24,0,7 + vperm 31,31,22,7 + lvx 25,3,7 + + vperm 0,2,4,5 + subi 10,10,31 + vxor 17,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + vand 11,11,10 + vxor 7,0,17 + vxor 8,8,11 + + .long 0x7C235699 + vxor 18,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + + vand 11,11,10 + vxor 12,1,18 + vxor 8,8,11 + + .long 0x7C5A5699 + andi. 
31,5,15 + vxor 19,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + + vand 11,11,10 + vxor 13,2,19 + vxor 8,8,11 + + .long 0x7C7B5699 + sub 5,5,31 + vxor 20,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + + vand 11,11,10 + vxor 14,3,20 + vxor 8,8,11 + + .long 0x7C9C5699 + subi 5,5,0x60 + vxor 21,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + + vand 11,11,10 + vxor 15,4,21 + vxor 8,8,11 + + .long 0x7CBD5699 + addi 10,10,0x60 + vxor 22,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + + vand 11,11,10 + vxor 16,5,22 + vxor 8,8,11 + + vxor 31,31,23 + mtctr 9 + b Loop_xts_dec6x + +.align 5 +Loop_xts_dec6x: + .long 0x10E7C548 + .long 0x118CC548 + .long 0x11ADC548 + .long 0x11CEC548 + .long 0x11EFC548 + .long 0x1210C548 + lvx 24,26,7 + addi 7,7,0x20 + + .long 0x10E7CD48 + .long 0x118CCD48 + .long 0x11ADCD48 + .long 0x11CECD48 + .long 0x11EFCD48 + .long 0x1210CD48 + lvx 25,3,7 + bc 16,0,Loop_xts_dec6x + + subic 5,5,96 + vxor 0,17,31 + .long 0x10E7C548 + .long 0x118CC548 + vsrab 11,8,9 + vxor 17,8,23 + vaddubm 8,8,8 + .long 0x11ADC548 + .long 0x11CEC548 + vsldoi 11,11,11,15 + .long 0x11EFC548 + .long 0x1210C548 + + subfe. 0,0,0 + vand 11,11,10 + .long 0x10E7CD48 + .long 0x118CCD48 + vxor 8,8,11 + .long 0x11ADCD48 + .long 0x11CECD48 + vxor 1,18,31 + vsrab 11,8,9 + vxor 18,8,23 + .long 0x11EFCD48 + .long 0x1210CD48 + + and 0,0,5 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + .long 0x10E7D548 + .long 0x118CD548 + vand 11,11,10 + .long 0x11ADD548 + .long 0x11CED548 + vxor 8,8,11 + .long 0x11EFD548 + .long 0x1210D548 + + add 10,10,0 + + + + vxor 2,19,31 + vsrab 11,8,9 + vxor 19,8,23 + vaddubm 8,8,8 + .long 0x10E7DD48 + .long 0x118CDD48 + vsldoi 11,11,11,15 + .long 0x11ADDD48 + .long 0x11CEDD48 + vand 11,11,10 + .long 0x11EFDD48 + .long 0x1210DD48 + + addi 7,1,64+15 + vxor 8,8,11 + .long 0x10E7E548 + .long 0x118CE548 + vxor 3,20,31 + vsrab 11,8,9 + vxor 20,8,23 + .long 0x11ADE548 + .long 0x11CEE548 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + .long 0x11EFE548 + .long 0x1210E548 + lvx 24,0,7 + vand 11,11,10 + + .long 0x10E7ED48 + .long 0x118CED48 + vxor 8,8,11 + .long 0x11ADED48 + .long 0x11CEED48 + vxor 4,21,31 + vsrab 11,8,9 + vxor 21,8,23 + .long 0x11EFED48 + .long 0x1210ED48 + lvx 25,3,7 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + + .long 0x10E7F548 + .long 0x118CF548 + vand 11,11,10 + .long 0x11ADF548 + .long 0x11CEF548 + vxor 8,8,11 + .long 0x11EFF548 + .long 0x1210F548 + vxor 5,22,31 + vsrab 11,8,9 + vxor 22,8,23 + + .long 0x10E70549 + .long 0x7C005699 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + .long 0x118C0D49 + .long 0x7C235699 + .long 0x11AD1549 + + .long 0x7C5A5699 + vand 11,11,10 + .long 0x11CE1D49 + + .long 0x7C7B5699 + .long 0x11EF2549 + + .long 0x7C9C5699 + vxor 8,8,11 + .long 0x12102D49 + + .long 0x7CBD5699 + addi 10,10,0x60 + + + + + + .long 0x7CE02799 + vxor 7,0,17 + + .long 0x7D832799 + vxor 12,1,18 + + .long 0x7DBA2799 + vxor 13,2,19 + + .long 0x7DDB2799 + vxor 14,3,20 + + .long 0x7DFC2799 + vxor 15,4,21 + .long 0x7E1D2799 + vxor 16,5,22 + addi 4,4,0x60 + + mtctr 9 + beq Loop_xts_dec6x + + addic. 
5,5,0x60 + beq Lxts_dec6x_zero + cmpwi 5,0x20 + blt Lxts_dec6x_one + nop + beq Lxts_dec6x_two + cmpwi 5,0x40 + blt Lxts_dec6x_three + nop + beq Lxts_dec6x_four + +Lxts_dec6x_five: + vxor 7,1,17 + vxor 12,2,18 + vxor 13,3,19 + vxor 14,4,20 + vxor 15,5,21 + + bl _aesp8_xts_dec5x + + + vor 17,22,22 + vxor 18,8,23 + + .long 0x7CE02799 + vxor 7,0,18 + + .long 0x7D832799 + + .long 0x7DBA2799 + + .long 0x7DDB2799 + .long 0x7DFC2799 + addi 4,4,0x50 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_four: + vxor 7,2,17 + vxor 12,3,18 + vxor 13,4,19 + vxor 14,5,20 + vxor 15,15,15 + + bl _aesp8_xts_dec5x + + + vor 17,21,21 + vor 18,22,22 + + .long 0x7CE02799 + vxor 7,0,22 + + .long 0x7D832799 + + .long 0x7DBA2799 + .long 0x7DDB2799 + addi 4,4,0x40 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_three: + vxor 7,3,17 + vxor 12,4,18 + vxor 13,5,19 + vxor 14,14,14 + vxor 15,15,15 + + bl _aesp8_xts_dec5x + + + vor 17,20,20 + vor 18,21,21 + + .long 0x7CE02799 + vxor 7,0,21 + + .long 0x7D832799 + .long 0x7DBA2799 + addi 4,4,0x30 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_two: + vxor 7,4,17 + vxor 12,5,18 + vxor 13,13,13 + vxor 14,14,14 + vxor 15,15,15 + + bl _aesp8_xts_dec5x + + + vor 17,19,19 + vor 18,20,20 + + .long 0x7CE02799 + vxor 7,0,20 + .long 0x7D832799 + addi 4,4,0x20 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_one: + vxor 7,5,17 + nop +Loop_xts_dec1x: + .long 0x10E7C548 + lvx 24,26,7 + addi 7,7,0x20 + + .long 0x10E7CD48 + lvx 25,3,7 + bc 16,0,Loop_xts_dec1x + + subi 0,31,1 + .long 0x10E7C548 + + andi. 0,0,16 + cmpwi 31,0 + .long 0x10E7CD48 + + sub 10,10,0 + .long 0x10E7D548 + + .long 0x7C005699 + .long 0x10E7DD48 + + addi 7,1,64+15 + .long 0x10E7E548 + lvx 24,0,7 + + .long 0x10E7ED48 + lvx 25,3,7 + vxor 17,17,31 + + + .long 0x10E7F548 + + mtctr 9 + .long 0x10E78D49 + + vor 17,18,18 + vor 18,19,19 + + .long 0x7CE02799 + addi 4,4,0x10 + vxor 7,0,19 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_zero: + cmpwi 31,0 + beq Lxts_dec6x_done + + .long 0x7C005699 + + vxor 7,0,18 +Lxts_dec6x_steal: + .long 0x10E7C548 + lvx 24,26,7 + addi 7,7,0x20 + + .long 0x10E7CD48 + lvx 25,3,7 + bc 16,0,Lxts_dec6x_steal + + add 10,10,31 + .long 0x10E7C548 + + cmpwi 31,0 + .long 0x10E7CD48 + + .long 0x7C005699 + .long 0x10E7D548 + + lvsr 5,0,31 + .long 0x10E7DD48 + + addi 7,1,64+15 + .long 0x10E7E548 + lvx 24,0,7 + + .long 0x10E7ED48 + lvx 25,3,7 + vxor 18,18,31 + + + .long 0x10E7F548 + + vperm 0,0,0,5 + .long 0x11679549 + + + + .long 0x7D602799 + + vxor 7,7,7 + vspltisb 12,-1 + vperm 7,7,12,5 + vsel 7,0,11,7 + vxor 7,7,17 + + subi 30,4,1 + mtctr 31 +Loop_xts_dec6x_steal: + lbzu 0,1(30) + stb 0,16(30) + bc 16,0,Loop_xts_dec6x_steal + + li 31,0 + mtctr 9 + b Loop_xts_dec1x + +.align 4 +Lxts_dec6x_done: + cmpldi 8,0 + beq Lxts_dec6x_ret + + vxor 8,17,23 + + .long 0x7D004799 + +Lxts_dec6x_ret: + mtlr 11 + li 10,79 + li 11,95 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + + mtspr 256,12 + lvx 20,10,1 + addi 10,10,32 + lvx 21,11,1 + addi 11,11,32 + lvx 22,10,1 + addi 10,10,32 + lvx 23,11,1 + addi 11,11,32 + lvx 24,10,1 + addi 10,10,32 + lvx 25,11,1 + addi 11,11,32 + lvx 26,10,1 + addi 10,10,32 + lvx 27,11,1 + addi 11,11,32 + lvx 28,10,1 + addi 10,10,32 + lvx 29,11,1 + addi 11,11,32 + lvx 30,10,1 + lvx 31,11,1 + ld 26,400(1) + ld 
27,408(1) + ld 28,416(1) + ld 29,424(1) + ld 30,432(1) + ld 31,440(1) + addi 1,1,448 + blr +.long 0 +.byte 0,12,0x04,1,0x80,6,6,0 +.long 0 + +.align 5 +_aesp8_xts_dec5x: + .long 0x10E7C548 + .long 0x118CC548 + .long 0x11ADC548 + .long 0x11CEC548 + .long 0x11EFC548 + lvx 24,26,7 + addi 7,7,0x20 + + .long 0x10E7CD48 + .long 0x118CCD48 + .long 0x11ADCD48 + .long 0x11CECD48 + .long 0x11EFCD48 + lvx 25,3,7 + bc 16,0,_aesp8_xts_dec5x + + subi 0,31,1 + .long 0x10E7C548 + .long 0x118CC548 + .long 0x11ADC548 + .long 0x11CEC548 + .long 0x11EFC548 + + andi. 0,0,16 + cmpwi 31,0 + .long 0x10E7CD48 + .long 0x118CCD48 + .long 0x11ADCD48 + .long 0x11CECD48 + .long 0x11EFCD48 + vxor 17,17,31 + + sub 10,10,0 + .long 0x10E7D548 + .long 0x118CD548 + .long 0x11ADD548 + .long 0x11CED548 + .long 0x11EFD548 + vxor 1,18,31 + + .long 0x10E7DD48 + .long 0x7C005699 + .long 0x118CDD48 + .long 0x11ADDD48 + .long 0x11CEDD48 + .long 0x11EFDD48 + vxor 2,19,31 + + addi 7,1,64+15 + .long 0x10E7E548 + .long 0x118CE548 + .long 0x11ADE548 + .long 0x11CEE548 + .long 0x11EFE548 + lvx 24,0,7 + vxor 3,20,31 + + .long 0x10E7ED48 + + .long 0x118CED48 + .long 0x11ADED48 + .long 0x11CEED48 + .long 0x11EFED48 + lvx 25,3,7 + vxor 4,21,31 + + .long 0x10E7F548 + .long 0x118CF548 + .long 0x11ADF548 + .long 0x11CEF548 + .long 0x11EFF548 + + .long 0x10E78D49 + .long 0x118C0D49 + .long 0x11AD1549 + .long 0x11CE1D49 + .long 0x11EF2549 + mtctr 9 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 Index: cipher/sha256-ppc8.S =================================================================== --- /dev/null +++ cipher/sha256-ppc8.S @@ -0,0 +1,741 @@ +.machine "any" +.text + +.globl sha256_block_p8 +.align 6 +sha256_block_p8: + stdu 1,-384(1) + mflr 8 + li 10,207 + li 11,223 + stvx 24,10,1 + addi 10,10,32 + mfspr 12,256 + stvx 25,11,1 + addi 11,11,32 + stvx 26,10,1 + addi 10,10,32 + stvx 27,11,1 + addi 11,11,32 + stvx 28,10,1 + addi 10,10,32 + stvx 29,11,1 + addi 11,11,32 + stvx 30,10,1 + stvx 31,11,1 + li 11,-4096+255 + stw 12,332(1) + li 10,0x10 + std 26,336(1) + li 26,0x20 + std 27,344(1) + li 27,0x30 + std 28,352(1) + li 28,0x40 + std 29,360(1) + li 29,0x50 + std 30,368(1) + li 30,0x60 + std 31,376(1) + li 31,0x70 + std 8,400(1) + mtspr 256,11 + + bl LPICmeup + addi 11,1,79 + li 7,8 + lvsl 31,0,7 + vspltisb 28,0x0f + vxor 31,31,28 + .long 0x7C001E19 + .long 0x7C8A1E19 + vsldoi 1,0,0,4 + vsldoi 2,0,0,8 + vsldoi 3,0,0,12 + vsldoi 5,4,4,4 + vsldoi 6,4,4,8 + vsldoi 7,4,4,12 + li 0,3 + b Loop +.align 5 +Loop: + lvx 28,0,6 + .long 0x7D002699 + addi 4,4,16 + mr 7,6 + stvx 0,0,11 + stvx 1,10,11 + stvx 2,26,11 + stvx 3,27,11 + stvx 4,28,11 + stvx 5,29,11 + stvx 6,30,11 + stvx 7,31,11 + vadduwm 7,7,28 + lvx 28,10,6 + vperm 8,8,8,31 + vadduwm 7,7,8 + vsel 29,6,5,4 + vadduwm 6,6,28 + vadduwm 7,7,29 + .long 0x13C4FE82 + vadduwm 7,7,30 + vxor 29,0,1 + vsel 29,1,2,29 + vadduwm 3,3,7 + .long 0x13C08682 + vadduwm 30,30,29 + vadduwm 7,7,30 + lvx 28,26,7 + vsldoi 9,8,8,4 + vadduwm 6,6,9 + vsel 29,5,4,3 + vadduwm 5,5,28 + vadduwm 6,6,29 + .long 0x13C3FE82 + vadduwm 6,6,30 + vxor 29,7,0 + vsel 29,0,1,29 + vadduwm 2,2,6 + .long 0x13C78682 + vadduwm 30,30,29 + vadduwm 6,6,30 + lvx 28,27,7 + vsldoi 10,9,9,4 + vadduwm 5,5,10 + vsel 29,4,3,2 + vadduwm 4,4,28 + vadduwm 5,5,29 + .long 0x13C2FE82 + vadduwm 5,5,30 + vxor 29,6,7 + vsel 29,7,0,29 + vadduwm 1,1,5 + .long 0x13C68682 + vadduwm 30,30,29 + vadduwm 5,5,30 + lvx 28,28,7 + .long 0x7D802699 + addi 4,4,16 + vsldoi 11,10,10,4 + vadduwm 4,4,11 + vsel 29,3,2,1 + vadduwm 3,3,28 + vadduwm 4,4,29 + .long 0x13C1FE82 + vadduwm 4,4,30 + 
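+# Note: the numeric opcodes in the SHA-256 flavours likewise stand in
+# for PowerISA 2.07 mnemonics; decoded under that assumption:
+#   .long 0x13C4FE82   # vshasigmaw v30,v4,1,0xf (Sigma1 of e)
+#   .long 0x13C08682   # vshasigmaw v30,v0,1,0   (Sigma0 of a)
+#   .long 0x7C001E19   # lxvw4x  vs32,0,r3  (load hash state)
+#   .long 0x7C001F19   # stxvw4x vs32,0,r3  (store hash state)
+# The eight state words live one per vector (v0-v7, roles rotating from
+# round to round), while the message schedule cycles through v8-v19 and
+# v24-v27.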
vxor 29,5,6 + vsel 29,6,7,29 + vadduwm 0,0,4 + .long 0x13C58682 + vadduwm 30,30,29 + vadduwm 4,4,30 + lvx 28,29,7 + vperm 12,12,12,31 + vadduwm 3,3,12 + vsel 29,2,1,0 + vadduwm 2,2,28 + vadduwm 3,3,29 + .long 0x13C0FE82 + vadduwm 3,3,30 + vxor 29,4,5 + vsel 29,5,6,29 + vadduwm 7,7,3 + .long 0x13C48682 + vadduwm 30,30,29 + vadduwm 3,3,30 + lvx 28,30,7 + vsldoi 13,12,12,4 + vadduwm 2,2,13 + vsel 29,1,0,7 + vadduwm 1,1,28 + vadduwm 2,2,29 + .long 0x13C7FE82 + vadduwm 2,2,30 + vxor 29,3,4 + vsel 29,4,5,29 + vadduwm 6,6,2 + .long 0x13C38682 + vadduwm 30,30,29 + vadduwm 2,2,30 + lvx 28,31,7 + addi 7,7,0x80 + vsldoi 14,13,13,4 + vadduwm 1,1,14 + vsel 29,0,7,6 + vadduwm 0,0,28 + vadduwm 1,1,29 + .long 0x13C6FE82 + vadduwm 1,1,30 + vxor 29,2,3 + vsel 29,3,4,29 + vadduwm 5,5,1 + .long 0x13C28682 + vadduwm 30,30,29 + vadduwm 1,1,30 + lvx 28,0,7 + .long 0x7E002699 + addi 4,4,16 + vsldoi 15,14,14,4 + vadduwm 0,0,15 + vsel 29,7,6,5 + vadduwm 7,7,28 + vadduwm 0,0,29 + .long 0x13C5FE82 + vadduwm 0,0,30 + vxor 29,1,2 + vsel 29,2,3,29 + vadduwm 4,4,0 + .long 0x13C18682 + vadduwm 30,30,29 + vadduwm 0,0,30 + lvx 28,10,7 + vperm 16,16,16,31 + vadduwm 7,7,16 + vsel 29,6,5,4 + vadduwm 6,6,28 + vadduwm 7,7,29 + .long 0x13C4FE82 + vadduwm 7,7,30 + vxor 29,0,1 + vsel 29,1,2,29 + vadduwm 3,3,7 + .long 0x13C08682 + vadduwm 30,30,29 + vadduwm 7,7,30 + lvx 28,26,7 + vsldoi 17,16,16,4 + vadduwm 6,6,17 + vsel 29,5,4,3 + vadduwm 5,5,28 + vadduwm 6,6,29 + .long 0x13C3FE82 + vadduwm 6,6,30 + vxor 29,7,0 + vsel 29,0,1,29 + vadduwm 2,2,6 + .long 0x13C78682 + vadduwm 30,30,29 + vadduwm 6,6,30 + lvx 28,27,7 + vsldoi 18,17,17,4 + vadduwm 5,5,18 + vsel 29,4,3,2 + vadduwm 4,4,28 + vadduwm 5,5,29 + .long 0x13C2FE82 + vadduwm 5,5,30 + vxor 29,6,7 + vsel 29,7,0,29 + vadduwm 1,1,5 + .long 0x13C68682 + vadduwm 30,30,29 + vadduwm 5,5,30 + lvx 28,28,7 + .long 0x7F002699 + addi 4,4,16 + vsldoi 19,18,18,4 + vadduwm 4,4,19 + vsel 29,3,2,1 + vadduwm 3,3,28 + vadduwm 4,4,29 + .long 0x13C1FE82 + vadduwm 4,4,30 + vxor 29,5,6 + vsel 29,6,7,29 + vadduwm 0,0,4 + .long 0x13C58682 + vadduwm 30,30,29 + vadduwm 4,4,30 + lvx 28,29,7 + vperm 24,24,24,31 + vadduwm 3,3,24 + vsel 29,2,1,0 + vadduwm 2,2,28 + vadduwm 3,3,29 + .long 0x13C0FE82 + vadduwm 3,3,30 + vxor 29,4,5 + vsel 29,5,6,29 + vadduwm 7,7,3 + .long 0x13C48682 + vadduwm 30,30,29 + vadduwm 3,3,30 + lvx 28,30,7 + vsldoi 25,24,24,4 + vadduwm 2,2,25 + vsel 29,1,0,7 + vadduwm 1,1,28 + vadduwm 2,2,29 + .long 0x13C7FE82 + vadduwm 2,2,30 + vxor 29,3,4 + vsel 29,4,5,29 + vadduwm 6,6,2 + .long 0x13C38682 + vadduwm 30,30,29 + vadduwm 2,2,30 + lvx 28,31,7 + addi 7,7,0x80 + vsldoi 26,25,25,4 + vadduwm 1,1,26 + vsel 29,0,7,6 + vadduwm 0,0,28 + vadduwm 1,1,29 + .long 0x13C6FE82 + vadduwm 1,1,30 + vxor 29,2,3 + vsel 29,3,4,29 + vadduwm 5,5,1 + .long 0x13C28682 + vadduwm 30,30,29 + vadduwm 1,1,30 + lvx 28,0,7 + vsldoi 27,26,26,4 + .long 0x13C90682 + vadduwm 8,8,30 + .long 0x13DA7E82 + vadduwm 8,8,30 + vadduwm 8,8,17 + vadduwm 0,0,27 + vsel 29,7,6,5 + vadduwm 7,7,28 + vadduwm 0,0,29 + .long 0x13C5FE82 + vadduwm 0,0,30 + vxor 29,1,2 + vsel 29,2,3,29 + vadduwm 4,4,0 + .long 0x13C18682 + vadduwm 30,30,29 + vadduwm 0,0,30 + lvx 28,10,7 + mtctr 0 + b L16_xx +.align 5 +L16_xx: + .long 0x13CA0682 + vadduwm 9,9,30 + .long 0x13DB7E82 + vadduwm 9,9,30 + vadduwm 9,9,18 + vadduwm 7,7,8 + vsel 29,6,5,4 + vadduwm 6,6,28 + vadduwm 7,7,29 + .long 0x13C4FE82 + vadduwm 7,7,30 + vxor 29,0,1 + vsel 29,1,2,29 + vadduwm 3,3,7 + .long 0x13C08682 + vadduwm 30,30,29 + vadduwm 7,7,30 + lvx 28,26,7 + .long 0x13CB0682 + vadduwm 10,10,30 + 
.long 0x13C87E82 + vadduwm 10,10,30 + vadduwm 10,10,19 + vadduwm 6,6,9 + vsel 29,5,4,3 + vadduwm 5,5,28 + vadduwm 6,6,29 + .long 0x13C3FE82 + vadduwm 6,6,30 + vxor 29,7,0 + vsel 29,0,1,29 + vadduwm 2,2,6 + .long 0x13C78682 + vadduwm 30,30,29 + vadduwm 6,6,30 + lvx 28,27,7 + .long 0x13CC0682 + vadduwm 11,11,30 + .long 0x13C97E82 + vadduwm 11,11,30 + vadduwm 11,11,24 + vadduwm 5,5,10 + vsel 29,4,3,2 + vadduwm 4,4,28 + vadduwm 5,5,29 + .long 0x13C2FE82 + vadduwm 5,5,30 + vxor 29,6,7 + vsel 29,7,0,29 + vadduwm 1,1,5 + .long 0x13C68682 + vadduwm 30,30,29 + vadduwm 5,5,30 + lvx 28,28,7 + .long 0x13CD0682 + vadduwm 12,12,30 + .long 0x13CA7E82 + vadduwm 12,12,30 + vadduwm 12,12,25 + vadduwm 4,4,11 + vsel 29,3,2,1 + vadduwm 3,3,28 + vadduwm 4,4,29 + .long 0x13C1FE82 + vadduwm 4,4,30 + vxor 29,5,6 + vsel 29,6,7,29 + vadduwm 0,0,4 + .long 0x13C58682 + vadduwm 30,30,29 + vadduwm 4,4,30 + lvx 28,29,7 + .long 0x13CE0682 + vadduwm 13,13,30 + .long 0x13CB7E82 + vadduwm 13,13,30 + vadduwm 13,13,26 + vadduwm 3,3,12 + vsel 29,2,1,0 + vadduwm 2,2,28 + vadduwm 3,3,29 + .long 0x13C0FE82 + vadduwm 3,3,30 + vxor 29,4,5 + vsel 29,5,6,29 + vadduwm 7,7,3 + .long 0x13C48682 + vadduwm 30,30,29 + vadduwm 3,3,30 + lvx 28,30,7 + .long 0x13CF0682 + vadduwm 14,14,30 + .long 0x13CC7E82 + vadduwm 14,14,30 + vadduwm 14,14,27 + vadduwm 2,2,13 + vsel 29,1,0,7 + vadduwm 1,1,28 + vadduwm 2,2,29 + .long 0x13C7FE82 + vadduwm 2,2,30 + vxor 29,3,4 + vsel 29,4,5,29 + vadduwm 6,6,2 + .long 0x13C38682 + vadduwm 30,30,29 + vadduwm 2,2,30 + lvx 28,31,7 + addi 7,7,0x80 + .long 0x13D00682 + vadduwm 15,15,30 + .long 0x13CD7E82 + vadduwm 15,15,30 + vadduwm 15,15,8 + vadduwm 1,1,14 + vsel 29,0,7,6 + vadduwm 0,0,28 + vadduwm 1,1,29 + .long 0x13C6FE82 + vadduwm 1,1,30 + vxor 29,2,3 + vsel 29,3,4,29 + vadduwm 5,5,1 + .long 0x13C28682 + vadduwm 30,30,29 + vadduwm 1,1,30 + lvx 28,0,7 + .long 0x13D10682 + vadduwm 16,16,30 + .long 0x13CE7E82 + vadduwm 16,16,30 + vadduwm 16,16,9 + vadduwm 0,0,15 + vsel 29,7,6,5 + vadduwm 7,7,28 + vadduwm 0,0,29 + .long 0x13C5FE82 + vadduwm 0,0,30 + vxor 29,1,2 + vsel 29,2,3,29 + vadduwm 4,4,0 + .long 0x13C18682 + vadduwm 30,30,29 + vadduwm 0,0,30 + lvx 28,10,7 + .long 0x13D20682 + vadduwm 17,17,30 + .long 0x13CF7E82 + vadduwm 17,17,30 + vadduwm 17,17,10 + vadduwm 7,7,16 + vsel 29,6,5,4 + vadduwm 6,6,28 + vadduwm 7,7,29 + .long 0x13C4FE82 + vadduwm 7,7,30 + vxor 29,0,1 + vsel 29,1,2,29 + vadduwm 3,3,7 + .long 0x13C08682 + vadduwm 30,30,29 + vadduwm 7,7,30 + lvx 28,26,7 + .long 0x13D30682 + vadduwm 18,18,30 + .long 0x13D07E82 + vadduwm 18,18,30 + vadduwm 18,18,11 + vadduwm 6,6,17 + vsel 29,5,4,3 + vadduwm 5,5,28 + vadduwm 6,6,29 + .long 0x13C3FE82 + vadduwm 6,6,30 + vxor 29,7,0 + vsel 29,0,1,29 + vadduwm 2,2,6 + .long 0x13C78682 + vadduwm 30,30,29 + vadduwm 6,6,30 + lvx 28,27,7 + .long 0x13D80682 + vadduwm 19,19,30 + .long 0x13D17E82 + vadduwm 19,19,30 + vadduwm 19,19,12 + vadduwm 5,5,18 + vsel 29,4,3,2 + vadduwm 4,4,28 + vadduwm 5,5,29 + .long 0x13C2FE82 + vadduwm 5,5,30 + vxor 29,6,7 + vsel 29,7,0,29 + vadduwm 1,1,5 + .long 0x13C68682 + vadduwm 30,30,29 + vadduwm 5,5,30 + lvx 28,28,7 + .long 0x13D90682 + vadduwm 24,24,30 + .long 0x13D27E82 + vadduwm 24,24,30 + vadduwm 24,24,13 + vadduwm 4,4,19 + vsel 29,3,2,1 + vadduwm 3,3,28 + vadduwm 4,4,29 + .long 0x13C1FE82 + vadduwm 4,4,30 + vxor 29,5,6 + vsel 29,6,7,29 + vadduwm 0,0,4 + .long 0x13C58682 + vadduwm 30,30,29 + vadduwm 4,4,30 + lvx 28,29,7 + .long 0x13DA0682 + vadduwm 25,25,30 + .long 0x13D37E82 + vadduwm 25,25,30 + vadduwm 25,25,14 + vadduwm 3,3,24 + vsel 29,2,1,0 
+ vadduwm 2,2,28 + vadduwm 3,3,29 + .long 0x13C0FE82 + vadduwm 3,3,30 + vxor 29,4,5 + vsel 29,5,6,29 + vadduwm 7,7,3 + .long 0x13C48682 + vadduwm 30,30,29 + vadduwm 3,3,30 + lvx 28,30,7 + .long 0x13DB0682 + vadduwm 26,26,30 + .long 0x13D87E82 + vadduwm 26,26,30 + vadduwm 26,26,15 + vadduwm 2,2,25 + vsel 29,1,0,7 + vadduwm 1,1,28 + vadduwm 2,2,29 + .long 0x13C7FE82 + vadduwm 2,2,30 + vxor 29,3,4 + vsel 29,4,5,29 + vadduwm 6,6,2 + .long 0x13C38682 + vadduwm 30,30,29 + vadduwm 2,2,30 + lvx 28,31,7 + addi 7,7,0x80 + .long 0x13C80682 + vadduwm 27,27,30 + .long 0x13D97E82 + vadduwm 27,27,30 + vadduwm 27,27,16 + vadduwm 1,1,26 + vsel 29,0,7,6 + vadduwm 0,0,28 + vadduwm 1,1,29 + .long 0x13C6FE82 + vadduwm 1,1,30 + vxor 29,2,3 + vsel 29,3,4,29 + vadduwm 5,5,1 + .long 0x13C28682 + vadduwm 30,30,29 + vadduwm 1,1,30 + lvx 28,0,7 + .long 0x13C90682 + vadduwm 8,8,30 + .long 0x13DA7E82 + vadduwm 8,8,30 + vadduwm 8,8,17 + vadduwm 0,0,27 + vsel 29,7,6,5 + vadduwm 7,7,28 + vadduwm 0,0,29 + .long 0x13C5FE82 + vadduwm 0,0,30 + vxor 29,1,2 + vsel 29,2,3,29 + vadduwm 4,4,0 + .long 0x13C18682 + vadduwm 30,30,29 + vadduwm 0,0,30 + lvx 28,10,7 + bc 16,0,L16_xx + + lvx 10,0,11 + subic. 5,5,1 + lvx 11,10,11 + vadduwm 0,0,10 + lvx 12,26,11 + vadduwm 1,1,11 + lvx 13,27,11 + vadduwm 2,2,12 + lvx 14,28,11 + vadduwm 3,3,13 + lvx 15,29,11 + vadduwm 4,4,14 + lvx 16,30,11 + vadduwm 5,5,15 + lvx 17,31,11 + vadduwm 6,6,16 + vadduwm 7,7,17 + bne Loop + lvx 8,26,7 + vperm 0,0,1,28 + lvx 9,27,7 + vperm 4,4,5,28 + vperm 0,0,2,8 + vperm 4,4,6,8 + vperm 0,0,3,9 + vperm 4,4,7,9 + .long 0x7C001F19 + .long 0x7C8A1F19 + addi 11,1,207 + mtlr 8 + mtspr 256,12 + lvx 24,0,11 + lvx 25,10,11 + lvx 26,26,11 + lvx 27,27,11 + lvx 28,28,11 + lvx 29,29,11 + lvx 30,30,11 + lvx 31,31,11 + ld 26,336(1) + ld 27,344(1) + ld 28,352(1) + ld 29,360(1) + ld 30,368(1) + ld 31,376(1) + addi 1,1,384 + blr +.long 0 +.byte 0,12,4,1,0x80,6,3,0 +.long 0 + +.align 6 +LPICmeup: + mflr 0 + bcl 20,31,$+4 + mflr 6 + addi 6,6,56 + mtlr 0 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 +.space 28 +.long 0x428a2f98,0x428a2f98,0x428a2f98,0x428a2f98 +.long 0x71374491,0x71374491,0x71374491,0x71374491 +.long 0xb5c0fbcf,0xb5c0fbcf,0xb5c0fbcf,0xb5c0fbcf +.long 0xe9b5dba5,0xe9b5dba5,0xe9b5dba5,0xe9b5dba5 +.long 0x3956c25b,0x3956c25b,0x3956c25b,0x3956c25b +.long 0x59f111f1,0x59f111f1,0x59f111f1,0x59f111f1 +.long 0x923f82a4,0x923f82a4,0x923f82a4,0x923f82a4 +.long 0xab1c5ed5,0xab1c5ed5,0xab1c5ed5,0xab1c5ed5 +.long 0xd807aa98,0xd807aa98,0xd807aa98,0xd807aa98 +.long 0x12835b01,0x12835b01,0x12835b01,0x12835b01 +.long 0x243185be,0x243185be,0x243185be,0x243185be +.long 0x550c7dc3,0x550c7dc3,0x550c7dc3,0x550c7dc3 +.long 0x72be5d74,0x72be5d74,0x72be5d74,0x72be5d74 +.long 0x80deb1fe,0x80deb1fe,0x80deb1fe,0x80deb1fe +.long 0x9bdc06a7,0x9bdc06a7,0x9bdc06a7,0x9bdc06a7 +.long 0xc19bf174,0xc19bf174,0xc19bf174,0xc19bf174 +.long 0xe49b69c1,0xe49b69c1,0xe49b69c1,0xe49b69c1 +.long 0xefbe4786,0xefbe4786,0xefbe4786,0xefbe4786 +.long 0x0fc19dc6,0x0fc19dc6,0x0fc19dc6,0x0fc19dc6 +.long 0x240ca1cc,0x240ca1cc,0x240ca1cc,0x240ca1cc +.long 0x2de92c6f,0x2de92c6f,0x2de92c6f,0x2de92c6f +.long 0x4a7484aa,0x4a7484aa,0x4a7484aa,0x4a7484aa +.long 0x5cb0a9dc,0x5cb0a9dc,0x5cb0a9dc,0x5cb0a9dc +.long 0x76f988da,0x76f988da,0x76f988da,0x76f988da +.long 0x983e5152,0x983e5152,0x983e5152,0x983e5152 +.long 0xa831c66d,0xa831c66d,0xa831c66d,0xa831c66d +.long 0xb00327c8,0xb00327c8,0xb00327c8,0xb00327c8 +.long 0xbf597fc7,0xbf597fc7,0xbf597fc7,0xbf597fc7 +.long 0xc6e00bf3,0xc6e00bf3,0xc6e00bf3,0xc6e00bf3 +.long 
0xd5a79147,0xd5a79147,0xd5a79147,0xd5a79147 +.long 0x06ca6351,0x06ca6351,0x06ca6351,0x06ca6351 +.long 0x14292967,0x14292967,0x14292967,0x14292967 +.long 0x27b70a85,0x27b70a85,0x27b70a85,0x27b70a85 +.long 0x2e1b2138,0x2e1b2138,0x2e1b2138,0x2e1b2138 +.long 0x4d2c6dfc,0x4d2c6dfc,0x4d2c6dfc,0x4d2c6dfc +.long 0x53380d13,0x53380d13,0x53380d13,0x53380d13 +.long 0x650a7354,0x650a7354,0x650a7354,0x650a7354 +.long 0x766a0abb,0x766a0abb,0x766a0abb,0x766a0abb +.long 0x81c2c92e,0x81c2c92e,0x81c2c92e,0x81c2c92e +.long 0x92722c85,0x92722c85,0x92722c85,0x92722c85 +.long 0xa2bfe8a1,0xa2bfe8a1,0xa2bfe8a1,0xa2bfe8a1 +.long 0xa81a664b,0xa81a664b,0xa81a664b,0xa81a664b +.long 0xc24b8b70,0xc24b8b70,0xc24b8b70,0xc24b8b70 +.long 0xc76c51a3,0xc76c51a3,0xc76c51a3,0xc76c51a3 +.long 0xd192e819,0xd192e819,0xd192e819,0xd192e819 +.long 0xd6990624,0xd6990624,0xd6990624,0xd6990624 +.long 0xf40e3585,0xf40e3585,0xf40e3585,0xf40e3585 +.long 0x106aa070,0x106aa070,0x106aa070,0x106aa070 +.long 0x19a4c116,0x19a4c116,0x19a4c116,0x19a4c116 +.long 0x1e376c08,0x1e376c08,0x1e376c08,0x1e376c08 +.long 0x2748774c,0x2748774c,0x2748774c,0x2748774c +.long 0x34b0bcb5,0x34b0bcb5,0x34b0bcb5,0x34b0bcb5 +.long 0x391c0cb3,0x391c0cb3,0x391c0cb3,0x391c0cb3 +.long 0x4ed8aa4a,0x4ed8aa4a,0x4ed8aa4a,0x4ed8aa4a +.long 0x5b9cca4f,0x5b9cca4f,0x5b9cca4f,0x5b9cca4f +.long 0x682e6ff3,0x682e6ff3,0x682e6ff3,0x682e6ff3 +.long 0x748f82ee,0x748f82ee,0x748f82ee,0x748f82ee +.long 0x78a5636f,0x78a5636f,0x78a5636f,0x78a5636f +.long 0x84c87814,0x84c87814,0x84c87814,0x84c87814 +.long 0x8cc70208,0x8cc70208,0x8cc70208,0x8cc70208 +.long 0x90befffa,0x90befffa,0x90befffa,0x90befffa +.long 0xa4506ceb,0xa4506ceb,0xa4506ceb,0xa4506ceb +.long 0xbef9a3f7,0xbef9a3f7,0xbef9a3f7,0xbef9a3f7 +.long 0xc67178f2,0xc67178f2,0xc67178f2,0xc67178f2 +.long 0,0,0,0 +.long 0x10111213,0x10111213,0x10111213,0x00010203 +.long 0x10111213,0x10111213,0x04050607,0x00010203 +.long 0x10111213,0x08090a0b,0x04050607,0x00010203 +.byte 83,72,65,50,53,54,32,102,111,114,32,80,111,119,101,114,73,83,65,32,50,46,48,55,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 Index: cipher/sha256-ppc832.S =================================================================== --- /dev/null +++ cipher/sha256-ppc832.S @@ -0,0 +1,733 @@ +.machine "any" +.text + +.globl sha256_block_p8 +.align 6 +sha256_block_p8: + stwu 1,-328(1) + mflr 8 + li 10,175 + li 11,191 + stvx 24,10,1 + addi 10,10,32 + mfspr 12,256 + stvx 25,11,1 + addi 11,11,32 + stvx 26,10,1 + addi 10,10,32 + stvx 27,11,1 + addi 11,11,32 + stvx 28,10,1 + addi 10,10,32 + stvx 29,11,1 + addi 11,11,32 + stvx 30,10,1 + stvx 31,11,1 + li 11,-4096+255 + stw 12,300(1) + li 10,0x10 + stw 26,304(1) + li 26,0x20 + stw 27,308(1) + li 27,0x30 + stw 28,312(1) + li 28,0x40 + stw 29,316(1) + li 29,0x50 + stw 30,320(1) + li 30,0x60 + stw 31,324(1) + li 31,0x70 + stw 8,332(1) + mtspr 256,11 + + bl LPICmeup + addi 11,1,47 + .long 0x7C001E19 + .long 0x7C8A1E19 + vsldoi 1,0,0,4 + vsldoi 2,0,0,8 + vsldoi 3,0,0,12 + vsldoi 5,4,4,4 + vsldoi 6,4,4,8 + vsldoi 7,4,4,12 + li 0,3 + b Loop +.align 5 +Loop: + lvx 28,0,6 + .long 0x7D002699 + addi 4,4,16 + mr 7,6 + stvx 0,0,11 + stvx 1,10,11 + stvx 2,26,11 + stvx 3,27,11 + stvx 4,28,11 + stvx 5,29,11 + stvx 6,30,11 + stvx 7,31,11 + vadduwm 7,7,28 + lvx 28,10,6 + vadduwm 7,7,8 + vsel 29,6,5,4 + vadduwm 6,6,28 + vadduwm 7,7,29 + .long 0x13C4FE82 + vadduwm 7,7,30 + vxor 29,0,1 + vsel 29,1,2,29 + vadduwm 3,3,7 + .long 0x13C08682 + vadduwm 30,30,29 + vadduwm 7,7,30 
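+# Note: each sha256-ppc8* flavour carries a private copy of the K256
+# round-constant table (each 32-bit constant repeated in all four lanes,
+# so vadduwm can add it regardless of which lane holds the live word),
+# followed by vperm masks for the final byte reorder.  LPICmeup is the
+# usual position-independent "bcl 20,31,$+4 / mflr" helper that
+# materializes the address of that table in r6.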
+ lvx 28,26,7 + vsldoi 9,8,8,4 + vadduwm 6,6,9 + vsel 29,5,4,3 + vadduwm 5,5,28 + vadduwm 6,6,29 + .long 0x13C3FE82 + vadduwm 6,6,30 + vxor 29,7,0 + vsel 29,0,1,29 + vadduwm 2,2,6 + .long 0x13C78682 + vadduwm 30,30,29 + vadduwm 6,6,30 + lvx 28,27,7 + vsldoi 10,9,9,4 + vadduwm 5,5,10 + vsel 29,4,3,2 + vadduwm 4,4,28 + vadduwm 5,5,29 + .long 0x13C2FE82 + vadduwm 5,5,30 + vxor 29,6,7 + vsel 29,7,0,29 + vadduwm 1,1,5 + .long 0x13C68682 + vadduwm 30,30,29 + vadduwm 5,5,30 + lvx 28,28,7 + .long 0x7D802699 + addi 4,4,16 + vsldoi 11,10,10,4 + vadduwm 4,4,11 + vsel 29,3,2,1 + vadduwm 3,3,28 + vadduwm 4,4,29 + .long 0x13C1FE82 + vadduwm 4,4,30 + vxor 29,5,6 + vsel 29,6,7,29 + vadduwm 0,0,4 + .long 0x13C58682 + vadduwm 30,30,29 + vadduwm 4,4,30 + lvx 28,29,7 + vadduwm 3,3,12 + vsel 29,2,1,0 + vadduwm 2,2,28 + vadduwm 3,3,29 + .long 0x13C0FE82 + vadduwm 3,3,30 + vxor 29,4,5 + vsel 29,5,6,29 + vadduwm 7,7,3 + .long 0x13C48682 + vadduwm 30,30,29 + vadduwm 3,3,30 + lvx 28,30,7 + vsldoi 13,12,12,4 + vadduwm 2,2,13 + vsel 29,1,0,7 + vadduwm 1,1,28 + vadduwm 2,2,29 + .long 0x13C7FE82 + vadduwm 2,2,30 + vxor 29,3,4 + vsel 29,4,5,29 + vadduwm 6,6,2 + .long 0x13C38682 + vadduwm 30,30,29 + vadduwm 2,2,30 + lvx 28,31,7 + addi 7,7,0x80 + vsldoi 14,13,13,4 + vadduwm 1,1,14 + vsel 29,0,7,6 + vadduwm 0,0,28 + vadduwm 1,1,29 + .long 0x13C6FE82 + vadduwm 1,1,30 + vxor 29,2,3 + vsel 29,3,4,29 + vadduwm 5,5,1 + .long 0x13C28682 + vadduwm 30,30,29 + vadduwm 1,1,30 + lvx 28,0,7 + .long 0x7E002699 + addi 4,4,16 + vsldoi 15,14,14,4 + vadduwm 0,0,15 + vsel 29,7,6,5 + vadduwm 7,7,28 + vadduwm 0,0,29 + .long 0x13C5FE82 + vadduwm 0,0,30 + vxor 29,1,2 + vsel 29,2,3,29 + vadduwm 4,4,0 + .long 0x13C18682 + vadduwm 30,30,29 + vadduwm 0,0,30 + lvx 28,10,7 + vadduwm 7,7,16 + vsel 29,6,5,4 + vadduwm 6,6,28 + vadduwm 7,7,29 + .long 0x13C4FE82 + vadduwm 7,7,30 + vxor 29,0,1 + vsel 29,1,2,29 + vadduwm 3,3,7 + .long 0x13C08682 + vadduwm 30,30,29 + vadduwm 7,7,30 + lvx 28,26,7 + vsldoi 17,16,16,4 + vadduwm 6,6,17 + vsel 29,5,4,3 + vadduwm 5,5,28 + vadduwm 6,6,29 + .long 0x13C3FE82 + vadduwm 6,6,30 + vxor 29,7,0 + vsel 29,0,1,29 + vadduwm 2,2,6 + .long 0x13C78682 + vadduwm 30,30,29 + vadduwm 6,6,30 + lvx 28,27,7 + vsldoi 18,17,17,4 + vadduwm 5,5,18 + vsel 29,4,3,2 + vadduwm 4,4,28 + vadduwm 5,5,29 + .long 0x13C2FE82 + vadduwm 5,5,30 + vxor 29,6,7 + vsel 29,7,0,29 + vadduwm 1,1,5 + .long 0x13C68682 + vadduwm 30,30,29 + vadduwm 5,5,30 + lvx 28,28,7 + .long 0x7F002699 + addi 4,4,16 + vsldoi 19,18,18,4 + vadduwm 4,4,19 + vsel 29,3,2,1 + vadduwm 3,3,28 + vadduwm 4,4,29 + .long 0x13C1FE82 + vadduwm 4,4,30 + vxor 29,5,6 + vsel 29,6,7,29 + vadduwm 0,0,4 + .long 0x13C58682 + vadduwm 30,30,29 + vadduwm 4,4,30 + lvx 28,29,7 + vadduwm 3,3,24 + vsel 29,2,1,0 + vadduwm 2,2,28 + vadduwm 3,3,29 + .long 0x13C0FE82 + vadduwm 3,3,30 + vxor 29,4,5 + vsel 29,5,6,29 + vadduwm 7,7,3 + .long 0x13C48682 + vadduwm 30,30,29 + vadduwm 3,3,30 + lvx 28,30,7 + vsldoi 25,24,24,4 + vadduwm 2,2,25 + vsel 29,1,0,7 + vadduwm 1,1,28 + vadduwm 2,2,29 + .long 0x13C7FE82 + vadduwm 2,2,30 + vxor 29,3,4 + vsel 29,4,5,29 + vadduwm 6,6,2 + .long 0x13C38682 + vadduwm 30,30,29 + vadduwm 2,2,30 + lvx 28,31,7 + addi 7,7,0x80 + vsldoi 26,25,25,4 + vadduwm 1,1,26 + vsel 29,0,7,6 + vadduwm 0,0,28 + vadduwm 1,1,29 + .long 0x13C6FE82 + vadduwm 1,1,30 + vxor 29,2,3 + vsel 29,3,4,29 + vadduwm 5,5,1 + .long 0x13C28682 + vadduwm 30,30,29 + vadduwm 1,1,30 + lvx 28,0,7 + vsldoi 27,26,26,4 + .long 0x13C90682 + vadduwm 8,8,30 + .long 0x13DA7E82 + vadduwm 8,8,30 + vadduwm 8,8,17 + vadduwm 0,0,27 + 
vsel 29,7,6,5 + vadduwm 7,7,28 + vadduwm 0,0,29 + .long 0x13C5FE82 + vadduwm 0,0,30 + vxor 29,1,2 + vsel 29,2,3,29 + vadduwm 4,4,0 + .long 0x13C18682 + vadduwm 30,30,29 + vadduwm 0,0,30 + lvx 28,10,7 + mtctr 0 + b L16_xx +.align 5 +L16_xx: + .long 0x13CA0682 + vadduwm 9,9,30 + .long 0x13DB7E82 + vadduwm 9,9,30 + vadduwm 9,9,18 + vadduwm 7,7,8 + vsel 29,6,5,4 + vadduwm 6,6,28 + vadduwm 7,7,29 + .long 0x13C4FE82 + vadduwm 7,7,30 + vxor 29,0,1 + vsel 29,1,2,29 + vadduwm 3,3,7 + .long 0x13C08682 + vadduwm 30,30,29 + vadduwm 7,7,30 + lvx 28,26,7 + .long 0x13CB0682 + vadduwm 10,10,30 + .long 0x13C87E82 + vadduwm 10,10,30 + vadduwm 10,10,19 + vadduwm 6,6,9 + vsel 29,5,4,3 + vadduwm 5,5,28 + vadduwm 6,6,29 + .long 0x13C3FE82 + vadduwm 6,6,30 + vxor 29,7,0 + vsel 29,0,1,29 + vadduwm 2,2,6 + .long 0x13C78682 + vadduwm 30,30,29 + vadduwm 6,6,30 + lvx 28,27,7 + .long 0x13CC0682 + vadduwm 11,11,30 + .long 0x13C97E82 + vadduwm 11,11,30 + vadduwm 11,11,24 + vadduwm 5,5,10 + vsel 29,4,3,2 + vadduwm 4,4,28 + vadduwm 5,5,29 + .long 0x13C2FE82 + vadduwm 5,5,30 + vxor 29,6,7 + vsel 29,7,0,29 + vadduwm 1,1,5 + .long 0x13C68682 + vadduwm 30,30,29 + vadduwm 5,5,30 + lvx 28,28,7 + .long 0x13CD0682 + vadduwm 12,12,30 + .long 0x13CA7E82 + vadduwm 12,12,30 + vadduwm 12,12,25 + vadduwm 4,4,11 + vsel 29,3,2,1 + vadduwm 3,3,28 + vadduwm 4,4,29 + .long 0x13C1FE82 + vadduwm 4,4,30 + vxor 29,5,6 + vsel 29,6,7,29 + vadduwm 0,0,4 + .long 0x13C58682 + vadduwm 30,30,29 + vadduwm 4,4,30 + lvx 28,29,7 + .long 0x13CE0682 + vadduwm 13,13,30 + .long 0x13CB7E82 + vadduwm 13,13,30 + vadduwm 13,13,26 + vadduwm 3,3,12 + vsel 29,2,1,0 + vadduwm 2,2,28 + vadduwm 3,3,29 + .long 0x13C0FE82 + vadduwm 3,3,30 + vxor 29,4,5 + vsel 29,5,6,29 + vadduwm 7,7,3 + .long 0x13C48682 + vadduwm 30,30,29 + vadduwm 3,3,30 + lvx 28,30,7 + .long 0x13CF0682 + vadduwm 14,14,30 + .long 0x13CC7E82 + vadduwm 14,14,30 + vadduwm 14,14,27 + vadduwm 2,2,13 + vsel 29,1,0,7 + vadduwm 1,1,28 + vadduwm 2,2,29 + .long 0x13C7FE82 + vadduwm 2,2,30 + vxor 29,3,4 + vsel 29,4,5,29 + vadduwm 6,6,2 + .long 0x13C38682 + vadduwm 30,30,29 + vadduwm 2,2,30 + lvx 28,31,7 + addi 7,7,0x80 + .long 0x13D00682 + vadduwm 15,15,30 + .long 0x13CD7E82 + vadduwm 15,15,30 + vadduwm 15,15,8 + vadduwm 1,1,14 + vsel 29,0,7,6 + vadduwm 0,0,28 + vadduwm 1,1,29 + .long 0x13C6FE82 + vadduwm 1,1,30 + vxor 29,2,3 + vsel 29,3,4,29 + vadduwm 5,5,1 + .long 0x13C28682 + vadduwm 30,30,29 + vadduwm 1,1,30 + lvx 28,0,7 + .long 0x13D10682 + vadduwm 16,16,30 + .long 0x13CE7E82 + vadduwm 16,16,30 + vadduwm 16,16,9 + vadduwm 0,0,15 + vsel 29,7,6,5 + vadduwm 7,7,28 + vadduwm 0,0,29 + .long 0x13C5FE82 + vadduwm 0,0,30 + vxor 29,1,2 + vsel 29,2,3,29 + vadduwm 4,4,0 + .long 0x13C18682 + vadduwm 30,30,29 + vadduwm 0,0,30 + lvx 28,10,7 + .long 0x13D20682 + vadduwm 17,17,30 + .long 0x13CF7E82 + vadduwm 17,17,30 + vadduwm 17,17,10 + vadduwm 7,7,16 + vsel 29,6,5,4 + vadduwm 6,6,28 + vadduwm 7,7,29 + .long 0x13C4FE82 + vadduwm 7,7,30 + vxor 29,0,1 + vsel 29,1,2,29 + vadduwm 3,3,7 + .long 0x13C08682 + vadduwm 30,30,29 + vadduwm 7,7,30 + lvx 28,26,7 + .long 0x13D30682 + vadduwm 18,18,30 + .long 0x13D07E82 + vadduwm 18,18,30 + vadduwm 18,18,11 + vadduwm 6,6,17 + vsel 29,5,4,3 + vadduwm 5,5,28 + vadduwm 6,6,29 + .long 0x13C3FE82 + vadduwm 6,6,30 + vxor 29,7,0 + vsel 29,0,1,29 + vadduwm 2,2,6 + .long 0x13C78682 + vadduwm 30,30,29 + vadduwm 6,6,30 + lvx 28,27,7 + .long 0x13D80682 + vadduwm 19,19,30 + .long 0x13D17E82 + vadduwm 19,19,30 + vadduwm 19,19,12 + vadduwm 5,5,18 + vsel 29,4,3,2 + vadduwm 4,4,28 + vadduwm 5,5,29 + .long 
0x13C2FE82 + vadduwm 5,5,30 + vxor 29,6,7 + vsel 29,7,0,29 + vadduwm 1,1,5 + .long 0x13C68682 + vadduwm 30,30,29 + vadduwm 5,5,30 + lvx 28,28,7 + .long 0x13D90682 + vadduwm 24,24,30 + .long 0x13D27E82 + vadduwm 24,24,30 + vadduwm 24,24,13 + vadduwm 4,4,19 + vsel 29,3,2,1 + vadduwm 3,3,28 + vadduwm 4,4,29 + .long 0x13C1FE82 + vadduwm 4,4,30 + vxor 29,5,6 + vsel 29,6,7,29 + vadduwm 0,0,4 + .long 0x13C58682 + vadduwm 30,30,29 + vadduwm 4,4,30 + lvx 28,29,7 + .long 0x13DA0682 + vadduwm 25,25,30 + .long 0x13D37E82 + vadduwm 25,25,30 + vadduwm 25,25,14 + vadduwm 3,3,24 + vsel 29,2,1,0 + vadduwm 2,2,28 + vadduwm 3,3,29 + .long 0x13C0FE82 + vadduwm 3,3,30 + vxor 29,4,5 + vsel 29,5,6,29 + vadduwm 7,7,3 + .long 0x13C48682 + vadduwm 30,30,29 + vadduwm 3,3,30 + lvx 28,30,7 + .long 0x13DB0682 + vadduwm 26,26,30 + .long 0x13D87E82 + vadduwm 26,26,30 + vadduwm 26,26,15 + vadduwm 2,2,25 + vsel 29,1,0,7 + vadduwm 1,1,28 + vadduwm 2,2,29 + .long 0x13C7FE82 + vadduwm 2,2,30 + vxor 29,3,4 + vsel 29,4,5,29 + vadduwm 6,6,2 + .long 0x13C38682 + vadduwm 30,30,29 + vadduwm 2,2,30 + lvx 28,31,7 + addi 7,7,0x80 + .long 0x13C80682 + vadduwm 27,27,30 + .long 0x13D97E82 + vadduwm 27,27,30 + vadduwm 27,27,16 + vadduwm 1,1,26 + vsel 29,0,7,6 + vadduwm 0,0,28 + vadduwm 1,1,29 + .long 0x13C6FE82 + vadduwm 1,1,30 + vxor 29,2,3 + vsel 29,3,4,29 + vadduwm 5,5,1 + .long 0x13C28682 + vadduwm 30,30,29 + vadduwm 1,1,30 + lvx 28,0,7 + .long 0x13C90682 + vadduwm 8,8,30 + .long 0x13DA7E82 + vadduwm 8,8,30 + vadduwm 8,8,17 + vadduwm 0,0,27 + vsel 29,7,6,5 + vadduwm 7,7,28 + vadduwm 0,0,29 + .long 0x13C5FE82 + vadduwm 0,0,30 + vxor 29,1,2 + vsel 29,2,3,29 + vadduwm 4,4,0 + .long 0x13C18682 + vadduwm 30,30,29 + vadduwm 0,0,30 + lvx 28,10,7 + bc 16,0,L16_xx + + lvx 10,0,11 + subic. 5,5,1 + lvx 11,10,11 + vadduwm 0,0,10 + lvx 12,26,11 + vadduwm 1,1,11 + lvx 13,27,11 + vadduwm 2,2,12 + lvx 14,28,11 + vadduwm 3,3,13 + lvx 15,29,11 + vadduwm 4,4,14 + lvx 16,30,11 + vadduwm 5,5,15 + lvx 17,31,11 + vadduwm 6,6,16 + vadduwm 7,7,17 + bne Loop + lvx 8,26,7 + vperm 0,0,1,28 + lvx 9,27,7 + vperm 4,4,5,28 + vperm 0,0,2,8 + vperm 4,4,6,8 + vperm 0,0,3,9 + vperm 4,4,7,9 + .long 0x7C001F19 + .long 0x7C8A1F19 + addi 11,1,175 + mtlr 8 + mtspr 256,12 + lvx 24,0,11 + lvx 25,10,11 + lvx 26,26,11 + lvx 27,27,11 + lvx 28,28,11 + lvx 29,29,11 + lvx 30,30,11 + lvx 31,31,11 + lwz 26,304(1) + lwz 27,308(1) + lwz 28,312(1) + lwz 29,316(1) + lwz 30,320(1) + lwz 31,324(1) + addi 1,1,328 + blr +.long 0 +.byte 0,12,4,1,0x80,6,3,0 +.long 0 + +.align 6 +LPICmeup: + mflr 0 + bcl 20,31,$+4 + mflr 6 + addi 6,6,56 + mtlr 0 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 +.space 28 +.long 0x428a2f98,0x428a2f98,0x428a2f98,0x428a2f98 +.long 0x71374491,0x71374491,0x71374491,0x71374491 +.long 0xb5c0fbcf,0xb5c0fbcf,0xb5c0fbcf,0xb5c0fbcf +.long 0xe9b5dba5,0xe9b5dba5,0xe9b5dba5,0xe9b5dba5 +.long 0x3956c25b,0x3956c25b,0x3956c25b,0x3956c25b +.long 0x59f111f1,0x59f111f1,0x59f111f1,0x59f111f1 +.long 0x923f82a4,0x923f82a4,0x923f82a4,0x923f82a4 +.long 0xab1c5ed5,0xab1c5ed5,0xab1c5ed5,0xab1c5ed5 +.long 0xd807aa98,0xd807aa98,0xd807aa98,0xd807aa98 +.long 0x12835b01,0x12835b01,0x12835b01,0x12835b01 +.long 0x243185be,0x243185be,0x243185be,0x243185be +.long 0x550c7dc3,0x550c7dc3,0x550c7dc3,0x550c7dc3 +.long 0x72be5d74,0x72be5d74,0x72be5d74,0x72be5d74 +.long 0x80deb1fe,0x80deb1fe,0x80deb1fe,0x80deb1fe +.long 0x9bdc06a7,0x9bdc06a7,0x9bdc06a7,0x9bdc06a7 +.long 0xc19bf174,0xc19bf174,0xc19bf174,0xc19bf174 +.long 0xe49b69c1,0xe49b69c1,0xe49b69c1,0xe49b69c1 +.long 
0xefbe4786,0xefbe4786,0xefbe4786,0xefbe4786 +.long 0x0fc19dc6,0x0fc19dc6,0x0fc19dc6,0x0fc19dc6 +.long 0x240ca1cc,0x240ca1cc,0x240ca1cc,0x240ca1cc +.long 0x2de92c6f,0x2de92c6f,0x2de92c6f,0x2de92c6f +.long 0x4a7484aa,0x4a7484aa,0x4a7484aa,0x4a7484aa +.long 0x5cb0a9dc,0x5cb0a9dc,0x5cb0a9dc,0x5cb0a9dc +.long 0x76f988da,0x76f988da,0x76f988da,0x76f988da +.long 0x983e5152,0x983e5152,0x983e5152,0x983e5152 +.long 0xa831c66d,0xa831c66d,0xa831c66d,0xa831c66d +.long 0xb00327c8,0xb00327c8,0xb00327c8,0xb00327c8 +.long 0xbf597fc7,0xbf597fc7,0xbf597fc7,0xbf597fc7 +.long 0xc6e00bf3,0xc6e00bf3,0xc6e00bf3,0xc6e00bf3 +.long 0xd5a79147,0xd5a79147,0xd5a79147,0xd5a79147 +.long 0x06ca6351,0x06ca6351,0x06ca6351,0x06ca6351 +.long 0x14292967,0x14292967,0x14292967,0x14292967 +.long 0x27b70a85,0x27b70a85,0x27b70a85,0x27b70a85 +.long 0x2e1b2138,0x2e1b2138,0x2e1b2138,0x2e1b2138 +.long 0x4d2c6dfc,0x4d2c6dfc,0x4d2c6dfc,0x4d2c6dfc +.long 0x53380d13,0x53380d13,0x53380d13,0x53380d13 +.long 0x650a7354,0x650a7354,0x650a7354,0x650a7354 +.long 0x766a0abb,0x766a0abb,0x766a0abb,0x766a0abb +.long 0x81c2c92e,0x81c2c92e,0x81c2c92e,0x81c2c92e +.long 0x92722c85,0x92722c85,0x92722c85,0x92722c85 +.long 0xa2bfe8a1,0xa2bfe8a1,0xa2bfe8a1,0xa2bfe8a1 +.long 0xa81a664b,0xa81a664b,0xa81a664b,0xa81a664b +.long 0xc24b8b70,0xc24b8b70,0xc24b8b70,0xc24b8b70 +.long 0xc76c51a3,0xc76c51a3,0xc76c51a3,0xc76c51a3 +.long 0xd192e819,0xd192e819,0xd192e819,0xd192e819 +.long 0xd6990624,0xd6990624,0xd6990624,0xd6990624 +.long 0xf40e3585,0xf40e3585,0xf40e3585,0xf40e3585 +.long 0x106aa070,0x106aa070,0x106aa070,0x106aa070 +.long 0x19a4c116,0x19a4c116,0x19a4c116,0x19a4c116 +.long 0x1e376c08,0x1e376c08,0x1e376c08,0x1e376c08 +.long 0x2748774c,0x2748774c,0x2748774c,0x2748774c +.long 0x34b0bcb5,0x34b0bcb5,0x34b0bcb5,0x34b0bcb5 +.long 0x391c0cb3,0x391c0cb3,0x391c0cb3,0x391c0cb3 +.long 0x4ed8aa4a,0x4ed8aa4a,0x4ed8aa4a,0x4ed8aa4a +.long 0x5b9cca4f,0x5b9cca4f,0x5b9cca4f,0x5b9cca4f +.long 0x682e6ff3,0x682e6ff3,0x682e6ff3,0x682e6ff3 +.long 0x748f82ee,0x748f82ee,0x748f82ee,0x748f82ee +.long 0x78a5636f,0x78a5636f,0x78a5636f,0x78a5636f +.long 0x84c87814,0x84c87814,0x84c87814,0x84c87814 +.long 0x8cc70208,0x8cc70208,0x8cc70208,0x8cc70208 +.long 0x90befffa,0x90befffa,0x90befffa,0x90befffa +.long 0xa4506ceb,0xa4506ceb,0xa4506ceb,0xa4506ceb +.long 0xbef9a3f7,0xbef9a3f7,0xbef9a3f7,0xbef9a3f7 +.long 0xc67178f2,0xc67178f2,0xc67178f2,0xc67178f2 +.long 0,0,0,0 +.long 0x00010203,0x10111213,0x10111213,0x10111213 +.long 0x00010203,0x04050607,0x10111213,0x10111213 +.long 0x00010203,0x04050607,0x08090a0b,0x10111213 +.byte 83,72,65,50,53,54,32,102,111,114,32,80,111,119,101,114,73,83,65,32,50,46,48,55,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 Index: cipher/sha256-ppc8be.S =================================================================== --- /dev/null +++ cipher/sha256-ppc8be.S @@ -0,0 +1,733 @@ +.machine "any" +.text + +.globl sha256_block_p8 +.align 6 +sha256_block_p8: + stdu 1,-384(1) + mflr 8 + li 10,207 + li 11,223 + stvx 24,10,1 + addi 10,10,32 + mfspr 12,256 + stvx 25,11,1 + addi 11,11,32 + stvx 26,10,1 + addi 10,10,32 + stvx 27,11,1 + addi 11,11,32 + stvx 28,10,1 + addi 10,10,32 + stvx 29,11,1 + addi 11,11,32 + stvx 30,10,1 + stvx 31,11,1 + li 11,-4096+255 + stw 12,332(1) + li 10,0x10 + std 26,336(1) + li 26,0x20 + std 27,344(1) + li 27,0x30 + std 28,352(1) + li 28,0x40 + std 29,360(1) + li 29,0x50 + std 30,368(1) + li 30,0x60 + std 31,376(1) + li 31,0x70 + std 8,400(1) + mtspr 256,11 + + 
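+# Note: sha256-ppc8.S, sha256-ppc832.S and sha256-ppc8be.S appear to be
+# three builds of one perlasm source: little-endian 64-bit, 32-bit and
+# big-endian 64-bit.  The round structure is identical; they differ only
+# in frame size (384 vs 328 bytes), std/ld versus stw/lwz for the
+# non-volatile GPR spills, the lvsl/vxor/vperm byte-swap of the input
+# stream (little-endian flavour only) and the trailing permute masks
+# used for the final store.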
bl LPICmeup + addi 11,1,79 + .long 0x7C001E19 + .long 0x7C8A1E19 + vsldoi 1,0,0,4 + vsldoi 2,0,0,8 + vsldoi 3,0,0,12 + vsldoi 5,4,4,4 + vsldoi 6,4,4,8 + vsldoi 7,4,4,12 + li 0,3 + b Loop +.align 5 +Loop: + lvx 28,0,6 + .long 0x7D002699 + addi 4,4,16 + mr 7,6 + stvx 0,0,11 + stvx 1,10,11 + stvx 2,26,11 + stvx 3,27,11 + stvx 4,28,11 + stvx 5,29,11 + stvx 6,30,11 + stvx 7,31,11 + vadduwm 7,7,28 + lvx 28,10,6 + vadduwm 7,7,8 + vsel 29,6,5,4 + vadduwm 6,6,28 + vadduwm 7,7,29 + .long 0x13C4FE82 + vadduwm 7,7,30 + vxor 29,0,1 + vsel 29,1,2,29 + vadduwm 3,3,7 + .long 0x13C08682 + vadduwm 30,30,29 + vadduwm 7,7,30 + lvx 28,26,7 + vsldoi 9,8,8,4 + vadduwm 6,6,9 + vsel 29,5,4,3 + vadduwm 5,5,28 + vadduwm 6,6,29 + .long 0x13C3FE82 + vadduwm 6,6,30 + vxor 29,7,0 + vsel 29,0,1,29 + vadduwm 2,2,6 + .long 0x13C78682 + vadduwm 30,30,29 + vadduwm 6,6,30 + lvx 28,27,7 + vsldoi 10,9,9,4 + vadduwm 5,5,10 + vsel 29,4,3,2 + vadduwm 4,4,28 + vadduwm 5,5,29 + .long 0x13C2FE82 + vadduwm 5,5,30 + vxor 29,6,7 + vsel 29,7,0,29 + vadduwm 1,1,5 + .long 0x13C68682 + vadduwm 30,30,29 + vadduwm 5,5,30 + lvx 28,28,7 + .long 0x7D802699 + addi 4,4,16 + vsldoi 11,10,10,4 + vadduwm 4,4,11 + vsel 29,3,2,1 + vadduwm 3,3,28 + vadduwm 4,4,29 + .long 0x13C1FE82 + vadduwm 4,4,30 + vxor 29,5,6 + vsel 29,6,7,29 + vadduwm 0,0,4 + .long 0x13C58682 + vadduwm 30,30,29 + vadduwm 4,4,30 + lvx 28,29,7 + vadduwm 3,3,12 + vsel 29,2,1,0 + vadduwm 2,2,28 + vadduwm 3,3,29 + .long 0x13C0FE82 + vadduwm 3,3,30 + vxor 29,4,5 + vsel 29,5,6,29 + vadduwm 7,7,3 + .long 0x13C48682 + vadduwm 30,30,29 + vadduwm 3,3,30 + lvx 28,30,7 + vsldoi 13,12,12,4 + vadduwm 2,2,13 + vsel 29,1,0,7 + vadduwm 1,1,28 + vadduwm 2,2,29 + .long 0x13C7FE82 + vadduwm 2,2,30 + vxor 29,3,4 + vsel 29,4,5,29 + vadduwm 6,6,2 + .long 0x13C38682 + vadduwm 30,30,29 + vadduwm 2,2,30 + lvx 28,31,7 + addi 7,7,0x80 + vsldoi 14,13,13,4 + vadduwm 1,1,14 + vsel 29,0,7,6 + vadduwm 0,0,28 + vadduwm 1,1,29 + .long 0x13C6FE82 + vadduwm 1,1,30 + vxor 29,2,3 + vsel 29,3,4,29 + vadduwm 5,5,1 + .long 0x13C28682 + vadduwm 30,30,29 + vadduwm 1,1,30 + lvx 28,0,7 + .long 0x7E002699 + addi 4,4,16 + vsldoi 15,14,14,4 + vadduwm 0,0,15 + vsel 29,7,6,5 + vadduwm 7,7,28 + vadduwm 0,0,29 + .long 0x13C5FE82 + vadduwm 0,0,30 + vxor 29,1,2 + vsel 29,2,3,29 + vadduwm 4,4,0 + .long 0x13C18682 + vadduwm 30,30,29 + vadduwm 0,0,30 + lvx 28,10,7 + vadduwm 7,7,16 + vsel 29,6,5,4 + vadduwm 6,6,28 + vadduwm 7,7,29 + .long 0x13C4FE82 + vadduwm 7,7,30 + vxor 29,0,1 + vsel 29,1,2,29 + vadduwm 3,3,7 + .long 0x13C08682 + vadduwm 30,30,29 + vadduwm 7,7,30 + lvx 28,26,7 + vsldoi 17,16,16,4 + vadduwm 6,6,17 + vsel 29,5,4,3 + vadduwm 5,5,28 + vadduwm 6,6,29 + .long 0x13C3FE82 + vadduwm 6,6,30 + vxor 29,7,0 + vsel 29,0,1,29 + vadduwm 2,2,6 + .long 0x13C78682 + vadduwm 30,30,29 + vadduwm 6,6,30 + lvx 28,27,7 + vsldoi 18,17,17,4 + vadduwm 5,5,18 + vsel 29,4,3,2 + vadduwm 4,4,28 + vadduwm 5,5,29 + .long 0x13C2FE82 + vadduwm 5,5,30 + vxor 29,6,7 + vsel 29,7,0,29 + vadduwm 1,1,5 + .long 0x13C68682 + vadduwm 30,30,29 + vadduwm 5,5,30 + lvx 28,28,7 + .long 0x7F002699 + addi 4,4,16 + vsldoi 19,18,18,4 + vadduwm 4,4,19 + vsel 29,3,2,1 + vadduwm 3,3,28 + vadduwm 4,4,29 + .long 0x13C1FE82 + vadduwm 4,4,30 + vxor 29,5,6 + vsel 29,6,7,29 + vadduwm 0,0,4 + .long 0x13C58682 + vadduwm 30,30,29 + vadduwm 4,4,30 + lvx 28,29,7 + vadduwm 3,3,24 + vsel 29,2,1,0 + vadduwm 2,2,28 + vadduwm 3,3,29 + .long 0x13C0FE82 + vadduwm 3,3,30 + vxor 29,4,5 + vsel 29,5,6,29 + vadduwm 7,7,3 + .long 0x13C48682 + vadduwm 30,30,29 + vadduwm 3,3,30 + lvx 28,30,7 + vsldoi 
25,24,24,4 + vadduwm 2,2,25 + vsel 29,1,0,7 + vadduwm 1,1,28 + vadduwm 2,2,29 + .long 0x13C7FE82 + vadduwm 2,2,30 + vxor 29,3,4 + vsel 29,4,5,29 + vadduwm 6,6,2 + .long 0x13C38682 + vadduwm 30,30,29 + vadduwm 2,2,30 + lvx 28,31,7 + addi 7,7,0x80 + vsldoi 26,25,25,4 + vadduwm 1,1,26 + vsel 29,0,7,6 + vadduwm 0,0,28 + vadduwm 1,1,29 + .long 0x13C6FE82 + vadduwm 1,1,30 + vxor 29,2,3 + vsel 29,3,4,29 + vadduwm 5,5,1 + .long 0x13C28682 + vadduwm 30,30,29 + vadduwm 1,1,30 + lvx 28,0,7 + vsldoi 27,26,26,4 + .long 0x13C90682 + vadduwm 8,8,30 + .long 0x13DA7E82 + vadduwm 8,8,30 + vadduwm 8,8,17 + vadduwm 0,0,27 + vsel 29,7,6,5 + vadduwm 7,7,28 + vadduwm 0,0,29 + .long 0x13C5FE82 + vadduwm 0,0,30 + vxor 29,1,2 + vsel 29,2,3,29 + vadduwm 4,4,0 + .long 0x13C18682 + vadduwm 30,30,29 + vadduwm 0,0,30 + lvx 28,10,7 + mtctr 0 + b L16_xx +.align 5 +L16_xx: + .long 0x13CA0682 + vadduwm 9,9,30 + .long 0x13DB7E82 + vadduwm 9,9,30 + vadduwm 9,9,18 + vadduwm 7,7,8 + vsel 29,6,5,4 + vadduwm 6,6,28 + vadduwm 7,7,29 + .long 0x13C4FE82 + vadduwm 7,7,30 + vxor 29,0,1 + vsel 29,1,2,29 + vadduwm 3,3,7 + .long 0x13C08682 + vadduwm 30,30,29 + vadduwm 7,7,30 + lvx 28,26,7 + .long 0x13CB0682 + vadduwm 10,10,30 + .long 0x13C87E82 + vadduwm 10,10,30 + vadduwm 10,10,19 + vadduwm 6,6,9 + vsel 29,5,4,3 + vadduwm 5,5,28 + vadduwm 6,6,29 + .long 0x13C3FE82 + vadduwm 6,6,30 + vxor 29,7,0 + vsel 29,0,1,29 + vadduwm 2,2,6 + .long 0x13C78682 + vadduwm 30,30,29 + vadduwm 6,6,30 + lvx 28,27,7 + .long 0x13CC0682 + vadduwm 11,11,30 + .long 0x13C97E82 + vadduwm 11,11,30 + vadduwm 11,11,24 + vadduwm 5,5,10 + vsel 29,4,3,2 + vadduwm 4,4,28 + vadduwm 5,5,29 + .long 0x13C2FE82 + vadduwm 5,5,30 + vxor 29,6,7 + vsel 29,7,0,29 + vadduwm 1,1,5 + .long 0x13C68682 + vadduwm 30,30,29 + vadduwm 5,5,30 + lvx 28,28,7 + .long 0x13CD0682 + vadduwm 12,12,30 + .long 0x13CA7E82 + vadduwm 12,12,30 + vadduwm 12,12,25 + vadduwm 4,4,11 + vsel 29,3,2,1 + vadduwm 3,3,28 + vadduwm 4,4,29 + .long 0x13C1FE82 + vadduwm 4,4,30 + vxor 29,5,6 + vsel 29,6,7,29 + vadduwm 0,0,4 + .long 0x13C58682 + vadduwm 30,30,29 + vadduwm 4,4,30 + lvx 28,29,7 + .long 0x13CE0682 + vadduwm 13,13,30 + .long 0x13CB7E82 + vadduwm 13,13,30 + vadduwm 13,13,26 + vadduwm 3,3,12 + vsel 29,2,1,0 + vadduwm 2,2,28 + vadduwm 3,3,29 + .long 0x13C0FE82 + vadduwm 3,3,30 + vxor 29,4,5 + vsel 29,5,6,29 + vadduwm 7,7,3 + .long 0x13C48682 + vadduwm 30,30,29 + vadduwm 3,3,30 + lvx 28,30,7 + .long 0x13CF0682 + vadduwm 14,14,30 + .long 0x13CC7E82 + vadduwm 14,14,30 + vadduwm 14,14,27 + vadduwm 2,2,13 + vsel 29,1,0,7 + vadduwm 1,1,28 + vadduwm 2,2,29 + .long 0x13C7FE82 + vadduwm 2,2,30 + vxor 29,3,4 + vsel 29,4,5,29 + vadduwm 6,6,2 + .long 0x13C38682 + vadduwm 30,30,29 + vadduwm 2,2,30 + lvx 28,31,7 + addi 7,7,0x80 + .long 0x13D00682 + vadduwm 15,15,30 + .long 0x13CD7E82 + vadduwm 15,15,30 + vadduwm 15,15,8 + vadduwm 1,1,14 + vsel 29,0,7,6 + vadduwm 0,0,28 + vadduwm 1,1,29 + .long 0x13C6FE82 + vadduwm 1,1,30 + vxor 29,2,3 + vsel 29,3,4,29 + vadduwm 5,5,1 + .long 0x13C28682 + vadduwm 30,30,29 + vadduwm 1,1,30 + lvx 28,0,7 + .long 0x13D10682 + vadduwm 16,16,30 + .long 0x13CE7E82 + vadduwm 16,16,30 + vadduwm 16,16,9 + vadduwm 0,0,15 + vsel 29,7,6,5 + vadduwm 7,7,28 + vadduwm 0,0,29 + .long 0x13C5FE82 + vadduwm 0,0,30 + vxor 29,1,2 + vsel 29,2,3,29 + vadduwm 4,4,0 + .long 0x13C18682 + vadduwm 30,30,29 + vadduwm 0,0,30 + lvx 28,10,7 + .long 0x13D20682 + vadduwm 17,17,30 + .long 0x13CF7E82 + vadduwm 17,17,30 + vadduwm 17,17,10 + vadduwm 7,7,16 + vsel 29,6,5,4 + vadduwm 6,6,28 + vadduwm 7,7,29 + .long 0x13C4FE82 + 
vadduwm 7,7,30 + vxor 29,0,1 + vsel 29,1,2,29 + vadduwm 3,3,7 + .long 0x13C08682 + vadduwm 30,30,29 + vadduwm 7,7,30 + lvx 28,26,7 + .long 0x13D30682 + vadduwm 18,18,30 + .long 0x13D07E82 + vadduwm 18,18,30 + vadduwm 18,18,11 + vadduwm 6,6,17 + vsel 29,5,4,3 + vadduwm 5,5,28 + vadduwm 6,6,29 + .long 0x13C3FE82 + vadduwm 6,6,30 + vxor 29,7,0 + vsel 29,0,1,29 + vadduwm 2,2,6 + .long 0x13C78682 + vadduwm 30,30,29 + vadduwm 6,6,30 + lvx 28,27,7 + .long 0x13D80682 + vadduwm 19,19,30 + .long 0x13D17E82 + vadduwm 19,19,30 + vadduwm 19,19,12 + vadduwm 5,5,18 + vsel 29,4,3,2 + vadduwm 4,4,28 + vadduwm 5,5,29 + .long 0x13C2FE82 + vadduwm 5,5,30 + vxor 29,6,7 + vsel 29,7,0,29 + vadduwm 1,1,5 + .long 0x13C68682 + vadduwm 30,30,29 + vadduwm 5,5,30 + lvx 28,28,7 + .long 0x13D90682 + vadduwm 24,24,30 + .long 0x13D27E82 + vadduwm 24,24,30 + vadduwm 24,24,13 + vadduwm 4,4,19 + vsel 29,3,2,1 + vadduwm 3,3,28 + vadduwm 4,4,29 + .long 0x13C1FE82 + vadduwm 4,4,30 + vxor 29,5,6 + vsel 29,6,7,29 + vadduwm 0,0,4 + .long 0x13C58682 + vadduwm 30,30,29 + vadduwm 4,4,30 + lvx 28,29,7 + .long 0x13DA0682 + vadduwm 25,25,30 + .long 0x13D37E82 + vadduwm 25,25,30 + vadduwm 25,25,14 + vadduwm 3,3,24 + vsel 29,2,1,0 + vadduwm 2,2,28 + vadduwm 3,3,29 + .long 0x13C0FE82 + vadduwm 3,3,30 + vxor 29,4,5 + vsel 29,5,6,29 + vadduwm 7,7,3 + .long 0x13C48682 + vadduwm 30,30,29 + vadduwm 3,3,30 + lvx 28,30,7 + .long 0x13DB0682 + vadduwm 26,26,30 + .long 0x13D87E82 + vadduwm 26,26,30 + vadduwm 26,26,15 + vadduwm 2,2,25 + vsel 29,1,0,7 + vadduwm 1,1,28 + vadduwm 2,2,29 + .long 0x13C7FE82 + vadduwm 2,2,30 + vxor 29,3,4 + vsel 29,4,5,29 + vadduwm 6,6,2 + .long 0x13C38682 + vadduwm 30,30,29 + vadduwm 2,2,30 + lvx 28,31,7 + addi 7,7,0x80 + .long 0x13C80682 + vadduwm 27,27,30 + .long 0x13D97E82 + vadduwm 27,27,30 + vadduwm 27,27,16 + vadduwm 1,1,26 + vsel 29,0,7,6 + vadduwm 0,0,28 + vadduwm 1,1,29 + .long 0x13C6FE82 + vadduwm 1,1,30 + vxor 29,2,3 + vsel 29,3,4,29 + vadduwm 5,5,1 + .long 0x13C28682 + vadduwm 30,30,29 + vadduwm 1,1,30 + lvx 28,0,7 + .long 0x13C90682 + vadduwm 8,8,30 + .long 0x13DA7E82 + vadduwm 8,8,30 + vadduwm 8,8,17 + vadduwm 0,0,27 + vsel 29,7,6,5 + vadduwm 7,7,28 + vadduwm 0,0,29 + .long 0x13C5FE82 + vadduwm 0,0,30 + vxor 29,1,2 + vsel 29,2,3,29 + vadduwm 4,4,0 + .long 0x13C18682 + vadduwm 30,30,29 + vadduwm 0,0,30 + lvx 28,10,7 + bc 16,0,L16_xx + + lvx 10,0,11 + subic. 
5,5,1 + lvx 11,10,11 + vadduwm 0,0,10 + lvx 12,26,11 + vadduwm 1,1,11 + lvx 13,27,11 + vadduwm 2,2,12 + lvx 14,28,11 + vadduwm 3,3,13 + lvx 15,29,11 + vadduwm 4,4,14 + lvx 16,30,11 + vadduwm 5,5,15 + lvx 17,31,11 + vadduwm 6,6,16 + vadduwm 7,7,17 + bne Loop + lvx 8,26,7 + vperm 0,0,1,28 + lvx 9,27,7 + vperm 4,4,5,28 + vperm 0,0,2,8 + vperm 4,4,6,8 + vperm 0,0,3,9 + vperm 4,4,7,9 + .long 0x7C001F19 + .long 0x7C8A1F19 + addi 11,1,207 + mtlr 8 + mtspr 256,12 + lvx 24,0,11 + lvx 25,10,11 + lvx 26,26,11 + lvx 27,27,11 + lvx 28,28,11 + lvx 29,29,11 + lvx 30,30,11 + lvx 31,31,11 + ld 26,336(1) + ld 27,344(1) + ld 28,352(1) + ld 29,360(1) + ld 30,368(1) + ld 31,376(1) + addi 1,1,384 + blr +.long 0 +.byte 0,12,4,1,0x80,6,3,0 +.long 0 + +.align 6 +LPICmeup: + mflr 0 + bcl 20,31,$+4 + mflr 6 + addi 6,6,56 + mtlr 0 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 +.space 28 +.long 0x428a2f98,0x428a2f98,0x428a2f98,0x428a2f98 +.long 0x71374491,0x71374491,0x71374491,0x71374491 +.long 0xb5c0fbcf,0xb5c0fbcf,0xb5c0fbcf,0xb5c0fbcf +.long 0xe9b5dba5,0xe9b5dba5,0xe9b5dba5,0xe9b5dba5 +.long 0x3956c25b,0x3956c25b,0x3956c25b,0x3956c25b +.long 0x59f111f1,0x59f111f1,0x59f111f1,0x59f111f1 +.long 0x923f82a4,0x923f82a4,0x923f82a4,0x923f82a4 +.long 0xab1c5ed5,0xab1c5ed5,0xab1c5ed5,0xab1c5ed5 +.long 0xd807aa98,0xd807aa98,0xd807aa98,0xd807aa98 +.long 0x12835b01,0x12835b01,0x12835b01,0x12835b01 +.long 0x243185be,0x243185be,0x243185be,0x243185be +.long 0x550c7dc3,0x550c7dc3,0x550c7dc3,0x550c7dc3 +.long 0x72be5d74,0x72be5d74,0x72be5d74,0x72be5d74 +.long 0x80deb1fe,0x80deb1fe,0x80deb1fe,0x80deb1fe +.long 0x9bdc06a7,0x9bdc06a7,0x9bdc06a7,0x9bdc06a7 +.long 0xc19bf174,0xc19bf174,0xc19bf174,0xc19bf174 +.long 0xe49b69c1,0xe49b69c1,0xe49b69c1,0xe49b69c1 +.long 0xefbe4786,0xefbe4786,0xefbe4786,0xefbe4786 +.long 0x0fc19dc6,0x0fc19dc6,0x0fc19dc6,0x0fc19dc6 +.long 0x240ca1cc,0x240ca1cc,0x240ca1cc,0x240ca1cc +.long 0x2de92c6f,0x2de92c6f,0x2de92c6f,0x2de92c6f +.long 0x4a7484aa,0x4a7484aa,0x4a7484aa,0x4a7484aa +.long 0x5cb0a9dc,0x5cb0a9dc,0x5cb0a9dc,0x5cb0a9dc +.long 0x76f988da,0x76f988da,0x76f988da,0x76f988da +.long 0x983e5152,0x983e5152,0x983e5152,0x983e5152 +.long 0xa831c66d,0xa831c66d,0xa831c66d,0xa831c66d +.long 0xb00327c8,0xb00327c8,0xb00327c8,0xb00327c8 +.long 0xbf597fc7,0xbf597fc7,0xbf597fc7,0xbf597fc7 +.long 0xc6e00bf3,0xc6e00bf3,0xc6e00bf3,0xc6e00bf3 +.long 0xd5a79147,0xd5a79147,0xd5a79147,0xd5a79147 +.long 0x06ca6351,0x06ca6351,0x06ca6351,0x06ca6351 +.long 0x14292967,0x14292967,0x14292967,0x14292967 +.long 0x27b70a85,0x27b70a85,0x27b70a85,0x27b70a85 +.long 0x2e1b2138,0x2e1b2138,0x2e1b2138,0x2e1b2138 +.long 0x4d2c6dfc,0x4d2c6dfc,0x4d2c6dfc,0x4d2c6dfc +.long 0x53380d13,0x53380d13,0x53380d13,0x53380d13 +.long 0x650a7354,0x650a7354,0x650a7354,0x650a7354 +.long 0x766a0abb,0x766a0abb,0x766a0abb,0x766a0abb +.long 0x81c2c92e,0x81c2c92e,0x81c2c92e,0x81c2c92e +.long 0x92722c85,0x92722c85,0x92722c85,0x92722c85 +.long 0xa2bfe8a1,0xa2bfe8a1,0xa2bfe8a1,0xa2bfe8a1 +.long 0xa81a664b,0xa81a664b,0xa81a664b,0xa81a664b +.long 0xc24b8b70,0xc24b8b70,0xc24b8b70,0xc24b8b70 +.long 0xc76c51a3,0xc76c51a3,0xc76c51a3,0xc76c51a3 +.long 0xd192e819,0xd192e819,0xd192e819,0xd192e819 +.long 0xd6990624,0xd6990624,0xd6990624,0xd6990624 +.long 0xf40e3585,0xf40e3585,0xf40e3585,0xf40e3585 +.long 0x106aa070,0x106aa070,0x106aa070,0x106aa070 +.long 0x19a4c116,0x19a4c116,0x19a4c116,0x19a4c116 +.long 0x1e376c08,0x1e376c08,0x1e376c08,0x1e376c08 +.long 0x2748774c,0x2748774c,0x2748774c,0x2748774c +.long 0x34b0bcb5,0x34b0bcb5,0x34b0bcb5,0x34b0bcb5 +.long 
0x391c0cb3,0x391c0cb3,0x391c0cb3,0x391c0cb3 +.long 0x4ed8aa4a,0x4ed8aa4a,0x4ed8aa4a,0x4ed8aa4a +.long 0x5b9cca4f,0x5b9cca4f,0x5b9cca4f,0x5b9cca4f +.long 0x682e6ff3,0x682e6ff3,0x682e6ff3,0x682e6ff3 +.long 0x748f82ee,0x748f82ee,0x748f82ee,0x748f82ee +.long 0x78a5636f,0x78a5636f,0x78a5636f,0x78a5636f +.long 0x84c87814,0x84c87814,0x84c87814,0x84c87814 +.long 0x8cc70208,0x8cc70208,0x8cc70208,0x8cc70208 +.long 0x90befffa,0x90befffa,0x90befffa,0x90befffa +.long 0xa4506ceb,0xa4506ceb,0xa4506ceb,0xa4506ceb +.long 0xbef9a3f7,0xbef9a3f7,0xbef9a3f7,0xbef9a3f7 +.long 0xc67178f2,0xc67178f2,0xc67178f2,0xc67178f2 +.long 0,0,0,0 +.long 0x00010203,0x10111213,0x10111213,0x10111213 +.long 0x00010203,0x04050607,0x10111213,0x10111213 +.long 0x00010203,0x04050607,0x08090a0b,0x10111213 +.byte 83,72,65,50,53,54,32,102,111,114,32,80,111,119,101,114,73,83,65,32,50,46,48,55,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 Index: cipher/sha512-ppc8.S =================================================================== --- /dev/null +++ cipher/sha512-ppc8.S @@ -0,0 +1,843 @@ +.machine "any" +.text + +.globl sha512_block_p8 +.align 6 +sha512_block_p8: + stdu 1,-384(1) + mflr 8 + li 10,207 + li 11,223 + stvx 24,10,1 + addi 10,10,32 + mfspr 12,256 + stvx 25,11,1 + addi 11,11,32 + stvx 26,10,1 + addi 10,10,32 + stvx 27,11,1 + addi 11,11,32 + stvx 28,10,1 + addi 10,10,32 + stvx 29,11,1 + addi 11,11,32 + stvx 30,10,1 + stvx 31,11,1 + li 11,-4096+255 + stw 12,332(1) + li 10,0x10 + std 26,336(1) + li 26,0x20 + std 27,344(1) + li 27,0x30 + std 28,352(1) + li 28,0x40 + std 29,360(1) + li 29,0x50 + std 30,368(1) + li 30,0x60 + std 31,376(1) + li 31,0x70 + std 8,400(1) + mtspr 256,11 + + bl LPICmeup + addi 11,1,79 + li 7,8 + lvsl 31,0,7 + vspltisb 28,0x0f + vxor 31,31,28 + .long 0x7C001E99 + .long 0x7C4A1E99 + .long 0x7C9A1E99 + vsldoi 1,0,0,8 + .long 0x7CDB1E99 + vsldoi 3,2,2,8 + vsldoi 5,4,4,8 + vsldoi 7,6,6,8 + li 0,4 + b Loop +.align 5 +Loop: + lvx 28,0,6 + .long 0x7D002699 + addi 4,4,16 + mr 7,6 + stvx 0,0,11 + stvx 1,10,11 + stvx 2,26,11 + stvx 3,27,11 + stvx 4,28,11 + stvx 5,29,11 + stvx 6,30,11 + stvx 7,31,11 + .long 0x10E7E0C0 + lvx 28,10,6 + vperm 8,8,8,31 + .long 0x10E740C0 + vsel 29,6,5,4 + .long 0x10C6E0C0 + .long 0x10E7E8C0 + .long 0x13C4FEC2 + .long 0x10E7F0C0 + vxor 29,0,1 + vsel 29,1,2,29 + .long 0x106338C0 + .long 0x13C086C2 + .long 0x13DEE8C0 + .long 0x10E7F0C0 + lvx 28,26,7 + .long 0x7D402699 + addi 4,4,16 + vsldoi 9,8,8,8 + .long 0x10C648C0 + vsel 29,5,4,3 + .long 0x10A5E0C0 + .long 0x10C6E8C0 + .long 0x13C3FEC2 + .long 0x10C6F0C0 + vxor 29,7,0 + vsel 29,0,1,29 + .long 0x104230C0 + .long 0x13C786C2 + .long 0x13DEE8C0 + .long 0x10C6F0C0 + lvx 28,27,7 + vperm 10,10,10,31 + .long 0x10A550C0 + vsel 29,4,3,2 + .long 0x1084E0C0 + .long 0x10A5E8C0 + .long 0x13C2FEC2 + .long 0x10A5F0C0 + vxor 29,6,7 + vsel 29,7,0,29 + .long 0x102128C0 + .long 0x13C686C2 + .long 0x13DEE8C0 + .long 0x10A5F0C0 + lvx 28,28,7 + .long 0x7D802699 + addi 4,4,16 + vsldoi 11,10,10,8 + .long 0x108458C0 + vsel 29,3,2,1 + .long 0x1063E0C0 + .long 0x1084E8C0 + .long 0x13C1FEC2 + .long 0x1084F0C0 + vxor 29,5,6 + vsel 29,6,7,29 + .long 0x100020C0 + .long 0x13C586C2 + .long 0x13DEE8C0 + .long 0x1084F0C0 + lvx 28,29,7 + vperm 12,12,12,31 + .long 0x106360C0 + vsel 29,2,1,0 + .long 0x1042E0C0 + .long 0x1063E8C0 + .long 0x13C0FEC2 + .long 0x1063F0C0 + vxor 29,4,5 + vsel 29,5,6,29 + .long 0x10E718C0 + .long 0x13C486C2 + .long 0x13DEE8C0 + .long 0x1063F0C0 + lvx 28,30,7 
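The trailing .byte row in each of these generated files is simply a NUL-terminated ASCII attribution tag. A one-off Perl check (editorial only, not part of the patch) decodes the values from the SHA-256 table just above:

#!/usr/bin/env perl
# Editorial sketch: decode the ".byte" attribution string emitted at
# the end of the SHA-256 file above.
use strict;
use warnings;

my @bytes = (83,72,65,50,53,54,32,102,111,114,32,80,111,119,101,114,
             73,83,65,32,50,46,48,55,44,67,82,89,80,84,79,71,65,77,
             83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,
             115,115,108,46,111,114,103,62,0);

# drop the terminating NUL, then map byte values to characters
print join('', map { chr } grep { $_ } @bytes), "\n";
# prints: SHA256 for PowerISA 2.07,CRYPTOGAMS by <appro@openssl.org>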
+ .long 0x7DC02699 + addi 4,4,16 + vsldoi 13,12,12,8 + .long 0x104268C0 + vsel 29,1,0,7 + .long 0x1021E0C0 + .long 0x1042E8C0 + .long 0x13C7FEC2 + .long 0x1042F0C0 + vxor 29,3,4 + vsel 29,4,5,29 + .long 0x10C610C0 + .long 0x13C386C2 + .long 0x13DEE8C0 + .long 0x1042F0C0 + lvx 28,31,7 + addi 7,7,0x80 + vperm 14,14,14,31 + .long 0x102170C0 + vsel 29,0,7,6 + .long 0x1000E0C0 + .long 0x1021E8C0 + .long 0x13C6FEC2 + .long 0x1021F0C0 + vxor 29,2,3 + vsel 29,3,4,29 + .long 0x10A508C0 + .long 0x13C286C2 + .long 0x13DEE8C0 + .long 0x1021F0C0 + lvx 28,0,7 + .long 0x7E002699 + addi 4,4,16 + vsldoi 15,14,14,8 + .long 0x100078C0 + vsel 29,7,6,5 + .long 0x10E7E0C0 + .long 0x1000E8C0 + .long 0x13C5FEC2 + .long 0x1000F0C0 + vxor 29,1,2 + vsel 29,2,3,29 + .long 0x108400C0 + .long 0x13C186C2 + .long 0x13DEE8C0 + .long 0x1000F0C0 + lvx 28,10,7 + vperm 16,16,16,31 + .long 0x10E780C0 + vsel 29,6,5,4 + .long 0x10C6E0C0 + .long 0x10E7E8C0 + .long 0x13C4FEC2 + .long 0x10E7F0C0 + vxor 29,0,1 + vsel 29,1,2,29 + .long 0x106338C0 + .long 0x13C086C2 + .long 0x13DEE8C0 + .long 0x10E7F0C0 + lvx 28,26,7 + .long 0x7E402699 + addi 4,4,16 + vsldoi 17,16,16,8 + .long 0x10C688C0 + vsel 29,5,4,3 + .long 0x10A5E0C0 + .long 0x10C6E8C0 + .long 0x13C3FEC2 + .long 0x10C6F0C0 + vxor 29,7,0 + vsel 29,0,1,29 + .long 0x104230C0 + .long 0x13C786C2 + .long 0x13DEE8C0 + .long 0x10C6F0C0 + lvx 28,27,7 + vperm 18,18,18,31 + .long 0x10A590C0 + vsel 29,4,3,2 + .long 0x1084E0C0 + .long 0x10A5E8C0 + .long 0x13C2FEC2 + .long 0x10A5F0C0 + vxor 29,6,7 + vsel 29,7,0,29 + .long 0x102128C0 + .long 0x13C686C2 + .long 0x13DEE8C0 + .long 0x10A5F0C0 + lvx 28,28,7 + .long 0x7F002699 + addi 4,4,16 + vsldoi 19,18,18,8 + .long 0x108498C0 + vsel 29,3,2,1 + .long 0x1063E0C0 + .long 0x1084E8C0 + .long 0x13C1FEC2 + .long 0x1084F0C0 + vxor 29,5,6 + vsel 29,6,7,29 + .long 0x100020C0 + .long 0x13C586C2 + .long 0x13DEE8C0 + .long 0x1084F0C0 + lvx 28,29,7 + vperm 24,24,24,31 + .long 0x1063C0C0 + vsel 29,2,1,0 + .long 0x1042E0C0 + .long 0x1063E8C0 + .long 0x13C0FEC2 + .long 0x1063F0C0 + vxor 29,4,5 + vsel 29,5,6,29 + .long 0x10E718C0 + .long 0x13C486C2 + .long 0x13DEE8C0 + .long 0x1063F0C0 + lvx 28,30,7 + .long 0x7F402699 + addi 4,4,16 + vsldoi 25,24,24,8 + .long 0x1042C8C0 + vsel 29,1,0,7 + .long 0x1021E0C0 + .long 0x1042E8C0 + .long 0x13C7FEC2 + .long 0x1042F0C0 + vxor 29,3,4 + vsel 29,4,5,29 + .long 0x10C610C0 + .long 0x13C386C2 + .long 0x13DEE8C0 + .long 0x1042F0C0 + lvx 28,31,7 + addi 7,7,0x80 + vperm 26,26,26,31 + .long 0x1021D0C0 + vsel 29,0,7,6 + .long 0x1000E0C0 + .long 0x1021E8C0 + .long 0x13C6FEC2 + .long 0x1021F0C0 + vxor 29,2,3 + vsel 29,3,4,29 + .long 0x10A508C0 + .long 0x13C286C2 + .long 0x13DEE8C0 + .long 0x1021F0C0 + lvx 28,0,7 + vsldoi 27,26,26,8 + .long 0x13C906C2 + .long 0x1108F0C0 + .long 0x13DA7EC2 + .long 0x1108F0C0 + .long 0x110888C0 + .long 0x1000D8C0 + vsel 29,7,6,5 + .long 0x10E7E0C0 + .long 0x1000E8C0 + .long 0x13C5FEC2 + .long 0x1000F0C0 + vxor 29,1,2 + vsel 29,2,3,29 + .long 0x108400C0 + .long 0x13C186C2 + .long 0x13DEE8C0 + .long 0x1000F0C0 + lvx 28,10,7 + mtctr 0 + b L16_xx +.align 5 +L16_xx: + .long 0x13CA06C2 + .long 0x1129F0C0 + .long 0x13DB7EC2 + .long 0x1129F0C0 + .long 0x112990C0 + .long 0x10E740C0 + vsel 29,6,5,4 + .long 0x10C6E0C0 + .long 0x10E7E8C0 + .long 0x13C4FEC2 + .long 0x10E7F0C0 + vxor 29,0,1 + vsel 29,1,2,29 + .long 0x106338C0 + .long 0x13C086C2 + .long 0x13DEE8C0 + .long 0x10E7F0C0 + lvx 28,26,7 + .long 0x13CB06C2 + .long 0x114AF0C0 + .long 0x13C87EC2 + .long 0x114AF0C0 + .long 0x114A98C0 + .long 0x10C648C0 + vsel 
29,5,4,3 + .long 0x10A5E0C0 + .long 0x10C6E8C0 + .long 0x13C3FEC2 + .long 0x10C6F0C0 + vxor 29,7,0 + vsel 29,0,1,29 + .long 0x104230C0 + .long 0x13C786C2 + .long 0x13DEE8C0 + .long 0x10C6F0C0 + lvx 28,27,7 + .long 0x13CC06C2 + .long 0x116BF0C0 + .long 0x13C97EC2 + .long 0x116BF0C0 + .long 0x116BC0C0 + .long 0x10A550C0 + vsel 29,4,3,2 + .long 0x1084E0C0 + .long 0x10A5E8C0 + .long 0x13C2FEC2 + .long 0x10A5F0C0 + vxor 29,6,7 + vsel 29,7,0,29 + .long 0x102128C0 + .long 0x13C686C2 + .long 0x13DEE8C0 + .long 0x10A5F0C0 + lvx 28,28,7 + .long 0x13CD06C2 + .long 0x118CF0C0 + .long 0x13CA7EC2 + .long 0x118CF0C0 + .long 0x118CC8C0 + .long 0x108458C0 + vsel 29,3,2,1 + .long 0x1063E0C0 + .long 0x1084E8C0 + .long 0x13C1FEC2 + .long 0x1084F0C0 + vxor 29,5,6 + vsel 29,6,7,29 + .long 0x100020C0 + .long 0x13C586C2 + .long 0x13DEE8C0 + .long 0x1084F0C0 + lvx 28,29,7 + .long 0x13CE06C2 + .long 0x11ADF0C0 + .long 0x13CB7EC2 + .long 0x11ADF0C0 + .long 0x11ADD0C0 + .long 0x106360C0 + vsel 29,2,1,0 + .long 0x1042E0C0 + .long 0x1063E8C0 + .long 0x13C0FEC2 + .long 0x1063F0C0 + vxor 29,4,5 + vsel 29,5,6,29 + .long 0x10E718C0 + .long 0x13C486C2 + .long 0x13DEE8C0 + .long 0x1063F0C0 + lvx 28,30,7 + .long 0x13CF06C2 + .long 0x11CEF0C0 + .long 0x13CC7EC2 + .long 0x11CEF0C0 + .long 0x11CED8C0 + .long 0x104268C0 + vsel 29,1,0,7 + .long 0x1021E0C0 + .long 0x1042E8C0 + .long 0x13C7FEC2 + .long 0x1042F0C0 + vxor 29,3,4 + vsel 29,4,5,29 + .long 0x10C610C0 + .long 0x13C386C2 + .long 0x13DEE8C0 + .long 0x1042F0C0 + lvx 28,31,7 + addi 7,7,0x80 + .long 0x13D006C2 + .long 0x11EFF0C0 + .long 0x13CD7EC2 + .long 0x11EFF0C0 + .long 0x11EF40C0 + .long 0x102170C0 + vsel 29,0,7,6 + .long 0x1000E0C0 + .long 0x1021E8C0 + .long 0x13C6FEC2 + .long 0x1021F0C0 + vxor 29,2,3 + vsel 29,3,4,29 + .long 0x10A508C0 + .long 0x13C286C2 + .long 0x13DEE8C0 + .long 0x1021F0C0 + lvx 28,0,7 + .long 0x13D106C2 + .long 0x1210F0C0 + .long 0x13CE7EC2 + .long 0x1210F0C0 + .long 0x121048C0 + .long 0x100078C0 + vsel 29,7,6,5 + .long 0x10E7E0C0 + .long 0x1000E8C0 + .long 0x13C5FEC2 + .long 0x1000F0C0 + vxor 29,1,2 + vsel 29,2,3,29 + .long 0x108400C0 + .long 0x13C186C2 + .long 0x13DEE8C0 + .long 0x1000F0C0 + lvx 28,10,7 + .long 0x13D206C2 + .long 0x1231F0C0 + .long 0x13CF7EC2 + .long 0x1231F0C0 + .long 0x123150C0 + .long 0x10E780C0 + vsel 29,6,5,4 + .long 0x10C6E0C0 + .long 0x10E7E8C0 + .long 0x13C4FEC2 + .long 0x10E7F0C0 + vxor 29,0,1 + vsel 29,1,2,29 + .long 0x106338C0 + .long 0x13C086C2 + .long 0x13DEE8C0 + .long 0x10E7F0C0 + lvx 28,26,7 + .long 0x13D306C2 + .long 0x1252F0C0 + .long 0x13D07EC2 + .long 0x1252F0C0 + .long 0x125258C0 + .long 0x10C688C0 + vsel 29,5,4,3 + .long 0x10A5E0C0 + .long 0x10C6E8C0 + .long 0x13C3FEC2 + .long 0x10C6F0C0 + vxor 29,7,0 + vsel 29,0,1,29 + .long 0x104230C0 + .long 0x13C786C2 + .long 0x13DEE8C0 + .long 0x10C6F0C0 + lvx 28,27,7 + .long 0x13D806C2 + .long 0x1273F0C0 + .long 0x13D17EC2 + .long 0x1273F0C0 + .long 0x127360C0 + .long 0x10A590C0 + vsel 29,4,3,2 + .long 0x1084E0C0 + .long 0x10A5E8C0 + .long 0x13C2FEC2 + .long 0x10A5F0C0 + vxor 29,6,7 + vsel 29,7,0,29 + .long 0x102128C0 + .long 0x13C686C2 + .long 0x13DEE8C0 + .long 0x10A5F0C0 + lvx 28,28,7 + .long 0x13D906C2 + .long 0x1318F0C0 + .long 0x13D27EC2 + .long 0x1318F0C0 + .long 0x131868C0 + .long 0x108498C0 + vsel 29,3,2,1 + .long 0x1063E0C0 + .long 0x1084E8C0 + .long 0x13C1FEC2 + .long 0x1084F0C0 + vxor 29,5,6 + vsel 29,6,7,29 + .long 0x100020C0 + .long 0x13C586C2 + .long 0x13DEE8C0 + .long 0x1084F0C0 + lvx 28,29,7 + .long 0x13DA06C2 + .long 0x1339F0C0 + .long 0x13D37EC2 + 
.long 0x1339F0C0 + .long 0x133970C0 + .long 0x1063C0C0 + vsel 29,2,1,0 + .long 0x1042E0C0 + .long 0x1063E8C0 + .long 0x13C0FEC2 + .long 0x1063F0C0 + vxor 29,4,5 + vsel 29,5,6,29 + .long 0x10E718C0 + .long 0x13C486C2 + .long 0x13DEE8C0 + .long 0x1063F0C0 + lvx 28,30,7 + .long 0x13DB06C2 + .long 0x135AF0C0 + .long 0x13D87EC2 + .long 0x135AF0C0 + .long 0x135A78C0 + .long 0x1042C8C0 + vsel 29,1,0,7 + .long 0x1021E0C0 + .long 0x1042E8C0 + .long 0x13C7FEC2 + .long 0x1042F0C0 + vxor 29,3,4 + vsel 29,4,5,29 + .long 0x10C610C0 + .long 0x13C386C2 + .long 0x13DEE8C0 + .long 0x1042F0C0 + lvx 28,31,7 + addi 7,7,0x80 + .long 0x13C806C2 + .long 0x137BF0C0 + .long 0x13D97EC2 + .long 0x137BF0C0 + .long 0x137B80C0 + .long 0x1021D0C0 + vsel 29,0,7,6 + .long 0x1000E0C0 + .long 0x1021E8C0 + .long 0x13C6FEC2 + .long 0x1021F0C0 + vxor 29,2,3 + vsel 29,3,4,29 + .long 0x10A508C0 + .long 0x13C286C2 + .long 0x13DEE8C0 + .long 0x1021F0C0 + lvx 28,0,7 + .long 0x13C906C2 + .long 0x1108F0C0 + .long 0x13DA7EC2 + .long 0x1108F0C0 + .long 0x110888C0 + .long 0x1000D8C0 + vsel 29,7,6,5 + .long 0x10E7E0C0 + .long 0x1000E8C0 + .long 0x13C5FEC2 + .long 0x1000F0C0 + vxor 29,1,2 + vsel 29,2,3,29 + .long 0x108400C0 + .long 0x13C186C2 + .long 0x13DEE8C0 + .long 0x1000F0C0 + lvx 28,10,7 + bc 16,0,L16_xx + + lvx 10,0,11 + subic. 5,5,1 + lvx 11,10,11 + .long 0x100050C0 + lvx 12,26,11 + .long 0x102158C0 + lvx 13,27,11 + .long 0x104260C0 + lvx 14,28,11 + .long 0x106368C0 + lvx 15,29,11 + .long 0x108470C0 + lvx 16,30,11 + .long 0x10A578C0 + lvx 17,31,11 + .long 0x10C680C0 + .long 0x10E788C0 + bne Loop + vperm 0,0,1,28 + vperm 2,2,3,28 + vperm 4,4,5,28 + vperm 6,6,7,28 + .long 0x7C001F99 + .long 0x7C4A1F99 + .long 0x7C9A1F99 + .long 0x7CDB1F99 + addi 11,1,207 + mtlr 8 + mtspr 256,12 + lvx 24,0,11 + lvx 25,10,11 + lvx 26,26,11 + lvx 27,27,11 + lvx 28,28,11 + lvx 29,29,11 + lvx 30,30,11 + lvx 31,31,11 + ld 26,336(1) + ld 27,344(1) + ld 28,352(1) + ld 29,360(1) + ld 30,368(1) + ld 31,376(1) + addi 1,1,384 + blr +.long 0 +.byte 0,12,4,1,0x80,6,3,0 +.long 0 + +.align 6 +LPICmeup: + mflr 0 + bcl 20,31,$+4 + mflr 6 + addi 6,6,56 + mtlr 0 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 +.space 28 +.long 0xd728ae22,0x428a2f98 +.long 0xd728ae22,0x428a2f98 +.long 0x23ef65cd,0x71374491 +.long 0x23ef65cd,0x71374491 +.long 0xec4d3b2f,0xb5c0fbcf +.long 0xec4d3b2f,0xb5c0fbcf +.long 0x8189dbbc,0xe9b5dba5 +.long 0x8189dbbc,0xe9b5dba5 +.long 0xf348b538,0x3956c25b +.long 0xf348b538,0x3956c25b +.long 0xb605d019,0x59f111f1 +.long 0xb605d019,0x59f111f1 +.long 0xaf194f9b,0x923f82a4 +.long 0xaf194f9b,0x923f82a4 +.long 0xda6d8118,0xab1c5ed5 +.long 0xda6d8118,0xab1c5ed5 +.long 0xa3030242,0xd807aa98 +.long 0xa3030242,0xd807aa98 +.long 0x45706fbe,0x12835b01 +.long 0x45706fbe,0x12835b01 +.long 0x4ee4b28c,0x243185be +.long 0x4ee4b28c,0x243185be +.long 0xd5ffb4e2,0x550c7dc3 +.long 0xd5ffb4e2,0x550c7dc3 +.long 0xf27b896f,0x72be5d74 +.long 0xf27b896f,0x72be5d74 +.long 0x3b1696b1,0x80deb1fe +.long 0x3b1696b1,0x80deb1fe +.long 0x25c71235,0x9bdc06a7 +.long 0x25c71235,0x9bdc06a7 +.long 0xcf692694,0xc19bf174 +.long 0xcf692694,0xc19bf174 +.long 0x9ef14ad2,0xe49b69c1 +.long 0x9ef14ad2,0xe49b69c1 +.long 0x384f25e3,0xefbe4786 +.long 0x384f25e3,0xefbe4786 +.long 0x8b8cd5b5,0x0fc19dc6 +.long 0x8b8cd5b5,0x0fc19dc6 +.long 0x77ac9c65,0x240ca1cc +.long 0x77ac9c65,0x240ca1cc +.long 0x592b0275,0x2de92c6f +.long 0x592b0275,0x2de92c6f +.long 0x6ea6e483,0x4a7484aa +.long 0x6ea6e483,0x4a7484aa +.long 0xbd41fbd4,0x5cb0a9dc +.long 0xbd41fbd4,0x5cb0a9dc +.long 0x831153b5,0x76f988da +.long 
0x831153b5,0x76f988da +.long 0xee66dfab,0x983e5152 +.long 0xee66dfab,0x983e5152 +.long 0x2db43210,0xa831c66d +.long 0x2db43210,0xa831c66d +.long 0x98fb213f,0xb00327c8 +.long 0x98fb213f,0xb00327c8 +.long 0xbeef0ee4,0xbf597fc7 +.long 0xbeef0ee4,0xbf597fc7 +.long 0x3da88fc2,0xc6e00bf3 +.long 0x3da88fc2,0xc6e00bf3 +.long 0x930aa725,0xd5a79147 +.long 0x930aa725,0xd5a79147 +.long 0xe003826f,0x06ca6351 +.long 0xe003826f,0x06ca6351 +.long 0x0a0e6e70,0x14292967 +.long 0x0a0e6e70,0x14292967 +.long 0x46d22ffc,0x27b70a85 +.long 0x46d22ffc,0x27b70a85 +.long 0x5c26c926,0x2e1b2138 +.long 0x5c26c926,0x2e1b2138 +.long 0x5ac42aed,0x4d2c6dfc +.long 0x5ac42aed,0x4d2c6dfc +.long 0x9d95b3df,0x53380d13 +.long 0x9d95b3df,0x53380d13 +.long 0x8baf63de,0x650a7354 +.long 0x8baf63de,0x650a7354 +.long 0x3c77b2a8,0x766a0abb +.long 0x3c77b2a8,0x766a0abb +.long 0x47edaee6,0x81c2c92e +.long 0x47edaee6,0x81c2c92e +.long 0x1482353b,0x92722c85 +.long 0x1482353b,0x92722c85 +.long 0x4cf10364,0xa2bfe8a1 +.long 0x4cf10364,0xa2bfe8a1 +.long 0xbc423001,0xa81a664b +.long 0xbc423001,0xa81a664b +.long 0xd0f89791,0xc24b8b70 +.long 0xd0f89791,0xc24b8b70 +.long 0x0654be30,0xc76c51a3 +.long 0x0654be30,0xc76c51a3 +.long 0xd6ef5218,0xd192e819 +.long 0xd6ef5218,0xd192e819 +.long 0x5565a910,0xd6990624 +.long 0x5565a910,0xd6990624 +.long 0x5771202a,0xf40e3585 +.long 0x5771202a,0xf40e3585 +.long 0x32bbd1b8,0x106aa070 +.long 0x32bbd1b8,0x106aa070 +.long 0xb8d2d0c8,0x19a4c116 +.long 0xb8d2d0c8,0x19a4c116 +.long 0x5141ab53,0x1e376c08 +.long 0x5141ab53,0x1e376c08 +.long 0xdf8eeb99,0x2748774c +.long 0xdf8eeb99,0x2748774c +.long 0xe19b48a8,0x34b0bcb5 +.long 0xe19b48a8,0x34b0bcb5 +.long 0xc5c95a63,0x391c0cb3 +.long 0xc5c95a63,0x391c0cb3 +.long 0xe3418acb,0x4ed8aa4a +.long 0xe3418acb,0x4ed8aa4a +.long 0x7763e373,0x5b9cca4f +.long 0x7763e373,0x5b9cca4f +.long 0xd6b2b8a3,0x682e6ff3 +.long 0xd6b2b8a3,0x682e6ff3 +.long 0x5defb2fc,0x748f82ee +.long 0x5defb2fc,0x748f82ee +.long 0x43172f60,0x78a5636f +.long 0x43172f60,0x78a5636f +.long 0xa1f0ab72,0x84c87814 +.long 0xa1f0ab72,0x84c87814 +.long 0x1a6439ec,0x8cc70208 +.long 0x1a6439ec,0x8cc70208 +.long 0x23631e28,0x90befffa +.long 0x23631e28,0x90befffa +.long 0xde82bde9,0xa4506ceb +.long 0xde82bde9,0xa4506ceb +.long 0xb2c67915,0xbef9a3f7 +.long 0xb2c67915,0xbef9a3f7 +.long 0xe372532b,0xc67178f2 +.long 0xe372532b,0xc67178f2 +.long 0xea26619c,0xca273ece +.long 0xea26619c,0xca273ece +.long 0x21c0c207,0xd186b8c7 +.long 0x21c0c207,0xd186b8c7 +.long 0xcde0eb1e,0xeada7dd6 +.long 0xcde0eb1e,0xeada7dd6 +.long 0xee6ed178,0xf57d4f7f +.long 0xee6ed178,0xf57d4f7f +.long 0x72176fba,0x06f067aa +.long 0x72176fba,0x06f067aa +.long 0xa2c898a6,0x0a637dc5 +.long 0xa2c898a6,0x0a637dc5 +.long 0xbef90dae,0x113f9804 +.long 0xbef90dae,0x113f9804 +.long 0x131c471b,0x1b710b35 +.long 0x131c471b,0x1b710b35 +.long 0x23047d84,0x28db77f5 +.long 0x23047d84,0x28db77f5 +.long 0x40c72493,0x32caab7b +.long 0x40c72493,0x32caab7b +.long 0x15c9bebc,0x3c9ebe0a +.long 0x15c9bebc,0x3c9ebe0a +.long 0x9c100d4c,0x431d67c4 +.long 0x9c100d4c,0x431d67c4 +.long 0xcb3e42b6,0x4cc5d4be +.long 0xcb3e42b6,0x4cc5d4be +.long 0xfc657e2a,0x597f299c +.long 0xfc657e2a,0x597f299c +.long 0x3ad6faec,0x5fcb6fab +.long 0x3ad6faec,0x5fcb6fab +.long 0x4a475817,0x6c44198c +.long 0x4a475817,0x6c44198c +.long 0,0 +.long 0,0 +.long 0x14151617,0x10111213 +.long 0x04050607,0x00010203 +.byte 83,72,65,53,49,50,32,102,111,114,32,80,111,119,101,114,73,83,65,32,50,46,48,55,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 
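In the SHA-512 tables each 64-bit K[i] occupies a full 16-byte row, emitted as two identical .long pairs so the constant sits in both 64-bit lanes; this little-endian flavour writes the low 32-bit word of each pair first (0xd728ae22,0x428a2f98 for K[0] = 0x428a2f98d728ae22), while the big-endian variants further down keep the natural high-word-first order. The zero rows and the two index rows at the very end again look like the terminator and the byte-order/merge masks used for the final stores. A short Perl sketch of the two orderings (editorial only, not part of the patch; it assumes a perl built with 64-bit integers):

#!/usr/bin/env perl
# Editorial sketch: show how the SHA-512 K[] rows above are laid out.
# Each 64-bit constant fills both 64-bit lanes of a 16-byte row (the
# same .long pair is emitted twice); the little-endian flavour stores
# the low 32-bit word first, the big-endian flavours swap the words.
use strict;
use warnings;

# First four of the 80 standard SHA-512 constants.
my @K = (0x428a2f98d728ae22, 0x7137449123ef65cd,
         0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc);

for my $k (@K) {
    my ($hi, $lo) = ($k >> 32, $k & 0xffffffff);
    # one 16-byte row = the same 64-bit lane twice; the BE variants
    # would print $hi before $lo here
    printf ".long 0x%08x,0x%08x\n", $lo, $hi for 1 .. 2;
}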
+.align 2 +.align 2 Index: cipher/sha512-ppc832.S =================================================================== --- /dev/null +++ cipher/sha512-ppc832.S @@ -0,0 +1,831 @@ +.machine "any" +.text + +.globl sha512_block_p8 +.align 6 +sha512_block_p8: + stwu 1,-328(1) + mflr 8 + li 10,175 + li 11,191 + stvx 24,10,1 + addi 10,10,32 + mfspr 12,256 + stvx 25,11,1 + addi 11,11,32 + stvx 26,10,1 + addi 10,10,32 + stvx 27,11,1 + addi 11,11,32 + stvx 28,10,1 + addi 10,10,32 + stvx 29,11,1 + addi 11,11,32 + stvx 30,10,1 + stvx 31,11,1 + li 11,-4096+255 + stw 12,300(1) + li 10,0x10 + stw 26,304(1) + li 26,0x20 + stw 27,308(1) + li 27,0x30 + stw 28,312(1) + li 28,0x40 + stw 29,316(1) + li 29,0x50 + stw 30,320(1) + li 30,0x60 + stw 31,324(1) + li 31,0x70 + stw 8,332(1) + mtspr 256,11 + + bl LPICmeup + addi 11,1,47 + .long 0x7C001E99 + .long 0x7C4A1E99 + .long 0x7C9A1E99 + vsldoi 1,0,0,8 + .long 0x7CDB1E99 + vsldoi 3,2,2,8 + vsldoi 5,4,4,8 + vsldoi 7,6,6,8 + li 0,4 + b Loop +.align 5 +Loop: + lvx 28,0,6 + .long 0x7D002699 + addi 4,4,16 + mr 7,6 + stvx 0,0,11 + stvx 1,10,11 + stvx 2,26,11 + stvx 3,27,11 + stvx 4,28,11 + stvx 5,29,11 + stvx 6,30,11 + stvx 7,31,11 + .long 0x10E7E0C0 + lvx 28,10,6 + .long 0x10E740C0 + vsel 29,6,5,4 + .long 0x10C6E0C0 + .long 0x10E7E8C0 + .long 0x13C4FEC2 + .long 0x10E7F0C0 + vxor 29,0,1 + vsel 29,1,2,29 + .long 0x106338C0 + .long 0x13C086C2 + .long 0x13DEE8C0 + .long 0x10E7F0C0 + lvx 28,26,7 + .long 0x7D402699 + addi 4,4,16 + vsldoi 9,8,8,8 + .long 0x10C648C0 + vsel 29,5,4,3 + .long 0x10A5E0C0 + .long 0x10C6E8C0 + .long 0x13C3FEC2 + .long 0x10C6F0C0 + vxor 29,7,0 + vsel 29,0,1,29 + .long 0x104230C0 + .long 0x13C786C2 + .long 0x13DEE8C0 + .long 0x10C6F0C0 + lvx 28,27,7 + .long 0x10A550C0 + vsel 29,4,3,2 + .long 0x1084E0C0 + .long 0x10A5E8C0 + .long 0x13C2FEC2 + .long 0x10A5F0C0 + vxor 29,6,7 + vsel 29,7,0,29 + .long 0x102128C0 + .long 0x13C686C2 + .long 0x13DEE8C0 + .long 0x10A5F0C0 + lvx 28,28,7 + .long 0x7D802699 + addi 4,4,16 + vsldoi 11,10,10,8 + .long 0x108458C0 + vsel 29,3,2,1 + .long 0x1063E0C0 + .long 0x1084E8C0 + .long 0x13C1FEC2 + .long 0x1084F0C0 + vxor 29,5,6 + vsel 29,6,7,29 + .long 0x100020C0 + .long 0x13C586C2 + .long 0x13DEE8C0 + .long 0x1084F0C0 + lvx 28,29,7 + .long 0x106360C0 + vsel 29,2,1,0 + .long 0x1042E0C0 + .long 0x1063E8C0 + .long 0x13C0FEC2 + .long 0x1063F0C0 + vxor 29,4,5 + vsel 29,5,6,29 + .long 0x10E718C0 + .long 0x13C486C2 + .long 0x13DEE8C0 + .long 0x1063F0C0 + lvx 28,30,7 + .long 0x7DC02699 + addi 4,4,16 + vsldoi 13,12,12,8 + .long 0x104268C0 + vsel 29,1,0,7 + .long 0x1021E0C0 + .long 0x1042E8C0 + .long 0x13C7FEC2 + .long 0x1042F0C0 + vxor 29,3,4 + vsel 29,4,5,29 + .long 0x10C610C0 + .long 0x13C386C2 + .long 0x13DEE8C0 + .long 0x1042F0C0 + lvx 28,31,7 + addi 7,7,0x80 + .long 0x102170C0 + vsel 29,0,7,6 + .long 0x1000E0C0 + .long 0x1021E8C0 + .long 0x13C6FEC2 + .long 0x1021F0C0 + vxor 29,2,3 + vsel 29,3,4,29 + .long 0x10A508C0 + .long 0x13C286C2 + .long 0x13DEE8C0 + .long 0x1021F0C0 + lvx 28,0,7 + .long 0x7E002699 + addi 4,4,16 + vsldoi 15,14,14,8 + .long 0x100078C0 + vsel 29,7,6,5 + .long 0x10E7E0C0 + .long 0x1000E8C0 + .long 0x13C5FEC2 + .long 0x1000F0C0 + vxor 29,1,2 + vsel 29,2,3,29 + .long 0x108400C0 + .long 0x13C186C2 + .long 0x13DEE8C0 + .long 0x1000F0C0 + lvx 28,10,7 + .long 0x10E780C0 + vsel 29,6,5,4 + .long 0x10C6E0C0 + .long 0x10E7E8C0 + .long 0x13C4FEC2 + .long 0x10E7F0C0 + vxor 29,0,1 + vsel 29,1,2,29 + .long 0x106338C0 + .long 0x13C086C2 + .long 0x13DEE8C0 + .long 0x10E7F0C0 + lvx 28,26,7 + .long 0x7E402699 + addi 4,4,16 + 
vsldoi 17,16,16,8 + .long 0x10C688C0 + vsel 29,5,4,3 + .long 0x10A5E0C0 + .long 0x10C6E8C0 + .long 0x13C3FEC2 + .long 0x10C6F0C0 + vxor 29,7,0 + vsel 29,0,1,29 + .long 0x104230C0 + .long 0x13C786C2 + .long 0x13DEE8C0 + .long 0x10C6F0C0 + lvx 28,27,7 + .long 0x10A590C0 + vsel 29,4,3,2 + .long 0x1084E0C0 + .long 0x10A5E8C0 + .long 0x13C2FEC2 + .long 0x10A5F0C0 + vxor 29,6,7 + vsel 29,7,0,29 + .long 0x102128C0 + .long 0x13C686C2 + .long 0x13DEE8C0 + .long 0x10A5F0C0 + lvx 28,28,7 + .long 0x7F002699 + addi 4,4,16 + vsldoi 19,18,18,8 + .long 0x108498C0 + vsel 29,3,2,1 + .long 0x1063E0C0 + .long 0x1084E8C0 + .long 0x13C1FEC2 + .long 0x1084F0C0 + vxor 29,5,6 + vsel 29,6,7,29 + .long 0x100020C0 + .long 0x13C586C2 + .long 0x13DEE8C0 + .long 0x1084F0C0 + lvx 28,29,7 + .long 0x1063C0C0 + vsel 29,2,1,0 + .long 0x1042E0C0 + .long 0x1063E8C0 + .long 0x13C0FEC2 + .long 0x1063F0C0 + vxor 29,4,5 + vsel 29,5,6,29 + .long 0x10E718C0 + .long 0x13C486C2 + .long 0x13DEE8C0 + .long 0x1063F0C0 + lvx 28,30,7 + .long 0x7F402699 + addi 4,4,16 + vsldoi 25,24,24,8 + .long 0x1042C8C0 + vsel 29,1,0,7 + .long 0x1021E0C0 + .long 0x1042E8C0 + .long 0x13C7FEC2 + .long 0x1042F0C0 + vxor 29,3,4 + vsel 29,4,5,29 + .long 0x10C610C0 + .long 0x13C386C2 + .long 0x13DEE8C0 + .long 0x1042F0C0 + lvx 28,31,7 + addi 7,7,0x80 + .long 0x1021D0C0 + vsel 29,0,7,6 + .long 0x1000E0C0 + .long 0x1021E8C0 + .long 0x13C6FEC2 + .long 0x1021F0C0 + vxor 29,2,3 + vsel 29,3,4,29 + .long 0x10A508C0 + .long 0x13C286C2 + .long 0x13DEE8C0 + .long 0x1021F0C0 + lvx 28,0,7 + vsldoi 27,26,26,8 + .long 0x13C906C2 + .long 0x1108F0C0 + .long 0x13DA7EC2 + .long 0x1108F0C0 + .long 0x110888C0 + .long 0x1000D8C0 + vsel 29,7,6,5 + .long 0x10E7E0C0 + .long 0x1000E8C0 + .long 0x13C5FEC2 + .long 0x1000F0C0 + vxor 29,1,2 + vsel 29,2,3,29 + .long 0x108400C0 + .long 0x13C186C2 + .long 0x13DEE8C0 + .long 0x1000F0C0 + lvx 28,10,7 + mtctr 0 + b L16_xx +.align 5 +L16_xx: + .long 0x13CA06C2 + .long 0x1129F0C0 + .long 0x13DB7EC2 + .long 0x1129F0C0 + .long 0x112990C0 + .long 0x10E740C0 + vsel 29,6,5,4 + .long 0x10C6E0C0 + .long 0x10E7E8C0 + .long 0x13C4FEC2 + .long 0x10E7F0C0 + vxor 29,0,1 + vsel 29,1,2,29 + .long 0x106338C0 + .long 0x13C086C2 + .long 0x13DEE8C0 + .long 0x10E7F0C0 + lvx 28,26,7 + .long 0x13CB06C2 + .long 0x114AF0C0 + .long 0x13C87EC2 + .long 0x114AF0C0 + .long 0x114A98C0 + .long 0x10C648C0 + vsel 29,5,4,3 + .long 0x10A5E0C0 + .long 0x10C6E8C0 + .long 0x13C3FEC2 + .long 0x10C6F0C0 + vxor 29,7,0 + vsel 29,0,1,29 + .long 0x104230C0 + .long 0x13C786C2 + .long 0x13DEE8C0 + .long 0x10C6F0C0 + lvx 28,27,7 + .long 0x13CC06C2 + .long 0x116BF0C0 + .long 0x13C97EC2 + .long 0x116BF0C0 + .long 0x116BC0C0 + .long 0x10A550C0 + vsel 29,4,3,2 + .long 0x1084E0C0 + .long 0x10A5E8C0 + .long 0x13C2FEC2 + .long 0x10A5F0C0 + vxor 29,6,7 + vsel 29,7,0,29 + .long 0x102128C0 + .long 0x13C686C2 + .long 0x13DEE8C0 + .long 0x10A5F0C0 + lvx 28,28,7 + .long 0x13CD06C2 + .long 0x118CF0C0 + .long 0x13CA7EC2 + .long 0x118CF0C0 + .long 0x118CC8C0 + .long 0x108458C0 + vsel 29,3,2,1 + .long 0x1063E0C0 + .long 0x1084E8C0 + .long 0x13C1FEC2 + .long 0x1084F0C0 + vxor 29,5,6 + vsel 29,6,7,29 + .long 0x100020C0 + .long 0x13C586C2 + .long 0x13DEE8C0 + .long 0x1084F0C0 + lvx 28,29,7 + .long 0x13CE06C2 + .long 0x11ADF0C0 + .long 0x13CB7EC2 + .long 0x11ADF0C0 + .long 0x11ADD0C0 + .long 0x106360C0 + vsel 29,2,1,0 + .long 0x1042E0C0 + .long 0x1063E8C0 + .long 0x13C0FEC2 + .long 0x1063F0C0 + vxor 29,4,5 + vsel 29,5,6,29 + .long 0x10E718C0 + .long 0x13C486C2 + .long 0x13DEE8C0 + .long 0x1063F0C0 + lvx 28,30,7 + 
.long 0x13CF06C2 + .long 0x11CEF0C0 + .long 0x13CC7EC2 + .long 0x11CEF0C0 + .long 0x11CED8C0 + .long 0x104268C0 + vsel 29,1,0,7 + .long 0x1021E0C0 + .long 0x1042E8C0 + .long 0x13C7FEC2 + .long 0x1042F0C0 + vxor 29,3,4 + vsel 29,4,5,29 + .long 0x10C610C0 + .long 0x13C386C2 + .long 0x13DEE8C0 + .long 0x1042F0C0 + lvx 28,31,7 + addi 7,7,0x80 + .long 0x13D006C2 + .long 0x11EFF0C0 + .long 0x13CD7EC2 + .long 0x11EFF0C0 + .long 0x11EF40C0 + .long 0x102170C0 + vsel 29,0,7,6 + .long 0x1000E0C0 + .long 0x1021E8C0 + .long 0x13C6FEC2 + .long 0x1021F0C0 + vxor 29,2,3 + vsel 29,3,4,29 + .long 0x10A508C0 + .long 0x13C286C2 + .long 0x13DEE8C0 + .long 0x1021F0C0 + lvx 28,0,7 + .long 0x13D106C2 + .long 0x1210F0C0 + .long 0x13CE7EC2 + .long 0x1210F0C0 + .long 0x121048C0 + .long 0x100078C0 + vsel 29,7,6,5 + .long 0x10E7E0C0 + .long 0x1000E8C0 + .long 0x13C5FEC2 + .long 0x1000F0C0 + vxor 29,1,2 + vsel 29,2,3,29 + .long 0x108400C0 + .long 0x13C186C2 + .long 0x13DEE8C0 + .long 0x1000F0C0 + lvx 28,10,7 + .long 0x13D206C2 + .long 0x1231F0C0 + .long 0x13CF7EC2 + .long 0x1231F0C0 + .long 0x123150C0 + .long 0x10E780C0 + vsel 29,6,5,4 + .long 0x10C6E0C0 + .long 0x10E7E8C0 + .long 0x13C4FEC2 + .long 0x10E7F0C0 + vxor 29,0,1 + vsel 29,1,2,29 + .long 0x106338C0 + .long 0x13C086C2 + .long 0x13DEE8C0 + .long 0x10E7F0C0 + lvx 28,26,7 + .long 0x13D306C2 + .long 0x1252F0C0 + .long 0x13D07EC2 + .long 0x1252F0C0 + .long 0x125258C0 + .long 0x10C688C0 + vsel 29,5,4,3 + .long 0x10A5E0C0 + .long 0x10C6E8C0 + .long 0x13C3FEC2 + .long 0x10C6F0C0 + vxor 29,7,0 + vsel 29,0,1,29 + .long 0x104230C0 + .long 0x13C786C2 + .long 0x13DEE8C0 + .long 0x10C6F0C0 + lvx 28,27,7 + .long 0x13D806C2 + .long 0x1273F0C0 + .long 0x13D17EC2 + .long 0x1273F0C0 + .long 0x127360C0 + .long 0x10A590C0 + vsel 29,4,3,2 + .long 0x1084E0C0 + .long 0x10A5E8C0 + .long 0x13C2FEC2 + .long 0x10A5F0C0 + vxor 29,6,7 + vsel 29,7,0,29 + .long 0x102128C0 + .long 0x13C686C2 + .long 0x13DEE8C0 + .long 0x10A5F0C0 + lvx 28,28,7 + .long 0x13D906C2 + .long 0x1318F0C0 + .long 0x13D27EC2 + .long 0x1318F0C0 + .long 0x131868C0 + .long 0x108498C0 + vsel 29,3,2,1 + .long 0x1063E0C0 + .long 0x1084E8C0 + .long 0x13C1FEC2 + .long 0x1084F0C0 + vxor 29,5,6 + vsel 29,6,7,29 + .long 0x100020C0 + .long 0x13C586C2 + .long 0x13DEE8C0 + .long 0x1084F0C0 + lvx 28,29,7 + .long 0x13DA06C2 + .long 0x1339F0C0 + .long 0x13D37EC2 + .long 0x1339F0C0 + .long 0x133970C0 + .long 0x1063C0C0 + vsel 29,2,1,0 + .long 0x1042E0C0 + .long 0x1063E8C0 + .long 0x13C0FEC2 + .long 0x1063F0C0 + vxor 29,4,5 + vsel 29,5,6,29 + .long 0x10E718C0 + .long 0x13C486C2 + .long 0x13DEE8C0 + .long 0x1063F0C0 + lvx 28,30,7 + .long 0x13DB06C2 + .long 0x135AF0C0 + .long 0x13D87EC2 + .long 0x135AF0C0 + .long 0x135A78C0 + .long 0x1042C8C0 + vsel 29,1,0,7 + .long 0x1021E0C0 + .long 0x1042E8C0 + .long 0x13C7FEC2 + .long 0x1042F0C0 + vxor 29,3,4 + vsel 29,4,5,29 + .long 0x10C610C0 + .long 0x13C386C2 + .long 0x13DEE8C0 + .long 0x1042F0C0 + lvx 28,31,7 + addi 7,7,0x80 + .long 0x13C806C2 + .long 0x137BF0C0 + .long 0x13D97EC2 + .long 0x137BF0C0 + .long 0x137B80C0 + .long 0x1021D0C0 + vsel 29,0,7,6 + .long 0x1000E0C0 + .long 0x1021E8C0 + .long 0x13C6FEC2 + .long 0x1021F0C0 + vxor 29,2,3 + vsel 29,3,4,29 + .long 0x10A508C0 + .long 0x13C286C2 + .long 0x13DEE8C0 + .long 0x1021F0C0 + lvx 28,0,7 + .long 0x13C906C2 + .long 0x1108F0C0 + .long 0x13DA7EC2 + .long 0x1108F0C0 + .long 0x110888C0 + .long 0x1000D8C0 + vsel 29,7,6,5 + .long 0x10E7E0C0 + .long 0x1000E8C0 + .long 0x13C5FEC2 + .long 0x1000F0C0 + vxor 29,1,2 + vsel 29,2,3,29 + .long 0x108400C0 
+ .long 0x13C186C2 + .long 0x13DEE8C0 + .long 0x1000F0C0 + lvx 28,10,7 + bc 16,0,L16_xx + + lvx 10,0,11 + subic. 5,5,1 + lvx 11,10,11 + .long 0x100050C0 + lvx 12,26,11 + .long 0x102158C0 + lvx 13,27,11 + .long 0x104260C0 + lvx 14,28,11 + .long 0x106368C0 + lvx 15,29,11 + .long 0x108470C0 + lvx 16,30,11 + .long 0x10A578C0 + lvx 17,31,11 + .long 0x10C680C0 + .long 0x10E788C0 + bne Loop + vperm 0,0,1,28 + vperm 2,2,3,28 + vperm 4,4,5,28 + vperm 6,6,7,28 + .long 0x7C001F99 + .long 0x7C4A1F99 + .long 0x7C9A1F99 + .long 0x7CDB1F99 + addi 11,1,175 + mtlr 8 + mtspr 256,12 + lvx 24,0,11 + lvx 25,10,11 + lvx 26,26,11 + lvx 27,27,11 + lvx 28,28,11 + lvx 29,29,11 + lvx 30,30,11 + lvx 31,31,11 + lwz 26,304(1) + lwz 27,308(1) + lwz 28,312(1) + lwz 29,316(1) + lwz 30,320(1) + lwz 31,324(1) + addi 1,1,328 + blr +.long 0 +.byte 0,12,4,1,0x80,6,3,0 +.long 0 + +.align 6 +LPICmeup: + mflr 0 + bcl 20,31,$+4 + mflr 6 + addi 6,6,56 + mtlr 0 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 +.space 28 +.long 0x428a2f98,0xd728ae22 +.long 0x428a2f98,0xd728ae22 +.long 0x71374491,0x23ef65cd +.long 0x71374491,0x23ef65cd +.long 0xb5c0fbcf,0xec4d3b2f +.long 0xb5c0fbcf,0xec4d3b2f +.long 0xe9b5dba5,0x8189dbbc +.long 0xe9b5dba5,0x8189dbbc +.long 0x3956c25b,0xf348b538 +.long 0x3956c25b,0xf348b538 +.long 0x59f111f1,0xb605d019 +.long 0x59f111f1,0xb605d019 +.long 0x923f82a4,0xaf194f9b +.long 0x923f82a4,0xaf194f9b +.long 0xab1c5ed5,0xda6d8118 +.long 0xab1c5ed5,0xda6d8118 +.long 0xd807aa98,0xa3030242 +.long 0xd807aa98,0xa3030242 +.long 0x12835b01,0x45706fbe +.long 0x12835b01,0x45706fbe +.long 0x243185be,0x4ee4b28c +.long 0x243185be,0x4ee4b28c +.long 0x550c7dc3,0xd5ffb4e2 +.long 0x550c7dc3,0xd5ffb4e2 +.long 0x72be5d74,0xf27b896f +.long 0x72be5d74,0xf27b896f +.long 0x80deb1fe,0x3b1696b1 +.long 0x80deb1fe,0x3b1696b1 +.long 0x9bdc06a7,0x25c71235 +.long 0x9bdc06a7,0x25c71235 +.long 0xc19bf174,0xcf692694 +.long 0xc19bf174,0xcf692694 +.long 0xe49b69c1,0x9ef14ad2 +.long 0xe49b69c1,0x9ef14ad2 +.long 0xefbe4786,0x384f25e3 +.long 0xefbe4786,0x384f25e3 +.long 0x0fc19dc6,0x8b8cd5b5 +.long 0x0fc19dc6,0x8b8cd5b5 +.long 0x240ca1cc,0x77ac9c65 +.long 0x240ca1cc,0x77ac9c65 +.long 0x2de92c6f,0x592b0275 +.long 0x2de92c6f,0x592b0275 +.long 0x4a7484aa,0x6ea6e483 +.long 0x4a7484aa,0x6ea6e483 +.long 0x5cb0a9dc,0xbd41fbd4 +.long 0x5cb0a9dc,0xbd41fbd4 +.long 0x76f988da,0x831153b5 +.long 0x76f988da,0x831153b5 +.long 0x983e5152,0xee66dfab +.long 0x983e5152,0xee66dfab +.long 0xa831c66d,0x2db43210 +.long 0xa831c66d,0x2db43210 +.long 0xb00327c8,0x98fb213f +.long 0xb00327c8,0x98fb213f +.long 0xbf597fc7,0xbeef0ee4 +.long 0xbf597fc7,0xbeef0ee4 +.long 0xc6e00bf3,0x3da88fc2 +.long 0xc6e00bf3,0x3da88fc2 +.long 0xd5a79147,0x930aa725 +.long 0xd5a79147,0x930aa725 +.long 0x06ca6351,0xe003826f +.long 0x06ca6351,0xe003826f +.long 0x14292967,0x0a0e6e70 +.long 0x14292967,0x0a0e6e70 +.long 0x27b70a85,0x46d22ffc +.long 0x27b70a85,0x46d22ffc +.long 0x2e1b2138,0x5c26c926 +.long 0x2e1b2138,0x5c26c926 +.long 0x4d2c6dfc,0x5ac42aed +.long 0x4d2c6dfc,0x5ac42aed +.long 0x53380d13,0x9d95b3df +.long 0x53380d13,0x9d95b3df +.long 0x650a7354,0x8baf63de +.long 0x650a7354,0x8baf63de +.long 0x766a0abb,0x3c77b2a8 +.long 0x766a0abb,0x3c77b2a8 +.long 0x81c2c92e,0x47edaee6 +.long 0x81c2c92e,0x47edaee6 +.long 0x92722c85,0x1482353b +.long 0x92722c85,0x1482353b +.long 0xa2bfe8a1,0x4cf10364 +.long 0xa2bfe8a1,0x4cf10364 +.long 0xa81a664b,0xbc423001 +.long 0xa81a664b,0xbc423001 +.long 0xc24b8b70,0xd0f89791 +.long 0xc24b8b70,0xd0f89791 +.long 0xc76c51a3,0x0654be30 +.long 0xc76c51a3,0x0654be30 +.long 
0xd192e819,0xd6ef5218 +.long 0xd192e819,0xd6ef5218 +.long 0xd6990624,0x5565a910 +.long 0xd6990624,0x5565a910 +.long 0xf40e3585,0x5771202a +.long 0xf40e3585,0x5771202a +.long 0x106aa070,0x32bbd1b8 +.long 0x106aa070,0x32bbd1b8 +.long 0x19a4c116,0xb8d2d0c8 +.long 0x19a4c116,0xb8d2d0c8 +.long 0x1e376c08,0x5141ab53 +.long 0x1e376c08,0x5141ab53 +.long 0x2748774c,0xdf8eeb99 +.long 0x2748774c,0xdf8eeb99 +.long 0x34b0bcb5,0xe19b48a8 +.long 0x34b0bcb5,0xe19b48a8 +.long 0x391c0cb3,0xc5c95a63 +.long 0x391c0cb3,0xc5c95a63 +.long 0x4ed8aa4a,0xe3418acb +.long 0x4ed8aa4a,0xe3418acb +.long 0x5b9cca4f,0x7763e373 +.long 0x5b9cca4f,0x7763e373 +.long 0x682e6ff3,0xd6b2b8a3 +.long 0x682e6ff3,0xd6b2b8a3 +.long 0x748f82ee,0x5defb2fc +.long 0x748f82ee,0x5defb2fc +.long 0x78a5636f,0x43172f60 +.long 0x78a5636f,0x43172f60 +.long 0x84c87814,0xa1f0ab72 +.long 0x84c87814,0xa1f0ab72 +.long 0x8cc70208,0x1a6439ec +.long 0x8cc70208,0x1a6439ec +.long 0x90befffa,0x23631e28 +.long 0x90befffa,0x23631e28 +.long 0xa4506ceb,0xde82bde9 +.long 0xa4506ceb,0xde82bde9 +.long 0xbef9a3f7,0xb2c67915 +.long 0xbef9a3f7,0xb2c67915 +.long 0xc67178f2,0xe372532b +.long 0xc67178f2,0xe372532b +.long 0xca273ece,0xea26619c +.long 0xca273ece,0xea26619c +.long 0xd186b8c7,0x21c0c207 +.long 0xd186b8c7,0x21c0c207 +.long 0xeada7dd6,0xcde0eb1e +.long 0xeada7dd6,0xcde0eb1e +.long 0xf57d4f7f,0xee6ed178 +.long 0xf57d4f7f,0xee6ed178 +.long 0x06f067aa,0x72176fba +.long 0x06f067aa,0x72176fba +.long 0x0a637dc5,0xa2c898a6 +.long 0x0a637dc5,0xa2c898a6 +.long 0x113f9804,0xbef90dae +.long 0x113f9804,0xbef90dae +.long 0x1b710b35,0x131c471b +.long 0x1b710b35,0x131c471b +.long 0x28db77f5,0x23047d84 +.long 0x28db77f5,0x23047d84 +.long 0x32caab7b,0x40c72493 +.long 0x32caab7b,0x40c72493 +.long 0x3c9ebe0a,0x15c9bebc +.long 0x3c9ebe0a,0x15c9bebc +.long 0x431d67c4,0x9c100d4c +.long 0x431d67c4,0x9c100d4c +.long 0x4cc5d4be,0xcb3e42b6 +.long 0x4cc5d4be,0xcb3e42b6 +.long 0x597f299c,0xfc657e2a +.long 0x597f299c,0xfc657e2a +.long 0x5fcb6fab,0x3ad6faec +.long 0x5fcb6fab,0x3ad6faec +.long 0x6c44198c,0x4a475817 +.long 0x6c44198c,0x4a475817 +.long 0,0 +.long 0,0 +.long 0x00010203,0x04050607 +.long 0x10111213,0x14151617 +.byte 83,72,65,53,49,50,32,102,111,114,32,80,111,119,101,114,73,83,65,32,50,46,48,55,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 Index: cipher/sha512-ppc8be.S =================================================================== --- /dev/null +++ cipher/sha512-ppc8be.S @@ -0,0 +1,831 @@ +.machine "any" +.text + +.globl sha512_block_p8 +.align 6 +sha512_block_p8: + stdu 1,-384(1) + mflr 8 + li 10,207 + li 11,223 + stvx 24,10,1 + addi 10,10,32 + mfspr 12,256 + stvx 25,11,1 + addi 11,11,32 + stvx 26,10,1 + addi 10,10,32 + stvx 27,11,1 + addi 11,11,32 + stvx 28,10,1 + addi 10,10,32 + stvx 29,11,1 + addi 11,11,32 + stvx 30,10,1 + stvx 31,11,1 + li 11,-4096+255 + stw 12,332(1) + li 10,0x10 + std 26,336(1) + li 26,0x20 + std 27,344(1) + li 27,0x30 + std 28,352(1) + li 28,0x40 + std 29,360(1) + li 29,0x50 + std 30,368(1) + li 30,0x60 + std 31,376(1) + li 31,0x70 + std 8,400(1) + mtspr 256,11 + + bl LPICmeup + addi 11,1,79 + .long 0x7C001E99 + .long 0x7C4A1E99 + .long 0x7C9A1E99 + vsldoi 1,0,0,8 + .long 0x7CDB1E99 + vsldoi 3,2,2,8 + vsldoi 5,4,4,8 + vsldoi 7,6,6,8 + li 0,4 + b Loop +.align 5 +Loop: + lvx 28,0,6 + .long 0x7D002699 + addi 4,4,16 + mr 7,6 + stvx 0,0,11 + stvx 1,10,11 + stvx 2,26,11 + stvx 3,27,11 + stvx 4,28,11 + stvx 5,29,11 + stvx 6,30,11 + stvx 7,31,11 + .long 0x10E7E0C0 
+ lvx 28,10,6 + .long 0x10E740C0 + vsel 29,6,5,4 + .long 0x10C6E0C0 + .long 0x10E7E8C0 + .long 0x13C4FEC2 + .long 0x10E7F0C0 + vxor 29,0,1 + vsel 29,1,2,29 + .long 0x106338C0 + .long 0x13C086C2 + .long 0x13DEE8C0 + .long 0x10E7F0C0 + lvx 28,26,7 + .long 0x7D402699 + addi 4,4,16 + vsldoi 9,8,8,8 + .long 0x10C648C0 + vsel 29,5,4,3 + .long 0x10A5E0C0 + .long 0x10C6E8C0 + .long 0x13C3FEC2 + .long 0x10C6F0C0 + vxor 29,7,0 + vsel 29,0,1,29 + .long 0x104230C0 + .long 0x13C786C2 + .long 0x13DEE8C0 + .long 0x10C6F0C0 + lvx 28,27,7 + .long 0x10A550C0 + vsel 29,4,3,2 + .long 0x1084E0C0 + .long 0x10A5E8C0 + .long 0x13C2FEC2 + .long 0x10A5F0C0 + vxor 29,6,7 + vsel 29,7,0,29 + .long 0x102128C0 + .long 0x13C686C2 + .long 0x13DEE8C0 + .long 0x10A5F0C0 + lvx 28,28,7 + .long 0x7D802699 + addi 4,4,16 + vsldoi 11,10,10,8 + .long 0x108458C0 + vsel 29,3,2,1 + .long 0x1063E0C0 + .long 0x1084E8C0 + .long 0x13C1FEC2 + .long 0x1084F0C0 + vxor 29,5,6 + vsel 29,6,7,29 + .long 0x100020C0 + .long 0x13C586C2 + .long 0x13DEE8C0 + .long 0x1084F0C0 + lvx 28,29,7 + .long 0x106360C0 + vsel 29,2,1,0 + .long 0x1042E0C0 + .long 0x1063E8C0 + .long 0x13C0FEC2 + .long 0x1063F0C0 + vxor 29,4,5 + vsel 29,5,6,29 + .long 0x10E718C0 + .long 0x13C486C2 + .long 0x13DEE8C0 + .long 0x1063F0C0 + lvx 28,30,7 + .long 0x7DC02699 + addi 4,4,16 + vsldoi 13,12,12,8 + .long 0x104268C0 + vsel 29,1,0,7 + .long 0x1021E0C0 + .long 0x1042E8C0 + .long 0x13C7FEC2 + .long 0x1042F0C0 + vxor 29,3,4 + vsel 29,4,5,29 + .long 0x10C610C0 + .long 0x13C386C2 + .long 0x13DEE8C0 + .long 0x1042F0C0 + lvx 28,31,7 + addi 7,7,0x80 + .long 0x102170C0 + vsel 29,0,7,6 + .long 0x1000E0C0 + .long 0x1021E8C0 + .long 0x13C6FEC2 + .long 0x1021F0C0 + vxor 29,2,3 + vsel 29,3,4,29 + .long 0x10A508C0 + .long 0x13C286C2 + .long 0x13DEE8C0 + .long 0x1021F0C0 + lvx 28,0,7 + .long 0x7E002699 + addi 4,4,16 + vsldoi 15,14,14,8 + .long 0x100078C0 + vsel 29,7,6,5 + .long 0x10E7E0C0 + .long 0x1000E8C0 + .long 0x13C5FEC2 + .long 0x1000F0C0 + vxor 29,1,2 + vsel 29,2,3,29 + .long 0x108400C0 + .long 0x13C186C2 + .long 0x13DEE8C0 + .long 0x1000F0C0 + lvx 28,10,7 + .long 0x10E780C0 + vsel 29,6,5,4 + .long 0x10C6E0C0 + .long 0x10E7E8C0 + .long 0x13C4FEC2 + .long 0x10E7F0C0 + vxor 29,0,1 + vsel 29,1,2,29 + .long 0x106338C0 + .long 0x13C086C2 + .long 0x13DEE8C0 + .long 0x10E7F0C0 + lvx 28,26,7 + .long 0x7E402699 + addi 4,4,16 + vsldoi 17,16,16,8 + .long 0x10C688C0 + vsel 29,5,4,3 + .long 0x10A5E0C0 + .long 0x10C6E8C0 + .long 0x13C3FEC2 + .long 0x10C6F0C0 + vxor 29,7,0 + vsel 29,0,1,29 + .long 0x104230C0 + .long 0x13C786C2 + .long 0x13DEE8C0 + .long 0x10C6F0C0 + lvx 28,27,7 + .long 0x10A590C0 + vsel 29,4,3,2 + .long 0x1084E0C0 + .long 0x10A5E8C0 + .long 0x13C2FEC2 + .long 0x10A5F0C0 + vxor 29,6,7 + vsel 29,7,0,29 + .long 0x102128C0 + .long 0x13C686C2 + .long 0x13DEE8C0 + .long 0x10A5F0C0 + lvx 28,28,7 + .long 0x7F002699 + addi 4,4,16 + vsldoi 19,18,18,8 + .long 0x108498C0 + vsel 29,3,2,1 + .long 0x1063E0C0 + .long 0x1084E8C0 + .long 0x13C1FEC2 + .long 0x1084F0C0 + vxor 29,5,6 + vsel 29,6,7,29 + .long 0x100020C0 + .long 0x13C586C2 + .long 0x13DEE8C0 + .long 0x1084F0C0 + lvx 28,29,7 + .long 0x1063C0C0 + vsel 29,2,1,0 + .long 0x1042E0C0 + .long 0x1063E8C0 + .long 0x13C0FEC2 + .long 0x1063F0C0 + vxor 29,4,5 + vsel 29,5,6,29 + .long 0x10E718C0 + .long 0x13C486C2 + .long 0x13DEE8C0 + .long 0x1063F0C0 + lvx 28,30,7 + .long 0x7F402699 + addi 4,4,16 + vsldoi 25,24,24,8 + .long 0x1042C8C0 + vsel 29,1,0,7 + .long 0x1021E0C0 + .long 0x1042E8C0 + .long 0x13C7FEC2 + .long 0x1042F0C0 + vxor 29,3,4 + vsel 29,4,5,29 
+ .long 0x10C610C0 + .long 0x13C386C2 + .long 0x13DEE8C0 + .long 0x1042F0C0 + lvx 28,31,7 + addi 7,7,0x80 + .long 0x1021D0C0 + vsel 29,0,7,6 + .long 0x1000E0C0 + .long 0x1021E8C0 + .long 0x13C6FEC2 + .long 0x1021F0C0 + vxor 29,2,3 + vsel 29,3,4,29 + .long 0x10A508C0 + .long 0x13C286C2 + .long 0x13DEE8C0 + .long 0x1021F0C0 + lvx 28,0,7 + vsldoi 27,26,26,8 + .long 0x13C906C2 + .long 0x1108F0C0 + .long 0x13DA7EC2 + .long 0x1108F0C0 + .long 0x110888C0 + .long 0x1000D8C0 + vsel 29,7,6,5 + .long 0x10E7E0C0 + .long 0x1000E8C0 + .long 0x13C5FEC2 + .long 0x1000F0C0 + vxor 29,1,2 + vsel 29,2,3,29 + .long 0x108400C0 + .long 0x13C186C2 + .long 0x13DEE8C0 + .long 0x1000F0C0 + lvx 28,10,7 + mtctr 0 + b L16_xx +.align 5 +L16_xx: + .long 0x13CA06C2 + .long 0x1129F0C0 + .long 0x13DB7EC2 + .long 0x1129F0C0 + .long 0x112990C0 + .long 0x10E740C0 + vsel 29,6,5,4 + .long 0x10C6E0C0 + .long 0x10E7E8C0 + .long 0x13C4FEC2 + .long 0x10E7F0C0 + vxor 29,0,1 + vsel 29,1,2,29 + .long 0x106338C0 + .long 0x13C086C2 + .long 0x13DEE8C0 + .long 0x10E7F0C0 + lvx 28,26,7 + .long 0x13CB06C2 + .long 0x114AF0C0 + .long 0x13C87EC2 + .long 0x114AF0C0 + .long 0x114A98C0 + .long 0x10C648C0 + vsel 29,5,4,3 + .long 0x10A5E0C0 + .long 0x10C6E8C0 + .long 0x13C3FEC2 + .long 0x10C6F0C0 + vxor 29,7,0 + vsel 29,0,1,29 + .long 0x104230C0 + .long 0x13C786C2 + .long 0x13DEE8C0 + .long 0x10C6F0C0 + lvx 28,27,7 + .long 0x13CC06C2 + .long 0x116BF0C0 + .long 0x13C97EC2 + .long 0x116BF0C0 + .long 0x116BC0C0 + .long 0x10A550C0 + vsel 29,4,3,2 + .long 0x1084E0C0 + .long 0x10A5E8C0 + .long 0x13C2FEC2 + .long 0x10A5F0C0 + vxor 29,6,7 + vsel 29,7,0,29 + .long 0x102128C0 + .long 0x13C686C2 + .long 0x13DEE8C0 + .long 0x10A5F0C0 + lvx 28,28,7 + .long 0x13CD06C2 + .long 0x118CF0C0 + .long 0x13CA7EC2 + .long 0x118CF0C0 + .long 0x118CC8C0 + .long 0x108458C0 + vsel 29,3,2,1 + .long 0x1063E0C0 + .long 0x1084E8C0 + .long 0x13C1FEC2 + .long 0x1084F0C0 + vxor 29,5,6 + vsel 29,6,7,29 + .long 0x100020C0 + .long 0x13C586C2 + .long 0x13DEE8C0 + .long 0x1084F0C0 + lvx 28,29,7 + .long 0x13CE06C2 + .long 0x11ADF0C0 + .long 0x13CB7EC2 + .long 0x11ADF0C0 + .long 0x11ADD0C0 + .long 0x106360C0 + vsel 29,2,1,0 + .long 0x1042E0C0 + .long 0x1063E8C0 + .long 0x13C0FEC2 + .long 0x1063F0C0 + vxor 29,4,5 + vsel 29,5,6,29 + .long 0x10E718C0 + .long 0x13C486C2 + .long 0x13DEE8C0 + .long 0x1063F0C0 + lvx 28,30,7 + .long 0x13CF06C2 + .long 0x11CEF0C0 + .long 0x13CC7EC2 + .long 0x11CEF0C0 + .long 0x11CED8C0 + .long 0x104268C0 + vsel 29,1,0,7 + .long 0x1021E0C0 + .long 0x1042E8C0 + .long 0x13C7FEC2 + .long 0x1042F0C0 + vxor 29,3,4 + vsel 29,4,5,29 + .long 0x10C610C0 + .long 0x13C386C2 + .long 0x13DEE8C0 + .long 0x1042F0C0 + lvx 28,31,7 + addi 7,7,0x80 + .long 0x13D006C2 + .long 0x11EFF0C0 + .long 0x13CD7EC2 + .long 0x11EFF0C0 + .long 0x11EF40C0 + .long 0x102170C0 + vsel 29,0,7,6 + .long 0x1000E0C0 + .long 0x1021E8C0 + .long 0x13C6FEC2 + .long 0x1021F0C0 + vxor 29,2,3 + vsel 29,3,4,29 + .long 0x10A508C0 + .long 0x13C286C2 + .long 0x13DEE8C0 + .long 0x1021F0C0 + lvx 28,0,7 + .long 0x13D106C2 + .long 0x1210F0C0 + .long 0x13CE7EC2 + .long 0x1210F0C0 + .long 0x121048C0 + .long 0x100078C0 + vsel 29,7,6,5 + .long 0x10E7E0C0 + .long 0x1000E8C0 + .long 0x13C5FEC2 + .long 0x1000F0C0 + vxor 29,1,2 + vsel 29,2,3,29 + .long 0x108400C0 + .long 0x13C186C2 + .long 0x13DEE8C0 + .long 0x1000F0C0 + lvx 28,10,7 + .long 0x13D206C2 + .long 0x1231F0C0 + .long 0x13CF7EC2 + .long 0x1231F0C0 + .long 0x123150C0 + .long 0x10E780C0 + vsel 29,6,5,4 + .long 0x10C6E0C0 + .long 0x10E7E8C0 + .long 0x13C4FEC2 + .long 
0x10E7F0C0 + vxor 29,0,1 + vsel 29,1,2,29 + .long 0x106338C0 + .long 0x13C086C2 + .long 0x13DEE8C0 + .long 0x10E7F0C0 + lvx 28,26,7 + .long 0x13D306C2 + .long 0x1252F0C0 + .long 0x13D07EC2 + .long 0x1252F0C0 + .long 0x125258C0 + .long 0x10C688C0 + vsel 29,5,4,3 + .long 0x10A5E0C0 + .long 0x10C6E8C0 + .long 0x13C3FEC2 + .long 0x10C6F0C0 + vxor 29,7,0 + vsel 29,0,1,29 + .long 0x104230C0 + .long 0x13C786C2 + .long 0x13DEE8C0 + .long 0x10C6F0C0 + lvx 28,27,7 + .long 0x13D806C2 + .long 0x1273F0C0 + .long 0x13D17EC2 + .long 0x1273F0C0 + .long 0x127360C0 + .long 0x10A590C0 + vsel 29,4,3,2 + .long 0x1084E0C0 + .long 0x10A5E8C0 + .long 0x13C2FEC2 + .long 0x10A5F0C0 + vxor 29,6,7 + vsel 29,7,0,29 + .long 0x102128C0 + .long 0x13C686C2 + .long 0x13DEE8C0 + .long 0x10A5F0C0 + lvx 28,28,7 + .long 0x13D906C2 + .long 0x1318F0C0 + .long 0x13D27EC2 + .long 0x1318F0C0 + .long 0x131868C0 + .long 0x108498C0 + vsel 29,3,2,1 + .long 0x1063E0C0 + .long 0x1084E8C0 + .long 0x13C1FEC2 + .long 0x1084F0C0 + vxor 29,5,6 + vsel 29,6,7,29 + .long 0x100020C0 + .long 0x13C586C2 + .long 0x13DEE8C0 + .long 0x1084F0C0 + lvx 28,29,7 + .long 0x13DA06C2 + .long 0x1339F0C0 + .long 0x13D37EC2 + .long 0x1339F0C0 + .long 0x133970C0 + .long 0x1063C0C0 + vsel 29,2,1,0 + .long 0x1042E0C0 + .long 0x1063E8C0 + .long 0x13C0FEC2 + .long 0x1063F0C0 + vxor 29,4,5 + vsel 29,5,6,29 + .long 0x10E718C0 + .long 0x13C486C2 + .long 0x13DEE8C0 + .long 0x1063F0C0 + lvx 28,30,7 + .long 0x13DB06C2 + .long 0x135AF0C0 + .long 0x13D87EC2 + .long 0x135AF0C0 + .long 0x135A78C0 + .long 0x1042C8C0 + vsel 29,1,0,7 + .long 0x1021E0C0 + .long 0x1042E8C0 + .long 0x13C7FEC2 + .long 0x1042F0C0 + vxor 29,3,4 + vsel 29,4,5,29 + .long 0x10C610C0 + .long 0x13C386C2 + .long 0x13DEE8C0 + .long 0x1042F0C0 + lvx 28,31,7 + addi 7,7,0x80 + .long 0x13C806C2 + .long 0x137BF0C0 + .long 0x13D97EC2 + .long 0x137BF0C0 + .long 0x137B80C0 + .long 0x1021D0C0 + vsel 29,0,7,6 + .long 0x1000E0C0 + .long 0x1021E8C0 + .long 0x13C6FEC2 + .long 0x1021F0C0 + vxor 29,2,3 + vsel 29,3,4,29 + .long 0x10A508C0 + .long 0x13C286C2 + .long 0x13DEE8C0 + .long 0x1021F0C0 + lvx 28,0,7 + .long 0x13C906C2 + .long 0x1108F0C0 + .long 0x13DA7EC2 + .long 0x1108F0C0 + .long 0x110888C0 + .long 0x1000D8C0 + vsel 29,7,6,5 + .long 0x10E7E0C0 + .long 0x1000E8C0 + .long 0x13C5FEC2 + .long 0x1000F0C0 + vxor 29,1,2 + vsel 29,2,3,29 + .long 0x108400C0 + .long 0x13C186C2 + .long 0x13DEE8C0 + .long 0x1000F0C0 + lvx 28,10,7 + bc 16,0,L16_xx + + lvx 10,0,11 + subic. 
5,5,1 + lvx 11,10,11 + .long 0x100050C0 + lvx 12,26,11 + .long 0x102158C0 + lvx 13,27,11 + .long 0x104260C0 + lvx 14,28,11 + .long 0x106368C0 + lvx 15,29,11 + .long 0x108470C0 + lvx 16,30,11 + .long 0x10A578C0 + lvx 17,31,11 + .long 0x10C680C0 + .long 0x10E788C0 + bne Loop + vperm 0,0,1,28 + vperm 2,2,3,28 + vperm 4,4,5,28 + vperm 6,6,7,28 + .long 0x7C001F99 + .long 0x7C4A1F99 + .long 0x7C9A1F99 + .long 0x7CDB1F99 + addi 11,1,207 + mtlr 8 + mtspr 256,12 + lvx 24,0,11 + lvx 25,10,11 + lvx 26,26,11 + lvx 27,27,11 + lvx 28,28,11 + lvx 29,29,11 + lvx 30,30,11 + lvx 31,31,11 + ld 26,336(1) + ld 27,344(1) + ld 28,352(1) + ld 29,360(1) + ld 30,368(1) + ld 31,376(1) + addi 1,1,384 + blr +.long 0 +.byte 0,12,4,1,0x80,6,3,0 +.long 0 + +.align 6 +LPICmeup: + mflr 0 + bcl 20,31,$+4 + mflr 6 + addi 6,6,56 + mtlr 0 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 +.space 28 +.long 0x428a2f98,0xd728ae22 +.long 0x428a2f98,0xd728ae22 +.long 0x71374491,0x23ef65cd +.long 0x71374491,0x23ef65cd +.long 0xb5c0fbcf,0xec4d3b2f +.long 0xb5c0fbcf,0xec4d3b2f +.long 0xe9b5dba5,0x8189dbbc +.long 0xe9b5dba5,0x8189dbbc +.long 0x3956c25b,0xf348b538 +.long 0x3956c25b,0xf348b538 +.long 0x59f111f1,0xb605d019 +.long 0x59f111f1,0xb605d019 +.long 0x923f82a4,0xaf194f9b +.long 0x923f82a4,0xaf194f9b +.long 0xab1c5ed5,0xda6d8118 +.long 0xab1c5ed5,0xda6d8118 +.long 0xd807aa98,0xa3030242 +.long 0xd807aa98,0xa3030242 +.long 0x12835b01,0x45706fbe +.long 0x12835b01,0x45706fbe +.long 0x243185be,0x4ee4b28c +.long 0x243185be,0x4ee4b28c +.long 0x550c7dc3,0xd5ffb4e2 +.long 0x550c7dc3,0xd5ffb4e2 +.long 0x72be5d74,0xf27b896f +.long 0x72be5d74,0xf27b896f +.long 0x80deb1fe,0x3b1696b1 +.long 0x80deb1fe,0x3b1696b1 +.long 0x9bdc06a7,0x25c71235 +.long 0x9bdc06a7,0x25c71235 +.long 0xc19bf174,0xcf692694 +.long 0xc19bf174,0xcf692694 +.long 0xe49b69c1,0x9ef14ad2 +.long 0xe49b69c1,0x9ef14ad2 +.long 0xefbe4786,0x384f25e3 +.long 0xefbe4786,0x384f25e3 +.long 0x0fc19dc6,0x8b8cd5b5 +.long 0x0fc19dc6,0x8b8cd5b5 +.long 0x240ca1cc,0x77ac9c65 +.long 0x240ca1cc,0x77ac9c65 +.long 0x2de92c6f,0x592b0275 +.long 0x2de92c6f,0x592b0275 +.long 0x4a7484aa,0x6ea6e483 +.long 0x4a7484aa,0x6ea6e483 +.long 0x5cb0a9dc,0xbd41fbd4 +.long 0x5cb0a9dc,0xbd41fbd4 +.long 0x76f988da,0x831153b5 +.long 0x76f988da,0x831153b5 +.long 0x983e5152,0xee66dfab +.long 0x983e5152,0xee66dfab +.long 0xa831c66d,0x2db43210 +.long 0xa831c66d,0x2db43210 +.long 0xb00327c8,0x98fb213f +.long 0xb00327c8,0x98fb213f +.long 0xbf597fc7,0xbeef0ee4 +.long 0xbf597fc7,0xbeef0ee4 +.long 0xc6e00bf3,0x3da88fc2 +.long 0xc6e00bf3,0x3da88fc2 +.long 0xd5a79147,0x930aa725 +.long 0xd5a79147,0x930aa725 +.long 0x06ca6351,0xe003826f +.long 0x06ca6351,0xe003826f +.long 0x14292967,0x0a0e6e70 +.long 0x14292967,0x0a0e6e70 +.long 0x27b70a85,0x46d22ffc +.long 0x27b70a85,0x46d22ffc +.long 0x2e1b2138,0x5c26c926 +.long 0x2e1b2138,0x5c26c926 +.long 0x4d2c6dfc,0x5ac42aed +.long 0x4d2c6dfc,0x5ac42aed +.long 0x53380d13,0x9d95b3df +.long 0x53380d13,0x9d95b3df +.long 0x650a7354,0x8baf63de +.long 0x650a7354,0x8baf63de +.long 0x766a0abb,0x3c77b2a8 +.long 0x766a0abb,0x3c77b2a8 +.long 0x81c2c92e,0x47edaee6 +.long 0x81c2c92e,0x47edaee6 +.long 0x92722c85,0x1482353b +.long 0x92722c85,0x1482353b +.long 0xa2bfe8a1,0x4cf10364 +.long 0xa2bfe8a1,0x4cf10364 +.long 0xa81a664b,0xbc423001 +.long 0xa81a664b,0xbc423001 +.long 0xc24b8b70,0xd0f89791 +.long 0xc24b8b70,0xd0f89791 +.long 0xc76c51a3,0x0654be30 +.long 0xc76c51a3,0x0654be30 +.long 0xd192e819,0xd6ef5218 +.long 0xd192e819,0xd6ef5218 +.long 0xd6990624,0x5565a910 +.long 0xd6990624,0x5565a910 +.long 
0xf40e3585,0x5771202a +.long 0xf40e3585,0x5771202a +.long 0x106aa070,0x32bbd1b8 +.long 0x106aa070,0x32bbd1b8 +.long 0x19a4c116,0xb8d2d0c8 +.long 0x19a4c116,0xb8d2d0c8 +.long 0x1e376c08,0x5141ab53 +.long 0x1e376c08,0x5141ab53 +.long 0x2748774c,0xdf8eeb99 +.long 0x2748774c,0xdf8eeb99 +.long 0x34b0bcb5,0xe19b48a8 +.long 0x34b0bcb5,0xe19b48a8 +.long 0x391c0cb3,0xc5c95a63 +.long 0x391c0cb3,0xc5c95a63 +.long 0x4ed8aa4a,0xe3418acb +.long 0x4ed8aa4a,0xe3418acb +.long 0x5b9cca4f,0x7763e373 +.long 0x5b9cca4f,0x7763e373 +.long 0x682e6ff3,0xd6b2b8a3 +.long 0x682e6ff3,0xd6b2b8a3 +.long 0x748f82ee,0x5defb2fc +.long 0x748f82ee,0x5defb2fc +.long 0x78a5636f,0x43172f60 +.long 0x78a5636f,0x43172f60 +.long 0x84c87814,0xa1f0ab72 +.long 0x84c87814,0xa1f0ab72 +.long 0x8cc70208,0x1a6439ec +.long 0x8cc70208,0x1a6439ec +.long 0x90befffa,0x23631e28 +.long 0x90befffa,0x23631e28 +.long 0xa4506ceb,0xde82bde9 +.long 0xa4506ceb,0xde82bde9 +.long 0xbef9a3f7,0xb2c67915 +.long 0xbef9a3f7,0xb2c67915 +.long 0xc67178f2,0xe372532b +.long 0xc67178f2,0xe372532b +.long 0xca273ece,0xea26619c +.long 0xca273ece,0xea26619c +.long 0xd186b8c7,0x21c0c207 +.long 0xd186b8c7,0x21c0c207 +.long 0xeada7dd6,0xcde0eb1e +.long 0xeada7dd6,0xcde0eb1e +.long 0xf57d4f7f,0xee6ed178 +.long 0xf57d4f7f,0xee6ed178 +.long 0x06f067aa,0x72176fba +.long 0x06f067aa,0x72176fba +.long 0x0a637dc5,0xa2c898a6 +.long 0x0a637dc5,0xa2c898a6 +.long 0x113f9804,0xbef90dae +.long 0x113f9804,0xbef90dae +.long 0x1b710b35,0x131c471b +.long 0x1b710b35,0x131c471b +.long 0x28db77f5,0x23047d84 +.long 0x28db77f5,0x23047d84 +.long 0x32caab7b,0x40c72493 +.long 0x32caab7b,0x40c72493 +.long 0x3c9ebe0a,0x15c9bebc +.long 0x3c9ebe0a,0x15c9bebc +.long 0x431d67c4,0x9c100d4c +.long 0x431d67c4,0x9c100d4c +.long 0x4cc5d4be,0xcb3e42b6 +.long 0x4cc5d4be,0xcb3e42b6 +.long 0x597f299c,0xfc657e2a +.long 0x597f299c,0xfc657e2a +.long 0x5fcb6fab,0x3ad6faec +.long 0x5fcb6fab,0x3ad6faec +.long 0x6c44198c,0x4a475817 +.long 0x6c44198c,0x4a475817 +.long 0,0 +.long 0,0 +.long 0x00010203,0x04050607 +.long 0x10111213,0x14151617 +.byte 83,72,65,53,49,50,32,102,111,114,32,80,111,119,101,114,73,83,65,32,50,46,48,55,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 Index: cipher/sha512p8-ppc.pl =================================================================== --- /dev/null +++ cipher/sha512p8-ppc.pl @@ -0,0 +1,414 @@ +#! /usr/bin/env perl +# SPDX-License-Identifier: BSD-3-Clause + +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== + +# SHA256/512 for PowerISA v2.07. +# +# Accurate performance measurements are problematic, because it's +# always virtualized setup with possibly throttled processor. +# Relative comparison is therefore more informative. This module is +# ~60% faster than integer-only sha512-ppc.pl. To anchor to something +# else, SHA256 is 24% slower than sha1-ppc.pl and 2.5x slower than +# hardware-assisted aes-128-cbc encrypt. SHA512 is 20% faster than +# sha1-ppc.pl and 1.6x slower than aes-128-cbc. Another interesting +# result is degree of computational resources' utilization. 
POWER8 is a +# "massively multi-threaded chip", and the difference between single- and +# maximum multi-process benchmark results indicates that utilization is a +# whopping 94%. For sha512-ppc.pl we get [a not unimpressive] 84% and +# for sha1-ppc.pl 73%. 100% means that the multi-process result equals +# the single-process one, given that all threads end up on the same +# physical core. +# +###################################################################### +# Believed-to-be-accurate results in cycles per processed byte [on +# little-endian system]. Numbers in square brackets are for 64-bit +# build of sha512-ppc.pl, presented for reference. +# +# POWER8 POWER9 +# SHA256 9.7 [15.8] 11.2 [12.5] +# SHA512 6.1 [10.3] 7.0 [7.9] + +$flavour=shift; +$output =shift; + +if ($flavour =~ /64/) { + $SIZE_T=8; + $LRSAVE=2*$SIZE_T; + $STU="stdu"; + $POP="ld"; + $PUSH="std"; +} elsif ($flavour =~ /32/) { + $SIZE_T=4; + $LRSAVE=$SIZE_T; + $STU="stwu"; + $POP="lwz"; + $PUSH="stw"; +} else { die "nonsense $flavour"; } + +$LENDIAN=($flavour=~/le/); + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or +die "can't locate ppc-xlate.pl"; + +open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!"; + +if ($output =~ /512/) { + $bits=512; + $SZ=8; + $sz="d"; + $rounds=80; +} else { + $bits=256; + $SZ=4; + $sz="w"; + $rounds=64; +} + +$func="sha${bits}_block_p8"; +$LOCALS=8*$SIZE_T+8*16; +$FRAME=$LOCALS+9*16+6*$SIZE_T; + +$sp ="r1"; +$toc="r2"; +$ctx="r3"; +$inp="r4"; +$num="r5"; +$Tbl="r6"; +$idx="r7"; +$lrsave="r8"; +$offload="r11"; +$vrsave="r12"; +@I = ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70) = (0,map("r$_",(10,26..31))); + +@V=($A,$B,$C,$D,$E,$F,$G,$H)=map("v$_",(0..7)); +@X=map("v$_",(8..19,24..27)); +($Ki,$Func,$Sigma,$lemask)=map("v$_",(28..31)); + +sub ROUND { +my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; +my $j=($i+1)%16; +my $k=($i+2)%8; + +$code.=<<___ if ($i<15 && ($i%(16/$SZ))==(16/$SZ-1)); + lvx_u @X[$i+1],0,$inp ; load X[i] in advance + addi $inp,$inp,16 +___ +$code.=<<___ if ($i<16 && ($i%(16/$SZ))); + vsldoi @X[$i],@X[$i-1],@X[$i-1],$SZ +___ +$code.=<<___ if ($LENDIAN && $i<16 && ($i%(16/$SZ))==0); + vperm @X[$i],@X[$i],@X[$i],$lemask +___ +$code.=<<___ if ($i>=15); + vshasigma${sz} $Sigma,@X[($j+1)%16],0,0 + vaddu${sz}m @X[$j],@X[$j],$Sigma + vshasigma${sz} $Sigma,@X[($j+14)%16],0,15 + vaddu${sz}m @X[$j],@X[$j],$Sigma + vaddu${sz}m @X[$j],@X[$j],@X[($j+9)%16] +___ +$code.=<<___; + vaddu${sz}m $h,$h,@X[$i%16] ; h+=X[i] + vsel $Func,$g,$f,$e ; Ch(e,f,g) + vaddu${sz}m $g,$g,$Ki ; future h+=K[i] + vaddu${sz}m $h,$h,$Func ; h+=Ch(e,f,g) + vshasigma${sz} $Sigma,$e,1,15 ; Sigma1(e) + vaddu${sz}m $h,$h,$Sigma ; h+=Sigma1(e) + vxor $Func,$a,$b + vsel $Func,$b,$c,$Func ; Maj(a,b,c) + vaddu${sz}m $d,$d,$h ; d+=h + vshasigma${sz} $Sigma,$a,1,0 ; Sigma0(a) + vaddu${sz}m $Sigma,$Sigma,$Func ; Sigma0(a)+Maj(a,b,c) + vaddu${sz}m $h,$h,$Sigma ; h+=Sigma0(a)+Maj(a,b,c) + lvx $Ki,@I[$k],$idx ; load next K[i] +___ +$code.=<<___ if ($k == 7); + addi $idx,$idx,0x80 +___ +} + +$code=<<___; +.machine "any" +.text + +.globl $func +.align 6 +$func: + $STU $sp,-$FRAME($sp) + mflr $lrsave + li r10,`$LOCALS+15` + li r11,`$LOCALS+31` + stvx v24,r10,$sp # ABI says so + addi r10,r10,32 + mfspr $vrsave,256 + stvx v25,r11,$sp + addi r11,r11,32 + stvx v26,r10,$sp + addi r10,r10,32 + stvx v27,r11,$sp + addi r11,r11,32 + stvx v28,r10,$sp + addi r10,r10,32 + stvx v29,r11,$sp + addi r11,r11,32 + stvx v30,r10,$sp + stvx v31,r11,$sp + li
r11,-4096+255 # 0xfffff0ff + stw $vrsave,`$FRAME-6*$SIZE_T-4`($sp) # save vrsave + li $x10,0x10 + $PUSH r26,`$FRAME-6*$SIZE_T`($sp) + li $x20,0x20 + $PUSH r27,`$FRAME-5*$SIZE_T`($sp) + li $x30,0x30 + $PUSH r28,`$FRAME-4*$SIZE_T`($sp) + li $x40,0x40 + $PUSH r29,`$FRAME-3*$SIZE_T`($sp) + li $x50,0x50 + $PUSH r30,`$FRAME-2*$SIZE_T`($sp) + li $x60,0x60 + $PUSH r31,`$FRAME-1*$SIZE_T`($sp) + li $x70,0x70 + $PUSH $lrsave,`$FRAME+$LRSAVE`($sp) + mtspr 256,r11 + + bl LPICmeup + addi $offload,$sp,`8*$SIZE_T+15` +___ +$code.=<<___ if ($LENDIAN); + li $idx,8 + lvsl $lemask,0,$idx + vspltisb $Ki,0x0f + vxor $lemask,$lemask,$Ki +___ +$code.=<<___ if ($SZ==4); + lvx_4w $A,$x00,$ctx + lvx_4w $E,$x10,$ctx + vsldoi $B,$A,$A,4 # unpack + vsldoi $C,$A,$A,8 + vsldoi $D,$A,$A,12 + vsldoi $F,$E,$E,4 + vsldoi $G,$E,$E,8 + vsldoi $H,$E,$E,12 +___ +$code.=<<___ if ($SZ==8); + lvx_u $A,$x00,$ctx + lvx_u $C,$x10,$ctx + lvx_u $E,$x20,$ctx + vsldoi $B,$A,$A,8 # unpack + lvx_u $G,$x30,$ctx + vsldoi $D,$C,$C,8 + vsldoi $F,$E,$E,8 + vsldoi $H,$G,$G,8 +___ +$code.=<<___; + li r0,`($rounds-16)/16` # inner loop counter + b Loop +.align 5 +Loop: + lvx $Ki,$x00,$Tbl + lvx_u @X[0],0,$inp + addi $inp,$inp,16 + mr $idx,$Tbl # copy $Tbl + stvx $A,$x00,$offload # offload $A-$H + stvx $B,$x10,$offload + stvx $C,$x20,$offload + stvx $D,$x30,$offload + stvx $E,$x40,$offload + stvx $F,$x50,$offload + stvx $G,$x60,$offload + stvx $H,$x70,$offload + vaddu${sz}m $H,$H,$Ki # h+K[i] + lvx $Ki,$x10,$Tbl +___ +for ($i=0;$i<16;$i++) { &ROUND($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + mtctr r0 + b L16_xx +.align 5 +L16_xx: +___ +for (;$i<32;$i++) { &ROUND($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + bdnz L16_xx + + lvx @X[2],$x00,$offload + subic. $num,$num,1 + lvx @X[3],$x10,$offload + vaddu${sz}m $A,$A,@X[2] + lvx @X[4],$x20,$offload + vaddu${sz}m $B,$B,@X[3] + lvx @X[5],$x30,$offload + vaddu${sz}m $C,$C,@X[4] + lvx @X[6],$x40,$offload + vaddu${sz}m $D,$D,@X[5] + lvx @X[7],$x50,$offload + vaddu${sz}m $E,$E,@X[6] + lvx @X[8],$x60,$offload + vaddu${sz}m $F,$F,@X[7] + lvx @X[9],$x70,$offload + vaddu${sz}m $G,$G,@X[8] + vaddu${sz}m $H,$H,@X[9] + bne Loop +___ +$code.=<<___ if ($SZ==4); + lvx @X[0],$x20,$idx + vperm $A,$A,$B,$Ki # pack the answer + lvx @X[1],$x30,$idx + vperm $E,$E,$F,$Ki + vperm $A,$A,$C,@X[0] + vperm $E,$E,$G,@X[0] + vperm $A,$A,$D,@X[1] + vperm $E,$E,$H,@X[1] + stvx_4w $A,$x00,$ctx + stvx_4w $E,$x10,$ctx +___ +$code.=<<___ if ($SZ==8); + vperm $A,$A,$B,$Ki # pack the answer + vperm $C,$C,$D,$Ki + vperm $E,$E,$F,$Ki + vperm $G,$G,$H,$Ki + stvx_u $A,$x00,$ctx + stvx_u $C,$x10,$ctx + stvx_u $E,$x20,$ctx + stvx_u $G,$x30,$ctx +___ +$code.=<<___; + addi $offload,$sp,`$LOCALS+15` + mtlr $lrsave + mtspr 256,$vrsave + lvx v24,$x00,$offload # ABI says so + lvx v25,$x10,$offload + lvx v26,$x20,$offload + lvx v27,$x30,$offload + lvx v28,$x40,$offload + lvx v29,$x50,$offload + lvx v30,$x60,$offload + lvx v31,$x70,$offload + $POP r26,`$FRAME-6*$SIZE_T`($sp) + $POP r27,`$FRAME-5*$SIZE_T`($sp) + $POP r28,`$FRAME-4*$SIZE_T`($sp) + $POP r29,`$FRAME-3*$SIZE_T`($sp) + $POP r30,`$FRAME-2*$SIZE_T`($sp) + $POP r31,`$FRAME-1*$SIZE_T`($sp) + addi $sp,$sp,$FRAME + blr + .long 0 + .byte 0,12,4,1,0x80,6,3,0 + .long 0 +.size $func,.-$func +___ + +# Ugly hack here, because PPC assembler syntax seem to vary too +# much from platforms to platform... +$code.=<<___; +.align 6 +LPICmeup: + mflr r0 + bcl 20,31,\$+4 + mflr $Tbl ; vvvvvv "distance" between . 
and 1st data entry + addi $Tbl,$Tbl,`64-8` + mtlr r0 + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 + .space `64-9*4` +___ + +if ($SZ==8) { + local *table = sub { + foreach(@_) { $code.=".quad $_,$_\n"; } + }; + table( + "0x428a2f98d728ae22","0x7137449123ef65cd", + "0xb5c0fbcfec4d3b2f","0xe9b5dba58189dbbc", + "0x3956c25bf348b538","0x59f111f1b605d019", + "0x923f82a4af194f9b","0xab1c5ed5da6d8118", + "0xd807aa98a3030242","0x12835b0145706fbe", + "0x243185be4ee4b28c","0x550c7dc3d5ffb4e2", + "0x72be5d74f27b896f","0x80deb1fe3b1696b1", + "0x9bdc06a725c71235","0xc19bf174cf692694", + "0xe49b69c19ef14ad2","0xefbe4786384f25e3", + "0x0fc19dc68b8cd5b5","0x240ca1cc77ac9c65", + "0x2de92c6f592b0275","0x4a7484aa6ea6e483", + "0x5cb0a9dcbd41fbd4","0x76f988da831153b5", + "0x983e5152ee66dfab","0xa831c66d2db43210", + "0xb00327c898fb213f","0xbf597fc7beef0ee4", + "0xc6e00bf33da88fc2","0xd5a79147930aa725", + "0x06ca6351e003826f","0x142929670a0e6e70", + "0x27b70a8546d22ffc","0x2e1b21385c26c926", + "0x4d2c6dfc5ac42aed","0x53380d139d95b3df", + "0x650a73548baf63de","0x766a0abb3c77b2a8", + "0x81c2c92e47edaee6","0x92722c851482353b", + "0xa2bfe8a14cf10364","0xa81a664bbc423001", + "0xc24b8b70d0f89791","0xc76c51a30654be30", + "0xd192e819d6ef5218","0xd69906245565a910", + "0xf40e35855771202a","0x106aa07032bbd1b8", + "0x19a4c116b8d2d0c8","0x1e376c085141ab53", + "0x2748774cdf8eeb99","0x34b0bcb5e19b48a8", + "0x391c0cb3c5c95a63","0x4ed8aa4ae3418acb", + "0x5b9cca4f7763e373","0x682e6ff3d6b2b8a3", + "0x748f82ee5defb2fc","0x78a5636f43172f60", + "0x84c87814a1f0ab72","0x8cc702081a6439ec", + "0x90befffa23631e28","0xa4506cebde82bde9", + "0xbef9a3f7b2c67915","0xc67178f2e372532b", + "0xca273eceea26619c","0xd186b8c721c0c207", + "0xeada7dd6cde0eb1e","0xf57d4f7fee6ed178", + "0x06f067aa72176fba","0x0a637dc5a2c898a6", + "0x113f9804bef90dae","0x1b710b35131c471b", + "0x28db77f523047d84","0x32caab7b40c72493", + "0x3c9ebe0a15c9bebc","0x431d67c49c100d4c", + "0x4cc5d4becb3e42b6","0x597f299cfc657e2a", + "0x5fcb6fab3ad6faec","0x6c44198c4a475817","0"); +$code.=<<___ if (!$LENDIAN); +.quad 0x0001020304050607,0x1011121314151617 +___ +$code.=<<___ if ($LENDIAN); # quad-swapped +.quad 0x1011121314151617,0x0001020304050607 +___ +} else { + local *table = sub { + foreach(@_) { $code.=".long $_,$_,$_,$_\n"; } + }; + table( + "0x428a2f98","0x71374491","0xb5c0fbcf","0xe9b5dba5", + "0x3956c25b","0x59f111f1","0x923f82a4","0xab1c5ed5", + "0xd807aa98","0x12835b01","0x243185be","0x550c7dc3", + "0x72be5d74","0x80deb1fe","0x9bdc06a7","0xc19bf174", + "0xe49b69c1","0xefbe4786","0x0fc19dc6","0x240ca1cc", + "0x2de92c6f","0x4a7484aa","0x5cb0a9dc","0x76f988da", + "0x983e5152","0xa831c66d","0xb00327c8","0xbf597fc7", + "0xc6e00bf3","0xd5a79147","0x06ca6351","0x14292967", + "0x27b70a85","0x2e1b2138","0x4d2c6dfc","0x53380d13", + "0x650a7354","0x766a0abb","0x81c2c92e","0x92722c85", + "0xa2bfe8a1","0xa81a664b","0xc24b8b70","0xc76c51a3", + "0xd192e819","0xd6990624","0xf40e3585","0x106aa070", + "0x19a4c116","0x1e376c08","0x2748774c","0x34b0bcb5", + "0x391c0cb3","0x4ed8aa4a","0x5b9cca4f","0x682e6ff3", + "0x748f82ee","0x78a5636f","0x84c87814","0x8cc70208", + "0x90befffa","0xa4506ceb","0xbef9a3f7","0xc67178f2","0"); +$code.=<<___ if (!$LENDIAN); +.long 0x00010203,0x10111213,0x10111213,0x10111213 +.long 0x00010203,0x04050607,0x10111213,0x10111213 +.long 0x00010203,0x04050607,0x08090a0b,0x10111213 +___ +$code.=<<___ if ($LENDIAN); # word-swapped +.long 0x10111213,0x10111213,0x10111213,0x00010203 +.long 0x10111213,0x10111213,0x04050607,0x00010203 +.long 
0x10111213,0x08090a0b,0x04050607,0x00010203 +___ +} +$code.=<<___; +.asciz "SHA${bits} for PowerISA 2.07, CRYPTOGAMS by " +.align 2 +___ + +$code =~ s/\`([^\`]*)\`/eval $1/gem; +print $code; +close STDOUT;
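
For reference when reading the ROUND subroutine in sha512p8-ppc.pl above: vsel implements the Ch(e,f,g) and Maj(a,b,c) selections, and vshasigmaw/vshasigmad evaluate the SHA-2 Sigma functions in a single instruction. The scalar C sketch below shows one SHA-512 round with the standard FIPS 180-4 definitions; it is illustrative only, not part of this patch, and the helper names (sha512_round, ror64) are ad hoc.

    /* Illustrative scalar equivalent of one SHA-512 round (FIPS 180-4).
     * The vector code computes the same terms: Ch/Maj via vsel, Sigma1(e)
     * via vshasigmad $Sigma,$e,1,15 and Sigma0(a) via vshasigmad $Sigma,$a,1,0. */
    #include <stdint.h>

    static inline uint64_t ror64(uint64_t x, unsigned n)
    {
        return (x >> n) | (x << (64 - n));
    }

    static void sha512_round(uint64_t s[8], uint64_t Ki, uint64_t Wi)
    {
        uint64_t a = s[0], b = s[1], c = s[2], d = s[3];
        uint64_t e = s[4], f = s[5], g = s[6], h = s[7];

        uint64_t Ch   = (e & f) ^ (~e & g);                      /* vsel $Func,$g,$f,$e */
        uint64_t Maj  = (a & b) ^ (a & c) ^ (b & c);             /* vxor + vsel         */
        uint64_t Sig1 = ror64(e,14) ^ ror64(e,18) ^ ror64(e,41); /* Sigma1(e)           */
        uint64_t Sig0 = ror64(a,28) ^ ror64(a,34) ^ ror64(a,39); /* Sigma0(a)           */

        uint64_t T1 = h + Sig1 + Ch + Ki + Wi;
        uint64_t T2 = Sig0 + Maj;

        s[7] = g; s[6] = f; s[5] = e; s[4] = d + T1;             /* d += h              */
        s[3] = c; s[2] = b; s[1] = a; s[0] = T1 + T2;
    }

The generated code adds K[i] one round early into the register that becomes h in the next round (the "future h+=K[i]" line); that is only an instruction-scheduling choice, the arithmetic is the same as above.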
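
Similarly, the $i>=15 arm of ROUND extends the message schedule in place over a 16-entry ring of W values, using vshasigma with the low type bits for the small sigma functions. A scalar sketch of the same update (again FIPS 180-4 SHA-512, illustrative only; sha512_next_w and rotr64 are ad hoc names):

    /* In-place SHA-512 message-schedule extension over a 16-word ring,
     * mirroring the @X[$j] update in ROUND. */
    #include <stdint.h>

    static inline uint64_t rotr64(uint64_t x, unsigned n)
    {
        return (x >> n) | (x << (64 - n));
    }

    /* Small sigma functions, produced in the vector code by
     * vshasigmad ...,0,0 (sigma0) and vshasigmad ...,0,15 (sigma1). */
    static inline uint64_t sigma0(uint64_t x) { return rotr64(x, 1) ^ rotr64(x, 8) ^ (x >> 7); }
    static inline uint64_t sigma1(uint64_t x) { return rotr64(x, 19) ^ rotr64(x, 61) ^ (x >> 6); }

    /* W[t mod 16] holds W[t-16] on entry and W[t] on exit (t >= 16), i.e. the
     * usual W[t] = W[t-16] + sigma0(W[t-15]) + sigma1(W[t-2]) + W[t-7]. */
    static uint64_t sha512_next_w(uint64_t W[16], unsigned t)
    {
        unsigned j = t % 16;
        W[j] += sigma0(W[(j + 1) % 16]) + sigma1(W[(j + 14) % 16]) + W[(j + 9) % 16];
        return W[j];
    }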