/* salsa20-amd64.S  -  AMD64 implementation of Salsa20
 *
 * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
 *
 * This file is part of Libgcrypt.
 *
 * Libgcrypt is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * Libgcrypt is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

/*
 * Based on public domain implementation by D. J. Bernstein at
 *  http://cr.yp.to/snuffle.html
 */
#ifdef __x86_64
#include <config.h>
#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_SALSA20)
#include "asm-common-amd64.h"
.text
.align 16
.globl _gcry_salsa20_amd64_keysetup
ELF(.type _gcry_salsa20_amd64_keysetup,@function;)
_gcry_salsa20_amd64_keysetup:
        CFI_STARTPROC();
        movl 0(%rsi),%r8d
        movl 4(%rsi),%r9d
        movl 8(%rsi),%eax
        movl 12(%rsi),%r10d
        movl %r8d,20(%rdi)
        movl %r9d,40(%rdi)
        movl %eax,60(%rdi)
        movl %r10d,48(%rdi)
        cmp $256,%rdx
        jb .L_kbits128
.L_kbits256:
        movl 16(%rsi),%edx
        movl 20(%rsi),%ecx
        movl 24(%rsi),%r8d
        movl 28(%rsi),%esi
        movl %edx,28(%rdi)
        movl %ecx,16(%rdi)
        movl %r8d,36(%rdi)
        movl %esi,56(%rdi)
        mov $1634760805,%rsi
        mov $857760878,%rdx
        mov $2036477234,%rcx
        mov $1797285236,%r8
        movl %esi,0(%rdi)
        movl %edx,4(%rdi)
        movl %ecx,8(%rdi)
        movl %r8d,12(%rdi)
        jmp .L_keysetupdone
.L_kbits128:
        movl 0(%rsi),%edx
        movl 4(%rsi),%ecx
        movl 8(%rsi),%r8d
        movl 12(%rsi),%esi
        movl %edx,28(%rdi)
        movl %ecx,16(%rdi)
        movl %r8d,36(%rdi)
        movl %esi,56(%rdi)
        mov $1634760805,%rsi
        mov $824206446,%rdx
        mov $2036477238,%rcx
        mov $1797285236,%r8
        movl %esi,0(%rdi)
        movl %edx,4(%rdi)
        movl %ecx,8(%rdi)
        movl %r8d,12(%rdi)
.L_keysetupdone:
        ret_spec_stop
        CFI_ENDPROC();

.align 16
.globl _gcry_salsa20_amd64_ivsetup
ELF(.type _gcry_salsa20_amd64_ivsetup,@function;)
_gcry_salsa20_amd64_ivsetup:
CFI_STARTPROC();
        movl 0(%rsi),%r8d
        movl 4(%rsi),%esi
        mov $0,%r9
        mov $0,%rax
        movl %r8d,24(%rdi)
        movl %esi,44(%rdi)
        movl %r9d,32(%rdi)
        movl %eax,52(%rdi)
        ret_spec_stop
        CFI_ENDPROC();

.align 16
.globl _gcry_salsa20_amd64_encrypt_blocks
ELF(.type _gcry_salsa20_amd64_encrypt_blocks,@function;)
_gcry_salsa20_amd64_encrypt_blocks:
        /* Modifications to original implementation:
         *  - Number of rounds is passed in register %r8 (for Salsa20/12).
         *  - Length is input as number of blocks, so tail bytes are not
         *    handled here (this is done in salsa20.c).
         */
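        /* For reference, a sketch of the C-side declarations these entry
         * points are expected to match.  This is an assumption inferred from
         * the register usage below (and from the note above about %r8), not
         * text copied from this file; u32/byte stand for the usual libgcrypt
         * typedefs:
         *
         *   void _gcry_salsa20_amd64_keysetup(u32 *ctx, const byte *key,
         *                                     int keybits);
         *   void _gcry_salsa20_amd64_ivsetup(u32 *ctx, const byte *iv);
         *   unsigned int _gcry_salsa20_amd64_encrypt_blocks(u32 *ctx,
         *                                     const byte *src, byte *dst,
         *                                     size_t nblks, int rounds);
         *
         * Under the SysV AMD64 calling convention this places ctx in %rdi,
         * src in %rsi, dst in %rdx, the block count in %rcx and the round
         * count in %r8, which is how the prologue below shuffles its
         * arguments; the return value in %rax is the amount of stack used
         * (for burning by the caller).
         */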
        CFI_STARTPROC();
        push %rbx
        CFI_PUSH(%rbx);
        shlq $6,%rcx            /* blocks to bytes */
        mov %r8,%rbx
        mov %rsp,%r11
        CFI_DEF_CFA_REGISTER(%r11);
        sub $384,%rsp
        and $~31,%rsp
        mov %rdi,%r8
        mov %rsi,%rsi
        mov %rdx,%rdi
        mov %rcx,%rdx
        cmp $0,%rdx
        jbe .L_done
.L_start:
        cmp $256,%rdx
        jb .L_bytes_are_64_128_or_192
        movdqa 0(%r8),%xmm0
        pshufd $0x55,%xmm0,%xmm1
        pshufd $0xaa,%xmm0,%xmm2
        pshufd $0xff,%xmm0,%xmm3
        pshufd $0x00,%xmm0,%xmm0
        movdqa %xmm1,0(%rsp)
        movdqa %xmm2,16(%rsp)
        movdqa %xmm3,32(%rsp)
        movdqa %xmm0,48(%rsp)
        movdqa 16(%r8),%xmm0
        pshufd $0xaa,%xmm0,%xmm1
        pshufd $0xff,%xmm0,%xmm2
        pshufd $0x00,%xmm0,%xmm3
        pshufd $0x55,%xmm0,%xmm0
        movdqa %xmm1,64(%rsp)
        movdqa %xmm2,80(%rsp)
        movdqa %xmm3,96(%rsp)
        movdqa %xmm0,112(%rsp)
        movdqa 32(%r8),%xmm0
        pshufd $0xff,%xmm0,%xmm1
        pshufd $0x55,%xmm0,%xmm2
        pshufd $0xaa,%xmm0,%xmm0
        movdqa %xmm1,128(%rsp)
        movdqa %xmm2,144(%rsp)
        movdqa %xmm0,160(%rsp)
        movdqa 48(%r8),%xmm0
        pshufd $0x00,%xmm0,%xmm1
        pshufd $0xaa,%xmm0,%xmm2
        pshufd $0xff,%xmm0,%xmm0
        movdqa %xmm1,176(%rsp)
        movdqa %xmm2,192(%rsp)
        movdqa %xmm0,208(%rsp)
.L_bytesatleast256:
        movl 32(%r8),%ecx
        movl 52(%r8),%r9d
        movl %ecx,224(%rsp)
        movl %r9d,240(%rsp)
        add $1,%ecx
        adc $0,%r9d
        movl %ecx,4+224(%rsp)
        movl %r9d,4+240(%rsp)
        add $1,%ecx
        adc $0,%r9d
        movl %ecx,8+224(%rsp)
        movl %r9d,8+240(%rsp)
        add $1,%ecx
        adc $0,%r9d
        movl %ecx,12+224(%rsp)
        movl %r9d,12+240(%rsp)
        add $1,%ecx
        adc $0,%r9d
        movl %ecx,32(%r8)
        movl %r9d,52(%r8)
        movq %rdx,288(%rsp)
        mov %rbx,%rdx
        movdqa 0(%rsp),%xmm0
        movdqa 16(%rsp),%xmm1
        movdqa 32(%rsp),%xmm2
        movdqa 192(%rsp),%xmm3
        movdqa 208(%rsp),%xmm4
        movdqa 64(%rsp),%xmm5
        movdqa 80(%rsp),%xmm6
        movdqa 112(%rsp),%xmm7
        movdqa 128(%rsp),%xmm8
        movdqa 144(%rsp),%xmm9
        movdqa 160(%rsp),%xmm10
        movdqa 240(%rsp),%xmm11
        movdqa 48(%rsp),%xmm12
        movdqa 96(%rsp),%xmm13
        movdqa 176(%rsp),%xmm14
        movdqa 224(%rsp),%xmm15
.L_mainloop1:
        movdqa %xmm1,256(%rsp)
        movdqa %xmm2,272(%rsp)
        movdqa %xmm13,%xmm1
        paddd %xmm12,%xmm1
        movdqa %xmm1,%xmm2
        pslld $7,%xmm1
        pxor %xmm1,%xmm14
        psrld $25,%xmm2
        pxor %xmm2,%xmm14
        movdqa %xmm7,%xmm1
        paddd %xmm0,%xmm1
        movdqa %xmm1,%xmm2
        pslld $7,%xmm1
        pxor %xmm1,%xmm11
        psrld $25,%xmm2
        pxor %xmm2,%xmm11
        movdqa %xmm12,%xmm1
        paddd %xmm14,%xmm1
        movdqa %xmm1,%xmm2
        pslld $9,%xmm1
        pxor %xmm1,%xmm15
        psrld $23,%xmm2
        pxor %xmm2,%xmm15
        movdqa %xmm0,%xmm1
        paddd %xmm11,%xmm1
        movdqa %xmm1,%xmm2
        pslld $9,%xmm1
        pxor %xmm1,%xmm9
        psrld $23,%xmm2
        pxor %xmm2,%xmm9
        movdqa %xmm14,%xmm1
        paddd %xmm15,%xmm1
        movdqa %xmm1,%xmm2
        pslld $13,%xmm1
        pxor %xmm1,%xmm13
        psrld $19,%xmm2
        pxor %xmm2,%xmm13
        movdqa %xmm11,%xmm1
        paddd %xmm9,%xmm1
        movdqa %xmm1,%xmm2
        pslld $13,%xmm1
        pxor %xmm1,%xmm7
        psrld $19,%xmm2
        pxor %xmm2,%xmm7
        movdqa %xmm15,%xmm1
        paddd %xmm13,%xmm1
        movdqa %xmm1,%xmm2
        pslld $18,%xmm1
        pxor %xmm1,%xmm12
        psrld $14,%xmm2
        pxor %xmm2,%xmm12
        movdqa 256(%rsp),%xmm1
        movdqa %xmm12,256(%rsp)
        movdqa %xmm9,%xmm2
        paddd %xmm7,%xmm2
        movdqa %xmm2,%xmm12
        pslld $18,%xmm2
        pxor %xmm2,%xmm0
        psrld $14,%xmm12
        pxor %xmm12,%xmm0
        movdqa %xmm5,%xmm2
        paddd %xmm1,%xmm2
        movdqa %xmm2,%xmm12
        pslld $7,%xmm2
        pxor %xmm2,%xmm3
        psrld $25,%xmm12
        pxor %xmm12,%xmm3
        movdqa 272(%rsp),%xmm2
        movdqa %xmm0,272(%rsp)
        movdqa %xmm6,%xmm0
        paddd %xmm2,%xmm0
        movdqa %xmm0,%xmm12
        pslld $7,%xmm0
        pxor %xmm0,%xmm4
        psrld $25,%xmm12
        pxor %xmm12,%xmm4
        movdqa %xmm1,%xmm0
        paddd %xmm3,%xmm0
        movdqa %xmm0,%xmm12
        pslld $9,%xmm0
        pxor %xmm0,%xmm10
        psrld $23,%xmm12
        pxor %xmm12,%xmm10
        movdqa %xmm2,%xmm0
        paddd %xmm4,%xmm0
        movdqa %xmm0,%xmm12
        pslld $9,%xmm0
        pxor %xmm0,%xmm8
        psrld $23,%xmm12
        pxor %xmm12,%xmm8
        movdqa %xmm3,%xmm0
        paddd %xmm10,%xmm0
        movdqa %xmm0,%xmm12
        pslld $13,%xmm0
        pxor %xmm0,%xmm5
        psrld $19,%xmm12
        pxor %xmm12,%xmm5
        movdqa %xmm4,%xmm0
        paddd %xmm8,%xmm0
        movdqa %xmm0,%xmm12
        pslld $13,%xmm0
        pxor %xmm0,%xmm6
        psrld $19,%xmm12
        pxor %xmm12,%xmm6
        movdqa %xmm10,%xmm0
        paddd %xmm5,%xmm0
        movdqa %xmm0,%xmm12
        pslld $18,%xmm0
        pxor %xmm0,%xmm1
        psrld $14,%xmm12
        pxor %xmm12,%xmm1
        movdqa 256(%rsp),%xmm0
        movdqa %xmm1,256(%rsp)
        movdqa %xmm4,%xmm1
        paddd %xmm0,%xmm1
        movdqa %xmm1,%xmm12
        pslld $7,%xmm1
        pxor %xmm1,%xmm7
        psrld $25,%xmm12
        pxor %xmm12,%xmm7
        movdqa %xmm8,%xmm1
        paddd %xmm6,%xmm1
        movdqa %xmm1,%xmm12
        pslld $18,%xmm1
        pxor %xmm1,%xmm2
        psrld $14,%xmm12
        pxor %xmm12,%xmm2
        movdqa 272(%rsp),%xmm12
        movdqa %xmm2,272(%rsp)
        movdqa %xmm14,%xmm1
        paddd %xmm12,%xmm1
        movdqa %xmm1,%xmm2
        pslld $7,%xmm1
        pxor %xmm1,%xmm5
        psrld $25,%xmm2
        pxor %xmm2,%xmm5
        movdqa %xmm0,%xmm1
        paddd %xmm7,%xmm1
        movdqa %xmm1,%xmm2
        pslld $9,%xmm1
        pxor %xmm1,%xmm10
        psrld $23,%xmm2
        pxor %xmm2,%xmm10
        movdqa %xmm12,%xmm1
        paddd %xmm5,%xmm1
        movdqa %xmm1,%xmm2
        pslld $9,%xmm1
        pxor %xmm1,%xmm8
        psrld $23,%xmm2
        pxor %xmm2,%xmm8
        movdqa %xmm7,%xmm1
        paddd %xmm10,%xmm1
        movdqa %xmm1,%xmm2
        pslld $13,%xmm1
        pxor %xmm1,%xmm4
        psrld $19,%xmm2
        pxor %xmm2,%xmm4
        movdqa %xmm5,%xmm1
        paddd %xmm8,%xmm1
        movdqa %xmm1,%xmm2
        pslld $13,%xmm1
        pxor %xmm1,%xmm14
        psrld $19,%xmm2
        pxor %xmm2,%xmm14
        movdqa %xmm10,%xmm1
        paddd %xmm4,%xmm1
        movdqa %xmm1,%xmm2
        pslld $18,%xmm1
        pxor %xmm1,%xmm0
        psrld $14,%xmm2
        pxor %xmm2,%xmm0
        movdqa 256(%rsp),%xmm1
        movdqa %xmm0,256(%rsp)
        movdqa %xmm8,%xmm0
        paddd %xmm14,%xmm0
        movdqa %xmm0,%xmm2
        pslld $18,%xmm0
        pxor %xmm0,%xmm12
        psrld $14,%xmm2
        pxor %xmm2,%xmm12
        movdqa %xmm11,%xmm0
        paddd %xmm1,%xmm0
        movdqa %xmm0,%xmm2
        pslld $7,%xmm0
        pxor %xmm0,%xmm6
        psrld $25,%xmm2
        pxor %xmm2,%xmm6
        movdqa 272(%rsp),%xmm2
        movdqa %xmm12,272(%rsp)
        movdqa %xmm3,%xmm0
        paddd %xmm2,%xmm0
        movdqa %xmm0,%xmm12
        pslld $7,%xmm0
        pxor %xmm0,%xmm13
        psrld $25,%xmm12
        pxor %xmm12,%xmm13
        movdqa %xmm1,%xmm0
        paddd %xmm6,%xmm0
        movdqa %xmm0,%xmm12
        pslld $9,%xmm0
        pxor %xmm0,%xmm15
        psrld $23,%xmm12
        pxor %xmm12,%xmm15
        movdqa %xmm2,%xmm0
        paddd %xmm13,%xmm0
        movdqa %xmm0,%xmm12
        pslld $9,%xmm0
        pxor %xmm0,%xmm9
        psrld $23,%xmm12
        pxor %xmm12,%xmm9
        movdqa %xmm6,%xmm0
        paddd %xmm15,%xmm0
        movdqa %xmm0,%xmm12
        pslld $13,%xmm0
        pxor %xmm0,%xmm11
        psrld $19,%xmm12
        pxor %xmm12,%xmm11
        movdqa %xmm13,%xmm0
        paddd %xmm9,%xmm0
        movdqa %xmm0,%xmm12
        pslld $13,%xmm0
        pxor %xmm0,%xmm3
        psrld $19,%xmm12
        pxor %xmm12,%xmm3
        movdqa %xmm15,%xmm0
        paddd %xmm11,%xmm0
        movdqa %xmm0,%xmm12
        pslld $18,%xmm0
        pxor %xmm0,%xmm1
        psrld $14,%xmm12
        pxor %xmm12,%xmm1
        movdqa %xmm9,%xmm0
        paddd %xmm3,%xmm0
        movdqa %xmm0,%xmm12
        pslld $18,%xmm0
        pxor %xmm0,%xmm2
        psrld $14,%xmm12
        pxor %xmm12,%xmm2
        movdqa 256(%rsp),%xmm12
        movdqa 272(%rsp),%xmm0
        sub $2,%rdx
        ja .L_mainloop1
        paddd 48(%rsp),%xmm12
        paddd 112(%rsp),%xmm7
        paddd 160(%rsp),%xmm10
        paddd 208(%rsp),%xmm4
        movd %xmm12,%rdx
        movd %xmm7,%rcx
        movd %xmm10,%r9
        movd %xmm4,%rax
        pshufd $0x39,%xmm12,%xmm12
        pshufd $0x39,%xmm7,%xmm7
        pshufd $0x39,%xmm10,%xmm10
        pshufd $0x39,%xmm4,%xmm4
        xorl 0(%rsi),%edx
        xorl 4(%rsi),%ecx
        xorl 8(%rsi),%r9d
        xorl 12(%rsi),%eax
        movl %edx,0(%rdi)
        movl %ecx,4(%rdi)
        movl %r9d,8(%rdi)
        movl %eax,12(%rdi)
        movd %xmm12,%rdx
        movd %xmm7,%rcx
        movd %xmm10,%r9
        movd %xmm4,%rax
        pshufd $0x39,%xmm12,%xmm12
        pshufd $0x39,%xmm7,%xmm7
        pshufd $0x39,%xmm10,%xmm10
        pshufd $0x39,%xmm4,%xmm4
        xorl 64(%rsi),%edx
        xorl 68(%rsi),%ecx
        xorl 72(%rsi),%r9d
        xorl 76(%rsi),%eax
        movl %edx,64(%rdi)
        movl %ecx,68(%rdi)
        movl %r9d,72(%rdi)
        movl %eax,76(%rdi)
        movd %xmm12,%rdx
        movd %xmm7,%rcx
        movd %xmm10,%r9
        movd %xmm4,%rax
        pshufd $0x39,%xmm12,%xmm12
        pshufd $0x39,%xmm7,%xmm7
        pshufd $0x39,%xmm10,%xmm10
        pshufd $0x39,%xmm4,%xmm4
        xorl 128(%rsi),%edx
        xorl 132(%rsi),%ecx
        xorl 136(%rsi),%r9d
        xorl 140(%rsi),%eax
        movl %edx,128(%rdi)
        movl %ecx,132(%rdi)
        movl %r9d,136(%rdi)
        movl %eax,140(%rdi)
        movd %xmm12,%rdx
        movd %xmm7,%rcx
        movd %xmm10,%r9
        movd %xmm4,%rax
        xorl 192(%rsi),%edx
        xorl 196(%rsi),%ecx
        xorl 200(%rsi),%r9d
        xorl 204(%rsi),%eax
        movl %edx,192(%rdi)
        movl %ecx,196(%rdi)
        movl %r9d,200(%rdi)
        movl %eax,204(%rdi)
        paddd 176(%rsp),%xmm14
        paddd 0(%rsp),%xmm0
        paddd 64(%rsp),%xmm5
        paddd 128(%rsp),%xmm8
        movd %xmm14,%rdx
        movd %xmm0,%rcx
        movd %xmm5,%r9
        movd %xmm8,%rax
        pshufd $0x39,%xmm14,%xmm14
        pshufd $0x39,%xmm0,%xmm0
        pshufd $0x39,%xmm5,%xmm5
        pshufd $0x39,%xmm8,%xmm8
        xorl 16(%rsi),%edx
        xorl 20(%rsi),%ecx
        xorl 24(%rsi),%r9d
        xorl 28(%rsi),%eax
        movl %edx,16(%rdi)
        movl %ecx,20(%rdi)
        movl %r9d,24(%rdi)
        movl %eax,28(%rdi)
        movd %xmm14,%rdx
        movd %xmm0,%rcx
        movd %xmm5,%r9
        movd %xmm8,%rax
        pshufd $0x39,%xmm14,%xmm14
        pshufd $0x39,%xmm0,%xmm0
        pshufd $0x39,%xmm5,%xmm5
        pshufd $0x39,%xmm8,%xmm8
        xorl 80(%rsi),%edx
        xorl 84(%rsi),%ecx
        xorl 88(%rsi),%r9d
        xorl 92(%rsi),%eax
        movl %edx,80(%rdi)
        movl %ecx,84(%rdi)
        movl %r9d,88(%rdi)
        movl %eax,92(%rdi)
        movd %xmm14,%rdx
        movd %xmm0,%rcx
        movd %xmm5,%r9
        movd %xmm8,%rax
        pshufd $0x39,%xmm14,%xmm14
        pshufd $0x39,%xmm0,%xmm0
        pshufd $0x39,%xmm5,%xmm5
        pshufd $0x39,%xmm8,%xmm8
        xorl 144(%rsi),%edx
        xorl 148(%rsi),%ecx
        xorl 152(%rsi),%r9d
        xorl 156(%rsi),%eax
        movl %edx,144(%rdi)
        movl %ecx,148(%rdi)
        movl %r9d,152(%rdi)
        movl %eax,156(%rdi)
        movd %xmm14,%rdx
        movd %xmm0,%rcx
        movd %xmm5,%r9
        movd %xmm8,%rax
        xorl 208(%rsi),%edx
        xorl 212(%rsi),%ecx
        xorl 216(%rsi),%r9d
        xorl 220(%rsi),%eax
        movl %edx,208(%rdi)
        movl %ecx,212(%rdi)
        movl %r9d,216(%rdi)
        movl %eax,220(%rdi)
        paddd 224(%rsp),%xmm15
        paddd 240(%rsp),%xmm11
        paddd 16(%rsp),%xmm1
        paddd 80(%rsp),%xmm6
        movd %xmm15,%rdx
        movd %xmm11,%rcx
        movd %xmm1,%r9
        movd %xmm6,%rax
        pshufd $0x39,%xmm15,%xmm15
        pshufd $0x39,%xmm11,%xmm11
        pshufd $0x39,%xmm1,%xmm1
        pshufd $0x39,%xmm6,%xmm6
        xorl 32(%rsi),%edx
        xorl 36(%rsi),%ecx
        xorl 40(%rsi),%r9d
        xorl 44(%rsi),%eax
        movl %edx,32(%rdi)
        movl %ecx,36(%rdi)
        movl %r9d,40(%rdi)
        movl %eax,44(%rdi)
        movd %xmm15,%rdx
        movd %xmm11,%rcx
        movd %xmm1,%r9
        movd %xmm6,%rax
        pshufd $0x39,%xmm15,%xmm15
        pshufd $0x39,%xmm11,%xmm11
        pshufd $0x39,%xmm1,%xmm1
        pshufd $0x39,%xmm6,%xmm6
        xorl 96(%rsi),%edx
        xorl 100(%rsi),%ecx
        xorl 104(%rsi),%r9d
        xorl 108(%rsi),%eax
        movl %edx,96(%rdi)
        movl %ecx,100(%rdi)
        movl %r9d,104(%rdi)
        movl %eax,108(%rdi)
        movd %xmm15,%rdx
        movd %xmm11,%rcx
        movd %xmm1,%r9
        movd %xmm6,%rax
        pshufd $0x39,%xmm15,%xmm15
        pshufd $0x39,%xmm11,%xmm11
        pshufd $0x39,%xmm1,%xmm1
        pshufd $0x39,%xmm6,%xmm6
        xorl 160(%rsi),%edx
        xorl 164(%rsi),%ecx
        xorl 168(%rsi),%r9d
        xorl 172(%rsi),%eax
        movl %edx,160(%rdi)
        movl %ecx,164(%rdi)
        movl %r9d,168(%rdi)
        movl %eax,172(%rdi)
        movd %xmm15,%rdx
        movd %xmm11,%rcx
        movd %xmm1,%r9
        movd %xmm6,%rax
        xorl 224(%rsi),%edx
        xorl 228(%rsi),%ecx
        xorl 232(%rsi),%r9d
        xorl 236(%rsi),%eax
        movl %edx,224(%rdi)
        movl %ecx,228(%rdi)
        movl %r9d,232(%rdi)
        movl %eax,236(%rdi)
        paddd 96(%rsp),%xmm13
        paddd 144(%rsp),%xmm9
        paddd 192(%rsp),%xmm3
        paddd 32(%rsp),%xmm2
        movd %xmm13,%rdx
        movd %xmm9,%rcx
        movd %xmm3,%r9
        movd %xmm2,%rax
        pshufd $0x39,%xmm13,%xmm13
        pshufd $0x39,%xmm9,%xmm9
        pshufd $0x39,%xmm3,%xmm3
        pshufd $0x39,%xmm2,%xmm2
        xorl 48(%rsi),%edx
        xorl 52(%rsi),%ecx
        xorl 56(%rsi),%r9d
        xorl 60(%rsi),%eax
        movl %edx,48(%rdi)
        movl %ecx,52(%rdi)
        movl %r9d,56(%rdi)
        movl %eax,60(%rdi)
        movd %xmm13,%rdx
        movd %xmm9,%rcx
        movd %xmm3,%r9
        movd %xmm2,%rax
        pshufd $0x39,%xmm13,%xmm13
        pshufd $0x39,%xmm9,%xmm9
        pshufd $0x39,%xmm3,%xmm3
        pshufd $0x39,%xmm2,%xmm2
        xorl 112(%rsi),%edx
        xorl 116(%rsi),%ecx
        xorl 120(%rsi),%r9d
        xorl 124(%rsi),%eax
        movl %edx,112(%rdi)
        movl %ecx,116(%rdi)
        movl %r9d,120(%rdi)
        movl %eax,124(%rdi)
        movd %xmm13,%rdx
        movd %xmm9,%rcx
        movd %xmm3,%r9
        movd %xmm2,%rax
        pshufd $0x39,%xmm13,%xmm13
        pshufd $0x39,%xmm9,%xmm9
        pshufd $0x39,%xmm3,%xmm3
        pshufd $0x39,%xmm2,%xmm2
        xorl 176(%rsi),%edx
        xorl 180(%rsi),%ecx
        xorl 184(%rsi),%r9d
        xorl 188(%rsi),%eax
        movl %edx,176(%rdi)
        movl %ecx,180(%rdi)
        movl %r9d,184(%rdi)
        movl %eax,188(%rdi)
        movd %xmm13,%rdx
        movd %xmm9,%rcx
        movd %xmm3,%r9
        movd %xmm2,%rax
        xorl 240(%rsi),%edx
        xorl 244(%rsi),%ecx
        xorl 248(%rsi),%r9d
        xorl 252(%rsi),%eax
        movl %edx,240(%rdi)
        movl %ecx,244(%rdi)
        movl %r9d,248(%rdi)
        movl %eax,252(%rdi)
        movq 288(%rsp),%rdx
        sub $256,%rdx
        add $256,%rsi
        add $256,%rdi
        cmp $256,%rdx
        jae .L_bytesatleast256
        cmp $0,%rdx
        jbe .L_done
.L_bytes_are_64_128_or_192:
        movq %rdx,288(%rsp)
        movdqa 0(%r8),%xmm0
        movdqa 16(%r8),%xmm1
        movdqa 32(%r8),%xmm2
        movdqa 48(%r8),%xmm3
        movdqa %xmm1,%xmm4
        mov %rbx,%rdx
.L_mainloop2:
        paddd %xmm0,%xmm4
        movdqa %xmm0,%xmm5
        movdqa %xmm4,%xmm6
        pslld $7,%xmm4
        psrld $25,%xmm6
        pxor %xmm4,%xmm3
        pxor %xmm6,%xmm3
        paddd %xmm3,%xmm5
        movdqa %xmm3,%xmm4
        movdqa %xmm5,%xmm6
        pslld $9,%xmm5
        psrld $23,%xmm6
        pxor %xmm5,%xmm2
        pshufd $0x93,%xmm3,%xmm3
        pxor %xmm6,%xmm2
        paddd %xmm2,%xmm4
        movdqa %xmm2,%xmm5
        movdqa %xmm4,%xmm6
        pslld $13,%xmm4
        psrld $19,%xmm6
        pxor %xmm4,%xmm1
        pshufd $0x4e,%xmm2,%xmm2
        pxor %xmm6,%xmm1
        paddd %xmm1,%xmm5
        movdqa %xmm3,%xmm4
        movdqa %xmm5,%xmm6
        pslld $18,%xmm5
        psrld $14,%xmm6
        pxor %xmm5,%xmm0
        pshufd $0x39,%xmm1,%xmm1
        pxor %xmm6,%xmm0
        paddd %xmm0,%xmm4
        movdqa %xmm0,%xmm5
        movdqa %xmm4,%xmm6
        pslld $7,%xmm4
        psrld $25,%xmm6
        pxor %xmm4,%xmm1
        pxor %xmm6,%xmm1
        paddd %xmm1,%xmm5
        movdqa %xmm1,%xmm4
        movdqa %xmm5,%xmm6
        pslld $9,%xmm5
        psrld $23,%xmm6
        pxor %xmm5,%xmm2
        pshufd $0x93,%xmm1,%xmm1
        pxor %xmm6,%xmm2
        paddd %xmm2,%xmm4
        movdqa %xmm2,%xmm5
        movdqa %xmm4,%xmm6
        pslld $13,%xmm4
        psrld $19,%xmm6
        pxor %xmm4,%xmm3
        pshufd $0x4e,%xmm2,%xmm2
        pxor %xmm6,%xmm3
        paddd %xmm3,%xmm5
        movdqa %xmm1,%xmm4
        movdqa %xmm5,%xmm6
        pslld $18,%xmm5
        psrld $14,%xmm6
        pxor %xmm5,%xmm0
        pshufd $0x39,%xmm3,%xmm3
        pxor %xmm6,%xmm0
        paddd %xmm0,%xmm4
        movdqa %xmm0,%xmm5
        movdqa %xmm4,%xmm6
        pslld $7,%xmm4
        psrld $25,%xmm6
        pxor %xmm4,%xmm3
        pxor %xmm6,%xmm3
        paddd %xmm3,%xmm5
        movdqa %xmm3,%xmm4
        movdqa %xmm5,%xmm6
        pslld $9,%xmm5
        psrld $23,%xmm6
        pxor %xmm5,%xmm2
        pshufd $0x93,%xmm3,%xmm3
        pxor %xmm6,%xmm2
        paddd %xmm2,%xmm4
        movdqa %xmm2,%xmm5
        movdqa %xmm4,%xmm6
        pslld $13,%xmm4
        psrld $19,%xmm6
        pxor %xmm4,%xmm1
        pshufd $0x4e,%xmm2,%xmm2
        pxor %xmm6,%xmm1
        paddd %xmm1,%xmm5
        movdqa %xmm3,%xmm4
        movdqa %xmm5,%xmm6
        pslld $18,%xmm5
        psrld $14,%xmm6
        pxor %xmm5,%xmm0
        pshufd $0x39,%xmm1,%xmm1
        pxor %xmm6,%xmm0
        paddd %xmm0,%xmm4
        movdqa %xmm0,%xmm5
        movdqa %xmm4,%xmm6
        pslld $7,%xmm4
        psrld $25,%xmm6
        pxor %xmm4,%xmm1
        pxor %xmm6,%xmm1
        paddd %xmm1,%xmm5
        movdqa %xmm1,%xmm4
        movdqa %xmm5,%xmm6
        pslld $9,%xmm5
        psrld $23,%xmm6
        pxor %xmm5,%xmm2
        pshufd $0x93,%xmm1,%xmm1
        pxor %xmm6,%xmm2
        paddd %xmm2,%xmm4
        movdqa %xmm2,%xmm5
        movdqa %xmm4,%xmm6
        pslld $13,%xmm4
        psrld $19,%xmm6
        pxor %xmm4,%xmm3
        pshufd $0x4e,%xmm2,%xmm2
        pxor %xmm6,%xmm3
        sub $4,%rdx
        paddd %xmm3,%xmm5
        movdqa %xmm1,%xmm4
        movdqa %xmm5,%xmm6
        pslld $18,%xmm5
        pxor %xmm7,%xmm7
        psrld $14,%xmm6
        pxor %xmm5,%xmm0
        pshufd $0x39,%xmm3,%xmm3
        pxor %xmm6,%xmm0
        ja .L_mainloop2
        paddd 0(%r8),%xmm0
        paddd 16(%r8),%xmm1
        paddd 32(%r8),%xmm2
        paddd 48(%r8),%xmm3
        movd %xmm0,%rdx
        movd %xmm1,%rcx
        movd %xmm2,%rax
        movd %xmm3,%r10
        pshufd $0x39,%xmm0,%xmm0
        pshufd $0x39,%xmm1,%xmm1
        pshufd $0x39,%xmm2,%xmm2
        pshufd $0x39,%xmm3,%xmm3
        xorl 0(%rsi),%edx
        xorl 48(%rsi),%ecx
        xorl 32(%rsi),%eax
        xorl 16(%rsi),%r10d
        movl %edx,0(%rdi)
        movl %ecx,48(%rdi)
        movl %eax,32(%rdi)
        movl %r10d,16(%rdi)
        movd %xmm0,%rdx
        movd %xmm1,%rcx
        movd %xmm2,%rax
        movd %xmm3,%r10
        pshufd $0x39,%xmm0,%xmm0
        pshufd $0x39,%xmm1,%xmm1
        pshufd $0x39,%xmm2,%xmm2
        pshufd $0x39,%xmm3,%xmm3
        xorl 20(%rsi),%edx
        xorl 4(%rsi),%ecx
        xorl 52(%rsi),%eax
        xorl 36(%rsi),%r10d
        movl %edx,20(%rdi)
        movl %ecx,4(%rdi)
        movl %eax,52(%rdi)
        movl %r10d,36(%rdi)
        movd %xmm0,%rdx
        movd %xmm1,%rcx
        movd %xmm2,%rax
        movd %xmm3,%r10
        pshufd $0x39,%xmm0,%xmm0
        pshufd $0x39,%xmm1,%xmm1
        pshufd $0x39,%xmm2,%xmm2
        pshufd $0x39,%xmm3,%xmm3
        xorl 40(%rsi),%edx
        xorl 24(%rsi),%ecx
        xorl 8(%rsi),%eax
        xorl 56(%rsi),%r10d
        movl %edx,40(%rdi)
        movl %ecx,24(%rdi)
        movl %eax,8(%rdi)
        movl %r10d,56(%rdi)
        movd %xmm0,%rdx
        movd %xmm1,%rcx
        movd %xmm2,%rax
        movd %xmm3,%r10
        xorl 60(%rsi),%edx
        xorl 44(%rsi),%ecx
        xorl 28(%rsi),%eax
        xorl 12(%rsi),%r10d
        movl %edx,60(%rdi)
        movl %ecx,44(%rdi)
        movl %eax,28(%rdi)
        movl %r10d,12(%rdi)
        movq 288(%rsp),%rdx
        movl 32(%r8),%ecx
        movl 52(%r8),%eax
        add $1,%ecx
        adc $0,%eax
        movl %ecx,32(%r8)
        movl %eax,52(%r8)
        cmp $64,%rdx
        ja .L_bytes_are_128_or_192
.L_done:
        CFI_REMEMBER_STATE();
        mov %r11,%rax
        sub %rsp,%rax
        mov %r11,%rsp
        CFI_REGISTER(%r11, %rsp)
        CFI_DEF_CFA_REGISTER(%rsp)
        pop %rbx
        CFI_POP(%rbx)
        ret_spec_stop
        CFI_RESTORE_STATE();
.L_bytes_are_128_or_192:
        sub $64,%rdx
        add $64,%rdi
        add $64,%rsi
        jmp .L_bytes_are_64_128_or_192
        CFI_ENDPROC();
ELF(.size _gcry_salsa20_amd64_encrypt_blocks,.-_gcry_salsa20_amd64_encrypt_blocks;)
#endif /*defined(USE_SALSA20)*/
#endif /*__x86_64*/