/* rijndael-amd64.S  -  AMD64 assembly implementation of AES cipher
 *
 * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
 *
 * This file is part of Libgcrypt.
 *
 * Libgcrypt is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * Libgcrypt is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#ifdef __x86_64
#include <config.h>
#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_AES)
#include "asm-common-amd64.h"
.text
/* table macros */
#define E0 (0)
#define Es0 (1)
#define Esize 4
#define Essize 4
#define D0 (0)
#define Ds0 (4 * 256)
#define Dsize 4
#define Dssize 1
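/*
 * Note: E0/Es0 and D0/Ds0 appear to be byte offsets into the encryption
 * and decryption lookup tables handed in by the caller (via %r8 below),
 * and Esize/Essize, Dsize/Dssize the strides used when indexing them.
 * The exact table layout is defined on the C side and is only assumed
 * here.
 */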
/* register macros */
#define CTX %rdi
#define RTAB %r12
#define RA %rax
#define RB %rbx
#define RC %rcx
#define RD %rdx
#define RAd %eax
#define RBd %ebx
#define RCd %ecx
#define RDd %edx
#define RAbl %al
#define RBbl %bl
#define RCbl %cl
#define RDbl %dl
#define RAbh %ah
#define RBbh %bh
#define RCbh %ch
#define RDbh %dh
#define RNA %r8
#define RNB %r9
#define RNC %r10
#define RND %r11
#define RNAd %r8d
#define RNBd %r9d
#define RNCd %r10d
#define RNDd %r11d
#define RT0 %rbp
#define RT1 %rsi
#define RT0d %ebp
#define RT1d %esi
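/*
 * Roles inferred from the code below rather than stated in the source:
 * CTX (%rdi) points at the round-key schedule and RTAB (%r12) at the
 * lookup table; RA-RD hold the four 32-bit state words, RNA-RND the
 * state being assembled for the next round, and RT0/RT1 are scratch.
 */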
/* helper macros */
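/*
 * The four helpers below each take the two low bytes of 'source' (via the
 * ## bl / ## bh partial registers), use them as indices into RTAB, and
 * fold the looked-up values into dest1/dest2 with the given 'op'.  The
 * *_shr variants additionally shift 'source' right so the next byte pair
 * can be processed, and the last_* variants fetch single bytes (movzbl)
 * as used by the final round.
 */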
#define do16bit(op, source, tablemul, table1, dest1, table2, dest2, t0, t1) \
        movzbl source ## bl,                    t0 ## d; \
        movzbl source ## bh,                    t1 ## d; \
        op ## l table1(RTAB,t0,tablemul),       dest1 ## d; \
        op ## l table2(RTAB,t1,tablemul),       dest2 ## d;
#define do16bit_shr(shf, op, source, tablemul, table1, dest1, table2, dest2, t0, t1) \
        movzbl source ## bl,                    t0 ## d; \
        movzbl source ## bh,                    t1 ## d; \
        shrl $(shf),                            source ## d; \
        op ## l table1(RTAB,t0,tablemul),       dest1 ## d; \
        op ## l table2(RTAB,t1,tablemul),       dest2 ## d;
#define last_do16bit(op, source, tablemul, table1, dest1, table2, dest2, t0, t1) \
        movzbl source ## bl,                    t0 ## d; \
        movzbl source ## bh,                    t1 ## d; \
        movzbl table1(RTAB,t0,tablemul),        t0 ## d; \
        movzbl table2(RTAB,t1,tablemul),        t1 ## d; \
        op ## l t0 ## d,                        dest1 ## d; \
        op ## l t1 ## d,                        dest2 ## d;
#define last_do16bit_shr(shf, op, source, tablemul, table1, dest1, table2, dest2, t0, t1) \
        movzbl source ## bl,                    t0 ## d; \
        movzbl source ## bh,                    t1 ## d; \
        shrl $(shf),                            source ## d; \
        movzbl table1(RTAB,t0,tablemul),        t0 ## d; \
        movzbl table2(RTAB,t1,tablemul),        t1 ## d; \
        op ## l t0 ## d,                        dest1 ## d; \
        op ## l t1 ## d,                        dest2 ## d;
/***********************************************************************
 * AMD64 assembly implementation of the AES cipher
***********************************************************************/
#define addroundkey(round, ra, rb, rc, rd) \
        xorl (((round) * 16) + 0 * 4)(CTX), ra ## d; \
        xorl (((round) * 16) + 1 * 4)(CTX), rb ## d; \
        xorl (((round) * 16) + 2 * 4)(CTX), rc ## d; \
        xorl (((round) * 16) + 3 * 4)(CTX), rd ## d;
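/*
 * addroundkey() above XORs the four 32-bit words of round key 'round'
 * from the key schedule at CTX into the given state registers.
 */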
#define do_encround(next_r) \
        do16bit_shr(16, mov, RA, Esize, E0, RNA, E0, RND, RT0, RT1); \
        do16bit(        mov, RA, Esize, E0, RNC, E0, RNB, RT0, RT1); \
        movl (((next_r) * 16) + 0 * 4)(CTX), RAd; \
        roll $8, RNDd; \
        xorl RNAd, RAd; \
        roll $8, RNCd; \
        roll $8, RNBd; \
        roll $8, RAd; \
        \
        do16bit_shr(16, xor, RD, Esize, E0, RND, E0, RNC, RT0, RT1); \
        do16bit(        xor, RD, Esize, E0, RNB, E0, RA,  RT0, RT1); \
        movl (((next_r) * 16) + 3 * 4)(CTX), RDd; \
        roll $8, RNCd; \
        xorl RNDd, RDd; \
        roll $8, RNBd; \
        roll $8, RAd; \
        roll $8, RDd; \
        \
        do16bit_shr(16, xor, RC, Esize, E0, RNC, E0, RNB, RT0, RT1); \
        do16bit(        xor, RC, Esize, E0, RA,  E0, RD,  RT0, RT1); \
        movl (((next_r) * 16) + 2 * 4)(CTX), RCd; \
        roll $8, RNBd; \
        xorl RNCd, RCd; \
        roll $8, RAd; \
        roll $8, RDd; \
        roll $8, RCd; \
        \
        do16bit_shr(16, xor, RB, Esize, E0, RNB, E0, RA,  RT0, RT1); \
        do16bit(        xor, RB, Esize, E0, RD,  E0, RC,  RT0, RT1); \
        movl (((next_r) * 16) + 1 * 4)(CTX), RBd; \
        roll $8, RAd; \
        xorl RNBd, RBd; \
        roll $16, RDd; \
        roll $24, RCd;
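/*
 * do_encround() performs one full encryption round: each state word is
 * consumed byte by byte through the table lookups, the partial results
 * are rotated (roll) into place and XORed together, and the next round
 * key word is loaded from CTX and folded in.
 */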
#define do_lastencround(next_r) \
        do16bit_shr(16, movzb, RA, Essize, Es0, RNA, Es0, RND, RT0, RT1); \
        do16bit(        movzb, RA, Essize, Es0, RNC, Es0, RNB, RT0, RT1); \
        movl (((next_r) * 16) + 0 * 4)(CTX), RAd; \
        roll $8, RNDd; \
        xorl RNAd, RAd; \
        roll $8, RNCd; \
        roll $8, RNBd; \
        roll $8, RAd; \
        \
        last_do16bit_shr(16, xor, RD, Essize, Es0, RND, Es0, RNC, RT0, RT1); \
        last_do16bit(        xor, RD, Essize, Es0, RNB, Es0, RA,  RT0, RT1); \
        movl (((next_r) * 16) + 3 * 4)(CTX), RDd; \
        roll $8, RNCd; \
        xorl RNDd, RDd; \
        roll $8, RNBd; \
        roll $8, RAd; \
        roll $8, RDd; \
        \
        last_do16bit_shr(16, xor, RC, Essize, Es0, RNC, Es0, RNB, RT0, RT1); \
        last_do16bit(        xor, RC, Essize, Es0, RA,  Es0, RD,  RT0, RT1); \
        movl (((next_r) * 16) + 2 * 4)(CTX), RCd; \
        roll $8, RNBd; \
        xorl RNCd, RCd; \
        roll $8, RAd; \
        roll $8, RDd; \
        roll $8, RCd; \
        \
        last_do16bit_shr(16, xor, RB, Essize, Es0, RNB, Es0, RA,  RT0, RT1); \
        last_do16bit(        xor, RB, Essize, Es0, RD,  Es0, RC,  RT0, RT1); \
        movl (((next_r) * 16) + 1 * 4)(CTX), RBd; \
        roll $8, RAd; \
        xorl RNBd, RBd; \
        roll $16, RDd; \
        roll $24, RCd;
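/*
 * do_lastencround() mirrors do_encround() but indexes the byte-wide
 * Es0/Essize portion of the table (presumably plain S-box values), since
 * the final AES round has no MixColumns step.
 */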
#define firstencround(round) \
        addroundkey(round, RA, RB, RC, RD); \
        do_encround((round) + 1);

#define encround(round) \
        do_encround((round) + 1);

#define lastencround(round) \
        do_lastencround((round) + 1);
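/*
 * The wrappers pass (round) + 1 because do_encround()/do_lastencround()
 * take the index of the round key folded in at the end of the round;
 * addroundkey() has already applied key 0 as the initial whitening.
 */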
.align 8
.globl _gcry_aes_amd64_encrypt_block
ELF(.type   _gcry_aes_amd64_encrypt_block,@function;)

_gcry_aes_amd64_encrypt_block:
        /* input:
         *      %rdi: keysched, CTX
         *      %rsi: dst
         *      %rdx: src
         *      %ecx: number of rounds.. 10, 12 or 14
         *      %r8:  encryption tables
         */
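        /*
         * A hedged sketch of the C-side declaration implied by the register
         * interface above (the authoritative prototype lives in the C glue
         * code, not in this file):
         *
         *   unsigned int _gcry_aes_amd64_encrypt_block(const void *keysched,
         *                                              unsigned char *dst,
         *                                              const unsigned char *src,
         *                                              int rounds,
         *                                              const void *encT);
         */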
CFI_STARTPROC();
ENTER_SYSV_FUNC_PARAMS_5
        subq $(5 * 8), %rsp;
        CFI_ADJUST_CFA_OFFSET(5 * 8);
        movq %rsi, (0 * 8)(%rsp);
        movl %ecx, (1 * 8)(%rsp);
        movq %rbp, (2 * 8)(%rsp);
        movq %rbx, (3 * 8)(%rsp);
        movq %r12, (4 * 8)(%rsp);
        CFI_REL_OFFSET(%rbp, 2 * 8);
        CFI_REL_OFFSET(%rbx, 3 * 8);
        CFI_REL_OFFSET(%r12, 4 * 8);

        leaq (%r8), RTAB;
        /* read input block */
        movl 0 * 4(%rdx), RAd;
        movl 1 * 4(%rdx), RBd;
        movl 2 * 4(%rdx), RCd;
        movl 3 * 4(%rdx), RDd;
        firstencround(0);
        encround(1);
        encround(2);
        encround(3);
        encround(4);
        encround(5);
        encround(6);
        encround(7);
        encround(8);

        cmpl $12, (1 * 8)(%rsp);
jnb .Lenc_not_128;
lastencround(9);
.align 4
.Lenc_done:
/* write output block */
        movq (0 * 8)(%rsp), %rsi;
        movl RAd, 0 * 4(%rsi);
        movl RBd, 1 * 4(%rsi);
        movl RCd, 2 * 4(%rsi);
        movl RDd, 3 * 4(%rsi);
CFI_REMEMBER_STATE();
        movq (4 * 8)(%rsp), %r12;
        movq (3 * 8)(%rsp), %rbx;
        movq (2 * 8)(%rsp), %rbp;
        CFI_RESTORE(%r12);
        CFI_RESTORE(%rbx);
        CFI_RESTORE(%rbp);
        addq $(5 * 8), %rsp;
        CFI_ADJUST_CFA_OFFSET(-5 * 8);

        movl $(6 * 8), %eax;

        EXIT_SYSV_FUNC
        ret_spec_stop;

        CFI_RESTORE_STATE();
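        /*
         * The $(6 * 8) returned in %eax above presumably reports how many
         * stack bytes were used, so the caller knows how much to burn.
         */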
        .align 4
.Lenc_not_128:
        je .Lenc_192

        encround(9);
        encround(10);
        encround(11);
        encround(12);
        lastencround(13);

        jmp .Lenc_done;

        .align 4
.Lenc_192:
        encround(9);
        encround(10);
        lastencround(11);

        jmp .Lenc_done;
        CFI_ENDPROC();
ELF(.size _gcry_aes_amd64_encrypt_block,.-_gcry_aes_amd64_encrypt_block;)
#define do_decround(next_r) \
        do16bit_shr(16, mov, RA, Dsize, D0, RNA, D0, RNB, RT0, RT1); \
        do16bit(        mov, RA, Dsize, D0, RNC, D0, RND, RT0, RT1); \
        movl (((next_r) * 16) + 0 * 4)(CTX), RAd; \
        roll $8, RNBd; \
        xorl RNAd, RAd; \
        roll $8, RNCd; \
        roll $8, RNDd; \
        roll $8, RAd; \
        \
        do16bit_shr(16, xor, RB, Dsize, D0, RNB, D0, RNC, RT0, RT1); \
        do16bit(        xor, RB, Dsize, D0, RND, D0, RA,  RT0, RT1); \
        movl (((next_r) * 16) + 1 * 4)(CTX), RBd; \
        roll $8, RNCd; \
        xorl RNBd, RBd; \
        roll $8, RNDd; \
        roll $8, RAd; \
        roll $8, RBd; \
        \
        do16bit_shr(16, xor, RC, Dsize, D0, RNC, D0, RND, RT0, RT1); \
        do16bit(        xor, RC, Dsize, D0, RA,  D0, RB,  RT0, RT1); \
        movl (((next_r) * 16) + 2 * 4)(CTX), RCd; \
        roll $8, RNDd; \
        xorl RNCd, RCd; \
        roll $8, RAd; \
        roll $8, RBd; \
        roll $8, RCd; \
        \
        do16bit_shr(16, xor, RD, Dsize, D0, RND, D0, RA,  RT0, RT1); \
        do16bit(        xor, RD, Dsize, D0, RB,  D0, RC,  RT0, RT1); \
        movl (((next_r) * 16) + 3 * 4)(CTX), RDd; \
        roll $8, RAd; \
        xorl RNDd, RDd; \
        roll $16, RBd; \
        roll $24, RCd;
#define do_lastdecround(next_r) \
        do16bit_shr(16, movzb, RA, Dssize, Ds0, RNA, Ds0, RNB, RT0, RT1); \
        do16bit(        movzb, RA, Dssize, Ds0, RNC, Ds0, RND, RT0, RT1); \
        movl (((next_r) * 16) + 0 * 4)(CTX), RAd; \
        roll $8, RNBd; \
        xorl RNAd, RAd; \
        roll $8, RNCd; \
        roll $8, RNDd; \
        roll $8, RAd; \
        \
        last_do16bit_shr(16, xor, RB, Dssize, Ds0, RNB, Ds0, RNC, RT0, RT1); \
        last_do16bit(        xor, RB, Dssize, Ds0, RND, Ds0, RA,  RT0, RT1); \
        movl (((next_r) * 16) + 1 * 4)(CTX), RBd; \
        roll $8, RNCd; \
        xorl RNBd, RBd; \
        roll $8, RNDd; \
        roll $8, RAd; \
        roll $8, RBd; \
        \
        last_do16bit_shr(16, xor, RC, Dssize, Ds0, RNC, Ds0, RND, RT0, RT1); \
        last_do16bit(        xor, RC, Dssize, Ds0, RA,  Ds0, RB,  RT0, RT1); \
        movl (((next_r) * 16) + 2 * 4)(CTX), RCd; \
        roll $8, RNDd; \
        xorl RNCd, RCd; \
        roll $8, RAd; \
        roll $8, RBd; \
        roll $8, RCd; \
        \
        last_do16bit_shr(16, xor, RD, Dssize, Ds0, RND, Ds0, RA,  RT0, RT1); \
        last_do16bit(        xor, RD, Dssize, Ds0, RB,  Ds0, RC,  RT0, RT1); \
        movl (((next_r) * 16) + 3 * 4)(CTX), RDd; \
        roll $8, RAd; \
        xorl RNDd, RDd; \
        roll $16, RBd; \
        roll $24, RCd;
#define firstdecround(round) \
        addroundkey((round + 1), RA, RB, RC, RD); \
        do_decround(round);

#define decround(round) \
        do_decround(round);

#define lastdecround(round) \
        do_lastdecround(round);
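/*
 * Decryption walks the key schedule in the opposite direction:
 * firstdecround(r) whitens with key (r + 1), and each do_decround(r)
 * then folds in key r, counting down towards lastdecround(0).
 */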
.align 8
.globl _gcry_aes_amd64_decrypt_block
ELF(.type   _gcry_aes_amd64_decrypt_block,@function;)

_gcry_aes_amd64_decrypt_block:
        /* input:
         *      %rdi: keysched, CTX
         *      %rsi: dst
         *      %rdx: src
         *      %ecx: number of rounds.. 10, 12 or 14
         *      %r8:  decryption tables
         */
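        /*
         * Same register interface as _gcry_aes_amd64_encrypt_block above,
         * except that %r8 is expected to point at the decryption tables.
         */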
CFI_STARTPROC();
ENTER_SYSV_FUNC_PARAMS_5
        subq $(5 * 8), %rsp;
        CFI_ADJUST_CFA_OFFSET(5 * 8);
        movq %rsi, (0 * 8)(%rsp);
        movl %ecx, (1 * 8)(%rsp);
        movq %rbp, (2 * 8)(%rsp);
        movq %rbx, (3 * 8)(%rsp);
        movq %r12, (4 * 8)(%rsp);
        CFI_REL_OFFSET(%rbp, 2 * 8);
        CFI_REL_OFFSET(%rbx, 3 * 8);
        CFI_REL_OFFSET(%r12, 4 * 8);

        leaq (%r8), RTAB;
        /* read input block */
        movl 0 * 4(%rdx), RAd;
        movl 1 * 4(%rdx), RBd;
        movl 2 * 4(%rdx), RCd;
        movl 3 * 4(%rdx), RDd;
        cmpl $12, (1 * 8)(%rsp);
jnb .Ldec_256;
firstdecround(9);
.align 4
.Ldec_tail:
decround(8);
decround(7);
decround(6);
decround(5);
decround(4);
decround(3);
decround(2);
decround(1);
lastdecround(0);
/* write output block */
        movq (0 * 8)(%rsp), %rsi;
        movl RAd, 0 * 4(%rsi);
        movl RBd, 1 * 4(%rsi);
        movl RCd, 2 * 4(%rsi);
        movl RDd, 3 * 4(%rsi);
CFI_REMEMBER_STATE();
        movq (4 * 8)(%rsp), %r12;
        movq (3 * 8)(%rsp), %rbx;
        movq (2 * 8)(%rsp), %rbp;
        CFI_RESTORE(%r12);
        CFI_RESTORE(%rbx);
        CFI_RESTORE(%rbp);
        addq $(5 * 8), %rsp;
        CFI_ADJUST_CFA_OFFSET(-5 * 8);

        movl $(6 * 8), %eax;

        EXIT_SYSV_FUNC
        ret_spec_stop;

        CFI_RESTORE_STATE();
        .align 4
.Ldec_256:
        je .Ldec_192;

        firstdecround(13);
        decround(12);
        decround(11);
        decround(10);
        decround(9);

        jmp .Ldec_tail;

        .align 4
.Ldec_192:
        firstdecround(11);
        decround(10);
        decround(9);

        jmp .Ldec_tail;
        CFI_ENDPROC();
ELF(.size _gcry_aes_amd64_decrypt_block,.-_gcry_aes_amd64_decrypt_block;)
#endif /*USE_AES*/
#endif /*__x86_64*/