summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-03-26 11:04:34 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2009-03-26 11:04:34 -0700
commit562f477a54478002ddfbb5b85627c009ca41e71d (patch)
tree52384cc554ae64cc7a26878d64d606f40fd703ce /arch/x86
parentada19a31a90b4f46c040c25ef4ef8ffc203c7fc6 (diff)
parent949abe574739848b1e68271fbac86c3cb4506aad (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (29 commits) crypto: sha512-s390 - Add missing block size hwrng: timeriomem - Breaks an allyesconfig build on s390: nlattr: Fix build error with NET off crypto: testmgr - add zlib test crypto: zlib - New zlib crypto module, using pcomp crypto: testmgr - Add support for the pcomp interface crypto: compress - Add pcomp interface netlink: Move netlink attribute parsing support to lib crypto: Fix dead links hwrng: timeriomem - New driver crypto: chainiv - Use kcrypto_wq instead of keventd_wq crypto: cryptd - Per-CPU thread implementation based on kcrypto_wq crypto: api - Use dedicated workqueue for crypto subsystem crypto: testmgr - Test skciphers with no IVs crypto: aead - Avoid infinite loop when nivaead fails selftest crypto: skcipher - Avoid infinite loop when cipher fails selftest crypto: api - Fix crypto_alloc_tfm/create_create_tfm return convention crypto: api - crypto_alg_mod_lookup either tested or untested crypto: amcc - Add crypt4xx driver crypto: ansi_cprng - Add maintainer ...
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/crypto/Makefile3
-rw-r--r--arch/x86/crypto/aes-i586-asm_32.S18
-rw-r--r--arch/x86/crypto/aes-x86_64-asm_64.S6
-rw-r--r--arch/x86/crypto/aes_glue.c20
-rw-r--r--arch/x86/crypto/aesni-intel_asm.S896
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c461
-rw-r--r--arch/x86/include/asm/aes.h11
-rw-r--r--arch/x86/include/asm/cpufeature.h1
8 files changed, 1399 insertions, 17 deletions
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 903de4aa5094..ebe7deedd5b4 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
+obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o
@@ -19,3 +20,5 @@ salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
+
+aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o
diff --git a/arch/x86/crypto/aes-i586-asm_32.S b/arch/x86/crypto/aes-i586-asm_32.S
index e41b147f4509..b949ec2f9af4 100644
--- a/arch/x86/crypto/aes-i586-asm_32.S
+++ b/arch/x86/crypto/aes-i586-asm_32.S
@@ -41,14 +41,14 @@
#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words)
/* offsets to parameters with one register pushed onto stack */
-#define tfm 8
+#define ctx 8
#define out_blk 12
#define in_blk 16
-/* offsets in crypto_tfm structure */
-#define klen (crypto_tfm_ctx_offset + 0)
-#define ekey (crypto_tfm_ctx_offset + 4)
-#define dkey (crypto_tfm_ctx_offset + 244)
+/* offsets in crypto_aes_ctx structure */
+#define klen (480)
+#define ekey (0)
+#define dkey (240)
// register mapping for encrypt and decrypt subroutines
@@ -217,7 +217,7 @@
do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 */
// AES (Rijndael) Encryption Subroutine
-/* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */
+/* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
.global aes_enc_blk
@@ -228,7 +228,7 @@
aes_enc_blk:
push %ebp
- mov tfm(%esp),%ebp
+ mov ctx(%esp),%ebp
// CAUTION: the order and the values used in these assigns
// rely on the register mappings
@@ -292,7 +292,7 @@ aes_enc_blk:
ret
// AES (Rijndael) Decryption Subroutine
-/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */
+/* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
.global aes_dec_blk
@@ -303,7 +303,7 @@ aes_enc_blk:
aes_dec_blk:
push %ebp
- mov tfm(%esp),%ebp
+ mov ctx(%esp),%ebp
// CAUTION: the order and the values used in these assigns
// rely on the register mappings
diff --git a/arch/x86/crypto/aes-x86_64-asm_64.S b/arch/x86/crypto/aes-x86_64-asm_64.S
index a120f526c3df..5b577d5a059b 100644
--- a/arch/x86/crypto/aes-x86_64-asm_64.S
+++ b/arch/x86/crypto/aes-x86_64-asm_64.S
@@ -17,8 +17,6 @@
#include <asm/asm-offsets.h>
-#define BASE crypto_tfm_ctx_offset
-
#define R1 %rax
#define R1E %eax
#define R1X %ax
@@ -56,13 +54,13 @@
.align 8; \
FUNC: movq r1,r2; \
movq r3,r4; \
- leaq BASE+KEY+48+4(r8),r9; \
+ leaq KEY+48(r8),r9; \
movq r10,r11; \
movl (r7),r5 ## E; \
movl 4(r7),r1 ## E; \
movl 8(r7),r6 ## E; \
movl 12(r7),r7 ## E; \
- movl BASE+0(r8),r10 ## E; \
+ movl 480(r8),r10 ## E; \
xorl -48(r9),r5 ## E; \
xorl -44(r9),r1 ## E; \
xorl -40(r9),r6 ## E; \
diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c
index 71f457827116..49ae9fe32b22 100644
--- a/arch/x86/crypto/aes_glue.c
+++ b/arch/x86/crypto/aes_glue.c
@@ -5,17 +5,29 @@
#include <crypto/aes.h>
-asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in);
-asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in);
+asmlinkage void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
+asmlinkage void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
+
+void crypto_aes_encrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
+{
+ aes_enc_blk(ctx, dst, src);
+}
+EXPORT_SYMBOL_GPL(crypto_aes_encrypt_x86);
+
+void crypto_aes_decrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
+{
+ aes_dec_blk(ctx, dst, src);
+}
+EXPORT_SYMBOL_GPL(crypto_aes_decrypt_x86);
static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
- aes_enc_blk(tfm, dst, src);
+ aes_enc_blk(crypto_tfm_ctx(tfm), dst, src);
}
static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
- aes_dec_blk(tfm, dst, src);
+ aes_dec_blk(crypto_tfm_ctx(tfm), dst, src);
}
static struct crypto_alg aes_alg = {
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
new file mode 100644
index 000000000000..caba99601703
--- /dev/null
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -0,0 +1,896 @@
+/*
+ * Implement AES algorithm in Intel AES-NI instructions.
+ *
+ * The white paper of AES-NI instructions can be downloaded from:
+ * http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf
+ *
+ * Copyright (C) 2008, Intel Corp.
+ * Author: Huang Ying <ying.huang@intel.com>
+ * Vinodh Gopal <vinodh.gopal@intel.com>
+ * Kahraman Akdemir
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/linkage.h>
+
+.text
+
+#define STATE1 %xmm0
+#define STATE2 %xmm4
+#define STATE3 %xmm5
+#define STATE4 %xmm6
+#define STATE STATE1
+#define IN1 %xmm1
+#define IN2 %xmm7
+#define IN3 %xmm8
+#define IN4 %xmm9
+#define IN IN1
+#define KEY %xmm2
+#define IV %xmm3
+
+#define KEYP %rdi
+#define OUTP %rsi
+#define INP %rdx
+#define LEN %rcx
+#define IVP %r8
+#define KLEN %r9d
+#define T1 %r10
+#define TKEYP T1
+#define T2 %r11
+
+_key_expansion_128:
+_key_expansion_256a:
+ pshufd $0b11111111, %xmm1, %xmm1
+ shufps $0b00010000, %xmm0, %xmm4
+ pxor %xmm4, %xmm0
+ shufps $0b10001100, %xmm0, %xmm4
+ pxor %xmm4, %xmm0
+ pxor %xmm1, %xmm0
+ movaps %xmm0, (%rcx)
+ add $0x10, %rcx
+ ret
+
+_key_expansion_192a:
+ pshufd $0b01010101, %xmm1, %xmm1
+ shufps $0b00010000, %xmm0, %xmm4
+ pxor %xmm4, %xmm0
+ shufps $0b10001100, %xmm0, %xmm4
+ pxor %xmm4, %xmm0
+ pxor %xmm1, %xmm0
+
+ movaps %xmm2, %xmm5
+ movaps %xmm2, %xmm6
+ pslldq $4, %xmm5
+ pshufd $0b11111111, %xmm0, %xmm3
+ pxor %xmm3, %xmm2
+ pxor %xmm5, %xmm2
+
+ movaps %xmm0, %xmm1
+ shufps $0b01000100, %xmm0, %xmm6
+ movaps %xmm6, (%rcx)
+ shufps $0b01001110, %xmm2, %xmm1
+ movaps %xmm1, 16(%rcx)
+ add $0x20, %rcx
+ ret
+
+_key_expansion_192b:
+ pshufd $0b01010101, %xmm1, %xmm1
+ shufps $0b00010000, %xmm0, %xmm4
+ pxor %xmm4, %xmm0
+ shufps $0b10001100, %xmm0, %xmm4
+ pxor %xmm4, %xmm0
+ pxor %xmm1, %xmm0
+
+ movaps %xmm2, %xmm5
+ pslldq $4, %xmm5
+ pshufd $0b11111111, %xmm0, %xmm3
+ pxor %xmm3, %xmm2
+ pxor %xmm5, %xmm2
+
+ movaps %xmm0, (%rcx)
+ add $0x10, %rcx
+ ret
+
+_key_expansion_256b:
+ pshufd $0b10101010, %xmm1, %xmm1
+ shufps $0b00010000, %xmm2, %xmm4
+ pxor %xmm4, %xmm2
+ shufps $0b10001100, %xmm2, %xmm4
+ pxor %xmm4, %xmm2
+ pxor %xmm1, %xmm2
+ movaps %xmm2, (%rcx)
+ add $0x10, %rcx
+ ret
+
+/*
+ * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
+ * unsigned int key_len)
+ */
+ENTRY(aesni_set_key)
+ movups (%rsi), %xmm0 # user key (first 16 bytes)
+ movaps %xmm0, (%rdi)
+ lea 0x10(%rdi), %rcx # key addr
+ movl %edx, 480(%rdi)
+ pxor %xmm4, %xmm4 # xmm4 is assumed 0 in _key_expansion_x
+ cmp $24, %dl
+ jb .Lenc_key128
+ je .Lenc_key192
+ movups 0x10(%rsi), %xmm2 # other user key
+ movaps %xmm2, (%rcx)
+ add $0x10, %rcx
+ # aeskeygenassist $0x1, %xmm2, %xmm1 # round 1
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01
+ call _key_expansion_256a
+ # aeskeygenassist $0x1, %xmm0, %xmm1
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01
+ call _key_expansion_256b
+ # aeskeygenassist $0x2, %xmm2, %xmm1 # round 2
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02
+ call _key_expansion_256a
+ # aeskeygenassist $0x2, %xmm0, %xmm1
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02
+ call _key_expansion_256b
+ # aeskeygenassist $0x4, %xmm2, %xmm1 # round 3
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04
+ call _key_expansion_256a
+ # aeskeygenassist $0x4, %xmm0, %xmm1
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04
+ call _key_expansion_256b
+ # aeskeygenassist $0x8, %xmm2, %xmm1 # round 4
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08
+ call _key_expansion_256a
+ # aeskeygenassist $0x8, %xmm0, %xmm1
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08
+ call _key_expansion_256b
+ # aeskeygenassist $0x10, %xmm2, %xmm1 # round 5
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10
+ call _key_expansion_256a
+ # aeskeygenassist $0x10, %xmm0, %xmm1
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10
+ call _key_expansion_256b
+ # aeskeygenassist $0x20, %xmm2, %xmm1 # round 6
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20
+ call _key_expansion_256a
+ # aeskeygenassist $0x20, %xmm0, %xmm1
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20
+ call _key_expansion_256b
+ # aeskeygenassist $0x40, %xmm2, %xmm1 # round 7
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40
+ call _key_expansion_256a
+ jmp .Ldec_key
+.Lenc_key192:
+ movq 0x10(%rsi), %xmm2 # other user key
+ # aeskeygenassist $0x1, %xmm2, %xmm1 # round 1
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01
+ call _key_expansion_192a
+ # aeskeygenassist $0x2, %xmm2, %xmm1 # round 2
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02
+ call _key_expansion_192b
+ # aeskeygenassist $0x4, %xmm2, %xmm1 # round 3
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04
+ call _key_expansion_192a
+ # aeskeygenassist $0x8, %xmm2, %xmm1 # round 4
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08
+ call _key_expansion_192b
+ # aeskeygenassist $0x10, %xmm2, %xmm1 # round 5
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10
+ call _key_expansion_192a
+ # aeskeygenassist $0x20, %xmm2, %xmm1 # round 6
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20
+ call _key_expansion_192b
+ # aeskeygenassist $0x40, %xmm2, %xmm1 # round 7
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40
+ call _key_expansion_192a
+ # aeskeygenassist $0x80, %xmm2, %xmm1 # round 8
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x80
+ call _key_expansion_192b
+ jmp .Ldec_key
+.Lenc_key128:
+ # aeskeygenassist $0x1, %xmm0, %xmm1 # round 1
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01
+ call _key_expansion_128
+ # aeskeygenassist $0x2, %xmm0, %xmm1 # round 2
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02
+ call _key_expansion_128
+ # aeskeygenassist $0x4, %xmm0, %xmm1 # round 3
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04
+ call _key_expansion_128
+ # aeskeygenassist $0x8, %xmm0, %xmm1 # round 4
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08
+ call _key_expansion_128
+ # aeskeygenassist $0x10, %xmm0, %xmm1 # round 5
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10
+ call _key_expansion_128
+ # aeskeygenassist $0x20, %xmm0, %xmm1 # round 6
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20
+ call _key_expansion_128
+ # aeskeygenassist $0x40, %xmm0, %xmm1 # round 7
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x40
+ call _key_expansion_128
+ # aeskeygenassist $0x80, %xmm0, %xmm1 # round 8
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x80
+ call _key_expansion_128
+ # aeskeygenassist $0x1b, %xmm0, %xmm1 # round 9
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x1b
+ call _key_expansion_128
+ # aeskeygenassist $0x36, %xmm0, %xmm1 # round 10
+ .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x36
+ call _key_expansion_128
+.Ldec_key:
+ sub $0x10, %rcx
+ movaps (%rdi), %xmm0
+ movaps (%rcx), %xmm1
+ movaps %xmm0, 240(%rcx)
+ movaps %xmm1, 240(%rdi)
+ add $0x10, %rdi
+ lea 240-16(%rcx), %rsi
+.align 4
+.Ldec_key_loop:
+ movaps (%rdi), %xmm0
+ # aesimc %xmm0, %xmm1
+ .byte 0x66, 0x0f, 0x38, 0xdb, 0xc8
+ movaps %xmm1, (%rsi)
+ add $0x10, %rdi
+ sub $0x10, %rsi
+ cmp %rcx, %rdi
+ jb .Ldec_key_loop
+ xor %rax, %rax
+ ret
+
+/*
+ * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
+ */
+ENTRY(aesni_enc)
+ movl 480(KEYP), KLEN # key length
+ movups (INP), STATE # input
+ call _aesni_enc1
+ movups STATE, (OUTP) # output
+ ret
+
+/*
+ * _aesni_enc1: internal ABI
+ * input:
+ * KEYP: key struct pointer
+ * KLEN: round count
+ * STATE: initial state (input)
+ * output:
+ * STATE: finial state (output)
+ * changed:
+ * KEY
+ * TKEYP (T1)
+ */
+_aesni_enc1:
+ movaps (KEYP), KEY # key
+ mov KEYP, TKEYP
+ pxor KEY, STATE # round 0
+ add $0x30, TKEYP
+ cmp $24, KLEN
+ jb .Lenc128
+ lea 0x20(TKEYP), TKEYP
+ je .Lenc192
+ add $0x20, TKEYP
+ movaps -0x60(TKEYP), KEY
+ # aesenc KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ movaps -0x50(TKEYP), KEY
+ # aesenc KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+.align 4
+.Lenc192:
+ movaps -0x40(TKEYP), KEY
+ # aesenc KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ movaps -0x30(TKEYP), KEY
+ # aesenc KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+.align 4
+.Lenc128:
+ movaps -0x20(TKEYP), KEY
+ # aesenc KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ movaps -0x10(TKEYP), KEY
+ # aesenc KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ movaps (TKEYP), KEY
+ # aesenc KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ movaps 0x10(TKEYP), KEY
+ # aesenc KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ movaps 0x20(TKEYP), KEY
+ # aesenc KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ movaps 0x30(TKEYP), KEY
+ # aesenc KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ movaps 0x40(TKEYP), KEY
+ # aesenc KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ movaps 0x50(TKEYP), KEY
+ # aesenc KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ movaps 0x60(TKEYP), KEY
+ # aesenc KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ movaps 0x70(TKEYP), KEY
+ # aesenclast KEY, STATE # last round
+ .byte 0x66, 0x0f, 0x38, 0xdd, 0xc2
+ ret
+
+/*
+ * _aesni_enc4: internal ABI
+ * input:
+ * KEYP: key struct pointer
+ * KLEN: round count
+ * STATE1: initial state (input)
+ * STATE2
+ * STATE3
+ * STATE4
+ * output:
+ * STATE1: finial state (output)
+ * STATE2
+ * STATE3
+ * STATE4
+ * changed:
+ * KEY
+ * TKEYP (T1)
+ */
+_aesni_enc4:
+ movaps (KEYP), KEY # key
+ mov KEYP, TKEYP
+ pxor KEY, STATE1 # round 0
+ pxor KEY, STATE2
+ pxor KEY, STATE3
+ pxor KEY, STATE4
+ add $0x30, TKEYP
+ cmp $24, KLEN
+ jb .L4enc128
+ lea 0x20(TKEYP), TKEYP
+ je .L4enc192
+ add $0x20, TKEYP
+ movaps -0x60(TKEYP), KEY
+ # aesenc KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ # aesenc KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+ # aesenc KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+ # aesenc KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+ movaps -0x50(TKEYP), KEY
+ # aesenc KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ # aesenc KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+ # aesenc KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+ # aesenc KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+#.align 4
+.L4enc192:
+ movaps -0x40(TKEYP), KEY
+ # aesenc KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ # aesenc KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+ # aesenc KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+ # aesenc KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+ movaps -0x30(TKEYP), KEY
+ # aesenc KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ # aesenc KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+ # aesenc KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+ # aesenc KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+#.align 4
+.L4enc128:
+ movaps -0x20(TKEYP), KEY
+ # aesenc KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ # aesenc KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+ # aesenc KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+ # aesenc KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+ movaps -0x10(TKEYP), KEY
+ # aesenc KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ # aesenc KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+ # aesenc KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+ # aesenc KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+ movaps (TKEYP), KEY
+ # aesenc KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ # aesenc KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+ # aesenc KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+ # aesenc KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+ movaps 0x10(TKEYP), KEY
+ # aesenc KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ # aesenc KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+ # aesenc KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+ # aesenc KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+ movaps 0x20(TKEYP), KEY
+ # aesenc KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ # aesenc KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+ # aesenc KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+ # aesenc KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+ movaps 0x30(TKEYP), KEY
+ # aesenc KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ # aesenc KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+ # aesenc KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+ # aesenc KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+ movaps 0x40(TKEYP), KEY
+ # aesenc KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ # aesenc KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+ # aesenc KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+ # aesenc KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+ movaps 0x50(TKEYP), KEY
+ # aesenc KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ # aesenc KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+ # aesenc KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+ # aesenc KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+ movaps 0x60(TKEYP), KEY
+ # aesenc KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ # aesenc KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
+ # aesenc KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
+ # aesenc KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+ movaps 0x70(TKEYP), KEY
+ # aesenclast KEY, STATE1 # last round
+ .byte 0x66, 0x0f, 0x38, 0xdd, 0xc2
+ # aesenclast KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xdd, 0xe2
+ # aesenclast KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xdd, 0xea
+ # aesenclast KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xdd, 0xf2
+ ret
+
+/*
+ * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
+ */
+ENTRY(aesni_dec)
+ mov 480(KEYP), KLEN # key length
+ add $240, KEYP
+ movups (INP), STATE # input
+ call _aesni_dec1
+ movups STATE, (OUTP) #output
+ ret
+
+/*
+ * _aesni_dec1: internal ABI
+ * input:
+ * KEYP: key struct pointer
+ * KLEN: key length
+ * STATE: initial state (input)
+ * output:
+ * STATE: finial state (output)
+ * changed:
+ * KEY
+ * TKEYP (T1)
+ */
+_aesni_dec1:
+ movaps (KEYP), KEY # key
+ mov KEYP, TKEYP
+ pxor KEY, STATE # round 0
+ add $0x30, TKEYP
+ cmp $24, KLEN
+ jb .Ldec128
+ lea 0x20(TKEYP), TKEYP
+ je .Ldec192
+ add $0x20, TKEYP
+ movaps -0x60(TKEYP), KEY
+ # aesdec KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ movaps -0x50(TKEYP), KEY
+ # aesdec KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+.align 4
+.Ldec192:
+ movaps -0x40(TKEYP), KEY
+ # aesdec KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ movaps -0x30(TKEYP), KEY
+ # aesdec KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+.align 4
+.Ldec128:
+ movaps -0x20(TKEYP), KEY
+ # aesdec KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ movaps -0x10(TKEYP), KEY
+ # aesdec KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ movaps (TKEYP), KEY
+ # aesdec KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ movaps 0x10(TKEYP), KEY
+ # aesdec KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ movaps 0x20(TKEYP), KEY
+ # aesdec KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ movaps 0x30(TKEYP), KEY
+ # aesdec KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ movaps 0x40(TKEYP), KEY
+ # aesdec KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ movaps 0x50(TKEYP), KEY
+ # aesdec KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ movaps 0x60(TKEYP), KEY
+ # aesdec KEY, STATE
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ movaps 0x70(TKEYP), KEY
+ # aesdeclast KEY, STATE # last round
+ .byte 0x66, 0x0f, 0x38, 0xdf, 0xc2
+ ret
+
+/*
+ * _aesni_dec4: internal ABI
+ * input:
+ * KEYP: key struct pointer
+ * KLEN: key length
+ * STATE1: initial state (input)
+ * STATE2
+ * STATE3
+ * STATE4
+ * output:
+ * STATE1: finial state (output)
+ * STATE2
+ * STATE3
+ * STATE4
+ * changed:
+ * KEY
+ * TKEYP (T1)
+ */
+_aesni_dec4:
+ movaps (KEYP), KEY # key
+ mov KEYP, TKEYP
+ pxor KEY, STATE1 # round 0
+ pxor KEY, STATE2
+ pxor KEY, STATE3
+ pxor KEY, STATE4
+ add $0x30, TKEYP
+ cmp $24, KLEN
+ jb .L4dec128
+ lea 0x20(TKEYP), TKEYP
+ je .L4dec192
+ add $0x20, TKEYP
+ movaps -0x60(TKEYP), KEY
+ # aesdec KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ # aesdec KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+ # aesdec KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xea
+ # aesdec KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+ movaps -0x50(TKEYP), KEY
+ # aesdec KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ # aesdec KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+ # aesdec KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xea
+ # aesdec KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+.align 4
+.L4dec192:
+ movaps -0x40(TKEYP), KEY
+ # aesdec KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ # aesdec KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+ # aesdec KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xea
+ # aesdec KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+ movaps -0x30(TKEYP), KEY
+ # aesdec KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ # aesdec KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+ # aesdec KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xea
+ # aesdec KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+.align 4
+.L4dec128:
+ movaps -0x20(TKEYP), KEY
+ # aesdec KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ # aesdec KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+ # aesdec KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xea
+ # aesdec KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+ movaps -0x10(TKEYP), KEY
+ # aesdec KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ # aesdec KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+ # aesdec KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xea
+ # aesdec KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+ movaps (TKEYP), KEY
+ # aesdec KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ # aesdec KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+ # aesdec KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xea
+ # aesdec KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+ movaps 0x10(TKEYP), KEY
+ # aesdec KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ # aesdec KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+ # aesdec KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xea
+ # aesdec KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+ movaps 0x20(TKEYP), KEY
+ # aesdec KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ # aesdec KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+ # aesdec KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xea
+ # aesdec KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+ movaps 0x30(TKEYP), KEY
+ # aesdec KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ # aesdec KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+ # aesdec KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xea
+ # aesdec KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+ movaps 0x40(TKEYP), KEY
+ # aesdec KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ # aesdec KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+ # aesdec KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xea
+ # aesdec KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+ movaps 0x50(TKEYP), KEY
+ # aesdec KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ # aesdec KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+ # aesdec KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xea
+ # aesdec KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+ movaps 0x60(TKEYP), KEY
+ # aesdec KEY, STATE1
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ # aesdec KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
+ # aesdec KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xea
+ # aesdec KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+ movaps 0x70(TKEYP), KEY
+ # aesdeclast KEY, STATE1 # last round
+ .byte 0x66, 0x0f, 0x38, 0xdf, 0xc2
+ # aesdeclast KEY, STATE2
+ .byte 0x66, 0x0f, 0x38, 0xdf, 0xe2
+ # aesdeclast KEY, STATE3
+ .byte 0x66, 0x0f, 0x38, 0xdf, 0xea
+ # aesdeclast KEY, STATE4
+ .byte 0x66, 0x0f, 0x38, 0xdf, 0xf2
+ ret
+
+/*
+ * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
+ * size_t len)
+ */
+ENTRY(aesni_ecb_enc)
+ test LEN, LEN # check length
+ jz .Lecb_enc_ret
+ mov 480(KEYP), KLEN
+ cmp $16, LEN
+ jb .Lecb_enc_ret
+ cmp $64, LEN
+ jb .Lecb_enc_loop1
+.align 4
+.Lecb_enc_loop4:
+ movups (INP), STATE1
+ movups 0x10(INP), STATE2
+ movups 0x20(INP), STATE3
+ movups 0x30(INP), STATE4
+ call _aesni_enc4
+ movups STATE1, (OUTP)
+ movups STATE2, 0x10(OUTP)
+ movups STATE3, 0x20(OUTP)
+ movups STATE4, 0x30(OUTP)
+ sub $64, LEN
+ add $64, INP
+ add $64, OUTP
+ cmp $64, LEN
+ jge .Lecb_enc_loop4
+ cmp $16, LEN
+ jb .Lecb_enc_ret
+.align 4
+.Lecb_enc_loop1:
+ movups (INP), STATE1
+ call _aesni_enc1
+ movups STATE1, (OUTP)
+ sub $16, LEN
+ add $16, INP
+ add $16, OUTP
+ cmp $16, LEN
+ jge .Lecb_enc_loop1
+.Lecb_enc_ret:
+ ret
+
+/*
+ * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
+ * size_t len);
+ */
+ENTRY(aesni_ecb_dec)
+ test LEN, LEN
+ jz .Lecb_dec_ret
+ mov 480(KEYP), KLEN
+ add $240, KEYP
+ cmp $16, LEN
+ jb .Lecb_dec_ret
+ cmp $64, LEN
+ jb .Lecb_dec_loop1
+.align 4
+.Lecb_dec_loop4:
+ movups (INP), STATE1
+ movups 0x10(INP), STATE2
+ movups 0x20(INP), STATE3
+ movups 0x30(INP), STATE4
+ call _aesni_dec4
+ movups STATE1, (OUTP)
+ movups STATE2, 0x10(OUTP)
+ movups STATE3, 0x20(OUTP)
+ movups STATE4, 0x30(OUTP)
+ sub $64, LEN
+ add $64, INP
+ add $64, OUTP
+ cmp $64, LEN
+ jge .Lecb_dec_loop4
+ cmp $16, LEN
+ jb .Lecb_dec_ret
+.align 4
+.Lecb_dec_loop1:
+ movups (INP), STATE1
+ call _aesni_dec1
+ movups STATE1, (OUTP)
+ sub $16, LEN
+ add $16, INP
+ add $16, OUTP
+ cmp $16, LEN
+ jge .Lecb_dec_loop1
+.Lecb_dec_ret:
+ ret
+
+/*
+ * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
+ * size_t len, u8 *iv)
+ */
+ENTRY(aesni_cbc_enc)
+ cmp $16, LEN
+ jb .Lcbc_enc_ret
+ mov 480(KEYP), KLEN
+ movups (IVP), STATE # load iv as initial state
+.align 4
+.Lcbc_enc_loop:
+ movups (INP), IN # load input
+ pxor IN, STATE
+ call _aesni_enc1
+ movups STATE, (OUTP) # store output
+ sub $16, LEN
+ add $16, INP
+ add $16, OUTP
+ cmp $16, LEN
+ jge .Lcbc_enc_loop
+ movups STATE, (IVP)
+.Lcbc_enc_ret:
+ ret
+
+/*
+ * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
+ * size_t len, u8 *iv)
+ */
+ENTRY(aesni_cbc_dec)
+ cmp $16, LEN
+ jb .Lcbc_dec_ret
+ mov 480(KEYP), KLEN
+ add $240, KEYP
+ movups (IVP), IV
+ cmp $64, LEN
+ jb .Lcbc_dec_loop1
+.align 4
+.Lcbc_dec_loop4:
+ movups (INP), IN1
+ movaps IN1, STATE1
+ movups 0x10(INP), IN2
+ movaps IN2, STATE2
+ movups 0x20(INP), IN3
+ movaps IN3, STATE3
+ movups 0x30(INP), IN4
+ movaps IN4, STATE4
+ call _aesni_dec4
+ pxor IV, STATE1
+ pxor IN1, STATE2
+ pxor IN2, STATE3
+ pxor IN3, STATE4
+ movaps IN4, IV
+ movups STATE1, (OUTP)
+ movups STATE2, 0x10(OUTP)
+ movups STATE3, 0x20(OUTP)
+ movups STATE4, 0x30(OUTP)
+ sub $64, LEN
+ add $64, INP
+ add $64, OUTP
+ cmp $64, LEN
+ jge .Lcbc_dec_loop4
+ cmp $16, LEN
+ jb .Lcbc_dec_ret
+.align 4
+.Lcbc_dec_loop1:
+ movups (INP), IN
+ movaps IN, STATE
+ call _aesni_dec1
+ pxor IV, STATE
+ movups STATE, (OUTP)
+ movaps IN, IV
+ sub $16, LEN
+ add $16, INP
+ add $16, OUTP
+ cmp $16, LEN
+ jge .Lcbc_dec_loop1
+ movups IV, (IVP)
+.Lcbc_dec_ret:
+ ret
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
new file mode 100644
index 000000000000..02af0af65497
--- /dev/null
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -0,0 +1,461 @@
+/*
+ * Support for Intel AES-NI instructions. This file contains glue
+ * code, the real AES implementation is in intel-aes_asm.S.
+ *
+ * Copyright (C) 2008, Intel Corp.
+ * Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/hardirq.h>
+#include <linux/types.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <crypto/algapi.h>
+#include <crypto/aes.h>
+#include <crypto/cryptd.h>
+#include <asm/i387.h>
+#include <asm/aes.h>
+
+struct async_aes_ctx {
+ struct cryptd_ablkcipher *cryptd_tfm;
+};
+
+#define AESNI_ALIGN 16
+#define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1))
+
+asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
+ unsigned int key_len);
+asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out,
+ const u8 *in);
+asmlinkage void aesni_dec(struct crypto_aes_ctx *ctx, u8 *out,
+ const u8 *in);
+asmlinkage void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *out,
+ const u8 *in, unsigned int len);
+asmlinkage void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *out,
+ const u8 *in, unsigned int len);
+asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
+ const u8 *in, unsigned int len, u8 *iv);
+asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
+ const u8 *in, unsigned int len, u8 *iv);
+
+static inline int kernel_fpu_using(void)
+{
+ if (in_interrupt() && !(read_cr0() & X86_CR0_TS))
+ return 1;
+ return 0;
+}
+
+static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx)
+{
+ unsigned long addr = (unsigned long)raw_ctx;
+ unsigned long align = AESNI_ALIGN;
+
+ if (align <= crypto_tfm_ctx_alignment())
+ align = 1;
+ return (struct crypto_aes_ctx *)ALIGN(addr, align);
+}
+
+static int aes_set_key_common(struct crypto_tfm *tfm, void *raw_ctx,
+ const u8 *in_key, unsigned int key_len)
+{
+ struct crypto_aes_ctx *ctx = aes_ctx(raw_ctx);
+ u32 *flags = &tfm->crt_flags;
+ int err;
+
+ if (key_len != AES_KEYSIZE_128 && key_len != AES_KEYSIZE_192 &&
+ key_len != AES_KEYSIZE_256) {
+ *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ return -EINVAL;
+ }
+
+ if (kernel_fpu_using())
+ err = crypto_aes_expand_key(ctx, in_key, key_len);
+ else {
+ kernel_fpu_begin();
+ err = aesni_set_key(ctx, in_key, key_len);
+ kernel_fpu_end();
+ }
+
+ return err;
+}
+
+static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ return aes_set_key_common(tfm, crypto_tfm_ctx(tfm), in_key, key_len);
+}
+
+static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+ struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
+
+ if (kernel_fpu_using())
+ crypto_aes_encrypt_x86(ctx, dst, src);
+ else {
+ kernel_fpu_begin();
+ aesni_enc(ctx, dst, src);
+ kernel_fpu_end();
+ }
+}
+
+static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+ struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
+
+ if (kernel_fpu_using())
+ crypto_aes_decrypt_x86(ctx, dst, src);
+ else {
+ kernel_fpu_begin();
+ aesni_dec(ctx, dst, src);
+ kernel_fpu_end();
+ }
+}
+
+static struct crypto_alg aesni_alg = {
+ .cra_name = "aes",
+ .cra_driver_name = "aes-aesni",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1,
+ .cra_alignmask = 0,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(aesni_alg.cra_list),
+ .cra_u = {
+ .cipher = {
+ .cia_min_keysize = AES_MIN_KEY_SIZE,
+ .cia_max_keysize = AES_MAX_KEY_SIZE,
+ .cia_setkey = aes_set_key,
+ .cia_encrypt = aes_encrypt,
+ .cia_decrypt = aes_decrypt
+ }
+ }
+};
+
+static int ecb_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
+ struct blkcipher_walk walk;
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ kernel_fpu_begin();
+ while ((nbytes = walk.nbytes)) {
+ aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
+ nbytes & AES_BLOCK_MASK);
+ nbytes &= AES_BLOCK_SIZE - 1;
+ err = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+ kernel_fpu_end();
+
+ return err;
+}
+
+static int ecb_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
+ struct blkcipher_walk walk;
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ kernel_fpu_begin();
+ while ((nbytes = walk.nbytes)) {
+ aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
+ nbytes & AES_BLOCK_MASK);
+ nbytes &= AES_BLOCK_SIZE - 1;
+ err = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+ kernel_fpu_end();
+
+ return err;
+}
+
+static struct crypto_alg blk_ecb_alg = {
+ .cra_name = "__ecb-aes-aesni",
+ .cra_driver_name = "__driver-ecb-aes-aesni",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1,
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(blk_ecb_alg.cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .setkey = aes_set_key,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ },
+ },
+};
+
+static int cbc_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
+ struct blkcipher_walk walk;
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ kernel_fpu_begin();
+ while ((nbytes = walk.nbytes)) {
+ aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
+ nbytes & AES_BLOCK_MASK, walk.iv);
+ nbytes &= AES_BLOCK_SIZE - 1;
+ err = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+ kernel_fpu_end();
+
+ return err;
+}
+
+static int cbc_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
+ struct blkcipher_walk walk;
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ kernel_fpu_begin();
+ while ((nbytes = walk.nbytes)) {
+ aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
+ nbytes & AES_BLOCK_MASK, walk.iv);
+ nbytes &= AES_BLOCK_SIZE - 1;
+ err = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+ kernel_fpu_end();
+
+ return err;
+}
+
+static struct crypto_alg blk_cbc_alg = {
+ .cra_name = "__cbc-aes-aesni",
+ .cra_driver_name = "__driver-cbc-aes-aesni",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1,
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(blk_cbc_alg.cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .setkey = aes_set_key,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ },
+ },
+};
+
+static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
+ unsigned int key_len)
+{
+ struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+
+ return crypto_ablkcipher_setkey(&ctx->cryptd_tfm->base, key, key_len);
+}
+
+static int ablk_encrypt(struct ablkcipher_request *req)
+{
+ struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+ struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+
+ if (kernel_fpu_using()) {
+ struct ablkcipher_request *cryptd_req =
+ ablkcipher_request_ctx(req);
+ memcpy(cryptd_req, req, sizeof(*req));
+ ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
+ return crypto_ablkcipher_encrypt(cryptd_req);
+ } else {
+ struct blkcipher_desc desc;
+ desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
+ desc.info = req->info;
+ desc.flags = 0;
+ return crypto_blkcipher_crt(desc.tfm)->encrypt(
+ &desc, req->dst, req->src, req->nbytes);
+ }
+}
+
+static int ablk_decrypt(struct ablkcipher_request *req)
+{
+ struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+ struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+
+ if (kernel_fpu_using()) {
+ struct ablkcipher_request *cryptd_req =
+ ablkcipher_request_ctx(req);
+ memcpy(cryptd_req, req, sizeof(*req));
+ ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
+ return crypto_ablkcipher_decrypt(cryptd_req);
+ } else {
+ struct blkcipher_desc desc;
+ desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
+ desc.info = req->info;
+ desc.flags = 0;
+ return crypto_blkcipher_crt(desc.tfm)->decrypt(
+ &desc, req->dst, req->src, req->nbytes);
+ }
+}
+
+static void ablk_exit(struct crypto_tfm *tfm)
+{
+ struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ cryptd_free_ablkcipher(ctx->cryptd_tfm);
+}
+
+static void ablk_init_common(struct crypto_tfm *tfm,
+ struct cryptd_ablkcipher *cryptd_tfm)
+{
+ struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ ctx->cryptd_tfm = cryptd_tfm;
+ tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
+ crypto_ablkcipher_reqsize(&cryptd_tfm->base);
+}
+
+static int ablk_ecb_init(struct crypto_tfm *tfm)
+{
+ struct cryptd_ablkcipher *cryptd_tfm;
+
+ cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ecb-aes-aesni", 0, 0);
+ if (IS_ERR(cryptd_tfm))
+ return PTR_ERR(cryptd_tfm);
+ ablk_init_common(tfm, cryptd_tfm);
+ return 0;
+}
+
+static struct crypto_alg ablk_ecb_alg = {
+ .cra_name = "ecb(aes)",
+ .cra_driver_name = "ecb-aes-aesni",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_aes_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(ablk_ecb_alg.cra_list),
+ .cra_init = ablk_ecb_init,
+ .cra_exit = ablk_exit,
+ .cra_u = {
+ .ablkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ },
+ },
+};
+
+static int ablk_cbc_init(struct crypto_tfm *tfm)
+{
+ struct cryptd_ablkcipher *cryptd_tfm;
+
+ cryptd_tfm = cryptd_alloc_ablkcipher("__driver-cbc-aes-aesni", 0, 0);
+ if (IS_ERR(cryptd_tfm))
+ return PTR_ERR(cryptd_tfm);
+ ablk_init_common(tfm, cryptd_tfm);
+ return 0;
+}
+
+static struct crypto_alg ablk_cbc_alg = {
+ .cra_name = "cbc(aes)",
+ .cra_driver_name = "cbc-aes-aesni",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_aes_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(ablk_cbc_alg.cra_list),
+ .cra_init = ablk_cbc_init,
+ .cra_exit = ablk_exit,
+ .cra_u = {
+ .ablkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ },
+ },
+};
+
+static int __init aesni_init(void)
+{
+ int err;
+
+ if (!cpu_has_aes) {
+ printk(KERN_ERR "Intel AES-NI instructions are not detected.\n");
+ return -ENODEV;
+ }
+ if ((err = crypto_register_alg(&aesni_alg)))
+ goto aes_err;
+ if ((err = crypto_register_alg(&blk_ecb_alg)))
+ goto blk_ecb_err;
+ if ((err = crypto_register_alg(&blk_cbc_alg)))
+ goto blk_cbc_err;
+ if ((err = crypto_register_alg(&ablk_ecb_alg)))
+ goto ablk_ecb_err;
+ if ((err = crypto_register_alg(&ablk_cbc_alg)))
+ goto ablk_cbc_err;
+
+ return err;
+
+ablk_cbc_err:
+ crypto_unregister_alg(&ablk_ecb_alg);
+ablk_ecb_err:
+ crypto_unregister_alg(&blk_cbc_alg);
+blk_cbc_err:
+ crypto_unregister_alg(&blk_ecb_alg);
+blk_ecb_err:
+ crypto_unregister_alg(&aesni_alg);
+aes_err:
+ return err;
+}
+
+static void __exit aesni_exit(void)
+{
+ crypto_unregister_alg(&ablk_cbc_alg);
+ crypto_unregister_alg(&ablk_ecb_alg);
+ crypto_unregister_alg(&blk_cbc_alg);
+ crypto_unregister_alg(&blk_ecb_alg);
+ crypto_unregister_alg(&aesni_alg);
+}
+
+module_init(aesni_init);
+module_exit(aesni_exit);
+
+MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, Intel AES-NI instructions optimized");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("aes");
diff --git a/arch/x86/include/asm/aes.h b/arch/x86/include/asm/aes.h
new file mode 100644
index 000000000000..80545a1cbe39
--- /dev/null
+++ b/arch/x86/include/asm/aes.h
@@ -0,0 +1,11 @@
+#ifndef ASM_X86_AES_H
+#define ASM_X86_AES_H
+
+#include <linux/crypto.h>
+#include <crypto/aes.h>
+
+void crypto_aes_encrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst,
+ const u8 *src);
+void crypto_aes_decrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst,
+ const u8 *src);
+#endif
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 7301e60dc4a8..0beba0d1468d 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -213,6 +213,7 @@ extern const char * const x86_power_flags[32];
#define cpu_has_xmm boot_cpu_has(X86_FEATURE_XMM)
#define cpu_has_xmm2 boot_cpu_has(X86_FEATURE_XMM2)
#define cpu_has_xmm3 boot_cpu_has(X86_FEATURE_XMM3)
+#define cpu_has_aes boot_cpu_has(X86_FEATURE_AES)
#define cpu_has_ht boot_cpu_has(X86_FEATURE_HT)
#define cpu_has_mp boot_cpu_has(X86_FEATURE_MP)
#define cpu_has_nx boot_cpu_has(X86_FEATURE_NX)