2 ; ---------------------------------------------------------------------------
3 ; Copyright (c) 2002, Dr Brian Gladman, Worcester, UK. All rights reserved.
7 ; The free distribution and use of this software in both source and binary
8 ; form is allowed (with or without changes) provided that:
10 ; 1. distributions of this source code include the above copyright
11 ; notice, this list of conditions and the following disclaimer;
13 ; 2. distributions in binary form include the above copyright
14 ; notice, this list of conditions and the following disclaimer
15 ; in the documentation and/or other associated materials;
17 ; 3. the copyright holder's name is not used to endorse products
18 ; built using this software without specific written permission.
20 ; ALTERNATIVELY, provided that this notice is retained in full, this product
21 ; may be distributed under the terms of the GNU General Public License (GPL),
22 ; in which case the provisions of the GPL apply INSTEAD OF those given above.
26 ; This software is provided 'as is' with no explicit or implied warranties
27 ; in respect of its properties, including, but not limited to, correctness
28 ; and/or fitness for purpose.
29 ; ---------------------------------------------------------------------------
32 ; An AES implementation for x86 processors using the YASM (or NASM) assembler.
33 ; This is an assembler implementation that covers encryption and decryption
34 ; only and is intended as a replacement of the C file aescrypt.c. It hence
35 ; requires the file aeskey.c for keying and aestab.c for the AES tables. It
36 ; employs full tables rather than compressed tables.
38 ; This code provides the standard AES block size (128 bits, 16 bytes) and the
39 ; three standard AES key sizes (128, 192 and 256 bits). It has the same call
40 ; interface as my C implementation. The ebx, esi, edi and ebp registers are
41 ; preserved across calls but eax, ecx and edx and the arithmetic status flags
42 ; are not. It is also important that the defines below match those used in the
43 ; C code. This code uses the VC++ register saving conventions; if it is used
44 ; with another compiler, conventions for using and saving registers may need to
45 ; be checked (and calling conventions). The YASM command line for the VC++
46 ; custom build step is:
48 ; yasm -Xvc -f win32 -o "$(TargetDir)\$(InputName).obj" "$(InputPath)"
50 ; The calling interfaces are:
52 ; AES_RETURN aes_encrypt(const unsigned char in_blk[],
53 ; unsigned char out_blk[], const aes_encrypt_ctx cx[1]);
55 ; AES_RETURN aes_decrypt(const unsigned char in_blk[],
56 ; unsigned char out_blk[], const aes_decrypt_ctx cx[1]);
58 ; AES_RETURN aes_encrypt_key<NNN>(const unsigned char key[],
59 ; const aes_encrypt_ctx cx[1]);
61 ; AES_RETURN aes_decrypt_key<NNN>(const unsigned char key[],
62 ; const aes_decrypt_ctx cx[1]);
64 ; AES_RETURN aes_encrypt_key(const unsigned char key[],
65 ; unsigned int len, const aes_encrypt_ctx cx[1]);
67 ; AES_RETURN aes_decrypt_key(const unsigned char key[],
68 ; unsigned int len, const aes_decrypt_ctx cx[1]);
70 ; where <NNN> is 128, 192 or 256. In the last two calls the length can be in
71 ; either bits or bytes.
73 ; Comment in/out the following lines to obtain the desired subroutines. These
74 ; selections MUST match those in the C header file aes.h
76 %define AES_128 ; define if AES with 128 bit keys is needed
77 %define AES_192 ; define if AES with 192 bit keys is needed
78 %define AES_256 ; define if AES with 256 bit keys is needed
79 %define AES_VAR ; define if a variable key size is needed
80 %define ENCRYPTION ; define if the encryption subroutine is needed
81 %define DECRYPTION ; define if the decryption subroutine is needed
82 %define AES_REV_DKS ; define if the decryption key schedule is stored reversed; when it is NOT defined the decryption code accesses the schedule from the top down
83 %define LAST_ROUND_TABLES ; define if tables are to be used for the last round (tested by the %ifdef blocks ahead of the eltab_*/dltab_* definitions)
85 ; offsets to parameters
87 in_blk equ 4 ; offset of the input byte array address parameter
88 out_blk equ 8 ; offset of the output byte array address parameter
89 ctx equ 12 ; offset of the AES context structure address parameter
90 stk_spc equ 20 ; stack space; added to the offsets above when the parameters are read relative to esp
91 %define parms 12 ; parameter space on stack in bytes (three 4-byte pointers); default for the optional argument of do_name, do_call and do_exit
93 ; The encryption key schedule has the following in memory layout where N is the
94 ; number of rounds (10, 12 or 14):
96 ; lo: | input key (round 0) | ; each round is four 32-bit words
97 ; | encryption round 1 |
98 ; | encryption round 2 |
100 ; | encryption round N-1 |
101 ; hi: | encryption round N |
103 ; The decryption key schedule is normally set up so that it has the same
104 ; layout as above by actually reversing the order of the encryption key
105 ; schedule in memory (this happens when AES_REV_DKS is set):
107 ; lo: | decryption round 0 | = | encryption round N |
108 ; | decryption round 1 | = INV_MIX_COL[ | encryption round N-1 | ]
109 ; | decryption round 2 | = INV_MIX_COL[ | encryption round N-2 | ]
111 ; | decryption round N-1 | = INV_MIX_COL[ | encryption round 1 | ]
112 ; hi: | decryption round N | = | input key (round 0) |
114 ; with rounds except the first and last modified using inv_mix_column()
115 ; But if AES_REV_DKS is NOT set the order of keys is left as it is for
116 ; encryption so that it has to be accessed in reverse when used for
117 ; decryption (although the inverse mix column modifications are done)
119 ; lo: | decryption round 0 | = | input key (round 0) |
120 ; | decryption round 1 | = INV_MIX_COL[ | encryption round 1 | ]
121 ; | decryption round 2 | = INV_MIX_COL[ | encryption round 2 | ]
123 ; | decryption round N-1 | = INV_MIX_COL[ | encryption round N-1 | ]
124 ; hi: | decryption round N | = | encryption round N |
126 ; This layout is faster when the assembler key scheduling provided here
129 ; The DLL interface must use the _stdcall convention in which the number
130 ; of bytes of parameter space is added after an @ to the subroutine's name.
131 ; We must also remove our parameters from the stack before return (see
132 ; the do_exit macro). Define DLL_EXPORT for the Dynamic Link Library version.
136 ; End of user defines
160 ; These macros implement stack based local variables
170 ; the DLL has to implement the _stdcall calling interface on return
171 ; In this case we have to take our parameters (3 4-byte pointers)
174 %macro do_name 1-2 parms
187 %macro do_call 1-2 parms
196 %macro do_exit 0-1 parms
; Memory operands into the four t_fn tables (presumably the tables used by the
; normal encryption rounds - confirm against aestab.c). Table n starts n*1024
; bytes after t_fn and x is scaled by 4 to select an entry. x is substituted
; textually and is not parenthesised, so pass a single register or simple term.
208 %define etab_0(x) [t_fn+4*x]
209 %define etab_1(x) [t_fn+1024+4*x]
210 %define etab_2(x) [t_fn+2048+4*x]
211 %define etab_3(x) [t_fn+3072+4*x]
213 %ifdef LAST_ROUND_TABLES
217 %define eltab_0(x) [t_fl+4*x]
218 %define eltab_1(x) [t_fl+1024+4*x]
219 %define eltab_2(x) [t_fl+2048+4*x]
220 %define eltab_3(x) [t_fl+3072+4*x]
224 ; ROUND FUNCTION. Build column[2] on ESI and column[3] on EDI that have the
225 ; round keys pre-loaded. Build column[0] in EBP and column[1] in EBX.
233 ; ESI column key[round][2]
234 ; EDI column key[round][3]
239 ; EBP column[0] unkeyed
240 ; EBX column[1] unkeyed
241 ; ESI column[2] keyed
242 ; EDI column[3] keyed
275 ; Basic MOV and XOR Operations for normal rounds
287 ; Basic MOV and XOR Operations for last round
289 %ifdef LAST_ROUND_TABLES
310 rnd_fun nr_xor, nr_mov
321 %macro enc_last_round 0
328 rnd_fun lr_xor, lr_mov
337 section .text align=32
339 ; AES Encryption Subroutine
349 mov esi,[esp+in_blk+stk_spc] ; input pointer
355 mov ebp,[esp+ctx+stk_spc] ; key pointer
356 movzx edi,byte [ebp+4*KS_LENGTH]
362 ; determine the number of rounds
388 mov edx,[esp+out_blk+stk_spc]
; Memory operands into the four t_in tables (presumably the tables used by the
; normal decryption rounds - confirm against aestab.c). Table n starts n*1024
; bytes after t_in and x is scaled by 4 to select an entry. x is substituted
; textually and is not parenthesised, so pass a single register or simple term.
408 %define dtab_0(x) [t_in+4*x]
409 %define dtab_1(x) [t_in+1024+4*x]
410 %define dtab_2(x) [t_in+2048+4*x]
411 %define dtab_3(x) [t_in+3072+4*x]
413 %ifdef LAST_ROUND_TABLES
417 %define dltab_0(x) [t_il+4*x]
418 %define dltab_1(x) [t_il+1024+4*x]
419 %define dltab_2(x) [t_il+2048+4*x]
420 %define dltab_3(x) [t_il+3072+4*x]
452 ; Basic MOV and XOR Operations for normal rounds
464 ; Basic MOV and XOR Operations for last round
466 %ifdef LAST_ROUND_TABLES
491 irn_fun ni_xor, ni_mov
502 %macro dec_last_round 0
513 irn_fun li_xor, li_mov
522 section .text align=32
524 ; AES Decryption Subroutine
534 ; input four columns and xor in first round key
536 mov esi,[esp+in_blk+stk_spc] ; input pointer
543 mov ebp,[esp+ctx+stk_spc] ; key pointer
544 movzx edi,byte[ebp+4*KS_LENGTH]
545 %ifndef AES_REV_DKS ; if decryption key schedule is not reversed
546 lea ebp,[ebp+edi] ; we have to access it from the top down
548 xor eax,[ebp ] ; key schedule
553 ; determine the number of rounds
579 ; move final values to the output array.
581 mov ebp,[esp+out_blk+stk_spc]
599 %ifidn __OUTPUT_FORMAT__,elf
600 section .note.GNU-stack noalloc noexec nowrite progbits