odzhan 1 year ago
commit
f5c1beecb6
  1. 27
      LICENSE
  2. 3
      README.md
  3. 18
      block/aes/Makefile
  4. 173
      block/aes/README.md
  5. 236
      block/aes/aes.c
  6. 66
      block/aes/aes.h
  7. 785
      block/aes/aes.html
  8. 259
      block/aes/asm/ax.asm
  9. 175
      block/aes/asm/ax.s
  10. 316
      block/aes/asm/axx.asm
  11. 175
      block/aes/asm/axx.s
  12. 260
      block/aes/asm/msvc/ax.asm
  13. 243
      block/aes/asm/msvc/axb.asm
  14. 320
      block/aes/asm/msvc/axx.asm
  15. BIN
      block/aes/doc/A Specification for Rijndael, the AES Algorithm.pdf
  16. BIN
      block/aes/doc/A Very Compact Perfectly Masked S-Box.pdf
  17. 3192
      block/aes/doc/A Very Compact S-box for AES.pdf
  18. BIN
      block/aes/doc/ADVANCED ENCRYPTION STANDARD (AES).pdf
  19. BIN
      block/aes/doc/An Efficient Pipelined Multiplicative Inverse Architecture.pdf
  20. 574
      block/aes/doc/Applied Cryptography Engineering — Quarrelsome.htm
  21. 41
      block/aes/doc/Applied Cryptography Engineering — Quarrelsome_files/analytics.js
  22. 64
      block/aes/doc/Applied Cryptography Engineering — Quarrelsome_files/css.css
  23. 8
      block/aes/doc/Applied Cryptography Engineering — Quarrelsome_files/css_002.css
  24. 22
      block/aes/doc/Applied Cryptography Engineering — Quarrelsome_files/fonts.css
  25. BIN
      block/aes/doc/Applied Cryptography Engineering — Quarrelsome_files/fw.png
  26. 12
      block/aes/doc/Applied Cryptography Engineering — Quarrelsome_files/jquery.js
  27. 2
      block/aes/doc/Applied Cryptography Engineering — Quarrelsome_files/jquery_002.js
  28. 1
      block/aes/doc/Applied Cryptography Engineering — Quarrelsome_files/main.css
  29. 1
      block/aes/doc/Applied Cryptography Engineering — Quarrelsome_files/prettify-hemisu.css
  30. 28
      block/aes/doc/Applied Cryptography Engineering — Quarrelsome_files/prettify.js
  31. BIN
      block/aes/doc/DPA, Bitslicing and Masking at 1 GHz.pdf
  32. BIN
      block/aes/doc/Efficient Software.pdf
  33. BIN
      block/aes/doc/How Far Can We Go on the x64 Processors.pdf
  34. BIN
      block/aes/doc/Parallel AES Encryption with Modified Mix-columns For Many Core Processor Arrays.pdf
  35. BIN
      block/aes/doc/Practical Mitigations for Timing-Based Side-Channel Attacks on Modern x86 Processors.pdf
  36. BIN
      block/aes/doc/Securing the AES Finalists Against Power Analysis Attacks.pdf
  37. BIN
      block/aes/doc/Side-Channel Analysis Resistant.pdf
  38. BIN
      block/aes/doc/Side-Channel Attacks Ten Years After Its Publication and the Impacts on Cryptographic Module Security Testing.pdf
  39. BIN
      block/aes/doc/The design of Rijndael.pdf
  40. BIN
      block/aes/doc/Ultra-lightweight 8-bit Multiplicative Inverse.pdf
  41. 638
      block/aes/doc/aes.html
  42. BIN
      block/aes/doc/cache.pdf
  43. 6024
      block/aes/doc/ecb_e_m.txt
  44. 1067
      block/aes/doc/rfc3686.txt
  45. 4
      block/aes/doc/tweet.c
  46. 260
      block/aes/msvc/ax.asm
  47. 243
      block/aes/msvc/axb.asm
  48. 320
      block/aes/msvc/axx.asm
  49. 19
      block/aes/old/Makefile
  50. 181
      block/aes/old/README.md
  51. 24
      block/aes/old/UNLICENSE
  52. 160
      block/aes/old/aes.c
  53. 81
      block/aes/old/aes.h
  54. 134
      block/aes/old/aesb.c
  55. 259
      block/aes/old/ax.asm
  56. 175
      block/aes/old/ax.s
  57. 316
      block/aes/old/axx.asm
  58. 175
      block/aes/old/axx.s
  59. 71
      block/aes/old/test.c
  60. 72
      block/aes/old/test2.c
  61. BIN
      block/aes/old/test_ACHINE
  62. BIN
      block/aes/test
  63. 62
      block/aes/test.c
  64. BIN
      block/aes/test2
  65. 131
      block/aes/test2.c
  66. BIN
      block/anubis/Anubis_Original.zip
  67. BIN
      block/anubis/anubis.zip
  68. BIN
      block/anubis/hill
  69. 85
      block/anubis/hill.cpp
  70. BIN
      block/anubis/mds
  71. 505
      block/anubis/mds.c
  72. 2
      block/anubis/mds.s
  73. 4
      block/belt/Makefile
  74. 33
      block/belt/README.txt
  75. 123
      block/belt/belt.c
  76. 301
      block/belt/belt.html
  77. BIN
      block/belt/doc/2015-JP-belt.pdf
  78. BIN
      block/belt/doc/belt-spec27.pdf
  79. 149
      block/belt/old/belt.c
  80. 49
      block/belt/old/belt.h
  81. 486
      block/belt/old/beltx.s
  82. 293
      block/belt/old/bx.asm
  83. 128
      block/belt/old/bx.h
  84. 225
      block/belt/old/bx2.asm
  85. BIN
      block/belt/old/hbox
  86. 67
      block/belt/old/hbox.c
  87. 48
      block/belt/old/hbox.s
  88. 121
      block/belt/ref/LICENSE
  89. 4
      block/belt/ref/README.md
  90. 171
      block/belt/ref/belt.c
  91. 58
      block/belt/ref/belt.h
  92. 78
      block/belt/ref/main.c
  93. 7
      block/belt/ref/makefile
  94. BIN
      block/belt/test
  95. 37
      block/belt/test.c
  96. 4
      block/blowfish/Makefile
  97. 40
      block/blowfish/README.txt
  98. 23
      block/blowfish/asm/bf.inc
  99. 505
      block/blowfish/asm/bf_x64.asm
  100. 524
      block/blowfish/asm/bf_x86.asm

27
LICENSE

@ -0,0 +1,27 @@
Copyright © 2015-2020 Odzhan. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY AUTHORS "AS IS" AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

3
README.md

@ -0,0 +1,3 @@
<h2>Disclaimer</h2>
<p>To avoid any misunderstanding: this repository is NOT a cryptographic library. Although it contains source code for cryptographic algorithms, using that code in any application that requires security is not recommended. Some of the code is incomplete or contains bugs. You've been warned.</p>

18
block/aes/Makefile

@ -0,0 +1,18 @@
# Builds the AES test programs (test, test2) and assembles the hand-written
# implementation that matches the host reported by `uname -m`.
# Note: ax.o is assembled but is not linked into either test binary here.
MACHINE := $(shell uname -m)

# 32-bit x86 hosts report i386, i486, i586 or i686 depending on the CPU.
ifneq ($(filter i386 i486 i586 i686,$(MACHINE)),)
  SRC := asm/ax.asm
else ifeq ($(MACHINE), x86_64)
  SRC := asm/axx.asm
else ifeq ($(MACHINE), armv7l)
  SRC := asm/ax.s
else ifeq ($(MACHINE), aarch64)
  SRC := asm/axx.s
endif

# Both targets are commands, not files. Without this declaration the `test`
# binary produced below makes a later `make test` report "up to date".
.PHONY: test clean

test:
ifneq ($(SRC),)
	as $(SRC) -oax.o
endif
	gcc -Wall -Os test2.c aes.c -otest2
	gcc -Wall -O2 test.c aes.c -otest

# -f: do not fail when some of the files have not been built yet
clean:
	rm -f *.o test test2

173
block/aes/README.md

@ -0,0 +1,173 @@
<h4>AES Block Cipher</h4>
<p>AES-dust is a compact implementation of the AES block cipher with support for 128-bit and 256-bit keys. The supported modes of encryption are Counter (CTR) and Electronic Codebook (ECB).</p>
<p>All code is intentionally optimized for size rather than speed making it suitable for resource constrained environments.</p>
<h4>Files</h4>
<p>All files with .s and .asm extensions are compatible with the GNU assembler (GAS), except for the files in the MSVC folder, which assemble with either NASM or YASM.</p>
<table>
<tr>
<th>File</th>
<th>Description</th>
</tr>
<tr>
<td>aes.c</td>
<td>AES-128,AES-256 in C for 8-bit, 32-bit and 64-bit architectures.</td>
</tr>
<tr>
<td>test.c</td>
<td>Monte Carlo test for AES-128, AES-256 in ECB mode.</td>
</tr>
<tr>
<td>test2.c</td>
<td>Simple test unit for AES-128, AES-256 in ECB and CTR mode.</td>
</tr>
<tr>
<td>asm/ax.asm</td>
<td>AES-128 in x86 assembly.</td>
</tr>
<tr>
<td>asm/axx.asm</td>
<td>AES-128 in AMD64 assembly.</td>
</tr>
<tr>
<td>asm/ax.s</td>
<td>AES-128 in ARM32 assembly.</td>
</tr>
<tr>
<td>asm/axx.s</td>
<td>AES-128 in ARM64 assembly.</td>
</tr>
</table>
<h4>Assembly</h4>
<p>The table below shows the code sizes of the hand-written versions of AES-128.</p>
<table>
<tr>
<th>Architecture</th>
<th>ECB</th>
<th>CTR</th>
</tr>
<tr>
<td>x86</td>
<td>205</td>
<td>272</td>
</tr>
<tr>
<td>AMD64</td>
<td>253</td>
<td>339</td>
</tr>
<tr>
<td>ARM32</td>
<td>352</td>
<td></td>
</tr>
<tr>
<td>ARM64</td>
<td>352</td>
<td></td>
</tr>
</table>
<h4>C generated assembly</h4>
<p>The following table lists the size of assembly code generated by the GNU C compiler.</p>
<table>
<tr>
<th>Architecture</th>
<th>ECB</th>
<th>CTR</th>
</tr>
<tr>
<td>x86</td>
<td>524</td>
<td>701</td>
</tr>
<tr>
<td>AMD64</td>
<td>451</td>
<td>682</td>
</tr>
<tr>
<td>ARM32</td>
<td>480</td>
<td>668</td>
</tr>
<tr>
<td>ARM64</td>
<td>640</td>
<td>940</td>
</tr>
</table>
<p>The following table lists the size of assembly code generated by the Microsoft C compiler.</p>
<table>
<tr>
<th>Architecture</th>
<th>ECB</th>
<th>CTR</th>
</tr>
<tr>
<td>x86</td>
<td>321</td>
<td>477</td>
</tr>
<tr>
<td>AMD64</td>
<td>486</td>
<td>673</td>
</tr>
<tr>
<td>ARM32</td>
<td>372</td>
<td>502</td>
</tr>
<tr>
<td>ARM64</td>
<td>536</td>
<td>880</td>
</tr>
</table>
<h4>Side channel attacks</h4>
<p>AES was never intended to be resistant to side-channel attacks. However, if you decide to use this code in an embedded project that requires a high level of security, first evaluate whether the code offers sufficient protection against such attacks before including it in your project.</p>
<h4>Licensing information</h4>
<p>Initially this was published under a BSD license. I decided to release it under the Unlicense instead, hoping more people would use it and provide useful feedback.</p>
<pre>
This is free and unencumbered software released into the public domain.
Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.
In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
For more information, please refer to http://unlicense.org/
</pre>

236
block/aes/aes.c

@ -0,0 +1,236 @@
/**
This is free and unencumbered software released into the public domain.
Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.
In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
For more information, please refer to <http://unlicense.org/> */
#include "aes.h"
// Doubling in GF(2**8): shift left by one bit and, when the top bit was
// set, fold the overflow back in by XORing with 0x1b
#if AES_INT_LEN == 1
// single byte; the caller truncates the result by storing it in a u8
#define M(x)((0x1b&(-((x)>>7)))^((x)<<1))
#else
// four bytes at once, one per 8-bit lane of the word
u32 M(u32 x) {
    u32 msb, low;

    msb = x & 0x80808080;   // top bit of every lane
    low = x ^ msb;          // same lanes with the top bit cleared
    // each set top bit becomes 0x01 in its lane and is expanded to 0x1b
    return (low << 1) ^ ((msb >> 7) * 0x1b);
}
#endif
// Byte substitution (SubBytes).
// The sbox array is used by default for optimal speed. Defining DYNAMIC
// replaces the 256-byte table with a function that computes each value
// on demand.
#ifndef DYNAMIC
// sbox[x] is the substitution value for byte x
u8 sbox[256]=
{0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 };
#define S(x) sbox[x]
#else
// SubByte
// Computes the substitution value for x without a table:
//   1. for non-zero x, derive y from x by the power walk below
//      (this appears to yield the multiplicative inverse of x in GF(2**8))
//   2. XOR y with its four successive 1-bit left rotations
//   3. XOR the result with 99 (0x63)
// For x == 0 steps 1 and 2 are skipped, so the result is 99.
u8 S(u8 x) {
u8 i,y,c;
if(x) {
// i is a u8 starting at 0, so --i wraps to 255 and the update expression
// runs 255 times. Each update multiplies y by 3 (y ^= M(y), M doubles y).
// The first time y equals x, y is reset to 1 before that multiplication;
// c records that the reset happened so it is done only once.
for(c=i=0,y=1;--i;y=(!c&&y==x)?c=1:y,y^=M(y));
x=y;
// x = y ^ rotl(y,1) ^ rotl(y,2) ^ rotl(y,3) ^ rotl(y,4)
for(i=0;i<4;i++) {
x^=y=(y<<1)|(y>>7);
}
}
return x^99;
}
#endif
#if AES_INT_LEN == 1
// 128-bit version for 8-bit architectures
void aes_ecb(void *mk, void *data) {
u8 a,b,c,d,i,j,t,x[AES_BLK_LEN],
k[AES_KEY_LEN],rc=1,*s=(u8*)data;
// copy 128-bit plain text + 128-bit master key to x
for(i=0;i<AES_BLK_LEN;i++) {
x[i]=s[i], k[i]=((u32*)mk)[i];
}
for(;;) {
// AddRoundKey
for(i=0;i<AES_BLK_LEN;i++) {
s[i]=x[i]^k[i];
}
// if round 11, stop
if(rc==108)break;
// AddConstant
k[0]^=rc; rc=M(rc);
// ExpandKey
for(i=0;i<4;i++) {
k[i]^=S(k[12+((i-3)&3)]);
}
for(i=0;i<12;i++) {
k[i+4]^=k[i];
}
// SubBytes and ShiftRows
for(i=0;i<AES_BLK_LEN;i++) {
x[(i&3)+((((u32)(i>>2)-(i&3))&3)<<2)]=S(s[i]);
}
// if not round 11
if(rc!=108) {
// MixColumns
for(i=0;i<AES_BLK_LEN;i+=4) {
a=x[i],b=x[i+1],c=x[i+2],d=x[i+3];
for(j=0;j<4;j++) {
x[i+j]^=a^b^c^d^M(a^b);
t=a,a=b,b=c,c=d,d=t;
}
}
}
}
}
#else
// 32-bit or 64-bit versions
#if AES_KEY_LEN == 32
// Encrypts one 128-bit block in place with a 256-bit key.
//   mk   : the key, read as eight u32 words
//   data : the block, read and then overwritten as four u32 words
// Round keys are not stored: k holds a 256-bit window of the key schedule
// and each half is replaced as soon as it has been used.
void aes_ecb(void *mk, void *data) {
// c = round constant, r = half of k used this round (0: k[0..3], 1: k[4..7])
// x = working state, w = key schedule temporary, s = caller's block
u32 c=1,i,r=0,w,x[4],k[8], *s=(u32*)data;
// copy 128-bit plain text
for(i=0;i<4;i++) {
x[i] = s[i];
}
// copy 256-bit master key
for(i=0;i<8;i++) {
k[i] = ((u32*)mk)[i];
}
for(;;) {
// 1st part of ExpandKey
// start from the last word of the half that is NOT used this round
w=k[r?3:7];
// substitute each of the four bytes; four 8-bit rotations restore the order
for(i=0;i<4;i++) {
w=(w&-256) | S(w&255),w=R(w,8);
}
// AddConstant, update constant
// the extra rotation and the constant apply only when r == 0
if(!r)w=R(w,8)^c,c=M(c);
// AddRoundKey, 2nd part of ExpandKey
// each key word is used first, then replaced; w chains word to word
for(i=0;i<4;i++) {
((u32*)s)[i]=x[i]^k[r*4+i], w=k[r*4+i]^=w;
}
// if round 15, stop
if(c==27) break;
// switch to the other half of k for the next round
r=(r+1)&1;
// SubBytes and ShiftRows
// byte i%4 of column i/4 moves to column (i/4 - i%4) mod 4
for(i=0;i<AES_BLK_LEN;i++) {
((u8*)x)[(i%4)+(((i/4)-(i%4))%4)*4]=S(((u8*)s)[i]);
}
// if not round 15, MixColumns
// skipped only when c == 128 and r == 0
if((c!=128) | r) {
for(i=0;i<4;i++) {
w=x[i],x[i]=R(w,8)^R(w,16)^R(w,24)^M(R(w,8)^w);
}
}
}
}
#else
void aes_ecb(void *mk, void *data) {
u32 c=1,i,w,x[4],k[4],*s=(u32*)data;
// copy 128-bit plain text + 128-bit master key to x
for(i=0;i<4;i++) {
x[i]=s[i], k[i]=((u32*)mk)[i];
}
for(;;) {
// 1st part of ExpandKey
w=k[3];
for(i=0;i<4;i++) {
w=(w&-256)|S(w&255), w=R(w,8);
}
// AddConstant, AddRoundKey, 2nd part of ExpandKey
w=R(w, 8)^c;
for(i=0;i<4;i++) {
((u32*)s)[i]=x[i]^k[i], w=k[i]^=w;
}
// if round 11, stop
if(c==108)break;
// update constant
c=M(c);
// SubBytes and ShiftRows
for(i=0;i<AES_BLK_LEN;i++) {
((u8*)x)[(i%4)+(((i/4)-(i%4))%4)*4]=S(((u8*)s)[i]);
}
// if not round 11, MixColumns
if(c!=108) {
for(i=0;i<4;i++) {
w=x[i],x[i]=R(w,8)^R(w,16)^R(w,24)^M(R(w,8)^w);
}
}
}
}
#endif
#endif
#ifdef CTR
// Encrypt or decrypt a buffer in place using Counter (CTR) mode.
//
//   len  : number of bytes to process (u32 is a byte-wide type when
//          AES_INT_LEN == 1, which limits len to 255 in that build)
//   ctr  : 128-bit counter block; incremented once per processed block,
//          so the caller's copy is modified
//   data : buffer that is XORed in place with the key stream
//   mk   : master key, passed unchanged to aes_ecb()
void aes_ctr(u32 len, void *ctr, void *data, void *mk) {
    // Key stream block. It is backed by u32 storage because the 32-bit
    // builds of aes_ecb() access their data argument through a u32
    // pointer; a plain u8 array carries no such alignment guarantee.
    u32 ks[AES_BLK_LEN / sizeof(u32)];
    u8 *t = (u8*)ks, *p = (u8*)data, *c = (u8*)ctr;
    u8 i, r;

    while(len) {
        // copy counter+nonce to local buffer
        for(i=0;i<AES_BLK_LEN;i++) t[i] = c[i];
        // encrypt the copy to obtain the next key stream block
        aes_ecb(mk, t);
        // XOR the data with the key stream; the last block may be partial
        r = len > AES_BLK_LEN ? AES_BLK_LEN : len;
        for(i=0;i<r;i++) p[i] ^= t[i];
        // update length + position
        len -= r; p += r;
        // increment the counter block, last byte first; a byte that wraps
        // to zero carries into the byte before it
        for(i=AES_BLK_LEN;i!=0;i--)
            if(++c[i-1]) break;
    }
}
#endif

66
block/aes/aes.h

@ -0,0 +1,66 @@
/**
This is free and unencumbered software released into the public domain.
Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.
In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
For more information, please refer to <http://unlicense.org/> */
#ifndef AES_H
#define AES_H

// Build configuration. AES_INT_LEN and AES_KEY_LEN can be overridden from
// the compiler command line (for example -DAES_INT_LEN=4 -DAES_KEY_LEN=32);
// the values below are the defaults. Every file that includes this header
// must be compiled with the same settings.
#ifndef AES_INT_LEN
#define AES_INT_LEN 1 // 1 = 8-bit, 4 for anything else
#endif
#ifndef AES_KEY_LEN
#define AES_KEY_LEN 16 // 16 = 128-bit, 32 = 256-bit
#endif
#define AES_BLK_LEN 16 // always 16 for 128-bit blocks

#if AES_INT_LEN == 1 && AES_KEY_LEN == 32
#error "AES-256 for 8-bit CPUs is currently unsupported."
#endif

typedef unsigned char u8;
typedef char s8;   // plain char: signedness depends on the compiler

#if AES_INT_LEN == 1
// In the 8-bit build "u32" is only a name: it is one byte wide.
typedef unsigned char u32;
#else
// rotate the 32-bit value v right by n bits (n must be in 1..31)
#define R(v,n)(((v)>>(n))|((v)<<(32-(n))))
typedef unsigned int u32;
#endif

#ifdef __cplusplus
extern "C" {
#endif

// Encrypt one block in place.
// mk should point to a 128-bit or 256-bit key
// data should point to a 128-bit block of plaintext to encrypt
void aes_ecb(void *mk, void *data);

// Encrypt or decrypt a buffer in place using Counter (CTR) mode.
// len is the amount of bytes to encrypt; its type is u32, which is a
//     single byte when AES_INT_LEN == 1
// ctr is the 128-bit counter and nonce
// data is the plaintext or ciphertext
// mk is the 128-bit or 256-bit master key
void aes_ctr(u32 len, void *ctr, void *data, void *mk);

#ifdef __cplusplus
}
#endif
#endif

785
block/aes/aes.html

@ -0,0 +1,785 @@
<h3><strong>Introduction</strong></h3>
<p>In January 1997, the National Institute of Standards and Technology (NIST) initiated a process to replace the Data Encryption Standard (DES) published in 1977. Draft criteria for evaluating potential algorithms were published, and members of the public were invited to provide feedback. The finalized criteria, published in September 1997, outlined the minimum acceptable requirements for each submission. Four years later, in November 2001, Rijndael, designed by Belgian cryptographers Vincent Rijmen and Joan Daemen and now referred to as the Advanced Encryption Standard (AES), was announced as the winner.</p>
<p>Since publication, implementations of AES have frequently been optimized for speed. Code that executes the quickest has traditionally taken priority over how much ROM it uses. Developers will use lookup tables to accelerate each step of the encryption process, thus compact implementations are rarely if ever sought after. Our challenge here is to implement AES in the least amount of C and more specifically x86 assembly code. It will obviously result in a slow implementation, and will not be resistant to side-channel analysis, although the latter problem can likely be resolved using conditional move instructions (CMOVcc) if necessary.</p>
<h3><strong>Parameters</strong></h3>
<p>There are three different sets of parameters available, with the main difference being the key length. Our implementation will be AES-128, which fits perfectly onto a 32-bit architecture.</p>
<table border="1" width="50%">
<tbody>
<tr>
<th></th>
<th>Key Length
(Nk words)</th>
<th>Block Size
(Nb words)</th>
<th>Number of Rounds
(Nr)</th>
</tr>
<tr align="center">
<td>AES-128</td>
<td>4</td>
<td>4</td>
<td>10</td>
</tr>
<tr align="center">
<td>AES-192</td>
<td>6</td>
<td>4</td>
<td>12</td>
</tr>
<tr align="center">
<td>AES-256</td>
<td>8</td>
<td>4</td>
<td>14</td>
</tr>
</tbody>
</table>
<h3><strong>Structure</strong></h3>
<p>Two IF statements are introduced in order to perform the encryption in one loop. What isn't included in the illustration below is <span class="step">ExpandRoundKey</span> and <span class="step">AddRoundConstant</span> which generate round keys.</p>
<table border="1" width="50%">
<tbody>
<tr>
<th align="left" valign="top">The first layout here is what we normally see used when describing AES.</th>
<th align="left" valign="top">The second introduces 2 conditional statements that make the code more compact.</th>
</tr>
<tr>
<td align="center" valign="top"><img src="https://tinycrypt.files.wordpress.com/2018/03/struc12.png">
</td>
<td align="center" valign="top"><img src="https://tinycrypt.files.wordpress.com/2018/03/struc22.png">
</td>
</tr>
</tbody>
</table>
<h2>Source in C</h2>
<p>The optimizers built into C compilers can sometimes reveal more efficient ways to implement a piece of code. The following performs encryption, and results in approx. 400 bytes of x86 assembly.</p>
<pre style='color:#000000;background:#ffffff;'><span style='color:#004a43;'>#</span><span style='color:#004a43;'>define</span><span style='color:#004a43;'> R</span><span style='color:#808030;'>(</span><span style='color:#004a43;'>v</span><span style='color:#808030;'>,</span><span style='color:#004a43;'>n</span><span style='color:#808030;'>)</span><span style='color:#808030;'>(</span><span style='color:#808030;'>(</span><span style='color:#808030;'>(</span><span style='color:#004a43;'>v</span><span style='color:#808030;'>)</span><span style='color:#808030;'>&gt;</span><span style='color:#808030;'>&gt;</span><span style='color:#808030;'>(</span><span style='color:#004a43;'>n</span><span style='color:#808030;'>)</span><span style='color:#808030;'>)</span><span style='color:#808030;'>|</span><span style='color:#808030;'>(</span><span style='color:#808030;'>(</span><span style='color:#004a43;'>v</span><span style='color:#808030;'>)</span><span style='color:#808030;'>&lt;</span><span style='color:#808030;'>&lt;</span><span style='color:#808030;'>(</span><span style='color:#004a43;'>32</span><span style='color:#808030;'>-</span><span style='color:#808030;'>(</span><span style='color:#004a43;'>n</span><span style='color:#808030;'>)</span><span style='color:#808030;'>)</span><span style='color:#808030;'>)</span><span style='color:#808030;'>)</span>
<span style='color:#004a43;'>#</span><span style='color:#004a43;'>define</span><span style='color:#004a43;'> F</span><span style='color:#808030;'>(</span><span style='color:#004a43;'>n</span><span style='color:#808030;'>)</span><span style='color:#004a43;'>for</span><span style='color:#808030;'>(</span><span style='color:#004a43;'>i</span><span style='color:#808030;'>=</span><span style='color:#004a43;'>0</span><span style='color:#808030;'>;</span><span style='color:#004a43;'>i</span><span style='color:#808030;'>&lt;</span><span style='color:#004a43;'>n</span><span style='color:#808030;'>;</span><span style='color:#004a43;'>i</span><span style='color:#808030;'>+</span><span style='color:#808030;'>+</span><span style='color:#808030;'>)</span>
<span style='color:#800000;font-weight:bold;'>typedef</span> <span style='color:#800000;font-weight:bold;'>unsigned</span> <span style='color:#800000;font-weight:bold;'>char</span> B<span style='color:#800080;'>;</span>
<span style='color:#800000;font-weight:bold;'>typedef</span> <span style='color:#800000;font-weight:bold;'>unsigned</span> W<span style='color:#800080;'>;</span>
<span style='color:#696969;'>// Multiplication over GF(2**8)</span>
W M<span style='color:#808030;'>(</span>W x<span style='color:#808030;'>)</span><span style='color:#800080;'>{</span>
W t<span style='color:#808030;'>=</span>x<span style='color:#808030;'>&amp;</span><span style='color:#008000;'>0x80808080</span><span style='color:#800080;'>;</span>
<span style='color:#800000;font-weight:bold;'>return</span><span style='color:#808030;'>(</span><span style='color:#808030;'>(</span>x<span style='color:#808030;'>^</span>t<span style='color:#808030;'>)</span><span style='color:#808030;'>*</span><span style='color:#008c00;'>2</span><span style='color:#808030;'>)</span><span style='color:#808030;'>^</span><span style='color:#808030;'>(</span><span style='color:#808030;'>(</span>t<span style='color:#808030;'>&gt;</span><span style='color:#808030;'>&gt;</span><span style='color:#008c00;'>7</span><span style='color:#808030;'>)</span><span style='color:#808030;'>*</span><span style='color:#008c00;'>27</span><span style='color:#808030;'>)</span><span style='color:#800080;'>;</span>
<span style='color:#800080;'>}</span>
<span style='color:#696969;'>// SubByte</span>
B S<span style='color:#808030;'>(</span>B x<span style='color:#808030;'>)</span><span style='color:#800080;'>{</span>
B i<span style='color:#808030;'>,</span>y<span style='color:#808030;'>,</span>c<span style='color:#800080;'>;</span>
<span style='color:#800000;font-weight:bold;'>if</span><span style='color:#808030;'>(</span>x<span style='color:#808030;'>)</span><span style='color:#800080;'>{</span>
<span style='color:#800000;font-weight:bold;'>for</span><span style='color:#808030;'>(</span>c<span style='color:#808030;'>=</span>i<span style='color:#808030;'>=</span><span style='color:#008c00;'>0</span><span style='color:#808030;'>,</span>y<span style='color:#808030;'>=</span><span style='color:#008c00;'>1</span><span style='color:#800080;'>;</span><span style='color:#808030;'>-</span><span style='color:#808030;'>-</span>i<span style='color:#800080;'>;</span>y<span style='color:#808030;'>=</span><span style='color:#808030;'>(</span><span style='color:#808030;'>!</span>c<span style='color:#808030;'>&amp;</span><span style='color:#808030;'>&amp;</span>y<span style='color:#808030;'>=</span><span style='color:#808030;'>=</span>x<span style='color:#808030;'>)</span><span style='color:#800080;'>?</span>c<span style='color:#808030;'>=</span><span style='color:#008c00;'>1</span><span style='color:#800080;'>:</span>y<span style='color:#808030;'>,</span>y<span style='color:#808030;'>^</span><span style='color:#808030;'>=</span>M<span style='color:#808030;'>(</span>y<span style='color:#808030;'>)</span><span style='color:#808030;'>)</span><span style='color:#800080;'>;</span>
x<span style='color:#808030;'>=</span>y<span style='color:#800080;'>;</span>F<span style='color:#808030;'>(</span><span style='color:#008c00;'>4</span><span style='color:#808030;'>)</span>x<span style='color:#808030;'>^</span><span style='color:#808030;'>=</span>y<span style='color:#808030;'>=</span><span style='color:#808030;'>(</span>y<span style='color:#808030;'>&lt;</span><span style='color:#808030;'>&lt;</span><span style='color:#008c00;'>1</span><span style='color:#808030;'>)</span><span style='color:#808030;'>|</span><span style='color:#808030;'>(</span>y<span style='color:#808030;'>&gt;</span><span style='color:#808030;'>&gt;</span><span style='color:#008c00;'>7</span><span style='color:#808030;'>)</span><span style='color:#800080;'>;</span>
<span style='color:#800080;'>}</span>
<span style='color:#800000;font-weight:bold;'>return</span> x<span style='color:#808030;'>^</span><span style='color:#008c00;'>99</span><span style='color:#800080;'>;</span>
<span style='color:#800080;'>}</span>
<span style='color:#800000;font-weight:bold;'>void</span> E<span style='color:#808030;'>(</span>B <span style='color:#808030;'>*</span>s<span style='color:#808030;'>)</span><span style='color:#800080;'>{</span>
W i<span style='color:#808030;'>,</span>w<span style='color:#808030;'>,</span>x<span style='color:#808030;'>[</span><span style='color:#008c00;'>8</span><span style='color:#808030;'>]</span><span style='color:#808030;'>,</span>c<span style='color:#808030;'>=</span><span style='color:#008c00;'>1</span><span style='color:#808030;'>,</span><span style='color:#808030;'>*</span>k<span style='color:#808030;'>=</span><span style='color:#808030;'>(</span>W<span style='color:#808030;'>*</span><span style='color:#808030;'>)</span><span style='color:#808030;'>&amp;</span>x<span style='color:#808030;'>[</span><span style='color:#008c00;'>4</span><span style='color:#808030;'>]</span><span style='color:#800080;'>;</span>
<span style='color:#696969;'>// copy plain text + master key to x</span>
F<span style='color:#808030;'>(</span><span style='color:#008c00;'>8</span><span style='color:#808030;'>)</span>x<span style='color:#808030;'>[</span>i<span style='color:#808030;'>]</span><span style='color:#808030;'>=</span><span style='color:#808030;'>(</span><span style='color:#808030;'>(</span>W<span style='color:#808030;'>*</span><span style='color:#808030;'>)</span>s<span style='color:#808030;'>)</span><span style='color:#808030;'>[</span>i<span style='color:#808030;'>]</span><span style='color:#800080;'>;</span>
<span style='color:#800000;font-weight:bold;'>for</span><span style='color:#808030;'>(</span><span style='color:#800080;'>;</span><span style='color:#800080;'>;</span><span style='color:#808030;'>)</span><span style='color:#800080;'>{</span>
<span style='color:#696969;'>// AddRoundKey, 1st part of ExpandRoundKey</span>
w<span style='color:#808030;'>=</span>k<span style='color:#808030;'>[</span><span style='color:#008c00;'>3</span><span style='color:#808030;'>]</span><span style='color:#800080;'>;</span>F<span style='color:#808030;'>(</span><span style='color:#008c00;'>4</span><span style='color:#808030;'>)</span>w<span style='color:#808030;'>=</span><span style='color:#808030;'>(</span>w<span style='color:#808030;'>&amp;</span><span style='color:#808030;'>-</span><span style='color:#008c00;'>256</span><span style='color:#808030;'>)</span><span style='color:#808030;'>|</span>S<span style='color:#808030;'>(</span>w<span style='color:#808030;'>)</span><span style='color:#808030;'>,</span>w<span style='color:#808030;'>=</span>R<span style='color:#808030;'>(</span>w<span style='color:#808030;'>,</span><span style='color:#008c00;'>8</span><span style='color:#808030;'>)</span><span style='color:#808030;'>,</span><span style='color:#808030;'>(</span><span style='color:#808030;'>(</span>W<span style='color:#808030;'>*</span><span style='color:#808030;'>)</span>s<span style='color:#808030;'>)</span><span style='color:#808030;'>[</span>i<span style='color:#808030;'>]</span><span style='color:#808030;'>=</span>x<span style='color:#808030;'>[</span>i<span style='color:#808030;'>]</span><span style='color:#808030;'>^</span>k<span style='color:#808030;'>[</span>i<span style='color:#808030;'>]</span><span style='color:#800080;'>;</span>
<span style='color:#696969;'>// 2nd part of ExpandRoundKey</span>
w<span style='color:#808030;'>=</span>R<span style='color:#808030;'>(</span>w<span style='color:#808030;'>,</span><span style='color:#008c00;'>8</span><span style='color:#808030;'>)</span><span style='color:#808030;'>^</span>c<span style='color:#800080;'>;</span>F<span style='color:#808030;'>(</span><span style='color:#008c00;'>4</span><span style='color:#808030;'>)</span>w<span style='color:#808030;'>=</span>k<span style='color:#808030;'>[</span>i<span style='color:#808030;'>]</span><span style='color:#808030;'>^</span><span style='color:#808030;'>=</span>w<span style='color:#800080;'>;</span>
<span style='color:#696969;'>// if round 11, stop;</span>
<span style='color:#800000;font-weight:bold;'>if</span><span style='color:#808030;'>(</span>c<span style='color:#808030;'>=</span><span style='color:#808030;'>=</span><span style='color:#008c00;'>108</span><span style='color:#808030;'>)</span><span style='color:#800000;font-weight:bold;'>break</span><span style='color:#800080;'>;</span>
<span style='color:#696969;'>// update round constant</span>
c<span style='color:#808030;'>=</span>M<span style='color:#808030;'>(</span>c<span style='color:#808030;'>)</span><span style='color:#800080;'>;</span>
<span style='color:#696969;'>// SubBytes and ShiftRows</span>
F<span style='color:#808030;'>(</span><span style='color:#008c00;'>16</span><span style='color:#808030;'>)</span><span style='color:#808030;'>(</span><span style='color:#808030;'>(</span>B<span style='color:#808030;'>*</span><span style='color:#808030;'>)</span>x<span style='color:#808030;'>)</span><span style='color:#808030;'>[</span><span style='color:#808030;'>(</span>i<span style='color:#808030;'>%</span><span style='color:#008c00;'>4</span><span style='color:#808030;'>)</span><span style='color:#808030;'>+</span><span style='color:#808030;'>(</span><span style='color:#808030;'>(</span><span style='color:#808030;'>(</span>i<span style='color:#808030;'>/</span><span style='color:#008c00;'>4</span><span style='color:#808030;'>)</span><span style='color:#808030;'>-</span><span style='color:#808030;'>(</span>i<span style='color:#808030;'>%</span><span style='color:#008c00;'>4</span><span style='color:#808030;'>)</span><span style='color:#808030;'>)</span><span style='color:#808030;'>%</span><span style='color:#008c00;'>4</span><span style='color:#808030;'>)</span><span style='color:#808030;'>*</span><span style='color:#008c00;'>4</span><span style='color:#808030;'>]</span><span style='color:#808030;'>=</span>S<span style='color:#808030;'>(</span>s<span style='color:#808030;'>[</span>i<span style='color:#808030;'>]</span><span style='color:#808030;'>)</span><span style='color:#800080;'>;</span>
<span style='color:#696969;'>// if not round 11, MixColumns</span>
<span style='color:#800000;font-weight:bold;'>if</span><span style='color:#808030;'>(</span>c<span style='color:#808030;'>!</span><span style='color:#808030;'>=</span><span style='color:#008c00;'>108</span><span style='color:#808030;'>)</span>
F<span style='color:#808030;'>(</span><span style='color:#008c00;'>4</span><span style='color:#808030;'>)</span>w<span style='color:#808030;'>=</span>x<span style='color:#808030;'>[</span>i<span style='color:#808030;'>]</span><span style='color:#808030;'>,</span>x<span style='color:#808030;'>[</span>i<span style='color:#808030;'>]</span><span style='color:#808030;'>=</span>R<span style='color:#808030;'>(</span>w<span style='color:#808030;'>,</span><span style='color:#008c00;'>8</span><span style='color:#808030;'>)</span><span style='color:#808030;'>^</span>R<span style='color:#808030;'>(</span>w<span style='color:#808030;'>,</span><span style='color:#008c00;'>16</span><span style='color:#808030;'>)</span><span style='color:#808030;'>^</span>R<span style='color:#808030;'>(</span>w<span style='color:#808030;'>,</span><span style='color:#008c00;'>24</span><span style='color:#808030;'>)</span><span style='color:#808030;'>^</span>M<span style='color:#808030;'>(</span>R<span style='color:#808030;'>(</span>w<span style='color:#808030;'>,</span><span style='color:#008c00;'>8</span><span style='color:#808030;'>)</span><span style='color:#808030;'>^</span>w<span style='color:#808030;'>)</span><span style='color:#800080;'>;</span>
<span style='color:#800080;'>}</span>
<span style='color:#800080;'>}</span>
</pre>
<h3>x86 Overview</h3>
<p>Some x86 registers have special purposes, and it's important to know this when writing compact code.</p>
<table border="1">
<tbody>
<tr>
<th>Register</th>
<th>Description</th>
<th>Used by</th>
</tr>
<tr>
<td>eax</td>
<td>Accumulator</td>
<td>lods, stos, scas, xlat, mul, div</td>
</tr>
<tr>
<td>ebx</td>
<td>Base</td>
<td>xlat</td>
</tr>
<tr>
<td>ecx</td>
<td>Count</td>
<td>loop, rep (conditional suffixes E/Z and NE/NZ)</td>
</tr>
<tr>
<td>edx</td>
<td>Data</td>
<td>cdq, mul, div</td>
</tr>
<tr>
<td>esi</td>
<td>Source Index</td>
<td>lods, movs, cmps</td>
</tr>
<tr>
<td>edi</td>
<td>Destination Index</td>
<td>stos, movs, scas, cmps</td>
</tr>
<tr>
<td>ebp</td>
<td>Base Pointer</td>
<td>enter, leave</td>
</tr>
<tr>
<td>esp</td>
<td>Stack Pointer</td>
<td>pushad, popad, push, pop, call, enter, leave</td>
</tr>
</tbody>
</table>
<p>Those of you familiar with the x86 architecture will know certain instructions have dependencies on, or affect the state of, other registers after execution. For example, LODSB loads a byte from the memory address in SI into AL and then increments SI by 1. STOSB stores the byte in AL at the memory address in DI and then increments DI by 1. MOVSB copies a byte from the memory address in SI to the memory address in DI, then adds 1 to both SI and DI. (The increments assume the direction flag is clear; when it is set, the index registers are decremented instead.) If the same instruction is preceded by REP (for repeat), the CX register is affected too: it is decreased by 1 on each repetition until it reaches zero.</p>
<h3><strong>Initialization</strong></h3>
<p>The <strong>s</strong> parameter points to a 32-byte buffer containing a 16-byte plain text and 16-byte master key which is copied to the local buffer <strong>x</strong>.</p>
<img class="alignleft size-full wp-image-3024" src="https://tinycrypt.files.wordpress.com/2018/03/buffer.jpg" alt="" width="311" height="91" />
<p>A copy of the data is required, because both will be modified during the encryption process. ESI will point to <strong>s</strong> while EDI will point to <strong>x</strong>. EAX will hold the <strong>Rcon</strong> value declared as <strong>c</strong>. ECX will be used exclusively for loops, and EDX is a spare register for loops which require an index starting position of zero. There's a reason to prefer EAX over other registers: a byte comparison against an immediate is encoded in only 2 bytes for AL, but 3 bytes for the other registers.</p>
<pre style="color:#000000;background:#ffffff;"><span style="color:#808030;">/</span><span style="color:#808030;">/</span> <span style="color:#008c00;">2</span> vs <span style="color:#008c00;">3</span> bytes
<span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#008c00;">0001</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span> <span style="color:#0000e6;">"\x3c\x6c"</span> <span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#800000;font-weight:bold;">cmp</span> <span style="color:#000080;">al</span><span style="color:#808030;">,</span> <span style="color:#008000;">0x6c</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span>
<span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#008c00;">0003</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span> <span style="color:#0000e6;">"\x80\xfb\x6c"</span> <span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#800000;font-weight:bold;">cmp</span> <span style="color:#000080;">bl</span><span style="color:#808030;">,</span> <span style="color:#008000;">0x6c</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span>
<span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#008c00;">0006</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span> <span style="color:#0000e6;">"\x80\xf9\x6c"</span> <span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#800000;font-weight:bold;">cmp</span> <span style="color:#000080;">cl</span><span style="color:#808030;">,</span> <span style="color:#008000;">0x6c</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span>
<span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#008c00;">0009</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span> <span style="color:#0000e6;">"\x80\xfa\x6c"</span> <span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#800000;font-weight:bold;">cmp</span> <span style="color:#000080;">dl</span><span style="color:#808030;">,</span> <span style="color:#008000;">0x6c</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span>
</pre>
In addition to this, one operation requires saving EAX in another register, which only requires 1 byte with XCHG. Exchanging two registers when neither is EAX would require 2 bytes.
<pre style="color:#000000;background:#ffffff;"><span style="color:#808030;">/</span><span style="color:#808030;">/</span> <span style="color:#008c00;">1</span> vs <span style="color:#008c00;">2</span> bytes
<span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#008c00;">0001</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span> <span style="color:#0000e6;">"\x92"</span> <span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#800000;font-weight:bold;">xchg</span> <span style="color:#000080;">edx</span><span style="color:#808030;">,</span> <span style="color:#000080;">eax</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span>
<span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#008c00;">0002</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span> <span style="color:#0000e6;">"\x87\xd3"</span> <span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#800000;font-weight:bold;">xchg</span> <span style="color:#000080;">ebx</span><span style="color:#808030;">,</span> <span style="color:#000080;">edx</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span>
</pre>
Setting EAX to 1, our loop counter ECX to 4, and EDX to 0 can be accomplished in a variety of ways requiring only 7 bytes. The alternative for setting EAX here would be: XOR EAX, EAX; INC EAX.
<pre style="color:#000000;background:#ffffff;"><span style="color:#808030;">/</span><span style="color:#808030;">/</span> <span style="color:#008c00;">7</span> bytes
<span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#008c00;">0001</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span> <span style="color:#0000e6;">"\x6a\x01"</span> <span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#800000;font-weight:bold;">push</span> <span style="color:#008000;">0x1</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span>
<span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#008c00;">0003</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span> <span style="color:#0000e6;">"\x58"</span> <span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#800000;font-weight:bold;">pop</span> <span style="color:#000080;">eax</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span>
<span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#008c00;">0004</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span> <span style="color:#0000e6;">"\x6a\x04"</span> <span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#800000;font-weight:bold;">push</span> <span style="color:#008000;">0x4</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span>
<span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#008c00;">0006</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span> <span style="color:#0000e6;">"\x59"</span> <span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#800000;font-weight:bold;">pop</span> <span style="color:#000080;">ecx</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span>
<span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#008c00;">0007</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span> <span style="color:#0000e6;">"\x99"</span> <span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#800000;font-weight:bold;">cdq</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span>
</pre>
Another way ...
<pre style="color:#000000;background:#ffffff;"><span style="color:#808030;">/</span><span style="color:#808030;">/</span> <span style="color:#008c00;">7</span> bytes
<span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#008c00;">0001</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span> <span style="color:#0000e6;">"\x31\xc9"</span> <span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#800000;font-weight:bold;">xor</span> <span style="color:#000080;">ecx</span><span style="color:#808030;">,</span> <span style="color:#000080;">ecx</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span>
<span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#008c00;">0003</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span> <span style="color:#0000e6;">"\xf7\xe1"</span> <span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#800000;font-weight:bold;">mul</span> <span style="color:#000080;">ecx</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span>
<span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#008c00;">0005</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span> <span style="color:#0000e6;">"\x40"</span> <span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#800000;font-weight:bold;">inc</span> <span style="color:#000080;">eax</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span>
<span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#008c00;">0006</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span> <span style="color:#0000e6;">"\xb1\x04"</span> <span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#800000;font-weight:bold;">mov</span> <span style="color:#000080;">cl</span><span style="color:#808030;">,</span> <span style="color:#008000;">0x4</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span>
</pre>
And another..
<pre style="color:#000000;background:#ffffff;"><span style="color:#808030;">/</span><span style="color:#808030;">/</span> <span style="color:#008c00;">7</span> bytes
<span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#008c00;">0000</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span> <span style="color:#0000e6;">"\x6a\x01"</span> <span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#800000;font-weight:bold;">push</span> <span style="color:#008000;">0x1</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span>
<span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#008c00;">0002</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span> <span style="color:#0000e6;">"\x58"</span> <span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#800000;font-weight:bold;">pop</span> <span style="color:#000080;">eax</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span>
<span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#008c00;">0003</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span> <span style="color:#0000e6;">"\x99"</span> <span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#800000;font-weight:bold;">cdq</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span>
<span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#008c00;">0004</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span> <span style="color:#0000e6;">"\x6b\xc8\x04"</span> <span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#800000;font-weight:bold;">imul</span> <span style="color:#000080;">ecx</span><span style="color:#808030;">,</span> <span style="color:#000080;">eax</span><span style="color:#808030;">,</span> <span style="color:#008000;">0x4</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span>
</pre>
<p>ESI will point to <em>s</em> which contains our plain text and master key. ESI is normally reserved for read operations. We can load a byte with LODS into AL/EAX, and move values from the memory at ESI to the memory at EDI using MOVS. Typically we see stack allocation using ADD or SUB, and sometimes (very rarely) using ENTER. This implementation only requires 32 bytes of stack space, and PUSHAD, which saves the 8 general-purpose registers on the stack, occupies exactly 32 bytes of memory and is encoded as a 1-byte opcode. To illustrate why it makes more sense to use PUSHAD/POPAD instead of ADD/SUB or ENTER/LEAVE, the following are the x86 opcodes generated by an assembler.</p>
<pre style="color:#000000;background:#ffffff;"><span style="color:#808030;">/</span><span style="color:#808030;">/</span> <span style="color:#008c00;">5</span> bytes
<span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#008c00;">0000</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span> <span style="color:#0000e6;">"\xc8\x20\x00\x00"</span> <span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#800000;font-weight:bold;">enter</span> <span style="color:#008000;">0x20</span><span style="color:#808030;">,</span> <span style="color:#008000;">0x0</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span>
<span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#008c00;">0004</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span> <span style="color:#0000e6;">"\xc9"</span> <span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#800000;font-weight:bold;">leave</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span>
<span style="color:#808030;">/</span><span style="color:#808030;">/</span> <span style="color:#008c00;">6</span> bytes
<span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#008c00;">0000</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span> <span style="color:#0000e6;">"\x83\xec\x20"</span> <span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#800000;font-weight:bold;">sub</span> <span style="color:#000080;">esp</span><span style="color:#808030;">,</span> <span style="color:#008000;">0x20</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span>
<span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#008c00;">0003</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span> <span style="color:#0000e6;">"\x83\xc4\x20"</span> <span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#800000;font-weight:bold;">add</span> <span style="color:#000080;">esp</span><span style="color:#808030;">,</span> <span style="color:#008000;">0x20</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span>
<span style="color:#808030;">/</span><span style="color:#808030;">/</span> <span style="color:#008c00;">2</span> bytes
<span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#008c00;">0000</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span> <span style="color:#0000e6;">"\x60"</span> <span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#800000;font-weight:bold;">pushad</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span>
<span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#008c00;">0001</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span> <span style="color:#0000e6;">"\x61"</span> <span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#800000;font-weight:bold;">popad</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span>
</pre>
Obviously the 2-byte PUSHAD/POPAD pair is the best option here, but once you require more than 96 bytes of stack space, ADD/SUB in combination with a register is usually the better option.
<pre style="color:#000000;background:#ffffff;"><span style="color:#696969;">; *****************************</span>
<span style="color:#696969;">; void E(void *s);</span>
<span style="color:#696969;">; *****************************</span>
<span style="color:#e34adc;">_E:</span>
<span style="color:#800000;font-weight:bold;">pushad</span>
<span style="color:#800000;font-weight:bold;">xor</span> <span style="color:#000080;">ecx</span><span style="color:#808030;">,</span> <span style="color:#000080;">ecx</span> <span style="color:#696969;">; ecx = 0</span>
<span style="color:#800000;font-weight:bold;">mul</span> <span style="color:#000080;">ecx</span> <span style="color:#696969;">; eax = 0, edx = 0</span>
<span style="color:#800000;font-weight:bold;">inc</span> <span style="color:#000080;">eax</span> <span style="color:#696969;">; c = 1</span>
<span style="color:#800000;font-weight:bold;">mov</span> <span style="color:#000080;">cl</span><span style="color:#808030;">,</span> <span style="color:#008c00;">4</span>
<span style="color:#800000;font-weight:bold;">pushad</span> <span style="color:#696969;">; alloca(32)</span>
<span style="color:#696969;">; F(8)x[i]=((W*)s)[i];</span>
<span style="color:#800000;font-weight:bold;">mov</span> <span style="color:#000080;">esi</span><span style="color:#808030;">,</span> <span style="color:#808030;">[</span><span style="color:#000080;">esp</span><span style="color:#808030;">+</span><span style="color:#008c00;">64</span><span style="color:#008c00;">+4</span><span style="color:#808030;">]</span> <span style="color:#696969;">; esi = s</span>
<span style="color:#800000;font-weight:bold;">mov</span> <span style="color:#000080;">edi</span><span style="color:#808030;">,</span> <span style="color:#000080;">esp</span>
<span style="color:#800000;font-weight:bold;">pushad</span>
<span style="color:#800000;font-weight:bold;">add</span> <span style="color:#000080;">ecx</span><span style="color:#808030;">,</span> <span style="color:#000080;">ecx</span> <span style="color:#696969;">; copy state + master key to stack</span>
<span style="color:#800000;font-weight:bold;">rep</span> <span style="color:#800000;font-weight:bold;">movsd</span>
<span style="color:#800000;font-weight:bold;">popad</span>
</pre>
<h3>Multiplication</h3>
A pointer to the multiplication function is stored in EBP, and there are three reasons to dedicate EBP to this rather than another register:
<ol>
<li>EBP has no 8-bit sub-registers, so we can't use it for any 8-bit operations.</li>
<li>Indirect memory access through EBP requires 1 extra byte when the displacement is zero.</li>
<li>The only instructions that implicitly use EBP are ENTER and LEAVE.</li>
</ol>
<pre style="color:#000000;background:#ffffff;"><span style="color:#808030;">/</span><span style="color:#808030;">/</span> <span style="color:#008c00;">2</span> vs <span style="color:#008c00;">3</span> bytes for indirect access
<span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#008c00;">0001</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span> <span style="color:#0000e6;">"\x8b\x5d\x00"</span> <span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#800000;font-weight:bold;">mov</span> <span style="color:#000080;">ebx</span><span style="color:#808030;">,</span> <span style="color:#808030;">[</span><span style="color:#000080;">ebp</span><span style="color:#808030;">]</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span>
<span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#008c00;">0004</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span> <span style="color:#0000e6;">"\x8b\x1e"</span> <span style="color:#808030;">/</span><span style="color:#808030;">*</span> <span style="color:#800000;font-weight:bold;">mov</span> <span style="color:#000080;">ebx</span><span style="color:#808030;">,</span> <span style="color:#808030;">[</span><span style="color:#000080;">esi</span><span style="color:#808030;">]</span> <span style="color:#808030;">*</span><span style="color:#808030;">/</span>
</pre>
When writing compact code, EBP is useful only as a temporary register or pointer to some function.
<pre style="color:#000000;background:#ffffff;"><span style="color:#696969;">; *****************************</span>
<span style="color:#696969;">; Multiplication over GF(2**8)</span>
<span style="color:#696969;">; *****************************</span>
<span style="color:#800000;font-weight:bold;">call</span> <span style="color:#e34adc;">$+21</span> <span style="color:#696969;">; save address </span>
<span style="color:#800000;font-weight:bold;">push</span> <span style="color:#000080;">ecx</span> <span style="color:#696969;">; save ecx</span>
<span style="color:#800000;font-weight:bold;">mov</span> <span style="color:#000080;">cl</span><span style="color:#808030;">,</span> <span style="color:#008c00;">4</span> <span style="color:#696969;">; 4 bytes</span>
<span style="color:#800000;font-weight:bold;">add</span> <span style="color:#000080;">al</span><span style="color:#808030;">,</span> <span style="color:#000080;">al</span> <span style="color:#696969;">; al &lt;&lt;= 1</span>
<span style="color:#800000;font-weight:bold;">jnc</span> <span style="color:#e34adc;">$+4</span> <span style="color:#696969;">;</span>
<span style="color:#800000;font-weight:bold;">xor</span> <span style="color:#000080;">al</span><span style="color:#808030;">,</span> <span style="color:#008c00;">27</span> <span style="color:#696969;">;</span>
<span style="color:#800000;font-weight:bold;">ror</span> <span style="color:#000080;">eax</span><span style="color:#808030;">,</span> <span style="color:#008c00;">8</span> <span style="color:#696969;">; rotate for next byte</span>
<span style="color:#800000;font-weight:bold;">loop</span> <span style="color:#e34adc;">$-9</span> <span style="color:#696969;">; </span>
<span style="color:#800000;font-weight:bold;">pop</span> <span style="color:#000080;">ecx</span> <span style="color:#696969;">; restore ecx</span>
<span style="color:#800000;font-weight:bold;">ret</span>
<span style="color:#800000;font-weight:bold;">pop</span> <span style="color:#000080;">ebp</span>
</pre>
<h3>SubByte</h3>
<p>In the SubBytes step, each byte $latex a_{i,j}$ in the <em>state</em> matrix is replaced with $latex S(a_{i,j})$ using an 8-bit substitution box. The S-box is derived from the multiplicative inverse over $latex GF(2^8)$, and we can implement SubByte purely using code.</p>
<pre style="color:#000000;background:#ffffff;"><span style="color:#696969;">; *****************************</span>
<span style="color:#696969;">; B SubByte(B x)</span>
<span style="color:#696969;">; *****************************</span>
<span style="color:#e34adc;">sub_byte:</span>
<span style="color:#800000;font-weight:bold;">pushad</span>
<span style="color:#800000;font-weight:bold;">test</span> <span style="color:#000080;">al</span><span style="color:#808030;">,</span> <span style="color:#000080;">al</span> <span style="color:#696969;">; if(x){</span>
<span style="color:#800000;font-weight:bold;">jz</span> <span style="color:#e34adc;">sb_l6</span>
<span style="color:#800000;font-weight:bold;">xchg</span> <span style="color:#000080;">eax</span><span style="color:#808030;">,</span> <span style="color:#000080;">edx</span>
<span style="color:#800000;font-weight:bold;">mov</span> <span style="color:#000080;">cl</span><span style="color:#808030;">,</span> <span style="color:#008c00;">-1</span> <span style="color:#696969;">; i=255 </span>
<span style="color:#696969;">; for(c=i=0,y=1;--i;y=(!c&amp;&amp;y==x)?c=1:y,y^=M(y));</span>
<span style="color:#e34adc;">sb_l0:</span>
<span style="color:#800000;font-weight:bold;">mov</span> <span style="color:#000080;">al</span><span style="color:#808030;">,</span> <span style="color:#008c00;">1</span> <span style="color:#696969;">; y=1</span>
<span style="color:#e34adc;">sb_l1:</span>
<span style="color:#800000;font-weight:bold;">test</span> <span style="color:#000080;">ah</span><span style="color:#808030;">,</span> <span style="color:#000080;">ah</span> <span style="color:#696969;">; !c</span>
<span style="color:#800000;font-weight:bold;">jnz</span> <span style="color:#e34adc;">sb_l2</span>
<span style="color:#800000;font-weight:bold;">cmp</span> <span style="color:#000080;">al</span><span style="color:#808030;">,</span> <span style="color:#000080;">dl</span> <span style="color:#696969;">; y!=x</span>
<span style="color:#800000;font-weight:bold;">setz</span> <span style="color:#000080;">ah</span>
<span style="color:#800000;font-weight:bold;">jz</span> <span style="color:#e34adc;">sb_l0</span>
<span style="color:#e34adc;">sb_l2:</span>
<span style="color:#800000;font-weight:bold;">mov</span> <span style="color:#000080;">dh</span><span style="color:#808030;">,</span> <span style="color:#000080;">al</span> <span style="color:#696969;">; y^=M(y)</span>
<span style="color:#800000;font-weight:bold;">call</span> <span style="color:#e34adc;">ebp</span> <span style="color:#696969;">;</span>
<span style="color:#800000;font-weight:bold;">xor</span> <span style="color:#000080;">al</span><span style="color:#808030;">,</span> <span style="color:#000080;">dh</span>
<span style="color:#800000;font-weight:bold;">loop</span> <span style="color:#e34adc;">sb_l1</span> <span style="color:#696969;">; --i</span>
<span style="color:#696969;">; F(4)x^=y=(y&lt;&lt;1)|(y&gt;&gt;7);</span>
<span style="color:#800000;font-weight:bold;">mov</span> <span style="color:#000080;">dl</span><span style="color:#808030;">,</span> <span style="color:#000080;">al</span> <span style="color:#696969;">; dl=y</span>
<span style="color:#800000;font-weight:bold;">mov</span> <span style="color:#000080;">cl</span><span style="color:#808030;">,</span> <span style="color:#008c00;">4</span> <span style="color:#696969;">; i=4 </span>
<span style="color:#e34adc;">sb_l5:</span>
<span style="color:#800000;font-weight:bold;">rol</span> <span style="color:#000080;">dl</span><span style="color:#808030;">,</span> <span style="color:#008c00;">1</span> <span style="color:#696969;">; y=R(y,1)</span>
<span style="color:#800000;font-weight:bold;">xor</span> <span style="color:#000080;">al</span><span style="color:#808030;">,</span> <span style="color:#000080;">dl</span> <span style="color:#696969;">; x^=y</span>
<span style="color:#800000;font-weight:bold;">loop</span> <span style="color:#e34adc;">sb_l5</span> <span style="color:#696969;">; i--</span>
<span style="color:#e34adc;">sb_l6:</span>
<span style="color:#800000;font-weight:bold;">xor</span> <span style="color:#000080;">al</span><span style="color:#808030;">,</span> <span style="color:#008c00;">99</span> <span style="color:#696969;">; return x^99</span>
<span style="color:#800000;font-weight:bold;">mov</span> <span style="color:#808030;">[</span><span style="color:#000080;">esp</span><span style="color:#808030;">+</span><span style="color:#008c00;">28</span><span style="color:#808030;">]</span><span style="color:#808030;">,</span> <span style="color:#000080;">al</span>
<span style="color:#800000;font-weight:bold;">popad</span>
<span style="color:#800000;font-weight:bold;">ret</span>
</pre>
<h3>AddRoundKey</h3>
<p>The <em>state</em> matrix is combined with a subkey using the bitwise XOR operation.</p>
<pre style="color:#000000;background:#ffffff;"><span style="color:#696969;">; *****************************</span>
<span style="color:#696969;">; AddRoundKey</span>
<span style="color:#696969;">; *****************************</span>
<span style="color:#696969;">; F(4)s[i]=x[i]^k[i];</span>
<span style="color:#800000;font-weight:bold;">pushad</span>
<span style="color:#800000;font-weight:bold;">xchg</span> <span style="color:#000080;">esi</span><span style="color:#808030;">,</span> <span style="color:#000080;">edi</span> <span style="color:#696969;">; swap x and s</span>
<span style="color:#e34adc;">xor_key:</span>
<span style="color:#800000;font-weight:bold;">lodsd</span> <span style="color:#696969;">; eax = x[i]</span>
<span style="color:#800000;font-weight:bold;">xor</span> <span style="color:#000080;">eax</span><span style="color:#808030;">,</span> <span style="color:#808030;">[</span><span style="color:#000080;">edi</span><span style="color:#808030;">+</span><span style="color:#008c00;">16</span><span style="color:#808030;">]</span> <span style="color:#696969;">; eax ^= k[i]</span>
<span style="color:#800000;font-weight:bold;">stosd</span> <span style="color:#696969;">; s[i] = eax</span>
<span style="color:#800000;font-weight:bold;">loop</span> <span style="color:#e34adc;">xor_key</span>
<span style="color:#800000;font-weight:bold;">popad</span>
</pre>
<h3>AddRoundConstant</h3>
<p>There are various cryptographic attacks possible against AES without this small but important step. It protects against the <em>Slide Attack</em>, first described in 1999 by David Wagner and Alex Biryukov. Without different round constants to generate round keys, all the round keys will be the same.</p>
<pre style="color:#000000;background:#ffffff;"><span style="color:#696969;">; *****************************</span>
<span style="color:#696969;">; AddRoundConstant</span>
<span style="color:#696969;">; *****************************</span>
<span style="color:#696969;">; *k^=c; c=M(c);</span>
<span style="color:#800000;font-weight:bold;">xor</span> <span style="color:#808030;">[</span><span style="color:#000080;">esi</span><span style="color:#808030;">+</span><span style="color:#008c00;">16</span><span style="color:#808030;">]</span><span style="color:#808030;">,</span> <span style="color:#000080;">al</span>
<span style="color:#800000;font-weight:bold;">call</span> <span style="color:#e34adc;">ebp</span>
</pre>
<h3>ExpandRoundKey</h3>
<p>The operation to expand the master key into subkeys for each round of encryption isn't normally in-lined. To boost performance, these round keys are usually precomputed once before the encryption process, since repeating the same computation for every block would only waste CPU cycles. Compacting the AES code into a single call requires in-lining the key expansion operation. The C code here is not directly translated into x86 assembly, but the assembly does produce the same result.</p>
<pre style="color:#000000;background:#ffffff;"><span style="color:#696969;">; ***************************</span>
<span style="color:#696969;">; ExpandRoundKey</span>
<span style="color:#696969;">; ***************************</span>
<span style="color:#696969;">; F(4)w&lt;&lt;=8,w|=S(((B*)k)[15-i]);w=R(w,8);F(4)w=k[i]^=w;</span>
<span style="color:#800000;font-weight:bold;">pushad</span>
<span style="color:#800000;font-weight:bold;">add</span> <span style="color:#000080;">esi</span><span style="color:#808030;">,</span><span style="color:#008c00;">16</span>
<span style="color:#800000;font-weight:bold;">mov</span> <span style="color:#000080;">eax</span><span style="color:#808030;">,</span> <span style="color:#808030;">[</span><span style="color:#000080;">esi</span><span style="color:#808030;">+</span><span style="color:#008c00;">3</span><span style="color:#808030;">*</span><span style="color:#008c00;">4</span><span style="color:#808030;">]</span> <span style="color:#696969;">; w=k[3]</span>
<span style="color:#800000;font-weight:bold;">ror</span> <span style="color:#000080;">eax</span><span style="color:#808030;">,</span> <span style="color:#008c00;">8</span> <span style="color:#696969;">; w=R(w,8)</span>
<span style="color:#e34adc;">exp_l1:</span>
<span style="color:#800000;font-weight:bold;">call</span> <span style="color:#e34adc;">S</span> <span style="color:#696969;">; w=S(w)</span>
<span style="color:#800000;font-weight:bold;">ror</span> <span style="color:#000080;">eax</span><span style="color:#808030;">,</span> <span style="color:#008c00;">8</span> <span style="color:#696969;">; w=R(w,8);</span>
<span style="color:#800000;font-weight:bold;">loop</span> <span style="color:#e34adc;">exp_l1</span>
<span style="color:#800000;font-weight:bold;">mov</span> <span style="color:#000080;">cl</span><span style="color:#808030;">,</span> <span style="color:#008c00;">4</span>
<span style="color:#e34adc;">exp_l2:</span>
<span style="color:#800000;font-weight:bold;">xor</span> <span style="color:#808030;">[</span><span style="color:#000080;">esi</span><span style="color:#808030;">]</span><span style="color:#808030;">,</span> <span style="color:#000080;">eax</span> <span style="color:#696969;">; k[i]^=w</span>
<span style="color:#800000;font-weight:bold;">lodsd</span> <span style="color:#696969;">; w=k[i]</span>
<span style="color:#800000;font-weight:bold;">loop</span> <span style="color:#e34adc;">exp_l2</span>
<span style="color:#800000;font-weight:bold;">popad</span>
</pre>
<h3>Combining the steps</h3>
<p>An earlier version of the code used separate <span class="step">AddRoundKey</span>, <span class="step">AddRoundConstant</span> and <span class="step">ExpandRoundKey</span> steps, but since these steps all relate to using and updating the round key, the three steps are combined in order to reduce the number of loops, thus shaving off a few bytes.</p>
<pre style="color:#000000;background:#ffffff;"><span style="color:#696969;">; *****************************</span>
<span style="color:#696969;">; AddRoundKey, AddRoundConstant, ExpandRoundKey</span>
<span style="color:#696969;">; *****************************</span>
<span style="color:#696969;">; w=k[3];F(4)w=(w&amp;-256)|S(w),w=R(w,8),((W*)s)[i]=x[i]^k[i];</span>
<span style="color:#696969;">; w=R(w,8)^c;F(4)w=k[i]^=w;</span>
<span style="color:#800000;font-weight:bold;">pushad</span>
<span style="color:#800000;font-weight:bold;">xchg</span> <span style="color:#000080;">eax</span><span style="color:#808030;">,</span> <span style="color:#000080;">edx</span>
<span style="color:#800000;font-weight:bold;">xchg</span> <span style="color:#000080;">esi</span><span style="color:#808030;">,</span> <span style="color:#000080;">edi</span>
<span style="color:#800000;font-weight:bold;">mov</span> <span style="color:#000080;">eax</span><span style="color:#808030;">,</span> <span style="color:#808030;">[</span><span style="color:#000080;">esi</span><span style="color:#808030;">+</span><span style="color:#008c00;">16</span><span style="color:#008c00;">+12</span><span style="color:#808030;">]</span> <span style="color:#696969;">; w=R(k[3],8);</span>
<span style="color:#800000;font-weight:bold;">ror</span> <span style="color:#000080;">eax</span><span style="color:#808030;">,</span> <span style="color:#008c00;">8</span>
<span style="color:#e34adc;">xor_key:</span>
<span style="color:#800000;font-weight:bold;">mov</span> <span style="color:#000080;">ebx</span><span style="color:#808030;">,</span> <span style="color:#808030;">[</span><span style="color:#000080;">esi</span><span style="color:#808030;">+</span><span style="color:#008c00;">16</span><span style="color:#808030;">]</span> <span style="color:#696969;">; t=k[i];</span>
<span style="color:#800000;font-weight:bold;">xor</span> <span style="color:#808030;">[</span><span style="color:#000080;">esi</span><span style="color:#808030;">]</span><span style="color:#808030;">,</span> <span style="color:#000080;">ebx</span> <span style="color:#696969;">; x[i]^=t;</span>
<span style="color:#800000;font-weight:bold;">movsd</span> <span style="color:#696969;">; s[i]=x[i];</span>
<span style="color:#696969;">; w=(w&amp;-256)|S(w)</span>
<span style="color:#800000;font-weight:bold;">call</span> <span style="color:#e34adc;">S </span> <span style="color:#696969;">; al=S(al);</span>
<span style="color:#800000;font-weight:bold;">ror</span> <span style="color:#000080;">eax</span><span style="color:#808030;">,</span> <span style="color:#008c00;">8</span> <span style="color:#696969;">; w=R(w,8);</span>
<span style="color:#800000;font-weight:bold;">loop</span> <span style="color:#e34adc;">xor_key</span>
<span style="color:#696969;">; w=R(w,8)^c;</span>
<span style="color:#800000;font-weight:bold;">xor</span> <span style="color:#000080;">eax</span><span style="color:#808030;">,</span> <span style="color:#000080;">edx</span> <span style="color:#696969;">; w^=c;</span>
<span style="color:#696969;">; F(4)w=k[i]^=w;</span>
<span style="color:#800000;font-weight:bold;">mov</span> <span style="color:#000080;">cl</span><span style="color:#808030;">,</span> <span style="color:#008c00;">4</span>
<span style="color:#e34adc;">exp_key:</span>
<span style="color:#800000;font-weight:bold;">xor</span> <span style="color:#808030;">[</span><span style="color:#000080;">esi</span><span style="color:#808030;">]</span><span style="color:#808030;">,</span> <span style="color:#000080;">eax</span> <span style="color:#696969;">; k[i]^=w;</span>
<span style="color:#800000;font-weight:bold;">lodsd</span> <span style="color:#696969;">; w=k[i];</span>
<span style="color:#800000;font-weight:bold;">loop</span> <span style="color:#e34adc;">exp_key</span>
<span style="color:#800000;font-weight:bold;">popad</span>
</pre>
<h3>ShiftRows</h3>
<p><span class="step">ShiftRows</span> cyclically shifts the bytes in each row of the <em>state</em> matrix by a certain offset. The first row is left unchanged. Each byte of the second row is shifted one position to the left, and the third and fourth rows are shifted by two and three positions respectively. Because the order of <span class="step">SubBytes</span> and <span class="step">ShiftRows</span> doesn't matter, they're combined in one loop.</p>
<pre style="color:#000000;background:#ffffff;"><span style="color:#696969;">; ***************************</span>
<span style="color:#696969;">; ShiftRows and SubBytes</span>
<span style="color:#696969;">; ***************************</span>
<span style="color:#696969;">; F(16)((B*)x)[(i%4)+(((i/4)-(i%4))%4)*4]=S(((B*)s)[i]);</span>
<span style="color:#800000;font-weight:bold;">pushad</span>
<span style="color:#800000;font-weight:bold;">mov</span> <span style="color:#000080;">cl</span><span style="color:#808030;">,</span> <span style="color:#008c00;">16</span>
<span style="color:#e34adc;">shift_rows:</span>
<span style="color:#800000;font-weight:bold;">lodsb</span> <span style="color:#696969;">; al = S(s[i])</span>
<span style="color:#800000;font-weight:bold;">call</span> <span style="color:#e34adc;">sub_byte</span>
<span style="color:#800000;font-weight:bold;">push</span> <span style="color:#000080;">edx</span>
<span style="color:#800000;font-weight:bold;">mov</span> <span style="color:#000080;">ebx</span><span style="color:#808030;">,</span> <span style="color:#000080;">edx</span> <span style="color:#696969;">; ebx = i%4</span>
<span style="color:#800000;font-weight:bold;">and</span> <span style="color:#000080;">ebx</span><span style="color:#808030;">,</span> <span style="color:#008c00;">3</span> <span style="color:#696969;">;</span>
<span style="color:#800000;font-weight:bold;">shr</span> <span style="color:#000080;">edx</span><span style="color:#808030;">,</span> <span style="color:#008c00;">2</span> <span style="color:#696969;">; (i/4 - ebx) % 4</span>
<span style="color:#800000;font-weight:bold;">sub</span> <span style="color:#000080;">edx</span><span style="color:#808030;">,