/* 2 chacha-merged.c version 20080118 (D. J. Bernstein, public domain) */
/* Opaque cipher context; the definition of struct chacha_ctx (its
   ->input[16] array of 32-bit state words is used below) is not visible
   in this excerpt. */
11 typedef struct chacha_ctx chacha_ctx;
/* U32C: build an unsigned 32-bit integer constant. */
14 #define U32C(v) (v##U)
/* Truncate a value to 8 / 32 bits.  NOTE(review): U8C is used here but
   its definition is missing from this excerpt. */
16 #define U8V(v) ((uint8_t)(v) & U8C(0xFF))
17 #define U32V(v) ((uint32_t)(v) & U32C(0xFFFFFFFF))
/* 32-bit left rotate; U32V masks the shifted value into 32-bit range. */
19 #define ROTL32(v, n) \
20 (U32V((v) << (n)) | ((v) >> (32 - (n))))
/* Load a 32-bit word from 4 bytes at p, little-endian (alignment-safe,
   works on any host endianness). */
22 #define U8TO32_LITTLE(p) \
23 (((uint32_t)((p)[0]) ) | \
24 ((uint32_t)((p)[1]) << 8) | \
25 ((uint32_t)((p)[2]) << 16) | \
26 ((uint32_t)((p)[3]) << 24))
/* Store 32-bit v into p[0..3], little-endian.  NOTE(review): the macro's
   opening lines (the do { wrapper and the (p)[0] store) are missing from
   this excerpt, and the final backslash continuation runs into the next
   #define as listed here. */
28 #define U32TO8_LITTLE(p, v) \
31 (p)[1] = U8V((v) >> 8); \
32 (p)[2] = U8V((v) >> 16); \
33 (p)[3] = U8V((v) >> 24); \
36 #define ROTATE(v,c) (ROTL32(v,c))
37 #define XOR(v,w) ((v) ^ (w))
38 #define PLUS(v,w) (U32V((v) + (w)))
39 #define PLUSONE(v) (PLUS((v),1))
/* One ChaCha quarter-round on state words a,b,c,d: add/xor/rotate with
   rotation counts 16, 12, 8, 7.  Multi-statement macro with no do-while
   wrapper -- callers must not use it as a brace-less if/else body. */
41 #define QUARTERROUND(a,b,c,d) \
42 a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
43 c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
44 a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
45 c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);
/* 16-byte constants loaded into state words 0-3 by chacha_keysetup:
   sigma for the 256-bit-key path, tau for the 128-bit-key path
   (the selection code itself is not visible in this excerpt). */
47 static const char sigma[16] = "expand 32-byte k";
48 static const char tau[16] = "expand 16-byte k";
/*
 * chacha_keysetup - load a 128- or 256-bit key into the cipher state.
 *
 * x     - cipher context; key material goes into x->input[4..11] and the
 *         "expand ..-byte k" constant into x->input[0..3]
 * k     - key bytes, read little-endian in 4-byte words
 * kbits - 256 (recommended) or 128, per the branch below
 */
50 void chacha_keysetup(chacha_ctx *x, const uint8_t *k, uint32_t kbits)
52 const char *constants;
/* The first 128 key bits always occupy state words 4-7. */
54 x->input[4] = U8TO32_LITTLE(k + 0);
55 x->input[5] = U8TO32_LITTLE(k + 4);
56 x->input[6] = U8TO32_LITTLE(k + 8);
57 x->input[7] = U8TO32_LITTLE(k + 12);
58 if (kbits == 256) { /* recommended */
/* NOTE(review): the 256-bit branch body (advancing k past the first 16
   bytes and selecting constants = sigma) is missing from this excerpt. */
61 } else { /* kbits == 128 */
/* NOTE(review): `constants = tau;` and the closing brace of this else
   branch also appear to be missing here -- verify against the canonical
   file before relying on this listing. */
/* Words 8-11: second half of a 256-bit key, or the 128-bit key reused. */
64 x->input[8] = U8TO32_LITTLE(k + 0);
65 x->input[9] = U8TO32_LITTLE(k + 4);
66 x->input[10] = U8TO32_LITTLE(k + 8);
67 x->input[11] = U8TO32_LITTLE(k + 12);
/* Constant words 0-3 ("expand 32-byte k" / "expand 16-byte k"). */
68 x->input[0] = U8TO32_LITTLE(constants + 0);
69 x->input[1] = U8TO32_LITTLE(constants + 4);
70 x->input[2] = U8TO32_LITTLE(constants + 8);
71 x->input[3] = U8TO32_LITTLE(constants + 12);
74 void chacha_ivsetup(chacha_ctx *x, const uint8_t *iv, const uint8_t *counter)
76 x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
77 x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4);
78 x->input[14] = U8TO32_LITTLE(iv + 0);
79 x->input[15] = U8TO32_LITTLE(iv + 4);
82 void chacha_ivsetup_96(chacha_ctx *x, const uint8_t *iv, const uint8_t *counter)
84 x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
85 x->input[13] = U8TO32_LITTLE(iv + 0);
86 x->input[14] = U8TO32_LITTLE(iv + 4);
87 x->input[15] = U8TO32_LITTLE(iv + 8);
/*
 * chacha_encrypt_bytes - XOR `bytes` bytes of m with ChaCha keystream
 * into c (encryption and decryption are the same operation).
 *
 * NOTE(review): large portions of this function are missing from this
 * excerpt (state loading, the partial-block tmp-buffer setup, counter
 * increment, pointer advances, and the function tail) -- the comments
 * below cover only what is visible.
 */
91 chacha_encrypt_bytes(chacha_ctx *x, const uint8_t *m, uint8_t *c, uint32_t bytes)
/* Working copies x0..x15 of the 16 state words; j0..j15 hold the input
   state added back after the rounds. */
93 uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
94 uint32_t j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
/* Real output target when a partial (< 64-byte) block is staged
   elsewhere; the staging code is not visible in this excerpt. */
95 uint8_t *ctarget = NULL;
/* Copy loop over `bytes` -- its body is missing from this excerpt. */
121 for (i = 0; i < bytes; ++i)
/* 20 rounds = 10 double rounds: 4 column quarter-rounds, then 4
   diagonal quarter-rounds per iteration. */
143 for (i = 20; i > 0; i -= 2) {
144 QUARTERROUND(x0, x4, x8, x12)
145 QUARTERROUND(x1, x5, x9, x13)
146 QUARTERROUND(x2, x6, x10, x14)
147 QUARTERROUND(x3, x7, x11, x15)
148 QUARTERROUND(x0, x5, x10, x15)
149 QUARTERROUND(x1, x6, x11, x12)
150 QUARTERROUND(x2, x7, x8, x13)
151 QUARTERROUND(x3, x4, x9, x14)
/* Add the saved input state back into the working words (the feed-
   forward for words 0-9 is missing from this excerpt). */
163 x10 = PLUS(x10, j10);
164 x11 = PLUS(x11, j11);
165 x12 = PLUS(x12, j12);
166 x13 = PLUS(x13, j13);
167 x14 = PLUS(x14, j14);
168 x15 = PLUS(x15, j15);
/* XOR the 64-byte keystream block with the message block. */
170 x0 = XOR(x0, U8TO32_LITTLE(m + 0));
171 x1 = XOR(x1, U8TO32_LITTLE(m + 4));
172 x2 = XOR(x2, U8TO32_LITTLE(m + 8));
173 x3 = XOR(x3, U8TO32_LITTLE(m + 12));
174 x4 = XOR(x4, U8TO32_LITTLE(m + 16));
175 x5 = XOR(x5, U8TO32_LITTLE(m + 20));
176 x6 = XOR(x6, U8TO32_LITTLE(m + 24));
177 x7 = XOR(x7, U8TO32_LITTLE(m + 28));
178 x8 = XOR(x8, U8TO32_LITTLE(m + 32));
179 x9 = XOR(x9, U8TO32_LITTLE(m + 36));
180 x10 = XOR(x10, U8TO32_LITTLE(m + 40));
181 x11 = XOR(x11, U8TO32_LITTLE(m + 44));
182 x12 = XOR(x12, U8TO32_LITTLE(m + 48));
183 x13 = XOR(x13, U8TO32_LITTLE(m + 52));
184 x14 = XOR(x14, U8TO32_LITTLE(m + 56));
185 x15 = XOR(x15, U8TO32_LITTLE(m + 60));
190 /* stopping at 2^70 bytes per nonce is user's responsibility */
/* Serialize the 16 result words little-endian into the output block. */
193 U32TO8_LITTLE(c + 0, x0);
194 U32TO8_LITTLE(c + 4, x1);
195 U32TO8_LITTLE(c + 8, x2);
196 U32TO8_LITTLE(c + 12, x3);
197 U32TO8_LITTLE(c + 16, x4);
198 U32TO8_LITTLE(c + 20, x5);
199 U32TO8_LITTLE(c + 24, x6);
200 U32TO8_LITTLE(c + 28, x7);
201 U32TO8_LITTLE(c + 32, x8);
202 U32TO8_LITTLE(c + 36, x9);
203 U32TO8_LITTLE(c + 40, x10);
204 U32TO8_LITTLE(c + 44, x11);
205 U32TO8_LITTLE(c + 48, x12);
206 U32TO8_LITTLE(c + 52, x13);
207 U32TO8_LITTLE(c + 56, x14);
208 U32TO8_LITTLE(c + 60, x15);
/* Final copy loop -- its body and the rest of the function are missing
   from this excerpt. */
212 for (i = 0; i < bytes; ++i)