/*
chacha-merged.c version 20080118
D. J. Bernstein
Public domain.
*/

#include <stddef.h>
#include <stdint.h>

/* The sixteen 32-bit words of the ChaCha input block. */
struct chacha_ctx {
	uint32_t input[16];
};

typedef struct chacha_ctx chacha_ctx;

#define U8C(v) (v##U)
#define U32C(v) (v##U)

#define U8V(v) ((uint8_t)(v) & U8C(0xFF))
#define U32V(v) ((uint32_t)(v) & U32C(0xFFFFFFFF))

#define ROTL32(v, n) \
	(U32V((v) << (n)) | ((v) >> (32 - (n))))

#define U8TO32_LITTLE(p) \
	(((uint32_t)((p)[0])      ) | \
	 ((uint32_t)((p)[1]) <<  8) | \
	 ((uint32_t)((p)[2]) << 16) | \
	 ((uint32_t)((p)[3]) << 24))

#define U32TO8_LITTLE(p, v) \
	do { \
		(p)[0] = U8V((v)      ); \
		(p)[1] = U8V((v) >>  8); \
		(p)[2] = U8V((v) >> 16); \
		(p)[3] = U8V((v) >> 24); \
	} while (0)

#define ROTATE(v,c) (ROTL32(v,c))
#define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) (U32V((v) + (w)))
#define PLUSONE(v) (PLUS((v),1))
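
/*
 * One ChaCha quarter-round; the rotation distances 16, 12, 8 and 7 are
 * fixed by the cipher's design.  The encryption loop below runs four
 * column and four diagonal quarter-rounds per double round, ten times.
 */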
#define QUARTERROUND(a,b,c,d) \
	a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
	c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
	a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
	c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);
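
/*
 * The 16-byte constants loaded into state words 0..3: sigma for 256-bit
 * keys, tau for 128-bit keys.
 */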
static const char sigma[16] = "expand 32-byte k";
static const char tau[16] = "expand 16-byte k";
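
/*
 * State layout, as set up below: words 0-3 hold the constant, words 4-11
 * the key, words 12-13 the 64-bit block counter, and words 14-15 the
 * 64-bit IV.
 */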
void chacha_keysetup(chacha_ctx *x, const uint8_t *k, uint32_t kbits)
{
	const char *constants;

	x->input[4] = U8TO32_LITTLE(k + 0);
	x->input[5] = U8TO32_LITTLE(k + 4);
	x->input[6] = U8TO32_LITTLE(k + 8);
	x->input[7] = U8TO32_LITTLE(k + 12);
	if (kbits == 256) { /* recommended */
		k += 16;
		constants = sigma;
	} else { /* kbits == 128 */
		constants = tau;
	}
	x->input[8] = U8TO32_LITTLE(k + 0);
	x->input[9] = U8TO32_LITTLE(k + 4);
	x->input[10] = U8TO32_LITTLE(k + 8);
	x->input[11] = U8TO32_LITTLE(k + 12);
	x->input[0] = U8TO32_LITTLE(constants + 0);
	x->input[1] = U8TO32_LITTLE(constants + 4);
	x->input[2] = U8TO32_LITTLE(constants + 8);
	x->input[3] = U8TO32_LITTLE(constants + 12);
}

void chacha_ivsetup(chacha_ctx *x, const uint8_t *iv, const uint8_t *counter)
{
	x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
	x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4);
	x->input[14] = U8TO32_LITTLE(iv + 0);
	x->input[15] = U8TO32_LITTLE(iv + 4);
}
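
/*
 * Typical use, as a sketch (buffer names and lengths are illustrative,
 * not part of this file): set the key once, then the per-message IV,
 * then encrypt.  Decryption is the identical call, since the cipher
 * XORs a keystream into the data either way.
 *
 *	chacha_ctx ctx;
 *	chacha_keysetup(&ctx, key, 256);	// 32-byte key
 *	chacha_ivsetup(&ctx, iv, NULL);		// 8-byte IV, counter = 0
 *	chacha_encrypt_bytes(&ctx, plain, cipher, len);
 */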

void
chacha_encrypt_bytes(chacha_ctx *x, const uint8_t *m, uint8_t *c, uint32_t bytes)
{
	uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
	uint32_t j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
	uint8_t *ctarget = NULL;
	uint8_t tmp[64];
	uint32_t i;

	if (!bytes)
		return;

	j0 = x->input[0];   j1 = x->input[1];   j2 = x->input[2];   j3 = x->input[3];
	j4 = x->input[4];   j5 = x->input[5];   j6 = x->input[6];   j7 = x->input[7];
	j8 = x->input[8];   j9 = x->input[9];   j10 = x->input[10]; j11 = x->input[11];
	j12 = x->input[12]; j13 = x->input[13]; j14 = x->input[14]; j15 = x->input[15];

	for (;;) {
		/* Short final block: run it through a 64-byte stack buffer. */
		if (bytes < 64) {
			for (i = 0; i < bytes; ++i)
				tmp[i] = m[i];
			m = tmp;
			ctarget = c;
			c = tmp;
		}
		x0 = j0;   x1 = j1;   x2 = j2;   x3 = j3;
		x4 = j4;   x5 = j5;   x6 = j6;   x7 = j7;
		x8 = j8;   x9 = j9;   x10 = j10; x11 = j11;
		x12 = j12; x13 = j13; x14 = j14; x15 = j15;
		for (i = 20; i > 0; i -= 2) {
			QUARTERROUND(x0, x4, x8, x12)
			QUARTERROUND(x1, x5, x9, x13)
			QUARTERROUND(x2, x6, x10, x14)
			QUARTERROUND(x3, x7, x11, x15)
			QUARTERROUND(x0, x5, x10, x15)
			QUARTERROUND(x1, x6, x11, x12)
			QUARTERROUND(x2, x7, x8, x13)
			QUARTERROUND(x3, x4, x9, x14)
		}
		/* Feedforward: add the input words back into the working state. */
		x0 = PLUS(x0, j0);
		x1 = PLUS(x1, j1);
		x2 = PLUS(x2, j2);
		x3 = PLUS(x3, j3);
		x4 = PLUS(x4, j4);
		x5 = PLUS(x5, j5);
		x6 = PLUS(x6, j6);
		x7 = PLUS(x7, j7);
		x8 = PLUS(x8, j8);
		x9 = PLUS(x9, j9);
		x10 = PLUS(x10, j10);
		x11 = PLUS(x11, j11);
		x12 = PLUS(x12, j12);
		x13 = PLUS(x13, j13);
		x14 = PLUS(x14, j14);
		x15 = PLUS(x15, j15);
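
		/* XOR the 64-byte keystream block into the message. */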
		x0 = XOR(x0, U8TO32_LITTLE(m + 0));
		x1 = XOR(x1, U8TO32_LITTLE(m + 4));
		x2 = XOR(x2, U8TO32_LITTLE(m + 8));
		x3 = XOR(x3, U8TO32_LITTLE(m + 12));
		x4 = XOR(x4, U8TO32_LITTLE(m + 16));
		x5 = XOR(x5, U8TO32_LITTLE(m + 20));
		x6 = XOR(x6, U8TO32_LITTLE(m + 24));
		x7 = XOR(x7, U8TO32_LITTLE(m + 28));
		x8 = XOR(x8, U8TO32_LITTLE(m + 32));
		x9 = XOR(x9, U8TO32_LITTLE(m + 36));
		x10 = XOR(x10, U8TO32_LITTLE(m + 40));
		x11 = XOR(x11, U8TO32_LITTLE(m + 44));
		x12 = XOR(x12, U8TO32_LITTLE(m + 48));
		x13 = XOR(x13, U8TO32_LITTLE(m + 52));
		x14 = XOR(x14, U8TO32_LITTLE(m + 56));
		x15 = XOR(x15, U8TO32_LITTLE(m + 60));

		j12 = PLUSONE(j12);
		if (!j12) {
			j13 = PLUSONE(j13);
			/* stopping at 2^70 bytes per nonce is user's responsibility */
		}

		U32TO8_LITTLE(c + 0, x0);
		U32TO8_LITTLE(c + 4, x1);
		U32TO8_LITTLE(c + 8, x2);
		U32TO8_LITTLE(c + 12, x3);
		U32TO8_LITTLE(c + 16, x4);
		U32TO8_LITTLE(c + 20, x5);
		U32TO8_LITTLE(c + 24, x6);
		U32TO8_LITTLE(c + 28, x7);
		U32TO8_LITTLE(c + 32, x8);
		U32TO8_LITTLE(c + 36, x9);
		U32TO8_LITTLE(c + 40, x10);
		U32TO8_LITTLE(c + 44, x11);
		U32TO8_LITTLE(c + 48, x12);
		U32TO8_LITTLE(c + 52, x13);
		U32TO8_LITTLE(c + 56, x14);
		U32TO8_LITTLE(c + 60, x15);

		if (bytes <= 64) {
			if (bytes < 64) {
				for (i = 0; i < bytes; ++i)
					ctarget[i] = c[i];
			}
			/* Save the counter so the next call continues the stream. */
			x->input[12] = j12;
			x->input[13] = j13;
			return;
		}
		bytes -= 64;
		c += 64;
		m += 64;
	}
}