/Users/brunogarcia/projects/bitcoin-core-dev/src/crypto/chacha20.cpp
Line | Count | Source |
1 | | // Copyright (c) 2017-present The Bitcoin Core developers |
2 | | // Distributed under the MIT software license, see the accompanying |
3 | | // file COPYING or http://www.opensource.org/licenses/mit-license.php. |
4 | | |
5 | | // Based on the public domain implementation 'merged' by D. J. Bernstein |
6 | | // See https://cr.yp.to/chacha.html. |
7 | | |
8 | | #include <crypto/common.h> |
9 | | #include <crypto/chacha20.h> |
10 | | #include <support/cleanse.h> |
11 | | |
12 | | #include <algorithm> |
13 | | #include <bit> |
14 | | #include <cassert> |
15 | | |
16 | | #define QUARTERROUND(a,b,c,d) \ |
17 | | a += b; d = std::rotl(d ^ a, 16); \ |
18 | | c += d; b = std::rotl(b ^ c, 12); \ |
19 | | a += b; d = std::rotl(d ^ a, 8); \ |
20 | | c += d; b = std::rotl(b ^ c, 7); |
21 | | |
22 | 220k | #define REPEAT10(a) do { {a}; {a}; {a}; {a}; {a}; {a}; {a}; {a}; {a}; {a}; } while(0) |
23 | | |
24 | | void ChaCha20Aligned::SetKey(std::span<const std::byte> key) noexcept |
25 | 297k | { |
26 | 297k | assert(key.size() == KEYLEN); |
27 | 297k | input[0] = ReadLE32(key.data() + 0); |
28 | 297k | input[1] = ReadLE32(key.data() + 4); |
29 | 297k | input[2] = ReadLE32(key.data() + 8); |
30 | 297k | input[3] = ReadLE32(key.data() + 12); |
31 | 297k | input[4] = ReadLE32(key.data() + 16); |
32 | 297k | input[5] = ReadLE32(key.data() + 20); |
33 | 297k | input[6] = ReadLE32(key.data() + 24); |
34 | 297k | input[7] = ReadLE32(key.data() + 28); |
35 | 297k | input[8] = 0; |
36 | 297k | input[9] = 0; |
37 | 297k | input[10] = 0; |
38 | 297k | input[11] = 0; |
39 | 297k | } |
40 | | |
41 | | ChaCha20Aligned::~ChaCha20Aligned() |
42 | 153k | { |
43 | 153k | memory_cleanse(input, sizeof(input)); |
44 | 153k | } |
45 | | |
46 | | ChaCha20Aligned::ChaCha20Aligned(std::span<const std::byte> key) noexcept |
47 | 153k | { |
48 | 153k | SetKey(key); |
49 | 153k | } |
50 | | |
51 | | void ChaCha20Aligned::Seek(Nonce96 nonce, uint32_t block_counter) noexcept |
52 | 0 | { |
53 | 0 | input[8] = block_counter; |
54 | 0 | input[9] = nonce.first; |
55 | 0 | input[10] = nonce.second; |
56 | 0 | input[11] = nonce.second >> 32; |
57 | 0 | } |
58 | | |
59 | | inline void ChaCha20Aligned::Keystream(std::span<std::byte> output) noexcept |
60 | 220k | { |
61 | 220k | std::byte* c = output.data(); |
62 | 220k | size_t blocks = output.size() / BLOCKLEN; |
63 | 220k | assert(blocks * BLOCKLEN == output.size()); |
64 | | |
65 | 220k | uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; |
66 | 220k | uint32_t j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15; |
67 | | |
68 | 220k | if (!blocks) return; |
69 | | |
70 | 220k | j4 = input[0]; |
71 | 220k | j5 = input[1]; |
72 | 220k | j6 = input[2]; |
73 | 220k | j7 = input[3]; |
74 | 220k | j8 = input[4]; |
75 | 220k | j9 = input[5]; |
76 | 220k | j10 = input[6]; |
77 | 220k | j11 = input[7]; |
78 | 220k | j12 = input[8]; |
79 | 220k | j13 = input[9]; |
80 | 220k | j14 = input[10]; |
81 | 220k | j15 = input[11]; |
82 | | |
83 | 220k | for (;;) { |
84 | 220k | x0 = 0x61707865; |
85 | 220k | x1 = 0x3320646e; |
86 | 220k | x2 = 0x79622d32; |
87 | 220k | x3 = 0x6b206574; |
88 | 220k | x4 = j4; |
89 | 220k | x5 = j5; |
90 | 220k | x6 = j6; |
91 | 220k | x7 = j7; |
92 | 220k | x8 = j8; |
93 | 220k | x9 = j9; |
94 | 220k | x10 = j10; |
95 | 220k | x11 = j11; |
96 | 220k | x12 = j12; |
97 | 220k | x13 = j13; |
98 | 220k | x14 = j14; |
99 | 220k | x15 = j15; |
100 | | |
101 | | // The 20 inner ChaCha20 rounds are unrolled here for performance. |
102 | 220k | REPEAT10( |
103 | 220k | QUARTERROUND( x0, x4, x8,x12); |
104 | 220k | QUARTERROUND( x1, x5, x9,x13); |
105 | 220k | QUARTERROUND( x2, x6,x10,x14); |
106 | 220k | QUARTERROUND( x3, x7,x11,x15); |
107 | 220k | QUARTERROUND( x0, x5,x10,x15); |
108 | 220k | QUARTERROUND( x1, x6,x11,x12); |
109 | 220k | QUARTERROUND( x2, x7, x8,x13); |
110 | 220k | QUARTERROUND( x3, x4, x9,x14); |
111 | 220k | ); |
112 | | |
113 | 220k | x0 += 0x61707865; |
114 | 220k | x1 += 0x3320646e; |
115 | 220k | x2 += 0x79622d32; |
116 | 220k | x3 += 0x6b206574; |
117 | 220k | x4 += j4; |
118 | 220k | x5 += j5; |
119 | 220k | x6 += j6; |
120 | 220k | x7 += j7; |
121 | 220k | x8 += j8; |
122 | 220k | x9 += j9; |
123 | 220k | x10 += j10; |
124 | 220k | x11 += j11; |
125 | 220k | x12 += j12; |
126 | 220k | x13 += j13; |
127 | 220k | x14 += j14; |
128 | 220k | x15 += j15; |
129 | | |
130 | 220k | ++j12; |
131 | 220k | if (!j12) ++j13; |
132 | | |
133 | 220k | WriteLE32(c + 0, x0); |
134 | 220k | WriteLE32(c + 4, x1); |
135 | 220k | WriteLE32(c + 8, x2); |
136 | 220k | WriteLE32(c + 12, x3); |
137 | 220k | WriteLE32(c + 16, x4); |
138 | 220k | WriteLE32(c + 20, x5); |
139 | 220k | WriteLE32(c + 24, x6); |
140 | 220k | WriteLE32(c + 28, x7); |
141 | 220k | WriteLE32(c + 32, x8); |
142 | 220k | WriteLE32(c + 36, x9); |
143 | 220k | WriteLE32(c + 40, x10); |
144 | 220k | WriteLE32(c + 44, x11); |
145 | 220k | WriteLE32(c + 48, x12); |
146 | 220k | WriteLE32(c + 52, x13); |
147 | 220k | WriteLE32(c + 56, x14); |
148 | 220k | WriteLE32(c + 60, x15); |
149 | | |
150 | 220k | if (blocks == 1) { |
151 | 220k | input[8] = j12; |
152 | 220k | input[9] = j13; |
153 | 220k | return; |
154 | 220k | } |
155 | 0 | blocks -= 1; |
156 | 0 | c += BLOCKLEN; |
157 | 0 | } |
158 | 220k | } |
159 | | |
160 | | inline void ChaCha20Aligned::Crypt(std::span<const std::byte> in_bytes, std::span<std::byte> out_bytes) noexcept |
161 | 0 | { |
162 | 0 | assert(in_bytes.size() == out_bytes.size()); |
163 | 0 | const std::byte* m = in_bytes.data(); |
164 | 0 | std::byte* c = out_bytes.data(); |
165 | 0 | size_t blocks = out_bytes.size() / BLOCKLEN; |
166 | 0 | assert(blocks * BLOCKLEN == out_bytes.size()); |
167 | | |
168 | 0 | uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; |
169 | 0 | uint32_t j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15; |
170 | | |
171 | 0 | if (!blocks) return; |
172 | | |
173 | 0 | j4 = input[0]; |
174 | 0 | j5 = input[1]; |
175 | 0 | j6 = input[2]; |
176 | 0 | j7 = input[3]; |
177 | 0 | j8 = input[4]; |
178 | 0 | j9 = input[5]; |
179 | 0 | j10 = input[6]; |
180 | 0 | j11 = input[7]; |
181 | 0 | j12 = input[8]; |
182 | 0 | j13 = input[9]; |
183 | 0 | j14 = input[10]; |
184 | 0 | j15 = input[11]; |
185 | | |
186 | 0 | for (;;) { |
187 | 0 | x0 = 0x61707865; |
188 | 0 | x1 = 0x3320646e; |
189 | 0 | x2 = 0x79622d32; |
190 | 0 | x3 = 0x6b206574; |
191 | 0 | x4 = j4; |
192 | 0 | x5 = j5; |
193 | 0 | x6 = j6; |
194 | 0 | x7 = j7; |
195 | 0 | x8 = j8; |
196 | 0 | x9 = j9; |
197 | 0 | x10 = j10; |
198 | 0 | x11 = j11; |
199 | 0 | x12 = j12; |
200 | 0 | x13 = j13; |
201 | 0 | x14 = j14; |
202 | 0 | x15 = j15; |
203 | | |
204 | | // The 20 inner ChaCha20 rounds are unrolled here for performance. |
205 | 0 | REPEAT10( |
206 | 0 | QUARTERROUND( x0, x4, x8,x12); |
207 | 0 | QUARTERROUND( x1, x5, x9,x13); |
208 | 0 | QUARTERROUND( x2, x6,x10,x14); |
209 | 0 | QUARTERROUND( x3, x7,x11,x15); |
210 | 0 | QUARTERROUND( x0, x5,x10,x15); |
211 | 0 | QUARTERROUND( x1, x6,x11,x12); |
212 | 0 | QUARTERROUND( x2, x7, x8,x13); |
213 | 0 | QUARTERROUND( x3, x4, x9,x14); |
214 | 0 | ); |
215 | | |
216 | 0 | x0 += 0x61707865; |
217 | 0 | x1 += 0x3320646e; |
218 | 0 | x2 += 0x79622d32; |
219 | 0 | x3 += 0x6b206574; |
220 | 0 | x4 += j4; |
221 | 0 | x5 += j5; |
222 | 0 | x6 += j6; |
223 | 0 | x7 += j7; |
224 | 0 | x8 += j8; |
225 | 0 | x9 += j9; |
226 | 0 | x10 += j10; |
227 | 0 | x11 += j11; |
228 | 0 | x12 += j12; |
229 | 0 | x13 += j13; |
230 | 0 | x14 += j14; |
231 | 0 | x15 += j15; |
232 | | |
233 | 0 | x0 ^= ReadLE32(m + 0); |
234 | 0 | x1 ^= ReadLE32(m + 4); |
235 | 0 | x2 ^= ReadLE32(m + 8); |
236 | 0 | x3 ^= ReadLE32(m + 12); |
237 | 0 | x4 ^= ReadLE32(m + 16); |
238 | 0 | x5 ^= ReadLE32(m + 20); |
239 | 0 | x6 ^= ReadLE32(m + 24); |
240 | 0 | x7 ^= ReadLE32(m + 28); |
241 | 0 | x8 ^= ReadLE32(m + 32); |
242 | 0 | x9 ^= ReadLE32(m + 36); |
243 | 0 | x10 ^= ReadLE32(m + 40); |
244 | 0 | x11 ^= ReadLE32(m + 44); |
245 | 0 | x12 ^= ReadLE32(m + 48); |
246 | 0 | x13 ^= ReadLE32(m + 52); |
247 | 0 | x14 ^= ReadLE32(m + 56); |
248 | 0 | x15 ^= ReadLE32(m + 60); |
249 | | |
250 | 0 | ++j12; |
251 | 0 | if (!j12) ++j13; |
252 | | |
253 | 0 | WriteLE32(c + 0, x0); |
254 | 0 | WriteLE32(c + 4, x1); |
255 | 0 | WriteLE32(c + 8, x2); |
256 | 0 | WriteLE32(c + 12, x3); |
257 | 0 | WriteLE32(c + 16, x4); |
258 | 0 | WriteLE32(c + 20, x5); |
259 | 0 | WriteLE32(c + 24, x6); |
260 | 0 | WriteLE32(c + 28, x7); |
261 | 0 | WriteLE32(c + 32, x8); |
262 | 0 | WriteLE32(c + 36, x9); |
263 | 0 | WriteLE32(c + 40, x10); |
264 | 0 | WriteLE32(c + 44, x11); |
265 | 0 | WriteLE32(c + 48, x12); |
266 | 0 | WriteLE32(c + 52, x13); |
267 | 0 | WriteLE32(c + 56, x14); |
268 | 0 | WriteLE32(c + 60, x15); |
269 | | |
270 | 0 | if (blocks == 1) { |
271 | 0 | input[8] = j12; |
272 | 0 | input[9] = j13; |
273 | 0 | return; |
274 | 0 | } |
275 | 0 | blocks -= 1; |
276 | 0 | c += BLOCKLEN; |
277 | 0 | m += BLOCKLEN; |
278 | 0 | } |
279 | 0 | } |
280 | | |
281 | | void ChaCha20::Keystream(std::span<std::byte> out) noexcept |
282 | 287k | { |
283 | 287k | if (out.empty()) return; |
284 | 287k | if (m_bufleft) { |
285 | 67.0k | unsigned reuse = std::min<size_t>(m_bufleft, out.size()); |
286 | 67.0k | std::copy(m_buffer.end() - m_bufleft, m_buffer.end() - m_bufleft + reuse, out.begin()); |
287 | 67.0k | m_bufleft -= reuse; |
288 | 67.0k | out = out.subspan(reuse); |
289 | 67.0k | } |
290 | 287k | if (out.size() >= m_aligned.BLOCKLEN) { |
291 | 0 | size_t blocks = out.size() / m_aligned.BLOCKLEN; |
292 | 0 | m_aligned.Keystream(out.first(blocks * m_aligned.BLOCKLEN)); |
293 | 0 | out = out.subspan(blocks * m_aligned.BLOCKLEN); |
294 | 0 | } |
295 | 287k | if (!out.empty()) { |
296 | 220k | m_aligned.Keystream(m_buffer); |
297 | 220k | std::copy(m_buffer.begin(), m_buffer.begin() + out.size(), out.begin()); |
298 | 220k | m_bufleft = m_aligned.BLOCKLEN - out.size(); |
299 | 220k | } |
300 | 287k | } |
301 | | |
302 | | void ChaCha20::Crypt(std::span<const std::byte> input, std::span<std::byte> output) noexcept |
303 | 0 | { |
304 | 0 | assert(input.size() == output.size()); |
305 | | |
306 | 0 | if (!input.size()) return; |
307 | 0 | if (m_bufleft) { |
308 | 0 | unsigned reuse = std::min<size_t>(m_bufleft, input.size()); |
309 | 0 | for (unsigned i = 0; i < reuse; i++) { |
310 | 0 | output[i] = input[i] ^ m_buffer[m_aligned.BLOCKLEN - m_bufleft + i]; |
311 | 0 | } |
312 | 0 | m_bufleft -= reuse; |
313 | 0 | output = output.subspan(reuse); |
314 | 0 | input = input.subspan(reuse); |
315 | 0 | } |
316 | 0 | if (input.size() >= m_aligned.BLOCKLEN) { |
317 | 0 | size_t blocks = input.size() / m_aligned.BLOCKLEN; |
318 | 0 | m_aligned.Crypt(input.first(blocks * m_aligned.BLOCKLEN), output.first(blocks * m_aligned.BLOCKLEN)); |
319 | 0 | output = output.subspan(blocks * m_aligned.BLOCKLEN); |
320 | 0 | input = input.subspan(blocks * m_aligned.BLOCKLEN); |
321 | 0 | } |
322 | 0 | if (!input.empty()) { |
323 | 0 | m_aligned.Keystream(m_buffer); |
324 | 0 | for (unsigned i = 0; i < input.size(); i++) { |
325 | 0 | output[i] = input[i] ^ m_buffer[i]; |
326 | 0 | } |
327 | 0 | m_bufleft = m_aligned.BLOCKLEN - input.size(); |
328 | 0 | } |
329 | 0 | } |
330 | | |
331 | | ChaCha20::~ChaCha20() |
332 | 153k | { |
333 | 153k | memory_cleanse(m_buffer.data(), m_buffer.size()); |
334 | 153k | } |
335 | | |
336 | | void ChaCha20::SetKey(std::span<const std::byte> key) noexcept |
337 | 143k | { |
338 | 143k | m_aligned.SetKey(key); |
339 | 143k | m_bufleft = 0; |
340 | 143k | memory_cleanse(m_buffer.data(), m_buffer.size()); |
341 | 143k | } |
342 | | |
343 | | FSChaCha20::FSChaCha20(std::span<const std::byte> key, uint32_t rekey_interval) noexcept : |
344 | 0 | m_chacha20(key), m_rekey_interval(rekey_interval) |
345 | 0 | { |
346 | 0 | assert(key.size() == KEYLEN); |
347 | 0 | } |
348 | | |
349 | | void FSChaCha20::Crypt(std::span<const std::byte> input, std::span<std::byte> output) noexcept |
350 | 0 | { |
351 | 0 | assert(input.size() == output.size()); |
352 | | |
353 | | // Invoke internal stream cipher for actual encryption/decryption. |
354 | 0 | m_chacha20.Crypt(input, output); |
355 | | |
356 | | // Rekey after m_rekey_interval encryptions/decryptions. |
357 | 0 | if (++m_chunk_counter == m_rekey_interval) { |
358 | | // Get new key from the stream cipher. |
359 | 0 | std::byte new_key[KEYLEN]; |
360 | 0 | m_chacha20.Keystream(new_key); |
361 | | // Update its key. |
362 | 0 | m_chacha20.SetKey(new_key); |
363 | | // Wipe the key (a copy remains inside m_chacha20, where it'll be wiped on the next rekey |
364 | | // or on destruction). |
365 | 0 | memory_cleanse(new_key, sizeof(new_key)); |
366 | | // Set the nonce for the new section of output. |
367 | 0 | m_chacha20.Seek({0, ++m_rekey_counter}, 0); |
368 | | // Reset the chunk counter. |
369 | 0 | m_chunk_counter = 0; |
370 | 0 | } |
371 | 0 | } |