diff options
Diffstat (limited to '3rd_party/ed25519/fe.c')
-rw-r--r-- | 3rd_party/ed25519/fe.c | 1491 |
1 files changed, 1491 insertions, 0 deletions
diff --git a/3rd_party/ed25519/fe.c b/3rd_party/ed25519/fe.c new file mode 100644 index 0000000..2105eb7 --- /dev/null +++ b/3rd_party/ed25519/fe.c | |||
@@ -0,0 +1,1491 @@ | |||
1 | #include "fixedint.h" | ||
2 | #include "fe.h" | ||
3 | |||
4 | |||
5 | /* | ||
6 | helper functions | ||
7 | */ | ||
8 | static uint64_t load_3(const unsigned char *in) { | ||
9 | uint64_t result; | ||
10 | |||
11 | result = (uint64_t) in[0]; | ||
12 | result |= ((uint64_t) in[1]) << 8; | ||
13 | result |= ((uint64_t) in[2]) << 16; | ||
14 | |||
15 | return result; | ||
16 | } | ||
17 | |||
18 | static uint64_t load_4(const unsigned char *in) { | ||
19 | uint64_t result; | ||
20 | |||
21 | result = (uint64_t) in[0]; | ||
22 | result |= ((uint64_t) in[1]) << 8; | ||
23 | result |= ((uint64_t) in[2]) << 16; | ||
24 | result |= ((uint64_t) in[3]) << 24; | ||
25 | |||
26 | return result; | ||
27 | } | ||
28 | |||
29 | |||
30 | |||
31 | /* | ||
32 | h = 0 | ||
33 | */ | ||
34 | |||
35 | void fe_0(fe h) { | ||
36 | h[0] = 0; | ||
37 | h[1] = 0; | ||
38 | h[2] = 0; | ||
39 | h[3] = 0; | ||
40 | h[4] = 0; | ||
41 | h[5] = 0; | ||
42 | h[6] = 0; | ||
43 | h[7] = 0; | ||
44 | h[8] = 0; | ||
45 | h[9] = 0; | ||
46 | } | ||
47 | |||
48 | |||
49 | |||
50 | /* | ||
51 | h = 1 | ||
52 | */ | ||
53 | |||
54 | void fe_1(fe h) { | ||
55 | h[0] = 1; | ||
56 | h[1] = 0; | ||
57 | h[2] = 0; | ||
58 | h[3] = 0; | ||
59 | h[4] = 0; | ||
60 | h[5] = 0; | ||
61 | h[6] = 0; | ||
62 | h[7] = 0; | ||
63 | h[8] = 0; | ||
64 | h[9] = 0; | ||
65 | } | ||
66 | |||
67 | |||
68 | |||
69 | /* | ||
70 | h = f + g | ||
71 | Can overlap h with f or g. | ||
72 | |||
73 | Preconditions: | ||
74 | |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. | ||
75 | |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. | ||
76 | |||
77 | Postconditions: | ||
78 | |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. | ||
79 | */ | ||
80 | |||
81 | void fe_add(fe h, const fe f, const fe g) { | ||
82 | int32_t f0 = f[0]; | ||
83 | int32_t f1 = f[1]; | ||
84 | int32_t f2 = f[2]; | ||
85 | int32_t f3 = f[3]; | ||
86 | int32_t f4 = f[4]; | ||
87 | int32_t f5 = f[5]; | ||
88 | int32_t f6 = f[6]; | ||
89 | int32_t f7 = f[7]; | ||
90 | int32_t f8 = f[8]; | ||
91 | int32_t f9 = f[9]; | ||
92 | int32_t g0 = g[0]; | ||
93 | int32_t g1 = g[1]; | ||
94 | int32_t g2 = g[2]; | ||
95 | int32_t g3 = g[3]; | ||
96 | int32_t g4 = g[4]; | ||
97 | int32_t g5 = g[5]; | ||
98 | int32_t g6 = g[6]; | ||
99 | int32_t g7 = g[7]; | ||
100 | int32_t g8 = g[8]; | ||
101 | int32_t g9 = g[9]; | ||
102 | int32_t h0 = f0 + g0; | ||
103 | int32_t h1 = f1 + g1; | ||
104 | int32_t h2 = f2 + g2; | ||
105 | int32_t h3 = f3 + g3; | ||
106 | int32_t h4 = f4 + g4; | ||
107 | int32_t h5 = f5 + g5; | ||
108 | int32_t h6 = f6 + g6; | ||
109 | int32_t h7 = f7 + g7; | ||
110 | int32_t h8 = f8 + g8; | ||
111 | int32_t h9 = f9 + g9; | ||
112 | |||
113 | h[0] = h0; | ||
114 | h[1] = h1; | ||
115 | h[2] = h2; | ||
116 | h[3] = h3; | ||
117 | h[4] = h4; | ||
118 | h[5] = h5; | ||
119 | h[6] = h6; | ||
120 | h[7] = h7; | ||
121 | h[8] = h8; | ||
122 | h[9] = h9; | ||
123 | } | ||
124 | |||
125 | |||
126 | |||
127 | /* | ||
128 | Replace (f,g) with (g,g) if b == 1; | ||
129 | replace (f,g) with (f,g) if b == 0. | ||
130 | |||
131 | Preconditions: b in {0,1}. | ||
132 | */ | ||
133 | |||
134 | void fe_cmov(fe f, const fe g, unsigned int b) { | ||
135 | int32_t f0 = f[0]; | ||
136 | int32_t f1 = f[1]; | ||
137 | int32_t f2 = f[2]; | ||
138 | int32_t f3 = f[3]; | ||
139 | int32_t f4 = f[4]; | ||
140 | int32_t f5 = f[5]; | ||
141 | int32_t f6 = f[6]; | ||
142 | int32_t f7 = f[7]; | ||
143 | int32_t f8 = f[8]; | ||
144 | int32_t f9 = f[9]; | ||
145 | int32_t g0 = g[0]; | ||
146 | int32_t g1 = g[1]; | ||
147 | int32_t g2 = g[2]; | ||
148 | int32_t g3 = g[3]; | ||
149 | int32_t g4 = g[4]; | ||
150 | int32_t g5 = g[5]; | ||
151 | int32_t g6 = g[6]; | ||
152 | int32_t g7 = g[7]; | ||
153 | int32_t g8 = g[8]; | ||
154 | int32_t g9 = g[9]; | ||
155 | int32_t x0 = f0 ^ g0; | ||
156 | int32_t x1 = f1 ^ g1; | ||
157 | int32_t x2 = f2 ^ g2; | ||
158 | int32_t x3 = f3 ^ g3; | ||
159 | int32_t x4 = f4 ^ g4; | ||
160 | int32_t x5 = f5 ^ g5; | ||
161 | int32_t x6 = f6 ^ g6; | ||
162 | int32_t x7 = f7 ^ g7; | ||
163 | int32_t x8 = f8 ^ g8; | ||
164 | int32_t x9 = f9 ^ g9; | ||
165 | |||
166 | b = (unsigned int) (- (int) b); /* silence warning */ | ||
167 | x0 &= b; | ||
168 | x1 &= b; | ||
169 | x2 &= b; | ||
170 | x3 &= b; | ||
171 | x4 &= b; | ||
172 | x5 &= b; | ||
173 | x6 &= b; | ||
174 | x7 &= b; | ||
175 | x8 &= b; | ||
176 | x9 &= b; | ||
177 | |||
178 | f[0] = f0 ^ x0; | ||
179 | f[1] = f1 ^ x1; | ||
180 | f[2] = f2 ^ x2; | ||
181 | f[3] = f3 ^ x3; | ||
182 | f[4] = f4 ^ x4; | ||
183 | f[5] = f5 ^ x5; | ||
184 | f[6] = f6 ^ x6; | ||
185 | f[7] = f7 ^ x7; | ||
186 | f[8] = f8 ^ x8; | ||
187 | f[9] = f9 ^ x9; | ||
188 | } | ||
189 | |||
190 | /* | ||
191 | Replace (f,g) with (g,f) if b == 1; | ||
192 | replace (f,g) with (f,g) if b == 0. | ||
193 | |||
194 | Preconditions: b in {0,1}. | ||
195 | */ | ||
196 | |||
197 | void fe_cswap(fe f,fe g,unsigned int b) { | ||
198 | int32_t f0 = f[0]; | ||
199 | int32_t f1 = f[1]; | ||
200 | int32_t f2 = f[2]; | ||
201 | int32_t f3 = f[3]; | ||
202 | int32_t f4 = f[4]; | ||
203 | int32_t f5 = f[5]; | ||
204 | int32_t f6 = f[6]; | ||
205 | int32_t f7 = f[7]; | ||
206 | int32_t f8 = f[8]; | ||
207 | int32_t f9 = f[9]; | ||
208 | int32_t g0 = g[0]; | ||
209 | int32_t g1 = g[1]; | ||
210 | int32_t g2 = g[2]; | ||
211 | int32_t g3 = g[3]; | ||
212 | int32_t g4 = g[4]; | ||
213 | int32_t g5 = g[5]; | ||
214 | int32_t g6 = g[6]; | ||
215 | int32_t g7 = g[7]; | ||
216 | int32_t g8 = g[8]; | ||
217 | int32_t g9 = g[9]; | ||
218 | int32_t x0 = f0 ^ g0; | ||
219 | int32_t x1 = f1 ^ g1; | ||
220 | int32_t x2 = f2 ^ g2; | ||
221 | int32_t x3 = f3 ^ g3; | ||
222 | int32_t x4 = f4 ^ g4; | ||
223 | int32_t x5 = f5 ^ g5; | ||
224 | int32_t x6 = f6 ^ g6; | ||
225 | int32_t x7 = f7 ^ g7; | ||
226 | int32_t x8 = f8 ^ g8; | ||
227 | int32_t x9 = f9 ^ g9; | ||
228 | b = (unsigned int) (- (int) b); /* silence warning */ | ||
229 | x0 &= b; | ||
230 | x1 &= b; | ||
231 | x2 &= b; | ||
232 | x3 &= b; | ||
233 | x4 &= b; | ||
234 | x5 &= b; | ||
235 | x6 &= b; | ||
236 | x7 &= b; | ||
237 | x8 &= b; | ||
238 | x9 &= b; | ||
239 | f[0] = f0 ^ x0; | ||
240 | f[1] = f1 ^ x1; | ||
241 | f[2] = f2 ^ x2; | ||
242 | f[3] = f3 ^ x3; | ||
243 | f[4] = f4 ^ x4; | ||
244 | f[5] = f5 ^ x5; | ||
245 | f[6] = f6 ^ x6; | ||
246 | f[7] = f7 ^ x7; | ||
247 | f[8] = f8 ^ x8; | ||
248 | f[9] = f9 ^ x9; | ||
249 | g[0] = g0 ^ x0; | ||
250 | g[1] = g1 ^ x1; | ||
251 | g[2] = g2 ^ x2; | ||
252 | g[3] = g3 ^ x3; | ||
253 | g[4] = g4 ^ x4; | ||
254 | g[5] = g5 ^ x5; | ||
255 | g[6] = g6 ^ x6; | ||
256 | g[7] = g7 ^ x7; | ||
257 | g[8] = g8 ^ x8; | ||
258 | g[9] = g9 ^ x9; | ||
259 | } | ||
260 | |||
261 | |||
262 | |||
263 | /* | ||
264 | h = f | ||
265 | */ | ||
266 | |||
267 | void fe_copy(fe h, const fe f) { | ||
268 | int32_t f0 = f[0]; | ||
269 | int32_t f1 = f[1]; | ||
270 | int32_t f2 = f[2]; | ||
271 | int32_t f3 = f[3]; | ||
272 | int32_t f4 = f[4]; | ||
273 | int32_t f5 = f[5]; | ||
274 | int32_t f6 = f[6]; | ||
275 | int32_t f7 = f[7]; | ||
276 | int32_t f8 = f[8]; | ||
277 | int32_t f9 = f[9]; | ||
278 | |||
279 | h[0] = f0; | ||
280 | h[1] = f1; | ||
281 | h[2] = f2; | ||
282 | h[3] = f3; | ||
283 | h[4] = f4; | ||
284 | h[5] = f5; | ||
285 | h[6] = f6; | ||
286 | h[7] = f7; | ||
287 | h[8] = f8; | ||
288 | h[9] = f9; | ||
289 | } | ||
290 | |||
291 | |||
292 | |||
293 | /* | ||
294 | Ignores top bit of h. | ||
295 | */ | ||
296 | |||
297 | void fe_frombytes(fe h, const unsigned char *s) { | ||
298 | int64_t h0 = load_4(s); | ||
299 | int64_t h1 = load_3(s + 4) << 6; | ||
300 | int64_t h2 = load_3(s + 7) << 5; | ||
301 | int64_t h3 = load_3(s + 10) << 3; | ||
302 | int64_t h4 = load_3(s + 13) << 2; | ||
303 | int64_t h5 = load_4(s + 16); | ||
304 | int64_t h6 = load_3(s + 20) << 7; | ||
305 | int64_t h7 = load_3(s + 23) << 5; | ||
306 | int64_t h8 = load_3(s + 26) << 4; | ||
307 | int64_t h9 = (load_3(s + 29) & 8388607) << 2; | ||
308 | int64_t carry0; | ||
309 | int64_t carry1; | ||
310 | int64_t carry2; | ||
311 | int64_t carry3; | ||
312 | int64_t carry4; | ||
313 | int64_t carry5; | ||
314 | int64_t carry6; | ||
315 | int64_t carry7; | ||
316 | int64_t carry8; | ||
317 | int64_t carry9; | ||
318 | |||
319 | carry9 = (h9 + (int64_t) (1 << 24)) >> 25; | ||
320 | h0 += carry9 * 19; | ||
321 | h9 -= carry9 << 25; | ||
322 | carry1 = (h1 + (int64_t) (1 << 24)) >> 25; | ||
323 | h2 += carry1; | ||
324 | h1 -= carry1 << 25; | ||
325 | carry3 = (h3 + (int64_t) (1 << 24)) >> 25; | ||
326 | h4 += carry3; | ||
327 | h3 -= carry3 << 25; | ||
328 | carry5 = (h5 + (int64_t) (1 << 24)) >> 25; | ||
329 | h6 += carry5; | ||
330 | h5 -= carry5 << 25; | ||
331 | carry7 = (h7 + (int64_t) (1 << 24)) >> 25; | ||
332 | h8 += carry7; | ||
333 | h7 -= carry7 << 25; | ||
334 | carry0 = (h0 + (int64_t) (1 << 25)) >> 26; | ||
335 | h1 += carry0; | ||
336 | h0 -= carry0 << 26; | ||
337 | carry2 = (h2 + (int64_t) (1 << 25)) >> 26; | ||
338 | h3 += carry2; | ||
339 | h2 -= carry2 << 26; | ||
340 | carry4 = (h4 + (int64_t) (1 << 25)) >> 26; | ||
341 | h5 += carry4; | ||
342 | h4 -= carry4 << 26; | ||
343 | carry6 = (h6 + (int64_t) (1 << 25)) >> 26; | ||
344 | h7 += carry6; | ||
345 | h6 -= carry6 << 26; | ||
346 | carry8 = (h8 + (int64_t) (1 << 25)) >> 26; | ||
347 | h9 += carry8; | ||
348 | h8 -= carry8 << 26; | ||
349 | |||
350 | h[0] = (int32_t) h0; | ||
351 | h[1] = (int32_t) h1; | ||
352 | h[2] = (int32_t) h2; | ||
353 | h[3] = (int32_t) h3; | ||
354 | h[4] = (int32_t) h4; | ||
355 | h[5] = (int32_t) h5; | ||
356 | h[6] = (int32_t) h6; | ||
357 | h[7] = (int32_t) h7; | ||
358 | h[8] = (int32_t) h8; | ||
359 | h[9] = (int32_t) h9; | ||
360 | } | ||
361 | |||
362 | |||
363 | |||
364 | void fe_invert(fe out, const fe z) { | ||
365 | fe t0; | ||
366 | fe t1; | ||
367 | fe t2; | ||
368 | fe t3; | ||
369 | int i; | ||
370 | |||
371 | fe_sq(t0, z); | ||
372 | |||
373 | for (i = 1; i < 1; ++i) { | ||
374 | fe_sq(t0, t0); | ||
375 | } | ||
376 | |||
377 | fe_sq(t1, t0); | ||
378 | |||
379 | for (i = 1; i < 2; ++i) { | ||
380 | fe_sq(t1, t1); | ||
381 | } | ||
382 | |||
383 | fe_mul(t1, z, t1); | ||
384 | fe_mul(t0, t0, t1); | ||
385 | fe_sq(t2, t0); | ||
386 | |||
387 | for (i = 1; i < 1; ++i) { | ||
388 | fe_sq(t2, t2); | ||
389 | } | ||
390 | |||
391 | fe_mul(t1, t1, t2); | ||
392 | fe_sq(t2, t1); | ||
393 | |||
394 | for (i = 1; i < 5; ++i) { | ||
395 | fe_sq(t2, t2); | ||
396 | } | ||
397 | |||
398 | fe_mul(t1, t2, t1); | ||
399 | fe_sq(t2, t1); | ||
400 | |||
401 | for (i = 1; i < 10; ++i) { | ||
402 | fe_sq(t2, t2); | ||
403 | } | ||
404 | |||
405 | fe_mul(t2, t2, t1); | ||
406 | fe_sq(t3, t2); | ||
407 | |||
408 | for (i = 1; i < 20; ++i) { | ||
409 | fe_sq(t3, t3); | ||
410 | } | ||
411 | |||
412 | fe_mul(t2, t3, t2); | ||
413 | fe_sq(t2, t2); | ||
414 | |||
415 | for (i = 1; i < 10; ++i) { | ||
416 | fe_sq(t2, t2); | ||
417 | } | ||
418 | |||
419 | fe_mul(t1, t2, t1); | ||
420 | fe_sq(t2, t1); | ||
421 | |||
422 | for (i = 1; i < 50; ++i) { | ||
423 | fe_sq(t2, t2); | ||
424 | } | ||
425 | |||
426 | fe_mul(t2, t2, t1); | ||
427 | fe_sq(t3, t2); | ||
428 | |||
429 | for (i = 1; i < 100; ++i) { | ||
430 | fe_sq(t3, t3); | ||
431 | } | ||
432 | |||
433 | fe_mul(t2, t3, t2); | ||
434 | fe_sq(t2, t2); | ||
435 | |||
436 | for (i = 1; i < 50; ++i) { | ||
437 | fe_sq(t2, t2); | ||
438 | } | ||
439 | |||
440 | fe_mul(t1, t2, t1); | ||
441 | fe_sq(t1, t1); | ||
442 | |||
443 | for (i = 1; i < 5; ++i) { | ||
444 | fe_sq(t1, t1); | ||
445 | } | ||
446 | |||
447 | fe_mul(out, t1, t0); | ||
448 | } | ||
449 | |||
450 | |||
451 | |||
452 | /* | ||
453 | return 1 if f is in {1,3,5,...,q-2} | ||
454 | return 0 if f is in {0,2,4,...,q-1} | ||
455 | |||
456 | Preconditions: | ||
457 | |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. | ||
458 | */ | ||
459 | |||
460 | int fe_isnegative(const fe f) { | ||
461 | unsigned char s[32]; | ||
462 | |||
463 | fe_tobytes(s, f); | ||
464 | |||
465 | return s[0] & 1; | ||
466 | } | ||
467 | |||
468 | |||
469 | |||
470 | /* | ||
471 | return 1 if f == 0 | ||
472 | return 0 if f != 0 | ||
473 | |||
474 | Preconditions: | ||
475 | |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. | ||
476 | */ | ||
477 | |||
478 | int fe_isnonzero(const fe f) { | ||
479 | unsigned char s[32]; | ||
480 | unsigned char r; | ||
481 | |||
482 | fe_tobytes(s, f); | ||
483 | |||
484 | r = s[0]; | ||
485 | #define F(i) r |= s[i] | ||
486 | F(1); | ||
487 | F(2); | ||
488 | F(3); | ||
489 | F(4); | ||
490 | F(5); | ||
491 | F(6); | ||
492 | F(7); | ||
493 | F(8); | ||
494 | F(9); | ||
495 | F(10); | ||
496 | F(11); | ||
497 | F(12); | ||
498 | F(13); | ||
499 | F(14); | ||
500 | F(15); | ||
501 | F(16); | ||
502 | F(17); | ||
503 | F(18); | ||
504 | F(19); | ||
505 | F(20); | ||
506 | F(21); | ||
507 | F(22); | ||
508 | F(23); | ||
509 | F(24); | ||
510 | F(25); | ||
511 | F(26); | ||
512 | F(27); | ||
513 | F(28); | ||
514 | F(29); | ||
515 | F(30); | ||
516 | F(31); | ||
517 | #undef F | ||
518 | |||
519 | return r != 0; | ||
520 | } | ||
521 | |||
522 | |||
523 | |||
524 | /* | ||
525 | h = f * g | ||
526 | Can overlap h with f or g. | ||
527 | |||
528 | Preconditions: | ||
529 | |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. | ||
530 | |g| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. | ||
531 | |||
532 | Postconditions: | ||
533 | |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. | ||
534 | */ | ||
535 | |||
536 | /* | ||
537 | Notes on implementation strategy: | ||
538 | |||
539 | Using schoolbook multiplication. | ||
540 | Karatsuba would save a little in some cost models. | ||
541 | |||
542 | Most multiplications by 2 and 19 are 32-bit precomputations; | ||
543 | cheaper than 64-bit postcomputations. | ||
544 | |||
545 | There is one remaining multiplication by 19 in the carry chain; | ||
546 | one *19 precomputation can be merged into this, | ||
547 | but the resulting data flow is considerably less clean. | ||
548 | |||
549 | There are 12 carries below. | ||
550 | 10 of them are 2-way parallelizable and vectorizable. | ||
551 | Can get away with 11 carries, but then data flow is much deeper. | ||
552 | |||
553 | With tighter constraints on inputs can squeeze carries into int32. | ||
554 | */ | ||
555 | |||
556 | void fe_mul(fe h, const fe f, const fe g) { | ||
557 | int32_t f0 = f[0]; | ||
558 | int32_t f1 = f[1]; | ||
559 | int32_t f2 = f[2]; | ||
560 | int32_t f3 = f[3]; | ||
561 | int32_t f4 = f[4]; | ||
562 | int32_t f5 = f[5]; | ||
563 | int32_t f6 = f[6]; | ||
564 | int32_t f7 = f[7]; | ||
565 | int32_t f8 = f[8]; | ||
566 | int32_t f9 = f[9]; | ||
567 | int32_t g0 = g[0]; | ||
568 | int32_t g1 = g[1]; | ||
569 | int32_t g2 = g[2]; | ||
570 | int32_t g3 = g[3]; | ||
571 | int32_t g4 = g[4]; | ||
572 | int32_t g5 = g[5]; | ||
573 | int32_t g6 = g[6]; | ||
574 | int32_t g7 = g[7]; | ||
575 | int32_t g8 = g[8]; | ||
576 | int32_t g9 = g[9]; | ||
577 | int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */ | ||
578 | int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */ | ||
579 | int32_t g3_19 = 19 * g3; | ||
580 | int32_t g4_19 = 19 * g4; | ||
581 | int32_t g5_19 = 19 * g5; | ||
582 | int32_t g6_19 = 19 * g6; | ||
583 | int32_t g7_19 = 19 * g7; | ||
584 | int32_t g8_19 = 19 * g8; | ||
585 | int32_t g9_19 = 19 * g9; | ||
586 | int32_t f1_2 = 2 * f1; | ||
587 | int32_t f3_2 = 2 * f3; | ||
588 | int32_t f5_2 = 2 * f5; | ||
589 | int32_t f7_2 = 2 * f7; | ||
590 | int32_t f9_2 = 2 * f9; | ||
591 | int64_t f0g0 = f0 * (int64_t) g0; | ||
592 | int64_t f0g1 = f0 * (int64_t) g1; | ||
593 | int64_t f0g2 = f0 * (int64_t) g2; | ||
594 | int64_t f0g3 = f0 * (int64_t) g3; | ||
595 | int64_t f0g4 = f0 * (int64_t) g4; | ||
596 | int64_t f0g5 = f0 * (int64_t) g5; | ||
597 | int64_t f0g6 = f0 * (int64_t) g6; | ||
598 | int64_t f0g7 = f0 * (int64_t) g7; | ||
599 | int64_t f0g8 = f0 * (int64_t) g8; | ||
600 | int64_t f0g9 = f0 * (int64_t) g9; | ||
601 | int64_t f1g0 = f1 * (int64_t) g0; | ||
602 | int64_t f1g1_2 = f1_2 * (int64_t) g1; | ||
603 | int64_t f1g2 = f1 * (int64_t) g2; | ||
604 | int64_t f1g3_2 = f1_2 * (int64_t) g3; | ||
605 | int64_t f1g4 = f1 * (int64_t) g4; | ||
606 | int64_t f1g5_2 = f1_2 * (int64_t) g5; | ||
607 | int64_t f1g6 = f1 * (int64_t) g6; | ||
608 | int64_t f1g7_2 = f1_2 * (int64_t) g7; | ||
609 | int64_t f1g8 = f1 * (int64_t) g8; | ||
610 | int64_t f1g9_38 = f1_2 * (int64_t) g9_19; | ||
611 | int64_t f2g0 = f2 * (int64_t) g0; | ||
612 | int64_t f2g1 = f2 * (int64_t) g1; | ||
613 | int64_t f2g2 = f2 * (int64_t) g2; | ||
614 | int64_t f2g3 = f2 * (int64_t) g3; | ||
615 | int64_t f2g4 = f2 * (int64_t) g4; | ||
616 | int64_t f2g5 = f2 * (int64_t) g5; | ||
617 | int64_t f2g6 = f2 * (int64_t) g6; | ||
618 | int64_t f2g7 = f2 * (int64_t) g7; | ||
619 | int64_t f2g8_19 = f2 * (int64_t) g8_19; | ||
620 | int64_t f2g9_19 = f2 * (int64_t) g9_19; | ||
621 | int64_t f3g0 = f3 * (int64_t) g0; | ||
622 | int64_t f3g1_2 = f3_2 * (int64_t) g1; | ||
623 | int64_t f3g2 = f3 * (int64_t) g2; | ||
624 | int64_t f3g3_2 = f3_2 * (int64_t) g3; | ||
625 | int64_t f3g4 = f3 * (int64_t) g4; | ||
626 | int64_t f3g5_2 = f3_2 * (int64_t) g5; | ||
627 | int64_t f3g6 = f3 * (int64_t) g6; | ||
628 | int64_t f3g7_38 = f3_2 * (int64_t) g7_19; | ||
629 | int64_t f3g8_19 = f3 * (int64_t) g8_19; | ||
630 | int64_t f3g9_38 = f3_2 * (int64_t) g9_19; | ||
631 | int64_t f4g0 = f4 * (int64_t) g0; | ||
632 | int64_t f4g1 = f4 * (int64_t) g1; | ||
633 | int64_t f4g2 = f4 * (int64_t) g2; | ||
634 | int64_t f4g3 = f4 * (int64_t) g3; | ||
635 | int64_t f4g4 = f4 * (int64_t) g4; | ||
636 | int64_t f4g5 = f4 * (int64_t) g5; | ||
637 | int64_t f4g6_19 = f4 * (int64_t) g6_19; | ||
638 | int64_t f4g7_19 = f4 * (int64_t) g7_19; | ||
639 | int64_t f4g8_19 = f4 * (int64_t) g8_19; | ||
640 | int64_t f4g9_19 = f4 * (int64_t) g9_19; | ||
641 | int64_t f5g0 = f5 * (int64_t) g0; | ||
642 | int64_t f5g1_2 = f5_2 * (int64_t) g1; | ||
643 | int64_t f5g2 = f5 * (int64_t) g2; | ||
644 | int64_t f5g3_2 = f5_2 * (int64_t) g3; | ||
645 | int64_t f5g4 = f5 * (int64_t) g4; | ||
646 | int64_t f5g5_38 = f5_2 * (int64_t) g5_19; | ||
647 | int64_t f5g6_19 = f5 * (int64_t) g6_19; | ||
648 | int64_t f5g7_38 = f5_2 * (int64_t) g7_19; | ||
649 | int64_t f5g8_19 = f5 * (int64_t) g8_19; | ||
650 | int64_t f5g9_38 = f5_2 * (int64_t) g9_19; | ||
651 | int64_t f6g0 = f6 * (int64_t) g0; | ||
652 | int64_t f6g1 = f6 * (int64_t) g1; | ||
653 | int64_t f6g2 = f6 * (int64_t) g2; | ||
654 | int64_t f6g3 = f6 * (int64_t) g3; | ||
655 | int64_t f6g4_19 = f6 * (int64_t) g4_19; | ||
656 | int64_t f6g5_19 = f6 * (int64_t) g5_19; | ||
657 | int64_t f6g6_19 = f6 * (int64_t) g6_19; | ||
658 | int64_t f6g7_19 = f6 * (int64_t) g7_19; | ||
659 | int64_t f6g8_19 = f6 * (int64_t) g8_19; | ||
660 | int64_t f6g9_19 = f6 * (int64_t) g9_19; | ||
661 | int64_t f7g0 = f7 * (int64_t) g0; | ||
662 | int64_t f7g1_2 = f7_2 * (int64_t) g1; | ||
663 | int64_t f7g2 = f7 * (int64_t) g2; | ||
664 | int64_t f7g3_38 = f7_2 * (int64_t) g3_19; | ||
665 | int64_t f7g4_19 = f7 * (int64_t) g4_19; | ||
666 | int64_t f7g5_38 = f7_2 * (int64_t) g5_19; | ||
667 | int64_t f7g6_19 = f7 * (int64_t) g6_19; | ||
668 | int64_t f7g7_38 = f7_2 * (int64_t) g7_19; | ||
669 | int64_t f7g8_19 = f7 * (int64_t) g8_19; | ||
670 | int64_t f7g9_38 = f7_2 * (int64_t) g9_19; | ||
671 | int64_t f8g0 = f8 * (int64_t) g0; | ||
672 | int64_t f8g1 = f8 * (int64_t) g1; | ||
673 | int64_t f8g2_19 = f8 * (int64_t) g2_19; | ||
674 | int64_t f8g3_19 = f8 * (int64_t) g3_19; | ||
675 | int64_t f8g4_19 = f8 * (int64_t) g4_19; | ||
676 | int64_t f8g5_19 = f8 * (int64_t) g5_19; | ||
677 | int64_t f8g6_19 = f8 * (int64_t) g6_19; | ||
678 | int64_t f8g7_19 = f8 * (int64_t) g7_19; | ||
679 | int64_t f8g8_19 = f8 * (int64_t) g8_19; | ||
680 | int64_t f8g9_19 = f8 * (int64_t) g9_19; | ||
681 | int64_t f9g0 = f9 * (int64_t) g0; | ||
682 | int64_t f9g1_38 = f9_2 * (int64_t) g1_19; | ||
683 | int64_t f9g2_19 = f9 * (int64_t) g2_19; | ||
684 | int64_t f9g3_38 = f9_2 * (int64_t) g3_19; | ||
685 | int64_t f9g4_19 = f9 * (int64_t) g4_19; | ||
686 | int64_t f9g5_38 = f9_2 * (int64_t) g5_19; | ||
687 | int64_t f9g6_19 = f9 * (int64_t) g6_19; | ||
688 | int64_t f9g7_38 = f9_2 * (int64_t) g7_19; | ||
689 | int64_t f9g8_19 = f9 * (int64_t) g8_19; | ||
690 | int64_t f9g9_38 = f9_2 * (int64_t) g9_19; | ||
691 | int64_t h0 = f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38; | ||
692 | int64_t h1 = f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19; | ||
693 | int64_t h2 = f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38; | ||
694 | int64_t h3 = f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19; | ||
695 | int64_t h4 = f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38; | ||
696 | int64_t h5 = f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19; | ||
697 | int64_t h6 = f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 + f7g9_38 + f8g8_19 + f9g7_38; | ||
698 | int64_t h7 = f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 + f9g8_19; | ||
699 | int64_t h8 = f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 + f8g0 + f9g9_38; | ||
700 | int64_t h9 = f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0 ; | ||
701 | int64_t carry0; | ||
702 | int64_t carry1; | ||
703 | int64_t carry2; | ||
704 | int64_t carry3; | ||
705 | int64_t carry4; | ||
706 | int64_t carry5; | ||
707 | int64_t carry6; | ||
708 | int64_t carry7; | ||
709 | int64_t carry8; | ||
710 | int64_t carry9; | ||
711 | |||
712 | carry0 = (h0 + (int64_t) (1 << 25)) >> 26; | ||
713 | h1 += carry0; | ||
714 | h0 -= carry0 << 26; | ||
715 | carry4 = (h4 + (int64_t) (1 << 25)) >> 26; | ||
716 | h5 += carry4; | ||
717 | h4 -= carry4 << 26; | ||
718 | |||
719 | carry1 = (h1 + (int64_t) (1 << 24)) >> 25; | ||
720 | h2 += carry1; | ||
721 | h1 -= carry1 << 25; | ||
722 | carry5 = (h5 + (int64_t) (1 << 24)) >> 25; | ||
723 | h6 += carry5; | ||
724 | h5 -= carry5 << 25; | ||
725 | |||
726 | carry2 = (h2 + (int64_t) (1 << 25)) >> 26; | ||
727 | h3 += carry2; | ||
728 | h2 -= carry2 << 26; | ||
729 | carry6 = (h6 + (int64_t) (1 << 25)) >> 26; | ||
730 | h7 += carry6; | ||
731 | h6 -= carry6 << 26; | ||
732 | |||
733 | carry3 = (h3 + (int64_t) (1 << 24)) >> 25; | ||
734 | h4 += carry3; | ||
735 | h3 -= carry3 << 25; | ||
736 | carry7 = (h7 + (int64_t) (1 << 24)) >> 25; | ||
737 | h8 += carry7; | ||
738 | h7 -= carry7 << 25; | ||
739 | |||
740 | carry4 = (h4 + (int64_t) (1 << 25)) >> 26; | ||
741 | h5 += carry4; | ||
742 | h4 -= carry4 << 26; | ||
743 | carry8 = (h8 + (int64_t) (1 << 25)) >> 26; | ||
744 | h9 += carry8; | ||
745 | h8 -= carry8 << 26; | ||
746 | |||
747 | carry9 = (h9 + (int64_t) (1 << 24)) >> 25; | ||
748 | h0 += carry9 * 19; | ||
749 | h9 -= carry9 << 25; | ||
750 | |||
751 | carry0 = (h0 + (int64_t) (1 << 25)) >> 26; | ||
752 | h1 += carry0; | ||
753 | h0 -= carry0 << 26; | ||
754 | |||
755 | h[0] = (int32_t) h0; | ||
756 | h[1] = (int32_t) h1; | ||
757 | h[2] = (int32_t) h2; | ||
758 | h[3] = (int32_t) h3; | ||
759 | h[4] = (int32_t) h4; | ||
760 | h[5] = (int32_t) h5; | ||
761 | h[6] = (int32_t) h6; | ||
762 | h[7] = (int32_t) h7; | ||
763 | h[8] = (int32_t) h8; | ||
764 | h[9] = (int32_t) h9; | ||
765 | } | ||
766 | |||
767 | |||
768 | /* | ||
769 | h = f * 121666 | ||
770 | Can overlap h with f. | ||
771 | |||
772 | Preconditions: | ||
773 | |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. | ||
774 | |||
775 | Postconditions: | ||
776 | |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. | ||
777 | */ | ||
778 | |||
779 | void fe_mul121666(fe h, fe f) { | ||
780 | int32_t f0 = f[0]; | ||
781 | int32_t f1 = f[1]; | ||
782 | int32_t f2 = f[2]; | ||
783 | int32_t f3 = f[3]; | ||
784 | int32_t f4 = f[4]; | ||
785 | int32_t f5 = f[5]; | ||
786 | int32_t f6 = f[6]; | ||
787 | int32_t f7 = f[7]; | ||
788 | int32_t f8 = f[8]; | ||
789 | int32_t f9 = f[9]; | ||
790 | int64_t h0 = f0 * (int64_t) 121666; | ||
791 | int64_t h1 = f1 * (int64_t) 121666; | ||
792 | int64_t h2 = f2 * (int64_t) 121666; | ||
793 | int64_t h3 = f3 * (int64_t) 121666; | ||
794 | int64_t h4 = f4 * (int64_t) 121666; | ||
795 | int64_t h5 = f5 * (int64_t) 121666; | ||
796 | int64_t h6 = f6 * (int64_t) 121666; | ||
797 | int64_t h7 = f7 * (int64_t) 121666; | ||
798 | int64_t h8 = f8 * (int64_t) 121666; | ||
799 | int64_t h9 = f9 * (int64_t) 121666; | ||
800 | int64_t carry0; | ||
801 | int64_t carry1; | ||
802 | int64_t carry2; | ||
803 | int64_t carry3; | ||
804 | int64_t carry4; | ||
805 | int64_t carry5; | ||
806 | int64_t carry6; | ||
807 | int64_t carry7; | ||
808 | int64_t carry8; | ||
809 | int64_t carry9; | ||
810 | |||
811 | carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25; | ||
812 | carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25; | ||
813 | carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25; | ||
814 | carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25; | ||
815 | carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25; | ||
816 | |||
817 | carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; | ||
818 | carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26; | ||
819 | carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; | ||
820 | carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26; | ||
821 | carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26; | ||
822 | |||
823 | h[0] = (int32_t) h0; | ||
824 | h[1] = (int32_t) h1; | ||
825 | h[2] = (int32_t) h2; | ||
826 | h[3] = (int32_t) h3; | ||
827 | h[4] = (int32_t) h4; | ||
828 | h[5] = (int32_t) h5; | ||
829 | h[6] = (int32_t) h6; | ||
830 | h[7] = (int32_t) h7; | ||
831 | h[8] = (int32_t) h8; | ||
832 | h[9] = (int32_t) h9; | ||
833 | } | ||
834 | |||
835 | |||
836 | /* | ||
837 | h = -f | ||
838 | |||
839 | Preconditions: | ||
840 | |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. | ||
841 | |||
842 | Postconditions: | ||
843 | |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. | ||
844 | */ | ||
845 | |||
846 | void fe_neg(fe h, const fe f) { | ||
847 | int32_t f0 = f[0]; | ||
848 | int32_t f1 = f[1]; | ||
849 | int32_t f2 = f[2]; | ||
850 | int32_t f3 = f[3]; | ||
851 | int32_t f4 = f[4]; | ||
852 | int32_t f5 = f[5]; | ||
853 | int32_t f6 = f[6]; | ||
854 | int32_t f7 = f[7]; | ||
855 | int32_t f8 = f[8]; | ||
856 | int32_t f9 = f[9]; | ||
857 | int32_t h0 = -f0; | ||
858 | int32_t h1 = -f1; | ||
859 | int32_t h2 = -f2; | ||
860 | int32_t h3 = -f3; | ||
861 | int32_t h4 = -f4; | ||
862 | int32_t h5 = -f5; | ||
863 | int32_t h6 = -f6; | ||
864 | int32_t h7 = -f7; | ||
865 | int32_t h8 = -f8; | ||
866 | int32_t h9 = -f9; | ||
867 | |||
868 | h[0] = h0; | ||
869 | h[1] = h1; | ||
870 | h[2] = h2; | ||
871 | h[3] = h3; | ||
872 | h[4] = h4; | ||
873 | h[5] = h5; | ||
874 | h[6] = h6; | ||
875 | h[7] = h7; | ||
876 | h[8] = h8; | ||
877 | h[9] = h9; | ||
878 | } | ||
879 | |||
880 | |||
881 | void fe_pow22523(fe out, const fe z) { | ||
882 | fe t0; | ||
883 | fe t1; | ||
884 | fe t2; | ||
885 | int i; | ||
886 | fe_sq(t0, z); | ||
887 | |||
888 | for (i = 1; i < 1; ++i) { | ||
889 | fe_sq(t0, t0); | ||
890 | } | ||
891 | |||
892 | fe_sq(t1, t0); | ||
893 | |||
894 | for (i = 1; i < 2; ++i) { | ||
895 | fe_sq(t1, t1); | ||
896 | } | ||
897 | |||
898 | fe_mul(t1, z, t1); | ||
899 | fe_mul(t0, t0, t1); | ||
900 | fe_sq(t0, t0); | ||
901 | |||
902 | for (i = 1; i < 1; ++i) { | ||
903 | fe_sq(t0, t0); | ||
904 | } | ||
905 | |||
906 | fe_mul(t0, t1, t0); | ||
907 | fe_sq(t1, t0); | ||
908 | |||
909 | for (i = 1; i < 5; ++i) { | ||
910 | fe_sq(t1, t1); | ||
911 | } | ||
912 | |||
913 | fe_mul(t0, t1, t0); | ||
914 | fe_sq(t1, t0); | ||
915 | |||
916 | for (i = 1; i < 10; ++i) { | ||
917 | fe_sq(t1, t1); | ||
918 | } | ||
919 | |||
920 | fe_mul(t1, t1, t0); | ||
921 | fe_sq(t2, t1); | ||
922 | |||
923 | for (i = 1; i < 20; ++i) { | ||
924 | fe_sq(t2, t2); | ||
925 | } | ||
926 | |||
927 | fe_mul(t1, t2, t1); | ||
928 | fe_sq(t1, t1); | ||
929 | |||
930 | for (i = 1; i < 10; ++i) { | ||
931 | fe_sq(t1, t1); | ||
932 | } | ||
933 | |||
934 | fe_mul(t0, t1, t0); | ||
935 | fe_sq(t1, t0); | ||
936 | |||
937 | for (i = 1; i < 50; ++i) { | ||
938 | fe_sq(t1, t1); | ||
939 | } | ||
940 | |||
941 | fe_mul(t1, t1, t0); | ||
942 | fe_sq(t2, t1); | ||
943 | |||
944 | for (i = 1; i < 100; ++i) { | ||
945 | fe_sq(t2, t2); | ||
946 | } | ||
947 | |||
948 | fe_mul(t1, t2, t1); | ||
949 | fe_sq(t1, t1); | ||
950 | |||
951 | for (i = 1; i < 50; ++i) { | ||
952 | fe_sq(t1, t1); | ||
953 | } | ||
954 | |||
955 | fe_mul(t0, t1, t0); | ||
956 | fe_sq(t0, t0); | ||
957 | |||
958 | for (i = 1; i < 2; ++i) { | ||
959 | fe_sq(t0, t0); | ||
960 | } | ||
961 | |||
962 | fe_mul(out, t0, z); | ||
963 | return; | ||
964 | } | ||
965 | |||
966 | |||
967 | /* | ||
968 | h = f * f | ||
969 | Can overlap h with f. | ||
970 | |||
971 | Preconditions: | ||
972 | |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. | ||
973 | |||
974 | Postconditions: | ||
975 | |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. | ||
976 | */ | ||
977 | |||
978 | /* | ||
979 | See fe_mul.c for discussion of implementation strategy. | ||
980 | */ | ||
981 | |||
982 | void fe_sq(fe h, const fe f) { | ||
983 | int32_t f0 = f[0]; | ||
984 | int32_t f1 = f[1]; | ||
985 | int32_t f2 = f[2]; | ||
986 | int32_t f3 = f[3]; | ||
987 | int32_t f4 = f[4]; | ||
988 | int32_t f5 = f[5]; | ||
989 | int32_t f6 = f[6]; | ||
990 | int32_t f7 = f[7]; | ||
991 | int32_t f8 = f[8]; | ||
992 | int32_t f9 = f[9]; | ||
993 | int32_t f0_2 = 2 * f0; | ||
994 | int32_t f1_2 = 2 * f1; | ||
995 | int32_t f2_2 = 2 * f2; | ||
996 | int32_t f3_2 = 2 * f3; | ||
997 | int32_t f4_2 = 2 * f4; | ||
998 | int32_t f5_2 = 2 * f5; | ||
999 | int32_t f6_2 = 2 * f6; | ||
1000 | int32_t f7_2 = 2 * f7; | ||
1001 | int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */ | ||
1002 | int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */ | ||
1003 | int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */ | ||
1004 | int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */ | ||
1005 | int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */ | ||
1006 | int64_t f0f0 = f0 * (int64_t) f0; | ||
1007 | int64_t f0f1_2 = f0_2 * (int64_t) f1; | ||
1008 | int64_t f0f2_2 = f0_2 * (int64_t) f2; | ||
1009 | int64_t f0f3_2 = f0_2 * (int64_t) f3; | ||
1010 | int64_t f0f4_2 = f0_2 * (int64_t) f4; | ||
1011 | int64_t f0f5_2 = f0_2 * (int64_t) f5; | ||
1012 | int64_t f0f6_2 = f0_2 * (int64_t) f6; | ||
1013 | int64_t f0f7_2 = f0_2 * (int64_t) f7; | ||
1014 | int64_t f0f8_2 = f0_2 * (int64_t) f8; | ||
1015 | int64_t f0f9_2 = f0_2 * (int64_t) f9; | ||
1016 | int64_t f1f1_2 = f1_2 * (int64_t) f1; | ||
1017 | int64_t f1f2_2 = f1_2 * (int64_t) f2; | ||
1018 | int64_t f1f3_4 = f1_2 * (int64_t) f3_2; | ||
1019 | int64_t f1f4_2 = f1_2 * (int64_t) f4; | ||
1020 | int64_t f1f5_4 = f1_2 * (int64_t) f5_2; | ||
1021 | int64_t f1f6_2 = f1_2 * (int64_t) f6; | ||
1022 | int64_t f1f7_4 = f1_2 * (int64_t) f7_2; | ||
1023 | int64_t f1f8_2 = f1_2 * (int64_t) f8; | ||
1024 | int64_t f1f9_76 = f1_2 * (int64_t) f9_38; | ||
1025 | int64_t f2f2 = f2 * (int64_t) f2; | ||
1026 | int64_t f2f3_2 = f2_2 * (int64_t) f3; | ||
1027 | int64_t f2f4_2 = f2_2 * (int64_t) f4; | ||
1028 | int64_t f2f5_2 = f2_2 * (int64_t) f5; | ||
1029 | int64_t f2f6_2 = f2_2 * (int64_t) f6; | ||
1030 | int64_t f2f7_2 = f2_2 * (int64_t) f7; | ||
1031 | int64_t f2f8_38 = f2_2 * (int64_t) f8_19; | ||
1032 | int64_t f2f9_38 = f2 * (int64_t) f9_38; | ||
1033 | int64_t f3f3_2 = f3_2 * (int64_t) f3; | ||
1034 | int64_t f3f4_2 = f3_2 * (int64_t) f4; | ||
1035 | int64_t f3f5_4 = f3_2 * (int64_t) f5_2; | ||
1036 | int64_t f3f6_2 = f3_2 * (int64_t) f6; | ||
1037 | int64_t f3f7_76 = f3_2 * (int64_t) f7_38; | ||
1038 | int64_t f3f8_38 = f3_2 * (int64_t) f8_19; | ||
1039 | int64_t f3f9_76 = f3_2 * (int64_t) f9_38; | ||
1040 | int64_t f4f4 = f4 * (int64_t) f4; | ||
1041 | int64_t f4f5_2 = f4_2 * (int64_t) f5; | ||
1042 | int64_t f4f6_38 = f4_2 * (int64_t) f6_19; | ||
1043 | int64_t f4f7_38 = f4 * (int64_t) f7_38; | ||
1044 | int64_t f4f8_38 = f4_2 * (int64_t) f8_19; | ||
1045 | int64_t f4f9_38 = f4 * (int64_t) f9_38; | ||
1046 | int64_t f5f5_38 = f5 * (int64_t) f5_38; | ||
1047 | int64_t f5f6_38 = f5_2 * (int64_t) f6_19; | ||
1048 | int64_t f5f7_76 = f5_2 * (int64_t) f7_38; | ||
1049 | int64_t f5f8_38 = f5_2 * (int64_t) f8_19; | ||
1050 | int64_t f5f9_76 = f5_2 * (int64_t) f9_38; | ||
1051 | int64_t f6f6_19 = f6 * (int64_t) f6_19; | ||
1052 | int64_t f6f7_38 = f6 * (int64_t) f7_38; | ||
1053 | int64_t f6f8_38 = f6_2 * (int64_t) f8_19; | ||
1054 | int64_t f6f9_38 = f6 * (int64_t) f9_38; | ||
1055 | int64_t f7f7_38 = f7 * (int64_t) f7_38; | ||
1056 | int64_t f7f8_38 = f7_2 * (int64_t) f8_19; | ||
1057 | int64_t f7f9_76 = f7_2 * (int64_t) f9_38; | ||
1058 | int64_t f8f8_19 = f8 * (int64_t) f8_19; | ||
1059 | int64_t f8f9_38 = f8 * (int64_t) f9_38; | ||
1060 | int64_t f9f9_38 = f9 * (int64_t) f9_38; | ||
1061 | int64_t h0 = f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38; | ||
1062 | int64_t h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38; | ||
1063 | int64_t h2 = f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19; | ||
1064 | int64_t h3 = f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38; | ||
1065 | int64_t h4 = f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38; | ||
1066 | int64_t h5 = f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38; | ||
1067 | int64_t h6 = f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19; | ||
1068 | int64_t h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38; | ||
1069 | int64_t h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38; | ||
1070 | int64_t h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2; | ||
1071 | int64_t carry0; | ||
1072 | int64_t carry1; | ||
1073 | int64_t carry2; | ||
1074 | int64_t carry3; | ||
1075 | int64_t carry4; | ||
1076 | int64_t carry5; | ||
1077 | int64_t carry6; | ||
1078 | int64_t carry7; | ||
1079 | int64_t carry8; | ||
1080 | int64_t carry9; | ||
1081 | carry0 = (h0 + (int64_t) (1 << 25)) >> 26; | ||
1082 | h1 += carry0; | ||
1083 | h0 -= carry0 << 26; | ||
1084 | carry4 = (h4 + (int64_t) (1 << 25)) >> 26; | ||
1085 | h5 += carry4; | ||
1086 | h4 -= carry4 << 26; | ||
1087 | carry1 = (h1 + (int64_t) (1 << 24)) >> 25; | ||
1088 | h2 += carry1; | ||
1089 | h1 -= carry1 << 25; | ||
1090 | carry5 = (h5 + (int64_t) (1 << 24)) >> 25; | ||
1091 | h6 += carry5; | ||
1092 | h5 -= carry5 << 25; | ||
1093 | carry2 = (h2 + (int64_t) (1 << 25)) >> 26; | ||
1094 | h3 += carry2; | ||
1095 | h2 -= carry2 << 26; | ||
1096 | carry6 = (h6 + (int64_t) (1 << 25)) >> 26; | ||
1097 | h7 += carry6; | ||
1098 | h6 -= carry6 << 26; | ||
1099 | carry3 = (h3 + (int64_t) (1 << 24)) >> 25; | ||
1100 | h4 += carry3; | ||
1101 | h3 -= carry3 << 25; | ||
1102 | carry7 = (h7 + (int64_t) (1 << 24)) >> 25; | ||
1103 | h8 += carry7; | ||
1104 | h7 -= carry7 << 25; | ||
1105 | carry4 = (h4 + (int64_t) (1 << 25)) >> 26; | ||
1106 | h5 += carry4; | ||
1107 | h4 -= carry4 << 26; | ||
1108 | carry8 = (h8 + (int64_t) (1 << 25)) >> 26; | ||
1109 | h9 += carry8; | ||
1110 | h8 -= carry8 << 26; | ||
1111 | carry9 = (h9 + (int64_t) (1 << 24)) >> 25; | ||
1112 | h0 += carry9 * 19; | ||
1113 | h9 -= carry9 << 25; | ||
1114 | carry0 = (h0 + (int64_t) (1 << 25)) >> 26; | ||
1115 | h1 += carry0; | ||
1116 | h0 -= carry0 << 26; | ||
1117 | h[0] = (int32_t) h0; | ||
1118 | h[1] = (int32_t) h1; | ||
1119 | h[2] = (int32_t) h2; | ||
1120 | h[3] = (int32_t) h3; | ||
1121 | h[4] = (int32_t) h4; | ||
1122 | h[5] = (int32_t) h5; | ||
1123 | h[6] = (int32_t) h6; | ||
1124 | h[7] = (int32_t) h7; | ||
1125 | h[8] = (int32_t) h8; | ||
1126 | h[9] = (int32_t) h9; | ||
1127 | } | ||
1128 | |||
1129 | |||
1130 | /* | ||
1131 | h = 2 * f * f | ||
1132 | Can overlap h with f. | ||
1133 | |||
1134 | Preconditions: | ||
1135 | |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. | ||
1136 | |||
1137 | Postconditions: | ||
1138 | |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. | ||
1139 | */ | ||
1140 | |||
1141 | /* | ||
1142 | See fe_mul.c for discussion of implementation strategy. | ||
1143 | */ | ||
1144 | |||
1145 | void fe_sq2(fe h, const fe f) { | ||
1146 | int32_t f0 = f[0]; | ||
1147 | int32_t f1 = f[1]; | ||
1148 | int32_t f2 = f[2]; | ||
1149 | int32_t f3 = f[3]; | ||
1150 | int32_t f4 = f[4]; | ||
1151 | int32_t f5 = f[5]; | ||
1152 | int32_t f6 = f[6]; | ||
1153 | int32_t f7 = f[7]; | ||
1154 | int32_t f8 = f[8]; | ||
1155 | int32_t f9 = f[9]; | ||
1156 | int32_t f0_2 = 2 * f0; | ||
1157 | int32_t f1_2 = 2 * f1; | ||
1158 | int32_t f2_2 = 2 * f2; | ||
1159 | int32_t f3_2 = 2 * f3; | ||
1160 | int32_t f4_2 = 2 * f4; | ||
1161 | int32_t f5_2 = 2 * f5; | ||
1162 | int32_t f6_2 = 2 * f6; | ||
1163 | int32_t f7_2 = 2 * f7; | ||
1164 | int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */ | ||
1165 | int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */ | ||
1166 | int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */ | ||
1167 | int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */ | ||
1168 | int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */ | ||
1169 | int64_t f0f0 = f0 * (int64_t) f0; | ||
1170 | int64_t f0f1_2 = f0_2 * (int64_t) f1; | ||
1171 | int64_t f0f2_2 = f0_2 * (int64_t) f2; | ||
1172 | int64_t f0f3_2 = f0_2 * (int64_t) f3; | ||
1173 | int64_t f0f4_2 = f0_2 * (int64_t) f4; | ||
1174 | int64_t f0f5_2 = f0_2 * (int64_t) f5; | ||
1175 | int64_t f0f6_2 = f0_2 * (int64_t) f6; | ||
1176 | int64_t f0f7_2 = f0_2 * (int64_t) f7; | ||
1177 | int64_t f0f8_2 = f0_2 * (int64_t) f8; | ||
1178 | int64_t f0f9_2 = f0_2 * (int64_t) f9; | ||
1179 | int64_t f1f1_2 = f1_2 * (int64_t) f1; | ||
1180 | int64_t f1f2_2 = f1_2 * (int64_t) f2; | ||
1181 | int64_t f1f3_4 = f1_2 * (int64_t) f3_2; | ||
1182 | int64_t f1f4_2 = f1_2 * (int64_t) f4; | ||
1183 | int64_t f1f5_4 = f1_2 * (int64_t) f5_2; | ||
1184 | int64_t f1f6_2 = f1_2 * (int64_t) f6; | ||
1185 | int64_t f1f7_4 = f1_2 * (int64_t) f7_2; | ||
1186 | int64_t f1f8_2 = f1_2 * (int64_t) f8; | ||
1187 | int64_t f1f9_76 = f1_2 * (int64_t) f9_38; | ||
1188 | int64_t f2f2 = f2 * (int64_t) f2; | ||
1189 | int64_t f2f3_2 = f2_2 * (int64_t) f3; | ||
1190 | int64_t f2f4_2 = f2_2 * (int64_t) f4; | ||
1191 | int64_t f2f5_2 = f2_2 * (int64_t) f5; | ||
1192 | int64_t f2f6_2 = f2_2 * (int64_t) f6; | ||
1193 | int64_t f2f7_2 = f2_2 * (int64_t) f7; | ||
1194 | int64_t f2f8_38 = f2_2 * (int64_t) f8_19; | ||
1195 | int64_t f2f9_38 = f2 * (int64_t) f9_38; | ||
1196 | int64_t f3f3_2 = f3_2 * (int64_t) f3; | ||
1197 | int64_t f3f4_2 = f3_2 * (int64_t) f4; | ||
1198 | int64_t f3f5_4 = f3_2 * (int64_t) f5_2; | ||
1199 | int64_t f3f6_2 = f3_2 * (int64_t) f6; | ||
1200 | int64_t f3f7_76 = f3_2 * (int64_t) f7_38; | ||
1201 | int64_t f3f8_38 = f3_2 * (int64_t) f8_19; | ||
1202 | int64_t f3f9_76 = f3_2 * (int64_t) f9_38; | ||
1203 | int64_t f4f4 = f4 * (int64_t) f4; | ||
1204 | int64_t f4f5_2 = f4_2 * (int64_t) f5; | ||
1205 | int64_t f4f6_38 = f4_2 * (int64_t) f6_19; | ||
1206 | int64_t f4f7_38 = f4 * (int64_t) f7_38; | ||
1207 | int64_t f4f8_38 = f4_2 * (int64_t) f8_19; | ||
1208 | int64_t f4f9_38 = f4 * (int64_t) f9_38; | ||
1209 | int64_t f5f5_38 = f5 * (int64_t) f5_38; | ||
1210 | int64_t f5f6_38 = f5_2 * (int64_t) f6_19; | ||
1211 | int64_t f5f7_76 = f5_2 * (int64_t) f7_38; | ||
1212 | int64_t f5f8_38 = f5_2 * (int64_t) f8_19; | ||
1213 | int64_t f5f9_76 = f5_2 * (int64_t) f9_38; | ||
1214 | int64_t f6f6_19 = f6 * (int64_t) f6_19; | ||
1215 | int64_t f6f7_38 = f6 * (int64_t) f7_38; | ||
1216 | int64_t f6f8_38 = f6_2 * (int64_t) f8_19; | ||
1217 | int64_t f6f9_38 = f6 * (int64_t) f9_38; | ||
1218 | int64_t f7f7_38 = f7 * (int64_t) f7_38; | ||
1219 | int64_t f7f8_38 = f7_2 * (int64_t) f8_19; | ||
1220 | int64_t f7f9_76 = f7_2 * (int64_t) f9_38; | ||
1221 | int64_t f8f8_19 = f8 * (int64_t) f8_19; | ||
1222 | int64_t f8f9_38 = f8 * (int64_t) f9_38; | ||
1223 | int64_t f9f9_38 = f9 * (int64_t) f9_38; | ||
1224 | int64_t h0 = f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38; | ||
1225 | int64_t h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38; | ||
1226 | int64_t h2 = f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19; | ||
1227 | int64_t h3 = f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38; | ||
1228 | int64_t h4 = f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38; | ||
1229 | int64_t h5 = f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38; | ||
1230 | int64_t h6 = f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19; | ||
1231 | int64_t h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38; | ||
1232 | int64_t h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38; | ||
1233 | int64_t h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2; | ||
1234 | int64_t carry0; | ||
1235 | int64_t carry1; | ||
1236 | int64_t carry2; | ||
1237 | int64_t carry3; | ||
1238 | int64_t carry4; | ||
1239 | int64_t carry5; | ||
1240 | int64_t carry6; | ||
1241 | int64_t carry7; | ||
1242 | int64_t carry8; | ||
1243 | int64_t carry9; | ||
1244 | h0 += h0; | ||
1245 | h1 += h1; | ||
1246 | h2 += h2; | ||
1247 | h3 += h3; | ||
1248 | h4 += h4; | ||
1249 | h5 += h5; | ||
1250 | h6 += h6; | ||
1251 | h7 += h7; | ||
1252 | h8 += h8; | ||
1253 | h9 += h9; | ||
1254 | carry0 = (h0 + (int64_t) (1 << 25)) >> 26; | ||
1255 | h1 += carry0; | ||
1256 | h0 -= carry0 << 26; | ||
1257 | carry4 = (h4 + (int64_t) (1 << 25)) >> 26; | ||
1258 | h5 += carry4; | ||
1259 | h4 -= carry4 << 26; | ||
1260 | carry1 = (h1 + (int64_t) (1 << 24)) >> 25; | ||
1261 | h2 += carry1; | ||
1262 | h1 -= carry1 << 25; | ||
1263 | carry5 = (h5 + (int64_t) (1 << 24)) >> 25; | ||
1264 | h6 += carry5; | ||
1265 | h5 -= carry5 << 25; | ||
1266 | carry2 = (h2 + (int64_t) (1 << 25)) >> 26; | ||
1267 | h3 += carry2; | ||
1268 | h2 -= carry2 << 26; | ||
1269 | carry6 = (h6 + (int64_t) (1 << 25)) >> 26; | ||
1270 | h7 += carry6; | ||
1271 | h6 -= carry6 << 26; | ||
1272 | carry3 = (h3 + (int64_t) (1 << 24)) >> 25; | ||
1273 | h4 += carry3; | ||
1274 | h3 -= carry3 << 25; | ||
1275 | carry7 = (h7 + (int64_t) (1 << 24)) >> 25; | ||
1276 | h8 += carry7; | ||
1277 | h7 -= carry7 << 25; | ||
1278 | carry4 = (h4 + (int64_t) (1 << 25)) >> 26; | ||
1279 | h5 += carry4; | ||
1280 | h4 -= carry4 << 26; | ||
1281 | carry8 = (h8 + (int64_t) (1 << 25)) >> 26; | ||
1282 | h9 += carry8; | ||
1283 | h8 -= carry8 << 26; | ||
1284 | carry9 = (h9 + (int64_t) (1 << 24)) >> 25; | ||
1285 | h0 += carry9 * 19; | ||
1286 | h9 -= carry9 << 25; | ||
1287 | carry0 = (h0 + (int64_t) (1 << 25)) >> 26; | ||
1288 | h1 += carry0; | ||
1289 | h0 -= carry0 << 26; | ||
1290 | h[0] = (int32_t) h0; | ||
1291 | h[1] = (int32_t) h1; | ||
1292 | h[2] = (int32_t) h2; | ||
1293 | h[3] = (int32_t) h3; | ||
1294 | h[4] = (int32_t) h4; | ||
1295 | h[5] = (int32_t) h5; | ||
1296 | h[6] = (int32_t) h6; | ||
1297 | h[7] = (int32_t) h7; | ||
1298 | h[8] = (int32_t) h8; | ||
1299 | h[9] = (int32_t) h9; | ||
1300 | } | ||
1301 | |||
1302 | |||
1303 | /* | ||
1304 | h = f - g | ||
1305 | Can overlap h with f or g. | ||
1306 | |||
1307 | Preconditions: | ||
1308 | |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. | ||
1309 | |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. | ||
1310 | |||
1311 | Postconditions: | ||
1312 | |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. | ||
1313 | */ | ||
1314 | |||
1315 | void fe_sub(fe h, const fe f, const fe g) { | ||
1316 | int32_t f0 = f[0]; | ||
1317 | int32_t f1 = f[1]; | ||
1318 | int32_t f2 = f[2]; | ||
1319 | int32_t f3 = f[3]; | ||
1320 | int32_t f4 = f[4]; | ||
1321 | int32_t f5 = f[5]; | ||
1322 | int32_t f6 = f[6]; | ||
1323 | int32_t f7 = f[7]; | ||
1324 | int32_t f8 = f[8]; | ||
1325 | int32_t f9 = f[9]; | ||
1326 | int32_t g0 = g[0]; | ||
1327 | int32_t g1 = g[1]; | ||
1328 | int32_t g2 = g[2]; | ||
1329 | int32_t g3 = g[3]; | ||
1330 | int32_t g4 = g[4]; | ||
1331 | int32_t g5 = g[5]; | ||
1332 | int32_t g6 = g[6]; | ||
1333 | int32_t g7 = g[7]; | ||
1334 | int32_t g8 = g[8]; | ||
1335 | int32_t g9 = g[9]; | ||
1336 | int32_t h0 = f0 - g0; | ||
1337 | int32_t h1 = f1 - g1; | ||
1338 | int32_t h2 = f2 - g2; | ||
1339 | int32_t h3 = f3 - g3; | ||
1340 | int32_t h4 = f4 - g4; | ||
1341 | int32_t h5 = f5 - g5; | ||
1342 | int32_t h6 = f6 - g6; | ||
1343 | int32_t h7 = f7 - g7; | ||
1344 | int32_t h8 = f8 - g8; | ||
1345 | int32_t h9 = f9 - g9; | ||
1346 | |||
1347 | h[0] = h0; | ||
1348 | h[1] = h1; | ||
1349 | h[2] = h2; | ||
1350 | h[3] = h3; | ||
1351 | h[4] = h4; | ||
1352 | h[5] = h5; | ||
1353 | h[6] = h6; | ||
1354 | h[7] = h7; | ||
1355 | h[8] = h8; | ||
1356 | h[9] = h9; | ||
1357 | } | ||
1358 | |||
1359 | |||
1360 | |||
1361 | /* | ||
1362 | Preconditions: | ||
1363 | |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. | ||
1364 | |||
1365 | Write p=2^255-19; q=floor(h/p). | ||
1366 | Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))). | ||
1367 | |||
1368 | Proof: | ||
1369 | Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4. | ||
1370 | Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4. | ||
1371 | |||
1372 | Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9). | ||
1373 | Then 0<y<1. | ||
1374 | |||
1375 | Write r=h-pq. | ||
1376 | Have 0<=r<=p-1=2^255-20. | ||
1377 | Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1. | ||
1378 | |||
1379 | Write x=r+19(2^-255)r+y. | ||
1380 | Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q. | ||
1381 | |||
1382 | Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1)) | ||
1383 | so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q. | ||
1384 | */ | ||
1385 | |||
1386 | void fe_tobytes(unsigned char *s, const fe h) { | ||
1387 | int32_t h0 = h[0]; | ||
1388 | int32_t h1 = h[1]; | ||
1389 | int32_t h2 = h[2]; | ||
1390 | int32_t h3 = h[3]; | ||
1391 | int32_t h4 = h[4]; | ||
1392 | int32_t h5 = h[5]; | ||
1393 | int32_t h6 = h[6]; | ||
1394 | int32_t h7 = h[7]; | ||
1395 | int32_t h8 = h[8]; | ||
1396 | int32_t h9 = h[9]; | ||
1397 | int32_t q; | ||
1398 | int32_t carry0; | ||
1399 | int32_t carry1; | ||
1400 | int32_t carry2; | ||
1401 | int32_t carry3; | ||
1402 | int32_t carry4; | ||
1403 | int32_t carry5; | ||
1404 | int32_t carry6; | ||
1405 | int32_t carry7; | ||
1406 | int32_t carry8; | ||
1407 | int32_t carry9; | ||
1408 | q = (19 * h9 + (((int32_t) 1) << 24)) >> 25; | ||
1409 | q = (h0 + q) >> 26; | ||
1410 | q = (h1 + q) >> 25; | ||
1411 | q = (h2 + q) >> 26; | ||
1412 | q = (h3 + q) >> 25; | ||
1413 | q = (h4 + q) >> 26; | ||
1414 | q = (h5 + q) >> 25; | ||
1415 | q = (h6 + q) >> 26; | ||
1416 | q = (h7 + q) >> 25; | ||
1417 | q = (h8 + q) >> 26; | ||
1418 | q = (h9 + q) >> 25; | ||
1419 | /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */ | ||
1420 | h0 += 19 * q; | ||
1421 | /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */ | ||
1422 | carry0 = h0 >> 26; | ||
1423 | h1 += carry0; | ||
1424 | h0 -= carry0 << 26; | ||
1425 | carry1 = h1 >> 25; | ||
1426 | h2 += carry1; | ||
1427 | h1 -= carry1 << 25; | ||
1428 | carry2 = h2 >> 26; | ||
1429 | h3 += carry2; | ||
1430 | h2 -= carry2 << 26; | ||
1431 | carry3 = h3 >> 25; | ||
1432 | h4 += carry3; | ||
1433 | h3 -= carry3 << 25; | ||
1434 | carry4 = h4 >> 26; | ||
1435 | h5 += carry4; | ||
1436 | h4 -= carry4 << 26; | ||
1437 | carry5 = h5 >> 25; | ||
1438 | h6 += carry5; | ||
1439 | h5 -= carry5 << 25; | ||
1440 | carry6 = h6 >> 26; | ||
1441 | h7 += carry6; | ||
1442 | h6 -= carry6 << 26; | ||
1443 | carry7 = h7 >> 25; | ||
1444 | h8 += carry7; | ||
1445 | h7 -= carry7 << 25; | ||
1446 | carry8 = h8 >> 26; | ||
1447 | h9 += carry8; | ||
1448 | h8 -= carry8 << 26; | ||
1449 | carry9 = h9 >> 25; | ||
1450 | h9 -= carry9 << 25; | ||
1451 | |||
1452 | /* h10 = carry9 */ | ||
1453 | /* | ||
1454 | Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20. | ||
1455 | Have h0+...+2^230 h9 between 0 and 2^255-1; | ||
1456 | evidently 2^255 h10-2^255 q = 0. | ||
1457 | Goal: Output h0+...+2^230 h9. | ||
1458 | */ | ||
1459 | s[0] = (unsigned char) (h0 >> 0); | ||
1460 | s[1] = (unsigned char) (h0 >> 8); | ||
1461 | s[2] = (unsigned char) (h0 >> 16); | ||
1462 | s[3] = (unsigned char) ((h0 >> 24) | (h1 << 2)); | ||
1463 | s[4] = (unsigned char) (h1 >> 6); | ||
1464 | s[5] = (unsigned char) (h1 >> 14); | ||
1465 | s[6] = (unsigned char) ((h1 >> 22) | (h2 << 3)); | ||
1466 | s[7] = (unsigned char) (h2 >> 5); | ||
1467 | s[8] = (unsigned char) (h2 >> 13); | ||
1468 | s[9] = (unsigned char) ((h2 >> 21) | (h3 << 5)); | ||
1469 | s[10] = (unsigned char) (h3 >> 3); | ||
1470 | s[11] = (unsigned char) (h3 >> 11); | ||
1471 | s[12] = (unsigned char) ((h3 >> 19) | (h4 << 6)); | ||
1472 | s[13] = (unsigned char) (h4 >> 2); | ||
1473 | s[14] = (unsigned char) (h4 >> 10); | ||
1474 | s[15] = (unsigned char) (h4 >> 18); | ||
1475 | s[16] = (unsigned char) (h5 >> 0); | ||
1476 | s[17] = (unsigned char) (h5 >> 8); | ||
1477 | s[18] = (unsigned char) (h5 >> 16); | ||
1478 | s[19] = (unsigned char) ((h5 >> 24) | (h6 << 1)); | ||
1479 | s[20] = (unsigned char) (h6 >> 7); | ||
1480 | s[21] = (unsigned char) (h6 >> 15); | ||
1481 | s[22] = (unsigned char) ((h6 >> 23) | (h7 << 3)); | ||
1482 | s[23] = (unsigned char) (h7 >> 5); | ||
1483 | s[24] = (unsigned char) (h7 >> 13); | ||
1484 | s[25] = (unsigned char) ((h7 >> 21) | (h8 << 4)); | ||
1485 | s[26] = (unsigned char) (h8 >> 4); | ||
1486 | s[27] = (unsigned char) (h8 >> 12); | ||
1487 | s[28] = (unsigned char) ((h8 >> 20) | (h9 << 6)); | ||
1488 | s[29] = (unsigned char) (h9 >> 2); | ||
1489 | s[30] = (unsigned char) (h9 >> 10); | ||
1490 | s[31] = (unsigned char) (h9 >> 18); | ||
1491 | } | ||