/* Doxygen page header (extraction residue, kept as a comment):
 * Bitcoin Core 26.99.0 — P2P Digital Currency
 * field_5x52_asm_impl.h — "Go to the documentation of this file."
 */
1 /***********************************************************************
2  * Copyright (c) 2013-2014 Diederik Huys, Pieter Wuille *
3  * Distributed under the MIT software license, see the accompanying *
4  * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
5  ***********************************************************************/
6 
14 #ifndef SECP256K1_FIELD_INNER5X52_IMPL_H
15 #define SECP256K1_FIELD_INNER5X52_IMPL_H
16 
17 #include "util.h"
18 
19 SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t *a, const uint64_t * SECP256K1_RESTRICT b) {
29  uint64_t tmp1, tmp2, tmp3;
30 __asm__ __volatile__(
31  "movq 0(%%rsi),%%r10\n"
32  "movq 8(%%rsi),%%r11\n"
33  "movq 16(%%rsi),%%r12\n"
34  "movq 24(%%rsi),%%r13\n"
35  "movq 32(%%rsi),%%r14\n"
36 
37  /* d += a3 * b0 */
38  "movq 0(%%rbx),%%rax\n"
39  "mulq %%r13\n"
40  "movq %%rax,%%rcx\n"
41  "movq %%rdx,%%r15\n"
42  /* d += a2 * b1 */
43  "movq 8(%%rbx),%%rax\n"
44  "mulq %%r12\n"
45  "addq %%rax,%%rcx\n"
46  "adcq %%rdx,%%r15\n"
47  /* d += a1 * b2 */
48  "movq 16(%%rbx),%%rax\n"
49  "mulq %%r11\n"
50  "addq %%rax,%%rcx\n"
51  "adcq %%rdx,%%r15\n"
52  /* d = a0 * b3 */
53  "movq 24(%%rbx),%%rax\n"
54  "mulq %%r10\n"
55  "addq %%rax,%%rcx\n"
56  "adcq %%rdx,%%r15\n"
57  /* c = a4 * b4 */
58  "movq 32(%%rbx),%%rax\n"
59  "mulq %%r14\n"
60  "movq %%rax,%%r8\n"
61  "movq %%rdx,%%r9\n"
62  /* d += (c & M) * R */
63  "movq $0xfffffffffffff,%%rdx\n"
64  "andq %%rdx,%%rax\n"
65  "movq $0x1000003d10,%%rdx\n"
66  "mulq %%rdx\n"
67  "addq %%rax,%%rcx\n"
68  "adcq %%rdx,%%r15\n"
69  /* c >>= 52 (%%r8 only) */
70  "shrdq $52,%%r9,%%r8\n"
71  /* t3 (tmp1) = d & M */
72  "movq %%rcx,%%rsi\n"
73  "movq $0xfffffffffffff,%%rdx\n"
74  "andq %%rdx,%%rsi\n"
75  "movq %%rsi,%q1\n"
76  /* d >>= 52 */
77  "shrdq $52,%%r15,%%rcx\n"
78  "xorq %%r15,%%r15\n"
79  /* d += a4 * b0 */
80  "movq 0(%%rbx),%%rax\n"
81  "mulq %%r14\n"
82  "addq %%rax,%%rcx\n"
83  "adcq %%rdx,%%r15\n"
84  /* d += a3 * b1 */
85  "movq 8(%%rbx),%%rax\n"
86  "mulq %%r13\n"
87  "addq %%rax,%%rcx\n"
88  "adcq %%rdx,%%r15\n"
89  /* d += a2 * b2 */
90  "movq 16(%%rbx),%%rax\n"
91  "mulq %%r12\n"
92  "addq %%rax,%%rcx\n"
93  "adcq %%rdx,%%r15\n"
94  /* d += a1 * b3 */
95  "movq 24(%%rbx),%%rax\n"
96  "mulq %%r11\n"
97  "addq %%rax,%%rcx\n"
98  "adcq %%rdx,%%r15\n"
99  /* d += a0 * b4 */
100  "movq 32(%%rbx),%%rax\n"
101  "mulq %%r10\n"
102  "addq %%rax,%%rcx\n"
103  "adcq %%rdx,%%r15\n"
104  /* d += c * R */
105  "movq %%r8,%%rax\n"
106  "movq $0x1000003d10,%%rdx\n"
107  "mulq %%rdx\n"
108  "addq %%rax,%%rcx\n"
109  "adcq %%rdx,%%r15\n"
110  /* t4 = d & M (%%rsi) */
111  "movq %%rcx,%%rsi\n"
112  "movq $0xfffffffffffff,%%rdx\n"
113  "andq %%rdx,%%rsi\n"
114  /* d >>= 52 */
115  "shrdq $52,%%r15,%%rcx\n"
116  "xorq %%r15,%%r15\n"
117  /* tx = t4 >> 48 (tmp3) */
118  "movq %%rsi,%%rax\n"
119  "shrq $48,%%rax\n"
120  "movq %%rax,%q3\n"
121  /* t4 &= (M >> 4) (tmp2) */
122  "movq $0xffffffffffff,%%rax\n"
123  "andq %%rax,%%rsi\n"
124  "movq %%rsi,%q2\n"
125  /* c = a0 * b0 */
126  "movq 0(%%rbx),%%rax\n"
127  "mulq %%r10\n"
128  "movq %%rax,%%r8\n"
129  "movq %%rdx,%%r9\n"
130  /* d += a4 * b1 */
131  "movq 8(%%rbx),%%rax\n"
132  "mulq %%r14\n"
133  "addq %%rax,%%rcx\n"
134  "adcq %%rdx,%%r15\n"
135  /* d += a3 * b2 */
136  "movq 16(%%rbx),%%rax\n"
137  "mulq %%r13\n"
138  "addq %%rax,%%rcx\n"
139  "adcq %%rdx,%%r15\n"
140  /* d += a2 * b3 */
141  "movq 24(%%rbx),%%rax\n"
142  "mulq %%r12\n"
143  "addq %%rax,%%rcx\n"
144  "adcq %%rdx,%%r15\n"
145  /* d += a1 * b4 */
146  "movq 32(%%rbx),%%rax\n"
147  "mulq %%r11\n"
148  "addq %%rax,%%rcx\n"
149  "adcq %%rdx,%%r15\n"
150  /* u0 = d & M (%%rsi) */
151  "movq %%rcx,%%rsi\n"
152  "movq $0xfffffffffffff,%%rdx\n"
153  "andq %%rdx,%%rsi\n"
154  /* d >>= 52 */
155  "shrdq $52,%%r15,%%rcx\n"
156  "xorq %%r15,%%r15\n"
157  /* u0 = (u0 << 4) | tx (%%rsi) */
158  "shlq $4,%%rsi\n"
159  "movq %q3,%%rax\n"
160  "orq %%rax,%%rsi\n"
161  /* c += u0 * (R >> 4) */
162  "movq $0x1000003d1,%%rax\n"
163  "mulq %%rsi\n"
164  "addq %%rax,%%r8\n"
165  "adcq %%rdx,%%r9\n"
166  /* r[0] = c & M */
167  "movq %%r8,%%rax\n"
168  "movq $0xfffffffffffff,%%rdx\n"
169  "andq %%rdx,%%rax\n"
170  "movq %%rax,0(%%rdi)\n"
171  /* c >>= 52 */
172  "shrdq $52,%%r9,%%r8\n"
173  "xorq %%r9,%%r9\n"
174  /* c += a1 * b0 */
175  "movq 0(%%rbx),%%rax\n"
176  "mulq %%r11\n"
177  "addq %%rax,%%r8\n"
178  "adcq %%rdx,%%r9\n"
179  /* c += a0 * b1 */
180  "movq 8(%%rbx),%%rax\n"
181  "mulq %%r10\n"
182  "addq %%rax,%%r8\n"
183  "adcq %%rdx,%%r9\n"
184  /* d += a4 * b2 */
185  "movq 16(%%rbx),%%rax\n"
186  "mulq %%r14\n"
187  "addq %%rax,%%rcx\n"
188  "adcq %%rdx,%%r15\n"
189  /* d += a3 * b3 */
190  "movq 24(%%rbx),%%rax\n"
191  "mulq %%r13\n"
192  "addq %%rax,%%rcx\n"
193  "adcq %%rdx,%%r15\n"
194  /* d += a2 * b4 */
195  "movq 32(%%rbx),%%rax\n"
196  "mulq %%r12\n"
197  "addq %%rax,%%rcx\n"
198  "adcq %%rdx,%%r15\n"
199  /* c += (d & M) * R */
200  "movq %%rcx,%%rax\n"
201  "movq $0xfffffffffffff,%%rdx\n"
202  "andq %%rdx,%%rax\n"
203  "movq $0x1000003d10,%%rdx\n"
204  "mulq %%rdx\n"
205  "addq %%rax,%%r8\n"
206  "adcq %%rdx,%%r9\n"
207  /* d >>= 52 */
208  "shrdq $52,%%r15,%%rcx\n"
209  "xorq %%r15,%%r15\n"
210  /* r[1] = c & M */
211  "movq %%r8,%%rax\n"
212  "movq $0xfffffffffffff,%%rdx\n"
213  "andq %%rdx,%%rax\n"
214  "movq %%rax,8(%%rdi)\n"
215  /* c >>= 52 */
216  "shrdq $52,%%r9,%%r8\n"
217  "xorq %%r9,%%r9\n"
218  /* c += a2 * b0 */
219  "movq 0(%%rbx),%%rax\n"
220  "mulq %%r12\n"
221  "addq %%rax,%%r8\n"
222  "adcq %%rdx,%%r9\n"
223  /* c += a1 * b1 */
224  "movq 8(%%rbx),%%rax\n"
225  "mulq %%r11\n"
226  "addq %%rax,%%r8\n"
227  "adcq %%rdx,%%r9\n"
228  /* c += a0 * b2 (last use of %%r10 = a0) */
229  "movq 16(%%rbx),%%rax\n"
230  "mulq %%r10\n"
231  "addq %%rax,%%r8\n"
232  "adcq %%rdx,%%r9\n"
233  /* fetch t3 (%%r10, overwrites a0), t4 (%%rsi) */
234  "movq %q2,%%rsi\n"
235  "movq %q1,%%r10\n"
236  /* d += a4 * b3 */
237  "movq 24(%%rbx),%%rax\n"
238  "mulq %%r14\n"
239  "addq %%rax,%%rcx\n"
240  "adcq %%rdx,%%r15\n"
241  /* d += a3 * b4 */
242  "movq 32(%%rbx),%%rax\n"
243  "mulq %%r13\n"
244  "addq %%rax,%%rcx\n"
245  "adcq %%rdx,%%r15\n"
246  /* c += (d & M) * R */
247  "movq %%rcx,%%rax\n"
248  "movq $0xfffffffffffff,%%rdx\n"
249  "andq %%rdx,%%rax\n"
250  "movq $0x1000003d10,%%rdx\n"
251  "mulq %%rdx\n"
252  "addq %%rax,%%r8\n"
253  "adcq %%rdx,%%r9\n"
254  /* d >>= 52 (%%rcx only) */
255  "shrdq $52,%%r15,%%rcx\n"
256  /* r[2] = c & M */
257  "movq %%r8,%%rax\n"
258  "movq $0xfffffffffffff,%%rdx\n"
259  "andq %%rdx,%%rax\n"
260  "movq %%rax,16(%%rdi)\n"
261  /* c >>= 52 */
262  "shrdq $52,%%r9,%%r8\n"
263  "xorq %%r9,%%r9\n"
264  /* c += t3 */
265  "addq %%r10,%%r8\n"
266  /* c += d * R */
267  "movq %%rcx,%%rax\n"
268  "movq $0x1000003d10,%%rdx\n"
269  "mulq %%rdx\n"
270  "addq %%rax,%%r8\n"
271  "adcq %%rdx,%%r9\n"
272  /* r[3] = c & M */
273  "movq %%r8,%%rax\n"
274  "movq $0xfffffffffffff,%%rdx\n"
275  "andq %%rdx,%%rax\n"
276  "movq %%rax,24(%%rdi)\n"
277  /* c >>= 52 (%%r8 only) */
278  "shrdq $52,%%r9,%%r8\n"
279  /* c += t4 (%%r8 only) */
280  "addq %%rsi,%%r8\n"
281  /* r[4] = c */
282  "movq %%r8,32(%%rdi)\n"
283 : "+S"(a), "=&m"(tmp1), "=&m"(tmp2), "=&m"(tmp3)
284 : "b"(b), "D"(r)
285 : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "cc", "memory"
286 );
287 }
288 
289 SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint64_t *r, const uint64_t *a) {
299  uint64_t tmp1, tmp2, tmp3;
300 __asm__ __volatile__(
301  "movq 0(%%rsi),%%r10\n"
302  "movq 8(%%rsi),%%r11\n"
303  "movq 16(%%rsi),%%r12\n"
304  "movq 24(%%rsi),%%r13\n"
305  "movq 32(%%rsi),%%r14\n"
306  "movq $0xfffffffffffff,%%r15\n"
307 
308  /* d = (a0*2) * a3 */
309  "leaq (%%r10,%%r10,1),%%rax\n"
310  "mulq %%r13\n"
311  "movq %%rax,%%rbx\n"
312  "movq %%rdx,%%rcx\n"
313  /* d += (a1*2) * a2 */
314  "leaq (%%r11,%%r11,1),%%rax\n"
315  "mulq %%r12\n"
316  "addq %%rax,%%rbx\n"
317  "adcq %%rdx,%%rcx\n"
318  /* c = a4 * a4 */
319  "movq %%r14,%%rax\n"
320  "mulq %%r14\n"
321  "movq %%rax,%%r8\n"
322  "movq %%rdx,%%r9\n"
323  /* d += (c & M) * R */
324  "andq %%r15,%%rax\n"
325  "movq $0x1000003d10,%%rdx\n"
326  "mulq %%rdx\n"
327  "addq %%rax,%%rbx\n"
328  "adcq %%rdx,%%rcx\n"
329  /* c >>= 52 (%%r8 only) */
330  "shrdq $52,%%r9,%%r8\n"
331  /* t3 (tmp1) = d & M */
332  "movq %%rbx,%%rsi\n"
333  "andq %%r15,%%rsi\n"
334  "movq %%rsi,%q1\n"
335  /* d >>= 52 */
336  "shrdq $52,%%rcx,%%rbx\n"
337  "xorq %%rcx,%%rcx\n"
338  /* a4 *= 2 */
339  "addq %%r14,%%r14\n"
340  /* d += a0 * a4 */
341  "movq %%r10,%%rax\n"
342  "mulq %%r14\n"
343  "addq %%rax,%%rbx\n"
344  "adcq %%rdx,%%rcx\n"
345  /* d+= (a1*2) * a3 */
346  "leaq (%%r11,%%r11,1),%%rax\n"
347  "mulq %%r13\n"
348  "addq %%rax,%%rbx\n"
349  "adcq %%rdx,%%rcx\n"
350  /* d += a2 * a2 */
351  "movq %%r12,%%rax\n"
352  "mulq %%r12\n"
353  "addq %%rax,%%rbx\n"
354  "adcq %%rdx,%%rcx\n"
355  /* d += c * R */
356  "movq %%r8,%%rax\n"
357  "movq $0x1000003d10,%%rdx\n"
358  "mulq %%rdx\n"
359  "addq %%rax,%%rbx\n"
360  "adcq %%rdx,%%rcx\n"
361  /* t4 = d & M (%%rsi) */
362  "movq %%rbx,%%rsi\n"
363  "andq %%r15,%%rsi\n"
364  /* d >>= 52 */
365  "shrdq $52,%%rcx,%%rbx\n"
366  "xorq %%rcx,%%rcx\n"
367  /* tx = t4 >> 48 (tmp3) */
368  "movq %%rsi,%%rax\n"
369  "shrq $48,%%rax\n"
370  "movq %%rax,%q3\n"
371  /* t4 &= (M >> 4) (tmp2) */
372  "movq $0xffffffffffff,%%rax\n"
373  "andq %%rax,%%rsi\n"
374  "movq %%rsi,%q2\n"
375  /* c = a0 * a0 */
376  "movq %%r10,%%rax\n"
377  "mulq %%r10\n"
378  "movq %%rax,%%r8\n"
379  "movq %%rdx,%%r9\n"
380  /* d += a1 * a4 */
381  "movq %%r11,%%rax\n"
382  "mulq %%r14\n"
383  "addq %%rax,%%rbx\n"
384  "adcq %%rdx,%%rcx\n"
385  /* d += (a2*2) * a3 */
386  "leaq (%%r12,%%r12,1),%%rax\n"
387  "mulq %%r13\n"
388  "addq %%rax,%%rbx\n"
389  "adcq %%rdx,%%rcx\n"
390  /* u0 = d & M (%%rsi) */
391  "movq %%rbx,%%rsi\n"
392  "andq %%r15,%%rsi\n"
393  /* d >>= 52 */
394  "shrdq $52,%%rcx,%%rbx\n"
395  "xorq %%rcx,%%rcx\n"
396  /* u0 = (u0 << 4) | tx (%%rsi) */
397  "shlq $4,%%rsi\n"
398  "movq %q3,%%rax\n"
399  "orq %%rax,%%rsi\n"
400  /* c += u0 * (R >> 4) */
401  "movq $0x1000003d1,%%rax\n"
402  "mulq %%rsi\n"
403  "addq %%rax,%%r8\n"
404  "adcq %%rdx,%%r9\n"
405  /* r[0] = c & M */
406  "movq %%r8,%%rax\n"
407  "andq %%r15,%%rax\n"
408  "movq %%rax,0(%%rdi)\n"
409  /* c >>= 52 */
410  "shrdq $52,%%r9,%%r8\n"
411  "xorq %%r9,%%r9\n"
412  /* a0 *= 2 */
413  "addq %%r10,%%r10\n"
414  /* c += a0 * a1 */
415  "movq %%r10,%%rax\n"
416  "mulq %%r11\n"
417  "addq %%rax,%%r8\n"
418  "adcq %%rdx,%%r9\n"
419  /* d += a2 * a4 */
420  "movq %%r12,%%rax\n"
421  "mulq %%r14\n"
422  "addq %%rax,%%rbx\n"
423  "adcq %%rdx,%%rcx\n"
424  /* d += a3 * a3 */
425  "movq %%r13,%%rax\n"
426  "mulq %%r13\n"
427  "addq %%rax,%%rbx\n"
428  "adcq %%rdx,%%rcx\n"
429  /* c += (d & M) * R */
430  "movq %%rbx,%%rax\n"
431  "andq %%r15,%%rax\n"
432  "movq $0x1000003d10,%%rdx\n"
433  "mulq %%rdx\n"
434  "addq %%rax,%%r8\n"
435  "adcq %%rdx,%%r9\n"
436  /* d >>= 52 */
437  "shrdq $52,%%rcx,%%rbx\n"
438  "xorq %%rcx,%%rcx\n"
439  /* r[1] = c & M */
440  "movq %%r8,%%rax\n"
441  "andq %%r15,%%rax\n"
442  "movq %%rax,8(%%rdi)\n"
443  /* c >>= 52 */
444  "shrdq $52,%%r9,%%r8\n"
445  "xorq %%r9,%%r9\n"
446  /* c += a0 * a2 (last use of %%r10) */
447  "movq %%r10,%%rax\n"
448  "mulq %%r12\n"
449  "addq %%rax,%%r8\n"
450  "adcq %%rdx,%%r9\n"
451  /* fetch t3 (%%r10, overwrites a0),t4 (%%rsi) */
452  "movq %q2,%%rsi\n"
453  "movq %q1,%%r10\n"
454  /* c += a1 * a1 */
455  "movq %%r11,%%rax\n"
456  "mulq %%r11\n"
457  "addq %%rax,%%r8\n"
458  "adcq %%rdx,%%r9\n"
459  /* d += a3 * a4 */
460  "movq %%r13,%%rax\n"
461  "mulq %%r14\n"
462  "addq %%rax,%%rbx\n"
463  "adcq %%rdx,%%rcx\n"
464  /* c += (d & M) * R */
465  "movq %%rbx,%%rax\n"
466  "andq %%r15,%%rax\n"
467  "movq $0x1000003d10,%%rdx\n"
468  "mulq %%rdx\n"
469  "addq %%rax,%%r8\n"
470  "adcq %%rdx,%%r9\n"
471  /* d >>= 52 (%%rbx only) */
472  "shrdq $52,%%rcx,%%rbx\n"
473  /* r[2] = c & M */
474  "movq %%r8,%%rax\n"
475  "andq %%r15,%%rax\n"
476  "movq %%rax,16(%%rdi)\n"
477  /* c >>= 52 */
478  "shrdq $52,%%r9,%%r8\n"
479  "xorq %%r9,%%r9\n"
480  /* c += t3 */
481  "addq %%r10,%%r8\n"
482  /* c += d * R */
483  "movq %%rbx,%%rax\n"
484  "movq $0x1000003d10,%%rdx\n"
485  "mulq %%rdx\n"
486  "addq %%rax,%%r8\n"
487  "adcq %%rdx,%%r9\n"
488  /* r[3] = c & M */
489  "movq %%r8,%%rax\n"
490  "andq %%r15,%%rax\n"
491  "movq %%rax,24(%%rdi)\n"
492  /* c >>= 52 (%%r8 only) */
493  "shrdq $52,%%r9,%%r8\n"
494  /* c += t4 (%%r8 only) */
495  "addq %%rsi,%%r8\n"
496  /* r[4] = c */
497  "movq %%r8,32(%%rdi)\n"
498 : "+S"(a), "=&m"(tmp1), "=&m"(tmp2), "=&m"(tmp3)
499 : "D"(r)
500 : "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "cc", "memory"
501 );
502 }
503 
504 #endif /* SECP256K1_FIELD_INNER5X52_IMPL_H */
/* Doxygen footer cross-references (extraction residue, kept as a comment):
 *   static SECP256K1_INLINE void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t *a, const uint64_t *SECP256K1_RESTRICT b)
 *   static SECP256K1_INLINE void secp256k1_fe_sqr_inner(uint64_t *r, const uint64_t *a)
 *   Changelog: (stray doxygen anchor, no content)
 *   SECP256K1_INLINE   — definition: util.h:48
 *   SECP256K1_RESTRICT — definition: util.h:176
 */