Skip to content

Commit dca786f

Browse files
Merge branch 'master' of https://bearssl.org/git/BearSSL
2 parents 1b31cd3 + b2ec203 commit dca786f

File tree

1 file changed

+59
-8
lines changed

1 file changed

+59
-8
lines changed

src/ec/ec_p256_m64.c

+59-8
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,9 @@ f256_add(uint64_t *d, const uint64_t *a, const uint64_t *b)
9999
unsigned __int128 w;
100100
uint64_t t;
101101

102+
/*
103+
* Do the addition, with an extra carry in t.
104+
*/
102105
w = (unsigned __int128)a[0] + b[0];
103106
d[0] = (uint64_t)w;
104107
w = (unsigned __int128)a[1] + b[1] + (w >> 64);
@@ -110,7 +113,7 @@ f256_add(uint64_t *d, const uint64_t *a, const uint64_t *b)
110113
t = (uint64_t)(w >> 64);
111114

112115
/*
113-
* 2^256 = 2^224 - 2^192 - 2^96 + 1 in the field.
116+
* Fold carry t, using: 2^256 = 2^224 - 2^192 - 2^96 + 1 mod p.
114117
*/
115118
w = (unsigned __int128)d[0] + t;
116119
d[0] = (uint64_t)w;
@@ -119,8 +122,22 @@ f256_add(uint64_t *d, const uint64_t *a, const uint64_t *b)
119122
/* Here, carry "w >> 64" can only be 0 or -1 */
120123
w = (unsigned __int128)d[2] - ((w >> 64) & 1);
121124
d[2] = (uint64_t)w;
122-
/* Again, carry is 0 or -1 */
123-
d[3] += (uint64_t)(w >> 64) + (t << 32) - t;
125+
/* Again, carry is 0 or -1. But there can be a carry only if t = 1,
126+
in which case the addition of (t << 32) - t is positive. */
127+
w = (unsigned __int128)d[3] - ((w >> 64) & 1) + (t << 32) - t;
128+
d[3] = (uint64_t)w;
129+
t = (uint64_t)(w >> 64);
130+
131+
/*
132+
* There can be an extra carry here, which we must fold again.
133+
*/
134+
w = (unsigned __int128)d[0] + t;
135+
d[0] = (uint64_t)w;
136+
w = (unsigned __int128)d[1] + (w >> 64) - (t << 32);
137+
d[1] = (uint64_t)w;
138+
w = (unsigned __int128)d[2] - ((w >> 64) & 1);
139+
d[2] = (uint64_t)w;
140+
d[3] += (t << 32) - t - (uint64_t)((w >> 64) & 1);
124141

125142
#elif BR_UMUL128
126143

@@ -140,6 +157,15 @@ f256_add(uint64_t *d, const uint64_t *a, const uint64_t *b)
140157
cc = _addcarry_u64(cc, d[0], 0, &d[0]);
141158
cc = _addcarry_u64(cc, d[1], -(t << 32), &d[1]);
142159
cc = _addcarry_u64(cc, d[2], -t, &d[2]);
160+
cc = _addcarry_u64(cc, d[3], (t << 32) - (t << 1), &d[3]);
161+
162+
/*
163+
* We have to do it again if there still is a carry.
164+
*/
165+
t = cc;
166+
cc = _addcarry_u64(cc, d[0], 0, &d[0]);
167+
cc = _addcarry_u64(cc, d[1], -(t << 32), &d[1]);
168+
cc = _addcarry_u64(cc, d[2], -t, &d[2]);
143169
(void)_addcarry_u64(cc, d[3], (t << 32) - (t << 1), &d[3]);
144170

145171
#endif
@@ -167,6 +193,7 @@ f256_sub(uint64_t *d, const uint64_t *a, const uint64_t *b)
167193
t = (uint64_t)(w >> 64) & 1;
168194

169195
/*
196+
* If there is a borrow (t = 1), then we must add the modulus
170197
* p = 2^256 - 2^224 + 2^192 + 2^96 - 1.
171198
*/
172199
w = (unsigned __int128)d[0] - t;
@@ -177,6 +204,20 @@ f256_sub(uint64_t *d, const uint64_t *a, const uint64_t *b)
177204
w = (unsigned __int128)d[2] + (w >> 64);
178205
d[2] = (uint64_t)w;
179206
/* Again, carry is 0 or +1 */
207+
w = (unsigned __int128)d[3] + (w >> 64) - (t << 32) + t;
208+
d[3] = (uint64_t)w;
209+
t = (uint64_t)(w >> 64) & 1;
210+
211+
/*
212+
* There may again be a borrow, in which case we must add the
213+
* modulus again.
214+
*/
215+
w = (unsigned __int128)d[0] - t;
216+
d[0] = (uint64_t)w;
217+
w = (unsigned __int128)d[1] + (t << 32) - ((w >> 64) & 1);
218+
d[1] = (uint64_t)w;
219+
w = (unsigned __int128)d[2] + (w >> 64);
220+
d[2] = (uint64_t)w;
180221
d[3] += (uint64_t)(w >> 64) - (t << 32) + t;
181222

182223
#elif BR_UMUL128
@@ -190,13 +231,23 @@ f256_sub(uint64_t *d, const uint64_t *a, const uint64_t *b)
190231
cc = _subborrow_u64(cc, a[3], b[3], &d[3]);
191232

192233
/*
193-
* If there is a carry, then we need to add p.
234+
* If there is a borrow, then we need to add p. We (virtually)
235+
* add 2^256, then subtract 2^256 - p.
236+
*/
237+
t = cc;
238+
cc = _subborrow_u64(0, d[0], t, &d[0]);
239+
cc = _subborrow_u64(cc, d[1], -(t << 32), &d[1]);
240+
cc = _subborrow_u64(cc, d[2], -t, &d[2]);
241+
cc = _subborrow_u64(cc, d[3], (t << 32) - (t << 1), &d[3]);
242+
243+
/*
244+
* If there still is a borrow, then we need to add p again.
194245
*/
195246
t = cc;
196-
cc = _addcarry_u64(0, d[0], -t, &d[0]);
197-
cc = _addcarry_u64(cc, d[1], (-t) >> 32, &d[1]);
198-
cc = _addcarry_u64(cc, d[2], 0, &d[2]);
199-
(void)_addcarry_u64(cc, d[3], t - (t << 32), &d[3]);
247+
cc = _subborrow_u64(0, d[0], t, &d[0]);
248+
cc = _subborrow_u64(cc, d[1], -(t << 32), &d[1]);
249+
cc = _subborrow_u64(cc, d[2], -t, &d[2]);
250+
(void)_subborrow_u64(cc, d[3], (t << 32) - (t << 1), &d[3]);
200251

201252
#endif
202253
}

0 commit comments

Comments
 (0)