Replace set/add with cmov in secp256k1_gej_add_ge.

Use a conditional move of the same kind we use for the affine points in the storage type instead of multiplying with the infinity flag and adding. This results in fewer constructions to worry about for sidechannel behavior. It also might be faster: It doesn't appear to benchmark as slower for me at least; but I think the CMOV is faster than the mul_int + add, but slower than the set+add; making it a wash.
2015-04-22 00:20:54 +00:00
parent f3d3519ce0
commit bb0ea50de8
4 changed files with 44 additions and 8 deletions
--- a/src/field_10x26_impl.h
+++ b/src/field_10x26_impl.h
@@ -1068,6 +1068,26 @@ static void secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
 #endif
 }

+static SECP256K1_INLINE void secp256k1_fe_cmov(secp256k1_fe_t *r, const secp256k1_fe_t *a, int flag) {
+    uint32_t mask0, mask1;
+    mask0 = flag + ~((uint32_t)0);
+    mask1 = ~mask0;
+    r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1);
+    r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1);
+    r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1);
+    r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1);
+    r->n[4] = (r->n[4] & mask0) | (a->n[4] & mask1);
+    r->n[5] = (r->n[5] & mask0) | (a->n[5] & mask1);
+    r->n[6] = (r->n[6] & mask0) | (a->n[6] & mask1);
+    r->n[7] = (r->n[7] & mask0) | (a->n[7] & mask1);
+    r->n[8] = (r->n[8] & mask0) | (a->n[8] & mask1);
+    r->n[9] = (r->n[9] & mask0) | (a->n[9] & mask1);
+#ifdef VERIFY
+    r->magnitude = (r->magnitude & mask0) | (a->magnitude & mask1);
+    r->normalized = (r->normalized & mask0) | (a->normalized & mask1);
+#endif
+}
+
 static SECP256K1_INLINE void secp256k1_fe_storage_cmov(secp256k1_fe_storage_t *r, const secp256k1_fe_storage_t *a, int flag) {
    uint32_t mask0, mask1;
    mask0 = flag + ~((uint32_t)0);