From 0295f0a33d0bfec8ce712e849253c71a579651c3 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Wed, 10 Dec 2014 14:34:25 +0100 Subject: [PATCH 1/4] weak normalization --- src/ecmult_gen_impl.h | 2 +- src/ecmult_impl.h | 4 ++-- src/field.h | 3 +++ src/field_10x26_impl.h | 28 ++++++++++++++++++++++++++++ src/field_5x52_impl.h | 24 ++++++++++++++++++++++++ src/group.h | 3 +-- src/group_impl.h | 18 ++++++------------ 7 files changed, 65 insertions(+), 17 deletions(-) diff --git a/src/ecmult_gen_impl.h b/src/ecmult_gen_impl.h index 5a5b16ce..48436316 100644 --- a/src/ecmult_gen_impl.h +++ b/src/ecmult_gen_impl.h @@ -73,7 +73,7 @@ static void secp256k1_ecmult_gen_start(void) { secp256k1_gej_double_var(&numsbase, &numsbase); if (j == 62) { /* In the last iteration, numsbase is (1 - 2^j) * nums instead. */ - secp256k1_gej_neg_var(&numsbase, &numsbase); + secp256k1_gej_neg(&numsbase, &numsbase); secp256k1_gej_add_var(&numsbase, &numsbase, &nums_gej); } } diff --git a/src/ecmult_impl.h b/src/ecmult_impl.h index 65367710..345cfae7 100644 --- a/src/ecmult_impl.h +++ b/src/ecmult_impl.h @@ -70,8 +70,8 @@ static void secp256k1_ecmult_table_precomp_ge_var(secp256k1_ge_t *pre, const sec (neg)((r), &(pre)[(-(n)-1)/2]); \ } while(0) -#define ECMULT_TABLE_GET_GEJ(r,pre,n,w) ECMULT_TABLE_GET((r),(pre),(n),(w),secp256k1_gej_neg_var) -#define ECMULT_TABLE_GET_GE(r,pre,n,w) ECMULT_TABLE_GET((r),(pre),(n),(w),secp256k1_ge_neg_var) +#define ECMULT_TABLE_GET_GEJ(r,pre,n,w) ECMULT_TABLE_GET((r),(pre),(n),(w),secp256k1_gej_neg) +#define ECMULT_TABLE_GET_GE(r,pre,n,w) ECMULT_TABLE_GET((r),(pre),(n),(w),secp256k1_ge_neg) typedef struct { /* For accelerating the computation of a*P + b*G: */ diff --git a/src/field.h b/src/field.h index 9d5466a3..391d97a6 100644 --- a/src/field.h +++ b/src/field.h @@ -48,6 +48,9 @@ static void secp256k1_fe_stop(void); /** Normalize a field element. */ static void secp256k1_fe_normalize(secp256k1_fe_t *r); +/** Weakly normalize a field element: reduce it magnitude to 1, but don't fully normalize. */ +static void secp256k1_fe_normalize_weak(secp256k1_fe_t *r); + /** Normalize a field element, without constant-time guarantee. */ static void secp256k1_fe_normalize_var(secp256k1_fe_t *r); diff --git a/src/field_10x26_impl.h b/src/field_10x26_impl.h index d20229cd..368e8518 100644 --- a/src/field_10x26_impl.h +++ b/src/field_10x26_impl.h @@ -103,6 +103,34 @@ static void secp256k1_fe_normalize(secp256k1_fe_t *r) { #endif } +static void secp256k1_fe_normalize_weak(secp256k1_fe_t *r) { + uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4], + t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9]; + + /* Reduce t9 at the start so there will be at most a single carry from the first pass */ + uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL; + + /* The first pass ensures the magnitude is 1, ... 
*/ + t0 += x * 0x3D1UL; t1 += (x << 6); + t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; + t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; + t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; + t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; + t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; + t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; + t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; + t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; + t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; + + r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4; + r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9; + +#ifdef VERIFY + r->magnitude = 1; + secp256k1_fe_verify(r); +#endif +} + static void secp256k1_fe_normalize_var(secp256k1_fe_t *r) { uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4], t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9]; diff --git a/src/field_5x52_impl.h b/src/field_5x52_impl.h index a045db31..74e5ab23 100644 --- a/src/field_5x52_impl.h +++ b/src/field_5x52_impl.h @@ -100,6 +100,30 @@ static void secp256k1_fe_normalize(secp256k1_fe_t *r) { #endif } +static void secp256k1_fe_normalize_weak(secp256k1_fe_t *r) { + uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4]; + + /* Reduce t4 at the start so there will be at most a single carry from the first pass */ + uint64_t x = t4 >> 48; t4 &= 0x0FFFFFFFFFFFFULL; + + /* The first pass ensures the magnitude is 1, ... */ + t0 += x * 0x1000003D1ULL; + t1 += (t0 >> 52); t0 &= 0xFFFFFFFFFFFFFULL; + t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL; + t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL; + t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL; + + /* ... except for a possible carry at bit 48 of t4 (i.e. bit 256 of the field element) */ + VERIFY_CHECK(t4 >> 49 == 0); + + r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4; + +#ifdef VERIFY + r->magnitude = 1; + secp256k1_fe_verify(r); +#endif +} + static void secp256k1_fe_normalize_var(secp256k1_fe_t *r) { uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4]; diff --git a/src/group.h b/src/group.h index 330e0696..6dea6bb5 100644 --- a/src/group.h +++ b/src/group.h @@ -60,7 +60,6 @@ static int secp256k1_ge_is_infinity(const secp256k1_ge_t *a); static int secp256k1_ge_is_valid_var(const secp256k1_ge_t *a); static void secp256k1_ge_neg(secp256k1_ge_t *r, const secp256k1_ge_t *a); -static void secp256k1_ge_neg_var(secp256k1_ge_t *r, const secp256k1_ge_t *a); /** Get a hex representation of a point. *rlen will be overwritten with the real length. */ static void secp256k1_ge_get_hex(char *r, int *rlen, const secp256k1_ge_t *a); @@ -85,7 +84,7 @@ static void secp256k1_gej_set_ge(secp256k1_gej_t *r, const secp256k1_ge_t *a); static int secp256k1_gej_eq_x_var(const secp256k1_fe_t *x, const secp256k1_gej_t *a); /** Set r equal to the inverse of a (i.e., mirrored around the X axis) */ -static void secp256k1_gej_neg_var(secp256k1_gej_t *r, const secp256k1_gej_t *a); +static void secp256k1_gej_neg(secp256k1_gej_t *r, const secp256k1_gej_t *a); /** Check whether a group element is the point at infinity. 
*/ static int secp256k1_gej_is_infinity(const secp256k1_gej_t *a); diff --git a/src/group_impl.h b/src/group_impl.h index e6d4c342..82bfd370 100644 --- a/src/group_impl.h +++ b/src/group_impl.h @@ -29,13 +29,7 @@ static int secp256k1_ge_is_infinity(const secp256k1_ge_t *a) { static void secp256k1_ge_neg(secp256k1_ge_t *r, const secp256k1_ge_t *a) { *r = *a; - secp256k1_fe_normalize(&r->y); - secp256k1_fe_negate(&r->y, &r->y, 1); -} - -static void secp256k1_ge_neg_var(secp256k1_ge_t *r, const secp256k1_ge_t *a) { - *r = *a; - secp256k1_fe_normalize_var(&r->y); + secp256k1_fe_normalize_weak(&r->y); secp256k1_fe_negate(&r->y, &r->y, 1); } @@ -172,12 +166,12 @@ static int secp256k1_gej_eq_x_var(const secp256k1_fe_t *x, const secp256k1_gej_t return secp256k1_fe_equal(&r, &r2); } -static void secp256k1_gej_neg_var(secp256k1_gej_t *r, const secp256k1_gej_t *a) { +static void secp256k1_gej_neg(secp256k1_gej_t *r, const secp256k1_gej_t *a) { r->infinity = a->infinity; r->x = a->x; r->y = a->y; r->z = a->z; - secp256k1_fe_normalize_var(&r->y); + secp256k1_fe_normalize_weak(&r->y); secp256k1_fe_negate(&r->y, &r->y, 1); } @@ -359,9 +353,9 @@ static void secp256k1_gej_add_ge(secp256k1_gej_t *r, const secp256k1_gej_t *a, c */ secp256k1_fe_t zz; secp256k1_fe_sqr(&zz, &a->z); /* z = Z1^2 */ - secp256k1_fe_t u1 = a->x; secp256k1_fe_normalize(&u1); /* u1 = U1 = X1*Z2^2 (1) */ + secp256k1_fe_t u1 = a->x; secp256k1_fe_normalize_weak(&u1); /* u1 = U1 = X1*Z2^2 (1) */ secp256k1_fe_t u2; secp256k1_fe_mul(&u2, &b->x, &zz); /* u2 = U2 = X2*Z1^2 (1) */ - secp256k1_fe_t s1 = a->y; secp256k1_fe_normalize(&s1); /* s1 = S1 = Y1*Z2^3 (1) */ + secp256k1_fe_t s1 = a->y; secp256k1_fe_normalize_weak(&s1); /* s1 = S1 = Y1*Z2^3 (1) */ secp256k1_fe_t s2; secp256k1_fe_mul(&s2, &b->y, &zz); /* s2 = Y2*Z2^2 (1) */ secp256k1_fe_mul(&s2, &s2, &a->z); /* s2 = S2 = Y2*Z1^3 (1) */ secp256k1_fe_t z = a->z; /* z = Z = Z1*Z2 (8) */ @@ -388,7 +382,7 @@ static void secp256k1_gej_add_ge(secp256k1_gej_t *r, const secp256k1_gej_t *a, c secp256k1_fe_mul(&t, &t, &rr); /* t = R*(2*R^2-3*Q) (1) */ secp256k1_fe_add(&t, &n); /* t = R*(2*R^2-3*Q)+M^4 (2) */ secp256k1_fe_negate(&r->y, &t, 2); /* r->y = R*(3*Q-2*R^2)-M^4 (3) */ - secp256k1_fe_normalize(&r->y); + secp256k1_fe_normalize_weak(&r->y); secp256k1_fe_mul_int(&r->x, 4 * (1 - a->infinity)); /* r->x = X3 = 4*(R^2-Q) */ secp256k1_fe_mul_int(&r->y, 4 * (1 - a->infinity)); /* r->y = Y3 = 4*R*(3*Q-2*R^2)-4*M^4 (4) */ From d7174edf5fd9e7e1bfcd3c7e54271213b0ff9712 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Wed, 10 Dec 2014 14:52:18 +0100 Subject: [PATCH 2/4] Weak normalization for secp256k1_fe_equal --- src/field.h | 4 ++-- src/field_10x26_impl.h | 12 ------------ src/field_5x52_impl.h | 11 ----------- src/field_impl.h | 8 ++++++++ src/group_impl.h | 35 +++++++++++++---------------------- src/tests.c | 10 +++++----- 6 files changed, 28 insertions(+), 52 deletions(-) diff --git a/src/field.h b/src/field.h index 391d97a6..940632f7 100644 --- a/src/field.h +++ b/src/field.h @@ -63,8 +63,8 @@ static int secp256k1_fe_is_zero(const secp256k1_fe_t *a); /** Check the "oddness" of a field element. Requires the input to be normalized. */ static int secp256k1_fe_is_odd(const secp256k1_fe_t *a); -/** Compare two field elements. Requires both inputs to be normalized */ -static int secp256k1_fe_equal(const secp256k1_fe_t *a, const secp256k1_fe_t *b); +/** Compare two field elements. Requires magnitude-1 inputs. 
*/ +static int secp256k1_fe_equal_var(const secp256k1_fe_t *a, const secp256k1_fe_t *b); /** Compare two field elements. Requires both inputs to be normalized */ static int secp256k1_fe_cmp_var(const secp256k1_fe_t *a, const secp256k1_fe_t *b); diff --git a/src/field_10x26_impl.h b/src/field_10x26_impl.h index 368e8518..71ae7973 100644 --- a/src/field_10x26_impl.h +++ b/src/field_10x26_impl.h @@ -224,18 +224,6 @@ SECP256K1_INLINE static void secp256k1_fe_clear(secp256k1_fe_t *a) { } } -SECP256K1_INLINE static int secp256k1_fe_equal(const secp256k1_fe_t *a, const secp256k1_fe_t *b) { -#ifdef VERIFY - VERIFY_CHECK(a->normalized); - VERIFY_CHECK(b->normalized); - secp256k1_fe_verify(a); - secp256k1_fe_verify(b); -#endif - const uint32_t *t = a->n, *u = b->n; - return ((t[0]^u[0]) | (t[1]^u[1]) | (t[2]^u[2]) | (t[3]^u[3]) | (t[4]^u[4]) - | (t[5]^u[5]) | (t[6]^u[6]) | (t[7]^u[7]) | (t[8]^u[8]) | (t[9]^u[9])) == 0; -} - static int secp256k1_fe_cmp_var(const secp256k1_fe_t *a, const secp256k1_fe_t *b) { #ifdef VERIFY VERIFY_CHECK(a->normalized); diff --git a/src/field_5x52_impl.h b/src/field_5x52_impl.h index 74e5ab23..27462b8e 100644 --- a/src/field_5x52_impl.h +++ b/src/field_5x52_impl.h @@ -205,17 +205,6 @@ SECP256K1_INLINE static void secp256k1_fe_clear(secp256k1_fe_t *a) { } } -SECP256K1_INLINE static int secp256k1_fe_equal(const secp256k1_fe_t *a, const secp256k1_fe_t *b) { -#ifdef VERIFY - VERIFY_CHECK(a->normalized); - VERIFY_CHECK(b->normalized); - secp256k1_fe_verify(a); - secp256k1_fe_verify(b); -#endif - const uint64_t *t = a->n, *u = b->n; - return ((t[0]^u[0]) | (t[1]^u[1]) | (t[2]^u[2]) | (t[3]^u[3]) | (t[4]^u[4])) == 0; -} - static int secp256k1_fe_cmp_var(const secp256k1_fe_t *a, const secp256k1_fe_t *b) { #ifdef VERIFY VERIFY_CHECK(a->normalized); diff --git a/src/field_impl.h b/src/field_impl.h index 37df2e0a..abd8fa61 100644 --- a/src/field_impl.h +++ b/src/field_impl.h @@ -64,6 +64,14 @@ static int secp256k1_fe_set_hex(secp256k1_fe_t *r, const char *a, int alen) { return secp256k1_fe_set_b32(r, tmp); } +SECP256K1_INLINE static int secp256k1_fe_equal_var(const secp256k1_fe_t *a, const secp256k1_fe_t *b) { + secp256k1_fe_t na; + secp256k1_fe_negate(&na, a, 1); + secp256k1_fe_add(&na, b); + secp256k1_fe_normalize_var(&na); + return secp256k1_fe_is_zero(&na); +} + static int secp256k1_fe_sqrt_var(secp256k1_fe_t *r, const secp256k1_fe_t *a) { /** The binary representation of (p + 1)/4 has 3 blocks of 1s, with lengths in diff --git a/src/group_impl.h b/src/group_impl.h index 82bfd370..45383c0f 100644 --- a/src/group_impl.h +++ b/src/group_impl.h @@ -161,9 +161,9 @@ static int secp256k1_gej_eq_x_var(const secp256k1_fe_t *x, const secp256k1_gej_t VERIFY_CHECK(!a->infinity); secp256k1_fe_t r; secp256k1_fe_sqr(&r, &a->z); secp256k1_fe_mul(&r, &r, x); secp256k1_fe_t r2 = a->x; - secp256k1_fe_normalize_var(&r); - secp256k1_fe_normalize_var(&r2); - return secp256k1_fe_equal(&r, &r2); + secp256k1_fe_normalize_weak(&r); + secp256k1_fe_normalize_weak(&r2); + return secp256k1_fe_equal_var(&r, &r2); } static void secp256k1_gej_neg(secp256k1_gej_t *r, const secp256k1_gej_t *a) { @@ -193,9 +193,8 @@ static int secp256k1_gej_is_valid_var(const secp256k1_gej_t *a) { secp256k1_fe_t z6; secp256k1_fe_sqr(&z6, &z2); secp256k1_fe_mul(&z6, &z6, &z2); secp256k1_fe_mul_int(&z6, 7); secp256k1_fe_add(&x3, &z6); - secp256k1_fe_normalize_var(&y2); - secp256k1_fe_normalize_var(&x3); - return secp256k1_fe_equal(&y2, &x3); + secp256k1_fe_normalize_weak(&x3); + return secp256k1_fe_equal_var(&y2, &x3); } static 
int secp256k1_ge_is_valid_var(const secp256k1_ge_t *a) { @@ -206,9 +205,8 @@ static int secp256k1_ge_is_valid_var(const secp256k1_ge_t *a) { secp256k1_fe_t x3; secp256k1_fe_sqr(&x3, &a->x); secp256k1_fe_mul(&x3, &x3, &a->x); secp256k1_fe_t c; secp256k1_fe_set_int(&c, 7); secp256k1_fe_add(&x3, &c); - secp256k1_fe_normalize_var(&y2); - secp256k1_fe_normalize_var(&x3); - return secp256k1_fe_equal(&y2, &x3); + secp256k1_fe_normalize_weak(&x3); + return secp256k1_fe_equal_var(&y2, &x3); } static void secp256k1_gej_double_var(secp256k1_gej_t *r, const secp256k1_gej_t *a) { @@ -259,12 +257,8 @@ static void secp256k1_gej_add_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, secp256k1_fe_t u2; secp256k1_fe_mul(&u2, &b->x, &z12); secp256k1_fe_t s1; secp256k1_fe_mul(&s1, &a->y, &z22); secp256k1_fe_mul(&s1, &s1, &b->z); secp256k1_fe_t s2; secp256k1_fe_mul(&s2, &b->y, &z12); secp256k1_fe_mul(&s2, &s2, &a->z); - secp256k1_fe_normalize_var(&u1); - secp256k1_fe_normalize_var(&u2); - if (secp256k1_fe_equal(&u1, &u2)) { - secp256k1_fe_normalize_var(&s1); - secp256k1_fe_normalize_var(&s2); - if (secp256k1_fe_equal(&s1, &s2)) { + if (secp256k1_fe_equal_var(&u1, &u2)) { + if (secp256k1_fe_equal_var(&s1, &s2)) { secp256k1_gej_double_var(r, a); } else { r->infinity = 1; @@ -298,15 +292,12 @@ static void secp256k1_gej_add_ge_var(secp256k1_gej_t *r, const secp256k1_gej_t * } r->infinity = 0; secp256k1_fe_t z12; secp256k1_fe_sqr(&z12, &a->z); - secp256k1_fe_t u1 = a->x; + secp256k1_fe_t u1 = a->x; secp256k1_fe_normalize_weak(&u1); secp256k1_fe_t u2; secp256k1_fe_mul(&u2, &b->x, &z12); - secp256k1_fe_t s1 = a->y; secp256k1_fe_normalize_var(&s1); + secp256k1_fe_t s1 = a->y; secp256k1_fe_normalize_weak(&s1); secp256k1_fe_t s2; secp256k1_fe_mul(&s2, &b->y, &z12); secp256k1_fe_mul(&s2, &s2, &a->z); - secp256k1_fe_normalize_var(&u1); - secp256k1_fe_normalize_var(&u2); - if (secp256k1_fe_equal(&u1, &u2)) { - secp256k1_fe_normalize_var(&s2); - if (secp256k1_fe_equal(&s1, &s2)) { + if (secp256k1_fe_equal_var(&u1, &u2)) { + if (secp256k1_fe_equal_var(&s1, &s2)) { secp256k1_gej_double_var(r, a); } else { r->infinity = 1; diff --git a/src/tests.c b/src/tests.c index 8ba1f286..f08b71ba 100644 --- a/src/tests.c +++ b/src/tests.c @@ -494,9 +494,9 @@ void random_fe_non_square(secp256k1_fe_t *ns) { } int check_fe_equal(const secp256k1_fe_t *a, const secp256k1_fe_t *b) { - secp256k1_fe_t an = *a; secp256k1_fe_normalize(&an); + secp256k1_fe_t an = *a; secp256k1_fe_normalize_weak(&an); secp256k1_fe_t bn = *b; secp256k1_fe_normalize_var(&bn); - return secp256k1_fe_equal(&an, &bn); + return secp256k1_fe_equal_var(&an, &bn); } int check_fe_inverse(const secp256k1_fe_t *a, const secp256k1_fe_t *ai) { @@ -523,16 +523,16 @@ void run_field_misc(void) { random_fe_non_zero(&y); /* Test the fe equality and comparison operations. */ CHECK(secp256k1_fe_cmp_var(&x, &x) == 0); - CHECK(secp256k1_fe_equal(&x, &x)); + CHECK(secp256k1_fe_equal_var(&x, &x)); z = x; secp256k1_fe_add(&z,&y); secp256k1_fe_normalize(&z); /* Test the conditional move. */ secp256k1_fe_cmov(&z, &x, 0); - CHECK(secp256k1_fe_equal(&x, &z) == 0); + CHECK(secp256k1_fe_equal_var(&x, &z) == 0); CHECK(secp256k1_fe_cmp_var(&x, &z) != 0); secp256k1_fe_cmov(&y, &x, 1); - CHECK(secp256k1_fe_equal(&x, &y)); + CHECK(secp256k1_fe_equal_var(&x, &y)); /* Test that mul_int, mul, and add agree. 
*/ secp256k1_fe_add(&y, &x); secp256k1_fe_add(&y, &x); From eed599dd7271a7efc3961cd97b17fc87f517a842 Mon Sep 17 00:00:00 2001 From: Peter Dettman Date: Fri, 12 Dec 2014 12:55:01 +0700 Subject: [PATCH 3/4] Add _fe_normalizes_to_zero method --- src/field.h | 4 ++++ src/field_10x26_impl.h | 32 ++++++++++++++++++++++++++++++++ src/field_5x52_impl.h | 23 +++++++++++++++++++++++ src/field_impl.h | 4 ++-- src/group_impl.h | 23 ++++++++++------------- 5 files changed, 71 insertions(+), 15 deletions(-) diff --git a/src/field.h b/src/field.h index 940632f7..ed34c965 100644 --- a/src/field.h +++ b/src/field.h @@ -54,6 +54,10 @@ static void secp256k1_fe_normalize_weak(secp256k1_fe_t *r); /** Normalize a field element, without constant-time guarantee. */ static void secp256k1_fe_normalize_var(secp256k1_fe_t *r); +/** Verify whether a field element represents zero i.e. would normalize to a zero value. The field + * implementation may optionally normalize the input, but this should not be relied upon. */ +static int secp256k1_fe_normalizes_to_zero(secp256k1_fe_t *r); + /** Set a field element equal to a small integer. Resulting field element is normalized. */ static void secp256k1_fe_set_int(secp256k1_fe_t *r, int a); diff --git a/src/field_10x26_impl.h b/src/field_10x26_impl.h index 71ae7973..7778ac3d 100644 --- a/src/field_10x26_impl.h +++ b/src/field_10x26_impl.h @@ -122,6 +122,9 @@ static void secp256k1_fe_normalize_weak(secp256k1_fe_t *r) { t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; + /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */ + VERIFY_CHECK(t9 >> 23 == 0); + r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4; r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9; @@ -187,6 +190,35 @@ static void secp256k1_fe_normalize_var(secp256k1_fe_t *r) { #endif } +static int secp256k1_fe_normalizes_to_zero(secp256k1_fe_t *r) { + uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4], + t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9]; + + /* Reduce t9 at the start so there will be at most a single carry from the first pass */ + uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL; + + /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */ + uint32_t z0, z1; + + /* The first pass ensures the magnitude is 1, ... */ + t0 += x * 0x3D1UL; t1 += (x << 6); + t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; z0 = t0; z1 = t0 ^ 0x3D0UL; + t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; z0 |= t1; z1 &= t1 ^ 0x40UL; + t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 |= t2; z1 &= t2; + t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3; z1 &= t3; + t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4; z1 &= t4; + t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5; z1 &= t5; + t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6; z1 &= t6; + t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7; z1 &= t7; + t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8; z1 &= t8; + z0 |= t9; z1 &= t9 ^ 0x3C00000UL; + + /* ... except for a possible carry at bit 22 of t9 (i.e. 
bit 256 of the field element) */ + VERIFY_CHECK(t9 >> 23 == 0); + + return (z0 == 0) | (z1 == 0x3FFFFFFUL); +} + SECP256K1_INLINE static void secp256k1_fe_set_int(secp256k1_fe_t *r, int a) { r->n[0] = a; r->n[1] = r->n[2] = r->n[3] = r->n[4] = r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0; diff --git a/src/field_5x52_impl.h b/src/field_5x52_impl.h index 27462b8e..45f61e0d 100644 --- a/src/field_5x52_impl.h +++ b/src/field_5x52_impl.h @@ -168,6 +168,29 @@ static void secp256k1_fe_normalize_var(secp256k1_fe_t *r) { #endif } +static int secp256k1_fe_normalizes_to_zero(secp256k1_fe_t *r) { + uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4]; + + /* Reduce t4 at the start so there will be at most a single carry from the first pass */ + uint64_t x = t4 >> 48; t4 &= 0x0FFFFFFFFFFFFULL; + + /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */ + uint64_t z0, z1; + + /* The first pass ensures the magnitude is 1, ... */ + t0 += x * 0x1000003D1ULL; + t1 += (t0 >> 52); t0 &= 0xFFFFFFFFFFFFFULL; z0 = t0; z1 = t0 ^ 0x1000003D0ULL; + t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL; z0 |= t1; z1 &= t1; + t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL; z0 |= t2; z1 &= t2; + t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL; z0 |= t3; z1 &= t3; + z0 |= t4; z1 &= t4 ^ 0xF000000000000ULL; + + /* ... except for a possible carry at bit 48 of t4 (i.e. bit 256 of the field element) */ + VERIFY_CHECK(t4 >> 49 == 0); + + return (z0 == 0) | (z1 == 0xFFFFFFFFFFFFFULL); +} + SECP256K1_INLINE static void secp256k1_fe_set_int(secp256k1_fe_t *r, int a) { r->n[0] = a; r->n[1] = r->n[2] = r->n[3] = r->n[4] = 0; diff --git a/src/field_impl.h b/src/field_impl.h index abd8fa61..d1b6cd6f 100644 --- a/src/field_impl.h +++ b/src/field_impl.h @@ -64,12 +64,12 @@ static int secp256k1_fe_set_hex(secp256k1_fe_t *r, const char *a, int alen) { return secp256k1_fe_set_b32(r, tmp); } +/* TODO Not actually var currently */ SECP256K1_INLINE static int secp256k1_fe_equal_var(const secp256k1_fe_t *a, const secp256k1_fe_t *b) { secp256k1_fe_t na; secp256k1_fe_negate(&na, a, 1); secp256k1_fe_add(&na, b); - secp256k1_fe_normalize_var(&na); - return secp256k1_fe_is_zero(&na); + return secp256k1_fe_normalizes_to_zero(&na); } static int secp256k1_fe_sqrt_var(secp256k1_fe_t *r, const secp256k1_fe_t *a) { diff --git a/src/group_impl.h b/src/group_impl.h index 45383c0f..e62d3209 100644 --- a/src/group_impl.h +++ b/src/group_impl.h @@ -160,9 +160,7 @@ static void secp256k1_gej_set_ge(secp256k1_gej_t *r, const secp256k1_ge_t *a) { static int secp256k1_gej_eq_x_var(const secp256k1_fe_t *x, const secp256k1_gej_t *a) { VERIFY_CHECK(!a->infinity); secp256k1_fe_t r; secp256k1_fe_sqr(&r, &a->z); secp256k1_fe_mul(&r, &r, x); - secp256k1_fe_t r2 = a->x; - secp256k1_fe_normalize_weak(&r); - secp256k1_fe_normalize_weak(&r2); + secp256k1_fe_t r2 = a->x; secp256k1_fe_normalize_weak(&r2); return secp256k1_fe_equal_var(&r, &r2); } @@ -257,16 +255,16 @@ static void secp256k1_gej_add_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, secp256k1_fe_t u2; secp256k1_fe_mul(&u2, &b->x, &z12); secp256k1_fe_t s1; secp256k1_fe_mul(&s1, &a->y, &z22); secp256k1_fe_mul(&s1, &s1, &b->z); secp256k1_fe_t s2; secp256k1_fe_mul(&s2, &b->y, &z12); secp256k1_fe_mul(&s2, &s2, &a->z); - if (secp256k1_fe_equal_var(&u1, &u2)) { - if (secp256k1_fe_equal_var(&s1, &s2)) { + secp256k1_fe_t h; secp256k1_fe_negate(&h, &u1, 1); secp256k1_fe_add(&h, &u2); + secp256k1_fe_t i; secp256k1_fe_negate(&i, &s1, 1); secp256k1_fe_add(&i, &s2); + if 
(secp256k1_fe_normalizes_to_zero(&h)) { + if (secp256k1_fe_normalizes_to_zero(&i)) { secp256k1_gej_double_var(r, a); } else { r->infinity = 1; } return; } - secp256k1_fe_t h; secp256k1_fe_negate(&h, &u1, 1); secp256k1_fe_add(&h, &u2); - secp256k1_fe_t i; secp256k1_fe_negate(&i, &s1, 1); secp256k1_fe_add(&i, &s2); secp256k1_fe_t i2; secp256k1_fe_sqr(&i2, &i); secp256k1_fe_t h2; secp256k1_fe_sqr(&h2, &h); secp256k1_fe_t h3; secp256k1_fe_mul(&h3, &h, &h2); @@ -296,16 +294,16 @@ static void secp256k1_gej_add_ge_var(secp256k1_gej_t *r, const secp256k1_gej_t * secp256k1_fe_t u2; secp256k1_fe_mul(&u2, &b->x, &z12); secp256k1_fe_t s1 = a->y; secp256k1_fe_normalize_weak(&s1); secp256k1_fe_t s2; secp256k1_fe_mul(&s2, &b->y, &z12); secp256k1_fe_mul(&s2, &s2, &a->z); - if (secp256k1_fe_equal_var(&u1, &u2)) { - if (secp256k1_fe_equal_var(&s1, &s2)) { + secp256k1_fe_t h; secp256k1_fe_negate(&h, &u1, 1); secp256k1_fe_add(&h, &u2); + secp256k1_fe_t i; secp256k1_fe_negate(&i, &s1, 1); secp256k1_fe_add(&i, &s2); + if (secp256k1_fe_normalizes_to_zero(&h)) { + if (secp256k1_fe_normalizes_to_zero(&i)) { secp256k1_gej_double_var(r, a); } else { r->infinity = 1; } return; } - secp256k1_fe_t h; secp256k1_fe_negate(&h, &u1, 1); secp256k1_fe_add(&h, &u2); - secp256k1_fe_t i; secp256k1_fe_negate(&i, &s1, 1); secp256k1_fe_add(&i, &s2); secp256k1_fe_t i2; secp256k1_fe_sqr(&i2, &i); secp256k1_fe_t h2; secp256k1_fe_sqr(&h2, &h); secp256k1_fe_t h3; secp256k1_fe_mul(&h3, &h, &h2); @@ -360,8 +358,7 @@ static void secp256k1_gej_add_ge(secp256k1_gej_t *r, const secp256k1_gej_t *a, c secp256k1_fe_add(&rr, &t); /* rr = R = T^2-U1*U2 (3) */ secp256k1_fe_sqr(&t, &rr); /* t = R^2 (1) */ secp256k1_fe_mul(&r->z, &m, &z); /* r->z = M*Z (1) */ - secp256k1_fe_normalize(&r->z); - int infinity = secp256k1_fe_is_zero(&r->z) * (1 - a->infinity); + int infinity = secp256k1_fe_normalizes_to_zero(&r->z) * (1 - a->infinity); secp256k1_fe_mul_int(&r->z, 2 * (1 - a->infinity)); /* r->z = Z3 = 2*M*Z (2) */ r->x = t; /* r->x = R^2 (1) */ secp256k1_fe_negate(&q, &q, 1); /* q = -Q (2) */ From 49ee0dbe167b54e92ed4602cc5c9372d1e619849 Mon Sep 17 00:00:00 2001 From: Peter Dettman Date: Sat, 13 Dec 2014 17:14:26 +0700 Subject: [PATCH 4/4] Add _normalizes_to_zero_var variant --- src/field.h | 4 ++++ src/field_10x26_impl.h | 38 ++++++++++++++++++++++++++++++++++++++ src/field_5x52_impl.h | 31 +++++++++++++++++++++++++++++++ src/field_impl.h | 3 +-- src/group_impl.h | 8 ++++---- 5 files changed, 78 insertions(+), 6 deletions(-) diff --git a/src/field.h b/src/field.h index ed34c965..14e2b813 100644 --- a/src/field.h +++ b/src/field.h @@ -58,6 +58,10 @@ static void secp256k1_fe_normalize_var(secp256k1_fe_t *r); * implementation may optionally normalize the input, but this should not be relied upon. */ static int secp256k1_fe_normalizes_to_zero(secp256k1_fe_t *r); +/** Verify whether a field element represents zero i.e. would normalize to a zero value. The field + * implementation may optionally normalize the input, but this should not be relied upon. */ +static int secp256k1_fe_normalizes_to_zero_var(secp256k1_fe_t *r); + /** Set a field element equal to a small integer. Resulting field element is normalized. 
*/ static void secp256k1_fe_set_int(secp256k1_fe_t *r, int a); diff --git a/src/field_10x26_impl.h b/src/field_10x26_impl.h index 7778ac3d..8f5d15c7 100644 --- a/src/field_10x26_impl.h +++ b/src/field_10x26_impl.h @@ -219,6 +219,44 @@ static int secp256k1_fe_normalizes_to_zero(secp256k1_fe_t *r) { return (z0 == 0) | (z1 == 0x3FFFFFFUL); } +static int secp256k1_fe_normalizes_to_zero_var(secp256k1_fe_t *r) { + uint32_t t0 = r->n[0], t9 = r->n[9]; + + /* Reduce t9 at the start so there will be at most a single carry from the first pass */ + uint32_t x = t9 >> 22; + + /* The first pass ensures the magnitude is 1, ... */ + t0 += x * 0x3D1UL; + + /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */ + uint32_t z0 = t0 & 0x3FFFFFFUL, z1 = z0 ^ 0x3D0UL; + + /* Fast return path should catch the majority of cases */ + if ((z0 != 0UL) & (z1 != 0x3FFFFFFUL)) + return 0; + + uint32_t t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4], + t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8]; + t9 &= 0x03FFFFFUL; + t1 += (x << 6); + + t1 += (t0 >> 26); t0 = z0; + t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; z0 |= t1; z1 &= t1 ^ 0x40UL; + t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 |= t2; z1 &= t2; + t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3; z1 &= t3; + t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4; z1 &= t4; + t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5; z1 &= t5; + t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6; z1 &= t6; + t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7; z1 &= t7; + t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8; z1 &= t8; + z0 |= t9; z1 &= t9 ^ 0x3C00000UL; + + /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */ + VERIFY_CHECK(t9 >> 23 == 0); + + return (z0 == 0) | (z1 == 0x3FFFFFFUL); +} + SECP256K1_INLINE static void secp256k1_fe_set_int(secp256k1_fe_t *r, int a) { r->n[0] = a; r->n[1] = r->n[2] = r->n[3] = r->n[4] = r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0; diff --git a/src/field_5x52_impl.h b/src/field_5x52_impl.h index 45f61e0d..33597c44 100644 --- a/src/field_5x52_impl.h +++ b/src/field_5x52_impl.h @@ -191,6 +191,37 @@ static int secp256k1_fe_normalizes_to_zero(secp256k1_fe_t *r) { return (z0 == 0) | (z1 == 0xFFFFFFFFFFFFFULL); } +static int secp256k1_fe_normalizes_to_zero_var(secp256k1_fe_t *r) { + uint64_t t0 = r->n[0], t4 = r->n[4]; + + /* Reduce t4 at the start so there will be at most a single carry from the first pass */ + uint64_t x = t4 >> 48; + + /* The first pass ensures the magnitude is 1, ... */ + t0 += x * 0x1000003D1ULL; + + /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */ + uint64_t z0 = t0 & 0xFFFFFFFFFFFFFULL, z1 = z0 ^ 0x1000003D0ULL; + + /* Fast return path should catch the majority of cases */ + if ((z0 != 0ULL) & (z1 != 0xFFFFFFFFFFFFFULL)) + return 0; + + uint64_t t1 = r->n[1], t2 = r->n[2], t3 = r->n[3]; + t4 &= 0x0FFFFFFFFFFFFULL; + + t1 += (t0 >> 52); t0 = z0; + t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL; z0 |= t1; z1 &= t1; + t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL; z0 |= t2; z1 &= t2; + t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL; z0 |= t3; z1 &= t3; + z0 |= t4; z1 &= t4 ^ 0xF000000000000ULL; + + /* ... except for a possible carry at bit 48 of t4 (i.e. 
bit 256 of the field element) */ + VERIFY_CHECK(t4 >> 49 == 0); + + return (z0 == 0) | (z1 == 0xFFFFFFFFFFFFFULL); +} + SECP256K1_INLINE static void secp256k1_fe_set_int(secp256k1_fe_t *r, int a) { r->n[0] = a; r->n[1] = r->n[2] = r->n[3] = r->n[4] = 0; diff --git a/src/field_impl.h b/src/field_impl.h index d1b6cd6f..484e6731 100644 --- a/src/field_impl.h +++ b/src/field_impl.h @@ -64,12 +64,11 @@ static int secp256k1_fe_set_hex(secp256k1_fe_t *r, const char *a, int alen) { return secp256k1_fe_set_b32(r, tmp); } -/* TODO Not actually var currently */ SECP256K1_INLINE static int secp256k1_fe_equal_var(const secp256k1_fe_t *a, const secp256k1_fe_t *b) { secp256k1_fe_t na; secp256k1_fe_negate(&na, a, 1); secp256k1_fe_add(&na, b); - return secp256k1_fe_normalizes_to_zero(&na); + return secp256k1_fe_normalizes_to_zero_var(&na); } static int secp256k1_fe_sqrt_var(secp256k1_fe_t *r, const secp256k1_fe_t *a) { diff --git a/src/group_impl.h b/src/group_impl.h index e62d3209..fef06df2 100644 --- a/src/group_impl.h +++ b/src/group_impl.h @@ -257,8 +257,8 @@ static void secp256k1_gej_add_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, secp256k1_fe_t s2; secp256k1_fe_mul(&s2, &b->y, &z12); secp256k1_fe_mul(&s2, &s2, &a->z); secp256k1_fe_t h; secp256k1_fe_negate(&h, &u1, 1); secp256k1_fe_add(&h, &u2); secp256k1_fe_t i; secp256k1_fe_negate(&i, &s1, 1); secp256k1_fe_add(&i, &s2); - if (secp256k1_fe_normalizes_to_zero(&h)) { - if (secp256k1_fe_normalizes_to_zero(&i)) { + if (secp256k1_fe_normalizes_to_zero_var(&h)) { + if (secp256k1_fe_normalizes_to_zero_var(&i)) { secp256k1_gej_double_var(r, a); } else { r->infinity = 1; @@ -296,8 +296,8 @@ static void secp256k1_gej_add_ge_var(secp256k1_gej_t *r, const secp256k1_gej_t * secp256k1_fe_t s2; secp256k1_fe_mul(&s2, &b->y, &z12); secp256k1_fe_mul(&s2, &s2, &a->z); secp256k1_fe_t h; secp256k1_fe_negate(&h, &u1, 1); secp256k1_fe_add(&h, &u2); secp256k1_fe_t i; secp256k1_fe_negate(&i, &s1, 1); secp256k1_fe_add(&i, &s2); - if (secp256k1_fe_normalizes_to_zero(&h)) { - if (secp256k1_fe_normalizes_to_zero(&i)) { + if (secp256k1_fe_normalizes_to_zero_var(&h)) { + if (secp256k1_fe_normalizes_to_zero_var(&i)) { secp256k1_gej_double_var(r, a); } else { r->infinity = 1;
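
For readers skimming the patches above, here is a standalone sketch of the idea behind secp256k1_fe_normalizes_to_zero and the new secp256k1_fe_equal_var: after one weak-normalization pass an element congruent to zero can only hold the raw value 0 or the raw value p, so equality can be tested by negating, adding, and checking for those two patterns, with no full normalization. This is only an illustration under simplified assumptions — the toy_fe type, the TOY_P prime, and the helper names are invented for this sketch (a single small limb instead of the library's 10x26/5x52 representations), and the loop-based reduction stands in for the constant-time carry pass in the real code.

/* Illustrative sketch only -- toy_fe, TOY_P and the helpers below are invented
 * for this example and are not part of libsecp256k1.  A single small limb and
 * the prime 251 stand in for the limbed representations and the secp256k1
 * field prime. */
#include <stdint.h>
#include <stdio.h>

#define TOY_P 251u                       /* toy field prime */

typedef struct { uint32_t n; } toy_fe;   /* raw (possibly unnormalized) value */

/* Weak normalization: bring the raw value below 2*TOY_P ("magnitude 1")
 * without forcing it into the canonical range [0, TOY_P). */
static void toy_fe_normalize_weak(toy_fe *r) {
    while (r->n >= 2 * TOY_P) r->n -= TOY_P;
}

/* After the weak pass, the only raw values congruent to zero are 0 and
 * TOY_P itself -- the analogue of the z0/z1 checks in the patches above. */
static int toy_fe_normalizes_to_zero(const toy_fe *r) {
    toy_fe t = *r;
    toy_fe_normalize_weak(&t);
    return (t.n == 0) | (t.n == TOY_P);
}

/* Equality without full normalization: a == b  iff  (-a) + b reduces to 0. */
static int toy_fe_equal_var(const toy_fe *a, const toy_fe *b) {
    toy_fe d = { (TOY_P - a->n % TOY_P) + b->n };  /* toy negate-and-add */
    return toy_fe_normalizes_to_zero(&d);
}

int main(void) {
    toy_fe a = { 3 };
    toy_fe b = { 3 + TOY_P };    /* same residue as a, different raw value */
    toy_fe z = { 2 * TOY_P };    /* congruent to zero, raw value nonzero */
    printf("equal(a, b) = %d\n", toy_fe_equal_var(&a, &b));       /* 1 */
    printf("zero(z)     = %d\n", toy_fe_normalizes_to_zero(&z));  /* 1 */
    return 0;
}

The same structure explains the split introduced in the last patch: the _var variant reduces only the lowest limb first and returns early when neither zero pattern is still possible (the common case), while the non-_var version always runs the full pass so it can remain constant time.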