diff --git a/include/secp256k1.h b/include/secp256k1.h index 932bf027..94a6ef48 100644 --- a/include/secp256k1.h +++ b/include/secp256k1.h @@ -14,18 +14,6 @@ extern "C" { # endif # endif -# if (!defined(__STDC_VERSION__) || (__STDC_VERSION__ < 199901L) ) -# if SECP256K1_GNUC_PREREQ(3,0) -# define SECP256K1_RESTRICT __restrict__ -# elif (defined(_MSC_VER) && _MSC_VER >= 1400) -# define SECP256K1_RESTRICT __restrict -# else -# define SECP256K1_RESTRICT -# endif -# else -# define SECP256K1_RESTRICT restrict -# endif - # if (!defined(__STDC_VERSION__) || (__STDC_VERSION__ < 199901L) ) # if SECP256K1_GNUC_PREREQ(2,7) # define SECP256K1_INLINE __inline__ diff --git a/src/field.h b/src/field.h index cbd48c00..99a049ff 100644 --- a/src/field.h +++ b/src/field.h @@ -82,7 +82,7 @@ static void secp256k1_fe_add(secp256k1_fe_t *r, const secp256k1_fe_t *a); /** Sets a field element to be the product of two others. Requires the inputs' magnitudes to be at most 8. * The output magnitude is 1 (but not guaranteed to be normalized). */ -static void secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *a, const secp256k1_fe_t *b); +static void secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *a, const secp256k1_fe_t * SECP256K1_RESTRICT b); /** Sets a field element to be the square of another. Requires the input's magnitude to be at most 8. * The output magnitude is 1 (but not guaranteed to be normalized). 
*/ diff --git a/src/field_10x26_impl.h b/src/field_10x26_impl.h index 213b8483..13bad9c5 100644 --- a/src/field_10x26_impl.h +++ b/src/field_10x26_impl.h @@ -271,7 +271,7 @@ SECP256K1_INLINE static void secp256k1_fe_add(secp256k1_fe_t *r, const secp256k1 #define VERIFY_BITS(x, n) do { } while(0) #endif -SECP256K1_INLINE static void secp256k1_fe_mul_inner(const uint32_t *a, const uint32_t *b, uint32_t *r) { +SECP256K1_INLINE static void secp256k1_fe_mul_inner(const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b, uint32_t *r) { VERIFY_BITS(a[0], 30); VERIFY_BITS(a[1], 30); VERIFY_BITS(a[2], 30); @@ -871,12 +871,13 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(const uint32_t *a, uint32_t } -static void secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *a, const secp256k1_fe_t *b) { +static void secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *a, const secp256k1_fe_t * SECP256K1_RESTRICT b) { #ifdef VERIFY VERIFY_CHECK(a->magnitude <= 8); VERIFY_CHECK(b->magnitude <= 8); secp256k1_fe_verify(a); secp256k1_fe_verify(b); + VERIFY_CHECK(r != b); #endif secp256k1_fe_mul_inner(a->n, b->n, r->n); #ifdef VERIFY diff --git a/src/field_5x52_impl.h b/src/field_5x52_impl.h index cc9d0c1f..29e770f2 100644 --- a/src/field_5x52_impl.h +++ b/src/field_5x52_impl.h @@ -247,12 +247,13 @@ SECP256K1_INLINE static void secp256k1_fe_add(secp256k1_fe_t *r, const secp256k1 #endif } -static void secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *a, const secp256k1_fe_t *b) { +static void secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *a, const secp256k1_fe_t * SECP256K1_RESTRICT b) { #ifdef VERIFY VERIFY_CHECK(a->magnitude <= 8); VERIFY_CHECK(b->magnitude <= 8); secp256k1_fe_verify(a); secp256k1_fe_verify(b); + VERIFY_CHECK(r != b); #endif secp256k1_fe_mul_inner(a->n, b->n, r->n); #ifdef VERIFY diff --git a/src/field_5x52_int128_impl.h b/src/field_5x52_int128_impl.h index c4764286..9ad4c04d 100644 --- a/src/field_5x52_int128_impl.h +++ 
b/src/field_5x52_int128_impl.h @@ -15,7 +15,7 @@ #define VERIFY_BITS(x, n) do { } while(0) #endif -SECP256K1_INLINE static void secp256k1_fe_mul_inner(const uint64_t *a, const uint64_t *b, uint64_t *r) { +SECP256K1_INLINE static void secp256k1_fe_mul_inner(const uint64_t *a, const uint64_t *b, uint64_t * SECP256K1_RESTRICT r) { VERIFY_BITS(a[0], 56); VERIFY_BITS(a[1], 56); VERIFY_BITS(a[2], 56); @@ -26,6 +26,7 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(const uint64_t *a, const uin VERIFY_BITS(b[2], 56); VERIFY_BITS(b[3], 56); VERIFY_BITS(b[4], 52); + VERIFY_CHECK(r != b); const uint64_t M = 0xFFFFFFFFFFFFFULL, R = 0x1000003D10ULL; /* [... a b c] is a shorthand for ... + a<<104 + b<<52 + c<<0 mod n. @@ -33,15 +34,17 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(const uint64_t *a, const uin * Note that [x 0 0 0 0 0] = [x*R]. */ + uint64_t a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4]; + __int128 c, d; - d = (__int128)a[0] * b[3] - + (__int128)a[1] * b[2] - + (__int128)a[2] * b[1] - + (__int128)a[3] * b[0]; + d = (__int128)a0 * b[3] + + (__int128)a1 * b[2] + + (__int128)a2 * b[1] + + (__int128)a3 * b[0]; VERIFY_BITS(d, 114); /* [d 0 0 0] = [p3 0 0 0] */ - c = (__int128)a[4] * b[4]; + c = (__int128)a4 * b[4]; VERIFY_BITS(c, 112); /* [c 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */ d += (c & M) * R; c >>= 52; @@ -53,11 +56,11 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(const uint64_t *a, const uin VERIFY_BITS(d, 63); /* [c 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */ - d += (__int128)a[0] * b[4] - + (__int128)a[1] * b[3] - + (__int128)a[2] * b[2] - + (__int128)a[3] * b[1] - + (__int128)a[4] * b[0]; + d += (__int128)a0 * b[4] + + (__int128)a1 * b[3] + + (__int128)a2 * b[2] + + (__int128)a3 * b[1] + + (__int128)a4 * b[0]; VERIFY_BITS(d, 115); /* [c 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ d += c * R; @@ -72,13 +75,13 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(const uint64_t *a, const uin VERIFY_BITS(t4, 48); /* [d 
t4+(tx<<48) t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ - c = (__int128)a[0] * b[0]; + c = (__int128)a0 * b[0]; VERIFY_BITS(c, 112); /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 0 p4 p3 0 0 p0] */ - d += (__int128)a[1] * b[4] - + (__int128)a[2] * b[3] - + (__int128)a[3] * b[2] - + (__int128)a[4] * b[1]; + d += (__int128)a1 * b[4] + + (__int128)a2 * b[3] + + (__int128)a3 * b[2] + + (__int128)a4 * b[1]; VERIFY_BITS(d, 115); /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ uint64_t u0 = d & M; d >>= 52; @@ -92,48 +95,43 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(const uint64_t *a, const uin c += (__int128)u0 * (R >> 4); VERIFY_BITS(c, 115); /* [d 0 t4 t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ - uint64_t t0 = c & M; c >>= 52; - VERIFY_BITS(t0, 52); - VERIFY_BITS(c, 61); - /* [d 0 t4 t3 0 c t0] = [p8 0 0 p5 p4 p3 0 0 p0] */ - - c += (__int128)a[0] * b[1] - + (__int128)a[1] * b[0]; - VERIFY_BITS(c, 114); - /* [d 0 t4 t3 0 c t0] = [p8 0 0 p5 p4 p3 0 p1 p0] */ - d += (__int128)a[2] * b[4] - + (__int128)a[3] * b[3] - + (__int128)a[4] * b[2]; - VERIFY_BITS(d, 114); - /* [d 0 t4 t3 0 c t0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */ - c += (d & M) * R; d >>= 52; - VERIFY_BITS(c, 115); - VERIFY_BITS(d, 62); - /* [d 0 0 t4 t3 0 c t0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */ - uint64_t t1 = c & M; c >>= 52; - VERIFY_BITS(t1, 52); - VERIFY_BITS(c, 63); - /* [d 0 0 t4 t3 c t1 t0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */ - - c += (__int128)a[0] * b[2] - + (__int128)a[1] * b[1] - + (__int128)a[2] * b[0]; - VERIFY_BITS(c, 114); - /* [d 0 0 t4 t3 c t1 t0] = [p8 0 p6 p5 p4 p3 p2 p1 p0] */ - d += (__int128)a[3] * b[4] - + (__int128)a[4] * b[3]; - VERIFY_BITS(d, 114); - /* [d 0 0 t4 t3 c t1 t0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - c += (d & M) * R; d >>= 52; - VERIFY_BITS(c, 115); - VERIFY_BITS(d, 62); - /* [d 0 0 0 t4 t3 c t1 t0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - - r[0] = t0; + r[0] = c & M; c >>= 52; VERIFY_BITS(r[0], 52); - /* [d 0 0 0 t4 t3 c t1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - r[1] = t1; + 
VERIFY_BITS(c, 61); + /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 0 p0] */ + + c += (__int128)a0 * b[1] + + (__int128)a1 * b[0]; + VERIFY_BITS(c, 114); + /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 p1 p0] */ + d += (__int128)a2 * b[4] + + (__int128)a3 * b[3] + + (__int128)a4 * b[2]; + VERIFY_BITS(d, 114); + /* [d 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */ + c += (d & M) * R; d >>= 52; + VERIFY_BITS(c, 115); + VERIFY_BITS(d, 62); + /* [d 0 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */ + r[1] = c & M; c >>= 52; VERIFY_BITS(r[1], 52); + VERIFY_BITS(c, 63); + /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */ + + c += (__int128)a0 * b[2] + + (__int128)a1 * b[1] + + (__int128)a2 * b[0]; + VERIFY_BITS(c, 114); + /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 p2 p1 p0] */ + d += (__int128)a3 * b[4] + + (__int128)a4 * b[3]; + VERIFY_BITS(d, 114); + /* [d 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + c += (d & M) * R; d >>= 52; + VERIFY_BITS(c, 115); + VERIFY_BITS(d, 62); + /* [d 0 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ + /* [d 0 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ r[2] = c & M; c >>= 52; VERIFY_BITS(r[2], 52); diff --git a/src/field_gmp_impl.h b/src/field_gmp_impl.h index 4e59fe35..269d52be 100644 --- a/src/field_gmp_impl.h +++ b/src/field_gmp_impl.h @@ -151,7 +151,8 @@ static void secp256k1_fe_reduce(secp256k1_fe_t *r, mp_limb_t *tmp) { r->n[FIELD_LIMBS] = mpn_add(r->n, tmp, FIELD_LIMBS, q, 1+(33+GMP_NUMB_BITS-1)/GMP_NUMB_BITS); } -static void secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *a, const secp256k1_fe_t *b) { +static void secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *a, const secp256k1_fe_t * SECP256K1_RESTRICT b) { + VERIFY_CHECK(r != b); secp256k1_fe_t ac = *a; secp256k1_fe_t bc = *b; secp256k1_fe_normalize(&ac); diff --git a/src/field_impl.h b/src/field_impl.h index f2664873..4aac6ebc 100644 --- a/src/field_impl.h +++ b/src/field_impl.h @@ -197,7 +197,7 @@ static void
secp256k1_fe_inv(secp256k1_fe_t *r, const secp256k1_fe_t *a) { for (int j=0; j<3; j++) secp256k1_fe_sqr(&t1, &t1); secp256k1_fe_mul(&t1, &t1, &x2); for (int j=0; j<2; j++) secp256k1_fe_sqr(&t1, &t1); - secp256k1_fe_mul(r, &t1, a); + secp256k1_fe_mul(r, a, &t1); } static void secp256k1_fe_inv_var(secp256k1_fe_t *r, const secp256k1_fe_t *a) { diff --git a/src/group_impl.h b/src/group_impl.h index 8d535bc4..d11ca4e3 100644 --- a/src/group_impl.h +++ b/src/group_impl.h @@ -217,7 +217,7 @@ static void secp256k1_gej_double_var(secp256k1_gej_t *r, const secp256k1_gej_t * } secp256k1_fe_t t1,t2,t3,t4; - secp256k1_fe_mul(&r->z, &a->y, &a->z); + secp256k1_fe_mul(&r->z, &a->z, &a->y); secp256k1_fe_mul_int(&r->z, 2); /* Z' = 2*Y*Z (2) */ secp256k1_fe_sqr(&t1, &a->x); secp256k1_fe_mul_int(&t1, 3); /* T1 = 3*X^2 (3) */ @@ -226,7 +226,7 @@ static void secp256k1_gej_double_var(secp256k1_gej_t *r, const secp256k1_gej_t * secp256k1_fe_mul_int(&t3, 2); /* T3 = 2*Y^2 (2) */ secp256k1_fe_sqr(&t4, &t3); secp256k1_fe_mul_int(&t4, 2); /* T4 = 8*Y^4 (2) */ - secp256k1_fe_mul(&t3, &a->x, &t3); /* T3 = 2*X*Y^2 (1) */ + secp256k1_fe_mul(&t3, &t3, &a->x); /* T3 = 2*X*Y^2 (1) */ r->x = t3; secp256k1_fe_mul_int(&r->x, 4); /* X' = 8*X*Y^2 (4) */ secp256k1_fe_negate(&r->x, &r->x, 4); /* X' = -8*X*Y^2 (5) */ diff --git a/src/util.h b/src/util.h index 96b47057..08b23a9d 100644 --- a/src/util.h +++ b/src/util.h @@ -61,4 +61,21 @@ #define VERIFY_CHECK(cond) do { (void)(cond); } while(0) #endif +/* Macro for restrict, when available and not in a VERIFY build. */ +#if defined(SECP256K1_BUILD) && defined(VERIFY) +# define SECP256K1_RESTRICT +#else +# if (!defined(__STDC_VERSION__) || (__STDC_VERSION__ < 199901L) ) +# if SECP256K1_GNUC_PREREQ(3,0) +# define SECP256K1_RESTRICT __restrict__ +# elif (defined(_MSC_VER) && _MSC_VER >= 1400) +# define SECP256K1_RESTRICT __restrict +# else +# define SECP256K1_RESTRICT +# endif +# else +# define SECP256K1_RESTRICT restrict +# endif +#endif + #endif