Merge ElementsProject/secp256k1-zkp#232: Backports from libsecp256k1 v0.3.2

39407c3f5999aa10e1470bc9eae8f63800a63e51 Mark stack variables as early clobber for technical correctness (Pieter Wuille) 56a5d41429a4daed2b02b59c45022044c3575955 Bugfix: mark outputs as early clobber in scalar x86_64 asm (Pieter Wuille) c8c0f55a1132c0fc9a726f0a4a1417288163b904 ct: Be cautious and use volatile trick in more "conditional" paths (Tim Ruffing) 3e9428996698257aa2a3b4e974f574b8479f1261 ct: Use volatile trick in scalar_cond_negate (Tim Ruffing) Pull request description: ACKs for top commit: real-or-random: ACK 39407c3f5999aa10e1470bc9eae8f63800a63e51 I also verified that the ct time tests pass on GCC 13.1.1 and Clang 15.0.7. Tree-SHA512: b7e695527ea58cc7b94a5f2fff6473b6779a469bc5c38baf92624b655cbdf303fbd204e6c1395fa02b98db3bc47bab32afe64bae4ab4fab18da856b621aab070
2023-05-14 20:19:44 +02:00 · 2023-05-14 20:19:44 +02:00 · ff33018fe7
commit ff33018fe7
parent edcba04c28 39407c3f59
6 changed files with 47 additions and 37 deletions
--- a/src/ecmult_const_impl.h
+++ b/src/ecmult_const_impl.h
@ -29,7 +29,7 @@ static void secp256k1_ecmult_odd_multiples_table_globalz_windowa(secp256k1_ge *p
 #define ECMULT_CONST_TABLE_GET_GE(r,pre,n,w) do { \
    int m = 0; \
    /* Extract the sign-bit for a constant time absolute-value. */ \
-    int mask = (n) >> (sizeof(n) * CHAR_BIT - 1); \
+    int volatile mask = (n) >> (sizeof(n) * CHAR_BIT - 1); \
    int abs_n = ((n) + mask) ^ mask; \
    int idx_n = abs_n >> 1; \
    secp256k1_fe neg_y; \
--- a/src/field_5x52_asm_impl.h
+++ b/src/field_5x52_asm_impl.h
@ -278,7 +278,7 @@ __asm__ __volatile__(
    "addq %%rsi,%%r8\n"
    /* r[4] = c */
    "movq %%r8,32(%%rdi)\n"
-: "+S"(a), "=m"(tmp1), "=m"(tmp2), "=m"(tmp3)
+: "+S"(a), "=&m"(tmp1), "=&m"(tmp2), "=&m"(tmp3)
 : "b"(b), "D"(r)
 : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "cc", "memory"
 );
@ -493,7 +493,7 @@ __asm__ __volatile__(
    "addq %%rsi,%%r8\n"
    /* r[4] = c */
    "movq %%r8,32(%%rdi)\n"
-: "+S"(a), "=m"(tmp1), "=m"(tmp2), "=m"(tmp3)
+: "+S"(a), "=&m"(tmp1), "=&m"(tmp2), "=&m"(tmp3)
 : "D"(r)
 : "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "cc", "memory"
 );
--- a/src/modinv32_impl.h
+++ b/src/modinv32_impl.h
@ -64,7 +64,7 @@ static void secp256k1_modinv32_normalize_30(secp256k1_modinv32_signed30 *r, int3
    const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
    int32_t r0 = r->v[0], r1 = r->v[1], r2 = r->v[2], r3 = r->v[3], r4 = r->v[4],
            r5 = r->v[5], r6 = r->v[6], r7 = r->v[7], r8 = r->v[8];
-    int32_t cond_add, cond_negate;
+    volatile int32_t cond_add, cond_negate;

 #ifdef VERIFY
    /* Verify that all limbs are in range (-2^30,2^30). */
@ -186,7 +186,8 @@ static int32_t secp256k1_modinv32_divsteps_30(int32_t zeta, uint32_t f0, uint32_
     * being inside [-2^31,2^31) means that casting to signed works correctly.
     */
    uint32_t u = 1, v = 0, q = 0, r = 1;
-    uint32_t c1, c2, f = f0, g = g0, x, y, z;
+    volatile uint32_t c1, c2;
+    uint32_t mask1, mask2, f = f0, g = g0, x, y, z;
    int i;

    for (i = 0; i < 30; ++i) {
@ -195,23 +196,25 @@ static int32_t secp256k1_modinv32_divsteps_30(int32_t zeta, uint32_t f0, uint32_
        VERIFY_CHECK((q * f0 + r * g0) == g << i);
        /* Compute conditional masks for (zeta < 0) and for (g & 1). */
        c1 = zeta >> 31;
-        c2 = -(g & 1);
+        mask1 = c1;
+        c2 = g & 1;
+        mask2 = -c2;
        /* Compute x,y,z, conditionally negated versions of f,u,v. */
-        x = (f ^ c1) - c1;
-        y = (u ^ c1) - c1;
-        z = (v ^ c1) - c1;
+        x = (f ^ mask1) - mask1;
+        y = (u ^ mask1) - mask1;
+        z = (v ^ mask1) - mask1;
        /* Conditionally add x,y,z to g,q,r. */
-        g += x & c2;
-        q += y & c2;
-        r += z & c2;
-        /* In what follows, c1 is a condition mask for (zeta < 0) and (g & 1). */
-        c1 &= c2;
+        g += x & mask2;
+        q += y & mask2;
+        r += z & mask2;
+        /* In what follows, mask1 is a condition mask for (zeta < 0) and (g & 1). */
+        mask1 &= mask2;
        /* Conditionally change zeta into -zeta-2 or zeta-1. */
-        zeta = (zeta ^ c1) - 1;
+        zeta = (zeta ^ mask1) - 1;
        /* Conditionally add g,q,r to f,u,v. */
-        f += g & c1;
-        u += q & c1;
-        v += r & c1;
+        f += g & mask1;
+        u += q & mask1;
+        v += r & mask1;
        /* Shifts */
        g >>= 1;
        u <<= 1;
--- a/src/modinv64_impl.h
+++ b/src/modinv64_impl.h
@ -69,7 +69,7 @@ static int secp256k1_modinv64_mul_cmp_62(const secp256k1_modinv64_signed62 *a, i
 static void secp256k1_modinv64_normalize_62(secp256k1_modinv64_signed62 *r, int64_t sign, const secp256k1_modinv64_modinfo *modinfo) {
    const int64_t M62 = (int64_t)(UINT64_MAX >> 2);
    int64_t r0 = r->v[0], r1 = r->v[1], r2 = r->v[2], r3 = r->v[3], r4 = r->v[4];
-    int64_t cond_add, cond_negate;
+    volatile int64_t cond_add, cond_negate;

 #ifdef VERIFY
    /* Verify that all limbs are in range (-2^62,2^62). */
@ -165,7 +165,8 @@ static int64_t secp256k1_modinv64_divsteps_59(int64_t zeta, uint64_t f0, uint64_
     * being inside [-2^63,2^63) means that casting to signed works correctly.
     */
    uint64_t u = 8, v = 0, q = 0, r = 8;
-    uint64_t c1, c2, f = f0, g = g0, x, y, z;
+    volatile uint64_t c1, c2;
+    uint64_t mask1, mask2, f = f0, g = g0, x, y, z;
    int i;

    for (i = 3; i < 62; ++i) {
@ -174,23 +175,25 @@ static int64_t secp256k1_modinv64_divsteps_59(int64_t zeta, uint64_t f0, uint64_
        VERIFY_CHECK((q * f0 + r * g0) == g << i);
        /* Compute conditional masks for (zeta < 0) and for (g & 1). */
        c1 = zeta >> 63;
-        c2 = -(g & 1);
+        mask1 = c1;
+        c2 = g & 1;
+        mask2 = -c2;
        /* Compute x,y,z, conditionally negated versions of f,u,v. */
-        x = (f ^ c1) - c1;
-        y = (u ^ c1) - c1;
-        z = (v ^ c1) - c1;
+        x = (f ^ mask1) - mask1;
+        y = (u ^ mask1) - mask1;
+        z = (v ^ mask1) - mask1;
        /* Conditionally add x,y,z to g,q,r. */
-        g += x & c2;
-        q += y & c2;
-        r += z & c2;
+        g += x & mask2;
+        q += y & mask2;
+        r += z & mask2;
        /* In what follows, c1 is a condition mask for (zeta < 0) and (g & 1). */
-        c1 &= c2;
+        mask1 &= mask2;
        /* Conditionally change zeta into -zeta-2 or zeta-1. */
-        zeta = (zeta ^ c1) - 1;
+        zeta = (zeta ^ mask1) - 1;
        /* Conditionally add g,q,r to f,u,v. */
-        f += g & c1;
-        u += q & c1;
-        v += r & c1;
+        f += g & mask1;
+        u += q & mask1;
+        v += r & mask1;
        /* Shifts */
        g >>= 1;
        u <<= 1;
--- a/src/scalar_4x64_impl.h
+++ b/src/scalar_4x64_impl.h
@ -110,8 +110,9 @@ static int secp256k1_scalar_add(secp256k1_scalar *r, const secp256k1_scalar *a,

 static void secp256k1_scalar_cadd_bit(secp256k1_scalar *r, unsigned int bit, int flag) {
    uint128_t t;
+    volatile int vflag = flag;
    VERIFY_CHECK(bit < 256);
-    bit += ((uint32_t) flag - 1) & 0x100;  /* forcing (bit >> 6) > 3 makes this a noop */
+    bit += ((uint32_t) vflag - 1) & 0x100;  /* forcing (bit >> 6) > 3 makes this a noop */
    t = (uint128_t)r->d[0] + (((uint64_t)((bit >> 6) == 0)) << (bit & 0x3F));
    r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
    t += (uint128_t)r->d[1] + (((uint64_t)((bit >> 6) == 1)) << (bit & 0x3F));
@ -180,7 +181,8 @@ static int secp256k1_scalar_is_high(const secp256k1_scalar *a) {
 static int secp256k1_scalar_cond_negate(secp256k1_scalar *r, int flag) {
    /* If we are flag = 0, mask = 00...00 and this is a no-op;
     * if we are flag = 1, mask = 11...11 and this is identical to secp256k1_scalar_negate */
-    uint64_t mask = !flag - 1;
+    volatile int vflag = flag;
+    uint64_t mask = -vflag;
    uint64_t nonzero = (secp256k1_scalar_is_zero(r) != 0) - 1;
    uint128_t t = (uint128_t)(r->d[0] ^ mask) + ((SECP256K1_N_0 + 1) & mask);
    r->d[0] = t & nonzero; t >>= 64;
@ -387,7 +389,7 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l)
    "movq %%r10, %q5\n"
    /* extract m6 */
    "movq %%r8, %q6\n"
-    : "=g"(m0), "=g"(m1), "=g"(m2), "=g"(m3), "=g"(m4), "=g"(m5), "=g"(m6)
+    : "=&g"(m0), "=&g"(m1), "=&g"(m2), "=g"(m3), "=g"(m4), "=g"(m5), "=g"(m6)
    : "S"(l), "i"(SECP256K1_N_C_0), "i"(SECP256K1_N_C_1)
    : "rax", "rdx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "cc");

--- a/src/scalar_8x32_impl.h
+++ b/src/scalar_8x32_impl.h
@ -153,8 +153,9 @@ static int secp256k1_scalar_add(secp256k1_scalar *r, const secp256k1_scalar *a,

 static void secp256k1_scalar_cadd_bit(secp256k1_scalar *r, unsigned int bit, int flag) {
    uint64_t t;
+    volatile int vflag = flag;
    VERIFY_CHECK(bit < 256);
-    bit += ((uint32_t) flag - 1) & 0x100;  /* forcing (bit >> 5) > 7 makes this a noop */
+    bit += ((uint32_t) vflag - 1) & 0x100;  /* forcing (bit >> 5) > 7 makes this a noop */
    t = (uint64_t)r->d[0] + (((uint32_t)((bit >> 5) == 0)) << (bit & 0x1F));
    r->d[0] = t & 0xFFFFFFFFULL; t >>= 32;
    t += (uint64_t)r->d[1] + (((uint32_t)((bit >> 5) == 1)) << (bit & 0x1F));
@ -253,7 +254,8 @@ static int secp256k1_scalar_is_high(const secp256k1_scalar *a) {
 static int secp256k1_scalar_cond_negate(secp256k1_scalar *r, int flag) {
    /* If we are flag = 0, mask = 00...00 and this is a no-op;
     * if we are flag = 1, mask = 11...11 and this is identical to secp256k1_scalar_negate */
-    uint32_t mask = !flag - 1;
+    volatile int vflag = flag;
+    uint32_t mask = -vflag;
    uint32_t nonzero = 0xFFFFFFFFUL * (secp256k1_scalar_is_zero(r) == 0);
    uint64_t t = (uint64_t)(r->d[0] ^ mask) + ((SECP256K1_N_0 + 1) & mask);
    r->d[0] = t & nonzero; t >>= 32;