Merge commits '44916ae9 86e3b38a ddf2b291 6138d73b e40fd277 ' into temp-merge-1156

commit e996d076da by Jonas Nick, 2023-07-17 14:01:52 +00:00
No known key found for this signature in database (GPG Key ID: 4861DBF262123605)
21 changed files with 1225 additions and 342 deletions


@@ -74,6 +74,7 @@ task:
     - env: {WIDEMUL: int64, RECOVERY: yes}
     - env: {WIDEMUL: int64, ECDH: yes, SCHNORRSIG: yes, EXPERIMENTAL: yes, ECDSA_S2C: yes, RANGEPROOF: yes, WHITELIST: yes, GENERATOR: yes, MUSIG: yes, ECDSAADAPTOR: yes, BPPP: yes}
     - env: {WIDEMUL: int128}
+    - env: {WIDEMUL: int128_struct}
     - env: {WIDEMUL: int128, RECOVERY: yes, SCHNORRSIG: yes}
     - env: {WIDEMUL: int128, ECDH: yes, SCHNORRSIG: yes, EXPERIMENTAL: yes, ECDSA_S2C: yes, RANGEPROOF: yes, WHITELIST: yes, GENERATOR: yes, MUSIG: yes, ECDSAADAPTOR: yes, BPPP: yes}
     - env: {WIDEMUL: int128, ASM: x86_64}
@@ -268,20 +269,26 @@ task:
     ECDSAADAPTOR: yes
     BPPP: yes
     CTIMETEST: no
+    # Use a MinGW-w64 host to tell ./configure we're building for Windows.
+    # This will detect some MinGW-w64 tools but then make will need only
+    # the MSVC tools CC, AR and NM as specified below.
+    HOST: x86_64-w64-mingw32
+    CC: /opt/msvc/bin/x64/cl
+    AR: /opt/msvc/bin/x64/lib
+    NM: /opt/msvc/bin/x64/dumpbin -symbols -headers
     # Set non-essential options that affect the CLI messages here.
     # (They depend on the user's taste, so we don't want to set them automatically in configure.ac.)
     CFLAGS: -nologo -diagnostics:caret
     LDFLAGS: -XCClinker -nologo -XCClinker -diagnostics:caret
-    # Use a MinGW-w64 host to tell ./configure we're building for Windows.
-    # This will detect some MinGW-w64 tools but then make will need only
-    # the MSVC tools CC, AR and NM as specified below.
   matrix:
     - name: "x86_64 (MSVC): Windows (Debian stable, Wine)"
+    - name: "x86_64 (MSVC): Windows (Debian stable, Wine, int128_struct)"
       env:
-        HOST: x86_64-w64-mingw32
-        CC: /opt/msvc/bin/x64/cl
-        AR: /opt/msvc/bin/x64/lib
-        NM: /opt/msvc/bin/x64/dumpbin -symbols -headers
+        WIDEMUL: int128_struct
+    - name: "x86_64 (MSVC): Windows (Debian stable, Wine, int128_struct with __(u)mulh)"
+      env:
+        WIDEMUL: int128_struct
+        CPPFLAGS: -DSECP256K1_MSVC_MULH_TEST_OVERRIDE
     - name: "i686 (MSVC): Windows (Debian stable, Wine)"
       env:
         HOST: i686-w64-mingw32
@@ -346,6 +353,40 @@ task:
     - ./ci/cirrus.sh
   << : *CAT_LOGS

+# Memory sanitizers
+task:
+  << : *LINUX_CONTAINER
+  name: "MSan"
+  env:
+    ECDH: yes
+    RECOVERY: yes
+    SCHNORRSIG: yes
+    EXPERIMENTAL: yes
+    ECDSA_S2C: yes
+    GENERATOR: yes
+    RANGEPROOF: yes
+    WHITELIST: yes
+    MUSIG: yes
+    ECDSAADAPTOR: yes
+    BPPP: yes
+    CTIMETEST: no
+    CC: clang
+    SECP256K1_TEST_ITERS: 32
+    ASM: no
+  container:
+    memory: 2G
+  matrix:
+    - env:
+        CFLAGS: "-fsanitize=memory -g"
+    - env:
+        ECMULTGENPRECISION: 2
+        ECMULTWINDOW: 2
+        CFLAGS: "-fsanitize=memory -g -O3"
+  << : *MERGE_BASE
+  test_script:
+    - ./ci/cirrus.sh
+  << : *CAT_LOGS
+
 task:
   name: "C++ -fpermissive (entire project)"
   << : *LINUX_CONTAINER


@@ -50,6 +50,12 @@ noinst_HEADERS += src/precomputed_ecmult.h
 noinst_HEADERS += src/precomputed_ecmult_gen.h
 noinst_HEADERS += src/assumptions.h
 noinst_HEADERS += src/util.h
+noinst_HEADERS += src/int128.h
+noinst_HEADERS += src/int128_impl.h
+noinst_HEADERS += src/int128_native.h
+noinst_HEADERS += src/int128_native_impl.h
+noinst_HEADERS += src/int128_struct.h
+noinst_HEADERS += src/int128_struct_impl.h
 noinst_HEADERS += src/scratch.h
 noinst_HEADERS += src/scratch_impl.h
 noinst_HEADERS += src/selftest.h


@@ -5,6 +5,27 @@ set -x
 export LC_ALL=C

+# Print relevant CI environment to allow reproducing the job outside of CI.
+print_environment() {
+    # Turn off -x because it messes up the output
+    set +x
+    # There are many ways to print variable names and their content. This one
+    # does not rely on bash.
+    for i in WERROR_CFLAGS MAKEFLAGS BUILD \
+        ECMULTWINDOW ECMULTGENPRECISION ASM WIDEMUL WITH_VALGRIND EXTRAFLAGS \
+        EXPERIMENTAL ECDH RECOVERY SCHNORRSIG \
+        ECDSA_S2C GENERATOR RANGEPROOF WHITELIST MUSIG ECDSAADAPTOR BPPP \
+        SECP256K1_TEST_ITERS BENCH SECP256K1_BENCH_ITERS CTIMETEST\
+        EXAMPLES \
+        WRAPPER_CMD CC AR NM HOST
+    do
+        eval 'printf "%s %s " "$i=\"${'"$i"'}\""'
+    done
+    echo "$0"
+    set -x
+}
+print_environment
+
 # Start persistent wineserver if necessary.
 # This speeds up jobs with many invocations of wine (e.g., ./configure with MSVC) tremendously.
 case "$WRAPPER_CMD" in


@@ -220,7 +220,11 @@ AC_ARG_ENABLE(reduced_surjection_proof_size,
 [SECP_SET_DEFAULT([use_reduced_surjection_proof_size], [no], [no])])

 # Test-only override of the (autodetected by the C code) "widemul" setting.
-# Legal values are int64 (for [u]int64_t), int128 (for [unsigned] __int128), and auto (the default).
+# Legal values are:
+# * int64 (for [u]int64_t),
+# * int128 (for [unsigned] __int128),
+# * int128_struct (for int128 implemented as a structure),
+# * and auto (the default).
 AC_ARG_WITH([test-override-wide-multiply], [] ,[set_widemul=$withval], [set_widemul=auto])

 AC_ARG_WITH([asm], [AS_HELP_STRING([--with-asm=x86_64|arm|no|auto],
@@ -342,6 +346,9 @@ fi
 # Select wide multiplication implementation
 case $set_widemul in
+int128_struct)
+  AC_DEFINE(USE_FORCE_WIDEMUL_INT128_STRUCT, 1, [Define this symbol to force the use of the structure for simulating (unsigned) int128 based wide multiplication])
+  ;;
 int128)
   AC_DEFINE(USE_FORCE_WIDEMUL_INT128, 1, [Define this symbol to force the use of the (unsigned) __int128 based wide multiplication implementation])
   ;;


@@ -10,6 +10,9 @@
 #include <limits.h>

 #include "util.h"
+#if defined(SECP256K1_INT128_NATIVE)
+#include "int128_native.h"
+#endif

 /* This library, like most software, relies on a number of compiler implementation defined (but not undefined)
    behaviours. Although the behaviours we require are essentially universal we test them specifically here to
@@ -55,7 +58,7 @@ struct secp256k1_assumption_checker {
         /* To int64_t. */
         ((int64_t)(uint64_t)0xB123C456D789E012ULL == (int64_t)-(int64_t)0x4EDC3BA928761FEEULL) &&
-#if defined(SECP256K1_WIDEMUL_INT128)
+#if defined(SECP256K1_INT128_NATIVE)
         ((int64_t)(((uint128_t)0xA1234567B8901234ULL << 64) + 0xC5678901D2345678ULL) == (int64_t)-(int64_t)0x3A9876FE2DCBA988ULL) &&
         (((int64_t)(int128_t)(((uint128_t)0xB1C2D3E4F5A6B7C8ULL << 64) + 0xD9E0F1A2B3C4D5E6ULL)) == (int64_t)(uint64_t)0xD9E0F1A2B3C4D5E6ULL) &&
         (((int64_t)(int128_t)(((uint128_t)0xABCDEF0123456789ULL << 64) + 0x0123456789ABCDEFULL)) == (int64_t)(uint64_t)0x0123456789ABCDEFULL) &&
@@ -71,7 +74,7 @@ struct secp256k1_assumption_checker {
         ((((int16_t)0xE9AC) >> 4) == (int16_t)(uint16_t)0xFE9A) &&
         ((((int32_t)0x937C918A) >> 9) == (int32_t)(uint32_t)0xFFC9BE48) &&
         ((((int64_t)0xA8B72231DF9CF4B9ULL) >> 19) == (int64_t)(uint64_t)0xFFFFF516E4463BF3ULL) &&
-#if defined(SECP256K1_WIDEMUL_INT128)
+#if defined(SECP256K1_INT128_NATIVE)
         ((((int128_t)(((uint128_t)0xCD833A65684A0DBCULL << 64) + 0xB349312F71EA7637ULL)) >> 39) == (int128_t)(((uint128_t)0xFFFFFFFFFF9B0674ULL << 64) + 0xCAD0941B79669262ULL)) &&
 #endif
         1) * 2 - 1];


@@ -11,6 +11,7 @@
 #include "util.h"
 #include "bench.h"
 #include "hash_impl.h"
+#include "int128_impl.h"
 #include "scalar_impl.h"
 #include "testrand_impl.h"


@@ -200,9 +200,15 @@ static int secp256k1_ecmult_wnaf(int *wnaf, int len, const secp256k1_scalar *a,
         bit += now;
     }
 #ifdef VERIFY
-    CHECK(carry == 0);
-    while (bit < 256) {
-        CHECK(secp256k1_scalar_get_bits(&s, bit++, 1) == 0);
+    {
+        int verify_bit = bit;
+
+        VERIFY_CHECK(carry == 0);
+        while (verify_bit < 256) {
+            VERIFY_CHECK(secp256k1_scalar_get_bits(&s, verify_bit, 1) == 0);
+            verify_bit++;
+        }
     }
 #endif
     return last_set_bit + 1;


@@ -9,14 +9,18 @@

 #include <stdint.h>
+#include "int128.h"

 #ifdef VERIFY
 #define VERIFY_BITS(x, n) VERIFY_CHECK(((x) >> (n)) == 0)
+#define VERIFY_BITS_128(x, n) VERIFY_CHECK(secp256k1_u128_check_bits((x), (n)))
 #else
 #define VERIFY_BITS(x, n) do { } while(0)
+#define VERIFY_BITS_128(x, n) do { } while(0)
 #endif

 SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t *a, const uint64_t * SECP256K1_RESTRICT b) {
-    uint128_t c, d;
+    secp256k1_uint128 c, d;
     uint64_t t3, t4, tx, u0;
     uint64_t a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4];
     const uint64_t M = 0xFFFFFFFFFFFFFULL, R = 0x1000003D10ULL;
@ -40,121 +44,119 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t
* Note that [x 0 0 0 0 0] = [x*R]. * Note that [x 0 0 0 0 0] = [x*R].
*/ */
d = (uint128_t)a0 * b[3] secp256k1_u128_mul(&d, a0, b[3]);
+ (uint128_t)a1 * b[2] secp256k1_u128_accum_mul(&d, a1, b[2]);
+ (uint128_t)a2 * b[1] secp256k1_u128_accum_mul(&d, a2, b[1]);
+ (uint128_t)a3 * b[0]; secp256k1_u128_accum_mul(&d, a3, b[0]);
VERIFY_BITS(d, 114); VERIFY_BITS_128(&d, 114);
/* [d 0 0 0] = [p3 0 0 0] */ /* [d 0 0 0] = [p3 0 0 0] */
c = (uint128_t)a4 * b[4]; secp256k1_u128_mul(&c, a4, b[4]);
VERIFY_BITS(c, 112); VERIFY_BITS_128(&c, 112);
/* [c 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */ /* [c 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
d += (uint128_t)R * (uint64_t)c; c >>= 64; secp256k1_u128_accum_mul(&d, R, secp256k1_u128_to_u64(&c)); secp256k1_u128_rshift(&c, 64);
VERIFY_BITS(d, 115); VERIFY_BITS_128(&d, 115);
VERIFY_BITS(c, 48); VERIFY_BITS_128(&c, 48);
/* [(c<<12) 0 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */ /* [(c<<12) 0 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
t3 = d & M; d >>= 52; t3 = secp256k1_u128_to_u64(&d) & M; secp256k1_u128_rshift(&d, 52);
VERIFY_BITS(t3, 52); VERIFY_BITS(t3, 52);
VERIFY_BITS(d, 63); VERIFY_BITS_128(&d, 63);
/* [(c<<12) 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */ /* [(c<<12) 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
d += (uint128_t)a0 * b[4] secp256k1_u128_accum_mul(&d, a0, b[4]);
+ (uint128_t)a1 * b[3] secp256k1_u128_accum_mul(&d, a1, b[3]);
+ (uint128_t)a2 * b[2] secp256k1_u128_accum_mul(&d, a2, b[2]);
+ (uint128_t)a3 * b[1] secp256k1_u128_accum_mul(&d, a3, b[1]);
+ (uint128_t)a4 * b[0]; secp256k1_u128_accum_mul(&d, a4, b[0]);
VERIFY_BITS(d, 115); VERIFY_BITS_128(&d, 115);
/* [(c<<12) 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ /* [(c<<12) 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
d += (uint128_t)(R << 12) * (uint64_t)c; secp256k1_u128_accum_mul(&d, R << 12, secp256k1_u128_to_u64(&c));
VERIFY_BITS(d, 116); VERIFY_BITS_128(&d, 116);
/* [d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ /* [d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
t4 = d & M; d >>= 52; t4 = secp256k1_u128_to_u64(&d) & M; secp256k1_u128_rshift(&d, 52);
VERIFY_BITS(t4, 52); VERIFY_BITS(t4, 52);
VERIFY_BITS(d, 64); VERIFY_BITS_128(&d, 64);
/* [d t4 t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ /* [d t4 t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
tx = (t4 >> 48); t4 &= (M >> 4); tx = (t4 >> 48); t4 &= (M >> 4);
VERIFY_BITS(tx, 4); VERIFY_BITS(tx, 4);
VERIFY_BITS(t4, 48); VERIFY_BITS(t4, 48);
/* [d t4+(tx<<48) t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ /* [d t4+(tx<<48) t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
c = (uint128_t)a0 * b[0]; secp256k1_u128_mul(&c, a0, b[0]);
VERIFY_BITS(c, 112); VERIFY_BITS_128(&c, 112);
/* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 0 p4 p3 0 0 p0] */ /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 0 p4 p3 0 0 p0] */
d += (uint128_t)a1 * b[4] secp256k1_u128_accum_mul(&d, a1, b[4]);
+ (uint128_t)a2 * b[3] secp256k1_u128_accum_mul(&d, a2, b[3]);
+ (uint128_t)a3 * b[2] secp256k1_u128_accum_mul(&d, a3, b[2]);
+ (uint128_t)a4 * b[1]; secp256k1_u128_accum_mul(&d, a4, b[1]);
VERIFY_BITS(d, 115); VERIFY_BITS_128(&d, 115);
/* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
u0 = d & M; d >>= 52; u0 = secp256k1_u128_to_u64(&d) & M; secp256k1_u128_rshift(&d, 52);
VERIFY_BITS(u0, 52); VERIFY_BITS(u0, 52);
VERIFY_BITS(d, 63); VERIFY_BITS_128(&d, 63);
/* [d u0 t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ /* [d u0 t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
/* [d 0 t4+(tx<<48)+(u0<<52) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ /* [d 0 t4+(tx<<48)+(u0<<52) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
u0 = (u0 << 4) | tx; u0 = (u0 << 4) | tx;
VERIFY_BITS(u0, 56); VERIFY_BITS(u0, 56);
/* [d 0 t4+(u0<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ /* [d 0 t4+(u0<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
c += (uint128_t)u0 * (R >> 4); secp256k1_u128_accum_mul(&c, u0, R >> 4);
VERIFY_BITS(c, 115); VERIFY_BITS_128(&c, 115);
/* [d 0 t4 t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ /* [d 0 t4 t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
r[0] = c & M; c >>= 52; r[0] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52);
VERIFY_BITS(r[0], 52); VERIFY_BITS(r[0], 52);
VERIFY_BITS(c, 61); VERIFY_BITS_128(&c, 61);
/* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 0 p0] */ /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 0 p0] */
c += (uint128_t)a0 * b[1] secp256k1_u128_accum_mul(&c, a0, b[1]);
+ (uint128_t)a1 * b[0]; secp256k1_u128_accum_mul(&c, a1, b[0]);
VERIFY_BITS(c, 114); VERIFY_BITS_128(&c, 114);
/* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 p1 p0] */ /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 p1 p0] */
d += (uint128_t)a2 * b[4] secp256k1_u128_accum_mul(&d, a2, b[4]);
+ (uint128_t)a3 * b[3] secp256k1_u128_accum_mul(&d, a3, b[3]);
+ (uint128_t)a4 * b[2]; secp256k1_u128_accum_mul(&d, a4, b[2]);
VERIFY_BITS(d, 114); VERIFY_BITS_128(&d, 114);
/* [d 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */ /* [d 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
c += (d & M) * R; d >>= 52; secp256k1_u128_accum_mul(&c, secp256k1_u128_to_u64(&d) & M, R); secp256k1_u128_rshift(&d, 52);
VERIFY_BITS(c, 115); VERIFY_BITS_128(&c, 115);
VERIFY_BITS(d, 62); VERIFY_BITS_128(&d, 62);
/* [d 0 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */ /* [d 0 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
r[1] = c & M; c >>= 52; r[1] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52);
VERIFY_BITS(r[1], 52); VERIFY_BITS(r[1], 52);
VERIFY_BITS(c, 63); VERIFY_BITS_128(&c, 63);
/* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */ /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
c += (uint128_t)a0 * b[2] secp256k1_u128_accum_mul(&c, a0, b[2]);
+ (uint128_t)a1 * b[1] secp256k1_u128_accum_mul(&c, a1, b[1]);
+ (uint128_t)a2 * b[0]; secp256k1_u128_accum_mul(&c, a2, b[0]);
VERIFY_BITS(c, 114); VERIFY_BITS_128(&c, 114);
/* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 p2 p1 p0] */ /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 p2 p1 p0] */
d += (uint128_t)a3 * b[4] secp256k1_u128_accum_mul(&d, a3, b[4]);
+ (uint128_t)a4 * b[3]; secp256k1_u128_accum_mul(&d, a4, b[3]);
VERIFY_BITS(d, 114); VERIFY_BITS_128(&d, 114);
/* [d 0 0 t4 t3 c t1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ /* [d 0 0 t4 t3 c t1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
c += (uint128_t)R * (uint64_t)d; d >>= 64; secp256k1_u128_accum_mul(&c, R, secp256k1_u128_to_u64(&d)); secp256k1_u128_rshift(&d, 64);
VERIFY_BITS(c, 115); VERIFY_BITS_128(&c, 115);
VERIFY_BITS(d, 50); VERIFY_BITS_128(&d, 50);
/* [(d<<12) 0 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ /* [(d<<12) 0 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
r[2] = c & M; c >>= 52; r[2] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52);
VERIFY_BITS(r[2], 52); VERIFY_BITS(r[2], 52);
VERIFY_BITS(c, 63); VERIFY_BITS_128(&c, 63);
/* [(d<<12) 0 0 0 t4 t3+c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ /* [(d<<12) 0 0 0 t4 t3+c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
c += (uint128_t)(R << 12) * (uint64_t)d + t3; secp256k1_u128_accum_mul(&c, R << 12, secp256k1_u128_to_u64(&d));
VERIFY_BITS(c, 100); secp256k1_u128_accum_u64(&c, t3);
VERIFY_BITS_128(&c, 100);
/* [t4 c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ /* [t4 c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
r[3] = c & M; c >>= 52; r[3] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52);
VERIFY_BITS(r[3], 52); VERIFY_BITS(r[3], 52);
VERIFY_BITS(c, 48); VERIFY_BITS_128(&c, 48);
/* [t4+c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ /* [t4+c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
c += t4; r[4] = secp256k1_u128_to_u64(&c) + t4;
VERIFY_BITS(c, 49);
/* [c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
r[4] = c;
VERIFY_BITS(r[4], 49); VERIFY_BITS(r[4], 49);
/* [r4 r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ /* [r4 r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
} }
SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint64_t *r, const uint64_t *a) { SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint64_t *r, const uint64_t *a) {
uint128_t c, d; secp256k1_uint128 c, d;
uint64_t a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4]; uint64_t a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4];
int64_t t3, t4, tx, u0; int64_t t3, t4, tx, u0;
const uint64_t M = 0xFFFFFFFFFFFFFULL, R = 0x1000003D10ULL; const uint64_t M = 0xFFFFFFFFFFFFFULL, R = 0x1000003D10ULL;
@ -170,107 +172,105 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint64_t *r, const uint64_t
* Note that [x 0 0 0 0 0] = [x*R]. * Note that [x 0 0 0 0 0] = [x*R].
*/ */
d = (uint128_t)(a0*2) * a3 secp256k1_u128_mul(&d, a0*2, a3);
+ (uint128_t)(a1*2) * a2; secp256k1_u128_accum_mul(&d, a1*2, a2);
VERIFY_BITS(d, 114); VERIFY_BITS_128(&d, 114);
/* [d 0 0 0] = [p3 0 0 0] */ /* [d 0 0 0] = [p3 0 0 0] */
c = (uint128_t)a4 * a4; secp256k1_u128_mul(&c, a4, a4);
VERIFY_BITS(c, 112); VERIFY_BITS_128(&c, 112);
/* [c 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */ /* [c 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
d += (uint128_t)R * (uint64_t)c; c >>= 64; secp256k1_u128_accum_mul(&d, R, secp256k1_u128_to_u64(&c)); secp256k1_u128_rshift(&c, 64);
VERIFY_BITS(d, 115); VERIFY_BITS_128(&d, 115);
VERIFY_BITS(c, 48); VERIFY_BITS_128(&c, 48);
/* [(c<<12) 0 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */ /* [(c<<12) 0 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
t3 = d & M; d >>= 52; t3 = secp256k1_u128_to_u64(&d) & M; secp256k1_u128_rshift(&d, 52);
VERIFY_BITS(t3, 52); VERIFY_BITS(t3, 52);
VERIFY_BITS(d, 63); VERIFY_BITS_128(&d, 63);
/* [(c<<12) 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */ /* [(c<<12) 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
a4 *= 2; a4 *= 2;
d += (uint128_t)a0 * a4 secp256k1_u128_accum_mul(&d, a0, a4);
+ (uint128_t)(a1*2) * a3 secp256k1_u128_accum_mul(&d, a1*2, a3);
+ (uint128_t)a2 * a2; secp256k1_u128_accum_mul(&d, a2, a2);
VERIFY_BITS(d, 115); VERIFY_BITS_128(&d, 115);
/* [(c<<12) 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ /* [(c<<12) 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
d += (uint128_t)(R << 12) * (uint64_t)c; secp256k1_u128_accum_mul(&d, R << 12, secp256k1_u128_to_u64(&c));
VERIFY_BITS(d, 116); VERIFY_BITS_128(&d, 116);
/* [d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ /* [d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
t4 = d & M; d >>= 52; t4 = secp256k1_u128_to_u64(&d) & M; secp256k1_u128_rshift(&d, 52);
VERIFY_BITS(t4, 52); VERIFY_BITS(t4, 52);
VERIFY_BITS(d, 64); VERIFY_BITS_128(&d, 64);
/* [d t4 t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ /* [d t4 t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
tx = (t4 >> 48); t4 &= (M >> 4); tx = (t4 >> 48); t4 &= (M >> 4);
VERIFY_BITS(tx, 4); VERIFY_BITS(tx, 4);
VERIFY_BITS(t4, 48); VERIFY_BITS(t4, 48);
/* [d t4+(tx<<48) t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ /* [d t4+(tx<<48) t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
c = (uint128_t)a0 * a0; secp256k1_u128_mul(&c, a0, a0);
VERIFY_BITS(c, 112); VERIFY_BITS_128(&c, 112);
/* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 0 p4 p3 0 0 p0] */ /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 0 p4 p3 0 0 p0] */
d += (uint128_t)a1 * a4 secp256k1_u128_accum_mul(&d, a1, a4);
+ (uint128_t)(a2*2) * a3; secp256k1_u128_accum_mul(&d, a2*2, a3);
VERIFY_BITS(d, 114); VERIFY_BITS_128(&d, 114);
/* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
u0 = d & M; d >>= 52; u0 = secp256k1_u128_to_u64(&d) & M; secp256k1_u128_rshift(&d, 52);
VERIFY_BITS(u0, 52); VERIFY_BITS(u0, 52);
VERIFY_BITS(d, 62); VERIFY_BITS_128(&d, 62);
/* [d u0 t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ /* [d u0 t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
/* [d 0 t4+(tx<<48)+(u0<<52) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ /* [d 0 t4+(tx<<48)+(u0<<52) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
u0 = (u0 << 4) | tx; u0 = (u0 << 4) | tx;
VERIFY_BITS(u0, 56); VERIFY_BITS(u0, 56);
/* [d 0 t4+(u0<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ /* [d 0 t4+(u0<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
c += (uint128_t)u0 * (R >> 4); secp256k1_u128_accum_mul(&c, u0, R >> 4);
VERIFY_BITS(c, 113); VERIFY_BITS_128(&c, 113);
/* [d 0 t4 t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ /* [d 0 t4 t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
r[0] = c & M; c >>= 52; r[0] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52);
VERIFY_BITS(r[0], 52); VERIFY_BITS(r[0], 52);
VERIFY_BITS(c, 61); VERIFY_BITS_128(&c, 61);
/* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 0 p0] */ /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 0 p0] */
a0 *= 2; a0 *= 2;
c += (uint128_t)a0 * a1; secp256k1_u128_accum_mul(&c, a0, a1);
VERIFY_BITS(c, 114); VERIFY_BITS_128(&c, 114);
/* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 p1 p0] */ /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 p1 p0] */
d += (uint128_t)a2 * a4 secp256k1_u128_accum_mul(&d, a2, a4);
+ (uint128_t)a3 * a3; secp256k1_u128_accum_mul(&d, a3, a3);
VERIFY_BITS(d, 114); VERIFY_BITS_128(&d, 114);
/* [d 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */ /* [d 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
c += (d & M) * R; d >>= 52; secp256k1_u128_accum_mul(&c, secp256k1_u128_to_u64(&d) & M, R); secp256k1_u128_rshift(&d, 52);
VERIFY_BITS(c, 115); VERIFY_BITS_128(&c, 115);
VERIFY_BITS(d, 62); VERIFY_BITS_128(&d, 62);
/* [d 0 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */ /* [d 0 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
r[1] = c & M; c >>= 52; r[1] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52);
VERIFY_BITS(r[1], 52); VERIFY_BITS(r[1], 52);
VERIFY_BITS(c, 63); VERIFY_BITS_128(&c, 63);
/* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */ /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
c += (uint128_t)a0 * a2 secp256k1_u128_accum_mul(&c, a0, a2);
+ (uint128_t)a1 * a1; secp256k1_u128_accum_mul(&c, a1, a1);
VERIFY_BITS(c, 114); VERIFY_BITS_128(&c, 114);
/* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 p2 p1 p0] */ /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 p2 p1 p0] */
d += (uint128_t)a3 * a4; secp256k1_u128_accum_mul(&d, a3, a4);
VERIFY_BITS(d, 114); VERIFY_BITS_128(&d, 114);
/* [d 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ /* [d 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
c += (uint128_t)R * (uint64_t)d; d >>= 64; secp256k1_u128_accum_mul(&c, R, secp256k1_u128_to_u64(&d)); secp256k1_u128_rshift(&d, 64);
VERIFY_BITS(c, 115); VERIFY_BITS_128(&c, 115);
VERIFY_BITS(d, 50); VERIFY_BITS_128(&d, 50);
/* [(d<<12) 0 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ /* [(d<<12) 0 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
r[2] = c & M; c >>= 52; r[2] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52);
VERIFY_BITS(r[2], 52); VERIFY_BITS(r[2], 52);
VERIFY_BITS(c, 63); VERIFY_BITS_128(&c, 63);
/* [(d<<12) 0 0 0 t4 t3+c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ /* [(d<<12) 0 0 0 t4 t3+c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
c += (uint128_t)(R << 12) * (uint64_t)d + t3; secp256k1_u128_accum_mul(&c, R << 12, secp256k1_u128_to_u64(&d));
VERIFY_BITS(c, 100); secp256k1_u128_accum_u64(&c, t3);
VERIFY_BITS_128(&c, 100);
/* [t4 c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ /* [t4 c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
r[3] = c & M; c >>= 52; r[3] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52);
VERIFY_BITS(r[3], 52); VERIFY_BITS(r[3], 52);
VERIFY_BITS(c, 48); VERIFY_BITS_128(&c, 48);
/* [t4+c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ /* [t4+c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
c += t4; r[4] = secp256k1_u128_to_u64(&c) + t4;
VERIFY_BITS(c, 49);
/* [c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
r[4] = c;
VERIFY_BITS(r[4], 49); VERIFY_BITS(r[4], 49);
/* [r4 r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ /* [r4 r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
} }

src/int128.h (new file, 85 lines)

@ -0,0 +1,85 @@
#ifndef SECP256K1_INT128_H
#define SECP256K1_INT128_H
#include "util.h"
#if defined(SECP256K1_WIDEMUL_INT128)
# if defined(SECP256K1_INT128_NATIVE)
# include "int128_native.h"
# elif defined(SECP256K1_INT128_STRUCT)
# include "int128_struct.h"
# else
# error "Please select int128 implementation"
# endif
/* Construct an unsigned 128-bit value from a high and a low 64-bit value. */
static SECP256K1_INLINE void secp256k1_u128_load(secp256k1_uint128 *r, uint64_t hi, uint64_t lo);
/* Multiply two unsigned 64-bit values a and b and write the result to r. */
static SECP256K1_INLINE void secp256k1_u128_mul(secp256k1_uint128 *r, uint64_t a, uint64_t b);
/* Multiply two unsigned 64-bit values a and b and add the result to r.
* The final result is taken modulo 2^128.
*/
static SECP256K1_INLINE void secp256k1_u128_accum_mul(secp256k1_uint128 *r, uint64_t a, uint64_t b);
/* Add an unsigned 64-bit value a to r.
* The final result is taken modulo 2^128.
*/
static SECP256K1_INLINE void secp256k1_u128_accum_u64(secp256k1_uint128 *r, uint64_t a);
/* Unsigned (logical) right shift.
* Non-constant time in n.
*/
static SECP256K1_INLINE void secp256k1_u128_rshift(secp256k1_uint128 *r, unsigned int n);
/* Return the low 64-bits of a 128-bit value as an unsigned 64-bit value. */
static SECP256K1_INLINE uint64_t secp256k1_u128_to_u64(const secp256k1_uint128 *a);
/* Return the high 64-bits of a 128-bit value as an unsigned 64-bit value. */
static SECP256K1_INLINE uint64_t secp256k1_u128_hi_u64(const secp256k1_uint128 *a);
/* Write an unsigned 64-bit value to r. */
static SECP256K1_INLINE void secp256k1_u128_from_u64(secp256k1_uint128 *r, uint64_t a);
/* Tests if r is strictly less than 2^n.
* n must be strictly less than 128.
*/
static SECP256K1_INLINE int secp256k1_u128_check_bits(const secp256k1_uint128 *r, unsigned int n);
/* Construct a signed 128-bit value from a high and a low 64-bit value. */
static SECP256K1_INLINE void secp256k1_i128_load(secp256k1_int128 *r, int64_t hi, uint64_t lo);
/* Multiply two signed 64-bit values a and b and write the result to r. */
static SECP256K1_INLINE void secp256k1_i128_mul(secp256k1_int128 *r, int64_t a, int64_t b);
/* Multiply two signed 64-bit values a and b and add the result to r.
* Overflow or underflow from the addition is undefined behaviour.
*/
static SECP256K1_INLINE void secp256k1_i128_accum_mul(secp256k1_int128 *r, int64_t a, int64_t b);
/* Compute a*d - b*c from signed 64-bit values and write the result to r. */
static SECP256K1_INLINE void secp256k1_i128_det(secp256k1_int128 *r, int64_t a, int64_t b, int64_t c, int64_t d);
/* Signed (arithmetic) right shift.
* Non-constant time in b.
*/
static SECP256K1_INLINE void secp256k1_i128_rshift(secp256k1_int128 *r, unsigned int b);
/* Return the low 64-bits of a 128-bit value interpreted as a signed 64-bit value. */
static SECP256K1_INLINE int64_t secp256k1_i128_to_i64(const secp256k1_int128 *a);
/* Write a signed 64-bit value to r. */
static SECP256K1_INLINE void secp256k1_i128_from_i64(secp256k1_int128 *r, int64_t a);
/* Compare two 128-bit values for equality. */
static SECP256K1_INLINE int secp256k1_i128_eq_var(const secp256k1_int128 *a, const secp256k1_int128 *b);
/* Tests if r is equal to 2^n.
* n must be strictly less than 127.
*/
static SECP256K1_INLINE int secp256k1_i128_check_pow2(const secp256k1_int128 *r, unsigned int n);
#endif
#endif
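As an illustration only (not part of this commit), a translation unit inside the library source tree could use the helpers declared above roughly as follows. The function name example_muladd is hypothetical; the sketch assumes one of the int128 backends has been selected so that int128_impl.h provides the static inline definitions, as the benchmark change in this commit does.

#include "int128_impl.h"

/* Compute hi:lo = a*b + c*d; the accumulation is taken modulo 2^128 as documented above. */
static void example_muladd(uint64_t a, uint64_t b, uint64_t c, uint64_t d,
                           uint64_t *hi, uint64_t *lo) {
    secp256k1_uint128 acc;
    secp256k1_u128_mul(&acc, a, b);        /* acc  = a*b (full 128-bit product) */
    secp256k1_u128_accum_mul(&acc, c, d);  /* acc += c*d (mod 2^128)            */
    *lo = secp256k1_u128_to_u64(&acc);     /* low 64 bits of the accumulator    */
    *hi = secp256k1_u128_hi_u64(&acc);     /* high 64 bits of the accumulator   */
}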

src/int128_impl.h (new file, 18 lines)

@ -0,0 +1,18 @@
#ifndef SECP256K1_INT128_IMPL_H
#define SECP256K1_INT128_IMPL_H
#include "util.h"
#include "int128.h"
#if defined(SECP256K1_WIDEMUL_INT128)
# if defined(SECP256K1_INT128_NATIVE)
# include "int128_native_impl.h"
# elif defined(SECP256K1_INT128_STRUCT)
# include "int128_struct_impl.h"
# else
# error "Please select int128 implementation"
# endif
#endif
#endif

src/int128_native.h (new file, 19 lines)

@ -0,0 +1,19 @@
#ifndef SECP256K1_INT128_NATIVE_H
#define SECP256K1_INT128_NATIVE_H
#include <stdint.h>
#include "util.h"
#if !defined(UINT128_MAX) && defined(__SIZEOF_INT128__)
SECP256K1_GNUC_EXT typedef unsigned __int128 uint128_t;
SECP256K1_GNUC_EXT typedef __int128 int128_t;
# define UINT128_MAX ((uint128_t)(-1))
# define INT128_MAX ((int128_t)(UINT128_MAX >> 1))
# define INT128_MIN (-INT128_MAX - 1)
/* No (U)INT128_C macros because compilers providing __int128 do not support 128-bit literals. */
#endif
typedef uint128_t secp256k1_uint128;
typedef int128_t secp256k1_int128;
#endif

src/int128_native_impl.h (new file, 87 lines)

@ -0,0 +1,87 @@
#ifndef SECP256K1_INT128_NATIVE_IMPL_H
#define SECP256K1_INT128_NATIVE_IMPL_H
#include "int128.h"
static SECP256K1_INLINE void secp256k1_u128_load(secp256k1_uint128 *r, uint64_t hi, uint64_t lo) {
*r = (((uint128_t)hi) << 64) + lo;
}
static SECP256K1_INLINE void secp256k1_u128_mul(secp256k1_uint128 *r, uint64_t a, uint64_t b) {
*r = (uint128_t)a * b;
}
static SECP256K1_INLINE void secp256k1_u128_accum_mul(secp256k1_uint128 *r, uint64_t a, uint64_t b) {
*r += (uint128_t)a * b;
}
static SECP256K1_INLINE void secp256k1_u128_accum_u64(secp256k1_uint128 *r, uint64_t a) {
*r += a;
}
static SECP256K1_INLINE void secp256k1_u128_rshift(secp256k1_uint128 *r, unsigned int n) {
VERIFY_CHECK(n < 128);
*r >>= n;
}
static SECP256K1_INLINE uint64_t secp256k1_u128_to_u64(const secp256k1_uint128 *a) {
return (uint64_t)(*a);
}
static SECP256K1_INLINE uint64_t secp256k1_u128_hi_u64(const secp256k1_uint128 *a) {
return (uint64_t)(*a >> 64);
}
static SECP256K1_INLINE void secp256k1_u128_from_u64(secp256k1_uint128 *r, uint64_t a) {
*r = a;
}
static SECP256K1_INLINE int secp256k1_u128_check_bits(const secp256k1_uint128 *r, unsigned int n) {
VERIFY_CHECK(n < 128);
return (*r >> n == 0);
}
static SECP256K1_INLINE void secp256k1_i128_load(secp256k1_int128 *r, int64_t hi, uint64_t lo) {
*r = (((uint128_t)(uint64_t)hi) << 64) + lo;
}
static SECP256K1_INLINE void secp256k1_i128_mul(secp256k1_int128 *r, int64_t a, int64_t b) {
*r = (int128_t)a * b;
}
static SECP256K1_INLINE void secp256k1_i128_accum_mul(secp256k1_int128 *r, int64_t a, int64_t b) {
int128_t ab = (int128_t)a * b;
VERIFY_CHECK(0 <= ab ? *r <= INT128_MAX - ab : INT128_MIN - ab <= *r);
*r += ab;
}
static SECP256K1_INLINE void secp256k1_i128_det(secp256k1_int128 *r, int64_t a, int64_t b, int64_t c, int64_t d) {
int128_t ad = (int128_t)a * d;
int128_t bc = (int128_t)b * c;
VERIFY_CHECK(0 <= bc ? INT128_MIN + bc <= ad : ad <= INT128_MAX + bc);
*r = ad - bc;
}
static SECP256K1_INLINE void secp256k1_i128_rshift(secp256k1_int128 *r, unsigned int n) {
VERIFY_CHECK(n < 128);
*r >>= n;
}
static SECP256K1_INLINE int64_t secp256k1_i128_to_i64(const secp256k1_int128 *a) {
return *a;
}
static SECP256K1_INLINE void secp256k1_i128_from_i64(secp256k1_int128 *r, int64_t a) {
*r = a;
}
static SECP256K1_INLINE int secp256k1_i128_eq_var(const secp256k1_int128 *a, const secp256k1_int128 *b) {
return *a == *b;
}
static SECP256K1_INLINE int secp256k1_i128_check_pow2(const secp256k1_int128 *r, unsigned int n) {
VERIFY_CHECK(n < 127);
return (*r == (int128_t)1 << n);
}
#endif

src/int128_struct.h (new file, 14 lines)

@ -0,0 +1,14 @@
#ifndef SECP256K1_INT128_STRUCT_H
#define SECP256K1_INT128_STRUCT_H
#include <stdint.h>
#include "util.h"
typedef struct {
uint64_t lo;
uint64_t hi;
} secp256k1_uint128;
typedef secp256k1_uint128 secp256k1_int128;
#endif

src/int128_struct_impl.h (new file, 192 lines)

@ -0,0 +1,192 @@
#ifndef SECP256K1_INT128_STRUCT_IMPL_H
#define SECP256K1_INT128_STRUCT_IMPL_H
#include "int128.h"
#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM64)) /* MSVC */
# include <intrin.h>
# if defined(_M_ARM64) || defined(SECP256K1_MSVC_MULH_TEST_OVERRIDE)
/* On ARM64 MSVC, use __(u)mulh for the upper half of 64x64 multiplications.
(Define SECP256K1_MSVC_MULH_TEST_OVERRIDE to test this code path on X64,
which supports both __(u)mulh and _umul128.) */
# if defined(SECP256K1_MSVC_MULH_TEST_OVERRIDE)
# pragma message(__FILE__ ": SECP256K1_MSVC_MULH_TEST_OVERRIDE is defined, forcing use of __(u)mulh.")
# endif
static SECP256K1_INLINE uint64_t secp256k1_umul128(uint64_t a, uint64_t b, uint64_t* hi) {
*hi = __umulh(a, b);
return a * b;
}
static SECP256K1_INLINE int64_t secp256k1_mul128(int64_t a, int64_t b, int64_t* hi) {
*hi = __mulh(a, b);
return (uint64_t)a * (uint64_t)b;
}
# else
/* On x86_64 MSVC, use native _(u)mul128 for 64x64->128 multiplications. */
# define secp256k1_umul128 _umul128
# define secp256k1_mul128 _mul128
# endif
#else
/* On other systems, emulate 64x64->128 multiplications using 32x32->64 multiplications. */
static SECP256K1_INLINE uint64_t secp256k1_umul128(uint64_t a, uint64_t b, uint64_t* hi) {
uint64_t ll = (uint64_t)(uint32_t)a * (uint32_t)b;
uint64_t lh = (uint32_t)a * (b >> 32);
uint64_t hl = (a >> 32) * (uint32_t)b;
uint64_t hh = (a >> 32) * (b >> 32);
uint64_t mid34 = (ll >> 32) + (uint32_t)lh + (uint32_t)hl;
*hi = hh + (lh >> 32) + (hl >> 32) + (mid34 >> 32);
return (mid34 << 32) + (uint32_t)ll;
}
static SECP256K1_INLINE int64_t secp256k1_mul128(int64_t a, int64_t b, int64_t* hi) {
uint64_t ll = (uint64_t)(uint32_t)a * (uint32_t)b;
int64_t lh = (uint32_t)a * (b >> 32);
int64_t hl = (a >> 32) * (uint32_t)b;
int64_t hh = (a >> 32) * (b >> 32);
uint64_t mid34 = (ll >> 32) + (uint32_t)lh + (uint32_t)hl;
*hi = hh + (lh >> 32) + (hl >> 32) + (mid34 >> 32);
return (mid34 << 32) + (uint32_t)ll;
}
#endif
static SECP256K1_INLINE void secp256k1_u128_load(secp256k1_uint128 *r, uint64_t hi, uint64_t lo) {
r->hi = hi;
r->lo = lo;
}
static SECP256K1_INLINE void secp256k1_u128_mul(secp256k1_uint128 *r, uint64_t a, uint64_t b) {
r->lo = secp256k1_umul128(a, b, &r->hi);
}
static SECP256K1_INLINE void secp256k1_u128_accum_mul(secp256k1_uint128 *r, uint64_t a, uint64_t b) {
uint64_t lo, hi;
lo = secp256k1_umul128(a, b, &hi);
r->lo += lo;
r->hi += hi + (r->lo < lo);
}
static SECP256K1_INLINE void secp256k1_u128_accum_u64(secp256k1_uint128 *r, uint64_t a) {
r->lo += a;
r->hi += r->lo < a;
}
/* Unsigned (logical) right shift.
* Non-constant time in n.
*/
static SECP256K1_INLINE void secp256k1_u128_rshift(secp256k1_uint128 *r, unsigned int n) {
VERIFY_CHECK(n < 128);
if (n >= 64) {
r->lo = r->hi >> (n-64);
r->hi = 0;
} else if (n > 0) {
r->lo = ((1U * r->hi) << (64-n)) | r->lo >> n;
r->hi >>= n;
}
}
static SECP256K1_INLINE uint64_t secp256k1_u128_to_u64(const secp256k1_uint128 *a) {
return a->lo;
}
static SECP256K1_INLINE uint64_t secp256k1_u128_hi_u64(const secp256k1_uint128 *a) {
return a->hi;
}
static SECP256K1_INLINE void secp256k1_u128_from_u64(secp256k1_uint128 *r, uint64_t a) {
r->hi = 0;
r->lo = a;
}
static SECP256K1_INLINE int secp256k1_u128_check_bits(const secp256k1_uint128 *r, unsigned int n) {
VERIFY_CHECK(n < 128);
return n >= 64 ? r->hi >> (n - 64) == 0
: r->hi == 0 && r->lo >> n == 0;
}
static SECP256K1_INLINE void secp256k1_i128_load(secp256k1_int128 *r, int64_t hi, uint64_t lo) {
r->hi = hi;
r->lo = lo;
}
static SECP256K1_INLINE void secp256k1_i128_mul(secp256k1_int128 *r, int64_t a, int64_t b) {
int64_t hi;
r->lo = (uint64_t)secp256k1_mul128(a, b, &hi);
r->hi = (uint64_t)hi;
}
static SECP256K1_INLINE void secp256k1_i128_accum_mul(secp256k1_int128 *r, int64_t a, int64_t b) {
int64_t hi;
uint64_t lo = (uint64_t)secp256k1_mul128(a, b, &hi);
r->lo += lo;
hi += r->lo < lo;
/* Verify no overflow.
* If r represents a positive value (the sign bit is not set) and the value we are adding is a positive value (the sign bit is not set),
* then we require that the resulting value also be positive (the sign bit is not set).
* Note that (X <= Y) means (X implies Y) when X and Y are boolean values (i.e. 0 or 1).
*/
VERIFY_CHECK((r->hi <= 0x7fffffffffffffffu && (uint64_t)hi <= 0x7fffffffffffffffu) <= (r->hi + (uint64_t)hi <= 0x7fffffffffffffffu));
/* Verify no underflow.
* If r represents a negative value (the sign bit is set) and the value we are adding is a negative value (the sign bit is set),
* then we require that the resulting value also be negative (the sign bit is set).
*/
VERIFY_CHECK((r->hi > 0x7fffffffffffffffu && (uint64_t)hi > 0x7fffffffffffffffu) <= (r->hi + (uint64_t)hi > 0x7fffffffffffffffu));
r->hi += hi;
}
static SECP256K1_INLINE void secp256k1_i128_dissip_mul(secp256k1_int128 *r, int64_t a, int64_t b) {
int64_t hi;
uint64_t lo = (uint64_t)secp256k1_mul128(a, b, &hi);
hi += r->lo < lo;
/* Verify no overflow.
* If r represents a positive value (the sign bit is not set) and the value we are subtracting is a negative value (the sign bit is set),
* then we require that the resulting value also be positive (the sign bit is not set).
*/
VERIFY_CHECK((r->hi <= 0x7fffffffffffffffu && (uint64_t)hi > 0x7fffffffffffffffu) <= (r->hi - (uint64_t)hi <= 0x7fffffffffffffffu));
/* Verify no underflow.
* If r represents a negative value (the sign bit is set) and the value we are subtracting is a positive value (the sign bit is not set),
* then we require that the resulting value also be negative (the sign bit is set).
*/
VERIFY_CHECK((r->hi > 0x7fffffffffffffffu && (uint64_t)hi <= 0x7fffffffffffffffu) <= (r->hi - (uint64_t)hi > 0x7fffffffffffffffu));
r->hi -= hi;
r->lo -= lo;
}
static SECP256K1_INLINE void secp256k1_i128_det(secp256k1_int128 *r, int64_t a, int64_t b, int64_t c, int64_t d) {
secp256k1_i128_mul(r, a, d);
secp256k1_i128_dissip_mul(r, b, c);
}
/* Signed (arithmetic) right shift.
* Non-constant time in n.
*/
static SECP256K1_INLINE void secp256k1_i128_rshift(secp256k1_int128 *r, unsigned int n) {
VERIFY_CHECK(n < 128);
if (n >= 64) {
r->lo = (uint64_t)((int64_t)(r->hi) >> (n-64));
r->hi = (uint64_t)((int64_t)(r->hi) >> 63);
} else if (n > 0) {
r->lo = ((1U * r->hi) << (64-n)) | r->lo >> n;
r->hi = (uint64_t)((int64_t)(r->hi) >> n);
}
}
static SECP256K1_INLINE int64_t secp256k1_i128_to_i64(const secp256k1_int128 *a) {
return (int64_t)a->lo;
}
static SECP256K1_INLINE void secp256k1_i128_from_i64(secp256k1_int128 *r, int64_t a) {
r->hi = (uint64_t)(a >> 63);
r->lo = (uint64_t)a;
}
static SECP256K1_INLINE int secp256k1_i128_eq_var(const secp256k1_int128 *a, const secp256k1_int128 *b) {
return a->hi == b->hi && a->lo == b->lo;
}
static SECP256K1_INLINE int secp256k1_i128_check_pow2(const secp256k1_int128 *r, unsigned int n) {
VERIFY_CHECK(n < 127);
return n >= 64 ? r->hi == (uint64_t)1 << (n - 64) && r->lo == 0
: r->hi == 0 && r->lo == (uint64_t)1 << n;
}
#endif
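For illustration only (not part of this commit), the portable fallback above builds the 128-bit product from four 32x32->64 partial products. The small self-contained program below (names umul128_portable and main are hypothetical) repeats that decomposition and checks it against a native unsigned __int128, assuming a compiler such as GCC or Clang that provides one.

#include <stdint.h>
#include <stdio.h>

/* Same 32x32->64 decomposition as the non-MSVC secp256k1_umul128 above. */
static uint64_t umul128_portable(uint64_t a, uint64_t b, uint64_t *hi) {
    uint64_t ll = (uint64_t)(uint32_t)a * (uint32_t)b;    /* low  x low  */
    uint64_t lh = (uint32_t)a * (b >> 32);                /* low  x high */
    uint64_t hl = (a >> 32) * (uint32_t)b;                /* high x low  */
    uint64_t hh = (a >> 32) * (b >> 32);                  /* high x high */
    uint64_t mid34 = (ll >> 32) + (uint32_t)lh + (uint32_t)hl;
    *hi = hh + (lh >> 32) + (hl >> 32) + (mid34 >> 32);
    return (mid34 << 32) + (uint32_t)ll;
}

int main(void) {
    uint64_t a = 0x1000003D10ULL, b = 0xFFFFFFFFFFFFFULL, hi, lo;
    unsigned __int128 ref = (unsigned __int128)a * b;     /* reference product */
    lo = umul128_portable(a, b, &hi);
    printf("match: %d\n", hi == (uint64_t)(ref >> 64) && lo == (uint64_t)ref);
    return 0;
}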


@@ -7,10 +7,9 @@
 #ifndef SECP256K1_MODINV64_IMPL_H
 #define SECP256K1_MODINV64_IMPL_H

+#include "int128.h"
 #include "modinv64.h"

-#include "util.h"

 /* This file implements modular inversion based on the paper "Fast constant-time gcd computation and
  * modular inversion" by Daniel J. Bernstein and Bo-Yin Yang.
  *
@@ -18,6 +17,15 @@
  * implementation for N=62, using 62-bit signed limbs represented as int64_t.
  */

+/* Data type for transition matrices (see section 3 of explanation).
+ *
+ * t = [ u  v ]
+ *     [ q  r ]
+ */
+typedef struct {
+    int64_t u, v, q, r;
+} secp256k1_modinv64_trans2x2;
+
 #ifdef VERIFY
 /* Helper function to compute the absolute value of an int64_t.
  * (we don't use abs/labs/llabs as it depends on the int sizes). */
@@ -32,15 +40,17 @@ static const secp256k1_modinv64_signed62 SECP256K1_SIGNED62_ONE = {{1}};
 /* Compute a*factor and put it in r. All but the top limb in r will be in range [0,2^62). */
 static void secp256k1_modinv64_mul_62(secp256k1_modinv64_signed62 *r, const secp256k1_modinv64_signed62 *a, int alen, int64_t factor) {
     const int64_t M62 = (int64_t)(UINT64_MAX >> 2);
-    int128_t c = 0;
+    secp256k1_int128 c, d;
     int i;
+    secp256k1_i128_from_i64(&c, 0);
     for (i = 0; i < 4; ++i) {
-        if (i < alen) c += (int128_t)a->v[i] * factor;
-        r->v[i] = (int64_t)c & M62; c >>= 62;
+        if (i < alen) secp256k1_i128_accum_mul(&c, a->v[i], factor);
+        r->v[i] = secp256k1_i128_to_i64(&c) & M62; secp256k1_i128_rshift(&c, 62);
     }
-    if (4 < alen) c += (int128_t)a->v[4] * factor;
-    VERIFY_CHECK(c == (int64_t)c);
-    r->v[4] = (int64_t)c;
+    if (4 < alen) secp256k1_i128_accum_mul(&c, a->v[4], factor);
+    secp256k1_i128_from_i64(&d, secp256k1_i128_to_i64(&c));
+    VERIFY_CHECK(secp256k1_i128_eq_var(&c, &d));
+    r->v[4] = secp256k1_i128_to_i64(&c);
 }

 /* Return -1 for a<b*factor, 0 for a==b*factor, 1 for a>b*factor. A has alen limbs; b has 5. */
@@ -60,6 +70,13 @@ static int secp256k1_modinv64_mul_cmp_62(const secp256k1_modinv64_signed62 *a, i
     }
     return 0;
 }
+
+/* Check if the determinant of t is equal to 1 << n. */
+static int secp256k1_modinv64_det_check_pow2(const secp256k1_modinv64_trans2x2 *t, unsigned int n) {
+    secp256k1_int128 a;
+    secp256k1_i128_det(&a, t->u, t->v, t->q, t->r);
+    return secp256k1_i128_check_pow2(&a, n);
+}
 #endif

 /* Take as input a signed62 number in range (-2*modulus,modulus), and add a multiple of the modulus
@@ -136,15 +153,6 @@ static void secp256k1_modinv64_normalize_62(secp256k1_modinv64_signed62 *r, int6
 #endif
 }

-/* Data type for transition matrices (see section 3 of explanation).
- *
- * t = [ u  v ]
- *     [ q  r ]
- */
-typedef struct {
-    int64_t u, v, q, r;
-} secp256k1_modinv64_trans2x2;
-
 /* Compute the transition matrix and eta for 59 divsteps (where zeta=-(delta+1/2)).
  * Note that the transformation matrix is scaled by 2^62 and not 2^59.
  *
@@ -206,13 +214,15 @@ static int64_t secp256k1_modinv64_divsteps_59(int64_t zeta, uint64_t f0, uint64_
     t->v = (int64_t)v;
     t->q = (int64_t)q;
     t->r = (int64_t)r;
+#ifdef VERIFY
     /* The determinant of t must be a power of two. This guarantees that multiplication with t
      * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
      * will be divided out again). As each divstep's individual matrix has determinant 2, the
      * aggregate of 59 of them will have determinant 2^59. Multiplying with the initial
      * 8*identity (which has determinant 2^6) means the overall output has determinant
      * 2^65. */
-    VERIFY_CHECK((int128_t)t->u * t->r - (int128_t)t->v * t->q == ((int128_t)1) << 65);
+    VERIFY_CHECK(secp256k1_modinv64_det_check_pow2(t, 65));
+#endif
     return zeta;
 }
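As a side note (not from the source), the exponent 65 checked here is just determinant multiplicativity:

\det T = \det(8 I_2) \cdot \prod_{i=1}^{59} \det(M_i) = 2^{6} \cdot 2^{59} = 2^{65}.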
@@ -289,11 +299,13 @@ static int64_t secp256k1_modinv64_divsteps_62_var(int64_t eta, uint64_t f0, uint
     t->v = (int64_t)v;
     t->q = (int64_t)q;
     t->r = (int64_t)r;
+#ifdef VERIFY
     /* The determinant of t must be a power of two. This guarantees that multiplication with t
      * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
      * will be divided out again). As each divstep's individual matrix has determinant 2, the
      * aggregate of 62 of them will have determinant 2^62. */
-    VERIFY_CHECK((int128_t)t->u * t->r - (int128_t)t->v * t->q == ((int128_t)1) << 62);
+    VERIFY_CHECK(secp256k1_modinv64_det_check_pow2(t, 62));
+#endif
     return eta;
 }
@@ -310,7 +322,7 @@ static void secp256k1_modinv64_update_de_62(secp256k1_modinv64_signed62 *d, secp
     const int64_t e0 = e->v[0], e1 = e->v[1], e2 = e->v[2], e3 = e->v[3], e4 = e->v[4];
     const int64_t u = t->u, v = t->v, q = t->q, r = t->r;
     int64_t md, me, sd, se;
-    int128_t cd, ce;
+    secp256k1_int128 cd, ce;
 #ifdef VERIFY
     VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(d, 5, &modinfo->modulus, -2) > 0); /* d > -2*modulus */
     VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(d, 5, &modinfo->modulus, 1) < 0);  /* d <    modulus */
@ -327,54 +339,64 @@ static void secp256k1_modinv64_update_de_62(secp256k1_modinv64_signed62 *d, secp
md = (u & sd) + (v & se); md = (u & sd) + (v & se);
me = (q & sd) + (r & se); me = (q & sd) + (r & se);
/* Begin computing t*[d,e]. */ /* Begin computing t*[d,e]. */
cd = (int128_t)u * d0 + (int128_t)v * e0; secp256k1_i128_mul(&cd, u, d0);
ce = (int128_t)q * d0 + (int128_t)r * e0; secp256k1_i128_accum_mul(&cd, v, e0);
secp256k1_i128_mul(&ce, q, d0);
secp256k1_i128_accum_mul(&ce, r, e0);
/* Correct md,me so that t*[d,e]+modulus*[md,me] has 62 zero bottom bits. */ /* Correct md,me so that t*[d,e]+modulus*[md,me] has 62 zero bottom bits. */
md -= (modinfo->modulus_inv62 * (uint64_t)cd + md) & M62; md -= (modinfo->modulus_inv62 * (uint64_t)secp256k1_i128_to_i64(&cd) + md) & M62;
me -= (modinfo->modulus_inv62 * (uint64_t)ce + me) & M62; me -= (modinfo->modulus_inv62 * (uint64_t)secp256k1_i128_to_i64(&ce) + me) & M62;
/* Update the beginning of computation for t*[d,e]+modulus*[md,me] now md,me are known. */ /* Update the beginning of computation for t*[d,e]+modulus*[md,me] now md,me are known. */
cd += (int128_t)modinfo->modulus.v[0] * md; secp256k1_i128_accum_mul(&cd, modinfo->modulus.v[0], md);
ce += (int128_t)modinfo->modulus.v[0] * me; secp256k1_i128_accum_mul(&ce, modinfo->modulus.v[0], me);
/* Verify that the low 62 bits of the computation are indeed zero, and then throw them away. */ /* Verify that the low 62 bits of the computation are indeed zero, and then throw them away. */
VERIFY_CHECK(((int64_t)cd & M62) == 0); cd >>= 62; VERIFY_CHECK((secp256k1_i128_to_i64(&cd) & M62) == 0); secp256k1_i128_rshift(&cd, 62);
VERIFY_CHECK(((int64_t)ce & M62) == 0); ce >>= 62; VERIFY_CHECK((secp256k1_i128_to_i64(&ce) & M62) == 0); secp256k1_i128_rshift(&ce, 62);
/* Compute limb 1 of t*[d,e]+modulus*[md,me], and store it as output limb 0 (= down shift). */ /* Compute limb 1 of t*[d,e]+modulus*[md,me], and store it as output limb 0 (= down shift). */
cd += (int128_t)u * d1 + (int128_t)v * e1; secp256k1_i128_accum_mul(&cd, u, d1);
ce += (int128_t)q * d1 + (int128_t)r * e1; secp256k1_i128_accum_mul(&cd, v, e1);
secp256k1_i128_accum_mul(&ce, q, d1);
secp256k1_i128_accum_mul(&ce, r, e1);
if (modinfo->modulus.v[1]) { /* Optimize for the case where limb of modulus is zero. */ if (modinfo->modulus.v[1]) { /* Optimize for the case where limb of modulus is zero. */
cd += (int128_t)modinfo->modulus.v[1] * md; secp256k1_i128_accum_mul(&cd, modinfo->modulus.v[1], md);
ce += (int128_t)modinfo->modulus.v[1] * me; secp256k1_i128_accum_mul(&ce, modinfo->modulus.v[1], me);
} }
d->v[0] = (int64_t)cd & M62; cd >>= 62; d->v[0] = secp256k1_i128_to_i64(&cd) & M62; secp256k1_i128_rshift(&cd, 62);
e->v[0] = (int64_t)ce & M62; ce >>= 62; e->v[0] = secp256k1_i128_to_i64(&ce) & M62; secp256k1_i128_rshift(&ce, 62);
/* Compute limb 2 of t*[d,e]+modulus*[md,me], and store it as output limb 1. */ /* Compute limb 2 of t*[d,e]+modulus*[md,me], and store it as output limb 1. */
cd += (int128_t)u * d2 + (int128_t)v * e2; secp256k1_i128_accum_mul(&cd, u, d2);
ce += (int128_t)q * d2 + (int128_t)r * e2; secp256k1_i128_accum_mul(&cd, v, e2);
secp256k1_i128_accum_mul(&ce, q, d2);
secp256k1_i128_accum_mul(&ce, r, e2);
if (modinfo->modulus.v[2]) { /* Optimize for the case where limb of modulus is zero. */ if (modinfo->modulus.v[2]) { /* Optimize for the case where limb of modulus is zero. */
cd += (int128_t)modinfo->modulus.v[2] * md; secp256k1_i128_accum_mul(&cd, modinfo->modulus.v[2], md);
ce += (int128_t)modinfo->modulus.v[2] * me; secp256k1_i128_accum_mul(&ce, modinfo->modulus.v[2], me);
} }
d->v[1] = (int64_t)cd & M62; cd >>= 62; d->v[1] = secp256k1_i128_to_i64(&cd) & M62; secp256k1_i128_rshift(&cd, 62);
e->v[1] = (int64_t)ce & M62; ce >>= 62; e->v[1] = secp256k1_i128_to_i64(&ce) & M62; secp256k1_i128_rshift(&ce, 62);
/* Compute limb 3 of t*[d,e]+modulus*[md,me], and store it as output limb 2. */ /* Compute limb 3 of t*[d,e]+modulus*[md,me], and store it as output limb 2. */
cd += (int128_t)u * d3 + (int128_t)v * e3; secp256k1_i128_accum_mul(&cd, u, d3);
ce += (int128_t)q * d3 + (int128_t)r * e3; secp256k1_i128_accum_mul(&cd, v, e3);
secp256k1_i128_accum_mul(&ce, q, d3);
secp256k1_i128_accum_mul(&ce, r, e3);
if (modinfo->modulus.v[3]) { /* Optimize for the case where limb of modulus is zero. */ if (modinfo->modulus.v[3]) { /* Optimize for the case where limb of modulus is zero. */
cd += (int128_t)modinfo->modulus.v[3] * md; secp256k1_i128_accum_mul(&cd, modinfo->modulus.v[3], md);
ce += (int128_t)modinfo->modulus.v[3] * me; secp256k1_i128_accum_mul(&ce, modinfo->modulus.v[3], me);
} }
d->v[2] = (int64_t)cd & M62; cd >>= 62; d->v[2] = secp256k1_i128_to_i64(&cd) & M62; secp256k1_i128_rshift(&cd, 62);
e->v[2] = (int64_t)ce & M62; ce >>= 62; e->v[2] = secp256k1_i128_to_i64(&ce) & M62; secp256k1_i128_rshift(&ce, 62);
/* Compute limb 4 of t*[d,e]+modulus*[md,me], and store it as output limb 3. */ /* Compute limb 4 of t*[d,e]+modulus*[md,me], and store it as output limb 3. */
cd += (int128_t)u * d4 + (int128_t)v * e4; secp256k1_i128_accum_mul(&cd, u, d4);
ce += (int128_t)q * d4 + (int128_t)r * e4; secp256k1_i128_accum_mul(&cd, v, e4);
cd += (int128_t)modinfo->modulus.v[4] * md; secp256k1_i128_accum_mul(&ce, q, d4);
ce += (int128_t)modinfo->modulus.v[4] * me; secp256k1_i128_accum_mul(&ce, r, e4);
d->v[3] = (int64_t)cd & M62; cd >>= 62; secp256k1_i128_accum_mul(&cd, modinfo->modulus.v[4], md);
e->v[3] = (int64_t)ce & M62; ce >>= 62; secp256k1_i128_accum_mul(&ce, modinfo->modulus.v[4], me);
d->v[3] = secp256k1_i128_to_i64(&cd) & M62; secp256k1_i128_rshift(&cd, 62);
e->v[3] = secp256k1_i128_to_i64(&ce) & M62; secp256k1_i128_rshift(&ce, 62);
/* What remains is limb 5 of t*[d,e]+modulus*[md,me]; store it as output limb 4. */ /* What remains is limb 5 of t*[d,e]+modulus*[md,me]; store it as output limb 4. */
d->v[4] = (int64_t)cd; d->v[4] = secp256k1_i128_to_i64(&cd);
e->v[4] = (int64_t)ce; e->v[4] = secp256k1_i128_to_i64(&ce);
#ifdef VERIFY #ifdef VERIFY
VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(d, 5, &modinfo->modulus, -2) > 0); /* d > -2*modulus */ VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(d, 5, &modinfo->modulus, -2) > 0); /* d > -2*modulus */
VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(d, 5, &modinfo->modulus, 1) < 0); /* d < modulus */ VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(d, 5, &modinfo->modulus, 1) < 0); /* d < modulus */
@ -392,36 +414,46 @@ static void secp256k1_modinv64_update_fg_62(secp256k1_modinv64_signed62 *f, secp
const int64_t f0 = f->v[0], f1 = f->v[1], f2 = f->v[2], f3 = f->v[3], f4 = f->v[4]; const int64_t f0 = f->v[0], f1 = f->v[1], f2 = f->v[2], f3 = f->v[3], f4 = f->v[4];
const int64_t g0 = g->v[0], g1 = g->v[1], g2 = g->v[2], g3 = g->v[3], g4 = g->v[4]; const int64_t g0 = g->v[0], g1 = g->v[1], g2 = g->v[2], g3 = g->v[3], g4 = g->v[4];
const int64_t u = t->u, v = t->v, q = t->q, r = t->r; const int64_t u = t->u, v = t->v, q = t->q, r = t->r;
int128_t cf, cg; secp256k1_int128 cf, cg;
/* Start computing t*[f,g]. */ /* Start computing t*[f,g]. */
cf = (int128_t)u * f0 + (int128_t)v * g0; secp256k1_i128_mul(&cf, u, f0);
cg = (int128_t)q * f0 + (int128_t)r * g0; secp256k1_i128_accum_mul(&cf, v, g0);
secp256k1_i128_mul(&cg, q, f0);
secp256k1_i128_accum_mul(&cg, r, g0);
/* Verify that the bottom 62 bits of the result are zero, and then throw them away. */ /* Verify that the bottom 62 bits of the result are zero, and then throw them away. */
VERIFY_CHECK(((int64_t)cf & M62) == 0); cf >>= 62; VERIFY_CHECK((secp256k1_i128_to_i64(&cf) & M62) == 0); secp256k1_i128_rshift(&cf, 62);
VERIFY_CHECK(((int64_t)cg & M62) == 0); cg >>= 62; VERIFY_CHECK((secp256k1_i128_to_i64(&cg) & M62) == 0); secp256k1_i128_rshift(&cg, 62);
/* Compute limb 1 of t*[f,g], and store it as output limb 0 (= down shift). */ /* Compute limb 1 of t*[f,g], and store it as output limb 0 (= down shift). */
cf += (int128_t)u * f1 + (int128_t)v * g1; secp256k1_i128_accum_mul(&cf, u, f1);
cg += (int128_t)q * f1 + (int128_t)r * g1; secp256k1_i128_accum_mul(&cf, v, g1);
f->v[0] = (int64_t)cf & M62; cf >>= 62; secp256k1_i128_accum_mul(&cg, q, f1);
g->v[0] = (int64_t)cg & M62; cg >>= 62; secp256k1_i128_accum_mul(&cg, r, g1);
f->v[0] = secp256k1_i128_to_i64(&cf) & M62; secp256k1_i128_rshift(&cf, 62);
g->v[0] = secp256k1_i128_to_i64(&cg) & M62; secp256k1_i128_rshift(&cg, 62);
/* Compute limb 2 of t*[f,g], and store it as output limb 1. */ /* Compute limb 2 of t*[f,g], and store it as output limb 1. */
cf += (int128_t)u * f2 + (int128_t)v * g2; secp256k1_i128_accum_mul(&cf, u, f2);
cg += (int128_t)q * f2 + (int128_t)r * g2; secp256k1_i128_accum_mul(&cf, v, g2);
f->v[1] = (int64_t)cf & M62; cf >>= 62; secp256k1_i128_accum_mul(&cg, q, f2);
g->v[1] = (int64_t)cg & M62; cg >>= 62; secp256k1_i128_accum_mul(&cg, r, g2);
f->v[1] = secp256k1_i128_to_i64(&cf) & M62; secp256k1_i128_rshift(&cf, 62);
g->v[1] = secp256k1_i128_to_i64(&cg) & M62; secp256k1_i128_rshift(&cg, 62);
/* Compute limb 3 of t*[f,g], and store it as output limb 2. */ /* Compute limb 3 of t*[f,g], and store it as output limb 2. */
cf += (int128_t)u * f3 + (int128_t)v * g3; secp256k1_i128_accum_mul(&cf, u, f3);
cg += (int128_t)q * f3 + (int128_t)r * g3; secp256k1_i128_accum_mul(&cf, v, g3);
f->v[2] = (int64_t)cf & M62; cf >>= 62; secp256k1_i128_accum_mul(&cg, q, f3);
g->v[2] = (int64_t)cg & M62; cg >>= 62; secp256k1_i128_accum_mul(&cg, r, g3);
f->v[2] = secp256k1_i128_to_i64(&cf) & M62; secp256k1_i128_rshift(&cf, 62);
g->v[2] = secp256k1_i128_to_i64(&cg) & M62; secp256k1_i128_rshift(&cg, 62);
/* Compute limb 4 of t*[f,g], and store it as output limb 3. */ /* Compute limb 4 of t*[f,g], and store it as output limb 3. */
cf += (int128_t)u * f4 + (int128_t)v * g4; secp256k1_i128_accum_mul(&cf, u, f4);
cg += (int128_t)q * f4 + (int128_t)r * g4; secp256k1_i128_accum_mul(&cf, v, g4);
f->v[3] = (int64_t)cf & M62; cf >>= 62; secp256k1_i128_accum_mul(&cg, q, f4);
g->v[3] = (int64_t)cg & M62; cg >>= 62; secp256k1_i128_accum_mul(&cg, r, g4);
f->v[3] = secp256k1_i128_to_i64(&cf) & M62; secp256k1_i128_rshift(&cf, 62);
g->v[3] = secp256k1_i128_to_i64(&cg) & M62; secp256k1_i128_rshift(&cg, 62);
/* What remains is limb 5 of t*[f,g]; store it as output limb 4. */ /* What remains is limb 5 of t*[f,g]; store it as output limb 4. */
f->v[4] = (int64_t)cf; f->v[4] = secp256k1_i128_to_i64(&cf);
g->v[4] = (int64_t)cg; g->v[4] = secp256k1_i128_to_i64(&cg);
} }
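/* Illustrative mapping (a sketch of the translation pattern, not part of the diff):
 * a native-__int128 limb step such as
 *
 *     cf += (int128_t)u * f1 + (int128_t)v * g1;
 *     f->v[0] = (int64_t)cf & M62; cf >>= 62;
 *
 * is expressed through the abstract accumulator API as
 *
 *     secp256k1_i128_accum_mul(&cf, u, f1);
 *     secp256k1_i128_accum_mul(&cf, v, g1);
 *     f->v[0] = secp256k1_i128_to_i64(&cf) & M62;
 *     secp256k1_i128_rshift(&cf, 62);
 *
 * so the same source builds against either the native backend (a typedef of
 * __int128) or the int128_struct backend (a pair of 64-bit limbs). */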
/* Compute (t/2^62) * [f, g], where t is a transition matrix for 62 divsteps. /* Compute (t/2^62) * [f, g], where t is a transition matrix for 62 divsteps.
@@ -434,30 +466,34 @@ static void secp256k1_modinv64_update_fg_62_var(int len, secp256k1_modinv64_sign
const int64_t M62 = (int64_t)(UINT64_MAX >> 2); const int64_t M62 = (int64_t)(UINT64_MAX >> 2);
const int64_t u = t->u, v = t->v, q = t->q, r = t->r; const int64_t u = t->u, v = t->v, q = t->q, r = t->r;
int64_t fi, gi; int64_t fi, gi;
int128_t cf, cg; secp256k1_int128 cf, cg;
int i; int i;
VERIFY_CHECK(len > 0); VERIFY_CHECK(len > 0);
/* Start computing t*[f,g]. */ /* Start computing t*[f,g]. */
fi = f->v[0]; fi = f->v[0];
gi = g->v[0]; gi = g->v[0];
cf = (int128_t)u * fi + (int128_t)v * gi; secp256k1_i128_mul(&cf, u, fi);
cg = (int128_t)q * fi + (int128_t)r * gi; secp256k1_i128_accum_mul(&cf, v, gi);
secp256k1_i128_mul(&cg, q, fi);
secp256k1_i128_accum_mul(&cg, r, gi);
/* Verify that the bottom 62 bits of the result are zero, and then throw them away. */ /* Verify that the bottom 62 bits of the result are zero, and then throw them away. */
VERIFY_CHECK(((int64_t)cf & M62) == 0); cf >>= 62; VERIFY_CHECK((secp256k1_i128_to_i64(&cf) & M62) == 0); secp256k1_i128_rshift(&cf, 62);
VERIFY_CHECK(((int64_t)cg & M62) == 0); cg >>= 62; VERIFY_CHECK((secp256k1_i128_to_i64(&cg) & M62) == 0); secp256k1_i128_rshift(&cg, 62);
/* Now iteratively compute limb i=1..len of t*[f,g], and store them in output limb i-1 (shifting /* Now iteratively compute limb i=1..len of t*[f,g], and store them in output limb i-1 (shifting
* down by 62 bits). */ * down by 62 bits). */
for (i = 1; i < len; ++i) { for (i = 1; i < len; ++i) {
fi = f->v[i]; fi = f->v[i];
gi = g->v[i]; gi = g->v[i];
cf += (int128_t)u * fi + (int128_t)v * gi; secp256k1_i128_accum_mul(&cf, u, fi);
cg += (int128_t)q * fi + (int128_t)r * gi; secp256k1_i128_accum_mul(&cf, v, gi);
f->v[i - 1] = (int64_t)cf & M62; cf >>= 62; secp256k1_i128_accum_mul(&cg, q, fi);
g->v[i - 1] = (int64_t)cg & M62; cg >>= 62; secp256k1_i128_accum_mul(&cg, r, gi);
f->v[i - 1] = secp256k1_i128_to_i64(&cf) & M62; secp256k1_i128_rshift(&cf, 62);
g->v[i - 1] = secp256k1_i128_to_i64(&cg) & M62; secp256k1_i128_rshift(&cg, 62);
} }
/* What remains is limb (len) of t*[f,g]; store it as output limb (len-1). */ /* What remains is limb (len) of t*[f,g]; store it as output limb (len-1). */
f->v[len - 1] = (int64_t)cf; f->v[len - 1] = secp256k1_i128_to_i64(&cf);
g->v[len - 1] = (int64_t)cg; g->v[len - 1] = secp256k1_i128_to_i64(&cg);
} }
/* Compute the inverse of x modulo modinfo->modulus, and replace x with it (constant time in x). */ /* Compute the inverse of x modulo modinfo->modulus, and replace x with it (constant time in x). */

View File

@@ -14,10 +14,13 @@
#endif #endif
#include "../include/secp256k1.h" #include "../include/secp256k1.h"
#include "assumptions.h" #include "assumptions.h"
#include "util.h" #include "util.h"
#include "field_impl.h" #include "field_impl.h"
#include "group_impl.h" #include "group_impl.h"
#include "int128_impl.h"
#include "ecmult.h" #include "ecmult.h"
#include "ecmult_compute_table_impl.h" #include "ecmult_compute_table_impl.h"

View File

@@ -8,9 +8,12 @@
#include <stdio.h> #include <stdio.h>
#include "../include/secp256k1.h" #include "../include/secp256k1.h"
#include "assumptions.h" #include "assumptions.h"
#include "util.h" #include "util.h"
#include "group.h" #include "group.h"
#include "int128_impl.h"
#include "ecmult_gen.h" #include "ecmult_gen.h"
#include "ecmult_gen_compute_table_impl.h" #include "ecmult_gen_compute_table_impl.h"

View File

@@ -7,9 +7,8 @@
#ifndef SECP256K1_SCALAR_REPR_IMPL_H #ifndef SECP256K1_SCALAR_REPR_IMPL_H
#define SECP256K1_SCALAR_REPR_IMPL_H #define SECP256K1_SCALAR_REPR_IMPL_H
#include "scalar.h"
#include <string.h> #include <string.h>
#include "int128.h"
#include "modinv64_impl.h" #include "modinv64_impl.h"
/* Limbs of the secp256k1 order. */ /* Limbs of the secp256k1 order. */
@@ -79,51 +78,62 @@ SECP256K1_INLINE static int secp256k1_scalar_check_overflow(const secp256k1_scal
} }
SECP256K1_INLINE static int secp256k1_scalar_reduce(secp256k1_scalar *r, unsigned int overflow) { SECP256K1_INLINE static int secp256k1_scalar_reduce(secp256k1_scalar *r, unsigned int overflow) {
uint128_t t; secp256k1_uint128 t;
VERIFY_CHECK(overflow <= 1); VERIFY_CHECK(overflow <= 1);
t = (uint128_t)r->d[0] + overflow * SECP256K1_N_C_0; secp256k1_u128_from_u64(&t, r->d[0]);
r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64; secp256k1_u128_accum_u64(&t, overflow * SECP256K1_N_C_0);
t += (uint128_t)r->d[1] + overflow * SECP256K1_N_C_1; r->d[0] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
r->d[1] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64; secp256k1_u128_accum_u64(&t, r->d[1]);
t += (uint128_t)r->d[2] + overflow * SECP256K1_N_C_2; secp256k1_u128_accum_u64(&t, overflow * SECP256K1_N_C_1);
r->d[2] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64; r->d[1] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
t += (uint64_t)r->d[3]; secp256k1_u128_accum_u64(&t, r->d[2]);
r->d[3] = t & 0xFFFFFFFFFFFFFFFFULL; secp256k1_u128_accum_u64(&t, overflow * SECP256K1_N_C_2);
r->d[2] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
secp256k1_u128_accum_u64(&t, r->d[3]);
r->d[3] = secp256k1_u128_to_u64(&t);
return overflow; return overflow;
} }
static int secp256k1_scalar_add(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b) { static int secp256k1_scalar_add(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b) {
int overflow; int overflow;
uint128_t t = (uint128_t)a->d[0] + b->d[0]; secp256k1_uint128 t;
r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64; secp256k1_u128_from_u64(&t, a->d[0]);
t += (uint128_t)a->d[1] + b->d[1]; secp256k1_u128_accum_u64(&t, b->d[0]);
r->d[1] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64; r->d[0] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
t += (uint128_t)a->d[2] + b->d[2]; secp256k1_u128_accum_u64(&t, a->d[1]);
r->d[2] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64; secp256k1_u128_accum_u64(&t, b->d[1]);
t += (uint128_t)a->d[3] + b->d[3]; r->d[1] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
r->d[3] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64; secp256k1_u128_accum_u64(&t, a->d[2]);
overflow = t + secp256k1_scalar_check_overflow(r); secp256k1_u128_accum_u64(&t, b->d[2]);
r->d[2] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
secp256k1_u128_accum_u64(&t, a->d[3]);
secp256k1_u128_accum_u64(&t, b->d[3]);
r->d[3] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
overflow = secp256k1_u128_to_u64(&t) + secp256k1_scalar_check_overflow(r);
VERIFY_CHECK(overflow == 0 || overflow == 1); VERIFY_CHECK(overflow == 0 || overflow == 1);
secp256k1_scalar_reduce(r, overflow); secp256k1_scalar_reduce(r, overflow);
return overflow; return overflow;
} }
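/* A minimal self-contained sketch (assuming a compiler with unsigned __int128,
 * i.e. the native backend's semantics) of the add-with-carry chain that
 * secp256k1_u128_accum_u64 / secp256k1_u128_to_u64 / secp256k1_u128_rshift
 * express in secp256k1_scalar_add above: */
#include <stdint.h>
static void add_with_carry_demo(uint64_t r[2], const uint64_t a[2], const uint64_t b[2]) {
    unsigned __int128 t = 0;
    t += (unsigned __int128)a[0] + b[0];
    r[0] = (uint64_t)t; t >>= 64;      /* emit the low limb, keep the carry */
    t += (unsigned __int128)a[1] + b[1];
    r[1] = (uint64_t)t;                /* carry out of the top limb is discarded */
}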
static void secp256k1_scalar_cadd_bit(secp256k1_scalar *r, unsigned int bit, int flag) { static void secp256k1_scalar_cadd_bit(secp256k1_scalar *r, unsigned int bit, int flag) {
uint128_t t; secp256k1_uint128 t;
volatile int vflag = flag; volatile int vflag = flag;
VERIFY_CHECK(bit < 256); VERIFY_CHECK(bit < 256);
bit += ((uint32_t) vflag - 1) & 0x100; /* forcing (bit >> 6) > 3 makes this a noop */ bit += ((uint32_t) vflag - 1) & 0x100; /* forcing (bit >> 6) > 3 makes this a noop */
t = (uint128_t)r->d[0] + (((uint64_t)((bit >> 6) == 0)) << (bit & 0x3F)); secp256k1_u128_from_u64(&t, r->d[0]);
r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64; secp256k1_u128_accum_u64(&t, ((uint64_t)((bit >> 6) == 0)) << (bit & 0x3F));
t += (uint128_t)r->d[1] + (((uint64_t)((bit >> 6) == 1)) << (bit & 0x3F)); r->d[0] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
r->d[1] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64; secp256k1_u128_accum_u64(&t, r->d[1]);
t += (uint128_t)r->d[2] + (((uint64_t)((bit >> 6) == 2)) << (bit & 0x3F)); secp256k1_u128_accum_u64(&t, ((uint64_t)((bit >> 6) == 1)) << (bit & 0x3F));
r->d[2] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64; r->d[1] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
t += (uint128_t)r->d[3] + (((uint64_t)((bit >> 6) == 3)) << (bit & 0x3F)); secp256k1_u128_accum_u64(&t, r->d[2]);
r->d[3] = t & 0xFFFFFFFFFFFFFFFFULL; secp256k1_u128_accum_u64(&t, ((uint64_t)((bit >> 6) == 2)) << (bit & 0x3F));
r->d[2] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
secp256k1_u128_accum_u64(&t, r->d[3]);
secp256k1_u128_accum_u64(&t, ((uint64_t)((bit >> 6) == 3)) << (bit & 0x3F));
r->d[3] = secp256k1_u128_to_u64(&t);
#ifdef VERIFY #ifdef VERIFY
VERIFY_CHECK((t >> 64) == 0); VERIFY_CHECK(secp256k1_u128_hi_u64(&t) == 0);
VERIFY_CHECK(secp256k1_scalar_check_overflow(r) == 0);
#endif #endif
} }
@@ -152,14 +162,19 @@ SECP256K1_INLINE static int secp256k1_scalar_is_zero(const secp256k1_scalar *a)
static void secp256k1_scalar_negate(secp256k1_scalar *r, const secp256k1_scalar *a) { static void secp256k1_scalar_negate(secp256k1_scalar *r, const secp256k1_scalar *a) {
uint64_t nonzero = 0xFFFFFFFFFFFFFFFFULL * (secp256k1_scalar_is_zero(a) == 0); uint64_t nonzero = 0xFFFFFFFFFFFFFFFFULL * (secp256k1_scalar_is_zero(a) == 0);
uint128_t t = (uint128_t)(~a->d[0]) + SECP256K1_N_0 + 1; secp256k1_uint128 t;
r->d[0] = t & nonzero; t >>= 64; secp256k1_u128_from_u64(&t, ~a->d[0]);
t += (uint128_t)(~a->d[1]) + SECP256K1_N_1; secp256k1_u128_accum_u64(&t, SECP256K1_N_0 + 1);
r->d[1] = t & nonzero; t >>= 64; r->d[0] = secp256k1_u128_to_u64(&t) & nonzero; secp256k1_u128_rshift(&t, 64);
t += (uint128_t)(~a->d[2]) + SECP256K1_N_2; secp256k1_u128_accum_u64(&t, ~a->d[1]);
r->d[2] = t & nonzero; t >>= 64; secp256k1_u128_accum_u64(&t, SECP256K1_N_1);
t += (uint128_t)(~a->d[3]) + SECP256K1_N_3; r->d[1] = secp256k1_u128_to_u64(&t) & nonzero; secp256k1_u128_rshift(&t, 64);
r->d[3] = t & nonzero; secp256k1_u128_accum_u64(&t, ~a->d[2]);
secp256k1_u128_accum_u64(&t, SECP256K1_N_2);
r->d[2] = secp256k1_u128_to_u64(&t) & nonzero; secp256k1_u128_rshift(&t, 64);
secp256k1_u128_accum_u64(&t, ~a->d[3]);
secp256k1_u128_accum_u64(&t, SECP256K1_N_3);
r->d[3] = secp256k1_u128_to_u64(&t) & nonzero;
} }
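/* The negation above relies on the two's-complement identity ~a + 1 == 2^256 - a,
 * so ~a + N + 1 == N - a modulo 2^256; the `& nonzero` mask forces the result to
 * 0 when a == 0. A hypothetical single-limb analogue (a sketch, not library code): */
#include <stdint.h>
static uint64_t negate_mod_n_demo(uint64_t a, uint64_t n) {   /* assumes a < n */
    uint64_t nonzero = 0 - (uint64_t)(a != 0);                /* all-ones iff a != 0 */
    return (~a + n + 1) & nonzero;                            /* (n - a) mod 2^64, or 0 */
}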
SECP256K1_INLINE static int secp256k1_scalar_is_one(const secp256k1_scalar *a) { SECP256K1_INLINE static int secp256k1_scalar_is_one(const secp256k1_scalar *a) {
@@ -184,14 +199,19 @@ static int secp256k1_scalar_cond_negate(secp256k1_scalar *r, int flag) {
volatile int vflag = flag; volatile int vflag = flag;
uint64_t mask = -vflag; uint64_t mask = -vflag;
uint64_t nonzero = (secp256k1_scalar_is_zero(r) != 0) - 1; uint64_t nonzero = (secp256k1_scalar_is_zero(r) != 0) - 1;
uint128_t t = (uint128_t)(r->d[0] ^ mask) + ((SECP256K1_N_0 + 1) & mask); secp256k1_uint128 t;
r->d[0] = t & nonzero; t >>= 64; secp256k1_u128_from_u64(&t, r->d[0] ^ mask);
t += (uint128_t)(r->d[1] ^ mask) + (SECP256K1_N_1 & mask); secp256k1_u128_accum_u64(&t, (SECP256K1_N_0 + 1) & mask);
r->d[1] = t & nonzero; t >>= 64; r->d[0] = secp256k1_u128_to_u64(&t) & nonzero; secp256k1_u128_rshift(&t, 64);
t += (uint128_t)(r->d[2] ^ mask) + (SECP256K1_N_2 & mask); secp256k1_u128_accum_u64(&t, r->d[1] ^ mask);
r->d[2] = t & nonzero; t >>= 64; secp256k1_u128_accum_u64(&t, SECP256K1_N_1 & mask);
t += (uint128_t)(r->d[3] ^ mask) + (SECP256K1_N_3 & mask); r->d[1] = secp256k1_u128_to_u64(&t) & nonzero; secp256k1_u128_rshift(&t, 64);
r->d[3] = t & nonzero; secp256k1_u128_accum_u64(&t, r->d[2] ^ mask);
secp256k1_u128_accum_u64(&t, SECP256K1_N_2 & mask);
r->d[2] = secp256k1_u128_to_u64(&t) & nonzero; secp256k1_u128_rshift(&t, 64);
secp256k1_u128_accum_u64(&t, r->d[3] ^ mask);
secp256k1_u128_accum_u64(&t, SECP256K1_N_3 & mask);
r->d[3] = secp256k1_u128_to_u64(&t) & nonzero;
return 2 * (mask == 0) - 1; return 2 * (mask == 0) - 1;
} }
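/* secp256k1_scalar_cond_negate applies the same identity branch-free: with
 * mask = -flag (all-ones or all-zero), (x ^ mask) is either ~x or x, and adding
 * (k & mask) adds k only when the flag is set, so the limb chain computes either
 * N - r or r without a data-dependent branch. A hypothetical single-limb sketch: */
#include <stdint.h>
static uint64_t cond_negate_demo(uint64_t r, uint64_t n, int flag) {  /* assumes r < n */
    uint64_t mask = 0 - (uint64_t)flag;                 /* all-ones iff flag is set */
    uint64_t nonzero = 0 - (uint64_t)(r != 0);          /* all-ones iff r != 0 */
    return ((r ^ mask) + ((n + 1) & mask)) & nonzero;   /* flag ? (n - r) mod 2^64 : r */
}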
@@ -201,9 +221,10 @@ static int secp256k1_scalar_cond_negate(secp256k1_scalar *r, int flag) {
#define muladd(a,b) { \ #define muladd(a,b) { \
uint64_t tl, th; \ uint64_t tl, th; \
{ \ { \
uint128_t t = (uint128_t)a * b; \ secp256k1_uint128 t; \
th = t >> 64; /* at most 0xFFFFFFFFFFFFFFFE */ \ secp256k1_u128_mul(&t, a, b); \
tl = t; \ th = secp256k1_u128_hi_u64(&t); /* at most 0xFFFFFFFFFFFFFFFE */ \
tl = secp256k1_u128_to_u64(&t); \
} \ } \
c0 += tl; /* overflow is handled on the next line */ \ c0 += tl; /* overflow is handled on the next line */ \
th += (c0 < tl); /* at most 0xFFFFFFFFFFFFFFFF */ \ th += (c0 < tl); /* at most 0xFFFFFFFFFFFFFFFF */ \
@@ -216,9 +237,10 @@ static int secp256k1_scalar_cond_negate(secp256k1_scalar *r, int flag) {
#define muladd_fast(a,b) { \ #define muladd_fast(a,b) { \
uint64_t tl, th; \ uint64_t tl, th; \
{ \ { \
uint128_t t = (uint128_t)a * b; \ secp256k1_uint128 t; \
th = t >> 64; /* at most 0xFFFFFFFFFFFFFFFE */ \ secp256k1_u128_mul(&t, a, b); \
tl = t; \ th = secp256k1_u128_hi_u64(&t); /* at most 0xFFFFFFFFFFFFFFFE */ \
tl = secp256k1_u128_to_u64(&t); \
} \ } \
c0 += tl; /* overflow is handled on the next line */ \ c0 += tl; /* overflow is handled on the next line */ \
th += (c0 < tl); /* at most 0xFFFFFFFFFFFFFFFF */ \ th += (c0 < tl); /* at most 0xFFFFFFFFFFFFFFFF */ \
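/* Both macros keep a 192-bit accumulator in three 64-bit words (c0,c1,c2) and
 * only need the 64x64 product split into (tl,th); each 64-bit addition detects
 * its carry with the standard "sum smaller than an addend" test. A sketch of
 * that idiom in isolation (plain C99, not the macros themselves): */
#include <stdint.h>
static void muladd_carry_demo(uint64_t *c0, uint64_t *c1, uint64_t tl, uint64_t th) {
    *c0 += tl;            /* low word; may wrap */
    th  += (*c0 < tl);    /* fold the carry into th (safe: a product's high word is at most 2^64 - 2) */
    *c1 += th;            /* a full version would also track (*c1 < th) into a third word */
}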
@@ -518,8 +540,8 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l)
: "g"(p0), "g"(p1), "g"(p2), "g"(p3), "g"(p4), "D"(r), "i"(SECP256K1_N_C_0), "i"(SECP256K1_N_C_1) : "g"(p0), "g"(p1), "g"(p2), "g"(p3), "g"(p4), "D"(r), "i"(SECP256K1_N_C_0), "i"(SECP256K1_N_C_1)
: "rax", "rdx", "r8", "r9", "r10", "cc", "memory"); : "rax", "rdx", "r8", "r9", "r10", "cc", "memory");
#else #else
uint128_t c; secp256k1_uint128 c128;
uint64_t c0, c1, c2; uint64_t c, c0, c1, c2;
uint64_t n0 = l[4], n1 = l[5], n2 = l[6], n3 = l[7]; uint64_t n0 = l[4], n1 = l[5], n2 = l[6], n3 = l[7];
uint64_t m0, m1, m2, m3, m4, m5; uint64_t m0, m1, m2, m3, m4, m5;
uint32_t m6; uint32_t m6;
@@ -576,14 +598,18 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l)
/* Reduce 258 bits into 256. */ /* Reduce 258 bits into 256. */
/* r[0..3] = p[0..3] + p[4] * SECP256K1_N_C. */ /* r[0..3] = p[0..3] + p[4] * SECP256K1_N_C. */
c = p0 + (uint128_t)SECP256K1_N_C_0 * p4; secp256k1_u128_from_u64(&c128, p0);
r->d[0] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64; secp256k1_u128_accum_mul(&c128, SECP256K1_N_C_0, p4);
c += p1 + (uint128_t)SECP256K1_N_C_1 * p4; r->d[0] = secp256k1_u128_to_u64(&c128); secp256k1_u128_rshift(&c128, 64);
r->d[1] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64; secp256k1_u128_accum_u64(&c128, p1);
c += p2 + (uint128_t)p4; secp256k1_u128_accum_mul(&c128, SECP256K1_N_C_1, p4);
r->d[2] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64; r->d[1] = secp256k1_u128_to_u64(&c128); secp256k1_u128_rshift(&c128, 64);
c += p3; secp256k1_u128_accum_u64(&c128, p2);
r->d[3] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64; secp256k1_u128_accum_u64(&c128, p4);
r->d[2] = secp256k1_u128_to_u64(&c128); secp256k1_u128_rshift(&c128, 64);
secp256k1_u128_accum_u64(&c128, p3);
r->d[3] = secp256k1_u128_to_u64(&c128);
c = secp256k1_u128_hi_u64(&c128);
#endif #endif
/* Final reduction of r. */ /* Final reduction of r. */

View File

@@ -22,6 +22,7 @@
#include "assumptions.h" #include "assumptions.h"
#include "util.h" #include "util.h"
#include "field_impl.h" #include "field_impl.h"
#include "scalar_impl.h" #include "scalar_impl.h"
#include "group_impl.h" #include "group_impl.h"
@@ -32,6 +33,7 @@
#include "ecdsa_impl.h" #include "ecdsa_impl.h"
#include "eckey_impl.h" #include "eckey_impl.h"
#include "hash_impl.h" #include "hash_impl.h"
#include "int128_impl.h"
#include "scratch_impl.h" #include "scratch_impl.h"
#include "selftest.h" #include "selftest.h"

View File

@@ -26,6 +26,7 @@
#include "modinv32_impl.h" #include "modinv32_impl.h"
#ifdef SECP256K1_WIDEMUL_INT128 #ifdef SECP256K1_WIDEMUL_INT128
#include "modinv64_impl.h" #include "modinv64_impl.h"
#include "int128_impl.h"
#endif #endif
#define CONDITIONAL_TEST(cnt, nam) if (count < (cnt)) { printf("Skipping %s (iteration count too low)\n", nam); } else #define CONDITIONAL_TEST(cnt, nam) if (count < (cnt)) { printf("Skipping %s (iteration count too low)\n", nam); } else
@@ -476,6 +477,7 @@ void run_scratch_tests(void) {
secp256k1_context_destroy(none); secp256k1_context_destroy(none);
} }
void run_ctz_tests(void) { void run_ctz_tests(void) {
static const uint32_t b32[] = {1, 0xffffffff, 0x5e56968f, 0xe0d63129}; static const uint32_t b32[] = {1, 0xffffffff, 0x5e56968f, 0xe0d63129};
static const uint64_t b64[] = {1, 0xffffffffffffffff, 0xbcd02462139b3fc3, 0x98b5f80c769693ef}; static const uint64_t b64[] = {1, 0xffffffffffffffff, 0xbcd02462139b3fc3, 0x98b5f80c769693ef};
@@ -860,7 +862,8 @@ uint64_t modinv2p64(uint64_t x) {
return w; return w;
} }
/* compute out = (a*b) mod m; if b=NULL, treat b=1.
/* compute out = (a*b) mod m; if b=NULL, treat b=1; if m=NULL, treat m=infinity.
* *
* Out is a 512-bit number (represented as 32 uint16_t's in LE order). The other * Out is a 512-bit number (represented as 32 uint16_t's in LE order). The other
* arguments are 256-bit numbers (represented as 16 uint16_t's in LE order). */ * arguments are 256-bit numbers (represented as 16 uint16_t's in LE order). */
@@ -902,45 +905,47 @@ void mulmod256(uint16_t* out, const uint16_t* a, const uint16_t* b, const uint16
} }
} }
/* Compute the highest set bit in m. */ if (m) {
for (i = 255; i >= 0; --i) { /* Compute the highest set bit in m. */
if ((m[i >> 4] >> (i & 15)) & 1) { for (i = 255; i >= 0; --i) {
m_bitlen = i; if ((m[i >> 4] >> (i & 15)) & 1) {
break; m_bitlen = i;
} break;
}
/* Try to do mul -= m<<i, for i going down to 0, whenever the result is not negative */
for (i = mul_bitlen - m_bitlen; i >= 0; --i) {
uint16_t mul2[32];
int64_t cs;
/* Compute mul2 = mul - m<<i. */
cs = 0; /* accumulator */
for (j = 0; j < 32; ++j) { /* j loops over the output limbs in mul2. */
/* Compute sub: the 16 bits in m that will be subtracted from mul2[j]. */
uint16_t sub = 0;
int p;
for (p = 0; p < 16; ++p) { /* p loops over the bit positions in mul2[j]. */
int bitpos = j * 16 - i + p; /* bitpos is the corresponding bit position in m. */
if (bitpos >= 0 && bitpos < 256) {
sub |= ((m[bitpos >> 4] >> (bitpos & 15)) & 1) << p;
}
} }
/* Add mul[j]-sub to accumulator, and shift bottom 16 bits out to mul2[j]. */
cs += mul[j];
cs -= sub;
mul2[j] = (cs & 0xFFFF);
cs >>= 16;
} }
/* If remainder of subtraction is 0, set mul = mul2. */
if (cs == 0) { /* Try to do mul -= m<<i, for i going down to 0, whenever the result is not negative */
memcpy(mul, mul2, sizeof(mul)); for (i = mul_bitlen - m_bitlen; i >= 0; --i) {
uint16_t mul2[32];
int64_t cs;
/* Compute mul2 = mul - m<<i. */
cs = 0; /* accumulator */
for (j = 0; j < 32; ++j) { /* j loops over the output limbs in mul2. */
/* Compute sub: the 16 bits in m that will be subtracted from mul2[j]. */
uint16_t sub = 0;
int p;
for (p = 0; p < 16; ++p) { /* p loops over the bit positions in mul2[j]. */
int bitpos = j * 16 - i + p; /* bitpos is the corresponding bit position in m. */
if (bitpos >= 0 && bitpos < 256) {
sub |= ((m[bitpos >> 4] >> (bitpos & 15)) & 1) << p;
}
}
/* Add mul[j]-sub to accumulator, and shift bottom 16 bits out to mul2[j]. */
cs += mul[j];
cs -= sub;
mul2[j] = (cs & 0xFFFF);
cs >>= 16;
}
/* If remainder of subtraction is 0, set mul = mul2. */
if (cs == 0) {
memcpy(mul, mul2, sizeof(mul));
}
}
/* Sanity check: test that all limbs higher than m's highest are zero */
for (i = (m_bitlen >> 4) + 1; i < 32; ++i) {
CHECK(mul[i] == 0);
} }
}
/* Sanity check: test that all limbs higher than m's highest are zero */
for (i = (m_bitlen >> 4) + 1; i < 32; ++i) {
CHECK(mul[i] == 0);
} }
memcpy(out, mul, 32); memcpy(out, mul, 32);
} }
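/* With the new m == NULL case, mulmod256 doubles as an unreduced reference
 * multiplier. The int128 tests added below use it exactly that way, e.g.
 *
 *     mulmod256(ruwr, rub, ruc, NULL);     (reference product of b and c)
 *     secp256k1_u128_mul(&uwz, ub, uc);    (value under test)
 *     load256u128(ruwz, &uwz);
 *     CHECK(secp256k1_memcmp_var(ruwr, ruwz, 16) == 0);
 *
 * comparing only the low 128 bits (16 bytes) that a secp256k1_uint128 can hold. */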
@@ -1756,8 +1761,305 @@ void run_modinv_tests(void) {
} }
} }
/***** SCALAR TESTS *****/ /***** INT128 TESTS *****/
#ifdef SECP256K1_WIDEMUL_INT128
/* Add two 256-bit numbers (represented as 16 uint16_t's in LE order) together mod 2^256. */
void add256(uint16_t* out, const uint16_t* a, const uint16_t* b) {
int i;
uint32_t carry = 0;
for (i = 0; i < 16; ++i) {
carry += a[i];
carry += b[i];
out[i] = carry;
carry >>= 16;
}
}
/* Negate a 256-bit number (represented as 16 uint16_t's in LE order) mod 2^256. */
void neg256(uint16_t* out, const uint16_t* a) {
int i;
uint32_t carry = 1;
for (i = 0; i < 16; ++i) {
carry += (uint16_t)~a[i];
out[i] = carry;
carry >>= 16;
}
}
/* Right-shift a 256-bit number (represented as 16 uint16_t's in LE order). */
void rshift256(uint16_t* out, const uint16_t* a, int n, int sign_extend) {
uint16_t sign = sign_extend && (a[15] >> 15);
int i, j;
for (i = 15; i >= 0; --i) {
uint16_t v = 0;
for (j = 0; j < 16; ++j) {
int frompos = i*16 + j + n;
if (frompos >= 256) {
v |= sign << j;
} else {
v |= ((uint16_t)((a[frompos >> 4] >> (frompos & 15)) & 1)) << j;
}
}
out[i] = v;
}
}
/* Load a 64-bit integer into an array of 16 uint16_t's in LE order representing a 256-bit value, sign-extending it when is_signed is set. */
void load256u64(uint16_t* out, uint64_t v, int is_signed) {
int i;
uint64_t sign = is_signed && (v >> 63) ? UINT64_MAX : 0;
for (i = 0; i < 4; ++i) {
out[i] = v >> (16 * i);
}
for (i = 4; i < 16; ++i) {
out[i] = sign;
}
}
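/* Worked example of this little-endian limb layout: load256u64(out, 0x0123456789abcdefULL, 0)
 * yields out[0]=0xcdef, out[1]=0x89ab, out[2]=0x4567, out[3]=0x0123 and out[4..15]=0;
 * with is_signed=1 and a value whose bit 63 is set, out[4..15] become 0xffff instead. */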
/* Load a 128-bit integer (given as 64-bit high and low halves) into an array of 16 uint16_t's in LE order representing a 256-bit value, sign-extending it when is_signed is set. */
void load256two64(uint16_t* out, uint64_t hi, uint64_t lo, int is_signed) {
int i;
uint64_t sign = is_signed && (hi >> 63) ? UINT64_MAX : 0;
for (i = 0; i < 4; ++i) {
out[i] = lo >> (16 * i);
}
for (i = 4; i < 8; ++i) {
out[i] = hi >> (16 * (i - 4));
}
for (i = 8; i < 16; ++i) {
out[i] = sign;
}
}
/* Check whether the 256-bit value represented by array of 16-bit values is in range -2^127 < v < 2^127. */
int int256is127(const uint16_t* v) {
int all_0 = ((v[7] & 0x8000) == 0), all_1 = ((v[7] & 0x8000) == 0x8000);
int i;
for (i = 8; i < 16; ++i) {
if (v[i] != 0) all_0 = 0;
if (v[i] != 0xffff) all_1 = 0;
}
return all_0 || all_1;
}
void load256u128(uint16_t* out, const secp256k1_uint128* v) {
uint64_t lo = secp256k1_u128_to_u64(v), hi = secp256k1_u128_hi_u64(v);
load256two64(out, hi, lo, 0);
}
void load256i128(uint16_t* out, const secp256k1_int128* v) {
uint64_t lo;
int64_t hi;
secp256k1_int128 c = *v;
lo = secp256k1_i128_to_i64(&c);
secp256k1_i128_rshift(&c, 64);
hi = secp256k1_i128_to_i64(&c);
load256two64(out, hi, lo, 1);
}
void run_int128_test_case(void) {
unsigned char buf[32];
uint64_t v[4];
secp256k1_int128 swa, swz;
secp256k1_uint128 uwa, uwz;
uint64_t ub, uc;
int64_t sb, sc;
uint16_t rswa[16], rswz[32], rswr[32], ruwa[16], ruwz[32], ruwr[32];
uint16_t rub[16], ruc[16], rsb[16], rsc[16];
int i;
/* Generate 32-byte random value. */
secp256k1_testrand256_test(buf);
/* Convert into 4 64-bit integers. */
for (i = 0; i < 4; ++i) {
uint64_t vi = 0;
int j;
for (j = 0; j < 8; ++j) vi = (vi << 8) + buf[8*i + j];
v[i] = vi;
}
/* Convert those into a 128-bit value and two 64-bit values (signed and unsigned). */
secp256k1_u128_load(&uwa, v[1], v[0]);
secp256k1_i128_load(&swa, v[1], v[0]);
ub = v[2];
sb = v[2];
uc = v[3];
sc = v[3];
/* Load those also into 16-bit array representations. */
load256u128(ruwa, &uwa);
load256i128(rswa, &swa);
load256u64(rub, ub, 0);
load256u64(rsb, sb, 1);
load256u64(ruc, uc, 0);
load256u64(rsc, sc, 1);
/* test secp256k1_u128_mul */
mulmod256(ruwr, rub, ruc, NULL);
secp256k1_u128_mul(&uwz, ub, uc);
load256u128(ruwz, &uwz);
CHECK(secp256k1_memcmp_var(ruwr, ruwz, 16) == 0);
/* test secp256k1_u128_accum_mul */
mulmod256(ruwr, rub, ruc, NULL);
add256(ruwr, ruwr, ruwa);
uwz = uwa;
secp256k1_u128_accum_mul(&uwz, ub, uc);
load256u128(ruwz, &uwz);
CHECK(secp256k1_memcmp_var(ruwr, ruwz, 16) == 0);
/* test secp256k1_u128_accum_u64 */
add256(ruwr, rub, ruwa);
uwz = uwa;
secp256k1_u128_accum_u64(&uwz, ub);
load256u128(ruwz, &uwz);
CHECK(secp256k1_memcmp_var(ruwr, ruwz, 16) == 0);
/* test secp256k1_u128_rshift */
rshift256(ruwr, ruwa, uc % 128, 0);
uwz = uwa;
secp256k1_u128_rshift(&uwz, uc % 128);
load256u128(ruwz, &uwz);
CHECK(secp256k1_memcmp_var(ruwr, ruwz, 16) == 0);
/* test secp256k1_u128_to_u64 */
CHECK(secp256k1_u128_to_u64(&uwa) == v[0]);
/* test secp256k1_u128_hi_u64 */
CHECK(secp256k1_u128_hi_u64(&uwa) == v[1]);
/* test secp256k1_u128_from_u64 */
secp256k1_u128_from_u64(&uwz, ub);
load256u128(ruwz, &uwz);
CHECK(secp256k1_memcmp_var(rub, ruwz, 16) == 0);
/* test secp256k1_u128_check_bits */
{
int uwa_bits = 0;
int j;
for (j = 0; j < 128; ++j) {
if (ruwa[j / 16] >> (j % 16)) uwa_bits = 1 + j;
}
for (j = 0; j < 128; ++j) {
CHECK(secp256k1_u128_check_bits(&uwa, j) == (uwa_bits <= j));
}
}
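/* This loop pins down the contract being tested: for 0 <= n < 128,
 * secp256k1_u128_check_bits(&a, n) returns 1 exactly when the value fits in the
 * low n bits, i.e. when (a >> n) == 0 in native-__int128 terms. */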
/* test secp256k1_i128_mul */
mulmod256(rswr, rsb, rsc, NULL);
secp256k1_i128_mul(&swz, sb, sc);
load256i128(rswz, &swz);
CHECK(secp256k1_memcmp_var(rswr, rswz, 16) == 0);
/* test secp256k1_i128_accum_mul */
mulmod256(rswr, rsb, rsc, NULL);
add256(rswr, rswr, rswa);
if (int256is127(rswr)) {
swz = swa;
secp256k1_i128_accum_mul(&swz, sb, sc);
load256i128(rswz, &swz);
CHECK(secp256k1_memcmp_var(rswr, rswz, 16) == 0);
}
/* test secp256k1_i128_det */
{
uint16_t rsd[16], rse[16], rst[32];
int64_t sd = v[0], se = v[1];
load256u64(rsd, sd, 1);
load256u64(rse, se, 1);
mulmod256(rst, rsc, rsd, NULL);
neg256(rst, rst);
mulmod256(rswr, rsb, rse, NULL);
add256(rswr, rswr, rst);
secp256k1_i128_det(&swz, sb, sc, sd, se);
load256i128(rswz, &swz);
CHECK(secp256k1_memcmp_var(rswr, rswz, 16) == 0);
}
/* test secp256k1_i128_rshift */
rshift256(rswr, rswa, uc % 127, 1);
swz = swa;
secp256k1_i128_rshift(&swz, uc % 127);
load256i128(rswz, &swz);
CHECK(secp256k1_memcmp_var(rswr, rswz, 16) == 0);
/* test secp256k1_i128_to_i64 */
CHECK((uint64_t)secp256k1_i128_to_i64(&swa) == v[0]);
/* test secp256k1_i128_from_i64 */
secp256k1_i128_from_i64(&swz, sb);
load256i128(rswz, &swz);
CHECK(secp256k1_memcmp_var(rsb, rswz, 16) == 0);
/* test secp256k1_i128_eq_var */
{
int expect = (uc & 1);
swz = swa;
if (!expect) {
/* Make sure swz != swa */
uint64_t v0c = v[0], v1c = v[1];
if (ub & 64) {
v1c ^= (((uint64_t)1) << (ub & 63));
} else {
v0c ^= (((uint64_t)1) << (ub & 63));
}
secp256k1_i128_load(&swz, v1c, v0c);
}
CHECK(secp256k1_i128_eq_var(&swa, &swz) == expect);
}
/* test secp256k1_i128_check_pow2 */
{
int expect = (uc & 1);
int pos = ub % 127;
if (expect) {
/* If expect==1, set swz to exactly (2 << pos). */
uint64_t hi = 0;
uint64_t lo = 0;
if (pos & 64) {
hi = (((uint64_t)1) << (pos & 63));
} else {
lo = (((uint64_t)1) << (pos & 63));
}
secp256k1_i128_load(&swz, hi, lo);
} else {
/* If expect==0, set swz = swa, but update expect=1 if swa happens to equal (2 << pos). */
if (pos & 64) {
if ((v[1] == (((uint64_t)1) << (pos & 63))) && v[0] == 0) expect = 1;
} else {
if ((v[0] == (((uint64_t)1) << (pos & 63))) && v[1] == 0) expect = 1;
}
swz = swa;
}
CHECK(secp256k1_i128_check_pow2(&swz, pos) == expect);
}
}
void run_int128_tests(void) {
{ /* secp256k1_u128_accum_mul */
secp256k1_uint128 res;
/* Check secp256k1_u128_accum_mul overflow */
secp256k1_u128_mul(&res, UINT64_MAX, UINT64_MAX);
secp256k1_u128_accum_mul(&res, UINT64_MAX, UINT64_MAX);
CHECK(secp256k1_u128_to_u64(&res) == 2);
CHECK(secp256k1_u128_hi_u64(&res) == 18446744073709551612U);
}
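/* The expected values follow from 2 * (2^64 - 1)^2 = 2^129 - 2^66 + 2, which is
 * congruent to 2^128 - 2^66 + 2 modulo 2^128: its low 64 bits are 2 and its high
 * 64 bits are 2^64 - 4 = 18446744073709551612, confirming that unsigned
 * accumulation wraps modulo 2^128. */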
{ /* secp256k1_i128_accum_mul */
secp256k1_int128 res;
/* Compute INT128_MAX = 2^127 - 1 with secp256k1_i128_accum_mul */
secp256k1_i128_mul(&res, INT64_MAX, INT64_MAX);
secp256k1_i128_accum_mul(&res, INT64_MAX, INT64_MAX);
CHECK(secp256k1_i128_to_i64(&res) == 2);
secp256k1_i128_accum_mul(&res, 4, 9223372036854775807);
secp256k1_i128_accum_mul(&res, 1, 1);
CHECK((uint64_t)secp256k1_i128_to_i64(&res) == UINT64_MAX);
secp256k1_i128_rshift(&res, 64);
CHECK(secp256k1_i128_to_i64(&res) == INT64_MAX);
/* Compute INT128_MIN = - 2^127 with secp256k1_i128_accum_mul */
secp256k1_i128_mul(&res, INT64_MAX, INT64_MIN);
CHECK(secp256k1_i128_to_i64(&res) == INT64_MIN);
secp256k1_i128_accum_mul(&res, INT64_MAX, INT64_MIN);
CHECK(secp256k1_i128_to_i64(&res) == 0);
secp256k1_i128_accum_mul(&res, 2, INT64_MIN);
CHECK(secp256k1_i128_to_i64(&res) == 0);
secp256k1_i128_rshift(&res, 64);
CHECK(secp256k1_i128_to_i64(&res) == INT64_MIN);
}
{
/* Randomized tests. */
int i;
for (i = 0; i < 256 * count; ++i) run_int128_test_case();
}
}
#endif
/***** SCALAR TESTS *****/
void scalar_test(void) { void scalar_test(void) {
secp256k1_scalar s; secp256k1_scalar s;
@@ -7409,6 +7711,9 @@ int main(int argc, char **argv) {
run_rand_int(); run_rand_int();
run_util_tests(); run_util_tests();
#ifdef SECP256K1_WIDEMUL_INT128
run_int128_tests();
#endif
run_ctz_tests(); run_ctz_tests();
run_modinv_tests(); run_modinv_tests();
run_inverse_tests(); run_inverse_tests();

View File

@@ -281,28 +281,36 @@ static SECP256K1_INLINE void secp256k1_int_cmov(int *r, const int *a, int flag)
*r = (int)(r_masked | a_masked); *r = (int)(r_masked | a_masked);
} }
/* If USE_FORCE_WIDEMUL_{INT128,INT64} is set, use that wide multiplication implementation. #if defined(USE_FORCE_WIDEMUL_INT128_STRUCT)
* Otherwise use the presence of __SIZEOF_INT128__ to decide. /* If USE_FORCE_WIDEMUL_INT128_STRUCT is set, use int128_struct. */
*/
#if defined(USE_FORCE_WIDEMUL_INT128)
# define SECP256K1_WIDEMUL_INT128 1 # define SECP256K1_WIDEMUL_INT128 1
# define SECP256K1_INT128_STRUCT 1
#elif defined(USE_FORCE_WIDEMUL_INT128)
/* If USE_FORCE_WIDEMUL_INT128 is set, use int128. */
# define SECP256K1_WIDEMUL_INT128 1
# define SECP256K1_INT128_NATIVE 1
#elif defined(USE_FORCE_WIDEMUL_INT64) #elif defined(USE_FORCE_WIDEMUL_INT64)
/* If USE_FORCE_WIDEMUL_INT64 is set, use int64. */
# define SECP256K1_WIDEMUL_INT64 1 # define SECP256K1_WIDEMUL_INT64 1
#elif defined(UINT128_MAX) || defined(__SIZEOF_INT128__) #elif defined(UINT128_MAX) || defined(__SIZEOF_INT128__)
/* If a native 128-bit integer type exists, use int128. */
# define SECP256K1_WIDEMUL_INT128 1 # define SECP256K1_WIDEMUL_INT128 1
# define SECP256K1_INT128_NATIVE 1
#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM64))
/* On 64-bit MSVC targets (x86_64 and arm64), use int128_struct
* (which has special logic to implement using intrinsics on those systems). */
# define SECP256K1_WIDEMUL_INT128 1
# define SECP256K1_INT128_STRUCT 1
#elif SIZE_MAX > 0xffffffff
/* Systems with 64-bit pointers (and thus registers) very likely benefit from
* using 64-bit based arithmetic (even if we need to fall back to 32x32->64 based
* multiplication logic). */
# define SECP256K1_WIDEMUL_INT128 1
# define SECP256K1_INT128_STRUCT 1
#else #else
/* Lastly, fall back to int64 based arithmetic. */
# define SECP256K1_WIDEMUL_INT64 1 # define SECP256K1_WIDEMUL_INT64 1
#endif #endif
#if defined(SECP256K1_WIDEMUL_INT128)
# if !defined(UINT128_MAX) && defined(__SIZEOF_INT128__)
SECP256K1_GNUC_EXT typedef unsigned __int128 uint128_t;
SECP256K1_GNUC_EXT typedef __int128 int128_t;
#define UINT128_MAX ((uint128_t)(-1))
#define INT128_MAX ((int128_t)(UINT128_MAX >> 1))
#define INT128_MIN (-INT128_MAX - 1)
/* No (U)INT128_C macros because compilers providing __int128 do not support 128-bit literals. */
# endif
#endif
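/* When SECP256K1_INT128_STRUCT is selected above, 64x64->128 multiplication has
 * to be synthesized from 64-bit operations (or from MSVC intrinsics such as
 * __umulh on x64/arm64). A self-contained sketch of the portable idea, with
 * hypothetical names -- not the library's actual int128_struct code: */
#include <stdint.h>
typedef struct { uint64_t lo, hi; } u128_demo;
static void u128_demo_mul(u128_demo *r, uint64_t a, uint64_t b) {
    uint64_t al = (uint32_t)a, ah = a >> 32;
    uint64_t bl = (uint32_t)b, bh = b >> 32;
    uint64_t ll = al * bl, lh = al * bh, hl = ah * bl, hh = ah * bh;
    uint64_t mid = lh + hl;                       /* may wrap past 2^64 */
    uint64_t mid_carry = (mid < lh);              /* 1 iff it wrapped */
    r->lo = ll + (mid << 32);
    r->hi = hh + (mid >> 32) + (mid_carry << 32) + (r->lo < ll);
}
static void u128_demo_accum_mul(u128_demo *r, uint64_t a, uint64_t b) {
    u128_demo p;
    u128_demo_mul(&p, a, b);
    r->lo += p.lo;
    r->hi += p.hi + (r->lo < p.lo);               /* propagate the low-limb carry */
}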
#ifndef __has_builtin #ifndef __has_builtin
#define __has_builtin(x) 0 #define __has_builtin(x) 0