Merge bitcoin-core/secp256k1#1056: Save negations in var-time group addition
2f984ffc45Save negations in var-time group addition (Peter Dettman) Pull request description: - Updated _gej_add_var, _gej_add_ge_var, _gej_add_zinv_var - 2 fewer _fe_negate in each method - Updated operation counts and standardize layout - Added internal benchmark for _gej_add_zinv_var benchmark_internal shows about 2% speedup in each method as a result (64bit). ACKs for top commit: real-or-random: ACK2f984ffc45jonasnick: ACK2f984ffc45Tree-SHA512: 01366fa23c83a8dd37c9a0a24e0acc53ce38a201607fe4da6672ea5618d82c62d1299f0e0aa50317883821539af739ea52b6561faff230c148e6fdc5bc5af30b
This commit is contained in:
@@ -254,6 +254,15 @@ void bench_group_add_affine_var(void* arg, int iters) {
|
||||
}
|
||||
}
|
||||
|
||||
void bench_group_add_zinv_var(void* arg, int iters) {
|
||||
int i;
|
||||
bench_inv *data = (bench_inv*)arg;
|
||||
|
||||
for (i = 0; i < iters; i++) {
|
||||
secp256k1_gej_add_zinv_var(&data->gej[0], &data->gej[0], &data->ge[1], &data->gej[0].y);
|
||||
}
|
||||
}
|
||||
|
||||
void bench_group_to_affine_var(void* arg, int iters) {
|
||||
int i;
|
||||
bench_inv *data = (bench_inv*)arg;
|
||||
@@ -376,6 +385,7 @@ int main(int argc, char **argv) {
|
||||
if (d || have_flag(argc, argv, "group") || have_flag(argc, argv, "add")) run_benchmark("group_add_var", bench_group_add_var, bench_setup, NULL, &data, 10, iters*10);
|
||||
if (d || have_flag(argc, argv, "group") || have_flag(argc, argv, "add")) run_benchmark("group_add_affine", bench_group_add_affine, bench_setup, NULL, &data, 10, iters*10);
|
||||
if (d || have_flag(argc, argv, "group") || have_flag(argc, argv, "add")) run_benchmark("group_add_affine_var", bench_group_add_affine_var, bench_setup, NULL, &data, 10, iters*10);
|
||||
if (d || have_flag(argc, argv, "group") || have_flag(argc, argv, "add")) run_benchmark("group_add_zinv_var", bench_group_add_zinv_var, bench_setup, NULL, &data, 10, iters*10);
|
||||
if (d || have_flag(argc, argv, "group") || have_flag(argc, argv, "to_affine")) run_benchmark("group_to_affine_var", bench_group_to_affine_var, bench_setup, NULL, &data, 10, iters);
|
||||
|
||||
if (d || have_flag(argc, argv, "ecmult") || have_flag(argc, argv, "wnaf")) run_benchmark("wnaf_const", bench_wnaf_const, bench_setup, NULL, &data, 10, iters);
|
||||
|
||||
@@ -330,15 +330,14 @@ static void secp256k1_gej_double_var(secp256k1_gej *r, const secp256k1_gej *a, s
|
||||
}
|
||||
|
||||
static void secp256k1_gej_add_var(secp256k1_gej *r, const secp256k1_gej *a, const secp256k1_gej *b, secp256k1_fe *rzr) {
|
||||
/* Operations: 12 mul, 4 sqr, 2 normalize, 12 mul_int/add/negate */
|
||||
secp256k1_fe z22, z12, u1, u2, s1, s2, h, i, i2, h2, h3, t;
|
||||
/* 12 mul, 4 sqr, 11 add/negate/normalizes_to_zero (ignoring special cases) */
|
||||
secp256k1_fe z22, z12, u1, u2, s1, s2, h, i, h2, h3, t;
|
||||
|
||||
if (a->infinity) {
|
||||
VERIFY_CHECK(rzr == NULL);
|
||||
*r = *b;
|
||||
return;
|
||||
}
|
||||
|
||||
if (b->infinity) {
|
||||
if (rzr != NULL) {
|
||||
secp256k1_fe_set_int(rzr, 1);
|
||||
@@ -347,7 +346,6 @@ static void secp256k1_gej_add_var(secp256k1_gej *r, const secp256k1_gej *a, cons
|
||||
return;
|
||||
}
|
||||
|
||||
r->infinity = 0;
|
||||
secp256k1_fe_sqr(&z22, &b->z);
|
||||
secp256k1_fe_sqr(&z12, &a->z);
|
||||
secp256k1_fe_mul(&u1, &a->x, &z22);
|
||||
@@ -355,7 +353,7 @@ static void secp256k1_gej_add_var(secp256k1_gej *r, const secp256k1_gej *a, cons
|
||||
secp256k1_fe_mul(&s1, &a->y, &z22); secp256k1_fe_mul(&s1, &s1, &b->z);
|
||||
secp256k1_fe_mul(&s2, &b->y, &z12); secp256k1_fe_mul(&s2, &s2, &a->z);
|
||||
secp256k1_fe_negate(&h, &u1, 1); secp256k1_fe_add(&h, &u2);
|
||||
secp256k1_fe_negate(&i, &s1, 1); secp256k1_fe_add(&i, &s2);
|
||||
secp256k1_fe_negate(&i, &s2, 1); secp256k1_fe_add(&i, &s1);
|
||||
if (secp256k1_fe_normalizes_to_zero_var(&h)) {
|
||||
if (secp256k1_fe_normalizes_to_zero_var(&i)) {
|
||||
secp256k1_gej_double_var(r, a, rzr);
|
||||
@@ -367,24 +365,33 @@ static void secp256k1_gej_add_var(secp256k1_gej *r, const secp256k1_gej *a, cons
|
||||
}
|
||||
return;
|
||||
}
|
||||
secp256k1_fe_sqr(&i2, &i);
|
||||
secp256k1_fe_sqr(&h2, &h);
|
||||
secp256k1_fe_mul(&h3, &h, &h2);
|
||||
secp256k1_fe_mul(&h, &h, &b->z);
|
||||
|
||||
r->infinity = 0;
|
||||
secp256k1_fe_mul(&t, &h, &b->z);
|
||||
if (rzr != NULL) {
|
||||
*rzr = h;
|
||||
*rzr = t;
|
||||
}
|
||||
secp256k1_fe_mul(&r->z, &a->z, &h);
|
||||
secp256k1_fe_mul(&r->z, &a->z, &t);
|
||||
|
||||
secp256k1_fe_sqr(&h2, &h);
|
||||
secp256k1_fe_negate(&h2, &h2, 1);
|
||||
secp256k1_fe_mul(&h3, &h2, &h);
|
||||
secp256k1_fe_mul(&t, &u1, &h2);
|
||||
r->x = t; secp256k1_fe_mul_int(&r->x, 2); secp256k1_fe_add(&r->x, &h3); secp256k1_fe_negate(&r->x, &r->x, 3); secp256k1_fe_add(&r->x, &i2);
|
||||
secp256k1_fe_negate(&r->y, &r->x, 5); secp256k1_fe_add(&r->y, &t); secp256k1_fe_mul(&r->y, &r->y, &i);
|
||||
secp256k1_fe_mul(&h3, &h3, &s1); secp256k1_fe_negate(&h3, &h3, 1);
|
||||
|
||||
secp256k1_fe_sqr(&r->x, &i);
|
||||
secp256k1_fe_add(&r->x, &h3);
|
||||
secp256k1_fe_add(&r->x, &t);
|
||||
secp256k1_fe_add(&r->x, &t);
|
||||
|
||||
secp256k1_fe_add(&t, &r->x);
|
||||
secp256k1_fe_mul(&r->y, &t, &i);
|
||||
secp256k1_fe_mul(&h3, &h3, &s1);
|
||||
secp256k1_fe_add(&r->y, &h3);
|
||||
}
|
||||
|
||||
static void secp256k1_gej_add_ge_var(secp256k1_gej *r, const secp256k1_gej *a, const secp256k1_ge *b, secp256k1_fe *rzr) {
|
||||
/* 8 mul, 3 sqr, 4 normalize, 12 mul_int/add/negate */
|
||||
secp256k1_fe z12, u1, u2, s1, s2, h, i, i2, h2, h3, t;
|
||||
/* 8 mul, 3 sqr, 13 add/negate/normalize_weak/normalizes_to_zero (ignoring special cases) */
|
||||
secp256k1_fe z12, u1, u2, s1, s2, h, i, h2, h3, t;
|
||||
if (a->infinity) {
|
||||
VERIFY_CHECK(rzr == NULL);
|
||||
secp256k1_gej_set_ge(r, b);
|
||||
@@ -397,7 +404,6 @@ static void secp256k1_gej_add_ge_var(secp256k1_gej *r, const secp256k1_gej *a, c
|
||||
*r = *a;
|
||||
return;
|
||||
}
|
||||
r->infinity = 0;
|
||||
|
||||
secp256k1_fe_sqr(&z12, &a->z);
|
||||
u1 = a->x; secp256k1_fe_normalize_weak(&u1);
|
||||
@@ -405,7 +411,7 @@ static void secp256k1_gej_add_ge_var(secp256k1_gej *r, const secp256k1_gej *a, c
|
||||
s1 = a->y; secp256k1_fe_normalize_weak(&s1);
|
||||
secp256k1_fe_mul(&s2, &b->y, &z12); secp256k1_fe_mul(&s2, &s2, &a->z);
|
||||
secp256k1_fe_negate(&h, &u1, 1); secp256k1_fe_add(&h, &u2);
|
||||
secp256k1_fe_negate(&i, &s1, 1); secp256k1_fe_add(&i, &s2);
|
||||
secp256k1_fe_negate(&i, &s2, 1); secp256k1_fe_add(&i, &s1);
|
||||
if (secp256k1_fe_normalizes_to_zero_var(&h)) {
|
||||
if (secp256k1_fe_normalizes_to_zero_var(&i)) {
|
||||
secp256k1_gej_double_var(r, a, rzr);
|
||||
@@ -417,28 +423,33 @@ static void secp256k1_gej_add_ge_var(secp256k1_gej *r, const secp256k1_gej *a, c
|
||||
}
|
||||
return;
|
||||
}
|
||||
secp256k1_fe_sqr(&i2, &i);
|
||||
secp256k1_fe_sqr(&h2, &h);
|
||||
secp256k1_fe_mul(&h3, &h, &h2);
|
||||
|
||||
r->infinity = 0;
|
||||
if (rzr != NULL) {
|
||||
*rzr = h;
|
||||
}
|
||||
secp256k1_fe_mul(&r->z, &a->z, &h);
|
||||
|
||||
secp256k1_fe_sqr(&h2, &h);
|
||||
secp256k1_fe_negate(&h2, &h2, 1);
|
||||
secp256k1_fe_mul(&h3, &h2, &h);
|
||||
secp256k1_fe_mul(&t, &u1, &h2);
|
||||
r->x = t; secp256k1_fe_mul_int(&r->x, 2); secp256k1_fe_add(&r->x, &h3); secp256k1_fe_negate(&r->x, &r->x, 3); secp256k1_fe_add(&r->x, &i2);
|
||||
secp256k1_fe_negate(&r->y, &r->x, 5); secp256k1_fe_add(&r->y, &t); secp256k1_fe_mul(&r->y, &r->y, &i);
|
||||
secp256k1_fe_mul(&h3, &h3, &s1); secp256k1_fe_negate(&h3, &h3, 1);
|
||||
|
||||
secp256k1_fe_sqr(&r->x, &i);
|
||||
secp256k1_fe_add(&r->x, &h3);
|
||||
secp256k1_fe_add(&r->x, &t);
|
||||
secp256k1_fe_add(&r->x, &t);
|
||||
|
||||
secp256k1_fe_add(&t, &r->x);
|
||||
secp256k1_fe_mul(&r->y, &t, &i);
|
||||
secp256k1_fe_mul(&h3, &h3, &s1);
|
||||
secp256k1_fe_add(&r->y, &h3);
|
||||
}
|
||||
|
||||
static void secp256k1_gej_add_zinv_var(secp256k1_gej *r, const secp256k1_gej *a, const secp256k1_ge *b, const secp256k1_fe *bzinv) {
|
||||
/* 9 mul, 3 sqr, 4 normalize, 12 mul_int/add/negate */
|
||||
secp256k1_fe az, z12, u1, u2, s1, s2, h, i, i2, h2, h3, t;
|
||||
/* 9 mul, 3 sqr, 13 add/negate/normalize_weak/normalizes_to_zero (ignoring special cases) */
|
||||
secp256k1_fe az, z12, u1, u2, s1, s2, h, i, h2, h3, t;
|
||||
|
||||
if (b->infinity) {
|
||||
*r = *a;
|
||||
return;
|
||||
}
|
||||
if (a->infinity) {
|
||||
secp256k1_fe bzinv2, bzinv3;
|
||||
r->infinity = b->infinity;
|
||||
@@ -449,7 +460,10 @@ static void secp256k1_gej_add_zinv_var(secp256k1_gej *r, const secp256k1_gej *a,
|
||||
secp256k1_fe_set_int(&r->z, 1);
|
||||
return;
|
||||
}
|
||||
r->infinity = 0;
|
||||
if (b->infinity) {
|
||||
*r = *a;
|
||||
return;
|
||||
}
|
||||
|
||||
/** We need to calculate (rx,ry,rz) = (ax,ay,az) + (bx,by,1/bzinv). Due to
|
||||
* secp256k1's isomorphism we can multiply the Z coordinates on both sides
|
||||
@@ -467,7 +481,7 @@ static void secp256k1_gej_add_zinv_var(secp256k1_gej *r, const secp256k1_gej *a,
|
||||
s1 = a->y; secp256k1_fe_normalize_weak(&s1);
|
||||
secp256k1_fe_mul(&s2, &b->y, &z12); secp256k1_fe_mul(&s2, &s2, &az);
|
||||
secp256k1_fe_negate(&h, &u1, 1); secp256k1_fe_add(&h, &u2);
|
||||
secp256k1_fe_negate(&i, &s1, 1); secp256k1_fe_add(&i, &s2);
|
||||
secp256k1_fe_negate(&i, &s2, 1); secp256k1_fe_add(&i, &s1);
|
||||
if (secp256k1_fe_normalizes_to_zero_var(&h)) {
|
||||
if (secp256k1_fe_normalizes_to_zero_var(&i)) {
|
||||
secp256k1_gej_double_var(r, a, NULL);
|
||||
@@ -476,14 +490,23 @@ static void secp256k1_gej_add_zinv_var(secp256k1_gej *r, const secp256k1_gej *a,
|
||||
}
|
||||
return;
|
||||
}
|
||||
secp256k1_fe_sqr(&i2, &i);
|
||||
|
||||
r->infinity = 0;
|
||||
secp256k1_fe_mul(&r->z, &a->z, &h);
|
||||
|
||||
secp256k1_fe_sqr(&h2, &h);
|
||||
secp256k1_fe_mul(&h3, &h, &h2);
|
||||
r->z = a->z; secp256k1_fe_mul(&r->z, &r->z, &h);
|
||||
secp256k1_fe_negate(&h2, &h2, 1);
|
||||
secp256k1_fe_mul(&h3, &h2, &h);
|
||||
secp256k1_fe_mul(&t, &u1, &h2);
|
||||
r->x = t; secp256k1_fe_mul_int(&r->x, 2); secp256k1_fe_add(&r->x, &h3); secp256k1_fe_negate(&r->x, &r->x, 3); secp256k1_fe_add(&r->x, &i2);
|
||||
secp256k1_fe_negate(&r->y, &r->x, 5); secp256k1_fe_add(&r->y, &t); secp256k1_fe_mul(&r->y, &r->y, &i);
|
||||
secp256k1_fe_mul(&h3, &h3, &s1); secp256k1_fe_negate(&h3, &h3, 1);
|
||||
|
||||
secp256k1_fe_sqr(&r->x, &i);
|
||||
secp256k1_fe_add(&r->x, &h3);
|
||||
secp256k1_fe_add(&r->x, &t);
|
||||
secp256k1_fe_add(&r->x, &t);
|
||||
|
||||
secp256k1_fe_add(&t, &r->x);
|
||||
secp256k1_fe_mul(&r->y, &t, &i);
|
||||
secp256k1_fe_mul(&h3, &h3, &s1);
|
||||
secp256k1_fe_add(&r->y, &h3);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user