Merge #797: Fix Jacobi benchmarks and other benchmark improvements

cb5524adc589d3ae5066a1aa2f818bbfb91d0b1d Add benchmark for secp256k1_ge_set_gej_var (Pieter Wuille)
5c6af60ec5f1f4bc7883737ba34dd1789f1e9bd8 Make jacobi benchmarks vary inputs (Pieter Wuille)
d0fdd5f00969861ebe3e48d39be6d5f706b9b17c Randomize the Z coordinates in bench_internal (Pieter Wuille)
c7a3424c5f45a538ef141402a653b038e050a1ac Rename bench_internal variables (Pieter Wuille)

Pull request description:

ACKs for top commit:
  real-or-random:
    ACK cb5524adc589d3ae5066a1aa2f818bbfb91d0b1d
  jonasnick:
    ACK cb5524adc589d3ae5066a1aa2f818bbfb91d0b1d

Tree-SHA512: 0cbcfffebebf563cf9a1bd951394a0419503ffd43a2d0df4c99e4a839c89c8454925314f7e7eee0c01bce94b6dfeab935f36cc27f9bfc878f702313d455db7e1
This commit is contained in:
Jonas Nick 2020-09-10 11:37:53 +00:00
commit f3733c5433
No known key found for this signature in database
GPG Key ID: 4861DBF262123605

View File

@ -20,10 +20,10 @@
#include "secp256k1.c" #include "secp256k1.c"
typedef struct { typedef struct {
secp256k1_scalar scalar_x, scalar_y; secp256k1_scalar scalar[2];
secp256k1_fe fe_x, fe_y; secp256k1_fe fe[4];
secp256k1_ge ge_x, ge_y; secp256k1_ge ge[2];
secp256k1_gej gej_x, gej_y; secp256k1_gej gej[2];
unsigned char data[64]; unsigned char data[64];
int wnaf[256]; int wnaf[256];
} bench_inv; } bench_inv;
@ -31,30 +31,53 @@ typedef struct {
void bench_setup(void* arg) { void bench_setup(void* arg) {
bench_inv *data = (bench_inv*)arg; bench_inv *data = (bench_inv*)arg;
static const unsigned char init_x[32] = { static const unsigned char init[4][32] = {
0x02, 0x03, 0x05, 0x07, 0x0b, 0x0d, 0x11, 0x13, /* Initializer for scalar[0], fe[0], first half of data, the X coordinate of ge[0],
0x17, 0x1d, 0x1f, 0x25, 0x29, 0x2b, 0x2f, 0x35, and the (implied affine) X coordinate of gej[0]. */
0x3b, 0x3d, 0x43, 0x47, 0x49, 0x4f, 0x53, 0x59, {
0x61, 0x65, 0x67, 0x6b, 0x6d, 0x71, 0x7f, 0x83 0x02, 0x03, 0x05, 0x07, 0x0b, 0x0d, 0x11, 0x13,
0x17, 0x1d, 0x1f, 0x25, 0x29, 0x2b, 0x2f, 0x35,
0x3b, 0x3d, 0x43, 0x47, 0x49, 0x4f, 0x53, 0x59,
0x61, 0x65, 0x67, 0x6b, 0x6d, 0x71, 0x7f, 0x83
},
/* Initializer for scalar[1], fe[1], first half of data, the X coordinate of ge[1],
and the (implied affine) X coordinate of gej[1]. */
{
0x82, 0x83, 0x85, 0x87, 0x8b, 0x8d, 0x81, 0x83,
0x97, 0xad, 0xaf, 0xb5, 0xb9, 0xbb, 0xbf, 0xc5,
0xdb, 0xdd, 0xe3, 0xe7, 0xe9, 0xef, 0xf3, 0xf9,
0x11, 0x15, 0x17, 0x1b, 0x1d, 0xb1, 0xbf, 0xd3
},
/* Initializer for fe[2] and the Z coordinate of gej[0]. */
{
0x3d, 0x2d, 0xef, 0xf4, 0x25, 0x98, 0x4f, 0x5d,
0xe2, 0xca, 0x5f, 0x41, 0x3f, 0x3f, 0xce, 0x44,
0xaa, 0x2c, 0x53, 0x8a, 0xc6, 0x59, 0x1f, 0x38,
0x38, 0x23, 0xe4, 0x11, 0x27, 0xc6, 0xa0, 0xe7
},
/* Initializer for fe[3] and the Z coordinate of gej[1]. */
{
0xbd, 0x21, 0xa5, 0xe1, 0x13, 0x50, 0x73, 0x2e,
0x52, 0x98, 0xc8, 0x9e, 0xab, 0x00, 0xa2, 0x68,
0x43, 0xf5, 0xd7, 0x49, 0x80, 0x72, 0xa7, 0xf3,
0xd7, 0x60, 0xe6, 0xab, 0x90, 0x92, 0xdf, 0xc5
}
}; };
static const unsigned char init_y[32] = { secp256k1_scalar_set_b32(&data->scalar[0], init[0], NULL);
0x82, 0x83, 0x85, 0x87, 0x8b, 0x8d, 0x81, 0x83, secp256k1_scalar_set_b32(&data->scalar[1], init[1], NULL);
0x97, 0xad, 0xaf, 0xb5, 0xb9, 0xbb, 0xbf, 0xc5, secp256k1_fe_set_b32(&data->fe[0], init[0]);
0xdb, 0xdd, 0xe3, 0xe7, 0xe9, 0xef, 0xf3, 0xf9, secp256k1_fe_set_b32(&data->fe[1], init[1]);
0x11, 0x15, 0x17, 0x1b, 0x1d, 0xb1, 0xbf, 0xd3 secp256k1_fe_set_b32(&data->fe[2], init[2]);
}; secp256k1_fe_set_b32(&data->fe[3], init[3]);
CHECK(secp256k1_ge_set_xo_var(&data->ge[0], &data->fe[0], 0));
secp256k1_scalar_set_b32(&data->scalar_x, init_x, NULL); CHECK(secp256k1_ge_set_xo_var(&data->ge[1], &data->fe[1], 1));
secp256k1_scalar_set_b32(&data->scalar_y, init_y, NULL); secp256k1_gej_set_ge(&data->gej[0], &data->ge[0]);
secp256k1_fe_set_b32(&data->fe_x, init_x); secp256k1_gej_rescale(&data->gej[0], &data->fe[2]);
secp256k1_fe_set_b32(&data->fe_y, init_y); secp256k1_gej_set_ge(&data->gej[1], &data->ge[1]);
CHECK(secp256k1_ge_set_xo_var(&data->ge_x, &data->fe_x, 0)); secp256k1_gej_rescale(&data->gej[1], &data->fe[3]);
CHECK(secp256k1_ge_set_xo_var(&data->ge_y, &data->fe_y, 1)); memcpy(data->data, init[0], 32);
secp256k1_gej_set_ge(&data->gej_x, &data->ge_x); memcpy(data->data + 32, init[1], 32);
secp256k1_gej_set_ge(&data->gej_y, &data->ge_y);
memcpy(data->data, init_x, 32);
memcpy(data->data + 32, init_y, 32);
} }
void bench_scalar_add(void* arg, int iters) { void bench_scalar_add(void* arg, int iters) {
@ -62,7 +85,7 @@ void bench_scalar_add(void* arg, int iters) {
bench_inv *data = (bench_inv*)arg; bench_inv *data = (bench_inv*)arg;
for (i = 0; i < iters; i++) { for (i = 0; i < iters; i++) {
j += secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y); j += secp256k1_scalar_add(&data->scalar[0], &data->scalar[0], &data->scalar[1]);
} }
CHECK(j <= iters); CHECK(j <= iters);
} }
@ -72,7 +95,7 @@ void bench_scalar_negate(void* arg, int iters) {
bench_inv *data = (bench_inv*)arg; bench_inv *data = (bench_inv*)arg;
for (i = 0; i < iters; i++) { for (i = 0; i < iters; i++) {
secp256k1_scalar_negate(&data->scalar_x, &data->scalar_x); secp256k1_scalar_negate(&data->scalar[0], &data->scalar[0]);
} }
} }
@ -81,7 +104,7 @@ void bench_scalar_sqr(void* arg, int iters) {
bench_inv *data = (bench_inv*)arg; bench_inv *data = (bench_inv*)arg;
for (i = 0; i < iters; i++) { for (i = 0; i < iters; i++) {
secp256k1_scalar_sqr(&data->scalar_x, &data->scalar_x); secp256k1_scalar_sqr(&data->scalar[0], &data->scalar[0]);
} }
} }
@ -90,7 +113,7 @@ void bench_scalar_mul(void* arg, int iters) {
bench_inv *data = (bench_inv*)arg; bench_inv *data = (bench_inv*)arg;
for (i = 0; i < iters; i++) { for (i = 0; i < iters; i++) {
secp256k1_scalar_mul(&data->scalar_x, &data->scalar_x, &data->scalar_y); secp256k1_scalar_mul(&data->scalar[0], &data->scalar[0], &data->scalar[1]);
} }
} }
@ -100,8 +123,8 @@ void bench_scalar_split(void* arg, int iters) {
bench_inv *data = (bench_inv*)arg; bench_inv *data = (bench_inv*)arg;
for (i = 0; i < iters; i++) { for (i = 0; i < iters; i++) {
secp256k1_scalar_split_lambda(&data->scalar_x, &data->scalar_y, &data->scalar_x); secp256k1_scalar_split_lambda(&data->scalar[0], &data->scalar[1], &data->scalar[0]);
j += secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y); j += secp256k1_scalar_add(&data->scalar[0], &data->scalar[0], &data->scalar[1]);
} }
CHECK(j <= iters); CHECK(j <= iters);
} }
@ -112,8 +135,8 @@ void bench_scalar_inverse(void* arg, int iters) {
bench_inv *data = (bench_inv*)arg; bench_inv *data = (bench_inv*)arg;
for (i = 0; i < iters; i++) { for (i = 0; i < iters; i++) {
secp256k1_scalar_inverse(&data->scalar_x, &data->scalar_x); secp256k1_scalar_inverse(&data->scalar[0], &data->scalar[0]);
j += secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y); j += secp256k1_scalar_add(&data->scalar[0], &data->scalar[0], &data->scalar[1]);
} }
CHECK(j <= iters); CHECK(j <= iters);
} }
@ -123,8 +146,8 @@ void bench_scalar_inverse_var(void* arg, int iters) {
bench_inv *data = (bench_inv*)arg; bench_inv *data = (bench_inv*)arg;
for (i = 0; i < iters; i++) { for (i = 0; i < iters; i++) {
secp256k1_scalar_inverse_var(&data->scalar_x, &data->scalar_x); secp256k1_scalar_inverse_var(&data->scalar[0], &data->scalar[0]);
j += secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y); j += secp256k1_scalar_add(&data->scalar[0], &data->scalar[0], &data->scalar[1]);
} }
CHECK(j <= iters); CHECK(j <= iters);
} }
@ -134,7 +157,7 @@ void bench_field_normalize(void* arg, int iters) {
bench_inv *data = (bench_inv*)arg; bench_inv *data = (bench_inv*)arg;
for (i = 0; i < iters; i++) { for (i = 0; i < iters; i++) {
secp256k1_fe_normalize(&data->fe_x); secp256k1_fe_normalize(&data->fe[0]);
} }
} }
@ -143,7 +166,7 @@ void bench_field_normalize_weak(void* arg, int iters) {
bench_inv *data = (bench_inv*)arg; bench_inv *data = (bench_inv*)arg;
for (i = 0; i < iters; i++) { for (i = 0; i < iters; i++) {
secp256k1_fe_normalize_weak(&data->fe_x); secp256k1_fe_normalize_weak(&data->fe[0]);
} }
} }
@ -152,7 +175,7 @@ void bench_field_mul(void* arg, int iters) {
bench_inv *data = (bench_inv*)arg; bench_inv *data = (bench_inv*)arg;
for (i = 0; i < iters; i++) { for (i = 0; i < iters; i++) {
secp256k1_fe_mul(&data->fe_x, &data->fe_x, &data->fe_y); secp256k1_fe_mul(&data->fe[0], &data->fe[0], &data->fe[1]);
} }
} }
@ -161,7 +184,7 @@ void bench_field_sqr(void* arg, int iters) {
bench_inv *data = (bench_inv*)arg; bench_inv *data = (bench_inv*)arg;
for (i = 0; i < iters; i++) { for (i = 0; i < iters; i++) {
secp256k1_fe_sqr(&data->fe_x, &data->fe_x); secp256k1_fe_sqr(&data->fe[0], &data->fe[0]);
} }
} }
@ -170,8 +193,8 @@ void bench_field_inverse(void* arg, int iters) {
bench_inv *data = (bench_inv*)arg; bench_inv *data = (bench_inv*)arg;
for (i = 0; i < iters; i++) { for (i = 0; i < iters; i++) {
secp256k1_fe_inv(&data->fe_x, &data->fe_x); secp256k1_fe_inv(&data->fe[0], &data->fe[0]);
secp256k1_fe_add(&data->fe_x, &data->fe_y); secp256k1_fe_add(&data->fe[0], &data->fe[1]);
} }
} }
@ -180,8 +203,8 @@ void bench_field_inverse_var(void* arg, int iters) {
bench_inv *data = (bench_inv*)arg; bench_inv *data = (bench_inv*)arg;
for (i = 0; i < iters; i++) { for (i = 0; i < iters; i++) {
secp256k1_fe_inv_var(&data->fe_x, &data->fe_x); secp256k1_fe_inv_var(&data->fe[0], &data->fe[0]);
secp256k1_fe_add(&data->fe_x, &data->fe_y); secp256k1_fe_add(&data->fe[0], &data->fe[1]);
} }
} }
@ -191,9 +214,9 @@ void bench_field_sqrt(void* arg, int iters) {
secp256k1_fe t; secp256k1_fe t;
for (i = 0; i < iters; i++) { for (i = 0; i < iters; i++) {
t = data->fe_x; t = data->fe[0];
j += secp256k1_fe_sqrt(&data->fe_x, &t); j += secp256k1_fe_sqrt(&data->fe[0], &t);
secp256k1_fe_add(&data->fe_x, &data->fe_y); secp256k1_fe_add(&data->fe[0], &data->fe[1]);
} }
CHECK(j <= iters); CHECK(j <= iters);
} }
@ -203,7 +226,7 @@ void bench_group_double_var(void* arg, int iters) {
bench_inv *data = (bench_inv*)arg; bench_inv *data = (bench_inv*)arg;
for (i = 0; i < iters; i++) { for (i = 0; i < iters; i++) {
secp256k1_gej_double_var(&data->gej_x, &data->gej_x, NULL); secp256k1_gej_double_var(&data->gej[0], &data->gej[0], NULL);
} }
} }
@ -212,7 +235,7 @@ void bench_group_add_var(void* arg, int iters) {
bench_inv *data = (bench_inv*)arg; bench_inv *data = (bench_inv*)arg;
for (i = 0; i < iters; i++) { for (i = 0; i < iters; i++) {
secp256k1_gej_add_var(&data->gej_x, &data->gej_x, &data->gej_y, NULL); secp256k1_gej_add_var(&data->gej[0], &data->gej[0], &data->gej[1], NULL);
} }
} }
@ -221,7 +244,7 @@ void bench_group_add_affine(void* arg, int iters) {
bench_inv *data = (bench_inv*)arg; bench_inv *data = (bench_inv*)arg;
for (i = 0; i < iters; i++) { for (i = 0; i < iters; i++) {
secp256k1_gej_add_ge(&data->gej_x, &data->gej_x, &data->ge_y); secp256k1_gej_add_ge(&data->gej[0], &data->gej[0], &data->ge[1]);
} }
} }
@ -230,7 +253,7 @@ void bench_group_add_affine_var(void* arg, int iters) {
bench_inv *data = (bench_inv*)arg; bench_inv *data = (bench_inv*)arg;
for (i = 0; i < iters; i++) { for (i = 0; i < iters; i++) {
secp256k1_gej_add_ge_var(&data->gej_x, &data->gej_x, &data->ge_y, NULL); secp256k1_gej_add_ge_var(&data->gej[0], &data->gej[0], &data->ge[1], NULL);
} }
} }
@ -239,9 +262,37 @@ void bench_group_jacobi_var(void* arg, int iters) {
bench_inv *data = (bench_inv*)arg; bench_inv *data = (bench_inv*)arg;
for (i = 0; i < iters; i++) { for (i = 0; i < iters; i++) {
j += secp256k1_gej_has_quad_y_var(&data->gej_x); j += secp256k1_gej_has_quad_y_var(&data->gej[0]);
/* Vary the Y and Z coordinates of the input (the X coordinate doesn't matter to
secp256k1_gej_has_quad_y_var). Note that the resulting coordinates will
generally not correspond to a point on the curve, but this is not a problem
for the code being benchmarked here. Adding and normalizing have less
overhead than EC operations (which could guarantee the point remains on the
curve). */
secp256k1_fe_add(&data->gej[0].y, &data->fe[1]);
secp256k1_fe_add(&data->gej[0].z, &data->fe[2]);
secp256k1_fe_normalize_var(&data->gej[0].y);
secp256k1_fe_normalize_var(&data->gej[0].z);
}
CHECK(j <= iters);
}
void bench_group_to_affine_var(void* arg, int iters) {
int i;
bench_inv *data = (bench_inv*)arg;
for (i = 0; i < iters; ++i) {
secp256k1_ge_set_gej_var(&data->ge[1], &data->gej[0]);
/* Use the output affine X/Y coordinates to vary the input X/Y/Z coordinates.
Similar to bench_group_jacobi_var, this approach does not result in
coordinates of points on the curve. */
secp256k1_fe_add(&data->gej[0].x, &data->ge[1].y);
secp256k1_fe_add(&data->gej[0].y, &data->fe[2]);
secp256k1_fe_add(&data->gej[0].z, &data->ge[1].x);
secp256k1_fe_normalize_var(&data->gej[0].x);
secp256k1_fe_normalize_var(&data->gej[0].y);
secp256k1_fe_normalize_var(&data->gej[0].z);
} }
CHECK(j == iters);
} }
void bench_ecmult_wnaf(void* arg, int iters) { void bench_ecmult_wnaf(void* arg, int iters) {
@ -249,8 +300,8 @@ void bench_ecmult_wnaf(void* arg, int iters) {
bench_inv *data = (bench_inv*)arg; bench_inv *data = (bench_inv*)arg;
for (i = 0; i < iters; i++) { for (i = 0; i < iters; i++) {
bits += secp256k1_ecmult_wnaf(data->wnaf, 256, &data->scalar_x, WINDOW_A); bits += secp256k1_ecmult_wnaf(data->wnaf, 256, &data->scalar[0], WINDOW_A);
overflow += secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y); overflow += secp256k1_scalar_add(&data->scalar[0], &data->scalar[0], &data->scalar[1]);
} }
CHECK(overflow >= 0); CHECK(overflow >= 0);
CHECK(bits <= 256*iters); CHECK(bits <= 256*iters);
@ -261,8 +312,8 @@ void bench_wnaf_const(void* arg, int iters) {
bench_inv *data = (bench_inv*)arg; bench_inv *data = (bench_inv*)arg;
for (i = 0; i < iters; i++) { for (i = 0; i < iters; i++) {
bits += secp256k1_wnaf_const(data->wnaf, &data->scalar_x, WINDOW_A, 256); bits += secp256k1_wnaf_const(data->wnaf, &data->scalar[0], WINDOW_A, 256);
overflow += secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y); overflow += secp256k1_scalar_add(&data->scalar[0], &data->scalar[0], &data->scalar[1]);
} }
CHECK(overflow >= 0); CHECK(overflow >= 0);
CHECK(bits <= 256*iters); CHECK(bits <= 256*iters);
@ -324,14 +375,15 @@ void bench_context_sign(void* arg, int iters) {
void bench_num_jacobi(void* arg, int iters) { void bench_num_jacobi(void* arg, int iters) {
int i, j = 0; int i, j = 0;
bench_inv *data = (bench_inv*)arg; bench_inv *data = (bench_inv*)arg;
secp256k1_num nx, norder; secp256k1_num nx, na, norder;
secp256k1_scalar_get_num(&nx, &data->scalar_x); secp256k1_scalar_get_num(&nx, &data->scalar[0]);
secp256k1_scalar_order_get_num(&norder); secp256k1_scalar_order_get_num(&norder);
secp256k1_scalar_get_num(&norder, &data->scalar_y); secp256k1_scalar_get_num(&na, &data->scalar[1]);
for (i = 0; i < iters; i++) { for (i = 0; i < iters; i++) {
j += secp256k1_num_jacobi(&nx, &norder); j += secp256k1_num_jacobi(&nx, &norder);
secp256k1_num_add(&nx, &nx, &na);
} }
CHECK(j <= iters); CHECK(j <= iters);
} }
@ -364,6 +416,7 @@ int main(int argc, char **argv) {
if (have_flag(argc, argv, "group") || have_flag(argc, argv, "add")) run_benchmark("group_add_affine", bench_group_add_affine, bench_setup, NULL, &data, 10, iters*10); if (have_flag(argc, argv, "group") || have_flag(argc, argv, "add")) run_benchmark("group_add_affine", bench_group_add_affine, bench_setup, NULL, &data, 10, iters*10);
if (have_flag(argc, argv, "group") || have_flag(argc, argv, "add")) run_benchmark("group_add_affine_var", bench_group_add_affine_var, bench_setup, NULL, &data, 10, iters*10); if (have_flag(argc, argv, "group") || have_flag(argc, argv, "add")) run_benchmark("group_add_affine_var", bench_group_add_affine_var, bench_setup, NULL, &data, 10, iters*10);
if (have_flag(argc, argv, "group") || have_flag(argc, argv, "jacobi")) run_benchmark("group_jacobi_var", bench_group_jacobi_var, bench_setup, NULL, &data, 10, iters); if (have_flag(argc, argv, "group") || have_flag(argc, argv, "jacobi")) run_benchmark("group_jacobi_var", bench_group_jacobi_var, bench_setup, NULL, &data, 10, iters);
if (have_flag(argc, argv, "group") || have_flag(argc, argv, "to_affine")) run_benchmark("group_to_affine_var", bench_group_to_affine_var, bench_setup, NULL, &data, 10, iters);
if (have_flag(argc, argv, "ecmult") || have_flag(argc, argv, "wnaf")) run_benchmark("wnaf_const", bench_wnaf_const, bench_setup, NULL, &data, 10, iters); if (have_flag(argc, argv, "ecmult") || have_flag(argc, argv, "wnaf")) run_benchmark("wnaf_const", bench_wnaf_const, bench_setup, NULL, &data, 10, iters);
if (have_flag(argc, argv, "ecmult") || have_flag(argc, argv, "wnaf")) run_benchmark("ecmult_wnaf", bench_ecmult_wnaf, bench_setup, NULL, &data, 10, iters); if (have_flag(argc, argv, "ecmult") || have_flag(argc, argv, "wnaf")) run_benchmark("ecmult_wnaf", bench_ecmult_wnaf, bench_setup, NULL, &data, 10, iters);