diff --git a/Makefile b/Makefile
index 2dc9184c..ea88584c 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ JAVA_FILES := src/java/org_bitcoin_NativeSecp256k1.h src/java/org_bitcoin_Native
 
 OBJS :=
 ifeq ($(USE_ASM), 1)
-    OBJS := $(OBJS) obj/field_5x$(HAVE_LIMB)_asm.o
+    OBJS := $(OBJS) obj/field_5x52_asm.o
 endif
 
 STD="gnu99"
@@ -20,9 +20,6 @@ clean:
 obj/field_5x52_asm.o: src/field_5x52_asm.asm
 	$(YASM) -f elf64 -o obj/field_5x52_asm.o src/field_5x52_asm.asm
 
-obj/field_5x64_asm.o: src/field_5x64_asm.asm
-	$(YASM) -f elf64 -o obj/field_5x64_asm.o src/field_5x64_asm.asm
-
 obj/secp256k1.o: $(FILES) src/secp256k1.c include/secp256k1.h
 	$(CC) -fPIC -std=$(STD) $(CFLAGS) $(CFLAGS_EXTRA) -DNDEBUG -$(OPTLEVEL) src/secp256k1.c -c -o obj/secp256k1.o
diff --git a/bench_all b/bench_all
deleted file mode 100644
index d9f411e5..00000000
--- a/bench_all
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/bin/bash
-echo "Benchmark Results" >output.txt
-for j in yasm; do
-  echo "5x64 $j:" >>output.txt
-  for i in O0 O1 O2 O3; do
-    make clean
-    ./configure --use-5x64 --$j
-    echo "OPTLEVEL=$i" >>config.mk
-    make bench
-    echo "OPTLEVEL=$i" >>output.txt
-    (time ./bench) |& grep real >>output.txt
-  done
-done
-
diff --git a/configure b/configure
index 8878561a..c3b95c22 100755
--- a/configure
+++ b/configure
@@ -97,9 +97,6 @@ if [ "$?" = 0 ]; then
   HAVE_INT128=1
 fi
 
-#default limb size
-HAVE_LIMB=52
-
 for arg in "$@"; do
   case "$arg" in
   --no-yasm)
@@ -110,9 +107,6 @@ for arg in "$@"; do
     ;;
   --no-openssl)
     HAVE_OPENSSL=0
-    ;;
-  --use-5x64)
-    HAVE_LIMB=64
     ;;
   --use-endomorphism)
     USE_ENDOMORPHISM=1
@@ -126,10 +120,10 @@ USE_ASM=0
 
 # select field implementation
 if [ "$HAVE_YASM" = "1" ]; then
-  CFLAGS_FIELD="-DUSE_FIELD_5X$HAVE_LIMB -DUSE_FIELD_5X${HAVE_LIMB}_ASM"
+  CFLAGS_FIELD="-DUSE_FIELD_5X52 -DUSE_FIELD_5X52_ASM"
   USE_ASM=1
 elif [ "$HAVE_INT128" = "1" ]; then
-  CFLAGS_FIELD="-DUSE_FIELD_5X$HAVE_LIMB -DUSE_FIELD_5X${HAVE_LIMB}_INT128"
+  CFLAGS_FIELD="-DUSE_FIELD_5X52 -DUSE_FIELD_5X52_INT128"
 elif [ "$HAVE_GMP" = "1" ]; then
   CFLAGS_FIELD="-DUSE_FIELD_GMP"
   LINK_GMP=1
@@ -176,5 +170,4 @@ echo "CFLAGS_TEST_EXTRA=$CFLAGS_TEST_EXTRA" >> config.mk
 echo "LDFLAGS_EXTRA=$LDFLAGS_EXTRA" >> config.mk
 echo "LDFLAGS_TEST_EXTRA=$LDFLAGS_TEST_EXTRA" >> config.mk
 echo "USE_ASM=$USE_ASM" >>config.mk
-echo "HAVE_LIMB=$HAVE_LIMB" >>config.mk
 echo "OPTLEVEL=O2" >>config.mk
diff --git a/src/field.h b/src/field.h
index 4e0f2617..7d187d02 100644
--- a/src/field.h
+++ b/src/field.h
@@ -22,8 +22,6 @@
 #include "field_10x26.h"
 #elif defined(USE_FIELD_5X52)
 #include "field_5x52.h"
-#elif defined(USE_FIELD_5X64)
-#include "field_5x64.h"
 #else
 #error "Please select field implementation"
 #endif
diff --git a/src/field_5x64.h b/src/field_5x64.h
deleted file mode 100644
index f3d47f54..00000000
--- a/src/field_5x64.h
+++ /dev/null
@@ -1,19 +0,0 @@
-// Copyright (c) 2013 Pieter Wuille
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
-
-#ifndef _SECP256K1_FIELD_REPR_
-#define _SECP256K1_FIELD_REPR_
-
-#include <stdint.h>
-
-typedef struct {
-    // X = sum(i=0..4, elem[i]*2^64) mod n
-    uint64_t n[5];
-#ifdef VERIFY
-    int reduced; // n[4] == 0
-    int normalized; // reduced and X < 2^256 - 0x100003D1
-#endif
-} secp256k1_fe_t;
-
-#endif
diff --git a/src/field_5x64_asm.asm b/src/field_5x64_asm.asm
deleted file mode 100644
index d449185c..00000000
--- a/src/field_5x64_asm.asm
+++ /dev/null
@@ -1,332 +0,0 @@
-	;; Added by Diederik Huys, March 2013
-	;;
-	;; Provided public procedures:
-	;;	secp256k1_fe_mul_inner
-	;;	secp256k1_fe_sqr_inner
-	;;
-	;; Needed tools: YASM (http://yasm.tortall.net)
-	;;
-	;;
-
-	BITS 64
-
-COMP_LIMB	EQU	000000001000003D1h
-
-	;; Procedure ExSetMult
-	;; Register Layout:
-	;;  INPUT:	rdi	= a->n
-	;;		rsi	= b->n
-	;;		rdx	= r->a
-	;;
-	;;  INTERNAL:	rdx:rax	= multiplication accumulator
-	;;		r8-r10	= c0-c2
-	;;		r11-r15	= b.n[0]-b.n[4] / r3 - r7
-	;;		rbx	= r0
-	;;		rcx	= r1
-	;;		rbp	= r2
-	;;
-	GLOBAL secp256k1_fe_mul_inner
-	ALIGN 32
-secp256k1_fe_mul_inner:
-	push rbp
-	push rbx
-	push r12
-	push r13
-	push r14
-	push r15
-	push rdx
-
-	mov r11,[rsi+8*0]	; preload b.n[0]
-
-	;; step 1: mul_c2
-	mov rax,[rdi+0*8]	; load a.n[0]
-	mul r11			; rdx:rax=a.n[0]*b.n[0]
-	mov r12,[rsi+1*8]	; preload b.n[1]
-	mov rbx,rax		; retire LO qword (r[0])
-	mov r8,rdx		; save overflow
-	xor r9,r9		; overflow HO qwords
-	xor r10,r10
-
-	;; c+=a.n[0] * b.n[1] + a.n[1] * b.n[0]
-	mov rax,[rdi+0*8]
-	mul r12
-	mov r13,[rsi+2*8]	; preload b.n[2]
-	add r8,rax		; still the same :-)
-	adc r9,rdx		;
-	adc r10,0		; mmm...
-
-	mov rax,[rdi+1*8]
-	mul r11
-	add r8,rax
-	adc r9,rdx
-	adc r10,0
-	mov rcx,r8		; retire r[1]
-	xor r8,r8
-
-	;; c+=a.n[0 1 2] * b.n[2 1 0]
-	mov rax,[rdi+0*8]
-	mul r13
-	mov r14,[rsi+3*8]	; preload b.n[3]
-	add r9,rax
-	adc r10,rdx
-	adc r8,0
-
-	mov rax,[rdi+1*8]
-	mul r12
-	add r9,rax
-	adc r10,rdx
-	adc r8,0
-
-	mov rax,[rdi+2*8]
-	mul r11
-	add r9,rax
-	adc r10,rdx
-	adc r8,0
-	mov rbp,r9		; retire r[2]
-	xor r9,r9
-
-	;; c+=a.n[0 1 2 3] * b.n[3 2 1 0]
-	mov rax,[rdi+0*8]
-	mul r14
-	add r10,rax
-	adc r8,rdx
-	adc r9,0
-
-	mov rax,[rdi+1*8]
-	mul r13
-	add r10,rax
-	adc r8,rdx
-	adc r9,0
-
-	mov rax,[rdi+2*8]
-	mul r12
-	add r10,rax
-	adc r8,rdx
-	adc r9,0
-
-	mov rax,[rdi+3*8]
-	mul r11
-	add r10,rax
-	adc r8,rdx
-	adc r9,0
-	mov r11,r10		; retire r[3]
-	xor r10,r10
-
-	;; c+=a.n[1 2 3] * b.n[3 2 1]
-	mov rax,[rdi+1*8]
-	mul r14
-	add r8,rax
-	adc r9,rdx
-	adc r10,0
-
-	mov rax,[rdi+2*8]
-	mul r13
-	add r8,rax
-	adc r9,rdx
-	adc r10,0
-
-	mov rax,[rdi+3*8]
-	mul r12
-	add r8,rax
-	adc r9,rdx
-	adc r10,0
-	mov r12,r8		; retire r[4]
-	xor r8,r8
-
-	;; c+=a.n[2 3] * b.n[3 2]
-	mov rax,[rdi+2*8]
-	mul r14
-	add r9,rax		; still the same :-)
-	adc r10,rdx		;
-	adc r8,0		; mmm...
-
-	mov rax,[rdi+3*8]
-	mul r13
-	add r9,rax
-	adc r10,rdx
-	adc r8,0
-	mov r13,r9		; retire r[5]
-	xor r9,r9
-
-	;; c+=a.n[3] * b.n[3]
-	mov rax,[rdi+3*8]
-	mul r14
-	add r10,rax
-	adc r8,rdx
-
-	mov r14,r10
-	mov r15,r8
-
-
-	;; *******************************************************
-common_exit_norm:
-	mov rdi,COMP_LIMB
-	mov rax,r12
-	mul rdi
-	add rax,rbx
-	adc rcx,rdx
-	pop rbx
-	mov [rbx],rax
-
-	mov rax,r13		; get r5
-	mul rdi
-	add rax,rcx		; +r1
-	adc rbp,rdx
-	mov [rbx+1*8],rax
-
-	mov rax,r14		; get r6
-	mul rdi
-	add rax,rbp		; +r2
-	adc r11,rdx
-	mov [rbx+2*8],rax
-
-	mov rax,r15		; get r7
-	mul rdi
-	add rax,r11		; +r3
-	adc rdx,0
-	mov [rbx+3*8],rax
-	mov [rbx+4*8],rdx
-
-	pop r15
-	pop r14
-	pop r13
-	pop r12
-	pop rbx
-	pop rbp
-	ret
-
-
-	;; PROC ExSetSquare
-	;; Register Layout:
-	;;  INPUT:	rdi	= a.n
-	;;		rsi	= this.a
-	;;  INTERNAL:	rdx:rax	= multiplication accumulator
-	;;		r8-r10	= c
-	;;		r11-r15	= a.n[0]-a.n[4] / r3-r7
-	;;		rbx	= r0
-	;;		rcx	= r1
-	;;		rbp	= r2
-	GLOBAL secp256k1_fe_sqr_inner
-
-	ALIGN 32
-secp256k1_fe_sqr_inner:
-	push rbp
-	push rbx
-	push r12
-	push r13
-	push r14
-	push r15
-	push rsi
-
-	mov r11,[rdi+8*0]	; preload a.n[0]
-
-	;; step 1: mul_c2
-	mov rax,r11		; load a.n[0]
-	mul rax			; rdx:rax=a.n[0]²
-	mov r12,[rdi+1*8]	; preload a.n[1]
-	mov rbx,rax		; retire LO qword (r[0])
-	mov r8,rdx		; save overflow
-	xor r9,r9		; overflow HO qwords
-	xor r10,r10
-
-	;; c+=2*a.n[0] * a.n[1]
-	mov rax,r11		; load a.n[0]
-	mul r12			; rdx:rax=a.n[0] * a.n[1]
-	mov r13,[rdi+2*8]	; preload a.n[2]
-	add rax,rax		; rdx:rax*=2
-	adc rdx,rdx
-	adc r10,0
-	add r8,rax		; still the same :-)
-	adc r9,rdx
-	adc r10,0		; mmm...
-
-	mov rcx,r8		; retire r[1]
-	xor r8,r8
-
-	;; c+=2*a.n[0]*a.n[2]+a.n[1]*a.n[1]
-	mov rax,r11		; load a.n[0]
-	mul r13			; * a.n[2]
-	mov r14,[rdi+3*8]	; preload a.n[3]
-	add rax,rax		; rdx:rax*=2
-	adc rdx,rdx
-	adc r8,0
-	add r9,rax
-	adc r10,rdx
-	adc r8,0
-
-	mov rax,r12
-	mul rax
-	add r9,rax
-	adc r10,rdx
-	adc r8,0
-
-
-	mov rbp,r9
-	xor r9,r9
-
-	;; c+=2*a.n[0]*a.n[3]+2*a.n[1]*a.n[2]
-	mov rax,r11		; load a.n[0]
-	mul r14			; * a.n[3]
-	add rax,rax		; rdx:rax*=2
-	adc rdx,rdx
-	adc r9,0
-	add r10,rax
-	adc r8,rdx
-	adc r9,0
-
-	mov rax,r12		; load a.n[1]
-	mul r13			; * a.n[2]
-	add rax,rax
-	adc rdx,rdx
-	adc r9,0
-	add r10,rax
-	adc r8,rdx
-	adc r9,0
-
-	mov r11,r10
-	xor r10,r10
-
-	;; c+=2*a.n[1]*a.n[3]+a.n[2]*a.n[2]
-	mov rax,r12		; load a.n[1]
-	mul r14			; * a.n[3]
-	add rax,rax		; rdx:rax*=2
-	adc rdx,rdx
-	adc r10,0
-	add r8,rax
-	adc r9,rdx
-	adc r10,0
-
-	mov rax,r13
-	mul rax
-	add r8,rax
-	adc r9,rdx
-	adc r10,0
-
-	mov r12,r8
-	xor r8,r8
-	;; c+=2*a.n[2]*a.n[3]
-	mov rax,r13		; load a.n[2]
-	mul r14			; * a.n[3]
-	add rax,rax		; rdx:rax*=2
-	adc rdx,rdx
-	adc r8,0
-	add r9,rax
-	adc r10,rdx
-	adc r8,0
-
-	mov r13,r9
-	xor r9,r9
-
-	;; c+=a.n[3]²
-	mov rax,r14
-	mul rax
-	add r10,rax
-	adc r8,rdx
-
-	mov r14,r10
-	mov r15,r8
-
-	jmp common_exit_norm
-	end
-
diff --git a/src/impl/field.h b/src/impl/field.h
index edb2acad..c43188ad 100644
--- a/src/impl/field.h
+++ b/src/impl/field.h
@@ -11,8 +11,6 @@
 #include "field_10x26.h"
 #elif defined(USE_FIELD_5X52)
 #include "field_5x52.h"
-#elif defined(USE_FIELD_5X64)
-#include "field_5x64.h"
 #else
 #error "Please select field implementation"
 #endif
diff --git a/src/impl/field_5x64.h b/src/impl/field_5x64.h
deleted file mode 100644
index 1e645cdd..00000000
--- a/src/impl/field_5x64.h
+++ /dev/null
@@ -1,371 +0,0 @@
-// Copyright (c) 2013 Pieter Wuille
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
-
-#ifndef _SECP256K1_FIELD_REPR_IMPL_H_
-#define _SECP256K1_FIELD_REPR_IMPL_H_
-
-#include <assert.h>
-#include <string.h>
-#include "../num.h"
-#include "../field.h"
-
-#include <stdint.h>
-#include "field_5x64_asm.h"
-
-/** Implements arithmetic modulo FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFE FFFFFC2F,
- *  represented as 4 uint64_t's in base 2^64, and one overflow uint64_t.
- */
-
-#define FULL_LIMB (0xFFFFFFFFFFFFFFFFULL)
-#define LAST_LIMB (0xFFFFFFFEFFFFFC2FULL)
-#define COMP_LIMB (0x00000001000003D1ULL)
-
-void static secp256k1_fe_inner_start(void) {}
-void static secp256k1_fe_inner_stop(void) {}
-
-void static secp256k1_fe_reduce(secp256k1_fe_t *r) {
-    unsigned __int128 c = (unsigned __int128)r->n[4] * COMP_LIMB + r->n[0];
-    uint64_t n0 = c;
-    c = (c >> 64) + r->n[1];
-    uint64_t n1 = c;
-    c = (c >> 64) + r->n[2];
-    r->n[2] = c;
-    c = (c >> 64) + r->n[3];
-    r->n[3] = c;
-    c = (c >> 64) * COMP_LIMB + n0;
-    r->n[0] = c;
-    r->n[1] = n1 + (c >> 64);
-    assert(r->n[1] >= n1);
-    r->n[4] = 0;
-#ifdef VERIFY
-    r->reduced = 1;
-#endif
-}
-
-void static secp256k1_fe_normalize(secp256k1_fe_t *r) {
-    secp256k1_fe_reduce(r);
-
-    // Subtract p if result >= p
-    uint64_t mask = -(int64_t)((r->n[0] < LAST_LIMB) | (r->n[1] != ~0ULL) | (r->n[2] != ~0ULL) | (r->n[3] != ~0ULL));
-    r->n[0] -= (~mask & LAST_LIMB);
-    r->n[1] &= mask;
-    r->n[2] &= mask;
-    r->n[3] &= mask;
-    assert(r->n[4] == 0);
-
-#ifdef VERIFY
-    r->normalized = 1;
-#endif
-}
-
-void static inline secp256k1_fe_set_int(secp256k1_fe_t *r, int a) {
-    r->n[0] = a;
-    r->n[1] = r->n[2] = r->n[3] = r->n[4] = 0;
-
-#ifdef VERIFY
-    r->reduced = 1;
-    r->normalized = 1;
-#endif
-}
-
-// TODO: not constant time!
-int static inline secp256k1_fe_is_zero(const secp256k1_fe_t *a) {
-#ifdef VERIFY
-    assert(a->normalized);
-#endif
-    return (a->n[0] == 0 && a->n[1] == 0 && a->n[2] == 0 && a->n[3] == 0);
-}
-
-int static inline secp256k1_fe_is_odd(const secp256k1_fe_t *a) {
-#ifdef VERIFY
-    assert(a->normalized);
-#endif
-    return a->n[0] & 1;
-}
-
-// TODO: not constant time!
-int static inline secp256k1_fe_equal(const secp256k1_fe_t *a, const secp256k1_fe_t *b) {
-#ifdef VERIFY
-    assert(a->normalized);
-    assert(b->normalized);
-#endif
-    return (a->n[0] == b->n[0] && a->n[1] == b->n[1] && a->n[2] == b->n[2] && a->n[3] == b->n[3]);
-}
-
-void static secp256k1_fe_set_b32(secp256k1_fe_t *r, const unsigned char *a) {
-    r->n[0] = r->n[1] = r->n[2] = r->n[3] = r->n[4] = 0;
-    for (int i=0; i<32; i++) {
-        r->n[i/8] |= (uint64_t)a[31-i] << (i&7)*8;
-    }
-#ifdef VERIFY
-    r->reduced = 1;
-    r->normalized = 0;
-#endif
-}
-
-/** Convert a field element to a 32-byte big endian value. Requires the input to be normalized */
-void static secp256k1_fe_get_b32(unsigned char *r, const secp256k1_fe_t *a) {
-#ifdef VERIFY
-    assert(a->normalized);
-#endif
-    for (int i=0; i<32; i++) {
-        r[31-i] = a->n[i/8] >> ((i&7)*8);
-    }
-}
-
-void static inline secp256k1_fe_negate(secp256k1_fe_t *r, const secp256k1_fe_t *ac, int m) {
-    secp256k1_fe_t a = *ac;
-    secp256k1_fe_reduce(&a);
-    unsigned __int128 c = (unsigned __int128)(~a.n[0]) + LAST_LIMB + 1;
-    r->n[0] = c;
-    c = (c >> 64) + (~a.n[1]) + FULL_LIMB;
-    r->n[1] = c;
-    c = (c >> 64) + (~a.n[2]) + FULL_LIMB;
-    r->n[2] = c;
-    c = (c >> 64) + (~a.n[3]) + FULL_LIMB;
-    r->n[3] = c;
-    r->n[4] = 0;
-#ifdef VERIFY
-    r->reduced = 1;
-    r->normalized = 0;
-#endif
-}
-
-void static inline secp256k1_fe_mul_int(secp256k1_fe_t *r, int a) {
-#ifdef VERIFY
-    r->reduced = 0;
-    r->normalized = 0;
-#endif
-    unsigned __int128 c = (unsigned __int128)r->n[0] * a;
-    r->n[0] = c;
-    c = (c >> 64) + (unsigned __int128)r->n[1] * a;
-    r->n[1] = c;
-    c = (c >> 64) + (unsigned __int128)r->n[2] * a;
-    r->n[2] = c;
-    c = (c >> 64) + (unsigned __int128)r->n[3] * a;
-    r->n[3] = c;
-    c = (c >> 64) + (unsigned __int128)r->n[4] * a;
-    r->n[4] = c;
-}
-
-void static inline secp256k1_fe_add(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
-#ifdef VERIFY
-    r->reduced = 0;
-    r->normalized = 0;
-#endif
-    unsigned __int128 c = (unsigned __int128)r->n[0] + a->n[0];
-    r->n[0] = c;
-    c = (unsigned __int128)r->n[1] + a->n[1] + (c >> 64);
-    r->n[1] = c;
-    c = (unsigned __int128)r->n[2] + a->n[2] + (c >> 64);
-    r->n[2] = c;
-    c = (unsigned __int128)r->n[3] + a->n[3] + (c >> 64);
-    r->n[3] = c;
-    c = (unsigned __int128)r->n[4] + a->n[4] + (c >> 64);
-    r->n[4] = c;
-    assert((c >> 64) == 0);
-}
-
-#if 0
-#define muladd_c3(a,b,c0,c1,c2) { \
-    unsigned __int128 q1 = ((unsigned __int128)(a)) * (b) + (c0); \
-    (c0) = q1; \
-    unsigned __int128 q2 = (q1 >> 64) + (c1) + (((unsigned __int128)(c2)) << 64); \
-    (c1) = q2; \
-    (c2) = q2 >> 64; \
-}
-
-#define sqradd_c3(a,c0,c1,c2) muladd_c3(a,a,c0,c1,c2)
-
-/*#define muladd_c3(a,b,c0,c1,c2) { \
-    unsigned __int128 q = (unsigned __int128)(a) * (b) + (c0); \
-    (c0) = q; \
-    (c1) += (q >> 64); \
-    (c2) += ((c1) < (q >> 64))?1:0; \
-}*/
-
-#define muladd2_c3(a,b,c0,c1,c2) { \
-    unsigned __int128 q = (unsigned __int128)(a) * (b); \
-    uint64_t t1 = (q >> 64); \
-    uint64_t t0 = q; \
-    uint64_t t2 = t1+t1; (c2) += (t2<t1)?1:0; \
-    t1 = t0+t0; t2 += (t1<t0)?1:0; \
-    (c0) += t1; t2 += ((c0)<t1)?1:0; \
-    (c1) += t2; (c2) += ((c1)<t2)?1:0; \
-}
-#endif
-
-#define mul_c2(a,b,c0,c1) { \
-    unsigned __int128 q = (unsigned __int128)(a) * (b); \
-    (c0) = q; \
-    (c1) = (q >> 64); \
-}
-
-void static secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *ac, const secp256k1_fe_t *bc) {
-
-    secp256k1_fe_t a = *ac, b = *bc;
-    secp256k1_fe_reduce(&a);
-    secp256k1_fe_reduce(&b);
-
-#ifdef USE_FIELD_5X64_ASM
-    secp256k1_fe_mul_inner((&a)->n,(&b)->n,r->n);
-#else
-    uint64_t c1,c2,c3;
-    c3=0;
-    mul_c2(a.n[0], b.n[0], c1, c2);
-    uint64_t r0 = c1; c1 = 0;
-    muladd_c3(a.n[0], b.n[1], c2, c3, c1);
-    muladd_c3(a.n[1], b.n[0], c2, c3, c1);
-    uint64_t r1 = c2; c2 = 0;
-    muladd_c3(a.n[2], b.n[0], c3, c1, c2);
-    muladd_c3(a.n[1], b.n[1], c3, c1, c2);
-    muladd_c3(a.n[0], b.n[2], c3, c1, c2);
-    uint64_t r2 = c3; c3 = 0;
-    muladd_c3(a.n[0], b.n[3], c1, c2, c3);
-    muladd_c3(a.n[1], b.n[2], c1, c2, c3);
-    muladd_c3(a.n[2], b.n[1], c1, c2, c3);
-    muladd_c3(a.n[3], b.n[0], c1, c2, c3);
-    uint64_t r3 = c1; c1 = 0;
-    muladd_c3(a.n[3], b.n[1], c2, c3, c1);
-    muladd_c3(a.n[2], b.n[2], c2, c3, c1);
-    muladd_c3(a.n[1], b.n[3], c2, c3, c1);
-    uint64_t r4 = c2; c2 = 0;
-    muladd_c3(a.n[2], b.n[3], c3, c1, c2);
-    muladd_c3(a.n[3], b.n[2], c3, c1, c2);
-    uint64_t r5 = c3; c3 = 0;
-    muladd_c3(a.n[3], b.n[3], c1, c2, c3);
-    uint64_t r6 = c1;
-    uint64_t r7 = c2;
-    assert(c3 == 0);
-    unsigned __int128 c = (unsigned __int128)r4 * COMP_LIMB + r0;
-    r->n[0] = c;
-    c = (unsigned __int128)r5 * COMP_LIMB + r1 + (c >> 64);
-    r->n[1] = c;
-    c = (unsigned __int128)r6 * COMP_LIMB + r2 + (c >> 64);
-    r->n[2] = c;
-    c = (unsigned __int128)r7 * COMP_LIMB + r3 + (c >> 64);
-    r->n[3] = c;
-    r->n[4] = c >> 64;
-#endif
-
-#ifdef VERIFY
-    r->normalized = 0;
-    r->reduced = 0;
-#endif
-    secp256k1_fe_reduce(r);
-}
-
-/*void static secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
-    secp256k1_fe_mul(r, a, a);
-}*/
-
-void static secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *ac) {
-    secp256k1_fe_t a = *ac;
-    secp256k1_fe_reduce(&a);
-
-#ifdef USE_FIELD_5X64_ASM
-    secp256k1_fe_sqr_inner((&a)->n,r->n);
-#else
-    uint64_t c1,c2,c3;
-    c3=0;
-    mul_c2(a.n[0], a.n[0], c1, c2);
-    uint64_t r0 = c1; c1 = 0;
-    muladd2_c3(a.n[0], a.n[1], c2, c3, c1);
-    uint64_t r1 = c2; c2 = 0;
-    muladd2_c3(a.n[2], a.n[0], c3, c1, c2);
-    sqradd_c3(a.n[1], c3, c1, c2);
-    uint64_t r2 = c3; c3 = 0;
-    muladd2_c3(a.n[0], a.n[3], c1, c2, c3);
-    muladd2_c3(a.n[1], a.n[2], c1, c2, c3);
-    uint64_t r3 = c1; c1 = 0;
-    muladd2_c3(a.n[3], a.n[1], c2, c3, c1);
-    sqradd_c3(a.n[2], c2, c3, c1);
-    uint64_t r4 = c2; c2 = 0;
-    muladd2_c3(a.n[2], a.n[3], c3, c1, c2);
-    uint64_t r5 = c3; c3 = 0;
-    sqradd_c3(a.n[3], c1, c2, c3);
-    uint64_t r6 = c1;
-    uint64_t r7 = c2;
-    assert(c3 == 0);
-    unsigned __int128 c = (unsigned __int128)r4 * COMP_LIMB + r0;
-    r->n[0] = c;
-    c = (unsigned __int128)r5 * COMP_LIMB + r1 + (c >> 64);
-    r->n[1] = c;
-    c = (unsigned __int128)r6 * COMP_LIMB + r2 + (c >> 64);
-    r->n[2] = c;
-    c = (unsigned __int128)r7 * COMP_LIMB + r3 + (c >> 64);
-    r->n[3] = c;
-    r->n[4] = c >> 64;
-#endif
-
-#ifdef VERIFY
-    r->normalized = 0;
-    r->reduced = 0;
-#endif
-    secp256k1_fe_reduce(r);
-}
-
-#endif
diff --git a/src/impl/field_5x64_asm.h b/src/impl/field_5x64_asm.h
deleted file mode 100644
index 93c6ab6b..00000000
--- a/src/impl/field_5x64_asm.h
+++ /dev/null
@@ -1,11 +0,0 @@
-// Copyright (c) 2013 Pieter Wuille
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
-
-#ifndef _SECP256K1_FIELD_INNER5X52_IMPL_H_
-#define _SECP256K1_FIELD_INNER5X52_IMPL_H_
-
-void __attribute__ ((sysv_abi)) secp256k1_fe_mul_inner(const uint64_t *a, const uint64_t *b, uint64_t *r);
-void __attribute__ ((sysv_abi)) secp256k1_fe_sqr_inner(const uint64_t *a, uint64_t *r);
-
-#endif
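
Note for readers following the removed code: the whole 5x64 design hinged on the identity 2^256 mod p = 0x1000003D1, which is what COMP_LIMB encodes in both the .asm file and the C fallback above. The standalone sketch below (not part of this patch; the name fe5x64_reduce is illustrative, not the library's API) mirrors the deleted secp256k1_fe_reduce to show how the overflow limb n[4] gets folded back into the four base limbs. It assumes a compiler with the unsigned __int128 extension (gcc/clang on x86_64), like the deleted code did.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define COMP_LIMB 0x1000003D1ULL   /* 2^256 mod p, with p = 2^256 - 0x1000003D1 */

/* Fold the overflow limb n[4] into n[0..3], mirroring the deleted
 * secp256k1_fe_reduce: X = n[4]*2^256 + ... + n[0] is congruent to
 * n[4]*COMP_LIMB + ... + n[0] mod p, so the fifth limb can be
 * multiplied down instead of propagated. */
static void fe5x64_reduce(uint64_t n[5]) {
    unsigned __int128 c = (unsigned __int128)n[4] * COMP_LIMB + n[0];
    uint64_t n0 = (uint64_t)c;
    c = (c >> 64) + n[1];
    uint64_t n1 = (uint64_t)c;
    c = (c >> 64) + n[2];
    n[2] = (uint64_t)c;
    c = (c >> 64) + n[3];
    n[3] = (uint64_t)c;
    /* A carry out of n[3] wraps through 2^256 once more. */
    c = (c >> 64) * COMP_LIMB + n0;
    n[0] = (uint64_t)c;
    n[1] = n1 + (uint64_t)(c >> 64);
    assert(n[1] >= n1);   /* the second fold cannot carry again */
    n[4] = 0;
}

int main(void) {
    uint64_t n[5] = {5, 0, 0, 0, 1};   /* X = 2^256 + 5 */
    fe5x64_reduce(n);
    /* 2^256 + 5 mod p = 0x1000003D1 + 5 = 0x1000003d6 */
    printf("0x%llx\n", (unsigned long long)n[0]);
    return 0;
}

The surviving 5x52 representation reaches the same end by a different route, keeping slack bits inside each 52-bit limb rather than in a fifth overflow limb, which is why only the 5x52 and 10x26 backends remain after this patch.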