Merge bitcoin-core/secp256k1#1446: field: Remove x86_64 asm
f07cead0ca96e26356466b635ce6e7fe3834c949 build: Don't call assembly an optimization (Tim Ruffing) 2f0762fa8fd30b457bc5dcf53403123212091df5 field: Remove x86_64 asm (Tim Ruffing) Pull request description: ACKs for top commit: sipa: utACK f07cead0ca96e26356466b635ce6e7fe3834c949 theStack: ACK f07cead0ca96e26356466b635ce6e7fe3834c949 jonasnick: ACK f07cead0ca96e26356466b635ce6e7fe3834c949 Tree-SHA512: df7f895ab8ab924c5f8f01c35d0cd2f65d5c947c5ab5325787d169c5b202834ab8aa5d85dedb25839fff3f518097fe8cf8e837d3c1918e5f039ddd6ddf4187da
This commit is contained in:
commit
10e6d29b60
@ -107,7 +107,7 @@ if(SECP256K1_TEST_OVERRIDE_WIDE_MULTIPLY)
|
||||
endif()
|
||||
mark_as_advanced(FORCE SECP256K1_TEST_OVERRIDE_WIDE_MULTIPLY)
|
||||
|
||||
set(SECP256K1_ASM "AUTO" CACHE STRING "Assembly optimizations to use: \"AUTO\", \"OFF\", \"x86_64\" or \"arm32\" (experimental). [default=AUTO]")
|
||||
set(SECP256K1_ASM "AUTO" CACHE STRING "Assembly to use: \"AUTO\", \"OFF\", \"x86_64\" or \"arm32\" (experimental). [default=AUTO]")
|
||||
set_property(CACHE SECP256K1_ASM PROPERTY STRINGS "AUTO" "OFF" "x86_64" "arm32")
|
||||
check_string_option_value(SECP256K1_ASM)
|
||||
if(SECP256K1_ASM STREQUAL "arm32")
|
||||
@ -117,7 +117,7 @@ if(SECP256K1_ASM STREQUAL "arm32")
|
||||
if(HAVE_ARM32_ASM)
|
||||
add_compile_definitions(USE_EXTERNAL_ASM=1)
|
||||
else()
|
||||
message(FATAL_ERROR "ARM32 assembly optimization requested but not available.")
|
||||
message(FATAL_ERROR "ARM32 assembly requested but not available.")
|
||||
endif()
|
||||
elseif(SECP256K1_ASM)
|
||||
include(CheckX86_64Assembly)
|
||||
@ -128,14 +128,14 @@ elseif(SECP256K1_ASM)
|
||||
elseif(SECP256K1_ASM STREQUAL "AUTO")
|
||||
set(SECP256K1_ASM "OFF")
|
||||
else()
|
||||
message(FATAL_ERROR "x86_64 assembly optimization requested but not available.")
|
||||
message(FATAL_ERROR "x86_64 assembly requested but not available.")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
option(SECP256K1_EXPERIMENTAL "Allow experimental configuration options." OFF)
|
||||
if(NOT SECP256K1_EXPERIMENTAL)
|
||||
if(SECP256K1_ASM STREQUAL "arm32")
|
||||
message(FATAL_ERROR "ARM32 assembly optimization is experimental. Use -DSECP256K1_EXPERIMENTAL=ON to allow.")
|
||||
message(FATAL_ERROR "ARM32 assembly is experimental. Use -DSECP256K1_EXPERIMENTAL=ON to allow.")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@ -280,7 +280,7 @@ message("Parameters:")
|
||||
message(" ecmult window size .................. ${SECP256K1_ECMULT_WINDOW_SIZE}")
|
||||
message(" ecmult gen precision bits ........... ${SECP256K1_ECMULT_GEN_PREC_BITS}")
|
||||
message("Optional features:")
|
||||
message(" assembly optimization ............... ${SECP256K1_ASM}")
|
||||
message(" assembly ............................ ${SECP256K1_ASM}")
|
||||
message(" external callbacks .................. ${SECP256K1_USE_EXTERNAL_DEFAULT_CALLBACKS}")
|
||||
if(SECP256K1_TEST_OVERRIDE_WIDE_MULTIPLY)
|
||||
message(" wide multiplication (test-only) ..... ${SECP256K1_TEST_OVERRIDE_WIDE_MULTIPLY}")
|
||||
|
@ -37,7 +37,6 @@ noinst_HEADERS += src/field_10x26_impl.h
|
||||
noinst_HEADERS += src/field_5x52.h
|
||||
noinst_HEADERS += src/field_5x52_impl.h
|
||||
noinst_HEADERS += src/field_5x52_int128_impl.h
|
||||
noinst_HEADERS += src/field_5x52_asm_impl.h
|
||||
noinst_HEADERS += src/modinv32.h
|
||||
noinst_HEADERS += src/modinv32_impl.h
|
||||
noinst_HEADERS += src/modinv64.h
|
||||
|
@ -33,7 +33,7 @@ Implementation details
|
||||
* Expose only higher level interfaces to minimize the API surface and improve application security. ("Be difficult to use insecurely.")
|
||||
* Field operations
|
||||
* Optimized implementation of arithmetic modulo the curve's field size (2^256 - 0x1000003D1).
|
||||
* Using 5 52-bit limbs (including hand-optimized assembly for x86_64, by Diederik Huys).
|
||||
* Using 5 52-bit limbs
|
||||
* Using 10 26-bit limbs (including hand-optimized assembly for 32-bit ARM, by Wladimir J. van der Laan).
|
||||
* This is an experimental feature that has not received enough scrutiny to satisfy the standard of quality of this library but is made available for testing and review by the community.
|
||||
* Scalar operations
|
||||
|
14
configure.ac
14
configure.ac
@ -201,7 +201,7 @@ AC_ARG_ENABLE(external_default_callbacks,
|
||||
AC_ARG_WITH([test-override-wide-multiply], [] ,[set_widemul=$withval], [set_widemul=auto])
|
||||
|
||||
AC_ARG_WITH([asm], [AS_HELP_STRING([--with-asm=x86_64|arm32|no|auto],
|
||||
[assembly optimizations to use (experimental: arm32) [default=auto]])],[req_asm=$withval], [req_asm=auto])
|
||||
[assembly to use (experimental: arm32) [default=auto]])],[req_asm=$withval], [req_asm=auto])
|
||||
|
||||
AC_ARG_WITH([ecmult-window], [AS_HELP_STRING([--with-ecmult-window=SIZE|auto],
|
||||
[window size for ecmult precomputation for verification, specified as integer in range [2..24].]
|
||||
@ -279,24 +279,24 @@ else
|
||||
x86_64)
|
||||
SECP_X86_64_ASM_CHECK
|
||||
if test x"$has_x86_64_asm" != x"yes"; then
|
||||
AC_MSG_ERROR([x86_64 assembly optimization requested but not available])
|
||||
AC_MSG_ERROR([x86_64 assembly requested but not available])
|
||||
fi
|
||||
;;
|
||||
arm32)
|
||||
SECP_ARM32_ASM_CHECK
|
||||
if test x"$has_arm32_asm" != x"yes"; then
|
||||
AC_MSG_ERROR([ARM32 assembly optimization requested but not available])
|
||||
AC_MSG_ERROR([ARM32 assembly requested but not available])
|
||||
fi
|
||||
;;
|
||||
no)
|
||||
;;
|
||||
*)
|
||||
AC_MSG_ERROR([invalid assembly optimization selection])
|
||||
AC_MSG_ERROR([invalid assembly selection])
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
|
||||
# Select assembly optimization
|
||||
# Select assembly
|
||||
enable_external_asm=no
|
||||
|
||||
case $set_asm in
|
||||
@ -309,7 +309,7 @@ arm32)
|
||||
no)
|
||||
;;
|
||||
*)
|
||||
AC_MSG_ERROR([invalid assembly optimizations])
|
||||
AC_MSG_ERROR([invalid assembly selection])
|
||||
;;
|
||||
esac
|
||||
|
||||
@ -425,7 +425,7 @@ if test x"$enable_experimental" = x"yes"; then
|
||||
AC_MSG_NOTICE([******])
|
||||
else
|
||||
if test x"$set_asm" = x"arm32"; then
|
||||
AC_MSG_ERROR([ARM32 assembly optimization is experimental. Use --enable-experimental to allow.])
|
||||
AC_MSG_ERROR([ARM32 assembly is experimental. Use --enable-experimental to allow.])
|
||||
fi
|
||||
fi
|
||||
|
||||
|
@ -1,504 +0,0 @@
|
||||
/***********************************************************************
|
||||
* Copyright (c) 2013-2014 Diederik Huys, Pieter Wuille *
|
||||
* Distributed under the MIT software license, see the accompanying *
|
||||
* file COPYING or https://www.opensource.org/licenses/mit-license.php.*
|
||||
***********************************************************************/
|
||||
|
||||
/**
|
||||
* Changelog:
|
||||
* - March 2013, Diederik Huys: original version
|
||||
* - November 2014, Pieter Wuille: updated to use Peter Dettman's parallel multiplication algorithm
|
||||
* - December 2014, Pieter Wuille: converted from YASM to GCC inline assembly
|
||||
*/
|
||||
|
||||
#ifndef SECP256K1_FIELD_INNER5X52_IMPL_H
|
||||
#define SECP256K1_FIELD_INNER5X52_IMPL_H
|
||||
|
||||
#include "util.h"
|
||||
|
||||
SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t *a, const uint64_t * SECP256K1_RESTRICT b) {
|
||||
/**
|
||||
* Registers: rdx:rax = multiplication accumulator
|
||||
* r9:r8 = c
|
||||
* r15:rcx = d
|
||||
* r10-r14 = a0-a4
|
||||
* rbx = b
|
||||
* rdi = r
|
||||
* rsi = a / t?
|
||||
*/
|
||||
uint64_t tmp1, tmp2, tmp3;
|
||||
__asm__ __volatile__(
|
||||
"movq 0(%%rsi),%%r10\n"
|
||||
"movq 8(%%rsi),%%r11\n"
|
||||
"movq 16(%%rsi),%%r12\n"
|
||||
"movq 24(%%rsi),%%r13\n"
|
||||
"movq 32(%%rsi),%%r14\n"
|
||||
|
||||
/* d += a3 * b0 */
|
||||
"movq 0(%%rbx),%%rax\n"
|
||||
"mulq %%r13\n"
|
||||
"movq %%rax,%%rcx\n"
|
||||
"movq %%rdx,%%r15\n"
|
||||
/* d += a2 * b1 */
|
||||
"movq 8(%%rbx),%%rax\n"
|
||||
"mulq %%r12\n"
|
||||
"addq %%rax,%%rcx\n"
|
||||
"adcq %%rdx,%%r15\n"
|
||||
/* d += a1 * b2 */
|
||||
"movq 16(%%rbx),%%rax\n"
|
||||
"mulq %%r11\n"
|
||||
"addq %%rax,%%rcx\n"
|
||||
"adcq %%rdx,%%r15\n"
|
||||
/* d = a0 * b3 */
|
||||
"movq 24(%%rbx),%%rax\n"
|
||||
"mulq %%r10\n"
|
||||
"addq %%rax,%%rcx\n"
|
||||
"adcq %%rdx,%%r15\n"
|
||||
/* c = a4 * b4 */
|
||||
"movq 32(%%rbx),%%rax\n"
|
||||
"mulq %%r14\n"
|
||||
"movq %%rax,%%r8\n"
|
||||
"movq %%rdx,%%r9\n"
|
||||
/* d += (c & M) * R */
|
||||
"movq $0xfffffffffffff,%%rdx\n"
|
||||
"andq %%rdx,%%rax\n"
|
||||
"movq $0x1000003d10,%%rdx\n"
|
||||
"mulq %%rdx\n"
|
||||
"addq %%rax,%%rcx\n"
|
||||
"adcq %%rdx,%%r15\n"
|
||||
/* c >>= 52 (%%r8 only) */
|
||||
"shrdq $52,%%r9,%%r8\n"
|
||||
/* t3 (tmp1) = d & M */
|
||||
"movq %%rcx,%%rsi\n"
|
||||
"movq $0xfffffffffffff,%%rdx\n"
|
||||
"andq %%rdx,%%rsi\n"
|
||||
"movq %%rsi,%q1\n"
|
||||
/* d >>= 52 */
|
||||
"shrdq $52,%%r15,%%rcx\n"
|
||||
"xorq %%r15,%%r15\n"
|
||||
/* d += a4 * b0 */
|
||||
"movq 0(%%rbx),%%rax\n"
|
||||
"mulq %%r14\n"
|
||||
"addq %%rax,%%rcx\n"
|
||||
"adcq %%rdx,%%r15\n"
|
||||
/* d += a3 * b1 */
|
||||
"movq 8(%%rbx),%%rax\n"
|
||||
"mulq %%r13\n"
|
||||
"addq %%rax,%%rcx\n"
|
||||
"adcq %%rdx,%%r15\n"
|
||||
/* d += a2 * b2 */
|
||||
"movq 16(%%rbx),%%rax\n"
|
||||
"mulq %%r12\n"
|
||||
"addq %%rax,%%rcx\n"
|
||||
"adcq %%rdx,%%r15\n"
|
||||
/* d += a1 * b3 */
|
||||
"movq 24(%%rbx),%%rax\n"
|
||||
"mulq %%r11\n"
|
||||
"addq %%rax,%%rcx\n"
|
||||
"adcq %%rdx,%%r15\n"
|
||||
/* d += a0 * b4 */
|
||||
"movq 32(%%rbx),%%rax\n"
|
||||
"mulq %%r10\n"
|
||||
"addq %%rax,%%rcx\n"
|
||||
"adcq %%rdx,%%r15\n"
|
||||
/* d += c * R */
|
||||
"movq %%r8,%%rax\n"
|
||||
"movq $0x1000003d10,%%rdx\n"
|
||||
"mulq %%rdx\n"
|
||||
"addq %%rax,%%rcx\n"
|
||||
"adcq %%rdx,%%r15\n"
|
||||
/* t4 = d & M (%%rsi) */
|
||||
"movq %%rcx,%%rsi\n"
|
||||
"movq $0xfffffffffffff,%%rdx\n"
|
||||
"andq %%rdx,%%rsi\n"
|
||||
/* d >>= 52 */
|
||||
"shrdq $52,%%r15,%%rcx\n"
|
||||
"xorq %%r15,%%r15\n"
|
||||
/* tx = t4 >> 48 (tmp3) */
|
||||
"movq %%rsi,%%rax\n"
|
||||
"shrq $48,%%rax\n"
|
||||
"movq %%rax,%q3\n"
|
||||
/* t4 &= (M >> 4) (tmp2) */
|
||||
"movq $0xffffffffffff,%%rax\n"
|
||||
"andq %%rax,%%rsi\n"
|
||||
"movq %%rsi,%q2\n"
|
||||
/* c = a0 * b0 */
|
||||
"movq 0(%%rbx),%%rax\n"
|
||||
"mulq %%r10\n"
|
||||
"movq %%rax,%%r8\n"
|
||||
"movq %%rdx,%%r9\n"
|
||||
/* d += a4 * b1 */
|
||||
"movq 8(%%rbx),%%rax\n"
|
||||
"mulq %%r14\n"
|
||||
"addq %%rax,%%rcx\n"
|
||||
"adcq %%rdx,%%r15\n"
|
||||
/* d += a3 * b2 */
|
||||
"movq 16(%%rbx),%%rax\n"
|
||||
"mulq %%r13\n"
|
||||
"addq %%rax,%%rcx\n"
|
||||
"adcq %%rdx,%%r15\n"
|
||||
/* d += a2 * b3 */
|
||||
"movq 24(%%rbx),%%rax\n"
|
||||
"mulq %%r12\n"
|
||||
"addq %%rax,%%rcx\n"
|
||||
"adcq %%rdx,%%r15\n"
|
||||
/* d += a1 * b4 */
|
||||
"movq 32(%%rbx),%%rax\n"
|
||||
"mulq %%r11\n"
|
||||
"addq %%rax,%%rcx\n"
|
||||
"adcq %%rdx,%%r15\n"
|
||||
/* u0 = d & M (%%rsi) */
|
||||
"movq %%rcx,%%rsi\n"
|
||||
"movq $0xfffffffffffff,%%rdx\n"
|
||||
"andq %%rdx,%%rsi\n"
|
||||
/* d >>= 52 */
|
||||
"shrdq $52,%%r15,%%rcx\n"
|
||||
"xorq %%r15,%%r15\n"
|
||||
/* u0 = (u0 << 4) | tx (%%rsi) */
|
||||
"shlq $4,%%rsi\n"
|
||||
"movq %q3,%%rax\n"
|
||||
"orq %%rax,%%rsi\n"
|
||||
/* c += u0 * (R >> 4) */
|
||||
"movq $0x1000003d1,%%rax\n"
|
||||
"mulq %%rsi\n"
|
||||
"addq %%rax,%%r8\n"
|
||||
"adcq %%rdx,%%r9\n"
|
||||
/* r[0] = c & M */
|
||||
"movq %%r8,%%rax\n"
|
||||
"movq $0xfffffffffffff,%%rdx\n"
|
||||
"andq %%rdx,%%rax\n"
|
||||
"movq %%rax,0(%%rdi)\n"
|
||||
/* c >>= 52 */
|
||||
"shrdq $52,%%r9,%%r8\n"
|
||||
"xorq %%r9,%%r9\n"
|
||||
/* c += a1 * b0 */
|
||||
"movq 0(%%rbx),%%rax\n"
|
||||
"mulq %%r11\n"
|
||||
"addq %%rax,%%r8\n"
|
||||
"adcq %%rdx,%%r9\n"
|
||||
/* c += a0 * b1 */
|
||||
"movq 8(%%rbx),%%rax\n"
|
||||
"mulq %%r10\n"
|
||||
"addq %%rax,%%r8\n"
|
||||
"adcq %%rdx,%%r9\n"
|
||||
/* d += a4 * b2 */
|
||||
"movq 16(%%rbx),%%rax\n"
|
||||
"mulq %%r14\n"
|
||||
"addq %%rax,%%rcx\n"
|
||||
"adcq %%rdx,%%r15\n"
|
||||
/* d += a3 * b3 */
|
||||
"movq 24(%%rbx),%%rax\n"
|
||||
"mulq %%r13\n"
|
||||
"addq %%rax,%%rcx\n"
|
||||
"adcq %%rdx,%%r15\n"
|
||||
/* d += a2 * b4 */
|
||||
"movq 32(%%rbx),%%rax\n"
|
||||
"mulq %%r12\n"
|
||||
"addq %%rax,%%rcx\n"
|
||||
"adcq %%rdx,%%r15\n"
|
||||
/* c += (d & M) * R */
|
||||
"movq %%rcx,%%rax\n"
|
||||
"movq $0xfffffffffffff,%%rdx\n"
|
||||
"andq %%rdx,%%rax\n"
|
||||
"movq $0x1000003d10,%%rdx\n"
|
||||
"mulq %%rdx\n"
|
||||
"addq %%rax,%%r8\n"
|
||||
"adcq %%rdx,%%r9\n"
|
||||
/* d >>= 52 */
|
||||
"shrdq $52,%%r15,%%rcx\n"
|
||||
"xorq %%r15,%%r15\n"
|
||||
/* r[1] = c & M */
|
||||
"movq %%r8,%%rax\n"
|
||||
"movq $0xfffffffffffff,%%rdx\n"
|
||||
"andq %%rdx,%%rax\n"
|
||||
"movq %%rax,8(%%rdi)\n"
|
||||
/* c >>= 52 */
|
||||
"shrdq $52,%%r9,%%r8\n"
|
||||
"xorq %%r9,%%r9\n"
|
||||
/* c += a2 * b0 */
|
||||
"movq 0(%%rbx),%%rax\n"
|
||||
"mulq %%r12\n"
|
||||
"addq %%rax,%%r8\n"
|
||||
"adcq %%rdx,%%r9\n"
|
||||
/* c += a1 * b1 */
|
||||
"movq 8(%%rbx),%%rax\n"
|
||||
"mulq %%r11\n"
|
||||
"addq %%rax,%%r8\n"
|
||||
"adcq %%rdx,%%r9\n"
|
||||
/* c += a0 * b2 (last use of %%r10 = a0) */
|
||||
"movq 16(%%rbx),%%rax\n"
|
||||
"mulq %%r10\n"
|
||||
"addq %%rax,%%r8\n"
|
||||
"adcq %%rdx,%%r9\n"
|
||||
/* fetch t3 (%%r10, overwrites a0), t4 (%%rsi) */
|
||||
"movq %q2,%%rsi\n"
|
||||
"movq %q1,%%r10\n"
|
||||
/* d += a4 * b3 */
|
||||
"movq 24(%%rbx),%%rax\n"
|
||||
"mulq %%r14\n"
|
||||
"addq %%rax,%%rcx\n"
|
||||
"adcq %%rdx,%%r15\n"
|
||||
/* d += a3 * b4 */
|
||||
"movq 32(%%rbx),%%rax\n"
|
||||
"mulq %%r13\n"
|
||||
"addq %%rax,%%rcx\n"
|
||||
"adcq %%rdx,%%r15\n"
|
||||
/* c += (d & M) * R */
|
||||
"movq %%rcx,%%rax\n"
|
||||
"movq $0xfffffffffffff,%%rdx\n"
|
||||
"andq %%rdx,%%rax\n"
|
||||
"movq $0x1000003d10,%%rdx\n"
|
||||
"mulq %%rdx\n"
|
||||
"addq %%rax,%%r8\n"
|
||||
"adcq %%rdx,%%r9\n"
|
||||
/* d >>= 52 (%%rcx only) */
|
||||
"shrdq $52,%%r15,%%rcx\n"
|
||||
/* r[2] = c & M */
|
||||
"movq %%r8,%%rax\n"
|
||||
"movq $0xfffffffffffff,%%rdx\n"
|
||||
"andq %%rdx,%%rax\n"
|
||||
"movq %%rax,16(%%rdi)\n"
|
||||
/* c >>= 52 */
|
||||
"shrdq $52,%%r9,%%r8\n"
|
||||
"xorq %%r9,%%r9\n"
|
||||
/* c += t3 */
|
||||
"addq %%r10,%%r8\n"
|
||||
/* c += d * R */
|
||||
"movq %%rcx,%%rax\n"
|
||||
"movq $0x1000003d10,%%rdx\n"
|
||||
"mulq %%rdx\n"
|
||||
"addq %%rax,%%r8\n"
|
||||
"adcq %%rdx,%%r9\n"
|
||||
/* r[3] = c & M */
|
||||
"movq %%r8,%%rax\n"
|
||||
"movq $0xfffffffffffff,%%rdx\n"
|
||||
"andq %%rdx,%%rax\n"
|
||||
"movq %%rax,24(%%rdi)\n"
|
||||
/* c >>= 52 (%%r8 only) */
|
||||
"shrdq $52,%%r9,%%r8\n"
|
||||
/* c += t4 (%%r8 only) */
|
||||
"addq %%rsi,%%r8\n"
|
||||
/* r[4] = c */
|
||||
"movq %%r8,32(%%rdi)\n"
|
||||
: "+S"(a), "=&m"(tmp1), "=&m"(tmp2), "=&m"(tmp3)
|
||||
: "b"(b), "D"(r)
|
||||
: "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "cc", "memory"
|
||||
);
|
||||
}
|
||||
|
||||
SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint64_t *r, const uint64_t *a) {
|
||||
/**
|
||||
* Registers: rdx:rax = multiplication accumulator
|
||||
* r9:r8 = c
|
||||
* rcx:rbx = d
|
||||
* r10-r14 = a0-a4
|
||||
* r15 = M (0xfffffffffffff)
|
||||
* rdi = r
|
||||
* rsi = a / t?
|
||||
*/
|
||||
uint64_t tmp1, tmp2, tmp3;
|
||||
__asm__ __volatile__(
|
||||
"movq 0(%%rsi),%%r10\n"
|
||||
"movq 8(%%rsi),%%r11\n"
|
||||
"movq 16(%%rsi),%%r12\n"
|
||||
"movq 24(%%rsi),%%r13\n"
|
||||
"movq 32(%%rsi),%%r14\n"
|
||||
"movq $0xfffffffffffff,%%r15\n"
|
||||
|
||||
/* d = (a0*2) * a3 */
|
||||
"leaq (%%r10,%%r10,1),%%rax\n"
|
||||
"mulq %%r13\n"
|
||||
"movq %%rax,%%rbx\n"
|
||||
"movq %%rdx,%%rcx\n"
|
||||
/* d += (a1*2) * a2 */
|
||||
"leaq (%%r11,%%r11,1),%%rax\n"
|
||||
"mulq %%r12\n"
|
||||
"addq %%rax,%%rbx\n"
|
||||
"adcq %%rdx,%%rcx\n"
|
||||
/* c = a4 * a4 */
|
||||
"movq %%r14,%%rax\n"
|
||||
"mulq %%r14\n"
|
||||
"movq %%rax,%%r8\n"
|
||||
"movq %%rdx,%%r9\n"
|
||||
/* d += (c & M) * R */
|
||||
"andq %%r15,%%rax\n"
|
||||
"movq $0x1000003d10,%%rdx\n"
|
||||
"mulq %%rdx\n"
|
||||
"addq %%rax,%%rbx\n"
|
||||
"adcq %%rdx,%%rcx\n"
|
||||
/* c >>= 52 (%%r8 only) */
|
||||
"shrdq $52,%%r9,%%r8\n"
|
||||
/* t3 (tmp1) = d & M */
|
||||
"movq %%rbx,%%rsi\n"
|
||||
"andq %%r15,%%rsi\n"
|
||||
"movq %%rsi,%q1\n"
|
||||
/* d >>= 52 */
|
||||
"shrdq $52,%%rcx,%%rbx\n"
|
||||
"xorq %%rcx,%%rcx\n"
|
||||
/* a4 *= 2 */
|
||||
"addq %%r14,%%r14\n"
|
||||
/* d += a0 * a4 */
|
||||
"movq %%r10,%%rax\n"
|
||||
"mulq %%r14\n"
|
||||
"addq %%rax,%%rbx\n"
|
||||
"adcq %%rdx,%%rcx\n"
|
||||
/* d+= (a1*2) * a3 */
|
||||
"leaq (%%r11,%%r11,1),%%rax\n"
|
||||
"mulq %%r13\n"
|
||||
"addq %%rax,%%rbx\n"
|
||||
"adcq %%rdx,%%rcx\n"
|
||||
/* d += a2 * a2 */
|
||||
"movq %%r12,%%rax\n"
|
||||
"mulq %%r12\n"
|
||||
"addq %%rax,%%rbx\n"
|
||||
"adcq %%rdx,%%rcx\n"
|
||||
/* d += c * R */
|
||||
"movq %%r8,%%rax\n"
|
||||
"movq $0x1000003d10,%%rdx\n"
|
||||
"mulq %%rdx\n"
|
||||
"addq %%rax,%%rbx\n"
|
||||
"adcq %%rdx,%%rcx\n"
|
||||
/* t4 = d & M (%%rsi) */
|
||||
"movq %%rbx,%%rsi\n"
|
||||
"andq %%r15,%%rsi\n"
|
||||
/* d >>= 52 */
|
||||
"shrdq $52,%%rcx,%%rbx\n"
|
||||
"xorq %%rcx,%%rcx\n"
|
||||
/* tx = t4 >> 48 (tmp3) */
|
||||
"movq %%rsi,%%rax\n"
|
||||
"shrq $48,%%rax\n"
|
||||
"movq %%rax,%q3\n"
|
||||
/* t4 &= (M >> 4) (tmp2) */
|
||||
"movq $0xffffffffffff,%%rax\n"
|
||||
"andq %%rax,%%rsi\n"
|
||||
"movq %%rsi,%q2\n"
|
||||
/* c = a0 * a0 */
|
||||
"movq %%r10,%%rax\n"
|
||||
"mulq %%r10\n"
|
||||
"movq %%rax,%%r8\n"
|
||||
"movq %%rdx,%%r9\n"
|
||||
/* d += a1 * a4 */
|
||||
"movq %%r11,%%rax\n"
|
||||
"mulq %%r14\n"
|
||||
"addq %%rax,%%rbx\n"
|
||||
"adcq %%rdx,%%rcx\n"
|
||||
/* d += (a2*2) * a3 */
|
||||
"leaq (%%r12,%%r12,1),%%rax\n"
|
||||
"mulq %%r13\n"
|
||||
"addq %%rax,%%rbx\n"
|
||||
"adcq %%rdx,%%rcx\n"
|
||||
/* u0 = d & M (%%rsi) */
|
||||
"movq %%rbx,%%rsi\n"
|
||||
"andq %%r15,%%rsi\n"
|
||||
/* d >>= 52 */
|
||||
"shrdq $52,%%rcx,%%rbx\n"
|
||||
"xorq %%rcx,%%rcx\n"
|
||||
/* u0 = (u0 << 4) | tx (%%rsi) */
|
||||
"shlq $4,%%rsi\n"
|
||||
"movq %q3,%%rax\n"
|
||||
"orq %%rax,%%rsi\n"
|
||||
/* c += u0 * (R >> 4) */
|
||||
"movq $0x1000003d1,%%rax\n"
|
||||
"mulq %%rsi\n"
|
||||
"addq %%rax,%%r8\n"
|
||||
"adcq %%rdx,%%r9\n"
|
||||
/* r[0] = c & M */
|
||||
"movq %%r8,%%rax\n"
|
||||
"andq %%r15,%%rax\n"
|
||||
"movq %%rax,0(%%rdi)\n"
|
||||
/* c >>= 52 */
|
||||
"shrdq $52,%%r9,%%r8\n"
|
||||
"xorq %%r9,%%r9\n"
|
||||
/* a0 *= 2 */
|
||||
"addq %%r10,%%r10\n"
|
||||
/* c += a0 * a1 */
|
||||
"movq %%r10,%%rax\n"
|
||||
"mulq %%r11\n"
|
||||
"addq %%rax,%%r8\n"
|
||||
"adcq %%rdx,%%r9\n"
|
||||
/* d += a2 * a4 */
|
||||
"movq %%r12,%%rax\n"
|
||||
"mulq %%r14\n"
|
||||
"addq %%rax,%%rbx\n"
|
||||
"adcq %%rdx,%%rcx\n"
|
||||
/* d += a3 * a3 */
|
||||
"movq %%r13,%%rax\n"
|
||||
"mulq %%r13\n"
|
||||
"addq %%rax,%%rbx\n"
|
||||
"adcq %%rdx,%%rcx\n"
|
||||
/* c += (d & M) * R */
|
||||
"movq %%rbx,%%rax\n"
|
||||
"andq %%r15,%%rax\n"
|
||||
"movq $0x1000003d10,%%rdx\n"
|
||||
"mulq %%rdx\n"
|
||||
"addq %%rax,%%r8\n"
|
||||
"adcq %%rdx,%%r9\n"
|
||||
/* d >>= 52 */
|
||||
"shrdq $52,%%rcx,%%rbx\n"
|
||||
"xorq %%rcx,%%rcx\n"
|
||||
/* r[1] = c & M */
|
||||
"movq %%r8,%%rax\n"
|
||||
"andq %%r15,%%rax\n"
|
||||
"movq %%rax,8(%%rdi)\n"
|
||||
/* c >>= 52 */
|
||||
"shrdq $52,%%r9,%%r8\n"
|
||||
"xorq %%r9,%%r9\n"
|
||||
/* c += a0 * a2 (last use of %%r10) */
|
||||
"movq %%r10,%%rax\n"
|
||||
"mulq %%r12\n"
|
||||
"addq %%rax,%%r8\n"
|
||||
"adcq %%rdx,%%r9\n"
|
||||
/* fetch t3 (%%r10, overwrites a0),t4 (%%rsi) */
|
||||
"movq %q2,%%rsi\n"
|
||||
"movq %q1,%%r10\n"
|
||||
/* c += a1 * a1 */
|
||||
"movq %%r11,%%rax\n"
|
||||
"mulq %%r11\n"
|
||||
"addq %%rax,%%r8\n"
|
||||
"adcq %%rdx,%%r9\n"
|
||||
/* d += a3 * a4 */
|
||||
"movq %%r13,%%rax\n"
|
||||
"mulq %%r14\n"
|
||||
"addq %%rax,%%rbx\n"
|
||||
"adcq %%rdx,%%rcx\n"
|
||||
/* c += (d & M) * R */
|
||||
"movq %%rbx,%%rax\n"
|
||||
"andq %%r15,%%rax\n"
|
||||
"movq $0x1000003d10,%%rdx\n"
|
||||
"mulq %%rdx\n"
|
||||
"addq %%rax,%%r8\n"
|
||||
"adcq %%rdx,%%r9\n"
|
||||
/* d >>= 52 (%%rbx only) */
|
||||
"shrdq $52,%%rcx,%%rbx\n"
|
||||
/* r[2] = c & M */
|
||||
"movq %%r8,%%rax\n"
|
||||
"andq %%r15,%%rax\n"
|
||||
"movq %%rax,16(%%rdi)\n"
|
||||
/* c >>= 52 */
|
||||
"shrdq $52,%%r9,%%r8\n"
|
||||
"xorq %%r9,%%r9\n"
|
||||
/* c += t3 */
|
||||
"addq %%r10,%%r8\n"
|
||||
/* c += d * R */
|
||||
"movq %%rbx,%%rax\n"
|
||||
"movq $0x1000003d10,%%rdx\n"
|
||||
"mulq %%rdx\n"
|
||||
"addq %%rax,%%r8\n"
|
||||
"adcq %%rdx,%%r9\n"
|
||||
/* r[3] = c & M */
|
||||
"movq %%r8,%%rax\n"
|
||||
"andq %%r15,%%rax\n"
|
||||
"movq %%rax,24(%%rdi)\n"
|
||||
/* c >>= 52 (%%r8 only) */
|
||||
"shrdq $52,%%r9,%%r8\n"
|
||||
/* c += t4 (%%r8 only) */
|
||||
"addq %%rsi,%%r8\n"
|
||||
/* r[4] = c */
|
||||
"movq %%r8,32(%%rdi)\n"
|
||||
: "+S"(a), "=&m"(tmp1), "=&m"(tmp2), "=&m"(tmp3)
|
||||
: "D"(r)
|
||||
: "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "cc", "memory"
|
||||
);
|
||||
}
|
||||
|
||||
#endif /* SECP256K1_FIELD_INNER5X52_IMPL_H */
|
@ -12,11 +12,7 @@
|
||||
#include "field.h"
|
||||
#include "modinv64_impl.h"
|
||||
|
||||
#if defined(USE_ASM_X86_64)
|
||||
#include "field_5x52_asm_impl.h"
|
||||
#else
|
||||
#include "field_5x52_int128_impl.h"
|
||||
#endif
|
||||
|
||||
#ifdef VERIFY
|
||||
static void secp256k1_fe_impl_verify(const secp256k1_fe *a) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user