2013-06-16 22:46:43 +02:00
|
|
|
;; Added by Diederik Huys, March 2013
|
|
|
|
;;
|
|
|
|
;; Provided public procedures:
|
|
|
|
;; secp256k1_fe_mul_inner
|
|
|
|
;; secp256k1_fe_sqr_inner
|
|
|
|
;;
|
|
|
|
;; Needed tools: YASM (http://yasm.tortall.net)
|
|
|
|
;;
|
|
|
|
;;
|
|
|
|
|
|
|
|
BITS 64
|
|
|
|
|
|
|
|
COMP_LIMB EQU 000000001000003D1h
|
|
|
|
|
|
|
|
;; Procedure ExSetMult
|
|
|
|
;; Register Layout:
|
|
|
|
;; INPUT: rdi = a->n
|
|
|
|
;; rsi = b->n
|
|
|
|
;; rdx = r->a
|
|
|
|
;;
|
|
|
|
;; INTERNAL: rdx:rax = multiplication accumulator
|
|
|
|
;; r8-r10 = c0-c2
|
|
|
|
;; r11-r15 = b.n[0]-b.n[4] / r3 - r7
|
|
|
|
;; rbx = r0
|
|
|
|
;; rcx = r1
|
|
|
|
;; rbp = r2
|
|
|
|
;;
|
|
|
|
GLOBAL secp256k1_fe_mul_inner
|
|
|
|
ALIGN 32
|
|
|
|
secp256k1_fe_mul_inner:
|
|
|
|
push rbp
|
|
|
|
push rbx
|
|
|
|
push r12
|
|
|
|
push r13
|
|
|
|
push r14
|
|
|
|
push r15
|
|
|
|
push rdx
|
|
|
|
|
|
|
|
mov r11,[rsi+8*0] ; preload b.n[0]
|
|
|
|
|
|
|
|
;; step 1: mul_c2
|
|
|
|
mov rax,[rdi+0*8] ; load a.n[0]
|
|
|
|
mul r11 ; rdx:rax=a.n[0]*b.n[0]
|
|
|
|
mov r12,[rsi+1*8] ; preload b.n[1]
|
|
|
|
mov rbx,rax ; retire LO qword (r[0])
|
|
|
|
mov r8,rdx ; save overflow
|
|
|
|
xor r9,r9 ; overflow HO qwords
|
|
|
|
xor r10,r10
|
|
|
|
|
|
|
|
;; c+=a.n[0] * b.n[1] + a.n[1] * b.n[0]
|
|
|
|
mov rax,[rdi+0*8]
|
|
|
|
mul r12
|
|
|
|
mov r13,[rsi+2*8] ; preload b.n[2]
|
|
|
|
add r8,rax ; still the same :-)
|
|
|
|
adc r9,rdx ;
|
|
|
|
adc r10,0 ; mmm...
|
|
|
|
|
|
|
|
mov rax,[rdi+1*8]
|
|
|
|
mul r11
|
|
|
|
add r8,rax
|
|
|
|
adc r9,rdx
|
|
|
|
adc r10,0
|
|
|
|
mov rcx,r8 ; retire r[1]
|
|
|
|
xor r8,r8
|
|
|
|
|
|
|
|
;; c+=a.n[0 1 2] * b.n[2 1 0]
|
|
|
|
mov rax,[rdi+0*8]
|
|
|
|
mul r13
|
|
|
|
mov r14,[rsi+3*8] ; preload b.n[3]
|
|
|
|
add r9,rax
|
|
|
|
adc r10,rdx
|
|
|
|
adc r8,0
|
|
|
|
|
|
|
|
mov rax,[rdi+1*8]
|
|
|
|
mul r12
|
|
|
|
add r9,rax
|
|
|
|
adc r10,rdx
|
|
|
|
adc r8,0
|
|
|
|
|
|
|
|
mov rax,[rdi+2*8]
|
|
|
|
mul r11
|
|
|
|
add r9,rax
|
|
|
|
adc r10,rdx
|
|
|
|
adc r8,0
|
2013-06-17 15:31:06 +02:00
|
|
|
mov rbp,r9 ; retire r[2]
|
2013-06-16 22:46:43 +02:00
|
|
|
xor r9,r9
|
|
|
|
|
|
|
|
;; c+=a.n[0 1 2 3] * b.n[3 2 1 0]
|
|
|
|
mov rax,[rdi+0*8]
|
|
|
|
mul r14
|
|
|
|
add r10,rax
|
|
|
|
adc r8,rdx
|
|
|
|
adc r9,0
|
|
|
|
|
|
|
|
mov rax,[rdi+1*8]
|
|
|
|
mul r13
|
|
|
|
add r10,rax
|
|
|
|
adc r8,rdx
|
|
|
|
adc r9,0
|
|
|
|
|
|
|
|
mov rax,[rdi+2*8]
|
|
|
|
mul r12
|
|
|
|
add r10,rax
|
|
|
|
adc r8,rdx
|
|
|
|
adc r9,0
|
|
|
|
|
|
|
|
mov rax,[rdi+3*8]
|
|
|
|
mul r11
|
|
|
|
add r10,rax
|
|
|
|
adc r8,rdx
|
|
|
|
adc r9,0
|
|
|
|
mov r11,r10 ; retire r[3]
|
|
|
|
xor r10,r10
|
|
|
|
|
|
|
|
;; c+=a.n[1 2 3] * b.n[3 2 1]
|
|
|
|
mov rax,[rdi+1*8]
|
|
|
|
mul r14
|
|
|
|
add r8,rax
|
|
|
|
adc r9,rdx
|
|
|
|
adc r10,0
|
|
|
|
|
|
|
|
mov rax,[rdi+2*8]
|
|
|
|
mul r13
|
|
|
|
add r8,rax
|
|
|
|
adc r9,rdx
|
|
|
|
adc r10,0
|
|
|
|
|
|
|
|
mov rax,[rdi+3*8]
|
|
|
|
mul r12
|
|
|
|
add r8,rax
|
|
|
|
adc r9,rdx
|
|
|
|
adc r10,0
|
|
|
|
mov r12,r8 ; retire r[4]
|
|
|
|
xor r8,r8
|
|
|
|
|
|
|
|
;; c+=a.n[2 3] * b.n[3 2]
|
|
|
|
mov rax,[rdi+2*8]
|
|
|
|
mul r14
|
|
|
|
add r9,rax ; still the same :-)
|
|
|
|
adc r10,rdx ;
|
|
|
|
adc r8,0 ; mmm...
|
|
|
|
|
|
|
|
mov rax,[rdi+3*8]
|
|
|
|
mul r13
|
|
|
|
add r9,rax
|
|
|
|
adc r10,rdx
|
|
|
|
adc r8,0
|
|
|
|
mov r13,r9 ; retire r[5]
|
|
|
|
xor r9,r9
|
|
|
|
|
|
|
|
;; c+=a.n[3] * b.n[3]
|
|
|
|
mov rax,[rdi+3*8]
|
|
|
|
mul r14
|
|
|
|
add r10,rax
|
|
|
|
adc r8,rdx
|
2013-06-17 15:31:06 +02:00
|
|
|
|
2013-06-16 22:46:43 +02:00
|
|
|
mov r14,r10
|
|
|
|
mov r15,r8
|
|
|
|
|
|
|
|
|
|
|
|
;; *******************************************************
|
|
|
|
common_exit_norm:
|
|
|
|
mov rdi,COMP_LIMB
|
|
|
|
mov rax,r12
|
|
|
|
mul rdi
|
|
|
|
add rax,rbx
|
|
|
|
adc rcx,rdx
|
|
|
|
pop rbx
|
|
|
|
mov [rbx],rax
|
|
|
|
|
|
|
|
mov rax,r13 ; get r5
|
|
|
|
mul rdi
|
|
|
|
add rax,rcx ; +r1
|
|
|
|
adc rbp,rdx
|
|
|
|
mov [rbx+1*8],rax
|
|
|
|
|
|
|
|
mov rax,r14 ; get r6
|
|
|
|
mul rdi
|
|
|
|
add rax,rbp ; +r2
|
|
|
|
adc r11,rdx
|
|
|
|
mov [rbx+2*8],rax
|
|
|
|
|
|
|
|
mov rax,r15 ; get r7
|
|
|
|
mul rdi
|
|
|
|
add rax,r11 ; +r3
|
|
|
|
adc rdx,0
|
|
|
|
mov [rbx+3*8],rax
|
|
|
|
mov [rbx+4*8],rdx
|
|
|
|
|
|
|
|
pop r15
|
|
|
|
pop r14
|
|
|
|
pop r13
|
|
|
|
pop r12
|
|
|
|
pop rbx
|
|
|
|
pop rbp
|
|
|
|
ret
|
|
|
|
|
|
|
|
|
|
|
|
;; PROC ExSetSquare
|
|
|
|
;; Register Layout:
|
|
|
|
;; INPUT: rdi = a.n
|
|
|
|
;; rsi = this.a
|
|
|
|
;; INTERNAL: rdx:rax = multiplication accumulator
|
|
|
|
;; r8-r10 = c
|
|
|
|
;; r11-r15 = a.n[0]-a.n[4] / r3-r7
|
|
|
|
;; rbx = r0
|
|
|
|
;; rcx = r1
|
|
|
|
;; rbp = r2
|
|
|
|
GLOBAL secp256k1_fe_sqr_inner
|
|
|
|
|
|
|
|
ALIGN 32
|
|
|
|
secp256k1_fe_sqr_inner:
|
|
|
|
push rbp
|
|
|
|
push rbx
|
|
|
|
push r12
|
|
|
|
push r13
|
|
|
|
push r14
|
|
|
|
push r15
|
2013-06-17 15:31:06 +02:00
|
|
|
push rsi
|
2013-06-16 22:46:43 +02:00
|
|
|
|
|
|
|
mov r11,[rdi+8*0] ; preload a.n[0]
|
|
|
|
|
|
|
|
;; step 1: mul_c2
|
|
|
|
mov rax,r11 ; load a.n[0]
|
|
|
|
mul rax ; rdx:rax=a.n[0]²
|
|
|
|
mov r12,[rdi+1*8] ; preload a.n[1]
|
|
|
|
mov rbx,rax ; retire LO qword (r[0])
|
|
|
|
mov r8,rdx ; save overflow
|
|
|
|
xor r9,r9 ; overflow HO qwords
|
|
|
|
xor r10,r10
|
|
|
|
|
|
|
|
;; c+=2*a.n[0] * a.n[1]
|
|
|
|
mov rax,r11 ; load a.n[0]
|
|
|
|
mul r12 ; rdx:rax=a.n[0] * a.n[1]
|
|
|
|
mov r13,[rdi+2*8] ; preload a.n[2]
|
|
|
|
add rax,rax ; rdx:rax*=2
|
|
|
|
adc rdx,rdx
|
|
|
|
adc r10,0
|
|
|
|
add r8,rax ; still the same :-)
|
2013-06-17 15:31:06 +02:00
|
|
|
adc r9,rdx
|
2013-06-16 22:46:43 +02:00
|
|
|
adc r10,0 ; mmm...
|
|
|
|
|
|
|
|
mov rcx,r8 ; retire r[1]
|
|
|
|
xor r8,r8
|
|
|
|
|
|
|
|
;; c+=2*a.n[0]*a.n[2]+a.n[1]*a.n[1]
|
|
|
|
mov rax,r11 ; load a.n[0]
|
|
|
|
mul r13 ; * a.n[2]
|
|
|
|
mov r14,[rdi+3*8] ; preload a.n[3]
|
|
|
|
add rax,rax ; rdx:rax*=2
|
|
|
|
adc rdx,rdx
|
|
|
|
adc r8,0
|
|
|
|
add r9,rax
|
|
|
|
adc r10,rdx
|
|
|
|
adc r8,0
|
|
|
|
|
|
|
|
mov rax,r12
|
|
|
|
mul rax
|
|
|
|
add r9,rax
|
|
|
|
adc r10,rdx
|
|
|
|
adc r8,0
|
|
|
|
|
|
|
|
|
|
|
|
mov rbp,r9
|
|
|
|
xor r9,r9
|
|
|
|
|
|
|
|
;; c+=2*a.n[0]*a.n[3]+2*a.n[1]*a.n[2]
|
|
|
|
mov rax,r11 ; load a.n[0]
|
|
|
|
mul r14 ; * a.n[3]
|
|
|
|
add rax,rax ; rdx:rax*=2
|
|
|
|
adc rdx,rdx
|
|
|
|
adc r9,0
|
|
|
|
add r10,rax
|
|
|
|
adc r8,rdx
|
|
|
|
adc r9,0
|
|
|
|
|
|
|
|
mov rax,r12 ; load a.n[1]
|
|
|
|
mul r13 ; * a.n[2]
|
|
|
|
add rax,rax
|
|
|
|
adc rdx,rdx
|
|
|
|
adc r9,0
|
|
|
|
add r10,rax
|
|
|
|
adc r8,rdx
|
|
|
|
adc r9,0
|
|
|
|
|
|
|
|
mov r11,r10
|
|
|
|
xor r10,r10
|
|
|
|
|
|
|
|
;; c+=2*a.n[1]*a.n[3]+a.n[2]*a.n[2]
|
|
|
|
mov rax,r12 ; load a.n[1]
|
|
|
|
mul r14 ; * a.n[3]
|
|
|
|
add rax,rax ; rdx:rax*=2
|
|
|
|
adc rdx,rdx
|
|
|
|
adc r10,0
|
|
|
|
add r8,rax
|
|
|
|
adc r9,rdx
|
|
|
|
adc r10,0
|
|
|
|
|
|
|
|
mov rax,r13
|
|
|
|
mul rax
|
|
|
|
add r8,rax
|
|
|
|
adc r9,rdx
|
|
|
|
adc r10,0
|
|
|
|
|
|
|
|
mov r12,r8
|
|
|
|
xor r8,r8
|
|
|
|
;; c+=2*a.n[2]*a.n[3]
|
|
|
|
mov rax,r13 ; load a.n[2]
|
|
|
|
mul r14 ; * a.n[3]
|
|
|
|
add rax,rax ; rdx:rax*=2
|
|
|
|
adc rdx,rdx
|
|
|
|
adc r8,0
|
|
|
|
add r9,rax
|
|
|
|
adc r10,rdx
|
|
|
|
adc r8,0
|
|
|
|
|
|
|
|
mov r13,r9
|
2013-06-17 15:31:06 +02:00
|
|
|
xor r9,r9
|
2013-06-16 22:46:43 +02:00
|
|
|
|
|
|
|
;; c+=a.n[3]²
|
|
|
|
mov rax,r14
|
|
|
|
mul rax
|
|
|
|
add r10,rax
|
|
|
|
adc r8,rdx
|
2013-06-17 15:31:06 +02:00
|
|
|
|
2013-06-16 22:46:43 +02:00
|
|
|
mov r14,r10
|
|
|
|
mov r15,r8
|
|
|
|
|
|
|
|
jmp common_exit_norm
|
|
|
|
end
|
|
|
|
|
|
|
|
|