secp256k1-zkp/src/field_5x52_int128_impl.h

// Copyright (c) 2013 Pieter Wuille
// Distributed under the MIT/X11 software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.

#ifndef _SECP256K1_FIELD_INNER5X52_IMPL_H_
#define _SECP256K1_FIELD_INNER5X52_IMPL_H_

#include <stdint.h>

// Multiply two field elements, each held as five 64-bit limbs, and store the
// partially reduced five-limb product.
//
//   a, b : inputs; limbs a[0..4] and b[0..4] are read. Index 0 is the lowest
//          column of the schoolbook product computed below.
//   r    : output; r[0..3] are masked to 52 bits, r[4] receives the final
//          accumulator value without masking.
//
// Every limb of a and b is copied into a local before anything is stored to
// r, so r may refer to the same storage as a or b.
//
// NOTE(review): the inputs are not range-checked and the accumulators are
// signed 128-bit; callers presumably keep the limbs small enough that the
// column sums cannot overflow -- confirm against the caller's contract.
SECP256K1_INLINE static void secp256k1_fe_mul_inner(const uint64_t *a, const uint64_t *b, uint64_t *r) {
    // Mask selecting the low 52 bits of an accumulator.
    const uint64_t mask52 = 0xFFFFFFFFFFFFFULL;
    // Multiplier applied when a column is folded into the column five limbs
    // below it (presumably 2^260 mod p for the secp256k1 prime -- confirm).
    const uint64_t fold = 0x1000003D10ULL;

    const uint64_t a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4];
    const uint64_t b0 = b[0], b1 = b[1], b2 = b[2], b3 = b[3], b4 = b[4];

    __int128 lo;   // column 8 first, then columns 0..2 and the final carries
    __int128 hi;   // columns 3..7
    uint64_t limb0, limb1, limb3, limb4;
    uint64_t top_bits;   // the 4 bits of column 4 that sit above bit 48
    uint64_t wrapped;    // column 5 limb merged with top_bits

    // Column 3, plus the low 52 bits of column 8 folded down.
    hi = (__int128)a0 * b3
       + (__int128)a1 * b2
       + (__int128)a2 * b1
       + (__int128)a3 * b0;
    lo = (__int128)a4 * b4;
    hi += (lo & mask52) * fold;
    lo >>= 52;
    limb3 = hi & mask52;
    hi >>= 52;

    // Column 4, plus the remaining high part of column 8.
    hi += (__int128)a0 * b4
        + (__int128)a1 * b3
        + (__int128)a2 * b2
        + (__int128)a3 * b1
        + (__int128)a4 * b0;
    hi += lo * fold;
    limb4 = hi & mask52;
    hi >>= 52;
    // The top limb keeps only 48 bits; the 4 bits above are carried to column 0.
    top_bits = limb4 >> 48;
    limb4 &= mask52 >> 4;

    // Column 0, with column 5 (shifted up by 4 and joined with top_bits)
    // folded in using the multiplier shifted down by 4.
    lo = (__int128)a0 * b0;
    hi += (__int128)a1 * b4
        + (__int128)a2 * b3
        + (__int128)a3 * b2
        + (__int128)a4 * b1;
    wrapped = hi & mask52;
    hi >>= 52;
    wrapped = (wrapped << 4) | top_bits;
    lo += (__int128)wrapped * (fold >> 4);
    limb0 = lo & mask52;
    lo >>= 52;

    // Column 1, with column 6 folded in.
    lo += (__int128)a0 * b1
        + (__int128)a1 * b0;
    hi += (__int128)a2 * b4
        + (__int128)a3 * b3
        + (__int128)a4 * b2;
    lo += (hi & mask52) * fold;
    hi >>= 52;
    limb1 = lo & mask52;
    lo >>= 52;

    // Column 2, with column 7 folded in.
    lo += (__int128)a0 * b2
        + (__int128)a1 * b1
        + (__int128)a2 * b0;
    hi += (__int128)a3 * b4
        + (__int128)a4 * b3;
    lo += (hi & mask52) * fold;
    hi >>= 52;

    // Store the result and propagate the remaining carries through the
    // limbs 3 and 4 values saved earlier.
    r[0] = limb0;
    r[1] = limb1;
    r[2] = lo & mask52;
    lo >>= 52;
    lo += hi * fold + limb3;
    r[3] = lo & mask52;
    lo >>= 52;
    lo += limb4;
    r[4] = lo;
}

// Square a field element held as five 64-bit limbs and store the partially
// reduced five-limb result.
//
//   a : input; limbs a[0..4] are read. Index 0 is the lowest column.
//   r : output; r[0..3] are masked to 52 bits, r[4] receives the final
//       accumulator value without masking.
//
// All limbs of a are copied into locals before anything is stored to r, so r
// may refer to the same storage as a.
//
// NOTE(review): the input is not range-checked; the doubled limbs are formed
// in 64-bit arithmetic and the accumulators are signed 128-bit, so callers
// presumably keep the limbs small enough to avoid overflow -- confirm.
SECP256K1_INLINE static void secp256k1_fe_sqr_inner(const uint64_t *a, uint64_t *r) {
    // Mask selecting the low 52 bits of an accumulator.
    const uint64_t mask52 = 0xFFFFFFFFFFFFFULL;
    // Multiplier applied when a column is folded into the column five limbs
    // below it (presumably 2^260 mod p for the secp256k1 prime -- confirm).
    const uint64_t fold = 0x1000003D10ULL;

    const uint64_t x0 = a[0], x1 = a[1], x2 = a[2], x3 = a[3], x4 = a[4];
    // Doubled limbs, used for the cross terms that occur twice in a square.
    const uint64_t dbl0 = x0 * 2;
    const uint64_t dbl1 = x1 * 2;
    const uint64_t dbl2 = x2 * 2;
    const uint64_t dbl4 = x4 * 2;

    __int128 lo;   // column 8 first, then columns 0..2 and the final carries
    __int128 hi;   // columns 3..7
    uint64_t limb0, limb1, limb2, limb3, limb4;
    uint64_t top_bits;   // the 4 bits of column 4 that sit above bit 48
    uint64_t wrapped;    // column 5 limb merged with top_bits

    // Column 3, plus the low 52 bits of column 8 folded down.
    hi = (__int128)dbl0 * x3
       + (__int128)dbl1 * x2;
    lo = (__int128)x4 * x4;
    hi += (lo & mask52) * fold;
    lo >>= 52;
    limb3 = hi & mask52;
    hi >>= 52;

    // Column 4, plus the remaining high part of column 8.
    hi += (__int128)x0 * dbl4
        + (__int128)dbl1 * x3
        + (__int128)x2 * x2;
    hi += lo * fold;
    limb4 = hi & mask52;
    hi >>= 52;
    // The top limb keeps only 48 bits; the 4 bits above are carried to column 0.
    top_bits = limb4 >> 48;
    limb4 &= mask52 >> 4;

    // Column 0, with column 5 (shifted up by 4 and joined with top_bits)
    // folded in using the multiplier shifted down by 4.
    lo = (__int128)x0 * x0;
    hi += (__int128)x1 * dbl4
        + (__int128)dbl2 * x3;
    wrapped = hi & mask52;
    hi >>= 52;
    wrapped = (wrapped << 4) | top_bits;
    lo += (__int128)wrapped * (fold >> 4);
    limb0 = lo & mask52;
    lo >>= 52;

    // Column 1, with column 6 folded in.
    lo += (__int128)dbl0 * x1;
    hi += (__int128)x2 * dbl4
        + (__int128)x3 * x3;
    lo += (hi & mask52) * fold;
    hi >>= 52;
    limb1 = lo & mask52;
    lo >>= 52;

    // Column 2, with column 7 folded in.
    lo += (__int128)dbl0 * x2
        + (__int128)x1 * x1;
    hi += (__int128)x3 * dbl4;
    lo += (hi & mask52) * fold;
    hi >>= 52;
    limb2 = lo & mask52;
    lo >>= 52;

    // Propagate the remaining carries through the limbs 3 and 4 values saved
    // earlier, then store the result.
    lo += hi * fold + limb3;
    r[0] = limb0;
    r[1] = limb1;
    r[2] = limb2;
    r[3] = lo & mask52;
    lo >>= 52;
    lo += limb4;
    r[4] = lo;
}

#endif
MIT License 2013-05-09 15:24:32 +02:00			`// Copyright (c) 2013 Pieter Wuille`
			`// Distributed under the MIT/X11 software license, see the accompanying`
			`// file COPYING or http://www.opensource.org/licenses/mit-license.php.`

Reorganize source tree: no .c for non-objects 2013-04-05 02:09:37 +02:00			`#ifndef _SECP256K1_FIELD_INNER5X52_IMPL_H_`
			`#define _SECP256K1_FIELD_INNER5X52_IMPL_H_`

			`#include <stdint.h>`
Split 5x52 inner implementations 2013-03-31 04:37:15 +02:00
Reorder static to comply with C99 and switch to the inline macro. 2014-11-12 12:57:35 -08:00			`SECP256K1_INLINE static void secp256k1_fe_mul_inner(const uint64_t *a, const uint64_t *b, uint64_t *r) {`
Split 5x52 inner implementations 2013-03-31 04:37:15 +02:00
Rewrite mul/sqr for 32bit/64bit - interleave calculation of the lower and upper partial product ranges, and reduction - less registers needed, more opportunities for parallel ops 2014-10-27 22:10:26 +07:00			`const uint64_t M = 0xFFFFFFFFFFFFFULL, R = 0x1000003D10ULL;`

			`__int128 c, d;`

			`d = (__int128)a[0] * b[3]`
			`+ (__int128)a[1] * b[2]`
			`+ (__int128)a[2] * b[1]`
			`+ (__int128)a[3] * b[0];`
			`c = (__int128)a[4] * b[4];`
			`d += (c & M) * R; c >>= 52;`
			`uint64_t t3 = d & M; d >>= 52;`

			`d += (__int128)a[0] * b[4]`
			`+ (__int128)a[1] * b[3]`
			`+ (__int128)a[2] * b[2]`
			`+ (__int128)a[3] * b[1]`
			`+ (__int128)a[4] * b[0];`
			`d += c * R;`
			`uint64_t t4 = d & M; d >>= 52;`
			`uint64_t tx = (t4 >> 48); t4 &= (M >> 4);`

			`c = (__int128)a[0] * b[0];`
			`d += (__int128)a[1] * b[4]`
			`+ (__int128)a[2] * b[3]`
			`+ (__int128)a[3] * b[2]`
			`+ (__int128)a[4] * b[1];`
			`uint64_t u0 = d & M; d >>= 52;`
			`u0 = (u0 << 4) \| tx;`
			`c += (__int128)u0 * (R >> 4);`
			`uint64_t t0 = c & M; c >>= 52;`

			`c += (__int128)a[0] * b[1]`
			`+ (__int128)a[1] * b[0];`
			`d += (__int128)a[2] * b[4]`
			`+ (__int128)a[3] * b[3]`
			`+ (__int128)a[4] * b[2];`
			`c += (d & M) * R; d >>= 52;`
			`uint64_t t1 = c & M; c >>= 52;`

			`c += (__int128)a[0] * b[2]`
			`+ (__int128)a[1] * b[1]`
			`+ (__int128)a[2] * b[0];`
			`d += (__int128)a[3] * b[4]`
			`+ (__int128)a[4] * b[3];`
			`c += (d & M) * R; d >>= 52;`

			`r[0] = t0;`
			`r[1] = t1;`
			`r[2] = c & M; c >>= 52;`
			`c += d * R + t3;;`
			`r[3] = c & M; c >>= 52;`
			`c += t4;`
			`r[4] = c;`
Split 5x52 inner implementations 2013-03-31 04:37:15 +02:00			`}`

Reorder static to comply with C99 and switch to the inline macro. 2014-11-12 12:57:35 -08:00			`SECP256K1_INLINE static void secp256k1_fe_sqr_inner(const uint64_t *a, uint64_t *r) {`
Split 5x52 inner implementations 2013-03-31 04:37:15 +02:00
Rewrite mul/sqr for 32bit/64bit - interleave calculation of the lower and upper partial product ranges, and reduction - less registers needed, more opportunities for parallel ops 2014-10-27 22:10:26 +07:00			`const uint64_t M = 0xFFFFFFFFFFFFFULL, R = 0x1000003D10ULL;`

			`__int128 c, d;`

			`uint64_t a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4];`

			`d = (__int128)(a0*2) * a3`
			`+ (__int128)(a1*2) * a2;`
			`c = (__int128)a4 * a4;`
			`d += (c & M) * R; c >>= 52;`
			`uint64_t t3 = d & M; d >>= 52;`

			`a4 *= 2;`
			`d += (__int128)a0 * a4`
			`+ (__int128)(a1*2) * a3`
			`+ (__int128)a2 * a2;`
			`d += c * R;`
			`uint64_t t4 = d & M; d >>= 52;`
			`uint64_t tx = (t4 >> 48); t4 &= (M >> 4);`

			`c = (__int128)a0 * a0;`
			`d += (__int128)a1 * a4`
			`+ (__int128)(a2*2) * a3;`
			`uint64_t u0 = d & M; d >>= 52;`
			`u0 = (u0 << 4) \| tx;`
			`c += (__int128)u0 * (R >> 4);`
			`r[0] = c & M; c >>= 52;`

			`a0 *= 2;`
			`c += (__int128)a0 * a1;`
			`d += (__int128)a2 * a4`
			`+ (__int128)a3 * a3;`
			`c += (d & M) * R; d >>= 52;`
			`r[1] = c & M; c >>= 52;`

			`c += (__int128)a0 * a2`
			`+ (__int128)a1 * a1;`
			`d += (__int128)a3 * a4;`
			`c += (d & M) * R; d >>= 52;`
			`r[2] = c & M; c >>= 52;`

			`c += d * R + t3;;`
			`r[3] = c & M; c >>= 52;`
			`c += t4;`
			`r[4] = c;`
Split 5x52 inner implementations 2013-03-31 04:37:15 +02:00			`}`
Reorganize source tree: no .c for non-objects 2013-04-05 02:09:37 +02:00
			`#endif`