Refactor X25519 implementation.

Signed-off-by: NIIBE Yutaka <gniibe@fsij.org>
This commit is contained in:
NIIBE Yutaka 2023-12-01 15:19:10 +09:00
parent f3cb2694ce
commit 5a05a619c0
No known key found for this signature in database
GPG Key ID: 640114AF89DE6054
7 changed files with 163 additions and 115 deletions

View File

@ -1,3 +1,15 @@
2023-12-01 NIIBE Yutaka <gniibe@fsij.org>
* src/bn.c (bn256_swap_cond): New.
(bn256_set_cond): New.
* src/mod25638.c (mod25519_reduce): Use bn256_set_cond.
* src/ecc-x25519.c: Rename from ecc-mont.c, as computation is
actually X25519, while it's host side which uses big-endian
private key.
(mont_d_and_a): Refactor not using struct pt.
(compute_nQ): Use bn256_swap_cond.
* src/Makefile (CSRC): Follow the rename to ecc-x25519.c.
2023-09-05 NIIBE Yutaka <gniibe@fsij.org>
* VERSION: 2.1.

View File

@ -11,7 +11,7 @@ CSRC = main.c \
aes.c gcm-siv.c \
bn.c mod.c \
modp256k1.c jpc_p256k1.c ec_p256k1.c call-ec_p256k1.c \
mod25638.c ecc-ed25519.c ecc-mont.c sha512.c \
mod25638.c ecc-ed25519.c ecc-x25519.c sha512.c \
p448.c ecc-x448.c \
ecc-ed448.c shake256.c \
random.c neug.c sha256.c

View File

@ -1,7 +1,7 @@
/*
* bn.c -- 256-bit (and 512-bit) bignum calculation
*
* Copyright (C) 2011, 2013, 2014, 2019
* Copyright (C) 2011, 2013, 2014, 2019, 2023
* Free Software Initiative of Japan
* Author: NIIBE Yutaka <gniibe@fsij.org>
*
@ -18,7 +18,7 @@
* License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
@ -425,3 +425,35 @@ bn256_random (bn256 *X)
}
}
#endif
/**
 * @brief  Exchange the contents of A and B under control of a flag
 *
 * @param A  First number (updated in place)
 * @param B  Second number (updated in place)
 * @param b  Selector; callers are expected to pass 0 or 1
 *
 * The flag is widened to a word-sized mask (all ones for 1, zero for 0)
 * and every word pair goes through the same XOR sequence, so there is
 * no branch on B in this routine: with a zero mask the XORs simply
 * leave both words unchanged.
 */
void
bn256_swap_cond (bn256 *A, bn256 *B, uint32_t b)
{
  uint32_t mask = 0UL - b;
  int i;

  for (i = 0; i < BN256_WORDS; i++)
    {
      /* Bits where the two words differ, kept only when swapping.  */
      uint32_t diff = (A->word[i] ^ B->word[i]) & mask;

      A->word[i] ^= diff;
      B->word[i] ^= diff;
    }
}
/**
 * @brief  Overwrite A with B under control of a flag
 *
 * @param A  Destination (updated in place)
 * @param B  Source (read only)
 * @param b  Selector; callers are expected to pass 0 or 1
 *
 * Two complementary masks are derived from the flag.  Each word of A
 * is rebuilt from the bits of A selected by one mask and the bits of
 * B selected by the other, so both operands are read for every word
 * and there is no branch on B in this routine.
 */
void
bn256_set_cond (bn256 *A, const bn256 *B, uint32_t b)
{
  uint32_t take = 0UL - b;	/* All ones when b is 1.  */
  uint32_t keep = ~take;	/* Same value as b - 1: all ones when b is 0.  */
  int i;

  for (i = 0; i < BN256_WORDS; i++)
    A->word[i] = (A->word[i] & keep) | (B->word[i] & take);
}

View File

@ -21,3 +21,5 @@ int bn256_is_even (const bn256 *X);
int bn256_is_ge (const bn256 *A, const bn256 *B);
int bn256_cmp (const bn256 *A, const bn256 *B);
void bn256_random (bn256 *X);
void bn256_swap_cond (bn256 *A, bn256 *B, uint32_t b);
void bn256_set_cond (bn256 *A, const bn256 *B, uint32_t b);

View File

@ -1,8 +1,9 @@
/* -*- coding: utf-8 -*-
* ecc-mont.c - Elliptic curve computation for
* ecc-x25519.c - Elliptic curve computation for
* the Montgomery curve: y^2 = x^3 + 486662*x^2 + x.
*
* Copyright (C) 2014, 2015, 2017, 2021 Free Software Initiative of Japan
* Copyright (C) 2014, 2015, 2017, 2021, 2023
* Free Software Initiative of Japan
* Author: NIIBE Yutaka <gniibe@fsij.org>
*
* This file is a part of Gnuk, a GnuPG USB Token implementation.
@ -18,7 +19,7 @@
* License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
@ -99,41 +100,94 @@ mod25638_mul_121665 (bn256 *x, const bn256 *a)
}
typedef struct
{
bn256 x[1];
bn256 z[1];
} pt;
/* fe: Field Element */
typedef bn256 fe;
#define fe_add mod25638_add
#define fe_sub mod25638_sub
#define fe_mul mod25638_mul
#define fe_sqr mod25638_sqr
#define fe_m_d mod25638_mul_121665
/**
* @brief Process Montgomery double-and-add
*
* With Q0, Q1, DIF (= Q0 - Q1), compute PRD = 2Q0, SUM = Q0 + Q1
* Q0 and Q1 are clobbered.
* On return, PRD is in Q0, SUM is in Q1
* Caller provides temporary T0 and T1
*
* Note: indentation graphically expresses the ladder.
*/
static void
mont_d_and_a (pt *prd, pt *sum, pt *q0, pt *q1, const bn256 *dif_x)
mont_d_and_a (fe *x0, fe *z0, fe *x1, fe *z1, const fe *dif_x, fe *t0, fe *t1)
{
mod25638_add (sum->x, q1->x, q1->z);
mod25638_sub (q1->z, q1->x, q1->z);
mod25638_add (prd->x, q0->x, q0->z);
mod25638_sub (q0->z, q0->x, q0->z);
mod25638_mul (q1->x, q0->z, sum->x);
mod25638_mul (q1->z, prd->x, q1->z);
mod25638_sqr (q0->x, prd->x);
mod25638_sqr (q0->z, q0->z);
mod25638_add (sum->x, q1->x, q1->z);
mod25638_sub (q1->z, q1->x, q1->z);
mod25638_mul (prd->x, q0->x, q0->z);
mod25638_sub (q0->z, q0->x, q0->z);
mod25638_sqr (sum->x, sum->x);
mod25638_sqr (sum->z, q1->z);
mod25638_mul_121665 (prd->z, q0->z);
mod25638_mul (sum->z, sum->z, dif_x);
mod25638_add (prd->z, q0->x, prd->z);
mod25638_mul (prd->z, prd->z, q0->z);
#define xp x0
#define zp z0
#define xs x1
#define zs z1
#define tmp0 t0
#define tmp1 t1
#define tmp2 x1
#define tmp3 x0
#define tmp4 t0
#define tmp5 t1
#define tmp6 z0
#define tmp7 x1
#define tmp8 z1
#define tmp9 t0
#define tmpA t1
#define tmpB t0
#define tmpC t0
#define tmpD z0
fe_add (tmp0,
x1,
z1);
fe_sub (tmp1,
x1,
z1);
fe_add (tmp2,
x0,
z0);
fe_sub (tmp3,
x0,
z0);
fe_mul (tmp4,
tmp3,
tmp0);
fe_mul (tmp5,
tmp2,
tmp1);
fe_sqr (tmp6,
tmp2);
fe_sqr (tmp7,
tmp3);
fe_add (tmp8,
tmp4,
tmp5);
fe_sub (tmp9,
tmp4,
tmp5);
fe_mul (xp,
tmp6,
tmp7);
fe_sub (tmpA,
tmp6,
tmp7);
fe_sqr (xs,
tmp8);
fe_sqr (tmpB,
tmp9);
fe_mul (zs,
tmpB, dif_x);
fe_m_d (tmpC,
tmpA);
fe_add (tmpD,
tmp6,
tmpC);
fe_mul (zp,
tmpD,
tmpA);
}
@ -147,42 +201,30 @@ mont_d_and_a (pt *prd, pt *sum, pt *q0, pt *q1, const bn256 *dif_x)
static void
compute_nQ (bn256 *res, const bn256 *n, const bn256 *q_x)
{
int i, j;
pt p0[1], p1[1], p0_[1], p1_[1];
int i;
bn256 x0[1], z0[1], x1[1], z1[1];
bn256 t0[1], t1[1];
uint32_t swap = 0;
const unsigned char *np = (const unsigned char *)n->word;
/* P0 = O = (1:0) */
memset (p0->x, 0, sizeof (bn256));
p0->x->word[0] = 1;
memset (p0->z, 0, sizeof (bn256));
memset (x0, 0, sizeof (bn256));
x0->word[0] = 1;
memset (z0, 0, sizeof (bn256));
/* P1 = (X:1) */
memcpy (p1->x, q_x, sizeof (bn256));
memset (p1->z, 0, sizeof (bn256));
p1->z->word[0] = 1;
memcpy (x1, q_x, sizeof (bn256));
memcpy (z1, x0, sizeof (bn256));
for (i = 0; i < 8; i++)
for (i = 254; i >= 0; i--)
{
uint32_t u = n->word[7-i];
uint32_t b = (np[i>>3]>>(i&7))&1;
for (j = 0; j < 16; j++)
{
pt *q0, *q1;
pt *sum_n, *prd_n;
if ((u & 0x80000000))
q0 = p1, q1 = p0, sum_n = p0_, prd_n = p1_;
else
q0 = p0, q1 = p1, sum_n = p1_, prd_n = p0_;
mont_d_and_a (prd_n, sum_n, q0, q1, q_x);
if ((u & 0x40000000))
q0 = p1_, q1 = p0_, sum_n = p0, prd_n = p1;
else
q0 = p0_, q1 = p1_, sum_n = p1, prd_n = p0;
mont_d_and_a (prd_n, sum_n, q0, q1, q_x);
u <<= 2;
}
swap ^= b;
bn256_swap_cond (x0, x1, swap);
bn256_swap_cond (z0, z1, swap);
swap = b;
mont_d_and_a (x0, z0, x1, z1, q_x, t0, t1);
}
/* We know the LSB of N is always 0. Thus, result is always in P0. */
@ -191,8 +233,8 @@ compute_nQ (bn256 *res, const bn256 *n, const bn256 *q_x)
* but returns 0 (like the implementation of z^(p-2)), thus, RES will
* be 0 in that case, which is correct value.
*/
mod_inv (res, p0->z, p25519);
mod25638_mul (res, res, p0->x);
mod_inv (res, z0, p25519);
mod25638_mul (res, res, x0);
mod25519_reduce (res);
}

View File

@ -231,57 +231,16 @@ void
mod25519_reduce (bn256 *X)
{
uint32_t q;
bn256 r0[1], r1[1];
int flag;
bn256 R[1];
memcpy (r0, X, sizeof (bn256));
q = (r0->word[7] >> 31);
r0->word[7] &= 0x7fffffff;
if (q)
{
bn256_add_uint (r0, r0, 19);
q = (r0->word[7] >> 31);
r0->word[7] &= 0x7fffffff;
if (q)
{
bn256_add_uint (r1, r0, 19);
q = (r1->word[7] >> 31);
r1->word[7] &= 0x7fffffff;
flag = 0;
}
else
flag = 1;
}
else
{
bn256_add_uint (r1, r0, 19);
q = (r1->word[7] >> 31); /* dummy */
r1->word[7] &= 0x7fffffff; /* dummy */
if (q)
flag = 2;
else
flag = 3;
}
q = (X->word[7] >> 31);
X->word[7] &= 0x7fffffff;
if (flag)
{
bn256_add_uint (r1, r0, 19);
q = (r1->word[7] >> 31);
r1->word[7] &= 0x7fffffff;
if (q)
memcpy (X, r1, sizeof (bn256));
else
memcpy (X, r0, sizeof (bn256));
}
else
{
if (q)
{
asm volatile ("" : : "r" (q) : "memory");
memcpy (X, r1, sizeof (bn256));
asm volatile ("" : : "r" (q) : "memory");
}
else
memcpy (X, r1, sizeof (bn256));
}
bn256_add_uint (X, X, q * 19);
bn256_add_uint (R, X, 19);
q = (R->word[7] >> 31);
R->word[7] &= 0x7fffffff;
bn256_set_cond (X, R, q);
}

View File

@ -1545,6 +1545,7 @@ proc_key_import (const uint8_t *data, int len)
if (len - 12 != 32)
return 0; /* Error. */
/* Reverse the byte order, because it's a big-endian MPI from the server. */
for (i = 0; i < 32; i++)
priv[31-i] = data[12+i];
ecdh_compute_public_25519 (priv, pubkey);