diff --git a/ChangeLog b/ChangeLog index 0a71a11..0c5991a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +2023-12-01 NIIBE Yutaka + + * src/bn.c (bn256_swap_cond): New. + (bn256_set_cond): New. + * src/mod25638.c (mod25519_reduce): Use bn256_set_cond. + * src/ecc-x25519.c: Rename from ecc-mont.c, as computation is + actually X25519, while it's host side which uses big-endian + private key. + (mont_d_and_a): Refactor not using struct pt. + (compute_nQ): Use bn256_swap_cond. + * src/Makefile (CSRC): Follow the rename of ecc-x25519.c. + 2023-09-05 NIIBE Yutaka * VERSION: 2.1. diff --git a/src/Makefile b/src/Makefile index a854f45..77ae9c0 100644 --- a/src/Makefile +++ b/src/Makefile @@ -11,7 +11,7 @@ CSRC = main.c \ aes.c gcm-siv.c \ bn.c mod.c \ modp256k1.c jpc_p256k1.c ec_p256k1.c call-ec_p256k1.c \ - mod25638.c ecc-ed25519.c ecc-mont.c sha512.c \ + mod25638.c ecc-ed25519.c ecc-x25519.c sha512.c \ p448.c ecc-x448.c \ ecc-ed448.c shake256.c \ random.c neug.c sha256.c diff --git a/src/bn.c b/src/bn.c index 9f32e60..5d3a361 100644 --- a/src/bn.c +++ b/src/bn.c @@ -1,7 +1,7 @@ /* * bn.c -- 256-bit (and 512-bit) bignum calculation * - * Copyright (C) 2011, 2013, 2014, 2019 + * Copyright (C) 2011, 2013, 2014, 2019, 2023 * Free Software Initiative of Japan * Author: NIIBE Yutaka * @@ -18,7 +18,7 @@ * License for more details. * * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. + * along with this program. If not, see <https://www.gnu.org/licenses/>. 
* */ @@ -425,3 +425,35 @@ bn256_random (bn256 *X) } } #endif + +void +bn256_swap_cond (bn256 *A, bn256 *B, uint32_t b) +{ + uint32_t mask = 0UL - b; + int i; + uint32_t *p = A->word; + uint32_t *q = B->word; + + for (i = 0; i < BN256_WORDS; i++) + { + uint32_t t = mask & (*p^*q); + *p++ ^= t; + *q++ ^= t; + } +} + +void +bn256_set_cond (bn256 *A, const bn256 *B, uint32_t b) +{ + uint32_t mask1 = 0UL - b; + uint32_t mask2 = b - 1UL; + int i; + uint32_t *p = A->word; + const uint32_t *q = B->word; + + for (i = 0; i < BN256_WORDS; i++) + { + *p = (*p & mask2) | (*q++ & mask1); + p++; + } +} diff --git a/src/bn.h b/src/bn.h index d22eea0..6044d61 100644 --- a/src/bn.h +++ b/src/bn.h @@ -21,3 +21,5 @@ int bn256_is_even (const bn256 *X); int bn256_is_ge (const bn256 *A, const bn256 *B); int bn256_cmp (const bn256 *A, const bn256 *B); void bn256_random (bn256 *X); +void bn256_swap_cond (bn256 *A, bn256 *B, uint32_t b); +void bn256_set_cond (bn256 *A, const bn256 *B, uint32_t b); diff --git a/src/ecc-x25519.c b/src/ecc-x25519.c index eeaceb1..fa53739 100644 --- a/src/ecc-x25519.c +++ b/src/ecc-x25519.c @@ -1,8 +1,9 @@ /* -*- coding: utf-8 -*- - * ecc-mont.c - Elliptic curve computation for - * the Montgomery curve: y^2 = x^3 + 486662*x^2 + x. + * ecc-x25519.c - Elliptic curve computation for + * the Montgomery curve: y^2 = x^3 + 486662*x^2 + x. * - * Copyright (C) 2014, 2015, 2017, 2021 Free Software Initiative of Japan + * Copyright (C) 2014, 2015, 2017, 2021, 2023 + * Free Software Initiative of Japan * Author: NIIBE Yutaka * * This file is a part of Gnuk, a GnuPG USB Token implementation. @@ -18,7 +19,7 @@ * License for more details. * * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. + * along with this program. If not, see <https://www.gnu.org/licenses/>. 
* */ @@ -99,41 +100,94 @@ mod25638_mul_121665 (bn256 *x, const bn256 *a) } -typedef struct -{ - bn256 x[1]; - bn256 z[1]; -} pt; - +/* fe: Field Element */ +typedef bn256 fe; +#define fe_add mod25638_add +#define fe_sub mod25638_sub +#define fe_mul mod25638_mul +#define fe_sqr mod25638_sqr +#define fe_m_d mod25638_mul_121665 /** * @brief Process Montgomery double-and-add * * With Q0, Q1, DIF (= Q0 - Q1), compute PRD = 2Q0, SUM = Q0 + Q1 - * Q0 and Q1 are clobbered. + * On return, PRD is in Q0, SUM is in Q1 + * Caller provides temporary T0 and T1 * + * Note: indentation graphically expresses the ladder. */ static void -mont_d_and_a (pt *prd, pt *sum, pt *q0, pt *q1, const bn256 *dif_x) +mont_d_and_a (fe *x0, fe *z0, fe *x1, fe *z1, const fe *dif_x, fe *t0, fe *t1) { - mod25638_add (sum->x, q1->x, q1->z); - mod25638_sub (q1->z, q1->x, q1->z); - mod25638_add (prd->x, q0->x, q0->z); - mod25638_sub (q0->z, q0->x, q0->z); - mod25638_mul (q1->x, q0->z, sum->x); - mod25638_mul (q1->z, prd->x, q1->z); - mod25638_sqr (q0->x, prd->x); - mod25638_sqr (q0->z, q0->z); - mod25638_add (sum->x, q1->x, q1->z); - mod25638_sub (q1->z, q1->x, q1->z); - mod25638_mul (prd->x, q0->x, q0->z); - mod25638_sub (q0->z, q0->x, q0->z); - mod25638_sqr (sum->x, sum->x); - mod25638_sqr (sum->z, q1->z); - mod25638_mul_121665 (prd->z, q0->z); - mod25638_mul (sum->z, sum->z, dif_x); - mod25638_add (prd->z, q0->x, prd->z); - mod25638_mul (prd->z, prd->z, q0->z); +#define xp x0 +#define zp z0 +#define xs x1 +#define zs z1 + +#define tmp0 t0 +#define tmp1 t1 +#define tmp2 x1 +#define tmp3 x0 +#define tmp4 t0 +#define tmp5 t1 +#define tmp6 z0 +#define tmp7 x1 +#define tmp8 z1 +#define tmp9 t0 +#define tmpA t1 +#define tmpB t0 +#define tmpC t0 +#define tmpD z0 + + fe_add (tmp0, + x1, + z1); + fe_sub (tmp1, + x1, + z1); + fe_add (tmp2, + x0, + z0); + fe_sub (tmp3, + x0, + z0); + fe_mul (tmp4, + tmp3, + tmp0); + fe_mul (tmp5, + tmp2, + tmp1); + fe_sqr (tmp6, + tmp2); + fe_sqr (tmp7, + tmp3); + fe_add (tmp8, 
+ tmp4, + tmp5); + fe_sub (tmp9, + tmp4, + tmp5); + fe_mul (xp, + tmp6, + tmp7); + fe_sub (tmpA, + tmp6, + tmp7); + fe_sqr (xs, + tmp8); + fe_sqr (tmpB, + tmp9); + fe_mul (zs, + tmpB, dif_x); + fe_m_d (tmpC, + tmpA); + fe_add (tmpD, + tmp6, + tmpC); + fe_mul (zp, + tmpD, + tmpA); } @@ -147,42 +201,30 @@ mont_d_and_a (pt *prd, pt *sum, pt *q0, pt *q1, const bn256 *dif_x) static void compute_nQ (bn256 *res, const bn256 *n, const bn256 *q_x) { - int i, j; - pt p0[1], p1[1], p0_[1], p1_[1]; + int i; + bn256 x0[1], z0[1], x1[1], z1[1]; + bn256 t0[1], t1[1]; + uint32_t swap = 0; + const unsigned char *np = (const unsigned char *)n->word; /* P0 = O = (1:0) */ - memset (p0->x, 0, sizeof (bn256)); - p0->x->word[0] = 1; - memset (p0->z, 0, sizeof (bn256)); + memset (x0, 0, sizeof (bn256)); + x0->word[0] = 1; + memset (z0, 0, sizeof (bn256)); /* P1 = (X:1) */ - memcpy (p1->x, q_x, sizeof (bn256)); - memset (p1->z, 0, sizeof (bn256)); - p1->z->word[0] = 1; + memcpy (x1, q_x, sizeof (bn256)); + memcpy (z1, x0, sizeof (bn256)); - for (i = 0; i < 8; i++) + for (i = 254; i >= 0; i--) { - uint32_t u = n->word[7-i]; + uint32_t b = (np[i>>3]>>(i&7))&1; - for (j = 0; j < 16; j++) - { - pt *q0, *q1; - pt *sum_n, *prd_n; - - if ((u & 0x80000000)) - q0 = p1, q1 = p0, sum_n = p0_, prd_n = p1_; - else - q0 = p0, q1 = p1, sum_n = p1_, prd_n = p0_; - mont_d_and_a (prd_n, sum_n, q0, q1, q_x); - - if ((u & 0x40000000)) - q0 = p1_, q1 = p0_, sum_n = p0, prd_n = p1; - else - q0 = p0_, q1 = p1_, sum_n = p1, prd_n = p0; - mont_d_and_a (prd_n, sum_n, q0, q1, q_x); - - u <<= 2; - } + swap ^= b; + bn256_swap_cond (x0, x1, swap); + bn256_swap_cond (z0, z1, swap); + swap = b; + mont_d_and_a (x0, z0, x1, z1, q_x, t0, t1); } /* We know the LSB of N is always 0. Thus, result is always in P0. */ @@ -191,8 +233,8 @@ compute_nQ (bn256 *res, const bn256 *n, const bn256 *q_x) * but returns 0 (like the implementation of z^(p-2)), thus, RES will * be 0 in that case, which is correct value. 
*/ - mod_inv (res, p0->z, p25519); - mod25638_mul (res, res, p0->x); + mod_inv (res, z0, p25519); + mod25638_mul (res, res, x0); mod25519_reduce (res); } diff --git a/src/mod25638.c b/src/mod25638.c index 9b0777a..dd1a362 100644 --- a/src/mod25638.c +++ b/src/mod25638.c @@ -231,57 +231,16 @@ void mod25519_reduce (bn256 *X) { uint32_t q; - bn256 r0[1], r1[1]; - int flag; + bn256 R[1]; - memcpy (r0, X, sizeof (bn256)); - q = (r0->word[7] >> 31); - r0->word[7] &= 0x7fffffff; - if (q) - { - bn256_add_uint (r0, r0, 19); - q = (r0->word[7] >> 31); - r0->word[7] &= 0x7fffffff; - if (q) - { - bn256_add_uint (r1, r0, 19); - q = (r1->word[7] >> 31); - r1->word[7] &= 0x7fffffff; - flag = 0; - } - else - flag = 1; - } - else - { - bn256_add_uint (r1, r0, 19); - q = (r1->word[7] >> 31); /* dummy */ - r1->word[7] &= 0x7fffffff; /* dummy */ - if (q) - flag = 2; - else - flag = 3; - } + q = (X->word[7] >> 31); + X->word[7] &= 0x7fffffff; - if (flag) - { - bn256_add_uint (r1, r0, 19); - q = (r1->word[7] >> 31); - r1->word[7] &= 0x7fffffff; - if (q) - memcpy (X, r1, sizeof (bn256)); - else - memcpy (X, r0, sizeof (bn256)); - } - else - { - if (q) - { - asm volatile ("" : : "r" (q) : "memory"); - memcpy (X, r1, sizeof (bn256)); - asm volatile ("" : : "r" (q) : "memory"); - } - else - memcpy (X, r1, sizeof (bn256)); - } + bn256_add_uint (X, X, q * 19); + + bn256_add_uint (R, X, 19); + q = (R->word[7] >> 31); + R->word[7] &= 0x7fffffff; + + bn256_set_cond (X, R, q); } diff --git a/src/openpgp-do.c b/src/openpgp-do.c index c60263a..500f567 100644 --- a/src/openpgp-do.c +++ b/src/openpgp-do.c @@ -1545,6 +1545,7 @@ proc_key_import (const uint8_t *data, int len) if (len - 12 != 32) return 0; /* Error. */ + /* Reverse the order, because it's big-endian MPI from server. */ for (i = 0; i < 32; i++) priv[31-i] = data[12+i]; ecdh_compute_public_25519 (priv, pubkey);