pico-hsm/modp256k1.c
Pol Henarejos 0af5685495
Adding the rest of files:
- ASM is disabled
- Neug needs full rewrite
- Flash is based on PiMoroni 4MB flash (needs adjust)

Signed-off-by: Pol Henarejos <pol.henarejos@cttc.es>
2022-01-03 02:02:39 +01:00

316 lines
7.8 KiB
C

/*
* modp256k1.c -- modulo arithmetic for p256k1
*
* Copyright (C) 2014, 2016, 2020 Free Software Initiative of Japan
* Author: NIIBE Yutaka <gniibe@fsij.org>
*
* This file is a part of Gnuk, a GnuPG USB Token implementation.
*
* Gnuk is free software: you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Gnuk is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
* License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
/*
* p256k1 = 2^256 - 2^32 - 2^9 - 2^8 - 2^7 - 2^6 - 2^4 - 1
*/
#include <stdint.h>
#include <string.h>
#include "bn.h"
#include "modp256k1.h"
/*
256 224 192 160 128 96 64 32 0
2^256
1 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
2^256 - 2^32
0 ffffffff ffffffff ffffffff ffffffff ffffffff ffffffff ffffffff 00000000
2^256 - 2^32 - 2^9
0 ffffffff ffffffff ffffffff ffffffff ffffffff ffffffff fffffffe fffffe00
2^256 - 2^32 - 2^9 - 2^8
0 ffffffff ffffffff ffffffff ffffffff ffffffff ffffffff fffffffe fffffd00
2^256 - 2^32 - 2^9 - 2^8 - 2^7
0 ffffffff ffffffff ffffffff ffffffff ffffffff ffffffff fffffffe fffffc80
2^256 - 2^32 - 2^9 - 2^8 - 2^7 - 2^6
0 ffffffff ffffffff ffffffff ffffffff ffffffff ffffffff fffffffe fffffc40
2^256 - 2^32 - 2^9 - 2^8 - 2^7 - 2^6 - 2^4
0 ffffffff ffffffff ffffffff ffffffff ffffffff ffffffff fffffffe fffffc30
2^256 - 2^32 - 2^9 - 2^8 - 2^7 - 2^6 - 2^4 - 1
0 ffffffff ffffffff ffffffff ffffffff ffffffff ffffffff fffffffe fffffc2f
*/
const bn256 p256k1 = { {0xfffffc2f, 0xfffffffe, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff } };
/*
* Implementation Note.
*
* It's always modulo p256k1.
*
* Once, I tried redundant representation which caused wrong
* calculation. Implementation could be correct with redundant
* representation, but it found that it's more expensive.
*
*/
/**
* @brief X = (A + B) mod p256k1
*/
void
modp256k1_add (bn256 *X, const bn256 *A, const bn256 *B)
{
uint32_t cond;
bn256 tmp[1];
bn256 dummy[1];
cond = (bn256_add (X, A, B) == 0);
cond &= bn256_sub (tmp, X, P256K1);
memcpy (cond?dummy:X, tmp, sizeof (bn256));
asm ("" : "=m" (dummy) : "m" (dummy) : "memory");
}
/**
* @brief X = (A - B) mod p256
*/
void
modp256k1_sub (bn256 *X, const bn256 *A, const bn256 *B)
{
uint32_t borrow;
bn256 tmp[1];
bn256 dummy[1];
borrow = bn256_sub (X, A, B);
bn256_add (tmp, X, P256K1);
memcpy (borrow?X:dummy, tmp, sizeof (bn256));
asm ("" : "=m" (dummy) : "m" (dummy) : "memory");
}
/**
* @brief X = A mod p256k1
*/
void
modp256k1_reduce (bn256 *X, const bn512 *A)
{
bn256 tmp[1];
uint32_t carry;
#define borrow carry
uint32_t s0, s1;
#define s00 tmp->word[0]
#define s01 tmp->word[1]
#define s02 tmp->word[2]
#define W0 X
#define W1 tmp
#define W2 tmp
#define W3 tmp
#define W4 tmp
#define W5 tmp
#define W6 tmp
#define W7 tmp
#define S tmp
/*
* Suppose: P256K1 = 2^256 - CONST
* Then, compute: W = A_low + A_high * CONST
* 256-bit W0 = W mod 2^256
* 64-bit (S1, S0) = W / 2^256
* where: CONST = 2^32 + 2^9 + 2^8 + 2^7 + 2^6 + 2^4 + 1
*/
/* W0 = A_low */
/* W7 = A_high */
/* W0 += W7 */
carry = bn256_add (W0, (const bn256 *)&A->word[8], (const bn256 *)A);
/* W6 = W7 << 4 */
/* W0 += W6 */
bn256_shift (W6, (const bn256 *)&A->word[8], 4);
carry += bn256_add (W0, W0, W6);
/* W5 = W6 << 2 */
/* W0 += W5 */
bn256_shift (W5, W6, 2);
carry += bn256_add (W0, W0, W5);
/* W4 = W5 << 1 */
/* W0 += W4 */
bn256_shift (W4, W5, 1);
carry += bn256_add (W0, W0, W4);
/* W3 = W4 << 1 */
/* W0 += W3 */
bn256_shift (W3, W4, 1);
carry += bn256_add (W0, W0, W3);
/* W2 = W3 << 1 */
/* W0 += W2 */
bn256_shift (W2, W3, 1);
carry += bn256_add (W0, W0, W2);
/* W1 = A_high << 32 */
/* W0 += W1 */
W1->word[7] = A->word[14];
W1->word[6] = A->word[13];
W1->word[5] = A->word[12];
W1->word[4] = A->word[11];
W1->word[3] = A->word[10];
W1->word[2] = A->word[9];
W1->word[1] = A->word[8];
W1->word[0] = 0;
carry += bn256_add (W0, W0, W1);
/* (S1, S0) = W / 2^256 */
s0 = A->word[15];
carry += (s0 >> 28) + (s0 >> 26) + (s0 >> 25) + (s0 >> 24) + (s0 >> 23);
carry += s0;
s1 = (carry < s0) ? 1 : 0;
s0 = carry;
/*
* Compute: S:=(S02, S01, S00), S = (S1,S0)*CONST
*/
S->word[7] = S->word[6] = S->word[5] = S->word[4] = S->word[3] = 0;
/* (S02, S01, S00) = (S1, S0) + (S1, S0)*2^32 */
s00 = s0;
s01 = s0 + s1;
s02 = s1 + ((s01 < s0)? 1 : 0);
/* (S02, S01, S00) += (S1, S0)*2^9 */
carry = (s0 >> 23) + s01;
s02 += (s1 >> 23) + ((carry < s01)? 1 : 0);
s01 = (s1 << 9) + carry;
s02 += ((s01 < carry)? 1 : 0);
s00 += (s0 << 9);
carry = ((s00 < (s0 << 9))? 1 : 0);
s01 += carry;
s02 += ((s01 < carry)? 1 : 0);
/* (S02, S01, S00) += (S1, S0)*2^8 */
carry = (s0 >> 24) + s01;
s02 += (s1 >> 24) + ((carry < s01)? 1 : 0);
s01 = (s1 << 8) + carry;
s02 += ((s01 < carry)? 1 : 0);
s00 += (s0 << 8);
carry = ((s00 < (s0 << 8))? 1 : 0);
s01 += carry;
s02 += ((s01 < carry)? 1 : 0);
/* (S02, S01, S00) += (S1, S0)*2^7 */
carry = (s0 >> 25) + s01;
s02 += (s1 >> 25) + ((carry < s01)? 1 : 0);
s01 = (s1 << 7) + carry;
s02 += ((s01 < carry)? 1 : 0);
s00 += (s0 << 7);
carry = ((s00 < (s0 << 7))? 1 : 0);
s01 += carry;
s02 += ((s01 < carry)? 1 : 0);
/* (S02, S01, S00) += (S1, S0)*2^6 */
carry = (s0 >> 26) + s01;
s02 += (s1 >> 26) + ((carry < s01)? 1 : 0);
s01 = (s1 << 6) + carry;
s02 += ((s01 < carry)? 1 : 0);
s00 += (s0 << 6);
carry = ((s00 < (s0 << 6))? 1 : 0);
s01 += carry;
s02 += ((s01 < carry)? 1 : 0);
/* (S02, S01, S00) += (S1, S0)*2^4 */
carry = (s0 >> 28) + s01;
s02 += (s1 >> 28) + ((carry < s01)? 1 : 0);
s01 = (s1 << 4) + carry;
s02 += ((s01 < carry)? 1 : 0);
s00 += (s0 << 4);
carry = ((s00 < (s0 << 4))? 1 : 0);
s01 += carry;
s02 += ((s01 < carry)? 1 : 0);
/* W0 += S */
modp256k1_add (W0, W0, S);
borrow = bn256_sub (tmp, W0, P256K1);
if (borrow)
memcpy (tmp, W0, sizeof (bn256));
else
memcpy (W0, tmp, sizeof (bn256));
#undef W0
#undef W1
#undef W2
#undef W3
#undef W4
#undef W5
#undef W6
#undef W7
#undef S
#undef s00
#undef s01
#undef s02
#undef borrow
}
/**
* @brief X = (A * B) mod p256k1
*/
void
modp256k1_mul (bn256 *X, const bn256 *A, const bn256 *B)
{
bn512 AB[1];
bn256_mul (AB, A, B);
modp256k1_reduce (X, AB);
}
/**
* @brief X = A * A mod p256k1
*/
void
modp256k1_sqr (bn256 *X, const bn256 *A)
{
bn512 AA[1];
bn256_sqr (AA, A);
modp256k1_reduce (X, AA);
}
/**
* @brief X = (A << shift) mod p256k1
* @note shift < 32
*/
void
modp256k1_shift (bn256 *X, const bn256 *A, int shift)
{
uint32_t carry;
bn256 tmp[1];
carry = bn256_shift (X, A, shift);
if (shift < 0)
return;
memset (tmp, 0, sizeof (bn256));
tmp->word[0] = carry + (carry << 9);
tmp->word[1] = carry + (tmp->word[0] < (carry << 9)) + (carry >> 23);
tmp->word[0] = tmp->word[0] + (carry << 8);
tmp->word[1] = tmp->word[1] + (tmp->word[0] < (carry << 8)) + (carry >> 24);
tmp->word[0] = tmp->word[0] + (carry << 7);
tmp->word[1] = tmp->word[1] + (tmp->word[0] < (carry << 7)) + (carry >> 25);
tmp->word[0] = tmp->word[0] + (carry << 6);
tmp->word[1] = tmp->word[1] + (tmp->word[0] < (carry << 6)) + (carry >> 26);
tmp->word[0] = tmp->word[0] + (carry << 4);
tmp->word[1] = tmp->word[1] + (tmp->word[0] < (carry << 4)) + (carry >> 28);
modp256k1_add (X, X, tmp);
}