ext/x25519_precomputed/x25519_x64.c in x25519-1.0.6 vs ext/x25519_precomputed/x25519_x64.c in x25519-1.0.7
- old
+ new
@@ -1,231 +1,233 @@
/**
- * Copyright (c) 2017 Armando Faz <armfazh@ic.unicamp.br>.
+ * Copyright (c) 2017 Armando Faz <armfazh@ic.unicamp.br>. All Rights Reserved.
* Institute of Computing.
* University of Campinas, Brazil.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
- * published by the Free Software Foundation, version 3.
+ * published by the Free Software Foundation, version 2 or greater.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <string.h>
#include "fp25519_x64.h"
-#include "table_ladder_x25519.h"
#include "x25519_precomputed.h"
+#include "table_ladder_x25519.h"
-/****** Implementation of Montgomery Ladder Algorithm ************/
-static inline void cswap_x64(uint64_t bit, uint64_t *const px, uint64_t *const py)
-{
- int i=0;
- uint64_t mask = (uint64_t)0-bit;
- for(i=0;i<NUM_WORDS_ELTFP25519_X64;i++)
- {
- uint64_t t = mask & (px[i] ^ py[i]);
- px[i] = px[i] ^ t;
- py[i] = py[i] ^ t;
- }
+static inline void cswap_x64(uint64_t bit, uint64_t *const px,
+ uint64_t *const py) {
+ int i = 0;
+ uint64_t mask = (uint64_t)0 - bit;
+ for (i = 0; i < NUM_WORDS_ELTFP25519_X64; i++) {
+ uint64_t t = mask & (px[i] ^ py[i]);
+ px[i] = px[i] ^ t;
+ py[i] = py[i] ^ t;
+ }
}
-void x25519_precomputed_scalarmult(uint8_t *shared, uint8_t *private_key, uint8_t *session_key)
-{
- ALIGN uint64_t buffer[4*NUM_WORDS_ELTFP25519_X64];
- ALIGN uint64_t coordinates[4*NUM_WORDS_ELTFP25519_X64];
- ALIGN uint64_t workspace[6*NUM_WORDS_ELTFP25519_X64];
- uint64_t save=0;
- int i=0, j=0;
- uint64_t prev = 0;
- uint64_t *const X1 = (uint64_t*)session_key;
- uint64_t *const key = (uint64_t*)private_key;
- uint64_t *const Px = coordinates+0;
- uint64_t *const Pz = coordinates+4;
- uint64_t *const Qx = coordinates+8;
- uint64_t *const Qz = coordinates+12;
- uint64_t *const X2 = Qx;
- uint64_t *const Z2 = Qz;
- uint64_t *const X3 = Px;
- uint64_t *const Z3 = Pz;
- uint64_t *const X2Z2 = Qx;
- uint64_t *const X3Z3 = Px;
+/** Original rfc7748_precomputed name: 'x25519_shared_secret_x64' */
+void x25519_precomputed_scalarmult(uint8_t *shared, uint8_t *private_key,
+ uint8_t *session_key) {
+ ALIGN uint64_t buffer[4 * NUM_WORDS_ELTFP25519_X64];
+ ALIGN uint64_t coordinates[4 * NUM_WORDS_ELTFP25519_X64];
+ ALIGN uint64_t workspace[6 * NUM_WORDS_ELTFP25519_X64];
+ ALIGN uint8_t session[X25519_KEYSIZE_BYTES];
+ ALIGN uint8_t private[X25519_KEYSIZE_BYTES];
- uint64_t *const A = workspace+0;
- uint64_t *const B = workspace+4;
- uint64_t *const D = workspace+8;
- uint64_t *const C = workspace+12;
- uint64_t *const DA = workspace+16;
- uint64_t *const CB = workspace+20;
- uint64_t *const AB = A;
- uint64_t *const DC = D;
- uint64_t *const DACB = DA;
- uint64_t *const buffer_1w = buffer;
- uint64_t *const buffer_2w = buffer;
+ int i = 0, j = 0;
+ uint64_t prev = 0;
+ uint64_t *const X1 = (uint64_t *)session;
+ uint64_t *const key = (uint64_t *)private;
+ uint64_t *const Px = coordinates + 0;
+ uint64_t *const Pz = coordinates + 4;
+ uint64_t *const Qx = coordinates + 8;
+ uint64_t *const Qz = coordinates + 12;
+ uint64_t *const X2 = Qx;
+ uint64_t *const Z2 = Qz;
+ uint64_t *const X3 = Px;
+ uint64_t *const Z3 = Pz;
+ uint64_t *const X2Z2 = Qx;
+ uint64_t *const X3Z3 = Px;
- /* clampC function */
- save = private_key[X25519_KEYSIZE_BYTES-1]<<16 | private_key[0];
- private_key[0] = private_key[0] & (~(uint8_t)0x7);
- private_key[X25519_KEYSIZE_BYTES-1] = (uint8_t)64 | (private_key[X25519_KEYSIZE_BYTES-1] & (uint8_t)0x7F);
+ uint64_t *const A = workspace + 0;
+ uint64_t *const B = workspace + 4;
+ uint64_t *const D = workspace + 8;
+ uint64_t *const C = workspace + 12;
+ uint64_t *const DA = workspace + 16;
+ uint64_t *const CB = workspace + 20;
+ uint64_t *const AB = A;
+ uint64_t *const DC = D;
+ uint64_t *const DACB = DA;
+ uint64_t *const buffer_1w = buffer;
+ uint64_t *const buffer_2w = buffer;
- /**
- * As in the draft:
- * When receiving such an array, implementations of curve25519
- * MUST mask the most-significant bit in the final byte. This
- * is done to preserve compatibility with point formats which
- * reserve the sign bit for use in other protocols and to
- * increase resistance to implementation fingerprinting
- **/
- session_key[X25519_KEYSIZE_BYTES-1] &= (1<<(255%8))-1;
+ memcpy(private, private_key, sizeof(private));
+ memcpy(session, session_key, sizeof(session));
- copy_EltFp25519_1w_x64(Px,(uint64_t*)session_key);
- setzero_EltFp25519_1w_x64(Pz);
- setzero_EltFp25519_1w_x64(Qx);
- setzero_EltFp25519_1w_x64(Qz);
+ /* clampC function */
+ private
+ [0] = private[0] & (~(uint8_t)0x7);
+ private
+ [X25519_KEYSIZE_BYTES - 1] =
+ (uint8_t)64 | (private[X25519_KEYSIZE_BYTES - 1] & (uint8_t)0x7F);
- Pz[0] = 1;
- Qx[0] = 1;
+ /**
+ * As in the draft:
+ * When receiving such an array, implementations of curve25519
+ * MUST mask the most-significant bit in the final byte. This
+ * is done to preserve compatibility with point formats which
+ * reserve the sign bit for use in other protocols and to
+ * increase resistance to implementation fingerprinting
+ **/
+ session[X25519_KEYSIZE_BYTES - 1] &= (1 << (255 % 8)) - 1;
- /* main-loop */
- prev = 0;
- j = 62;
- for(i=3;i>=0;i--)
- {
- while(j >= 0)
- {
- uint64_t bit = (key[i]>>j)&0x1;
- uint64_t swap = bit^prev;
- prev = bit;
+ copy_EltFp25519_1w_x64(Px, X1);
+ setzero_EltFp25519_1w_x64(Pz);
+ setzero_EltFp25519_1w_x64(Qx);
+ setzero_EltFp25519_1w_x64(Qz);
- add_EltFp25519_1w_x64(A, X2, Z2); /* A = (X2+Z2) */
- sub_EltFp25519_1w_x64(B, X2, Z2); /* B = (X2-Z2) */
- add_EltFp25519_1w_x64(C, X3, Z3); /* C = (X3+Z3) */
- sub_EltFp25519_1w_x64(D, X3, Z3); /* D = (X3-Z3) */
- mul_EltFp25519_2w_x64(DACB,AB,DC); /* [DA|CB] = [A|B]*[D|C] */
+ Pz[0] = 1;
+ Qx[0] = 1;
- cswap_x64(swap, A, C);
- cswap_x64(swap, B, D);
+ /* main-loop */
+ prev = 0;
+ j = 62;
+ for (i = 3; i >= 0; i--) {
+ while (j >= 0) {
+ uint64_t bit = (key[i] >> j) & 0x1;
+ uint64_t swap = bit ^ prev;
+ prev = bit;
- sqr_EltFp25519_2w_x64(AB); /* [AA|BB] = [A^2|B^2] */
- add_EltFp25519_1w_x64(X3, DA, CB); /* X3 = (DA+CB) */
- sub_EltFp25519_1w_x64(Z3, DA, CB); /* Z3 = (DA-CB) */
- sqr_EltFp25519_2w_x64(X3Z3); /* [X3|Z3] = [(DA+CB)|(DA+CB)]^2 */
+ add_EltFp25519_1w_x64(A, X2, Z2); /* A = (X2+Z2) */
+ sub_EltFp25519_1w_x64(B, X2, Z2); /* B = (X2-Z2) */
+ add_EltFp25519_1w_x64(C, X3, Z3); /* C = (X3+Z3) */
+ sub_EltFp25519_1w_x64(D, X3, Z3); /* D = (X3-Z3) */
+ mul_EltFp25519_2w_x64(DACB, AB, DC); /* [DA|CB] = [A|B]*[D|C] */
- copy_EltFp25519_1w_x64(X2,B); /* X2 = B^2 */
- sub_EltFp25519_1w_x64(Z2, A, B); /* Z2 = E = AA-BB */
- mul_a24_EltFp25519_1w_x64(B, Z2); /* B = a24*E */
- add_EltFp25519_1w_x64(B, B, X2); /* B = a24*E+B */
- mul_EltFp25519_2w_x64(X2Z2,X2Z2,AB); /* [X2|Z2] = [B|E]*[A|a24*E+B] */
- mul_EltFp25519_1w_x64(Z3,Z3,X1); /* Z3 = Z3*X1 */
+ cswap_x64(swap, A, C);
+ cswap_x64(swap, B, D);
- j--;
- }
- j = 63;
- }
+ sqr_EltFp25519_2w_x64(AB); /* [AA|BB] = [A^2|B^2] */
+ add_EltFp25519_1w_x64(X3, DA, CB); /* X3 = (DA+CB) */
+ sub_EltFp25519_1w_x64(Z3, DA, CB); /* Z3 = (DA-CB) */
+ sqr_EltFp25519_2w_x64(X3Z3); /* [X3|Z3] = [(DA+CB)|(DA+CB)]^2 */
- inv_EltFp25519_1w_x64(A, Qz);
- mul_EltFp25519_1w_x64((uint64_t*)shared,Qx,A);
- fred_EltFp25519_1w_x64((uint64_t *) shared);
- private_key[X25519_KEYSIZE_BYTES-1] = (uint8_t)((save>>16) & 0xFF);
- private_key[0] = (uint8_t)(save & 0xFF);
+ copy_EltFp25519_1w_x64(X2, B); /* X2 = B^2 */
+ sub_EltFp25519_1w_x64(Z2, A, B); /* Z2 = E = AA-BB */
+
+ mul_a24_EltFp25519_1w_x64(B, Z2); /* B = a24*E */
+ add_EltFp25519_1w_x64(B, B, X2); /* B = a24*E+B */
+ mul_EltFp25519_2w_x64(X2Z2, X2Z2, AB); /* [X2|Z2] = [B|E]*[A|a24*E+B] */
+ mul_EltFp25519_1w_x64(Z3, Z3, X1); /* Z3 = Z3*X1 */
+ j--;
+ }
+ j = 63;
+ }
+
+ inv_EltFp25519_1w_x64(A, Qz);
+ mul_EltFp25519_1w_x64((uint64_t *)shared, Qx, A);
+ fred_EltFp25519_1w_x64((uint64_t *)shared);
}
-void x25519_precomputed_scalarmult_base(uint8_t *session_key, uint8_t *private_key)
-{
- ALIGN uint64_t buffer[4*NUM_WORDS_ELTFP25519_X64];
- ALIGN uint64_t coordinates[4*NUM_WORDS_ELTFP25519_X64];
- ALIGN uint64_t workspace[4*NUM_WORDS_ELTFP25519_X64];
- const int ite[4] = {64,64,64,63};
- const int q = 3;
- uint64_t swap = 1;
- uint64_t bit;
- uint64_t save;
+/* Original rfc7748_precomputed name: 'x25519_keygen_precmp_x64' */
+void x25519_precomputed_scalarmult_base(uint8_t *session_key, uint8_t *private_key) {
+ ALIGN uint64_t buffer[4 * NUM_WORDS_ELTFP25519_X64];
+ ALIGN uint64_t coordinates[4 * NUM_WORDS_ELTFP25519_X64];
+ ALIGN uint64_t workspace[4 * NUM_WORDS_ELTFP25519_X64];
+ ALIGN uint8_t private[X25519_KEYSIZE_BYTES];
- int i=0, j=0, k=0;
- uint64_t *const key = (uint64_t*)private_key;
- uint64_t *const Ur1 = coordinates+0;
- uint64_t *const Zr1 = coordinates+4;
- uint64_t *const Ur2 = coordinates+8;
- uint64_t *const Zr2 = coordinates+12;
+ int i = 0, j = 0, k = 0;
+ uint64_t *const key = (uint64_t *)private;
+ uint64_t *const Ur1 = coordinates + 0;
+ uint64_t *const Zr1 = coordinates + 4;
+ uint64_t *const Ur2 = coordinates + 8;
+ uint64_t *const Zr2 = coordinates + 12;
- uint64_t *const UZr1 = coordinates+0;
- uint64_t *const ZUr2 = coordinates+8;
+ uint64_t *const UZr1 = coordinates + 0;
+ uint64_t *const ZUr2 = coordinates + 8;
- uint64_t *const A = workspace+0;
- uint64_t *const B = workspace+4;
- uint64_t *const C = workspace+8;
- uint64_t *const D = workspace+12;
+ uint64_t *const A = workspace + 0;
+ uint64_t *const B = workspace + 4;
+ uint64_t *const C = workspace + 8;
+ uint64_t *const D = workspace + 12;
- uint64_t *const AB = workspace+0;
- uint64_t *const CD = workspace+8;
+ uint64_t *const AB = workspace + 0;
+ uint64_t *const CD = workspace + 8;
- uint64_t *const buffer_1w = buffer;
- uint64_t *const buffer_2w = buffer;
- uint64_t * P = (uint64_t *)Table_Ladder_8k;
+ uint64_t *const buffer_1w = buffer;
+ uint64_t *const buffer_2w = buffer;
+ uint64_t *P = (uint64_t *)Table_Ladder_8k;
- /* clampC function */
- save = private_key[X25519_KEYSIZE_BYTES-1]<<16 | private_key[0];
- private_key[0] = private_key[0] & (~(uint8_t)0x7);
- private_key[X25519_KEYSIZE_BYTES-1] = (uint8_t)64 | (private_key[X25519_KEYSIZE_BYTES-1] & (uint8_t)0x7F);
+ memcpy(private, private_key, sizeof(private));
- setzero_EltFp25519_1w_x64(Ur1);
- setzero_EltFp25519_1w_x64(Zr1);
- setzero_EltFp25519_1w_x64(Zr2);
- Ur1[0] = 1;
- Zr1[0] = 1;
- Zr2[0] = 1;
+ /* clampC function */
+ private
+ [0] = private[0] & (~(uint8_t)0x7);
+ private
+ [X25519_KEYSIZE_BYTES - 1] =
+ (uint8_t)64 | (private[X25519_KEYSIZE_BYTES - 1] & (uint8_t)0x7F);
- /* G-S */
- Ur2[3] = 0x1eaecdeee27cab34;
- Ur2[2] = 0xadc7a0b9235d48e2;
- Ur2[1] = 0xbbf095ae14b2edf8;
- Ur2[0] = 0x7e94e1fec82faabd;
+ setzero_EltFp25519_1w_x64(Ur1);
+ setzero_EltFp25519_1w_x64(Zr1);
+ setzero_EltFp25519_1w_x64(Zr2);
+ Ur1[0] = 1;
+ Zr1[0] = 1;
+ Zr2[0] = 1;
- /* main-loop */
- j = q;
- for(i=0;i<NUM_WORDS_ELTFP25519_X64;i++)
- {
- while(j < ite[i])
- {
- k = (64*i+j-q);
- bit = (key[i]>>j)&0x1;
- swap = swap ^ bit;
- cswap_x64(swap, Ur1, Ur2);
- cswap_x64(swap, Zr1, Zr2);
- swap = bit;
- /** Addition */
- sub_EltFp25519_1w_x64(B, Ur1, Zr1); /* B = Ur1-Zr1 */
- add_EltFp25519_1w_x64(A, Ur1, Zr1); /* A = Ur1+Zr1 */
- mul_EltFp25519_1w_x64(C,&P[4*k],B); /* C = M0-B */
- sub_EltFp25519_1w_x64(B, A, C); /* B = (Ur1+Zr1) - M*(Ur1-Zr1) */
- add_EltFp25519_1w_x64(A, A, C); /* A = (Ur1+Zr1) + M*(Ur1-Zr1) */
- sqr_EltFp25519_2w_x64(AB); /* A = A^2 | B = B^2 */
- mul_EltFp25519_2w_x64(UZr1,ZUr2,AB); /* Ur1 = Zr2*A | Zr1 = Ur2*B */
- j++;
- }
- j = 0;
- }
+ /* G-S */
+ Ur2[3] = 0x1eaecdeee27cab34;
+ Ur2[2] = 0xadc7a0b9235d48e2;
+ Ur2[1] = 0xbbf095ae14b2edf8;
+ Ur2[0] = 0x7e94e1fec82faabd;
- /** Doubling */
- for(i=0;i<q;i++)
- {
- add_EltFp25519_1w_x64(A, Ur1, Zr1); /* A = Ur1+Zr1 */
- sub_EltFp25519_1w_x64(B, Ur1, Zr1); /* B = Ur1-Zr1 */
- sqr_EltFp25519_2w_x64(AB); /* A = A**2 B = B**2 */
- copy_EltFp25519_1w_x64(C,B); /* C = B */
- sub_EltFp25519_1w_x64(B, A, B); /* B = A-B */
- mul_a24_EltFp25519_1w_x64(D, B); /* D = my_a24*B */
- add_EltFp25519_1w_x64(D, D, C); /* D = D+C */
- mul_EltFp25519_2w_x64(UZr1,AB,CD); /* Ur1 = A*B Zr1 = Zr1*A */
- }
+ /* main-loop */
+ const int ite[4] = {64, 64, 64, 63};
+ const int q = 3;
+ uint64_t swap = 1;
- /* Convert to affine coordinates */
- inv_EltFp25519_1w_x64(A, Zr1);
- mul_EltFp25519_1w_x64((uint64_t*)session_key,Ur1,A);
- fred_EltFp25519_1w_x64((uint64_t *) session_key);
- private_key[X25519_KEYSIZE_BYTES-1] = (uint8_t)((save>>16) & 0xFF);
- private_key[0] = (uint8_t)(save & 0xFF);
+ j = q;
+ for (i = 0; i < NUM_WORDS_ELTFP25519_X64; i++) {
+ while (j < ite[i]) {
+ k = (64 * i + j - q);
+ uint64_t bit = (key[i] >> j) & 0x1;
+ swap = swap ^ bit;
+ cswap_x64(swap, Ur1, Ur2);
+ cswap_x64(swap, Zr1, Zr2);
+ swap = bit;
+ /** Addition */
+ sub_EltFp25519_1w_x64(B, Ur1, Zr1); /* B = Ur1-Zr1 */
+ add_EltFp25519_1w_x64(A, Ur1, Zr1); /* A = Ur1+Zr1 */
+ mul_EltFp25519_1w_x64(C, &P[4 * k], B); /* C = M0-B */
+ sub_EltFp25519_1w_x64(B, A, C); /* B = (Ur1+Zr1) - M*(Ur1-Zr1) */
+ add_EltFp25519_1w_x64(A, A, C); /* A = (Ur1+Zr1) + M*(Ur1-Zr1) */
+ sqr_EltFp25519_2w_x64(AB); /* A = A^2 | B = B^2 */
+ mul_EltFp25519_2w_x64(UZr1, ZUr2, AB); /* Ur1 = Zr2*A | Zr1 = Ur2*B */
+ j++;
+ }
+ j = 0;
+ }
+
+ /** Doubling */
+ for (i = 0; i < q; i++) {
+ add_EltFp25519_1w_x64(A, Ur1, Zr1); /* A = Ur1+Zr1 */
+ sub_EltFp25519_1w_x64(B, Ur1, Zr1); /* B = Ur1-Zr1 */
+ sqr_EltFp25519_2w_x64(AB); /* A = A**2 B = B**2 */
+ copy_EltFp25519_1w_x64(C, B); /* C = B */
+ sub_EltFp25519_1w_x64(B, A, B); /* B = A-B */
+ mul_a24_EltFp25519_1w_x64(D, B); /* D = my_a24*B */
+ add_EltFp25519_1w_x64(D, D, C); /* D = D+C */
+ mul_EltFp25519_2w_x64(UZr1, AB, CD); /* Ur1 = A*B Zr1 = Zr1*A */
+ }
+
+ /* Convert to affine coordinates */
+ inv_EltFp25519_1w_x64(A, Zr1);
+ mul_EltFp25519_1w_x64((uint64_t *)session_key, Ur1, A);
+ fred_EltFp25519_1w_x64((uint64_t *)session_key);
}