mirror of
https://github.com/kmackay/micro-ecc.git
synced 2026-06-05 21:14:51 +00:00
Update coding style.
This commit is contained in:
+172
-173
@@ -20,11 +20,10 @@
|
||||
|
||||
#if (uECC_ASM == uECC_asm_fast)
|
||||
|
||||
static uint32_t vli_add(uint32_t *p_result, const uint32_t *p_left, const uint32_t *p_right)
|
||||
{
|
||||
uint32_t l_carry = 0;
|
||||
uint32_t l_left;
|
||||
uint32_t l_right;
|
||||
static uint32_t vli_add(uint32_t *result, const uint32_t *left, const uint32_t *right) {
|
||||
uint32_t carry = 0;
|
||||
uint32_t left_word;
|
||||
uint32_t right_word;
|
||||
|
||||
__asm__ volatile (
|
||||
".syntax unified \n\t"
|
||||
@@ -34,34 +33,34 @@ static uint32_t vli_add(uint32_t *p_result, const uint32_t *p_left, const uint32
|
||||
"stmia %[dptr]!, {%[left]} \n\t" /* Store result word. */
|
||||
|
||||
/* Now we just do the remaining words with the carry bit (using ADC) */
|
||||
REPEAT(DEC(uECC_WORDS), "ldmia %[lptr]!, {%[left]} \n\t"
|
||||
REPEAT(DEC(uECC_WORDS),
|
||||
"ldmia %[lptr]!, {%[left]} \n\t"
|
||||
"ldmia %[rptr]!, {%[right]} \n\t"
|
||||
"adcs %[left], %[right] \n\t"
|
||||
"stmia %[dptr]!, {%[left]} \n\t")
|
||||
|
||||
"adcs %[carry], %[carry] \n\t" /* Store carry bit in l_carry. */
|
||||
"adcs %[carry], %[carry] \n\t" /* Store carry bit. */
|
||||
#if (uECC_PLATFORM != uECC_arm_thumb2)
|
||||
".syntax divided \n\t"
|
||||
#endif
|
||||
#if (uECC_PLATFORM == uECC_arm_thumb)
|
||||
: [dptr] "+l" (p_result), [lptr] "+l" (p_left), [rptr] "+l" (p_right),
|
||||
[carry] "+l" (l_carry), [left] "=l" (l_left), [right] "=l" (l_right)
|
||||
: [dptr] "+l" (result), [lptr] "+l" (left), [rptr] "+l" (right),
|
||||
[carry] "+l" (carry), [left] "=l" (left_word), [right] "=l" (right_word)
|
||||
#else
|
||||
: [dptr] "+r" (p_result), [lptr] "+r" (p_left), [rptr] "+r" (p_right),
|
||||
[carry] "+r" (l_carry), [left] "=r" (l_left), [right] "=r" (l_right)
|
||||
: [dptr] "+r" (result), [lptr] "+r" (left), [rptr] "+r" (right),
|
||||
[carry] "+r" (carry), [left] "=r" (left_word), [right] "=r" (right_word)
|
||||
#endif
|
||||
:
|
||||
: "cc", "memory"
|
||||
);
|
||||
return l_carry;
|
||||
return carry;
|
||||
}
|
||||
#define asm_add 1
|
||||
|
||||
static uint32_t vli_sub(uint32_t *p_result, const uint32_t *p_left, const uint32_t *p_right)
|
||||
{
|
||||
uint32_t l_carry = 0;
|
||||
uint32_t l_left;
|
||||
uint32_t l_right;
|
||||
static uint32_t vli_sub(uint32_t *result, const uint32_t *left, const uint32_t *right) {
|
||||
uint32_t carry = 0;
|
||||
uint32_t left_word;
|
||||
uint32_t right_word;
|
||||
|
||||
__asm__ volatile (
|
||||
".syntax unified \n\t"
|
||||
@@ -71,36 +70,37 @@ static uint32_t vli_sub(uint32_t *p_result, const uint32_t *p_left, const uint32
|
||||
"stmia %[dptr]!, {%[left]} \n\t" /* Store result word. */
|
||||
|
||||
/* Now we just do the remaining words with the carry bit (using SBC) */
|
||||
REPEAT(DEC(uECC_WORDS), "ldmia %[lptr]!, {%[left]} \n\t"
|
||||
REPEAT(DEC(uECC_WORDS),
|
||||
"ldmia %[lptr]!, {%[left]} \n\t"
|
||||
"ldmia %[rptr]!, {%[right]} \n\t"
|
||||
"sbcs %[left], %[right] \n\t"
|
||||
"stmia %[dptr]!, {%[left]} \n\t")
|
||||
|
||||
"adcs %[carry], %[carry] \n\t" /* Store carry bit in l_carry. */
|
||||
"adcs %[carry], %[carry] \n\t" /* Store carry bit. */
|
||||
#if (uECC_PLATFORM != uECC_arm_thumb2)
|
||||
".syntax divided \n\t"
|
||||
#endif
|
||||
#if (uECC_PLATFORM == uECC_arm_thumb)
|
||||
: [dptr] "+l" (p_result), [lptr] "+l" (p_left), [rptr] "+l" (p_right),
|
||||
[carry] "+l" (l_carry), [left] "=l" (l_left), [right] "=l" (l_right)
|
||||
: [dptr] "+l" (result), [lptr] "+l" (left), [rptr] "+l" (right),
|
||||
[carry] "+l" (carry), [left] "=l" (left_word), [right] "=l" (right_word)
|
||||
#else
|
||||
: [dptr] "+r" (p_result), [lptr] "+r" (p_left), [rptr] "+r" (p_right),
|
||||
[carry] "+r" (l_carry), [left] "=r" (l_left), [right] "=r" (l_right)
|
||||
: [dptr] "+r" (result), [lptr] "+r" (left), [rptr] "+r" (right),
|
||||
[carry] "+r" (carry), [left] "=r" (left_word), [right] "=r" (right_word)
|
||||
#endif
|
||||
:
|
||||
: "cc", "memory"
|
||||
);
|
||||
return !l_carry; // note that on ARM, carry flag set means "no borrow" when subtracting (for some reason...)
|
||||
return !carry; // note that on ARM, carry flag set means "no borrow" when subtracting
|
||||
// (for some reason...)
|
||||
}
|
||||
#define asm_sub 1
|
||||
|
||||
#if (uECC_PLATFORM != uECC_arm_thumb)
|
||||
#if (uECC_WORDS == 5)
|
||||
static void vli_mult(uint32_t *p_result, const uint32_t *p_left, const uint32_t *p_right)
|
||||
{
|
||||
register uint32_t *r0 __asm__("r0") = p_result;
|
||||
register const uint32_t *r1 __asm__("r1") = p_left;
|
||||
register const uint32_t *r2 __asm__("r2") = p_right;
|
||||
static void vli_mult(uint32_t *result, const uint32_t *left, const uint32_t *right) {
|
||||
register uint32_t *r0 __asm__("r0") = result;
|
||||
register const uint32_t *r1 __asm__("r1") = left;
|
||||
register const uint32_t *r2 __asm__("r2") = right;
|
||||
|
||||
__asm__ volatile (
|
||||
".syntax unified \n\t"
|
||||
@@ -267,11 +267,10 @@ static void vli_mult(uint32_t *p_result, const uint32_t *p_left, const uint32_t
|
||||
#endif /* (uECC_WORDS == 5) */
|
||||
|
||||
#if (uECC_WORDS == 6)
|
||||
static void vli_mult(uint32_t *p_result, const uint32_t *p_left, const uint32_t *p_right)
|
||||
{
|
||||
register uint32_t *r0 __asm__("r0") = p_result;
|
||||
register const uint32_t *r1 __asm__("r1") = p_left;
|
||||
register const uint32_t *r2 __asm__("r2") = p_right;
|
||||
static void vli_mult(uint32_t *result, const uint32_t *left, const uint32_t *right) {
|
||||
register uint32_t *r0 __asm__("r0") = result;
|
||||
register const uint32_t *r1 __asm__("r1") = left;
|
||||
register const uint32_t *r2 __asm__("r2") = right;
|
||||
|
||||
__asm__ volatile (
|
||||
".syntax unified \n\t"
|
||||
@@ -503,11 +502,10 @@ static void vli_mult(uint32_t *p_result, const uint32_t *p_left, const uint32_t
|
||||
#endif /* (uECC_WORDS == 6) */
|
||||
|
||||
#if (uECC_WORDS == 8)
|
||||
static void vli_mult(uint32_t *p_result, const uint32_t *p_left, const uint32_t *p_right)
|
||||
{
|
||||
register uint32_t *r0 __asm__("r0") = p_result;
|
||||
register const uint32_t *r1 __asm__("r1") = p_left;
|
||||
register const uint32_t *r2 __asm__("r2") = p_right;
|
||||
static void vli_mult(uint32_t *result, const uint32_t *left, const uint32_t *right) {
|
||||
register uint32_t *r0 __asm__("r0") = result;
|
||||
register const uint32_t *r1 __asm__("r1") = left;
|
||||
register const uint32_t *r2 __asm__("r2") = right;
|
||||
|
||||
__asm__ volatile (
|
||||
".syntax unified \n\t"
|
||||
@@ -924,10 +922,9 @@ static void vli_mult(uint32_t *p_result, const uint32_t *p_left, const uint32_t
|
||||
#endif /* (uECC_WORDS == 8) */
|
||||
|
||||
#if (uECC_WORDS == 5)
|
||||
static void vli_square(uint32_t *p_result, const uint32_t *p_left)
|
||||
{
|
||||
register uint32_t *r0 __asm__("r0") = p_result;
|
||||
register const uint32_t *r1 __asm__("r1") = p_left;
|
||||
static void vli_square(uint32_t *result, const uint32_t *left) {
|
||||
register uint32_t *r0 __asm__("r0") = result;
|
||||
register const uint32_t *r1 __asm__("r1") = left;
|
||||
|
||||
__asm__ volatile (
|
||||
".syntax unified \n\t"
|
||||
@@ -1046,10 +1043,9 @@ static void vli_square(uint32_t *p_result, const uint32_t *p_left)
|
||||
#endif /* (uECC_WORDS == 5) */
|
||||
|
||||
#if (uECC_WORDS == 6)
|
||||
static void vli_square(uint32_t *p_result, const uint32_t *p_left)
|
||||
{
|
||||
register uint32_t *r0 __asm__("r0") = p_result;
|
||||
register const uint32_t *r1 __asm__("r1") = p_left;
|
||||
static void vli_square(uint32_t *result, const uint32_t *left) {
|
||||
register uint32_t *r0 __asm__("r0") = result;
|
||||
register const uint32_t *r1 __asm__("r1") = left;
|
||||
|
||||
__asm__ volatile (
|
||||
".syntax unified \n\t"
|
||||
@@ -1204,10 +1200,9 @@ static void vli_square(uint32_t *p_result, const uint32_t *p_left)
|
||||
#endif /* (uECC_WORDS == 6) */
|
||||
|
||||
#if (uECC_WORDS == 8)
|
||||
static void vli_square(uint32_t *p_result, const uint32_t *p_left)
|
||||
{
|
||||
register uint32_t *r0 __asm__("r0") = p_result;
|
||||
register const uint32_t *r1 __asm__("r1") = p_left;
|
||||
static void vli_square(uint32_t *result, const uint32_t *left) {
|
||||
register uint32_t *r0 __asm__("r0") = result;
|
||||
register const uint32_t *r1 __asm__("r1") = left;
|
||||
|
||||
__asm__ volatile (
|
||||
".syntax unified \n\t"
|
||||
@@ -1488,86 +1483,86 @@ static void vli_square(uint32_t *p_result, const uint32_t *p_left)
|
||||
#endif /* (uECC_WORDS == 8) */
|
||||
|
||||
#endif /* (uECC_PLATFORM != uECC_arm_thumb) */
|
||||
|
||||
#endif /* (uECC_ASM == uECC_asm_fast) */
|
||||
|
||||
#if !asm_add
|
||||
static uint32_t vli_add(uint32_t *p_result, const uint32_t *p_left, const uint32_t *p_right)
|
||||
{
|
||||
uint32_t l_counter = uECC_WORDS;
|
||||
uint32_t l_carry = 0; /* carry = 0 initially */
|
||||
uint32_t l_left;
|
||||
uint32_t l_right;
|
||||
static uint32_t vli_add(uint32_t *result, const uint32_t *left, const uint32_t *right) {
|
||||
uint32_t counter = uECC_WORDS;
|
||||
uint32_t carry = 0;
|
||||
uint32_t left_word;
|
||||
uint32_t right_word;
|
||||
|
||||
__asm__ volatile (
|
||||
".syntax unified \n\t"
|
||||
"1: \n\t"
|
||||
"ldmia %[lptr]!, {%[left]} \n\t" /* Load left word. */
|
||||
"ldmia %[rptr]!, {%[right]} \n\t" /* Load right word. */
|
||||
"lsrs %[carry], #1 \n\t" /* Set up carry flag (l_carry = 0 after this). */
|
||||
"lsrs %[carry], #1 \n\t" /* Set up carry flag (carry = 0 after this). */
|
||||
"adcs %[left], %[right] \n\t" /* Add with carry. */
|
||||
"adcs %[carry], %[carry] \n\t" /* Store carry bit in l_carry. */
|
||||
"adcs %[carry], %[carry] \n\t" /* Store carry bit. */
|
||||
"stmia %[dptr]!, {%[left]} \n\t" /* Store result word. */
|
||||
"subs %[ctr], #1 \n\t" /* Decrement index. */
|
||||
"bne 1b \n\t" /* Loop until index == 0. */
|
||||
"subs %[ctr], #1 \n\t" /* Decrement counter. */
|
||||
"bne 1b \n\t" /* Loop until counter == 0. */
|
||||
#if (uECC_PLATFORM != uECC_arm_thumb2)
|
||||
".syntax divided \n\t"
|
||||
#endif
|
||||
#if (uECC_PLATFORM == uECC_arm_thumb)
|
||||
: [dptr] "+l" (p_result), [lptr] "+l" (p_left), [rptr] "+l" (p_right),
|
||||
[ctr] "+l" (l_counter), [carry] "+l" (l_carry), [left] "=l" (l_left), [right] "=l" (l_right)
|
||||
: [dptr] "+l" (result), [lptr] "+l" (left), [rptr] "+l" (right),
|
||||
[ctr] "+l" (counter), [carry] "+l" (carry),
|
||||
[left] "=l" (left_word), [right] "=l" (right_word)
|
||||
#else
|
||||
: [dptr] "+r" (p_result), [lptr] "+r" (p_left), [rptr] "+r" (p_right),
|
||||
[ctr] "+r" (l_counter), [carry] "+r" (l_carry), [left] "=r" (l_left), [right] "=r" (l_right)
|
||||
: [dptr] "+r" (result), [lptr] "+r" (left), [rptr] "+r" (right),
|
||||
[ctr] "+r" (counter), [carry] "+r" (carry),
|
||||
[left] "=r" (left_word), [right] "=r" (right_word)
|
||||
#endif
|
||||
:
|
||||
: "cc", "memory"
|
||||
);
|
||||
return l_carry;
|
||||
return carry;
|
||||
}
|
||||
#define asm_add 1
|
||||
#endif
|
||||
|
||||
#if !asm_sub
|
||||
static uint32_t vli_sub(uint32_t *p_result, const uint32_t *p_left, const uint32_t *p_right)
|
||||
{
|
||||
uint32_t l_counter = uECC_WORDS;
|
||||
uint32_t l_carry = 1; /* carry = 1 initially (means don't borrow) */
|
||||
uint32_t l_left;
|
||||
uint32_t l_right;
|
||||
static uint32_t vli_sub(uint32_t *result, const uint32_t *left, const uint32_t *right) {
|
||||
uint32_t counter = uECC_WORDS;
|
||||
uint32_t carry = 1; /* carry = 1 initially (means don't borrow) */
|
||||
uint32_t left_word;
|
||||
uint32_t right_word;
|
||||
|
||||
__asm__ volatile (
|
||||
".syntax unified \n\t"
|
||||
"1: \n\t"
|
||||
"ldmia %[lptr]!, {%[left]} \n\t" /* Load left word. */
|
||||
"ldmia %[rptr]!, {%[right]} \n\t" /* Load right word. */
|
||||
"lsrs %[carry], #1 \n\t" /* Set up carry flag (l_carry = 0 after this). */
|
||||
"lsrs %[carry], #1 \n\t" /* Set up carry flag (carry = 0 after this). */
|
||||
"sbcs %[left], %[right] \n\t" /* Subtract with borrow. */
|
||||
"adcs %[carry], %[carry] \n\t" /* Store carry bit in l_carry. */
|
||||
"adcs %[carry], %[carry] \n\t" /* Store carry bit. */
|
||||
"stmia %[dptr]!, {%[left]} \n\t" /* Store result word. */
|
||||
"subs %[ctr], #1 \n\t" /* Decrement index. */
|
||||
"bne 1b \n\t" /* Loop until index == 0. */
|
||||
"subs %[ctr], #1 \n\t" /* Decrement counter. */
|
||||
"bne 1b \n\t" /* Loop until counter == 0. */
|
||||
#if (uECC_PLATFORM != uECC_arm_thumb2)
|
||||
".syntax divided \n\t"
|
||||
#endif
|
||||
#if (uECC_PLATFORM == uECC_arm_thumb)
|
||||
: [dptr] "+l" (p_result), [lptr] "+l" (p_left), [rptr] "+l" (p_right),
|
||||
[ctr] "+l" (l_counter), [carry] "+l" (l_carry), [left] "=l" (l_left), [right] "=l" (l_right)
|
||||
: [dptr] "+l" (result), [lptr] "+l" (left), [rptr] "+l" (right),
|
||||
[ctr] "+l" (counter), [carry] "+l" (carry),
|
||||
[left] "=l" (left_word), [right] "=l" (right_word)
|
||||
#else
|
||||
: [dptr] "+r" (p_result), [lptr] "+r" (p_left), [rptr] "+r" (p_right),
|
||||
[ctr] "+r" (l_counter), [carry] "+r" (l_carry), [left] "=r" (l_left), [right] "=r" (l_right)
|
||||
: [dptr] "+r" (result), [lptr] "+r" (left), [rptr] "+r" (right),
|
||||
[ctr] "+r" (counter), [carry] "+r" (carry),
|
||||
[left] "=r" (left_word), [right] "=r" (right_word)
|
||||
#endif
|
||||
:
|
||||
: "cc", "memory"
|
||||
);
|
||||
return !l_carry;
|
||||
return !carry;
|
||||
}
|
||||
#define asm_sub 1
|
||||
#endif
|
||||
|
||||
#if !asm_mult
|
||||
static void vli_mult(uint32_t *p_result, const uint32_t *p_left, const uint32_t *p_right)
|
||||
{
|
||||
static void vli_mult(uint32_t *result, const uint32_t *left, const uint32_t *right) {
|
||||
#if (uECC_PLATFORM != uECC_arm_thumb)
|
||||
uint32_t c0 = 0;
|
||||
uint32_t c1 = 0;
|
||||
@@ -1590,10 +1585,10 @@ static void vli_mult(uint32_t *p_result, const uint32_t *p_left, const uint32_t
|
||||
"3: \n\t" /* inner loop */
|
||||
"subs %[t0], %[k], %[i] \n\t" /* t0 = k-i */
|
||||
|
||||
"ldr %[t1], [%[right], %[t0]] \n\t" /* t1 = p_right[k-i] */
|
||||
"ldr %[t0], [%[left], %[i]] \n\t" /* t0 = p_left[i] */
|
||||
"ldr %[t1], [%[right], %[t0]] \n\t" /* t1 = right[k - i] */
|
||||
"ldr %[t0], [%[left], %[i]] \n\t" /* t0 = left[i] */
|
||||
|
||||
"umull %[t0], %[t1], %[t0], %[t1] \n\t" /* (t0, t1) = p_left[i] * p_right[k-i] */
|
||||
"umull %[t0], %[t1], %[t0], %[t1] \n\t" /* (t0, t1) = left[i] * right[k - i] */
|
||||
|
||||
"adds %[c0], %[t0] \n\t" /* add low word to c0 */
|
||||
"adcs %[c1], %[t1] \n\t" /* add high word to c1, including carry */
|
||||
@@ -1601,38 +1596,40 @@ static void vli_mult(uint32_t *p_result, const uint32_t *p_left, const uint32_t
|
||||
|
||||
"adds %[i], #4 \n\t" /* i += 4 */
|
||||
"cmp %[i], %[eccd] \n\t" /* i < uECC_WORDS (times 4)? */
|
||||
"bge 4f \n\t" /* if not, exit the loop */
|
||||
"bge 4f \n\t" /* if not, exit the loop */
|
||||
"cmp %[i], %[k] \n\t" /* i <= k? */
|
||||
"ble 3b \n\t" /* if so, continue looping */
|
||||
"ble 3b \n\t" /* if so, continue looping */
|
||||
|
||||
"4: \n\t" /* end inner loop */
|
||||
|
||||
"str %[c0], [%[result], %[k]] \n\t" /* p_result[k] = c0 */
|
||||
"str %[c0], [%[result], %[k]] \n\t" /* result[k] = c0 */
|
||||
"mov %[c0], %[c1] \n\t" /* c0 = c1 */
|
||||
"mov %[c1], %[c2] \n\t" /* c1 = c2 */
|
||||
"movs %[c2], #0 \n\t" /* c2 = 0 */
|
||||
"adds %[k], #4 \n\t" /* k += 4 */
|
||||
"cmp %[k], %[eccd] \n\t" /* k < uECC_WORDS (times 4) ? */
|
||||
"blt 1b \n\t" /* if not, loop back, start with i = 0 */
|
||||
"blt 1b \n\t" /* if so, loop back, start with i = 0 */
|
||||
"cmp %[k], %[eccd2m1] \n\t" /* k < uECC_WORDS * 2 - 1 (times 4) ? */
|
||||
"blt 2b \n\t" /* if not, loop back, start with i = (k+1) - uECC_WORDS */
|
||||
"blt 2b \n\t" /* if so, loop back, start with i = (k + 1) - uECC_WORDS */
|
||||
/* end outer loop */
|
||||
|
||||
"str %[c0], [%[result], %[k]] \n\t" /* p_result[uECC_WORDS * 2 - 1] = c0 */
|
||||
"str %[c0], [%[result], %[k]] \n\t" /* result[uECC_WORDS * 2 - 1] = c0 */
|
||||
#if (uECC_PLATFORM != uECC_arm_thumb2)
|
||||
".syntax divided \n\t"
|
||||
#endif
|
||||
: [c0] "+r" (c0), [c1] "+r" (c1), [c2] "+r" (c2), [k] "+r" (k), [i] "=&r" (i), [t0] "=&r" (t0), [t1] "=&r" (t1)
|
||||
: [result] "r" (p_result), [left] "r" (p_left), [right] "r" (p_right),
|
||||
[eccd] "I" (uECC_WORDS * 4), [eccdm1] "I" ((uECC_WORDS-1) * 4), [eccd2m1] "I" ((uECC_WORDS * 2 - 1) * 4)
|
||||
: [c0] "+r" (c0), [c1] "+r" (c1), [c2] "+r" (c2),
|
||||
[k] "+r" (k), [i] "=&r" (i), [t0] "=&r" (t0), [t1] "=&r" (t1)
|
||||
: [result] "r" (result), [left] "r" (left), [right] "r" (right),
|
||||
[eccd] "I" (uECC_WORDS * 4), [eccdm1] "I" ((uECC_WORDS-1) * 4),
|
||||
[eccd2m1] "I" ((uECC_WORDS * 2 - 1) * 4)
|
||||
: "cc", "memory"
|
||||
);
|
||||
|
||||
#else /* Thumb-1 */
|
||||
|
||||
register uint32_t *r0 __asm__("r0") = p_result;
|
||||
register const uint32_t *r1 __asm__("r1") = p_left;
|
||||
register const uint32_t *r2 __asm__("r2") = p_right;
|
||||
register uint32_t *r0 __asm__("r0") = result;
|
||||
register const uint32_t *r1 __asm__("r1") = left;
|
||||
register const uint32_t *r2 __asm__("r2") = right;
|
||||
|
||||
__asm__ volatile (
|
||||
".syntax unified \n\t"
|
||||
@@ -1641,7 +1638,7 @@ static void vli_mult(uint32_t *p_result, const uint32_t *p_left, const uint32_t
|
||||
"movs r5, #0 \n\t" /* c2 = 0 */
|
||||
"movs r6, #0 \n\t" /* k = 0 */
|
||||
|
||||
"push {r0} \n\t" /* keep p_result on the stack */
|
||||
"push {r0} \n\t" /* keep result on the stack */
|
||||
|
||||
"1: \n\t" /* outer loop (k < uECC_WORDS) */
|
||||
"movs r7, #0 \n\t" /* r7 = i = 0 */
|
||||
@@ -1653,10 +1650,10 @@ static void vli_mult(uint32_t *p_result, const uint32_t *p_left, const uint32_t
|
||||
|
||||
"3: \n\t" /* inner loop */
|
||||
"push {r3, r4, r5, r6} \n\t" /* push things, r3 (c0) is at the top of stack. */
|
||||
"subs r0, r6, r7 \n\t" /* r0 = k-i */
|
||||
"subs r0, r6, r7 \n\t" /* r0 = k - i */
|
||||
|
||||
"ldr r4, [r2, r0] \n\t" /* r4 = p_right[k-i] */
|
||||
"ldr r0, [r1, r7] \n\t" /* r0 = p_left[i] */
|
||||
"ldr r4, [r2, r0] \n\t" /* r4 = right[k - i] */
|
||||
"ldr r0, [r1, r7] \n\t" /* r0 = left[i] */
|
||||
|
||||
"lsrs r3, r0, #16 \n\t" /* r3 = a1 */
|
||||
"uxth r0, r0 \n\t" /* r0 = a0 */
|
||||
@@ -1665,21 +1662,21 @@ static void vli_mult(uint32_t *p_result, const uint32_t *p_left, const uint32_t
|
||||
"uxth r4, r4 \n\t" /* r4 = b0 */
|
||||
|
||||
"movs r6, r3 \n\t" /* r6 = a1 */
|
||||
"muls r6, r5, r6 \n\t" /* r6 = a1*b1 */
|
||||
"muls r3, r4, r3 \n\t" /* r3 = b0*a1 */
|
||||
"muls r5, r0, r5 \n\t" /* r5 = a0*b1 */
|
||||
"muls r0, r4, r0 \n\t" /* r0 = a0*b0 */
|
||||
"muls r6, r5, r6 \n\t" /* r6 = a1 * b1 */
|
||||
"muls r3, r4, r3 \n\t" /* r3 = b0 * a1 */
|
||||
"muls r5, r0, r5 \n\t" /* r5 = a0 * b1 */
|
||||
"muls r0, r4, r0 \n\t" /* r0 = a0 * b0 */
|
||||
|
||||
"movs r4, #0 \n\t" /* r4 = 0 */
|
||||
"adds r3, r5 \n\t" /* r3 = b0*a1 + a0*b1 */
|
||||
"adds r3, r5 \n\t" /* r3 = b0 * a1 + a0 * b1 */
|
||||
"adcs r4, r4 \n\t" /* r4 = carry */
|
||||
"lsls r4, #16 \n\t" /* r4 = carry << 16 */
|
||||
"adds r6, r4 \n\t" /* r6 = a1*b1 + carry */
|
||||
"adds r6, r4 \n\t" /* r6 = a1 * b1 + carry */
|
||||
|
||||
"lsls r4, r3, #16 \n\t" /* r4 = (b0*a1 + a0*b1) << 16 */
|
||||
"lsrs r3, #16 \n\t" /* r3 = (b0*a1 + a0*b1) >> 16 */
|
||||
"adds r0, r4 \n\t" /* r0 = low word = a0*b0 + ((b0*a1 + a0*b1) << 16) */
|
||||
"adcs r6, r3 \n\t" /* r6 = high word = a1*b1 + carry + ((b0*a1 + a0*b1) >> 16) */
|
||||
"lsls r4, r3, #16 \n\t" /* r4 = (b0 * a1 + a0 * b1) << 16 */
|
||||
"lsrs r3, #16 \n\t" /* r3 = (b0 * a1 + a0 * b1) >> 16 */
|
||||
"adds r0, r4 \n\t" /* r0 = low word = a0 * b0 + ((b0 * a1 + a0 * b1) << 16) */
|
||||
"adcs r6, r3 \n\t" /* r6 = high word = a1 * b1 + carry + ((b0 * a1 + a0 * b1) >> 16) */
|
||||
|
||||
"pop {r3, r4, r5} \n\t" /* r3 = c0, r4 = c1, r5 = c2 */
|
||||
"adds r3, r0 \n\t" /* add low word to c0 */
|
||||
@@ -1691,27 +1688,27 @@ static void vli_mult(uint32_t *p_result, const uint32_t *p_left, const uint32_t
|
||||
|
||||
"adds r7, #4 \n\t" /* i += 4 */
|
||||
"cmp r7, %[eccd] \n\t" /* i < uECC_WORDS (times 4)? */
|
||||
"bge 4f \n\t" /* if not, exit the loop */
|
||||
"bge 4f \n\t" /* if not, exit the loop */
|
||||
"cmp r7, r6 \n\t" /* i <= k? */
|
||||
"ble 3b \n\t" /* if so, continue looping */
|
||||
"ble 3b \n\t" /* if so, continue looping */
|
||||
|
||||
"4: \n\t" /* end inner loop */
|
||||
|
||||
"ldr r0, [sp, #0] \n\t" /* r0 = p_result */
|
||||
"ldr r0, [sp, #0] \n\t" /* r0 = result */
|
||||
|
||||
"str r3, [r0, r6] \n\t" /* p_result[k] = c0 */
|
||||
"str r3, [r0, r6] \n\t" /* result[k] = c0 */
|
||||
"mov r3, r4 \n\t" /* c0 = c1 */
|
||||
"mov r4, r5 \n\t" /* c1 = c2 */
|
||||
"movs r5, #0 \n\t" /* c2 = 0 */
|
||||
"adds r6, #4 \n\t" /* k += 4 */
|
||||
"cmp r6, %[eccd] \n\t" /* k < uECC_WORDS (times 4) ? */
|
||||
"blt 1b \n\t" /* if not, loop back, start with i = 0 */
|
||||
"blt 1b \n\t" /* if so, loop back, start with i = 0 */
|
||||
"cmp r6, %[eccd2m1] \n\t" /* k < uECC_WORDS * 2 - 1 (times 4) ? */
|
||||
"blt 2b \n\t" /* if not, loop back, start with i = (k+1) - uECC_WORDS */
|
||||
"blt 2b \n\t" /* if so, loop back, start with i = (k + 1) - uECC_WORDS */
|
||||
/* end outer loop */
|
||||
|
||||
"str r3, [r0, r6] \n\t" /* p_result[uECC_WORDS * 2 - 1] = c0 */
|
||||
"pop {r0} \n\t" /* pop p_result off the stack */
|
||||
"str r3, [r0, r6] \n\t" /* result[uECC_WORDS * 2 - 1] = c0 */
|
||||
"pop {r0} \n\t" /* pop result off the stack */
|
||||
|
||||
".syntax divided \n\t"
|
||||
:
|
||||
@@ -1725,8 +1722,7 @@ static void vli_mult(uint32_t *p_result, const uint32_t *p_left, const uint32_t
|
||||
|
||||
#if uECC_SQUARE_FUNC
|
||||
#if !asm_square
|
||||
static void vli_square(uint32_t *p_result, const uint32_t *p_left)
|
||||
{
|
||||
static void vli_square(uint32_t *result, const uint32_t *left) {
|
||||
#if (uECC_PLATFORM != uECC_arm_thumb)
|
||||
uint32_t c0 = 0;
|
||||
uint32_t c1 = 0;
|
||||
@@ -1749,17 +1745,17 @@ static void vli_square(uint32_t *p_result, const uint32_t *p_left)
|
||||
"3: \n\t" /* inner loop */
|
||||
"subs %[tt], %[k], %[i] \n\t" /* tt = k-i */
|
||||
|
||||
"ldr %[t1], [%[left], %[tt]] \n\t" /* t1 = p_left[k-i] */
|
||||
"ldr %[t0], [%[left], %[i]] \n\t" /* t0 = p_left[i] */
|
||||
"ldr %[t1], [%[left], %[tt]] \n\t" /* t1 = left[k - i] */
|
||||
"ldr %[t0], [%[left], %[i]] \n\t" /* t0 = left[i] */
|
||||
|
||||
"umull %[t0], %[t1], %[t0], %[t1] \n\t" /* (t0, t1) = p_left[i] * p_right[k-i] */
|
||||
"umull %[t0], %[t1], %[t0], %[t1] \n\t" /* (t0, t1) = left[i] * left[k - i] */
|
||||
|
||||
"cmp %[i], %[tt] \n\t" /* (i < k-i) ? */
|
||||
"bge 4f \n\t" /* if i >= k-i, skip */
|
||||
"lsls %[t1], #1 \n\t" /* high word << 1 */
|
||||
"adc %[c2], #0 \n\t" /* add carry bit to c2 */
|
||||
"lsls %[t0], #1 \n\t" /* low word << 1 */
|
||||
"adc %[t1], #0 \n\t" /* add carry bit to high word */
|
||||
"cmp %[i], %[tt] \n\t" /* (i < k - i) ? */
|
||||
"bge 4f \n\t" /* if i >= k - i, skip */
|
||||
"lsls %[t1], #1 \n\t" /* high word << 1 */
|
||||
"adc %[c2], #0 \n\t" /* add carry bit to c2 */
|
||||
"lsls %[t0], #1 \n\t" /* low word << 1 */
|
||||
"adc %[t1], #0 \n\t" /* add carry bit to high word */
|
||||
|
||||
"4: \n\t"
|
||||
|
||||
@@ -1769,38 +1765,40 @@ static void vli_square(uint32_t *p_result, const uint32_t *p_left)
|
||||
|
||||
"adds %[i], #4 \n\t" /* i += 4 */
|
||||
"cmp %[i], %[k] \n\t" /* i < k? */
|
||||
"bge 5f \n\t" /* if not, exit the loop */
|
||||
"subs %[tt], %[k], %[i] \n\t" /* tt = k-i */
|
||||
"cmp %[i], %[tt] \n\t" /* i <= k-i? */
|
||||
"ble 3b \n\t" /* if so, continue looping */
|
||||
"bge 5f \n\t" /* if not, exit the loop */
|
||||
"subs %[tt], %[k], %[i] \n\t" /* tt = k - i */
|
||||
"cmp %[i], %[tt] \n\t" /* i <= k - i? */
|
||||
"ble 3b \n\t" /* if so, continue looping */
|
||||
|
||||
"5: \n\t" /* end inner loop */
|
||||
|
||||
"str %[c0], [%[result], %[k]] \n\t" /* p_result[k] = c0 */
|
||||
"str %[c0], [%[result], %[k]] \n\t" /* result[k] = c0 */
|
||||
"mov %[c0], %[c1] \n\t" /* c0 = c1 */
|
||||
"mov %[c1], %[c2] \n\t" /* c1 = c2 */
|
||||
"movs %[c2], #0 \n\t" /* c2 = 0 */
|
||||
"adds %[k], #4 \n\t" /* k += 4 */
|
||||
"cmp %[k], %[eccd] \n\t" /* k < uECC_WORDS (times 4) ? */
|
||||
"blt 1b \n\t" /* if not, loop back, start with i = 0 */
|
||||
"blt 1b \n\t" /* if so, loop back, start with i = 0 */
|
||||
"cmp %[k], %[eccd2m1] \n\t" /* k < uECC_WORDS * 2 - 1 (times 4) ? */
|
||||
"blt 2b \n\t" /* if not, loop back, start with i = (k+1) - uECC_WORDS */
|
||||
"blt 2b \n\t" /* if so, loop back, start with i = (k + 1) - uECC_WORDS */
|
||||
/* end outer loop */
|
||||
|
||||
"str %[c0], [%[result], %[k]] \n\t" /* p_result[uECC_WORDS * 2 - 1] = c0 */
|
||||
"str %[c0], [%[result], %[k]] \n\t" /* result[uECC_WORDS * 2 - 1] = c0 */
|
||||
#if (uECC_PLATFORM != uECC_arm_thumb2)
|
||||
".syntax divided \n\t"
|
||||
#endif
|
||||
: [c0] "+r" (c0), [c1] "+r" (c1), [c2] "+r" (c2), [k] "+r" (k), [i] "=&r" (i), [tt] "=&r" (tt), [t0] "=&r" (t0), [t1] "=&r" (t1)
|
||||
: [result] "r" (p_result), [left] "r" (p_left),
|
||||
[eccd] "I" (uECC_WORDS * 4), [eccdm1] "I" ((uECC_WORDS-1) * 4), [eccd2m1] "I" ((uECC_WORDS * 2 - 1) * 4)
|
||||
: [c0] "+r" (c0), [c1] "+r" (c1), [c2] "+r" (c2),
|
||||
[k] "+r" (k), [i] "=&r" (i), [tt] "=&r" (tt), [t0] "=&r" (t0), [t1] "=&r" (t1)
|
||||
: [result] "r" (result), [left] "r" (left),
|
||||
[eccd] "I" (uECC_WORDS * 4), [eccdm1] "I" ((uECC_WORDS-1) * 4),
|
||||
[eccd2m1] "I" ((uECC_WORDS * 2 - 1) * 4)
|
||||
: "cc", "memory"
|
||||
);
|
||||
|
||||
#else
|
||||
|
||||
register uint32_t *r0 __asm__("r0") = p_result;
|
||||
register const uint32_t *r1 __asm__("r1") = p_left;
|
||||
register uint32_t *r0 __asm__("r0") = result;
|
||||
register const uint32_t *r1 __asm__("r1") = left;
|
||||
|
||||
__asm__ volatile (
|
||||
".syntax unified \n\t"
|
||||
@@ -1809,7 +1807,7 @@ static void vli_square(uint32_t *p_result, const uint32_t *p_left)
|
||||
"movs r4, #0 \n\t" /* c2 = 0 */
|
||||
"movs r5, #0 \n\t" /* k = 0 */
|
||||
|
||||
"push {r0} \n\t" /* keep p_result on the stack */
|
||||
"push {r0} \n\t" /* keep result on the stack */
|
||||
|
||||
"1: \n\t" /* outer loop (k < uECC_WORDS) */
|
||||
"movs r6, #0 \n\t" /* r6 = i = 0 */
|
||||
@@ -1821,10 +1819,10 @@ static void vli_square(uint32_t *p_result, const uint32_t *p_left)
|
||||
|
||||
"3: \n\t" /* inner loop */
|
||||
"push {r2, r3, r4, r5} \n\t" /* push things, r2 (c0) is at the top of stack. */
|
||||
"subs r7, r5, r6 \n\t" /* r7 = k-i */
|
||||
"subs r7, r5, r6 \n\t" /* r7 = k - i */
|
||||
|
||||
"ldr r3, [r1, r7] \n\t" /* r3 = p_left[k-i] */
|
||||
"ldr r0, [r1, r6] \n\t" /* r0 = p_left[i] */
|
||||
"ldr r3, [r1, r7] \n\t" /* r3 = left[k - i] */
|
||||
"ldr r0, [r1, r6] \n\t" /* r0 = left[i] */
|
||||
|
||||
"lsrs r2, r0, #16 \n\t" /* r2 = a1 */
|
||||
"uxth r0, r0 \n\t" /* r0 = a0 */
|
||||
@@ -1833,26 +1831,26 @@ static void vli_square(uint32_t *p_result, const uint32_t *p_left)
|
||||
"uxth r3, r3 \n\t" /* r3 = b0 */
|
||||
|
||||
"movs r5, r2 \n\t" /* r5 = a1 */
|
||||
"muls r5, r4, r5 \n\t" /* r5 = a1*b1 */
|
||||
"muls r2, r3, r2 \n\t" /* r2 = b0*a1 */
|
||||
"muls r4, r0, r4 \n\t" /* r4 = a0*b1 */
|
||||
"muls r0, r3, r0 \n\t" /* r0 = a0*b0 */
|
||||
"muls r5, r4, r5 \n\t" /* r5 = a1 * b1 */
|
||||
"muls r2, r3, r2 \n\t" /* r2 = b0 * a1 */
|
||||
"muls r4, r0, r4 \n\t" /* r4 = a0 * b1 */
|
||||
"muls r0, r3, r0 \n\t" /* r0 = a0 * b0 */
|
||||
|
||||
"movs r3, #0 \n\t" /* r3 = 0 */
|
||||
"adds r2, r4 \n\t" /* r2 = b0*a1 + a0*b1 */
|
||||
"adds r2, r4 \n\t" /* r2 = b0 * a1 + a0 * b1 */
|
||||
"adcs r3, r3 \n\t" /* r3 = carry */
|
||||
"lsls r3, #16 \n\t" /* r3 = carry << 16 */
|
||||
"adds r5, r3 \n\t" /* r5 = a1*b1 + carry */
|
||||
"adds r5, r3 \n\t" /* r5 = a1 * b1 + carry */
|
||||
|
||||
"lsls r3, r2, #16 \n\t" /* r3 = (b0*a1 + a0*b1) << 16 */
|
||||
"lsrs r2, #16 \n\t" /* r2 = (b0*a1 + a0*b1) >> 16 */
|
||||
"adds r0, r3 \n\t" /* r0 = low word = a0*b0 + ((b0*a1 + a0*b1) << 16) */
|
||||
"adcs r5, r2 \n\t" /* r5 = high word = a1*b1 + carry + ((b0*a1 + a0*b1) >> 16) */
|
||||
"lsls r3, r2, #16 \n\t" /* r3 = (b0 * a1 + a0 * b1) << 16 */
|
||||
"lsrs r2, #16 \n\t" /* r2 = (b0 * a1 + a0 * b1) >> 16 */
|
||||
"adds r0, r3 \n\t" /* r0 = low word = a0 * b0 + ((b0 * a1 + a0 * b1) << 16) */
|
||||
"adcs r5, r2 \n\t" /* r5 = high word = a1 * b1 + carry + ((b0 * a1 + a0 * b1) >> 16) */
|
||||
|
||||
"movs r3, #0 \n\t" /* r3 = 0 */
|
||||
"cmp r6, r7 \n\t" /* (i < k-i) ? */
|
||||
"cmp r6, r7 \n\t" /* (i < k - i) ? */
|
||||
"mov r7, r3 \n\t" /* r7 = 0 (does not affect condition)*/
|
||||
"bge 4f \n\t" /* if i >= k-i, skip */
|
||||
"bge 4f \n\t" /* if i >= k - i, skip */
|
||||
"lsls r5, #1 \n\t" /* high word << 1 */
|
||||
"adcs r7, r3 \n\t" /* r7 = carry bit for c2 */
|
||||
"lsls r0, #1 \n\t" /* low word << 1 */
|
||||
@@ -1870,33 +1868,34 @@ static void vli_square(uint32_t *p_result, const uint32_t *p_left)
|
||||
|
||||
"adds r6, #4 \n\t" /* i += 4 */
|
||||
"cmp r6, r5 \n\t" /* i < k? */
|
||||
"bge 5f \n\t" /* if not, exit the loop */
|
||||
"subs r7, r5, r6 \n\t" /* r7 = k-i */
|
||||
"cmp r6, r7 \n\t" /* i <= k-i? */
|
||||
"ble 3b \n\t" /* if so, continue looping */
|
||||
"bge 5f \n\t" /* if not, exit the loop */
|
||||
"subs r7, r5, r6 \n\t" /* r7 = k - i */
|
||||
"cmp r6, r7 \n\t" /* i <= k - i? */
|
||||
"ble 3b \n\t" /* if so, continue looping */
|
||||
|
||||
"5: \n\t" /* end inner loop */
|
||||
|
||||
"ldr r0, [sp, #0] \n\t" /* r0 = p_result */
|
||||
"ldr r0, [sp, #0] \n\t" /* r0 = result */
|
||||
|
||||
"str r2, [r0, r5] \n\t" /* p_result[k] = c0 */
|
||||
"str r2, [r0, r5] \n\t" /* result[k] = c0 */
|
||||
"mov r2, r3 \n\t" /* c0 = c1 */
|
||||
"mov r3, r4 \n\t" /* c1 = c2 */
|
||||
"movs r4, #0 \n\t" /* c2 = 0 */
|
||||
"adds r5, #4 \n\t" /* k += 4 */
|
||||
"cmp r5, %[eccd] \n\t" /* k < uECC_WORDS (times 4) ? */
|
||||
"blt 1b \n\t" /* if not, loop back, start with i = 0 */
|
||||
"blt 1b \n\t" /* if so, loop back, start with i = 0 */
|
||||
"cmp r5, %[eccd2m1] \n\t" /* k < uECC_WORDS * 2 - 1 (times 4) ? */
|
||||
"blt 2b \n\t" /* if not, loop back, start with i = (k+1) - uECC_WORDS */
|
||||
"blt 2b \n\t" /* if so, loop back, start with i = (k + 1) - uECC_WORDS */
|
||||
/* end outer loop */
|
||||
|
||||
"str r2, [r0, r5] \n\t" /* p_result[uECC_WORDS * 2 - 1] = c0 */
|
||||
"pop {r0} \n\t" /* pop p_result off the stack */
|
||||
"str r2, [r0, r5] \n\t" /* result[uECC_WORDS * 2 - 1] = c0 */
|
||||
"pop {r0} \n\t" /* pop result off the stack */
|
||||
|
||||
".syntax divided \n\t"
|
||||
: [r0] "+l" (r0), [r1] "+l" (r1)
|
||||
: [eccd] "I" (uECC_WORDS * 4), [eccdm1] "I" ((uECC_WORDS-1) * 4), [eccd2m1] "I" ((uECC_WORDS * 2 - 1) * 4)
|
||||
: "r2", "r3", "r4", "r5", "r6", "r7", "cc", "memory"
|
||||
: [eccd] "I" (uECC_WORDS * 4), [eccdm1] "I" ((uECC_WORDS-1) * 4),
|
||||
[eccd2m1] "I" ((uECC_WORDS * 2 - 1) * 4)
|
||||
: "r2", "r3", "r4", "r5", "r6", "cc", "memory"
|
||||
);
|
||||
#endif
|
||||
}
|
||||
|
||||
+200
-212
@@ -44,32 +44,30 @@
|
||||
|
||||
#if (uECC_ASM == uECC_asm_fast)
|
||||
|
||||
static void vli_clear(uint8_t *p_vli)
|
||||
{
|
||||
static void vli_clear(uint8_t *vli) {
|
||||
__asm__ volatile (
|
||||
REPEAT(uECC_BYTES, "st %a[ptr]+, r1 \n\t")
|
||||
|
||||
: [ptr] "+e" (p_vli)
|
||||
REPEAT(uECC_BYTES,
|
||||
"st %a[ptr]+, r1 \n\t")
|
||||
: [ptr] "+e" (vli)
|
||||
:
|
||||
: "r0", "cc", "memory"
|
||||
);
|
||||
}
|
||||
#define asm_clear 1
|
||||
|
||||
static void vli_set(uint8_t *p_dest, const uint8_t *p_src)
|
||||
{
|
||||
static void vli_set(uint8_t *dest, const uint8_t *src) {
|
||||
__asm__ volatile (
|
||||
REPEAT(uECC_BYTES, "ld r0, %a[sptr]+ \n\t"
|
||||
REPEAT(uECC_BYTES,
|
||||
"ld r0, %a[sptr]+ \n\t"
|
||||
"st %a[dptr]+, r0 \n\t")
|
||||
: [dptr] "+e" (p_dest), [sptr] "+e" (p_src)
|
||||
: [dptr] "+e" (dest), [sptr] "+e" (src)
|
||||
:
|
||||
: "r0", "cc", "memory"
|
||||
);
|
||||
}
|
||||
#define asm_set 1
|
||||
|
||||
static void vli_rshift1(uint8_t *p_vli)
|
||||
{
|
||||
static void vli_rshift1(uint8_t *vli) {
|
||||
__asm__ volatile (
|
||||
"adiw r30, " STR(uECC_BYTES) " \n\t"
|
||||
"ld r0, -z \n\t" /* Load byte. */
|
||||
@@ -77,23 +75,22 @@ static void vli_rshift1(uint8_t *p_vli)
|
||||
"st z, r0 \n\t" /* Store the first result byte. */
|
||||
|
||||
/* Now we just do the remaining bytes with the carry bit (using ROR) */
|
||||
REPEAT(DEC(uECC_BYTES), "ld r0, -z \n\t"
|
||||
REPEAT(DEC(uECC_BYTES),
|
||||
"ld r0, -z \n\t"
|
||||
"ror r0 \n\t"
|
||||
"st z, r0 \n\t")
|
||||
|
||||
: "+z" (p_vli)
|
||||
: "+z" (vli)
|
||||
:
|
||||
: "r0", "cc", "memory"
|
||||
);
|
||||
}
|
||||
#define asm_rshift1 1
|
||||
|
||||
/* Computes p_result = p_left + p_right, returning carry. Can modify in place. */
|
||||
static uint8_t vli_add(uint8_t *p_result, const uint8_t *p_left, const uint8_t *p_right)
|
||||
{
|
||||
uint8_t l_carry = 0;
|
||||
uint8_t l_left;
|
||||
uint8_t l_right;
|
||||
/* Computes result = left + right, returning carry. Can modify in place. */
|
||||
static uint8_t vli_add(uint8_t *result, const uint8_t *left, const uint8_t *right) {
|
||||
uint8_t carry = 0;
|
||||
uint8_t left_byte;
|
||||
uint8_t right_byte;
|
||||
|
||||
__asm__ volatile (
|
||||
"ld %[left], x+ \n\t" /* Load left byte. */
|
||||
@@ -102,30 +99,29 @@ static uint8_t vli_add(uint8_t *p_result, const uint8_t *p_left, const uint8_t *
|
||||
"st z+, %[left] \n\t" /* Store the first result byte. */
|
||||
|
||||
/* Now we just do the remaining bytes with the carry bit (using ADC) */
|
||||
REPEAT(DEC(uECC_BYTES), "ld %[left], x+ \n\t"
|
||||
REPEAT(DEC(uECC_BYTES),
|
||||
"ld %[left], x+ \n\t"
|
||||
"ld %[right], y+ \n\t"
|
||||
"adc %[left], %[right] \n\t"
|
||||
"st z+, %[left] \n\t")
|
||||
|
||||
"adc %[carry], %[carry] \n\t" /* Store carry bit in l_carry. */
|
||||
|
||||
"adc %[carry], %[carry] \n\t" /* Store carry bit. */
|
||||
"sbiw r28, " STR(uECC_BYTES) " \n\t" /* Restore Y */
|
||||
|
||||
: "+z" (p_result), "+x" (p_left),
|
||||
[carry] "+r" (l_carry), [left] "=&r" (l_left), [right] "=&r" (l_right)
|
||||
: "y" (p_right)
|
||||
: "+z" (result), "+x" (left),
|
||||
[carry] "+r" (carry), [left] "=&r" (left_byte), [right] "=&r" (right_byte)
|
||||
: "y" (right)
|
||||
: "cc", "memory"
|
||||
);
|
||||
return l_carry;
|
||||
return carry;
|
||||
}
|
||||
#define asm_add 1
|
||||
|
||||
/* Computes p_result = p_left - p_right, returning borrow. Can modify in place. */
|
||||
static uint8_t vli_sub(uint8_t *p_result, const uint8_t *p_left, const uint8_t *p_right)
|
||||
{
|
||||
uint8_t l_borrow = 0;
|
||||
uint8_t l_left;
|
||||
uint8_t l_right;
|
||||
/* Computes result = left - right, returning borrow. Can modify in place. */
|
||||
static uint8_t vli_sub(uint8_t *result, const uint8_t *left, const uint8_t *right) {
|
||||
uint8_t borrow = 0;
|
||||
uint8_t left_byte;
|
||||
uint8_t right_byte;
|
||||
|
||||
__asm__ volatile (
|
||||
"ld %[left], x+ \n\t" /* Load left byte. */
|
||||
@@ -134,28 +130,27 @@ static uint8_t vli_sub(uint8_t *p_result, const uint8_t *p_left, const uint8_t *
|
||||
"st z+, %[left] \n\t" /* Store the first result byte. */
|
||||
|
||||
/* Now we just do the remaining bytes with the carry bit (using SBC) */
|
||||
REPEAT(DEC(uECC_BYTES), "ld %[left], x+ \n\t"
|
||||
REPEAT(DEC(uECC_BYTES),
|
||||
"ld %[left], x+ \n\t"
|
||||
"ld %[right], y+ \n\t"
|
||||
"sbc %[left], %[right] \n\t"
|
||||
"st z+, %[left] \n\t")
|
||||
|
||||
"adc %[borrow], %[borrow] \n\t" /* Store carry bit in l_borrow. */
|
||||
|
||||
"adc %[borrow], %[borrow] \n\t" /* Store carry bit in borrow. */
|
||||
"sbiw r28, " STR(uECC_BYTES) " \n\t" /* Restore Y */
|
||||
|
||||
: "+z" (p_result), "+x" (p_left),
|
||||
[borrow] "+r" (l_borrow), [left] "=&r" (l_left), [right] "=&r" (l_right)
|
||||
: "y" (p_right)
|
||||
: "+z" (result), "+x" (left),
|
||||
[borrow] "+r" (borrow), [left] "=&r" (left_byte), [right] "=&r" (right_byte)
|
||||
: "y" (right)
|
||||
: "cc", "memory"
|
||||
);
|
||||
return l_borrow;
|
||||
return borrow;
|
||||
}
|
||||
#define asm_sub 1
|
||||
|
||||
#if (uECC_BYTES == 20)
|
||||
__attribute((noinline))
|
||||
static void vli_mult(uint8_t *p_result, const uint8_t *p_left, const uint8_t *p_right)
|
||||
{
|
||||
static void vli_mult(uint8_t *result, const uint8_t *left, const uint8_t *right) {
|
||||
__asm__ volatile (
|
||||
"adiw r30, 10 \n\t"
|
||||
"adiw r28, 10 \n\t"
|
||||
@@ -2060,17 +2055,17 @@ static void vli_mult(uint8_t *p_result, const uint8_t *p_left, const uint8_t *p_
|
||||
"st z+, r23 \n\t"
|
||||
"st z+, r24 \n\t"
|
||||
"eor r1, r1 \n\t"
|
||||
: "+x" (p_left), "+y" (p_right), "+z" (p_result)
|
||||
: "+x" (left), "+y" (right), "+z" (result)
|
||||
:
|
||||
: "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12",
|
||||
"r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", "r24", "r25", "cc", "memory"
|
||||
: "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
|
||||
"r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20",
|
||||
"r21", "r22", "r23", "r24", "r25", "cc", "memory"
|
||||
);
|
||||
}
|
||||
#define asm_mult 1
|
||||
#elif (uECC_BYTES == 24)
|
||||
__attribute((noinline))
|
||||
static void vli_mult(uint8_t *p_result, const uint8_t *p_left, const uint8_t *p_right)
|
||||
{
|
||||
static void vli_mult(uint8_t *result, const uint8_t *left, const uint8_t *right) {
|
||||
__asm__ volatile (
|
||||
"adiw r30, 20 \n\t"
|
||||
"adiw r28, 20 \n\t"
|
||||
@@ -4843,17 +4838,17 @@ static void vli_mult(uint8_t *p_result, const uint8_t *p_left, const uint8_t *p_
|
||||
"st z+, r23 \n\t"
|
||||
|
||||
"eor r1, r1 \n\t"
|
||||
: "+x" (p_left), "+y" (p_right), "+z" (p_result)
|
||||
: "+x" (left), "+y" (right), "+z" (result)
|
||||
:
|
||||
: "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12",
|
||||
"r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", "r24", "r25", "cc", "memory"
|
||||
: "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
|
||||
"r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20",
|
||||
"r21", "r22", "r23", "r24", "r25", "cc", "memory"
|
||||
);
|
||||
}
|
||||
#define asm_mult 1
|
||||
#elif (uECC_BYTES == 32)
|
||||
__attribute((noinline))
|
||||
static void vli_mult(uint8_t *p_result, const uint8_t *p_left, const uint8_t *p_right)
|
||||
{
|
||||
static void vli_mult(uint8_t *result, const uint8_t *left, const uint8_t *right) {
|
||||
__asm__ volatile (
|
||||
"adiw r30, 30 \n\t"
|
||||
"adiw r28, 30 \n\t"
|
||||
@@ -9769,10 +9764,11 @@ static void vli_mult(uint8_t *p_result, const uint8_t *p_left, const uint8_t *p_
|
||||
"st z+, r24 \n\t"
|
||||
|
||||
"eor r1, r1 \n\t"
|
||||
: "+x" (p_left), "+y" (p_right), "+z" (p_result)
|
||||
: "+x" (left), "+y" (right), "+z" (result)
|
||||
:
|
||||
: "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12",
|
||||
"r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", "r24", "r25", "cc", "memory"
|
||||
: "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
|
||||
"r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20",
|
||||
"r21", "r22", "r23", "r24", "r25", "cc", "memory"
|
||||
);
|
||||
}
|
||||
#define asm_mult 1
|
||||
@@ -9781,8 +9777,7 @@ static void vli_mult(uint8_t *p_result, const uint8_t *p_left, const uint8_t *p_
|
||||
#if uECC_SQUARE_FUNC
|
||||
|
||||
#if (uECC_BYTES == 20)
|
||||
static void vli_square(uint8_t *p_result, const uint8_t *p_left)
|
||||
{
|
||||
static void vli_square(uint8_t *result, const uint8_t *left) {
|
||||
__asm__ volatile (
|
||||
"ld r2, x+ \n\t"
|
||||
"ld r3, x+ \n\t"
|
||||
@@ -10937,10 +10932,11 @@ static void vli_square(uint8_t *p_result, const uint8_t *p_left)
|
||||
"st z+, r23 \n\t"
|
||||
"st z+, r25 \n\t"
|
||||
"eor r1, r1 \n\t"
|
||||
: "+x" (p_left), "+z" (p_result)
|
||||
: "+x" (left), "+z" (result)
|
||||
:
|
||||
: "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12",
|
||||
"r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", "r24", "r25", "cc", "memory"
|
||||
: "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
|
||||
"r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20",
|
||||
"r21", "r22", "r23", "r24", "r25", "cc", "memory"
|
||||
);
|
||||
}
|
||||
#define asm_square 1
|
||||
@@ -10948,8 +10944,7 @@ static void vli_square(uint8_t *p_result, const uint8_t *p_left)
|
||||
#elif (uECC_BYTES == 24)
|
||||
|
||||
__attribute((noinline))
|
||||
static void vli_square(uint8_t *p_result, const uint8_t *p_left)
|
||||
{
|
||||
static void vli_square(uint8_t *result, const uint8_t *left) {
|
||||
__asm__ volatile (
|
||||
"ldi r25, 0 \n\t"
|
||||
"movw r28, r26 \n\t"
|
||||
@@ -12596,10 +12591,11 @@ static void vli_square(uint8_t *p_result, const uint8_t *p_left)
|
||||
"st z+, r23 \n\t"
|
||||
"st z+, r28 \n\t"
|
||||
"eor r1, r1 \n\t"
|
||||
: "+x" (p_left), "+z" (p_result)
|
||||
: "+x" (left), "+z" (result)
|
||||
:
|
||||
: "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12",
|
||||
"r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r28", "r29", "cc", "memory"
|
||||
: "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
|
||||
"r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20",
|
||||
"r21", "r22", "r23", "r24", "r25", "r28", "r29", "cc", "memory"
|
||||
);
|
||||
}
|
||||
#define asm_square 1
|
||||
@@ -12607,8 +12603,7 @@ static void vli_square(uint8_t *p_result, const uint8_t *p_left)
|
||||
#elif (uECC_BYTES == 32)
|
||||
|
||||
__attribute((noinline))
|
||||
static void vli_square(uint8_t *p_result, const uint8_t *p_left)
|
||||
{
|
||||
static void vli_square(uint8_t *result, const uint8_t *left) {
|
||||
__asm__ volatile (
|
||||
"ldi r25, 0 \n\t"
|
||||
"movw r28, r26 \n\t"
|
||||
@@ -15431,10 +15426,11 @@ static void vli_square(uint8_t *p_result, const uint8_t *p_left)
|
||||
"st z+, r23 \n\t"
|
||||
"st z+, r28 \n\t"
|
||||
"eor r1, r1 \n\t"
|
||||
: "+x" (p_left), "+z" (p_result)
|
||||
: "+x" (left), "+z" (result)
|
||||
:
|
||||
: "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12",
|
||||
"r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r28", "r29", "cc", "memory"
|
||||
: "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
|
||||
"r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20",
|
||||
"r21", "r22", "r23", "r24", "r25", "r28", "r29", "cc", "memory"
|
||||
);
|
||||
}
|
||||
#define asm_square 1
|
||||
@@ -15442,29 +15438,29 @@ static void vli_square(uint8_t *p_result, const uint8_t *p_left)
|
||||
#endif /* uECC_BYTES == xx */
|
||||
#endif /* uECC_SQUARE_FUNC */
|
||||
|
||||
static void vli_modSub_fast(uint8_t *p_result, const uint8_t *p_left, const uint8_t *p_right)
|
||||
{
|
||||
static void vli_modSub_fast(uint8_t *result, const uint8_t *left, const uint8_t *right) {
|
||||
uint8_t t1, t2;
|
||||
__asm__ volatile (
|
||||
"push r28 \n\t" /* Save Y */
|
||||
"push r29 \n\t"
|
||||
|
||||
"ld %[t1], x+ \n\t" /* Load left word. */
|
||||
"ld %[t2], y+ \n\t" /* Load right word. */
|
||||
"ld %[t1], x+ \n\t" /* Load left word. */
|
||||
"ld %[t2], y+ \n\t" /* Load right word. */
|
||||
"sub %[t1], %[t2] \n\t" /* Subtract the first word. */
|
||||
"st z+, %[t1] \n\t" /* Store the first result word. */
|
||||
"st z+, %[t1] \n\t" /* Store the first result word. */
|
||||
|
||||
/* Now we just do the remaining words with the carry bit (using SBC) */
|
||||
REPEAT(DEC(uECC_BYTES), "ld %[t1], x+ \n\t"
|
||||
REPEAT(DEC(uECC_BYTES),
|
||||
"ld %[t1], x+ \n\t"
|
||||
"ld %[t2], y+ \n\t"
|
||||
"sbc %[t1], %[t2] \n\t"
|
||||
"st z+, %[t1] \n\t")
|
||||
|
||||
"brcs 1f \n\t" /* If borrow is set, then we need to add */
|
||||
"brcs 1f \n\t" /* If borrow is set, then we need to add */
|
||||
"rjmp done \n\t" /* otherwise we are done */
|
||||
"1: \n\t"
|
||||
|
||||
"sbiw r30, " STR(uECC_BYTES) " \n\t" /* make z point at p_result again */
|
||||
"sbiw r30, " STR(uECC_BYTES) " \n\t" /* make z point at result again */
|
||||
"ldi r28, lo8(curve_p) \n\t" /* make y point at curve_p */
|
||||
"ldi r29, hi8(curve_p) \n\t"
|
||||
|
||||
@@ -15473,7 +15469,8 @@ static void vli_modSub_fast(uint8_t *p_result, const uint8_t *p_left, const uint
|
||||
"ld %[t2], y+ \n\t"
|
||||
"add %[t1], %[t2] \n\t"
|
||||
"st z+, %[t1] \n\t"
|
||||
REPEAT(DEC(uECC_BYTES), "ld %[t1], z \n\t"
|
||||
REPEAT(DEC(uECC_BYTES),
|
||||
"ld %[t1], z \n\t"
|
||||
"ld %[t2], y+ \n\t"
|
||||
"adc %[t1], %[t2] \n\t"
|
||||
"st z+, %[t1] \n\t")
|
||||
@@ -15482,18 +15479,17 @@ static void vli_modSub_fast(uint8_t *p_result, const uint8_t *p_left, const uint
|
||||
"pop r29 \n\t" /* Restore Y */
|
||||
"pop r28 \n\t"
|
||||
|
||||
: "+z" (p_result), "+x" (p_left),
|
||||
: "+z" (result), "+x" (left),
|
||||
[t1] "=&r" (t1), [t2] "=&r" (t2)
|
||||
: "y" (p_right)
|
||||
: "y" (right)
|
||||
: "cc", "memory"
|
||||
);
|
||||
}
|
||||
#define asm_modSub_fast 1
|
||||
|
||||
#if uECC_CURVE == uECC_secp160r1
|
||||
static void vli_mmod_fast(uint8_t *RESTRICT p_result, uint8_t *RESTRICT p_product)
|
||||
{
|
||||
uint8_t l_carry = 0;
|
||||
static void vli_mmod_fast(uint8_t *RESTRICT result, uint8_t *RESTRICT product) {
|
||||
uint8_t carry = 0;
|
||||
__asm__ volatile (
|
||||
"in r30, __SP_L__ \n\t"
|
||||
"in r31, __SP_H__ \n\t"
|
||||
@@ -15504,23 +15500,25 @@ static void vli_mmod_fast(uint8_t *RESTRICT p_result, uint8_t *RESTRICT p_produc
|
||||
"out __SREG__, r0 \n\t"
|
||||
"out __SP_L__, r30 \n\t"
|
||||
|
||||
"adiw r30, 25 \n\t" /* we are shifting by 31 bits, so shift over 4 bytes (+ 1 since z initially points below the stack) */
|
||||
"adiw r26, 40 \n\t" /* end of p_product */
|
||||
"ld r18, -x \n\t" /* Load word. */
|
||||
"lsr r18 \n\t" /* Shift. */
|
||||
"st -z, r18 \n\t" /* Store the first result word. */
|
||||
"adiw r30, 25 \n\t" /* we are shifting by 31 bits, so shift over 4 bytes
|
||||
(+ 1 since z initially points below the stack) */
|
||||
"adiw r26, 40 \n\t" /* end of product */
|
||||
"ld r18, -x \n\t" /* Load word. */
|
||||
"lsr r18 \n\t" /* Shift. */
|
||||
"st -z, r18 \n\t" /* Store the first result word. */
|
||||
|
||||
/* Now we just do the remaining words with the carry bit (using ROR) */
|
||||
REPEAT(19, "ld r18, -x \n\t"
|
||||
REPEAT(19,
|
||||
"ld r18, -x \n\t"
|
||||
"ror r18 \n\t"
|
||||
"st -z, r18 \n\t")
|
||||
|
||||
"eor r18, r18 \n\t" /* r18 = 0 */
|
||||
"ror r18 \n\t" /* get last bit */
|
||||
"st -z, r18 \n\t" /* store it */
|
||||
"ror r18 \n\t" /* get last bit */
|
||||
"st -z, r18 \n\t" /* store it */
|
||||
|
||||
"sbiw r30, 3 \n\t" /* move z back to point at tmp */
|
||||
/* now we add p_right */
|
||||
/* now we add right */
|
||||
"ld r18, x+ \n\t"
|
||||
"st z+, r18 \n\t" /* the first 3 bytes do not need to be added */
|
||||
"ld r18, x+ \n\t"
|
||||
@@ -15534,12 +15532,13 @@ static void vli_mmod_fast(uint8_t *RESTRICT p_result, uint8_t *RESTRICT p_produc
|
||||
"st z+, r18 \n\t"
|
||||
|
||||
/* Now we just do the remaining words with the carry bit (using ADC) */
|
||||
REPEAT(16, "ld r18, x+ \n\t"
|
||||
REPEAT(16,
|
||||
"ld r18, x+ \n\t"
|
||||
"ld r19, z \n\t"
|
||||
"adc r18, r19 \n\t"
|
||||
"st z+, r18 \n\t")
|
||||
|
||||
/* Propagate over the remaining bytes of p_result */
|
||||
/* Propagate over the remaining bytes of result */
|
||||
"ld r18, z \n\t"
|
||||
"adc r18, r1 \n\t"
|
||||
"st z+, r18 \n\t"
|
||||
@@ -15557,27 +15556,29 @@ static void vli_mmod_fast(uint8_t *RESTRICT p_result, uint8_t *RESTRICT p_produc
|
||||
"st z+, r18 \n\t"
|
||||
|
||||
"sbiw r30, 24 \n\t" /* move z back to point at tmp */
|
||||
"sbiw r26, 40 \n\t" /* move x back to point at p_product */
|
||||
"sbiw r26, 40 \n\t" /* move x back to point at product */
|
||||
|
||||
/* add low bytes of tmp to p_product, storing in p_result */
|
||||
/* add low bytes of tmp to product, storing in result */
|
||||
"ld r18, z+ \n\t"
|
||||
"ld r19, x+ \n\t"
|
||||
"add r18, r19 \n\t"
|
||||
"st y+, r18 \n\t"
|
||||
REPEAT(19, "ld r18, z+ \n\t"
|
||||
REPEAT(19,
|
||||
"ld r18, z+ \n\t"
|
||||
"ld r19, x+ \n\t"
|
||||
"adc r18, r19 \n\t"
|
||||
"st y+, r18 \n\t")
|
||||
"adc %[carry], __zero_reg__ \n\t" /* Store carry bit (carry flag is cleared). */
|
||||
/* at this point x is at the end of p_product, y is at the end of p_result, z is 20 bytes into tmp */
|
||||
"sbiw r28, 20 \n\t" /* move y back to point at p_result */
|
||||
"adiw r30, 4 \n\t" /* move z to point to the end of tmp */
|
||||
"adc %[carry], __zero_reg__ \n\t" /* Store carry bit (carry flag is cleared). */
|
||||
/* at this point x is at the end of product, y is at the end of result,
|
||||
z is 20 bytes into tmp */
|
||||
"sbiw r28, 20 \n\t" /* move y back to point at result */
|
||||
"adiw r30, 4 \n\t" /* move z to point to the end of tmp */
|
||||
|
||||
/* do omega_mult again with the 4 relevant bytes */
|
||||
/* z points to the end of tmp, x points to the end of p_product */
|
||||
"ld r18, -z \n\t" /* Load word. */
|
||||
"lsr r18 \n\t" /* Shift. */
|
||||
"st -x, r18 \n\t" /* Store the first result word. */
|
||||
/* z points to the end of tmp, x points to the end of product */
|
||||
"ld r18, -z \n\t" /* Load word. */
|
||||
"lsr r18 \n\t" /* Shift. */
|
||||
"st -x, r18 \n\t" /* Store the first result word. */
|
||||
|
||||
"ld r18, -z \n\t"
|
||||
"ror r18 \n\t"
|
||||
@@ -15590,8 +15591,8 @@ static void vli_mmod_fast(uint8_t *RESTRICT p_result, uint8_t *RESTRICT p_produc
|
||||
"st -x, r18 \n\t"
|
||||
|
||||
"eor r18, r18 \n\t" /* r18 = 0 */
|
||||
"ror r18 \n\t" /* get last bit */
|
||||
"st -x, r18 \n\t" /* store it */
|
||||
"ror r18 \n\t" /* get last bit */
|
||||
"st -x, r18 \n\t" /* store it */
|
||||
|
||||
"sbiw r26, 3 \n\t" /* move x back to point at beginning */
|
||||
/* now we add a copy of the 4 bytes */
|
||||
@@ -15624,25 +15625,28 @@ static void vli_mmod_fast(uint8_t *RESTRICT p_result, uint8_t *RESTRICT p_produc
|
||||
"adc r18, r1 \n\t"
|
||||
"st x+, r18 \n\t"
|
||||
|
||||
/* now z points to the end of tmp, x points to the end of p_product (y still points at p_result) */
|
||||
/* now z points to the end of tmp, x points to the end of product
|
||||
(y still points at result) */
|
||||
"sbiw r26, 8 \n\t" /* move x back to point at beginning of actual data */
|
||||
/* add into p_result */
|
||||
/* add into result */
|
||||
"ld r18, x+ \n\t"
|
||||
"ld r19, y \n\t"
|
||||
"add r18, r19 \n\t"
|
||||
"st y+, r18 \n\t"
|
||||
REPEAT(7, "ld r18, x+ \n\t"
|
||||
REPEAT(7,
|
||||
"ld r18, x+ \n\t"
|
||||
"ld r19, y \n\t"
|
||||
"adc r18, r19 \n\t"
|
||||
"st y+, r18 \n\t")
|
||||
|
||||
/* Done adding, now propagate carry bit */
|
||||
REPEAT(12, "ld r18, y \n\t"
|
||||
REPEAT(12,
|
||||
"ld r18, y \n\t"
|
||||
"adc r18, __zero_reg__ \n\t"
|
||||
"st y+, r18 \n\t")
|
||||
|
||||
"adc %[carry], __zero_reg__ \n\t" /* Store carry bit (carry flag is cleared). */
|
||||
"sbiw r28, 20 \n\t" /* move y back to point at p_result */
|
||||
"adc %[carry], __zero_reg__ \n\t" /* Store carry bit (carry flag is cleared). */
|
||||
"sbiw r28, 20 \n\t" /* move y back to point at result */
|
||||
|
||||
"sbiw r30, 1 \n\t" /* fix stack pointer */
|
||||
"in r0, __SREG__ \n\t"
|
||||
@@ -15651,32 +15655,27 @@ static void vli_mmod_fast(uint8_t *RESTRICT p_result, uint8_t *RESTRICT p_produc
|
||||
"out __SREG__, r0 \n\t"
|
||||
"out __SP_L__, r30 \n\t"
|
||||
|
||||
: "+x" (p_product), [carry] "+r" (l_carry)
|
||||
: "y" (p_result)
|
||||
: "+x" (product), [carry] "+r" (carry)
|
||||
: "y" (result)
|
||||
: "r0", "r18", "r19", "r30", "r31", "cc", "memory"
|
||||
);
|
||||
|
||||
if(l_carry > 0)
|
||||
{
|
||||
--l_carry;
|
||||
vli_sub(p_result, p_result, curve_p);
|
||||
if (carry > 0) {
|
||||
--carry;
|
||||
vli_sub(result, result, curve_p);
|
||||
}
|
||||
if(l_carry > 0)
|
||||
{
|
||||
vli_sub(p_result, p_result, curve_p);
|
||||
if (carry > 0) {
|
||||
vli_sub(result, result, curve_p);
|
||||
}
|
||||
|
||||
if(vli_cmp(p_result, curve_p) > 0)
|
||||
{
|
||||
vli_sub(p_result, p_result, curve_p);
|
||||
if (vli_cmp(result, curve_p) > 0) {
|
||||
vli_sub(result, result, curve_p);
|
||||
}
|
||||
}
|
||||
#define asm_mmod_fast 1
|
||||
|
||||
#elif (uECC_CURVE == uECC_secp256k1)
|
||||
static void vli_mmod_fast(uint8_t *RESTRICT p_result, uint8_t *RESTRICT p_product)
|
||||
{
|
||||
uint8_t l_carry = 0;
|
||||
static void vli_mmod_fast(uint8_t *RESTRICT result, uint8_t *RESTRICT product) {
|
||||
uint8_t carry = 0;
|
||||
__asm__ volatile (
|
||||
"in r30, __SP_L__ \n\t"
|
||||
"in r31, __SP_H__ \n\t"
|
||||
@@ -15687,8 +15686,8 @@ static void vli_mmod_fast(uint8_t *RESTRICT p_result, uint8_t *RESTRICT p_produc
|
||||
"out __SREG__, r0 \n\t"
|
||||
"out __SP_L__, r30 \n\t"
|
||||
|
||||
"adiw r30, 1 \n\t" /* add 1 since z initially points below the stack */
|
||||
"adiw r26, 32 \n\t" /* p_product + uECC_WORDS */
|
||||
"adiw r30, 1 \n\t" /* add 1 since z initially points below the stack */
|
||||
"adiw r26, 32 \n\t" /* product + uECC_WORDS */
|
||||
"ldi r25, 0x03 \n\t"
|
||||
"ldi r24, 0xD1 \n\t"
|
||||
"ld r18, x+ \n\t"
|
||||
@@ -15852,27 +15851,29 @@ static void vli_mmod_fast(uint8_t *RESTRICT p_result, uint8_t *RESTRICT p_produc
|
||||
"eor r1, r1 \n\t" /* make r1 be 0 again */
|
||||
|
||||
"sbiw r30, 37 \n\t" /* move z back to point at tmp */
|
||||
"subi r26, 64 \n\t" /* move x back to point at p_product */
|
||||
"subi r26, 64 \n\t" /* move x back to point at product */
|
||||
"sbc r27, __zero_reg__ \n\t"
|
||||
|
||||
/* add low bytes of tmp to p_product, storing in p_result */
|
||||
/* add low bytes of tmp to product, storing in result */
|
||||
"ld r18, z+ \n\t"
|
||||
"ld r19, x+ \n\t"
|
||||
"add r18, r19 \n\t"
|
||||
"st y+, r18 \n\t"
|
||||
REPEAT(31, "ld r18, z+ \n\t"
|
||||
REPEAT(31,
|
||||
"ld r18, z+ \n\t"
|
||||
"ld r19, x+ \n\t"
|
||||
"adc r18, r19 \n\t"
|
||||
"st y+, r18 \n\t")
|
||||
|
||||
"adc %[carry], __zero_reg__ \n\t" /* Store carry bit (carry flag is cleared). */
|
||||
/* at this point x is at the end of p_product, y is at the end of p_result, z is 32 bytes into tmp */
|
||||
"sbiw r28, 32 \n\t" /* move y back to point at p_result */
|
||||
"adc %[carry], __zero_reg__ \n\t" /* Store carry bit (carry flag is cleared). */
|
||||
/* at this point x is at the end of product, y is at the end of result,
|
||||
z is 32 bytes into tmp */
|
||||
"sbiw r28, 32 \n\t" /* move y back to point at result */
|
||||
|
||||
/* do omega_mult again with the 5 relevant bytes */
|
||||
/* z points to l_tmp + uECC_WORDS, x points to the end of p_product */
|
||||
"sbiw r26, 32 \n\t" /* shift x back to point into the p_product buffer (we can overwrite it now) */
|
||||
|
||||
/* z points to tmp + uECC_WORDS, x points to the end of product */
|
||||
"sbiw r26, 32 \n\t" /* shift x back to point into the product buffer
|
||||
(we can overwrite it now) */
|
||||
"ld r18, z+ \n\t"
|
||||
"ld r19, z+ \n\t"
|
||||
"ld r20, z+ \n\t"
|
||||
@@ -15947,25 +15948,28 @@ static void vli_mmod_fast(uint8_t *RESTRICT p_result, uint8_t *RESTRICT p_produc
|
||||
"st x+, r22 \n\t"
|
||||
"eor r1, r1 \n\t" /* make r1 be 0 again */
|
||||
|
||||
/* now z points to the end of tmp, x points to the end of p_product (y still points at p_result) */
|
||||
/* now z points to the end of tmp, x points to the end of product
|
||||
(y still points at result) */
|
||||
"sbiw r26, 10 \n\t" /* move x back to point at beginning of actual data */
|
||||
/* add into p_result */
|
||||
/* add into result */
|
||||
"ld r18, x+ \n\t"
|
||||
"ld r19, y \n\t"
|
||||
"add r18, r19 \n\t"
|
||||
"st y+, r18 \n\t"
|
||||
REPEAT(9, "ld r18, x+ \n\t"
|
||||
REPEAT(9,
|
||||
"ld r18, x+ \n\t"
|
||||
"ld r19, y \n\t"
|
||||
"adc r18, r19 \n\t"
|
||||
"st y+, r18 \n\t")
|
||||
|
||||
/* Done adding, now propagate carry bit */
|
||||
REPEAT(22, "ld r18, y \n\t"
|
||||
REPEAT(22,
|
||||
"ld r18, y \n\t"
|
||||
"adc r18, __zero_reg__ \n\t"
|
||||
"st y+, r18 \n\t")
|
||||
|
||||
"adc %[carry], __zero_reg__ \n\t" /* Store carry bit (carry flag is cleared). */
|
||||
"sbiw r28, 32 \n\t" /* move y back to point at p_result */
|
||||
"sbiw r28, 32 \n\t" /* move y back to point at result */
|
||||
|
||||
"sbiw r30, 1 \n\t" /* fix stack pointer */
|
||||
"in r0, __SREG__ \n\t"
|
||||
@@ -15974,24 +15978,20 @@ static void vli_mmod_fast(uint8_t *RESTRICT p_result, uint8_t *RESTRICT p_produc
|
||||
"out __SREG__, r0 \n\t"
|
||||
"out __SP_L__, r30 \n\t"
|
||||
|
||||
: "+x" (p_product), [carry] "+r" (l_carry)
|
||||
: "y" (p_result)
|
||||
: "+x" (product), [carry] "+r" (carry)
|
||||
: "y" (result)
|
||||
: "r0", "r18", "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r30", "r31", "cc", "memory"
|
||||
);
|
||||
|
||||
if(l_carry > 0)
|
||||
{
|
||||
--l_carry;
|
||||
vli_sub(p_result, p_result, curve_p);
|
||||
if (carry > 0) {
|
||||
--carry;
|
||||
vli_sub(result, result, curve_p);
|
||||
}
|
||||
if(l_carry > 0)
|
||||
{
|
||||
vli_sub(p_result, p_result, curve_p);
|
||||
if (carry > 0) {
|
||||
vli_sub(result, result, curve_p);
|
||||
}
|
||||
|
||||
if(vli_cmp(p_result, curve_p) > 0)
|
||||
{
|
||||
vli_sub(p_result, p_result, curve_p);
|
||||
if (vli_cmp(result, curve_p) > 0) {
|
||||
vli_sub(result, result, curve_p);
|
||||
}
|
||||
}
|
||||
#define asm_mmod_fast 1
|
||||
@@ -16001,8 +16001,7 @@ static void vli_mmod_fast(uint8_t *RESTRICT p_result, uint8_t *RESTRICT p_produc
|
||||
#endif /* (uECC_ASM == uECC_asm_fast) */
|
||||
|
||||
#if !asm_rshift1
|
||||
static void vli_rshift1(uint8_t *p_vli)
|
||||
{
|
||||
static void vli_rshift1(uint8_t *vli) {
|
||||
uint8_t i = uECC_BYTES;
|
||||
__asm__ volatile (
|
||||
"adiw r30, " STR(uECC_BYTES) " \n\t"
|
||||
@@ -16015,7 +16014,7 @@ static void vli_rshift1(uint8_t *p_vli)
|
||||
"dec %[i] \n\t"
|
||||
"brne 1b \n\t"
|
||||
|
||||
: "+z" (p_vli), [i] "+r" (i)
|
||||
: "+z" (vli), [i] "+r" (i)
|
||||
:
|
||||
: "r0", "cc", "memory"
|
||||
);
|
||||
@@ -16024,12 +16023,11 @@ static void vli_rshift1(uint8_t *p_vli)
|
||||
#endif
|
||||
|
||||
#if !asm_add
|
||||
static uint8_t vli_add(uint8_t *p_result, const uint8_t *p_left, const uint8_t *p_right)
|
||||
{
|
||||
static uint8_t vli_add(uint8_t *result, const uint8_t *left, const uint8_t *right) {
|
||||
uint8_t i = uECC_BYTES;
|
||||
uint8_t l_carry = 0;
|
||||
uint8_t l_left;
|
||||
uint8_t l_right;
|
||||
uint8_t carry = 0;
|
||||
uint8_t left_byte;
|
||||
uint8_t right_byte;
|
||||
|
||||
__asm__ volatile (
|
||||
"clc \n\t"
|
||||
@@ -16042,27 +16040,25 @@ static uint8_t vli_add(uint8_t *p_result, const uint8_t *p_left, const uint8_t *
|
||||
"dec %[i] \n\t"
|
||||
"brne 1b \n\t"
|
||||
|
||||
"adc %[carry], %[carry] \n\t" /* Store carry bit in l_carry. */
|
||||
|
||||
"adc %[carry], %[carry] \n\t" /* Store carry bit. */
|
||||
"sbiw r28, " STR(uECC_BYTES) " \n\t" /* Restore Y */
|
||||
|
||||
: "+z" (p_result), "+x" (p_left), [i] "+r" (i),
|
||||
[carry] "+r" (l_carry), [left] "=&r" (l_left), [right] "=&r" (l_right)
|
||||
: "y" (p_right)
|
||||
: "+z" (result), "+x" (left), [i] "+r" (i),
|
||||
[carry] "+r" (carry), [left] "=&r" (left_byte), [right] "=&r" (right_byte)
|
||||
: "y" (right)
|
||||
: "cc", "memory"
|
||||
);
|
||||
return l_carry;
|
||||
return carry;
|
||||
}
|
||||
#define asm_add 1
|
||||
#endif
|
||||
|
||||
#if !asm_sub
|
||||
static uint8_t vli_sub(uint8_t *p_result, const uint8_t *p_left, const uint8_t *p_right)
|
||||
{
|
||||
static uint8_t vli_sub(uint8_t *result, const uint8_t *left, const uint8_t *right) {
|
||||
uint8_t i = uECC_BYTES;
|
||||
uint8_t l_borrow = 0;
|
||||
uint8_t l_left;
|
||||
uint8_t l_right;
|
||||
uint8_t borrow = 0;
|
||||
uint8_t left_byte;
|
||||
uint8_t right_byte;
|
||||
|
||||
__asm__ volatile (
|
||||
"clc \n\t"
|
||||
@@ -16075,37 +16071,33 @@ static uint8_t vli_sub(uint8_t *p_result, const uint8_t *p_left, const uint8_t *
|
||||
"dec %[i] \n\t"
|
||||
"brne 1b \n\t"
|
||||
|
||||
"adc %[borrow], %[borrow] \n\t" /* Store carry bit in l_borrow. */
|
||||
|
||||
"adc %[borrow], %[borrow] \n\t" /* Store carry bit in borrow. */
|
||||
"sbiw r28, " STR(uECC_BYTES) " \n\t" /* Restore Y */
|
||||
|
||||
: "+z" (p_result), "+x" (p_left), [i] "+r" (i),
|
||||
[borrow] "+r" (l_borrow), [left] "=&r" (l_left), [right] "=&r" (l_right)
|
||||
: "y" (p_right)
|
||||
: "+z" (result), "+x" (left), [i] "+r" (i),
|
||||
[borrow] "+r" (borrow), [left] "=&r" (left_byte), [right] "=&r" (right_byte)
|
||||
: "y" (right)
|
||||
: "cc", "memory"
|
||||
);
|
||||
return l_borrow;
|
||||
return borrow;
|
||||
}
|
||||
#define asm_sub 1
|
||||
#endif
|
||||
|
||||
#if !asm_mult
|
||||
__attribute((noinline))
|
||||
static void vli_mult(uint8_t *p_result, const uint8_t *p_left, const uint8_t *p_right)
|
||||
{
|
||||
static void vli_mult(uint8_t *result, const uint8_t *left, const uint8_t *right) {
|
||||
uint8_t r0 = 0;
|
||||
uint8_t r1 = 0;
|
||||
uint8_t r2 = 0;
|
||||
|
||||
uint8_t l_zero = 0;
|
||||
|
||||
uint8_t zero = 0;
|
||||
uint8_t k, i;
|
||||
|
||||
__asm__ volatile (
|
||||
"ldi %[k], 1 \n\t" /* k = 1; k < uECC_BYTES; ++k */
|
||||
|
||||
"1: \n\t"
|
||||
"ldi %[i], 0 \n\t" /* i=0; i < k; ++i */
|
||||
"ldi %[i], 0 \n\t" /* i = 0; i < k; ++i */
|
||||
|
||||
"add r28, %[k] \n\t" /* pre-add right ptr */
|
||||
"adc r29, %[zero] \n\t"
|
||||
@@ -16137,10 +16129,10 @@ static void vli_mult(uint8_t *p_result, const uint8_t *p_left, const uint8_t *p_
|
||||
|
||||
/* second half */
|
||||
"ldi %[k], " STR(uECC_BYTES) " \n\t" /* k = uECC_BYTES; k > 0; --k */
|
||||
"adiw r28, " STR(uECC_BYTES) " \n\t" /* move right ptr to point at the end of p_right */
|
||||
"adiw r28, " STR(uECC_BYTES) " \n\t" /* move right ptr to point at the end of right */
|
||||
|
||||
"1: \n\t"
|
||||
"ldi %[i], 0 \n\t" /* i=0; i < k; ++i */
|
||||
"ldi %[i], 0 \n\t" /* i = 0; i < k; ++i */
|
||||
|
||||
"2: \n\t"
|
||||
"ld r0, x+ \n\t"
|
||||
@@ -16164,22 +16156,21 @@ static void vli_mult(uint8_t *p_result, const uint8_t *p_left, const uint8_t *p_
|
||||
"mov %[r2], %[zero] \n\t"
|
||||
|
||||
"dec %[k] \n\t"
|
||||
"sub r26, %[k] \n\t" /* fix up left ptr (after k is decremented, so next time we start 1 higher) */
|
||||
"sub r26, %[k] \n\t" /* fix up left ptr (after k is decremented, so next time
|
||||
we start 1 higher) */
|
||||
"sbc r27, %[zero] \n\t"
|
||||
|
||||
"cpi %[k], 0 \n\t"
|
||||
"brne 1b \n\t" /* loop if k > 0 */
|
||||
|
||||
"st z+, %[r0] \n\t" /* Store last result byte. */
|
||||
|
||||
"eor r1, r1 \n\t" /* fix r1 to be 0 again */
|
||||
|
||||
"sbiw r28, " STR(uECC_BYTES) " \n\t" /* Restore Y */
|
||||
|
||||
: "+z" (p_result), "+x" (p_left),
|
||||
[r0] "+r" (r0), [r1] "+r" (r1), [r2] "+r" (r2), [zero] "+r" (l_zero),
|
||||
: "+z" (result), "+x" (left),
|
||||
[r0] "+r" (r0), [r1] "+r" (r1), [r2] "+r" (r2), [zero] "+r" (zero),
|
||||
[k] "=&a" (k), [i] "=&a" (i)
|
||||
: "y" (p_right)
|
||||
: "y" (right)
|
||||
: "r0", "cc", "memory"
|
||||
);
|
||||
}
|
||||
@@ -16188,18 +16179,15 @@ static void vli_mult(uint8_t *p_result, const uint8_t *p_left, const uint8_t *p_
|
||||
|
||||
#if uECC_SQUARE_FUNC
|
||||
#if !asm_square
|
||||
static void vli_square(uint8_t *p_result, const uint8_t *p_left)
|
||||
{
|
||||
static void vli_square(uint8_t *result, const uint8_t *left) {
|
||||
uint8_t r0 = 0;
|
||||
uint8_t r1 = 0;
|
||||
uint8_t r2 = 0;
|
||||
|
||||
uint8_t l_zero = 0;
|
||||
|
||||
uint8_t zero = 0;
|
||||
uint8_t k;
|
||||
|
||||
__asm__ volatile (
|
||||
"ldi %[k], 1 \n\t" /* k = 1; k < uECC_BYTES*2; ++k */
|
||||
"ldi %[k], 1 \n\t" /* k = 1; k < uECC_BYTES * 2; ++k */
|
||||
|
||||
"1: \n\t"
|
||||
|
||||
@@ -16223,7 +16211,8 @@ static void vli_square(uint8_t *p_result, const uint8_t *p_left)
|
||||
|
||||
"3: \n\t"
|
||||
"ld r0, x+ \n\t"
|
||||
"cp r26, r30 \n\t" /* if left == right here, then we are done after this mult (and we don't need to double) */
|
||||
"cp r26, r30 \n\t" /* if left == right here, then we are done after this mult
|
||||
(and we don't need to double) */
|
||||
"breq 4f \n\t"
|
||||
"ld r1, -z \n\t"
|
||||
"mul r0, r1 \n\t"
|
||||
@@ -16248,9 +16237,9 @@ static void vli_square(uint8_t *p_result, const uint8_t *p_left)
|
||||
"adc %[r2], %[zero] \n\t"
|
||||
|
||||
"5: \n\t"
|
||||
"movw r30, %[result] \n\t" /* make z point to result */
|
||||
"st z+, %[r0] \n\t" /* Store the result. */
|
||||
"movw %[result], r30 \n\t" /* update result ptr*/
|
||||
"movw r30, %[result] \n\t" /* make z point to result */
|
||||
"st z+, %[r0] \n\t" /* Store the result. */
|
||||
"movw %[result], r30 \n\t" /* update result ptr */
|
||||
"mov %[r0], %[r1] \n\t"
|
||||
"mov %[r1], %[r2] \n\t"
|
||||
"mov %[r2], %[zero] \n\t"
|
||||
@@ -16261,13 +16250,12 @@ static void vli_square(uint8_t *p_result, const uint8_t *p_left)
|
||||
|
||||
"movw r30, %[result] \n\t" /* make z point to result */
|
||||
"st z+, %[r0] \n\t" /* Store last result byte. */
|
||||
|
||||
"eor r1, r1 \n\t" /* fix r1 to be 0 again */
|
||||
|
||||
: [result] "+r" (p_result),
|
||||
[r0] "+r" (r0), [r1] "+r" (r1), [r2] "+r" (r2), [zero] "+r" (l_zero),
|
||||
: [result] "+r" (result),
|
||||
[r0] "+r" (r0), [r1] "+r" (r1), [r2] "+r" (r2), [zero] "+r" (zero),
|
||||
[k] "=&a" (k)
|
||||
: [orig] "r" (p_left), [max] "M" (2*uECC_BYTES)
|
||||
: [orig] "r" (left), [max] "M" (2*uECC_BYTES)
|
||||
: "r0", "r26", "r27", "r30", "r31", "cc", "memory"
|
||||
);
|
||||
}
|
||||
|
||||
+34
-47
@@ -2,95 +2,82 @@
|
||||
|
||||
extern "C" {
|
||||
|
||||
static int RNG(uint8_t *p_dest, unsigned p_size)
|
||||
{
|
||||
// Use the least-significant bits from the ADC for an unconnected pin (or connected to a source of random noise)
|
||||
// This can take a long time to generate random data if the result of analogRead(0) doesn't change very frequently.
|
||||
while(p_size) {
|
||||
uint8_t l_val = 0;
|
||||
for(unsigned i=0; i<8; ++i)
|
||||
{
|
||||
int l_init = analogRead(0);
|
||||
int l_count = 0;
|
||||
while(analogRead(0) == l_init)
|
||||
{
|
||||
++l_count;
|
||||
static int RNG(uint8_t *dest, unsigned size) {
|
||||
// Use the least-significant bits from the ADC for an unconnected pin (or connected to a source of
|
||||
// random noise). This can take a long time to generate random data if the result of analogRead(0)
|
||||
// doesn't change very frequently.
|
||||
while (size) {
|
||||
uint8_t val = 0;
|
||||
for (unsigned i = 0; i < 8; ++i) {
|
||||
int init = analogRead(0);
|
||||
int count = 0;
|
||||
while (analogRead(0) == init) {
|
||||
++count;
|
||||
}
|
||||
|
||||
if(l_count == 0)
|
||||
{
|
||||
l_val = (l_val << 1) | (l_init & 0x01);
|
||||
}
|
||||
else
|
||||
{
|
||||
l_val = (l_val << 1) | (l_count & 0x01);
|
||||
if (count == 0) {
|
||||
val = (val << 1) | (init & 0x01);
|
||||
} else {
|
||||
val = (val << 1) | (count & 0x01);
|
||||
}
|
||||
}
|
||||
*p_dest = l_val;
|
||||
++p_dest;
|
||||
--p_size;
|
||||
*dest = val;
|
||||
++dest;
|
||||
--size;
|
||||
}
|
||||
|
||||
// NOTE: it would be a good idea to hash the resulting random data using SHA-256 or similar.
|
||||
return 1;
|
||||
}
|
||||
|
||||
}
|
||||
} // extern "C"
|
||||
|
||||
void setup()
|
||||
{
|
||||
void setup() {
|
||||
Serial.begin(115200);
|
||||
Serial.print("Testing ecc\n");
|
||||
|
||||
uECC_set_rng(&RNG);
|
||||
}
|
||||
|
||||
void loop() {
|
||||
uint8_t l_private1[uECC_BYTES];
|
||||
uint8_t l_private2[uECC_BYTES];
|
||||
uint8_t private1[uECC_BYTES];
|
||||
uint8_t private2[uECC_BYTES];
|
||||
|
||||
uint8_t l_public1[uECC_BYTES * 2];
|
||||
uint8_t l_public2[uECC_BYTES * 2];
|
||||
uint8_t public1[uECC_BYTES * 2];
|
||||
uint8_t public2[uECC_BYTES * 2];
|
||||
|
||||
uint8_t l_secret1[uECC_BYTES];
|
||||
uint8_t l_secret2[uECC_BYTES];
|
||||
uint8_t secret1[uECC_BYTES];
|
||||
uint8_t secret2[uECC_BYTES];
|
||||
|
||||
unsigned long a = millis();
|
||||
uECC_make_key(l_public1, l_private1);
|
||||
uECC_make_key(public1, private1);
|
||||
unsigned long b = millis();
|
||||
|
||||
Serial.print("Made key 1 in "); Serial.println(b-a);
|
||||
a = millis();
|
||||
uECC_make_key(l_public2, l_private2);
|
||||
uECC_make_key(public2, private2);
|
||||
b = millis();
|
||||
Serial.print("Made key 2 in "); Serial.println(b-a);
|
||||
|
||||
a = millis();
|
||||
int r = uECC_shared_secret(l_public2, l_private1, l_secret1);
|
||||
int r = uECC_shared_secret(public2, private1, secret1);
|
||||
b = millis();
|
||||
Serial.print("Shared secret 1 in "); Serial.println(b-a);
|
||||
if(!r)
|
||||
{
|
||||
if (!r) {
|
||||
Serial.print("shared_secret() failed (1)\n");
|
||||
return;
|
||||
}
|
||||
|
||||
a = millis();
|
||||
r = uECC_shared_secret(l_public1, l_private2, l_secret2);
|
||||
r = uECC_shared_secret(public1, private2, secret2);
|
||||
b = millis();
|
||||
Serial.print("Shared secret 2 in "); Serial.println(b-a);
|
||||
if(!r)
|
||||
{
|
||||
if (!r) {
|
||||
Serial.print("shared_secret() failed (2)\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if(memcmp(l_secret1, l_secret2, sizeof(l_secret1)) != 0)
|
||||
{
|
||||
if (memcmp(secret1, secret2, sizeof(secret1)) != 0) {
|
||||
Serial.print("Shared secrets are not identical!\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
Serial.print("Shared secrets are identical\n");
|
||||
}
|
||||
}
|
||||
|
||||
+19
-26
@@ -5,64 +5,57 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/* Print the first 'size' bytes of 'vli' to stdout as two-digit uppercase hex values, starting
   with the byte at the highest index and ending with vli[0]. Each value is followed by a single
   space; nothing is printed when 'size' is 0. */
void vli_print(uint8_t *vli, unsigned int size) {
    unsigned int remaining;

    for (remaining = size; remaining != 0; --remaining) {
        printf("%02X ", (unsigned)vli[remaining - 1]);
    }
}
|
||||
|
||||
int main()
|
||||
{
|
||||
int main() {
|
||||
int i;
|
||||
int success;
|
||||
|
||||
uint8_t l_private[uECC_BYTES];
|
||||
|
||||
uint8_t l_public[uECC_BYTES * 2];
|
||||
uint8_t l_public_computed[uECC_BYTES * 2];
|
||||
uint8_t private[uECC_BYTES];
|
||||
uint8_t public[uECC_BYTES * 2];
|
||||
uint8_t public_computed[uECC_BYTES * 2];
|
||||
|
||||
printf("Testing 256 random private key pairs\n");
|
||||
|
||||
for(i=0; i<256; ++i)
|
||||
{
|
||||
for (i = 0; i < 256; ++i) {
|
||||
printf(".");
|
||||
#if !LPC11XX
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
int success = uECC_make_key(l_public, l_private);
|
||||
success = uECC_make_key(public, private);
|
||||
if (!success) {
|
||||
printf("uECC_make_key() failed\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
success = uECC_compute_public_key(l_private, l_public_computed);
|
||||
success = uECC_compute_public_key(private, public_computed);
|
||||
if (!success) {
|
||||
printf("uECC_compute_public_key() failed\n");
|
||||
}
|
||||
|
||||
if(memcmp(l_public, l_public_computed, sizeof(l_public)) != 0)
|
||||
{
|
||||
if (memcmp(public, public_computed, sizeof(public)) != 0) {
|
||||
printf("Computed and provided public keys are not identical!\n");
|
||||
printf("Computed public key = ");
|
||||
vli_print(l_public_computed, uECC_BYTES);
|
||||
vli_print(public_computed, uECC_BYTES);
|
||||
printf("\n");
|
||||
printf("Provided public key = ");
|
||||
vli_print(l_public, uECC_BYTES);
|
||||
vli_print(public, uECC_BYTES);
|
||||
printf("\n");
|
||||
printf("Private key = ");
|
||||
vli_print(l_private, uECC_BYTES);
|
||||
vli_print(private, uECC_BYTES);
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
|
||||
printf("Testing private key = 0\n");
|
||||
|
||||
memset(l_private, 0, uECC_BYTES);
|
||||
success = uECC_compute_public_key(l_private, l_public_computed);
|
||||
memset(private, 0, uECC_BYTES);
|
||||
success = uECC_compute_public_key(private, public_computed);
|
||||
if (success) {
|
||||
printf("uECC_compute_public_key() should have failed\n");
|
||||
}
|
||||
|
||||
+25
-38
@@ -11,34 +11,29 @@
|
||||
#include "/Projects/lpc11xx/peripherals/time.h"
|
||||
|
||||
/* State of the test-only pseudo-random generator below (fixed seed, so runs are repeatable). */
static uint64_t g_rand = 88172645463325252ull;

/* Deterministic stand-in for a real RNG, for test builds only.

Fills 'dest' with 'size' bytes. The 64-bit state is advanced with three xor-shift steps
(<< 13, >> 7, << 17) and then copied out in host byte order, up to 8 bytes at a time, until
the request is satisfied. The output is fully predictable and must not be used for real keys.

Always returns 1.
*/
int fake_rng(uint8_t *dest, unsigned size) {
    while (size) {
        g_rand ^= (g_rand << 13);
        g_rand ^= (g_rand >> 7);
        g_rand ^= (g_rand << 17);

        unsigned amount = (size > 8 ? 8 : size);
        memcpy(dest, &g_rand, amount);
        /* Advance the output pointer; without this every chunk would overwrite the first
           8 bytes and the rest of the buffer would never be written. */
        dest += amount;
        size -= amount;
    }
    return 1;
}
|
||||
|
||||
#endif
|
||||
|
||||
/* Print the first 'size' bytes of 'vli' to stdout as two-digit uppercase hex values, starting
   with the byte at the highest index and ending with vli[0]. Each value is followed by a single
   space; nothing is printed when 'size' is 0. */
void vli_print(uint8_t *vli, unsigned int size) {
    unsigned int remaining;

    for (remaining = size; remaining != 0; --remaining) {
        printf("%02X ", (unsigned)vli[remaining - 1]);
    }
}
|
||||
|
||||
int main()
|
||||
{
|
||||
int main() {
|
||||
#if LPC11XX
|
||||
uartInit(BAUD_115200);
|
||||
initTime();
|
||||
@@ -47,57 +42,49 @@ int main()
|
||||
#endif
|
||||
|
||||
int i;
|
||||
|
||||
uint8_t l_private1[uECC_BYTES];
|
||||
uint8_t l_private2[uECC_BYTES];
|
||||
|
||||
uint8_t l_public1[uECC_BYTES * 2];
|
||||
uint8_t l_public2[uECC_BYTES * 2];
|
||||
|
||||
uint8_t l_secret1[uECC_BYTES];
|
||||
uint8_t l_secret2[uECC_BYTES];
|
||||
uint8_t private1[uECC_BYTES];
|
||||
uint8_t private2[uECC_BYTES];
|
||||
uint8_t public1[uECC_BYTES * 2];
|
||||
uint8_t public2[uECC_BYTES * 2];
|
||||
uint8_t secret1[uECC_BYTES];
|
||||
uint8_t secret2[uECC_BYTES];
|
||||
|
||||
printf("Testing 256 random private key pairs\n");
|
||||
|
||||
for(i=0; i<256; ++i)
|
||||
{
|
||||
for (i = 0; i < 256; ++i) {
|
||||
printf(".");
|
||||
#if !LPC11XX
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
if(!uECC_make_key(l_public1, l_private1) || !uECC_make_key(l_public2, l_private2))
|
||||
{
|
||||
if (!uECC_make_key(public1, private1) || !uECC_make_key(public2, private2)) {
|
||||
printf("uECC_make_key() failed\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if(!uECC_shared_secret(l_public2, l_private1, l_secret1))
|
||||
{
|
||||
if (!uECC_shared_secret(public2, private1, secret1)) {
|
||||
printf("shared_secret() failed (1)\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if(!uECC_shared_secret(l_public1, l_private2, l_secret2))
|
||||
{
|
||||
if (!uECC_shared_secret(public1, private2, secret2)) {
|
||||
printf("shared_secret() failed (2)\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if(memcmp(l_secret1, l_secret2, sizeof(l_secret1)) != 0)
|
||||
{
|
||||
if (memcmp(secret1, secret2, sizeof(secret1)) != 0) {
|
||||
printf("Shared secrets are not identical!\n");
|
||||
printf("Shared secret 1 = ");
|
||||
vli_print(l_secret1, uECC_BYTES);
|
||||
vli_print(secret1, uECC_BYTES);
|
||||
printf("\n");
|
||||
printf("Shared secret 2 = ");
|
||||
vli_print(l_secret2, uECC_BYTES);
|
||||
vli_print(secret2, uECC_BYTES);
|
||||
printf("\n");
|
||||
printf("Private key 1 = ");
|
||||
vli_print(l_private1, uECC_BYTES);
|
||||
vli_print(private1, uECC_BYTES);
|
||||
printf("\n");
|
||||
printf("Private key 2 = ");
|
||||
vli_print(l_private2, uECC_BYTES);
|
||||
vli_print(private2, uECC_BYTES);
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
+15
-26
@@ -11,25 +11,22 @@
|
||||
#include "/Projects/lpc11xx/peripherals/time.h"
|
||||
|
||||
/* State of the test-only pseudo-random generator below (fixed seed, so runs are repeatable). */
static uint64_t g_rand = 88172645463325252ull;

/* Deterministic stand-in for a real RNG, for test builds only.

Fills 'dest' with 'size' bytes. The 64-bit state is advanced with three xor-shift steps
(<< 13, >> 7, << 17) and then copied out in host byte order, up to 8 bytes at a time, until
the request is satisfied. The output is fully predictable and must not be used for real keys.

Always returns 1.
*/
int fake_rng(uint8_t *dest, unsigned size) {
    while (size) {
        g_rand ^= (g_rand << 13);
        g_rand ^= (g_rand >> 7);
        g_rand ^= (g_rand << 17);

        unsigned amount = (size > 8 ? 8 : size);
        memcpy(dest, &g_rand, amount);
        /* Advance the output pointer; without this every chunk would overwrite the first
           8 bytes and the rest of the buffer would never be written. */
        dest += amount;
        size -= amount;
    }
    return 1;
}
|
||||
|
||||
#endif
|
||||
|
||||
int main()
|
||||
{
|
||||
int main() {
|
||||
#if LPC11XX
|
||||
uartInit(BAUD_115200);
|
||||
initTime();
|
||||
@@ -37,39 +34,31 @@ int main()
|
||||
uECC_set_rng(&fake_rng);
|
||||
#endif
|
||||
|
||||
uint8_t l_public[uECC_BYTES*2];
|
||||
uint8_t l_private[uECC_BYTES];
|
||||
|
||||
uint8_t l_hash[uECC_BYTES];
|
||||
|
||||
uint8_t l_sig[uECC_BYTES*2];
|
||||
uint8_t public[uECC_BYTES * 2];
|
||||
uint8_t private[uECC_BYTES];
|
||||
uint8_t hash[uECC_BYTES];
|
||||
uint8_t sig[uECC_BYTES * 2];
|
||||
|
||||
int i;
|
||||
|
||||
printf("Testing 256 signatures\n");
|
||||
|
||||
for(i=0; i<256; ++i)
|
||||
{
|
||||
for (i = 0; i < 256; ++i) {
|
||||
printf(".");
|
||||
#if !LPC11XX
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
if(!uECC_make_key(l_public, l_private))
|
||||
{
|
||||
if (!uECC_make_key(public, private)) {
|
||||
printf("uECC_make_key() failed\n");
|
||||
continue;
|
||||
}
|
||||
memcpy(l_hash, l_public, uECC_BYTES);
|
||||
memcpy(hash, public, uECC_BYTES);
|
||||
|
||||
if(!uECC_sign(l_private, l_hash, l_sig))
|
||||
{
|
||||
if (!uECC_sign(private, hash, sig)) {
|
||||
printf("uECC_sign() failed\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
if(!uECC_verify(l_public, l_hash, l_sig))
|
||||
{
|
||||
if (!uECC_verify(public, hash, sig)) {
|
||||
printf("uECC_verify() failed\n");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,11 +17,13 @@ Possible values for uECC_PLATFORM are defined below: */
|
||||
#define uECC_arm_thumb2 6
|
||||
|
||||
/* If desired, you can define uECC_WORD_SIZE as appropriate for your platform (1, 4, or 8 bytes).
|
||||
If uECC_WORD_SIZE is not explicitly defined then it will be automatically set based on your platform. */
|
||||
If uECC_WORD_SIZE is not explicitly defined then it will be automatically set based on your
|
||||
platform. */
|
||||
|
||||
/* Inline assembly options.
|
||||
uECC_asm_none - Use standard C99 only.
|
||||
uECC_asm_small - Use GCC inline assembly for the target platform (if available), optimized for minimum size.
|
||||
uECC_asm_small - Use GCC inline assembly for the target platform (if available), optimized for
|
||||
minimum size.
|
||||
uECC_asm_fast - Use GCC inline assembly optimized for maximum speed. */
|
||||
#define uECC_asm_none 0
|
||||
#define uECC_asm_small 1
|
||||
@@ -39,8 +41,9 @@ uECC_asm_fast - Use GCC inline assembly optimized for maximum speed. */
|
||||
#define uECC_CURVE uECC_secp160r1
|
||||
#endif
|
||||
|
||||
/* uECC_SQUARE_FUNC - If enabled (defined as nonzero), this will cause a specific function to be used for (scalar) squaring
|
||||
instead of the generic multiplication function. This will make things faster by about 8% but increases the code size. */
|
||||
/* uECC_SQUARE_FUNC - If enabled (defined as nonzero), this will cause a specific function to be
|
||||
used for (scalar) squaring instead of the generic multiplication function. This will make things
|
||||
faster by about 8% but increases the code size. */
|
||||
#ifndef uECC_SQUARE_FUNC
|
||||
#define uECC_SQUARE_FUNC 1
|
||||
#endif
|
||||
@@ -61,8 +64,8 @@ extern "C"
|
||||
#endif
|
||||
|
||||
/* uECC_RNG_Function type
|
||||
The RNG function should fill p_size random bytes into p_dest. It should return 1 if
|
||||
p_dest was filled with random data, or 0 if the random data could not be generated.
|
||||
The RNG function should fill 'size' random bytes into 'dest'. It should return 1 if
|
||||
'dest' was filled with random data, or 0 if the random data could not be generated.
|
||||
The filled-in values should be either truly random, or from a cryptographically-secure PRNG.
|
||||
|
||||
A correctly functioning RNG function must be set (using uECC_set_rng()) before calling
|
||||
@@ -73,7 +76,7 @@ If you are building on another POSIX-compliant system that supports /dev/random
|
||||
you can define uECC_POSIX to use the predefined RNG. For embedded platforms there is no predefined
|
||||
RNG function; you must provide your own.
|
||||
*/
|
||||
typedef int (*uECC_RNG_Function)(uint8_t *p_dest, unsigned p_size);
|
||||
typedef int (*uECC_RNG_Function)(uint8_t *dest, unsigned size);
|
||||
|
||||
/* uECC_set_rng() function.
|
||||
Set the function that will be used to generate random bytes. The RNG function should
|
||||
@@ -83,35 +86,38 @@ On platforms where there is no predefined RNG function (eg embedded platforms),
|
||||
be called before uECC_make_key() or uECC_sign() are used.
|
||||
|
||||
Inputs:
|
||||
p_rng - The function that will be used to generate random bytes.
|
||||
rng_function - The function that will be used to generate random bytes.
|
||||
*/
|
||||
void uECC_set_rng(uECC_RNG_Function p_rng);
|
||||
void uECC_set_rng(uECC_RNG_Function rng_function);
|
||||
|
||||
/* uECC_make_key() function.
|
||||
Create a public/private key pair.
|
||||
|
||||
Outputs:
|
||||
p_publicKey - Will be filled in with the public key.
|
||||
p_privateKey - Will be filled in with the private key.
|
||||
public_key - Will be filled in with the public key.
|
||||
private_key - Will be filled in with the private key.
|
||||
|
||||
Returns 1 if the key pair was generated successfully, 0 if an error occurred.
|
||||
*/
|
||||
int uECC_make_key(uint8_t p_publicKey[uECC_BYTES*2], uint8_t p_privateKey[uECC_BYTES]);
|
||||
int uECC_make_key(uint8_t public_key[uECC_BYTES*2], uint8_t private_key[uECC_BYTES]);
|
||||
|
||||
/* uECC_shared_secret() function.
|
||||
Compute a shared secret given your secret key and someone else's public key.
|
||||
Note: It is recommended that you hash the result of uECC_shared_secret() before using it for symmetric encryption or HMAC.
|
||||
Note: It is recommended that you hash the result of uECC_shared_secret() before using it for
|
||||
symmetric encryption or HMAC.
|
||||
|
||||
Inputs:
|
||||
p_publicKey - The public key of the remote party.
|
||||
p_privateKey - Your private key.
|
||||
public_key - The public key of the remote party.
|
||||
private_key - Your private key.
|
||||
|
||||
Outputs:
|
||||
p_secret - Will be filled in with the shared secret value.
|
||||
secret - Will be filled in with the shared secret value.
|
||||
|
||||
Returns 1 if the shared secret was generated successfully, 0 if an error occurred.
|
||||
*/
|
||||
int uECC_shared_secret(const uint8_t p_publicKey[uECC_BYTES*2], const uint8_t p_privateKey[uECC_BYTES], uint8_t p_secret[uECC_BYTES]);
|
||||
int uECC_shared_secret(const uint8_t public_key[uECC_BYTES*2],
|
||||
const uint8_t private_key[uECC_BYTES],
|
||||
uint8_t secret[uECC_BYTES]);
|
||||
|
||||
/* uECC_sign() function.
|
||||
Generate an ECDSA signature for a given hash value.
|
||||
@@ -120,15 +126,17 @@ Usage: Compute a hash of the data you wish to sign (SHA-2 is recommended) and pa
|
||||
this function along with your private key.
|
||||
|
||||
Inputs:
|
||||
p_privateKey - Your private key.
|
||||
p_hash - The message hash to sign.
|
||||
private_key - Your private key.
|
||||
hash - The message hash to sign.
|
||||
|
||||
Outputs:
|
||||
p_signature - Will be filled in with the signature value.
|
||||
signature - Will be filled in with the signature value.
|
||||
|
||||
Returns 1 if the signature generated successfully, 0 if an error occurred.
|
||||
*/
|
||||
int uECC_sign(const uint8_t p_privateKey[uECC_BYTES], const uint8_t p_hash[uECC_BYTES], uint8_t p_signature[uECC_BYTES*2]);
|
||||
int uECC_sign(const uint8_t private_key[uECC_BYTES],
|
||||
const uint8_t hash[uECC_BYTES],
|
||||
uint8_t signature[uECC_BYTES*2]);
|
||||
|
||||
/* uECC_verify() function.
|
||||
Verify an ECDSA signature.
|
||||
@@ -137,35 +145,37 @@ Usage: Compute the hash of the signed data using the same hash as the signer and
|
||||
pass it to this function along with the signer's public key and the signature values (r and s).
|
||||
|
||||
Inputs:
|
||||
p_publicKey - The signer's public key
|
||||
p_hash - The hash of the signed data.
|
||||
p_signature - The signature value.
|
||||
public_key - The signer's public key.
|
||||
hash - The hash of the signed data.
|
||||
signature - The signature value.
|
||||
|
||||
Returns 1 if the signature is valid, 0 if it is invalid.
|
||||
*/
|
||||
int uECC_verify(const uint8_t p_publicKey[uECC_BYTES*2], const uint8_t p_hash[uECC_BYTES], const uint8_t p_signature[uECC_BYTES*2]);
|
||||
int uECC_verify(const uint8_t public_key[uECC_BYTES*2],
|
||||
const uint8_t hash[uECC_BYTES],
|
||||
const uint8_t signature[uECC_BYTES*2]);
|
||||
|
||||
/* uECC_compress() function.
|
||||
Compress a public key.
|
||||
|
||||
Inputs:
|
||||
p_publicKey - The public key to compress.
|
||||
public_key - The public key to compress.
|
||||
|
||||
Outputs:
|
||||
p_compressed - Will be filled in with the compressed public key.
|
||||
compressed - Will be filled in with the compressed public key.
|
||||
*/
|
||||
void uECC_compress(const uint8_t p_publicKey[uECC_BYTES*2], uint8_t p_compressed[uECC_BYTES+1]);
|
||||
void uECC_compress(const uint8_t public_key[uECC_BYTES*2], uint8_t compressed[uECC_BYTES+1]);
|
||||
|
||||
/* uECC_decompress() function.
|
||||
Decompress a compressed public key.
|
||||
|
||||
Inputs:
|
||||
p_compressed - The compressed public key.
|
||||
compressed - The compressed public key.
|
||||
|
||||
Outputs:
|
||||
p_publicKey - Will be filled in with the decompressed public key.
|
||||
public_key - Will be filled in with the decompressed public key.
|
||||
*/
|
||||
void uECC_decompress(const uint8_t p_compressed[uECC_BYTES+1], uint8_t p_publicKey[uECC_BYTES*2]);
|
||||
void uECC_decompress(const uint8_t compressed[uECC_BYTES+1], uint8_t public_key[uECC_BYTES*2]);
|
||||
|
||||
/* uECC_valid_public_key() function.
|
||||
Check to see if a public key is valid.
|
||||
@@ -175,24 +185,25 @@ functions. However, you may wish to avoid spending CPU time computing a shared s
|
||||
verifying a signature using an invalid public key.
|
||||
|
||||
Inputs:
|
||||
p_publicKey - The public key to check.
|
||||
public_key - The public key to check.
|
||||
|
||||
Returns 1 if the public key is valid, 0 if it is invalid.
|
||||
*/
|
||||
int uECC_valid_public_key(const uint8_t p_publicKey[uECC_BYTES*2]);
|
||||
int uECC_valid_public_key(const uint8_t public_key[uECC_BYTES*2]);
|
||||
|
||||
/* uECC_compute_public_key() function.
|
||||
Compute the corresponding public key for a private key.
|
||||
|
||||
Inputs:
|
||||
p_privateKey - The private key to compute the public key for
|
||||
private_key - The private key to compute the public key for.
|
||||
|
||||
Outputs:
|
||||
p_publicKey - Will be filled in with the corresponding public key
|
||||
public_key - Will be filled in with the corresponding public key.
|
||||
|
||||
Returns 1 if the key was computed successfully, 0 if an error occurred.
|
||||
*/
|
||||
int uECC_compute_public_key(const uint8_t p_privateKey[uECC_BYTES], uint8_t p_publicKey[uECC_BYTES * 2]);
|
||||
int uECC_compute_public_key(const uint8_t private_key[uECC_BYTES],
|
||||
uint8_t public_key[uECC_BYTES * 2]);
|
||||
|
||||
|
||||
/* uECC_bytes() function.
|
||||
|
||||
Reference in New Issue
Block a user