Update coding style.

This commit is contained in:
Ken MacKay
2015-06-12 22:53:38 -07:00
parent 963d8b839e
commit 400b453176
8 changed files with 1541 additions and 1718 deletions
+172 -173
View File
@@ -20,11 +20,10 @@
#if (uECC_ASM == uECC_asm_fast)
static uint32_t vli_add(uint32_t *p_result, const uint32_t *p_left, const uint32_t *p_right)
{
uint32_t l_carry = 0;
uint32_t l_left;
uint32_t l_right;
static uint32_t vli_add(uint32_t *result, const uint32_t *left, const uint32_t *right) {
uint32_t carry = 0;
uint32_t left_word;
uint32_t right_word;
__asm__ volatile (
".syntax unified \n\t"
@@ -34,34 +33,34 @@ static uint32_t vli_add(uint32_t *p_result, const uint32_t *p_left, const uint32
"stmia %[dptr]!, {%[left]} \n\t" /* Store result word. */
/* Now we just do the remaining words with the carry bit (using ADC) */
REPEAT(DEC(uECC_WORDS), "ldmia %[lptr]!, {%[left]} \n\t"
REPEAT(DEC(uECC_WORDS),
"ldmia %[lptr]!, {%[left]} \n\t"
"ldmia %[rptr]!, {%[right]} \n\t"
"adcs %[left], %[right] \n\t"
"stmia %[dptr]!, {%[left]} \n\t")
"adcs %[carry], %[carry] \n\t" /* Store carry bit in l_carry. */
"adcs %[carry], %[carry] \n\t" /* Store carry bit. */
#if (uECC_PLATFORM != uECC_arm_thumb2)
".syntax divided \n\t"
#endif
#if (uECC_PLATFORM == uECC_arm_thumb)
: [dptr] "+l" (p_result), [lptr] "+l" (p_left), [rptr] "+l" (p_right),
[carry] "+l" (l_carry), [left] "=l" (l_left), [right] "=l" (l_right)
: [dptr] "+l" (result), [lptr] "+l" (left), [rptr] "+l" (right),
[carry] "+l" (carry), [left] "=l" (left_word), [right] "=l" (right_word)
#else
: [dptr] "+r" (p_result), [lptr] "+r" (p_left), [rptr] "+r" (p_right),
[carry] "+r" (l_carry), [left] "=r" (l_left), [right] "=r" (l_right)
: [dptr] "+r" (result), [lptr] "+r" (left), [rptr] "+r" (right),
[carry] "+r" (carry), [left] "=r" (left_word), [right] "=r" (right_word)
#endif
:
: "cc", "memory"
);
return l_carry;
return carry;
}
#define asm_add 1
static uint32_t vli_sub(uint32_t *p_result, const uint32_t *p_left, const uint32_t *p_right)
{
uint32_t l_carry = 0;
uint32_t l_left;
uint32_t l_right;
static uint32_t vli_sub(uint32_t *result, const uint32_t *left, const uint32_t *right) {
uint32_t carry = 0;
uint32_t left_word;
uint32_t right_word;
__asm__ volatile (
".syntax unified \n\t"
@@ -71,36 +70,37 @@ static uint32_t vli_sub(uint32_t *p_result, const uint32_t *p_left, const uint32
"stmia %[dptr]!, {%[left]} \n\t" /* Store result word. */
/* Now we just do the remaining words with the carry bit (using SBC) */
REPEAT(DEC(uECC_WORDS), "ldmia %[lptr]!, {%[left]} \n\t"
REPEAT(DEC(uECC_WORDS),
"ldmia %[lptr]!, {%[left]} \n\t"
"ldmia %[rptr]!, {%[right]} \n\t"
"sbcs %[left], %[right] \n\t"
"stmia %[dptr]!, {%[left]} \n\t")
"adcs %[carry], %[carry] \n\t" /* Store carry bit in l_carry. */
"adcs %[carry], %[carry] \n\t" /* Store carry bit. */
#if (uECC_PLATFORM != uECC_arm_thumb2)
".syntax divided \n\t"
#endif
#if (uECC_PLATFORM == uECC_arm_thumb)
: [dptr] "+l" (p_result), [lptr] "+l" (p_left), [rptr] "+l" (p_right),
[carry] "+l" (l_carry), [left] "=l" (l_left), [right] "=l" (l_right)
: [dptr] "+l" (result), [lptr] "+l" (left), [rptr] "+l" (right),
[carry] "+l" (carry), [left] "=l" (left_word), [right] "=l" (right_word)
#else
: [dptr] "+r" (p_result), [lptr] "+r" (p_left), [rptr] "+r" (p_right),
[carry] "+r" (l_carry), [left] "=r" (l_left), [right] "=r" (l_right)
: [dptr] "+r" (result), [lptr] "+r" (left), [rptr] "+r" (right),
[carry] "+r" (carry), [left] "=r" (left_word), [right] "=r" (right_word)
#endif
:
: "cc", "memory"
);
return !l_carry; // note that on ARM, carry flag set means "no borrow" when subtracting (for some reason...)
return !carry; // note that on ARM, carry flag set means "no borrow" when subtracting
// (for some reason...)
}
#define asm_sub 1
#if (uECC_PLATFORM != uECC_arm_thumb)
#if (uECC_WORDS == 5)
static void vli_mult(uint32_t *p_result, const uint32_t *p_left, const uint32_t *p_right)
{
register uint32_t *r0 __asm__("r0") = p_result;
register const uint32_t *r1 __asm__("r1") = p_left;
register const uint32_t *r2 __asm__("r2") = p_right;
static void vli_mult(uint32_t *result, const uint32_t *left, const uint32_t *right) {
register uint32_t *r0 __asm__("r0") = result;
register const uint32_t *r1 __asm__("r1") = left;
register const uint32_t *r2 __asm__("r2") = right;
__asm__ volatile (
".syntax unified \n\t"
@@ -267,11 +267,10 @@ static void vli_mult(uint32_t *p_result, const uint32_t *p_left, const uint32_t
#endif /* (uECC_WORDS == 5) */
#if (uECC_WORDS == 6)
static void vli_mult(uint32_t *p_result, const uint32_t *p_left, const uint32_t *p_right)
{
register uint32_t *r0 __asm__("r0") = p_result;
register const uint32_t *r1 __asm__("r1") = p_left;
register const uint32_t *r2 __asm__("r2") = p_right;
static void vli_mult(uint32_t *result, const uint32_t *left, const uint32_t *right) {
register uint32_t *r0 __asm__("r0") = result;
register const uint32_t *r1 __asm__("r1") = left;
register const uint32_t *r2 __asm__("r2") = right;
__asm__ volatile (
".syntax unified \n\t"
@@ -503,11 +502,10 @@ static void vli_mult(uint32_t *p_result, const uint32_t *p_left, const uint32_t
#endif /* (uECC_WORDS == 6) */
#if (uECC_WORDS == 8)
static void vli_mult(uint32_t *p_result, const uint32_t *p_left, const uint32_t *p_right)
{
register uint32_t *r0 __asm__("r0") = p_result;
register const uint32_t *r1 __asm__("r1") = p_left;
register const uint32_t *r2 __asm__("r2") = p_right;
static void vli_mult(uint32_t *result, const uint32_t *left, const uint32_t *right) {
register uint32_t *r0 __asm__("r0") = result;
register const uint32_t *r1 __asm__("r1") = left;
register const uint32_t *r2 __asm__("r2") = right;
__asm__ volatile (
".syntax unified \n\t"
@@ -924,10 +922,9 @@ static void vli_mult(uint32_t *p_result, const uint32_t *p_left, const uint32_t
#endif /* (uECC_WORDS == 8) */
#if (uECC_WORDS == 5)
static void vli_square(uint32_t *p_result, const uint32_t *p_left)
{
register uint32_t *r0 __asm__("r0") = p_result;
register const uint32_t *r1 __asm__("r1") = p_left;
static void vli_square(uint32_t *result, const uint32_t *left) {
register uint32_t *r0 __asm__("r0") = result;
register const uint32_t *r1 __asm__("r1") = left;
__asm__ volatile (
".syntax unified \n\t"
@@ -1046,10 +1043,9 @@ static void vli_square(uint32_t *p_result, const uint32_t *p_left)
#endif /* (uECC_WORDS == 5) */
#if (uECC_WORDS == 6)
static void vli_square(uint32_t *p_result, const uint32_t *p_left)
{
register uint32_t *r0 __asm__("r0") = p_result;
register const uint32_t *r1 __asm__("r1") = p_left;
static void vli_square(uint32_t *result, const uint32_t *left) {
register uint32_t *r0 __asm__("r0") = result;
register const uint32_t *r1 __asm__("r1") = left;
__asm__ volatile (
".syntax unified \n\t"
@@ -1204,10 +1200,9 @@ static void vli_square(uint32_t *p_result, const uint32_t *p_left)
#endif /* (uECC_WORDS == 6) */
#if (uECC_WORDS == 8)
static void vli_square(uint32_t *p_result, const uint32_t *p_left)
{
register uint32_t *r0 __asm__("r0") = p_result;
register const uint32_t *r1 __asm__("r1") = p_left;
static void vli_square(uint32_t *result, const uint32_t *left) {
register uint32_t *r0 __asm__("r0") = result;
register const uint32_t *r1 __asm__("r1") = left;
__asm__ volatile (
".syntax unified \n\t"
@@ -1488,86 +1483,86 @@ static void vli_square(uint32_t *p_result, const uint32_t *p_left)
#endif /* (uECC_WORDS == 8) */
#endif /* (uECC_PLATFORM != uECC_arm_thumb) */
#endif /* (uECC_ASM == uECC_asm_fast) */
#if !asm_add
/*
 * Looped ARM/Thumb vli_add (used when no unrolled version defined asm_add).
 * Adds the uECC_WORDS-word integers read through left and right one word at a
 * time, writes each sum word through result, and returns the carry out of the
 * final word.
 *
 * NOTE(review): this span is a rendered diff hunk whose +/- markers were
 * stripped. Lines using p_result/p_left/p_right and the l_* locals are
 * presumably the removed side; the lines using result/left/right and the
 * *_word locals are presumably what the commit leaves in the file.
 */
static uint32_t vli_add(uint32_t *p_result, const uint32_t *p_left, const uint32_t *p_right)
{
uint32_t l_counter = uECC_WORDS;
uint32_t l_carry = 0; /* carry = 0 initially */
uint32_t l_left;
uint32_t l_right;
static uint32_t vli_add(uint32_t *result, const uint32_t *left, const uint32_t *right) {
uint32_t counter = uECC_WORDS; /* Number of words still to process. */
uint32_t carry = 0; /* Carry between words; no carry into the first word. */
uint32_t left_word; /* Scratch register for the current left word / sum. */
uint32_t right_word; /* Scratch register for the current right word. */
__asm__ volatile (
".syntax unified \n\t"
"1: \n\t" /* Top of the per-word loop. */
"ldmia %[lptr]!, {%[left]} \n\t" /* Load left word and advance the left pointer. */
"ldmia %[rptr]!, {%[right]} \n\t" /* Load right word and advance the right pointer. */
"lsrs %[carry], #1 \n\t" /* Set up carry flag (l_carry = 0 after this). */
"lsrs %[carry], #1 \n\t" /* Move the saved carry bit into the C flag (carry = 0 after this). */
"adcs %[left], %[right] \n\t" /* left = left + right + C. */
"adcs %[carry], %[carry] \n\t" /* Store carry bit in l_carry. */
"adcs %[carry], %[carry] \n\t" /* carry = 0 + 0 + C: park the carry in a register, since the
                                   subs below overwrites the flags. */
"stmia %[dptr]!, {%[left]} \n\t" /* Store result word and advance the result pointer. */
"subs %[ctr], #1 \n\t" /* Decrement index. */
"bne 1b \n\t" /* Loop until index == 0. */
"subs %[ctr], #1 \n\t" /* Decrement counter. */
"bne 1b \n\t" /* Loop until counter == 0. */
/* Switch back to divided syntax for everything except Thumb-2 builds. */
#if (uECC_PLATFORM != uECC_arm_thumb2)
".syntax divided \n\t"
#endif
/* Output operands: Thumb-1 uses the "l" (low register) constraint, other
   platforms use the general "r" constraint. */
#if (uECC_PLATFORM == uECC_arm_thumb)
: [dptr] "+l" (p_result), [lptr] "+l" (p_left), [rptr] "+l" (p_right),
[ctr] "+l" (l_counter), [carry] "+l" (l_carry), [left] "=l" (l_left), [right] "=l" (l_right)
: [dptr] "+l" (result), [lptr] "+l" (left), [rptr] "+l" (right),
[ctr] "+l" (counter), [carry] "+l" (carry),
[left] "=l" (left_word), [right] "=l" (right_word)
#else
: [dptr] "+r" (p_result), [lptr] "+r" (p_left), [rptr] "+r" (p_right),
[ctr] "+r" (l_counter), [carry] "+r" (l_carry), [left] "=r" (l_left), [right] "=r" (l_right)
: [dptr] "+r" (result), [lptr] "+r" (left), [rptr] "+r" (right),
[ctr] "+r" (counter), [carry] "+r" (carry),
[left] "=r" (left_word), [right] "=r" (right_word)
#endif
: /* No input-only operands. */
: "cc", "memory" /* Flags are modified and memory is written through result. */
);
return l_carry;
return carry; /* Carry out of the last word. */
}
#define asm_add 1
#endif
#if !asm_sub
/*
 * Looped ARM/Thumb vli_sub (used when no unrolled version defined asm_sub).
 * Subtracts the uECC_WORDS-word integer read through right from the one read
 * through left, one word at a time, writes each difference word through
 * result, and returns the logical negation of the final carry bit, i.e.
 * non-zero when the subtraction borrowed.
 *
 * NOTE(review): this span is a rendered diff hunk whose +/- markers were
 * stripped. Lines using p_result/p_left/p_right and the l_* locals are
 * presumably the removed side; the lines using result/left/right and the
 * *_word locals are presumably what the commit leaves in the file.
 */
static uint32_t vli_sub(uint32_t *p_result, const uint32_t *p_left, const uint32_t *p_right)
{
uint32_t l_counter = uECC_WORDS;
uint32_t l_carry = 1; /* carry = 1 initially (means don't borrow) */
uint32_t l_left;
uint32_t l_right;
static uint32_t vli_sub(uint32_t *result, const uint32_t *left, const uint32_t *right) {
uint32_t counter = uECC_WORDS; /* Number of words still to process. */
uint32_t carry = 1; /* carry = 1 initially (means don't borrow) */
uint32_t left_word; /* Scratch register for the current left word / difference. */
uint32_t right_word; /* Scratch register for the current right word. */
__asm__ volatile (
".syntax unified \n\t"
"1: \n\t" /* Top of the per-word loop. */
"ldmia %[lptr]!, {%[left]} \n\t" /* Load left word and advance the left pointer. */
"ldmia %[rptr]!, {%[right]} \n\t" /* Load right word and advance the right pointer. */
"lsrs %[carry], #1 \n\t" /* Set up carry flag (l_carry = 0 after this). */
"lsrs %[carry], #1 \n\t" /* Move the saved carry bit into the C flag (carry = 0 after this). */
"sbcs %[left], %[right] \n\t" /* Subtract with borrow (C set means no borrow). */
"adcs %[carry], %[carry] \n\t" /* Store carry bit in l_carry. */
"adcs %[carry], %[carry] \n\t" /* carry = 0 + 0 + C: park the carry in a register, since the
                                   subs below overwrites the flags. */
"stmia %[dptr]!, {%[left]} \n\t" /* Store result word and advance the result pointer. */
"subs %[ctr], #1 \n\t" /* Decrement index. */
"bne 1b \n\t" /* Loop until index == 0. */
"subs %[ctr], #1 \n\t" /* Decrement counter. */
"bne 1b \n\t" /* Loop until counter == 0. */
/* Switch back to divided syntax for everything except Thumb-2 builds. */
#if (uECC_PLATFORM != uECC_arm_thumb2)
".syntax divided \n\t"
#endif
/* Output operands: Thumb-1 uses the "l" (low register) constraint, other
   platforms use the general "r" constraint. */
#if (uECC_PLATFORM == uECC_arm_thumb)
: [dptr] "+l" (p_result), [lptr] "+l" (p_left), [rptr] "+l" (p_right),
[ctr] "+l" (l_counter), [carry] "+l" (l_carry), [left] "=l" (l_left), [right] "=l" (l_right)
: [dptr] "+l" (result), [lptr] "+l" (left), [rptr] "+l" (right),
[ctr] "+l" (counter), [carry] "+l" (carry),
[left] "=l" (left_word), [right] "=l" (right_word)
#else
: [dptr] "+r" (p_result), [lptr] "+r" (p_left), [rptr] "+r" (p_right),
[ctr] "+r" (l_counter), [carry] "+r" (l_carry), [left] "=r" (l_left), [right] "=r" (l_right)
: [dptr] "+r" (result), [lptr] "+r" (left), [rptr] "+r" (right),
[ctr] "+r" (counter), [carry] "+r" (carry),
[left] "=r" (left_word), [right] "=r" (right_word)
#endif
: /* No input-only operands. */
: "cc", "memory" /* Flags are modified and memory is written through result. */
);
return !l_carry;
return !carry; /* Final carry of 1 means "no borrow", so invert it to report a borrow. */
}
#define asm_sub 1
#endif
#if !asm_mult
static void vli_mult(uint32_t *p_result, const uint32_t *p_left, const uint32_t *p_right)
{
static void vli_mult(uint32_t *result, const uint32_t *left, const uint32_t *right) {
#if (uECC_PLATFORM != uECC_arm_thumb)
uint32_t c0 = 0;
uint32_t c1 = 0;
@@ -1590,10 +1585,10 @@ static void vli_mult(uint32_t *p_result, const uint32_t *p_left, const uint32_t
"3: \n\t" /* inner loop */
"subs %[t0], %[k], %[i] \n\t" /* t0 = k-i */
"ldr %[t1], [%[right], %[t0]] \n\t" /* t1 = p_right[k-i] */
"ldr %[t0], [%[left], %[i]] \n\t" /* t0 = p_left[i] */
"ldr %[t1], [%[right], %[t0]] \n\t" /* t1 = right[k - i] */
"ldr %[t0], [%[left], %[i]] \n\t" /* t0 = left[i] */
"umull %[t0], %[t1], %[t0], %[t1] \n\t" /* (t0, t1) = p_left[i] * p_right[k-i] */
"umull %[t0], %[t1], %[t0], %[t1] \n\t" /* (t0, t1) = left[i] * right[k - i] */
"adds %[c0], %[t0] \n\t" /* add low word to c0 */
"adcs %[c1], %[t1] \n\t" /* add high word to c1, including carry */
@@ -1601,38 +1596,40 @@ static void vli_mult(uint32_t *p_result, const uint32_t *p_left, const uint32_t
"adds %[i], #4 \n\t" /* i += 4 */
"cmp %[i], %[eccd] \n\t" /* i < uECC_WORDS (times 4)? */
"bge 4f \n\t" /* if not, exit the loop */
"bge 4f \n\t" /* if not, exit the loop */
"cmp %[i], %[k] \n\t" /* i <= k? */
"ble 3b \n\t" /* if so, continue looping */
"ble 3b \n\t" /* if so, continue looping */
"4: \n\t" /* end inner loop */
"str %[c0], [%[result], %[k]] \n\t" /* p_result[k] = c0 */
"str %[c0], [%[result], %[k]] \n\t" /* result[k] = c0 */
"mov %[c0], %[c1] \n\t" /* c0 = c1 */
"mov %[c1], %[c2] \n\t" /* c1 = c2 */
"movs %[c2], #0 \n\t" /* c2 = 0 */
"adds %[k], #4 \n\t" /* k += 4 */
"cmp %[k], %[eccd] \n\t" /* k < uECC_WORDS (times 4) ? */
"blt 1b \n\t" /* if not, loop back, start with i = 0 */
"blt 1b \n\t" /* if not, loop back, start with i = 0 */
"cmp %[k], %[eccd2m1] \n\t" /* k < uECC_WORDS * 2 - 1 (times 4) ? */
"blt 2b \n\t" /* if not, loop back, start with i = (k+1) - uECC_WORDS */
"blt 2b \n\t" /* if not, loop back, start with i = (k + 1) - uECC_WORDS */
/* end outer loop */
"str %[c0], [%[result], %[k]] \n\t" /* p_result[uECC_WORDS * 2 - 1] = c0 */
"str %[c0], [%[result], %[k]] \n\t" /* result[uECC_WORDS * 2 - 1] = c0 */
#if (uECC_PLATFORM != uECC_arm_thumb2)
".syntax divided \n\t"
#endif
: [c0] "+r" (c0), [c1] "+r" (c1), [c2] "+r" (c2), [k] "+r" (k), [i] "=&r" (i), [t0] "=&r" (t0), [t1] "=&r" (t1)
: [result] "r" (p_result), [left] "r" (p_left), [right] "r" (p_right),
[eccd] "I" (uECC_WORDS * 4), [eccdm1] "I" ((uECC_WORDS-1) * 4), [eccd2m1] "I" ((uECC_WORDS * 2 - 1) * 4)
: [c0] "+r" (c0), [c1] "+r" (c1), [c2] "+r" (c2),
[k] "+r" (k), [i] "=&r" (i), [t0] "=&r" (t0), [t1] "=&r" (t1)
: [result] "r" (result), [left] "r" (left), [right] "r" (right),
[eccd] "I" (uECC_WORDS * 4), [eccdm1] "I" ((uECC_WORDS-1) * 4),
[eccd2m1] "I" ((uECC_WORDS * 2 - 1) * 4)
: "cc", "memory"
);
#else /* Thumb-1 */
register uint32_t *r0 __asm__("r0") = p_result;
register const uint32_t *r1 __asm__("r1") = p_left;
register const uint32_t *r2 __asm__("r2") = p_right;
register uint32_t *r0 __asm__("r0") = result;
register const uint32_t *r1 __asm__("r1") = left;
register const uint32_t *r2 __asm__("r2") = right;
__asm__ volatile (
".syntax unified \n\t"
@@ -1641,7 +1638,7 @@ static void vli_mult(uint32_t *p_result, const uint32_t *p_left, const uint32_t
"movs r5, #0 \n\t" /* c2 = 0 */
"movs r6, #0 \n\t" /* k = 0 */
"push {r0} \n\t" /* keep p_result on the stack */
"push {r0} \n\t" /* keep result on the stack */
"1: \n\t" /* outer loop (k < uECC_WORDS) */
"movs r7, #0 \n\t" /* r7 = i = 0 */
@@ -1653,10 +1650,10 @@ static void vli_mult(uint32_t *p_result, const uint32_t *p_left, const uint32_t
"3: \n\t" /* inner loop */
"push {r3, r4, r5, r6} \n\t" /* push things, r3 (c0) is at the top of stack. */
"subs r0, r6, r7 \n\t" /* r0 = k-i */
"subs r0, r6, r7 \n\t" /* r0 = k - i */
"ldr r4, [r2, r0] \n\t" /* r4 = p_right[k-i] */
"ldr r0, [r1, r7] \n\t" /* r0 = p_left[i] */
"ldr r4, [r2, r0] \n\t" /* r4 = right[k - i] */
"ldr r0, [r1, r7] \n\t" /* r0 = left[i] */
"lsrs r3, r0, #16 \n\t" /* r3 = a1 */
"uxth r0, r0 \n\t" /* r0 = a0 */
@@ -1665,21 +1662,21 @@ static void vli_mult(uint32_t *p_result, const uint32_t *p_left, const uint32_t
"uxth r4, r4 \n\t" /* r4 = b0 */
"movs r6, r3 \n\t" /* r6 = a1 */
"muls r6, r5, r6 \n\t" /* r6 = a1*b1 */
"muls r3, r4, r3 \n\t" /* r3 = b0*a1 */
"muls r5, r0, r5 \n\t" /* r5 = a0*b1 */
"muls r0, r4, r0 \n\t" /* r0 = a0*b0 */
"muls r6, r5, r6 \n\t" /* r6 = a1 * b1 */
"muls r3, r4, r3 \n\t" /* r3 = b0 * a1 */
"muls r5, r0, r5 \n\t" /* r5 = a0 * b1 */
"muls r0, r4, r0 \n\t" /* r0 = a0 * b0 */
"movs r4, #0 \n\t" /* r4 = 0 */
"adds r3, r5 \n\t" /* r3 = b0*a1 + a0*b1 */
"adds r3, r5 \n\t" /* r3 = b0 * a1 + a0 * b1 */
"adcs r4, r4 \n\t" /* r4 = carry */
"lsls r4, #16 \n\t" /* r4 = carry << 16 */
"adds r6, r4 \n\t" /* r6 = a1*b1 + carry */
"adds r6, r4 \n\t" /* r6 = a1 * b1 + carry */
"lsls r4, r3, #16 \n\t" /* r4 = (b0*a1 + a0*b1) << 16 */
"lsrs r3, #16 \n\t" /* r3 = (b0*a1 + a0*b1) >> 16 */
"adds r0, r4 \n\t" /* r0 = low word = a0*b0 + ((b0*a1 + a0*b1) << 16) */
"adcs r6, r3 \n\t" /* r6 = high word = a1*b1 + carry + ((b0*a1 + a0*b1) >> 16) */
"lsls r4, r3, #16 \n\t" /* r4 = (b0 * a1 + a0 * b1) << 16 */
"lsrs r3, #16 \n\t" /* r3 = (b0 * a1 + a0 * b1) >> 16 */
"adds r0, r4 \n\t" /* r0 = low word = a0 * b0 + ((b0 * a1 + a0 * b1) << 16) */
"adcs r6, r3 \n\t" /* r6 = high word = a1 * b1 + carry + ((b0 * a1 + a0 * b1) >> 16) */
"pop {r3, r4, r5} \n\t" /* r3 = c0, r4 = c1, r5 = c2 */
"adds r3, r0 \n\t" /* add low word to c0 */
@@ -1691,27 +1688,27 @@ static void vli_mult(uint32_t *p_result, const uint32_t *p_left, const uint32_t
"adds r7, #4 \n\t" /* i += 4 */
"cmp r7, %[eccd] \n\t" /* i < uECC_WORDS (times 4)? */
"bge 4f \n\t" /* if not, exit the loop */
"bge 4f \n\t" /* if not, exit the loop */
"cmp r7, r6 \n\t" /* i <= k? */
"ble 3b \n\t" /* if so, continue looping */
"ble 3b \n\t" /* if so, continue looping */
"4: \n\t" /* end inner loop */
"ldr r0, [sp, #0] \n\t" /* r0 = p_result */
"ldr r0, [sp, #0] \n\t" /* r0 = result */
"str r3, [r0, r6] \n\t" /* p_result[k] = c0 */
"str r3, [r0, r6] \n\t" /* result[k] = c0 */
"mov r3, r4 \n\t" /* c0 = c1 */
"mov r4, r5 \n\t" /* c1 = c2 */
"movs r5, #0 \n\t" /* c2 = 0 */
"adds r6, #4 \n\t" /* k += 4 */
"cmp r6, %[eccd] \n\t" /* k < uECC_WORDS (times 4) ? */
"blt 1b \n\t" /* if not, loop back, start with i = 0 */
"blt 1b \n\t" /* if not, loop back, start with i = 0 */
"cmp r6, %[eccd2m1] \n\t" /* k < uECC_WORDS * 2 - 1 (times 4) ? */
"blt 2b \n\t" /* if not, loop back, start with i = (k+1) - uECC_WORDS */
"blt 2b \n\t" /* if not, loop back, start with i = (k + 1) - uECC_WORDS */
/* end outer loop */
"str r3, [r0, r6] \n\t" /* p_result[uECC_WORDS * 2 - 1] = c0 */
"pop {r0} \n\t" /* pop p_result off the stack */
"str r3, [r0, r6] \n\t" /* result[uECC_WORDS * 2 - 1] = c0 */
"pop {r0} \n\t" /* pop result off the stack */
".syntax divided \n\t"
:
@@ -1725,8 +1722,7 @@ static void vli_mult(uint32_t *p_result, const uint32_t *p_left, const uint32_t
#if uECC_SQUARE_FUNC
#if !asm_square
static void vli_square(uint32_t *p_result, const uint32_t *p_left)
{
static void vli_square(uint32_t *result, const uint32_t *left) {
#if (uECC_PLATFORM != uECC_arm_thumb)
uint32_t c0 = 0;
uint32_t c1 = 0;
@@ -1749,17 +1745,17 @@ static void vli_square(uint32_t *p_result, const uint32_t *p_left)
"3: \n\t" /* inner loop */
"subs %[tt], %[k], %[i] \n\t" /* tt = k-i */
"ldr %[t1], [%[left], %[tt]] \n\t" /* t1 = p_left[k-i] */
"ldr %[t0], [%[left], %[i]] \n\t" /* t0 = p_left[i] */
"ldr %[t1], [%[left], %[tt]] \n\t" /* t1 = left[k - i] */
"ldr %[t0], [%[left], %[i]] \n\t" /* t0 = left[i] */
"umull %[t0], %[t1], %[t0], %[t1] \n\t" /* (t0, t1) = p_left[i] * p_right[k-i] */
"umull %[t0], %[t1], %[t0], %[t1] \n\t" /* (t0, t1) = left[i] * right[k - i] */
"cmp %[i], %[tt] \n\t" /* (i < k-i) ? */
"bge 4f \n\t" /* if i >= k-i, skip */
"lsls %[t1], #1 \n\t" /* high word << 1 */
"adc %[c2], #0 \n\t" /* add carry bit to c2 */
"lsls %[t0], #1 \n\t" /* low word << 1 */
"adc %[t1], #0 \n\t" /* add carry bit to high word */
"cmp %[i], %[tt] \n\t" /* (i < k - i) ? */
"bge 4f \n\t" /* if i >= k - i, skip */
"lsls %[t1], #1 \n\t" /* high word << 1 */
"adc %[c2], #0 \n\t" /* add carry bit to c2 */
"lsls %[t0], #1 \n\t" /* low word << 1 */
"adc %[t1], #0 \n\t" /* add carry bit to high word */
"4: \n\t"
@@ -1769,38 +1765,40 @@ static void vli_square(uint32_t *p_result, const uint32_t *p_left)
"adds %[i], #4 \n\t" /* i += 4 */
"cmp %[i], %[k] \n\t" /* i <= k? */
"bge 5f \n\t" /* if not, exit the loop */
"subs %[tt], %[k], %[i] \n\t" /* tt = k-i */
"cmp %[i], %[tt] \n\t" /* i <= k-i? */
"ble 3b \n\t" /* if so, continue looping */
"bge 5f \n\t" /* if not, exit the loop */
"subs %[tt], %[k], %[i] \n\t" /* tt = k - i */
"cmp %[i], %[tt] \n\t" /* i <= k - i? */
"ble 3b \n\t" /* if so, continue looping */
"5: \n\t" /* end inner loop */
"str %[c0], [%[result], %[k]] \n\t" /* p_result[k] = c0 */
"str %[c0], [%[result], %[k]] \n\t" /* result[k] = c0 */
"mov %[c0], %[c1] \n\t" /* c0 = c1 */
"mov %[c1], %[c2] \n\t" /* c1 = c2 */
"movs %[c2], #0 \n\t" /* c2 = 0 */
"adds %[k], #4 \n\t" /* k += 4 */
"cmp %[k], %[eccd] \n\t" /* k < uECC_WORDS (times 4) ? */
"blt 1b \n\t" /* if not, loop back, start with i = 0 */
"blt 1b \n\t" /* if not, loop back, start with i = 0 */
"cmp %[k], %[eccd2m1] \n\t" /* k < uECC_WORDS * 2 - 1 (times 4) ? */
"blt 2b \n\t" /* if not, loop back, start with i = (k+1) - uECC_WORDS */
"blt 2b \n\t" /* if not, loop back, start with i = (k + 1) - uECC_WORDS */
/* end outer loop */
"str %[c0], [%[result], %[k]] \n\t" /* p_result[uECC_WORDS * 2 - 1] = c0 */
"str %[c0], [%[result], %[k]] \n\t" /* result[uECC_WORDS * 2 - 1] = c0 */
#if (uECC_PLATFORM != uECC_arm_thumb2)
".syntax divided \n\t"
#endif
: [c0] "+r" (c0), [c1] "+r" (c1), [c2] "+r" (c2), [k] "+r" (k), [i] "=&r" (i), [tt] "=&r" (tt), [t0] "=&r" (t0), [t1] "=&r" (t1)
: [result] "r" (p_result), [left] "r" (p_left),
[eccd] "I" (uECC_WORDS * 4), [eccdm1] "I" ((uECC_WORDS-1) * 4), [eccd2m1] "I" ((uECC_WORDS * 2 - 1) * 4)
: [c0] "+r" (c0), [c1] "+r" (c1), [c2] "+r" (c2),
[k] "+r" (k), [i] "=&r" (i), [tt] "=&r" (tt), [t0] "=&r" (t0), [t1] "=&r" (t1)
: [result] "r" (result), [left] "r" (left),
[eccd] "I" (uECC_WORDS * 4), [eccdm1] "I" ((uECC_WORDS-1) * 4),
[eccd2m1] "I" ((uECC_WORDS * 2 - 1) * 4)
: "cc", "memory"
);
#else
register uint32_t *r0 __asm__("r0") = p_result;
register const uint32_t *r1 __asm__("r1") = p_left;
register uint32_t *r0 __asm__("r0") = result;
register const uint32_t *r1 __asm__("r1") = left;
__asm__ volatile (
".syntax unified \n\t"
@@ -1809,7 +1807,7 @@ static void vli_square(uint32_t *p_result, const uint32_t *p_left)
"movs r4, #0 \n\t" /* c2 = 0 */
"movs r5, #0 \n\t" /* k = 0 */
"push {r0} \n\t" /* keep p_result on the stack */
"push {r0} \n\t" /* keep result on the stack */
"1: \n\t" /* outer loop (k < uECC_WORDS) */
"movs r6, #0 \n\t" /* r6 = i = 0 */
@@ -1821,10 +1819,10 @@ static void vli_square(uint32_t *p_result, const uint32_t *p_left)
"3: \n\t" /* inner loop */
"push {r2, r3, r4, r5} \n\t" /* push things, r2 (c0) is at the top of stack. */
"subs r7, r5, r6 \n\t" /* r7 = k-i */
"subs r7, r5, r6 \n\t" /* r7 = k - i */
"ldr r3, [r1, r7] \n\t" /* r3 = p_left[k-i] */
"ldr r0, [r1, r6] \n\t" /* r0 = p_left[i] */
"ldr r3, [r1, r7] \n\t" /* r3 = left[k - i] */
"ldr r0, [r1, r6] \n\t" /* r0 = left[i] */
"lsrs r2, r0, #16 \n\t" /* r2 = a1 */
"uxth r0, r0 \n\t" /* r0 = a0 */
@@ -1833,26 +1831,26 @@ static void vli_square(uint32_t *p_result, const uint32_t *p_left)
"uxth r3, r3 \n\t" /* r3 = b0 */
"movs r5, r2 \n\t" /* r5 = a1 */
"muls r5, r4, r5 \n\t" /* r5 = a1*b1 */
"muls r2, r3, r2 \n\t" /* r2 = b0*a1 */
"muls r4, r0, r4 \n\t" /* r4 = a0*b1 */
"muls r0, r3, r0 \n\t" /* r0 = a0*b0 */
"muls r5, r4, r5 \n\t" /* r5 = a1 * b1 */
"muls r2, r3, r2 \n\t" /* r2 = b0 * a1 */
"muls r4, r0, r4 \n\t" /* r4 = a0 * b1 */
"muls r0, r3, r0 \n\t" /* r0 = a0 * b0 */
"movs r3, #0 \n\t" /* r3 = 0 */
"adds r2, r4 \n\t" /* r2 = b0*a1 + a0*b1 */
"adds r2, r4 \n\t" /* r2 = b0 * a1 + a0 * b1 */
"adcs r3, r3 \n\t" /* r3 = carry */
"lsls r3, #16 \n\t" /* r3 = carry << 16 */
"adds r5, r3 \n\t" /* r5 = a1*b1 + carry */
"adds r5, r3 \n\t" /* r5 = a1 * b1 + carry */
"lsls r3, r2, #16 \n\t" /* r3 = (b0*a1 + a0*b1) << 16 */
"lsrs r2, #16 \n\t" /* r2 = (b0*a1 + a0*b1) >> 16 */
"adds r0, r3 \n\t" /* r0 = low word = a0*b0 + ((b0*a1 + a0*b1) << 16) */
"adcs r5, r2 \n\t" /* r5 = high word = a1*b1 + carry + ((b0*a1 + a0*b1) >> 16) */
"lsls r3, r2, #16 \n\t" /* r3 = (b0 * a1 + a0 * b1) << 16 */
"lsrs r2, #16 \n\t" /* r2 = (b0 * a1 + a0 * b1) >> 16 */
"adds r0, r3 \n\t" /* r0 = low word = a0 * b0 + ((b0 * a1 + a0 * b1) << 16) */
"adcs r5, r2 \n\t" /* r5 = high word = a1 * b1 + carry + ((b0 * a1 + a0 * b1) >> 16) */
"movs r3, #0 \n\t" /* r3 = 0 */
"cmp r6, r7 \n\t" /* (i < k-i) ? */
"cmp r6, r7 \n\t" /* (i < k - i) ? */
"mov r7, r3 \n\t" /* r7 = 0 (does not affect condition)*/
"bge 4f \n\t" /* if i >= k-i, skip */
"bge 4f \n\t" /* if i >= k - i, skip */
"lsls r5, #1 \n\t" /* high word << 1 */
"adcs r7, r3 \n\t" /* r7 = carry bit for c2 */
"lsls r0, #1 \n\t" /* low word << 1 */
@@ -1870,33 +1868,34 @@ static void vli_square(uint32_t *p_result, const uint32_t *p_left)
"adds r6, #4 \n\t" /* i += 4 */
"cmp r6, r5 \n\t" /* i <= k? */
"bge 5f \n\t" /* if not, exit the loop */
"subs r7, r5, r6 \n\t" /* r7 = k-i */
"cmp r6, r7 \n\t" /* i <= k-i? */
"ble 3b \n\t" /* if so, continue looping */
"bge 5f \n\t" /* if not, exit the loop */
"subs r7, r5, r6 \n\t" /* r7 = k - i */
"cmp r6, r7 \n\t" /* i <= k - i? */
"ble 3b \n\t" /* if so, continue looping */
"5: \n\t" /* end inner loop */
"ldr r0, [sp, #0] \n\t" /* r0 = p_result */
"ldr r0, [sp, #0] \n\t" /* r0 = result */
"str r2, [r0, r5] \n\t" /* p_result[k] = c0 */
"str r2, [r0, r5] \n\t" /* result[k] = c0 */
"mov r2, r3 \n\t" /* c0 = c1 */
"mov r3, r4 \n\t" /* c1 = c2 */
"movs r4, #0 \n\t" /* c2 = 0 */
"adds r5, #4 \n\t" /* k += 4 */
"cmp r5, %[eccd] \n\t" /* k < uECC_WORDS (times 4) ? */
"blt 1b \n\t" /* if not, loop back, start with i = 0 */
"blt 1b \n\t" /* if not, loop back, start with i = 0 */
"cmp r5, %[eccd2m1] \n\t" /* k < uECC_WORDS * 2 - 1 (times 4) ? */
"blt 2b \n\t" /* if not, loop back, start with i = (k+1) - uECC_WORDS */
"blt 2b \n\t" /* if not, loop back, start with i = (k + 1) - uECC_WORDS */
/* end outer loop */
"str r2, [r0, r5] \n\t" /* p_result[uECC_WORDS * 2 - 1] = c0 */
"pop {r0} \n\t" /* pop p_result off the stack */
"str r2, [r0, r5] \n\t" /* result[uECC_WORDS * 2 - 1] = c0 */
"pop {r0} \n\t" /* pop result off the stack */
".syntax divided \n\t"
: [r0] "+l" (r0), [r1] "+l" (r1)
: [eccd] "I" (uECC_WORDS * 4), [eccdm1] "I" ((uECC_WORDS-1) * 4), [eccd2m1] "I" ((uECC_WORDS * 2 - 1) * 4)
: "r2", "r3", "r4", "r5", "r6", "r7", "cc", "memory"
: [eccd] "I" (uECC_WORDS * 4), [eccdm1] "I" ((uECC_WORDS-1) * 4),
[eccd2m1] "I" ((uECC_WORDS * 2 - 1) * 4)
: "r2", "r3", "r4", "r5", "r6", "cc", "memory"
);
#endif
}
+200 -212
View File
@@ -44,32 +44,30 @@
#if (uECC_ASM == uECC_asm_fast)
/*
 * AVR vli_clear: emits uECC_BYTES post-incrementing stores of r1 through the
 * pointer passed in, i.e. writes the contents of r1 to every byte of the
 * value.
 *
 * NOTE(review): relies on r1 holding zero on entry (presumably avr-gcc's
 * fixed zero register) - confirm against the toolchain ABI.
 * NOTE(review): this span is a rendered diff hunk whose +/- markers were
 * stripped; the p_vli lines are presumably the removed side and the vli
 * lines the side the commit keeps.
 */
static void vli_clear(uint8_t *p_vli)
{
static void vli_clear(uint8_t *vli) {
__asm__ volatile (
REPEAT(uECC_BYTES, "st %a[ptr]+, r1 \n\t")
: [ptr] "+e" (p_vli)
REPEAT(uECC_BYTES,
"st %a[ptr]+, r1 \n\t") /* Store r1 at *ptr, then advance ptr by one byte. */
: [ptr] "+e" (vli) /* "+e": read-write operand in a pointer register pair. */
: /* No input-only operands. */
: "r0", "cc", "memory" /* NOTE(review): r0 is listed as clobbered although the visible
                          instructions only reference r1 - confirm whether it is needed. */
);
}
#define asm_clear 1
/*
 * AVR vli_set: copies uECC_BYTES bytes from src to dest with an unrolled
 * load/store sequence that uses r0 as the temporary.
 *
 * NOTE(review): this span is a rendered diff hunk whose +/- markers were
 * stripped; the p_dest/p_src lines are presumably the removed side and the
 * dest/src lines the side the commit keeps.
 */
static void vli_set(uint8_t *p_dest, const uint8_t *p_src)
{
static void vli_set(uint8_t *dest, const uint8_t *src) {
__asm__ volatile (
REPEAT(uECC_BYTES, "ld r0, %a[sptr]+ \n\t"
REPEAT(uECC_BYTES,
"ld r0, %a[sptr]+ \n\t" /* r0 = *src, then advance src by one byte. */
"st %a[dptr]+, r0 \n\t") /* *dest = r0, then advance dest by one byte. */
: [dptr] "+e" (p_dest), [sptr] "+e" (p_src)
: [dptr] "+e" (dest), [sptr] "+e" (src) /* Both pointers live in pointer register pairs. */
: /* No input-only operands. */
: "r0", "cc", "memory" /* r0 is the copy temporary; memory is written through dest. */
);
}
#define asm_set 1
static void vli_rshift1(uint8_t *p_vli)
{
static void vli_rshift1(uint8_t *vli) {
__asm__ volatile (
"adiw r30, " STR(uECC_BYTES) " \n\t"
"ld r0, -z \n\t" /* Load byte. */
@@ -77,23 +75,22 @@ static void vli_rshift1(uint8_t *p_vli)
"st z, r0 \n\t" /* Store the first result byte. */
/* Now we just do the remaining bytes with the carry bit (using ROR) */
REPEAT(DEC(uECC_BYTES), "ld r0, -z \n\t"
REPEAT(DEC(uECC_BYTES),
"ld r0, -z \n\t"
"ror r0 \n\t"
"st z, r0 \n\t")
: "+z" (p_vli)
: "+z" (vli)
:
: "r0", "cc", "memory"
);
}
#define asm_rshift1 1
/* Computes p_result = p_left + p_right, returning carry. Can modify in place. */
static uint8_t vli_add(uint8_t *p_result, const uint8_t *p_left, const uint8_t *p_right)
{
uint8_t l_carry = 0;
uint8_t l_left;
uint8_t l_right;
/* Computes result = left + right, returning carry. Can modify in place. */
static uint8_t vli_add(uint8_t *result, const uint8_t *left, const uint8_t *right) {
uint8_t carry = 0;
uint8_t left_byte;
uint8_t right_byte;
__asm__ volatile (
"ld %[left], x+ \n\t" /* Load left byte. */
@@ -102,30 +99,29 @@ static uint8_t vli_add(uint8_t *p_result, const uint8_t *p_left, const uint8_t *
"st z+, %[left] \n\t" /* Store the first result byte. */
/* Now we just do the remaining bytes with the carry bit (using ADC) */
REPEAT(DEC(uECC_BYTES), "ld %[left], x+ \n\t"
REPEAT(DEC(uECC_BYTES),
"ld %[left], x+ \n\t"
"ld %[right], y+ \n\t"
"adc %[left], %[right] \n\t"
"st z+, %[left] \n\t")
"adc %[carry], %[carry] \n\t" /* Store carry bit in l_carry. */
"adc %[carry], %[carry] \n\t" /* Store carry bit. */
"sbiw r28, " STR(uECC_BYTES) " \n\t" /* Restore Y */
: "+z" (p_result), "+x" (p_left),
[carry] "+r" (l_carry), [left] "=&r" (l_left), [right] "=&r" (l_right)
: "y" (p_right)
: "+z" (result), "+x" (left),
[carry] "+r" (carry), [left] "=&r" (left_byte), [right] "=&r" (right_byte)
: "y" (right)
: "cc", "memory"
);
return l_carry;
return carry;
}
#define asm_add 1
/* Computes p_result = p_left - p_right, returning borrow. Can modify in place. */
static uint8_t vli_sub(uint8_t *p_result, const uint8_t *p_left, const uint8_t *p_right)
{
uint8_t l_borrow = 0;
uint8_t l_left;
uint8_t l_right;
/* Computes result = left - right, returning borrow. Can modify in place. */
static uint8_t vli_sub(uint8_t *result, const uint8_t *left, const uint8_t *right) {
uint8_t borrow = 0;
uint8_t left_byte;
uint8_t right_byte;
__asm__ volatile (
"ld %[left], x+ \n\t" /* Load left byte. */
@@ -134,28 +130,27 @@ static uint8_t vli_sub(uint8_t *p_result, const uint8_t *p_left, const uint8_t *
"st z+, %[left] \n\t" /* Store the first result byte. */
/* Now we just do the remaining bytes with the carry bit (using SBC) */
REPEAT(DEC(uECC_BYTES), "ld %[left], x+ \n\t"
REPEAT(DEC(uECC_BYTES),
"ld %[left], x+ \n\t"
"ld %[right], y+ \n\t"
"sbc %[left], %[right] \n\t"
"st z+, %[left] \n\t")
"adc %[borrow], %[borrow] \n\t" /* Store carry bit in l_borrow. */
"adc %[borrow], %[borrow] \n\t" /* Store carry bit in borrow. */
"sbiw r28, " STR(uECC_BYTES) " \n\t" /* Restore Y */
: "+z" (p_result), "+x" (p_left),
[borrow] "+r" (l_borrow), [left] "=&r" (l_left), [right] "=&r" (l_right)
: "y" (p_right)
: "+z" (result), "+x" (left),
[borrow] "+r" (borrow), [left] "=&r" (left_byte), [right] "=&r" (right_byte)
: "y" (right)
: "cc", "memory"
);
return l_borrow;
return borrow;
}
#define asm_sub 1
#if (uECC_BYTES == 20)
__attribute((noinline))
static void vli_mult(uint8_t *p_result, const uint8_t *p_left, const uint8_t *p_right)
{
static void vli_mult(uint8_t *result, const uint8_t *left, const uint8_t *right) {
__asm__ volatile (
"adiw r30, 10 \n\t"
"adiw r28, 10 \n\t"
@@ -2060,17 +2055,17 @@ static void vli_mult(uint8_t *p_result, const uint8_t *p_left, const uint8_t *p_
"st z+, r23 \n\t"
"st z+, r24 \n\t"
"eor r1, r1 \n\t"
: "+x" (p_left), "+y" (p_right), "+z" (p_result)
: "+x" (left), "+y" (right), "+z" (result)
:
: "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12",
"r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", "r24", "r25", "cc", "memory"
: "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
"r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20",
"r21", "r22", "r23", "r24", "r25", "cc", "memory"
);
}
#define asm_mult 1
#elif (uECC_BYTES == 24)
__attribute((noinline))
static void vli_mult(uint8_t *p_result, const uint8_t *p_left, const uint8_t *p_right)
{
static void vli_mult(uint8_t *result, const uint8_t *left, const uint8_t *right) {
__asm__ volatile (
"adiw r30, 20 \n\t"
"adiw r28, 20 \n\t"
@@ -4843,17 +4838,17 @@ static void vli_mult(uint8_t *p_result, const uint8_t *p_left, const uint8_t *p_
"st z+, r23 \n\t"
"eor r1, r1 \n\t"
: "+x" (p_left), "+y" (p_right), "+z" (p_result)
: "+x" (left), "+y" (right), "+z" (result)
:
: "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12",
"r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", "r24", "r25", "cc", "memory"
: "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
"r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20",
"r21", "r22", "r23", "r24", "r25", "cc", "memory"
);
}
#define asm_mult 1
#elif (uECC_BYTES == 32)
__attribute((noinline))
static void vli_mult(uint8_t *p_result, const uint8_t *p_left, const uint8_t *p_right)
{
static void vli_mult(uint8_t *result, const uint8_t *left, const uint8_t *right) {
__asm__ volatile (
"adiw r30, 30 \n\t"
"adiw r28, 30 \n\t"
@@ -9769,10 +9764,11 @@ static void vli_mult(uint8_t *p_result, const uint8_t *p_left, const uint8_t *p_
"st z+, r24 \n\t"
"eor r1, r1 \n\t"
: "+x" (p_left), "+y" (p_right), "+z" (p_result)
: "+x" (left), "+y" (right), "+z" (result)
:
: "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12",
"r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", "r24", "r25", "cc", "memory"
: "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
"r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20",
"r21", "r22", "r23", "r24", "r25", "cc", "memory"
);
}
#define asm_mult 1
@@ -9781,8 +9777,7 @@ static void vli_mult(uint8_t *p_result, const uint8_t *p_left, const uint8_t *p_
#if uECC_SQUARE_FUNC
#if (uECC_BYTES == 20)
static void vli_square(uint8_t *p_result, const uint8_t *p_left)
{
static void vli_square(uint8_t *result, const uint8_t *left) {
__asm__ volatile (
"ld r2, x+ \n\t"
"ld r3, x+ \n\t"
@@ -10937,10 +10932,11 @@ static void vli_square(uint8_t *p_result, const uint8_t *p_left)
"st z+, r23 \n\t"
"st z+, r25 \n\t"
"eor r1, r1 \n\t"
: "+x" (p_left), "+z" (p_result)
: "+x" (left), "+z" (result)
:
: "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12",
"r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", "r24", "r25", "cc", "memory"
: "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
"r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20",
"r21", "r22", "r23", "r24", "r25", "cc", "memory"
);
}
#define asm_square 1
@@ -10948,8 +10944,7 @@ static void vli_square(uint8_t *p_result, const uint8_t *p_left)
#elif (uECC_BYTES == 24)
__attribute((noinline))
static void vli_square(uint8_t *p_result, const uint8_t *p_left)
{
static void vli_square(uint8_t *result, const uint8_t *left) {
__asm__ volatile (
"ldi r25, 0 \n\t"
"movw r28, r26 \n\t"
@@ -12596,10 +12591,11 @@ static void vli_square(uint8_t *p_result, const uint8_t *p_left)
"st z+, r23 \n\t"
"st z+, r28 \n\t"
"eor r1, r1 \n\t"
: "+x" (p_left), "+z" (p_result)
: "+x" (left), "+z" (result)
:
: "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12",
"r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r28", "r29", "cc", "memory"
: "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
"r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20",
"r21", "r22", "r23", "r24", "r25", "r28", "r29", "cc", "memory"
);
}
#define asm_square 1
@@ -12607,8 +12603,7 @@ static void vli_square(uint8_t *p_result, const uint8_t *p_left)
#elif (uECC_BYTES == 32)
__attribute((noinline))
static void vli_square(uint8_t *p_result, const uint8_t *p_left)
{
static void vli_square(uint8_t *result, const uint8_t *left) {
__asm__ volatile (
"ldi r25, 0 \n\t"
"movw r28, r26 \n\t"
@@ -15431,10 +15426,11 @@ static void vli_square(uint8_t *p_result, const uint8_t *p_left)
"st z+, r23 \n\t"
"st z+, r28 \n\t"
"eor r1, r1 \n\t"
: "+x" (p_left), "+z" (p_result)
: "+x" (left), "+z" (result)
:
: "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12",
"r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r28", "r29", "cc", "memory"
: "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
"r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20",
"r21", "r22", "r23", "r24", "r25", "r28", "r29", "cc", "memory"
);
}
#define asm_square 1
@@ -15442,29 +15438,29 @@ static void vli_square(uint8_t *p_result, const uint8_t *p_left)
#endif /* uECC_BYTES == xx */
#endif /* uECC_SQUARE_FUNC */
static void vli_modSub_fast(uint8_t *p_result, const uint8_t *p_left, const uint8_t *p_right)
{
static void vli_modSub_fast(uint8_t *result, const uint8_t *left, const uint8_t *right) {
uint8_t t1, t2;
__asm__ volatile (
"push r28 \n\t" /* Save Y */
"push r29 \n\t"
"ld %[t1], x+ \n\t" /* Load left word. */
"ld %[t2], y+ \n\t" /* Load right word. */
"ld %[t1], x+ \n\t" /* Load left word. */
"ld %[t2], y+ \n\t" /* Load right word. */
"sub %[t1], %[t2] \n\t" /* Subtract the first word. */
"st z+, %[t1] \n\t" /* Store the first result word. */
"st z+, %[t1] \n\t" /* Store the first result word. */
/* Now we just do the remaining words with the carry bit (using SBC) */
REPEAT(DEC(uECC_BYTES), "ld %[t1], x+ \n\t"
REPEAT(DEC(uECC_BYTES),
"ld %[t1], x+ \n\t"
"ld %[t2], y+ \n\t"
"sbc %[t1], %[t2] \n\t"
"st z+, %[t1] \n\t")
"brcs 1f \n\t" /* If borrow is set, then we need to add */
"brcs 1f \n\t" /* If borrow is set, then we need to add */
"rjmp done \n\t" /* otherwise we are done */
"1: \n\t"
"sbiw r30, " STR(uECC_BYTES) " \n\t" /* make z point at p_result again */
"sbiw r30, " STR(uECC_BYTES) " \n\t" /* make z point at result again */
"ldi r28, lo8(curve_p) \n\t" /* make y point at curve_p */
"ldi r29, hi8(curve_p) \n\t"
@@ -15473,7 +15469,8 @@ static void vli_modSub_fast(uint8_t *p_result, const uint8_t *p_left, const uint
"ld %[t2], y+ \n\t"
"add %[t1], %[t2] \n\t"
"st z+, %[t1] \n\t"
REPEAT(DEC(uECC_BYTES), "ld %[t1], z \n\t"
REPEAT(DEC(uECC_BYTES),
"ld %[t1], z \n\t"
"ld %[t2], y+ \n\t"
"adc %[t1], %[t2] \n\t"
"st z+, %[t1] \n\t")
@@ -15482,18 +15479,17 @@ static void vli_modSub_fast(uint8_t *p_result, const uint8_t *p_left, const uint
"pop r29 \n\t" /* Restore Y */
"pop r28 \n\t"
: "+z" (p_result), "+x" (p_left),
: "+z" (result), "+x" (left),
[t1] "=&r" (t1), [t2] "=&r" (t2)
: "y" (p_right)
: "y" (right)
: "cc", "memory"
);
}
#define asm_modSub_fast 1
#if uECC_CURVE == uECC_secp160r1
static void vli_mmod_fast(uint8_t *RESTRICT p_result, uint8_t *RESTRICT p_product)
{
uint8_t l_carry = 0;
static void vli_mmod_fast(uint8_t *RESTRICT result, uint8_t *RESTRICT product) {
uint8_t carry = 0;
__asm__ volatile (
"in r30, __SP_L__ \n\t"
"in r31, __SP_H__ \n\t"
@@ -15504,23 +15500,25 @@ static void vli_mmod_fast(uint8_t *RESTRICT p_result, uint8_t *RESTRICT p_produc
"out __SREG__, r0 \n\t"
"out __SP_L__, r30 \n\t"
"adiw r30, 25 \n\t" /* we are shifting by 31 bits, so shift over 4 bytes (+ 1 since z initially points below the stack) */
"adiw r26, 40 \n\t" /* end of p_product */
"ld r18, -x \n\t" /* Load word. */
"lsr r18 \n\t" /* Shift. */
"st -z, r18 \n\t" /* Store the first result word. */
"adiw r30, 25 \n\t" /* we are shifting by 31 bits, so shift over 4 bytes
(+ 1 since z initially points below the stack) */
"adiw r26, 40 \n\t" /* end of product */
"ld r18, -x \n\t" /* Load word. */
"lsr r18 \n\t" /* Shift. */
"st -z, r18 \n\t" /* Store the first result word. */
/* Now we just do the remaining words with the carry bit (using ROR) */
REPEAT(19, "ld r18, -x \n\t"
REPEAT(19,
"ld r18, -x \n\t"
"ror r18 \n\t"
"st -z, r18 \n\t")
"eor r18, r18 \n\t" /* r18 = 0 */
"ror r18 \n\t" /* get last bit */
"st -z, r18 \n\t" /* store it */
"ror r18 \n\t" /* get last bit */
"st -z, r18 \n\t" /* store it */
"sbiw r30, 3 \n\t" /* move z back to point at tmp */
/* now we add p_right */
/* now we add right */
"ld r18, x+ \n\t"
"st z+, r18 \n\t" /* the first 3 bytes do not need to be added */
"ld r18, x+ \n\t"
@@ -15534,12 +15532,13 @@ static void vli_mmod_fast(uint8_t *RESTRICT p_result, uint8_t *RESTRICT p_produc
"st z+, r18 \n\t"
/* Now we just do the remaining words with the carry bit (using ADC) */
REPEAT(16, "ld r18, x+ \n\t"
REPEAT(16,
"ld r18, x+ \n\t"
"ld r19, z \n\t"
"adc r18, r19 \n\t"
"st z+, r18 \n\t")
/* Propagate over the remaining bytes of p_result */
/* Propagate over the remaining bytes of result */
"ld r18, z \n\t"
"adc r18, r1 \n\t"
"st z+, r18 \n\t"
@@ -15557,27 +15556,29 @@ static void vli_mmod_fast(uint8_t *RESTRICT p_result, uint8_t *RESTRICT p_produc
"st z+, r18 \n\t"
"sbiw r30, 24 \n\t" /* move z back to point at tmp */
"sbiw r26, 40 \n\t" /* move x back to point at p_product */
"sbiw r26, 40 \n\t" /* move x back to point at product */
/* add low bytes of tmp to p_product, storing in p_result */
/* add low bytes of tmp to product, storing in result */
"ld r18, z+ \n\t"
"ld r19, x+ \n\t"
"add r18, r19 \n\t"
"st y+, r18 \n\t"
REPEAT(19, "ld r18, z+ \n\t"
REPEAT(19,
"ld r18, z+ \n\t"
"ld r19, x+ \n\t"
"adc r18, r19 \n\t"
"st y+, r18 \n\t")
"adc %[carry], __zero_reg__ \n\t" /* Store carry bit (carry flag is cleared). */
/* at this point x is at the end of p_product, y is at the end of p_result, z is 20 bytes into tmp */
"sbiw r28, 20 \n\t" /* move y back to point at p_result */
"adiw r30, 4 \n\t" /* move z to point to the end of tmp */
"adc %[carry], __zero_reg__ \n\t" /* Store carry bit (carry flag is cleared). */
/* at this point x is at the end of product, y is at the end of result,
z is 20 bytes into tmp */
"sbiw r28, 20 \n\t" /* move y back to point at result */
"adiw r30, 4 \n\t" /* move z to point to the end of tmp */
/* do omega_mult again with the 4 relevant bytes */
/* z points to the end of tmp, x points to the end of p_product */
"ld r18, -z \n\t" /* Load word. */
"lsr r18 \n\t" /* Shift. */
"st -x, r18 \n\t" /* Store the first result word. */
/* z points to the end of tmp, x points to the end of product */
"ld r18, -z \n\t" /* Load word. */
"lsr r18 \n\t" /* Shift. */
"st -x, r18 \n\t" /* Store the first result word. */
"ld r18, -z \n\t"
"ror r18 \n\t"
@@ -15590,8 +15591,8 @@ static void vli_mmod_fast(uint8_t *RESTRICT p_result, uint8_t *RESTRICT p_produc
"st -x, r18 \n\t"
"eor r18, r18 \n\t" /* r18 = 0 */
"ror r18 \n\t" /* get last bit */
"st -x, r18 \n\t" /* store it */
"ror r18 \n\t" /* get last bit */
"st -x, r18 \n\t" /* store it */
"sbiw r26, 3 \n\t" /* move x back to point at beginning */
/* now we add a copy of the 4 bytes */
@@ -15624,25 +15625,28 @@ static void vli_mmod_fast(uint8_t *RESTRICT p_result, uint8_t *RESTRICT p_produc
"adc r18, r1 \n\t"
"st x+, r18 \n\t"
/* now z points to the end of tmp, x points to the end of p_product (y still points at p_result) */
/* now z points to the end of tmp, x points to the end of product
(y still points at result) */
"sbiw r26, 8 \n\t" /* move x back to point at beginning of actual data */
/* add into p_result */
/* add into result */
"ld r18, x+ \n\t"
"ld r19, y \n\t"
"add r18, r19 \n\t"
"st y+, r18 \n\t"
REPEAT(7, "ld r18, x+ \n\t"
REPEAT(7,
"ld r18, x+ \n\t"
"ld r19, y \n\t"
"adc r18, r19 \n\t"
"st y+, r18 \n\t")
/* Done adding, now propagate carry bit */
REPEAT(12, "ld r18, y \n\t"
REPEAT(12,
"ld r18, y \n\t"
"adc r18, __zero_reg__ \n\t"
"st y+, r18 \n\t")
"adc %[carry], __zero_reg__ \n\t" /* Store carry bit (carry flag is cleared). */
"sbiw r28, 20 \n\t" /* move y back to point at p_result */
"adc %[carry], __zero_reg__ \n\t" /* Store carry bit (carry flag is cleared). */
"sbiw r28, 20 \n\t" /* move y back to point at result */
"sbiw r30, 1 \n\t" /* fix stack pointer */
"in r0, __SREG__ \n\t"
@@ -15651,32 +15655,27 @@ static void vli_mmod_fast(uint8_t *RESTRICT p_result, uint8_t *RESTRICT p_produc
"out __SREG__, r0 \n\t"
"out __SP_L__, r30 \n\t"
: "+x" (p_product), [carry] "+r" (l_carry)
: "y" (p_result)
: "+x" (product), [carry] "+r" (carry)
: "y" (result)
: "r0", "r18", "r19", "r30", "r31", "cc", "memory"
);
if(l_carry > 0)
{
--l_carry;
vli_sub(p_result, p_result, curve_p);
if (carry > 0) {
--carry;
vli_sub(result, result, curve_p);
}
if(l_carry > 0)
{
vli_sub(p_result, p_result, curve_p);
if (carry > 0) {
vli_sub(result, result, curve_p);
}
if(vli_cmp(p_result, curve_p) > 0)
{
vli_sub(p_result, p_result, curve_p);
if (vli_cmp(result, curve_p) > 0) {
vli_sub(result, result, curve_p);
}
}
#define asm_mmod_fast 1
#elif (uECC_CURVE == uECC_secp256k1)
static void vli_mmod_fast(uint8_t *RESTRICT p_result, uint8_t *RESTRICT p_product)
{
uint8_t l_carry = 0;
static void vli_mmod_fast(uint8_t *RESTRICT result, uint8_t *RESTRICT product) {
uint8_t carry = 0;
__asm__ volatile (
"in r30, __SP_L__ \n\t"
"in r31, __SP_H__ \n\t"
@@ -15687,8 +15686,8 @@ static void vli_mmod_fast(uint8_t *RESTRICT p_result, uint8_t *RESTRICT p_produc
"out __SREG__, r0 \n\t"
"out __SP_L__, r30 \n\t"
"adiw r30, 1 \n\t" /* add 1 since z initially points below the stack */
"adiw r26, 32 \n\t" /* p_product + uECC_WORDS */
"adiw r30, 1 \n\t" /* add 1 since z initially points below the stack */
"adiw r26, 32 \n\t" /* product + uECC_WORDS */
"ldi r25, 0x03 \n\t"
"ldi r24, 0xD1 \n\t"
"ld r18, x+ \n\t"
@@ -15852,27 +15851,29 @@ static void vli_mmod_fast(uint8_t *RESTRICT p_result, uint8_t *RESTRICT p_produc
"eor r1, r1 \n\t" /* make r1 be 0 again */
"sbiw r30, 37 \n\t" /* move z back to point at tmp */
"subi r26, 64 \n\t" /* move x back to point at p_product */
"subi r26, 64 \n\t" /* move x back to point at product */
"sbc r27, __zero_reg__ \n\t"
/* add low bytes of tmp to p_product, storing in p_result */
/* add low bytes of tmp to product, storing in result */
"ld r18, z+ \n\t"
"ld r19, x+ \n\t"
"add r18, r19 \n\t"
"st y+, r18 \n\t"
REPEAT(31, "ld r18, z+ \n\t"
REPEAT(31,
"ld r18, z+ \n\t"
"ld r19, x+ \n\t"
"adc r18, r19 \n\t"
"st y+, r18 \n\t")
"adc %[carry], __zero_reg__ \n\t" /* Store carry bit (carry flag is cleared). */
/* at this point x is at the end of p_product, y is at the end of p_result, z is 32 bytes into tmp */
"sbiw r28, 32 \n\t" /* move y back to point at p_result */
"adc %[carry], __zero_reg__ \n\t" /* Store carry bit (carry flag is cleared). */
/* at this point x is at the end of product, y is at the end of result,
z is 32 bytes into tmp */
"sbiw r28, 32 \n\t" /* move y back to point at result */
/* do omega_mult again with the 5 relevant bytes */
/* z points to l_tmp + uECC_WORDS, x points to the end of p_product */
"sbiw r26, 32 \n\t" /* shift x back to point into the p_product buffer (we can overwrite it now) */
/* z points to tmp + uECC_WORDS, x points to the end of product */
"sbiw r26, 32 \n\t" /* shift x back to point into the product buffer
(we can overwrite it now) */
"ld r18, z+ \n\t"
"ld r19, z+ \n\t"
"ld r20, z+ \n\t"
@@ -15947,25 +15948,28 @@ static void vli_mmod_fast(uint8_t *RESTRICT p_result, uint8_t *RESTRICT p_produc
"st x+, r22 \n\t"
"eor r1, r1 \n\t" /* make r1 be 0 again */
/* now z points to the end of tmp, x points to the end of p_product (y still points at p_result) */
/* now z points to the end of tmp, x points to the end of product
(y still points at result) */
"sbiw r26, 10 \n\t" /* move x back to point at beginning of actual data */
/* add into p_result */
/* add into result */
"ld r18, x+ \n\t"
"ld r19, y \n\t"
"add r18, r19 \n\t"
"st y+, r18 \n\t"
REPEAT(9, "ld r18, x+ \n\t"
REPEAT(9,
"ld r18, x+ \n\t"
"ld r19, y \n\t"
"adc r18, r19 \n\t"
"st y+, r18 \n\t")
/* Done adding, now propagate carry bit */
REPEAT(22, "ld r18, y \n\t"
REPEAT(22,
"ld r18, y \n\t"
"adc r18, __zero_reg__ \n\t"
"st y+, r18 \n\t")
"adc %[carry], __zero_reg__ \n\t" /* Store carry bit (carry flag is cleared). */
"sbiw r28, 32 \n\t" /* move y back to point at p_result */
"sbiw r28, 32 \n\t" /* move y back to point at result */
"sbiw r30, 1 \n\t" /* fix stack pointer */
"in r0, __SREG__ \n\t"
@@ -15974,24 +15978,20 @@ static void vli_mmod_fast(uint8_t *RESTRICT p_result, uint8_t *RESTRICT p_produc
"out __SREG__, r0 \n\t"
"out __SP_L__, r30 \n\t"
: "+x" (p_product), [carry] "+r" (l_carry)
: "y" (p_result)
: "+x" (product), [carry] "+r" (carry)
: "y" (result)
: "r0", "r18", "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r30", "r31", "cc", "memory"
);
if(l_carry > 0)
{
--l_carry;
vli_sub(p_result, p_result, curve_p);
if (carry > 0) {
--carry;
vli_sub(result, result, curve_p);
}
if(l_carry > 0)
{
vli_sub(p_result, p_result, curve_p);
if (carry > 0) {
vli_sub(result, result, curve_p);
}
if(vli_cmp(p_result, curve_p) > 0)
{
vli_sub(p_result, p_result, curve_p);
if (vli_cmp(result, curve_p) > 0) {
vli_sub(result, result, curve_p);
}
}
#define asm_mmod_fast 1
@@ -16001,8 +16001,7 @@ static void vli_mmod_fast(uint8_t *RESTRICT p_result, uint8_t *RESTRICT p_produc
#endif /* (uECC_ASM == uECC_asm_fast) */
#if !asm_rshift1
static void vli_rshift1(uint8_t *p_vli)
{
static void vli_rshift1(uint8_t *vli) {
uint8_t i = uECC_BYTES;
__asm__ volatile (
"adiw r30, " STR(uECC_BYTES) " \n\t"
@@ -16015,7 +16014,7 @@ static void vli_rshift1(uint8_t *p_vli)
"dec %[i] \n\t"
"brne 1b \n\t"
: "+z" (p_vli), [i] "+r" (i)
: "+z" (vli), [i] "+r" (i)
:
: "r0", "cc", "memory"
);
@@ -16024,12 +16023,11 @@ static void vli_rshift1(uint8_t *p_vli)
#endif
#if !asm_add
static uint8_t vli_add(uint8_t *p_result, const uint8_t *p_left, const uint8_t *p_right)
{
static uint8_t vli_add(uint8_t *result, const uint8_t *left, const uint8_t *right) {
uint8_t i = uECC_BYTES;
uint8_t l_carry = 0;
uint8_t l_left;
uint8_t l_right;
uint8_t carry = 0;
uint8_t left_byte;
uint8_t right_byte;
__asm__ volatile (
"clc \n\t"
@@ -16042,27 +16040,25 @@ static uint8_t vli_add(uint8_t *p_result, const uint8_t *p_left, const uint8_t *
"dec %[i] \n\t"
"brne 1b \n\t"
"adc %[carry], %[carry] \n\t" /* Store carry bit in l_carry. */
"adc %[carry], %[carry] \n\t" /* Store carry bit. */
"sbiw r28, " STR(uECC_BYTES) " \n\t" /* Restore Y */
: "+z" (p_result), "+x" (p_left), [i] "+r" (i),
[carry] "+r" (l_carry), [left] "=&r" (l_left), [right] "=&r" (l_right)
: "y" (p_right)
: "+z" (result), "+x" (left), [i] "+r" (i),
[carry] "+r" (carry), [left] "=&r" (left_byte), [right] "=&r" (right_byte)
: "y" (right)
: "cc", "memory"
);
return l_carry;
return carry;
}
#define asm_add 1
#endif
#if !asm_sub
static uint8_t vli_sub(uint8_t *p_result, const uint8_t *p_left, const uint8_t *p_right)
{
static uint8_t vli_sub(uint8_t *result, const uint8_t *left, const uint8_t *right) {
uint8_t i = uECC_BYTES;
uint8_t l_borrow = 0;
uint8_t l_left;
uint8_t l_right;
uint8_t borrow = 0;
uint8_t left_byte;
uint8_t right_byte;
__asm__ volatile (
"clc \n\t"
@@ -16075,37 +16071,33 @@ static uint8_t vli_sub(uint8_t *p_result, const uint8_t *p_left, const uint8_t *
"dec %[i] \n\t"
"brne 1b \n\t"
"adc %[borrow], %[borrow] \n\t" /* Store carry bit in l_borrow. */
"adc %[borrow], %[borrow] \n\t" /* Store carry bit in borrow. */
"sbiw r28, " STR(uECC_BYTES) " \n\t" /* Restore Y */
: "+z" (p_result), "+x" (p_left), [i] "+r" (i),
[borrow] "+r" (l_borrow), [left] "=&r" (l_left), [right] "=&r" (l_right)
: "y" (p_right)
: "+z" (result), "+x" (left), [i] "+r" (i),
[borrow] "+r" (borrow), [left] "=&r" (left_byte), [right] "=&r" (right_byte)
: "y" (right)
: "cc", "memory"
);
return l_borrow;
return borrow;
}
#define asm_sub 1
#endif
#if !asm_mult
__attribute((noinline))
static void vli_mult(uint8_t *p_result, const uint8_t *p_left, const uint8_t *p_right)
{
static void vli_mult(uint8_t *result, const uint8_t *left, const uint8_t *right) {
uint8_t r0 = 0;
uint8_t r1 = 0;
uint8_t r2 = 0;
uint8_t l_zero = 0;
uint8_t zero = 0;
uint8_t k, i;
__asm__ volatile (
"ldi %[k], 1 \n\t" /* k = 1; k < uECC_BYTES; ++k */
"1: \n\t"
"ldi %[i], 0 \n\t" /* i=0; i < k; ++i */
"ldi %[i], 0 \n\t" /* i = 0; i < k; ++i */
"add r28, %[k] \n\t" /* pre-add right ptr */
"adc r29, %[zero] \n\t"
@@ -16137,10 +16129,10 @@ static void vli_mult(uint8_t *p_result, const uint8_t *p_left, const uint8_t *p_
/* second half */
"ldi %[k], " STR(uECC_BYTES) " \n\t" /* k = uECC_BYTES; k > 0; --k */
"adiw r28, " STR(uECC_BYTES) " \n\t" /* move right ptr to point at the end of p_right */
"adiw r28, " STR(uECC_BYTES) " \n\t" /* move right ptr to point at the end of right */
"1: \n\t"
"ldi %[i], 0 \n\t" /* i=0; i < k; ++i */
"ldi %[i], 0 \n\t" /* i = 0; i < k; ++i */
"2: \n\t"
"ld r0, x+ \n\t"
@@ -16164,22 +16156,21 @@ static void vli_mult(uint8_t *p_result, const uint8_t *p_left, const uint8_t *p_
"mov %[r2], %[zero] \n\t"
"dec %[k] \n\t"
"sub r26, %[k] \n\t" /* fix up left ptr (after k is decremented, so next time we start 1 higher) */
"sub r26, %[k] \n\t" /* fix up left ptr (after k is decremented, so next time
we start 1 higher) */
"sbc r27, %[zero] \n\t"
"cpi %[k], 0 \n\t"
"brne 1b \n\t" /* loop if k > 0 */
"st z+, %[r0] \n\t" /* Store last result byte. */
"eor r1, r1 \n\t" /* fix r1 to be 0 again */
"sbiw r28, " STR(uECC_BYTES) " \n\t" /* Restore Y */
: "+z" (p_result), "+x" (p_left),
[r0] "+r" (r0), [r1] "+r" (r1), [r2] "+r" (r2), [zero] "+r" (l_zero),
: "+z" (result), "+x" (left),
[r0] "+r" (r0), [r1] "+r" (r1), [r2] "+r" (r2), [zero] "+r" (zero),
[k] "=&a" (k), [i] "=&a" (i)
: "y" (p_right)
: "y" (right)
: "r0", "cc", "memory"
);
}
@@ -16188,18 +16179,15 @@ static void vli_mult(uint8_t *p_result, const uint8_t *p_left, const uint8_t *p_
#if uECC_SQUARE_FUNC
#if !asm_square
static void vli_square(uint8_t *p_result, const uint8_t *p_left)
{
static void vli_square(uint8_t *result, const uint8_t *left) {
uint8_t r0 = 0;
uint8_t r1 = 0;
uint8_t r2 = 0;
uint8_t l_zero = 0;
uint8_t zero = 0;
uint8_t k;
__asm__ volatile (
"ldi %[k], 1 \n\t" /* k = 1; k < uECC_BYTES*2; ++k */
"ldi %[k], 1 \n\t" /* k = 1; k < uECC_BYTES * 2; ++k */
"1: \n\t"
@@ -16223,7 +16211,8 @@ static void vli_square(uint8_t *p_result, const uint8_t *p_left)
"3: \n\t"
"ld r0, x+ \n\t"
"cp r26, r30 \n\t" /* if left == right here, then we are done after this mult (and we don't need to double) */
"cp r26, r30 \n\t" /* if left == right here, then we are done after this mult
(and we don't need to double) */
"breq 4f \n\t"
"ld r1, -z \n\t"
"mul r0, r1 \n\t"
@@ -16248,9 +16237,9 @@ static void vli_square(uint8_t *p_result, const uint8_t *p_left)
"adc %[r2], %[zero] \n\t"
"5: \n\t"
"movw r30, %[result] \n\t" /* make z point to result */
"st z+, %[r0] \n\t" /* Store the result. */
"movw %[result], r30 \n\t" /* update result ptr*/
"movw r30, %[result] \n\t" /* make z point to result */
"st z+, %[r0] \n\t" /* Store the result. */
"movw %[result], r30 \n\t" /* update result ptr*/
"mov %[r0], %[r1] \n\t"
"mov %[r1], %[r2] \n\t"
"mov %[r2], %[zero] \n\t"
@@ -16261,13 +16250,12 @@ static void vli_square(uint8_t *p_result, const uint8_t *p_left)
"movw r30, %[result] \n\t" /* make z point to result */
"st z+, %[r0] \n\t" /* Store last result byte. */
"eor r1, r1 \n\t" /* fix r1 to be 0 again */
: [result] "+r" (p_result),
[r0] "+r" (r0), [r1] "+r" (r1), [r2] "+r" (r2), [zero] "+r" (l_zero),
: [result] "+r" (result),
[r0] "+r" (r0), [r1] "+r" (r1), [r2] "+r" (r2), [zero] "+r" (zero),
[k] "=&a" (k)
: [orig] "r" (p_left), [max] "M" (2*uECC_BYTES)
: [orig] "r" (left), [max] "M" (2*uECC_BYTES)
: "r0", "r26", "r27", "r30", "r31", "cc", "memory"
);
}
+34 -47
View File
@@ -2,95 +2,82 @@
extern "C" {
static int RNG(uint8_t *p_dest, unsigned p_size)
{
// Use the least-significant bits from the ADC for an unconnected pin (or connected to a source of random noise)
// This can take a long time to generate random data if the result of analogRead(0) doesn't change very frequently.
while(p_size) {
uint8_t l_val = 0;
for(unsigned i=0; i<8; ++i)
{
int l_init = analogRead(0);
int l_count = 0;
while(analogRead(0) == l_init)
{
++l_count;
static int RNG(uint8_t *dest, unsigned size) {
// Use the least-significant bits from the ADC for an unconnected pin (or connected to a source of
// random noise). This can take a long time to generate random data if the result of analogRead(0)
// doesn't change very frequently.
while (size) {
uint8_t val = 0;
for (unsigned i = 0; i < 8; ++i) {
int init = analogRead(0);
int count = 0;
while (analogRead(0) == init) {
++count;
}
if(l_count == 0)
{
l_val = (l_val << 1) | (l_init & 0x01);
}
else
{
l_val = (l_val << 1) | (l_count & 0x01);
if (count == 0) {
val = (val << 1) | (init & 0x01);
} else {
val = (val << 1) | (count & 0x01);
}
}
*p_dest = l_val;
++p_dest;
--p_size;
*dest = val;
++dest;
--size;
}
// NOTE: it would be a good idea to hash the resulting random data using SHA-256 or similar.
return 1;
}
}
} // extern "C"
void setup()
{
void setup() {
Serial.begin(115200);
Serial.print("Testing ecc\n");
uECC_set_rng(&RNG);
}
// Arduino main loop: generates two key pairs, derives the ECDH shared secret from each side,
// reports the time taken (in millis() ticks) for every step over Serial, and checks that both
// secrets match. Returns early from the iteration if any uECC call reports failure.
void loop() {
  uint8_t private1[uECC_BYTES];
  uint8_t private2[uECC_BYTES];

  uint8_t public1[uECC_BYTES * 2];
  uint8_t public2[uECC_BYTES * 2];

  uint8_t secret1[uECC_BYTES];
  uint8_t secret2[uECC_BYTES];

  unsigned long a = millis();
  int r = uECC_make_key(public1, private1);
  unsigned long b = millis();

  Serial.print("Made key 1 in "); Serial.println(b-a);
  // Do not continue with an uninitialised key pair if key generation failed.
  if (!r) {
    Serial.print("make_key() failed (1)\n");
    return;
  }

  a = millis();
  r = uECC_make_key(public2, private2);
  b = millis();
  Serial.print("Made key 2 in "); Serial.println(b-a);
  if (!r) {
    Serial.print("make_key() failed (2)\n");
    return;
  }

  a = millis();
  r = uECC_shared_secret(public2, private1, secret1);
  b = millis();
  Serial.print("Shared secret 1 in "); Serial.println(b-a);
  if (!r) {
    Serial.print("shared_secret() failed (1)\n");
    return;
  }

  a = millis();
  r = uECC_shared_secret(public1, private2, secret2);
  b = millis();
  Serial.print("Shared secret 2 in "); Serial.println(b-a);
  if (!r) {
    Serial.print("shared_secret() failed (2)\n");
    return;
  }

  if (memcmp(secret1, secret2, sizeof(secret1)) != 0) {
    Serial.print("Shared secrets are not identical!\n");
  } else {
    Serial.print("Shared secrets are identical\n");
  }
}
+19 -26
View File
@@ -5,64 +5,57 @@
#include <stdio.h>
#include <string.h>
/* Prints the 'size' bytes of 'vli' to stdout as two-digit uppercase hex values, each followed
   by a space, starting with vli[size - 1] and ending with vli[0]. No newline is written;
   nothing is printed when 'size' is 0. The buffer is only read, hence the const pointer. */
void vli_print(const uint8_t *vli, unsigned int size) {
    while (size) {
        printf("%02X ", (unsigned)vli[size - 1]);
        --size;
    }
}
int main()
{
int main() {
int i;
int success;
uint8_t l_private[uECC_BYTES];
uint8_t l_public[uECC_BYTES * 2];
uint8_t l_public_computed[uECC_BYTES * 2];
uint8_t private[uECC_BYTES];
uint8_t public[uECC_BYTES * 2];
uint8_t public_computed[uECC_BYTES * 2];
printf("Testing 256 random private key pairs\n");
for(i=0; i<256; ++i)
{
for (i = 0; i < 256; ++i) {
printf(".");
#if !LPC11XX
fflush(stdout);
#endif
int success = uECC_make_key(l_public, l_private);
success = uECC_make_key(public, private);
if (!success) {
printf("uECC_make_key() failed\n");
return 1;
}
success = uECC_compute_public_key(l_private, l_public_computed);
success = uECC_compute_public_key(private, public_computed);
if (!success) {
printf("uECC_compute_public_key() failed\n");
}
if(memcmp(l_public, l_public_computed, sizeof(l_public)) != 0)
{
if (memcmp(public, public_computed, sizeof(public)) != 0) {
printf("Computed and provided public keys are not identical!\n");
printf("Computed public key = ");
vli_print(l_public_computed, uECC_BYTES);
vli_print(public_computed, uECC_BYTES);
printf("\n");
printf("Provided public key = ");
vli_print(l_public, uECC_BYTES);
vli_print(public, uECC_BYTES);
printf("\n");
printf("Private key = ");
vli_print(l_private, uECC_BYTES);
vli_print(private, uECC_BYTES);
printf("\n");
}
}
printf("\n");
printf("Testing private key = 0\n");
memset(l_private, 0, uECC_BYTES);
success = uECC_compute_public_key(l_private, l_public_computed);
memset(private, 0, uECC_BYTES);
success = uECC_compute_public_key(private, public_computed);
if (success) {
printf("uECC_compute_public_key() should have failed\n");
}
+25 -38
View File
@@ -11,34 +11,29 @@
#include "/Projects/lpc11xx/peripherals/time.h"
/* Generator state for fake_rng(); updated on every 8-byte chunk produced. */
static uint64_t g_rand = 88172645463325252ull;

/* Deterministic stand-in RNG for the test harness (NOT cryptographically secure).
   Fills 'size' bytes at 'dest' with the output of a 64-bit xor/shift generator, consuming
   one generator step per chunk of up to 8 bytes. Always returns 1, which is the
   uECC_RNG_Function convention for "dest was filled". */
int fake_rng(uint8_t *dest, unsigned size) {
    while (size) {
        g_rand ^= (g_rand << 13);
        g_rand ^= (g_rand >> 7);
        g_rand ^= (g_rand << 17);

        unsigned amount = (size > 8 ? 8 : size);
        memcpy(dest, &g_rand, amount);
        /* Advance the output cursor; without this every chunk overwrote the first 8 bytes
           and the remainder of the buffer was never filled. */
        dest += amount;
        size -= amount;
    }
    return 1;
}
#endif
/* Dumps 'size' bytes of 'vli' to stdout in uppercase hex ("%02X "), walking from the last
   byte (vli[size - 1]) down to vli[0]. Emits no trailing newline and prints nothing for
   size == 0. The data is never modified, so the pointer is const-qualified. */
void vli_print(const uint8_t *vli, unsigned int size) {
    while (size) {
        printf("%02X ", (unsigned)vli[size - 1]);
        --size;
    }
}
int main()
{
int main() {
#if LPC11XX
uartInit(BAUD_115200);
initTime();
@@ -47,57 +42,49 @@ int main()
#endif
int i;
uint8_t l_private1[uECC_BYTES];
uint8_t l_private2[uECC_BYTES];
uint8_t l_public1[uECC_BYTES * 2];
uint8_t l_public2[uECC_BYTES * 2];
uint8_t l_secret1[uECC_BYTES];
uint8_t l_secret2[uECC_BYTES];
uint8_t private1[uECC_BYTES];
uint8_t private2[uECC_BYTES];
uint8_t public1[uECC_BYTES * 2];
uint8_t public2[uECC_BYTES * 2];
uint8_t secret1[uECC_BYTES];
uint8_t secret2[uECC_BYTES];
printf("Testing 256 random private key pairs\n");
for(i=0; i<256; ++i)
{
for (i = 0; i < 256; ++i) {
printf(".");
#if !LPC11XX
fflush(stdout);
#endif
if(!uECC_make_key(l_public1, l_private1) || !uECC_make_key(l_public2, l_private2))
{
if (!uECC_make_key(public1, private1) || !uECC_make_key(public2, private2)) {
printf("uECC_make_key() failed\n");
return 1;
}
if(!uECC_shared_secret(l_public2, l_private1, l_secret1))
{
if (!uECC_shared_secret(public2, private1, secret1)) {
printf("shared_secret() failed (1)\n");
return 1;
}
if(!uECC_shared_secret(l_public1, l_private2, l_secret2))
{
if (!uECC_shared_secret(public1, private2, secret2)) {
printf("shared_secret() failed (2)\n");
return 1;
}
if(memcmp(l_secret1, l_secret2, sizeof(l_secret1)) != 0)
{
if (memcmp(secret1, secret2, sizeof(secret1)) != 0) {
printf("Shared secrets are not identical!\n");
printf("Shared secret 1 = ");
vli_print(l_secret1, uECC_BYTES);
vli_print(secret1, uECC_BYTES);
printf("\n");
printf("Shared secret 2 = ");
vli_print(l_secret2, uECC_BYTES);
vli_print(secret2, uECC_BYTES);
printf("\n");
printf("Private key 1 = ");
vli_print(l_private1, uECC_BYTES);
vli_print(private1, uECC_BYTES);
printf("\n");
printf("Private key 2 = ");
vli_print(l_private2, uECC_BYTES);
vli_print(private2, uECC_BYTES);
printf("\n");
}
}
+15 -26
View File
@@ -11,25 +11,22 @@
#include "/Projects/lpc11xx/peripherals/time.h"
/* Generator state for fake_rng(); updated on every 8-byte chunk produced. */
static uint64_t g_rand = 88172645463325252ull;

/* Deterministic pseudo-random filler used in place of a real RNG by this test
   (NOT cryptographically secure). Writes 'size' bytes to 'dest', taking up to 8 bytes from
   each step of a 64-bit xor/shift generator. Always returns 1, the uECC_RNG_Function
   convention for success. */
int fake_rng(uint8_t *dest, unsigned size) {
    while (size) {
        g_rand ^= (g_rand << 13);
        g_rand ^= (g_rand >> 7);
        g_rand ^= (g_rand << 17);

        unsigned amount = (size > 8 ? 8 : size);
        memcpy(dest, &g_rand, amount);
        /* Move past the bytes just written; previously 'dest' stayed fixed, so requests
           longer than 8 bytes left everything after the first chunk unfilled. */
        dest += amount;
        size -= amount;
    }
    return 1;
}
#endif
int main()
{
int main() {
#if LPC11XX
uartInit(BAUD_115200);
initTime();
@@ -37,39 +34,31 @@ int main()
uECC_set_rng(&fake_rng);
#endif
uint8_t l_public[uECC_BYTES*2];
uint8_t l_private[uECC_BYTES];
uint8_t l_hash[uECC_BYTES];
uint8_t l_sig[uECC_BYTES*2];
uint8_t public[uECC_BYTES * 2];
uint8_t private[uECC_BYTES];
uint8_t hash[uECC_BYTES];
uint8_t sig[uECC_BYTES * 2];
int i;
printf("Testing 256 signatures\n");
for(i=0; i<256; ++i)
{
for (i = 0; i < 256; ++i) {
printf(".");
#if !LPC11XX
fflush(stdout);
#endif
if(!uECC_make_key(l_public, l_private))
{
if (!uECC_make_key(public, private)) {
printf("uECC_make_key() failed\n");
continue;
}
memcpy(l_hash, l_public, uECC_BYTES);
memcpy(hash, public, uECC_BYTES);
if(!uECC_sign(l_private, l_hash, l_sig))
{
if (!uECC_sign(private, hash, sig)) {
printf("uECC_sign() failed\n");
continue;
}
if(!uECC_verify(l_public, l_hash, l_sig))
{
if (!uECC_verify(public, hash, sig)) {
printf("uECC_verify() failed\n");
}
}
+1029 -1160
View File
File diff suppressed because it is too large Load Diff
+47 -36
View File
@@ -17,11 +17,13 @@ Possible values for uECC_PLATFORM are defined below: */
#define uECC_arm_thumb2 6
/* If desired, you can define uECC_WORD_SIZE as appropriate for your platform (1, 4, or 8 bytes).
If uECC_WORD_SIZE is not explicitly defined then it will be automatically set based on your platform. */
If uECC_WORD_SIZE is not explicitly defined then it will be automatically set based on your
platform. */
/* Inline assembly options.
uECC_asm_none - Use standard C99 only.
uECC_asm_small - Use GCC inline assembly for the target platform (if available), optimized for minimum size.
uECC_asm_small - Use GCC inline assembly for the target platform (if available), optimized for
minimum size.
uECC_asm_fast - Use GCC inline assembly optimized for maximum speed. */
#define uECC_asm_none 0
#define uECC_asm_small 1
@@ -39,8 +41,9 @@ uECC_asm_fast - Use GCC inline assembly optimized for maximum speed. */
#define uECC_CURVE uECC_secp160r1
#endif
/* uECC_SQUARE_FUNC - If enabled (defined as nonzero), this will cause a specific function to be used for (scalar) squaring
instead of the generic multiplication function. This will make things faster by about 8% but increases the code size. */
/* uECC_SQUARE_FUNC - If enabled (defined as nonzero), this will cause a specific function to be
used for (scalar) squaring instead of the generic multiplication function. This will make things
faster by about 8% but increases the code size. */
#ifndef uECC_SQUARE_FUNC
#define uECC_SQUARE_FUNC 1
#endif
@@ -61,8 +64,8 @@ extern "C"
#endif
/* uECC_RNG_Function type
The RNG function should fill p_size random bytes into p_dest. It should return 1 if
p_dest was filled with random data, or 0 if the random data could not be generated.
The RNG function should fill 'size' random bytes into 'dest'. It should return 1 if
'dest' was filled with random data, or 0 if the random data could not be generated.
The filled-in values should be either truly random, or from a cryptographically-secure PRNG.
A correctly functioning RNG function must be set (using uECC_set_rng()) before calling
@@ -73,7 +76,7 @@ If you are building on another POSIX-compliant system that supports /dev/random
you can define uECC_POSIX to use the predefined RNG. For embedded platforms there is no predefined
RNG function; you must provide your own.
*/
typedef int (*uECC_RNG_Function)(uint8_t *p_dest, unsigned p_size);
typedef int (*uECC_RNG_Function)(uint8_t *dest, unsigned size);
/* uECC_set_rng() function.
Set the function that will be used to generate random bytes. The RNG function should
@@ -83,35 +86,38 @@ On platforms where there is no predefined RNG function (eg embedded platforms),
be called before uECC_make_key() or uECC_sign() are used.
Inputs:
p_rng - The function that will be used to generate random bytes.
rng_function - The function that will be used to generate random bytes.
*/
void uECC_set_rng(uECC_RNG_Function p_rng);
void uECC_set_rng(uECC_RNG_Function rng_function);
/* uECC_make_key() function.
Create a public/private key pair.
Outputs:
p_publicKey - Will be filled in with the public key.
p_privateKey - Will be filled in with the private key.
public_key - Will be filled in with the public key.
private_key - Will be filled in with the private key.
Returns 1 if the key pair was generated successfully, 0 if an error occurred.
*/
int uECC_make_key(uint8_t p_publicKey[uECC_BYTES*2], uint8_t p_privateKey[uECC_BYTES]);
int uECC_make_key(uint8_t public_key[uECC_BYTES*2], uint8_t private_key[uECC_BYTES]);
/* uECC_shared_secret() function.
Compute a shared secret given your secret key and someone else's public key.
Note: It is recommended that you hash the result of uECC_shared_secret() before using it for symmetric encryption or HMAC.
Note: It is recommended that you hash the result of uECC_shared_secret() before using it for
symmetric encryption or HMAC.
Inputs:
p_publicKey - The public key of the remote party.
p_privateKey - Your private key.
public_key - The public key of the remote party.
private_key - Your private key.
Outputs:
p_secret - Will be filled in with the shared secret value.
secret - Will be filled in with the shared secret value.
Returns 1 if the shared secret was generated successfully, 0 if an error occurred.
*/
int uECC_shared_secret(const uint8_t p_publicKey[uECC_BYTES*2], const uint8_t p_privateKey[uECC_BYTES], uint8_t p_secret[uECC_BYTES]);
int uECC_shared_secret(const uint8_t public_key[uECC_BYTES*2],
const uint8_t private_key[uECC_BYTES],
uint8_t secret[uECC_BYTES]);
/* uECC_sign() function.
Generate an ECDSA signature for a given hash value.
@@ -120,15 +126,17 @@ Usage: Compute a hash of the data you wish to sign (SHA-2 is recommended) and pa
this function along with your private key.
Inputs:
p_privateKey - Your private key.
p_hash - The message hash to sign.
private_key - Your private key.
hash - The message hash to sign.
Outputs:
p_signature - Will be filled in with the signature value.
signature - Will be filled in with the signature value.
Returns 1 if the signature was generated successfully, 0 if an error occurred.
*/
int uECC_sign(const uint8_t p_privateKey[uECC_BYTES], const uint8_t p_hash[uECC_BYTES], uint8_t p_signature[uECC_BYTES*2]);
int uECC_sign(const uint8_t private_key[uECC_BYTES],
const uint8_t hash[uECC_BYTES],
uint8_t signature[uECC_BYTES*2]);
/* uECC_verify() function.
Verify an ECDSA signature.
@@ -137,35 +145,37 @@ Usage: Compute the hash of the signed data using the same hash as the signer and
pass it to this function along with the signer's public key and the signature values (r and s).
Inputs:
p_publicKey - The signer's public key
p_hash - The hash of the signed data.
p_signature - The signature value.
public_key - The signer's public key.
hash - The hash of the signed data.
signature - The signature value.
Returns 1 if the signature is valid, 0 if it is invalid.
*/
int uECC_verify(const uint8_t p_publicKey[uECC_BYTES*2], const uint8_t p_hash[uECC_BYTES], const uint8_t p_signature[uECC_BYTES*2]);
int uECC_verify(const uint8_t public_key[uECC_BYTES*2],
                const uint8_t hash[uECC_BYTES],
                const uint8_t signature[uECC_BYTES*2]);
/* uECC_compress() function.
Compress a public key.
Inputs:
p_publicKey - The public key to compress.
public_key - The public key to compress.
Outputs:
p_compressed - Will be filled in with the compressed public key.
compressed - Will be filled in with the compressed public key.
*/
void uECC_compress(const uint8_t p_publicKey[uECC_BYTES*2], uint8_t p_compressed[uECC_BYTES+1]);
void uECC_compress(const uint8_t public_key[uECC_BYTES*2], uint8_t compressed[uECC_BYTES+1]);
/* uECC_decompress() function.
Decompress a compressed public key.
Inputs:
p_compressed - The compressed public key.
compressed - The compressed public key.
Outputs:
p_publicKey - Will be filled in with the decompressed public key.
public_key - Will be filled in with the decompressed public key.
*/
void uECC_decompress(const uint8_t p_compressed[uECC_BYTES+1], uint8_t p_publicKey[uECC_BYTES*2]);
void uECC_decompress(const uint8_t compressed[uECC_BYTES+1], uint8_t public_key[uECC_BYTES*2]);
/* uECC_valid_public_key() function.
Check to see if a public key is valid.
@@ -175,24 +185,25 @@ functions. However, you may wish to avoid spending CPU time computing a shared s
verifying a signature using an invalid public key.
Inputs:
p_publicKey - The public key to check.
public_key - The public key to check.
Returns 1 if the public key is valid, 0 if it is invalid.
*/
int uECC_valid_public_key(const uint8_t p_publicKey[uECC_BYTES*2]);
int uECC_valid_public_key(const uint8_t public_key[uECC_BYTES*2]);
/* uECC_compute_public_key() function.
Compute the corresponding public key for a private key.
Inputs:
p_privateKey - The private key to compute the public key for
private_key - The private key to compute the public key for.
Outputs:
p_publicKey - Will be filled in with the corresponding public key
public_key - Will be filled in with the corresponding public key.
Returns 1 if the key was computed successfully, 0 if an error occurred.
*/
int uECC_compute_public_key(const uint8_t p_privateKey[uECC_BYTES], uint8_t p_publicKey[uECC_BYTES * 2]);
int uECC_compute_public_key(const uint8_t private_key[uECC_BYTES],
uint8_t public_key[uECC_BYTES * 2]);
/* uECC_bytes() function.