mirror of
https://github.com/kmackay/micro-ecc.git
synced 2026-06-05 21:14:51 +00:00
Add ARM fast multiply/square for secp224r1.
This commit is contained in:
+548
@@ -502,6 +502,331 @@ static void vli_mult(uint32_t *result, const uint32_t *left, const uint32_t *rig
|
||||
#define asm_mult 1
|
||||
#endif /* (uECC_WORDS == 6) */
|
||||
|
||||
#if (uECC_WORDS == 7)
|
||||
/* Hand-scheduled ARM multiply for uECC_WORDS == 7.
   Writes the 14-word product of the 7-word operands `left` and `right` to `result`;
   each partial product left[i] * right[j] is accumulated into result[i + j], so
   result[0] is the least-significant word.

   The work is done in two passes:
     pass 1: left[0..3] * right[3..6], stored to result[3..10];
     pass 2: every remaining partial product, walking result[0..13] and adding the
             words that pass 1 left in result[3..10].
   In each column three registers hold a running (low, high, carry) sum; the low word
   is stored and the roles rotate for the next column.

   result is written before left/right have been fully read, so result presumably
   must not overlap either input -- NOTE(review): confirm against callers. */
static void vli_mult(uint32_t *result, const uint32_t *left, const uint32_t *right) {
|
||||
register uint32_t *r0 __asm__("r0") = result;
|
||||
register const uint32_t *r1 __asm__("r1") = left;
|
||||
register const uint32_t *r2 __asm__("r2") = right;
|
||||
|
||||
__asm__ volatile (
|
||||
".syntax unified \n\t"
|
||||
/* Pass 1 setup: r0 -> result[6], r2 -> right[6]; r3 = left[0], r6 = right[6]. */
"add r0, 24 \n\t"
|
||||
"add r2, 24 \n\t"
|
||||
"ldmia r1!, {r3} \n\t"
|
||||
"ldmia r2!, {r6} \n\t"
|
||||
|
||||
/* result[6..7] = left[0] * right[6]; re-read below when those columns are summed. */
"umull r9, r10, r3, r6 \n\t"
|
||||
"stmia r0!, {r9, r10} \n\t"
|
||||
|
||||
/* Rewind to result[3] / right[3]; r6..r8 = right[3..5], r4..r5 = left[1..2]. */
"sub r0, 20 \n\t"
|
||||
"sub r2, 16 \n\t"
|
||||
"ldmia r2!, {r6, r7, r8} \n\t"
|
||||
"ldmia r1!, {r4, r5} \n\t"
|
||||
|
||||
/* result[3]: left[0]*right[3] (high word carried in r10). */
"umull r9, r10, r3, r6 \n\t"
|
||||
"stmia r0!, {r9} \n\t"
|
||||
|
||||
/* result[4]: left[0]*right[4] + left[1]*right[3]. */
"mov r14, #0 \n\t"
|
||||
"umull r9, r12, r3, r7 \n\t"
|
||||
"adds r10, r9 \n\t"
|
||||
"adc r12, #0 \n\t"
|
||||
"umull r9, r11, r4, r6 \n\t"
|
||||
"adds r10, r9 \n\t"
|
||||
"adcs r12, r11 \n\t"
|
||||
"adc r14, #0 \n\t"
|
||||
"stmia r0!, {r10} \n\t"
|
||||
|
||||
/* result[5]: left[0]*right[5] + left[1]*right[4] + left[2]*right[3]. */
"mov r9, #0 \n\t"
|
||||
"umull r10, r11, r3, r8 \n\t"
|
||||
"adds r12, r10 \n\t"
|
||||
"adcs r14, r11 \n\t"
|
||||
"adc r9, #0 \n\t"
|
||||
"umull r10, r11, r4, r7 \n\t"
|
||||
"adds r12, r10 \n\t"
|
||||
"adcs r14, r11 \n\t"
|
||||
"adc r9, #0 \n\t"
|
||||
"umull r10, r11, r5, r6 \n\t"
|
||||
"adds r12, r10 \n\t"
|
||||
"adcs r14, r11 \n\t"
|
||||
"adc r9, #0 \n\t"
|
||||
"stmia r0!, {r12} \n\t"
|
||||
|
||||
/* result[6]: r3 = left[3]; left[1]*right[5] + left[2]*right[4] + left[3]*right[3],
   plus the low word of left[0]*right[6] stored earlier. */
"ldmia r1!, {r3} \n\t"
|
||||
"mov r10, #0 \n\t"
|
||||
"umull r11, r12, r4, r8 \n\t"
|
||||
"adds r14, r11 \n\t"
|
||||
"adcs r9, r12 \n\t"
|
||||
"adc r10, #0 \n\t"
|
||||
"umull r11, r12, r5, r7 \n\t"
|
||||
"adds r14, r11 \n\t"
|
||||
"adcs r9, r12 \n\t"
|
||||
"adc r10, #0 \n\t"
|
||||
"umull r11, r12, r3, r6 \n\t"
|
||||
"adds r14, r11 \n\t"
|
||||
"adcs r9, r12 \n\t"
|
||||
"adc r10, #0 \n\t"
|
||||
"ldr r11, [r0] \n\t"
|
||||
"adds r14, r11 \n\t"
|
||||
"adcs r9, #0 \n\t"
|
||||
"adc r10, #0 \n\t"
|
||||
"stmia r0!, {r14} \n\t"
|
||||
|
||||
/* result[7]: r6 = right[6]; left[1]*right[6] + left[2]*right[5] + left[3]*right[4],
   plus the high word of left[0]*right[6] stored earlier. */
"ldmia r2!, {r6} \n\t"
|
||||
"mov r11, #0 \n\t"
|
||||
"umull r12, r14, r4, r6 \n\t"
|
||||
"adds r9, r12 \n\t"
|
||||
"adcs r10, r14 \n\t"
|
||||
"adc r11, #0 \n\t"
|
||||
"umull r12, r14, r5, r8 \n\t"
|
||||
"adds r9, r12 \n\t"
|
||||
"adcs r10, r14 \n\t"
|
||||
"adc r11, #0 \n\t"
|
||||
"umull r12, r14, r3, r7 \n\t"
|
||||
"adds r9, r12 \n\t"
|
||||
"adcs r10, r14 \n\t"
|
||||
"adc r11, #0 \n\t"
|
||||
"ldr r12, [r0] \n\t"
|
||||
"adds r9, r12 \n\t"
|
||||
"adcs r10, #0 \n\t"
|
||||
"adc r11, #0 \n\t"
|
||||
"stmia r0!, {r9} \n\t"
|
||||
|
||||
/* result[8]: left[2]*right[6] + left[3]*right[5]. */
"mov r12, #0 \n\t"
|
||||
"umull r14, r9, r5, r6 \n\t"
|
||||
"adds r10, r14 \n\t"
|
||||
"adcs r11, r9 \n\t"
|
||||
"adc r12, #0 \n\t"
|
||||
"umull r14, r9, r3, r8 \n\t"
|
||||
"adds r10, r14 \n\t"
|
||||
"adcs r11, r9 \n\t"
|
||||
"adc r12, #0 \n\t"
|
||||
"stmia r0!, {r10} \n\t"
|
||||
|
||||
/* result[9..10]: left[3]*right[6] plus the carried words; ends pass 1. */
"umull r9, r10, r3, r6 \n\t"
|
||||
"adds r11, r9 \n\t"
|
||||
"adc r12, r10 \n\t"
|
||||
"stmia r0!, {r11, r12} \n\t"
|
||||
|
||||
/* Pass 2 setup: rewind r0 -> result[0], r1 -> left[0], r2 -> right[0];
   r3..r5 = left[0..2], r6..r8 = right[0..2]. */
"sub r0, 44 \n\t"
|
||||
"sub r1, 16 \n\t"
|
||||
"sub r2, 28 \n\t"
|
||||
"ldmia r1!, {r3,r4,r5} \n\t"
|
||||
"ldmia r2!, {r6,r7,r8} \n\t"
|
||||
|
||||
/* result[0]: left[0]*right[0]. */
"umull r9, r10, r3, r6 \n\t"
|
||||
"stmia r0!, {r9} \n\t"
|
||||
|
||||
/* result[1]: left[0]*right[1] + left[1]*right[0]. */
"mov r14, #0 \n\t"
|
||||
"umull r9, r12, r3, r7 \n\t"
|
||||
"adds r10, r9 \n\t"
|
||||
"adc r12, #0 \n\t"
|
||||
"umull r9, r11, r4, r6 \n\t"
|
||||
"adds r10, r9 \n\t"
|
||||
"adcs r12, r11 \n\t"
|
||||
"adc r14, #0 \n\t"
|
||||
"stmia r0!, {r10} \n\t"
|
||||
|
||||
/* result[2]: left[0]*right[2] + left[1]*right[1] + left[2]*right[0]. */
"mov r9, #0 \n\t"
|
||||
"umull r10, r11, r3, r8 \n\t"
|
||||
"adds r12, r10 \n\t"
|
||||
"adcs r14, r11 \n\t"
|
||||
"adc r9, #0 \n\t"
|
||||
"umull r10, r11, r4, r7 \n\t"
|
||||
"adds r12, r10 \n\t"
|
||||
"adcs r14, r11 \n\t"
|
||||
"adc r9, #0 \n\t"
|
||||
"umull r10, r11, r5, r6 \n\t"
|
||||
"adds r12, r10 \n\t"
|
||||
"adcs r14, r11 \n\t"
|
||||
"adc r9, #0 \n\t"
|
||||
"stmia r0!, {r12} \n\t"
|
||||
|
||||
/* result[3]: r3 = left[3]; three new products plus the pass-1 word at result[3].
   From here through result[10] each column adds the word already in memory. */
"ldmia r1!, {r3} \n\t"
|
||||
"mov r10, #0 \n\t"
|
||||
"umull r11, r12, r4, r8 \n\t"
|
||||
"adds r14, r11 \n\t"
|
||||
"adcs r9, r12 \n\t"
|
||||
"adc r10, #0 \n\t"
|
||||
"umull r11, r12, r5, r7 \n\t"
|
||||
"adds r14, r11 \n\t"
|
||||
"adcs r9, r12 \n\t"
|
||||
"adc r10, #0 \n\t"
|
||||
"umull r11, r12, r3, r6 \n\t"
|
||||
"adds r14, r11 \n\t"
|
||||
"adcs r9, r12 \n\t"
|
||||
"adc r10, #0 \n\t"
|
||||
"ldr r11, [r0] \n\t"
|
||||
"adds r14, r11 \n\t"
|
||||
"adcs r9, #0 \n\t"
|
||||
"adc r10, #0 \n\t"
|
||||
"stmia r0!, {r14} \n\t"
|
||||
|
||||
/* result[4]: r4 = left[4]. */
"ldmia r1!, {r4} \n\t"
|
||||
"mov r11, #0 \n\t"
|
||||
"umull r12, r14, r5, r8 \n\t"
|
||||
"adds r9, r12 \n\t"
|
||||
"adcs r10, r14 \n\t"
|
||||
"adc r11, #0 \n\t"
|
||||
"umull r12, r14, r3, r7 \n\t"
|
||||
"adds r9, r12 \n\t"
|
||||
"adcs r10, r14 \n\t"
|
||||
"adc r11, #0 \n\t"
|
||||
"umull r12, r14, r4, r6 \n\t"
|
||||
"adds r9, r12 \n\t"
|
||||
"adcs r10, r14 \n\t"
|
||||
"adc r11, #0 \n\t"
|
||||
"ldr r12, [r0] \n\t"
|
||||
"adds r9, r12 \n\t"
|
||||
"adcs r10, #0 \n\t"
|
||||
"adc r11, #0 \n\t"
|
||||
"stmia r0!, {r9} \n\t"
|
||||
|
||||
/* result[5]: r5 = left[5]. */
"ldmia r1!, {r5} \n\t"
|
||||
"mov r12, #0 \n\t"
|
||||
"umull r14, r9, r3, r8 \n\t"
|
||||
"adds r10, r14 \n\t"
|
||||
"adcs r11, r9 \n\t"
|
||||
"adc r12, #0 \n\t"
|
||||
"umull r14, r9, r4, r7 \n\t"
|
||||
"adds r10, r14 \n\t"
|
||||
"adcs r11, r9 \n\t"
|
||||
"adc r12, #0 \n\t"
|
||||
"umull r14, r9, r5, r6 \n\t"
|
||||
"adds r10, r14 \n\t"
|
||||
"adcs r11, r9 \n\t"
|
||||
"adc r12, #0 \n\t"
|
||||
"ldr r14, [r0] \n\t"
|
||||
"adds r10, r14 \n\t"
|
||||
"adcs r11, #0 \n\t"
|
||||
"adc r12, #0 \n\t"
|
||||
"stmia r0!, {r10} \n\t"
|
||||
|
||||
/* result[6]: r3 = left[6] (r4 = left[4], r5 = left[5] from here on). */
"ldmia r1!, {r3} \n\t"
|
||||
"mov r14, #0 \n\t"
|
||||
"umull r9, r10, r4, r8 \n\t"
|
||||
"adds r11, r9 \n\t"
|
||||
"adcs r12, r10 \n\t"
|
||||
"adc r14, #0 \n\t"
|
||||
"umull r9, r10, r5, r7 \n\t"
|
||||
"adds r11, r9 \n\t"
|
||||
"adcs r12, r10 \n\t"
|
||||
"adc r14, #0 \n\t"
|
||||
"umull r9, r10, r3, r6 \n\t"
|
||||
"adds r11, r9 \n\t"
|
||||
"adcs r12, r10 \n\t"
|
||||
"adc r14, #0 \n\t"
|
||||
"ldr r9, [r0] \n\t"
|
||||
"adds r11, r9 \n\t"
|
||||
"adcs r12, #0 \n\t"
|
||||
"adc r14, #0 \n\t"
|
||||
"stmia r0!, {r11} \n\t"
|
||||
|
||||
/* result[7]: r6 = right[3]. */
"ldmia r2!, {r6} \n\t"
|
||||
"mov r9, #0 \n\t"
|
||||
"umull r10, r11, r4, r6 \n\t"
|
||||
"adds r12, r10 \n\t"
|
||||
"adcs r14, r11 \n\t"
|
||||
"adc r9, #0 \n\t"
|
||||
"umull r10, r11, r5, r8 \n\t"
|
||||
"adds r12, r10 \n\t"
|
||||
"adcs r14, r11 \n\t"
|
||||
"adc r9, #0 \n\t"
|
||||
"umull r10, r11, r3, r7 \n\t"
|
||||
"adds r12, r10 \n\t"
|
||||
"adcs r14, r11 \n\t"
|
||||
"adc r9, #0 \n\t"
|
||||
"ldr r10, [r0] \n\t"
|
||||
"adds r12, r10 \n\t"
|
||||
"adcs r14, #0 \n\t"
|
||||
"adc r9, #0 \n\t"
|
||||
"stmia r0!, {r12} \n\t"
|
||||
|
||||
/* result[8]: r7 = right[4]. */
"ldmia r2!, {r7} \n\t"
|
||||
"mov r10, #0 \n\t"
|
||||
"umull r11, r12, r4, r7 \n\t"
|
||||
"adds r14, r11 \n\t"
|
||||
"adcs r9, r12 \n\t"
|
||||
"adc r10, #0 \n\t"
|
||||
"umull r11, r12, r5, r6 \n\t"
|
||||
"adds r14, r11 \n\t"
|
||||
"adcs r9, r12 \n\t"
|
||||
"adc r10, #0 \n\t"
|
||||
"umull r11, r12, r3, r8 \n\t"
|
||||
"adds r14, r11 \n\t"
|
||||
"adcs r9, r12 \n\t"
|
||||
"adc r10, #0 \n\t"
|
||||
"ldr r11, [r0] \n\t"
|
||||
"adds r14, r11 \n\t"
|
||||
"adcs r9, #0 \n\t"
|
||||
"adc r10, #0 \n\t"
|
||||
"stmia r0!, {r14} \n\t"
|
||||
|
||||
/* result[9]: r8 = right[5]. */
"ldmia r2!, {r8} \n\t"
|
||||
"mov r11, #0 \n\t"
|
||||
"umull r12, r14, r4, r8 \n\t"
|
||||
"adds r9, r12 \n\t"
|
||||
"adcs r10, r14 \n\t"
|
||||
"adc r11, #0 \n\t"
|
||||
"umull r12, r14, r5, r7 \n\t"
|
||||
"adds r9, r12 \n\t"
|
||||
"adcs r10, r14 \n\t"
|
||||
"adc r11, #0 \n\t"
|
||||
"umull r12, r14, r3, r6 \n\t"
|
||||
"adds r9, r12 \n\t"
|
||||
"adcs r10, r14 \n\t"
|
||||
"adc r11, #0 \n\t"
|
||||
"ldr r12, [r0] \n\t"
|
||||
"adds r9, r12 \n\t"
|
||||
"adcs r10, #0 \n\t"
|
||||
"adc r11, #0 \n\t"
|
||||
"stmia r0!, {r9} \n\t"
|
||||
|
||||
/* result[10]: r6 = right[6]; last column that adds a pass-1 word. */
"ldmia r2!, {r6} \n\t"
|
||||
"mov r12, #0 \n\t"
|
||||
"umull r14, r9, r4, r6 \n\t"
|
||||
"adds r10, r14 \n\t"
|
||||
"adcs r11, r9 \n\t"
|
||||
"adc r12, #0 \n\t"
|
||||
"umull r14, r9, r5, r8 \n\t"
|
||||
"adds r10, r14 \n\t"
|
||||
"adcs r11, r9 \n\t"
|
||||
"adc r12, #0 \n\t"
|
||||
"umull r14, r9, r3, r7 \n\t"
|
||||
"adds r10, r14 \n\t"
|
||||
"adcs r11, r9 \n\t"
|
||||
"adc r12, #0 \n\t"
|
||||
"ldr r14, [r0] \n\t"
|
||||
"adds r10, r14 \n\t"
|
||||
"adcs r11, #0 \n\t"
|
||||
"adc r12, #0 \n\t"
|
||||
"stmia r0!, {r10} \n\t"
|
||||
|
||||
/* result[11]: left[5]*right[6] + left[6]*right[5]. */
"mov r14, #0 \n\t"
|
||||
"umull r9, r10, r5, r6 \n\t"
|
||||
"adds r11, r9 \n\t"
|
||||
"adcs r12, r10 \n\t"
|
||||
"adc r14, #0 \n\t"
|
||||
"umull r9, r10, r3, r8 \n\t"
|
||||
"adds r11, r9 \n\t"
|
||||
"adcs r12, r10 \n\t"
|
||||
"adc r14, #0 \n\t"
|
||||
"stmia r0!, {r11} \n\t"
|
||||
|
||||
/* result[12..13]: left[6]*right[6] plus the carried words. */
"umull r10, r11, r3, r6 \n\t"
|
||||
"adds r12, r10 \n\t"
|
||||
"adc r14, r11 \n\t"
|
||||
"stmia r0!, {r12, r14} \n\t"
|
||||
/* Switch the assembler back to divided syntax on every platform except Thumb-2. */
#if (uECC_PLATFORM != uECC_arm_thumb2)
|
||||
".syntax divided \n\t"
|
||||
#endif
|
||||
/* r0-r2 are read-write operands because the asm advances and rewinds them. */
: "+r" (r0), "+r" (r1), "+r" (r2)
|
||||
:
|
||||
: "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
|
||||
);
|
||||
}
|
||||
#define asm_mult 1
|
||||
#endif /* (uECC_WORDS == 7) */
|
||||
|
||||
#if (uECC_WORDS == 8)
|
||||
static void vli_mult(uint32_t *result, const uint32_t *left, const uint32_t *right) {
|
||||
register uint32_t *r0 __asm__("r0") = result;
|
||||
@@ -1200,6 +1525,229 @@ static void vli_square(uint32_t *result, const uint32_t *left) {
|
||||
#define asm_square 1
|
||||
#endif /* (uECC_WORDS == 6) */
|
||||
|
||||
#if (uECC_WORDS == 7)
|
||||
/* Hand-scheduled ARM squaring for uECC_WORDS == 7.
   Writes the 14-word square of the 7-word operand `left` to `result`, least-significant
   word at result[0]. Each result column sums the cross products left[i]*left[j] (i < j,
   i + j == column) once, doubles that sum, then adds the diagonal term left[i]^2 when
   the column index is even.

   left[0]*left[6] is computed up front and parked in result[6..7]; it is re-read and
   folded into the cross-product sum when those columns are reached.

   result is written before left has been fully read, so result presumably must not
   overlap left -- NOTE(review): confirm against callers. */
static void vli_square(uint32_t *result, const uint32_t *left) {
|
||||
register uint32_t *r0 __asm__("r0") = result;
|
||||
register const uint32_t *r1 __asm__("r1") = left;
|
||||
|
||||
__asm__ volatile (
|
||||
".syntax unified \n\t"
|
||||
/* Setup: result[6..7] = left[0] * left[6], then rewind r0 -> result[0], r1 -> left[0]. */
"ldmia r1!, {r2} \n\t"
|
||||
"add r1, 20 \n\t"
|
||||
"ldmia r1!, {r5} \n\t"
|
||||
"add r0, 24 \n\t"
|
||||
"umull r8, r9, r2, r5 \n\t"
|
||||
"stmia r0!, {r8, r9} \n\t"
|
||||
"sub r0, 32 \n\t"
|
||||
"sub r1, 28 \n\t"
|
||||
|
||||
/* r2..r7 = left[0..5]; r1 now points at left[6]. */
"ldmia r1!, {r2, r3, r4, r5, r6, r7} \n\t"
|
||||
|
||||
/* result[0]: left[0]^2 (high word carried in r12). */
"umull r11, r12, r2, r2 \n\t"
|
||||
"stmia r0!, {r11} \n\t"
|
||||
|
||||
/* result[1]: 2 * left[0]*left[1], formed by adding the product twice. */
"mov r9, #0 \n\t"
|
||||
"umull r10, r11, r2, r3 \n\t"
|
||||
"adds r12, r10 \n\t"
|
||||
"adcs r8, r11, #0 \n\t"
|
||||
"adc r9, #0 \n\t"
|
||||
"adds r12, r10 \n\t"
|
||||
"adcs r8, r11 \n\t"
|
||||
"adc r9, #0 \n\t"
|
||||
"stmia r0!, {r12} \n\t"
|
||||
|
||||
/* result[2]: 2 * left[0]*left[2] + left[1]^2. */
"mov r10, #0 \n\t"
|
||||
"umull r11, r12, r2, r4 \n\t"
|
||||
"adds r11, r11 \n\t"
|
||||
"adcs r12, r12 \n\t"
|
||||
"adc r10, #0 \n\t"
|
||||
"adds r8, r11 \n\t"
|
||||
"adcs r9, r12 \n\t"
|
||||
"adc r10, #0 \n\t"
|
||||
"umull r11, r12, r3, r3 \n\t"
|
||||
"adds r8, r11 \n\t"
|
||||
"adcs r9, r12 \n\t"
|
||||
"adc r10, #0 \n\t"
|
||||
"stmia r0!, {r8} \n\t"
|
||||
|
||||
/* result[3]: 2 * (left[0]*left[3] + left[1]*left[2]).
   umlal does not report overflow of its 64-bit accumulator, so the old high word is
   saved in r14 and compared afterwards: if it is now smaller the accumulate wrapped,
   and cmp leaves the carry flag set so the conditional adc adds 1 to the third word.
   The same mov/umlal/cmp/it/adchi pattern is used in the columns below. */
"mov r12, #0 \n\t"
|
||||
"umull r8, r11, r2, r5 \n\t"
|
||||
"mov r14, r11 \n\t"
|
||||
"umlal r8, r11, r3, r4 \n\t"
|
||||
"cmp r14, r11 \n\t"
|
||||
"it hi \n\t"
|
||||
"adchi r12, #0 \n\t"
|
||||
"adds r8, r8 \n\t"
|
||||
"adcs r11, r11 \n\t"
|
||||
"adc r12, r12 \n\t"
|
||||
"adds r8, r9 \n\t"
|
||||
"adcs r11, r10 \n\t"
|
||||
"adc r12, #0 \n\t"
|
||||
"stmia r0!, {r8} \n\t"
|
||||
|
||||
/* result[4]: 2 * (left[0]*left[4] + left[1]*left[3]) + left[2]^2. */
"mov r10, #0 \n\t"
|
||||
"umull r8, r9, r2, r6 \n\t"
|
||||
"mov r14, r9 \n\t"
|
||||
"umlal r8, r9, r3, r5 \n\t"
|
||||
"cmp r14, r9 \n\t"
|
||||
"it hi \n\t"
|
||||
"adchi r10, #0 \n\t"
|
||||
"adds r8, r8 \n\t"
|
||||
"adcs r9, r9 \n\t"
|
||||
"adc r10, r10 \n\t"
|
||||
"mov r14, r9 \n\t"
|
||||
"umlal r8, r9, r4, r4 \n\t"
|
||||
"cmp r14, r9 \n\t"
|
||||
"it hi \n\t"
|
||||
"adchi r10, #0 \n\t"
|
||||
"adds r8, r11 \n\t"
|
||||
"adcs r9, r12 \n\t"
|
||||
"adc r10, #0 \n\t"
|
||||
"stmia r0!, {r8} \n\t"
|
||||
|
||||
/* result[5]: 2 * (left[0]*left[5] + left[1]*left[4] + left[2]*left[3]). */
"mov r12, #0 \n\t"
|
||||
"umull r8, r11, r2, r7 \n\t"
|
||||
"mov r14, r11 \n\t"
|
||||
"umlal r8, r11, r3, r6 \n\t"
|
||||
"cmp r14, r11 \n\t"
|
||||
"it hi \n\t"
|
||||
"adchi r12, #0 \n\t"
|
||||
"mov r14, r11 \n\t"
|
||||
"umlal r8, r11, r4, r5 \n\t"
|
||||
"cmp r14, r11 \n\t"
|
||||
"it hi \n\t"
|
||||
"adchi r12, #0 \n\t"
|
||||
"adds r8, r8 \n\t"
|
||||
"adcs r11, r11 \n\t"
|
||||
"adc r12, r12 \n\t"
|
||||
"adds r8, r9 \n\t"
|
||||
"adcs r11, r10 \n\t"
|
||||
"adc r12, #0 \n\t"
|
||||
"stmia r0!, {r8} \n\t"
|
||||
|
||||
/* result[6]: r2 = left[6] (left[0] is no longer needed).
   2 * (left[1]*left[5] + left[2]*left[4] + stored low word of left[0]*left[6])
   + left[3]^2. */
"ldmia r1!, {r2} \n\t"
|
||||
"mov r10, #0 \n\t"
|
||||
"umull r8, r9, r3, r7 \n\t"
|
||||
"mov r14, r9 \n\t"
|
||||
"umlal r8, r9, r4, r6 \n\t"
|
||||
"cmp r14, r9 \n\t"
|
||||
"it hi \n\t"
|
||||
"adchi r10, #0 \n\t"
|
||||
"ldr r14, [r0] \n\t"
|
||||
"adds r8, r14 \n\t"
|
||||
"adcs r9, #0 \n\t"
|
||||
"adc r10, #0 \n\t"
|
||||
"adds r8, r8 \n\t"
|
||||
"adcs r9, r9 \n\t"
|
||||
"adc r10, r10 \n\t"
|
||||
"mov r14, r9 \n\t"
|
||||
"umlal r8, r9, r5, r5 \n\t"
|
||||
"cmp r14, r9 \n\t"
|
||||
"it hi \n\t"
|
||||
"adchi r10, #0 \n\t"
|
||||
"adds r8, r11 \n\t"
|
||||
"adcs r9, r12 \n\t"
|
||||
"adc r10, #0 \n\t"
|
||||
"stmia r0!, {r8} \n\t"
|
||||
|
||||
/* result[7]: 2 * (left[1]*left[6] + left[2]*left[5] + left[3]*left[4]
   + stored high word of left[0]*left[6]). */
"mov r12, #0 \n\t"
|
||||
"umull r8, r11, r3, r2 \n\t"
|
||||
"mov r14, r11 \n\t"
|
||||
"umlal r8, r11, r4, r7 \n\t"
|
||||
"cmp r14, r11 \n\t"
|
||||
"it hi \n\t"
|
||||
"adchi r12, #0 \n\t"
|
||||
"mov r14, r11 \n\t"
|
||||
"umlal r8, r11, r5, r6 \n\t"
|
||||
"cmp r14, r11 \n\t"
|
||||
"it hi \n\t"
|
||||
"adchi r12, #0 \n\t"
|
||||
"ldr r14, [r0] \n\t"
|
||||
"adds r8, r14 \n\t"
|
||||
"adcs r11, #0 \n\t"
|
||||
"adc r12, #0 \n\t"
|
||||
"adds r8, r8 \n\t"
|
||||
"adcs r11, r11 \n\t"
|
||||
"adc r12, r12 \n\t"
|
||||
"adds r8, r9 \n\t"
|
||||
"adcs r11, r10 \n\t"
|
||||
"adc r12, #0 \n\t"
|
||||
"stmia r0!, {r8} \n\t"
|
||||
|
||||
/* result[8]: 2 * (left[2]*left[6] + left[3]*left[5]) + left[4]^2. */
"mov r10, #0 \n\t"
|
||||
"umull r8, r9, r4, r2 \n\t"
|
||||
"mov r14, r9 \n\t"
|
||||
"umlal r8, r9, r5, r7 \n\t"
|
||||
"cmp r14, r9 \n\t"
|
||||
"it hi \n\t"
|
||||
"adchi r10, #0 \n\t"
|
||||
"adds r8, r8 \n\t"
|
||||
"adcs r9, r9 \n\t"
|
||||
"adc r10, r10 \n\t"
|
||||
"mov r14, r9 \n\t"
|
||||
"umlal r8, r9, r6, r6 \n\t"
|
||||
"cmp r14, r9 \n\t"
|
||||
"it hi \n\t"
|
||||
"adchi r10, #0 \n\t"
|
||||
"adds r8, r11 \n\t"
|
||||
"adcs r9, r12 \n\t"
|
||||
"adc r10, #0 \n\t"
|
||||
"stmia r0!, {r8} \n\t"
|
||||
|
||||
/* result[9]: 2 * (left[3]*left[6] + left[4]*left[5]). */
"mov r12, #0 \n\t"
|
||||
"umull r8, r11, r5, r2 \n\t"
|
||||
"mov r14, r11 \n\t"
|
||||
"umlal r8, r11, r6, r7 \n\t"
|
||||
"cmp r14, r11 \n\t"
|
||||
"it hi \n\t"
|
||||
"adchi r12, #0 \n\t"
|
||||
"adds r8, r8 \n\t"
|
||||
"adcs r11, r11 \n\t"
|
||||
"adc r12, r12 \n\t"
|
||||
"adds r8, r9 \n\t"
|
||||
"adcs r11, r10 \n\t"
|
||||
"adc r12, #0 \n\t"
|
||||
"stmia r0!, {r8} \n\t"
|
||||
|
||||
/* result[10]: 2 * left[4]*left[6] + left[5]^2.
   All input words are in registers by now, so r1 is reused as a scratch register. */
"mov r8, #0 \n\t"
|
||||
"umull r1, r10, r6, r2 \n\t"
|
||||
"adds r1, r1 \n\t"
|
||||
"adcs r10, r10 \n\t"
|
||||
"adc r8, #0 \n\t"
|
||||
"adds r11, r1 \n\t"
|
||||
"adcs r12, r10 \n\t"
|
||||
"adc r8, #0 \n\t"
|
||||
"umull r1, r10, r7, r7 \n\t"
|
||||
"adds r11, r1 \n\t"
|
||||
"adcs r12, r10 \n\t"
|
||||
"adc r8, #0 \n\t"
|
||||
"stmia r0!, {r11} \n\t"
|
||||
|
||||
/* result[11]: 2 * left[5]*left[6]. */
"mov r11, #0 \n\t"
|
||||
"umull r1, r10, r7, r2 \n\t"
|
||||
"adds r1, r1 \n\t"
|
||||
"adcs r10, r10 \n\t"
|
||||
"adc r11, #0 \n\t"
|
||||
"adds r12, r1 \n\t"
|
||||
"adcs r8, r10 \n\t"
|
||||
"adc r11, #0 \n\t"
|
||||
"stmia r0!, {r12} \n\t"
|
||||
|
||||
/* result[12..13]: left[6]^2 plus the carried words. */
"umull r1, r10, r2, r2 \n\t"
|
||||
"adds r8, r1 \n\t"
|
||||
"adcs r11, r10 \n\t"
|
||||
"stmia r0!, {r8, r11} \n\t"
|
||||
/* Switch the assembler back to divided syntax on every platform except Thumb-2. */
#if (uECC_PLATFORM != uECC_arm_thumb2)
|
||||
".syntax divided \n\t"
|
||||
#endif
|
||||
/* r0 and r1 are read-write operands: r0 is advanced/rewound, r1 is also overwritten. */
: "+r" (r0), "+r" (r1)
|
||||
:
|
||||
: "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
|
||||
);
|
||||
}
|
||||
#define asm_square 1
|
||||
#endif /* (uECC_WORDS == 7) */
|
||||
|
||||
#if (uECC_WORDS == 8)
|
||||
static void vli_square(uint32_t *result, const uint32_t *left) {
|
||||
register uint32_t *r0 __asm__("r0") = result;
|
||||
|
||||
+4
-4
@@ -26,8 +26,8 @@ ry = [6, 7, 8]
|
||||
emit("add r0, %s", (size - init_size) * 4) # move z
|
||||
emit("add r2, %s", (size - init_size) * 4) # move y
|
||||
|
||||
emit("ldmia r1!, {%s}", ",".join(["r%s" % (rx[i]) for i in xrange(init_size)]))
|
||||
emit("ldmia r2!, {%s}", ",".join(["r%s" % (ry[i]) for i in xrange(init_size)]))
|
||||
emit("ldmia r1!, {%s}", ", ".join(["r%s" % (rx[i]) for i in xrange(init_size)]))
|
||||
emit("ldmia r2!, {%s}", ", ".join(["r%s" % (ry[i]) for i in xrange(init_size)]))
|
||||
|
||||
print ""
|
||||
if init_size == 1:
|
||||
@@ -84,11 +84,11 @@ emit("sub r0, %s", (2 * init_size + 3) * 4)
|
||||
emit("sub r2, %s", (init_size + 3) * 4)
|
||||
|
||||
#### load y registers
|
||||
emit("ldmia r2!, {%s}", ",".join(["r%s" % (ry[i]) for i in xrange(3)]))
|
||||
emit("ldmia r2!, {%s}", ", ".join(["r%s" % (ry[i]) for i in xrange(3)]))
|
||||
|
||||
#### load additional x registers
|
||||
if init_size != 3:
|
||||
emit("ldmia r1!, {%s}", ",".join(["r%s" % (rx[i]) for i in xrange(init_size, 3)]))
|
||||
emit("ldmia r1!, {%s}", ", ".join(["r%s" % (rx[i]) for i in xrange(init_size, 3)]))
|
||||
print ""
|
||||
|
||||
prev_size = init_size
|
||||
|
||||
+15
-4
@@ -8,7 +8,7 @@ if len(sys.argv) < 2:
|
||||
|
||||
size = int(sys.argv[1])
|
||||
|
||||
if size > 6 and size != 8:
|
||||
if size > 8:
|
||||
print "This script doesn't work with integer size %s due to laziness" % (size)
|
||||
sys.exit(1)
|
||||
|
||||
@@ -37,8 +37,19 @@ r = [2, 3, 4, 5, 6, 7]
|
||||
|
||||
s = size - init_size
|
||||
|
||||
# Note that the init_size = 1 and init_size = 2 cases are each implemented directly
|
||||
if init_size > 0:
|
||||
if init_size == 1:
|
||||
emit("ldmia r1!, {r2}")
|
||||
emit("add r1, %s", (size - init_size * 2) * 4)
|
||||
emit("ldmia r1!, {r5}")
|
||||
|
||||
emit("add r0, %s", (size - init_size) * 4)
|
||||
emit("umull r8, r9, r2, r5")
|
||||
emit("stmia r0!, {r8, r9}")
|
||||
|
||||
emit("sub r0, %s", (size + init_size) * 4)
|
||||
emit("sub r1, %s", (size) * 4)
|
||||
print ""
|
||||
elif init_size == 2:
|
||||
emit("ldmia r1!, {r2, r3}")
|
||||
emit("add r1, %s", (size - init_size * 2) * 4)
|
||||
emit("ldmia r1!, {r5, r6}")
|
||||
@@ -66,7 +77,7 @@ if init_size > 0:
|
||||
emit("sub r1, %s", (size) * 4)
|
||||
|
||||
# load input words
|
||||
emit("ldmia r1!, {%s}", ",".join(["r%s" % (r[i]) for i in xrange(s)]))
|
||||
emit("ldmia r1!, {%s}", ", ".join(["r%s" % (r[i]) for i in xrange(s)]))
|
||||
print ""
|
||||
|
||||
emit("umull r11, r12, r2, r2")
|
||||
|
||||
Reference in New Issue
Block a user