From 467c49e7cf6fa04771f64b42f52239fc07f8c046 Mon Sep 17 00:00:00 2001 From: Ken MacKay Date: Tue, 12 Jan 2016 21:22:01 -0800 Subject: [PATCH] Add fast square asm for AVR (#50) --- asm_avr.inc | 61 +- asm_avr_mult_square.inc | 2698 +++++++++++++++++++++++++++++++++------ 2 files changed, 2378 insertions(+), 381 deletions(-) diff --git a/asm_avr.inc b/asm_avr.inc index b6c4a41..c988040 100644 --- a/asm_avr.inc +++ b/asm_avr.inc @@ -250,17 +250,70 @@ uECC_VLI_API void uECC_vli_mult(uECC_word_t *result, FAST_MULT_ASM_32 "pop r18 \n\t" #endif - "done: \n\t" + "2: \n\t" "eor r1, r1 \n\t" : "+x" (left), "+y" (right), "+z" (result) : "r" (r18) : "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r19", "r20", - "r21", "r22", "r23", "r24", "r25", "cc", "memory" + "r21", "r22", "r23", "r24", "r25", "cc" ); } #define asm_mult 1 +#if uECC_SQUARE_FUNC +__attribute((noinline)) +uECC_VLI_API void uECC_vli_square(uECC_word_t *result, + const uECC_word_t *left, + wordcount_t num_words) { + /* num_words should already be in r20. */ + register wordcount_t r20 __asm__("r20") = num_words; + + __asm__ volatile ( + "push r20 \n\t" +#if (uECC_MIN_WORDS == 20) + FAST_SQUARE_ASM_20 + "pop r20 \n\t" + #if (uECC_MAX_WORDS > 20) + FAST_SQUARE_ASM_20_TO_24 + #endif + #if (uECC_MAX_WORDS > 24) + FAST_SQUARE_ASM_24_TO_28 + #endif + #if (uECC_MAX_WORDS > 28) + FAST_SQUARE_ASM_28_TO_32 + #endif +#elif (uECC_MIN_WORDS == 24) + FAST_SQUARE_ASM_24 + "pop r20 \n\t" + #if (uECC_MAX_WORDS > 24) + FAST_SQUARE_ASM_24_TO_28 + #endif + #if (uECC_MAX_WORDS > 28) + FAST_SQUARE_ASM_28_TO_32 + #endif +#elif (uECC_MIN_WORDS == 28) + FAST_SQUARE_ASM_28 + "pop r20 \n\t" + #if (uECC_MAX_WORDS > 28) + FAST_SQUARE_ASM_28_TO_32 + #endif +#elif (uECC_MIN_WORDS == 32) + FAST_SQUARE_ASM_32 + "pop r20 \n\t" +#endif + "2: \n\t" + "eor r1, r1 \n\t" + : "+x" (left), "+z" (result) + : "r" (r20) + : "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", + "r21", "r22", "r23", "r24", "r25", "r28", "r29", "cc" + ); +} +#define asm_square 1 +#endif /* uECC_SQUARE_FUNC */ + #endif /* (uECC_OPTIMIZATION_LEVEL >= 3) */ #if uECC_SUPPORTS_secp160r1 @@ -434,7 +487,7 @@ static void vli_mmod_fast_secp160r1(uECC_word_t *result, uECC_word_t *product) { : "+x" (product), [carry] "+r" (carry) : "y" (result) - : "r0", "r18", "r19", "r30", "r31", "cc", "memory" + : "r0", "r18", "r19", "r30", "r31", "cc" ); if (carry > 0) { @@ -759,7 +812,7 @@ static void vli_mmod_fast_secp256k1(uECC_word_t *result, uECC_word_t *product) { : "+x" (product), [carry] "+r" (carry) : "y" (result) - : "r0", "r18", "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r30", "r31", "cc", "memory" + : "r0", "r18", "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r30", "r31", "cc" ); if (carry > 0) { diff --git a/asm_avr_mult_square.inc b/asm_avr_mult_square.inc index 687c618..7ae08bc 100644 --- a/asm_avr_mult_square.inc +++ b/asm_avr_mult_square.inc @@ -1910,7 +1910,7 @@ #define FAST_MULT_ASM_20_TO_24 \ "cpi r18, 20 \n\t" \ "brne 1f \n\t" \ - "jmp done \n\t" \ + "jmp 2f \n\t" \ "1: \n\t" \ "ld r2, x+ \n\t" \ "ld r6, y+ \n\t" \ @@ -5583,7 +5583,7 @@ #define FAST_MULT_ASM_24_TO_28 \ "cpi r18, 24 \n\t" \ "brne 1f \n\t" \ - "jmp done \n\t" \ + "jmp 2f \n\t" \ "1: \n\t" \ "ld r2, x+ \n\t" \ "ld r6, y+ \n\t" \ @@ -10404,7 +10404,7 @@ #define FAST_MULT_ASM_28_TO_32 \ "cpi r18, 28 \n\t" \ "brne 1f \n\t" \ - "jmp done \n\t" \ + "jmp 2f \n\t" \ "1: \n\t" \ "ld r2, x+ \n\t" \ "ld r6, y+ \n\t" \ @@ -16557,7 +16557,9 @@ "ld r19, x+ \n\t" \ "ld r20, x+ \n\t" \ "ld r21, x+ \n\t" \ - "ldi r27, 0 \n\t" \ + "push r26 \n\t" \ + "push r27 \n\t" \ + "ldi r25, 0 \n\t" \ \ "ldi r23, 0 \n\t" \ "mul r2, r2 \n\t" \ @@ -16568,40 +16570,40 @@ "mul r2, r3 \n\t" \ "lsl r0 \n\t" \ "rol r1 \n\t" \ - "adc r24, r27 \n\t" \ + "adc r24, r25 \n\t" \ "add r22, r0 \n\t" \ "adc r23, r1 \n\t" \ - "adc r24, r27 \n\t" \ + "adc r24, r25 \n\t" \ "st z+, r22 \n\t" \ \ "ldi r22, 0 \n\t" \ "mul r2, r4 \n\t" \ "lsl r0 \n\t" \ "rol r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r3, r3 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "st z+, r23 \n\t" \ \ "ldi r26, 0 \n\t" \ "mul r2, r5 \n\t" \ "mov r23, r0 \n\t" \ - "mov r25, r1 \n\t" \ + "mov r27, r1 \n\t" \ "mul r3, r4 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "lsl r23 \n\t" \ - "rol r25 \n\t" \ + "rol r27 \n\t" \ "rol r26 \n\t" \ "add r23, r24 \n\t" \ - "adc r25, r22 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r22 \n\t" \ + "adc r26, r25 \n\t" \ "st z+, r23 \n\t" \ \ "ldi r22, 0 \n\t" \ @@ -16611,37 +16613,37 @@ "mul r3, r5 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "lsl r23 \n\t" \ "rol r24 \n\t" \ "rol r22 \n\t" \ "mul r4, r4 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ - "add r23, r25 \n\t" \ + "adc r22, r25 \n\t" \ + "add r23, r27 \n\t" \ "adc r24, r26 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "st z+, r23 \n\t" \ \ "ldi r26, 0 \n\t" \ "mul r2, r7 \n\t" \ "mov r23, r0 \n\t" \ - "mov r25, r1 \n\t" \ + "mov r27, r1 \n\t" \ "mul r3, r6 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r4, r5 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "lsl r23 \n\t" \ - "rol r25 \n\t" \ + "rol r27 \n\t" \ "rol r26 \n\t" \ "add r23, r24 \n\t" \ - "adc r25, r22 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r22 \n\t" \ + "adc r26, r25 \n\t" \ "st z+, r23 \n\t" \ \ "ldi r22, 0 \n\t" \ @@ -16651,45 +16653,45 @@ "mul r3, r7 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r4, r6 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "lsl r23 \n\t" \ "rol r24 \n\t" \ "rol r22 \n\t" \ "mul r5, r5 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ - "add r23, r25 \n\t" \ + "adc r22, r25 \n\t" \ + "add r23, r27 \n\t" \ "adc r24, r26 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "st z+, r23 \n\t" \ \ "ldi r26, 0 \n\t" \ "mul r2, r9 \n\t" \ "mov r23, r0 \n\t" \ - "mov r25, r1 \n\t" \ + "mov r27, r1 \n\t" \ "mul r3, r8 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r4, r7 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r5, r6 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "lsl r23 \n\t" \ - "rol r25 \n\t" \ + "rol r27 \n\t" \ "rol r26 \n\t" \ "add r23, r24 \n\t" \ - "adc r25, r22 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r22 \n\t" \ + "adc r26, r25 \n\t" \ "st z+, r23 \n\t" \ \ "ldi r22, 0 \n\t" \ @@ -16699,53 +16701,53 @@ "mul r3, r9 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r4, r8 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r5, r7 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "lsl r23 \n\t" \ "rol r24 \n\t" \ "rol r22 \n\t" \ "mul r6, r6 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ - "add r23, r25 \n\t" \ + "adc r22, r25 \n\t" \ + "add r23, r27 \n\t" \ "adc r24, r26 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "st z+, r23 \n\t" \ \ "ldi r26, 0 \n\t" \ "mul r2, r11 \n\t" \ "mov r23, r0 \n\t" \ - "mov r25, r1 \n\t" \ + "mov r27, r1 \n\t" \ "mul r3, r10 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r4, r9 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r5, r8 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r6, r7 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "lsl r23 \n\t" \ - "rol r25 \n\t" \ + "rol r27 \n\t" \ "rol r26 \n\t" \ "add r23, r24 \n\t" \ - "adc r25, r22 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r22 \n\t" \ + "adc r26, r25 \n\t" \ "st z+, r23 \n\t" \ \ "ldi r22, 0 \n\t" \ @@ -16755,61 +16757,61 @@ "mul r3, r11 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r4, r10 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r5, r9 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r6, r8 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "lsl r23 \n\t" \ "rol r24 \n\t" \ "rol r22 \n\t" \ "mul r7, r7 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ - "add r23, r25 \n\t" \ + "adc r22, r25 \n\t" \ + "add r23, r27 \n\t" \ "adc r24, r26 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "st z+, r23 \n\t" \ \ "ldi r26, 0 \n\t" \ "mul r2, r13 \n\t" \ "mov r23, r0 \n\t" \ - "mov r25, r1 \n\t" \ + "mov r27, r1 \n\t" \ "mul r3, r12 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r4, r11 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r5, r10 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r6, r9 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r7, r8 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "lsl r23 \n\t" \ - "rol r25 \n\t" \ + "rol r27 \n\t" \ "rol r26 \n\t" \ "add r23, r24 \n\t" \ - "adc r25, r22 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r22 \n\t" \ + "adc r26, r25 \n\t" \ "st z+, r23 \n\t" \ \ "ldi r22, 0 \n\t" \ @@ -16819,69 +16821,69 @@ "mul r3, r13 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r4, r12 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r5, r11 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r6, r10 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r7, r9 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "lsl r23 \n\t" \ "rol r24 \n\t" \ "rol r22 \n\t" \ "mul r8, r8 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ - "add r23, r25 \n\t" \ + "adc r22, r25 \n\t" \ + "add r23, r27 \n\t" \ "adc r24, r26 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "st z+, r23 \n\t" \ \ "ldi r26, 0 \n\t" \ "mul r2, r15 \n\t" \ "mov r23, r0 \n\t" \ - "mov r25, r1 \n\t" \ + "mov r27, r1 \n\t" \ "mul r3, r14 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r4, r13 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r5, r12 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r6, r11 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r7, r10 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r8, r9 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "lsl r23 \n\t" \ - "rol r25 \n\t" \ + "rol r27 \n\t" \ "rol r26 \n\t" \ "add r23, r24 \n\t" \ - "adc r25, r22 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r22 \n\t" \ + "adc r26, r25 \n\t" \ "st z+, r23 \n\t" \ \ "ldi r22, 0 \n\t" \ @@ -16891,77 +16893,77 @@ "mul r3, r15 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r4, r14 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r5, r13 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r6, r12 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r7, r11 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r8, r10 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "lsl r23 \n\t" \ "rol r24 \n\t" \ "rol r22 \n\t" \ "mul r9, r9 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ - "add r23, r25 \n\t" \ + "adc r22, r25 \n\t" \ + "add r23, r27 \n\t" \ "adc r24, r26 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "st z+, r23 \n\t" \ \ "ldi r26, 0 \n\t" \ "mul r2, r17 \n\t" \ "mov r23, r0 \n\t" \ - "mov r25, r1 \n\t" \ + "mov r27, r1 \n\t" \ "mul r3, r16 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r4, r15 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r5, r14 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r6, r13 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r7, r12 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r8, r11 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r9, r10 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "lsl r23 \n\t" \ - "rol r25 \n\t" \ + "rol r27 \n\t" \ "rol r26 \n\t" \ "add r23, r24 \n\t" \ - "adc r25, r22 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r22 \n\t" \ + "adc r26, r25 \n\t" \ "st z+, r23 \n\t" \ \ "ldi r22, 0 \n\t" \ @@ -16971,85 +16973,85 @@ "mul r3, r17 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r4, r16 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r5, r15 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r6, r14 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r7, r13 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r8, r12 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r9, r11 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "lsl r23 \n\t" \ "rol r24 \n\t" \ "rol r22 \n\t" \ "mul r10, r10 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ - "add r23, r25 \n\t" \ + "adc r22, r25 \n\t" \ + "add r23, r27 \n\t" \ "adc r24, r26 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "st z+, r23 \n\t" \ \ "ldi r26, 0 \n\t" \ "mul r2, r19 \n\t" \ "mov r23, r0 \n\t" \ - "mov r25, r1 \n\t" \ + "mov r27, r1 \n\t" \ "mul r3, r18 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r4, r17 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r5, r16 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r6, r15 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r7, r14 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r8, r13 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r9, r12 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r10, r11 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "lsl r23 \n\t" \ - "rol r25 \n\t" \ + "rol r27 \n\t" \ "rol r26 \n\t" \ "add r23, r24 \n\t" \ - "adc r25, r22 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r22 \n\t" \ + "adc r26, r25 \n\t" \ "st z+, r23 \n\t" \ \ "ldi r22, 0 \n\t" \ @@ -17059,93 +17061,93 @@ "mul r3, r19 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r4, r18 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r5, r17 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r6, r16 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r7, r15 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r8, r14 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r9, r13 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r10, r12 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "lsl r23 \n\t" \ "rol r24 \n\t" \ "rol r22 \n\t" \ "mul r11, r11 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ - "add r23, r25 \n\t" \ + "adc r22, r25 \n\t" \ + "add r23, r27 \n\t" \ "adc r24, r26 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "st z+, r23 \n\t" \ \ "ldi r26, 0 \n\t" \ "mul r2, r21 \n\t" \ "mov r23, r0 \n\t" \ - "mov r25, r1 \n\t" \ + "mov r27, r1 \n\t" \ "mul r3, r20 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r4, r19 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r5, r18 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r6, r17 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r7, r16 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r8, r15 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r9, r14 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r10, r13 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r11, r12 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "lsl r23 \n\t" \ - "rol r25 \n\t" \ + "rol r27 \n\t" \ "rol r26 \n\t" \ "add r23, r24 \n\t" \ - "adc r25, r22 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r22 \n\t" \ + "adc r26, r25 \n\t" \ "st z+, r23 \n\t" \ \ "ldi r22, 0 \n\t" \ @@ -17155,89 +17157,89 @@ "mul r4, r20 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r5, r19 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r6, r18 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r7, r17 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r8, r16 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r9, r15 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r10, r14 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r11, r13 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "lsl r23 \n\t" \ "rol r24 \n\t" \ "rol r22 \n\t" \ "mul r12, r12 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ - "add r23, r25 \n\t" \ + "adc r22, r25 \n\t" \ + "add r23, r27 \n\t" \ "adc r24, r26 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "st z+, r23 \n\t" \ \ "ldi r26, 0 \n\t" \ "mul r4, r21 \n\t" \ "mov r23, r0 \n\t" \ - "mov r25, r1 \n\t" \ + "mov r27, r1 \n\t" \ "mul r5, r20 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r6, r19 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r7, r18 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r8, r17 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r9, r16 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r10, r15 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r11, r14 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r12, r13 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "lsl r23 \n\t" \ - "rol r25 \n\t" \ + "rol r27 \n\t" \ "rol r26 \n\t" \ "add r23, r24 \n\t" \ - "adc r25, r22 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r22 \n\t" \ + "adc r26, r25 \n\t" \ "st z+, r23 \n\t" \ \ "ldi r22, 0 \n\t" \ @@ -17247,81 +17249,81 @@ "mul r6, r20 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r7, r19 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r8, r18 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r9, r17 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r10, r16 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r11, r15 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r12, r14 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "lsl r23 \n\t" \ "rol r24 \n\t" \ "rol r22 \n\t" \ "mul r13, r13 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ - "add r23, r25 \n\t" \ + "adc r22, r25 \n\t" \ + "add r23, r27 \n\t" \ "adc r24, r26 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "st z+, r23 \n\t" \ \ "ldi r26, 0 \n\t" \ "mul r6, r21 \n\t" \ "mov r23, r0 \n\t" \ - "mov r25, r1 \n\t" \ + "mov r27, r1 \n\t" \ "mul r7, r20 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r8, r19 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r9, r18 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r10, r17 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r11, r16 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r12, r15 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r13, r14 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "lsl r23 \n\t" \ - "rol r25 \n\t" \ + "rol r27 \n\t" \ "rol r26 \n\t" \ "add r23, r24 \n\t" \ - "adc r25, r22 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r22 \n\t" \ + "adc r26, r25 \n\t" \ "st z+, r23 \n\t" \ \ "ldi r22, 0 \n\t" \ @@ -17331,73 +17333,73 @@ "mul r8, r20 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r9, r19 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r10, r18 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r11, r17 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r12, r16 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r13, r15 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "lsl r23 \n\t" \ "rol r24 \n\t" \ "rol r22 \n\t" \ "mul r14, r14 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ - "add r23, r25 \n\t" \ + "adc r22, r25 \n\t" \ + "add r23, r27 \n\t" \ "adc r24, r26 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "st z+, r23 \n\t" \ \ "ldi r26, 0 \n\t" \ "mul r8, r21 \n\t" \ "mov r23, r0 \n\t" \ - "mov r25, r1 \n\t" \ + "mov r27, r1 \n\t" \ "mul r9, r20 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r10, r19 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r11, r18 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r12, r17 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r13, r16 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r14, r15 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "lsl r23 \n\t" \ - "rol r25 \n\t" \ + "rol r27 \n\t" \ "rol r26 \n\t" \ "add r23, r24 \n\t" \ - "adc r25, r22 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r22 \n\t" \ + "adc r26, r25 \n\t" \ "st z+, r23 \n\t" \ \ "ldi r22, 0 \n\t" \ @@ -17407,65 +17409,65 @@ "mul r10, r20 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r11, r19 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r12, r18 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r13, r17 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r14, r16 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "lsl r23 \n\t" \ "rol r24 \n\t" \ "rol r22 \n\t" \ "mul r15, r15 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ - "add r23, r25 \n\t" \ + "adc r22, r25 \n\t" \ + "add r23, r27 \n\t" \ "adc r24, r26 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "st z+, r23 \n\t" \ \ "ldi r26, 0 \n\t" \ "mul r10, r21 \n\t" \ "mov r23, r0 \n\t" \ - "mov r25, r1 \n\t" \ + "mov r27, r1 \n\t" \ "mul r11, r20 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r12, r19 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r13, r18 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r14, r17 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r15, r16 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "lsl r23 \n\t" \ - "rol r25 \n\t" \ + "rol r27 \n\t" \ "rol r26 \n\t" \ "add r23, r24 \n\t" \ - "adc r25, r22 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r22 \n\t" \ + "adc r26, r25 \n\t" \ "st z+, r23 \n\t" \ \ "ldi r22, 0 \n\t" \ @@ -17475,57 +17477,57 @@ "mul r12, r20 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r13, r19 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r14, r18 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r15, r17 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "lsl r23 \n\t" \ "rol r24 \n\t" \ "rol r22 \n\t" \ "mul r16, r16 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ - "add r23, r25 \n\t" \ + "adc r22, r25 \n\t" \ + "add r23, r27 \n\t" \ "adc r24, r26 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "st z+, r23 \n\t" \ \ "ldi r26, 0 \n\t" \ "mul r12, r21 \n\t" \ "mov r23, r0 \n\t" \ - "mov r25, r1 \n\t" \ + "mov r27, r1 \n\t" \ "mul r13, r20 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r14, r19 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r15, r18 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r16, r17 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "lsl r23 \n\t" \ - "rol r25 \n\t" \ + "rol r27 \n\t" \ "rol r26 \n\t" \ "add r23, r24 \n\t" \ - "adc r25, r22 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r22 \n\t" \ + "adc r26, r25 \n\t" \ "st z+, r23 \n\t" \ \ "ldi r22, 0 \n\t" \ @@ -17535,49 +17537,49 @@ "mul r14, r20 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r15, r19 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r16, r18 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "lsl r23 \n\t" \ "rol r24 \n\t" \ "rol r22 \n\t" \ "mul r17, r17 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ - "add r23, r25 \n\t" \ + "adc r22, r25 \n\t" \ + "add r23, r27 \n\t" \ "adc r24, r26 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "st z+, r23 \n\t" \ \ "ldi r26, 0 \n\t" \ "mul r14, r21 \n\t" \ "mov r23, r0 \n\t" \ - "mov r25, r1 \n\t" \ + "mov r27, r1 \n\t" \ "mul r15, r20 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r16, r19 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r17, r18 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "lsl r23 \n\t" \ - "rol r25 \n\t" \ + "rol r27 \n\t" \ "rol r26 \n\t" \ "add r23, r24 \n\t" \ - "adc r25, r22 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r22 \n\t" \ + "adc r26, r25 \n\t" \ "st z+, r23 \n\t" \ \ "ldi r22, 0 \n\t" \ @@ -17587,41 +17589,41 @@ "mul r16, r20 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "mul r17, r19 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "lsl r23 \n\t" \ "rol r24 \n\t" \ "rol r22 \n\t" \ "mul r18, r18 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ - "add r23, r25 \n\t" \ + "adc r22, r25 \n\t" \ + "add r23, r27 \n\t" \ "adc r24, r26 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "st z+, r23 \n\t" \ \ "ldi r26, 0 \n\t" \ "mul r16, r21 \n\t" \ "mov r23, r0 \n\t" \ - "mov r25, r1 \n\t" \ + "mov r27, r1 \n\t" \ "mul r17, r20 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "mul r18, r19 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "lsl r23 \n\t" \ - "rol r25 \n\t" \ + "rol r27 \n\t" \ "rol r26 \n\t" \ "add r23, r24 \n\t" \ - "adc r25, r22 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r22 \n\t" \ + "adc r26, r25 \n\t" \ "st z+, r23 \n\t" \ \ "ldi r22, 0 \n\t" \ @@ -17631,65 +17633,622 @@ "mul r18, r20 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "lsl r23 \n\t" \ "rol r24 \n\t" \ "rol r22 \n\t" \ "mul r19, r19 \n\t" \ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ - "adc r22, r27 \n\t" \ - "add r23, r25 \n\t" \ + "adc r22, r25 \n\t" \ + "add r23, r27 \n\t" \ "adc r24, r26 \n\t" \ - "adc r22, r27 \n\t" \ + "adc r22, r25 \n\t" \ "st z+, r23 \n\t" \ \ "ldi r26, 0 \n\t" \ "mul r18, r21 \n\t" \ "mov r23, r0 \n\t" \ - "mov r25, r1 \n\t" \ + "mov r27, r1 \n\t" \ "mul r19, r20 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r1 \n\t" \ + "adc r26, r25 \n\t" \ "lsl r23 \n\t" \ - "rol r25 \n\t" \ + "rol r27 \n\t" \ "rol r26 \n\t" \ "add r23, r24 \n\t" \ - "adc r25, r22 \n\t" \ - "adc r26, r27 \n\t" \ + "adc r27, r22 \n\t" \ + "adc r26, r25 \n\t" \ "st z+, r23 \n\t" \ \ "ldi r23, 0 \n\t" \ "mul r19, r21 \n\t" \ "lsl r0 \n\t" \ "rol r1 \n\t" \ - "adc r23, r27 \n\t" \ - "add r25, r0 \n\t" \ + "adc r23, r25 \n\t" \ + "add r27, r0 \n\t" \ "adc r26, r1 \n\t" \ - "adc r23, r27 \n\t" \ + "adc r23, r25 \n\t" \ "mul r20, r20 \n\t" \ - "add r25, r0 \n\t" \ + "add r27, r0 \n\t" \ "adc r26, r1 \n\t" \ - "adc r23, r27 \n\t" \ - "st z+, r25 \n\t" \ + "adc r23, r25 \n\t" \ + "st z+, r27 \n\t" \ \ - "ldi r25, 0 \n\t" \ + "ldi r27, 0 \n\t" \ "mul r20, r21 \n\t" \ "lsl r0 \n\t" \ "rol r1 \n\t" \ - "adc r25, r27 \n\t" \ + "adc r27, r25 \n\t" \ "add r26, r0 \n\t" \ "adc r23, r1 \n\t" \ - "adc r25, r27 \n\t" \ + "adc r27, r25 \n\t" \ "st z+, r26 \n\t" \ \ "mul r21, r21 \n\t" \ "add r23, r0 \n\t" \ - "adc r25, r1 \n\t" \ + "adc r27, r1 \n\t" \ "st z+, r23 \n\t" \ - "st z+, r25 \n\t" \ - "eor r1, r1 \n\t" + "st z+, r27 \n\t" \ + "pop r27 \n\t" \ + "pop r26 \n\t" + +#define FAST_SQUARE_ASM_20_TO_24 \ + "cpi r20, 20 \n\t" \ + "brne 1f \n\t" \ + "jmp 2f \n\t" \ + "1: \n\t" \ + "ld r2, x+ \n\t" \ + "ld r3, x+ \n\t" \ + "ld r4, x+ \n\t" \ + "ld r5, x+ \n\t" \ + "sbiw r26, 24 \n\t" \ + "sbiw r30, 20 \n\t" \ + "ld r6, x+ \n\t" \ + "ld r7, x+ \n\t" \ + "ld r8, x+ \n\t" \ + "ld r9, x+ \n\t" \ + \ + "mul r2, r6 \n\t" \ + "mov r10, r0 \n\t" \ + "mov r11, r1 \n\t" \ + "mov r12, r25 \n\t" \ + "mov r13, r25 \n\t" \ + "mul r2, r7 \n\t" \ + "add r11, r0 \n\t" \ + "adc r12, r1 \n\t" \ + "adc r13, r25 \n\t" \ + "mul r3, r6 \n\t" \ + "add r11, r0 \n\t" \ + "adc r12, r1 \n\t" \ + "adc r13, r25 \n\t" \ + \ + "mov r14, r25 \n\t" \ + "mul r2, r8 \n\t" \ + "add r12, r0 \n\t" \ + "adc r13, r1 \n\t" \ + "adc r14, r25 \n\t" \ + "mul r3, r7 \n\t" \ + "add r12, r0 \n\t" \ + "adc r13, r1 \n\t" \ + "adc r14, r25 \n\t" \ + "mul r4, r6 \n\t" \ + "add r12, r0 \n\t" \ + "adc r13, r1 \n\t" \ + "adc r14, r25 \n\t" \ + \ + "mov r15, r25 \n\t" \ + "mul r2, r9 \n\t" \ + "add r13, r0 \n\t" \ + "adc r14, r1 \n\t" \ + "adc r15, r25 \n\t" \ + "mul r3, r8 \n\t" \ + "add r13, r0 \n\t" \ + "adc r14, r1 \n\t" \ + "adc r15, r25 \n\t" \ + "mul r4, r7 \n\t" \ + "add r13, r0 \n\t" \ + "adc r14, r1 \n\t" \ + "adc r15, r25 \n\t" \ + "mul r5, r6 \n\t" \ + "add r13, r0 \n\t" \ + "adc r14, r1 \n\t" \ + "adc r15, r25 \n\t" \ + \ + "ld r6, x+ \n\t" \ + "mov r16, r25 \n\t" \ + "mul r2, r6 \n\t" \ + "add r14, r0 \n\t" \ + "adc r15, r1 \n\t" \ + "adc r16, r25 \n\t" \ + "mul r3, r9 \n\t" \ + "add r14, r0 \n\t" \ + "adc r15, r1 \n\t" \ + "adc r16, r25 \n\t" \ + "mul r4, r8 \n\t" \ + "add r14, r0 \n\t" \ + "adc r15, r1 \n\t" \ + "adc r16, r25 \n\t" \ + "mul r5, r7 \n\t" \ + "add r14, r0 \n\t" \ + "adc r15, r1 \n\t" \ + "adc r16, r25 \n\t" \ + \ + "ld r7, x+ \n\t" \ + "mov r17, r25 \n\t" \ + "mul r2, r7 \n\t" \ + "add r15, r0 \n\t" \ + "adc r16, r1 \n\t" \ + "adc r17, r25 \n\t" \ + "mul r3, r6 \n\t" \ + "add r15, r0 \n\t" \ + "adc r16, r1 \n\t" \ + "adc r17, r25 \n\t" \ + "mul r4, r9 \n\t" \ + "add r15, r0 \n\t" \ + "adc r16, r1 \n\t" \ + "adc r17, r25 \n\t" \ + "mul r5, r8 \n\t" \ + "add r15, r0 \n\t" \ + "adc r16, r1 \n\t" \ + "adc r17, r25 \n\t" \ + \ + "ld r8, x+ \n\t" \ + "mov r18, r25 \n\t" \ + "mul r2, r8 \n\t" \ + "add r16, r0 \n\t" \ + "adc r17, r1 \n\t" \ + "adc r18, r25 \n\t" \ + "mul r3, r7 \n\t" \ + "add r16, r0 \n\t" \ + "adc r17, r1 \n\t" \ + "adc r18, r25 \n\t" \ + "mul r4, r6 \n\t" \ + "add r16, r0 \n\t" \ + "adc r17, r1 \n\t" \ + "adc r18, r25 \n\t" \ + "mul r5, r9 \n\t" \ + "add r16, r0 \n\t" \ + "adc r17, r1 \n\t" \ + "adc r18, r25 \n\t" \ + \ + "ld r9, x+ \n\t" \ + "mov r19, r25 \n\t" \ + "mul r2, r9 \n\t" \ + "add r17, r0 \n\t" \ + "adc r18, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r3, r8 \n\t" \ + "add r17, r0 \n\t" \ + "adc r18, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r4, r7 \n\t" \ + "add r17, r0 \n\t" \ + "adc r18, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r5, r6 \n\t" \ + "add r17, r0 \n\t" \ + "adc r18, r1 \n\t" \ + "adc r19, r25 \n\t" \ + \ + "ld r6, x+ \n\t" \ + "mov r21, r25 \n\t" \ + "mul r2, r6 \n\t" \ + "add r18, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r3, r9 \n\t" \ + "add r18, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r4, r8 \n\t" \ + "add r18, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r5, r7 \n\t" \ + "add r18, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r21, r25 \n\t" \ + \ + "ld r7, x+ \n\t" \ + "mov r22, r25 \n\t" \ + "mul r2, r7 \n\t" \ + "add r19, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r22, r25 \n\t" \ + "mul r3, r6 \n\t" \ + "add r19, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r22, r25 \n\t" \ + "mul r4, r9 \n\t" \ + "add r19, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r22, r25 \n\t" \ + "mul r5, r8 \n\t" \ + "add r19, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r22, r25 \n\t" \ + \ + "ld r8, x+ \n\t" \ + "mov r23, r25 \n\t" \ + "mul r2, r8 \n\t" \ + "add r21, r0 \n\t" \ + "adc r22, r1 \n\t" \ + "adc r23, r25 \n\t" \ + "mul r3, r7 \n\t" \ + "add r21, r0 \n\t" \ + "adc r22, r1 \n\t" \ + "adc r23, r25 \n\t" \ + "mul r4, r6 \n\t" \ + "add r21, r0 \n\t" \ + "adc r22, r1 \n\t" \ + "adc r23, r25 \n\t" \ + "mul r5, r9 \n\t" \ + "add r21, r0 \n\t" \ + "adc r22, r1 \n\t" \ + "adc r23, r25 \n\t" \ + \ + "ld r9, x+ \n\t" \ + "mov r24, r25 \n\t" \ + "mul r2, r9 \n\t" \ + "add r22, r0 \n\t" \ + "adc r23, r1 \n\t" \ + "adc r24, r25 \n\t" \ + "mul r3, r8 \n\t" \ + "add r22, r0 \n\t" \ + "adc r23, r1 \n\t" \ + "adc r24, r25 \n\t" \ + "mul r4, r7 \n\t" \ + "add r22, r0 \n\t" \ + "adc r23, r1 \n\t" \ + "adc r24, r25 \n\t" \ + "mul r5, r6 \n\t" \ + "add r22, r0 \n\t" \ + "adc r23, r1 \n\t" \ + "adc r24, r25 \n\t" \ + \ + "ld r6, x+ \n\t" \ + "mov r28, r25 \n\t" \ + "mul r2, r6 \n\t" \ + "add r23, r0 \n\t" \ + "adc r24, r1 \n\t" \ + "adc r28, r25 \n\t" \ + "mul r3, r9 \n\t" \ + "add r23, r0 \n\t" \ + "adc r24, r1 \n\t" \ + "adc r28, r25 \n\t" \ + "mul r4, r8 \n\t" \ + "add r23, r0 \n\t" \ + "adc r24, r1 \n\t" \ + "adc r28, r25 \n\t" \ + "mul r5, r7 \n\t" \ + "add r23, r0 \n\t" \ + "adc r24, r1 \n\t" \ + "adc r28, r25 \n\t" \ + \ + "ld r7, x+ \n\t" \ + "mov r29, r25 \n\t" \ + "mul r2, r7 \n\t" \ + "add r24, r0 \n\t" \ + "adc r28, r1 \n\t" \ + "adc r29, r25 \n\t" \ + "mul r3, r6 \n\t" \ + "add r24, r0 \n\t" \ + "adc r28, r1 \n\t" \ + "adc r29, r25 \n\t" \ + "mul r4, r9 \n\t" \ + "add r24, r0 \n\t" \ + "adc r28, r1 \n\t" \ + "adc r29, r25 \n\t" \ + "mul r5, r8 \n\t" \ + "add r24, r0 \n\t" \ + "adc r28, r1 \n\t" \ + "adc r29, r25 \n\t" \ + \ + "lsl r10 \n\t" \ + "rol r11 \n\t" \ + "rol r12 \n\t" \ + "rol r13 \n\t" \ + "rol r14 \n\t" \ + "rol r15 \n\t" \ + "rol r16 \n\t" \ + "rol r17 \n\t" \ + "rol r18 \n\t" \ + "rol r19 \n\t" \ + "rol r21 \n\t" \ + "rol r22 \n\t" \ + "rol r23 \n\t" \ + "rol r24 \n\t" \ + "rol r28 \n\t" \ + "rol r29 \n\t" \ + "ld r0, z \n\t" \ + "add r10, r0 \n\t" \ + "st z+, r10 \n\t" \ + "ld r0, z \n\t" \ + "adc r11, r0 \n\t" \ + "st z+, r11 \n\t" \ + "ld r0, z \n\t" \ + "adc r12, r0 \n\t" \ + "st z+, r12 \n\t" \ + "ld r0, z \n\t" \ + "adc r13, r0 \n\t" \ + "st z+, r13 \n\t" \ + "ld r0, z \n\t" \ + "adc r14, r0 \n\t" \ + "st z+, r14 \n\t" \ + "ld r0, z \n\t" \ + "adc r15, r0 \n\t" \ + "st z+, r15 \n\t" \ + "ld r0, z \n\t" \ + "adc r16, r0 \n\t" \ + "st z+, r16 \n\t" \ + "ld r0, z \n\t" \ + "adc r17, r0 \n\t" \ + "st z+, r17 \n\t" \ + "ld r0, z \n\t" \ + "adc r18, r0 \n\t" \ + "st z+, r18 \n\t" \ + "ld r0, z \n\t" \ + "adc r19, r0 \n\t" \ + "st z+, r19 \n\t" \ + "ld r0, z \n\t" \ + "adc r21, r0 \n\t" \ + "st z+, r21 \n\t" \ + "ld r0, z \n\t" \ + "adc r22, r0 \n\t" \ + "st z+, r22 \n\t" \ + "ld r0, z \n\t" \ + "adc r23, r0 \n\t" \ + "st z+, r23 \n\t" \ + "ld r0, z \n\t" \ + "adc r24, r0 \n\t" \ + "st z+, r24 \n\t" \ + "adc r28, r25 \n\t" \ + "adc r29, r25 \n\t" \ + "bst r28, 0 \n\t" \ + "lsr r29 \n\t" \ + "ror r28 \n\t" \ + \ + "ld r8, x+ \n\t" \ + "mov r10, r25 \n\t" \ + "mul r2, r8 \n\t" \ + "add r28, r0 \n\t" \ + "adc r29, r1 \n\t" \ + "adc r10, r25 \n\t" \ + "mul r3, r7 \n\t" \ + "add r28, r0 \n\t" \ + "adc r29, r1 \n\t" \ + "adc r10, r25 \n\t" \ + "mul r4, r6 \n\t" \ + "add r28, r0 \n\t" \ + "adc r29, r1 \n\t" \ + "adc r10, r25 \n\t" \ + "mul r5, r9 \n\t" \ + "add r28, r0 \n\t" \ + "adc r29, r1 \n\t" \ + "adc r10, r25 \n\t" \ + \ + "ld r9, x+ \n\t" \ + "mov r11, r25 \n\t" \ + "mul r2, r9 \n\t" \ + "add r29, r0 \n\t" \ + "adc r10, r1 \n\t" \ + "adc r11, r25 \n\t" \ + "mul r3, r8 \n\t" \ + "add r29, r0 \n\t" \ + "adc r10, r1 \n\t" \ + "adc r11, r25 \n\t" \ + "mul r4, r7 \n\t" \ + "add r29, r0 \n\t" \ + "adc r10, r1 \n\t" \ + "adc r11, r25 \n\t" \ + "mul r5, r6 \n\t" \ + "add r29, r0 \n\t" \ + "adc r10, r1 \n\t" \ + "adc r11, r25 \n\t" \ + \ + "ld r6, x+ \n\t" \ + "mov r12, r25 \n\t" \ + "mul r2, r6 \n\t" \ + "add r10, r0 \n\t" \ + "adc r11, r1 \n\t" \ + "adc r12, r25 \n\t" \ + "mul r3, r9 \n\t" \ + "add r10, r0 \n\t" \ + "adc r11, r1 \n\t" \ + "adc r12, r25 \n\t" \ + "mul r4, r8 \n\t" \ + "add r10, r0 \n\t" \ + "adc r11, r1 \n\t" \ + "adc r12, r25 \n\t" \ + "mul r5, r7 \n\t" \ + "add r10, r0 \n\t" \ + "adc r11, r1 \n\t" \ + "adc r12, r25 \n\t" \ + \ + "ld r7, x+ \n\t" \ + "mov r13, r25 \n\t" \ + "mul r2, r7 \n\t" \ + "add r11, r0 \n\t" \ + "adc r12, r1 \n\t" \ + "adc r13, r25 \n\t" \ + "mul r3, r6 \n\t" \ + "add r11, r0 \n\t" \ + "adc r12, r1 \n\t" \ + "adc r13, r25 \n\t" \ + "mul r4, r9 \n\t" \ + "add r11, r0 \n\t" \ + "adc r12, r1 \n\t" \ + "adc r13, r25 \n\t" \ + "mul r5, r8 \n\t" \ + "add r11, r0 \n\t" \ + "adc r12, r1 \n\t" \ + "adc r13, r25 \n\t" \ + \ + "ld r8, x+ \n\t" \ + "mov r14, r25 \n\t" \ + "mul r2, r8 \n\t" \ + "add r12, r0 \n\t" \ + "adc r13, r1 \n\t" \ + "adc r14, r25 \n\t" \ + "mul r3, r7 \n\t" \ + "add r12, r0 \n\t" \ + "adc r13, r1 \n\t" \ + "adc r14, r25 \n\t" \ + "mul r4, r6 \n\t" \ + "add r12, r0 \n\t" \ + "adc r13, r1 \n\t" \ + "adc r14, r25 \n\t" \ + "mul r5, r9 \n\t" \ + "add r12, r0 \n\t" \ + "adc r13, r1 \n\t" \ + "adc r14, r25 \n\t" \ + \ + "ld r9, x+ \n\t" \ + "mov r15, r25 \n\t" \ + "mul r2, r9 \n\t" \ + "add r13, r0 \n\t" \ + "adc r14, r1 \n\t" \ + "adc r15, r25 \n\t" \ + "mul r3, r8 \n\t" \ + "add r13, r0 \n\t" \ + "adc r14, r1 \n\t" \ + "adc r15, r25 \n\t" \ + "mul r4, r7 \n\t" \ + "add r13, r0 \n\t" \ + "adc r14, r1 \n\t" \ + "adc r15, r25 \n\t" \ + "mul r5, r6 \n\t" \ + "add r13, r0 \n\t" \ + "adc r14, r1 \n\t" \ + "adc r15, r25 \n\t" \ + \ + "lsl r28 \n\t" \ + "bld r28, 0 \n\t" \ + "rol r29 \n\t" \ + "rol r10 \n\t" \ + "rol r11 \n\t" \ + "rol r12 \n\t" \ + "rol r13 \n\t" \ + "rol r14 \n\t" \ + "rol r15 \n\t" \ + "ld r0, z \n\t" \ + "add r28, r0 \n\t" \ + "st z+, r28 \n\t" \ + "ld r0, z \n\t" \ + "adc r29, r0 \n\t" \ + "st z+, r29 \n\t" \ + "ld r0, z \n\t" \ + "adc r10, r0 \n\t" \ + "st z+, r10 \n\t" \ + "ld r0, z \n\t" \ + "adc r11, r0 \n\t" \ + "st z+, r11 \n\t" \ + "ld r0, z \n\t" \ + "adc r12, r0 \n\t" \ + "st z+, r12 \n\t" \ + "ld r0, z \n\t" \ + "adc r13, r0 \n\t" \ + "st z+, r13 \n\t" \ + "adc r14, r25 \n\t" \ + "adc r15, r25 \n\t" \ + \ + "mul r2, r2 \n\t" \ + "mov r16, r0 \n\t" \ + "mov r17, r1 \n\t" \ + "mul r3, r3 \n\t" \ + "mov r18, r0 \n\t" \ + "mov r19, r1 \n\t" \ + "mul r4, r4 \n\t" \ + "mov r21, r0 \n\t" \ + "mov r22, r1 \n\t" \ + "mul r5, r5 \n\t" \ + "mov r23, r0 \n\t" \ + "mov r24, r1 \n\t" \ + "add r16, r14 \n\t" \ + "adc r17, r15 \n\t" \ + "adc r18, r25 \n\t" \ + "adc r19, r25 \n\t" \ + \ + "mul r7, r5 \n\t" \ + "mov r14, r0 \n\t" \ + "mov r15, r1 \n\t" \ + "mov r28, r25 \n\t" \ + "mul r8, r4 \n\t" \ + "add r14, r0 \n\t" \ + "adc r15, r1 \n\t" \ + "adc r28, r25 \n\t" \ + "mul r9, r3 \n\t" \ + "add r14, r0 \n\t" \ + "adc r15, r1 \n\t" \ + "adc r28, r25 \n\t" \ + "mov r29, r25 \n\t" \ + "mul r8, r5 \n\t" \ + "add r15, r0 \n\t" \ + "adc r28, r1 \n\t" \ + "adc r29, r25 \n\t" \ + "mul r9, r4 \n\t" \ + "add r15, r0 \n\t" \ + "adc r28, r1 \n\t" \ + "adc r29, r25 \n\t" \ + "mul r2, r3 \n\t" \ + "add r15, r0 \n\t" \ + "adc r28, r1 \n\t" \ + "adc r29, r25 \n\t" \ + "mov r10, r25 \n\t" \ + "mul r9, r5 \n\t" \ + "add r28, r0 \n\t" \ + "adc r29, r1 \n\t" \ + "adc r10, r25 \n\t" \ + "mul r2, r4 \n\t" \ + "add r28, r0 \n\t" \ + "adc r29, r1 \n\t" \ + "adc r10, r25 \n\t" \ + "mov r11, r25 \n\t" \ + "mul r2, r5 \n\t" \ + "add r29, r0 \n\t" \ + "adc r10, r1 \n\t" \ + "adc r11, r25 \n\t" \ + "mul r3, r4 \n\t" \ + "add r29, r0 \n\t" \ + "adc r10, r1 \n\t" \ + "adc r11, r25 \n\t" \ + "mov r12, r25 \n\t" \ + "mul r3, r5 \n\t" \ + "add r10, r0 \n\t" \ + "adc r11, r1 \n\t" \ + "adc r12, r25 \n\t" \ + "mul r4, r5 \n\t" \ + "add r11, r0 \n\t" \ + "adc r12, r1 \n\t" \ + \ + "lsl r14 \n\t" \ + "rol r15 \n\t" \ + "rol r28 \n\t" \ + "rol r29 \n\t" \ + "rol r10 \n\t" \ + "rol r11 \n\t" \ + "rol r12 \n\t" \ + "adc r24, r25 \n\t" \ + "add r16, r14 \n\t" \ + "adc r17, r15 \n\t" \ + "adc r18, r28 \n\t" \ + "adc r19, r29 \n\t" \ + "adc r21, r10 \n\t" \ + "adc r22, r11 \n\t" \ + "adc r23, r12 \n\t" \ + "adc r24, r25 \n\t" \ + \ + "st z+, r16 \n\t" \ + "st z+, r17 \n\t" \ + "st z+, r18 \n\t" \ + "st z+, r19 \n\t" \ + "st z+, r21 \n\t" \ + "st z+, r22 \n\t" \ + "st z+, r23 \n\t" \ + "st z+, r24 \n\t" \ + "adiw r26, 4 \n\t" #define FAST_SQUARE_ASM_24 \ "ldi r25, 0 \n\t" \ @@ -19335,8 +19894,655 @@ "add r23, r0 \n\t" \ "adc r28, r1 \n\t" \ "st z+, r23 \n\t" \ - "st z+, r28 \n\t" \ - "eor r1, r1 \n\t" + "st z+, r28 \n\t" + +#define FAST_SQUARE_ASM_24_TO_28 \ + "cpi r20, 24 \n\t" \ + "brne 1f \n\t" \ + "jmp 2f \n\t" \ + "1: \n\t" \ + "ld r2, x+ \n\t" \ + "ld r3, x+ \n\t" \ + "ld r4, x+ \n\t" \ + "ld r5, x+ \n\t" \ + "sbiw r26, 28 \n\t" \ + "sbiw r30, 24 \n\t" \ + "ld r6, x+ \n\t" \ + "ld r7, x+ \n\t" \ + "ld r8, x+ \n\t" \ + "ld r9, x+ \n\t" \ + \ + "mul r2, r6 \n\t" \ + "mov r10, r0 \n\t" \ + "mov r11, r1 \n\t" \ + "mov r12, r25 \n\t" \ + "mov r13, r25 \n\t" \ + "mul r2, r7 \n\t" \ + "add r11, r0 \n\t" \ + "adc r12, r1 \n\t" \ + "adc r13, r25 \n\t" \ + "mul r3, r6 \n\t" \ + "add r11, r0 \n\t" \ + "adc r12, r1 \n\t" \ + "adc r13, r25 \n\t" \ + \ + "mov r14, r25 \n\t" \ + "mul r2, r8 \n\t" \ + "add r12, r0 \n\t" \ + "adc r13, r1 \n\t" \ + "adc r14, r25 \n\t" \ + "mul r3, r7 \n\t" \ + "add r12, r0 \n\t" \ + "adc r13, r1 \n\t" \ + "adc r14, r25 \n\t" \ + "mul r4, r6 \n\t" \ + "add r12, r0 \n\t" \ + "adc r13, r1 \n\t" \ + "adc r14, r25 \n\t" \ + \ + "mov r15, r25 \n\t" \ + "mul r2, r9 \n\t" \ + "add r13, r0 \n\t" \ + "adc r14, r1 \n\t" \ + "adc r15, r25 \n\t" \ + "mul r3, r8 \n\t" \ + "add r13, r0 \n\t" \ + "adc r14, r1 \n\t" \ + "adc r15, r25 \n\t" \ + "mul r4, r7 \n\t" \ + "add r13, r0 \n\t" \ + "adc r14, r1 \n\t" \ + "adc r15, r25 \n\t" \ + "mul r5, r6 \n\t" \ + "add r13, r0 \n\t" \ + "adc r14, r1 \n\t" \ + "adc r15, r25 \n\t" \ + \ + "ld r6, x+ \n\t" \ + "mov r16, r25 \n\t" \ + "mul r2, r6 \n\t" \ + "add r14, r0 \n\t" \ + "adc r15, r1 \n\t" \ + "adc r16, r25 \n\t" \ + "mul r3, r9 \n\t" \ + "add r14, r0 \n\t" \ + "adc r15, r1 \n\t" \ + "adc r16, r25 \n\t" \ + "mul r4, r8 \n\t" \ + "add r14, r0 \n\t" \ + "adc r15, r1 \n\t" \ + "adc r16, r25 \n\t" \ + "mul r5, r7 \n\t" \ + "add r14, r0 \n\t" \ + "adc r15, r1 \n\t" \ + "adc r16, r25 \n\t" \ + \ + "ld r7, x+ \n\t" \ + "mov r17, r25 \n\t" \ + "mul r2, r7 \n\t" \ + "add r15, r0 \n\t" \ + "adc r16, r1 \n\t" \ + "adc r17, r25 \n\t" \ + "mul r3, r6 \n\t" \ + "add r15, r0 \n\t" \ + "adc r16, r1 \n\t" \ + "adc r17, r25 \n\t" \ + "mul r4, r9 \n\t" \ + "add r15, r0 \n\t" \ + "adc r16, r1 \n\t" \ + "adc r17, r25 \n\t" \ + "mul r5, r8 \n\t" \ + "add r15, r0 \n\t" \ + "adc r16, r1 \n\t" \ + "adc r17, r25 \n\t" \ + \ + "ld r8, x+ \n\t" \ + "mov r18, r25 \n\t" \ + "mul r2, r8 \n\t" \ + "add r16, r0 \n\t" \ + "adc r17, r1 \n\t" \ + "adc r18, r25 \n\t" \ + "mul r3, r7 \n\t" \ + "add r16, r0 \n\t" \ + "adc r17, r1 \n\t" \ + "adc r18, r25 \n\t" \ + "mul r4, r6 \n\t" \ + "add r16, r0 \n\t" \ + "adc r17, r1 \n\t" \ + "adc r18, r25 \n\t" \ + "mul r5, r9 \n\t" \ + "add r16, r0 \n\t" \ + "adc r17, r1 \n\t" \ + "adc r18, r25 \n\t" \ + \ + "ld r9, x+ \n\t" \ + "mov r19, r25 \n\t" \ + "mul r2, r9 \n\t" \ + "add r17, r0 \n\t" \ + "adc r18, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r3, r8 \n\t" \ + "add r17, r0 \n\t" \ + "adc r18, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r4, r7 \n\t" \ + "add r17, r0 \n\t" \ + "adc r18, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r5, r6 \n\t" \ + "add r17, r0 \n\t" \ + "adc r18, r1 \n\t" \ + "adc r19, r25 \n\t" \ + \ + "ld r6, x+ \n\t" \ + "mov r21, r25 \n\t" \ + "mul r2, r6 \n\t" \ + "add r18, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r3, r9 \n\t" \ + "add r18, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r4, r8 \n\t" \ + "add r18, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r5, r7 \n\t" \ + "add r18, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r21, r25 \n\t" \ + \ + "ld r7, x+ \n\t" \ + "mov r22, r25 \n\t" \ + "mul r2, r7 \n\t" \ + "add r19, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r22, r25 \n\t" \ + "mul r3, r6 \n\t" \ + "add r19, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r22, r25 \n\t" \ + "mul r4, r9 \n\t" \ + "add r19, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r22, r25 \n\t" \ + "mul r5, r8 \n\t" \ + "add r19, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r22, r25 \n\t" \ + \ + "ld r8, x+ \n\t" \ + "mov r23, r25 \n\t" \ + "mul r2, r8 \n\t" \ + "add r21, r0 \n\t" \ + "adc r22, r1 \n\t" \ + "adc r23, r25 \n\t" \ + "mul r3, r7 \n\t" \ + "add r21, r0 \n\t" \ + "adc r22, r1 \n\t" \ + "adc r23, r25 \n\t" \ + "mul r4, r6 \n\t" \ + "add r21, r0 \n\t" \ + "adc r22, r1 \n\t" \ + "adc r23, r25 \n\t" \ + "mul r5, r9 \n\t" \ + "add r21, r0 \n\t" \ + "adc r22, r1 \n\t" \ + "adc r23, r25 \n\t" \ + \ + "ld r9, x+ \n\t" \ + "mov r24, r25 \n\t" \ + "mul r2, r9 \n\t" \ + "add r22, r0 \n\t" \ + "adc r23, r1 \n\t" \ + "adc r24, r25 \n\t" \ + "mul r3, r8 \n\t" \ + "add r22, r0 \n\t" \ + "adc r23, r1 \n\t" \ + "adc r24, r25 \n\t" \ + "mul r4, r7 \n\t" \ + "add r22, r0 \n\t" \ + "adc r23, r1 \n\t" \ + "adc r24, r25 \n\t" \ + "mul r5, r6 \n\t" \ + "add r22, r0 \n\t" \ + "adc r23, r1 \n\t" \ + "adc r24, r25 \n\t" \ + \ + "ld r6, x+ \n\t" \ + "mov r28, r25 \n\t" \ + "mul r2, r6 \n\t" \ + "add r23, r0 \n\t" \ + "adc r24, r1 \n\t" \ + "adc r28, r25 \n\t" \ + "mul r3, r9 \n\t" \ + "add r23, r0 \n\t" \ + "adc r24, r1 \n\t" \ + "adc r28, r25 \n\t" \ + "mul r4, r8 \n\t" \ + "add r23, r0 \n\t" \ + "adc r24, r1 \n\t" \ + "adc r28, r25 \n\t" \ + "mul r5, r7 \n\t" \ + "add r23, r0 \n\t" \ + "adc r24, r1 \n\t" \ + "adc r28, r25 \n\t" \ + \ + "ld r7, x+ \n\t" \ + "mov r29, r25 \n\t" \ + "mul r2, r7 \n\t" \ + "add r24, r0 \n\t" \ + "adc r28, r1 \n\t" \ + "adc r29, r25 \n\t" \ + "mul r3, r6 \n\t" \ + "add r24, r0 \n\t" \ + "adc r28, r1 \n\t" \ + "adc r29, r25 \n\t" \ + "mul r4, r9 \n\t" \ + "add r24, r0 \n\t" \ + "adc r28, r1 \n\t" \ + "adc r29, r25 \n\t" \ + "mul r5, r8 \n\t" \ + "add r24, r0 \n\t" \ + "adc r28, r1 \n\t" \ + "adc r29, r25 \n\t" \ + \ + "lsl r10 \n\t" \ + "rol r11 \n\t" \ + "rol r12 \n\t" \ + "rol r13 \n\t" \ + "rol r14 \n\t" \ + "rol r15 \n\t" \ + "rol r16 \n\t" \ + "rol r17 \n\t" \ + "rol r18 \n\t" \ + "rol r19 \n\t" \ + "rol r21 \n\t" \ + "rol r22 \n\t" \ + "rol r23 \n\t" \ + "rol r24 \n\t" \ + "rol r28 \n\t" \ + "rol r29 \n\t" \ + "ld r0, z \n\t" \ + "add r10, r0 \n\t" \ + "st z+, r10 \n\t" \ + "ld r0, z \n\t" \ + "adc r11, r0 \n\t" \ + "st z+, r11 \n\t" \ + "ld r0, z \n\t" \ + "adc r12, r0 \n\t" \ + "st z+, r12 \n\t" \ + "ld r0, z \n\t" \ + "adc r13, r0 \n\t" \ + "st z+, r13 \n\t" \ + "ld r0, z \n\t" \ + "adc r14, r0 \n\t" \ + "st z+, r14 \n\t" \ + "ld r0, z \n\t" \ + "adc r15, r0 \n\t" \ + "st z+, r15 \n\t" \ + "ld r0, z \n\t" \ + "adc r16, r0 \n\t" \ + "st z+, r16 \n\t" \ + "ld r0, z \n\t" \ + "adc r17, r0 \n\t" \ + "st z+, r17 \n\t" \ + "ld r0, z \n\t" \ + "adc r18, r0 \n\t" \ + "st z+, r18 \n\t" \ + "ld r0, z \n\t" \ + "adc r19, r0 \n\t" \ + "st z+, r19 \n\t" \ + "ld r0, z \n\t" \ + "adc r21, r0 \n\t" \ + "st z+, r21 \n\t" \ + "ld r0, z \n\t" \ + "adc r22, r0 \n\t" \ + "st z+, r22 \n\t" \ + "ld r0, z \n\t" \ + "adc r23, r0 \n\t" \ + "st z+, r23 \n\t" \ + "ld r0, z \n\t" \ + "adc r24, r0 \n\t" \ + "st z+, r24 \n\t" \ + "adc r28, r25 \n\t" \ + "adc r29, r25 \n\t" \ + "bst r28, 0 \n\t" \ + "lsr r29 \n\t" \ + "ror r28 \n\t" \ + \ + "ld r8, x+ \n\t" \ + "mov r10, r25 \n\t" \ + "mul r2, r8 \n\t" \ + "add r28, r0 \n\t" \ + "adc r29, r1 \n\t" \ + "adc r10, r25 \n\t" \ + "mul r3, r7 \n\t" \ + "add r28, r0 \n\t" \ + "adc r29, r1 \n\t" \ + "adc r10, r25 \n\t" \ + "mul r4, r6 \n\t" \ + "add r28, r0 \n\t" \ + "adc r29, r1 \n\t" \ + "adc r10, r25 \n\t" \ + "mul r5, r9 \n\t" \ + "add r28, r0 \n\t" \ + "adc r29, r1 \n\t" \ + "adc r10, r25 \n\t" \ + \ + "ld r9, x+ \n\t" \ + "mov r11, r25 \n\t" \ + "mul r2, r9 \n\t" \ + "add r29, r0 \n\t" \ + "adc r10, r1 \n\t" \ + "adc r11, r25 \n\t" \ + "mul r3, r8 \n\t" \ + "add r29, r0 \n\t" \ + "adc r10, r1 \n\t" \ + "adc r11, r25 \n\t" \ + "mul r4, r7 \n\t" \ + "add r29, r0 \n\t" \ + "adc r10, r1 \n\t" \ + "adc r11, r25 \n\t" \ + "mul r5, r6 \n\t" \ + "add r29, r0 \n\t" \ + "adc r10, r1 \n\t" \ + "adc r11, r25 \n\t" \ + \ + "ld r6, x+ \n\t" \ + "mov r12, r25 \n\t" \ + "mul r2, r6 \n\t" \ + "add r10, r0 \n\t" \ + "adc r11, r1 \n\t" \ + "adc r12, r25 \n\t" \ + "mul r3, r9 \n\t" \ + "add r10, r0 \n\t" \ + "adc r11, r1 \n\t" \ + "adc r12, r25 \n\t" \ + "mul r4, r8 \n\t" \ + "add r10, r0 \n\t" \ + "adc r11, r1 \n\t" \ + "adc r12, r25 \n\t" \ + "mul r5, r7 \n\t" \ + "add r10, r0 \n\t" \ + "adc r11, r1 \n\t" \ + "adc r12, r25 \n\t" \ + \ + "ld r7, x+ \n\t" \ + "mov r13, r25 \n\t" \ + "mul r2, r7 \n\t" \ + "add r11, r0 \n\t" \ + "adc r12, r1 \n\t" \ + "adc r13, r25 \n\t" \ + "mul r3, r6 \n\t" \ + "add r11, r0 \n\t" \ + "adc r12, r1 \n\t" \ + "adc r13, r25 \n\t" \ + "mul r4, r9 \n\t" \ + "add r11, r0 \n\t" \ + "adc r12, r1 \n\t" \ + "adc r13, r25 \n\t" \ + "mul r5, r8 \n\t" \ + "add r11, r0 \n\t" \ + "adc r12, r1 \n\t" \ + "adc r13, r25 \n\t" \ + \ + "ld r8, x+ \n\t" \ + "mov r14, r25 \n\t" \ + "mul r2, r8 \n\t" \ + "add r12, r0 \n\t" \ + "adc r13, r1 \n\t" \ + "adc r14, r25 \n\t" \ + "mul r3, r7 \n\t" \ + "add r12, r0 \n\t" \ + "adc r13, r1 \n\t" \ + "adc r14, r25 \n\t" \ + "mul r4, r6 \n\t" \ + "add r12, r0 \n\t" \ + "adc r13, r1 \n\t" \ + "adc r14, r25 \n\t" \ + "mul r5, r9 \n\t" \ + "add r12, r0 \n\t" \ + "adc r13, r1 \n\t" \ + "adc r14, r25 \n\t" \ + \ + "ld r9, x+ \n\t" \ + "mov r15, r25 \n\t" \ + "mul r2, r9 \n\t" \ + "add r13, r0 \n\t" \ + "adc r14, r1 \n\t" \ + "adc r15, r25 \n\t" \ + "mul r3, r8 \n\t" \ + "add r13, r0 \n\t" \ + "adc r14, r1 \n\t" \ + "adc r15, r25 \n\t" \ + "mul r4, r7 \n\t" \ + "add r13, r0 \n\t" \ + "adc r14, r1 \n\t" \ + "adc r15, r25 \n\t" \ + "mul r5, r6 \n\t" \ + "add r13, r0 \n\t" \ + "adc r14, r1 \n\t" \ + "adc r15, r25 \n\t" \ + \ + "ld r6, x+ \n\t" \ + "mov r16, r25 \n\t" \ + "mul r2, r6 \n\t" \ + "add r14, r0 \n\t" \ + "adc r15, r1 \n\t" \ + "adc r16, r25 \n\t" \ + "mul r3, r9 \n\t" \ + "add r14, r0 \n\t" \ + "adc r15, r1 \n\t" \ + "adc r16, r25 \n\t" \ + "mul r4, r8 \n\t" \ + "add r14, r0 \n\t" \ + "adc r15, r1 \n\t" \ + "adc r16, r25 \n\t" \ + "mul r5, r7 \n\t" \ + "add r14, r0 \n\t" \ + "adc r15, r1 \n\t" \ + "adc r16, r25 \n\t" \ + \ + "ld r7, x+ \n\t" \ + "mov r17, r25 \n\t" \ + "mul r2, r7 \n\t" \ + "add r15, r0 \n\t" \ + "adc r16, r1 \n\t" \ + "adc r17, r25 \n\t" \ + "mul r3, r6 \n\t" \ + "add r15, r0 \n\t" \ + "adc r16, r1 \n\t" \ + "adc r17, r25 \n\t" \ + "mul r4, r9 \n\t" \ + "add r15, r0 \n\t" \ + "adc r16, r1 \n\t" \ + "adc r17, r25 \n\t" \ + "mul r5, r8 \n\t" \ + "add r15, r0 \n\t" \ + "adc r16, r1 \n\t" \ + "adc r17, r25 \n\t" \ + \ + "ld r8, x+ \n\t" \ + "mov r18, r25 \n\t" \ + "mul r2, r8 \n\t" \ + "add r16, r0 \n\t" \ + "adc r17, r1 \n\t" \ + "adc r18, r25 \n\t" \ + "mul r3, r7 \n\t" \ + "add r16, r0 \n\t" \ + "adc r17, r1 \n\t" \ + "adc r18, r25 \n\t" \ + "mul r4, r6 \n\t" \ + "add r16, r0 \n\t" \ + "adc r17, r1 \n\t" \ + "adc r18, r25 \n\t" \ + "mul r5, r9 \n\t" \ + "add r16, r0 \n\t" \ + "adc r17, r1 \n\t" \ + "adc r18, r25 \n\t" \ + \ + "ld r9, x+ \n\t" \ + "mov r19, r25 \n\t" \ + "mul r2, r9 \n\t" \ + "add r17, r0 \n\t" \ + "adc r18, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r3, r8 \n\t" \ + "add r17, r0 \n\t" \ + "adc r18, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r4, r7 \n\t" \ + "add r17, r0 \n\t" \ + "adc r18, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r5, r6 \n\t" \ + "add r17, r0 \n\t" \ + "adc r18, r1 \n\t" \ + "adc r19, r25 \n\t" \ + \ + "lsl r28 \n\t" \ + "bld r28, 0 \n\t" \ + "rol r29 \n\t" \ + "rol r10 \n\t" \ + "rol r11 \n\t" \ + "rol r12 \n\t" \ + "rol r13 \n\t" \ + "rol r14 \n\t" \ + "rol r15 \n\t" \ + "rol r16 \n\t" \ + "rol r17 \n\t" \ + "rol r18 \n\t" \ + "rol r19 \n\t" \ + "ld r0, z \n\t" \ + "add r28, r0 \n\t" \ + "st z+, r28 \n\t" \ + "ld r0, z \n\t" \ + "adc r29, r0 \n\t" \ + "st z+, r29 \n\t" \ + "ld r0, z \n\t" \ + "adc r10, r0 \n\t" \ + "st z+, r10 \n\t" \ + "ld r0, z \n\t" \ + "adc r11, r0 \n\t" \ + "st z+, r11 \n\t" \ + "ld r0, z \n\t" \ + "adc r12, r0 \n\t" \ + "st z+, r12 \n\t" \ + "ld r0, z \n\t" \ + "adc r13, r0 \n\t" \ + "st z+, r13 \n\t" \ + "ld r0, z \n\t" \ + "adc r14, r0 \n\t" \ + "st z+, r14 \n\t" \ + "ld r0, z \n\t" \ + "adc r15, r0 \n\t" \ + "st z+, r15 \n\t" \ + "ld r0, z \n\t" \ + "adc r16, r0 \n\t" \ + "st z+, r16 \n\t" \ + "ld r0, z \n\t" \ + "adc r17, r0 \n\t" \ + "st z+, r17 \n\t" \ + "adc r18, r25 \n\t" \ + "adc r19, r25 \n\t" \ + \ + "mul r2, r2 \n\t" \ + "mov r21, r0 \n\t" \ + "mov r22, r1 \n\t" \ + "mul r3, r3 \n\t" \ + "mov r23, r0 \n\t" \ + "mov r24, r1 \n\t" \ + "mul r4, r4 \n\t" \ + "mov r28, r0 \n\t" \ + "mov r29, r1 \n\t" \ + "mul r5, r5 \n\t" \ + "mov r10, r0 \n\t" \ + "mov r11, r1 \n\t" \ + "add r21, r18 \n\t" \ + "adc r22, r19 \n\t" \ + "adc r23, r25 \n\t" \ + "adc r24, r25 \n\t" \ + \ + "mul r7, r5 \n\t" \ + "mov r18, r0 \n\t" \ + "mov r19, r1 \n\t" \ + "mov r12, r25 \n\t" \ + "mul r8, r4 \n\t" \ + "add r18, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r12, r25 \n\t" \ + "mul r9, r3 \n\t" \ + "add r18, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r12, r25 \n\t" \ + "mov r13, r25 \n\t" \ + "mul r8, r5 \n\t" \ + "add r19, r0 \n\t" \ + "adc r12, r1 \n\t" \ + "adc r13, r25 \n\t" \ + "mul r9, r4 \n\t" \ + "add r19, r0 \n\t" \ + "adc r12, r1 \n\t" \ + "adc r13, r25 \n\t" \ + "mul r2, r3 \n\t" \ + "add r19, r0 \n\t" \ + "adc r12, r1 \n\t" \ + "adc r13, r25 \n\t" \ + "mov r14, r25 \n\t" \ + "mul r9, r5 \n\t" \ + "add r12, r0 \n\t" \ + "adc r13, r1 \n\t" \ + "adc r14, r25 \n\t" \ + "mul r2, r4 \n\t" \ + "add r12, r0 \n\t" \ + "adc r13, r1 \n\t" \ + "adc r14, r25 \n\t" \ + "mov r15, r25 \n\t" \ + "mul r2, r5 \n\t" \ + "add r13, r0 \n\t" \ + "adc r14, r1 \n\t" \ + "adc r15, r25 \n\t" \ + "mul r3, r4 \n\t" \ + "add r13, r0 \n\t" \ + "adc r14, r1 \n\t" \ + "adc r15, r25 \n\t" \ + "mov r16, r25 \n\t" \ + "mul r3, r5 \n\t" \ + "add r14, r0 \n\t" \ + "adc r15, r1 \n\t" \ + "adc r16, r25 \n\t" \ + "mul r4, r5 \n\t" \ + "add r15, r0 \n\t" \ + "adc r16, r1 \n\t" \ + \ + "lsl r18 \n\t" \ + "rol r19 \n\t" \ + "rol r12 \n\t" \ + "rol r13 \n\t" \ + "rol r14 \n\t" \ + "rol r15 \n\t" \ + "rol r16 \n\t" \ + "adc r11, r25 \n\t" \ + "add r21, r18 \n\t" \ + "adc r22, r19 \n\t" \ + "adc r23, r12 \n\t" \ + "adc r24, r13 \n\t" \ + "adc r28, r14 \n\t" \ + "adc r29, r15 \n\t" \ + "adc r10, r16 \n\t" \ + "adc r11, r25 \n\t" \ + \ + "st z+, r21 \n\t" \ + "st z+, r22 \n\t" \ + "st z+, r23 \n\t" \ + "st z+, r24 \n\t" \ + "st z+, r28 \n\t" \ + "st z+, r29 \n\t" \ + "st z+, r10 \n\t" \ + "st z+, r11 \n\t" \ + "adiw r26, 4 \n\t" #define FAST_SQUARE_ASM_28 \ "ldi r25, 0 \n\t" \ @@ -21538,8 +22744,747 @@ "add r23, r0 \n\t" \ "adc r28, r1 \n\t" \ "st z+, r23 \n\t" \ - "st z+, r28 \n\t" \ - "eor r1, r1 \n\t" + "st z+, r28 \n\t" + +#define FAST_SQUARE_ASM_28_TO_32 \ + "cpi r20, 28 \n\t" \ + "brne 1f \n\t" \ + "jmp 2f \n\t" \ + "1: \n\t" \ + "ld r2, x+ \n\t" \ + "ld r3, x+ \n\t" \ + "ld r4, x+ \n\t" \ + "ld r5, x+ \n\t" \ + "sbiw r26, 32 \n\t" \ + "sbiw r30, 28 \n\t" \ + "ld r6, x+ \n\t" \ + "ld r7, x+ \n\t" \ + "ld r8, x+ \n\t" \ + "ld r9, x+ \n\t" \ + \ + "mul r2, r6 \n\t" \ + "mov r10, r0 \n\t" \ + "mov r11, r1 \n\t" \ + "mov r12, r25 \n\t" \ + "mov r13, r25 \n\t" \ + "mul r2, r7 \n\t" \ + "add r11, r0 \n\t" \ + "adc r12, r1 \n\t" \ + "adc r13, r25 \n\t" \ + "mul r3, r6 \n\t" \ + "add r11, r0 \n\t" \ + "adc r12, r1 \n\t" \ + "adc r13, r25 \n\t" \ + \ + "mov r14, r25 \n\t" \ + "mul r2, r8 \n\t" \ + "add r12, r0 \n\t" \ + "adc r13, r1 \n\t" \ + "adc r14, r25 \n\t" \ + "mul r3, r7 \n\t" \ + "add r12, r0 \n\t" \ + "adc r13, r1 \n\t" \ + "adc r14, r25 \n\t" \ + "mul r4, r6 \n\t" \ + "add r12, r0 \n\t" \ + "adc r13, r1 \n\t" \ + "adc r14, r25 \n\t" \ + \ + "mov r15, r25 \n\t" \ + "mul r2, r9 \n\t" \ + "add r13, r0 \n\t" \ + "adc r14, r1 \n\t" \ + "adc r15, r25 \n\t" \ + "mul r3, r8 \n\t" \ + "add r13, r0 \n\t" \ + "adc r14, r1 \n\t" \ + "adc r15, r25 \n\t" \ + "mul r4, r7 \n\t" \ + "add r13, r0 \n\t" \ + "adc r14, r1 \n\t" \ + "adc r15, r25 \n\t" \ + "mul r5, r6 \n\t" \ + "add r13, r0 \n\t" \ + "adc r14, r1 \n\t" \ + "adc r15, r25 \n\t" \ + \ + "ld r6, x+ \n\t" \ + "mov r16, r25 \n\t" \ + "mul r2, r6 \n\t" \ + "add r14, r0 \n\t" \ + "adc r15, r1 \n\t" \ + "adc r16, r25 \n\t" \ + "mul r3, r9 \n\t" \ + "add r14, r0 \n\t" \ + "adc r15, r1 \n\t" \ + "adc r16, r25 \n\t" \ + "mul r4, r8 \n\t" \ + "add r14, r0 \n\t" \ + "adc r15, r1 \n\t" \ + "adc r16, r25 \n\t" \ + "mul r5, r7 \n\t" \ + "add r14, r0 \n\t" \ + "adc r15, r1 \n\t" \ + "adc r16, r25 \n\t" \ + \ + "ld r7, x+ \n\t" \ + "mov r17, r25 \n\t" \ + "mul r2, r7 \n\t" \ + "add r15, r0 \n\t" \ + "adc r16, r1 \n\t" \ + "adc r17, r25 \n\t" \ + "mul r3, r6 \n\t" \ + "add r15, r0 \n\t" \ + "adc r16, r1 \n\t" \ + "adc r17, r25 \n\t" \ + "mul r4, r9 \n\t" \ + "add r15, r0 \n\t" \ + "adc r16, r1 \n\t" \ + "adc r17, r25 \n\t" \ + "mul r5, r8 \n\t" \ + "add r15, r0 \n\t" \ + "adc r16, r1 \n\t" \ + "adc r17, r25 \n\t" \ + \ + "ld r8, x+ \n\t" \ + "mov r18, r25 \n\t" \ + "mul r2, r8 \n\t" \ + "add r16, r0 \n\t" \ + "adc r17, r1 \n\t" \ + "adc r18, r25 \n\t" \ + "mul r3, r7 \n\t" \ + "add r16, r0 \n\t" \ + "adc r17, r1 \n\t" \ + "adc r18, r25 \n\t" \ + "mul r4, r6 \n\t" \ + "add r16, r0 \n\t" \ + "adc r17, r1 \n\t" \ + "adc r18, r25 \n\t" \ + "mul r5, r9 \n\t" \ + "add r16, r0 \n\t" \ + "adc r17, r1 \n\t" \ + "adc r18, r25 \n\t" \ + \ + "ld r9, x+ \n\t" \ + "mov r19, r25 \n\t" \ + "mul r2, r9 \n\t" \ + "add r17, r0 \n\t" \ + "adc r18, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r3, r8 \n\t" \ + "add r17, r0 \n\t" \ + "adc r18, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r4, r7 \n\t" \ + "add r17, r0 \n\t" \ + "adc r18, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r5, r6 \n\t" \ + "add r17, r0 \n\t" \ + "adc r18, r1 \n\t" \ + "adc r19, r25 \n\t" \ + \ + "ld r6, x+ \n\t" \ + "mov r21, r25 \n\t" \ + "mul r2, r6 \n\t" \ + "add r18, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r3, r9 \n\t" \ + "add r18, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r4, r8 \n\t" \ + "add r18, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r5, r7 \n\t" \ + "add r18, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r21, r25 \n\t" \ + \ + "ld r7, x+ \n\t" \ + "mov r22, r25 \n\t" \ + "mul r2, r7 \n\t" \ + "add r19, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r22, r25 \n\t" \ + "mul r3, r6 \n\t" \ + "add r19, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r22, r25 \n\t" \ + "mul r4, r9 \n\t" \ + "add r19, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r22, r25 \n\t" \ + "mul r5, r8 \n\t" \ + "add r19, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r22, r25 \n\t" \ + \ + "ld r8, x+ \n\t" \ + "mov r23, r25 \n\t" \ + "mul r2, r8 \n\t" \ + "add r21, r0 \n\t" \ + "adc r22, r1 \n\t" \ + "adc r23, r25 \n\t" \ + "mul r3, r7 \n\t" \ + "add r21, r0 \n\t" \ + "adc r22, r1 \n\t" \ + "adc r23, r25 \n\t" \ + "mul r4, r6 \n\t" \ + "add r21, r0 \n\t" \ + "adc r22, r1 \n\t" \ + "adc r23, r25 \n\t" \ + "mul r5, r9 \n\t" \ + "add r21, r0 \n\t" \ + "adc r22, r1 \n\t" \ + "adc r23, r25 \n\t" \ + \ + "ld r9, x+ \n\t" \ + "mov r24, r25 \n\t" \ + "mul r2, r9 \n\t" \ + "add r22, r0 \n\t" \ + "adc r23, r1 \n\t" \ + "adc r24, r25 \n\t" \ + "mul r3, r8 \n\t" \ + "add r22, r0 \n\t" \ + "adc r23, r1 \n\t" \ + "adc r24, r25 \n\t" \ + "mul r4, r7 \n\t" \ + "add r22, r0 \n\t" \ + "adc r23, r1 \n\t" \ + "adc r24, r25 \n\t" \ + "mul r5, r6 \n\t" \ + "add r22, r0 \n\t" \ + "adc r23, r1 \n\t" \ + "adc r24, r25 \n\t" \ + \ + "ld r6, x+ \n\t" \ + "mov r28, r25 \n\t" \ + "mul r2, r6 \n\t" \ + "add r23, r0 \n\t" \ + "adc r24, r1 \n\t" \ + "adc r28, r25 \n\t" \ + "mul r3, r9 \n\t" \ + "add r23, r0 \n\t" \ + "adc r24, r1 \n\t" \ + "adc r28, r25 \n\t" \ + "mul r4, r8 \n\t" \ + "add r23, r0 \n\t" \ + "adc r24, r1 \n\t" \ + "adc r28, r25 \n\t" \ + "mul r5, r7 \n\t" \ + "add r23, r0 \n\t" \ + "adc r24, r1 \n\t" \ + "adc r28, r25 \n\t" \ + \ + "ld r7, x+ \n\t" \ + "mov r29, r25 \n\t" \ + "mul r2, r7 \n\t" \ + "add r24, r0 \n\t" \ + "adc r28, r1 \n\t" \ + "adc r29, r25 \n\t" \ + "mul r3, r6 \n\t" \ + "add r24, r0 \n\t" \ + "adc r28, r1 \n\t" \ + "adc r29, r25 \n\t" \ + "mul r4, r9 \n\t" \ + "add r24, r0 \n\t" \ + "adc r28, r1 \n\t" \ + "adc r29, r25 \n\t" \ + "mul r5, r8 \n\t" \ + "add r24, r0 \n\t" \ + "adc r28, r1 \n\t" \ + "adc r29, r25 \n\t" \ + \ + "lsl r10 \n\t" \ + "rol r11 \n\t" \ + "rol r12 \n\t" \ + "rol r13 \n\t" \ + "rol r14 \n\t" \ + "rol r15 \n\t" \ + "rol r16 \n\t" \ + "rol r17 \n\t" \ + "rol r18 \n\t" \ + "rol r19 \n\t" \ + "rol r21 \n\t" \ + "rol r22 \n\t" \ + "rol r23 \n\t" \ + "rol r24 \n\t" \ + "rol r28 \n\t" \ + "rol r29 \n\t" \ + "ld r0, z \n\t" \ + "add r10, r0 \n\t" \ + "st z+, r10 \n\t" \ + "ld r0, z \n\t" \ + "adc r11, r0 \n\t" \ + "st z+, r11 \n\t" \ + "ld r0, z \n\t" \ + "adc r12, r0 \n\t" \ + "st z+, r12 \n\t" \ + "ld r0, z \n\t" \ + "adc r13, r0 \n\t" \ + "st z+, r13 \n\t" \ + "ld r0, z \n\t" \ + "adc r14, r0 \n\t" \ + "st z+, r14 \n\t" \ + "ld r0, z \n\t" \ + "adc r15, r0 \n\t" \ + "st z+, r15 \n\t" \ + "ld r0, z \n\t" \ + "adc r16, r0 \n\t" \ + "st z+, r16 \n\t" \ + "ld r0, z \n\t" \ + "adc r17, r0 \n\t" \ + "st z+, r17 \n\t" \ + "ld r0, z \n\t" \ + "adc r18, r0 \n\t" \ + "st z+, r18 \n\t" \ + "ld r0, z \n\t" \ + "adc r19, r0 \n\t" \ + "st z+, r19 \n\t" \ + "ld r0, z \n\t" \ + "adc r21, r0 \n\t" \ + "st z+, r21 \n\t" \ + "ld r0, z \n\t" \ + "adc r22, r0 \n\t" \ + "st z+, r22 \n\t" \ + "ld r0, z \n\t" \ + "adc r23, r0 \n\t" \ + "st z+, r23 \n\t" \ + "ld r0, z \n\t" \ + "adc r24, r0 \n\t" \ + "st z+, r24 \n\t" \ + "adc r28, r25 \n\t" \ + "adc r29, r25 \n\t" \ + "bst r28, 0 \n\t" \ + "lsr r29 \n\t" \ + "ror r28 \n\t" \ + \ + "ld r8, x+ \n\t" \ + "mov r10, r25 \n\t" \ + "mul r2, r8 \n\t" \ + "add r28, r0 \n\t" \ + "adc r29, r1 \n\t" \ + "adc r10, r25 \n\t" \ + "mul r3, r7 \n\t" \ + "add r28, r0 \n\t" \ + "adc r29, r1 \n\t" \ + "adc r10, r25 \n\t" \ + "mul r4, r6 \n\t" \ + "add r28, r0 \n\t" \ + "adc r29, r1 \n\t" \ + "adc r10, r25 \n\t" \ + "mul r5, r9 \n\t" \ + "add r28, r0 \n\t" \ + "adc r29, r1 \n\t" \ + "adc r10, r25 \n\t" \ + \ + "ld r9, x+ \n\t" \ + "mov r11, r25 \n\t" \ + "mul r2, r9 \n\t" \ + "add r29, r0 \n\t" \ + "adc r10, r1 \n\t" \ + "adc r11, r25 \n\t" \ + "mul r3, r8 \n\t" \ + "add r29, r0 \n\t" \ + "adc r10, r1 \n\t" \ + "adc r11, r25 \n\t" \ + "mul r4, r7 \n\t" \ + "add r29, r0 \n\t" \ + "adc r10, r1 \n\t" \ + "adc r11, r25 \n\t" \ + "mul r5, r6 \n\t" \ + "add r29, r0 \n\t" \ + "adc r10, r1 \n\t" \ + "adc r11, r25 \n\t" \ + \ + "ld r6, x+ \n\t" \ + "mov r12, r25 \n\t" \ + "mul r2, r6 \n\t" \ + "add r10, r0 \n\t" \ + "adc r11, r1 \n\t" \ + "adc r12, r25 \n\t" \ + "mul r3, r9 \n\t" \ + "add r10, r0 \n\t" \ + "adc r11, r1 \n\t" \ + "adc r12, r25 \n\t" \ + "mul r4, r8 \n\t" \ + "add r10, r0 \n\t" \ + "adc r11, r1 \n\t" \ + "adc r12, r25 \n\t" \ + "mul r5, r7 \n\t" \ + "add r10, r0 \n\t" \ + "adc r11, r1 \n\t" \ + "adc r12, r25 \n\t" \ + \ + "ld r7, x+ \n\t" \ + "mov r13, r25 \n\t" \ + "mul r2, r7 \n\t" \ + "add r11, r0 \n\t" \ + "adc r12, r1 \n\t" \ + "adc r13, r25 \n\t" \ + "mul r3, r6 \n\t" \ + "add r11, r0 \n\t" \ + "adc r12, r1 \n\t" \ + "adc r13, r25 \n\t" \ + "mul r4, r9 \n\t" \ + "add r11, r0 \n\t" \ + "adc r12, r1 \n\t" \ + "adc r13, r25 \n\t" \ + "mul r5, r8 \n\t" \ + "add r11, r0 \n\t" \ + "adc r12, r1 \n\t" \ + "adc r13, r25 \n\t" \ + \ + "ld r8, x+ \n\t" \ + "mov r14, r25 \n\t" \ + "mul r2, r8 \n\t" \ + "add r12, r0 \n\t" \ + "adc r13, r1 \n\t" \ + "adc r14, r25 \n\t" \ + "mul r3, r7 \n\t" \ + "add r12, r0 \n\t" \ + "adc r13, r1 \n\t" \ + "adc r14, r25 \n\t" \ + "mul r4, r6 \n\t" \ + "add r12, r0 \n\t" \ + "adc r13, r1 \n\t" \ + "adc r14, r25 \n\t" \ + "mul r5, r9 \n\t" \ + "add r12, r0 \n\t" \ + "adc r13, r1 \n\t" \ + "adc r14, r25 \n\t" \ + \ + "ld r9, x+ \n\t" \ + "mov r15, r25 \n\t" \ + "mul r2, r9 \n\t" \ + "add r13, r0 \n\t" \ + "adc r14, r1 \n\t" \ + "adc r15, r25 \n\t" \ + "mul r3, r8 \n\t" \ + "add r13, r0 \n\t" \ + "adc r14, r1 \n\t" \ + "adc r15, r25 \n\t" \ + "mul r4, r7 \n\t" \ + "add r13, r0 \n\t" \ + "adc r14, r1 \n\t" \ + "adc r15, r25 \n\t" \ + "mul r5, r6 \n\t" \ + "add r13, r0 \n\t" \ + "adc r14, r1 \n\t" \ + "adc r15, r25 \n\t" \ + \ + "ld r6, x+ \n\t" \ + "mov r16, r25 \n\t" \ + "mul r2, r6 \n\t" \ + "add r14, r0 \n\t" \ + "adc r15, r1 \n\t" \ + "adc r16, r25 \n\t" \ + "mul r3, r9 \n\t" \ + "add r14, r0 \n\t" \ + "adc r15, r1 \n\t" \ + "adc r16, r25 \n\t" \ + "mul r4, r8 \n\t" \ + "add r14, r0 \n\t" \ + "adc r15, r1 \n\t" \ + "adc r16, r25 \n\t" \ + "mul r5, r7 \n\t" \ + "add r14, r0 \n\t" \ + "adc r15, r1 \n\t" \ + "adc r16, r25 \n\t" \ + \ + "ld r7, x+ \n\t" \ + "mov r17, r25 \n\t" \ + "mul r2, r7 \n\t" \ + "add r15, r0 \n\t" \ + "adc r16, r1 \n\t" \ + "adc r17, r25 \n\t" \ + "mul r3, r6 \n\t" \ + "add r15, r0 \n\t" \ + "adc r16, r1 \n\t" \ + "adc r17, r25 \n\t" \ + "mul r4, r9 \n\t" \ + "add r15, r0 \n\t" \ + "adc r16, r1 \n\t" \ + "adc r17, r25 \n\t" \ + "mul r5, r8 \n\t" \ + "add r15, r0 \n\t" \ + "adc r16, r1 \n\t" \ + "adc r17, r25 \n\t" \ + \ + "ld r8, x+ \n\t" \ + "mov r18, r25 \n\t" \ + "mul r2, r8 \n\t" \ + "add r16, r0 \n\t" \ + "adc r17, r1 \n\t" \ + "adc r18, r25 \n\t" \ + "mul r3, r7 \n\t" \ + "add r16, r0 \n\t" \ + "adc r17, r1 \n\t" \ + "adc r18, r25 \n\t" \ + "mul r4, r6 \n\t" \ + "add r16, r0 \n\t" \ + "adc r17, r1 \n\t" \ + "adc r18, r25 \n\t" \ + "mul r5, r9 \n\t" \ + "add r16, r0 \n\t" \ + "adc r17, r1 \n\t" \ + "adc r18, r25 \n\t" \ + \ + "ld r9, x+ \n\t" \ + "mov r19, r25 \n\t" \ + "mul r2, r9 \n\t" \ + "add r17, r0 \n\t" \ + "adc r18, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r3, r8 \n\t" \ + "add r17, r0 \n\t" \ + "adc r18, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r4, r7 \n\t" \ + "add r17, r0 \n\t" \ + "adc r18, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r5, r6 \n\t" \ + "add r17, r0 \n\t" \ + "adc r18, r1 \n\t" \ + "adc r19, r25 \n\t" \ + \ + "ld r6, x+ \n\t" \ + "mov r21, r25 \n\t" \ + "mul r2, r6 \n\t" \ + "add r18, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r3, r9 \n\t" \ + "add r18, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r4, r8 \n\t" \ + "add r18, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r5, r7 \n\t" \ + "add r18, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r21, r25 \n\t" \ + \ + "ld r7, x+ \n\t" \ + "mov r22, r25 \n\t" \ + "mul r2, r7 \n\t" \ + "add r19, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r22, r25 \n\t" \ + "mul r3, r6 \n\t" \ + "add r19, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r22, r25 \n\t" \ + "mul r4, r9 \n\t" \ + "add r19, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r22, r25 \n\t" \ + "mul r5, r8 \n\t" \ + "add r19, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r22, r25 \n\t" \ + \ + "ld r8, x+ \n\t" \ + "mov r23, r25 \n\t" \ + "mul r2, r8 \n\t" \ + "add r21, r0 \n\t" \ + "adc r22, r1 \n\t" \ + "adc r23, r25 \n\t" \ + "mul r3, r7 \n\t" \ + "add r21, r0 \n\t" \ + "adc r22, r1 \n\t" \ + "adc r23, r25 \n\t" \ + "mul r4, r6 \n\t" \ + "add r21, r0 \n\t" \ + "adc r22, r1 \n\t" \ + "adc r23, r25 \n\t" \ + "mul r5, r9 \n\t" \ + "add r21, r0 \n\t" \ + "adc r22, r1 \n\t" \ + "adc r23, r25 \n\t" \ + \ + "ld r9, x+ \n\t" \ + "mov r24, r25 \n\t" \ + "mul r2, r9 \n\t" \ + "add r22, r0 \n\t" \ + "adc r23, r1 \n\t" \ + "adc r24, r25 \n\t" \ + "mul r3, r8 \n\t" \ + "add r22, r0 \n\t" \ + "adc r23, r1 \n\t" \ + "adc r24, r25 \n\t" \ + "mul r4, r7 \n\t" \ + "add r22, r0 \n\t" \ + "adc r23, r1 \n\t" \ + "adc r24, r25 \n\t" \ + "mul r5, r6 \n\t" \ + "add r22, r0 \n\t" \ + "adc r23, r1 \n\t" \ + "adc r24, r25 \n\t" \ + \ + "lsl r28 \n\t" \ + "bld r28, 0 \n\t" \ + "rol r29 \n\t" \ + "rol r10 \n\t" \ + "rol r11 \n\t" \ + "rol r12 \n\t" \ + "rol r13 \n\t" \ + "rol r14 \n\t" \ + "rol r15 \n\t" \ + "rol r16 \n\t" \ + "rol r17 \n\t" \ + "rol r18 \n\t" \ + "rol r19 \n\t" \ + "rol r21 \n\t" \ + "rol r22 \n\t" \ + "rol r23 \n\t" \ + "rol r24 \n\t" \ + "ld r0, z \n\t" \ + "add r28, r0 \n\t" \ + "st z+, r28 \n\t" \ + "ld r0, z \n\t" \ + "adc r29, r0 \n\t" \ + "st z+, r29 \n\t" \ + "ld r0, z \n\t" \ + "adc r10, r0 \n\t" \ + "st z+, r10 \n\t" \ + "ld r0, z \n\t" \ + "adc r11, r0 \n\t" \ + "st z+, r11 \n\t" \ + "ld r0, z \n\t" \ + "adc r12, r0 \n\t" \ + "st z+, r12 \n\t" \ + "ld r0, z \n\t" \ + "adc r13, r0 \n\t" \ + "st z+, r13 \n\t" \ + "ld r0, z \n\t" \ + "adc r14, r0 \n\t" \ + "st z+, r14 \n\t" \ + "ld r0, z \n\t" \ + "adc r15, r0 \n\t" \ + "st z+, r15 \n\t" \ + "ld r0, z \n\t" \ + "adc r16, r0 \n\t" \ + "st z+, r16 \n\t" \ + "ld r0, z \n\t" \ + "adc r17, r0 \n\t" \ + "st z+, r17 \n\t" \ + "ld r0, z \n\t" \ + "adc r18, r0 \n\t" \ + "st z+, r18 \n\t" \ + "ld r0, z \n\t" \ + "adc r19, r0 \n\t" \ + "st z+, r19 \n\t" \ + "ld r0, z \n\t" \ + "adc r21, r0 \n\t" \ + "st z+, r21 \n\t" \ + "ld r0, z \n\t" \ + "adc r22, r0 \n\t" \ + "st z+, r22 \n\t" \ + "adc r23, r25 \n\t" \ + "adc r24, r25 \n\t" \ + \ + "mul r2, r2 \n\t" \ + "mov r28, r0 \n\t" \ + "mov r29, r1 \n\t" \ + "mul r3, r3 \n\t" \ + "mov r10, r0 \n\t" \ + "mov r11, r1 \n\t" \ + "mul r4, r4 \n\t" \ + "mov r12, r0 \n\t" \ + "mov r13, r1 \n\t" \ + "mul r5, r5 \n\t" \ + "mov r14, r0 \n\t" \ + "mov r15, r1 \n\t" \ + "add r28, r23 \n\t" \ + "adc r29, r24 \n\t" \ + "adc r10, r25 \n\t" \ + "adc r11, r25 \n\t" \ + \ + "mul r7, r5 \n\t" \ + "mov r23, r0 \n\t" \ + "mov r24, r1 \n\t" \ + "mov r16, r25 \n\t" \ + "mul r8, r4 \n\t" \ + "add r23, r0 \n\t" \ + "adc r24, r1 \n\t" \ + "adc r16, r25 \n\t" \ + "mul r9, r3 \n\t" \ + "add r23, r0 \n\t" \ + "adc r24, r1 \n\t" \ + "adc r16, r25 \n\t" \ + "mov r17, r25 \n\t" \ + "mul r8, r5 \n\t" \ + "add r24, r0 \n\t" \ + "adc r16, r1 \n\t" \ + "adc r17, r25 \n\t" \ + "mul r9, r4 \n\t" \ + "add r24, r0 \n\t" \ + "adc r16, r1 \n\t" \ + "adc r17, r25 \n\t" \ + "mul r2, r3 \n\t" \ + "add r24, r0 \n\t" \ + "adc r16, r1 \n\t" \ + "adc r17, r25 \n\t" \ + "mov r18, r25 \n\t" \ + "mul r9, r5 \n\t" \ + "add r16, r0 \n\t" \ + "adc r17, r1 \n\t" \ + "adc r18, r25 \n\t" \ + "mul r2, r4 \n\t" \ + "add r16, r0 \n\t" \ + "adc r17, r1 \n\t" \ + "adc r18, r25 \n\t" \ + "mov r19, r25 \n\t" \ + "mul r2, r5 \n\t" \ + "add r17, r0 \n\t" \ + "adc r18, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r3, r4 \n\t" \ + "add r17, r0 \n\t" \ + "adc r18, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mov r21, r25 \n\t" \ + "mul r3, r5 \n\t" \ + "add r18, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r4, r5 \n\t" \ + "add r19, r0 \n\t" \ + "adc r21, r1 \n\t" \ + \ + "lsl r23 \n\t" \ + "rol r24 \n\t" \ + "rol r16 \n\t" \ + "rol r17 \n\t" \ + "rol r18 \n\t" \ + "rol r19 \n\t" \ + "rol r21 \n\t" \ + "adc r15, r25 \n\t" \ + "add r28, r23 \n\t" \ + "adc r29, r24 \n\t" \ + "adc r10, r16 \n\t" \ + "adc r11, r17 \n\t" \ + "adc r12, r18 \n\t" \ + "adc r13, r19 \n\t" \ + "adc r14, r21 \n\t" \ + "adc r15, r25 \n\t" \ + \ + "st z+, r28 \n\t" \ + "st z+, r29 \n\t" \ + "st z+, r10 \n\t" \ + "st z+, r11 \n\t" \ + "st z+, r12 \n\t" \ + "st z+, r13 \n\t" \ + "st z+, r14 \n\t" \ + "st z+, r15 \n\t" \ + "adiw r26, 4 \n\t" #define FAST_SQUARE_ASM_32 \ "ldi r25, 0 \n\t" \ @@ -24361,7 +26306,6 @@ "add r23, r0 \n\t" \ "adc r28, r1 \n\t" \ "st z+, r23 \n\t" \ - "st z+, r28 \n\t" \ - "eor r1, r1 \n\t" + "st z+, r28 \n\t" #endif /* _UECC_ASM_AVR_MULT_SQUARE_H_ */