diff --git a/asm_arm.inc b/asm_arm.inc index cd235b8..5f07264 100644 --- a/asm_arm.inc +++ b/asm_arm.inc @@ -3,8 +3,6 @@ #ifndef _UECC_ASM_ARM_H_ #define _UECC_ASM_ARM_H_ -#include "asm_arm_mult_square.inc" - #if (uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1) #define uECC_MIN_WORDS 8 #endif @@ -158,6 +156,8 @@ uECC_VLI_API uECC_word_t uECC_vli_sub(uECC_word_t *result, #if (uECC_OPTIMIZATION_LEVEL >= 3) +#include "asm_arm_mult_square.inc" + #define FAST_MULT_ASM_5_TO_6 \ "cmp r3, #5 \n\t" \ "beq 1f \n\t" \ diff --git a/asm_avr.inc b/asm_avr.inc index c99bf82..0d4582f 100644 --- a/asm_avr.inc +++ b/asm_avr.inc @@ -3,6 +3,22 @@ #ifndef _UECC_ASM_AVR_H_ #define _UECC_ASM_AVR_H_ +#if (uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1) + #define uECC_MIN_WORDS 32 /* Starting value; each later #undef/#define pair lowers it, so the smallest enabled curve size wins. */ +#endif +#if uECC_SUPPORTS_secp224r1 + #undef uECC_MIN_WORDS + #define uECC_MIN_WORDS 28 +#endif +#if uECC_SUPPORTS_secp192r1 + #undef uECC_MIN_WORDS + #define uECC_MIN_WORDS 24 +#endif +#if uECC_SUPPORTS_secp160r1 + #undef uECC_MIN_WORDS + #define uECC_MIN_WORDS 20 +#endif + #if __AVR_HAVE_EIJMP_EICALL__ #define IJMP "eijmp \n\t" #else @@ -189,6 +205,64 @@ uECC_VLI_API uECC_word_t uECC_vli_sub(uECC_word_t *result, } #define asm_sub 1 +#if (uECC_OPTIMIZATION_LEVEL >= 3) + +#include "asm_avr_mult_square.inc" + +__attribute((noinline)) +uECC_VLI_API void uECC_vli_mult(uECC_word_t *result, + const uECC_word_t *left, + const uECC_word_t *right, + wordcount_t num_words) { + /* num_words should already be in r18. 
The operand constraints below bind left, right and result to the X, Y and Z pointer registers. The asm runs the fixed-size FAST_MULT_ASM core selected by uECC_MIN_WORDS, then the FAST_MULT_ASM_x_TO_y extension macros enabled by uECC_MAX_WORDS; the visible extension macros compare r18 with their starting size and jump to done when it matches. */ + register wordcount_t r18 __asm__("r18") = num_words; + + __asm__ volatile ( + "push r18 \n\t" /* Saved here and restored by the pop r18 inside the selected uECC_MIN_WORDS branch. NOTE(review): if none of the four branches is compiled in, no pop is emitted. */ +#if (uECC_MIN_WORDS == 20) + FAST_MULT_ASM_20 + "pop r18 \n\t" + #if (uECC_MAX_WORDS > 20) + FAST_MULT_ASM_20_TO_24 + #endif + #if (uECC_MAX_WORDS > 24) + FAST_MULT_ASM_24_TO_28 + #endif + #if (uECC_MAX_WORDS > 28) + FAST_MULT_ASM_28_TO_32 + #endif +#elif (uECC_MIN_WORDS == 24) + FAST_MULT_ASM_24 + "pop r18 \n\t" + #if (uECC_MAX_WORDS > 24) + FAST_MULT_ASM_24_TO_28 + #endif + #if (uECC_MAX_WORDS > 28) + FAST_MULT_ASM_28_TO_32 + #endif +#elif (uECC_MIN_WORDS == 28) + FAST_MULT_ASM_28 + "pop r18 \n\t" + #if (uECC_MAX_WORDS > 28) + FAST_MULT_ASM_28_TO_32 + #endif +#elif (uECC_MIN_WORDS == 32) + FAST_MULT_ASM_32 + "pop r18 \n\t" +#endif + "done: \n\t" /* Target of the jmp done exits in the extension macros. NOTE(review): this is a named asm label, so the asm must be emitted only once per object file; the noinline attribute presumably guards that - confirm. */ + "eor r1, r1 \n\t" /* mul writes its product to r1:r0, so r1 is cleared again on exit; avr-gcc expects r1 to hold zero. */ + : "+x" (left), "+y" (right), "+z" (result) + : "r" (r18) + : "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r19", "r20", + "r21", "r22", "r23", "r24", "r25", "cc", "memory" + ); +} +#define asm_mult 1 + +#endif /* (uECC_OPTIMIZATION_LEVEL >= 3) */ + #if uECC_SUPPORTS_secp160r1 static const struct uECC_Curve_t curve_secp160r1; static void vli_mmod_fast_secp160r1(uECC_word_t *result, uECC_word_t *product) { @@ -704,6 +778,8 @@ static void vli_mmod_fast_secp256r1(uECC_word_t *result, uECC_word_t *product) { #endif /* (uECC_OPTIMIZATION_LEVEL >= 2) */ +/* ---- "Small" implementations ---- */ + #if !asm_add uECC_VLI_API uECC_word_t uECC_vli_add(uECC_word_t *result, const uECC_word_t *left, diff --git a/asm_avr_mult_square.inc b/asm_avr_mult_square.inc index 5581bb4..687c618 100644 --- a/asm_avr_mult_square.inc +++ b/asm_avr_mult_square.inc @@ -3,7 +3,7 @@ #ifndef _UECC_ASM_AVR_MULT_SQUARE_H_ #define _UECC_ASM_AVR_MULT_SQUARE_H_ -#define FAST_MULT_ASM_5 \ +#define FAST_MULT_ASM_20 \ "adiw r30, 10 \n\t" \ "adiw r28, 10 \n\t" \ "ld r2, x+ \n\t" \ @@ -1905,10 +1905,911 @@ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ "st z+, r23 \n\t" \ - "st z+, r24 
\n\t" \ - "eor r1, r1 \n\t" + "st z+, r24 \n\t" -#define FAST_MULT_ASM_6 \ +#define FAST_MULT_ASM_20_TO_24 /* Extension step: reads four more bytes of each operand through X and Y and folds the extra partial products into the bytes already stored at Z. NOTE(review): the pointer offsets assume X, Y and Z are where the preceding macro leaves them - confirm. */ \ + "cpi r18, 20 \n\t" /* r18 carries num_words in uECC_vli_mult; when it equals 20, jump to the done label and skip this step */ \ + "brne 1f \n\t" \ + "jmp done \n\t" \ + "1: \n\t" \ + "ld r2, x+ \n\t" \ + "ld r6, y+ \n\t" \ + "ld r3, x+ \n\t" \ + "ld r7, y+ \n\t" \ + "ld r4, x+ \n\t" \ + "ld r8, y+ \n\t" \ + "ld r5, x+ \n\t" \ + "ld r9, y+ \n\t" /* r2-r5 now hold four bytes read through X and r6-r9 four bytes read through Y; they stay fixed for the rest of the macro */ \ + "sbiw r26, 24 \n\t" \ + "sbiw r28, 24 \n\t" \ + "sbiw r30, 20 \n\t" /* r26, r28 and r30 are the low bytes of X, Y and Z: X and Y step back by 24 and Z by 20 */ \ + "ld r10, x+ \n\t" \ + "ld r14, y+ \n\t" \ + "ld r11, x+ \n\t" \ + "ld r15, y+ \n\t" \ + "ld r12, x+ \n\t" \ + "ld r16, y+ \n\t" \ + "ld r13, x+ \n\t" \ + "ld r17, y+ \n\t" \ + \ + "mul r2, r14 \n\t" \ + "mov r19, r0 \n\t" \ + "mov r20, r1 \n\t" \ + "ldi r21, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r25 \n\t" /* r25 is only ever read in this macro and serves as the carry-only addend; presumably it holds zero on entry - TODO confirm */ \ + "mul r6, r10 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "st z+, r19 \n\t" \ + \ + "ldi r19, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r2, r15 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r6, r11 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r3, r14 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r7, r10 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "st z+, r20 \n\t" \ + \ + "ldi r20, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r2, r16 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r6, r12 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r3, r15 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r7, r11 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r4, r14 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r8, r10 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc 
r20, r25 \n\t" \ + "st z+, r21 \n\t" \ + \ + "ldi r21, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r2, r17 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r6, r13 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r3, r16 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r7, r12 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r4, r15 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r8, r11 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r5, r14 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r9, r10 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "st z+, r19 \n\t" \ + \ + "ldi r19, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r25 \n\t" \ + "ld r10, x+ \n\t" \ + "ld r14, y+ \n\t" \ + "mul r2, r14 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r6, r10 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r3, r17 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r7, r13 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r4, r16 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r8, r12 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r5, r15 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r9, r11 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "st z+, r20 \n\t" \ + \ + "ldi r20, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r25 \n\t" \ + "ld r11, x+ \n\t" \ + "ld r15, y+ \n\t" \ + "mul r2, r15 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 
\n\t" \ + "adc r20, r25 \n\t" \ + "mul r6, r11 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r3, r14 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r7, r10 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r4, r17 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r8, r13 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r5, r16 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r9, r12 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "st z+, r21 \n\t" \ + \ + "ldi r21, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r25 \n\t" \ + "ld r12, x+ \n\t" \ + "ld r16, y+ \n\t" \ + "mul r2, r16 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r6, r12 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r3, r15 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r7, r11 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r4, r14 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r8, r10 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r5, r17 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r9, r13 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "st z+, r19 \n\t" \ + \ + "ldi r19, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r25 \n\t" \ + "ld r13, x+ \n\t" \ + "ld r17, y+ \n\t" \ + "mul r2, r17 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r6, r13 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r3, r16 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ 
+ "adc r19, r25 \n\t" \ + "mul r7, r12 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r4, r15 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r8, r11 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r5, r14 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r9, r10 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "st z+, r20 \n\t" \ + \ + "ldi r20, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r25 \n\t" \ + "ld r10, x+ \n\t" \ + "ld r14, y+ \n\t" \ + "mul r2, r14 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r6, r10 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r3, r17 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r7, r13 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r4, r16 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r8, r12 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r5, r15 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r9, r11 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "st z+, r21 \n\t" \ + \ + "ldi r21, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r25 \n\t" \ + "ld r11, x+ \n\t" \ + "ld r15, y+ \n\t" \ + "mul r2, r15 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r6, r11 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r3, r14 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r7, r10 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r4, r17 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc 
r21, r25 \n\t" \ + "mul r8, r13 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r5, r16 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r9, r12 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "st z+, r19 \n\t" \ + \ + "ldi r19, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r25 \n\t" \ + "ld r12, x+ \n\t" \ + "ld r16, y+ \n\t" \ + "mul r2, r16 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r6, r12 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r3, r15 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r7, r11 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r4, r14 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r8, r10 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r5, r17 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r9, r13 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "st z+, r20 \n\t" \ + \ + "ldi r20, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r25 \n\t" \ + "ld r13, x+ \n\t" \ + "ld r17, y+ \n\t" \ + "mul r2, r17 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r6, r13 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r3, r16 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r7, r12 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r4, r15 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r8, r11 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r5, r14 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, 
r25 \n\t" \ + "mul r9, r10 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "st z+, r21 \n\t" \ + \ + "ldi r21, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r25 \n\t" \ + "ld r10, x+ \n\t" \ + "ld r14, y+ \n\t" \ + "mul r2, r14 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r6, r10 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r3, r17 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r7, r13 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r4, r16 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r8, r12 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r5, r15 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r9, r11 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "st z+, r19 \n\t" \ + \ + "ldi r19, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r25 \n\t" \ + "ld r11, x+ \n\t" \ + "ld r15, y+ \n\t" \ + "mul r2, r15 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r6, r11 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r3, r14 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r7, r10 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r4, r17 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r8, r13 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r5, r16 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r9, r12 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "st z+, r20 \n\t" \ + \ + "ldi r20, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r21, r0 \n\t" 
\ + "adc r19, r25 \n\t" \ + "ld r12, x+ \n\t" \ + "ld r16, y+ \n\t" \ + "mul r2, r16 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r6, r12 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r3, r15 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r7, r11 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r4, r14 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r8, r10 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r5, r17 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r9, r13 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "st z+, r21 \n\t" \ + \ + "ldi r21, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r25 \n\t" \ + "ld r13, x+ \n\t" \ + "ld r17, y+ \n\t" \ + "mul r2, r17 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r6, r13 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r3, r16 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r7, r12 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r4, r15 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r8, r11 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r5, r14 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r9, r10 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "st z+, r19 \n\t" \ + \ + "ldi r19, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r25 \n\t" \ + "ld r10, x+ \n\t" \ + "ld r14, y+ \n\t" \ + "mul r2, r14 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r6, r10 \n\t" \ + "add 
r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r3, r17 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r7, r13 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r4, r16 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r8, r12 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r5, r15 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r9, r11 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "st z+, r20 \n\t" \ + \ + "ldi r20, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r25 \n\t" \ + "ld r11, x+ \n\t" \ + "ld r15, y+ \n\t" \ + "mul r2, r15 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r6, r11 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r3, r14 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r7, r10 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r4, r17 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r8, r13 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r5, r16 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r9, r12 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "st z+, r21 \n\t" \ + \ + "ldi r21, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r25 \n\t" \ + "ld r12, x+ \n\t" \ + "ld r16, y+ \n\t" \ + "mul r2, r16 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r6, r12 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r3, r15 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r7, r11 \n\t" \ + "add r19, r0 
\n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r4, r14 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r8, r10 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r5, r17 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r9, r13 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "st z+, r19 \n\t" \ + \ + "ldi r19, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r25 \n\t" \ + "ld r13, x+ \n\t" \ + "ld r17, y+ \n\t" \ + "mul r2, r17 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r6, r13 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r3, r16 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r7, r12 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r4, r15 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r8, r11 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r5, r14 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r9, r10 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "st z+, r20 \n\t" \ + \ + "ldi r20, 0 \n\t" /* from this column onward no existing byte is read back from Z; the remaining columns only sum products and store */ \ + "mul r11, r9 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r12, r8 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r13, r7 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r2, r6 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r3, r17 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r4, r16 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r5, r15 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 
\n\t" \ + "adc r20, r25 \n\t" \ + "st z+, r21 \n\t" \ + \ + "ldi r21, 0 \n\t" \ + "mul r12, r9 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r13, r8 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r2, r7 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r3, r6 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r4, r17 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r5, r16 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "st z+, r19 \n\t" \ + \ + "ldi r19, 0 \n\t" \ + "mul r13, r9 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r2, r8 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r3, r7 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r4, r6 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r5, r17 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "st z+, r20 \n\t" \ + \ + "ldi r20, 0 \n\t" \ + "mul r2, r9 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r3, r8 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r4, r7 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r5, r6 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "st z+, r21 \n\t" \ + \ + "ldi r21, 0 \n\t" \ + "mul r3, r9 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r4, r8 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r5, r7 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "st z+, r19 \n\t" \ + \ + "ldi r19, 0 \n\t" \ + "mul r4, r9 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" 
\ + "adc r19, r25 \n\t" \ + "mul r5, r8 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "st z+, r20 \n\t" \ + \ + "mul r5, r9 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "st z+, r21 \n\t" \ + "st z+, r19 \n\t" \ + "adiw r26, 4 \n\t" \ + "adiw r28, 4 \n\t" /* X and Y are stepped forward by 4 again before control falls through to whatever follows this macro */ + +#define FAST_MULT_ASM_24 \ "adiw r30, 20 \n\t" \ "adiw r28, 20 \n\t" \ "ld r2, x+ \n\t" \ @@ -4677,10 +5578,1071 @@ "add r22, r0 \n\t" \ "adc r23, r1 \n\t" \ "st z+, r22 \n\t" \ - "st z+, r23 \n\t" \ - "eor r1, r1 \n\t" + "st z+, r23 \n\t" -#define FAST_MULT_ASM_7 \ +#define FAST_MULT_ASM_24_TO_28 \ + "cpi r18, 24 \n\t" \ + "brne 1f \n\t" \ + "jmp done \n\t" \ + "1: \n\t" \ + "ld r2, x+ \n\t" \ + "ld r6, y+ \n\t" \ + "ld r3, x+ \n\t" \ + "ld r7, y+ \n\t" \ + "ld r4, x+ \n\t" \ + "ld r8, y+ \n\t" \ + "ld r5, x+ \n\t" \ + "ld r9, y+ \n\t" \ + "sbiw r26, 28 \n\t" \ + "sbiw r28, 28 \n\t" \ + "sbiw r30, 24 \n\t" \ + "ld r10, x+ \n\t" \ + "ld r14, y+ \n\t" \ + "ld r11, x+ \n\t" \ + "ld r15, y+ \n\t" \ + "ld r12, x+ \n\t" \ + "ld r16, y+ \n\t" \ + "ld r13, x+ \n\t" \ + "ld r17, y+ \n\t" \ + \ + "mul r2, r14 \n\t" \ + "mov r19, r0 \n\t" \ + "mov r20, r1 \n\t" \ + "ldi r21, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r6, r10 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "st z+, r19 \n\t" \ + \ + "ldi r19, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r2, r15 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r6, r11 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r3, r14 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r7, r10 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "st z+, r20 \n\t" \ + \ + "ldi r20, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r2, r16 \n\t" \ + "add r21, r0 \n\t" \
+ "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r6, r12 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r3, r15 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r7, r11 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r4, r14 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r8, r10 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "st z+, r21 \n\t" \ + \ + "ldi r21, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r2, r17 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r6, r13 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r3, r16 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r7, r12 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r4, r15 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r8, r11 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r5, r14 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r9, r10 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "st z+, r19 \n\t" \ + \ + "ldi r19, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r25 \n\t" \ + "ld r10, x+ \n\t" \ + "ld r14, y+ \n\t" \ + "mul r2, r14 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r6, r10 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r3, r17 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r7, r13 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r4, r16 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + 
"mul r8, r12 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r5, r15 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r9, r11 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "st z+, r20 \n\t" \ + \ + "ldi r20, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r25 \n\t" \ + "ld r11, x+ \n\t" \ + "ld r15, y+ \n\t" \ + "mul r2, r15 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r6, r11 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r3, r14 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r7, r10 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r4, r17 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r8, r13 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r5, r16 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r9, r12 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "st z+, r21 \n\t" \ + \ + "ldi r21, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r25 \n\t" \ + "ld r12, x+ \n\t" \ + "ld r16, y+ \n\t" \ + "mul r2, r16 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r6, r12 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r3, r15 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r7, r11 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r4, r14 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r8, r10 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r5, r17 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul 
r9, r13 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "st z+, r19 \n\t" \ + \ + "ldi r19, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r25 \n\t" \ + "ld r13, x+ \n\t" \ + "ld r17, y+ \n\t" \ + "mul r2, r17 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r6, r13 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r3, r16 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r7, r12 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r4, r15 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r8, r11 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r5, r14 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r9, r10 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "st z+, r20 \n\t" \ + \ + "ldi r20, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r25 \n\t" \ + "ld r10, x+ \n\t" \ + "ld r14, y+ \n\t" \ + "mul r2, r14 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r6, r10 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r3, r17 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r7, r13 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r4, r16 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r8, r12 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r5, r15 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r9, r11 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "st z+, r21 \n\t" \ + \ + "ldi r21, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r25 
\n\t" \ + "ld r11, x+ \n\t" \ + "ld r15, y+ \n\t" \ + "mul r2, r15 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r6, r11 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r3, r14 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r7, r10 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r4, r17 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r8, r13 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r5, r16 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r9, r12 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "st z+, r19 \n\t" \ + \ + "ldi r19, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r25 \n\t" \ + "ld r12, x+ \n\t" \ + "ld r16, y+ \n\t" \ + "mul r2, r16 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r6, r12 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r3, r15 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r7, r11 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r4, r14 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r8, r10 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r5, r17 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r9, r13 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "st z+, r20 \n\t" \ + \ + "ldi r20, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r25 \n\t" \ + "ld r13, x+ \n\t" \ + "ld r17, y+ \n\t" \ + "mul r2, r17 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r6, r13 \n\t" \ + "add r21, r0 \n\t" \ + 
"adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r3, r16 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r7, r12 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r4, r15 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r8, r11 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r5, r14 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r9, r10 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "st z+, r21 \n\t" \ + \ + "ldi r21, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r25 \n\t" \ + "ld r10, x+ \n\t" \ + "ld r14, y+ \n\t" \ + "mul r2, r14 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r6, r10 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r3, r17 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r7, r13 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r4, r16 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r8, r12 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r5, r15 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r9, r11 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "st z+, r19 \n\t" \ + \ + "ldi r19, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r25 \n\t" \ + "ld r11, x+ \n\t" \ + "ld r15, y+ \n\t" \ + "mul r2, r15 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r6, r11 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r3, r14 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r7, r10 \n\t" \ + "add r20, r0 \n\t" \ + "adc 
r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r4, r17 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r8, r13 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r5, r16 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r9, r12 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "st z+, r20 \n\t" \ + \ + "ldi r20, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r25 \n\t" \ + "ld r12, x+ \n\t" \ + "ld r16, y+ \n\t" \ + "mul r2, r16 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r6, r12 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r3, r15 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r7, r11 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r4, r14 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r8, r10 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r5, r17 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r9, r13 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "st z+, r21 \n\t" \ + \ + "ldi r21, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r25 \n\t" \ + "ld r13, x+ \n\t" \ + "ld r17, y+ \n\t" \ + "mul r2, r17 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r6, r13 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r3, r16 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r7, r12 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r4, r15 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r8, r11 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 
\n\t" \ + "adc r21, r25 \n\t" \ + "mul r5, r14 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r9, r10 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "st z+, r19 \n\t" \ + \ + "ldi r19, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r25 \n\t" \ + "ld r10, x+ \n\t" \ + "ld r14, y+ \n\t" \ + "mul r2, r14 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r6, r10 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r3, r17 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r7, r13 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r4, r16 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r8, r12 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r5, r15 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r9, r11 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "st z+, r20 \n\t" \ + \ + "ldi r20, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r25 \n\t" \ + "ld r11, x+ \n\t" \ + "ld r15, y+ \n\t" \ + "mul r2, r15 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r6, r11 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r3, r14 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r7, r10 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r4, r17 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r8, r13 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r5, r16 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r9, r12 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ 
+ "adc r20, r25 \n\t" \ + "st z+, r21 \n\t" \ + \ + "ldi r21, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r25 \n\t" \ + "ld r12, x+ \n\t" \ + "ld r16, y+ \n\t" \ + "mul r2, r16 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r6, r12 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r3, r15 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r7, r11 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r4, r14 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r8, r10 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r5, r17 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r9, r13 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "st z+, r19 \n\t" \ + \ + "ldi r19, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r25 \n\t" \ + "ld r13, x+ \n\t" \ + "ld r17, y+ \n\t" \ + "mul r2, r17 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r6, r13 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r3, r16 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r7, r12 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r4, r15 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r8, r11 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r5, r14 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r9, r10 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "st z+, r20 \n\t" \ + \ + "ldi r20, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r25 \n\t" \ + "ld r10, x+ \n\t" \ + "ld r14, y+ \n\t" \ + "mul r2, 
r14 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r6, r10 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r3, r17 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r7, r13 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r4, r16 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r8, r12 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r5, r15 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r9, r11 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "st z+, r21 \n\t" \ + \ + "ldi r21, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r25 \n\t" \ + "ld r11, x+ \n\t" \ + "ld r15, y+ \n\t" \ + "mul r2, r15 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r6, r11 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r3, r14 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r7, r10 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r4, r17 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r8, r13 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r5, r16 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r9, r12 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "st z+, r19 \n\t" \ + \ + "ldi r19, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r25 \n\t" \ + "ld r12, x+ \n\t" \ + "ld r16, y+ \n\t" \ + "mul r2, r16 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r6, r12 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r3, r15 
\n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r7, r11 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r4, r14 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r8, r10 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r5, r17 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r9, r13 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "st z+, r20 \n\t" \ + \ + "ldi r20, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r25 \n\t" \ + "ld r13, x+ \n\t" \ + "ld r17, y+ \n\t" \ + "mul r2, r17 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r6, r13 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r3, r16 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r7, r12 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r4, r15 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r8, r11 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r5, r14 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r9, r10 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "st z+, r21 \n\t" \ + \ + "ldi r21, 0 \n\t" \ + "mul r11, r9 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r12, r8 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r13, r7 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r2, r6 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r3, r17 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r4, r16 \n\t" \ + "add r19, r0 
\n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r5, r15 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "st z+, r19 \n\t" \ + \ + "ldi r19, 0 \n\t" \ + "mul r12, r9 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r13, r8 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r2, r7 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r3, r6 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r4, r17 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r5, r16 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "st z+, r20 \n\t" \ + \ + "ldi r20, 0 \n\t" \ + "mul r13, r9 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r2, r8 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r3, r7 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r4, r6 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r5, r17 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "st z+, r21 \n\t" \ + \ + "ldi r21, 0 \n\t" \ + "mul r2, r9 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r3, r8 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r4, r7 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r5, r6 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "st z+, r19 \n\t" \ + \ + "ldi r19, 0 \n\t" \ + "mul r3, r9 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r4, r8 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r5, r7 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" 
\ + "st z+, r20 \n\t" \ + \ + "ldi r20, 0 \n\t" \ + "mul r4, r9 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r5, r8 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "st z+, r21 \n\t" \ + \ + "mul r5, r9 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "st z+, r19 \n\t" \ + "st z+, r20 \n\t" \ + "adiw r26, 4 \n\t" \ + "adiw r28, 4 \n\t" + +#define FAST_MULT_ASM_28 \ "adiw r30, 20 \n\t" \ "adiw r28, 20 \n\t" \ "ld r2, x+ \n\t" \ @@ -8437,10 +10399,1230 @@ "add r24, r0 \n\t" \ "adc r22, r1 \n\t" \ "st z+, r24 \n\t" \ - "st z+, r22 \n\t" \ - "eor r1, r1 \n\t" + "st z+, r22 \n\t" -#define FAST_MULT_ASM_8 \ +#define FAST_MULT_ASM_28_TO_32 \ + "cpi r18, 28 \n\t" \ + "brne 1f \n\t" \ + "jmp done \n\t" \ + "1: \n\t" \ + "ld r2, x+ \n\t" \ + "ld r6, y+ \n\t" \ + "ld r3, x+ \n\t" \ + "ld r7, y+ \n\t" \ + "ld r4, x+ \n\t" \ + "ld r8, y+ \n\t" \ + "ld r5, x+ \n\t" \ + "ld r9, y+ \n\t" \ + "sbiw r26, 32 \n\t" \ + "sbiw r28, 32 \n\t" \ + "sbiw r30, 28 \n\t" \ + "ld r10, x+ \n\t" \ + "ld r14, y+ \n\t" \ + "ld r11, x+ \n\t" \ + "ld r15, y+ \n\t" \ + "ld r12, x+ \n\t" \ + "ld r16, y+ \n\t" \ + "ld r13, x+ \n\t" \ + "ld r17, y+ \n\t" \ + \ + "mul r2, r14 \n\t" \ + "mov r19, r0 \n\t" \ + "mov r20, r1 \n\t" \ + "ldi r21, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r6, r10 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "st z+, r19 \n\t" \ + \ + "ldi r19, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r2, r15 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r6, r11 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r3, r14 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r7, r10 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "st z+, r20 \n\t" \ + \ + "ldi r20, 0 
\n\t" \ + "ld r0, z \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r2, r16 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r6, r12 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r3, r15 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r7, r11 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r4, r14 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r8, r10 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "st z+, r21 \n\t" \ + \ + "ldi r21, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r2, r17 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r6, r13 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r3, r16 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r7, r12 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r4, r15 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r8, r11 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r5, r14 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r9, r10 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "st z+, r19 \n\t" \ + \ + "ldi r19, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r25 \n\t" \ + "ld r10, x+ \n\t" \ + "ld r14, y+ \n\t" \ + "mul r2, r14 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r6, r10 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r3, r17 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r7, r13 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ 
+ "adc r19, r25 \n\t" \ + "mul r4, r16 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r8, r12 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r5, r15 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r9, r11 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "st z+, r20 \n\t" \ + \ + "ldi r20, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r25 \n\t" \ + "ld r11, x+ \n\t" \ + "ld r15, y+ \n\t" \ + "mul r2, r15 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r6, r11 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r3, r14 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r7, r10 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r4, r17 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r8, r13 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r5, r16 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r9, r12 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "st z+, r21 \n\t" \ + \ + "ldi r21, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r25 \n\t" \ + "ld r12, x+ \n\t" \ + "ld r16, y+ \n\t" \ + "mul r2, r16 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r6, r12 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r3, r15 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r7, r11 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r4, r14 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r8, r10 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc 
r21, r25 \n\t" \ + "mul r5, r17 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r9, r13 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "st z+, r19 \n\t" \ + \ + "ldi r19, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r25 \n\t" \ + "ld r13, x+ \n\t" \ + "ld r17, y+ \n\t" \ + "mul r2, r17 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r6, r13 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r3, r16 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r7, r12 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r4, r15 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r8, r11 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r5, r14 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r9, r10 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "st z+, r20 \n\t" \ + \ + "ldi r20, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r25 \n\t" \ + "ld r10, x+ \n\t" \ + "ld r14, y+ \n\t" \ + "mul r2, r14 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r6, r10 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r3, r17 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r7, r13 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r4, r16 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r8, r12 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r5, r15 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r9, r11 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, 
r25 \n\t" \ + "st z+, r21 \n\t" \ + \ + "ldi r21, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r25 \n\t" \ + "ld r11, x+ \n\t" \ + "ld r15, y+ \n\t" \ + "mul r2, r15 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r6, r11 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r3, r14 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r7, r10 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r4, r17 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r8, r13 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r5, r16 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r9, r12 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "st z+, r19 \n\t" \ + \ + "ldi r19, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r25 \n\t" \ + "ld r12, x+ \n\t" \ + "ld r16, y+ \n\t" \ + "mul r2, r16 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r6, r12 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r3, r15 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r7, r11 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r4, r14 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r8, r10 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r5, r17 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r9, r13 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "st z+, r20 \n\t" \ + \ + "ldi r20, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r25 \n\t" \ + "ld r13, x+ \n\t" \ + "ld r17, y+ \n\t" \ + "mul r2, r17 \n\t" \ 
+ "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r6, r13 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r3, r16 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r7, r12 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r4, r15 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r8, r11 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r5, r14 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r9, r10 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "st z+, r21 \n\t" \ + \ + "ldi r21, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r25 \n\t" \ + "ld r10, x+ \n\t" \ + "ld r14, y+ \n\t" \ + "mul r2, r14 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r6, r10 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r3, r17 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r7, r13 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r4, r16 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r8, r12 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r5, r15 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r9, r11 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "st z+, r19 \n\t" \ + \ + "ldi r19, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r25 \n\t" \ + "ld r11, x+ \n\t" \ + "ld r15, y+ \n\t" \ + "mul r2, r15 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r6, r11 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r3, r14 \n\t" \ + "add 
r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r7, r10 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r4, r17 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r8, r13 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r5, r16 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r9, r12 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "st z+, r20 \n\t" \ + \ + "ldi r20, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r25 \n\t" \ + "ld r12, x+ \n\t" \ + "ld r16, y+ \n\t" \ + "mul r2, r16 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r6, r12 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r3, r15 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r7, r11 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r4, r14 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r8, r10 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r5, r17 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r9, r13 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "st z+, r21 \n\t" \ + \ + "ldi r21, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r25 \n\t" \ + "ld r13, x+ \n\t" \ + "ld r17, y+ \n\t" \ + "mul r2, r17 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r6, r13 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r3, r16 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r7, r12 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r4, r15 \n\t" \ + "add r19, r0 
\n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r8, r11 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r5, r14 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r9, r10 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "st z+, r19 \n\t" \ + \ + "ldi r19, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r25 \n\t" \ + "ld r10, x+ \n\t" \ + "ld r14, y+ \n\t" \ + "mul r2, r14 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r6, r10 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r3, r17 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r7, r13 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r4, r16 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r8, r12 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r5, r15 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r9, r11 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "st z+, r20 \n\t" \ + \ + "ldi r20, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r25 \n\t" \ + "ld r11, x+ \n\t" \ + "ld r15, y+ \n\t" \ + "mul r2, r15 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r6, r11 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r3, r14 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r7, r10 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r4, r17 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r8, r13 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r5, r16 \n\t" \ + "add r21, r0 \n\t" \ 
+ "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r9, r12 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "st z+, r21 \n\t" \ + \ + "ldi r21, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r25 \n\t" \ + "ld r12, x+ \n\t" \ + "ld r16, y+ \n\t" \ + "mul r2, r16 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r6, r12 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r3, r15 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r7, r11 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r4, r14 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r8, r10 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r5, r17 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r9, r13 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "st z+, r19 \n\t" \ + \ + "ldi r19, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r25 \n\t" \ + "ld r13, x+ \n\t" \ + "ld r17, y+ \n\t" \ + "mul r2, r17 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r6, r13 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r3, r16 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r7, r12 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r4, r15 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r8, r11 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r5, r14 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r9, r10 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "st z+, r20 \n\t" \ + \ + "ldi r20, 0 \n\t" \ + "ld 
r0, z \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r25 \n\t" \ + "ld r10, x+ \n\t" \ + "ld r14, y+ \n\t" \ + "mul r2, r14 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r6, r10 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r3, r17 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r7, r13 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r4, r16 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r8, r12 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r5, r15 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r9, r11 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "st z+, r21 \n\t" \ + \ + "ldi r21, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r25 \n\t" \ + "ld r11, x+ \n\t" \ + "ld r15, y+ \n\t" \ + "mul r2, r15 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r6, r11 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r3, r14 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r7, r10 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r4, r17 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r8, r13 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r5, r16 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r9, r12 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "st z+, r19 \n\t" \ + \ + "ldi r19, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r25 \n\t" \ + "ld r12, x+ \n\t" \ + "ld r16, y+ \n\t" \ + "mul r2, r16 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 
\n\t" \ + "mul r6, r12 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r3, r15 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r7, r11 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r4, r14 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r8, r10 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r5, r17 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r9, r13 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "st z+, r20 \n\t" \ + \ + "ldi r20, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r25 \n\t" \ + "ld r13, x+ \n\t" \ + "ld r17, y+ \n\t" \ + "mul r2, r17 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r6, r13 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r3, r16 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r7, r12 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r4, r15 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r8, r11 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r5, r14 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r9, r10 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "st z+, r21 \n\t" \ + \ + "ldi r21, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r25 \n\t" \ + "ld r10, x+ \n\t" \ + "ld r14, y+ \n\t" \ + "mul r2, r14 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r6, r10 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r3, r17 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ 
+ "mul r7, r13 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r4, r16 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r8, r12 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r5, r15 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r9, r11 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "st z+, r19 \n\t" \ + \ + "ldi r19, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r25 \n\t" \ + "ld r11, x+ \n\t" \ + "ld r15, y+ \n\t" \ + "mul r2, r15 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r6, r11 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r3, r14 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r7, r10 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r4, r17 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r8, r13 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r5, r16 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r9, r12 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "st z+, r20 \n\t" \ + \ + "ldi r20, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r25 \n\t" \ + "ld r12, x+ \n\t" \ + "ld r16, y+ \n\t" \ + "mul r2, r16 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r6, r12 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r3, r15 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r7, r11 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r4, r14 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul 
r8, r10 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r5, r17 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r9, r13 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "st z+, r21 \n\t" \ + \ + "ldi r21, 0 \n\t" \ + "ld r0, z \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r25 \n\t" \ + "ld r13, x+ \n\t" \ + "ld r17, y+ \n\t" \ + "mul r2, r17 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r6, r13 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r3, r16 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r7, r12 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r4, r15 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r8, r11 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r5, r14 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r9, r10 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "st z+, r19 \n\t" \ + \ + "ldi r19, 0 \n\t" \ + "mul r11, r9 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r12, r8 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r13, r7 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r2, r6 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r3, r17 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r4, r16 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r5, r15 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "st z+, r20 \n\t" \ + \ + "ldi r20, 0 \n\t" \ + "mul r12, r9 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc 
r20, r25 \n\t" \ + "mul r13, r8 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r2, r7 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r3, r6 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r4, r17 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r5, r16 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "st z+, r21 \n\t" \ + \ + "ldi r21, 0 \n\t" \ + "mul r13, r9 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r2, r8 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r3, r7 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r4, r6 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r5, r17 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "st z+, r19 \n\t" \ + \ + "ldi r19, 0 \n\t" \ + "mul r2, r9 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r3, r8 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r4, r7 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "mul r5, r6 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "adc r19, r25 \n\t" \ + "st z+, r20 \n\t" \ + \ + "ldi r20, 0 \n\t" \ + "mul r3, r9 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r4, r8 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "mul r5, r7 \n\t" \ + "add r21, r0 \n\t" \ + "adc r19, r1 \n\t" \ + "adc r20, r25 \n\t" \ + "st z+, r21 \n\t" \ + \ + "ldi r21, 0 \n\t" \ + "mul r4, r9 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "mul r5, r8 \n\t" \ + "add r19, r0 \n\t" \ + "adc r20, r1 \n\t" \ + "adc r21, r25 \n\t" \ + "st z+, r19 \n\t" \ + \ + "mul r5, 
r9 \n\t" \ + "add r20, r0 \n\t" \ + "adc r21, r1 \n\t" \ + "st z+, r20 \n\t" \ + "st z+, r21 \n\t" + /* Not necessary to move ptrs since we don't support sizes > 32 */ + +#define FAST_MULT_ASM_32 \ "adiw r30, 30 \n\t" \ "adiw r28, 30 \n\t" \ "ld r2, x+ \n\t" \ @@ -13352,10 +16534,9 @@ "add r23, r0 \n\t" \ "adc r24, r1 \n\t" \ "st z+, r23 \n\t" \ - "st z+, r24 \n\t" \ - "eor r1, r1 \n\t" + "st z+, r24 \n\t" -#define FAST_SQUARE_ASM_5 \ +#define FAST_SQUARE_ASM_20 \ "ld r2, x+ \n\t" \ "ld r3, x+ \n\t" \ "ld r4, x+ \n\t" \ @@ -14510,7 +17691,7 @@ "st z+, r25 \n\t" \ "eor r1, r1 \n\t" -#define FAST_SQUARE_ASM_6 \ +#define FAST_SQUARE_ASM_24 \ "ldi r25, 0 \n\t" \ "movw r28, r26 \n\t" \ "ld r2, x+ \n\t" \ @@ -16157,7 +19338,7 @@ "st z+, r28 \n\t" \ "eor r1, r1 \n\t" -#define FAST_SQUARE_ASM_7 \ +#define FAST_SQUARE_ASM_28 \ "ldi r25, 0 \n\t" \ "movw r28, r26 \n\t" \ "ld r2, x+ \n\t" \ @@ -18360,7 +21541,7 @@ "st z+, r28 \n\t" \ "eor r1, r1 \n\t" -#define FAST_SQUARE_ASM_8 \ +#define FAST_SQUARE_ASM_32 \ "ldi r25, 0 \n\t" \ "movw r28, r26 \n\t" \ "ld r2, x+ \n\t" \ diff --git a/scripts/mult_avr_extra.py b/scripts/mult_avr_extra.py new file mode 100755 index 0000000..f6e654f --- /dev/null +++ b/scripts/mult_avr_extra.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python + +import sys + +if len(sys.argv) < 2: + print "Provide the integer size in bytes" + sys.exit(1) + +size = int(sys.argv[1]) + +def lhi(i): + return i + 2 + +def rhi(i): + return i + 6 + +left_lo = [10, 11, 12, 13] +right_lo = [14, 15, 16, 17] + +def llo(i): + return left_lo[i] + +def rlo(i): + return right_lo[i] + +def emit(line, *args): + s = '"' + line + r' \n\t"' + print s % args + +def update_low(): + global left_lo + global right_lo + left_lo = left_lo[1:] + left_lo[:1] + right_lo = right_lo[1:] + right_lo[:1] + emit("ld r%s, x+", left_lo[3]) + emit("ld r%s, y+", right_lo[3]) + +accum = [19, 20, 21] + +def acc(i): + return accum[i] + +def rotate_acc(): + global accum + accum = accum[1:] + accum[:1] + +# Load 
high values +for i in xrange(4): + emit("ld r%s, x+", lhi(i)) + emit("ld r%s, y+", rhi(i)) + +emit("sbiw r26, %s", size + 4) +emit("sbiw r28, %s", size + 4) +emit("sbiw r30, %s", size) + +# Load low values +for i in xrange(4): + emit("ld r%s, x+", llo(i)) + emit("ld r%s, y+", rlo(i)) +print "" + +# Compute initial triangles +emit("mul r%s, r%s", lhi(0), rlo(0)) +emit("mov r%s, r0", acc(0)) +emit("mov r%s, r1", acc(1)) +emit("ldi r%s, 0", acc(2)) +emit("ld r0, z") +emit("add r%s, r0", acc(0)) +emit("adc r%s, r25", acc(1)) +emit("mul r%s, r%s", rhi(0), llo(0)) +emit("add r%s, r0", acc(0)) +emit("adc r%s, r1", acc(1)) +emit("adc r%s, r25", acc(2)) +emit("st z+, r%s", acc(0)) +print "" +rotate_acc() + +for i in xrange(1, 4): + emit("ldi r%s, 0", acc(2)) + emit("ld r0, z") + emit("add r%s, r0", acc(0)) + emit("adc r%s, r25", acc(1)) + for j in xrange(i + 1): + emit("mul r%s, r%s", lhi(j), rlo(i-j)) + emit("add r%s, r0", acc(0)) + emit("adc r%s, r1", acc(1)) + emit("adc r%s, r25", acc(2)) + emit("mul r%s, r%s", rhi(j), llo(i-j)) + emit("add r%s, r0", acc(0)) + emit("adc r%s, r1", acc(1)) + emit("adc r%s, r25", acc(2)) + emit("st z+, r%s", acc(0)) + print "" + rotate_acc() + +# Compute rows overlapping old block +for i in xrange(4, size): + emit("ldi r%s, 0", acc(2)) + emit("ld r0, z") + emit("add r%s, r0", acc(0)) + emit("adc r%s, r25", acc(1)) + update_low() + for j in xrange(4): + emit("mul r%s, r%s", lhi(j), rlo(3-j)) + emit("add r%s, r0", acc(0)) + emit("adc r%s, r1", acc(1)) + emit("adc r%s, r25", acc(2)) + emit("mul r%s, r%s", rhi(j), llo(3-j)) + emit("add r%s, r0", acc(0)) + emit("adc r%s, r1", acc(1)) + emit("adc r%s, r25", acc(2)) + emit("st z+, r%s", acc(0)) + print "" + rotate_acc() + +# Compute new triangle +left_combined = [llo(1), llo(2), llo(3), lhi(0), lhi(1), lhi(2), lhi(3)] +right_combined = [rlo(1), rlo(2), rlo(3), rhi(0), rhi(1), rhi(2), rhi(3)] + +def left(i): + return left_combined[i] + +def right(i): + return right_combined[i] + +for i in 
xrange(6): + emit("ldi r%s, 0", acc(2)) + for j in xrange(7 - i): + emit("mul r%s, r%s", left(i+j), right(6-j)) + emit("add r%s, r0", acc(0)) + emit("adc r%s, r1", acc(1)) + emit("adc r%s, r25", acc(2)) + emit("st z+, r%s", acc(0)) + print "" + rotate_acc() + +emit("mul r%s, r%s", left(6), right(6)) +emit("add r%s, r0", acc(0)) +emit("adc r%s, r1", acc(1)) +emit("st z+, r%s", acc(0)) +emit("st z+, r%s", acc(1)) +emit("adiw r26, 4") +emit("adiw r28, 4")