More performance tweaks

This commit is contained in:
NIIBE Yutaka 2011-05-31 10:17:38 +09:00
parent 09748fc046
commit 8c930952f2
2 changed files with 65 additions and 64 deletions

View File

@ -1,3 +1,8 @@
2011-05-31 NIIBE Yutaka <gniibe@fsij.org>
* include/polarssl/bn_mul.h [__arm__]
(MULADDC_HUIT, MULADDC_INIT, MULADDC_CORE, MULADDC_STOP): Tweak for
performance: emit a single asm statement with register operands.
2011-05-27 NIIBE Yutaka <gniibe@fsij.org> 2011-05-27 NIIBE Yutaka <gniibe@fsij.org>
* tool/gnuk_put_binary.py (main): Confirm Serial ID is written * tool/gnuk_put_binary.py (main): Confirm Serial ID is written

View File

@ -496,73 +496,69 @@
#if defined(__arm__) #if defined(__arm__)
#define MULADDC_HUIT \ #define MULADDC_HUIT \
asm( "ldmia r0!, { r4, r5 } " ); \ "ldmia %0!, { r4, r5 } \n" \
asm( "ldmia r1, { r8, r9 } " ); \ "ldmia %1, { r8, r9 } \n" \
asm( "umull r6, r7, r3, r4 " ); \ "umull r6, r7, %2, r4 \n" \
asm( "adcs r6, r6, r2 " ); \ "adcs r6, r6, %3 \n" \
asm( "adc r7, r7, #0 " ); \ "adc r7, r7, #0 \n" \
asm( "adds r8, r8, r6 " ); \ "adds r8, r8, r6 \n" \
asm( "umull r6, r2, r3, r5 " ); \ "umull r6, %3, %2, r5 \n" \
asm( "adcs r6, r6, r7 " ); \ "adcs r6, r6, r7 \n" \
asm( "adc r2, r2, #0 " ); \ "adc %3, %3, #0 \n" \
asm( "adds r9, r9, r6 " ); \ "adds r9, r9, r6 \n" \
asm( "stmia r1!, { r8, r9 } " ); \ "stmia %1!, { r8, r9 } \n" \
asm( "ldmia r0!, { r4, r5 } " ); \ "ldmia %0!, { r4, r5 } \n" \
asm( "ldmia r1, { r8, r9 } " ); \ "ldmia %1, { r8, r9 } \n" \
asm( "umull r6, r7, r3, r4 " ); \ "umull r6, r7, %2, r4 \n" \
asm( "adcs r6, r6, r2 " ); \ "adcs r6, r6, %3 \n" \
asm( "adc r7, r7, #0 " ); \ "adc r7, r7, #0 \n" \
asm( "adds r8, r8, r6 " ); \ "adds r8, r8, r6 \n" \
asm( "umull r6, r2, r3, r5 " ); \ "umull r6, %3, %2, r5 \n" \
asm( "adcs r6, r6, r7 " ); \ "adcs r6, r6, r7 \n" \
asm( "adc r2, r2, #0 " ); \ "adc %3, %3, #0 \n" \
asm( "adds r9, r9, r6 " ); \ "adds r9, r9, r6 \n" \
asm( "stmia r1!, { r8, r9 } " ); \ "stmia %1!, { r8, r9 } \n" \
asm( "ldmia r0!, { r4, r5 } " ); \ "ldmia %0!, { r4, r5 } \n" \
asm( "ldmia r1, { r8, r9 } " ); \ "ldmia %1, { r8, r9 } \n" \
asm( "umull r6, r7, r3, r4 " ); \ "umull r6, r7, %2, r4 \n" \
asm( "adcs r6, r6, r2 " ); \ "adcs r6, r6, %3 \n" \
asm( "adc r7, r7, #0 " ); \ "adc r7, r7, #0 \n" \
asm( "adds r8, r8, r6 " ); \ "adds r8, r8, r6 \n" \
asm( "umull r6, r2, r3, r5 " ); \ "umull r6, %3, %2, r5 \n" \
asm( "adcs r6, r6, r7 " ); \ "adcs r6, r6, r7 \n" \
asm( "adc r2, r2, #0 " ); \ "adc %3, %3, #0 \n" \
asm( "adds r9, r9, r6 " ); \ "adds r9, r9, r6 \n" \
asm( "stmia r1!, { r8, r9 } " ); \ "stmia %1!, { r8, r9 } \n" \
asm( "ldmia r0!, { r4, r5 } " ); \ "ldmia %0!, { r4, r5 } \n" \
asm( "ldmia r1, { r8, r9 } " ); \ "ldmia %1, { r8, r9 } \n" \
asm( "umull r6, r7, r3, r4 " ); \ "umull r6, r7, %2, r4 \n" \
asm( "adcs r6, r6, r2 " ); \ "adcs r6, r6, %3 \n" \
asm( "adc r7, r7, #0 " ); \ "adc r7, r7, #0 \n" \
asm( "adds r8, r8, r6 " ); \ "adds r8, r8, r6 \n" \
asm( "umull r6, r2, r3, r5 " ); \ "umull r6, %3, %2, r5 \n" \
asm( "adcs r6, r6, r7 " ); \ "adcs r6, r6, r7 \n" \
asm( "adc r2, r2, #0 " ); \ "adc %3, %3, #0 \n" \
asm( "adds r9, r9, r6 " ); \ "adds r9, r9, r6 \n" \
asm( "stmia r1!, { r8, r9 } " ); "stmia %1!, { r8, r9 } \n"
#define MULADDC_INIT \ #define MULADDC_INIT \
asm( "ldr r0, %0 " :: "m" (s)); \ asm( "adds %0, #0 \n"
asm( "ldr r1, %0 " :: "m" (d)); \
asm( "ldr r2, %0 " :: "m" (c)); \
asm( "ldr r3, %0 " :: "m" (b)); \
asm( "adds r0, #0 ");
#define MULADDC_CORE \ #define MULADDC_CORE \
asm( "ldr r5, [r1] " ); \ "ldr r5, [%1] \n" \
asm( "ldr r4, [r0], #4 " ); \ "ldr r4, [%0], #4 \n" \
asm( "adcs r5, r2, r5 " ); \ "umull r6, r7, %2, r4 \n" \
asm( "mov r2, #0 " ); \ "adcs r6, r6, %3 \n" \
asm( "umlal r5, r2, r3, r4 " ); \ "adc %3, r7, #0 \n" \
asm( "str r5, [r1], #4 " ); "adds r5, r5, r6 \n" \
"str r5, [%1], #4 \n"
#define MULADDC_STOP \ #define MULADDC_STOP \
asm( "adc r2, r2, #0 " ); \ "adc %3, %3, #0 " \
asm( "str r2, %0 " : "=m" (c)); \ : "=r" (s), "=r" (d), "=r" (b), "=r" (c) \
asm( "str r1, %0 " : "=m" (d)); \ : "0" (s), "1" (d), "2" (b), "3" (c) \
asm( "str r0, %0 " : "=m" (s) :: \ : "r4", "r5", "r6", "r7", "r8", "r9", "memory", "cc" );
"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9");
#endif /* ARMv3 */ #endif /* ARMv3 */