# Patch summary (text before the "Index:" line is ignored by patch(1)).
#
# Purpose: in apps/codecs/libmad/synth.c, replace the C ML0/MLA/MLN
# multiply-accumulate sequences of the "calculate 32 samples" code with ARM
# inline assembly built on smull/smlal.  fe, fx and fo become integer row
# indices into the filter banks instead of row pointers; the rows are
# re-fetched into ptr1/ptr2 before each asm block.  The superseded C code for
# the *pcm2-- sample is kept under "#if 0".  A disabled ("elif 0") ARM variant
# of MUL() is added as well.
#
# Review fixes folded into this revision (hunk line counts are unchanged):
#   - MUL(): the low result word is written to %[lo] instead of a hard-coded
#     r0, so the declared "lo" output is actually produced and the r0 clobber
#     is no longer needed.
#   - asm blocks that execute "rsbs" now declare the "cc" clobber.
#   - all asm blocks declare the "memory" clobber because they load through
#     pointers that are not expressed as memory operands.
#   - the unused "r4" clobber of the first asm block is dropped.
#
# NOTE(review): the indentation of context lines was reconstructed and may not
# match the repository byte-for-byte; apply with "patch -l" if hunks are
# rejected because of whitespace.
Index: apps/codecs/libmad/synth.c
===================================================================
RCS file: /cvsroot/rockbox/apps/codecs/libmad/synth.c,v
retrieving revision 1.8
diff -u -r1.8 synth.c
--- apps/codecs/libmad/synth.c 27 Oct 2005 11:39:01 -0000 1.8
+++ apps/codecs/libmad/synth.c 10 Apr 2006 18:11:55 -0000
@@ -121,6 +121,18 @@
           : [a] "r" ((x)), [b] "r" ((y))); \
         hi; \
      })
+# elif 0 && defined(FPM_ARM)
+# define OPT_DCTO
+# define MUL(x, y) \
+    ({ \
+      mad_fixed64hi_t hi; \
+      mad_fixed64lo_t lo; \
+      asm volatile("smull %[lo], %[hi], %[a], %[b]\n\t" /* low word -> lo, high word -> hi */ \
+                   : [hi] "=&r" (hi), [lo] "=&r" (lo) \
+                   : [a] "r" ((x)), [b] "r" ((y)) \
+                   ); \
+      hi; /* value of the statement expression: high 32 bits of the product */ \
+    })
 # elif defined(OPT_SPEED) && defined(MAD_F_MLX)
 # define OPT_DCTO
 # define MUL(x, y) \
@@ -769,10 +781,11 @@
   unsigned int phase, ch, s, sb, pe, po;
   mad_fixed_t *pcm1, *pcm2, (*filter)[2][2][16][8];
   mad_fixed_t const (*sbsample)[36][32];
-  register mad_fixed_t (*fe)[8], (*fx)[8], (*fo)[8];
-  register mad_fixed_t const (*Dptr)[32], *ptr;
-  register mad_fixed64hi_t hi;
-  register mad_fixed64lo_t lo;
+  /* fe, fx, fo are row indices into the filter banks (previously row pointers) */
+  int fe, fx, fo;
+  mad_fixed_t const (*Dptr)[32], *ptr, *ptr1, *ptr2;
+  mad_fixed64hi_t hi;
+  mad_fixed64lo_t lo;
 
   for (ch = 0; ch < nch; ++ch) {
     sbsample = &frame->sbsample[ch];
@@ -789,32 +802,58 @@
 
       /* calculate 32 samples */
 
+      fe = fx = fo = 0; /* start at row 0 of each filter bank */
+#if 0
       fe = &(*filter)[0][ phase & 1][0];
       fx = &(*filter)[0][~phase & 1][0];
       fo = &(*filter)[1][~phase & 1][0];
+#endif
 
       Dptr = &D[0];
-
-      ptr = *Dptr + po;
-      ML0(hi, lo, (*fx)[0], ptr[ 0]);
-      MLA(hi, lo, (*fx)[1], ptr[14]);
-      MLA(hi, lo, (*fx)[2], ptr[12]);
-      MLA(hi, lo, (*fx)[3], ptr[10]);
-      MLA(hi, lo, (*fx)[4], ptr[ 8]);
-      MLA(hi, lo, (*fx)[5], ptr[ 6]);
-      MLA(hi, lo, (*fx)[6], ptr[ 4]);
-      MLA(hi, lo, (*fx)[7], ptr[ 2]);
-      MLN(hi, lo);
-
-      ptr = *Dptr + pe;
-      MLA(hi, lo, (*fe)[0], ptr[ 0]);
-      MLA(hi, lo, (*fe)[1], ptr[14]);
-      MLA(hi, lo, (*fe)[2], ptr[12]);
-      MLA(hi, lo, (*fe)[3], ptr[10]);
-      MLA(hi, lo, (*fe)[4], ptr[ 8]);
-      MLA(hi, lo, (*fe)[5], ptr[ 6]);
-      MLA(hi, lo, (*fe)[6], ptr[ 4]);
-      MLA(hi, lo, (*fe)[7], ptr[ 2]);
+      ptr1 = (*filter)[0][~phase & 1][fx]; /* row formerly addressed as *fx */
+      ptr2 = (*filter)[0][ phase & 1][fe]; /* row formerly addressed as *fe */
+      asm volatile(
+        "ldmia %2!, {r0, r1, r2, r3}\n\t" /* ptr1[0..3]; writeback advances ptr1 */
+        "ldr r6, [%4]\n\t"
+        "smull %1, %0, r0, r6\n\t" /* starts the 64-bit sum (was ML0) */
+        "ldr r6, [%4, #56]\n\t"
+        "smlal %1, %0, r1, r6\n\t"
+        "ldr r6, [%4, #48]\n\t"
+        "smlal %1, %0, r2, r6\n\t"
+        "ldr r6, [%4, #40]\n\t"
+        "smlal %1, %0, r3, r6\n\t"
+        "ldmia %2, {r0, r1, r2, r3}\n\t" /* ptr1[4..7] */
+        "ldr r6, [%4, #32]\n\t"
+        "smlal %1, %0, r0, r6\n\t"
+        "ldr r6, [%4, #24]\n\t"
+        "smlal %1, %0, r1, r6\n\t"
+        "ldr r6, [%4, #16]\n\t"
+        "smlal %1, %0, r2, r6\n\t"
+        "ldr r6, [%4, #8]\n\t"
+        "smlal %1, %0, r3, r6\n\t"
+        "rsbs %1, %1, #0\n\t" /* negate the 64-bit sum (was MLN); sets flags */
+        "rsc %0, %0, #0\n\t"
+        "ldmia %3!, {r0, r1, r2, r3}\n\t" /* ptr2[0..3]; writeback advances ptr2 */
+        "ldr r6, [%5]\n\t"
+        "smlal %1, %0, r0, r6\n\t"
+        "ldr r6, [%5, #56]\n\t"
+        "smlal %1, %0, r1, r6\n\t"
+        "ldr r6, [%5, #48]\n\t"
+        "smlal %1, %0, r2, r6\n\t"
+        "ldr r6, [%5, #40]\n\t"
+        "smlal %1, %0, r3, r6\n\t"
+        "ldmia %3, {r0, r1, r2, r3}\n\t" /* ptr2[4..7] */
+        "ldr r6, [%5, #32]\n\t"
+        "smlal %1, %0, r0, r6\n\t"
+        "ldr r6, [%5, #24]\n\t"
+        "smlal %1, %0, r1, r6\n\t"
+        "ldr r6, [%5, #16]\n\t"
+        "smlal %1, %0, r2, r6\n\t"
+        "ldr r6, [%5, #8]\n\t"
+        "smlal %1, %0, r3, r6\n\t"
+        : "=&r" (hi), "=&r" (lo), "+&r" (ptr1), "+&r" (ptr2)
+        : "r" (*Dptr + po), "r" (*Dptr + pe)
+        : "r0", "r1", "r2", "r3", "r6", "cc", "memory"); /* rsbs writes flags; loads go through pointers */
 
       *pcm1++ = SHIFT(MLZ(hi, lo));
 
@@ -826,29 +865,99 @@
 
         /* D[32 - sb][i] == -D[sb][31 - i] */
 
-        ptr = *Dptr + po;
-        ML0(hi, lo, (*fo)[0], ptr[ 0]);
-        MLA(hi, lo, (*fo)[1], ptr[14]);
-        MLA(hi, lo, (*fo)[2], ptr[12]);
-        MLA(hi, lo, (*fo)[3], ptr[10]);
-        MLA(hi, lo, (*fo)[4], ptr[ 8]);
-        MLA(hi, lo, (*fo)[5], ptr[ 6]);
-        MLA(hi, lo, (*fo)[6], ptr[ 4]);
-        MLA(hi, lo, (*fo)[7], ptr[ 2]);
-        MLN(hi, lo);
-
-        ptr = *Dptr + pe;
-        MLA(hi, lo, (*fe)[7], ptr[ 2]);
-        MLA(hi, lo, (*fe)[6], ptr[ 4]);
-        MLA(hi, lo, (*fe)[5], ptr[ 6]);
-        MLA(hi, lo, (*fe)[4], ptr[ 8]);
-        MLA(hi, lo, (*fe)[3], ptr[10]);
-        MLA(hi, lo, (*fe)[2], ptr[12]);
-        MLA(hi, lo, (*fe)[1], ptr[14]);
-        MLA(hi, lo, (*fe)[0], ptr[ 0]);
-
+        ptr1 = (*filter)[1][~phase & 1][fo]; /* row formerly addressed as *fo */
+        ptr2 = (*filter)[0][ phase & 1][fe]; /* row formerly addressed as *fe */
+
+        asm volatile(
+          "ldmia %2!, {r0, r1, r2, r3}\n\t" /* ptr1[0..3]; writeback advances ptr1 */
+          "ldr r6, [%4]\n\t"
+          "smull %1, %0, r0, r6\n\t" /* starts the 64-bit sum (was ML0) */
+          "ldr r6, [%4, #56]\n\t"
+          "smlal %1, %0, r1, r6\n\t"
+          "ldr r6, [%4, #48]\n\t"
+          "smlal %1, %0, r2, r6\n\t"
+          "ldr r6, [%4, #40]\n\t"
+          "smlal %1, %0, r3, r6\n\t"
+          "ldmia %2, {r0, r1, r2, r3}\n\t" /* ptr1[4..7] */
+          "ldr r6, [%4, #32]\n\t"
+          "smlal %1, %0, r0, r6\n\t"
+          "ldr r6, [%4, #24]\n\t"
+          "smlal %1, %0, r1, r6\n\t"
+          "ldr r6, [%4, #16]\n\t"
+          "smlal %1, %0, r2, r6\n\t"
+          "ldr r6, [%4, #8]\n\t"
+          "smlal %1, %0, r3, r6\n\t"
+          "rsbs %1, %1, #0\n\t" /* negate the 64-bit sum (was MLN); sets flags */
+          "rsc %0, %0, #0\n\t"
+          "ldmia %3!, {r0, r1, r2, r3}\n\t" /* ptr2[0..3]; writeback advances ptr2 */
+          "ldr r6, [%5]\n\t"
+          "smlal %1, %0, r0, r6\n\t"
+          "ldr r6, [%5, #56]\n\t"
+          "smlal %1, %0, r1, r6\n\t"
+          "ldr r6, [%5, #48]\n\t"
+          "smlal %1, %0, r2, r6\n\t"
+          "ldr r6, [%5, #40]\n\t"
+          "smlal %1, %0, r3, r6\n\t"
+          "ldmia %3, {r0, r1, r2, r3}\n\t" /* ptr2[4..7] */
+          "ldr r6, [%5, #32]\n\t"
+          "smlal %1, %0, r0, r6\n\t"
+          "ldr r6, [%5, #24]\n\t"
+          "smlal %1, %0, r1, r6\n\t"
+          "ldr r6, [%5, #16]\n\t"
+          "smlal %1, %0, r2, r6\n\t"
+          "ldr r6, [%5, #8]\n\t"
+          "smlal %1, %0, r3, r6\n\t"
+          : "=&r" (hi), "=&r" (lo), "+&r" (ptr1), "+&r" (ptr2)
+          : "r" (*Dptr + po), "r" (*Dptr + pe)
+          : "r0", "r1", "r2", "r3", "r6", "cc", "memory"); /* rsbs writes flags; loads go through pointers */
         *pcm1++ = SHIFT(MLZ(hi, lo));
 
+        ptr1 = (*filter)[0][ phase & 1][fe]; /* row formerly addressed as *fe */
+        ptr2 = (*filter)[1][~phase & 1][fo]; /* row formerly addressed as *fo */
+
+        asm volatile(
+          "ldmia %2!, {r0, r1, r2, r3}\n\t" /* ptr1[0..3]; writeback advances ptr1 */
+          "ldr r6, [%4, #60]\n\t" /* byte offset 60 == element 31 - 16 */
+          "smull %1, %0, r0, r6\n\t" /* starts the 64-bit sum (was ML0) */
+          "ldr r6, [%4, #68]\n\t"
+          "smlal %1, %0, r1, r6\n\t"
+          "ldr r6, [%4, #76]\n\t"
+          "smlal %1, %0, r2, r6\n\t"
+          "ldr r6, [%4, #84]\n\t"
+          "smlal %1, %0, r3, r6\n\t"
+          "ldmia %2, {r0, r1, r2, r3}\n\t" /* ptr1[4..7] */
+          "ldr r6, [%4, #92]\n\t"
+          "smlal %1, %0, r0, r6\n\t"
+          "ldr r6, [%4, #100]\n\t"
+          "smlal %1, %0, r1, r6\n\t"
+          "ldr r6, [%4, #108]\n\t"
+          "smlal %1, %0, r2, r6\n\t"
+          "ldr r6, [%4, #116]\n\t"
+          "smlal %1, %0, r3, r6\n\t"
+          "ldmia %3!, {r0, r1, r2, r3}\n\t" /* ptr2[0..3]; writeback advances ptr2 */
+          "ldr r6, [%5, #60]\n\t"
+          "smlal %1, %0, r0, r6\n\t"
+          "ldr r6, [%5, #68]\n\t"
+          "smlal %1, %0, r1, r6\n\t"
+          "ldr r6, [%5, #76]\n\t"
+          "smlal %1, %0, r2, r6\n\t"
+          "ldr r6, [%5, #84]\n\t"
+          "smlal %1, %0, r3, r6\n\t"
+          "ldmia %3, {r0, r1, r2, r3}\n\t" /* ptr2[4..7] */
+          "ldr r6, [%5, #92]\n\t"
+          "smlal %1, %0, r0, r6\n\t"
+          "ldr r6, [%5, #100]\n\t"
+          "smlal %1, %0, r1, r6\n\t"
+          "ldr r6, [%5, #108]\n\t"
+          "smlal %1, %0, r2, r6\n\t"
+          "ldr r6, [%5, #116]\n\t"
+          "smlal %1, %0, r3, r6\n\t"
+          : "=&r" (hi), "=&r" (lo), "+&r" (ptr1), "+&r" (ptr2)
+          : "r" (*Dptr - pe), "r" (*Dptr - po)
+          : "r0", "r1", "r2", "r3", "r6", "memory"); /* no flag-setting instruction here, so no "cc" */
+        *pcm2-- = SHIFT(MLZ(hi, lo));
+
+#if 0
         ptr = *Dptr - pe;
         ML0(hi, lo, (*fe)[0], ptr[31 - 16]);
         MLA(hi, lo, (*fe)[1], ptr[31 - 14]);
@@ -870,21 +979,22 @@
         MLA(hi, lo, (*fo)[0], ptr[31 - 16]);
 
         *pcm2-- = SHIFT(MLZ(hi, lo));
-
+#endif
         ++fo;
       }
 
       ++Dptr;
 
       ptr = *Dptr + po;
-      ML0(hi, lo, (*fo)[0], ptr[ 0]);
-      MLA(hi, lo, (*fo)[1], ptr[14]);
-      MLA(hi, lo, (*fo)[2], ptr[12]);
-      MLA(hi, lo, (*fo)[3], ptr[10]);
-      MLA(hi, lo, (*fo)[4], ptr[ 8]);
-      MLA(hi, lo, (*fo)[5], ptr[ 6]);
-      MLA(hi, lo, (*fo)[6], ptr[ 4]);
-      MLA(hi, lo, (*fo)[7], ptr[ 2]);
+      ptr2 = (*filter)[1][~phase & 1][fo]; /* row formerly addressed as *fo */
+      ML0(hi, lo, ptr2[0], ptr[ 0]);
+      MLA(hi, lo, ptr2[1], ptr[14]);
+      MLA(hi, lo, ptr2[2], ptr[12]);
+      MLA(hi, lo, ptr2[3], ptr[10]);
+      MLA(hi, lo, ptr2[4], ptr[ 8]);
+      MLA(hi, lo, ptr2[5], ptr[ 6]);
+      MLA(hi, lo, ptr2[6], ptr[ 4]);
+      MLA(hi, lo, ptr2[7], ptr[ 2]);
 
       *pcm1 = SHIFT(-MLZ(hi, lo));
       pcm1 += 16;