$OpenBSD: patch-libavcodec_mips_aacpsy_mips_h,v 1.1 2015/12/06 08:51:46 ajacoutot Exp $

AAC encoder: improve SF range utilization

--- libavcodec/mips/aacpsy_mips.h.orig	Wed Jul 22 04:29:21 2015
+++ libavcodec/mips/aacpsy_mips.h	Sat Dec  5 15:01:19 2015
@@ -61,58 +61,62 @@
 #if HAVE_INLINE_ASM && HAVE_MIPSFPU && ( PSY_LAME_FIR_LEN == 21 )
 static void calc_thr_3gpp_mips(const FFPsyWindowInfo *wi, const int num_bands,
                                AacPsyChannel *pch, const uint8_t *band_sizes,
-                               const float *coefs)
+                               const float *coefs, const int cutoff)
 {
     int i, w, g;
-    int start = 0;
+    int start = 0, wstart = 0;
     for (w = 0; w < wi->num_windows*16; w += 16) {
+        wstart = 0;
         for (g = 0; g < num_bands; g++) {
             AacPsyBand *band = &pch->band[w+g];
 
             float form_factor = 0.0f;
             float Temp;
             band->energy = 0.0f;
-            for (i = 0; i < band_sizes[g]; i+=4) {
-                float a, b, c, d;
-                float ax, bx, cx, dx;
-                float *cf = (float *)&coefs[start+i];
+            if (wstart < cutoff) {
+                for (i = 0; i < band_sizes[g]; i+=4) {
+                    float a, b, c, d;
+                    float ax, bx, cx, dx;
+                    float *cf = (float *)&coefs[start+i];
 
-                __asm__ volatile (
-                    "lwc1   %[a],   0(%[cf])                \n\t"
-                    "lwc1   %[b],   4(%[cf])                \n\t"
-                    "lwc1   %[c],   8(%[cf])                \n\t"
-                    "lwc1   %[d],   12(%[cf])               \n\t"
-                    "abs.s  %[a],   %[a]                    \n\t"
-                    "abs.s  %[b],   %[b]                    \n\t"
-                    "abs.s  %[c],   %[c]                    \n\t"
-                    "abs.s  %[d],   %[d]                    \n\t"
-                    "sqrt.s %[ax],  %[a]                    \n\t"
-                    "sqrt.s %[bx],  %[b]                    \n\t"
-                    "sqrt.s %[cx],  %[c]                    \n\t"
-                    "sqrt.s %[dx],  %[d]                    \n\t"
-                    "madd.s %[e],   %[e],   %[a],   %[a]    \n\t"
-                    "madd.s %[e],   %[e],   %[b],   %[b]    \n\t"
-                    "madd.s %[e],   %[e],   %[c],   %[c]    \n\t"
-                    "madd.s %[e],   %[e],   %[d],   %[d]    \n\t"
-                    "add.s  %[f],   %[f],   %[ax]           \n\t"
-                    "add.s  %[f],   %[f],   %[bx]           \n\t"
-                    "add.s  %[f],   %[f],   %[cx]           \n\t"
-                    "add.s  %[f],   %[f],   %[dx]           \n\t"
+                    __asm__ volatile (
+                        "lwc1   %[a],   0(%[cf])                \n\t"
+                        "lwc1   %[b],   4(%[cf])                \n\t"
+                        "lwc1   %[c],   8(%[cf])                \n\t"
+                        "lwc1   %[d],   12(%[cf])               \n\t"
+                        "abs.s  %[a],   %[a]                    \n\t"
+                        "abs.s  %[b],   %[b]                    \n\t"
+                        "abs.s  %[c],   %[c]                    \n\t"
+                        "abs.s  %[d],   %[d]                    \n\t"
+                        "sqrt.s %[ax],  %[a]                    \n\t"
+                        "sqrt.s %[bx],  %[b]                    \n\t"
+                        "sqrt.s %[cx],  %[c]                    \n\t"
+                        "sqrt.s %[dx],  %[d]                    \n\t"
+                        "madd.s %[e],   %[e],   %[a],   %[a]    \n\t"
+                        "madd.s %[e],   %[e],   %[b],   %[b]    \n\t"
+                        "madd.s %[e],   %[e],   %[c],   %[c]    \n\t"
+                        "madd.s %[e],   %[e],   %[d],   %[d]    \n\t"
+                        "add.s  %[f],   %[f],   %[ax]           \n\t"
+                        "add.s  %[f],   %[f],   %[bx]           \n\t"
+                        "add.s  %[f],   %[f],   %[cx]           \n\t"
+                        "add.s  %[f],   %[f],   %[dx]           \n\t"
 
-                    : [a]"=&f"(a), [b]"=&f"(b),
-                      [c]"=&f"(c), [d]"=&f"(d),
-                      [e]"+f"(band->energy), [f]"+f"(form_factor),
-                      [ax]"=&f"(ax), [bx]"=&f"(bx),
-                      [cx]"=&f"(cx), [dx]"=&f"(dx)
-                    : [cf]"r"(cf)
-                    : "memory"
-                );
+                        : [a]"=&f"(a), [b]"=&f"(b),
+                          [c]"=&f"(c), [d]"=&f"(d),
+                          [e]"+f"(band->energy), [f]"+f"(form_factor),
+                          [ax]"=&f"(ax), [bx]"=&f"(bx),
+                          [cx]"=&f"(cx), [dx]"=&f"(dx)
+                        : [cf]"r"(cf)
+                        : "memory"
+                    );
+                }
             }
 
             Temp = sqrtf((float)band_sizes[g] / band->energy);
             band->thr      = band->energy * 0.001258925f;
             band->nz_lines = form_factor * sqrtf(Temp);
             start += band_sizes[g];
+            wstart += band_sizes[g];
         }
     }
 }
