diff libavcodec/arm/dsputil_armv6.S @ 2:897f711a7157

rearrange to work with autoconf
author Nina Engelhardt <nengel@mailbox.tu-berlin.de>
date Tue, 25 Sep 2012 15:55:33 +0200
parents
children
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/libavcodec/arm/dsputil_armv6.S	Tue Sep 25 15:55:33 2012 +0200
     1.3 @@ -0,0 +1,623 @@
     1.4 +/*
     1.5 + * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
     1.6 + *
     1.7 + * This file is part of FFmpeg.
     1.8 + *
     1.9 + * FFmpeg is free software; you can redistribute it and/or
    1.10 + * modify it under the terms of the GNU Lesser General Public
    1.11 + * License as published by the Free Software Foundation; either
    1.12 + * version 2.1 of the License, or (at your option) any later version.
    1.13 + *
    1.14 + * FFmpeg is distributed in the hope that it will be useful,
    1.15 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
    1.16 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    1.17 + * Lesser General Public License for more details.
    1.18 + *
    1.19 + * You should have received a copy of the GNU Lesser General Public
    1.20 + * License along with FFmpeg; if not, write to the Free Software
    1.21 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
    1.22 + */
    1.23 +
    1.24 +#include "asm.S"
    1.25 +
    1.26 +        preserve8
    1.27 +
    1.28 +        .text
    1.29 +
    1.30 +.macro  call_2x_pixels  type, subp /* emit ff_<type>_pixels16<subp>_armv6: run the 8-byte-wide routine on the left half, then tail-call it on the right half */
    1.31 +function ff_\type\()_pixels16\subp\()_armv6, export=1
    1.32 +        push            {r0-r3, r12, lr} /* keep the arguments and return address; r12 is pushed only as padding so that sp stays 8-byte aligned at the call below */
    1.33 +        bl              ff_\type\()_pixels8\subp\()_armv6
    1.34 +        pop             {r0-r3, r12, lr} /* restore the original arguments for the second half */
    1.35 +        add             r0,  r0,  #8 /* move both pointers 8 bytes to the right */
    1.36 +        add             r1,  r1,  #8
    1.37 +        b               ff_\type\()_pixels8\subp\()_armv6 /* tail call: the 8-wide routine returns straight to our caller */
    1.38 +endfunc
    1.39 +.endm
    1.40 +
    1.41 +call_2x_pixels          avg /* defines ff_avg_pixels16_armv6 on top of ff_avg_pixels8_armv6 */
    1.42 +call_2x_pixels          put, _x2 /* defines ff_put_pixels16_x2_armv6 */
    1.43 +call_2x_pixels          put, _y2 /* defines ff_put_pixels16_y2_armv6 */
    1.44 +call_2x_pixels          put, _x2_no_rnd /* defines ff_put_pixels16_x2_no_rnd_armv6 */
    1.45 +call_2x_pixels          put, _y2_no_rnd /* defines ff_put_pixels16_y2_no_rnd_armv6 */
    1.46 +
    1.47 +function ff_put_pixels16_armv6, export=1 /* copy rows of 16 bytes from r1 to r0; r2 = stride applied to both pointers, r3 = row count (two rows per pass, so it must be a nonzero multiple of 2) */
    1.48 +        push            {r4-r11}
    1.49 +1:
    1.50 +        ldr             r9,  [r1, #4]
    1.51 +        ldr             r10, [r1, #8]
    1.52 +        ldr             r11, [r1, #12]
    1.53 +        ldr             r8,  [r1], r2 /* word 0 is fetched last so the post-increment can step src to the next row */
    1.54 +        strd            r10, r11, [r0, #8]
    1.55 +        ldr             r5,  [r1, #4] /* second row starts loading before the first is fully stored */
    1.56 +        strd            r8,  r9,  [r0],  r2
    1.57 +        ldr             r6,  [r1, #8]
    1.58 +        ldr             r7,  [r1, #12]
    1.59 +        ldr             r4,  [r1], r2
    1.60 +        strd            r6,  r7,  [r0, #8]
    1.61 +        subs            r3,  r3,  #2
    1.62 +        strd            r4,  r5,  [r0],  r2 /* a store does not touch the flags, so bne still sees the subs result */
    1.63 +        bne             1b
    1.64 +
    1.65 +        pop             {r4-r11}
    1.66 +        bx              lr
    1.67 +endfunc
    1.68 +
    1.69 +function ff_put_pixels8_armv6, export=1 /* copy rows of 8 bytes from r1 to r0; r2 = stride applied to both pointers, r3 = row count (two rows per pass, so it must be a nonzero multiple of 2) */
    1.70 +        push            {r4-r7}
    1.71 +1:
    1.72 +        ldr             r7,  [r1, #4]
    1.73 +        ldr             r6,  [r1], r2 /* low word last: the post-increment moves src to the next row */
    1.74 +        ldr             r5,  [r1, #4]
    1.75 +        strd            r6,  r7,  [r0],  r2
    1.76 +        ldr             r4,  [r1], r2
    1.77 +        subs            r3,  r3,  #2 /* sits between the load of r4 and the store that uses it */
    1.78 +        strd            r4,  r5,  [r0],  r2
    1.79 +        bne             1b
    1.80 +
    1.81 +        pop             {r4-r7}
    1.82 +        bx              lr
    1.83 +endfunc
    1.84 +
    1.85 +function ff_put_pixels8_x2_armv6, export=1 /* r0 = dst, r1 = src, r2 = stride, r3 = row count; stores the rounded average of every src byte and its right-hand neighbour, 8 bytes per row, two rows per pass */
    1.86 +        push            {r4-r11, lr}
    1.87 +        mov             r12, #1
    1.88 +        orr             r12, r12, r12, lsl #8
    1.89 +        orr             r12, r12, r12, lsl #16 /* r12 = 0x01010101: bit 0 of every byte lane */
    1.90 +1:
    1.91 +        ldr             r4,  [r1] /* row A, bytes 0..3 */
    1.92 +        subs            r3,  r3,  #2 /* nothing below rewrites the condition flags before the closing bne */
    1.93 +        ldr             r5,  [r1, #4] /* row A, bytes 4..7 */
    1.94 +        ldr             r7,  [r1, #5] /* row A, bytes 5..8 */
    1.95 +        lsr             r6,  r4,  #8
    1.96 +        ldr             r8,  [r1, r2]! /* step src to row B and fetch its bytes 0..3 */
    1.97 +        orr             r6,  r6,  r5,  lsl #24 /* r6 = row A bytes 1..4 (assumes little-endian loads) */
    1.98 +        ldr             r9,  [r1, #4]
    1.99 +        ldr             r11, [r1, #5]
   1.100 +        lsr             r10, r8,  #8
   1.101 +        add             r1,  r1,  r2 /* src now points at the row after B */
   1.102 +        orr             r10, r10, r9,  lsl #24 /* r10 = row B bytes 1..4 */
   1.103 +        eor             r14, r4,  r6 /* bit 0 of each lane is the bit the halving add drops */
   1.104 +        uhadd8          r4,  r4,  r6 /* per byte: (a + b) >> 1 */
   1.105 +        eor             r6,  r5,  r7
   1.106 +        uhadd8          r5,  r5,  r7
   1.107 +        and             r14, r14, r12
   1.108 +        and             r6,  r6,  r12
   1.109 +        uadd8           r4,  r4,  r14 /* add the dropped bit back: (a + b + 1) >> 1 */
   1.110 +        eor             r14, r8,  r10 /* same sequence for row B, interleaved with the end of row A */
   1.111 +        uadd8           r5,  r5,  r6
   1.112 +        eor             r6,  r9,  r11
   1.113 +        uhadd8          r8,  r8,  r10
   1.114 +        and             r14, r14, r12
   1.115 +        uhadd8          r9,  r9,  r11
   1.116 +        and             r6,  r6,  r12
   1.117 +        uadd8           r8,  r8,  r14
   1.118 +        strd            r4,  r5,  [r0],  r2 /* store row A, advance dst */
   1.119 +        uadd8           r9,  r9,  r6
   1.120 +        strd            r8,  r9,  [r0],  r2 /* store row B, advance dst */
   1.121 +        bne             1b
   1.122 +
   1.123 +        pop             {r4-r11, pc}
   1.124 +endfunc
   1.125 +
   1.126 +function ff_put_pixels8_y2_armv6, export=1 /* r0 = dst, r1 = src, r2 = stride, r3 = row count; stores the rounded average of every src byte and the byte one row below it, 8 bytes per row, two rows per pass */
   1.127 +        push            {r4-r11}
   1.128 +        mov             r12, #1
   1.129 +        orr             r12, r12, r12, lsl #8
   1.130 +        orr             r12, r12, r12, lsl #16 /* r12 = 0x01010101: bit 0 of every byte lane */
   1.131 +        ldr             r4,  [r1] /* r4:r5 = first source row */
   1.132 +        ldr             r5,  [r1, #4]
   1.133 +        ldr             r6,  [r1, r2]! /* r6:r7 = second source row; src pointer moves with it */
   1.134 +        ldr             r7,  [r1, #4]
   1.135 +1:
   1.136 +        subs            r3,  r3,  #2
   1.137 +        uhadd8          r8,  r4,  r6 /* r8:r9 = output row built from r4:r5 and r6:r7 */
   1.138 +        eor             r10, r4,  r6 /* bit 0 of each lane is the bit the halving add drops */
   1.139 +        uhadd8          r9,  r5,  r7
   1.140 +        eor             r11, r5,  r7
   1.141 +        and             r10, r10, r12
   1.142 +        ldr             r4,  [r1, r2]! /* r4:r5 is free again: refill it with the next source row */
   1.143 +        uadd8           r8,  r8,  r10 /* add the dropped bit back: (a + b + 1) >> 1 per byte */
   1.144 +        and             r11, r11, r12
   1.145 +        uadd8           r9,  r9,  r11
   1.146 +        ldr             r5,  [r1, #4]
   1.147 +        uhadd8          r10, r4,  r6 /* r10:r11 = next output row, from the new r4:r5 and the kept r6:r7 */
   1.148 +        eor             r6,  r4,  r6
   1.149 +        uhadd8          r11, r5,  r7
   1.150 +        and             r6,  r6,  r12
   1.151 +        eor             r7,  r5,  r7
   1.152 +        uadd8           r10, r10, r6
   1.153 +        and             r7,  r7,  r12
   1.154 +        ldr             r6,  [r1, r2]! /* refill r6:r7 for the next pass; on the last pass this row is loaded but never used */
   1.155 +        uadd8           r11, r11, r7
   1.156 +        strd            r8,  r9,  [r0],  r2
   1.157 +        ldr             r7,  [r1, #4]
   1.158 +        strd            r10, r11, [r0],  r2
   1.159 +        bne             1b
   1.160 +
   1.161 +        pop             {r4-r11}
   1.162 +        bx              lr
   1.163 +endfunc
   1.164 +
   1.165 +function ff_put_pixels8_x2_no_rnd_armv6, export=1 /* r0 = dst, r1 = src, r2 = stride, r3 = row count; stores the truncated average (a + b) >> 1 of every src byte and its right-hand neighbour, two rows per pass */
   1.166 +        push            {r4-r9, lr}
   1.167 +1:
   1.168 +        subs            r3,  r3,  #2
   1.169 +        ldr             r4,  [r1] /* row A, bytes 0..3 */
   1.170 +        ldr             r5,  [r1, #4] /* row A, bytes 4..7 */
   1.171 +        ldr             r7,  [r1, #5] /* row A, bytes 5..8 */
   1.172 +        ldr             r8,  [r1, r2]! /* step src to row B and fetch its bytes 0..3 */
   1.173 +        ldr             r9,  [r1, #4]
   1.174 +        ldr             r14, [r1, #5]
   1.175 +        add             r1,  r1,  r2 /* src now points at the row after B */
   1.176 +        lsr             r6,  r4,  #8
   1.177 +        orr             r6,  r6,  r5,  lsl #24 /* r6 = row A bytes 1..4 (assumes little-endian loads) */
   1.178 +        lsr             r12, r8,  #8
   1.179 +        orr             r12, r12, r9,  lsl #24 /* r12 = row B bytes 1..4 */
   1.180 +        uhadd8          r4,  r4,  r6 /* halving add only: no rounding correction in this variant */
   1.181 +        uhadd8          r5,  r5,  r7
   1.182 +        uhadd8          r8,  r8,  r12
   1.183 +        uhadd8          r9,  r9,  r14
   1.184 +        stm             r0,  {r4,r5} /* store row A */
   1.185 +        add             r0,  r0,  r2
   1.186 +        stm             r0,  {r8,r9} /* store row B */
   1.187 +        add             r0,  r0,  r2
   1.188 +        bne             1b
   1.189 +
   1.190 +        pop             {r4-r9, pc}
   1.191 +endfunc
   1.192 +
   1.193 +function ff_put_pixels8_y2_no_rnd_armv6, export=1 /* r0 = dst, r1 = src, r2 = stride, r3 = row count; stores the truncated average (a + b) >> 1 of every src byte and the byte one row below it, two rows per pass */
   1.194 +        push            {r4-r9, lr}
   1.195 +        ldr             r4,  [r1] /* r4:r5 = first source row */
   1.196 +        ldr             r5,  [r1, #4]
   1.197 +        ldr             r6,  [r1, r2]! /* r6:r7 = second source row; src pointer moves with it */
   1.198 +        ldr             r7,  [r1, #4]
   1.199 +1:
   1.200 +        subs            r3,  r3,  #2
   1.201 +        uhadd8          r8,  r4,  r6 /* r8:r9 = output row from r4:r5 and r6:r7 */
   1.202 +        ldr             r4,  [r1, r2]! /* refill r4:r5 with the next source row */
   1.203 +        uhadd8          r9,  r5,  r7
   1.204 +        ldr             r5,  [r1, #4]
   1.205 +        uhadd8          r12, r4,  r6 /* r12:r14 = next output row, from the new r4:r5 and the kept r6:r7 */
   1.206 +        ldr             r6,  [r1, r2]! /* refill r6:r7 for the next pass; on the last pass this row is loaded but never used */
   1.207 +        uhadd8          r14, r5,  r7
   1.208 +        ldr             r7,  [r1, #4]
   1.209 +        stm             r0,  {r8,r9}
   1.210 +        add             r0,  r0,  r2
   1.211 +        stm             r0,  {r12,r14}
   1.212 +        add             r0,  r0,  r2
   1.213 +        bne             1b
   1.214 +
   1.215 +        pop             {r4-r9, pc}
   1.216 +endfunc
   1.217 +
   1.218 +function ff_avg_pixels8_armv6, export=1 /* r0 = dst, r1 = src, r2 = stride, r3 = row count; replaces each 8-byte dst row with the rounded per-byte average of dst and src, two rows per pass */
   1.219 +        pld             [r1, r2] /* prefetch hint for the next source row */
   1.220 +        push            {r4-r10, lr}
   1.221 +        mov             lr,  #1
   1.222 +        orr             lr,  lr,  lr,  lsl #8
   1.223 +        orr             lr,  lr,  lr,  lsl #16 /* lr = 0x01010101: bit 0 of every byte lane */
   1.224 +        ldrd            r4,  r5,  [r0] /* r4:r5 = first dst row */
   1.225 +        ldr             r10, [r1, #4]
   1.226 +        ldr             r9,  [r1], r2 /* r9:r10 = first src row; src advances */
   1.227 +        subs            r3,  r3,  #2 /* flags are tested by the beq in the middle of the loop */
   1.228 +1:
   1.229 +        pld             [r1, r2]
   1.230 +        eor             r8,  r4,  r9 /* bit 0 of each lane is the bit the halving add drops */
   1.231 +        uhadd8          r4,  r4,  r9 /* per byte: (dst + src) >> 1 */
   1.232 +        eor             r12, r5,  r10
   1.233 +        ldrd            r6,  r7,  [r0, r2] /* r6:r7 = second dst row of this pair */
   1.234 +        uhadd8          r5,  r5,  r10
   1.235 +        and             r8,  r8,  lr
   1.236 +        ldr             r10, [r1, #4]
   1.237 +        and             r12, r12, lr
   1.238 +        uadd8           r4,  r4,  r8 /* add the dropped bit back: (dst + src + 1) >> 1 */
   1.239 +        ldr             r9,  [r1], r2 /* r9:r10 = second src row of this pair */
   1.240 +        eor             r8,  r6,  r9
   1.241 +        uadd8           r5,  r5,  r12
   1.242 +        pld             [r1, r2,  lsl #1]
   1.243 +        eor             r12, r7,  r10
   1.244 +        uhadd8          r6,  r6,  r9
   1.245 +        strd            r4,  r5,  [r0], r2 /* first row of the pair is done */
   1.246 +        uhadd8          r7,  r7,  r10
   1.247 +        beq             2f /* counter reached zero: finish the second row without loading further rows */
   1.248 +        and             r8,  r8,  lr
   1.249 +        ldrd            r4,  r5,  [r0, r2] /* dst row for the next pass (r0 still points at the row being finished) */
   1.250 +        uadd8           r6,  r6,  r8
   1.251 +        ldr             r10, [r1, #4]
   1.252 +        and             r12, r12, lr
   1.253 +        subs            r3,  r3,  #2
   1.254 +        uadd8           r7,  r7,  r12
   1.255 +        ldr             r9,  [r1], r2 /* src row for the next pass */
   1.256 +        strd            r6,  r7,  [r0], r2
   1.257 +        b               1b
   1.258 +2:
   1.259 +        and             r8,  r8,  lr
   1.260 +        and             r12, r12, lr
   1.261 +        uadd8           r6,  r6,  r8
   1.262 +        uadd8           r7,  r7,  r12
   1.263 +        strd            r6,  r7,  [r0], r2 /* last row */
   1.264 +
   1.265 +        pop             {r4-r10, pc}
   1.266 +endfunc
   1.267 +
   1.268 +function ff_add_pixels_clamped_armv6, export=1 /* r0 = pointer to 16-bit values (8 consumed per row), r1 = pointer to 8-byte pixel rows, r2 = pixel stride; for 8 rows adds the 16-bit values to the pixels, clamps to 0..255 and writes the pixels back */
   1.269 +        push            {r4-r8,lr}
   1.270 +        mov             r3,  #8 /* fixed row count */
   1.271 +1:
   1.272 +        ldm             r0!, {r4,r5,r12,lr} /* eight 16-bit values, two per register */
   1.273 +        ldrd            r6,  r7,  [r1] /* eight pixels */
   1.274 +        pkhbt           r8,  r4,  r5,  lsl #16 /* r8 = values 0 and 2 */
   1.275 +        pkhtb           r5,  r5,  r4,  asr #16 /* r5 = values 1 and 3 */
   1.276 +        pkhbt           r4,  r12, lr,  lsl #16 /* r4 = values 4 and 6 */
   1.277 +        pkhtb           lr,  lr,  r12, asr #16 /* lr = values 5 and 7 */
   1.278 +        pld             [r1, r2]
   1.279 +        uxtab16         r8,  r8,  r6 /* add pixels 0 and 2, zero-extended to 16 bits */
   1.280 +        uxtab16         r5,  r5,  r6,  ror #8 /* add pixels 1 and 3 */
   1.281 +        uxtab16         r4,  r4,  r7
   1.282 +        uxtab16         lr,  lr,  r7,  ror #8
   1.283 +        usat16          r8,  #8,  r8 /* saturate each halfword to 0..255 */
   1.284 +        usat16          r5,  #8,  r5
   1.285 +        usat16          r4,  #8,  r4
   1.286 +        usat16          lr,  #8,  lr
   1.287 +        orr             r6,  r8,  r5,  lsl #8 /* interleave even and odd results back into four bytes */
   1.288 +        orr             r7,  r4,  lr,  lsl #8
   1.289 +        subs            r3,  r3,  #1
   1.290 +        strd            r6,  r7,  [r1],  r2 /* write the row back and advance to the next one */
   1.291 +        bgt             1b
   1.292 +        pop             {r4-r8,pc}
   1.293 +endfunc
   1.294 +
   1.295 +function ff_get_pixels_armv6, export=1 /* r0 = output of 16-bit values, r1 = pixels, r2 = pixel stride; widens 8 rows of 8 bytes to zero-extended 16-bit values stored contiguously at r0 */
   1.296 +        pld             [r1, r2]
   1.297 +        push            {r4-r8, lr}
   1.298 +        mov             lr,  #8 /* fixed row count */
   1.299 +1:
   1.300 +        ldrd            r4,  r5,  [r1],  r2 /* eight pixels; advance to the next row */
   1.301 +        subs            lr,  lr,  #1 /* flags are consumed by the bgt at the bottom */
   1.302 +        uxtb16          r6,  r4 /* pixels 0 and 2 as 16-bit lanes */
   1.303 +        uxtb16          r4,  r4,  ror #8 /* pixels 1 and 3 */
   1.304 +        uxtb16          r12, r5 /* pixels 4 and 6 */
   1.305 +        uxtb16          r8,  r5,  ror #8 /* pixels 5 and 7 */
   1.306 +        pld             [r1, r2]
   1.307 +        pkhbt           r5,  r6,  r4,  lsl #16 /* r5 = pixels 0,1 */
   1.308 +        pkhtb           r6,  r4,  r6,  asr #16 /* r6 = pixels 2,3 */
   1.309 +        pkhbt           r7,  r12, r8,  lsl #16 /* r7 = pixels 4,5 */
   1.310 +        pkhtb           r12, r8,  r12, asr #16 /* r12 = pixels 6,7 */
   1.311 +        stm             r0!, {r5,r6,r7,r12} /* 16 output bytes per row */
   1.312 +        bgt             1b
   1.313 +
   1.314 +        pop             {r4-r8, pc}
   1.315 +endfunc
   1.316 +
   1.317 +function ff_diff_pixels_armv6, export=1 /* r0 = output of 16-bit values, r1 and r2 = two pixel blocks, r3 = stride of both; for 8 rows of 8 bytes stores the signed 16-bit differences (r1 pixel minus r2 pixel) contiguously at r0 */
   1.318 +        pld             [r1, r3]
   1.319 +        pld             [r2, r3]
   1.320 +        push            {r4-r9, lr}
   1.321 +        mov             lr,  #8 /* fixed row count */
   1.322 +1:
   1.323 +        ldrd            r4,  r5,  [r1],  r3 /* row of the first block */
   1.324 +        ldrd            r6,  r7,  [r2],  r3 /* row of the second block */
   1.325 +        uxtb16          r8,  r4 /* first block, pixels 0 and 2 */
   1.326 +        uxtb16          r4,  r4,  ror #8 /* first block, pixels 1 and 3 */
   1.327 +        uxtb16          r9,  r6 /* second block, pixels 0 and 2 */
   1.328 +        uxtb16          r6,  r6,  ror #8 /* second block, pixels 1 and 3 */
   1.329 +        pld             [r1, r3]
   1.330 +        ssub16          r9,  r8,  r9 /* differences 0 and 2 */
   1.331 +        ssub16          r6,  r4,  r6 /* differences 1 and 3 */
   1.332 +        uxtb16          r8,  r5
   1.333 +        uxtb16          r5,  r5,  ror #8
   1.334 +        pld             [r2, r3]
   1.335 +        pkhbt           r4,  r9,  r6,  lsl #16 /* r4 = differences 0,1 */
   1.336 +        pkhtb           r6,  r6,  r9,  asr #16 /* r6 = differences 2,3 */
   1.337 +        uxtb16          r9,  r7
   1.338 +        uxtb16          r7,  r7,  ror #8
   1.339 +        ssub16          r9,  r8,  r9 /* differences 4 and 6 */
   1.340 +        ssub16          r5,  r5,  r7 /* differences 5 and 7 */
   1.341 +        subs            lr,  lr,  #1
   1.342 +        pkhbt           r8,  r9,  r5,  lsl #16 /* r8 = differences 4,5 */
   1.343 +        pkhtb           r9,  r5,  r9,  asr #16 /* r9 = differences 6,7 */
   1.344 +        stm             r0!, {r4,r6,r8,r9}
   1.345 +        bgt             1b
   1.346 +
   1.347 +        pop             {r4-r9, pc}
   1.348 +endfunc
   1.349 +
   1.350 +function ff_pix_abs16_armv6, export=1 /* r1 and r2 = two pixel blocks, r3 = stride of both, word at [sp] on entry = row count; returns in r0 the sum of absolute byte differences over 16 bytes per row */
   1.351 +        ldr             r0,  [sp] /* row count read from the stack before the push moves sp; the incoming r0 is never read */
   1.352 +        push            {r4-r9, lr}
   1.353 +        mov             r12, #0 /* two independent accumulators: r12 and lr */
   1.354 +        mov             lr,  #0
   1.355 +        ldm             r1,  {r4-r7} /* first 16-byte row of the r1 block */
   1.356 +        ldr             r8,  [r2]
   1.357 +1:
   1.358 +        ldr             r9,  [r2, #4]
   1.359 +        pld             [r1, r3]
   1.360 +        usada8          r12, r4,  r8,  r12 /* accumulate the four absolute byte differences of one word */
   1.361 +        ldr             r8,  [r2, #8]
   1.362 +        pld             [r2, r3]
   1.363 +        usada8          lr,  r5,  r9,  lr
   1.364 +        ldr             r9,  [r2, #12]
   1.365 +        usada8          r12, r6,  r8,  r12
   1.366 +        subs            r0,  r0,  #1
   1.367 +        usada8          lr,  r7,  r9,  lr
   1.368 +        beq             2f /* last row done: skip loading a further row */
   1.369 +        add             r1,  r1,  r3
   1.370 +        ldm             r1,  {r4-r7}
   1.371 +        add             r2,  r2,  r3
   1.372 +        ldr             r8,  [r2]
   1.373 +        b               1b
   1.374 +2:
   1.375 +        add             r0,  r12, lr /* combine the two partial sums into the return value */
   1.376 +        pop             {r4-r9, pc}
   1.377 +endfunc
   1.378 +
   1.379 +function ff_pix_abs16_x2_armv6, export=1 /* r1 and r2 = two pixel blocks, r3 = stride of both, word at [sp] on entry = row count; returns in r0 the sum of absolute differences between each r1 byte and the rounded average of the r2 byte and its right-hand neighbour, 16 bytes per row */
   1.380 +        ldr             r12, [sp] /* row count read from the stack before the push moves sp */
   1.381 +        push            {r4-r11, lr}
   1.382 +        mov             r0,  #0 /* accumulator and return value; the incoming r0 is never read */
   1.383 +        mov             lr,  #1
   1.384 +        orr             lr,  lr,  lr,  lsl #8
   1.385 +        orr             lr,  lr,  lr,  lsl #16 /* lr = 0x01010101: bit 0 of every byte lane */
   1.386 +1:
   1.387 +        ldr             r8,  [r2] /* r2 row, bytes 0..3 */
   1.388 +        ldr             r9,  [r2, #4] /* r2 row, bytes 4..7 */
   1.389 +        lsr             r10, r8,  #8
   1.390 +        ldr             r4,  [r1] /* r1 row, bytes 0..3 */
   1.391 +        lsr             r6,  r9,  #8
   1.392 +        orr             r10, r10, r9,  lsl #24 /* r10 = r2 row bytes 1..4 (assumes little-endian loads) */
   1.393 +        ldr             r5,  [r2, #8] /* r2 row, bytes 8..11 */
   1.394 +        eor             r11, r8,  r10 /* bit 0 of each lane is the bit the halving add drops */
   1.395 +        uhadd8          r7,  r8,  r10
   1.396 +        orr             r6,  r6,  r5,  lsl #24 /* r6 = r2 row bytes 5..8 */
   1.397 +        and             r11, r11, lr
   1.398 +        uadd8           r7,  r7,  r11 /* rounded average (a + b + 1) >> 1 per byte */
   1.399 +        ldr             r8,  [r1, #4]
   1.400 +        usada8          r0,  r4,  r7,  r0 /* accumulate differences for bytes 0..3 */
   1.401 +        eor             r7,  r9,  r6
   1.402 +        lsr             r10, r5,  #8
   1.403 +        and             r7,  r7,  lr
   1.404 +        uhadd8          r4,  r9,  r6
   1.405 +        ldr             r6,  [r2, #12] /* r2 row, bytes 12..15 */
   1.406 +        uadd8           r4,  r4,  r7
   1.407 +        pld             [r1, r3]
   1.408 +        orr             r10, r10, r6,  lsl #24 /* r10 = r2 row bytes 9..12 */
   1.409 +        usada8          r0,  r8,  r4,  r0 /* bytes 4..7 */
   1.410 +        ldr             r4,  [r1, #8]
   1.411 +        eor             r11, r5,  r10
   1.412 +        ldrb            r7,  [r2, #16] /* 17th byte of the r2 row: right-hand neighbour of byte 15 */
   1.413 +        and             r11, r11, lr
   1.414 +        uhadd8          r8,  r5,  r10
   1.415 +        ldr             r5,  [r1, #12]
   1.416 +        uadd8           r8,  r8,  r11
   1.417 +        pld             [r2, r3]
   1.418 +        lsr             r10, r6,  #8
   1.419 +        usada8          r0,  r4,  r8,  r0 /* bytes 8..11 */
   1.420 +        orr             r10, r10, r7,  lsl #24 /* r10 = r2 row bytes 13..16 */
   1.421 +        subs            r12,  r12,  #1
   1.422 +        eor             r11, r6,  r10
   1.423 +        add             r1,  r1,  r3
   1.424 +        uhadd8          r9,  r6,  r10
   1.425 +        and             r11, r11, lr
   1.426 +        uadd8           r9,  r9,  r11
   1.427 +        add             r2,  r2,  r3
   1.428 +        usada8          r0,  r5,  r9,  r0 /* bytes 12..15 */
   1.429 +        bgt             1b
   1.430 +
   1.431 +        pop             {r4-r11, pc}
   1.432 +endfunc
   1.433 +
   1.434 +.macro  usad_y2         p0,  p1,  p2,  p3,  n0,  n1,  n2,  n3 /* one row step: p0-p3 hold the previous 16-byte row of the r2 block; loads the current r2 row into n0-n3, adds to r0 the absolute differences between the r1 row and the rounded average of both r2 rows, then advances r1 and r2 by r3; expects lr = 0x01010101 and overwrites p0-p3 */
   1.435 +        ldr             \n0, [r2] /* current r2 row, word 0 */
   1.436 +        eor             \n1, \p0, \n0 /* n1 is a temporary here; it receives row word 1 further down */
   1.437 +        uhadd8          \p0, \p0, \n0 /* per byte: (previous + current) >> 1 */
   1.438 +        and             \n1, \n1, lr
   1.439 +        ldr             \n2, [r1] /* n2 temporarily holds the r1 word to compare against */
   1.440 +        uadd8           \p0, \p0, \n1 /* add the dropped bit back: rounded average */
   1.441 +        ldr             \n1, [r2, #4] /* current r2 row, word 1 */
   1.442 +        usada8          r0,  \p0, \n2, r0
   1.443 +        pld             [r1,  r3]
   1.444 +        eor             \n3, \p1, \n1
   1.445 +        uhadd8          \p1, \p1, \n1
   1.446 +        and             \n3, \n3, lr
   1.447 +        ldr             \p0, [r1, #4] /* p0 is free by now and is reused for r1 data */
   1.448 +        uadd8           \p1, \p1, \n3
   1.449 +        ldr             \n2, [r2, #8] /* current r2 row, word 2 */
   1.450 +        usada8          r0,  \p1, \p0, r0
   1.451 +        pld             [r2,  r3]
   1.452 +        eor             \p0, \p2, \n2
   1.453 +        uhadd8          \p2, \p2, \n2
   1.454 +        and             \p0, \p0, lr
   1.455 +        ldr             \p1, [r1, #8]
   1.456 +        uadd8           \p2, \p2, \p0
   1.457 +        ldr             \n3, [r2, #12] /* current r2 row, word 3 */
   1.458 +        usada8          r0,  \p2, \p1, r0
   1.459 +        eor             \p1, \p3, \n3
   1.460 +        uhadd8          \p3, \p3, \n3
   1.461 +        and             \p1, \p1, lr
   1.462 +        ldr             \p0,  [r1, #12]
   1.463 +        uadd8           \p3, \p3, \p1
   1.464 +        add             r1,  r1,  r3
   1.465 +        usada8          r0,  \p3, \p0,  r0
   1.466 +        add             r2,  r2,  r3 /* on exit n0-n3 hold the current r2 row, ready to be passed as p0-p3 next time */
   1.467 +.endm
   1.468 +
   1.469 +function ff_pix_abs16_y2_armv6, export=1 /* r1 and r2 = two pixel blocks, r3 = stride of both, word at [sp] on entry = row count; returns in r0 the sum of absolute differences between each r1 byte and the rounded average of the r2 byte and the byte one row below it, 16 bytes per row */
   1.470 +        pld             [r1]
   1.471 +        pld             [r2]
   1.472 +        ldr             r12, [sp] /* row count read from the stack before the push moves sp */
   1.473 +        push            {r4-r11, lr}
   1.474 +        mov             r0,  #0 /* accumulator and return value; the incoming r0 is never read */
   1.475 +        mov             lr,  #1
   1.476 +        orr             lr,  lr,  lr,  lsl #8
   1.477 +        orr             lr,  lr,  lr,  lsl #16 /* lr = 0x01010101, the mask usad_y2 expects */
   1.478 +        ldr             r4,  [r2] /* r4-r7 = first row of the r2 block */
   1.479 +        ldr             r5,  [r2, #4]
   1.480 +        ldr             r6,  [r2, #8]
   1.481 +        ldr             r7,  [r2, #12]
   1.482 +        add             r2,  r2,  r3
   1.483 +1:
   1.484 +        usad_y2         r4,  r5,  r6,  r7,  r8,  r9,  r10, r11 /* previous row in r4-r7, current row ends up in r8-r11 */
   1.485 +        subs            r12, r12, #2 /* two rows per pass; the macro body does not change the condition flags */
   1.486 +        usad_y2         r8,  r9,  r10, r11, r4,  r5,  r6,  r7 /* register roles swapped for the second row */
   1.487 +        bgt             1b
   1.488 +
   1.489 +        pop             {r4-r11, pc}
   1.490 +endfunc
   1.491 +
   1.492 +function ff_pix_abs8_armv6, export=1 /* r1 and r2 = two pixel blocks, r3 = stride of both, word at [sp] on entry = row count; returns in r0 the sum of absolute byte differences over 8 bytes per row, two rows per pass */
   1.493 +        pld             [r2, r3]
   1.494 +        ldr             r12, [sp] /* row count read from the stack before the push moves sp */
   1.495 +        push            {r4-r9, lr}
   1.496 +        mov             r0,  #0 /* two independent accumulators: r0 and lr; the incoming r0 is never read */
   1.497 +        mov             lr,  #0
   1.498 +        ldrd            r4,  r5,  [r1], r3 /* first row of the r1 block */
   1.499 +1:
   1.500 +        subs            r12, r12, #2 /* tested by the beq further down */
   1.501 +        ldr             r7,  [r2, #4]
   1.502 +        ldr             r6,  [r2], r3 /* r6:r7 = r2 row matching r4:r5 */
   1.503 +        ldrd            r8,  r9,  [r1], r3 /* second r1 row of the pair */
   1.504 +        usada8          r0,  r4,  r6,  r0
   1.505 +        pld             [r2, r3]
   1.506 +        usada8          lr,  r5,  r7,  lr
   1.507 +        ldr             r7,  [r2, #4]
   1.508 +        ldr             r6,  [r2], r3 /* r6:r7 = r2 row matching r8:r9 */
   1.509 +        beq             2f /* final pair: do not load another r1 row */
   1.510 +        ldrd            r4,  r5,  [r1], r3 /* first r1 row of the next pair */
   1.511 +        usada8          r0,  r8,  r6,  r0
   1.512 +        pld             [r2, r3]
   1.513 +        usada8          lr,  r9,  r7,  lr
   1.514 +        b               1b
   1.515 +2:
   1.516 +        usada8          r0,  r8,  r6,  r0
   1.517 +        usada8          lr,  r9,  r7,  lr
   1.518 +        add             r0,  r0,  lr /* combine the two partial sums into the return value */
   1.519 +        pop             {r4-r9, pc}
   1.520 +endfunc
   1.521 +
   1.522 +function ff_sse16_armv6, export=1 /* r1 and r2 = two pixel blocks, r3 = stride of both, word at [sp] on entry = row count; returns in r0 the sum of squared byte differences over 16 bytes per row */
   1.523 +        ldr             r12, [sp] /* row count read from the stack before the push moves sp */
   1.524 +        push            {r4-r9, lr}
   1.525 +        mov             r0,  #0 /* accumulator and return value; the incoming r0 is never read */
   1.526 +1:
   1.527 +        ldrd            r4,  r5,  [r1] /* r1 row, bytes 0..7 */
   1.528 +        ldr             r8,  [r2] /* r2 row, bytes 0..3 */
   1.529 +        uxtb16          lr,  r4 /* bytes 0 and 2 widened to 16-bit lanes */
   1.530 +        uxtb16          r4,  r4,  ror #8 /* bytes 1 and 3 */
   1.531 +        uxtb16          r9,  r8
   1.532 +        uxtb16          r8,  r8,  ror #8
   1.533 +        ldr             r7,  [r2, #4] /* r2 row, bytes 4..7 */
   1.534 +        usub16          lr,  lr,  r9 /* per-lane difference, squared by the smlad that follows */
   1.535 +        usub16          r4,  r4,  r8
   1.536 +        smlad           r0,  lr,  lr,  r0 /* r0 += low*low + high*high */
   1.537 +        uxtb16          r6,  r5
   1.538 +        uxtb16          lr,  r5,  ror #8
   1.539 +        uxtb16          r8,  r7
   1.540 +        uxtb16          r9,  r7,  ror #8
   1.541 +        smlad           r0,  r4,  r4,  r0
   1.542 +        ldrd            r4,  r5,  [r1, #8] /* r1 row, bytes 8..15 */
   1.543 +        usub16          r6,  r6,  r8
   1.544 +        usub16          r8,  lr,  r9
   1.545 +        ldr             r7,  [r2, #8] /* r2 row, bytes 8..11 */
   1.546 +        smlad           r0,  r6,  r6,  r0
   1.547 +        uxtb16          lr,  r4
   1.548 +        uxtb16          r4,  r4,  ror #8
   1.549 +        uxtb16          r9,  r7
   1.550 +        uxtb16          r7,  r7, ror #8
   1.551 +        smlad           r0,  r8,  r8,  r0
   1.552 +        ldr             r8,  [r2, #12] /* r2 row, bytes 12..15 */
   1.553 +        usub16          lr,  lr,  r9
   1.554 +        usub16          r4,  r4,  r7
   1.555 +        smlad           r0,  lr,  lr,  r0
   1.556 +        uxtb16          r6,  r5
   1.557 +        uxtb16          r5,  r5,  ror #8
   1.558 +        uxtb16          r9,  r8
   1.559 +        uxtb16          r8,  r8,  ror #8
   1.560 +        smlad           r0,  r4,  r4,  r0
   1.561 +        usub16          r6,  r6,  r9
   1.562 +        usub16          r5,  r5,  r8
   1.563 +        smlad           r0,  r6,  r6,  r0
   1.564 +        add             r1,  r1,  r3 /* advance both blocks to the next row */
   1.565 +        add             r2,  r2,  r3
   1.566 +        subs            r12, r12, #1
   1.567 +        smlad           r0,  r5,  r5,  r0 /* smlad leaves N/Z/C/V alone, so bgt still tests the subs result */
   1.568 +        bgt             1b
   1.569 +
   1.570 +        pop             {r4-r9, pc}
   1.571 +endfunc
   1.572 +
   1.573 +function ff_pix_norm1_armv6, export=1 /* r0 = pixels, r1 = stride; returns in r0 the sum of the squares of all bytes in 16 rows of 16 bytes */
   1.574 +        push            {r4-r6, lr}
   1.575 +        mov             r12, #16 /* fixed row count */
   1.576 +        mov             lr,  #0 /* accumulator */
   1.577 +1:
   1.578 +        ldm             r0,  {r2-r5} /* one 16-byte row */
   1.579 +        uxtb16          r6,  r2 /* bytes 0 and 2 widened to 16-bit lanes */
   1.580 +        uxtb16          r2,  r2,  ror #8 /* bytes 1 and 3 */
   1.581 +        smlad           lr,  r6,  r6,  lr /* lr += low*low + high*high */
   1.582 +        uxtb16          r6,  r3
   1.583 +        smlad           lr,  r2,  r2,  lr
   1.584 +        uxtb16          r3,  r3,  ror #8
   1.585 +        smlad           lr,  r6,  r6,  lr
   1.586 +        uxtb16          r6,  r4
   1.587 +        smlad           lr,  r3,  r3,  lr
   1.588 +        uxtb16          r4,  r4,  ror #8
   1.589 +        smlad           lr,  r6,  r6,  lr
   1.590 +        uxtb16          r6,  r5
   1.591 +        smlad           lr,  r4,  r4,  lr
   1.592 +        uxtb16          r5,  r5,  ror #8
   1.593 +        smlad           lr,  r6,  r6,  lr
   1.594 +        subs            r12, r12, #1
   1.595 +        add             r0,  r0,  r1 /* next row */
   1.596 +        smlad           lr,  r5,  r5,  lr
   1.597 +        bgt             1b
   1.598 +
   1.599 +        mov             r0,  lr /* return the accumulated sum */
   1.600 +        pop             {r4-r6, pc}
   1.601 +endfunc
   1.602 +
   1.603 +function ff_pix_sum_armv6, export=1 /* r0 = pixels, r1 = stride; returns in r0 the sum of all bytes in 16 rows of 16 bytes */
   1.604 +        push            {r4-r7, lr}
   1.605 +        mov             r12, #16 /* fixed row count */
   1.606 +        mov             r2,  #0 /* two independent accumulators: r2 and r3 */
   1.607 +        mov             r3,  #0
   1.608 +        mov             lr,  #0 /* zero operand: usada8 against 0 adds up the four bytes of a word */
   1.609 +        ldr             r4,  [r0]
   1.610 +1:
   1.611 +        subs            r12, r12, #1
   1.612 +        ldr             r5,  [r0, #4]
   1.613 +        usada8          r2,  r4,  lr,  r2
   1.614 +        ldr             r6,  [r0, #8]
   1.615 +        usada8          r3,  r5,  lr,  r3
   1.616 +        ldr             r7,  [r0, #12]
   1.617 +        usada8          r2,  r6,  lr,  r2
   1.618 +        beq             2f /* last row: skip the load of a following row */
   1.619 +        ldr             r4,  [r0, r1]! /* first word of the next row; r0 moves to that row */
   1.620 +        usada8          r3,  r7,  lr,  r3
   1.621 +        bgt             1b /* flags still come from the subs above; the zero case already left through beq */
   1.622 +2:
   1.623 +        usada8          r3,  r7,  lr,  r3 /* last word of the final row */
   1.624 +        add             r0,  r2,  r3 /* combine the two partial sums into the return value */
   1.625 +        pop             {r4-r7, pc}
   1.626 +endfunc