Mercurial > cgi-bin > hgwebdir.cgi > PR > Applications > VSs > VSs__H264__App
diff libavcodec/arm/dsputil_armv6.S @ 2:897f711a7157
rearrange to work with autoconf
| author | Nina Engelhardt <nengel@mailbox.tu-berlin.de> |
|---|---|
| date | Tue, 25 Sep 2012 15:55:33 +0200 |
| parents | |
| children | |
line diff
/*
 * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/*
 * ARMv6 media-instruction implementations of block copy / average,
 * pixel <-> 16-bit coefficient conversion and block metrics (SAD, SSE,
 * sum, sum of squares).
 *
 * Syntax: GNU as, ARM (A32), run through the C preprocessor.
 * The macros function / endfunc / preserve8 come from asm.S.
 *
 * Idiom used by every rounding average in this file:
 *     uhadd8  d, a, b          per byte: (a + b) >> 1   (rounds down)
 *     eor     t, a, b
 *     and     t, t, 0x01010101 per byte: the bit the halving dropped
 *     uadd8   d, d, t          per byte: (a + b + 1) >> 1
 * The no_rnd variants keep only the uhadd8 step.
 *
 * All instruction sequences are hand-scheduled; do not reorder them
 * without re-checking register lifetimes and flag usage.
 */

#include "asm.S"

        preserve8

        .text

@ call_2x_pixels type, subp
@ Emits the exported 16-pixel-wide function for the given type and suffix
@ by running the matching 8-pixel-wide function twice: once on the
@ original arguments and once with r0 and r1 advanced by 8 bytes.
@ r0-r3 and lr are saved around the first call; the second call is a
@ tail call, so the callee returns straight to the original caller.
.macro  call_2x_pixels  type, subp
function ff_\type\()_pixels16\subp\()_armv6, export=1
        push            {r0-r3, lr}
        bl              ff_\type\()_pixels8\subp\()_armv6
        pop             {r0-r3, lr}
        add             r0,  r0,  #8
        add             r1,  r1,  #8
        b               ff_\type\()_pixels8\subp\()_armv6
endfunc
.endm

call_2x_pixels          avg
call_2x_pixels          put, _x2
call_2x_pixels          put, _y2
call_2x_pixels          put, _x2_no_rnd
call_2x_pixels          put, _y2_no_rnd

@ ff_put_pixels16_armv6
@ Copies rows of 16 bytes from [r1] to [r0].
@ In:    r0 = destination, r1 = source, r2 = byte stride applied to both,
@        r3 = row count
@ Loop:  two rows per iteration; it exits only when r3 reaches exactly
@        zero, so r3 is expected to be even and non-zero.
@ Clobb: r0-r3, flags (r4-r11 are saved and restored)
function ff_put_pixels16_armv6, export=1
        push            {r4-r11}
1:
        ldr             r5,  [r1, #4]
        ldr             r6,  [r1, #8]
        ldr             r7,  [r1, #12]
        ldr             r4,  [r1], r2           @ word 0, then src += stride
        strd            r6,  r7,  [r0, #8]
        ldr             r9,  [r1, #4]
        strd            r4,  r5,  [r0], r2      @ then dst += stride
        ldr             r10, [r1, #8]
        ldr             r11, [r1, #12]
        ldr             r8,  [r1], r2
        strd            r10, r11, [r0, #8]
        subs            r3,  r3,  #2
        strd            r8,  r9,  [r0], r2
        bne             1b

        pop             {r4-r11}
        bx              lr
endfunc

@ ff_put_pixels8_armv6
@ Copies rows of 8 bytes from [r1] to [r0].
@ In:    r0 = destination, r1 = source, r2 = byte stride applied to both,
@        r3 = row count (two rows per iteration, see ff_put_pixels16_armv6)
@ Clobb: r0-r3, flags (r4-r7 are saved and restored)
function ff_put_pixels8_armv6, export=1
        push            {r4-r7}
1:
        ldr             r5,  [r1, #4]
        ldr             r4,  [r1], r2
        ldr             r7,  [r1, #4]
        strd            r4,  r5,  [r0], r2
        ldr             r6,  [r1], r2
        subs            r3,  r3,  #2
        strd            r6,  r7,  [r0], r2
        bne             1b

        pop             {r4-r7}
        bx              lr
endfunc

@ ff_put_pixels8_x2_armv6
@ Writes, for each of 8 output bytes per row, the rounded-up average of
@ source byte i and source byte i+1 (9 source bytes are read per row).
@ In:    r0 = destination, r1 = source, r2 = byte stride applied to both,
@        r3 = row count (two rows per iteration)
@ Clobb: r0-r3, r12, flags (r4-r11 are saved; lr is saved and used as
@        scratch r14)
function ff_put_pixels8_x2_armv6, export=1
        push            {r4-r11, lr}
        mov             r12, #1
        orr             r12, r12, r12, lsl #8
        orr             r12, r12, r12, lsl #16  @ r12 = 0x01010101
1:
        ldr             r4,  [r1]               @ row 0, bytes 0..3
        subs            r3,  r3,  #2
        ldr             r5,  [r1, #4]           @ row 0, bytes 4..7
        ldr             r7,  [r1, #5]           @ row 0, bytes 5..8
        lsr             r6,  r4,  #8
        ldr             r8,  [r1, r2]!          @ row 1, bytes 0..3
        orr             r6,  r6,  r5,  lsl #24  @ row 0, bytes 1..4
        ldr             r9,  [r1, #4]
        ldr             r11, [r1, #5]
        lsr             r10, r8,  #8
        add             r1,  r1,  r2
        orr             r10, r10, r9,  lsl #24  @ row 1, bytes 1..4
        eor             r14, r4,  r6
        uhadd8          r4,  r4,  r6
        eor             r6,  r5,  r7
        uhadd8          r5,  r5,  r7
        and             r14, r14, r12
        and             r6,  r6,  r12
        uadd8           r4,  r4,  r14
        eor             r14, r8,  r10
        uadd8           r5,  r5,  r6
        eor             r6,  r9,  r11
        uhadd8          r8,  r8,  r10
        and             r14, r14, r12
        uhadd8          r9,  r9,  r11
        and             r6,  r6,  r12
        uadd8           r8,  r8,  r14
        strd            r4,  r5,  [r0], r2
        uadd8           r9,  r9,  r6
        strd            r8,  r9,  [r0], r2
        bne             1b

        pop             {r4-r11, pc}
endfunc

@ ff_put_pixels8_y2_armv6
@ Writes, for each output row, the rounded-up per-byte average of source
@ row n and source row n+1 (8 bytes wide). One source row more than the
@ number of output rows is read, plus one further row loaded at the end
@ of the last iteration that is never used.
@ In:    r0 = destination, r1 = source, r2 = byte stride applied to both,
@        r3 = row count (two rows per iteration)
@ Clobb: r0-r3, r12, flags (r4-r11 are saved and restored)
function ff_put_pixels8_y2_armv6, export=1
        push            {r4-r11}
        mov             r12, #1
        orr             r12, r12, r12, lsl #8
        orr             r12, r12, r12, lsl #16  @ r12 = 0x01010101
        ldr             r4,  [r1]               @ r4:r5 = row n
        ldr             r5,  [r1, #4]
        ldr             r6,  [r1, r2]!          @ r6:r7 = row n+1
        ldr             r7,  [r1, #4]
1:
        subs            r3,  r3,  #2
        uhadd8          r8,  r4,  r6
        eor             r10, r4,  r6
        uhadd8          r9,  r5,  r7
        eor             r11, r5,  r7
        and             r10, r10, r12
        ldr             r4,  [r1, r2]!          @ r4:r5 = row n+2
        uadd8           r8,  r8,  r10
        and             r11, r11, r12
        uadd8           r9,  r9,  r11
        ldr             r5,  [r1, #4]
        uhadd8          r10, r4,  r6
        eor             r6,  r4,  r6
        uhadd8          r11, r5,  r7
        and             r6,  r6,  r12
        eor             r7,  r5,  r7
        uadd8           r10, r10, r6
        and             r7,  r7,  r12
        ldr             r6,  [r1, r2]!          @ r6:r7 = row n+3
        uadd8           r11, r11, r7
        strd            r8,  r9,  [r0], r2
        ldr             r7,  [r1, #4]
        strd            r10, r11, [r0], r2
        bne             1b

        pop             {r4-r11}
        bx              lr
endfunc

@ ff_put_pixels8_x2_no_rnd_armv6
@ Same data flow as ff_put_pixels8_x2_armv6, but the average is the plain
@ uhadd8 result, (a + b) >> 1, with no rounding correction.
@ In:    r0 = destination, r1 = source, r2 = byte stride applied to both,
@        r3 = row count (two rows per iteration)
@ Clobb: r0-r3, r12, flags (r4-r9 are saved; lr is saved and used as
@        scratch r14)
function ff_put_pixels8_x2_no_rnd_armv6, export=1
        push            {r4-r9, lr}
1:
        subs            r3,  r3,  #2
        ldr             r4,  [r1]
        ldr             r5,  [r1, #4]
        ldr             r7,  [r1, #5]           @ bytes 5..8
        ldr             r8,  [r1, r2]!
        ldr             r9,  [r1, #4]
        ldr             r14, [r1, #5]
        add             r1,  r1,  r2
        lsr             r6,  r4,  #8
        orr             r6,  r6,  r5,  lsl #24  @ bytes 1..4
        lsr             r12, r8,  #8
        orr             r12, r12, r9,  lsl #24
        uhadd8          r4,  r4,  r6
        uhadd8          r5,  r5,  r7
        uhadd8          r8,  r8,  r12
        uhadd8          r9,  r9,  r14
        stm             r0,  {r4,r5}
        add             r0,  r0,  r2
        stm             r0,  {r8,r9}
        add             r0,  r0,  r2
        bne             1b

        pop             {r4-r9, pc}
endfunc

@ ff_put_pixels8_y2_no_rnd_armv6
@ Same data flow as ff_put_pixels8_y2_armv6, but the average is the plain
@ uhadd8 result, (a + b) >> 1, with no rounding correction.
@ In:    r0 = destination, r1 = source, r2 = byte stride applied to both,
@        r3 = row count (two rows per iteration)
@ Clobb: r0-r3, r12, flags (r4-r9 are saved; lr is saved and used as
@        scratch r14)
function ff_put_pixels8_y2_no_rnd_armv6, export=1
        push            {r4-r9, lr}
        ldr             r4,  [r1]
        ldr             r5,  [r1, #4]
        ldr             r6,  [r1, r2]!
        ldr             r7,  [r1, #4]
1:
        subs            r3,  r3,  #2
        uhadd8          r8,  r4,  r6
        ldr             r4,  [r1, r2]!
        uhadd8          r9,  r5,  r7
        ldr             r5,  [r1, #4]
        uhadd8          r12, r4,  r6
        ldr             r6,  [r1, r2]!
        uhadd8          r14, r5,  r7
        ldr             r7,  [r1, #4]
        stm             r0,  {r8,r9}
        add             r0,  r0,  r2
        stm             r0,  {r12,r14}
        add             r0,  r0,  r2
        bne             1b

        pop             {r4-r9, pc}
endfunc

@ ff_avg_pixels8_armv6
@ Replaces each 8-byte destination row with the rounded-up per-byte
@ average of that destination row and the matching source row.
@ In:    r0 = destination (read and written), r1 = source,
@        r2 = byte stride applied to both, r3 = row count
@ Loop:  two rows per iteration, software-pipelined: the first subs is
@        done before the loop and the beq in the middle leaves through
@        label 2 to finish the last row.
@ Clobb: r0-r3, r12, flags (r4-r10 are saved; lr is saved and holds the
@        0x01010101 mask)
function ff_avg_pixels8_armv6, export=1
        pld             [r1, r2]
        push            {r4-r10, lr}
        mov             lr,  #1
        orr             lr,  lr,  lr,  lsl #8
        orr             lr,  lr,  lr,  lsl #16  @ lr = 0x01010101
        ldrd            r4,  r5,  [r0]          @ destination row 0
        ldr             r10, [r1, #4]
        ldr             r9,  [r1], r2           @ source row 0
        subs            r3,  r3,  #2
1:
        pld             [r1, r2]
        eor             r8,  r4,  r9
        uhadd8          r4,  r4,  r9
        eor             r12, r5,  r10
        ldrd            r6,  r7,  [r0, r2]      @ next destination row
        uhadd8          r5,  r5,  r10
        and             r8,  r8,  lr
        ldr             r10, [r1, #4]
        and             r12, r12, lr
        uadd8           r4,  r4,  r8
        ldr             r9,  [r1], r2
        eor             r8,  r6,  r9
        uadd8           r5,  r5,  r12
        pld             [r1, r2,  lsl #1]
        eor             r12, r7,  r10
        uhadd8          r6,  r6,  r9
        strd            r4,  r5,  [r0], r2
        uhadd8          r7,  r7,  r10
        beq             2f                      @ flags from the last subs
        and             r8,  r8,  lr
        ldrd            r4,  r5,  [r0, r2]
        uadd8           r6,  r6,  r8
        ldr             r10, [r1, #4]
        and             r12, r12, lr
        subs            r3,  r3,  #2
        uadd8           r7,  r7,  r12
        ldr             r9,  [r1], r2
        strd            r6,  r7,  [r0], r2
        b               1b
2:
        and             r8,  r8,  lr
        and             r12, r12, lr
        uadd8           r6,  r6,  r8
        uadd8           r7,  r7,  r12
        strd            r6,  r7,  [r0], r2

        pop             {r4-r10, pc}
endfunc

@ ff_add_pixels_clamped_armv6
@ For 8 rows: adds 8 consecutive 16-bit values read from [r0] to the
@ 8 bytes at [r1], saturates every sum to 0..255 and stores the 8 bytes
@ back to [r1].
@ In:    r0 = 16-bit input values, read contiguously (16 bytes per row),
@        r1 = byte block (read and written), r2 = byte stride for r1
@ Clobb: r0-r3, r12, flags (r4-r8 and lr are saved and restored)
function ff_add_pixels_clamped_armv6, export=1
        push            {r4-r8,lr}
        mov             r3,  #8                 @ fixed row count
1:
        ldm             r0!, {r4,r5,r12,lr}     @ halfwords 0..7
        ldrd            r6,  r7,  [r1]          @ bytes 0..7
        pkhbt           r8,  r4,  r5,  lsl #16  @ halfwords 0, 2
        pkhtb           r5,  r5,  r4,  asr #16  @ halfwords 1, 3
        pkhbt           r4,  r12, lr,  lsl #16  @ halfwords 4, 6
        pkhtb           lr,  lr,  r12, asr #16  @ halfwords 5, 7
        pld             [r1, r2]
        uxtab16         r8,  r8,  r6            @ += bytes 0, 2
        uxtab16         r5,  r5,  r6,  ror #8   @ += bytes 1, 3
        uxtab16         r4,  r4,  r7            @ += bytes 4, 6
        uxtab16         lr,  lr,  r7,  ror #8   @ += bytes 5, 7
        usat16          r8,  #8,  r8            @ clamp each half to 0..255
        usat16          r5,  #8,  r5
        usat16          r4,  #8,  r4
        usat16          lr,  #8,  lr
        orr             r6,  r8,  r5,  lsl #8   @ re-interleave to bytes
        orr             r7,  r4,  lr,  lsl #8
        subs            r3,  r3,  #1
        strd            r6,  r7,  [r1], r2
        bgt             1b
        pop             {r4-r8,pc}
endfunc

@ ff_get_pixels_armv6
@ For 8 rows: zero-extends the 8 bytes at [r1] to 8 16-bit values and
@ stores them contiguously at [r0].
@ In:    r0 = 16-bit output, written contiguously (16 bytes per row),
@        r1 = byte source, r2 = byte stride for r1
@ Clobb: r0, r1, r12, flags (r4-r8 are saved; lr is saved and used as the
@        row counter)
function ff_get_pixels_armv6, export=1
        pld             [r1, r2]
        push            {r4-r8, lr}
        mov             lr,  #8                 @ fixed row count
1:
        ldrd            r4,  r5,  [r1], r2
        subs            lr,  lr,  #1            @ flags survive until bgt
        uxtb16          r6,  r4                 @ bytes 0, 2
        uxtb16          r4,  r4,  ror #8        @ bytes 1, 3
        uxtb16          r12, r5                 @ bytes 4, 6
        uxtb16          r8,  r5,  ror #8        @ bytes 5, 7
        pld             [r1, r2]
        pkhbt           r5,  r6,  r4,  lsl #16  @ halfwords 0, 1
        pkhtb           r6,  r4,  r6,  asr #16  @ halfwords 2, 3
        pkhbt           r7,  r12, r8,  lsl #16  @ halfwords 4, 5
        pkhtb           r12, r8,  r12, asr #16  @ halfwords 6, 7
        stm             r0!, {r5,r6,r7,r12}
        bgt             1b

        pop             {r4-r8, pc}
endfunc

@ ff_diff_pixels_armv6
@ For 8 rows: stores the per-byte difference [r1] - [r2] of two 8-byte
@ rows as 8 signed 16-bit values, contiguously at [r0].
@ In:    r0 = 16-bit output, written contiguously (16 bytes per row),
@        r1 = minuend bytes, r2 = subtrahend bytes,
@        r3 = byte stride applied to r1 and r2
@ Clobb: r0-r2, flags (r4-r9 are saved; lr is saved and used as the row
@        counter)
function ff_diff_pixels_armv6, export=1
        pld             [r1, r3]
        pld             [r2, r3]
        push            {r4-r9, lr}
        mov             lr,  #8                 @ fixed row count
1:
        ldrd            r4,  r5,  [r1], r3
        ldrd            r6,  r7,  [r2], r3
        uxtb16          r8,  r4
        uxtb16          r4,  r4,  ror #8
        uxtb16          r9,  r6
        uxtb16          r6,  r6,  ror #8
        pld             [r1, r3]
        ssub16          r9,  r8,  r9            @ differences 0, 2
        ssub16          r6,  r4,  r6            @ differences 1, 3
        uxtb16          r8,  r5
        uxtb16          r5,  r5,  ror #8
        pld             [r2, r3]
        pkhbt           r4,  r9,  r6,  lsl #16  @ halfwords 0, 1
        pkhtb           r6,  r6,  r9,  asr #16  @ halfwords 2, 3
        uxtb16          r9,  r7
        uxtb16          r7,  r7,  ror #8
        ssub16          r9,  r8,  r9            @ differences 4, 6
        ssub16          r5,  r5,  r7            @ differences 5, 7
        subs            lr,  lr,  #1
        pkhbt           r8,  r9,  r5,  lsl #16  @ halfwords 4, 5
        pkhtb           r9,  r5,  r9,  asr #16  @ halfwords 6, 7
        stm             r0!, {r4,r6,r8,r9}
        bgt             1b

        pop             {r4-r9, pc}
endfunc

@ ff_pix_abs16_armv6
@ Sum of absolute differences between two blocks, 16 bytes per row.
@ In:    r1, r2 = the two blocks, r3 = byte stride applied to both,
@        [sp] at entry = row count
@        NOTE(review): presumably the fifth C argument - confirm against
@        the C prototype.
@        The incoming r0 is not read; it is overwritten with the count.
@ Out:   r0 = sum
@ Loop:  exits only when the count reaches exactly zero (beq), so the
@        count is expected to be non-zero.
@ Clobb: r1, r2, r12, flags (r4-r9 are saved; lr is saved and used as a
@        second accumulator)
function ff_pix_abs16_armv6, export=1
        ldr             r0,  [sp]               @ read before the push moves sp
        push            {r4-r9, lr}
        mov             r12, #0
        mov             lr,  #0
        ldm             r1,  {r4-r7}
        ldr             r8,  [r2]
1:
        ldr             r9,  [r2, #4]
        pld             [r1, r3]
        usada8          r12, r4,  r8,  r12
        ldr             r8,  [r2, #8]
        pld             [r2, r3]
        usada8          lr,  r5,  r9,  lr
        ldr             r9,  [r2, #12]
        usada8          r12, r6,  r8,  r12
        subs            r0,  r0,  #1
        usada8          lr,  r7,  r9,  lr
        beq             2f
        add             r1,  r1,  r3
        ldm             r1,  {r4-r7}
        add             r2,  r2,  r3
        ldr             r8,  [r2]
        b               1b
2:
        add             r0,  r12, lr            @ merge the two accumulators
        pop             {r4-r9, pc}
endfunc

@ ff_pix_abs16_x2_armv6
@ Sum of absolute differences, 16 bytes per row, between the block at r1
@ and the rounded-up average of byte i and byte i+1 of the block at r2
@ (17 bytes are read from each r2 row).
@ In:    r1, r2 = the two blocks, r3 = byte stride applied to both,
@        [sp] at entry = row count
@        NOTE(review): presumably the fifth C argument - confirm.
@        The incoming r0 is not read.
@ Out:   r0 = sum
@ Clobb: r1, r2, r12, flags (r4-r11 are saved; lr is saved and holds the
@        0x01010101 mask)
function ff_pix_abs16_x2_armv6, export=1
        ldr             r12, [sp]
        push            {r4-r11, lr}
        mov             r0,  #0
        mov             lr,  #1
        orr             lr,  lr,  lr,  lsl #8
        orr             lr,  lr,  lr,  lsl #16  @ lr = 0x01010101
1:
        ldr             r8,  [r2]
        ldr             r9,  [r2, #4]
        lsr             r10, r8,  #8
        ldr             r4,  [r1]
        lsr             r6,  r9,  #8
        orr             r10, r10, r9,  lsl #24  @ r2 bytes 1..4
        ldr             r5,  [r2, #8]
        eor             r11, r8,  r10
        uhadd8          r7,  r8,  r10
        orr             r6,  r6,  r5,  lsl #24  @ r2 bytes 5..8
        and             r11, r11, lr
        uadd8           r7,  r7,  r11
        ldr             r8,  [r1, #4]
        usada8          r0,  r4,  r7,  r0
        eor             r7,  r9,  r6
        lsr             r10, r5,  #8
        and             r7,  r7,  lr
        uhadd8          r4,  r9,  r6
        ldr             r6,  [r2, #12]
        uadd8           r4,  r4,  r7
        pld             [r1, r3]
        orr             r10, r10, r6,  lsl #24  @ r2 bytes 9..12
        usada8          r0,  r8,  r4,  r0
        ldr             r4,  [r1, #8]
        eor             r11, r5,  r10
        ldrb            r7,  [r2, #16]          @ 17th byte of the row
        and             r11, r11, lr
        uhadd8          r8,  r5,  r10
        ldr             r5,  [r1, #12]
        uadd8           r8,  r8,  r11
        pld             [r2, r3]
        lsr             r10, r6,  #8
        usada8          r0,  r4,  r8,  r0
        orr             r10, r10, r7,  lsl #24  @ r2 bytes 13..16
        subs            r12, r12, #1
        eor             r11, r6,  r10
        add             r1,  r1,  r3
        uhadd8          r9,  r6,  r10
        and             r11, r11, lr
        uadd8           r9,  r9,  r11
        add             r2,  r2,  r3
        usada8          r0,  r5,  r9,  r0
        bgt             1b

        pop             {r4-r11, pc}
endfunc

@ usad_y2 p0, p1, p2, p3, n0, n1, n2, n3
@ One 16-byte row of the vertical half-pel SAD.
@ In:    p0-p3 = previous row of the r2 block, lr = 0x01010101,
@        r0 = running sum, r1 and r2 = current row pointers, r3 = stride
@ Out:   n0-n3 = the row just loaded from r2 (the previous row for the
@        next invocation), r0 += SAD of the r1 row against the rounded-up
@        average of the two r2 rows, r1 and r2 advanced by r3
@ Clobb: p0-p3 (reused as scratch once consumed). Flags are not touched.
.macro  usad_y2         p0,  p1,  p2,  p3,  n0,  n1,  n2,  n3
        ldr             \n0, [r2]
        eor             \n1, \p0, \n0
        uhadd8          \p0, \p0, \n0
        and             \n1, \n1, lr
        ldr             \n2, [r1]
        uadd8           \p0, \p0, \n1
        ldr             \n1, [r2, #4]
        usada8          r0,  \p0, \n2, r0
        pld             [r1,  r3]
        eor             \n3, \p1, \n1
        uhadd8          \p1, \p1, \n1
        and             \n3, \n3, lr
        ldr             \p0, [r1, #4]
        uadd8           \p1, \p1, \n3
        ldr             \n2, [r2, #8]
        usada8          r0,  \p1, \p0, r0
        pld             [r2,  r3]
        eor             \p0, \p2, \n2
        uhadd8          \p2, \p2, \n2
        and             \p0, \p0, lr
        ldr             \p1, [r1, #8]
        uadd8           \p2, \p2, \p0
        ldr             \n3, [r2, #12]
        usada8          r0,  \p2, \p1, r0
        eor             \p1, \p3, \n3
        uhadd8          \p3, \p3, \n3
        and             \p1, \p1, lr
        ldr             \p0, [r1, #12]
        uadd8           \p3, \p3, \p1
        add             r1,  r1,  r3
        usada8          r0,  \p3, \p0, r0
        add             r2,  r2,  r3
.endm

@ ff_pix_abs16_y2_armv6
@ Sum of absolute differences, 16 bytes per row, between the block at r1
@ and the rounded-up average of each r2 row with the r2 row below it.
@ In:    r1, r2 = the two blocks, r3 = byte stride applied to both,
@        [sp] at entry = row count
@        NOTE(review): presumably the fifth C argument - confirm.
@        The incoming r0 is not read.
@ Out:   r0 = sum
@ Loop:  two rows per iteration; the register roles of the two usad_y2
@        invocations are swapped so no row needs to be copied.
@ Clobb: r1, r2, r12, flags (r4-r11 are saved; lr is saved and holds the
@        0x01010101 mask)
function ff_pix_abs16_y2_armv6, export=1
        pld             [r1]
        pld             [r2]
        ldr             r12, [sp]
        push            {r4-r11, lr}
        mov             r0,  #0
        mov             lr,  #1
        orr             lr,  lr,  lr,  lsl #8
        orr             lr,  lr,  lr,  lsl #16  @ lr = 0x01010101
        ldr             r4,  [r2]               @ r4-r7 = first r2 row
        ldr             r5,  [r2, #4]
        ldr             r6,  [r2, #8]
        ldr             r7,  [r2, #12]
        add             r2,  r2,  r3
1:
        usad_y2         r4,  r5,  r6,  r7,  r8,  r9,  r10, r11
        subs            r12, r12, #2            @ usad_y2 leaves flags alone
        usad_y2         r8,  r9,  r10, r11, r4,  r5,  r6,  r7
        bgt             1b

        pop             {r4-r11, pc}
endfunc

@ ff_pix_abs8_armv6
@ Sum of absolute differences between two blocks, 8 bytes per row.
@ In:    r1, r2 = the two blocks, r3 = byte stride applied to both,
@        [sp] at entry = row count
@        NOTE(review): presumably the fifth C argument - confirm.
@        The incoming r0 is not read.
@ Out:   r0 = sum
@ Loop:  two rows per iteration; exits through label 2 only when the count
@        reaches exactly zero, so it is expected to be even and non-zero.
@ Clobb: r1, r2, r12, flags (r4-r9 are saved; lr is saved and used as a
@        second accumulator)
function ff_pix_abs8_armv6, export=1
        pld             [r2, r3]
        ldr             r12, [sp]
        push            {r4-r9, lr}
        mov             r0,  #0
        mov             lr,  #0
        ldrd            r4,  r5,  [r1], r3
1:
        subs            r12, r12, #2
        ldr             r7,  [r2, #4]
        ldr             r6,  [r2], r3
        ldrd            r8,  r9,  [r1], r3
        usada8          r0,  r4,  r6,  r0
        pld             [r2, r3]
        usada8          lr,  r5,  r7,  lr
        ldr             r7,  [r2, #4]
        ldr             r6,  [r2], r3
        beq             2f
        ldrd            r4,  r5,  [r1], r3
        usada8          r0,  r8,  r6,  r0
        pld             [r2, r3]
        usada8          lr,  r9,  r7,  lr
        b               1b
2:
        usada8          r0,  r8,  r6,  r0
        usada8          lr,  r9,  r7,  lr
        add             r0,  r0,  lr            @ merge the two accumulators
        pop             {r4-r9, pc}
endfunc

@ ff_sse16_armv6
@ Sum of squared differences between two blocks, 16 bytes per row.
@ Each pair of words is split into even and odd bytes (uxtb16), the
@ halfword differences are taken with usub16 and both halves are squared
@ and accumulated by smlad.
@ In:    r1, r2 = the two blocks, r3 = byte stride applied to both,
@        [sp] at entry = row count
@        NOTE(review): presumably the fifth C argument - confirm.
@        The incoming r0 is not read.
@ Out:   r0 = sum
@ Clobb: r1, r2, r12, flags (r4-r9 are saved; lr is saved and used as
@        scratch)
function ff_sse16_armv6, export=1
        ldr             r12, [sp]
        push            {r4-r9, lr}
        mov             r0,  #0
1:
        ldrd            r4,  r5,  [r1]
        ldr             r8,  [r2]
        uxtb16          lr,  r4
        uxtb16          r4,  r4,  ror #8
        uxtb16          r9,  r8
        uxtb16          r8,  r8,  ror #8
        ldr             r7,  [r2, #4]
        usub16          lr,  lr,  r9
        usub16          r4,  r4,  r8
        smlad           r0,  lr,  lr,  r0
        uxtb16          r6,  r5
        uxtb16          lr,  r5,  ror #8
        uxtb16          r8,  r7
        uxtb16          r9,  r7,  ror #8
        smlad           r0,  r4,  r4,  r0
        ldrd            r4,  r5,  [r1, #8]
        usub16          r6,  r6,  r8
        usub16          r8,  lr,  r9
        ldr             r7,  [r2, #8]
        smlad           r0,  r6,  r6,  r0
        uxtb16          lr,  r4
        uxtb16          r4,  r4,  ror #8
        uxtb16          r9,  r7
        uxtb16          r7,  r7,  ror #8
        smlad           r0,  r8,  r8,  r0
        ldr             r8,  [r2, #12]
        usub16          lr,  lr,  r9
        usub16          r4,  r4,  r7
        smlad           r0,  lr,  lr,  r0
        uxtb16          r6,  r5
        uxtb16          r5,  r5,  ror #8
        uxtb16          r9,  r8
        uxtb16          r8,  r8,  ror #8
        smlad           r0,  r4,  r4,  r0
        usub16          r6,  r6,  r9
        usub16          r5,  r5,  r8
        smlad           r0,  r6,  r6,  r0
        add             r1,  r1,  r3
        add             r2,  r2,  r3
        subs            r12, r12, #1
        smlad           r0,  r5,  r5,  r0
        bgt             1b

        pop             {r4-r9, pc}
endfunc

@ ff_pix_norm1_armv6
@ Sum of the squares of every byte in a block of 16 rows of 16 bytes.
@ In:    r0 = block, r1 = byte stride
@ Out:   r0 = sum
@ Clobb: r1-r3, r12, flags (r4-r6 are saved; lr is saved and used as the
@        accumulator)
function ff_pix_norm1_armv6, export=1
        push            {r4-r6, lr}
        mov             r12, #16                @ fixed row count
        mov             lr,  #0
1:
        ldm             r0,  {r2-r5}
        uxtb16          r6,  r2
        uxtb16          r2,  r2,  ror #8
        smlad           lr,  r6,  r6,  lr
        uxtb16          r6,  r3
        smlad           lr,  r2,  r2,  lr
        uxtb16          r3,  r3,  ror #8
        smlad           lr,  r6,  r6,  lr
        uxtb16          r6,  r4
        smlad           lr,  r3,  r3,  lr
        uxtb16          r4,  r4,  ror #8
        smlad           lr,  r6,  r6,  lr
        uxtb16          r6,  r5
        smlad           lr,  r4,  r4,  lr
        uxtb16          r5,  r5,  ror #8
        smlad           lr,  r6,  r6,  lr
        subs            r12, r12, #1
        add             r0,  r0,  r1
        smlad           lr,  r5,  r5,  lr
        bgt             1b

        mov             r0,  lr
        pop             {r4-r6, pc}
endfunc

@ ff_pix_sum_armv6
@ Sum of every byte in a block of 16 rows of 16 bytes. usada8 against a
@ zero register adds the four bytes of a word to the accumulator.
@ In:    r0 = block, r1 = byte stride
@ Out:   r0 = sum
@ Clobb: r1-r3, r12, flags (r4-r7 are saved; lr is saved and holds zero)
function ff_pix_sum_armv6, export=1
        push            {r4-r7, lr}
        mov             r12, #16                @ fixed row count
        mov             r2,  #0
        mov             r3,  #0
        mov             lr,  #0
        ldr             r4,  [r0]
1:
        subs            r12, r12, #1
        ldr             r5,  [r0, #4]
        usada8          r2,  r4,  lr,  r2
        ldr             r6,  [r0, #8]
        usada8          r3,  r5,  lr,  r3
        ldr             r7,  [r0, #12]
        usada8          r2,  r6,  lr,  r2
        beq             2f                      @ last row: skip the next load
        ldr             r4,  [r0, r1]!
        usada8          r3,  r7,  lr,  r3
        bgt             1b
2:
        usada8          r3,  r7,  lr,  r3
        add             r0,  r2,  r3            @ merge the two accumulators
        pop             {r4-r7, pc}
endfunc
