Mercurial > cgi-bin > hgwebdir.cgi > PR > Applications > VSs > VSs__H264__App
comparison libavcodec/arm/dsputil_arm.S @ 4:96e628866d41
naming some tasks to help debugging
| author | Nina Engelhardt <nengel@mailbox.tu-berlin.de> |
|---|---|
| date | Wed, 19 Dec 2012 15:40:26 +0100 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:50444ffd10ad |
|---|---|
| 1 @ | |
| 2 @ ARMv4 optimized DSP utils | |
| 3 @ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp> | |
| 4 @ | |
| 5 @ This file is part of FFmpeg. | |
| 6 @ | |
| 7 @ FFmpeg is free software; you can redistribute it and/or | |
| 8 @ modify it under the terms of the GNU Lesser General Public | |
| 9 @ License as published by the Free Software Foundation; either | |
| 10 @ version 2.1 of the License, or (at your option) any later version. | |
| 11 @ | |
| 12 @ FFmpeg is distributed in the hope that it will be useful, | |
| 13 @ but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 14 @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 15 @ Lesser General Public License for more details. | |
| 16 @ | |
| 17 @ You should have received a copy of the GNU Lesser General Public | |
| 18 @ License along with FFmpeg; if not, write to the Free Software | |
| 19 @ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
| 20 @ | |
| 21 | |
| 22 #include "config.h" | |
| 23 #include "asm.S" | |
| 24 | |
| 25 preserve8 | |
| 26 | |
| 27 #if !HAVE_PLD | |
| 28 .macro pld reg @ fallback for builds where the PLD config flag is 0: pld is defined as an empty macro, so every pld line below emits nothing | |
| 29 .endm | |
| 30 #endif | |
| 31 | |
| 32 #if HAVE_ARMV5TE | |
| 33 function ff_prefetch_arm, export=1 @ issue one pld per step: r0 = address, r1 = byte step, r2 = step count (presumably mem, stride, h in the C prototype - TODO confirm) | |
| 34 subs r2, r2, #1 @ count down first; the Z flag is consumed by the bne below (pld and add do not touch flags) | |
| 35 pld [r0] @ preload hint for the current address | |
| 36 add r0, r0, r1 @ advance to the next address | |
| 37 bne ff_prefetch_arm @ the loop re-enters at the function label; note r2 = 0 on entry wraps instead of returning at once | |
| 38 bx lr | |
| 39 endfunc | |
| 40 #endif | |
| 41 | |
| 42 .macro ALIGN_QWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4 @ Rd(i) = (Rn(i) >> 8*shift) OR (Rn(i+1) << (32 - 8*shift)) for i = 0..3; sources are only read | |
| 43 mov \Rd0, \Rn0, lsr #(\shift * 8) @ first pass: drop the low shift bytes of each source word | |
| 44 mov \Rd1, \Rn1, lsr #(\shift * 8) | |
| 45 mov \Rd2, \Rn2, lsr #(\shift * 8) | |
| 46 mov \Rd3, \Rn3, lsr #(\shift * 8) | |
| 47 orr \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8) @ second pass: fill the vacated top bytes from the low bytes of the following word | |
| 48 orr \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8) | |
| 49 orr \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8) | |
| 50 orr \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8) @ callers in this file pass shift = 1, 2 or 3; shift = 0 would request lsl #32 | |
| 51 .endm | |
| 52 .macro ALIGN_DWORD shift, R0, R1, R2 @ in-place variant: R0 and R1 are overwritten with the two words starting shift bytes into R0,R1,R2; R2 is only read | |
| 53 mov \R0, \R0, lsr #(\shift * 8) | |
| 54 orr \R0, \R0, \R1, lsl #(32 - \shift * 8) @ must use the old R1, so R0 is completed before R1 is shifted | |
| 55 mov \R1, \R1, lsr #(\shift * 8) | |
| 56 orr \R1, \R1, \R2, lsl #(32 - \shift * 8) | |
| 57 .endm | |
| 58 .macro ALIGN_DWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2 @ two-word variant with separate destinations: Rdst0,Rdst1 = the two words starting shift bytes into Rsrc0,Rsrc1,Rsrc2 | |
| 59 mov \Rdst0, \Rsrc0, lsr #(\shift * 8) @ Rsrc0 is read only here, so Rdst1 may alias it (RND_XY2_IT does this for align 3) | |
| 60 mov \Rdst1, \Rsrc1, lsr #(\shift * 8) | |
| 61 orr \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8)) | |
| 62 orr \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8)) | |
| 63 .endm | |
| 64 | |
| 65 .macro RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask | |
| 66 @ Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1), i.e. the per-byte average rounded up, (a + b + 1) >> 1, for two words at once | |
| 67 @ Rmask = 0xFEFEFEFE, must be loaded by the caller; it clears bit 0 of every byte so the shift right cannot leak into the byte below | |
| 68 @ Rn = destroyed (overwritten with the OR term); Rm is only read | |
| 69 eor \Rd0, \Rn0, \Rm0 @ bits where the two inputs differ | |
| 70 eor \Rd1, \Rn1, \Rm1 | |
| 71 orr \Rn0, \Rn0, \Rm0 @ OR term, kept in Rn | |
| 72 orr \Rn1, \Rn1, \Rm1 | |
| 73 and \Rd0, \Rd0, \Rmask | |
| 74 and \Rd1, \Rd1, \Rmask | |
| 75 sub \Rd0, \Rn0, \Rd0, lsr #1 @ OR minus half of the masked difference | |
| 76 sub \Rd1, \Rn1, \Rd1, lsr #1 | |
| 77 .endm | |
| 78 | |
| 79 .macro NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask | |
| 80 @ Rd = (Rn & Rm) + (((Rn ^ Rm) & ~0x01010101) >> 1), i.e. the per-byte average rounded down, (a + b) >> 1, for two words at once | |
| 81 @ Rmask = 0xFEFEFEFE, must be loaded by the caller; it clears bit 0 of every byte so the shift right cannot leak into the byte below | |
| 82 @ Rn = destroyed (overwritten with the AND term); Rm is only read | |
| 83 eor \Rd0, \Rn0, \Rm0 @ bits where the two inputs differ | |
| 84 eor \Rd1, \Rn1, \Rm1 | |
| 85 and \Rn0, \Rn0, \Rm0 @ AND term (bits common to both), kept in Rn | |
| 86 and \Rn1, \Rn1, \Rm1 | |
| 87 and \Rd0, \Rd0, \Rmask | |
| 88 and \Rd1, \Rd1, \Rmask | |
| 89 add \Rd0, \Rn0, \Rd0, lsr #1 @ AND plus half of the masked difference (an add, unlike the sub in RND_AVG32) | |
| 90 add \Rd1, \Rn1, \Rd1, lsr #1 | |
| 91 .endm | |
| 92 | |
| 93 .macro JMP_ALIGN tmp, reg @ dispatch on the low two bits of reg: offset 0/1/2/3 goes to local label 1/2/3/4, which the caller must define after the macro; clobbers tmp and the flags | |
| 94 ands \tmp, \reg, #3 @ tmp = byte offset inside the word; Z set when reg is already word aligned | |
| 95 bic \reg, \reg, #3 @ round reg down to a word boundary (bic without s keeps the flags from ands) | |
| 96 beq 1f | |
| 97 subs \tmp, \tmp, #1 | |
| 98 beq 2f | |
| 99 subs \tmp, \tmp, #1 | |
| 100 beq 3f | |
| 101 b 4f @ only offset 3 is left | |
| 102 .endm | |
| 103 | |
| 104 @ ---------------------------------------------------------------- | |
| 105 .align 5 | |
| 106 function ff_put_pixels16_arm, export=1 | |
| 107 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |
| 108 @ block = word aligned, pixels = unaligned; copies h rows of 16 bytes, r0 = block, r1 = pixels, r2 = line_size, r3 = h | |
| 109 pld [r1] | |
| 110 push {r4-r11, lr} @ r12 is also used below but not saved (AAPCS scratch register) | |
| 111 JMP_ALIGN r5, r1 @ r1 is rounded down to a word; one copy loop per source byte offset follows | |
| 112 1: | |
| 113 ldm r1, {r4-r7} @ offset 0: plain four-word copy | |
| 114 add r1, r1, r2 | |
| 115 stm r0, {r4-r7} | |
| 116 pld [r1] @ hint the next source row | |
| 117 subs r3, r3, #1 @ rows left; the loop body always runs once before this test | |
| 118 add r0, r0, r2 @ add without s, so the flags from subs reach the bne | |
| 119 bne 1b | |
| 120 pop {r4-r11, pc} @ restore and return in one instruction | |
| 121 .align 5 | |
| 122 2: | |
| 123 ldm r1, {r4-r8} @ offset 1: five words cover the 16 wanted bytes | |
| 124 add r1, r1, r2 | |
| 125 ALIGN_QWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8 @ r9-r12 = the 16 bytes starting 1 byte into r4-r8 | |
| 126 pld [r1] | |
| 127 subs r3, r3, #1 | |
| 128 stm r0, {r9-r12} | |
| 129 add r0, r0, r2 | |
| 130 bne 2b | |
| 131 pop {r4-r11, pc} | |
| 132 .align 5 | |
| 133 3: | |
| 134 ldm r1, {r4-r8} @ offset 2: same loop with a two byte shift | |
| 135 add r1, r1, r2 | |
| 136 ALIGN_QWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8 | |
| 137 pld [r1] | |
| 138 subs r3, r3, #1 | |
| 139 stm r0, {r9-r12} | |
| 140 add r0, r0, r2 | |
| 141 bne 3b | |
| 142 pop {r4-r11, pc} | |
| 143 .align 5 | |
| 144 4: | |
| 145 ldm r1, {r4-r8} @ offset 3: same loop with a three byte shift | |
| 146 add r1, r1, r2 | |
| 147 ALIGN_QWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8 | |
| 148 pld [r1] | |
| 149 subs r3, r3, #1 | |
| 150 stm r0, {r9-r12} | |
| 151 add r0, r0, r2 | |
| 152 bne 4b | |
| 153 pop {r4-r11,pc} | |
| 154 endfunc | |
| 155 | |
| 156 @ ---------------------------------------------------------------- | |
| 157 .align 5 | |
| 158 function ff_put_pixels8_arm, export=1 | |
| 159 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |
| 160 @ block = word aligned, pixels = unaligned; copies h rows of 8 bytes, r0 = block, r1 = pixels, r2 = line_size, r3 = h | |
| 161 pld [r1] | |
| 162 push {r4-r5,lr} @ r12 is used as the third source word below and is not saved (AAPCS scratch register) | |
| 163 JMP_ALIGN r5, r1 @ r1 is rounded down to a word; one copy loop per source byte offset follows | |
| 164 1: | |
| 165 ldm r1, {r4-r5} @ offset 0: plain two-word copy | |
| 166 add r1, r1, r2 | |
| 167 subs r3, r3, #1 @ rows left; pld, stm and add below leave the flags for the bne | |
| 168 pld [r1] | |
| 169 stm r0, {r4-r5} | |
| 170 add r0, r0, r2 | |
| 171 bne 1b | |
| 172 pop {r4-r5,pc} | |
| 173 .align 5 | |
| 174 2: | |
| 175 ldm r1, {r4-r5, r12} @ offset 1: three words cover the 8 wanted bytes | |
| 176 add r1, r1, r2 | |
| 177 ALIGN_DWORD 1, r4, r5, r12 @ realign in place: r4,r5 = the 8 bytes starting 1 byte in | |
| 178 pld [r1] | |
| 179 subs r3, r3, #1 | |
| 180 stm r0, {r4-r5} | |
| 181 add r0, r0, r2 | |
| 182 bne 2b | |
| 183 pop {r4-r5,pc} | |
| 184 .align 5 | |
| 185 3: | |
| 186 ldm r1, {r4-r5, r12} @ offset 2: same loop with a two byte shift | |
| 187 add r1, r1, r2 | |
| 188 ALIGN_DWORD 2, r4, r5, r12 | |
| 189 pld [r1] | |
| 190 subs r3, r3, #1 | |
| 191 stm r0, {r4-r5} | |
| 192 add r0, r0, r2 | |
| 193 bne 3b | |
| 194 pop {r4-r5,pc} | |
| 195 .align 5 | |
| 196 4: | |
| 197 ldm r1, {r4-r5, r12} @ offset 3: same loop with a three byte shift | |
| 198 add r1, r1, r2 | |
| 199 ALIGN_DWORD 3, r4, r5, r12 | |
| 200 pld [r1] | |
| 201 subs r3, r3, #1 | |
| 202 stm r0, {r4-r5} | |
| 203 add r0, r0, r2 | |
| 204 bne 4b | |
| 205 pop {r4-r5,pc} | |
| 206 endfunc | |
| 207 | |
| 208 @ ---------------------------------------------------------------- | |
| 209 .align 5 | |
| 210 function ff_put_pixels8_x2_arm, export=1 | |
| 211 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |
| 212 @ block = word aligned, pixels = unaligned; each output row is the rounded-up byte average of the 8-byte source window and the same window one byte further on | |
| 213 pld [r1] | |
| 214 push {r4-r10,lr} | |
| 215 ldr r12, =0xfefefefe @ mask operand required by RND_AVG32 | |
| 216 JMP_ALIGN r5, r1 @ r1 is rounded down to a word; one loop per source byte offset follows | |
| 217 1: | |
| 218 ldm r1, {r4-r5, r10} @ offset 0: r4,r5 already hold the first window | |
| 219 add r1, r1, r2 | |
| 220 ALIGN_DWORD_D 1, r6, r7, r4, r5, r10 @ r6,r7 = window shifted by one byte | |
| 221 pld [r1] | |
| 222 RND_AVG32 r8, r9, r4, r5, r6, r7, r12 @ r8,r9 = average; r4,r5 are destroyed | |
| 223 subs r3, r3, #1 | |
| 224 stm r0, {r8-r9} | |
| 225 add r0, r0, r2 | |
| 226 bne 1b | |
| 227 pop {r4-r10,pc} | |
| 228 .align 5 | |
| 229 2: | |
| 230 ldm r1, {r4-r5, r10} @ offset 1: both windows need realigning | |
| 231 add r1, r1, r2 | |
| 232 ALIGN_DWORD_D 1, r6, r7, r4, r5, r10 @ first window | |
| 233 ALIGN_DWORD_D 2, r8, r9, r4, r5, r10 @ window one byte further | |
| 234 pld [r1] | |
| 235 RND_AVG32 r4, r5, r6, r7, r8, r9, r12 @ the loaded words are no longer needed, so r4,r5 take the result | |
| 236 subs r3, r3, #1 | |
| 237 stm r0, {r4-r5} | |
| 238 add r0, r0, r2 | |
| 239 bne 2b | |
| 240 pop {r4-r10,pc} | |
| 241 .align 5 | |
| 242 3: | |
| 243 ldm r1, {r4-r5, r10} @ offset 2: as offset 1 with shifts of two and three bytes | |
| 244 add r1, r1, r2 | |
| 245 ALIGN_DWORD_D 2, r6, r7, r4, r5, r10 | |
| 246 ALIGN_DWORD_D 3, r8, r9, r4, r5, r10 | |
| 247 pld [r1] | |
| 248 RND_AVG32 r4, r5, r6, r7, r8, r9, r12 | |
| 249 subs r3, r3, #1 | |
| 250 stm r0, {r4-r5} | |
| 251 add r0, r0, r2 | |
| 252 bne 3b | |
| 253 pop {r4-r10,pc} | |
| 254 .align 5 | |
| 255 4: | |
| 256 ldm r1, {r4-r5, r10} @ offset 3: the second window starts exactly at the second loaded word | |
| 257 add r1, r1, r2 | |
| 258 ALIGN_DWORD_D 3, r6, r7, r4, r5, r10 @ first window | |
| 259 pld [r1] | |
| 260 RND_AVG32 r8, r9, r6, r7, r5, r10, r12 @ r5,r10 are used unshifted as the second window | |
| 261 subs r3, r3, #1 | |
| 262 stm r0, {r8-r9} | |
| 263 add r0, r0, r2 | |
| 264 bne 4b | |
| 265 pop {r4-r10,pc} | |
| 266 endfunc | |
| 267 | |
| 268 .align 5 | |
| 269 function ff_put_no_rnd_pixels8_x2_arm, export=1 | |
| 270 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |
| 271 @ block = word aligned, pixels = unaligned; same structure as ff_put_pixels8_x2_arm but uses NO_RND_AVG32, so each byte average is rounded down | |
| 272 pld [r1] | |
| 273 push {r4-r10,lr} | |
| 274 ldr r12, =0xfefefefe @ mask operand required by NO_RND_AVG32 | |
| 275 JMP_ALIGN r5, r1 @ r1 is rounded down to a word; one loop per source byte offset follows | |
| 276 1: | |
| 277 ldm r1, {r4-r5, r10} @ offset 0: r4,r5 already hold the first window | |
| 278 add r1, r1, r2 | |
| 279 ALIGN_DWORD_D 1, r6, r7, r4, r5, r10 @ r6,r7 = window shifted by one byte | |
| 280 pld [r1] | |
| 281 NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12 @ r8,r9 = average; r4,r5 are destroyed | |
| 282 subs r3, r3, #1 | |
| 283 stm r0, {r8-r9} | |
| 284 add r0, r0, r2 | |
| 285 bne 1b | |
| 286 pop {r4-r10,pc} | |
| 287 .align 5 | |
| 288 2: | |
| 289 ldm r1, {r4-r5, r10} @ offset 1: both windows need realigning | |
| 290 add r1, r1, r2 | |
| 291 ALIGN_DWORD_D 1, r6, r7, r4, r5, r10 @ first window | |
| 292 ALIGN_DWORD_D 2, r8, r9, r4, r5, r10 @ window one byte further | |
| 293 pld [r1] | |
| 294 NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12 @ the loaded words are no longer needed, so r4,r5 take the result | |
| 295 subs r3, r3, #1 | |
| 296 stm r0, {r4-r5} | |
| 297 add r0, r0, r2 | |
| 298 bne 2b | |
| 299 pop {r4-r10,pc} | |
| 300 .align 5 | |
| 301 3: | |
| 302 ldm r1, {r4-r5, r10} @ offset 2: as offset 1 with shifts of two and three bytes | |
| 303 add r1, r1, r2 | |
| 304 ALIGN_DWORD_D 2, r6, r7, r4, r5, r10 | |
| 305 ALIGN_DWORD_D 3, r8, r9, r4, r5, r10 | |
| 306 pld [r1] | |
| 307 NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12 | |
| 308 subs r3, r3, #1 | |
| 309 stm r0, {r4-r5} | |
| 310 add r0, r0, r2 | |
| 311 bne 3b | |
| 312 pop {r4-r10,pc} | |
| 313 .align 5 | |
| 314 4: | |
| 315 ldm r1, {r4-r5, r10} @ offset 3: the second window starts exactly at the second loaded word | |
| 316 add r1, r1, r2 | |
| 317 ALIGN_DWORD_D 3, r6, r7, r4, r5, r10 @ first window | |
| 318 pld [r1] | |
| 319 NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12 @ r5,r10 are used unshifted as the second window | |
| 320 subs r3, r3, #1 | |
| 321 stm r0, {r8-r9} | |
| 322 add r0, r0, r2 | |
| 323 bne 4b | |
| 324 pop {r4-r10,pc} | |
| 325 endfunc | |
| 326 | |
| 327 | |
| 328 @ ---------------------------------------------------------------- | |
| 329 .align 5 | |
| 330 function ff_put_pixels8_y2_arm, export=1 | |
| 331 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |
| 332 @ block = word aligned, pixels = unaligned; each output row is the rounded-up byte average of a source row and the row below it, so h + 1 source rows are read | |
| 333 pld [r1] | |
| 334 push {r4-r11,lr} | |
| 335 mov r3, r3, lsr #1 @ every loop pass emits two rows, so count row pairs (an odd h loses its last row; h below 2 makes the counter wrap) | |
| 336 ldr r12, =0xfefefefe @ mask operand required by RND_AVG32 | |
| 337 JMP_ALIGN r5, r1 @ r1 is rounded down to a word; one loop per source byte offset follows | |
| 338 1: | |
| 339 ldm r1, {r4-r5} @ offset 0: prime with the first source row | |
| 340 add r1, r1, r2 | |
| 341 6: ldm r1, {r6-r7} @ next row; the older row stays in registers so each source row is loaded once | |
| 342 add r1, r1, r2 | |
| 343 pld [r1] | |
| 344 RND_AVG32 r8, r9, r4, r5, r6, r7, r12 @ destroys r4,r5 (the older row), keeps r6,r7 | |
| 345 ldm r1, {r4-r5} @ row after that, reusing the registers just destroyed | |
| 346 add r1, r1, r2 | |
| 347 stm r0, {r8-r9} | |
| 348 add r0, r0, r2 | |
| 349 pld [r1] | |
| 350 RND_AVG32 r8, r9, r6, r7, r4, r5, r12 @ operands swapped: destroys r6,r7, keeps r4,r5 for the next pass | |
| 351 subs r3, r3, #1 | |
| 352 stm r0, {r8-r9} | |
| 353 add r0, r0, r2 | |
| 354 bne 6b | |
| 355 pop {r4-r11,pc} | |
| 356 .align 5 | |
| 357 2: | |
| 358 ldm r1, {r4-r6} @ offset 1: prime with the first row, three words per row | |
| 359 add r1, r1, r2 | |
| 360 pld [r1] | |
| 361 ALIGN_DWORD 1, r4, r5, r6 @ realign in place: r4,r5 = the 8 wanted bytes | |
| 362 6: ldm r1, {r7-r9} | |
| 363 add r1, r1, r2 | |
| 364 pld [r1] | |
| 365 ALIGN_DWORD 1, r7, r8, r9 | |
| 366 RND_AVG32 r10, r11, r4, r5, r7, r8, r12 @ destroys r4,r5, keeps r7,r8 | |
| 367 stm r0, {r10-r11} | |
| 368 add r0, r0, r2 | |
| 369 ldm r1, {r4-r6} | |
| 370 add r1, r1, r2 | |
| 371 pld [r1] | |
| 372 ALIGN_DWORD 1, r4, r5, r6 | |
| 373 subs r3, r3, #1 @ flags survive RND_AVG32, stm and add (none of them set flags) | |
| 374 RND_AVG32 r10, r11, r7, r8, r4, r5, r12 @ destroys r7,r8, keeps r4,r5 for the next pass | |
| 375 stm r0, {r10-r11} | |
| 376 add r0, r0, r2 | |
| 377 bne 6b | |
| 378 pop {r4-r11,pc} | |
| 379 .align 5 | |
| 380 3: | |
| 381 ldm r1, {r4-r6} @ offset 2: same loop with a two byte shift | |
| 382 add r1, r1, r2 | |
| 383 pld [r1] | |
| 384 ALIGN_DWORD 2, r4, r5, r6 | |
| 385 6: ldm r1, {r7-r9} | |
| 386 add r1, r1, r2 | |
| 387 pld [r1] | |
| 388 ALIGN_DWORD 2, r7, r8, r9 | |
| 389 RND_AVG32 r10, r11, r4, r5, r7, r8, r12 | |
| 390 stm r0, {r10-r11} | |
| 391 add r0, r0, r2 | |
| 392 ldm r1, {r4-r6} | |
| 393 add r1, r1, r2 | |
| 394 pld [r1] | |
| 395 ALIGN_DWORD 2, r4, r5, r6 | |
| 396 subs r3, r3, #1 | |
| 397 RND_AVG32 r10, r11, r7, r8, r4, r5, r12 | |
| 398 stm r0, {r10-r11} | |
| 399 add r0, r0, r2 | |
| 400 bne 6b | |
| 401 pop {r4-r11,pc} | |
| 402 .align 5 | |
| 403 4: | |
| 404 ldm r1, {r4-r6} @ offset 3: same loop with a three byte shift | |
| 405 add r1, r1, r2 | |
| 406 pld [r1] | |
| 407 ALIGN_DWORD 3, r4, r5, r6 | |
| 408 6: ldm r1, {r7-r9} | |
| 409 add r1, r1, r2 | |
| 410 pld [r1] | |
| 411 ALIGN_DWORD 3, r7, r8, r9 | |
| 412 RND_AVG32 r10, r11, r4, r5, r7, r8, r12 | |
| 413 stm r0, {r10-r11} | |
| 414 add r0, r0, r2 | |
| 415 ldm r1, {r4-r6} | |
| 416 add r1, r1, r2 | |
| 417 pld [r1] | |
| 418 ALIGN_DWORD 3, r4, r5, r6 | |
| 419 subs r3, r3, #1 | |
| 420 RND_AVG32 r10, r11, r7, r8, r4, r5, r12 | |
| 421 stm r0, {r10-r11} | |
| 422 add r0, r0, r2 | |
| 423 bne 6b | |
| 424 pop {r4-r11,pc} | |
| 425 endfunc | |
| 426 | |
| 427 .align 5 | |
| 428 function ff_put_no_rnd_pixels8_y2_arm, export=1 | |
| 429 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |
| 430 @ block = word aligned, pixels = unaligned; same structure as ff_put_pixels8_y2_arm but uses NO_RND_AVG32, so each byte average of a row and the row below is rounded down | |
| 431 pld [r1] | |
| 432 push {r4-r11,lr} | |
| 433 mov r3, r3, lsr #1 @ every loop pass emits two rows, so count row pairs (an odd h loses its last row; h below 2 makes the counter wrap) | |
| 434 ldr r12, =0xfefefefe @ mask operand required by NO_RND_AVG32 | |
| 435 JMP_ALIGN r5, r1 @ r1 is rounded down to a word; one loop per source byte offset follows | |
| 436 1: | |
| 437 ldm r1, {r4-r5} @ offset 0: prime with the first source row | |
| 438 add r1, r1, r2 | |
| 439 6: ldm r1, {r6-r7} @ next row; the older row stays in registers so each source row is loaded once | |
| 440 add r1, r1, r2 | |
| 441 pld [r1] | |
| 442 NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12 @ destroys r4,r5 (the older row), keeps r6,r7 | |
| 443 ldm r1, {r4-r5} @ row after that, reusing the registers just destroyed | |
| 444 add r1, r1, r2 | |
| 445 stm r0, {r8-r9} | |
| 446 add r0, r0, r2 | |
| 447 pld [r1] | |
| 448 NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12 @ operands swapped: destroys r6,r7, keeps r4,r5 for the next pass | |
| 449 subs r3, r3, #1 | |
| 450 stm r0, {r8-r9} | |
| 451 add r0, r0, r2 | |
| 452 bne 6b | |
| 453 pop {r4-r11,pc} | |
| 454 .align 5 | |
| 455 2: | |
| 456 ldm r1, {r4-r6} @ offset 1: prime with the first row, three words per row | |
| 457 add r1, r1, r2 | |
| 458 pld [r1] | |
| 459 ALIGN_DWORD 1, r4, r5, r6 @ realign in place: r4,r5 = the 8 wanted bytes | |
| 460 6: ldm r1, {r7-r9} | |
| 461 add r1, r1, r2 | |
| 462 pld [r1] | |
| 463 ALIGN_DWORD 1, r7, r8, r9 | |
| 464 NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 @ destroys r4,r5, keeps r7,r8 | |
| 465 stm r0, {r10-r11} | |
| 466 add r0, r0, r2 | |
| 467 ldm r1, {r4-r6} | |
| 468 add r1, r1, r2 | |
| 469 pld [r1] | |
| 470 ALIGN_DWORD 1, r4, r5, r6 | |
| 471 subs r3, r3, #1 @ flags survive NO_RND_AVG32, stm and add (none of them set flags) | |
| 472 NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 @ destroys r7,r8, keeps r4,r5 for the next pass | |
| 473 stm r0, {r10-r11} | |
| 474 add r0, r0, r2 | |
| 475 bne 6b | |
| 476 pop {r4-r11,pc} | |
| 477 .align 5 | |
| 478 3: | |
| 479 ldm r1, {r4-r6} @ offset 2: same loop with a two byte shift | |
| 480 add r1, r1, r2 | |
| 481 pld [r1] | |
| 482 ALIGN_DWORD 2, r4, r5, r6 | |
| 483 6: ldm r1, {r7-r9} | |
| 484 add r1, r1, r2 | |
| 485 pld [r1] | |
| 486 ALIGN_DWORD 2, r7, r8, r9 | |
| 487 NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 | |
| 488 stm r0, {r10-r11} | |
| 489 add r0, r0, r2 | |
| 490 ldm r1, {r4-r6} | |
| 491 add r1, r1, r2 | |
| 492 pld [r1] | |
| 493 ALIGN_DWORD 2, r4, r5, r6 | |
| 494 subs r3, r3, #1 | |
| 495 NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 | |
| 496 stm r0, {r10-r11} | |
| 497 add r0, r0, r2 | |
| 498 bne 6b | |
| 499 pop {r4-r11,pc} | |
| 500 .align 5 | |
| 501 4: | |
| 502 ldm r1, {r4-r6} @ offset 3: same loop with a three byte shift | |
| 503 add r1, r1, r2 | |
| 504 pld [r1] | |
| 505 ALIGN_DWORD 3, r4, r5, r6 | |
| 506 6: ldm r1, {r7-r9} | |
| 507 add r1, r1, r2 | |
| 508 pld [r1] | |
| 509 ALIGN_DWORD 3, r7, r8, r9 | |
| 510 NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 | |
| 511 stm r0, {r10-r11} | |
| 512 add r0, r0, r2 | |
| 513 ldm r1, {r4-r6} | |
| 514 add r1, r1, r2 | |
| 515 pld [r1] | |
| 516 ALIGN_DWORD 3, r4, r5, r6 | |
| 517 subs r3, r3, #1 | |
| 518 NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 | |
| 519 stm r0, {r10-r11} | |
| 520 add r0, r0, r2 | |
| 521 bne 6b | |
| 522 pop {r4-r11,pc} | |
| 523 endfunc | |
| 524 | |
| 525 .ltorg @ emit the pending literal pool here (constants requested with ldr reg, =value above) | |
| 526 | |
| 527 @ ---------------------------------------------------------------- | |
| 528 .macro RND_XY2_IT align, rnd @ one source row of the 2x2 average: a = 8-byte window, b = window one byte on; out r8,r9 = low-bit sums l1, r10,r11 = high-part sums h1; advances r1, decrements r3 (flags left for the caller), clobbers r4-r7, r12, r14 | |
| 529 @ l1= (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202) -- rounding term added only when r3 is even; it is 0x02020202 for rnd = lsl and 0x01010101 for rnd = lsr | |
| 530 @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2) | |
| 531 .if \align == 0 | |
| 532 ldm r1, {r6-r8} @ register choice per alignment lets the unshifted words serve directly as one of the windows | |
| 533 .elseif \align == 3 | |
| 534 ldm r1, {r5-r7} | |
| 535 .else | |
| 536 ldm r1, {r8-r10} | |
| 537 .endif | |
| 538 add r1, r1, r2 | |
| 539 pld [r1] | |
| 540 .if \align == 0 | |
| 541 ALIGN_DWORD_D 1, r4, r5, r6, r7, r8 @ r6,r7 = window a as loaded, r4,r5 = window b | |
| 542 .elseif \align == 1 | |
| 543 ALIGN_DWORD_D 1, r4, r5, r8, r9, r10 | |
| 544 ALIGN_DWORD_D 2, r6, r7, r8, r9, r10 | |
| 545 .elseif \align == 2 | |
| 546 ALIGN_DWORD_D 2, r4, r5, r8, r9, r10 | |
| 547 ALIGN_DWORD_D 3, r6, r7, r8, r9, r10 | |
| 548 .elseif \align == 3 | |
| 549 ALIGN_DWORD_D 3, r4, r5, r5, r6, r7 @ r5 is both source and destination; safe because the macro reads its first source only in its first instruction; r6,r7 = window b unshifted | |
| 550 .endif | |
| 551 ldr r14, =0x03030303 @ r14 (lr) is scratch here, so callers must have saved lr | |
| 552 tst r3, #1 @ Z set on even r3; the and/add/ldr lines below do not set flags, so eq still refers to this test | |
| 553 and r8, r4, r14 @ low two bits of every byte | |
| 554 and r9, r5, r14 | |
| 555 and r10, r6, r14 | |
| 556 and r11, r7, r14 | |
| 557 andeq r14, r14, r14, \rnd #1 @ rounding constant: 0x03030303 AND itself shifted by one | |
| 558 add r8, r8, r10 | |
| 559 add r9, r9, r11 | |
| 560 ldr r12, =0xfcfcfcfc >> 2 @ 0x3f3f3f3f, applied after the shift instead of 0xfcfcfcfc before it | |
| 561 addeq r8, r8, r14 | |
| 562 addeq r9, r9, r14 | |
| 563 and r4, r12, r4, lsr #2 @ upper six bits of every byte, moved down by two | |
| 564 and r5, r12, r5, lsr #2 | |
| 565 and r6, r12, r6, lsr #2 | |
| 566 and r7, r12, r7, lsr #2 | |
| 567 add r10, r4, r6 | |
| 568 add r11, r5, r7 | |
| 569 subs r3, r3, #1 @ row countdown; RND_XY2_EXPAND branches on these flags | |
| 570 .endm | |
| 571 | |
| 572 .macro RND_XY2_EXPAND align, rnd @ complete xy2 loop and function epilogue for one source alignment: output row = combination of the RND_XY2_IT sums of two consecutive source rows | |
| 573 RND_XY2_IT \align, \rnd @ prime with the sums of the first source row | |
| 574 6: push {r8-r11} @ park the previous row sums; RND_XY2_IT overwrites r8-r11 | |
| 575 RND_XY2_IT \align, \rnd | |
| 576 pop {r4-r7} @ previous row: r4,r5 = low sums, r6,r7 = high sums | |
| 577 add r4, r4, r8 @ low-bit sums of both rows; exactly one of the two rows carried the rounding term | |
| 578 add r5, r5, r9 | |
| 579 ldr r14, =0x0f0f0f0f | |
| 580 add r6, r6, r10 @ high-part sums of both rows | |
| 581 add r7, r7, r11 | |
| 582 and r4, r14, r4, lsr #2 @ carry out of the low bits, per byte | |
| 583 and r5, r14, r5, lsr #2 | |
| 584 add r4, r4, r6 @ final per-byte result | |
| 585 add r5, r5, r7 | |
| 586 stm r0, {r4-r5} | |
| 587 add r0, r0, r2 | |
| 588 bge 6b @ uses the flags of the subs at the end of RND_XY2_IT (nothing in between sets flags); r8-r11 carry over as the previous row | |
| 589 pop {r4-r11,pc} @ returns; pairs with the push {r4-r11,lr} in the calling function | |
| 590 .endm | |
| 591 | |
| 592 .align 5 | |
| 593 function ff_put_pixels8_xy2_arm, export=1 | |
| 594 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |
| 595 @ block = word aligned, pixels = unaligned; 2x2 average built from RND_XY2_EXPAND with rnd = lsl, which selects the 0x02020202 rounding term | |
| 596 pld [r1] | |
| 597 push {r4-r11,lr} @ R14 is also called LR; it is used as scratch inside the macros, hence saved here | |
| 598 JMP_ALIGN r5, r1 @ r1 is rounded down to a word; labels 1-4 below are the per-offset variants | |
| 599 1: RND_XY2_EXPAND 0, lsl @ each expansion ends with pop {..., pc}, so there is no fall-through into the next label | |
| 600 .align 5 | |
| 601 2: RND_XY2_EXPAND 1, lsl | |
| 602 .align 5 | |
| 603 3: RND_XY2_EXPAND 2, lsl | |
| 604 .align 5 | |
| 605 4: RND_XY2_EXPAND 3, lsl | |
| 606 endfunc | |
| 607 | |
| 608 .align 5 | |
| 609 function ff_put_no_rnd_pixels8_xy2_arm, export=1 | |
| 610 @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) | |
| 611 @ block = word aligned, pixels = unaligned; 2x2 average built from RND_XY2_EXPAND with rnd = lsr, which selects the smaller 0x01010101 rounding term | |
| 612 pld [r1] | |
| 613 push {r4-r11,lr} @ r14 (lr) is used as scratch inside the macros, hence saved here | |
| 614 JMP_ALIGN r5, r1 @ r1 is rounded down to a word; labels 1-4 below are the per-offset variants | |
| 615 1: RND_XY2_EXPAND 0, lsr @ each expansion ends with pop {..., pc}, so there is no fall-through into the next label | |
| 616 .align 5 | |
| 617 2: RND_XY2_EXPAND 1, lsr | |
| 618 .align 5 | |
| 619 3: RND_XY2_EXPAND 2, lsr | |
| 620 .align 5 | |
| 621 4: RND_XY2_EXPAND 3, lsr | |
| 622 endfunc | |
| 623 | |
| 624 .align 5 | |
| 625 @ void ff_add_pixels_clamped_arm(int16_t *block, uint8_t *dest, int stride) -- adds 8 rows of 8 signed 16-bit values from block to the bytes at dest, clamping each result to 0..255; r0 = block, r1 = dest, r2 = stride | |
| 626 function ff_add_pixels_clamped_arm, export=1 | |
| 627 push {r4-r10} | |
| 628 mov r10, #8 /* row counter: 8 rows */ | |
| 629 1: | |
| 630 ldr r4, [r1] /* load dest: 4 pixels as one word (word-aligned dest presumably required - confirm) */ | |
| 631 /* block[0] and block[1]; pixel 0 is taken from bits 0-7, i.e. little-endian byte order is assumed */ | |
| 632 ldrsh r5, [r0] | |
| 633 ldrsh r7, [r0, #2] | |
| 634 and r6, r4, #0xFF | |
| 635 and r8, r4, #0xFF00 | |
| 636 add r6, r5, r6 /* sum = coefficient + pixel */ | |
| 637 add r8, r7, r8, lsr #8 | |
| 638 mvn r5, r5 /* ~coefficient: its top byte is 0x00 for a negative coefficient, 0xFF otherwise */ | |
| 639 mvn r7, r7 | |
| 640 tst r6, #0x100 /* bit 8 set means the sum left 0..255; NOTE(review): only bit 8 is tested, so this relies on sums staying within -256..511 */ | |
| 641 movne r6, r5, lsr #24 /* clamp: 0 after underflow, 255 after overflow */ | |
| 642 tst r8, #0x100 | |
| 643 movne r8, r7, lsr #24 | |
| 644 mov r9, r6 /* r9 collects the four result bytes */ | |
| 645 ldrsh r5, [r0, #4] /* moved from [A] */ | |
| 646 orr r9, r9, r8, lsl #8 | |
| 647 /* block[2] and block[3] */ | |
| 648 /* [A] marks where the load hoisted above originally stood */ | |
| 649 ldrsh r7, [r0, #6] | |
| 650 and r6, r4, #0xFF0000 | |
| 651 and r8, r4, #0xFF000000 | |
| 652 add r6, r5, r6, lsr #16 | |
| 653 add r8, r7, r8, lsr #24 | |
| 654 mvn r5, r5 | |
| 655 mvn r7, r7 | |
| 656 tst r6, #0x100 | |
| 657 movne r6, r5, lsr #24 | |
| 658 tst r8, #0x100 | |
| 659 movne r8, r7, lsr #24 | |
| 660 orr r9, r9, r6, lsl #16 | |
| 661 ldr r4, [r1, #4] /* moved from [B] */ | |
| 662 orr r9, r9, r8, lsl #24 | |
| 663 /* store dest */ | |
| 664 ldrsh r5, [r0, #8] /* moved from [C] */ | |
| 665 str r9, [r1] | |
| 666 | |
| 667 /* load dest: second word of the row, already loaded at the line marked moved from [B] */ | |
| 668 /* [B] */ | |
| 669 /* block[4] and block[5] */ | |
| 670 /* [C] */ | |
| 671 ldrsh r7, [r0, #10] | |
| 672 and r6, r4, #0xFF | |
| 673 and r8, r4, #0xFF00 | |
| 674 add r6, r5, r6 | |
| 675 add r8, r7, r8, lsr #8 | |
| 676 mvn r5, r5 | |
| 677 mvn r7, r7 | |
| 678 tst r6, #0x100 | |
| 679 movne r6, r5, lsr #24 | |
| 680 tst r8, #0x100 | |
| 681 movne r8, r7, lsr #24 | |
| 682 mov r9, r6 | |
| 683 ldrsh r5, [r0, #12] /* moved from [D] */ | |
| 684 orr r9, r9, r8, lsl #8 | |
| 685 /* block[6] and block[7] */ | |
| 686 /* [D] */ | |
| 687 ldrsh r7, [r0, #14] | |
| 688 and r6, r4, #0xFF0000 | |
| 689 and r8, r4, #0xFF000000 | |
| 690 add r6, r5, r6, lsr #16 | |
| 691 add r8, r7, r8, lsr #24 | |
| 692 mvn r5, r5 | |
| 693 mvn r7, r7 | |
| 694 tst r6, #0x100 | |
| 695 movne r6, r5, lsr #24 | |
| 696 tst r8, #0x100 | |
| 697 movne r8, r7, lsr #24 | |
| 698 orr r9, r9, r6, lsl #16 | |
| 699 add r0, r0, #16 /* moved from [E]; next row of block: 8 coefficients of 2 bytes */ | |
| 700 orr r9, r9, r8, lsl #24 | |
| 701 subs r10, r10, #1 /* moved from [F]; str and add below leave these flags for the bne */ | |
| 702 /* store dest */ | |
| 703 str r9, [r1, #4] | |
| 704 | |
| 705 /* [E] */ | |
| 706 /* [F] */ | |
| 707 add r1, r1, r2 /* next dest row */ | |
| 708 bne 1b | |
| 709 | |
| 710 pop {r4-r10} | |
| 711 bx lr | |
| 712 endfunc | |
