Mercurial > cgi-bin > hgwebdir.cgi > PR > Applications > VSs > VSs__H264__App
comparison libavcodec/arm/jrevdct_arm.S @ 2:897f711a7157
rearrange to work with autoconf
| author | Nina Engelhardt <nengel@mailbox.tu-berlin.de> |
|---|---|
| date | Tue, 25 Sep 2012 15:55:33 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:08fc73efa550 |
|---|---|
| 1 /* | |
| 2 C-like prototype : | |
| 3 void j_rev_dct_arm(DCTBLOCK data) | |
| 4 | |
| 5 With DCTBLOCK being a pointer to an array of 64 'signed shorts' | |
| 6 | |
| 7 Copyright (c) 2001 Lionel Ulmer (lionel.ulmer@free.fr / bbrox@bbrox.org) | |
| 8 | |
| 9 Permission is hereby granted, free of charge, to any person obtaining a copy | |
| 10 of this software and associated documentation files (the "Software"), to deal | |
| 11 in the Software without restriction, including without limitation the rights | |
| 12 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
| 13 copies of the Software, and to permit persons to whom the Software is | |
| 14 furnished to do so, subject to the following conditions: | |
| 15 | |
| 16 The above copyright notice and this permission notice shall be included in | |
| 17 all copies or substantial portions of the Software. | |
| 18 | |
| 19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
| 20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
| 21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
| 22 COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER | |
| 23 IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | |
| 24 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
| 25 | |
| 26 */ | |
| 27 | |
| 28 #include "asm.S" | |
| 29 /* Fixed-point constants: each FIX_x below is x scaled by 2^13 (8192) and rounded to an integer; FIX_M_x is the negated constant -x. FIX_0xFFFF is a 16-bit mask, not a multiplier. */ | |
| 30 #define FIX_0_298631336 2446 | |
| 31 #define FIX_0_541196100 4433 | |
| 32 #define FIX_0_765366865 6270 | |
| 33 #define FIX_1_175875602 9633 | |
| 34 #define FIX_1_501321110 12299 | |
| 35 #define FIX_2_053119869 16819 | |
| 36 #define FIX_3_072711026 25172 | |
| 37 #define FIX_M_0_390180644 -3196 | |
| 38 #define FIX_M_0_899976223 -7373 | |
| 39 #define FIX_M_1_847759065 -15137 | |
| 40 #define FIX_M_1_961570560 -16069 | |
| 41 #define FIX_M_2_562915447 -20995 | |
| 42 #define FIX_0xFFFF 0xFFFF | |
| 43 /* Byte offsets of the matching 32-bit entries in const_array (same order as above, 4 bytes apart); they are used as ldr immediates relative to r11. */ | |
| 44 #define FIX_0_298631336_ID 0 | |
| 45 #define FIX_0_541196100_ID 4 | |
| 46 #define FIX_0_765366865_ID 8 | |
| 47 #define FIX_1_175875602_ID 12 | |
| 48 #define FIX_1_501321110_ID 16 | |
| 49 #define FIX_2_053119869_ID 20 | |
| 50 #define FIX_3_072711026_ID 24 | |
| 51 #define FIX_M_0_390180644_ID 28 | |
| 52 #define FIX_M_0_899976223_ID 32 | |
| 53 #define FIX_M_1_847759065_ID 36 | |
| 54 #define FIX_M_1_961570560_ID 40 | |
| 55 #define FIX_M_2_562915447_ID 44 | |
| 56 #define FIX_0xFFFF_ID 48 | |
| 57 .text | |
| 58 .align | |
| 59 @ ff_j_rev_dct_arm: r0 = pointer to 64 signed 16-bit coefficients; the block is transformed in place, first 8 row passes (16 bytes per row), then 8 column passes (2 bytes per column) | |
| 60 function ff_j_rev_dct_arm, export=1 | |
| 61 stmdb sp!, { r4 - r12, lr } @ all callee saved regs | |
| 62 | |
| 63 sub sp, sp, #4 @ reserve some space on the stack | |
| 64 str r0, [ sp ] @ save the DCT pointer to the stack | |
| 65 | |
| 66 mov lr, r0 @ lr = pointer to the current row | |
| 67 mov r12, #8 @ r12 = row-counter | |
| 68 adr r11, const_array @ r11 = base pointer to the constants array | |
| 69 row_loop: | |
| 70 ldrsh r0, [lr, # 0] @ r0 = 'd0' | |
| 71 ldrsh r2, [lr, # 2] @ r2 = 'd2' | |
| 72 | |
| 73 @ Optimization for rows that have all items except the first set to 0 | |
| 74 @ (the 32-bit loads below work as the DCTELEMS are always 4-byte aligned) | |
| 75 ldr r5, [lr, # 0] @ r5 = elements 0 and 1 (one 32-bit load) | |
| 76 ldr r6, [lr, # 4] @ r6 = elements 2 and 3 | |
| 77 ldr r3, [lr, # 8] @ r3 = elements 4 and 5 | |
| 78 ldr r4, [lr, #12] @ r4 = elements 6 and 7 | |
| 79 orr r3, r3, r4 | |
| 80 orr r3, r3, r6 @ r3 = OR of elements 2 to 7 | |
| 81 orrs r5, r3, r5 @ r5 = OR of all 8 elements | |
| 82 beq end_of_row_loop @ nothing to be done as ALL of them are '0' | |
| 83 orrs r3, r3, r2 @ r3 = OR of elements 1 to 7 (r2 is element 1) | |
| 84 beq empty_row @ only element 0 is non-zero | |
| 85 | |
| 86 ldrsh r1, [lr, # 8] @ r1 = 'd1' | |
| 87 ldrsh r4, [lr, # 4] @ r4 = 'd4' | |
| 88 ldrsh r6, [lr, # 6] @ r6 = 'd6' | |
| 89 | |
| 90 ldr r3, [r11, #FIX_0_541196100_ID] | |
| 91 add r7, r2, r6 | |
| 92 ldr r5, [r11, #FIX_M_1_847759065_ID] | |
| 93 mul r7, r3, r7 @ r7 = z1 | |
| 94 ldr r3, [r11, #FIX_0_765366865_ID] | |
| 95 mla r6, r5, r6, r7 @ r6 = tmp2 | |
| 96 add r5, r0, r4 @ r5 = tmp0 | |
| 97 mla r2, r3, r2, r7 @ r2 = tmp3 | |
| 98 sub r3, r0, r4 @ r3 = tmp1 | |
| 99 | |
| 100 add r0, r2, r5, lsl #13 @ r0 = tmp10 | |
| 101 rsb r2, r2, r5, lsl #13 @ r2 = tmp13 | |
| 102 add r4, r6, r3, lsl #13 @ r4 = tmp11 | |
| 103 rsb r3, r6, r3, lsl #13 @ r3 = tmp12 | |
| 104 | |
| 105 stmdb sp!, { r0, r2, r3, r4 } @ save on the stack tmp10, tmp13, tmp12, tmp11 | |
| 106 | |
| 107 ldrsh r3, [lr, #10] @ r3 = 'd3' | |
| 108 ldrsh r5, [lr, #12] @ r5 = 'd5' | |
| 109 ldrsh r7, [lr, #14] @ r7 = 'd7' | |
| 110 | |
| 111 add r0, r3, r5 @ r0 = 'z2' | |
| 112 add r2, r1, r7 @ r2 = 'z1' | |
| 113 add r4, r3, r7 @ r4 = 'z3' | |
| 114 add r6, r1, r5 @ r6 = 'z4' | |
| 115 ldr r9, [r11, #FIX_1_175875602_ID] | |
| 116 add r8, r4, r6 @ r8 = z3 + z4 | |
| 117 ldr r10, [r11, #FIX_M_0_899976223_ID] | |
| 118 mul r8, r9, r8 @ r8 = 'z5' | |
| 119 ldr r9, [r11, #FIX_M_2_562915447_ID] | |
| 120 mul r2, r10, r2 @ r2 = 'z1' | |
| 121 ldr r10, [r11, #FIX_M_1_961570560_ID] | |
| 122 mul r0, r9, r0 @ r0 = 'z2' | |
| 123 ldr r9, [r11, #FIX_M_0_390180644_ID] | |
| 124 mla r4, r10, r4, r8 @ r4 = 'z3' | |
| 125 ldr r10, [r11, #FIX_0_298631336_ID] | |
| 126 mla r6, r9, r6, r8 @ r6 = 'z4' | |
| 127 ldr r9, [r11, #FIX_2_053119869_ID] | |
| 128 mla r7, r10, r7, r2 @ r7 = tmp0 + z1 | |
| 129 ldr r10, [r11, #FIX_3_072711026_ID] | |
| 130 mla r5, r9, r5, r0 @ r5 = tmp1 + z2 | |
| 131 ldr r9, [r11, #FIX_1_501321110_ID] | |
| 132 mla r3, r10, r3, r0 @ r3 = tmp2 + z2 | |
| 133 add r7, r7, r4 @ r7 = tmp0 | |
| 134 mla r1, r9, r1, r2 @ r1 = tmp3 + z1 | |
| 135 add r5, r5, r6 @ r5 = tmp1 | |
| 136 add r3, r3, r4 @ r3 = tmp2 | |
| 137 add r1, r1, r6 @ r1 = tmp3 | |
| 138 | |
| 139 ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11 | |
| 140 @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0 | |
| 141 | |
| 142 @ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) | |
| 143 add r8, r0, r1 | |
| 144 add r8, r8, #(1<<10) | |
| 145 mov r8, r8, asr #11 | |
| 146 strh r8, [lr, # 0] | |
| 147 | |
| 148 @ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) | |
| 149 sub r8, r0, r1 | |
| 150 add r8, r8, #(1<<10) | |
| 151 mov r8, r8, asr #11 | |
| 152 strh r8, [lr, #14] | |
| 153 | |
| 154 @ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) | |
| 155 add r8, r6, r3 | |
| 156 add r8, r8, #(1<<10) | |
| 157 mov r8, r8, asr #11 | |
| 158 strh r8, [lr, # 2] | |
| 159 | |
| 160 @ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) | |
| 161 sub r8, r6, r3 | |
| 162 add r8, r8, #(1<<10) | |
| 163 mov r8, r8, asr #11 | |
| 164 strh r8, [lr, #12] | |
| 165 | |
| 166 @ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) | |
| 167 add r8, r4, r5 | |
| 168 add r8, r8, #(1<<10) | |
| 169 mov r8, r8, asr #11 | |
| 170 strh r8, [lr, # 4] | |
| 171 | |
| 172 @ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) | |
| 173 sub r8, r4, r5 | |
| 174 add r8, r8, #(1<<10) | |
| 175 mov r8, r8, asr #11 | |
| 176 strh r8, [lr, #10] | |
| 177 | |
| 178 @ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) | |
| 179 add r8, r2, r7 | |
| 180 add r8, r8, #(1<<10) | |
| 181 mov r8, r8, asr #11 | |
| 182 strh r8, [lr, # 6] | |
| 183 | |
| 184 @ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) | |
| 185 sub r8, r2, r7 | |
| 186 add r8, r8, #(1<<10) | |
| 187 mov r8, r8, asr #11 | |
| 188 strh r8, [lr, # 8] | |
| 189 | |
| 190 @ End of row loop | |
| 191 add lr, lr, #16 | |
| 192 subs r12, r12, #1 | |
| 193 bne row_loop | |
| 194 beq start_column_loop @ counter reached 0: jump over the empty-row path | |
| 195 | |
| 196 empty_row: @ only element 0 of the row is non-zero: every output of the row is the same value | |
| 197 ldr r1, [r11, #FIX_0xFFFF_ID] @ r1 = 16-bit mask | |
| 198 mov r0, r0, lsl #2 @ r0 = d0 << 2 (what the full path yields for d0 << 13 descaled by 11 bits) | |
| 199 and r0, r0, r1 @ keep the low halfword only | |
| 200 add r0, r0, r0, lsl #16 @ replicate it into both halfwords of r0 | |
| 201 str r0, [lr, # 0] @ four word stores fill all 8 elements of the row | |
| 202 str r0, [lr, # 4] | |
| 203 str r0, [lr, # 8] | |
| 204 str r0, [lr, #12] | |
| 205 | |
| 206 end_of_row_loop: | |
| 207 @ End of row loop (reached from the all-zero test and by falling through from empty_row) | |
| 208 add lr, lr, #16 | |
| 209 subs r12, r12, #1 | |
| 210 bne row_loop | |
| 211 | |
| 212 start_column_loop: | |
| 213 @ Start of column loop | |
| 214 ldr lr, [ sp ] @ lr = DCT pointer saved at function entry | |
| 215 mov r12, #8 @ r12 = column-counter | |
| 216 column_loop: | |
| 217 ldrsh r0, [lr, #( 0*8)] @ r0 = 'd0' | |
| 218 ldrsh r2, [lr, #( 4*8)] @ r2 = 'd2' | |
| 219 ldrsh r4, [lr, #( 8*8)] @ r4 = 'd4' | |
| 220 ldrsh r6, [lr, #(12*8)] @ r6 = 'd6' | |
| 221 | |
| 222 ldr r3, [r11, #FIX_0_541196100_ID] | |
| 223 add r1, r2, r6 | |
| 224 ldr r5, [r11, #FIX_M_1_847759065_ID] | |
| 225 mul r1, r3, r1 @ r1 = z1 | |
| 226 ldr r3, [r11, #FIX_0_765366865_ID] | |
| 227 mla r6, r5, r6, r1 @ r6 = tmp2 | |
| 228 add r5, r0, r4 @ r5 = tmp0 | |
| 229 mla r2, r3, r2, r1 @ r2 = tmp3 | |
| 230 sub r3, r0, r4 @ r3 = tmp1 | |
| 231 | |
| 232 add r0, r2, r5, lsl #13 @ r0 = tmp10 | |
| 233 rsb r2, r2, r5, lsl #13 @ r2 = tmp13 | |
| 234 add r4, r6, r3, lsl #13 @ r4 = tmp11 | |
| 235 rsb r6, r6, r3, lsl #13 @ r6 = tmp12 | |
| 236 | |
| 237 ldrsh r1, [lr, #( 2*8)] @ r1 = 'd1' | |
| 238 ldrsh r3, [lr, #( 6*8)] @ r3 = 'd3' | |
| 239 ldrsh r5, [lr, #(10*8)] @ r5 = 'd5' | |
| 240 ldrsh r7, [lr, #(14*8)] @ r7 = 'd7' | |
| 241 | |
| 242 @ Check for empty odd column (happens about 20 to 25 % of the time according to my stats) | |
| 243 orr r9, r1, r3 | |
| 244 orr r10, r5, r7 | |
| 245 orrs r10, r9, r10 | |
| 246 beq empty_odd_column | |
| 247 | |
| 248 stmdb sp!, { r0, r2, r4, r6 } @ save on the stack tmp10, tmp13, tmp11, tmp12 | |
| 249 | |
| 250 add r0, r3, r5 @ r0 = 'z2' | |
| 251 add r2, r1, r7 @ r2 = 'z1' | |
| 252 add r4, r3, r7 @ r4 = 'z3' | |
| 253 add r6, r1, r5 @ r6 = 'z4' | |
| 254 ldr r9, [r11, #FIX_1_175875602_ID] | |
| 255 add r8, r4, r6 | |
| 256 ldr r10, [r11, #FIX_M_0_899976223_ID] | |
| 257 mul r8, r9, r8 @ r8 = 'z5' | |
| 258 ldr r9, [r11, #FIX_M_2_562915447_ID] | |
| 259 mul r2, r10, r2 @ r2 = 'z1' | |
| 260 ldr r10, [r11, #FIX_M_1_961570560_ID] | |
| 261 mul r0, r9, r0 @ r0 = 'z2' | |
| 262 ldr r9, [r11, #FIX_M_0_390180644_ID] | |
| 263 mla r4, r10, r4, r8 @ r4 = 'z3' | |
| 264 ldr r10, [r11, #FIX_0_298631336_ID] | |
| 265 mla r6, r9, r6, r8 @ r6 = 'z4' | |
| 266 ldr r9, [r11, #FIX_2_053119869_ID] | |
| 267 mla r7, r10, r7, r2 @ r7 = tmp0 + z1 | |
| 268 ldr r10, [r11, #FIX_3_072711026_ID] | |
| 269 mla r5, r9, r5, r0 @ r5 = tmp1 + z2 | |
| 270 ldr r9, [r11, #FIX_1_501321110_ID] | |
| 271 mla r3, r10, r3, r0 @ r3 = tmp2 + z2 | |
| 272 add r7, r7, r4 @ r7 = tmp0 | |
| 273 mla r1, r9, r1, r2 @ r1 = tmp3 + z1 | |
| 274 add r5, r5, r6 @ r5 = tmp1 | |
| 275 add r3, r3, r4 @ r3 = tmp2 | |
| 276 add r1, r1, r6 @ r1 = tmp3 | |
| 277 | |
| 278 ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12 | |
| 279 @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0 | |
| 280 | |
| 281 @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3) | |
| 282 add r8, r0, r1 | |
| 283 add r8, r8, #(1<<17) | |
| 284 mov r8, r8, asr #18 | |
| 285 strh r8, [lr, #( 0*8)] | |
| 286 | |
| 287 @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3) | |
| 288 sub r8, r0, r1 | |
| 289 add r8, r8, #(1<<17) | |
| 290 mov r8, r8, asr #18 | |
| 291 strh r8, [lr, #(14*8)] | |
| 292 | |
| 293 @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3) | |
| 294 add r8, r4, r3 | |
| 295 add r8, r8, #(1<<17) | |
| 296 mov r8, r8, asr #18 | |
| 297 strh r8, [lr, #( 2*8)] | |
| 298 | |
| 299 @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3) | |
| 300 sub r8, r4, r3 | |
| 301 add r8, r8, #(1<<17) | |
| 302 mov r8, r8, asr #18 | |
| 303 strh r8, [lr, #(12*8)] | |
| 304 | |
| 305 @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3) | |
| 306 add r8, r6, r5 | |
| 307 add r8, r8, #(1<<17) | |
| 308 mov r8, r8, asr #18 | |
| 309 strh r8, [lr, #( 4*8)] | |
| 310 | |
| 311 @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3) | |
| 312 sub r8, r6, r5 | |
| 313 add r8, r8, #(1<<17) | |
| 314 mov r8, r8, asr #18 | |
| 315 strh r8, [lr, #(10*8)] | |
| 316 | |
| 317 @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3) | |
| 318 add r8, r2, r7 | |
| 319 add r8, r8, #(1<<17) | |
| 320 mov r8, r8, asr #18 | |
| 321 strh r8, [lr, #( 6*8)] | |
| 322 | |
| 323 @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3) | |
| 324 sub r8, r2, r7 | |
| 325 add r8, r8, #(1<<17) | |
| 326 mov r8, r8, asr #18 | |
| 327 strh r8, [lr, #( 8*8)] | |
| 328 | |
| 329 @ End of column loop | |
| 330 add lr, lr, #2 @ advance to the next column (one 16-bit element) | |
| 331 subs r12, r12, #1 | |
| 332 bne column_loop | |
| 333 beq the_end @ counter reached 0: jump over the empty-odd-column path | |
| 334 | |
| 335 empty_odd_column: @ d1 = d3 = d5 = d7 = 0, so each +/- pair below yields one value stored twice | |
| 336 @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3) | |
| 337 @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3) | |
| 338 add r0, r0, #(1<<17) | |
| 339 mov r0, r0, asr #18 | |
| 340 strh r0, [lr, #( 0*8)] | |
| 341 strh r0, [lr, #(14*8)] | |
| 342 | |
| 343 @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3) | |
| 344 @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3) | |
| 345 add r4, r4, #(1<<17) | |
| 346 mov r4, r4, asr #18 | |
| 347 strh r4, [lr, #( 2*8)] | |
| 348 strh r4, [lr, #(12*8)] | |
| 349 | |
| 350 @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3) | |
| 351 @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3) | |
| 352 add r6, r6, #(1<<17) | |
| 353 mov r6, r6, asr #18 | |
| 354 strh r6, [lr, #( 4*8)] | |
| 355 strh r6, [lr, #(10*8)] | |
| 356 | |
| 357 @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3) | |
| 358 @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3) | |
| 359 add r2, r2, #(1<<17) | |
| 360 mov r2, r2, asr #18 | |
| 361 strh r2, [lr, #( 6*8)] | |
| 362 strh r2, [lr, #( 8*8)] | |
| 363 | |
| 364 @ End of column loop | |
| 365 add lr, lr, #2 @ advance to the next column (one 16-bit element) | |
| 366 subs r12, r12, #1 | |
| 367 bne column_loop | |
| 368 | |
| 369 the_end: | |
| 370 @ The end.... | |
| 371 add sp, sp, #4 @ release the stack slot that held the DCT pointer | |
| 372 ldmia sp!, { r4 - r12, pc } @ restore callee saved regs and return | |
| 373 | |
| 374 const_array: @ table of 32-bit constants, addressed as [r11, #..._ID] with the byte offsets listed below | |
| 375 .align @ NOTE(review): the label is placed before .align; if padding were ever emitted here the label would not point at the first word - confirm | |
| 376 .word FIX_0_298631336 @ offset 0 | |
| 377 .word FIX_0_541196100 @ offset 4 | |
| 378 .word FIX_0_765366865 @ offset 8 | |
| 379 .word FIX_1_175875602 @ offset 12 | |
| 380 .word FIX_1_501321110 @ offset 16 | |
| 381 .word FIX_2_053119869 @ offset 20 | |
| 382 .word FIX_3_072711026 @ offset 24 | |
| 383 .word FIX_M_0_390180644 @ offset 28 | |
| 384 .word FIX_M_0_899976223 @ offset 32 | |
| 385 .word FIX_M_1_847759065 @ offset 36 | |
| 386 .word FIX_M_1_961570560 @ offset 40 | |
| 387 .word FIX_M_2_562915447 @ offset 44 | |
| 388 .word FIX_0xFFFF @ offset 48: 16-bit mask loaded in the empty_row path |
