Mercurial > cgi-bin > hgwebdir.cgi > PR > Applications > VSs > VSs__H264__App
diff libavcodec/arm/jrevdct_arm.S @ 2:897f711a7157
rearrange to work with autoconf
| author | Nina Engelhardt <nengel@mailbox.tu-berlin.de> |
|---|---|
| date | Tue, 25 Sep 2012 15:55:33 +0200 |
| parents | |
| children | |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/libavcodec/arm/jrevdct_arm.S Tue Sep 25 15:55:33 2012 +0200 1.3 @@ -0,0 +1,388 @@ 1.4 +/* 1.5 + C-like prototype : 1.6 + void j_rev_dct_arm(DCTBLOCK data) 1.7 + 1.8 + With DCTBLOCK being a pointer to an array of 64 'signed shorts' 1.9 + 1.10 + Copyright (c) 2001 Lionel Ulmer (lionel.ulmer@free.fr / bbrox@bbrox.org) 1.11 + 1.12 + Permission is hereby granted, free of charge, to any person obtaining a copy 1.13 + of this software and associated documentation files (the "Software"), to deal 1.14 + in the Software without restriction, including without limitation the rights 1.15 + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 1.16 + copies of the Software, and to permit persons to whom the Software is 1.17 + furnished to do so, subject to the following conditions: 1.18 + 1.19 + The above copyright notice and this permission notice shall be included in 1.20 + all copies or substantial portions of the Software. 1.21 + 1.22 + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1.23 + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1.24 + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 1.25 + COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 1.26 + IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 1.27 + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/

#include "asm.S"

@ Fixed-point multipliers used by both passes.
#define FIX_0_298631336 2446
#define FIX_0_541196100 4433
#define FIX_0_765366865 6270
#define FIX_1_175875602 9633
#define FIX_1_501321110 12299
#define FIX_2_053119869 16819
#define FIX_3_072711026 25172
#define FIX_M_0_390180644 -3196
#define FIX_M_0_899976223 -7373
#define FIX_M_1_847759065 -15137
#define FIX_M_1_961570560 -16069
#define FIX_M_2_562915447 -20995
#define FIX_0xFFFF 0xFFFF

@ Byte offsets of the multipliers inside const_array (one 32-bit word each).
@ They must stay in the same order as the .word list at the end of the file.
#define FIX_0_298631336_ID 0
#define FIX_0_541196100_ID 4
#define FIX_0_765366865_ID 8
#define FIX_1_175875602_ID 12
#define FIX_1_501321110_ID 16
#define FIX_2_053119869_ID 20
#define FIX_3_072711026_ID 24
#define FIX_M_0_390180644_ID 28
#define FIX_M_0_899976223_ID 32
#define FIX_M_1_847759065_ID 36
#define FIX_M_1_961570560_ID 40
#define FIX_M_2_562915447_ID 44
#define FIX_0xFFFF_ID 48
        .text
        .align

@ ----------------------------------------------------------------------------
@ ff_j_rev_dct_arm -- in-place inverse DCT of one 8x8 block
@
@ In:    r0 = pointer to the block (64 signed 16-bit values, 16 bytes per row)
@ Out:   nothing in registers; the block is overwritten with the result
@ Stack: 40 bytes of saved registers + 4 bytes holding the block pointer,
@        plus 16 bytes pushed temporarily while the odd part is computed
@
@ Register roles for the whole function:
@   lr  = pointer to the row (pass 1) or column (pass 2) being processed
@   r12 = number of rows / columns still to do (counts down from 8)
@   r11 = base address of const_array
@   r0 - r10 = scratch, reassigned several times (see the inline comments)
@
@ Pass 1 walks the 8 rows (lr += 16) and descales by 11 bits.
@ Pass 2 walks the 8 columns (lr += 2) and descales by 18 bits.
@
@ Row input layout as read by pass 1 (byte offset : coefficient):
@   0:d0  2:d2  4:d4  6:d6  8:d1  10:d3  12:d5  14:d7
@ Pass 2 reads coefficient dk of a column at byte offset 16*k; the offsets
@ are written as n*8 with n = 2*k.
@ ----------------------------------------------------------------------------
function ff_j_rev_dct_arm, export=1
        stmdb   sp!, { r4 - r12, lr }           @ all callee saved regs

        sub     sp, sp, #4                      @ reserve some space on the stack
        str     r0, [ sp ]                      @ save the DCT pointer to the stack

        mov     lr, r0                          @ lr = pointer to the current row
        mov     r12, #8                         @ r12 = row-counter
        adr     r11, const_array                @ r11 = base pointer to the constants array
row_loop:
        ldrsh   r0, [lr, # 0]                   @ r0 = 'd0'
        ldrsh   r2, [lr, # 2]                   @ r2 = 'd2'

        @ Optimization for row that have all items except the first set to 0
        @ (this works as the DCTELEMS are always 4-byte aligned)
        @ The row is read as four 32-bit words: r5 = word 0, r3 = OR of words 1..3.
        ldr     r5, [lr, # 0]
        ldr     r6, [lr, # 4]
        ldr     r3, [lr, # 8]
        ldr     r4, [lr, #12]
        orr     r3, r3, r4
        orr     r3, r3, r6
        orrs    r5, r3, r5
        beq     end_of_row_loop                 @ nothing to be done as ALL of them are '0'
        orrs    r3, r3, r2                      @ words 1..3 and d2 all zero -> only d0 is set
        beq     empty_row

        ldrsh   r1, [lr, # 8]                   @ r1 = 'd1'
        ldrsh   r4, [lr, # 4]                   @ r4 = 'd4'
        ldrsh   r6, [lr, # 6]                   @ r6 = 'd6'

        @ Even part (d0, d2, d4, d6)
        ldr     r3, [r11, #FIX_0_541196100_ID]
        add     r7, r2, r6
        ldr     r5, [r11, #FIX_M_1_847759065_ID]
        mul     r7, r3, r7                      @ r7 = z1
        ldr     r3, [r11, #FIX_0_765366865_ID]
        mla     r6, r5, r6, r7                  @ r6 = tmp2
        add     r5, r0, r4                      @ r5 = tmp0
        mla     r2, r3, r2, r7                  @ r2 = tmp3
        sub     r3, r0, r4                      @ r3 = tmp1

        add     r0, r2, r5, lsl #13             @ r0 = tmp10
        rsb     r2, r2, r5, lsl #13             @ r2 = tmp13
        add     r4, r6, r3, lsl #13             @ r4 = tmp11
        rsb     r3, r6, r3, lsl #13             @ r3 = tmp12

        stmdb   sp!, { r0, r2, r3, r4 }         @ save on the stack tmp10, tmp13, tmp12, tmp11

        @ Odd part (d1, d3, d5, d7)
        ldrsh   r3, [lr, #10]                   @ r3 = 'd3'
        ldrsh   r5, [lr, #12]                   @ r5 = 'd5'
        ldrsh   r7, [lr, #14]                   @ r7 = 'd7'

        add     r0, r3, r5                      @ r0 = 'z2'
        add     r2, r1, r7                      @ r2 = 'z1'
        add     r4, r3, r7                      @ r4 = 'z3'
        add     r6, r1, r5                      @ r6 = 'z4'
        ldr     r9, [r11, #FIX_1_175875602_ID]
        add     r8, r4, r6                      @ r8 = z3 + z4
        ldr     r10, [r11, #FIX_M_0_899976223_ID]
        mul     r8, r9, r8                      @ r8 = 'z5'
        ldr     r9, [r11, #FIX_M_2_562915447_ID]
        mul     r2, r10, r2                     @ r2 = 'z1'
        ldr     r10, [r11, #FIX_M_1_961570560_ID]
        mul     r0, r9, r0                      @ r0 = 'z2'
        ldr     r9, [r11, #FIX_M_0_390180644_ID]
        mla     r4, r10, r4, r8                 @ r4 = 'z3'
        ldr     r10, [r11, #FIX_0_298631336_ID]
        mla     r6, r9, r6, r8                  @ r6 = 'z4'
        ldr     r9, [r11, #FIX_2_053119869_ID]
        mla     r7, r10, r7, r2                 @ r7 = tmp0 + z1
        ldr     r10, [r11, #FIX_3_072711026_ID]
        mla     r5, r9, r5, r0                  @ r5 = tmp1 + z2
        ldr     r9, [r11, #FIX_1_501321110_ID]
        mla     r3, r10, r3, r0                 @ r3 = tmp2 + z2
        add     r7, r7, r4                      @ r7 = tmp0
        mla     r1, r9, r1, r2                  @ r1 = tmp3 + z1
        add     r5, r5, r6                      @ r5 = tmp1
        add     r3, r3, r4                      @ r3 = tmp2
        add     r1, r1, r6                      @ r1 = tmp3

        @ The four values pushed from r0, r2, r3, r4 come back in r0, r2, r4, r6,
        @ so tmp12 and tmp11 land in different registers than before the push.
        ldmia   sp!, { r0, r2, r4, r6 }         @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11
                                                @ r1 = tmp3  / r3 = tmp2  / r5 = tmp1  / r7 = tmp0

        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS)
        add     r8, r0, r1
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, # 0]

        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS)
        sub     r8, r0, r1
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, #14]

        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS)
        add     r8, r6, r3
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, # 2]

        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS)
        sub     r8, r6, r3
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, #12]

        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS)
        add     r8, r4, r5
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, # 4]

        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS)
        sub     r8, r4, r5
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, #10]

        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS)
        add     r8, r2, r7
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, # 6]

        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS)
        sub     r8, r2, r7
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, # 8]

        @ End of row loop
        add     lr, lr, #16
        subs    r12, r12, #1
        bne     row_loop
        b       start_column_loop               @ all rows done: skip the special-case code below

empty_row:
        @ Only d0 is non-zero: all eight outputs of the row are d0 << 2.
        @ The value is truncated to 16 bits and copied into both halves of a
        @ word so that four word stores fill the whole row.
        ldr     r1, [r11, #FIX_0xFFFF_ID]
        mov     r0, r0, lsl #2
        and     r0, r0, r1
        add     r0, r0, r0, lsl #16
        str     r0, [lr, # 0]
        str     r0, [lr, # 4]
        str     r0, [lr, # 8]
        str     r0, [lr, #12]

end_of_row_loop:
        @ End of loop
        add     lr, lr, #16
        subs    r12, r12, #1
        bne     row_loop

start_column_loop:
        @ Start of column loop
        ldr     lr, [ sp ]                      @ lr = block pointer saved at function entry
        mov     r12, #8                         @ r12 = column-counter
column_loop:
        ldrsh   r0, [lr, #( 0*8)]               @ r0 = 'd0'
        ldrsh   r2, [lr, #( 4*8)]               @ r2 = 'd2'
        ldrsh   r4, [lr, #( 8*8)]               @ r4 = 'd4'
        ldrsh   r6, [lr, #(12*8)]               @ r6 = 'd6'

        @ Even part (d0, d2, d4, d6)
        ldr     r3, [r11, #FIX_0_541196100_ID]
        add     r1, r2, r6
        ldr     r5, [r11, #FIX_M_1_847759065_ID]
        mul     r1, r3, r1                      @ r1 = z1
        ldr     r3, [r11, #FIX_0_765366865_ID]
        mla     r6, r5, r6, r1                  @ r6 = tmp2
        add     r5, r0, r4                      @ r5 = tmp0
        mla     r2, r3, r2, r1                  @ r2 = tmp3
        sub     r3, r0, r4                      @ r3 = tmp1

        add     r0, r2, r5, lsl #13             @ r0 = tmp10
        rsb     r2, r2, r5, lsl #13             @ r2 = tmp13
        add     r4, r6, r3, lsl #13             @ r4 = tmp11
        rsb     r6, r6, r3, lsl #13             @ r6 = tmp12

        ldrsh   r1, [lr, #( 2*8)]               @ r1 = 'd1'
        ldrsh   r3, [lr, #( 6*8)]               @ r3 = 'd3'
        ldrsh   r5, [lr, #(10*8)]               @ r5 = 'd5'
        ldrsh   r7, [lr, #(14*8)]               @ r7 = 'd7'

        @ Check for empty odd column (happens about 20 to 25 % of the time according to my stats)
        orr     r9, r1, r3
        orr     r10, r5, r7
        orrs    r10, r9, r10
        beq     empty_odd_column

        stmdb   sp!, { r0, r2, r4, r6 }         @ save on the stack tmp10, tmp13, tmp11, tmp12

        @ Odd part (d1, d3, d5, d7)
        add     r0, r3, r5                      @ r0 = 'z2'
        add     r2, r1, r7                      @ r2 = 'z1'
        add     r4, r3, r7                      @ r4 = 'z3'
        add     r6, r1, r5                      @ r6 = 'z4'
        ldr     r9, [r11, #FIX_1_175875602_ID]
        add     r8, r4, r6                      @ r8 = z3 + z4
        ldr     r10, [r11, #FIX_M_0_899976223_ID]
        mul     r8, r9, r8                      @ r8 = 'z5'
        ldr     r9, [r11, #FIX_M_2_562915447_ID]
        mul     r2, r10, r2                     @ r2 = 'z1'
        ldr     r10, [r11, #FIX_M_1_961570560_ID]
        mul     r0, r9, r0                      @ r0 = 'z2'
        ldr     r9, [r11, #FIX_M_0_390180644_ID]
        mla     r4, r10, r4, r8                 @ r4 = 'z3'
        ldr     r10, [r11, #FIX_0_298631336_ID]
        mla     r6, r9, r6, r8                  @ r6 = 'z4'
        ldr     r9, [r11, #FIX_2_053119869_ID]
        mla     r7, r10, r7, r2                 @ r7 = tmp0 + z1
        ldr     r10, [r11, #FIX_3_072711026_ID]
        mla     r5, r9, r5, r0                  @ r5 = tmp1 + z2
        ldr     r9, [r11, #FIX_1_501321110_ID]
        mla     r3, r10, r3, r0                 @ r3 = tmp2 + z2
        add     r7, r7, r4                      @ r7 = tmp0
        mla     r1, r9, r1, r2                  @ r1 = tmp3 + z1
        add     r5, r5, r6                      @ r5 = tmp1
        add     r3, r3, r4                      @ r3 = tmp2
        add     r1, r1, r6                      @ r1 = tmp3

        ldmia   sp!, { r0, r2, r4, r6 }         @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12
                                                @ r1 = tmp3  / r3 = tmp2  / r5 = tmp1  / r7 = tmp0

        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
        add     r8, r0, r1
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #( 0*8)]

        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
        sub     r8, r0, r1
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #(14*8)]

        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
        add     r8, r4, r3
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #( 2*8)]

        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
        sub     r8, r4, r3
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #(12*8)]

        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
        add     r8, r6, r5
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #( 4*8)]

        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
        sub     r8, r6, r5
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #(10*8)]

        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
        add     r8, r2, r7
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #( 6*8)]

        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
        sub     r8, r2, r7
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #( 8*8)]

        @ End of column loop
        add     lr, lr, #2
        subs    r12, r12, #1
        bne     column_loop
        b       the_end                         @ all columns done: skip the special-case code below

empty_odd_column:
        @ d1, d3, d5 and d7 are all zero here, so tmp0..tmp3 are zero and each
        @ sum/difference pair below collapses to one value stored twice.

        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
        add     r0, r0, #(1<<17)
        mov     r0, r0, asr #18
        strh    r0, [lr, #( 0*8)]
        strh    r0, [lr, #(14*8)]

        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
        add     r4, r4, #(1<<17)
        mov     r4, r4, asr #18
        strh    r4, [lr, #( 2*8)]
        strh    r4, [lr, #(12*8)]

        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
        add     r6, r6, #(1<<17)
        mov     r6, r6, asr #18
        strh    r6, [lr, #( 4*8)]
        strh    r6, [lr, #(10*8)]

        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
        add     r2, r2, #(1<<17)
        mov     r2, r2, asr #18
        strh    r2, [lr, #( 6*8)]
        strh    r2, [lr, #( 8*8)]

        @ End of column loop
        add     lr, lr, #2
        subs    r12, r12, #1
        bne     column_loop

the_end:
        @ The end....
        add     sp, sp, #4                      @ drop the saved block pointer
        ldmia   sp!, { r4 - r12, pc }           @ restore callee saved regs and return

        @ Align first, then place the label, so that const_array always names
        @ the first constant even if padding has to be inserted.
        .align
const_array:
        .word FIX_0_298631336
        .word FIX_0_541196100
        .word FIX_0_765366865
        .word FIX_1_175875602
        .word FIX_1_501321110
        .word FIX_2_053119869
        .word FIX_3_072711026
        .word FIX_M_0_390180644
        .word FIX_M_0_899976223
        .word FIX_M_1_847759065
        .word FIX_M_1_961570560
        .word FIX_M_2_562915447
        .word FIX_0xFFFF
