annotate libavcodec/arm/jrevdct_arm.S @ 9:ea1ba68cf0ed

update to match api changes + add sscc produced source
author Nina Engelhardt <nengel@mailbox.tu-berlin.de>
date Wed, 05 Jun 2013 14:43:26 +0200
parents
children
rev   line source
nengel@2 1 /*
nengel@2 2 C-like prototype :
nengel@2 3 void ff_j_rev_dct_arm(DCTBLOCK data)
nengel@2 4
nengel@2 5 With DCTBLOCK being a pointer to an array of 64 'signed shorts'
nengel@2 6
nengel@2 7 Copyright (c) 2001 Lionel Ulmer (lionel.ulmer@free.fr / bbrox@bbrox.org)
nengel@2 8
nengel@2 9 Permission is hereby granted, free of charge, to any person obtaining a copy
nengel@2 10 of this software and associated documentation files (the "Software"), to deal
nengel@2 11 in the Software without restriction, including without limitation the rights
nengel@2 12 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
nengel@2 13 copies of the Software, and to permit persons to whom the Software is
nengel@2 14 furnished to do so, subject to the following conditions:
nengel@2 15
nengel@2 16 The above copyright notice and this permission notice shall be included in
nengel@2 17 all copies or substantial portions of the Software.
nengel@2 18
nengel@2 19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
nengel@2 20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
nengel@2 21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
nengel@2 22 COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
nengel@2 23 IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
nengel@2 24 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
nengel@2 25
nengel@2 26 */
nengel@2 27
nengel@2 28 #include "asm.S"
nengel@2 29 /* Fixed-point multipliers: FIX_a_b is a.b scaled by 2^13 and rounded to an integer; FIX_M_a_b is the negated value. */
nengel@2 30 #define FIX_0_298631336 2446
nengel@2 31 #define FIX_0_541196100 4433
nengel@2 32 #define FIX_0_765366865 6270
nengel@2 33 #define FIX_1_175875602 9633
nengel@2 34 #define FIX_1_501321110 12299
nengel@2 35 #define FIX_2_053119869 16819
nengel@2 36 #define FIX_3_072711026 25172
nengel@2 37 #define FIX_M_0_390180644 -3196
nengel@2 38 #define FIX_M_0_899976223 -7373
nengel@2 39 #define FIX_M_1_847759065 -15137
nengel@2 40 #define FIX_M_1_961570560 -16069
nengel@2 41 #define FIX_M_2_562915447 -20995
nengel@2 42 #define FIX_0xFFFF 0xFFFF /* mask for the low 16 bits, used by the empty_row path */
nengel@2 43 /* Byte offsets of the constants inside const_array: one 4-byte .word each, in the same order as the table. */
nengel@2 44 #define FIX_0_298631336_ID 0
nengel@2 45 #define FIX_0_541196100_ID 4
nengel@2 46 #define FIX_0_765366865_ID 8
nengel@2 47 #define FIX_1_175875602_ID 12
nengel@2 48 #define FIX_1_501321110_ID 16
nengel@2 49 #define FIX_2_053119869_ID 20
nengel@2 50 #define FIX_3_072711026_ID 24
nengel@2 51 #define FIX_M_0_390180644_ID 28
nengel@2 52 #define FIX_M_0_899976223_ID 32
nengel@2 53 #define FIX_M_1_847759065_ID 36
nengel@2 54 #define FIX_M_1_961570560_ID 40
nengel@2 55 #define FIX_M_2_562915447_ID 44
nengel@2 56 #define FIX_0xFFFF_ID 48
nengel@2 57 .text
nengel@2 58 .align
nengel@2 59 @ ff_j_rev_dct_arm: in-place j_rev_dct of an 8x8 block of signed shorts, 8 rows first and then 8 columns; r0 = block pointer
nengel@2 60 function ff_j_rev_dct_arm, export=1
nengel@2 61 stmdb sp!, { r4 - r12, lr } @ save r4-r12 and lr (reloaded at the_end, with the saved lr going into pc)
nengel@2 62
nengel@2 63 sub sp, sp, #4 @ reserve one word on the stack
nengel@2 64 str r0, [ sp ] @ save the DCT pointer to the stack (reloaded at start_column_loop)
nengel@2 65
nengel@2 66 mov lr, r0 @ lr = pointer to the current row
nengel@2 67 mov r12, #8 @ r12 = row-counter
nengel@2 68 adr r11, const_array @ r11 = base pointer to the constants array
nengel@2 69 row_loop: @ one iteration per row; a row is the 16 bytes starting at lr
nengel@2 70 ldrsh r0, [lr, # 0] @ r0 = 'd0'
nengel@2 71 ldrsh r2, [lr, # 2] @ r2 = 'd2'
nengel@2 72
nengel@2 73 @ Optimization for rows that have all items except the first set to 0
nengel@2 74 @ (the word loads below rely on the DCTELEMS always being 4-byte aligned)
nengel@2 75 ldr r5, [lr, # 0] @ r5 = halfwords at byte offsets 0 and 2
nengel@2 76 ldr r6, [lr, # 4] @ r6 = halfwords at byte offsets 4 and 6
nengel@2 77 ldr r3, [lr, # 8] @ r3 = halfwords at byte offsets 8 and 10
nengel@2 78 ldr r4, [lr, #12] @ r4 = halfwords at byte offsets 12 and 14
nengel@2 79 orr r3, r3, r4
nengel@2 80 orr r3, r3, r6 @ r3 = OR of everything from byte offset 4 onwards
nengel@2 81 orrs r5, r3, r5 @ Z set only when the whole row is 0
nengel@2 82 beq end_of_row_loop @ nothing to be done as ALL of them are '0'
nengel@2 83 orrs r3, r3, r2 @ add the halfword at offset 2; Z set when only the first item is non-zero
nengel@2 84 beq empty_row
nengel@2 85
nengel@2 86 ldrsh r1, [lr, # 8] @ r1 = 'd1'
nengel@2 87 ldrsh r4, [lr, # 4] @ r4 = 'd4'
nengel@2 88 ldrsh r6, [lr, # 6] @ r6 = 'd6'
nengel@2 89
nengel@2 90 ldr r3, [r11, #FIX_0_541196100_ID]
nengel@2 91 add r7, r2, r6
nengel@2 92 ldr r5, [r11, #FIX_M_1_847759065_ID]
nengel@2 93 mul r7, r3, r7 @ r7 = z1
nengel@2 94 ldr r3, [r11, #FIX_0_765366865_ID]
nengel@2 95 mla r6, r5, r6, r7 @ r6 = tmp2
nengel@2 96 add r5, r0, r4 @ r5 = tmp0
nengel@2 97 mla r2, r3, r2, r7 @ r2 = tmp3
nengel@2 98 sub r3, r0, r4 @ r3 = tmp1
nengel@2 99
nengel@2 100 add r0, r2, r5, lsl #13 @ r0 = tmp10
nengel@2 101 rsb r2, r2, r5, lsl #13 @ r2 = tmp13
nengel@2 102 add r4, r6, r3, lsl #13 @ r4 = tmp11
nengel@2 103 rsb r3, r6, r3, lsl #13 @ r3 = tmp12
nengel@2 104
nengel@2 105 stmdb sp!, { r0, r2, r3, r4 } @ save on the stack tmp10, tmp13, tmp12, tmp11
nengel@2 106
nengel@2 107 ldrsh r3, [lr, #10] @ r3 = 'd3'
nengel@2 108 ldrsh r5, [lr, #12] @ r5 = 'd5'
nengel@2 109 ldrsh r7, [lr, #14] @ r7 = 'd7'
nengel@2 110
nengel@2 111 add r0, r3, r5 @ r0 = 'z2'
nengel@2 112 add r2, r1, r7 @ r2 = 'z1'
nengel@2 113 add r4, r3, r7 @ r4 = 'z3'
nengel@2 114 add r6, r1, r5 @ r6 = 'z4'
nengel@2 115 ldr r9, [r11, #FIX_1_175875602_ID]
nengel@2 116 add r8, r4, r6 @ r8 = z3 + z4
nengel@2 117 ldr r10, [r11, #FIX_M_0_899976223_ID]
nengel@2 118 mul r8, r9, r8 @ r8 = 'z5'
nengel@2 119 ldr r9, [r11, #FIX_M_2_562915447_ID]
nengel@2 120 mul r2, r10, r2 @ r2 = 'z1'
nengel@2 121 ldr r10, [r11, #FIX_M_1_961570560_ID]
nengel@2 122 mul r0, r9, r0 @ r0 = 'z2'
nengel@2 123 ldr r9, [r11, #FIX_M_0_390180644_ID]
nengel@2 124 mla r4, r10, r4, r8 @ r4 = 'z3'
nengel@2 125 ldr r10, [r11, #FIX_0_298631336_ID]
nengel@2 126 mla r6, r9, r6, r8 @ r6 = 'z4'
nengel@2 127 ldr r9, [r11, #FIX_2_053119869_ID]
nengel@2 128 mla r7, r10, r7, r2 @ r7 = tmp0 + z1
nengel@2 129 ldr r10, [r11, #FIX_3_072711026_ID]
nengel@2 130 mla r5, r9, r5, r0 @ r5 = tmp1 + z2
nengel@2 131 ldr r9, [r11, #FIX_1_501321110_ID]
nengel@2 132 mla r3, r10, r3, r0 @ r3 = tmp2 + z2
nengel@2 133 add r7, r7, r4 @ r7 = tmp0
nengel@2 134 mla r1, r9, r1, r2 @ r1 = tmp3 + z1
nengel@2 135 add r5, r5, r6 @ r5 = tmp1
nengel@2 136 add r3, r3, r4 @ r3 = tmp2
nengel@2 137 add r1, r1, r6 @ r1 = tmp3
nengel@2 138
nengel@2 139 ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11
nengel@2 140 @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0
nengel@2 141
nengel@2 142 @ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS)
nengel@2 143 add r8, r0, r1
nengel@2 144 add r8, r8, #(1<<10)
nengel@2 145 mov r8, r8, asr #11
nengel@2 146 strh r8, [lr, # 0]
nengel@2 147
nengel@2 148 @ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS)
nengel@2 149 sub r8, r0, r1
nengel@2 150 add r8, r8, #(1<<10)
nengel@2 151 mov r8, r8, asr #11
nengel@2 152 strh r8, [lr, #14]
nengel@2 153
nengel@2 154 @ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS)
nengel@2 155 add r8, r6, r3
nengel@2 156 add r8, r8, #(1<<10)
nengel@2 157 mov r8, r8, asr #11
nengel@2 158 strh r8, [lr, # 2]
nengel@2 159
nengel@2 160 @ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS)
nengel@2 161 sub r8, r6, r3
nengel@2 162 add r8, r8, #(1<<10)
nengel@2 163 mov r8, r8, asr #11
nengel@2 164 strh r8, [lr, #12]
nengel@2 165
nengel@2 166 @ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS)
nengel@2 167 add r8, r4, r5
nengel@2 168 add r8, r8, #(1<<10)
nengel@2 169 mov r8, r8, asr #11
nengel@2 170 strh r8, [lr, # 4]
nengel@2 171
nengel@2 172 @ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS)
nengel@2 173 sub r8, r4, r5
nengel@2 174 add r8, r8, #(1<<10)
nengel@2 175 mov r8, r8, asr #11
nengel@2 176 strh r8, [lr, #10]
nengel@2 177
nengel@2 178 @ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS)
nengel@2 179 add r8, r2, r7
nengel@2 180 add r8, r8, #(1<<10)
nengel@2 181 mov r8, r8, asr #11
nengel@2 182 strh r8, [lr, # 6]
nengel@2 183
nengel@2 184 @ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS)
nengel@2 185 sub r8, r2, r7
nengel@2 186 add r8, r8, #(1<<10)
nengel@2 187 mov r8, r8, asr #11
nengel@2 188 strh r8, [lr, # 8]
nengel@2 189
nengel@2 190 @ End of row loop
nengel@2 191 add lr, lr, #16
nengel@2 192 subs r12, r12, #1
nengel@2 193 bne row_loop
nengel@2 194 beq start_column_loop @ always taken here: Z is still set from the subs once the bne falls through
nengel@2 195
nengel@2 196 empty_row: @ only the first item of the row is non-zero: every output of the row is d0 << 2
nengel@2 197 ldr r1, [r11, #FIX_0xFFFF_ID] @ r1 = 16-bit mask
nengel@2 198 mov r0, r0, lsl #2 @ r0 = d0 << 2
nengel@2 199 and r0, r0, r1 @ keep the low 16 bits only
nengel@2 200 add r0, r0, r0, lsl #16 @ same 16-bit value in both halves of the word
nengel@2 201 str r0, [lr, # 0] @ four word stores fill all eight items of the row
nengel@2 202 str r0, [lr, # 4]
nengel@2 203 str r0, [lr, # 8]
nengel@2 204 str r0, [lr, #12]
nengel@2 205
nengel@2 206 end_of_row_loop:
nengel@2 207 @ End of row loop (reached from the all-zero test and by falling through from empty_row)
nengel@2 208 add lr, lr, #16
nengel@2 209 subs r12, r12, #1
nengel@2 210 bne row_loop
nengel@2 211
nengel@2 212 start_column_loop:
nengel@2 213 @ Start of column loop
nengel@2 214 ldr lr, [ sp ] @ lr = DCT pointer saved at function entry
nengel@2 215 mov r12, #8 @ r12 = column-counter
nengel@2 216 column_loop:
nengel@2 217 ldrsh r0, [lr, #( 0*8)] @ r0 = 'd0'
nengel@2 218 ldrsh r2, [lr, #( 4*8)] @ r2 = 'd2'
nengel@2 219 ldrsh r4, [lr, #( 8*8)] @ r4 = 'd4'
nengel@2 220 ldrsh r6, [lr, #(12*8)] @ r6 = 'd6'
nengel@2 221
nengel@2 222 ldr r3, [r11, #FIX_0_541196100_ID]
nengel@2 223 add r1, r2, r6
nengel@2 224 ldr r5, [r11, #FIX_M_1_847759065_ID]
nengel@2 225 mul r1, r3, r1 @ r1 = z1
nengel@2 226 ldr r3, [r11, #FIX_0_765366865_ID]
nengel@2 227 mla r6, r5, r6, r1 @ r6 = tmp2
nengel@2 228 add r5, r0, r4 @ r5 = tmp0
nengel@2 229 mla r2, r3, r2, r1 @ r2 = tmp3
nengel@2 230 sub r3, r0, r4 @ r3 = tmp1
nengel@2 231
nengel@2 232 add r0, r2, r5, lsl #13 @ r0 = tmp10
nengel@2 233 rsb r2, r2, r5, lsl #13 @ r2 = tmp13
nengel@2 234 add r4, r6, r3, lsl #13 @ r4 = tmp11
nengel@2 235 rsb r6, r6, r3, lsl #13 @ r6 = tmp12
nengel@2 236
nengel@2 237 ldrsh r1, [lr, #( 2*8)] @ r1 = 'd1'
nengel@2 238 ldrsh r3, [lr, #( 6*8)] @ r3 = 'd3'
nengel@2 239 ldrsh r5, [lr, #(10*8)] @ r5 = 'd5'
nengel@2 240 ldrsh r7, [lr, #(14*8)] @ r7 = 'd7'
nengel@2 241
nengel@2 242 @ Check for empty odd column (happens about 20 to 25 % of the time according to my stats)
nengel@2 243 orr r9, r1, r3
nengel@2 244 orr r10, r5, r7
nengel@2 245 orrs r10, r9, r10
nengel@2 246 beq empty_odd_column
nengel@2 247
nengel@2 248 stmdb sp!, { r0, r2, r4, r6 } @ save on the stack tmp10, tmp13, tmp11, tmp12
nengel@2 249
nengel@2 250 add r0, r3, r5 @ r0 = 'z2'
nengel@2 251 add r2, r1, r7 @ r2 = 'z1'
nengel@2 252 add r4, r3, r7 @ r4 = 'z3'
nengel@2 253 add r6, r1, r5 @ r6 = 'z4'
nengel@2 254 ldr r9, [r11, #FIX_1_175875602_ID]
nengel@2 255 add r8, r4, r6
nengel@2 256 ldr r10, [r11, #FIX_M_0_899976223_ID]
nengel@2 257 mul r8, r9, r8 @ r8 = 'z5'
nengel@2 258 ldr r9, [r11, #FIX_M_2_562915447_ID]
nengel@2 259 mul r2, r10, r2 @ r2 = 'z1'
nengel@2 260 ldr r10, [r11, #FIX_M_1_961570560_ID]
nengel@2 261 mul r0, r9, r0 @ r0 = 'z2'
nengel@2 262 ldr r9, [r11, #FIX_M_0_390180644_ID]
nengel@2 263 mla r4, r10, r4, r8 @ r4 = 'z3'
nengel@2 264 ldr r10, [r11, #FIX_0_298631336_ID]
nengel@2 265 mla r6, r9, r6, r8 @ r6 = 'z4'
nengel@2 266 ldr r9, [r11, #FIX_2_053119869_ID]
nengel@2 267 mla r7, r10, r7, r2 @ r7 = tmp0 + z1
nengel@2 268 ldr r10, [r11, #FIX_3_072711026_ID]
nengel@2 269 mla r5, r9, r5, r0 @ r5 = tmp1 + z2
nengel@2 270 ldr r9, [r11, #FIX_1_501321110_ID]
nengel@2 271 mla r3, r10, r3, r0 @ r3 = tmp2 + z2
nengel@2 272 add r7, r7, r4 @ r7 = tmp0
nengel@2 273 mla r1, r9, r1, r2 @ r1 = tmp3 + z1
nengel@2 274 add r5, r5, r6 @ r5 = tmp1
nengel@2 275 add r3, r3, r4 @ r3 = tmp2
nengel@2 276 add r1, r1, r6 @ r1 = tmp3
nengel@2 277
nengel@2 278 ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12
nengel@2 279 @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0
nengel@2 280
nengel@2 281 @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
nengel@2 282 add r8, r0, r1
nengel@2 283 add r8, r8, #(1<<17)
nengel@2 284 mov r8, r8, asr #18
nengel@2 285 strh r8, [lr, #( 0*8)]
nengel@2 286
nengel@2 287 @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
nengel@2 288 sub r8, r0, r1
nengel@2 289 add r8, r8, #(1<<17)
nengel@2 290 mov r8, r8, asr #18
nengel@2 291 strh r8, [lr, #(14*8)]
nengel@2 292
nengel@2 293 @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
nengel@2 294 add r8, r4, r3
nengel@2 295 add r8, r8, #(1<<17)
nengel@2 296 mov r8, r8, asr #18
nengel@2 297 strh r8, [lr, #( 2*8)]
nengel@2 298
nengel@2 299 @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
nengel@2 300 sub r8, r4, r3
nengel@2 301 add r8, r8, #(1<<17)
nengel@2 302 mov r8, r8, asr #18
nengel@2 303 strh r8, [lr, #(12*8)]
nengel@2 304
nengel@2 305 @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
nengel@2 306 add r8, r6, r5
nengel@2 307 add r8, r8, #(1<<17)
nengel@2 308 mov r8, r8, asr #18
nengel@2 309 strh r8, [lr, #( 4*8)]
nengel@2 310
nengel@2 311 @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
nengel@2 312 sub r8, r6, r5
nengel@2 313 add r8, r8, #(1<<17)
nengel@2 314 mov r8, r8, asr #18
nengel@2 315 strh r8, [lr, #(10*8)]
nengel@2 316
nengel@2 317 @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
nengel@2 318 add r8, r2, r7
nengel@2 319 add r8, r8, #(1<<17)
nengel@2 320 mov r8, r8, asr #18
nengel@2 321 strh r8, [lr, #( 6*8)]
nengel@2 322
nengel@2 323 @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
nengel@2 324 sub r8, r2, r7
nengel@2 325 add r8, r8, #(1<<17)
nengel@2 326 mov r8, r8, asr #18
nengel@2 327 strh r8, [lr, #( 8*8)]
nengel@2 328
nengel@2 329 @ End of column loop
nengel@2 330 add lr, lr, #2
nengel@2 331 subs r12, r12, #1
nengel@2 332 bne column_loop
nengel@2 333 beq the_end @ always taken here: Z is still set from the subs once the bne falls through
nengel@2 334
nengel@2 335 empty_odd_column: @ d1 = d3 = d5 = d7 = 0: the odd part is skipped and each +/- output pair stores the same value
nengel@2 336 @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
nengel@2 337 @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
nengel@2 338 add r0, r0, #(1<<17)
nengel@2 339 mov r0, r0, asr #18
nengel@2 340 strh r0, [lr, #( 0*8)]
nengel@2 341 strh r0, [lr, #(14*8)]
nengel@2 342
nengel@2 343 @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
nengel@2 344 @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
nengel@2 345 add r4, r4, #(1<<17)
nengel@2 346 mov r4, r4, asr #18
nengel@2 347 strh r4, [lr, #( 2*8)]
nengel@2 348 strh r4, [lr, #(12*8)]
nengel@2 349
nengel@2 350 @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
nengel@2 351 @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
nengel@2 352 add r6, r6, #(1<<17)
nengel@2 353 mov r6, r6, asr #18
nengel@2 354 strh r6, [lr, #( 4*8)]
nengel@2 355 strh r6, [lr, #(10*8)]
nengel@2 356
nengel@2 357 @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
nengel@2 358 @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
nengel@2 359 add r2, r2, #(1<<17)
nengel@2 360 mov r2, r2, asr #18
nengel@2 361 strh r2, [lr, #( 6*8)]
nengel@2 362 strh r2, [lr, #( 8*8)]
nengel@2 363
nengel@2 364 @ End of column loop
nengel@2 365 add lr, lr, #2
nengel@2 366 subs r12, r12, #1
nengel@2 367 bne column_loop
nengel@2 368
nengel@2 369 the_end:
nengel@2 370 @ The end....
nengel@2 371 add sp, sp, #4 @ drop the word that held the DCT pointer
nengel@2 372 ldmia sp!, { r4 - r12, pc } @ restore callee saved regs and return
nengel@2 373
nengel@2 374 .align @ align before the label so that const_array always addresses the first word, never padding
nengel@2 375 const_array: @ constant table addressed through r11; entry order must match the FIX_*_ID byte offsets
nengel@2 376 .word FIX_0_298631336
nengel@2 377 .word FIX_0_541196100
nengel@2 378 .word FIX_0_765366865
nengel@2 379 .word FIX_1_175875602
nengel@2 380 .word FIX_1_501321110
nengel@2 381 .word FIX_2_053119869
nengel@2 382 .word FIX_3_072711026
nengel@2 383 .word FIX_M_0_390180644
nengel@2 384 .word FIX_M_0_899976223
nengel@2 385 .word FIX_M_1_847759065
nengel@2 386 .word FIX_M_1_961570560
nengel@2 387 .word FIX_M_2_562915447
nengel@2 388 .word FIX_0xFFFF