/*
   C-like prototype :
        void j_rev_dct_arm(DCTBLOCK data)

   With DCTBLOCK being a pointer to an array of 64 'signed shorts'

   Copyright (c) 2001 Lionel Ulmer (lionel.ulmer@free.fr / bbrox@bbrox.org)

   Permission is hereby granted, free of charge, to any person obtaining a copy
   of this software and associated documentation files (the "Software"), to deal
   in the Software without restriction, including without limitation the rights
   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   copies of the Software, and to permit persons to whom the Software is
   furnished to do so, subject to the following conditions:

   The above copyright notice and this permission notice shall be included in
   all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
   COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
   IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

#include "asm.S"

/*
 * Fixed-point multipliers: each value is the constant named in the macro
 * multiplied by 2^13 and rounded (e.g. 0.541196100 * 8192 = 4433).
 * The FIX_M_* entries are the negated constants.
 */
#define FIX_0_298631336 2446
#define FIX_0_541196100 4433
#define FIX_0_765366865 6270
#define FIX_1_175875602 9633
#define FIX_1_501321110 12299
#define FIX_2_053119869 16819
#define FIX_3_072711026 25172
#define FIX_M_0_390180644 -3196
#define FIX_M_0_899976223 -7373
#define FIX_M_1_847759065 -15137
#define FIX_M_1_961570560 -16069
#define FIX_M_2_562915447 -20995
#define FIX_0xFFFF 0xFFFF

/*
 * Byte offsets of the values above inside const_array (one 32-bit word each).
 * The order here must match the order of the .word entries at the end of
 * this file.
 */
#define FIX_0_298631336_ID   0
#define FIX_0_541196100_ID   4
#define FIX_0_765366865_ID   8
#define FIX_1_175875602_ID  12
#define FIX_1_501321110_ID  16
#define FIX_2_053119869_ID  20
#define FIX_3_072711026_ID  24
#define FIX_M_0_390180644_ID 28
#define FIX_M_0_899976223_ID 32
#define FIX_M_1_847759065_ID 36
#define FIX_M_1_961570560_ID 40
#define FIX_M_2_562915447_ID 44
#define FIX_0xFFFF_ID       48

/*
 * Descaling of the pass results: add the rounding term, then shift right
 * arithmetically.  The row pass uses 11 bits (written CONST_BITS-PASS1_BITS in
 * the comments below), the column pass 18 bits (CONST_BITS+PASS1_BITS+3).
 */
#define ROW_DESCALE_SHIFT 11
#define ROW_DESCALE_ROUND (1 << 10)
#define COL_DESCALE_SHIFT 18
#define COL_DESCALE_ROUND (1 << 17)

        .text
        .align

/*
 * ff_j_rev_dct_arm - in-place integer inverse DCT of one 8x8 block.
 *
 * In:    r0 = pointer to 64 signed 16-bit values (8 rows of 16 bytes).
 * Out:   the block is overwritten with the result; no value is returned.
 * Saved: r4-r12 and lr are pushed on entry and restored on exit (the return
 *        is done by popping the saved lr into pc).  r0-r3 are overwritten.
 * Stack: 40 bytes of saved registers, 4 bytes holding the block pointer and,
 *        temporarily, 16 bytes for tmp10..tmp13.
 *
 * Pass 1 walks the 8 rows.  Within a row the halfwords at byte offsets
 * 0, 2, 4, 6 are read as d0, d2, d4, d6 and those at 8, 10, 12, 14 as
 * d1, d3, d5, d7.  Results are descaled by ROW_DESCALE_SHIFT bits.
 * Pass 2 walks the 8 columns (element k of a column is at byte offset k*16)
 * and descales by COL_DESCALE_SHIFT bits.
 *
 * Register roles in both loops:
 *   lr  = pointer to the current row / column
 *   r12 = remaining iterations
 *   r11 = base address of const_array
 *
 * NOTE(review): the mul/mla operands are ordered so that the destination
 * register differs from the first source register; older ARM cores are
 * documented to require this, so keep the operand order when editing.
 * NOTE(review): const_array is addressed with adr, so the distance between
 * the adr instruction and the table must stay encodable - re-check when
 * adding or removing instructions.
 */
function ff_j_rev_dct_arm, export=1
        stmdb   sp!, { r4 - r12, lr }   @ all callee saved regs

        sub     sp, sp, #4              @ reserve some space on the stack
        str     r0, [ sp ]              @ save the DCT pointer to the stack

        mov     lr, r0                  @ lr = pointer to the current row
        mov     r12, #8                 @ r12 = row-counter
        adr     r11, const_array        @ r11 = base pointer to the constants array

row_loop:
        ldrsh   r0, [lr, #0]            @ r0 = 'd0'
        ldrsh   r2, [lr, #2]            @ r2 = 'd2'

        @ Optimization for row that have all items except the first set to 0
        @ (this works as the DCTELEMS are always 4-byte aligned)
        @ The row is read as four 32-bit words: r5 = word 0, r6/r3/r4 = words 1-3.
        ldr     r5, [lr, #0]
        ldr     r6, [lr, #4]
        ldr     r3, [lr, #8]
        ldr     r4, [lr, #12]
        orr     r3, r3, r4
        orr     r3, r3, r6              @ r3 = OR of words 1 to 3
        orrs    r5, r3, r5              @ Z set if the whole row is zero
        beq     end_of_row_loop         @ nothing to be done as ALL of them are '0'
        orrs    r3, r3, r2              @ Z set if everything but the first item is zero
        beq     empty_row

        ldrsh   r1, [lr, #8]            @ r1 = 'd1'
        ldrsh   r4, [lr, #4]            @ r4 = 'd4'
        ldrsh   r6, [lr, #6]            @ r6 = 'd6'

        @ Even part
        ldr     r3, [r11, #FIX_0_541196100_ID]
        add     r7, r2, r6
        ldr     r5, [r11, #FIX_M_1_847759065_ID]
        mul     r7, r3, r7              @ r7 = z1
        ldr     r3, [r11, #FIX_0_765366865_ID]
        mla     r6, r5, r6, r7          @ r6 = tmp2
        add     r5, r0, r4              @ r5 = tmp0
        mla     r2, r3, r2, r7          @ r2 = tmp3
        sub     r3, r0, r4              @ r3 = tmp1

        add     r0, r2, r5, lsl #13     @ r0 = tmp10
        rsb     r2, r2, r5, lsl #13     @ r2 = tmp13
        add     r4, r6, r3, lsl #13     @ r4 = tmp11
        rsb     r3, r6, r3, lsl #13     @ r3 = tmp12

        stmdb   sp!, { r0, r2, r3, r4 } @ save on the stack tmp10, tmp13, tmp12, tmp11

        @ Odd part
        ldrsh   r3, [lr, #10]           @ r3 = 'd3'
        ldrsh   r5, [lr, #12]           @ r5 = 'd5'
        ldrsh   r7, [lr, #14]           @ r7 = 'd7'

        add     r0, r3, r5              @ r0 = 'z2'
        add     r2, r1, r7              @ r2 = 'z1'
        add     r4, r3, r7              @ r4 = 'z3'
        add     r6, r1, r5              @ r6 = 'z4'
        ldr     r9, [r11, #FIX_1_175875602_ID]
        add     r8, r4, r6              @ r8 = z3 + z4
        ldr     r10, [r11, #FIX_M_0_899976223_ID]
        mul     r8, r9, r8              @ r8 = 'z5'
        ldr     r9, [r11, #FIX_M_2_562915447_ID]
        mul     r2, r10, r2             @ r2 = 'z1'
        ldr     r10, [r11, #FIX_M_1_961570560_ID]
        mul     r0, r9, r0              @ r0 = 'z2'
        ldr     r9, [r11, #FIX_M_0_390180644_ID]
        mla     r4, r10, r4, r8         @ r4 = 'z3'
        ldr     r10, [r11, #FIX_0_298631336_ID]
        mla     r6, r9, r6, r8          @ r6 = 'z4'
        ldr     r9, [r11, #FIX_2_053119869_ID]
        mla     r7, r10, r7, r2         @ r7 = tmp0 + z1
        ldr     r10, [r11, #FIX_3_072711026_ID]
        mla     r5, r9, r5, r0          @ r5 = tmp1 + z2
        ldr     r9, [r11, #FIX_1_501321110_ID]
        mla     r3, r10, r3, r0         @ r3 = tmp2 + z2
        add     r7, r7, r4              @ r7 = tmp0
        mla     r1, r9, r1, r2          @ r1 = tmp3 + z1
        add     r5, r5, r6              @ r5 = tmp1
        add     r3, r3, r4              @ r3 = tmp2
        add     r1, r1, r6              @ r1 = tmp3

        ldmia   sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11
                                        @ r1 = tmp3  / r3 = tmp2  / r5 = tmp1  / r7 = tmp0

        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS)
        add     r8, r0, r1
        add     r8, r8, #ROW_DESCALE_ROUND
        mov     r8, r8, asr #ROW_DESCALE_SHIFT
        strh    r8, [lr, #0]

        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS)
        sub     r8, r0, r1
        add     r8, r8, #ROW_DESCALE_ROUND
        mov     r8, r8, asr #ROW_DESCALE_SHIFT
        strh    r8, [lr, #14]

        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS)
        add     r8, r6, r3
        add     r8, r8, #ROW_DESCALE_ROUND
        mov     r8, r8, asr #ROW_DESCALE_SHIFT
        strh    r8, [lr, #2]

        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS)
        sub     r8, r6, r3
        add     r8, r8, #ROW_DESCALE_ROUND
        mov     r8, r8, asr #ROW_DESCALE_SHIFT
        strh    r8, [lr, #12]

        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS)
        add     r8, r4, r5
        add     r8, r8, #ROW_DESCALE_ROUND
        mov     r8, r8, asr #ROW_DESCALE_SHIFT
        strh    r8, [lr, #4]

        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS)
        sub     r8, r4, r5
        add     r8, r8, #ROW_DESCALE_ROUND
        mov     r8, r8, asr #ROW_DESCALE_SHIFT
        strh    r8, [lr, #10]

        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS)
        add     r8, r2, r7
        add     r8, r8, #ROW_DESCALE_ROUND
        mov     r8, r8, asr #ROW_DESCALE_SHIFT
        strh    r8, [lr, #6]

        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS)
        sub     r8, r2, r7
        add     r8, r8, #ROW_DESCALE_ROUND
        mov     r8, r8, asr #ROW_DESCALE_SHIFT
        strh    r8, [lr, #8]

        @ End of row loop
        add     lr, lr, #16             @ next row (8 halfwords further)
        subs    r12, r12, #1
        bne     row_loop
        b       start_column_loop       @ all rows done, skip the special-case code

empty_row:
        @ Only the first item of the row is non-zero: every output of the row
        @ is d0 shifted left by 2.  Build that 16-bit value in both halves of
        @ r0 and write it to the four words of the row.
        ldr     r1, [r11, #FIX_0xFFFF_ID]
        mov     r0, r0, lsl #2
        and     r0, r0, r1              @ keep the low 16 bits only
        add     r0, r0, r0, lsl #16     @ duplicate into the upper halfword
        str     r0, [lr, #0]
        str     r0, [lr, #4]
        str     r0, [lr, #8]
        str     r0, [lr, #12]

end_of_row_loop:
        @ End of loop
        add     lr, lr, #16
        subs    r12, r12, #1
        bne     row_loop

start_column_loop:
        @ Start of column loop
        ldr     lr, [ sp ]              @ lr = start of the block again (first column)
        mov     r12, #8                 @ r12 = column-counter
column_loop:
        @ Offsets are written as (2*k*8): element k of the column, 16 bytes per row
        ldrsh   r0, [lr, #(0*8)]        @ r0 = 'd0'
        ldrsh   r2, [lr, #(4*8)]        @ r2 = 'd2'
        ldrsh   r4, [lr, #(8*8)]        @ r4 = 'd4'
        ldrsh   r6, [lr, #(12*8)]       @ r6 = 'd6'

        @ Even part
        ldr     r3, [r11, #FIX_0_541196100_ID]
        add     r1, r2, r6
        ldr     r5, [r11, #FIX_M_1_847759065_ID]
        mul     r1, r3, r1              @ r1 = z1
        ldr     r3, [r11, #FIX_0_765366865_ID]
        mla     r6, r5, r6, r1          @ r6 = tmp2
        add     r5, r0, r4              @ r5 = tmp0
        mla     r2, r3, r2, r1          @ r2 = tmp3
        sub     r3, r0, r4              @ r3 = tmp1

        add     r0, r2, r5, lsl #13     @ r0 = tmp10
        rsb     r2, r2, r5, lsl #13     @ r2 = tmp13
        add     r4, r6, r3, lsl #13     @ r4 = tmp11
        rsb     r6, r6, r3, lsl #13     @ r6 = tmp12

        ldrsh   r1, [lr, #(2*8)]        @ r1 = 'd1'
        ldrsh   r3, [lr, #(6*8)]        @ r3 = 'd3'
        ldrsh   r5, [lr, #(10*8)]       @ r5 = 'd5'
        ldrsh   r7, [lr, #(14*8)]       @ r7 = 'd7'

        @ Check for empty odd column (happens about 20 to 25 % of the time according to my stats)
        orr     r9, r1, r3
        orr     r10, r5, r7
        orrs    r10, r9, r10
        beq     empty_odd_column

        stmdb   sp!, { r0, r2, r4, r6 } @ save on the stack tmp10, tmp13, tmp11, tmp12

        @ Odd part
        add     r0, r3, r5              @ r0 = 'z2'
        add     r2, r1, r7              @ r2 = 'z1'
        add     r4, r3, r7              @ r4 = 'z3'
        add     r6, r1, r5              @ r6 = 'z4'
        ldr     r9, [r11, #FIX_1_175875602_ID]
        add     r8, r4, r6              @ r8 = z3 + z4
        ldr     r10, [r11, #FIX_M_0_899976223_ID]
        mul     r8, r9, r8              @ r8 = 'z5'
        ldr     r9, [r11, #FIX_M_2_562915447_ID]
        mul     r2, r10, r2             @ r2 = 'z1'
        ldr     r10, [r11, #FIX_M_1_961570560_ID]
        mul     r0, r9, r0              @ r0 = 'z2'
        ldr     r9, [r11, #FIX_M_0_390180644_ID]
        mla     r4, r10, r4, r8         @ r4 = 'z3'
        ldr     r10, [r11, #FIX_0_298631336_ID]
        mla     r6, r9, r6, r8          @ r6 = 'z4'
        ldr     r9, [r11, #FIX_2_053119869_ID]
        mla     r7, r10, r7, r2         @ r7 = tmp0 + z1
        ldr     r10, [r11, #FIX_3_072711026_ID]
        mla     r5, r9, r5, r0          @ r5 = tmp1 + z2
        ldr     r9, [r11, #FIX_1_501321110_ID]
        mla     r3, r10, r3, r0         @ r3 = tmp2 + z2
        add     r7, r7, r4              @ r7 = tmp0
        mla     r1, r9, r1, r2          @ r1 = tmp3 + z1
        add     r5, r5, r6              @ r5 = tmp1
        add     r3, r3, r4              @ r3 = tmp2
        add     r1, r1, r6              @ r1 = tmp3

        ldmia   sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12
                                        @ r1 = tmp3  / r3 = tmp2  / r5 = tmp1  / r7 = tmp0

        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
        add     r8, r0, r1
        add     r8, r8, #COL_DESCALE_ROUND
        mov     r8, r8, asr #COL_DESCALE_SHIFT
        strh    r8, [lr, #(0*8)]

        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
        sub     r8, r0, r1
        add     r8, r8, #COL_DESCALE_ROUND
        mov     r8, r8, asr #COL_DESCALE_SHIFT
        strh    r8, [lr, #(14*8)]

        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
        add     r8, r4, r3
        add     r8, r8, #COL_DESCALE_ROUND
        mov     r8, r8, asr #COL_DESCALE_SHIFT
        strh    r8, [lr, #(2*8)]

        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
        sub     r8, r4, r3
        add     r8, r8, #COL_DESCALE_ROUND
        mov     r8, r8, asr #COL_DESCALE_SHIFT
        strh    r8, [lr, #(12*8)]

        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
        add     r8, r6, r5
        add     r8, r8, #COL_DESCALE_ROUND
        mov     r8, r8, asr #COL_DESCALE_SHIFT
        strh    r8, [lr, #(4*8)]

        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
        sub     r8, r6, r5
        add     r8, r8, #COL_DESCALE_ROUND
        mov     r8, r8, asr #COL_DESCALE_SHIFT
        strh    r8, [lr, #(10*8)]

        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
        add     r8, r2, r7
        add     r8, r8, #COL_DESCALE_ROUND
        mov     r8, r8, asr #COL_DESCALE_SHIFT
        strh    r8, [lr, #(6*8)]

        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
        sub     r8, r2, r7
        add     r8, r8, #COL_DESCALE_ROUND
        mov     r8, r8, asr #COL_DESCALE_SHIFT
        strh    r8, [lr, #(8*8)]

        @ End of column loop
        add     lr, lr, #2              @ next column (one halfword further)
        subs    r12, r12, #1
        bne     column_loop
        b       the_end                 @ all columns done, skip the special-case code

empty_odd_column:
        @ d1, d3, d5 and d7 are all zero, so tmp0..tmp3 are zero and each pair
        @ of outputs is the same descaled even-part value.

        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
        add     r0, r0, #COL_DESCALE_ROUND
        mov     r0, r0, asr #COL_DESCALE_SHIFT
        strh    r0, [lr, #(0*8)]
        strh    r0, [lr, #(14*8)]

        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
        add     r4, r4, #COL_DESCALE_ROUND
        mov     r4, r4, asr #COL_DESCALE_SHIFT
        strh    r4, [lr, #(2*8)]
        strh    r4, [lr, #(12*8)]

        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
        add     r6, r6, #COL_DESCALE_ROUND
        mov     r6, r6, asr #COL_DESCALE_SHIFT
        strh    r6, [lr, #(4*8)]
        strh    r6, [lr, #(10*8)]

        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
        add     r2, r2, #COL_DESCALE_ROUND
        mov     r2, r2, asr #COL_DESCALE_SHIFT
        strh    r2, [lr, #(6*8)]
        strh    r2, [lr, #(8*8)]

        @ End of column loop
        add     lr, lr, #2
        subs    r12, r12, #1
        bne     column_loop

the_end:
        @ The end....
        add     sp, sp, #4              @ drop the saved DCT pointer
        ldmia   sp!, { r4 - r12, pc }   @ restore callee saved regs and return

        @ Constant table read through r11; the entry order must match the
        @ *_ID byte offsets defined at the top of the file.
        .align
const_array:
        .word   FIX_0_298631336
        .word   FIX_0_541196100
        .word   FIX_0_765366865
        .word   FIX_1_175875602
        .word   FIX_1_501321110
        .word   FIX_2_053119869
        .word   FIX_3_072711026
        .word   FIX_M_0_390180644
        .word   FIX_M_0_899976223
        .word   FIX_M_1_847759065
        .word   FIX_M_1_961570560
        .word   FIX_M_2_562915447
        .word   FIX_0xFFFF