annotate libavcodec/arm/h264idct_neon.S @ 6:55fb61482128

VSs working
author Nina Engelhardt <nengel@mailbox.tu-berlin.de>
date Wed, 06 Mar 2013 14:35:39 +0100
parents
children
rev   line source
nengel@2 1 /*
nengel@2 2 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
nengel@2 3 *
nengel@2 4 * This file is part of FFmpeg.
nengel@2 5 *
nengel@2 6 * FFmpeg is free software; you can redistribute it and/or
nengel@2 7 * modify it under the terms of the GNU Lesser General Public
nengel@2 8 * License as published by the Free Software Foundation; either
nengel@2 9 * version 2.1 of the License, or (at your option) any later version.
nengel@2 10 *
nengel@2 11 * FFmpeg is distributed in the hope that it will be useful,
nengel@2 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
nengel@2 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
nengel@2 14 * Lesser General Public License for more details.
nengel@2 15 *
nengel@2 16 * You should have received a copy of the GNU Lesser General Public
nengel@2 17 * License along with FFmpeg; if not, write to the Free Software
nengel@2 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
nengel@2 19 */
nengel@2 20
nengel@2 21 #include "asm.S"
nengel@2 22
nengel@2 23 preserve8
nengel@2 24 .text
nengel@2 25
nengel@2 26 function ff_h264_idct_add_neon, export=1 /* r0 = dst, r1 = 16 int16 coefficients, r2 = dst row step. Two 1-D butterfly passes with a 4x4 transpose in between, then (x + 32) >> 6, add to the 4x4 dst bytes and saturate to 0..255. Only r0 is modified among the core registers, and it is left 4 rows past its entry value. */
nengel@2 27 vld1.64 {d0-d3}, [r1,:128] @ load all 16 coefficients as four 4-element vectors v0..v3 (128-bit alignment hint)
nengel@2 28 @ ---- first 1-D pass on v0..v3 ----
nengel@2 29 vswp d1, d2 @ now d0 = v0, d1 = v2, d2 = v1, d3 = v3
nengel@2 30 vadd.i16 d4, d0, d1 @ d4 = v0 + v2
nengel@2 31 vshr.s16 q8, q1, #1 @ d16 = v1 >> 1, d17 = v3 >> 1
nengel@2 32 vsub.i16 d5, d0, d1 @ d5 = v0 - v2
nengel@2 33 vadd.i16 d6, d2, d17 @ d6 = v1 + (v3 >> 1)
nengel@2 34 vsub.i16 d7, d16, d3 @ d7 = (v1 >> 1) - v3
nengel@2 35 vadd.i16 q0, q2, q3 @ d0 = d4 + d6, d1 = d5 + d7
nengel@2 36 vsub.i16 q1, q2, q3 @ d2 = d4 - d6, d3 = d5 - d7
nengel@2 37 @ ---- transpose the 4x4 matrix whose rows are d0, d1, d3, d2 ----
nengel@2 38 vtrn.16 d0, d1
nengel@2 39 vtrn.16 d3, d2
nengel@2 40 vtrn.32 d0, d3
nengel@2 41 vtrn.32 d1, d2 @ transposed rows: d0 = t0, d1 = t1, d3 = t2, d2 = t3
nengel@2 42 @ ---- second 1-D pass on t0..t3, interleaved with the loads of the four dst rows ----
nengel@2 43 vadd.i16 d4, d0, d3 @ d4 = t0 + t2
nengel@2 44 vld1.32 {d18[0]}, [r0,:32], r2 @ dst row 0 -> d18[0], r0 advances by r2
nengel@2 45 vswp d1, d3 @ now d1 = t2, d3 = t1 (q1 = t3, t1)
nengel@2 46 vshr.s16 q8, q1, #1 @ d16 = t3 >> 1, d17 = t1 >> 1
nengel@2 47 vld1.32 {d19[1]}, [r0,:32], r2 @ dst row 1 -> d19[1]
nengel@2 48 vsub.i16 d5, d0, d1 @ d5 = t0 - t2
nengel@2 49 vld1.32 {d18[1]}, [r0,:32], r2 @ dst row 2 -> d18[1]
nengel@2 50 vadd.i16 d6, d16, d3 @ d6 = t1 + (t3 >> 1)
nengel@2 51 vld1.32 {d19[0]}, [r0,:32], r2 @ dst row 3 -> d19[0]
nengel@2 52 vsub.i16 d7, d2, d17 @ d7 = t3 - (t1 >> 1)
nengel@2 53 sub r0, r0, r2, lsl #2 @ rewind r0 by 4 rows, back to dst row 0
nengel@2 54 vadd.i16 q0, q2, q3 @ d0 = d4 + d6 (pairs with row 0), d1 = d5 + d7 (pairs with row 2)
nengel@2 55 vsub.i16 q1, q2, q3 @ d2 = d4 - d6 (pairs with row 3), d3 = d5 - d7 (pairs with row 1)
nengel@2 56 @ ---- scale, add the dst pixels, clamp, store ----
nengel@2 57 vrshr.s16 q0, q0, #6 @ rounding shift: (x + 32) >> 6
nengel@2 58 vrshr.s16 q1, q1, #6
nengel@2 59
nengel@2 60 vaddw.u8 q0, q0, d18 @ add zero-extended dst rows 0 and 2
nengel@2 61 vaddw.u8 q1, q1, d19 @ add zero-extended dst rows 3 and 1
nengel@2 62
nengel@2 63 vqmovun.s16 d0, q0 @ saturate to unsigned 8 bit: d0 = rows 0, 2
nengel@2 64 vqmovun.s16 d1, q1 @ d1 = rows 3, 1
nengel@2 65
nengel@2 66 vst1.32 {d0[0]}, [r0,:32], r2 @ store row 0
nengel@2 67 vst1.32 {d1[1]}, [r0,:32], r2 @ store row 1
nengel@2 68 vst1.32 {d0[1]}, [r0,:32], r2 @ store row 2
nengel@2 69 vst1.32 {d1[0]}, [r0,:32], r2 @ store row 3
nengel@2 70
nengel@2 71 bx lr @ leaf routine: no stack use, returns through lr
nengel@2 72 endfunc
nengel@2 73
nengel@2 74 function ff_h264_idct_dc_add_neon, export=1 /* r0 = dst, r1 = pointer to one int16 value, r2 = dst row step. Adds (value + 32) >> 6 to every byte of the 4x4 dst block and saturates to 0..255. Only r0 is modified among the core registers, and it is left 4 rows past its entry value. */
nengel@2 75 vld1.16 {d2[],d3[]}, [r1,:16] @ load the single int16 at r1 and replicate it into all 8 lanes of q1
nengel@2 76 vrshr.s16 q1, q1, #6 @ rounding shift: (x + 32) >> 6
nengel@2 77 vld1.32 {d0[0]}, [r0,:32], r2 @ dst row 0 -> d0[0], r0 advances by r2
nengel@2 78 vld1.32 {d0[1]}, [r0,:32], r2 @ dst row 1 -> d0[1]
nengel@2 79 vaddw.u8 q2, q1, d0 @ q2 = value + zero-extended rows 0, 1
nengel@2 80 vld1.32 {d1[0]}, [r0,:32], r2 @ dst row 2 -> d1[0]
nengel@2 81 vld1.32 {d1[1]}, [r0,:32], r2 @ dst row 3 -> d1[1]
nengel@2 82 vaddw.u8 q1, q1, d1 @ q1 = value + zero-extended rows 2, 3
nengel@2 83 vqmovun.s16 d0, q2 @ saturate to unsigned 8 bit: d0 = rows 0, 1
nengel@2 84 vqmovun.s16 d1, q1 @ d1 = rows 2, 3
nengel@2 85 sub r0, r0, r2, lsl #2 @ rewind r0 by 4 rows, back to dst row 0
nengel@2 86 vst1.32 {d0[0]}, [r0,:32], r2 @ store row 0
nengel@2 87 vst1.32 {d0[1]}, [r0,:32], r2 @ store row 1
nengel@2 88 vst1.32 {d1[0]}, [r0,:32], r2 @ store row 2
nengel@2 89 vst1.32 {d1[1]}, [r0,:32], r2 @ store row 3
nengel@2 90 bx lr @ leaf routine: no stack use, returns through lr
nengel@2 91 endfunc
nengel@2 92
nengel@2 93 function ff_h264_idct_add16_neon, export=1 /* r0 = dst base, r1 = table of 32-bit offsets, r2 = coefficient blocks (32 bytes each), r3 = dst row step, first stack argument = byte array indexed through scan8. Walks 16 blocks; each one is skipped, or passed to ff_h264_idct_dc_add_neon or ff_h264_idct_add_neon. */
nengel@2 94 push {r4-r8,lr} @ 6 registers = 24 bytes, so the first stack argument is at [sp, #24]. NOTE(review): sp is not 8-byte aligned at the blx below if it was at entry; harmless here because neither callee uses the stack
nengel@2 95 mov r4, r0 @ r4 = dst base
nengel@2 96 mov r5, r1 @ r5 = walks the offset table
nengel@2 97 mov r1, r2 @ r1 = current coefficient block, already in the register the callees read
nengel@2 98 mov r2, r3 @ r2 = dst row step, already in the register the callees read
nengel@2 99 ldr r6, [sp, #24] @ r6 = byte array from the first stack argument (presumably per-block nonzero-coefficient counts - confirm with the caller)
nengel@2 100 movrel r7, scan8 @ r7 = walks the first 16 entries of scan8
nengel@2 101 mov ip, #16 @ loop counter; both callees leave ip, r1 and r2 unchanged
nengel@2 102 1: ldrb r8, [r7], #1 @ r8 = next scan8 entry
nengel@2 103 ldr r0, [r5], #4 @ r0 = next block offset
nengel@2 104 ldrb r8, [r6, r8] @ r8 = byte for this block
nengel@2 105 subs r8, r8, #1 @ flags: lt if the byte was 0, eq if it was 1
nengel@2 106 blt 2f @ byte was 0: skip this block
nengel@2 107 ldrsh lr, [r1] @ lr = first coefficient of the block (flags untouched)
nengel@2 108 add r0, r0, r4 @ r0 = dst address of this block (flags untouched)
nengel@2 109 movne lr, #0 @ byte was not 1: force the eq case of the compare below
nengel@2 110 cmp lr, #0
nengel@2 111 adrne lr, ff_h264_idct_dc_add_neon @ byte == 1 and first coefficient != 0
nengel@2 112 adreq lr, ff_h264_idct_add_neon @ every other non-skipped case
nengel@2 113 blx lr @ call the selected routine with r0 = dst, r1 = block, r2 = row step
nengel@2 114 2: subs ip, ip, #1
nengel@2 115 add r1, r1, #32 @ next block: 16 coefficients * 2 bytes; flags from subs are kept for bne
nengel@2 116 bne 1b
nengel@2 117 pop {r4-r8,pc} @ restore saved registers and return
nengel@2 118 endfunc
nengel@2 119
nengel@2 120 function ff_h264_idct_add16intra_neon, export=1 /* Same arguments and 16-block walk as ff_h264_idct_add16_neon. Per block: byte != 0 calls ff_h264_idct_add_neon; byte == 0 with a nonzero first coefficient calls ff_h264_idct_dc_add_neon; otherwise the block is skipped. */
nengel@2 121 push {r4-r8,lr} @ 6 registers = 24 bytes, so the first stack argument is at [sp, #24]
nengel@2 122 mov r4, r0 @ r4 = dst base
nengel@2 123 mov r5, r1 @ r5 = walks the table of 32-bit offsets
nengel@2 124 mov r1, r2 @ r1 = current coefficient block, already in the register the callees read
nengel@2 125 mov r2, r3 @ r2 = dst row step, already in the register the callees read
nengel@2 126 ldr r6, [sp, #24] @ r6 = byte array from the first stack argument, indexed through scan8
nengel@2 127 movrel r7, scan8 @ r7 = walks the first 16 entries of scan8
nengel@2 128 mov ip, #16 @ loop counter; both callees leave ip, r1 and r2 unchanged
nengel@2 129 1: ldrb r8, [r7], #1 @ r8 = next scan8 entry
nengel@2 130 ldr r0, [r5], #4 @ r0 = next block offset
nengel@2 131 ldrb r8, [r6, r8] @ r8 = byte for this block
nengel@2 132 add r0, r0, r4 @ r0 = dst address of this block
nengel@2 133 cmp r8, #0 @ flags describe the per-block byte
nengel@2 134 ldrsh r8, [r1] @ r8 = first coefficient of the block (flags untouched)
nengel@2 135 adrne lr, ff_h264_idct_add_neon @ byte != 0: full transform
nengel@2 136 adreq lr, ff_h264_idct_dc_add_neon @ byte == 0: candidate for the single-value routine
nengel@2 137 cmpeq r8, #0 @ only when the byte was 0: test the first coefficient
nengel@2 138 blxne lr @ call unless both the byte and the first coefficient are 0
nengel@2 139 subs ip, ip, #1
nengel@2 140 add r1, r1, #32 @ next block: 16 coefficients * 2 bytes; flags from subs are kept for bne
nengel@2 141 bne 1b
nengel@2 142 pop {r4-r8,pc} @ restore saved registers and return
nengel@2 143 endfunc
nengel@2 144
nengel@2 145 function ff_h264_idct_add8_neon, export=1 /* r0 = pointer to two dst pointers, r1 = table of 32-bit offsets, r2 = coefficient blocks (32 bytes each), r3 = dst row step, first stack argument = byte array indexed through scan8. Handles blocks 16..23: the first four go to the first dst pointer, the last four to the second. */
nengel@2 146 push {r4-r10,lr} @ 8 registers = 32 bytes, so the first stack argument is at [sp, #32]
nengel@2 147 ldm r0, {r4,r9} @ r4 = first dst pointer, r9 = second dst pointer
nengel@2 148 add r5, r1, #16*4 @ r5 = offset table, starting at entry 16 (4 bytes per entry)
nengel@2 149 add r1, r2, #16*32 @ r1 = coefficient block 16 (32 bytes per block)
nengel@2 150 mov r2, r3 @ r2 = dst row step, already in the register the callees read
nengel@2 151 ldr r6, [sp, #32] @ r6 = byte array from the first stack argument, indexed through scan8
nengel@2 152 movrel r7, scan8+16 @ r7 = walks the last 8 entries of scan8
nengel@2 153 mov ip, #8 @ loop counter, runs 8..1; both callees leave ip, r1 and r2 unchanged
nengel@2 154 1: ldrb r8, [r7], #1 @ r8 = next scan8 entry
nengel@2 155 ldr r0, [r5], #4 @ r0 = next block offset
nengel@2 156 ldrb r8, [r6, r8] @ r8 = byte for this block
nengel@2 157 cmp ip, #4 @ ip = 8..5: first four blocks, ip = 4..1: last four (testing bit 2 of ip would split them 1/4/3)
nengel@2 158 addhi r0, r0, r4 @ first four blocks: offset from the first dst pointer
nengel@2 159 addls r0, r0, r9 @ last four blocks: offset from the second dst pointer
nengel@2 160 cmp r8, #0 @ flags describe the per-block byte
nengel@2 161 ldrsh r8, [r1] @ r8 = first coefficient of the block (flags untouched)
nengel@2 162 adrne lr, ff_h264_idct_add_neon @ byte != 0: full transform
nengel@2 163 adreq lr, ff_h264_idct_dc_add_neon @ byte == 0: candidate for the single-value routine
nengel@2 164 cmpeq r8, #0 @ only when the byte was 0: test the first coefficient
nengel@2 165 blxne lr @ call unless both the byte and the first coefficient are 0
nengel@2 166 subs ip, ip, #1
nengel@2 167 add r1, r1, #32 @ next block: 16 coefficients * 2 bytes; flags from subs are kept for bne
nengel@2 168 bne 1b
nengel@2 169 pop {r4-r10,pc} @ restore saved registers and return
nengel@2 170 endfunc
nengel@2 171
nengel@2 172 .section .rodata @ read-only lookup table used by the add16, add16intra and add8 routines
nengel@2 173 scan8: .byte 4+1*8, 5+1*8, 4+2*8, 5+2*8 @ entries 0-15: walked by ff_h264_idct_add16_neon and ff_h264_idct_add16intra_neon
nengel@2 174 .byte 6+1*8, 7+1*8, 6+2*8, 7+2*8 @ each entry is a byte index into the array those routines receive as first stack argument
nengel@2 175 .byte 4+3*8, 5+3*8, 4+4*8, 5+4*8 @ the x + y*8 spelling suggests column x, row y of an 8-wide grid (assumption - confirm with the C side)
nengel@2 176 .byte 6+3*8, 7+3*8, 6+4*8, 7+4*8
nengel@2 177 .byte 1+1*8, 2+1*8 @ entries 16-23: walked by ff_h264_idct_add8_neon, which starts at scan8+16
nengel@2 178 .byte 1+2*8, 2+2*8
nengel@2 179 .byte 1+4*8, 2+4*8 @ second group of four, located in grid rows 4 and 5
nengel@2 180 .byte 1+5*8, 2+5*8