| rev |
line source |
|
nengel@2
|
1 /*
|
|
nengel@2
|
2 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
|
|
nengel@2
|
3 *
|
|
nengel@2
|
4 * This file is part of FFmpeg.
|
|
nengel@2
|
5 *
|
|
nengel@2
|
6 * FFmpeg is free software; you can redistribute it and/or
|
|
nengel@2
|
7 * modify it under the terms of the GNU Lesser General Public
|
|
nengel@2
|
8 * License as published by the Free Software Foundation; either
|
|
nengel@2
|
9 * version 2.1 of the License, or (at your option) any later version.
|
|
nengel@2
|
10 *
|
|
nengel@2
|
11 * FFmpeg is distributed in the hope that it will be useful,
|
|
nengel@2
|
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
nengel@2
|
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
nengel@2
|
14 * Lesser General Public License for more details.
|
|
nengel@2
|
15 *
|
|
nengel@2
|
16 * You should have received a copy of the GNU Lesser General Public
|
|
nengel@2
|
17 * License along with FFmpeg; if not, write to the Free Software
|
|
nengel@2
|
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
nengel@2
|
19 */
|
|
nengel@2
|
20
|
|
nengel@2
|
21 #include "asm.S"
|
|
nengel@2
|
22
|
|
nengel@2
|
23 preserve8
|
|
nengel@2
|
24 .text
|
|
nengel@2
|
25
|
|
nengel@2
|
/*
 * ff_h264_idct_add_neon
 *
 * Applies a 4x4 inverse transform to 16 signed 16-bit coefficients and adds
 * the rounded result to a 4x4 block of unsigned bytes, with saturation.
 *
 * In:    r0 = destination (4 rows of 4 bytes, each row 32-bit aligned)
 *        r1 = coefficients (32 bytes, 128-bit aligned)
 *        r2 = distance in bytes between destination rows
 * Out:   r0 = destination + 4 * r2
 * Clobb: q0-q3, q8, q9
 * r1, r2, r12 and the condition flags are not written; the add16/add8
 * wrappers in this file keep live state in them across the call.
 */
function ff_h264_idct_add_neon, export=1
        vld1.64         {d0-d3},  [r1,:128]     @ d0..d3 = coefficient groups 0..3

        @ First pass. After the swap d1 = group 2 and q1 = {group 1, group 3}.
        vswp            d1,  d2
        vadd.i16        d4,  d0,  d1            @ g0 + g2
        vshr.s16        q8,  q1,  #1            @ d16 = g1 >> 1, d17 = g3 >> 1
        vsub.i16        d5,  d0,  d1            @ g0 - g2
        vadd.i16        d6,  d2,  d17           @ g1 + (g3 >> 1)
        vsub.i16        d7,  d16, d3            @ (g1 >> 1) - g3
        vadd.i16        q0,  q2,  q3            @ d0 = out0, d1 = out1
        vsub.i16        q1,  q2,  q3            @ d2 = out3, d3 = out2

        @ Transpose the 4x4 matrix whose rows are d0, d1, d3, d2.
        vtrn.16         d0,  d1
        vtrn.16         d3,  d2
        vtrn.32         d0,  d3
        vtrn.32         d1,  d2

        @ Second pass, with the four destination rows loaded in between.
        @ The results end up as q0 = {row 0, row 2} and q1 = {row 3, row 1},
        @ so the rows are loaded into the matching lanes of d18 and d19.
        vadd.i16        d4,  d0,  d3            @ t0 + t2
        vld1.32         {d18[0]}, [r0,:32], r2  @ destination row 0
        vswp            d1,  d3                 @ d1 = t2, q1 = {t3, t1}
        vshr.s16        q8,  q1,  #1            @ d16 = t3 >> 1, d17 = t1 >> 1
        vld1.32         {d19[1]}, [r0,:32], r2  @ destination row 1
        vsub.i16        d5,  d0,  d1            @ t0 - t2
        vld1.32         {d18[1]}, [r0,:32], r2  @ destination row 2
        vadd.i16        d6,  d16, d3            @ t1 + (t3 >> 1)
        vld1.32         {d19[0]}, [r0,:32], r2  @ destination row 3
        vsub.i16        d7,  d2,  d17           @ t3 - (t1 >> 1)
        sub             r0,  r0,  r2, lsl #2    @ rewind r0 to row 0
        vadd.i16        q0,  q2,  q3            @ rows 0 and 2
        vsub.i16        q1,  q2,  q3            @ rows 3 and 1

        vrshr.s16       q0,  q0,  #6            @ (x + 32) >> 6
        vrshr.s16       q1,  q1,  #6

        vaddw.u8        q0,  q0,  d18           @ add the existing pixels
        vaddw.u8        q1,  q1,  d19

        vqmovun.s16     d0,  q0                 @ saturate to 0..255
        vqmovun.s16     d1,  q1

        @ Store using the same lane order as the loads above.
        vst1.32         {d0[0]},  [r0,:32], r2
        vst1.32         {d1[1]},  [r0,:32], r2
        vst1.32         {d0[1]},  [r0,:32], r2
        vst1.32         {d1[0]},  [r0,:32], r2

        bx              lr
endfunc
|
|
nengel@2
|
73
|
|
nengel@2
|
/*
 * ff_h264_idct_dc_add_neon
 *
 * Reads a single signed 16-bit value, rounds it with (x + 32) >> 6 and adds
 * it to every byte of a 4x4 block, saturating to 0..255.
 *
 * In:    r0 = destination (4 rows of 4 bytes, each row 32-bit aligned)
 *        r1 = pointer to the 16-bit value (16-bit aligned)
 *        r2 = distance in bytes between destination rows
 * Out:   r0 = destination + 4 * r2
 * Clobb: q0-q2
 * r1, r2, r12 and the condition flags are not written.
 */
function ff_h264_idct_dc_add_neon, export=1
        vld1.16         {d2[],d3[]}, [r1,:16]   @ replicate the value into all lanes of q1
        vrshr.s16       q1,  q1,  #6
        vld1.32         {d0[0]},  [r0,:32], r2  @ row 0
        vld1.32         {d0[1]},  [r0,:32], r2  @ row 1
        vaddw.u8        q2,  q1,  d0            @ rows 0-1 plus the rounded value
        vld1.32         {d1[0]},  [r0,:32], r2  @ row 2
        vld1.32         {d1[1]},  [r0,:32], r2  @ row 3
        vaddw.u8        q1,  q1,  d1            @ rows 2-3 plus the rounded value
        vqmovun.s16     d0,  q2                 @ saturate back to bytes
        vqmovun.s16     d1,  q1
        sub             r0,  r0,  r2, lsl #2    @ rewind r0 to row 0
        vst1.32         {d0[0]},  [r0,:32], r2
        vst1.32         {d0[1]},  [r0,:32], r2
        vst1.32         {d1[0]},  [r0,:32], r2
        vst1.32         {d1[1]},  [r0,:32], r2
        bx              lr
endfunc
|
|
nengel@2
|
92
|
|
nengel@2
|
/*
 * ff_h264_idct_add16_neon
 *
 * Walks 16 consecutive 4x4 blocks. For each block the count byte
 * nnzc[scan8[i]] selects the action:
 *   count == 0                        : nothing
 *   count == 1 and first coeff != 0   : ff_h264_idct_dc_add_neon
 *   otherwise                         : ff_h264_idct_add_neon
 *
 * In:    r0   = destination base
 *        r1   = table of 32-bit per-block offsets added to r0
 *        r2   = coefficients, 32 bytes per block
 *        r3   = destination row stride, handed to the callee in r2
 *        [sp] = byte array indexed through scan8
 *
 * Live across the calls: r4 = destination base, r5 = offset cursor,
 * r6 = count array, r7 = scan8 cursor, r12 = blocks remaining,
 * r1 = current coefficient block, r2 = stride. Both callees leave r1, r2
 * and r12 untouched.
 */
function ff_h264_idct_add16_neon, export=1
        push            {r4-r8,lr}
        ldr             r6,  [sp, #24]          @ first stack argument, above the 6 saved words
        mov             r4,  r0
        mov             r5,  r1
        mov             r1,  r2
        mov             r2,  r3
        movrel          r7,  scan8
        mov             r12, #16
1:      ldrb            r8,  [r7], #1           @ r8 = scan8[i]
        ldr             r0,  [r5], #4           @ r0 = offset of block i
        ldrb            r8,  [r6, r8]           @ r8 = count byte for block i
        subs            r8,  r8,  #1
        blt             2f                      @ count was 0: skip the block
        add             r0,  r0,  r4            @ r0 = destination of block i
        ldrsh           lr,  [r1]               @ lr = first coefficient
        movne           lr,  #0                 @ count > 1: force the full transform
        cmp             lr,  #0
        adrne           lr,  ff_h264_idct_dc_add_neon
        adreq           lr,  ff_h264_idct_add_neon
        blx             lr
2:      subs            r12, r12, #1
        add             r1,  r1,  #32           @ next coefficient block (flags untouched)
        bne             1b
        pop             {r4-r8,pc}
endfunc
|
|
nengel@2
|
119
|
|
nengel@2
|
/*
 * ff_h264_idct_add16intra_neon
 *
 * Walks 16 consecutive 4x4 blocks. For each block:
 *   nnzc[scan8[i]] != 0      : ff_h264_idct_add_neon
 *   else first coeff != 0    : ff_h264_idct_dc_add_neon
 *   else                     : nothing
 *
 * In:    r0   = destination base
 *        r1   = table of 32-bit per-block offsets added to r0
 *        r2   = coefficients, 32 bytes per block
 *        r3   = destination row stride, handed to the callee in r2
 *        [sp] = byte array indexed through scan8
 *
 * Live across the calls: r4 = destination base, r5 = offset cursor,
 * r6 = count array, r7 = scan8 cursor, r12 = blocks remaining,
 * r1 = current coefficient block, r2 = stride.
 */
function ff_h264_idct_add16intra_neon, export=1
        push            {r4-r8,lr}
        ldr             r6,  [sp, #24]          @ first stack argument, above the 6 saved words
        mov             r4,  r0
        mov             r5,  r1
        mov             r1,  r2
        mov             r2,  r3
        movrel          r7,  scan8
        mov             r12, #16
1:      ldr             r0,  [r5], #4           @ r0 = offset of block i
        ldrb            r8,  [r7], #1           @ r8 = scan8[i]
        add             r0,  r0,  r4            @ r0 = destination of block i
        ldrb            r8,  [r6, r8]           @ r8 = count byte for block i
        cmp             r8,  #0
        ldrsh           r8,  [r1]               @ r8 = first coefficient (flags kept)
        adrne           lr,  ff_h264_idct_add_neon
        adreq           lr,  ff_h264_idct_dc_add_neon
        cmpeq           r8,  #0                 @ count == 0: call only if the coefficient is non-zero
        blxne           lr
        subs            r12, r12, #1
        add             r1,  r1,  #32           @ next coefficient block (flags untouched)
        bne             1b
        pop             {r4-r8,pc}
endfunc
|
|
nengel@2
|
144
|
|
nengel@2
|
/*
 * ff_h264_idct_add8_neon
 *
 * Walks the eight 4x4 blocks with indices 16..23. The first four are added
 * to the first destination pointer, the last four to the second one.
 * For each block:
 *   nnzc[scan8[i]] != 0      : ff_h264_idct_add_neon
 *   else first coeff != 0    : ff_h264_idct_dc_add_neon
 *   else                     : nothing
 *
 * In:    r0   = pointer to two destination pointers
 *        r1   = table of 32-bit per-block offsets (entries 16..23 are used)
 *        r2   = coefficients, 32 bytes per block (blocks 16..23 are used)
 *        r3   = destination row stride, handed to the callee in r2
 *        [sp] = byte array indexed through scan8
 *
 * Live across the calls: r4/r9 = the two destinations, r5 = offset cursor,
 * r6 = count array, r7 = scan8 cursor, r12 = blocks remaining minus one,
 * r1 = current coefficient block, r2 = stride.
 * r10 is saved but never used; presumably it only keeps the number of
 * pushed registers even so that sp stays 8-byte aligned.
 */
function ff_h264_idct_add8_neon, export=1
        push            {r4-r10,lr}
        ldm             r0,  {r4,r9}            @ r4 = first destination, r9 = second
        add             r5,  r1,  #16*4         @ offsets, starting at entry 16
        add             r1,  r2,  #16*32        @ coefficients, starting at block 16
        mov             r2,  r3
        ldr             r6,  [sp, #32]          @ first stack argument, above the 8 saved words
        movrel          r7,  scan8+16
        @ Count 7..0 so that bit 2 of the counter is set for exactly the
        @ first four blocks and clear for exactly the last four. Counting
        @ 8..1 set bit 2 for blocks 1-4 only and sent blocks 1-3 and 5-7 to
        @ the wrong destination.
        mov             ip,  #7
1:      ldrb            r8,  [r7], #1           @ r8 = scan8[i]
        ldr             r0,  [r5], #4           @ r0 = offset of block i
        ldrb            r8,  [r6, r8]           @ r8 = count byte for block i
        tst             ip,  #4
        addne           r0,  r0,  r4            @ counter 7..4: first destination
        addeq           r0,  r0,  r9            @ counter 3..0: second destination
        cmp             r8,  #0
        ldrsh           r8,  [r1]               @ r8 = first coefficient (flags kept)
        adrne           lr,  ff_h264_idct_add_neon
        adreq           lr,  ff_h264_idct_dc_add_neon
        cmpeq           r8,  #0                 @ count == 0: call only if the coefficient is non-zero
        blxne           lr
        subs            ip,  ip,  #1
        add             r1,  r1,  #32           @ next coefficient block (flags untouched)
        bge             1b                      @ leave once the counter drops below 0
        pop             {r4-r10,pc}
endfunc
|
|
nengel@2
|
171
|
|
nengel@2
|
172 .section .rodata
|
|
nengel@2
|
173 scan8: .byte 4+1*8, 5+1*8, 4+2*8, 5+2*8
|
|
nengel@2
|
174 .byte 6+1*8, 7+1*8, 6+2*8, 7+2*8
|
|
nengel@2
|
175 .byte 4+3*8, 5+3*8, 4+4*8, 5+4*8
|
|
nengel@2
|
176 .byte 6+3*8, 7+3*8, 6+4*8, 7+4*8
|
|
nengel@2
|
177 .byte 1+1*8, 2+1*8
|
|
nengel@2
|
178 .byte 1+2*8, 2+2*8
|
|
nengel@2
|
179 .byte 1+4*8, 2+4*8
|
|
nengel@2
|
180 .byte 1+5*8, 2+5*8
|