| rev |
line source |
|
nengel@2
|
1 @
|
|
nengel@2
|
2 @ ARMv4 optimized DSP utils
|
|
nengel@2
|
3 @ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
|
|
nengel@2
|
4 @
|
|
nengel@2
|
5 @ This file is part of FFmpeg.
|
|
nengel@2
|
6 @
|
|
nengel@2
|
7 @ FFmpeg is free software; you can redistribute it and/or
|
|
nengel@2
|
8 @ modify it under the terms of the GNU Lesser General Public
|
|
nengel@2
|
9 @ License as published by the Free Software Foundation; either
|
|
nengel@2
|
10 @ version 2.1 of the License, or (at your option) any later version.
|
|
nengel@2
|
11 @
|
|
nengel@2
|
12 @ FFmpeg is distributed in the hope that it will be useful,
|
|
nengel@2
|
13 @ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
nengel@2
|
14 @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
nengel@2
|
15 @ Lesser General Public License for more details.
|
|
nengel@2
|
16 @
|
|
nengel@2
|
17 @ You should have received a copy of the GNU Lesser General Public
|
|
nengel@2
|
18 @ License along with FFmpeg; if not, write to the Free Software
|
|
nengel@2
|
19 @ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
nengel@2
|
20 @
|
|
nengel@2
|
21
|
|
nengel@2
|
22 #include "config.h"
|
|
nengel@2
|
23 #include "asm.S"
|
|
nengel@2
|
24
|
|
nengel@2
|
25 preserve8
|
|
nengel@2
|
26
|
|
nengel@2
|
#if !HAVE_PLD
@ Fallback used when config.h sets HAVE_PLD to 0 (presumably because the
@ target has no PLD instruction): pld becomes a macro with an empty body,
@ so every "pld [rN]" written in this file assembles to nothing.
.macro pld reg
.endm
#endif
|
|
nengel@2
|
31
|
|
nengel@2
|
#if HAVE_ARMV5TE
@ ff_prefetch_arm: issue one pld per row of a strided buffer.
@   r0 = address of the first row (advanced by r1 after every pld)
@   r1 = byte distance between consecutive rows
@   r2 = number of rows; must be non-zero, because the counter is
@        decremented before it is tested and 0 would wrap around
@ Only r0, r2 and the flags are modified.
function ff_prefetch_arm, export=1
1:
        @ Loop on a numeric local label instead of the exported symbol so the
        @ back-edge is resolved by the assembler and never needs a relocation
        @ (or a linker veneer) against a global name.
        subs            r2, r2, #1              @ sets Z on the last row
        pld             [r0]                    @ flags untouched
        add             r0, r0, r1              @ flags untouched
        bne             1b
        bx              lr
endfunc
#endif
|
|
nengel@2
|
41
|
|
nengel@2
|
@ ALIGN_QWORD_D shift, Rd0..Rd3, Rn0..Rn4
@ Funnel-shift five consecutive words Rn0..Rn4 right by shift bytes and
@ write the resulting four words to Rd0..Rd3:
@       Rd[i] = (Rn[i] >> 8*shift) | (Rn[i+1] << (32 - 8*shift))
@ shift must be 1, 2 or 3 (0 would need an lsl by 32, which is not
@ encodable).  The Rn registers are only read.
.macro ALIGN_QWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
        @ low part of every destination word
        mov             \Rd0, \Rn0, lsr #(\shift * 8)
        mov             \Rd1, \Rn1, lsr #(\shift * 8)
        mov             \Rd2, \Rn2, lsr #(\shift * 8)
        mov             \Rd3, \Rn3, lsr #(\shift * 8)
        @ high part, taken from the following source word
        orr             \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8)
        orr             \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
        orr             \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
        orr             \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8)
.endm
|
|
nengel@2
|
@ ALIGN_DWORD shift, R0, R1, R2
@ In-place funnel shift of three consecutive words by shift bytes
@ (shift = 1, 2 or 3):
@       R0 = (R0 >> 8*shift) | (R1 << (32 - 8*shift))
@       R1 = (R1 >> 8*shift) | (R2 << (32 - 8*shift))
@ R2 is only read.  R0 has to be finished before R1 is overwritten, which
@ is why the two mov/orr pairs are not interleaved.
.macro ALIGN_DWORD shift, R0, R1, R2
        mov             \R0, \R0, lsr #(\shift * 8)
        orr             \R0, \R0, \R1, lsl #(32 - \shift * 8)
        mov             \R1, \R1, lsr #(\shift * 8)
        orr             \R1, \R1, \R2, lsl #(32 - \shift * 8)
.endm
|
|
nengel@2
|
@ ALIGN_DWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
@ Same funnel shift as ALIGN_DWORD, but with separate destinations
@ (shift = 1, 2 or 3):
@       Rdst0 = (Rsrc0 >> 8*shift) | (Rsrc1 << (32 - 8*shift))
@       Rdst1 = (Rsrc1 >> 8*shift) | (Rsrc2 << (32 - 8*shift))
@ Rsrc0 is read by the first instruction only, so Rdst1 may be the same
@ register as Rsrc0; keep this instruction order.
.macro ALIGN_DWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
        mov             \Rdst0, \Rsrc0, lsr #(\shift * 8)
        mov             \Rdst1, \Rsrc1, lsr #(\shift * 8)
        orr             \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8))
        orr             \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8))
.endm
|
|
nengel@2
|
64
|
|
nengel@2
|
@ RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
@ Byte-wise average, rounded up, of two pairs of packed words:
@       Rd = (Rn | Rm) - (((Rn ^ Rm) & 0xFEFEFEFE) >> 1)
@ Rmask must hold 0xFEFEFEFE; masking before the shift stops the low bit
@ of each byte from leaking into the byte below it.
@ Rn0/Rn1 are destroyed (they end up holding Rn | Rm); Rm0/Rm1 are kept.
.macro RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        eor             \Rd0, \Rn0, \Rm0        @ bits that differ
        eor             \Rd1, \Rn1, \Rm1
        orr             \Rn0, \Rn0, \Rm0        @ Rn is clobbered here
        orr             \Rn1, \Rn1, \Rm1
        and             \Rd0, \Rd0, \Rmask
        and             \Rd1, \Rd1, \Rmask
        sub             \Rd0, \Rn0, \Rd0, lsr #1
        sub             \Rd1, \Rn1, \Rd1, lsr #1
.endm
|
|
nengel@2
|
78
|
|
nengel@2
|
@ NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
@ Byte-wise average, rounded down, of two pairs of packed words:
@       Rd = (Rn & Rm) + (((Rn ^ Rm) & 0xFEFEFEFE) >> 1)
@ Rmask must hold 0xFEFEFEFE; masking before the shift stops the low bit
@ of each byte from leaking into the byte below it.
@ Rn0/Rn1 are destroyed (they end up holding Rn & Rm); Rm0/Rm1 are kept.
.macro NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        eor             \Rd0, \Rn0, \Rm0        @ bits that differ
        eor             \Rd1, \Rn1, \Rm1
        and             \Rn0, \Rn0, \Rm0        @ Rn is clobbered here
        and             \Rn1, \Rn1, \Rm1
        and             \Rd0, \Rd0, \Rmask
        and             \Rd1, \Rd1, \Rmask
        add             \Rd0, \Rn0, \Rd0, lsr #1
        add             \Rd1, \Rn1, \Rd1, lsr #1
.endm
|
|
nengel@2
|
92
|
|
nengel@2
|
@ JMP_ALIGN tmp, reg
@ Round reg down to a word boundary and branch on the byte offset it had:
@       offset 0 -> 1f,  offset 1 -> 2f,  offset 2 -> 3f,  offset 3 -> 4f
@ The code following the macro must therefore provide the numeric labels
@ 1: to 4:.  tmp and the flags are clobbered.
.macro JMP_ALIGN tmp, reg
        ands            \tmp, \reg, #3          @ tmp = reg & 3, Z if aligned
        bic             \reg, \reg, #3          @ no S suffix: flags survive
        beq             1f
        subs            \tmp, \tmp, #1
        beq             2f
        subs            \tmp, \tmp, #1
        beq             3f
        b               4f
.endm
|
|
nengel@2
|
103
|
|
nengel@2
|
104 @ ----------------------------------------------------------------
|
|
nengel@2
|
105 .align 5
|
|
nengel@2
|
@ One row of the unaligned 16-pixel copy.  r1 is the word-aligned source
@ pointer and shift (1..3) the byte offset of the wanted data inside it.
@ Loads five words, extracts the 16 bytes into r9-r12, stores them at r0,
@ advances r0 and r1 by r2 and decrements the row counter r3.  The flags
@ of the subs are still live at the end, so the caller can branch on them.
.macro PUT_PIXELS16_UNALIGNED_ROW shift
        ldm             r1, {r4-r8}
        add             r1, r1, r2
        ALIGN_QWORD_D   \shift, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld             [r1]
        subs            r3, r3, #1
        stm             r0, {r9-r12}
        add             r0, r0, r2
.endm

function ff_put_pixels16_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @
        @ Copies h rows of 16 bytes from pixels (r1) to block (r0); both
        @ pointers advance by line_size (r2) per row.  h (r3) must be
        @ non-zero: every loop tests the counter after the first row.
        pld             [r1]
        push            {r4-r11, lr}
        JMP_ALIGN       r5, r1                  @ r1 &= ~3, dispatch on offset
1:
        @ source is word aligned: plain four-word copy
        ldm             r1, {r4-r7}
        add             r1, r1, r2
        stm             r0, {r4-r7}
        pld             [r1]
        subs            r3, r3, #1
        add             r0, r0, r2
        bne             1b
        pop             {r4-r11, pc}
        .align 5
2:
        @ source is 1 byte past a word boundary
        PUT_PIXELS16_UNALIGNED_ROW 1
        bne             2b
        pop             {r4-r11, pc}
        .align 5
3:
        @ source is 2 bytes past a word boundary
        PUT_PIXELS16_UNALIGNED_ROW 2
        bne             3b
        pop             {r4-r11, pc}
        .align 5
4:
        @ source is 3 bytes past a word boundary
        PUT_PIXELS16_UNALIGNED_ROW 3
        bne             4b
        pop             {r4-r11,pc}
endfunc
|
|
nengel@2
|
155
|
|
nengel@2
|
156 @ ----------------------------------------------------------------
|
|
nengel@2
|
157 .align 5
|
|
nengel@2
|
@ One row of the unaligned 8-pixel copy.  r1 is the word-aligned source
@ pointer and shift (1..3) the byte offset of the wanted data inside it.
@ Loads three words into r4, r5, r12, shifts the 8 wanted bytes into
@ r4-r5, stores them at r0, advances r0 and r1 by r2 and decrements the
@ row counter r3.  The flags of the subs are still live at the end.
.macro PUT_PIXELS8_UNALIGNED_ROW shift
        ldm             r1, {r4-r5, r12}
        add             r1, r1, r2
        ALIGN_DWORD     \shift, r4, r5, r12
        pld             [r1]
        subs            r3, r3, #1
        stm             r0, {r4-r5}
        add             r0, r0, r2
.endm

function ff_put_pixels8_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @
        @ Copies h rows of 8 bytes from pixels (r1) to block (r0); both
        @ pointers advance by line_size (r2) per row.  h (r3) must be
        @ non-zero: every loop tests the counter after the first row.
        pld             [r1]
        push            {r4-r5,lr}
        JMP_ALIGN       r5, r1                  @ r1 &= ~3, dispatch on offset
1:
        @ source is word aligned: plain two-word copy
        ldm             r1, {r4-r5}
        add             r1, r1, r2
        subs            r3, r3, #1
        pld             [r1]
        stm             r0, {r4-r5}
        add             r0, r0, r2
        bne             1b
        pop             {r4-r5,pc}
        .align 5
2:
        @ source is 1 byte past a word boundary
        PUT_PIXELS8_UNALIGNED_ROW 1
        bne             2b
        pop             {r4-r5,pc}
        .align 5
3:
        @ source is 2 bytes past a word boundary
        PUT_PIXELS8_UNALIGNED_ROW 2
        bne             3b
        pop             {r4-r5,pc}
        .align 5
4:
        @ source is 3 bytes past a word boundary
        PUT_PIXELS8_UNALIGNED_ROW 3
        bne             4b
        pop             {r4-r5,pc}
endfunc
|
|
nengel@2
|
207
|
|
nengel@2
|
208 @ ----------------------------------------------------------------
|
|
nengel@2
|
209 .align 5
|
|
nengel@2
|
@ Shared body of the two horizontal half-pel functions below.
@   avg = RND_AVG32 or NO_RND_AVG32, the byte-wise averaging macro to use
@ Register use: r0 = block, r1 = pixels, r2 = line_size, r3 = h (must be
@ non-zero), r12 = 0xFEFEFEFE mask expected by the averaging macros.
@ Each output byte i of a row is avg(src[i], src[i + 1]).
.macro PIXELS8_X2_BODY avg
        pld             [r1]
        push            {r4-r10,lr}
        ldr             r12, =0xfefefefe
        JMP_ALIGN       r5, r1                  @ r1 &= ~3, dispatch on offset
1:
        @ offset 0: r4,r5 = bytes 0..7, r6,r7 = bytes 1..8
        ldm             r1, {r4-r5, r10}
        add             r1, r1, r2
        ALIGN_DWORD_D   1, r6, r7, r4, r5, r10
        pld             [r1]
        \avg            r8, r9, r4, r5, r6, r7, r12
        subs            r3, r3, #1
        stm             r0, {r8-r9}
        add             r0, r0, r2
        bne             1b
        pop             {r4-r10,pc}
        .align 5
2:
        @ offset 1: r6,r7 = bytes 1..8, r8,r9 = bytes 2..9
        ldm             r1, {r4-r5, r10}
        add             r1, r1, r2
        ALIGN_DWORD_D   1, r6, r7, r4, r5, r10
        ALIGN_DWORD_D   2, r8, r9, r4, r5, r10
        pld             [r1]
        \avg            r4, r5, r6, r7, r8, r9, r12
        subs            r3, r3, #1
        stm             r0, {r4-r5}
        add             r0, r0, r2
        bne             2b
        pop             {r4-r10,pc}
        .align 5
3:
        @ offset 2: r6,r7 = bytes 2..9, r8,r9 = bytes 3..10
        ldm             r1, {r4-r5, r10}
        add             r1, r1, r2
        ALIGN_DWORD_D   2, r6, r7, r4, r5, r10
        ALIGN_DWORD_D   3, r8, r9, r4, r5, r10
        pld             [r1]
        \avg            r4, r5, r6, r7, r8, r9, r12
        subs            r3, r3, #1
        stm             r0, {r4-r5}
        add             r0, r0, r2
        bne             3b
        pop             {r4-r10,pc}
        .align 5
4:
        @ offset 3: r6,r7 = bytes 3..10; bytes 4..11 are already word
        @ aligned in r5,r10 and need no shifting
        ldm             r1, {r4-r5, r10}
        add             r1, r1, r2
        ALIGN_DWORD_D   3, r6, r7, r4, r5, r10
        pld             [r1]
        \avg            r8, r9, r6, r7, r5, r10, r12
        subs            r3, r3, #1
        stm             r0, {r8-r9}
        add             r0, r0, r2
        bne             4b
        pop             {r4-r10,pc}
.endm

function ff_put_pixels8_x2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @ Horizontal half-pel interpolation, averages rounded up.
        PIXELS8_X2_BODY RND_AVG32
endfunc

        .align 5
function ff_put_no_rnd_pixels8_x2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @ Horizontal half-pel interpolation, averages rounded down.
        PIXELS8_X2_BODY NO_RND_AVG32
endfunc
|
|
nengel@2
|
326
|
|
nengel@2
|
327
|
|
nengel@2
|
328 @ ----------------------------------------------------------------
|
|
nengel@2
|
329 .align 5
|
|
nengel@2
|
@ Unaligned loop of the vertical half-pel functions below.
@   shift = byte offset (1..3) of the pixels inside the aligned words
@   avg   = RND_AVG32 or NO_RND_AVG32
@ Two output rows are produced per iteration.  The row held in r4,r5 and
@ the row held in r7,r8 swap roles between the two averages: the
@ averaging macro destroys its first operand pair and keeps the second,
@ so the surviving row is always the one needed for the next output row.
.macro PIXELS8_Y2_UNALIGNED shift, avg
        ldm             r1, {r4-r6}
        add             r1, r1, r2
        pld             [r1]
        ALIGN_DWORD     \shift, r4, r5, r6      @ r4,r5 = first source row
6:      ldm             r1, {r7-r9}
        add             r1, r1, r2
        pld             [r1]
        ALIGN_DWORD     \shift, r7, r8, r9      @ r7,r8 = next source row
        \avg            r10, r11, r4, r5, r7, r8, r12
        stm             r0, {r10-r11}
        add             r0, r0, r2
        ldm             r1, {r4-r6}
        add             r1, r1, r2
        pld             [r1]
        ALIGN_DWORD     \shift, r4, r5, r6      @ r4,r5 = row after that
        subs            r3, r3, #1              @ flags stay live up to bne
        \avg            r10, r11, r7, r8, r4, r5, r12
        stm             r0, {r10-r11}
        add             r0, r0, r2
        bne             6b
        pop             {r4-r11,pc}
.endm

@ Shared body of the two vertical half-pel functions below.
@   avg = RND_AVG32 or NO_RND_AVG32, the byte-wise averaging macro to use
@ Register use: r0 = block, r1 = pixels, r2 = line_size, r3 = h,
@ r12 = 0xFEFEFEFE mask expected by the averaging macros.
@ Output row y is avg(source row y, source row y + 1).  The loops run
@ h >> 1 times and emit two rows per pass, so h >> 1 must be non-zero
@ and an odd h loses its last row.
.macro PIXELS8_Y2_BODY avg
        pld             [r1]
        push            {r4-r11,lr}
        mov             r3, r3, lsr #1          @ two rows per iteration
        ldr             r12, =0xfefefefe
        JMP_ALIGN       r5, r1                  @ r1 &= ~3, dispatch on offset
1:
        @ source is word aligned: rows can be used as loaded
        ldm             r1, {r4-r5}
        add             r1, r1, r2
6:      ldm             r1, {r6-r7}
        add             r1, r1, r2
        pld             [r1]
        \avg            r8, r9, r4, r5, r6, r7, r12
        ldm             r1, {r4-r5}
        add             r1, r1, r2
        stm             r0, {r8-r9}
        add             r0, r0, r2
        pld             [r1]
        \avg            r8, r9, r6, r7, r4, r5, r12
        subs            r3, r3, #1
        stm             r0, {r8-r9}
        add             r0, r0, r2
        bne             6b
        pop             {r4-r11,pc}
        .align 5
2:
        PIXELS8_Y2_UNALIGNED 1, \avg
        .align 5
3:
        PIXELS8_Y2_UNALIGNED 2, \avg
        .align 5
4:
        PIXELS8_Y2_UNALIGNED 3, \avg
.endm

function ff_put_pixels8_y2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @ Vertical half-pel interpolation, averages rounded up.
        PIXELS8_Y2_BODY RND_AVG32
endfunc

        .align 5
function ff_put_no_rnd_pixels8_y2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @ Vertical half-pel interpolation, averages rounded down.
        PIXELS8_Y2_BODY NO_RND_AVG32
endfunc
|
|
nengel@2
|
524
|
|
nengel@2
|
525 .ltorg
|
|
nengel@2
|
526
|
|
nengel@2
|
527 @ ----------------------------------------------------------------
|
|
nengel@2
|
@ RND_XY2_IT align, rnd
@ Row step of the 2-D half-pel filter.  Loads one source row from the
@ word-aligned pointer r1 (align = original byte offset, 0..3), advances
@ r1 by r2 and forms the horizontal pair sums of pixels a (at x) and
@ b (at x + 1), split so that no byte lane can overflow:
@       r8,  r9  = (a & 0x03030303) + (b & 0x03030303)  [+ rounding]
@       r10, r11 = ((a & 0xFCFCFCFC) >> 2) + ((b & 0xFCFCFCFC) >> 2)
@ The rounding constant is added only when r3 is even on entry, i.e. on
@ every second row.  It is 0x02020202 when rnd is lsl and 0x01010101
@ when rnd is lsr.
@ Finishes with "subs r3, r3, #1"; the resulting flags are the output the
@ caller branches on.  Clobbers r4-r12 and r14.
.macro RND_XY2_IT align, rnd
        @ load three words; the registers are chosen per alignment so the
        @ shifted results can land in r4-r7 without extra moves
.if \align == 0
        ldm             r1, {r6-r8}
.elseif \align == 3
        ldm             r1, {r5-r7}
.else
        ldm             r1, {r8-r10}
.endif
        add             r1, r1, r2
        pld             [r1]
        @ afterwards r4,r5 and r6,r7 hold the two 8-pixel groups that are
        @ one byte apart (which one is a and which is b does not matter,
        @ the sums are symmetric)
.if \align == 0
        ALIGN_DWORD_D   1, r4, r5, r6, r7, r8
.elseif \align == 1
        ALIGN_DWORD_D   1, r4, r5, r8, r9, r10
        ALIGN_DWORD_D   2, r6, r7, r8, r9, r10
.elseif \align == 2
        ALIGN_DWORD_D   2, r4, r5, r8, r9, r10
        ALIGN_DWORD_D   3, r6, r7, r8, r9, r10
.elseif \align == 3
        ALIGN_DWORD_D   3, r4, r5, r5, r6, r7
.endif
        ldr             r14, =0x03030303
        tst             r3, #1                  @ Z set on even r3
        and             r8, r4, r14             @ low two bits of each byte
        and             r9, r5, r14
        and             r10, r6, r14
        and             r11, r7, r14
        andeq           r14, r14, r14, \rnd #1  @ r14 = rounding constant
        add             r8, r8, r10
        add             r9, r9, r11
        ldr             r12, =0xfcfcfcfc >> 2
        addeq           r8, r8, r14
        addeq           r9, r9, r14
        and             r4, r12, r4, lsr #2     @ high six bits of each byte
        and             r5, r12, r5, lsr #2
        and             r6, r12, r6, lsr #2
        and             r7, r12, r7, lsr #2
        add             r10, r4, r6
        add             r11, r5, r7
        subs            r3, r3, #1
.endm
|
|
nengel@2
|
571
|
|
nengel@2
|
@ RND_XY2_EXPAND align, rnd
@ Complete loop of the 2-D half-pel filter for one source alignment.
@ Combines the RND_XY2_IT sums of every two neighbouring source rows:
@       out = (((low_prev + low_cur) >> 2) & 0x0F0F0F0F)
@             + (high_prev + high_cur)
@ and stores 8 bytes per row at r0, advancing r0 by r2.
@ The row counter r3 is decremented once by the priming RND_XY2_IT and
@ once per loop pass, so "bge" leaves the loop after h output rows.
@ Ends the function: the invoking code must already have pushed
@ {r4-r11, lr}, which the final pop unwinds.
.macro RND_XY2_EXPAND align, rnd
        RND_XY2_IT      \align, \rnd            @ sums of the first row
6:
        push            {r8-r11}                @ keep previous row sums
        RND_XY2_IT      \align, \rnd            @ sums of the current row
        pop             {r4-r7}                 @ r4,r5 low / r6,r7 high
        add             r4, r4, r8
        add             r5, r5, r9
        ldr             r14, =0x0f0f0f0f
        add             r6, r6, r10
        add             r7, r7, r11
        and             r4, r14, r4, lsr #2
        and             r5, r14, r5, lsr #2
        add             r4, r4, r6
        add             r5, r5, r7
        stm             r0, {r4-r5}
        add             r0, r0, r2
        bge             6b                      @ flags from subs in RND_XY2_IT
        pop             {r4-r11,pc}
.endm
|
|
nengel@2
|
591
|
|
nengel@2
|
592 .align 5
|
|
nengel@2
|
function ff_put_pixels8_xy2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @
        @ 2-D half-pel interpolation of an 8-pixel wide block, rounding
        @ variant ("lsl" makes RND_XY2_IT use the constant 0x02020202).
        @ One RND_XY2_EXPAND instance per source alignment; each instance
        @ contains its own loop and function return.
        pld             [r1]
        push            {r4-r11,lr}             @ R14 is also called LR
        JMP_ALIGN       r5, r1                  @ r1 &= ~3, dispatch on offset
1:
        RND_XY2_EXPAND  0, lsl
        .align 5
2:
        RND_XY2_EXPAND  1, lsl
        .align 5
3:
        RND_XY2_EXPAND  2, lsl
        .align 5
4:
        RND_XY2_EXPAND  3, lsl
endfunc
|
|
nengel@2
|
607
|
|
nengel@2
|
608 .align 5
|
|
nengel@2
|
function ff_put_no_rnd_pixels8_xy2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @
        @ 2-D half-pel interpolation of an 8-pixel wide block, no-rounding
        @ variant ("lsr" makes RND_XY2_IT use the constant 0x01010101).
        @ One RND_XY2_EXPAND instance per source alignment; each instance
        @ contains its own loop and function return.
        pld             [r1]
        push            {r4-r11,lr}
        JMP_ALIGN       r5, r1                  @ r1 &= ~3, dispatch on offset
1:
        RND_XY2_EXPAND  0, lsr
        .align 5
2:
        RND_XY2_EXPAND  1, lsr
        .align 5
3:
        RND_XY2_EXPAND  2, lsr
        .align 5
4:
        RND_XY2_EXPAND  3, lsr
endfunc
|
|
nengel@2
|
623
|
|
nengel@2
|
624 .align 5
|
|
nengel@2
|
@ void ff_add_pixels_clamped_arm(int16_t *block, uint8_t *dest, int stride)
@
@ Adds an 8x8 block of signed 16-bit values to an 8x8 block of bytes and
@ saturates every result to 0..255.
@   r0 = block, read sequentially, 16 bytes per row
@   r1 = dest, read and written as two words per row, so it has to be
@        word aligned; advanced by stride (r2) after every row
@   r10 = row counter (8 rows), r12 = scratch for the range test
@
@ Saturation: sum = coefficient + pixel.  "movs r12, sum, asr #8" is
@ non-zero exactly when sum is outside 0..255, for any 16-bit
@ coefficient.  (Testing bit 8 alone, as an earlier version of this
@ routine did, is only valid for sums in -256..511 and lets larger
@ sums spill into the neighbouring byte lanes.)  The replacement value is
@ (~coefficient) >> 24: 0x00 for a negative coefficient, 0xFF for a
@ positive one.
@
@ The loads for the next pixel pair are hoisted between the arithmetic of
@ the current pair; keep the instruction order when editing.
function ff_add_pixels_clamped_arm, export=1
        push            {r4-r10}
        mov             r10, #8                 @ 8 rows
1:
        ldr             r4, [r1]                @ dest[0..3]

        @ block[0] and block[1]
        ldrsh           r5, [r0]
        ldrsh           r7, [r0, #2]
        and             r6, r4, #0xFF
        and             r8, r4, #0xFF00
        add             r6, r5, r6
        add             r8, r7, r8, lsr #8
        mvn             r5, r5
        mvn             r7, r7
        movs            r12, r6, asr #8         @ out of 0..255 ?
        movne           r6, r5, lsr #24
        movs            r12, r8, asr #8
        movne           r8, r7, lsr #24
        mov             r9, r6
        ldrsh           r5, [r0, #4]            @ block[2], loaded early
        orr             r9, r9, r8, lsl #8

        @ block[2] and block[3]
        ldrsh           r7, [r0, #6]
        and             r6, r4, #0xFF0000
        and             r8, r4, #0xFF000000
        add             r6, r5, r6, lsr #16
        add             r8, r7, r8, lsr #24
        mvn             r5, r5
        mvn             r7, r7
        movs            r12, r6, asr #8
        movne           r6, r5, lsr #24
        movs            r12, r8, asr #8
        movne           r8, r7, lsr #24
        orr             r9, r9, r6, lsl #16
        ldr             r4, [r1, #4]            @ dest[4..7], loaded early
        orr             r9, r9, r8, lsl #24
        ldrsh           r5, [r0, #8]            @ block[4], loaded early
        str             r9, [r1]                @ store dest[0..3]

        @ block[4] and block[5]
        ldrsh           r7, [r0, #10]
        and             r6, r4, #0xFF
        and             r8, r4, #0xFF00
        add             r6, r5, r6
        add             r8, r7, r8, lsr #8
        mvn             r5, r5
        mvn             r7, r7
        movs            r12, r6, asr #8
        movne           r6, r5, lsr #24
        movs            r12, r8, asr #8
        movne           r8, r7, lsr #24
        mov             r9, r6
        ldrsh           r5, [r0, #12]           @ block[6], loaded early
        orr             r9, r9, r8, lsl #8

        @ block[6] and block[7]
        ldrsh           r7, [r0, #14]
        and             r6, r4, #0xFF0000
        and             r8, r4, #0xFF000000
        add             r6, r5, r6, lsr #16
        add             r8, r7, r8, lsr #24
        mvn             r5, r5
        mvn             r7, r7
        movs            r12, r6, asr #8
        movne           r6, r5, lsr #24
        movs            r12, r8, asr #8
        movne           r8, r7, lsr #24
        orr             r9, r9, r6, lsl #16
        add             r0, r0, #16             @ next row of coefficients
        orr             r9, r9, r8, lsl #24
        subs            r10, r10, #1            @ flags stay live up to bne
        str             r9, [r1, #4]            @ store dest[4..7]

        add             r1, r1, r2              @ next row of pixels
        bne             1b

        pop             {r4-r10}
        bx              lr
endfunc
|