| rev |
line source |
|
nengel@2
|
1 /*
|
|
nengel@2
|
2 * iWMMXt optimized DSP utils
|
|
nengel@2
|
3 * Copyright (c) 2004 AGAWA Koji
|
|
nengel@2
|
4 *
|
|
nengel@2
|
5 * This file is part of FFmpeg.
|
|
nengel@2
|
6 *
|
|
nengel@2
|
7 * FFmpeg is free software; you can redistribute it and/or
|
|
nengel@2
|
8 * modify it under the terms of the GNU Lesser General Public
|
|
nengel@2
|
9 * License as published by the Free Software Foundation; either
|
|
nengel@2
|
10 * version 2.1 of the License, or (at your option) any later version.
|
|
nengel@2
|
11 *
|
|
nengel@2
|
12 * FFmpeg is distributed in the hope that it will be useful,
|
|
nengel@2
|
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
nengel@2
|
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
nengel@2
|
15 * Lesser General Public License for more details.
|
|
nengel@2
|
16 *
|
|
nengel@2
|
17 * You should have received a copy of the GNU Lesser General Public
|
|
nengel@2
|
18 * License along with FFmpeg; if not, write to the Free Software
|
|
nengel@2
|
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
nengel@2
|
20 */
|
|
nengel@2
|
21
|
|
nengel@2
|
22 #include "libavcodec/dsputil.h"
|
|
nengel@2
|
23
|
|
nengel@2
|
24 #define DEF(x, y) x ## _no_rnd_ ## y ##_iwmmxt
|
|
nengel@2
|
25 #define SET_RND(regd) __asm__ volatile ("mov r12, #1 \n\t tbcsth " #regd ", r12":::"r12");
|
|
nengel@2
|
26 #define WAVG2B "wavg2b"
|
|
nengel@2
|
27 #include "dsputil_iwmmxt_rnd_template.c"
|
|
nengel@2
|
28 #undef DEF
|
|
nengel@2
|
29 #undef SET_RND
|
|
nengel@2
|
30 #undef WAVG2B
|
|
nengel@2
|
31
|
|
nengel@2
|
32 #define DEF(x, y) x ## _ ## y ##_iwmmxt
|
|
nengel@2
|
33 #define SET_RND(regd) __asm__ volatile ("mov r12, #2 \n\t tbcsth " #regd ", r12":::"r12");
|
|
nengel@2
|
34 #define WAVG2B "wavg2br"
|
|
nengel@2
|
35 #include "dsputil_iwmmxt_rnd_template.c"
|
|
nengel@2
|
36 #undef DEF
|
|
nengel@2
|
37 #undef SET_RND
|
|
nengel@2
|
38 #undef WAVG2BR
|
|
nengel@2
|
39
|
|
nengel@2
|
/*
 * OP(AVG): shared body of the two 8-pixel-wide "y2" functions below.
 *
 * Expects the enclosing function to provide `block`, `pixels`, `line_size`
 * and `h`. For each output row it stores to `block` the AVG of two
 * vertically adjacent 8-byte source rows, advancing both pointers by
 * `line_size` per row.
 *
 * The source pointer may be unaligned: its low three bits go to wcgr1 and the
 * pointer is rounded down to a multiple of 8; each row is then read as two
 * 8-byte loads and realigned with walignr1. Hence 16 bytes are read per row,
 * starting at the rounded-down address, and h + 1 rows are read in total.
 *
 * Two rows are produced per loop iteration and the loop exits only when h
 * reaches exactly zero, so h must be a positive even number.
 *
 * "cc" is listed as a clobber because `subs` updates the condition flags.
 *
 * need scheduling
 */
#define OP(AVG)                                                 \
    __asm__ volatile (                                          \
        /* alignment */                                         \
        "and r12, %[pixels], #7 \n\t"                           \
        "bic %[pixels], %[pixels], #7 \n\t"                     \
        "tmcr wcgr1, r12 \n\t"                                  \
                                                                \
        /* first source row -> wr4 */                           \
        "wldrd wr0, [%[pixels]] \n\t"                           \
        "wldrd wr1, [%[pixels], #8] \n\t"                       \
        "add %[pixels], %[pixels], %[line_size] \n\t"           \
        "walignr1 wr4, wr0, wr1 \n\t"                           \
                                                                \
        "1: \n\t"                                               \
                                                                \
        /* next row -> wr5, store AVG(wr4, wr5) */              \
        "wldrd wr2, [%[pixels]] \n\t"                           \
        "wldrd wr3, [%[pixels], #8] \n\t"                       \
        "add %[pixels], %[pixels], %[line_size] \n\t"           \
        "pld [%[pixels]] \n\t"                                  \
        "walignr1 wr5, wr2, wr3 \n\t"                           \
        AVG " wr6, wr4, wr5 \n\t"                               \
        "wstrd wr6, [%[block]] \n\t"                            \
        "add %[block], %[block], %[line_size] \n\t"             \
                                                                \
        /* next row -> wr4, store AVG(wr4, wr5) */              \
        "wldrd wr0, [%[pixels]] \n\t"                           \
        "wldrd wr1, [%[pixels], #8] \n\t"                       \
        "add %[pixels], %[pixels], %[line_size] \n\t"           \
        "walignr1 wr4, wr0, wr1 \n\t"                           \
        "pld [%[pixels]] \n\t"                                  \
        AVG " wr6, wr4, wr5 \n\t"                               \
        "wstrd wr6, [%[block]] \n\t"                            \
        "add %[block], %[block], %[line_size] \n\t"             \
                                                                \
        "subs %[h], %[h], #2 \n\t"                              \
        "bne 1b \n\t"                                           \
        : [block]"+r"(block), [pixels]"+r"(pixels), [h]"+r"(h)  \
        : [line_size]"r"(line_size)                             \
        : "cc", "memory", "r12")

/* 8-wide vertical two-row average using the "wavg2br" instruction. */
void put_pixels8_y2_iwmmxt(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
{
    OP("wavg2br");
}

/* 8-wide vertical two-row average using the "wavg2b" instruction. */
void put_no_rnd_pixels8_y2_iwmmxt(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
{
    OP("wavg2b");
}
#undef OP
|
|
nengel@2
|
87
|
|
nengel@2
|
88 void add_pixels_clamped_iwmmxt(const DCTELEM *block, uint8_t *pixels, int line_size)
|
|
nengel@2
|
89 {
|
|
nengel@2
|
90 uint8_t *pixels2 = pixels + line_size;
|
|
nengel@2
|
91
|
|
nengel@2
|
92 __asm__ volatile (
|
|
nengel@2
|
93 "mov r12, #4 \n\t"
|
|
nengel@2
|
94 "1: \n\t"
|
|
nengel@2
|
95 "pld [%[pixels], %[line_size2]] \n\t"
|
|
nengel@2
|
96 "pld [%[pixels2], %[line_size2]] \n\t"
|
|
nengel@2
|
97 "wldrd wr4, [%[pixels]] \n\t"
|
|
nengel@2
|
98 "wldrd wr5, [%[pixels2]] \n\t"
|
|
nengel@2
|
99 "pld [%[block], #32] \n\t"
|
|
nengel@2
|
100 "wunpckelub wr6, wr4 \n\t"
|
|
nengel@2
|
101 "wldrd wr0, [%[block]] \n\t"
|
|
nengel@2
|
102 "wunpckehub wr7, wr4 \n\t"
|
|
nengel@2
|
103 "wldrd wr1, [%[block], #8] \n\t"
|
|
nengel@2
|
104 "wunpckelub wr8, wr5 \n\t"
|
|
nengel@2
|
105 "wldrd wr2, [%[block], #16] \n\t"
|
|
nengel@2
|
106 "wunpckehub wr9, wr5 \n\t"
|
|
nengel@2
|
107 "wldrd wr3, [%[block], #24] \n\t"
|
|
nengel@2
|
108 "add %[block], %[block], #32 \n\t"
|
|
nengel@2
|
109 "waddhss wr10, wr0, wr6 \n\t"
|
|
nengel@2
|
110 "waddhss wr11, wr1, wr7 \n\t"
|
|
nengel@2
|
111 "waddhss wr12, wr2, wr8 \n\t"
|
|
nengel@2
|
112 "waddhss wr13, wr3, wr9 \n\t"
|
|
nengel@2
|
113 "wpackhus wr14, wr10, wr11 \n\t"
|
|
nengel@2
|
114 "wpackhus wr15, wr12, wr13 \n\t"
|
|
nengel@2
|
115 "wstrd wr14, [%[pixels]] \n\t"
|
|
nengel@2
|
116 "add %[pixels], %[pixels], %[line_size2] \n\t"
|
|
nengel@2
|
117 "subs r12, r12, #1 \n\t"
|
|
nengel@2
|
118 "wstrd wr15, [%[pixels2]] \n\t"
|
|
nengel@2
|
119 "add %[pixels2], %[pixels2], %[line_size2] \n\t"
|
|
nengel@2
|
120 "bne 1b \n\t"
|
|
nengel@2
|
121 : [block]"+r"(block), [pixels]"+r"(pixels), [pixels2]"+r"(pixels2)
|
|
nengel@2
|
122 : [line_size2]"r"(line_size << 1)
|
|
nengel@2
|
123 : "cc", "memory", "r12");
|
|
nengel@2
|
124 }
|
|
nengel@2
|
125
|
|
nengel@2
|
126 static void clear_blocks_iwmmxt(DCTELEM *blocks)
|
|
nengel@2
|
127 {
|
|
nengel@2
|
128 __asm__ volatile(
|
|
nengel@2
|
129 "wzero wr0 \n\t"
|
|
nengel@2
|
130 "mov r1, #(128 * 6 / 32) \n\t"
|
|
nengel@2
|
131 "1: \n\t"
|
|
nengel@2
|
132 "wstrd wr0, [%0] \n\t"
|
|
nengel@2
|
133 "wstrd wr0, [%0, #8] \n\t"
|
|
nengel@2
|
134 "wstrd wr0, [%0, #16] \n\t"
|
|
nengel@2
|
135 "wstrd wr0, [%0, #24] \n\t"
|
|
nengel@2
|
136 "subs r1, r1, #1 \n\t"
|
|
nengel@2
|
137 "add %0, %0, #32 \n\t"
|
|
nengel@2
|
138 "bne 1b \n\t"
|
|
nengel@2
|
139 : "+r"(blocks)
|
|
nengel@2
|
140 :
|
|
nengel@2
|
141 : "r1"
|
|
nengel@2
|
142 );
|
|
nengel@2
|
143 }
|
|
nengel@2
|
144
|
|
nengel@2
|
/*
 * Do-nothing function: it ignores all four arguments and touches neither
 * buffer.
 */
static void nop(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    /* Parameters are intentionally unused. */
    (void)block;
    (void)pixels;
    (void)line_size;
    (void)h;
}
|
|
nengel@2
|
149
|
|
nengel@2
|
150 /* A run time test is not simple. If this file is compiled in
|
|
nengel@2
|
151 * then we should install the functions
|
|
nengel@2
|
152 */
|
|
nengel@2
|
153 int mm_flags = FF_MM_IWMMXT; /* multimedia extension flags */
|
|
nengel@2
|
154
|
|
nengel@2
|
155 void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx)
|
|
nengel@2
|
156 {
|
|
nengel@2
|
157 if (avctx->dsp_mask) {
|
|
nengel@2
|
158 if (avctx->dsp_mask & FF_MM_FORCE)
|
|
nengel@2
|
159 mm_flags |= (avctx->dsp_mask & 0xffff);
|
|
nengel@2
|
160 else
|
|
nengel@2
|
161 mm_flags &= ~(avctx->dsp_mask & 0xffff);
|
|
nengel@2
|
162 }
|
|
nengel@2
|
163
|
|
nengel@2
|
164 if (!(mm_flags & FF_MM_IWMMXT)) return;
|
|
nengel@2
|
165
|
|
nengel@2
|
166 c->add_pixels_clamped = add_pixels_clamped_iwmmxt;
|
|
nengel@2
|
167
|
|
nengel@2
|
168 c->clear_blocks = clear_blocks_iwmmxt;
|
|
nengel@2
|
169
|
|
nengel@2
|
170 c->put_pixels_tab[0][0] = put_pixels16_iwmmxt;
|
|
nengel@2
|
171 c->put_pixels_tab[0][1] = put_pixels16_x2_iwmmxt;
|
|
nengel@2
|
172 c->put_pixels_tab[0][2] = put_pixels16_y2_iwmmxt;
|
|
nengel@2
|
173 c->put_pixels_tab[0][3] = put_pixels16_xy2_iwmmxt;
|
|
nengel@2
|
174 c->put_no_rnd_pixels_tab[0][0] = put_pixels16_iwmmxt;
|
|
nengel@2
|
175 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_iwmmxt;
|
|
nengel@2
|
176 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_iwmmxt;
|
|
nengel@2
|
177 c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_iwmmxt;
|
|
nengel@2
|
178
|
|
nengel@2
|
179 c->put_pixels_tab[1][0] = put_pixels8_iwmmxt;
|
|
nengel@2
|
180 c->put_pixels_tab[1][1] = put_pixels8_x2_iwmmxt;
|
|
nengel@2
|
181 c->put_pixels_tab[1][2] = put_pixels8_y2_iwmmxt;
|
|
nengel@2
|
182 c->put_pixels_tab[1][3] = put_pixels8_xy2_iwmmxt;
|
|
nengel@2
|
183 c->put_no_rnd_pixels_tab[1][0] = put_pixels8_iwmmxt;
|
|
nengel@2
|
184 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_iwmmxt;
|
|
nengel@2
|
185 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_iwmmxt;
|
|
nengel@2
|
186 c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_iwmmxt;
|
|
nengel@2
|
187
|
|
nengel@2
|
188 c->avg_pixels_tab[0][0] = avg_pixels16_iwmmxt;
|
|
nengel@2
|
189 c->avg_pixels_tab[0][1] = avg_pixels16_x2_iwmmxt;
|
|
nengel@2
|
190 c->avg_pixels_tab[0][2] = avg_pixels16_y2_iwmmxt;
|
|
nengel@2
|
191 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_iwmmxt;
|
|
nengel@2
|
192 c->avg_no_rnd_pixels_tab[0][0] = avg_pixels16_iwmmxt;
|
|
nengel@2
|
193 c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_iwmmxt;
|
|
nengel@2
|
194 c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_iwmmxt;
|
|
nengel@2
|
195 c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_iwmmxt;
|
|
nengel@2
|
196
|
|
nengel@2
|
197 c->avg_pixels_tab[1][0] = avg_pixels8_iwmmxt;
|
|
nengel@2
|
198 c->avg_pixels_tab[1][1] = avg_pixels8_x2_iwmmxt;
|
|
nengel@2
|
199 c->avg_pixels_tab[1][2] = avg_pixels8_y2_iwmmxt;
|
|
nengel@2
|
200 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_iwmmxt;
|
|
nengel@2
|
201 c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_iwmmxt;
|
|
nengel@2
|
202 c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_iwmmxt;
|
|
nengel@2
|
203 c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_iwmmxt;
|
|
nengel@2
|
204 c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_iwmmxt;
|
|
nengel@2
|
205 }
|