Mercurial > cgi-bin > hgwebdir.cgi > PR > Applications > VSs > VSs__H264__App
view libavcodec/simple_idct.c @ 9:ea1ba68cf0ed
update to match api changes + add sscc produced source
| author | Nina Engelhardt <nengel@mailbox.tu-berlin.de> |
|---|---|
| date | Wed, 05 Jun 2013 14:43:26 +0200 |
| parents | |
| children |
line source
1 /*
2 * Simple IDCT
3 *
4 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
23 /**
24 * @file
25 * simpleidct in C.
26 */
/*
  Based upon some commented-out C code from mpeg2dec (idct_mmx.c,
  written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>).
 */
32 #include "avcodec.h"
33 #include "dsputil.h"
34 #include "mathops.h"
35 #include "simple_idct.h"
/*
 * Fixed-point IDCT weights.
 *
 * Wi = cos(i*M_PI/16) * sqrt(2) * (1 << 14) + 0.5, truncated to an integer,
 * i.e. the weights carry 14 fractional bits.
 */
#define W1 22725
#define W2 21407
#define W3 19266
/*
 * The formula above evaluates to 16384 for i == 4; the value used here is one
 * less. NOTE(review): presumably lowered on purpose -- confirm before
 * "correcting" it, since every output value depends on this constant.
 */
#define W4 16383
#define W5 12873
#define W6 8867
#define W7 4520

/*
 * Right shift applied at the end of the row pass. With 14-bit weights this
 * leaves the row results scaled up by 1 << (14 - 11) = 8.
 */
#define ROW_SHIFT 11

/*
 * Right shift applied at the end of the column pass:
 * 20 = 14 (weight scaling) + 6.
 */
#define COL_SHIFT 20
59 static inline void idctRowCondDC (DCTELEM * row)
60 {
61 int a0, a1, a2, a3, b0, b1, b2, b3;
62 uint64_t temp;
64 #if HAVE_BIGENDIAN
65 #define ROW0_MASK 0xffff000000000000LL
66 #else
67 #define ROW0_MASK 0xffffLL
68 #endif
69 if(sizeof(DCTELEM)==2){
70 if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) |
71 ((uint64_t *)row)[1]) == 0) {
72 temp = (row[0] << 3) & 0xffff;
73 temp += temp << 16;
74 temp += temp << 32;
75 ((uint64_t *)row)[0] = temp;
76 ((uint64_t *)row)[1] = temp;
77 return;
78 }
79 }else{
80 if (!(row[1]|row[2]|row[3]|row[4]|row[5]|row[6]|row[7])) {
81 row[0]=row[1]=row[2]=row[3]=row[4]=row[5]=row[6]=row[7]= row[0] << 3;
82 return;
83 }
84 }
86 a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1));
87 a1 = a0;
88 a2 = a0;
89 a3 = a0;
91 /* no need to optimize : gcc does it */
92 a0 += W2 * row[2];
93 a1 += W6 * row[2];
94 a2 -= W6 * row[2];
95 a3 -= W2 * row[2];
97 b0 = MUL16(W1, row[1]);
98 MAC16(b0, W3, row[3]);
99 b1 = MUL16(W3, row[1]);
100 MAC16(b1, -W7, row[3]);
101 b2 = MUL16(W5, row[1]);
102 MAC16(b2, -W1, row[3]);
103 b3 = MUL16(W7, row[1]);
104 MAC16(b3, -W5, row[3]);
106 temp = ((uint64_t*)row)[1];
108 if (temp != 0) {
109 a0 += W4*row[4] + W6*row[6];
110 a1 += - W4*row[4] - W2*row[6];
111 a2 += - W4*row[4] + W2*row[6];
112 a3 += W4*row[4] - W6*row[6];
114 MAC16(b0, W5, row[5]);
115 MAC16(b0, W7, row[7]);
117 MAC16(b1, -W1, row[5]);
118 MAC16(b1, -W5, row[7]);
120 MAC16(b2, W7, row[5]);
121 MAC16(b2, W3, row[7]);
123 MAC16(b3, W3, row[5]);
124 MAC16(b3, -W1, row[7]);
125 }
127 row[0] = (a0 + b0) >> ROW_SHIFT;
128 row[7] = (a0 - b0) >> ROW_SHIFT;
129 row[1] = (a1 + b1) >> ROW_SHIFT;
130 row[6] = (a1 - b1) >> ROW_SHIFT;
131 row[2] = (a2 + b2) >> ROW_SHIFT;
132 row[5] = (a2 - b2) >> ROW_SHIFT;
133 row[3] = (a3 + b3) >> ROW_SHIFT;
134 row[4] = (a3 - b3) >> ROW_SHIFT;
135 }
137 static inline void idctSparseColPut (uint8_t *dest, int line_size,
138 DCTELEM * col)
139 {
140 int a0, a1, a2, a3, b0, b1, b2, b3;
141 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
143 /* XXX: I did that only to give same values as previous code */
144 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
145 a1 = a0;
146 a2 = a0;
147 a3 = a0;
149 a0 += + W2*col[8*2];
150 a1 += + W6*col[8*2];
151 a2 += - W6*col[8*2];
152 a3 += - W2*col[8*2];
154 b0 = MUL16(W1, col[8*1]);
155 b1 = MUL16(W3, col[8*1]);
156 b2 = MUL16(W5, col[8*1]);
157 b3 = MUL16(W7, col[8*1]);
159 MAC16(b0, + W3, col[8*3]);
160 MAC16(b1, - W7, col[8*3]);
161 MAC16(b2, - W1, col[8*3]);
162 MAC16(b3, - W5, col[8*3]);
164 if(col[8*4]){
165 a0 += + W4*col[8*4];
166 a1 += - W4*col[8*4];
167 a2 += - W4*col[8*4];
168 a3 += + W4*col[8*4];
169 }
171 if (col[8*5]) {
172 MAC16(b0, + W5, col[8*5]);
173 MAC16(b1, - W1, col[8*5]);
174 MAC16(b2, + W7, col[8*5]);
175 MAC16(b3, + W3, col[8*5]);
176 }
178 if(col[8*6]){
179 a0 += + W6*col[8*6];
180 a1 += - W2*col[8*6];
181 a2 += + W2*col[8*6];
182 a3 += - W6*col[8*6];
183 }
185 if (col[8*7]) {
186 MAC16(b0, + W7, col[8*7]);
187 MAC16(b1, - W5, col[8*7]);
188 MAC16(b2, + W3, col[8*7]);
189 MAC16(b3, - W1, col[8*7]);
190 }
192 dest[0] = cm[(a0 + b0) >> COL_SHIFT];
193 dest += line_size;
194 dest[0] = cm[(a1 + b1) >> COL_SHIFT];
195 dest += line_size;
196 dest[0] = cm[(a2 + b2) >> COL_SHIFT];
197 dest += line_size;
198 dest[0] = cm[(a3 + b3) >> COL_SHIFT];
199 dest += line_size;
200 dest[0] = cm[(a3 - b3) >> COL_SHIFT];
201 dest += line_size;
202 dest[0] = cm[(a2 - b2) >> COL_SHIFT];
203 dest += line_size;
204 dest[0] = cm[(a1 - b1) >> COL_SHIFT];
205 dest += line_size;
206 dest[0] = cm[(a0 - b0) >> COL_SHIFT];
207 }
209 static inline void idctSparseColAdd (uint8_t *dest, int line_size,
210 DCTELEM * col)
211 {
212 int a0, a1, a2, a3, b0, b1, b2, b3;
213 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
215 /* XXX: I did that only to give same values as previous code */
216 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
217 a1 = a0;
218 a2 = a0;
219 a3 = a0;
221 a0 += + W2*col[8*2];
222 a1 += + W6*col[8*2];
223 a2 += - W6*col[8*2];
224 a3 += - W2*col[8*2];
226 b0 = MUL16(W1, col[8*1]);
227 b1 = MUL16(W3, col[8*1]);
228 b2 = MUL16(W5, col[8*1]);
229 b3 = MUL16(W7, col[8*1]);
231 MAC16(b0, + W3, col[8*3]);
232 MAC16(b1, - W7, col[8*3]);
233 MAC16(b2, - W1, col[8*3]);
234 MAC16(b3, - W5, col[8*3]);
236 if(col[8*4]){
237 a0 += + W4*col[8*4];
238 a1 += - W4*col[8*4];
239 a2 += - W4*col[8*4];
240 a3 += + W4*col[8*4];
241 }
243 if (col[8*5]) {
244 MAC16(b0, + W5, col[8*5]);
245 MAC16(b1, - W1, col[8*5]);
246 MAC16(b2, + W7, col[8*5]);
247 MAC16(b3, + W3, col[8*5]);
248 }
250 if(col[8*6]){
251 a0 += + W6*col[8*6];
252 a1 += - W2*col[8*6];
253 a2 += + W2*col[8*6];
254 a3 += - W6*col[8*6];
255 }
257 if (col[8*7]) {
258 MAC16(b0, + W7, col[8*7]);
259 MAC16(b1, - W5, col[8*7]);
260 MAC16(b2, + W3, col[8*7]);
261 MAC16(b3, - W1, col[8*7]);
262 }
264 dest[0] = cm[dest[0] + ((a0 + b0) >> COL_SHIFT)];
265 dest += line_size;
266 dest[0] = cm[dest[0] + ((a1 + b1) >> COL_SHIFT)];
267 dest += line_size;
268 dest[0] = cm[dest[0] + ((a2 + b2) >> COL_SHIFT)];
269 dest += line_size;
270 dest[0] = cm[dest[0] + ((a3 + b3) >> COL_SHIFT)];
271 dest += line_size;
272 dest[0] = cm[dest[0] + ((a3 - b3) >> COL_SHIFT)];
273 dest += line_size;
274 dest[0] = cm[dest[0] + ((a2 - b2) >> COL_SHIFT)];
275 dest += line_size;
276 dest[0] = cm[dest[0] + ((a1 - b1) >> COL_SHIFT)];
277 dest += line_size;
278 dest[0] = cm[dest[0] + ((a0 - b0) >> COL_SHIFT)];
279 }
281 static inline void idctSparseCol (DCTELEM * col)
282 {
283 int a0, a1, a2, a3, b0, b1, b2, b3;
285 /* XXX: I did that only to give same values as previous code */
286 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
287 a1 = a0;
288 a2 = a0;
289 a3 = a0;
291 a0 += + W2*col[8*2];
292 a1 += + W6*col[8*2];
293 a2 += - W6*col[8*2];
294 a3 += - W2*col[8*2];
296 b0 = MUL16(W1, col[8*1]);
297 b1 = MUL16(W3, col[8*1]);
298 b2 = MUL16(W5, col[8*1]);
299 b3 = MUL16(W7, col[8*1]);
301 MAC16(b0, + W3, col[8*3]);
302 MAC16(b1, - W7, col[8*3]);
303 MAC16(b2, - W1, col[8*3]);
304 MAC16(b3, - W5, col[8*3]);
306 if(col[8*4]){
307 a0 += + W4*col[8*4];
308 a1 += - W4*col[8*4];
309 a2 += - W4*col[8*4];
310 a3 += + W4*col[8*4];
311 }
313 if (col[8*5]) {
314 MAC16(b0, + W5, col[8*5]);
315 MAC16(b1, - W1, col[8*5]);
316 MAC16(b2, + W7, col[8*5]);
317 MAC16(b3, + W3, col[8*5]);
318 }
320 if(col[8*6]){
321 a0 += + W6*col[8*6];
322 a1 += - W2*col[8*6];
323 a2 += + W2*col[8*6];
324 a3 += - W6*col[8*6];
325 }
327 if (col[8*7]) {
328 MAC16(b0, + W7, col[8*7]);
329 MAC16(b1, - W5, col[8*7]);
330 MAC16(b2, + W3, col[8*7]);
331 MAC16(b3, - W1, col[8*7]);
332 }
334 col[0 ] = ((a0 + b0) >> COL_SHIFT);
335 col[8 ] = ((a1 + b1) >> COL_SHIFT);
336 col[16] = ((a2 + b2) >> COL_SHIFT);
337 col[24] = ((a3 + b3) >> COL_SHIFT);
338 col[32] = ((a3 - b3) >> COL_SHIFT);
339 col[40] = ((a2 - b2) >> COL_SHIFT);
340 col[48] = ((a1 - b1) >> COL_SHIFT);
341 col[56] = ((a0 - b0) >> COL_SHIFT);
342 }
344 void ff_simple_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
345 {
346 int i;
347 for(i=0; i<8; i++)
348 idctRowCondDC(block + i*8);
350 for(i=0; i<8; i++)
351 idctSparseColPut(dest + i, line_size, block + i);
352 }
354 void ff_simple_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
355 {
356 int i;
357 for(i=0; i<8; i++)
358 idctRowCondDC(block + i*8);
360 for(i=0; i<8; i++)
361 idctSparseColAdd(dest + i, line_size, block + i);
362 }
364 void ff_simple_idct(DCTELEM *block)
365 {
366 int i;
367 for(i=0; i<8; i++)
368 idctRowCondDC(block + i*8);
370 for(i=0; i<8; i++)
371 idctSparseCol(block + i);
372 }
