| rev |
line source |
|
nengel@2
|
1 /*
|
|
nengel@2
|
2 * Copyright (c) 2009 TUDelft
|
|
nengel@2
|
3 *
|
|
nengel@2
|
4 * Cell Parallel SPU - 2DWave Macroblock Decoding.
|
|
nengel@2
|
5 */
|
|
nengel@2
|
6
|
|
nengel@2
|
7 /**
|
|
nengel@2
|
8 * @file libavcodec/cell/spu/h264_main_spu.c
|
|
nengel@2
|
9 * Cell Parallel SPU - 2DWave Macroblock Decoding
|
|
nengel@2
|
10 * @author C C Chi <c.c.chi@student.tudelft.nl>
|
|
nengel@2
|
11 *
|
|
nengel@2
|
12 * SIMD SPU kernels
|
|
nengel@2
|
13 * H.264/AVC motion compensation
|
|
nengel@2
|
14 * @author Mauricio Alvarez <alvarez@ac.upc.edu>
|
|
nengel@2
|
15 * @author Albert Paradis <apar7632@hotmail.com>
|
|
nengel@2
|
16 */
|
|
nengel@2
|
17
|
|
nengel@2
|
18
|
|
nengel@2
|
19 #include "dsputil_spu.h"
|
|
nengel@2
|
20 #include "h264_idct_spu.h"
|
|
nengel@2
|
21 #include "h264_deblock_spu.h"
|
|
nengel@2
|
22 #include "types_spu.h"
|
|
nengel@2
|
23 #include "libavutil/intreadwrite.h"
|
|
nengel@2
|
24
|
|
nengel@2
|
25 #include <stdio.h>
|
|
nengel@2
|
26 #include <spu_intrinsics.h>
|
|
nengel@2
|
27 #include <spu_mfcio.h>
|
|
nengel@2
|
28 #include <assert.h>
|
|
nengel@2
|
29
|
|
nengel@2
|
30 //Luma interpolation
|
|
nengel@2
|
/*
 * Store operators handed to h264_luma_template_spu.c through OP_U8_SPU.
 *   d   - lvalue that receives the result
 *   s   - newly computed value
 *   dst - value to blend with (ignored by the PUT variant; presumably the
 *         current destination contents - confirm against the template)
 *
 * Both operators expand to one parenthesized expression, so they behave as a
 * single statement even under an unbraced if/else, and every argument is
 * parenthesized against operator-precedence surprises.
 */
#define PUT_OP_U8_SPU(d, s, dst) ((void) (dst), (d) = (s))
#define AVG_OP_U8_SPU(d, s, dst) ((d) = spu_avg((dst), (s)))
|
|
nengel@2
|
33
|
|
nengel@2
|
34 #define OP_U8_SPU PUT_OP_U8_SPU
|
|
nengel@2
|
35 #define PREFIX_h264_qpel16_h_lowpass_spu put_h264_qpel16_h_lowpass_spu
|
|
nengel@2
|
36 #define PREFIX_h264_qpel16_v_lowpass_spu put_h264_qpel16_v_lowpass_spu
|
|
nengel@2
|
37 #define PREFIX_h264_qpel16_hv_lowpass_spu put_h264_qpel16_hv_lowpass_spu
|
|
nengel@2
|
38 #define PREFIX_h264_qpel8_h_lowpass_spu put_h264_qpel8_h_lowpass_spu
|
|
nengel@2
|
39 #define PREFIX_h264_qpel8_v_lowpass_spu put_h264_qpel8_v_lowpass_spu
|
|
nengel@2
|
40 #define PREFIX_h264_qpel8_hv_lowpass_spu put_h264_qpel8_hv_lowpass_spu
|
|
nengel@2
|
41 #define PREFIX_h264_qpel4_h_lowpass_spu put_h264_qpel4_h_lowpass_spu
|
|
nengel@2
|
42 #define PREFIX_h264_qpel4_v_lowpass_spu put_h264_qpel4_v_lowpass_spu
|
|
nengel@2
|
43 #define PREFIX_h264_qpel4_hv_lowpass_spu put_h264_qpel4_hv_lowpass_spu
|
|
nengel@2
|
44 #include "h264_luma_template_spu.c"
|
|
nengel@2
|
45 #undef OP_U8_SPU
|
|
nengel@2
|
46 #undef PREFIX_h264_qpel16_h_lowpass_spu
|
|
nengel@2
|
47 #undef PREFIX_h264_qpel16_v_lowpass_spu
|
|
nengel@2
|
48 #undef PREFIX_h264_qpel16_hv_lowpass_spu
|
|
nengel@2
|
49 #undef PREFIX_h264_qpel8_h_lowpass_spu
|
|
nengel@2
|
50 #undef PREFIX_h264_qpel8_v_lowpass_spu
|
|
nengel@2
|
51 #undef PREFIX_h264_qpel8_hv_lowpass_spu
|
|
nengel@2
|
52 #undef PREFIX_h264_qpel4_h_lowpass_spu
|
|
nengel@2
|
53 #undef PREFIX_h264_qpel4_v_lowpass_spu
|
|
nengel@2
|
54 #undef PREFIX_h264_qpel4_hv_lowpass_spu
|
|
nengel@2
|
55
|
|
nengel@2
|
56 #define OP_U8_SPU AVG_OP_U8_SPU
|
|
nengel@2
|
57 #define PREFIX_h264_qpel16_h_lowpass_spu avg_h264_qpel16_h_lowpass_spu
|
|
nengel@2
|
58 #define PREFIX_h264_qpel16_v_lowpass_spu avg_h264_qpel16_v_lowpass_spu
|
|
nengel@2
|
59 #define PREFIX_h264_qpel16_hv_lowpass_spu avg_h264_qpel16_hv_lowpass_spu
|
|
nengel@2
|
60 #define PREFIX_h264_qpel8_h_lowpass_spu avg_h264_qpel8_h_lowpass_spu
|
|
nengel@2
|
61 #define PREFIX_h264_qpel8_v_lowpass_spu avg_h264_qpel8_v_lowpass_spu
|
|
nengel@2
|
62 #define PREFIX_h264_qpel8_hv_lowpass_spu avg_h264_qpel8_hv_lowpass_spu
|
|
nengel@2
|
63 #define PREFIX_h264_qpel4_h_lowpass_spu avg_h264_qpel4_h_lowpass_spu
|
|
nengel@2
|
64 #define PREFIX_h264_qpel4_v_lowpass_spu avg_h264_qpel4_v_lowpass_spu
|
|
nengel@2
|
65 #define PREFIX_h264_qpel4_hv_lowpass_spu avg_h264_qpel4_hv_lowpass_spu
|
|
nengel@2
|
66 #include "h264_luma_template_spu.c"
|
|
nengel@2
|
67 #undef OP_U8_SPU
|
|
nengel@2
|
68 #undef PREFIX_h264_qpel16_h_lowpass_spu
|
|
nengel@2
|
69 #undef PREFIX_h264_qpel16_v_lowpass_spu
|
|
nengel@2
|
70 #undef PREFIX_h264_qpel16_hv_lowpass_spu
|
|
nengel@2
|
71 #undef PREFIX_h264_qpel8_h_lowpass_spu
|
|
nengel@2
|
72 #undef PREFIX_h264_qpel8_v_lowpass_spu
|
|
nengel@2
|
73 #undef PREFIX_h264_qpel8_hv_lowpass_spu
|
|
nengel@2
|
74 #undef PREFIX_h264_qpel4_h_lowpass_spu
|
|
nengel@2
|
75 #undef PREFIX_h264_qpel4_v_lowpass_spu
|
|
nengel@2
|
76 #undef PREFIX_h264_qpel4_hv_lowpass_spu
|
|
nengel@2
|
77
|
|
nengel@2
|
/*
 * H264_MC(OPNAME, SIZE, CODETYPE)
 *
 * Expands to sixteen static functions named
 *     OPNAME h264_qpel SIZE _mcXY_ CODETYPE   (XY = 00 .. 33)
 * all sharing the signature (uint8_t *dst, uint8_t *src, int dst_stride, int h).
 *
 *   OPNAME   - prefix of the final store/blend helpers (pixelsN, pixelsN_l2,
 *              *_lowpass) that write into dst
 *   SIZE     - block width token pasted into the helper names
 *   CODETYPE - suffix pasted into every helper name
 *
 * Source rows are STRIDE_Y bytes apart; every intermediate buffer uses a row
 * pitch of 16. Intermediate planes are always produced with the put_*
 * low-pass helpers; only the last step uses OPNAME.
 */
#define H264_MC(OPNAME, SIZE, CODETYPE) \
/* mc00: forward src to the plain pixels helper. */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE(uint8_t *dst, uint8_t *src, int dst_stride, int h) \
{ \
    OPNAME ## pixels ## SIZE ## _ ## CODETYPE(dst, src, dst_stride, STRIDE_Y, h); \
} \
\
/* mc10: blend src with its horizontal low-pass plane. */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, uint8_t *src, int dst_stride, int h) \
{ \
    DECLARE_ALIGNED_16(uint8_t, lowpass[16*16]); \
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(lowpass, src, 16, h); \
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, lowpass, dst_stride, STRIDE_Y, h); \
} \
\
/* mc20: horizontal low-pass written straight to dst. */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, uint8_t *src, int dst_stride, int h) \
{ \
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(dst, src, dst_stride, h); \
} \
\
/* mc30: blend src+1 with the horizontal low-pass plane of src. */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, uint8_t *src, int dst_stride, int h) \
{ \
    DECLARE_ALIGNED_16(uint8_t, lowpass[16*16]); \
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(lowpass, src, 16, h); \
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, lowpass, dst_stride, STRIDE_Y, h); \
} \
\
/* mc01: blend src with its vertical low-pass plane. */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, uint8_t *src, int dst_stride, int h) \
{ \
    DECLARE_ALIGNED_16(uint8_t, lowpass[16*16]); \
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(lowpass, src, 16, h); \
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, lowpass, dst_stride, STRIDE_Y, h); \
} \
\
/* mc02: vertical low-pass written straight to dst. */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, uint8_t *src, int dst_stride, int h) \
{ \
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(dst, src, dst_stride, h); \
} \
\
/* mc03: blend the row below src with the vertical low-pass plane of src. */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, uint8_t *src, int dst_stride, int h) \
{ \
    DECLARE_ALIGNED_16(uint8_t, lowpass[16*16]); \
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(lowpass, src, 16, h); \
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+STRIDE_Y, lowpass, dst_stride, STRIDE_Y, h); \
} \
\
/* mc11: blend horizontal(src) with vertical(src). */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint8_t *src, int dst_stride, int h) \
{ \
    DECLARE_ALIGNED_16(uint8_t, plane_h[16*16]); \
    DECLARE_ALIGNED_16(uint8_t, plane_v[16*16]); \
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(plane_h, src, 16, h); \
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(plane_v, src, 16, h); \
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, plane_h, plane_v, dst_stride, 16, h); \
} \
\
/* mc31: blend horizontal(src) with vertical(src+1). */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint8_t *src, int dst_stride, int h) \
{ \
    DECLARE_ALIGNED_16(uint8_t, plane_h[16*16]); \
    DECLARE_ALIGNED_16(uint8_t, plane_v[16*16]); \
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(plane_h, src, 16, h); \
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(plane_v, src+1, 16, h); \
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, plane_h, plane_v, dst_stride, 16, h); \
} \
\
/* mc13: blend horizontal(row below src) with vertical(src). */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint8_t *src, int dst_stride, int h) \
{ \
    DECLARE_ALIGNED_16(uint8_t, plane_h[16*16]); \
    DECLARE_ALIGNED_16(uint8_t, plane_v[16*16]); \
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(plane_h, src + STRIDE_Y, 16, h); \
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(plane_v, src, 16, h); \
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, plane_h, plane_v, dst_stride, 16, h); \
} \
\
/* mc33: blend horizontal(row below src) with vertical(src+1). */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint8_t *src, int dst_stride, int h) \
{ \
    DECLARE_ALIGNED_16(uint8_t, plane_h[16*16]); \
    DECLARE_ALIGNED_16(uint8_t, plane_v[16*16]); \
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(plane_h, src + STRIDE_Y, 16, h); \
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(plane_v, src+1, 16, h); \
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, plane_h, plane_v, dst_stride, 16, h); \
} \
\
/* mc22: combined hv low-pass written straight to dst; scratch holds 16-bit intermediates. */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, uint8_t *src, int dst_stride, int h) \
{ \
    DECLARE_ALIGNED_16(int16_t, scratch[16*(16+8)]); \
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, scratch, src, dst_stride, 16, h); \
} \
\
/* mc21: blend horizontal(src) with hv(src). */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint8_t *src, int dst_stride, int h) \
{ \
    DECLARE_ALIGNED_16(uint8_t, plane_h[16*16]); \
    DECLARE_ALIGNED_16(uint8_t, plane_hv[16*16]); \
    DECLARE_ALIGNED_16(int16_t, scratch[16*(16+8)]); \
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(plane_h, src, 16, h); \
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(plane_hv, scratch, src, 16, 16, h); \
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, plane_h, plane_hv, dst_stride, 16, h); \
} \
\
/* mc23: blend horizontal(row below src) with hv(src). */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint8_t *src, int dst_stride, int h) \
{ \
    DECLARE_ALIGNED_16(uint8_t, plane_h[16*16]); \
    DECLARE_ALIGNED_16(uint8_t, plane_hv[16*16]); \
    DECLARE_ALIGNED_16(int16_t, scratch[16*(16+8)]); \
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(plane_h, src + STRIDE_Y, 16, h); \
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(plane_hv, scratch, src, 16, 16, h); \
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, plane_h, plane_hv, dst_stride, 16, h); \
} \
\
/* mc12: blend vertical(src) with hv(src). */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint8_t *src, int dst_stride, int h) \
{ \
    DECLARE_ALIGNED_16(uint8_t, plane_v[16*16]); \
    DECLARE_ALIGNED_16(uint8_t, plane_hv[16*16]); \
    DECLARE_ALIGNED_16(int16_t, scratch[16*(16+8)]); \
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(plane_v, src, 16, h); \
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(plane_hv, scratch, src, 16, 16, h); \
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, plane_v, plane_hv, dst_stride, 16, h); \
} \
\
/* mc32: blend vertical(src+1) with hv(src). */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint8_t *src, int dst_stride, int h) \
{ \
    DECLARE_ALIGNED_16(uint8_t, plane_v[16*16]); \
    DECLARE_ALIGNED_16(uint8_t, plane_hv[16*16]); \
    DECLARE_ALIGNED_16(int16_t, scratch[16*(16+8)]); \
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(plane_v, src+1, 16, h); \
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(plane_hv, scratch, src, 16, 16, h); \
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, plane_v, plane_hv, dst_stride, 16, h); \
}
|
|
nengel@2
|
187
|
|
nengel@2
|
188
|
|
nengel@2
|
189 /**************************/
|
|
nengel@2
|
190 /* put pixels functions */
|
|
nengel@2
|
191 /*************************/
|
|
nengel@2
|
192
|
|
nengel@2
|
193 static void put_pixels16_l2_spu( uint8_t * dst, const uint8_t * src1,
|
|
nengel@2
|
194 const uint8_t * src2, int dst_stride,
|
|
nengel@2
|
195 int src_stride1, int h)
|
|
nengel@2
|
196 {
|
|
nengel@2
|
197 int i;
|
|
nengel@2
|
198
|
|
nengel@2
|
199 const int perm_src1 = (unsigned int) src1 & 15;
|
|
nengel@2
|
200
|
|
nengel@2
|
201 for (i=0; i<h; i++){
|
|
nengel@2
|
202 //unaligned load of src1
|
|
nengel@2
|
203 const vuint8_t srctmpa1 = *(vuint8_t *)(src1);
|
|
nengel@2
|
204 const vuint8_t srctmpa2 = *(vuint8_t *)(src1+16);
|
|
nengel@2
|
205 const vuint8_t srca= spu_or(spu_slqwbyte(srctmpa1, perm_src1), spu_rlmaskqwbyte(srctmpa2, perm_src1-16));
|
|
nengel@2
|
206
|
|
nengel@2
|
207 //aligned load of src2
|
|
nengel@2
|
208 const vuint8_t srcb = *(vuint8_t *)(src2);
|
|
nengel@2
|
209
|
|
nengel@2
|
210 //average and rounding
|
|
nengel@2
|
211 const vuint8_t avgc = spu_avg(srca,srcb);
|
|
nengel@2
|
212
|
|
nengel@2
|
213 // 16x16 dest luma blocks are always aligned
|
|
nengel@2
|
214 *(vuint8_t *)dst=avgc;
|
|
nengel@2
|
215
|
|
nengel@2
|
216 src1 +=src_stride1;
|
|
nengel@2
|
217 src2 +=16;
|
|
nengel@2
|
218 dst +=dst_stride;
|
|
nengel@2
|
219 }
|
|
nengel@2
|
220 }
|
|
nengel@2
|
221
|
|
nengel@2
|
222 static void avg_pixels16_l2_spu( uint8_t * dst, const uint8_t * src1,
|
|
nengel@2
|
223 const uint8_t * src2, int dst_stride,
|
|
nengel@2
|
224 int src_stride1, int h)
|
|
nengel@2
|
225 {
|
|
nengel@2
|
226 int i;
|
|
nengel@2
|
227
|
|
nengel@2
|
228 const int perm_src1 = (unsigned int) src1 & 15;
|
|
nengel@2
|
229
|
|
nengel@2
|
230 for (i=0; i<h; i++){
|
|
nengel@2
|
231 //unaligned load of src1
|
|
nengel@2
|
232 const vuint8_t srctmpa1 = *(vuint8_t *)(src1);
|
|
nengel@2
|
233 const vuint8_t srctmpa2 = *(vuint8_t *)(src1+16);
|
|
nengel@2
|
234 const vuint8_t srca= spu_or(spu_slqwbyte(srctmpa1, perm_src1), spu_rlmaskqwbyte(srctmpa2, perm_src1-16));
|
|
nengel@2
|
235
|
|
nengel@2
|
236 //aligned load of src2
|
|
nengel@2
|
237 const vuint8_t srcb = *(vuint8_t *)(src2);
|
|
nengel@2
|
238
|
|
nengel@2
|
239 //average and rounding
|
|
nengel@2
|
240 const vuint8_t avgc = spu_avg(spu_avg(srca,srcb), *(vuint8_t *)dst);
|
|
nengel@2
|
241
|
|
nengel@2
|
242 // 16x16 dest luma blocks are always aligned
|
|
nengel@2
|
243 *(vuint8_t *)dst=avgc;
|
|
nengel@2
|
244
|
|
nengel@2
|
245 src1 +=src_stride1;
|
|
nengel@2
|
246 src2 +=16;
|
|
nengel@2
|
247 dst +=dst_stride;
|
|
nengel@2
|
248 }
|
|
nengel@2
|
249 }
|
|
nengel@2
|
250
|
|
nengel@2
|
251 // next one assumes that ((line_size % 16) == 0)
|
|
nengel@2
|
252 void put_pixels16_spu(uint8_t *dst, const uint8_t *src, int dst_stride, int src_stride, int h)
|
|
nengel@2
|
253 {
|
|
nengel@2
|
254 register vector unsigned char pixelsv1, pixelsv2;
|
|
nengel@2
|
255 register vector unsigned char pixelsv1B, pixelsv2B;
|
|
nengel@2
|
256 register vector unsigned char pixelsv1C, pixelsv2C;
|
|
nengel@2
|
257 register vector unsigned char pixelsv1D, pixelsv2D;
|
|
nengel@2
|
258
|
|
nengel@2
|
259 const int perm = (unsigned int) src & 15;
|
|
nengel@2
|
260 int i;
|
|
nengel@2
|
261 register int line_size = src_stride;
|
|
nengel@2
|
262 register int line_size_2 = line_size << 1;
|
|
nengel@2
|
263 register int line_size_3 = line_size + line_size_2;
|
|
nengel@2
|
264 register int line_size_4 = line_size << 2;
|
|
nengel@2
|
265
|
|
nengel@2
|
266 register int dst_stride_2 = dst_stride << 1;
|
|
nengel@2
|
267 register int dst_stride_3 = dst_stride_2 + dst_stride;
|
|
nengel@2
|
268 register int dst_stride_4 = dst_stride << 2;
|
|
nengel@2
|
269
|
|
nengel@2
|
270 for(i=0; i<h; i+=4) {
|
|
nengel@2
|
271 pixelsv1 = *(vuint8_t *)(src);
|
|
nengel@2
|
272 pixelsv2 = *(vuint8_t *)(src+16);
|
|
nengel@2
|
273 pixelsv1B = *(vuint8_t *)(src + line_size);
|
|
nengel@2
|
274 pixelsv2B = *(vuint8_t *)(src+16 + line_size);
|
|
nengel@2
|
275 pixelsv1C = *(vuint8_t *)(src + line_size_2);
|
|
nengel@2
|
276 pixelsv2C = *(vuint8_t *)(src+16 + line_size_2);
|
|
nengel@2
|
277 pixelsv1D = *(vuint8_t *)(src + line_size_3);
|
|
nengel@2
|
278 pixelsv2D = *(vuint8_t *)(src+16 + line_size_3);
|
|
nengel@2
|
279
|
|
nengel@2
|
280 *(vuint8_t *) dst = spu_or(spu_slqwbyte(pixelsv1, perm), spu_rlmaskqwbyte(pixelsv2, perm-16));
|
|
nengel@2
|
281 *(vuint8_t *)(dst + dst_stride) = spu_or(spu_slqwbyte(pixelsv1B, perm), spu_rlmaskqwbyte(pixelsv2B, perm-16));
|
|
nengel@2
|
282 *(vuint8_t *)(dst + dst_stride_2) = spu_or(spu_slqwbyte(pixelsv1C, perm), spu_rlmaskqwbyte(pixelsv2C, perm-16));
|
|
nengel@2
|
283 *(vuint8_t *)(dst + dst_stride_3) = spu_or(spu_slqwbyte(pixelsv1D, perm), spu_rlmaskqwbyte(pixelsv2D, perm-16));
|
|
nengel@2
|
284
|
|
nengel@2
|
285 src+= line_size_4;
|
|
nengel@2
|
286 dst+= dst_stride_4;
|
|
nengel@2
|
287 }
|
|
nengel@2
|
288 }
|
|
nengel@2
|
289
|
|
nengel@2
|
290 // next one assumes that ((line_size % 16) == 0)
|
|
nengel@2
|
291 void avg_pixels16_spu(uint8_t *dst, const uint8_t *src, int dst_stride, int src_stride, int h)
|
|
nengel@2
|
292 {
|
|
nengel@2
|
293 register vector unsigned char pixelsv1, pixelsv2;
|
|
nengel@2
|
294 register vector unsigned char pixelsv1B, pixelsv2B;
|
|
nengel@2
|
295 register vector unsigned char pixelsv1C, pixelsv2C;
|
|
nengel@2
|
296 register vector unsigned char pixelsv1D, pixelsv2D;
|
|
nengel@2
|
297
|
|
nengel@2
|
298 const int perm = (unsigned int) src & 15;
|
|
nengel@2
|
299 int i;
|
|
nengel@2
|
300 register int line_size = src_stride;
|
|
nengel@2
|
301 register int line_size_2 = line_size << 1;
|
|
nengel@2
|
302 register int line_size_3 = line_size + line_size_2;
|
|
nengel@2
|
303 register int line_size_4 = line_size << 2;
|
|
nengel@2
|
304
|
|
nengel@2
|
305 register int dst_stride_2 = dst_stride << 1;
|
|
nengel@2
|
306 register int dst_stride_3 = dst_stride_2 + dst_stride;
|
|
nengel@2
|
307 register int dst_stride_4 = dst_stride << 2;
|
|
nengel@2
|
308
|
|
nengel@2
|
309
|
|
nengel@2
|
310 for(i=0; i<h; i+=4) {
|
|
nengel@2
|
311 pixelsv1 = *(vuint8_t *)(src);
|
|
nengel@2
|
312 pixelsv2 = *(vuint8_t *)(src+16);
|
|
nengel@2
|
313 pixelsv1B = *(vuint8_t *)(src + line_size);
|
|
nengel@2
|
314 pixelsv2B = *(vuint8_t *)(src+16 + line_size);
|
|
nengel@2
|
315 pixelsv1C = *(vuint8_t *)(src + line_size_2);
|
|
nengel@2
|
316 pixelsv2C = *(vuint8_t *)(src+16 + line_size_2);
|
|
nengel@2
|
317 pixelsv1D = *(vuint8_t *)(src + line_size_3);
|
|
nengel@2
|
318 pixelsv2D = *(vuint8_t *)(src+16 + line_size_3);
|
|
nengel@2
|
319
|
|
nengel@2
|
320 *(vuint8_t *)dst = spu_avg(spu_or(spu_slqwbyte(pixelsv1, perm), spu_rlmaskqwbyte(pixelsv2, perm-16)), *(vuint8_t *)dst);
|
|
nengel@2
|
321 *(vuint8_t *)(dst + dst_stride) = spu_avg(spu_or(spu_slqwbyte(pixelsv1B, perm), spu_rlmaskqwbyte(pixelsv2B, perm-16)), *(vuint8_t *)(dst+dst_stride));
|
|
nengel@2
|
322 *(vuint8_t *)(dst + dst_stride_2) = spu_avg(spu_or(spu_slqwbyte(pixelsv1C, perm), spu_rlmaskqwbyte(pixelsv2C, perm-16)), *(vuint8_t *)(dst+dst_stride_2));
|
|
nengel@2
|
323 *(vuint8_t *)(dst + dst_stride_3) = spu_avg(spu_or(spu_slqwbyte(pixelsv1D, perm), spu_rlmaskqwbyte(pixelsv2D, perm-16)), *(vuint8_t *)(dst+dst_stride_3));
|
|
nengel@2
|
324
|
|
nengel@2
|
325 src+= line_size_4;
|
|
nengel@2
|
326 dst+= dst_stride_4;
|
|
nengel@2
|
327 }
|
|
nengel@2
|
328 }
|
|
nengel@2
|
329
|
|
nengel@2
|
330 void put_pixels8_l2_spu(uint8_t * dst, const uint8_t * src1, const uint8_t * src2,
|
|
nengel@2
|
331 int dst_stride, int src_stride1, int h)
|
|
nengel@2
|
332 {
|
|
nengel@2
|
333 int i;
|
|
nengel@2
|
334
|
|
nengel@2
|
335 const int perm_src1 = (unsigned int) src1 & 15;
|
|
nengel@2
|
336 const int shift_dst = (unsigned int) dst & 15;
|
|
nengel@2
|
337
|
|
nengel@2
|
338 // 8x dest luma blocks are aligned or desaligned by 8
|
|
nengel@2
|
339 vuint8_t dstmask;
|
|
nengel@2
|
340 const vuint8_t dst8mask1= {0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F};
|
|
nengel@2
|
341 const vuint8_t dst8mask2= {0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17};
|
|
nengel@2
|
342
|
|
nengel@2
|
343 if(shift_dst==0){
|
|
nengel@2
|
344 dstmask = dst8mask1;
|
|
nengel@2
|
345 }
|
|
nengel@2
|
346 else{
|
|
nengel@2
|
347 dstmask = dst8mask2;
|
|
nengel@2
|
348 }
|
|
nengel@2
|
349
|
|
nengel@2
|
350 for (i=0; i<h; i++){
|
|
nengel@2
|
351 //unaligned load of src1
|
|
nengel@2
|
352 const vuint8_t srctmpa1 = *(vuint8_t *)(src1);
|
|
nengel@2
|
353 const vuint8_t srctmpa2 = *(vuint8_t *)(src1+16);
|
|
nengel@2
|
354 const vuint8_t srca= spu_or(spu_slqwbyte(srctmpa1, perm_src1), spu_rlmaskqwbyte(srctmpa2, perm_src1-16));
|
|
nengel@2
|
355
|
|
nengel@2
|
356 //aligned load of src2
|
|
nengel@2
|
357 const vuint8_t srcb = *(vuint8_t *)(src2);
|
|
nengel@2
|
358
|
|
nengel@2
|
359 //average and rounding
|
|
nengel@2
|
360 const vuint8_t avgc = spu_avg(srca,srcb);
|
|
nengel@2
|
361
|
|
nengel@2
|
362 const vuint8_t dst1 = *(vuint8_t *)dst;
|
|
nengel@2
|
363
|
|
nengel@2
|
364 const vuint8_t davgc = spu_shuffle(dst1, avgc, dstmask);
|
|
nengel@2
|
365
|
|
nengel@2
|
366 *(vuint8_t *)dst=davgc;
|
|
nengel@2
|
367
|
|
nengel@2
|
368 src1 +=src_stride1;
|
|
nengel@2
|
369 src2 +=16;
|
|
nengel@2
|
370 dst +=dst_stride;
|
|
nengel@2
|
371 }
|
|
nengel@2
|
372 }
|
|
nengel@2
|
373
|
|
nengel@2
|
374 void avg_pixels8_l2_spu(uint8_t * dst, const uint8_t * src1, const uint8_t * src2,
|
|
nengel@2
|
375 int dst_stride, int src_stride1, int h)
|
|
nengel@2
|
376 {
|
|
nengel@2
|
377 int i;
|
|
nengel@2
|
378
|
|
nengel@2
|
379 const int perm_src1 = (unsigned int) src1 & 15;
|
|
nengel@2
|
380 const int shift_dst = (unsigned int) dst & 15;
|
|
nengel@2
|
381
|
|
nengel@2
|
382 // 8x dest luma blocks are aligned or desaligned by 8
|
|
nengel@2
|
383 vuint8_t dstmask;
|
|
nengel@2
|
384 const vuint8_t dst8mask1= {0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F};
|
|
nengel@2
|
385 const vuint8_t dst8mask2= {0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17};
|
|
nengel@2
|
386
|
|
nengel@2
|
387 if(shift_dst==0){
|
|
nengel@2
|
388 dstmask = dst8mask1;
|
|
nengel@2
|
389 }
|
|
nengel@2
|
390 else{
|
|
nengel@2
|
391 dstmask = dst8mask2;
|
|
nengel@2
|
392 }
|
|
nengel@2
|
393
|
|
nengel@2
|
394 for (i=0; i<h; i++){
|
|
nengel@2
|
395 //unaligned load of src1
|
|
nengel@2
|
396 const vuint8_t srctmpa1 = *(vuint8_t *)(src1);
|
|
nengel@2
|
397 const vuint8_t srctmpa2 = *(vuint8_t *)(src1+16);
|
|
nengel@2
|
398 const vuint8_t srca= spu_or(spu_slqwbyte(srctmpa1, perm_src1), spu_rlmaskqwbyte(srctmpa2, perm_src1-16));
|
|
nengel@2
|
399
|
|
nengel@2
|
400 //aligned load of src2
|
|
nengel@2
|
401 const vuint8_t srcb = *(vuint8_t *)(src2);
|
|
nengel@2
|
402
|
|
nengel@2
|
403 //average and rounding
|
|
nengel@2
|
404 const vuint8_t avgc = spu_avg(srca,srcb);
|
|
nengel@2
|
405
|
|
nengel@2
|
406 const vuint8_t dst1 = *(vuint8_t *)dst;
|
|
nengel@2
|
407
|
|
nengel@2
|
408 const vuint8_t davgc1 = spu_shuffle(dst1, avgc, dstmask);
|
|
nengel@2
|
409
|
|
nengel@2
|
410 const vuint8_t davgc = spu_avg(dst1,davgc1);
|
|
nengel@2
|
411
|
|
nengel@2
|
412 *(vuint8_t *)dst=davgc;
|
|
nengel@2
|
413
|
|
nengel@2
|
414 src1 +=src_stride1;
|
|
nengel@2
|
415 src2 +=16;
|
|
nengel@2
|
416 dst +=dst_stride;
|
|
nengel@2
|
417 }
|
|
nengel@2
|
418 }
|
|
nengel@2
|
419
|
|
nengel@2
|
420 // next one assumes that ((line_size % 16) == 0)
|
|
nengel@2
|
421 void put_pixels8_spu(uint8_t *dst, const uint8_t *src, int dst_stride, int src_stride, int h)
|
|
nengel@2
|
422 {
|
|
nengel@2
|
423 register vector unsigned char pixelsv1A, pixelsv2A;
|
|
nengel@2
|
424 register vector unsigned char pixelsv1B, pixelsv2B;
|
|
nengel@2
|
425 register vector unsigned char pixelsv1C, pixelsv2C;
|
|
nengel@2
|
426 register vector unsigned char pixelsv1D, pixelsv2D;
|
|
nengel@2
|
427
|
|
nengel@2
|
428 const int perm = (unsigned int) src & 15;
|
|
nengel@2
|
429 const int shift_dst = (unsigned int) dst & 15;
|
|
nengel@2
|
430
|
|
nengel@2
|
431 // 8x dest luma blocks are aligned or desaligned by 8
|
|
nengel@2
|
432 vuint8_t dstmask;
|
|
nengel@2
|
433 const vuint8_t dst8mask1= {0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F};
|
|
nengel@2
|
434 const vuint8_t dst8mask2= {0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17};
|
|
nengel@2
|
435
|
|
nengel@2
|
436 if(shift_dst==0){
|
|
nengel@2
|
437 dstmask = dst8mask1;
|
|
nengel@2
|
438 }
|
|
nengel@2
|
439 else{
|
|
nengel@2
|
440 dstmask = dst8mask2;
|
|
nengel@2
|
441 }
|
|
nengel@2
|
442
|
|
nengel@2
|
443 int i;
|
|
nengel@2
|
444 register int line_size = src_stride;
|
|
nengel@2
|
445 register int line_size_2 = line_size << 1;
|
|
nengel@2
|
446 register int line_size_3 = line_size + line_size_2;
|
|
nengel@2
|
447 register int line_size_4 = line_size << 2;
|
|
nengel@2
|
448
|
|
nengel@2
|
449 register int dst_stride_2 = dst_stride << 1;
|
|
nengel@2
|
450 register int dst_stride_3 = dst_stride_2 + dst_stride;
|
|
nengel@2
|
451 register int dst_stride_4 = dst_stride << 2;
|
|
nengel@2
|
452
|
|
nengel@2
|
453 for(i=0; i<h; i+=4) {
|
|
nengel@2
|
454 pixelsv1A = *(vuint8_t *)(src);
|
|
nengel@2
|
455 pixelsv2A = *(vuint8_t *)(src+16);
|
|
nengel@2
|
456 pixelsv1B = *(vuint8_t *)(src + line_size);
|
|
nengel@2
|
457 pixelsv2B = *(vuint8_t *)(src+16 + line_size);
|
|
nengel@2
|
458 pixelsv1C = *(vuint8_t *)(src + line_size_2);
|
|
nengel@2
|
459 pixelsv2C = *(vuint8_t *)(src+16 + line_size_2);
|
|
nengel@2
|
460 pixelsv1D = *(vuint8_t *)(src + line_size_3);
|
|
nengel@2
|
461 pixelsv2D = *(vuint8_t *)(src+16 + line_size_3);
|
|
nengel@2
|
462
|
|
nengel@2
|
463 const vuint8_t block1 = *(vuint8_t *)dst;
|
|
nengel@2
|
464 const vuint8_t put1 = spu_shuffle(block1, spu_or(spu_slqwbyte(pixelsv1A, perm), spu_rlmaskqwbyte(pixelsv2A, perm-16)), dstmask);
|
|
nengel@2
|
465 const vuint8_t block2 = *(vuint8_t *)(dst+dst_stride);
|
|
nengel@2
|
466 const vuint8_t put2 = spu_shuffle(block2, spu_or(spu_slqwbyte(pixelsv1B, perm), spu_rlmaskqwbyte(pixelsv2B, perm-16)), dstmask);
|
|
nengel@2
|
467 const vuint8_t block3 = *(vuint8_t *)(dst+2*dst_stride);
|
|
nengel@2
|
468 const vuint8_t put3 = spu_shuffle(block3, spu_or(spu_slqwbyte(pixelsv1C, perm), spu_rlmaskqwbyte(pixelsv2C, perm-16)), dstmask);
|
|
nengel@2
|
469 const vuint8_t block4 = *(vuint8_t *)(dst+3*dst_stride);
|
|
nengel@2
|
470 const vuint8_t put4 = spu_shuffle(block4, spu_or(spu_slqwbyte(pixelsv1D, perm), spu_rlmaskqwbyte(pixelsv2D, perm-16)), dstmask);
|
|
nengel@2
|
471
|
|
nengel@2
|
472 *(vuint8_t *) dst = put1;
|
|
nengel@2
|
473 *(vuint8_t *)(dst + dst_stride) = put2;
|
|
nengel@2
|
474 *(vuint8_t *)(dst + dst_stride_2) = put3;
|
|
nengel@2
|
475 *(vuint8_t *)(dst + dst_stride_3) = put4;
|
|
nengel@2
|
476
|
|
nengel@2
|
477 src += line_size_4;
|
|
nengel@2
|
478 dst += dst_stride_4;
|
|
nengel@2
|
479 }
|
|
nengel@2
|
480 }
|
|
nengel@2
|
481
|
|
nengel@2
|
482 // next one assumes that ((line_size % 16) == 0)
|
|
nengel@2
|
483 void avg_pixels8_spu(uint8_t *dst, const uint8_t *src, int dst_stride, int src_stride, int h)
|
|
nengel@2
|
484 {
|
|
nengel@2
|
485 register vector unsigned char pixelsv1A, pixelsv2A;
|
|
nengel@2
|
486 register vector unsigned char pixelsv1B, pixelsv2B;
|
|
nengel@2
|
487 register vector unsigned char pixelsv1C, pixelsv2C;
|
|
nengel@2
|
488 register vector unsigned char pixelsv1D, pixelsv2D;
|
|
nengel@2
|
489
|
|
nengel@2
|
490 const int perm = (unsigned int) src & 15;
|
|
nengel@2
|
491 const int shift_dst = (unsigned int) dst & 15;
|
|
nengel@2
|
492
|
|
nengel@2
|
493 // 8x dest luma blocks are aligned or desaligned by 8
|
|
nengel@2
|
494 vuint8_t dstmask;
|
|
nengel@2
|
495 const vuint8_t dst8mask1= {0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F};
|
|
nengel@2
|
496 const vuint8_t dst8mask2= {0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17};
|
|
nengel@2
|
497
|
|
nengel@2
|
498 if(shift_dst==0){
|
|
nengel@2
|
499 dstmask = dst8mask1;
|
|
nengel@2
|
500 }
|
|
nengel@2
|
501 else{
|
|
nengel@2
|
502 dstmask = dst8mask2;
|
|
nengel@2
|
503 }
|
|
nengel@2
|
504
|
|
nengel@2
|
505 int i;
|
|
nengel@2
|
506 register int line_size = src_stride;
|
|
nengel@2
|
507 register int line_size_2 = line_size << 1;
|
|
nengel@2
|
508 register int line_size_3 = line_size + line_size_2;
|
|
nengel@2
|
509 register int line_size_4 = line_size << 2;
|
|
nengel@2
|
510
|
|
nengel@2
|
511 register int dst_stride_2 = dst_stride << 1;
|
|
nengel@2
|
512 register int dst_stride_3 = dst_stride_2 + dst_stride;
|
|
nengel@2
|
513 register int dst_stride_4 = dst_stride << 2;
|
|
nengel@2
|
514
|
|
nengel@2
|
515 for(i=0; i<h; i+=4) {
|
|
nengel@2
|
516 pixelsv1A = *(vuint8_t *)(src);
|
|
nengel@2
|
517 pixelsv2A = *(vuint8_t *)(src+16);
|
|
nengel@2
|
518 pixelsv1B = *(vuint8_t *)(src + line_size);
|
|
nengel@2
|
519 pixelsv2B = *(vuint8_t *)(src+16 + line_size);
|
|
nengel@2
|
520 pixelsv1C = *(vuint8_t *)(src + line_size_2);
|
|
nengel@2
|
521 pixelsv2C = *(vuint8_t *)(src+16 + line_size_2);
|
|
nengel@2
|
522 pixelsv1D = *(vuint8_t *)(src + line_size_3);
|
|
nengel@2
|
523 pixelsv2D = *(vuint8_t *)(src+16 + line_size_3);
|
|
nengel@2
|
524
|
|
nengel@2
|
525 const vuint8_t block1 = *(vuint8_t *) dst;
|
|
nengel@2
|
526 const vuint8_t put1a = spu_shuffle(block1, spu_or(spu_slqwbyte(pixelsv1A, perm), spu_rlmaskqwbyte(pixelsv2A, perm-16)), dstmask);
|
|
nengel@2
|
527 const vuint8_t put1 = spu_avg(block1,put1a);
|
|
nengel@2
|
528
|
|
nengel@2
|
529 const vuint8_t block2 = *(vuint8_t *)(dst + dst_stride);
|
|
nengel@2
|
530 const vuint8_t put2a = spu_shuffle(block2, spu_or(spu_slqwbyte(pixelsv1B, perm), spu_rlmaskqwbyte(pixelsv2B, perm-16)), dstmask);
|
|
nengel@2
|
531 const vuint8_t put2 = spu_avg(block2,put2a);
|
|
nengel@2
|
532
|
|
nengel@2
|
533 const vuint8_t block3 = *(vuint8_t *)(dst + dst_stride_2);
|
|
nengel@2
|
534 const vuint8_t put3a = spu_shuffle(block3, spu_or(spu_slqwbyte(pixelsv1C, perm), spu_rlmaskqwbyte(pixelsv2C, perm-16)), dstmask);
|
|
nengel@2
|
535 const vuint8_t put3 = spu_avg(block3,put3a);
|
|
nengel@2
|
536
|
|
nengel@2
|
537 const vuint8_t block4 = *(vuint8_t *)(dst + dst_stride_3);
|
|
nengel@2
|
538 const vuint8_t put4a = spu_shuffle(block4, spu_or(spu_slqwbyte(pixelsv1D, perm), spu_rlmaskqwbyte(pixelsv2D, perm-16)), dstmask);
|
|
nengel@2
|
539 const vuint8_t put4 = spu_avg(block4,put4a);
|
|
nengel@2
|
540
|
|
nengel@2
|
541 *(vuint8_t *) dst = put1;
|
|
nengel@2
|
542 *(vuint8_t *)(dst + dst_stride) = put2;
|
|
nengel@2
|
543 *(vuint8_t *)(dst + dst_stride_2) = put3;
|
|
nengel@2
|
544 *(vuint8_t *)(dst + dst_stride_3) = put4;
|
|
nengel@2
|
545
|
|
nengel@2
|
546 src+= line_size_4;
|
|
nengel@2
|
547 dst+= dst_stride_4;
|
|
nengel@2
|
548 }
|
|
nengel@2
|
549 }
|
|
nengel@2
|
550
|
|
nengel@2
|
551 void put_pixels4_l2_spu(uint8_t * dst, const uint8_t * src1, const uint8_t * src2,
|
|
nengel@2
|
552 int dst_stride, int src_stride1, int h)
|
|
nengel@2
|
553 {
|
|
nengel@2
|
554 int i;
|
|
nengel@2
|
555
|
|
nengel@2
|
556 const int perm_src1 = (unsigned int) src1 & 15;
|
|
nengel@2
|
557 const int shift_dst = (unsigned int) dst & 15;
|
|
nengel@2
|
558
|
|
nengel@2
|
559 // 4x dest luma blocks are desaligned by 0, 4, 8, or 12
|
|
nengel@2
|
560 vuint8_t dstmask = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
|
nengel@2
|
561 const vuint8_t dstmask0= {0x10,0x11,0x12,0x13,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F};
|
|
nengel@2
|
562 const vuint8_t dstmask4= {0x00,0x01,0x02,0x03,0x10,0x11,0x12,0x13,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F};
|
|
nengel@2
|
563 const vuint8_t dstmask8= {0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x10,0x11,0x12,0x13,0x0C,0x0D,0x0E,0x0F};
|
|
nengel@2
|
564 const vuint8_t dstmask12= {0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x10,0x11,0x12,0x13};
|
|
nengel@2
|
565
|
|
nengel@2
|
566 switch(shift_dst){
|
|
nengel@2
|
567 case 0: dstmask = dstmask0;
|
|
nengel@2
|
568 break;
|
|
nengel@2
|
569 case 4: dstmask = dstmask4;
|
|
nengel@2
|
570 break;
|
|
nengel@2
|
571 case 8: dstmask = dstmask8;
|
|
nengel@2
|
572 break;
|
|
nengel@2
|
573 case 12: dstmask = dstmask12;
|
|
nengel@2
|
574 break;
|
|
nengel@2
|
575 }
|
|
nengel@2
|
576
|
|
nengel@2
|
577 for (i=0; i<h; i++){
|
|
nengel@2
|
578 //unaligned load of src1
|
|
nengel@2
|
579 const vuint8_t srctmpa1 = *(vuint8_t *)(src1);
|
|
nengel@2
|
580 const vuint8_t srctmpa2 = *(vuint8_t *)(src1+16);
|
|
nengel@2
|
581 const vuint8_t srca= spu_or(spu_slqwbyte(srctmpa1, perm_src1), spu_rlmaskqwbyte(srctmpa2, perm_src1-16));
|
|
nengel@2
|
582
|
|
nengel@2
|
583 //aligned load of src2
|
|
nengel@2
|
584 const vuint8_t srcb = *(vuint8_t *)(src2);
|
|
nengel@2
|
585
|
|
nengel@2
|
586 //average and rounding
|
|
nengel@2
|
587 const vuint8_t avgc = spu_avg(srca,srcb);
|
|
nengel@2
|
588
|
|
nengel@2
|
589 const vuint8_t dst1 = *(vuint8_t *)dst;
|
|
nengel@2
|
590
|
|
nengel@2
|
591 const vuint8_t davgc = spu_shuffle(dst1, avgc, dstmask);
|
|
nengel@2
|
592
|
|
nengel@2
|
593 *(vuint8_t *)dst=davgc;
|
|
nengel@2
|
594
|
|
nengel@2
|
595 src1 +=src_stride1;
|
|
nengel@2
|
596 src2 +=16;
|
|
nengel@2
|
597 dst +=dst_stride;
|
|
nengel@2
|
598 }
|
|
nengel@2
|
599 }
|
|
nengel@2
|
600
|
|
nengel@2
|
601 void avg_pixels4_l2_spu(uint8_t * dst, const uint8_t * src1, const uint8_t * src2,
|
|
nengel@2
|
602 int dst_stride, int src_stride1, int h)
|
|
nengel@2
|
603 {
|
|
nengel@2
|
604 int i;
|
|
nengel@2
|
605
|
|
nengel@2
|
606 const int perm_src1 = (unsigned int) src1 & 15;
|
|
nengel@2
|
607 const int shift_dst = (unsigned int) dst & 15;
|
|
nengel@2
|
608
|
|
nengel@2
|
609 // 4x dest luma blocks are desaligned by 0, 4, 8, or 12
|
|
nengel@2
|
610 vuint8_t dstmask = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
|
nengel@2
|
611 const vuint8_t dstmask0= {0x10,0x11,0x12,0x13,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F};
|
|
nengel@2
|
612 const vuint8_t dstmask4= {0x00,0x01,0x02,0x03,0x10,0x11,0x12,0x13,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F};
|
|
nengel@2
|
613 const vuint8_t dstmask8= {0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x10,0x11,0x12,0x13,0x0C,0x0D,0x0E,0x0F};
|
|
nengel@2
|
614 const vuint8_t dstmask12= {0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x10,0x11,0x12,0x13};
|
|
nengel@2
|
615
|
|
nengel@2
|
616 switch(shift_dst){
|
|
nengel@2
|
617 case 0: dstmask = dstmask0;
|
|
nengel@2
|
618 break;
|
|
nengel@2
|
619 case 4: dstmask = dstmask4;
|
|
nengel@2
|
620 break;
|
|
nengel@2
|
621 case 8: dstmask = dstmask8;
|
|
nengel@2
|
622 break;
|
|
nengel@2
|
623 case 12: dstmask = dstmask12;
|
|
nengel@2
|
624 break;
|
|
nengel@2
|
625 }
|
|
nengel@2
|
626
|
|
nengel@2
|
627 for (i=0; i<h; i++){
|
|
nengel@2
|
628 //unaligned load of src1
|
|
nengel@2
|
629 const vuint8_t srctmpa1 = *(vuint8_t *)(src1);
|
|
nengel@2
|
630 const vuint8_t srctmpa2 = *(vuint8_t *)(src1+16);
|
|
nengel@2
|
631 const vuint8_t srca= spu_or(spu_slqwbyte(srctmpa1, perm_src1), spu_rlmaskqwbyte(srctmpa2, perm_src1-16));
|
|
nengel@2
|
632
|
|
nengel@2
|
633 //aligned load of src2
|
|
nengel@2
|
634 const vuint8_t srcb = *(vuint8_t *)(src2);
|
|
nengel@2
|
635
|
|
nengel@2
|
636 //average and rounding
|
|
nengel@2
|
637 const vuint8_t avgc = spu_avg(srca,srcb);
|
|
nengel@2
|
638
|
|
nengel@2
|
639 const vuint8_t dst1 = *(vuint8_t *)dst;
|
|
nengel@2
|
640
|
|
nengel@2
|
641 const vuint8_t davgc1 = spu_shuffle(dst1, avgc, dstmask);
|
|
nengel@2
|
642
|
|
nengel@2
|
643 const vuint8_t davgc = spu_avg(dst1,davgc1);
|
|
nengel@2
|
644
|
|
nengel@2
|
645 *(vuint8_t *)dst=davgc;
|
|
nengel@2
|
646
|
|
nengel@2
|
647 src1 +=src_stride1;
|
|
nengel@2
|
648 src2 +=16;
|
|
nengel@2
|
649 dst +=dst_stride;
|
|
nengel@2
|
650 }
|
|
nengel@2
|
651 }
|
|
nengel@2
|
652
|
|
nengel@2
|
653 // next one assumes that ((line_size % 16) == 0)
|
|
nengel@2
|
654 void put_pixels4_spu(uint8_t *dst, const uint8_t *src, int dst_stride, int src_stride, int h)
|
|
nengel@2
|
655 {
|
|
nengel@2
|
656 register vector unsigned char pixelsv1A, pixelsv2A;
|
|
nengel@2
|
657 register vector unsigned char pixelsv1B, pixelsv2B;
|
|
nengel@2
|
658 register vector unsigned char pixelsv1C, pixelsv2C;
|
|
nengel@2
|
659 register vector unsigned char pixelsv1D, pixelsv2D;
|
|
nengel@2
|
660
|
|
nengel@2
|
661 const int perm = (unsigned int) src & 15;
|
|
nengel@2
|
662 const int shift_dst = (unsigned int) dst & 15;
|
|
nengel@2
|
663
|
|
nengel@2
|
664 // 4x dest luma blocks are desaligned by 0, 4, 8, or 12
|
|
nengel@2
|
665 vuint8_t dstmask = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
|
nengel@2
|
666 const vuint8_t dstmask0= {0x10,0x11,0x12,0x13,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F};
|
|
nengel@2
|
667 const vuint8_t dstmask4= {0x00,0x01,0x02,0x03,0x10,0x11,0x12,0x13,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F};
|
|
nengel@2
|
668 const vuint8_t dstmask8= {0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x10,0x11,0x12,0x13,0x0C,0x0D,0x0E,0x0F};
|
|
nengel@2
|
669 const vuint8_t dstmask12= {0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x10,0x11,0x12,0x13};
|
|
nengel@2
|
670
|
|
nengel@2
|
671 switch(shift_dst){
|
|
nengel@2
|
672 case 0: dstmask = dstmask0;
|
|
nengel@2
|
673 break;
|
|
nengel@2
|
674 case 4: dstmask = dstmask4;
|
|
nengel@2
|
675 break;
|
|
nengel@2
|
676 case 8: dstmask = dstmask8;
|
|
nengel@2
|
677 break;
|
|
nengel@2
|
678 case 12: dstmask = dstmask12;
|
|
nengel@2
|
679 break;
|
|
nengel@2
|
680 }
|
|
nengel@2
|
681
|
|
nengel@2
|
682 int i;
|
|
nengel@2
|
683 register int line_size = src_stride;
|
|
nengel@2
|
684 register int line_size_2 = line_size << 1;
|
|
nengel@2
|
685 register int line_size_3 = line_size + line_size_2;
|
|
nengel@2
|
686 register int line_size_4 = line_size << 2;
|
|
nengel@2
|
687
|
|
nengel@2
|
688 register int dst_stride_2 = dst_stride << 1;
|
|
nengel@2
|
689 register int dst_stride_3 = dst_stride_2 + dst_stride;
|
|
nengel@2
|
690 register int dst_stride_4 = dst_stride << 2;
|
|
nengel@2
|
691
|
|
nengel@2
|
692 for(i=0; i<h; i+=4) {
|
|
nengel@2
|
693 pixelsv1A = *(vuint8_t *)(src);
|
|
nengel@2
|
694 pixelsv2A = *(vuint8_t *)(src+16);
|
|
nengel@2
|
695 pixelsv1B = *(vuint8_t *)(src + line_size);
|
|
nengel@2
|
696 pixelsv2B = *(vuint8_t *)(src+16 + line_size);
|
|
nengel@2
|
697 pixelsv1C = *(vuint8_t *)(src + line_size_2);
|
|
nengel@2
|
698 pixelsv2C = *(vuint8_t *)(src+16 + line_size_2);
|
|
nengel@2
|
699 pixelsv1D = *(vuint8_t *)(src + line_size_3);
|
|
nengel@2
|
700 pixelsv2D = *(vuint8_t *)(src+16 + line_size_3);
|
|
nengel@2
|
701
|
|
nengel@2
|
702 const vuint8_t block1 = *(vuint8_t *)dst;
|
|
nengel@2
|
703 const vuint8_t put1 = spu_shuffle(block1, spu_or(spu_slqwbyte(pixelsv1A, perm), spu_rlmaskqwbyte(pixelsv2A, perm-16)), dstmask);
|
|
nengel@2
|
704 const vuint8_t block2 = *(vuint8_t *)(dst+dst_stride);
|
|
nengel@2
|
705 const vuint8_t put2 = spu_shuffle(block2, spu_or(spu_slqwbyte(pixelsv1B, perm), spu_rlmaskqwbyte(pixelsv2B, perm-16)), dstmask);
|
|
nengel@2
|
706 const vuint8_t block3 = *(vuint8_t *)(dst+dst_stride_2);
|
|
nengel@2
|
707 const vuint8_t put3 = spu_shuffle(block3, spu_or(spu_slqwbyte(pixelsv1C, perm), spu_rlmaskqwbyte(pixelsv2C, perm-16)), dstmask);
|
|
nengel@2
|
708 const vuint8_t block4 = *(vuint8_t *)(dst+dst_stride_3);
|
|
nengel@2
|
709 const vuint8_t put4 = spu_shuffle(block4, spu_or(spu_slqwbyte(pixelsv1D, perm), spu_rlmaskqwbyte(pixelsv2D, perm-16)), dstmask);
|
|
nengel@2
|
710
|
|
nengel@2
|
711 *(vuint8_t *) dst = put1;
|
|
nengel@2
|
712 *(vuint8_t *)(dst + dst_stride) = put2;
|
|
nengel@2
|
713 *(vuint8_t *)(dst + dst_stride_2) = put3;
|
|
nengel@2
|
714 *(vuint8_t *)(dst + dst_stride_3) = put4;
|
|
nengel@2
|
715
|
|
nengel@2
|
716 src += line_size_4;
|
|
nengel@2
|
717 dst += dst_stride_4;
|
|
nengel@2
|
718 }
|
|
nengel@2
|
719 }
|
|
nengel@2
|
720
|
|
nengel@2
|
721 // next one assumes that ((line_size % 16) == 0)
|
|
nengel@2
|
722 void avg_pixels4_spu(uint8_t *dst, const uint8_t *src, int dst_stride, int src_stride, int h)
|
|
nengel@2
|
723 {
|
|
nengel@2
|
724 register vector unsigned char pixelsv1A, pixelsv2A;
|
|
nengel@2
|
725 register vector unsigned char pixelsv1B, pixelsv2B;
|
|
nengel@2
|
726 register vector unsigned char pixelsv1C, pixelsv2C;
|
|
nengel@2
|
727 register vector unsigned char pixelsv1D, pixelsv2D;
|
|
nengel@2
|
728
|
|
nengel@2
|
729 const int perm = (unsigned int) src & 15;
|
|
nengel@2
|
730 const int shift_dst = (unsigned int) dst & 15;
|
|
nengel@2
|
731
|
|
nengel@2
|
732 // 4x dest luma blocks are desaligned by 0, 4, 8, or 12
|
|
nengel@2
|
733 vuint8_t dstmask = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
|
nengel@2
|
734 const vuint8_t dstmask0= {0x10,0x11,0x12,0x13,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F};
|
|
nengel@2
|
735 const vuint8_t dstmask4= {0x00,0x01,0x02,0x03,0x10,0x11,0x12,0x13,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F};
|
|
nengel@2
|
736 const vuint8_t dstmask8= {0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x10,0x11,0x12,0x13,0x0C,0x0D,0x0E,0x0F};
|
|
nengel@2
|
737 const vuint8_t dstmask12= {0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x10,0x11,0x12,0x13};
|
|
nengel@2
|
738
|
|
nengel@2
|
739 switch(shift_dst){
|
|
nengel@2
|
740 case 0: dstmask = dstmask0;
|
|
nengel@2
|
741 break;
|
|
nengel@2
|
742 case 4: dstmask = dstmask4;
|
|
nengel@2
|
743 break;
|
|
nengel@2
|
744 case 8: dstmask = dstmask8;
|
|
nengel@2
|
745 break;
|
|
nengel@2
|
746 case 12: dstmask = dstmask12;
|
|
nengel@2
|
747 break;
|
|
nengel@2
|
748 }
|
|
nengel@2
|
749
|
|
nengel@2
|
750 int i;
|
|
nengel@2
|
751 register int line_size = src_stride;
|
|
nengel@2
|
752 register int line_size_2 = line_size << 1;
|
|
nengel@2
|
753 register int line_size_3 = line_size + line_size_2;
|
|
nengel@2
|
754 register int line_size_4 = line_size << 2;
|
|
nengel@2
|
755
|
|
nengel@2
|
756 register int dst_stride_2 = dst_stride << 1;
|
|
nengel@2
|
757 register int dst_stride_3 = dst_stride_2 + dst_stride;
|
|
nengel@2
|
758 register int dst_stride_4 = dst_stride << 2;
|
|
nengel@2
|
759
|
|
nengel@2
|
760 for(i=0; i<h; i+=4) {
|
|
nengel@2
|
761 pixelsv1A = *(vuint8_t *)(src);
|
|
nengel@2
|
762 pixelsv2A = *(vuint8_t *)(src+16);
|
|
nengel@2
|
763 pixelsv1B = *(vuint8_t *)(src + line_size);
|
|
nengel@2
|
764 pixelsv2B = *(vuint8_t *)(src+16 + line_size);
|
|
nengel@2
|
765 pixelsv1C = *(vuint8_t *)(src + line_size_2);
|
|
nengel@2
|
766 pixelsv2C = *(vuint8_t *)(src+16 + line_size_2);
|
|
nengel@2
|
767 pixelsv1D = *(vuint8_t *)(src + line_size_3);
|
|
nengel@2
|
768 pixelsv2D = *(vuint8_t *)(src+16 + line_size_3);
|
|
nengel@2
|
769
|
|
nengel@2
|
770 const vuint8_t block1 = *(vuint8_t *) dst;
|
|
nengel@2
|
771 const vuint8_t put1a = spu_shuffle(block1, spu_or(spu_slqwbyte(pixelsv1A, perm), spu_rlmaskqwbyte(pixelsv2A, perm-16)), dstmask);
|
|
nengel@2
|
772 const vuint8_t put1 = spu_avg(block1,put1a);
|
|
nengel@2
|
773
|
|
nengel@2
|
774 const vuint8_t block2 = *(vuint8_t *)(dst + dst_stride);
|
|
nengel@2
|
775 const vuint8_t put2a = spu_shuffle(block2, spu_or(spu_slqwbyte(pixelsv1B, perm), spu_rlmaskqwbyte(pixelsv2B, perm-16)), dstmask);
|
|
nengel@2
|
776 const vuint8_t put2 = spu_avg(block2,put2a);
|
|
nengel@2
|
777
|
|
nengel@2
|
778 const vuint8_t block3 = *(vuint8_t *)(dst + dst_stride_2);
|
|
nengel@2
|
779 const vuint8_t put3a = spu_shuffle(block3, spu_or(spu_slqwbyte(pixelsv1C, perm), spu_rlmaskqwbyte(pixelsv2C, perm-16)), dstmask);
|
|
nengel@2
|
780 const vuint8_t put3 = spu_avg(block3,put3a);
|
|
nengel@2
|
781
|
|
nengel@2
|
782 const vuint8_t block4 = *(vuint8_t *)(dst + dst_stride_3);
|
|
nengel@2
|
783 const vuint8_t put4a = spu_shuffle(block4, spu_or(spu_slqwbyte(pixelsv1D, perm), spu_rlmaskqwbyte(pixelsv2D, perm-16)), dstmask);
|
|
nengel@2
|
784 const vuint8_t put4 = spu_avg(block4,put4a);
|
|
nengel@2
|
785
|
|
nengel@2
|
786 *(vuint8_t *) dst = put1;
|
|
nengel@2
|
787 *(vuint8_t *)(dst + dst_stride) = put2;
|
|
nengel@2
|
788 *(vuint8_t *)(dst + dst_stride_2) = put3;
|
|
nengel@2
|
789 *(vuint8_t *)(dst + dst_stride_3) = put4;
|
|
nengel@2
|
790
|
|
nengel@2
|
791 src+= line_size_4;
|
|
nengel@2
|
792 dst+= dst_stride_4;
|
|
nengel@2
|
793 }
|
|
nengel@2
|
794 }
|
|
nengel@2
|
795
|
|
nengel@2
|
796 /* Here we create all the interpolation modes H.264 motion compensation stage for luma */
|
|
nengel@2
|
797 H264_MC(put_, 16, spu)
|
|
nengel@2
|
798 H264_MC(put_, 8, spu)
|
|
nengel@2
|
799 H264_MC(put_, 4, spu)
|
|
nengel@2
|
800
|
|
nengel@2
|
801 H264_MC(avg_, 16, spu)
|
|
nengel@2
|
802 H264_MC(avg_, 8, spu)
|
|
nengel@2
|
803 H264_MC(avg_, 4, spu)
|
|
nengel@2
|
804
|
|
nengel@2
|
805
|
|
nengel@2
|
806 //Chroma interpolation:
|
|
nengel@2
|
807
|
|
nengel@2
|
808 #define OP_U8_SPU PUT_OP_U8_SPU
|
|
nengel@2
|
809 #define PREFIX_h264_chroma_mc8_spu put_h264_chroma_mc8_spu
|
|
nengel@2
|
810 #define PREFIX_h264_chroma_mc4_spu put_h264_chroma_mc4_spu
|
|
nengel@2
|
811 #define PREFIX_h264_chroma_mc2_spu put_h264_chroma_mc2_spu
|
|
nengel@2
|
812 #include "h264_chroma_template_spu.c"
|
|
nengel@2
|
813 #undef OP_U8_SPU
|
|
nengel@2
|
814 #undef PREFIX_h264_chroma_mc8_spu
|
|
nengel@2
|
815 #undef PREFIX_h264_chroma_mc4_spu
|
|
nengel@2
|
816 #undef PREFIX_h264_chroma_mc2_spu
|
|
nengel@2
|
817
|
|
nengel@2
|
818 #define OP_U8_SPU AVG_OP_U8_SPU
|
|
nengel@2
|
819 #define PREFIX_h264_chroma_mc8_spu avg_h264_chroma_mc8_spu
|
|
nengel@2
|
820 #define PREFIX_h264_chroma_mc4_spu avg_h264_chroma_mc4_spu
|
|
nengel@2
|
821 #define PREFIX_h264_chroma_mc2_spu avg_h264_chroma_mc2_spu
|
|
nengel@2
|
822 #include "h264_chroma_template_spu.c"
|
|
nengel@2
|
823 #undef OP_U8_SPU
|
|
nengel@2
|
824 #undef PREFIX_h264_chroma_mc8_spu
|
|
nengel@2
|
825 #undef PREFIX_h264_chroma_mc4_spu
|
|
nengel@2
|
826 #undef PREFIX_h264_chroma_mc2_spu
|
|
nengel@2
|
827
|
|
nengel@2
|
// Weight and Biweight functions (scalar C)
//
// H264_WEIGHT(W,H) generates two static functions for a W x H pixel block:
//
//   weight_h264_pixelsWxH_c(dst, stride, log2_denom, weight, offset)
//     offset is scaled by 2^log2_denom and, when log2_denom is non-zero,
//     half of the denominator is added as rounding term; every pixel becomes
//     av_clip_uint8((dst*weight + offset) >> log2_denom).
//
//   biweight_h264_pixelsWxH_c(dst, src, dst_stride, src_stride,
//                             log2_denom, weightd, weights, offset)
//     offset becomes ((offset + 1) | 1) << log2_denom; every pixel becomes
//     av_clip_uint8((src*weights + dst*weightd + offset) >> (log2_denom+1)).
//
// Both operate in place on dst, row by row.

#define op_scale1(x)  dst[x] = av_clip_uint8( (dst[x]*weight + offset) >> log2_denom )
#define op_scale2(x)  dst[x] = av_clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))
#define H264_WEIGHT(W,H) \
static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, int stride, int log2_denom, int weight, int offset){ \
    int row, col; \
    offset = offset << log2_denom; \
    if (log2_denom != 0) \
        offset += 1 << (log2_denom - 1); \
    for (row = 0; row < H; row++) { \
        for (col = 0; col < W; col++) { \
            op_scale1(col); \
        } \
        dst += stride; \
    } \
} \
static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride, int log2_denom, int weightd, int weights, int offset){ \
    int row, col; \
    offset = ((offset + 1) | 1) << log2_denom; \
    for (row = 0; row < H; row++) { \
        for (col = 0; col < W; col++) { \
            op_scale2(col); \
        } \
        dst += dst_stride; \
        src += src_stride; \
    } \
}

H264_WEIGHT(16,16)
H264_WEIGHT(16,8)
H264_WEIGHT(8,16)
H264_WEIGHT(8,8)
H264_WEIGHT(8,4)
H264_WEIGHT(4,8)
H264_WEIGHT(4,4)
H264_WEIGHT(4,2)
H264_WEIGHT(2,4)
H264_WEIGHT(2,2)

#undef op_scale1
#undef op_scale2
#undef H264_WEIGHT
|
|
nengel@2
|
899
|
|
nengel@2
|
900 /////////////////////////////////////////////////////////////////////////////////////////
|
|
nengel@2
|
901
|
|
nengel@2
|
/**
 * Normal (non-intra) luma deblocking filter over 16 lines of one edge.
 *
 * The edge is split into four segments of four lines; segment i uses the
 * clipping strength tc0[i] and is skipped entirely when tc0[i] is negative.
 * For each line the samples p2,p1,p0 | q0,q1,q2 are read across the edge at
 * multiples of xstride, and the line is filtered only if |p0-q0| < alpha,
 * |p1-p0| < beta and |q1-q0| < beta.
 *
 * @param pix      first q0 sample of the edge
 * @param xstride  byte distance between neighbouring samples across the edge
 * @param ystride  byte distance between consecutive lines along the edge
 * @param alpha    threshold on |p0 - q0|
 * @param beta     threshold on the inner sample differences
 * @param tc0      four clipping strengths, one per 4-line segment
 */
static inline void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
{
    int seg, line;

    for (seg = 0; seg < 4; seg++) {
        const int tc_base = tc0[seg];

        if (tc_base < 0) {
            /* Segment disabled: step over its four lines. */
            pix += 4*ystride;
            continue;
        }

        for (line = 0; line < 4; line++, pix += ystride) {
            const int p0 = pix[-1*xstride];
            const int p1 = pix[-2*xstride];
            const int p2 = pix[-3*xstride];
            const int q0 = pix[0];
            const int q1 = pix[1*xstride];
            const int q2 = pix[2*xstride];
            int tc, pq_avg, delta;

            if (FFABS( p0 - q0 ) >= alpha ||
                FFABS( p1 - p0 ) >= beta  ||
                FFABS( q1 - q0 ) >= beta)
                continue;

            tc     = tc_base;
            pq_avg = ( p0 + q0 + 1 ) >> 1;

            /* Each side that is smooth enough gets its p1/q1 sample
             * corrected and widens the clipping range for p0/q0 by one. */
            if (FFABS( p2 - p0 ) < beta) {
                pix[-2*xstride] = p1 + av_clip( (( p2 + pq_avg ) >> 1) - p1, -tc_base, tc_base );
                tc++;
            }
            if (FFABS( q2 - q0 ) < beta) {
                pix[   xstride] = q1 + av_clip( (( q2 + pq_avg ) >> 1) - q1, -tc_base, tc_base );
                tc++;
            }

            delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
            pix[-xstride] = av_clip_uint8( p0 + delta );    /* p0' */
            pix[0]        = av_clip_uint8( q0 - delta );    /* q0' */
        }
    }
}
|
|
nengel@2
|
/**
 * Luma deblocking with samples taken `stride` bytes apart across the edge
 * and consecutive lines one byte apart (xstride = stride, ystride = 1).
 */
static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_luma_c(pix, stride, 1, alpha, beta, tc0);
}

/**
 * Luma deblocking with samples taken one byte apart across the edge and
 * consecutive lines `stride` bytes apart (xstride = 1, ystride = stride).
 */
static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_luma_c(pix, 1, stride, alpha, beta, tc0);
}
|
|
nengel@2
|
950
|
|
nengel@2
|
/**
 * Intra luma deblocking filter over 16 lines of one edge.
 *
 * A line is filtered only if |p0-q0| < alpha, |p1-p0| < beta and
 * |q1-q0| < beta. When additionally |p0-q0| < (alpha >> 2) + 2, each side
 * whose outer difference (|p2-p0| resp. |q2-q0|) is below beta gets the
 * strong filter that rewrites three samples; in every other case only the
 * sample next to the edge (p0 resp. q0) is rewritten.
 *
 * @param pix      first q0 sample of the edge
 * @param xstride  byte distance between neighbouring samples across the edge
 * @param ystride  byte distance between consecutive lines along the edge
 * @param alpha    threshold on |p0 - q0|
 * @param beta     threshold on the inner sample differences
 */
static inline void h264_loop_filter_luma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
{
    int line;

    for (line = 0; line < 16; line++, pix += ystride) {
        const int p2 = pix[-3*xstride];
        const int p1 = pix[-2*xstride];
        const int p0 = pix[-1*xstride];

        const int q0 = pix[ 0*xstride];
        const int q1 = pix[ 1*xstride];
        const int q2 = pix[ 2*xstride];
        int strong;

        if (FFABS( p0 - q0 ) >= alpha ||
            FFABS( p1 - p0 ) >= beta  ||
            FFABS( q1 - q0 ) >= beta)
            continue;

        /* Strong filtering is only eligible for a small step across the edge. */
        strong = FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 );

        if (strong && FFABS( p2 - p0 ) < beta) {
            const int p3 = pix[-4*xstride];
            /* p0', p1', p2' */
            pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
            pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
            pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
        } else {
            /* p0' */
            pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
        }

        if (strong && FFABS( q2 - q0 ) < beta) {
            const int q3 = pix[3*xstride];
            /* q0', q1', q2' */
            pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
            pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
            pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
        } else {
            /* q0' */
            pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
        }
    }
}
|
|
nengel@2
|
/**
 * Intra luma deblocking with samples taken `stride` bytes apart across the
 * edge and consecutive lines one byte apart (xstride = stride, ystride = 1).
 */
static void h264_v_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    h264_loop_filter_luma_intra_c(pix, stride, 1, alpha, beta);
}

/**
 * Intra luma deblocking with samples taken one byte apart across the edge
 * and consecutive lines `stride` bytes apart (xstride = 1, ystride = stride).
 */
static void h264_h_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    h264_loop_filter_luma_intra_c(pix, 1, stride, alpha, beta);
}
|
|
nengel@2
|
1007
|
|
nengel@2
|
/**
 * Normal (non-intra) chroma deblocking filter over 8 lines of one edge.
 *
 * The edge is split into four segments of two lines; segment i uses the
 * clipping strength tc0[i] and is skipped when tc0[i] <= 0. A line is
 * filtered only if |p0-q0| < alpha, |p1-p0| < beta and |q1-q0| < beta, and
 * only p0 and q0 are modified.
 *
 * @param pix      first q0 sample of the edge
 * @param xstride  byte distance between neighbouring samples across the edge
 * @param ystride  byte distance between consecutive lines along the edge
 * @param alpha    threshold on |p0 - q0|
 * @param beta     threshold on the inner sample differences
 * @param tc0      four clipping strengths, one per 2-line segment
 */
static inline void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
{
    int seg, line;

    for (seg = 0; seg < 4; seg++) {
        const int tc = tc0[seg];

        if (tc <= 0) {
            /* Segment disabled: step over its two lines. */
            pix += 2*ystride;
            continue;
        }

        for (line = 0; line < 2; line++, pix += ystride) {
            const int p0 = pix[-1*xstride];
            const int p1 = pix[-2*xstride];
            const int q0 = pix[0];
            const int q1 = pix[1*xstride];
            int delta;

            if (FFABS( p0 - q0 ) >= alpha ||
                FFABS( p1 - p0 ) >= beta  ||
                FFABS( q1 - q0 ) >= beta)
                continue;

            delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );

            pix[-xstride] = av_clip_uint8( p0 + delta );    /* p0' */
            pix[0]        = av_clip_uint8( q0 - delta );    /* q0' */
        }
    }
}
|
|
nengel@2
|
/**
 * Chroma deblocking with samples taken `stride` bytes apart across the edge
 * and consecutive lines one byte apart (xstride = stride, ystride = 1).
 */
static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_chroma_c(pix, stride, 1, alpha, beta, tc0);
}

/**
 * Chroma deblocking with samples taken one byte apart across the edge and
 * consecutive lines `stride` bytes apart (xstride = 1, ystride = stride).
 */
static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_chroma_c(pix, 1, stride, alpha, beta, tc0);
}
|
|
nengel@2
|
1044
|
|
nengel@2
|
/**
 * Intra chroma deblocking filter over 8 lines of one edge.
 *
 * A line is filtered only if |p0-q0| < alpha, |p1-p0| < beta and
 * |q1-q0| < beta; p0 and q0 are then replaced by a rounded weighted mean of
 * their neighbours. No clipping strength is involved.
 *
 * @param pix      first q0 sample of the edge
 * @param xstride  byte distance between neighbouring samples across the edge
 * @param ystride  byte distance between consecutive lines along the edge
 * @param alpha    threshold on |p0 - q0|
 * @param beta     threshold on the inner sample differences
 */
static inline void h264_loop_filter_chroma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
{
    int line;

    for (line = 0; line < 8; line++, pix += ystride) {
        const int p0 = pix[-1*xstride];
        const int p1 = pix[-2*xstride];
        const int q0 = pix[0];
        const int q1 = pix[1*xstride];

        if (FFABS( p0 - q0 ) >= alpha ||
            FFABS( p1 - p0 ) >= beta  ||
            FFABS( q1 - q0 ) >= beta)
            continue;

        pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
        pix[0]        = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
    }
}
|
|
nengel@2
|
/**
 * Intra chroma deblocking with samples taken `stride` bytes apart across the
 * edge and consecutive lines one byte apart (xstride = stride, ystride = 1).
 */
static void h264_v_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    h264_loop_filter_chroma_intra_c(pix, stride, 1, alpha, beta);
}

/**
 * Intra chroma deblocking with samples taken one byte apart across the edge
 * and consecutive lines `stride` bytes apart (xstride = 1, ystride = stride).
 */
static void h264_h_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    h264_loop_filter_chroma_intra_c(pix, 1, stride, alpha, beta);
}
|
|
nengel@2
|
1072
|
|
nengel@2
|
1073
|
|
nengel@2
|
1074 void dsputil_h264_init_cell(DSPContext_spu* c) {
|
|
nengel@2
|
1075
|
|
nengel@2
|
1076 c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_c;
|
|
nengel@2
|
1077 c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c;
|
|
nengel@2
|
1078 c->h264_v_loop_filter_luma_intra= h264_v_loop_filter_luma_intra_c;
|
|
nengel@2
|
1079 c->h264_h_loop_filter_luma_intra= h264_h_loop_filter_luma_intra_c;
|
|
nengel@2
|
1080 c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c;
|
|
nengel@2
|
1081 c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c;
|
|
nengel@2
|
1082 c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_c;
|
|
nengel@2
|
1083 c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_c;
|
|
nengel@2
|
1084
|
|
nengel@2
|
1085 c->h264_idct_add[0] = h264_idct8_add_spu;
|
|
nengel@2
|
1086 c->h264_idct_add[1] = h264_idct4_add_spu;
|
|
nengel@2
|
1087
|
|
nengel@2
|
1088
|
|
nengel@2
|
1089 c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_spu;
|
|
nengel@2
|
1090 c->put_h264_chroma_pixels_tab[1] = put_h264_chroma_mc4_spu;
|
|
nengel@2
|
1091 c->put_h264_chroma_pixels_tab[2] = put_h264_chroma_mc2_spu;
|
|
nengel@2
|
1092 c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_spu;
|
|
nengel@2
|
1093 c->avg_h264_chroma_pixels_tab[1] = avg_h264_chroma_mc4_spu;
|
|
nengel@2
|
1094 c->avg_h264_chroma_pixels_tab[2] = avg_h264_chroma_mc2_spu;
|
|
nengel@2
|
1095
|
|
nengel@2
|
1096 c->weight_h264_pixels_tab[0]= weight_h264_pixels16x16_c;
|
|
nengel@2
|
1097 c->weight_h264_pixels_tab[1]= weight_h264_pixels16x8_c;
|
|
nengel@2
|
1098 c->weight_h264_pixels_tab[2]= weight_h264_pixels8x16_c;
|
|
nengel@2
|
1099 c->weight_h264_pixels_tab[3]= weight_h264_pixels8x8_c;
|
|
nengel@2
|
1100 c->weight_h264_pixels_tab[4]= weight_h264_pixels8x4_c;
|
|
nengel@2
|
1101 c->weight_h264_pixels_tab[5]= weight_h264_pixels4x8_c;
|
|
nengel@2
|
1102 c->weight_h264_pixels_tab[6]= weight_h264_pixels4x4_c;
|
|
nengel@2
|
1103 c->weight_h264_pixels_tab[7]= weight_h264_pixels4x2_c;
|
|
nengel@2
|
1104 c->weight_h264_pixels_tab[8]= weight_h264_pixels2x4_c;
|
|
nengel@2
|
1105 c->weight_h264_pixels_tab[9]= weight_h264_pixels2x2_c;
|
|
nengel@2
|
1106 c->biweight_h264_pixels_tab[0]= biweight_h264_pixels16x16_c;
|
|
nengel@2
|
1107 c->biweight_h264_pixels_tab[1]= biweight_h264_pixels16x8_c;
|
|
nengel@2
|
1108 c->biweight_h264_pixels_tab[2]= biweight_h264_pixels8x16_c;
|
|
nengel@2
|
1109 c->biweight_h264_pixels_tab[3]= biweight_h264_pixels8x8_c;
|
|
nengel@2
|
1110 c->biweight_h264_pixels_tab[4]= biweight_h264_pixels8x4_c;
|
|
nengel@2
|
1111 c->biweight_h264_pixels_tab[5]= biweight_h264_pixels4x8_c;
|
|
nengel@2
|
1112 c->biweight_h264_pixels_tab[6]= biweight_h264_pixels4x4_c;
|
|
nengel@2
|
1113 c->biweight_h264_pixels_tab[7]= biweight_h264_pixels4x2_c;
|
|
nengel@2
|
1114 c->biweight_h264_pixels_tab[8]= biweight_h264_pixels2x4_c;
|
|
nengel@2
|
1115 c->biweight_h264_pixels_tab[9]= biweight_h264_pixels2x2_c;
|
|
nengel@2
|
1116
|
|
nengel@2
|
1117
|
|
nengel@2
|
1118 #define dspfunc(PFX, IDX, NUM) \
|
|
nengel@2
|
1119 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_spu; \
|
|
nengel@2
|
1120 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_spu; \
|
|
nengel@2
|
1121 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_spu; \
|
|
nengel@2
|
1122 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_spu; \
|
|
nengel@2
|
1123 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_spu; \
|
|
nengel@2
|
1124 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_spu; \
|
|
nengel@2
|
1125 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_spu; \
|
|
nengel@2
|
1126 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_spu; \
|
|
nengel@2
|
1127 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_spu; \
|
|
nengel@2
|
1128 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_spu; \
|
|
nengel@2
|
1129 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_spu; \
|
|
nengel@2
|
1130 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_spu; \
|
|
nengel@2
|
1131 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_spu; \
|
|
nengel@2
|
1132 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_spu; \
|
|
nengel@2
|
1133 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_spu; \
|
|
nengel@2
|
1134 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_spu
|
|
nengel@2
|
1135
|
|
nengel@2
|
1136 dspfunc(put_h264_qpel, 0, 16);
|
|
nengel@2
|
1137 dspfunc(put_h264_qpel, 1, 8);
|
|
nengel@2
|
1138 dspfunc(put_h264_qpel, 2, 4);
|
|
nengel@2
|
1139
|
|
nengel@2
|
1140 dspfunc(avg_h264_qpel, 0, 16);
|
|
nengel@2
|
1141 dspfunc(avg_h264_qpel, 1, 8);
|
|
nengel@2
|
1142 dspfunc(avg_h264_qpel, 2, 4);
|
|
nengel@2
|
1143
|
|
nengel@2
|
1144 #undef dspfunc
|
|
nengel@2
|
1145
|
|
nengel@2
|
1146
|
|
nengel@2
|
1147 }
|