Mercurial > cgi-bin > hgwebdir.cgi > PR > Applications > VSs > VSs__H264__App
view libavcodec/h264_rec.c @ 9:ea1ba68cf0ed
update to match api changes + add sscc produced source
| author | Nina Engelhardt <nengel@mailbox.tu-berlin.de> |
|---|---|
| date | Wed, 05 Jun 2013 14:43:26 +0200 |
| parents | |
| children |
line source
1 #include "config.h"
3 #include "dsputil.h"
4 #include "h264_types.h"
5 #include "h264_data.h"
6 #include "h264_mc.h"
7 #include "h264_deblock.h"
8 #include "h264_pred_mode.h"
9 //#undef NDEBUG
10 #include <assert.h>
12 void init_mbrec_context(MBRecContext *mrc, MBRecState *mrs, H264Slice *s, int line){
13 DecodedPicture *pic = s->curr_pic;
14 int mb_stride = mrc->mb_stride;
15 int mb_width = mrc->mb_width;
16 mrs->mb_type_top = pic->mb_type + (line -1)*mb_stride;
17 mrs->mb_type = pic->mb_type + line*mb_stride;
18 mrs->ref_index_top[0] = pic->ref_index[0] + 4*(line -1)*mb_stride;
19 mrs->ref_index_top[1] = pic->ref_index[1] + 4*(line -1)*mb_stride;
20 mrs->ref_index[0] = pic->ref_index[0] + 4*line*mb_stride;
21 mrs->ref_index[1] = pic->ref_index[1] + 4*line*mb_stride;
23 mrs->motion_val_top[0] = pic->motion_val[0] + 4*mb_width*4*(line-1);
24 mrs->motion_val_top[1] = pic->motion_val[1] + 4*mb_width*4*(line-1);
25 mrs->motion_val[0] = pic->motion_val[0] + 4*mb_width*4*line;
26 mrs->motion_val[1] = pic->motion_val[1] + 4*mb_width*4*line;
28 mrs->intra4x4_pred_mode_top = pic->intra4x4_pred_mode + 4*mb_width*(line-1);
29 mrs->intra4x4_pred_mode = pic->intra4x4_pred_mode + 4*mb_width*line;
31 mrs->non_zero_count_top = pic->non_zero_count + 8*mb_width*(line-1);
32 mrs->non_zero_count = pic->non_zero_count + 8*mb_width*line;
34 if (s->slice_type_nos == FF_B_TYPE){
35 mrs->list1_mb_type = s->dp_ref_list[1][0]->mb_type + line*mb_stride;
36 mrs->list1_ref_index[0] = s->dp_ref_list[1][0]->ref_index[0] + 4*line*mb_stride;
37 mrs->list1_ref_index[1] = s->dp_ref_list[1][0]->ref_index[1] + 4*line*mb_stride;
38 mrs->list1_motion_val[0] = s->dp_ref_list[1][0]->motion_val[0] + 4*mb_width*4*line;
39 mrs->list1_motion_val[1] = s->dp_ref_list[1][0]->motion_val[1] + 4*mb_width*4*line;
40 }
42 }
44 #if OMPSS
45 static void backup_mb_border(H264Mb *m, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
46 int i;
47 uint8_t * top_border_y1 = m->top_border;
48 uint8_t * top_border_y2 = m->top_border + 8;
49 uint8_t * top_border_cb = m->top_border + 16;
50 uint8_t * top_border_cr = m->top_border + 24;
51 uint8_t * top_border_next = m->top_border_next;
53 src_y -= linesize;
54 src_cb -= uvlinesize;
55 src_cr -= uvlinesize;
57 m->left_border[0]= m->top_border[15];
58 for(i=1; i<17 ; i++){
59 m->left_border[i]= src_y[15 + i*linesize];
60 }
62 *(uint64_t*)(top_border_y1) = *(uint64_t*)(src_y + 16*linesize);
63 *(uint64_t*)(top_border_next) = *(uint64_t*)(src_y + 16*linesize);
64 *(uint64_t*)(top_border_y2) = *(uint64_t*)(src_y +8+16*linesize);
66 m->left_border[17]= m->top_border[16+7];
67 m->left_border[17+9]= m->top_border[24+7];
68 for(i=1; i<9; i++){
69 m->left_border[17 +i]= src_cb[7+i*uvlinesize];
70 m->left_border[17+9+i]= src_cr[7+i*uvlinesize];
71 }
72 *(uint64_t*)(top_border_cb)= *(uint64_t*)(src_cb+8*uvlinesize);
73 *(uint64_t*)(top_border_cr)= *(uint64_t*)(src_cr+8*uvlinesize);
74 }
76 static void xchg_mb_border(H264Mb *m, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
77 int temp8, i;
78 uint64_t temp64;
80 uint8_t * top_border_y1 = m->top_border;
81 uint8_t * top_border_y2 = m->top_border + 8;
82 uint8_t * top_border_cb = m->top_border + 16;
83 uint8_t * top_border_cr = m->top_border + 24;
84 uint8_t * top_border_next = m->top_border_next;
86 int deblock_left;
87 int deblock_top;
89 deblock_left = (m->mb_x > 0);
90 deblock_top = (m->mb_y > 0);
92 src_y -= ( linesize + 1);
93 src_cb -= (uvlinesize + 1);
94 src_cr -= (uvlinesize + 1);
96 #define XCHG(a,b,t,xchg)\
97 t= a;\
98 if(xchg)\
99 a= b;\
100 b= t;
102 if(deblock_left){
103 for(i = !deblock_top; i<16; i++){
104 XCHG(m->left_border[i], src_y [i* linesize], temp8, xchg);
105 }
106 XCHG(m->left_border[i], src_y [i* linesize], temp8, 1);
108 for(i = !deblock_top; i<8; i++){
109 XCHG(m->left_border[17 +i], src_cb[i*uvlinesize], temp8, xchg);
110 XCHG(m->left_border[17+9+i], src_cr[i*uvlinesize], temp8, xchg);
111 }
112 XCHG(m->left_border[17 +i], src_cb[i*uvlinesize], temp8, 1);
113 XCHG(m->left_border[17+9+i], src_cr[i*uvlinesize], temp8, 1);
114 }
116 if(deblock_top){
117 XCHG(*(uint64_t*)(top_border_y1) , *(uint64_t*)(src_y +1), temp64, xchg);
118 XCHG(*(uint64_t*)(top_border_y2) , *(uint64_t*)(src_y +9), temp64, 1);
119 XCHG(*(uint64_t*)(top_border_next), *(uint64_t*)(src_y +17), temp64, 1);
121 XCHG(*(uint64_t*)(top_border_cb) , *(uint64_t*)(src_cb+1), temp64, 1);
122 XCHG(*(uint64_t*)(top_border_cr) , *(uint64_t*)(src_cr+1), temp64, 1);
123 }
124 }
125 #else
127 static void backup_mb_border(MBRecContext *d, H264Mb *m, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
128 int i;
129 uint8_t* top_border_y = d->top[m->mb_x].unfiltered_y;
130 uint8_t* top_border_cb = d->top[m->mb_x].unfiltered_cb;
131 uint8_t* top_border_cr = d->top[m->mb_x].unfiltered_cr;
133 uint8_t* left_border_y = d->left.unfiltered_y;
134 uint8_t* left_border_cb = d->left.unfiltered_cb;
135 uint8_t* left_border_cr = d->left.unfiltered_cr;
137 src_y -= linesize;
138 src_cb -= uvlinesize;
139 src_cr -= uvlinesize;
141 // There are two lines saved, the line above the top macroblock of a pair,
142 // and the line above the bottom macroblock
143 left_border_y[0] = top_border_y[15];
144 for(i=1; i<17; i++){
145 left_border_y[i] = src_y[15+i* linesize];
146 }
147 *(uint64_t*)(top_border_y ) = *(uint64_t*)(src_y + 16*linesize);
148 *(uint64_t*)(top_border_y +8) = *(uint64_t*)(src_y +8+16*linesize);
150 left_border_cb[0] = top_border_cb[7];
151 left_border_cr[0] = top_border_cr[7];
152 for(i=1; i<9; i++){
153 left_border_cb[i] = src_cb[7+i*uvlinesize];
154 left_border_cr[i] = src_cr[7+i*uvlinesize];
155 }
156 *(uint64_t*)(top_border_cb)= *(uint64_t*)(src_cb+8*uvlinesize);
157 *(uint64_t*)(top_border_cr)= *(uint64_t*)(src_cr+8*uvlinesize);
158 }
160 static void xchg_mb_border(MBRecContext *d, H264Mb *m, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
162 int temp8, i;
163 uint64_t temp64;
164 int deblock_left;
165 int deblock_top;
167 uint8_t* top_border_y = d->top[m->mb_x].unfiltered_y;
168 uint8_t* top_border_cb = d->top[m->mb_x].unfiltered_cb;
169 uint8_t* top_border_cr = d->top[m->mb_x].unfiltered_cr;
170 uint8_t* top_border_y_next = d->top[m->mb_x +1].unfiltered_y;
172 uint8_t* left_border_y = d->left.unfiltered_y;
173 uint8_t* left_border_cb = d->left.unfiltered_cb;
174 uint8_t* left_border_cr = d->left.unfiltered_cr;
176 deblock_left = (m->mb_x > 0);
177 deblock_top = (m->mb_y > 0);
179 src_y -= ( linesize + 1);
180 src_cb -= (uvlinesize + 1);
181 src_cr -= (uvlinesize + 1);
183 #define XCHG(a,b,t,xchg)\
184 t= a;\
185 if(xchg)\
186 a= b;\
187 b= t;
189 if(deblock_left){
190 for(i = !deblock_top; i<16; i++){
191 XCHG(left_border_y[i], src_y [i* linesize], temp8, xchg);
192 }
193 XCHG(left_border_y[i], src_y [i* linesize], temp8, 1);
195 for(i = !deblock_top; i<8; i++){
196 XCHG(left_border_cb[i], src_cb[i*uvlinesize], temp8, xchg);
197 XCHG(left_border_cr[i], src_cr[i*uvlinesize], temp8, xchg);
198 }
199 XCHG(left_border_cb[i], src_cb[i*uvlinesize], temp8, 1);
200 XCHG(left_border_cr[i], src_cr[i*uvlinesize], temp8, 1);
201 }
203 if(deblock_top){
204 XCHG(*(uint64_t*)(top_border_y+0), *(uint64_t*)(src_y +1), temp64, xchg);
205 XCHG(*(uint64_t*)(top_border_y+8), *(uint64_t*)(src_y +9), temp64, 1);
206 if(m->mb_x+1 < d->mb_width){
207 XCHG(*(uint64_t*)(top_border_y_next), *(uint64_t*)(src_y +17), temp64, 1);
208 }
209 XCHG(*(uint64_t*)(top_border_cb), *(uint64_t*)(src_cb+1), temp64, 1);
210 XCHG(*(uint64_t*)(top_border_cr), *(uint64_t*)(src_cr+1), temp64, 1);
211 }
212 }
214 #endif
216 void h264_decode_mb_internal(MBRecContext *d, MBRecState *mrs, H264Slice *s, H264Mb *m){
217 int i;
218 const int mb_x= m->mb_x;
219 const int mb_y= m->mb_y;
220 int *block_offset = d->block_offset;
222 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
223 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
225 int linesize = d->linesize;
226 int uvlinesize = d->uvlinesize;
228 uint8_t *dest_y = s->curr_pic->data[0] + (mb_x + mb_y * linesize ) * 16;
229 uint8_t *dest_cb = s->curr_pic->data[1] + (mb_x + mb_y * uvlinesize) * 8;
230 uint8_t *dest_cr = s->curr_pic->data[2] + (mb_x + mb_y * uvlinesize) * 8;
232 pred_motion_mb_rec (d, mrs, s, m);
234 const int mb_type= m->mb_type;
236 d->dsp.prefetch(dest_y + (m->mb_x&3)*4*linesize + 64, d->linesize, 4);
237 d->dsp.prefetch(dest_cb + (m->mb_x&7)*uvlinesize + 64, dest_cr - dest_cb, 2);
239 if(IS_INTRA(mb_type)){
240 #if OMPSS
241 xchg_mb_border(m, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1);
242 #else
243 xchg_mb_border(d, m, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1);
244 #endif
246 d->hpc.pred8x8[ m->chroma_pred_mode ](dest_cb, uvlinesize);
247 d->hpc.pred8x8[ m->chroma_pred_mode ](dest_cr, uvlinesize);
249 if(IS_INTRA4x4(mb_type)){
250 if(IS_8x8DCT(mb_type)){
251 idct_dc_add = d->hdsp.h264_idct8_dc_add;
252 idct_add = d->hdsp.h264_idct8_add;
254 for(i=0; i<16; i+=4){
255 uint8_t * const ptr= dest_y + block_offset[i];
256 const int dir= mrs->intra4x4_pred_mode_cache[ scan8[i] ];
258 const int nnz = mrs->non_zero_count_cache[ scan8[i] ];
259 d->hpc.pred8x8l[ dir ](ptr, (mrs->topleft_samples_available<<i)&0x8000,
260 (mrs->topright_samples_available<<i)&0x4000, linesize);
261 if(nnz){
262 if(nnz == 1 && m->mb[i*16])
263 idct_dc_add(ptr, m->mb + i*16, linesize);
264 else
265 idct_add (ptr, m->mb + i*16, linesize);
266 }
267 }
268 }else{
269 idct_dc_add = d->hdsp.h264_idct_dc_add;
270 idct_add = d->hdsp.h264_idct_add;
272 for(i=0; i<16; i++){
273 uint8_t * const ptr= dest_y + block_offset[i];
274 const int dir= mrs->intra4x4_pred_mode_cache[ scan8[i] ];
275 uint8_t *topright;
276 int nnz, tr;
277 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
278 const int topright_avail= (mrs->topright_samples_available<<i)&0x8000;
279 assert(mb_y || linesize <= block_offset[i]);
280 if(!topright_avail){
281 tr= ptr[3 - linesize]*0x01010101;
282 topright= (uint8_t*) &tr;
283 }else
284 topright= ptr + 4 - linesize;
285 }else
286 topright= NULL;
288 d->hpc.pred4x4[ dir ](ptr, topright, linesize);
289 nnz = mrs->non_zero_count_cache[ scan8[i] ];
290 if(nnz){
291 if(nnz == 1 && m->mb[i*16])
292 idct_dc_add(ptr, m->mb + i*16, linesize);
293 else
294 idct_add (ptr, m->mb + i*16, linesize);
295 }
296 }
297 }
298 }else{
299 d->hpc.pred16x16[ m->intra16x16_pred_mode ](dest_y , linesize);
300 }
301 #if OMPSS
302 xchg_mb_border(m, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
303 #else
304 xchg_mb_border(d, m, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
305 #endif
306 }else {
307 hl_motion(d, mrs, s, m, dest_y, dest_cb, dest_cr,
308 d->hdsp.qpel_put, d->dsp.put_h264_chroma_pixels_tab,
309 d->hdsp.qpel_avg, d->dsp.avg_h264_chroma_pixels_tab,
310 d->hdsp.weight_h264_pixels_tab, d->hdsp.biweight_h264_pixels_tab);
311 }
313 if(!IS_INTRA4x4(mb_type)){
315 if(IS_INTRA16x16(mb_type)){
317 d->hdsp.h264_idct_add16intra(dest_y, block_offset, m->mb, linesize, mrs->non_zero_count_cache);
319 }else if(m->cbp&15){
321 if(IS_8x8DCT(mb_type)){
322 d->hdsp.h264_idct8_add4(dest_y, block_offset, m->mb, linesize, mrs->non_zero_count_cache);
323 }else{
324 d->hdsp.h264_idct_add16(dest_y, block_offset, m->mb, linesize, mrs->non_zero_count_cache);
325 }
326 }
327 }
329 if(m->cbp&0x30){
330 uint8_t *dest[2] = {dest_cb, dest_cr};
332 idct_add = d->hdsp.h264_idct_add;
333 idct_dc_add = d->hdsp.h264_idct_dc_add;
334 for(i=16; i<16+8; i++){
335 if(mrs->non_zero_count_cache[ scan8[i] ])
336 idct_add (dest[(i&4)>>2] + block_offset[i], m->mb + i*16, uvlinesize);
337 else if(m->mb[i*16])
338 idct_dc_add(dest[(i&4)>>2] + block_offset[i], m->mb + i*16, uvlinesize);
339 }
340 }
342 #if OMPSS
343 backup_mb_border(m, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
344 if (mb_x+1 <d->mb_width){
345 H264Mb *mr = m+1;
346 memcpy(mr->left_border, m->left_border, sizeof(m->left_border));
347 }
348 if (mb_y +1 <d->mb_height){
349 H264Mb *md = m + d->mb_width;
350 memcpy(md->top_border, m->top_border, sizeof(m->top_border));
351 if (mb_x>0){
352 H264Mb *mdl = m + d->mb_width -1;
353 memcpy(mdl->top_border_next, m->top_border_next, sizeof(m->top_border_next));
354 }
355 }
356 #else
357 backup_mb_border(d, m, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
358 if (mb_y +1 <d->mb_height && d->top_next != d->top){
359 memcpy(&d->top_next[mb_x],&d->top[mb_x], sizeof(TopBorder));
360 }
361 #endif
363 ff_h264_filter_mb(d, mrs, s, m, dest_y, dest_cb, dest_cr);
364 }
366 MBRecContext *get_mbrec_context(H264Context *h){
367 MBRecContext *d = av_mallocz(sizeof(MBRecContext));
369 ff_h264dsp_init(&d->hdsp);
370 ff_h264_pred_init(&d->hpc);
371 dsputil_init(&d->dsp);
373 #if !OMPSS
374 d->mrs = av_mallocz(sizeof(MBRecState));
375 #endif
376 d->hdsp.qpel_put= d->dsp.put_h264_qpel_pixels_tab;
377 d->hdsp.qpel_avg= d->dsp.avg_h264_qpel_pixels_tab;
378 d->mb_height = h->mb_height;
379 d->mb_width = h->mb_width;
380 d->mb_stride = h->mb_stride;
381 d->b_stride = h->b_stride;
382 d->height = h->height;
383 d->width = h->width;
384 d->linesize = h->width + EDGE_WIDTH*2;
385 d->uvlinesize = d->linesize>>1;
387 d->scratchpad_y = av_malloc(d->linesize*16*sizeof(uint8_t));
388 d->scratchpad_cb= av_malloc(d->uvlinesize*8*sizeof(uint8_t));
389 d->scratchpad_cr= av_malloc(d->uvlinesize*8*sizeof(uint8_t));
391 for (int i=0; i<16; i++){
392 d->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*d->linesize*((scan8[i] - scan8[0])>>3);
393 }
394 for (int i=0; i<4; i++){
395 d->block_offset[16+i]=
396 d->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*d->uvlinesize*((scan8[i] - scan8[0])>>3);
397 }
401 return d;
402 }
404 void free_mbrec_context(MBRecContext *d){
405 #if !OMPSS
406 av_free(d->mrs);
407 #endif
408 av_free(d->scratchpad_y);
409 av_free(d->scratchpad_cb);
410 av_free(d->scratchpad_cr);
411 av_free(d);
412 }
