Mercurial > cgi-bin > hgwebdir.cgi > PR > Applications > VSs > VSs__H264__App
diff libavcodec/h264_rec.c @ 2:897f711a7157
rearrange to work with autoconf
| author | Nina Engelhardt <nengel@mailbox.tu-berlin.de> |
|---|---|
| date | Tue, 25 Sep 2012 15:55:33 +0200 |
| parents | |
| children |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/libavcodec/h264_rec.c Tue Sep 25 15:55:33 2012 +0200 1.3 @@ -0,0 +1,412 @@ 1.4 +#include "config.h" 1.5 + 1.6 +#include "dsputil.h" 1.7 +#include "h264_types.h" 1.8 +#include "h264_data.h" 1.9 +#include "h264_mc.h" 1.10 +#include "h264_deblock.h" 1.11 +#include "h264_pred_mode.h" 1.12 +//#undef NDEBUG 1.13 +#include <assert.h> 1.14 + 1.15 +void init_mbrec_context(MBRecContext *mrc, MBRecState *mrs, H264Slice *s, int line){ 1.16 + DecodedPicture *pic = s->curr_pic; 1.17 + int mb_stride = mrc->mb_stride; 1.18 + int mb_width = mrc->mb_width; 1.19 + mrs->mb_type_top = pic->mb_type + (line -1)*mb_stride; 1.20 + mrs->mb_type = pic->mb_type + line*mb_stride; 1.21 + mrs->ref_index_top[0] = pic->ref_index[0] + 4*(line -1)*mb_stride; 1.22 + mrs->ref_index_top[1] = pic->ref_index[1] + 4*(line -1)*mb_stride; 1.23 + mrs->ref_index[0] = pic->ref_index[0] + 4*line*mb_stride; 1.24 + mrs->ref_index[1] = pic->ref_index[1] + 4*line*mb_stride; 1.25 + 1.26 + mrs->motion_val_top[0] = pic->motion_val[0] + 4*mb_width*4*(line-1); 1.27 + mrs->motion_val_top[1] = pic->motion_val[1] + 4*mb_width*4*(line-1); 1.28 + mrs->motion_val[0] = pic->motion_val[0] + 4*mb_width*4*line; 1.29 + mrs->motion_val[1] = pic->motion_val[1] + 4*mb_width*4*line; 1.30 + 1.31 + mrs->intra4x4_pred_mode_top = pic->intra4x4_pred_mode + 4*mb_width*(line-1); 1.32 + mrs->intra4x4_pred_mode = pic->intra4x4_pred_mode + 4*mb_width*line; 1.33 + 1.34 + mrs->non_zero_count_top = pic->non_zero_count + 8*mb_width*(line-1); 1.35 + mrs->non_zero_count = pic->non_zero_count + 8*mb_width*line; 1.36 + 1.37 + if (s->slice_type_nos == FF_B_TYPE){ 1.38 + mrs->list1_mb_type = s->dp_ref_list[1][0]->mb_type + line*mb_stride; 1.39 + mrs->list1_ref_index[0] = s->dp_ref_list[1][0]->ref_index[0] + 4*line*mb_stride; 1.40 + mrs->list1_ref_index[1] = s->dp_ref_list[1][0]->ref_index[1] + 4*line*mb_stride; 1.41 + mrs->list1_motion_val[0] = s->dp_ref_list[1][0]->motion_val[0] + 4*mb_width*4*line; 1.42 + mrs->list1_motion_val[1] = s->dp_ref_list[1][0]->motion_val[1] + 4*mb_width*4*line; 1.43 + } 1.44 + 1.45 +} 1.46 + 1.47 +#if OMPSS 1.48 +static void backup_mb_border(H264Mb *m, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){ 1.49 + int i; 1.50 + uint8_t * top_border_y1 = m->top_border; 1.51 + uint8_t * top_border_y2 = m->top_border + 8; 1.52 + uint8_t * top_border_cb = m->top_border + 16; 1.53 + uint8_t * top_border_cr = m->top_border + 24; 1.54 + uint8_t * top_border_next = m->top_border_next; 1.55 + 1.56 + src_y -= linesize; 1.57 + src_cb -= uvlinesize; 1.58 + src_cr -= uvlinesize; 1.59 + 1.60 + m->left_border[0]= m->top_border[15]; 1.61 + for(i=1; i<17 ; i++){ 1.62 + m->left_border[i]= src_y[15 + i*linesize]; 1.63 + } 1.64 + 1.65 + *(uint64_t*)(top_border_y1) = *(uint64_t*)(src_y + 16*linesize); 1.66 + *(uint64_t*)(top_border_next) = *(uint64_t*)(src_y + 16*linesize); 1.67 + *(uint64_t*)(top_border_y2) = *(uint64_t*)(src_y +8+16*linesize); 1.68 + 1.69 + m->left_border[17]= m->top_border[16+7]; 1.70 + m->left_border[17+9]= m->top_border[24+7]; 1.71 + for(i=1; i<9; i++){ 1.72 + m->left_border[17 +i]= src_cb[7+i*uvlinesize]; 1.73 + m->left_border[17+9+i]= src_cr[7+i*uvlinesize]; 1.74 + } 1.75 + *(uint64_t*)(top_border_cb)= *(uint64_t*)(src_cb+8*uvlinesize); 1.76 + *(uint64_t*)(top_border_cr)= *(uint64_t*)(src_cr+8*uvlinesize); 1.77 +} 1.78 + 1.79 +static void xchg_mb_border(H264Mb *m, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){ 1.80 + int temp8, i; 1.81 + uint64_t temp64; 1.82 + 1.83 + uint8_t * top_border_y1 = m->top_border; 1.84 + uint8_t * top_border_y2 = m->top_border + 8; 1.85 + uint8_t * top_border_cb = m->top_border + 16; 1.86 + uint8_t * top_border_cr = m->top_border + 24; 1.87 + uint8_t * top_border_next = m->top_border_next; 1.88 + 1.89 + int deblock_left; 1.90 + int deblock_top; 1.91 + 1.92 + deblock_left = (m->mb_x > 0); 1.93 + deblock_top = (m->mb_y > 0); 1.94 + 1.95 + src_y -= ( linesize + 1); 1.96 + src_cb -= (uvlinesize + 1); 1.97 + src_cr -= (uvlinesize + 1); 1.98 + 1.99 + #define XCHG(a,b,t,xchg)\ 1.100 + t= a;\ 1.101 + if(xchg)\ 1.102 + a= b;\ 1.103 + b= t; 1.104 + 1.105 + if(deblock_left){ 1.106 + for(i = !deblock_top; i<16; i++){ 1.107 + XCHG(m->left_border[i], src_y [i* linesize], temp8, xchg); 1.108 + } 1.109 + XCHG(m->left_border[i], src_y [i* linesize], temp8, 1); 1.110 + 1.111 + for(i = !deblock_top; i<8; i++){ 1.112 + XCHG(m->left_border[17 +i], src_cb[i*uvlinesize], temp8, xchg); 1.113 + XCHG(m->left_border[17+9+i], src_cr[i*uvlinesize], temp8, xchg); 1.114 + } 1.115 + XCHG(m->left_border[17 +i], src_cb[i*uvlinesize], temp8, 1); 1.116 + XCHG(m->left_border[17+9+i], src_cr[i*uvlinesize], temp8, 1); 1.117 + } 1.118 + 1.119 + if(deblock_top){ 1.120 + XCHG(*(uint64_t*)(top_border_y1) , *(uint64_t*)(src_y +1), temp64, xchg); 1.121 + XCHG(*(uint64_t*)(top_border_y2) , *(uint64_t*)(src_y +9), temp64, 1); 1.122 + XCHG(*(uint64_t*)(top_border_next), *(uint64_t*)(src_y +17), temp64, 1); 1.123 + 1.124 + XCHG(*(uint64_t*)(top_border_cb) , *(uint64_t*)(src_cb+1), temp64, 1); 1.125 + XCHG(*(uint64_t*)(top_border_cr) , *(uint64_t*)(src_cr+1), temp64, 1); 1.126 + } 1.127 +} 1.128 +#else 1.129 + 1.130 +static void backup_mb_border(MBRecContext *d, H264Mb *m, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){ 1.131 + int i; 1.132 + uint8_t* top_border_y = d->top[m->mb_x].unfiltered_y; 1.133 + uint8_t* top_border_cb = d->top[m->mb_x].unfiltered_cb; 1.134 + uint8_t* top_border_cr = d->top[m->mb_x].unfiltered_cr; 1.135 + 1.136 + uint8_t* left_border_y = d->left.unfiltered_y; 1.137 + uint8_t* left_border_cb = d->left.unfiltered_cb; 1.138 + uint8_t* left_border_cr = d->left.unfiltered_cr; 1.139 + 1.140 + src_y -= linesize; 1.141 + src_cb -= uvlinesize; 1.142 + src_cr -= uvlinesize; 1.143 + 1.144 + // There are two lines saved, the line above the top macroblock of a pair, 1.145 + // and the line above the bottom macroblock 1.146 + left_border_y[0] = top_border_y[15]; 1.147 + for(i=1; i<17; i++){ 1.148 + left_border_y[i] = src_y[15+i* linesize]; 1.149 + } 1.150 + *(uint64_t*)(top_border_y ) = *(uint64_t*)(src_y + 16*linesize); 1.151 + *(uint64_t*)(top_border_y +8) = *(uint64_t*)(src_y +8+16*linesize); 1.152 + 1.153 + left_border_cb[0] = top_border_cb[7]; 1.154 + left_border_cr[0] = top_border_cr[7]; 1.155 + for(i=1; i<9; i++){ 1.156 + left_border_cb[i] = src_cb[7+i*uvlinesize]; 1.157 + left_border_cr[i] = src_cr[7+i*uvlinesize]; 1.158 + } 1.159 + *(uint64_t*)(top_border_cb)= *(uint64_t*)(src_cb+8*uvlinesize); 1.160 + *(uint64_t*)(top_border_cr)= *(uint64_t*)(src_cr+8*uvlinesize); 1.161 +} 1.162 + 1.163 +static void xchg_mb_border(MBRecContext *d, H264Mb *m, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){ 1.164 + 1.165 + int temp8, i; 1.166 + uint64_t temp64; 1.167 + int deblock_left; 1.168 + int deblock_top; 1.169 + 1.170 + uint8_t* top_border_y = d->top[m->mb_x].unfiltered_y; 1.171 + uint8_t* top_border_cb = d->top[m->mb_x].unfiltered_cb; 1.172 + uint8_t* top_border_cr = d->top[m->mb_x].unfiltered_cr; 1.173 + uint8_t* top_border_y_next = d->top[m->mb_x +1].unfiltered_y; 1.174 + 1.175 + uint8_t* left_border_y = d->left.unfiltered_y; 1.176 + uint8_t* left_border_cb = d->left.unfiltered_cb; 1.177 + uint8_t* left_border_cr = d->left.unfiltered_cr; 1.178 + 1.179 + deblock_left = (m->mb_x > 0); 1.180 + deblock_top = (m->mb_y > 0); 1.181 + 1.182 + src_y -= ( linesize + 1); 1.183 + src_cb -= (uvlinesize + 1); 1.184 + src_cr -= (uvlinesize + 1); 1.185 + 1.186 + #define XCHG(a,b,t,xchg)\ 1.187 + t= a;\ 1.188 + if(xchg)\ 1.189 + a= b;\ 1.190 + b= t; 1.191 + 1.192 + if(deblock_left){ 1.193 + for(i = !deblock_top; i<16; i++){ 1.194 + XCHG(left_border_y[i], src_y [i* linesize], temp8, xchg); 1.195 + } 1.196 + XCHG(left_border_y[i], src_y [i* linesize], temp8, 1); 1.197 + 1.198 + for(i = !deblock_top; i<8; i++){ 1.199 + XCHG(left_border_cb[i], src_cb[i*uvlinesize], temp8, xchg); 1.200 + XCHG(left_border_cr[i], src_cr[i*uvlinesize], temp8, xchg); 1.201 + } 1.202 + XCHG(left_border_cb[i], src_cb[i*uvlinesize], temp8, 1); 1.203 + XCHG(left_border_cr[i], src_cr[i*uvlinesize], temp8, 1); 1.204 + } 1.205 + 1.206 + if(deblock_top){ 1.207 + XCHG(*(uint64_t*)(top_border_y+0), *(uint64_t*)(src_y +1), temp64, xchg); 1.208 + XCHG(*(uint64_t*)(top_border_y+8), *(uint64_t*)(src_y +9), temp64, 1); 1.209 + if(m->mb_x+1 < d->mb_width){ 1.210 + XCHG(*(uint64_t*)(top_border_y_next), *(uint64_t*)(src_y +17), temp64, 1); 1.211 + } 1.212 + XCHG(*(uint64_t*)(top_border_cb), *(uint64_t*)(src_cb+1), temp64, 1); 1.213 + XCHG(*(uint64_t*)(top_border_cr), *(uint64_t*)(src_cr+1), temp64, 1); 1.214 + } 1.215 +} 1.216 + 1.217 +#endif 1.218 + 1.219 +void h264_decode_mb_internal(MBRecContext *d, MBRecState *mrs, H264Slice *s, H264Mb *m){ 1.220 + int i; 1.221 + const int mb_x= m->mb_x; 1.222 + const int mb_y= m->mb_y; 1.223 + int *block_offset = d->block_offset; 1.224 + 1.225 + void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride); 1.226 + void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride); 1.227 + 1.228 + int linesize = d->linesize; 1.229 + int uvlinesize = d->uvlinesize; 1.230 + 1.231 + uint8_t *dest_y = s->curr_pic->data[0] + (mb_x + mb_y * linesize ) * 16; 1.232 + uint8_t *dest_cb = s->curr_pic->data[1] + (mb_x + mb_y * uvlinesize) * 8; 1.233 + uint8_t *dest_cr = s->curr_pic->data[2] + (mb_x + mb_y * uvlinesize) * 8; 1.234 + 1.235 + pred_motion_mb_rec (d, mrs, s, m); 1.236 + 1.237 + const int mb_type= m->mb_type; 1.238 + 1.239 + d->dsp.prefetch(dest_y + (m->mb_x&3)*4*linesize + 64, d->linesize, 4); 1.240 + d->dsp.prefetch(dest_cb + (m->mb_x&7)*uvlinesize + 64, dest_cr - dest_cb, 2); 1.241 + 1.242 + if(IS_INTRA(mb_type)){ 1.243 +#if OMPSS 1.244 + xchg_mb_border(m, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1); 1.245 +#else 1.246 + xchg_mb_border(d, m, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1); 1.247 +#endif 1.248 + 1.249 + d->hpc.pred8x8[ m->chroma_pred_mode ](dest_cb, uvlinesize); 1.250 + d->hpc.pred8x8[ m->chroma_pred_mode ](dest_cr, uvlinesize); 1.251 + 1.252 + if(IS_INTRA4x4(mb_type)){ 1.253 + if(IS_8x8DCT(mb_type)){ 1.254 + idct_dc_add = d->hdsp.h264_idct8_dc_add; 1.255 + idct_add = d->hdsp.h264_idct8_add; 1.256 + 1.257 + for(i=0; i<16; i+=4){ 1.258 + uint8_t * const ptr= dest_y + block_offset[i]; 1.259 + const int dir= mrs->intra4x4_pred_mode_cache[ scan8[i] ]; 1.260 + 1.261 + const int nnz = mrs->non_zero_count_cache[ scan8[i] ]; 1.262 + d->hpc.pred8x8l[ dir ](ptr, (mrs->topleft_samples_available<<i)&0x8000, 1.263 + (mrs->topright_samples_available<<i)&0x4000, linesize); 1.264 + if(nnz){ 1.265 + if(nnz == 1 && m->mb[i*16]) 1.266 + idct_dc_add(ptr, m->mb + i*16, linesize); 1.267 + else 1.268 + idct_add (ptr, m->mb + i*16, linesize); 1.269 + } 1.270 + } 1.271 + }else{ 1.272 + idct_dc_add = d->hdsp.h264_idct_dc_add; 1.273 + idct_add = d->hdsp.h264_idct_add; 1.274 + 1.275 + for(i=0; i<16; i++){ 1.276 + uint8_t * const ptr= dest_y + block_offset[i]; 1.277 + const int dir= mrs->intra4x4_pred_mode_cache[ scan8[i] ]; 1.278 + uint8_t *topright; 1.279 + int nnz, tr; 1.280 + if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){ 1.281 + const int topright_avail= (mrs->topright_samples_available<<i)&0x8000; 1.282 + assert(mb_y || linesize <= block_offset[i]); 1.283 + if(!topright_avail){ 1.284 + tr= ptr[3 - linesize]*0x01010101; 1.285 + topright= (uint8_t*) &tr; 1.286 + }else 1.287 + topright= ptr + 4 - linesize; 1.288 + }else 1.289 + topright= NULL; 1.290 + 1.291 + d->hpc.pred4x4[ dir ](ptr, topright, linesize); 1.292 + nnz = mrs->non_zero_count_cache[ scan8[i] ]; 1.293 + if(nnz){ 1.294 + if(nnz == 1 && m->mb[i*16]) 1.295 + idct_dc_add(ptr, m->mb + i*16, linesize); 1.296 + else 1.297 + idct_add (ptr, m->mb + i*16, linesize); 1.298 + } 1.299 + } 1.300 + } 1.301 + }else{ 1.302 + d->hpc.pred16x16[ m->intra16x16_pred_mode ](dest_y , linesize); 1.303 + } 1.304 +#if OMPSS 1.305 + xchg_mb_border(m, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0); 1.306 +#else 1.307 + xchg_mb_border(d, m, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0); 1.308 +#endif 1.309 + }else { 1.310 + hl_motion(d, mrs, s, m, dest_y, dest_cb, dest_cr, 1.311 + d->hdsp.qpel_put, d->dsp.put_h264_chroma_pixels_tab, 1.312 + d->hdsp.qpel_avg, d->dsp.avg_h264_chroma_pixels_tab, 1.313 + d->hdsp.weight_h264_pixels_tab, d->hdsp.biweight_h264_pixels_tab); 1.314 + } 1.315 + 1.316 + if(!IS_INTRA4x4(mb_type)){ 1.317 + 1.318 + if(IS_INTRA16x16(mb_type)){ 1.319 + 1.320 + d->hdsp.h264_idct_add16intra(dest_y, block_offset, m->mb, linesize, mrs->non_zero_count_cache); 1.321 + 1.322 + }else if(m->cbp&15){ 1.323 + 1.324 + if(IS_8x8DCT(mb_type)){ 1.325 + d->hdsp.h264_idct8_add4(dest_y, block_offset, m->mb, linesize, mrs->non_zero_count_cache); 1.326 + }else{ 1.327 + d->hdsp.h264_idct_add16(dest_y, block_offset, m->mb, linesize, mrs->non_zero_count_cache); 1.328 + } 1.329 + } 1.330 + } 1.331 + 1.332 + if(m->cbp&0x30){ 1.333 + uint8_t *dest[2] = {dest_cb, dest_cr}; 1.334 + 1.335 + idct_add = d->hdsp.h264_idct_add; 1.336 + idct_dc_add = d->hdsp.h264_idct_dc_add; 1.337 + for(i=16; i<16+8; i++){ 1.338 + if(mrs->non_zero_count_cache[ scan8[i] ]) 1.339 + idct_add (dest[(i&4)>>2] + block_offset[i], m->mb + i*16, uvlinesize); 1.340 + else if(m->mb[i*16]) 1.341 + idct_dc_add(dest[(i&4)>>2] + block_offset[i], m->mb + i*16, uvlinesize); 1.342 + } 1.343 + } 1.344 + 1.345 +#if OMPSS 1.346 + backup_mb_border(m, dest_y, dest_cb, dest_cr, linesize, uvlinesize); 1.347 + if (mb_x+1 <d->mb_width){ 1.348 + H264Mb *mr = m+1; 1.349 + memcpy(mr->left_border, m->left_border, sizeof(m->left_border)); 1.350 + } 1.351 + if (mb_y +1 <d->mb_height){ 1.352 + H264Mb *md = m + d->mb_width; 1.353 + memcpy(md->top_border, m->top_border, sizeof(m->top_border)); 1.354 + if (mb_x>0){ 1.355 + H264Mb *mdl = m + d->mb_width -1; 1.356 + memcpy(mdl->top_border_next, m->top_border_next, sizeof(m->top_border_next)); 1.357 + } 1.358 + } 1.359 +#else 1.360 + backup_mb_border(d, m, dest_y, dest_cb, dest_cr, linesize, uvlinesize); 1.361 + if (mb_y +1 <d->mb_height && d->top_next != d->top){ 1.362 + memcpy(&d->top_next[mb_x],&d->top[mb_x], sizeof(TopBorder)); 1.363 + } 1.364 +#endif 1.365 + 1.366 + ff_h264_filter_mb(d, mrs, s, m, dest_y, dest_cb, dest_cr); 1.367 +} 1.368 + 1.369 +MBRecContext *get_mbrec_context(H264Context *h){ 1.370 + MBRecContext *d = av_mallocz(sizeof(MBRecContext)); 1.371 + 1.372 + ff_h264dsp_init(&d->hdsp); 1.373 + ff_h264_pred_init(&d->hpc); 1.374 + dsputil_init(&d->dsp); 1.375 + 1.376 +#if !OMPSS 1.377 + d->mrs = av_mallocz(sizeof(MBRecState)); 1.378 +#endif 1.379 + d->hdsp.qpel_put= d->dsp.put_h264_qpel_pixels_tab; 1.380 + d->hdsp.qpel_avg= d->dsp.avg_h264_qpel_pixels_tab; 1.381 + d->mb_height = h->mb_height; 1.382 + d->mb_width = h->mb_width; 1.383 + d->mb_stride = h->mb_stride; 1.384 + d->b_stride = h->b_stride; 1.385 + d->height = h->height; 1.386 + d->width = h->width; 1.387 + d->linesize = h->width + EDGE_WIDTH*2; 1.388 + d->uvlinesize = d->linesize>>1; 1.389 + 1.390 + d->scratchpad_y = av_malloc(d->linesize*16*sizeof(uint8_t)); 1.391 + d->scratchpad_cb= av_malloc(d->uvlinesize*8*sizeof(uint8_t)); 1.392 + d->scratchpad_cr= av_malloc(d->uvlinesize*8*sizeof(uint8_t)); 1.393 + 1.394 + for (int i=0; i<16; i++){ 1.395 + d->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*d->linesize*((scan8[i] - scan8[0])>>3); 1.396 + } 1.397 + for (int i=0; i<4; i++){ 1.398 + d->block_offset[16+i]= 1.399 + d->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*d->uvlinesize*((scan8[i] - scan8[0])>>3); 1.400 + } 1.401 + 1.402 + 1.403 + 1.404 + return d; 1.405 +} 1.406 + 1.407 +void free_mbrec_context(MBRecContext *d){ 1.408 +#if !OMPSS 1.409 + av_free(d->mrs); 1.410 +#endif 1.411 + av_free(d->scratchpad_y); 1.412 + av_free(d->scratchpad_cb); 1.413 + av_free(d->scratchpad_cr); 1.414 + av_free(d); 1.415 +}
