nengel@2: #include "config.h" nengel@2: nengel@2: #include "dsputil.h" nengel@2: #include "h264_types.h" nengel@2: #include "h264_data.h" nengel@2: #include "h264_mc.h" nengel@2: #include "h264_deblock.h" nengel@2: #include "h264_pred_mode.h" nengel@2: //#undef NDEBUG nengel@2: #include nengel@2: nengel@2: void init_mbrec_context(MBRecContext *mrc, MBRecState *mrs, H264Slice *s, int line){ nengel@2: DecodedPicture *pic = s->curr_pic; nengel@2: int mb_stride = mrc->mb_stride; nengel@2: int mb_width = mrc->mb_width; nengel@2: mrs->mb_type_top = pic->mb_type + (line -1)*mb_stride; nengel@2: mrs->mb_type = pic->mb_type + line*mb_stride; nengel@2: mrs->ref_index_top[0] = pic->ref_index[0] + 4*(line -1)*mb_stride; nengel@2: mrs->ref_index_top[1] = pic->ref_index[1] + 4*(line -1)*mb_stride; nengel@2: mrs->ref_index[0] = pic->ref_index[0] + 4*line*mb_stride; nengel@2: mrs->ref_index[1] = pic->ref_index[1] + 4*line*mb_stride; nengel@2: nengel@2: mrs->motion_val_top[0] = pic->motion_val[0] + 4*mb_width*4*(line-1); nengel@2: mrs->motion_val_top[1] = pic->motion_val[1] + 4*mb_width*4*(line-1); nengel@2: mrs->motion_val[0] = pic->motion_val[0] + 4*mb_width*4*line; nengel@2: mrs->motion_val[1] = pic->motion_val[1] + 4*mb_width*4*line; nengel@2: nengel@2: mrs->intra4x4_pred_mode_top = pic->intra4x4_pred_mode + 4*mb_width*(line-1); nengel@2: mrs->intra4x4_pred_mode = pic->intra4x4_pred_mode + 4*mb_width*line; nengel@2: nengel@2: mrs->non_zero_count_top = pic->non_zero_count + 8*mb_width*(line-1); nengel@2: mrs->non_zero_count = pic->non_zero_count + 8*mb_width*line; nengel@2: nengel@2: if (s->slice_type_nos == FF_B_TYPE){ nengel@2: mrs->list1_mb_type = s->dp_ref_list[1][0]->mb_type + line*mb_stride; nengel@2: mrs->list1_ref_index[0] = s->dp_ref_list[1][0]->ref_index[0] + 4*line*mb_stride; nengel@2: mrs->list1_ref_index[1] = s->dp_ref_list[1][0]->ref_index[1] + 4*line*mb_stride; nengel@2: mrs->list1_motion_val[0] = s->dp_ref_list[1][0]->motion_val[0] + 4*mb_width*4*line; nengel@2: mrs->list1_motion_val[1] = s->dp_ref_list[1][0]->motion_val[1] + 4*mb_width*4*line; nengel@2: } nengel@2: nengel@2: } nengel@2: nengel@2: #if OMPSS nengel@2: static void backup_mb_border(H264Mb *m, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){ nengel@2: int i; nengel@2: uint8_t * top_border_y1 = m->top_border; nengel@2: uint8_t * top_border_y2 = m->top_border + 8; nengel@2: uint8_t * top_border_cb = m->top_border + 16; nengel@2: uint8_t * top_border_cr = m->top_border + 24; nengel@2: uint8_t * top_border_next = m->top_border_next; nengel@2: nengel@2: src_y -= linesize; nengel@2: src_cb -= uvlinesize; nengel@2: src_cr -= uvlinesize; nengel@2: nengel@2: m->left_border[0]= m->top_border[15]; nengel@2: for(i=1; i<17 ; i++){ nengel@2: m->left_border[i]= src_y[15 + i*linesize]; nengel@2: } nengel@2: nengel@2: *(uint64_t*)(top_border_y1) = *(uint64_t*)(src_y + 16*linesize); nengel@2: *(uint64_t*)(top_border_next) = *(uint64_t*)(src_y + 16*linesize); nengel@2: *(uint64_t*)(top_border_y2) = *(uint64_t*)(src_y +8+16*linesize); nengel@2: nengel@2: m->left_border[17]= m->top_border[16+7]; nengel@2: m->left_border[17+9]= m->top_border[24+7]; nengel@2: for(i=1; i<9; i++){ nengel@2: m->left_border[17 +i]= src_cb[7+i*uvlinesize]; nengel@2: m->left_border[17+9+i]= src_cr[7+i*uvlinesize]; nengel@2: } nengel@2: *(uint64_t*)(top_border_cb)= *(uint64_t*)(src_cb+8*uvlinesize); nengel@2: *(uint64_t*)(top_border_cr)= *(uint64_t*)(src_cr+8*uvlinesize); nengel@2: } nengel@2: nengel@2: static void xchg_mb_border(H264Mb *m, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){ nengel@2: int temp8, i; nengel@2: uint64_t temp64; nengel@2: nengel@2: uint8_t * top_border_y1 = m->top_border; nengel@2: uint8_t * top_border_y2 = m->top_border + 8; nengel@2: uint8_t * top_border_cb = m->top_border + 16; nengel@2: uint8_t * top_border_cr = m->top_border + 24; nengel@2: uint8_t * top_border_next = m->top_border_next; nengel@2: nengel@2: int deblock_left; nengel@2: int deblock_top; nengel@2: nengel@2: deblock_left = (m->mb_x > 0); nengel@2: deblock_top = (m->mb_y > 0); nengel@2: nengel@2: src_y -= ( linesize + 1); nengel@2: src_cb -= (uvlinesize + 1); nengel@2: src_cr -= (uvlinesize + 1); nengel@2: nengel@2: #define XCHG(a,b,t,xchg)\ nengel@2: t= a;\ nengel@2: if(xchg)\ nengel@2: a= b;\ nengel@2: b= t; nengel@2: nengel@2: if(deblock_left){ nengel@2: for(i = !deblock_top; i<16; i++){ nengel@2: XCHG(m->left_border[i], src_y [i* linesize], temp8, xchg); nengel@2: } nengel@2: XCHG(m->left_border[i], src_y [i* linesize], temp8, 1); nengel@2: nengel@2: for(i = !deblock_top; i<8; i++){ nengel@2: XCHG(m->left_border[17 +i], src_cb[i*uvlinesize], temp8, xchg); nengel@2: XCHG(m->left_border[17+9+i], src_cr[i*uvlinesize], temp8, xchg); nengel@2: } nengel@2: XCHG(m->left_border[17 +i], src_cb[i*uvlinesize], temp8, 1); nengel@2: XCHG(m->left_border[17+9+i], src_cr[i*uvlinesize], temp8, 1); nengel@2: } nengel@2: nengel@2: if(deblock_top){ nengel@2: XCHG(*(uint64_t*)(top_border_y1) , *(uint64_t*)(src_y +1), temp64, xchg); nengel@2: XCHG(*(uint64_t*)(top_border_y2) , *(uint64_t*)(src_y +9), temp64, 1); nengel@2: XCHG(*(uint64_t*)(top_border_next), *(uint64_t*)(src_y +17), temp64, 1); nengel@2: nengel@2: XCHG(*(uint64_t*)(top_border_cb) , *(uint64_t*)(src_cb+1), temp64, 1); nengel@2: XCHG(*(uint64_t*)(top_border_cr) , *(uint64_t*)(src_cr+1), temp64, 1); nengel@2: } nengel@2: } nengel@2: #else nengel@2: nengel@2: static void backup_mb_border(MBRecContext *d, H264Mb *m, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){ nengel@2: int i; nengel@2: uint8_t* top_border_y = d->top[m->mb_x].unfiltered_y; nengel@2: uint8_t* top_border_cb = d->top[m->mb_x].unfiltered_cb; nengel@2: uint8_t* top_border_cr = d->top[m->mb_x].unfiltered_cr; nengel@2: nengel@2: uint8_t* left_border_y = d->left.unfiltered_y; nengel@2: uint8_t* left_border_cb = d->left.unfiltered_cb; nengel@2: uint8_t* left_border_cr = d->left.unfiltered_cr; nengel@2: nengel@2: src_y -= linesize; nengel@2: src_cb -= uvlinesize; nengel@2: src_cr -= uvlinesize; nengel@2: nengel@2: // There are two lines saved, the line above the top macroblock of a pair, nengel@2: // and the line above the bottom macroblock nengel@2: left_border_y[0] = top_border_y[15]; nengel@2: for(i=1; i<17; i++){ nengel@2: left_border_y[i] = src_y[15+i* linesize]; nengel@2: } nengel@2: *(uint64_t*)(top_border_y ) = *(uint64_t*)(src_y + 16*linesize); nengel@2: *(uint64_t*)(top_border_y +8) = *(uint64_t*)(src_y +8+16*linesize); nengel@2: nengel@2: left_border_cb[0] = top_border_cb[7]; nengel@2: left_border_cr[0] = top_border_cr[7]; nengel@2: for(i=1; i<9; i++){ nengel@2: left_border_cb[i] = src_cb[7+i*uvlinesize]; nengel@2: left_border_cr[i] = src_cr[7+i*uvlinesize]; nengel@2: } nengel@2: *(uint64_t*)(top_border_cb)= *(uint64_t*)(src_cb+8*uvlinesize); nengel@2: *(uint64_t*)(top_border_cr)= *(uint64_t*)(src_cr+8*uvlinesize); nengel@2: } nengel@2: nengel@2: static void xchg_mb_border(MBRecContext *d, H264Mb *m, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){ nengel@2: nengel@2: int temp8, i; nengel@2: uint64_t temp64; nengel@2: int deblock_left; nengel@2: int deblock_top; nengel@2: nengel@2: uint8_t* top_border_y = d->top[m->mb_x].unfiltered_y; nengel@2: uint8_t* top_border_cb = d->top[m->mb_x].unfiltered_cb; nengel@2: uint8_t* top_border_cr = d->top[m->mb_x].unfiltered_cr; nengel@2: uint8_t* top_border_y_next = d->top[m->mb_x +1].unfiltered_y; nengel@2: nengel@2: uint8_t* left_border_y = d->left.unfiltered_y; nengel@2: uint8_t* left_border_cb = d->left.unfiltered_cb; nengel@2: uint8_t* left_border_cr = d->left.unfiltered_cr; nengel@2: nengel@2: deblock_left = (m->mb_x > 0); nengel@2: deblock_top = (m->mb_y > 0); nengel@2: nengel@2: src_y -= ( linesize + 1); nengel@2: src_cb -= (uvlinesize + 1); nengel@2: src_cr -= (uvlinesize + 1); nengel@2: nengel@2: #define XCHG(a,b,t,xchg)\ nengel@2: t= a;\ nengel@2: if(xchg)\ nengel@2: a= b;\ nengel@2: b= t; nengel@2: nengel@2: if(deblock_left){ nengel@2: for(i = !deblock_top; i<16; i++){ nengel@2: XCHG(left_border_y[i], src_y [i* linesize], temp8, xchg); nengel@2: } nengel@2: XCHG(left_border_y[i], src_y [i* linesize], temp8, 1); nengel@2: nengel@2: for(i = !deblock_top; i<8; i++){ nengel@2: XCHG(left_border_cb[i], src_cb[i*uvlinesize], temp8, xchg); nengel@2: XCHG(left_border_cr[i], src_cr[i*uvlinesize], temp8, xchg); nengel@2: } nengel@2: XCHG(left_border_cb[i], src_cb[i*uvlinesize], temp8, 1); nengel@2: XCHG(left_border_cr[i], src_cr[i*uvlinesize], temp8, 1); nengel@2: } nengel@2: nengel@2: if(deblock_top){ nengel@2: XCHG(*(uint64_t*)(top_border_y+0), *(uint64_t*)(src_y +1), temp64, xchg); nengel@2: XCHG(*(uint64_t*)(top_border_y+8), *(uint64_t*)(src_y +9), temp64, 1); nengel@2: if(m->mb_x+1 < d->mb_width){ nengel@2: XCHG(*(uint64_t*)(top_border_y_next), *(uint64_t*)(src_y +17), temp64, 1); nengel@2: } nengel@2: XCHG(*(uint64_t*)(top_border_cb), *(uint64_t*)(src_cb+1), temp64, 1); nengel@2: XCHG(*(uint64_t*)(top_border_cr), *(uint64_t*)(src_cr+1), temp64, 1); nengel@2: } nengel@2: } nengel@2: nengel@2: #endif nengel@2: nengel@2: void h264_decode_mb_internal(MBRecContext *d, MBRecState *mrs, H264Slice *s, H264Mb *m){ nengel@2: int i; nengel@2: const int mb_x= m->mb_x; nengel@2: const int mb_y= m->mb_y; nengel@2: int *block_offset = d->block_offset; nengel@2: nengel@2: void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride); nengel@2: void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride); nengel@2: nengel@2: int linesize = d->linesize; nengel@2: int uvlinesize = d->uvlinesize; nengel@2: nengel@2: uint8_t *dest_y = s->curr_pic->data[0] + (mb_x + mb_y * linesize ) * 16; nengel@2: uint8_t *dest_cb = s->curr_pic->data[1] + (mb_x + mb_y * uvlinesize) * 8; nengel@2: uint8_t *dest_cr = s->curr_pic->data[2] + (mb_x + mb_y * uvlinesize) * 8; nengel@2: nengel@2: pred_motion_mb_rec (d, mrs, s, m); nengel@2: nengel@2: const int mb_type= m->mb_type; nengel@2: nengel@2: d->dsp.prefetch(dest_y + (m->mb_x&3)*4*linesize + 64, d->linesize, 4); nengel@2: d->dsp.prefetch(dest_cb + (m->mb_x&7)*uvlinesize + 64, dest_cr - dest_cb, 2); nengel@2: nengel@2: if(IS_INTRA(mb_type)){ nengel@2: #if OMPSS nengel@2: xchg_mb_border(m, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1); nengel@2: #else nengel@2: xchg_mb_border(d, m, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1); nengel@2: #endif nengel@2: nengel@2: d->hpc.pred8x8[ m->chroma_pred_mode ](dest_cb, uvlinesize); nengel@2: d->hpc.pred8x8[ m->chroma_pred_mode ](dest_cr, uvlinesize); nengel@2: nengel@2: if(IS_INTRA4x4(mb_type)){ nengel@2: if(IS_8x8DCT(mb_type)){ nengel@2: idct_dc_add = d->hdsp.h264_idct8_dc_add; nengel@2: idct_add = d->hdsp.h264_idct8_add; nengel@2: nengel@2: for(i=0; i<16; i+=4){ nengel@2: uint8_t * const ptr= dest_y + block_offset[i]; nengel@2: const int dir= mrs->intra4x4_pred_mode_cache[ scan8[i] ]; nengel@2: nengel@2: const int nnz = mrs->non_zero_count_cache[ scan8[i] ]; nengel@2: d->hpc.pred8x8l[ dir ](ptr, (mrs->topleft_samples_available<topright_samples_available<mb[i*16]) nengel@2: idct_dc_add(ptr, m->mb + i*16, linesize); nengel@2: else nengel@2: idct_add (ptr, m->mb + i*16, linesize); nengel@2: } nengel@2: } nengel@2: }else{ nengel@2: idct_dc_add = d->hdsp.h264_idct_dc_add; nengel@2: idct_add = d->hdsp.h264_idct_add; nengel@2: nengel@2: for(i=0; i<16; i++){ nengel@2: uint8_t * const ptr= dest_y + block_offset[i]; nengel@2: const int dir= mrs->intra4x4_pred_mode_cache[ scan8[i] ]; nengel@2: uint8_t *topright; nengel@2: int nnz, tr; nengel@2: if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){ nengel@2: const int topright_avail= (mrs->topright_samples_available<hpc.pred4x4[ dir ](ptr, topright, linesize); nengel@2: nnz = mrs->non_zero_count_cache[ scan8[i] ]; nengel@2: if(nnz){ nengel@2: if(nnz == 1 && m->mb[i*16]) nengel@2: idct_dc_add(ptr, m->mb + i*16, linesize); nengel@2: else nengel@2: idct_add (ptr, m->mb + i*16, linesize); nengel@2: } nengel@2: } nengel@2: } nengel@2: }else{ nengel@2: d->hpc.pred16x16[ m->intra16x16_pred_mode ](dest_y , linesize); nengel@2: } nengel@2: #if OMPSS nengel@2: xchg_mb_border(m, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0); nengel@2: #else nengel@2: xchg_mb_border(d, m, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0); nengel@2: #endif nengel@2: }else { nengel@2: hl_motion(d, mrs, s, m, dest_y, dest_cb, dest_cr, nengel@2: d->hdsp.qpel_put, d->dsp.put_h264_chroma_pixels_tab, nengel@2: d->hdsp.qpel_avg, d->dsp.avg_h264_chroma_pixels_tab, nengel@2: d->hdsp.weight_h264_pixels_tab, d->hdsp.biweight_h264_pixels_tab); nengel@2: } nengel@2: nengel@2: if(!IS_INTRA4x4(mb_type)){ nengel@2: nengel@2: if(IS_INTRA16x16(mb_type)){ nengel@2: nengel@2: d->hdsp.h264_idct_add16intra(dest_y, block_offset, m->mb, linesize, mrs->non_zero_count_cache); nengel@2: nengel@2: }else if(m->cbp&15){ nengel@2: nengel@2: if(IS_8x8DCT(mb_type)){ nengel@2: d->hdsp.h264_idct8_add4(dest_y, block_offset, m->mb, linesize, mrs->non_zero_count_cache); nengel@2: }else{ nengel@2: d->hdsp.h264_idct_add16(dest_y, block_offset, m->mb, linesize, mrs->non_zero_count_cache); nengel@2: } nengel@2: } nengel@2: } nengel@2: nengel@2: if(m->cbp&0x30){ nengel@2: uint8_t *dest[2] = {dest_cb, dest_cr}; nengel@2: nengel@2: idct_add = d->hdsp.h264_idct_add; nengel@2: idct_dc_add = d->hdsp.h264_idct_dc_add; nengel@2: for(i=16; i<16+8; i++){ nengel@2: if(mrs->non_zero_count_cache[ scan8[i] ]) nengel@2: idct_add (dest[(i&4)>>2] + block_offset[i], m->mb + i*16, uvlinesize); nengel@2: else if(m->mb[i*16]) nengel@2: idct_dc_add(dest[(i&4)>>2] + block_offset[i], m->mb + i*16, uvlinesize); nengel@2: } nengel@2: } nengel@2: nengel@2: #if OMPSS nengel@2: backup_mb_border(m, dest_y, dest_cb, dest_cr, linesize, uvlinesize); nengel@2: if (mb_x+1 mb_width){ nengel@2: H264Mb *mr = m+1; nengel@2: memcpy(mr->left_border, m->left_border, sizeof(m->left_border)); nengel@2: } nengel@2: if (mb_y +1 mb_height){ nengel@2: H264Mb *md = m + d->mb_width; nengel@2: memcpy(md->top_border, m->top_border, sizeof(m->top_border)); nengel@2: if (mb_x>0){ nengel@2: H264Mb *mdl = m + d->mb_width -1; nengel@2: memcpy(mdl->top_border_next, m->top_border_next, sizeof(m->top_border_next)); nengel@2: } nengel@2: } nengel@2: #else nengel@2: backup_mb_border(d, m, dest_y, dest_cb, dest_cr, linesize, uvlinesize); nengel@2: if (mb_y +1 mb_height && d->top_next != d->top){ nengel@2: memcpy(&d->top_next[mb_x],&d->top[mb_x], sizeof(TopBorder)); nengel@2: } nengel@2: #endif nengel@2: nengel@2: ff_h264_filter_mb(d, mrs, s, m, dest_y, dest_cb, dest_cr); nengel@2: } nengel@2: nengel@2: MBRecContext *get_mbrec_context(H264Context *h){ nengel@2: MBRecContext *d = av_mallocz(sizeof(MBRecContext)); nengel@2: nengel@2: ff_h264dsp_init(&d->hdsp); nengel@2: ff_h264_pred_init(&d->hpc); nengel@2: dsputil_init(&d->dsp); nengel@2: nengel@2: #if !OMPSS nengel@2: d->mrs = av_mallocz(sizeof(MBRecState)); nengel@2: #endif nengel@2: d->hdsp.qpel_put= d->dsp.put_h264_qpel_pixels_tab; nengel@2: d->hdsp.qpel_avg= d->dsp.avg_h264_qpel_pixels_tab; nengel@2: d->mb_height = h->mb_height; nengel@2: d->mb_width = h->mb_width; nengel@2: d->mb_stride = h->mb_stride; nengel@2: d->b_stride = h->b_stride; nengel@2: d->height = h->height; nengel@2: d->width = h->width; nengel@2: d->linesize = h->width + EDGE_WIDTH*2; nengel@2: d->uvlinesize = d->linesize>>1; nengel@2: nengel@2: d->scratchpad_y = av_malloc(d->linesize*16*sizeof(uint8_t)); nengel@2: d->scratchpad_cb= av_malloc(d->uvlinesize*8*sizeof(uint8_t)); nengel@2: d->scratchpad_cr= av_malloc(d->uvlinesize*8*sizeof(uint8_t)); nengel@2: nengel@2: for (int i=0; i<16; i++){ nengel@2: d->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*d->linesize*((scan8[i] - scan8[0])>>3); nengel@2: } nengel@2: for (int i=0; i<4; i++){ nengel@2: d->block_offset[16+i]= nengel@2: d->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*d->uvlinesize*((scan8[i] - scan8[0])>>3); nengel@2: } nengel@2: nengel@2: nengel@2: nengel@2: return d; nengel@2: } nengel@2: nengel@2: void free_mbrec_context(MBRecContext *d){ nengel@2: #if !OMPSS nengel@2: av_free(d->mrs); nengel@2: #endif nengel@2: av_free(d->scratchpad_y); nengel@2: av_free(d->scratchpad_cb); nengel@2: av_free(d->scratchpad_cr); nengel@2: av_free(d); nengel@2: }