nengel@2: #include "h264_types.h" nengel@2: #include "h264_data.h" nengel@2: nengel@2: static inline void mc_dir_part(MBRecContext *d, MBRecState *mrs, H264Mb *m, DecodedPicture *pic, int n, int square, nengel@2: int chroma_height, int delta, int list,uint8_t *dest_y, nengel@2: uint8_t *dest_cb, uint8_t *dest_cr, int src_x_offset, int src_y_offset, nengel@2: qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){ nengel@2: const int mx= mrs->mv_cache[list][ scan8[n] ][0] + src_x_offset*8; nengel@2: const int my= mrs->mv_cache[list][ scan8[n] ][1] + src_y_offset*8; nengel@2: const int luma_xy= (mx&3) + ((my&3)<<2); nengel@2: const int pic_width = 16*d->mb_width; nengel@2: const int pic_height = 16*d->mb_height; nengel@2: nengel@2: uint8_t *src_y, *src_cb, *src_cr; nengel@2: int ymx= mx>>2; nengel@2: int ymy= my>>2; nengel@2: int cmy= my>>3; nengel@2: int cmx= mx>>3; nengel@2: nengel@2: //truncate the motion vectors references nengel@2: if(ymy>= pic_height+2){ nengel@2: ymy=pic_height+1; nengel@2: }else if(ymy <=-19){ nengel@2: ymy=-18; nengel@2: } nengel@2: if(ymx>= pic_width+2){ nengel@2: ymx= pic_width+1; nengel@2: }else if(ymx<=-19){ nengel@2: ymx=-19; nengel@2: } nengel@2: nengel@2: src_y = pic->data[0] + ymx + ymy*d->linesize; nengel@2: qpix_op[luma_xy](dest_y, src_y, d->linesize); //FIXME try variable height perhaps? nengel@2: if(!square){ nengel@2: qpix_op[luma_xy](dest_y + delta, src_y + delta, d->linesize); nengel@2: } nengel@2: nengel@2: if(cmy >= pic_height>>1){ nengel@2: cmy = (pic_height>>1) -1; nengel@2: }else if(cmy<=-9){ nengel@2: cmy=-8; nengel@2: } nengel@2: if(cmx >= pic_width>>1){ nengel@2: cmx = (pic_width>>1) -1; nengel@2: }else if(cmx<=-9){ nengel@2: cmx=-8; nengel@2: } nengel@2: nengel@2: src_cb= pic->data[1] + cmx + cmy*d->uvlinesize; nengel@2: src_cr= pic->data[2] + cmx + cmy*d->uvlinesize; nengel@2: nengel@2: chroma_op(dest_cb, src_cb, d->uvlinesize, chroma_height, mx&7, my&7); nengel@2: chroma_op(dest_cr, src_cr, d->uvlinesize, chroma_height, mx&7, my&7); nengel@2: } nengel@2: nengel@2: static inline void mc_part_std(MBRecContext *d, MBRecState *mrs, H264Slice *s, H264Mb *m, int n, int square, int chroma_height, int delta, nengel@2: uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, nengel@2: int x_offset, int y_offset, nengel@2: qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, nengel@2: qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg, nengel@2: int list0, int list1){ nengel@2: qpel_mc_func *qpix_op= qpix_put; nengel@2: h264_chroma_mc_func chroma_op= chroma_put; nengel@2: nengel@2: dest_y += 2*x_offset + 2*y_offset*d-> linesize; nengel@2: dest_cb += x_offset + y_offset*d->uvlinesize; nengel@2: dest_cr += x_offset + y_offset*d->uvlinesize; nengel@2: x_offset += 8*m->mb_x; nengel@2: y_offset += 8*m->mb_y; nengel@2: nengel@2: if(list0){ nengel@2: DecodedPicture *ref= s->dp_ref_list[0][ mrs->ref_cache[0][ scan8[n] ] ]; nengel@2: mc_dir_part(d, mrs, m, ref, n, square, chroma_height, delta, 0, nengel@2: dest_y, dest_cb, dest_cr, x_offset, y_offset, qpix_op, chroma_op); nengel@2: nengel@2: qpix_op= qpix_avg; nengel@2: chroma_op= chroma_avg; nengel@2: } nengel@2: nengel@2: if(list1){ nengel@2: DecodedPicture *ref= s->dp_ref_list[1][ mrs->ref_cache[1][ scan8[n] ] ]; nengel@2: mc_dir_part(d, mrs, m, ref, n, square, chroma_height, delta, 1, nengel@2: dest_y, dest_cb, dest_cr, x_offset, y_offset, qpix_op, chroma_op); nengel@2: } nengel@2: } nengel@2: nengel@2: static inline void mc_part_weighted(MBRecContext *d, MBRecState *mrs, H264Slice *s, H264Mb *m, int n, int square, int chroma_height, int delta, nengel@2: uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, nengel@2: int x_offset, int y_offset, nengel@2: qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, nengel@2: h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op, nengel@2: h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg, nengel@2: int list0, int list1){ nengel@2: dest_y += 2*x_offset + 2*y_offset*d-> linesize; nengel@2: dest_cb += x_offset + y_offset*d->uvlinesize; nengel@2: dest_cr += x_offset + y_offset*d->uvlinesize; nengel@2: x_offset += 8*m->mb_x; nengel@2: y_offset += 8*m->mb_y; nengel@2: nengel@2: if(list0 && list1){ nengel@2: /* don't optimize for luma-only case, since B-frames usually nengel@2: * use implicit weights => chroma too. */ nengel@2: uint8_t *tmp_y = d->scratchpad_y + 2*x_offset +16 ; nengel@2: uint8_t *tmp_cb = d->scratchpad_cb + x_offset + 8; nengel@2: uint8_t *tmp_cr = d->scratchpad_cr + x_offset + 8; nengel@2: nengel@2: /* nengel@2: uint8_t *tmp_cb = d->scratchpad; nengel@2: uint8_t *tmp_cr = d->scratchpad + 8; nengel@2: uint8_t *tmp_y = d->scratchpad + 8*d->uvlinesize;*/ nengel@2: int refn0 = mrs->ref_cache[0][ scan8[n] ]; nengel@2: int refn1 = mrs->ref_cache[1][ scan8[n] ]; nengel@2: nengel@2: mc_dir_part(d, mrs, m, s->dp_ref_list[0][refn0], n, square, chroma_height, delta, 0, nengel@2: dest_y, dest_cb, dest_cr, x_offset, y_offset, qpix_put, chroma_put); nengel@2: mc_dir_part(d, mrs, m, s->dp_ref_list[1][refn1], n, square, chroma_height, delta, 1, nengel@2: tmp_y, tmp_cb, tmp_cr, x_offset, y_offset, qpix_put, chroma_put); nengel@2: nengel@2: if(s->use_weight == 2){ nengel@2: int weight0 = s->implicit_weight[refn0][refn1][m->mb_y&1]; nengel@2: int weight1 = 64 - weight0; nengel@2: luma_weight_avg( dest_y, tmp_y, d-> linesize, 5, weight0, weight1, 0); nengel@2: chroma_weight_avg(dest_cb, tmp_cb, d->uvlinesize, 5, weight0, weight1, 0); nengel@2: chroma_weight_avg(dest_cr, tmp_cr, d->uvlinesize, 5, weight0, weight1, 0); nengel@2: }else{ nengel@2: luma_weight_avg(dest_y, tmp_y, d->linesize, s->luma_log2_weight_denom, nengel@2: s->luma_weight[refn0][0][0] , s->luma_weight[refn1][1][0], nengel@2: s->luma_weight[refn0][0][1] + s->luma_weight[refn1][1][1]); nengel@2: chroma_weight_avg(dest_cb, tmp_cb, d->uvlinesize, s->chroma_log2_weight_denom, nengel@2: s->chroma_weight[refn0][0][0][0] , s->chroma_weight[refn1][1][0][0], nengel@2: s->chroma_weight[refn0][0][0][1] + s->chroma_weight[refn1][1][0][1]); nengel@2: chroma_weight_avg(dest_cr, tmp_cr, d->uvlinesize, s->chroma_log2_weight_denom, nengel@2: s->chroma_weight[refn0][0][1][0] , s->chroma_weight[refn1][1][1][0], nengel@2: s->chroma_weight[refn0][0][1][1] + s->chroma_weight[refn1][1][1][1]); nengel@2: } nengel@2: }else{ nengel@2: int list = list1 ? 1 : 0; nengel@2: int refn = mrs->ref_cache[list][ scan8[n] ]; nengel@2: DecodedPicture *ref= s->dp_ref_list[list][refn]; nengel@2: mc_dir_part(d, mrs, m, ref, n, square, chroma_height, delta, list, nengel@2: dest_y, dest_cb, dest_cr, x_offset, y_offset, qpix_put, chroma_put); nengel@2: nengel@2: luma_weight_op(dest_y, d->linesize, s->luma_log2_weight_denom, nengel@2: s->luma_weight[refn][list][0], s->luma_weight[refn][list][1]); nengel@2: if(s->use_weight_chroma){ nengel@2: chroma_weight_op(dest_cb, d->uvlinesize, s->chroma_log2_weight_denom, nengel@2: s->chroma_weight[refn][list][0][0], s->chroma_weight[refn][list][0][1]); nengel@2: chroma_weight_op(dest_cr, d->uvlinesize, s->chroma_log2_weight_denom, nengel@2: s->chroma_weight[refn][list][1][0], s->chroma_weight[refn][list][1][1]); nengel@2: } nengel@2: } nengel@2: } nengel@2: nengel@2: static inline void mc_part(MBRecContext *d, MBRecState *mrs, H264Slice *s, H264Mb *m, int n, int square, int chroma_height, int delta, nengel@2: uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, nengel@2: int x_offset, int y_offset, nengel@2: qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, nengel@2: qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg, nengel@2: h264_weight_func *weight_op, h264_biweight_func *weight_avg, nengel@2: int list0, int list1){ nengel@2: if((s->use_weight==2 && list0 && list1 nengel@2: && (s->implicit_weight[ mrs->ref_cache[0][scan8[n]] ][ mrs->ref_cache[1][scan8[n]] ][m->mb_y&1] != 32)) nengel@2: || s->use_weight==1) nengel@2: mc_part_weighted(d, mrs, s, m, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr, nengel@2: x_offset, y_offset, qpix_put, chroma_put, nengel@2: weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1); nengel@2: else nengel@2: mc_part_std(d, mrs, s, m, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr, nengel@2: x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1); nengel@2: } nengel@2: nengel@2: static inline void prefetch_motion(MBRecContext *d, MBRecState *mrs, H264Slice *s, H264Mb *m, int list){ nengel@2: /* fetch pixels for estimated mv 4 macroblocks ahead nengel@2: * optimized for 64byte cache lines */ nengel@2: const int refn = mrs->ref_cache[list][scan8[0]]; nengel@2: nengel@2: if(refn >= 0){ nengel@2: const int mx= (mrs->mv_cache[list][scan8[0]][0]>>2) + 16*m->mb_x + 8; nengel@2: const int my= (mrs->mv_cache[list][scan8[0]][1]>>2) + 16*m->mb_y; nengel@2: uint8_t **src= s->dp_ref_list[list][refn]->data; nengel@2: int off= mx + (my + (m->mb_x&3)*4)*d->linesize + 64; nengel@2: nengel@2: d->dsp.prefetch(src[0]+off, d->linesize, 4); nengel@2: off= (mx>>1) + ((my>>1) + (m->mb_x&7))*d->uvlinesize + 64; nengel@2: d->dsp.prefetch(src[1]+off, src[2]-src[1], 2); nengel@2: } nengel@2: } nengel@2: nengel@2: void hl_motion(MBRecContext *d, MBRecState *mrs, H264Slice *s, H264Mb *m, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, nengel@2: qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put), nengel@2: qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg), nengel@2: h264_weight_func *weight_op, h264_biweight_func *weight_avg){ nengel@2: const int mb_type= m->mb_type; nengel@2: assert(IS_INTER(mb_type)); nengel@2: nengel@2: if (mb_type & MB_TYPE_L0) nengel@2: prefetch_motion(d, mrs, s, m, 0); nengel@2: if (mb_type & MB_TYPE_L1) nengel@2: prefetch_motion(d, mrs, s, m, 1); nengel@2: nengel@2: if(IS_16X16(mb_type)){ nengel@2: mc_part(d, mrs, s, m, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0, nengel@2: qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0], nengel@2: weight_op, weight_avg, nengel@2: IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); nengel@2: }else if(IS_16X8(mb_type)){ nengel@2: mc_part(d, mrs, s, m, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0, nengel@2: qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], nengel@2: &weight_op[1], &weight_avg[1], nengel@2: IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); nengel@2: mc_part(d, mrs, s, m, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4, nengel@2: qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], nengel@2: &weight_op[1], &weight_avg[1], nengel@2: IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1)); nengel@2: }else if(IS_8X16(mb_type)){ nengel@2: mc_part(d, mrs, s, m, 0, 0, 8, 8*d->linesize, dest_y, dest_cb, dest_cr, 0, 0, nengel@2: qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], nengel@2: &weight_op[2], &weight_avg[2], nengel@2: IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); nengel@2: mc_part(d, mrs, s, m, 4, 0, 8, 8*d->linesize, dest_y, dest_cb, dest_cr, 4, 0, nengel@2: qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], nengel@2: &weight_op[2], &weight_avg[2], nengel@2: IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1)); nengel@2: }else{ nengel@2: int i; nengel@2: nengel@2: assert(IS_8X8(mb_type)); nengel@2: nengel@2: for(i=0; i<4; i++){ nengel@2: const int sub_mb_type= m->sub_mb_type[i]; nengel@2: const int n= 4*i; nengel@2: int x_offset= (i&1)<<2; nengel@2: int y_offset= (i&2)<<1; nengel@2: nengel@2: if(IS_SUB_8X8(sub_mb_type)){ nengel@2: mc_part(d, mrs, s, m, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset, nengel@2: qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], nengel@2: &weight_op[3], &weight_avg[3], nengel@2: IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); nengel@2: }else if(IS_SUB_8X4(sub_mb_type)){ nengel@2: mc_part(d, mrs, s, m, n, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset, nengel@2: qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], nengel@2: &weight_op[4], &weight_avg[4], nengel@2: IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); nengel@2: mc_part(d, mrs, s, m, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2, nengel@2: qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], nengel@2: &weight_op[4], &weight_avg[4], nengel@2: IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); nengel@2: }else if(IS_SUB_4X8(sub_mb_type)){ nengel@2: mc_part(d, mrs, s, m, n, 0, 4, 4*d->linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset, nengel@2: qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], nengel@2: &weight_op[5], &weight_avg[5], nengel@2: IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); nengel@2: mc_part(d, mrs, s, m, n+1, 0, 4, 4*d->linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset, nengel@2: qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], nengel@2: &weight_op[5], &weight_avg[5], nengel@2: IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); nengel@2: }else{ nengel@2: int j; nengel@2: assert(IS_SUB_4X4(sub_mb_type)); nengel@2: for(j=0; j<4; j++){ nengel@2: int sub_x_offset= x_offset + 2*(j&1); nengel@2: int sub_y_offset= y_offset + (j&2); nengel@2: mc_part(d, mrs, s, m, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset, nengel@2: qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], nengel@2: &weight_op[6], &weight_avg[6], nengel@2: IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); nengel@2: } nengel@2: } nengel@2: } nengel@2: } nengel@2: }