Mercurial > cgi-bin > hgwebdir.cgi > PR > Applications > VSs > VSs__H264__App
comparison libavcodec/h264_mc.c @ 3:0b056460c67d
changed code to use VSs
| author | Nina Engelhardt <nengel@mailbox.tu-berlin.de> |
|---|---|
| date | Mon, 29 Oct 2012 16:44:27 +0100 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:bcb9e2c3d14f |
|---|---|
| 1 #include "h264_types.h" | |
| 2 #include "h264_data.h" | |
| 3 | |
| 4 /* Predicts partition n from one reference picture: luma via qpix_op[luma_xy], then Cb and Cr via chroma_op. mx/my are the cached motion vector for (list, n) plus src_x_offset*8 / src_y_offset*8. Parameter m is not read in this function. */ static inline void mc_dir_part(MBRecContext *d, MBRecState *mrs, H264Mb *m, DecodedPicture *pic, int n, int square, | |
| 5 int chroma_height, int delta, int list,uint8_t *dest_y, | |
| 6 uint8_t *dest_cb, uint8_t *dest_cr, int src_x_offset, int src_y_offset, | |
| 7 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){ | |
| 8 const int mx= mrs->mv_cache[list][ scan8[n] ][0] + src_x_offset*8; | |
| 9 const int my= mrs->mv_cache[list][ scan8[n] ][1] + src_y_offset*8; | |
| 10 const int luma_xy= (mx&3) + ((my&3)<<2); /* low two bits of mx and my select one of 16 qpix_op entries */ | |
| 11 const int pic_width = 16*d->mb_width; | |
| 12 const int pic_height = 16*d->mb_height; | |
| 13 /* Integer source positions: luma drops the low two bits of mx/my, chroma drops the low three. */ | |
| 14 uint8_t *src_y, *src_cb, *src_cr; | |
| 15 int ymx= mx>>2; | |
| 16 int ymy= my>>2; | |
| 17 int cmy= my>>3; | |
| 18 int cmx= mx>>3; | |
| 19 | |
| 20 //clamp the integer luma position to [-18, pic_width+1] x [-18, pic_height+1] | |
| 21 if(ymy>= pic_height+2){ | |
| 22 ymy=pic_height+1; | |
| 23 }else if(ymy <=-19){ | |
| 24 ymy=-18; | |
| 25 } | |
| 26 if(ymx>= pic_width+2){ | |
| 27 ymx= pic_width+1; | |
| 28 }else if(ymx<=-19){ | |
| 29 ymx=-18; /* same lower bound as ymy; the chroma clamps below use the corresponding -8 */ | |
| 30 } | |
| 31 /* NOTE(review): the clamped position can still lie outside the picture, so pic->data[] is presumably allocated with replicated edge padding - confirm against the picture allocator */ | |
| 32 src_y = pic->data[0] + ymx + ymy*d->linesize; | |
| 33 qpix_op[luma_xy](dest_y, src_y, d->linesize); //FIXME try variable height perhaps? | |
| 34 if(!square){ /* non-square partitions run the same luma function a second time, shifted by delta in both source and destination */ | |
| 35 qpix_op[luma_xy](dest_y + delta, src_y + delta, d->linesize); | |
| 36 } | |
| 37 /* Clamp the integer chroma position to [-8, pic_width/2-1] x [-8, pic_height/2-1]. */ | |
| 38 if(cmy >= pic_height>>1){ | |
| 39 cmy = (pic_height>>1) -1; | |
| 40 }else if(cmy<=-9){ | |
| 41 cmy=-8; | |
| 42 } | |
| 43 if(cmx >= pic_width>>1){ | |
| 44 cmx = (pic_width>>1) -1; | |
| 45 }else if(cmx<=-9){ | |
| 46 cmx=-8; | |
| 47 } | |
| 48 | |
| 49 src_cb= pic->data[1] + cmx + cmy*d->uvlinesize; | |
| 50 src_cr= pic->data[2] + cmx + cmy*d->uvlinesize; | |
| 51 /* Both chroma planes use the same position; the low three bits of mx/my are passed as the last two arguments. */ | |
| 52 chroma_op(dest_cb, src_cb, d->uvlinesize, chroma_height, mx&7, my&7); | |
| 53 chroma_op(dest_cr, src_cr, d->uvlinesize, chroma_height, mx&7, my&7); | |
| 54 } | |
| 55 | |
| 56 /* Unweighted prediction of partition n. If list0 is set it is predicted with the put functions; if list1 is set it is predicted with the avg functions when list0 was also used, and with the put functions otherwise. */ static inline void mc_part_std(MBRecContext *d, MBRecState *mrs, H264Slice *s, H264Mb *m, int n, int square, int chroma_height, int delta, | |
| 57 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, | |
| 58 int x_offset, int y_offset, | |
| 59 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, | |
| 60 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg, | |
| 61 int list0, int list1){ | |
| 62 qpel_mc_func *qpix_op= qpix_put; | |
| 63 h264_chroma_mc_func chroma_op= chroma_put; | |
| 64 /* Move the destinations to the partition: x_offset/y_offset are doubled for luma and used as-is for chroma. */ | |
| 65 dest_y += 2*x_offset + 2*y_offset*d-> linesize; | |
| 66 dest_cb += x_offset + y_offset*d->uvlinesize; | |
| 67 dest_cr += x_offset + y_offset*d->uvlinesize; | |
| 68 x_offset += 8*m->mb_x; /* from here on the offsets include the macroblock position; they are passed to mc_dir_part as source offsets */ | |
| 69 y_offset += 8*m->mb_y; | |
| 70 | |
| 71 if(list0){ | |
| 72 DecodedPicture *ref= s->dp_ref_list[0][ mrs->ref_cache[0][ scan8[n] ] ]; | |
| 73 mc_dir_part(d, mrs, m, ref, n, square, chroma_height, delta, 0, | |
| 74 dest_y, dest_cb, dest_cr, x_offset, y_offset, qpix_op, chroma_op); | |
| 75 /* a following list1 prediction must combine with what was just written, so switch to the avg functions */ | |
| 76 qpix_op= qpix_avg; | |
| 77 chroma_op= chroma_avg; | |
| 78 } | |
| 79 | |
| 80 if(list1){ | |
| 81 DecodedPicture *ref= s->dp_ref_list[1][ mrs->ref_cache[1][ scan8[n] ] ]; | |
| 82 mc_dir_part(d, mrs, m, ref, n, square, chroma_height, delta, 1, | |
| 83 dest_y, dest_cb, dest_cr, x_offset, y_offset, qpix_op, chroma_op); | |
| 84 } | |
| 85 } | |
| 86 | |
| 87 /* Weighted prediction of partition n. With both lists set, list0 is predicted into dest_*, list1 into scratchpad buffers, and the two are combined by the *_weight_avg functions. Otherwise a single list is predicted into dest_* and then passed through the *_weight_op functions. */ static inline void mc_part_weighted(MBRecContext *d, MBRecState *mrs, H264Slice *s, H264Mb *m, int n, int square, int chroma_height, int delta, | |
| 88 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, | |
| 89 int x_offset, int y_offset, | |
| 90 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, | |
| 91 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op, | |
| 92 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg, | |
| 93 int list0, int list1){ | |
| 94 dest_y += 2*x_offset + 2*y_offset*d-> linesize; | |
| 95 dest_cb += x_offset + y_offset*d->uvlinesize; | |
| 96 dest_cr += x_offset + y_offset*d->uvlinesize; | |
| 97 x_offset += 8*m->mb_x; /* from here on the offsets include the macroblock position */ | |
| 98 y_offset += 8*m->mb_y; | |
| 99 | |
| 100 if(list0 && list1){ | |
| 101 /* don't optimize for luma-only case, since B-frames usually | |
| 102 * use implicit weights => chroma too. */ | |
| 103 uint8_t *tmp_y = d->scratchpad_y + 2*x_offset +16 ; /* temporaries are indexed by the macroblock-adjusted x_offset only; y_offset is not used here. NOTE(review): the meaning of the +16 / +8 bias is not visible in this file - confirm against the scratchpad allocation */ | |
| 104 uint8_t *tmp_cb = d->scratchpad_cb + x_offset + 8; | |
| 105 uint8_t *tmp_cr = d->scratchpad_cr + x_offset + 8; | |
| 106 /* The commented-out block below is an alternative layout that carves all three temporaries out of a single d->scratchpad buffer. */ | |
| 107 /* | |
| 108 uint8_t *tmp_cb = d->scratchpad; | |
| 109 uint8_t *tmp_cr = d->scratchpad + 8; | |
| 110 uint8_t *tmp_y = d->scratchpad + 8*d->uvlinesize;*/ | |
| 111 int refn0 = mrs->ref_cache[0][ scan8[n] ]; | |
| 112 int refn1 = mrs->ref_cache[1][ scan8[n] ]; | |
| 113 /* list0 prediction goes to dest_*, list1 prediction to tmp_*; both use the put functions. */ | |
| 114 mc_dir_part(d, mrs, m, s->dp_ref_list[0][refn0], n, square, chroma_height, delta, 0, | |
| 115 dest_y, dest_cb, dest_cr, x_offset, y_offset, qpix_put, chroma_put); | |
| 116 mc_dir_part(d, mrs, m, s->dp_ref_list[1][refn1], n, square, chroma_height, delta, 1, | |
| 117 tmp_y, tmp_cb, tmp_cr, x_offset, y_offset, qpix_put, chroma_put); | |
| 118 /* use_weight == 2: weight0 is read from implicit_weight, weight1 is 64 - weight0, with 5 in the log2-denominator slot and 0 as the last argument, for luma and chroma alike. Otherwise the per-reference luma_weight/chroma_weight tables are used. */ | |
| 119 if(s->use_weight == 2){ | |
| 120 int weight0 = s->implicit_weight[refn0][refn1][m->mb_y&1]; | |
| 121 int weight1 = 64 - weight0; | |
| 122 luma_weight_avg( dest_y, tmp_y, d-> linesize, 5, weight0, weight1, 0); | |
| 123 chroma_weight_avg(dest_cb, tmp_cb, d->uvlinesize, 5, weight0, weight1, 0); | |
| 124 chroma_weight_avg(dest_cr, tmp_cr, d->uvlinesize, 5, weight0, weight1, 0); | |
| 125 }else{ /* table entries [..][0] are passed as the two weights; the [..][1] entries of both references are summed into the last argument (presumably the offsets - confirm) */ | |
| 126 luma_weight_avg(dest_y, tmp_y, d->linesize, s->luma_log2_weight_denom, | |
| 127 s->luma_weight[refn0][0][0] , s->luma_weight[refn1][1][0], | |
| 128 s->luma_weight[refn0][0][1] + s->luma_weight[refn1][1][1]); | |
| 129 chroma_weight_avg(dest_cb, tmp_cb, d->uvlinesize, s->chroma_log2_weight_denom, | |
| 130 s->chroma_weight[refn0][0][0][0] , s->chroma_weight[refn1][1][0][0], | |
| 131 s->chroma_weight[refn0][0][0][1] + s->chroma_weight[refn1][1][0][1]); | |
| 132 chroma_weight_avg(dest_cr, tmp_cr, d->uvlinesize, s->chroma_log2_weight_denom, | |
| 133 s->chroma_weight[refn0][0][1][0] , s->chroma_weight[refn1][1][1][0], | |
| 134 s->chroma_weight[refn0][0][1][1] + s->chroma_weight[refn1][1][1][1]); | |
| 135 } | |
| 136 }else{ | |
| 137 int list = list1 ? 1 : 0; /* list 1 if list1 is set, otherwise list 0 */ | |
| 138 int refn = mrs->ref_cache[list][ scan8[n] ]; | |
| 139 DecodedPicture *ref= s->dp_ref_list[list][refn]; | |
| 140 mc_dir_part(d, mrs, m, ref, n, square, chroma_height, delta, list, | |
| 141 dest_y, dest_cb, dest_cr, x_offset, y_offset, qpix_put, chroma_put); | |
| 142 /* The luma weight is always applied in place; the chroma weights only when use_weight_chroma is set. */ | |
| 143 luma_weight_op(dest_y, d->linesize, s->luma_log2_weight_denom, | |
| 144 s->luma_weight[refn][list][0], s->luma_weight[refn][list][1]); | |
| 145 if(s->use_weight_chroma){ | |
| 146 chroma_weight_op(dest_cb, d->uvlinesize, s->chroma_log2_weight_denom, | |
| 147 s->chroma_weight[refn][list][0][0], s->chroma_weight[refn][list][0][1]); | |
| 148 chroma_weight_op(dest_cr, d->uvlinesize, s->chroma_log2_weight_denom, | |
| 149 s->chroma_weight[refn][list][1][0], s->chroma_weight[refn][list][1][1]); | |
| 150 } | |
| 151 } | |
| 152 } | |
| 153 | |
| 154 /* Predicts partition n, choosing between mc_part_weighted and mc_part_std. The weighted path is taken when use_weight is 1, or when use_weight is 2, both lists are set and the implicit weight for the two reference indices is not 32. */ static inline void mc_part(MBRecContext *d, MBRecState *mrs, H264Slice *s, H264Mb *m, int n, int square, int chroma_height, int delta, | |
| 155 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, | |
| 156 int x_offset, int y_offset, | |
| 157 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, | |
| 158 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg, | |
| 159 h264_weight_func *weight_op, h264_biweight_func *weight_avg, | |
| 160 int list0, int list1){ | |
| 161 if((s->use_weight==2 && list0 && list1 /* the list0 && list1 test comes first, so implicit_weight is only indexed when both lists are in use */ | |
| 162 && (s->implicit_weight[ mrs->ref_cache[0][scan8[n]] ][ mrs->ref_cache[1][scan8[n]] ][m->mb_y&1] != 32)) | |
| 163 || s->use_weight==1) | |
| 164 mc_part_weighted(d, mrs, s, m, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr, | |
| 165 x_offset, y_offset, qpix_put, chroma_put, | |
| 166 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1); /* entry 0 of each table is passed as the luma function, entry 3 as the chroma function */ | |
| 167 else | |
| 168 mc_part_std(d, mrs, s, m, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr, | |
| 169 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1); | |
| 170 } | |
| 171 | |
| 172 /* Issues d->dsp.prefetch calls on the reference picture selected by ref_cache[list][scan8[0]], at an address derived from that block's motion vector and the macroblock position. Does nothing when the reference index is negative. */ static inline void prefetch_motion(MBRecContext *d, MBRecState *mrs, H264Slice *s, H264Mb *m, int list){ | |
| 173 /* fetch pixels for estimated mv 4 macroblocks ahead | |
| 174 * optimized for 64byte cache lines */ | |
| 175 const int refn = mrs->ref_cache[list][scan8[0]]; | |
| 176 | |
| 177 if(refn >= 0){ | |
| 178 const int mx= (mrs->mv_cache[list][scan8[0]][0]>>2) + 16*m->mb_x + 8; | |
| 179 const int my= (mrs->mv_cache[list][scan8[0]][1]>>2) + 16*m->mb_y; | |
| 180 uint8_t **src= s->dp_ref_list[list][refn]->data; | |
| 181 int off= mx + (my + (m->mb_x&3)*4)*d->linesize + 64; /* the row is shifted by (mb_x&3)*4, so consecutive macroblocks address different rows */ | |
| 182 | |
| 183 d->dsp.prefetch(src[0]+off, d->linesize, 4); | |
| 184 off= (mx>>1) + ((my>>1) + (m->mb_x&7))*d->uvlinesize + 64; | |
| 185 d->dsp.prefetch(src[1]+off, src[2]-src[1], 2); /* stride is the distance between planes 1 and 2 with count 2: presumably one access per chroma plane at the same offset - confirm against dsp.prefetch */ | |
| 186 } | |
| 187 } | |
| 188 | |
| 189 /* Inter prediction for one macroblock: prefetches for each list flagged in mb_type, then calls mc_part once per partition according to the macroblock type (16x16, 16x8, 8x16, or 8x8 with per-quadrant sub-partitions). The put/avg/weight tables are indexed per partition shape. */ void hl_motion(MBRecContext *d, MBRecState *mrs, H264Slice *s, H264Mb *m, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, | |
| 190 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put), | |
| 191 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg), | |
| 192 h264_weight_func *weight_op, h264_biweight_func *weight_avg){ | |
| 193 const int mb_type= m->mb_type; | |
| 194 assert(IS_INTER(mb_type)); | |
| 195 | |
| 196 if (mb_type & MB_TYPE_L0) | |
| 197 prefetch_motion(d, mrs, s, m, 0); | |
| 198 if (mb_type & MB_TYPE_L1) | |
| 199 prefetch_motion(d, mrs, s, m, 1); | |
| 200 /* mc_part arguments after m: block index n, square, chroma_height, delta, the three destinations, x_offset, y_offset, the put/avg function tables, the weight tables, then the list0 and list1 flags. */ | |
| 201 if(IS_16X16(mb_type)){ | |
| 202 mc_part(d, mrs, s, m, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0, | |
| 203 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0], | |
| 204 weight_op, weight_avg, | |
| 205 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); | |
| 206 }else if(IS_16X8(mb_type)){ /* two partitions at block indices 0 and 8; the second sits at y_offset 4 and takes its list flags from IS_DIR(mb_type, 1, ...) */ | |
| 207 mc_part(d, mrs, s, m, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0, | |
| 208 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], | |
| 209 &weight_op[1], &weight_avg[1], | |
| 210 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); | |
| 211 mc_part(d, mrs, s, m, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4, | |
| 212 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], | |
| 213 &weight_op[1], &weight_avg[1], | |
| 214 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1)); | |
| 215 }else if(IS_8X16(mb_type)){ /* two partitions at block indices 0 and 4; the second sits at x_offset 4, and delta is eight luma rows */ | |
| 216 mc_part(d, mrs, s, m, 0, 0, 8, 8*d->linesize, dest_y, dest_cb, dest_cr, 0, 0, | |
| 217 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], | |
| 218 &weight_op[2], &weight_avg[2], | |
| 219 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); | |
| 220 mc_part(d, mrs, s, m, 4, 0, 8, 8*d->linesize, dest_y, dest_cb, dest_cr, 4, 0, | |
| 221 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], | |
| 222 &weight_op[2], &weight_avg[2], | |
| 223 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1)); | |
| 224 }else{ | |
| 225 int i; | |
| 226 /* Remaining case: four quadrants, each predicted according to its own sub_mb_type. */ | |
| 227 assert(IS_8X8(mb_type)); | |
| 228 | |
| 229 for(i=0; i<4; i++){ | |
| 230 const int sub_mb_type= m->sub_mb_type[i]; | |
| 231 const int n= 4*i; /* first block index of quadrant i */ | |
| 232 int x_offset= (i&1)<<2; | |
| 233 int y_offset= (i&2)<<1; | |
| 234 /* Every sub-partition of a quadrant takes its list flags from IS_DIR(sub_mb_type, 0, ...). */ | |
| 235 if(IS_SUB_8X8(sub_mb_type)){ | |
| 236 mc_part(d, mrs, s, m, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset, | |
| 237 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], | |
| 238 &weight_op[3], &weight_avg[3], | |
| 239 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); | |
| 240 }else if(IS_SUB_8X4(sub_mb_type)){ /* two partitions at block indices n and n+2, the second at y_offset+2 */ | |
| 241 mc_part(d, mrs, s, m, n, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset, | |
| 242 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], | |
| 243 &weight_op[4], &weight_avg[4], | |
| 244 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); | |
| 245 mc_part(d, mrs, s, m, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2, | |
| 246 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], | |
| 247 &weight_op[4], &weight_avg[4], | |
| 248 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); | |
| 249 }else if(IS_SUB_4X8(sub_mb_type)){ /* two partitions at block indices n and n+1, the second at x_offset+2 */ | |
| 250 mc_part(d, mrs, s, m, n, 0, 4, 4*d->linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset, | |
| 251 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], | |
| 252 &weight_op[5], &weight_avg[5], | |
| 253 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); | |
| 254 mc_part(d, mrs, s, m, n+1, 0, 4, 4*d->linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset, | |
| 255 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], | |
| 256 &weight_op[5], &weight_avg[5], | |
| 257 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); | |
| 258 }else{ | |
| 259 int j; | |
| 260 assert(IS_SUB_4X4(sub_mb_type)); | |
| 261 for(j=0; j<4; j++){ /* four partitions at block indices n..n+3, laid out 2x2 inside the quadrant */ | |
| 262 int sub_x_offset= x_offset + 2*(j&1); | |
| 263 int sub_y_offset= y_offset + (j&2); | |
| 264 mc_part(d, mrs, s, m, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset, | |
| 265 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], | |
| 266 &weight_op[6], &weight_avg[6], | |
| 267 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); | |
| 268 } | |
| 269 } | |
| 270 } | |
| 271 } | |
| 272 } | |
