Mercurial > cgi-bin > hgwebdir.cgi > PR > Applications > VSs > VSs__H264__App
diff libavcodec/h264_deblock.c @ 2:897f711a7157
rearrange to work with autoconf
| author | Nina Engelhardt <nengel@mailbox.tu-berlin.de> |
|---|---|
| date | Tue, 25 Sep 2012 15:55:33 +0200 |
| parents | |
| children |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/libavcodec/h264_deblock.c Tue Sep 25 15:55:33 2012 +0200 1.3 @@ -0,0 +1,507 @@ 1.4 +/* 1.5 + * H.26L/H.264/AVC/JVT/14496-10/... loop filter 1.6 + * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> 1.7 + * 1.8 + * This file is part of FFmpeg. 1.9 + * 1.10 + * FFmpeg is free software; you can redistribute it and/or 1.11 + * modify it under the terms of the GNU Lesser General Public 1.12 + * License as published by the Free Software Foundation; either 1.13 + * version 2.1 of the License, or (at your option) any later version. 1.14 + * 1.15 + * FFmpeg is distributed in the hope that it will be useful, 1.16 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 1.17 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 1.18 + * Lesser General Public License for more details. 1.19 + * 1.20 + * You should have received a copy of the GNU Lesser General Public 1.21 + * License along with FFmpeg; if not, write to the Free Software 1.22 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 1.23 + */ 1.24 + 1.25 +/** 1.26 + * @file 1.27 + * H.264 / AVC / MPEG4 part10 loop filter. 1.28 + * @author Michael Niedermayer <michaelni@gmx.at> 1.29 + */ 1.30 + 1.31 +#include "dsputil.h" 1.32 +#include "mathops.h" 1.33 +#include "rectangle.h" 1.34 +#include "h264_types.h" 1.35 +#include "h264_misc.h" 1.36 +#include "h264_data.h" 1.37 +//#undef NDEBUG 1.38 +#include <assert.h> 1.39 + 1.40 +/* Deblocking filter (p153) */ 1.41 +static const uint8_t alpha_table[52*3] = { 1.42 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.43 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.44 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.45 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.46 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.47 + 0, 0, 0, 0, 0, 0, 4, 4, 5, 6, 1.48 + 7, 8, 9, 10, 12, 13, 15, 17, 20, 22, 1.49 + 25, 28, 32, 36, 40, 45, 50, 56, 63, 71, 1.50 + 80, 90,101,113,127,144,162,182,203,226, 1.51 + 255,255, 1.52 + 255,255,255,255,255,255,255,255,255,255,255,255,255, 1.53 + 255,255,255,255,255,255,255,255,255,255,255,255,255, 1.54 + 255,255,255,255,255,255,255,255,255,255,255,255,255, 1.55 + 255,255,255,255,255,255,255,255,255,255,255,255,255, 1.56 +}; 1.57 +static const uint8_t beta_table[52*3] = { 1.58 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.59 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.60 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.61 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.62 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.63 + 0, 0, 0, 0, 0, 0, 2, 2, 2, 3, 1.64 + 3, 3, 3, 4, 4, 4, 6, 6, 7, 7, 1.65 + 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 1.66 + 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 1.67 + 18, 18, 1.68 + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 1.69 + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 1.70 + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 1.71 + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 1.72 +}; 1.73 +static const uint8_t tc0_table[52*3][4] = { 1.74 + {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, 1.75 + {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, 1.76 + {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, 1.77 + {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, 1.78 + {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, 1.79 + {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, 1.80 + {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, 1.81 + {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, 1.82 + {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, 1.83 + {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, 1.84 + {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, 1.85 + {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 1 }, 1.86 + {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 1, 1 }, {-1, 0, 1, 1 }, {-1, 1, 1, 1 }, 1.87 + {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 }, 1.88 + {-1, 1, 1, 2 }, {-1, 1, 2, 3 }, {-1, 1, 2, 3 }, {-1, 2, 2, 3 }, {-1, 2, 2, 4 }, {-1, 2, 3, 4 }, 1.89 + {-1, 2, 3, 4 }, {-1, 3, 3, 5 }, {-1, 3, 4, 6 }, {-1, 3, 4, 6 }, {-1, 4, 5, 7 }, {-1, 4, 5, 8 }, 1.90 + {-1, 4, 6, 9 }, {-1, 5, 7,10 }, {-1, 6, 8,11 }, {-1, 6, 8,13 }, {-1, 7,10,14 }, {-1, 8,11,16 }, 1.91 + {-1, 9,12,18 }, {-1,10,13,20 }, {-1,11,15,23 }, {-1,13,17,25 }, 1.92 + {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, 1.93 + {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, 1.94 + {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, 1.95 + {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, 1.96 + {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, 1.97 + {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, 1.98 + {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, 1.99 + {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, 1.100 + {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, 1.101 +}; 1.102 + 1.103 +av_always_inline static void filter_mb_edgev( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, MBRecContext *mrc, H264Slice *s) { 1.104 + const unsigned int index_a = qp + s->slice_alpha_c0_offset; 1.105 + const int alpha = alpha_table[index_a]; 1.106 + const int beta = beta_table[qp + s->slice_beta_offset]; 1.107 + if (alpha ==0 || beta == 0) return; 1.108 + 1.109 + if( bS[0] < 4 ) { 1.110 + int8_t tc[4]; 1.111 + tc[0] = tc0_table[index_a][bS[0]]; 1.112 + tc[1] = tc0_table[index_a][bS[1]]; 1.113 + tc[2] = tc0_table[index_a][bS[2]]; 1.114 + tc[3] = tc0_table[index_a][bS[3]]; 1.115 + mrc->hdsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc); 1.116 + } else { 1.117 + mrc->hdsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta); 1.118 + } 1.119 +} 1.120 + 1.121 +av_always_inline static void filter_mb_edgecv( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, MBRecContext *mrc, H264Slice *s ) { 1.122 + const unsigned int index_a = qp + s->slice_alpha_c0_offset; 1.123 + const int alpha = alpha_table[index_a]; 1.124 + const int beta = beta_table[qp + s->slice_beta_offset]; 1.125 + if (alpha ==0 || beta == 0) return; 1.126 + 1.127 + if( bS[0] < 4 ) { 1.128 + int8_t tc[4]; 1.129 + tc[0] = tc0_table[index_a][bS[0]]+1; 1.130 + tc[1] = tc0_table[index_a][bS[1]]+1; 1.131 + tc[2] = tc0_table[index_a][bS[2]]+1; 1.132 + tc[3] = tc0_table[index_a][bS[3]]+1; 1.133 + mrc->hdsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc); 1.134 + } else { 1.135 + mrc->hdsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta); 1.136 + } 1.137 +} 1.138 + 1.139 + 1.140 +av_always_inline static void filter_mb_edgeh( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, MBRecContext *mrc, H264Slice *s ) { 1.141 + const unsigned int index_a = qp + s->slice_alpha_c0_offset; 1.142 + const int alpha = alpha_table[index_a]; 1.143 + const int beta = beta_table[qp + s->slice_beta_offset]; 1.144 + if (alpha ==0 || beta == 0) return; 1.145 + 1.146 + if( bS[0] < 4 ) { 1.147 + int8_t tc[4]; 1.148 + tc[0] = tc0_table[index_a][bS[0]]; 1.149 + tc[1] = tc0_table[index_a][bS[1]]; 1.150 + tc[2] = tc0_table[index_a][bS[2]]; 1.151 + tc[3] = tc0_table[index_a][bS[3]]; 1.152 + mrc->hdsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc); 1.153 + } else { 1.154 + mrc->hdsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta); 1.155 + } 1.156 +} 1.157 + 1.158 +av_always_inline static void filter_mb_edgech( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, MBRecContext *mrc, H264Slice *s ) { 1.159 + const unsigned int index_a = qp + s->slice_alpha_c0_offset; 1.160 + const int alpha = alpha_table[index_a]; 1.161 + const int beta = beta_table[qp + s->slice_beta_offset]; 1.162 + if (alpha ==0 || beta == 0) return; 1.163 + 1.164 + if( bS[0] < 4 ) { 1.165 + int8_t tc[4]; 1.166 + tc[0] = tc0_table[index_a][bS[0]]+1; 1.167 + tc[1] = tc0_table[index_a][bS[1]]+1; 1.168 + tc[2] = tc0_table[index_a][bS[2]]+1; 1.169 + tc[3] = tc0_table[index_a][bS[3]]+1; 1.170 + mrc->hdsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc); 1.171 + } else { 1.172 + mrc->hdsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta); 1.173 + } 1.174 +} 1.175 + 1.176 +static av_always_inline void filter_mb_dir(MBRecContext *mrc, MBRecState *mrs, H264Slice *s, H264Mb *m, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, int dir) { 1.177 + const int mbm_type = dir == 0 ? mrs->left_type : mrs->top_type; 1.178 + const int qp_xy= m->qscale_mb_xy; 1.179 + const int qp_dir = dir == 0 ? m->qscale_left_mb_xy : m->qscale_top_mb_xy; 1.180 + const int linesize = mrc->linesize; 1.181 + const int uvlinesize = mrc->uvlinesize; 1.182 + const int mb_type = m->mb_type; 1.183 + int edge; 1.184 + const int edges = mrs->edges[dir]; 1.185 + 1.186 + if(mbm_type){ 1.187 + int16_t* bS=mrs->bS[dir][0]; 1.188 + /* Filter edge */ 1.189 + // Do not use s->qscale as luma quantizer because it has not the same 1.190 + // value in IPCM macroblocks. 1.191 + if(bS[0]+bS[1]+bS[2]+bS[3]){ 1.192 + int qp = ( qp_xy + qp_dir + 1 ) >> 1; 1.193 + if( dir == 0 ) { 1.194 + filter_mb_edgev( &img_y[0], linesize, bS, qp, mrc, s ); 1.195 + { 1.196 + int qp= ( get_chroma_qp(s, 0, qp_xy) + get_chroma_qp( s, 0, qp_dir) + 1 ) >> 1; 1.197 + filter_mb_edgecv( &img_cb[0], uvlinesize, bS, qp, mrc, s); 1.198 + filter_mb_edgecv( &img_cr[0], uvlinesize, bS, qp, mrc, s); 1.199 + } 1.200 + } else { 1.201 + filter_mb_edgeh( &img_y[0], linesize, bS, qp, mrc, s ); 1.202 + { 1.203 + int qp= ( get_chroma_qp(s, 0, qp_xy) + get_chroma_qp( s, 0, qp_dir) + 1 ) >> 1; 1.204 + filter_mb_edgech( &img_cb[0], uvlinesize, bS, qp, mrc, s); 1.205 + filter_mb_edgech( &img_cr[0], uvlinesize, bS, qp, mrc, s); 1.206 + } 1.207 + } 1.208 + } 1.209 + } 1.210 + 1.211 + for( edge = 1; edge < edges; edge++ ) { 1.212 + int16_t* bS=mrs->bS[dir][edge]; 1.213 + int qp = qp_xy; 1.214 + 1.215 + if( IS_8x8DCT(mb_type & (edge<<24)) ) // (edge&1) && IS_8x8DCT(mb_type) 1.216 + continue; 1.217 + 1.218 + if(bS[0]+bS[1]+bS[2]+bS[3] == 0) 1.219 + continue; 1.220 + 1.221 + /* Filter edge */ 1.222 + // Do not use s->qscale as luma quantizer because it has not the same 1.223 + // value in IPCM macroblocks. 1.224 + 1.225 + if( dir == 0 ) { 1.226 + filter_mb_edgev( &img_y[4*edge], linesize, bS, qp, mrc, s); 1.227 + if( (edge&1) == 0 ) { 1.228 + filter_mb_edgecv( &img_cb[2*edge], uvlinesize, bS, get_chroma_qp(s, 0, qp_xy), mrc, s); 1.229 + filter_mb_edgecv( &img_cr[2*edge], uvlinesize, bS, get_chroma_qp(s, 1, qp_xy), mrc, s); 1.230 + } 1.231 + } else { 1.232 + filter_mb_edgeh( &img_y[4*edge*linesize], linesize, bS, qp, mrc, s ); 1.233 + if( (edge&1) == 0 ) { 1.234 + filter_mb_edgech( &img_cb[2*edge*uvlinesize], uvlinesize, bS, get_chroma_qp(s, 0, qp_xy), mrc, s); 1.235 + filter_mb_edgech( &img_cr[2*edge*uvlinesize], uvlinesize, bS, get_chroma_qp(s, 1, qp_xy), mrc, s); 1.236 + } 1.237 + } 1.238 + } 1.239 +} 1.240 + 1.241 +static int check_mv(MBRecContext *mrc, MBRecState *mrs, H264Slice *s, long b_idx, long bn_idx, int mvy_limit){ 1.242 + int v; 1.243 + v= mrs->ref_cache[0][b_idx] != mrs->ref_cache[0][bn_idx]; 1.244 + if(!v && mrs->ref_cache[0][b_idx]!=-1) 1.245 + // absolute value >= 7 | ... 1.246 + v= ((unsigned) (mrs->mv_cache[0][b_idx][0] - mrs->mv_cache[0][bn_idx][0] + 3) >= 7U) | 1.247 + ((FFABS( mrs->mv_cache[0][b_idx][1] - mrs->mv_cache[0][bn_idx][1] )) >= mvy_limit); 1.248 + 1.249 + if(s->list_count==2){ 1.250 + if(!v) 1.251 + v = (mrs->ref_cache[1][b_idx] != mrs->ref_cache[1][bn_idx]) | 1.252 + ((unsigned) (mrs->mv_cache[1][b_idx][0] - mrs->mv_cache[1][bn_idx][0] + 3) >= 7U) | 1.253 + ((FFABS( mrs->mv_cache[1][b_idx][1] - mrs->mv_cache[1][bn_idx][1] )) >= mvy_limit); 1.254 + 1.255 + if(v){ 1.256 + if((mrs->ref_cache[0][b_idx] != mrs->ref_cache[1][bn_idx]) | 1.257 + (mrs->ref_cache[1][b_idx] != mrs->ref_cache[0][bn_idx])) 1.258 + return 1; 1.259 + return 1.260 + ((unsigned) (mrs->mv_cache[0][b_idx][0] - mrs->mv_cache[1][bn_idx][0] + 3) >= 7U) | 1.261 + ((FFABS( mrs->mv_cache[0][b_idx][1] - mrs->mv_cache[1][bn_idx][1] )) >= mvy_limit) | 1.262 + ((unsigned) (mrs->mv_cache[1][b_idx][0] - mrs->mv_cache[0][bn_idx][0] + 3) >= 7U) | 1.263 + ((FFABS( mrs->mv_cache[1][b_idx][1] - mrs->mv_cache[0][bn_idx][1] )) >= mvy_limit); 1.264 + } 1.265 + } 1.266 + 1.267 + return v; 1.268 +} 1.269 + 1.270 +static void calc_bS_values(MBRecContext *mrc, MBRecState *mrs, H264Slice *s, H264Mb *m, int mvy_limit, int dir) { 1.271 + int mb_type = m->mb_type; 1.272 + int edge; 1.273 + const int mbm_type = dir == 0 ? mrs->left_type : mrs->top_type; 1.274 + 1.275 + // how often to recheck mv-based bS when iterating between edges 1.276 + static const uint8_t mask_edge_tab[2][8]={{0,3,3,3,1,1,1,1}, 1.277 + {0,3,1,1,3,3,3,3}}; 1.278 + const int mask_edge = mask_edge_tab[dir][(mb_type>>3)&7]; 1.279 + const int edges = mask_edge== 3 && !(m->cbp&15) ? 1 : 4; 1.280 + // how often to recheck mv-based bS when iterating along each edge 1.281 + const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)); 1.282 + 1.283 + mrs->edges[dir]= edges; 1.284 + 1.285 + if(mbm_type){ 1.286 + int16_t* bS=mrs->bS[dir][0]; 1.287 + if( IS_INTRA(mb_type|mbm_type)) { 1.288 + AV_WN64A(bS, 0x0004000400040004ULL); 1.289 + } else { 1.290 + int i; 1.291 + int mv_done; 1.292 + if( mask_par0 && ((mbm_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) { 1.293 + int b_idx= 8 + 4; 1.294 + int bn_idx= b_idx - (dir ? 8:1); 1.295 + 1.296 + bS[0] = bS[1] = bS[2] = bS[3] = check_mv(mrc, mrs, s, 8 + 4, bn_idx, mvy_limit); 1.297 + mv_done = 1; 1.298 + } 1.299 + else 1.300 + mv_done = 0; 1.301 + 1.302 + for( i = 0; i < 4; i++ ) { 1.303 + int x = dir == 0 ? 0 : i; 1.304 + int y = dir == 0 ? i : 0; 1.305 + int b_idx= 8 + 4 + x + 8*y; 1.306 + int bn_idx= b_idx - (dir ? 8:1); 1.307 + 1.308 + if( mrs->non_zero_count_cache[b_idx] | 1.309 + mrs->non_zero_count_cache[bn_idx] ) { 1.310 + bS[i] = 2; 1.311 + } 1.312 + else if(!mv_done) 1.313 + { 1.314 + bS[i] = check_mv(mrc, mrs, s, b_idx, bn_idx, mvy_limit); 1.315 + } 1.316 + } 1.317 + } 1.318 + } 1.319 + 1.320 + /* Calculate bS */ 1.321 + for( edge = 1; edge < edges; edge++ ) { 1.322 + int16_t* bS=mrs->bS[dir][edge]; 1.323 + 1.324 + if( IS_8x8DCT(mb_type & (edge<<24)) ) // (edge&1) && IS_8x8DCT(mb_type) 1.325 + continue; 1.326 + 1.327 + if( IS_INTRA(mb_type)) { 1.328 + AV_WN64A(bS, 0x0003000300030003ULL); 1.329 + } else { 1.330 + int i; 1.331 + int mv_done; 1.332 + 1.333 + if( edge & mask_edge ) { 1.334 + AV_ZERO64(bS); 1.335 + mv_done = 1; 1.336 + } 1.337 + else if( mask_par0 ) { 1.338 + int b_idx= 8 + 4 + edge * (dir ? 8:1); 1.339 + int bn_idx= b_idx - (dir ? 8:1); 1.340 + 1.341 + bS[0] = bS[1] = bS[2] = bS[3] = check_mv(mrc, mrs, s, b_idx, bn_idx, mvy_limit); 1.342 + mv_done = 1; 1.343 + } 1.344 + else 1.345 + mv_done = 0; 1.346 + 1.347 + for( i = 0; i < 4; i++ ) { 1.348 + int x = dir == 0 ? edge : i; 1.349 + int y = dir == 0 ? i : edge; 1.350 + int b_idx= 8 + 4 + x + 8*y; 1.351 + int bn_idx= b_idx - (dir ? 8:1); 1.352 + 1.353 + if( mrs->non_zero_count_cache[b_idx] | 1.354 + mrs->non_zero_count_cache[bn_idx] ) { 1.355 + bS[i] = 2; 1.356 + } 1.357 + else if(!mv_done) 1.358 + { 1.359 + bS[i] = check_mv(mrc, mrs, s, b_idx, bn_idx, mvy_limit); 1.360 + } 1.361 + } 1.362 + 1.363 + if(bS[0]+bS[1]+bS[2]+bS[3] == 0) 1.364 + continue; 1.365 + } 1.366 + 1.367 + } 1.368 +} 1.369 + 1.370 + 1.371 +/** 1.372 +* 1.373 +* @return zero if the loop filter can be skiped 1.374 +*/ 1.375 +static int fill_filter_caches(MBRecContext *mrc, MBRecState *mrs, H264Slice *s, H264Mb *m, int mb_type){ 1.376 + H264Mb *m_top = m - mrc->mb_width; 1.377 + H264Mb *m_left = m - 1; 1.378 + const int mb_x = m->mb_x; 1.379 + const int mb_y = m->mb_y; 1.380 + int top_type, left_type; 1.381 + int qp, top_qp, left_qp; 1.382 + int qp_thresh = s->qp_thresh; //FIXME strictly we should store qp_thresh for each mb of a slice 1.383 + 1.384 + qp = m->qscale_mb_xy ; 1.385 + left_qp = m->qscale_left_mb_xy ; 1.386 + top_qp = m->qscale_top_mb_xy ; 1.387 + 1.388 + //for sufficiently low qp, filtering wouldn't do anything 1.389 + //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp 1.390 + if(qp <= qp_thresh 1.391 + && (!(mb_x+mb_y) || ((qp + left_qp + 1)>>1) <= qp_thresh) 1.392 + && ( mb_y==0 || ((qp + top_qp + 1)>>1) <= qp_thresh)){ 1.393 + return 0; 1.394 + } 1.395 + 1.396 + if(IS_INTRA(mb_type)){ 1.397 + return 1; 1.398 + } 1.399 + 1.400 + { 1.401 + int list; 1.402 + for(list=0; list<s->list_count; list++){ 1.403 + int8_t *ref; 1.404 + 1.405 + if(!USES_LIST(mb_type, list)){ 1.406 + fill_rectangle( mrs->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4); 1.407 + fill_rectangle( mrs->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4); 1.408 + AV_WN32A(&mrs->ref_cache[list][scan8[ 0]], ((LIST_NOT_USED)&0xFF)*0x01010101u); 1.409 + AV_WN32A(&mrs->ref_cache[list][scan8[ 2]], ((LIST_NOT_USED)&0xFF)*0x01010101u); 1.410 + AV_WN32A(&mrs->ref_cache[list][scan8[ 8]], ((LIST_NOT_USED)&0xFF)*0x01010101u); 1.411 + AV_WN32A(&mrs->ref_cache[list][scan8[10]], ((LIST_NOT_USED)&0xFF)*0x01010101u); 1.412 + continue; 1.413 + } 1.414 + 1.415 + ref = &mrs->ref_index[list][4*mb_x]; 1.416 + { 1.417 + int (*ref2frm)[64] =(void *) (s->ref2frm[0] + 2); 1.418 + AV_WN32A(&mrs->ref_cache[list][scan8[ 0]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); 1.419 + AV_WN32A(&mrs->ref_cache[list][scan8[ 2]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); 1.420 + ref += 2; 1.421 + 1.422 + AV_WN32A(&mrs->ref_cache[list][scan8[ 8]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); 1.423 + AV_WN32A(&mrs->ref_cache[list][scan8[10]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); 1.424 + } 1.425 + } 1.426 + } 1.427 + 1.428 + /* 1.429 + 0 . T T. T T T T 1.430 + 1 L . .L . . . . 1.431 + 2 L . .L . . . . 1.432 + 3 . T TL . . . . 1.433 + 4 L . .L . . . . 1.434 + 5 L . .. . . . . 1.435 + */ 1.436 + 1.437 + if (IS_SKIP(mb_type)){ 1.438 + memset(mrs->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui 1.439 + } 1.440 + 1.441 + //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec) 1.442 + top_type = mrs->top_type; 1.443 + left_type = mrs->left_type; 1.444 + if(top_type){ 1.445 + AV_COPY32(&mrs->non_zero_count_cache[4+8*0], &m_top->non_zero_count[3*4]); 1.446 + } 1.447 + 1.448 + if(left_type){ 1.449 + mrs->non_zero_count_cache[3+8*1]= m_left->non_zero_count[3+0*4]; 1.450 + mrs->non_zero_count_cache[3+8*2]= m_left->non_zero_count[3+1*4]; 1.451 + mrs->non_zero_count_cache[3+8*3]= m_left->non_zero_count[3+2*4]; 1.452 + mrs->non_zero_count_cache[3+8*4]= m_left->non_zero_count[3+3*4]; 1.453 + } 1.454 + 1.455 + if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){ 1.456 + int list; 1.457 + for(list=0; list<s->list_count; list++){ 1.458 + if(USES_LIST(top_type, list)){ 1.459 + const int b_xy= 4*mb_x + 3*mrc->b_stride; 1.460 + const int b8_x= 4*mb_x + 2; 1.461 + int (*ref2frm)[64] = (void *) (s->ref2frm[0] + 2); 1.462 + AV_COPY128(mrs->mv_cache[list][scan8[0] + 0 - 1*8], mrs->motion_val_top[list][b_xy + 0]); 1.463 + 1.464 + mrs->ref_cache[list][scan8[0] + 0 - 1*8]= 1.465 + mrs->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][mrs->ref_index_top[list][b8_x + 0]]; 1.466 + mrs->ref_cache[list][scan8[0] + 2 - 1*8]= 1.467 + mrs->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][mrs->ref_index_top[list][b8_x + 1]]; 1.468 + }else{ 1.469 + AV_ZERO128(mrs->mv_cache[list][scan8[0] + 0 - 1*8]); 1.470 + AV_WN32A(&mrs->ref_cache[list][scan8[0] + 0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u); 1.471 + } 1.472 + 1.473 + if(USES_LIST(left_type, list)){ 1.474 + const int b_x = 4*(mb_x-1) + 3; 1.475 + const int b8_x= 4*(mb_x-1) + 1; 1.476 + int (*ref2frm)[64] = (void *) (s->ref2frm[0] + 2); 1.477 + AV_COPY32(mrs->mv_cache[list][scan8[0] - 1 + 0 ], mrs->motion_val[list][b_x + mrc->b_stride*0]); 1.478 + AV_COPY32(mrs->mv_cache[list][scan8[0] - 1 + 8 ], mrs->motion_val[list][b_x + mrc->b_stride*1]); 1.479 + AV_COPY32(mrs->mv_cache[list][scan8[0] - 1 +16 ], mrs->motion_val[list][b_x + mrc->b_stride*2]); 1.480 + AV_COPY32(mrs->mv_cache[list][scan8[0] - 1 +24 ], mrs->motion_val[list][b_x + mrc->b_stride*3]); 1.481 + 1.482 + mrs->ref_cache[list][scan8[0] - 1 + 0 ]= 1.483 + mrs->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][mrs->ref_index[list][b8_x + 2*0]]; 1.484 + mrs->ref_cache[list][scan8[0] - 1 +16 ]= 1.485 + mrs->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][mrs->ref_index[list][b8_x + 2*1]]; 1.486 + 1.487 + }else{ 1.488 + AV_ZERO32(mrs->mv_cache [list][scan8[0] - 1 + 0 ]); 1.489 + AV_ZERO32(mrs->mv_cache [list][scan8[0] - 1 + 8 ]); 1.490 + AV_ZERO32(mrs->mv_cache [list][scan8[0] - 1 +16 ]); 1.491 + AV_ZERO32(mrs->mv_cache [list][scan8[0] - 1 +24 ]); 1.492 + 1.493 + mrs->ref_cache[list][scan8[0] - 1 + 0 ]= 1.494 + mrs->ref_cache[list][scan8[0] - 1 + 8 ]= 1.495 + mrs->ref_cache[list][scan8[0] - 1 + 16 ]= 1.496 + mrs->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED; 1.497 + } 1.498 + } 1.499 + } 1.500 + return 1; 1.501 +} 1.502 + 1.503 +void ff_h264_filter_mb(MBRecContext *mrc, MBRecState *mrs, H264Slice *s, H264Mb *m, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr) { 1.504 + if (fill_filter_caches(mrc, mrs, s, m, m->mb_type)){ 1.505 + calc_bS_values(mrc, mrs, s, m, 4, 0); 1.506 + calc_bS_values(mrc, mrs, s, m, 4, 1); 1.507 + filter_mb_dir(mrc, mrs, s, m, img_y, img_cb, img_cr, 0); 1.508 + filter_mb_dir(mrc, mrs, s, m, img_y, img_cb, img_cr, 1); 1.509 + } 1.510 +}
