diff libavcodec/h264_ompss.c @ 2:897f711a7157

rearrange to work with autoconf
author Nina Engelhardt <nengel@mailbox.tu-berlin.de>
date Tue, 25 Sep 2012 15:55:33 +0200
parents
children 0b056460c67d
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/libavcodec/h264_ompss.c	Tue Sep 25 15:55:33 2012 +0200
     1.3 @@ -0,0 +1,401 @@
     1.4 +/*
     1.5 +* H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
     1.6 +* Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
     1.7 +*
     1.8 +* This file is part of FFmpeg.
     1.9 +*
    1.10 +* FFmpeg is free software; you can redistribute it and/or
    1.11 +* modify it under the terms of the GNU Lesser General Public
    1.12 +* License as published by the Free Software Foundation; either
    1.13 +* version 2.1 of the License, or (at your option) any later version.
    1.14 +*
    1.15 +* FFmpeg is distributed in the hope that it will be useful,
    1.16 +* but WITHOUT ANY WARRANTY; without even the implied warranty of
    1.17 +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    1.18 +* Lesser General Public License for more details.
    1.19 +*
    1.20 +* You should have received a copy of the GNU Lesser General Public
    1.21 +* License along with FFmpeg; if not, write to the Free Software
    1.22 +* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
    1.23 +*/
    1.24 +#include "h264_types.h"
    1.25 +#include "h264_parser.h"
    1.26 +#include "h264_nal.h"
    1.27 +#include "h264_entropy.h"
    1.28 +#include "h264_rec.h"
    1.29 +#include "h264_pred_mode.h"
    1.30 +#include "h264_misc.h"
    1.31 +// #undef NDEBUG
    1.32 +#include <assert.h>
    1.33 +
    1.34 +#pragma omp task inout(*pc, *nc) output(*sbe)
    1.35 +static void parse_task(H264Context *h, ParserContext *pc, NalContext *nc, SliceBufferEntry *sbe){
    1.36 +    H264Slice *s;
    1.37 +
    1.38 +    if (!sbe->initialized){
    1.39 +        init_sb_entry(h, sbe);
    1.40 +        sbe->lines_total=h->mb_height;
    1.41 +    }
    1.42 +
    1.43 +    av_read_frame_internal(pc, &sbe->gb);
    1.44 +    s = &sbe->slice;
    1.45 +
    1.46 +    decode_nal_units(nc, s, &sbe->gb);
    1.47 +}
    1.48 +
    1.49 +#pragma omp task inout(*ec) inout(*sbe)
    1.50 +static void decode_slice_entropy_task(H264Context *h, EntropyContext *ec, SliceBufferEntry *sbe){
    1.51 +    int i,j;
    1.52 +    H264Slice *s = &sbe->slice;
    1.53 +    GetBitContext *gb = &sbe->gb;
    1.54 +    H264Mb *mbs = sbe->mbs;
    1.55 +//     GetBitContext *gb = s->gb;
    1.56 +    CABACContext *c = &ec->c;
    1.57 +
    1.58 +    if( !s->pps.cabac ){
    1.59 +        av_log(AV_LOG_ERROR, "Only cabac encoded streams are supported\n");
    1.60 +        return ;
    1.61 +    }
    1.62 +
    1.63 +    init_dequant_tables(s, ec);
    1.64 +    ec->curr_qscale = s->qscale;
    1.65 +    ec->last_qscale_diff = 0;
    1.66 +    ec->chroma_qp[0] = get_chroma_qp((H264Slice *) s, 0, s->qscale);
    1.67 +    ec->chroma_qp[1] = get_chroma_qp((H264Slice *) s, 1, s->qscale);
    1.68 +
    1.69 +    /* realign */
    1.70 +    align_get_bits( gb );
    1.71 +    /* init cabac */
    1.72 +    ff_init_cabac_decoder( c, gb->buffer + get_bits_count(gb)/8, (get_bits_left(gb) + 7)/8);
    1.73 +
    1.74 +    ff_h264_init_cabac_states(ec, s, c);
    1.75 +
    1.76 +    for(j=0; j<ec->mb_height; j++){
    1.77 +        init_entropy_buf(ec, s, j);
    1.78 +        for(i=0; i<ec->mb_width; i++){
    1.79 +            int eos,ret;
    1.80 +            H264Mb *m = &mbs[i + j*ec->mb_width];
    1.81 +            m->mb_x=i;
    1.82 +            m->mb_y=j;
    1.83 +            ec->m = m;
    1.84 +
    1.85 +            ret = ff_h264_decode_mb_cabac(ec, s, c);
    1.86 +            eos = get_cabac_terminate( c);
    1.87 +            (void) eos;
    1.88 +            if( ret < 0 || c->bytestream > c->bytestream_end + 2) {
    1.89 +                av_log(AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", m->mb_x, m->mb_y, c->bytestream_end - c->bytestream);
    1.90 +                return ;
    1.91 +            }
    1.92 +        }
    1.93 +    }
    1.94 +}
    1.95 +
    1.96 +static void decode_super_mb_block(MBRecContext *d, H264Slice *s, SuperMBContext *smbc, H264Mb *mbs, int smb_x, int smb_y){
    1.97 +    MBRecState mrs;
    1.98 +//     memset(&mrs, 0, sizeof(MBRecState));
    1.99 +
   1.100 +    for (int k=0, i= smb_y; i< smb_y + smbc->smb_height; i++, k++){
   1.101 +        init_mbrec_context(d, &mrs, s, i);
   1.102 +        for (int j= smb_x -k ; j< smb_x - k + smbc->smb_width; j++){
   1.103 +            if (i< d->mb_height && j >= 0 && j < d->mb_width){
   1.104 +                h264_decode_mb_internal (d, &mrs, s, &mbs[i*d->mb_width+j]);
   1.105 +            }
   1.106 +        }
   1.107 +    }
   1.108 +}
   1.109 +
   1.110 +#pragma omp task input(*d, *sbe, *ml, *mur) inout(*m)
   1.111 +static void decode_super_mb_task(MBRecContext *d, SliceBufferEntry *sbe, SuperMBContext *smbc, SuperMBTask *ml,
   1.112 +SuperMBTask *mur, SuperMBTask *m){
   1.113 +    H264Slice *s = &sbe->slice;
   1.114 +    H264Mb *mbs = sbe->mbs;
   1.115 +    decode_super_mb_block(d, s, smbc, mbs, m->smb_x, m->smb_y);
   1.116 +}
   1.117 +
   1.118 +#pragma omp task input(*d, *sbe) inout(*sm)
   1.119 +static void draw_edges_task(MBRecContext *d, SliceBufferEntry *sbe, SuperMBContext *smbc, SuperMBTask *sm, int line){
   1.120 +    H264Slice *s = &sbe->slice;
   1.121 +    for (int i=line*smbc->smb_height; i< (line+1)*smbc->smb_height && i< d->mb_height; i++)
   1.122 +        draw_edges(d, s, i);
   1.123 +}
   1.124 +
   1.125 +static void decode_mb_in_slice(H264Context *h, MBRecContext *d, SliceBufferEntry *sbe){
   1.126 +    int i,j;
   1.127 +
   1.128 +    SuperMBContext *smbc = acquire_smbc(h);
   1.129 +    int smb_height =smbc->nsmb_height, smb_width= smbc->nsmb_width;
   1.130 +    SuperMBTask *smbs = smbc->smbs[0];
   1.131 +
   1.132 +    SuperMBTask *sm=NULL, *sml, *smur;
   1.133 +    for(j=0; j< smb_height; j++){
   1.134 +        for(i=0; i< smb_width; i++){
   1.135 +            sm = smbs + j*smb_width + i;
   1.136 +            sml  = sm - ((i > 0) ? 1: 0);
   1.137 +            smur = sm + (((i < smb_width-1) && (j >0))  ? -smb_width+1: 0);
   1.138 +            decode_super_mb_task(d, sbe, smbc, sml, smur, sm);
   1.139 +        }
   1.140 +        draw_edges_task(d, sbe, smbc, sm, j);
   1.141 +    }
   1.142 +    #pragma omp taskwait on(*sm)
   1.143 +
   1.144 +    release_smbc(h, smbc);
   1.145 +}
   1.146 +
   1.147 +#pragma omp task inout(*d) inout(*sbe)
   1.148 +static void decode_slice_mb_task(H264Context *h, MBRecContext *d, SliceBufferEntry *sbe){
   1.149 +    H264Slice *s = &sbe->slice;
   1.150 +
   1.151 +    for (int i=0; i<2; i++){
   1.152 +        for(int j=0; j< s->ref_count[i]; j++){
   1.153 +            if (s->ref_list_cpn[i][j] ==-1)
   1.154 +                continue;
   1.155 +            int k;
   1.156 +            for (k=0; k< h->max_dpb_cnt; k++){
   1.157 +                if(h->dpb[k].reference >= 2 && h->dpb[k].cpn == s->ref_list_cpn[i][j]){
   1.158 +                    s->dp_ref_list[i][j] = &h->dpb[k];
   1.159 +                    break;
   1.160 +                }
   1.161 +            }
   1.162 +        }
   1.163 +    }
   1.164 +
   1.165 +    #pragma omp critical (dpb)
   1.166 +    get_dpb_entry(h, s);
   1.167 +
   1.168 +    if (!h->no_mbd){
   1.169 +        decode_mb_in_slice (h, d, sbe);
   1.170 +    }
   1.171 +
   1.172 +    for (int i=0; i<s->release_cnt; i++){
   1.173 +        for(int j=0; j<h->max_dpb_cnt; j++){
   1.174 +            if(h->dpb[j].cpn== s->release_ref_cpn[i]){
   1.175 +                #pragma omp critical (dpb)
   1.176 +                release_dpb_entry(h, &h->dpb[j], 2);
   1.177 +                break;
   1.178 +            }
   1.179 +        }
   1.180 +    }
   1.181 +    s->release_cnt=0;
   1.182 +}
   1.183 +
   1.184 +// for static 3d wave
   1.185 +/*-------------------------------------------------------------------------------*/
   1.186 +#pragma omp task input(*d, *sbe, *ml, *mur, *mprev) inout(*m)
   1.187 +static void decode_3dwave_super_mb_task(MBRecContext *d, SliceBufferEntry *sbe, SuperMBContext *smbc, SuperMBTask *ml,
   1.188 +SuperMBTask *mur, SuperMBTask *mprev, SuperMBTask *m){
   1.189 +    H264Slice *s = &sbe->slice;
   1.190 +    H264Mb *mbs = sbe->mbs;
   1.191 +
   1.192 +    decode_super_mb_block(d, s, smbc, mbs, m->smb_x, m->smb_y);
   1.193 +}
   1.194 +
   1.195 +// int init_ref_count=0;
   1.196 +#pragma omp task inout(*d, *sbe, *init)
   1.197 +static void init_ref_list_and_get_dpb_task(H264Context *h, MBRecContext *d, SliceBufferEntry *sbe, int *init){
   1.198 +    H264Slice *s = &sbe->slice;
   1.199 +    for (int i=0; i<2; i++){
   1.200 +        for(int j=0; j< s->ref_count[i]; j++){
   1.201 +            if (s->ref_list_cpn[i][j] ==-1)
   1.202 +                continue;
   1.203 +            int k;
   1.204 +            for (k=0; k<h->max_dpb_cnt; k++){
   1.205 +                if(h->dpb[k].reference >= 2 && h->dpb[k].cpn == s->ref_list_cpn[i][j]){
   1.206 +                    s->dp_ref_list[i][j] = &h->dpb[k];
   1.207 +                    break;
   1.208 +                }
   1.209 +            }
   1.210 +        }
   1.211 +    }
   1.212 +
   1.213 +    #pragma omp critical (dpb)
   1.214 +    get_dpb_entry(h, s);
   1.215 +
   1.216 +}
   1.217 +
   1.218 +static SuperMBTask* add_decode_slice_3dwave_tasks(MBRecContext *d, SliceBufferEntry *sbe, SuperMBContext *smbc){
   1.219 +    int i,j;
   1.220 +    
   1.221 +    int smb_3d_height =smbc->nsmb_3dheight;
   1.222 +    int smb_height =smbc->nsmb_height, smb_width= smbc->nsmb_width;
   1.223 +    int smb_diff_prev = smb_height - smb_3d_height;
   1.224 +    SuperMBTask *sm=NULL, *sml, *smur, *smprev;
   1.225 +
   1.226 +    SuperMBTask *smbs = smbc->smbs[smbc->index++]; smbc->index%=2; 
   1.227 +    SuperMBTask *smbs_prev = smbc->smbs[smbc->index]; // index rotates -> next == prev
   1.228 +    
   1.229 +    for(j=0; j<smb_3d_height ; j++){
   1.230 +        for(i=0; i< smb_width; i++){
   1.231 +            sm = smbs + j*smb_width + i;
   1.232 +            sml  = sm - ((i > 0) ? 1: 0);
   1.233 +            smur = sm + (((i < smb_width-1) && (j >0))  ? -smb_width+1: 0);
   1.234 +            smprev = smbs_prev + (j + smb_diff_prev+1)*smb_width -1;
   1.235 +            decode_3dwave_super_mb_task(d, sbe, smbc, sml, smur, smprev, sm);
   1.236 +        }
   1.237 +        draw_edges_task(d, sbe, smbc, sm, j);
   1.238 +    }
   1.239 +
   1.240 +    for(; j< smb_height; j++){
   1.241 +        for(i=0; i< smb_width; i++){
   1.242 +            sm = smbs + j*smb_width + i;
   1.243 +            sml  = sm - ((i > 0) ? 1: 0);
   1.244 +            smur = sm + (((i < smb_width-1) && (j >0))  ? -smb_width+1: 0);
   1.245 +            decode_super_mb_task(d, sbe, smbc, sml, smur, sm);
   1.246 +        }
   1.247 +        draw_edges_task(d, sbe, smbc, sm, j);
   1.248 +    }
   1.249 +    return sm;
   1.250 +}
   1.251 +
   1.252 +#pragma omp task inout(*d, *sbe, *release) input (*lastsmb)
   1.253 +static void release_ref_list_task(H264Context *h, SuperMBContext *smbc, MBRecContext *d, SliceBufferEntry *sbe, SuperMBTask *lastsmb, int *release){
   1.254 +    H264Slice *s = &sbe->slice;
   1.255 +    for (int i=0; i<s->release_cnt; i++){
   1.256 +        for(int j=0; j<h->max_dpb_cnt; j++){
   1.257 +            if(h->dpb[j].cpn== s->release_ref_cpn[i]){
   1.258 +                #pragma omp critical (dpb)
   1.259 +                release_dpb_entry(h, &h->dpb[j], 2);
   1.260 +                break;
   1.261 +            }
   1.262 +        }
   1.263 +    }
   1.264 +    s->release_cnt=0;
   1.265 +
   1.266 +    release_smbc(h, smbc);
   1.267 +    
   1.268 +}
   1.269 +
   1.270 +// static void decode_mb_static_3dwave(H264Context *h, int mb_height, int mb_width, MBRecContext *d, H264Slice *s, H264Mb *mbs, SuperMBTask *smbs, SuperMBTask *smbs_prev){
   1.271 +//
   1.272 +// }
   1.273 +/*-------------------------------------------------------------------------------*/
   1.274 +//end for static 3d wave
   1.275 +
   1.276 +#pragma omp task inout (*oc) input(*sbe)
   1.277 +static void output_task(H264Context *h, OutputContext *oc, SliceBufferEntry *sbe){
   1.278 +    DecodedPicture* out =output_frame(h, oc, sbe->slice.curr_pic, h->ofile, h->frame_width, h->frame_height);
   1.279 +    if (out){
   1.280 +        #pragma omp critical (dpb)
   1.281 +        release_dpb_entry(h, out, 1);
   1.282 +    }
   1.283 +    print_report(oc->frame_number, oc->video_size, 0, h->verbose);
   1.284 +}
   1.285 +
   1.286 +/*
   1.287 +* The following code is the main loop of the file converter
   1.288 +*/
   1.289 +//Put VMS entry point here
   1.290 +int h264_decode_ompss( H264Context *h) {
   1.291 +    const int bufs = h->pipe_bufs;
   1.292 +
   1.293 +    ParserContext *pc;
   1.294 +    NalContext *nc;
   1.295 +    EntropyContext *ec[bufs];
   1.296 +    MBRecContext *rc[2];
   1.297 +    OutputContext *oc;
   1.298 +    SliceBufferEntry *sbe;
   1.299 +    SuperMBContext *smbc;
   1.300 +
   1.301 +    DecodedPicture *out;
   1.302 +    int frames=0;
   1.303 +
   1.304 +#if HAVE_LIBSDL2
   1.305 +    pthread_t sdl_thr;
   1.306 +    if (h->display){
   1.307 +        pthread_create(&sdl_thr, NULL, sdl_thread, h);
   1.308 +    }
   1.309 +#endif
   1.310 +    sbe= av_mallocz(sizeof(SliceBufferEntry) * bufs);
   1.311 +
   1.312 +
   1.313 +    pc = get_parse_context(h->ifile);
   1.314 +    nc = get_nal_context(h->width, h->height);
   1.315 +
   1.316 +    for(int i=0; i<bufs; i++){
   1.317 +        ec[i] = get_entropy_context( h );
   1.318 +    }
   1.319 +
   1.320 +    for(int i=0; i<2; i++){
   1.321 +        rc[i] = get_mbrec_context(h);
   1.322 +    }
   1.323 +
   1.324 +    oc = get_output_context( h );
   1.325 +
   1.326 +    av_start_timer();
   1.327 +    int k=0; int init, release;
   1.328 +    if (h->static_3d && bufs < h->num_frames ){
   1.329 +        int num_pre_ed =0;
   1.330 +        for (num_pre_ed=0; num_pre_ed< bufs -1 && !pc->final_frame; num_pre_ed++){
   1.331 +            parse_task( h, pc, nc, &sbe[k%bufs] );
   1.332 +            decode_slice_entropy_task(h, ec[k%bufs], &sbe[k%bufs]);
   1.333 +            #pragma omp taskwait on(*pc)
   1.334 +            k++;
   1.335 +        }
   1.336 +
   1.337 +        while(!pc->final_frame && frames++ < h->num_frames && !h->quit){
   1.338 +            parse_task( h, pc, nc, &sbe[k%bufs] );
   1.339 +            decode_slice_entropy_task(h, ec[k%bufs], &sbe[k%bufs]);
   1.340 +
   1.341 +            k++;
   1.342 +
   1.343 +            init_ref_list_and_get_dpb_task(h, rc[k%2], &sbe[k%bufs], &init);
   1.344 +            smbc = acquire_smbc(h);
   1.345 +            SuperMBTask *lastsmb= add_decode_slice_3dwave_tasks(rc[k%2], &sbe[k%bufs], smbc);
   1.346 +            release_ref_list_task(h, smbc, rc[k%2], &sbe[k%bufs], lastsmb, &release);
   1.347 +
   1.348 +            output_task (h, oc, &sbe[k%bufs]);
   1.349 +            #pragma omp taskwait on(*pc)
   1.350 +        }
   1.351 +
   1.352 +        for (int i=0; i< num_pre_ed; i++){
   1.353 +            k++;
   1.354 +            init_ref_list_and_get_dpb_task(h, rc[k%2], &sbe[k%bufs], &init);
   1.355 +            smbc = acquire_smbc(h);
   1.356 +            SuperMBTask *lastsmb= add_decode_slice_3dwave_tasks(rc[k%2], &sbe[k%bufs], smbc);
   1.357 +            release_ref_list_task(h, smbc, rc[k%2], &sbe[k%bufs], lastsmb, &release);
   1.358 +
   1.359 +            output_task (h, oc, &sbe[k%bufs]);
   1.360 +        }
   1.361 +
   1.362 +    } else {
   1.363 +        while(!pc->final_frame && frames++ < h->num_frames && !h->quit){
   1.364 +            parse_task( h, pc, nc, &sbe[k%bufs] );
   1.365 +
   1.366 +            decode_slice_entropy_task(h, ec[k%bufs], &sbe[k%bufs]);
   1.367 +
   1.368 +            decode_slice_mb_task(h, rc[0], &sbe[k%bufs]);
   1.369 +
   1.370 +            output_task (h, oc, &sbe[k%bufs]);
   1.371 +            #pragma omp taskwait on(*pc)
   1.372 +            k++;
   1.373 +        }
   1.374 +    }
   1.375 +    #pragma omp taskwait
   1.376 +
   1.377 +    while ((out=output_frame(h, oc, NULL, h->ofile, h->frame_width, h->frame_height))) ;
   1.378 +
   1.379 +    print_report(oc->frame_number, oc->video_size, 1, h->verbose);
   1.380 +    h->num_frames = oc->frame_number;
   1.381 +    /* finished ! */
   1.382 +
   1.383 +    free_parse_context(pc);
   1.384 +    free_nal_context  (nc);
   1.385 +    free_output_context(oc);
   1.386 +    for (int i=0; i<bufs; i++){
   1.387 +        free_sb_entry(&sbe[i]);
   1.388 +        free_entropy_context(ec[i]);
   1.389 +    }
   1.390 +    av_free(sbe);
   1.391 +
   1.392 +    for (int i=0; i<2; i++){
   1.393 +        free_mbrec_context(rc[i]);
   1.394 +    }
   1.395 +
   1.396 +#if HAVE_LIBSDL2
   1.397 +    if (h->display){
   1.398 +        signal_sdl_exit(h);
   1.399 +        pthread_join(sdl_thr, NULL);
   1.400 +    }
   1.401 +#endif
   1.402 +
   1.403 +    return 0;
   1.404 +}