/*
* H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
* Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "h264_types.h"
#include "h264_parser.h"
#include "h264_nal.h"
#include "h264_entropy.h"
#include "h264_rec.h"
#include "h264_pred_mode.h"
#include "h264_misc.h"
// #undef NDEBUG
#include <assert.h>
#include "VSs_impl/VSs.h"

/*
 * Argument record handed to parse_task() through its void* parameter.
 * pc, nc and sbe are the three objects registered as task dependencies
 * (in that order); h is carried along without being tracked.
 */
typedef struct {
    ParserContext    *pc;   /* dependency 0, INOUT  */
    NalContext       *nc;   /* dependency 1, INOUT  */
    SliceBufferEntry *sbe;  /* dependency 2, OUTPUT */
    H264Context      *h;    /* not a tracked dependency */
} parse_taskArgs;

/* Access mode of each tracked dependency, in the order pc, nc, sbe. */
int32 parse_taskArgTypes[3] = {
    INOUT,
    INOUT,
    OUTPUT
};

/* Size of the object behind each tracked dependency, same order. */
size_t parse_taskArgSizes[3] = {
    sizeof(ParserContext),
    sizeof(NalContext),
    sizeof(SliceBufferEntry)
};

/*
 * Task body of the parse stage.
 * OmpSs original: #pragma omp task inout(*pc, *nc) output(*sbe)
 *
 * _data points to a parse_taskArgs record. On the first use of a slice
 * buffer entry it is set up with init_sb_entry() and its lines_total is set
 * to the picture height in macroblocks. Then av_read_frame_internal() is run
 * on the entry's bit reader and decode_nal_units() on the entry's slice.
 */
static void parse_task(void *_data){
    const parse_taskArgs *task = _data;
    SliceBufferEntry *entry = task->sbe;

    /* Lazy one-time initialisation of the buffer entry. */
    if (!entry->initialized){
        H264Context *ctx = task->h;

        init_sb_entry(ctx, entry);
        entry->lines_total = ctx->mb_height;
    }

    av_read_frame_internal(task->pc, &entry->gb);
    decode_nal_units(task->nc, &entry->slice, &entry->gb);
}

/*
 * VSs task-type descriptor for parse_task(): entry point, number of tracked
 * dependencies, size of the argument record and the per-dependency
 * access-mode / size tables.
 */
VSsTaskType parse_taskType = {
    .fn         = parse_task,
    /* derived from the table so the count cannot drift from it (== 3) */
    .numDeps    = sizeof parse_taskArgTypes / sizeof parse_taskArgTypes[0],
    .sizeOfArgs = sizeof(parse_taskArgs),
    .depsTypes  = parse_taskArgTypes,
    .depsSizes  = parse_taskArgSizes
};


/*
 * Argument record for decode_slice_entropy_task().
 * ec and sbe are the two tracked dependencies (in that order); h is carried
 * along without being tracked.
 */
typedef struct {
    EntropyContext   *ec;   /* dependency 0, INOUT */
    SliceBufferEntry *sbe;  /* dependency 1, INOUT */
    H264Context      *h;    /* not a tracked dependency */
} decode_slice_entropy_taskArgs;

/* Access mode of each tracked dependency, in the order ec, sbe. */
int32 decode_slice_entropy_taskArgTypes[2] = {
    INOUT,
    INOUT
};

/* Size of the object behind each tracked dependency, same order. */
size_t decode_slice_entropy_taskArgSizes[2] = {
    sizeof(EntropyContext),
    sizeof(SliceBufferEntry)
};

//#pragma omp task inout(*ec) inout(*sbe)
static void decode_slice_entropy_task(void *_data){
    decode_slice_entropy_taskArgs* args = (decode_slice_entropy_taskArgs*) _data;
    EntropyContext *ec = args->ec; 
    SliceBufferEntry *sbe = args->sbe; 
    H264Context *h = args->h;
    int i,j;
    H264Slice *s = &sbe->slice;
    GetBitContext *gb = &sbe->gb;
    H264Mb *mbs = sbe->mbs;
//     GetBitContext *gb = s->gb;
    CABACContext *c = &ec->c;

    if( !s->pps.cabac ){
        av_log(AV_LOG_ERROR, "Only cabac encoded streams are supported\n");
        return;
    }

    init_dequant_tables(s, ec);
    ec->curr_qscale = s->qscale;
    ec->last_qscale_diff = 0;
    ec->chroma_qp[0] = get_chroma_qp((H264Slice *) s, 0, s->qscale);
    ec->chroma_qp[1] = get_chroma_qp((H264Slice *) s, 1, s->qscale);

    /* realign */
    align_get_bits( gb );
    /* init cabac */
    ff_init_cabac_decoder( c, gb->buffer + get_bits_count(gb)/8, (get_bits_left(gb) + 7)/8);

    ff_h264_init_cabac_states(ec, s, c);

    for(j=0; j<ec->mb_height; j++){
        init_entropy_buf(ec, s, j);
        for(i=0; i<ec->mb_width; i++){
            int eos,ret;
            H264Mb *m = &mbs[i + j*ec->mb_width];
            m->mb_x=i;
            m->mb_y=j;
            ec->m = m;

            ret = ff_h264_decode_mb_cabac(ec, s, c);
            eos = get_cabac_terminate( c);
            (void) eos;
            if( ret < 0 || c->bytestream > c->bytestream_end + 2) {
                av_log(AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", m->mb_x, m->mb_y, c->bytestream_end - c->bytestream);
                return;
            }
        }
    }
    return;
}

/*
 * VSs task-type descriptor for decode_slice_entropy_task(): entry point,
 * number of tracked dependencies, size of the argument record and the
 * per-dependency access-mode / size tables.
 */
VSsTaskType decode_slice_entropy_taskType = {
    .fn         = decode_slice_entropy_task,
    /* derived from the table so the count cannot drift from it (== 2) */
    .numDeps    = sizeof decode_slice_entropy_taskArgTypes / sizeof decode_slice_entropy_taskArgTypes[0],
    .sizeOfArgs = sizeof(decode_slice_entropy_taskArgs),
    .depsTypes  = decode_slice_entropy_taskArgTypes,
    .depsSizes  = decode_slice_entropy_taskArgSizes
};


/*
 * Reconstruct the macroblocks of one super-macroblock.
 *
 * The super-macroblock spans smbc->smb_height macroblock rows starting at
 * row smb_y. Each successive row starts one macroblock further left than the
 * previous one (first row starts at column smb_x), and covers
 * smbc->smb_width columns. Positions outside the picture
 * (d->mb_width x d->mb_height) are skipped. init_mbrec_context() is called
 * once per row, including rows below the picture.
 *
 * mbs is the picture's macroblock array in raster order.
 */
static void decode_super_mb_block(MBRecContext *d, H264Slice *s, SuperMBContext *smbc, H264Mb *mbs, int smb_x, int smb_y){
    MBRecState mrs;

    for (int dy = 0; dy < smbc->smb_height; dy++){
        const int mb_y = smb_y + dy;
        const int first_x = smb_x - dy;   /* row is shifted left by its depth */

        init_mbrec_context(d, &mrs, s, mb_y);

        for (int dx = 0; dx < smbc->smb_width; dx++){
            const int mb_x = first_x + dx;

            if (mb_y >= d->mb_height || mb_x < 0 || mb_x >= d->mb_width)
                continue;
            h264_decode_mb_internal(d, &mrs, s, mbs + mb_y*d->mb_width + mb_x);
        }
    }
}

/*
 * Argument record for decode_super_mb_task().
 * d, sbe, ml, mur and m are the five tracked dependencies (in that order);
 * smbc is carried along without being tracked. ml and mur are only used for
 * dependency ordering: the submitters set them to the left and upper-right
 * neighbour task records (or to m itself where no such neighbour exists).
 */
typedef struct {
    MBRecContext     *d;     /* dependency 0, IN    */
    SliceBufferEntry *sbe;   /* dependency 1, IN    */
    SuperMBTask      *ml;    /* dependency 2, IN    */
    SuperMBTask      *mur;   /* dependency 3, IN    */
    SuperMBTask      *m;     /* dependency 4, INOUT */
    SuperMBContext   *smbc;  /* not a tracked dependency */
} decode_super_mb_taskArgs;

/* Access mode of each tracked dependency, in the order d, sbe, ml, mur, m. */
int32 decode_super_mb_taskArgTypes[5] = {
    IN,
    IN,
    IN,
    IN,
    INOUT
};

/* Size of the object behind each tracked dependency, same order. */
size_t decode_super_mb_taskArgSizes[5] = {
    sizeof(MBRecContext),
    sizeof(SliceBufferEntry),
    sizeof(SuperMBTask),
    sizeof(SuperMBTask),
    sizeof(SuperMBTask)
};

/*
 * Task body: reconstruct one super-macroblock.
 * OmpSs original: #pragma omp task input(*d, *sbe, *ml, *mur) inout(*m)
 *
 * _data points to a decode_super_mb_taskArgs record. The ml and mur members
 * exist only for dependency tracking and are not read here; the position of
 * the super-macroblock comes from m->smb_x / m->smb_y.
 */
static void decode_super_mb_task(void *_data){
    const decode_super_mb_taskArgs *task = _data;
    SliceBufferEntry *entry = task->sbe;
    const SuperMBTask *self = task->m;

    decode_super_mb_block(task->d, &entry->slice, task->smbc, entry->mbs, self->smb_x, self->smb_y);
}

/*
 * VSs task-type descriptor for decode_super_mb_task(): entry point, number
 * of tracked dependencies, size of the argument record and the
 * per-dependency access-mode / size tables.
 */
VSsTaskType decode_super_mb_taskType = {
    .fn         = decode_super_mb_task,
    /* derived from the table so the count cannot drift from it (== 5) */
    .numDeps    = sizeof decode_super_mb_taskArgTypes / sizeof decode_super_mb_taskArgTypes[0],
    .sizeOfArgs = sizeof(decode_super_mb_taskArgs),
    .depsTypes  = decode_super_mb_taskArgTypes,
    .depsSizes  = decode_super_mb_taskArgSizes
};



/*
 * Argument record for draw_edges_task().
 * d, sbe and sm are the three tracked dependencies (in that order); smbc and
 * line are carried along without being tracked. line points to a separately
 * allocated int holding the super-macroblock row index; the task frees it.
 */
typedef struct {
    MBRecContext     *d;     /* dependency 0, IN    */
    SliceBufferEntry *sbe;   /* dependency 1, IN    */
    SuperMBTask      *sm;    /* dependency 2, INOUT */
    SuperMBContext   *smbc;  /* not a tracked dependency */
    int              *line;  /* not a tracked dependency; freed by the task */
} draw_edges_taskArgs;

/* Access mode of each tracked dependency, in the order d, sbe, sm. */
int32 draw_edges_taskArgTypes[3] = {
    IN,
    IN,
    INOUT
};

/* Size of the object behind each tracked dependency, same order. */
size_t draw_edges_taskArgSizes[3] = {
    sizeof(MBRecContext),
    sizeof(SliceBufferEntry),
    sizeof(SuperMBTask)
};

/*
 * Task body: call draw_edges() for every macroblock row that belongs to one
 * super-macroblock row.
 * OmpSs original: #pragma omp task input(*d, *sbe) inout(*sm)
 *
 * _data points to a draw_edges_taskArgs record. *line is the index of the
 * super-macroblock row; macroblock rows [line*smb_height, (line+1)*smb_height)
 * are processed, clipped to the picture height d->mb_height. The int behind
 * `line` is released with VMS_App__free() before returning.
 */
static void draw_edges_task(void *_data){
    const draw_edges_taskArgs *task = _data;
    MBRecContext *d = task->d;
    SuperMBContext *smbc = task->smbc;
    H264Slice *slice = &task->sbe->slice;
    const int smb_row = *task->line;

    int mb_y = smb_row * smbc->smb_height;
    while (mb_y < (smb_row + 1)*smbc->smb_height && mb_y < d->mb_height){
        draw_edges(d, slice, mb_y);
        mb_y++;
    }

    VMS_App__free(task->line);
}
/*
 * VSs task-type descriptor for draw_edges_task(): entry point, number of
 * tracked dependencies, size of the argument record and the per-dependency
 * access-mode / size tables.
 */
VSsTaskType draw_edges_taskType = {
    .fn         = draw_edges_task,
    /* derived from the table so the count cannot drift from it (== 3) */
    .numDeps    = sizeof draw_edges_taskArgTypes / sizeof draw_edges_taskArgTypes[0],
    .sizeOfArgs = sizeof(draw_edges_taskArgs),
    .depsTypes  = draw_edges_taskArgTypes,
    .depsSizes  = draw_edges_taskArgSizes
};


/*
 * Reconstruct all macroblocks of the slice held in `sbe`.
 *
 * A super-macroblock context is acquired from `h`, and its first task grid
 * (smbc->smbs[0], nsmb_width x nsmb_height records in raster order) is
 * walked row by row. For every record one decode_super_mb task is submitted
 * that depends on the record to its left and on the record above-right;
 * where such a neighbour does not exist (first column, first row, last
 * column) the record itself is used in its place. After each row one
 * draw_edges task is submitted on the last record of that row.
 *
 * The function then waits on the last record touched
 * (OmpSs original: #pragma omp taskwait on(*sm)) and releases the context.
 */
static void decode_mb_in_slice(H264Context *h, MBRecContext *d, SliceBufferEntry *sbe){
    SuperMBContext *smbc = acquire_smbc(h);
    const int rows = smbc->nsmb_height;
    const int cols = smbc->nsmb_width;
    SuperMBTask *grid = smbc->smbs[0];
    SuperMBTask *cur = NULL;

    for (int row = 0; row < rows; row++){
        for (int col = 0; col < cols; col++){
            cur = &grid[row*cols + col];
            SuperMBTask *left     = (col > 0) ? cur - 1 : cur;
            SuperMBTask *up_right = (col < cols-1 && row > 0) ? cur + (1 - cols) : cur;

            decode_super_mb_taskArgs mb_args = {
                .d    = d,
                .sbe  = sbe,
                .ml   = left,
                .mur  = up_right,
                .m    = cur,
                .smbc = smbc
            };

            /* Dependency addresses, in the order d, sbe, ml, mur, m. */
            void **mb_deps = malloc(decode_super_mb_taskType.numDeps * sizeof(void*));
            mb_deps[0] = d;
            mb_deps[1] = sbe;
            mb_deps[2] = left;
            mb_deps[3] = up_right;
            mb_deps[4] = cur;
            VSs__submit_task(&decode_super_mb_taskType, &mb_args, mb_deps);
        }

        /* One edge-drawing task per super-macroblock row, ordered after the
         * last record of the row. The row index travels in its own small
         * allocation, which the task frees. */
        draw_edges_taskArgs edge_args = {
            .d    = d,
            .sbe  = sbe,
            .sm   = cur,
            .smbc = smbc
        };
        edge_args.line = VMS_App__malloc( sizeof(int) );
        *edge_args.line = row;

        /* Dependency addresses, in the order d, sbe, sm. */
        void **edge_deps = malloc(sizeof (void*) * draw_edges_taskType.numDeps);
        edge_deps[0] = d;
        edge_deps[1] = sbe;
        edge_deps[2] = cur;
        VSs__submit_task(&draw_edges_taskType, &edge_args, edge_deps);
    }

    VSs__taskwait_on(cur);

    release_smbc(h, smbc);
}

/*
 * Argument record for decode_slice_mb_task().
 * d and sbe are the two tracked dependencies (in that order); h is carried
 * along without being tracked.
 */
typedef struct {
    MBRecContext     *d;    /* dependency 0, INOUT */
    SliceBufferEntry *sbe;  /* dependency 1, INOUT */
    H264Context      *h;    /* not a tracked dependency */
} decode_slice_mb_taskArgs;

/* Access mode of each tracked dependency, in the order d, sbe. */
int32 decode_slice_mb_taskArgTypes[2] = {
    INOUT,
    INOUT
};

/* Size of the object behind each tracked dependency, same order. */
size_t decode_slice_mb_taskArgSizes[2] = {
    sizeof(MBRecContext),
    sizeof(SliceBufferEntry)
};

/*
 * Task body of the macroblock-reconstruction stage (non-3D-wave path).
 * OmpSs original: #pragma omp task inout(*d) inout(*sbe)
 *
 * _data points to a decode_slice_mb_taskArgs record. Steps, in order:
 *  1. For both reference lists, every entry whose ref_list_cpn is not -1 is
 *     pointed at the first h->dpb slot with reference >= 2 and a matching
 *     cpn; entries without a match are left untouched.
 *  2. get_dpb_entry() is called inside critical section 0 (the OmpSs
 *     `critical (dpb)` region).
 *  3. Unless h->no_mbd is set, the slice is reconstructed with
 *     decode_mb_in_slice().
 *  4. For every cpn in the slice's release list, the first h->dpb slot with
 *     that cpn is passed to release_dpb_entry(..., 2) inside critical
 *     section 0, and the release list is then emptied.
 */
static void decode_slice_mb_task(void *_data){
    const decode_slice_mb_taskArgs *task = _data;
    H264Context *h = task->h;
    SliceBufferEntry *entry = task->sbe;
    H264Slice *slice = &entry->slice;

    /* 1. resolve reference lists against the decoded picture buffer */
    for (int list = 0; list < 2; list++){
        for (int idx = 0; idx < slice->ref_count[list]; idx++){
            if (slice->ref_list_cpn[list][idx] != -1){
                int slot = 0;
                while (slot < h->max_dpb_cnt &&
                       !(h->dpb[slot].reference >= 2 && h->dpb[slot].cpn == slice->ref_list_cpn[list][idx]))
                    slot++;
                if (slot < h->max_dpb_cnt)
                    slice->dp_ref_list[list][idx] = &h->dpb[slot];
            }
        }
    }

    /* 2. obtain a picture-buffer entry for this slice */
    VSs__start_critical(0);
    get_dpb_entry(h, slice);
    VSs__end_critical(0);

    /* 3. reconstruct */
    if (!h->no_mbd)
        decode_mb_in_slice(h, task->d, entry);

    /* 4. release the pictures queued for release by this slice */
    for (int r = 0; r < slice->release_cnt; r++){
        int slot = 0;
        while (slot < h->max_dpb_cnt && h->dpb[slot].cpn != slice->release_ref_cpn[r])
            slot++;
        if (slot < h->max_dpb_cnt){
            VSs__start_critical(0);
            release_dpb_entry(h, &h->dpb[slot], 2);
            VSs__end_critical(0);
        }
    }
    slice->release_cnt = 0;
}

/*
 * VSs task-type descriptor for decode_slice_mb_task(): entry point, number
 * of tracked dependencies, size of the argument record and the
 * per-dependency access-mode / size tables.
 */
VSsTaskType decode_slice_mb_taskType = {
    .fn         = decode_slice_mb_task,
    /* derived from the table so the count cannot drift from it (== 2) */
    .numDeps    = sizeof decode_slice_mb_taskArgTypes / sizeof decode_slice_mb_taskArgTypes[0],
    .sizeOfArgs = sizeof(decode_slice_mb_taskArgs),
    .depsTypes  = decode_slice_mb_taskArgTypes,
    .depsSizes  = decode_slice_mb_taskArgSizes
};

// for static 3d wave
/*-------------------------------------------------------------------------------*/
/*
 * Argument record for decode_3dwave_super_mb_task().
 * d, sbe, ml, mur, mprev and m are the six tracked dependencies (in that
 * order); smbc is carried along without being tracked. ml, mur and mprev are
 * only used for dependency ordering; mprev is a task record taken from the
 * other task grid of the super-macroblock context.
 */
typedef struct {
    MBRecContext     *d;      /* dependency 0, IN    */
    SliceBufferEntry *sbe;    /* dependency 1, IN    */
    SuperMBTask      *ml;     /* dependency 2, IN    */
    SuperMBTask      *mur;    /* dependency 3, IN    */
    SuperMBTask      *mprev;  /* dependency 4, IN    */
    SuperMBTask      *m;      /* dependency 5, INOUT */
    SuperMBContext   *smbc;   /* not a tracked dependency */
} decode_3dwave_super_mb_taskArgs;

/* Access mode of each tracked dependency: d, sbe, ml, mur, mprev, m. */
int32 decode_3dwave_super_mb_taskArgTypes[6] = {
    IN,
    IN,
    IN,
    IN,
    IN,
    INOUT
};

/* Size of the object behind each tracked dependency, same order. */
size_t decode_3dwave_super_mb_taskArgSizes[6] = {
    sizeof(MBRecContext),
    sizeof(SliceBufferEntry),
    sizeof(SuperMBTask),
    sizeof(SuperMBTask),
    sizeof(SuperMBTask),
    sizeof(SuperMBTask)
};

/*
 * Task body: reconstruct one super-macroblock in the static 3D-wave path.
 * OmpSs original:
 *   #pragma omp task input(*d, *sbe, *ml, *mur, *mprev) inout(*m)
 *
 * _data points to a decode_3dwave_super_mb_taskArgs record. The work done is
 * the same as in decode_super_mb_task(); ml, mur and mprev exist only for
 * dependency tracking and are not read here.
 */
static void decode_3dwave_super_mb_task(void *_data){
    const decode_3dwave_super_mb_taskArgs *task = _data;
    SliceBufferEntry *entry = task->sbe;
    const SuperMBTask *self = task->m;

    decode_super_mb_block(task->d, &entry->slice, task->smbc, entry->mbs, self->smb_x, self->smb_y);
}

/*
 * VSs task-type descriptor for decode_3dwave_super_mb_task(): entry point,
 * number of tracked dependencies, size of the argument record and the
 * per-dependency access-mode / size tables.
 */
VSsTaskType decode_3dwave_super_mb_taskType = {
    .fn         = decode_3dwave_super_mb_task,
    /* derived from the table so the count cannot drift from it (== 6) */
    .numDeps    = sizeof decode_3dwave_super_mb_taskArgTypes / sizeof decode_3dwave_super_mb_taskArgTypes[0],
    .sizeOfArgs = sizeof(decode_3dwave_super_mb_taskArgs),
    .depsTypes  = decode_3dwave_super_mb_taskArgTypes,
    .depsSizes  = decode_3dwave_super_mb_taskArgSizes
};

// int init_ref_count=0;
/*
 * Argument record for init_ref_list_and_get_dpb_task().
 * d, sbe and init are the three tracked dependencies (in that order); h is
 * carried along without being tracked. The task never dereferences init;
 * only its address takes part in dependency tracking.
 */
typedef struct {
    MBRecContext     *d;     /* dependency 0, INOUT */
    SliceBufferEntry *sbe;   /* dependency 1, INOUT */
    int              *init;  /* dependency 2, INOUT (address only) */
    H264Context      *h;     /* not a tracked dependency */
} init_ref_list_and_get_dpb_taskArgs;

/* Access mode of each tracked dependency, in the order d, sbe, init. */
int32 init_ref_list_and_get_dpb_taskArgTypes[3] = {
    INOUT,
    INOUT,
    INOUT
};

/* Size of the object behind each tracked dependency, same order. */
size_t init_ref_list_and_get_dpb_taskArgSizes[3] = {
    sizeof(MBRecContext),
    sizeof(SliceBufferEntry),
    sizeof(int)
};

/*
 * Task body: prepare a slice for reconstruction in the static 3D-wave path.
 * OmpSs original: #pragma omp task inout(*d, *sbe, *init)
 *
 * _data points to an init_ref_list_and_get_dpb_taskArgs record. For both
 * reference lists, every entry whose ref_list_cpn is not -1 is pointed at
 * the first h->dpb slot with reference >= 2 and a matching cpn (entries
 * without a match are left untouched). Afterwards get_dpb_entry() is called
 * inside critical section 0 (the OmpSs `critical (dpb)` region).
 * The d and init members are dependency tokens only and are not read.
 */
static void init_ref_list_and_get_dpb_task(void *_data){
    const init_ref_list_and_get_dpb_taskArgs *task = _data;
    H264Context *h = task->h;
    H264Slice *slice = &task->sbe->slice;

    for (int list = 0; list < 2; list++){
        for (int idx = 0; idx < slice->ref_count[list]; idx++){
            if (slice->ref_list_cpn[list][idx] != -1){
                int slot = 0;
                while (slot < h->max_dpb_cnt &&
                       !(h->dpb[slot].reference >= 2 && h->dpb[slot].cpn == slice->ref_list_cpn[list][idx]))
                    slot++;
                if (slot < h->max_dpb_cnt)
                    slice->dp_ref_list[list][idx] = &h->dpb[slot];
            }
        }
    }

    VSs__start_critical(0);
    get_dpb_entry(h, slice);
    VSs__end_critical(0);
}

/*
 * VSs task-type descriptor for init_ref_list_and_get_dpb_task(): entry
 * point, number of tracked dependencies, size of the argument record and the
 * per-dependency access-mode / size tables.
 */
VSsTaskType init_ref_list_and_get_dpb_taskType = {
    .fn         = init_ref_list_and_get_dpb_task,
    /* derived from the table so the count cannot drift from it (== 3) */
    .numDeps    = sizeof init_ref_list_and_get_dpb_taskArgTypes / sizeof init_ref_list_and_get_dpb_taskArgTypes[0],
    .sizeOfArgs = sizeof(init_ref_list_and_get_dpb_taskArgs),
    .depsTypes  = init_ref_list_and_get_dpb_taskArgTypes,
    .depsSizes  = init_ref_list_and_get_dpb_taskArgSizes
};

/*
 * Submit the draw_edges task for one super-macroblock row.
 *
 * sm is the task record the edge task is ordered after (INOUT dependency);
 * row is stored in a fresh VMS_App__malloc'ed int that draw_edges_task()
 * frees. The task ID is the triple (k, row, col).
 */
static void submit_3dwave_draw_edges(MBRecContext *d, SliceBufferEntry *sbe, SuperMBContext *smbc,
                                     SuperMBTask *sm, int k, int row, int col){
    draw_edges_taskArgs draw_edges_task_args;
    draw_edges_task_args.d = d;
    draw_edges_task_args.sbe = sbe;
    draw_edges_task_args.sm = sm;
    draw_edges_task_args.smbc = smbc;
    draw_edges_task_args.line = VMS_App__malloc( sizeof(int) );
    *(draw_edges_task_args.line) = row;

    /* Dependency addresses, in the order d, sbe, sm. */
    void **depsAddrs = malloc(sizeof (void*) * draw_edges_taskType.numDeps);
    depsAddrs[0] = d;
    depsAddrs[1] = sbe;
    depsAddrs[2] = sm;

    int32 *taskID = VSs__create_taskID_of_size(3);
    taskID[1] = k;
    taskID[2] = row;
    taskID[3] = col;
    VSs__submit_task_with_ID(&draw_edges_taskType, &draw_edges_task_args, depsAddrs, taskID);
}

/*
 * Submit all reconstruction tasks of one frame for the static 3D-wave path.
 *
 * The super-macroblock context holds two task grids; this call takes the
 * grid at smbc->index, toggles the index, and treats the other grid as the
 * one used by the previous call ("prev").
 *
 * Rows [0, nsmb_3dheight) are submitted as decode_3dwave_super_mb tasks,
 * which in addition to the left / upper-right neighbours depend on the last
 * record of row (j + nsmb_height - nsmb_3dheight) of the prev grid.
 * Rows [nsmb_3dheight, nsmb_height) are submitted as plain decode_super_mb
 * tasks. Every row is followed by one draw_edges task.
 *
 * All tasks carry the ID (k, row, column); the draw_edges task of a row uses
 * the column value left over after the row loop.
 *
 * Returns the last task record visited, or NULL if the grid is empty.
 */
static SuperMBTask* add_decode_slice_3dwave_tasks(MBRecContext *d, SliceBufferEntry *sbe, SuperMBContext *smbc, int k){
    int i,j;

    const int smb_3d_height = smbc->nsmb_3dheight;
    const int smb_height = smbc->nsmb_height;
    const int smb_width = smbc->nsmb_width;
    const int smb_diff_prev = smb_height - smb_3d_height;
    SuperMBTask *sm = NULL;

    SuperMBTask *smbs = smbc->smbs[smbc->index++];
    smbc->index %= 2;
    SuperMBTask *smbs_prev = smbc->smbs[smbc->index]; /* index rotates -> next == prev */

    /* Rows that must additionally wait for the previous frame. */
    for(j=0; j<smb_3d_height; j++){
        for(i=0; i<smb_width; i++){
            sm = smbs + j*smb_width + i;
            /* Missing neighbours fall back to the record itself. */
            SuperMBTask *sml    = sm - ((i > 0) ? 1 : 0);
            SuperMBTask *smur   = sm + (((i < smb_width-1) && (j > 0)) ? -smb_width+1 : 0);
            SuperMBTask *smprev = smbs_prev + (j + smb_diff_prev+1)*smb_width - 1;

            decode_3dwave_super_mb_taskArgs decode_3dwave_super_mb_task_args;
            decode_3dwave_super_mb_task_args.d = d;
            decode_3dwave_super_mb_task_args.sbe = sbe;
            decode_3dwave_super_mb_task_args.smbc = smbc;
            decode_3dwave_super_mb_task_args.ml = sml;
            decode_3dwave_super_mb_task_args.mur = smur;
            decode_3dwave_super_mb_task_args.mprev = smprev;
            decode_3dwave_super_mb_task_args.m = sm;

            int32 *taskID = VSs__create_taskID_of_size(3);
            taskID[1] = k;
            taskID[2] = j;
            taskID[3] = i;

            /* Dependency addresses, in the order d, sbe, ml, mur, mprev, m. */
            void **depsAddrs = malloc(sizeof(void*) * decode_3dwave_super_mb_taskType.numDeps);
            depsAddrs[0] = d;
            depsAddrs[1] = sbe;
            depsAddrs[2] = sml;
            depsAddrs[3] = smur;
            depsAddrs[4] = smprev;
            /* The task type declares six dependencies; the INOUT record
             * itself must be registered too, otherwise the runtime reads an
             * uninitialised pointer for the sixth slot. */
            depsAddrs[5] = sm;
            VSs__submit_task_with_ID(&decode_3dwave_super_mb_taskType, &decode_3dwave_super_mb_task_args, depsAddrs, taskID);
        }
        submit_3dwave_draw_edges(d, sbe, smbc, sm, k, j, i);
    }

    /* Remaining rows only have intra-frame dependencies. */
    for(; j<smb_height; j++){
        for(i=0; i<smb_width; i++){
            sm = smbs + j*smb_width + i;
            SuperMBTask *sml  = sm - ((i > 0) ? 1 : 0);
            SuperMBTask *smur = sm + (((i < smb_width-1) && (j > 0)) ? -smb_width+1 : 0);

            decode_super_mb_taskArgs decode_super_mb_task_args;
            decode_super_mb_task_args.d = d;
            decode_super_mb_task_args.sbe = sbe;
            decode_super_mb_task_args.smbc = smbc;
            decode_super_mb_task_args.ml = sml;
            decode_super_mb_task_args.mur = smur;
            decode_super_mb_task_args.m = sm;

            /* Dependency addresses, in the order d, sbe, ml, mur, m. */
            void **depsAddrs = malloc(sizeof(void*) * decode_super_mb_taskType.numDeps);
            depsAddrs[0] = d;
            depsAddrs[1] = sbe;
            depsAddrs[2] = sml;
            depsAddrs[3] = smur;
            depsAddrs[4] = sm;

            int32 *taskID = VSs__create_taskID_of_size(3);
            taskID[1] = k;
            taskID[2] = j;
            taskID[3] = i;
            VSs__submit_task_with_ID(&decode_super_mb_taskType, &decode_super_mb_task_args, depsAddrs, taskID);
        }
        submit_3dwave_draw_edges(d, sbe, smbc, sm, k, j, i);
    }
    return sm;
}

/*
 * Argument record for release_ref_list_task().
 * d, sbe, lastsmb and release are the four tracked dependencies (in that
 * order); h and smbc are carried along without being tracked. The task never
 * dereferences lastsmb or release; only their addresses take part in
 * dependency tracking.
 */
typedef struct {
    MBRecContext     *d;        /* dependency 0, INOUT */
    SliceBufferEntry *sbe;      /* dependency 1, INOUT */
    SuperMBTask      *lastsmb;  /* dependency 2, IN    (address only) */
    int              *release;  /* dependency 3, INOUT (address only) */
    H264Context      *h;        /* not a tracked dependency */
    SuperMBContext   *smbc;     /* not a tracked dependency; released by the task */
} release_ref_list_taskArgs;

/* Access mode of each tracked dependency: d, sbe, lastsmb, release. */
int32 release_ref_list_taskArgTypes[4] = {
    INOUT,
    INOUT,
    IN,
    INOUT
};

/* Size of the object behind each tracked dependency, same order. */
size_t release_ref_list_taskArgSizes[4] = {
    sizeof(MBRecContext),
    sizeof(SliceBufferEntry),
    sizeof(SuperMBTask),
    sizeof(int)
};

/*
 * Task body: finish a frame in the static 3D-wave path.
 * OmpSs original: #pragma omp task inout(*d, *sbe, *release) input (*lastsmb)
 *
 * _data points to a release_ref_list_taskArgs record. For every cpn in the
 * slice's release list, the first h->dpb slot with that cpn is passed to
 * release_dpb_entry(..., 2) inside critical section 0 (the OmpSs
 * `critical (dpb)` region). The release list is then emptied and the
 * super-macroblock context is handed back with release_smbc().
 * The d, lastsmb and release members are dependency tokens only.
 */
static void release_ref_list_task(void *_data){
    const release_ref_list_taskArgs *task = _data;
    H264Context *h = task->h;
    H264Slice *slice = &task->sbe->slice;

    for (int r = 0; r < slice->release_cnt; r++){
        int slot = 0;
        while (slot < h->max_dpb_cnt && h->dpb[slot].cpn != slice->release_ref_cpn[r])
            slot++;
        if (slot < h->max_dpb_cnt){
            VSs__start_critical(0);
            release_dpb_entry(h, &h->dpb[slot], 2);
            VSs__end_critical(0);
        }
    }
    slice->release_cnt = 0;

    release_smbc(h, task->smbc);
}

/*
 * VSs task-type descriptor for release_ref_list_task(): entry point, number
 * of tracked dependencies, size of the argument record and the
 * per-dependency access-mode / size tables.
 */
VSsTaskType release_ref_list_taskType = {
    .fn         = release_ref_list_task,
    /* derived from the table so the count cannot drift from it (== 4) */
    .numDeps    = sizeof release_ref_list_taskArgTypes / sizeof release_ref_list_taskArgTypes[0],
    .sizeOfArgs = sizeof(release_ref_list_taskArgs),
    .depsTypes  = release_ref_list_taskArgTypes,
    .depsSizes  = release_ref_list_taskArgSizes
};

// static void decode_mb_static_3dwave(H264Context *h, int mb_height, int mb_width, MBRecContext *d, H264Slice *s, H264Mb *mbs, SuperMBTask *smbs, SuperMBTask *smbs_prev){
//
// }
/*-------------------------------------------------------------------------------*/
//end for static 3d wave

/*
 * Argument record for output_task().
 * oc and sbe are the two tracked dependencies (in that order); h is carried
 * along without being tracked.
 */
typedef struct {
    OutputContext    *oc;   /* dependency 0, INOUT */
    SliceBufferEntry *sbe;  /* dependency 1, IN    */
    H264Context      *h;    /* not a tracked dependency */
} output_taskArgs;

/* Access mode of each tracked dependency, in the order oc, sbe. */
int32 output_taskArgTypes[2] = {
    INOUT,
    IN
};

/* Size of the object behind each tracked dependency, same order. */
size_t output_taskArgSizes[2] = {
    sizeof(OutputContext),
    sizeof(SliceBufferEntry)
};

/*
 * Task body of the output stage.
 * OmpSs original: #pragma omp task inout (*oc) input(*sbe)
 *
 * _data points to an output_taskArgs record. The slice's current picture is
 * handed to output_frame(); if that call returns a picture, it is passed to
 * release_dpb_entry(..., 1) inside critical section 0 (the OmpSs
 * `critical (dpb)` region).
 */
static void output_task(void *_data){
    const output_taskArgs *task = _data;
    H264Context *h = task->h;

    DecodedPicture *done = output_frame(h, task->oc, task->sbe->slice.curr_pic, h->ofile, h->frame_width, h->frame_height);
    if (!done)
        return;

    VSs__start_critical(0);
    release_dpb_entry(h, done, 1);
    VSs__end_critical(0);
}

/*
 * VSs task-type descriptor for output_task(): entry point, number of tracked
 * dependencies, size of the argument record and the per-dependency
 * access-mode / size tables.
 */
VSsTaskType output_taskType = {
    .fn         = output_task,
    /* derived from the table so the count cannot drift from it (== 2) */
    .numDeps    = sizeof output_taskArgTypes / sizeof output_taskArgTypes[0],
    .sizeOfArgs = sizeof(output_taskArgs),
    .depsTypes  = output_taskArgTypes,
    .depsSizes  = output_taskArgSizes
};

/*
* The following code is the main loop of the file converter
*/

int h264_decode_ompss( H264Context *h) {
    
    const int bufs = h->pipe_bufs;

    ParserContext *pc;
    NalContext *nc;
    EntropyContext *ec[bufs];
    MBRecContext *rc[2];
    OutputContext *oc;
    SliceBufferEntry *sbe;
    SuperMBContext *smbc;

    DecodedPicture *out;
    int frames=0;
    
    int32* taskID;
    void** depsAddrs;

#if HAVE_LIBSDL2
    pthread_t sdl_thr;
    if (h->display){
        pthread_create(&sdl_thr, NULL, sdl_thread, h);
    }
#endif
    /*sbe= VMS_WL__malloc(sizeof(SliceBufferEntry) * bufs);
    if (sbe)
        memset(sbe, 0, sizeof(SliceBufferEntry) * bufs);*/
    sbe= av_mallocz(sizeof(SliceBufferEntry) * bufs);

    pc = get_parse_context(h->ifile);
    nc = get_nal_context(h->width, h->height);

    for(int i=0; i<bufs; i++){
        ec[i] = get_entropy_context( h );
    }

    for(int i=0; i<2; i++){
        rc[i] = get_mbrec_context(h);
    }

    oc = get_output_context( h );

    av_start_timer();
    int k=0; int init, release;
    if (h->static_3d && bufs < h->num_frames ){
        int num_pre_ed =0;
        for (num_pre_ed=0; num_pre_ed< bufs -1 && !pc->final_frame; num_pre_ed++){
            parse_taskArgs parse_task_args;
            parse_task_args.h = h;
            parse_task_args.pc = pc;
            parse_task_args.nc = nc;
            parse_task_args.sbe = &sbe[k%bufs];
            depsAddrs = malloc(sizeof(void*) * parse_taskType.numDeps);
            /*sizeof(ParserContext), sizeof(NalContext), sizeof(SliceBufferEntry)*/
            depsAddrs[0] = pc;
            depsAddrs[1] = nc;
            depsAddrs[2] = &sbe[k%bufs];
            taskID = VSs__create_taskID_of_size(2);
            taskID[1] = 1;
            taskID[2] = k;
            VSs__submit_task_with_ID(&parse_taskType, &parse_task_args, depsAddrs, taskID);
            
            decode_slice_entropy_taskArgs decode_slice_entropy_task_args;
            decode_slice_entropy_task_args.h = h;
            decode_slice_entropy_task_args.ec = ec[k%bufs];
            decode_slice_entropy_task_args.sbe = &sbe[k%bufs];
            depsAddrs = malloc(sizeof(void*) * decode_slice_entropy_taskType.numDeps);
            /*sizeof(EntropyContext), sizeof(SliceBufferEntry)*/
            depsAddrs[0] = ec[k%bufs];
            depsAddrs[1] = &sbe[k%bufs];
            taskID = VSs__create_taskID_of_size(2);
            taskID[1] = 2;
            taskID[2] = k;
            VSs__submit_task_with_ID(&decode_slice_entropy_taskType, &decode_slice_entropy_task_args, depsAddrs, taskID);        
            //#pragma omp taskwait on(*pc)
            VSs__taskwait_on(pc);
            k++;
        }

        while(!pc->final_frame && frames++ < h->num_frames && !h->quit){
            parse_taskArgs parse_task_args;
            parse_task_args.h = h;
            parse_task_args.pc = pc;
            parse_task_args.nc = nc;
            parse_task_args.sbe = &sbe[k%bufs];
            depsAddrs = malloc(sizeof(void*) * parse_taskType.numDeps);
            /*sizeof(ParserContext), sizeof(NalContext), sizeof(SliceBufferEntry)*/
            depsAddrs[0] = pc;
            depsAddrs[1] = nc;
            depsAddrs[2] = &sbe[k%bufs];
            taskID = VSs__create_taskID_of_size(2);
            taskID[1] = 3;
            taskID[2] = k;
            VSs__submit_task_with_ID(&parse_taskType, &parse_task_args, depsAddrs, taskID);
            
            decode_slice_entropy_taskArgs decode_slice_entropy_task_args;
            decode_slice_entropy_task_args.h = h;
            decode_slice_entropy_task_args.ec = ec[k%bufs];
            decode_slice_entropy_task_args.sbe = &sbe[k%bufs];
            depsAddrs = malloc(sizeof(void*) * decode_slice_entropy_taskType.numDeps);
            /*sizeof(EntropyContext), sizeof(SliceBufferEntry)*/
            depsAddrs[0] = ec[k%bufs];
            depsAddrs[1] = &sbe[k%bufs];
            taskID = VSs__create_taskID_of_size(2);
            taskID[1] = 4;
            taskID[2] = k;
            VSs__submit_task_with_ID(&decode_slice_entropy_taskType, &decode_slice_entropy_task_args, depsAddrs, taskID); 

            k++;
            
            init_ref_list_and_get_dpb_taskArgs init_ref_list_and_get_dpb_task_args;
            init_ref_list_and_get_dpb_task_args.h = h;
            init_ref_list_and_get_dpb_task_args.d = rc[k%2];
            init_ref_list_and_get_dpb_task_args.sbe = &sbe[k%bufs];
            init_ref_list_and_get_dpb_task_args.init = &init;
            depsAddrs = malloc(sizeof(void*) * init_ref_list_and_get_dpb_taskType.numDeps);
            /*sizeof(MBRecContext), sizeof(SliceBufferEntry), sizeof(int)*/
            depsAddrs[0] = rc[k%2];
            depsAddrs[1] = &sbe[k%bufs];
            depsAddrs[2] = &init;
            taskID = VSs__create_taskID_of_size(2);
            taskID[1] = 5;
            taskID[2] = k;
            VSs__submit_task_with_ID(&init_ref_list_and_get_dpb_taskType, &init_ref_list_and_get_dpb_task_args, depsAddrs, taskID); 

            smbc = acquire_smbc(h);
            SuperMBTask *lastsmb= add_decode_slice_3dwave_tasks(rc[k%2], &sbe[k%bufs], smbc, k);            
            release_ref_list_taskArgs release_ref_list_task_args;
            release_ref_list_task_args.h = h;
            release_ref_list_task_args.smbc = smbc;
            release_ref_list_task_args.d = rc[k%2];
            release_ref_list_task_args.sbe = &sbe[k%bufs];
            release_ref_list_task_args.lastsmb = lastsmb;
            release_ref_list_task_args.release = &release;
            depsAddrs = malloc(sizeof(void*) * release_ref_list_taskType.numDeps);
            /*sizeof(MBRecContext), sizeof(SliceBufferEntry), sizeof(SuperMBTask), sizeof(int)*/
            depsAddrs[0] = rc[k%2];
            depsAddrs[1] = &sbe[k%bufs];
            depsAddrs[2] = smbc;
            depsAddrs[3] = &release;
            taskID = VSs__create_taskID_of_size(2);
            taskID[1] = 6;
            taskID[2] = k;
            VSs__submit_task_with_ID(&release_ref_list_taskType, &release_ref_list_task_args, depsAddrs, taskID);

            output_taskArgs output_task_args;
            output_task_args.h = h;
            output_task_args.oc = oc;
            output_task_args.sbe = &sbe[k%bufs];
            depsAddrs = malloc(sizeof(void*) * output_taskType.numDeps);
            /*sizeof(OutputContext), sizeof(SliceBufferEntry)*/
            depsAddrs[0] = oc;
            depsAddrs[1] = &sbe[k%bufs];
            taskID = VSs__create_taskID_of_size(2);
            taskID[1] = 7;
            taskID[2] = k;
            VSs__submit_task_with_ID(&output_taskType, &output_task_args, depsAddrs, taskID);
            //#pragma omp taskwait on(*pc)
            VSs__taskwait_on(pc);
        }

        for (int i=0; i< num_pre_ed; i++){
            k++;
            init_ref_list_and_get_dpb_taskArgs init_ref_list_and_get_dpb_task_args;
            init_ref_list_and_get_dpb_task_args.h = h;
            init_ref_list_and_get_dpb_task_args.d = rc[k%2];
            init_ref_list_and_get_dpb_task_args.sbe = &sbe[k%bufs];
            init_ref_list_and_get_dpb_task_args.init = &init;
            depsAddrs = malloc(sizeof(void*) * init_ref_list_and_get_dpb_taskType.numDeps);
            /*sizeof(MBRecContext), sizeof(SliceBufferEntry), sizeof(int)*/
            depsAddrs[0] = rc[k%2];
            depsAddrs[1] = &sbe[k%bufs];
            depsAddrs[2] = &init;
            taskID = VSs__create_taskID_of_size(2);
            taskID[1] = 8;
            taskID[2] = i;
            VSs__submit_task_with_ID(&init_ref_list_and_get_dpb_taskType, &init_ref_list_and_get_dpb_task_args, depsAddrs, taskID); 
            smbc = acquire_smbc(h);
            SuperMBTask *lastsmb= add_decode_slice_3dwave_tasks(rc[k%2], &sbe[k%bufs], smbc, k);
            release_ref_list_taskArgs release_ref_list_task_args;
            release_ref_list_task_args.h = h;
            release_ref_list_task_args.smbc = smbc;
            release_ref_list_task_args.d = rc[k%2];
            release_ref_list_task_args.sbe = &sbe[k%bufs];
            release_ref_list_task_args.lastsmb = lastsmb;
            release_ref_list_task_args.release = &release;
            depsAddrs = malloc(sizeof(void*) * release_ref_list_taskType.numDeps);
            /*sizeof(MBRecContext), sizeof(SliceBufferEntry), sizeof(SuperMBTask), sizeof(int)*/
            depsAddrs[0] = rc[k%2];
            depsAddrs[1] = &sbe[k%bufs];
            depsAddrs[2] = smbc;
            depsAddrs[3] = &release;
            taskID = VSs__create_taskID_of_size(2);
            taskID[1] = 9;
            taskID[2] = k;
            VSs__submit_task_with_ID(&release_ref_list_taskType, &release_ref_list_task_args, depsAddrs, taskID); 

            output_taskArgs output_task_args;
            output_task_args.h = h;
            output_task_args.oc = oc;
            output_task_args.sbe = &sbe[k%bufs];
           depsAddrs = malloc(sizeof(void*) * output_taskType.numDeps);
            /*sizeof(OutputContext), sizeof(SliceBufferEntry)*/
            depsAddrs[0] = oc;
            depsAddrs[1] = &sbe[k%bufs];
            taskID = VSs__create_taskID_of_size(2);
            taskID[1] = 10;
            taskID[2] = k;
            VSs__submit_task_with_ID(&output_taskType, &output_task_args, depsAddrs, taskID);
        }

    } else {
        while(!pc->final_frame && frames++ < h->num_frames && !h->quit){
            
            taskID = VSs__create_taskID_of_size(1);
            taskID[1] = frames*10+1;
            parse_taskArgs parse_task_args;
            parse_task_args.h = h;
            parse_task_args.pc = pc;
            parse_task_args.nc = nc;
            parse_task_args.sbe = &sbe[k%bufs];
            depsAddrs = malloc(sizeof(void*) * parse_taskType.numDeps);
            /*sizeof(ParserContext), sizeof(NalContext), sizeof(SliceBufferEntry)*/
            depsAddrs[0] = pc;
            depsAddrs[1] = nc;
            depsAddrs[2] = &sbe[k%bufs];
            VSs__submit_task_with_ID(&parse_taskType, &parse_task_args, depsAddrs, taskID);

            taskID = VSs__create_taskID_of_size(1);
            taskID[1] = frames*10+2;
            decode_slice_entropy_taskArgs decode_slice_entropy_task_args;
            decode_slice_entropy_task_args.h = h;
            decode_slice_entropy_task_args.ec = ec[k%bufs];
            decode_slice_entropy_task_args.sbe = &sbe[k%bufs];
            depsAddrs = malloc(sizeof(void*) * decode_slice_entropy_taskType.numDeps);
            /*sizeof(EntropyContext), sizeof(SliceBufferEntry)*/
            depsAddrs[0] = ec[k%bufs];
            depsAddrs[1] = &sbe[k%bufs];
            VSs__submit_task_with_ID(&decode_slice_entropy_taskType, &decode_slice_entropy_task_args, depsAddrs, taskID);

            taskID = VSs__create_taskID_of_size(1);
            taskID[1] = frames*10+3;
            decode_slice_mb_taskArgs decode_slice_mb_task_args;
            decode_slice_mb_task_args.h = h;
            decode_slice_mb_task_args.d = rc[0];
            decode_slice_mb_task_args.sbe = &sbe[k%bufs];
            depsAddrs = malloc(sizeof(void*) * decode_slice_mb_taskType.numDeps);
            /*sizeof(MBRecContext), sizeof(SliceBufferEntry)*/
            depsAddrs[0] = rc[0];
            depsAddrs[1] = &sbe[k%bufs];
            VSs__submit_task_with_ID(&decode_slice_mb_taskType, &decode_slice_mb_task_args, depsAddrs, taskID); 

            taskID = VSs__create_taskID_of_size(1);
            taskID[1] = frames*10+4;
            output_taskArgs output_task_args;
            output_task_args.h = h;
            output_task_args.oc = oc;
            output_task_args.sbe = &sbe[k%bufs];
            depsAddrs = malloc(sizeof(void*) * output_taskType.numDeps);
            /*sizeof(OutputContext), sizeof(SliceBufferEntry)*/
            depsAddrs[0] = oc;
            depsAddrs[1] = &sbe[k%bufs];
            VSs__submit_task_with_ID(&output_taskType, &output_task_args, depsAddrs, taskID);
            //#pragma omp taskwait on(*pc)
            VSs__taskwait_on(pc);
            k++;
        }
    }
    //#pragma omp taskwait
    VSs__taskwait();
    
    while ((out=output_frame(h, oc, NULL, h->ofile, h->frame_width, h->frame_height))) ;

    //print_report(oc->frame_number, oc->video_size, 1, h->verbose);
    h->num_frames = oc->frame_number;
    /* finished ! */

    free_parse_context(pc);
    free_nal_context  (nc);
    free_output_context(oc);
    for (int i=0; i<bufs; i++){
        free_sb_entry(&sbe[i]);
        free_entropy_context(ec[i]);
    }
    av_free(sbe);

    for (int i=0; i<2; i++){
        free_mbrec_context(rc[i]);
    }

#if HAVE_LIBSDL2
    if (h->display){
        signal_sdl_exit(h);
        pthread_join(sdl_thr, NULL);
    }
#endif

    return 0;
}
