changeset 3:0b056460c67d

changed code to use VSs
author Nina Engelhardt <nengel@mailbox.tu-berlin.de>
date Mon, 29 Oct 2012 16:44:27 +0100
parents 897f711a7157
children 96e628866d41
files h264dec.c libavcodec/h264.h libavcodec/h264_misc.c libavcodec/h264_ompss.c
diffstat 4 files changed, 452 insertions(+), 72 deletions(-) [+]
line diff
     1.1 --- a/h264dec.c	Tue Sep 25 15:55:33 2012 +0200
     1.2 +++ b/h264dec.c	Mon Oct 29 16:44:27 2012 +0100
     1.3 @@ -20,6 +20,7 @@
     1.4  
     1.5  #include <assert.h>
     1.6  
     1.7 +#include "VSs_impl/VSs.h"
     1.8  
     1.9  static const char program_name[] = "h264dec";
    1.10  static const int program_birth_year = 2010;
    1.11 @@ -259,8 +260,7 @@
    1.12  
    1.13      H264Context *h = get_h264dec_context(file_name, ifile, ofile, frame_width, frame_height, &cli_opts);
    1.14  #if OMPSS
    1.15 -    if (h264_decode_ompss( h ) < 0)
    1.16 -        av_exit(-1);
    1.17 +    VSs__create_seed_slave_and_do_work( &h264_decode_ompss , (void*)h );
    1.18  #else
    1.19      if (parallel){
    1.20          if (ARCH_CELL && !no_arch){
     2.1 --- a/libavcodec/h264.h	Tue Sep 25 15:55:33 2012 +0200
     2.2 +++ b/libavcodec/h264.h	Mon Oct 29 16:44:27 2012 +0100
     2.3 @@ -39,6 +39,7 @@
     2.4  #include "h264_rec.h"
     2.5  #include "h264_deblock.h"
     2.6  #include "h264_types.h"
     2.7 +#include "VSs_impl/VSs.h"
     2.8  
     2.9  typedef struct h264_options{
    2.10      int statsched;
    2.11 @@ -63,7 +64,7 @@
    2.12  int h264_decode_cell(H264Context *h);
    2.13  int h264_decode_cell_seq(H264Context *h);
    2.14  
    2.15 -int h264_decode_ompss(H264Context *h);
    2.16 +void h264_decode_ompss(void *_params, SlaveVP *animSlv);
    2.17  
    2.18  int h264_decode_pthread(H264Context *h);
    2.19  int h264_decode_seq(H264Context *h);
     3.1 --- a/libavcodec/h264_misc.c	Tue Sep 25 15:55:33 2012 +0200
     3.2 +++ b/libavcodec/h264_misc.c	Mon Oct 29 16:44:27 2012 +0100
     3.3 @@ -266,18 +266,18 @@
     3.4      }
     3.5  
     3.6      if (verbose){
     3.7 -        fprintf(stderr, "frame=%5d avgfps=%3d curfps=%3d\r", frame_number, (int)(frame_number/t+0.5), (int)((frame_number - last_frame_number)/t2+0.5) );
     3.8 -        fflush(stderr);
     3.9 +        //fprintf(stderr, "frame=%5d avgfps=%3d curfps=%3d\r", frame_number, (int)(frame_number/t+0.5), (int)((frame_number - last_frame_number)/t2+0.5) );
    3.10 +        //fflush(stderr);
    3.11      }
    3.12      last_frame_number = frame_number;
    3.13      last_time = cur_time;
    3.14  
    3.15      if (is_last_report){
    3.16          t = (av_gettime()-timer_start) / 1000000.0;
    3.17 -        fprintf(stderr, "%c[2Kframe=%5d avgfps=%3d\r", 27, frame_number, (int)(frame_number/t+0.5));
    3.18 -        fprintf(stderr, "\n");
    3.19 -        fprintf(stderr, "video:%1.0fkB\n", video_size/1024.0);
    3.20 -        fflush(stderr);
    3.21 +        //fprintf(stderr, "%c[2Kframe=%5d avgfps=%3d\r", 27, frame_number, (int)(frame_number/t+0.5));
    3.22 +        //fprintf(stderr, "\n");
    3.23 +        //fprintf(stderr, "video:%1.0fkB\n", video_size/1024.0);
    3.24 +        //fflush(stderr);
    3.25      }
    3.26  }
    3.27  
     4.1 --- a/libavcodec/h264_ompss.c	Tue Sep 25 15:55:33 2012 +0200
     4.2 +++ b/libavcodec/h264_ompss.c	Mon Oct 29 16:44:27 2012 +0100
     4.3 @@ -27,9 +27,25 @@
     4.4  #include "h264_misc.h"
     4.5  // #undef NDEBUG
     4.6  #include <assert.h>
     4.7 +#include "VSs_impl/VSs.h"
     4.8  
     4.9 -#pragma omp task inout(*pc, *nc) output(*sbe)
    4.10 -static void parse_task(H264Context *h, ParserContext *pc, NalContext *nc, SliceBufferEntry *sbe){
    4.11 +typedef struct{
    4.12 +    ParserContext *pc;
    4.13 +    NalContext *nc;
    4.14 +    SliceBufferEntry *sbe; 
    4.15 +    H264Context *h;
    4.16 +} parse_taskArgs;
    4.17 +
    4.18 +int32 parse_taskArgTypes[4] = {INOUT, INOUT, OUTPUT, NONCTLD};
    4.19 +int32 parse_taskArgSizes[4] = {sizeof(ParserContext), sizeof(NalContext), sizeof(SliceBufferEntry), sizeof(H264Context)};
    4.20 +
    4.21 +//#pragma omp task inout(*pc, *nc) output(*sbe)
    4.22 +static void parse_task(void *_data, SlaveVP *animatingSlv){
    4.23 +    parse_taskArgs* args = (parse_taskArgs*) _data;
    4.24 +    ParserContext *pc = args->pc;
    4.25 +    NalContext *nc = args->nc;
    4.26 +    SliceBufferEntry *sbe = args->sbe; 
    4.27 +    H264Context *h = args->h;
    4.28      H264Slice *s;
    4.29  
    4.30      if (!sbe->initialized){
    4.31 @@ -41,10 +57,34 @@
    4.32      s = &sbe->slice;
    4.33  
    4.34      decode_nal_units(nc, s, &sbe->gb);
    4.35 +    
    4.36 +    VSs__end_task( animatingSlv );
    4.37  }
    4.38  
    4.39 -#pragma omp task inout(*ec) inout(*sbe)
    4.40 -static void decode_slice_entropy_task(H264Context *h, EntropyContext *ec, SliceBufferEntry *sbe){
    4.41 +VSsTaskType parse_taskType = {
    4.42 +    .fn = &parse_task,
    4.43 +    .numCtldArgs = 3,
    4.44 +    .numTotalArgs = 4,
    4.45 +    .sizeOfArgs = sizeof(parse_taskArgs),
    4.46 +    .argTypes = parse_taskArgTypes,
    4.47 +    .argSizes = parse_taskArgSizes};
    4.48 +
    4.49 +
    4.50 +typedef struct{
    4.51 +    EntropyContext *ec; 
    4.52 +    SliceBufferEntry *sbe; 
    4.53 +    H264Context *h;
    4.54 +} decode_slice_entropy_taskArgs;
    4.55 +
    4.56 +int32 decode_slice_entropy_taskArgTypes[3] = {INOUT, INOUT, NONCTLD};
    4.57 +int32 decode_slice_entropy_taskArgSizes[3] = {sizeof(EntropyContext), sizeof(SliceBufferEntry), sizeof(H264Context)};
    4.58 +
    4.59 +//#pragma omp task inout(*ec) inout(*sbe)
    4.60 +static void decode_slice_entropy_task(void *_data, SlaveVP *animatingSlv){
    4.61 +    decode_slice_entropy_taskArgs* args = (decode_slice_entropy_taskArgs*) _data;
    4.62 +    EntropyContext *ec = args->ec; 
    4.63 +    SliceBufferEntry *sbe = args->sbe; 
    4.64 +    H264Context *h = args->h;
    4.65      int i,j;
    4.66      H264Slice *s = &sbe->slice;
    4.67      GetBitContext *gb = &sbe->gb;
    4.68 @@ -54,7 +94,7 @@
    4.69  
    4.70      if( !s->pps.cabac ){
    4.71          av_log(AV_LOG_ERROR, "Only cabac encoded streams are supported\n");
    4.72 -        return ;
    4.73 +        VSs__end_task( animatingSlv );
    4.74      }
    4.75  
    4.76      init_dequant_tables(s, ec);
    4.77 @@ -84,12 +124,22 @@
    4.78              (void) eos;
    4.79              if( ret < 0 || c->bytestream > c->bytestream_end + 2) {
    4.80                  av_log(AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", m->mb_x, m->mb_y, c->bytestream_end - c->bytestream);
    4.81 -                return ;
    4.82 +                VSs__end_task( animatingSlv );
    4.83              }
    4.84          }
    4.85      }
    4.86 +    VSs__end_task( animatingSlv );
    4.87  }
    4.88  
    4.89 +VSsTaskType decode_slice_entropy_taskType = {
    4.90 +    .fn = &decode_slice_entropy_task,
    4.91 +    .numCtldArgs = 2,
    4.92 +    .numTotalArgs = 3,
    4.93 +    .sizeOfArgs = sizeof(decode_slice_entropy_taskArgs),
    4.94 +    .argTypes = decode_slice_entropy_taskArgTypes,
    4.95 +    .argSizes = decode_slice_entropy_taskArgSizes};
    4.96 +
    4.97 +
    4.98  static void decode_super_mb_block(MBRecContext *d, H264Slice *s, SuperMBContext *smbc, H264Mb *mbs, int smb_x, int smb_y){
    4.99      MBRecState mrs;
   4.100  //     memset(&mrs, 0, sizeof(MBRecState));
   4.101 @@ -104,22 +154,77 @@
   4.102      }
   4.103  }
   4.104  
   4.105 -#pragma omp task input(*d, *sbe, *ml, *mur) inout(*m)
   4.106 -static void decode_super_mb_task(MBRecContext *d, SliceBufferEntry *sbe, SuperMBContext *smbc, SuperMBTask *ml,
   4.107 -SuperMBTask *mur, SuperMBTask *m){
   4.108 +typedef struct{
   4.109 +    MBRecContext *d;
   4.110 +    SliceBufferEntry *sbe;
   4.111 +    SuperMBTask *ml;
   4.112 +    SuperMBTask *mur;
   4.113 +    SuperMBTask *m;
   4.114 +    SuperMBContext *smbc;
   4.115 +} decode_super_mb_taskArgs;
   4.116 +
   4.117 +int32 decode_super_mb_taskArgTypes[6] = {IN, IN, IN, IN, INOUT, NONCTLD};
   4.118 +int32 decode_super_mb_taskArgSizes[6] = {sizeof(MBRecContext), sizeof(SliceBufferEntry), sizeof(SuperMBTask), sizeof(SuperMBTask), sizeof(SuperMBTask), sizeof(SuperMBContext)};
   4.119 +
   4.120 +//#pragma omp task input(*d, *sbe, *ml, *mur) inout(*m)
   4.121 +static void decode_super_mb_task(void *_data, SlaveVP *animatingSlv){
   4.122 +    decode_super_mb_taskArgs* args = (decode_super_mb_taskArgs*) _data;
   4.123 +    MBRecContext *d = args->d;
   4.124 +    SliceBufferEntry *sbe = args->sbe;
   4.125 +    SuperMBTask *ml = args->ml;
   4.126 +    SuperMBTask *mur = args->mur;
   4.127 +    SuperMBTask *m = args->m;
   4.128 +    SuperMBContext *smbc = args->smbc;
   4.129      H264Slice *s = &sbe->slice;
   4.130      H264Mb *mbs = sbe->mbs;
   4.131      decode_super_mb_block(d, s, smbc, mbs, m->smb_x, m->smb_y);
   4.132 +    VSs__end_task( animatingSlv );
   4.133  }
   4.134  
   4.135 -#pragma omp task input(*d, *sbe) inout(*sm)
   4.136 -static void draw_edges_task(MBRecContext *d, SliceBufferEntry *sbe, SuperMBContext *smbc, SuperMBTask *sm, int line){
   4.137 +VSsTaskType decode_super_mb_taskType = {
   4.138 +    .fn = &decode_super_mb_task,
   4.139 +    .numCtldArgs = 5,
   4.140 +    .numTotalArgs = 6,
   4.141 +    .sizeOfArgs = sizeof(decode_super_mb_taskArgs),
   4.142 +    .argTypes = decode_super_mb_taskArgTypes,
   4.143 +    .argSizes = decode_super_mb_taskArgSizes};
   4.144 +
   4.145 +
   4.146 +
   4.147 +typedef struct{
   4.148 +    MBRecContext *d;
   4.149 +    SliceBufferEntry *sbe;
   4.150 +    SuperMBTask *sm;
   4.151 +    SuperMBContext *smbc;
   4.152 +    int* line;
   4.153 +} draw_edges_taskArgs;
   4.154 +
   4.155 +int32 draw_edges_taskArgTypes[5] = {IN, IN, INOUT, NONCTLD, NONCTLD};
   4.156 +int32 draw_edges_taskArgSizes[5] = {sizeof(MBRecContext), sizeof(SliceBufferEntry), sizeof(SuperMBTask),sizeof(SuperMBContext),sizeof(int)};
   4.157 +
   4.158 +//#pragma omp task input(*d, *sbe) inout(*sm)
   4.159 +static void draw_edges_task(void *_data, SlaveVP *animatingSlv){
   4.160 +    draw_edges_taskArgs* args = (draw_edges_taskArgs*) _data;
   4.161 +    MBRecContext *d = args->d;
   4.162 +    SliceBufferEntry *sbe = args->sbe;
   4.163 +    SuperMBTask *sm = args->sm;
   4.164 +    SuperMBContext *smbc = args->smbc;
   4.165 +    int line = *(args->line);
   4.166      H264Slice *s = &sbe->slice;
   4.167      for (int i=line*smbc->smb_height; i< (line+1)*smbc->smb_height && i< d->mb_height; i++)
   4.168          draw_edges(d, s, i);
   4.169 +    VSs__end_task( animatingSlv );
   4.170  }
   4.171 +VSsTaskType draw_edges_taskType = {
   4.172 +    .fn = &draw_edges_task,
   4.173 +    .numCtldArgs = 3,
   4.174 +    .numTotalArgs = 5,
   4.175 +    .sizeOfArgs = sizeof(draw_edges_taskArgs),
   4.176 +    .argTypes = draw_edges_taskArgTypes,
   4.177 +    .argSizes = draw_edges_taskArgSizes};
   4.178  
   4.179 -static void decode_mb_in_slice(H264Context *h, MBRecContext *d, SliceBufferEntry *sbe){
   4.180 +
   4.181 +static void decode_mb_in_slice(H264Context *h, MBRecContext *d, SliceBufferEntry *sbe, SlaveVP* animSlv){
   4.182      int i,j;
   4.183  
   4.184      SuperMBContext *smbc = acquire_smbc(h);
   4.185 @@ -132,17 +237,46 @@
   4.186              sm = smbs + j*smb_width + i;
   4.187              sml  = sm - ((i > 0) ? 1: 0);
   4.188              smur = sm + (((i < smb_width-1) && (j >0))  ? -smb_width+1: 0);
   4.189 -            decode_super_mb_task(d, sbe, smbc, sml, smur, sm);
   4.190 +            decode_super_mb_taskArgs decode_super_mb_task_args;
   4.191 +            decode_super_mb_task_args.d = d;
   4.192 +            decode_super_mb_task_args.sbe = sbe;
   4.193 +            decode_super_mb_task_args.smbc = smbc;
   4.194 +            decode_super_mb_task_args.ml = sml;
   4.195 +            decode_super_mb_task_args.mur = smur;
   4.196 +            decode_super_mb_task_args.m = sm;
   4.197 +            VSs__submit_task(&decode_super_mb_taskType, &decode_super_mb_task_args, animSlv);
   4.198          }
   4.199 -        draw_edges_task(d, sbe, smbc, sm, j);
   4.200 +        draw_edges_taskArgs draw_edges_task_args;
   4.201 +        draw_edges_task_args.d = d;
   4.202 +        draw_edges_task_args.sbe = sbe;
   4.203 +        draw_edges_task_args.sm = sm;
   4.204 +        draw_edges_task_args.smbc = smbc;
   4.205 +        draw_edges_task_args.line = VMS_App__malloc( sizeof(int) );
   4.206 +        *(draw_edges_task_args.line) = j;
   4.207 +        VSs__submit_task(&draw_edges_taskType, &draw_edges_task_args, animSlv);
   4.208      }
   4.209 -    #pragma omp taskwait on(*sm)
   4.210 +    VSs__taskwait_on(animSlv,sm);
   4.211 +    //#pragma omp taskwait on(*sm)
   4.212  
   4.213      release_smbc(h, smbc);
   4.214  }
   4.215  
   4.216 -#pragma omp task inout(*d) inout(*sbe)
   4.217 -static void decode_slice_mb_task(H264Context *h, MBRecContext *d, SliceBufferEntry *sbe){
   4.218 +typedef struct{
   4.219 +    MBRecContext *d;
   4.220 +    SliceBufferEntry *sbe;
   4.221 +    H264Context *h;
   4.222 +} decode_slice_mb_taskArgs;
   4.223 +
   4.224 +int32 decode_slice_mb_taskArgTypes[3] = {INOUT, INOUT, NONCTLD};
   4.225 +int32 decode_slice_mb_taskArgSizes[3] = {sizeof(MBRecContext), sizeof(SliceBufferEntry), sizeof(H264Context)};
   4.226 +
   4.227 +//#pragma omp task inout(*d) inout(*sbe)
   4.228 +static void decode_slice_mb_task(void *_data, SlaveVP *animatingSlv){
   4.229 +    decode_slice_mb_taskArgs* args = (decode_slice_mb_taskArgs*) _data;
   4.230 +    MBRecContext *d = args->d;
   4.231 +    SliceBufferEntry *sbe = args->sbe;
   4.232 +    H264Context *h = args->h;
   4.233 +    
   4.234      H264Slice *s = &sbe->slice;
   4.235  
   4.236      for (int i=0; i<2; i++){
   4.237 @@ -159,39 +293,99 @@
   4.238          }
   4.239      }
   4.240  
   4.241 -    #pragma omp critical (dpb)
   4.242 +    //#pragma omp critical (dpb)
   4.243 +    VSs__start_critical(animatingSlv,0);
   4.244      get_dpb_entry(h, s);
   4.245 -
   4.246 +    VSs__end_critical(animatingSlv,0);
   4.247 +            
   4.248      if (!h->no_mbd){
   4.249 -        decode_mb_in_slice (h, d, sbe);
   4.250 +        decode_mb_in_slice (h, d, sbe, animatingSlv);
   4.251      }
   4.252  
   4.253      for (int i=0; i<s->release_cnt; i++){
   4.254          for(int j=0; j<h->max_dpb_cnt; j++){
   4.255              if(h->dpb[j].cpn== s->release_ref_cpn[i]){
   4.256 -                #pragma omp critical (dpb)
   4.257 +                //#pragma omp critical (dpb)
   4.258 +                VSs__start_critical(animatingSlv,0);
   4.259                  release_dpb_entry(h, &h->dpb[j], 2);
   4.260 +                VSs__end_critical(animatingSlv,0);
   4.261                  break;
   4.262              }
   4.263          }
   4.264      }
   4.265      s->release_cnt=0;
   4.266 +    VSs__end_task( animatingSlv );
   4.267  }
   4.268  
   4.269 +VSsTaskType decode_slice_mb_taskType = {
   4.270 +    .fn = &decode_slice_mb_task,
   4.271 +    .numCtldArgs = 2,
   4.272 +    .numTotalArgs = 3,
   4.273 +    .sizeOfArgs = sizeof(decode_slice_mb_taskArgs),
   4.274 +    .argTypes = decode_slice_mb_taskArgTypes,
   4.275 +    .argSizes = decode_slice_mb_taskArgSizes};
   4.276 +
   4.277  // for static 3d wave
   4.278  /*-------------------------------------------------------------------------------*/
   4.279 -#pragma omp task input(*d, *sbe, *ml, *mur, *mprev) inout(*m)
   4.280 -static void decode_3dwave_super_mb_task(MBRecContext *d, SliceBufferEntry *sbe, SuperMBContext *smbc, SuperMBTask *ml,
   4.281 -SuperMBTask *mur, SuperMBTask *mprev, SuperMBTask *m){
   4.282 +typedef struct{
   4.283 +    MBRecContext *d;
   4.284 +    SliceBufferEntry *sbe;
   4.285 +    SuperMBTask *ml;
   4.286 +    SuperMBTask *mur;
   4.287 +    SuperMBTask *mprev;
   4.288 +    SuperMBTask *m;
   4.289 +    SuperMBContext *smbc;
   4.290 +} decode_3dwave_super_mb_taskArgs;
   4.291 +
   4.292 +int32 decode_3dwave_super_mb_taskArgTypes[7] = {IN, IN, IN, IN, IN, INOUT, NONCTLD};
   4.293 +int32 decode_3dwave_super_mb_taskArgSizes[7] = {sizeof(MBRecContext), sizeof(SliceBufferEntry), sizeof(SuperMBTask),
   4.294 +        sizeof(SuperMBTask), sizeof(SuperMBTask), sizeof(SuperMBTask), sizeof(SuperMBContext)};
   4.295 +
   4.296 +//#pragma omp task input(*d, *sbe, *ml, *mur, *mprev) inout(*m)
   4.297 +static void decode_3dwave_super_mb_task(void *_data, SlaveVP *animatingSlv){
   4.298 +    decode_3dwave_super_mb_taskArgs* args = (decode_3dwave_super_mb_taskArgs*) _data;
   4.299 +    MBRecContext *d = args->d;
   4.300 +    SliceBufferEntry *sbe = args->sbe;
   4.301 +    SuperMBTask *ml = args->ml;
   4.302 +    SuperMBTask *mur = args->mur;
   4.303 +    SuperMBTask *mprev = args->mprev;
   4.304 +    SuperMBTask *m = args->m;
   4.305 +    SuperMBContext *smbc = args->smbc;
   4.306 +    
   4.307      H264Slice *s = &sbe->slice;
   4.308      H264Mb *mbs = sbe->mbs;
   4.309  
   4.310      decode_super_mb_block(d, s, smbc, mbs, m->smb_x, m->smb_y);
   4.311 +    VSs__end_task( animatingSlv );
   4.312  }
   4.313  
   4.314 +VSsTaskType decode_3dwave_super_mb_taskType = {
   4.315 +    .fn = &decode_3dwave_super_mb_task,
   4.316 +    .numCtldArgs = 6,
   4.317 +    .numTotalArgs = 7,
   4.318 +    .sizeOfArgs = sizeof(decode_3dwave_super_mb_taskArgs),
   4.319 +    .argTypes = decode_3dwave_super_mb_taskArgTypes,
   4.320 +    .argSizes = decode_3dwave_super_mb_taskArgSizes};
   4.321 +
   4.322  // int init_ref_count=0;
   4.323 -#pragma omp task inout(*d, *sbe, *init)
   4.324 -static void init_ref_list_and_get_dpb_task(H264Context *h, MBRecContext *d, SliceBufferEntry *sbe, int *init){
   4.325 +typedef struct{
   4.326 +    MBRecContext *d;
   4.327 +    SliceBufferEntry *sbe;
   4.328 +    int* init;
   4.329 +    H264Context *h;
   4.330 +} init_ref_list_and_get_dpb_taskArgs;
   4.331 +
   4.332 +int32 init_ref_list_and_get_dpb_taskArgTypes[4] = {INOUT, INOUT, INOUT, NONCTLD};
   4.333 +int32 init_ref_list_and_get_dpb_taskArgSizes[4] = {sizeof(MBRecContext), sizeof(SliceBufferEntry), sizeof(int), sizeof(H264Context)};
   4.334 +
   4.335 +//#pragma omp task inout(*d, *sbe, *init)
   4.336 +static void init_ref_list_and_get_dpb_task(void *_data, SlaveVP *animatingSlv){
   4.337 +    init_ref_list_and_get_dpb_taskArgs* args = (init_ref_list_and_get_dpb_taskArgs*) _data;
   4.338 +    MBRecContext *d = args->d;
   4.339 +    SliceBufferEntry *sbe = args->sbe;
   4.340 +    int* initp = args->init;
   4.341 +    H264Context *h = args->h;
   4.342 +    
   4.343      H264Slice *s = &sbe->slice;
   4.344      for (int i=0; i<2; i++){
   4.345          for(int j=0; j< s->ref_count[i]; j++){
   4.346 @@ -207,12 +401,23 @@
   4.347          }
   4.348      }
   4.349  
   4.350 -    #pragma omp critical (dpb)
   4.351 +    //#pragma omp critical (dpb)
   4.352 +    VSs__start_critical(animatingSlv,0);
   4.353      get_dpb_entry(h, s);
   4.354 -
   4.355 +    VSs__end_critical(animatingSlv,0);
   4.356 +    
   4.357 +    VSs__end_task( animatingSlv );
   4.358  }
   4.359  
   4.360 -static SuperMBTask* add_decode_slice_3dwave_tasks(MBRecContext *d, SliceBufferEntry *sbe, SuperMBContext *smbc){
   4.361 +VSsTaskType init_ref_list_and_get_dpb_taskType = {
   4.362 +    .fn = &init_ref_list_and_get_dpb_task,
   4.363 +    .numCtldArgs = 3,
   4.364 +    .numTotalArgs = 4,
   4.365 +    .sizeOfArgs = sizeof(init_ref_list_and_get_dpb_taskArgs),
   4.366 +    .argTypes = init_ref_list_and_get_dpb_taskArgTypes,
   4.367 +    .argSizes = init_ref_list_and_get_dpb_taskArgSizes};
   4.368 +
   4.369 +static SuperMBTask* add_decode_slice_3dwave_tasks(MBRecContext *d, SliceBufferEntry *sbe, SuperMBContext *smbc, SlaveVP* animSlv){
   4.370      int i,j;
   4.371      
   4.372      int smb_3d_height =smbc->nsmb_3dheight;
   4.373 @@ -229,9 +434,24 @@
   4.374              sml  = sm - ((i > 0) ? 1: 0);
   4.375              smur = sm + (((i < smb_width-1) && (j >0))  ? -smb_width+1: 0);
   4.376              smprev = smbs_prev + (j + smb_diff_prev+1)*smb_width -1;
   4.377 -            decode_3dwave_super_mb_task(d, sbe, smbc, sml, smur, smprev, sm);
   4.378 +            decode_3dwave_super_mb_taskArgs decode_3dwave_super_mb_task_args;
   4.379 +            decode_3dwave_super_mb_task_args.d = d;
   4.380 +            decode_3dwave_super_mb_task_args.sbe = sbe;
   4.381 +            decode_3dwave_super_mb_task_args.smbc = smbc;
   4.382 +            decode_3dwave_super_mb_task_args.ml = sml;
   4.383 +            decode_3dwave_super_mb_task_args.mur = smur;
   4.384 +            decode_3dwave_super_mb_task_args.mprev = smprev;
   4.385 +            decode_3dwave_super_mb_task_args.m = sm;
   4.386 +            VSs__submit_task(&decode_3dwave_super_mb_taskType, &decode_3dwave_super_mb_task_args, animSlv);
   4.387          }
   4.388 -        draw_edges_task(d, sbe, smbc, sm, j);
   4.389 +        draw_edges_taskArgs draw_edges_task_args;
   4.390 +        draw_edges_task_args.d = d;
   4.391 +        draw_edges_task_args.sbe = sbe;
   4.392 +        draw_edges_task_args.sm = sm;
   4.393 +        draw_edges_task_args.smbc = smbc;
   4.394 +        draw_edges_task_args.line = VMS_App__malloc( sizeof(int) );
   4.395 +        *(draw_edges_task_args.line) = j;
   4.396 +        VSs__submit_task(&draw_edges_taskType, &draw_edges_task_args, animSlv);
   4.397      }
   4.398  
   4.399      for(; j< smb_height; j++){
   4.400 @@ -239,21 +459,57 @@
   4.401              sm = smbs + j*smb_width + i;
   4.402              sml  = sm - ((i > 0) ? 1: 0);
   4.403              smur = sm + (((i < smb_width-1) && (j >0))  ? -smb_width+1: 0);
   4.404 -            decode_super_mb_task(d, sbe, smbc, sml, smur, sm);
   4.405 +            decode_super_mb_taskArgs decode_super_mb_task_args;
   4.406 +            decode_super_mb_task_args.d = d;
   4.407 +            decode_super_mb_task_args.sbe = sbe;
   4.408 +            decode_super_mb_task_args.smbc = smbc;
   4.409 +            decode_super_mb_task_args.ml = sml;
   4.410 +            decode_super_mb_task_args.mur = smur;
   4.411 +            decode_super_mb_task_args.m = sm;
   4.412 +            VSs__submit_task(&decode_super_mb_taskType, &decode_super_mb_task_args, animSlv);
   4.413          }
   4.414 -        draw_edges_task(d, sbe, smbc, sm, j);
   4.415 +        draw_edges_taskArgs draw_edges_task_args;
   4.416 +        draw_edges_task_args.d = d;
   4.417 +        draw_edges_task_args.sbe = sbe;
   4.418 +        draw_edges_task_args.sm = sm;
   4.419 +        draw_edges_task_args.smbc = smbc;
   4.420 +        draw_edges_task_args.line = VMS_App__malloc( sizeof(int) );
   4.421 +        *(draw_edges_task_args.line) = j;
   4.422 +        VSs__submit_task(&draw_edges_taskType, &draw_edges_task_args, animSlv);
   4.423      }
   4.424      return sm;
   4.425  }
   4.426  
   4.427 -#pragma omp task inout(*d, *sbe, *release) input (*lastsmb)
   4.428 -static void release_ref_list_task(H264Context *h, SuperMBContext *smbc, MBRecContext *d, SliceBufferEntry *sbe, SuperMBTask *lastsmb, int *release){
   4.429 +typedef struct{
   4.430 +    MBRecContext *d;
   4.431 +    SliceBufferEntry *sbe;
   4.432 +    SuperMBTask *lastsmb;
   4.433 +    int* release;
   4.434 +    H264Context *h;
   4.435 +    SuperMBContext *smbc;
   4.436 +} release_ref_list_taskArgs;
   4.437 +
   4.438 +int32 release_ref_list_taskArgTypes[6] = {INOUT, INOUT, IN, INOUT, NONCTLD, NONCTLD};
   4.439 +int32 release_ref_list_taskArgSizes[6] = {sizeof(MBRecContext), sizeof(SliceBufferEntry), sizeof(SuperMBTask), sizeof(int), sizeof(H264Context), sizeof(SuperMBContext)};
   4.440 +
   4.441 +//#pragma omp task inout(*d, *sbe, *release) input (*lastsmb)
   4.442 +static void release_ref_list_task(void *_data, SlaveVP *animatingSlv){
   4.443 +    release_ref_list_taskArgs* args = (release_ref_list_taskArgs*) _data;
   4.444 +    MBRecContext *d = args->d;
   4.445 +    SliceBufferEntry *sbe = args->sbe;
   4.446 +    SuperMBTask *lastsmb = args->lastsmb;
   4.447 +    int* releasep = args->release;
   4.448 +    H264Context *h = args->h;
   4.449 +    SuperMBContext *smbc = args->smbc;
   4.450 +    
   4.451      H264Slice *s = &sbe->slice;
   4.452      for (int i=0; i<s->release_cnt; i++){
   4.453          for(int j=0; j<h->max_dpb_cnt; j++){
   4.454              if(h->dpb[j].cpn== s->release_ref_cpn[i]){
   4.455 -                #pragma omp critical (dpb)
   4.456 +                //#pragma omp critical (dpb)
   4.457 +                VSs__start_critical(animatingSlv,0);
   4.458                  release_dpb_entry(h, &h->dpb[j], 2);
   4.459 +                VSs__end_critical(animatingSlv,0);
   4.460                  break;
   4.461              }
   4.462          }
   4.463 @@ -262,29 +518,67 @@
   4.464  
   4.465      release_smbc(h, smbc);
   4.466      
   4.467 +    VSs__end_task( animatingSlv );
   4.468  }
   4.469  
   4.470 +VSsTaskType release_ref_list_taskType = {
   4.471 +    .fn = &release_ref_list_task,
   4.472 +    .numCtldArgs = 4,
   4.473 +    .numTotalArgs = 6,
   4.474 +    .sizeOfArgs = sizeof(release_ref_list_taskArgs),
   4.475 +    .argTypes = release_ref_list_taskArgTypes,
   4.476 +    .argSizes = release_ref_list_taskArgSizes};
   4.477 +
   4.478  // static void decode_mb_static_3dwave(H264Context *h, int mb_height, int mb_width, MBRecContext *d, H264Slice *s, H264Mb *mbs, SuperMBTask *smbs, SuperMBTask *smbs_prev){
   4.479  //
   4.480  // }
   4.481  /*-------------------------------------------------------------------------------*/
   4.482  //end for static 3d wave
   4.483  
   4.484 -#pragma omp task inout (*oc) input(*sbe)
   4.485 -static void output_task(H264Context *h, OutputContext *oc, SliceBufferEntry *sbe){
   4.486 +typedef struct{
   4.487 +    OutputContext *oc;
   4.488 +    SliceBufferEntry *sbe; 
   4.489 +    H264Context *h;
   4.490 +} output_taskArgs;
   4.491 +
   4.492 +int32 output_taskArgTypes[3] = {INOUT, IN, NONCTLD};
   4.493 +int32 output_taskArgSizes[3] = {sizeof(OutputContext), sizeof(SliceBufferEntry), sizeof(H264Context)};
   4.494 +
   4.495 +//#pragma omp task inout (*oc) input(*sbe)
   4.496 +static void output_task(void *_data, SlaveVP *animatingSlv){
   4.497 +    output_taskArgs* args = (output_taskArgs*) _data;
   4.498 +    OutputContext *oc = args->oc;
   4.499 +    SliceBufferEntry *sbe = args->sbe; 
   4.500 +    H264Context *h = args->h;
   4.501 +    
   4.502      DecodedPicture* out =output_frame(h, oc, sbe->slice.curr_pic, h->ofile, h->frame_width, h->frame_height);
   4.503      if (out){
   4.504 -        #pragma omp critical (dpb)
   4.505 +        //#pragma omp critical (dpb)
   4.506 +        VSs__start_critical(animatingSlv,0);
   4.507          release_dpb_entry(h, out, 1);
   4.508 +        VSs__end_critical(animatingSlv,0);
   4.509      }
   4.510      print_report(oc->frame_number, oc->video_size, 0, h->verbose);
   4.511 +    
   4.512 +    VSs__end_task( animatingSlv );
   4.513  }
   4.514  
   4.515 +VSsTaskType output_taskType = {
   4.516 +    .fn = &output_task,
   4.517 +    .numCtldArgs = 2,
   4.518 +    .numTotalArgs = 3,
   4.519 +    .sizeOfArgs = sizeof(output_taskArgs),
   4.520 +    .argTypes = output_taskArgTypes,
   4.521 +    .argSizes = output_taskArgSizes};
   4.522 +
   4.523  /*
   4.524  * The following code is the main loop of the file converter
   4.525  */
   4.526  //Put VMS entry point here
   4.527 -int h264_decode_ompss( H264Context *h) {
   4.528 +
   4.529 +void h264_decode_ompss( void *_params, SlaveVP *animSlv) {
   4.530 +    H264Context* h = (H264Context*) _params;
   4.531 +    
   4.532      const int bufs = h->pipe_bufs;
   4.533  
   4.534      ParserContext *pc;
   4.535 @@ -304,9 +598,11 @@
   4.536          pthread_create(&sdl_thr, NULL, sdl_thread, h);
   4.537      }
   4.538  #endif
   4.539 +    /*sbe= VMS_WL__malloc(sizeof(SliceBufferEntry) * bufs);
   4.540 +    if (sbe)
   4.541 +        memset(sbe, 0, sizeof(SliceBufferEntry) * bufs);*/
   4.542      sbe= av_mallocz(sizeof(SliceBufferEntry) * bufs);
   4.543  
   4.544 -
   4.545      pc = get_parse_context(h->ifile);
   4.546      nc = get_nal_context(h->width, h->height);
   4.547  
   4.548 @@ -325,52 +621,135 @@
   4.549      if (h->static_3d && bufs < h->num_frames ){
   4.550          int num_pre_ed =0;
   4.551          for (num_pre_ed=0; num_pre_ed< bufs -1 && !pc->final_frame; num_pre_ed++){
   4.552 -            parse_task( h, pc, nc, &sbe[k%bufs] );
   4.553 -            decode_slice_entropy_task(h, ec[k%bufs], &sbe[k%bufs]);
   4.554 -            #pragma omp taskwait on(*pc)
   4.555 +            parse_taskArgs parse_task_args;
   4.556 +            parse_task_args.h = h;
   4.557 +            parse_task_args.pc = pc;
   4.558 +            parse_task_args.nc = nc;
   4.559 +            parse_task_args.sbe = &sbe[k%bufs];
   4.560 +            VSs__submit_task(&parse_taskType, &parse_task_args, animSlv);
   4.561 +            
   4.562 +            decode_slice_entropy_taskArgs decode_slice_entropy_task_args;
   4.563 +            decode_slice_entropy_task_args.h = h;
   4.564 +            decode_slice_entropy_task_args.ec = ec[k%bufs];
   4.565 +            decode_slice_entropy_task_args.sbe = &sbe[k%bufs];
   4.566 +            VSs__submit_task(&decode_slice_entropy_taskType, &decode_slice_entropy_task_args, animSlv);        
   4.567 +            //#pragma omp taskwait on(*pc)
   4.568 +            VSs__taskwait_on(animSlv,pc);
   4.569              k++;
   4.570          }
   4.571  
   4.572          while(!pc->final_frame && frames++ < h->num_frames && !h->quit){
   4.573 -            parse_task( h, pc, nc, &sbe[k%bufs] );
   4.574 -            decode_slice_entropy_task(h, ec[k%bufs], &sbe[k%bufs]);
   4.575 +            parse_taskArgs parse_task_args;
   4.576 +            parse_task_args.h = h;
   4.577 +            parse_task_args.pc = pc;
   4.578 +            parse_task_args.nc = nc;
   4.579 +            parse_task_args.sbe = &sbe[k%bufs];
   4.580 +            VSs__submit_task(&parse_taskType, &parse_task_args, animSlv);
   4.581 +            
   4.582 +            decode_slice_entropy_taskArgs decode_slice_entropy_task_args;
   4.583 +            decode_slice_entropy_task_args.h = h;
   4.584 +            decode_slice_entropy_task_args.ec = ec[k%bufs];
   4.585 +            decode_slice_entropy_task_args.sbe = &sbe[k%bufs];
   4.586 +            VSs__submit_task(&decode_slice_entropy_taskType, &decode_slice_entropy_task_args, animSlv); 
   4.587  
   4.588              k++;
   4.589 +            
   4.590 +            init_ref_list_and_get_dpb_taskArgs init_ref_list_and_get_dpb_task_args;
   4.591 +            init_ref_list_and_get_dpb_task_args.h = h;
   4.592 +            init_ref_list_and_get_dpb_task_args.d = rc[k%2];
   4.593 +            init_ref_list_and_get_dpb_task_args.sbe = &sbe[k%bufs];
   4.594 +            init_ref_list_and_get_dpb_task_args.init = &init;
   4.595 +            VSs__submit_task(&init_ref_list_and_get_dpb_taskType, &init_ref_list_and_get_dpb_task_args, animSlv); 
   4.596  
   4.597 -            init_ref_list_and_get_dpb_task(h, rc[k%2], &sbe[k%bufs], &init);
   4.598              smbc = acquire_smbc(h);
   4.599 -            SuperMBTask *lastsmb= add_decode_slice_3dwave_tasks(rc[k%2], &sbe[k%bufs], smbc);
   4.600 -            release_ref_list_task(h, smbc, rc[k%2], &sbe[k%bufs], lastsmb, &release);
   4.601 +            SuperMBTask *lastsmb= add_decode_slice_3dwave_tasks(rc[k%2], &sbe[k%bufs], smbc, animSlv);
   4.602 +            release_ref_list_taskArgs release_ref_list_task_args;
   4.603 +            release_ref_list_task_args.h = h;
   4.604 +            release_ref_list_task_args.smbc = smbc;
   4.605 +            release_ref_list_task_args.d = rc[k%2];
   4.606 +            release_ref_list_task_args.sbe = &sbe[k%bufs];
   4.607 +            release_ref_list_task_args.lastsmb = lastsmb;
   4.608 +            release_ref_list_task_args.release = &release;
   4.609 +            VSs__submit_task(&release_ref_list_taskType, &release_ref_list_task_args, animSlv);
   4.610  
   4.611 -            output_task (h, oc, &sbe[k%bufs]);
   4.612 -            #pragma omp taskwait on(*pc)
   4.613 +            output_taskArgs output_task_args;
   4.614 +            output_task_args.h = h;
   4.615 +            output_task_args.oc = oc;
   4.616 +            output_task_args.sbe = &sbe[k%bufs];
   4.617 +            VSs__submit_task(&output_taskType, &output_task_args, animSlv);
   4.618 +            //#pragma omp taskwait on(*pc)
   4.619 +            VSs__taskwait_on(animSlv,pc);
   4.620          }
   4.621  
   4.622          for (int i=0; i< num_pre_ed; i++){
   4.623              k++;
   4.624 -            init_ref_list_and_get_dpb_task(h, rc[k%2], &sbe[k%bufs], &init);
   4.625 +            init_ref_list_and_get_dpb_taskArgs init_ref_list_and_get_dpb_task_args;
   4.626 +            init_ref_list_and_get_dpb_task_args.h = h;
   4.627 +            init_ref_list_and_get_dpb_task_args.d = rc[k%2];
   4.628 +            init_ref_list_and_get_dpb_task_args.sbe = &sbe[k%bufs];
   4.629 +            init_ref_list_and_get_dpb_task_args.init = &init;
   4.630 +            VSs__submit_task(&init_ref_list_and_get_dpb_taskType, &init_ref_list_and_get_dpb_task_args, animSlv); 
   4.631              smbc = acquire_smbc(h);
   4.632 -            SuperMBTask *lastsmb= add_decode_slice_3dwave_tasks(rc[k%2], &sbe[k%bufs], smbc);
   4.633 -            release_ref_list_task(h, smbc, rc[k%2], &sbe[k%bufs], lastsmb, &release);
   4.634 +            SuperMBTask *lastsmb= add_decode_slice_3dwave_tasks(rc[k%2], &sbe[k%bufs], smbc, animSlv);
   4.635 +            release_ref_list_taskArgs release_ref_list_task_args;
   4.636 +            release_ref_list_task_args.h = h;
   4.637 +            release_ref_list_task_args.smbc = smbc;
   4.638 +            release_ref_list_task_args.d = rc[k%2];
   4.639 +            release_ref_list_task_args.sbe = &sbe[k%bufs];
   4.640 +            release_ref_list_task_args.lastsmb = lastsmb;
   4.641 +            release_ref_list_task_args.release = &release;
   4.642 +            VSs__submit_task(&release_ref_list_taskType, &release_ref_list_task_args, animSlv); 
   4.643  
   4.644 -            output_task (h, oc, &sbe[k%bufs]);
   4.645 +            output_taskArgs output_task_args;
   4.646 +            output_task_args.h = h;
   4.647 +            output_task_args.oc = oc;
   4.648 +            output_task_args.sbe = &sbe[k%bufs];
   4.649 +            VSs__submit_task(&output_taskType, &output_task_args, animSlv);
   4.650          }
   4.651  
   4.652      } else {
   4.653          while(!pc->final_frame && frames++ < h->num_frames && !h->quit){
   4.654 -            parse_task( h, pc, nc, &sbe[k%bufs] );
   4.655 +            int32* taskID;
   4.656 +            taskID = VSs__create_taskID_of_size(1,animSlv );
   4.657 +            taskID[1] = frames*10+1;
   4.658 +            parse_taskArgs parse_task_args;
   4.659 +            parse_task_args.h = h;
   4.660 +            parse_task_args.pc = pc;
   4.661 +            parse_task_args.nc = nc;
   4.662 +            parse_task_args.sbe = &sbe[k%bufs];
   4.663 +            VSs__submit_task_with_ID(&parse_taskType, &parse_task_args, taskID, animSlv);
   4.664  
   4.665 -            decode_slice_entropy_task(h, ec[k%bufs], &sbe[k%bufs]);
   4.666 +            taskID = VSs__create_taskID_of_size(1,animSlv );
   4.667 +            taskID[1] = frames*10+2;
   4.668 +            decode_slice_entropy_taskArgs decode_slice_entropy_task_args;
   4.669 +            decode_slice_entropy_task_args.h = h;
   4.670 +            decode_slice_entropy_task_args.ec = ec[k%bufs];
   4.671 +            decode_slice_entropy_task_args.sbe = &sbe[k%bufs];
   4.672 +            VSs__submit_task_with_ID(&decode_slice_entropy_taskType, &decode_slice_entropy_task_args,taskID, animSlv);
   4.673  
   4.674 -            decode_slice_mb_task(h, rc[0], &sbe[k%bufs]);
   4.675 +            taskID = VSs__create_taskID_of_size(1,animSlv );
   4.676 +            taskID[1] = frames*10+3;
   4.677 +            decode_slice_mb_taskArgs decode_slice_mb_task_args;
   4.678 +            decode_slice_mb_task_args.h = h;
   4.679 +            decode_slice_mb_task_args.d = rc[0];
   4.680 +            decode_slice_mb_task_args.sbe = &sbe[k%bufs];
   4.681 +            VSs__submit_task_with_ID(&decode_slice_mb_taskType, &decode_slice_mb_task_args,taskID, animSlv); 
   4.682  
   4.683 -            output_task (h, oc, &sbe[k%bufs]);
   4.684 -            #pragma omp taskwait on(*pc)
   4.685 +            taskID = VSs__create_taskID_of_size(1,animSlv );
   4.686 +            taskID[1] = frames*10+4;
   4.687 +            output_taskArgs output_task_args;
   4.688 +            output_task_args.h = h;
   4.689 +            output_task_args.oc = oc;
   4.690 +            output_task_args.sbe = &sbe[k%bufs];
   4.691 +            VSs__submit_task_with_ID(&output_taskType, &output_task_args,taskID, animSlv);
   4.692 +            //#pragma omp taskwait on(*pc)
   4.693 +            VSs__taskwait_on(animSlv,pc);
   4.694              k++;
   4.695          }
   4.696      }
   4.697 -    #pragma omp taskwait
   4.698 -
   4.699 +    //#pragma omp taskwait
   4.700 +    VSs__taskwait(animSlv);
   4.701 +    
   4.702      while ((out=output_frame(h, oc, NULL, h->ofile, h->frame_width, h->frame_height))) ;
   4.703  
   4.704      print_report(oc->frame_number, oc->video_size, 1, h->verbose);
   4.705 @@ -397,5 +776,5 @@
   4.706      }
   4.707  #endif
   4.708  
   4.709 -    return 0;
   4.710 +    VSs__end_thread( animSlv );
   4.711  }