diff libavcodec/h264_ompss.c @ 3:0b056460c67d

changed code to use VSs
author Nina Engelhardt <nengel@mailbox.tu-berlin.de>
date Mon, 29 Oct 2012 16:44:27 +0100
parents 897f711a7157
children 96e628866d41
line diff
     1.1 --- a/libavcodec/h264_ompss.c	Tue Sep 25 15:55:33 2012 +0200
     1.2 +++ b/libavcodec/h264_ompss.c	Mon Oct 29 16:44:27 2012 +0100
     1.3 @@ -27,9 +27,25 @@
     1.4  #include "h264_misc.h"
     1.5  // #undef NDEBUG
     1.6  #include <assert.h>
     1.7 +#include "VSs_impl/VSs.h"
     1.8  
     1.9 -#pragma omp task inout(*pc, *nc) output(*sbe)
    1.10 -static void parse_task(H264Context *h, ParserContext *pc, NalContext *nc, SliceBufferEntry *sbe){
    1.11 +typedef struct{  // argument record that parse_task receives through its void *_data parameter
    1.12 +    ParserContext *pc;  // first entry of the tables below: INOUT, sizeof(ParserContext)
    1.13 +    NalContext *nc;  // second entry: INOUT, sizeof(NalContext)
    1.14 +    SliceBufferEntry *sbe;  // third entry: OUTPUT, sizeof(SliceBufferEntry)
    1.15 +    H264Context *h;  // fourth entry: NONCTLD, sizeof(H264Context)
    1.16 +} parse_taskArgs;
    1.17 +
    1.18 +int32 parse_taskArgTypes[4] = {INOUT, INOUT, OUTPUT, NONCTLD};  // same modes as the replaced pragma, inout(*pc, *nc) output(*sbe); h had no clause there
    1.19 +int32 parse_taskArgSizes[4] = {sizeof(ParserContext), sizeof(NalContext), sizeof(SliceBufferEntry), sizeof(H264Context)};  // sizes of the pointed-to objects, in struct field order
    1.20 +
    1.21 +//#pragma omp task inout(*pc, *nc) output(*sbe)
    1.22 +static void parse_task(void *_data, SlaveVP *animatingSlv){
    1.23 +    parse_taskArgs* args = (parse_taskArgs*) _data;
    1.24 +    ParserContext *pc = args->pc;
    1.25 +    NalContext *nc = args->nc;
    1.26 +    SliceBufferEntry *sbe = args->sbe; 
    1.27 +    H264Context *h = args->h;
    1.28      H264Slice *s;
    1.29  
    1.30      if (!sbe->initialized){
    1.31 @@ -41,10 +57,34 @@
    1.32      s = &sbe->slice;
    1.33  
    1.34      decode_nal_units(nc, s, &sbe->gb);
    1.35 +    
    1.36 +    VSs__end_task( animatingSlv );
    1.37  }
    1.38  
    1.39 -#pragma omp task inout(*ec) inout(*sbe)
    1.40 -static void decode_slice_entropy_task(H264Context *h, EntropyContext *ec, SliceBufferEntry *sbe){
    1.41 +VSsTaskType parse_taskType = {  // descriptor passed to VSs__submit_task / VSs__submit_task_with_ID when parse_task is queued
    1.42 +    .fn = &parse_task,
    1.43 +    .numCtldArgs = 3,  // pc, nc, sbe: the entries of parse_taskArgTypes that are not NONCTLD
    1.44 +    .numTotalArgs = 4,  // the three above plus h
    1.45 +    .sizeOfArgs = sizeof(parse_taskArgs),
    1.46 +    .argTypes = parse_taskArgTypes,
    1.47 +    .argSizes = parse_taskArgSizes};
    1.48 +
    1.49 +
    1.50 +typedef struct{  // argument record that decode_slice_entropy_task receives through its void *_data parameter
    1.51 +    EntropyContext *ec;  // first entry of the tables below: INOUT, sizeof(EntropyContext)
    1.52 +    SliceBufferEntry *sbe;  // second entry: INOUT, sizeof(SliceBufferEntry)
    1.53 +    H264Context *h;  // third entry: NONCTLD, sizeof(H264Context)
    1.54 +} decode_slice_entropy_taskArgs;
    1.55 +
    1.56 +int32 decode_slice_entropy_taskArgTypes[3] = {INOUT, INOUT, NONCTLD};  // same modes as the replaced pragma, inout(*ec) inout(*sbe); h had no clause there
    1.57 +int32 decode_slice_entropy_taskArgSizes[3] = {sizeof(EntropyContext), sizeof(SliceBufferEntry), sizeof(H264Context)};  // sizes of the pointed-to objects, in struct field order
    1.58 +
    1.59 +//#pragma omp task inout(*ec) inout(*sbe)
    1.60 +static void decode_slice_entropy_task(void *_data, SlaveVP *animatingSlv){
    1.61 +    decode_slice_entropy_taskArgs* args = (decode_slice_entropy_taskArgs*) _data;
    1.62 +    EntropyContext *ec = args->ec; 
    1.63 +    SliceBufferEntry *sbe = args->sbe; 
    1.64 +    H264Context *h = args->h;
    1.65      int i,j;
    1.66      H264Slice *s = &sbe->slice;
    1.67      GetBitContext *gb = &sbe->gb;
    1.68 @@ -54,7 +94,7 @@
    1.69  
    1.70      if( !s->pps.cabac ){
    1.71          av_log(AV_LOG_ERROR, "Only cabac encoded streams are supported\n");
    1.72 -        return ;
    1.73 +        VSs__end_task( animatingSlv );
    1.74      }
    1.75  
    1.76      init_dequant_tables(s, ec);
    1.77 @@ -84,12 +124,22 @@
    1.78              (void) eos;
    1.79              if( ret < 0 || c->bytestream > c->bytestream_end + 2) {
    1.80                  av_log(AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", m->mb_x, m->mb_y, c->bytestream_end - c->bytestream);
    1.81 -                return ;
    1.82 +                VSs__end_task( animatingSlv );
    1.83              }
    1.84          }
    1.85      }
    1.86 +    VSs__end_task( animatingSlv );
    1.87  }
    1.88  
    1.89 +VSsTaskType decode_slice_entropy_taskType = {  // descriptor passed to the VSs submit calls when decode_slice_entropy_task is queued
    1.90 +    .fn = &decode_slice_entropy_task,
    1.91 +    .numCtldArgs = 2,  // ec, sbe: the entries of decode_slice_entropy_taskArgTypes that are not NONCTLD
    1.92 +    .numTotalArgs = 3,  // the two above plus h
    1.93 +    .sizeOfArgs = sizeof(decode_slice_entropy_taskArgs),
    1.94 +    .argTypes = decode_slice_entropy_taskArgTypes,
    1.95 +    .argSizes = decode_slice_entropy_taskArgSizes};
    1.96 +
    1.97 +
    1.98  static void decode_super_mb_block(MBRecContext *d, H264Slice *s, SuperMBContext *smbc, H264Mb *mbs, int smb_x, int smb_y){
    1.99      MBRecState mrs;
   1.100  //     memset(&mrs, 0, sizeof(MBRecState));
   1.101 @@ -104,22 +154,77 @@
   1.102      }
   1.103  }
   1.104  
   1.105 -#pragma omp task input(*d, *sbe, *ml, *mur) inout(*m)
   1.106 -static void decode_super_mb_task(MBRecContext *d, SliceBufferEntry *sbe, SuperMBContext *smbc, SuperMBTask *ml,
   1.107 -SuperMBTask *mur, SuperMBTask *m){
   1.108 +typedef struct{  // argument record that decode_super_mb_task receives through its void *_data parameter
   1.109 +    MBRecContext *d;  // IN
   1.110 +    SliceBufferEntry *sbe;  // IN
   1.111 +    SuperMBTask *ml;  // IN; not read by the task body, only declared in the mode table
   1.112 +    SuperMBTask *mur;  // IN; not read by the task body, only declared in the mode table
   1.113 +    SuperMBTask *m;  // INOUT; supplies smb_x / smb_y of the block to decode
   1.114 +    SuperMBContext *smbc;  // NONCTLD
   1.115 +} decode_super_mb_taskArgs;
   1.116 +
   1.117 +int32 decode_super_mb_taskArgTypes[6] = {IN, IN, IN, IN, INOUT, NONCTLD};  // same modes as the replaced pragma, input(*d, *sbe, *ml, *mur) inout(*m); smbc had no clause there
   1.118 +int32 decode_super_mb_taskArgSizes[6] = {sizeof(MBRecContext), sizeof(SliceBufferEntry), sizeof(SuperMBTask), sizeof(SuperMBTask), sizeof(SuperMBTask), sizeof(SuperMBContext)};  // sizes of the pointed-to objects, in struct field order
   1.119 +
   1.120 +// was: #pragma omp task input(*d, *sbe, *ml, *mur) inout(*m) -- the same dependency list is now carried by decode_super_mb_taskArgTypes
   1.121 +static void decode_super_mb_task(void *_data, SlaveVP *animatingSlv){  // task entry: unpacks decode_super_mb_taskArgs, decodes one super-macroblock, then signals task end
   1.122 +    decode_super_mb_taskArgs* args = (decode_super_mb_taskArgs*) _data;
   1.123 +    MBRecContext *d = args->d;
   1.124 +    SliceBufferEntry *sbe = args->sbe;
   1.125 +    SuperMBTask *ml = args->ml;  // unused below
   1.126 +    SuperMBTask *mur = args->mur;  // unused below
   1.127 +    SuperMBTask *m = args->m;
   1.128 +    SuperMBContext *smbc = args->smbc;
   1.129      H264Slice *s = &sbe->slice;
   1.130      H264Mb *mbs = sbe->mbs;
   1.131      decode_super_mb_block(d, s, smbc, mbs, m->smb_x, m->smb_y);
   1.132 +    VSs__end_task( animatingSlv );  // every task function added in this file ends with this call
   1.133  }
   1.134  
   1.135 -#pragma omp task input(*d, *sbe) inout(*sm)
   1.136 -static void draw_edges_task(MBRecContext *d, SliceBufferEntry *sbe, SuperMBContext *smbc, SuperMBTask *sm, int line){
   1.137 +VSsTaskType decode_super_mb_taskType = {  // descriptor passed to VSs__submit_task when decode_super_mb_task is queued
   1.138 +    .fn = &decode_super_mb_task,
   1.139 +    .numCtldArgs = 5,  // d, sbe, ml, mur, m: the entries of decode_super_mb_taskArgTypes that are not NONCTLD
   1.140 +    .numTotalArgs = 6,  // the five above plus smbc
   1.141 +    .sizeOfArgs = sizeof(decode_super_mb_taskArgs),
   1.142 +    .argTypes = decode_super_mb_taskArgTypes,
   1.143 +    .argSizes = decode_super_mb_taskArgSizes};
   1.144 +
   1.145 +
   1.146 +
   1.147 +typedef struct{  // argument record that draw_edges_task receives through its void *_data parameter
   1.148 +    MBRecContext *d;  // IN
   1.149 +    SliceBufferEntry *sbe;  // IN
   1.150 +    SuperMBTask *sm;  // INOUT; not read by the task body, only declared in the mode table
   1.151 +    SuperMBContext *smbc;  // NONCTLD
   1.152 +    int* line;  // NONCTLD; the submit sites in this file fill it from VMS_App__malloc(sizeof(int)) and store the row index j
   1.153 +} draw_edges_taskArgs;
   1.154 +
   1.155 +int32 draw_edges_taskArgTypes[5] = {IN, IN, INOUT, NONCTLD, NONCTLD};  // same modes as the replaced pragma, input(*d, *sbe) inout(*sm); smbc and line had no clause there
   1.156 +int32 draw_edges_taskArgSizes[5] = {sizeof(MBRecContext), sizeof(SliceBufferEntry), sizeof(SuperMBTask),sizeof(SuperMBContext),sizeof(int)};  // sizes of the pointed-to objects, in struct field order
   1.157 +
   1.158 +// was: #pragma omp task input(*d, *sbe) inout(*sm) -- the same dependency list is now carried by draw_edges_taskArgTypes
   1.159 +static void draw_edges_task(void *_data, SlaveVP *animatingSlv){  // task entry: calls draw_edges for each macroblock row of super-row `line`, bounded by d->mb_height
   1.160 +    draw_edges_taskArgs* args = (draw_edges_taskArgs*) _data;
   1.161 +    MBRecContext *d = args->d;
   1.162 +    SliceBufferEntry *sbe = args->sbe;
   1.163 +    SuperMBTask *sm = args->sm;  // unused below
   1.164 +    SuperMBContext *smbc = args->smbc;
   1.165 +    int line = *(args->line);  // NOTE(review): the int is copied out here but no free of args->line is visible in this function -- confirm who releases it
   1.166      H264Slice *s = &sbe->slice;
   1.167      for (int i=line*smbc->smb_height; i< (line+1)*smbc->smb_height && i< d->mb_height; i++)
   1.168          draw_edges(d, s, i);
   1.169 +    VSs__end_task( animatingSlv );  // every task function added in this file ends with this call
   1.170  }
   1.171 +VSsTaskType draw_edges_taskType = {  // descriptor passed to VSs__submit_task when draw_edges_task is queued
   1.172 +    .fn = &draw_edges_task,
   1.173 +    .numCtldArgs = 3,  // d, sbe, sm: the entries of draw_edges_taskArgTypes that are not NONCTLD
   1.174 +    .numTotalArgs = 5,  // the three above plus smbc and line
   1.175 +    .sizeOfArgs = sizeof(draw_edges_taskArgs),
   1.176 +    .argTypes = draw_edges_taskArgTypes,
   1.177 +    .argSizes = draw_edges_taskArgSizes};
   1.178  
   1.179 -static void decode_mb_in_slice(H264Context *h, MBRecContext *d, SliceBufferEntry *sbe){
   1.180 +
   1.181 +static void decode_mb_in_slice(H264Context *h, MBRecContext *d, SliceBufferEntry *sbe, SlaveVP* animSlv){
   1.182      int i,j;
   1.183  
   1.184      SuperMBContext *smbc = acquire_smbc(h);
   1.185 @@ -132,17 +237,46 @@
   1.186              sm = smbs + j*smb_width + i;
   1.187              sml  = sm - ((i > 0) ? 1: 0);
   1.188              smur = sm + (((i < smb_width-1) && (j >0))  ? -smb_width+1: 0);
   1.189 -            decode_super_mb_task(d, sbe, smbc, sml, smur, sm);
   1.190 +            decode_super_mb_taskArgs decode_super_mb_task_args;
   1.191 +            decode_super_mb_task_args.d = d;
   1.192 +            decode_super_mb_task_args.sbe = sbe;
   1.193 +            decode_super_mb_task_args.smbc = smbc;
   1.194 +            decode_super_mb_task_args.ml = sml;
   1.195 +            decode_super_mb_task_args.mur = smur;
   1.196 +            decode_super_mb_task_args.m = sm;
   1.197 +            VSs__submit_task(&decode_super_mb_taskType, &decode_super_mb_task_args, animSlv);
   1.198          }
   1.199 -        draw_edges_task(d, sbe, smbc, sm, j);
   1.200 +        draw_edges_taskArgs draw_edges_task_args;
   1.201 +        draw_edges_task_args.d = d;
   1.202 +        draw_edges_task_args.sbe = sbe;
   1.203 +        draw_edges_task_args.sm = sm;
   1.204 +        draw_edges_task_args.smbc = smbc;
   1.205 +        draw_edges_task_args.line = VMS_App__malloc( sizeof(int) );
   1.206 +        *(draw_edges_task_args.line) = j;
   1.207 +        VSs__submit_task(&draw_edges_taskType, &draw_edges_task_args, animSlv);
   1.208      }
   1.209 -    #pragma omp taskwait on(*sm)
   1.210 +    VSs__taskwait_on(animSlv,sm);
   1.211 +    //#pragma omp taskwait on(*sm)
   1.212  
   1.213      release_smbc(h, smbc);
   1.214  }
   1.215  
   1.216 -#pragma omp task inout(*d) inout(*sbe)
   1.217 -static void decode_slice_mb_task(H264Context *h, MBRecContext *d, SliceBufferEntry *sbe){
   1.218 +typedef struct{  // argument record that decode_slice_mb_task receives through its void *_data parameter
   1.219 +    MBRecContext *d;  // first entry of the tables below: INOUT, sizeof(MBRecContext)
   1.220 +    SliceBufferEntry *sbe;  // second entry: INOUT, sizeof(SliceBufferEntry)
   1.221 +    H264Context *h;  // third entry: NONCTLD, sizeof(H264Context)
   1.222 +} decode_slice_mb_taskArgs;
   1.223 +
   1.224 +int32 decode_slice_mb_taskArgTypes[3] = {INOUT, INOUT, NONCTLD};  // same modes as the replaced pragma, inout(*d) inout(*sbe); h had no clause there
   1.225 +int32 decode_slice_mb_taskArgSizes[3] = {sizeof(MBRecContext), sizeof(SliceBufferEntry), sizeof(H264Context)};  // sizes of the pointed-to objects, in struct field order
   1.226 +
   1.227 +//#pragma omp task inout(*d) inout(*sbe)
   1.228 +static void decode_slice_mb_task(void *_data, SlaveVP *animatingSlv){
   1.229 +    decode_slice_mb_taskArgs* args = (decode_slice_mb_taskArgs*) _data;
   1.230 +    MBRecContext *d = args->d;
   1.231 +    SliceBufferEntry *sbe = args->sbe;
   1.232 +    H264Context *h = args->h;
   1.233 +    
   1.234      H264Slice *s = &sbe->slice;
   1.235  
   1.236      for (int i=0; i<2; i++){
   1.237 @@ -159,39 +293,99 @@
   1.238          }
   1.239      }
   1.240  
   1.241 -    #pragma omp critical (dpb)
   1.242 +    //#pragma omp critical (dpb)
   1.243 +    VSs__start_critical(animatingSlv,0);
   1.244      get_dpb_entry(h, s);
   1.245 -
   1.246 +    VSs__end_critical(animatingSlv,0);
   1.247 +            
   1.248      if (!h->no_mbd){
   1.249 -        decode_mb_in_slice (h, d, sbe);
   1.250 +        decode_mb_in_slice (h, d, sbe, animatingSlv);
   1.251      }
   1.252  
   1.253      for (int i=0; i<s->release_cnt; i++){
   1.254          for(int j=0; j<h->max_dpb_cnt; j++){
   1.255              if(h->dpb[j].cpn== s->release_ref_cpn[i]){
   1.256 -                #pragma omp critical (dpb)
   1.257 +                //#pragma omp critical (dpb)
   1.258 +                VSs__start_critical(animatingSlv,0);
   1.259                  release_dpb_entry(h, &h->dpb[j], 2);
   1.260 +                VSs__end_critical(animatingSlv,0);
   1.261                  break;
   1.262              }
   1.263          }
   1.264      }
   1.265      s->release_cnt=0;
   1.266 +    VSs__end_task( animatingSlv );
   1.267  }
   1.268  
   1.269 +VSsTaskType decode_slice_mb_taskType = {  // descriptor passed to VSs__submit_task_with_ID when decode_slice_mb_task is queued
   1.270 +    .fn = &decode_slice_mb_task,
   1.271 +    .numCtldArgs = 2,  // d, sbe: the entries of decode_slice_mb_taskArgTypes that are not NONCTLD
   1.272 +    .numTotalArgs = 3,  // the two above plus h
   1.273 +    .sizeOfArgs = sizeof(decode_slice_mb_taskArgs),
   1.274 +    .argTypes = decode_slice_mb_taskArgTypes,
   1.275 +    .argSizes = decode_slice_mb_taskArgSizes};
   1.276 +
   1.277  // for static 3d wave
   1.278  /*-------------------------------------------------------------------------------*/
   1.279 -#pragma omp task input(*d, *sbe, *ml, *mur, *mprev) inout(*m)
   1.280 -static void decode_3dwave_super_mb_task(MBRecContext *d, SliceBufferEntry *sbe, SuperMBContext *smbc, SuperMBTask *ml,
   1.281 -SuperMBTask *mur, SuperMBTask *mprev, SuperMBTask *m){
   1.282 +typedef struct{  // argument record that decode_3dwave_super_mb_task receives through its void *_data parameter
   1.283 +    MBRecContext *d;  // IN
   1.284 +    SliceBufferEntry *sbe;  // IN
   1.285 +    SuperMBTask *ml;  // IN; not read by the task body, only declared in the mode table
   1.286 +    SuperMBTask *mur;  // IN; not read by the task body, only declared in the mode table
   1.287 +    SuperMBTask *mprev;  // IN; not read by the task body, only declared in the mode table
   1.288 +    SuperMBTask *m;  // INOUT; supplies smb_x / smb_y of the block to decode
   1.289 +    SuperMBContext *smbc;  // NONCTLD
   1.290 +} decode_3dwave_super_mb_taskArgs;
   1.291 +
   1.292 +int32 decode_3dwave_super_mb_taskArgTypes[7] = {IN, IN, IN, IN, IN, INOUT, NONCTLD};  // same modes as the replaced pragma, input(*d, *sbe, *ml, *mur, *mprev) inout(*m); smbc had no clause there
   1.293 +int32 decode_3dwave_super_mb_taskArgSizes[7] = {sizeof(MBRecContext), sizeof(SliceBufferEntry), sizeof(SuperMBTask),
   1.294 +        sizeof(SuperMBTask), sizeof(SuperMBTask), sizeof(SuperMBTask), sizeof(SuperMBContext)};  // sizes of the pointed-to objects, in struct field order
   1.295 +
   1.296 +// was: #pragma omp task input(*d, *sbe, *ml, *mur, *mprev) inout(*m) -- the same dependency list is now carried by decode_3dwave_super_mb_taskArgTypes
   1.297 +static void decode_3dwave_super_mb_task(void *_data, SlaveVP *animatingSlv){  // task entry: same decode call as decode_super_mb_task, with mprev as an extra declared input
   1.298 +    decode_3dwave_super_mb_taskArgs* args = (decode_3dwave_super_mb_taskArgs*) _data;
   1.299 +    MBRecContext *d = args->d;
   1.300 +    SliceBufferEntry *sbe = args->sbe;
   1.301 +    SuperMBTask *ml = args->ml;  // unused below
   1.302 +    SuperMBTask *mur = args->mur;  // unused below
   1.303 +    SuperMBTask *mprev = args->mprev;  // unused below
   1.304 +    SuperMBTask *m = args->m;
   1.305 +    SuperMBContext *smbc = args->smbc;
   1.306 +    
   1.307      H264Slice *s = &sbe->slice;
   1.308      H264Mb *mbs = sbe->mbs;
   1.309  
   1.310      decode_super_mb_block(d, s, smbc, mbs, m->smb_x, m->smb_y);
   1.311 +    VSs__end_task( animatingSlv );  // every task function added in this file ends with this call
   1.312  }
   1.313  
   1.314 +VSsTaskType decode_3dwave_super_mb_taskType = {  // descriptor passed to VSs__submit_task when decode_3dwave_super_mb_task is queued
   1.315 +    .fn = &decode_3dwave_super_mb_task,
   1.316 +    .numCtldArgs = 6,  // d, sbe, ml, mur, mprev, m: the entries of the mode table that are not NONCTLD
   1.317 +    .numTotalArgs = 7,  // the six above plus smbc
   1.318 +    .sizeOfArgs = sizeof(decode_3dwave_super_mb_taskArgs),
   1.319 +    .argTypes = decode_3dwave_super_mb_taskArgTypes,
   1.320 +    .argSizes = decode_3dwave_super_mb_taskArgSizes};
   1.321 +
   1.322  // int init_ref_count=0;
   1.323 -#pragma omp task inout(*d, *sbe, *init)
   1.324 -static void init_ref_list_and_get_dpb_task(H264Context *h, MBRecContext *d, SliceBufferEntry *sbe, int *init){
   1.325 +typedef struct{  // argument record that init_ref_list_and_get_dpb_task receives through its void *_data parameter
   1.326 +    MBRecContext *d;  // first entry of the tables below: INOUT, sizeof(MBRecContext)
   1.327 +    SliceBufferEntry *sbe;  // second entry: INOUT, sizeof(SliceBufferEntry)
   1.328 +    int* init;  // third entry: INOUT, sizeof(int); the submit sites in this file pass &init
   1.329 +    H264Context *h;  // fourth entry: NONCTLD, sizeof(H264Context)
   1.330 +} init_ref_list_and_get_dpb_taskArgs;
   1.331 +
   1.332 +int32 init_ref_list_and_get_dpb_taskArgTypes[4] = {INOUT, INOUT, INOUT, NONCTLD};  // same modes as the replaced pragma, inout(*d, *sbe, *init); h had no clause there
   1.333 +int32 init_ref_list_and_get_dpb_taskArgSizes[4] = {sizeof(MBRecContext), sizeof(SliceBufferEntry), sizeof(int), sizeof(H264Context)};  // sizes of the pointed-to objects, in struct field order
   1.334 +
   1.335 +//#pragma omp task inout(*d, *sbe, *init)
   1.336 +static void init_ref_list_and_get_dpb_task(void *_data, SlaveVP *animatingSlv){
   1.337 +    init_ref_list_and_get_dpb_taskArgs* args = (init_ref_list_and_get_dpb_taskArgs*) _data;
   1.338 +    MBRecContext *d = args->d;
   1.339 +    SliceBufferEntry *sbe = args->sbe;
   1.340 +    int* initp = args->init;
   1.341 +    H264Context *h = args->h;
   1.342 +    
   1.343      H264Slice *s = &sbe->slice;
   1.344      for (int i=0; i<2; i++){
   1.345          for(int j=0; j< s->ref_count[i]; j++){
   1.346 @@ -207,12 +401,23 @@
   1.347          }
   1.348      }
   1.349  
   1.350 -    #pragma omp critical (dpb)
   1.351 +    //#pragma omp critical (dpb)
   1.352 +    VSs__start_critical(animatingSlv,0);
   1.353      get_dpb_entry(h, s);
   1.354 -
   1.355 +    VSs__end_critical(animatingSlv,0);
   1.356 +    
   1.357 +    VSs__end_task( animatingSlv );
   1.358  }
   1.359  
   1.360 -static SuperMBTask* add_decode_slice_3dwave_tasks(MBRecContext *d, SliceBufferEntry *sbe, SuperMBContext *smbc){
   1.361 +VSsTaskType init_ref_list_and_get_dpb_taskType = {  // descriptor passed to VSs__submit_task when init_ref_list_and_get_dpb_task is queued
   1.362 +    .fn = &init_ref_list_and_get_dpb_task,
   1.363 +    .numCtldArgs = 3,  // d, sbe, init: the entries of the mode table that are not NONCTLD
   1.364 +    .numTotalArgs = 4,  // the three above plus h
   1.365 +    .sizeOfArgs = sizeof(init_ref_list_and_get_dpb_taskArgs),
   1.366 +    .argTypes = init_ref_list_and_get_dpb_taskArgTypes,
   1.367 +    .argSizes = init_ref_list_and_get_dpb_taskArgSizes};
   1.368 +
   1.369 +static SuperMBTask* add_decode_slice_3dwave_tasks(MBRecContext *d, SliceBufferEntry *sbe, SuperMBContext *smbc, SlaveVP* animSlv){
   1.370      int i,j;
   1.371      
   1.372      int smb_3d_height =smbc->nsmb_3dheight;
   1.373 @@ -229,9 +434,24 @@
   1.374              sml  = sm - ((i > 0) ? 1: 0);
   1.375              smur = sm + (((i < smb_width-1) && (j >0))  ? -smb_width+1: 0);
   1.376              smprev = smbs_prev + (j + smb_diff_prev+1)*smb_width -1;
   1.377 -            decode_3dwave_super_mb_task(d, sbe, smbc, sml, smur, smprev, sm);
   1.378 +            decode_3dwave_super_mb_taskArgs decode_3dwave_super_mb_task_args;
   1.379 +            decode_3dwave_super_mb_task_args.d = d;
   1.380 +            decode_3dwave_super_mb_task_args.sbe = sbe;
   1.381 +            decode_3dwave_super_mb_task_args.smbc = smbc;
   1.382 +            decode_3dwave_super_mb_task_args.ml = sml;
   1.383 +            decode_3dwave_super_mb_task_args.mur = smur;
   1.384 +            decode_3dwave_super_mb_task_args.mprev = smprev;
   1.385 +            decode_3dwave_super_mb_task_args.m = sm;
   1.386 +            VSs__submit_task(&decode_3dwave_super_mb_taskType, &decode_3dwave_super_mb_task_args, animSlv);
   1.387          }
   1.388 -        draw_edges_task(d, sbe, smbc, sm, j);
   1.389 +        draw_edges_taskArgs draw_edges_task_args;
   1.390 +        draw_edges_task_args.d = d;
   1.391 +        draw_edges_task_args.sbe = sbe;
   1.392 +        draw_edges_task_args.sm = sm;
   1.393 +        draw_edges_task_args.smbc = smbc;
   1.394 +        draw_edges_task_args.line = VMS_App__malloc( sizeof(int) );
   1.395 +        *(draw_edges_task_args.line) = j;
   1.396 +        VSs__submit_task(&draw_edges_taskType, &draw_edges_task_args, animSlv);
   1.397      }
   1.398  
   1.399      for(; j< smb_height; j++){
   1.400 @@ -239,21 +459,57 @@
   1.401              sm = smbs + j*smb_width + i;
   1.402              sml  = sm - ((i > 0) ? 1: 0);
   1.403              smur = sm + (((i < smb_width-1) && (j >0))  ? -smb_width+1: 0);
   1.404 -            decode_super_mb_task(d, sbe, smbc, sml, smur, sm);
   1.405 +            decode_super_mb_taskArgs decode_super_mb_task_args;
   1.406 +            decode_super_mb_task_args.d = d;
   1.407 +            decode_super_mb_task_args.sbe = sbe;
   1.408 +            decode_super_mb_task_args.smbc = smbc;
   1.409 +            decode_super_mb_task_args.ml = sml;
   1.410 +            decode_super_mb_task_args.mur = smur;
   1.411 +            decode_super_mb_task_args.m = sm;
   1.412 +            VSs__submit_task(&decode_super_mb_taskType, &decode_super_mb_task_args, animSlv);
   1.413          }
   1.414 -        draw_edges_task(d, sbe, smbc, sm, j);
   1.415 +        draw_edges_taskArgs draw_edges_task_args;
   1.416 +        draw_edges_task_args.d = d;
   1.417 +        draw_edges_task_args.sbe = sbe;
   1.418 +        draw_edges_task_args.sm = sm;
   1.419 +        draw_edges_task_args.smbc = smbc;
   1.420 +        draw_edges_task_args.line = VMS_App__malloc( sizeof(int) );
   1.421 +        *(draw_edges_task_args.line) = j;
   1.422 +        VSs__submit_task(&draw_edges_taskType, &draw_edges_task_args, animSlv);
   1.423      }
   1.424      return sm;
   1.425  }
   1.426  
   1.427 -#pragma omp task inout(*d, *sbe, *release) input (*lastsmb)
   1.428 -static void release_ref_list_task(H264Context *h, SuperMBContext *smbc, MBRecContext *d, SliceBufferEntry *sbe, SuperMBTask *lastsmb, int *release){
   1.429 +typedef struct{  // argument record that release_ref_list_task receives through its void *_data parameter
   1.430 +    MBRecContext *d;  // first entry of the tables below: INOUT, sizeof(MBRecContext)
   1.431 +    SliceBufferEntry *sbe;  // second entry: INOUT, sizeof(SliceBufferEntry)
   1.432 +    SuperMBTask *lastsmb;  // third entry: IN, sizeof(SuperMBTask); the submit sites pass the value returned by add_decode_slice_3dwave_tasks
   1.433 +    int* release;  // fourth entry: INOUT, sizeof(int); the submit sites in this file pass &release
   1.434 +    H264Context *h;  // fifth entry: NONCTLD, sizeof(H264Context)
   1.435 +    SuperMBContext *smbc;  // sixth entry: NONCTLD, sizeof(SuperMBContext)
   1.436 +} release_ref_list_taskArgs;
   1.437 +
   1.438 +int32 release_ref_list_taskArgTypes[6] = {INOUT, INOUT, IN, INOUT, NONCTLD, NONCTLD};  // same modes as the replaced pragma, inout(*d, *sbe, *release) input(*lastsmb); h and smbc had no clause there
   1.439 +int32 release_ref_list_taskArgSizes[6] = {sizeof(MBRecContext), sizeof(SliceBufferEntry), sizeof(SuperMBTask), sizeof(int), sizeof(H264Context), sizeof(SuperMBContext)};  // sizes of the pointed-to objects, in struct field order
   1.440 +
   1.441 +//#pragma omp task inout(*d, *sbe, *release) input (*lastsmb)
   1.442 +static void release_ref_list_task(void *_data, SlaveVP *animatingSlv){
   1.443 +    release_ref_list_taskArgs* args = (release_ref_list_taskArgs*) _data;
   1.444 +    MBRecContext *d = args->d;
   1.445 +    SliceBufferEntry *sbe = args->sbe;
   1.446 +    SuperMBTask *lastsmb = args->lastsmb;
   1.447 +    int* releasep = args->release;
   1.448 +    H264Context *h = args->h;
   1.449 +    SuperMBContext *smbc = args->smbc;
   1.450 +    
   1.451      H264Slice *s = &sbe->slice;
   1.452      for (int i=0; i<s->release_cnt; i++){
   1.453          for(int j=0; j<h->max_dpb_cnt; j++){
   1.454              if(h->dpb[j].cpn== s->release_ref_cpn[i]){
   1.455 -                #pragma omp critical (dpb)
   1.456 +                //#pragma omp critical (dpb)
   1.457 +                VSs__start_critical(animatingSlv,0);
   1.458                  release_dpb_entry(h, &h->dpb[j], 2);
   1.459 +                VSs__end_critical(animatingSlv,0);
   1.460                  break;
   1.461              }
   1.462          }
   1.463 @@ -262,29 +518,67 @@
   1.464  
   1.465      release_smbc(h, smbc);
   1.466      
   1.467 +    VSs__end_task( animatingSlv );
   1.468  }
   1.469  
   1.470 +VSsTaskType release_ref_list_taskType = {  // descriptor passed to VSs__submit_task when release_ref_list_task is queued
   1.471 +    .fn = &release_ref_list_task,
   1.472 +    .numCtldArgs = 4,  // d, sbe, lastsmb, release: the entries of the mode table that are not NONCTLD
   1.473 +    .numTotalArgs = 6,  // the four above plus h and smbc
   1.474 +    .sizeOfArgs = sizeof(release_ref_list_taskArgs),
   1.475 +    .argTypes = release_ref_list_taskArgTypes,
   1.476 +    .argSizes = release_ref_list_taskArgSizes};
   1.477 +
   1.478  // static void decode_mb_static_3dwave(H264Context *h, int mb_height, int mb_width, MBRecContext *d, H264Slice *s, H264Mb *mbs, SuperMBTask *smbs, SuperMBTask *smbs_prev){
   1.479  //
   1.480  // }
   1.481  /*-------------------------------------------------------------------------------*/
   1.482  //end for static 3d wave
   1.483  
   1.484 -#pragma omp task inout (*oc) input(*sbe)
   1.485 -static void output_task(H264Context *h, OutputContext *oc, SliceBufferEntry *sbe){
   1.486 +typedef struct{  // argument record that output_task receives through its void *_data parameter
   1.487 +    OutputContext *oc;  // first entry of the tables below: INOUT, sizeof(OutputContext)
   1.488 +    SliceBufferEntry *sbe;  // second entry: IN, sizeof(SliceBufferEntry)
   1.489 +    H264Context *h;  // third entry: NONCTLD, sizeof(H264Context)
   1.490 +} output_taskArgs;
   1.491 +
   1.492 +int32 output_taskArgTypes[3] = {INOUT, IN, NONCTLD};  // same modes as the replaced pragma, inout (*oc) input(*sbe); h had no clause there
   1.493 +int32 output_taskArgSizes[3] = {sizeof(OutputContext), sizeof(SliceBufferEntry), sizeof(H264Context)};  // sizes of the pointed-to objects, in struct field order
   1.494 +
   1.495 +// was: #pragma omp task inout (*oc) input(*sbe) -- the same dependency list is now carried by output_taskArgTypes
   1.496 +static void output_task(void *_data, SlaveVP *animatingSlv){  // task entry: passes the slice's current picture to output_frame, releases any returned picture, prints a report
   1.497 +    output_taskArgs* args = (output_taskArgs*) _data;
   1.498 +    OutputContext *oc = args->oc;
   1.499 +    SliceBufferEntry *sbe = args->sbe; 
   1.500 +    H264Context *h = args->h;
   1.501 +    
   1.502      DecodedPicture* out =output_frame(h, oc, sbe->slice.curr_pic, h->ofile, h->frame_width, h->frame_height);
   1.503      if (out){
   1.504 -        #pragma omp critical (dpb)
   1.505 +        // was: #pragma omp critical (dpb) -- replaced by the start/end critical pair below
   1.506 +        VSs__start_critical(animatingSlv,0);  // id 0 is the only critical-section id used in this file, always around get_dpb_entry / release_dpb_entry
   1.507          release_dpb_entry(h, out, 1);
   1.508 +        VSs__end_critical(animatingSlv,0);
   1.509      }
   1.510      print_report(oc->frame_number, oc->video_size, 0, h->verbose);
   1.511 +    
   1.512 +    VSs__end_task( animatingSlv );  // every task function added in this file ends with this call
   1.513  }
   1.514  
   1.515 +VSsTaskType output_taskType = {  // descriptor passed to VSs__submit_task / VSs__submit_task_with_ID when output_task is queued
   1.516 +    .fn = &output_task,
   1.517 +    .numCtldArgs = 2,  // oc, sbe: the entries of output_taskArgTypes that are not NONCTLD
   1.518 +    .numTotalArgs = 3,  // the two above plus h
   1.519 +    .sizeOfArgs = sizeof(output_taskArgs),
   1.520 +    .argTypes = output_taskArgTypes,
   1.521 +    .argSizes = output_taskArgSizes};
   1.522 +
   1.523  /*
   1.524  * The following code is the main loop of the file converter
   1.525  */
   1.526  //Put VMS entry point here
   1.527 -int h264_decode_ompss( H264Context *h) {
   1.528 +
   1.529 +void h264_decode_ompss( void *_params, SlaveVP *animSlv) {
   1.530 +    H264Context* h = (H264Context*) _params;
   1.531 +    
   1.532      const int bufs = h->pipe_bufs;
   1.533  
   1.534      ParserContext *pc;
   1.535 @@ -304,9 +598,11 @@
   1.536          pthread_create(&sdl_thr, NULL, sdl_thread, h);
   1.537      }
   1.538  #endif
   1.539 +    /*sbe= VMS_WL__malloc(sizeof(SliceBufferEntry) * bufs);
   1.540 +    if (sbe)
   1.541 +        memset(sbe, 0, sizeof(SliceBufferEntry) * bufs);*/
   1.542      sbe= av_mallocz(sizeof(SliceBufferEntry) * bufs);
   1.543  
   1.544 -
   1.545      pc = get_parse_context(h->ifile);
   1.546      nc = get_nal_context(h->width, h->height);
   1.547  
   1.548 @@ -325,52 +621,135 @@
   1.549      if (h->static_3d && bufs < h->num_frames ){
   1.550          int num_pre_ed =0;
   1.551          for (num_pre_ed=0; num_pre_ed< bufs -1 && !pc->final_frame; num_pre_ed++){
   1.552 -            parse_task( h, pc, nc, &sbe[k%bufs] );
   1.553 -            decode_slice_entropy_task(h, ec[k%bufs], &sbe[k%bufs]);
   1.554 -            #pragma omp taskwait on(*pc)
   1.555 +            parse_taskArgs parse_task_args;
   1.556 +            parse_task_args.h = h;
   1.557 +            parse_task_args.pc = pc;
   1.558 +            parse_task_args.nc = nc;
   1.559 +            parse_task_args.sbe = &sbe[k%bufs];
   1.560 +            VSs__submit_task(&parse_taskType, &parse_task_args, animSlv);
   1.561 +            
   1.562 +            decode_slice_entropy_taskArgs decode_slice_entropy_task_args;
   1.563 +            decode_slice_entropy_task_args.h = h;
   1.564 +            decode_slice_entropy_task_args.ec = ec[k%bufs];
   1.565 +            decode_slice_entropy_task_args.sbe = &sbe[k%bufs];
   1.566 +            VSs__submit_task(&decode_slice_entropy_taskType, &decode_slice_entropy_task_args, animSlv);        
   1.567 +            //#pragma omp taskwait on(*pc)
   1.568 +            VSs__taskwait_on(animSlv,pc);
   1.569              k++;
   1.570          }
   1.571  
   1.572          while(!pc->final_frame && frames++ < h->num_frames && !h->quit){
   1.573 -            parse_task( h, pc, nc, &sbe[k%bufs] );
   1.574 -            decode_slice_entropy_task(h, ec[k%bufs], &sbe[k%bufs]);
   1.575 +            parse_taskArgs parse_task_args;
   1.576 +            parse_task_args.h = h;
   1.577 +            parse_task_args.pc = pc;
   1.578 +            parse_task_args.nc = nc;
   1.579 +            parse_task_args.sbe = &sbe[k%bufs];
   1.580 +            VSs__submit_task(&parse_taskType, &parse_task_args, animSlv);
   1.581 +            
   1.582 +            decode_slice_entropy_taskArgs decode_slice_entropy_task_args;
   1.583 +            decode_slice_entropy_task_args.h = h;
   1.584 +            decode_slice_entropy_task_args.ec = ec[k%bufs];
   1.585 +            decode_slice_entropy_task_args.sbe = &sbe[k%bufs];
   1.586 +            VSs__submit_task(&decode_slice_entropy_taskType, &decode_slice_entropy_task_args, animSlv); 
   1.587  
   1.588              k++;
   1.589 +            
   1.590 +            init_ref_list_and_get_dpb_taskArgs init_ref_list_and_get_dpb_task_args;
   1.591 +            init_ref_list_and_get_dpb_task_args.h = h;
   1.592 +            init_ref_list_and_get_dpb_task_args.d = rc[k%2];
   1.593 +            init_ref_list_and_get_dpb_task_args.sbe = &sbe[k%bufs];
   1.594 +            init_ref_list_and_get_dpb_task_args.init = &init;
   1.595 +            VSs__submit_task(&init_ref_list_and_get_dpb_taskType, &init_ref_list_and_get_dpb_task_args, animSlv); 
   1.596  
   1.597 -            init_ref_list_and_get_dpb_task(h, rc[k%2], &sbe[k%bufs], &init);
   1.598              smbc = acquire_smbc(h);
   1.599 -            SuperMBTask *lastsmb= add_decode_slice_3dwave_tasks(rc[k%2], &sbe[k%bufs], smbc);
   1.600 -            release_ref_list_task(h, smbc, rc[k%2], &sbe[k%bufs], lastsmb, &release);
   1.601 +            SuperMBTask *lastsmb= add_decode_slice_3dwave_tasks(rc[k%2], &sbe[k%bufs], smbc, animSlv);
   1.602 +            release_ref_list_taskArgs release_ref_list_task_args;
   1.603 +            release_ref_list_task_args.h = h;
   1.604 +            release_ref_list_task_args.smbc = smbc;
   1.605 +            release_ref_list_task_args.d = rc[k%2];
   1.606 +            release_ref_list_task_args.sbe = &sbe[k%bufs];
   1.607 +            release_ref_list_task_args.lastsmb = lastsmb;
   1.608 +            release_ref_list_task_args.release = &release;
   1.609 +            VSs__submit_task(&release_ref_list_taskType, &release_ref_list_task_args, animSlv);
   1.610  
   1.611 -            output_task (h, oc, &sbe[k%bufs]);
   1.612 -            #pragma omp taskwait on(*pc)
   1.613 +            output_taskArgs output_task_args;
   1.614 +            output_task_args.h = h;
   1.615 +            output_task_args.oc = oc;
   1.616 +            output_task_args.sbe = &sbe[k%bufs];
   1.617 +            VSs__submit_task(&output_taskType, &output_task_args, animSlv);
   1.618 +            //#pragma omp taskwait on(*pc)
   1.619 +            VSs__taskwait_on(animSlv,pc);
   1.620          }
   1.621  
   1.622          for (int i=0; i< num_pre_ed; i++){
   1.623              k++;
   1.624 -            init_ref_list_and_get_dpb_task(h, rc[k%2], &sbe[k%bufs], &init);
   1.625 +            init_ref_list_and_get_dpb_taskArgs init_ref_list_and_get_dpb_task_args;
   1.626 +            init_ref_list_and_get_dpb_task_args.h = h;
   1.627 +            init_ref_list_and_get_dpb_task_args.d = rc[k%2];
   1.628 +            init_ref_list_and_get_dpb_task_args.sbe = &sbe[k%bufs];
   1.629 +            init_ref_list_and_get_dpb_task_args.init = &init;
   1.630 +            VSs__submit_task(&init_ref_list_and_get_dpb_taskType, &init_ref_list_and_get_dpb_task_args, animSlv); 
   1.631              smbc = acquire_smbc(h);
   1.632 -            SuperMBTask *lastsmb= add_decode_slice_3dwave_tasks(rc[k%2], &sbe[k%bufs], smbc);
   1.633 -            release_ref_list_task(h, smbc, rc[k%2], &sbe[k%bufs], lastsmb, &release);
   1.634 +            SuperMBTask *lastsmb= add_decode_slice_3dwave_tasks(rc[k%2], &sbe[k%bufs], smbc, animSlv);
   1.635 +            release_ref_list_taskArgs release_ref_list_task_args;
   1.636 +            release_ref_list_task_args.h = h;
   1.637 +            release_ref_list_task_args.smbc = smbc;
   1.638 +            release_ref_list_task_args.d = rc[k%2];
   1.639 +            release_ref_list_task_args.sbe = &sbe[k%bufs];
   1.640 +            release_ref_list_task_args.lastsmb = lastsmb;
   1.641 +            release_ref_list_task_args.release = &release;
   1.642 +            VSs__submit_task(&release_ref_list_taskType, &release_ref_list_task_args, animSlv); 
   1.643  
   1.644 -            output_task (h, oc, &sbe[k%bufs]);
   1.645 +            output_taskArgs output_task_args;
   1.646 +            output_task_args.h = h;
   1.647 +            output_task_args.oc = oc;
   1.648 +            output_task_args.sbe = &sbe[k%bufs];
   1.649 +            VSs__submit_task(&output_taskType, &output_task_args, animSlv);
   1.650          }
   1.651  
   1.652      } else {
   1.653          while(!pc->final_frame && frames++ < h->num_frames && !h->quit){
   1.654 -            parse_task( h, pc, nc, &sbe[k%bufs] );
   1.655 +            int32* taskID;
   1.656 +            taskID = VSs__create_taskID_of_size(1,animSlv );
   1.657 +            taskID[1] = frames*10+1;
   1.658 +            parse_taskArgs parse_task_args;
   1.659 +            parse_task_args.h = h;
   1.660 +            parse_task_args.pc = pc;
   1.661 +            parse_task_args.nc = nc;
   1.662 +            parse_task_args.sbe = &sbe[k%bufs];
   1.663 +            VSs__submit_task_with_ID(&parse_taskType, &parse_task_args, taskID, animSlv);
   1.664  
   1.665 -            decode_slice_entropy_task(h, ec[k%bufs], &sbe[k%bufs]);
   1.666 +            taskID = VSs__create_taskID_of_size(1,animSlv );
   1.667 +            taskID[1] = frames*10+2;
   1.668 +            decode_slice_entropy_taskArgs decode_slice_entropy_task_args;
   1.669 +            decode_slice_entropy_task_args.h = h;
   1.670 +            decode_slice_entropy_task_args.ec = ec[k%bufs];
   1.671 +            decode_slice_entropy_task_args.sbe = &sbe[k%bufs];
   1.672 +            VSs__submit_task_with_ID(&decode_slice_entropy_taskType, &decode_slice_entropy_task_args,taskID, animSlv);
   1.673  
   1.674 -            decode_slice_mb_task(h, rc[0], &sbe[k%bufs]);
   1.675 +            taskID = VSs__create_taskID_of_size(1,animSlv );
   1.676 +            taskID[1] = frames*10+3;
   1.677 +            decode_slice_mb_taskArgs decode_slice_mb_task_args;
   1.678 +            decode_slice_mb_task_args.h = h;
   1.679 +            decode_slice_mb_task_args.d = rc[0];
   1.680 +            decode_slice_mb_task_args.sbe = &sbe[k%bufs];
   1.681 +            VSs__submit_task_with_ID(&decode_slice_mb_taskType, &decode_slice_mb_task_args,taskID, animSlv); 
   1.682  
   1.683 -            output_task (h, oc, &sbe[k%bufs]);
   1.684 -            #pragma omp taskwait on(*pc)
   1.685 +            taskID = VSs__create_taskID_of_size(1,animSlv );
   1.686 +            taskID[1] = frames*10+4;
   1.687 +            output_taskArgs output_task_args;
   1.688 +            output_task_args.h = h;
   1.689 +            output_task_args.oc = oc;
   1.690 +            output_task_args.sbe = &sbe[k%bufs];
   1.691 +            VSs__submit_task_with_ID(&output_taskType, &output_task_args,taskID, animSlv);
   1.692 +            //#pragma omp taskwait on(*pc)
   1.693 +            VSs__taskwait_on(animSlv,pc);
   1.694              k++;
   1.695          }
   1.696      }
   1.697 -    #pragma omp taskwait
   1.698 -
   1.699 +    //#pragma omp taskwait
   1.700 +    VSs__taskwait(animSlv);
   1.701 +    
   1.702      while ((out=output_frame(h, oc, NULL, h->ofile, h->frame_width, h->frame_height))) ;
   1.703  
   1.704      print_report(oc->frame_number, oc->video_size, 1, h->verbose);
   1.705 @@ -397,5 +776,5 @@
   1.706      }
   1.707  #endif
   1.708  
   1.709 -    return 0;
   1.710 +    VSs__end_thread( animSlv );
   1.711  }