Mercurial > cgi-bin > hgwebdir.cgi > PR > Applications > VSs > VSs__H264__App
changeset 3:0b056460c67d
changed code to use VSs
| author | Nina Engelhardt <nengel@mailbox.tu-berlin.de> |
|---|---|
| date | Mon, 29 Oct 2012 16:44:27 +0100 |
| parents | 897f711a7157 |
| children | 96e628866d41 |
| files | h264dec.c libavcodec/h264.h libavcodec/h264_misc.c libavcodec/h264_ompss.c |
| diffstat | 4 files changed, 452 insertions(+), 72 deletions(-) [+] |
line diff
1.1 --- a/h264dec.c Tue Sep 25 15:55:33 2012 +0200 1.2 +++ b/h264dec.c Mon Oct 29 16:44:27 2012 +0100 1.3 @@ -20,6 +20,7 @@ 1.4 1.5 #include <assert.h> 1.6 1.7 +#include "VSs_impl/VSs.h" 1.8 1.9 static const char program_name[] = "h264dec"; 1.10 static const int program_birth_year = 2010; 1.11 @@ -259,8 +260,7 @@ 1.12 1.13 H264Context *h = get_h264dec_context(file_name, ifile, ofile, frame_width, frame_height, &cli_opts); 1.14 #if OMPSS 1.15 - if (h264_decode_ompss( h ) < 0) 1.16 - av_exit(-1); 1.17 + VSs__create_seed_slave_and_do_work( &h264_decode_ompss , (void*)h ); 1.18 #else 1.19 if (parallel){ 1.20 if (ARCH_CELL && !no_arch){
2.1 --- a/libavcodec/h264.h Tue Sep 25 15:55:33 2012 +0200 2.2 +++ b/libavcodec/h264.h Mon Oct 29 16:44:27 2012 +0100 2.3 @@ -39,6 +39,7 @@ 2.4 #include "h264_rec.h" 2.5 #include "h264_deblock.h" 2.6 #include "h264_types.h" 2.7 +#include "VSs_impl/VSs.h" 2.8 2.9 typedef struct h264_options{ 2.10 int statsched; 2.11 @@ -63,7 +64,7 @@ 2.12 int h264_decode_cell(H264Context *h); 2.13 int h264_decode_cell_seq(H264Context *h); 2.14 2.15 -int h264_decode_ompss(H264Context *h); 2.16 +void h264_decode_ompss(void *_params, SlaveVP *animSlv); 2.17 2.18 int h264_decode_pthread(H264Context *h); 2.19 int h264_decode_seq(H264Context *h);
3.1 --- a/libavcodec/h264_misc.c Tue Sep 25 15:55:33 2012 +0200 3.2 +++ b/libavcodec/h264_misc.c Mon Oct 29 16:44:27 2012 +0100 3.3 @@ -266,18 +266,18 @@ 3.4 } 3.5 3.6 if (verbose){ 3.7 - fprintf(stderr, "frame=%5d avgfps=%3d curfps=%3d\r", frame_number, (int)(frame_number/t+0.5), (int)((frame_number - last_frame_number)/t2+0.5) ); 3.8 - fflush(stderr); 3.9 + //fprintf(stderr, "frame=%5d avgfps=%3d curfps=%3d\r", frame_number, (int)(frame_number/t+0.5), (int)((frame_number - last_frame_number)/t2+0.5) ); 3.10 + //fflush(stderr); 3.11 } 3.12 last_frame_number = frame_number; 3.13 last_time = cur_time; 3.14 3.15 if (is_last_report){ 3.16 t = (av_gettime()-timer_start) / 1000000.0; 3.17 - fprintf(stderr, "%c[2Kframe=%5d avgfps=%3d\r", 27, frame_number, (int)(frame_number/t+0.5)); 3.18 - fprintf(stderr, "\n"); 3.19 - fprintf(stderr, "video:%1.0fkB\n", video_size/1024.0); 3.20 - fflush(stderr); 3.21 + //fprintf(stderr, "%c[2Kframe=%5d avgfps=%3d\r", 27, frame_number, (int)(frame_number/t+0.5)); 3.22 + //fprintf(stderr, "\n"); 3.23 + //fprintf(stderr, "video:%1.0fkB\n", video_size/1024.0); 3.24 + //fflush(stderr); 3.25 } 3.26 } 3.27
4.1 --- a/libavcodec/h264_ompss.c Tue Sep 25 15:55:33 2012 +0200 4.2 +++ b/libavcodec/h264_ompss.c Mon Oct 29 16:44:27 2012 +0100 4.3 @@ -27,9 +27,25 @@ 4.4 #include "h264_misc.h" 4.5 // #undef NDEBUG 4.6 #include <assert.h> 4.7 +#include "VSs_impl/VSs.h" 4.8 4.9 -#pragma omp task inout(*pc, *nc) output(*sbe) 4.10 -static void parse_task(H264Context *h, ParserContext *pc, NalContext *nc, SliceBufferEntry *sbe){ 4.11 +typedef struct{ 4.12 + ParserContext *pc; 4.13 + NalContext *nc; 4.14 + SliceBufferEntry *sbe; 4.15 + H264Context *h; 4.16 +} parse_taskArgs; 4.17 + 4.18 +int32 parse_taskArgTypes[4] = {INOUT, INOUT, OUTPUT, NONCTLD}; 4.19 +int32 parse_taskArgSizes[4] = {sizeof(ParserContext), sizeof(NalContext), sizeof(SliceBufferEntry), sizeof(H264Context)}; 4.20 + 4.21 +//#pragma omp task inout(*pc, *nc) output(*sbe) 4.22 +static void parse_task(void *_data, SlaveVP *animatingSlv){ 4.23 + parse_taskArgs* args = (parse_taskArgs*) _data; 4.24 + ParserContext *pc = args->pc; 4.25 + NalContext *nc = args->nc; 4.26 + SliceBufferEntry *sbe = args->sbe; 4.27 + H264Context *h = args->h; 4.28 H264Slice *s; 4.29 4.30 if (!sbe->initialized){ 4.31 @@ -41,10 +57,34 @@ 4.32 s = &sbe->slice; 4.33 4.34 decode_nal_units(nc, s, &sbe->gb); 4.35 + 4.36 + VSs__end_task( animatingSlv ); 4.37 } 4.38 4.39 -#pragma omp task inout(*ec) inout(*sbe) 4.40 -static void decode_slice_entropy_task(H264Context *h, EntropyContext *ec, SliceBufferEntry *sbe){ 4.41 +VSsTaskType parse_taskType = { 4.42 + .fn = &parse_task, 4.43 + .numCtldArgs = 3, 4.44 + .numTotalArgs = 4, 4.45 + .sizeOfArgs = sizeof(parse_taskArgs), 4.46 + .argTypes = parse_taskArgTypes, 4.47 + .argSizes = parse_taskArgSizes}; 4.48 + 4.49 + 4.50 +typedef struct{ 4.51 + EntropyContext *ec; 4.52 + SliceBufferEntry *sbe; 4.53 + H264Context *h; 4.54 +} decode_slice_entropy_taskArgs; 4.55 + 4.56 +int32 decode_slice_entropy_taskArgTypes[3] = {INOUT, INOUT, NONCTLD}; 4.57 +int32 decode_slice_entropy_taskArgSizes[3] = {sizeof(EntropyContext), sizeof(SliceBufferEntry), sizeof(H264Context)}; 4.58 + 4.59 +//#pragma omp task inout(*ec) inout(*sbe) 4.60 +static void decode_slice_entropy_task(void *_data, SlaveVP *animatingSlv){ 4.61 + decode_slice_entropy_taskArgs* args = (decode_slice_entropy_taskArgs*) _data; 4.62 + EntropyContext *ec = args->ec; 4.63 + SliceBufferEntry *sbe = args->sbe; 4.64 + H264Context *h = args->h; 4.65 int i,j; 4.66 H264Slice *s = &sbe->slice; 4.67 GetBitContext *gb = &sbe->gb; 4.68 @@ -54,7 +94,7 @@ 4.69 4.70 if( !s->pps.cabac ){ 4.71 av_log(AV_LOG_ERROR, "Only cabac encoded streams are supported\n"); 4.72 - return ; 4.73 + VSs__end_task( animatingSlv ); 4.74 } 4.75 4.76 init_dequant_tables(s, ec); 4.77 @@ -84,12 +124,22 @@ 4.78 (void) eos; 4.79 if( ret < 0 || c->bytestream > c->bytestream_end + 2) { 4.80 av_log(AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", m->mb_x, m->mb_y, c->bytestream_end - c->bytestream); 4.81 - return ; 4.82 + VSs__end_task( animatingSlv ); 4.83 } 4.84 } 4.85 } 4.86 + VSs__end_task( animatingSlv ); 4.87 } 4.88 4.89 +VSsTaskType decode_slice_entropy_taskType = { 4.90 + .fn = &decode_slice_entropy_task, 4.91 + .numCtldArgs = 2, 4.92 + .numTotalArgs = 3, 4.93 + .sizeOfArgs = sizeof(decode_slice_entropy_taskArgs), 4.94 + .argTypes = decode_slice_entropy_taskArgTypes, 4.95 + .argSizes = decode_slice_entropy_taskArgSizes}; 4.96 + 4.97 + 4.98 static void decode_super_mb_block(MBRecContext *d, H264Slice *s, SuperMBContext *smbc, H264Mb *mbs, int smb_x, int smb_y){ 4.99 MBRecState mrs; 4.100 // memset(&mrs, 0, sizeof(MBRecState)); 4.101 @@ -104,22 +154,77 @@ 4.102 } 4.103 } 4.104 4.105 -#pragma omp task input(*d, *sbe, *ml, *mur) inout(*m) 4.106 -static void decode_super_mb_task(MBRecContext *d, SliceBufferEntry *sbe, SuperMBContext *smbc, SuperMBTask *ml, 4.107 -SuperMBTask *mur, SuperMBTask *m){ 4.108 +typedef struct{ 4.109 + MBRecContext *d; 4.110 + SliceBufferEntry *sbe; 4.111 + SuperMBTask *ml; 4.112 + SuperMBTask *mur; 4.113 + SuperMBTask *m; 4.114 + SuperMBContext *smbc; 4.115 +} decode_super_mb_taskArgs; 4.116 + 4.117 +int32 decode_super_mb_taskArgTypes[6] = {IN, IN, IN, IN, INOUT, NONCTLD}; 4.118 +int32 decode_super_mb_taskArgSizes[6] = {sizeof(MBRecContext), sizeof(SliceBufferEntry), sizeof(SuperMBTask), sizeof(SuperMBTask), sizeof(SuperMBTask), sizeof(SuperMBContext)}; 4.119 + 4.120 +//#pragma omp task input(*d, *sbe, *ml, *mur) inout(*m) 4.121 +static void decode_super_mb_task(void *_data, SlaveVP *animatingSlv){ 4.122 + decode_super_mb_taskArgs* args = (decode_super_mb_taskArgs*) _data; 4.123 + MBRecContext *d = args->d; 4.124 + SliceBufferEntry *sbe = args->sbe; 4.125 + SuperMBTask *ml = args->ml; 4.126 + SuperMBTask *mur = args->mur; 4.127 + SuperMBTask *m = args->m; 4.128 + SuperMBContext *smbc = args->smbc; 4.129 H264Slice *s = &sbe->slice; 4.130 H264Mb *mbs = sbe->mbs; 4.131 decode_super_mb_block(d, s, smbc, mbs, m->smb_x, m->smb_y); 4.132 + VSs__end_task( animatingSlv ); 4.133 } 4.134 4.135 -#pragma omp task input(*d, *sbe) inout(*sm) 4.136 -static void draw_edges_task(MBRecContext *d, SliceBufferEntry *sbe, SuperMBContext *smbc, SuperMBTask *sm, int line){ 4.137 +VSsTaskType decode_super_mb_taskType = { 4.138 + .fn = &decode_super_mb_task, 4.139 + .numCtldArgs = 5, 4.140 + .numTotalArgs = 6, 4.141 + .sizeOfArgs = sizeof(decode_super_mb_taskArgs), 4.142 + .argTypes = decode_super_mb_taskArgTypes, 4.143 + .argSizes = decode_super_mb_taskArgSizes}; 4.144 + 4.145 + 4.146 + 4.147 +typedef struct{ 4.148 + MBRecContext *d; 4.149 + SliceBufferEntry *sbe; 4.150 + SuperMBTask *sm; 4.151 + SuperMBContext *smbc; 4.152 + int* line; 4.153 +} draw_edges_taskArgs; 4.154 + 4.155 +int32 draw_edges_taskArgTypes[5] = {IN, IN, INOUT, NONCTLD, NONCTLD}; 4.156 +int32 draw_edges_taskArgSizes[5] = {sizeof(MBRecContext), sizeof(SliceBufferEntry), sizeof(SuperMBTask),sizeof(SuperMBContext),sizeof(int)}; 4.157 + 4.158 +//#pragma omp task input(*d, *sbe) inout(*sm) 4.159 +static void draw_edges_task(void *_data, SlaveVP *animatingSlv){ 4.160 + draw_edges_taskArgs* args = (draw_edges_taskArgs*) _data; 4.161 + MBRecContext *d = args->d; 4.162 + SliceBufferEntry *sbe = args->sbe; 4.163 + SuperMBTask *sm = args->sm; 4.164 + SuperMBContext *smbc = args->smbc; 4.165 + int line = *(args->line); 4.166 H264Slice *s = &sbe->slice; 4.167 for (int i=line*smbc->smb_height; i< (line+1)*smbc->smb_height && i< d->mb_height; i++) 4.168 draw_edges(d, s, i); 4.169 + VSs__end_task( animatingSlv ); 4.170 } 4.171 +VSsTaskType draw_edges_taskType = { 4.172 + .fn = &draw_edges_task, 4.173 + .numCtldArgs = 3, 4.174 + .numTotalArgs = 5, 4.175 + .sizeOfArgs = sizeof(draw_edges_taskArgs), 4.176 + .argTypes = draw_edges_taskArgTypes, 4.177 + .argSizes = draw_edges_taskArgSizes}; 4.178 4.179 -static void decode_mb_in_slice(H264Context *h, MBRecContext *d, SliceBufferEntry *sbe){ 4.180 + 4.181 +static void decode_mb_in_slice(H264Context *h, MBRecContext *d, SliceBufferEntry *sbe, SlaveVP* animSlv){ 4.182 int i,j; 4.183 4.184 SuperMBContext *smbc = acquire_smbc(h); 4.185 @@ -132,17 +237,46 @@ 4.186 sm = smbs + j*smb_width + i; 4.187 sml = sm - ((i > 0) ? 1: 0); 4.188 smur = sm + (((i < smb_width-1) && (j >0)) ? -smb_width+1: 0); 4.189 - decode_super_mb_task(d, sbe, smbc, sml, smur, sm); 4.190 + decode_super_mb_taskArgs decode_super_mb_task_args; 4.191 + decode_super_mb_task_args.d = d; 4.192 + decode_super_mb_task_args.sbe = sbe; 4.193 + decode_super_mb_task_args.smbc = smbc; 4.194 + decode_super_mb_task_args.ml = sml; 4.195 + decode_super_mb_task_args.mur = smur; 4.196 + decode_super_mb_task_args.m = sm; 4.197 + VSs__submit_task(&decode_super_mb_taskType, &decode_super_mb_task_args, animSlv); 4.198 } 4.199 - draw_edges_task(d, sbe, smbc, sm, j); 4.200 + draw_edges_taskArgs draw_edges_task_args; 4.201 + draw_edges_task_args.d = d; 4.202 + draw_edges_task_args.sbe = sbe; 4.203 + draw_edges_task_args.sm = sm; 4.204 + draw_edges_task_args.smbc = smbc; 4.205 + draw_edges_task_args.line = VMS_App__malloc( sizeof(int) ); 4.206 + *(draw_edges_task_args.line) = j; 4.207 + VSs__submit_task(&draw_edges_taskType, &draw_edges_task_args, animSlv); 4.208 } 4.209 - #pragma omp taskwait on(*sm) 4.210 + VSs__taskwait_on(animSlv,sm); 4.211 + //#pragma omp taskwait on(*sm) 4.212 4.213 release_smbc(h, smbc); 4.214 } 4.215 4.216 -#pragma omp task inout(*d) inout(*sbe) 4.217 -static void decode_slice_mb_task(H264Context *h, MBRecContext *d, SliceBufferEntry *sbe){ 4.218 +typedef struct{ 4.219 + MBRecContext *d; 4.220 + SliceBufferEntry *sbe; 4.221 + H264Context *h; 4.222 +} decode_slice_mb_taskArgs; 4.223 + 4.224 +int32 decode_slice_mb_taskArgTypes[3] = {INOUT, INOUT, NONCTLD}; 4.225 +int32 decode_slice_mb_taskArgSizes[3] = {sizeof(MBRecContext), sizeof(SliceBufferEntry), sizeof(H264Context)}; 4.226 + 4.227 +//#pragma omp task inout(*d) inout(*sbe) 4.228 +static void decode_slice_mb_task(void *_data, SlaveVP *animatingSlv){ 4.229 + decode_slice_mb_taskArgs* args = (decode_slice_mb_taskArgs*) _data; 4.230 + MBRecContext *d = args->d; 4.231 + SliceBufferEntry *sbe = args->sbe; 4.232 + H264Context *h = args->h; 4.233 + 4.234 H264Slice *s = &sbe->slice; 4.235 4.236 for (int i=0; i<2; i++){ 4.237 @@ -159,39 +293,99 @@ 4.238 } 4.239 } 4.240 4.241 - #pragma omp critical (dpb) 4.242 + //#pragma omp critical (dpb) 4.243 + VSs__start_critical(animatingSlv,0); 4.244 get_dpb_entry(h, s); 4.245 - 4.246 + VSs__end_critical(animatingSlv,0); 4.247 + 4.248 if (!h->no_mbd){ 4.249 - decode_mb_in_slice (h, d, sbe); 4.250 + decode_mb_in_slice (h, d, sbe, animatingSlv); 4.251 } 4.252 4.253 for (int i=0; i<s->release_cnt; i++){ 4.254 for(int j=0; j<h->max_dpb_cnt; j++){ 4.255 if(h->dpb[j].cpn== s->release_ref_cpn[i]){ 4.256 - #pragma omp critical (dpb) 4.257 + //#pragma omp critical (dpb) 4.258 + VSs__start_critical(animatingSlv,0); 4.259 release_dpb_entry(h, &h->dpb[j], 2); 4.260 + VSs__end_critical(animatingSlv,0); 4.261 break; 4.262 } 4.263 } 4.264 } 4.265 s->release_cnt=0; 4.266 + VSs__end_task( animatingSlv ); 4.267 } 4.268 4.269 +VSsTaskType decode_slice_mb_taskType = { 4.270 + .fn = &decode_slice_mb_task, 4.271 + .numCtldArgs = 2, 4.272 + .numTotalArgs = 3, 4.273 + .sizeOfArgs = sizeof(decode_slice_mb_taskArgs), 4.274 + .argTypes = decode_slice_mb_taskArgTypes, 4.275 + .argSizes = decode_slice_mb_taskArgSizes}; 4.276 + 4.277 // for static 3d wave 4.278 /*-------------------------------------------------------------------------------*/ 4.279 -#pragma omp task input(*d, *sbe, *ml, *mur, *mprev) inout(*m) 4.280 -static void decode_3dwave_super_mb_task(MBRecContext *d, SliceBufferEntry *sbe, SuperMBContext *smbc, SuperMBTask *ml, 4.281 -SuperMBTask *mur, SuperMBTask *mprev, SuperMBTask *m){ 4.282 +typedef struct{ 4.283 + MBRecContext *d; 4.284 + SliceBufferEntry *sbe; 4.285 + SuperMBTask *ml; 4.286 + SuperMBTask *mur; 4.287 + SuperMBTask *mprev; 4.288 + SuperMBTask *m; 4.289 + SuperMBContext *smbc; 4.290 +} decode_3dwave_super_mb_taskArgs; 4.291 + 4.292 +int32 decode_3dwave_super_mb_taskArgTypes[7] = {IN, IN, IN, IN, IN, INOUT, NONCTLD}; 4.293 +int32 decode_3dwave_super_mb_taskArgSizes[7] = {sizeof(MBRecContext), sizeof(SliceBufferEntry), sizeof(SuperMBTask), 4.294 + sizeof(SuperMBTask), sizeof(SuperMBTask), sizeof(SuperMBTask), sizeof(SuperMBContext)}; 4.295 + 4.296 +//#pragma omp task input(*d, *sbe, *ml, *mur, *mprev) inout(*m) 4.297 +static void decode_3dwave_super_mb_task(void *_data, SlaveVP *animatingSlv){ 4.298 + decode_3dwave_super_mb_taskArgs* args = (decode_3dwave_super_mb_taskArgs*) _data; 4.299 + MBRecContext *d = args->d; 4.300 + SliceBufferEntry *sbe = args->sbe; 4.301 + SuperMBTask *ml = args->ml; 4.302 + SuperMBTask *mur = args->mur; 4.303 + SuperMBTask *mprev = args->mprev; 4.304 + SuperMBTask *m = args->m; 4.305 + SuperMBContext *smbc = args->smbc; 4.306 + 4.307 H264Slice *s = &sbe->slice; 4.308 H264Mb *mbs = sbe->mbs; 4.309 4.310 decode_super_mb_block(d, s, smbc, mbs, m->smb_x, m->smb_y); 4.311 + VSs__end_task( animatingSlv ); 4.312 } 4.313 4.314 +VSsTaskType decode_3dwave_super_mb_taskType = { 4.315 + .fn = &decode_3dwave_super_mb_task, 4.316 + .numCtldArgs = 6, 4.317 + .numTotalArgs = 7, 4.318 + .sizeOfArgs = sizeof(decode_3dwave_super_mb_taskArgs), 4.319 + .argTypes = decode_3dwave_super_mb_taskArgTypes, 4.320 + .argSizes = decode_3dwave_super_mb_taskArgSizes}; 4.321 + 4.322 // int init_ref_count=0; 4.323 -#pragma omp task inout(*d, *sbe, *init) 4.324 -static void init_ref_list_and_get_dpb_task(H264Context *h, MBRecContext *d, SliceBufferEntry *sbe, int *init){ 4.325 +typedef struct{ 4.326 + MBRecContext *d; 4.327 + SliceBufferEntry *sbe; 4.328 + int* init; 4.329 + H264Context *h; 4.330 +} init_ref_list_and_get_dpb_taskArgs; 4.331 + 4.332 +int32 init_ref_list_and_get_dpb_taskArgTypes[4] = {INOUT, INOUT, INOUT, NONCTLD}; 4.333 +int32 init_ref_list_and_get_dpb_taskArgSizes[4] = {sizeof(MBRecContext), sizeof(SliceBufferEntry), sizeof(int), sizeof(H264Context)}; 4.334 + 4.335 +//#pragma omp task inout(*d, *sbe, *init) 4.336 +static void init_ref_list_and_get_dpb_task(void *_data, SlaveVP *animatingSlv){ 4.337 + init_ref_list_and_get_dpb_taskArgs* args = (init_ref_list_and_get_dpb_taskArgs*) _data; 4.338 + MBRecContext *d = args->d; 4.339 + SliceBufferEntry *sbe = args->sbe; 4.340 + int* initp = args->init; 4.341 + H264Context *h = args->h; 4.342 + 4.343 H264Slice *s = &sbe->slice; 4.344 for (int i=0; i<2; i++){ 4.345 for(int j=0; j< s->ref_count[i]; j++){ 4.346 @@ -207,12 +401,23 @@ 4.347 } 4.348 } 4.349 4.350 - #pragma omp critical (dpb) 4.351 + //#pragma omp critical (dpb) 4.352 + VSs__start_critical(animatingSlv,0); 4.353 get_dpb_entry(h, s); 4.354 - 4.355 + VSs__end_critical(animatingSlv,0); 4.356 + 4.357 + VSs__end_task( animatingSlv ); 4.358 } 4.359 4.360 -static SuperMBTask* add_decode_slice_3dwave_tasks(MBRecContext *d, SliceBufferEntry *sbe, SuperMBContext *smbc){ 4.361 +VSsTaskType init_ref_list_and_get_dpb_taskType = { 4.362 + .fn = &init_ref_list_and_get_dpb_task, 4.363 + .numCtldArgs = 3, 4.364 + .numTotalArgs = 4, 4.365 + .sizeOfArgs = sizeof(init_ref_list_and_get_dpb_taskArgs), 4.366 + .argTypes = init_ref_list_and_get_dpb_taskArgTypes, 4.367 + .argSizes = init_ref_list_and_get_dpb_taskArgSizes}; 4.368 + 4.369 +static SuperMBTask* add_decode_slice_3dwave_tasks(MBRecContext *d, SliceBufferEntry *sbe, SuperMBContext *smbc, SlaveVP* animSlv){ 4.370 int i,j; 4.371 4.372 int smb_3d_height =smbc->nsmb_3dheight; 4.373 @@ -229,9 +434,24 @@ 4.374 sml = sm - ((i > 0) ? 1: 0); 4.375 smur = sm + (((i < smb_width-1) && (j >0)) ? -smb_width+1: 0); 4.376 smprev = smbs_prev + (j + smb_diff_prev+1)*smb_width -1; 4.377 - decode_3dwave_super_mb_task(d, sbe, smbc, sml, smur, smprev, sm); 4.378 + decode_3dwave_super_mb_taskArgs decode_3dwave_super_mb_task_args; 4.379 + decode_3dwave_super_mb_task_args.d = d; 4.380 + decode_3dwave_super_mb_task_args.sbe = sbe; 4.381 + decode_3dwave_super_mb_task_args.smbc = smbc; 4.382 + decode_3dwave_super_mb_task_args.ml = sml; 4.383 + decode_3dwave_super_mb_task_args.mur = smur; 4.384 + decode_3dwave_super_mb_task_args.mprev = smprev; 4.385 + decode_3dwave_super_mb_task_args.m = sm; 4.386 + VSs__submit_task(&decode_3dwave_super_mb_taskType, &decode_3dwave_super_mb_task_args, animSlv); 4.387 } 4.388 - draw_edges_task(d, sbe, smbc, sm, j); 4.389 + draw_edges_taskArgs draw_edges_task_args; 4.390 + draw_edges_task_args.d = d; 4.391 + draw_edges_task_args.sbe = sbe; 4.392 + draw_edges_task_args.sm = sm; 4.393 + draw_edges_task_args.smbc = smbc; 4.394 + draw_edges_task_args.line = VMS_App__malloc( sizeof(int) ); 4.395 + *(draw_edges_task_args.line) = j; 4.396 + VSs__submit_task(&draw_edges_taskType, &draw_edges_task_args, animSlv); 4.397 } 4.398 4.399 for(; j< smb_height; j++){ 4.400 @@ -239,21 +459,57 @@ 4.401 sm = smbs + j*smb_width + i; 4.402 sml = sm - ((i > 0) ? 1: 0); 4.403 smur = sm + (((i < smb_width-1) && (j >0)) ? -smb_width+1: 0); 4.404 - decode_super_mb_task(d, sbe, smbc, sml, smur, sm); 4.405 + decode_super_mb_taskArgs decode_super_mb_task_args; 4.406 + decode_super_mb_task_args.d = d; 4.407 + decode_super_mb_task_args.sbe = sbe; 4.408 + decode_super_mb_task_args.smbc = smbc; 4.409 + decode_super_mb_task_args.ml = sml; 4.410 + decode_super_mb_task_args.mur = smur; 4.411 + decode_super_mb_task_args.m = sm; 4.412 + VSs__submit_task(&decode_super_mb_taskType, &decode_super_mb_task_args, animSlv); 4.413 } 4.414 - draw_edges_task(d, sbe, smbc, sm, j); 4.415 + draw_edges_taskArgs draw_edges_task_args; 4.416 + draw_edges_task_args.d = d; 4.417 + draw_edges_task_args.sbe = sbe; 4.418 + draw_edges_task_args.sm = sm; 4.419 + draw_edges_task_args.smbc = smbc; 4.420 + draw_edges_task_args.line = VMS_App__malloc( sizeof(int) ); 4.421 + *(draw_edges_task_args.line) = j; 4.422 + VSs__submit_task(&draw_edges_taskType, &draw_edges_task_args, animSlv); 4.423 } 4.424 return sm; 4.425 } 4.426 4.427 -#pragma omp task inout(*d, *sbe, *release) input (*lastsmb) 4.428 -static void release_ref_list_task(H264Context *h, SuperMBContext *smbc, MBRecContext *d, SliceBufferEntry *sbe, SuperMBTask *lastsmb, int *release){ 4.429 +typedef struct{ 4.430 + MBRecContext *d; 4.431 + SliceBufferEntry *sbe; 4.432 + SuperMBTask *lastsmb; 4.433 + int* release; 4.434 + H264Context *h; 4.435 + SuperMBContext *smbc; 4.436 +} release_ref_list_taskArgs; 4.437 + 4.438 +int32 release_ref_list_taskArgTypes[6] = {INOUT, INOUT, IN, INOUT, NONCTLD, NONCTLD}; 4.439 +int32 release_ref_list_taskArgSizes[6] = {sizeof(MBRecContext), sizeof(SliceBufferEntry), sizeof(SuperMBTask), sizeof(int), sizeof(H264Context), sizeof(SuperMBContext)}; 4.440 + 4.441 +//#pragma omp task inout(*d, *sbe, *release) input (*lastsmb) 4.442 +static void release_ref_list_task(void *_data, SlaveVP *animatingSlv){ 4.443 + release_ref_list_taskArgs* args = (release_ref_list_taskArgs*) _data; 4.444 + MBRecContext *d = args->d; 4.445 + SliceBufferEntry *sbe = args->sbe; 4.446 + SuperMBTask *lastsmb = args->lastsmb; 4.447 + int* releasep = args->release; 4.448 + H264Context *h = args->h; 4.449 + SuperMBContext *smbc = args->smbc; 4.450 + 4.451 H264Slice *s = &sbe->slice; 4.452 for (int i=0; i<s->release_cnt; i++){ 4.453 for(int j=0; j<h->max_dpb_cnt; j++){ 4.454 if(h->dpb[j].cpn== s->release_ref_cpn[i]){ 4.455 - #pragma omp critical (dpb) 4.456 + //#pragma omp critical (dpb) 4.457 + VSs__start_critical(animatingSlv,0); 4.458 release_dpb_entry(h, &h->dpb[j], 2); 4.459 + VSs__end_critical(animatingSlv,0); 4.460 break; 4.461 } 4.462 } 4.463 @@ -262,29 +518,67 @@ 4.464 4.465 release_smbc(h, smbc); 4.466 4.467 + VSs__end_task( animatingSlv ); 4.468 } 4.469 4.470 +VSsTaskType release_ref_list_taskType = { 4.471 + .fn = &release_ref_list_task, 4.472 + .numCtldArgs = 4, 4.473 + .numTotalArgs = 6, 4.474 + .sizeOfArgs = sizeof(release_ref_list_taskArgs), 4.475 + .argTypes = release_ref_list_taskArgTypes, 4.476 + .argSizes = release_ref_list_taskArgSizes}; 4.477 + 4.478 // static void decode_mb_static_3dwave(H264Context *h, int mb_height, int mb_width, MBRecContext *d, H264Slice *s, H264Mb *mbs, SuperMBTask *smbs, SuperMBTask *smbs_prev){ 4.479 // 4.480 // } 4.481 /*-------------------------------------------------------------------------------*/ 4.482 //end for static 3d wave 4.483 4.484 -#pragma omp task inout (*oc) input(*sbe) 4.485 -static void output_task(H264Context *h, OutputContext *oc, SliceBufferEntry *sbe){ 4.486 +typedef struct{ 4.487 + OutputContext *oc; 4.488 + SliceBufferEntry *sbe; 4.489 + H264Context *h; 4.490 +} output_taskArgs; 4.491 + 4.492 +int32 output_taskArgTypes[3] = {INOUT, IN, NONCTLD}; 4.493 +int32 output_taskArgSizes[3] = {sizeof(OutputContext), sizeof(SliceBufferEntry), sizeof(H264Context)}; 4.494 + 4.495 +//#pragma omp task inout (*oc) input(*sbe) 4.496 +static void output_task(void *_data, SlaveVP *animatingSlv){ 4.497 + output_taskArgs* args = (output_taskArgs*) _data; 4.498 + OutputContext *oc = args->oc; 4.499 + SliceBufferEntry *sbe = args->sbe; 4.500 + H264Context *h = args->h; 4.501 + 4.502 DecodedPicture* out =output_frame(h, oc, sbe->slice.curr_pic, h->ofile, h->frame_width, h->frame_height); 4.503 if (out){ 4.504 - #pragma omp critical (dpb) 4.505 + //#pragma omp critical (dpb) 4.506 + VSs__start_critical(animatingSlv,0); 4.507 release_dpb_entry(h, out, 1); 4.508 + VSs__end_critical(animatingSlv,0); 4.509 } 4.510 print_report(oc->frame_number, oc->video_size, 0, h->verbose); 4.511 + 4.512 + VSs__end_task( animatingSlv ); 4.513 } 4.514 4.515 +VSsTaskType output_taskType = { 4.516 + .fn = &output_task, 4.517 + .numCtldArgs = 2, 4.518 + .numTotalArgs = 3, 4.519 + .sizeOfArgs = sizeof(output_taskArgs), 4.520 + .argTypes = output_taskArgTypes, 4.521 + .argSizes = output_taskArgSizes}; 4.522 + 4.523 /* 4.524 * The following code is the main loop of the file converter 4.525 */ 4.526 //Put VMS entry point here 4.527 -int h264_decode_ompss( H264Context *h) { 4.528 + 4.529 +void h264_decode_ompss( void *_params, SlaveVP *animSlv) { 4.530 + H264Context* h = (H264Context*) _params; 4.531 + 4.532 const int bufs = h->pipe_bufs; 4.533 4.534 ParserContext *pc; 4.535 @@ -304,9 +598,11 @@ 4.536 pthread_create(&sdl_thr, NULL, sdl_thread, h); 4.537 } 4.538 #endif 4.539 + /*sbe= VMS_WL__malloc(sizeof(SliceBufferEntry) * bufs); 4.540 + if (sbe) 4.541 + memset(sbe, 0, sizeof(SliceBufferEntry) * bufs);*/ 4.542 sbe= av_mallocz(sizeof(SliceBufferEntry) * bufs); 4.543 4.544 - 4.545 pc = get_parse_context(h->ifile); 4.546 nc = get_nal_context(h->width, h->height); 4.547 4.548 @@ -325,52 +621,135 @@ 4.549 if (h->static_3d && bufs < h->num_frames ){ 4.550 int num_pre_ed =0; 4.551 for (num_pre_ed=0; num_pre_ed< bufs -1 && !pc->final_frame; num_pre_ed++){ 4.552 - parse_task( h, pc, nc, &sbe[k%bufs] ); 4.553 - decode_slice_entropy_task(h, ec[k%bufs], &sbe[k%bufs]); 4.554 - #pragma omp taskwait on(*pc) 4.555 + parse_taskArgs parse_task_args; 4.556 + parse_task_args.h = h; 4.557 + parse_task_args.pc = pc; 4.558 + parse_task_args.nc = nc; 4.559 + parse_task_args.sbe = &sbe[k%bufs]; 4.560 + VSs__submit_task(&parse_taskType, &parse_task_args, animSlv); 4.561 + 4.562 + decode_slice_entropy_taskArgs decode_slice_entropy_task_args; 4.563 + decode_slice_entropy_task_args.h = h; 4.564 + decode_slice_entropy_task_args.ec = ec[k%bufs]; 4.565 + decode_slice_entropy_task_args.sbe = &sbe[k%bufs]; 4.566 + VSs__submit_task(&decode_slice_entropy_taskType, &decode_slice_entropy_task_args, animSlv); 4.567 + //#pragma omp taskwait on(*pc) 4.568 + VSs__taskwait_on(animSlv,pc); 4.569 k++; 4.570 } 4.571 4.572 while(!pc->final_frame && frames++ < h->num_frames && !h->quit){ 4.573 - parse_task( h, pc, nc, &sbe[k%bufs] ); 4.574 - decode_slice_entropy_task(h, ec[k%bufs], &sbe[k%bufs]); 4.575 + parse_taskArgs parse_task_args; 4.576 + parse_task_args.h = h; 4.577 + parse_task_args.pc = pc; 4.578 + parse_task_args.nc = nc; 4.579 + parse_task_args.sbe = &sbe[k%bufs]; 4.580 + VSs__submit_task(&parse_taskType, &parse_task_args, animSlv); 4.581 + 4.582 + decode_slice_entropy_taskArgs decode_slice_entropy_task_args; 4.583 + decode_slice_entropy_task_args.h = h; 4.584 + decode_slice_entropy_task_args.ec = ec[k%bufs]; 4.585 + decode_slice_entropy_task_args.sbe = &sbe[k%bufs]; 4.586 + VSs__submit_task(&decode_slice_entropy_taskType, &decode_slice_entropy_task_args, animSlv); 4.587 4.588 k++; 4.589 + 4.590 + init_ref_list_and_get_dpb_taskArgs init_ref_list_and_get_dpb_task_args; 4.591 + init_ref_list_and_get_dpb_task_args.h = h; 4.592 + init_ref_list_and_get_dpb_task_args.d = rc[k%2]; 4.593 + init_ref_list_and_get_dpb_task_args.sbe = &sbe[k%bufs]; 4.594 + init_ref_list_and_get_dpb_task_args.init = &init; 4.595 + VSs__submit_task(&init_ref_list_and_get_dpb_taskType, &init_ref_list_and_get_dpb_task_args, animSlv); 4.596 4.597 - init_ref_list_and_get_dpb_task(h, rc[k%2], &sbe[k%bufs], &init); 4.598 smbc = acquire_smbc(h); 4.599 - SuperMBTask *lastsmb= add_decode_slice_3dwave_tasks(rc[k%2], &sbe[k%bufs], smbc); 4.600 - release_ref_list_task(h, smbc, rc[k%2], &sbe[k%bufs], lastsmb, &release); 4.601 + SuperMBTask *lastsmb= add_decode_slice_3dwave_tasks(rc[k%2], &sbe[k%bufs], smbc, animSlv); 4.602 + release_ref_list_taskArgs release_ref_list_task_args; 4.603 + release_ref_list_task_args.h = h; 4.604 + release_ref_list_task_args.smbc = smbc; 4.605 + release_ref_list_task_args.d = rc[k%2]; 4.606 + release_ref_list_task_args.sbe = &sbe[k%bufs]; 4.607 + release_ref_list_task_args.lastsmb = lastsmb; 4.608 + release_ref_list_task_args.release = &release; 4.609 + VSs__submit_task(&release_ref_list_taskType, &release_ref_list_task_args, animSlv); 4.610 4.611 - output_task (h, oc, &sbe[k%bufs]); 4.612 - #pragma omp taskwait on(*pc) 4.613 + output_taskArgs output_task_args; 4.614 + output_task_args.h = h; 4.615 + output_task_args.oc = oc; 4.616 + output_task_args.sbe = &sbe[k%bufs]; 4.617 + VSs__submit_task(&output_taskType, &output_task_args, animSlv); 4.618 + //#pragma omp taskwait on(*pc) 4.619 + VSs__taskwait_on(animSlv,pc); 4.620 } 4.621 4.622 for (int i=0; i< num_pre_ed; i++){ 4.623 k++; 4.624 - init_ref_list_and_get_dpb_task(h, rc[k%2], &sbe[k%bufs], &init); 4.625 + init_ref_list_and_get_dpb_taskArgs init_ref_list_and_get_dpb_task_args; 4.626 + init_ref_list_and_get_dpb_task_args.h = h; 4.627 + init_ref_list_and_get_dpb_task_args.d = rc[k%2]; 4.628 + init_ref_list_and_get_dpb_task_args.sbe = &sbe[k%bufs]; 4.629 + init_ref_list_and_get_dpb_task_args.init = &init; 4.630 + VSs__submit_task(&init_ref_list_and_get_dpb_taskType, &init_ref_list_and_get_dpb_task_args, animSlv); 4.631 smbc = acquire_smbc(h); 4.632 - SuperMBTask *lastsmb= add_decode_slice_3dwave_tasks(rc[k%2], &sbe[k%bufs], smbc); 4.633 - release_ref_list_task(h, smbc, rc[k%2], &sbe[k%bufs], lastsmb, &release); 4.634 + SuperMBTask *lastsmb= add_decode_slice_3dwave_tasks(rc[k%2], &sbe[k%bufs], smbc, animSlv); 4.635 + release_ref_list_taskArgs release_ref_list_task_args; 4.636 + release_ref_list_task_args.h = h; 4.637 + release_ref_list_task_args.smbc = smbc; 4.638 + release_ref_list_task_args.d = rc[k%2]; 4.639 + release_ref_list_task_args.sbe = &sbe[k%bufs]; 4.640 + release_ref_list_task_args.lastsmb = lastsmb; 4.641 + release_ref_list_task_args.release = &release; 4.642 + VSs__submit_task(&release_ref_list_taskType, &release_ref_list_task_args, animSlv); 4.643 4.644 - output_task (h, oc, &sbe[k%bufs]); 4.645 + output_taskArgs output_task_args; 4.646 + output_task_args.h = h; 4.647 + output_task_args.oc = oc; 4.648 + output_task_args.sbe = &sbe[k%bufs]; 4.649 + VSs__submit_task(&output_taskType, &output_task_args, animSlv); 4.650 } 4.651 4.652 } else { 4.653 while(!pc->final_frame && frames++ < h->num_frames && !h->quit){ 4.654 - parse_task( h, pc, nc, &sbe[k%bufs] ); 4.655 + int32* taskID; 4.656 + taskID = VSs__create_taskID_of_size(1,animSlv ); 4.657 + taskID[1] = frames*10+1; 4.658 + parse_taskArgs parse_task_args; 4.659 + parse_task_args.h = h; 4.660 + parse_task_args.pc = pc; 4.661 + parse_task_args.nc = nc; 4.662 + parse_task_args.sbe = &sbe[k%bufs]; 4.663 + VSs__submit_task_with_ID(&parse_taskType, &parse_task_args, taskID, animSlv); 4.664 4.665 - decode_slice_entropy_task(h, ec[k%bufs], &sbe[k%bufs]); 4.666 + taskID = VSs__create_taskID_of_size(1,animSlv ); 4.667 + taskID[1] = frames*10+2; 4.668 + decode_slice_entropy_taskArgs decode_slice_entropy_task_args; 4.669 + decode_slice_entropy_task_args.h = h; 4.670 + decode_slice_entropy_task_args.ec = ec[k%bufs]; 4.671 + decode_slice_entropy_task_args.sbe = &sbe[k%bufs]; 4.672 + VSs__submit_task_with_ID(&decode_slice_entropy_taskType, &decode_slice_entropy_task_args,taskID, animSlv); 4.673 4.674 - decode_slice_mb_task(h, rc[0], &sbe[k%bufs]); 4.675 + taskID = VSs__create_taskID_of_size(1,animSlv ); 4.676 + taskID[1] = frames*10+3; 4.677 + decode_slice_mb_taskArgs decode_slice_mb_task_args; 4.678 + decode_slice_mb_task_args.h = h; 4.679 + decode_slice_mb_task_args.d = rc[0]; 4.680 + decode_slice_mb_task_args.sbe = &sbe[k%bufs]; 4.681 + VSs__submit_task_with_ID(&decode_slice_mb_taskType, &decode_slice_mb_task_args,taskID, animSlv); 4.682 4.683 - output_task (h, oc, &sbe[k%bufs]); 4.684 - #pragma omp taskwait on(*pc) 4.685 + taskID = VSs__create_taskID_of_size(1,animSlv ); 4.686 + taskID[1] = frames*10+4; 4.687 + output_taskArgs output_task_args; 4.688 + output_task_args.h = h; 4.689 + output_task_args.oc = oc; 4.690 + output_task_args.sbe = &sbe[k%bufs]; 4.691 + VSs__submit_task_with_ID(&output_taskType, &output_task_args,taskID, animSlv); 4.692 + //#pragma omp taskwait on(*pc) 4.693 + VSs__taskwait_on(animSlv,pc); 4.694 k++; 4.695 } 4.696 } 4.697 - #pragma omp taskwait 4.698 - 4.699 + //#pragma omp taskwait 4.700 + VSs__taskwait(animSlv); 4.701 + 4.702 while ((out=output_frame(h, oc, NULL, h->ofile, h->frame_width, h->frame_height))) ; 4.703 4.704 print_report(oc->frame_number, oc->video_size, 1, h->verbose); 4.705 @@ -397,5 +776,5 @@ 4.706 } 4.707 #endif 4.708 4.709 - return 0; 4.710 + VSs__end_thread( animSlv ); 4.711 }
