Mercurial > cgi-bin > hgwebdir.cgi > PR > Applications > VSs > VSs__H264__App
view libavcodec/h264_cell.c @ 9:ea1ba68cf0ed
update to match api changes + add sscc produced source
| author | Nina Engelhardt <nengel@mailbox.tu-berlin.de> |
|---|---|
| date | Wed, 05 Jun 2013 14:43:26 +0200 |
| parents | |
| children |
line source
2 #include "h264_types.h"
3 #include "h264_parser.h"
4 #include "h264_nal.h"
5 #include "h264_entropy.h"
6 #include "h264_rec.h"
7 #include "h264_misc.h"
8 #include "cell/h264_types_spu.h"
9 #include "h264_pthread.h"
11 #include <pthread.h>
12 #include <assert.h>
13 #include <unistd.h>
15 #include <libspe2.h>
16 #include <ppu_intrinsics.h>
17 #include <cbe_mfc.h>
18 #include <libsync.h>
20 // spe global variables
21 unsigned rl_cnt_var, rl_mutex_var, rl_cond_var;
22 atomic_ea_t rl_cnt;
23 cond_ea_t rl_cond;
24 mutex_ea_t rl_lock;
26 H264spe * spe_params;
27 unsigned mutex_var[16];
28 unsigned cond_var[16];
29 unsigned atomic_var[16];
31 pthread_t * spe_tid;
32 spe_context_ptr_t *spe_context;
33 void** spe_control_area;
34 void** spe_ls_area;
35 H264slice **spe_slice_buf;
37 H264spe * spe_ed_params;
38 unsigned mutex_ed_var[16];
39 unsigned cond_ed_var[16];
40 unsigned atomic_ed_var[16];
42 pthread_t * spe_ed_tid;
43 spe_context_ptr_t *spe_ed_context;
44 void** spe_ed_control_area;
45 void** spe_ed_ls_area;
46 EDSlice_spu **spe_ed_slice_buf;
48 //structs to propagate stop signal
49 MBSlice last_slice;
50 EDSlice last_ed_slice;
51 DecodedPicture last_pic;
52 RawFrame last_frm;
54 static int direct_B_resolved(EDSlice *s, int *poc_list, int *poc_cnt){
55 int i;
56 int cnt = *poc_cnt;
57 for(i=0; i<cnt; i++){
58 if (poc_list[i]==s->ref_list[1][0]->poc){
59 *poc_cnt=i+1;
60 while(++i<cnt)
61 poc_list[i]=0;
62 return 1;
63 }
64 }
65 return 0;
66 }
/**
 * Inserts poc into poc_list, which is kept in descending order.
 *
 * The new value is placed in front of the first entry that is not greater
 * than it; later entries are shifted up by one.
 *
 * @param poc_list list with room for at least *poc_cnt + 1 entries
 * @param poc_cnt  in/out number of valid entries; incremented on return
 * @param poc      value to insert
 */
static void update_IP_poc_list(int *poc_list, int *poc_cnt, int poc) {
    int i=0;
    int cnt = *poc_cnt;

    // stop at the end of the valid entries: anything beyond cnt is stale and
    // must not influence (or overrun) the search
    while (i < cnt && poc_list[i] > poc)
        i++;

    if (i < cnt)
        memmove(&poc_list[i+1], &poc_list[i], (cnt-i)*sizeof(int));

    poc_list[i]=poc;
    (*poc_cnt)++;
}
80 static void *spe_ed_thread(void *arg){
81 H264spe *params = (H264spe *)arg;
82 unsigned int idx = params->idx;
83 unsigned int runflags = 0;
84 unsigned int entry = SPE_DEFAULT_ENTRY;
85 // run SPE context
86 spe_context_run(spe_ed_context[idx], &entry, runflags, (void*) params, NULL, NULL);
87 // done - now exit thread
88 pthread_exit(NULL);
89 }
91 static void create_spe_ED_threads(H264Context *h, int ip_threads, int b_threads) {
92 int i;
93 int num_threads = ip_threads+b_threads;
94 spe_program_handle_t * spe_program = spe_image_open("spe_ed");
95 // reserve memory for spe thread id, context and argument addresses
96 spe_ed_tid = av_malloc(num_threads * sizeof (pthread_t));
97 spe_ed_context = av_malloc(num_threads * sizeof (spe_context_ptr_t));
98 spe_ed_params = av_malloc(num_threads * sizeof (H264spe));
99 spe_ed_control_area = av_malloc(num_threads * sizeof (void*));
100 spe_ed_ls_area = av_malloc(num_threads * sizeof (void*));
101 spe_ed_slice_buf = av_malloc(num_threads * sizeof (void*));
103 if (spe_program == NULL)
104 av_log(AV_LOG_ERROR, "PPE: error opening SPE object image:%d. error=%s \n", errno, strerror(errno));
106 for (i = 0; i < num_threads; i++) {
107 // create context for spe program
108 spe_ed_context[i] = spe_context_create(SPE_MAP_PS, NULL);
109 if (spe_ed_context[i] == NULL)
110 av_log(AV_LOG_ERROR, "PPE: error creating SPE context:%d. error=%s \n", errno, strerror(errno));
111 // load SPE program into main memory
112 if ((spe_program_load(spe_ed_context[i], spe_program)) == -1)
113 av_log(AV_LOG_ERROR, "PPE: error loading SPE context:%d. error=%s \n", errno, strerror(errno));
114 //get the control_area for fast mailboxing
115 if ((spe_ed_control_area[i] = spe_ps_area_get(spe_ed_context[i], SPE_CONTROL_AREA)) == NULL)
116 av_log(AV_LOG_ERROR, "PPE: error retrieving SPE control area:%d. error=%s \n", errno, strerror(errno));
117 //get ls area for inter spe communication
118 if ((spe_ed_ls_area[i] = spe_ls_area_get(spe_ed_context[i])) == NULL)
119 av_log(AV_LOG_ERROR, "PPE: error retrieving SPE ls area:%d. error=%s \n", errno, strerror(errno));
120 }
122 for (i = 0; i < ip_threads; i++) {
123 spe_ed_params[i].mb_width = h->mb_width;
124 spe_ed_params[i].mb_stride = h->mb_stride;
125 spe_ed_params[i].mb_height = h->mb_height;
126 spe_ed_params[i].type = EDIP;
127 spe_ed_params[i].spe_id = i;
128 spe_ed_params[i].idx = i;
129 //spe_ed_params[i].spe_total = ip_threads; //not used
130 //spe_params[i].slice_params= &slice_params;
131 spe_ed_params[i].src_spe = spe_ed_ls_area[(i-1+num_threads)%num_threads];
132 spe_ed_params[i].tgt_spe = spe_ed_ls_area[(i+1)%num_threads];
134 spe_ed_params[i].lock = (mutex_ea_t) (unsigned) &mutex_ed_var[i];
135 spe_ed_params[i].cond = (cond_ea_t) (unsigned) &cond_ed_var[i];
136 spe_ed_params[i].cnt = (atomic_ea_t)(unsigned) &atomic_ed_var[i]; atomic_set(spe_ed_params[i].cnt, 0);
138 mutex_init(spe_ed_params[i].lock);
139 cond_init(spe_ed_params[i].cond);
140 if (pthread_create(&spe_ed_tid[i], NULL, spe_ed_thread, (void *) &spe_ed_params[i]))
141 av_log(AV_LOG_ERROR, "create_workers: pthread create for spe failed %d\n", i);
143 //slicebufaddr
144 spe_ed_slice_buf[i] = (EDSlice_spu *) _spe_out_mbox_read(spe_ed_control_area[i]);
145 av_log(AV_LOG_DEBUG, "create_workers: created spe thread %d\n", i);
146 }
147 for (int j = 0; j < b_threads; j++) {
148 i = j+ip_threads;
149 spe_ed_params[i].mb_width = h->mb_width;
150 spe_ed_params[i].mb_stride = h->mb_stride;
151 spe_ed_params[i].mb_height = h->mb_height;
152 spe_ed_params[i].type = EDB;
153 spe_ed_params[i].idx = i;
154 spe_ed_params[i].spe_id = j;
155 spe_ed_params[i].spe_total = b_threads;
156 //spe_params[i].slice_params= &slice_params;
157 //spe_ed_params[i].src_spe = spe_ed_ls_area[(i-1+num_threads)%num_threads];
158 spe_ed_params[i].tgt_spe = spe_ed_ls_area[((j+1)%b_threads) + ip_threads];
160 spe_ed_params[i].lock = (mutex_ea_t) (unsigned) &mutex_ed_var[i];
161 spe_ed_params[i].cond = (cond_ea_t) (unsigned) &cond_ed_var[i];
162 spe_ed_params[i].cnt = (atomic_ea_t)(unsigned) &atomic_ed_var[i]; atomic_set(spe_ed_params[i].cnt, 0);
164 mutex_init(spe_ed_params[i].lock);
165 cond_init(spe_ed_params[i].cond);
166 if (pthread_create(&spe_ed_tid[i], NULL, spe_ed_thread, (void *) &spe_ed_params[i]))
167 av_log(AV_LOG_ERROR, "create_workers: pthread create for spe failed %d\n", i);
169 //slicebufaddr
170 spe_ed_slice_buf[i] = (EDSlice_spu *) _spe_out_mbox_read(spe_ed_control_area[i]);
171 av_log(AV_LOG_DEBUG, "create_workers: created spe thread %d\n", i);
172 }
173 spe_image_close(spe_program);
175 }
177 static void fill_EDSlice_spu(EDSlice_spu *dst, EDSlice *src){
178 dst->pps = src->pps;
179 dst->mbs = src->mbs;
180 dst->state = src->state;
181 dst->qp_thresh = src->qp_thresh;
182 dst->pic = *src->current_picture;
184 dst->ref_count[0] = src->ref_count[0];
185 dst->ref_count[1] = src->ref_count[1];
186 dst->slice_type = src->slice_type;
187 dst->slice_type_nos = src->slice_type_nos;
188 dst->direct_8x8_inference_flag = src->direct_8x8_inference_flag;
189 dst->list_count = src->list_count;
190 dst->coded_pic_num = src->coded_pic_num;
192 GetBitContext *gb = &src->gb;
193 align_get_bits( gb);
194 dst->bytestream_start = gb->buffer + get_bits_count(gb)/8;
195 dst->byte_bufsize = (get_bits_left(gb) + 7)/8;
197 dst->transform_bypass = src->transform_bypass;
198 dst->direct_spatial_mv_pred = src->direct_spatial_mv_pred;
199 memcpy(dst->map_col_to_list0, src->map_col_to_list0, 2*16*sizeof(int));
200 memcpy(dst->dist_scale_factor, src->dist_scale_factor, 16*sizeof(int));
201 dst->cabac_init_idc = src->cabac_init_idc;
202 memcpy(dst->ref2frm, src->ref2frm, 2*64*sizeof(int));
203 dst->chroma_qp[0]= src->chroma_qp[0];
204 dst->chroma_qp[1]= src->chroma_qp[1];
205 dst->qscale = src->qscale;
206 dst->last_qscale_diff = src->last_qscale_diff;
208 if (src->slice_type_nos == FF_B_TYPE) dst->list1 = *src->ref_list[1][0];
209 }
211 static void send_slice_to_spe_and_wait(EDSlice_spu *s, int id){
212 unsigned status;
214 spe_mfcio_get(spe_ed_context[id], (unsigned) spe_ed_slice_buf[id], s, sizeof(EDSlice_spu), 14, 0, 0);
215 spe_mfcio_tag_status_read(spe_ed_context[id], 1<<14, SPE_TAG_ALL, &status);
218 _spe_in_mbox_write(spe_ed_control_area[id], 0);
220 while (!spe_out_mbox_status(spe_ed_context[id])){
221 //pthread_yield();
222 usleep(1000);
223 }
224 _spe_out_mbox_read(spe_ed_control_area[id]);
225 }
227 static int decode_slice_entropy_cell(EntropyContext *ec, EDSlice *s, int id){
228 int i,j;
230 if( !s->pps.cabac ){
231 av_log(AV_LOG_ERROR, "Only cabac encoded streams are supported\n");
232 return -1;
233 }
234 DECLARE_ALIGNED(16, EDSlice_spu, slice);
235 fill_EDSlice_spu(&slice, s);
237 send_slice_to_spe_and_wait(&slice, id);
239 return 0;
240 }
242 static int decode_slice_entropy_cell_seq(H264Context *h, EntropyContext *ec, EDSlice *s){
243 int i,j;
245 if( !s->pps.cabac ){
246 av_log(AV_LOG_ERROR, "Only cabac encoded streams are supported\n");
247 return -1;
248 }
249 DECLARE_ALIGNED(16, EDSlice_spu, slice);
250 fill_EDSlice_spu(&slice, s);
252 send_slice_to_spe_and_wait(&slice, 0);
254 if (s->release_cnt>0) {
255 for (int i=0; i<s->release_cnt; i++){
256 release_pib_entry(h, s->release_ref[i], 2);
257 }
258 s->release_cnt=0;
259 }
261 release_pib_entry(h, s->current_picture, 1);
262 av_freep(&s->gb.raw);
263 if (s->gb.rbsp)
264 av_freep(&s->gb.rbsp);
266 return 0;
267 }
269 static void *entr_IP_spe_thread(void *arg){
270 EDThreadContext *eip = (EDThreadContext *) arg;
271 H264Context *h = eip->h;
272 // printf("eip %d, pid %d\n", eip->thread_num, syscall(SYS_gettid));
273 for (int i=0; i<SLICE_BUFS; i++){
274 eip->mbs[i] = av_malloc(h->mb_height*h->mb_width*sizeof(H264Mb));
275 }
277 EntropyContext *ec = get_entropy_context(h);
278 EDSlice *s;
280 for(;;){
281 {
282 pthread_mutex_lock(&eip->ed_lock);
283 while (eip->ed_cnt <= 0)
284 pthread_cond_wait(&eip->ed_cond, &eip->ed_lock);
285 s = &eip->ed_q[eip->ed_fo];
286 eip->ed_fo++; eip->ed_fo %= MAX_SLICE_COUNT;
287 pthread_mutex_unlock(&eip->ed_lock);
288 }
290 if (s->state<0)
291 break;
292 {
293 pthread_mutex_lock(&eip->mbs_lock);
294 while (eip->mbs_cnt <= 0)
295 pthread_cond_wait(&eip->mbs_cond, &eip->mbs_lock);
297 s->mbs = eip->mbs[eip->mbs_fo];
298 s->ed = eip;
299 eip->mbs_cnt--;
300 eip->mbs_fo++; eip->mbs_fo%=SLICE_BUFS;
301 pthread_mutex_unlock(&eip->mbs_lock);
302 }
303 if (eip->cell){
304 decode_slice_entropy_cell(ec, s, eip->thread_num);
305 }else{
306 decode_slice_entropy(ec, s);
307 }
309 // {
310 // pthread_mutex_lock(&h->lock[ENTROPY2]);
311 // h->ed_poc[h->ed_poc_fi++ % MAX_SLICE_COUNT] = s->current_picture->poc;
312 // while (h->ed_poc_fi > h->ed_poc_fo + MAX_SLICE_COUNT)
313 // h->ed_poc_fo++;
314 //
315 // pthread_cond_signal(&h->cond[ENTROPY2]);
316 // pthread_mutex_unlock(&h->lock[ENTROPY2]);
317 // }
319 {
320 pthread_mutex_lock(&h->lock[ENTROPY4]);
321 while (h->ed_reorder_cnt>=MAX_SLICE_COUNT)
322 pthread_cond_wait(&h->cond[ENTROPY4], &h->lock[ENTROPY4]);
323 h->ed_reorder_q[h->ed_reorder_fi] = *s;
324 h->ed_reorder_cnt++;
325 h->ed_reorder_fi++; h->ed_reorder_fi %= MAX_SLICE_COUNT;
326 pthread_cond_signal(&h->cond[ENTROPY4]);
327 pthread_mutex_unlock(&h->lock[ENTROPY4]);
328 }
330 {
331 pthread_mutex_lock(&eip->ed_lock);
332 eip->ed_cnt--;
333 pthread_cond_signal(&eip->ed_cond);
334 pthread_mutex_unlock(&eip->ed_lock);
335 }
336 }
338 free_entropy_context(ec);
340 pthread_exit(NULL);
341 return NULL;
342 }
344 static void *entr_B_spe_thread(void *arg){
345 EDThreadContext *eb = (EDThreadContext *) arg;
346 H264Context *h = eb->h;
347 // printf("eb %d, pid %d\n", eb->thread_num, syscall(SYS_gettid));
348 for (int i=0; i<SLICE_BUFS; i++){
349 eb->mbs[i] = av_malloc(h->mb_height*h->mb_width*sizeof(H264Mb));
350 }
352 EntropyContext *ec = get_entropy_context(h);
353 EDSlice *s;
355 for(;;){
356 {
357 pthread_mutex_lock(&eb->ed_lock);
358 while (eb->ed_cnt <= 0)
359 pthread_cond_wait(&eb->ed_cond, &eb->ed_lock);
360 s = &eb->ed_q[eb->ed_fo];
361 eb->ed_fo++; eb->ed_fo %= MAX_SLICE_COUNT;
362 pthread_mutex_unlock(&eb->ed_lock);
363 }
365 if (s->state<0)
366 break;
367 {
368 pthread_mutex_lock(&eb->mbs_lock);
369 while (eb->mbs_cnt <= 0)
370 pthread_cond_wait(&eb->mbs_cond, &eb->mbs_lock);
371 s->mbs = eb->mbs[eb->mbs_fo];
372 s->ed = eb;
373 eb->mbs_cnt--;
374 eb->mbs_fo++; eb->mbs_fo%=SLICE_BUFS;
375 pthread_mutex_unlock(&eb->mbs_lock);
376 }
377 //decode_B_slice_entropy(&hcabac, &cabac, s, eb, eb->prev_ed);
378 decode_slice_entropy_cell(ec, s, eb->thread_num + h->edip_threads);
380 {
381 pthread_mutex_lock(&h->lock[ENTROPY4]);
382 while (h->ed_reorder_cnt>=MAX_SLICE_COUNT)
383 pthread_cond_wait(&h->cond[ENTROPY4], &h->lock[ENTROPY4]);
384 h->ed_reorder_q[h->ed_reorder_fi] = *s;
385 h->ed_reorder_cnt++;
386 h->ed_reorder_fi++; h->ed_reorder_fi %= MAX_SLICE_COUNT;
387 pthread_cond_signal(&h->cond[ENTROPY4]);
388 pthread_mutex_unlock(&h->lock[ENTROPY4]);
390 }
392 {
393 pthread_mutex_lock(&eb->ed_lock);
394 eb->ed_cnt--;
395 pthread_cond_signal(&eb->ed_cond);
396 pthread_mutex_unlock(&eb->ed_lock);
397 }
398 }
399 eb->lines_cnt++;
401 free_entropy_context(ec);
403 pthread_exit(NULL);
404 return NULL;
405 }
407 static void *entr_B_distribute(void *arg){
408 H264Context *h = (H264Context *) arg;
409 EDSlice *s;
411 int i, n=0, poc;
413 // printf("eb dist, pid %d\n", syscall(SYS_gettid));
415 for(i=0; i<h->edb_threads; i++){
416 h->b[i].h =h;
417 h->b[i].thread_num =i;
418 h->b[i].thread_total =h->edb_threads;
419 pthread_mutex_init(&h->b[i].mbs_lock, NULL);
420 pthread_cond_init(&h->b[i].mbs_cond, NULL);
421 h->b[i].mbs_fo = 0;
422 h->b[i].mbs_cnt = SLICE_BUFS;
423 h->b[i].ed_fi =0;
424 h->b[i].ed_fo =0;
425 h->b[i].ed_cnt =0;
426 h->b[i].lines_cnt =0;
427 h->b[i].prev_ed = &h->b[(i-1 +h->edb_threads) % h->edb_threads];
428 pthread_mutex_init(&h->b[i].ed_lock, NULL);
429 pthread_cond_init(&h->b[i].ed_cond, NULL);
430 pthread_create(&h->ed_B_thr[i], NULL, entr_B_spe_thread, &h->b[i]);
431 }
433 for(;;){
434 {
435 pthread_mutex_lock(&h->lock[ENTROPY3B]);
436 while (h->ed_B_cnt<=0)
437 pthread_cond_wait(&h->cond[ENTROPY3B], &h->lock[ENTROPY3B]);
438 s= &h->ed_B_q[h->ed_B_fo];
439 h->ed_B_fo++; h->ed_B_fo %= MAX_SLICE_COUNT;
440 pthread_mutex_unlock(&h->lock[ENTROPY3B]);
442 }
443 if (s->state<0)
444 break;
446 if (s->ref_list[1][0]->slice_type_nos != FF_B_TYPE){
447 while (poc < s->ref_list[1][0]->poc){
448 pthread_mutex_lock(&h->lock[ENTROPY2]);
449 while (poc == h->ed_poc)
450 pthread_cond_wait(&h->cond[ENTROPY2], &h->lock[ENTROPY2]);
451 poc = h->ed_poc;
452 pthread_mutex_unlock(&h->lock[ENTROPY2]);
453 }
454 }
455 {
456 pthread_mutex_lock(&h->b[n].ed_lock);
457 while (h->b[n].ed_cnt >= MAX_SLICE_COUNT)
458 pthread_cond_wait(&h->b[n].ed_cond, &h->b[n].ed_lock);
459 h->b[n].ed_q[ h->b[n].ed_fi] = *s;
460 h->b[n].ed_cnt++;
461 h->b[n].ed_fi++; h->b[n].ed_fi %= MAX_SLICE_COUNT;
462 pthread_cond_signal(&h->b[n].ed_cond);
463 pthread_mutex_unlock(&h->b[n].ed_lock);
465 n++; n%=h->edb_threads;
466 }
467 {
468 pthread_mutex_lock(&h->lock[ENTROPY3B]);
469 h->ed_B_cnt--;
470 pthread_cond_signal(&h->cond[ENTROPY3B]);
471 pthread_mutex_unlock(&h->lock[ENTROPY3B]);
473 }
475 }
477 for (i=0; i<h->edb_threads; i++){
478 pthread_mutex_lock(&h->b[i].ed_lock);
479 while (h->b[i].ed_cnt >= MAX_SLICE_COUNT)
480 pthread_cond_wait(&h->b[i].ed_cond, &h->b[i].ed_lock);
481 h->b[i].ed_q[ h->b[i].ed_fi] = *s;
482 h->b[i].ed_cnt++;
483 h->b[i].ed_fi++; h->b[i].ed_fi %= MAX_SLICE_COUNT;
484 pthread_cond_signal(&h->b[i].ed_cond);
485 pthread_mutex_unlock(&h->b[i].ed_lock);
487 }
488 for(int i=0; i<h->edb_threads; i++){
489 pthread_join(h->ed_B_thr[i], NULL);
490 }
491 pthread_exit(NULL);
492 return NULL;
493 }
496 static void *entr_IPB_distribute(void *arg){
497 H264Context *h = (H264Context *) arg;
498 EDSlice *s;
499 int i,n=0;
501 create_spe_ED_threads(h, h->edip_threads, h->edb_threads);
502 pthread_create(&h->ed_B_dist, NULL, entr_B_distribute, h);
503 for(i=0; i<h->edip_threads + h->edip_ppe_threads; i++){
504 h->ip[i].h =h;
505 h->ip[i].cell = (i >= h->edip_ppe_threads);
506 pthread_mutex_init(&h->ip[i].mbs_lock, NULL);
507 pthread_cond_init(&h->ip[i].mbs_cond, NULL);
508 h->ip[i].thread_num = i - h->edip_ppe_threads;
509 h->ip[i].thread_total=h->edip_threads+ h->edip_ppe_threads;
510 h->ip[i].mbs_fo = 0;
511 h->ip[i].mbs_cnt = SLICE_BUFS;
512 h->ip[i].ed_fi =0;
513 h->ip[i].ed_fo =0;
514 pthread_mutex_init(&h->ip[i].ed_lock, NULL);
515 pthread_cond_init(&h->ip[i].ed_cond, NULL);
516 pthread_create(&h->ed_IP_thr[i], NULL, entr_IP_spe_thread, &h->ip[i]);
517 }
519 for(;;){
520 {
521 pthread_mutex_lock(&h->lock[ENTROPY]);
522 while (h->ed_cnt<=0)
523 pthread_cond_wait(&h->cond[ENTROPY], &h->lock[ENTROPY]);
524 s= &h->ed_q[h->ed_fo];
526 pthread_mutex_unlock(&h->lock[ENTROPY]);
527 h->ed_fo++; h->ed_fo %= MAX_SLICE_COUNT;
528 }
529 if (s->state<0)
530 break;
532 assert(s->current_picture);
533 if (s->slice_type_nos == FF_B_TYPE )
534 {
535 pthread_mutex_lock(&h->lock[ENTROPY3B]);
536 while (h->ed_B_cnt>=MAX_SLICE_COUNT)
537 pthread_cond_wait(&h->cond[ENTROPY3B], &h->lock[ENTROPY3B]);
538 h->ed_B_q[h->ed_B_fi] = *s;
539 h->ed_B_cnt++;
540 h->ed_B_fi++; h->ed_B_fi %= MAX_SLICE_COUNT;
541 pthread_cond_signal(&h->cond[ENTROPY3B]);
542 pthread_mutex_unlock(&h->lock[ENTROPY3B]);
543 }else
544 {
545 ///round robin now, change to based on rawframes size.
546 pthread_mutex_lock(&h->ip[n].ed_lock);
547 while (h->ip[n].ed_cnt >= MAX_SLICE_COUNT)
548 pthread_cond_wait(&h->ip[n].ed_cond, &h->ip[n].ed_lock);
549 h->ip[n].ed_q[ h->ip[n].ed_fi] = *s;
550 h->ip[n].ed_cnt++;
551 h->ip[n].ed_fi++; h->ip[n].ed_fi %= MAX_SLICE_COUNT;
552 pthread_cond_signal(&h->ip[n].ed_cond);
553 pthread_mutex_unlock(&h->ip[n].ed_lock);
555 n++; n %=(h->edip_threads+h->edip_ppe_threads);
556 }
557 {
558 pthread_mutex_lock(&h->lock[ENTROPY]);
559 h->ed_cnt--;
560 pthread_cond_signal(&h->cond[ENTROPY]);
561 pthread_mutex_unlock(&h->lock[ENTROPY]);
563 }
564 }
566 {
567 pthread_mutex_lock(&h->lock[ENTROPY3B]);
568 while (h->ed_B_cnt>=MAX_SLICE_COUNT)
569 pthread_cond_wait(&h->cond[ENTROPY3B], &h->lock[ENTROPY3B]);
570 h->ed_B_q[h->ed_B_fi] = *s;
571 h->ed_B_cnt++;
572 h->ed_B_fi++; h->ed_B_fi %= MAX_SLICE_COUNT;
573 pthread_cond_signal(&h->cond[ENTROPY3B]);
574 pthread_mutex_unlock(&h->lock[ENTROPY3B]);
575 }
576 {
577 for (i=0; i<h->edip_threads + h->edip_ppe_threads; i++){
578 pthread_mutex_lock(&h->ip[i].ed_lock);
579 while (h->ip[i].ed_cnt >= MAX_SLICE_COUNT)
580 pthread_cond_wait(&h->ip[i].ed_cond, &h->ip[i].ed_lock);
581 h->ip[i].ed_q[ h->ip[i].ed_fi] = *s;
582 h->ip[i].ed_cnt++;
583 h->ip[i].ed_fi++; h->ip[i].ed_fi %= MAX_SLICE_COUNT;
584 pthread_cond_signal(&h->ip[i].ed_cond);
585 pthread_mutex_unlock(&h->ip[i].ed_lock);
586 }
587 }
588 {
589 pthread_mutex_lock(&h->lock[ENTROPY4]);
590 while (h->ed_reorder_cnt>=MAX_SLICE_COUNT)
591 pthread_cond_wait(&h->cond[ENTROPY4], &h->lock[ENTROPY4]);
592 h->ed_reorder_q[h->ed_reorder_fi] = *s;
593 h->ed_reorder_cnt++;
594 h->ed_reorder_fi++; h->ed_reorder_fi %= MAX_SLICE_COUNT;
595 pthread_cond_signal(&h->cond[ENTROPY4]);
596 pthread_mutex_unlock(&h->lock[ENTROPY4]);
598 }
599 pthread_join(h->ed_B_dist, NULL);
600 for(i=0; i<h->edip_threads; i++){
601 pthread_join(h->ed_IP_thr[i], NULL);
602 }
603 pthread_exit(NULL);
604 return NULL;
605 }
607 static pthread_t ed_IPB_dist;
608 static void *entropy_IPB_cell_thread(void *arg){
609 H264Context *h = (H264Context *) arg;
610 int i;
611 EDSlice reorder[MAX_SLICE_COUNT];
612 int ip_poc[MAX_SLICE_COUNT][2]={0,};
613 int next_ip_id=0;
614 int ip_poc_cnt=0;
615 EDSlice *s;
616 int reorder_cnt=0;
617 unsigned next_pic_num=0;
619 pthread_create(&ed_IPB_dist, NULL, entr_IPB_distribute, h);
620 int count =0;
621 for(;;){
622 //signals received from the entropy decoders
623 {
624 pthread_mutex_lock(&h->lock[ENTROPY4]);
625 while (h->ed_reorder_cnt<=0)
626 pthread_cond_wait(&h->cond[ENTROPY4], &h->lock[ENTROPY4]);
627 s= &h->ed_reorder_q[h->ed_reorder_fo];
628 h->ed_reorder_fo++; h->ed_reorder_fo %=MAX_SLICE_COUNT;
629 pthread_mutex_unlock(&h->lock[ENTROPY4]);
630 }
632 if (s->state >=0 && s->slice_type_nos != FF_B_TYPE){
633 for (i=0; i<ip_poc_cnt; i++){
634 if (s->ip_id < ip_poc[i][0]){
635 memmove(ip_poc[i+1], ip_poc[i], 2*(ip_poc_cnt-i)*sizeof(int));
636 break;
637 }
638 }
639 ip_poc[i][0]= s->ip_id;
640 ip_poc[i][1]= s->current_picture->poc;
641 ip_poc_cnt++;
643 while (next_ip_id == ip_poc[0][0]){
644 pthread_mutex_lock(&h->lock[ENTROPY2]);
645 h->ed_poc = ip_poc[0][1];
647 pthread_cond_signal(&h->cond[ENTROPY2]);
648 pthread_mutex_unlock(&h->lock[ENTROPY2]);
649 memmove(ip_poc[0], ip_poc[1], 2*(ip_poc_cnt-1)*sizeof(int));
650 ip_poc_cnt--;
651 next_ip_id++;
652 }
653 }
655 for(i=reorder_cnt; i>0; i--){
656 if (s->coded_pic_num < reorder[i-1].coded_pic_num)
657 break;
658 reorder[i]=reorder[i-1];
659 }
660 reorder[i]=*s;
662 while(reorder_cnt>=0){
663 if (next_pic_num!=reorder[reorder_cnt].coded_pic_num){
664 break;
665 }
666 EDSlice *es = &reorder[reorder_cnt];
668 {
669 pthread_mutex_lock(&h->lock[MBDEC]);
670 while (h->mbdec_cnt >= MAX_SLICE_COUNT)
671 pthread_cond_wait(&h->cond[MBDEC], &h->lock[MBDEC]);
672 copyEDtoMBSlice(&h->mbdec_q[h->mbdec_fi], es);
674 h->mbdec_cnt++;
675 h->mbdec_fi++; h->mbdec_fi %= MAX_SLICE_COUNT;
676 pthread_cond_signal(&h->cond[MBDEC]);
677 pthread_mutex_unlock(&h->lock[MBDEC]);
679 }
681 if (es->state<0)
682 goto end;
684 assert(es->current_picture);
685 for (int i=0; i<es->release_cnt; i++){
686 release_pib_entry(h, es->release_ref[i], 2);
687 }
688 release_pib_entry(h, es->current_picture, 1);
689 av_freep(&es->gb.raw);
690 if (es->gb.rbsp)
691 av_freep(&es->gb.rbsp);
693 next_pic_num++;
694 reorder_cnt--;
695 }
696 reorder_cnt++;
698 {
699 pthread_mutex_lock(&h->lock[ENTROPY4]);
700 h->ed_reorder_cnt--;
701 pthread_cond_signal(&h->cond[ENTROPY4]);
702 pthread_mutex_unlock(&h->lock[ENTROPY4]);
703 }
704 }
706 end:
707 pthread_join(ed_IPB_dist, NULL);
708 pthread_exit(NULL);
709 return NULL;
710 }
713 static void fill_spe_slice(H264slice *dst, const MBSlice *src, H264Context *h){
714 dst->deblocking_filter =1;
715 dst->linesize = src->current_picture->linesize[0];
716 dst->uvlinesize = src->current_picture->linesize[1];
717 dst->mb_width = h->mb_width;
718 dst->mb_height = h->mb_height;
719 dst->use_weight = src->use_weight;
720 dst->use_weight_chroma = src->use_weight_chroma;
721 dst->luma_log2_weight_denom = src->luma_log2_weight_denom;
722 dst->chroma_log2_weight_denom = src->chroma_log2_weight_denom;
724 //weights later
725 memcpy(dst->luma_weight, src->luma_weight, 16*2*2*sizeof(int16_t));
726 memcpy(dst->chroma_weight, src->chroma_weight, 16*2*2*2*sizeof(int16_t));
727 memcpy(dst->implicit_weight, src->implicit_weight, 16*16*2*sizeof(int16_t));
729 for(int list=0; list<2; list++){
730 for (int i=0; i<src->ref_count[list]; i++){
731 Picture_spu *p_dst = &dst->ref_list[list][i];
732 DecodedPicture *p_src = src->ref_list[list][i];
733 if (p_src){
734 p_dst->data[0] = p_src->data[0];
735 p_dst->data[1] = p_src->data[1];
736 p_dst->data[2] = p_src->data[2];
737 }
738 }
739 }
740 dst->state = src->state;
742 dst->emu_edge_width =32;
743 dst->emu_edge_height =32;
744 dst->slice_type = src->slice_type;
745 dst->slice_type_nos = src->slice_type_nos;
746 dst->slice_alpha_c0_offset = src->slice_alpha_c0_offset;
747 dst->slice_beta_offset = src->slice_beta_offset;
749 memcpy(dst->chroma_qp_table, src->pps.chroma_qp_table, 2*64);
751 dst->blocks = src->mbs;
752 dst->dst_y = src->current_picture->data[0];
753 dst->dst_cb = src->current_picture->data[1];
754 dst->dst_cr = src->current_picture->data[2];
755 }
757 static void decode_slice_mb_seq_cell(H264Context *h, MBRecContext *d, MBSlice *s, DecodedPicture *tmp){
758 static int rl_fi=0;
760 DECLARE_ALIGNED(16, H264slice, spe_slice);
761 H264spe *p=&spe_params[0];
762 unsigned status;
763 uint8_t *dst_y, *dst_cb, *dst_cr;
765 DecodedPicture *dp;
767 for (int i=0; i<2; i++){
768 for(int j=0; j< s->ref_count[i]; j++){
769 if (s->ref_list_cpn[i][j] ==-1)
770 continue;
771 int k;
772 for (k=0; k<DPB_SIZE; k++){
773 if(h->dpb[k].reference >= 2 && h->dpb[k].cpn == s->ref_list_cpn[i][j]){
774 s->ref_list[i][j] = &h->dpb[k];
775 break;
776 }
777 }
778 }
779 }
781 dp = get_dpb_entry(h);
782 init_dpb_entry(dp, s, d->width, d->height);
784 if (h->no_mbd)
785 return;
788 fill_spe_slice(&spe_slice, s, h);
789 spe_mfcio_get(spe_context[0], (unsigned) (spe_slice_buf[0] + rl_fi), &spe_slice, sizeof(H264slice), 15, 0, 0);
790 spe_mfcio_tag_status_read(spe_context[0], 1<<15, SPE_TAG_ALL, &status);
791 rl_fi++; rl_fi %= 2;
793 _spe_in_mbox_write(spe_control_area[0], 0);
794 while (atomic_read(rl_cnt)<=0){
795 //pthread_yield();
796 usleep(1000);
797 }
798 atomic_dec(rl_cnt);
801 /** This is error free, no visual artifacts, however, md5sum fails.... (WTF) **/
802 // memcpy(tmp->data[0], s->current_picture->data[0], tmp->linesize[0]*h->mb_height*16);
803 // memcpy(tmp->data[1], s->current_picture->data[1], tmp->linesize[1]*h->mb_height*8);
804 // memcpy(tmp->data[2], s->current_picture->data[2], tmp->linesize[1]*h->mb_height*8);
805 //
806 // memset(s->current_picture->data[0], 0, tmp->linesize[0]*h->mb_height*16);
807 // memset(s->current_picture->data[1], 0, tmp->linesize[1]*h->mb_height*8);
808 // memset(s->current_picture->data[2], 0, tmp->linesize[1]*h->mb_height*8);
809 //
810 // decode_slice_mb_seq(d, s);
811 //
812 // for (int i=0; i<h->mb_height*16; i++){
813 // for (int j=0; j<h->width; j++){
814 // if (tmp->data[0][j + i*tmp->linesize[0]] != s->current_picture->data[0][j + i*tmp->linesize[0]]){
815 // printf("%d, %d, %d, %d\n", j, i, tmp->data[0][j + i*tmp->linesize[0]], s->current_picture->data[0][j + i*tmp->linesize[0]]);
816 // return;
817 // }
818 // }
819 // }
820 //
821 // for (int i=0; i<h->mb_height*8; i++){
822 // for (int j=0; j<h->width/2; j++){
823 // if (tmp->data[1][j + i*tmp->linesize[1]] != s->current_picture->data[1][j + i*tmp->linesize[1]]){
824 // printf("%d, %d, %d, %d\n", j, i, tmp->data[1][j + i*tmp->linesize[1]], s->current_picture->data[1][j + i*tmp->linesize[1]]);
825 // return;
826 // }
827 // }
828 // }
829 //
830 // for (int i=0; i<h->mb_height*8; i++){
831 // for (int j=0; j<h->width/2; j++){
832 // if (tmp->data[2][j + i*tmp->linesize[1]] != s->current_picture->data[2][j + i*tmp->linesize[1]]){
833 // printf("%d, %d, %d, %d\n", j, i, tmp->data[2][j + i*tmp->linesize[1]], s->current_picture->data[2][j + i*tmp->linesize[1]]);
834 // return;
835 // }
836 // }
837 // }
840 //printf("dst_y %p\n", dst_y);
843 for (int i=0; i<s->release_cnt; i++){
844 for(int j=0; j<DPB_SIZE; j++){
845 if(h->dpb[j].cpn== s->release_ref_cpn[i]){
846 release_dpb_entry(h, &h->dpb[j], 2);
847 break;
848 }
849 }
850 }
851 s->release_cnt=0;
853 }
855 static void *h264_spe_thread(void * thread_args ) {
856 H264spe *params = (H264spe *)thread_args;
857 unsigned int spe_id = params->spe_id;
858 unsigned int runflags = 0;
859 unsigned int entry = SPE_DEFAULT_ENTRY;
860 // run SPE context
861 spe_context_run(spe_context[spe_id], &entry, runflags, (void*) params, NULL, NULL);
862 // done - now exit thread
863 pthread_exit(NULL);
864 }
866 static int create_spe_MBR_threads(H264Context *h, int num_threads) {
867 int i;
869 // reserve memory for spe thread id, context and argument addresses
870 spe_tid = av_malloc(num_threads * sizeof (pthread_t));
871 spe_context = av_malloc(num_threads * sizeof (spe_context_ptr_t));
872 spe_params = av_malloc(num_threads * sizeof (H264spe));
873 spe_control_area = av_malloc(num_threads * sizeof (void*));
874 spe_ls_area = av_malloc(num_threads * sizeof (void*));
875 spe_slice_buf = av_malloc(num_threads * sizeof (void*));
877 spe_program_handle_t *spe_program = spe_image_open("spe_mbd");
879 if (spe_program == NULL)
880 av_log(AV_LOG_ERROR, "PPE: error opening SPE object image:%d. error=%s \n", errno, strerror(errno));
882 for (i = 0; i < num_threads; i++) {
883 // create context for spe program
884 spe_context[i] = spe_context_create(SPE_MAP_PS, NULL);
885 if (spe_context[i] == NULL)
886 av_log(AV_LOG_ERROR, "PPE: error creating SPE context:%d. error=%s \n", errno, strerror(errno));
887 // load SPE program into main memory
888 if ((spe_program_load(spe_context[i], spe_program)) == -1)
889 av_log(AV_LOG_ERROR, "PPE: error loading SPE context:%d. error=%s \n", errno, strerror(errno));
890 //get the control_area for fast mailboxing
891 if ((spe_control_area[i] = spe_ps_area_get(spe_context[i], SPE_CONTROL_AREA)) == NULL)
892 av_log(AV_LOG_ERROR, "PPE: error retrieving SPE control area:%d. error=%s \n", errno, strerror(errno));
893 //get ls area for inter spe communication
894 if ((spe_ls_area[i] = spe_ls_area_get(spe_context[i])) == NULL)
895 av_log(AV_LOG_ERROR, "PPE: error retrieving SPE ls area:%d. error=%s \n", errno, strerror(errno));
896 }
898 for (i = 0; i < num_threads; i++) {
899 spe_params[i].mb_width = h->mb_width;
900 spe_params[i].mb_height = h->mb_height;
901 spe_params[i].mb_stride = h->mb_stride;
902 spe_params[i].spe_id = i;
903 spe_params[i].spe_total = num_threads;
904 //spe_params[i].slice_params= &slice_params;
905 spe_params[i].src_spe = spe_ls_area[(i-1+num_threads)%num_threads];
906 spe_params[i].tgt_spe = spe_ls_area[(i+1)%num_threads];
908 spe_params[i].rl_lock = rl_lock;
909 spe_params[i].rl_cond = rl_cond;
910 spe_params[i].rl_cnt = rl_cnt;
911 spe_params[i].lock = (mutex_ea_t) (unsigned) &mutex_var[i];
912 spe_params[i].cond = (cond_ea_t) (unsigned) &cond_var[i];
913 spe_params[i].cnt = (atomic_ea_t)(unsigned) &atomic_var[i]; atomic_set(spe_params[i].cnt, 0);
915 mutex_init(spe_params[i].lock);
916 cond_init(spe_params[i].cond);
917 if (pthread_create(&spe_tid[i], NULL, h264_spe_thread, (void *) &spe_params[i]))
918 av_log(AV_LOG_ERROR, "create_workers: pthread create for spe failed %d\n", i);
920 //slicebufaddr
921 spe_slice_buf[i] = (H264slice *) _spe_out_mbox_read(spe_control_area[i]);
923 av_log(AV_LOG_DEBUG, "create_workers: created spe thread %d\n", i);
924 }
925 spe_image_close(spe_program);
926 return 0;
927 }
929 //_spe_out_mbox_read(spe_control_area[i]);
930 /**
931 * joins all the spe worker threads.
932 */
933 static void join_spe_worker_threads(H264slice *s, int num_threads, int *rl_fi) {
934 int i;
935 ///just to keep coding consistency.
936 {
937 for (i=0; i<num_threads; i++){
938 H264spe *p=&spe_params[i];
939 unsigned status;
941 while (atomic_read(p->cnt)>=2) {//double buffered
942 usleep(1000);//cond_wait(p->cond, p->lock);
943 }
945 spe_mfcio_get(spe_context[i], (unsigned) (spe_slice_buf[i] + rl_fi[i]), s, sizeof(H264slice), 15, 0, 0);
946 spe_mfcio_tag_status_read(spe_context[i], 1<<15, SPE_TAG_ALL, &status);
947 //mutex_unlock(p->lock);
948 _spe_in_mbox_write(spe_control_area[i], 0);
949 }
950 }
952 for (i=0; i<num_threads; i++){
953 pthread_join(spe_tid[i], NULL);
954 }
956 for (i=0; i<num_threads; i++){
957 spe_context_destroy(spe_context[i]);
958 }
959 atomic_inc(rl_cnt);
961 // destroy memory reserved for spe thread id, context and argument addresses
962 av_freep(&spe_tid);
963 av_freep(&spe_context);
964 av_freep(&spe_params);
965 av_freep(&spe_control_area);
966 av_freep(&spe_slice_buf);
967 }
970 static void *rl_dist_thread(void *arg){
971 int i;
972 H264Context *h = (H264Context *) arg;
973 MBSlice *s;
974 DecodedPicture *dp;
975 int rl_fi[16]={0,};
976 DECLARE_ALIGNED(16, H264slice, spe_slice);
978 create_spe_MBR_threads(h, h->rl_threads);
979 for(;;){
980 {
981 pthread_mutex_lock(&h->lock[MBDEC]);
982 while (h->mbdec_cnt<=0)
983 pthread_cond_wait(&h->cond[MBDEC], &h->lock[MBDEC]);
984 s= &h->mbdec_q[h->mbdec_fo];
985 h->mbdec_fo++; h->mbdec_fo %= MAX_SLICE_COUNT;
986 pthread_mutex_unlock(&h->lock[MBDEC]);
987 }
989 if (s->state<0){
990 break;
991 }
992 for (int i=0; i<2; i++){
993 for(int j=0; j< s->ref_count[i]; j++){
994 if (s->ref_list_cpn[i][j] ==-1)
995 continue;
996 int k;
997 for (k=0; k<DPB_SIZE; k++){
998 if(h->dpb[k].reference >= 2 && h->dpb[k].cpn == s->ref_list_cpn[i][j]){
999 s->ref_list[i][j] = &h->dpb[k];
1000 break;
1001 }
1002 }
1004 }
1005 }
1006 dp = get_dpb_entry(h);
1007 init_dpb_entry(dp, s, h->width, h->height);
1008 assert(s->current_picture);
1009 {
1010 while (atomic_read(rl_cnt) >=MAX_SLICE_COUNT){
1011 usleep(1000);
1012 }
1013 h->mbrel_q[h->mbrel_fi] = *s;
1015 h->mbrel_fi++; h->mbrel_fi %= MAX_SLICE_COUNT;
1016 }
1017 {
1018 if(h->no_mbd){
1019 atomic_inc(rl_cnt);
1020 }else {
1021 fill_spe_slice(&spe_slice, s, h);
1022 for (i=0; i<h->rl_threads; i++){
1023 H264spe *p=&spe_params[i];
1024 unsigned status;
1025 while (atomic_read(p->cnt)>=2){ //double buffered
1026 usleep(1000);
1027 //cond_wait(p->cond, p->lock);
1028 }
1029 spe_mfcio_get(spe_context[i], (unsigned) (spe_slice_buf[i] + rl_fi[i]), &spe_slice, sizeof(H264slice), 15, 0, 0);
1030 spe_mfcio_tag_status_read(spe_context[i], 1<<15, SPE_TAG_ALL, &status);
1031 rl_fi[i]++; rl_fi[i] %= 2;
1032 atomic_inc(p->cnt);
1034 _spe_in_mbox_write(spe_control_area[i], 0);
1035 }
1036 }
1037 }
1039 {
1040 pthread_mutex_lock(&h->lock[MBDEC]);
1041 h->mbdec_cnt--;
1042 pthread_cond_signal(&h->cond[MBDEC]);
1043 pthread_mutex_unlock(&h->lock[MBDEC]);
1044 }
1046 }
1048 {
1049 while (atomic_read(rl_cnt) >=MAX_SLICE_COUNT){
1050 usleep(1000);
1051 }
1052 h->mbrel_q[h->mbrel_fi] = *s;
1054 h->mbrel_fi++; h->mbrel_fi %= MAX_SLICE_COUNT;
1055 }
1056 spe_slice.state=-1;
1057 join_spe_worker_threads(&spe_slice, h->rl_threads, rl_fi);
1058 pthread_exit(NULL);
1059 return NULL;
1060 }
1062 static void *mbdec_cell_thread(void *arg){
1063 H264Context *h = (H264Context *) arg;
1065 rl_lock = (mutex_ea_t) (unsigned) &rl_mutex_var;
1066 rl_cond = (cond_ea_t) (unsigned) &rl_cond_var;
1067 rl_cnt = (atomic_ea_t) (unsigned) &rl_cnt_var;
1068 atomic_set(rl_cnt, 0);
1069 mutex_init(rl_lock);
1070 cond_init(rl_cond);
1071 // printf("mbdec, pid %d\n", syscall(SYS_gettid));
1072 pthread_create(&h->rl_dist_thr, NULL, rl_dist_thread, h);
1074 for(;;){
1075 MBSlice *s=NULL;
1076 {
1077 while (atomic_read(rl_cnt)<=0){
1078 usleep(1000);
1079 }
1080 s= &h->mbrel_q[h->mbrel_fo];
1081 h->mbrel_fo++; h->mbrel_fo %= MAX_SLICE_COUNT;
1082 }
1084 if (s->state<0)
1085 break;
1087 for (int i=0; i<s->release_cnt; i++){
1088 for(int j=0; j<DPB_SIZE; j++){
1089 if(h->dpb[j].cpn== s->release_ref_cpn[i]){
1090 release_dpb_entry(h, &h->dpb[j], 2);
1091 break;
1092 }
1093 }
1094 }
1096 {
1097 EDThreadContext *ed = s->ed;
1098 pthread_mutex_lock(&ed->mbs_lock);
1099 ed->mbs_cnt++;
1100 pthread_cond_signal(&ed->mbs_cond);
1101 pthread_mutex_unlock(&ed->mbs_lock);
1102 }
1104 {
1105 pthread_mutex_lock(&h->lock[WRITE]);
1106 while (h->write_cnt>= DPB_SIZE)
1107 pthread_cond_wait(&h->cond[WRITE], &h->lock[WRITE]);
1108 assert(s);
1109 assert(s->current_picture);
1110 h->write_q[h->write_fi]= s->current_picture;
1111 h->write_cnt++;
1112 h->write_fi++; h->write_fi %= DPB_SIZE;
1113 pthread_cond_signal(&h->cond[WRITE]);
1114 pthread_mutex_unlock(&h->lock[WRITE]);
1116 }
1117 {
1118 atomic_dec(rl_cnt);
1119 }
1121 }
1123 {//propagate exit
1124 pthread_mutex_lock(&h->lock[WRITE]);
1125 while (h->write_cnt>= DPB_SIZE)
1126 pthread_cond_wait(&h->cond[WRITE], &h->lock[WRITE]);
1127 last_pic.reference = -1;
1128 h->write_q[h->write_fi] = &last_pic;
1129 h->write_cnt++;
1130 h->write_fi++; h->write_fi %= DPB_SIZE;
1131 pthread_cond_signal(&h->cond[WRITE]);
1132 pthread_mutex_unlock(&h->lock[WRITE]);
1134 }
1135 pthread_join(h->rl_dist_thr, NULL);
1136 pthread_exit(NULL);
1137 return NULL;
1138 }
1140 /*
1141 * The following code is the main loop of the file converter
1142 */
1143 int h264_decode_cell(H264Context *h) {
1145 pthread_t read_thr, parsenal_thr, entropy_thr, mbdec_thr, write_thr;
1147 start_timer();
1149 pthread_create(&read_thr, NULL, read_thread, h);
1150 pthread_create(&parsenal_thr, NULL, parsenal_thread, h);
1151 pthread_create(&entropy_thr, NULL, entropy_IPB_cell_thread, h);
1152 pthread_create(&mbdec_thr, NULL, mbdec_cell_thread, h);
1153 pthread_create(&write_thr, NULL, write_thread, h);
1155 pthread_join(read_thr, NULL);
1156 pthread_join(parsenal_thr, NULL);
1157 pthread_join(entropy_thr, NULL);
1158 pthread_join(mbdec_thr, NULL);
1159 pthread_join(write_thr, NULL);
1161 return 0;
1162 }
1164 /*
1165 * The following code is the main loop of the file converter
1166 */
1167 int h264_decode_cell_seq(H264Context *h) {
1168 ParserContext *pc;
1169 NalContext *nc;
1170 EntropyContext *ec;
1171 MBRecContext *rc;
1172 OutputContext *oc;
1174 RawFrame frm;
1175 EDSlice slice, *s=&slice;
1176 MBSlice mbslice, *s2=&mbslice;
1177 PictureInfo *pic=NULL;
1178 DecodedPicture *out;
1179 int size;
1180 int frames=0;
1182 pc = get_parse_context(h->ifile);
1183 nc = get_nal_context(h->width, h->height);
1184 ec = get_entropy_context( h );
1185 rc = get_mbrec_context(h);
1186 oc = get_output_context( h );
1188 rl_lock = (mutex_ea_t) (unsigned) &rl_mutex_var;
1189 rl_cond = (cond_ea_t) (unsigned) &rl_cond_var;
1190 rl_cnt = (atomic_ea_t) (unsigned) &rl_cnt_var;
1191 atomic_set(rl_cnt, 0);
1192 mutex_init(rl_lock);
1193 cond_init(rl_cond);
1195 memset(s, 0, sizeof(EDSlice));
1196 ff_init_slice(nc, s);
1197 s->mbs = av_malloc( h->mb_height * h->mb_width * sizeof(H264Mb));
1199 DecodedPicture tmp;
1200 tmp.base[0]=0;
1201 ///fix this when want to debug the Cell errors
1202 //init_dpb_entry(&tmp, h->width, h->height);
1204 create_spe_ED_threads(h, 1, 0);
1205 create_spe_MBR_threads(h, 1);
1207 start_timer();
1209 while(!pc->final_frame && frames++ < h->num_frames){
1211 av_read_frame_internal(pc, &frm);
1213 PictureInfo *pic=get_pib_entry(h);
1214 ff_alloc_picture_info(nc, s, pic);
1215 decode_nal_units(nc, s, &frm);
1217 copyEDtoMBSlice(s2, s);
1218 decode_slice_entropy_cell_seq(h, ec, s);
1220 decode_slice_mb_seq_cell(h, rc, s2, &tmp);
1222 out =output_frame(h, oc, s2->current_picture, h->ofile, h->frame_width, h->frame_height);
1224 if (out){
1225 release_dpb_entry(h, out, 1);
1226 }
1227 print_report(oc->frame_number, oc->video_size, 0, h->verbose);
1228 }
1229 while ((out=output_frame(h, oc, NULL, h->ofile, h->frame_width, h->frame_height))) ;
1231 print_report(oc->frame_number, oc->video_size, 1, h->verbose);
1233 /* finished ! */
1234 av_freep(&s->mbs);
1236 free_parse_context(pc);
1237 free_nal_context (nc);
1238 free_entropy_context(ec);
1239 free_mbrec_context(rc);
1240 free_output_context(oc);
1241 return 0;
1242 }
