nengel@2: nengel@2: #include "h264_types.h" nengel@2: #include "h264_parser.h" nengel@2: #include "h264_nal.h" nengel@2: #include "h264_entropy.h" nengel@2: #include "h264_rec.h" nengel@2: #include "h264_misc.h" nengel@2: #include "cell/h264_types_spu.h" nengel@2: #include "h264_pthread.h" nengel@2: nengel@2: #include nengel@2: #include nengel@2: #include nengel@2: nengel@2: #include nengel@2: #include nengel@2: #include nengel@2: #include nengel@2: nengel@2: // spe global variables nengel@2: unsigned rl_cnt_var, rl_mutex_var, rl_cond_var; nengel@2: atomic_ea_t rl_cnt; nengel@2: cond_ea_t rl_cond; nengel@2: mutex_ea_t rl_lock; nengel@2: nengel@2: H264spe * spe_params; nengel@2: unsigned mutex_var[16]; nengel@2: unsigned cond_var[16]; nengel@2: unsigned atomic_var[16]; nengel@2: nengel@2: pthread_t * spe_tid; nengel@2: spe_context_ptr_t *spe_context; nengel@2: void** spe_control_area; nengel@2: void** spe_ls_area; nengel@2: H264slice **spe_slice_buf; nengel@2: nengel@2: H264spe * spe_ed_params; nengel@2: unsigned mutex_ed_var[16]; nengel@2: unsigned cond_ed_var[16]; nengel@2: unsigned atomic_ed_var[16]; nengel@2: nengel@2: pthread_t * spe_ed_tid; nengel@2: spe_context_ptr_t *spe_ed_context; nengel@2: void** spe_ed_control_area; nengel@2: void** spe_ed_ls_area; nengel@2: EDSlice_spu **spe_ed_slice_buf; nengel@2: nengel@2: //structs to propagate stop signal nengel@2: MBSlice last_slice; nengel@2: EDSlice last_ed_slice; nengel@2: DecodedPicture last_pic; nengel@2: RawFrame last_frm; nengel@2: nengel@2: static int direct_B_resolved(EDSlice *s, int *poc_list, int *poc_cnt){ nengel@2: int i; nengel@2: int cnt = *poc_cnt; nengel@2: for(i=0; iref_list[1][0]->poc){ nengel@2: *poc_cnt=i+1; nengel@2: while(++i poc) { i++;} nengel@2: if ( i< cnt) nengel@2: memmove(&poc_list[i+1], &poc_list[i], (cnt-i)*sizeof(int)); nengel@2: nengel@2: poc_list[i]=poc; nengel@2: (*poc_cnt)++; nengel@2: } nengel@2: nengel@2: static void *spe_ed_thread(void *arg){ nengel@2: H264spe *params = (H264spe *)arg; nengel@2: unsigned int idx = params->idx; nengel@2: unsigned int runflags = 0; nengel@2: unsigned int entry = SPE_DEFAULT_ENTRY; nengel@2: // run SPE context nengel@2: spe_context_run(spe_ed_context[idx], &entry, runflags, (void*) params, NULL, NULL); nengel@2: // done - now exit thread nengel@2: pthread_exit(NULL); nengel@2: } nengel@2: nengel@2: static void create_spe_ED_threads(H264Context *h, int ip_threads, int b_threads) { nengel@2: int i; nengel@2: int num_threads = ip_threads+b_threads; nengel@2: spe_program_handle_t * spe_program = spe_image_open("spe_ed"); nengel@2: // reserve memory for spe thread id, context and argument addresses nengel@2: spe_ed_tid = av_malloc(num_threads * sizeof (pthread_t)); nengel@2: spe_ed_context = av_malloc(num_threads * sizeof (spe_context_ptr_t)); nengel@2: spe_ed_params = av_malloc(num_threads * sizeof (H264spe)); nengel@2: spe_ed_control_area = av_malloc(num_threads * sizeof (void*)); nengel@2: spe_ed_ls_area = av_malloc(num_threads * sizeof (void*)); nengel@2: spe_ed_slice_buf = av_malloc(num_threads * sizeof (void*)); nengel@2: nengel@2: if (spe_program == NULL) nengel@2: av_log(AV_LOG_ERROR, "PPE: error opening SPE object image:%d. error=%s \n", errno, strerror(errno)); nengel@2: nengel@2: for (i = 0; i < num_threads; i++) { nengel@2: // create context for spe program nengel@2: spe_ed_context[i] = spe_context_create(SPE_MAP_PS, NULL); nengel@2: if (spe_ed_context[i] == NULL) nengel@2: av_log(AV_LOG_ERROR, "PPE: error creating SPE context:%d. error=%s \n", errno, strerror(errno)); nengel@2: // load SPE program into main memory nengel@2: if ((spe_program_load(spe_ed_context[i], spe_program)) == -1) nengel@2: av_log(AV_LOG_ERROR, "PPE: error loading SPE context:%d. error=%s \n", errno, strerror(errno)); nengel@2: //get the control_area for fast mailboxing nengel@2: if ((spe_ed_control_area[i] = spe_ps_area_get(spe_ed_context[i], SPE_CONTROL_AREA)) == NULL) nengel@2: av_log(AV_LOG_ERROR, "PPE: error retrieving SPE control area:%d. error=%s \n", errno, strerror(errno)); nengel@2: //get ls area for inter spe communication nengel@2: if ((spe_ed_ls_area[i] = spe_ls_area_get(spe_ed_context[i])) == NULL) nengel@2: av_log(AV_LOG_ERROR, "PPE: error retrieving SPE ls area:%d. error=%s \n", errno, strerror(errno)); nengel@2: } nengel@2: nengel@2: for (i = 0; i < ip_threads; i++) { nengel@2: spe_ed_params[i].mb_width = h->mb_width; nengel@2: spe_ed_params[i].mb_stride = h->mb_stride; nengel@2: spe_ed_params[i].mb_height = h->mb_height; nengel@2: spe_ed_params[i].type = EDIP; nengel@2: spe_ed_params[i].spe_id = i; nengel@2: spe_ed_params[i].idx = i; nengel@2: //spe_ed_params[i].spe_total = ip_threads; //not used nengel@2: //spe_params[i].slice_params= &slice_params; nengel@2: spe_ed_params[i].src_spe = spe_ed_ls_area[(i-1+num_threads)%num_threads]; nengel@2: spe_ed_params[i].tgt_spe = spe_ed_ls_area[(i+1)%num_threads]; nengel@2: nengel@2: spe_ed_params[i].lock = (mutex_ea_t) (unsigned) &mutex_ed_var[i]; nengel@2: spe_ed_params[i].cond = (cond_ea_t) (unsigned) &cond_ed_var[i]; nengel@2: spe_ed_params[i].cnt = (atomic_ea_t)(unsigned) &atomic_ed_var[i]; atomic_set(spe_ed_params[i].cnt, 0); nengel@2: nengel@2: mutex_init(spe_ed_params[i].lock); nengel@2: cond_init(spe_ed_params[i].cond); nengel@2: if (pthread_create(&spe_ed_tid[i], NULL, spe_ed_thread, (void *) &spe_ed_params[i])) nengel@2: av_log(AV_LOG_ERROR, "create_workers: pthread create for spe failed %d\n", i); nengel@2: nengel@2: //slicebufaddr nengel@2: spe_ed_slice_buf[i] = (EDSlice_spu *) _spe_out_mbox_read(spe_ed_control_area[i]); nengel@2: av_log(AV_LOG_DEBUG, "create_workers: created spe thread %d\n", i); nengel@2: } nengel@2: for (int j = 0; j < b_threads; j++) { nengel@2: i = j+ip_threads; nengel@2: spe_ed_params[i].mb_width = h->mb_width; nengel@2: spe_ed_params[i].mb_stride = h->mb_stride; nengel@2: spe_ed_params[i].mb_height = h->mb_height; nengel@2: spe_ed_params[i].type = EDB; nengel@2: spe_ed_params[i].idx = i; nengel@2: spe_ed_params[i].spe_id = j; nengel@2: spe_ed_params[i].spe_total = b_threads; nengel@2: //spe_params[i].slice_params= &slice_params; nengel@2: //spe_ed_params[i].src_spe = spe_ed_ls_area[(i-1+num_threads)%num_threads]; nengel@2: spe_ed_params[i].tgt_spe = spe_ed_ls_area[((j+1)%b_threads) + ip_threads]; nengel@2: nengel@2: spe_ed_params[i].lock = (mutex_ea_t) (unsigned) &mutex_ed_var[i]; nengel@2: spe_ed_params[i].cond = (cond_ea_t) (unsigned) &cond_ed_var[i]; nengel@2: spe_ed_params[i].cnt = (atomic_ea_t)(unsigned) &atomic_ed_var[i]; atomic_set(spe_ed_params[i].cnt, 0); nengel@2: nengel@2: mutex_init(spe_ed_params[i].lock); nengel@2: cond_init(spe_ed_params[i].cond); nengel@2: if (pthread_create(&spe_ed_tid[i], NULL, spe_ed_thread, (void *) &spe_ed_params[i])) nengel@2: av_log(AV_LOG_ERROR, "create_workers: pthread create for spe failed %d\n", i); nengel@2: nengel@2: //slicebufaddr nengel@2: spe_ed_slice_buf[i] = (EDSlice_spu *) _spe_out_mbox_read(spe_ed_control_area[i]); nengel@2: av_log(AV_LOG_DEBUG, "create_workers: created spe thread %d\n", i); nengel@2: } nengel@2: spe_image_close(spe_program); nengel@2: nengel@2: } nengel@2: nengel@2: static void fill_EDSlice_spu(EDSlice_spu *dst, EDSlice *src){ nengel@2: dst->pps = src->pps; nengel@2: dst->mbs = src->mbs; nengel@2: dst->state = src->state; nengel@2: dst->qp_thresh = src->qp_thresh; nengel@2: dst->pic = *src->current_picture; nengel@2: nengel@2: dst->ref_count[0] = src->ref_count[0]; nengel@2: dst->ref_count[1] = src->ref_count[1]; nengel@2: dst->slice_type = src->slice_type; nengel@2: dst->slice_type_nos = src->slice_type_nos; nengel@2: dst->direct_8x8_inference_flag = src->direct_8x8_inference_flag; nengel@2: dst->list_count = src->list_count; nengel@2: dst->coded_pic_num = src->coded_pic_num; nengel@2: nengel@2: GetBitContext *gb = &src->gb; nengel@2: align_get_bits( gb); nengel@2: dst->bytestream_start = gb->buffer + get_bits_count(gb)/8; nengel@2: dst->byte_bufsize = (get_bits_left(gb) + 7)/8; nengel@2: nengel@2: dst->transform_bypass = src->transform_bypass; nengel@2: dst->direct_spatial_mv_pred = src->direct_spatial_mv_pred; nengel@2: memcpy(dst->map_col_to_list0, src->map_col_to_list0, 2*16*sizeof(int)); nengel@2: memcpy(dst->dist_scale_factor, src->dist_scale_factor, 16*sizeof(int)); nengel@2: dst->cabac_init_idc = src->cabac_init_idc; nengel@2: memcpy(dst->ref2frm, src->ref2frm, 2*64*sizeof(int)); nengel@2: dst->chroma_qp[0]= src->chroma_qp[0]; nengel@2: dst->chroma_qp[1]= src->chroma_qp[1]; nengel@2: dst->qscale = src->qscale; nengel@2: dst->last_qscale_diff = src->last_qscale_diff; nengel@2: nengel@2: if (src->slice_type_nos == FF_B_TYPE) dst->list1 = *src->ref_list[1][0]; nengel@2: } nengel@2: nengel@2: static void send_slice_to_spe_and_wait(EDSlice_spu *s, int id){ nengel@2: unsigned status; nengel@2: nengel@2: spe_mfcio_get(spe_ed_context[id], (unsigned) spe_ed_slice_buf[id], s, sizeof(EDSlice_spu), 14, 0, 0); nengel@2: spe_mfcio_tag_status_read(spe_ed_context[id], 1<<14, SPE_TAG_ALL, &status); nengel@2: nengel@2: nengel@2: _spe_in_mbox_write(spe_ed_control_area[id], 0); nengel@2: nengel@2: while (!spe_out_mbox_status(spe_ed_context[id])){ nengel@2: //pthread_yield(); nengel@2: usleep(1000); nengel@2: } nengel@2: _spe_out_mbox_read(spe_ed_control_area[id]); nengel@2: } nengel@2: nengel@2: static int decode_slice_entropy_cell(EntropyContext *ec, EDSlice *s, int id){ nengel@2: int i,j; nengel@2: nengel@2: if( !s->pps.cabac ){ nengel@2: av_log(AV_LOG_ERROR, "Only cabac encoded streams are supported\n"); nengel@2: return -1; nengel@2: } nengel@2: DECLARE_ALIGNED(16, EDSlice_spu, slice); nengel@2: fill_EDSlice_spu(&slice, s); nengel@2: nengel@2: send_slice_to_spe_and_wait(&slice, id); nengel@2: nengel@2: return 0; nengel@2: } nengel@2: nengel@2: static int decode_slice_entropy_cell_seq(H264Context *h, EntropyContext *ec, EDSlice *s){ nengel@2: int i,j; nengel@2: nengel@2: if( !s->pps.cabac ){ nengel@2: av_log(AV_LOG_ERROR, "Only cabac encoded streams are supported\n"); nengel@2: return -1; nengel@2: } nengel@2: DECLARE_ALIGNED(16, EDSlice_spu, slice); nengel@2: fill_EDSlice_spu(&slice, s); nengel@2: nengel@2: send_slice_to_spe_and_wait(&slice, 0); nengel@2: nengel@2: if (s->release_cnt>0) { nengel@2: for (int i=0; irelease_cnt; i++){ nengel@2: release_pib_entry(h, s->release_ref[i], 2); nengel@2: } nengel@2: s->release_cnt=0; nengel@2: } nengel@2: nengel@2: release_pib_entry(h, s->current_picture, 1); nengel@2: av_freep(&s->gb.raw); nengel@2: if (s->gb.rbsp) nengel@2: av_freep(&s->gb.rbsp); nengel@2: nengel@2: return 0; nengel@2: } nengel@2: nengel@2: static void *entr_IP_spe_thread(void *arg){ nengel@2: EDThreadContext *eip = (EDThreadContext *) arg; nengel@2: H264Context *h = eip->h; nengel@2: // printf("eip %d, pid %d\n", eip->thread_num, syscall(SYS_gettid)); nengel@2: for (int i=0; imbs[i] = av_malloc(h->mb_height*h->mb_width*sizeof(H264Mb)); nengel@2: } nengel@2: nengel@2: EntropyContext *ec = get_entropy_context(h); nengel@2: EDSlice *s; nengel@2: nengel@2: for(;;){ nengel@2: { nengel@2: pthread_mutex_lock(&eip->ed_lock); nengel@2: while (eip->ed_cnt <= 0) nengel@2: pthread_cond_wait(&eip->ed_cond, &eip->ed_lock); nengel@2: s = &eip->ed_q[eip->ed_fo]; nengel@2: eip->ed_fo++; eip->ed_fo %= MAX_SLICE_COUNT; nengel@2: pthread_mutex_unlock(&eip->ed_lock); nengel@2: } nengel@2: nengel@2: if (s->state<0) nengel@2: break; nengel@2: { nengel@2: pthread_mutex_lock(&eip->mbs_lock); nengel@2: while (eip->mbs_cnt <= 0) nengel@2: pthread_cond_wait(&eip->mbs_cond, &eip->mbs_lock); nengel@2: nengel@2: s->mbs = eip->mbs[eip->mbs_fo]; nengel@2: s->ed = eip; nengel@2: eip->mbs_cnt--; nengel@2: eip->mbs_fo++; eip->mbs_fo%=SLICE_BUFS; nengel@2: pthread_mutex_unlock(&eip->mbs_lock); nengel@2: } nengel@2: if (eip->cell){ nengel@2: decode_slice_entropy_cell(ec, s, eip->thread_num); nengel@2: }else{ nengel@2: decode_slice_entropy(ec, s); nengel@2: } nengel@2: nengel@2: // { nengel@2: // pthread_mutex_lock(&h->lock[ENTROPY2]); nengel@2: // h->ed_poc[h->ed_poc_fi++ % MAX_SLICE_COUNT] = s->current_picture->poc; nengel@2: // while (h->ed_poc_fi > h->ed_poc_fo + MAX_SLICE_COUNT) nengel@2: // h->ed_poc_fo++; nengel@2: // nengel@2: // pthread_cond_signal(&h->cond[ENTROPY2]); nengel@2: // pthread_mutex_unlock(&h->lock[ENTROPY2]); nengel@2: // } nengel@2: nengel@2: { nengel@2: pthread_mutex_lock(&h->lock[ENTROPY4]); nengel@2: while (h->ed_reorder_cnt>=MAX_SLICE_COUNT) nengel@2: pthread_cond_wait(&h->cond[ENTROPY4], &h->lock[ENTROPY4]); nengel@2: h->ed_reorder_q[h->ed_reorder_fi] = *s; nengel@2: h->ed_reorder_cnt++; nengel@2: h->ed_reorder_fi++; h->ed_reorder_fi %= MAX_SLICE_COUNT; nengel@2: pthread_cond_signal(&h->cond[ENTROPY4]); nengel@2: pthread_mutex_unlock(&h->lock[ENTROPY4]); nengel@2: } nengel@2: nengel@2: { nengel@2: pthread_mutex_lock(&eip->ed_lock); nengel@2: eip->ed_cnt--; nengel@2: pthread_cond_signal(&eip->ed_cond); nengel@2: pthread_mutex_unlock(&eip->ed_lock); nengel@2: } nengel@2: } nengel@2: nengel@2: free_entropy_context(ec); nengel@2: nengel@2: pthread_exit(NULL); nengel@2: return NULL; nengel@2: } nengel@2: nengel@2: static void *entr_B_spe_thread(void *arg){ nengel@2: EDThreadContext *eb = (EDThreadContext *) arg; nengel@2: H264Context *h = eb->h; nengel@2: // printf("eb %d, pid %d\n", eb->thread_num, syscall(SYS_gettid)); nengel@2: for (int i=0; imbs[i] = av_malloc(h->mb_height*h->mb_width*sizeof(H264Mb)); nengel@2: } nengel@2: nengel@2: EntropyContext *ec = get_entropy_context(h); nengel@2: EDSlice *s; nengel@2: nengel@2: for(;;){ nengel@2: { nengel@2: pthread_mutex_lock(&eb->ed_lock); nengel@2: while (eb->ed_cnt <= 0) nengel@2: pthread_cond_wait(&eb->ed_cond, &eb->ed_lock); nengel@2: s = &eb->ed_q[eb->ed_fo]; nengel@2: eb->ed_fo++; eb->ed_fo %= MAX_SLICE_COUNT; nengel@2: pthread_mutex_unlock(&eb->ed_lock); nengel@2: } nengel@2: nengel@2: if (s->state<0) nengel@2: break; nengel@2: { nengel@2: pthread_mutex_lock(&eb->mbs_lock); nengel@2: while (eb->mbs_cnt <= 0) nengel@2: pthread_cond_wait(&eb->mbs_cond, &eb->mbs_lock); nengel@2: s->mbs = eb->mbs[eb->mbs_fo]; nengel@2: s->ed = eb; nengel@2: eb->mbs_cnt--; nengel@2: eb->mbs_fo++; eb->mbs_fo%=SLICE_BUFS; nengel@2: pthread_mutex_unlock(&eb->mbs_lock); nengel@2: } nengel@2: //decode_B_slice_entropy(&hcabac, &cabac, s, eb, eb->prev_ed); nengel@2: decode_slice_entropy_cell(ec, s, eb->thread_num + h->edip_threads); nengel@2: nengel@2: { nengel@2: pthread_mutex_lock(&h->lock[ENTROPY4]); nengel@2: while (h->ed_reorder_cnt>=MAX_SLICE_COUNT) nengel@2: pthread_cond_wait(&h->cond[ENTROPY4], &h->lock[ENTROPY4]); nengel@2: h->ed_reorder_q[h->ed_reorder_fi] = *s; nengel@2: h->ed_reorder_cnt++; nengel@2: h->ed_reorder_fi++; h->ed_reorder_fi %= MAX_SLICE_COUNT; nengel@2: pthread_cond_signal(&h->cond[ENTROPY4]); nengel@2: pthread_mutex_unlock(&h->lock[ENTROPY4]); nengel@2: nengel@2: } nengel@2: nengel@2: { nengel@2: pthread_mutex_lock(&eb->ed_lock); nengel@2: eb->ed_cnt--; nengel@2: pthread_cond_signal(&eb->ed_cond); nengel@2: pthread_mutex_unlock(&eb->ed_lock); nengel@2: } nengel@2: } nengel@2: eb->lines_cnt++; nengel@2: nengel@2: free_entropy_context(ec); nengel@2: nengel@2: pthread_exit(NULL); nengel@2: return NULL; nengel@2: } nengel@2: nengel@2: static void *entr_B_distribute(void *arg){ nengel@2: H264Context *h = (H264Context *) arg; nengel@2: EDSlice *s; nengel@2: nengel@2: int i, n=0, poc; nengel@2: nengel@2: // printf("eb dist, pid %d\n", syscall(SYS_gettid)); nengel@2: nengel@2: for(i=0; iedb_threads; i++){ nengel@2: h->b[i].h =h; nengel@2: h->b[i].thread_num =i; nengel@2: h->b[i].thread_total =h->edb_threads; nengel@2: pthread_mutex_init(&h->b[i].mbs_lock, NULL); nengel@2: pthread_cond_init(&h->b[i].mbs_cond, NULL); nengel@2: h->b[i].mbs_fo = 0; nengel@2: h->b[i].mbs_cnt = SLICE_BUFS; nengel@2: h->b[i].ed_fi =0; nengel@2: h->b[i].ed_fo =0; nengel@2: h->b[i].ed_cnt =0; nengel@2: h->b[i].lines_cnt =0; nengel@2: h->b[i].prev_ed = &h->b[(i-1 +h->edb_threads) % h->edb_threads]; nengel@2: pthread_mutex_init(&h->b[i].ed_lock, NULL); nengel@2: pthread_cond_init(&h->b[i].ed_cond, NULL); nengel@2: pthread_create(&h->ed_B_thr[i], NULL, entr_B_spe_thread, &h->b[i]); nengel@2: } nengel@2: nengel@2: for(;;){ nengel@2: { nengel@2: pthread_mutex_lock(&h->lock[ENTROPY3B]); nengel@2: while (h->ed_B_cnt<=0) nengel@2: pthread_cond_wait(&h->cond[ENTROPY3B], &h->lock[ENTROPY3B]); nengel@2: s= &h->ed_B_q[h->ed_B_fo]; nengel@2: h->ed_B_fo++; h->ed_B_fo %= MAX_SLICE_COUNT; nengel@2: pthread_mutex_unlock(&h->lock[ENTROPY3B]); nengel@2: nengel@2: } nengel@2: if (s->state<0) nengel@2: break; nengel@2: nengel@2: if (s->ref_list[1][0]->slice_type_nos != FF_B_TYPE){ nengel@2: while (poc < s->ref_list[1][0]->poc){ nengel@2: pthread_mutex_lock(&h->lock[ENTROPY2]); nengel@2: while (poc == h->ed_poc) nengel@2: pthread_cond_wait(&h->cond[ENTROPY2], &h->lock[ENTROPY2]); nengel@2: poc = h->ed_poc; nengel@2: pthread_mutex_unlock(&h->lock[ENTROPY2]); nengel@2: } nengel@2: } nengel@2: { nengel@2: pthread_mutex_lock(&h->b[n].ed_lock); nengel@2: while (h->b[n].ed_cnt >= MAX_SLICE_COUNT) nengel@2: pthread_cond_wait(&h->b[n].ed_cond, &h->b[n].ed_lock); nengel@2: h->b[n].ed_q[ h->b[n].ed_fi] = *s; nengel@2: h->b[n].ed_cnt++; nengel@2: h->b[n].ed_fi++; h->b[n].ed_fi %= MAX_SLICE_COUNT; nengel@2: pthread_cond_signal(&h->b[n].ed_cond); nengel@2: pthread_mutex_unlock(&h->b[n].ed_lock); nengel@2: nengel@2: n++; n%=h->edb_threads; nengel@2: } nengel@2: { nengel@2: pthread_mutex_lock(&h->lock[ENTROPY3B]); nengel@2: h->ed_B_cnt--; nengel@2: pthread_cond_signal(&h->cond[ENTROPY3B]); nengel@2: pthread_mutex_unlock(&h->lock[ENTROPY3B]); nengel@2: nengel@2: } nengel@2: nengel@2: } nengel@2: nengel@2: for (i=0; iedb_threads; i++){ nengel@2: pthread_mutex_lock(&h->b[i].ed_lock); nengel@2: while (h->b[i].ed_cnt >= MAX_SLICE_COUNT) nengel@2: pthread_cond_wait(&h->b[i].ed_cond, &h->b[i].ed_lock); nengel@2: h->b[i].ed_q[ h->b[i].ed_fi] = *s; nengel@2: h->b[i].ed_cnt++; nengel@2: h->b[i].ed_fi++; h->b[i].ed_fi %= MAX_SLICE_COUNT; nengel@2: pthread_cond_signal(&h->b[i].ed_cond); nengel@2: pthread_mutex_unlock(&h->b[i].ed_lock); nengel@2: nengel@2: } nengel@2: for(int i=0; iedb_threads; i++){ nengel@2: pthread_join(h->ed_B_thr[i], NULL); nengel@2: } nengel@2: pthread_exit(NULL); nengel@2: return NULL; nengel@2: } nengel@2: nengel@2: nengel@2: static void *entr_IPB_distribute(void *arg){ nengel@2: H264Context *h = (H264Context *) arg; nengel@2: EDSlice *s; nengel@2: int i,n=0; nengel@2: nengel@2: create_spe_ED_threads(h, h->edip_threads, h->edb_threads); nengel@2: pthread_create(&h->ed_B_dist, NULL, entr_B_distribute, h); nengel@2: for(i=0; iedip_threads + h->edip_ppe_threads; i++){ nengel@2: h->ip[i].h =h; nengel@2: h->ip[i].cell = (i >= h->edip_ppe_threads); nengel@2: pthread_mutex_init(&h->ip[i].mbs_lock, NULL); nengel@2: pthread_cond_init(&h->ip[i].mbs_cond, NULL); nengel@2: h->ip[i].thread_num = i - h->edip_ppe_threads; nengel@2: h->ip[i].thread_total=h->edip_threads+ h->edip_ppe_threads; nengel@2: h->ip[i].mbs_fo = 0; nengel@2: h->ip[i].mbs_cnt = SLICE_BUFS; nengel@2: h->ip[i].ed_fi =0; nengel@2: h->ip[i].ed_fo =0; nengel@2: pthread_mutex_init(&h->ip[i].ed_lock, NULL); nengel@2: pthread_cond_init(&h->ip[i].ed_cond, NULL); nengel@2: pthread_create(&h->ed_IP_thr[i], NULL, entr_IP_spe_thread, &h->ip[i]); nengel@2: } nengel@2: nengel@2: for(;;){ nengel@2: { nengel@2: pthread_mutex_lock(&h->lock[ENTROPY]); nengel@2: while (h->ed_cnt<=0) nengel@2: pthread_cond_wait(&h->cond[ENTROPY], &h->lock[ENTROPY]); nengel@2: s= &h->ed_q[h->ed_fo]; nengel@2: nengel@2: pthread_mutex_unlock(&h->lock[ENTROPY]); nengel@2: h->ed_fo++; h->ed_fo %= MAX_SLICE_COUNT; nengel@2: } nengel@2: if (s->state<0) nengel@2: break; nengel@2: nengel@2: assert(s->current_picture); nengel@2: if (s->slice_type_nos == FF_B_TYPE ) nengel@2: { nengel@2: pthread_mutex_lock(&h->lock[ENTROPY3B]); nengel@2: while (h->ed_B_cnt>=MAX_SLICE_COUNT) nengel@2: pthread_cond_wait(&h->cond[ENTROPY3B], &h->lock[ENTROPY3B]); nengel@2: h->ed_B_q[h->ed_B_fi] = *s; nengel@2: h->ed_B_cnt++; nengel@2: h->ed_B_fi++; h->ed_B_fi %= MAX_SLICE_COUNT; nengel@2: pthread_cond_signal(&h->cond[ENTROPY3B]); nengel@2: pthread_mutex_unlock(&h->lock[ENTROPY3B]); nengel@2: }else nengel@2: { nengel@2: ///round robin now, change to based on rawframes size. nengel@2: pthread_mutex_lock(&h->ip[n].ed_lock); nengel@2: while (h->ip[n].ed_cnt >= MAX_SLICE_COUNT) nengel@2: pthread_cond_wait(&h->ip[n].ed_cond, &h->ip[n].ed_lock); nengel@2: h->ip[n].ed_q[ h->ip[n].ed_fi] = *s; nengel@2: h->ip[n].ed_cnt++; nengel@2: h->ip[n].ed_fi++; h->ip[n].ed_fi %= MAX_SLICE_COUNT; nengel@2: pthread_cond_signal(&h->ip[n].ed_cond); nengel@2: pthread_mutex_unlock(&h->ip[n].ed_lock); nengel@2: nengel@2: n++; n %=(h->edip_threads+h->edip_ppe_threads); nengel@2: } nengel@2: { nengel@2: pthread_mutex_lock(&h->lock[ENTROPY]); nengel@2: h->ed_cnt--; nengel@2: pthread_cond_signal(&h->cond[ENTROPY]); nengel@2: pthread_mutex_unlock(&h->lock[ENTROPY]); nengel@2: nengel@2: } nengel@2: } nengel@2: nengel@2: { nengel@2: pthread_mutex_lock(&h->lock[ENTROPY3B]); nengel@2: while (h->ed_B_cnt>=MAX_SLICE_COUNT) nengel@2: pthread_cond_wait(&h->cond[ENTROPY3B], &h->lock[ENTROPY3B]); nengel@2: h->ed_B_q[h->ed_B_fi] = *s; nengel@2: h->ed_B_cnt++; nengel@2: h->ed_B_fi++; h->ed_B_fi %= MAX_SLICE_COUNT; nengel@2: pthread_cond_signal(&h->cond[ENTROPY3B]); nengel@2: pthread_mutex_unlock(&h->lock[ENTROPY3B]); nengel@2: } nengel@2: { nengel@2: for (i=0; iedip_threads + h->edip_ppe_threads; i++){ nengel@2: pthread_mutex_lock(&h->ip[i].ed_lock); nengel@2: while (h->ip[i].ed_cnt >= MAX_SLICE_COUNT) nengel@2: pthread_cond_wait(&h->ip[i].ed_cond, &h->ip[i].ed_lock); nengel@2: h->ip[i].ed_q[ h->ip[i].ed_fi] = *s; nengel@2: h->ip[i].ed_cnt++; nengel@2: h->ip[i].ed_fi++; h->ip[i].ed_fi %= MAX_SLICE_COUNT; nengel@2: pthread_cond_signal(&h->ip[i].ed_cond); nengel@2: pthread_mutex_unlock(&h->ip[i].ed_lock); nengel@2: } nengel@2: } nengel@2: { nengel@2: pthread_mutex_lock(&h->lock[ENTROPY4]); nengel@2: while (h->ed_reorder_cnt>=MAX_SLICE_COUNT) nengel@2: pthread_cond_wait(&h->cond[ENTROPY4], &h->lock[ENTROPY4]); nengel@2: h->ed_reorder_q[h->ed_reorder_fi] = *s; nengel@2: h->ed_reorder_cnt++; nengel@2: h->ed_reorder_fi++; h->ed_reorder_fi %= MAX_SLICE_COUNT; nengel@2: pthread_cond_signal(&h->cond[ENTROPY4]); nengel@2: pthread_mutex_unlock(&h->lock[ENTROPY4]); nengel@2: nengel@2: } nengel@2: pthread_join(h->ed_B_dist, NULL); nengel@2: for(i=0; iedip_threads; i++){ nengel@2: pthread_join(h->ed_IP_thr[i], NULL); nengel@2: } nengel@2: pthread_exit(NULL); nengel@2: return NULL; nengel@2: } nengel@2: nengel@2: static pthread_t ed_IPB_dist; nengel@2: static void *entropy_IPB_cell_thread(void *arg){ nengel@2: H264Context *h = (H264Context *) arg; nengel@2: int i; nengel@2: EDSlice reorder[MAX_SLICE_COUNT]; nengel@2: int ip_poc[MAX_SLICE_COUNT][2]={0,}; nengel@2: int next_ip_id=0; nengel@2: int ip_poc_cnt=0; nengel@2: EDSlice *s; nengel@2: int reorder_cnt=0; nengel@2: unsigned next_pic_num=0; nengel@2: nengel@2: pthread_create(&ed_IPB_dist, NULL, entr_IPB_distribute, h); nengel@2: int count =0; nengel@2: for(;;){ nengel@2: //signals received from the entropy decoders nengel@2: { nengel@2: pthread_mutex_lock(&h->lock[ENTROPY4]); nengel@2: while (h->ed_reorder_cnt<=0) nengel@2: pthread_cond_wait(&h->cond[ENTROPY4], &h->lock[ENTROPY4]); nengel@2: s= &h->ed_reorder_q[h->ed_reorder_fo]; nengel@2: h->ed_reorder_fo++; h->ed_reorder_fo %=MAX_SLICE_COUNT; nengel@2: pthread_mutex_unlock(&h->lock[ENTROPY4]); nengel@2: } nengel@2: nengel@2: if (s->state >=0 && s->slice_type_nos != FF_B_TYPE){ nengel@2: for (i=0; iip_id < ip_poc[i][0]){ nengel@2: memmove(ip_poc[i+1], ip_poc[i], 2*(ip_poc_cnt-i)*sizeof(int)); nengel@2: break; nengel@2: } nengel@2: } nengel@2: ip_poc[i][0]= s->ip_id; nengel@2: ip_poc[i][1]= s->current_picture->poc; nengel@2: ip_poc_cnt++; nengel@2: nengel@2: while (next_ip_id == ip_poc[0][0]){ nengel@2: pthread_mutex_lock(&h->lock[ENTROPY2]); nengel@2: h->ed_poc = ip_poc[0][1]; nengel@2: nengel@2: pthread_cond_signal(&h->cond[ENTROPY2]); nengel@2: pthread_mutex_unlock(&h->lock[ENTROPY2]); nengel@2: memmove(ip_poc[0], ip_poc[1], 2*(ip_poc_cnt-1)*sizeof(int)); nengel@2: ip_poc_cnt--; nengel@2: next_ip_id++; nengel@2: } nengel@2: } nengel@2: nengel@2: for(i=reorder_cnt; i>0; i--){ nengel@2: if (s->coded_pic_num < reorder[i-1].coded_pic_num) nengel@2: break; nengel@2: reorder[i]=reorder[i-1]; nengel@2: } nengel@2: reorder[i]=*s; nengel@2: nengel@2: while(reorder_cnt>=0){ nengel@2: if (next_pic_num!=reorder[reorder_cnt].coded_pic_num){ nengel@2: break; nengel@2: } nengel@2: EDSlice *es = &reorder[reorder_cnt]; nengel@2: nengel@2: { nengel@2: pthread_mutex_lock(&h->lock[MBDEC]); nengel@2: while (h->mbdec_cnt >= MAX_SLICE_COUNT) nengel@2: pthread_cond_wait(&h->cond[MBDEC], &h->lock[MBDEC]); nengel@2: copyEDtoMBSlice(&h->mbdec_q[h->mbdec_fi], es); nengel@2: nengel@2: h->mbdec_cnt++; nengel@2: h->mbdec_fi++; h->mbdec_fi %= MAX_SLICE_COUNT; nengel@2: pthread_cond_signal(&h->cond[MBDEC]); nengel@2: pthread_mutex_unlock(&h->lock[MBDEC]); nengel@2: nengel@2: } nengel@2: nengel@2: if (es->state<0) nengel@2: goto end; nengel@2: nengel@2: assert(es->current_picture); nengel@2: for (int i=0; irelease_cnt; i++){ nengel@2: release_pib_entry(h, es->release_ref[i], 2); nengel@2: } nengel@2: release_pib_entry(h, es->current_picture, 1); nengel@2: av_freep(&es->gb.raw); nengel@2: if (es->gb.rbsp) nengel@2: av_freep(&es->gb.rbsp); nengel@2: nengel@2: next_pic_num++; nengel@2: reorder_cnt--; nengel@2: } nengel@2: reorder_cnt++; nengel@2: nengel@2: { nengel@2: pthread_mutex_lock(&h->lock[ENTROPY4]); nengel@2: h->ed_reorder_cnt--; nengel@2: pthread_cond_signal(&h->cond[ENTROPY4]); nengel@2: pthread_mutex_unlock(&h->lock[ENTROPY4]); nengel@2: } nengel@2: } nengel@2: nengel@2: end: nengel@2: pthread_join(ed_IPB_dist, NULL); nengel@2: pthread_exit(NULL); nengel@2: return NULL; nengel@2: } nengel@2: nengel@2: nengel@2: static void fill_spe_slice(H264slice *dst, const MBSlice *src, H264Context *h){ nengel@2: dst->deblocking_filter =1; nengel@2: dst->linesize = src->current_picture->linesize[0]; nengel@2: dst->uvlinesize = src->current_picture->linesize[1]; nengel@2: dst->mb_width = h->mb_width; nengel@2: dst->mb_height = h->mb_height; nengel@2: dst->use_weight = src->use_weight; nengel@2: dst->use_weight_chroma = src->use_weight_chroma; nengel@2: dst->luma_log2_weight_denom = src->luma_log2_weight_denom; nengel@2: dst->chroma_log2_weight_denom = src->chroma_log2_weight_denom; nengel@2: nengel@2: //weights later nengel@2: memcpy(dst->luma_weight, src->luma_weight, 16*2*2*sizeof(int16_t)); nengel@2: memcpy(dst->chroma_weight, src->chroma_weight, 16*2*2*2*sizeof(int16_t)); nengel@2: memcpy(dst->implicit_weight, src->implicit_weight, 16*16*2*sizeof(int16_t)); nengel@2: nengel@2: for(int list=0; list<2; list++){ nengel@2: for (int i=0; iref_count[list]; i++){ nengel@2: Picture_spu *p_dst = &dst->ref_list[list][i]; nengel@2: DecodedPicture *p_src = src->ref_list[list][i]; nengel@2: if (p_src){ nengel@2: p_dst->data[0] = p_src->data[0]; nengel@2: p_dst->data[1] = p_src->data[1]; nengel@2: p_dst->data[2] = p_src->data[2]; nengel@2: } nengel@2: } nengel@2: } nengel@2: dst->state = src->state; nengel@2: nengel@2: dst->emu_edge_width =32; nengel@2: dst->emu_edge_height =32; nengel@2: dst->slice_type = src->slice_type; nengel@2: dst->slice_type_nos = src->slice_type_nos; nengel@2: dst->slice_alpha_c0_offset = src->slice_alpha_c0_offset; nengel@2: dst->slice_beta_offset = src->slice_beta_offset; nengel@2: nengel@2: memcpy(dst->chroma_qp_table, src->pps.chroma_qp_table, 2*64); nengel@2: nengel@2: dst->blocks = src->mbs; nengel@2: dst->dst_y = src->current_picture->data[0]; nengel@2: dst->dst_cb = src->current_picture->data[1]; nengel@2: dst->dst_cr = src->current_picture->data[2]; nengel@2: } nengel@2: nengel@2: static void decode_slice_mb_seq_cell(H264Context *h, MBRecContext *d, MBSlice *s, DecodedPicture *tmp){ nengel@2: static int rl_fi=0; nengel@2: nengel@2: DECLARE_ALIGNED(16, H264slice, spe_slice); nengel@2: H264spe *p=&spe_params[0]; nengel@2: unsigned status; nengel@2: uint8_t *dst_y, *dst_cb, *dst_cr; nengel@2: nengel@2: DecodedPicture *dp; nengel@2: nengel@2: for (int i=0; i<2; i++){ nengel@2: for(int j=0; j< s->ref_count[i]; j++){ nengel@2: if (s->ref_list_cpn[i][j] ==-1) nengel@2: continue; nengel@2: int k; nengel@2: for (k=0; kdpb[k].reference >= 2 && h->dpb[k].cpn == s->ref_list_cpn[i][j]){ nengel@2: s->ref_list[i][j] = &h->dpb[k]; nengel@2: break; nengel@2: } nengel@2: } nengel@2: } nengel@2: } nengel@2: nengel@2: dp = get_dpb_entry(h); nengel@2: init_dpb_entry(dp, s, d->width, d->height); nengel@2: nengel@2: if (h->no_mbd) nengel@2: return; nengel@2: nengel@2: nengel@2: fill_spe_slice(&spe_slice, s, h); nengel@2: spe_mfcio_get(spe_context[0], (unsigned) (spe_slice_buf[0] + rl_fi), &spe_slice, sizeof(H264slice), 15, 0, 0); nengel@2: spe_mfcio_tag_status_read(spe_context[0], 1<<15, SPE_TAG_ALL, &status); nengel@2: rl_fi++; rl_fi %= 2; nengel@2: nengel@2: _spe_in_mbox_write(spe_control_area[0], 0); nengel@2: while (atomic_read(rl_cnt)<=0){ nengel@2: //pthread_yield(); nengel@2: usleep(1000); nengel@2: } nengel@2: atomic_dec(rl_cnt); nengel@2: nengel@2: nengel@2: /** This is error free, no visual artifacts, however, md5sum fails.... (WTF) **/ nengel@2: // memcpy(tmp->data[0], s->current_picture->data[0], tmp->linesize[0]*h->mb_height*16); nengel@2: // memcpy(tmp->data[1], s->current_picture->data[1], tmp->linesize[1]*h->mb_height*8); nengel@2: // memcpy(tmp->data[2], s->current_picture->data[2], tmp->linesize[1]*h->mb_height*8); nengel@2: // nengel@2: // memset(s->current_picture->data[0], 0, tmp->linesize[0]*h->mb_height*16); nengel@2: // memset(s->current_picture->data[1], 0, tmp->linesize[1]*h->mb_height*8); nengel@2: // memset(s->current_picture->data[2], 0, tmp->linesize[1]*h->mb_height*8); nengel@2: // nengel@2: // decode_slice_mb_seq(d, s); nengel@2: // nengel@2: // for (int i=0; imb_height*16; i++){ nengel@2: // for (int j=0; jwidth; j++){ nengel@2: // if (tmp->data[0][j + i*tmp->linesize[0]] != s->current_picture->data[0][j + i*tmp->linesize[0]]){ nengel@2: // printf("%d, %d, %d, %d\n", j, i, tmp->data[0][j + i*tmp->linesize[0]], s->current_picture->data[0][j + i*tmp->linesize[0]]); nengel@2: // return; nengel@2: // } nengel@2: // } nengel@2: // } nengel@2: // nengel@2: // for (int i=0; imb_height*8; i++){ nengel@2: // for (int j=0; jwidth/2; j++){ nengel@2: // if (tmp->data[1][j + i*tmp->linesize[1]] != s->current_picture->data[1][j + i*tmp->linesize[1]]){ nengel@2: // printf("%d, %d, %d, %d\n", j, i, tmp->data[1][j + i*tmp->linesize[1]], s->current_picture->data[1][j + i*tmp->linesize[1]]); nengel@2: // return; nengel@2: // } nengel@2: // } nengel@2: // } nengel@2: // nengel@2: // for (int i=0; imb_height*8; i++){ nengel@2: // for (int j=0; jwidth/2; j++){ nengel@2: // if (tmp->data[2][j + i*tmp->linesize[1]] != s->current_picture->data[2][j + i*tmp->linesize[1]]){ nengel@2: // printf("%d, %d, %d, %d\n", j, i, tmp->data[2][j + i*tmp->linesize[1]], s->current_picture->data[2][j + i*tmp->linesize[1]]); nengel@2: // return; nengel@2: // } nengel@2: // } nengel@2: // } nengel@2: nengel@2: nengel@2: //printf("dst_y %p\n", dst_y); nengel@2: nengel@2: nengel@2: for (int i=0; irelease_cnt; i++){ nengel@2: for(int j=0; jdpb[j].cpn== s->release_ref_cpn[i]){ nengel@2: release_dpb_entry(h, &h->dpb[j], 2); nengel@2: break; nengel@2: } nengel@2: } nengel@2: } nengel@2: s->release_cnt=0; nengel@2: nengel@2: } nengel@2: nengel@2: static void *h264_spe_thread(void * thread_args ) { nengel@2: H264spe *params = (H264spe *)thread_args; nengel@2: unsigned int spe_id = params->spe_id; nengel@2: unsigned int runflags = 0; nengel@2: unsigned int entry = SPE_DEFAULT_ENTRY; nengel@2: // run SPE context nengel@2: spe_context_run(spe_context[spe_id], &entry, runflags, (void*) params, NULL, NULL); nengel@2: // done - now exit thread nengel@2: pthread_exit(NULL); nengel@2: } nengel@2: nengel@2: static int create_spe_MBR_threads(H264Context *h, int num_threads) { nengel@2: int i; nengel@2: nengel@2: // reserve memory for spe thread id, context and argument addresses nengel@2: spe_tid = av_malloc(num_threads * sizeof (pthread_t)); nengel@2: spe_context = av_malloc(num_threads * sizeof (spe_context_ptr_t)); nengel@2: spe_params = av_malloc(num_threads * sizeof (H264spe)); nengel@2: spe_control_area = av_malloc(num_threads * sizeof (void*)); nengel@2: spe_ls_area = av_malloc(num_threads * sizeof (void*)); nengel@2: spe_slice_buf = av_malloc(num_threads * sizeof (void*)); nengel@2: nengel@2: spe_program_handle_t *spe_program = spe_image_open("spe_mbd"); nengel@2: nengel@2: if (spe_program == NULL) nengel@2: av_log(AV_LOG_ERROR, "PPE: error opening SPE object image:%d. error=%s \n", errno, strerror(errno)); nengel@2: nengel@2: for (i = 0; i < num_threads; i++) { nengel@2: // create context for spe program nengel@2: spe_context[i] = spe_context_create(SPE_MAP_PS, NULL); nengel@2: if (spe_context[i] == NULL) nengel@2: av_log(AV_LOG_ERROR, "PPE: error creating SPE context:%d. error=%s \n", errno, strerror(errno)); nengel@2: // load SPE program into main memory nengel@2: if ((spe_program_load(spe_context[i], spe_program)) == -1) nengel@2: av_log(AV_LOG_ERROR, "PPE: error loading SPE context:%d. error=%s \n", errno, strerror(errno)); nengel@2: //get the control_area for fast mailboxing nengel@2: if ((spe_control_area[i] = spe_ps_area_get(spe_context[i], SPE_CONTROL_AREA)) == NULL) nengel@2: av_log(AV_LOG_ERROR, "PPE: error retrieving SPE control area:%d. error=%s \n", errno, strerror(errno)); nengel@2: //get ls area for inter spe communication nengel@2: if ((spe_ls_area[i] = spe_ls_area_get(spe_context[i])) == NULL) nengel@2: av_log(AV_LOG_ERROR, "PPE: error retrieving SPE ls area:%d. error=%s \n", errno, strerror(errno)); nengel@2: } nengel@2: nengel@2: for (i = 0; i < num_threads; i++) { nengel@2: spe_params[i].mb_width = h->mb_width; nengel@2: spe_params[i].mb_height = h->mb_height; nengel@2: spe_params[i].mb_stride = h->mb_stride; nengel@2: spe_params[i].spe_id = i; nengel@2: spe_params[i].spe_total = num_threads; nengel@2: //spe_params[i].slice_params= &slice_params; nengel@2: spe_params[i].src_spe = spe_ls_area[(i-1+num_threads)%num_threads]; nengel@2: spe_params[i].tgt_spe = spe_ls_area[(i+1)%num_threads]; nengel@2: nengel@2: spe_params[i].rl_lock = rl_lock; nengel@2: spe_params[i].rl_cond = rl_cond; nengel@2: spe_params[i].rl_cnt = rl_cnt; nengel@2: spe_params[i].lock = (mutex_ea_t) (unsigned) &mutex_var[i]; nengel@2: spe_params[i].cond = (cond_ea_t) (unsigned) &cond_var[i]; nengel@2: spe_params[i].cnt = (atomic_ea_t)(unsigned) &atomic_var[i]; atomic_set(spe_params[i].cnt, 0); nengel@2: nengel@2: mutex_init(spe_params[i].lock); nengel@2: cond_init(spe_params[i].cond); nengel@2: if (pthread_create(&spe_tid[i], NULL, h264_spe_thread, (void *) &spe_params[i])) nengel@2: av_log(AV_LOG_ERROR, "create_workers: pthread create for spe failed %d\n", i); nengel@2: nengel@2: //slicebufaddr nengel@2: spe_slice_buf[i] = (H264slice *) _spe_out_mbox_read(spe_control_area[i]); nengel@2: nengel@2: av_log(AV_LOG_DEBUG, "create_workers: created spe thread %d\n", i); nengel@2: } nengel@2: spe_image_close(spe_program); nengel@2: return 0; nengel@2: } nengel@2: nengel@2: //_spe_out_mbox_read(spe_control_area[i]); nengel@2: /** nengel@2: * joins all the spe worker threads. nengel@2: */ nengel@2: static void join_spe_worker_threads(H264slice *s, int num_threads, int *rl_fi) { nengel@2: int i; nengel@2: ///just to keep coding consistency. nengel@2: { nengel@2: for (i=0; icnt)>=2) {//double buffered nengel@2: usleep(1000);//cond_wait(p->cond, p->lock); nengel@2: } nengel@2: nengel@2: spe_mfcio_get(spe_context[i], (unsigned) (spe_slice_buf[i] + rl_fi[i]), s, sizeof(H264slice), 15, 0, 0); nengel@2: spe_mfcio_tag_status_read(spe_context[i], 1<<15, SPE_TAG_ALL, &status); nengel@2: //mutex_unlock(p->lock); nengel@2: _spe_in_mbox_write(spe_control_area[i], 0); nengel@2: } nengel@2: } nengel@2: nengel@2: for (i=0; irl_threads); nengel@2: for(;;){ nengel@2: { nengel@2: pthread_mutex_lock(&h->lock[MBDEC]); nengel@2: while (h->mbdec_cnt<=0) nengel@2: pthread_cond_wait(&h->cond[MBDEC], &h->lock[MBDEC]); nengel@2: s= &h->mbdec_q[h->mbdec_fo]; nengel@2: h->mbdec_fo++; h->mbdec_fo %= MAX_SLICE_COUNT; nengel@2: pthread_mutex_unlock(&h->lock[MBDEC]); nengel@2: } nengel@2: nengel@2: if (s->state<0){ nengel@2: break; nengel@2: } nengel@2: for (int i=0; i<2; i++){ nengel@2: for(int j=0; j< s->ref_count[i]; j++){ nengel@2: if (s->ref_list_cpn[i][j] ==-1) nengel@2: continue; nengel@2: int k; nengel@2: for (k=0; kdpb[k].reference >= 2 && h->dpb[k].cpn == s->ref_list_cpn[i][j]){ nengel@2: s->ref_list[i][j] = &h->dpb[k]; nengel@2: break; nengel@2: } nengel@2: } nengel@2: nengel@2: } nengel@2: } nengel@2: dp = get_dpb_entry(h); nengel@2: init_dpb_entry(dp, s, h->width, h->height); nengel@2: assert(s->current_picture); nengel@2: { nengel@2: while (atomic_read(rl_cnt) >=MAX_SLICE_COUNT){ nengel@2: usleep(1000); nengel@2: } nengel@2: h->mbrel_q[h->mbrel_fi] = *s; nengel@2: nengel@2: h->mbrel_fi++; h->mbrel_fi %= MAX_SLICE_COUNT; nengel@2: } nengel@2: { nengel@2: if(h->no_mbd){ nengel@2: atomic_inc(rl_cnt); nengel@2: }else { nengel@2: fill_spe_slice(&spe_slice, s, h); nengel@2: for (i=0; irl_threads; i++){ nengel@2: H264spe *p=&spe_params[i]; nengel@2: unsigned status; nengel@2: while (atomic_read(p->cnt)>=2){ //double buffered nengel@2: usleep(1000); nengel@2: //cond_wait(p->cond, p->lock); nengel@2: } nengel@2: spe_mfcio_get(spe_context[i], (unsigned) (spe_slice_buf[i] + rl_fi[i]), &spe_slice, sizeof(H264slice), 15, 0, 0); nengel@2: spe_mfcio_tag_status_read(spe_context[i], 1<<15, SPE_TAG_ALL, &status); nengel@2: rl_fi[i]++; rl_fi[i] %= 2; nengel@2: atomic_inc(p->cnt); nengel@2: nengel@2: _spe_in_mbox_write(spe_control_area[i], 0); nengel@2: } nengel@2: } nengel@2: } nengel@2: nengel@2: { nengel@2: pthread_mutex_lock(&h->lock[MBDEC]); nengel@2: h->mbdec_cnt--; nengel@2: pthread_cond_signal(&h->cond[MBDEC]); nengel@2: pthread_mutex_unlock(&h->lock[MBDEC]); nengel@2: } nengel@2: nengel@2: } nengel@2: nengel@2: { nengel@2: while (atomic_read(rl_cnt) >=MAX_SLICE_COUNT){ nengel@2: usleep(1000); nengel@2: } nengel@2: h->mbrel_q[h->mbrel_fi] = *s; nengel@2: nengel@2: h->mbrel_fi++; h->mbrel_fi %= MAX_SLICE_COUNT; nengel@2: } nengel@2: spe_slice.state=-1; nengel@2: join_spe_worker_threads(&spe_slice, h->rl_threads, rl_fi); nengel@2: pthread_exit(NULL); nengel@2: return NULL; nengel@2: } nengel@2: nengel@2: static void *mbdec_cell_thread(void *arg){ nengel@2: H264Context *h = (H264Context *) arg; nengel@2: nengel@2: rl_lock = (mutex_ea_t) (unsigned) &rl_mutex_var; nengel@2: rl_cond = (cond_ea_t) (unsigned) &rl_cond_var; nengel@2: rl_cnt = (atomic_ea_t) (unsigned) &rl_cnt_var; nengel@2: atomic_set(rl_cnt, 0); nengel@2: mutex_init(rl_lock); nengel@2: cond_init(rl_cond); nengel@2: // printf("mbdec, pid %d\n", syscall(SYS_gettid)); nengel@2: pthread_create(&h->rl_dist_thr, NULL, rl_dist_thread, h); nengel@2: nengel@2: for(;;){ nengel@2: MBSlice *s=NULL; nengel@2: { nengel@2: while (atomic_read(rl_cnt)<=0){ nengel@2: usleep(1000); nengel@2: } nengel@2: s= &h->mbrel_q[h->mbrel_fo]; nengel@2: h->mbrel_fo++; h->mbrel_fo %= MAX_SLICE_COUNT; nengel@2: } nengel@2: nengel@2: if (s->state<0) nengel@2: break; nengel@2: nengel@2: for (int i=0; irelease_cnt; i++){ nengel@2: for(int j=0; jdpb[j].cpn== s->release_ref_cpn[i]){ nengel@2: release_dpb_entry(h, &h->dpb[j], 2); nengel@2: break; nengel@2: } nengel@2: } nengel@2: } nengel@2: nengel@2: { nengel@2: EDThreadContext *ed = s->ed; nengel@2: pthread_mutex_lock(&ed->mbs_lock); nengel@2: ed->mbs_cnt++; nengel@2: pthread_cond_signal(&ed->mbs_cond); nengel@2: pthread_mutex_unlock(&ed->mbs_lock); nengel@2: } nengel@2: nengel@2: { nengel@2: pthread_mutex_lock(&h->lock[WRITE]); nengel@2: while (h->write_cnt>= DPB_SIZE) nengel@2: pthread_cond_wait(&h->cond[WRITE], &h->lock[WRITE]); nengel@2: assert(s); nengel@2: assert(s->current_picture); nengel@2: h->write_q[h->write_fi]= s->current_picture; nengel@2: h->write_cnt++; nengel@2: h->write_fi++; h->write_fi %= DPB_SIZE; nengel@2: pthread_cond_signal(&h->cond[WRITE]); nengel@2: pthread_mutex_unlock(&h->lock[WRITE]); nengel@2: nengel@2: } nengel@2: { nengel@2: atomic_dec(rl_cnt); nengel@2: } nengel@2: nengel@2: } nengel@2: nengel@2: {//propagate exit nengel@2: pthread_mutex_lock(&h->lock[WRITE]); nengel@2: while (h->write_cnt>= DPB_SIZE) nengel@2: pthread_cond_wait(&h->cond[WRITE], &h->lock[WRITE]); nengel@2: last_pic.reference = -1; nengel@2: h->write_q[h->write_fi] = &last_pic; nengel@2: h->write_cnt++; nengel@2: h->write_fi++; h->write_fi %= DPB_SIZE; nengel@2: pthread_cond_signal(&h->cond[WRITE]); nengel@2: pthread_mutex_unlock(&h->lock[WRITE]); nengel@2: nengel@2: } nengel@2: pthread_join(h->rl_dist_thr, NULL); nengel@2: pthread_exit(NULL); nengel@2: return NULL; nengel@2: } nengel@2: nengel@2: /* nengel@2: * The following code is the main loop of the file converter nengel@2: */ nengel@2: int h264_decode_cell(H264Context *h) { nengel@2: nengel@2: pthread_t read_thr, parsenal_thr, entropy_thr, mbdec_thr, write_thr; nengel@2: nengel@2: start_timer(); nengel@2: nengel@2: pthread_create(&read_thr, NULL, read_thread, h); nengel@2: pthread_create(&parsenal_thr, NULL, parsenal_thread, h); nengel@2: pthread_create(&entropy_thr, NULL, entropy_IPB_cell_thread, h); nengel@2: pthread_create(&mbdec_thr, NULL, mbdec_cell_thread, h); nengel@2: pthread_create(&write_thr, NULL, write_thread, h); nengel@2: nengel@2: pthread_join(read_thr, NULL); nengel@2: pthread_join(parsenal_thr, NULL); nengel@2: pthread_join(entropy_thr, NULL); nengel@2: pthread_join(mbdec_thr, NULL); nengel@2: pthread_join(write_thr, NULL); nengel@2: nengel@2: return 0; nengel@2: } nengel@2: nengel@2: /* nengel@2: * The following code is the main loop of the file converter nengel@2: */ nengel@2: int h264_decode_cell_seq(H264Context *h) { nengel@2: ParserContext *pc; nengel@2: NalContext *nc; nengel@2: EntropyContext *ec; nengel@2: MBRecContext *rc; nengel@2: OutputContext *oc; nengel@2: nengel@2: RawFrame frm; nengel@2: EDSlice slice, *s=&slice; nengel@2: MBSlice mbslice, *s2=&mbslice; nengel@2: PictureInfo *pic=NULL; nengel@2: DecodedPicture *out; nengel@2: int size; nengel@2: int frames=0; nengel@2: nengel@2: pc = get_parse_context(h->ifile); nengel@2: nc = get_nal_context(h->width, h->height); nengel@2: ec = get_entropy_context( h ); nengel@2: rc = get_mbrec_context(h); nengel@2: oc = get_output_context( h ); nengel@2: nengel@2: rl_lock = (mutex_ea_t) (unsigned) &rl_mutex_var; nengel@2: rl_cond = (cond_ea_t) (unsigned) &rl_cond_var; nengel@2: rl_cnt = (atomic_ea_t) (unsigned) &rl_cnt_var; nengel@2: atomic_set(rl_cnt, 0); nengel@2: mutex_init(rl_lock); nengel@2: cond_init(rl_cond); nengel@2: nengel@2: memset(s, 0, sizeof(EDSlice)); nengel@2: ff_init_slice(nc, s); nengel@2: s->mbs = av_malloc( h->mb_height * h->mb_width * sizeof(H264Mb)); nengel@2: nengel@2: DecodedPicture tmp; nengel@2: tmp.base[0]=0; nengel@2: ///fix this when want to debug the Cell errors nengel@2: //init_dpb_entry(&tmp, h->width, h->height); nengel@2: nengel@2: create_spe_ED_threads(h, 1, 0); nengel@2: create_spe_MBR_threads(h, 1); nengel@2: nengel@2: start_timer(); nengel@2: nengel@2: while(!pc->final_frame && frames++ < h->num_frames){ nengel@2: nengel@2: av_read_frame_internal(pc, &frm); nengel@2: nengel@2: PictureInfo *pic=get_pib_entry(h); nengel@2: ff_alloc_picture_info(nc, s, pic); nengel@2: decode_nal_units(nc, s, &frm); nengel@2: nengel@2: copyEDtoMBSlice(s2, s); nengel@2: decode_slice_entropy_cell_seq(h, ec, s); nengel@2: nengel@2: decode_slice_mb_seq_cell(h, rc, s2, &tmp); nengel@2: nengel@2: out =output_frame(h, oc, s2->current_picture, h->ofile, h->frame_width, h->frame_height); nengel@2: nengel@2: if (out){ nengel@2: release_dpb_entry(h, out, 1); nengel@2: } nengel@2: print_report(oc->frame_number, oc->video_size, 0, h->verbose); nengel@2: } nengel@2: while ((out=output_frame(h, oc, NULL, h->ofile, h->frame_width, h->frame_height))) ; nengel@2: nengel@2: print_report(oc->frame_number, oc->video_size, 1, h->verbose); nengel@2: nengel@2: /* finished ! */ nengel@2: av_freep(&s->mbs); nengel@2: nengel@2: free_parse_context(pc); nengel@2: free_nal_context (nc); nengel@2: free_entropy_context(ec); nengel@2: free_mbrec_context(rc); nengel@2: free_output_context(oc); nengel@2: return 0; nengel@2: }