Mercurial > cgi-bin > hgwebdir.cgi > PR > Applications > VSs > VSs__H264__App
comparison libavcodec/h264_cell.c @ 2:897f711a7157
rearrange to work with autoconf
| author | Nina Engelhardt <nengel@mailbox.tu-berlin.de> |
|---|---|
| date | Tue, 25 Sep 2012 15:55:33 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:0cbdae632367 |
|---|---|
| 1 | |
| 2 #include "h264_types.h" | |
| 3 #include "h264_parser.h" | |
| 4 #include "h264_nal.h" | |
| 5 #include "h264_entropy.h" | |
| 6 #include "h264_rec.h" | |
| 7 #include "h264_misc.h" | |
| 8 #include "cell/h264_types_spu.h" | |
| 9 #include "h264_pthread.h" | |
| 10 | |
| 11 #include <pthread.h> | |
| 12 #include <assert.h> | |
| 13 #include <unistd.h> | |
| 14 | |
| 15 #include <libspe2.h> | |
| 16 #include <ppu_intrinsics.h> | |
| 17 #include <cbe_mfc.h> | |
| 18 #include <libsync.h> | |
| 19 | |
| 20 // spe global variables | |
| 21 unsigned rl_cnt_var, rl_mutex_var, rl_cond_var; | |
| 22 atomic_ea_t rl_cnt; | |
| 23 cond_ea_t rl_cond; | |
| 24 mutex_ea_t rl_lock; | |
| 25 | |
| 26 H264spe * spe_params; | |
| 27 unsigned mutex_var[16]; | |
| 28 unsigned cond_var[16]; | |
| 29 unsigned atomic_var[16]; | |
| 30 | |
| 31 pthread_t * spe_tid; | |
| 32 spe_context_ptr_t *spe_context; | |
| 33 void** spe_control_area; | |
| 34 void** spe_ls_area; | |
| 35 H264slice **spe_slice_buf; | |
| 36 | |
| 37 H264spe * spe_ed_params; | |
| 38 unsigned mutex_ed_var[16]; | |
| 39 unsigned cond_ed_var[16]; | |
| 40 unsigned atomic_ed_var[16]; | |
| 41 | |
| 42 pthread_t * spe_ed_tid; | |
| 43 spe_context_ptr_t *spe_ed_context; | |
| 44 void** spe_ed_control_area; | |
| 45 void** spe_ed_ls_area; | |
| 46 EDSlice_spu **spe_ed_slice_buf; | |
| 47 | |
| 48 //structs to propagate stop signal | |
| 49 MBSlice last_slice; | |
| 50 EDSlice last_ed_slice; | |
| 51 DecodedPicture last_pic; | |
| 52 RawFrame last_frm; | |
| 53 | |
| 54 static int direct_B_resolved(EDSlice *s, int *poc_list, int *poc_cnt){ | |
| 55 int i; | |
| 56 int cnt = *poc_cnt; | |
| 57 for(i=0; i<cnt; i++){ | |
| 58 if (poc_list[i]==s->ref_list[1][0]->poc){ | |
| 59 *poc_cnt=i+1; | |
| 60 while(++i<cnt) | |
| 61 poc_list[i]=0; | |
| 62 return 1; | |
| 63 } | |
| 64 } | |
| 65 return 0; | |
| 66 } | |
| 67 | |
/*
 * Insert poc into poc_list, which holds *poc_cnt entries sorted in descending
 * order, and increment *poc_cnt.
 *
 * The new value is placed before the first entry that is not greater than it.
 * The caller must provide room for one more element.
 *
 * The scan is bounded by the current element count: entries at or beyond
 * *poc_cnt are stale (or outside the array) and must not take part in the
 * comparison.
 */
static void update_IP_poc_list(int *poc_list, int *poc_cnt, int poc) {
    int cnt = *poc_cnt;
    int i = 0;

    while (i < cnt && poc_list[i] > poc)
        i++;

    // open a gap at i unless we are appending at the end
    if (i < cnt)
        memmove(&poc_list[i+1], &poc_list[i], (cnt-i)*sizeof(int));

    poc_list[i] = poc;
    (*poc_cnt)++;
}
| 79 | |
| 80 static void *spe_ed_thread(void *arg){ | |
| 81 H264spe *params = (H264spe *)arg; | |
| 82 unsigned int idx = params->idx; | |
| 83 unsigned int runflags = 0; | |
| 84 unsigned int entry = SPE_DEFAULT_ENTRY; | |
| 85 // run SPE context | |
| 86 spe_context_run(spe_ed_context[idx], &entry, runflags, (void*) params, NULL, NULL); | |
| 87 // done - now exit thread | |
| 88 pthread_exit(NULL); | |
| 89 } | |
| 90 | |
| 91 static void create_spe_ED_threads(H264Context *h, int ip_threads, int b_threads) { | |
| 92 int i; | |
| 93 int num_threads = ip_threads+b_threads; | |
| 94 spe_program_handle_t * spe_program = spe_image_open("spe_ed"); | |
| 95 // reserve memory for spe thread id, context and argument addresses | |
| 96 spe_ed_tid = av_malloc(num_threads * sizeof (pthread_t)); | |
| 97 spe_ed_context = av_malloc(num_threads * sizeof (spe_context_ptr_t)); | |
| 98 spe_ed_params = av_malloc(num_threads * sizeof (H264spe)); | |
| 99 spe_ed_control_area = av_malloc(num_threads * sizeof (void*)); | |
| 100 spe_ed_ls_area = av_malloc(num_threads * sizeof (void*)); | |
| 101 spe_ed_slice_buf = av_malloc(num_threads * sizeof (void*)); | |
| 102 | |
| 103 if (spe_program == NULL) | |
| 104 av_log(AV_LOG_ERROR, "PPE: error opening SPE object image:%d. error=%s \n", errno, strerror(errno)); | |
| 105 | |
| 106 for (i = 0; i < num_threads; i++) { | |
| 107 // create context for spe program | |
| 108 spe_ed_context[i] = spe_context_create(SPE_MAP_PS, NULL); | |
| 109 if (spe_ed_context[i] == NULL) | |
| 110 av_log(AV_LOG_ERROR, "PPE: error creating SPE context:%d. error=%s \n", errno, strerror(errno)); | |
| 111 // load SPE program into main memory | |
| 112 if ((spe_program_load(spe_ed_context[i], spe_program)) == -1) | |
| 113 av_log(AV_LOG_ERROR, "PPE: error loading SPE context:%d. error=%s \n", errno, strerror(errno)); | |
| 114 //get the control_area for fast mailboxing | |
| 115 if ((spe_ed_control_area[i] = spe_ps_area_get(spe_ed_context[i], SPE_CONTROL_AREA)) == NULL) | |
| 116 av_log(AV_LOG_ERROR, "PPE: error retrieving SPE control area:%d. error=%s \n", errno, strerror(errno)); | |
| 117 //get ls area for inter spe communication | |
| 118 if ((spe_ed_ls_area[i] = spe_ls_area_get(spe_ed_context[i])) == NULL) | |
| 119 av_log(AV_LOG_ERROR, "PPE: error retrieving SPE ls area:%d. error=%s \n", errno, strerror(errno)); | |
| 120 } | |
| 121 | |
| 122 for (i = 0; i < ip_threads; i++) { | |
| 123 spe_ed_params[i].mb_width = h->mb_width; | |
| 124 spe_ed_params[i].mb_stride = h->mb_stride; | |
| 125 spe_ed_params[i].mb_height = h->mb_height; | |
| 126 spe_ed_params[i].type = EDIP; | |
| 127 spe_ed_params[i].spe_id = i; | |
| 128 spe_ed_params[i].idx = i; | |
| 129 //spe_ed_params[i].spe_total = ip_threads; //not used | |
| 130 //spe_params[i].slice_params= &slice_params; | |
| 131 spe_ed_params[i].src_spe = spe_ed_ls_area[(i-1+num_threads)%num_threads]; | |
| 132 spe_ed_params[i].tgt_spe = spe_ed_ls_area[(i+1)%num_threads]; | |
| 133 | |
| 134 spe_ed_params[i].lock = (mutex_ea_t) (unsigned) &mutex_ed_var[i]; | |
| 135 spe_ed_params[i].cond = (cond_ea_t) (unsigned) &cond_ed_var[i]; | |
| 136 spe_ed_params[i].cnt = (atomic_ea_t)(unsigned) &atomic_ed_var[i]; atomic_set(spe_ed_params[i].cnt, 0); | |
| 137 | |
| 138 mutex_init(spe_ed_params[i].lock); | |
| 139 cond_init(spe_ed_params[i].cond); | |
| 140 if (pthread_create(&spe_ed_tid[i], NULL, spe_ed_thread, (void *) &spe_ed_params[i])) | |
| 141 av_log(AV_LOG_ERROR, "create_workers: pthread create for spe failed %d\n", i); | |
| 142 | |
| 143 //slicebufaddr | |
| 144 spe_ed_slice_buf[i] = (EDSlice_spu *) _spe_out_mbox_read(spe_ed_control_area[i]); | |
| 145 av_log(AV_LOG_DEBUG, "create_workers: created spe thread %d\n", i); | |
| 146 } | |
| 147 for (int j = 0; j < b_threads; j++) { | |
| 148 i = j+ip_threads; | |
| 149 spe_ed_params[i].mb_width = h->mb_width; | |
| 150 spe_ed_params[i].mb_stride = h->mb_stride; | |
| 151 spe_ed_params[i].mb_height = h->mb_height; | |
| 152 spe_ed_params[i].type = EDB; | |
| 153 spe_ed_params[i].idx = i; | |
| 154 spe_ed_params[i].spe_id = j; | |
| 155 spe_ed_params[i].spe_total = b_threads; | |
| 156 //spe_params[i].slice_params= &slice_params; | |
| 157 //spe_ed_params[i].src_spe = spe_ed_ls_area[(i-1+num_threads)%num_threads]; | |
| 158 spe_ed_params[i].tgt_spe = spe_ed_ls_area[((j+1)%b_threads) + ip_threads]; | |
| 159 | |
| 160 spe_ed_params[i].lock = (mutex_ea_t) (unsigned) &mutex_ed_var[i]; | |
| 161 spe_ed_params[i].cond = (cond_ea_t) (unsigned) &cond_ed_var[i]; | |
| 162 spe_ed_params[i].cnt = (atomic_ea_t)(unsigned) &atomic_ed_var[i]; atomic_set(spe_ed_params[i].cnt, 0); | |
| 163 | |
| 164 mutex_init(spe_ed_params[i].lock); | |
| 165 cond_init(spe_ed_params[i].cond); | |
| 166 if (pthread_create(&spe_ed_tid[i], NULL, spe_ed_thread, (void *) &spe_ed_params[i])) | |
| 167 av_log(AV_LOG_ERROR, "create_workers: pthread create for spe failed %d\n", i); | |
| 168 | |
| 169 //slicebufaddr | |
| 170 spe_ed_slice_buf[i] = (EDSlice_spu *) _spe_out_mbox_read(spe_ed_control_area[i]); | |
| 171 av_log(AV_LOG_DEBUG, "create_workers: created spe thread %d\n", i); | |
| 172 } | |
| 173 spe_image_close(spe_program); | |
| 174 | |
| 175 } | |
| 176 | |
| 177 static void fill_EDSlice_spu(EDSlice_spu *dst, EDSlice *src){ | |
| 178 dst->pps = src->pps; | |
| 179 dst->mbs = src->mbs; | |
| 180 dst->state = src->state; | |
| 181 dst->qp_thresh = src->qp_thresh; | |
| 182 dst->pic = *src->current_picture; | |
| 183 | |
| 184 dst->ref_count[0] = src->ref_count[0]; | |
| 185 dst->ref_count[1] = src->ref_count[1]; | |
| 186 dst->slice_type = src->slice_type; | |
| 187 dst->slice_type_nos = src->slice_type_nos; | |
| 188 dst->direct_8x8_inference_flag = src->direct_8x8_inference_flag; | |
| 189 dst->list_count = src->list_count; | |
| 190 dst->coded_pic_num = src->coded_pic_num; | |
| 191 | |
| 192 GetBitContext *gb = &src->gb; | |
| 193 align_get_bits( gb); | |
| 194 dst->bytestream_start = gb->buffer + get_bits_count(gb)/8; | |
| 195 dst->byte_bufsize = (get_bits_left(gb) + 7)/8; | |
| 196 | |
| 197 dst->transform_bypass = src->transform_bypass; | |
| 198 dst->direct_spatial_mv_pred = src->direct_spatial_mv_pred; | |
| 199 memcpy(dst->map_col_to_list0, src->map_col_to_list0, 2*16*sizeof(int)); | |
| 200 memcpy(dst->dist_scale_factor, src->dist_scale_factor, 16*sizeof(int)); | |
| 201 dst->cabac_init_idc = src->cabac_init_idc; | |
| 202 memcpy(dst->ref2frm, src->ref2frm, 2*64*sizeof(int)); | |
| 203 dst->chroma_qp[0]= src->chroma_qp[0]; | |
| 204 dst->chroma_qp[1]= src->chroma_qp[1]; | |
| 205 dst->qscale = src->qscale; | |
| 206 dst->last_qscale_diff = src->last_qscale_diff; | |
| 207 | |
| 208 if (src->slice_type_nos == FF_B_TYPE) dst->list1 = *src->ref_list[1][0]; | |
| 209 } | |
| 210 | |
| 211 static void send_slice_to_spe_and_wait(EDSlice_spu *s, int id){ | |
| 212 unsigned status; | |
| 213 | |
| 214 spe_mfcio_get(spe_ed_context[id], (unsigned) spe_ed_slice_buf[id], s, sizeof(EDSlice_spu), 14, 0, 0); | |
| 215 spe_mfcio_tag_status_read(spe_ed_context[id], 1<<14, SPE_TAG_ALL, &status); | |
| 216 | |
| 217 | |
| 218 _spe_in_mbox_write(spe_ed_control_area[id], 0); | |
| 219 | |
| 220 while (!spe_out_mbox_status(spe_ed_context[id])){ | |
| 221 //pthread_yield(); | |
| 222 usleep(1000); | |
| 223 } | |
| 224 _spe_out_mbox_read(spe_ed_control_area[id]); | |
| 225 } | |
| 226 | |
| 227 static int decode_slice_entropy_cell(EntropyContext *ec, EDSlice *s, int id){ | |
| 228 int i,j; | |
| 229 | |
| 230 if( !s->pps.cabac ){ | |
| 231 av_log(AV_LOG_ERROR, "Only cabac encoded streams are supported\n"); | |
| 232 return -1; | |
| 233 } | |
| 234 DECLARE_ALIGNED(16, EDSlice_spu, slice); | |
| 235 fill_EDSlice_spu(&slice, s); | |
| 236 | |
| 237 send_slice_to_spe_and_wait(&slice, id); | |
| 238 | |
| 239 return 0; | |
| 240 } | |
| 241 | |
| 242 static int decode_slice_entropy_cell_seq(H264Context *h, EntropyContext *ec, EDSlice *s){ | |
| 243 int i,j; | |
| 244 | |
| 245 if( !s->pps.cabac ){ | |
| 246 av_log(AV_LOG_ERROR, "Only cabac encoded streams are supported\n"); | |
| 247 return -1; | |
| 248 } | |
| 249 DECLARE_ALIGNED(16, EDSlice_spu, slice); | |
| 250 fill_EDSlice_spu(&slice, s); | |
| 251 | |
| 252 send_slice_to_spe_and_wait(&slice, 0); | |
| 253 | |
| 254 if (s->release_cnt>0) { | |
| 255 for (int i=0; i<s->release_cnt; i++){ | |
| 256 release_pib_entry(h, s->release_ref[i], 2); | |
| 257 } | |
| 258 s->release_cnt=0; | |
| 259 } | |
| 260 | |
| 261 release_pib_entry(h, s->current_picture, 1); | |
| 262 av_freep(&s->gb.raw); | |
| 263 if (s->gb.rbsp) | |
| 264 av_freep(&s->gb.rbsp); | |
| 265 | |
| 266 return 0; | |
| 267 } | |
| 268 | |
| 269 static void *entr_IP_spe_thread(void *arg){ | |
| 270 EDThreadContext *eip = (EDThreadContext *) arg; | |
| 271 H264Context *h = eip->h; | |
| 272 // printf("eip %d, pid %d\n", eip->thread_num, syscall(SYS_gettid)); | |
| 273 for (int i=0; i<SLICE_BUFS; i++){ | |
| 274 eip->mbs[i] = av_malloc(h->mb_height*h->mb_width*sizeof(H264Mb)); | |
| 275 } | |
| 276 | |
| 277 EntropyContext *ec = get_entropy_context(h); | |
| 278 EDSlice *s; | |
| 279 | |
| 280 for(;;){ | |
| 281 { | |
| 282 pthread_mutex_lock(&eip->ed_lock); | |
| 283 while (eip->ed_cnt <= 0) | |
| 284 pthread_cond_wait(&eip->ed_cond, &eip->ed_lock); | |
| 285 s = &eip->ed_q[eip->ed_fo]; | |
| 286 eip->ed_fo++; eip->ed_fo %= MAX_SLICE_COUNT; | |
| 287 pthread_mutex_unlock(&eip->ed_lock); | |
| 288 } | |
| 289 | |
| 290 if (s->state<0) | |
| 291 break; | |
| 292 { | |
| 293 pthread_mutex_lock(&eip->mbs_lock); | |
| 294 while (eip->mbs_cnt <= 0) | |
| 295 pthread_cond_wait(&eip->mbs_cond, &eip->mbs_lock); | |
| 296 | |
| 297 s->mbs = eip->mbs[eip->mbs_fo]; | |
| 298 s->ed = eip; | |
| 299 eip->mbs_cnt--; | |
| 300 eip->mbs_fo++; eip->mbs_fo%=SLICE_BUFS; | |
| 301 pthread_mutex_unlock(&eip->mbs_lock); | |
| 302 } | |
| 303 if (eip->cell){ | |
| 304 decode_slice_entropy_cell(ec, s, eip->thread_num); | |
| 305 }else{ | |
| 306 decode_slice_entropy(ec, s); | |
| 307 } | |
| 308 | |
| 309 // { | |
| 310 // pthread_mutex_lock(&h->lock[ENTROPY2]); | |
| 311 // h->ed_poc[h->ed_poc_fi++ % MAX_SLICE_COUNT] = s->current_picture->poc; | |
| 312 // while (h->ed_poc_fi > h->ed_poc_fo + MAX_SLICE_COUNT) | |
| 313 // h->ed_poc_fo++; | |
| 314 // | |
| 315 // pthread_cond_signal(&h->cond[ENTROPY2]); | |
| 316 // pthread_mutex_unlock(&h->lock[ENTROPY2]); | |
| 317 // } | |
| 318 | |
| 319 { | |
| 320 pthread_mutex_lock(&h->lock[ENTROPY4]); | |
| 321 while (h->ed_reorder_cnt>=MAX_SLICE_COUNT) | |
| 322 pthread_cond_wait(&h->cond[ENTROPY4], &h->lock[ENTROPY4]); | |
| 323 h->ed_reorder_q[h->ed_reorder_fi] = *s; | |
| 324 h->ed_reorder_cnt++; | |
| 325 h->ed_reorder_fi++; h->ed_reorder_fi %= MAX_SLICE_COUNT; | |
| 326 pthread_cond_signal(&h->cond[ENTROPY4]); | |
| 327 pthread_mutex_unlock(&h->lock[ENTROPY4]); | |
| 328 } | |
| 329 | |
| 330 { | |
| 331 pthread_mutex_lock(&eip->ed_lock); | |
| 332 eip->ed_cnt--; | |
| 333 pthread_cond_signal(&eip->ed_cond); | |
| 334 pthread_mutex_unlock(&eip->ed_lock); | |
| 335 } | |
| 336 } | |
| 337 | |
| 338 free_entropy_context(ec); | |
| 339 | |
| 340 pthread_exit(NULL); | |
| 341 return NULL; | |
| 342 } | |
| 343 | |
| 344 static void *entr_B_spe_thread(void *arg){ | |
| 345 EDThreadContext *eb = (EDThreadContext *) arg; | |
| 346 H264Context *h = eb->h; | |
| 347 // printf("eb %d, pid %d\n", eb->thread_num, syscall(SYS_gettid)); | |
| 348 for (int i=0; i<SLICE_BUFS; i++){ | |
| 349 eb->mbs[i] = av_malloc(h->mb_height*h->mb_width*sizeof(H264Mb)); | |
| 350 } | |
| 351 | |
| 352 EntropyContext *ec = get_entropy_context(h); | |
| 353 EDSlice *s; | |
| 354 | |
| 355 for(;;){ | |
| 356 { | |
| 357 pthread_mutex_lock(&eb->ed_lock); | |
| 358 while (eb->ed_cnt <= 0) | |
| 359 pthread_cond_wait(&eb->ed_cond, &eb->ed_lock); | |
| 360 s = &eb->ed_q[eb->ed_fo]; | |
| 361 eb->ed_fo++; eb->ed_fo %= MAX_SLICE_COUNT; | |
| 362 pthread_mutex_unlock(&eb->ed_lock); | |
| 363 } | |
| 364 | |
| 365 if (s->state<0) | |
| 366 break; | |
| 367 { | |
| 368 pthread_mutex_lock(&eb->mbs_lock); | |
| 369 while (eb->mbs_cnt <= 0) | |
| 370 pthread_cond_wait(&eb->mbs_cond, &eb->mbs_lock); | |
| 371 s->mbs = eb->mbs[eb->mbs_fo]; | |
| 372 s->ed = eb; | |
| 373 eb->mbs_cnt--; | |
| 374 eb->mbs_fo++; eb->mbs_fo%=SLICE_BUFS; | |
| 375 pthread_mutex_unlock(&eb->mbs_lock); | |
| 376 } | |
| 377 //decode_B_slice_entropy(&hcabac, &cabac, s, eb, eb->prev_ed); | |
| 378 decode_slice_entropy_cell(ec, s, eb->thread_num + h->edip_threads); | |
| 379 | |
| 380 { | |
| 381 pthread_mutex_lock(&h->lock[ENTROPY4]); | |
| 382 while (h->ed_reorder_cnt>=MAX_SLICE_COUNT) | |
| 383 pthread_cond_wait(&h->cond[ENTROPY4], &h->lock[ENTROPY4]); | |
| 384 h->ed_reorder_q[h->ed_reorder_fi] = *s; | |
| 385 h->ed_reorder_cnt++; | |
| 386 h->ed_reorder_fi++; h->ed_reorder_fi %= MAX_SLICE_COUNT; | |
| 387 pthread_cond_signal(&h->cond[ENTROPY4]); | |
| 388 pthread_mutex_unlock(&h->lock[ENTROPY4]); | |
| 389 | |
| 390 } | |
| 391 | |
| 392 { | |
| 393 pthread_mutex_lock(&eb->ed_lock); | |
| 394 eb->ed_cnt--; | |
| 395 pthread_cond_signal(&eb->ed_cond); | |
| 396 pthread_mutex_unlock(&eb->ed_lock); | |
| 397 } | |
| 398 } | |
| 399 eb->lines_cnt++; | |
| 400 | |
| 401 free_entropy_context(ec); | |
| 402 | |
| 403 pthread_exit(NULL); | |
| 404 return NULL; | |
| 405 } | |
| 406 | |
| 407 static void *entr_B_distribute(void *arg){ | |
| 408 H264Context *h = (H264Context *) arg; | |
| 409 EDSlice *s; | |
| 410 | |
| 411 int i, n=0, poc; | |
| 412 | |
| 413 // printf("eb dist, pid %d\n", syscall(SYS_gettid)); | |
| 414 | |
| 415 for(i=0; i<h->edb_threads; i++){ | |
| 416 h->b[i].h =h; | |
| 417 h->b[i].thread_num =i; | |
| 418 h->b[i].thread_total =h->edb_threads; | |
| 419 pthread_mutex_init(&h->b[i].mbs_lock, NULL); | |
| 420 pthread_cond_init(&h->b[i].mbs_cond, NULL); | |
| 421 h->b[i].mbs_fo = 0; | |
| 422 h->b[i].mbs_cnt = SLICE_BUFS; | |
| 423 h->b[i].ed_fi =0; | |
| 424 h->b[i].ed_fo =0; | |
| 425 h->b[i].ed_cnt =0; | |
| 426 h->b[i].lines_cnt =0; | |
| 427 h->b[i].prev_ed = &h->b[(i-1 +h->edb_threads) % h->edb_threads]; | |
| 428 pthread_mutex_init(&h->b[i].ed_lock, NULL); | |
| 429 pthread_cond_init(&h->b[i].ed_cond, NULL); | |
| 430 pthread_create(&h->ed_B_thr[i], NULL, entr_B_spe_thread, &h->b[i]); | |
| 431 } | |
| 432 | |
| 433 for(;;){ | |
| 434 { | |
| 435 pthread_mutex_lock(&h->lock[ENTROPY3B]); | |
| 436 while (h->ed_B_cnt<=0) | |
| 437 pthread_cond_wait(&h->cond[ENTROPY3B], &h->lock[ENTROPY3B]); | |
| 438 s= &h->ed_B_q[h->ed_B_fo]; | |
| 439 h->ed_B_fo++; h->ed_B_fo %= MAX_SLICE_COUNT; | |
| 440 pthread_mutex_unlock(&h->lock[ENTROPY3B]); | |
| 441 | |
| 442 } | |
| 443 if (s->state<0) | |
| 444 break; | |
| 445 | |
| 446 if (s->ref_list[1][0]->slice_type_nos != FF_B_TYPE){ | |
| 447 while (poc < s->ref_list[1][0]->poc){ | |
| 448 pthread_mutex_lock(&h->lock[ENTROPY2]); | |
| 449 while (poc == h->ed_poc) | |
| 450 pthread_cond_wait(&h->cond[ENTROPY2], &h->lock[ENTROPY2]); | |
| 451 poc = h->ed_poc; | |
| 452 pthread_mutex_unlock(&h->lock[ENTROPY2]); | |
| 453 } | |
| 454 } | |
| 455 { | |
| 456 pthread_mutex_lock(&h->b[n].ed_lock); | |
| 457 while (h->b[n].ed_cnt >= MAX_SLICE_COUNT) | |
| 458 pthread_cond_wait(&h->b[n].ed_cond, &h->b[n].ed_lock); | |
| 459 h->b[n].ed_q[ h->b[n].ed_fi] = *s; | |
| 460 h->b[n].ed_cnt++; | |
| 461 h->b[n].ed_fi++; h->b[n].ed_fi %= MAX_SLICE_COUNT; | |
| 462 pthread_cond_signal(&h->b[n].ed_cond); | |
| 463 pthread_mutex_unlock(&h->b[n].ed_lock); | |
| 464 | |
| 465 n++; n%=h->edb_threads; | |
| 466 } | |
| 467 { | |
| 468 pthread_mutex_lock(&h->lock[ENTROPY3B]); | |
| 469 h->ed_B_cnt--; | |
| 470 pthread_cond_signal(&h->cond[ENTROPY3B]); | |
| 471 pthread_mutex_unlock(&h->lock[ENTROPY3B]); | |
| 472 | |
| 473 } | |
| 474 | |
| 475 } | |
| 476 | |
| 477 for (i=0; i<h->edb_threads; i++){ | |
| 478 pthread_mutex_lock(&h->b[i].ed_lock); | |
| 479 while (h->b[i].ed_cnt >= MAX_SLICE_COUNT) | |
| 480 pthread_cond_wait(&h->b[i].ed_cond, &h->b[i].ed_lock); | |
| 481 h->b[i].ed_q[ h->b[i].ed_fi] = *s; | |
| 482 h->b[i].ed_cnt++; | |
| 483 h->b[i].ed_fi++; h->b[i].ed_fi %= MAX_SLICE_COUNT; | |
| 484 pthread_cond_signal(&h->b[i].ed_cond); | |
| 485 pthread_mutex_unlock(&h->b[i].ed_lock); | |
| 486 | |
| 487 } | |
| 488 for(int i=0; i<h->edb_threads; i++){ | |
| 489 pthread_join(h->ed_B_thr[i], NULL); | |
| 490 } | |
| 491 pthread_exit(NULL); | |
| 492 return NULL; | |
| 493 } | |
| 494 | |
| 495 | |
| 496 static void *entr_IPB_distribute(void *arg){ | |
| 497 H264Context *h = (H264Context *) arg; | |
| 498 EDSlice *s; | |
| 499 int i,n=0; | |
| 500 | |
| 501 create_spe_ED_threads(h, h->edip_threads, h->edb_threads); | |
| 502 pthread_create(&h->ed_B_dist, NULL, entr_B_distribute, h); | |
| 503 for(i=0; i<h->edip_threads + h->edip_ppe_threads; i++){ | |
| 504 h->ip[i].h =h; | |
| 505 h->ip[i].cell = (i >= h->edip_ppe_threads); | |
| 506 pthread_mutex_init(&h->ip[i].mbs_lock, NULL); | |
| 507 pthread_cond_init(&h->ip[i].mbs_cond, NULL); | |
| 508 h->ip[i].thread_num = i - h->edip_ppe_threads; | |
| 509 h->ip[i].thread_total=h->edip_threads+ h->edip_ppe_threads; | |
| 510 h->ip[i].mbs_fo = 0; | |
| 511 h->ip[i].mbs_cnt = SLICE_BUFS; | |
| 512 h->ip[i].ed_fi =0; | |
| 513 h->ip[i].ed_fo =0; | |
| 514 pthread_mutex_init(&h->ip[i].ed_lock, NULL); | |
| 515 pthread_cond_init(&h->ip[i].ed_cond, NULL); | |
| 516 pthread_create(&h->ed_IP_thr[i], NULL, entr_IP_spe_thread, &h->ip[i]); | |
| 517 } | |
| 518 | |
| 519 for(;;){ | |
| 520 { | |
| 521 pthread_mutex_lock(&h->lock[ENTROPY]); | |
| 522 while (h->ed_cnt<=0) | |
| 523 pthread_cond_wait(&h->cond[ENTROPY], &h->lock[ENTROPY]); | |
| 524 s= &h->ed_q[h->ed_fo]; | |
| 525 | |
| 526 pthread_mutex_unlock(&h->lock[ENTROPY]); | |
| 527 h->ed_fo++; h->ed_fo %= MAX_SLICE_COUNT; | |
| 528 } | |
| 529 if (s->state<0) | |
| 530 break; | |
| 531 | |
| 532 assert(s->current_picture); | |
| 533 if (s->slice_type_nos == FF_B_TYPE ) | |
| 534 { | |
| 535 pthread_mutex_lock(&h->lock[ENTROPY3B]); | |
| 536 while (h->ed_B_cnt>=MAX_SLICE_COUNT) | |
| 537 pthread_cond_wait(&h->cond[ENTROPY3B], &h->lock[ENTROPY3B]); | |
| 538 h->ed_B_q[h->ed_B_fi] = *s; | |
| 539 h->ed_B_cnt++; | |
| 540 h->ed_B_fi++; h->ed_B_fi %= MAX_SLICE_COUNT; | |
| 541 pthread_cond_signal(&h->cond[ENTROPY3B]); | |
| 542 pthread_mutex_unlock(&h->lock[ENTROPY3B]); | |
| 543 }else | |
| 544 { | |
| 545 ///round robin now, change to based on rawframes size. | |
| 546 pthread_mutex_lock(&h->ip[n].ed_lock); | |
| 547 while (h->ip[n].ed_cnt >= MAX_SLICE_COUNT) | |
| 548 pthread_cond_wait(&h->ip[n].ed_cond, &h->ip[n].ed_lock); | |
| 549 h->ip[n].ed_q[ h->ip[n].ed_fi] = *s; | |
| 550 h->ip[n].ed_cnt++; | |
| 551 h->ip[n].ed_fi++; h->ip[n].ed_fi %= MAX_SLICE_COUNT; | |
| 552 pthread_cond_signal(&h->ip[n].ed_cond); | |
| 553 pthread_mutex_unlock(&h->ip[n].ed_lock); | |
| 554 | |
| 555 n++; n %=(h->edip_threads+h->edip_ppe_threads); | |
| 556 } | |
| 557 { | |
| 558 pthread_mutex_lock(&h->lock[ENTROPY]); | |
| 559 h->ed_cnt--; | |
| 560 pthread_cond_signal(&h->cond[ENTROPY]); | |
| 561 pthread_mutex_unlock(&h->lock[ENTROPY]); | |
| 562 | |
| 563 } | |
| 564 } | |
| 565 | |
| 566 { | |
| 567 pthread_mutex_lock(&h->lock[ENTROPY3B]); | |
| 568 while (h->ed_B_cnt>=MAX_SLICE_COUNT) | |
| 569 pthread_cond_wait(&h->cond[ENTROPY3B], &h->lock[ENTROPY3B]); | |
| 570 h->ed_B_q[h->ed_B_fi] = *s; | |
| 571 h->ed_B_cnt++; | |
| 572 h->ed_B_fi++; h->ed_B_fi %= MAX_SLICE_COUNT; | |
| 573 pthread_cond_signal(&h->cond[ENTROPY3B]); | |
| 574 pthread_mutex_unlock(&h->lock[ENTROPY3B]); | |
| 575 } | |
| 576 { | |
| 577 for (i=0; i<h->edip_threads + h->edip_ppe_threads; i++){ | |
| 578 pthread_mutex_lock(&h->ip[i].ed_lock); | |
| 579 while (h->ip[i].ed_cnt >= MAX_SLICE_COUNT) | |
| 580 pthread_cond_wait(&h->ip[i].ed_cond, &h->ip[i].ed_lock); | |
| 581 h->ip[i].ed_q[ h->ip[i].ed_fi] = *s; | |
| 582 h->ip[i].ed_cnt++; | |
| 583 h->ip[i].ed_fi++; h->ip[i].ed_fi %= MAX_SLICE_COUNT; | |
| 584 pthread_cond_signal(&h->ip[i].ed_cond); | |
| 585 pthread_mutex_unlock(&h->ip[i].ed_lock); | |
| 586 } | |
| 587 } | |
| 588 { | |
| 589 pthread_mutex_lock(&h->lock[ENTROPY4]); | |
| 590 while (h->ed_reorder_cnt>=MAX_SLICE_COUNT) | |
| 591 pthread_cond_wait(&h->cond[ENTROPY4], &h->lock[ENTROPY4]); | |
| 592 h->ed_reorder_q[h->ed_reorder_fi] = *s; | |
| 593 h->ed_reorder_cnt++; | |
| 594 h->ed_reorder_fi++; h->ed_reorder_fi %= MAX_SLICE_COUNT; | |
| 595 pthread_cond_signal(&h->cond[ENTROPY4]); | |
| 596 pthread_mutex_unlock(&h->lock[ENTROPY4]); | |
| 597 | |
| 598 } | |
| 599 pthread_join(h->ed_B_dist, NULL); | |
| 600 for(i=0; i<h->edip_threads; i++){ | |
| 601 pthread_join(h->ed_IP_thr[i], NULL); | |
| 602 } | |
| 603 pthread_exit(NULL); | |
| 604 return NULL; | |
| 605 } | |
| 606 | |
| 607 static pthread_t ed_IPB_dist; | |
| 608 static void *entropy_IPB_cell_thread(void *arg){ | |
| 609 H264Context *h = (H264Context *) arg; | |
| 610 int i; | |
| 611 EDSlice reorder[MAX_SLICE_COUNT]; | |
| 612 int ip_poc[MAX_SLICE_COUNT][2]={0,}; | |
| 613 int next_ip_id=0; | |
| 614 int ip_poc_cnt=0; | |
| 615 EDSlice *s; | |
| 616 int reorder_cnt=0; | |
| 617 unsigned next_pic_num=0; | |
| 618 | |
| 619 pthread_create(&ed_IPB_dist, NULL, entr_IPB_distribute, h); | |
| 620 int count =0; | |
| 621 for(;;){ | |
| 622 //signals received from the entropy decoders | |
| 623 { | |
| 624 pthread_mutex_lock(&h->lock[ENTROPY4]); | |
| 625 while (h->ed_reorder_cnt<=0) | |
| 626 pthread_cond_wait(&h->cond[ENTROPY4], &h->lock[ENTROPY4]); | |
| 627 s= &h->ed_reorder_q[h->ed_reorder_fo]; | |
| 628 h->ed_reorder_fo++; h->ed_reorder_fo %=MAX_SLICE_COUNT; | |
| 629 pthread_mutex_unlock(&h->lock[ENTROPY4]); | |
| 630 } | |
| 631 | |
| 632 if (s->state >=0 && s->slice_type_nos != FF_B_TYPE){ | |
| 633 for (i=0; i<ip_poc_cnt; i++){ | |
| 634 if (s->ip_id < ip_poc[i][0]){ | |
| 635 memmove(ip_poc[i+1], ip_poc[i], 2*(ip_poc_cnt-i)*sizeof(int)); | |
| 636 break; | |
| 637 } | |
| 638 } | |
| 639 ip_poc[i][0]= s->ip_id; | |
| 640 ip_poc[i][1]= s->current_picture->poc; | |
| 641 ip_poc_cnt++; | |
| 642 | |
| 643 while (next_ip_id == ip_poc[0][0]){ | |
| 644 pthread_mutex_lock(&h->lock[ENTROPY2]); | |
| 645 h->ed_poc = ip_poc[0][1]; | |
| 646 | |
| 647 pthread_cond_signal(&h->cond[ENTROPY2]); | |
| 648 pthread_mutex_unlock(&h->lock[ENTROPY2]); | |
| 649 memmove(ip_poc[0], ip_poc[1], 2*(ip_poc_cnt-1)*sizeof(int)); | |
| 650 ip_poc_cnt--; | |
| 651 next_ip_id++; | |
| 652 } | |
| 653 } | |
| 654 | |
| 655 for(i=reorder_cnt; i>0; i--){ | |
| 656 if (s->coded_pic_num < reorder[i-1].coded_pic_num) | |
| 657 break; | |
| 658 reorder[i]=reorder[i-1]; | |
| 659 } | |
| 660 reorder[i]=*s; | |
| 661 | |
| 662 while(reorder_cnt>=0){ | |
| 663 if (next_pic_num!=reorder[reorder_cnt].coded_pic_num){ | |
| 664 break; | |
| 665 } | |
| 666 EDSlice *es = &reorder[reorder_cnt]; | |
| 667 | |
| 668 { | |
| 669 pthread_mutex_lock(&h->lock[MBDEC]); | |
| 670 while (h->mbdec_cnt >= MAX_SLICE_COUNT) | |
| 671 pthread_cond_wait(&h->cond[MBDEC], &h->lock[MBDEC]); | |
| 672 copyEDtoMBSlice(&h->mbdec_q[h->mbdec_fi], es); | |
| 673 | |
| 674 h->mbdec_cnt++; | |
| 675 h->mbdec_fi++; h->mbdec_fi %= MAX_SLICE_COUNT; | |
| 676 pthread_cond_signal(&h->cond[MBDEC]); | |
| 677 pthread_mutex_unlock(&h->lock[MBDEC]); | |
| 678 | |
| 679 } | |
| 680 | |
| 681 if (es->state<0) | |
| 682 goto end; | |
| 683 | |
| 684 assert(es->current_picture); | |
| 685 for (int i=0; i<es->release_cnt; i++){ | |
| 686 release_pib_entry(h, es->release_ref[i], 2); | |
| 687 } | |
| 688 release_pib_entry(h, es->current_picture, 1); | |
| 689 av_freep(&es->gb.raw); | |
| 690 if (es->gb.rbsp) | |
| 691 av_freep(&es->gb.rbsp); | |
| 692 | |
| 693 next_pic_num++; | |
| 694 reorder_cnt--; | |
| 695 } | |
| 696 reorder_cnt++; | |
| 697 | |
| 698 { | |
| 699 pthread_mutex_lock(&h->lock[ENTROPY4]); | |
| 700 h->ed_reorder_cnt--; | |
| 701 pthread_cond_signal(&h->cond[ENTROPY4]); | |
| 702 pthread_mutex_unlock(&h->lock[ENTROPY4]); | |
| 703 } | |
| 704 } | |
| 705 | |
| 706 end: | |
| 707 pthread_join(ed_IPB_dist, NULL); | |
| 708 pthread_exit(NULL); | |
| 709 return NULL; | |
| 710 } | |
| 711 | |
| 712 | |
| 713 static void fill_spe_slice(H264slice *dst, const MBSlice *src, H264Context *h){ | |
| 714 dst->deblocking_filter =1; | |
| 715 dst->linesize = src->current_picture->linesize[0]; | |
| 716 dst->uvlinesize = src->current_picture->linesize[1]; | |
| 717 dst->mb_width = h->mb_width; | |
| 718 dst->mb_height = h->mb_height; | |
| 719 dst->use_weight = src->use_weight; | |
| 720 dst->use_weight_chroma = src->use_weight_chroma; | |
| 721 dst->luma_log2_weight_denom = src->luma_log2_weight_denom; | |
| 722 dst->chroma_log2_weight_denom = src->chroma_log2_weight_denom; | |
| 723 | |
| 724 //weights later | |
| 725 memcpy(dst->luma_weight, src->luma_weight, 16*2*2*sizeof(int16_t)); | |
| 726 memcpy(dst->chroma_weight, src->chroma_weight, 16*2*2*2*sizeof(int16_t)); | |
| 727 memcpy(dst->implicit_weight, src->implicit_weight, 16*16*2*sizeof(int16_t)); | |
| 728 | |
| 729 for(int list=0; list<2; list++){ | |
| 730 for (int i=0; i<src->ref_count[list]; i++){ | |
| 731 Picture_spu *p_dst = &dst->ref_list[list][i]; | |
| 732 DecodedPicture *p_src = src->ref_list[list][i]; | |
| 733 if (p_src){ | |
| 734 p_dst->data[0] = p_src->data[0]; | |
| 735 p_dst->data[1] = p_src->data[1]; | |
| 736 p_dst->data[2] = p_src->data[2]; | |
| 737 } | |
| 738 } | |
| 739 } | |
| 740 dst->state = src->state; | |
| 741 | |
| 742 dst->emu_edge_width =32; | |
| 743 dst->emu_edge_height =32; | |
| 744 dst->slice_type = src->slice_type; | |
| 745 dst->slice_type_nos = src->slice_type_nos; | |
| 746 dst->slice_alpha_c0_offset = src->slice_alpha_c0_offset; | |
| 747 dst->slice_beta_offset = src->slice_beta_offset; | |
| 748 | |
| 749 memcpy(dst->chroma_qp_table, src->pps.chroma_qp_table, 2*64); | |
| 750 | |
| 751 dst->blocks = src->mbs; | |
| 752 dst->dst_y = src->current_picture->data[0]; | |
| 753 dst->dst_cb = src->current_picture->data[1]; | |
| 754 dst->dst_cr = src->current_picture->data[2]; | |
| 755 } | |
| 756 | |
| 757 static void decode_slice_mb_seq_cell(H264Context *h, MBRecContext *d, MBSlice *s, DecodedPicture *tmp){ | |
| 758 static int rl_fi=0; | |
| 759 | |
| 760 DECLARE_ALIGNED(16, H264slice, spe_slice); | |
| 761 H264spe *p=&spe_params[0]; | |
| 762 unsigned status; | |
| 763 uint8_t *dst_y, *dst_cb, *dst_cr; | |
| 764 | |
| 765 DecodedPicture *dp; | |
| 766 | |
| 767 for (int i=0; i<2; i++){ | |
| 768 for(int j=0; j< s->ref_count[i]; j++){ | |
| 769 if (s->ref_list_cpn[i][j] ==-1) | |
| 770 continue; | |
| 771 int k; | |
| 772 for (k=0; k<DPB_SIZE; k++){ | |
| 773 if(h->dpb[k].reference >= 2 && h->dpb[k].cpn == s->ref_list_cpn[i][j]){ | |
| 774 s->ref_list[i][j] = &h->dpb[k]; | |
| 775 break; | |
| 776 } | |
| 777 } | |
| 778 } | |
| 779 } | |
| 780 | |
| 781 dp = get_dpb_entry(h); | |
| 782 init_dpb_entry(dp, s, d->width, d->height); | |
| 783 | |
| 784 if (h->no_mbd) | |
| 785 return; | |
| 786 | |
| 787 | |
| 788 fill_spe_slice(&spe_slice, s, h); | |
| 789 spe_mfcio_get(spe_context[0], (unsigned) (spe_slice_buf[0] + rl_fi), &spe_slice, sizeof(H264slice), 15, 0, 0); | |
| 790 spe_mfcio_tag_status_read(spe_context[0], 1<<15, SPE_TAG_ALL, &status); | |
| 791 rl_fi++; rl_fi %= 2; | |
| 792 | |
| 793 _spe_in_mbox_write(spe_control_area[0], 0); | |
| 794 while (atomic_read(rl_cnt)<=0){ | |
| 795 //pthread_yield(); | |
| 796 usleep(1000); | |
| 797 } | |
| 798 atomic_dec(rl_cnt); | |
| 799 | |
| 800 | |
| 801 /** This is error free, no visual artifacts, however, md5sum fails.... (WTF) **/ | |
| 802 // memcpy(tmp->data[0], s->current_picture->data[0], tmp->linesize[0]*h->mb_height*16); | |
| 803 // memcpy(tmp->data[1], s->current_picture->data[1], tmp->linesize[1]*h->mb_height*8); | |
| 804 // memcpy(tmp->data[2], s->current_picture->data[2], tmp->linesize[1]*h->mb_height*8); | |
| 805 // | |
| 806 // memset(s->current_picture->data[0], 0, tmp->linesize[0]*h->mb_height*16); | |
| 807 // memset(s->current_picture->data[1], 0, tmp->linesize[1]*h->mb_height*8); | |
| 808 // memset(s->current_picture->data[2], 0, tmp->linesize[1]*h->mb_height*8); | |
| 809 // | |
| 810 // decode_slice_mb_seq(d, s); | |
| 811 // | |
| 812 // for (int i=0; i<h->mb_height*16; i++){ | |
| 813 // for (int j=0; j<h->width; j++){ | |
| 814 // if (tmp->data[0][j + i*tmp->linesize[0]] != s->current_picture->data[0][j + i*tmp->linesize[0]]){ | |
| 815 // printf("%d, %d, %d, %d\n", j, i, tmp->data[0][j + i*tmp->linesize[0]], s->current_picture->data[0][j + i*tmp->linesize[0]]); | |
| 816 // return; | |
| 817 // } | |
| 818 // } | |
| 819 // } | |
| 820 // | |
| 821 // for (int i=0; i<h->mb_height*8; i++){ | |
| 822 // for (int j=0; j<h->width/2; j++){ | |
| 823 // if (tmp->data[1][j + i*tmp->linesize[1]] != s->current_picture->data[1][j + i*tmp->linesize[1]]){ | |
| 824 // printf("%d, %d, %d, %d\n", j, i, tmp->data[1][j + i*tmp->linesize[1]], s->current_picture->data[1][j + i*tmp->linesize[1]]); | |
| 825 // return; | |
| 826 // } | |
| 827 // } | |
| 828 // } | |
| 829 // | |
| 830 // for (int i=0; i<h->mb_height*8; i++){ | |
| 831 // for (int j=0; j<h->width/2; j++){ | |
| 832 // if (tmp->data[2][j + i*tmp->linesize[1]] != s->current_picture->data[2][j + i*tmp->linesize[1]]){ | |
| 833 // printf("%d, %d, %d, %d\n", j, i, tmp->data[2][j + i*tmp->linesize[1]], s->current_picture->data[2][j + i*tmp->linesize[1]]); | |
| 834 // return; | |
| 835 // } | |
| 836 // } | |
| 837 // } | |
| 838 | |
| 839 | |
| 840 //printf("dst_y %p\n", dst_y); | |
| 841 | |
| 842 | |
| 843 for (int i=0; i<s->release_cnt; i++){ | |
| 844 for(int j=0; j<DPB_SIZE; j++){ | |
| 845 if(h->dpb[j].cpn== s->release_ref_cpn[i]){ | |
| 846 release_dpb_entry(h, &h->dpb[j], 2); | |
| 847 break; | |
| 848 } | |
| 849 } | |
| 850 } | |
| 851 s->release_cnt=0; | |
| 852 | |
| 853 } | |
| 854 | |
| 855 static void *h264_spe_thread(void * thread_args ) { | |
| 856 H264spe *params = (H264spe *)thread_args; | |
| 857 unsigned int spe_id = params->spe_id; | |
| 858 unsigned int runflags = 0; | |
| 859 unsigned int entry = SPE_DEFAULT_ENTRY; | |
| 860 // run SPE context | |
| 861 spe_context_run(spe_context[spe_id], &entry, runflags, (void*) params, NULL, NULL); | |
| 862 // done - now exit thread | |
| 863 pthread_exit(NULL); | |
| 864 } | |
| 865 | |
| 866 static int create_spe_MBR_threads(H264Context *h, int num_threads) { | |
| 867 int i; | |
| 868 | |
| 869 // reserve memory for spe thread id, context and argument addresses | |
| 870 spe_tid = av_malloc(num_threads * sizeof (pthread_t)); | |
| 871 spe_context = av_malloc(num_threads * sizeof (spe_context_ptr_t)); | |
| 872 spe_params = av_malloc(num_threads * sizeof (H264spe)); | |
| 873 spe_control_area = av_malloc(num_threads * sizeof (void*)); | |
| 874 spe_ls_area = av_malloc(num_threads * sizeof (void*)); | |
| 875 spe_slice_buf = av_malloc(num_threads * sizeof (void*)); | |
| 876 | |
| 877 spe_program_handle_t *spe_program = spe_image_open("spe_mbd"); | |
| 878 | |
| 879 if (spe_program == NULL) | |
| 880 av_log(AV_LOG_ERROR, "PPE: error opening SPE object image:%d. error=%s \n", errno, strerror(errno)); | |
| 881 | |
| 882 for (i = 0; i < num_threads; i++) { | |
| 883 // create context for spe program | |
| 884 spe_context[i] = spe_context_create(SPE_MAP_PS, NULL); | |
| 885 if (spe_context[i] == NULL) | |
| 886 av_log(AV_LOG_ERROR, "PPE: error creating SPE context:%d. error=%s \n", errno, strerror(errno)); | |
| 887 // load SPE program into main memory | |
| 888 if ((spe_program_load(spe_context[i], spe_program)) == -1) | |
| 889 av_log(AV_LOG_ERROR, "PPE: error loading SPE context:%d. error=%s \n", errno, strerror(errno)); | |
| 890 //get the control_area for fast mailboxing | |
| 891 if ((spe_control_area[i] = spe_ps_area_get(spe_context[i], SPE_CONTROL_AREA)) == NULL) | |
| 892 av_log(AV_LOG_ERROR, "PPE: error retrieving SPE control area:%d. error=%s \n", errno, strerror(errno)); | |
| 893 //get ls area for inter spe communication | |
| 894 if ((spe_ls_area[i] = spe_ls_area_get(spe_context[i])) == NULL) | |
| 895 av_log(AV_LOG_ERROR, "PPE: error retrieving SPE ls area:%d. error=%s \n", errno, strerror(errno)); | |
| 896 } | |
| 897 | |
| 898 for (i = 0; i < num_threads; i++) { | |
| 899 spe_params[i].mb_width = h->mb_width; | |
| 900 spe_params[i].mb_height = h->mb_height; | |
| 901 spe_params[i].mb_stride = h->mb_stride; | |
| 902 spe_params[i].spe_id = i; | |
| 903 spe_params[i].spe_total = num_threads; | |
| 904 //spe_params[i].slice_params= &slice_params; | |
| 905 spe_params[i].src_spe = spe_ls_area[(i-1+num_threads)%num_threads]; | |
| 906 spe_params[i].tgt_spe = spe_ls_area[(i+1)%num_threads]; | |
| 907 | |
| 908 spe_params[i].rl_lock = rl_lock; | |
| 909 spe_params[i].rl_cond = rl_cond; | |
| 910 spe_params[i].rl_cnt = rl_cnt; | |
| 911 spe_params[i].lock = (mutex_ea_t) (unsigned) &mutex_var[i]; | |
| 912 spe_params[i].cond = (cond_ea_t) (unsigned) &cond_var[i]; | |
| 913 spe_params[i].cnt = (atomic_ea_t)(unsigned) &atomic_var[i]; atomic_set(spe_params[i].cnt, 0); | |
| 914 | |
| 915 mutex_init(spe_params[i].lock); | |
| 916 cond_init(spe_params[i].cond); | |
| 917 if (pthread_create(&spe_tid[i], NULL, h264_spe_thread, (void *) &spe_params[i])) | |
| 918 av_log(AV_LOG_ERROR, "create_workers: pthread create for spe failed %d\n", i); | |
| 919 | |
| 920 //slicebufaddr | |
| 921 spe_slice_buf[i] = (H264slice *) _spe_out_mbox_read(spe_control_area[i]); | |
| 922 | |
| 923 av_log(AV_LOG_DEBUG, "create_workers: created spe thread %d\n", i); | |
| 924 } | |
| 925 spe_image_close(spe_program); | |
| 926 return 0; | |
| 927 } | |
| 928 | |
| 929 //_spe_out_mbox_read(spe_control_area[i]); | |
| 930 /** | |
| 931 * joins all the spe worker threads. | |
| 932 */ | |
| 933 static void join_spe_worker_threads(H264slice *s, int num_threads, int *rl_fi) { | |
| 934 int i; | |
| 935 ///just to keep coding consistency. | |
| 936 { | |
| 937 for (i=0; i<num_threads; i++){ | |
| 938 H264spe *p=&spe_params[i]; | |
| 939 unsigned status; | |
| 940 | |
| 941 while (atomic_read(p->cnt)>=2) {//double buffered | |
| 942 usleep(1000);//cond_wait(p->cond, p->lock); | |
| 943 } | |
| 944 | |
| 945 spe_mfcio_get(spe_context[i], (unsigned) (spe_slice_buf[i] + rl_fi[i]), s, sizeof(H264slice), 15, 0, 0); | |
| 946 spe_mfcio_tag_status_read(spe_context[i], 1<<15, SPE_TAG_ALL, &status); | |
| 947 //mutex_unlock(p->lock); | |
| 948 _spe_in_mbox_write(spe_control_area[i], 0); | |
| 949 } | |
| 950 } | |
| 951 | |
| 952 for (i=0; i<num_threads; i++){ | |
| 953 pthread_join(spe_tid[i], NULL); | |
| 954 } | |
| 955 | |
| 956 for (i=0; i<num_threads; i++){ | |
| 957 spe_context_destroy(spe_context[i]); | |
| 958 } | |
| 959 atomic_inc(rl_cnt); | |
| 960 | |
| 961 // destroy memory reserved for spe thread id, context and argument addresses | |
| 962 av_freep(&spe_tid); | |
| 963 av_freep(&spe_context); | |
| 964 av_freep(&spe_params); | |
| 965 av_freep(&spe_control_area); | |
| 966 av_freep(&spe_slice_buf); | |
| 967 } | |
| 968 | |
| 969 | |
| 970 static void *rl_dist_thread(void *arg){ | |
| 971 int i; | |
| 972 H264Context *h = (H264Context *) arg; | |
| 973 MBSlice *s; | |
| 974 DecodedPicture *dp; | |
| 975 int rl_fi[16]={0,}; | |
| 976 DECLARE_ALIGNED(16, H264slice, spe_slice); | |
| 977 | |
| 978 create_spe_MBR_threads(h, h->rl_threads); | |
| 979 for(;;){ | |
| 980 { | |
| 981 pthread_mutex_lock(&h->lock[MBDEC]); | |
| 982 while (h->mbdec_cnt<=0) | |
| 983 pthread_cond_wait(&h->cond[MBDEC], &h->lock[MBDEC]); | |
| 984 s= &h->mbdec_q[h->mbdec_fo]; | |
| 985 h->mbdec_fo++; h->mbdec_fo %= MAX_SLICE_COUNT; | |
| 986 pthread_mutex_unlock(&h->lock[MBDEC]); | |
| 987 } | |
| 988 | |
| 989 if (s->state<0){ | |
| 990 break; | |
| 991 } | |
| 992 for (int i=0; i<2; i++){ | |
| 993 for(int j=0; j< s->ref_count[i]; j++){ | |
| 994 if (s->ref_list_cpn[i][j] ==-1) | |
| 995 continue; | |
| 996 int k; | |
| 997 for (k=0; k<DPB_SIZE; k++){ | |
| 998 if(h->dpb[k].reference >= 2 && h->dpb[k].cpn == s->ref_list_cpn[i][j]){ | |
| 999 s->ref_list[i][j] = &h->dpb[k]; | |
| 1000 break; | |
| 1001 } | |
| 1002 } | |
| 1003 | |
| 1004 } | |
| 1005 } | |
| 1006 dp = get_dpb_entry(h); | |
| 1007 init_dpb_entry(dp, s, h->width, h->height); | |
| 1008 assert(s->current_picture); | |
| 1009 { | |
| 1010 while (atomic_read(rl_cnt) >=MAX_SLICE_COUNT){ | |
| 1011 usleep(1000); | |
| 1012 } | |
| 1013 h->mbrel_q[h->mbrel_fi] = *s; | |
| 1014 | |
| 1015 h->mbrel_fi++; h->mbrel_fi %= MAX_SLICE_COUNT; | |
| 1016 } | |
| 1017 { | |
| 1018 if(h->no_mbd){ | |
| 1019 atomic_inc(rl_cnt); | |
| 1020 }else { | |
| 1021 fill_spe_slice(&spe_slice, s, h); | |
| 1022 for (i=0; i<h->rl_threads; i++){ | |
| 1023 H264spe *p=&spe_params[i]; | |
| 1024 unsigned status; | |
| 1025 while (atomic_read(p->cnt)>=2){ //double buffered | |
| 1026 usleep(1000); | |
| 1027 //cond_wait(p->cond, p->lock); | |
| 1028 } | |
| 1029 spe_mfcio_get(spe_context[i], (unsigned) (spe_slice_buf[i] + rl_fi[i]), &spe_slice, sizeof(H264slice), 15, 0, 0); | |
| 1030 spe_mfcio_tag_status_read(spe_context[i], 1<<15, SPE_TAG_ALL, &status); | |
| 1031 rl_fi[i]++; rl_fi[i] %= 2; | |
| 1032 atomic_inc(p->cnt); | |
| 1033 | |
| 1034 _spe_in_mbox_write(spe_control_area[i], 0); | |
| 1035 } | |
| 1036 } | |
| 1037 } | |
| 1038 | |
| 1039 { | |
| 1040 pthread_mutex_lock(&h->lock[MBDEC]); | |
| 1041 h->mbdec_cnt--; | |
| 1042 pthread_cond_signal(&h->cond[MBDEC]); | |
| 1043 pthread_mutex_unlock(&h->lock[MBDEC]); | |
| 1044 } | |
| 1045 | |
| 1046 } | |
| 1047 | |
| 1048 { | |
| 1049 while (atomic_read(rl_cnt) >=MAX_SLICE_COUNT){ | |
| 1050 usleep(1000); | |
| 1051 } | |
| 1052 h->mbrel_q[h->mbrel_fi] = *s; | |
| 1053 | |
| 1054 h->mbrel_fi++; h->mbrel_fi %= MAX_SLICE_COUNT; | |
| 1055 } | |
| 1056 spe_slice.state=-1; | |
| 1057 join_spe_worker_threads(&spe_slice, h->rl_threads, rl_fi); | |
| 1058 pthread_exit(NULL); | |
| 1059 return NULL; | |
| 1060 } | |
| 1061 | |
| 1062 static void *mbdec_cell_thread(void *arg){ | |
| 1063 H264Context *h = (H264Context *) arg; | |
| 1064 | |
| 1065 rl_lock = (mutex_ea_t) (unsigned) &rl_mutex_var; | |
| 1066 rl_cond = (cond_ea_t) (unsigned) &rl_cond_var; | |
| 1067 rl_cnt = (atomic_ea_t) (unsigned) &rl_cnt_var; | |
| 1068 atomic_set(rl_cnt, 0); | |
| 1069 mutex_init(rl_lock); | |
| 1070 cond_init(rl_cond); | |
| 1071 // printf("mbdec, pid %d\n", syscall(SYS_gettid)); | |
| 1072 pthread_create(&h->rl_dist_thr, NULL, rl_dist_thread, h); | |
| 1073 | |
| 1074 for(;;){ | |
| 1075 MBSlice *s=NULL; | |
| 1076 { | |
| 1077 while (atomic_read(rl_cnt)<=0){ | |
| 1078 usleep(1000); | |
| 1079 } | |
| 1080 s= &h->mbrel_q[h->mbrel_fo]; | |
| 1081 h->mbrel_fo++; h->mbrel_fo %= MAX_SLICE_COUNT; | |
| 1082 } | |
| 1083 | |
| 1084 if (s->state<0) | |
| 1085 break; | |
| 1086 | |
| 1087 for (int i=0; i<s->release_cnt; i++){ | |
| 1088 for(int j=0; j<DPB_SIZE; j++){ | |
| 1089 if(h->dpb[j].cpn== s->release_ref_cpn[i]){ | |
| 1090 release_dpb_entry(h, &h->dpb[j], 2); | |
| 1091 break; | |
| 1092 } | |
| 1093 } | |
| 1094 } | |
| 1095 | |
| 1096 { | |
| 1097 EDThreadContext *ed = s->ed; | |
| 1098 pthread_mutex_lock(&ed->mbs_lock); | |
| 1099 ed->mbs_cnt++; | |
| 1100 pthread_cond_signal(&ed->mbs_cond); | |
| 1101 pthread_mutex_unlock(&ed->mbs_lock); | |
| 1102 } | |
| 1103 | |
| 1104 { | |
| 1105 pthread_mutex_lock(&h->lock[WRITE]); | |
| 1106 while (h->write_cnt>= DPB_SIZE) | |
| 1107 pthread_cond_wait(&h->cond[WRITE], &h->lock[WRITE]); | |
| 1108 assert(s); | |
| 1109 assert(s->current_picture); | |
| 1110 h->write_q[h->write_fi]= s->current_picture; | |
| 1111 h->write_cnt++; | |
| 1112 h->write_fi++; h->write_fi %= DPB_SIZE; | |
| 1113 pthread_cond_signal(&h->cond[WRITE]); | |
| 1114 pthread_mutex_unlock(&h->lock[WRITE]); | |
| 1115 | |
| 1116 } | |
| 1117 { | |
| 1118 atomic_dec(rl_cnt); | |
| 1119 } | |
| 1120 | |
| 1121 } | |
| 1122 | |
| 1123 {//propagate exit | |
| 1124 pthread_mutex_lock(&h->lock[WRITE]); | |
| 1125 while (h->write_cnt>= DPB_SIZE) | |
| 1126 pthread_cond_wait(&h->cond[WRITE], &h->lock[WRITE]); | |
| 1127 last_pic.reference = -1; | |
| 1128 h->write_q[h->write_fi] = &last_pic; | |
| 1129 h->write_cnt++; | |
| 1130 h->write_fi++; h->write_fi %= DPB_SIZE; | |
| 1131 pthread_cond_signal(&h->cond[WRITE]); | |
| 1132 pthread_mutex_unlock(&h->lock[WRITE]); | |
| 1133 | |
| 1134 } | |
| 1135 pthread_join(h->rl_dist_thr, NULL); | |
| 1136 pthread_exit(NULL); | |
| 1137 return NULL; | |
| 1138 } | |
| 1139 | |
| 1140 /* | |
| 1141 * The following code is the main loop of the file converter | |
| 1142 */ | |
| 1143 int h264_decode_cell(H264Context *h) { | |
| 1144 | |
| 1145 pthread_t read_thr, parsenal_thr, entropy_thr, mbdec_thr, write_thr; | |
| 1146 | |
| 1147 start_timer(); | |
| 1148 | |
| 1149 pthread_create(&read_thr, NULL, read_thread, h); | |
| 1150 pthread_create(&parsenal_thr, NULL, parsenal_thread, h); | |
| 1151 pthread_create(&entropy_thr, NULL, entropy_IPB_cell_thread, h); | |
| 1152 pthread_create(&mbdec_thr, NULL, mbdec_cell_thread, h); | |
| 1153 pthread_create(&write_thr, NULL, write_thread, h); | |
| 1154 | |
| 1155 pthread_join(read_thr, NULL); | |
| 1156 pthread_join(parsenal_thr, NULL); | |
| 1157 pthread_join(entropy_thr, NULL); | |
| 1158 pthread_join(mbdec_thr, NULL); | |
| 1159 pthread_join(write_thr, NULL); | |
| 1160 | |
| 1161 return 0; | |
| 1162 } | |
| 1163 | |
| 1164 /* | |
| 1165 * The following code is the main loop of the file converter | |
| 1166 */ | |
| 1167 int h264_decode_cell_seq(H264Context *h) { | |
| 1168 ParserContext *pc; | |
| 1169 NalContext *nc; | |
| 1170 EntropyContext *ec; | |
| 1171 MBRecContext *rc; | |
| 1172 OutputContext *oc; | |
| 1173 | |
| 1174 RawFrame frm; | |
| 1175 EDSlice slice, *s=&slice; | |
| 1176 MBSlice mbslice, *s2=&mbslice; | |
| 1177 PictureInfo *pic=NULL; | |
| 1178 DecodedPicture *out; | |
| 1179 int size; | |
| 1180 int frames=0; | |
| 1181 | |
| 1182 pc = get_parse_context(h->ifile); | |
| 1183 nc = get_nal_context(h->width, h->height); | |
| 1184 ec = get_entropy_context( h ); | |
| 1185 rc = get_mbrec_context(h); | |
| 1186 oc = get_output_context( h ); | |
| 1187 | |
| 1188 rl_lock = (mutex_ea_t) (unsigned) &rl_mutex_var; | |
| 1189 rl_cond = (cond_ea_t) (unsigned) &rl_cond_var; | |
| 1190 rl_cnt = (atomic_ea_t) (unsigned) &rl_cnt_var; | |
| 1191 atomic_set(rl_cnt, 0); | |
| 1192 mutex_init(rl_lock); | |
| 1193 cond_init(rl_cond); | |
| 1194 | |
| 1195 memset(s, 0, sizeof(EDSlice)); | |
| 1196 ff_init_slice(nc, s); | |
| 1197 s->mbs = av_malloc( h->mb_height * h->mb_width * sizeof(H264Mb)); | |
| 1198 | |
| 1199 DecodedPicture tmp; | |
| 1200 tmp.base[0]=0; | |
| 1201 ///fix this when want to debug the Cell errors | |
| 1202 //init_dpb_entry(&tmp, h->width, h->height); | |
| 1203 | |
| 1204 create_spe_ED_threads(h, 1, 0); | |
| 1205 create_spe_MBR_threads(h, 1); | |
| 1206 | |
| 1207 start_timer(); | |
| 1208 | |
| 1209 while(!pc->final_frame && frames++ < h->num_frames){ | |
| 1210 | |
| 1211 av_read_frame_internal(pc, &frm); | |
| 1212 | |
| 1213 PictureInfo *pic=get_pib_entry(h); | |
| 1214 ff_alloc_picture_info(nc, s, pic); | |
| 1215 decode_nal_units(nc, s, &frm); | |
| 1216 | |
| 1217 copyEDtoMBSlice(s2, s); | |
| 1218 decode_slice_entropy_cell_seq(h, ec, s); | |
| 1219 | |
| 1220 decode_slice_mb_seq_cell(h, rc, s2, &tmp); | |
| 1221 | |
| 1222 out =output_frame(h, oc, s2->current_picture, h->ofile, h->frame_width, h->frame_height); | |
| 1223 | |
| 1224 if (out){ | |
| 1225 release_dpb_entry(h, out, 1); | |
| 1226 } | |
| 1227 print_report(oc->frame_number, oc->video_size, 0, h->verbose); | |
| 1228 } | |
| 1229 while ((out=output_frame(h, oc, NULL, h->ofile, h->frame_width, h->frame_height))) ; | |
| 1230 | |
| 1231 print_report(oc->frame_number, oc->video_size, 1, h->verbose); | |
| 1232 | |
| 1233 /* finished ! */ | |
| 1234 av_freep(&s->mbs); | |
| 1235 | |
| 1236 free_parse_context(pc); | |
| 1237 free_nal_context (nc); | |
| 1238 free_entropy_context(ec); | |
| 1239 free_mbrec_context(rc); | |
| 1240 free_output_context(oc); | |
| 1241 return 0; | |
| 1242 } |
