annotate libavcodec/h264_cell.c @ 9:ea1ba68cf0ed

update to match api changes + add sscc produced source
author Nina Engelhardt <nengel@mailbox.tu-berlin.de>
date Wed, 05 Jun 2013 14:43:26 +0200
parents
children
rev   line source
nengel@2 1
nengel@2 2 #include "h264_types.h"
nengel@2 3 #include "h264_parser.h"
nengel@2 4 #include "h264_nal.h"
nengel@2 5 #include "h264_entropy.h"
nengel@2 6 #include "h264_rec.h"
nengel@2 7 #include "h264_misc.h"
nengel@2 8 #include "cell/h264_types_spu.h"
nengel@2 9 #include "h264_pthread.h"
nengel@2 10
nengel@2 11 #include <pthread.h>
nengel@2 12 #include <assert.h>
nengel@2 13 #include <unistd.h>
nengel@2 14
nengel@2 15 #include <libspe2.h>
nengel@2 16 #include <ppu_intrinsics.h>
nengel@2 17 #include <cbe_mfc.h>
nengel@2 18 #include <libsync.h>
nengel@2 19
nengel@2 20 // spe global variables
// Counter / condition / mutex handles. In the visible part of the file only
// rl_cnt is used: decode_slice_mb_seq_cell polls it with atomic_read() and
// then decrements it. Where the *_var words are bound to the handles is not
// visible here.
nengel@2 21 unsigned rl_cnt_var, rl_mutex_var, rl_cond_var;
nengel@2 22 atomic_ea_t rl_cnt;
nengel@2 23 cond_ea_t rl_cond;
nengel@2 24 mutex_ea_t rl_lock;
nengel@2 25
// Per-SPE state for the macroblock-reconstruction SPEs (presumably one entry
// per SPE; the allocation is not visible in this part of the file).
// decode_slice_mb_seq_cell uses entry 0 of spe_params, spe_context,
// spe_control_area and spe_slice_buf.
nengel@2 26 H264spe * spe_params;
nengel@2 27 unsigned mutex_var[16];
nengel@2 28 unsigned cond_var[16];
nengel@2 29 unsigned atomic_var[16];
nengel@2 30
nengel@2 31 pthread_t * spe_tid;
nengel@2 32 spe_context_ptr_t *spe_context;
nengel@2 33 void** spe_control_area;
nengel@2 34 void** spe_ls_area;
nengel@2 35 H264slice **spe_slice_buf;
nengel@2 36
// Per-SPE state for the entropy-decode (ED) SPEs. The pointer tables are
// allocated and filled by create_spe_ED_threads. The three 16-entry arrays
// provide the words behind each thread's lock / cond / cnt handle, indexed
// by thread index, so at most 16 ED SPE threads fit.
nengel@2 37 H264spe * spe_ed_params;
nengel@2 38 unsigned mutex_ed_var[16];
nengel@2 39 unsigned cond_ed_var[16];
nengel@2 40 unsigned atomic_ed_var[16];
nengel@2 41
nengel@2 42 pthread_t * spe_ed_tid;
nengel@2 43 spe_context_ptr_t *spe_ed_context;
nengel@2 44 void** spe_ed_control_area;
nengel@2 45 void** spe_ed_ls_area;
// Local-store address of each ED SPE's slice buffer, as reported by the SPE
// through its outbound mailbox right after start-up.
nengel@2 46 EDSlice_spu **spe_ed_slice_buf;
nengel@2 47
nengel@2 48 //structs to propagate stop signal
// NOTE(review): none of these four objects is referenced in the visible part
// of the file; confirm their users before changing them.
nengel@2 49 MBSlice last_slice;
nengel@2 50 EDSlice last_ed_slice;
nengel@2 51 DecodedPicture last_pic;
nengel@2 52 RawFrame last_frm;
nengel@2 53
nengel@2 54 static int direct_B_resolved(EDSlice *s, int *poc_list, int *poc_cnt){
nengel@2 55 int i;
nengel@2 56 int cnt = *poc_cnt;
nengel@2 57 for(i=0; i<cnt; i++){
nengel@2 58 if (poc_list[i]==s->ref_list[1][0]->poc){
nengel@2 59 *poc_cnt=i+1;
nengel@2 60 while(++i<cnt)
nengel@2 61 poc_list[i]=0;
nengel@2 62 return 1;
nengel@2 63 }
nengel@2 64 }
nengel@2 65 return 0;
nengel@2 66 }
nengel@2 67
/**
 * Insert poc into poc_list, keeping the first *poc_cnt entries sorted in
 * descending order, and increment *poc_cnt.
 *
 * The new value goes in front of the first live entry that is not greater
 * than it. The caller must provide room for one more element.
 *
 * @param poc_list  array holding *poc_cnt live entries
 * @param poc_cnt   in/out number of live entries
 * @param poc       value to insert
 */
static void update_IP_poc_list(int *poc_list, int *poc_cnt, int poc) {
    const int cnt = *poc_cnt;
    int pos = 0;

    // Bound the scan by cnt: entries past the live part of the list are
    // stale and must not decide the insertion point.
    while (pos < cnt && poc_list[pos] > poc)
        pos++;

    if (pos < cnt)
        memmove(&poc_list[pos + 1], &poc_list[pos], (size_t)(cnt - pos) * sizeof(int));

    poc_list[pos] = poc;
    *poc_cnt = cnt + 1;
}
nengel@2 79
nengel@2 80 static void *spe_ed_thread(void *arg){
nengel@2 81 H264spe *params = (H264spe *)arg;
nengel@2 82 unsigned int idx = params->idx;
nengel@2 83 unsigned int runflags = 0;
nengel@2 84 unsigned int entry = SPE_DEFAULT_ENTRY;
nengel@2 85 // run SPE context
nengel@2 86 spe_context_run(spe_ed_context[idx], &entry, runflags, (void*) params, NULL, NULL);
nengel@2 87 // done - now exit thread
nengel@2 88 pthread_exit(NULL);
nengel@2 89 }
nengel@2 90
nengel@2 91 static void create_spe_ED_threads(H264Context *h, int ip_threads, int b_threads) {
nengel@2 92 int i;
nengel@2 93 int num_threads = ip_threads+b_threads;
nengel@2 94 spe_program_handle_t * spe_program = spe_image_open("spe_ed");
nengel@2 95 // reserve memory for spe thread id, context and argument addresses
nengel@2 96 spe_ed_tid = av_malloc(num_threads * sizeof (pthread_t));
nengel@2 97 spe_ed_context = av_malloc(num_threads * sizeof (spe_context_ptr_t));
nengel@2 98 spe_ed_params = av_malloc(num_threads * sizeof (H264spe));
nengel@2 99 spe_ed_control_area = av_malloc(num_threads * sizeof (void*));
nengel@2 100 spe_ed_ls_area = av_malloc(num_threads * sizeof (void*));
nengel@2 101 spe_ed_slice_buf = av_malloc(num_threads * sizeof (void*));
nengel@2 102
nengel@2 103 if (spe_program == NULL)
nengel@2 104 av_log(AV_LOG_ERROR, "PPE: error opening SPE object image:%d. error=%s \n", errno, strerror(errno));
nengel@2 105
nengel@2 106 for (i = 0; i < num_threads; i++) {
nengel@2 107 // create context for spe program
nengel@2 108 spe_ed_context[i] = spe_context_create(SPE_MAP_PS, NULL);
nengel@2 109 if (spe_ed_context[i] == NULL)
nengel@2 110 av_log(AV_LOG_ERROR, "PPE: error creating SPE context:%d. error=%s \n", errno, strerror(errno));
nengel@2 111 // load SPE program into main memory
nengel@2 112 if ((spe_program_load(spe_ed_context[i], spe_program)) == -1)
nengel@2 113 av_log(AV_LOG_ERROR, "PPE: error loading SPE context:%d. error=%s \n", errno, strerror(errno));
nengel@2 114 //get the control_area for fast mailboxing
nengel@2 115 if ((spe_ed_control_area[i] = spe_ps_area_get(spe_ed_context[i], SPE_CONTROL_AREA)) == NULL)
nengel@2 116 av_log(AV_LOG_ERROR, "PPE: error retrieving SPE control area:%d. error=%s \n", errno, strerror(errno));
nengel@2 117 //get ls area for inter spe communication
nengel@2 118 if ((spe_ed_ls_area[i] = spe_ls_area_get(spe_ed_context[i])) == NULL)
nengel@2 119 av_log(AV_LOG_ERROR, "PPE: error retrieving SPE ls area:%d. error=%s \n", errno, strerror(errno));
nengel@2 120 }
nengel@2 121
nengel@2 122 for (i = 0; i < ip_threads; i++) {
nengel@2 123 spe_ed_params[i].mb_width = h->mb_width;
nengel@2 124 spe_ed_params[i].mb_stride = h->mb_stride;
nengel@2 125 spe_ed_params[i].mb_height = h->mb_height;
nengel@2 126 spe_ed_params[i].type = EDIP;
nengel@2 127 spe_ed_params[i].spe_id = i;
nengel@2 128 spe_ed_params[i].idx = i;
nengel@2 129 //spe_ed_params[i].spe_total = ip_threads; //not used
nengel@2 130 //spe_params[i].slice_params= &slice_params;
nengel@2 131 spe_ed_params[i].src_spe = spe_ed_ls_area[(i-1+num_threads)%num_threads];
nengel@2 132 spe_ed_params[i].tgt_spe = spe_ed_ls_area[(i+1)%num_threads];
nengel@2 133
nengel@2 134 spe_ed_params[i].lock = (mutex_ea_t) (unsigned) &mutex_ed_var[i];
nengel@2 135 spe_ed_params[i].cond = (cond_ea_t) (unsigned) &cond_ed_var[i];
nengel@2 136 spe_ed_params[i].cnt = (atomic_ea_t)(unsigned) &atomic_ed_var[i]; atomic_set(spe_ed_params[i].cnt, 0);
nengel@2 137
nengel@2 138 mutex_init(spe_ed_params[i].lock);
nengel@2 139 cond_init(spe_ed_params[i].cond);
nengel@2 140 if (pthread_create(&spe_ed_tid[i], NULL, spe_ed_thread, (void *) &spe_ed_params[i]))
nengel@2 141 av_log(AV_LOG_ERROR, "create_workers: pthread create for spe failed %d\n", i);
nengel@2 142
nengel@2 143 //slicebufaddr
nengel@2 144 spe_ed_slice_buf[i] = (EDSlice_spu *) _spe_out_mbox_read(spe_ed_control_area[i]);
nengel@2 145 av_log(AV_LOG_DEBUG, "create_workers: created spe thread %d\n", i);
nengel@2 146 }
nengel@2 147 for (int j = 0; j < b_threads; j++) {
nengel@2 148 i = j+ip_threads;
nengel@2 149 spe_ed_params[i].mb_width = h->mb_width;
nengel@2 150 spe_ed_params[i].mb_stride = h->mb_stride;
nengel@2 151 spe_ed_params[i].mb_height = h->mb_height;
nengel@2 152 spe_ed_params[i].type = EDB;
nengel@2 153 spe_ed_params[i].idx = i;
nengel@2 154 spe_ed_params[i].spe_id = j;
nengel@2 155 spe_ed_params[i].spe_total = b_threads;
nengel@2 156 //spe_params[i].slice_params= &slice_params;
nengel@2 157 //spe_ed_params[i].src_spe = spe_ed_ls_area[(i-1+num_threads)%num_threads];
nengel@2 158 spe_ed_params[i].tgt_spe = spe_ed_ls_area[((j+1)%b_threads) + ip_threads];
nengel@2 159
nengel@2 160 spe_ed_params[i].lock = (mutex_ea_t) (unsigned) &mutex_ed_var[i];
nengel@2 161 spe_ed_params[i].cond = (cond_ea_t) (unsigned) &cond_ed_var[i];
nengel@2 162 spe_ed_params[i].cnt = (atomic_ea_t)(unsigned) &atomic_ed_var[i]; atomic_set(spe_ed_params[i].cnt, 0);
nengel@2 163
nengel@2 164 mutex_init(spe_ed_params[i].lock);
nengel@2 165 cond_init(spe_ed_params[i].cond);
nengel@2 166 if (pthread_create(&spe_ed_tid[i], NULL, spe_ed_thread, (void *) &spe_ed_params[i]))
nengel@2 167 av_log(AV_LOG_ERROR, "create_workers: pthread create for spe failed %d\n", i);
nengel@2 168
nengel@2 169 //slicebufaddr
nengel@2 170 spe_ed_slice_buf[i] = (EDSlice_spu *) _spe_out_mbox_read(spe_ed_control_area[i]);
nengel@2 171 av_log(AV_LOG_DEBUG, "create_workers: created spe thread %d\n", i);
nengel@2 172 }
nengel@2 173 spe_image_close(spe_program);
nengel@2 174
nengel@2 175 }
nengel@2 176
nengel@2 177 static void fill_EDSlice_spu(EDSlice_spu *dst, EDSlice *src){
nengel@2 178 dst->pps = src->pps;
nengel@2 179 dst->mbs = src->mbs;
nengel@2 180 dst->state = src->state;
nengel@2 181 dst->qp_thresh = src->qp_thresh;
nengel@2 182 dst->pic = *src->current_picture;
nengel@2 183
nengel@2 184 dst->ref_count[0] = src->ref_count[0];
nengel@2 185 dst->ref_count[1] = src->ref_count[1];
nengel@2 186 dst->slice_type = src->slice_type;
nengel@2 187 dst->slice_type_nos = src->slice_type_nos;
nengel@2 188 dst->direct_8x8_inference_flag = src->direct_8x8_inference_flag;
nengel@2 189 dst->list_count = src->list_count;
nengel@2 190 dst->coded_pic_num = src->coded_pic_num;
nengel@2 191
nengel@2 192 GetBitContext *gb = &src->gb;
nengel@2 193 align_get_bits( gb);
nengel@2 194 dst->bytestream_start = gb->buffer + get_bits_count(gb)/8;
nengel@2 195 dst->byte_bufsize = (get_bits_left(gb) + 7)/8;
nengel@2 196
nengel@2 197 dst->transform_bypass = src->transform_bypass;
nengel@2 198 dst->direct_spatial_mv_pred = src->direct_spatial_mv_pred;
nengel@2 199 memcpy(dst->map_col_to_list0, src->map_col_to_list0, 2*16*sizeof(int));
nengel@2 200 memcpy(dst->dist_scale_factor, src->dist_scale_factor, 16*sizeof(int));
nengel@2 201 dst->cabac_init_idc = src->cabac_init_idc;
nengel@2 202 memcpy(dst->ref2frm, src->ref2frm, 2*64*sizeof(int));
nengel@2 203 dst->chroma_qp[0]= src->chroma_qp[0];
nengel@2 204 dst->chroma_qp[1]= src->chroma_qp[1];
nengel@2 205 dst->qscale = src->qscale;
nengel@2 206 dst->last_qscale_diff = src->last_qscale_diff;
nengel@2 207
nengel@2 208 if (src->slice_type_nos == FF_B_TYPE) dst->list1 = *src->ref_list[1][0];
nengel@2 209 }
nengel@2 210
nengel@2 211 static void send_slice_to_spe_and_wait(EDSlice_spu *s, int id){
nengel@2 212 unsigned status;
nengel@2 213
nengel@2 214 spe_mfcio_get(spe_ed_context[id], (unsigned) spe_ed_slice_buf[id], s, sizeof(EDSlice_spu), 14, 0, 0);
nengel@2 215 spe_mfcio_tag_status_read(spe_ed_context[id], 1<<14, SPE_TAG_ALL, &status);
nengel@2 216
nengel@2 217
nengel@2 218 _spe_in_mbox_write(spe_ed_control_area[id], 0);
nengel@2 219
nengel@2 220 while (!spe_out_mbox_status(spe_ed_context[id])){
nengel@2 221 //pthread_yield();
nengel@2 222 usleep(1000);
nengel@2 223 }
nengel@2 224 _spe_out_mbox_read(spe_ed_control_area[id]);
nengel@2 225 }
nengel@2 226
nengel@2 227 static int decode_slice_entropy_cell(EntropyContext *ec, EDSlice *s, int id){
nengel@2 228 int i,j;
nengel@2 229
nengel@2 230 if( !s->pps.cabac ){
nengel@2 231 av_log(AV_LOG_ERROR, "Only cabac encoded streams are supported\n");
nengel@2 232 return -1;
nengel@2 233 }
nengel@2 234 DECLARE_ALIGNED(16, EDSlice_spu, slice);
nengel@2 235 fill_EDSlice_spu(&slice, s);
nengel@2 236
nengel@2 237 send_slice_to_spe_and_wait(&slice, id);
nengel@2 238
nengel@2 239 return 0;
nengel@2 240 }
nengel@2 241
nengel@2 242 static int decode_slice_entropy_cell_seq(H264Context *h, EntropyContext *ec, EDSlice *s){
nengel@2 243 int i,j;
nengel@2 244
nengel@2 245 if( !s->pps.cabac ){
nengel@2 246 av_log(AV_LOG_ERROR, "Only cabac encoded streams are supported\n");
nengel@2 247 return -1;
nengel@2 248 }
nengel@2 249 DECLARE_ALIGNED(16, EDSlice_spu, slice);
nengel@2 250 fill_EDSlice_spu(&slice, s);
nengel@2 251
nengel@2 252 send_slice_to_spe_and_wait(&slice, 0);
nengel@2 253
nengel@2 254 if (s->release_cnt>0) {
nengel@2 255 for (int i=0; i<s->release_cnt; i++){
nengel@2 256 release_pib_entry(h, s->release_ref[i], 2);
nengel@2 257 }
nengel@2 258 s->release_cnt=0;
nengel@2 259 }
nengel@2 260
nengel@2 261 release_pib_entry(h, s->current_picture, 1);
nengel@2 262 av_freep(&s->gb.raw);
nengel@2 263 if (s->gb.rbsp)
nengel@2 264 av_freep(&s->gb.rbsp);
nengel@2 265
nengel@2 266 return 0;
nengel@2 267 }
nengel@2 268
nengel@2 269 static void *entr_IP_spe_thread(void *arg){
nengel@2 270 EDThreadContext *eip = (EDThreadContext *) arg;
nengel@2 271 H264Context *h = eip->h;
nengel@2 272 // printf("eip %d, pid %d\n", eip->thread_num, syscall(SYS_gettid));
nengel@2 273 for (int i=0; i<SLICE_BUFS; i++){
nengel@2 274 eip->mbs[i] = av_malloc(h->mb_height*h->mb_width*sizeof(H264Mb));
nengel@2 275 }
nengel@2 276
nengel@2 277 EntropyContext *ec = get_entropy_context(h);
nengel@2 278 EDSlice *s;
nengel@2 279
nengel@2 280 for(;;){
nengel@2 281 {
nengel@2 282 pthread_mutex_lock(&eip->ed_lock);
nengel@2 283 while (eip->ed_cnt <= 0)
nengel@2 284 pthread_cond_wait(&eip->ed_cond, &eip->ed_lock);
nengel@2 285 s = &eip->ed_q[eip->ed_fo];
nengel@2 286 eip->ed_fo++; eip->ed_fo %= MAX_SLICE_COUNT;
nengel@2 287 pthread_mutex_unlock(&eip->ed_lock);
nengel@2 288 }
nengel@2 289
nengel@2 290 if (s->state<0)
nengel@2 291 break;
nengel@2 292 {
nengel@2 293 pthread_mutex_lock(&eip->mbs_lock);
nengel@2 294 while (eip->mbs_cnt <= 0)
nengel@2 295 pthread_cond_wait(&eip->mbs_cond, &eip->mbs_lock);
nengel@2 296
nengel@2 297 s->mbs = eip->mbs[eip->mbs_fo];
nengel@2 298 s->ed = eip;
nengel@2 299 eip->mbs_cnt--;
nengel@2 300 eip->mbs_fo++; eip->mbs_fo%=SLICE_BUFS;
nengel@2 301 pthread_mutex_unlock(&eip->mbs_lock);
nengel@2 302 }
nengel@2 303 if (eip->cell){
nengel@2 304 decode_slice_entropy_cell(ec, s, eip->thread_num);
nengel@2 305 }else{
nengel@2 306 decode_slice_entropy(ec, s);
nengel@2 307 }
nengel@2 308
nengel@2 309 // {
nengel@2 310 // pthread_mutex_lock(&h->lock[ENTROPY2]);
nengel@2 311 // h->ed_poc[h->ed_poc_fi++ % MAX_SLICE_COUNT] = s->current_picture->poc;
nengel@2 312 // while (h->ed_poc_fi > h->ed_poc_fo + MAX_SLICE_COUNT)
nengel@2 313 // h->ed_poc_fo++;
nengel@2 314 //
nengel@2 315 // pthread_cond_signal(&h->cond[ENTROPY2]);
nengel@2 316 // pthread_mutex_unlock(&h->lock[ENTROPY2]);
nengel@2 317 // }
nengel@2 318
nengel@2 319 {
nengel@2 320 pthread_mutex_lock(&h->lock[ENTROPY4]);
nengel@2 321 while (h->ed_reorder_cnt>=MAX_SLICE_COUNT)
nengel@2 322 pthread_cond_wait(&h->cond[ENTROPY4], &h->lock[ENTROPY4]);
nengel@2 323 h->ed_reorder_q[h->ed_reorder_fi] = *s;
nengel@2 324 h->ed_reorder_cnt++;
nengel@2 325 h->ed_reorder_fi++; h->ed_reorder_fi %= MAX_SLICE_COUNT;
nengel@2 326 pthread_cond_signal(&h->cond[ENTROPY4]);
nengel@2 327 pthread_mutex_unlock(&h->lock[ENTROPY4]);
nengel@2 328 }
nengel@2 329
nengel@2 330 {
nengel@2 331 pthread_mutex_lock(&eip->ed_lock);
nengel@2 332 eip->ed_cnt--;
nengel@2 333 pthread_cond_signal(&eip->ed_cond);
nengel@2 334 pthread_mutex_unlock(&eip->ed_lock);
nengel@2 335 }
nengel@2 336 }
nengel@2 337
nengel@2 338 free_entropy_context(ec);
nengel@2 339
nengel@2 340 pthread_exit(NULL);
nengel@2 341 return NULL;
nengel@2 342 }
nengel@2 343
nengel@2 344 static void *entr_B_spe_thread(void *arg){
nengel@2 345 EDThreadContext *eb = (EDThreadContext *) arg;
nengel@2 346 H264Context *h = eb->h;
nengel@2 347 // printf("eb %d, pid %d\n", eb->thread_num, syscall(SYS_gettid));
nengel@2 348 for (int i=0; i<SLICE_BUFS; i++){
nengel@2 349 eb->mbs[i] = av_malloc(h->mb_height*h->mb_width*sizeof(H264Mb));
nengel@2 350 }
nengel@2 351
nengel@2 352 EntropyContext *ec = get_entropy_context(h);
nengel@2 353 EDSlice *s;
nengel@2 354
nengel@2 355 for(;;){
nengel@2 356 {
nengel@2 357 pthread_mutex_lock(&eb->ed_lock);
nengel@2 358 while (eb->ed_cnt <= 0)
nengel@2 359 pthread_cond_wait(&eb->ed_cond, &eb->ed_lock);
nengel@2 360 s = &eb->ed_q[eb->ed_fo];
nengel@2 361 eb->ed_fo++; eb->ed_fo %= MAX_SLICE_COUNT;
nengel@2 362 pthread_mutex_unlock(&eb->ed_lock);
nengel@2 363 }
nengel@2 364
nengel@2 365 if (s->state<0)
nengel@2 366 break;
nengel@2 367 {
nengel@2 368 pthread_mutex_lock(&eb->mbs_lock);
nengel@2 369 while (eb->mbs_cnt <= 0)
nengel@2 370 pthread_cond_wait(&eb->mbs_cond, &eb->mbs_lock);
nengel@2 371 s->mbs = eb->mbs[eb->mbs_fo];
nengel@2 372 s->ed = eb;
nengel@2 373 eb->mbs_cnt--;
nengel@2 374 eb->mbs_fo++; eb->mbs_fo%=SLICE_BUFS;
nengel@2 375 pthread_mutex_unlock(&eb->mbs_lock);
nengel@2 376 }
nengel@2 377 //decode_B_slice_entropy(&hcabac, &cabac, s, eb, eb->prev_ed);
nengel@2 378 decode_slice_entropy_cell(ec, s, eb->thread_num + h->edip_threads);
nengel@2 379
nengel@2 380 {
nengel@2 381 pthread_mutex_lock(&h->lock[ENTROPY4]);
nengel@2 382 while (h->ed_reorder_cnt>=MAX_SLICE_COUNT)
nengel@2 383 pthread_cond_wait(&h->cond[ENTROPY4], &h->lock[ENTROPY4]);
nengel@2 384 h->ed_reorder_q[h->ed_reorder_fi] = *s;
nengel@2 385 h->ed_reorder_cnt++;
nengel@2 386 h->ed_reorder_fi++; h->ed_reorder_fi %= MAX_SLICE_COUNT;
nengel@2 387 pthread_cond_signal(&h->cond[ENTROPY4]);
nengel@2 388 pthread_mutex_unlock(&h->lock[ENTROPY4]);
nengel@2 389
nengel@2 390 }
nengel@2 391
nengel@2 392 {
nengel@2 393 pthread_mutex_lock(&eb->ed_lock);
nengel@2 394 eb->ed_cnt--;
nengel@2 395 pthread_cond_signal(&eb->ed_cond);
nengel@2 396 pthread_mutex_unlock(&eb->ed_lock);
nengel@2 397 }
nengel@2 398 }
nengel@2 399 eb->lines_cnt++;
nengel@2 400
nengel@2 401 free_entropy_context(ec);
nengel@2 402
nengel@2 403 pthread_exit(NULL);
nengel@2 404 return NULL;
nengel@2 405 }
nengel@2 406
nengel@2 407 static void *entr_B_distribute(void *arg){
nengel@2 408 H264Context *h = (H264Context *) arg;
nengel@2 409 EDSlice *s;
nengel@2 410
nengel@2 411 int i, n=0, poc;
nengel@2 412
nengel@2 413 // printf("eb dist, pid %d\n", syscall(SYS_gettid));
nengel@2 414
nengel@2 415 for(i=0; i<h->edb_threads; i++){
nengel@2 416 h->b[i].h =h;
nengel@2 417 h->b[i].thread_num =i;
nengel@2 418 h->b[i].thread_total =h->edb_threads;
nengel@2 419 pthread_mutex_init(&h->b[i].mbs_lock, NULL);
nengel@2 420 pthread_cond_init(&h->b[i].mbs_cond, NULL);
nengel@2 421 h->b[i].mbs_fo = 0;
nengel@2 422 h->b[i].mbs_cnt = SLICE_BUFS;
nengel@2 423 h->b[i].ed_fi =0;
nengel@2 424 h->b[i].ed_fo =0;
nengel@2 425 h->b[i].ed_cnt =0;
nengel@2 426 h->b[i].lines_cnt =0;
nengel@2 427 h->b[i].prev_ed = &h->b[(i-1 +h->edb_threads) % h->edb_threads];
nengel@2 428 pthread_mutex_init(&h->b[i].ed_lock, NULL);
nengel@2 429 pthread_cond_init(&h->b[i].ed_cond, NULL);
nengel@2 430 pthread_create(&h->ed_B_thr[i], NULL, entr_B_spe_thread, &h->b[i]);
nengel@2 431 }
nengel@2 432
nengel@2 433 for(;;){
nengel@2 434 {
nengel@2 435 pthread_mutex_lock(&h->lock[ENTROPY3B]);
nengel@2 436 while (h->ed_B_cnt<=0)
nengel@2 437 pthread_cond_wait(&h->cond[ENTROPY3B], &h->lock[ENTROPY3B]);
nengel@2 438 s= &h->ed_B_q[h->ed_B_fo];
nengel@2 439 h->ed_B_fo++; h->ed_B_fo %= MAX_SLICE_COUNT;
nengel@2 440 pthread_mutex_unlock(&h->lock[ENTROPY3B]);
nengel@2 441
nengel@2 442 }
nengel@2 443 if (s->state<0)
nengel@2 444 break;
nengel@2 445
nengel@2 446 if (s->ref_list[1][0]->slice_type_nos != FF_B_TYPE){
nengel@2 447 while (poc < s->ref_list[1][0]->poc){
nengel@2 448 pthread_mutex_lock(&h->lock[ENTROPY2]);
nengel@2 449 while (poc == h->ed_poc)
nengel@2 450 pthread_cond_wait(&h->cond[ENTROPY2], &h->lock[ENTROPY2]);
nengel@2 451 poc = h->ed_poc;
nengel@2 452 pthread_mutex_unlock(&h->lock[ENTROPY2]);
nengel@2 453 }
nengel@2 454 }
nengel@2 455 {
nengel@2 456 pthread_mutex_lock(&h->b[n].ed_lock);
nengel@2 457 while (h->b[n].ed_cnt >= MAX_SLICE_COUNT)
nengel@2 458 pthread_cond_wait(&h->b[n].ed_cond, &h->b[n].ed_lock);
nengel@2 459 h->b[n].ed_q[ h->b[n].ed_fi] = *s;
nengel@2 460 h->b[n].ed_cnt++;
nengel@2 461 h->b[n].ed_fi++; h->b[n].ed_fi %= MAX_SLICE_COUNT;
nengel@2 462 pthread_cond_signal(&h->b[n].ed_cond);
nengel@2 463 pthread_mutex_unlock(&h->b[n].ed_lock);
nengel@2 464
nengel@2 465 n++; n%=h->edb_threads;
nengel@2 466 }
nengel@2 467 {
nengel@2 468 pthread_mutex_lock(&h->lock[ENTROPY3B]);
nengel@2 469 h->ed_B_cnt--;
nengel@2 470 pthread_cond_signal(&h->cond[ENTROPY3B]);
nengel@2 471 pthread_mutex_unlock(&h->lock[ENTROPY3B]);
nengel@2 472
nengel@2 473 }
nengel@2 474
nengel@2 475 }
nengel@2 476
nengel@2 477 for (i=0; i<h->edb_threads; i++){
nengel@2 478 pthread_mutex_lock(&h->b[i].ed_lock);
nengel@2 479 while (h->b[i].ed_cnt >= MAX_SLICE_COUNT)
nengel@2 480 pthread_cond_wait(&h->b[i].ed_cond, &h->b[i].ed_lock);
nengel@2 481 h->b[i].ed_q[ h->b[i].ed_fi] = *s;
nengel@2 482 h->b[i].ed_cnt++;
nengel@2 483 h->b[i].ed_fi++; h->b[i].ed_fi %= MAX_SLICE_COUNT;
nengel@2 484 pthread_cond_signal(&h->b[i].ed_cond);
nengel@2 485 pthread_mutex_unlock(&h->b[i].ed_lock);
nengel@2 486
nengel@2 487 }
nengel@2 488 for(int i=0; i<h->edb_threads; i++){
nengel@2 489 pthread_join(h->ed_B_thr[i], NULL);
nengel@2 490 }
nengel@2 491 pthread_exit(NULL);
nengel@2 492 return NULL;
nengel@2 493 }
nengel@2 494
nengel@2 495
nengel@2 496 static void *entr_IPB_distribute(void *arg){
nengel@2 497 H264Context *h = (H264Context *) arg;
nengel@2 498 EDSlice *s;
nengel@2 499 int i,n=0;
nengel@2 500
nengel@2 501 create_spe_ED_threads(h, h->edip_threads, h->edb_threads);
nengel@2 502 pthread_create(&h->ed_B_dist, NULL, entr_B_distribute, h);
nengel@2 503 for(i=0; i<h->edip_threads + h->edip_ppe_threads; i++){
nengel@2 504 h->ip[i].h =h;
nengel@2 505 h->ip[i].cell = (i >= h->edip_ppe_threads);
nengel@2 506 pthread_mutex_init(&h->ip[i].mbs_lock, NULL);
nengel@2 507 pthread_cond_init(&h->ip[i].mbs_cond, NULL);
nengel@2 508 h->ip[i].thread_num = i - h->edip_ppe_threads;
nengel@2 509 h->ip[i].thread_total=h->edip_threads+ h->edip_ppe_threads;
nengel@2 510 h->ip[i].mbs_fo = 0;
nengel@2 511 h->ip[i].mbs_cnt = SLICE_BUFS;
nengel@2 512 h->ip[i].ed_fi =0;
nengel@2 513 h->ip[i].ed_fo =0;
nengel@2 514 pthread_mutex_init(&h->ip[i].ed_lock, NULL);
nengel@2 515 pthread_cond_init(&h->ip[i].ed_cond, NULL);
nengel@2 516 pthread_create(&h->ed_IP_thr[i], NULL, entr_IP_spe_thread, &h->ip[i]);
nengel@2 517 }
nengel@2 518
nengel@2 519 for(;;){
nengel@2 520 {
nengel@2 521 pthread_mutex_lock(&h->lock[ENTROPY]);
nengel@2 522 while (h->ed_cnt<=0)
nengel@2 523 pthread_cond_wait(&h->cond[ENTROPY], &h->lock[ENTROPY]);
nengel@2 524 s= &h->ed_q[h->ed_fo];
nengel@2 525
nengel@2 526 pthread_mutex_unlock(&h->lock[ENTROPY]);
nengel@2 527 h->ed_fo++; h->ed_fo %= MAX_SLICE_COUNT;
nengel@2 528 }
nengel@2 529 if (s->state<0)
nengel@2 530 break;
nengel@2 531
nengel@2 532 assert(s->current_picture);
nengel@2 533 if (s->slice_type_nos == FF_B_TYPE )
nengel@2 534 {
nengel@2 535 pthread_mutex_lock(&h->lock[ENTROPY3B]);
nengel@2 536 while (h->ed_B_cnt>=MAX_SLICE_COUNT)
nengel@2 537 pthread_cond_wait(&h->cond[ENTROPY3B], &h->lock[ENTROPY3B]);
nengel@2 538 h->ed_B_q[h->ed_B_fi] = *s;
nengel@2 539 h->ed_B_cnt++;
nengel@2 540 h->ed_B_fi++; h->ed_B_fi %= MAX_SLICE_COUNT;
nengel@2 541 pthread_cond_signal(&h->cond[ENTROPY3B]);
nengel@2 542 pthread_mutex_unlock(&h->lock[ENTROPY3B]);
nengel@2 543 }else
nengel@2 544 {
nengel@2 545 ///round robin now, change to based on rawframes size.
nengel@2 546 pthread_mutex_lock(&h->ip[n].ed_lock);
nengel@2 547 while (h->ip[n].ed_cnt >= MAX_SLICE_COUNT)
nengel@2 548 pthread_cond_wait(&h->ip[n].ed_cond, &h->ip[n].ed_lock);
nengel@2 549 h->ip[n].ed_q[ h->ip[n].ed_fi] = *s;
nengel@2 550 h->ip[n].ed_cnt++;
nengel@2 551 h->ip[n].ed_fi++; h->ip[n].ed_fi %= MAX_SLICE_COUNT;
nengel@2 552 pthread_cond_signal(&h->ip[n].ed_cond);
nengel@2 553 pthread_mutex_unlock(&h->ip[n].ed_lock);
nengel@2 554
nengel@2 555 n++; n %=(h->edip_threads+h->edip_ppe_threads);
nengel@2 556 }
nengel@2 557 {
nengel@2 558 pthread_mutex_lock(&h->lock[ENTROPY]);
nengel@2 559 h->ed_cnt--;
nengel@2 560 pthread_cond_signal(&h->cond[ENTROPY]);
nengel@2 561 pthread_mutex_unlock(&h->lock[ENTROPY]);
nengel@2 562
nengel@2 563 }
nengel@2 564 }
nengel@2 565
nengel@2 566 {
nengel@2 567 pthread_mutex_lock(&h->lock[ENTROPY3B]);
nengel@2 568 while (h->ed_B_cnt>=MAX_SLICE_COUNT)
nengel@2 569 pthread_cond_wait(&h->cond[ENTROPY3B], &h->lock[ENTROPY3B]);
nengel@2 570 h->ed_B_q[h->ed_B_fi] = *s;
nengel@2 571 h->ed_B_cnt++;
nengel@2 572 h->ed_B_fi++; h->ed_B_fi %= MAX_SLICE_COUNT;
nengel@2 573 pthread_cond_signal(&h->cond[ENTROPY3B]);
nengel@2 574 pthread_mutex_unlock(&h->lock[ENTROPY3B]);
nengel@2 575 }
nengel@2 576 {
nengel@2 577 for (i=0; i<h->edip_threads + h->edip_ppe_threads; i++){
nengel@2 578 pthread_mutex_lock(&h->ip[i].ed_lock);
nengel@2 579 while (h->ip[i].ed_cnt >= MAX_SLICE_COUNT)
nengel@2 580 pthread_cond_wait(&h->ip[i].ed_cond, &h->ip[i].ed_lock);
nengel@2 581 h->ip[i].ed_q[ h->ip[i].ed_fi] = *s;
nengel@2 582 h->ip[i].ed_cnt++;
nengel@2 583 h->ip[i].ed_fi++; h->ip[i].ed_fi %= MAX_SLICE_COUNT;
nengel@2 584 pthread_cond_signal(&h->ip[i].ed_cond);
nengel@2 585 pthread_mutex_unlock(&h->ip[i].ed_lock);
nengel@2 586 }
nengel@2 587 }
nengel@2 588 {
nengel@2 589 pthread_mutex_lock(&h->lock[ENTROPY4]);
nengel@2 590 while (h->ed_reorder_cnt>=MAX_SLICE_COUNT)
nengel@2 591 pthread_cond_wait(&h->cond[ENTROPY4], &h->lock[ENTROPY4]);
nengel@2 592 h->ed_reorder_q[h->ed_reorder_fi] = *s;
nengel@2 593 h->ed_reorder_cnt++;
nengel@2 594 h->ed_reorder_fi++; h->ed_reorder_fi %= MAX_SLICE_COUNT;
nengel@2 595 pthread_cond_signal(&h->cond[ENTROPY4]);
nengel@2 596 pthread_mutex_unlock(&h->lock[ENTROPY4]);
nengel@2 597
nengel@2 598 }
nengel@2 599 pthread_join(h->ed_B_dist, NULL);
nengel@2 600 for(i=0; i<h->edip_threads; i++){
nengel@2 601 pthread_join(h->ed_IP_thr[i], NULL);
nengel@2 602 }
nengel@2 603 pthread_exit(NULL);
nengel@2 604 return NULL;
nengel@2 605 }
nengel@2 606
nengel@2 607 static pthread_t ed_IPB_dist;
nengel@2 608 static void *entropy_IPB_cell_thread(void *arg){
nengel@2 609 H264Context *h = (H264Context *) arg;
nengel@2 610 int i;
nengel@2 611 EDSlice reorder[MAX_SLICE_COUNT];
nengel@2 612 int ip_poc[MAX_SLICE_COUNT][2]={0,};
nengel@2 613 int next_ip_id=0;
nengel@2 614 int ip_poc_cnt=0;
nengel@2 615 EDSlice *s;
nengel@2 616 int reorder_cnt=0;
nengel@2 617 unsigned next_pic_num=0;
nengel@2 618
nengel@2 619 pthread_create(&ed_IPB_dist, NULL, entr_IPB_distribute, h);
nengel@2 620 int count =0;
nengel@2 621 for(;;){
nengel@2 622 //signals received from the entropy decoders
nengel@2 623 {
nengel@2 624 pthread_mutex_lock(&h->lock[ENTROPY4]);
nengel@2 625 while (h->ed_reorder_cnt<=0)
nengel@2 626 pthread_cond_wait(&h->cond[ENTROPY4], &h->lock[ENTROPY4]);
nengel@2 627 s= &h->ed_reorder_q[h->ed_reorder_fo];
nengel@2 628 h->ed_reorder_fo++; h->ed_reorder_fo %=MAX_SLICE_COUNT;
nengel@2 629 pthread_mutex_unlock(&h->lock[ENTROPY4]);
nengel@2 630 }
nengel@2 631
nengel@2 632 if (s->state >=0 && s->slice_type_nos != FF_B_TYPE){
nengel@2 633 for (i=0; i<ip_poc_cnt; i++){
nengel@2 634 if (s->ip_id < ip_poc[i][0]){
nengel@2 635 memmove(ip_poc[i+1], ip_poc[i], 2*(ip_poc_cnt-i)*sizeof(int));
nengel@2 636 break;
nengel@2 637 }
nengel@2 638 }
nengel@2 639 ip_poc[i][0]= s->ip_id;
nengel@2 640 ip_poc[i][1]= s->current_picture->poc;
nengel@2 641 ip_poc_cnt++;
nengel@2 642
nengel@2 643 while (next_ip_id == ip_poc[0][0]){
nengel@2 644 pthread_mutex_lock(&h->lock[ENTROPY2]);
nengel@2 645 h->ed_poc = ip_poc[0][1];
nengel@2 646
nengel@2 647 pthread_cond_signal(&h->cond[ENTROPY2]);
nengel@2 648 pthread_mutex_unlock(&h->lock[ENTROPY2]);
nengel@2 649 memmove(ip_poc[0], ip_poc[1], 2*(ip_poc_cnt-1)*sizeof(int));
nengel@2 650 ip_poc_cnt--;
nengel@2 651 next_ip_id++;
nengel@2 652 }
nengel@2 653 }
nengel@2 654
nengel@2 655 for(i=reorder_cnt; i>0; i--){
nengel@2 656 if (s->coded_pic_num < reorder[i-1].coded_pic_num)
nengel@2 657 break;
nengel@2 658 reorder[i]=reorder[i-1];
nengel@2 659 }
nengel@2 660 reorder[i]=*s;
nengel@2 661
nengel@2 662 while(reorder_cnt>=0){
nengel@2 663 if (next_pic_num!=reorder[reorder_cnt].coded_pic_num){
nengel@2 664 break;
nengel@2 665 }
nengel@2 666 EDSlice *es = &reorder[reorder_cnt];
nengel@2 667
nengel@2 668 {
nengel@2 669 pthread_mutex_lock(&h->lock[MBDEC]);
nengel@2 670 while (h->mbdec_cnt >= MAX_SLICE_COUNT)
nengel@2 671 pthread_cond_wait(&h->cond[MBDEC], &h->lock[MBDEC]);
nengel@2 672 copyEDtoMBSlice(&h->mbdec_q[h->mbdec_fi], es);
nengel@2 673
nengel@2 674 h->mbdec_cnt++;
nengel@2 675 h->mbdec_fi++; h->mbdec_fi %= MAX_SLICE_COUNT;
nengel@2 676 pthread_cond_signal(&h->cond[MBDEC]);
nengel@2 677 pthread_mutex_unlock(&h->lock[MBDEC]);
nengel@2 678
nengel@2 679 }
nengel@2 680
nengel@2 681 if (es->state<0)
nengel@2 682 goto end;
nengel@2 683
nengel@2 684 assert(es->current_picture);
nengel@2 685 for (int i=0; i<es->release_cnt; i++){
nengel@2 686 release_pib_entry(h, es->release_ref[i], 2);
nengel@2 687 }
nengel@2 688 release_pib_entry(h, es->current_picture, 1);
nengel@2 689 av_freep(&es->gb.raw);
nengel@2 690 if (es->gb.rbsp)
nengel@2 691 av_freep(&es->gb.rbsp);
nengel@2 692
nengel@2 693 next_pic_num++;
nengel@2 694 reorder_cnt--;
nengel@2 695 }
nengel@2 696 reorder_cnt++;
nengel@2 697
nengel@2 698 {
nengel@2 699 pthread_mutex_lock(&h->lock[ENTROPY4]);
nengel@2 700 h->ed_reorder_cnt--;
nengel@2 701 pthread_cond_signal(&h->cond[ENTROPY4]);
nengel@2 702 pthread_mutex_unlock(&h->lock[ENTROPY4]);
nengel@2 703 }
nengel@2 704 }
nengel@2 705
nengel@2 706 end:
nengel@2 707 pthread_join(ed_IPB_dist, NULL);
nengel@2 708 pthread_exit(NULL);
nengel@2 709 return NULL;
nengel@2 710 }
nengel@2 711
nengel@2 712
nengel@2 713 static void fill_spe_slice(H264slice *dst, const MBSlice *src, H264Context *h){
nengel@2 714 dst->deblocking_filter =1;
nengel@2 715 dst->linesize = src->current_picture->linesize[0];
nengel@2 716 dst->uvlinesize = src->current_picture->linesize[1];
nengel@2 717 dst->mb_width = h->mb_width;
nengel@2 718 dst->mb_height = h->mb_height;
nengel@2 719 dst->use_weight = src->use_weight;
nengel@2 720 dst->use_weight_chroma = src->use_weight_chroma;
nengel@2 721 dst->luma_log2_weight_denom = src->luma_log2_weight_denom;
nengel@2 722 dst->chroma_log2_weight_denom = src->chroma_log2_weight_denom;
nengel@2 723
nengel@2 724 //weights later
nengel@2 725 memcpy(dst->luma_weight, src->luma_weight, 16*2*2*sizeof(int16_t));
nengel@2 726 memcpy(dst->chroma_weight, src->chroma_weight, 16*2*2*2*sizeof(int16_t));
nengel@2 727 memcpy(dst->implicit_weight, src->implicit_weight, 16*16*2*sizeof(int16_t));
nengel@2 728
nengel@2 729 for(int list=0; list<2; list++){
nengel@2 730 for (int i=0; i<src->ref_count[list]; i++){
nengel@2 731 Picture_spu *p_dst = &dst->ref_list[list][i];
nengel@2 732 DecodedPicture *p_src = src->ref_list[list][i];
nengel@2 733 if (p_src){
nengel@2 734 p_dst->data[0] = p_src->data[0];
nengel@2 735 p_dst->data[1] = p_src->data[1];
nengel@2 736 p_dst->data[2] = p_src->data[2];
nengel@2 737 }
nengel@2 738 }
nengel@2 739 }
nengel@2 740 dst->state = src->state;
nengel@2 741
nengel@2 742 dst->emu_edge_width =32;
nengel@2 743 dst->emu_edge_height =32;
nengel@2 744 dst->slice_type = src->slice_type;
nengel@2 745 dst->slice_type_nos = src->slice_type_nos;
nengel@2 746 dst->slice_alpha_c0_offset = src->slice_alpha_c0_offset;
nengel@2 747 dst->slice_beta_offset = src->slice_beta_offset;
nengel@2 748
nengel@2 749 memcpy(dst->chroma_qp_table, src->pps.chroma_qp_table, 2*64);
nengel@2 750
nengel@2 751 dst->blocks = src->mbs;
nengel@2 752 dst->dst_y = src->current_picture->data[0];
nengel@2 753 dst->dst_cb = src->current_picture->data[1];
nengel@2 754 dst->dst_cr = src->current_picture->data[2];
nengel@2 755 }
nengel@2 756
nengel@2 757 static void decode_slice_mb_seq_cell(H264Context *h, MBRecContext *d, MBSlice *s, DecodedPicture *tmp){
nengel@2 758 static int rl_fi=0;
nengel@2 759
nengel@2 760 DECLARE_ALIGNED(16, H264slice, spe_slice);
nengel@2 761 H264spe *p=&spe_params[0];
nengel@2 762 unsigned status;
nengel@2 763 uint8_t *dst_y, *dst_cb, *dst_cr;
nengel@2 764
nengel@2 765 DecodedPicture *dp;
nengel@2 766
nengel@2 767 for (int i=0; i<2; i++){
nengel@2 768 for(int j=0; j< s->ref_count[i]; j++){
nengel@2 769 if (s->ref_list_cpn[i][j] ==-1)
nengel@2 770 continue;
nengel@2 771 int k;
nengel@2 772 for (k=0; k<DPB_SIZE; k++){
nengel@2 773 if(h->dpb[k].reference >= 2 && h->dpb[k].cpn == s->ref_list_cpn[i][j]){
nengel@2 774 s->ref_list[i][j] = &h->dpb[k];
nengel@2 775 break;
nengel@2 776 }
nengel@2 777 }
nengel@2 778 }
nengel@2 779 }
nengel@2 780
nengel@2 781 dp = get_dpb_entry(h);
nengel@2 782 init_dpb_entry(dp, s, d->width, d->height);
nengel@2 783
nengel@2 784 if (h->no_mbd)
nengel@2 785 return;
nengel@2 786
nengel@2 787
nengel@2 788 fill_spe_slice(&spe_slice, s, h);
nengel@2 789 spe_mfcio_get(spe_context[0], (unsigned) (spe_slice_buf[0] + rl_fi), &spe_slice, sizeof(H264slice), 15, 0, 0);
nengel@2 790 spe_mfcio_tag_status_read(spe_context[0], 1<<15, SPE_TAG_ALL, &status);
nengel@2 791 rl_fi++; rl_fi %= 2;
nengel@2 792
nengel@2 793 _spe_in_mbox_write(spe_control_area[0], 0);
nengel@2 794 while (atomic_read(rl_cnt)<=0){
nengel@2 795 //pthread_yield();
nengel@2 796 usleep(1000);
nengel@2 797 }
nengel@2 798 atomic_dec(rl_cnt);
nengel@2 799
nengel@2 800
nengel@2 801 /** This is error free, no visual artifacts, however, md5sum fails.... (WTF) **/
nengel@2 802 // memcpy(tmp->data[0], s->current_picture->data[0], tmp->linesize[0]*h->mb_height*16);
nengel@2 803 // memcpy(tmp->data[1], s->current_picture->data[1], tmp->linesize[1]*h->mb_height*8);
nengel@2 804 // memcpy(tmp->data[2], s->current_picture->data[2], tmp->linesize[1]*h->mb_height*8);
nengel@2 805 //
nengel@2 806 // memset(s->current_picture->data[0], 0, tmp->linesize[0]*h->mb_height*16);
nengel@2 807 // memset(s->current_picture->data[1], 0, tmp->linesize[1]*h->mb_height*8);
nengel@2 808 // memset(s->current_picture->data[2], 0, tmp->linesize[1]*h->mb_height*8);
nengel@2 809 //
nengel@2 810 // decode_slice_mb_seq(d, s);
nengel@2 811 //
nengel@2 812 // for (int i=0; i<h->mb_height*16; i++){
nengel@2 813 // for (int j=0; j<h->width; j++){
nengel@2 814 // if (tmp->data[0][j + i*tmp->linesize[0]] != s->current_picture->data[0][j + i*tmp->linesize[0]]){
nengel@2 815 // printf("%d, %d, %d, %d\n", j, i, tmp->data[0][j + i*tmp->linesize[0]], s->current_picture->data[0][j + i*tmp->linesize[0]]);
nengel@2 816 // return;
nengel@2 817 // }
nengel@2 818 // }
nengel@2 819 // }
nengel@2 820 //
nengel@2 821 // for (int i=0; i<h->mb_height*8; i++){
nengel@2 822 // for (int j=0; j<h->width/2; j++){
nengel@2 823 // if (tmp->data[1][j + i*tmp->linesize[1]] != s->current_picture->data[1][j + i*tmp->linesize[1]]){
nengel@2 824 // printf("%d, %d, %d, %d\n", j, i, tmp->data[1][j + i*tmp->linesize[1]], s->current_picture->data[1][j + i*tmp->linesize[1]]);
nengel@2 825 // return;
nengel@2 826 // }
nengel@2 827 // }
nengel@2 828 // }
nengel@2 829 //
nengel@2 830 // for (int i=0; i<h->mb_height*8; i++){
nengel@2 831 // for (int j=0; j<h->width/2; j++){
nengel@2 832 // if (tmp->data[2][j + i*tmp->linesize[1]] != s->current_picture->data[2][j + i*tmp->linesize[1]]){
nengel@2 833 // printf("%d, %d, %d, %d\n", j, i, tmp->data[2][j + i*tmp->linesize[1]], s->current_picture->data[2][j + i*tmp->linesize[1]]);
nengel@2 834 // return;
nengel@2 835 // }
nengel@2 836 // }
nengel@2 837 // }
nengel@2 838
nengel@2 839
nengel@2 840 //printf("dst_y %p\n", dst_y);
nengel@2 841
nengel@2 842
nengel@2 843 for (int i=0; i<s->release_cnt; i++){
nengel@2 844 for(int j=0; j<DPB_SIZE; j++){
nengel@2 845 if(h->dpb[j].cpn== s->release_ref_cpn[i]){
nengel@2 846 release_dpb_entry(h, &h->dpb[j], 2);
nengel@2 847 break;
nengel@2 848 }
nengel@2 849 }
nengel@2 850 }
nengel@2 851 s->release_cnt=0;
nengel@2 852
nengel@2 853 }
nengel@2 854
nengel@2 855 static void *h264_spe_thread(void * thread_args ) {
nengel@2 856 H264spe *params = (H264spe *)thread_args;
nengel@2 857 unsigned int spe_id = params->spe_id;
nengel@2 858 unsigned int runflags = 0;
nengel@2 859 unsigned int entry = SPE_DEFAULT_ENTRY;
nengel@2 860 // run SPE context
nengel@2 861 spe_context_run(spe_context[spe_id], &entry, runflags, (void*) params, NULL, NULL);
nengel@2 862 // done - now exit thread
nengel@2 863 pthread_exit(NULL);
nengel@2 864 }
nengel@2 865
nengel@2 866 static int create_spe_MBR_threads(H264Context *h, int num_threads) {
nengel@2 867 int i;
nengel@2 868
nengel@2 869 // reserve memory for spe thread id, context and argument addresses
nengel@2 870 spe_tid = av_malloc(num_threads * sizeof (pthread_t));
nengel@2 871 spe_context = av_malloc(num_threads * sizeof (spe_context_ptr_t));
nengel@2 872 spe_params = av_malloc(num_threads * sizeof (H264spe));
nengel@2 873 spe_control_area = av_malloc(num_threads * sizeof (void*));
nengel@2 874 spe_ls_area = av_malloc(num_threads * sizeof (void*));
nengel@2 875 spe_slice_buf = av_malloc(num_threads * sizeof (void*));
nengel@2 876
nengel@2 877 spe_program_handle_t *spe_program = spe_image_open("spe_mbd");
nengel@2 878
nengel@2 879 if (spe_program == NULL)
nengel@2 880 av_log(AV_LOG_ERROR, "PPE: error opening SPE object image:%d. error=%s \n", errno, strerror(errno));
nengel@2 881
nengel@2 882 for (i = 0; i < num_threads; i++) {
nengel@2 883 // create context for spe program
nengel@2 884 spe_context[i] = spe_context_create(SPE_MAP_PS, NULL);
nengel@2 885 if (spe_context[i] == NULL)
nengel@2 886 av_log(AV_LOG_ERROR, "PPE: error creating SPE context:%d. error=%s \n", errno, strerror(errno));
nengel@2 887 // load SPE program into main memory
nengel@2 888 if ((spe_program_load(spe_context[i], spe_program)) == -1)
nengel@2 889 av_log(AV_LOG_ERROR, "PPE: error loading SPE context:%d. error=%s \n", errno, strerror(errno));
nengel@2 890 //get the control_area for fast mailboxing
nengel@2 891 if ((spe_control_area[i] = spe_ps_area_get(spe_context[i], SPE_CONTROL_AREA)) == NULL)
nengel@2 892 av_log(AV_LOG_ERROR, "PPE: error retrieving SPE control area:%d. error=%s \n", errno, strerror(errno));
nengel@2 893 //get ls area for inter spe communication
nengel@2 894 if ((spe_ls_area[i] = spe_ls_area_get(spe_context[i])) == NULL)
nengel@2 895 av_log(AV_LOG_ERROR, "PPE: error retrieving SPE ls area:%d. error=%s \n", errno, strerror(errno));
nengel@2 896 }
nengel@2 897
nengel@2 898 for (i = 0; i < num_threads; i++) {
nengel@2 899 spe_params[i].mb_width = h->mb_width;
nengel@2 900 spe_params[i].mb_height = h->mb_height;
nengel@2 901 spe_params[i].mb_stride = h->mb_stride;
nengel@2 902 spe_params[i].spe_id = i;
nengel@2 903 spe_params[i].spe_total = num_threads;
nengel@2 904 //spe_params[i].slice_params= &slice_params;
nengel@2 905 spe_params[i].src_spe = spe_ls_area[(i-1+num_threads)%num_threads];
nengel@2 906 spe_params[i].tgt_spe = spe_ls_area[(i+1)%num_threads];
nengel@2 907
nengel@2 908 spe_params[i].rl_lock = rl_lock;
nengel@2 909 spe_params[i].rl_cond = rl_cond;
nengel@2 910 spe_params[i].rl_cnt = rl_cnt;
nengel@2 911 spe_params[i].lock = (mutex_ea_t) (unsigned) &mutex_var[i];
nengel@2 912 spe_params[i].cond = (cond_ea_t) (unsigned) &cond_var[i];
nengel@2 913 spe_params[i].cnt = (atomic_ea_t)(unsigned) &atomic_var[i]; atomic_set(spe_params[i].cnt, 0);
nengel@2 914
nengel@2 915 mutex_init(spe_params[i].lock);
nengel@2 916 cond_init(spe_params[i].cond);
nengel@2 917 if (pthread_create(&spe_tid[i], NULL, h264_spe_thread, (void *) &spe_params[i]))
nengel@2 918 av_log(AV_LOG_ERROR, "create_workers: pthread create for spe failed %d\n", i);
nengel@2 919
nengel@2 920 //slicebufaddr
nengel@2 921 spe_slice_buf[i] = (H264slice *) _spe_out_mbox_read(spe_control_area[i]);
nengel@2 922
nengel@2 923 av_log(AV_LOG_DEBUG, "create_workers: created spe thread %d\n", i);
nengel@2 924 }
nengel@2 925 spe_image_close(spe_program);
nengel@2 926 return 0;
nengel@2 927 }
nengel@2 928
nengel@2 929 //_spe_out_mbox_read(spe_control_area[i]);
nengel@2 930 /**
nengel@2 931 * joins all the spe worker threads.
nengel@2 932 */
nengel@2 933 static void join_spe_worker_threads(H264slice *s, int num_threads, int *rl_fi) {
nengel@2 934 int i;
nengel@2 935 ///just to keep coding consistency.
nengel@2 936 {
nengel@2 937 for (i=0; i<num_threads; i++){
nengel@2 938 H264spe *p=&spe_params[i];
nengel@2 939 unsigned status;
nengel@2 940
nengel@2 941 while (atomic_read(p->cnt)>=2) {//double buffered
nengel@2 942 usleep(1000);//cond_wait(p->cond, p->lock);
nengel@2 943 }
nengel@2 944
nengel@2 945 spe_mfcio_get(spe_context[i], (unsigned) (spe_slice_buf[i] + rl_fi[i]), s, sizeof(H264slice), 15, 0, 0);
nengel@2 946 spe_mfcio_tag_status_read(spe_context[i], 1<<15, SPE_TAG_ALL, &status);
nengel@2 947 //mutex_unlock(p->lock);
nengel@2 948 _spe_in_mbox_write(spe_control_area[i], 0);
nengel@2 949 }
nengel@2 950 }
nengel@2 951
nengel@2 952 for (i=0; i<num_threads; i++){
nengel@2 953 pthread_join(spe_tid[i], NULL);
nengel@2 954 }
nengel@2 955
nengel@2 956 for (i=0; i<num_threads; i++){
nengel@2 957 spe_context_destroy(spe_context[i]);
nengel@2 958 }
nengel@2 959 atomic_inc(rl_cnt);
nengel@2 960
nengel@2 961 // destroy memory reserved for spe thread id, context and argument addresses
nengel@2 962 av_freep(&spe_tid);
nengel@2 963 av_freep(&spe_context);
nengel@2 964 av_freep(&spe_params);
nengel@2 965 av_freep(&spe_control_area);
nengel@2 966 av_freep(&spe_slice_buf);
nengel@2 967 }
nengel@2 968
nengel@2 969
nengel@2 970 static void *rl_dist_thread(void *arg){
nengel@2 971 int i;
nengel@2 972 H264Context *h = (H264Context *) arg;
nengel@2 973 MBSlice *s;
nengel@2 974 DecodedPicture *dp;
nengel@2 975 int rl_fi[16]={0,};
nengel@2 976 DECLARE_ALIGNED(16, H264slice, spe_slice);
nengel@2 977
nengel@2 978 create_spe_MBR_threads(h, h->rl_threads);
nengel@2 979 for(;;){
nengel@2 980 {
nengel@2 981 pthread_mutex_lock(&h->lock[MBDEC]);
nengel@2 982 while (h->mbdec_cnt<=0)
nengel@2 983 pthread_cond_wait(&h->cond[MBDEC], &h->lock[MBDEC]);
nengel@2 984 s= &h->mbdec_q[h->mbdec_fo];
nengel@2 985 h->mbdec_fo++; h->mbdec_fo %= MAX_SLICE_COUNT;
nengel@2 986 pthread_mutex_unlock(&h->lock[MBDEC]);
nengel@2 987 }
nengel@2 988
nengel@2 989 if (s->state<0){
nengel@2 990 break;
nengel@2 991 }
nengel@2 992 for (int i=0; i<2; i++){
nengel@2 993 for(int j=0; j< s->ref_count[i]; j++){
nengel@2 994 if (s->ref_list_cpn[i][j] ==-1)
nengel@2 995 continue;
nengel@2 996 int k;
nengel@2 997 for (k=0; k<DPB_SIZE; k++){
nengel@2 998 if(h->dpb[k].reference >= 2 && h->dpb[k].cpn == s->ref_list_cpn[i][j]){
nengel@2 999 s->ref_list[i][j] = &h->dpb[k];
nengel@2 1000 break;
nengel@2 1001 }
nengel@2 1002 }
nengel@2 1003
nengel@2 1004 }
nengel@2 1005 }
nengel@2 1006 dp = get_dpb_entry(h);
nengel@2 1007 init_dpb_entry(dp, s, h->width, h->height);
nengel@2 1008 assert(s->current_picture);
nengel@2 1009 {
nengel@2 1010 while (atomic_read(rl_cnt) >=MAX_SLICE_COUNT){
nengel@2 1011 usleep(1000);
nengel@2 1012 }
nengel@2 1013 h->mbrel_q[h->mbrel_fi] = *s;
nengel@2 1014
nengel@2 1015 h->mbrel_fi++; h->mbrel_fi %= MAX_SLICE_COUNT;
nengel@2 1016 }
nengel@2 1017 {
nengel@2 1018 if(h->no_mbd){
nengel@2 1019 atomic_inc(rl_cnt);
nengel@2 1020 }else {
nengel@2 1021 fill_spe_slice(&spe_slice, s, h);
nengel@2 1022 for (i=0; i<h->rl_threads; i++){
nengel@2 1023 H264spe *p=&spe_params[i];
nengel@2 1024 unsigned status;
nengel@2 1025 while (atomic_read(p->cnt)>=2){ //double buffered
nengel@2 1026 usleep(1000);
nengel@2 1027 //cond_wait(p->cond, p->lock);
nengel@2 1028 }
nengel@2 1029 spe_mfcio_get(spe_context[i], (unsigned) (spe_slice_buf[i] + rl_fi[i]), &spe_slice, sizeof(H264slice), 15, 0, 0);
nengel@2 1030 spe_mfcio_tag_status_read(spe_context[i], 1<<15, SPE_TAG_ALL, &status);
nengel@2 1031 rl_fi[i]++; rl_fi[i] %= 2;
nengel@2 1032 atomic_inc(p->cnt);
nengel@2 1033
nengel@2 1034 _spe_in_mbox_write(spe_control_area[i], 0);
nengel@2 1035 }
nengel@2 1036 }
nengel@2 1037 }
nengel@2 1038
nengel@2 1039 {
nengel@2 1040 pthread_mutex_lock(&h->lock[MBDEC]);
nengel@2 1041 h->mbdec_cnt--;
nengel@2 1042 pthread_cond_signal(&h->cond[MBDEC]);
nengel@2 1043 pthread_mutex_unlock(&h->lock[MBDEC]);
nengel@2 1044 }
nengel@2 1045
nengel@2 1046 }
nengel@2 1047
nengel@2 1048 {
nengel@2 1049 while (atomic_read(rl_cnt) >=MAX_SLICE_COUNT){
nengel@2 1050 usleep(1000);
nengel@2 1051 }
nengel@2 1052 h->mbrel_q[h->mbrel_fi] = *s;
nengel@2 1053
nengel@2 1054 h->mbrel_fi++; h->mbrel_fi %= MAX_SLICE_COUNT;
nengel@2 1055 }
nengel@2 1056 spe_slice.state=-1;
nengel@2 1057 join_spe_worker_threads(&spe_slice, h->rl_threads, rl_fi);
nengel@2 1058 pthread_exit(NULL);
nengel@2 1059 return NULL;
nengel@2 1060 }
nengel@2 1061
nengel@2 1062 static void *mbdec_cell_thread(void *arg){
nengel@2 1063 H264Context *h = (H264Context *) arg;
nengel@2 1064
nengel@2 1065 rl_lock = (mutex_ea_t) (unsigned) &rl_mutex_var;
nengel@2 1066 rl_cond = (cond_ea_t) (unsigned) &rl_cond_var;
nengel@2 1067 rl_cnt = (atomic_ea_t) (unsigned) &rl_cnt_var;
nengel@2 1068 atomic_set(rl_cnt, 0);
nengel@2 1069 mutex_init(rl_lock);
nengel@2 1070 cond_init(rl_cond);
nengel@2 1071 // printf("mbdec, pid %d\n", syscall(SYS_gettid));
nengel@2 1072 pthread_create(&h->rl_dist_thr, NULL, rl_dist_thread, h);
nengel@2 1073
nengel@2 1074 for(;;){
nengel@2 1075 MBSlice *s=NULL;
nengel@2 1076 {
nengel@2 1077 while (atomic_read(rl_cnt)<=0){
nengel@2 1078 usleep(1000);
nengel@2 1079 }
nengel@2 1080 s= &h->mbrel_q[h->mbrel_fo];
nengel@2 1081 h->mbrel_fo++; h->mbrel_fo %= MAX_SLICE_COUNT;
nengel@2 1082 }
nengel@2 1083
nengel@2 1084 if (s->state<0)
nengel@2 1085 break;
nengel@2 1086
nengel@2 1087 for (int i=0; i<s->release_cnt; i++){
nengel@2 1088 for(int j=0; j<DPB_SIZE; j++){
nengel@2 1089 if(h->dpb[j].cpn== s->release_ref_cpn[i]){
nengel@2 1090 release_dpb_entry(h, &h->dpb[j], 2);
nengel@2 1091 break;
nengel@2 1092 }
nengel@2 1093 }
nengel@2 1094 }
nengel@2 1095
nengel@2 1096 {
nengel@2 1097 EDThreadContext *ed = s->ed;
nengel@2 1098 pthread_mutex_lock(&ed->mbs_lock);
nengel@2 1099 ed->mbs_cnt++;
nengel@2 1100 pthread_cond_signal(&ed->mbs_cond);
nengel@2 1101 pthread_mutex_unlock(&ed->mbs_lock);
nengel@2 1102 }
nengel@2 1103
nengel@2 1104 {
nengel@2 1105 pthread_mutex_lock(&h->lock[WRITE]);
nengel@2 1106 while (h->write_cnt>= DPB_SIZE)
nengel@2 1107 pthread_cond_wait(&h->cond[WRITE], &h->lock[WRITE]);
nengel@2 1108 assert(s);
nengel@2 1109 assert(s->current_picture);
nengel@2 1110 h->write_q[h->write_fi]= s->current_picture;
nengel@2 1111 h->write_cnt++;
nengel@2 1112 h->write_fi++; h->write_fi %= DPB_SIZE;
nengel@2 1113 pthread_cond_signal(&h->cond[WRITE]);
nengel@2 1114 pthread_mutex_unlock(&h->lock[WRITE]);
nengel@2 1115
nengel@2 1116 }
nengel@2 1117 {
nengel@2 1118 atomic_dec(rl_cnt);
nengel@2 1119 }
nengel@2 1120
nengel@2 1121 }
nengel@2 1122
nengel@2 1123 {//propagate exit
nengel@2 1124 pthread_mutex_lock(&h->lock[WRITE]);
nengel@2 1125 while (h->write_cnt>= DPB_SIZE)
nengel@2 1126 pthread_cond_wait(&h->cond[WRITE], &h->lock[WRITE]);
nengel@2 1127 last_pic.reference = -1;
nengel@2 1128 h->write_q[h->write_fi] = &last_pic;
nengel@2 1129 h->write_cnt++;
nengel@2 1130 h->write_fi++; h->write_fi %= DPB_SIZE;
nengel@2 1131 pthread_cond_signal(&h->cond[WRITE]);
nengel@2 1132 pthread_mutex_unlock(&h->lock[WRITE]);
nengel@2 1133
nengel@2 1134 }
nengel@2 1135 pthread_join(h->rl_dist_thr, NULL);
nengel@2 1136 pthread_exit(NULL);
nengel@2 1137 return NULL;
nengel@2 1138 }
nengel@2 1139
nengel@2 1140 /*
nengel@2 1141 * The following code is the main loop of the file converter
nengel@2 1142 */
nengel@2 1143 int h264_decode_cell(H264Context *h) {
nengel@2 1144
nengel@2 1145 pthread_t read_thr, parsenal_thr, entropy_thr, mbdec_thr, write_thr;
nengel@2 1146
nengel@2 1147 start_timer();
nengel@2 1148
nengel@2 1149 pthread_create(&read_thr, NULL, read_thread, h);
nengel@2 1150 pthread_create(&parsenal_thr, NULL, parsenal_thread, h);
nengel@2 1151 pthread_create(&entropy_thr, NULL, entropy_IPB_cell_thread, h);
nengel@2 1152 pthread_create(&mbdec_thr, NULL, mbdec_cell_thread, h);
nengel@2 1153 pthread_create(&write_thr, NULL, write_thread, h);
nengel@2 1154
nengel@2 1155 pthread_join(read_thr, NULL);
nengel@2 1156 pthread_join(parsenal_thr, NULL);
nengel@2 1157 pthread_join(entropy_thr, NULL);
nengel@2 1158 pthread_join(mbdec_thr, NULL);
nengel@2 1159 pthread_join(write_thr, NULL);
nengel@2 1160
nengel@2 1161 return 0;
nengel@2 1162 }
nengel@2 1163
nengel@2 1164 /*
nengel@2 1165 * The following code is the main loop of the file converter
nengel@2 1166 */
nengel@2 1167 int h264_decode_cell_seq(H264Context *h) {
nengel@2 1168 ParserContext *pc;
nengel@2 1169 NalContext *nc;
nengel@2 1170 EntropyContext *ec;
nengel@2 1171 MBRecContext *rc;
nengel@2 1172 OutputContext *oc;
nengel@2 1173
nengel@2 1174 RawFrame frm;
nengel@2 1175 EDSlice slice, *s=&slice;
nengel@2 1176 MBSlice mbslice, *s2=&mbslice;
nengel@2 1177 PictureInfo *pic=NULL;
nengel@2 1178 DecodedPicture *out;
nengel@2 1179 int size;
nengel@2 1180 int frames=0;
nengel@2 1181
nengel@2 1182 pc = get_parse_context(h->ifile);
nengel@2 1183 nc = get_nal_context(h->width, h->height);
nengel@2 1184 ec = get_entropy_context( h );
nengel@2 1185 rc = get_mbrec_context(h);
nengel@2 1186 oc = get_output_context( h );
nengel@2 1187
nengel@2 1188 rl_lock = (mutex_ea_t) (unsigned) &rl_mutex_var;
nengel@2 1189 rl_cond = (cond_ea_t) (unsigned) &rl_cond_var;
nengel@2 1190 rl_cnt = (atomic_ea_t) (unsigned) &rl_cnt_var;
nengel@2 1191 atomic_set(rl_cnt, 0);
nengel@2 1192 mutex_init(rl_lock);
nengel@2 1193 cond_init(rl_cond);
nengel@2 1194
nengel@2 1195 memset(s, 0, sizeof(EDSlice));
nengel@2 1196 ff_init_slice(nc, s);
nengel@2 1197 s->mbs = av_malloc( h->mb_height * h->mb_width * sizeof(H264Mb));
nengel@2 1198
nengel@2 1199 DecodedPicture tmp;
nengel@2 1200 tmp.base[0]=0;
nengel@2 1201 ///fix this when want to debug the Cell errors
nengel@2 1202 //init_dpb_entry(&tmp, h->width, h->height);
nengel@2 1203
nengel@2 1204 create_spe_ED_threads(h, 1, 0);
nengel@2 1205 create_spe_MBR_threads(h, 1);
nengel@2 1206
nengel@2 1207 start_timer();
nengel@2 1208
nengel@2 1209 while(!pc->final_frame && frames++ < h->num_frames){
nengel@2 1210
nengel@2 1211 av_read_frame_internal(pc, &frm);
nengel@2 1212
nengel@2 1213 PictureInfo *pic=get_pib_entry(h);
nengel@2 1214 ff_alloc_picture_info(nc, s, pic);
nengel@2 1215 decode_nal_units(nc, s, &frm);
nengel@2 1216
nengel@2 1217 copyEDtoMBSlice(s2, s);
nengel@2 1218 decode_slice_entropy_cell_seq(h, ec, s);
nengel@2 1219
nengel@2 1220 decode_slice_mb_seq_cell(h, rc, s2, &tmp);
nengel@2 1221
nengel@2 1222 out =output_frame(h, oc, s2->current_picture, h->ofile, h->frame_width, h->frame_height);
nengel@2 1223
nengel@2 1224 if (out){
nengel@2 1225 release_dpb_entry(h, out, 1);
nengel@2 1226 }
nengel@2 1227 print_report(oc->frame_number, oc->video_size, 0, h->verbose);
nengel@2 1228 }
nengel@2 1229 while ((out=output_frame(h, oc, NULL, h->ofile, h->frame_width, h->frame_height))) ;
nengel@2 1230
nengel@2 1231 print_report(oc->frame_number, oc->video_size, 1, h->verbose);
nengel@2 1232
nengel@2 1233 /* finished ! */
nengel@2 1234 av_freep(&s->mbs);
nengel@2 1235
nengel@2 1236 free_parse_context(pc);
nengel@2 1237 free_nal_context (nc);
nengel@2 1238 free_entropy_context(ec);
nengel@2 1239 free_mbrec_context(rc);
nengel@2 1240 free_output_context(oc);
nengel@2 1241 return 0;
nengel@2 1242 }