nengel@2: /* nengel@2: * Copyright (c) 2009 TUDelft nengel@2: * nengel@2: * Cell Parallel SPU - 2DWave Macroblock Decoding. nengel@2: */ nengel@2: nengel@2: /** nengel@2: * @file libavcodec/cell/spu/h264_main_spu.c nengel@2: * Cell Parallel SPU - 2DWave Macroblock Decoding nengel@2: * @author C C Chi nengel@2: * nengel@2: * SIMD kernels nengel@2: * H.264/AVC motion compensation nengel@2: * @author Mauricio Alvarez nengel@2: * @author Albert Paradis nengel@2: */ nengel@2: nengel@2: nengel@2: /* Enable this lines to enable simulator statistic or generate traces */ nengel@2: nengel@2: //#define ENABLE_SIMULATOR nengel@2: //#define ENABLE_PARAVER_TRACING_CELL nengel@2: nengel@2: #ifdef ENABLE_SIMULATOR nengel@2: #include "/opt/ibm/systemsim-cell/include/callthru/spu/profile.h" nengel@2: #endif nengel@2: nengel@2: #ifdef ENABLE_TRACES nengel@2: #include "spu_trace.h" nengel@2: #endif nengel@2: #include nengel@2: #include nengel@2: #include nengel@2: #include nengel@2: #include nengel@2: #include nengel@2: #include nengel@2: nengel@2: //#include "dsputil_cell.h" nengel@2: #include "types_spu.h" nengel@2: #include "h264_intra_spu.h" nengel@2: #include "h264_decode_mb_spu.h" nengel@2: #include "h264_mc_spu.h" nengel@2: #include "h264_tables.h" nengel@2: #include "h264_dma.h" nengel@2: nengel@2: nengel@2: /** functions for supporting tracing with paraver for the SPU nengel@2: * nengel@2: */ nengel@2: inline void trace_init_SPU(){ nengel@2: #ifdef ENABLE_PARAVER_TRACING_CELL nengel@2: SPUtrace_init (); nengel@2: #endif nengel@2: } nengel@2: nengel@2: inline void trace_fini_SPU(){ nengel@2: #ifdef ENABLE_PARAVER_TRACING_CELL nengel@2: SPUtrace_fini (); nengel@2: #endif nengel@2: } nengel@2: nengel@2: inline void trace_event_SPU(int event, int id){ nengel@2: #ifdef ENABLE_PARAVER_TRACING_CELL nengel@2: SPUtrace_event (event, id); nengel@2: #else nengel@2: (void) event; nengel@2: (void) id; nengel@2: #endif nengel@2: } nengel@2: nengel@2: // for simulator statistic nengel@2: inline void clear_statistic(){ nengel@2: #ifdef ENABLE_SIMULATOR nengel@2: prof_clear(); nengel@2: #endif nengel@2: } nengel@2: nengel@2: inline void start_statistic(){ nengel@2: #ifdef ENABLE_SIMULATOR nengel@2: prof_start(); nengel@2: #endif nengel@2: } nengel@2: nengel@2: inline void stop_statistic(){ nengel@2: #ifdef ENABLE_SIMULATOR nengel@2: prof_stop(); nengel@2: #endif nengel@2: } nengel@2: nengel@2: H264Context_spu h_context; // struct that contain all the params to decode a macroblock nengel@2: nengel@2: DECLARE_ALIGNED_16(spe_pos, dma_temp); //dma temp for sending nengel@2: //mb position of neighbouring spes nengel@2: DECLARE_ALIGNED_16(volatile spe_pos, src_spe); //written by SPE_ID -1 nengel@2: //DECLARE_ALIGNED_16(spe_pos, tgt_spe); //written by SPE_ID +1 nengel@2: nengel@2: /** nengel@2: * Initializes the buffering of the mb data and associated mc data. The init_mb_buffer needs to nengel@2: * be called before any get_next_mb and only once at the beginning of the slice. nengel@2: * nengel@2: * Note: init_mc_buffer and get_next_mb expect the width of the picture to be more than 2 mb's nengel@2: */ nengel@2: #define TAG_OFFSET_MB MBD_buf1 nengel@2: #define TAG_OFFSET_MC MBD_mc_buf1 nengel@2: static void init_mb_buffer(H264Context_spu* h){ nengel@2: H264slice *s = h->s; nengel@2: H264Mb *next_mb; nengel@2: int mb_height = s->mb_height; nengel@2: int mb_width = s->mb_width; nengel@2: nengel@2: h->mc_idx =0; nengel@2: nengel@2: h->mb_dec = 0; nengel@2: h->mb_mc = 0; nengel@2: h->mb_dma = 0; nengel@2: nengel@2: h->curr_line %= mb_height; nengel@2: h->next_mb_idx = h->curr_line * mb_width; nengel@2: h->mb_id = h->curr_line * mb_width; nengel@2: h->n_mc= h->curr_line * mb_width; nengel@2: nengel@2: next_mb = s->blocks + h->mb_id; nengel@2: spu_dma_get(&h->mb_buf[h->mb_dma], (unsigned) next_mb, sizeof(H264Mb), h->mb_dma + TAG_OFFSET_MB); nengel@2: h->mb_dma++; nengel@2: h->mb_id++; nengel@2: nengel@2: next_mb = s->blocks + h->mb_id; nengel@2: spu_dma_get(&h->mb_buf[h->mb_dma], (unsigned) next_mb, sizeof(H264Mb), h->mb_dma + TAG_OFFSET_MB); nengel@2: h->mb_dma++; nengel@2: h->mb_id++; nengel@2: wait_dma_id(0 + TAG_OFFSET_MB); nengel@2: nengel@2: H264Mb *mb = &h->mb_buf[0]; nengel@2: H264mc *mc = &h->mc_buf[0]; nengel@2: if(!IS_INTRA(mb->mb_type)){ nengel@2: calc_mc_params(mb, mc); nengel@2: fill_ref_buf(h, mb, mc); nengel@2: } nengel@2: h->n_mc++; nengel@2: h->mb_mc++; nengel@2: } nengel@2: nengel@2: static void *get_next_mb(H264Context_spu *h){ nengel@2: H264slice *s = h->s; nengel@2: H264spe *spe = &h->spe; nengel@2: H264Mb *mb_buf = h->mb_buf; nengel@2: H264mc *mc_buf = h->mc_buf; nengel@2: H264Mb *next_mb; nengel@2: H264Mb *next_dma_mb; nengel@2: nengel@2: if (h->curr_line >= s->mb_height) nengel@2: return NULL; nengel@2: nengel@2: if (h->mb_id < h->mb_total){ nengel@2: next_dma_mb = s->blocks + h->mb_id; nengel@2: spu_dma_get(&mb_buf[h->mb_dma], (unsigned) next_dma_mb, sizeof(H264Mb), h->mb_dma + TAG_OFFSET_MB); nengel@2: h->mb_dma = (h->mb_dma+1)%3; nengel@2: h->mb_id++; nengel@2: if (h->mb_id%s->mb_width ==0){ nengel@2: h->mb_id+=(spe->spe_total-1)*s->mb_width; nengel@2: } nengel@2: } nengel@2: nengel@2: h->mc = &mc_buf[h->mc_idx]; nengel@2: wait_dma_id(h->mc_idx + TAG_OFFSET_MC); nengel@2: h->mc_idx = (h->mc_idx+1)%2; nengel@2: if (h->n_mc < h->mb_total){ nengel@2: wait_dma_id(h->mb_mc + TAG_OFFSET_MB); nengel@2: H264Mb *mb = &mb_buf[h->mb_mc]; nengel@2: H264mc *mc = &mc_buf[h->mc_idx]; nengel@2: if(!IS_INTRA(mb->mb_type)){ nengel@2: calc_mc_params(mb, mc); nengel@2: fill_ref_buf(h, mb, mc); nengel@2: } nengel@2: h->n_mc++; nengel@2: if (h->n_mc%s->mb_width ==0){ nengel@2: h->n_mc+=(spe->spe_total-1)*s->mb_width; nengel@2: } nengel@2: } nengel@2: h->next_mb_idx++; nengel@2: if (h->next_mb_idx % s->mb_width ==0){ nengel@2: h->next_mb_idx+=(spe->spe_total-1)*s->mb_width; nengel@2: h->curr_line+=spe->spe_total; nengel@2: } nengel@2: nengel@2: h->mb_mc = (h->mb_mc+1)%3; nengel@2: next_mb = &mb_buf[h->mb_dec]; nengel@2: h->mb_dec = (h->mb_dec+1)%3; nengel@2: return next_mb; nengel@2: } nengel@2: nengel@2: static void *get_next_mb_blocking(H264Context_spu *h){ nengel@2: H264slice *s = h->s; nengel@2: H264spe *spe = &h->spe; nengel@2: H264Mb *mb_buf = h->mb_buf; nengel@2: H264mc *mc_buf = h->mc_buf; nengel@2: H264Mb *next_mb; nengel@2: H264Mb *next_dma_mb; nengel@2: nengel@2: if (h->mb_id >= h->mb_total) nengel@2: return NULL; nengel@2: nengel@2: //printf("%d\n", h->mb_id); nengel@2: next_dma_mb = s->blocks + h->mb_id; nengel@2: spu_dma_get(&mb_buf[0], (unsigned) next_dma_mb, sizeof(H264Mb), MBD_buf1); nengel@2: //h->mb_dma = (h->mb_dma+1)%3; nengel@2: h->mb_id++; nengel@2: if (h->mb_id%s->mb_width ==0){ nengel@2: h->mb_id+=(spe->spe_total-1)*s->mb_width; nengel@2: } nengel@2: wait_dma_id(MBD_buf1); nengel@2: nengel@2: h->mc = &mc_buf[0]; nengel@2: //h->mc_idx = (h->mc_idx+1)%2; nengel@2: //if (h->n_mc < h->mb_total){ nengel@2: H264Mb *mb = &mb_buf[0]; nengel@2: H264mc *mc = &mc_buf[0]; nengel@2: if(!IS_INTRA(mb->mb_type)){ nengel@2: calc_mc_params(mb, mc); nengel@2: fill_ref_buf(h, mb, mc); nengel@2: } nengel@2: //h->n_mc++; nengel@2: /*if (h->n_mc%s->mb_width ==0){ nengel@2: h->n_mc+=(spe->spe_total-1)*s->mb_width; nengel@2: }*/ nengel@2: // wait_dma_id(MBD_mc_buf1); nengel@2: nengel@2: // h->next_mb_idx++; nengel@2: // if (h->next_mb_idx % s->mb_width ==0){ nengel@2: // h->next_mb_idx+=(spe->spe_total-1)*s->mb_width; nengel@2: // h->curr_line+=spe->spe_total; nengel@2: // } nengel@2: nengel@2: // h->mb_mc = (h->mb_mc+1)%3; nengel@2: next_mb = &mb_buf[0]; nengel@2: // h->mb_dec = (h->mb_dec+1)%3; nengel@2: return next_mb; nengel@2: } nengel@2: nengel@2: nengel@2: #undef TAG_OFFSET_MB nengel@2: #undef TAG_OFFSET_MC nengel@2: static inline int dep_resolved(H264Context_spu *h){ nengel@2: H264slice *s = h->s; nengel@2: int spe_id = h->spe.spe_id; nengel@2: volatile int mb_proc_dep = src_spe.count; nengel@2: if (spe_id==0) nengel@2: return (h->mb_proc < mb_proc_dep-1 +s->mb_width)? 1:0; nengel@2: else nengel@2: return (h->mb_proc < mb_proc_dep-1)? 1:0; nengel@2: } nengel@2: nengel@2: void update_tgt_spe_dep(H264Context_spu *h, int end){ nengel@2: H264Mb *mb = h->mb; nengel@2: H264slice *s = h->s; nengel@2: H264spe *spe = &h->spe; nengel@2: int mb_x = mb->mb_x; nengel@2: nengel@2: if (end || (mb_x%2==0 && mb_x!=0) || mb_x==s->mb_width-1){ nengel@2: spe_pos* dma_spe = &dma_temp; nengel@2: spe_pos* tgt_spe = (spe_pos*) ((unsigned) spe->tgt_spe + (unsigned) &src_spe); //located in target spe local store nengel@2: dma_spe->count = end? h->mb_proc+1: h->mb_proc; nengel@2: spu_dma_barrier_put(dma_spe, (unsigned) tgt_spe, sizeof(dma_temp), MBD_put); nengel@2: } nengel@2: h->mb_proc++; nengel@2: } nengel@2: nengel@2: nengel@2: int main(unsigned long long id, unsigned long long argp) nengel@2: { nengel@2: (void) id; nengel@2: H264Context_spu* h = &h_context; nengel@2: H264spe *spe_params = (H264spe *) (unsigned) argp; nengel@2: nengel@2: spu_dma_get(&h->spe, (unsigned) spe_params, sizeof(H264spe), MBD_slice); //ID_slice is used out of convienience nengel@2: wait_dma_id(MBD_slice); nengel@2: nengel@2: //clear_statistic(); nengel@2: dsputil_h264_init_cell(&h->dsp); nengel@2: ff_cropTbl_init(); nengel@2: init_pred_ptrs(&h->hpc); nengel@2: nengel@2: //send slice_buf to ppe nengel@2: spu_write_out_mbox((unsigned) h->slice_buf); nengel@2: h->sl_idx=0; nengel@2: // initialize tracing with paraver nengel@2: //trace_init_SPU(); nengel@2: h->frames =0; nengel@2: src_spe.count =0; nengel@2: h->mb_proc = 0; nengel@2: nengel@2: h->mb_id=0; nengel@2: h->mc_idx=0; nengel@2: h->mb_dec=0; nengel@2: h->mb_mc=0; nengel@2: h->mb_dma=0; nengel@2: h->next_mb_idx=0; nengel@2: nengel@2: h->blocking=0; nengel@2: nengel@2: nengel@2: H264spe* p = &h->spe; nengel@2: h->curr_line =p->spe_id; nengel@2: h->mb_total = p->mb_height*p->mb_width; nengel@2: int stride_y = 32; nengel@2: int stride_c = 16; nengel@2: //init block_offset array nengel@2: init_block_offset(stride_y, stride_c); nengel@2: for(;;){ nengel@2: spu_read_in_mbox(); nengel@2: nengel@2: h->s = &h->slice_buf[h->sl_idx]; nengel@2: h->sl_idx++; h->sl_idx%=2; nengel@2: nengel@2: if (h->s->state< 0){ nengel@2: break; nengel@2: } nengel@2: nengel@2: { nengel@2: if(!h->blocking){ nengel@2: init_mb_buffer(h); nengel@2: while((h->mb=(H264Mb *)get_next_mb(h))){ nengel@2: while(!dep_resolved(h)); nengel@2: //printf("frame %d mbx %d\t mby %d id %d\n", h->frames, h->mb->mb_x, h->mb->mb_y, p- >spe_id); nengel@2: hl_decode_mb_internal(h, stride_y, stride_c); nengel@2: } nengel@2: update_tgt_spe_dep(h, 1); nengel@2: }else{ nengel@2: h->mb_id=0; nengel@2: while((h->mb=(H264Mb *)get_next_mb_blocking(h))){ nengel@2: while(!dep_resolved(h)); nengel@2: //printf("frame %d mbx %d\t mby %d id %d\n", h->frames, h->mb->mb_x, h->mb->mb_y, p- >spe_id); nengel@2: hl_decode_mb_internal(h, stride_y, stride_c); nengel@2: } nengel@2: update_tgt_spe_dep(h, 1); nengel@2: } nengel@2: nengel@2: } nengel@2: nengel@2: h->frames++; nengel@2: nengel@2: if (p->spe_id == ((h->frames*p->mb_height -1)%p->spe_total)){ nengel@2: //printf("spe %d, %d\n", atomic_read(p->rl_cnt), h->frames); nengel@2: //MBSlice is copied beforehand. nengel@2: //only inc cnt. nengel@2: atomic_inc(p->rl_cnt); nengel@2: } nengel@2: { nengel@2: atomic_dec(p->cnt); nengel@2: } nengel@2: } nengel@2: nengel@2: return 0; nengel@2: } nengel@2: