nengel@2: #define CELL_SPE nengel@2: nengel@2: #include nengel@2: #include nengel@2: #include nengel@2: #include nengel@2: #include "libavcodec/avcodec.h" nengel@2: #include "h264_cabac_spu.h" nengel@2: #include "cabac_spu.h" nengel@2: #include "h264_types_spu.h" nengel@2: #include "h264_tables.h" nengel@2: #include "h264_dma.h" nengel@2: #include "h264_tables.h" nengel@2: nengel@2: #define MB_WIDTH 240 nengel@2: #define MB_STRIDE (MB_WIDTH+16) nengel@2: nengel@2: H264Cabac_spu hcabac; nengel@2: CABACContext cabac; nengel@2: DECLARE_ALIGNED_16(EDSlice_spu, slice[2]); nengel@2: DECLARE_ALIGNED_16(H264Mb, mb[2]); nengel@2: DECLARE_ALIGNED_16(H264spe, spe); nengel@2: nengel@2: DECLARE_ALIGNED_16(uint8_t, non_zero_count_table[2][MB_STRIDE][32]); nengel@2: DECLARE_ALIGNED_16(uint8_t, mvd_table[2][2][8*MB_STRIDE][2]); nengel@2: DECLARE_ALIGNED_16(uint8_t, direct_table[2][4*MB_STRIDE]); nengel@2: DECLARE_ALIGNED_16(uint8_t, chroma_pred_mode_table[2][MB_STRIDE]); nengel@2: DECLARE_ALIGNED_16(uint8_t, intra4x4_pred_mode_table[2][8*MB_STRIDE]); nengel@2: DECLARE_ALIGNED_16(uint16_t,cbp_table[2][MB_STRIDE]); nengel@2: DECLARE_ALIGNED_16(uint8_t, qscale_table[2][MB_STRIDE]); nengel@2: nengel@2: DECLARE_ALIGNED_16(uint32_t, mb_type_table[2][MB_STRIDE]); nengel@2: DECLARE_ALIGNED_16(int8_t, ref_index_table[2][2][4*MB_STRIDE]); nengel@2: DECLARE_ALIGNED_16(int16_t, motion_val_table[2][2][4*4*MB_WIDTH][2]); nengel@2: nengel@2: DECLARE_ALIGNED(128, uint8_t, bytestream_ls[4096]); nengel@2: DECLARE_ALIGNED_16(uint32_t, list1_mb_type_table[2][MB_STRIDE]); nengel@2: DECLARE_ALIGNED_16(int8_t, list1_ref_index_table[2][2][4*MB_STRIDE]); nengel@2: nengel@2: DECLARE_ALIGNED_16(spe_pos, dma_temp); //dma temp for sending nengel@2: //mb position of neighbouring spes nengel@2: DECLARE_ALIGNED_16(volatile spe_pos, src_spe); //written by SPE_ID -1 nengel@2: static int total_lines; nengel@2: nengel@2: static inline int dep_resolved(H264spe *p){ nengel@2: int spe_id = p->spe_id; nengel@2: volatile int lines_proc = src_spe.count; nengel@2: if (spe_id==0) nengel@2: return (total_lines < lines_proc-1 +p->mb_height)? 1:0; nengel@2: else nengel@2: return (total_lines < lines_proc-1)? 1:0; nengel@2: } nengel@2: nengel@2: static void update_tgt_spe_dep(H264spe *p, int end){ nengel@2: // if (end ){ nengel@2: total_lines++; nengel@2: spe_pos* dma_spe = &dma_temp; nengel@2: spe_pos* tgt_spe = p->tgt_spe + (unsigned) &src_spe; //located in target spe local store nengel@2: dma_spe->count = end? total_lines+1: total_lines; nengel@2: spu_dma_barrier_put(dma_spe, (unsigned) tgt_spe, sizeof(dma_temp), ED_put); nengel@2: // } nengel@2: nengel@2: } nengel@2: nengel@2: static int init_cabac(H264spe *p, H264Cabac_spu *hc){ nengel@2: hc->mb_height = p->mb_height; nengel@2: hc->mb_width = p->mb_width; nengel@2: hc->b_stride = 4*p->mb_width; nengel@2: hc->mb_stride = p->mb_stride; nengel@2: nengel@2: for(int i=0; i<16; i++){ nengel@2: #define T(x) (x>>2) | ((x<<2) & 0xF) nengel@2: hc->zigzag_scan[i] = T(zigzag_scan[i]); nengel@2: #undef T nengel@2: } nengel@2: for(int i=0; i<64; i++){ nengel@2: #define T(x) (x>>3) | ((x&7)<<3) nengel@2: hc->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]); nengel@2: #undef T nengel@2: } nengel@2: } nengel@2: nengel@2: static void reset_cabac_buffers(){ nengel@2: memset(intra4x4_pred_mode_table, 0, sizeof(intra4x4_pred_mode_table)); nengel@2: memset(mvd_table, 0, sizeof(mvd_table)); nengel@2: memset(direct_table, 0, sizeof(direct_table)); nengel@2: memset(chroma_pred_mode_table, 0, sizeof(chroma_pred_mode_table)); nengel@2: memset(cbp_table, 0, sizeof(cbp_table)); nengel@2: memset(qscale_table, 0, sizeof(qscale_table)); nengel@2: memset(mb_type_table, 0, sizeof(mb_type_table)); nengel@2: memset(ref_index_table, 0, sizeof(ref_index_table)); nengel@2: memset(motion_val_table, 0, sizeof(motion_val_table)); nengel@2: } nengel@2: nengel@2: static void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int bufsize){ nengel@2: int align = (unsigned) buf & 0xF; nengel@2: int dma_size; nengel@2: nengel@2: c->bytestream_ea_start= nengel@2: c->bytestream_ea= buf; nengel@2: c->bytestream_ea_end= buf + bufsize; nengel@2: c->bufsize = bufsize; nengel@2: nengel@2: if (bufsize + align >= sizeof(bytestream_ls)){ nengel@2: dma_size = sizeof(bytestream_ls); nengel@2: c->bufsize = c->bufsize +align - sizeof(bytestream_ls); nengel@2: }else{ nengel@2: int align_end = (bufsize+align) &0xF; nengel@2: if (align_end) nengel@2: dma_size = bufsize+align + 16-align_end; nengel@2: else nengel@2: dma_size = bufsize+align; nengel@2: c->bufsize = 0; nengel@2: } nengel@2: // printf("%d\n", dma_size); nengel@2: c->bytestream_end = &bytestream_ls[dma_size]; nengel@2: c->bytestream_start= c->bytestream = &bytestream_ls[align]; nengel@2: spu_dma_get(bytestream_ls, (unsigned) buf - align, dma_size, ED_get ); nengel@2: c->bytestream_ea_start= nengel@2: c->bytestream_ea= buf + dma_size -align; nengel@2: nengel@2: wait_dma_id(ED_get); nengel@2: nengel@2: if (align %2){ nengel@2: c->low = (*c->bytestream++)<<18; nengel@2: c->low+= (*c->bytestream++)<<10; nengel@2: c->low+= ((*c->bytestream++)<<2) + 2; nengel@2: }else { nengel@2: c->low = (*c->bytestream++)<<18; nengel@2: c->low+= (*c->bytestream++)<<10; nengel@2: c->low+= (2<<8); nengel@2: } nengel@2: nengel@2: c->range= 0x1FE; nengel@2: bytecount=0; nengel@2: } nengel@2: nengel@2: static void init_dequant8_coeff_table(EDSlice_spu *s, H264Cabac_spu *hc){ nengel@2: int i,q,x; nengel@2: const int transpose = HAVE_ALTIVEC; nengel@2: hc->dequant8_coeff[0] = hc->dequant8_buffer[0]; nengel@2: hc->dequant8_coeff[1] = hc->dequant8_buffer[1]; nengel@2: nengel@2: for(i=0; i<2; i++){ nengel@2: if(i && !memcmp(s->pps.scaling_matrix8[0], s->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){ nengel@2: hc->dequant8_coeff[1] = hc->dequant8_buffer[0]; nengel@2: break; nengel@2: } nengel@2: nengel@2: for(q=0; q<52; q++){ nengel@2: int shift = div6[q]; nengel@2: int idx = rem6[q]; nengel@2: for(x=0; x<64; x++) nengel@2: hc->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] = nengel@2: ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] * nengel@2: s->pps.scaling_matrix8[i][x]) << shift; nengel@2: } nengel@2: } nengel@2: } nengel@2: nengel@2: static void init_dequant4_coeff_table(EDSlice_spu *s, H264Cabac_spu *hc){ nengel@2: int i,j,q,x; nengel@2: const int transpose = HAVE_MMX | HAVE_ALTIVEC | HAVE_NEON; nengel@2: for(i=0; i<6; i++ ){ nengel@2: hc->dequant4_coeff[i] = hc->dequant4_buffer[i]; nengel@2: for(j=0; jpps.scaling_matrix4[j], s->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){ nengel@2: hc->dequant4_coeff[i] = hc->dequant4_buffer[j]; nengel@2: break; nengel@2: } nengel@2: } nengel@2: if(jdequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] = nengel@2: ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] * nengel@2: s->pps.scaling_matrix4[i][x]) << shift; nengel@2: } nengel@2: } nengel@2: } nengel@2: nengel@2: static void init_dequant_tables(EDSlice_spu *s, H264Cabac_spu *hc){ nengel@2: int i,x; nengel@2: nengel@2: init_dequant4_coeff_table(s, hc); nengel@2: if(s->pps.transform_8x8_mode) nengel@2: init_dequant8_coeff_table(s, hc); nengel@2: if(s->transform_bypass){ nengel@2: for(i=0; i<6; i++) nengel@2: for(x=0; x<16; x++) nengel@2: hc->dequant4_coeff[i][0][x] = 1<<6; nengel@2: if(s->pps.transform_8x8_mode) nengel@2: for(i=0; i<2; i++) nengel@2: for(x=0; x<64; x++) nengel@2: hc->dequant8_coeff[i][0][x] = 1<<6; nengel@2: } nengel@2: } nengel@2: nengel@2: static void init_entropy_buf(H264Cabac_spu *hc, EDSlice_spu *s){ nengel@2: hc->non_zero_count_top = non_zero_count_table[0]; nengel@2: hc->non_zero_count = non_zero_count_table[1]; nengel@2: hc->mvd_top[0] = mvd_table[0][0]; nengel@2: hc->mvd[0] = mvd_table[0][1]; nengel@2: hc->mvd_top[1] = mvd_table[1][0]; nengel@2: hc->mvd[1] = mvd_table[1][1]; nengel@2: hc->direct_top = direct_table[0]; nengel@2: hc->direct = direct_table[1]; nengel@2: hc->chroma_pred_mode_top = chroma_pred_mode_table[0]; nengel@2: hc->chroma_pred_mode = chroma_pred_mode_table[1]; nengel@2: hc->intra4x4_pred_mode_top = intra4x4_pred_mode_table[0]; nengel@2: hc->intra4x4_pred_mode = intra4x4_pred_mode_table[1]; nengel@2: hc->cbp_top = cbp_table[0]; nengel@2: hc->cbp = cbp_table[1]; nengel@2: hc->qscale_top = qscale_table[0] +1; nengel@2: hc->qscale = qscale_table[1] +1; nengel@2: nengel@2: hc->mb_type_top = mb_type_table[0]+1; nengel@2: hc->mb_type = mb_type_table[1]+1; nengel@2: hc->ref_index_top[0] = ref_index_table[0][0]; nengel@2: hc->ref_index_top[1] = ref_index_table[1][0]; nengel@2: hc->ref_index[0] = ref_index_table[0][1]; nengel@2: hc->ref_index[1] = ref_index_table[1][1]; nengel@2: hc->motion_val_top[0] = motion_val_table[0][0]; nengel@2: hc->motion_val_top[1] = motion_val_table[1][0]; nengel@2: hc->motion_val[0] = motion_val_table[0][1]; nengel@2: hc->motion_val[1] = motion_val_table[1][1]; nengel@2: nengel@2: int mb_stride = hc->mb_stride; nengel@2: nengel@2: if (s->slice_type_nos == FF_B_TYPE){ nengel@2: while(!dep_resolved(&spe)); nengel@2: spu_dma_get(list1_mb_type_table[0], (unsigned) (s->list1.mb_type -1), mb_stride*sizeof(uint32_t), ED_get); nengel@2: spu_dma_get(list1_ref_index_table[0][0], (unsigned) s->list1.ref_index[0], mb_stride*4*sizeof(int8_t), ED_get); nengel@2: spu_dma_get(list1_ref_index_table[0][1], (unsigned) s->list1.ref_index[1], mb_stride*4*sizeof(int8_t), ED_get); nengel@2: wait_dma_id(ED_get); nengel@2: spu_dma_get(list1_mb_type_table[1], (unsigned) (s->list1.mb_type -1 + mb_stride), mb_stride*sizeof(uint32_t), ED_get); nengel@2: spu_dma_get(list1_ref_index_table[1][0], (unsigned) (s->list1.ref_index[0] + 4*mb_stride), mb_stride*4*sizeof(int8_t), ED_get); nengel@2: spu_dma_get(list1_ref_index_table[1][1], (unsigned) (s->list1.ref_index[1] + 4*mb_stride), mb_stride*4*sizeof(int8_t), ED_get); nengel@2: hc->list1_mb_type = list1_mb_type_table[0]+1; nengel@2: hc->list1_ref_index[0] = list1_ref_index_table[0][0]; nengel@2: hc->list1_ref_index[1] = list1_ref_index_table[0][1]; nengel@2: } nengel@2: nengel@2: } nengel@2: nengel@2: static void update_entropy_buf(H264Cabac_spu *hc, EDSlice_spu *s, int line){ nengel@2: int mb_stride = hc->mb_stride; nengel@2: int mb_width = hc->mb_width; nengel@2: int top = (line+1)%2; nengel@2: int cur = line%2; nengel@2: int bottom = (line+1)%2; //same as top, but to identify prebuffering of next line. nengel@2: nengel@2: hc->non_zero_count_top = non_zero_count_table[top]; nengel@2: hc->non_zero_count = non_zero_count_table[cur]; nengel@2: hc->mvd_top[0] = mvd_table[0][top]; nengel@2: hc->mvd[0] = mvd_table[0][cur]; nengel@2: hc->mvd_top[1] = mvd_table[1][top]; nengel@2: hc->mvd[1] = mvd_table[1][cur]; nengel@2: hc->direct_top = direct_table[top]; nengel@2: hc->direct = direct_table[cur]; nengel@2: hc->chroma_pred_mode_top = chroma_pred_mode_table[top]; nengel@2: hc->chroma_pred_mode = chroma_pred_mode_table[cur]; nengel@2: hc->intra4x4_pred_mode_top = intra4x4_pred_mode_table[top]; nengel@2: hc->intra4x4_pred_mode = intra4x4_pred_mode_table[cur]; nengel@2: hc->cbp_top = cbp_table[top]; nengel@2: hc->cbp = cbp_table[cur]; nengel@2: hc->qscale_top = qscale_table[top] +1; nengel@2: hc->qscale = qscale_table[cur] +1; nengel@2: nengel@2: hc->mb_type_top = mb_type_table[top]+1; nengel@2: hc->mb_type = mb_type_table[cur]+1; nengel@2: hc->ref_index_top[0] = ref_index_table[0][top]; nengel@2: hc->ref_index_top[1] = ref_index_table[1][top]; nengel@2: hc->ref_index[0] = ref_index_table[0][cur]; nengel@2: hc->ref_index[1] = ref_index_table[1][cur]; nengel@2: hc->motion_val_top[0] = motion_val_table[0][top]; nengel@2: hc->motion_val_top[1] = motion_val_table[1][top]; nengel@2: hc->motion_val[0] = motion_val_table[0][cur]; nengel@2: hc->motion_val[1] = motion_val_table[1][cur]; nengel@2: nengel@2: wait_dma_id(ED_put); nengel@2: nengel@2: spu_dma_put(mb_type_table[top], (unsigned) (s->pic.mb_type -1 + line*mb_stride), mb_stride*sizeof(uint32_t), ED_put); nengel@2: spu_dma_put(ref_index_table[0][top], (unsigned) (s->pic.ref_index[0] + line*4*mb_stride), 4*mb_stride*sizeof(int8_t), ED_put); nengel@2: spu_dma_put(ref_index_table[1][top], (unsigned) (s->pic.ref_index[1] + line*4*mb_stride), 4*mb_stride*sizeof(int8_t), ED_put); nengel@2: spu_dma_put(motion_val_table[0][top], (unsigned) (s->pic.motion_val[0]+ line*16*mb_width), 16*mb_width*2*sizeof(int16_t), ED_put); nengel@2: spu_dma_put(motion_val_table[1][top], (unsigned) (s->pic.motion_val[1]+ line*16*mb_width), 16*mb_width*2*sizeof(int16_t), ED_put); nengel@2: nengel@2: if (s->slice_type_nos == FF_B_TYPE){ nengel@2: update_tgt_spe_dep(&spe, 0); nengel@2: wait_dma_id(ED_get); nengel@2: nengel@2: if (line + 2 < hc->mb_height){ nengel@2: while(!dep_resolved(&spe)); nengel@2: spu_dma_get(list1_mb_type_table[cur], (unsigned) (s->list1.mb_type -1 + (line+2)*mb_stride), mb_stride*sizeof(uint32_t), ED_get); nengel@2: spu_dma_get(list1_ref_index_table[cur][0], (unsigned) (s->list1.ref_index[0] + (line+2)*4*mb_stride), mb_stride*4*sizeof(int8_t), ED_get); nengel@2: spu_dma_get(list1_ref_index_table[cur][1], (unsigned) (s->list1.ref_index[1] + (line+2)*4*mb_stride), mb_stride*4*sizeof(int8_t), ED_get); nengel@2: } nengel@2: hc->list1_mb_type = list1_mb_type_table[bottom]+1; nengel@2: hc->list1_ref_index[0] = list1_ref_index_table[bottom][0]; nengel@2: hc->list1_ref_index[1] = list1_ref_index_table[bottom][1]; nengel@2: } nengel@2: nengel@2: } nengel@2: nengel@2: // void printmbdiff(EDSlice_spu *s, H264Cabac_spu *hc, H264Mb *mp, H264Mb *ms){ nengel@2: // nengel@2: // printf("mb_x %d, %d\n", mp->mb_x, ms->mb_x); nengel@2: // printf("mb_y %d, %d\n", mp->mb_y, ms->mb_y); nengel@2: // printf("mb_xy %d, %d\n", mp->mb_xy, ms->mb_xy); nengel@2: // printf("top_mb_xy %d, %d\n", mp->top_mb_xy, ms->top_mb_xy); nengel@2: // printf("left_mb_xy %d, %d\n", mp->left_mb_xy, ms->left_mb_xy); nengel@2: // printf("chroma_pred_mode %d, %d\n", mp->chroma_pred_mode, ms->chroma_pred_mode); nengel@2: // printf("intra16x16_pred_mode %d, %d\n", mp->intra16x16_pred_mode, ms->intra16x16_pred_mode); nengel@2: // printf("topleft_samples %d, %d\n", mp->topleft_samples_available, ms->topleft_samples_available); nengel@2: // printf("topright_samples %d, %d\n", mp->topright_samples_available, ms->topright_samples_available); nengel@2: // printf("top_samples %d, %d\n", mp->top_samples_available, ms->top_samples_available); nengel@2: // printf("left_samples %d, %d\n", mp->left_samples_available, ms->left_samples_available); nengel@2: // nengel@2: // if (memcmp(mp->intra4x4_pred_mode_cache, ms->intra4x4_pred_mode_cache, 40)){ nengel@2: // for (int i=0; i<5; i++){ nengel@2: // for (int j=0; j<8; j++){ nengel@2: // printf("%d, %d\t", mp->intra4x4_pred_mode_cache[i*8+j],ms->intra4x4_pred_mode_cache[i*8+j]); nengel@2: // } nengel@2: // printf("\n"); nengel@2: // } nengel@2: // } nengel@2: // nengel@2: // if (memcmp(mp->non_zero_count_cache, ms->non_zero_count_cache, 48)){ nengel@2: // for (int i=0; i<6; i++){ nengel@2: // for (int j=0; j<8; j++){ nengel@2: // printf("%u, %u\t", mp->non_zero_count_cache[i*8+j],ms->non_zero_count_cache[i*8+j]); nengel@2: // } nengel@2: // printf("\n"); nengel@2: // } nengel@2: // } nengel@2: // nengel@2: // if (memcmp(mp->sub_mb_type, ms->sub_mb_type, 8)){ nengel@2: // for (int i=0; i<4; i++){ nengel@2: // printf("%u, %u\t", mp->sub_mb_type[i], mp->sub_mb_type[i]); nengel@2: // printf("\n"); nengel@2: // } nengel@2: // } nengel@2: // nengel@2: // if (memcmp(mp->mv_cache, ms->mv_cache, 320)){ nengel@2: // for (int k=0; k<2; k++){ nengel@2: // for (int i=0; i<5; i++){ nengel@2: // for (int j=0; j<8; j++){ nengel@2: // printf("%d, %d, %d, %d\t", mp->mv_cache[k][i*8+j][0], mp->mv_cache[k][i*8+j][1], ms->mv_cache[k][i*8+j][0], ms->mv_cache[k][i*8+j][1]); nengel@2: // } nengel@2: // printf("\n"); nengel@2: // } nengel@2: // } nengel@2: // } nengel@2: // nengel@2: // if (memcmp(mp->ref_cache, ms->ref_cache, 80)){ nengel@2: // for (int k=0; k<2; k++){ nengel@2: // for (int i=0; i<5; i++){ nengel@2: // for (int j=0; j<8; j++){ nengel@2: // printf("%d, %d\t", mp->ref_cache[k][i*8+j], ms->ref_cache[k][i*8+j]); nengel@2: // } nengel@2: // printf("\n"); nengel@2: // } nengel@2: // } nengel@2: // } nengel@2: // nengel@2: // printf("cbp %d, %d\n", mp->cbp, ms->cbp); nengel@2: // for (int i=0; imb_stride; i++){ nengel@2: // printf("%d, ", hc->cbp[i]); fflush(0); nengel@2: // } nengel@2: // printf("\n"); nengel@2: // nengel@2: // printf("mb_type %x, %x\n", mp->mb_type, ms->mb_type); nengel@2: // printf("mb_type IS_INTRA %d, IS_INTRA16x16 %d, IS_DIRECT %d\n", IS_INTRA(ms->mb_type), IS_INTRA16x16(ms->mb_type), IS_DIRECT(ms->mb_type) ); nengel@2: // printf("left_type %d, %d\n", mp->left_type, ms->left_type); nengel@2: // printf("top_type %d, %d\n", mp->top_type, ms->top_type); nengel@2: // printf("qscale_mb_xy %d, %d\n", mp->qscale_mb_xy, ms->qscale_mb_xy); nengel@2: // printf("qscale_left_mb_xy %d, %d\n", mp->qscale_left_mb_xy, ms->qscale_left_mb_xy); nengel@2: // printf("qscale_top_mb_xy %d, %d\n", mp->qscale_top_mb_xy, ms->qscale_top_mb_xy); nengel@2: // // for (int i=0; imb_stride; i++){ nengel@2: // // printf("%d, ", qscale_table[0][i]); fflush(0); nengel@2: // // } nengel@2: // nengel@2: // if (memcmp(mp->mb, ms->mb, 768)){ nengel@2: // for (int i=0; i<16; i++){ nengel@2: // for (int j=0; j<16; j++){ nengel@2: // printf("%d, %d\t", mp->mb[j + i*16], ms->ref_cache[j + i*16]); nengel@2: // } nengel@2: // printf("\n"); nengel@2: // } nengel@2: // for (int i=0; i<8; i++){ nengel@2: // for (int j=0; j<8; j++){ nengel@2: // printf("%d, %d\t", mp->mb[256 + j + i*8], ms->ref_cache[j + i*8]); nengel@2: // } nengel@2: // printf("\n"); nengel@2: // } nengel@2: // for (int i=0; i<8; i++){ nengel@2: // for (int j=0; j<8; j++){ nengel@2: // printf("%d, %d\t", mp->mb[320+ j + i*8], ms->ref_cache[j + i*8]); nengel@2: // } nengel@2: // printf("\n"); nengel@2: // } nengel@2: // } nengel@2: // nengel@2: // if (memcmp(mp->bS, ms->bS, 32)){ nengel@2: // for (int k=0; k<2; k++){ nengel@2: // for (int i=0; i<4; i++){ nengel@2: // for (int j=0; j<4; j++){ nengel@2: // printf("%d, %d\t", mp->bS[k][i][j], mp->mv_cache[k][i][j]); nengel@2: // } nengel@2: // printf("\n"); nengel@2: // } nengel@2: // } nengel@2: // } nengel@2: // if (memcmp(mp->edges, ms->edges, 4)){ nengel@2: // printf("edges %d, %d, %d, %d\n", mp->edges[0], ms->edges[0], mp->edges[1], ms->edges[1]); nengel@2: // printf("deblock %d, %d\n", mp->deblock_mb, ms->deblock_mb); nengel@2: // } nengel@2: // nengel@2: // printf("dequant4_coeff_y %d, %d\n", mp->dequant4_coeff_y, ms->dequant4_coeff_y); nengel@2: // printf("dequant4_coeff_cb %d, %d\n", mp->dequant4_coeff_cb, ms->dequant4_coeff_cb); nengel@2: // printf("dequant4_coeff_cr %d, %d\n", mp->dequant4_coeff_cr, ms->dequant4_coeff_cr); nengel@2: // } nengel@2: // DECLARE_ALIGNED_16(H264Mb, tmp); nengel@2: nengel@2: nengel@2: int main(unsigned long long id, unsigned long long argp){ nengel@2: EDSlice_spu *s; nengel@2: H264Cabac_spu *hc = &hcabac; nengel@2: CABACContext *c = &cabac; nengel@2: H264spe *p = &spe; nengel@2: nengel@2: spu_write_out_mbox((unsigned) slice); nengel@2: spu_dma_get(p, (unsigned) argp, sizeof(H264spe), ED_spe); //ID_slice is used out of convienience nengel@2: wait_dma_id(ED_spe); nengel@2: nengel@2: ff_init_cabac_states(); nengel@2: init_cabac(p, hc); nengel@2: hc->blocking=0; nengel@2: for(;;){ nengel@2: spu_read_in_mbox(); nengel@2: s = &slice[0]; nengel@2: reset_cabac_buffers(); nengel@2: init_entropy_buf(hc, s); nengel@2: nengel@2: if (hc->blocking) wait_dma_id(ED_get); nengel@2: //printf("framesize %d\n", s->byte_bufsize);fflush(0); nengel@2: init_dequant_tables(s, hc); nengel@2: ff_init_cabac_decoder( c, s->bytestream_start, s->byte_bufsize ); nengel@2: ff_h264_init_cabac_states(s, c); nengel@2: nengel@2: int mb_slot=0; nengel@2: for(int j=0; jmb_height; j++){ nengel@2: for(int i=0; imb_width; i++){ nengel@2: int eos,ret; nengel@2: H264Mb *m = &mb[mb_slot]; nengel@2: m->mb_x=i; nengel@2: m->mb_y=j; nengel@2: s->m = m; nengel@2: nengel@2: ret = ff_h264_decode_mb_cabac(hc, s, c); nengel@2: nengel@2: // spu_dma_get(&tmp, (unsigned) &s->mbs[j*hc->mb_width + i], sizeof(H264Mb), ED_get); nengel@2: // wait_dma_id(ED_get); nengel@2: // if (memcmp(&tmp, m, sizeof(H264Mb))){ nengel@2: // printf("coded pic num %d\n", s->coded_pic_num); nengel@2: // printmbdiff(s, hc,&tmp, m); nengel@2: // return 0; nengel@2: // } nengel@2: //printf("qscale %d\n", m->qscale_mb_xy); nengel@2: if (!hc->blocking){ nengel@2: if (mb_slot){ nengel@2: spu_dma_put(m, (unsigned) &s->mbs[j*hc->mb_width + i], sizeof(H264Mb), ED_putmb1); nengel@2: wait_dma_id(ED_putmb0); nengel@2: }else { nengel@2: spu_dma_put(m, (unsigned) &s->mbs[j*hc->mb_width + i], sizeof(H264Mb), ED_putmb0); nengel@2: wait_dma_id(ED_putmb1); nengel@2: } nengel@2: mb_slot++; mb_slot%=2; nengel@2: }else { nengel@2: spu_dma_put(m, (unsigned) &s->mbs[j*hc->mb_width + i], sizeof(H264Mb), ED_putmb0); nengel@2: wait_dma_id(ED_putmb0); nengel@2: } nengel@2: nengel@2: nengel@2: eos = get_cabac_terminate( c); nengel@2: nengel@2: if( ret < 0) { nengel@2: fprintf(stderr, "error at %d bytecount\n", bytecount); nengel@2: return -1; nengel@2: } nengel@2: } nengel@2: update_entropy_buf(hc, s, j); nengel@2: if (hc->blocking){ wait_dma_id(ED_get); wait_dma_id(ED_put);} nengel@2: } nengel@2: wait_dma_id(ED_put); nengel@2: spu_write_out_mbox(1); nengel@2: nengel@2: } nengel@2: nengel@2: return 0; nengel@2: nengel@2: nengel@2: }