Mercurial > cgi-bin > hgwebdir.cgi > PR > Applications > VSs > VSs__H264__App
comparison libavcodec/cell/spe_ed.c @ 4:96e628866d41
naming some tasks to help debugging
| author | Nina Engelhardt <nengel@mailbox.tu-berlin.de> |
|---|---|
| date | Wed, 19 Dec 2012 15:40:26 +0100 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:9f09cfeec445 |
|---|---|
| 1 #define CELL_SPE | |
| 2 | |
| 3 #include <string.h> | |
| 4 #include <stdio.h> | |
| 5 #include <spu_intrinsics.h> | |
| 6 #include <spu_mfcio.h> | |
| 7 #include "libavcodec/avcodec.h" | |
| 8 #include "h264_cabac_spu.h" | |
| 9 #include "cabac_spu.h" | |
| 10 #include "h264_types_spu.h" | |
| 11 #include "h264_tables.h" | |
| 12 #include "h264_dma.h" | |
| 13 #include "h264_tables.h" | |
| 14 | |
/* Width, in macroblocks, used to size the per-row tables below
 * (presumably the largest supported picture width -- TODO confirm). */
#define MB_WIDTH 240
/* Row length of most per-row tables: MB_WIDTH plus 16 extra entries. */
#define MB_STRIDE (MB_WIDTH+16)

/* Decoder state used by main(). */
H264Cabac_spu hcabac;
CABACContext cabac;
/* The address of slice[] is published through the outbound mailbox in main();
 * main() itself only reads slice[0]. */
DECLARE_ALIGNED_16(EDSlice_spu, slice[2]);
/* Two macroblock output slots; main() alternates between them when
 * hcabac.blocking is 0 so one can be filled while the other is being written out. */
DECLARE_ALIGNED_16(H264Mb, mb[2]);
DECLARE_ALIGNED_16(H264spe, spe);

/* Per-row context tables. The leading [2] dimension holds two macroblock rows
 * ("top" and "current"); init_entropy_buf()/update_entropy_buf() swap which
 * slot plays which role after every row. */
DECLARE_ALIGNED_16(uint8_t, non_zero_count_table[2][MB_STRIDE][32]);
/* Indexed [list][row] (see init_entropy_buf). */
DECLARE_ALIGNED_16(uint8_t, mvd_table[2][2][8*MB_STRIDE][2]);
DECLARE_ALIGNED_16(uint8_t, direct_table[2][4*MB_STRIDE]);
DECLARE_ALIGNED_16(uint8_t, chroma_pred_mode_table[2][MB_STRIDE]);
DECLARE_ALIGNED_16(uint8_t, intra4x4_pred_mode_table[2][8*MB_STRIDE]);
DECLARE_ALIGNED_16(uint16_t,cbp_table[2][MB_STRIDE]);
/* Accessed through pointers offset by +1 element (see init_entropy_buf). */
DECLARE_ALIGNED_16(uint8_t, qscale_table[2][MB_STRIDE]);

/* Tables that update_entropy_buf() also writes back to the picture by DMA.
 * mb_type_table is accessed through pointers offset by +1 element;
 * ref_index_table and motion_val_table are indexed [list][row]. */
DECLARE_ALIGNED_16(uint32_t, mb_type_table[2][MB_STRIDE]);
DECLARE_ALIGNED_16(int8_t, ref_index_table[2][2][4*MB_STRIDE]);
DECLARE_ALIGNED_16(int16_t, motion_val_table[2][2][4*4*MB_WIDTH][2]);

/* Local staging buffer that ff_init_cabac_decoder() fills by DMA with the
 * first chunk of the bitstream. */
DECLARE_ALIGNED(128, uint8_t, bytestream_ls[4096]);
/* Rows of the list1 picture's mb_type / ref_index, fetched by DMA for B slices.
 * NOTE: list1_ref_index_table is indexed [row][list], the opposite order of
 * ref_index_table above. */
DECLARE_ALIGNED_16(uint32_t, list1_mb_type_table[2][MB_STRIDE]);
DECLARE_ALIGNED_16(int8_t, list1_ref_index_table[2][2][4*MB_STRIDE]);

DECLARE_ALIGNED_16(spe_pos, dma_temp); // staging copy of the progress counter sent to the target SPE
// progress (macroblock position) of the neighbouring SPEs
DECLARE_ALIGNED_16(volatile spe_pos, src_spe); // written by SPE_ID -1
/* Number of rows this SPE has reported so far; incremented in update_tgt_spe_dep(). */
static int total_lines;
| 44 | |
| 45 static inline int dep_resolved(H264spe *p){ | |
| 46 int spe_id = p->spe_id; | |
| 47 volatile int lines_proc = src_spe.count; | |
| 48 if (spe_id==0) | |
| 49 return (total_lines < lines_proc-1 +p->mb_height)? 1:0; | |
| 50 else | |
| 51 return (total_lines < lines_proc-1)? 1:0; | |
| 52 } | |
| 53 | |
| 54 static void update_tgt_spe_dep(H264spe *p, int end){ | |
| 55 // if (end ){ | |
| 56 total_lines++; | |
| 57 spe_pos* dma_spe = &dma_temp; | |
| 58 spe_pos* tgt_spe = p->tgt_spe + (unsigned) &src_spe; //located in target spe local store | |
| 59 dma_spe->count = end? total_lines+1: total_lines; | |
| 60 spu_dma_barrier_put(dma_spe, (unsigned) tgt_spe, sizeof(dma_temp), ED_put); | |
| 61 // } | |
| 62 | |
| 63 } | |
| 64 | |
| 65 static int init_cabac(H264spe *p, H264Cabac_spu *hc){ | |
| 66 hc->mb_height = p->mb_height; | |
| 67 hc->mb_width = p->mb_width; | |
| 68 hc->b_stride = 4*p->mb_width; | |
| 69 hc->mb_stride = p->mb_stride; | |
| 70 | |
| 71 for(int i=0; i<16; i++){ | |
| 72 #define T(x) (x>>2) | ((x<<2) & 0xF) | |
| 73 hc->zigzag_scan[i] = T(zigzag_scan[i]); | |
| 74 #undef T | |
| 75 } | |
| 76 for(int i=0; i<64; i++){ | |
| 77 #define T(x) (x>>3) | ((x&7)<<3) | |
| 78 hc->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]); | |
| 79 #undef T | |
| 80 } | |
| 81 } | |
| 82 | |
| 83 static void reset_cabac_buffers(){ | |
| 84 memset(intra4x4_pred_mode_table, 0, sizeof(intra4x4_pred_mode_table)); | |
| 85 memset(mvd_table, 0, sizeof(mvd_table)); | |
| 86 memset(direct_table, 0, sizeof(direct_table)); | |
| 87 memset(chroma_pred_mode_table, 0, sizeof(chroma_pred_mode_table)); | |
| 88 memset(cbp_table, 0, sizeof(cbp_table)); | |
| 89 memset(qscale_table, 0, sizeof(qscale_table)); | |
| 90 memset(mb_type_table, 0, sizeof(mb_type_table)); | |
| 91 memset(ref_index_table, 0, sizeof(ref_index_table)); | |
| 92 memset(motion_val_table, 0, sizeof(motion_val_table)); | |
| 93 } | |
| 94 | |
/**
 * Prepare the CABAC decoder to read a bitstream and fetch its first chunk.
 *
 * The first part of the stream is transferred by DMA into the local buffer
 * bytestream_ls; the decoder's low/range registers are then primed from the
 * first bytes.
 *
 * @param c        CABAC context to initialise
 * @param buf      start address of the bitstream (used as a DMA source, so
 *                 presumably a main-memory address -- TODO confirm)
 * @param bufsize  length of the bitstream in bytes
 */
static void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int bufsize){
    /* Offset of buf within its 16-byte line; the DMA starts at the aligned
     * address below buf, so the first `align` bytes of bytestream_ls are padding. */
    int align = (unsigned) buf & 0xF;
    int dma_size;

    /* NOTE(review): bytestream_ea_start/bytestream_ea are assigned again after
     * the DMA is issued, so the values stored here are overwritten. */
    c->bytestream_ea_start=
    c->bytestream_ea= buf;
    c->bytestream_ea_end= buf + bufsize;
    c->bufsize = bufsize;

    if (bufsize + align >= sizeof(bytestream_ls)){
        /* Stream does not fit in one go: fill the whole local buffer and
         * record how many bytes remain beyond this chunk. */
        dma_size = sizeof(bytestream_ls);
        c->bufsize = c->bufsize +align - sizeof(bytestream_ls);
    }else{
        /* Whole stream fits: round the transfer size up to a multiple of 16. */
        int align_end = (bufsize+align) &0xF;
        if (align_end)
            dma_size = bufsize+align + 16-align_end;
        else
            dma_size = bufsize+align;
        c->bufsize = 0;
    }
    // printf("%d\n", dma_size);
    c->bytestream_end = &bytestream_ls[dma_size];
    /* First real stream byte sits `align` bytes into the local buffer. */
    c->bytestream_start= c->bytestream = &bytestream_ls[align];
    spu_dma_get(bytestream_ls, (unsigned) buf - align, dma_size, ED_get );
    /* Address of the first byte that follows the chunk just requested. */
    c->bytestream_ea_start=
    c->bytestream_ea= buf + dma_size -align;

    /* The bytes are read right below, so the transfer is waited for here. */
    wait_dma_id(ED_get);

    /* Prime `low`: three bytes are consumed when the start offset is odd,
     * two bytes plus the constant (2<<8) when it is even.
     * NOTE(review): looks like this keeps later reads on an even offset -- confirm. */
    if (align %2){
        c->low = (*c->bytestream++)<<18;
        c->low+= (*c->bytestream++)<<10;
        c->low+= ((*c->bytestream++)<<2) + 2;
    }else {
        c->low = (*c->bytestream++)<<18;
        c->low+= (*c->bytestream++)<<10;
        c->low+= (2<<8);
    }

    c->range= 0x1FE;
    /* bytecount is declared outside this file section; it is printed by main() on error. */
    bytecount=0;
}
| 137 | |
| 138 static void init_dequant8_coeff_table(EDSlice_spu *s, H264Cabac_spu *hc){ | |
| 139 int i,q,x; | |
| 140 const int transpose = HAVE_ALTIVEC; | |
| 141 hc->dequant8_coeff[0] = hc->dequant8_buffer[0]; | |
| 142 hc->dequant8_coeff[1] = hc->dequant8_buffer[1]; | |
| 143 | |
| 144 for(i=0; i<2; i++){ | |
| 145 if(i && !memcmp(s->pps.scaling_matrix8[0], s->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){ | |
| 146 hc->dequant8_coeff[1] = hc->dequant8_buffer[0]; | |
| 147 break; | |
| 148 } | |
| 149 | |
| 150 for(q=0; q<52; q++){ | |
| 151 int shift = div6[q]; | |
| 152 int idx = rem6[q]; | |
| 153 for(x=0; x<64; x++) | |
| 154 hc->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] = | |
| 155 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] * | |
| 156 s->pps.scaling_matrix8[i][x]) << shift; | |
| 157 } | |
| 158 } | |
| 159 } | |
| 160 | |
| 161 static void init_dequant4_coeff_table(EDSlice_spu *s, H264Cabac_spu *hc){ | |
| 162 int i,j,q,x; | |
| 163 const int transpose = HAVE_MMX | HAVE_ALTIVEC | HAVE_NEON; | |
| 164 for(i=0; i<6; i++ ){ | |
| 165 hc->dequant4_coeff[i] = hc->dequant4_buffer[i]; | |
| 166 for(j=0; j<i; j++){ | |
| 167 if(!memcmp(s->pps.scaling_matrix4[j], s->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){ | |
| 168 hc->dequant4_coeff[i] = hc->dequant4_buffer[j]; | |
| 169 break; | |
| 170 } | |
| 171 } | |
| 172 if(j<i) | |
| 173 continue; | |
| 174 | |
| 175 for(q=0; q<52; q++){ | |
| 176 int shift = div6[q] + 2; | |
| 177 int idx = rem6[q]; | |
| 178 for(x=0; x<16; x++) | |
| 179 hc->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] = | |
| 180 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] * | |
| 181 s->pps.scaling_matrix4[i][x]) << shift; | |
| 182 } | |
| 183 } | |
| 184 } | |
| 185 | |
| 186 static void init_dequant_tables(EDSlice_spu *s, H264Cabac_spu *hc){ | |
| 187 int i,x; | |
| 188 | |
| 189 init_dequant4_coeff_table(s, hc); | |
| 190 if(s->pps.transform_8x8_mode) | |
| 191 init_dequant8_coeff_table(s, hc); | |
| 192 if(s->transform_bypass){ | |
| 193 for(i=0; i<6; i++) | |
| 194 for(x=0; x<16; x++) | |
| 195 hc->dequant4_coeff[i][0][x] = 1<<6; | |
| 196 if(s->pps.transform_8x8_mode) | |
| 197 for(i=0; i<2; i++) | |
| 198 for(x=0; x<64; x++) | |
| 199 hc->dequant8_coeff[i][0][x] = 1<<6; | |
| 200 } | |
| 201 } | |
| 202 | |
| 203 static void init_entropy_buf(H264Cabac_spu *hc, EDSlice_spu *s){ | |
| 204 hc->non_zero_count_top = non_zero_count_table[0]; | |
| 205 hc->non_zero_count = non_zero_count_table[1]; | |
| 206 hc->mvd_top[0] = mvd_table[0][0]; | |
| 207 hc->mvd[0] = mvd_table[0][1]; | |
| 208 hc->mvd_top[1] = mvd_table[1][0]; | |
| 209 hc->mvd[1] = mvd_table[1][1]; | |
| 210 hc->direct_top = direct_table[0]; | |
| 211 hc->direct = direct_table[1]; | |
| 212 hc->chroma_pred_mode_top = chroma_pred_mode_table[0]; | |
| 213 hc->chroma_pred_mode = chroma_pred_mode_table[1]; | |
| 214 hc->intra4x4_pred_mode_top = intra4x4_pred_mode_table[0]; | |
| 215 hc->intra4x4_pred_mode = intra4x4_pred_mode_table[1]; | |
| 216 hc->cbp_top = cbp_table[0]; | |
| 217 hc->cbp = cbp_table[1]; | |
| 218 hc->qscale_top = qscale_table[0] +1; | |
| 219 hc->qscale = qscale_table[1] +1; | |
| 220 | |
| 221 hc->mb_type_top = mb_type_table[0]+1; | |
| 222 hc->mb_type = mb_type_table[1]+1; | |
| 223 hc->ref_index_top[0] = ref_index_table[0][0]; | |
| 224 hc->ref_index_top[1] = ref_index_table[1][0]; | |
| 225 hc->ref_index[0] = ref_index_table[0][1]; | |
| 226 hc->ref_index[1] = ref_index_table[1][1]; | |
| 227 hc->motion_val_top[0] = motion_val_table[0][0]; | |
| 228 hc->motion_val_top[1] = motion_val_table[1][0]; | |
| 229 hc->motion_val[0] = motion_val_table[0][1]; | |
| 230 hc->motion_val[1] = motion_val_table[1][1]; | |
| 231 | |
| 232 int mb_stride = hc->mb_stride; | |
| 233 | |
| 234 if (s->slice_type_nos == FF_B_TYPE){ | |
| 235 while(!dep_resolved(&spe)); | |
| 236 spu_dma_get(list1_mb_type_table[0], (unsigned) (s->list1.mb_type -1), mb_stride*sizeof(uint32_t), ED_get); | |
| 237 spu_dma_get(list1_ref_index_table[0][0], (unsigned) s->list1.ref_index[0], mb_stride*4*sizeof(int8_t), ED_get); | |
| 238 spu_dma_get(list1_ref_index_table[0][1], (unsigned) s->list1.ref_index[1], mb_stride*4*sizeof(int8_t), ED_get); | |
| 239 wait_dma_id(ED_get); | |
| 240 spu_dma_get(list1_mb_type_table[1], (unsigned) (s->list1.mb_type -1 + mb_stride), mb_stride*sizeof(uint32_t), ED_get); | |
| 241 spu_dma_get(list1_ref_index_table[1][0], (unsigned) (s->list1.ref_index[0] + 4*mb_stride), mb_stride*4*sizeof(int8_t), ED_get); | |
| 242 spu_dma_get(list1_ref_index_table[1][1], (unsigned) (s->list1.ref_index[1] + 4*mb_stride), mb_stride*4*sizeof(int8_t), ED_get); | |
| 243 hc->list1_mb_type = list1_mb_type_table[0]+1; | |
| 244 hc->list1_ref_index[0] = list1_ref_index_table[0][0]; | |
| 245 hc->list1_ref_index[1] = list1_ref_index_table[0][1]; | |
| 246 } | |
| 247 | |
| 248 } | |
| 249 | |
/**
 * Swap the row tables after a macroblock row and write the finished row out.
 *
 * Called from main() once per macroblock row with the index of the row that
 * was just decoded. The slot that was "current" becomes "top" and vice versa;
 * the finished row's mb_type, ref_index and motion_val data are written to
 * the picture by DMA. For B slices the neighbouring SPE is notified and the
 * list1 data for row line+2 is requested.
 *
 * @param hc    CABAC context whose row pointers are re-targeted
 * @param s     slice data (pic, list1 and slice_type_nos are read)
 * @param line  index of the macroblock row that has just been decoded
 */
static void update_entropy_buf(H264Cabac_spu *hc, EDSlice_spu *s, int line){
    int mb_stride = hc->mb_stride;
    int mb_width = hc->mb_width;
    /* Slot holding the row just decoded; it is the "top" row for the next one. */
    int top = (line+1)%2;
    /* Slot that will receive the next row. */
    int cur = line%2;
    int bottom = (line+1)%2; //same as top, but to identify prebuffering of next line.

    hc->non_zero_count_top = non_zero_count_table[top];
    hc->non_zero_count = non_zero_count_table[cur];
    hc->mvd_top[0] = mvd_table[0][top];
    hc->mvd[0] = mvd_table[0][cur];
    hc->mvd_top[1] = mvd_table[1][top];
    hc->mvd[1] = mvd_table[1][cur];
    hc->direct_top = direct_table[top];
    hc->direct = direct_table[cur];
    hc->chroma_pred_mode_top = chroma_pred_mode_table[top];
    hc->chroma_pred_mode = chroma_pred_mode_table[cur];
    hc->intra4x4_pred_mode_top = intra4x4_pred_mode_table[top];
    hc->intra4x4_pred_mode = intra4x4_pred_mode_table[cur];
    hc->cbp_top = cbp_table[top];
    hc->cbp = cbp_table[cur];
    hc->qscale_top = qscale_table[top] +1;
    hc->qscale = qscale_table[cur] +1;

    hc->mb_type_top = mb_type_table[top]+1;
    hc->mb_type = mb_type_table[cur]+1;
    hc->ref_index_top[0] = ref_index_table[0][top];
    hc->ref_index_top[1] = ref_index_table[1][top];
    hc->ref_index[0] = ref_index_table[0][cur];
    hc->ref_index[1] = ref_index_table[1][cur];
    hc->motion_val_top[0] = motion_val_table[0][top];
    hc->motion_val_top[1] = motion_val_table[1][top];
    hc->motion_val[0] = motion_val_table[0][cur];
    hc->motion_val[1] = motion_val_table[1][cur];

    /* Wait on the ED_put tag before issuing this row's puts. The `cur` slot,
     * which the next row will overwrite, was the source of the puts issued
     * by the previous call. */
    wait_dma_id(ED_put);

    /* Write the finished row (now in the `top` slot) to its place in the picture. */
    spu_dma_put(mb_type_table[top], (unsigned) (s->pic.mb_type -1 + line*mb_stride), mb_stride*sizeof(uint32_t), ED_put);
    spu_dma_put(ref_index_table[0][top], (unsigned) (s->pic.ref_index[0] + line*4*mb_stride), 4*mb_stride*sizeof(int8_t), ED_put);
    spu_dma_put(ref_index_table[1][top], (unsigned) (s->pic.ref_index[1] + line*4*mb_stride), 4*mb_stride*sizeof(int8_t), ED_put);
    spu_dma_put(motion_val_table[0][top], (unsigned) (s->pic.motion_val[0]+ line*16*mb_width), 16*mb_width*2*sizeof(int16_t), ED_put);
    spu_dma_put(motion_val_table[1][top], (unsigned) (s->pic.motion_val[1]+ line*16*mb_width), 16*mb_width*2*sizeof(int16_t), ED_put);

    if (s->slice_type_nos == FF_B_TYPE){
        /* Publish this SPE's progress; the barrier put shares the ED_put tag
         * with the row puts issued just above. */
        update_tgt_spe_dep(&spe, 0);
        /* Wait for the list1 fetch that was left in flight by the previous
         * call (or by init_entropy_buf for the first row). */
        wait_dma_id(ED_get);

        if (line + 2 < hc->mb_height){
            /* Spin until the neighbouring SPE has reported enough progress,
             * then request list1 row line+2 into the `cur` slot. */
            while(!dep_resolved(&spe));
            spu_dma_get(list1_mb_type_table[cur], (unsigned) (s->list1.mb_type -1 + (line+2)*mb_stride), mb_stride*sizeof(uint32_t), ED_get);
            spu_dma_get(list1_ref_index_table[cur][0], (unsigned) (s->list1.ref_index[0] + (line+2)*4*mb_stride), mb_stride*4*sizeof(int8_t), ED_get);
            spu_dma_get(list1_ref_index_table[cur][1], (unsigned) (s->list1.ref_index[1] + (line+2)*4*mb_stride), mb_stride*4*sizeof(int8_t), ED_get);
        }
        /* The next row reads its list1 data from the `bottom` slot;
         * list1_ref_index_table is indexed [row][list]. */
        hc->list1_mb_type = list1_mb_type_table[bottom]+1;
        hc->list1_ref_index[0] = list1_ref_index_table[bottom][0];
        hc->list1_ref_index[1] = list1_ref_index_table[bottom][1];
    }

}
| 309 | |
| 310 // void printmbdiff(EDSlice_spu *s, H264Cabac_spu *hc, H264Mb *mp, H264Mb *ms){ | |
| 311 // | |
| 312 // printf("mb_x %d, %d\n", mp->mb_x, ms->mb_x); | |
| 313 // printf("mb_y %d, %d\n", mp->mb_y, ms->mb_y); | |
| 314 // printf("mb_xy %d, %d\n", mp->mb_xy, ms->mb_xy); | |
| 315 // printf("top_mb_xy %d, %d\n", mp->top_mb_xy, ms->top_mb_xy); | |
| 316 // printf("left_mb_xy %d, %d\n", mp->left_mb_xy, ms->left_mb_xy); | |
| 317 // printf("chroma_pred_mode %d, %d\n", mp->chroma_pred_mode, ms->chroma_pred_mode); | |
| 318 // printf("intra16x16_pred_mode %d, %d\n", mp->intra16x16_pred_mode, ms->intra16x16_pred_mode); | |
| 319 // printf("topleft_samples %d, %d\n", mp->topleft_samples_available, ms->topleft_samples_available); | |
| 320 // printf("topright_samples %d, %d\n", mp->topright_samples_available, ms->topright_samples_available); | |
| 321 // printf("top_samples %d, %d\n", mp->top_samples_available, ms->top_samples_available); | |
| 322 // printf("left_samples %d, %d\n", mp->left_samples_available, ms->left_samples_available); | |
| 323 // | |
| 324 // if (memcmp(mp->intra4x4_pred_mode_cache, ms->intra4x4_pred_mode_cache, 40)){ | |
| 325 // for (int i=0; i<5; i++){ | |
| 326 // for (int j=0; j<8; j++){ | |
| 327 // printf("%d, %d\t", mp->intra4x4_pred_mode_cache[i*8+j],ms->intra4x4_pred_mode_cache[i*8+j]); | |
| 328 // } | |
| 329 // printf("\n"); | |
| 330 // } | |
| 331 // } | |
| 332 // | |
| 333 // if (memcmp(mp->non_zero_count_cache, ms->non_zero_count_cache, 48)){ | |
| 334 // for (int i=0; i<6; i++){ | |
| 335 // for (int j=0; j<8; j++){ | |
| 336 // printf("%u, %u\t", mp->non_zero_count_cache[i*8+j],ms->non_zero_count_cache[i*8+j]); | |
| 337 // } | |
| 338 // printf("\n"); | |
| 339 // } | |
| 340 // } | |
| 341 // | |
| 342 // if (memcmp(mp->sub_mb_type, ms->sub_mb_type, 8)){ | |
| 343 // for (int i=0; i<4; i++){ | |
| 344 // printf("%u, %u\t", mp->sub_mb_type[i], mp->sub_mb_type[i]); | |
| 345 // printf("\n"); | |
| 346 // } | |
| 347 // } | |
| 348 // | |
| 349 // if (memcmp(mp->mv_cache, ms->mv_cache, 320)){ | |
| 350 // for (int k=0; k<2; k++){ | |
| 351 // for (int i=0; i<5; i++){ | |
| 352 // for (int j=0; j<8; j++){ | |
| 353 // printf("%d, %d, %d, %d\t", mp->mv_cache[k][i*8+j][0], mp->mv_cache[k][i*8+j][1], ms->mv_cache[k][i*8+j][0], ms->mv_cache[k][i*8+j][1]); | |
| 354 // } | |
| 355 // printf("\n"); | |
| 356 // } | |
| 357 // } | |
| 358 // } | |
| 359 // | |
| 360 // if (memcmp(mp->ref_cache, ms->ref_cache, 80)){ | |
| 361 // for (int k=0; k<2; k++){ | |
| 362 // for (int i=0; i<5; i++){ | |
| 363 // for (int j=0; j<8; j++){ | |
| 364 // printf("%d, %d\t", mp->ref_cache[k][i*8+j], ms->ref_cache[k][i*8+j]); | |
| 365 // } | |
| 366 // printf("\n"); | |
| 367 // } | |
| 368 // } | |
| 369 // } | |
| 370 // | |
| 371 // printf("cbp %d, %d\n", mp->cbp, ms->cbp); | |
| 372 // for (int i=0; i<hc->mb_stride; i++){ | |
| 373 // printf("%d, ", hc->cbp[i]); fflush(0); | |
| 374 // } | |
| 375 // printf("\n"); | |
| 376 // | |
| 377 // printf("mb_type %x, %x\n", mp->mb_type, ms->mb_type); | |
| 378 // printf("mb_type IS_INTRA %d, IS_INTRA16x16 %d, IS_DIRECT %d\n", IS_INTRA(ms->mb_type), IS_INTRA16x16(ms->mb_type), IS_DIRECT(ms->mb_type) ); | |
| 379 // printf("left_type %d, %d\n", mp->left_type, ms->left_type); | |
| 380 // printf("top_type %d, %d\n", mp->top_type, ms->top_type); | |
| 381 // printf("qscale_mb_xy %d, %d\n", mp->qscale_mb_xy, ms->qscale_mb_xy); | |
| 382 // printf("qscale_left_mb_xy %d, %d\n", mp->qscale_left_mb_xy, ms->qscale_left_mb_xy); | |
| 383 // printf("qscale_top_mb_xy %d, %d\n", mp->qscale_top_mb_xy, ms->qscale_top_mb_xy); | |
| 384 // // for (int i=0; i<hc->mb_stride; i++){ | |
| 385 // // printf("%d, ", qscale_table[0][i]); fflush(0); | |
| 386 // // } | |
| 387 // | |
| 388 // if (memcmp(mp->mb, ms->mb, 768)){ | |
| 389 // for (int i=0; i<16; i++){ | |
| 390 // for (int j=0; j<16; j++){ | |
| 391 // printf("%d, %d\t", mp->mb[j + i*16], ms->ref_cache[j + i*16]); | |
| 392 // } | |
| 393 // printf("\n"); | |
| 394 // } | |
| 395 // for (int i=0; i<8; i++){ | |
| 396 // for (int j=0; j<8; j++){ | |
| 397 // printf("%d, %d\t", mp->mb[256 + j + i*8], ms->ref_cache[j + i*8]); | |
| 398 // } | |
| 399 // printf("\n"); | |
| 400 // } | |
| 401 // for (int i=0; i<8; i++){ | |
| 402 // for (int j=0; j<8; j++){ | |
| 403 // printf("%d, %d\t", mp->mb[320+ j + i*8], ms->ref_cache[j + i*8]); | |
| 404 // } | |
| 405 // printf("\n"); | |
| 406 // } | |
| 407 // } | |
| 408 // | |
| 409 // if (memcmp(mp->bS, ms->bS, 32)){ | |
| 410 // for (int k=0; k<2; k++){ | |
| 411 // for (int i=0; i<4; i++){ | |
| 412 // for (int j=0; j<4; j++){ | |
| 413 // printf("%d, %d\t", mp->bS[k][i][j], mp->mv_cache[k][i][j]); | |
| 414 // } | |
| 415 // printf("\n"); | |
| 416 // } | |
| 417 // } | |
| 418 // } | |
| 419 // if (memcmp(mp->edges, ms->edges, 4)){ | |
| 420 // printf("edges %d, %d, %d, %d\n", mp->edges[0], ms->edges[0], mp->edges[1], ms->edges[1]); | |
| 421 // printf("deblock %d, %d\n", mp->deblock_mb, ms->deblock_mb); | |
| 422 // } | |
| 423 // | |
| 424 // printf("dequant4_coeff_y %d, %d\n", mp->dequant4_coeff_y, ms->dequant4_coeff_y); | |
| 425 // printf("dequant4_coeff_cb %d, %d\n", mp->dequant4_coeff_cb, ms->dequant4_coeff_cb); | |
| 426 // printf("dequant4_coeff_cr %d, %d\n", mp->dequant4_coeff_cr, ms->dequant4_coeff_cr); | |
| 427 // } | |
| 428 // DECLARE_ALIGNED_16(H264Mb, tmp); | |
| 429 | |
| 430 | |
/**
 * Entry point of the SPE entropy-decoding program.
 *
 * Publishes the local address of slice[] through the outbound mailbox, loads
 * the H264spe description from argp and then loops forever: for each inbound
 * mailbox message it decodes one picture's macroblocks with CABAC, writes
 * every decoded H264Mb to s->mbs by DMA and finally posts 1 to the outbound
 * mailbox.
 *
 * @param id    not used in this function
 * @param argp  address from which the H264spe description is fetched by DMA
 * @return -1 when a macroblock fails to decode; the trailing `return 0` is
 *         only reached if the endless loop is left
 */
int main(unsigned long long id, unsigned long long argp){
    EDSlice_spu *s;
    H264Cabac_spu *hc = &hcabac;
    CABACContext *c = &cabac;
    H264spe *p = &spe;

    /* Publish the address of slice[] through the outbound mailbox. */
    spu_write_out_mbox((unsigned) slice);
    spu_dma_get(p, (unsigned) argp, sizeof(H264spe), ED_spe); // the ED_spe tag is used here out of convenience
    wait_dma_id(ED_spe);

    ff_init_cabac_states();
    init_cabac(p, hc);
    /* blocking == 0 selects the double-buffered macroblock output path below. */
    hc->blocking=0;
    for(;;){
        /* One inbound mailbox message per picture; its value is discarded. */
        spu_read_in_mbox();
        s = &slice[0];
        reset_cabac_buffers();
        init_entropy_buf(hc, s);

        if (hc->blocking) wait_dma_id(ED_get);
        //printf("framesize %d\n", s->byte_bufsize);fflush(0);
        init_dequant_tables(s, hc);
        ff_init_cabac_decoder( c, s->bytestream_start, s->byte_bufsize );
        ff_h264_init_cabac_states(s, c);

        int mb_slot=0;
        for(int j=0; j<hc->mb_height; j++){
            for(int i=0; i<hc->mb_width; i++){
                int eos,ret;
                H264Mb *m = &mb[mb_slot];
                m->mb_x=i;
                m->mb_y=j;
                s->m = m;

                ret = ff_h264_decode_mb_cabac(hc, s, c);

                // Disabled debug check: compare against a reference macroblock.
                // spu_dma_get(&tmp, (unsigned) &s->mbs[j*hc->mb_width + i], sizeof(H264Mb), ED_get);
                // wait_dma_id(ED_get);
                // if (memcmp(&tmp, m, sizeof(H264Mb))){
                // printf("coded pic num %d\n", s->coded_pic_num);
                // printmbdiff(s, hc,&tmp, m);
                // return 0;
                // }
                //printf("qscale %d\n", m->qscale_mb_xy);
                if (!hc->blocking){
                    /* Double buffering: start writing this slot out, then wait
                     * for the OTHER slot's earlier transfer so that slot can be
                     * reused for the next macroblock. */
                    if (mb_slot){
                        spu_dma_put(m, (unsigned) &s->mbs[j*hc->mb_width + i], sizeof(H264Mb), ED_putmb1);
                        wait_dma_id(ED_putmb0);
                    }else {
                        spu_dma_put(m, (unsigned) &s->mbs[j*hc->mb_width + i], sizeof(H264Mb), ED_putmb0);
                        wait_dma_id(ED_putmb1);
                    }
                    mb_slot++; mb_slot%=2;
                }else {
                    /* Blocking mode: single slot, wait for each transfer. */
                    spu_dma_put(m, (unsigned) &s->mbs[j*hc->mb_width + i], sizeof(H264Mb), ED_putmb0);
                    wait_dma_id(ED_putmb0);
                }


                /* NOTE(review): eos is assigned but never read in this function. */
                eos = get_cabac_terminate( c);

                /* The macroblock has already been written out above when the
                 * decode result is checked. */
                if( ret < 0) {
                    fprintf(stderr, "error at %d bytecount\n", bytecount);
                    return -1;
                }
            }
            /* Row finished: swap row tables and write the row's tables out. */
            update_entropy_buf(hc, s, j);
            if (hc->blocking){ wait_dma_id(ED_get); wait_dma_id(ED_put);}
        }
        /* Wait on the ED_put tag, then post 1 to the outbound mailbox. */
        wait_dma_id(ED_put);
        spu_write_out_mbox(1);

    }

    return 0;


}
