annotate libavcodec/cell/spe_mbd.c @ 9:ea1ba68cf0ed

update to match api changes + add sscc produced source
author Nina Engelhardt <nengel@mailbox.tu-berlin.de>
date Wed, 05 Jun 2013 14:43:26 +0200
parents
children
rev   line source
nengel@2 1 /*
nengel@2 2 * Copyright (c) 2009 TUDelft
nengel@2 3 *
nengel@2 4 * Cell Parallel SPU - 2DWave Macroblock Decoding.
nengel@2 5 */
nengel@2 6
nengel@2 7 /**
nengel@2 8 * @file libavcodec/cell/spe_mbd.c
nengel@2 9 * Cell Parallel SPU - 2DWave Macroblock Decoding
nengel@2 10 * @author C C Chi <c.c.chi@student.tudelft.nl>
nengel@2 11 *
nengel@2 12 * SIMD kernels
nengel@2 13 * H.264/AVC motion compensation
nengel@2 14 * @author Mauricio Alvarez <alvarez@ac.upc.edu>
nengel@2 15 * @author Albert Paradis <apar7632@hotmail.com>
nengel@2 16 */
nengel@2 17
nengel@2 18
nengel@2 19 /* Uncomment these lines to enable simulator statistics or to generate traces */
nengel@2 20
nengel@2 21 //#define ENABLE_SIMULATOR
nengel@2 22 //#define ENABLE_PARAVER_TRACING_CELL
nengel@2 23
nengel@2 24 #ifdef ENABLE_SIMULATOR
nengel@2 25 #include "/opt/ibm/systemsim-cell/include/callthru/spu/profile.h"
nengel@2 26 #endif
nengel@2 27
nengel@2 28 #ifdef ENABLE_TRACES
nengel@2 29 #include "spu_trace.h"
nengel@2 30 #endif
nengel@2 31 #include <unistd.h>
nengel@2 32 #include <stdio.h>
nengel@2 33 #include <spu_intrinsics.h>
nengel@2 34 #include <spu_mfcio.h>
nengel@2 35 #include <libsync.h>
nengel@2 36 #include <sys/time.h>
nengel@2 37 #include <assert.h>
nengel@2 38
nengel@2 39 //#include "dsputil_cell.h"
nengel@2 40 #include "types_spu.h"
nengel@2 41 #include "h264_intra_spu.h"
nengel@2 42 #include "h264_decode_mb_spu.h"
nengel@2 43 #include "h264_mc_spu.h"
nengel@2 44 #include "h264_tables.h"
nengel@2 45 #include "h264_dma.h"
nengel@2 46
nengel@2 47
nengel@2 48 /** functions for supporting tracing with paraver for the SPU
nengel@2 49 *
nengel@2 50 */
/**
 * Start tracing on this SPU.
 *
 * Calls SPUtrace_init() when ENABLE_PARAVER_TRACING_CELL is defined; otherwise the
 * body is empty and the call is a no-op.
 *
 * NOTE(review): declared `inline` without `static`. Under C99 inline semantics this
 * provides no external definition; confirm the toolchain uses GNU89 inline semantics.
 */
nengel@2 51 inline void trace_init_SPU(){
nengel@2 52 #ifdef ENABLE_PARAVER_TRACING_CELL
nengel@2 53 SPUtrace_init ();
nengel@2 54 #endif
nengel@2 55 }
nengel@2 56
/**
 * Stop tracing on this SPU.
 *
 * Calls SPUtrace_fini() when ENABLE_PARAVER_TRACING_CELL is defined; otherwise the
 * body is empty and the call is a no-op.
 */
nengel@2 57 inline void trace_fini_SPU(){
nengel@2 58 #ifdef ENABLE_PARAVER_TRACING_CELL
nengel@2 59 SPUtrace_fini ();
nengel@2 60 #endif
nengel@2 61 }
nengel@2 62
/**
 * Emit one trace event.
 *
 * @param event event identifier, forwarded unchanged to SPUtrace_event()
 * @param id    second argument forwarded unchanged to SPUtrace_event()
 *
 * When ENABLE_PARAVER_TRACING_CELL is not defined both parameters are cast to void,
 * which keeps the compiler from warning about unused parameters.
 */
nengel@2 63 inline void trace_event_SPU(int event, int id){
nengel@2 64 #ifdef ENABLE_PARAVER_TRACING_CELL
nengel@2 65 SPUtrace_event (event, id);
nengel@2 66 #else
nengel@2 67 (void) event;
nengel@2 68 (void) id;
nengel@2 69 #endif
nengel@2 70 }
nengel@2 71
nengel@2 72 // for simulator statistics
/**
 * Reset the simulator's profiling counters.
 *
 * Calls prof_clear() when ENABLE_SIMULATOR is defined; otherwise a no-op.
 *
 * NOTE(review): declared `inline` without `static`. Under C99 inline semantics this
 * provides no external definition; confirm the toolchain uses GNU89 inline semantics.
 */
nengel@2 73 inline void clear_statistic(){
nengel@2 74 #ifdef ENABLE_SIMULATOR
nengel@2 75 prof_clear();
nengel@2 76 #endif
nengel@2 77 }
nengel@2 78
/**
 * Start simulator profiling.
 *
 * Calls prof_start() when ENABLE_SIMULATOR is defined; otherwise a no-op.
 */
nengel@2 79 inline void start_statistic(){
nengel@2 80 #ifdef ENABLE_SIMULATOR
nengel@2 81 prof_start();
nengel@2 82 #endif
nengel@2 83 }
nengel@2 84
/**
 * Stop simulator profiling.
 *
 * Calls prof_stop() when ENABLE_SIMULATOR is defined; otherwise a no-op.
 */
nengel@2 85 inline void stop_statistic(){
nengel@2 86 #ifdef ENABLE_SIMULATOR
nengel@2 87 prof_stop();
nengel@2 88 #endif
nengel@2 89 }
nengel@2 90
/* Decoder state of this SPU; main() takes its address and passes it to every helper. */
nengel@2 91 H264Context_spu h_context; // struct that contains all the parameters needed to decode a macroblock
nengel@2 92
/* Source buffer for the progress DMA: update_tgt_spe_dep() stores the counter in
 * dma_temp.count and then DMAs dma_temp to the dependent SPE. */
nengel@2 93 DECLARE_ALIGNED_16(spe_pos, dma_temp); // DMA temp for sending
nengel@2 94 // macroblock position of neighbouring SPEs
/* Polled by dep_resolved() and reset to 0 in main(). The address of this object is
 * also used by update_tgt_spe_dep() to compute where to write in the target SPE. */
nengel@2 95 DECLARE_ALIGNED_16(volatile spe_pos, src_spe); // written by SPE_ID - 1
nengel@2 96 //DECLARE_ALIGNED_16(spe_pos, tgt_spe); //written by SPE_ID +1
nengel@2 97
nengel@2 98 /**
nengel@2 99 * Initializes the buffering of the mb data and the associated mc data. init_mb_buffer needs to
nengel@2 100 * be called before any get_next_mb, and only once at the beginning of the slice.
nengel@2 101 *
nengel@2 102 * Note: init_mb_buffer and get_next_mb expect the width of the picture to be more than 2 mb's
nengel@2 103 */
/* DMA tag bases for the buffer functions in this section: a buffer slot index is
 * added to the base to form the tag that is passed to spu_dma_get()/wait_dma_id(). */
nengel@2 104 #define TAG_OFFSET_MB MBD_buf1
nengel@2 105 #define TAG_OFFSET_MC MBD_mc_buf1
/**
 * Prime the macroblock buffers before the first get_next_mb() call.
 *
 * Resets the slot indices (mc_idx, mb_dec, mb_mc, mb_dma), reduces h->curr_line
 * modulo the picture height in macroblocks and points next_mb_idx, mb_id and n_mc at
 * the first macroblock of that row. It then starts DMA transfers of the first two
 * H264Mb records from s->blocks into mb_buf[0] and mb_buf[1], waits for the first one
 * only, and prepares the motion-compensation data for it in mc_buf[0].
 *
 * @param h decoder context; h->s must already point at the slice to decode
 */
nengel@2 106 static void init_mb_buffer(H264Context_spu* h){
nengel@2 107 H264slice *s = h->s;
nengel@2 108 H264Mb *next_mb;
nengel@2 109 int mb_height = s->mb_height;
nengel@2 110 int mb_width = s->mb_width;
nengel@2 111
nengel@2 112 h->mc_idx =0;
nengel@2 113
nengel@2 114 h->mb_dec = 0;
nengel@2 115 h->mb_mc = 0;
nengel@2 116 h->mb_dma = 0;
nengel@2 117
/* curr_line keeps growing across pictures (get_next_mb adds spe_total per row), so it
 * is folded back into the picture here. */
nengel@2 118 h->curr_line %= mb_height;
nengel@2 119 h->next_mb_idx = h->curr_line * mb_width;
nengel@2 120 h->mb_id = h->curr_line * mb_width;
nengel@2 121 h->n_mc= h->curr_line * mb_width;
nengel@2 122
/* First prefetch: goes into slot 0. */
nengel@2 123 next_mb = s->blocks + h->mb_id;
nengel@2 124 spu_dma_get(&h->mb_buf[h->mb_dma], (unsigned) next_mb, sizeof(H264Mb), h->mb_dma + TAG_OFFSET_MB);
nengel@2 125 h->mb_dma++;
nengel@2 126 h->mb_id++;
nengel@2 127
/* Second prefetch: goes into slot 1. mb_id is advanced without the end-of-row skip
 * that get_next_mb applies, hence the requirement that a row holds more than two
 * macroblocks. */
nengel@2 128 next_mb = s->blocks + h->mb_id;
nengel@2 129 spu_dma_get(&h->mb_buf[h->mb_dma], (unsigned) next_mb, sizeof(H264Mb), h->mb_dma + TAG_OFFSET_MB);
nengel@2 130 h->mb_dma++;
nengel@2 131 h->mb_id++;
/* Only slot 0 has to be complete now; slot 1 is waited for later in get_next_mb. */
nengel@2 132 wait_dma_id(0 + TAG_OFFSET_MB);
nengel@2 133
nengel@2 134 H264Mb *mb = &h->mb_buf[0];
nengel@2 135 H264mc *mc = &h->mc_buf[0];
/* Intra macroblocks get no motion-compensation setup. */
nengel@2 136 if(!IS_INTRA(mb->mb_type)){
nengel@2 137 calc_mc_params(mb, mc);
nengel@2 138 fill_ref_buf(h, mb, mc);
nengel@2 139 }
nengel@2 140 h->n_mc++;
nengel@2 141 h->mb_mc++;
nengel@2 142 }
nengel@2 143
/**
 * Advance the three-stage macroblock pipeline by one step and hand out the next
 * macroblock to decode.
 *
 * mb_buf is used as a ring of three slots and mc_buf as a ring of two. Each call:
 *  1. starts the DMA fetch of macroblock mb_id into slot mb_dma (if any are left),
 *  2. selects mc_buf[mc_idx] as h->mc and waits on its DMA tag,
 *  3. prepares motion-compensation data for the macroblock in slot mb_mc,
 *  4. advances next_mb_idx / curr_line,
 *  5. returns the macroblock in slot mb_dec.
 * Whenever a counter reaches the end of a row it skips (spe_total - 1) rows, so this
 * SPU only visits every spe_total-th row.
 *
 * @param h decoder context previously primed by init_mb_buffer()
 * @return pointer to the H264Mb to decode (inside h->mb_buf), or NULL once
 *         h->curr_line has reached s->mb_height
 */
nengel@2 144 static void *get_next_mb(H264Context_spu *h){
nengel@2 145 H264slice *s = h->s;
nengel@2 146 H264spe *spe = &h->spe;
nengel@2 147 H264Mb *mb_buf = h->mb_buf;
nengel@2 148 H264mc *mc_buf = h->mc_buf;
nengel@2 149 H264Mb *next_mb;
nengel@2 150 H264Mb *next_dma_mb;
nengel@2 151
nengel@2 152 if (h->curr_line >= s->mb_height)
nengel@2 153 return NULL;
nengel@2 154
/* Stage 1: prefetch the next macroblock record. */
nengel@2 155 if (h->mb_id < h->mb_total){
nengel@2 156 next_dma_mb = s->blocks + h->mb_id;
nengel@2 157 spu_dma_get(&mb_buf[h->mb_dma], (unsigned) next_dma_mb, sizeof(H264Mb), h->mb_dma + TAG_OFFSET_MB);
nengel@2 158 h->mb_dma = (h->mb_dma+1)%3;
nengel@2 159 h->mb_id++;
nengel@2 160 if (h->mb_id%s->mb_width ==0){
nengel@2 161 h->mb_id+=(spe->spe_total-1)*s->mb_width;
nengel@2 162 }
nengel@2 163 }
nengel@2 164
/* Stage 2: the mc slot prepared on the previous call becomes the current one.
 * NOTE(review): presumably fill_ref_buf issued the transfers on this tag - confirm. */
nengel@2 165 h->mc = &mc_buf[h->mc_idx];
nengel@2 166 wait_dma_id(h->mc_idx + TAG_OFFSET_MC);
nengel@2 167 h->mc_idx = (h->mc_idx+1)%2;
/* Stage 3: prepare the other mc slot for the macroblock that is decoded next. */
nengel@2 168 if (h->n_mc < h->mb_total){
nengel@2 169 wait_dma_id(h->mb_mc + TAG_OFFSET_MB);
nengel@2 170 H264Mb *mb = &mb_buf[h->mb_mc];
nengel@2 171 H264mc *mc = &mc_buf[h->mc_idx];
nengel@2 172 if(!IS_INTRA(mb->mb_type)){
nengel@2 173 calc_mc_params(mb, mc);
nengel@2 174 fill_ref_buf(h, mb, mc);
nengel@2 175 }
nengel@2 176 h->n_mc++;
nengel@2 177 if (h->n_mc%s->mb_width ==0){
nengel@2 178 h->n_mc+=(spe->spe_total-1)*s->mb_width;
nengel@2 179 }
nengel@2 180 }
/* Stage 4: bookkeeping for the macroblock that is returned below; curr_line drives
 * the termination test at the top of this function. */
nengel@2 181 h->next_mb_idx++;
nengel@2 182 if (h->next_mb_idx % s->mb_width ==0){
nengel@2 183 h->next_mb_idx+=(spe->spe_total-1)*s->mb_width;
nengel@2 184 h->curr_line+=spe->spe_total;
nengel@2 185 }
nengel@2 186
/* Stage 5: rotate the ring indices and return the slot to decode. */
nengel@2 187 h->mb_mc = (h->mb_mc+1)%3;
nengel@2 188 next_mb = &mb_buf[h->mb_dec];
nengel@2 189 h->mb_dec = (h->mb_dec+1)%3;
nengel@2 190 return next_mb;
nengel@2 191 }
nengel@2 192
/**
 * Non-pipelined variant of get_next_mb(): fetch one macroblock and wait for it.
 *
 * Always uses slot 0 of mb_buf and mc_buf. The macroblock record is fetched with tag
 * MBD_buf1 and the function waits for that transfer before preparing the
 * motion-compensation data. mb_id is advanced with the same end-of-row skip as in
 * get_next_mb(); curr_line, next_mb_idx and n_mc are not touched (that code is
 * commented out below).
 *
 * NOTE(review): the wait on the mc tag is commented out, so nothing here waits for
 * whatever fill_ref_buf may have started - confirm this is handled by the caller.
 *
 * @param h decoder context; h->mb_id selects the macroblock to fetch
 * @return pointer to h->mb_buf[0], or NULL once h->mb_id has reached h->mb_total
 */
nengel@2 193 static void *get_next_mb_blocking(H264Context_spu *h){
nengel@2 194 H264slice *s = h->s;
nengel@2 195 H264spe *spe = &h->spe;
nengel@2 196 H264Mb *mb_buf = h->mb_buf;
nengel@2 197 H264mc *mc_buf = h->mc_buf;
nengel@2 198 H264Mb *next_mb;
nengel@2 199 H264Mb *next_dma_mb;
nengel@2 200
nengel@2 201 if (h->mb_id >= h->mb_total)
nengel@2 202 return NULL;
nengel@2 203
nengel@2 204 //printf("%d\n", h->mb_id);
nengel@2 205 next_dma_mb = s->blocks + h->mb_id;
nengel@2 206 spu_dma_get(&mb_buf[0], (unsigned) next_dma_mb, sizeof(H264Mb), MBD_buf1);
nengel@2 207 //h->mb_dma = (h->mb_dma+1)%3;
nengel@2 208 h->mb_id++;
/* At the end of a row, jump over the rows that belong to the other SPEs. */
nengel@2 209 if (h->mb_id%s->mb_width ==0){
nengel@2 210 h->mb_id+=(spe->spe_total-1)*s->mb_width;
nengel@2 211 }
nengel@2 212 wait_dma_id(MBD_buf1);
nengel@2 213
nengel@2 214 h->mc = &mc_buf[0];
nengel@2 215 //h->mc_idx = (h->mc_idx+1)%2;
nengel@2 216 //if (h->n_mc < h->mb_total){
nengel@2 217 H264Mb *mb = &mb_buf[0];
nengel@2 218 H264mc *mc = &mc_buf[0];
/* Intra macroblocks get no motion-compensation setup. */
nengel@2 219 if(!IS_INTRA(mb->mb_type)){
nengel@2 220 calc_mc_params(mb, mc);
nengel@2 221 fill_ref_buf(h, mb, mc);
nengel@2 222 }
nengel@2 223 //h->n_mc++;
nengel@2 224 /*if (h->n_mc%s->mb_width ==0){
nengel@2 225 h->n_mc+=(spe->spe_total-1)*s->mb_width;
nengel@2 226 }*/
nengel@2 227 // wait_dma_id(MBD_mc_buf1);
nengel@2 228
nengel@2 229 // h->next_mb_idx++;
nengel@2 230 // if (h->next_mb_idx % s->mb_width ==0){
nengel@2 231 // h->next_mb_idx+=(spe->spe_total-1)*s->mb_width;
nengel@2 232 // h->curr_line+=spe->spe_total;
nengel@2 233 // }
nengel@2 234
nengel@2 235 // h->mb_mc = (h->mb_mc+1)%3;
nengel@2 236 next_mb = &mb_buf[0];
nengel@2 237 // h->mb_dec = (h->mb_dec+1)%3;
nengel@2 238 return next_mb;
nengel@2 239 }
nengel@2 240
nengel@2 241
/* The tag-offset aliases are only needed by the buffer functions above. */
nengel@2 242 #undef TAG_OFFSET_MB
nengel@2 243 #undef TAG_OFFSET_MC
/**
 * Check whether this SPU may process its next macroblock.
 *
 * Compares this SPU's own progress counter h->mb_proc with the progress counter
 * src_spe.count. The next macroblock is allowed while mb_proc is below count - 1;
 * the SPU with spe_id 0 is granted an additional mb_width of headroom.
 * NOTE(review): presumably because SPE 0 handles the first row, which has no row
 * above it to wait for - confirm.
 *
 * main() calls this in a busy-wait loop.
 *
 * @param h decoder context
 * @return 1 if the dependency is satisfied, 0 otherwise
 */
nengel@2 244 static inline int dep_resolved(H264Context_spu *h){
nengel@2 245 H264slice *s = h->s;
nengel@2 246 int spe_id = h->spe.spe_id;
/* src_spe is volatile, so this is a fresh read of the counter on every call. */
nengel@2 247 volatile int mb_proc_dep = src_spe.count;
nengel@2 248 if (spe_id==0)
nengel@2 249 return (h->mb_proc < mb_proc_dep-1 +s->mb_width)? 1:0;
nengel@2 250 else
nengel@2 251 return (h->mb_proc < mb_proc_dep-1)? 1:0;
nengel@2 252 }
nengel@2 253
nengel@2 254 void update_tgt_spe_dep(H264Context_spu *h, int end){
nengel@2 255 H264Mb *mb = h->mb;
nengel@2 256 H264slice *s = h->s;
nengel@2 257 H264spe *spe = &h->spe;
nengel@2 258 int mb_x = mb->mb_x;
nengel@2 259
nengel@2 260 if (end || (mb_x%2==0 && mb_x!=0) || mb_x==s->mb_width-1){
nengel@2 261 spe_pos* dma_spe = &dma_temp;
nengel@2 262 spe_pos* tgt_spe = (spe_pos*) ((unsigned) spe->tgt_spe + (unsigned) &src_spe); //located in target spe local store
nengel@2 263 dma_spe->count = end? h->mb_proc+1: h->mb_proc;
nengel@2 264 spu_dma_barrier_put(dma_spe, (unsigned) tgt_spe, sizeof(dma_temp), MBD_put);
nengel@2 265 }
nengel@2 266 h->mb_proc++;
nengel@2 267 }
nengel@2 268
nengel@2 269
/**
 * SPU entry point: decode macroblock rows picture after picture until told to stop.
 *
 * @param id   unused
 * @param argp address of the H264spe parameter block for this SPU; it is fetched
 *             into h->spe by DMA
 * @return 0 after a slice with negative state has been received
 *
 * Per iteration of the main loop: wait for a mailbox message, pick the next of the two
 * slice buffers, decode all macroblocks handed out by get_next_mb() (or
 * get_next_mb_blocking() when h->blocking is set), send the final progress update,
 * then update the shared counters rl_cnt and cnt.
 */
nengel@2 270 int main(unsigned long long id, unsigned long long argp)
nengel@2 271 {
nengel@2 272 (void) id;
nengel@2 273 H264Context_spu* h = &h_context;
nengel@2 274 H264spe *spe_params = (H264spe *) (unsigned) argp;
nengel@2 275
nengel@2 276 spu_dma_get(&h->spe, (unsigned) spe_params, sizeof(H264spe), MBD_slice); // the MBD_slice tag is reused here out of convenience
nengel@2 277 wait_dma_id(MBD_slice);
nengel@2 278
nengel@2 279 //clear_statistic();
nengel@2 280 dsputil_h264_init_cell(&h->dsp);
nengel@2 281 ff_cropTbl_init();
nengel@2 282 init_pred_ptrs(&h->hpc);
nengel@2 283
nengel@2 284 //send slice_buf to ppe
/* The value sent is the local address of the slice buffer array.
 * NOTE(review): presumably the PPE writes slice data there - confirm. */
nengel@2 285 spu_write_out_mbox((unsigned) h->slice_buf);
nengel@2 286 h->sl_idx=0;
nengel@2 287 // initialize tracing with paraver
nengel@2 288 //trace_init_SPU();
nengel@2 289 h->frames =0;
nengel@2 290 src_spe.count =0;
nengel@2 291 h->mb_proc = 0;
nengel@2 292
nengel@2 293 h->mb_id=0;
nengel@2 294 h->mc_idx=0;
nengel@2 295 h->mb_dec=0;
nengel@2 296 h->mb_mc=0;
nengel@2 297 h->mb_dma=0;
nengel@2 298 h->next_mb_idx=0;
nengel@2 299
/* Cleared here and not set again by any code visible in this listing, so the
 * pipelined branch below is the one that runs. */
nengel@2 300 h->blocking=0;
nengel@2 301
nengel@2 302
nengel@2 303 H264spe* p = &h->spe;
/* This SPU starts on the row equal to its own id. */
nengel@2 304 h->curr_line =p->spe_id;
nengel@2 305 h->mb_total = p->mb_height*p->mb_width;
nengel@2 306 int stride_y = 32;
nengel@2 307 int stride_c = 16;
nengel@2 308 //init block_offset array
nengel@2 309 init_block_offset(stride_y, stride_c);
nengel@2 310 for(;;){
/* The message value itself is discarded.
 * NOTE(review): presumably the PPE's signal that a slice buffer is ready - confirm. */
nengel@2 311 spu_read_in_mbox();
nengel@2 312
/* Alternate between slice_buf[0] and slice_buf[1]. */
nengel@2 313 h->s = &h->slice_buf[h->sl_idx];
nengel@2 314 h->sl_idx++; h->sl_idx%=2;
nengel@2 315
/* A negative state ends the decode loop. */
nengel@2 316 if (h->s->state< 0){
nengel@2 317 break;
nengel@2 318 }
nengel@2 319
nengel@2 320 {
nengel@2 321 if(!h->blocking){
nengel@2 322 init_mb_buffer(h);
nengel@2 323 while((h->mb=(H264Mb *)get_next_mb(h))){
/* Busy-wait until the dependency counter allows this macroblock. */
nengel@2 324 while(!dep_resolved(h));
nengel@2 325 //printf("frame %d mbx %d\t mby %d id %d\n", h->frames, h->mb->mb_x, h->mb->mb_y, p->spe_id);
nengel@2 326 hl_decode_mb_internal(h, stride_y, stride_c);
nengel@2 327 }
/* h->mb is NULL at this point (loop exit condition), so the callee must not rely on it. */
nengel@2 328 update_tgt_spe_dep(h, 1);
nengel@2 329 }else{
nengel@2 330 h->mb_id=0;
nengel@2 331 while((h->mb=(H264Mb *)get_next_mb_blocking(h))){
nengel@2 332 while(!dep_resolved(h));
nengel@2 333 //printf("frame %d mbx %d\t mby %d id %d\n", h->frames, h->mb->mb_x, h->mb->mb_y, p->spe_id);
nengel@2 334 hl_decode_mb_internal(h, stride_y, stride_c);
nengel@2 335 }
/* h->mb is NULL at this point (loop exit condition), so the callee must not rely on it. */
nengel@2 336 update_tgt_spe_dep(h, 1);
nengel@2 337 }
nengel@2 338
nengel@2 339 }
nengel@2 340
nengel@2 341 h->frames++;
nengel@2 342
/* Only one SPU per picture satisfies this test.
 * NOTE(review): presumably the one that decoded the picture's last row - confirm. */
nengel@2 343 if (p->spe_id == ((h->frames*p->mb_height -1)%p->spe_total)){
nengel@2 344 //printf("spe %d, %d\n", atomic_read(p->rl_cnt), h->frames);
nengel@2 345 //MBSlice is copied beforehand.
nengel@2 346 //only inc cnt.
nengel@2 347 atomic_inc(p->rl_cnt);
nengel@2 348 }
nengel@2 349 {
nengel@2 350 atomic_dec(p->cnt);
nengel@2 351 }
nengel@2 352 }
nengel@2 353
nengel@2 354 return 0;
nengel@2 355 }
nengel@2 356