Mercurial > cgi-bin > hgwebdir.cgi > PR > Applications > VSs > VSs__H264__App
comparison libavcodec/cell/spe_mbd.c @ 9:ea1ba68cf0ed
update to match api changes + add sscc produced source
| author | Nina Engelhardt <nengel@mailbox.tu-berlin.de> |
|---|---|
| date | Wed, 05 Jun 2013 14:43:26 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:2e3deeb14dc5 |
|---|---|
| 1 /* | |
| 2 * Copyright (c) 2009 TUDelft | |
| 3 * | |
| 4 * Cell Parallel SPU - 2DWave Macroblock Decoding. | |
| 5 */ | |
| 6 | |
| 7 /** | |
| 8 * @file libavcodec/cell/spu/h264_main_spu.c | |
| 9 * Cell Parallel SPU - 2DWave Macroblock Decoding | |
| 10 * @author C C Chi <c.c.chi@student.tudelft.nl> | |
| 11 * | |
| 12 * SIMD kernels | |
| 13 * H.264/AVC motion compensation | |
| 14 * @author Mauricio Alvarez <alvarez@ac.upc.edu> | |
| 15 * @author Albert Paradis <apar7632@hotmail.com> | |
| 16 */ | |
| 17 | |
| 18 | |
| 19 /* Uncomment these lines to enable simulator statistics or to generate traces */ | |
| 20 | |
| 21 //#define ENABLE_SIMULATOR | |
| 22 //#define ENABLE_PARAVER_TRACING_CELL | |
| 23 | |
| 24 #ifdef ENABLE_SIMULATOR | |
| 25 #include "/opt/ibm/systemsim-cell/include/callthru/spu/profile.h" | |
| 26 #endif | |
| 27 | |
| 28 #ifdef ENABLE_TRACES | |
| 29 #include "spu_trace.h" | |
| 30 #endif | |
| 31 #include <unistd.h> | |
| 32 #include <stdio.h> | |
| 33 #include <spu_intrinsics.h> | |
| 34 #include <spu_mfcio.h> | |
| 35 #include <libsync.h> | |
| 36 #include <sys/time.h> | |
| 37 #include <assert.h> | |
| 38 | |
| 39 //#include "dsputil_cell.h" | |
| 40 #include "types_spu.h" | |
| 41 #include "h264_intra_spu.h" | |
| 42 #include "h264_decode_mb_spu.h" | |
| 43 #include "h264_mc_spu.h" | |
| 44 #include "h264_tables.h" | |
| 45 #include "h264_dma.h" | |
| 46 | |
| 47 | |
| 48 /** functions for supporting tracing with paraver for the SPU | |
| 49 * | |
| 50 */ | |
/**
 * Initialise SPU-side tracing.
 *
 * Calls SPUtrace_init() when ENABLE_PARAVER_TRACING_CELL is defined;
 * otherwise the body is empty.
 *
 * The parameter list is spelled (void) so that this is a real prototype:
 * an empty "()" leaves the arguments unchecked in C99/C11.
 */
inline void trace_init_SPU(void)
{
#ifdef ENABLE_PARAVER_TRACING_CELL
    SPUtrace_init();
#endif
}
| 56 | |
/**
 * Shut down SPU-side tracing.
 *
 * Calls SPUtrace_fini() when ENABLE_PARAVER_TRACING_CELL is defined;
 * otherwise the body is empty.
 *
 * Declared with (void) rather than "()" so the compiler rejects calls
 * that pass arguments.
 */
inline void trace_fini_SPU(void)
{
#ifdef ENABLE_PARAVER_TRACING_CELL
    SPUtrace_fini();
#endif
}
| 62 | |
/**
 * Emit one trace event from this SPU.
 *
 * When ENABLE_PARAVER_TRACING_CELL is defined the pair is forwarded to
 * SPUtrace_event(); when it is not, both arguments are explicitly
 * discarded and nothing else happens.
 *
 * @param event forwarded as the first argument of SPUtrace_event()
 * @param id    forwarded as the second argument of SPUtrace_event()
 */
inline void trace_event_SPU(int event, int id)
{
#ifndef ENABLE_PARAVER_TRACING_CELL
    /* Tracing is compiled out: mark the parameters as intentionally unused. */
    (void) id;
    (void) event;
#else
    SPUtrace_event(event, id);
#endif
}
| 71 | |
| 72 // helpers for the simulator statistics | |
/**
 * Reset the simulator profiling counters.
 *
 * Calls prof_clear() when ENABLE_SIMULATOR is defined; otherwise the
 * body is empty.  Declared with (void) so it is a proper prototype.
 */
inline void clear_statistic(void)
{
#ifdef ENABLE_SIMULATOR
    prof_clear();
#endif
}
| 78 | |
/**
 * Start collecting simulator profiling data.
 *
 * Calls prof_start() when ENABLE_SIMULATOR is defined; otherwise the
 * body is empty.  Declared with (void) so it is a proper prototype.
 */
inline void start_statistic(void)
{
#ifdef ENABLE_SIMULATOR
    prof_start();
#endif
}
| 84 | |
/**
 * Stop collecting simulator profiling data.
 *
 * Calls prof_stop() when ENABLE_SIMULATOR is defined; otherwise the
 * body is empty.  Declared with (void) so it is a proper prototype.
 */
inline void stop_statistic(void)
{
#ifdef ENABLE_SIMULATOR
    prof_stop();
#endif
}
| 90 | |
| 91 H264Context_spu h_context; // single decoder context of this SPU; holds all the parameters needed to decode a macroblock | |
| 92 | |
| 93 DECLARE_ALIGNED_16(spe_pos, dma_temp); // staging buffer: update_tgt_spe_dep() fills it and DMAs it out | |
| 94 // macroblock position of the neighbouring SPEs | |
| 95 DECLARE_ALIGNED_16(volatile spe_pos, src_spe); // written by SPE_ID -1; polled by dep_resolved() | |
| 96 //DECLARE_ALIGNED_16(spe_pos, tgt_spe); //written by SPE_ID +1 | |
| 97 | |
| 98 /** | |
| 99 * Initializes the buffering of the mb data and associated mc data. The init_mb_buffer needs to | |
| 100 * be called before any get_next_mb and only once at the beginning of the slice. | |
| 101 * | |
| 102 * Note: init_mc_buffer and get_next_mb expect the width of the picture to be more than 2 mb's | |
| 103 */ | |
| 104 #define TAG_OFFSET_MB MBD_buf1 | |
| 105 #define TAG_OFFSET_MC MBD_mc_buf1 | |
| 106 static void init_mb_buffer(H264Context_spu* h){ | |
| 107 H264slice *s = h->s; | |
| 108 H264Mb *next_mb; | |
| 109 int mb_height = s->mb_height; | |
| 110 int mb_width = s->mb_width; | |
| 111 | |
| 112 h->mc_idx =0; | |
| 113 | |
| 114 h->mb_dec = 0; | |
| 115 h->mb_mc = 0; | |
| 116 h->mb_dma = 0; | |
| 117 | |
| 118 h->curr_line %= mb_height; | |
| 119 h->next_mb_idx = h->curr_line * mb_width; | |
| 120 h->mb_id = h->curr_line * mb_width; | |
| 121 h->n_mc= h->curr_line * mb_width; | |
| 122 | |
| 123 next_mb = s->blocks + h->mb_id; | |
| 124 spu_dma_get(&h->mb_buf[h->mb_dma], (unsigned) next_mb, sizeof(H264Mb), h->mb_dma + TAG_OFFSET_MB); | |
| 125 h->mb_dma++; | |
| 126 h->mb_id++; | |
| 127 | |
| 128 next_mb = s->blocks + h->mb_id; | |
| 129 spu_dma_get(&h->mb_buf[h->mb_dma], (unsigned) next_mb, sizeof(H264Mb), h->mb_dma + TAG_OFFSET_MB); | |
| 130 h->mb_dma++; | |
| 131 h->mb_id++; | |
| 132 wait_dma_id(0 + TAG_OFFSET_MB); | |
| 133 | |
| 134 H264Mb *mb = &h->mb_buf[0]; | |
| 135 H264mc *mc = &h->mc_buf[0]; | |
| 136 if(!IS_INTRA(mb->mb_type)){ | |
| 137 calc_mc_params(mb, mc); | |
| 138 fill_ref_buf(h, mb, mc); | |
| 139 } | |
| 140 h->n_mc++; | |
| 141 h->mb_mc++; | |
| 142 } | |
| 143 | |
| 144 static void *get_next_mb(H264Context_spu *h){ | |
| 145 H264slice *s = h->s; | |
| 146 H264spe *spe = &h->spe; | |
| 147 H264Mb *mb_buf = h->mb_buf; | |
| 148 H264mc *mc_buf = h->mc_buf; | |
| 149 H264Mb *next_mb; | |
| 150 H264Mb *next_dma_mb; | |
| 151 | |
| 152 if (h->curr_line >= s->mb_height) | |
| 153 return NULL; | |
| 154 | |
| 155 if (h->mb_id < h->mb_total){ | |
| 156 next_dma_mb = s->blocks + h->mb_id; | |
| 157 spu_dma_get(&mb_buf[h->mb_dma], (unsigned) next_dma_mb, sizeof(H264Mb), h->mb_dma + TAG_OFFSET_MB); | |
| 158 h->mb_dma = (h->mb_dma+1)%3; | |
| 159 h->mb_id++; | |
| 160 if (h->mb_id%s->mb_width ==0){ | |
| 161 h->mb_id+=(spe->spe_total-1)*s->mb_width; | |
| 162 } | |
| 163 } | |
| 164 | |
| 165 h->mc = &mc_buf[h->mc_idx]; | |
| 166 wait_dma_id(h->mc_idx + TAG_OFFSET_MC); | |
| 167 h->mc_idx = (h->mc_idx+1)%2; | |
| 168 if (h->n_mc < h->mb_total){ | |
| 169 wait_dma_id(h->mb_mc + TAG_OFFSET_MB); | |
| 170 H264Mb *mb = &mb_buf[h->mb_mc]; | |
| 171 H264mc *mc = &mc_buf[h->mc_idx]; | |
| 172 if(!IS_INTRA(mb->mb_type)){ | |
| 173 calc_mc_params(mb, mc); | |
| 174 fill_ref_buf(h, mb, mc); | |
| 175 } | |
| 176 h->n_mc++; | |
| 177 if (h->n_mc%s->mb_width ==0){ | |
| 178 h->n_mc+=(spe->spe_total-1)*s->mb_width; | |
| 179 } | |
| 180 } | |
| 181 h->next_mb_idx++; | |
| 182 if (h->next_mb_idx % s->mb_width ==0){ | |
| 183 h->next_mb_idx+=(spe->spe_total-1)*s->mb_width; | |
| 184 h->curr_line+=spe->spe_total; | |
| 185 } | |
| 186 | |
| 187 h->mb_mc = (h->mb_mc+1)%3; | |
| 188 next_mb = &mb_buf[h->mb_dec]; | |
| 189 h->mb_dec = (h->mb_dec+1)%3; | |
| 190 return next_mb; | |
| 191 } | |
| 192 | |
| 193 static void *get_next_mb_blocking(H264Context_spu *h){ | |
| 194 H264slice *s = h->s; | |
| 195 H264spe *spe = &h->spe; | |
| 196 H264Mb *mb_buf = h->mb_buf; | |
| 197 H264mc *mc_buf = h->mc_buf; | |
| 198 H264Mb *next_mb; | |
| 199 H264Mb *next_dma_mb; | |
| 200 | |
| 201 if (h->mb_id >= h->mb_total) | |
| 202 return NULL; | |
| 203 | |
| 204 //printf("%d\n", h->mb_id); | |
| 205 next_dma_mb = s->blocks + h->mb_id; | |
| 206 spu_dma_get(&mb_buf[0], (unsigned) next_dma_mb, sizeof(H264Mb), MBD_buf1); | |
| 207 //h->mb_dma = (h->mb_dma+1)%3; | |
| 208 h->mb_id++; | |
| 209 if (h->mb_id%s->mb_width ==0){ | |
| 210 h->mb_id+=(spe->spe_total-1)*s->mb_width; | |
| 211 } | |
| 212 wait_dma_id(MBD_buf1); | |
| 213 | |
| 214 h->mc = &mc_buf[0]; | |
| 215 //h->mc_idx = (h->mc_idx+1)%2; | |
| 216 //if (h->n_mc < h->mb_total){ | |
| 217 H264Mb *mb = &mb_buf[0]; | |
| 218 H264mc *mc = &mc_buf[0]; | |
| 219 if(!IS_INTRA(mb->mb_type)){ | |
| 220 calc_mc_params(mb, mc); | |
| 221 fill_ref_buf(h, mb, mc); | |
| 222 } | |
| 223 //h->n_mc++; | |
| 224 /*if (h->n_mc%s->mb_width ==0){ | |
| 225 h->n_mc+=(spe->spe_total-1)*s->mb_width; | |
| 226 }*/ | |
| 227 // wait_dma_id(MBD_mc_buf1); | |
| 228 | |
| 229 // h->next_mb_idx++; | |
| 230 // if (h->next_mb_idx % s->mb_width ==0){ | |
| 231 // h->next_mb_idx+=(spe->spe_total-1)*s->mb_width; | |
| 232 // h->curr_line+=spe->spe_total; | |
| 233 // } | |
| 234 | |
| 235 // h->mb_mc = (h->mb_mc+1)%3; | |
| 236 next_mb = &mb_buf[0]; | |
| 237 // h->mb_dec = (h->mb_dec+1)%3; | |
| 238 return next_mb; | |
| 239 } | |
| 240 | |
| 241 | |
| 242 #undef TAG_OFFSET_MB | |
| 243 #undef TAG_OFFSET_MC | |
| 244 static inline int dep_resolved(H264Context_spu *h){ | |
| 245 H264slice *s = h->s; | |
| 246 int spe_id = h->spe.spe_id; | |
| 247 volatile int mb_proc_dep = src_spe.count; | |
| 248 if (spe_id==0) | |
| 249 return (h->mb_proc < mb_proc_dep-1 +s->mb_width)? 1:0; | |
| 250 else | |
| 251 return (h->mb_proc < mb_proc_dep-1)? 1:0; | |
| 252 } | |
| 253 | |
| 254 void update_tgt_spe_dep(H264Context_spu *h, int end){ | |
| 255 H264Mb *mb = h->mb; | |
| 256 H264slice *s = h->s; | |
| 257 H264spe *spe = &h->spe; | |
| 258 int mb_x = mb->mb_x; | |
| 259 | |
| 260 if (end || (mb_x%2==0 && mb_x!=0) || mb_x==s->mb_width-1){ | |
| 261 spe_pos* dma_spe = &dma_temp; | |
| 262 spe_pos* tgt_spe = (spe_pos*) ((unsigned) spe->tgt_spe + (unsigned) &src_spe); //located in target spe local store | |
| 263 dma_spe->count = end? h->mb_proc+1: h->mb_proc; | |
| 264 spu_dma_barrier_put(dma_spe, (unsigned) tgt_spe, sizeof(dma_temp), MBD_put); | |
| 265 } | |
| 266 h->mb_proc++; | |
| 267 } | |
| 268 | |
| 269 | |
| 270 int main(unsigned long long id, unsigned long long argp) | |
| 271 { | |
| 272 (void) id; | |
| 273 H264Context_spu* h = &h_context; | |
| 274 H264spe *spe_params = (H264spe *) (unsigned) argp; | |
| 275 | |
| 276 spu_dma_get(&h->spe, (unsigned) spe_params, sizeof(H264spe), MBD_slice); //ID_slice is used out of convienience | |
| 277 wait_dma_id(MBD_slice); | |
| 278 | |
| 279 //clear_statistic(); | |
| 280 dsputil_h264_init_cell(&h->dsp); | |
| 281 ff_cropTbl_init(); | |
| 282 init_pred_ptrs(&h->hpc); | |
| 283 | |
| 284 //send slice_buf to ppe | |
| 285 spu_write_out_mbox((unsigned) h->slice_buf); | |
| 286 h->sl_idx=0; | |
| 287 // initialize tracing with paraver | |
| 288 //trace_init_SPU(); | |
| 289 h->frames =0; | |
| 290 src_spe.count =0; | |
| 291 h->mb_proc = 0; | |
| 292 | |
| 293 h->mb_id=0; | |
| 294 h->mc_idx=0; | |
| 295 h->mb_dec=0; | |
| 296 h->mb_mc=0; | |
| 297 h->mb_dma=0; | |
| 298 h->next_mb_idx=0; | |
| 299 | |
| 300 h->blocking=0; | |
| 301 | |
| 302 | |
| 303 H264spe* p = &h->spe; | |
| 304 h->curr_line =p->spe_id; | |
| 305 h->mb_total = p->mb_height*p->mb_width; | |
| 306 int stride_y = 32; | |
| 307 int stride_c = 16; | |
| 308 //init block_offset array | |
| 309 init_block_offset(stride_y, stride_c); | |
| 310 for(;;){ | |
| 311 spu_read_in_mbox(); | |
| 312 | |
| 313 h->s = &h->slice_buf[h->sl_idx]; | |
| 314 h->sl_idx++; h->sl_idx%=2; | |
| 315 | |
| 316 if (h->s->state< 0){ | |
| 317 break; | |
| 318 } | |
| 319 | |
| 320 { | |
| 321 if(!h->blocking){ | |
| 322 init_mb_buffer(h); | |
| 323 while((h->mb=(H264Mb *)get_next_mb(h))){ | |
| 324 while(!dep_resolved(h)); | |
| 325 //printf("frame %d mbx %d\t mby %d id %d\n", h->frames, h->mb->mb_x, h->mb->mb_y, p- >spe_id); | |
| 326 hl_decode_mb_internal(h, stride_y, stride_c); | |
| 327 } | |
| 328 update_tgt_spe_dep(h, 1); | |
| 329 }else{ | |
| 330 h->mb_id=0; | |
| 331 while((h->mb=(H264Mb *)get_next_mb_blocking(h))){ | |
| 332 while(!dep_resolved(h)); | |
| 333 //printf("frame %d mbx %d\t mby %d id %d\n", h->frames, h->mb->mb_x, h->mb->mb_y, p- >spe_id); | |
| 334 hl_decode_mb_internal(h, stride_y, stride_c); | |
| 335 } | |
| 336 update_tgt_spe_dep(h, 1); | |
| 337 } | |
| 338 | |
| 339 } | |
| 340 | |
| 341 h->frames++; | |
| 342 | |
| 343 if (p->spe_id == ((h->frames*p->mb_height -1)%p->spe_total)){ | |
| 344 //printf("spe %d, %d\n", atomic_read(p->rl_cnt), h->frames); | |
| 345 //MBSlice is copied beforehand. | |
| 346 //only inc cnt. | |
| 347 atomic_inc(p->rl_cnt); | |
| 348 } | |
| 349 { | |
| 350 atomic_dec(p->cnt); | |
| 351 } | |
| 352 } | |
| 353 | |
| 354 return 0; | |
| 355 } | |
| 356 |
