| rev |
line source |
|
nengel@2
|
1 /*
|
|
nengel@2
|
2 * Copyright (c) 2009 TUDelft
|
|
nengel@2
|
3 *
|
|
nengel@2
|
4 * Cell Parallel SPU - 2DWave Macroblock Decoding.
|
|
nengel@2
|
5 */
|
|
nengel@2
|
6
|
|
nengel@2
|
7 /**
|
|
nengel@2
|
8 * @file libavcodec/cell/spu/h264_main_spu.c
|
|
nengel@2
|
9 * Cell Parallel SPU - 2DWave Macroblock Decoding
|
|
nengel@2
|
10 * @author C C Chi <c.c.chi@student.tudelft.nl>
|
|
nengel@2
|
11 *
|
|
nengel@2
|
12 * SIMD kernels
|
|
nengel@2
|
13 * H.264/AVC motion compensation
|
|
nengel@2
|
14 * @author Mauricio Alvarez <alvarez@ac.upc.edu>
|
|
nengel@2
|
15 * @author Albert Paradis <apar7632@hotmail.com>
|
|
nengel@2
|
16 */
|
|
nengel@2
|
17
|
|
nengel@2
|
18
|
|
nengel@2
|
19 /* Enable these lines to enable simulator statistics or to generate traces */
|
|
nengel@2
|
20
|
|
nengel@2
|
21 //#define ENABLE_SIMULATOR
|
|
nengel@2
|
22 //#define ENABLE_PARAVER_TRACING_CELL
|
|
nengel@2
|
23
|
|
nengel@2
|
24 #ifdef ENABLE_SIMULATOR
|
|
nengel@2
|
25 #include "/opt/ibm/systemsim-cell/include/callthru/spu/profile.h"
|
|
nengel@2
|
26 #endif
|
|
nengel@2
|
27
|
|
nengel@2
|
28 #ifdef ENABLE_TRACES
|
|
nengel@2
|
29 #include "spu_trace.h"
|
|
nengel@2
|
30 #endif
|
|
nengel@2
|
31 #include <unistd.h>
|
|
nengel@2
|
32 #include <stdio.h>
|
|
nengel@2
|
33 #include <spu_intrinsics.h>
|
|
nengel@2
|
34 #include <spu_mfcio.h>
|
|
nengel@2
|
35 #include <libsync.h>
|
|
nengel@2
|
36 #include <sys/time.h>
|
|
nengel@2
|
37 #include <assert.h>
|
|
nengel@2
|
38
|
|
nengel@2
|
39 //#include "dsputil_cell.h"
|
|
nengel@2
|
40 #include "types_spu.h"
|
|
nengel@2
|
41 #include "h264_intra_spu.h"
|
|
nengel@2
|
42 #include "h264_decode_mb_spu.h"
|
|
nengel@2
|
43 #include "h264_mc_spu.h"
|
|
nengel@2
|
44 #include "h264_tables.h"
|
|
nengel@2
|
45 #include "h264_dma.h"
|
|
nengel@2
|
46
|
|
nengel@2
|
47
|
|
nengel@2
|
48 /** functions for supporting tracing with paraver for the SPU
|
|
nengel@2
|
49 *
|
|
nengel@2
|
50 */
|
|
nengel@2
|
/**
 * Starts Paraver tracing on this SPU.
 *
 * Compiles to an empty function unless ENABLE_PARAVER_TRACING_CELL is defined.
 *
 * The function is a plain external function rather than a bare `inline`:
 * under C99/C11 rules an `inline` definition without `static` or `extern`
 * provides no external definition, so a non-inlined call fails to link.
 *
 * NOTE(review): spu_trace.h is included under ENABLE_TRACES, not under
 * ENABLE_PARAVER_TRACING_CELL - confirm that SPUtrace_init() is declared
 * when only the latter macro is defined.
 */
void trace_init_SPU(void){
#ifdef ENABLE_PARAVER_TRACING_CELL
    SPUtrace_init ();
#endif
}
|
|
nengel@2
|
56
|
|
nengel@2
|
/**
 * Stops Paraver tracing on this SPU.
 *
 * Compiles to an empty function unless ENABLE_PARAVER_TRACING_CELL is defined.
 *
 * Defined as a plain external function (not a bare `inline`) so that an
 * external definition exists under C99/C11 inline semantics as well.
 */
void trace_fini_SPU(void){
#ifdef ENABLE_PARAVER_TRACING_CELL
    SPUtrace_fini ();
#endif
}
|
|
nengel@2
|
62
|
|
nengel@2
|
/**
 * Emits one Paraver trace event from this SPU.
 *
 * @param event value forwarded as the first argument of SPUtrace_event()
 * @param id    value forwarded as the second argument of SPUtrace_event()
 *
 * When ENABLE_PARAVER_TRACING_CELL is not defined both arguments are
 * ignored and the function does nothing.
 *
 * Defined as a plain external function (not a bare `inline`) so that an
 * external definition exists under C99/C11 inline semantics as well.
 */
void trace_event_SPU(int event, int id){
#ifdef ENABLE_PARAVER_TRACING_CELL
    SPUtrace_event (event, id);
#else
    /* silence unused-parameter warnings in the non-tracing build */
    (void) event;
    (void) id;
#endif
}
|
|
nengel@2
|
71
|
|
nengel@2
|
72 // for simulator statistic
|
|
nengel@2
|
/**
 * Resets the simulator profiling statistics by calling prof_clear().
 *
 * Compiles to an empty function unless ENABLE_SIMULATOR is defined.
 *
 * Defined as a plain external function (not a bare `inline`) so that an
 * external definition exists under C99/C11 inline semantics as well.
 */
void clear_statistic(void){
#ifdef ENABLE_SIMULATOR
    prof_clear();
#endif
}
|
|
nengel@2
|
78
|
|
nengel@2
|
/**
 * Starts simulator profiling by calling prof_start().
 *
 * Compiles to an empty function unless ENABLE_SIMULATOR is defined.
 *
 * Defined as a plain external function (not a bare `inline`) so that an
 * external definition exists under C99/C11 inline semantics as well.
 */
void start_statistic(void){
#ifdef ENABLE_SIMULATOR
    prof_start();
#endif
}
|
|
nengel@2
|
84
|
|
nengel@2
|
/**
 * Stops simulator profiling by calling prof_stop().
 *
 * Compiles to an empty function unless ENABLE_SIMULATOR is defined.
 *
 * Defined as a plain external function (not a bare `inline`) so that an
 * external definition exists under C99/C11 inline semantics as well.
 */
void stop_statistic(void){
#ifdef ENABLE_SIMULATOR
    prof_stop();
#endif
}
|
|
nengel@2
|
90
|
|
nengel@2
|
91 H264Context_spu h_context; // struct that contain all the params to decode a macroblock
|
|
nengel@2
|
92
|
|
nengel@2
|
93 DECLARE_ALIGNED_16(spe_pos, dma_temp); //dma temp for sending
|
|
nengel@2
|
94 //mb position of neighbouring spes
|
|
nengel@2
|
95 DECLARE_ALIGNED_16(volatile spe_pos, src_spe); //written by SPE_ID -1
|
|
nengel@2
|
96 //DECLARE_ALIGNED_16(spe_pos, tgt_spe); //written by SPE_ID +1
|
|
nengel@2
|
97
|
|
nengel@2
|
98 /**
|
|
nengel@2
|
99 * Initializes the buffering of the mb data and associated mc data. The init_mb_buffer needs to
|
|
nengel@2
|
100 * be called before any get_next_mb and only once at the beginning of the slice.
|
|
nengel@2
|
101 *
|
|
nengel@2
|
102 * Note: init_mb_buffer and get_next_mb expect the width of the picture to be more than 2 mb's
|
|
nengel@2
|
103 */
|
|
nengel@2
|
104 #define TAG_OFFSET_MB MBD_buf1
|
|
nengel@2
|
105 #define TAG_OFFSET_MC MBD_mc_buf1
|
|
nengel@2
|
106 static void init_mb_buffer(H264Context_spu* h){
|
|
nengel@2
|
107 H264slice *s = h->s;
|
|
nengel@2
|
108 H264Mb *next_mb;
|
|
nengel@2
|
109 int mb_height = s->mb_height;
|
|
nengel@2
|
110 int mb_width = s->mb_width;
|
|
nengel@2
|
111
|
|
nengel@2
|
112 h->mc_idx =0;
|
|
nengel@2
|
113
|
|
nengel@2
|
114 h->mb_dec = 0;
|
|
nengel@2
|
115 h->mb_mc = 0;
|
|
nengel@2
|
116 h->mb_dma = 0;
|
|
nengel@2
|
117
|
|
nengel@2
|
118 h->curr_line %= mb_height;
|
|
nengel@2
|
119 h->next_mb_idx = h->curr_line * mb_width;
|
|
nengel@2
|
120 h->mb_id = h->curr_line * mb_width;
|
|
nengel@2
|
121 h->n_mc= h->curr_line * mb_width;
|
|
nengel@2
|
122
|
|
nengel@2
|
123 next_mb = s->blocks + h->mb_id;
|
|
nengel@2
|
124 spu_dma_get(&h->mb_buf[h->mb_dma], (unsigned) next_mb, sizeof(H264Mb), h->mb_dma + TAG_OFFSET_MB);
|
|
nengel@2
|
125 h->mb_dma++;
|
|
nengel@2
|
126 h->mb_id++;
|
|
nengel@2
|
127
|
|
nengel@2
|
128 next_mb = s->blocks + h->mb_id;
|
|
nengel@2
|
129 spu_dma_get(&h->mb_buf[h->mb_dma], (unsigned) next_mb, sizeof(H264Mb), h->mb_dma + TAG_OFFSET_MB);
|
|
nengel@2
|
130 h->mb_dma++;
|
|
nengel@2
|
131 h->mb_id++;
|
|
nengel@2
|
132 wait_dma_id(0 + TAG_OFFSET_MB);
|
|
nengel@2
|
133
|
|
nengel@2
|
134 H264Mb *mb = &h->mb_buf[0];
|
|
nengel@2
|
135 H264mc *mc = &h->mc_buf[0];
|
|
nengel@2
|
136 if(!IS_INTRA(mb->mb_type)){
|
|
nengel@2
|
137 calc_mc_params(mb, mc);
|
|
nengel@2
|
138 fill_ref_buf(h, mb, mc);
|
|
nengel@2
|
139 }
|
|
nengel@2
|
140 h->n_mc++;
|
|
nengel@2
|
141 h->mb_mc++;
|
|
nengel@2
|
142 }
|
|
nengel@2
|
143
|
|
nengel@2
|
144 static void *get_next_mb(H264Context_spu *h){
|
|
nengel@2
|
145 H264slice *s = h->s;
|
|
nengel@2
|
146 H264spe *spe = &h->spe;
|
|
nengel@2
|
147 H264Mb *mb_buf = h->mb_buf;
|
|
nengel@2
|
148 H264mc *mc_buf = h->mc_buf;
|
|
nengel@2
|
149 H264Mb *next_mb;
|
|
nengel@2
|
150 H264Mb *next_dma_mb;
|
|
nengel@2
|
151
|
|
nengel@2
|
152 if (h->curr_line >= s->mb_height)
|
|
nengel@2
|
153 return NULL;
|
|
nengel@2
|
154
|
|
nengel@2
|
155 if (h->mb_id < h->mb_total){
|
|
nengel@2
|
156 next_dma_mb = s->blocks + h->mb_id;
|
|
nengel@2
|
157 spu_dma_get(&mb_buf[h->mb_dma], (unsigned) next_dma_mb, sizeof(H264Mb), h->mb_dma + TAG_OFFSET_MB);
|
|
nengel@2
|
158 h->mb_dma = (h->mb_dma+1)%3;
|
|
nengel@2
|
159 h->mb_id++;
|
|
nengel@2
|
160 if (h->mb_id%s->mb_width ==0){
|
|
nengel@2
|
161 h->mb_id+=(spe->spe_total-1)*s->mb_width;
|
|
nengel@2
|
162 }
|
|
nengel@2
|
163 }
|
|
nengel@2
|
164
|
|
nengel@2
|
165 h->mc = &mc_buf[h->mc_idx];
|
|
nengel@2
|
166 wait_dma_id(h->mc_idx + TAG_OFFSET_MC);
|
|
nengel@2
|
167 h->mc_idx = (h->mc_idx+1)%2;
|
|
nengel@2
|
168 if (h->n_mc < h->mb_total){
|
|
nengel@2
|
169 wait_dma_id(h->mb_mc + TAG_OFFSET_MB);
|
|
nengel@2
|
170 H264Mb *mb = &mb_buf[h->mb_mc];
|
|
nengel@2
|
171 H264mc *mc = &mc_buf[h->mc_idx];
|
|
nengel@2
|
172 if(!IS_INTRA(mb->mb_type)){
|
|
nengel@2
|
173 calc_mc_params(mb, mc);
|
|
nengel@2
|
174 fill_ref_buf(h, mb, mc);
|
|
nengel@2
|
175 }
|
|
nengel@2
|
176 h->n_mc++;
|
|
nengel@2
|
177 if (h->n_mc%s->mb_width ==0){
|
|
nengel@2
|
178 h->n_mc+=(spe->spe_total-1)*s->mb_width;
|
|
nengel@2
|
179 }
|
|
nengel@2
|
180 }
|
|
nengel@2
|
181 h->next_mb_idx++;
|
|
nengel@2
|
182 if (h->next_mb_idx % s->mb_width ==0){
|
|
nengel@2
|
183 h->next_mb_idx+=(spe->spe_total-1)*s->mb_width;
|
|
nengel@2
|
184 h->curr_line+=spe->spe_total;
|
|
nengel@2
|
185 }
|
|
nengel@2
|
186
|
|
nengel@2
|
187 h->mb_mc = (h->mb_mc+1)%3;
|
|
nengel@2
|
188 next_mb = &mb_buf[h->mb_dec];
|
|
nengel@2
|
189 h->mb_dec = (h->mb_dec+1)%3;
|
|
nengel@2
|
190 return next_mb;
|
|
nengel@2
|
191 }
|
|
nengel@2
|
192
|
|
nengel@2
|
193 static void *get_next_mb_blocking(H264Context_spu *h){
|
|
nengel@2
|
194 H264slice *s = h->s;
|
|
nengel@2
|
195 H264spe *spe = &h->spe;
|
|
nengel@2
|
196 H264Mb *mb_buf = h->mb_buf;
|
|
nengel@2
|
197 H264mc *mc_buf = h->mc_buf;
|
|
nengel@2
|
198 H264Mb *next_mb;
|
|
nengel@2
|
199 H264Mb *next_dma_mb;
|
|
nengel@2
|
200
|
|
nengel@2
|
201 if (h->mb_id >= h->mb_total)
|
|
nengel@2
|
202 return NULL;
|
|
nengel@2
|
203
|
|
nengel@2
|
204 //printf("%d\n", h->mb_id);
|
|
nengel@2
|
205 next_dma_mb = s->blocks + h->mb_id;
|
|
nengel@2
|
206 spu_dma_get(&mb_buf[0], (unsigned) next_dma_mb, sizeof(H264Mb), MBD_buf1);
|
|
nengel@2
|
207 //h->mb_dma = (h->mb_dma+1)%3;
|
|
nengel@2
|
208 h->mb_id++;
|
|
nengel@2
|
209 if (h->mb_id%s->mb_width ==0){
|
|
nengel@2
|
210 h->mb_id+=(spe->spe_total-1)*s->mb_width;
|
|
nengel@2
|
211 }
|
|
nengel@2
|
212 wait_dma_id(MBD_buf1);
|
|
nengel@2
|
213
|
|
nengel@2
|
214 h->mc = &mc_buf[0];
|
|
nengel@2
|
215 //h->mc_idx = (h->mc_idx+1)%2;
|
|
nengel@2
|
216 //if (h->n_mc < h->mb_total){
|
|
nengel@2
|
217 H264Mb *mb = &mb_buf[0];
|
|
nengel@2
|
218 H264mc *mc = &mc_buf[0];
|
|
nengel@2
|
219 if(!IS_INTRA(mb->mb_type)){
|
|
nengel@2
|
220 calc_mc_params(mb, mc);
|
|
nengel@2
|
221 fill_ref_buf(h, mb, mc);
|
|
nengel@2
|
222 }
|
|
nengel@2
|
223 //h->n_mc++;
|
|
nengel@2
|
224 /*if (h->n_mc%s->mb_width ==0){
|
|
nengel@2
|
225 h->n_mc+=(spe->spe_total-1)*s->mb_width;
|
|
nengel@2
|
226 }*/
|
|
nengel@2
|
227 // wait_dma_id(MBD_mc_buf1);
|
|
nengel@2
|
228
|
|
nengel@2
|
229 // h->next_mb_idx++;
|
|
nengel@2
|
230 // if (h->next_mb_idx % s->mb_width ==0){
|
|
nengel@2
|
231 // h->next_mb_idx+=(spe->spe_total-1)*s->mb_width;
|
|
nengel@2
|
232 // h->curr_line+=spe->spe_total;
|
|
nengel@2
|
233 // }
|
|
nengel@2
|
234
|
|
nengel@2
|
235 // h->mb_mc = (h->mb_mc+1)%3;
|
|
nengel@2
|
236 next_mb = &mb_buf[0];
|
|
nengel@2
|
237 // h->mb_dec = (h->mb_dec+1)%3;
|
|
nengel@2
|
238 return next_mb;
|
|
nengel@2
|
239 }
|
|
nengel@2
|
240
|
|
nengel@2
|
241
|
|
nengel@2
|
242 #undef TAG_OFFSET_MB
|
|
nengel@2
|
243 #undef TAG_OFFSET_MC
|
|
nengel@2
|
244 static inline int dep_resolved(H264Context_spu *h){
|
|
nengel@2
|
245 H264slice *s = h->s;
|
|
nengel@2
|
246 int spe_id = h->spe.spe_id;
|
|
nengel@2
|
247 volatile int mb_proc_dep = src_spe.count;
|
|
nengel@2
|
248 if (spe_id==0)
|
|
nengel@2
|
249 return (h->mb_proc < mb_proc_dep-1 +s->mb_width)? 1:0;
|
|
nengel@2
|
250 else
|
|
nengel@2
|
251 return (h->mb_proc < mb_proc_dep-1)? 1:0;
|
|
nengel@2
|
252 }
|
|
nengel@2
|
253
|
|
nengel@2
|
254 void update_tgt_spe_dep(H264Context_spu *h, int end){
|
|
nengel@2
|
255 H264Mb *mb = h->mb;
|
|
nengel@2
|
256 H264slice *s = h->s;
|
|
nengel@2
|
257 H264spe *spe = &h->spe;
|
|
nengel@2
|
258 int mb_x = mb->mb_x;
|
|
nengel@2
|
259
|
|
nengel@2
|
260 if (end || (mb_x%2==0 && mb_x!=0) || mb_x==s->mb_width-1){
|
|
nengel@2
|
261 spe_pos* dma_spe = &dma_temp;
|
|
nengel@2
|
262 spe_pos* tgt_spe = (spe_pos*) ((unsigned) spe->tgt_spe + (unsigned) &src_spe); //located in target spe local store
|
|
nengel@2
|
263 dma_spe->count = end? h->mb_proc+1: h->mb_proc;
|
|
nengel@2
|
264 spu_dma_barrier_put(dma_spe, (unsigned) tgt_spe, sizeof(dma_temp), MBD_put);
|
|
nengel@2
|
265 }
|
|
nengel@2
|
266 h->mb_proc++;
|
|
nengel@2
|
267 }
|
|
nengel@2
|
268
|
|
nengel@2
|
269
|
|
nengel@2
|
270 int main(unsigned long long id, unsigned long long argp)
|
|
nengel@2
|
271 {
|
|
nengel@2
|
272 (void) id;
|
|
nengel@2
|
273 H264Context_spu* h = &h_context;
|
|
nengel@2
|
274 H264spe *spe_params = (H264spe *) (unsigned) argp;
|
|
nengel@2
|
275
|
|
nengel@2
|
276 spu_dma_get(&h->spe, (unsigned) spe_params, sizeof(H264spe), MBD_slice); //ID_slice is used out of convienience
|
|
nengel@2
|
277 wait_dma_id(MBD_slice);
|
|
nengel@2
|
278
|
|
nengel@2
|
279 //clear_statistic();
|
|
nengel@2
|
280 dsputil_h264_init_cell(&h->dsp);
|
|
nengel@2
|
281 ff_cropTbl_init();
|
|
nengel@2
|
282 init_pred_ptrs(&h->hpc);
|
|
nengel@2
|
283
|
|
nengel@2
|
284 //send slice_buf to ppe
|
|
nengel@2
|
285 spu_write_out_mbox((unsigned) h->slice_buf);
|
|
nengel@2
|
286 h->sl_idx=0;
|
|
nengel@2
|
287 // initialize tracing with paraver
|
|
nengel@2
|
288 //trace_init_SPU();
|
|
nengel@2
|
289 h->frames =0;
|
|
nengel@2
|
290 src_spe.count =0;
|
|
nengel@2
|
291 h->mb_proc = 0;
|
|
nengel@2
|
292
|
|
nengel@2
|
293 h->mb_id=0;
|
|
nengel@2
|
294 h->mc_idx=0;
|
|
nengel@2
|
295 h->mb_dec=0;
|
|
nengel@2
|
296 h->mb_mc=0;
|
|
nengel@2
|
297 h->mb_dma=0;
|
|
nengel@2
|
298 h->next_mb_idx=0;
|
|
nengel@2
|
299
|
|
nengel@2
|
300 h->blocking=0;
|
|
nengel@2
|
301
|
|
nengel@2
|
302
|
|
nengel@2
|
303 H264spe* p = &h->spe;
|
|
nengel@2
|
304 h->curr_line =p->spe_id;
|
|
nengel@2
|
305 h->mb_total = p->mb_height*p->mb_width;
|
|
nengel@2
|
306 int stride_y = 32;
|
|
nengel@2
|
307 int stride_c = 16;
|
|
nengel@2
|
308 //init block_offset array
|
|
nengel@2
|
309 init_block_offset(stride_y, stride_c);
|
|
nengel@2
|
310 for(;;){
|
|
nengel@2
|
311 spu_read_in_mbox();
|
|
nengel@2
|
312
|
|
nengel@2
|
313 h->s = &h->slice_buf[h->sl_idx];
|
|
nengel@2
|
314 h->sl_idx++; h->sl_idx%=2;
|
|
nengel@2
|
315
|
|
nengel@2
|
316 if (h->s->state< 0){
|
|
nengel@2
|
317 break;
|
|
nengel@2
|
318 }
|
|
nengel@2
|
319
|
|
nengel@2
|
320 {
|
|
nengel@2
|
321 if(!h->blocking){
|
|
nengel@2
|
322 init_mb_buffer(h);
|
|
nengel@2
|
323 while((h->mb=(H264Mb *)get_next_mb(h))){
|
|
nengel@2
|
324 while(!dep_resolved(h));
|
|
nengel@2
|
325 //printf("frame %d mbx %d\t mby %d id %d\n", h->frames, h->mb->mb_x, h->mb->mb_y, p- >spe_id);
|
|
nengel@2
|
326 hl_decode_mb_internal(h, stride_y, stride_c);
|
|
nengel@2
|
327 }
|
|
nengel@2
|
328 update_tgt_spe_dep(h, 1);
|
|
nengel@2
|
329 }else{
|
|
nengel@2
|
330 h->mb_id=0;
|
|
nengel@2
|
331 while((h->mb=(H264Mb *)get_next_mb_blocking(h))){
|
|
nengel@2
|
332 while(!dep_resolved(h));
|
|
nengel@2
|
333 //printf("frame %d mbx %d\t mby %d id %d\n", h->frames, h->mb->mb_x, h->mb->mb_y, p- >spe_id);
|
|
nengel@2
|
334 hl_decode_mb_internal(h, stride_y, stride_c);
|
|
nengel@2
|
335 }
|
|
nengel@2
|
336 update_tgt_spe_dep(h, 1);
|
|
nengel@2
|
337 }
|
|
nengel@2
|
338
|
|
nengel@2
|
339 }
|
|
nengel@2
|
340
|
|
nengel@2
|
341 h->frames++;
|
|
nengel@2
|
342
|
|
nengel@2
|
343 if (p->spe_id == ((h->frames*p->mb_height -1)%p->spe_total)){
|
|
nengel@2
|
344 //printf("spe %d, %d\n", atomic_read(p->rl_cnt), h->frames);
|
|
nengel@2
|
345 //MBSlice is copied beforehand.
|
|
nengel@2
|
346 //only inc cnt.
|
|
nengel@2
|
347 atomic_inc(p->rl_cnt);
|
|
nengel@2
|
348 }
|
|
nengel@2
|
349 {
|
|
nengel@2
|
350 atomic_dec(p->cnt);
|
|
nengel@2
|
351 }
|
|
nengel@2
|
352 }
|
|
nengel@2
|
353
|
|
nengel@2
|
354 return 0;
|
|
nengel@2
|
355 }
|
|
nengel@2
|
356
|