| rev |
line source |
|
nengel@2
|
1
|
|
nengel@2
|
2 #include "h264_types.h"
|
|
nengel@2
|
3 #include "h264_parser.h"
|
|
nengel@2
|
4 #include "h264_nal.h"
|
|
nengel@2
|
5 #include "h264_entropy.h"
|
|
nengel@2
|
6 #include "h264_rec.h"
|
|
nengel@2
|
7 #include "h264_misc.h"
|
|
nengel@2
|
8 #include "cell/h264_types_spu.h"
|
|
nengel@2
|
9 #include "h264_pthread.h"
|
|
nengel@2
|
10
|
|
nengel@2
|
11 #include <pthread.h>
|
|
nengel@2
|
12 #include <assert.h>
|
|
nengel@2
|
13 #include <unistd.h>
|
|
nengel@2
|
14
|
|
nengel@2
|
15 #include <libspe2.h>
|
|
nengel@2
|
16 #include <ppu_intrinsics.h>
|
|
nengel@2
|
17 #include <cbe_mfc.h>
|
|
nengel@2
|
18 #include <libsync.h>
|
|
nengel@2
|
19
|
|
nengel@2
|
20 // spe global variables
|
|
nengel@2
|
21 unsigned rl_cnt_var, rl_mutex_var, rl_cond_var;
|
|
nengel@2
|
22 atomic_ea_t rl_cnt;
|
|
nengel@2
|
23 cond_ea_t rl_cond;
|
|
nengel@2
|
24 mutex_ea_t rl_lock;
|
|
nengel@2
|
25
|
|
nengel@2
|
26 H264spe * spe_params;
|
|
nengel@2
|
27 unsigned mutex_var[16];
|
|
nengel@2
|
28 unsigned cond_var[16];
|
|
nengel@2
|
29 unsigned atomic_var[16];
|
|
nengel@2
|
30
|
|
nengel@2
|
31 pthread_t * spe_tid;
|
|
nengel@2
|
32 spe_context_ptr_t *spe_context;
|
|
nengel@2
|
33 void** spe_control_area;
|
|
nengel@2
|
34 void** spe_ls_area;
|
|
nengel@2
|
35 H264slice **spe_slice_buf;
|
|
nengel@2
|
36
|
|
nengel@2
|
37 H264spe * spe_ed_params;
|
|
nengel@2
|
38 unsigned mutex_ed_var[16];
|
|
nengel@2
|
39 unsigned cond_ed_var[16];
|
|
nengel@2
|
40 unsigned atomic_ed_var[16];
|
|
nengel@2
|
41
|
|
nengel@2
|
42 pthread_t * spe_ed_tid;
|
|
nengel@2
|
43 spe_context_ptr_t *spe_ed_context;
|
|
nengel@2
|
44 void** spe_ed_control_area;
|
|
nengel@2
|
45 void** spe_ed_ls_area;
|
|
nengel@2
|
46 EDSlice_spu **spe_ed_slice_buf;
|
|
nengel@2
|
47
|
|
nengel@2
|
48 //structs to propagate stop signal
|
|
nengel@2
|
49 MBSlice last_slice;
|
|
nengel@2
|
50 EDSlice last_ed_slice;
|
|
nengel@2
|
51 DecodedPicture last_pic;
|
|
nengel@2
|
52 RawFrame last_frm;
|
|
nengel@2
|
53
|
|
nengel@2
|
54 static int direct_B_resolved(EDSlice *s, int *poc_list, int *poc_cnt){
|
|
nengel@2
|
55 int i;
|
|
nengel@2
|
56 int cnt = *poc_cnt;
|
|
nengel@2
|
57 for(i=0; i<cnt; i++){
|
|
nengel@2
|
58 if (poc_list[i]==s->ref_list[1][0]->poc){
|
|
nengel@2
|
59 *poc_cnt=i+1;
|
|
nengel@2
|
60 while(++i<cnt)
|
|
nengel@2
|
61 poc_list[i]=0;
|
|
nengel@2
|
62 return 1;
|
|
nengel@2
|
63 }
|
|
nengel@2
|
64 }
|
|
nengel@2
|
65 return 0;
|
|
nengel@2
|
66 }
|
|
nengel@2
|
67
|
|
nengel@2
|
/**
 * Insert poc into poc_list, which is kept in descending order.
 *
 * The new value is placed in front of the first valid entry that is not
 * greater than it (or appended when every valid entry is greater), and
 * *poc_cnt is incremented.
 *
 * @param poc_list  array with room for at least *poc_cnt + 1 entries
 * @param poc_cnt   in/out: number of valid entries in poc_list
 * @param poc       value to insert
 */
static void update_IP_poc_list(int *poc_list, int *poc_cnt, int poc) {
    const int cnt = *poc_cnt;
    int i = 0;

    // Only the first cnt entries are valid: stop the scan there instead of
    // relying on whatever happens to be stored behind them.
    while (i < cnt && poc_list[i] > poc)
        i++;

    // Open a gap at i by shifting the tail one slot to the right.
    if (i < cnt)
        memmove(&poc_list[i+1], &poc_list[i], (cnt-i)*sizeof(int));

    poc_list[i] = poc;
    (*poc_cnt)++;
}
|
|
nengel@2
|
79
|
|
nengel@2
|
80 static void *spe_ed_thread(void *arg){
|
|
nengel@2
|
81 H264spe *params = (H264spe *)arg;
|
|
nengel@2
|
82 unsigned int idx = params->idx;
|
|
nengel@2
|
83 unsigned int runflags = 0;
|
|
nengel@2
|
84 unsigned int entry = SPE_DEFAULT_ENTRY;
|
|
nengel@2
|
85 // run SPE context
|
|
nengel@2
|
86 spe_context_run(spe_ed_context[idx], &entry, runflags, (void*) params, NULL, NULL);
|
|
nengel@2
|
87 // done - now exit thread
|
|
nengel@2
|
88 pthread_exit(NULL);
|
|
nengel@2
|
89 }
|
|
nengel@2
|
90
|
|
nengel@2
|
91 static void create_spe_ED_threads(H264Context *h, int ip_threads, int b_threads) {
|
|
nengel@2
|
92 int i;
|
|
nengel@2
|
93 int num_threads = ip_threads+b_threads;
|
|
nengel@2
|
94 spe_program_handle_t * spe_program = spe_image_open("spe_ed");
|
|
nengel@2
|
95 // reserve memory for spe thread id, context and argument addresses
|
|
nengel@2
|
96 spe_ed_tid = av_malloc(num_threads * sizeof (pthread_t));
|
|
nengel@2
|
97 spe_ed_context = av_malloc(num_threads * sizeof (spe_context_ptr_t));
|
|
nengel@2
|
98 spe_ed_params = av_malloc(num_threads * sizeof (H264spe));
|
|
nengel@2
|
99 spe_ed_control_area = av_malloc(num_threads * sizeof (void*));
|
|
nengel@2
|
100 spe_ed_ls_area = av_malloc(num_threads * sizeof (void*));
|
|
nengel@2
|
101 spe_ed_slice_buf = av_malloc(num_threads * sizeof (void*));
|
|
nengel@2
|
102
|
|
nengel@2
|
103 if (spe_program == NULL)
|
|
nengel@2
|
104 av_log(AV_LOG_ERROR, "PPE: error opening SPE object image:%d. error=%s \n", errno, strerror(errno));
|
|
nengel@2
|
105
|
|
nengel@2
|
106 for (i = 0; i < num_threads; i++) {
|
|
nengel@2
|
107 // create context for spe program
|
|
nengel@2
|
108 spe_ed_context[i] = spe_context_create(SPE_MAP_PS, NULL);
|
|
nengel@2
|
109 if (spe_ed_context[i] == NULL)
|
|
nengel@2
|
110 av_log(AV_LOG_ERROR, "PPE: error creating SPE context:%d. error=%s \n", errno, strerror(errno));
|
|
nengel@2
|
111 // load SPE program into main memory
|
|
nengel@2
|
112 if ((spe_program_load(spe_ed_context[i], spe_program)) == -1)
|
|
nengel@2
|
113 av_log(AV_LOG_ERROR, "PPE: error loading SPE context:%d. error=%s \n", errno, strerror(errno));
|
|
nengel@2
|
114 //get the control_area for fast mailboxing
|
|
nengel@2
|
115 if ((spe_ed_control_area[i] = spe_ps_area_get(spe_ed_context[i], SPE_CONTROL_AREA)) == NULL)
|
|
nengel@2
|
116 av_log(AV_LOG_ERROR, "PPE: error retrieving SPE control area:%d. error=%s \n", errno, strerror(errno));
|
|
nengel@2
|
117 //get ls area for inter spe communication
|
|
nengel@2
|
118 if ((spe_ed_ls_area[i] = spe_ls_area_get(spe_ed_context[i])) == NULL)
|
|
nengel@2
|
119 av_log(AV_LOG_ERROR, "PPE: error retrieving SPE ls area:%d. error=%s \n", errno, strerror(errno));
|
|
nengel@2
|
120 }
|
|
nengel@2
|
121
|
|
nengel@2
|
122 for (i = 0; i < ip_threads; i++) {
|
|
nengel@2
|
123 spe_ed_params[i].mb_width = h->mb_width;
|
|
nengel@2
|
124 spe_ed_params[i].mb_stride = h->mb_stride;
|
|
nengel@2
|
125 spe_ed_params[i].mb_height = h->mb_height;
|
|
nengel@2
|
126 spe_ed_params[i].type = EDIP;
|
|
nengel@2
|
127 spe_ed_params[i].spe_id = i;
|
|
nengel@2
|
128 spe_ed_params[i].idx = i;
|
|
nengel@2
|
129 //spe_ed_params[i].spe_total = ip_threads; //not used
|
|
nengel@2
|
130 //spe_params[i].slice_params= &slice_params;
|
|
nengel@2
|
131 spe_ed_params[i].src_spe = spe_ed_ls_area[(i-1+num_threads)%num_threads];
|
|
nengel@2
|
132 spe_ed_params[i].tgt_spe = spe_ed_ls_area[(i+1)%num_threads];
|
|
nengel@2
|
133
|
|
nengel@2
|
134 spe_ed_params[i].lock = (mutex_ea_t) (unsigned) &mutex_ed_var[i];
|
|
nengel@2
|
135 spe_ed_params[i].cond = (cond_ea_t) (unsigned) &cond_ed_var[i];
|
|
nengel@2
|
136 spe_ed_params[i].cnt = (atomic_ea_t)(unsigned) &atomic_ed_var[i]; atomic_set(spe_ed_params[i].cnt, 0);
|
|
nengel@2
|
137
|
|
nengel@2
|
138 mutex_init(spe_ed_params[i].lock);
|
|
nengel@2
|
139 cond_init(spe_ed_params[i].cond);
|
|
nengel@2
|
140 if (pthread_create(&spe_ed_tid[i], NULL, spe_ed_thread, (void *) &spe_ed_params[i]))
|
|
nengel@2
|
141 av_log(AV_LOG_ERROR, "create_workers: pthread create for spe failed %d\n", i);
|
|
nengel@2
|
142
|
|
nengel@2
|
143 //slicebufaddr
|
|
nengel@2
|
144 spe_ed_slice_buf[i] = (EDSlice_spu *) _spe_out_mbox_read(spe_ed_control_area[i]);
|
|
nengel@2
|
145 av_log(AV_LOG_DEBUG, "create_workers: created spe thread %d\n", i);
|
|
nengel@2
|
146 }
|
|
nengel@2
|
147 for (int j = 0; j < b_threads; j++) {
|
|
nengel@2
|
148 i = j+ip_threads;
|
|
nengel@2
|
149 spe_ed_params[i].mb_width = h->mb_width;
|
|
nengel@2
|
150 spe_ed_params[i].mb_stride = h->mb_stride;
|
|
nengel@2
|
151 spe_ed_params[i].mb_height = h->mb_height;
|
|
nengel@2
|
152 spe_ed_params[i].type = EDB;
|
|
nengel@2
|
153 spe_ed_params[i].idx = i;
|
|
nengel@2
|
154 spe_ed_params[i].spe_id = j;
|
|
nengel@2
|
155 spe_ed_params[i].spe_total = b_threads;
|
|
nengel@2
|
156 //spe_params[i].slice_params= &slice_params;
|
|
nengel@2
|
157 //spe_ed_params[i].src_spe = spe_ed_ls_area[(i-1+num_threads)%num_threads];
|
|
nengel@2
|
158 spe_ed_params[i].tgt_spe = spe_ed_ls_area[((j+1)%b_threads) + ip_threads];
|
|
nengel@2
|
159
|
|
nengel@2
|
160 spe_ed_params[i].lock = (mutex_ea_t) (unsigned) &mutex_ed_var[i];
|
|
nengel@2
|
161 spe_ed_params[i].cond = (cond_ea_t) (unsigned) &cond_ed_var[i];
|
|
nengel@2
|
162 spe_ed_params[i].cnt = (atomic_ea_t)(unsigned) &atomic_ed_var[i]; atomic_set(spe_ed_params[i].cnt, 0);
|
|
nengel@2
|
163
|
|
nengel@2
|
164 mutex_init(spe_ed_params[i].lock);
|
|
nengel@2
|
165 cond_init(spe_ed_params[i].cond);
|
|
nengel@2
|
166 if (pthread_create(&spe_ed_tid[i], NULL, spe_ed_thread, (void *) &spe_ed_params[i]))
|
|
nengel@2
|
167 av_log(AV_LOG_ERROR, "create_workers: pthread create for spe failed %d\n", i);
|
|
nengel@2
|
168
|
|
nengel@2
|
169 //slicebufaddr
|
|
nengel@2
|
170 spe_ed_slice_buf[i] = (EDSlice_spu *) _spe_out_mbox_read(spe_ed_control_area[i]);
|
|
nengel@2
|
171 av_log(AV_LOG_DEBUG, "create_workers: created spe thread %d\n", i);
|
|
nengel@2
|
172 }
|
|
nengel@2
|
173 spe_image_close(spe_program);
|
|
nengel@2
|
174
|
|
nengel@2
|
175 }
|
|
nengel@2
|
176
|
|
nengel@2
|
177 static void fill_EDSlice_spu(EDSlice_spu *dst, EDSlice *src){
|
|
nengel@2
|
178 dst->pps = src->pps;
|
|
nengel@2
|
179 dst->mbs = src->mbs;
|
|
nengel@2
|
180 dst->state = src->state;
|
|
nengel@2
|
181 dst->qp_thresh = src->qp_thresh;
|
|
nengel@2
|
182 dst->pic = *src->current_picture;
|
|
nengel@2
|
183
|
|
nengel@2
|
184 dst->ref_count[0] = src->ref_count[0];
|
|
nengel@2
|
185 dst->ref_count[1] = src->ref_count[1];
|
|
nengel@2
|
186 dst->slice_type = src->slice_type;
|
|
nengel@2
|
187 dst->slice_type_nos = src->slice_type_nos;
|
|
nengel@2
|
188 dst->direct_8x8_inference_flag = src->direct_8x8_inference_flag;
|
|
nengel@2
|
189 dst->list_count = src->list_count;
|
|
nengel@2
|
190 dst->coded_pic_num = src->coded_pic_num;
|
|
nengel@2
|
191
|
|
nengel@2
|
192 GetBitContext *gb = &src->gb;
|
|
nengel@2
|
193 align_get_bits( gb);
|
|
nengel@2
|
194 dst->bytestream_start = gb->buffer + get_bits_count(gb)/8;
|
|
nengel@2
|
195 dst->byte_bufsize = (get_bits_left(gb) + 7)/8;
|
|
nengel@2
|
196
|
|
nengel@2
|
197 dst->transform_bypass = src->transform_bypass;
|
|
nengel@2
|
198 dst->direct_spatial_mv_pred = src->direct_spatial_mv_pred;
|
|
nengel@2
|
199 memcpy(dst->map_col_to_list0, src->map_col_to_list0, 2*16*sizeof(int));
|
|
nengel@2
|
200 memcpy(dst->dist_scale_factor, src->dist_scale_factor, 16*sizeof(int));
|
|
nengel@2
|
201 dst->cabac_init_idc = src->cabac_init_idc;
|
|
nengel@2
|
202 memcpy(dst->ref2frm, src->ref2frm, 2*64*sizeof(int));
|
|
nengel@2
|
203 dst->chroma_qp[0]= src->chroma_qp[0];
|
|
nengel@2
|
204 dst->chroma_qp[1]= src->chroma_qp[1];
|
|
nengel@2
|
205 dst->qscale = src->qscale;
|
|
nengel@2
|
206 dst->last_qscale_diff = src->last_qscale_diff;
|
|
nengel@2
|
207
|
|
nengel@2
|
208 if (src->slice_type_nos == FF_B_TYPE) dst->list1 = *src->ref_list[1][0];
|
|
nengel@2
|
209 }
|
|
nengel@2
|
210
|
|
nengel@2
|
211 static void send_slice_to_spe_and_wait(EDSlice_spu *s, int id){
|
|
nengel@2
|
212 unsigned status;
|
|
nengel@2
|
213
|
|
nengel@2
|
214 spe_mfcio_get(spe_ed_context[id], (unsigned) spe_ed_slice_buf[id], s, sizeof(EDSlice_spu), 14, 0, 0);
|
|
nengel@2
|
215 spe_mfcio_tag_status_read(spe_ed_context[id], 1<<14, SPE_TAG_ALL, &status);
|
|
nengel@2
|
216
|
|
nengel@2
|
217
|
|
nengel@2
|
218 _spe_in_mbox_write(spe_ed_control_area[id], 0);
|
|
nengel@2
|
219
|
|
nengel@2
|
220 while (!spe_out_mbox_status(spe_ed_context[id])){
|
|
nengel@2
|
221 //pthread_yield();
|
|
nengel@2
|
222 usleep(1000);
|
|
nengel@2
|
223 }
|
|
nengel@2
|
224 _spe_out_mbox_read(spe_ed_control_area[id]);
|
|
nengel@2
|
225 }
|
|
nengel@2
|
226
|
|
nengel@2
|
227 static int decode_slice_entropy_cell(EntropyContext *ec, EDSlice *s, int id){
|
|
nengel@2
|
228 int i,j;
|
|
nengel@2
|
229
|
|
nengel@2
|
230 if( !s->pps.cabac ){
|
|
nengel@2
|
231 av_log(AV_LOG_ERROR, "Only cabac encoded streams are supported\n");
|
|
nengel@2
|
232 return -1;
|
|
nengel@2
|
233 }
|
|
nengel@2
|
234 DECLARE_ALIGNED(16, EDSlice_spu, slice);
|
|
nengel@2
|
235 fill_EDSlice_spu(&slice, s);
|
|
nengel@2
|
236
|
|
nengel@2
|
237 send_slice_to_spe_and_wait(&slice, id);
|
|
nengel@2
|
238
|
|
nengel@2
|
239 return 0;
|
|
nengel@2
|
240 }
|
|
nengel@2
|
241
|
|
nengel@2
|
242 static int decode_slice_entropy_cell_seq(H264Context *h, EntropyContext *ec, EDSlice *s){
|
|
nengel@2
|
243 int i,j;
|
|
nengel@2
|
244
|
|
nengel@2
|
245 if( !s->pps.cabac ){
|
|
nengel@2
|
246 av_log(AV_LOG_ERROR, "Only cabac encoded streams are supported\n");
|
|
nengel@2
|
247 return -1;
|
|
nengel@2
|
248 }
|
|
nengel@2
|
249 DECLARE_ALIGNED(16, EDSlice_spu, slice);
|
|
nengel@2
|
250 fill_EDSlice_spu(&slice, s);
|
|
nengel@2
|
251
|
|
nengel@2
|
252 send_slice_to_spe_and_wait(&slice, 0);
|
|
nengel@2
|
253
|
|
nengel@2
|
254 if (s->release_cnt>0) {
|
|
nengel@2
|
255 for (int i=0; i<s->release_cnt; i++){
|
|
nengel@2
|
256 release_pib_entry(h, s->release_ref[i], 2);
|
|
nengel@2
|
257 }
|
|
nengel@2
|
258 s->release_cnt=0;
|
|
nengel@2
|
259 }
|
|
nengel@2
|
260
|
|
nengel@2
|
261 release_pib_entry(h, s->current_picture, 1);
|
|
nengel@2
|
262 av_freep(&s->gb.raw);
|
|
nengel@2
|
263 if (s->gb.rbsp)
|
|
nengel@2
|
264 av_freep(&s->gb.rbsp);
|
|
nengel@2
|
265
|
|
nengel@2
|
266 return 0;
|
|
nengel@2
|
267 }
|
|
nengel@2
|
268
|
|
nengel@2
|
269 static void *entr_IP_spe_thread(void *arg){
|
|
nengel@2
|
270 EDThreadContext *eip = (EDThreadContext *) arg;
|
|
nengel@2
|
271 H264Context *h = eip->h;
|
|
nengel@2
|
272 // printf("eip %d, pid %d\n", eip->thread_num, syscall(SYS_gettid));
|
|
nengel@2
|
273 for (int i=0; i<SLICE_BUFS; i++){
|
|
nengel@2
|
274 eip->mbs[i] = av_malloc(h->mb_height*h->mb_width*sizeof(H264Mb));
|
|
nengel@2
|
275 }
|
|
nengel@2
|
276
|
|
nengel@2
|
277 EntropyContext *ec = get_entropy_context(h);
|
|
nengel@2
|
278 EDSlice *s;
|
|
nengel@2
|
279
|
|
nengel@2
|
280 for(;;){
|
|
nengel@2
|
281 {
|
|
nengel@2
|
282 pthread_mutex_lock(&eip->ed_lock);
|
|
nengel@2
|
283 while (eip->ed_cnt <= 0)
|
|
nengel@2
|
284 pthread_cond_wait(&eip->ed_cond, &eip->ed_lock);
|
|
nengel@2
|
285 s = &eip->ed_q[eip->ed_fo];
|
|
nengel@2
|
286 eip->ed_fo++; eip->ed_fo %= MAX_SLICE_COUNT;
|
|
nengel@2
|
287 pthread_mutex_unlock(&eip->ed_lock);
|
|
nengel@2
|
288 }
|
|
nengel@2
|
289
|
|
nengel@2
|
290 if (s->state<0)
|
|
nengel@2
|
291 break;
|
|
nengel@2
|
292 {
|
|
nengel@2
|
293 pthread_mutex_lock(&eip->mbs_lock);
|
|
nengel@2
|
294 while (eip->mbs_cnt <= 0)
|
|
nengel@2
|
295 pthread_cond_wait(&eip->mbs_cond, &eip->mbs_lock);
|
|
nengel@2
|
296
|
|
nengel@2
|
297 s->mbs = eip->mbs[eip->mbs_fo];
|
|
nengel@2
|
298 s->ed = eip;
|
|
nengel@2
|
299 eip->mbs_cnt--;
|
|
nengel@2
|
300 eip->mbs_fo++; eip->mbs_fo%=SLICE_BUFS;
|
|
nengel@2
|
301 pthread_mutex_unlock(&eip->mbs_lock);
|
|
nengel@2
|
302 }
|
|
nengel@2
|
303 if (eip->cell){
|
|
nengel@2
|
304 decode_slice_entropy_cell(ec, s, eip->thread_num);
|
|
nengel@2
|
305 }else{
|
|
nengel@2
|
306 decode_slice_entropy(ec, s);
|
|
nengel@2
|
307 }
|
|
nengel@2
|
308
|
|
nengel@2
|
309 // {
|
|
nengel@2
|
310 // pthread_mutex_lock(&h->lock[ENTROPY2]);
|
|
nengel@2
|
311 // h->ed_poc[h->ed_poc_fi++ % MAX_SLICE_COUNT] = s->current_picture->poc;
|
|
nengel@2
|
312 // while (h->ed_poc_fi > h->ed_poc_fo + MAX_SLICE_COUNT)
|
|
nengel@2
|
313 // h->ed_poc_fo++;
|
|
nengel@2
|
314 //
|
|
nengel@2
|
315 // pthread_cond_signal(&h->cond[ENTROPY2]);
|
|
nengel@2
|
316 // pthread_mutex_unlock(&h->lock[ENTROPY2]);
|
|
nengel@2
|
317 // }
|
|
nengel@2
|
318
|
|
nengel@2
|
319 {
|
|
nengel@2
|
320 pthread_mutex_lock(&h->lock[ENTROPY4]);
|
|
nengel@2
|
321 while (h->ed_reorder_cnt>=MAX_SLICE_COUNT)
|
|
nengel@2
|
322 pthread_cond_wait(&h->cond[ENTROPY4], &h->lock[ENTROPY4]);
|
|
nengel@2
|
323 h->ed_reorder_q[h->ed_reorder_fi] = *s;
|
|
nengel@2
|
324 h->ed_reorder_cnt++;
|
|
nengel@2
|
325 h->ed_reorder_fi++; h->ed_reorder_fi %= MAX_SLICE_COUNT;
|
|
nengel@2
|
326 pthread_cond_signal(&h->cond[ENTROPY4]);
|
|
nengel@2
|
327 pthread_mutex_unlock(&h->lock[ENTROPY4]);
|
|
nengel@2
|
328 }
|
|
nengel@2
|
329
|
|
nengel@2
|
330 {
|
|
nengel@2
|
331 pthread_mutex_lock(&eip->ed_lock);
|
|
nengel@2
|
332 eip->ed_cnt--;
|
|
nengel@2
|
333 pthread_cond_signal(&eip->ed_cond);
|
|
nengel@2
|
334 pthread_mutex_unlock(&eip->ed_lock);
|
|
nengel@2
|
335 }
|
|
nengel@2
|
336 }
|
|
nengel@2
|
337
|
|
nengel@2
|
338 free_entropy_context(ec);
|
|
nengel@2
|
339
|
|
nengel@2
|
340 pthread_exit(NULL);
|
|
nengel@2
|
341 return NULL;
|
|
nengel@2
|
342 }
|
|
nengel@2
|
343
|
|
nengel@2
|
344 static void *entr_B_spe_thread(void *arg){
|
|
nengel@2
|
345 EDThreadContext *eb = (EDThreadContext *) arg;
|
|
nengel@2
|
346 H264Context *h = eb->h;
|
|
nengel@2
|
347 // printf("eb %d, pid %d\n", eb->thread_num, syscall(SYS_gettid));
|
|
nengel@2
|
348 for (int i=0; i<SLICE_BUFS; i++){
|
|
nengel@2
|
349 eb->mbs[i] = av_malloc(h->mb_height*h->mb_width*sizeof(H264Mb));
|
|
nengel@2
|
350 }
|
|
nengel@2
|
351
|
|
nengel@2
|
352 EntropyContext *ec = get_entropy_context(h);
|
|
nengel@2
|
353 EDSlice *s;
|
|
nengel@2
|
354
|
|
nengel@2
|
355 for(;;){
|
|
nengel@2
|
356 {
|
|
nengel@2
|
357 pthread_mutex_lock(&eb->ed_lock);
|
|
nengel@2
|
358 while (eb->ed_cnt <= 0)
|
|
nengel@2
|
359 pthread_cond_wait(&eb->ed_cond, &eb->ed_lock);
|
|
nengel@2
|
360 s = &eb->ed_q[eb->ed_fo];
|
|
nengel@2
|
361 eb->ed_fo++; eb->ed_fo %= MAX_SLICE_COUNT;
|
|
nengel@2
|
362 pthread_mutex_unlock(&eb->ed_lock);
|
|
nengel@2
|
363 }
|
|
nengel@2
|
364
|
|
nengel@2
|
365 if (s->state<0)
|
|
nengel@2
|
366 break;
|
|
nengel@2
|
367 {
|
|
nengel@2
|
368 pthread_mutex_lock(&eb->mbs_lock);
|
|
nengel@2
|
369 while (eb->mbs_cnt <= 0)
|
|
nengel@2
|
370 pthread_cond_wait(&eb->mbs_cond, &eb->mbs_lock);
|
|
nengel@2
|
371 s->mbs = eb->mbs[eb->mbs_fo];
|
|
nengel@2
|
372 s->ed = eb;
|
|
nengel@2
|
373 eb->mbs_cnt--;
|
|
nengel@2
|
374 eb->mbs_fo++; eb->mbs_fo%=SLICE_BUFS;
|
|
nengel@2
|
375 pthread_mutex_unlock(&eb->mbs_lock);
|
|
nengel@2
|
376 }
|
|
nengel@2
|
377 //decode_B_slice_entropy(&hcabac, &cabac, s, eb, eb->prev_ed);
|
|
nengel@2
|
378 decode_slice_entropy_cell(ec, s, eb->thread_num + h->edip_threads);
|
|
nengel@2
|
379
|
|
nengel@2
|
380 {
|
|
nengel@2
|
381 pthread_mutex_lock(&h->lock[ENTROPY4]);
|
|
nengel@2
|
382 while (h->ed_reorder_cnt>=MAX_SLICE_COUNT)
|
|
nengel@2
|
383 pthread_cond_wait(&h->cond[ENTROPY4], &h->lock[ENTROPY4]);
|
|
nengel@2
|
384 h->ed_reorder_q[h->ed_reorder_fi] = *s;
|
|
nengel@2
|
385 h->ed_reorder_cnt++;
|
|
nengel@2
|
386 h->ed_reorder_fi++; h->ed_reorder_fi %= MAX_SLICE_COUNT;
|
|
nengel@2
|
387 pthread_cond_signal(&h->cond[ENTROPY4]);
|
|
nengel@2
|
388 pthread_mutex_unlock(&h->lock[ENTROPY4]);
|
|
nengel@2
|
389
|
|
nengel@2
|
390 }
|
|
nengel@2
|
391
|
|
nengel@2
|
392 {
|
|
nengel@2
|
393 pthread_mutex_lock(&eb->ed_lock);
|
|
nengel@2
|
394 eb->ed_cnt--;
|
|
nengel@2
|
395 pthread_cond_signal(&eb->ed_cond);
|
|
nengel@2
|
396 pthread_mutex_unlock(&eb->ed_lock);
|
|
nengel@2
|
397 }
|
|
nengel@2
|
398 }
|
|
nengel@2
|
399 eb->lines_cnt++;
|
|
nengel@2
|
400
|
|
nengel@2
|
401 free_entropy_context(ec);
|
|
nengel@2
|
402
|
|
nengel@2
|
403 pthread_exit(NULL);
|
|
nengel@2
|
404 return NULL;
|
|
nengel@2
|
405 }
|
|
nengel@2
|
406
|
|
nengel@2
|
407 static void *entr_B_distribute(void *arg){
|
|
nengel@2
|
408 H264Context *h = (H264Context *) arg;
|
|
nengel@2
|
409 EDSlice *s;
|
|
nengel@2
|
410
|
|
nengel@2
|
411 int i, n=0, poc;
|
|
nengel@2
|
412
|
|
nengel@2
|
413 // printf("eb dist, pid %d\n", syscall(SYS_gettid));
|
|
nengel@2
|
414
|
|
nengel@2
|
415 for(i=0; i<h->edb_threads; i++){
|
|
nengel@2
|
416 h->b[i].h =h;
|
|
nengel@2
|
417 h->b[i].thread_num =i;
|
|
nengel@2
|
418 h->b[i].thread_total =h->edb_threads;
|
|
nengel@2
|
419 pthread_mutex_init(&h->b[i].mbs_lock, NULL);
|
|
nengel@2
|
420 pthread_cond_init(&h->b[i].mbs_cond, NULL);
|
|
nengel@2
|
421 h->b[i].mbs_fo = 0;
|
|
nengel@2
|
422 h->b[i].mbs_cnt = SLICE_BUFS;
|
|
nengel@2
|
423 h->b[i].ed_fi =0;
|
|
nengel@2
|
424 h->b[i].ed_fo =0;
|
|
nengel@2
|
425 h->b[i].ed_cnt =0;
|
|
nengel@2
|
426 h->b[i].lines_cnt =0;
|
|
nengel@2
|
427 h->b[i].prev_ed = &h->b[(i-1 +h->edb_threads) % h->edb_threads];
|
|
nengel@2
|
428 pthread_mutex_init(&h->b[i].ed_lock, NULL);
|
|
nengel@2
|
429 pthread_cond_init(&h->b[i].ed_cond, NULL);
|
|
nengel@2
|
430 pthread_create(&h->ed_B_thr[i], NULL, entr_B_spe_thread, &h->b[i]);
|
|
nengel@2
|
431 }
|
|
nengel@2
|
432
|
|
nengel@2
|
433 for(;;){
|
|
nengel@2
|
434 {
|
|
nengel@2
|
435 pthread_mutex_lock(&h->lock[ENTROPY3B]);
|
|
nengel@2
|
436 while (h->ed_B_cnt<=0)
|
|
nengel@2
|
437 pthread_cond_wait(&h->cond[ENTROPY3B], &h->lock[ENTROPY3B]);
|
|
nengel@2
|
438 s= &h->ed_B_q[h->ed_B_fo];
|
|
nengel@2
|
439 h->ed_B_fo++; h->ed_B_fo %= MAX_SLICE_COUNT;
|
|
nengel@2
|
440 pthread_mutex_unlock(&h->lock[ENTROPY3B]);
|
|
nengel@2
|
441
|
|
nengel@2
|
442 }
|
|
nengel@2
|
443 if (s->state<0)
|
|
nengel@2
|
444 break;
|
|
nengel@2
|
445
|
|
nengel@2
|
446 if (s->ref_list[1][0]->slice_type_nos != FF_B_TYPE){
|
|
nengel@2
|
447 while (poc < s->ref_list[1][0]->poc){
|
|
nengel@2
|
448 pthread_mutex_lock(&h->lock[ENTROPY2]);
|
|
nengel@2
|
449 while (poc == h->ed_poc)
|
|
nengel@2
|
450 pthread_cond_wait(&h->cond[ENTROPY2], &h->lock[ENTROPY2]);
|
|
nengel@2
|
451 poc = h->ed_poc;
|
|
nengel@2
|
452 pthread_mutex_unlock(&h->lock[ENTROPY2]);
|
|
nengel@2
|
453 }
|
|
nengel@2
|
454 }
|
|
nengel@2
|
455 {
|
|
nengel@2
|
456 pthread_mutex_lock(&h->b[n].ed_lock);
|
|
nengel@2
|
457 while (h->b[n].ed_cnt >= MAX_SLICE_COUNT)
|
|
nengel@2
|
458 pthread_cond_wait(&h->b[n].ed_cond, &h->b[n].ed_lock);
|
|
nengel@2
|
459 h->b[n].ed_q[ h->b[n].ed_fi] = *s;
|
|
nengel@2
|
460 h->b[n].ed_cnt++;
|
|
nengel@2
|
461 h->b[n].ed_fi++; h->b[n].ed_fi %= MAX_SLICE_COUNT;
|
|
nengel@2
|
462 pthread_cond_signal(&h->b[n].ed_cond);
|
|
nengel@2
|
463 pthread_mutex_unlock(&h->b[n].ed_lock);
|
|
nengel@2
|
464
|
|
nengel@2
|
465 n++; n%=h->edb_threads;
|
|
nengel@2
|
466 }
|
|
nengel@2
|
467 {
|
|
nengel@2
|
468 pthread_mutex_lock(&h->lock[ENTROPY3B]);
|
|
nengel@2
|
469 h->ed_B_cnt--;
|
|
nengel@2
|
470 pthread_cond_signal(&h->cond[ENTROPY3B]);
|
|
nengel@2
|
471 pthread_mutex_unlock(&h->lock[ENTROPY3B]);
|
|
nengel@2
|
472
|
|
nengel@2
|
473 }
|
|
nengel@2
|
474
|
|
nengel@2
|
475 }
|
|
nengel@2
|
476
|
|
nengel@2
|
477 for (i=0; i<h->edb_threads; i++){
|
|
nengel@2
|
478 pthread_mutex_lock(&h->b[i].ed_lock);
|
|
nengel@2
|
479 while (h->b[i].ed_cnt >= MAX_SLICE_COUNT)
|
|
nengel@2
|
480 pthread_cond_wait(&h->b[i].ed_cond, &h->b[i].ed_lock);
|
|
nengel@2
|
481 h->b[i].ed_q[ h->b[i].ed_fi] = *s;
|
|
nengel@2
|
482 h->b[i].ed_cnt++;
|
|
nengel@2
|
483 h->b[i].ed_fi++; h->b[i].ed_fi %= MAX_SLICE_COUNT;
|
|
nengel@2
|
484 pthread_cond_signal(&h->b[i].ed_cond);
|
|
nengel@2
|
485 pthread_mutex_unlock(&h->b[i].ed_lock);
|
|
nengel@2
|
486
|
|
nengel@2
|
487 }
|
|
nengel@2
|
488 for(int i=0; i<h->edb_threads; i++){
|
|
nengel@2
|
489 pthread_join(h->ed_B_thr[i], NULL);
|
|
nengel@2
|
490 }
|
|
nengel@2
|
491 pthread_exit(NULL);
|
|
nengel@2
|
492 return NULL;
|
|
nengel@2
|
493 }
|
|
nengel@2
|
494
|
|
nengel@2
|
495
|
|
nengel@2
|
496 static void *entr_IPB_distribute(void *arg){
|
|
nengel@2
|
497 H264Context *h = (H264Context *) arg;
|
|
nengel@2
|
498 EDSlice *s;
|
|
nengel@2
|
499 int i,n=0;
|
|
nengel@2
|
500
|
|
nengel@2
|
501 create_spe_ED_threads(h, h->edip_threads, h->edb_threads);
|
|
nengel@2
|
502 pthread_create(&h->ed_B_dist, NULL, entr_B_distribute, h);
|
|
nengel@2
|
503 for(i=0; i<h->edip_threads + h->edip_ppe_threads; i++){
|
|
nengel@2
|
504 h->ip[i].h =h;
|
|
nengel@2
|
505 h->ip[i].cell = (i >= h->edip_ppe_threads);
|
|
nengel@2
|
506 pthread_mutex_init(&h->ip[i].mbs_lock, NULL);
|
|
nengel@2
|
507 pthread_cond_init(&h->ip[i].mbs_cond, NULL);
|
|
nengel@2
|
508 h->ip[i].thread_num = i - h->edip_ppe_threads;
|
|
nengel@2
|
509 h->ip[i].thread_total=h->edip_threads+ h->edip_ppe_threads;
|
|
nengel@2
|
510 h->ip[i].mbs_fo = 0;
|
|
nengel@2
|
511 h->ip[i].mbs_cnt = SLICE_BUFS;
|
|
nengel@2
|
512 h->ip[i].ed_fi =0;
|
|
nengel@2
|
513 h->ip[i].ed_fo =0;
|
|
nengel@2
|
514 pthread_mutex_init(&h->ip[i].ed_lock, NULL);
|
|
nengel@2
|
515 pthread_cond_init(&h->ip[i].ed_cond, NULL);
|
|
nengel@2
|
516 pthread_create(&h->ed_IP_thr[i], NULL, entr_IP_spe_thread, &h->ip[i]);
|
|
nengel@2
|
517 }
|
|
nengel@2
|
518
|
|
nengel@2
|
519 for(;;){
|
|
nengel@2
|
520 {
|
|
nengel@2
|
521 pthread_mutex_lock(&h->lock[ENTROPY]);
|
|
nengel@2
|
522 while (h->ed_cnt<=0)
|
|
nengel@2
|
523 pthread_cond_wait(&h->cond[ENTROPY], &h->lock[ENTROPY]);
|
|
nengel@2
|
524 s= &h->ed_q[h->ed_fo];
|
|
nengel@2
|
525
|
|
nengel@2
|
526 pthread_mutex_unlock(&h->lock[ENTROPY]);
|
|
nengel@2
|
527 h->ed_fo++; h->ed_fo %= MAX_SLICE_COUNT;
|
|
nengel@2
|
528 }
|
|
nengel@2
|
529 if (s->state<0)
|
|
nengel@2
|
530 break;
|
|
nengel@2
|
531
|
|
nengel@2
|
532 assert(s->current_picture);
|
|
nengel@2
|
533 if (s->slice_type_nos == FF_B_TYPE )
|
|
nengel@2
|
534 {
|
|
nengel@2
|
535 pthread_mutex_lock(&h->lock[ENTROPY3B]);
|
|
nengel@2
|
536 while (h->ed_B_cnt>=MAX_SLICE_COUNT)
|
|
nengel@2
|
537 pthread_cond_wait(&h->cond[ENTROPY3B], &h->lock[ENTROPY3B]);
|
|
nengel@2
|
538 h->ed_B_q[h->ed_B_fi] = *s;
|
|
nengel@2
|
539 h->ed_B_cnt++;
|
|
nengel@2
|
540 h->ed_B_fi++; h->ed_B_fi %= MAX_SLICE_COUNT;
|
|
nengel@2
|
541 pthread_cond_signal(&h->cond[ENTROPY3B]);
|
|
nengel@2
|
542 pthread_mutex_unlock(&h->lock[ENTROPY3B]);
|
|
nengel@2
|
543 }else
|
|
nengel@2
|
544 {
|
|
nengel@2
|
545 ///round robin now, change to based on rawframes size.
|
|
nengel@2
|
546 pthread_mutex_lock(&h->ip[n].ed_lock);
|
|
nengel@2
|
547 while (h->ip[n].ed_cnt >= MAX_SLICE_COUNT)
|
|
nengel@2
|
548 pthread_cond_wait(&h->ip[n].ed_cond, &h->ip[n].ed_lock);
|
|
nengel@2
|
549 h->ip[n].ed_q[ h->ip[n].ed_fi] = *s;
|
|
nengel@2
|
550 h->ip[n].ed_cnt++;
|
|
nengel@2
|
551 h->ip[n].ed_fi++; h->ip[n].ed_fi %= MAX_SLICE_COUNT;
|
|
nengel@2
|
552 pthread_cond_signal(&h->ip[n].ed_cond);
|
|
nengel@2
|
553 pthread_mutex_unlock(&h->ip[n].ed_lock);
|
|
nengel@2
|
554
|
|
nengel@2
|
555 n++; n %=(h->edip_threads+h->edip_ppe_threads);
|
|
nengel@2
|
556 }
|
|
nengel@2
|
557 {
|
|
nengel@2
|
558 pthread_mutex_lock(&h->lock[ENTROPY]);
|
|
nengel@2
|
559 h->ed_cnt--;
|
|
nengel@2
|
560 pthread_cond_signal(&h->cond[ENTROPY]);
|
|
nengel@2
|
561 pthread_mutex_unlock(&h->lock[ENTROPY]);
|
|
nengel@2
|
562
|
|
nengel@2
|
563 }
|
|
nengel@2
|
564 }
|
|
nengel@2
|
565
|
|
nengel@2
|
566 {
|
|
nengel@2
|
567 pthread_mutex_lock(&h->lock[ENTROPY3B]);
|
|
nengel@2
|
568 while (h->ed_B_cnt>=MAX_SLICE_COUNT)
|
|
nengel@2
|
569 pthread_cond_wait(&h->cond[ENTROPY3B], &h->lock[ENTROPY3B]);
|
|
nengel@2
|
570 h->ed_B_q[h->ed_B_fi] = *s;
|
|
nengel@2
|
571 h->ed_B_cnt++;
|
|
nengel@2
|
572 h->ed_B_fi++; h->ed_B_fi %= MAX_SLICE_COUNT;
|
|
nengel@2
|
573 pthread_cond_signal(&h->cond[ENTROPY3B]);
|
|
nengel@2
|
574 pthread_mutex_unlock(&h->lock[ENTROPY3B]);
|
|
nengel@2
|
575 }
|
|
nengel@2
|
576 {
|
|
nengel@2
|
577 for (i=0; i<h->edip_threads + h->edip_ppe_threads; i++){
|
|
nengel@2
|
578 pthread_mutex_lock(&h->ip[i].ed_lock);
|
|
nengel@2
|
579 while (h->ip[i].ed_cnt >= MAX_SLICE_COUNT)
|
|
nengel@2
|
580 pthread_cond_wait(&h->ip[i].ed_cond, &h->ip[i].ed_lock);
|
|
nengel@2
|
581 h->ip[i].ed_q[ h->ip[i].ed_fi] = *s;
|
|
nengel@2
|
582 h->ip[i].ed_cnt++;
|
|
nengel@2
|
583 h->ip[i].ed_fi++; h->ip[i].ed_fi %= MAX_SLICE_COUNT;
|
|
nengel@2
|
584 pthread_cond_signal(&h->ip[i].ed_cond);
|
|
nengel@2
|
585 pthread_mutex_unlock(&h->ip[i].ed_lock);
|
|
nengel@2
|
586 }
|
|
nengel@2
|
587 }
|
|
nengel@2
|
588 {
|
|
nengel@2
|
589 pthread_mutex_lock(&h->lock[ENTROPY4]);
|
|
nengel@2
|
590 while (h->ed_reorder_cnt>=MAX_SLICE_COUNT)
|
|
nengel@2
|
591 pthread_cond_wait(&h->cond[ENTROPY4], &h->lock[ENTROPY4]);
|
|
nengel@2
|
592 h->ed_reorder_q[h->ed_reorder_fi] = *s;
|
|
nengel@2
|
593 h->ed_reorder_cnt++;
|
|
nengel@2
|
594 h->ed_reorder_fi++; h->ed_reorder_fi %= MAX_SLICE_COUNT;
|
|
nengel@2
|
595 pthread_cond_signal(&h->cond[ENTROPY4]);
|
|
nengel@2
|
596 pthread_mutex_unlock(&h->lock[ENTROPY4]);
|
|
nengel@2
|
597
|
|
nengel@2
|
598 }
|
|
nengel@2
|
599 pthread_join(h->ed_B_dist, NULL);
|
|
nengel@2
|
600 for(i=0; i<h->edip_threads; i++){
|
|
nengel@2
|
601 pthread_join(h->ed_IP_thr[i], NULL);
|
|
nengel@2
|
602 }
|
|
nengel@2
|
603 pthread_exit(NULL);
|
|
nengel@2
|
604 return NULL;
|
|
nengel@2
|
605 }
|
|
nengel@2
|
606
|
|
nengel@2
|
607 static pthread_t ed_IPB_dist;
|
|
nengel@2
|
608 static void *entropy_IPB_cell_thread(void *arg){
|
|
nengel@2
|
609 H264Context *h = (H264Context *) arg;
|
|
nengel@2
|
610 int i;
|
|
nengel@2
|
611 EDSlice reorder[MAX_SLICE_COUNT];
|
|
nengel@2
|
612 int ip_poc[MAX_SLICE_COUNT][2]={0,};
|
|
nengel@2
|
613 int next_ip_id=0;
|
|
nengel@2
|
614 int ip_poc_cnt=0;
|
|
nengel@2
|
615 EDSlice *s;
|
|
nengel@2
|
616 int reorder_cnt=0;
|
|
nengel@2
|
617 unsigned next_pic_num=0;
|
|
nengel@2
|
618
|
|
nengel@2
|
619 pthread_create(&ed_IPB_dist, NULL, entr_IPB_distribute, h);
|
|
nengel@2
|
620 int count =0;
|
|
nengel@2
|
621 for(;;){
|
|
nengel@2
|
622 //signals received from the entropy decoders
|
|
nengel@2
|
623 {
|
|
nengel@2
|
624 pthread_mutex_lock(&h->lock[ENTROPY4]);
|
|
nengel@2
|
625 while (h->ed_reorder_cnt<=0)
|
|
nengel@2
|
626 pthread_cond_wait(&h->cond[ENTROPY4], &h->lock[ENTROPY4]);
|
|
nengel@2
|
627 s= &h->ed_reorder_q[h->ed_reorder_fo];
|
|
nengel@2
|
628 h->ed_reorder_fo++; h->ed_reorder_fo %=MAX_SLICE_COUNT;
|
|
nengel@2
|
629 pthread_mutex_unlock(&h->lock[ENTROPY4]);
|
|
nengel@2
|
630 }
|
|
nengel@2
|
631
|
|
nengel@2
|
632 if (s->state >=0 && s->slice_type_nos != FF_B_TYPE){
|
|
nengel@2
|
633 for (i=0; i<ip_poc_cnt; i++){
|
|
nengel@2
|
634 if (s->ip_id < ip_poc[i][0]){
|
|
nengel@2
|
635 memmove(ip_poc[i+1], ip_poc[i], 2*(ip_poc_cnt-i)*sizeof(int));
|
|
nengel@2
|
636 break;
|
|
nengel@2
|
637 }
|
|
nengel@2
|
638 }
|
|
nengel@2
|
639 ip_poc[i][0]= s->ip_id;
|
|
nengel@2
|
640 ip_poc[i][1]= s->current_picture->poc;
|
|
nengel@2
|
641 ip_poc_cnt++;
|
|
nengel@2
|
642
|
|
nengel@2
|
643 while (next_ip_id == ip_poc[0][0]){
|
|
nengel@2
|
644 pthread_mutex_lock(&h->lock[ENTROPY2]);
|
|
nengel@2
|
645 h->ed_poc = ip_poc[0][1];
|
|
nengel@2
|
646
|
|
nengel@2
|
647 pthread_cond_signal(&h->cond[ENTROPY2]);
|
|
nengel@2
|
648 pthread_mutex_unlock(&h->lock[ENTROPY2]);
|
|
nengel@2
|
649 memmove(ip_poc[0], ip_poc[1], 2*(ip_poc_cnt-1)*sizeof(int));
|
|
nengel@2
|
650 ip_poc_cnt--;
|
|
nengel@2
|
651 next_ip_id++;
|
|
nengel@2
|
652 }
|
|
nengel@2
|
653 }
|
|
nengel@2
|
654
|
|
nengel@2
|
655 for(i=reorder_cnt; i>0; i--){
|
|
nengel@2
|
656 if (s->coded_pic_num < reorder[i-1].coded_pic_num)
|
|
nengel@2
|
657 break;
|
|
nengel@2
|
658 reorder[i]=reorder[i-1];
|
|
nengel@2
|
659 }
|
|
nengel@2
|
660 reorder[i]=*s;
|
|
nengel@2
|
661
|
|
nengel@2
|
662 while(reorder_cnt>=0){
|
|
nengel@2
|
663 if (next_pic_num!=reorder[reorder_cnt].coded_pic_num){
|
|
nengel@2
|
664 break;
|
|
nengel@2
|
665 }
|
|
nengel@2
|
666 EDSlice *es = &reorder[reorder_cnt];
|
|
nengel@2
|
667
|
|
nengel@2
|
668 {
|
|
nengel@2
|
669 pthread_mutex_lock(&h->lock[MBDEC]);
|
|
nengel@2
|
670 while (h->mbdec_cnt >= MAX_SLICE_COUNT)
|
|
nengel@2
|
671 pthread_cond_wait(&h->cond[MBDEC], &h->lock[MBDEC]);
|
|
nengel@2
|
672 copyEDtoMBSlice(&h->mbdec_q[h->mbdec_fi], es);
|
|
nengel@2
|
673
|
|
nengel@2
|
674 h->mbdec_cnt++;
|
|
nengel@2
|
675 h->mbdec_fi++; h->mbdec_fi %= MAX_SLICE_COUNT;
|
|
nengel@2
|
676 pthread_cond_signal(&h->cond[MBDEC]);
|
|
nengel@2
|
677 pthread_mutex_unlock(&h->lock[MBDEC]);
|
|
nengel@2
|
678
|
|
nengel@2
|
679 }
|
|
nengel@2
|
680
|
|
nengel@2
|
681 if (es->state<0)
|
|
nengel@2
|
682 goto end;
|
|
nengel@2
|
683
|
|
nengel@2
|
684 assert(es->current_picture);
|
|
nengel@2
|
685 for (int i=0; i<es->release_cnt; i++){
|
|
nengel@2
|
686 release_pib_entry(h, es->release_ref[i], 2);
|
|
nengel@2
|
687 }
|
|
nengel@2
|
688 release_pib_entry(h, es->current_picture, 1);
|
|
nengel@2
|
689 av_freep(&es->gb.raw);
|
|
nengel@2
|
690 if (es->gb.rbsp)
|
|
nengel@2
|
691 av_freep(&es->gb.rbsp);
|
|
nengel@2
|
692
|
|
nengel@2
|
693 next_pic_num++;
|
|
nengel@2
|
694 reorder_cnt--;
|
|
nengel@2
|
695 }
|
|
nengel@2
|
696 reorder_cnt++;
|
|
nengel@2
|
697
|
|
nengel@2
|
698 {
|
|
nengel@2
|
699 pthread_mutex_lock(&h->lock[ENTROPY4]);
|
|
nengel@2
|
700 h->ed_reorder_cnt--;
|
|
nengel@2
|
701 pthread_cond_signal(&h->cond[ENTROPY4]);
|
|
nengel@2
|
702 pthread_mutex_unlock(&h->lock[ENTROPY4]);
|
|
nengel@2
|
703 }
|
|
nengel@2
|
704 }
|
|
nengel@2
|
705
|
|
nengel@2
|
706 end:
|
|
nengel@2
|
707 pthread_join(ed_IPB_dist, NULL);
|
|
nengel@2
|
708 pthread_exit(NULL);
|
|
nengel@2
|
709 return NULL;
|
|
nengel@2
|
710 }
|
|
nengel@2
|
711
|
|
nengel@2
|
712
|
|
nengel@2
|
713 static void fill_spe_slice(H264slice *dst, const MBSlice *src, H264Context *h){
|
|
nengel@2
|
714 dst->deblocking_filter =1;
|
|
nengel@2
|
715 dst->linesize = src->current_picture->linesize[0];
|
|
nengel@2
|
716 dst->uvlinesize = src->current_picture->linesize[1];
|
|
nengel@2
|
717 dst->mb_width = h->mb_width;
|
|
nengel@2
|
718 dst->mb_height = h->mb_height;
|
|
nengel@2
|
719 dst->use_weight = src->use_weight;
|
|
nengel@2
|
720 dst->use_weight_chroma = src->use_weight_chroma;
|
|
nengel@2
|
721 dst->luma_log2_weight_denom = src->luma_log2_weight_denom;
|
|
nengel@2
|
722 dst->chroma_log2_weight_denom = src->chroma_log2_weight_denom;
|
|
nengel@2
|
723
|
|
nengel@2
|
724 //weights later
|
|
nengel@2
|
725 memcpy(dst->luma_weight, src->luma_weight, 16*2*2*sizeof(int16_t));
|
|
nengel@2
|
726 memcpy(dst->chroma_weight, src->chroma_weight, 16*2*2*2*sizeof(int16_t));
|
|
nengel@2
|
727 memcpy(dst->implicit_weight, src->implicit_weight, 16*16*2*sizeof(int16_t));
|
|
nengel@2
|
728
|
|
nengel@2
|
729 for(int list=0; list<2; list++){
|
|
nengel@2
|
730 for (int i=0; i<src->ref_count[list]; i++){
|
|
nengel@2
|
731 Picture_spu *p_dst = &dst->ref_list[list][i];
|
|
nengel@2
|
732 DecodedPicture *p_src = src->ref_list[list][i];
|
|
nengel@2
|
733 if (p_src){
|
|
nengel@2
|
734 p_dst->data[0] = p_src->data[0];
|
|
nengel@2
|
735 p_dst->data[1] = p_src->data[1];
|
|
nengel@2
|
736 p_dst->data[2] = p_src->data[2];
|
|
nengel@2
|
737 }
|
|
nengel@2
|
738 }
|
|
nengel@2
|
739 }
|
|
nengel@2
|
740 dst->state = src->state;
|
|
nengel@2
|
741
|
|
nengel@2
|
742 dst->emu_edge_width =32;
|
|
nengel@2
|
743 dst->emu_edge_height =32;
|
|
nengel@2
|
744 dst->slice_type = src->slice_type;
|
|
nengel@2
|
745 dst->slice_type_nos = src->slice_type_nos;
|
|
nengel@2
|
746 dst->slice_alpha_c0_offset = src->slice_alpha_c0_offset;
|
|
nengel@2
|
747 dst->slice_beta_offset = src->slice_beta_offset;
|
|
nengel@2
|
748
|
|
nengel@2
|
749 memcpy(dst->chroma_qp_table, src->pps.chroma_qp_table, 2*64);
|
|
nengel@2
|
750
|
|
nengel@2
|
751 dst->blocks = src->mbs;
|
|
nengel@2
|
752 dst->dst_y = src->current_picture->data[0];
|
|
nengel@2
|
753 dst->dst_cb = src->current_picture->data[1];
|
|
nengel@2
|
754 dst->dst_cr = src->current_picture->data[2];
|
|
nengel@2
|
755 }
|
|
nengel@2
|
756
|
|
nengel@2
|
757 static void decode_slice_mb_seq_cell(H264Context *h, MBRecContext *d, MBSlice *s, DecodedPicture *tmp){
|
|
nengel@2
|
758 static int rl_fi=0;
|
|
nengel@2
|
759
|
|
nengel@2
|
760 DECLARE_ALIGNED(16, H264slice, spe_slice);
|
|
nengel@2
|
761 H264spe *p=&spe_params[0];
|
|
nengel@2
|
762 unsigned status;
|
|
nengel@2
|
763 uint8_t *dst_y, *dst_cb, *dst_cr;
|
|
nengel@2
|
764
|
|
nengel@2
|
765 DecodedPicture *dp;
|
|
nengel@2
|
766
|
|
nengel@2
|
767 for (int i=0; i<2; i++){
|
|
nengel@2
|
768 for(int j=0; j< s->ref_count[i]; j++){
|
|
nengel@2
|
769 if (s->ref_list_cpn[i][j] ==-1)
|
|
nengel@2
|
770 continue;
|
|
nengel@2
|
771 int k;
|
|
nengel@2
|
772 for (k=0; k<DPB_SIZE; k++){
|
|
nengel@2
|
773 if(h->dpb[k].reference >= 2 && h->dpb[k].cpn == s->ref_list_cpn[i][j]){
|
|
nengel@2
|
774 s->ref_list[i][j] = &h->dpb[k];
|
|
nengel@2
|
775 break;
|
|
nengel@2
|
776 }
|
|
nengel@2
|
777 }
|
|
nengel@2
|
778 }
|
|
nengel@2
|
779 }
|
|
nengel@2
|
780
|
|
nengel@2
|
781 dp = get_dpb_entry(h);
|
|
nengel@2
|
782 init_dpb_entry(dp, s, d->width, d->height);
|
|
nengel@2
|
783
|
|
nengel@2
|
784 if (h->no_mbd)
|
|
nengel@2
|
785 return;
|
|
nengel@2
|
786
|
|
nengel@2
|
787
|
|
nengel@2
|
788 fill_spe_slice(&spe_slice, s, h);
|
|
nengel@2
|
789 spe_mfcio_get(spe_context[0], (unsigned) (spe_slice_buf[0] + rl_fi), &spe_slice, sizeof(H264slice), 15, 0, 0);
|
|
nengel@2
|
790 spe_mfcio_tag_status_read(spe_context[0], 1<<15, SPE_TAG_ALL, &status);
|
|
nengel@2
|
791 rl_fi++; rl_fi %= 2;
|
|
nengel@2
|
792
|
|
nengel@2
|
793 _spe_in_mbox_write(spe_control_area[0], 0);
|
|
nengel@2
|
794 while (atomic_read(rl_cnt)<=0){
|
|
nengel@2
|
795 //pthread_yield();
|
|
nengel@2
|
796 usleep(1000);
|
|
nengel@2
|
797 }
|
|
nengel@2
|
798 atomic_dec(rl_cnt);
|
|
nengel@2
|
799
|
|
nengel@2
|
800
|
|
nengel@2
|
801 /** This is error free, no visual artifacts, however, md5sum fails.... (WTF) **/
|
|
nengel@2
|
802 // memcpy(tmp->data[0], s->current_picture->data[0], tmp->linesize[0]*h->mb_height*16);
|
|
nengel@2
|
803 // memcpy(tmp->data[1], s->current_picture->data[1], tmp->linesize[1]*h->mb_height*8);
|
|
nengel@2
|
804 // memcpy(tmp->data[2], s->current_picture->data[2], tmp->linesize[1]*h->mb_height*8);
|
|
nengel@2
|
805 //
|
|
nengel@2
|
806 // memset(s->current_picture->data[0], 0, tmp->linesize[0]*h->mb_height*16);
|
|
nengel@2
|
807 // memset(s->current_picture->data[1], 0, tmp->linesize[1]*h->mb_height*8);
|
|
nengel@2
|
808 // memset(s->current_picture->data[2], 0, tmp->linesize[1]*h->mb_height*8);
|
|
nengel@2
|
809 //
|
|
nengel@2
|
810 // decode_slice_mb_seq(d, s);
|
|
nengel@2
|
811 //
|
|
nengel@2
|
812 // for (int i=0; i<h->mb_height*16; i++){
|
|
nengel@2
|
813 // for (int j=0; j<h->width; j++){
|
|
nengel@2
|
814 // if (tmp->data[0][j + i*tmp->linesize[0]] != s->current_picture->data[0][j + i*tmp->linesize[0]]){
|
|
nengel@2
|
815 // printf("%d, %d, %d, %d\n", j, i, tmp->data[0][j + i*tmp->linesize[0]], s->current_picture->data[0][j + i*tmp->linesize[0]]);
|
|
nengel@2
|
816 // return;
|
|
nengel@2
|
817 // }
|
|
nengel@2
|
818 // }
|
|
nengel@2
|
819 // }
|
|
nengel@2
|
820 //
|
|
nengel@2
|
821 // for (int i=0; i<h->mb_height*8; i++){
|
|
nengel@2
|
822 // for (int j=0; j<h->width/2; j++){
|
|
nengel@2
|
823 // if (tmp->data[1][j + i*tmp->linesize[1]] != s->current_picture->data[1][j + i*tmp->linesize[1]]){
|
|
nengel@2
|
824 // printf("%d, %d, %d, %d\n", j, i, tmp->data[1][j + i*tmp->linesize[1]], s->current_picture->data[1][j + i*tmp->linesize[1]]);
|
|
nengel@2
|
825 // return;
|
|
nengel@2
|
826 // }
|
|
nengel@2
|
827 // }
|
|
nengel@2
|
828 // }
|
|
nengel@2
|
829 //
|
|
nengel@2
|
830 // for (int i=0; i<h->mb_height*8; i++){
|
|
nengel@2
|
831 // for (int j=0; j<h->width/2; j++){
|
|
nengel@2
|
832 // if (tmp->data[2][j + i*tmp->linesize[1]] != s->current_picture->data[2][j + i*tmp->linesize[1]]){
|
|
nengel@2
|
833 // printf("%d, %d, %d, %d\n", j, i, tmp->data[2][j + i*tmp->linesize[1]], s->current_picture->data[2][j + i*tmp->linesize[1]]);
|
|
nengel@2
|
834 // return;
|
|
nengel@2
|
835 // }
|
|
nengel@2
|
836 // }
|
|
nengel@2
|
837 // }
|
|
nengel@2
|
838
|
|
nengel@2
|
839
|
|
nengel@2
|
840 //printf("dst_y %p\n", dst_y);
|
|
nengel@2
|
841
|
|
nengel@2
|
842
|
|
nengel@2
|
843 for (int i=0; i<s->release_cnt; i++){
|
|
nengel@2
|
844 for(int j=0; j<DPB_SIZE; j++){
|
|
nengel@2
|
845 if(h->dpb[j].cpn== s->release_ref_cpn[i]){
|
|
nengel@2
|
846 release_dpb_entry(h, &h->dpb[j], 2);
|
|
nengel@2
|
847 break;
|
|
nengel@2
|
848 }
|
|
nengel@2
|
849 }
|
|
nengel@2
|
850 }
|
|
nengel@2
|
851 s->release_cnt=0;
|
|
nengel@2
|
852
|
|
nengel@2
|
853 }
|
|
nengel@2
|
854
|
|
nengel@2
|
855 static void *h264_spe_thread(void * thread_args ) {
|
|
nengel@2
|
856 H264spe *params = (H264spe *)thread_args;
|
|
nengel@2
|
857 unsigned int spe_id = params->spe_id;
|
|
nengel@2
|
858 unsigned int runflags = 0;
|
|
nengel@2
|
859 unsigned int entry = SPE_DEFAULT_ENTRY;
|
|
nengel@2
|
860 // run SPE context
|
|
nengel@2
|
861 spe_context_run(spe_context[spe_id], &entry, runflags, (void*) params, NULL, NULL);
|
|
nengel@2
|
862 // done - now exit thread
|
|
nengel@2
|
863 pthread_exit(NULL);
|
|
nengel@2
|
864 }
|
|
nengel@2
|
865
|
|
nengel@2
|
866 static int create_spe_MBR_threads(H264Context *h, int num_threads) {
|
|
nengel@2
|
867 int i;
|
|
nengel@2
|
868
|
|
nengel@2
|
869 // reserve memory for spe thread id, context and argument addresses
|
|
nengel@2
|
870 spe_tid = av_malloc(num_threads * sizeof (pthread_t));
|
|
nengel@2
|
871 spe_context = av_malloc(num_threads * sizeof (spe_context_ptr_t));
|
|
nengel@2
|
872 spe_params = av_malloc(num_threads * sizeof (H264spe));
|
|
nengel@2
|
873 spe_control_area = av_malloc(num_threads * sizeof (void*));
|
|
nengel@2
|
874 spe_ls_area = av_malloc(num_threads * sizeof (void*));
|
|
nengel@2
|
875 spe_slice_buf = av_malloc(num_threads * sizeof (void*));
|
|
nengel@2
|
876
|
|
nengel@2
|
877 spe_program_handle_t *spe_program = spe_image_open("spe_mbd");
|
|
nengel@2
|
878
|
|
nengel@2
|
879 if (spe_program == NULL)
|
|
nengel@2
|
880 av_log(AV_LOG_ERROR, "PPE: error opening SPE object image:%d. error=%s \n", errno, strerror(errno));
|
|
nengel@2
|
881
|
|
nengel@2
|
882 for (i = 0; i < num_threads; i++) {
|
|
nengel@2
|
883 // create context for spe program
|
|
nengel@2
|
884 spe_context[i] = spe_context_create(SPE_MAP_PS, NULL);
|
|
nengel@2
|
885 if (spe_context[i] == NULL)
|
|
nengel@2
|
886 av_log(AV_LOG_ERROR, "PPE: error creating SPE context:%d. error=%s \n", errno, strerror(errno));
|
|
nengel@2
|
887 // load SPE program into main memory
|
|
nengel@2
|
888 if ((spe_program_load(spe_context[i], spe_program)) == -1)
|
|
nengel@2
|
889 av_log(AV_LOG_ERROR, "PPE: error loading SPE context:%d. error=%s \n", errno, strerror(errno));
|
|
nengel@2
|
890 //get the control_area for fast mailboxing
|
|
nengel@2
|
891 if ((spe_control_area[i] = spe_ps_area_get(spe_context[i], SPE_CONTROL_AREA)) == NULL)
|
|
nengel@2
|
892 av_log(AV_LOG_ERROR, "PPE: error retrieving SPE control area:%d. error=%s \n", errno, strerror(errno));
|
|
nengel@2
|
893 //get ls area for inter spe communication
|
|
nengel@2
|
894 if ((spe_ls_area[i] = spe_ls_area_get(spe_context[i])) == NULL)
|
|
nengel@2
|
895 av_log(AV_LOG_ERROR, "PPE: error retrieving SPE ls area:%d. error=%s \n", errno, strerror(errno));
|
|
nengel@2
|
896 }
|
|
nengel@2
|
897
|
|
nengel@2
|
898 for (i = 0; i < num_threads; i++) {
|
|
nengel@2
|
899 spe_params[i].mb_width = h->mb_width;
|
|
nengel@2
|
900 spe_params[i].mb_height = h->mb_height;
|
|
nengel@2
|
901 spe_params[i].mb_stride = h->mb_stride;
|
|
nengel@2
|
902 spe_params[i].spe_id = i;
|
|
nengel@2
|
903 spe_params[i].spe_total = num_threads;
|
|
nengel@2
|
904 //spe_params[i].slice_params= &slice_params;
|
|
nengel@2
|
905 spe_params[i].src_spe = spe_ls_area[(i-1+num_threads)%num_threads];
|
|
nengel@2
|
906 spe_params[i].tgt_spe = spe_ls_area[(i+1)%num_threads];
|
|
nengel@2
|
907
|
|
nengel@2
|
908 spe_params[i].rl_lock = rl_lock;
|
|
nengel@2
|
909 spe_params[i].rl_cond = rl_cond;
|
|
nengel@2
|
910 spe_params[i].rl_cnt = rl_cnt;
|
|
nengel@2
|
911 spe_params[i].lock = (mutex_ea_t) (unsigned) &mutex_var[i];
|
|
nengel@2
|
912 spe_params[i].cond = (cond_ea_t) (unsigned) &cond_var[i];
|
|
nengel@2
|
913 spe_params[i].cnt = (atomic_ea_t)(unsigned) &atomic_var[i]; atomic_set(spe_params[i].cnt, 0);
|
|
nengel@2
|
914
|
|
nengel@2
|
915 mutex_init(spe_params[i].lock);
|
|
nengel@2
|
916 cond_init(spe_params[i].cond);
|
|
nengel@2
|
917 if (pthread_create(&spe_tid[i], NULL, h264_spe_thread, (void *) &spe_params[i]))
|
|
nengel@2
|
918 av_log(AV_LOG_ERROR, "create_workers: pthread create for spe failed %d\n", i);
|
|
nengel@2
|
919
|
|
nengel@2
|
920 //slicebufaddr
|
|
nengel@2
|
921 spe_slice_buf[i] = (H264slice *) _spe_out_mbox_read(spe_control_area[i]);
|
|
nengel@2
|
922
|
|
nengel@2
|
923 av_log(AV_LOG_DEBUG, "create_workers: created spe thread %d\n", i);
|
|
nengel@2
|
924 }
|
|
nengel@2
|
925 spe_image_close(spe_program);
|
|
nengel@2
|
926 return 0;
|
|
nengel@2
|
927 }
|
|
nengel@2
|
928
|
|
nengel@2
|
929 //_spe_out_mbox_read(spe_control_area[i]);
|
|
nengel@2
|
930 /**
|
|
nengel@2
|
931 * joins all the spe worker threads.
|
|
nengel@2
|
932 */
|
|
nengel@2
|
933 static void join_spe_worker_threads(H264slice *s, int num_threads, int *rl_fi) {
|
|
nengel@2
|
934 int i;
|
|
nengel@2
|
935 ///just to keep coding consistency.
|
|
nengel@2
|
936 {
|
|
nengel@2
|
937 for (i=0; i<num_threads; i++){
|
|
nengel@2
|
938 H264spe *p=&spe_params[i];
|
|
nengel@2
|
939 unsigned status;
|
|
nengel@2
|
940
|
|
nengel@2
|
941 while (atomic_read(p->cnt)>=2) {//double buffered
|
|
nengel@2
|
942 usleep(1000);//cond_wait(p->cond, p->lock);
|
|
nengel@2
|
943 }
|
|
nengel@2
|
944
|
|
nengel@2
|
945 spe_mfcio_get(spe_context[i], (unsigned) (spe_slice_buf[i] + rl_fi[i]), s, sizeof(H264slice), 15, 0, 0);
|
|
nengel@2
|
946 spe_mfcio_tag_status_read(spe_context[i], 1<<15, SPE_TAG_ALL, &status);
|
|
nengel@2
|
947 //mutex_unlock(p->lock);
|
|
nengel@2
|
948 _spe_in_mbox_write(spe_control_area[i], 0);
|
|
nengel@2
|
949 }
|
|
nengel@2
|
950 }
|
|
nengel@2
|
951
|
|
nengel@2
|
952 for (i=0; i<num_threads; i++){
|
|
nengel@2
|
953 pthread_join(spe_tid[i], NULL);
|
|
nengel@2
|
954 }
|
|
nengel@2
|
955
|
|
nengel@2
|
956 for (i=0; i<num_threads; i++){
|
|
nengel@2
|
957 spe_context_destroy(spe_context[i]);
|
|
nengel@2
|
958 }
|
|
nengel@2
|
959 atomic_inc(rl_cnt);
|
|
nengel@2
|
960
|
|
nengel@2
|
961 // destroy memory reserved for spe thread id, context and argument addresses
|
|
nengel@2
|
962 av_freep(&spe_tid);
|
|
nengel@2
|
963 av_freep(&spe_context);
|
|
nengel@2
|
964 av_freep(&spe_params);
|
|
nengel@2
|
965 av_freep(&spe_control_area);
|
|
nengel@2
|
966 av_freep(&spe_slice_buf);
|
|
nengel@2
|
967 }
|
|
nengel@2
|
968
|
|
nengel@2
|
969
|
|
nengel@2
|
970 static void *rl_dist_thread(void *arg){
|
|
nengel@2
|
971 int i;
|
|
nengel@2
|
972 H264Context *h = (H264Context *) arg;
|
|
nengel@2
|
973 MBSlice *s;
|
|
nengel@2
|
974 DecodedPicture *dp;
|
|
nengel@2
|
975 int rl_fi[16]={0,};
|
|
nengel@2
|
976 DECLARE_ALIGNED(16, H264slice, spe_slice);
|
|
nengel@2
|
977
|
|
nengel@2
|
978 create_spe_MBR_threads(h, h->rl_threads);
|
|
nengel@2
|
979 for(;;){
|
|
nengel@2
|
980 {
|
|
nengel@2
|
981 pthread_mutex_lock(&h->lock[MBDEC]);
|
|
nengel@2
|
982 while (h->mbdec_cnt<=0)
|
|
nengel@2
|
983 pthread_cond_wait(&h->cond[MBDEC], &h->lock[MBDEC]);
|
|
nengel@2
|
984 s= &h->mbdec_q[h->mbdec_fo];
|
|
nengel@2
|
985 h->mbdec_fo++; h->mbdec_fo %= MAX_SLICE_COUNT;
|
|
nengel@2
|
986 pthread_mutex_unlock(&h->lock[MBDEC]);
|
|
nengel@2
|
987 }
|
|
nengel@2
|
988
|
|
nengel@2
|
989 if (s->state<0){
|
|
nengel@2
|
990 break;
|
|
nengel@2
|
991 }
|
|
nengel@2
|
992 for (int i=0; i<2; i++){
|
|
nengel@2
|
993 for(int j=0; j< s->ref_count[i]; j++){
|
|
nengel@2
|
994 if (s->ref_list_cpn[i][j] ==-1)
|
|
nengel@2
|
995 continue;
|
|
nengel@2
|
996 int k;
|
|
nengel@2
|
997 for (k=0; k<DPB_SIZE; k++){
|
|
nengel@2
|
998 if(h->dpb[k].reference >= 2 && h->dpb[k].cpn == s->ref_list_cpn[i][j]){
|
|
nengel@2
|
999 s->ref_list[i][j] = &h->dpb[k];
|
|
nengel@2
|
1000 break;
|
|
nengel@2
|
1001 }
|
|
nengel@2
|
1002 }
|
|
nengel@2
|
1003
|
|
nengel@2
|
1004 }
|
|
nengel@2
|
1005 }
|
|
nengel@2
|
1006 dp = get_dpb_entry(h);
|
|
nengel@2
|
1007 init_dpb_entry(dp, s, h->width, h->height);
|
|
nengel@2
|
1008 assert(s->current_picture);
|
|
nengel@2
|
1009 {
|
|
nengel@2
|
1010 while (atomic_read(rl_cnt) >=MAX_SLICE_COUNT){
|
|
nengel@2
|
1011 usleep(1000);
|
|
nengel@2
|
1012 }
|
|
nengel@2
|
1013 h->mbrel_q[h->mbrel_fi] = *s;
|
|
nengel@2
|
1014
|
|
nengel@2
|
1015 h->mbrel_fi++; h->mbrel_fi %= MAX_SLICE_COUNT;
|
|
nengel@2
|
1016 }
|
|
nengel@2
|
1017 {
|
|
nengel@2
|
1018 if(h->no_mbd){
|
|
nengel@2
|
1019 atomic_inc(rl_cnt);
|
|
nengel@2
|
1020 }else {
|
|
nengel@2
|
1021 fill_spe_slice(&spe_slice, s, h);
|
|
nengel@2
|
1022 for (i=0; i<h->rl_threads; i++){
|
|
nengel@2
|
1023 H264spe *p=&spe_params[i];
|
|
nengel@2
|
1024 unsigned status;
|
|
nengel@2
|
1025 while (atomic_read(p->cnt)>=2){ //double buffered
|
|
nengel@2
|
1026 usleep(1000);
|
|
nengel@2
|
1027 //cond_wait(p->cond, p->lock);
|
|
nengel@2
|
1028 }
|
|
nengel@2
|
1029 spe_mfcio_get(spe_context[i], (unsigned) (spe_slice_buf[i] + rl_fi[i]), &spe_slice, sizeof(H264slice), 15, 0, 0);
|
|
nengel@2
|
1030 spe_mfcio_tag_status_read(spe_context[i], 1<<15, SPE_TAG_ALL, &status);
|
|
nengel@2
|
1031 rl_fi[i]++; rl_fi[i] %= 2;
|
|
nengel@2
|
1032 atomic_inc(p->cnt);
|
|
nengel@2
|
1033
|
|
nengel@2
|
1034 _spe_in_mbox_write(spe_control_area[i], 0);
|
|
nengel@2
|
1035 }
|
|
nengel@2
|
1036 }
|
|
nengel@2
|
1037 }
|
|
nengel@2
|
1038
|
|
nengel@2
|
1039 {
|
|
nengel@2
|
1040 pthread_mutex_lock(&h->lock[MBDEC]);
|
|
nengel@2
|
1041 h->mbdec_cnt--;
|
|
nengel@2
|
1042 pthread_cond_signal(&h->cond[MBDEC]);
|
|
nengel@2
|
1043 pthread_mutex_unlock(&h->lock[MBDEC]);
|
|
nengel@2
|
1044 }
|
|
nengel@2
|
1045
|
|
nengel@2
|
1046 }
|
|
nengel@2
|
1047
|
|
nengel@2
|
1048 {
|
|
nengel@2
|
1049 while (atomic_read(rl_cnt) >=MAX_SLICE_COUNT){
|
|
nengel@2
|
1050 usleep(1000);
|
|
nengel@2
|
1051 }
|
|
nengel@2
|
1052 h->mbrel_q[h->mbrel_fi] = *s;
|
|
nengel@2
|
1053
|
|
nengel@2
|
1054 h->mbrel_fi++; h->mbrel_fi %= MAX_SLICE_COUNT;
|
|
nengel@2
|
1055 }
|
|
nengel@2
|
1056 spe_slice.state=-1;
|
|
nengel@2
|
1057 join_spe_worker_threads(&spe_slice, h->rl_threads, rl_fi);
|
|
nengel@2
|
1058 pthread_exit(NULL);
|
|
nengel@2
|
1059 return NULL;
|
|
nengel@2
|
1060 }
|
|
nengel@2
|
1061
|
|
nengel@2
|
1062 static void *mbdec_cell_thread(void *arg){
|
|
nengel@2
|
1063 H264Context *h = (H264Context *) arg;
|
|
nengel@2
|
1064
|
|
nengel@2
|
1065 rl_lock = (mutex_ea_t) (unsigned) &rl_mutex_var;
|
|
nengel@2
|
1066 rl_cond = (cond_ea_t) (unsigned) &rl_cond_var;
|
|
nengel@2
|
1067 rl_cnt = (atomic_ea_t) (unsigned) &rl_cnt_var;
|
|
nengel@2
|
1068 atomic_set(rl_cnt, 0);
|
|
nengel@2
|
1069 mutex_init(rl_lock);
|
|
nengel@2
|
1070 cond_init(rl_cond);
|
|
nengel@2
|
1071 // printf("mbdec, pid %d\n", syscall(SYS_gettid));
|
|
nengel@2
|
1072 pthread_create(&h->rl_dist_thr, NULL, rl_dist_thread, h);
|
|
nengel@2
|
1073
|
|
nengel@2
|
1074 for(;;){
|
|
nengel@2
|
1075 MBSlice *s=NULL;
|
|
nengel@2
|
1076 {
|
|
nengel@2
|
1077 while (atomic_read(rl_cnt)<=0){
|
|
nengel@2
|
1078 usleep(1000);
|
|
nengel@2
|
1079 }
|
|
nengel@2
|
1080 s= &h->mbrel_q[h->mbrel_fo];
|
|
nengel@2
|
1081 h->mbrel_fo++; h->mbrel_fo %= MAX_SLICE_COUNT;
|
|
nengel@2
|
1082 }
|
|
nengel@2
|
1083
|
|
nengel@2
|
1084 if (s->state<0)
|
|
nengel@2
|
1085 break;
|
|
nengel@2
|
1086
|
|
nengel@2
|
1087 for (int i=0; i<s->release_cnt; i++){
|
|
nengel@2
|
1088 for(int j=0; j<DPB_SIZE; j++){
|
|
nengel@2
|
1089 if(h->dpb[j].cpn== s->release_ref_cpn[i]){
|
|
nengel@2
|
1090 release_dpb_entry(h, &h->dpb[j], 2);
|
|
nengel@2
|
1091 break;
|
|
nengel@2
|
1092 }
|
|
nengel@2
|
1093 }
|
|
nengel@2
|
1094 }
|
|
nengel@2
|
1095
|
|
nengel@2
|
1096 {
|
|
nengel@2
|
1097 EDThreadContext *ed = s->ed;
|
|
nengel@2
|
1098 pthread_mutex_lock(&ed->mbs_lock);
|
|
nengel@2
|
1099 ed->mbs_cnt++;
|
|
nengel@2
|
1100 pthread_cond_signal(&ed->mbs_cond);
|
|
nengel@2
|
1101 pthread_mutex_unlock(&ed->mbs_lock);
|
|
nengel@2
|
1102 }
|
|
nengel@2
|
1103
|
|
nengel@2
|
1104 {
|
|
nengel@2
|
1105 pthread_mutex_lock(&h->lock[WRITE]);
|
|
nengel@2
|
1106 while (h->write_cnt>= DPB_SIZE)
|
|
nengel@2
|
1107 pthread_cond_wait(&h->cond[WRITE], &h->lock[WRITE]);
|
|
nengel@2
|
1108 assert(s);
|
|
nengel@2
|
1109 assert(s->current_picture);
|
|
nengel@2
|
1110 h->write_q[h->write_fi]= s->current_picture;
|
|
nengel@2
|
1111 h->write_cnt++;
|
|
nengel@2
|
1112 h->write_fi++; h->write_fi %= DPB_SIZE;
|
|
nengel@2
|
1113 pthread_cond_signal(&h->cond[WRITE]);
|
|
nengel@2
|
1114 pthread_mutex_unlock(&h->lock[WRITE]);
|
|
nengel@2
|
1115
|
|
nengel@2
|
1116 }
|
|
nengel@2
|
1117 {
|
|
nengel@2
|
1118 atomic_dec(rl_cnt);
|
|
nengel@2
|
1119 }
|
|
nengel@2
|
1120
|
|
nengel@2
|
1121 }
|
|
nengel@2
|
1122
|
|
nengel@2
|
1123 {//propagate exit
|
|
nengel@2
|
1124 pthread_mutex_lock(&h->lock[WRITE]);
|
|
nengel@2
|
1125 while (h->write_cnt>= DPB_SIZE)
|
|
nengel@2
|
1126 pthread_cond_wait(&h->cond[WRITE], &h->lock[WRITE]);
|
|
nengel@2
|
1127 last_pic.reference = -1;
|
|
nengel@2
|
1128 h->write_q[h->write_fi] = &last_pic;
|
|
nengel@2
|
1129 h->write_cnt++;
|
|
nengel@2
|
1130 h->write_fi++; h->write_fi %= DPB_SIZE;
|
|
nengel@2
|
1131 pthread_cond_signal(&h->cond[WRITE]);
|
|
nengel@2
|
1132 pthread_mutex_unlock(&h->lock[WRITE]);
|
|
nengel@2
|
1133
|
|
nengel@2
|
1134 }
|
|
nengel@2
|
1135 pthread_join(h->rl_dist_thr, NULL);
|
|
nengel@2
|
1136 pthread_exit(NULL);
|
|
nengel@2
|
1137 return NULL;
|
|
nengel@2
|
1138 }
|
|
nengel@2
|
1139
|
|
nengel@2
|
1140 /*
|
|
nengel@2
|
1141 * The following code is the main loop of the file converter
|
|
nengel@2
|
1142 */
|
|
nengel@2
|
1143 int h264_decode_cell(H264Context *h) {
|
|
nengel@2
|
1144
|
|
nengel@2
|
1145 pthread_t read_thr, parsenal_thr, entropy_thr, mbdec_thr, write_thr;
|
|
nengel@2
|
1146
|
|
nengel@2
|
1147 start_timer();
|
|
nengel@2
|
1148
|
|
nengel@2
|
1149 pthread_create(&read_thr, NULL, read_thread, h);
|
|
nengel@2
|
1150 pthread_create(&parsenal_thr, NULL, parsenal_thread, h);
|
|
nengel@2
|
1151 pthread_create(&entropy_thr, NULL, entropy_IPB_cell_thread, h);
|
|
nengel@2
|
1152 pthread_create(&mbdec_thr, NULL, mbdec_cell_thread, h);
|
|
nengel@2
|
1153 pthread_create(&write_thr, NULL, write_thread, h);
|
|
nengel@2
|
1154
|
|
nengel@2
|
1155 pthread_join(read_thr, NULL);
|
|
nengel@2
|
1156 pthread_join(parsenal_thr, NULL);
|
|
nengel@2
|
1157 pthread_join(entropy_thr, NULL);
|
|
nengel@2
|
1158 pthread_join(mbdec_thr, NULL);
|
|
nengel@2
|
1159 pthread_join(write_thr, NULL);
|
|
nengel@2
|
1160
|
|
nengel@2
|
1161 return 0;
|
|
nengel@2
|
1162 }
|
|
nengel@2
|
1163
|
|
nengel@2
|
1164 /*
|
|
nengel@2
|
1165 * The following code is the main loop of the file converter
|
|
nengel@2
|
1166 */
|
|
nengel@2
|
1167 int h264_decode_cell_seq(H264Context *h) {
|
|
nengel@2
|
1168 ParserContext *pc;
|
|
nengel@2
|
1169 NalContext *nc;
|
|
nengel@2
|
1170 EntropyContext *ec;
|
|
nengel@2
|
1171 MBRecContext *rc;
|
|
nengel@2
|
1172 OutputContext *oc;
|
|
nengel@2
|
1173
|
|
nengel@2
|
1174 RawFrame frm;
|
|
nengel@2
|
1175 EDSlice slice, *s=&slice;
|
|
nengel@2
|
1176 MBSlice mbslice, *s2=&mbslice;
|
|
nengel@2
|
1177 PictureInfo *pic=NULL;
|
|
nengel@2
|
1178 DecodedPicture *out;
|
|
nengel@2
|
1179 int size;
|
|
nengel@2
|
1180 int frames=0;
|
|
nengel@2
|
1181
|
|
nengel@2
|
1182 pc = get_parse_context(h->ifile);
|
|
nengel@2
|
1183 nc = get_nal_context(h->width, h->height);
|
|
nengel@2
|
1184 ec = get_entropy_context( h );
|
|
nengel@2
|
1185 rc = get_mbrec_context(h);
|
|
nengel@2
|
1186 oc = get_output_context( h );
|
|
nengel@2
|
1187
|
|
nengel@2
|
1188 rl_lock = (mutex_ea_t) (unsigned) &rl_mutex_var;
|
|
nengel@2
|
1189 rl_cond = (cond_ea_t) (unsigned) &rl_cond_var;
|
|
nengel@2
|
1190 rl_cnt = (atomic_ea_t) (unsigned) &rl_cnt_var;
|
|
nengel@2
|
1191 atomic_set(rl_cnt, 0);
|
|
nengel@2
|
1192 mutex_init(rl_lock);
|
|
nengel@2
|
1193 cond_init(rl_cond);
|
|
nengel@2
|
1194
|
|
nengel@2
|
1195 memset(s, 0, sizeof(EDSlice));
|
|
nengel@2
|
1196 ff_init_slice(nc, s);
|
|
nengel@2
|
1197 s->mbs = av_malloc( h->mb_height * h->mb_width * sizeof(H264Mb));
|
|
nengel@2
|
1198
|
|
nengel@2
|
1199 DecodedPicture tmp;
|
|
nengel@2
|
1200 tmp.base[0]=0;
|
|
nengel@2
|
1201 ///fix this when want to debug the Cell errors
|
|
nengel@2
|
1202 //init_dpb_entry(&tmp, h->width, h->height);
|
|
nengel@2
|
1203
|
|
nengel@2
|
1204 create_spe_ED_threads(h, 1, 0);
|
|
nengel@2
|
1205 create_spe_MBR_threads(h, 1);
|
|
nengel@2
|
1206
|
|
nengel@2
|
1207 start_timer();
|
|
nengel@2
|
1208
|
|
nengel@2
|
1209 while(!pc->final_frame && frames++ < h->num_frames){
|
|
nengel@2
|
1210
|
|
nengel@2
|
1211 av_read_frame_internal(pc, &frm);
|
|
nengel@2
|
1212
|
|
nengel@2
|
1213 PictureInfo *pic=get_pib_entry(h);
|
|
nengel@2
|
1214 ff_alloc_picture_info(nc, s, pic);
|
|
nengel@2
|
1215 decode_nal_units(nc, s, &frm);
|
|
nengel@2
|
1216
|
|
nengel@2
|
1217 copyEDtoMBSlice(s2, s);
|
|
nengel@2
|
1218 decode_slice_entropy_cell_seq(h, ec, s);
|
|
nengel@2
|
1219
|
|
nengel@2
|
1220 decode_slice_mb_seq_cell(h, rc, s2, &tmp);
|
|
nengel@2
|
1221
|
|
nengel@2
|
1222 out =output_frame(h, oc, s2->current_picture, h->ofile, h->frame_width, h->frame_height);
|
|
nengel@2
|
1223
|
|
nengel@2
|
1224 if (out){
|
|
nengel@2
|
1225 release_dpb_entry(h, out, 1);
|
|
nengel@2
|
1226 }
|
|
nengel@2
|
1227 print_report(oc->frame_number, oc->video_size, 0, h->verbose);
|
|
nengel@2
|
1228 }
|
|
nengel@2
|
1229 while ((out=output_frame(h, oc, NULL, h->ofile, h->frame_width, h->frame_height))) ;
|
|
nengel@2
|
1230
|
|
nengel@2
|
1231 print_report(oc->frame_number, oc->video_size, 1, h->verbose);
|
|
nengel@2
|
1232
|
|
nengel@2
|
1233 /* finished ! */
|
|
nengel@2
|
1234 av_freep(&s->mbs);
|
|
nengel@2
|
1235
|
|
nengel@2
|
1236 free_parse_context(pc);
|
|
nengel@2
|
1237 free_nal_context (nc);
|
|
nengel@2
|
1238 free_entropy_context(ec);
|
|
nengel@2
|
1239 free_mbrec_context(rc);
|
|
nengel@2
|
1240 free_output_context(oc);
|
|
nengel@2
|
1241 return 0;
|
|
nengel@2
|
1242 }
|