PR/Applications/VSs/VSs__H264__App: 0b056460c67d libavcodec/x86/dsputil

view libavcodec/x86/dsputil_mmx.c @ 3:0b056460c67d

changed code to use VSs

author	Nina Engelhardt <nengel@mailbox.tu-berlin.de>
date	Mon, 29 Oct 2012 16:44:27 +0100
parents
children

line source

1 /*

2 * MMX optimized DSP utils

5 *

6 * This file is part of FFmpeg.

7 *

8 * FFmpeg is free software; you can redistribute it and/or

9 * modify it under the terms of the GNU Lesser General Public

10 * License as published by the Free Software Foundation; either

11 * version 2.1 of the License, or (at your option) any later version.

12 *

13 * FFmpeg is distributed in the hope that it will be useful,

14 * but WITHOUT ANY WARRANTY; without even the implied warranty of

15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

16 * Lesser General Public License for more details.

17 *

18 * You should have received a copy of the GNU Lesser General Public

19 * License along with FFmpeg; if not, write to the Free Software

20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

21 *

22 * MMX optimization by Nick Kurshev <nickols_k@mail.ru>

23 */

25 #include "libavutil/x86_cpu.h"

26 #include "libavutil/internal.h"

27 #include "libavcodec/dsputil.h"

28 #include "libavcodec/h264_dsp.h"

29 #include "dsputil_mmx.h"

32 //#undef NDEBUG

33 //#include <assert.h>

35 int mm_flags; /* multimedia extension flags */

37 /* pixel operations */

38 DECLARE_ALIGNED(8, const uint64_t, ff_bone) = 0x0101010101010101ULL;

39 DECLARE_ALIGNED(8, const uint64_t, ff_wtwo) = 0x0002000200020002ULL;

41 DECLARE_ALIGNED(16, const uint64_t, ff_pdw_80000000)[2] =

42 {0x8000000080000000ULL, 0x8000000080000000ULL};

44 DECLARE_ALIGNED(8, const uint64_t, ff_pw_3 ) = 0x0003000300030003ULL;

45 DECLARE_ALIGNED(8, const uint64_t, ff_pw_4 ) = 0x0004000400040004ULL;

46 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_5 ) = {0x0005000500050005ULL, 0x0005000500050005ULL};

47 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_8 ) = {0x0008000800080008ULL, 0x0008000800080008ULL};

48 DECLARE_ALIGNED(8, const uint64_t, ff_pw_15 ) = 0x000F000F000F000FULL;

49 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_16 ) = {0x0010001000100010ULL, 0x0010001000100010ULL};

50 DECLARE_ALIGNED(8, const uint64_t, ff_pw_20 ) = 0x0014001400140014ULL;

51 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_28 ) = {0x001C001C001C001CULL, 0x001C001C001C001CULL};

52 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_32 ) = {0x0020002000200020ULL, 0x0020002000200020ULL};

53 DECLARE_ALIGNED(8, const uint64_t, ff_pw_42 ) = 0x002A002A002A002AULL;

54 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_64 ) = {0x0040004000400040ULL, 0x0040004000400040ULL};

55 DECLARE_ALIGNED(8, const uint64_t, ff_pw_96 ) = 0x0060006000600060ULL;

56 DECLARE_ALIGNED(8, const uint64_t, ff_pw_128) = 0x0080008000800080ULL;

57 DECLARE_ALIGNED(8, const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL;

59 DECLARE_ALIGNED(8, const uint64_t, ff_pb_1 ) = 0x0101010101010101ULL;

60 DECLARE_ALIGNED(8, const uint64_t, ff_pb_3 ) = 0x0303030303030303ULL;

61 DECLARE_ALIGNED(8, const uint64_t, ff_pb_7 ) = 0x0707070707070707ULL;

62 DECLARE_ALIGNED(8, const uint64_t, ff_pb_1F ) = 0x1F1F1F1F1F1F1F1FULL;

63 DECLARE_ALIGNED(8, const uint64_t, ff_pb_3F ) = 0x3F3F3F3F3F3F3F3FULL;

64 DECLARE_ALIGNED(8, const uint64_t, ff_pb_81 ) = 0x8181818181818181ULL;

65 DECLARE_ALIGNED(8, const uint64_t, ff_pb_A1 ) = 0xA1A1A1A1A1A1A1A1ULL;

66 DECLARE_ALIGNED(8, const uint64_t, ff_pb_FC ) = 0xFCFCFCFCFCFCFCFCULL;

68 DECLARE_ALIGNED(16, const double, ff_pd_1)[2] = { 1.0, 1.0 };

69 DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 };

71 #define ASMALIGN(ZEROBITS) ".align 1 << " #ZEROBITS "\n\t"

72 #define JUMPALIGN() __asm__ volatile (ASMALIGN(3)::)

73 #define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%" #regd ", %%" #regd ::)

75 #define MOVQ_BFE(regd) \

76 __asm__ volatile ( \

77 "pcmpeqd %%" #regd ", %%" #regd " \n\t"\

78 "paddb %%" #regd ", %%" #regd " \n\t" ::)

80 #ifndef PIC

81 #define MOVQ_BONE(regd) __asm__ volatile ("movq %0, %%" #regd " \n\t" ::"m"(ff_bone))

82 #define MOVQ_WTWO(regd) __asm__ volatile ("movq %0, %%" #regd " \n\t" ::"m"(ff_wtwo))

83 #else

84 // for shared library it's better to use this way for accessing constants

85 // pcmpeqd -> -1

86 #define MOVQ_BONE(regd) \

87 __asm__ volatile ( \

88 "pcmpeqd %%" #regd ", %%" #regd " \n\t" \

89 "psrlw $15, %%" #regd " \n\t" \

90 "packuswb %%" #regd ", %%" #regd " \n\t" ::)

92 #define MOVQ_WTWO(regd) \

93 __asm__ volatile ( \

94 "pcmpeqd %%" #regd ", %%" #regd " \n\t" \

95 "psrlw $15, %%" #regd " \n\t" \

96 "psllw $1, %%" #regd " \n\t"::)

98 #endif

/* PAVGB_MMX_NO_RND: asm fragment computing the per-byte average of rega and
 * regb, rounded down:
 *     regr = (rega & regb) + (((rega ^ regb) & regfe) >> 1)
 * The regfe mask clears bit 0 of every byte before the 64-bit psrlq, so the
 * shift cannot carry a bit from one byte into the adjacent one. */
100 // using regr as temporary and for the output result

101 // first argument is unmodified and second is trashed

102 // regfe is supposed to contain 0xfefefefefefefefe

103 #define PAVGB_MMX_NO_RND(rega, regb, regr, regfe) \

104 "movq " #rega ", " #regr " \n\t"\

105 "pand " #regb ", " #regr " \n\t"\

106 "pxor " #rega ", " #regb " \n\t"\

107 "pand " #regfe "," #regb " \n\t"\

108 "psrlq $1, " #regb " \n\t"\

109 "paddb " #regb ", " #regr " \n\t"

110

/* PAVGB_MMX: asm fragment computing the per-byte average of rega and regb,
 * rounded up:
 *     regr = (rega | regb) - (((rega ^ regb) & regfe) >> 1)
 * rega is left intact, regb is overwritten, the result lands in regr.
 * regfe is presumably 0xfefefefefefefefe, as stated for the NO_RND variant. */
111 #define PAVGB_MMX(rega, regb, regr, regfe) \

112 "movq " #rega ", " #regr " \n\t"\

113 "por " #regb ", " #regr " \n\t"\

114 "pxor " #rega ", " #regb " \n\t"\

115 "pand " #regfe "," #regb " \n\t"\

116 "psrlq $1, " #regb " \n\t"\

117 "psubb " #regb ", " #regr " \n\t"

118

119 // mm6 is supposed to contain 0xfefefefefefefefe

120 #define PAVGBP_MMX_NO_RND(rega, regb, regr, regc, regd, regp) \

121 "movq " #rega ", " #regr " \n\t"\

122 "movq " #regc ", " #regp " \n\t"\

123 "pand " #regb ", " #regr " \n\t"\

124 "pand " #regd ", " #regp " \n\t"\

125 "pxor " #rega ", " #regb " \n\t"\

126 "pxor " #regc ", " #regd " \n\t"\

127 "pand %%mm6, " #regb " \n\t"\

128 "pand %%mm6, " #regd " \n\t"\

129 "psrlq $1, " #regb " \n\t"\

130 "psrlq $1, " #regd " \n\t"\

131 "paddb " #regb ", " #regr " \n\t"\

132 "paddb " #regd ", " #regp " \n\t"

133

134 #define PAVGBP_MMX(rega, regb, regr, regc, regd, regp) \

135 "movq " #rega ", " #regr " \n\t"\

136 "movq " #regc ", " #regp " \n\t"\

137 "por " #regb ", " #regr " \n\t"\

138 "por " #regd ", " #regp " \n\t"\

139 "pxor " #rega ", " #regb " \n\t"\

140 "pxor " #regc ", " #regd " \n\t"\

141 "pand %%mm6, " #regb " \n\t"\

142 "pand %%mm6, " #regd " \n\t"\

143 "psrlq $1, " #regd " \n\t"\

144 "psrlq $1, " #regb " \n\t"\

145 "psubb " #regb ", " #regr " \n\t"\

146 "psubb " #regd ", " #regp " \n\t"

147

148 /***********************************/

149 /* MMX2 specific */

150

151 #define DEF(x) x ## _mmx2

152

153 /* Introduced only in MMX2 set */

154 #define PAVGB "pavgb"

155 #define OP_AVG PAVGB

156

157 #include "dsputil_mmx_avg_template.c"

158

159 #undef DEF

160 #undef PAVGB

161 #undef OP_AVG

162

163 #define put_no_rnd_pixels16_mmx put_pixels16_mmx

164 #define put_no_rnd_pixels8_mmx put_pixels8_mmx

165 #define put_pixels16_mmx2 put_pixels16_mmx

166 #define put_pixels8_mmx2 put_pixels8_mmx

167 #define put_pixels4_mmx2 put_pixels4_mmx

168 #define put_no_rnd_pixels16_mmx2 put_no_rnd_pixels16_mmx

169 #define put_no_rnd_pixels8_mmx2 put_no_rnd_pixels8_mmx

170 #define put_pixels16_3dnow put_pixels16_mmx

171 #define put_pixels8_3dnow put_pixels8_mmx

172 #define put_pixels4_3dnow put_pixels4_mmx

173 #define put_no_rnd_pixels16_3dnow put_no_rnd_pixels16_mmx

174 #define put_no_rnd_pixels8_3dnow put_no_rnd_pixels8_mmx

175

176 /***********************************/

177 /* standard MMX */

178

/*
 * Write a block of coefficients to the picture as unsigned bytes.
 *
 * block:     source coefficients; each asm statement reads 64 bytes from it
 *            (presumably 32 16-bit DCTELEMs, since p advances by 32 between
 *            the two statements -- confirm against the DCTELEM typedef)
 * pixels:    destination; 8 bytes are stored to each of 8 consecutive rows
 * line_size: distance in bytes between two destination rows
 *
 * packuswb narrows signed 16-bit words to unsigned bytes with saturation,
 * which is what clamps the values to 0..255.
 */
179 void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)

180 {

181 const DCTELEM *p;

182 uint8_t *pix;

183

184 /* read the pixels */

185 p = block;

186 pix = pixels;

187 /* unrolled loop */

/* First half: rows 0..3. The source is passed as a memory operand (%3). */
188 __asm__ volatile(

189 "movq %3, %%mm0 \n\t"

190 "movq 8%3, %%mm1 \n\t"

191 "movq 16%3, %%mm2 \n\t"

192 "movq 24%3, %%mm3 \n\t"

193 "movq 32%3, %%mm4 \n\t"

194 "movq 40%3, %%mm5 \n\t"

195 "movq 48%3, %%mm6 \n\t"

196 "movq 56%3, %%mm7 \n\t"

/* Each pack merges two quadwords of words into one quadword of 8 bytes. */
197 "packuswb %%mm1, %%mm0 \n\t"

198 "packuswb %%mm3, %%mm2 \n\t"

199 "packuswb %%mm5, %%mm4 \n\t"

200 "packuswb %%mm7, %%mm6 \n\t"

/* %1 is line_size and %2 is 3*line_size, giving row offsets 0, 1, 2 and 3. */
201 "movq %%mm0, (%0) \n\t"

202 "movq %%mm2, (%0, %1) \n\t"

203 "movq %%mm4, (%0, %1, 2) \n\t"

204 "movq %%mm6, (%0, %2) \n\t"

205 ::"r" (pix), "r" ((x86_reg)line_size), "r" ((x86_reg)line_size*3), "m"(*p)

206 :"memory");

/* Advance to destination row 4 and to the second half of the coefficients. */
207 pix += line_size*4;

208 p += 32;

209

210 // if here would be an exact copy of the code above

211 // compiler would generate some very strange code

212 // thus using "r"

/* Second half: rows 4..7. Same sequence, source pointer held in a register. */
213 __asm__ volatile(

214 "movq (%3), %%mm0 \n\t"

215 "movq 8(%3), %%mm1 \n\t"

216 "movq 16(%3), %%mm2 \n\t"

217 "movq 24(%3), %%mm3 \n\t"

218 "movq 32(%3), %%mm4 \n\t"

219 "movq 40(%3), %%mm5 \n\t"

220 "movq 48(%3), %%mm6 \n\t"

221 "movq 56(%3), %%mm7 \n\t"

222 "packuswb %%mm1, %%mm0 \n\t"

223 "packuswb %%mm3, %%mm2 \n\t"

224 "packuswb %%mm5, %%mm4 \n\t"

225 "packuswb %%mm7, %%mm6 \n\t"

226 "movq %%mm0, (%0) \n\t"

227 "movq %%mm2, (%0, %1) \n\t"

228 "movq %%mm4, (%0, %1, 2) \n\t"

229 "movq %%mm6, (%0, %2) \n\t"

230 ::"r" (pix), "r" ((x86_reg)line_size), "r" ((x86_reg)line_size*3), "r"(p)

231 :"memory");

232 }

233

/* Eight bytes of 0x80; loaded into mm0 below and added to every output byte. */
234 DECLARE_ASM_CONST(8, uint8_t, ff_vector128)[8] =

235 { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 };

236

/*
 * Asm fragment that converts and stores four rows of eight values.
 * Reads 64 bytes starting at off(%2). packsswb narrows the 16-bit words to
 * signed bytes with saturation, then paddb with mm0 (0x80 per byte) shifts
 * them into the unsigned range. Rows go to (%0), (%0,%3), (%0,%3,2) and
 * (%0,%1); the enclosing asm must set %1 to 3 * %3 and mm0 beforehand.
 */
237 #define put_signed_pixels_clamped_mmx_half(off) \

238 "movq "#off"(%2), %%mm1 \n\t"\

239 "movq 16+"#off"(%2), %%mm2 \n\t"\

240 "movq 32+"#off"(%2), %%mm3 \n\t"\

241 "movq 48+"#off"(%2), %%mm4 \n\t"\

242 "packsswb 8+"#off"(%2), %%mm1 \n\t"\

243 "packsswb 24+"#off"(%2), %%mm2 \n\t"\

244 "packsswb 40+"#off"(%2), %%mm3 \n\t"\

245 "packsswb 56+"#off"(%2), %%mm4 \n\t"\

246 "paddb %%mm0, %%mm1 \n\t"\

247 "paddb %%mm0, %%mm2 \n\t"\

248 "paddb %%mm0, %%mm3 \n\t"\

249 "paddb %%mm0, %%mm4 \n\t"\

250 "movq %%mm1, (%0) \n\t"\

251 "movq %%mm2, (%0, %3) \n\t"\

252 "movq %%mm3, (%0, %3, 2) \n\t"\

253 "movq %%mm4, (%0, %1) \n\t"

254

/*
 * Write signed coefficients to the picture with a +128 bias.
 *
 * block:     source coefficients; 128 bytes are read (two 64-byte halves)
 * pixels:    destination; 8 bytes are stored to each of 8 rows
 * line_size: distance in bytes between two destination rows
 */
255 void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)

256 {

257 x86_reg line_skip = line_size;

258 x86_reg line_skip3;

259

260 __asm__ volatile (

261 "movq "MANGLE(ff_vector128)", %%mm0 \n\t"

/* line_skip3 = 3 * line_skip, computed inside the asm. */
262 "lea (%3, %3, 2), %1 \n\t"

263 put_signed_pixels_clamped_mmx_half(0)

/* Move the destination pointer down four rows for the second half. */
264 "lea (%0, %3, 4), %0 \n\t"

265 put_signed_pixels_clamped_mmx_half(64)

266 :"+&r" (pixels), "=&r" (line_skip3)

267 :"r" (block), "r"(line_skip)

268 :"memory");

269 }

270

/*
 * Add a block of coefficients to existing pixels and store the clamped sum.
 *
 * block:     source coefficients; 32 bytes are read per loop iteration
 * pixels:    picture data, read and written in place; 8 bytes in each of
 *            8 rows are touched (4 iterations, 2 rows each)
 * line_size: distance in bytes between two rows
 *
 * Pixels are widened to 16-bit words, added with signed saturation (paddsw)
 * and narrowed back with unsigned saturation (packuswb).
 */
271 void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)

272 {

273 const DCTELEM *p;

274 uint8_t *pix;

275 int i;

276

277 /* read the pixels */

278 p = block;

279 pix = pixels;

/* mm7 = 0; used as the high half when unpacking bytes to words. It is set
 * in a separate asm statement and relied upon inside the loop. */
280 MOVQ_ZERO(mm7);

281 i = 4;

282 do {

283 __asm__ volatile(

/* mm0..mm3 = 16 coefficients (two rows of eight). */
284 "movq (%2), %%mm0 \n\t"

285 "movq 8(%2), %%mm1 \n\t"

286 "movq 16(%2), %%mm2 \n\t"

287 "movq 24(%2), %%mm3 \n\t"

/* mm4 = current pixel row, mm6 = the row below it. */
288 "movq %0, %%mm4 \n\t"

289 "movq %1, %%mm6 \n\t"

290 "movq %%mm4, %%mm5 \n\t"

291 "punpcklbw %%mm7, %%mm4 \n\t"

292 "punpckhbw %%mm7, %%mm5 \n\t"

293 "paddsw %%mm4, %%mm0 \n\t"

294 "paddsw %%mm5, %%mm1 \n\t"

295 "movq %%mm6, %%mm5 \n\t"

296 "punpcklbw %%mm7, %%mm6 \n\t"

297 "punpckhbw %%mm7, %%mm5 \n\t"

298 "paddsw %%mm6, %%mm2 \n\t"

299 "paddsw %%mm5, %%mm3 \n\t"

300 "packuswb %%mm1, %%mm0 \n\t"

301 "packuswb %%mm3, %%mm2 \n\t"

302 "movq %%mm0, %0 \n\t"

303 "movq %%mm2, %1 \n\t"

304 :"+m"(*pix), "+m"(*(pix+line_size))

305 :"r"(p)

306 :"memory");

/* Next pair of rows and the next 16 coefficients. */
307 pix += line_size*2;

308 p += 16;

309 } while (--i);

310 }

311

/*
 * Copy an 8-byte-wide column of rows from pixels to block.
 *
 * block:     destination
 * pixels:    source
 * line_size: row stride in bytes, used for both source and destination
 * h:         number of rows; four rows are copied per iteration and the loop
 *            ends only when the counter reaches exactly zero, so h is
 *            expected to be a positive multiple of 4
 */
312 static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h)

313 {

314 __asm__ volatile(

/* REG_a = 2 * line_size, the step applied after every pair of rows. */
315 "lea (%3, %3), %%"REG_a" \n\t"

316 ASMALIGN(3)

317 "1: \n\t"

318 "movq (%1), %%mm0 \n\t"

319 "movq (%1, %3), %%mm1 \n\t"

320 "movq %%mm0, (%2) \n\t"

321 "movq %%mm1, (%2, %3) \n\t"

322 "add %%"REG_a", %1 \n\t"

323 "add %%"REG_a", %2 \n\t"

324 "movq (%1), %%mm0 \n\t"

325 "movq (%1, %3), %%mm1 \n\t"

326 "movq %%mm0, (%2) \n\t"

327 "movq %%mm1, (%2, %3) \n\t"

328 "add %%"REG_a", %1 \n\t"

329 "add %%"REG_a", %2 \n\t"

330 "subl $4, %0 \n\t"

331 "jnz 1b \n\t"

332 : "+g"(h), "+r" (pixels), "+r" (block)

333 : "r"((x86_reg)line_size)

334 : "%"REG_a, "memory"

335 );

336 }

337

/*
 * Copy a 16-byte-wide column of rows from pixels to block.
 *
 * block:     destination; written with movdqa, so every destination row
 *            must be 16-byte aligned
 * pixels:    source; read with movdqu, so no alignment is required
 * line_size: row stride in bytes, used for both source and destination
 * h:         number of rows; four rows per iteration, loop ends only at
 *            exactly zero, so h is expected to be a positive multiple of 4
 */
338 static void put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_size, int h)

339 {

340 __asm__ volatile(

341 "1: \n\t"

/* %3 = line_size, %4 = 3 * line_size: rows 0, 1, 2, 3 of this group. */
342 "movdqu (%1), %%xmm0 \n\t"

343 "movdqu (%1,%3), %%xmm1 \n\t"

344 "movdqu (%1,%3,2), %%xmm2 \n\t"

345 "movdqu (%1,%4), %%xmm3 \n\t"

346 "movdqa %%xmm0, (%2) \n\t"

347 "movdqa %%xmm1, (%2,%3) \n\t"

348 "movdqa %%xmm2, (%2,%3,2) \n\t"

349 "movdqa %%xmm3, (%2,%4) \n\t"

/* lea does not modify the flags, so jnz still tests the result of subl. */
350 "subl $4, %0 \n\t"

351 "lea (%1,%3,4), %1 \n\t"

352 "lea (%2,%3,4), %2 \n\t"

353 "jnz 1b \n\t"

354 : "+g"(h), "+r" (pixels), "+r" (block)

355 : "r"((x86_reg)line_size), "r"((x86_reg)3L*line_size)

356 : "memory"

357 );

358 }

359

/*
 * Average a 16-byte-wide column of rows from pixels into block.
 *
 * block:     destination, read and written in place; accessed through pavgb
 *            memory operands and movdqa, so every row must be 16-byte aligned
 * pixels:    source; read with movdqu, so no alignment is required
 * line_size: row stride in bytes, used for both source and destination
 * h:         number of rows; four rows per iteration, loop ends only at
 *            exactly zero, so h is expected to be a positive multiple of 4
 *
 * pavgb computes the per-byte average rounded up.
 */
360 static void avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_size, int h)

361 {

362 __asm__ volatile(

363 "1: \n\t"

364 "movdqu (%1), %%xmm0 \n\t"

365 "movdqu (%1,%3), %%xmm1 \n\t"

366 "movdqu (%1,%3,2), %%xmm2 \n\t"

367 "movdqu (%1,%4), %%xmm3 \n\t"

/* Combine each source row with the row already present in block. */
368 "pavgb (%2), %%xmm0 \n\t"

369 "pavgb (%2,%3), %%xmm1 \n\t"

370 "pavgb (%2,%3,2), %%xmm2 \n\t"

371 "pavgb (%2,%4), %%xmm3 \n\t"

372 "movdqa %%xmm0, (%2) \n\t"

373 "movdqa %%xmm1, (%2,%3) \n\t"

374 "movdqa %%xmm2, (%2,%3,2) \n\t"

375 "movdqa %%xmm3, (%2,%4) \n\t"

/* lea does not modify the flags, so jnz still tests the result of subl. */
376 "subl $4, %0 \n\t"

377 "lea (%1,%3,4), %1 \n\t"

378 "lea (%2,%3,4), %2 \n\t"

379 "jnz 1b \n\t"

380 : "+g"(h), "+r" (pixels), "+r" (block)

381 : "r"((x86_reg)line_size), "r"((x86_reg)3L*line_size)

382 : "memory"

383 );

384 }

385

/*
 * Zero 128 bytes starting at block (eight 16-byte stores).
 *
 * block: destination; movaps requires it to be 16-byte aligned.
 *
 * The memory clobber tells the compiler that the asm writes through the
 * pointer.
 */
386 static void clear_block_sse(DCTELEM *block)

387 {

388 __asm__ volatile(

389 "xorps %%xmm0, %%xmm0 \n"

390 "movaps %%xmm0, (%0) \n"

391 "movaps %%xmm0, 16(%0) \n"

392 "movaps %%xmm0, 32(%0) \n"

393 "movaps %%xmm0, 48(%0) \n"

394 "movaps %%xmm0, 64(%0) \n"

395 "movaps %%xmm0, 80(%0) \n"

396 "movaps %%xmm0, 96(%0) \n"

397 "movaps %%xmm0, 112(%0) \n"

398 :: "r"(block)

399 : "memory"

400 );

401 }

402

/*
 * Zero 6 * 128 = 768 bytes starting at blocks.
 *
 * blocks: destination; movaps requires it to be 16-byte aligned.
 *
 * %0 points one past the end of the region and REG_a counts from -768 up to
 * zero in steps of 128, so (%0, REG_a) walks the region front to back and
 * the sign flag of the add terminates the loop.
 *
 * The asm stores through the pointer, so it declares a memory clobber (as
 * clear_block_sse does); without it the compiler may cache or reorder
 * accesses to blocks[] across this statement.
 */
403 static void clear_blocks_sse(DCTELEM *blocks)

404 {

405 __asm__ volatile(

406 "xorps %%xmm0, %%xmm0 \n"

407 "mov %1, %%"REG_a" \n"

408 "1: \n"

409 "movaps %%xmm0, (%0, %%"REG_a") \n"

410 "movaps %%xmm0, 16(%0, %%"REG_a") \n"

411 "movaps %%xmm0, 32(%0, %%"REG_a") \n"

412 "movaps %%xmm0, 48(%0, %%"REG_a") \n"

413 "movaps %%xmm0, 64(%0, %%"REG_a") \n"

414 "movaps %%xmm0, 80(%0, %%"REG_a") \n"

415 "movaps %%xmm0, 96(%0, %%"REG_a") \n"

416 "movaps %%xmm0, 112(%0, %%"REG_a") \n"

417 "add $128, %%"REG_a" \n"

418 " js 1b \n"

419 : : "r" (((uint8_t *)blocks)+128*6),

420 "i" (-128*6)

421 : "%"REG_a, "memory"

422 );

423 }

424

/*
 * Transpose a 4x4 block of bytes.
 *
 * dst:        destination; 4 bytes are written to each of 4 rows
 * src:        source; 4 bytes are read from each of 4 rows
 * dst_stride: distance in bytes between destination rows
 * src_stride: distance in bytes between source rows
 *
 * Rows are interleaved bytewise and then wordwise, after which each 32-bit
 * lane holds one source column and is stored as one destination row.
 */
425 static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){

426 __asm__ volatile( //FIXME could save 1 instruction if done as 8x4 ...

427 "movd %4, %%mm0 \n\t"

428 "movd %5, %%mm1 \n\t"

429 "movd %6, %%mm2 \n\t"

430 "movd %7, %%mm3 \n\t"

/* mm0 = rows 0 and 1 interleaved, mm2 = rows 2 and 3 interleaved. */
431 "punpcklbw %%mm1, %%mm0 \n\t"

432 "punpcklbw %%mm3, %%mm2 \n\t"

433 "movq %%mm0, %%mm1 \n\t"

/* mm0 = columns 0 and 1, mm1 = columns 2 and 3. */
434 "punpcklwd %%mm2, %%mm0 \n\t"

435 "punpckhwd %%mm2, %%mm1 \n\t"

436 "movd %%mm0, %0 \n\t"

437 "punpckhdq %%mm0, %%mm0 \n\t"

438 "movd %%mm0, %1 \n\t"

439 "movd %%mm1, %2 \n\t"

440 "punpckhdq %%mm1, %%mm1 \n\t"

441 "movd %%mm1, %3 \n\t"

442

443 : "=m" (*(uint32_t*)(dst + 0*dst_stride)),

444 "=m" (*(uint32_t*)(dst + 1*dst_stride)),

445 "=m" (*(uint32_t*)(dst + 2*dst_stride)),

446 "=m" (*(uint32_t*)(dst + 3*dst_stride))

447 : "m" (*(uint32_t*)(src + 0*src_stride)),

448 "m" (*(uint32_t*)(src + 1*src_stride)),

449 "m" (*(uint32_t*)(src + 2*src_stride)),

450 "m" (*(uint32_t*)(src + 3*src_stride))

451 );

452 }

453

454 #define QPEL_OP(OPNAME, ROUNDER, RND, OP, MMX)\

455 \

456 static void OPNAME ## qpel8_mc00_ ## MMX (uint8_t *dst, uint8_t *src, int stride){\

457 OPNAME ## pixels8_ ## MMX(dst, src, stride, 8);\

458 }\

459 \

460 static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\

461 uint64_t temp[8];\

462 uint8_t * const half= (uint8_t*)temp;\

463 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\

464 OPNAME ## pixels8_l2_ ## MMX(dst, src, half, stride, stride, 8);\

465 }\

466 \

467 static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\

468 OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, stride, 8);\

469 }\

470 \

471 static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\

472 uint64_t temp[8];\

473 uint8_t * const half= (uint8_t*)temp;\

474 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\

475 OPNAME ## pixels8_l2_ ## MMX(dst, src+1, half, stride, stride, 8);\

476 }\

477 \

478 static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\

479 uint64_t temp[8];\

480 uint8_t * const half= (uint8_t*)temp;\

481 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\

482 OPNAME ## pixels8_l2_ ## MMX(dst, src, half, stride, stride, 8);\

483 }\

484 \

485 static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\

486 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, stride, stride);\

487 }\

488 \

489 static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\

490 uint64_t temp[8];\

491 uint8_t * const half= (uint8_t*)temp;\

492 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\

493 OPNAME ## pixels8_l2_ ## MMX(dst, src+stride, half, stride, stride, 8);\

494 }\

495 static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\

496 uint64_t half[8 + 9];\

497 uint8_t * const halfH= ((uint8_t*)half) + 64;\

498 uint8_t * const halfHV= ((uint8_t*)half);\

499 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\

500 put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, stride, 9);\

501 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\

502 OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, stride, 8, 8);\

503 }\

504 static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\

505 uint64_t half[8 + 9];\

506 uint8_t * const halfH= ((uint8_t*)half) + 64;\

507 uint8_t * const halfHV= ((uint8_t*)half);\

508 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\

509 put ## RND ## pixels8_l2_ ## MMX(halfH, src+1, halfH, 8, stride, 9);\

510 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\

511 OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, stride, 8, 8);\

512 }\

513 static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\

514 uint64_t half[8 + 9];\

515 uint8_t * const halfH= ((uint8_t*)half) + 64;\

516 uint8_t * const halfHV= ((uint8_t*)half);\

517 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\

518 put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, stride, 9);\

519 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\

520 OPNAME ## pixels8_l2_ ## MMX(dst, halfH+8, halfHV, stride, 8, 8);\

521 }\

522 static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\

523 uint64_t half[8 + 9];\

524 uint8_t * const halfH= ((uint8_t*)half) + 64;\

525 uint8_t * const halfHV= ((uint8_t*)half);\

526 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\

527 put ## RND ## pixels8_l2_ ## MMX(halfH, src+1, halfH, 8, stride, 9);\

528 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\

529 OPNAME ## pixels8_l2_ ## MMX(dst, halfH+8, halfHV, stride, 8, 8);\

530 }\

531 static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\

532 uint64_t half[8 + 9];\

533 uint8_t * const halfH= ((uint8_t*)half) + 64;\

534 uint8_t * const halfHV= ((uint8_t*)half);\

535 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\

536 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\

537 OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, stride, 8, 8);\

538 }\

539 static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\

540 uint64_t half[8 + 9];\

541 uint8_t * const halfH= ((uint8_t*)half) + 64;\

542 uint8_t * const halfHV= ((uint8_t*)half);\

543 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\

544 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\

545 OPNAME ## pixels8_l2_ ## MMX(dst, halfH+8, halfHV, stride, 8, 8);\

546 }\

547 static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\

548 uint64_t half[8 + 9];\

549 uint8_t * const halfH= ((uint8_t*)half);\

550 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\

551 put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, stride, 9);\

552 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\

553 }\

554 static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\

555 uint64_t half[8 + 9];\

556 uint8_t * const halfH= ((uint8_t*)half);\

557 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\

558 put ## RND ## pixels8_l2_ ## MMX(halfH, src+1, halfH, 8, stride, 9);\

559 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\

560 }\

561 static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\

562 uint64_t half[9];\

563 uint8_t * const halfH= ((uint8_t*)half);\

564 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\

565 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\

566 }\

567 static void OPNAME ## qpel16_mc00_ ## MMX (uint8_t *dst, uint8_t *src, int stride){\

568 OPNAME ## pixels16_ ## MMX(dst, src, stride, 16);\

569 }\

570 \

571 static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\

572 uint64_t temp[32];\

573 uint8_t * const half= (uint8_t*)temp;\

574 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, stride, 16);\

575 OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, stride, 16);\

576 }\

577 \

578 static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\

579 OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src, stride, stride, 16);\

580 }\

581 \

582 static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\

583 uint64_t temp[32];\

584 uint8_t * const half= (uint8_t*)temp;\

585 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, stride, 16);\

586 OPNAME ## pixels16_l2_ ## MMX(dst, src+1, half, stride, stride, 16);\

587 }\

588 \

589 static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\

590 uint64_t temp[32];\

591 uint8_t * const half= (uint8_t*)temp;\

592 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, stride);\

593 OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, stride, 16);\

594 }\

595 \

596 static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\

597 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src, stride, stride);\

598 }\

599 \

600 static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\

601 uint64_t temp[32];\

602 uint8_t * const half= (uint8_t*)temp;\

603 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, stride);\

604 OPNAME ## pixels16_l2_ ## MMX(dst, src+stride, half, stride, stride, 16);\

605 }\

606 static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\

607 uint64_t half[16*2 + 17*2];\

608 uint8_t * const halfH= ((uint8_t*)half) + 256;\

609 uint8_t * const halfHV= ((uint8_t*)half);\

610 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\

611 put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, stride, 17);\

612 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\

613 OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, stride, 16, 16);\

614 }\

615 static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\

616 uint64_t half[16*2 + 17*2];\

617 uint8_t * const halfH= ((uint8_t*)half) + 256;\

618 uint8_t * const halfHV= ((uint8_t*)half);\

619 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\

620 put ## RND ## pixels16_l2_ ## MMX(halfH, src+1, halfH, 16, stride, 17);\

621 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\

622 OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, stride, 16, 16);\

623 }\

624 static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\

625 uint64_t half[16*2 + 17*2];\

626 uint8_t * const halfH= ((uint8_t*)half) + 256;\

627 uint8_t * const halfHV= ((uint8_t*)half);\

628 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\

629 put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, stride, 17);\

630 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\

631 OPNAME ## pixels16_l2_ ## MMX(dst, halfH+16, halfHV, stride, 16, 16);\

632 }\

633 static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\

634 uint64_t half[16*2 + 17*2];\

635 uint8_t * const halfH= ((uint8_t*)half) + 256;\

636 uint8_t * const halfHV= ((uint8_t*)half);\

637 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\

638 put ## RND ## pixels16_l2_ ## MMX(halfH, src+1, halfH, 16, stride, 17);\

639 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\

640 OPNAME ## pixels16_l2_ ## MMX(dst, halfH+16, halfHV, stride, 16, 16);\

641 }\

642 static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\

643 uint64_t half[16*2 + 17*2];\

644 uint8_t * const halfH= ((uint8_t*)half) + 256;\

645 uint8_t * const halfHV= ((uint8_t*)half);\

646 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\

647 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\

648 OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, stride, 16, 16);\

649 }\

650 static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\

651 uint64_t half[16*2 + 17*2];\

652 uint8_t * const halfH= ((uint8_t*)half) + 256;\

653 uint8_t * const halfHV= ((uint8_t*)half);\

654 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\

655 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\

656 OPNAME ## pixels16_l2_ ## MMX(dst, halfH+16, halfHV, stride, 16, 16);\

657 }\

658 static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\

659 uint64_t half[17*2];\

660 uint8_t * const halfH= ((uint8_t*)half);\

661 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\

662 put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, stride, 17);\

663 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\

664 }\

665 static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\

666 uint64_t half[17*2];\

667 uint8_t * const halfH= ((uint8_t*)half);\

668 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\

669 put ## RND ## pixels16_l2_ ## MMX(halfH, src+1, halfH, 16, stride, 17);\

670 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\

671 }\

672 static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\

673 uint64_t half[17*2];\

674 uint8_t * const halfH= ((uint8_t*)half);\

675 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\

676 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\

677 }

678

679 #define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t"

680 #define AVG_3DNOW_OP(a,b,temp, size) \

681 "mov" #size " " #b ", " #temp " \n\t"\

682 "pavgusb " #temp ", " #a " \n\t"\

683 "mov" #size " " #a ", " #b " \n\t"

684 #define AVG_MMX2_OP(a,b,temp, size) \

685 "mov" #size " " #b ", " #temp " \n\t"\

686 "pavgb " #temp ", " #a " \n\t"\

687 "mov" #size " " #a ", " #b " \n\t"

688

/*
 * PREFETCH(name, op) defines a static function
 *     void name(void *mem, int stride, int h)
 * that issues instruction op on one address per row: mem, mem + stride, ...
 * The do/while form always issues at least one prefetch and stops when the
 * pre-decremented h reaches zero, so callers must pass h >= 1.
 * It is instantiated once, as prefetch_mmx2 using prefetcht0, and then
 * undefined.
 */
689 #define PREFETCH(name, op) \

690 static void name(void *mem, int stride, int h){\

691 const uint8_t *p= mem;\

692 do{\

693 __asm__ volatile(#op" %0" :: "m"(*p));\

694 p+= stride;\

695 }while(--h);\

696 }

697 PREFETCH(prefetch_mmx2, prefetcht0)

698 #undef PREFETCH

699

700 #include "h264dsp_mmx.c"

701

702 void ff_x264_deblock_v_luma_sse2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0);

703 void ff_x264_deblock_h_luma_sse2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0);

704 void ff_x264_deblock_h_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta);

705 void ff_x264_deblock_v_luma_intra_sse2(uint8_t *pix, int stride, int alpha, int beta);

706 void ff_x264_deblock_h_luma_intra_sse2(uint8_t *pix, int stride, int alpha, int beta);

707

/*
 * Install the x86 SIMD routines of this file into a DSPContext.
 *
 * c: context whose function pointers are overwritten according to the CPU
 *    capabilities returned by mm_support() (also stored in mm_flags).
 *
 * Every routine is installed only when the instruction set it uses is
 * reported: clear_block_sse and clear_blocks_sse execute SSE instructions
 * (xorps, movaps) and prefetch_mmx2 executes prefetcht0, so they are gated
 * on FF_MM_SSE and FF_MM_MMX2 instead of on FF_MM_MMX alone. When a flag is
 * missing the corresponding pointers are left untouched -- presumably they
 * hold C fallbacks set up by the caller; confirm against the caller.
 */
708 void dsputil_init_mmx(DSPContext* c)

709 {

710 mm_flags = mm_support();

711

712 if (mm_flags & FF_MM_MMX) {

713 if (mm_flags & FF_MM_SSE) {

714 c->clear_block = clear_block_sse;

715 c->clear_blocks = clear_blocks_sse;

716 }

717 if (mm_flags & FF_MM_MMX2) { c->prefetch = prefetch_mmx2; }

/* Fills the 16x16 (index 0) and 8x8 (index 1) put/avg quarter-pel table
 * entries for position x + 4*y with the CPU-specific implementations. */
718 #define H264_QPEL_FUNCS(x, y, CPU)\

719 c->put_h264_qpel_pixels_tab[0][x+y*4] = put_h264_qpel16_mc##x##y##_##CPU;\

720 c->put_h264_qpel_pixels_tab[1][x+y*4] = put_h264_qpel8_mc##x##y##_##CPU;\

721 c->avg_h264_qpel_pixels_tab[0][x+y*4] = avg_h264_qpel16_mc##x##y##_##CPU;\

722 c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU;

723

724 if((mm_flags & FF_MM_SSE2)){

725 c->put_pixels_tab[0][0] = put_pixels16_sse2;

726 c->avg_pixels_tab[0][0] = avg_pixels16_sse2;

727

728 }

729 if(mm_flags & FF_MM_SSE2){

730 H264_QPEL_FUNCS(0, 1, sse2);

731 H264_QPEL_FUNCS(0, 2, sse2);

732 H264_QPEL_FUNCS(0, 3, sse2);

733 H264_QPEL_FUNCS(1, 1, sse2);

734 H264_QPEL_FUNCS(1, 2, sse2);

735 H264_QPEL_FUNCS(1, 3, sse2);

736 H264_QPEL_FUNCS(2, 1, sse2);

737 H264_QPEL_FUNCS(2, 2, sse2);

738 H264_QPEL_FUNCS(2, 3, sse2);

739 H264_QPEL_FUNCS(3, 1, sse2);

740 H264_QPEL_FUNCS(3, 2, sse2);

741 H264_QPEL_FUNCS(3, 3, sse2);

742 }

/* The SSSE3 entries come last so that they replace the SSE2 ones above. */
743 #if HAVE_SSSE3

744 if(mm_flags & FF_MM_SSSE3){

745 H264_QPEL_FUNCS(1, 0, ssse3);

746 H264_QPEL_FUNCS(1, 1, ssse3);

747 H264_QPEL_FUNCS(1, 2, ssse3);

748 H264_QPEL_FUNCS(1, 3, ssse3);

749 H264_QPEL_FUNCS(2, 0, ssse3);

750 H264_QPEL_FUNCS(2, 1, ssse3);

751 H264_QPEL_FUNCS(2, 2, ssse3);

752 H264_QPEL_FUNCS(2, 3, ssse3);

753 H264_QPEL_FUNCS(3, 0, ssse3);

754 H264_QPEL_FUNCS(3, 1, ssse3);

755 H264_QPEL_FUNCS(3, 2, ssse3);

756 H264_QPEL_FUNCS(3, 3, ssse3);

757

758 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_ssse3_rnd;

759 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_ssse3_rnd;

760 c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_ssse3;

761 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_ssse3;

762 }

763 #endif

764

765

766 }

767 }

768

/*
 * Install the x86 SIMD H.264 routines into an H264DSPContext.
 *
 * c: context whose function pointers are overwritten according to the CPU
 *    capabilities returned by mm_support() (also stored in mm_flags).
 *
 * Nothing is changed unless FF_MM_MMX is reported. Assignments are ordered
 * from least to most capable instruction set, so later ones deliberately
 * replace earlier ones.
 */
769 void ff_h264dsp_init_x86(H264DSPContext *c)

770 {

771 mm_flags = mm_support();

772

773 if (mm_flags & FF_MM_MMX) {

/* Baseline: the full MMX IDCT routines are also used for the DC-only slots. */
774 c->h264_idct_dc_add=

775 c->h264_idct_add= ff_h264_idct_add_mmx;

776 c->h264_idct8_dc_add=

777 c->h264_idct8_add= ff_h264_idct8_add_mmx;

778

/* MMX2: dedicated DC-only IDCTs, batched IDCTs, loop filters, weighting. */
779 if (mm_flags & FF_MM_MMX2) {

780 c->h264_idct_dc_add= ff_h264_idct_dc_add_mmx2;

781 c->h264_idct_add8 = ff_h264_idct_add8_mmx2;

782 c->h264_idct_add16 = ff_h264_idct_add16_mmx2;

783 c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx2;

784

785 c->h264_idct8_dc_add= ff_h264_idct8_dc_add_mmx2;

786 c->h264_idct8_add4 = ff_h264_idct8_add4_mmx2;

787

788 c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_mmx2;

789 c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_mmx2;

790 c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_mmx2;

791 c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_mmx2;

792 c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_mmx2;

793 c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_mmx2;

794 c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2;

795

/* Table index selects the block size named in each routine. */
796 c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_mmx2;

797 c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_mmx2;

798 c->weight_h264_pixels_tab[2]= ff_h264_weight_8x16_mmx2;

799 c->weight_h264_pixels_tab[3]= ff_h264_weight_8x8_mmx2;

800 c->weight_h264_pixels_tab[4]= ff_h264_weight_8x4_mmx2;

801 c->weight_h264_pixels_tab[5]= ff_h264_weight_4x8_mmx2;

802 c->weight_h264_pixels_tab[6]= ff_h264_weight_4x4_mmx2;

803 c->weight_h264_pixels_tab[7]= ff_h264_weight_4x2_mmx2;

804

805 c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_mmx2;

806 c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_mmx2;

807 c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_mmx2;

808 c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_mmx2;

809 c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_mmx2;

810 c->biweight_h264_pixels_tab[5]= ff_h264_biweight_4x8_mmx2;

811 c->biweight_h264_pixels_tab[6]= ff_h264_biweight_4x4_mmx2;

812 c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2;

813 }

/* SSE2: replaces the 8x8 IDCT entries chosen above. */
814 if(mm_flags & FF_MM_SSE2){

815 c->h264_idct8_add = ff_h264_idct8_add_sse2;

816 c->h264_idct8_add4= ff_h264_idct8_add4_sse2;

817 }

818

819 }

820 }

821

Mercurial > cgi-bin > hgwebdir.cgi > PR > Applications > VSs > VSs__H264__App

view libavcodec/x86/dsputil_mmx.c @ 3:0b056460c67d

Mercurial > cgi-bin > hgwebdir.cgi > PR > Applications > VSs > VSs__H264__App