diff libavcodec/dsputil.h @ 2:897f711a7157

rearrange to work with autoconf
author Nina Engelhardt <nengel@mailbox.tu-berlin.de>
date Tue, 25 Sep 2012 15:55:33 +0200
parents
children
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/libavcodec/dsputil.h	Tue Sep 25 15:55:33 2012 +0200
     1.3 @@ -0,0 +1,465 @@
     1.4 +/*
     1.5 + * DSP utils
     1.6 + * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
     1.7 + * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
     1.8 + *
     1.9 + * This file is part of FFmpeg.
    1.10 + *
    1.11 + * FFmpeg is free software; you can redistribute it and/or
    1.12 + * modify it under the terms of the GNU Lesser General Public
    1.13 + * License as published by the Free Software Foundation; either
    1.14 + * version 2.1 of the License, or (at your option) any later version.
    1.15 + *
    1.16 + * FFmpeg is distributed in the hope that it will be useful,
    1.17 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
    1.18 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    1.19 + * Lesser General Public License for more details.
    1.20 + *
    1.21 + * You should have received a copy of the GNU Lesser General Public
    1.22 + * License along with FFmpeg; if not, write to the Free Software
    1.23 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
    1.24 + */
    1.25 +
    1.26 +/**
    1.27 + * @file
    1.28 + * DSP utils.
    1.29 + * note, many functions in here may use MMX which trashes the FPU state, it is
    1.30 + * absolutely necessary to call emms_c() between dsp & float/double code
    1.31 + */
    1.32 +
    1.33 +#ifndef AVCODEC_DSPUTIL_H
    1.34 +#define AVCODEC_DSPUTIL_H
    1.35 +
    1.36 +#include "libavutil/intreadwrite.h"
    1.37 +#include "avcodec.h"
    1.38 +#include "h264_idct.h"
    1.39 +// float sample helpers; presumably the plain-C versions (going by the _c suffix) -- TODO confirm
    1.40 +void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1,
    1.41 +                             const float *win, float add_bias, int len);
    1.42 +void ff_float_to_int16_c(int16_t *dst, const float *src, long len);
    1.43 +void ff_float_to_int16_interleave_c(int16_t *dst, const float **src, long len, int channels);
    1.44 +
    1.45 +/* encoding scans: four 64-entry byte tables (presumably coefficient orders for 8x8 blocks -- confirm) */
    1.46 +extern const uint8_t ff_alternate_horizontal_scan[64];
    1.47 +extern const uint8_t ff_alternate_vertical_scan[64];
    1.48 +extern const uint8_t ff_zigzag_direct[64];
    1.49 +extern const uint8_t ff_zigzag248_direct[64];
    1.50 +
    1.51 +/* pixel operations */
    1.52 +#define MAX_NEG_CROP 1024 /* ff_cropTbl below has 2 * MAX_NEG_CROP entries on top of its 256 */
    1.53 +
    1.54 +/* temporary; both tables are only declared here, their contents are defined elsewhere */
    1.55 +extern uint32_t ff_squareTbl[512];
    1.56 +extern uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP];
    1.57 +
    1.58 +/* VP3 DSP functions; alignment expectations are noted inline after each pointer parameter */
    1.59 +void ff_vp3_idct_c(DCTELEM *block/* align 16*/);
    1.60 +void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
    1.61 +void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
    1.62 +void ff_vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size, const DCTELEM *block/*align 16*/);
    1.63 +/* loop filters: v and h variants sharing one signature */
    1.64 +void ff_vp3_v_loop_filter_c(uint8_t *src, int stride, int *bounding_values);
    1.65 +void ff_vp3_h_loop_filter_c(uint8_t *src, int stride, int *bounding_values);
    1.66 +
    1.67 +/* VP6 DSP functions: one filter taking separate horizontal and vertical weight tables */
    1.68 +void ff_vp6_filter_diag4_c(uint8_t *dst, uint8_t *src, int stride,
    1.69 +                           const int16_t *h_weights, const int16_t *v_weights);
    1.70 +
    1.71 +/* Bink functions: an IDCT on the block alone, plus add/put variants that also take a destination and its linesize */
    1.72 +void ff_bink_idct_c    (DCTELEM *block);
    1.73 +void ff_bink_idct_add_c(uint8_t *dest, int linesize, DCTELEM *block);
    1.74 +void ff_bink_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block);
    1.75 +
    1.76 +/* CAVS functions: put/avg mc00 routines for qpel8 and qpel16 (mc00 presumably means no sub-pel offset -- confirm) */
    1.77 +void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride);
    1.78 +void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride);
    1.79 +void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride);
    1.80 +void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride);
    1.81 +
    1.82 +/* VC1 functions: put/avg mc00 routines taking a const source and an extra rnd argument */
    1.83 +void ff_put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd);
    1.84 +void ff_avg_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd);
    1.85 +
    1.86 +/* EA functions */
    1.87 +void ff_ea_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block);
    1.88 +
    1.89 +/* plane copy and 1/2^n downscaling functions from imgconvert.c; all four share one signature */
    1.90 +void ff_img_copy_plane(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
    1.91 +void ff_shrink22(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
    1.92 +void ff_shrink44(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
    1.93 +void ff_shrink88(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
    1.94 +
    1.95 +void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
    1.96 +              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
    1.97 +
    1.98 +/* Minimum alignment rules ;)
    1.99 +If you notice errors in the alignment annotations, need more alignment for some
   1.100 +ASM code on some CPU, or need to use a function with less aligned data, then
   1.101 +send a mail to the ffmpeg-devel mailing list.
   1.102 +
   1.103 +!warning: these alignments might not match reality (an attribute((align))
   1.104 +may be missing somewhere).
   1.105 +I (Michael) did not check them; these are just the alignments which I think
   1.106 +could be reached easily.
   1.107 +
   1.108 +!future: video codecs might need functions with less strict alignment
   1.109 +*/
   1.110 +
   1.111 +/*
   1.112 +void get_pixels_c(DCTELEM *block, const uint8_t *pixels, int line_size);
   1.113 +void diff_pixels_c(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride);
   1.114 +void put_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size);
   1.115 +void add_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size);
   1.116 +void clear_blocks_c(DCTELEM *blocks);
   1.117 +*/
   1.118 +
   1.119 +/* add and put pixel (decoding) */
   1.120 +// block sizes for op_pixels_func are 8x4, 8x8, 16x8, 16x16
   1.121 +// h for op_pixels_func is limited to {width/2, width} but never larger than 16 and never smaller than 4
   1.122 +typedef void (*op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int h);
   1.123 +typedef void (*tpel_mc_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int w, int h);
   1.124 +typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);
   1.125 +typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y);
   1.126 +/* same shape as op_pixels_func, but takes a single byte value instead of a source pointer */
   1.127 +typedef void (*op_fill_func)(uint8_t *block/*align width (8 or 16)*/, uint8_t value, int line_size, int h);
   1.128 +/* DEF_OLD_QPEL(name): declare ff_put_NAME, ff_put_no_rnd_NAME and ff_avg_NAME, each with the (dst, src, stride) signature of qpel_mc_func */
   1.129 +#define DEF_OLD_QPEL(name)\
   1.130 +void ff_put_        ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\
   1.131 +void ff_put_no_rnd_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\
   1.132 +void ff_avg_        ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);
   1.133 +/* the macro body ends in ';', so the uses below carry no semicolon of their own */
   1.134 +DEF_OLD_QPEL(qpel16_mc11_old_c)
   1.135 +DEF_OLD_QPEL(qpel16_mc31_old_c)
   1.136 +DEF_OLD_QPEL(qpel16_mc12_old_c)
   1.137 +DEF_OLD_QPEL(qpel16_mc32_old_c)
   1.138 +DEF_OLD_QPEL(qpel16_mc13_old_c)
   1.139 +DEF_OLD_QPEL(qpel16_mc33_old_c)
   1.140 +DEF_OLD_QPEL(qpel8_mc11_old_c)
   1.141 +DEF_OLD_QPEL(qpel8_mc31_old_c)
   1.142 +DEF_OLD_QPEL(qpel8_mc12_old_c)
   1.143 +DEF_OLD_QPEL(qpel8_mc32_old_c)
   1.144 +DEF_OLD_QPEL(qpel8_mc13_old_c)
   1.145 +DEF_OLD_QPEL(qpel8_mc33_old_c)
   1.146 +/* CALL_2X_PIXELS(a, b, n): define static function a that calls b twice, at block/pixels and at block+n/pixels+n, with the same line_size and h */
   1.147 +#define CALL_2X_PIXELS(a, b, n)\
   1.148 +static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
   1.149 +    b(block  , pixels  , line_size, h);\
   1.150 +    b(block+n, pixels+n, line_size, h);\
   1.151 +}
   1.152 +
   1.153 +/* motion estimation */
   1.154 +// h is limited to {width/2, width, 2*width} but never larger than 16 and never smaller than 2
   1.155 +// although currently h<4 is not used, as functions with width <8 are neither used nor implemented
   1.156 +typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size, int h)/* __attribute__ ((const))*/;
   1.157 +
   1.158 +/**
   1.159 + * Scantable. NOTE(review): presumably filled in by ff_init_scantable(); the struct layout differs between PPC and other builds.
   1.160 + */
   1.161 +typedef struct ScanTable{
   1.162 +    const uint8_t *scantable;   /* presumably the source scan order handed to ff_init_scantable() -- confirm */
   1.163 +    uint8_t permutated[64];
   1.164 +    uint8_t raster_end[64];
   1.165 +#if ARCH_PPC
   1.166 +                /** Used by dct_quantize_altivec to find last-non-zero; this member exists only when ARCH_PPC is set */
   1.167 +    DECLARE_ALIGNED(16, uint8_t, inverse)[64];
   1.168 +#endif
   1.169 +} ScanTable;
   1.170 +/* NOTE(review): the first parameter of ff_init_scantable is unnamed here; presumably a permutation table -- confirm against the definition */
   1.171 +void ff_init_scantable(uint8_t *, ScanTable *st, const uint8_t *src_scantable);
   1.172 +
   1.173 +void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize,
   1.174 +                         int block_w, int block_h,
   1.175 +                         int src_x, int src_y, int w, int h);
   1.176 +
   1.177 +
   1.178 +/**
   1.179 + * DSPContext: function-pointer table for the DSP routines (pixel ops, motion compensation, (I)DCT, edge drawing, prefetch).
   1.180 + */
   1.181 +typedef struct DSPContext {
   1.182 +    /* pixel ops : interface with DCT */
   1.183 +    void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size);
   1.184 +    void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride);
   1.185 +    void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
   1.186 +    void (*put_signed_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
   1.187 +    void (*put_pixels_nonclamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
   1.188 +    void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
   1.189 +    void (*add_pixels8)(uint8_t *pixels, DCTELEM *block, int line_size);
   1.190 +    void (*add_pixels4)(uint8_t *pixels, DCTELEM *block, int line_size);
   1.191 +    /* block clearing */
   1.192 +    void (*clear_block)(DCTELEM *block/*align 16*/);
   1.193 +    void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
   1.194 +
   1.195 +
   1.196 +    /**
   1.197 +     * Halfpel motion compensation with rounding (a+b+1)>>1.
   1.198 +     * This is an array[4][4] of motion compensation functions for 4
   1.199 +     * horizontal blocksizes (only 16 and 8 are spelled out in the index map below) and the 4 halfpel positions<br>
   1.200 +     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
   1.201 +     * @param block destination where the result is stored
   1.202 +     * @param pixels source
   1.203 +     * @param line_size number of bytes in a horizontal line of block
   1.204 +     * @param h height
   1.205 +     */
   1.206 +    op_pixels_func put_pixels_tab[4][4];
   1.207 +
   1.208 +    /**
   1.209 +     * Halfpel motion compensation with rounding (a+b+1)>>1.
   1.210 +     * This is an array[4][4] of motion compensation functions for 4
   1.211 +     * horizontal blocksizes (only 16 and 8 are spelled out in the index map below) and the 4 halfpel positions<br>
   1.212 +     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
   1.213 +     * @param block destination into which the result is averaged (a+b+1)>>1
   1.214 +     * @param pixels source
   1.215 +     * @param line_size number of bytes in a horizontal line of block
   1.216 +     * @param h height
   1.217 +     */
   1.218 +    op_pixels_func avg_pixels_tab[4][4];
   1.219 +
   1.220 +    /**
   1.221 +     * Halfpel motion compensation with no rounding (a+b)>>1.
   1.222 +     * This is declared as an array[4][4]; the text here covers the 2
   1.223 +     * horizontal blocksizes (16,8) of the first two rows and the 4 halfpel positions<br>
   1.224 +     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
   1.225 +     * @param block destination where the result is stored
   1.226 +     * @param pixels source
   1.227 +     * @param line_size number of bytes in a horizontal line of block
   1.228 +     * @param h height
   1.229 +     */
   1.230 +    op_pixels_func put_no_rnd_pixels_tab[4][4];
   1.231 +
   1.232 +    /**
   1.233 +     * Halfpel motion compensation with no rounding (a+b)>>1.
   1.234 +     * This is declared as an array[4][4]; the text here covers the 2
   1.235 +     * horizontal blocksizes (16,8) of the first two rows and the 4 halfpel positions<br>
   1.236 +     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
   1.237 +     * @param block destination into which the result is averaged (a+b)>>1
   1.238 +     * @param pixels source
   1.239 +     * @param line_size number of bytes in a horizontal line of block
   1.240 +     * @param h height
   1.241 +     */
   1.242 +    op_pixels_func avg_no_rnd_pixels_tab[4][4];
   1.243 +
   1.244 +    void (*put_no_rnd_pixels_l2[2])(uint8_t *block/*align width (8 or 16)*/, const uint8_t *a/*align 1*/, const uint8_t *b/*align 1*/, int line_size, int h);
   1.245 +
   1.246 +
   1.247 +    qpel_mc_func put_qpel_pixels_tab[2][16];
   1.248 +    qpel_mc_func avg_qpel_pixels_tab[2][16];
   1.249 +    qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
   1.250 +    qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16];
   1.251 +    qpel_mc_func put_mspel_pixels_tab[8];
   1.252 +
   1.253 +    /**
   1.254 +     * h264 Chroma MC
   1.255 +     */
   1.256 +    h264_chroma_mc_func put_h264_chroma_pixels_tab[3];
   1.257 +    h264_chroma_mc_func avg_h264_chroma_pixels_tab[3];
   1.258 +    /* This is really one func used in VC-1 decoding */
   1.259 +    h264_chroma_mc_func put_no_rnd_vc1_chroma_pixels_tab[3];
   1.260 +    h264_chroma_mc_func avg_no_rnd_vc1_chroma_pixels_tab[3];
   1.261 +
   1.262 +    qpel_mc_func put_h264_qpel_pixels_tab[4][16];
   1.263 +    qpel_mc_func avg_h264_qpel_pixels_tab[4][16];
   1.264 +
   1.265 +    qpel_mc_func put_2tap_qpel_pixels_tab[4][16];
   1.266 +    qpel_mc_func avg_2tap_qpel_pixels_tab[4][16];
   1.267 +
   1.268 +   
   1.269 +    /* (I)DCT */
   1.270 +    void (*fdct)(DCTELEM *block/* align 16*/);
   1.271 +    void (*fdct248)(DCTELEM *block/* align 16*/);
   1.272 +
   1.273 +    /* IDCT really*/
   1.274 +    void (*idct)(DCTELEM *block/* align 16*/);
   1.275 +
   1.276 +    /**
   1.277 +     * block -> idct -> clip to unsigned 8 bit -> dest.
   1.278 +     * (-1392, 0, 0, ...) -> idct -> (-174, -174, ...) -> put -> (0, 0, ...)
   1.279 +     * @param line_size size in bytes of a horizontal line of dest
   1.280 +     */
   1.281 +    void (*idct_put)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
   1.282 +
   1.283 +    /**
   1.284 +     * block -> idct -> add dest -> clip to unsigned 8 bit -> dest.
   1.285 +     * @param line_size size in bytes of a horizontal line of dest
   1.286 +     */
   1.287 +    void (*idct_add)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
   1.288 +
   1.289 +    void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w);
   1.290 +#define EDGE_WIDTH 32 /* written inside the struct body but, being a macro, not scoped to it */
   1.291 +
   1.292 +    void (*prefetch)(void *mem, int stride, int h);
   1.293 +
   1.294 +} DSPContext;
   1.295 +
   1.296 +void dsputil_static_init(void);
   1.297 +void dsputil_init(DSPContext* p);
   1.298 +
   1.299 +int ff_check_alignment(void);
   1.300 +
   1.301 +/**
   1.302 + * Permute block according to permutation.
   1.303 + * @param last last non zero element in scantable order
   1.304 + */
   1.305 +void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last);
   1.306 +
   1.307 +void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type);
   1.308 +
   1.309 +#define         BYTE_VEC32(c)   ((c)*0x01010101UL) /* for c in 0..255: c repeated in each of the four low bytes */
   1.310 +/* Bytewise average of two packed words, rounding up: each byte is (a+b+1)>>1. The 0x01 mask stops the shift from leaking bits between bytes. */
   1.311 +static inline uint32_t rnd_avg32(uint32_t a, uint32_t b)
   1.312 +{
   1.313 +    return (a | b) - (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1);
   1.314 +}
   1.315 +/* Bytewise average of two packed words, rounding down: each byte is (a+b)>>1. The 0x01 mask stops the shift from leaking bits between bytes. */
   1.316 +static inline uint32_t no_rnd_avg32(uint32_t a, uint32_t b)
   1.317 +{
   1.318 +    return (a & b) + (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1);
   1.319 +}
   1.320 +
   1.321 +
   1.322 +/**
   1.323 + * Empty mmx state.
   1.324 + * This must be called between any dsp function and float/double code,
   1.325 + * for example sin(); dsp->idct_put(); emms_c(); cos(). This default expands to nothing; HAVE_MMX builds redefine it further down.
   1.326 + */
   1.327 +#define emms_c()
   1.328 +
   1.329 +/* Should be defined by architectures supporting one or more multimedia
   1.330 +   extensions. mm_flags is tested against FF_MM_MMX by the MMX variant of emms_c() in this header. */
   1.331 +int mm_support(void);
   1.332 +extern int mm_flags;
   1.333 +/* per-architecture initializers; each takes the DSPContext (their bodies are not in this header) */
   1.334 +void dsputil_init_arm(DSPContext* c);
   1.335 +void dsputil_init_mmx(DSPContext* c);
   1.336 +void dsputil_init_ppc(DSPContext* c);
   1.337 +
   1.338 +void ff_dsputil_init_dwt(DSPContext *c);
   1.339 +
   1.340 +#if HAVE_MMX
   1.341 +
   1.342 +#undef emms_c
   1.343 +/* Issue the x86 "emms" instruction; the "memory" clobber tells the compiler the asm may touch memory. */
   1.344 +static inline void emms(void)
   1.345 +{
   1.346 +    __asm__ volatile ("emms;":::"memory");
   1.347 +}
   1.348 +
   1.349 +
   1.350 +#define emms_c() \
   1.351 +{\
   1.352 +    if (mm_flags & FF_MM_MMX)\
   1.353 +        emms();\
   1.354 +}
   1.355 +
   1.356 +#elif ARCH_ARM
   1.357 +
   1.358 +#if HAVE_NEON
   1.359 +#   define STRIDE_ALIGN 16
   1.360 +#endif
   1.361 +
   1.362 +#elif ARCH_PPC || ARCH_PPC64 || ARCH_CELL
   1.363 +
   1.364 +#define STRIDE_ALIGN 16
   1.365 +
   1.366 +#endif
   1.367 +/* Fallback: 8 unless a branch above chose otherwise (16 for ARM with NEON and for PPC/PPC64/CELL; the HAVE_MMX branch sets nothing). */
   1.368 +#ifndef STRIDE_ALIGN
   1.369 +#   define STRIDE_ALIGN 8
   1.370 +#endif
   1.371 +/* WRAPPER8_16(name8, name16): define static name16 as name8 at offset 0 plus name8 at offset 8 of dst/src, with stride and h passed through */
   1.372 +#define WRAPPER8_16(name8, name16)\
   1.373 +static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
   1.374 +    return name8(s, dst           , src           , stride, h)\
   1.375 +          +name8(s, dst+8         , src+8         , stride, h);\
   1.376 +}
   1.377 +/* WRAPPER8_16_SQ(name8, name16): define static name16 summing name8 at offsets 0 and 8; a second pair, 8 rows down, is added only when h == 16. Every name8 call gets height 8, whatever h is. */
   1.378 +#define WRAPPER8_16_SQ(name8, name16)\
   1.379 +static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
   1.380 +    int score=0;\
   1.381 +    score +=name8(s, dst           , src           , stride, 8);\
   1.382 +    score +=name8(s, dst+8         , src+8         , stride, 8);\
   1.383 +    if(h==16){\
   1.384 +        dst += 8*stride;\
   1.385 +        src += 8*stride;\
   1.386 +        score +=name8(s, dst           , src           , stride, 8);\
   1.387 +        score +=name8(s, dst+8         , src+8         , stride, 8);\
   1.388 +    }\
   1.389 +    return score;\
   1.390 +}
   1.391 +
   1.392 +static inline void copy_block2(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
   1.393 +{
   1.394 +    /* Copy h rows of 2 bytes each (one AV_RN16/AV_WN16 pair per row);
   1.395 +     * dst steps by dstStride and src by srcStride. No rows when h <= 0. */
   1.396 +    for (; h > 0; h--) {
   1.397 +        AV_WN16(dst, AV_RN16(src));
   1.398 +        dst += dstStride;
   1.399 +        src += srcStride;
   1.400 +    }
   1.401 +}
   1.402 +
   1.403 +static inline void copy_block4(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
   1.404 +{
   1.405 +    /* Copy h rows of 4 bytes each (one AV_RN32/AV_WN32 pair per row);
   1.406 +     * dst steps by dstStride and src by srcStride. No rows when h <= 0. */
   1.407 +    for (; h > 0; h--) {
   1.408 +        AV_WN32(dst, AV_RN32(src));
   1.409 +        dst += dstStride;
   1.410 +        src += srcStride;
   1.411 +    }
   1.412 +}
   1.413 +
   1.414 +static inline void copy_block8(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
   1.415 +{
   1.416 +    /* Copy h rows of 8 bytes each as two 32-bit AV_RN32/AV_WN32 transfers;
   1.417 +     * dst steps by dstStride and src by srcStride. No rows when h <= 0. */
   1.418 +    for (; h > 0; h--) {
   1.419 +        AV_WN32(dst,     AV_RN32(src));
   1.420 +        AV_WN32(dst + 4, AV_RN32(src + 4));
   1.421 +        dst += dstStride;
   1.422 +        src += srcStride;
   1.423 +    }
   1.424 +}
   1.425 +
   1.426 +static inline void copy_block9(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
   1.427 +{
   1.428 +    /* Copy h rows of 9 bytes each: two 32-bit AV_RN32/AV_WN32 transfers plus
   1.429 +     * one trailing byte; dst steps by dstStride, src by srcStride. */
   1.430 +    for (; h > 0; h--) {
   1.431 +        AV_WN32(dst,     AV_RN32(src));
   1.432 +        AV_WN32(dst + 4, AV_RN32(src + 4));
   1.433 +        dst[8] = src[8];
   1.434 +        dst += dstStride;
   1.435 +        src += srcStride;
   1.436 +    }
   1.437 +}
   1.438 +
   1.439 +static inline void copy_block16(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
   1.440 +{
   1.441 +    /* Copy h rows of 16 bytes each as four 32-bit AV_RN32/AV_WN32 transfers;
   1.442 +     * dst steps by dstStride and src by srcStride. No rows when h <= 0. */
   1.443 +    for (; h > 0; h--) {
   1.444 +        AV_WN32(dst,      AV_RN32(src));
   1.445 +        AV_WN32(dst + 4,  AV_RN32(src + 4));
   1.446 +        AV_WN32(dst + 8,  AV_RN32(src + 8));
   1.447 +        AV_WN32(dst + 12, AV_RN32(src + 12));
   1.448 +        dst += dstStride;
   1.449 +        src += srcStride;
   1.450 +    }
   1.451 +}
   1.452 +
   1.453 +static inline void copy_block17(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
   1.454 +{
   1.455 +    /* Copy h rows of 17 bytes each: four 32-bit AV_RN32/AV_WN32 transfers plus
   1.456 +     * one trailing byte; dst steps by dstStride, src by srcStride. */
   1.457 +    for (; h > 0; h--) {
   1.458 +        AV_WN32(dst,      AV_RN32(src));
   1.459 +        AV_WN32(dst + 4,  AV_RN32(src + 4));
   1.460 +        AV_WN32(dst + 8,  AV_RN32(src + 8));
   1.461 +        AV_WN32(dst + 12, AV_RN32(src + 12));
   1.462 +        dst[16] = src[16];
   1.463 +        dst += dstStride;
   1.464 +        src += srcStride;
   1.465 +    }
   1.466 +}
   1.467 +
   1.468 +#endif /* AVCODEC_DSPUTIL_H */