nengel@2: /* nengel@2: * iWMMXt optimized DSP utils nengel@2: * Copyright (c) 2004 AGAWA Koji nengel@2: * nengel@2: * This file is part of FFmpeg. nengel@2: * nengel@2: * FFmpeg is free software; you can redistribute it and/or nengel@2: * modify it under the terms of the GNU Lesser General Public nengel@2: * License as published by the Free Software Foundation; either nengel@2: * version 2.1 of the License, or (at your option) any later version. nengel@2: * nengel@2: * FFmpeg is distributed in the hope that it will be useful, nengel@2: * but WITHOUT ANY WARRANTY; without even the implied warranty of nengel@2: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU nengel@2: * Lesser General Public License for more details. nengel@2: * nengel@2: * You should have received a copy of the GNU Lesser General Public nengel@2: * License along with FFmpeg; if not, write to the Free Software nengel@2: * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA nengel@2: */ nengel@2: nengel@2: #include "libavcodec/dsputil.h" nengel@2: nengel@2: #define DEF(x, y) x ## _no_rnd_ ## y ##_iwmmxt nengel@2: #define SET_RND(regd) __asm__ volatile ("mov r12, #1 \n\t tbcsth " #regd ", r12":::"r12"); nengel@2: #define WAVG2B "wavg2b" nengel@2: #include "dsputil_iwmmxt_rnd_template.c" nengel@2: #undef DEF nengel@2: #undef SET_RND nengel@2: #undef WAVG2B nengel@2: nengel@2: #define DEF(x, y) x ## _ ## y ##_iwmmxt nengel@2: #define SET_RND(regd) __asm__ volatile ("mov r12, #2 \n\t tbcsth " #regd ", r12":::"r12"); nengel@2: #define WAVG2B "wavg2br" nengel@2: #include "dsputil_iwmmxt_rnd_template.c" nengel@2: #undef DEF nengel@2: #undef SET_RND nengel@2: #undef WAVG2BR nengel@2: nengel@2: // need scheduling nengel@2: #define OP(AVG) \ nengel@2: __asm__ volatile ( \ nengel@2: /* alignment */ \ nengel@2: "and r12, %[pixels], #7 \n\t" \ nengel@2: "bic %[pixels], %[pixels], #7 \n\t" \ nengel@2: "tmcr wcgr1, r12 \n\t" \ nengel@2: \ nengel@2: "wldrd wr0, [%[pixels]] \n\t" \ nengel@2: "wldrd wr1, [%[pixels], #8] \n\t" \ nengel@2: "add %[pixels], %[pixels], %[line_size] \n\t" \ nengel@2: "walignr1 wr4, wr0, wr1 \n\t" \ nengel@2: \ nengel@2: "1: \n\t" \ nengel@2: \ nengel@2: "wldrd wr2, [%[pixels]] \n\t" \ nengel@2: "wldrd wr3, [%[pixels], #8] \n\t" \ nengel@2: "add %[pixels], %[pixels], %[line_size] \n\t" \ nengel@2: "pld [%[pixels]] \n\t" \ nengel@2: "walignr1 wr5, wr2, wr3 \n\t" \ nengel@2: AVG " wr6, wr4, wr5 \n\t" \ nengel@2: "wstrd wr6, [%[block]] \n\t" \ nengel@2: "add %[block], %[block], %[line_size] \n\t" \ nengel@2: \ nengel@2: "wldrd wr0, [%[pixels]] \n\t" \ nengel@2: "wldrd wr1, [%[pixels], #8] \n\t" \ nengel@2: "add %[pixels], %[pixels], %[line_size] \n\t" \ nengel@2: "walignr1 wr4, wr0, wr1 \n\t" \ nengel@2: "pld [%[pixels]] \n\t" \ nengel@2: AVG " wr6, wr4, wr5 \n\t" \ nengel@2: "wstrd wr6, [%[block]] \n\t" \ nengel@2: "add %[block], %[block], %[line_size] \n\t" \ nengel@2: \ nengel@2: "subs %[h], %[h], #2 \n\t" \ nengel@2: "bne 1b \n\t" \ nengel@2: : [block]"+r"(block), [pixels]"+r"(pixels), [h]"+r"(h) \ nengel@2: : [line_size]"r"(line_size) \ nengel@2: : "memory", "r12"); nengel@2: void put_pixels8_y2_iwmmxt(uint8_t *block, const uint8_t *pixels, const int line_size, int h) nengel@2: { nengel@2: OP("wavg2br"); nengel@2: } nengel@2: void put_no_rnd_pixels8_y2_iwmmxt(uint8_t *block, const uint8_t *pixels, const int line_size, int h) nengel@2: { nengel@2: OP("wavg2b"); nengel@2: } nengel@2: #undef OP nengel@2: nengel@2: void add_pixels_clamped_iwmmxt(const DCTELEM *block, uint8_t *pixels, int line_size) nengel@2: { nengel@2: uint8_t *pixels2 = pixels + line_size; nengel@2: nengel@2: __asm__ volatile ( nengel@2: "mov r12, #4 \n\t" nengel@2: "1: \n\t" nengel@2: "pld [%[pixels], %[line_size2]] \n\t" nengel@2: "pld [%[pixels2], %[line_size2]] \n\t" nengel@2: "wldrd wr4, [%[pixels]] \n\t" nengel@2: "wldrd wr5, [%[pixels2]] \n\t" nengel@2: "pld [%[block], #32] \n\t" nengel@2: "wunpckelub wr6, wr4 \n\t" nengel@2: "wldrd wr0, [%[block]] \n\t" nengel@2: "wunpckehub wr7, wr4 \n\t" nengel@2: "wldrd wr1, [%[block], #8] \n\t" nengel@2: "wunpckelub wr8, wr5 \n\t" nengel@2: "wldrd wr2, [%[block], #16] \n\t" nengel@2: "wunpckehub wr9, wr5 \n\t" nengel@2: "wldrd wr3, [%[block], #24] \n\t" nengel@2: "add %[block], %[block], #32 \n\t" nengel@2: "waddhss wr10, wr0, wr6 \n\t" nengel@2: "waddhss wr11, wr1, wr7 \n\t" nengel@2: "waddhss wr12, wr2, wr8 \n\t" nengel@2: "waddhss wr13, wr3, wr9 \n\t" nengel@2: "wpackhus wr14, wr10, wr11 \n\t" nengel@2: "wpackhus wr15, wr12, wr13 \n\t" nengel@2: "wstrd wr14, [%[pixels]] \n\t" nengel@2: "add %[pixels], %[pixels], %[line_size2] \n\t" nengel@2: "subs r12, r12, #1 \n\t" nengel@2: "wstrd wr15, [%[pixels2]] \n\t" nengel@2: "add %[pixels2], %[pixels2], %[line_size2] \n\t" nengel@2: "bne 1b \n\t" nengel@2: : [block]"+r"(block), [pixels]"+r"(pixels), [pixels2]"+r"(pixels2) nengel@2: : [line_size2]"r"(line_size << 1) nengel@2: : "cc", "memory", "r12"); nengel@2: } nengel@2: nengel@2: static void clear_blocks_iwmmxt(DCTELEM *blocks) nengel@2: { nengel@2: __asm__ volatile( nengel@2: "wzero wr0 \n\t" nengel@2: "mov r1, #(128 * 6 / 32) \n\t" nengel@2: "1: \n\t" nengel@2: "wstrd wr0, [%0] \n\t" nengel@2: "wstrd wr0, [%0, #8] \n\t" nengel@2: "wstrd wr0, [%0, #16] \n\t" nengel@2: "wstrd wr0, [%0, #24] \n\t" nengel@2: "subs r1, r1, #1 \n\t" nengel@2: "add %0, %0, #32 \n\t" nengel@2: "bne 1b \n\t" nengel@2: : "+r"(blocks) nengel@2: : nengel@2: : "r1" nengel@2: ); nengel@2: } nengel@2: nengel@2: static void nop(uint8_t *block, const uint8_t *pixels, int line_size, int h) nengel@2: { nengel@2: return; nengel@2: } nengel@2: nengel@2: /* A run time test is not simple. If this file is compiled in nengel@2: * then we should install the functions nengel@2: */ nengel@2: int mm_flags = FF_MM_IWMMXT; /* multimedia extension flags */ nengel@2: nengel@2: void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx) nengel@2: { nengel@2: if (avctx->dsp_mask) { nengel@2: if (avctx->dsp_mask & FF_MM_FORCE) nengel@2: mm_flags |= (avctx->dsp_mask & 0xffff); nengel@2: else nengel@2: mm_flags &= ~(avctx->dsp_mask & 0xffff); nengel@2: } nengel@2: nengel@2: if (!(mm_flags & FF_MM_IWMMXT)) return; nengel@2: nengel@2: c->add_pixels_clamped = add_pixels_clamped_iwmmxt; nengel@2: nengel@2: c->clear_blocks = clear_blocks_iwmmxt; nengel@2: nengel@2: c->put_pixels_tab[0][0] = put_pixels16_iwmmxt; nengel@2: c->put_pixels_tab[0][1] = put_pixels16_x2_iwmmxt; nengel@2: c->put_pixels_tab[0][2] = put_pixels16_y2_iwmmxt; nengel@2: c->put_pixels_tab[0][3] = put_pixels16_xy2_iwmmxt; nengel@2: c->put_no_rnd_pixels_tab[0][0] = put_pixels16_iwmmxt; nengel@2: c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_iwmmxt; nengel@2: c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_iwmmxt; nengel@2: c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_iwmmxt; nengel@2: nengel@2: c->put_pixels_tab[1][0] = put_pixels8_iwmmxt; nengel@2: c->put_pixels_tab[1][1] = put_pixels8_x2_iwmmxt; nengel@2: c->put_pixels_tab[1][2] = put_pixels8_y2_iwmmxt; nengel@2: c->put_pixels_tab[1][3] = put_pixels8_xy2_iwmmxt; nengel@2: c->put_no_rnd_pixels_tab[1][0] = put_pixels8_iwmmxt; nengel@2: c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_iwmmxt; nengel@2: c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_iwmmxt; nengel@2: c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_iwmmxt; nengel@2: nengel@2: c->avg_pixels_tab[0][0] = avg_pixels16_iwmmxt; nengel@2: c->avg_pixels_tab[0][1] = avg_pixels16_x2_iwmmxt; nengel@2: c->avg_pixels_tab[0][2] = avg_pixels16_y2_iwmmxt; nengel@2: c->avg_pixels_tab[0][3] = avg_pixels16_xy2_iwmmxt; nengel@2: c->avg_no_rnd_pixels_tab[0][0] = avg_pixels16_iwmmxt; nengel@2: c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_iwmmxt; nengel@2: c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_iwmmxt; nengel@2: c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_iwmmxt; nengel@2: nengel@2: c->avg_pixels_tab[1][0] = avg_pixels8_iwmmxt; nengel@2: c->avg_pixels_tab[1][1] = avg_pixels8_x2_iwmmxt; nengel@2: c->avg_pixels_tab[1][2] = avg_pixels8_y2_iwmmxt; nengel@2: c->avg_pixels_tab[1][3] = avg_pixels8_xy2_iwmmxt; nengel@2: c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_iwmmxt; nengel@2: c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_iwmmxt; nengel@2: c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_iwmmxt; nengel@2: c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_iwmmxt; nengel@2: }