/*
 * H.26L/H.264/AVC/JVT/14496-10/... cabac decoding
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * H.264 / AVC / MPEG4 part10 cabac decoding.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */

#include "avcodec.h"
#include "h264_types.h"
#include "h264_data.h"
#include "cabac.h"
#include "rectangle.h"
#include "h264_misc.h"

// #undef NDEBUG
#include <assert.h>

/* Cabac pre state table */

static const int8_t cabac_context_init_I[460][2] =
{
    /* 0 - 10 */
    { 20, -15 }, {  2, 54 },  {  3,  74 }, { 20, -15 },
    {  2,  54 }, {  3, 74 },  { -28,127 }, { -23, 104 },
    { -6,  53 }, { -1, 54 },  {  7,  51 },

    /* 11 - 23 unused for I */
    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
    { 0, 0 },

    /* 24- 39 */
    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },

    /* 40 - 53 */
    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
    { 0, 0 },    { 0, 0 },

    /* 54 - 59 */
    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
    { 0, 0 },    { 0, 0 },

    /* 60 - 69 */
    { 0, 41 },   { 0, 63 },   { 0, 63 },     { 0, 63 },
    { -9, 83 },  { 4, 86 },   { 0, 97 },     { -7, 72 },
    { 13, 41 },  { 3, 62 },

    /* 70 -> 87 */
    { 0, 11 },   { 1, 55 },   { 0, 69 },     { -17, 127 },
    { -13, 102 },{ 0, 82 },   { -7, 74 },    { -21, 107 },
    { -27, 127 },{ -31, 127 },{ -24, 127 },  { -18, 95 },
    { -27, 127 },{ -21, 114 },{ -30, 127 },  { -17, 123 },
    { -12, 115 },{ -16, 122 },

    /* 88 -> 104 */
    { -11, 115 },{ -12, 63 }, { -2, 68 },    { -15, 84 },
    { -13, 104 },{ -3, 70 },  { -8, 93 },    { -10, 90 },
    { -30, 127 },{ -1, 74 },  { -6, 97 },    { -7, 91 },
    { -20, 127 },{ -4, 56 },  { -5, 82 },    { -7, 76 },
    { -22, 125 },

    /* 105 -> 135 */
    { -7, 93 },  { -11, 87 }, { -3, 77 },    { -5, 71 },
    { -4, 63 },  { -4, 68 },  { -12, 84 },   { -7, 62 },
    { -7, 65 },  { 8, 61 },   { 5, 56 },     { -2, 66 },
    { 1, 64 },   { 0, 61 },   { -2, 78 },    { 1, 50 },
    { 7, 52 },   { 10, 35 },  { 0, 44 },     { 11, 38 },
    { 1, 45 },   { 0, 46 },   { 5, 44 },     { 31, 17 },
    { 1, 51 },   { 7, 50 },   { 28, 19 },    { 16, 33 },
    { 14, 62 },  { -13, 108 },{ -15, 100 },

    /* 136 -> 165 */
    { -13, 101 },{ -13, 91 }, { -12, 94 },   { -10, 88 },
    { -16, 84 }, { -10, 86 }, { -7, 83 },    { -13, 87 },
    { -19, 94 }, { 1, 70 },   { 0, 72 },     { -5, 74 },
    { 18, 59 },  { -8, 102 }, { -15, 100 },  { 0, 95 },
    { -4, 75 },  { 2, 72 },   { -11, 75 },   { -3, 71 },
    { 15, 46 },  { -13, 69 }, { 0, 62 },     { 0, 65 },
    { 21, 37 },  { -15, 72 }, { 9, 57 },     { 16, 54 },
    { 0, 62 },   { 12, 72 },

    /* 166 -> 196 */
    { 24, 0 },   { 15, 9 },   { 8, 25 },     { 13, 18 },
    { 15, 9 },   { 13, 19 },  { 10, 37 },    { 12, 18 },
    { 6, 29 },   { 20, 33 },  { 15, 30 },    { 4, 45 },
    { 1, 58 },   { 0, 62 },   { 7, 61 },     { 12, 38 },
    { 11, 45 },  { 15, 39 },  { 11, 42 },    { 13, 44 },
    { 16, 45 },  { 12, 41 },  { 10, 49 },    { 30, 34 },
    { 18, 42 },  { 10, 55 },  { 17, 51 },    { 17, 46 },
    { 0, 89 },   { 26, -19 }, { 22, -17 },

    /* 197 -> 226 */
    { 26, -17 }, { 30, -25 }, { 28, -20 },   { 33, -23 },
    { 37, -27 }, { 33, -23 }, { 40, -28 },   { 38, -17 },
    { 33, -11 }, { 40, -15 }, { 41, -6 },    { 38, 1 },
    { 41, 17 },  { 30, -6 },  { 27, 3 },     { 26, 22 },
    { 37, -16 }, { 35, -4 },  { 38, -8 },    { 38, -3 },
    { 37, 3 },   { 38, 5 },   { 42, 0 },     { 35, 16 },
    { 39, 22 },  { 14, 48 },  { 27, 37 },    { 21, 60 },
    { 12, 68 },  { 2, 97 },

    /* 227 -> 251 */
    { -3, 71 },  { -6, 42 },  { -5, 50 },    { -3, 54 },
    { -2, 62 },  { 0, 58 },   { 1, 63 },     { -2, 72 },
    { -1, 74 },  { -9, 91 },  { -5, 67 },    { -5, 27 },
    { -3, 39 },  { -2, 44 },  { 0, 46 },     { -16, 64 },
    { -8, 68 },  { -10, 78 }, { -6, 77 },    { -10, 86 },
    { -12, 92 }, { -15, 55 }, { -10, 60 },   { -6, 62 },
    { -4, 65 },

    /* 252 -> 275 */
    { -12, 73 }, { -8, 76 },  { -7, 80 },    { -9, 88 },
    { -17, 110 },{ -11, 97 }, { -20, 84 },   { -11, 79 },
    { -6, 73 },  { -4, 74 },  { -13, 86 },   { -13, 96 },
    { -11, 97 }, { -19, 117 },{ -8, 78 },    { -5, 33 },
    { -4, 48 },  { -2, 53 },  { -3, 62 },    { -13, 71 },
    { -10, 79 }, { -12, 86 }, { -13, 90 },   { -14, 97 },

    /* 276 a bit special (not used, bypass is used instead) */
    { 0, 0 },

    /* 277 -> 307 */
    { -6, 93 },  { -6, 84 },  { -8, 79 },    { 0, 66 },
    { -1, 71 },  { 0, 62 },   { -2, 60 },    { -2, 59 },
    { -5, 75 },  { -3, 62 },  { -4, 58 },    { -9, 66 },
    { -1, 79 },  { 0, 71 },   { 3, 68 },     { 10, 44 },
    { -7, 62 },  { 15, 36 },  { 14, 40 },    { 16, 27 },
    { 12, 29 },  { 1, 44 },   { 20, 36 },    { 18, 32 },
    { 5, 42 },   { 1, 48 },   { 10, 62 },    { 17, 46 },
    { 9, 64 },   { -12, 104 },{ -11, 97 },

    /* 308 -> 337 */
    { -16, 96 }, { -7, 88 },  { -8, 85 },    { -7, 85 },
    { -9, 85 },  { -13, 88 }, { 4, 66 },     { -3, 77 },
    { -3, 76 },  { -6, 76 },  { 10, 58 },    { -1, 76 },
    { -1, 83 },  { -7, 99 },  { -14, 95 },   { 2, 95 },
    { 0, 76 },   { -5, 74 },  { 0, 70 },     { -11, 75 },
    { 1, 68 },   { 0, 65 },   { -14, 73 },   { 3, 62 },
    { 4, 62 },   { -1, 68 },  { -13, 75 },   { 11, 55 },
    { 5, 64 },   { 12, 70 },

    /* 338 -> 368 */
    { 15, 6 },   { 6, 19 },   { 7, 16 },     { 12, 14 },
    { 18, 13 },  { 13, 11 },  { 13, 15 },    { 15, 16 },
    { 12, 23 },  { 13, 23 },  { 15, 20 },    { 14, 26 },
    { 14, 44 },  { 17, 40 },  { 17, 47 },    { 24, 17 },
    { 21, 21 },  { 25, 22 },  { 31, 27 },    { 22, 29 },
    { 19, 35 },  { 14, 50 },  { 10, 57 },    { 7, 63 },
    { -2, 77 },  { -4, 82 },  { -3, 94 },    { 9, 69 },
    { -12, 109 },{ 36, -35 }, { 36, -34 },

    /* 369 -> 398 */
    { 32, -26 }, { 37, -30 }, { 44, -32 },   { 34, -18 },
    { 34, -15 }, { 40, -15 }, { 33, -7 },    { 35, -5 },
    { 33, 0 },   { 38, 2 },   { 33, 13 },    { 23, 35 },
    { 13, 58 },  { 29, -3 },  { 26, 0 },     { 22, 30 },
    { 31, -7 },  { 35, -15 }, { 34, -3 },    { 34, 3 },
    { 36, -1 },  { 34, 5 },   { 32, 11 },    { 35, 5 },
    { 34, 12 },  { 39, 11 },  { 30, 29 },    { 34, 26 },
    { 29, 39 },  { 19, 66 },

    /* 399 -> 435 */
    {  31,  21 }, {  31,  31 }, {  25,  50 },
    { -17, 120 }, { -20, 112 }, { -18, 114 }, { -11,  85 },
    { -15,  92 }, { -14,  89 }, { -26,  71 }, { -15,  81 },
    { -14,  80 }, {   0,  68 }, { -14,  70 }, { -24,  56 },
    { -23,  68 }, { -24,  50 }, { -11,  74 }, {  23, -13 },
    {  26, -13 }, {  40, -15 }, {  49, -14 }, {  44,   3 },
    {  45,   6 }, {  44,  34 }, {  33,  54 }, {  19,  82 },
    {  -3,  75 }, {  -1,  23 }, {   1,  34 }, {   1,  43 },
    {   0,  54 }, {  -2,  55 }, {   0,  61 }, {   1,  64 },
    {   0,  68 }, {  -9,  92 },

    /* 436 -> 459 */
    { -14, 106 }, { -13,  97 }, { -15,  90 }, { -12,  90 },
    { -18,  88 }, { -10,  73 }, {  -9,  79 }, { -14,  86 },
    { -10,  73 }, { -10,  70 }, { -10,  69 }, {  -5,  66 },
    {  -9,  64 }, {  -5,  58 }, {   2,  59 }, {  21, -10 },
    {  24, -11 }, {  28,  -8 }, {  28,  -1 }, {  29,   3 },
    {  29,   9 }, {  35,  20 }, {  29,  36 }, {  14,  67 }
};

static const int8_t cabac_context_init_PB[3][460][2] =
{
    /* i_cabac_init_idc == 0 */
    {
        /* 0 - 10 */
        {  20, -15 }, {   2,  54 }, {   3,  74 }, {  20, -15 },
        {   2,  54 }, {   3,  74 }, { -28, 127 }, { -23, 104 },
        {  -6,  53 }, {  -1,  54 }, {   7,  51 },

        /* 11 - 23 */
        {  23,  33 }, {  23,   2 }, {  21,   0 }, {   1,   9 },
        {   0,  49 }, { -37, 118 }, {   5,  57 }, { -13,  78 },
        { -11,  65 }, {   1,  62 }, {  12,  49 }, {  -4,  73 },
        {  17,  50 },

        /* 24 - 39 */
        {  18,  64 }, {   9,  43 }, {  29,   0 }, {  26,  67 },
        {  16,  90 }, {   9, 104 }, { -46, 127 }, { -20, 104 },
        {   1,  67 }, { -13,  78 }, { -11,  65 }, {   1,  62 },
        {  -6,  86 }, { -17,  95 }, {  -6,  61 }, {   9,  45 },

        /* 40 - 53 */
        {  -3,  69 }, {  -6,  81 }, { -11,  96 }, {   6,  55 },
        {   7,  67 }, {  -5,  86 }, {   2,  88 }, {   0,  58 },
        {  -3,  76 }, { -10,  94 }, {   5,  54 }, {   4,  69 },
        {  -3,  81 }, {   0,  88 },

        /* 54 - 59 */
        {  -7,  67 }, {  -5,  74 }, {  -4,  74 }, {  -5,  80 },
        {  -7,  72 }, {   1,  58 },

        /* 60 - 69 */
        {   0,  41 }, {   0,  63 }, {   0,  63 }, { 0, 63 },
        {  -9,  83 }, {   4,  86 }, {   0,  97 }, { -7, 72 },
        {  13,  41 }, {   3,  62 },

        /* 70 - 104 */
        {   0,  45 }, {  -4,  78 }, {  -3,  96 }, { -27,  126 },
        { -28,  98 }, { -25, 101 }, { -23,  67 }, { -28,  82 },
        { -20,  94 }, { -16,  83 }, { -22, 110 }, { -21,  91 },
        { -18, 102 }, { -13,  93 }, { -29, 127 }, {  -7,  92 },
        {  -5,  89 }, {  -7,  96 }, { -13, 108 }, {  -3,  46 },
        {  -1,  65 }, {  -1,  57 }, {  -9,  93 }, {  -3,  74 },
        {  -9,  92 }, {  -8,  87 }, { -23, 126 }, {   5,  54 },
        {   6,  60 }, {   6,  59 }, {   6,  69 }, {  -1,  48 },
        {   0,  68 }, {  -4,  69 }, {  -8,  88 },

        /* 105 -> 165 */
        {  -2,  85 }, {  -6,  78 }, {  -1,  75 }, {  -7,  77 },
        {   2,  54 }, {   5,  50 }, {  -3,  68 }, {   1,  50 },
        {   6,  42 }, {  -4,  81 }, {   1,  63 }, {  -4,  70 },
        {   0,  67 }, {   2,  57 }, {  -2,  76 }, {  11,  35 },
        {   4,  64 }, {   1,  61 }, {  11,  35 }, {  18,  25 },
        {  12,  24 }, {  13,  29 }, {  13,  36 }, { -10,  93 },
        {  -7,  73 }, {  -2,  73 }, {  13,  46 }, {   9,  49 },
        {  -7, 100 }, {   9,  53 }, {   2,  53 }, {   5,  53 },
        {  -2,  61 }, {   0,  56 }, {   0,  56 }, { -13,  63 },
        {  -5,  60 }, {  -1,  62 }, {   4,  57 }, {  -6,  69 },
        {   4,  57 }, {  14,  39 }, {   4,  51 }, {  13,  68 },
        {   3,  64 }, {   1,  61 }, {   9,  63 }, {   7,  50 },
        {  16,  39 }, {   5,  44 }, {   4,  52 }, {  11,  48 },
        {  -5,  60 }, {  -1,  59 }, {   0,  59 }, {  22,  33 },
        {   5,  44 }, {  14,  43 }, {  -1,  78 }, {   0,  60 },
        {   9,  69 },

        /* 166 - 226 */
        {  11,  28 }, {   2,  40 }, {   3,  44 }, {   0,  49 },
        {   0,  46 }, {   2,  44 }, {   2,  51 }, {   0,  47 },
        {   4,  39 }, {   2,  62 }, {   6,  46 }, {   0,  54 },
        {   3,  54 }, {   2,  58 }, {   4,  63 }, {   6,  51 },
        {   6,  57 }, {   7,  53 }, {   6,  52 }, {   6,  55 },
        {  11,  45 }, {  14,  36 }, {   8,  53 }, {  -1,  82 },
        {   7,  55 }, {  -3,  78 }, {  15,  46 }, {  22,  31 },
        {  -1,  84 }, {  25,   7 }, {  30,  -7 }, {  28,   3 },
        {  28,   4 }, {  32,   0 }, {  34,  -1 }, {  30,   6 },
        {  30,   6 }, {  32,   9 }, {  31,  19 }, {  26,  27 },
        {  26,  30 }, {  37,  20 }, {  28,  34 }, {  17,  70 },
        {   1,  67 }, {   5,  59 }, {   9,  67 }, {  16,  30 },
        {  18,  32 }, {  18,  35 }, {  22,  29 }, {  24,  31 },
        {  23,  38 }, {  18,  43 }, {  20,  41 }, {  11,  63 },
        {   9,  59 }, {   9,  64 }, {  -1,  94 }, {  -2,  89 },
        {  -9, 108 },

        /* 227 - 275 */
        {  -6,  76 }, {  -2,  44 }, {   0,  45 }, {   0,  52 },
        {  -3,  64 }, {  -2,  59 }, {  -4,  70 }, {  -4,  75 },
        {  -8,  82 }, { -17, 102 }, {  -9,  77 }, {   3,  24 },
        {   0,  42 }, {   0,  48 }, {   0,  55 }, {  -6,  59 },
        {  -7,  71 }, { -12,  83 }, { -11,  87 }, { -30, 119 },
        {   1,  58 }, {  -3,  29 }, {  -1,  36 }, {   1,  38 },
        {   2,  43 }, {  -6,  55 }, {   0,  58 }, {   0,  64 },
        {  -3,  74 }, { -10,  90 }, {   0,  70 }, {  -4,  29 },
        {   5,  31 }, {   7,  42 }, {   1,  59 }, {  -2,  58 },
        {  -3,  72 }, {  -3,  81 }, { -11,  97 }, {   0,  58 },
        {   8,   5 }, {  10,  14 }, {  14,  18 }, {  13,  27 },
        {   2,  40 }, {   0,  58 }, {  -3,  70 }, {  -6,  79 },
        {  -8,  85 },

        /* 276 a bit special (not used, bypass is used instead) */
        { 0, 0 },

        /* 277 - 337 */
        { -13, 106 }, { -16, 106 }, { -10,  87 }, { -21, 114 },
        { -18, 110 }, { -14,  98 }, { -22, 110 }, { -21, 106 },
        { -18, 103 }, { -21, 107 }, { -23, 108 }, { -26, 112 },
        { -10,  96 }, { -12,  95 }, {  -5,  91 }, {  -9,  93 },
        { -22,  94 }, {  -5,  86 }, {   9,  67 }, {  -4,  80 },
        { -10,  85 }, {  -1,  70 }, {   7,  60 }, {   9,  58 },
        {   5,  61 }, {  12,  50 }, {  15,  50 }, {  18,  49 },
        {  17,  54 }, {  10,  41 }, {   7,  46 }, {  -1,  51 },
        {   7,  49 }, {   8,  52 }, {   9,  41 }, {   6,  47 },
        {   2,  55 }, {  13,  41 }, {  10,  44 }, {   6,  50 },
        {   5,  53 }, {  13,  49 }, {   4,  63 }, {   6,  64 },
        {  -2,  69 }, {  -2,  59 }, {   6,  70 }, {  10,  44 },
        {   9,  31 }, {  12,  43 }, {   3,  53 }, {  14,  34 },
        {  10,  38 }, {  -3,  52 }, {  13,  40 }, {  17,  32 },
        {   7,  44 }, {   7,  38 }, {  13,  50 }, {  10,  57 },
        {  26,  43 },

        /* 338 - 398 */
        {  14,  11 }, {  11,  14 }, {   9,  11 }, {  18,  11 },
        {  21,   9 }, {  23,  -2 }, {  32, -15 }, {  32, -15 },
        {  34, -21 }, {  39, -23 }, {  42, -33 }, {  41, -31 },
        {  46, -28 }, {  38, -12 }, {  21,  29 }, {  45, -24 },
        {  53, -45 }, {  48, -26 }, {  65, -43 }, {  43, -19 },
        {  39, -10 }, {  30,   9 }, {  18,  26 }, {  20,  27 },
        {   0,  57 }, { -14,  82 }, {  -5,  75 }, { -19,  97 },
        { -35, 125 }, {  27,   0 }, {  28,   0 }, {  31,  -4 },
        {  27,   6 }, {  34,   8 }, {  30,  10 }, {  24,  22 },
        {  33,  19 }, {  22,  32 }, {  26,  31 }, {  21,  41 },
        {  26,  44 }, {  23,  47 }, {  16,  65 }, {  14,  71 },
        {   8,  60 }, {   6,  63 }, {  17,  65 }, {  21,  24 },
        {  23,  20 }, {  26,  23 }, {  27,  32 }, {  28,  23 },
        {  28,  24 }, {  23,  40 }, {  24,  32 }, {  28,  29 },
        {  23,  42 }, {  19,  57 }, {  22,  53 }, {  22,  61 },
        {  11,  86 },

        /* 399 - 435 */
        {  12,  40 }, {  11,  51 }, {  14,  59 },
        {  -4,  79 }, {  -7,  71 }, {  -5,  69 }, {  -9,  70 },
        {  -8,  66 }, { -10,  68 }, { -19,  73 }, { -12,  69 },
        { -16,  70 }, { -15,  67 }, { -20,  62 }, { -19,  70 },
        { -16,  66 }, { -22,  65 }, { -20,  63 }, {   9,  -2 },
        {  26,  -9 }, {  33,  -9 }, {  39,  -7 }, {  41,  -2 },
        {  45,   3 }, {  49,   9 }, {  45,  27 }, {  36,  59 },
        {  -6,  66 }, {  -7,  35 }, {  -7,  42 }, {  -8,  45 },
        {  -5,  48 }, { -12,  56 }, {  -6,  60 }, {  -5,  62 },
        {  -8,  66 }, {  -8,  76 },

        /* 436 - 459 */
        {  -5,  85 }, {  -6,  81 }, { -10,  77 }, {  -7,  81 },
        { -17,  80 }, { -18,  73 }, {  -4,  74 }, { -10,  83 },
        {  -9,  71 }, {  -9,  67 }, {  -1,  61 }, {  -8,  66 },
        { -14,  66 }, {   0,  59 }, {   2,  59 }, {  21, -13 },
        {  33, -14 }, {  39,  -7 }, {  46,  -2 }, {  51,   2 },
        {  60,   6 }, {  61,  17 }, {  55,  34 }, {  42,  62 },
    },

    /* i_cabac_init_idc == 1 */
    {
        /* 0 - 10 */
        {  20, -15 }, {   2,  54 }, {   3,  74 }, {  20, -15 },
        {   2,  54 }, {   3,  74 }, { -28, 127 }, { -23, 104 },
        {  -6,  53 }, {  -1,  54 }, {   7,  51 },

        /* 11 - 23 */
        {  22,  25 }, {  34,   0 }, {  16,   0 }, {  -2,   9 },
        {   4,  41 }, { -29, 118 }, {   2,  65 }, {  -6,  71 },
        { -13,  79 }, {   5,  52 }, {   9,  50 }, {  -3,  70 },
        {  10,  54 },

        /* 24 - 39 */
        {  26,  34 }, {  19,  22 }, {  40,   0 }, {  57,   2 },
        {  41,  36 }, {  26,  69 }, { -45, 127 }, { -15, 101 },
        {  -4,  76 }, {  -6,  71 }, { -13,  79 }, {   5,  52 },
        {   6,  69 }, { -13,  90 }, {   0,  52 }, {   8,  43 },

        /* 40 - 53 */
        {  -2,  69 },{  -5,  82 },{ -10,  96 },{   2,  59 },
        {   2,  75 },{  -3,  87 },{  -3,  100 },{   1,  56 },
        {  -3,  74 },{  -6,  85 },{   0,  59 },{  -3,  81 },
        {  -7,  86 },{  -5,  95 },

        /* 54 - 59 */
        {  -1,  66 },{  -1,  77 },{   1,  70 },{  -2,  86 },
        {  -5,  72 },{   0,  61 },

        /* 60 - 69 */
        { 0, 41 },   { 0, 63 },   { 0, 63 },     { 0, 63 },
        { -9, 83 },  { 4, 86 },   { 0, 97 },     { -7, 72 },
        { 13, 41 },  { 3, 62 },

        /* 70 - 104 */
        {  13,  15 }, {   7,  51 }, {   2,  80 }, { -39, 127 },
        { -18,  91 }, { -17,  96 }, { -26,  81 }, { -35,  98 },
        { -24, 102 }, { -23,  97 }, { -27, 119 }, { -24,  99 },
        { -21, 110 }, { -18, 102 }, { -36, 127 }, {   0,  80 },
        {  -5,  89 }, {  -7,  94 }, {  -4,  92 }, {   0,  39 },
        {   0,  65 }, { -15,  84 }, { -35, 127 }, {  -2,  73 },
        { -12, 104 }, {  -9,  91 }, { -31, 127 }, {   3,  55 },
        {   7,  56 }, {   7,  55 }, {   8,  61 }, {  -3,  53 },
        {   0,  68 }, {  -7,  74 }, {  -9,  88 },

        /* 105 -> 165 */
        { -13, 103 }, { -13,  91 }, {  -9,  89 }, { -14,  92 },
        {  -8,  76 }, { -12,  87 }, { -23, 110 }, { -24, 105 },
        { -10,  78 }, { -20, 112 }, { -17,  99 }, { -78, 127 },
        { -70, 127 }, { -50, 127 }, { -46, 127 }, {  -4,  66 },
        {  -5,  78 }, {  -4,  71 }, {  -8,  72 }, {   2,  59 },
        {  -1,  55 }, {  -7,  70 }, {  -6,  75 }, {  -8,  89 },
        { -34, 119 }, {  -3,  75 }, {  32,  20 }, {  30,  22 },
        { -44, 127 }, {   0,  54 }, {  -5,  61 }, {   0,  58 },
        {  -1,  60 }, {  -3,  61 }, {  -8,  67 }, { -25,  84 },
        { -14,  74 }, {  -5,  65 }, {   5,  52 }, {   2,  57 },
        {   0,  61 }, {  -9,  69 }, { -11,  70 }, {  18,  55 },
        {  -4,  71 }, {   0,  58 }, {   7,  61 }, {   9,  41 },
        {  18,  25 }, {   9,  32 }, {   5,  43 }, {   9,  47 },
        {   0,  44 }, {   0,  51 }, {   2,  46 }, {  19,  38 },
        {  -4,  66 }, {  15,  38 }, {  12,  42 }, {   9,  34 },
        {   0,  89 },

        /* 166 - 226 */
        {   4,  45 }, {  10,  28 }, {  10,  31 }, {  33, -11 },
        {  52, -43 }, {  18,  15 }, {  28,   0 }, {  35, -22 },
        {  38, -25 }, {  34,   0 }, {  39, -18 }, {  32, -12 },
        { 102, -94 }, {   0,   0 }, {  56, -15 }, {  33,  -4 },
        {  29,  10 }, {  37,  -5 }, {  51, -29 }, {  39,  -9 },
        {  52, -34 }, {  69, -58 }, {  67, -63 }, {  44,  -5 },
        {  32,   7 }, {  55, -29 }, {  32,   1 }, {   0,   0 },
        {  27,  36 }, {  33, -25 }, {  34, -30 }, {  36, -28 },
        {  38, -28 }, {  38, -27 }, {  34, -18 }, {  35, -16 },
        {  34, -14 }, {  32,  -8 }, {  37,  -6 }, {  35,   0 },
        {  30,  10 }, {  28,  18 }, {  26,  25 }, {  29,  41 },
        {   0,  75 }, {   2,  72 }, {   8,  77 }, {  14,  35 },
        {  18,  31 }, {  17,  35 }, {  21,  30 }, {  17,  45 },
        {  20,  42 }, {  18,  45 }, {  27,  26 }, {  16,  54 },
        {   7,  66 }, {  16,  56 }, {  11,  73 }, {  10,  67 },
        { -10, 116 },

        /* 227 - 275 */
        { -23, 112 }, { -15,  71 }, {  -7,  61 }, {   0,  53 },
        {  -5,  66 }, { -11,  77 }, {  -9,  80 }, {  -9,  84 },
        { -10,  87 }, { -34, 127 }, { -21, 101 }, {  -3,  39 },
        {  -5,  53 }, {  -7,  61 }, { -11,  75 }, { -15,  77 },
        { -17,  91 }, { -25, 107 }, { -25, 111 }, { -28, 122 },
        { -11,  76 }, { -10,  44 }, { -10,  52 }, { -10,  57 },
        {  -9,  58 }, { -16,  72 }, {  -7,  69 }, {  -4,  69 },
        {  -5,  74 }, {  -9,  86 }, {   2,  66 }, {  -9,  34 },
        {   1,  32 }, {  11,  31 }, {   5,  52 }, {  -2,  55 },
        {  -2,  67 }, {   0,  73 }, {  -8,  89 }, {   3,  52 },
        {   7,   4 }, {  10,   8 }, {  17,   8 }, {  16,  19 },
        {   3,  37 }, {  -1,  61 }, {  -5,  73 }, {  -1,  70 },
        {  -4,  78 },

        /* 276 a bit special (not used, bypass is used instead) */
        { 0, 0 },

        /* 277 - 337 */
        { -21, 126 }, { -23, 124 }, { -20, 110 }, { -26, 126 },
        { -25, 124 }, { -17, 105 }, { -27, 121 }, { -27, 117 },
        { -17, 102 }, { -26, 117 }, { -27, 116 }, { -33, 122 },
        { -10,  95 }, { -14, 100 }, {  -8,  95 }, { -17, 111 },
        { -28, 114 }, {  -6,  89 }, {  -2,  80 }, {  -4,  82 },
        {  -9,  85 }, {  -8,  81 }, {  -1,  72 }, {   5,  64 },
        {   1,  67 }, {   9,  56 }, {   0,  69 }, {   1,  69 },
        {   7,  69 }, {  -7,  69 }, {  -6,  67 }, { -16,  77 },
        {  -2,  64 }, {   2,  61 }, {  -6,  67 }, {  -3,  64 },
        {   2,  57 }, {  -3,  65 }, {  -3,  66 }, {   0,  62 },
        {   9,  51 }, {  -1,  66 }, {  -2,  71 }, {  -2,  75 },
        {  -1,  70 }, {  -9,  72 }, {  14,  60 }, {  16,  37 },
        {   0,  47 }, {  18,  35 }, {  11,  37 }, {  12,  41 },
        {  10,  41 }, {   2,  48 }, {  12,  41 }, {  13,  41 },
        {   0,  59 }, {   3,  50 }, {  19,  40 }, {   3,  66 },
        {  18,  50 },

        /* 338 - 398 */
        {  19,  -6 }, {  18,  -6 }, {  14,   0 }, {  26, -12 },
        {  31, -16 }, {  33, -25 }, {  33, -22 }, {  37, -28 },
        {  39, -30 }, {  42, -30 }, {  47, -42 }, {  45, -36 },
        {  49, -34 }, {  41, -17 }, {  32,   9 }, {  69, -71 },
        {  63, -63 }, {  66, -64 }, {  77, -74 }, {  54, -39 },
        {  52, -35 }, {  41, -10 }, {  36,   0 }, {  40,  -1 },
        {  30,  14 }, {  28,  26 }, {  23,  37 }, {  12,  55 },
        {  11,  65 }, {  37, -33 }, {  39, -36 }, {  40, -37 },
        {  38, -30 }, {  46, -33 }, {  42, -30 }, {  40, -24 },
        {  49, -29 }, {  38, -12 }, {  40, -10 }, {  38,  -3 },
        {  46,  -5 }, {  31,  20 }, {  29,  30 }, {  25,  44 },
        {  12,  48 }, {  11,  49 }, {  26,  45 }, {  22,  22 },
        {  23,  22 }, {  27,  21 }, {  33,  20 }, {  26,  28 },
        {  30,  24 }, {  27,  34 }, {  18,  42 }, {  25,  39 },
        {  18,  50 }, {  12,  70 }, {  21,  54 }, {  14,  71 },
        {  11,  83 },

        /* 399 - 435 */
        {  25,  32 }, {  21,  49 }, {  21,  54 },
        {  -5,  85 }, {  -6,  81 }, { -10,  77 }, {  -7,  81 },
        { -17,  80 }, { -18,  73 }, {  -4,  74 }, { -10,  83 },
        {  -9,  71 }, {  -9,  67 }, {  -1,  61 }, {  -8,  66 },
        { -14,  66 }, {   0,  59 }, {   2,  59 }, {  17, -10 },
        {  32, -13 }, {  42,  -9 }, {  49,  -5 }, {  53,   0 },
        {  64,   3 }, {  68,  10 }, {  66,  27 }, {  47,  57 },
        {  -5,  71 }, {   0,  24 }, {  -1,  36 }, {  -2,  42 },
        {  -2,  52 }, {  -9,  57 }, {  -6,  63 }, {  -4,  65 },
        {  -4,  67 }, {  -7,  82 },

        /* 436 - 459 */
        {  -3,  81 }, {  -3,  76 }, {  -7,  72 }, {  -6,  78 },
        { -12,  72 }, { -14,  68 }, {  -3,  70 }, {  -6,  76 },
        {  -5,  66 }, {  -5,  62 }, {   0,  57 }, {  -4,  61 },
        {  -9,  60 }, {   1,  54 }, {   2,  58 }, {  17, -10 },
        {  32, -13 }, {  42,  -9 }, {  49,  -5 }, {  53,   0 },
        {  64,   3 }, {  68,  10 }, {  66,  27 }, {  47,  57 },
    },

    /* i_cabac_init_idc == 2 */
    {
        /* 0 - 10 */
        {  20, -15 }, {   2,  54 }, {   3,  74 }, {  20, -15 },
        {   2,  54 }, {   3,  74 }, { -28, 127 }, { -23, 104 },
        {  -6,  53 }, {  -1,  54 }, {   7,  51 },

        /* 11 - 23 */
        {  29,  16 }, {  25,   0 }, {  14,   0 }, { -10,  51 },
        {  -3,  62 }, { -27,  99 }, {  26,  16 }, {  -4,  85 },
        { -24, 102 }, {   5,  57 }, {   6,  57 }, { -17,  73 },
        {  14,  57 },

        /* 24 - 39 */
        {  20,  40 }, {  20,  10 }, {  29,   0 }, {  54,   0 },
        {  37,  42 }, {  12,  97 }, { -32, 127 }, { -22, 117 },
        {  -2,  74 }, {  -4,  85 }, { -24, 102 }, {   5,  57 },
        {  -6,  93 }, { -14,  88 }, {  -6,  44 }, {   4,  55 },

        /* 40 - 53 */
        { -11,  89 },{ -15,  103 },{ -21,  116 },{  19,  57 },
        {  20,  58 },{   4,  84 },{   6,  96 },{   1,  63 },
        {  -5,  85 },{ -13,  106 },{   5,  63 },{   6,  75 },
        {  -3,  90 },{  -1,  101 },

        /* 54 - 59 */
        {   3,  55 },{  -4,  79 },{  -2,  75 },{ -12,  97 },
        {  -7,  50 },{   1,  60 },

        /* 60 - 69 */
        { 0, 41 },   { 0, 63 },   { 0, 63 },     { 0, 63 },
        { -9, 83 },  { 4, 86 },   { 0, 97 },     { -7, 72 },
        { 13, 41 },  { 3, 62 },

        /* 70 - 104 */
        {   7,  34 }, {  -9,  88 }, { -20, 127 }, { -36, 127 },
        { -17,  91 }, { -14,  95 }, { -25,  84 }, { -25,  86 },
        { -12,  89 }, { -17,  91 }, { -31, 127 }, { -14,  76 },
        { -18, 103 }, { -13,  90 }, { -37, 127 }, {  11,  80 },
        {   5,  76 }, {   2,  84 }, {   5,  78 }, {  -6,  55 },
        {   4,  61 }, { -14,  83 }, { -37, 127 }, {  -5,  79 },
        { -11, 104 }, { -11,  91 }, { -30, 127 }, {   0,  65 },
        {  -2,  79 }, {   0,  72 }, {  -4,  92 }, {  -6,  56 },
        {   3,  68 }, {  -8,  71 }, { -13,  98 },

        /* 105 -> 165 */
        {  -4,  86 }, { -12,  88 }, {  -5,  82 }, {  -3,  72 },
        {  -4,  67 }, {  -8,  72 }, { -16,  89 }, {  -9,  69 },
        {  -1,  59 }, {   5,  66 }, {   4,  57 }, {  -4,  71 },
        {  -2,  71 }, {   2,  58 }, {  -1,  74 }, {  -4,  44 },
        {  -1,  69 }, {   0,  62 }, {  -7,  51 }, {  -4,  47 },
        {  -6,  42 }, {  -3,  41 }, {  -6,  53 }, {   8,  76 },
        {  -9,  78 }, { -11,  83 }, {   9,  52 }, {   0,  67 },
        {  -5,  90 }, {   1,  67 }, { -15,  72 }, {  -5,  75 },
        {  -8,  80 }, { -21,  83 }, { -21,  64 }, { -13,  31 },
        { -25,  64 }, { -29,  94 }, {   9,  75 }, {  17,  63 },
        {  -8,  74 }, {  -5,  35 }, {  -2,  27 }, {  13,  91 },
        {   3,  65 }, {  -7,  69 }, {   8,  77 }, { -10,  66 },
        {   3,  62 }, {  -3,  68 }, { -20,  81 }, {   0,  30 },
        {   1,   7 }, {  -3,  23 }, { -21,  74 }, {  16,  66 },
        { -23, 124 }, {  17,  37 }, {  44, -18 }, {  50, -34 },
        { -22, 127 },

        /* 166 - 226 */
        {   4,  39 }, {   0,  42 }, {   7,  34 }, {  11,  29 },
        {   8,  31 }, {   6,  37 }, {   7,  42 }, {   3,  40 },
        {   8,  33 }, {  13,  43 }, {  13,  36 }, {   4,  47 },
        {   3,  55 }, {   2,  58 }, {   6,  60 }, {   8,  44 },
        {  11,  44 }, {  14,  42 }, {   7,  48 }, {   4,  56 },
        {   4,  52 }, {  13,  37 }, {   9,  49 }, {  19,  58 },
        {  10,  48 }, {  12,  45 }, {   0,  69 }, {  20,  33 },
        {   8,  63 }, {  35, -18 }, {  33, -25 }, {  28,  -3 },
        {  24,  10 }, {  27,   0 }, {  34, -14 }, {  52, -44 },
        {  39, -24 }, {  19,  17 }, {  31,  25 }, {  36,  29 },
        {  24,  33 }, {  34,  15 }, {  30,  20 }, {  22,  73 },
        {  20,  34 }, {  19,  31 }, {  27,  44 }, {  19,  16 },
        {  15,  36 }, {  15,  36 }, {  21,  28 }, {  25,  21 },
        {  30,  20 }, {  31,  12 }, {  27,  16 }, {  24,  42 },
        {   0,  93 }, {  14,  56 }, {  15,  57 }, {  26,  38 },
        { -24, 127 },

        /* 227 - 275 */
        { -24, 115 }, { -22,  82 }, {  -9,  62 }, {   0,  53 },
        {   0,  59 }, { -14,  85 }, { -13,  89 }, { -13,  94 },
        { -11,  92 }, { -29, 127 }, { -21, 100 }, { -14,  57 },
        { -12,  67 }, { -11,  71 }, { -10,  77 }, { -21,  85 },
        { -16,  88 }, { -23, 104 }, { -15,  98 }, { -37, 127 },
        { -10,  82 }, {  -8,  48 }, {  -8,  61 }, {  -8,  66 },
        {  -7,  70 }, { -14,  75 }, { -10,  79 }, {  -9,  83 },
        { -12,  92 }, { -18, 108 }, {  -4,  79 }, { -22,  69 },
        { -16,  75 }, {  -2,  58 }, {   1,  58 }, { -13,  78 },
        {  -9,  83 }, {  -4,  81 }, { -13,  99 }, { -13,  81 },
        {  -6,  38 }, { -13,  62 }, {  -6,  58 }, {  -2,  59 },
        { -16,  73 }, { -10,  76 }, { -13,  86 }, {  -9,  83 },
        { -10,  87 },

        /* 276 a bit special (not used, bypass is used instead) */
        { 0, 0 },

        /* 277 - 337 */
        { -22, 127 }, { -25, 127 }, { -25, 120 }, { -27, 127 },
        { -19, 114 }, { -23, 117 }, { -25, 118 }, { -26, 117 },
        { -24, 113 }, { -28, 118 }, { -31, 120 }, { -37, 124 },
        { -10,  94 }, { -15, 102 }, { -10,  99 }, { -13, 106 },
        { -50, 127 }, {  -5,  92 }, {  17,  57 }, {  -5,  86 },
        { -13,  94 }, { -12,  91 }, {  -2,  77 }, {   0,  71 },
        {  -1,  73 }, {   4,  64 }, {  -7,  81 }, {   5,  64 },
        {  15,  57 }, {   1,  67 }, {   0,  68 }, { -10,  67 },
        {   1,  68 }, {   0,  77 }, {   2,  64 }, {   0,  68 },
        {  -5,  78 }, {   7,  55 }, {   5,  59 }, {   2,  65 },
        {  14,  54 }, {  15,  44 }, {   5,  60 }, {   2,  70 },
        {  -2,  76 }, { -18,  86 }, {  12,  70 }, {   5,  64 },
        { -12,  70 }, {  11,  55 }, {   5,  56 }, {   0,  69 },
        {   2,  65 }, {  -6,  74 }, {   5,  54 }, {   7,  54 },
        {  -6,  76 }, { -11,  82 }, {  -2,  77 }, {  -2,  77 },
        {  25,  42 },

        /* 338 - 398 */
        {  17, -13 }, {  16,  -9 }, {  17, -12 }, {  27, -21 },
        {  37, -30 }, {  41, -40 }, {  42, -41 }, {  48, -47 },
        {  39, -32 }, {  46, -40 }, {  52, -51 }, {  46, -41 },
        {  52, -39 }, {  43, -19 }, {  32,  11 }, {  61, -55 },
        {  56, -46 }, {  62, -50 }, {  81, -67 }, {  45, -20 },
        {  35,  -2 }, {  28,  15 }, {  34,   1 }, {  39,   1 },
        {  30,  17 }, {  20,  38 }, {  18,  45 }, {  15,  54 },
        {   0,  79 }, {  36, -16 }, {  37, -14 }, {  37, -17 },
        {  32,   1 }, {  34,  15 }, {  29,  15 }, {  24,  25 },
        {  34,  22 }, {  31,  16 }, {  35,  18 }, {  31,  28 },
        {  33,  41 }, {  36,  28 }, {  27,  47 }, {  21,  62 },
        {  18,  31 }, {  19,  26 }, {  36,  24 }, {  24,  23 },
        {  27,  16 }, {  24,  30 }, {  31,  29 }, {  22,  41 },
        {  22,  42 }, {  16,  60 }, {  15,  52 }, {  14,  60 },
        {   3,  78 }, { -16, 123 }, {  21,  53 }, {  22,  56 },
        {  25,  61 },

        /* 399 - 435 */
        {  21,  33 }, {  19,  50 }, {  17,  61 },
        {  -3,  78 }, {  -8,  74 }, {  -9,  72 }, { -10,  72 },
        { -18,  75 }, { -12,  71 }, { -11,  63 }, {  -5,  70 },
        { -17,  75 }, { -14,  72 }, { -16,  67 }, {  -8,  53 },
        { -14,  59 }, {  -9,  52 }, { -11,  68 }, {   9,  -2 },
        {  30, -10 }, {  31,  -4 }, {  33,  -1 }, {  33,   7 },
        {  31,  12 }, {  37,  23 }, {  31,  38 }, {  20,  64 },
        {  -9,  71 }, {  -7,  37 }, {  -8,  44 }, { -11,  49 },
        { -10,  56 }, { -12,  59 }, {  -8,  63 }, {  -9,  67 },
        {  -6,  68 }, { -10,  79 },

        /* 436 - 459 */
        {  -3,  78 }, {  -8,  74 }, {  -9,  72 }, { -10,  72 },
        { -18,  75 }, { -12,  71 }, { -11,  63 }, {  -5,  70 },
        { -17,  75 }, { -14,  72 }, { -16,  67 }, {  -8,  53 },
        { -14,  59 }, {  -9,  52 }, { -11,  68 }, {   9,  -2 },
        {  30, -10 }, {  31,  -4 }, {  33,  -1 }, {  33,   7 },
        {  31,  12 }, {  37,  23 }, {  31,  38 }, {  20,  64 },
    }
};

/*
 * Left-neighbour lookup table: four options of 16 entries each.
 * fill_decode_caches() in this file always selects row 0 and reads entries
 * 0..3 of it; the remaining rows and entries are kept unchanged.
 */
static const uint8_t left_block_options[4][16] = {
    { 0, 1, 2, 3,
      7, 10, 8, 11,
      7+0*8, 7+1*8, 7+2*8, 7+3*8,
      2+0*8, 2+3*8, 2+1*8, 2+2*8 },
    { 2, 2, 3, 3,
      8, 11, 8, 11,
      7+2*8, 7+2*8, 7+3*8, 7+3*8,
      2+1*8, 2+2*8, 2+1*8, 2+2*8 },
    { 0, 0, 1, 1,
      7, 10, 7, 10,
      7+0*8, 7+0*8, 7+1*8, 7+1*8,
      2+0*8, 2+3*8, 2+0*8, 2+3*8 },
    { 0, 2, 0, 2,
      7, 10, 7, 10,
      7+0*8, 7+2*8, 7+0*8, 7+2*8,
      2+0*8, 2+3*8, 2+0*8, 2+3*8 }
};

/* rem6[q] == q % 6 for q = 0..51; one row per group of six values. */
static const uint8_t rem6[52] = {
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3,
};

/* div6[q] == q / 6 for q = 0..51; one row per quotient. */
static const uint8_t div6[52] = {
    0, 0, 0, 0, 0, 0,
    1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4,
    5, 5, 5, 5, 5, 5,
    6, 6, 6, 6, 6, 6,
    7, 7, 7, 7, 7, 7,
    8, 8, 8, 8,
};

/**
 * Fill the 8x8 dequantisation tables for both 8x8 scaling matrices.
 *
 * For each of the 52 table rows the entry is
 * (dequant8_coeff_init[q%6][...] * scaling_matrix8[i][x]) << (q/6).
 * When the two scaling matrices are byte-identical, table 1 simply aliases
 * buffer 0 instead of being computed a second time.
 *
 * @param s  slice whose pps.scaling_matrix8 is read
 * @param ec entropy context whose dequant8_coeff / dequant8_buffer are written
 */
static void init_dequant8_coeff_table(H264Slice *s, EntropyContext *ec){
    /* coefficients are stored transposed when any of these is enabled */
    const int transpose = HAVE_MMX | HAVE_ALTIVEC | HAVE_NEON;
    int mat, qp, pos;

    ec->dequant8_coeff[0] = ec->dequant8_buffer[0];
    ec->dequant8_coeff[1] = ec->dequant8_buffer[1];

    for (mat = 0; mat < 2; mat++) {
        if (mat != 0 &&
            memcmp(s->pps.scaling_matrix8[0], s->pps.scaling_matrix8[1], 64*sizeof(uint8_t)) == 0) {
            /* second matrix equals the first: share the table */
            ec->dequant8_coeff[1] = ec->dequant8_buffer[0];
            break;
        }

        for (qp = 0; qp < 52; qp++) {
            const int shift = div6[qp];
            const int idx   = rem6[qp];

            for (pos = 0; pos < 64; pos++) {
                const int dst = transpose ? (pos>>3)|((pos&7)<<3) : pos;

                ec->dequant8_coeff[mat][qp][dst] =
                    ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((pos>>1)&12) | (pos&3)] ] *
                    s->pps.scaling_matrix8[mat][pos]) << shift;
            }
        }
    }
}

/**
 * Fill the 4x4 dequantisation tables for the six 4x4 scaling matrices.
 *
 * A matrix that is byte-identical to an earlier one reuses that earlier
 * matrix's buffer; otherwise each entry is
 * (dequant4_coeff_init[q%6][...] * scaling_matrix4[i][x]) << (q/6 + 2).
 *
 * @param s  slice whose pps.scaling_matrix4 is read
 * @param ec entropy context whose dequant4_coeff / dequant4_buffer are written
 */
static void init_dequant4_coeff_table(H264Slice *s, EntropyContext *ec){
    /* coefficients are stored transposed when any of these is enabled */
    const int transpose = HAVE_MMX | HAVE_ALTIVEC | HAVE_NEON;
    int mat, prev, qp, pos;

    for (mat = 0; mat < 6; mat++) {
        int same_as = -1;

        /* look for the first earlier matrix with identical contents */
        for (prev = 0; prev < mat; prev++) {
            if (memcmp(s->pps.scaling_matrix4[prev], s->pps.scaling_matrix4[mat], 16*sizeof(uint8_t)) == 0) {
                same_as = prev;
                break;
            }
        }
        if (same_as >= 0) {
            ec->dequant4_coeff[mat] = ec->dequant4_buffer[same_as];
            continue;
        }

        ec->dequant4_coeff[mat] = ec->dequant4_buffer[mat];
        for (qp = 0; qp < 52; qp++) {
            const int shift = div6[qp] + 2;
            const int idx   = rem6[qp];

            for (pos = 0; pos < 16; pos++) {
                const int dst = transpose ? (pos>>2)|((pos<<2)&0xF) : pos;

                ec->dequant4_coeff[mat][qp][dst] =
                    ((uint32_t)dequant4_coeff_init[idx][(pos&1) + ((pos>>2)&1)] *
                    s->pps.scaling_matrix4[mat][pos]) << shift;
            }
        }
    }
}

/**
 * Initialise all dequantisation tables of the entropy context.
 *
 * The 4x4 tables are always built; the 8x8 tables only when
 * pps.transform_8x8_mode is set. If transform_bypass is set, row 0 of every
 * table is then overwritten with the constant 1<<6.
 *
 * @param s  slice providing pps and transform_bypass
 * @param ec entropy context receiving the tables
 */
void init_dequant_tables(H264Slice *s, EntropyContext *ec){
    int mat, pos;

    init_dequant4_coeff_table(s, ec);
    if (s->pps.transform_8x8_mode)
        init_dequant8_coeff_table(s, ec);

    if (!s->transform_bypass)
        return;

    for (mat = 0; mat < 6; mat++) {
        for (pos = 0; pos < 16; pos++)
            ec->dequant4_coeff[mat][0][pos] = 1<<6;
    }

    if (!s->pps.transform_8x8_mode)
        return;

    for (mat = 0; mat < 2; mat++) {
        for (pos = 0; pos < 64; pos++)
            ec->dequant8_coeff[mat][0][pos] = 1<<6;
    }
}

/**
 * Initialise the 460 CABAC context states from the (m, n) init tables.
 *
 * I slices use cabac_context_init_I; all other slices use the
 * cabac_context_init_PB table selected by s->cabac_init_idc.
 *
 * @param ec entropy context; curr_qscale is read
 * @param s  slice; slice_type_nos and cabac_init_idc are read
 * @param c  CABAC context whose cabac_state[0..459] is written
 */
void ff_h264_init_cabac_states(EntropyContext *ec, H264Slice *s, CABACContext *c) {
    const int8_t (*tab)[2];
    int ctx_idx;

    if (s->slice_type_nos != FF_I_TYPE)
        tab = cabac_context_init_PB[s->cabac_init_idc];
    else
        tab = cabac_context_init_I;

    for (ctx_idx = 0; ctx_idx < 460; ctx_idx++) {
        const int m_val = tab[ctx_idx][0];
        const int n_val = tab[ctx_idx][1];
        int pre = 2*(((m_val * ec->curr_qscale) >>4 ) + n_val) - 127;

        /* fold negative values: pre -> ~pre */
        if (pre < 0)
            pre = ~pre;
        /* clamp to 124/125, keeping the lowest bit */
        if (pre > 124)
            pre = 124 + (pre&1);

        c->cabac_state[ctx_idx] = pre;
    }
}

/**
 * Record the types and qscale of the macroblocks neighbouring the current one.
 *
 * Top, top-right and top-left come from the *_top row arrays and are forced
 * to 0 when the current macroblock is in row 0. The left neighbour is always
 * read from index mb_x-1 of the current-row arrays.
 * NOTE(review): indices mb_x-1 and mb_x+1 are read without a range check, so
 * the row arrays presumably carry guard entries on both sides -- confirm
 * against their allocation.
 *
 * @param ec entropy context (neighbour type fields are written)
 * @param s  unused here
 */
static void fill_decode_neighbors(EntropyContext *ec, H264Slice *s){
    H264Mb *m = ec->m;
    const int col = m->mb_x;

    /* left neighbour lives in the current-row arrays */
    ec->left_type        = ec->mb_type[col-1];
    m->qscale_left_mb_xy = ec->qscale[col-1];

    if (!m->mb_y) {
        /* first row: nothing above */
        ec->top_type        = 0;
        ec->topright_type   = 0;
        ec->topleft_type    = 0;
        m->qscale_top_mb_xy = 0;
        return;
    }

    ec->top_type        = ec->mb_type_top[col];
    ec->topright_type   = ec->mb_type_top[col+1];
    ec->topleft_type    = ec->mb_type_top[col-1];
    m->qscale_top_mb_xy = ec->qscale_top[col];
}

/**
 * Load the neighbour-derived caches used while entropy-decoding the current
 * macroblock: the non-zero-count borders, the left/top coded block patterns
 * and, for inter or spatial-direct macroblocks, the reference-index, mvd and
 * direct-flag borders.
 *
 * The neighbour types are taken from ec->top_type, ec->left_type,
 * ec->topleft_type and ec->topright_type, which fill_decode_neighbors() sets.
 *
 * @param ec      entropy context holding the caches and per-row arrays
 * @param s       slice; direct_spatial_mv_pred, list_count and slice_type_nos are read
 * @param mb_type type flags of the macroblock being decoded
 */
static void fill_decode_caches(EntropyContext *ec, H264Slice *s, int mb_type){
    H264Mb *m = ec->m;
    int topleft_type, top_type, topright_type, left_type;
    /* only option row 0 of left_block_options is used by this function */
    const uint8_t * left_block= left_block_options[0];
	const int mb_x = m->mb_x;
    int i;

    topleft_type = ec->topleft_type;
	top_type     = ec->top_type;
    topright_type= ec->topright_type;
	left_type    = ec->left_type;

    if(!IS_SKIP(mb_type)){
        /* top border of the non-zero-count cache */
        if(top_type){
            AV_COPY32(&ec->non_zero_count_cache[4+8*0], &ec->non_zero_count_top[mb_x][0]);
            ec->non_zero_count_cache[1+8*0]= ec->non_zero_count_top[mb_x][4];
            ec->non_zero_count_cache[2+8*0]= ec->non_zero_count_top[mb_x][5];
            ec->non_zero_count_cache[1+8*3]= ec->non_zero_count_top[mb_x][6];
            ec->non_zero_count_cache[2+8*3]= ec->non_zero_count_top[mb_x][7];

        }else {
            /* no top neighbour: 0 for non-intra, 0x40 per entry for intra mb_type;
             * the value of the AV_WN32A expression is chained into the byte stores */
            ec->non_zero_count_cache[1+8*0]=
            ec->non_zero_count_cache[2+8*0]=
            ec->non_zero_count_cache[1+8*3]=
            ec->non_zero_count_cache[2+8*3]=
            AV_WN32A(&ec->non_zero_count_cache[4+8*0], !IS_INTRA(mb_type) ? 0 : 0x40404040);
        }

        /* left border of the non-zero-count cache */
        if(left_type){
            for (i=0; i<2; i++) {
                ec->non_zero_count_cache[3+8*1 + 2*8*i]= ec->non_zero_count_left[i*2+0];
                ec->non_zero_count_cache[3+8*2 + 2*8*i]= ec->non_zero_count_left[i*2+1];
                ec->non_zero_count_cache[0+8*1 + 3*8*i]= ec->non_zero_count_left[4+i*2+0];
                ec->non_zero_count_cache[0+8*2 + 3*8*i]= ec->non_zero_count_left[4+i*2+1];
            }
        }
        else{
            /* no left neighbour: same defaults as for the missing top (0 or 64) */
            for (i=0; i<2; i++) {
                ec->non_zero_count_cache[3+8*1 + 2*8*i]=
                ec->non_zero_count_cache[3+8*2 + 2*8*i]=
                ec->non_zero_count_cache[0+8*1 + 3*8*i]=
                ec->non_zero_count_cache[0+8*2 + 3*8*i]= !IS_INTRA(mb_type) ? 0 : 64;
            }
        }

		// top_cbp: stored value of the top neighbour, else a default that depends on intra/non-intra mb_type
		if(top_type) {
			ec->top_cbp = ec->cbp_top[mb_x];
		} else {
			ec->top_cbp = IS_INTRA(mb_type) ? 0x1CF : 0x00F;
		}
		// left_cbp: bits 4..8 of the left neighbour's cbp plus two of its low bits
		// repositioned to bit 1 and bit 3; same defaults as top_cbp when missing
		if (left_type) {
			ec->left_cbp = (ec->cbp[mb_x-1] & 0x1f0)
			|  ((ec->cbp[mb_x-1]>>(left_block[0]&(~1)))&2)
			| (((ec->cbp[mb_x-1]>>(left_block[2]&(~1)))&2) << 2);
		} else {
			ec->left_cbp = IS_INTRA(mb_type) ? 0x1CF : 0x00F;
		}
    }

    if(IS_INTER(mb_type) ||(IS_DIRECT(mb_type) && s->direct_spatial_mv_pred)){
        int list;

        /* cache entries right of blocks 5, 7 and 13 are marked unavailable in both lists */
        ec->ref_cache[0][scan8[5 ]+1] = ec->ref_cache[0][scan8[7 ]+1] = ec->ref_cache[0][scan8[13]+1] =
        ec->ref_cache[1][scan8[5 ]+1] = ec->ref_cache[1][scan8[7 ]+1] = ec->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE;

        for(list=0; list<s->list_count; list++){
            if(!USES_LIST(mb_type, list)){
                continue;
            }
            assert(!(IS_DIRECT(mb_type) && !s->direct_spatial_mv_pred));

            /* top row of the reference cache */
            if(USES_LIST(top_type, list)){
                ec->ref_cache[list][scan8[0] + 0 - 1*8]=
                ec->ref_cache[list][scan8[0] + 1 - 1*8]= ec->ref_index_top[list][4*mb_x + 2];
                ec->ref_cache[list][scan8[0] + 2 - 1*8]=
                ec->ref_cache[list][scan8[0] + 3 - 1*8]= ec->ref_index_top[list][4*mb_x + 3];
            }else{
                /* neighbour exists but does not use this list, or does not exist at all */
                AV_WN32A(&ec->ref_cache[list][scan8[0] + 0 - 1*8], ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101);
            }

            /* left column of the reference cache: all four rows for 16x8 / 8x8
             * macroblocks, only the first row otherwise */
            if(mb_type & (MB_TYPE_16x8|MB_TYPE_8x8)){
                for(i=0; i<2; i++){
                    int cache_idx = scan8[0] - 1 + i*2*8;
                    if(USES_LIST(left_type, list)){
                        const int b8_x= 4*(mb_x-1) + 1;
                        ec->ref_cache[list][cache_idx  ]= ec->ref_index[list][b8_x + (left_block[0+i*2]&~1)];
                        ec->ref_cache[list][cache_idx+8]= ec->ref_index[list][b8_x + (left_block[1+i*2]&~1)];
                    }else{
                        ec->ref_cache[list][cache_idx  ]=
                        ec->ref_cache[list][cache_idx+8]= (left_type ? LIST_NOT_USED : PART_NOT_AVAILABLE);
                    }
                }
            }else{
                if(USES_LIST(left_type, list)){
                    const int b8_x= 4*(mb_x-1) + 1;
                    ec->ref_cache[list][scan8[0] - 1]= ec->ref_index[list][b8_x + (left_block[0]&~1)];
                }else{
                    ec->ref_cache[list][scan8[0] - 1]= left_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
                }
            }

            /* top-right entry */
            if(USES_LIST(topright_type, list)){
                ec->ref_cache[list][scan8[0] + 4 - 1*8]= ec->ref_index_top[list][4*(mb_x+1) + 2];
            }else{
                ec->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
            }
            /* the top-left entry is only loaded when the top-right one is negative */
            if(ec->ref_cache[list][scan8[0] + 4 - 1*8] < 0){
                /* constant -1, so (topleft_partition & 2) below is always 2 */
                int topleft_partition= -1;
                if(USES_LIST(topleft_type, list)){
                    const int b8_x= 4*(mb_x-1) + 1 + (topleft_partition & 2);
                    ec->ref_cache[list][scan8[0] - 1 - 1*8]= ec->ref_index_top[list][b8_x];
                }else{
                    ec->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
                }
            }

            /* skip / direct macroblocks need none of the mvd and direct borders below */
            if((mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2)))
                continue;

            /* NOTE(review): always true here because of the continue just above */
            if(!(mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2))) {
                ec->ref_cache[list][scan8[4 ]] =
                ec->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;

				/* XXX ugly: load the mvd borders (zero where the neighbour does not use this list) */
				if(USES_LIST(top_type, list)){
					AV_COPY64(ec->mvd_cache[list][scan8[0] + 0 - 1*8], ec->mvd_top[list][8*mb_x + 0]);
				}else{
					AV_ZERO64(ec->mvd_cache[list][scan8[0] + 0 - 1*8]);
				}
				if(USES_LIST(left_type, list)){
					AV_COPY16(ec->mvd_cache[list][scan8[0] - 1 + 0*8], ec->mvd[list][8*(mb_x-1) + 6 - left_block[0]]);
					AV_COPY16(ec->mvd_cache[list][scan8[0] - 1 + 1*8], ec->mvd[list][8*(mb_x-1) + 6 - left_block[1]]);
				}else{
					AV_ZERO16(ec->mvd_cache [list][scan8[0] - 1 + 0*8]);
					AV_ZERO16(ec->mvd_cache [list][scan8[0] - 1 + 1*8]);
				}
				/* NOTE(review): same condition as the previous if; the two could be merged */
				if(USES_LIST(left_type, list)){
					AV_COPY16(ec->mvd_cache[list][scan8[0] - 1 + 2*8], ec->mvd[list][8*(mb_x-1) + 6 - left_block[2]]);
					AV_COPY16(ec->mvd_cache[list][scan8[0] - 1 + 3*8], ec->mvd[list][8*(mb_x-1) + 6 - left_block[3]]);
				}else{
					AV_ZERO16(ec->mvd_cache [list][scan8[0] - 1 + 2*8]);
					AV_ZERO16(ec->mvd_cache [list][scan8[0] - 1 + 3*8]);
				}
				AV_ZERO16(ec->mvd_cache [list][scan8[4 ]]);
				AV_ZERO16(ec->mvd_cache [list][scan8[12]]);
				if(s->slice_type_nos == FF_B_TYPE){
					/* B slices: preset the 4x4 interior of the direct cache, then load its top and left borders */
					fill_rectangle(&ec->direct_cache[scan8[0]], 4, 4, 8, MB_TYPE_16x16>>1, 1);

					if(IS_DIRECT(top_type)){
						AV_WN32A(&ec->direct_cache[scan8[0] - 1*8], 0x01010101u*(MB_TYPE_DIRECT2>>1));
					}else if(IS_8X8(top_type)){
						int b8_x = 4*mb_x;
						ec->direct_cache[scan8[0] + 0 - 1*8]= ec->direct_top[b8_x + 2];
						ec->direct_cache[scan8[0] + 2 - 1*8]= ec->direct_top[b8_x + 3];
					}else{
						AV_WN32A(&ec->direct_cache[scan8[0] - 1*8], 0x01010101*(MB_TYPE_16x16>>1));
					}

					if(IS_DIRECT(left_type))
						ec->direct_cache[scan8[0] - 1 + 0*8]= MB_TYPE_DIRECT2>>1;
					else if(IS_8X8(left_type))
						ec->direct_cache[scan8[0] - 1 + 0*8]= ec->direct[4*(mb_x-1) + 1 + (left_block[0]&~1)];
					else
						ec->direct_cache[scan8[0] - 1 + 0*8]= MB_TYPE_16x16>>1;

					if(IS_DIRECT(left_type))
						ec->direct_cache[scan8[0] - 1 + 2*8]= MB_TYPE_DIRECT2>>1;
					else if(IS_8X8(left_type))
						ec->direct_cache[scan8[0] - 1 + 2*8]= ec->direct[4*(mb_x-1) + 1 + (left_block[2]&~1)];
					else
						ec->direct_cache[scan8[0] - 1 + 2*8]= MB_TYPE_16x16>>1;
				}
            }
        }
    }
    /* number of neighbours (top, left) flagged as using the 8x8 transform: 0, 1 or 2 */
    ec->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type);
}

/**
 * Copy the non-zero counts of the just-decoded macroblock out of the cache.
 *
 * Three destinations are filled: the per-column row array
 * ec->non_zero_count[mb_x] (bottom edge), ec->non_zero_count_left (right
 * edge, which is the left border of the next macroblock) and the complete
 * 24-entry m->non_zero_count of the macroblock itself.
 *
 * @param ec entropy context holding non_zero_count_cache
 * @param s  unused here
 */
static inline void write_back_non_zero_count(EntropyContext *ec, H264Slice *s){
    H264Mb *m = ec->m;
    const int col = m->mb_x;
    int row, k;

    /* bottom edge: cache row 4 (entries 4..7) plus the lower entries of the two 2x2 groups */
    AV_COPY32(&ec->non_zero_count[col][0], &ec->non_zero_count_cache[4+8*4] );
    ec->non_zero_count[col][4] = ec->non_zero_count_cache[1+8*2];
    ec->non_zero_count[col][5] = ec->non_zero_count_cache[2+8*2];
    ec->non_zero_count[col][6] = ec->non_zero_count_cache[1+8*5];
    ec->non_zero_count[col][7] = ec->non_zero_count_cache[2+8*5];

    /* right edge: cache column 7 of rows 1..4 ... */
    for (row = 0; row < 4; row++)
        ec->non_zero_count_left[row] = ec->non_zero_count_cache[7 + 8*(row+1)];
    /* ... and cache column 2 of rows 1, 2, 4, 5 */
    ec->non_zero_count_left[4] = ec->non_zero_count_cache[2+8*1];
    ec->non_zero_count_left[5] = ec->non_zero_count_cache[2+8*2];
    ec->non_zero_count_left[6] = ec->non_zero_count_cache[2+8*4];
    ec->non_zero_count_left[7] = ec->non_zero_count_cache[2+8*5];

    /* entries 0..15: cache rows 1..4, columns 4..7 */
    for (row = 0; row < 4; row++)
        AV_COPY32(&m->non_zero_count[4*row], &ec->non_zero_count_cache[4 + 8*(row+1)]);

    /* entries 16..19 and 20..23: the two 2x2 groups in cache columns 1..2 */
    for (k = 0; k < 2; k++) {
        m->non_zero_count[16 + k*2   ] = ec->non_zero_count_cache[8*1 + 8*k + 1];
        m->non_zero_count[16 + k*2 +1] = ec->non_zero_count_cache[8*1 + 8*k + 2];
        m->non_zero_count[20 + k*2   ] = ec->non_zero_count_cache[8*4 + 8*k + 1];
        m->non_zero_count[20 + k*2 +1] = ec->non_zero_count_cache[8*4 + 8*k + 2];
    }
}

/**
 * Store the border mvd values, the reference indices and (B slices only) the
 * 8x8 direct flags of the current macroblock into the per-row arrays.
 *
 * Per list used by mb_type, eight mvd entries are kept at ec->mvd[list][8*mb_x]:
 * entries 0..3 are the bottom cache row, entries 6, 5, 4 are the right-most
 * column of cache rows 0, 1, 2. Skipped macroblocks store all zeros instead.
 *
 * @param ec      entropy context (mvd_cache / ref_cache are read)
 * @param s       slice; list_count and slice_type_nos are read
 * @param mb_type type flags of the current macroblock
 */
static inline void write_back_motion(EntropyContext *ec, H264Slice *s, int mb_type){
    H264Mb *m = ec->m;
    const int mb_x = m->mb_x;
    int list;

    for (list = 0; list < s->list_count; list++) {
        uint8_t (*dst)[2];
        uint8_t (*src)[2];
        int8_t *ref_out;

        if (!USES_LIST(mb_type, list))
            continue;

        dst = (void *) ec->mvd[list][8*mb_x];
        src = &ec->mvd_cache[list][scan8[0]];
        if (!IS_SKIP(mb_type)) {
            AV_COPY64(dst, src + 8*3);
            AV_COPY16(dst + 3 + 3, src + 3 + 8*0);
            AV_COPY16(dst + 3 + 2, src + 3 + 8*1);
            AV_COPY16(dst + 3 + 1, src + 3 + 8*2);
        } else {
            AV_ZERO128(dst);
        }

        /* one reference index per 8x8 block */
        ref_out = &ec->ref_index[list][4*m->mb_x];
        ref_out[0] = ec->ref_cache[list][scan8[0]];
        ref_out[1] = ec->ref_cache[list][scan8[4]];
        ref_out[2] = ec->ref_cache[list][scan8[8]];
        ref_out[3] = ec->ref_cache[list][scan8[12]];
    }

    if (s->slice_type_nos == FF_B_TYPE && IS_8X8(mb_type)) {
        uint8_t *direct = &ec->direct[4*mb_x];

        /* entry 0 is deliberately left untouched */
        direct[1] = m->sub_mb_type[1]>>1;
        direct[2] = m->sub_mb_type[2]>>1;
        direct[3] = m->sub_mb_type[3]>>1;
    }
}

/**
 * Tell whether none of the four sub-macroblock types carries a flag that
 * rules out the 8x8 transform.
 *
 * The four 16-bit sub_mb_type entries are tested at once with a replicated
 * mask. MB_TYPE_DIRECT2 is part of the mask only when
 * direct_8x8_inference_flag is not set.
 *
 * @return 1 if no masked flag is set in any sub_mb_type, 0 otherwise
 */
static inline int get_dct8x8_allowed(EntropyContext *ec, H264Slice *s){
    H264Mb *m = ec->m;
    int blocking = MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8;

    if (!s->direct_8x8_inference_flag)
        blocking |= MB_TYPE_DIRECT2;

    return !(AV_RN64A(m->sub_mb_type) & (blocking*0x0001000100010001ULL));
}

/**
 * Set up the entropy-decoder state for a P_SKIP or B_SKIP macroblock.
 *
 * The list-0 reference cache is zeroed and written back, the macroblock type
 * and current qscale are recorded, and every non-zero count belonging to the
 * macroblock is cleared.
 */
static void decode_mb_skip(EntropyContext *ec, H264Slice *s){
    H264Mb *m = ec->m;
    const int col = m->mb_x;
    int mb_type = MB_TYPE_16x16|MB_TYPE_SKIP;

    if (s->slice_type_nos != FF_B_TYPE)
        mb_type |= MB_TYPE_P0L0|MB_TYPE_P1L0;
    else
        mb_type |= MB_TYPE_L0L1|MB_TYPE_DIRECT2;

    fill_rectangle(&ec->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
    write_back_motion(ec, s, mb_type);

    /* the macroblock fields receive the values as stored in the row arrays */
    ec->mb_type[col] = mb_type;
    m->mb_type       = ec->mb_type[col];
    ec->qscale[col]  = ec->curr_qscale;
    m->qscale_mb_xy  = ec->qscale[col];

    AV_ZERO64(ec->non_zero_count[col]);
    AV_ZERO64(ec->non_zero_count_left);
    memset(m->non_zero_count, 0, 24);
}

/**
 * Decode an intra macroblock type.
 *
 * @param ctx_base    index of the first context in c->cabac_state to use
 * @param intra_slice non-zero inside I slices: the first bin's context then
 *                    depends on the left/top neighbour types and the later
 *                    bins use a different context layout
 * @return 0 for I4x4, 25 for PCM, otherwise an I16x16 type number (1..24)
 */
static int decode_cabac_intra_mb_type(EntropyContext *ec, H264Slice *s, CABACContext *c, int ctx_base, int intra_slice) {
    uint8_t *state = &c->cabac_state[ctx_base];
    int first_ctx = 0;
    int mb_type;

    if (intra_slice) {
        const int big_intra = MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM;

        first_ctx = !!(ec->left_type & big_intra) + !!(ec->top_type & big_intra);
    }

    if (!get_cabac_noinline(c, &state[first_ctx]))
        return 0;   /* I4x4 */
    if (intra_slice)
        state += 2;

    if (get_cabac_terminate(c))
        return 25;  /* PCM */

    /* I16x16: base type plus cbp_luma, cbp_chroma and two further bins */
    mb_type  = 1;
    mb_type += 12 * get_cabac_noinline(c, &state[1]);     /* cbp_luma != 0 */
    if (get_cabac_noinline(c, &state[2])) {               /* cbp_chroma */
        mb_type += 4;
        mb_type += 4 * get_cabac_noinline(c, &state[2+intra_slice]);
    }
    mb_type += 2 * get_cabac_noinline(c, &state[3+intra_slice]);
    mb_type += 1 * get_cabac_noinline(c, &state[3+2*intra_slice]);

    return mb_type;
}

/**
 * Decode the skip flag of the current macroblock.
 *
 * The context index grows by one for each of the left / top neighbours that
 * lies inside the picture (mb_x > 0 resp. mb_y > 0) and is not skipped.
 * B slices use the contexts starting at 24 instead of 11.
 *
 * @return the decoded bin (non-zero means skipped)
 */
static int decode_cabac_mb_skip(EntropyContext *ec, H264Slice *s, H264Mb *m, CABACContext *c) {
    const int left_counts = m->mb_x>0 && !IS_SKIP( ec->left_type );
    const int top_counts  = m->mb_y>0 && !IS_SKIP( ec->top_type );
    int ctx = left_counts + top_counts;

    if (s->slice_type_nos == FF_B_TYPE)
        ctx += 13;

    return get_cabac_noinline(c, &c->cabac_state[11+ctx] );
}

/**
 * Decode the intra 4x4 prediction mode signalling.
 *
 * @return -1 if the first bin (context 68) is set, otherwise a 3-bit value
 *         read least-significant bit first with context 69
 */
static int decode_cabac_mb_intra4x4_pred_mode_delta( CABACContext *c) {
    int mode = 0;
    int bit;

    if (get_cabac(c, &c->cabac_state[68]))
        return -1;

    for (bit = 0; bit < 3; bit++)
        mode += get_cabac(c, &c->cabac_state[69]) << bit;

    return mode;
}

/**
 * Decode the chroma prediction mode (0..3).
 *
 * The first bin's context counts the available left / top neighbours whose
 * stored chroma prediction mode is non-zero; the remaining bins share
 * context 64+3.
 */
static int decode_cabac_mb_chroma_pre_mode(EntropyContext *ec, H264Slice *s, CABACContext *c) {
    H264Mb *m = ec->m;
    const int col = m->mb_x;
    int ctx = 0;

    /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode to 0 */
    if (ec->left_type && ec->chroma_pred_mode[col-1] != 0)
        ctx++;
    if (ec->top_type && ec->chroma_pred_mode_top[col] != 0)
        ctx++;

    if (!get_cabac_noinline(c, &c->cabac_state[64+ctx]))
        return 0;
    if (!get_cabac_noinline(c, &c->cabac_state[64+3]))
        return 1;

    return get_cabac_noinline(c, &c->cabac_state[64+3]) ? 3 : 2;
}

/**
 * Decode the four luma bits of the coded block pattern.
 *
 * Each bit's context (73 + 0..3) is derived from the left and top neighbour
 * bit, taken either from ec->left_cbp / ec->top_cbp or from the bits already
 * decoded for this macroblock.
 *
 * @return the 4-bit luma cbp
 */
static int decode_cabac_mb_cbp_luma(EntropyContext *ec, CABACContext *c) {
    uint8_t *state = &c->cabac_state[73];
    const int left = ec->left_cbp;
    const int top  = ec->top_cbp;
    int cbp = 0;
    int inc;

    inc  = !(left & 0x02) + 2 * !(top & 0x04);
    cbp += get_cabac_noinline(c, &state[inc]);

    inc  = !(cbp  & 0x01) + 2 * !(top & 0x08);
    cbp += get_cabac_noinline(c, &state[inc]) << 1;

    inc  = !(left & 0x08) + 2 * !(cbp & 0x01);
    cbp += get_cabac_noinline(c, &state[inc]) << 2;

    inc  = !(cbp  & 0x04) + 2 * !(cbp & 0x02);
    cbp += get_cabac_noinline(c, &state[inc]) << 3;

    return cbp;
}
/**
 * Decode the chroma part of the coded block pattern.
 *
 * Bits 4..5 of ec->left_cbp / ec->top_cbp give the neighbours' chroma cbp,
 * which selects the contexts (77 + 0..7).
 *
 * @return 0, 1 or 2
 */
static int decode_cabac_mb_cbp_chroma(EntropyContext *ec, CABACContext *c) {
    const int left = (ec->left_cbp>>4)&0x03;
    const int top  = (ec-> top_cbp>>4)&0x03;
    int ctx;

    /* first bin: is there any chroma residual? */
    ctx = (left > 0) + 2*(top > 0);
    if (!get_cabac_noinline(c, &c->cabac_state[77 + ctx]))
        return 0;

    /* second bin: selects between return values 1 and 2 */
    ctx = 4 + (left == 2) + 2*(top == 2);
    return 1 + get_cabac_noinline(c, &c->cabac_state[77 + ctx] );
}

/**
 * Decode a P sub-macroblock type using contexts 21..23.
 *
 * @return 0 (8x8), 1 (8x4), 2 (4x8) or 3 (4x4)
 */
static int decode_cabac_p_mb_sub_type( CABACContext *c) {
    if (get_cabac(c, &c->cabac_state[21]))
        return 0;   /* 8x8 */
    if (!get_cabac(c, &c->cabac_state[22]))
        return 1;   /* 8x4 */

    /* 4x8 if the last bin is set, 4x4 otherwise */
    return get_cabac(c, &c->cabac_state[23]) ? 2 : 3;
}
/**
 * Decode a B sub-macroblock type using contexts 36..39.
 *
 * @return the sub-type number, 0 (B_Direct_8x8) through 12
 */
static int decode_cabac_b_mb_sub_type(CABACContext *c) {
    uint8_t *state = &c->cabac_state[36];
    int type;

    if (!get_cabac(c, &state[0]))
        return 0;   /* B_Direct_8x8 */
    if (!get_cabac(c, &state[1]))
        return 1 + get_cabac(c, &state[3]); /* B_L0_8x8, B_L1_8x8 */

    type = 3;
    if (get_cabac(c, &state[2])) {
        if (get_cabac(c, &state[3]))
            return 11 + get_cabac(c, &state[3]); /* B_L1_4x4, B_Bi_4x4 */
        type = 7;
    }

    /* two more bins, most significant first */
    type += 2*get_cabac(c, &state[3]);
    type +=   get_cabac(c, &state[3]);
    return type;
}

/**
 * Decode the reference index of partition n for the given list.
 *
 * The first context (54 + 0..3) depends on whether the left / top cached
 * reference is greater than 0; in B slices such a neighbour only counts when
 * its direct_cache entry lacks the MB_TYPE_DIRECT2>>1 flag. The value itself
 * is a unary code.
 *
 * @return the reference index, or -1 once 32 set bins have been read
 */
static int decode_cabac_mb_ref(EntropyContext *ec, H264Slice *s, CABACContext *c, int list, int n ) {
    const int is_b   = s->slice_type_nos == FF_B_TYPE;
    const int left   = scan8[n] - 1;
    const int top    = scan8[n] - 8;
    const int direct = MB_TYPE_DIRECT2>>1;
    int ctx = 0;
    int ref = 0;

    if (ec->ref_cache[list][left] > 0 && !(is_b && (ec->direct_cache[left] & direct)))
        ctx++;
    if (ec->ref_cache[list][top] > 0 && !(is_b && (ec->direct_cache[top] & direct)))
        ctx += 2;

    while (get_cabac(c, &c->cabac_state[54+ctx])) {
        if (++ref >= 32 /*h->ref_list[list]*/)
            return -1;
        ctx = (ctx>>2)+4;
    }
    return ref;
}

/**
 * Decode one motion vector difference component.
 *
 * @param c       CABAC decoder state
 * @param ctxbase index of the first context in c->cabac_state for this component
 * @param amvd    sum of the neighbouring absolute mvd values; selects the
 *                context of the first bin (+0 for <= 2, +1 for 3..32, +2 above)
 * @param mvda    receives the absolute value, clipped to 70; set to 0 when the
 *                component is zero and also on the overflow error path, so it
 *                is never left indeterminate
 * @return the signed mvd, or INT_MIN if the escape prefix is too long
 */
static int decode_cabac_mb_mvd( CABACContext *c, int ctxbase, int amvd, int *mvda) {
    int mvd;

    /* comparisons rather than sign-bit shifts: no reliance on right-shifting
     * a negative int */
    if (!get_cabac(c, &c->cabac_state[ctxbase + (amvd > 2) + (amvd > 32)])) {
        *mvda = 0;
        return 0;
    }

    /* unary prefix, up to 9; the context advances for the first three bins only */
    mvd = 1;
    ctxbase += 3;
    while (mvd < 9 && get_cabac(c, &c->cabac_state[ctxbase])) {
        if (mvd < 4)
            ctxbase++;
        mvd++;
    }

    if (mvd >= 9) {
        /* bypass-coded escape: unary prefix raises k, then k suffix bits follow */
        int k = 3;
        while (get_cabac_bypass(c)) {
            mvd += 1 << k;
            k++;
            if (k > 24) {
                av_log(AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
                *mvda = 0;
                return INT_MIN;
            }
        }
        while (k--) {
            mvd += get_cabac_bypass(c) << k;
        }
        *mvda = mvd < 70 ? mvd : 70;
    } else {
        *mvda = mvd;
    }
    return get_cabac_bypass_sign(c, -mvd);
}

/*
 * Decode both components of one motion vector difference for partition n of
 * the given list.
 *
 * The context selectors amvd0 / amvd1 are the sums of the left (scan8[n] - 1)
 * and top (scan8[n] - 8) entries of ec->mvd_cache. Besides its arguments the
 * macro uses m, mp, mpx and mpy from the enclosing scope: the decoded values
 * go to m->mvd[list][mp][0..1], the clipped absolute values to mpx / mpy, and
 * mp is incremented afterwards.
 * NOTE(review): the INT_MIN error return of decode_cabac_mb_mvd() is stored
 * without being checked here -- confirm that callers validate it.
 */
#define DECODE_CABAC_MB_MVD( ec, c, list,  n )\
{\
    int amvd0 = ec->mvd_cache[list][scan8[n] - 1][0] +\
                ec->mvd_cache[list][scan8[n] - 8][0];\
    int amvd1 = ec->mvd_cache[list][scan8[n] - 1][1] +\
                ec->mvd_cache[list][scan8[n] - 8][1];\
\
    m->mvd[list][mp][0] = decode_cabac_mb_mvd( c, 40, amvd0, &mpx ); \
    m->mvd[list][mp][1] = decode_cabac_mb_mvd( c, 47, amvd1, &mpy ); \
    mp++; \
}

/**
 * Compute the context offset of the coded block flag for a residual block.
 *
 * For DC blocks the neighbour flags come from ec->left_cbp / ec->top_cbp
 * (bit 8 for cat 0, bit 6+idx otherwise); for all other blocks they are the
 * left and top entries of the non-zero-count cache.
 *
 * @return (left > 0) + 2 * (top > 0) + 4 * cat
 */
static av_always_inline int get_cabac_cbf_ctx(EntropyContext *ec, H264Slice *s, int cat, int idx, int is_dc ) {
    int left_nz, top_nz;

    if (!is_dc) {
        assert(cat == 1 || cat == 2 || cat == 4);
        left_nz = ec->non_zero_count_cache[scan8[idx] - 1];
        top_nz  = ec->non_zero_count_cache[scan8[idx] - 8];
    } else if (cat == 0) {
        left_nz = ec->left_cbp&0x100;
        top_nz  = ec-> top_cbp&0x100;
    } else {
        left_nz = (ec->left_cbp>>(6+idx))&0x01;
        top_nz  = (ec-> top_cbp>>(6+idx))&0x01;
    }

    return (left_nz > 0) + 2*(top_nz > 0) + 4 * cat;
}

/* Offset added to the last-coefficient context base for 8x8 (cat 5) blocks,
 * indexed by scan position 0..62. */
DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8)[63] = {
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
    5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
};

/* First significant_coeff_flag context in cabac_state, indexed [row][cat].
 * decode_cabac_residual_internal() uses row 0 only. */
static const int significant_coeff_flag_offset[2][6] = {
    { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
    { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
};
/* First last_significant_coeff_flag context in cabac_state, indexed [row][cat].
 * decode_cabac_residual_internal() uses row 0 only. */
static const int last_coeff_flag_offset[2][6] = {
    { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
    { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
};
/* First coeff_abs_level_minus1 context in cabac_state, indexed by cat. */
static const int coeff_abs_level_m1_offset[6] = {
    227+0, 227+10, 227+20, 227+30, 227+39, 426
};
/* Offset added to the significance context base for 8x8 (cat 5) blocks,
 * indexed [row][scan position 0..62].
 * decode_cabac_residual_internal() uses row 0 only. */
static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
    { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
    4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
    7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
    12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
    { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
    6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
    9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
    9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
};
/* Node context used while decoding coefficient levels:
 *   0..3: abslevel1 (with abslevelgt1 == 0),
 *   4..7: abslevelgt1 + 3 (abslevel1 does not matter).
 * Map node ctx => cabac ctx for level=1 */
static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
/* map node ctx => cabac ctx for level>1 */
static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
/* Next node ctx, indexed [0 after a level=1, 1 after a level>1][current node ctx]. */
static const uint8_t coeff_abs_level_transition[2][8] = {
    /* update node ctx after decoding a level=1 */
    { 1, 2, 3, 3, 4, 5, 6, 7 },
    /* update node ctx after decoding a level>1 */
    { 4, 4, 4, 4, 5, 6, 7, 7 }
};

/**
 * Decode one residual block.
 *
 * Steps: (1) read the coded block flag, except for non-DC cat 5 blocks, and
 * return early if it is 0; (2) read the significance map into index[];
 * (3) record the block as coded in ec->cbp[mb_x] (DC) or in the
 * non-zero-count cache (all others); (4) decode the levels from the last
 * significant coefficient back to the first and store them in block[].
 *
 * @param block     destination; entries are addressed through scantable
 * @param cat       block category, selects the context offsets
 * @param n         block index (scan8 index, or cbp bit shift for chroma DC)
 * @param scantable maps coefficient index to position in block
 * @param qmul      per-position dequant factors; not read when is_dc is set
 * @param max_coeff number of coefficients in the block
 * @param is_dc     non-zero for DC blocks, whose levels are stored unscaled
 */
static av_always_inline void decode_cabac_residual_internal(EntropyContext *ec, H264Slice *s, CABACContext *c, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
    H264Mb *m = ec->m;
    const int mb_x = m->mb_x;
    int index[64];

    int av_unused last;
    int coeff_count = 0;
    int node_ctx = 0;

    uint8_t *significant_coeff_ctx_base;
    uint8_t *last_coeff_ctx_base;
    uint8_t *abs_level_m1_ctx_base;

    /* read coded block flag */
    if( is_dc || cat != 5 ) {
        if( get_cabac( c, &c->cabac_state[85 + get_cabac_cbf_ctx( ec, s, cat, n, is_dc ) ] ) == 0 ) {
            if( !is_dc )
                ec->non_zero_count_cache[scan8[n]] = 0;
            return;
        }
    }

    significant_coeff_ctx_base = c->cabac_state
        + significant_coeff_flag_offset[0][cat];
    last_coeff_ctx_base = c->cabac_state
        + last_coeff_flag_offset[0][cat];
    abs_level_m1_ctx_base = c->cabac_state
        + coeff_abs_level_m1_offset[cat];

    /* Significance map: collect the indices of all non-zero coefficients.
     * If no "last" flag was seen, the final coefficient is implicitly significant. */
    if( !is_dc && cat == 5 ) {
#define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
        for(last= 0; last < coefs; last++) { \
            uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
            if( get_cabac( c, sig_ctx )) { \
                uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
                index[coeff_count++] = last; \
                if( get_cabac( c, last_ctx ) ) { \
                    last= max_coeff; \
                    break; \
                } \
            } \
        }\
        if( last == max_coeff -1 ) {\
            index[coeff_count++] = last;\
        }

        const uint8_t *sig_off = significant_coeff_flag_offset_8x8[0];
        DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
    } else {
        DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
    }
    assert(coeff_count > 0);

    if( is_dc ) {
        if( cat == 0 )
            ec->cbp[mb_x] |= 0x100;
        else
            ec->cbp[mb_x] |= 0x40 << n;
    } else {
        if( cat == 5 )
            fill_rectangle(&ec->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
        else {
            assert( cat == 1 || cat == 2 || cat == 4 );
            ec->non_zero_count_cache[scan8[n]] = coeff_count;
        }
    }

    /* levels, from the last significant coefficient down to the first */
    do {
        uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;

        int j= scantable[index[--coeff_count]];

        if( get_cabac( c, ctx ) == 0 ) {
            /* |level| == 1 */
            node_ctx = coeff_abs_level_transition[0][node_ctx];
            if( is_dc ) {
                block[j] = get_cabac_bypass_sign( c, -1);
            }else{
                block[j] = (get_cabac_bypass_sign( c, -qmul[j]) + 32) >> 6;
            }
        } else {
            /* |level| > 1: unary part up to 15, then a bypass-coded escape */
            int coeff_abs = 2;
            ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
            node_ctx = coeff_abs_level_transition[1][node_ctx];

            while( coeff_abs < 15 && get_cabac( c, ctx ) ) {
                coeff_abs++;
            }

            if( coeff_abs >= 15 ) {
                /* Cap the escape prefix so a corrupt stream can neither spin
                 * here nor overflow coeff_abs in the doubling loop below. */
                int prefix_len = 0;
                while( get_cabac_bypass( c ) && prefix_len < 16+7 ) {
                    prefix_len++;
                }

                coeff_abs=1;
                while( prefix_len-- ) {
                    coeff_abs += coeff_abs + get_cabac_bypass( c );
                }
                coeff_abs+= 14;
            }

            if( is_dc ) {
                block[j] = get_cabac_bypass_sign( c, -coeff_abs );
            }else{
                block[j] = (get_cabac_bypass_sign( c, -coeff_abs ) * qmul[j] + 32) >> 6;
            }
        }
    } while( coeff_count );

}

/**
 * Decode a DC residual block: forwards to decode_cabac_residual_internal()
 * with is_dc = 1 and no dequant table.
 */
static void decode_cabac_residual_dc( EntropyContext *ec, H264Slice *s, CABACContext *c, DCTELEM *block, int cat, int n, const uint8_t *scantable, int max_coeff ) {
    const uint32_t *no_qmul = NULL;

    decode_cabac_residual_internal(ec, s, c, block, cat, n, scantable,
                                   no_qmul, max_coeff, 1);
}

/**
 * Decode a non-DC residual block: forwards to
 * decode_cabac_residual_internal() with is_dc = 0 and the given dequant table.
 */
static void decode_cabac_residual_nondc( EntropyContext *ec, H264Slice *s, CABACContext *c, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
    decode_cabac_residual_internal(ec, s, c, block, cat, n, scantable,
                                   qmul, max_coeff, 0);
}

/**
 * Decodes the CABAC-coded syntax elements of one macroblock.
 *
 * Reads, in order: the skip flag (non-I slices), the macroblock type, then
 * either raw PCM samples, intra prediction modes, or reference indices and
 * motion vector differences, followed by the coded block pattern, the
 * optional 8x8 transform flag, the qscale delta and the residual
 * coefficients. Results are stored in the macroblock @c ec->m and in the
 * per-row tables / caches of @p ec.
 *
 * @param ec entropy decoding context holding the current macroblock and the
 *           neighbour caches
 * @param s  slice-level parameters (slice type, reference counts, pps flags)
 * @param c  CABAC decoder state positioned at the start of the macroblock
 * @return 0 if OK, -1 if an error is noticed (reference index out of range,
 *         truncated PCM data, or runaway qscale-delta code)
 */
int ff_h264_decode_mb_cabac(EntropyContext *ec, H264Slice *s, CABACContext *c) {
    H264Mb *m = ec->m;
    int mb_x = m->mb_x;
    int mb_type, partition_count, cbp = 0;
    int dct8x8_allowed= s->pps.transform_8x8_mode;

    fill_decode_neighbors(ec, s);

    if( s->slice_type_nos != FF_I_TYPE ) {
        int skip;
        /* read the skip flag */
        skip = decode_cabac_mb_skip( ec, s, m, c);

        /* a skipped mb carries no further syntax elements: reset the
         * per-mb state that neighbours will look at and stop here */
        if( skip ) {
            decode_mb_skip(ec, s);
            m->cbp = ec->cbp[mb_x] = 0;
            ec->chroma_pred_mode[mb_x] = 0;
            ec->last_qscale_diff = 0;
            return 0;
        }
    }

    if( s->slice_type_nos == FF_B_TYPE ) {
        /* context index counts how many of the left/top neighbours are not direct */
        int ctx = 0;

        if( !IS_DIRECT( ec->left_type-1 ) )
            ctx++;
        if( !IS_DIRECT( ec->top_type-1 ) )
            ctx++;

        if( !get_cabac_noinline(c, &c->cabac_state[27+ctx] ) ){
            mb_type= 0; /* B_Direct_16x16 */
        }else if( !get_cabac_noinline(c, &c->cabac_state[27+3] ) ) {
            mb_type= 1 + get_cabac_noinline(c, &c->cabac_state[27+5] ); /* B_L[01]_16x16 */
        }else{
            int bits;
            bits = get_cabac_noinline(c, &c->cabac_state[27+4] ) << 3;
            bits+= get_cabac_noinline(c, &c->cabac_state[27+5] ) << 2;
            bits+= get_cabac_noinline(c, &c->cabac_state[27+5] ) << 1;
            bits+= get_cabac_noinline(c, &c->cabac_state[27+5] );
            if( bits < 8 ){
                mb_type= bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
            }else if( bits == 13 ){
                /* intra macroblock inside a B slice */
                mb_type= decode_cabac_intra_mb_type(ec, s, c, 32, 0);
                goto decode_intra_mb;
            }else if( bits == 14 ){
                mb_type= 11; /* B_L1_L0_8x16 */
            }else if( bits == 15 ){
                mb_type= 22; /* B_8x8 */
            }else{
                bits= ( bits<<1 ) + get_cabac_noinline(c, &c->cabac_state[27+5] );
                mb_type= bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
            }
        }
        partition_count= b_mb_type_info[mb_type].partition_count;
        mb_type=         b_mb_type_info[mb_type].type;
    } else if( s->slice_type_nos == FF_P_TYPE ) {
        if( get_cabac_noinline(c, &c->cabac_state[14] ) == 0 ) {
            /* P-type */
            if( get_cabac_noinline(c, &c->cabac_state[15] ) == 0 ) {
                /* P_L0_D16x16, P_8x8 */
                mb_type= 3 * get_cabac_noinline(c, &c->cabac_state[16] );
            } else {
                /* P_L0_D8x16, P_L0_D16x8 */
                mb_type= 2 - get_cabac_noinline(c, &c->cabac_state[17] );
            }
            partition_count= p_mb_type_info[mb_type].partition_count;
            mb_type=         p_mb_type_info[mb_type].type;
        } else {
            /* intra macroblock inside a P slice */
            mb_type= decode_cabac_intra_mb_type(ec, s, c, 17, 0);
            goto decode_intra_mb;
        }
    } else {
        mb_type= decode_cabac_intra_mb_type(ec, s ,c, 3, 1);
        if(s->slice_type == FF_SI_TYPE && mb_type)
            mb_type--;
        assert(s->slice_type_nos == FF_I_TYPE);
decode_intra_mb:
        /* shared tail for intra macroblocks of every slice type: translate
         * the raw index through i_mb_type_info */
        partition_count = 0;
        cbp= i_mb_type_info[mb_type].cbp;
        m->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
        mb_type= i_mb_type_info[mb_type].type;
    }

    if(IS_INTRA_PCM(mb_type)) {
        const uint8_t *ptr;
        // We assume these blocks are very rare so we do not optimize it.
        // FIXME The two following lines get the bitstream position in the cabac
        // decode, I think it should be done by a function in cabac.h (or cabac.c).
        ptr=c->bytestream;
        if(c->low&0x1) ptr--;
        if(CABAC_BITS==16){
            if(c->low&0x1FF) ptr--;
        }

        // 256 + 128 raw sample bytes are copied below; refuse to read past
        // the end of the input buffer on truncated or corrupt streams.
        if( c->bytestream_end - ptr < 256 + 128 ){
            av_log(AV_LOG_ERROR, "PCM macroblock data truncated at %d %d\n", m->mb_x, m->mb_y);
            return -1;
        }

        // The pixels are stored in the same order as levels in the m->mb array.
        memcpy(m->mb, ptr, 256); ptr+=256;
        memcpy(m->mb+128, ptr, 128); ptr+=128;

        // restart the arithmetic decoder right after the raw samples
        ff_init_cabac_decoder(c, ptr, c->bytestream_end - ptr);

        // All blocks are present
        m->cbp= ec->cbp[mb_x] = 0x1ef;
        ec->chroma_pred_mode[mb_x] = 0;
        // In deblocking, the quantizer is 0
        m->qscale_mb_xy = ec->qscale[mb_x]= 0;
        // All coeffs are present
        memset(ec->non_zero_count[mb_x], 16, 8);
        m->mb_type = ec->mb_type[mb_x]=  mb_type;
        ec->last_qscale_diff = 0;

        return 0;
    }

    fill_decode_caches(ec, s, mb_type);

    // NOTE(review): not referenced directly in this function; presumably
    // consumed by the DECODE_CABAC_MB_MVD macro - confirm before removing.
    int mp = 0;
    if( IS_INTRA( mb_type ) ) {
        int i;
        if( IS_INTRA4x4( mb_type ) ) {
            if( dct8x8_allowed && get_cabac_noinline(c, &c->cabac_state[399 + ec->neighbor_transform_size] ) ) {
                /* 8x8 transform: one prediction mode per 8x8 block, stored at every 4th slot */
                mb_type |= MB_TYPE_8x8DCT;
                for( i = 0; i < 16; i+=4 ) {
                    m->intra4x4_pred_mode[i] = decode_cabac_mb_intra4x4_pred_mode_delta(c);
                }
            } else {
                for( i = 0; i < 16; i++ ) {
                    m->intra4x4_pred_mode[i] = decode_cabac_mb_intra4x4_pred_mode_delta(c);
                }
            }
        }

        m->chroma_pred_mode= ec->chroma_pred_mode[mb_x] =
            decode_cabac_mb_chroma_pre_mode( ec, s, c );

    } else if( partition_count == 4 ) {
        int i, j, sub_partition_count[4], list;

        /* sub macroblock types: raw index first, then mapped through the info table */
        if( s->slice_type_nos == FF_B_TYPE ) {
            for( i = 0; i < 4; i++ ) {
                m->sub_mb_type[i] = decode_cabac_b_mb_sub_type( c );
                sub_partition_count[i]= b_sub_mb_type_info[ m->sub_mb_type[i] ].partition_count;
                m->sub_mb_type[i]=      b_sub_mb_type_info[ m->sub_mb_type[i] ].type;
            }
            if( IS_DIRECT(m->sub_mb_type[0] | m->sub_mb_type[1] |
                          m->sub_mb_type[2] | m->sub_mb_type[3]) ) {
                ec->ref_cache[0][scan8[4]] =
                ec->ref_cache[1][scan8[4]] =
                ec->ref_cache[0][scan8[12]] =
                ec->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;

                for( i = 0; i < 4; i++ )
                    fill_rectangle( &ec->direct_cache[scan8[4*i]], 2, 2, 8, (m->sub_mb_type[i]>>1)&0xFF, 1 );
            }
        } else {
            for( i = 0; i < 4; i++ ) {
                m->sub_mb_type[i] = decode_cabac_p_mb_sub_type( c );
                sub_partition_count[i]= p_sub_mb_type_info[ m->sub_mb_type[i] ].partition_count;
                m->sub_mb_type[i]=      p_sub_mb_type_info[ m->sub_mb_type[i] ].type;
            }
        }

        /* reference indices, one per 8x8 partition and list */
        for( list = 0; list < s->list_count; list++ ) {
            for( i = 0; i < 4; i++ ) {
                if(IS_DIRECT(m->sub_mb_type[i])) continue;
                if(IS_DIR(m->sub_mb_type[i], 0, list)){
                    if( s->ref_count[list] > 1 ){
                        m->ref_index[list][i] = decode_cabac_mb_ref(ec, s, c, list, 4*i );
                        if(m->ref_index[list][i] >= s->ref_count[list]){
                            av_log(AV_LOG_ERROR, "Reference %d >= %d\n", m->ref_index[list][i], s->ref_count[list]);
                            return -1;
                        }
                    }else
                        m->ref_index[list][i] = 0;
                } else {
                    m->ref_index[list][i] = -1;
                }
                ec->ref_cache[list][ scan8[4*i]   ]=ec->ref_cache[list][ scan8[4*i]+1 ]=
                ec->ref_cache[list][ scan8[4*i]+8 ]=ec->ref_cache[list][ scan8[4*i]+9 ]= m->ref_index[list][i];
            }
        }

        if(dct8x8_allowed){
            dct8x8_allowed = get_dct8x8_allowed(ec, s);
        }

        /* motion vector differences, one per sub partition */
        for(list=0; list<s->list_count; list++){
            for(i=0; i<4; i++){
                if(IS_DIRECT(m->sub_mb_type[i])){
                    fill_rectangle(ec->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 2);
                    continue;
                }

                if(IS_DIR(m->sub_mb_type[i], 0, list) && !IS_DIRECT(m->sub_mb_type[i])){
                    const int sub_mb_type= m->sub_mb_type[i];
                    const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
                    for(j=0; j<sub_partition_count[i]; j++){
                        int mpx, mpy;
                        const int index= 4*i + block_width*j;
                        uint8_t (* mvd_cache)[2]= &ec->mvd_cache[list][ scan8[index]];

                        DECODE_CABAC_MB_MVD( ec, c, list, index)

                        /* replicate the decoded mvd over every 4x4 cell the sub partition covers */
                        if(IS_SUB_8X8(sub_mb_type)){
                            mvd_cache[ 1 ][0]=
                            mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mpx;
                            mvd_cache[ 1 ][1]=
                            mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= mpy;
                        }else if(IS_SUB_8X4(sub_mb_type)){
                            mvd_cache[ 1 ][0]=  mpx;
                            mvd_cache[ 1 ][1]= mpy;
                        }else if(IS_SUB_4X8(sub_mb_type)){
                            mvd_cache[ 8 ][0]= mpx;
                            mvd_cache[ 8 ][1]= mpy;
                        }
                        mvd_cache[ 0 ][0]= mpx;
                        mvd_cache[ 0 ][1]= mpy;
                    }
                }else{
                    fill_rectangle(ec->mvd_cache[list][ scan8[4*i] ], 2, 2, 8, 0, 2);
                }
            }
        }
    } else if( IS_DIRECT(mb_type) ) {
        /* direct macroblock: no mvd is coded, so the mvd caches are zeroed */
        mb_type |= MB_TYPE_16x16;
        fill_rectangle(ec->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 2);
        fill_rectangle(ec->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 2);
        dct8x8_allowed &= s->direct_8x8_inference_flag;
    } else {
        int list, i;
        /* for each partition shape: first all reference indices, then all mvds */
        if(IS_16X16(mb_type)){
            for(list=0; list<s->list_count; list++){
                if(IS_DIR(mb_type, 0, list)){
                    int ref;
                    if(s->ref_count[list] > 1){
                        ref= decode_cabac_mb_ref(ec, s, c, list, 0);
                        if(ref >= s->ref_count[list]){
                            av_log(AV_LOG_ERROR, "Reference %d >= %d\n", ref, s->ref_count[list]);
                            return -1;
                        }
                    }else
                        ref=0;
                    m->ref_index[list][0]= ref;
                    fill_rectangle(&ec->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
                }
            }
            for(list=0; list<s->list_count; list++){
                if(IS_DIR(mb_type, 0, list)){
                    int mpx,mpy;
                    DECODE_CABAC_MB_MVD( ec, c, list, 0)

                    fill_rectangle(ec->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack8to16(mpx,mpy), 2);
                }

            }
        }
        else if(IS_16X8(mb_type)){
            for(list=0; list<s->list_count; list++){
                for(i=0; i<2; i++){
                    if(IS_DIR(mb_type, i, list)){
                        int ref;
                        if(s->ref_count[list] > 1){
                            ref= decode_cabac_mb_ref(ec, s, c, list, 8*i );
                            if(ref >= s->ref_count[list]){
                                av_log(AV_LOG_ERROR, "Reference %d >= %d\n", ref, s->ref_count[list]);
                                return -1;
                            }
                        }else
                            ref=0;
                        m->ref_index[list][i]= ref;
                        fill_rectangle(&ec->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
                    }else{
                        m->ref_index[list][i]= LIST_NOT_USED;
                        fill_rectangle(&ec->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
                    }
                }
            }
            for(list=0; list<s->list_count; list++){
                for(i=0; i<2; i++){
                    if(IS_DIR(mb_type, i, list)){
                        int mpx,mpy;
                        DECODE_CABAC_MB_MVD( ec, c, list, 8*i)

                        fill_rectangle(ec->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack8to16(mpx,mpy), 2);
                    }else{
                        fill_rectangle(ec->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 2);
                    }
                }
            }
        }else{
            assert(IS_8X16(mb_type));
            for(list=0; list<s->list_count; list++){
                for(i=0; i<2; i++){
                    if(IS_DIR(mb_type, i, list)){ //FIXME optimize
                        int ref;
                        if(s->ref_count[list] > 1){
                            ref= decode_cabac_mb_ref(ec, s, c, list, 4*i );
                            if(ref >= s->ref_count[list]){
                                av_log(AV_LOG_ERROR, "Reference %d >= %d\n", ref, s->ref_count[list]);
                                return -1;
                            }
                        }else
                            ref=0;
                        m->ref_index[list][i]= ref;
                        fill_rectangle(&ec->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
                    }else{
                        m->ref_index[list][i]= LIST_NOT_USED;
                        fill_rectangle(&ec->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
                    }
                }
            }
            for(list=0; list<s->list_count; list++){
                for(i=0; i<2; i++){
                    if(IS_DIR(mb_type, i, list)){
                        int mpx,mpy;
                        DECODE_CABAC_MB_MVD( ec, c, list, 4*i)

                        fill_rectangle(ec->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack8to16(mpx,mpy), 2);
                    }else{
                        fill_rectangle(ec->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 2);
                    }
                }
            }
        }
    }

    if( IS_INTER( mb_type ) ||(IS_DIRECT(mb_type))) {
        ec->chroma_pred_mode[mb_x] = 0;
        write_back_motion( ec, s, mb_type );
    }

    /* intra 16x16 already took its cbp from i_mb_type_info above */
    if( !IS_INTRA16x16( mb_type ) ) {
        cbp  = decode_cabac_mb_cbp_luma( ec, c);
        cbp |= decode_cabac_mb_cbp_chroma( ec, c ) << 4;
    }

    ec->cbp[mb_x] = m->cbp = cbp;

    /* the transform size flag for non-intra mbs is only read when luma residual is coded */
    if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
        int t = get_cabac_noinline(c, &c->cabac_state[399 + ec->neighbor_transform_size] );
        mb_type |= MB_TYPE_8x8DCT * t;
    }
    m->mb_type = ec->mb_type[mb_x] = mb_type;

    if( cbp || IS_INTRA16x16( mb_type ) ) {
        const uint8_t *scan, *scan8x8, *dc_scan;
        const uint32_t *qmul;


        // NOTE(review): the static zigzag tables are selected when
        // curr_qscale is non-zero, whereas H.264 transform bypass applies at
        // QP 0 - confirm this condition is intended.
        if (s->transform_bypass && ec->curr_qscale){
            scan8x8= ff_zigzag_direct;
            scan= zigzag_scan;
        }else{
            scan8x8= ec->zigzag_scan8x8;
            scan= ec->zigzag_scan;
        }
        dc_scan= luma_dc_zigzag_scan;

        // decode_cabac_mb_dqp
        if(get_cabac_noinline(c, &c->cabac_state[60 + (ec->last_qscale_diff != 0)])){
            int val = 1;
            int ctx= 2;

            /* unary code: count the remaining 1-bins */
            while( get_cabac_noinline(c, &c->cabac_state[60 + ctx] ) ) {
                ctx= 3;
                val++;
                if(val > 102){ //prevent infinite loop
                    av_log(AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", m->mb_x, m->mb_y);
                    return -1;
                }
            }

            /* map the unsigned code to a signed delta: odd -> positive, even -> negative */
            if( val&0x01 )
                val=   (val + 1)>>1 ;
            else
                val= -((val + 1)>>1);
            ec->last_qscale_diff = val;
            ec->curr_qscale += val;
            /* wrap the quantiser back into 0..51 */
            if(((unsigned)ec->curr_qscale) > 51){
                if(ec->curr_qscale<0) ec->curr_qscale+= 52;
                else            ec->curr_qscale-= 52;
            }
            ec->chroma_qp[0] = get_chroma_qp( s, 0, ec->curr_qscale);
            ec->chroma_qp[1] = get_chroma_qp( s, 1, ec->curr_qscale);
        }else
            ec->last_qscale_diff=0;

        /* clear the luma coefficients before the residual decoder fills them in */
        memset(m->mb, 0, 16*16 * sizeof(DCTELEM));
        if( IS_INTRA16x16( mb_type ) ) {
            int i;

            /* luma DC block first, then up to 15 AC coefficients per 4x4 block */
            decode_cabac_residual_dc( ec, s, c, m->mb, 0, 0, dc_scan, 16);
            qmul = ec->dequant4_coeff[0][ec->curr_qscale];
            if( cbp&15 ) {
                for( i = 0; i < 16; i++ ) {
                    decode_cabac_residual_nondc( ec, s, c, m->mb + 16*i, 1, i, scan + 1, qmul, 15);
                }
            } else {
                fill_rectangle(&ec->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
            }
            h264_luma_dc_dequant_idct_c(m->mb, qmul[0]);
        } else {

            int i8x8, i4x4;
            for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
                if( cbp & (1<<i8x8) ) {
                    if( IS_8x8DCT(mb_type) ) {
                        decode_cabac_residual_nondc(ec, s, c, m->mb + 64*i8x8, 5, 4*i8x8,
                            scan8x8, ec->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][ec->curr_qscale], 64);
                    } else {
                        qmul = ec->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][ec->curr_qscale];
                        for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
                            const int index = 4*i8x8 + i4x4;
                            decode_cabac_residual_nondc(ec, s, c, m->mb + 16*index, 2, index, scan, qmul, 16);
                        }
                    }
                } else {
                    /* uncoded 8x8 block: mark its four 4x4 cells as having no coefficients */
                    uint8_t * const nnz= &ec->non_zero_count_cache[ scan8[4*i8x8] ];
                    nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
                }
            }
        }

        /* chroma DC, present when either chroma cbp bit is set */
        if( cbp&0x30 ){
            memset(m->mb + 256, 0, 2*64 * sizeof(DCTELEM));
            for( int i = 0; i < 2; i++ ) {
                const uint32_t dequant4_coeff = ec->dequant4_coeff[IS_INTRA(mb_type) ? 1+i:4+i][ec->chroma_qp[i]][0];

                decode_cabac_residual_dc(ec, s, c, m->mb + 256 + 16*4*i, 3, i, chroma_dc_scan, 4);
                chroma_dc_dequant_idct_c(m->mb + 256 + 16*4*i, dequant4_coeff);
            }
        }

        /* chroma AC, present only when the upper chroma cbp bit is set */
        if( cbp&0x20 ) {
            int i, j;
            for( i = 0; i < 2; i++ ) {
                qmul = ec->dequant4_coeff[i+1+(IS_INTRA( mb_type ) ? 0:3)][ec->chroma_qp[i]];
                for( j = 0; j < 4; j++ ) {
                    const int index = 16 + 4 * i + j;
                    decode_cabac_residual_nondc( ec, s, c, m->mb + 16*index, 4, index, scan + 1, qmul, 15);
                }
            }
        } else {
            uint8_t * const nnz= &ec->non_zero_count_cache[0];
            nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
            nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
        }

    } else {
        /* no residual at all: clear every non-zero count and the qscale delta history */
        uint8_t * const nnz= &ec->non_zero_count_cache[0];
        fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
        nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
        nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
        ec->last_qscale_diff = 0;
    }

    m->qscale_mb_xy = ec->qscale[mb_x]= ec->curr_qscale;
    write_back_non_zero_count(ec, s);


    return 0;
}

/**
 * Releases an entropy context and every per-row table it owns.
 *
 * Accepts NULL (no-op). Each table pointer is released with av_freep(), so
 * a context whose tables were only partly allocated can be passed as well;
 * get_entropy_context() relies on this for its failure path.
 *
 * @param ec context returned by get_entropy_context(), or NULL
 */
void free_entropy_context(EntropyContext *ec){
    if (!ec)
        return;

    av_freep(&ec->non_zero_count_row[0]);
    av_freep(&ec->non_zero_count_row[1]);
    av_freep(&ec->mvd_table[0][0]);
    av_freep(&ec->mvd_table[0][1]);
    av_freep(&ec->mvd_table[1][0]);
    av_freep(&ec->mvd_table[1][1]);

    av_freep(&ec->direct_table[0]);
    av_freep(&ec->direct_table[1]);
    av_freep(&ec->chroma_pred_mode_table[0]);
    av_freep(&ec->chroma_pred_mode_table[1]);
    av_freep(&ec->cbp_table[0]);
    av_freep(&ec->cbp_table[1]);
    av_freep(&ec->qscale_table[0]);
    av_freep(&ec->qscale_table[1]);

    av_freep(&ec->mb_type_table[0]);
    av_freep(&ec->mb_type_table[1]);
    av_freep(&ec->ref_index_table[0][0]);
    av_freep(&ec->ref_index_table[0][1]);
    av_freep(&ec->ref_index_table[1][0]);
    av_freep(&ec->ref_index_table[1][1]);

    av_free(ec);
}

/**
 * Allocates an entropy context sized for the picture described by @p h.
 *
 * Every per-macroblock table is allocated twice (index 0 and 1);
 * init_entropy_buf() selects which copy is the current row and which is the
 * row above. The scan tables are borrowed from @p h, not copied.
 *
 * @param h decoder context supplying mb_width, mb_height, mb_stride and the
 *          zigzag scan tables
 * @return the new context, to be released with free_entropy_context(), or
 *         NULL if any allocation fails
 */
EntropyContext *get_entropy_context(H264Context *h){
    const int mb_height = h->mb_height;
    const int mb_width  = h->mb_width;
    const int mb_stride = h->mb_stride;

    EntropyContext *ec = av_mallocz(sizeof(EntropyContext));

    /* nothing to clean up yet, and the fields below must not be written through NULL */
    if (!ec)
        return NULL;

    ec->mb_width = mb_width;
    ec->mb_height = mb_height;
    ec->b_stride  = mb_width*4;
    ec->mb_stride = mb_stride;

    FF_ALLOCZ_OR_GOTO(ec->non_zero_count_row[0], mb_stride * 8 * sizeof(uint8_t), fail)
    FF_ALLOCZ_OR_GOTO(ec->non_zero_count_row[1], mb_stride * 8 * sizeof(uint8_t), fail)

    FF_ALLOCZ_OR_GOTO(ec->mvd_table[0][0], 16*mb_stride * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(ec->mvd_table[0][1], 16*mb_stride * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(ec->mvd_table[1][0], 16*mb_stride * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(ec->mvd_table[1][1], 16*mb_stride * sizeof(uint8_t), fail);

    FF_ALLOCZ_OR_GOTO(ec->direct_table[0], 4*mb_stride * sizeof(uint8_t) , fail);
    FF_ALLOCZ_OR_GOTO(ec->direct_table[1], 4*mb_stride * sizeof(uint8_t) , fail);

    FF_ALLOCZ_OR_GOTO(ec->chroma_pred_mode_table[0], mb_stride * sizeof(uint8_t), fail)
    FF_ALLOCZ_OR_GOTO(ec->chroma_pred_mode_table[1], mb_stride * sizeof(uint8_t), fail)

    FF_ALLOCZ_OR_GOTO(ec->cbp_table[0], mb_stride * sizeof(uint16_t), fail)
    FF_ALLOCZ_OR_GOTO(ec->cbp_table[1], mb_stride * sizeof(uint16_t), fail)

    /* init_entropy_buf() exposes the qscale and mb_type rows at offset +1 */
    FF_ALLOCZ_OR_GOTO(ec->qscale_table[0], mb_stride * sizeof(uint8_t) , fail)
    FF_ALLOCZ_OR_GOTO(ec->qscale_table[1], mb_stride * sizeof(uint8_t) , fail)

    FF_ALLOCZ_OR_GOTO(ec->mb_type_table[0] , (mb_stride+1) * sizeof(uint32_t), fail)
    FF_ALLOCZ_OR_GOTO(ec->mb_type_table[1] , (mb_stride+1) * sizeof(uint32_t), fail)

    FF_ALLOCZ_OR_GOTO(ec->ref_index_table[0][0], 4*mb_stride * sizeof(int8_t), fail)
    FF_ALLOCZ_OR_GOTO(ec->ref_index_table[1][0], 4*mb_stride * sizeof(int8_t), fail)
    FF_ALLOCZ_OR_GOTO(ec->ref_index_table[0][1], 4*mb_stride * sizeof(int8_t), fail)
    FF_ALLOCZ_OR_GOTO(ec->ref_index_table[1][1], 4*mb_stride * sizeof(int8_t), fail)

    ec->zigzag_scan = h->zigzag_scan;
    ec->zigzag_scan8x8 = h->zigzag_scan8x8;

    return ec;
fail:
    /* the struct was zero-allocated, so tables not yet reached are still NULL */
    free_entropy_context(ec);
    return NULL;
}

/**
 * Points the working row pointers of @p ec at the right half of each
 * double-buffered table for macroblock row @p line.
 *
 * The buffer with index line%2 becomes the current row and the one with
 * index (line+1)%2 becomes the row above. The qscale and mb_type pointers
 * are advanced by one element past the start of their buffers.
 *
 * @param ec   entropy context whose row pointers are updated
 * @param s    slice (not used here)
 * @param line macroblock row about to be decoded
 */
void init_entropy_buf(EntropyContext *ec, H264Slice *s, int line){
    const int cur_row   = line%2;
    const int above_row = (line+1)%2;
    int list;

    (void)s;

    /* per reference list tables */
    for (list = 0; list < 2; list++) {
        ec->mvd[list]           = ec->mvd_table[list][cur_row];
        ec->mvd_top[list]       = ec->mvd_table[list][above_row];
        ec->ref_index[list]     = ec->ref_index_table[list][cur_row];
        ec->ref_index_top[list] = ec->ref_index_table[list][above_row];
    }

    /* current row */
    ec->non_zero_count   = ec->non_zero_count_row[cur_row];
    ec->direct           = ec->direct_table[cur_row];
    ec->chroma_pred_mode = ec->chroma_pred_mode_table[cur_row];
    ec->cbp              = ec->cbp_table[cur_row];
    ec->qscale           = ec->qscale_table[cur_row] + 1;
    ec->mb_type          = ec->mb_type_table[cur_row] + 1;

    /* row above */
    ec->non_zero_count_top   = ec->non_zero_count_row[above_row];
    ec->direct_top           = ec->direct_table[above_row];
    ec->chroma_pred_mode_top = ec->chroma_pred_mode_table[above_row];
    ec->cbp_top              = ec->cbp_table[above_row];
    ec->qscale_top           = ec->qscale_table[above_row] + 1;
    ec->mb_type_top          = ec->mb_type_table[above_row] + 1;
}
